{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 18206, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 1.894879937171936, "epoch": 0.00010985691137293675, "grad_norm": 1.5214658975601196, "learning_rate": 0.0, "loss": 1.4908, "mean_token_accuracy": 0.6401820083459219, "num_tokens": 168636.0, "step": 1 }, { "entropy": 1.9256814221541088, "epoch": 0.0002197138227458735, "grad_norm": 1.8711611032485962, "learning_rate": 3.656307129798904e-08, "loss": 1.4614, "mean_token_accuracy": 0.6461619188388189, "num_tokens": 297186.0, "step": 2 }, { "entropy": 1.936289479335149, "epoch": 0.00032957073411881023, "grad_norm": 1.2696285247802734, "learning_rate": 7.312614259597807e-08, "loss": 1.5012, "mean_token_accuracy": 0.6218364934126536, "num_tokens": 471129.0, "step": 3 }, { "entropy": 1.9193981885910034, "epoch": 0.000439427645491747, "grad_norm": 1.9576971530914307, "learning_rate": 1.096892138939671e-07, "loss": 1.4109, "mean_token_accuracy": 0.6370265334844589, "num_tokens": 632787.0, "step": 4 }, { "entropy": 1.8874422013759613, "epoch": 0.0005492845568646837, "grad_norm": 1.4753385782241821, "learning_rate": 1.4625228519195615e-07, "loss": 1.53, "mean_token_accuracy": 0.6317170361677805, "num_tokens": 814767.0, "step": 5 }, { "entropy": 1.9161963363488514, "epoch": 0.0006591414682376205, "grad_norm": 1.3654813766479492, "learning_rate": 1.8281535648994517e-07, "loss": 1.513, "mean_token_accuracy": 0.6349450548489889, "num_tokens": 955276.0, "step": 6 }, { "entropy": 1.9518279830614726, "epoch": 0.0007689983796105573, "grad_norm": 2.4183151721954346, "learning_rate": 2.193784277879342e-07, "loss": 1.4296, "mean_token_accuracy": 0.6418899148702621, "num_tokens": 1123639.0, "step": 7 }, { "entropy": 1.9186626176039379, "epoch": 0.000878855290983494, "grad_norm": 2.4768245220184326, "learning_rate": 2.5594149908592327e-07, "loss": 1.4314, "mean_token_accuracy": 0.6455866148074468, "num_tokens": 1256300.0, "step": 8 }, { "entropy": 1.8859932323296864, "epoch": 0.0009887122023564308, "grad_norm": 2.537692070007324, "learning_rate": 2.925045703839123e-07, "loss": 1.3804, "mean_token_accuracy": 0.6570307711760203, "num_tokens": 1391902.0, "step": 9 }, { "entropy": 1.9118959108988445, "epoch": 0.0010985691137293675, "grad_norm": 2.2627501487731934, "learning_rate": 3.2906764168190127e-07, "loss": 1.4766, "mean_token_accuracy": 0.6348355263471603, "num_tokens": 1513301.0, "step": 10 }, { "entropy": 1.927810400724411, "epoch": 0.0012084260251023042, "grad_norm": 2.3921263217926025, "learning_rate": 3.6563071297989034e-07, "loss": 1.4505, "mean_token_accuracy": 0.6397508382797241, "num_tokens": 1646836.0, "step": 11 }, { "entropy": 1.9784689545631409, "epoch": 0.001318282936475241, "grad_norm": 1.7080570459365845, "learning_rate": 4.021937842778794e-07, "loss": 1.4932, "mean_token_accuracy": 0.6328056206305822, "num_tokens": 1790819.0, "step": 12 }, { "entropy": 1.832368512948354, "epoch": 0.0014281398478481777, "grad_norm": 1.9487069845199585, "learning_rate": 4.387568555758684e-07, "loss": 1.4971, "mean_token_accuracy": 0.6369460622469584, "num_tokens": 1944997.0, "step": 13 }, { "entropy": 1.9581014811992645, "epoch": 0.0015379967592211146, "grad_norm": 1.672973871231079, "learning_rate": 4.7531992687385747e-07, "loss": 1.572, "mean_token_accuracy": 0.6146093358596166, "num_tokens": 2091666.0, "step": 14 }, { "entropy": 2.0294719139734902, "epoch": 0.0016478536705940513, "grad_norm": 2.086653709411621, "learning_rate": 5.118829981718465e-07, "loss": 1.4884, "mean_token_accuracy": 0.6278541535139084, "num_tokens": 2216390.0, "step": 15 }, { "entropy": 1.934642493724823, "epoch": 0.001757710581966988, "grad_norm": 1.553402066230774, "learning_rate": 5.484460694698355e-07, "loss": 1.4823, "mean_token_accuracy": 0.634530633687973, "num_tokens": 2388588.0, "step": 16 }, { "entropy": 1.977916826804479, "epoch": 0.0018675674933399248, "grad_norm": 1.5559946298599243, "learning_rate": 5.850091407678246e-07, "loss": 1.4955, "mean_token_accuracy": 0.6247076193491617, "num_tokens": 2557798.0, "step": 17 }, { "entropy": 1.8902287880579631, "epoch": 0.0019774244047128615, "grad_norm": 1.4230289459228516, "learning_rate": 6.215722120658136e-07, "loss": 1.4787, "mean_token_accuracy": 0.6302339931329092, "num_tokens": 2738112.0, "step": 18 }, { "entropy": 1.8939658204714458, "epoch": 0.0020872813160857985, "grad_norm": 1.492349624633789, "learning_rate": 6.581352833638025e-07, "loss": 1.4823, "mean_token_accuracy": 0.6441057572762171, "num_tokens": 2898646.0, "step": 19 }, { "entropy": 1.8878755668799083, "epoch": 0.002197138227458735, "grad_norm": 1.266729712486267, "learning_rate": 6.946983546617917e-07, "loss": 1.4777, "mean_token_accuracy": 0.6475276350975037, "num_tokens": 3110857.0, "step": 20 }, { "entropy": 1.8882379333178203, "epoch": 0.002306995138831672, "grad_norm": 1.5952550172805786, "learning_rate": 7.312614259597807e-07, "loss": 1.4565, "mean_token_accuracy": 0.6390559126933416, "num_tokens": 3307447.0, "step": 21 }, { "entropy": 1.9016104241212208, "epoch": 0.0024168520502046084, "grad_norm": 1.1307969093322754, "learning_rate": 7.678244972577697e-07, "loss": 1.4924, "mean_token_accuracy": 0.6293008426825205, "num_tokens": 3485179.0, "step": 22 }, { "entropy": 1.9216719369093578, "epoch": 0.0025267089615775454, "grad_norm": 1.1407924890518188, "learning_rate": 8.043875685557588e-07, "loss": 1.6257, "mean_token_accuracy": 0.6146015028158823, "num_tokens": 3717262.0, "step": 23 }, { "entropy": 1.8951739370822906, "epoch": 0.002636565872950482, "grad_norm": 1.7030082941055298, "learning_rate": 8.409506398537478e-07, "loss": 1.5923, "mean_token_accuracy": 0.6380182355642319, "num_tokens": 3838144.0, "step": 24 }, { "entropy": 1.9046282172203064, "epoch": 0.002746422784323419, "grad_norm": 1.6221411228179932, "learning_rate": 8.775137111517368e-07, "loss": 1.3792, "mean_token_accuracy": 0.6614825973908106, "num_tokens": 3988846.0, "step": 25 }, { "entropy": 1.9025114277998607, "epoch": 0.0028562796956963553, "grad_norm": 1.7412817478179932, "learning_rate": 9.140767824497258e-07, "loss": 1.5043, "mean_token_accuracy": 0.6499176571766535, "num_tokens": 4123835.0, "step": 26 }, { "entropy": 1.9772209624449413, "epoch": 0.0029661366070692923, "grad_norm": 1.6628296375274658, "learning_rate": 9.506398537477149e-07, "loss": 1.5479, "mean_token_accuracy": 0.6233152449131012, "num_tokens": 4264365.0, "step": 27 }, { "entropy": 1.9088290234406788, "epoch": 0.003075993518442229, "grad_norm": 1.2123667001724243, "learning_rate": 9.87202925045704e-07, "loss": 1.5121, "mean_token_accuracy": 0.6258416324853897, "num_tokens": 4480352.0, "step": 28 }, { "entropy": 1.9050040543079376, "epoch": 0.0031858504298151657, "grad_norm": 2.060734510421753, "learning_rate": 1.023765996343693e-06, "loss": 1.3907, "mean_token_accuracy": 0.6540684401988983, "num_tokens": 4613774.0, "step": 29 }, { "entropy": 1.9359141091505687, "epoch": 0.0032957073411881027, "grad_norm": 0.9076595306396484, "learning_rate": 1.060329067641682e-06, "loss": 1.6216, "mean_token_accuracy": 0.6132212653756142, "num_tokens": 4864127.0, "step": 30 }, { "entropy": 1.9393312633037567, "epoch": 0.003405564252561039, "grad_norm": 1.4876841306686401, "learning_rate": 1.096892138939671e-06, "loss": 1.5932, "mean_token_accuracy": 0.6290038675069809, "num_tokens": 5048366.0, "step": 31 }, { "entropy": 1.8906433979670207, "epoch": 0.003515421163933976, "grad_norm": 1.0335384607315063, "learning_rate": 1.13345521023766e-06, "loss": 1.6327, "mean_token_accuracy": 0.6198825240135193, "num_tokens": 5249785.0, "step": 32 }, { "entropy": 1.891570011774699, "epoch": 0.0036252780753069126, "grad_norm": 1.4123249053955078, "learning_rate": 1.1700182815356492e-06, "loss": 1.3654, "mean_token_accuracy": 0.6579603354136149, "num_tokens": 5414378.0, "step": 33 }, { "entropy": 1.9350098570187886, "epoch": 0.0037351349866798496, "grad_norm": 1.7732880115509033, "learning_rate": 1.206581352833638e-06, "loss": 1.4123, "mean_token_accuracy": 0.6357505569855372, "num_tokens": 5576716.0, "step": 34 }, { "entropy": 1.9273627698421478, "epoch": 0.003844991898052786, "grad_norm": 2.0864319801330566, "learning_rate": 1.2431444241316272e-06, "loss": 1.4579, "mean_token_accuracy": 0.6357160607973734, "num_tokens": 5702401.0, "step": 35 }, { "entropy": 1.8696773449579875, "epoch": 0.003954848809425723, "grad_norm": 1.2789888381958008, "learning_rate": 1.2797074954296162e-06, "loss": 1.5019, "mean_token_accuracy": 0.6273581286271414, "num_tokens": 5888777.0, "step": 36 }, { "entropy": 1.8879920840263367, "epoch": 0.0040647057207986595, "grad_norm": 1.2594259977340698, "learning_rate": 1.316270566727605e-06, "loss": 1.4994, "mean_token_accuracy": 0.6347703188657761, "num_tokens": 6086963.0, "step": 37 }, { "entropy": 1.8589064280192058, "epoch": 0.004174562632171597, "grad_norm": 1.607129693031311, "learning_rate": 1.3528336380255944e-06, "loss": 1.4691, "mean_token_accuracy": 0.6347041179736456, "num_tokens": 6240554.0, "step": 38 }, { "entropy": 1.917704850435257, "epoch": 0.004284419543544533, "grad_norm": 2.4967267513275146, "learning_rate": 1.3893967093235833e-06, "loss": 1.4689, "mean_token_accuracy": 0.634268601735433, "num_tokens": 6369609.0, "step": 39 }, { "entropy": 1.921486258506775, "epoch": 0.00439427645491747, "grad_norm": 1.696545124053955, "learning_rate": 1.4259597806215722e-06, "loss": 1.4784, "mean_token_accuracy": 0.6417160034179688, "num_tokens": 6521087.0, "step": 40 }, { "entropy": 1.9418790340423584, "epoch": 0.0045041333662904064, "grad_norm": 1.5666840076446533, "learning_rate": 1.4625228519195614e-06, "loss": 1.4172, "mean_token_accuracy": 0.6557289958000183, "num_tokens": 6688333.0, "step": 41 }, { "entropy": 1.8845481673876445, "epoch": 0.004613990277663344, "grad_norm": 1.5776560306549072, "learning_rate": 1.4990859232175503e-06, "loss": 1.4631, "mean_token_accuracy": 0.6422620664040247, "num_tokens": 6832991.0, "step": 42 }, { "entropy": 1.9316304922103882, "epoch": 0.00472384718903628, "grad_norm": 2.2713351249694824, "learning_rate": 1.5356489945155394e-06, "loss": 1.5357, "mean_token_accuracy": 0.6292731215556463, "num_tokens": 6962510.0, "step": 43 }, { "entropy": 1.8830671906471252, "epoch": 0.004833704100409217, "grad_norm": 1.8595447540283203, "learning_rate": 1.5722120658135283e-06, "loss": 1.3832, "mean_token_accuracy": 0.6504772454500198, "num_tokens": 7113892.0, "step": 44 }, { "entropy": 1.9839610954125722, "epoch": 0.004943561011782153, "grad_norm": 1.653223991394043, "learning_rate": 1.6087751371115177e-06, "loss": 1.6831, "mean_token_accuracy": 0.6217963248491287, "num_tokens": 7242184.0, "step": 45 }, { "entropy": 1.8768392503261566, "epoch": 0.005053417923155091, "grad_norm": 2.0047905445098877, "learning_rate": 1.6453382084095066e-06, "loss": 1.3338, "mean_token_accuracy": 0.6631583720445633, "num_tokens": 7439452.0, "step": 46 }, { "entropy": 1.9426932732264202, "epoch": 0.005163274834528027, "grad_norm": 1.812389850616455, "learning_rate": 1.6819012797074955e-06, "loss": 1.5984, "mean_token_accuracy": 0.6237726360559464, "num_tokens": 7581382.0, "step": 47 }, { "entropy": 1.9155326286951702, "epoch": 0.005273131745900964, "grad_norm": 1.7920594215393066, "learning_rate": 1.7184643510054846e-06, "loss": 1.502, "mean_token_accuracy": 0.6386721283197403, "num_tokens": 7734683.0, "step": 48 }, { "entropy": 1.9195171296596527, "epoch": 0.005382988657273901, "grad_norm": 2.0478262901306152, "learning_rate": 1.7550274223034736e-06, "loss": 1.4524, "mean_token_accuracy": 0.6335723251104355, "num_tokens": 7913144.0, "step": 49 }, { "entropy": 1.9190079867839813, "epoch": 0.005492845568646838, "grad_norm": 1.5729821920394897, "learning_rate": 1.7915904936014627e-06, "loss": 1.5228, "mean_token_accuracy": 0.6325205812851588, "num_tokens": 8053143.0, "step": 50 }, { "entropy": 1.9134818116823833, "epoch": 0.005602702480019774, "grad_norm": 2.2003588676452637, "learning_rate": 1.8281535648994516e-06, "loss": 1.4542, "mean_token_accuracy": 0.6385191728671392, "num_tokens": 8223896.0, "step": 51 }, { "entropy": 1.8575187226136525, "epoch": 0.005712559391392711, "grad_norm": 2.713563919067383, "learning_rate": 1.864716636197441e-06, "loss": 1.4948, "mean_token_accuracy": 0.6294996092716852, "num_tokens": 8382141.0, "step": 52 }, { "entropy": 1.9104352692763011, "epoch": 0.005822416302765648, "grad_norm": 1.6903510093688965, "learning_rate": 1.9012797074954299e-06, "loss": 1.4369, "mean_token_accuracy": 0.6408693542083105, "num_tokens": 8544191.0, "step": 53 }, { "entropy": 1.9804502725601196, "epoch": 0.0059322732141385845, "grad_norm": 2.0042126178741455, "learning_rate": 1.9378427787934186e-06, "loss": 1.5279, "mean_token_accuracy": 0.6251623729864756, "num_tokens": 8703483.0, "step": 54 }, { "entropy": 1.9068463842074077, "epoch": 0.006042130125511521, "grad_norm": 1.6808873414993286, "learning_rate": 1.974405850091408e-06, "loss": 1.4428, "mean_token_accuracy": 0.6341716150442759, "num_tokens": 8874273.0, "step": 55 }, { "entropy": 1.8545368413130443, "epoch": 0.006151987036884458, "grad_norm": 1.35502290725708, "learning_rate": 2.010968921389397e-06, "loss": 1.4585, "mean_token_accuracy": 0.6444613436857859, "num_tokens": 9032964.0, "step": 56 }, { "entropy": 1.9203276832898457, "epoch": 0.006261843948257395, "grad_norm": 2.1629741191864014, "learning_rate": 2.047531992687386e-06, "loss": 1.4645, "mean_token_accuracy": 0.6436514407396317, "num_tokens": 9161160.0, "step": 57 }, { "entropy": 1.9330822229385376, "epoch": 0.0063717008596303314, "grad_norm": 2.02375864982605, "learning_rate": 2.084095063985375e-06, "loss": 1.4975, "mean_token_accuracy": 0.6350375364224116, "num_tokens": 9332649.0, "step": 58 }, { "entropy": 1.8069697419802349, "epoch": 0.006481557771003268, "grad_norm": 1.4925216436386108, "learning_rate": 2.120658135283364e-06, "loss": 1.4355, "mean_token_accuracy": 0.6453156520922979, "num_tokens": 9484300.0, "step": 59 }, { "entropy": 1.9819251795609791, "epoch": 0.006591414682376205, "grad_norm": 1.2172040939331055, "learning_rate": 2.157221206581353e-06, "loss": 1.5083, "mean_token_accuracy": 0.6365701605876287, "num_tokens": 9689189.0, "step": 60 }, { "entropy": 1.8836710751056671, "epoch": 0.006701271593749142, "grad_norm": 1.4082310199737549, "learning_rate": 2.193784277879342e-06, "loss": 1.4055, "mean_token_accuracy": 0.641944482922554, "num_tokens": 9871400.0, "step": 61 }, { "entropy": 1.862162669499715, "epoch": 0.006811128505122078, "grad_norm": 1.952700138092041, "learning_rate": 2.230347349177331e-06, "loss": 1.4579, "mean_token_accuracy": 0.6387267460425695, "num_tokens": 10030629.0, "step": 62 }, { "entropy": 1.86505792538325, "epoch": 0.006920985416495015, "grad_norm": 2.528541326522827, "learning_rate": 2.26691042047532e-06, "loss": 1.4184, "mean_token_accuracy": 0.6428513675928116, "num_tokens": 10174909.0, "step": 63 }, { "entropy": 1.8634338974952698, "epoch": 0.007030842327867952, "grad_norm": 1.5209519863128662, "learning_rate": 2.3034734917733095e-06, "loss": 1.6417, "mean_token_accuracy": 0.6137880484263102, "num_tokens": 10389155.0, "step": 64 }, { "entropy": 2.011722683906555, "epoch": 0.007140699239240889, "grad_norm": 2.0947635173797607, "learning_rate": 2.3400365630712984e-06, "loss": 1.6333, "mean_token_accuracy": 0.6242827375729879, "num_tokens": 10510783.0, "step": 65 }, { "entropy": 1.9394526779651642, "epoch": 0.007250556150613825, "grad_norm": 2.9054830074310303, "learning_rate": 2.3765996343692873e-06, "loss": 1.4163, "mean_token_accuracy": 0.6395674695571264, "num_tokens": 10644860.0, "step": 66 }, { "entropy": 1.8742198546727498, "epoch": 0.007360413061986763, "grad_norm": 1.4175904989242554, "learning_rate": 2.413162705667276e-06, "loss": 1.4076, "mean_token_accuracy": 0.6420022894938787, "num_tokens": 10827216.0, "step": 67 }, { "entropy": 1.8331912557284038, "epoch": 0.007470269973359699, "grad_norm": 1.3983286619186401, "learning_rate": 2.449725776965265e-06, "loss": 1.5168, "mean_token_accuracy": 0.623612051208814, "num_tokens": 11040709.0, "step": 68 }, { "entropy": 2.0345064600308738, "epoch": 0.007580126884732636, "grad_norm": 1.5471314191818237, "learning_rate": 2.4862888482632545e-06, "loss": 1.486, "mean_token_accuracy": 0.6237680067618688, "num_tokens": 11237129.0, "step": 69 }, { "entropy": 1.9124818940957387, "epoch": 0.007689983796105572, "grad_norm": 2.0542972087860107, "learning_rate": 2.5228519195612434e-06, "loss": 1.5191, "mean_token_accuracy": 0.6362631718317667, "num_tokens": 11393268.0, "step": 70 }, { "entropy": 1.9250177542368572, "epoch": 0.0077998407074785095, "grad_norm": 1.6303218603134155, "learning_rate": 2.5594149908592323e-06, "loss": 1.566, "mean_token_accuracy": 0.6235620379447937, "num_tokens": 11577989.0, "step": 71 }, { "entropy": 1.9424512485663097, "epoch": 0.007909697618851446, "grad_norm": 1.447934627532959, "learning_rate": 2.5959780621572212e-06, "loss": 1.4839, "mean_token_accuracy": 0.630613719423612, "num_tokens": 11740054.0, "step": 72 }, { "entropy": 1.8930113911628723, "epoch": 0.008019554530224383, "grad_norm": 1.1407986879348755, "learning_rate": 2.63254113345521e-06, "loss": 1.4887, "mean_token_accuracy": 0.6346247841914495, "num_tokens": 12000084.0, "step": 73 }, { "entropy": 1.9313855667908986, "epoch": 0.008129411441597319, "grad_norm": 1.1938432455062866, "learning_rate": 2.6691042047531995e-06, "loss": 1.5619, "mean_token_accuracy": 0.6224364936351776, "num_tokens": 12206632.0, "step": 74 }, { "entropy": 1.9749772349993389, "epoch": 0.008239268352970256, "grad_norm": 1.916087031364441, "learning_rate": 2.705667276051189e-06, "loss": 1.5133, "mean_token_accuracy": 0.63983054459095, "num_tokens": 12304314.0, "step": 75 }, { "entropy": 1.9599759479363759, "epoch": 0.008349125264343194, "grad_norm": 1.359966516494751, "learning_rate": 2.7422303473491773e-06, "loss": 1.5562, "mean_token_accuracy": 0.6150669455528259, "num_tokens": 12501896.0, "step": 76 }, { "entropy": 1.8981466790040333, "epoch": 0.00845898217571613, "grad_norm": 1.2441012859344482, "learning_rate": 2.7787934186471667e-06, "loss": 1.5665, "mean_token_accuracy": 0.6264956047137579, "num_tokens": 12691020.0, "step": 77 }, { "entropy": 1.9287964701652527, "epoch": 0.008568839087089067, "grad_norm": 1.3505768775939941, "learning_rate": 2.8153564899451556e-06, "loss": 1.6804, "mean_token_accuracy": 0.6293187389771143, "num_tokens": 12892070.0, "step": 78 }, { "entropy": 1.8864564498265584, "epoch": 0.008678695998462002, "grad_norm": 1.230501413345337, "learning_rate": 2.8519195612431445e-06, "loss": 1.5448, "mean_token_accuracy": 0.6269682745138804, "num_tokens": 13066612.0, "step": 79 }, { "entropy": 1.9039170742034912, "epoch": 0.00878855290983494, "grad_norm": 1.3592942953109741, "learning_rate": 2.8884826325411334e-06, "loss": 1.4797, "mean_token_accuracy": 0.6321501731872559, "num_tokens": 13232664.0, "step": 80 }, { "entropy": 1.9469401339689891, "epoch": 0.008898409821207877, "grad_norm": 2.1602654457092285, "learning_rate": 2.9250457038391228e-06, "loss": 1.4569, "mean_token_accuracy": 0.6350138882795969, "num_tokens": 13378630.0, "step": 81 }, { "entropy": 1.8785783151785533, "epoch": 0.009008266732580813, "grad_norm": 1.3273346424102783, "learning_rate": 2.961608775137112e-06, "loss": 1.3908, "mean_token_accuracy": 0.6404936015605927, "num_tokens": 13525919.0, "step": 82 }, { "entropy": 1.9613807598749797, "epoch": 0.00911812364395375, "grad_norm": 1.595691204071045, "learning_rate": 2.9981718464351006e-06, "loss": 1.4964, "mean_token_accuracy": 0.6223418762286504, "num_tokens": 13685579.0, "step": 83 }, { "entropy": 1.9553207556406658, "epoch": 0.009227980555326688, "grad_norm": 1.1953165531158447, "learning_rate": 3.03473491773309e-06, "loss": 1.5688, "mean_token_accuracy": 0.6186368266741434, "num_tokens": 13901601.0, "step": 84 }, { "entropy": 1.8914847175280254, "epoch": 0.009337837466699623, "grad_norm": 2.109435558319092, "learning_rate": 3.071297989031079e-06, "loss": 1.4397, "mean_token_accuracy": 0.639526754617691, "num_tokens": 14060730.0, "step": 85 }, { "entropy": 1.9378711183865864, "epoch": 0.00944769437807256, "grad_norm": 1.584813117980957, "learning_rate": 3.1078610603290678e-06, "loss": 1.6438, "mean_token_accuracy": 0.6247407471140226, "num_tokens": 14220171.0, "step": 86 }, { "entropy": 1.8821549117565155, "epoch": 0.009557551289445498, "grad_norm": 1.3728727102279663, "learning_rate": 3.1444241316270567e-06, "loss": 1.4598, "mean_token_accuracy": 0.6492441594600677, "num_tokens": 14426508.0, "step": 87 }, { "entropy": 1.945362647374471, "epoch": 0.009667408200818434, "grad_norm": 1.353712797164917, "learning_rate": 3.180987202925046e-06, "loss": 1.4469, "mean_token_accuracy": 0.6372426698605219, "num_tokens": 14586179.0, "step": 88 }, { "entropy": 2.001281092564265, "epoch": 0.009777265112191371, "grad_norm": 1.7841237783432007, "learning_rate": 3.2175502742230354e-06, "loss": 1.6689, "mean_token_accuracy": 0.6123541295528412, "num_tokens": 14753587.0, "step": 89 }, { "entropy": 1.8964291512966156, "epoch": 0.009887122023564307, "grad_norm": 1.8038142919540405, "learning_rate": 3.254113345521024e-06, "loss": 1.5155, "mean_token_accuracy": 0.6321656405925751, "num_tokens": 14897850.0, "step": 90 }, { "entropy": 1.9467082222302754, "epoch": 0.009996978934937244, "grad_norm": 1.504103422164917, "learning_rate": 3.290676416819013e-06, "loss": 1.472, "mean_token_accuracy": 0.638060932358106, "num_tokens": 15085294.0, "step": 91 }, { "entropy": 1.895034392674764, "epoch": 0.010106835846310181, "grad_norm": 1.6101082563400269, "learning_rate": 3.327239488117002e-06, "loss": 1.4261, "mean_token_accuracy": 0.6464575280745825, "num_tokens": 15240577.0, "step": 92 }, { "entropy": 1.9196984469890594, "epoch": 0.010216692757683117, "grad_norm": 2.471315383911133, "learning_rate": 3.363802559414991e-06, "loss": 1.4078, "mean_token_accuracy": 0.6479363540808359, "num_tokens": 15390254.0, "step": 93 }, { "entropy": 1.8948115607102711, "epoch": 0.010326549669056054, "grad_norm": 1.4012452363967896, "learning_rate": 3.40036563071298e-06, "loss": 1.5964, "mean_token_accuracy": 0.6217137028773626, "num_tokens": 15591903.0, "step": 94 }, { "entropy": 1.9357371429602306, "epoch": 0.010436406580428992, "grad_norm": 1.724313735961914, "learning_rate": 3.4369287020109693e-06, "loss": 1.4818, "mean_token_accuracy": 0.6213598748048147, "num_tokens": 15757784.0, "step": 95 }, { "entropy": 1.9804232120513916, "epoch": 0.010546263491801927, "grad_norm": 1.3886477947235107, "learning_rate": 3.4734917733089586e-06, "loss": 1.5319, "mean_token_accuracy": 0.625564381480217, "num_tokens": 15919103.0, "step": 96 }, { "entropy": 1.8893166085084279, "epoch": 0.010656120403174865, "grad_norm": 2.0127227306365967, "learning_rate": 3.510054844606947e-06, "loss": 1.4025, "mean_token_accuracy": 0.652299756805102, "num_tokens": 16065462.0, "step": 97 }, { "entropy": 2.0099782148996987, "epoch": 0.010765977314547802, "grad_norm": 1.174246072769165, "learning_rate": 3.5466179159049365e-06, "loss": 1.5865, "mean_token_accuracy": 0.6206269313891729, "num_tokens": 16230912.0, "step": 98 }, { "entropy": 1.9607905944188435, "epoch": 0.010875834225920738, "grad_norm": 1.2836897373199463, "learning_rate": 3.5831809872029254e-06, "loss": 1.5893, "mean_token_accuracy": 0.6317617396513621, "num_tokens": 16401514.0, "step": 99 }, { "entropy": 1.9421872198581696, "epoch": 0.010985691137293675, "grad_norm": 1.39162015914917, "learning_rate": 3.6197440585009143e-06, "loss": 1.5033, "mean_token_accuracy": 0.6282460689544678, "num_tokens": 16572187.0, "step": 100 }, { "entropy": 1.9639080266157787, "epoch": 0.011095548048666611, "grad_norm": 1.5374300479888916, "learning_rate": 3.6563071297989032e-06, "loss": 1.5919, "mean_token_accuracy": 0.6466963390509287, "num_tokens": 16722551.0, "step": 101 }, { "entropy": 1.951703170935313, "epoch": 0.011205404960039548, "grad_norm": 1.8960427045822144, "learning_rate": 3.6928702010968926e-06, "loss": 1.5075, "mean_token_accuracy": 0.6368091404438019, "num_tokens": 16898011.0, "step": 102 }, { "entropy": 1.9429233868916829, "epoch": 0.011315261871412486, "grad_norm": 1.956182599067688, "learning_rate": 3.729433272394882e-06, "loss": 1.5054, "mean_token_accuracy": 0.6246836185455322, "num_tokens": 17058898.0, "step": 103 }, { "entropy": 1.873151530822118, "epoch": 0.011425118782785421, "grad_norm": 2.225306510925293, "learning_rate": 3.7659963436928704e-06, "loss": 1.4609, "mean_token_accuracy": 0.6369840502738953, "num_tokens": 17181314.0, "step": 104 }, { "entropy": 2.0154978732268014, "epoch": 0.011534975694158359, "grad_norm": 1.3400015830993652, "learning_rate": 3.8025594149908597e-06, "loss": 1.5389, "mean_token_accuracy": 0.6156003673871359, "num_tokens": 17384388.0, "step": 105 }, { "entropy": 1.8415813446044922, "epoch": 0.011644832605531296, "grad_norm": 1.7141013145446777, "learning_rate": 3.839122486288849e-06, "loss": 1.4315, "mean_token_accuracy": 0.642757977048556, "num_tokens": 17534920.0, "step": 106 }, { "entropy": 1.8614780008792877, "epoch": 0.011754689516904232, "grad_norm": 0.8813568949699402, "learning_rate": 3.875685557586837e-06, "loss": 1.5428, "mean_token_accuracy": 0.6348659793535868, "num_tokens": 17788973.0, "step": 107 }, { "entropy": 1.9813534617424011, "epoch": 0.011864546428277169, "grad_norm": 1.8785918951034546, "learning_rate": 3.912248628884827e-06, "loss": 1.5586, "mean_token_accuracy": 0.6213805874188741, "num_tokens": 17924037.0, "step": 108 }, { "entropy": 1.981442888577779, "epoch": 0.011974403339650106, "grad_norm": 1.5279945135116577, "learning_rate": 3.948811700182816e-06, "loss": 1.5167, "mean_token_accuracy": 0.6179635375738144, "num_tokens": 18063221.0, "step": 109 }, { "entropy": 1.9768773019313812, "epoch": 0.012084260251023042, "grad_norm": 0.9247986078262329, "learning_rate": 3.985374771480805e-06, "loss": 1.5735, "mean_token_accuracy": 0.6179714898268381, "num_tokens": 18322095.0, "step": 110 }, { "entropy": 1.8607787589232128, "epoch": 0.01219411716239598, "grad_norm": 1.2437028884887695, "learning_rate": 4.021937842778794e-06, "loss": 1.4921, "mean_token_accuracy": 0.6286398619413376, "num_tokens": 18535525.0, "step": 111 }, { "entropy": 1.913169115781784, "epoch": 0.012303974073768917, "grad_norm": 1.9801437854766846, "learning_rate": 4.058500914076783e-06, "loss": 1.4637, "mean_token_accuracy": 0.6328051636616389, "num_tokens": 18696140.0, "step": 112 }, { "entropy": 2.0254646937052407, "epoch": 0.012413830985141852, "grad_norm": 1.2046679258346558, "learning_rate": 4.095063985374772e-06, "loss": 1.5517, "mean_token_accuracy": 0.6129643271366755, "num_tokens": 18877101.0, "step": 113 }, { "entropy": 1.8931627968947093, "epoch": 0.01252368789651479, "grad_norm": 1.4202874898910522, "learning_rate": 4.1316270566727604e-06, "loss": 1.5459, "mean_token_accuracy": 0.6342976987361908, "num_tokens": 19060103.0, "step": 114 }, { "entropy": 1.9346506992975872, "epoch": 0.012633544807887725, "grad_norm": 1.8455705642700195, "learning_rate": 4.16819012797075e-06, "loss": 1.4788, "mean_token_accuracy": 0.635664368669192, "num_tokens": 19193959.0, "step": 115 }, { "entropy": 1.891279657681783, "epoch": 0.012743401719260663, "grad_norm": 1.2223505973815918, "learning_rate": 4.204753199268739e-06, "loss": 1.581, "mean_token_accuracy": 0.6405478020509084, "num_tokens": 19369545.0, "step": 116 }, { "entropy": 1.856790542602539, "epoch": 0.0128532586306336, "grad_norm": 2.1477673053741455, "learning_rate": 4.241316270566728e-06, "loss": 1.5261, "mean_token_accuracy": 0.6329584916432699, "num_tokens": 19520434.0, "step": 117 }, { "entropy": 1.9323392510414124, "epoch": 0.012963115542006536, "grad_norm": 1.760062336921692, "learning_rate": 4.277879341864717e-06, "loss": 1.5352, "mean_token_accuracy": 0.6246002415815989, "num_tokens": 19706187.0, "step": 118 }, { "entropy": 1.792329490184784, "epoch": 0.013072972453379473, "grad_norm": 1.7655450105667114, "learning_rate": 4.314442413162706e-06, "loss": 1.5414, "mean_token_accuracy": 0.6422147999207178, "num_tokens": 19895609.0, "step": 119 }, { "entropy": 1.9673140943050385, "epoch": 0.01318282936475241, "grad_norm": 1.106261968612671, "learning_rate": 4.351005484460696e-06, "loss": 1.5714, "mean_token_accuracy": 0.6150963505109152, "num_tokens": 20129061.0, "step": 120 }, { "entropy": 1.9717350602149963, "epoch": 0.013292686276125346, "grad_norm": 1.8436874151229858, "learning_rate": 4.387568555758684e-06, "loss": 1.553, "mean_token_accuracy": 0.617359588543574, "num_tokens": 20323094.0, "step": 121 }, { "entropy": 1.9026523629824321, "epoch": 0.013402543187498284, "grad_norm": 1.9257417917251587, "learning_rate": 4.4241316270566735e-06, "loss": 1.6199, "mean_token_accuracy": 0.6336856335401535, "num_tokens": 20478148.0, "step": 122 }, { "entropy": 1.9043993254502614, "epoch": 0.013512400098871221, "grad_norm": 1.8216344118118286, "learning_rate": 4.460694698354662e-06, "loss": 1.4728, "mean_token_accuracy": 0.6363042940696081, "num_tokens": 20659428.0, "step": 123 }, { "entropy": 1.923029104868571, "epoch": 0.013622257010244157, "grad_norm": 1.6235153675079346, "learning_rate": 4.497257769652651e-06, "loss": 1.4689, "mean_token_accuracy": 0.6350155224402746, "num_tokens": 20848469.0, "step": 124 }, { "entropy": 1.9097663859526317, "epoch": 0.013732113921617094, "grad_norm": 2.066504955291748, "learning_rate": 4.53382084095064e-06, "loss": 1.6551, "mean_token_accuracy": 0.6251655717690786, "num_tokens": 20990401.0, "step": 125 }, { "entropy": 1.8578063448270161, "epoch": 0.01384197083299003, "grad_norm": 2.0339179039001465, "learning_rate": 4.570383912248629e-06, "loss": 1.4402, "mean_token_accuracy": 0.6496027906735738, "num_tokens": 21126262.0, "step": 126 }, { "entropy": 1.8716703255971272, "epoch": 0.013951827744362967, "grad_norm": 1.5795789957046509, "learning_rate": 4.606946983546619e-06, "loss": 1.4847, "mean_token_accuracy": 0.6409290333588918, "num_tokens": 21277528.0, "step": 127 }, { "entropy": 1.960438460111618, "epoch": 0.014061684655735904, "grad_norm": 1.5360902547836304, "learning_rate": 4.643510054844607e-06, "loss": 1.5797, "mean_token_accuracy": 0.6345019638538361, "num_tokens": 21444158.0, "step": 128 }, { "entropy": 1.940459320942561, "epoch": 0.01417154156710884, "grad_norm": 1.4799344539642334, "learning_rate": 4.680073126142597e-06, "loss": 1.6964, "mean_token_accuracy": 0.6091892321904501, "num_tokens": 21610345.0, "step": 129 }, { "entropy": 1.9162492255369823, "epoch": 0.014281398478481777, "grad_norm": 1.6671146154403687, "learning_rate": 4.716636197440586e-06, "loss": 1.6158, "mean_token_accuracy": 0.616663247346878, "num_tokens": 21774213.0, "step": 130 }, { "entropy": 1.9880765875180562, "epoch": 0.014391255389854715, "grad_norm": 2.272390604019165, "learning_rate": 4.753199268738575e-06, "loss": 1.646, "mean_token_accuracy": 0.6181524445613226, "num_tokens": 21905255.0, "step": 131 }, { "entropy": 1.8855270047982533, "epoch": 0.01450111230122765, "grad_norm": 1.2006255388259888, "learning_rate": 4.7897623400365635e-06, "loss": 1.6963, "mean_token_accuracy": 0.6048007061084112, "num_tokens": 22098818.0, "step": 132 }, { "entropy": 1.9694058795770009, "epoch": 0.014610969212600588, "grad_norm": 1.6814290285110474, "learning_rate": 4.826325411334552e-06, "loss": 1.5216, "mean_token_accuracy": 0.6235235830148061, "num_tokens": 22262213.0, "step": 133 }, { "entropy": 1.8912192583084106, "epoch": 0.014720826123973525, "grad_norm": 2.5680627822875977, "learning_rate": 4.862888482632542e-06, "loss": 1.4485, "mean_token_accuracy": 0.639568880200386, "num_tokens": 22415716.0, "step": 134 }, { "entropy": 1.8896643221378326, "epoch": 0.014830683035346461, "grad_norm": 2.191824436187744, "learning_rate": 4.89945155393053e-06, "loss": 1.6072, "mean_token_accuracy": 0.6254559010267258, "num_tokens": 22554208.0, "step": 135 }, { "entropy": 1.838125040133794, "epoch": 0.014940539946719398, "grad_norm": 2.2072277069091797, "learning_rate": 4.93601462522852e-06, "loss": 1.4288, "mean_token_accuracy": 0.6442883511384329, "num_tokens": 22686348.0, "step": 136 }, { "entropy": 1.9419346153736115, "epoch": 0.015050396858092334, "grad_norm": 1.4282070398330688, "learning_rate": 4.972577696526509e-06, "loss": 1.588, "mean_token_accuracy": 0.6240701427062353, "num_tokens": 22914244.0, "step": 137 }, { "entropy": 1.9297908544540405, "epoch": 0.015160253769465271, "grad_norm": 1.7204630374908447, "learning_rate": 5.009140767824498e-06, "loss": 1.53, "mean_token_accuracy": 0.6246543924013773, "num_tokens": 23064559.0, "step": 138 }, { "entropy": 1.89528426527977, "epoch": 0.015270110680838209, "grad_norm": 1.2303673028945923, "learning_rate": 5.045703839122487e-06, "loss": 1.559, "mean_token_accuracy": 0.6299286683400472, "num_tokens": 23247723.0, "step": 139 }, { "entropy": 1.964120090007782, "epoch": 0.015379967592211144, "grad_norm": 1.3174797296524048, "learning_rate": 5.082266910420476e-06, "loss": 1.578, "mean_token_accuracy": 0.6308384935061137, "num_tokens": 23430389.0, "step": 140 }, { "entropy": 1.9435697793960571, "epoch": 0.015489824503584082, "grad_norm": 1.4430499076843262, "learning_rate": 5.118829981718465e-06, "loss": 1.4911, "mean_token_accuracy": 0.6396051446596781, "num_tokens": 23586104.0, "step": 141 }, { "entropy": 1.9481945832570393, "epoch": 0.015599681414957019, "grad_norm": 1.983757734298706, "learning_rate": 5.155393053016454e-06, "loss": 1.5476, "mean_token_accuracy": 0.6408113439877828, "num_tokens": 23735997.0, "step": 142 }, { "entropy": 1.9852807819843292, "epoch": 0.015709538326329955, "grad_norm": 1.51121985912323, "learning_rate": 5.1919561243144424e-06, "loss": 1.7149, "mean_token_accuracy": 0.6295592884222666, "num_tokens": 23906403.0, "step": 143 }, { "entropy": 1.9033388594786327, "epoch": 0.015819395237702892, "grad_norm": 1.7496778964996338, "learning_rate": 5.228519195612431e-06, "loss": 1.5541, "mean_token_accuracy": 0.6278304755687714, "num_tokens": 24082125.0, "step": 144 }, { "entropy": 1.929619828859965, "epoch": 0.01592925214907583, "grad_norm": 1.250235676765442, "learning_rate": 5.26508226691042e-06, "loss": 1.5135, "mean_token_accuracy": 0.6288062930107117, "num_tokens": 24305628.0, "step": 145 }, { "entropy": 2.0030274192492166, "epoch": 0.016039109060448767, "grad_norm": 1.5068280696868896, "learning_rate": 5.30164533820841e-06, "loss": 1.4626, "mean_token_accuracy": 0.6244446535905203, "num_tokens": 24472766.0, "step": 146 }, { "entropy": 1.9357780913511913, "epoch": 0.0161489659718217, "grad_norm": 1.569165825843811, "learning_rate": 5.338208409506399e-06, "loss": 1.4038, "mean_token_accuracy": 0.6452462822198868, "num_tokens": 24617139.0, "step": 147 }, { "entropy": 1.9441987375418346, "epoch": 0.016258822883194638, "grad_norm": 1.4736301898956299, "learning_rate": 5.374771480804388e-06, "loss": 1.5742, "mean_token_accuracy": 0.6242514302333196, "num_tokens": 24825673.0, "step": 148 }, { "entropy": 1.9694486260414124, "epoch": 0.016368679794567575, "grad_norm": 1.5289890766143799, "learning_rate": 5.411334552102378e-06, "loss": 1.4772, "mean_token_accuracy": 0.6373162666956583, "num_tokens": 24991928.0, "step": 149 }, { "entropy": 1.886027862628301, "epoch": 0.016478536705940513, "grad_norm": 1.8170188665390015, "learning_rate": 5.447897623400366e-06, "loss": 1.5112, "mean_token_accuracy": 0.6386896967887878, "num_tokens": 25126815.0, "step": 150 }, { "entropy": 2.017523467540741, "epoch": 0.01658839361731345, "grad_norm": 1.759878158569336, "learning_rate": 5.484460694698355e-06, "loss": 1.7175, "mean_token_accuracy": 0.6125520120064417, "num_tokens": 25317846.0, "step": 151 }, { "entropy": 1.9565064509709675, "epoch": 0.016698250528686388, "grad_norm": 1.3674354553222656, "learning_rate": 5.5210237659963435e-06, "loss": 1.5125, "mean_token_accuracy": 0.6129873792330424, "num_tokens": 25517208.0, "step": 152 }, { "entropy": 1.8165283501148224, "epoch": 0.01680810744005932, "grad_norm": 1.8085861206054688, "learning_rate": 5.557586837294333e-06, "loss": 1.3189, "mean_token_accuracy": 0.6418418337901434, "num_tokens": 25710724.0, "step": 153 }, { "entropy": 1.9543904463450115, "epoch": 0.01691796435143226, "grad_norm": 2.4002819061279297, "learning_rate": 5.594149908592322e-06, "loss": 1.5342, "mean_token_accuracy": 0.625735859076182, "num_tokens": 25863294.0, "step": 154 }, { "entropy": 1.9318216741085052, "epoch": 0.017027821262805196, "grad_norm": 1.7552255392074585, "learning_rate": 5.630712979890311e-06, "loss": 1.4478, "mean_token_accuracy": 0.6387412895758947, "num_tokens": 25995035.0, "step": 155 }, { "entropy": 1.843320260445277, "epoch": 0.017137678174178134, "grad_norm": 1.9175851345062256, "learning_rate": 5.667276051188301e-06, "loss": 1.4263, "mean_token_accuracy": 0.6444782565037409, "num_tokens": 26181344.0, "step": 156 }, { "entropy": 1.9701976378758748, "epoch": 0.01724753508555107, "grad_norm": 1.541599988937378, "learning_rate": 5.703839122486289e-06, "loss": 1.5272, "mean_token_accuracy": 0.642794132232666, "num_tokens": 26359706.0, "step": 157 }, { "entropy": 1.9773990114529927, "epoch": 0.017357391996924005, "grad_norm": 2.1367106437683105, "learning_rate": 5.740402193784278e-06, "loss": 1.5112, "mean_token_accuracy": 0.636732429265976, "num_tokens": 26479942.0, "step": 158 }, { "entropy": 1.9307551781336467, "epoch": 0.017467248908296942, "grad_norm": 2.5110087394714355, "learning_rate": 5.776965265082267e-06, "loss": 1.4965, "mean_token_accuracy": 0.628907784819603, "num_tokens": 26595408.0, "step": 159 }, { "entropy": 1.8777441680431366, "epoch": 0.01757710581966988, "grad_norm": 1.30020272731781, "learning_rate": 5.813528336380257e-06, "loss": 1.5253, "mean_token_accuracy": 0.6290498375892639, "num_tokens": 26817903.0, "step": 160 }, { "entropy": 1.9326928953329723, "epoch": 0.017686962731042817, "grad_norm": 1.6048803329467773, "learning_rate": 5.8500914076782455e-06, "loss": 1.5385, "mean_token_accuracy": 0.632832944393158, "num_tokens": 26960043.0, "step": 161 }, { "entropy": 2.001479814449946, "epoch": 0.017796819642415754, "grad_norm": 1.7055351734161377, "learning_rate": 5.886654478976234e-06, "loss": 1.4679, "mean_token_accuracy": 0.6328243414560953, "num_tokens": 27105562.0, "step": 162 }, { "entropy": 1.9939979513486226, "epoch": 0.017906676553788692, "grad_norm": 1.595533847808838, "learning_rate": 5.923217550274224e-06, "loss": 1.5478, "mean_token_accuracy": 0.6230746358633041, "num_tokens": 27254524.0, "step": 163 }, { "entropy": 1.9124678770701091, "epoch": 0.018016533465161626, "grad_norm": 1.401327133178711, "learning_rate": 5.959780621572212e-06, "loss": 1.6268, "mean_token_accuracy": 0.6118254562218984, "num_tokens": 27451743.0, "step": 164 }, { "entropy": 1.8883071442445118, "epoch": 0.018126390376534563, "grad_norm": 1.3325417041778564, "learning_rate": 5.996343692870201e-06, "loss": 1.5537, "mean_token_accuracy": 0.6336751828591028, "num_tokens": 27609520.0, "step": 165 }, { "entropy": 1.8839130004247029, "epoch": 0.0182362472879075, "grad_norm": 1.713529348373413, "learning_rate": 6.03290676416819e-06, "loss": 1.529, "mean_token_accuracy": 0.6286770900090536, "num_tokens": 27781402.0, "step": 166 }, { "entropy": 2.000154842933019, "epoch": 0.018346104199280438, "grad_norm": 1.4660075902938843, "learning_rate": 6.06946983546618e-06, "loss": 1.6884, "mean_token_accuracy": 0.6122722874085108, "num_tokens": 27947872.0, "step": 167 }, { "entropy": 1.9248465200265248, "epoch": 0.018455961110653375, "grad_norm": 1.3339463472366333, "learning_rate": 6.106032906764169e-06, "loss": 1.4863, "mean_token_accuracy": 0.6232618043820063, "num_tokens": 28170975.0, "step": 168 }, { "entropy": 1.895066907008489, "epoch": 0.01856581802202631, "grad_norm": 0.8725059032440186, "learning_rate": 6.142595978062158e-06, "loss": 1.5651, "mean_token_accuracy": 0.6305927385886511, "num_tokens": 28384244.0, "step": 169 }, { "entropy": 1.908642550309499, "epoch": 0.018675674933399247, "grad_norm": 1.8071403503417969, "learning_rate": 6.1791590493601475e-06, "loss": 1.4144, "mean_token_accuracy": 0.6478618135054907, "num_tokens": 28540304.0, "step": 170 }, { "entropy": 1.8365615010261536, "epoch": 0.018785531844772184, "grad_norm": 1.5986486673355103, "learning_rate": 6.2157221206581355e-06, "loss": 1.4964, "mean_token_accuracy": 0.6445286770661672, "num_tokens": 28693212.0, "step": 171 }, { "entropy": 1.8890428642431896, "epoch": 0.01889538875614512, "grad_norm": 1.403101921081543, "learning_rate": 6.2522851919561244e-06, "loss": 1.4468, "mean_token_accuracy": 0.6386788686116537, "num_tokens": 28850662.0, "step": 172 }, { "entropy": 1.9007167915503185, "epoch": 0.01900524566751806, "grad_norm": 1.734114170074463, "learning_rate": 6.288848263254113e-06, "loss": 1.4575, "mean_token_accuracy": 0.6430951108535131, "num_tokens": 29008183.0, "step": 173 }, { "entropy": 1.9034869869550068, "epoch": 0.019115102578890996, "grad_norm": 1.3347978591918945, "learning_rate": 6.325411334552103e-06, "loss": 1.4581, "mean_token_accuracy": 0.636214479804039, "num_tokens": 29178708.0, "step": 174 }, { "entropy": 1.9231548706690471, "epoch": 0.01922495949026393, "grad_norm": 0.9329301714897156, "learning_rate": 6.361974405850092e-06, "loss": 1.433, "mean_token_accuracy": 0.6341615468263626, "num_tokens": 29404955.0, "step": 175 }, { "entropy": 1.9202434321244557, "epoch": 0.019334816401636867, "grad_norm": 1.496565341949463, "learning_rate": 6.398537477148081e-06, "loss": 1.4469, "mean_token_accuracy": 0.6378689457972845, "num_tokens": 29591016.0, "step": 176 }, { "entropy": 1.9373224675655365, "epoch": 0.019444673313009805, "grad_norm": 1.8180593252182007, "learning_rate": 6.435100548446071e-06, "loss": 1.4546, "mean_token_accuracy": 0.6288415739933649, "num_tokens": 29730607.0, "step": 177 }, { "entropy": 1.8911834458510082, "epoch": 0.019554530224382742, "grad_norm": 1.377733588218689, "learning_rate": 6.471663619744059e-06, "loss": 1.4094, "mean_token_accuracy": 0.6404121816158295, "num_tokens": 29945024.0, "step": 178 }, { "entropy": 1.9313226739565532, "epoch": 0.01966438713575568, "grad_norm": 1.3180339336395264, "learning_rate": 6.508226691042048e-06, "loss": 1.4791, "mean_token_accuracy": 0.6364747583866119, "num_tokens": 30130011.0, "step": 179 }, { "entropy": 1.8546662032604218, "epoch": 0.019774244047128613, "grad_norm": 0.9060352444648743, "learning_rate": 6.544789762340037e-06, "loss": 1.5613, "mean_token_accuracy": 0.6342605948448181, "num_tokens": 30414308.0, "step": 180 }, { "entropy": 1.9606173137823741, "epoch": 0.01988410095850155, "grad_norm": 1.4632275104522705, "learning_rate": 6.581352833638026e-06, "loss": 1.5579, "mean_token_accuracy": 0.618272011478742, "num_tokens": 30618377.0, "step": 181 }, { "entropy": 1.8522493441899617, "epoch": 0.019993957869874488, "grad_norm": 1.4936124086380005, "learning_rate": 6.617915904936015e-06, "loss": 1.4107, "mean_token_accuracy": 0.6411967029174169, "num_tokens": 30836318.0, "step": 182 }, { "entropy": 2.0339955588181815, "epoch": 0.020103814781247425, "grad_norm": 1.1835054159164429, "learning_rate": 6.654478976234004e-06, "loss": 1.5639, "mean_token_accuracy": 0.6246629556020101, "num_tokens": 31012714.0, "step": 183 }, { "entropy": 1.972231497367223, "epoch": 0.020213671692620363, "grad_norm": 2.2008109092712402, "learning_rate": 6.691042047531994e-06, "loss": 1.4934, "mean_token_accuracy": 0.6375860174496969, "num_tokens": 31125259.0, "step": 184 }, { "entropy": 1.9959478378295898, "epoch": 0.0203235286039933, "grad_norm": 1.3271251916885376, "learning_rate": 6.727605118829982e-06, "loss": 1.5039, "mean_token_accuracy": 0.6275109102328619, "num_tokens": 31303032.0, "step": 185 }, { "entropy": 1.9124859770139058, "epoch": 0.020433385515366234, "grad_norm": 1.273916244506836, "learning_rate": 6.764168190127971e-06, "loss": 1.5072, "mean_token_accuracy": 0.6474013924598694, "num_tokens": 31472688.0, "step": 186 }, { "entropy": 1.94366854429245, "epoch": 0.02054324242673917, "grad_norm": 1.5350209474563599, "learning_rate": 6.80073126142596e-06, "loss": 1.5753, "mean_token_accuracy": 0.6296733965476354, "num_tokens": 31603992.0, "step": 187 }, { "entropy": 1.830802450577418, "epoch": 0.02065309933811211, "grad_norm": 1.5503315925598145, "learning_rate": 6.83729433272395e-06, "loss": 1.5068, "mean_token_accuracy": 0.6352782646814982, "num_tokens": 31754236.0, "step": 188 }, { "entropy": 1.961773047844569, "epoch": 0.020762956249485046, "grad_norm": 1.3327734470367432, "learning_rate": 6.873857404021939e-06, "loss": 1.6572, "mean_token_accuracy": 0.6188698361317316, "num_tokens": 31951147.0, "step": 189 }, { "entropy": 1.9484667479991913, "epoch": 0.020872813160857984, "grad_norm": 1.9495567083358765, "learning_rate": 6.9104204753199275e-06, "loss": 1.4939, "mean_token_accuracy": 0.6419304311275482, "num_tokens": 32069372.0, "step": 190 }, { "entropy": 1.952804942925771, "epoch": 0.020982670072230918, "grad_norm": 2.0952329635620117, "learning_rate": 6.946983546617917e-06, "loss": 1.466, "mean_token_accuracy": 0.6325685183207194, "num_tokens": 32247672.0, "step": 191 }, { "entropy": 1.8967472811539967, "epoch": 0.021092526983603855, "grad_norm": 1.9613308906555176, "learning_rate": 6.983546617915905e-06, "loss": 1.4358, "mean_token_accuracy": 0.6374901284774145, "num_tokens": 32400081.0, "step": 192 }, { "entropy": 1.8581411341826122, "epoch": 0.021202383894976792, "grad_norm": 1.4401475191116333, "learning_rate": 7.020109689213894e-06, "loss": 1.4406, "mean_token_accuracy": 0.6475908011198044, "num_tokens": 32612337.0, "step": 193 }, { "entropy": 1.9438115656375885, "epoch": 0.02131224080634973, "grad_norm": 1.6511321067810059, "learning_rate": 7.056672760511883e-06, "loss": 1.569, "mean_token_accuracy": 0.6184766987959543, "num_tokens": 32757705.0, "step": 194 }, { "entropy": 1.8474779923756917, "epoch": 0.021422097717722667, "grad_norm": 1.7577476501464844, "learning_rate": 7.093235831809873e-06, "loss": 1.4082, "mean_token_accuracy": 0.6572687774896622, "num_tokens": 32909533.0, "step": 195 }, { "entropy": 1.9480952123800914, "epoch": 0.021531954629095604, "grad_norm": 1.2074416875839233, "learning_rate": 7.129798903107862e-06, "loss": 1.4497, "mean_token_accuracy": 0.6373476584752401, "num_tokens": 33091590.0, "step": 196 }, { "entropy": 1.8216406504313152, "epoch": 0.02164181154046854, "grad_norm": 1.491611361503601, "learning_rate": 7.166361974405851e-06, "loss": 1.419, "mean_token_accuracy": 0.6391265342632929, "num_tokens": 33261986.0, "step": 197 }, { "entropy": 1.9061803619066875, "epoch": 0.021751668451841476, "grad_norm": 1.422453761100769, "learning_rate": 7.2029250457038405e-06, "loss": 1.5071, "mean_token_accuracy": 0.6227595210075378, "num_tokens": 33436559.0, "step": 198 }, { "entropy": 1.9401950438817341, "epoch": 0.021861525363214413, "grad_norm": 1.4699177742004395, "learning_rate": 7.239488117001829e-06, "loss": 1.5314, "mean_token_accuracy": 0.6238148510456085, "num_tokens": 33610708.0, "step": 199 }, { "entropy": 1.9451914032300313, "epoch": 0.02197138227458735, "grad_norm": 1.5435770750045776, "learning_rate": 7.2760511882998175e-06, "loss": 1.7341, "mean_token_accuracy": 0.6216254606842995, "num_tokens": 33789223.0, "step": 200 }, { "entropy": 1.9354910055796306, "epoch": 0.022081239185960288, "grad_norm": 1.6288329362869263, "learning_rate": 7.3126142595978065e-06, "loss": 1.4906, "mean_token_accuracy": 0.6289151956637701, "num_tokens": 33926328.0, "step": 201 }, { "entropy": 1.920799712340037, "epoch": 0.022191096097333222, "grad_norm": 2.159276247024536, "learning_rate": 7.349177330895796e-06, "loss": 1.4639, "mean_token_accuracy": 0.6422193894783655, "num_tokens": 34086922.0, "step": 202 }, { "entropy": 1.8773599565029144, "epoch": 0.02230095300870616, "grad_norm": 1.5784872770309448, "learning_rate": 7.385740402193785e-06, "loss": 1.4203, "mean_token_accuracy": 0.6452515174945196, "num_tokens": 34234623.0, "step": 203 }, { "entropy": 1.9058987200260162, "epoch": 0.022410809920079097, "grad_norm": 1.9286620616912842, "learning_rate": 7.422303473491774e-06, "loss": 1.4685, "mean_token_accuracy": 0.6433494488398234, "num_tokens": 34403187.0, "step": 204 }, { "entropy": 1.9196257293224335, "epoch": 0.022520666831452034, "grad_norm": 1.517842411994934, "learning_rate": 7.458866544789764e-06, "loss": 1.4634, "mean_token_accuracy": 0.6412399162848791, "num_tokens": 34547042.0, "step": 205 }, { "entropy": 1.9245548446973164, "epoch": 0.02263052374282497, "grad_norm": 1.4361844062805176, "learning_rate": 7.495429616087752e-06, "loss": 1.5076, "mean_token_accuracy": 0.6369704306125641, "num_tokens": 34719845.0, "step": 206 }, { "entropy": 1.9506233930587769, "epoch": 0.02274038065419791, "grad_norm": 2.3668041229248047, "learning_rate": 7.531992687385741e-06, "loss": 1.4556, "mean_token_accuracy": 0.6397630920012792, "num_tokens": 34826986.0, "step": 207 }, { "entropy": 1.915660818417867, "epoch": 0.022850237565570843, "grad_norm": 1.7417556047439575, "learning_rate": 7.56855575868373e-06, "loss": 1.4855, "mean_token_accuracy": 0.628084714214007, "num_tokens": 34995629.0, "step": 208 }, { "entropy": 1.9720592200756073, "epoch": 0.02296009447694378, "grad_norm": 1.7598878145217896, "learning_rate": 7.6051188299817195e-06, "loss": 1.6025, "mean_token_accuracy": 0.6329106787840525, "num_tokens": 35109490.0, "step": 209 }, { "entropy": 1.9277808268864949, "epoch": 0.023069951388316717, "grad_norm": 1.1871633529663086, "learning_rate": 7.641681901279708e-06, "loss": 1.5092, "mean_token_accuracy": 0.6277731756369272, "num_tokens": 35273576.0, "step": 210 }, { "entropy": 1.9870645701885223, "epoch": 0.023179808299689655, "grad_norm": 1.901222825050354, "learning_rate": 7.678244972577698e-06, "loss": 1.6778, "mean_token_accuracy": 0.6138087809085846, "num_tokens": 35420120.0, "step": 211 }, { "entropy": 1.8992568055788677, "epoch": 0.023289665211062592, "grad_norm": 1.1016038656234741, "learning_rate": 7.714808043875686e-06, "loss": 1.4592, "mean_token_accuracy": 0.6364717036485672, "num_tokens": 35602325.0, "step": 212 }, { "entropy": 1.888885885477066, "epoch": 0.02339952212243553, "grad_norm": 2.0824167728424072, "learning_rate": 7.751371115173674e-06, "loss": 1.5275, "mean_token_accuracy": 0.6334254344304403, "num_tokens": 35733356.0, "step": 213 }, { "entropy": 1.982912798722585, "epoch": 0.023509379033808463, "grad_norm": 1.8428221940994263, "learning_rate": 7.787934186471664e-06, "loss": 1.6603, "mean_token_accuracy": 0.6229775846004486, "num_tokens": 35880300.0, "step": 214 }, { "entropy": 1.9338585535685222, "epoch": 0.0236192359451814, "grad_norm": 1.234574794769287, "learning_rate": 7.824497257769654e-06, "loss": 1.5378, "mean_token_accuracy": 0.6240969995657603, "num_tokens": 36069653.0, "step": 215 }, { "entropy": 1.9078516761461894, "epoch": 0.023729092856554338, "grad_norm": 1.3974380493164062, "learning_rate": 7.861060329067642e-06, "loss": 1.4083, "mean_token_accuracy": 0.6485424588123957, "num_tokens": 36230390.0, "step": 216 }, { "entropy": 1.9500950674215953, "epoch": 0.023838949767927275, "grad_norm": 1.337957501411438, "learning_rate": 7.897623400365632e-06, "loss": 1.6635, "mean_token_accuracy": 0.6214189380407333, "num_tokens": 36418461.0, "step": 217 }, { "entropy": 1.8814655443032582, "epoch": 0.023948806679300213, "grad_norm": 0.930263876914978, "learning_rate": 7.934186471663621e-06, "loss": 1.4714, "mean_token_accuracy": 0.6292876054843267, "num_tokens": 36666565.0, "step": 218 }, { "entropy": 1.9561232924461365, "epoch": 0.024058663590673147, "grad_norm": 1.3758008480072021, "learning_rate": 7.97074954296161e-06, "loss": 1.4744, "mean_token_accuracy": 0.6297362099091212, "num_tokens": 36831778.0, "step": 219 }, { "entropy": 1.9421695868174236, "epoch": 0.024168520502046084, "grad_norm": 1.9885149002075195, "learning_rate": 8.007312614259598e-06, "loss": 1.4071, "mean_token_accuracy": 0.6410440603892008, "num_tokens": 36984298.0, "step": 220 }, { "entropy": 1.9113211333751678, "epoch": 0.02427837741341902, "grad_norm": 1.6628094911575317, "learning_rate": 8.043875685557587e-06, "loss": 1.3944, "mean_token_accuracy": 0.6423366914192835, "num_tokens": 37123033.0, "step": 221 }, { "entropy": 1.8789688448111217, "epoch": 0.02438823432479196, "grad_norm": 1.8397406339645386, "learning_rate": 8.080438756855577e-06, "loss": 1.3697, "mean_token_accuracy": 0.6559399515390396, "num_tokens": 37276948.0, "step": 222 }, { "entropy": 1.9408772091070812, "epoch": 0.024498091236164896, "grad_norm": 1.2659918069839478, "learning_rate": 8.117001828153565e-06, "loss": 1.6394, "mean_token_accuracy": 0.6148115148146948, "num_tokens": 37493584.0, "step": 223 }, { "entropy": 1.9383413990338643, "epoch": 0.024607948147537834, "grad_norm": 2.0002858638763428, "learning_rate": 8.153564899451555e-06, "loss": 1.4873, "mean_token_accuracy": 0.6363619416952133, "num_tokens": 37625713.0, "step": 224 }, { "entropy": 1.8870685597260792, "epoch": 0.024717805058910768, "grad_norm": 1.661469578742981, "learning_rate": 8.190127970749545e-06, "loss": 1.5554, "mean_token_accuracy": 0.6477372944355011, "num_tokens": 37796925.0, "step": 225 }, { "entropy": 1.9707025090853374, "epoch": 0.024827661970283705, "grad_norm": 1.3756967782974243, "learning_rate": 8.226691042047533e-06, "loss": 1.6058, "mean_token_accuracy": 0.6214409867922465, "num_tokens": 37958284.0, "step": 226 }, { "entropy": 1.9486571947733562, "epoch": 0.024937518881656642, "grad_norm": 1.891913652420044, "learning_rate": 8.263254113345521e-06, "loss": 1.548, "mean_token_accuracy": 0.6239824940760931, "num_tokens": 38121912.0, "step": 227 }, { "entropy": 1.9066686630249023, "epoch": 0.02504737579302958, "grad_norm": 1.301985263824463, "learning_rate": 8.29981718464351e-06, "loss": 1.4455, "mean_token_accuracy": 0.6375877112150192, "num_tokens": 38293286.0, "step": 228 }, { "entropy": 1.9013386964797974, "epoch": 0.025157232704402517, "grad_norm": 1.0296814441680908, "learning_rate": 8.3363802559415e-06, "loss": 1.5593, "mean_token_accuracy": 0.6204620003700256, "num_tokens": 38493428.0, "step": 229 }, { "entropy": 1.889691025018692, "epoch": 0.02526708961577545, "grad_norm": 1.2949299812316895, "learning_rate": 8.372943327239488e-06, "loss": 1.4609, "mean_token_accuracy": 0.6394095073143641, "num_tokens": 38689047.0, "step": 230 }, { "entropy": 1.9693353275458019, "epoch": 0.02537694652714839, "grad_norm": 1.41304349899292, "learning_rate": 8.409506398537478e-06, "loss": 1.6868, "mean_token_accuracy": 0.6182350367307663, "num_tokens": 38874286.0, "step": 231 }, { "entropy": 1.9255563914775848, "epoch": 0.025486803438521326, "grad_norm": 2.2932870388031006, "learning_rate": 8.446069469835468e-06, "loss": 1.4642, "mean_token_accuracy": 0.6350040584802628, "num_tokens": 39041995.0, "step": 232 }, { "entropy": 1.8631162444750469, "epoch": 0.025596660349894263, "grad_norm": 1.4570585489273071, "learning_rate": 8.482632541133456e-06, "loss": 1.4474, "mean_token_accuracy": 0.6374172319968542, "num_tokens": 39247147.0, "step": 233 }, { "entropy": 1.9238406717777252, "epoch": 0.0257065172612672, "grad_norm": 0.969900369644165, "learning_rate": 8.519195612431444e-06, "loss": 1.5364, "mean_token_accuracy": 0.6210780193408331, "num_tokens": 39442503.0, "step": 234 }, { "entropy": 1.830154150724411, "epoch": 0.025816374172640138, "grad_norm": 1.632808804512024, "learning_rate": 8.555758683729434e-06, "loss": 1.3361, "mean_token_accuracy": 0.6548681904872259, "num_tokens": 39608656.0, "step": 235 }, { "entropy": 1.8890958329041798, "epoch": 0.025926231084013072, "grad_norm": 1.6013661623001099, "learning_rate": 8.592321755027424e-06, "loss": 1.4588, "mean_token_accuracy": 0.6372386415799459, "num_tokens": 39745398.0, "step": 236 }, { "entropy": 1.9264869689941406, "epoch": 0.02603608799538601, "grad_norm": 1.3071633577346802, "learning_rate": 8.628884826325412e-06, "loss": 1.5205, "mean_token_accuracy": 0.6315742234388987, "num_tokens": 39932055.0, "step": 237 }, { "entropy": 1.8503678143024445, "epoch": 0.026145944906758947, "grad_norm": 1.1339292526245117, "learning_rate": 8.665447897623402e-06, "loss": 1.3504, "mean_token_accuracy": 0.647294615705808, "num_tokens": 40125022.0, "step": 238 }, { "entropy": 1.9106312990188599, "epoch": 0.026255801818131884, "grad_norm": 1.3184049129486084, "learning_rate": 8.702010968921391e-06, "loss": 1.5338, "mean_token_accuracy": 0.6321128904819489, "num_tokens": 40314336.0, "step": 239 }, { "entropy": 1.8953208327293396, "epoch": 0.02636565872950482, "grad_norm": 1.3068339824676514, "learning_rate": 8.73857404021938e-06, "loss": 1.5125, "mean_token_accuracy": 0.6311918099721273, "num_tokens": 40500962.0, "step": 240 }, { "entropy": 1.8495961129665375, "epoch": 0.026475515640877755, "grad_norm": 1.4714800119400024, "learning_rate": 8.775137111517367e-06, "loss": 1.4861, "mean_token_accuracy": 0.6417611440022787, "num_tokens": 40684363.0, "step": 241 }, { "entropy": 1.9483478566010792, "epoch": 0.026585372552250693, "grad_norm": 1.7499563694000244, "learning_rate": 8.811700182815357e-06, "loss": 1.513, "mean_token_accuracy": 0.6161421338717142, "num_tokens": 40854917.0, "step": 242 }, { "entropy": 1.9359776973724365, "epoch": 0.02669522946362363, "grad_norm": 1.2601664066314697, "learning_rate": 8.848263254113347e-06, "loss": 1.5529, "mean_token_accuracy": 0.614978551864624, "num_tokens": 41041982.0, "step": 243 }, { "entropy": 1.9827852447827656, "epoch": 0.026805086374996567, "grad_norm": 1.3667547702789307, "learning_rate": 8.884826325411335e-06, "loss": 1.5346, "mean_token_accuracy": 0.6175629794597626, "num_tokens": 41252018.0, "step": 244 }, { "entropy": 1.9970279932022095, "epoch": 0.026914943286369505, "grad_norm": 4.996973037719727, "learning_rate": 8.921389396709325e-06, "loss": 1.4708, "mean_token_accuracy": 0.6463018904129664, "num_tokens": 41388067.0, "step": 245 }, { "entropy": 1.95156333843867, "epoch": 0.027024800197742442, "grad_norm": 1.5964291095733643, "learning_rate": 8.957952468007315e-06, "loss": 1.633, "mean_token_accuracy": 0.611007904012998, "num_tokens": 41521865.0, "step": 246 }, { "entropy": 1.8909566402435303, "epoch": 0.027134657109115376, "grad_norm": 1.72969388961792, "learning_rate": 8.994515539305303e-06, "loss": 1.487, "mean_token_accuracy": 0.6293840358654658, "num_tokens": 41653324.0, "step": 247 }, { "entropy": 1.8896038234233856, "epoch": 0.027244514020488313, "grad_norm": 1.3360822200775146, "learning_rate": 9.03107861060329e-06, "loss": 1.474, "mean_token_accuracy": 0.6422435492277145, "num_tokens": 41849809.0, "step": 248 }, { "entropy": 1.9821850061416626, "epoch": 0.02735437093186125, "grad_norm": 1.6421992778778076, "learning_rate": 9.06764168190128e-06, "loss": 1.451, "mean_token_accuracy": 0.6302864154179891, "num_tokens": 41996056.0, "step": 249 }, { "entropy": 1.8893661499023438, "epoch": 0.027464227843234188, "grad_norm": 1.9778544902801514, "learning_rate": 9.10420475319927e-06, "loss": 1.4556, "mean_token_accuracy": 0.6386625617742538, "num_tokens": 42136282.0, "step": 250 }, { "entropy": 1.964593380689621, "epoch": 0.027574084754607125, "grad_norm": 1.21237313747406, "learning_rate": 9.140767824497258e-06, "loss": 1.5264, "mean_token_accuracy": 0.6220163901646932, "num_tokens": 42287103.0, "step": 251 }, { "entropy": 1.8926392396291096, "epoch": 0.02768394166598006, "grad_norm": 1.2302354574203491, "learning_rate": 9.177330895795248e-06, "loss": 1.4971, "mean_token_accuracy": 0.6313800662755966, "num_tokens": 42437027.0, "step": 252 }, { "entropy": 1.8762815594673157, "epoch": 0.027793798577352997, "grad_norm": 1.0389925241470337, "learning_rate": 9.213893967093238e-06, "loss": 1.5259, "mean_token_accuracy": 0.6269082774718603, "num_tokens": 42628984.0, "step": 253 }, { "entropy": 1.861397624015808, "epoch": 0.027903655488725934, "grad_norm": 1.1108849048614502, "learning_rate": 9.250457038391226e-06, "loss": 1.6276, "mean_token_accuracy": 0.6273967996239662, "num_tokens": 42804896.0, "step": 254 }, { "entropy": 1.9415236016114552, "epoch": 0.02801351240009887, "grad_norm": 1.8820470571517944, "learning_rate": 9.287020109689214e-06, "loss": 1.579, "mean_token_accuracy": 0.6264342914024988, "num_tokens": 42935360.0, "step": 255 }, { "entropy": 1.9186277190844219, "epoch": 0.02812336931147181, "grad_norm": 1.1692508459091187, "learning_rate": 9.323583180987204e-06, "loss": 1.4381, "mean_token_accuracy": 0.6264500568310419, "num_tokens": 43136428.0, "step": 256 }, { "entropy": 1.9373709559440613, "epoch": 0.028233226222844746, "grad_norm": 1.8746132850646973, "learning_rate": 9.360146252285193e-06, "loss": 1.4601, "mean_token_accuracy": 0.6457020888725916, "num_tokens": 43287978.0, "step": 257 }, { "entropy": 1.9027895232041676, "epoch": 0.02834308313421768, "grad_norm": 1.2509558200836182, "learning_rate": 9.396709323583182e-06, "loss": 1.4337, "mean_token_accuracy": 0.6443447520335516, "num_tokens": 43436841.0, "step": 258 }, { "entropy": 1.863025466601054, "epoch": 0.028452940045590618, "grad_norm": 1.5175418853759766, "learning_rate": 9.433272394881171e-06, "loss": 1.3925, "mean_token_accuracy": 0.65195099512736, "num_tokens": 43575151.0, "step": 259 }, { "entropy": 1.9013051688671112, "epoch": 0.028562796956963555, "grad_norm": 1.6341294050216675, "learning_rate": 9.469835466179161e-06, "loss": 1.5872, "mean_token_accuracy": 0.6424743135770162, "num_tokens": 43716089.0, "step": 260 }, { "entropy": 1.9514261881510417, "epoch": 0.028672653868336492, "grad_norm": 1.6734215021133423, "learning_rate": 9.50639853747715e-06, "loss": 1.5115, "mean_token_accuracy": 0.6237670431534449, "num_tokens": 43853512.0, "step": 261 }, { "entropy": 1.9595048030217488, "epoch": 0.02878251077970943, "grad_norm": 2.273057460784912, "learning_rate": 9.542961608775137e-06, "loss": 1.45, "mean_token_accuracy": 0.639571433266004, "num_tokens": 43964136.0, "step": 262 }, { "entropy": 1.86832395195961, "epoch": 0.028892367691082364, "grad_norm": 1.6332321166992188, "learning_rate": 9.579524680073127e-06, "loss": 1.481, "mean_token_accuracy": 0.6361501961946487, "num_tokens": 44106187.0, "step": 263 }, { "entropy": 1.8855204284191132, "epoch": 0.0290022246024553, "grad_norm": 1.0685125589370728, "learning_rate": 9.616087751371117e-06, "loss": 1.4531, "mean_token_accuracy": 0.6394118815660477, "num_tokens": 44263324.0, "step": 264 }, { "entropy": 1.9358879923820496, "epoch": 0.02911208151382824, "grad_norm": 1.6074949502944946, "learning_rate": 9.652650822669105e-06, "loss": 1.4889, "mean_token_accuracy": 0.643065462509791, "num_tokens": 44383088.0, "step": 265 }, { "entropy": 1.893241822719574, "epoch": 0.029221938425201176, "grad_norm": 1.4432519674301147, "learning_rate": 9.689213893967095e-06, "loss": 1.3719, "mean_token_accuracy": 0.6543690661589304, "num_tokens": 44514573.0, "step": 266 }, { "entropy": 1.9079320927460988, "epoch": 0.029331795336574113, "grad_norm": 1.385563611984253, "learning_rate": 9.725776965265084e-06, "loss": 1.4954, "mean_token_accuracy": 0.6275987525780996, "num_tokens": 44664023.0, "step": 267 }, { "entropy": 1.9555991490681965, "epoch": 0.02944165224794705, "grad_norm": 1.320134162902832, "learning_rate": 9.762340036563072e-06, "loss": 1.528, "mean_token_accuracy": 0.6275275399287542, "num_tokens": 44806322.0, "step": 268 }, { "entropy": 1.9195038080215454, "epoch": 0.029551509159319984, "grad_norm": 1.5830590724945068, "learning_rate": 9.79890310786106e-06, "loss": 1.4392, "mean_token_accuracy": 0.6394519209861755, "num_tokens": 44943371.0, "step": 269 }, { "entropy": 1.9248672624429066, "epoch": 0.029661366070692922, "grad_norm": 1.2923870086669922, "learning_rate": 9.83546617915905e-06, "loss": 1.5068, "mean_token_accuracy": 0.6360269586245219, "num_tokens": 45101406.0, "step": 270 }, { "entropy": 1.9413983821868896, "epoch": 0.02977122298206586, "grad_norm": 1.126284122467041, "learning_rate": 9.87202925045704e-06, "loss": 1.5993, "mean_token_accuracy": 0.6119322826464971, "num_tokens": 45359982.0, "step": 271 }, { "entropy": 1.9256538450717926, "epoch": 0.029881079893438797, "grad_norm": 1.5768324136734009, "learning_rate": 9.908592321755028e-06, "loss": 1.5289, "mean_token_accuracy": 0.6280013422171274, "num_tokens": 45489978.0, "step": 272 }, { "entropy": 1.9650197923183441, "epoch": 0.029990936804811734, "grad_norm": 2.4262402057647705, "learning_rate": 9.945155393053018e-06, "loss": 1.4462, "mean_token_accuracy": 0.6426471074422201, "num_tokens": 45591818.0, "step": 273 }, { "entropy": 1.966247429450353, "epoch": 0.030100793716184668, "grad_norm": 1.6343317031860352, "learning_rate": 9.981718464351006e-06, "loss": 1.4539, "mean_token_accuracy": 0.6315440734227499, "num_tokens": 45786109.0, "step": 274 }, { "entropy": 1.9131847222646077, "epoch": 0.030210650627557605, "grad_norm": 0.7987267971038818, "learning_rate": 1.0018281535648996e-05, "loss": 1.5537, "mean_token_accuracy": 0.6154775619506836, "num_tokens": 46015605.0, "step": 275 }, { "entropy": 1.9687570333480835, "epoch": 0.030320507538930543, "grad_norm": 1.7003246545791626, "learning_rate": 1.0054844606946985e-05, "loss": 1.5739, "mean_token_accuracy": 0.6201535413662592, "num_tokens": 46170159.0, "step": 276 }, { "entropy": 1.9644801914691925, "epoch": 0.03043036445030348, "grad_norm": 0.9804157614707947, "learning_rate": 1.0091407678244974e-05, "loss": 1.5158, "mean_token_accuracy": 0.6364769091208776, "num_tokens": 46387669.0, "step": 277 }, { "entropy": 1.898623416821162, "epoch": 0.030540221361676417, "grad_norm": 1.5173487663269043, "learning_rate": 1.0127970749542962e-05, "loss": 1.4926, "mean_token_accuracy": 0.6275862356026968, "num_tokens": 46563977.0, "step": 278 }, { "entropy": 1.995850036541621, "epoch": 0.030650078273049355, "grad_norm": 1.4851152896881104, "learning_rate": 1.0164533820840951e-05, "loss": 1.4706, "mean_token_accuracy": 0.6297584424416224, "num_tokens": 46718556.0, "step": 279 }, { "entropy": 1.8533688286940257, "epoch": 0.03075993518442229, "grad_norm": 1.2799378633499146, "learning_rate": 1.020109689213894e-05, "loss": 1.4704, "mean_token_accuracy": 0.6369777669509252, "num_tokens": 46878881.0, "step": 280 }, { "entropy": 1.9496891895929973, "epoch": 0.030869792095795226, "grad_norm": 7.689694404602051, "learning_rate": 1.023765996343693e-05, "loss": 1.443, "mean_token_accuracy": 0.6240266213814417, "num_tokens": 47091397.0, "step": 281 }, { "entropy": 1.9444605509440105, "epoch": 0.030979649007168163, "grad_norm": 2.042428970336914, "learning_rate": 1.0274223034734917e-05, "loss": 1.4686, "mean_token_accuracy": 0.6330529451370239, "num_tokens": 47221651.0, "step": 282 }, { "entropy": 1.889829029639562, "epoch": 0.0310895059185411, "grad_norm": 1.3333542346954346, "learning_rate": 1.0310786106032909e-05, "loss": 1.4242, "mean_token_accuracy": 0.6345295310020447, "num_tokens": 47396569.0, "step": 283 }, { "entropy": 1.844144841035207, "epoch": 0.031199362829914038, "grad_norm": 1.2614295482635498, "learning_rate": 1.0347349177330897e-05, "loss": 1.4626, "mean_token_accuracy": 0.6344168136517206, "num_tokens": 47542762.0, "step": 284 }, { "entropy": 1.8404381672541301, "epoch": 0.031309219741286975, "grad_norm": 2.0773274898529053, "learning_rate": 1.0383912248628885e-05, "loss": 1.3985, "mean_token_accuracy": 0.6463861962159475, "num_tokens": 47669565.0, "step": 285 }, { "entropy": 1.8757590055465698, "epoch": 0.03141907665265991, "grad_norm": 0.996104896068573, "learning_rate": 1.0420475319926875e-05, "loss": 1.4716, "mean_token_accuracy": 0.6297437200943629, "num_tokens": 47852702.0, "step": 286 }, { "entropy": 1.957614282766978, "epoch": 0.03152893356403285, "grad_norm": 1.0234733819961548, "learning_rate": 1.0457038391224863e-05, "loss": 1.5283, "mean_token_accuracy": 0.628383403023084, "num_tokens": 48039754.0, "step": 287 }, { "entropy": 1.8798251052697499, "epoch": 0.031638790475405784, "grad_norm": 1.2098981142044067, "learning_rate": 1.0493601462522852e-05, "loss": 1.5128, "mean_token_accuracy": 0.6379542450110117, "num_tokens": 48191936.0, "step": 288 }, { "entropy": 1.8851182560125987, "epoch": 0.03174864738677872, "grad_norm": 1.206680178642273, "learning_rate": 1.053016453382084e-05, "loss": 1.5542, "mean_token_accuracy": 0.6433884302775065, "num_tokens": 48385870.0, "step": 289 }, { "entropy": 1.8928188979625702, "epoch": 0.03185850429815166, "grad_norm": 1.523961067199707, "learning_rate": 1.0566727605118832e-05, "loss": 1.432, "mean_token_accuracy": 0.6425420343875885, "num_tokens": 48558121.0, "step": 290 }, { "entropy": 1.945473462343216, "epoch": 0.03196836120952459, "grad_norm": 0.9344412088394165, "learning_rate": 1.060329067641682e-05, "loss": 1.5703, "mean_token_accuracy": 0.6286270767450333, "num_tokens": 48722343.0, "step": 291 }, { "entropy": 1.881322979927063, "epoch": 0.032078218120897534, "grad_norm": 0.9196475148200989, "learning_rate": 1.0639853747714808e-05, "loss": 1.5234, "mean_token_accuracy": 0.6369537711143494, "num_tokens": 48887704.0, "step": 292 }, { "entropy": 1.933806041876475, "epoch": 0.03218807503227047, "grad_norm": 1.59644615650177, "learning_rate": 1.0676416819012798e-05, "loss": 1.4071, "mean_token_accuracy": 0.6504911333322525, "num_tokens": 49020557.0, "step": 293 }, { "entropy": 1.9321398834387462, "epoch": 0.0322979319436434, "grad_norm": 0.9138100147247314, "learning_rate": 1.0712979890310786e-05, "loss": 1.5022, "mean_token_accuracy": 0.6257292628288269, "num_tokens": 49207179.0, "step": 294 }, { "entropy": 1.968072275320689, "epoch": 0.03240778885501634, "grad_norm": 1.0833994150161743, "learning_rate": 1.0749542961608776e-05, "loss": 1.5052, "mean_token_accuracy": 0.6170014639695486, "num_tokens": 49368907.0, "step": 295 }, { "entropy": 1.897968828678131, "epoch": 0.032517645766389276, "grad_norm": 1.578821063041687, "learning_rate": 1.0786106032906764e-05, "loss": 1.4847, "mean_token_accuracy": 0.6308430184920629, "num_tokens": 49501311.0, "step": 296 }, { "entropy": 1.8983619312445323, "epoch": 0.03262750267776222, "grad_norm": 0.7992421388626099, "learning_rate": 1.0822669104204755e-05, "loss": 1.5683, "mean_token_accuracy": 0.622817466656367, "num_tokens": 49710655.0, "step": 297 }, { "entropy": 1.904332121213277, "epoch": 0.03273735958913515, "grad_norm": 0.8225474953651428, "learning_rate": 1.0859232175502743e-05, "loss": 1.6713, "mean_token_accuracy": 0.6234669287999471, "num_tokens": 49927190.0, "step": 298 }, { "entropy": 1.9980522493521373, "epoch": 0.032847216500508085, "grad_norm": 1.2448490858078003, "learning_rate": 1.0895795246800731e-05, "loss": 1.72, "mean_token_accuracy": 0.6147239456574122, "num_tokens": 50130606.0, "step": 299 }, { "entropy": 1.8948036630948384, "epoch": 0.032957073411881026, "grad_norm": 0.9344723224639893, "learning_rate": 1.0932358318098721e-05, "loss": 1.4923, "mean_token_accuracy": 0.636970043182373, "num_tokens": 50313035.0, "step": 300 }, { "entropy": 1.9244210918744404, "epoch": 0.03306693032325396, "grad_norm": 1.0805492401123047, "learning_rate": 1.096892138939671e-05, "loss": 1.5029, "mean_token_accuracy": 0.6375883320967356, "num_tokens": 50508718.0, "step": 301 }, { "entropy": 1.9689223965009053, "epoch": 0.0331767872346269, "grad_norm": 1.3119393587112427, "learning_rate": 1.1005484460694699e-05, "loss": 1.6139, "mean_token_accuracy": 0.6174919605255127, "num_tokens": 50686071.0, "step": 302 }, { "entropy": 1.9293088515599568, "epoch": 0.033286644145999834, "grad_norm": 1.0145171880722046, "learning_rate": 1.1042047531992687e-05, "loss": 1.4831, "mean_token_accuracy": 0.634314775466919, "num_tokens": 50862535.0, "step": 303 }, { "entropy": 1.9416919847329457, "epoch": 0.033396501057372775, "grad_norm": 1.5745701789855957, "learning_rate": 1.1078610603290679e-05, "loss": 1.5724, "mean_token_accuracy": 0.63288913667202, "num_tokens": 51040714.0, "step": 304 }, { "entropy": 1.914773811896642, "epoch": 0.03350635796874571, "grad_norm": 1.8054081201553345, "learning_rate": 1.1115173674588667e-05, "loss": 1.4908, "mean_token_accuracy": 0.6410078605016073, "num_tokens": 51196791.0, "step": 305 }, { "entropy": 1.8429247538248699, "epoch": 0.03361621488011864, "grad_norm": 0.9021672606468201, "learning_rate": 1.1151736745886655e-05, "loss": 1.584, "mean_token_accuracy": 0.6183707366387049, "num_tokens": 51438626.0, "step": 306 }, { "entropy": 1.926130364338557, "epoch": 0.033726071791491584, "grad_norm": 1.2748171091079712, "learning_rate": 1.1188299817184644e-05, "loss": 1.4385, "mean_token_accuracy": 0.6522895991802216, "num_tokens": 51584083.0, "step": 307 }, { "entropy": 1.893998791774114, "epoch": 0.03383592870286452, "grad_norm": 0.9571841955184937, "learning_rate": 1.1224862888482633e-05, "loss": 1.4666, "mean_token_accuracy": 0.638142466545105, "num_tokens": 51765032.0, "step": 308 }, { "entropy": 2.0008548498153687, "epoch": 0.03394578561423746, "grad_norm": 1.4947288036346436, "learning_rate": 1.1261425959780622e-05, "loss": 1.4839, "mean_token_accuracy": 0.6292637437582016, "num_tokens": 51912342.0, "step": 309 }, { "entropy": 1.8920707205931346, "epoch": 0.03405564252561039, "grad_norm": 1.2760716676712036, "learning_rate": 1.129798903107861e-05, "loss": 1.4213, "mean_token_accuracy": 0.6458606521288554, "num_tokens": 52051740.0, "step": 310 }, { "entropy": 1.9140145977338154, "epoch": 0.034165499436983326, "grad_norm": 0.9365392923355103, "learning_rate": 1.1334552102376602e-05, "loss": 1.5388, "mean_token_accuracy": 0.6314593305190405, "num_tokens": 52231050.0, "step": 311 }, { "entropy": 1.9039499859015148, "epoch": 0.03427535634835627, "grad_norm": 1.0918009281158447, "learning_rate": 1.137111517367459e-05, "loss": 1.6301, "mean_token_accuracy": 0.6101748992999395, "num_tokens": 52405268.0, "step": 312 }, { "entropy": 1.9176567395528157, "epoch": 0.0343852132597292, "grad_norm": 1.2120338678359985, "learning_rate": 1.1407678244972578e-05, "loss": 1.5766, "mean_token_accuracy": 0.6275134632984797, "num_tokens": 52615722.0, "step": 313 }, { "entropy": 1.8687150677045186, "epoch": 0.03449507017110214, "grad_norm": 1.3236573934555054, "learning_rate": 1.1444241316270568e-05, "loss": 1.5724, "mean_token_accuracy": 0.6229855716228485, "num_tokens": 52788308.0, "step": 314 }, { "entropy": 1.9559665719668071, "epoch": 0.034604927082475076, "grad_norm": 1.5506702661514282, "learning_rate": 1.1480804387568556e-05, "loss": 1.5085, "mean_token_accuracy": 0.6220680375893911, "num_tokens": 52927707.0, "step": 315 }, { "entropy": 1.88349653283755, "epoch": 0.03471478399384801, "grad_norm": 1.3259515762329102, "learning_rate": 1.1517367458866546e-05, "loss": 1.5174, "mean_token_accuracy": 0.6426070580879847, "num_tokens": 53062021.0, "step": 316 }, { "entropy": 1.9310015539328258, "epoch": 0.03482464090522095, "grad_norm": 1.5750491619110107, "learning_rate": 1.1553930530164534e-05, "loss": 1.5892, "mean_token_accuracy": 0.61604871849219, "num_tokens": 53181467.0, "step": 317 }, { "entropy": 1.8769896825154622, "epoch": 0.034934497816593885, "grad_norm": 0.743971049785614, "learning_rate": 1.1590493601462525e-05, "loss": 1.5217, "mean_token_accuracy": 0.6268236736456553, "num_tokens": 53407027.0, "step": 318 }, { "entropy": 1.9732247789700825, "epoch": 0.035044354727966825, "grad_norm": 1.4356918334960938, "learning_rate": 1.1627056672760513e-05, "loss": 1.4453, "mean_token_accuracy": 0.626713772614797, "num_tokens": 53556426.0, "step": 319 }, { "entropy": 1.9230054517587025, "epoch": 0.03515421163933976, "grad_norm": 1.635143756866455, "learning_rate": 1.1663619744058501e-05, "loss": 1.4392, "mean_token_accuracy": 0.6414381017287573, "num_tokens": 53663337.0, "step": 320 }, { "entropy": 1.9362250169118245, "epoch": 0.03526406855071269, "grad_norm": 1.7073150873184204, "learning_rate": 1.1700182815356491e-05, "loss": 1.617, "mean_token_accuracy": 0.6255223502715429, "num_tokens": 53799717.0, "step": 321 }, { "entropy": 1.8816980421543121, "epoch": 0.035373925462085634, "grad_norm": 1.4336020946502686, "learning_rate": 1.1736745886654479e-05, "loss": 1.4752, "mean_token_accuracy": 0.6383567899465561, "num_tokens": 53946152.0, "step": 322 }, { "entropy": 1.8929463227589924, "epoch": 0.03548378237345857, "grad_norm": 2.153032064437866, "learning_rate": 1.1773308957952469e-05, "loss": 1.5695, "mean_token_accuracy": 0.6398225873708725, "num_tokens": 54116796.0, "step": 323 }, { "entropy": 1.8566848436991374, "epoch": 0.03559363928483151, "grad_norm": 0.8859526515007019, "learning_rate": 1.1809872029250457e-05, "loss": 1.5324, "mean_token_accuracy": 0.6276658624410629, "num_tokens": 54293425.0, "step": 324 }, { "entropy": 1.8859939277172089, "epoch": 0.03570349619620444, "grad_norm": 1.7319366931915283, "learning_rate": 1.1846435100548448e-05, "loss": 1.4423, "mean_token_accuracy": 0.6443535685539246, "num_tokens": 54418340.0, "step": 325 }, { "entropy": 1.8691116273403168, "epoch": 0.035813353107577384, "grad_norm": 0.8846819400787354, "learning_rate": 1.1882998171846436e-05, "loss": 1.5374, "mean_token_accuracy": 0.6237656573454539, "num_tokens": 54593530.0, "step": 326 }, { "entropy": 1.9191945890585582, "epoch": 0.03592321001895032, "grad_norm": 1.6270257234573364, "learning_rate": 1.1919561243144425e-05, "loss": 1.3593, "mean_token_accuracy": 0.6514436999956766, "num_tokens": 54754982.0, "step": 327 }, { "entropy": 1.9296835362911224, "epoch": 0.03603306693032325, "grad_norm": 1.3112103939056396, "learning_rate": 1.1956124314442414e-05, "loss": 1.548, "mean_token_accuracy": 0.6345210323731104, "num_tokens": 54911330.0, "step": 328 }, { "entropy": 1.9412719508012135, "epoch": 0.03614292384169619, "grad_norm": 1.8282943964004517, "learning_rate": 1.1992687385740402e-05, "loss": 1.5502, "mean_token_accuracy": 0.6237274209658304, "num_tokens": 55070562.0, "step": 329 }, { "entropy": 1.9365448355674744, "epoch": 0.036252780753069126, "grad_norm": 1.3936512470245361, "learning_rate": 1.2029250457038392e-05, "loss": 1.4556, "mean_token_accuracy": 0.635651042064031, "num_tokens": 55234754.0, "step": 330 }, { "entropy": 1.90728959441185, "epoch": 0.03636263766444207, "grad_norm": 1.0101035833358765, "learning_rate": 1.206581352833638e-05, "loss": 1.4667, "mean_token_accuracy": 0.6326676507790884, "num_tokens": 55403593.0, "step": 331 }, { "entropy": 1.9017587800820668, "epoch": 0.036472494575815, "grad_norm": 0.9737944006919861, "learning_rate": 1.2102376599634372e-05, "loss": 1.4969, "mean_token_accuracy": 0.6238344510396322, "num_tokens": 55615207.0, "step": 332 }, { "entropy": 1.9239464402198792, "epoch": 0.036582351487187935, "grad_norm": 1.1247916221618652, "learning_rate": 1.213893967093236e-05, "loss": 1.5176, "mean_token_accuracy": 0.6404502739508947, "num_tokens": 55772900.0, "step": 333 }, { "entropy": 1.97471684217453, "epoch": 0.036692208398560876, "grad_norm": 1.3233532905578613, "learning_rate": 1.2175502742230348e-05, "loss": 1.463, "mean_token_accuracy": 0.6393190721670786, "num_tokens": 55917794.0, "step": 334 }, { "entropy": 1.8958436946074169, "epoch": 0.03680206530993381, "grad_norm": 1.441660761833191, "learning_rate": 1.2212065813528338e-05, "loss": 1.413, "mean_token_accuracy": 0.6413801709810892, "num_tokens": 56056541.0, "step": 335 }, { "entropy": 1.9842320779959361, "epoch": 0.03691192222130675, "grad_norm": 1.0363242626190186, "learning_rate": 1.2248628884826326e-05, "loss": 1.4963, "mean_token_accuracy": 0.6350095023711523, "num_tokens": 56192437.0, "step": 336 }, { "entropy": 1.9338072041670482, "epoch": 0.037021779132679684, "grad_norm": 1.5733563899993896, "learning_rate": 1.2285191956124315e-05, "loss": 1.554, "mean_token_accuracy": 0.6295024752616882, "num_tokens": 56358968.0, "step": 337 }, { "entropy": 1.897556722164154, "epoch": 0.03713163604405262, "grad_norm": 0.9414510130882263, "learning_rate": 1.2321755027422303e-05, "loss": 1.4555, "mean_token_accuracy": 0.6441336075464884, "num_tokens": 56547658.0, "step": 338 }, { "entropy": 1.9216489593187969, "epoch": 0.03724149295542556, "grad_norm": 0.890693187713623, "learning_rate": 1.2358318098720295e-05, "loss": 1.5634, "mean_token_accuracy": 0.6117081145445505, "num_tokens": 56755945.0, "step": 339 }, { "entropy": 1.878822495539983, "epoch": 0.03735134986679849, "grad_norm": 1.6289485692977905, "learning_rate": 1.2394881170018283e-05, "loss": 1.346, "mean_token_accuracy": 0.6585533966620764, "num_tokens": 56889301.0, "step": 340 }, { "entropy": 1.9101523856321971, "epoch": 0.037461206778171434, "grad_norm": 1.244612455368042, "learning_rate": 1.2431444241316271e-05, "loss": 1.4946, "mean_token_accuracy": 0.625670313835144, "num_tokens": 57043658.0, "step": 341 }, { "entropy": 1.9623491565386455, "epoch": 0.03757106368954437, "grad_norm": 1.3181416988372803, "learning_rate": 1.246800731261426e-05, "loss": 1.5015, "mean_token_accuracy": 0.6292295505603155, "num_tokens": 57171786.0, "step": 342 }, { "entropy": 1.894644061724345, "epoch": 0.03768092060091731, "grad_norm": 1.3845021724700928, "learning_rate": 1.2504570383912249e-05, "loss": 1.5347, "mean_token_accuracy": 0.6457602828741074, "num_tokens": 57329673.0, "step": 343 }, { "entropy": 1.9226838648319244, "epoch": 0.03779077751229024, "grad_norm": 1.0904666185379028, "learning_rate": 1.2541133455210239e-05, "loss": 1.4927, "mean_token_accuracy": 0.6328802009423574, "num_tokens": 57504063.0, "step": 344 }, { "entropy": 1.8712241252263386, "epoch": 0.037900634423663176, "grad_norm": 0.674035906791687, "learning_rate": 1.2577696526508227e-05, "loss": 1.5697, "mean_token_accuracy": 0.6206586708625158, "num_tokens": 57748561.0, "step": 345 }, { "entropy": 1.8309160272280376, "epoch": 0.03801049133503612, "grad_norm": 0.8603072166442871, "learning_rate": 1.2614259597806218e-05, "loss": 1.5903, "mean_token_accuracy": 0.6370584319035212, "num_tokens": 57941124.0, "step": 346 }, { "entropy": 1.867393175760905, "epoch": 0.03812034824640905, "grad_norm": 1.344534158706665, "learning_rate": 1.2650822669104206e-05, "loss": 1.425, "mean_token_accuracy": 0.6433531989653906, "num_tokens": 58091957.0, "step": 347 }, { "entropy": 1.8822752038637798, "epoch": 0.03823020515778199, "grad_norm": 1.8091611862182617, "learning_rate": 1.2687385740402194e-05, "loss": 1.3816, "mean_token_accuracy": 0.6489702761173248, "num_tokens": 58240675.0, "step": 348 }, { "entropy": 1.9269113938013713, "epoch": 0.038340062069154926, "grad_norm": 1.1259180307388306, "learning_rate": 1.2723948811700184e-05, "loss": 1.5647, "mean_token_accuracy": 0.6245926817258199, "num_tokens": 58431982.0, "step": 349 }, { "entropy": 1.861362983783086, "epoch": 0.03844991898052786, "grad_norm": 1.033488392829895, "learning_rate": 1.2760511882998172e-05, "loss": 1.468, "mean_token_accuracy": 0.6400075107812881, "num_tokens": 58609741.0, "step": 350 }, { "entropy": 1.956882357597351, "epoch": 0.0385597758919008, "grad_norm": 1.2942620515823364, "learning_rate": 1.2797074954296162e-05, "loss": 1.4972, "mean_token_accuracy": 0.6373551438252131, "num_tokens": 58725263.0, "step": 351 }, { "entropy": 1.9582510491212208, "epoch": 0.038669632803273735, "grad_norm": 1.3242194652557373, "learning_rate": 1.283363802559415e-05, "loss": 1.4572, "mean_token_accuracy": 0.6369108855724335, "num_tokens": 58890257.0, "step": 352 }, { "entropy": 1.8531585335731506, "epoch": 0.038779489714646675, "grad_norm": 1.8601542711257935, "learning_rate": 1.2870201096892141e-05, "loss": 1.3595, "mean_token_accuracy": 0.6562222242355347, "num_tokens": 59013387.0, "step": 353 }, { "entropy": 1.938563883304596, "epoch": 0.03888934662601961, "grad_norm": 1.148009181022644, "learning_rate": 1.290676416819013e-05, "loss": 1.4834, "mean_token_accuracy": 0.6284010857343674, "num_tokens": 59194237.0, "step": 354 }, { "entropy": 1.8674622178077698, "epoch": 0.03899920353739254, "grad_norm": 0.8510828018188477, "learning_rate": 1.2943327239488118e-05, "loss": 1.4612, "mean_token_accuracy": 0.6370283762613932, "num_tokens": 59393642.0, "step": 355 }, { "entropy": 1.8750587205092113, "epoch": 0.039109060448765484, "grad_norm": 1.027628779411316, "learning_rate": 1.2979890310786107e-05, "loss": 1.4421, "mean_token_accuracy": 0.635211726029714, "num_tokens": 59545332.0, "step": 356 }, { "entropy": 1.9101734459400177, "epoch": 0.03921891736013842, "grad_norm": 1.4470945596694946, "learning_rate": 1.3016453382084095e-05, "loss": 1.3906, "mean_token_accuracy": 0.6514510164658228, "num_tokens": 59690347.0, "step": 357 }, { "entropy": 1.8950016995271046, "epoch": 0.03932877427151136, "grad_norm": 1.020830512046814, "learning_rate": 1.3053016453382085e-05, "loss": 1.4614, "mean_token_accuracy": 0.6369933982690176, "num_tokens": 59897237.0, "step": 358 }, { "entropy": 1.9750393331050873, "epoch": 0.03943863118288429, "grad_norm": 1.3367540836334229, "learning_rate": 1.3089579524680073e-05, "loss": 1.5399, "mean_token_accuracy": 0.620476762453715, "num_tokens": 60056227.0, "step": 359 }, { "entropy": 1.833806296189626, "epoch": 0.03954848809425723, "grad_norm": 1.280727505683899, "learning_rate": 1.3126142595978065e-05, "loss": 1.4652, "mean_token_accuracy": 0.6454865237077078, "num_tokens": 60203405.0, "step": 360 }, { "entropy": 1.9267512361208599, "epoch": 0.03965834500563017, "grad_norm": 1.0081630945205688, "learning_rate": 1.3162705667276053e-05, "loss": 1.5328, "mean_token_accuracy": 0.6300081759691238, "num_tokens": 60402318.0, "step": 361 }, { "entropy": 1.8845655421415966, "epoch": 0.0397682019170031, "grad_norm": 0.7514256834983826, "learning_rate": 1.3199268738574041e-05, "loss": 1.4632, "mean_token_accuracy": 0.6361817816893259, "num_tokens": 60603115.0, "step": 362 }, { "entropy": 1.8751682738463085, "epoch": 0.03987805882837604, "grad_norm": 0.9772806167602539, "learning_rate": 1.323583180987203e-05, "loss": 1.5428, "mean_token_accuracy": 0.6149017065763474, "num_tokens": 60849896.0, "step": 363 }, { "entropy": 1.9592778881390889, "epoch": 0.039987915739748976, "grad_norm": 1.4042431116104126, "learning_rate": 1.3272394881170019e-05, "loss": 1.5267, "mean_token_accuracy": 0.638154923915863, "num_tokens": 61010485.0, "step": 364 }, { "entropy": 1.8496886094411213, "epoch": 0.04009777265112192, "grad_norm": 0.871859610080719, "learning_rate": 1.3308957952468008e-05, "loss": 1.5193, "mean_token_accuracy": 0.6397547672192255, "num_tokens": 61197551.0, "step": 365 }, { "entropy": 1.9339230159918468, "epoch": 0.04020762956249485, "grad_norm": 1.5222102403640747, "learning_rate": 1.3345521023765997e-05, "loss": 1.5256, "mean_token_accuracy": 0.6346002717812856, "num_tokens": 61334172.0, "step": 366 }, { "entropy": 1.84268722931544, "epoch": 0.040317486473867785, "grad_norm": 0.8281468749046326, "learning_rate": 1.3382084095063988e-05, "loss": 1.53, "mean_token_accuracy": 0.6305044641097387, "num_tokens": 61550034.0, "step": 367 }, { "entropy": 1.8627445697784424, "epoch": 0.040427343385240726, "grad_norm": 0.6707728505134583, "learning_rate": 1.3418647166361976e-05, "loss": 1.4511, "mean_token_accuracy": 0.6334594835837682, "num_tokens": 61766707.0, "step": 368 }, { "entropy": 1.927617460489273, "epoch": 0.04053720029661366, "grad_norm": 1.031119465827942, "learning_rate": 1.3455210237659964e-05, "loss": 1.4601, "mean_token_accuracy": 0.6361829191446304, "num_tokens": 61899435.0, "step": 369 }, { "entropy": 1.8108851512273152, "epoch": 0.0406470572079866, "grad_norm": 1.329870343208313, "learning_rate": 1.3491773308957954e-05, "loss": 1.392, "mean_token_accuracy": 0.6440057257811228, "num_tokens": 62068166.0, "step": 370 }, { "entropy": 1.83323472738266, "epoch": 0.040756914119359534, "grad_norm": 0.8489660620689392, "learning_rate": 1.3528336380255942e-05, "loss": 1.3747, "mean_token_accuracy": 0.651435524225235, "num_tokens": 62237791.0, "step": 371 }, { "entropy": 1.8929267923037212, "epoch": 0.04086677103073247, "grad_norm": 1.568328857421875, "learning_rate": 1.3564899451553932e-05, "loss": 1.3254, "mean_token_accuracy": 0.6630758593479792, "num_tokens": 62354504.0, "step": 372 }, { "entropy": 1.904485156138738, "epoch": 0.04097662794210541, "grad_norm": 1.0895646810531616, "learning_rate": 1.360146252285192e-05, "loss": 1.4935, "mean_token_accuracy": 0.6399994641542435, "num_tokens": 62560184.0, "step": 373 }, { "entropy": 1.9404686590035756, "epoch": 0.04108648485347834, "grad_norm": 0.7966954112052917, "learning_rate": 1.3638025594149911e-05, "loss": 1.6445, "mean_token_accuracy": 0.6181567882498106, "num_tokens": 62762821.0, "step": 374 }, { "entropy": 1.930189887682597, "epoch": 0.041196341764851284, "grad_norm": 1.237733006477356, "learning_rate": 1.36745886654479e-05, "loss": 1.4306, "mean_token_accuracy": 0.6403100987275442, "num_tokens": 62911553.0, "step": 375 }, { "entropy": 1.932339499394099, "epoch": 0.04130619867622422, "grad_norm": 1.387355923652649, "learning_rate": 1.3711151736745887e-05, "loss": 1.4726, "mean_token_accuracy": 0.6367992361386617, "num_tokens": 63048004.0, "step": 376 }, { "entropy": 1.8782165547211964, "epoch": 0.04141605558759715, "grad_norm": 1.2075997591018677, "learning_rate": 1.3747714808043877e-05, "loss": 1.4862, "mean_token_accuracy": 0.6394857068856558, "num_tokens": 63211370.0, "step": 377 }, { "entropy": 1.9089668989181519, "epoch": 0.04152591249897009, "grad_norm": 1.1602435111999512, "learning_rate": 1.3784277879341865e-05, "loss": 1.4771, "mean_token_accuracy": 0.6510659754276276, "num_tokens": 63393016.0, "step": 378 }, { "entropy": 1.993513544400533, "epoch": 0.041635769410343026, "grad_norm": 1.2444241046905518, "learning_rate": 1.3820840950639855e-05, "loss": 1.4608, "mean_token_accuracy": 0.6339425295591354, "num_tokens": 63528722.0, "step": 379 }, { "entropy": 1.794573426246643, "epoch": 0.04174562632171597, "grad_norm": 0.9674469232559204, "learning_rate": 1.3857404021937843e-05, "loss": 1.5345, "mean_token_accuracy": 0.6317428996165594, "num_tokens": 63712882.0, "step": 380 }, { "entropy": 1.9814475774765015, "epoch": 0.0418554832330889, "grad_norm": 1.197340488433838, "learning_rate": 1.3893967093235835e-05, "loss": 1.4852, "mean_token_accuracy": 0.6282167633374532, "num_tokens": 63842716.0, "step": 381 }, { "entropy": 1.9127925833066304, "epoch": 0.041965340144461835, "grad_norm": 1.0025110244750977, "learning_rate": 1.3930530164533823e-05, "loss": 1.446, "mean_token_accuracy": 0.6374160995086035, "num_tokens": 64050221.0, "step": 382 }, { "entropy": 1.8482964634895325, "epoch": 0.042075197055834776, "grad_norm": 1.02582585811615, "learning_rate": 1.396709323583181e-05, "loss": 1.4667, "mean_token_accuracy": 0.6344637920459112, "num_tokens": 64211219.0, "step": 383 }, { "entropy": 1.9587088723977406, "epoch": 0.04218505396720771, "grad_norm": 1.2455130815505981, "learning_rate": 1.40036563071298e-05, "loss": 1.5323, "mean_token_accuracy": 0.6333187768856684, "num_tokens": 64323175.0, "step": 384 }, { "entropy": 1.9571288426717122, "epoch": 0.04229491087858065, "grad_norm": 1.6702572107315063, "learning_rate": 1.4040219378427789e-05, "loss": 1.5039, "mean_token_accuracy": 0.6258624742428461, "num_tokens": 64442759.0, "step": 385 }, { "entropy": 1.9019165933132172, "epoch": 0.042404767789953585, "grad_norm": 0.7855273485183716, "learning_rate": 1.4076782449725778e-05, "loss": 1.6998, "mean_token_accuracy": 0.6055170843998591, "num_tokens": 64665676.0, "step": 386 }, { "entropy": 1.9035062193870544, "epoch": 0.042514624701326525, "grad_norm": 1.8162872791290283, "learning_rate": 1.4113345521023766e-05, "loss": 1.3691, "mean_token_accuracy": 0.6513401865959167, "num_tokens": 64782379.0, "step": 387 }, { "entropy": 1.903337796529134, "epoch": 0.04262448161269946, "grad_norm": 1.0532211065292358, "learning_rate": 1.4149908592321758e-05, "loss": 1.3714, "mean_token_accuracy": 0.6637005259593328, "num_tokens": 64934687.0, "step": 388 }, { "entropy": 1.9479155739148457, "epoch": 0.04273433852407239, "grad_norm": 1.551796555519104, "learning_rate": 1.4186471663619746e-05, "loss": 1.4819, "mean_token_accuracy": 0.6278212567170461, "num_tokens": 65082079.0, "step": 389 }, { "entropy": 1.8967718482017517, "epoch": 0.042844195435445334, "grad_norm": 1.1235419511795044, "learning_rate": 1.4223034734917734e-05, "loss": 1.4021, "mean_token_accuracy": 0.6365053604046503, "num_tokens": 65289665.0, "step": 390 }, { "entropy": 1.9098777274290721, "epoch": 0.04295405234681827, "grad_norm": 0.99347984790802, "learning_rate": 1.4259597806215724e-05, "loss": 1.5961, "mean_token_accuracy": 0.6231525763869286, "num_tokens": 65423010.0, "step": 391 }, { "entropy": 1.9435608784357707, "epoch": 0.04306390925819121, "grad_norm": 0.733677864074707, "learning_rate": 1.4296160877513712e-05, "loss": 1.5477, "mean_token_accuracy": 0.6210927665233612, "num_tokens": 65632732.0, "step": 392 }, { "entropy": 1.939713458220164, "epoch": 0.04317376616956414, "grad_norm": 1.312638282775879, "learning_rate": 1.4332723948811702e-05, "loss": 1.4456, "mean_token_accuracy": 0.6392714977264404, "num_tokens": 65774896.0, "step": 393 }, { "entropy": 1.8830204804738362, "epoch": 0.04328362308093708, "grad_norm": 0.9776220917701721, "learning_rate": 1.436928702010969e-05, "loss": 1.4522, "mean_token_accuracy": 0.6250886768102646, "num_tokens": 65965831.0, "step": 394 }, { "entropy": 1.8725888232390087, "epoch": 0.04339347999231002, "grad_norm": 0.9504810571670532, "learning_rate": 1.4405850091407681e-05, "loss": 1.507, "mean_token_accuracy": 0.6213281452655792, "num_tokens": 66131698.0, "step": 395 }, { "entropy": 1.946168194214503, "epoch": 0.04350333690368295, "grad_norm": 1.1905755996704102, "learning_rate": 1.444241316270567e-05, "loss": 1.4674, "mean_token_accuracy": 0.6266982605059942, "num_tokens": 66250281.0, "step": 396 }, { "entropy": 1.8628549575805664, "epoch": 0.04361319381505589, "grad_norm": 1.0694218873977661, "learning_rate": 1.4478976234003657e-05, "loss": 1.3903, "mean_token_accuracy": 0.6529761354128519, "num_tokens": 66380753.0, "step": 397 }, { "entropy": 1.8151433169841766, "epoch": 0.043723050726428826, "grad_norm": 1.0954636335372925, "learning_rate": 1.4515539305301647e-05, "loss": 1.3429, "mean_token_accuracy": 0.6568314780791601, "num_tokens": 66511047.0, "step": 398 }, { "entropy": 1.8963292141755421, "epoch": 0.04383290763780176, "grad_norm": 0.7910407781600952, "learning_rate": 1.4552102376599635e-05, "loss": 1.5738, "mean_token_accuracy": 0.6402320464452108, "num_tokens": 66691862.0, "step": 399 }, { "entropy": 1.9180162648359935, "epoch": 0.0439427645491747, "grad_norm": 0.9959750175476074, "learning_rate": 1.4588665447897625e-05, "loss": 1.7048, "mean_token_accuracy": 0.6067099720239639, "num_tokens": 66884961.0, "step": 400 }, { "entropy": 1.8748231430848439, "epoch": 0.044052621460547635, "grad_norm": 0.9748513102531433, "learning_rate": 1.4625228519195613e-05, "loss": 1.5165, "mean_token_accuracy": 0.6368064184983572, "num_tokens": 67041958.0, "step": 401 }, { "entropy": 1.85904856522878, "epoch": 0.044162478371920576, "grad_norm": 1.2120349407196045, "learning_rate": 1.4661791590493604e-05, "loss": 1.3507, "mean_token_accuracy": 0.6649549951155981, "num_tokens": 67176830.0, "step": 402 }, { "entropy": 1.8290843864281972, "epoch": 0.04427233528329351, "grad_norm": 0.9248878359794617, "learning_rate": 1.4698354661791592e-05, "loss": 1.4839, "mean_token_accuracy": 0.6410348663727442, "num_tokens": 67335145.0, "step": 403 }, { "entropy": 1.86801873644193, "epoch": 0.044382192194666444, "grad_norm": 1.033895492553711, "learning_rate": 1.473491773308958e-05, "loss": 1.6367, "mean_token_accuracy": 0.6232090393702189, "num_tokens": 67511434.0, "step": 404 }, { "entropy": 1.9208786884943645, "epoch": 0.044492049106039384, "grad_norm": 0.9780264496803284, "learning_rate": 1.477148080438757e-05, "loss": 1.4249, "mean_token_accuracy": 0.659845232963562, "num_tokens": 67644219.0, "step": 405 }, { "entropy": 1.8616258203983307, "epoch": 0.04460190601741232, "grad_norm": 1.050032377243042, "learning_rate": 1.4808043875685558e-05, "loss": 1.522, "mean_token_accuracy": 0.6423654605944952, "num_tokens": 67802624.0, "step": 406 }, { "entropy": 1.9324693580468495, "epoch": 0.04471176292878526, "grad_norm": 0.8673065900802612, "learning_rate": 1.4844606946983548e-05, "loss": 1.4363, "mean_token_accuracy": 0.6219440003236135, "num_tokens": 67979554.0, "step": 407 }, { "entropy": 1.8592036068439484, "epoch": 0.04482161984015819, "grad_norm": 0.809765100479126, "learning_rate": 1.4881170018281536e-05, "loss": 1.5808, "mean_token_accuracy": 0.63762233654658, "num_tokens": 68165091.0, "step": 408 }, { "entropy": 1.9416759411493938, "epoch": 0.044931476751531134, "grad_norm": 0.6962368488311768, "learning_rate": 1.4917733089579528e-05, "loss": 1.4768, "mean_token_accuracy": 0.6251722325881323, "num_tokens": 68410038.0, "step": 409 }, { "entropy": 1.950746734937032, "epoch": 0.04504133366290407, "grad_norm": 1.4196857213974, "learning_rate": 1.4954296160877516e-05, "loss": 1.402, "mean_token_accuracy": 0.6466809262832006, "num_tokens": 68514877.0, "step": 410 }, { "entropy": 1.963972936073939, "epoch": 0.045151190574277, "grad_norm": 1.451259970664978, "learning_rate": 1.4990859232175504e-05, "loss": 1.4126, "mean_token_accuracy": 0.6355726569890976, "num_tokens": 68660564.0, "step": 411 }, { "entropy": 1.8489231765270233, "epoch": 0.04526104748564994, "grad_norm": 1.0938141345977783, "learning_rate": 1.5027422303473494e-05, "loss": 1.387, "mean_token_accuracy": 0.6448115805784861, "num_tokens": 68811370.0, "step": 412 }, { "entropy": 1.8644197285175323, "epoch": 0.045370904397022876, "grad_norm": 1.0362029075622559, "learning_rate": 1.5063985374771482e-05, "loss": 1.5106, "mean_token_accuracy": 0.6414720167716345, "num_tokens": 68959627.0, "step": 413 }, { "entropy": 1.9586124916871388, "epoch": 0.04548076130839582, "grad_norm": 0.8306599259376526, "learning_rate": 1.5100548446069471e-05, "loss": 1.6478, "mean_token_accuracy": 0.6033438295125961, "num_tokens": 69141689.0, "step": 414 }, { "entropy": 1.9255466957887013, "epoch": 0.04559061821976875, "grad_norm": 0.7374395728111267, "learning_rate": 1.513711151736746e-05, "loss": 1.5721, "mean_token_accuracy": 0.622237409154574, "num_tokens": 69346313.0, "step": 415 }, { "entropy": 1.9461825489997864, "epoch": 0.045700475131141685, "grad_norm": 1.0214322805404663, "learning_rate": 1.5173674588665451e-05, "loss": 1.3917, "mean_token_accuracy": 0.6484548399845759, "num_tokens": 69484013.0, "step": 416 }, { "entropy": 1.9209075768788655, "epoch": 0.045810332042514626, "grad_norm": 1.165231466293335, "learning_rate": 1.5210237659963439e-05, "loss": 1.4843, "mean_token_accuracy": 0.637021337946256, "num_tokens": 69634261.0, "step": 417 }, { "entropy": 1.8663530945777893, "epoch": 0.04592018895388756, "grad_norm": 0.8267627358436584, "learning_rate": 1.5246800731261427e-05, "loss": 1.5228, "mean_token_accuracy": 0.6384973078966141, "num_tokens": 69810623.0, "step": 418 }, { "entropy": 1.8647344807783763, "epoch": 0.0460300458652605, "grad_norm": 1.1516979932785034, "learning_rate": 1.5283363802559417e-05, "loss": 1.3857, "mean_token_accuracy": 0.649631142616272, "num_tokens": 69971092.0, "step": 419 }, { "entropy": 1.8947654863198597, "epoch": 0.046139902776633435, "grad_norm": 1.20870041847229, "learning_rate": 1.5319926873857403e-05, "loss": 1.4169, "mean_token_accuracy": 0.6381567666927973, "num_tokens": 70117842.0, "step": 420 }, { "entropy": 1.95854847629865, "epoch": 0.04624975968800637, "grad_norm": 1.056316614151001, "learning_rate": 1.5356489945155396e-05, "loss": 1.4916, "mean_token_accuracy": 0.6343552867571512, "num_tokens": 70258941.0, "step": 421 }, { "entropy": 1.8973442415396373, "epoch": 0.04635961659937931, "grad_norm": 0.7159221172332764, "learning_rate": 1.5393053016453383e-05, "loss": 1.5651, "mean_token_accuracy": 0.621953676144282, "num_tokens": 70458247.0, "step": 422 }, { "entropy": 1.9163278142611186, "epoch": 0.04646947351075224, "grad_norm": 0.6779471039772034, "learning_rate": 1.5429616087751372e-05, "loss": 1.4651, "mean_token_accuracy": 0.6381291598081589, "num_tokens": 70633332.0, "step": 423 }, { "entropy": 1.853121320406596, "epoch": 0.046579330422125184, "grad_norm": 0.7182997465133667, "learning_rate": 1.5466179159049362e-05, "loss": 1.4749, "mean_token_accuracy": 0.6441103170315424, "num_tokens": 70828981.0, "step": 424 }, { "entropy": 1.9391433397928874, "epoch": 0.04668918733349812, "grad_norm": 0.8690926432609558, "learning_rate": 1.550274223034735e-05, "loss": 1.4578, "mean_token_accuracy": 0.6315609067678452, "num_tokens": 70989991.0, "step": 425 }, { "entropy": 1.8937304317951202, "epoch": 0.04679904424487106, "grad_norm": 1.377400517463684, "learning_rate": 1.553930530164534e-05, "loss": 1.4113, "mean_token_accuracy": 0.645716001590093, "num_tokens": 71123738.0, "step": 426 }, { "entropy": 1.92280246814092, "epoch": 0.04690890115624399, "grad_norm": 1.2862893342971802, "learning_rate": 1.5575868372943328e-05, "loss": 1.4553, "mean_token_accuracy": 0.6406665394703547, "num_tokens": 71236228.0, "step": 427 }, { "entropy": 1.8118858635425568, "epoch": 0.04701875806761693, "grad_norm": 0.903378963470459, "learning_rate": 1.5612431444241318e-05, "loss": 1.5105, "mean_token_accuracy": 0.6435635139544805, "num_tokens": 71383417.0, "step": 428 }, { "entropy": 2.0178940494855246, "epoch": 0.04712861497898987, "grad_norm": 1.603965163230896, "learning_rate": 1.5648994515539308e-05, "loss": 1.3997, "mean_token_accuracy": 0.6480491409699122, "num_tokens": 71521903.0, "step": 429 }, { "entropy": 1.865706165631612, "epoch": 0.0472384718903628, "grad_norm": 1.1885672807693481, "learning_rate": 1.5685557586837297e-05, "loss": 1.5575, "mean_token_accuracy": 0.6412462542454401, "num_tokens": 71704942.0, "step": 430 }, { "entropy": 1.816953221956889, "epoch": 0.04734832880173574, "grad_norm": 0.7502696514129639, "learning_rate": 1.5722120658135284e-05, "loss": 1.4532, "mean_token_accuracy": 0.6353533814350764, "num_tokens": 71906527.0, "step": 431 }, { "entropy": 1.8986171980698903, "epoch": 0.047458185713108676, "grad_norm": 1.043899655342102, "learning_rate": 1.5758683729433274e-05, "loss": 1.407, "mean_token_accuracy": 0.6350090801715851, "num_tokens": 72112800.0, "step": 432 }, { "entropy": 1.9298064609368641, "epoch": 0.04756804262448161, "grad_norm": 0.8479198217391968, "learning_rate": 1.5795246800731263e-05, "loss": 1.5159, "mean_token_accuracy": 0.6414108375708262, "num_tokens": 72320482.0, "step": 433 }, { "entropy": 1.9490727484226227, "epoch": 0.04767789953585455, "grad_norm": 0.848849356174469, "learning_rate": 1.583180987202925e-05, "loss": 1.5658, "mean_token_accuracy": 0.6233013023932775, "num_tokens": 72463724.0, "step": 434 }, { "entropy": 1.8321526845296223, "epoch": 0.047787756447227485, "grad_norm": 0.7417627573013306, "learning_rate": 1.5868372943327243e-05, "loss": 1.3661, "mean_token_accuracy": 0.6409125824769338, "num_tokens": 72635818.0, "step": 435 }, { "entropy": 1.8534736235936482, "epoch": 0.047897613358600426, "grad_norm": 0.836335301399231, "learning_rate": 1.590493601462523e-05, "loss": 1.6775, "mean_token_accuracy": 0.630969633658727, "num_tokens": 72801261.0, "step": 436 }, { "entropy": 1.969543606042862, "epoch": 0.04800747026997336, "grad_norm": 0.9996944665908813, "learning_rate": 1.594149908592322e-05, "loss": 1.4598, "mean_token_accuracy": 0.634533574183782, "num_tokens": 72940719.0, "step": 437 }, { "entropy": 1.893303821484248, "epoch": 0.048117327181346294, "grad_norm": 1.1567001342773438, "learning_rate": 1.597806215722121e-05, "loss": 1.3812, "mean_token_accuracy": 0.6442390580972036, "num_tokens": 73072820.0, "step": 438 }, { "entropy": 1.9349376459916432, "epoch": 0.048227184092719234, "grad_norm": 0.9590914845466614, "learning_rate": 1.6014625228519195e-05, "loss": 1.4763, "mean_token_accuracy": 0.6355199714501699, "num_tokens": 73241924.0, "step": 439 }, { "entropy": 1.8869278033574421, "epoch": 0.04833704100409217, "grad_norm": 0.7127754092216492, "learning_rate": 1.6051188299817185e-05, "loss": 1.4739, "mean_token_accuracy": 0.6266596366961797, "num_tokens": 73421388.0, "step": 440 }, { "entropy": 1.8205039203166962, "epoch": 0.04844689791546511, "grad_norm": 1.2497098445892334, "learning_rate": 1.6087751371115175e-05, "loss": 1.4651, "mean_token_accuracy": 0.6429401089747747, "num_tokens": 73574441.0, "step": 441 }, { "entropy": 1.880224694808324, "epoch": 0.04855675482683804, "grad_norm": 0.8544715046882629, "learning_rate": 1.6124314442413164e-05, "loss": 1.4796, "mean_token_accuracy": 0.6351951907078425, "num_tokens": 73732021.0, "step": 442 }, { "entropy": 1.845553586880366, "epoch": 0.04866661173821098, "grad_norm": 0.8492904305458069, "learning_rate": 1.6160877513711154e-05, "loss": 1.4662, "mean_token_accuracy": 0.6401056249936422, "num_tokens": 73898879.0, "step": 443 }, { "entropy": 1.8184046844641368, "epoch": 0.04877646864958392, "grad_norm": 0.8159205913543701, "learning_rate": 1.6197440585009144e-05, "loss": 1.3999, "mean_token_accuracy": 0.6590509961048762, "num_tokens": 74048185.0, "step": 444 }, { "entropy": 1.909311443567276, "epoch": 0.04888632556095685, "grad_norm": 0.8159104585647583, "learning_rate": 1.623400365630713e-05, "loss": 1.4732, "mean_token_accuracy": 0.639503538608551, "num_tokens": 74205846.0, "step": 445 }, { "entropy": 1.8864035904407501, "epoch": 0.04899618247232979, "grad_norm": 1.0417039394378662, "learning_rate": 1.627056672760512e-05, "loss": 1.35, "mean_token_accuracy": 0.6542015026013056, "num_tokens": 74350478.0, "step": 446 }, { "entropy": 1.9047284424304962, "epoch": 0.049106039383702726, "grad_norm": 0.7739196419715881, "learning_rate": 1.630712979890311e-05, "loss": 1.4764, "mean_token_accuracy": 0.6416764905055364, "num_tokens": 74523233.0, "step": 447 }, { "entropy": 1.8290265500545502, "epoch": 0.04921589629507567, "grad_norm": 0.8136515021324158, "learning_rate": 1.6343692870201096e-05, "loss": 1.4462, "mean_token_accuracy": 0.6408629318078359, "num_tokens": 74662652.0, "step": 448 }, { "entropy": 1.836196482181549, "epoch": 0.0493257532064486, "grad_norm": 0.8380835056304932, "learning_rate": 1.638025594149909e-05, "loss": 1.4125, "mean_token_accuracy": 0.6456181158622106, "num_tokens": 74836953.0, "step": 449 }, { "entropy": 1.8020283778508503, "epoch": 0.049435610117821535, "grad_norm": 1.37300705909729, "learning_rate": 1.6416819012797076e-05, "loss": 1.3302, "mean_token_accuracy": 0.6666079958279928, "num_tokens": 74981072.0, "step": 450 }, { "entropy": 1.9055909911791484, "epoch": 0.049545467029194476, "grad_norm": 0.9503870010375977, "learning_rate": 1.6453382084095066e-05, "loss": 1.4723, "mean_token_accuracy": 0.6394474705060323, "num_tokens": 75121906.0, "step": 451 }, { "entropy": 1.842297613620758, "epoch": 0.04965532394056741, "grad_norm": 0.7884616851806641, "learning_rate": 1.6489945155393055e-05, "loss": 1.4174, "mean_token_accuracy": 0.6418876697619756, "num_tokens": 75293694.0, "step": 452 }, { "entropy": 1.921643594900767, "epoch": 0.04976518085194035, "grad_norm": 1.0184119939804077, "learning_rate": 1.6526508226691042e-05, "loss": 1.5131, "mean_token_accuracy": 0.6456418732802073, "num_tokens": 75431517.0, "step": 453 }, { "entropy": 1.916659524043401, "epoch": 0.049875037763313285, "grad_norm": 1.5741225481033325, "learning_rate": 1.656307129798903e-05, "loss": 1.348, "mean_token_accuracy": 0.6527373790740967, "num_tokens": 75541765.0, "step": 454 }, { "entropy": 1.8336934447288513, "epoch": 0.04998489467468622, "grad_norm": 0.9903491735458374, "learning_rate": 1.659963436928702e-05, "loss": 1.399, "mean_token_accuracy": 0.653240958849589, "num_tokens": 75694830.0, "step": 455 }, { "entropy": 1.8270506660143535, "epoch": 0.05009475158605916, "grad_norm": 0.7361817955970764, "learning_rate": 1.663619744058501e-05, "loss": 1.5485, "mean_token_accuracy": 0.637207085887591, "num_tokens": 75927979.0, "step": 456 }, { "entropy": 1.9009975989659627, "epoch": 0.05020460849743209, "grad_norm": 1.2144572734832764, "learning_rate": 1.6672760511883e-05, "loss": 1.3843, "mean_token_accuracy": 0.6520481109619141, "num_tokens": 76028451.0, "step": 457 }, { "entropy": 1.933722198009491, "epoch": 0.050314465408805034, "grad_norm": 0.9374269843101501, "learning_rate": 1.6709323583180987e-05, "loss": 1.399, "mean_token_accuracy": 0.6511914978424708, "num_tokens": 76162186.0, "step": 458 }, { "entropy": 1.8920509020487468, "epoch": 0.05042432232017797, "grad_norm": 0.7262760400772095, "learning_rate": 1.6745886654478977e-05, "loss": 1.5039, "mean_token_accuracy": 0.6288343866666158, "num_tokens": 76330676.0, "step": 459 }, { "entropy": 1.8482555548350017, "epoch": 0.0505341792315509, "grad_norm": 0.8332237601280212, "learning_rate": 1.6782449725776967e-05, "loss": 1.5237, "mean_token_accuracy": 0.6457237054904302, "num_tokens": 76468568.0, "step": 460 }, { "entropy": 1.820657879114151, "epoch": 0.05064403614292384, "grad_norm": 1.824617624282837, "learning_rate": 1.6819012797074956e-05, "loss": 1.0706, "mean_token_accuracy": 0.6815401464700699, "num_tokens": 76622873.0, "step": 461 }, { "entropy": 1.8888212939103444, "epoch": 0.05075389305429678, "grad_norm": 0.8382301926612854, "learning_rate": 1.6855575868372943e-05, "loss": 1.5501, "mean_token_accuracy": 0.6339965413014094, "num_tokens": 76788721.0, "step": 462 }, { "entropy": 1.782186617453893, "epoch": 0.05086374996566972, "grad_norm": 0.8659656643867493, "learning_rate": 1.6892138939670936e-05, "loss": 1.382, "mean_token_accuracy": 0.6629950056473414, "num_tokens": 76929800.0, "step": 463 }, { "entropy": 1.84979913632075, "epoch": 0.05097360687704265, "grad_norm": 1.0096579790115356, "learning_rate": 1.6928702010968922e-05, "loss": 1.4331, "mean_token_accuracy": 0.6603454450766245, "num_tokens": 77088974.0, "step": 464 }, { "entropy": 1.8461360732714336, "epoch": 0.051083463788415585, "grad_norm": 0.851254940032959, "learning_rate": 1.6965265082266912e-05, "loss": 1.5439, "mean_token_accuracy": 0.6388277113437653, "num_tokens": 77269063.0, "step": 465 }, { "entropy": 1.8718996942043304, "epoch": 0.051193320699788526, "grad_norm": 1.469465732574463, "learning_rate": 1.7001828153564902e-05, "loss": 1.2971, "mean_token_accuracy": 0.6635722517967224, "num_tokens": 77403714.0, "step": 466 }, { "entropy": 1.785783976316452, "epoch": 0.05130317761116146, "grad_norm": 0.9720367193222046, "learning_rate": 1.7038391224862888e-05, "loss": 1.3768, "mean_token_accuracy": 0.660425583521525, "num_tokens": 77551768.0, "step": 467 }, { "entropy": 1.783895234266917, "epoch": 0.0514130345225344, "grad_norm": 0.8119345903396606, "learning_rate": 1.7074954296160878e-05, "loss": 1.3155, "mean_token_accuracy": 0.6677038272221884, "num_tokens": 77707970.0, "step": 468 }, { "entropy": 1.8844469288984935, "epoch": 0.051522891433907335, "grad_norm": 1.0332210063934326, "learning_rate": 1.7111517367458868e-05, "loss": 1.3061, "mean_token_accuracy": 0.6705901821454366, "num_tokens": 77838254.0, "step": 469 }, { "entropy": 1.899887502193451, "epoch": 0.051632748345280276, "grad_norm": 0.8115286231040955, "learning_rate": 1.7148080438756858e-05, "loss": 1.6136, "mean_token_accuracy": 0.6468661973873774, "num_tokens": 78030097.0, "step": 470 }, { "entropy": 1.8445066312948863, "epoch": 0.05174260525665321, "grad_norm": 0.67425936460495, "learning_rate": 1.7184643510054847e-05, "loss": 1.4869, "mean_token_accuracy": 0.6447852005561193, "num_tokens": 78218757.0, "step": 471 }, { "entropy": 1.8734458883603413, "epoch": 0.051852462168026144, "grad_norm": 0.7984296679496765, "learning_rate": 1.7221206581352834e-05, "loss": 1.4225, "mean_token_accuracy": 0.6457456847031912, "num_tokens": 78377793.0, "step": 472 }, { "entropy": 1.9007401863733928, "epoch": 0.051962319079399084, "grad_norm": 0.760857343673706, "learning_rate": 1.7257769652650823e-05, "loss": 1.5456, "mean_token_accuracy": 0.6302074193954468, "num_tokens": 78573934.0, "step": 473 }, { "entropy": 1.849319765965144, "epoch": 0.05207217599077202, "grad_norm": 1.178850531578064, "learning_rate": 1.7294332723948813e-05, "loss": 1.3321, "mean_token_accuracy": 0.6654231746991476, "num_tokens": 78684333.0, "step": 474 }, { "entropy": 1.8049305478731792, "epoch": 0.05218203290214496, "grad_norm": 0.7811275720596313, "learning_rate": 1.7330895795246803e-05, "loss": 1.3971, "mean_token_accuracy": 0.6538741886615753, "num_tokens": 78851016.0, "step": 475 }, { "entropy": 1.8060388763745625, "epoch": 0.05229188981351789, "grad_norm": 1.0945056676864624, "learning_rate": 1.736745886654479e-05, "loss": 1.339, "mean_token_accuracy": 0.6550626158714294, "num_tokens": 79003715.0, "step": 476 }, { "entropy": 1.8515853186448414, "epoch": 0.05240174672489083, "grad_norm": 0.6653461456298828, "learning_rate": 1.7404021937842783e-05, "loss": 1.5661, "mean_token_accuracy": 0.6174762199322382, "num_tokens": 79283121.0, "step": 477 }, { "entropy": 1.851629654566447, "epoch": 0.05251160363626377, "grad_norm": 0.7771194577217102, "learning_rate": 1.744058500914077e-05, "loss": 1.5499, "mean_token_accuracy": 0.6309017390012741, "num_tokens": 79463326.0, "step": 478 }, { "entropy": 1.8349732557932537, "epoch": 0.0526214605476367, "grad_norm": 0.9575709700584412, "learning_rate": 1.747714808043876e-05, "loss": 1.4673, "mean_token_accuracy": 0.6315694997708002, "num_tokens": 79601389.0, "step": 479 }, { "entropy": 1.9489451746145885, "epoch": 0.05273131745900964, "grad_norm": 0.7346012592315674, "learning_rate": 1.751371115173675e-05, "loss": 1.4289, "mean_token_accuracy": 0.6379700899124146, "num_tokens": 79742483.0, "step": 480 }, { "entropy": 1.8353569904963176, "epoch": 0.052841174370382576, "grad_norm": 0.7082385420799255, "learning_rate": 1.7550274223034735e-05, "loss": 1.4335, "mean_token_accuracy": 0.6611069192488989, "num_tokens": 79996396.0, "step": 481 }, { "entropy": 1.8154561916987102, "epoch": 0.05295103128175551, "grad_norm": 0.6445807218551636, "learning_rate": 1.7586837294332725e-05, "loss": 1.4676, "mean_token_accuracy": 0.6439694265524546, "num_tokens": 80231599.0, "step": 482 }, { "entropy": 1.8863433003425598, "epoch": 0.05306088819312845, "grad_norm": 0.8372637629508972, "learning_rate": 1.7623400365630714e-05, "loss": 1.6164, "mean_token_accuracy": 0.6192357142766317, "num_tokens": 80385089.0, "step": 483 }, { "entropy": 1.8703928589820862, "epoch": 0.053170745104501385, "grad_norm": 0.7205429673194885, "learning_rate": 1.7659963436928704e-05, "loss": 1.4353, "mean_token_accuracy": 0.6483421623706818, "num_tokens": 80590298.0, "step": 484 }, { "entropy": 1.869334836800893, "epoch": 0.053280602015874326, "grad_norm": 0.6076232194900513, "learning_rate": 1.7696526508226694e-05, "loss": 1.4977, "mean_token_accuracy": 0.6276814242204031, "num_tokens": 80811725.0, "step": 485 }, { "entropy": 1.7474851707617443, "epoch": 0.05339045892724726, "grad_norm": 0.8083134889602661, "learning_rate": 1.773308957952468e-05, "loss": 1.2768, "mean_token_accuracy": 0.6715359588464102, "num_tokens": 80953065.0, "step": 486 }, { "entropy": 1.8078358471393585, "epoch": 0.053500315838620194, "grad_norm": 0.9833588600158691, "learning_rate": 1.776965265082267e-05, "loss": 1.3843, "mean_token_accuracy": 0.6488884389400482, "num_tokens": 81123124.0, "step": 487 }, { "entropy": 1.8762567341327667, "epoch": 0.053610172749993135, "grad_norm": 0.7375379800796509, "learning_rate": 1.780621572212066e-05, "loss": 1.443, "mean_token_accuracy": 0.6359260429938635, "num_tokens": 81279364.0, "step": 488 }, { "entropy": 1.852634310722351, "epoch": 0.05372002966136607, "grad_norm": 0.9888647794723511, "learning_rate": 1.784277879341865e-05, "loss": 1.412, "mean_token_accuracy": 0.6469916899998983, "num_tokens": 81430039.0, "step": 489 }, { "entropy": 1.89104425907135, "epoch": 0.05382988657273901, "grad_norm": 0.8109338879585266, "learning_rate": 1.7879341864716636e-05, "loss": 1.4514, "mean_token_accuracy": 0.6364847421646118, "num_tokens": 81619758.0, "step": 490 }, { "entropy": 1.9149113893508911, "epoch": 0.05393974348411194, "grad_norm": 0.7840366363525391, "learning_rate": 1.791590493601463e-05, "loss": 1.4288, "mean_token_accuracy": 0.6536184300978979, "num_tokens": 81796494.0, "step": 491 }, { "entropy": 1.8676457504431407, "epoch": 0.054049600395484884, "grad_norm": 0.8361501097679138, "learning_rate": 1.7952468007312615e-05, "loss": 1.3977, "mean_token_accuracy": 0.663863534728686, "num_tokens": 81932706.0, "step": 492 }, { "entropy": 1.8649681508541107, "epoch": 0.05415945730685782, "grad_norm": 0.9290244579315186, "learning_rate": 1.7989031078610605e-05, "loss": 1.5228, "mean_token_accuracy": 0.6413531800111135, "num_tokens": 82154289.0, "step": 493 }, { "entropy": 1.8859212299187977, "epoch": 0.05426931421823075, "grad_norm": 0.792782723903656, "learning_rate": 1.8025594149908595e-05, "loss": 1.6165, "mean_token_accuracy": 0.6251773834228516, "num_tokens": 82343958.0, "step": 494 }, { "entropy": 1.867518424987793, "epoch": 0.05437917112960369, "grad_norm": 0.6810131669044495, "learning_rate": 1.806215722120658e-05, "loss": 1.4721, "mean_token_accuracy": 0.6292106856902441, "num_tokens": 82553546.0, "step": 495 }, { "entropy": 1.7545313934485118, "epoch": 0.05448902804097663, "grad_norm": 0.6590803861618042, "learning_rate": 1.809872029250457e-05, "loss": 1.3972, "mean_token_accuracy": 0.6627217878897985, "num_tokens": 82736724.0, "step": 496 }, { "entropy": 1.8064947426319122, "epoch": 0.05459888495234957, "grad_norm": 0.7147844433784485, "learning_rate": 1.813528336380256e-05, "loss": 1.4111, "mean_token_accuracy": 0.6673514246940613, "num_tokens": 82928814.0, "step": 497 }, { "entropy": 1.8595764935016632, "epoch": 0.0547087418637225, "grad_norm": 0.7674292325973511, "learning_rate": 1.817184643510055e-05, "loss": 1.5436, "mean_token_accuracy": 0.6367508967717489, "num_tokens": 83169052.0, "step": 498 }, { "entropy": 1.7980642318725586, "epoch": 0.054818598775095435, "grad_norm": 0.7615039348602295, "learning_rate": 1.820840950639854e-05, "loss": 1.4503, "mean_token_accuracy": 0.6509919663270315, "num_tokens": 83362711.0, "step": 499 }, { "entropy": 1.848442365725835, "epoch": 0.054928455686468376, "grad_norm": 0.6286888718605042, "learning_rate": 1.8244972577696527e-05, "loss": 1.3508, "mean_token_accuracy": 0.6550141274929047, "num_tokens": 83540687.0, "step": 500 }, { "entropy": 1.8486445744832356, "epoch": 0.05503831259784131, "grad_norm": 1.779735803604126, "learning_rate": 1.8281535648994517e-05, "loss": 1.1664, "mean_token_accuracy": 0.6659951458374659, "num_tokens": 83716601.0, "step": 501 }, { "entropy": 1.7482089002927144, "epoch": 0.05514816950921425, "grad_norm": 0.715691089630127, "learning_rate": 1.8318098720292506e-05, "loss": 1.4013, "mean_token_accuracy": 0.6483576248089472, "num_tokens": 83897803.0, "step": 502 }, { "entropy": 1.8891556064287822, "epoch": 0.055258026420587185, "grad_norm": 0.7861650586128235, "learning_rate": 1.8354661791590496e-05, "loss": 1.4081, "mean_token_accuracy": 0.6608035564422607, "num_tokens": 84052066.0, "step": 503 }, { "entropy": 1.8510177036126454, "epoch": 0.05536788333196012, "grad_norm": 1.1780167818069458, "learning_rate": 1.8391224862888482e-05, "loss": 1.4559, "mean_token_accuracy": 0.6438465466101965, "num_tokens": 84214794.0, "step": 504 }, { "entropy": 1.8411860366662343, "epoch": 0.05547774024333306, "grad_norm": 0.6785144805908203, "learning_rate": 1.8427787934186476e-05, "loss": 1.6069, "mean_token_accuracy": 0.6364410271247228, "num_tokens": 84426411.0, "step": 505 }, { "entropy": 1.7741001347700756, "epoch": 0.055587597154705994, "grad_norm": 0.7365214824676514, "learning_rate": 1.8464351005484462e-05, "loss": 1.3837, "mean_token_accuracy": 0.6612890263398489, "num_tokens": 84564688.0, "step": 506 }, { "entropy": 1.858109325170517, "epoch": 0.055697454066078934, "grad_norm": 0.6560879945755005, "learning_rate": 1.8500914076782452e-05, "loss": 1.5934, "mean_token_accuracy": 0.6287341316541036, "num_tokens": 84764716.0, "step": 507 }, { "entropy": 1.8872665762901306, "epoch": 0.05580731097745187, "grad_norm": 0.8644893169403076, "learning_rate": 1.853747714808044e-05, "loss": 1.3268, "mean_token_accuracy": 0.6630988270044327, "num_tokens": 84906107.0, "step": 508 }, { "entropy": 1.8340339064598083, "epoch": 0.05591716788882481, "grad_norm": 0.7128955125808716, "learning_rate": 1.8574040219378428e-05, "loss": 1.3998, "mean_token_accuracy": 0.658979594707489, "num_tokens": 85057303.0, "step": 509 }, { "entropy": 1.877872258424759, "epoch": 0.05602702480019774, "grad_norm": 1.0351197719573975, "learning_rate": 1.8610603290676418e-05, "loss": 1.4379, "mean_token_accuracy": 0.6406905551751455, "num_tokens": 85237921.0, "step": 510 }, { "entropy": 1.832102398077647, "epoch": 0.05613688171157068, "grad_norm": 0.9562404155731201, "learning_rate": 1.8647166361974407e-05, "loss": 1.4014, "mean_token_accuracy": 0.6476325045029322, "num_tokens": 85363257.0, "step": 511 }, { "entropy": 1.8245374759038289, "epoch": 0.05624673862294362, "grad_norm": 0.7608838081359863, "learning_rate": 1.8683729433272397e-05, "loss": 1.4446, "mean_token_accuracy": 0.6506583044926325, "num_tokens": 85492436.0, "step": 512 }, { "entropy": 1.8646320700645447, "epoch": 0.05635659553431655, "grad_norm": 0.967135488986969, "learning_rate": 1.8720292504570387e-05, "loss": 1.3519, "mean_token_accuracy": 0.6563466837008795, "num_tokens": 85653597.0, "step": 513 }, { "entropy": 1.8935543298721313, "epoch": 0.05646645244568949, "grad_norm": 0.8624943494796753, "learning_rate": 1.8756855575868373e-05, "loss": 1.4485, "mean_token_accuracy": 0.6568788141012192, "num_tokens": 85765957.0, "step": 514 }, { "entropy": 1.8232440849145253, "epoch": 0.056576309357062426, "grad_norm": 0.7825310230255127, "learning_rate": 1.8793418647166363e-05, "loss": 1.4999, "mean_token_accuracy": 0.6494305729866028, "num_tokens": 85933528.0, "step": 515 }, { "entropy": 1.8484807411829631, "epoch": 0.05668616626843536, "grad_norm": 0.6889421939849854, "learning_rate": 1.8829981718464353e-05, "loss": 1.5048, "mean_token_accuracy": 0.6314730395873388, "num_tokens": 86120045.0, "step": 516 }, { "entropy": 1.8484809299310048, "epoch": 0.0567960231798083, "grad_norm": 0.9059920310974121, "learning_rate": 1.8866544789762343e-05, "loss": 1.4745, "mean_token_accuracy": 0.6385734875996908, "num_tokens": 86286777.0, "step": 517 }, { "entropy": 1.853780855735143, "epoch": 0.056905880091181235, "grad_norm": 0.8004304766654968, "learning_rate": 1.890310786106033e-05, "loss": 1.547, "mean_token_accuracy": 0.640062207976977, "num_tokens": 86451355.0, "step": 518 }, { "entropy": 1.807072252035141, "epoch": 0.057015737002554176, "grad_norm": 0.7398921847343445, "learning_rate": 1.8939670932358322e-05, "loss": 1.3051, "mean_token_accuracy": 0.6686479697624842, "num_tokens": 86619527.0, "step": 519 }, { "entropy": 1.8454334139823914, "epoch": 0.05712559391392711, "grad_norm": 0.68968665599823, "learning_rate": 1.897623400365631e-05, "loss": 1.3527, "mean_token_accuracy": 0.6641424546639124, "num_tokens": 86762429.0, "step": 520 }, { "entropy": 1.90091206630071, "epoch": 0.057235450825300044, "grad_norm": 0.9172680974006653, "learning_rate": 1.90127970749543e-05, "loss": 1.3641, "mean_token_accuracy": 0.6482406457265218, "num_tokens": 86864597.0, "step": 521 }, { "entropy": 1.8825439810752869, "epoch": 0.057345307736672985, "grad_norm": 0.9436008334159851, "learning_rate": 1.9049360146252288e-05, "loss": 1.4111, "mean_token_accuracy": 0.6459259490172068, "num_tokens": 86980728.0, "step": 522 }, { "entropy": 1.847435434659322, "epoch": 0.05745516464804592, "grad_norm": 0.5968527793884277, "learning_rate": 1.9085923217550274e-05, "loss": 1.4007, "mean_token_accuracy": 0.6396257479985555, "num_tokens": 87210371.0, "step": 523 }, { "entropy": 1.8333716690540314, "epoch": 0.05756502155941886, "grad_norm": 0.7715458273887634, "learning_rate": 1.9122486288848264e-05, "loss": 1.5396, "mean_token_accuracy": 0.6410951962073644, "num_tokens": 87404365.0, "step": 524 }, { "entropy": 1.7693612972895305, "epoch": 0.05767487847079179, "grad_norm": 1.143162727355957, "learning_rate": 1.9159049360146254e-05, "loss": 1.3754, "mean_token_accuracy": 0.6539959609508514, "num_tokens": 87539027.0, "step": 525 }, { "entropy": 1.8589285711447399, "epoch": 0.05778473538216473, "grad_norm": 0.8187215924263, "learning_rate": 1.9195612431444244e-05, "loss": 1.4923, "mean_token_accuracy": 0.6357589811086655, "num_tokens": 87746160.0, "step": 526 }, { "entropy": 1.8425296048323314, "epoch": 0.05789459229353767, "grad_norm": 0.7849891185760498, "learning_rate": 1.9232175502742234e-05, "loss": 1.3778, "mean_token_accuracy": 0.6585688690344492, "num_tokens": 87885330.0, "step": 527 }, { "entropy": 1.813408613204956, "epoch": 0.0580044492049106, "grad_norm": 0.9070361256599426, "learning_rate": 1.926873857404022e-05, "loss": 1.4793, "mean_token_accuracy": 0.6466079652309418, "num_tokens": 88023429.0, "step": 528 }, { "entropy": 1.8070883452892303, "epoch": 0.05811430611628354, "grad_norm": 0.8531019687652588, "learning_rate": 1.930530164533821e-05, "loss": 1.5821, "mean_token_accuracy": 0.6390858789285024, "num_tokens": 88217336.0, "step": 529 }, { "entropy": 1.762501190106074, "epoch": 0.05822416302765648, "grad_norm": 0.6754366755485535, "learning_rate": 1.93418647166362e-05, "loss": 1.3344, "mean_token_accuracy": 0.6736961950858434, "num_tokens": 88391452.0, "step": 530 }, { "entropy": 1.8850489755471547, "epoch": 0.05833401993902942, "grad_norm": 0.8538105487823486, "learning_rate": 1.937842778793419e-05, "loss": 1.3894, "mean_token_accuracy": 0.6557394365469614, "num_tokens": 88522769.0, "step": 531 }, { "entropy": 1.8417104880015056, "epoch": 0.05844387685040235, "grad_norm": 0.665955662727356, "learning_rate": 1.9414990859232176e-05, "loss": 1.4531, "mean_token_accuracy": 0.6464942395687103, "num_tokens": 88695328.0, "step": 532 }, { "entropy": 1.8750036259492238, "epoch": 0.058553733761775285, "grad_norm": 0.8706235289573669, "learning_rate": 1.945155393053017e-05, "loss": 1.4869, "mean_token_accuracy": 0.6461243083079656, "num_tokens": 88884132.0, "step": 533 }, { "entropy": 1.9077441891034443, "epoch": 0.058663590673148226, "grad_norm": 0.7450928092002869, "learning_rate": 1.9488117001828155e-05, "loss": 1.3803, "mean_token_accuracy": 0.6466763665278753, "num_tokens": 89011757.0, "step": 534 }, { "entropy": 1.8615180750687916, "epoch": 0.05877344758452116, "grad_norm": 0.6712978482246399, "learning_rate": 1.9524680073126145e-05, "loss": 1.4863, "mean_token_accuracy": 0.63641490538915, "num_tokens": 89201664.0, "step": 535 }, { "entropy": 1.8326091468334198, "epoch": 0.0588833044958941, "grad_norm": 0.731995701789856, "learning_rate": 1.9561243144424135e-05, "loss": 1.4143, "mean_token_accuracy": 0.6499495257933935, "num_tokens": 89342221.0, "step": 536 }, { "entropy": 1.768342783053716, "epoch": 0.058993161407267035, "grad_norm": 0.7949745655059814, "learning_rate": 1.959780621572212e-05, "loss": 1.473, "mean_token_accuracy": 0.6656158169110616, "num_tokens": 89483457.0, "step": 537 }, { "entropy": 1.8990335762500763, "epoch": 0.05910301831863997, "grad_norm": 0.7969281077384949, "learning_rate": 1.963436928702011e-05, "loss": 1.3961, "mean_token_accuracy": 0.6416173179944357, "num_tokens": 89600539.0, "step": 538 }, { "entropy": 1.9141974449157715, "epoch": 0.05921287523001291, "grad_norm": 0.8687071800231934, "learning_rate": 1.96709323583181e-05, "loss": 1.4503, "mean_token_accuracy": 0.6416681607564291, "num_tokens": 89730626.0, "step": 539 }, { "entropy": 1.924128810564677, "epoch": 0.059322732141385844, "grad_norm": 0.8359556198120117, "learning_rate": 1.970749542961609e-05, "loss": 1.3636, "mean_token_accuracy": 0.6541641503572464, "num_tokens": 89842957.0, "step": 540 }, { "entropy": 1.90417875846227, "epoch": 0.059432589052758784, "grad_norm": 0.7051600217819214, "learning_rate": 1.974405850091408e-05, "loss": 1.4902, "mean_token_accuracy": 0.6339495678742727, "num_tokens": 90053658.0, "step": 541 }, { "entropy": 1.81476491689682, "epoch": 0.05954244596413172, "grad_norm": 1.2421592473983765, "learning_rate": 1.9780621572212066e-05, "loss": 1.3046, "mean_token_accuracy": 0.6642891814311346, "num_tokens": 90217444.0, "step": 542 }, { "entropy": 1.8745914101600647, "epoch": 0.05965230287550465, "grad_norm": 0.7224368453025818, "learning_rate": 1.9817184643510056e-05, "loss": 1.4492, "mean_token_accuracy": 0.6379654556512833, "num_tokens": 90424582.0, "step": 543 }, { "entropy": 1.8410755693912506, "epoch": 0.05976215978687759, "grad_norm": 0.8019373416900635, "learning_rate": 1.9853747714808046e-05, "loss": 1.3688, "mean_token_accuracy": 0.6595296114683151, "num_tokens": 90581155.0, "step": 544 }, { "entropy": 1.8492278754711151, "epoch": 0.05987201669825053, "grad_norm": 0.7192192673683167, "learning_rate": 1.9890310786106036e-05, "loss": 1.4485, "mean_token_accuracy": 0.6582233111063639, "num_tokens": 90770719.0, "step": 545 }, { "entropy": 1.7911238272984822, "epoch": 0.05998187360962347, "grad_norm": 0.7712220549583435, "learning_rate": 1.9926873857404022e-05, "loss": 1.4184, "mean_token_accuracy": 0.6521774580081304, "num_tokens": 90954476.0, "step": 546 }, { "entropy": 1.7789829870065053, "epoch": 0.0600917305209964, "grad_norm": 0.7799301147460938, "learning_rate": 1.9963436928702012e-05, "loss": 1.3837, "mean_token_accuracy": 0.6543701936801275, "num_tokens": 91118773.0, "step": 547 }, { "entropy": 1.8910561104615529, "epoch": 0.060201587432369336, "grad_norm": 0.7084527611732483, "learning_rate": 2e-05, "loss": 1.5324, "mean_token_accuracy": 0.6384007583061854, "num_tokens": 91291657.0, "step": 548 }, { "entropy": 1.8717210789521534, "epoch": 0.060311444343742276, "grad_norm": 0.8148479461669922, "learning_rate": 1.999999985757703e-05, "loss": 1.375, "mean_token_accuracy": 0.6619729151328405, "num_tokens": 91417362.0, "step": 549 }, { "entropy": 1.8685090740521748, "epoch": 0.06042130125511521, "grad_norm": 0.9293744564056396, "learning_rate": 1.9999999430308118e-05, "loss": 1.4431, "mean_token_accuracy": 0.644400030374527, "num_tokens": 91664996.0, "step": 550 }, { "entropy": 1.8850101232528687, "epoch": 0.06053115816648815, "grad_norm": 0.6854516267776489, "learning_rate": 1.999999871819328e-05, "loss": 1.4789, "mean_token_accuracy": 0.6571693470080694, "num_tokens": 91816730.0, "step": 551 }, { "entropy": 1.8250452876091003, "epoch": 0.060641015077861085, "grad_norm": 0.8001742959022522, "learning_rate": 1.9999997721232536e-05, "loss": 1.3613, "mean_token_accuracy": 0.6481845825910568, "num_tokens": 91975856.0, "step": 552 }, { "entropy": 1.798028330008189, "epoch": 0.060750871989234026, "grad_norm": 0.7020228505134583, "learning_rate": 1.999999643942592e-05, "loss": 1.484, "mean_token_accuracy": 0.6372916350762049, "num_tokens": 92166416.0, "step": 553 }, { "entropy": 1.7991608679294586, "epoch": 0.06086072890060696, "grad_norm": 0.6366422772407532, "learning_rate": 1.9999994872773474e-05, "loss": 1.4652, "mean_token_accuracy": 0.6406016697486242, "num_tokens": 92426296.0, "step": 554 }, { "entropy": 1.8256397744019826, "epoch": 0.060970585811979894, "grad_norm": 0.7002333998680115, "learning_rate": 1.9999993021275244e-05, "loss": 1.2857, "mean_token_accuracy": 0.659576748808225, "num_tokens": 92610146.0, "step": 555 }, { "entropy": 1.8464308480421703, "epoch": 0.061080442723352835, "grad_norm": 0.7711760997772217, "learning_rate": 1.999999088493129e-05, "loss": 1.3101, "mean_token_accuracy": 0.6737810522317886, "num_tokens": 92745387.0, "step": 556 }, { "entropy": 1.8655918041865032, "epoch": 0.06119029963472577, "grad_norm": 0.7186883091926575, "learning_rate": 1.999998846374168e-05, "loss": 1.461, "mean_token_accuracy": 0.6509124487638474, "num_tokens": 92891313.0, "step": 557 }, { "entropy": 1.8603834410508473, "epoch": 0.06130015654609871, "grad_norm": 0.7382420301437378, "learning_rate": 1.9999985757706496e-05, "loss": 1.4309, "mean_token_accuracy": 0.6502951284249624, "num_tokens": 93118663.0, "step": 558 }, { "entropy": 1.8049174745877583, "epoch": 0.06141001345747164, "grad_norm": 0.7095914483070374, "learning_rate": 1.9999982766825814e-05, "loss": 1.4092, "mean_token_accuracy": 0.6614819119373957, "num_tokens": 93288993.0, "step": 559 }, { "entropy": 1.8342632949352264, "epoch": 0.06151987036884458, "grad_norm": 0.8286252021789551, "learning_rate": 1.9999979491099732e-05, "loss": 1.3481, "mean_token_accuracy": 0.6633094002803167, "num_tokens": 93426205.0, "step": 560 }, { "entropy": 1.8448581198851268, "epoch": 0.06162972728021752, "grad_norm": 1.0569968223571777, "learning_rate": 1.9999975930528356e-05, "loss": 1.4604, "mean_token_accuracy": 0.6365474959214529, "num_tokens": 93634167.0, "step": 561 }, { "entropy": 1.789972831805547, "epoch": 0.06173958419159045, "grad_norm": 0.9230442643165588, "learning_rate": 1.9999972085111797e-05, "loss": 1.4326, "mean_token_accuracy": 0.6513793369134268, "num_tokens": 93781489.0, "step": 562 }, { "entropy": 1.8464637994766235, "epoch": 0.06184944110296339, "grad_norm": 0.8019546866416931, "learning_rate": 1.9999967954850177e-05, "loss": 1.419, "mean_token_accuracy": 0.6493262598911921, "num_tokens": 93996272.0, "step": 563 }, { "entropy": 1.8182610770066578, "epoch": 0.06195929801433633, "grad_norm": 0.8727892637252808, "learning_rate": 1.9999963539743628e-05, "loss": 1.4969, "mean_token_accuracy": 0.6455451051394144, "num_tokens": 94171319.0, "step": 564 }, { "entropy": 1.8235016266504924, "epoch": 0.06206915492570926, "grad_norm": 0.8368512392044067, "learning_rate": 1.9999958839792286e-05, "loss": 1.4576, "mean_token_accuracy": 0.6452821691830953, "num_tokens": 94335499.0, "step": 565 }, { "entropy": 1.78616197903951, "epoch": 0.0621790118370822, "grad_norm": 0.7931045889854431, "learning_rate": 1.9999953854996303e-05, "loss": 1.3282, "mean_token_accuracy": 0.6569087654352188, "num_tokens": 94479337.0, "step": 566 }, { "entropy": 1.7730096379915874, "epoch": 0.062288868748455135, "grad_norm": 0.8699260950088501, "learning_rate": 1.9999948585355836e-05, "loss": 1.3678, "mean_token_accuracy": 0.6560121526320776, "num_tokens": 94648746.0, "step": 567 }, { "entropy": 1.8760800262292225, "epoch": 0.062398725659828076, "grad_norm": 1.1178594827651978, "learning_rate": 1.9999943030871053e-05, "loss": 1.3277, "mean_token_accuracy": 0.661268358429273, "num_tokens": 94747983.0, "step": 568 }, { "entropy": 1.86210831006368, "epoch": 0.06250858257120101, "grad_norm": 0.7388879656791687, "learning_rate": 1.9999937191542128e-05, "loss": 1.4365, "mean_token_accuracy": 0.650276447335879, "num_tokens": 94919214.0, "step": 569 }, { "entropy": 1.8915256162484486, "epoch": 0.06261843948257395, "grad_norm": 0.7970117926597595, "learning_rate": 1.9999931067369246e-05, "loss": 1.4995, "mean_token_accuracy": 0.6465731561183929, "num_tokens": 95084859.0, "step": 570 }, { "entropy": 1.7767747739950817, "epoch": 0.06272829639394688, "grad_norm": 0.9821522235870361, "learning_rate": 1.99999246583526e-05, "loss": 1.506, "mean_token_accuracy": 0.6511118859052658, "num_tokens": 95242471.0, "step": 571 }, { "entropy": 1.7983069618542988, "epoch": 0.06283815330531982, "grad_norm": 0.7964573502540588, "learning_rate": 1.9999917964492393e-05, "loss": 1.418, "mean_token_accuracy": 0.6608897646268209, "num_tokens": 95411496.0, "step": 572 }, { "entropy": 1.7514270345369976, "epoch": 0.06294801021669276, "grad_norm": 0.7527838349342346, "learning_rate": 1.9999910985788842e-05, "loss": 1.4319, "mean_token_accuracy": 0.6370367358128229, "num_tokens": 95612011.0, "step": 573 }, { "entropy": 1.8501805861790974, "epoch": 0.0630578671280657, "grad_norm": 0.7433388233184814, "learning_rate": 1.999990372224216e-05, "loss": 1.4124, "mean_token_accuracy": 0.6372717867294947, "num_tokens": 95775330.0, "step": 574 }, { "entropy": 1.8343484302361805, "epoch": 0.06316772403943863, "grad_norm": 0.8306664824485779, "learning_rate": 1.9999896173852585e-05, "loss": 1.4024, "mean_token_accuracy": 0.6379790206750234, "num_tokens": 95954358.0, "step": 575 }, { "entropy": 1.8649726808071136, "epoch": 0.06327758095081157, "grad_norm": 0.7519362568855286, "learning_rate": 1.999988834062035e-05, "loss": 1.4086, "mean_token_accuracy": 0.6535822004079819, "num_tokens": 96118913.0, "step": 576 }, { "entropy": 1.866872598727544, "epoch": 0.06338743786218451, "grad_norm": 1.0160154104232788, "learning_rate": 1.9999880222545703e-05, "loss": 1.4077, "mean_token_accuracy": 0.6465723812580109, "num_tokens": 96233662.0, "step": 577 }, { "entropy": 1.8999827206134796, "epoch": 0.06349729477355744, "grad_norm": 0.7083912491798401, "learning_rate": 1.99998718196289e-05, "loss": 1.5182, "mean_token_accuracy": 0.6312810579935709, "num_tokens": 96372780.0, "step": 578 }, { "entropy": 1.8947786291440327, "epoch": 0.06360715168493038, "grad_norm": 0.771692156791687, "learning_rate": 1.9999863131870213e-05, "loss": 1.4229, "mean_token_accuracy": 0.6529962420463562, "num_tokens": 96532545.0, "step": 579 }, { "entropy": 1.849695046742757, "epoch": 0.06371700859630332, "grad_norm": 0.7248260378837585, "learning_rate": 1.9999854159269915e-05, "loss": 1.4054, "mean_token_accuracy": 0.6576380530993143, "num_tokens": 96708045.0, "step": 580 }, { "entropy": 1.757933537165324, "epoch": 0.06382686550767624, "grad_norm": 0.7588098645210266, "learning_rate": 1.9999844901828286e-05, "loss": 1.4921, "mean_token_accuracy": 0.6364376048247019, "num_tokens": 96907139.0, "step": 581 }, { "entropy": 1.796962042649587, "epoch": 0.06393672241904919, "grad_norm": 0.7149285674095154, "learning_rate": 1.9999835359545622e-05, "loss": 1.4685, "mean_token_accuracy": 0.649954711397489, "num_tokens": 97080342.0, "step": 582 }, { "entropy": 1.8116531372070312, "epoch": 0.06404657933042213, "grad_norm": 0.803112268447876, "learning_rate": 1.999982553242222e-05, "loss": 1.5352, "mean_token_accuracy": 0.6461968272924423, "num_tokens": 97248479.0, "step": 583 }, { "entropy": 1.8498141765594482, "epoch": 0.06415643624179507, "grad_norm": 1.0572980642318726, "learning_rate": 1.99998154204584e-05, "loss": 1.3259, "mean_token_accuracy": 0.6570860395828882, "num_tokens": 97389667.0, "step": 584 }, { "entropy": 1.8106311957041423, "epoch": 0.064266293153168, "grad_norm": 0.7023609280586243, "learning_rate": 1.9999805023654474e-05, "loss": 1.381, "mean_token_accuracy": 0.6562709013621012, "num_tokens": 97544195.0, "step": 585 }, { "entropy": 1.8761279384295146, "epoch": 0.06437615006454094, "grad_norm": 0.6949073076248169, "learning_rate": 1.9999794342010777e-05, "loss": 1.4237, "mean_token_accuracy": 0.6387066642443339, "num_tokens": 97697668.0, "step": 586 }, { "entropy": 1.7704632878303528, "epoch": 0.06448600697591388, "grad_norm": 0.6187701225280762, "learning_rate": 1.9999783375527647e-05, "loss": 1.4075, "mean_token_accuracy": 0.6577561795711517, "num_tokens": 97903522.0, "step": 587 }, { "entropy": 1.8219424188137054, "epoch": 0.0645958638872868, "grad_norm": 0.8520556688308716, "learning_rate": 1.9999772124205423e-05, "loss": 1.3681, "mean_token_accuracy": 0.6523155321677526, "num_tokens": 98057082.0, "step": 588 }, { "entropy": 1.8426192998886108, "epoch": 0.06470572079865974, "grad_norm": 0.8771872520446777, "learning_rate": 1.999976058804447e-05, "loss": 1.5718, "mean_token_accuracy": 0.6308561414480209, "num_tokens": 98242770.0, "step": 589 }, { "entropy": 1.8485606014728546, "epoch": 0.06481557771003268, "grad_norm": 0.7140512466430664, "learning_rate": 1.9999748767045148e-05, "loss": 1.4753, "mean_token_accuracy": 0.6466710418462753, "num_tokens": 98406161.0, "step": 590 }, { "entropy": 1.8343448042869568, "epoch": 0.06492543462140563, "grad_norm": 0.7732890248298645, "learning_rate": 1.9999736661207833e-05, "loss": 1.3634, "mean_token_accuracy": 0.6507249772548676, "num_tokens": 98576357.0, "step": 591 }, { "entropy": 1.8259783387184143, "epoch": 0.06503529153277855, "grad_norm": 0.8179787993431091, "learning_rate": 1.999972427053291e-05, "loss": 1.4097, "mean_token_accuracy": 0.6523296386003494, "num_tokens": 98705683.0, "step": 592 }, { "entropy": 1.8165560364723206, "epoch": 0.0651451484441515, "grad_norm": 0.945043683052063, "learning_rate": 1.999971159502077e-05, "loss": 1.3726, "mean_token_accuracy": 0.6657868524392446, "num_tokens": 98842481.0, "step": 593 }, { "entropy": 1.8081829249858856, "epoch": 0.06525500535552443, "grad_norm": 0.7620939612388611, "learning_rate": 1.9999698634671808e-05, "loss": 1.5107, "mean_token_accuracy": 0.6559838702281316, "num_tokens": 99017663.0, "step": 594 }, { "entropy": 1.808142175277074, "epoch": 0.06536486226689736, "grad_norm": 0.682893693447113, "learning_rate": 1.9999685389486444e-05, "loss": 1.4069, "mean_token_accuracy": 0.6539217978715897, "num_tokens": 99199192.0, "step": 595 }, { "entropy": 1.8011847337086995, "epoch": 0.0654747191782703, "grad_norm": 0.7496669888496399, "learning_rate": 1.9999671859465092e-05, "loss": 1.4311, "mean_token_accuracy": 0.6457031667232513, "num_tokens": 99373748.0, "step": 596 }, { "entropy": 1.7991874118645985, "epoch": 0.06558457608964324, "grad_norm": 0.624677836894989, "learning_rate": 1.999965804460818e-05, "loss": 1.4802, "mean_token_accuracy": 0.6430507103602091, "num_tokens": 99603545.0, "step": 597 }, { "entropy": 1.8343899448712666, "epoch": 0.06569443300101617, "grad_norm": 0.9112517237663269, "learning_rate": 1.999964394491615e-05, "loss": 1.5462, "mean_token_accuracy": 0.6385933210452398, "num_tokens": 99744710.0, "step": 598 }, { "entropy": 1.7479709486166637, "epoch": 0.06580428991238911, "grad_norm": 0.8736753463745117, "learning_rate": 1.999962956038944e-05, "loss": 1.3665, "mean_token_accuracy": 0.6576890349388123, "num_tokens": 99941601.0, "step": 599 }, { "entropy": 1.7895345389842987, "epoch": 0.06591414682376205, "grad_norm": 1.08870267868042, "learning_rate": 1.999961489102851e-05, "loss": 1.3301, "mean_token_accuracy": 0.6591797322034836, "num_tokens": 100054777.0, "step": 600 }, { "entropy": 1.755403737227122, "epoch": 0.06602400373513499, "grad_norm": 0.7063686847686768, "learning_rate": 1.9999599936833827e-05, "loss": 1.3455, "mean_token_accuracy": 0.6583442091941833, "num_tokens": 100205128.0, "step": 601 }, { "entropy": 1.8366802831490834, "epoch": 0.06613386064650792, "grad_norm": 0.7118000388145447, "learning_rate": 1.9999584697805858e-05, "loss": 1.4197, "mean_token_accuracy": 0.648671492934227, "num_tokens": 100395366.0, "step": 602 }, { "entropy": 1.802819162607193, "epoch": 0.06624371755788086, "grad_norm": 0.8801571726799011, "learning_rate": 1.999956917394509e-05, "loss": 1.5157, "mean_token_accuracy": 0.6466168984770775, "num_tokens": 100539414.0, "step": 603 }, { "entropy": 1.8523845076560974, "epoch": 0.0663535744692538, "grad_norm": 0.715670645236969, "learning_rate": 1.9999553365252014e-05, "loss": 1.4567, "mean_token_accuracy": 0.6485249350468317, "num_tokens": 100703282.0, "step": 604 }, { "entropy": 1.8390779892603557, "epoch": 0.06646343138062673, "grad_norm": 0.8692449331283569, "learning_rate": 1.9999537271727128e-05, "loss": 1.3827, "mean_token_accuracy": 0.6466074089209238, "num_tokens": 100889824.0, "step": 605 }, { "entropy": 1.7545512715975444, "epoch": 0.06657328829199967, "grad_norm": 0.7428275942802429, "learning_rate": 1.9999520893370944e-05, "loss": 1.3813, "mean_token_accuracy": 0.6578773707151413, "num_tokens": 101029032.0, "step": 606 }, { "entropy": 1.7871941129366558, "epoch": 0.06668314520337261, "grad_norm": 0.6753717064857483, "learning_rate": 1.9999504230183976e-05, "loss": 1.4339, "mean_token_accuracy": 0.6428654193878174, "num_tokens": 101216884.0, "step": 607 }, { "entropy": 1.811436951160431, "epoch": 0.06679300211474555, "grad_norm": 0.7294607162475586, "learning_rate": 1.9999487282166758e-05, "loss": 1.5457, "mean_token_accuracy": 0.628396287560463, "num_tokens": 101407501.0, "step": 608 }, { "entropy": 1.901543140411377, "epoch": 0.06690285902611848, "grad_norm": 0.8634178638458252, "learning_rate": 1.9999470049319823e-05, "loss": 1.4976, "mean_token_accuracy": 0.6367218047380447, "num_tokens": 101567263.0, "step": 609 }, { "entropy": 1.7725516855716705, "epoch": 0.06701271593749142, "grad_norm": 0.6490088701248169, "learning_rate": 1.999945253164371e-05, "loss": 1.4208, "mean_token_accuracy": 0.6521205753087997, "num_tokens": 101736088.0, "step": 610 }, { "entropy": 1.8182121813297272, "epoch": 0.06712257284886436, "grad_norm": 0.7487345933914185, "learning_rate": 1.999943472913899e-05, "loss": 1.5168, "mean_token_accuracy": 0.6501521567503611, "num_tokens": 101882039.0, "step": 611 }, { "entropy": 1.7695377667744954, "epoch": 0.06723242976023729, "grad_norm": 0.6472978591918945, "learning_rate": 1.9999416641806206e-05, "loss": 1.4747, "mean_token_accuracy": 0.6211903840303421, "num_tokens": 102114402.0, "step": 612 }, { "entropy": 1.827112078666687, "epoch": 0.06734228667161023, "grad_norm": 1.119243860244751, "learning_rate": 1.9999398269645947e-05, "loss": 1.4288, "mean_token_accuracy": 0.6412825981775919, "num_tokens": 102270509.0, "step": 613 }, { "entropy": 1.726913293202718, "epoch": 0.06745214358298317, "grad_norm": 0.7255959510803223, "learning_rate": 1.9999379612658785e-05, "loss": 1.3827, "mean_token_accuracy": 0.6515529453754425, "num_tokens": 102435141.0, "step": 614 }, { "entropy": 1.8226731022198994, "epoch": 0.0675620004943561, "grad_norm": 0.8500895500183105, "learning_rate": 1.9999360670845314e-05, "loss": 1.5447, "mean_token_accuracy": 0.6447988549868265, "num_tokens": 102575866.0, "step": 615 }, { "entropy": 1.8276772399743397, "epoch": 0.06767185740572904, "grad_norm": 1.0635449886322021, "learning_rate": 1.9999341444206133e-05, "loss": 1.321, "mean_token_accuracy": 0.6598222802082697, "num_tokens": 102710803.0, "step": 616 }, { "entropy": 1.7802319824695587, "epoch": 0.06778171431710198, "grad_norm": 0.7771666646003723, "learning_rate": 1.999932193274185e-05, "loss": 1.3659, "mean_token_accuracy": 0.6599055776993433, "num_tokens": 102828396.0, "step": 617 }, { "entropy": 1.7876626749833424, "epoch": 0.06789157122847492, "grad_norm": 4.962174892425537, "learning_rate": 1.9999302136453083e-05, "loss": 1.4445, "mean_token_accuracy": 0.6381362775961558, "num_tokens": 103047628.0, "step": 618 }, { "entropy": 1.8322969575723012, "epoch": 0.06800142813984784, "grad_norm": 0.9945211410522461, "learning_rate": 1.999928205534046e-05, "loss": 1.4898, "mean_token_accuracy": 0.6348124096790949, "num_tokens": 103266857.0, "step": 619 }, { "entropy": 1.7776095767815907, "epoch": 0.06811128505122079, "grad_norm": 0.9018236994743347, "learning_rate": 1.9999261689404615e-05, "loss": 1.3107, "mean_token_accuracy": 0.6572676748037338, "num_tokens": 103398868.0, "step": 620 }, { "entropy": 1.8384911715984344, "epoch": 0.06822114196259373, "grad_norm": 0.7996423244476318, "learning_rate": 1.9999241038646192e-05, "loss": 1.5596, "mean_token_accuracy": 0.6426488012075424, "num_tokens": 103562795.0, "step": 621 }, { "entropy": 1.790726323922475, "epoch": 0.06833099887396665, "grad_norm": 0.7970197796821594, "learning_rate": 1.9999220103065845e-05, "loss": 1.4247, "mean_token_accuracy": 0.6529985020558039, "num_tokens": 103716669.0, "step": 622 }, { "entropy": 1.8390385210514069, "epoch": 0.0684408557853396, "grad_norm": 0.7664891481399536, "learning_rate": 1.9999198882664236e-05, "loss": 1.4072, "mean_token_accuracy": 0.6501214305559794, "num_tokens": 103860168.0, "step": 623 }, { "entropy": 1.8309910794099171, "epoch": 0.06855071269671253, "grad_norm": 0.7682109475135803, "learning_rate": 1.9999177377442042e-05, "loss": 1.4699, "mean_token_accuracy": 0.6459651440382004, "num_tokens": 104010898.0, "step": 624 }, { "entropy": 1.8477097650369008, "epoch": 0.06866056960808548, "grad_norm": 0.9405747056007385, "learning_rate": 1.9999155587399934e-05, "loss": 1.3493, "mean_token_accuracy": 0.6499254653851191, "num_tokens": 104123289.0, "step": 625 }, { "entropy": 1.8265024423599243, "epoch": 0.0687704265194584, "grad_norm": 0.8012916445732117, "learning_rate": 1.999913351253861e-05, "loss": 1.3671, "mean_token_accuracy": 0.6617961873610815, "num_tokens": 104287167.0, "step": 626 }, { "entropy": 1.8607315520445507, "epoch": 0.06888028343083134, "grad_norm": 0.7399889230728149, "learning_rate": 1.999911115285876e-05, "loss": 1.5387, "mean_token_accuracy": 0.6346966524918874, "num_tokens": 104482574.0, "step": 627 }, { "entropy": 1.8020449976126354, "epoch": 0.06899014034220428, "grad_norm": 0.6279281377792358, "learning_rate": 1.9999088508361104e-05, "loss": 1.4417, "mean_token_accuracy": 0.6576824982961019, "num_tokens": 104674006.0, "step": 628 }, { "entropy": 1.7799376746018727, "epoch": 0.06909999725357721, "grad_norm": 0.705096960067749, "learning_rate": 1.999906557904635e-05, "loss": 1.5049, "mean_token_accuracy": 0.6500988801320394, "num_tokens": 104865258.0, "step": 629 }, { "entropy": 1.8464046518007915, "epoch": 0.06920985416495015, "grad_norm": 0.7601498365402222, "learning_rate": 1.9999042364915222e-05, "loss": 1.372, "mean_token_accuracy": 0.6506158063809077, "num_tokens": 104993966.0, "step": 630 }, { "entropy": 1.8217159807682037, "epoch": 0.06931971107632309, "grad_norm": 0.6956254243850708, "learning_rate": 1.9999018865968462e-05, "loss": 1.363, "mean_token_accuracy": 0.656999429066976, "num_tokens": 105120668.0, "step": 631 }, { "entropy": 1.880683700243632, "epoch": 0.06942956798769602, "grad_norm": 0.792460560798645, "learning_rate": 1.999899508220681e-05, "loss": 1.4723, "mean_token_accuracy": 0.6231094797452291, "num_tokens": 105278220.0, "step": 632 }, { "entropy": 1.9088138242562611, "epoch": 0.06953942489906896, "grad_norm": 0.7916814088821411, "learning_rate": 1.9998971013631017e-05, "loss": 1.5356, "mean_token_accuracy": 0.6298377265532812, "num_tokens": 105413384.0, "step": 633 }, { "entropy": 1.7535264392693837, "epoch": 0.0696492818104419, "grad_norm": 1.0666766166687012, "learning_rate": 1.9998946660241845e-05, "loss": 1.3734, "mean_token_accuracy": 0.6519220570723215, "num_tokens": 105636692.0, "step": 634 }, { "entropy": 1.762801597515742, "epoch": 0.06975913872181484, "grad_norm": 0.6462137699127197, "learning_rate": 1.9998922022040068e-05, "loss": 1.3319, "mean_token_accuracy": 0.6682475755612055, "num_tokens": 105776185.0, "step": 635 }, { "entropy": 1.802270730336507, "epoch": 0.06986899563318777, "grad_norm": 0.6272945404052734, "learning_rate": 1.9998897099026464e-05, "loss": 1.5092, "mean_token_accuracy": 0.6357052127520243, "num_tokens": 105943190.0, "step": 636 }, { "entropy": 1.7483725647131603, "epoch": 0.06997885254456071, "grad_norm": 0.6791301369667053, "learning_rate": 1.9998871891201822e-05, "loss": 1.3783, "mean_token_accuracy": 0.6612305889527003, "num_tokens": 106072514.0, "step": 637 }, { "entropy": 1.7557042439778645, "epoch": 0.07008870945593365, "grad_norm": 0.6274598836898804, "learning_rate": 1.9998846398566937e-05, "loss": 1.4067, "mean_token_accuracy": 0.636664499839147, "num_tokens": 106315956.0, "step": 638 }, { "entropy": 1.851299395163854, "epoch": 0.07019856636730658, "grad_norm": 0.8829707503318787, "learning_rate": 1.9998820621122623e-05, "loss": 1.5645, "mean_token_accuracy": 0.6381447166204453, "num_tokens": 106515312.0, "step": 639 }, { "entropy": 1.7740463018417358, "epoch": 0.07030842327867952, "grad_norm": 0.6600044369697571, "learning_rate": 1.999879455886969e-05, "loss": 1.4202, "mean_token_accuracy": 0.6525014142195383, "num_tokens": 106734565.0, "step": 640 }, { "entropy": 1.808898796637853, "epoch": 0.07041828019005246, "grad_norm": 0.7501150965690613, "learning_rate": 1.9998768211808962e-05, "loss": 1.5897, "mean_token_accuracy": 0.6105268895626068, "num_tokens": 106986985.0, "step": 641 }, { "entropy": 1.7719741264979045, "epoch": 0.07052813710142539, "grad_norm": 0.9000745415687561, "learning_rate": 1.9998741579941278e-05, "loss": 1.4318, "mean_token_accuracy": 0.6551828881104788, "num_tokens": 107135180.0, "step": 642 }, { "entropy": 1.8894204199314117, "epoch": 0.07063799401279833, "grad_norm": 0.8245925307273865, "learning_rate": 1.9998714663267476e-05, "loss": 1.461, "mean_token_accuracy": 0.6333466370900472, "num_tokens": 107343264.0, "step": 643 }, { "entropy": 1.8361808558305104, "epoch": 0.07074785092417127, "grad_norm": 0.8316481709480286, "learning_rate": 1.999868746178841e-05, "loss": 1.3824, "mean_token_accuracy": 0.657560924688975, "num_tokens": 107472183.0, "step": 644 }, { "entropy": 1.7973891297976177, "epoch": 0.07085770783554421, "grad_norm": 0.6126974821090698, "learning_rate": 1.999865997550494e-05, "loss": 1.5521, "mean_token_accuracy": 0.6135254551966985, "num_tokens": 107694331.0, "step": 645 }, { "entropy": 1.8026204109191895, "epoch": 0.07096756474691714, "grad_norm": 0.6637122631072998, "learning_rate": 1.9998632204417937e-05, "loss": 1.3808, "mean_token_accuracy": 0.6509968092044195, "num_tokens": 107878484.0, "step": 646 }, { "entropy": 1.7910412450631459, "epoch": 0.07107742165829008, "grad_norm": 0.6697704195976257, "learning_rate": 1.9998604148528284e-05, "loss": 1.658, "mean_token_accuracy": 0.6217399090528488, "num_tokens": 108061157.0, "step": 647 }, { "entropy": 1.7230544984340668, "epoch": 0.07118727856966302, "grad_norm": 0.6764496564865112, "learning_rate": 1.999857580783686e-05, "loss": 1.3284, "mean_token_accuracy": 0.6676372985045115, "num_tokens": 108202427.0, "step": 648 }, { "entropy": 1.7782044510046642, "epoch": 0.07129713548103594, "grad_norm": 0.7073798179626465, "learning_rate": 1.9998547182344564e-05, "loss": 1.4346, "mean_token_accuracy": 0.6538479775190353, "num_tokens": 108361157.0, "step": 649 }, { "entropy": 1.7819972435633342, "epoch": 0.07140699239240889, "grad_norm": 0.6922680735588074, "learning_rate": 1.999851827205231e-05, "loss": 1.3826, "mean_token_accuracy": 0.6540144383907318, "num_tokens": 108528441.0, "step": 650 }, { "entropy": 1.7943304777145386, "epoch": 0.07151684930378183, "grad_norm": 0.6376049518585205, "learning_rate": 1.9998489076961005e-05, "loss": 1.4412, "mean_token_accuracy": 0.636512354016304, "num_tokens": 108738071.0, "step": 651 }, { "entropy": 1.85460830728213, "epoch": 0.07162670621515477, "grad_norm": 0.8236610889434814, "learning_rate": 1.999845959707158e-05, "loss": 1.4868, "mean_token_accuracy": 0.6557626873254776, "num_tokens": 108885666.0, "step": 652 }, { "entropy": 1.7542377909024556, "epoch": 0.0717365631265277, "grad_norm": 0.7607982754707336, "learning_rate": 1.9998429832384953e-05, "loss": 1.3098, "mean_token_accuracy": 0.6667990038792292, "num_tokens": 109009005.0, "step": 653 }, { "entropy": 1.7943703830242157, "epoch": 0.07184642003790064, "grad_norm": 0.6870585680007935, "learning_rate": 1.9998399782902083e-05, "loss": 1.4376, "mean_token_accuracy": 0.6404406229654948, "num_tokens": 109180672.0, "step": 654 }, { "entropy": 1.81678972641627, "epoch": 0.07195627694927358, "grad_norm": 0.9632508158683777, "learning_rate": 1.9998369448623916e-05, "loss": 1.2715, "mean_token_accuracy": 0.6628138174613317, "num_tokens": 109287099.0, "step": 655 }, { "entropy": 1.7701709270477295, "epoch": 0.0720661338606465, "grad_norm": 0.941100001335144, "learning_rate": 1.999833882955141e-05, "loss": 1.367, "mean_token_accuracy": 0.648131713271141, "num_tokens": 109474542.0, "step": 656 }, { "entropy": 1.821062793334325, "epoch": 0.07217599077201944, "grad_norm": 0.6249318718910217, "learning_rate": 1.9998307925685534e-05, "loss": 1.3544, "mean_token_accuracy": 0.6468546291192373, "num_tokens": 109606382.0, "step": 657 }, { "entropy": 1.8237617115179698, "epoch": 0.07228584768339238, "grad_norm": 0.6935443878173828, "learning_rate": 1.9998276737027266e-05, "loss": 1.4338, "mean_token_accuracy": 0.6507659604152044, "num_tokens": 109770865.0, "step": 658 }, { "entropy": 1.8277263343334198, "epoch": 0.07239570459476531, "grad_norm": 0.6764071583747864, "learning_rate": 1.9998245263577596e-05, "loss": 1.5816, "mean_token_accuracy": 0.6434828341007233, "num_tokens": 109976506.0, "step": 659 }, { "entropy": 1.8320674896240234, "epoch": 0.07250556150613825, "grad_norm": 0.7720675468444824, "learning_rate": 1.999821350533752e-05, "loss": 1.64, "mean_token_accuracy": 0.6294133017460505, "num_tokens": 110132121.0, "step": 660 }, { "entropy": 1.7681880195935566, "epoch": 0.0726154184175112, "grad_norm": 0.6684180498123169, "learning_rate": 1.9998181462308037e-05, "loss": 1.4463, "mean_token_accuracy": 0.6456720034281412, "num_tokens": 110322861.0, "step": 661 }, { "entropy": 1.8428928156693776, "epoch": 0.07272527532888413, "grad_norm": 0.7397868633270264, "learning_rate": 1.9998149134490165e-05, "loss": 1.3386, "mean_token_accuracy": 0.6623791555563608, "num_tokens": 110483291.0, "step": 662 }, { "entropy": 1.7374838987986247, "epoch": 0.07283513224025706, "grad_norm": 0.6700147390365601, "learning_rate": 1.999811652188493e-05, "loss": 1.5732, "mean_token_accuracy": 0.6497671554485956, "num_tokens": 110678771.0, "step": 663 }, { "entropy": 1.8194133241971333, "epoch": 0.07294498915163, "grad_norm": 0.6404582858085632, "learning_rate": 1.999808362449336e-05, "loss": 1.623, "mean_token_accuracy": 0.6257813026507696, "num_tokens": 110835416.0, "step": 664 }, { "entropy": 1.7887710829575856, "epoch": 0.07305484606300294, "grad_norm": 1.0074845552444458, "learning_rate": 1.9998050442316503e-05, "loss": 1.4084, "mean_token_accuracy": 0.651343877116839, "num_tokens": 111000969.0, "step": 665 }, { "entropy": 1.764528065919876, "epoch": 0.07316470297437587, "grad_norm": 0.9937914609909058, "learning_rate": 1.9998016975355397e-05, "loss": 1.2696, "mean_token_accuracy": 0.6718964874744415, "num_tokens": 111115363.0, "step": 666 }, { "entropy": 1.8262263238430023, "epoch": 0.07327455988574881, "grad_norm": 0.8130075335502625, "learning_rate": 1.9997983223611112e-05, "loss": 1.4068, "mean_token_accuracy": 0.6590779721736908, "num_tokens": 111280328.0, "step": 667 }, { "entropy": 1.830139935016632, "epoch": 0.07338441679712175, "grad_norm": 0.7358323931694031, "learning_rate": 1.999794918708471e-05, "loss": 1.4667, "mean_token_accuracy": 0.6425051589806875, "num_tokens": 111466818.0, "step": 668 }, { "entropy": 1.8468297918637593, "epoch": 0.07349427370849469, "grad_norm": 0.8599507808685303, "learning_rate": 1.9997914865777273e-05, "loss": 1.3408, "mean_token_accuracy": 0.6542004893223444, "num_tokens": 111602406.0, "step": 669 }, { "entropy": 1.7656611204147339, "epoch": 0.07360413061986762, "grad_norm": 0.7185338735580444, "learning_rate": 1.9997880259689886e-05, "loss": 1.3936, "mean_token_accuracy": 0.6685215532779694, "num_tokens": 111751172.0, "step": 670 }, { "entropy": 1.7852209508419037, "epoch": 0.07371398753124056, "grad_norm": 0.7217685580253601, "learning_rate": 1.999784536882364e-05, "loss": 1.5029, "mean_token_accuracy": 0.6421723713477453, "num_tokens": 111929666.0, "step": 671 }, { "entropy": 1.8410307069619496, "epoch": 0.0738238444426135, "grad_norm": 0.6868441104888916, "learning_rate": 1.9997810193179647e-05, "loss": 1.4308, "mean_token_accuracy": 0.6462933719158173, "num_tokens": 112079312.0, "step": 672 }, { "entropy": 1.7862571875254314, "epoch": 0.07393370135398643, "grad_norm": 0.7277688980102539, "learning_rate": 1.9997774732759013e-05, "loss": 1.4031, "mean_token_accuracy": 0.647498811284701, "num_tokens": 112248478.0, "step": 673 }, { "entropy": 1.8328973750273387, "epoch": 0.07404355826535937, "grad_norm": 0.8798831105232239, "learning_rate": 1.9997738987562866e-05, "loss": 1.5971, "mean_token_accuracy": 0.6418457180261612, "num_tokens": 112434647.0, "step": 674 }, { "entropy": 1.821668843428294, "epoch": 0.07415341517673231, "grad_norm": 0.749891459941864, "learning_rate": 1.999770295759233e-05, "loss": 1.3465, "mean_token_accuracy": 0.654508168498675, "num_tokens": 112564952.0, "step": 675 }, { "entropy": 1.7971191604932149, "epoch": 0.07426327208810524, "grad_norm": 0.8182111978530884, "learning_rate": 1.9997666642848554e-05, "loss": 1.5722, "mean_token_accuracy": 0.6354757895072302, "num_tokens": 112725586.0, "step": 676 }, { "entropy": 1.8406287133693695, "epoch": 0.07437312899947818, "grad_norm": 0.8844257593154907, "learning_rate": 1.999763004333268e-05, "loss": 1.2809, "mean_token_accuracy": 0.6689592599868774, "num_tokens": 112853047.0, "step": 677 }, { "entropy": 1.8012821773688, "epoch": 0.07448298591085112, "grad_norm": 0.5953492522239685, "learning_rate": 1.9997593159045873e-05, "loss": 1.5063, "mean_token_accuracy": 0.6295547236998876, "num_tokens": 113080741.0, "step": 678 }, { "entropy": 1.8217666645844777, "epoch": 0.07459284282222406, "grad_norm": 220.8173370361328, "learning_rate": 1.9997555989989293e-05, "loss": 1.573, "mean_token_accuracy": 0.6443419431646665, "num_tokens": 113273229.0, "step": 679 }, { "entropy": 1.783298095067342, "epoch": 0.07470269973359699, "grad_norm": 0.668280303478241, "learning_rate": 1.9997518536164123e-05, "loss": 1.3078, "mean_token_accuracy": 0.6795926441748937, "num_tokens": 113430145.0, "step": 680 }, { "entropy": 1.7551721433798473, "epoch": 0.07481255664496993, "grad_norm": 0.7069210410118103, "learning_rate": 1.9997480797571547e-05, "loss": 1.3379, "mean_token_accuracy": 0.6589397639036179, "num_tokens": 113588815.0, "step": 681 }, { "entropy": 1.8191516598065693, "epoch": 0.07492241355634287, "grad_norm": 0.610933244228363, "learning_rate": 1.9997442774212753e-05, "loss": 1.4519, "mean_token_accuracy": 0.6368223875761032, "num_tokens": 113777021.0, "step": 682 }, { "entropy": 1.8061704138914745, "epoch": 0.0750322704677158, "grad_norm": 0.7445184588432312, "learning_rate": 1.9997404466088953e-05, "loss": 1.4369, "mean_token_accuracy": 0.6480630288521448, "num_tokens": 113933174.0, "step": 683 }, { "entropy": 1.7527543703715007, "epoch": 0.07514212737908874, "grad_norm": 0.6358804702758789, "learning_rate": 1.9997365873201356e-05, "loss": 1.4386, "mean_token_accuracy": 0.6501162797212601, "num_tokens": 114121410.0, "step": 684 }, { "entropy": 1.799436867237091, "epoch": 0.07525198429046168, "grad_norm": 0.8749188780784607, "learning_rate": 1.9997326995551183e-05, "loss": 1.5263, "mean_token_accuracy": 0.6480574657519659, "num_tokens": 114282873.0, "step": 685 }, { "entropy": 1.7453529338041942, "epoch": 0.07536184120183462, "grad_norm": 0.7320595979690552, "learning_rate": 1.9997287833139666e-05, "loss": 1.5296, "mean_token_accuracy": 0.657763327161471, "num_tokens": 114508923.0, "step": 686 }, { "entropy": 1.7952920198440552, "epoch": 0.07547169811320754, "grad_norm": 0.6980917453765869, "learning_rate": 1.9997248385968042e-05, "loss": 1.4428, "mean_token_accuracy": 0.6694839894771576, "num_tokens": 114681044.0, "step": 687 }, { "entropy": 1.7734041313330333, "epoch": 0.07558155502458049, "grad_norm": 0.6694082617759705, "learning_rate": 1.999720865403756e-05, "loss": 1.4143, "mean_token_accuracy": 0.6455632597208023, "num_tokens": 114862846.0, "step": 688 }, { "entropy": 1.8399201730887096, "epoch": 0.07569141193595343, "grad_norm": 0.7080643773078918, "learning_rate": 1.999716863734948e-05, "loss": 1.4556, "mean_token_accuracy": 0.6443889985481898, "num_tokens": 115039990.0, "step": 689 }, { "entropy": 1.7809200982252757, "epoch": 0.07580126884732635, "grad_norm": 0.6346096396446228, "learning_rate": 1.9997128335905066e-05, "loss": 1.4591, "mean_token_accuracy": 0.6494338313738505, "num_tokens": 115206329.0, "step": 690 }, { "entropy": 1.8355284929275513, "epoch": 0.0759111257586993, "grad_norm": 0.7674968242645264, "learning_rate": 1.9997087749705595e-05, "loss": 1.4145, "mean_token_accuracy": 0.6622882982095083, "num_tokens": 115352467.0, "step": 691 }, { "entropy": 1.7761068443457286, "epoch": 0.07602098267007223, "grad_norm": 0.6778233647346497, "learning_rate": 1.999704687875235e-05, "loss": 1.4126, "mean_token_accuracy": 0.658729096253713, "num_tokens": 115559309.0, "step": 692 }, { "entropy": 1.772262881199519, "epoch": 0.07613083958144516, "grad_norm": 0.7021632194519043, "learning_rate": 1.9997005723046628e-05, "loss": 1.4743, "mean_token_accuracy": 0.6380442132552465, "num_tokens": 115747604.0, "step": 693 }, { "entropy": 1.7603289783000946, "epoch": 0.0762406964928181, "grad_norm": 0.7222464680671692, "learning_rate": 1.9996964282589724e-05, "loss": 1.4163, "mean_token_accuracy": 0.6446563949187597, "num_tokens": 115907827.0, "step": 694 }, { "entropy": 1.841255287329356, "epoch": 0.07635055340419104, "grad_norm": 0.6588021516799927, "learning_rate": 1.999692255738296e-05, "loss": 1.4226, "mean_token_accuracy": 0.6450923730929693, "num_tokens": 116087244.0, "step": 695 }, { "entropy": 1.7898233930269878, "epoch": 0.07646041031556398, "grad_norm": 0.5840282440185547, "learning_rate": 1.999688054742765e-05, "loss": 1.4013, "mean_token_accuracy": 0.6547726740439733, "num_tokens": 116257019.0, "step": 696 }, { "entropy": 1.7955358823140461, "epoch": 0.07657026722693691, "grad_norm": 0.729434609413147, "learning_rate": 1.9996838252725123e-05, "loss": 1.3782, "mean_token_accuracy": 0.6612616926431656, "num_tokens": 116414138.0, "step": 697 }, { "entropy": 1.8139538665612538, "epoch": 0.07668012413830985, "grad_norm": 0.6815258264541626, "learning_rate": 1.999679567327672e-05, "loss": 1.4255, "mean_token_accuracy": 0.6536463449398676, "num_tokens": 116605641.0, "step": 698 }, { "entropy": 1.8517298797766368, "epoch": 0.07678998104968279, "grad_norm": 0.7695915102958679, "learning_rate": 1.9996752809083788e-05, "loss": 1.4368, "mean_token_accuracy": 0.6444319734970728, "num_tokens": 116753233.0, "step": 699 }, { "entropy": 1.746784100929896, "epoch": 0.07689983796105572, "grad_norm": 0.6605033278465271, "learning_rate": 1.9996709660147683e-05, "loss": 1.4952, "mean_token_accuracy": 0.6499835948149363, "num_tokens": 116916082.0, "step": 700 }, { "entropy": 1.7918027838071187, "epoch": 0.07700969487242866, "grad_norm": 0.7656397819519043, "learning_rate": 1.999666622646977e-05, "loss": 1.6114, "mean_token_accuracy": 0.6382357329130173, "num_tokens": 117116194.0, "step": 701 }, { "entropy": 1.7874523599942524, "epoch": 0.0771195517838016, "grad_norm": 0.6298776268959045, "learning_rate": 1.999662250805143e-05, "loss": 1.3409, "mean_token_accuracy": 0.6549411416053772, "num_tokens": 117255116.0, "step": 702 }, { "entropy": 1.8126642107963562, "epoch": 0.07722940869517453, "grad_norm": 0.9679374694824219, "learning_rate": 1.9996578504894037e-05, "loss": 1.3772, "mean_token_accuracy": 0.6570501724878947, "num_tokens": 117399980.0, "step": 703 }, { "entropy": 1.8066406548023224, "epoch": 0.07733926560654747, "grad_norm": 0.6479452252388, "learning_rate": 1.999653421699899e-05, "loss": 1.6135, "mean_token_accuracy": 0.6204476977388064, "num_tokens": 117653352.0, "step": 704 }, { "entropy": 1.746168166399002, "epoch": 0.07744912251792041, "grad_norm": 0.8578298091888428, "learning_rate": 1.9996489644367688e-05, "loss": 1.4505, "mean_token_accuracy": 0.6426874895890554, "num_tokens": 117816334.0, "step": 705 }, { "entropy": 1.729866623878479, "epoch": 0.07755897942929335, "grad_norm": 0.766589343547821, "learning_rate": 1.999644478700154e-05, "loss": 1.3927, "mean_token_accuracy": 0.6578174283107122, "num_tokens": 118010653.0, "step": 706 }, { "entropy": 1.7975072264671326, "epoch": 0.07766883634066628, "grad_norm": 0.9121028780937195, "learning_rate": 1.9996399644901976e-05, "loss": 1.5192, "mean_token_accuracy": 0.6352412700653076, "num_tokens": 118195093.0, "step": 707 }, { "entropy": 1.8069658875465393, "epoch": 0.07777869325203922, "grad_norm": 0.6980465650558472, "learning_rate": 1.999635421807041e-05, "loss": 1.4175, "mean_token_accuracy": 0.6498169104258219, "num_tokens": 118338858.0, "step": 708 }, { "entropy": 1.8504317104816437, "epoch": 0.07788855016341216, "grad_norm": 0.9060118794441223, "learning_rate": 1.999630850650829e-05, "loss": 1.4035, "mean_token_accuracy": 0.6471539338429769, "num_tokens": 118444002.0, "step": 709 }, { "entropy": 1.829773207505544, "epoch": 0.07799840707478509, "grad_norm": 0.7004634141921997, "learning_rate": 1.9996262510217058e-05, "loss": 1.3789, "mean_token_accuracy": 0.6589366098244985, "num_tokens": 118603170.0, "step": 710 }, { "entropy": 1.7559443612893422, "epoch": 0.07810826398615803, "grad_norm": 0.6373770236968994, "learning_rate": 1.9996216229198175e-05, "loss": 1.2677, "mean_token_accuracy": 0.680022269487381, "num_tokens": 118757923.0, "step": 711 }, { "entropy": 1.805822531382243, "epoch": 0.07821812089753097, "grad_norm": 0.7866727709770203, "learning_rate": 1.9996169663453096e-05, "loss": 1.5077, "mean_token_accuracy": 0.6585622876882553, "num_tokens": 119017661.0, "step": 712 }, { "entropy": 1.7577200531959534, "epoch": 0.07832797780890391, "grad_norm": 0.7266113758087158, "learning_rate": 1.9996122812983307e-05, "loss": 1.49, "mean_token_accuracy": 0.6384626974662145, "num_tokens": 119187084.0, "step": 713 }, { "entropy": 1.8151898682117462, "epoch": 0.07843783472027684, "grad_norm": 0.8451136350631714, "learning_rate": 1.9996075677790284e-05, "loss": 1.3513, "mean_token_accuracy": 0.6595414280891418, "num_tokens": 119316009.0, "step": 714 }, { "entropy": 1.7623566885789235, "epoch": 0.07854769163164978, "grad_norm": 0.8786435127258301, "learning_rate": 1.9996028257875518e-05, "loss": 1.2819, "mean_token_accuracy": 0.667223796248436, "num_tokens": 119478080.0, "step": 715 }, { "entropy": 1.793608546257019, "epoch": 0.07865754854302272, "grad_norm": 0.7934389114379883, "learning_rate": 1.999598055324051e-05, "loss": 1.4843, "mean_token_accuracy": 0.6354232827822367, "num_tokens": 119646813.0, "step": 716 }, { "entropy": 1.7802114486694336, "epoch": 0.07876740545439564, "grad_norm": 0.701699435710907, "learning_rate": 1.9995932563886774e-05, "loss": 1.5294, "mean_token_accuracy": 0.6498485853274664, "num_tokens": 119826582.0, "step": 717 }, { "entropy": 1.7280071278413136, "epoch": 0.07887726236576859, "grad_norm": 0.7847176194190979, "learning_rate": 1.9995884289815822e-05, "loss": 1.2134, "mean_token_accuracy": 0.6839973479509354, "num_tokens": 119938664.0, "step": 718 }, { "entropy": 1.8675900995731354, "epoch": 0.07898711927714153, "grad_norm": 0.9124090671539307, "learning_rate": 1.9995835731029188e-05, "loss": 1.3989, "mean_token_accuracy": 0.6580939839283625, "num_tokens": 120071040.0, "step": 719 }, { "entropy": 1.7469736437002819, "epoch": 0.07909697618851445, "grad_norm": 0.6930084228515625, "learning_rate": 1.999578688752841e-05, "loss": 1.4251, "mean_token_accuracy": 0.6595380107561747, "num_tokens": 120280438.0, "step": 720 }, { "entropy": 1.8206437130769093, "epoch": 0.0792068330998874, "grad_norm": 0.7369340062141418, "learning_rate": 1.9995737759315025e-05, "loss": 1.5097, "mean_token_accuracy": 0.6350439786911011, "num_tokens": 120485289.0, "step": 721 }, { "entropy": 1.7503860990206401, "epoch": 0.07931669001126034, "grad_norm": 0.7530749440193176, "learning_rate": 1.99956883463906e-05, "loss": 1.4151, "mean_token_accuracy": 0.6404246886571249, "num_tokens": 120689052.0, "step": 722 }, { "entropy": 1.7276439766089122, "epoch": 0.07942654692263328, "grad_norm": 0.8877029418945312, "learning_rate": 1.9995638648756686e-05, "loss": 1.2943, "mean_token_accuracy": 0.6667290230592092, "num_tokens": 120798819.0, "step": 723 }, { "entropy": 1.7703753213087718, "epoch": 0.0795364038340062, "grad_norm": 0.7141546607017517, "learning_rate": 1.9995588666414866e-05, "loss": 1.4063, "mean_token_accuracy": 0.6415145248174667, "num_tokens": 120975866.0, "step": 724 }, { "entropy": 1.8610213895638783, "epoch": 0.07964626074537914, "grad_norm": 0.8251237869262695, "learning_rate": 1.9995538399366716e-05, "loss": 1.5718, "mean_token_accuracy": 0.6517770936091741, "num_tokens": 121107698.0, "step": 725 }, { "entropy": 1.7903367479642232, "epoch": 0.07975611765675208, "grad_norm": 0.7166335582733154, "learning_rate": 1.9995487847613832e-05, "loss": 1.5287, "mean_token_accuracy": 0.633270596464475, "num_tokens": 121295774.0, "step": 726 }, { "entropy": 1.7588022152582805, "epoch": 0.07986597456812501, "grad_norm": 0.723822832107544, "learning_rate": 1.9995437011157805e-05, "loss": 1.7178, "mean_token_accuracy": 0.6282697518666586, "num_tokens": 121554849.0, "step": 727 }, { "entropy": 1.8302726646264393, "epoch": 0.07997583147949795, "grad_norm": 0.7193813920021057, "learning_rate": 1.9995385890000256e-05, "loss": 1.4198, "mean_token_accuracy": 0.6537833462158839, "num_tokens": 121708057.0, "step": 728 }, { "entropy": 1.8833401600519817, "epoch": 0.0800856883908709, "grad_norm": 0.896878182888031, "learning_rate": 1.9995334484142797e-05, "loss": 1.4591, "mean_token_accuracy": 0.6470515926678976, "num_tokens": 121871319.0, "step": 729 }, { "entropy": 1.8520794709523518, "epoch": 0.08019554530224383, "grad_norm": 0.6501368284225464, "learning_rate": 1.999528279358705e-05, "loss": 1.5772, "mean_token_accuracy": 0.6148606240749359, "num_tokens": 122104472.0, "step": 730 }, { "entropy": 1.8304372231165569, "epoch": 0.08030540221361676, "grad_norm": 0.7300158143043518, "learning_rate": 1.9995230818334665e-05, "loss": 1.5162, "mean_token_accuracy": 0.6417268216609955, "num_tokens": 122318282.0, "step": 731 }, { "entropy": 1.8412455519040425, "epoch": 0.0804152591249897, "grad_norm": 0.7814407348632812, "learning_rate": 1.9995178558387268e-05, "loss": 1.5187, "mean_token_accuracy": 0.6483007321755091, "num_tokens": 122441137.0, "step": 732 }, { "entropy": 1.8073117434978485, "epoch": 0.08052511603636264, "grad_norm": 0.761740505695343, "learning_rate": 1.9995126013746527e-05, "loss": 1.436, "mean_token_accuracy": 0.6581438233455023, "num_tokens": 122574227.0, "step": 733 }, { "entropy": 1.7240748008092244, "epoch": 0.08063497294773557, "grad_norm": 0.6440936326980591, "learning_rate": 1.9995073184414103e-05, "loss": 1.31, "mean_token_accuracy": 0.6681271195411682, "num_tokens": 122704629.0, "step": 734 }, { "entropy": 1.8669129113356273, "epoch": 0.08074482985910851, "grad_norm": 0.8220887184143066, "learning_rate": 1.9995020070391666e-05, "loss": 1.5414, "mean_token_accuracy": 0.645127202073733, "num_tokens": 122870109.0, "step": 735 }, { "entropy": 1.767956554889679, "epoch": 0.08085468677048145, "grad_norm": 0.8655692338943481, "learning_rate": 1.9994966671680892e-05, "loss": 1.3607, "mean_token_accuracy": 0.6563311517238617, "num_tokens": 122991223.0, "step": 736 }, { "entropy": 1.8697227636973064, "epoch": 0.08096454368185438, "grad_norm": 0.8299003839492798, "learning_rate": 1.999491298828348e-05, "loss": 1.4838, "mean_token_accuracy": 0.6484878609577814, "num_tokens": 123119953.0, "step": 737 }, { "entropy": 1.8050644993782043, "epoch": 0.08107440059322732, "grad_norm": 0.9768658876419067, "learning_rate": 1.9994859020201124e-05, "loss": 1.301, "mean_token_accuracy": 0.6755692362785339, "num_tokens": 123261135.0, "step": 738 }, { "entropy": 1.7627781132857006, "epoch": 0.08118425750460026, "grad_norm": 0.846538245677948, "learning_rate": 1.9994804767435535e-05, "loss": 1.368, "mean_token_accuracy": 0.6451980670293173, "num_tokens": 123420991.0, "step": 739 }, { "entropy": 1.8099198838075001, "epoch": 0.0812941144159732, "grad_norm": 0.6865768432617188, "learning_rate": 1.9994750229988426e-05, "loss": 1.3777, "mean_token_accuracy": 0.6596326579650243, "num_tokens": 123562426.0, "step": 740 }, { "entropy": 1.7973152299722035, "epoch": 0.08140397132734613, "grad_norm": 0.6900340914726257, "learning_rate": 1.9994695407861526e-05, "loss": 1.4664, "mean_token_accuracy": 0.6281344542900721, "num_tokens": 123751162.0, "step": 741 }, { "entropy": 1.796221762895584, "epoch": 0.08151382823871907, "grad_norm": 0.661390483379364, "learning_rate": 1.999464030105657e-05, "loss": 1.6038, "mean_token_accuracy": 0.6439404537280401, "num_tokens": 123962173.0, "step": 742 }, { "entropy": 1.8211529751618702, "epoch": 0.08162368515009201, "grad_norm": 0.838100254535675, "learning_rate": 1.99945849095753e-05, "loss": 1.5889, "mean_token_accuracy": 0.6407252550125122, "num_tokens": 124092071.0, "step": 743 }, { "entropy": 1.7539305289586384, "epoch": 0.08173354206146494, "grad_norm": 0.7083438038825989, "learning_rate": 1.999452923341947e-05, "loss": 1.4526, "mean_token_accuracy": 0.661969467997551, "num_tokens": 124272313.0, "step": 744 }, { "entropy": 1.8283264338970184, "epoch": 0.08184339897283788, "grad_norm": 0.9990186095237732, "learning_rate": 1.9994473272590848e-05, "loss": 1.5733, "mean_token_accuracy": 0.6551229556401571, "num_tokens": 124408723.0, "step": 745 }, { "entropy": 1.7998952567577362, "epoch": 0.08195325588421082, "grad_norm": 0.9326064586639404, "learning_rate": 1.9994417027091193e-05, "loss": 1.3575, "mean_token_accuracy": 0.6747980813185374, "num_tokens": 124546937.0, "step": 746 }, { "entropy": 1.8038958807786305, "epoch": 0.08206311279558375, "grad_norm": 0.6932543516159058, "learning_rate": 1.9994360496922297e-05, "loss": 1.4123, "mean_token_accuracy": 0.6614984820286433, "num_tokens": 124690925.0, "step": 747 }, { "entropy": 1.8390637238820393, "epoch": 0.08217296970695669, "grad_norm": 0.9938632845878601, "learning_rate": 1.9994303682085946e-05, "loss": 1.4262, "mean_token_accuracy": 0.6745589772860209, "num_tokens": 124849459.0, "step": 748 }, { "entropy": 1.7794020473957062, "epoch": 0.08228282661832963, "grad_norm": 0.7585030198097229, "learning_rate": 1.999424658258393e-05, "loss": 1.5437, "mean_token_accuracy": 0.63681960105896, "num_tokens": 125045326.0, "step": 749 }, { "entropy": 1.752739042043686, "epoch": 0.08239268352970257, "grad_norm": 0.7272341251373291, "learning_rate": 1.9994189198418067e-05, "loss": 1.5744, "mean_token_accuracy": 0.6451994031667709, "num_tokens": 125215785.0, "step": 750 }, { "entropy": 1.7343992094198863, "epoch": 0.0825025404410755, "grad_norm": 0.8098207712173462, "learning_rate": 1.9994131529590166e-05, "loss": 1.5917, "mean_token_accuracy": 0.6374679381648699, "num_tokens": 125409547.0, "step": 751 }, { "entropy": 1.8226308226585388, "epoch": 0.08261239735244844, "grad_norm": 0.7430676221847534, "learning_rate": 1.9994073576102058e-05, "loss": 1.399, "mean_token_accuracy": 0.6658477435509363, "num_tokens": 125530731.0, "step": 752 }, { "entropy": 1.7828458150227864, "epoch": 0.08272225426382138, "grad_norm": 0.7506123185157776, "learning_rate": 1.999401533795557e-05, "loss": 1.3961, "mean_token_accuracy": 0.6559909929831823, "num_tokens": 125660742.0, "step": 753 }, { "entropy": 1.7625751396020253, "epoch": 0.0828321111751943, "grad_norm": 1.280418038368225, "learning_rate": 1.9993956815152553e-05, "loss": 1.1615, "mean_token_accuracy": 0.6699869732062022, "num_tokens": 125830165.0, "step": 754 }, { "entropy": 1.8572514255841572, "epoch": 0.08294196808656724, "grad_norm": 0.9156613945960999, "learning_rate": 1.9993898007694857e-05, "loss": 1.6035, "mean_token_accuracy": 0.634151021639506, "num_tokens": 125965207.0, "step": 755 }, { "entropy": 1.7810141642888386, "epoch": 0.08305182499794019, "grad_norm": 0.714108407497406, "learning_rate": 1.999383891558434e-05, "loss": 1.3414, "mean_token_accuracy": 0.6630875319242477, "num_tokens": 126126227.0, "step": 756 }, { "entropy": 1.755696713924408, "epoch": 0.08316168190931313, "grad_norm": 0.6537689566612244, "learning_rate": 1.9993779538822873e-05, "loss": 1.46, "mean_token_accuracy": 0.6578283309936523, "num_tokens": 126285094.0, "step": 757 }, { "entropy": 1.8099895517031352, "epoch": 0.08327153882068605, "grad_norm": 0.8549863696098328, "learning_rate": 1.9993719877412333e-05, "loss": 1.3394, "mean_token_accuracy": 0.6538981397946676, "num_tokens": 126461471.0, "step": 758 }, { "entropy": 1.8493448694547017, "epoch": 0.083381395732059, "grad_norm": 0.85292649269104, "learning_rate": 1.9993659931354616e-05, "loss": 1.4327, "mean_token_accuracy": 0.6453457971413931, "num_tokens": 126644623.0, "step": 759 }, { "entropy": 1.8416785299777985, "epoch": 0.08349125264343193, "grad_norm": 0.7345470190048218, "learning_rate": 1.9993599700651612e-05, "loss": 1.4578, "mean_token_accuracy": 0.6381366650263468, "num_tokens": 126832969.0, "step": 760 }, { "entropy": 1.7897752424081166, "epoch": 0.08360110955480486, "grad_norm": 0.6783207654953003, "learning_rate": 1.9993539185305236e-05, "loss": 1.3596, "mean_token_accuracy": 0.6550974746545156, "num_tokens": 127023280.0, "step": 761 }, { "entropy": 1.7872902353604634, "epoch": 0.0837109664661778, "grad_norm": 0.7059661746025085, "learning_rate": 1.9993478385317392e-05, "loss": 1.3619, "mean_token_accuracy": 0.6592389543851217, "num_tokens": 127161642.0, "step": 762 }, { "entropy": 1.8159588476022084, "epoch": 0.08382082337755074, "grad_norm": 0.6670881509780884, "learning_rate": 1.999341730069001e-05, "loss": 1.3591, "mean_token_accuracy": 0.6564019024372101, "num_tokens": 127301579.0, "step": 763 }, { "entropy": 1.7422301471233368, "epoch": 0.08393068028892367, "grad_norm": 0.6835038661956787, "learning_rate": 1.9993355931425026e-05, "loss": 1.3785, "mean_token_accuracy": 0.6575357466936111, "num_tokens": 127444174.0, "step": 764 }, { "entropy": 1.7988096475601196, "epoch": 0.08404053720029661, "grad_norm": 0.6521595120429993, "learning_rate": 1.9993294277524376e-05, "loss": 1.4665, "mean_token_accuracy": 0.6468783915042877, "num_tokens": 127635492.0, "step": 765 }, { "entropy": 1.808186541001002, "epoch": 0.08415039411166955, "grad_norm": 0.8931750655174255, "learning_rate": 1.9993232338990017e-05, "loss": 1.5414, "mean_token_accuracy": 0.642045333981514, "num_tokens": 127793125.0, "step": 766 }, { "entropy": 1.8431545893351238, "epoch": 0.08426025102304249, "grad_norm": 0.7338786125183105, "learning_rate": 1.9993170115823907e-05, "loss": 1.5627, "mean_token_accuracy": 0.6344971805810928, "num_tokens": 127993475.0, "step": 767 }, { "entropy": 1.822217325369517, "epoch": 0.08437010793441542, "grad_norm": 0.7734959721565247, "learning_rate": 1.9993107608028014e-05, "loss": 1.4285, "mean_token_accuracy": 0.6521108448505402, "num_tokens": 128139318.0, "step": 768 }, { "entropy": 1.8207875788211823, "epoch": 0.08447996484578836, "grad_norm": 0.6687442064285278, "learning_rate": 1.9993044815604315e-05, "loss": 1.4348, "mean_token_accuracy": 0.6397057324647903, "num_tokens": 128349536.0, "step": 769 }, { "entropy": 1.776291400194168, "epoch": 0.0845898217571613, "grad_norm": 0.7247187495231628, "learning_rate": 1.9992981738554804e-05, "loss": 1.4005, "mean_token_accuracy": 0.6597668379545212, "num_tokens": 128529318.0, "step": 770 }, { "entropy": 1.8065292338530223, "epoch": 0.08469967866853423, "grad_norm": 0.7673947215080261, "learning_rate": 1.999291837688147e-05, "loss": 1.2979, "mean_token_accuracy": 0.6667843461036682, "num_tokens": 128647932.0, "step": 771 }, { "entropy": 1.8455777664979298, "epoch": 0.08480953557990717, "grad_norm": 0.8621540069580078, "learning_rate": 1.9992854730586328e-05, "loss": 1.3875, "mean_token_accuracy": 0.6518898904323578, "num_tokens": 128806671.0, "step": 772 }, { "entropy": 1.7859807113806407, "epoch": 0.08491939249128011, "grad_norm": 0.806907057762146, "learning_rate": 1.999279079967138e-05, "loss": 1.4682, "mean_token_accuracy": 0.6510076969861984, "num_tokens": 128956643.0, "step": 773 }, { "entropy": 1.8100234270095825, "epoch": 0.08502924940265305, "grad_norm": 0.7432371973991394, "learning_rate": 1.9992726584138654e-05, "loss": 1.4843, "mean_token_accuracy": 0.637720063328743, "num_tokens": 129165449.0, "step": 774 }, { "entropy": 1.810623029867808, "epoch": 0.08513910631402598, "grad_norm": 0.7347936034202576, "learning_rate": 1.999266208399019e-05, "loss": 1.4643, "mean_token_accuracy": 0.6474610765775045, "num_tokens": 129345429.0, "step": 775 }, { "entropy": 1.8380460838476818, "epoch": 0.08524896322539892, "grad_norm": 0.778282642364502, "learning_rate": 1.999259729922802e-05, "loss": 1.364, "mean_token_accuracy": 0.6652749627828598, "num_tokens": 129470095.0, "step": 776 }, { "entropy": 1.733398546775182, "epoch": 0.08535882013677186, "grad_norm": 1.2146008014678955, "learning_rate": 1.9992532229854198e-05, "loss": 1.2652, "mean_token_accuracy": 0.6567486921946207, "num_tokens": 129738611.0, "step": 777 }, { "entropy": 1.7519733210404713, "epoch": 0.08546867704814479, "grad_norm": 0.7072291374206543, "learning_rate": 1.9992466875870783e-05, "loss": 1.3025, "mean_token_accuracy": 0.6597904910643896, "num_tokens": 129854306.0, "step": 778 }, { "entropy": 1.8180581033229828, "epoch": 0.08557853395951773, "grad_norm": 0.9036336541175842, "learning_rate": 1.9992401237279842e-05, "loss": 1.2928, "mean_token_accuracy": 0.6717847138643265, "num_tokens": 129988460.0, "step": 779 }, { "entropy": 1.8475947678089142, "epoch": 0.08568839087089067, "grad_norm": 0.690317690372467, "learning_rate": 1.9992335314083456e-05, "loss": 1.3999, "mean_token_accuracy": 0.6502603391806284, "num_tokens": 130130018.0, "step": 780 }, { "entropy": 1.7420289814472198, "epoch": 0.0857982477822636, "grad_norm": 0.5954359173774719, "learning_rate": 1.999226910628371e-05, "loss": 1.5268, "mean_token_accuracy": 0.6370938271284103, "num_tokens": 130339750.0, "step": 781 }, { "entropy": 1.833437740802765, "epoch": 0.08590810469363654, "grad_norm": 0.972395122051239, "learning_rate": 1.9992202613882697e-05, "loss": 1.4811, "mean_token_accuracy": 0.6551183809836706, "num_tokens": 130484058.0, "step": 782 }, { "entropy": 1.7667591671148937, "epoch": 0.08601796160500948, "grad_norm": 0.7758358716964722, "learning_rate": 1.999213583688252e-05, "loss": 1.3965, "mean_token_accuracy": 0.6554379711548487, "num_tokens": 130646142.0, "step": 783 }, { "entropy": 1.7984866201877594, "epoch": 0.08612781851638242, "grad_norm": 0.8162248134613037, "learning_rate": 1.9992068775285306e-05, "loss": 1.5106, "mean_token_accuracy": 0.6294661909341812, "num_tokens": 130834885.0, "step": 784 }, { "entropy": 1.7651668687661488, "epoch": 0.08623767542775534, "grad_norm": 0.9227822422981262, "learning_rate": 1.9992001429093156e-05, "loss": 1.2935, "mean_token_accuracy": 0.6639310071865717, "num_tokens": 130975509.0, "step": 785 }, { "entropy": 1.7787472208340962, "epoch": 0.08634753233912829, "grad_norm": 0.6106439828872681, "learning_rate": 1.9991933798308222e-05, "loss": 1.397, "mean_token_accuracy": 0.6517676264047623, "num_tokens": 131148150.0, "step": 786 }, { "entropy": 1.7501886288324993, "epoch": 0.08645738925050123, "grad_norm": 0.6313499808311462, "learning_rate": 1.9991865882932628e-05, "loss": 1.4274, "mean_token_accuracy": 0.6402166783809662, "num_tokens": 131368022.0, "step": 787 }, { "entropy": 1.8370747168858845, "epoch": 0.08656724616187415, "grad_norm": 0.7224745154380798, "learning_rate": 1.9991797682968533e-05, "loss": 1.337, "mean_token_accuracy": 0.6554816514253616, "num_tokens": 131494243.0, "step": 788 }, { "entropy": 1.7572102049986522, "epoch": 0.0866771030732471, "grad_norm": 0.6437149047851562, "learning_rate": 1.9991729198418094e-05, "loss": 1.3779, "mean_token_accuracy": 0.6482534607251486, "num_tokens": 131657195.0, "step": 789 }, { "entropy": 1.8201068341732025, "epoch": 0.08678695998462004, "grad_norm": 0.5996161699295044, "learning_rate": 1.9991660429283475e-05, "loss": 1.4695, "mean_token_accuracy": 0.6353013664484024, "num_tokens": 131837418.0, "step": 790 }, { "entropy": 1.737343708674113, "epoch": 0.08689681689599298, "grad_norm": 0.8244271278381348, "learning_rate": 1.999159137556686e-05, "loss": 1.3923, "mean_token_accuracy": 0.6581053187449774, "num_tokens": 131986253.0, "step": 791 }, { "entropy": 1.831222931543986, "epoch": 0.0870066738073659, "grad_norm": 0.6725685000419617, "learning_rate": 1.9991522037270426e-05, "loss": 1.5433, "mean_token_accuracy": 0.6341465910275778, "num_tokens": 132156185.0, "step": 792 }, { "entropy": 1.7623351514339447, "epoch": 0.08711653071873884, "grad_norm": 0.6132712364196777, "learning_rate": 1.9991452414396374e-05, "loss": 1.5282, "mean_token_accuracy": 0.6300236731767654, "num_tokens": 132375195.0, "step": 793 }, { "entropy": 1.8128896256287892, "epoch": 0.08722638763011178, "grad_norm": 0.8201103210449219, "learning_rate": 1.99913825069469e-05, "loss": 1.4092, "mean_token_accuracy": 0.6543792635202408, "num_tokens": 132572019.0, "step": 794 }, { "entropy": 1.8170464436213176, "epoch": 0.08733624454148471, "grad_norm": 0.7330581545829773, "learning_rate": 1.9991312314924223e-05, "loss": 1.5836, "mean_token_accuracy": 0.6373669604460398, "num_tokens": 132757713.0, "step": 795 }, { "entropy": 1.7433435519536336, "epoch": 0.08744610145285765, "grad_norm": 0.7479463219642639, "learning_rate": 1.9991241838330563e-05, "loss": 1.2838, "mean_token_accuracy": 0.6742591361204783, "num_tokens": 132907608.0, "step": 796 }, { "entropy": 1.7819043000539143, "epoch": 0.0875559583642306, "grad_norm": 0.8788211941719055, "learning_rate": 1.999117107716815e-05, "loss": 1.2552, "mean_token_accuracy": 0.6783933192491531, "num_tokens": 133021304.0, "step": 797 }, { "entropy": 1.803941269715627, "epoch": 0.08766581527560352, "grad_norm": 0.7351856827735901, "learning_rate": 1.9991100031439226e-05, "loss": 1.4558, "mean_token_accuracy": 0.6422074437141418, "num_tokens": 133170851.0, "step": 798 }, { "entropy": 1.7430227200190227, "epoch": 0.08777567218697646, "grad_norm": 0.7564266324043274, "learning_rate": 1.999102870114604e-05, "loss": 1.5169, "mean_token_accuracy": 0.6481150388717651, "num_tokens": 133331023.0, "step": 799 }, { "entropy": 1.8221300840377808, "epoch": 0.0878855290983494, "grad_norm": 0.6594426035881042, "learning_rate": 1.9990957086290842e-05, "loss": 1.5665, "mean_token_accuracy": 0.6504103392362595, "num_tokens": 133496097.0, "step": 800 }, { "entropy": 1.781892587741216, "epoch": 0.08799538600972234, "grad_norm": 0.7563036680221558, "learning_rate": 1.9990885186875903e-05, "loss": 1.5135, "mean_token_accuracy": 0.6499631603558859, "num_tokens": 133688537.0, "step": 801 }, { "entropy": 1.7549792230129242, "epoch": 0.08810524292109527, "grad_norm": 0.7245374321937561, "learning_rate": 1.9990813002903504e-05, "loss": 1.4476, "mean_token_accuracy": 0.6485745906829834, "num_tokens": 133853936.0, "step": 802 }, { "entropy": 1.762273798386256, "epoch": 0.08821509983246821, "grad_norm": 0.9437369108200073, "learning_rate": 1.999074053437592e-05, "loss": 1.3344, "mean_token_accuracy": 0.6619789600372314, "num_tokens": 134003645.0, "step": 803 }, { "entropy": 1.8330159882704418, "epoch": 0.08832495674384115, "grad_norm": 0.8058866858482361, "learning_rate": 1.9990667781295453e-05, "loss": 1.3736, "mean_token_accuracy": 0.6473863820234934, "num_tokens": 134140327.0, "step": 804 }, { "entropy": 1.7547661860783894, "epoch": 0.08843481365521408, "grad_norm": 0.7638370394706726, "learning_rate": 1.9990594743664402e-05, "loss": 1.4696, "mean_token_accuracy": 0.652462974190712, "num_tokens": 134275518.0, "step": 805 }, { "entropy": 1.7573677500089009, "epoch": 0.08854467056658702, "grad_norm": 0.7340158224105835, "learning_rate": 1.9990521421485077e-05, "loss": 1.2733, "mean_token_accuracy": 0.6660685688257217, "num_tokens": 134395935.0, "step": 806 }, { "entropy": 1.8871439099311829, "epoch": 0.08865452747795996, "grad_norm": 0.8130138516426086, "learning_rate": 1.99904478147598e-05, "loss": 1.3587, "mean_token_accuracy": 0.6492532193660736, "num_tokens": 134495765.0, "step": 807 }, { "entropy": 1.8198732137680054, "epoch": 0.08876438438933289, "grad_norm": 0.8263359665870667, "learning_rate": 1.99903739234909e-05, "loss": 1.4117, "mean_token_accuracy": 0.6728939761718115, "num_tokens": 134650045.0, "step": 808 }, { "entropy": 1.7290976345539093, "epoch": 0.08887424130070583, "grad_norm": 1.5325140953063965, "learning_rate": 1.999029974768072e-05, "loss": 1.2137, "mean_token_accuracy": 0.675426850716273, "num_tokens": 134879431.0, "step": 809 }, { "entropy": 1.7537180085976918, "epoch": 0.08898409821207877, "grad_norm": 0.7787648439407349, "learning_rate": 1.99902252873316e-05, "loss": 1.4029, "mean_token_accuracy": 0.6435067852338155, "num_tokens": 135037088.0, "step": 810 }, { "entropy": 1.793819099664688, "epoch": 0.08909395512345171, "grad_norm": 0.6838997006416321, "learning_rate": 1.9990150542445904e-05, "loss": 1.512, "mean_token_accuracy": 0.650256002942721, "num_tokens": 135257457.0, "step": 811 }, { "entropy": 1.8345171809196472, "epoch": 0.08920381203482464, "grad_norm": 0.6854283809661865, "learning_rate": 1.999007551302599e-05, "loss": 1.4529, "mean_token_accuracy": 0.6385099937518438, "num_tokens": 135443159.0, "step": 812 }, { "entropy": 1.7310991485913594, "epoch": 0.08931366894619758, "grad_norm": 0.7480428814888, "learning_rate": 1.9990000199074244e-05, "loss": 1.385, "mean_token_accuracy": 0.6632754951715469, "num_tokens": 135602809.0, "step": 813 }, { "entropy": 1.7727793554464977, "epoch": 0.08942352585757052, "grad_norm": 0.9646157026290894, "learning_rate": 1.9989924600593037e-05, "loss": 1.3619, "mean_token_accuracy": 0.6555025527874628, "num_tokens": 135716606.0, "step": 814 }, { "entropy": 1.7907099723815918, "epoch": 0.08953338276894345, "grad_norm": 0.7035730481147766, "learning_rate": 1.998984871758477e-05, "loss": 1.4499, "mean_token_accuracy": 0.6439538051684698, "num_tokens": 135878432.0, "step": 815 }, { "entropy": 1.8181606729825337, "epoch": 0.08964323968031639, "grad_norm": 0.6870342493057251, "learning_rate": 1.998977255005184e-05, "loss": 1.4373, "mean_token_accuracy": 0.6356525272130966, "num_tokens": 136081813.0, "step": 816 }, { "entropy": 1.7305361131827037, "epoch": 0.08975309659168933, "grad_norm": 0.5997280478477478, "learning_rate": 1.9989696097996662e-05, "loss": 1.4796, "mean_token_accuracy": 0.6471946984529495, "num_tokens": 136295261.0, "step": 817 }, { "entropy": 1.7904970049858093, "epoch": 0.08986295350306227, "grad_norm": 0.8890630006790161, "learning_rate": 1.998961936142165e-05, "loss": 1.403, "mean_token_accuracy": 0.6517884830633799, "num_tokens": 136466043.0, "step": 818 }, { "entropy": 1.770477294921875, "epoch": 0.0899728104144352, "grad_norm": 0.6830163598060608, "learning_rate": 1.998954234032924e-05, "loss": 1.5319, "mean_token_accuracy": 0.6378083527088165, "num_tokens": 136665178.0, "step": 819 }, { "entropy": 1.8271574676036835, "epoch": 0.09008266732580814, "grad_norm": 0.755791187286377, "learning_rate": 1.9989465034721866e-05, "loss": 1.533, "mean_token_accuracy": 0.6352124363183975, "num_tokens": 136800420.0, "step": 820 }, { "entropy": 1.7916331390539806, "epoch": 0.09019252423718108, "grad_norm": 0.7258456945419312, "learning_rate": 1.998938744460197e-05, "loss": 1.4128, "mean_token_accuracy": 0.654478500286738, "num_tokens": 136971123.0, "step": 821 }, { "entropy": 1.773153026898702, "epoch": 0.090302381148554, "grad_norm": 0.5961330533027649, "learning_rate": 1.9989309569972014e-05, "loss": 1.4252, "mean_token_accuracy": 0.6488613287607828, "num_tokens": 137200207.0, "step": 822 }, { "entropy": 1.7114134629567463, "epoch": 0.09041223805992694, "grad_norm": 0.6579198241233826, "learning_rate": 1.9989231410834462e-05, "loss": 1.4714, "mean_token_accuracy": 0.6568086395661036, "num_tokens": 137407339.0, "step": 823 }, { "entropy": 1.7578770518302917, "epoch": 0.09052209497129989, "grad_norm": 0.7298670411109924, "learning_rate": 1.9989152967191788e-05, "loss": 1.4054, "mean_token_accuracy": 0.6561851551135381, "num_tokens": 137533281.0, "step": 824 }, { "entropy": 1.7987795968850453, "epoch": 0.09063195188267281, "grad_norm": 0.7497153878211975, "learning_rate": 1.9989074239046467e-05, "loss": 1.5478, "mean_token_accuracy": 0.6374392211437225, "num_tokens": 137734737.0, "step": 825 }, { "entropy": 1.8171394268671672, "epoch": 0.09074180879404575, "grad_norm": 0.7937276363372803, "learning_rate": 1.9988995226401e-05, "loss": 1.4708, "mean_token_accuracy": 0.6504966169595718, "num_tokens": 137901984.0, "step": 826 }, { "entropy": 1.7756889462471008, "epoch": 0.0908516657054187, "grad_norm": 0.7066166996955872, "learning_rate": 1.9988915929257887e-05, "loss": 1.3252, "mean_token_accuracy": 0.6584804703791937, "num_tokens": 138067546.0, "step": 827 }, { "entropy": 1.6937337219715118, "epoch": 0.09096152261679163, "grad_norm": 0.8448572754859924, "learning_rate": 1.9988836347619634e-05, "loss": 1.2457, "mean_token_accuracy": 0.6707619080940882, "num_tokens": 138239095.0, "step": 828 }, { "entropy": 1.777471164862315, "epoch": 0.09107137952816456, "grad_norm": 0.7635485529899597, "learning_rate": 1.998875648148876e-05, "loss": 1.4174, "mean_token_accuracy": 0.6492311904827753, "num_tokens": 138382813.0, "step": 829 }, { "entropy": 1.7190299828847249, "epoch": 0.0911812364395375, "grad_norm": 0.6621118783950806, "learning_rate": 1.9988676330867798e-05, "loss": 1.4274, "mean_token_accuracy": 0.6445133884747823, "num_tokens": 138589846.0, "step": 830 }, { "entropy": 1.7541986008485158, "epoch": 0.09129109335091044, "grad_norm": 0.7409399151802063, "learning_rate": 1.9988595895759276e-05, "loss": 1.3882, "mean_token_accuracy": 0.6580299437046051, "num_tokens": 138717947.0, "step": 831 }, { "entropy": 1.8276172975699108, "epoch": 0.09140095026228337, "grad_norm": 0.815585196018219, "learning_rate": 1.9988515176165748e-05, "loss": 1.6175, "mean_token_accuracy": 0.6387762427330017, "num_tokens": 138900529.0, "step": 832 }, { "entropy": 1.7899891436100006, "epoch": 0.09151080717365631, "grad_norm": 0.6510075926780701, "learning_rate": 1.998843417208976e-05, "loss": 1.4375, "mean_token_accuracy": 0.6496662348508835, "num_tokens": 139045331.0, "step": 833 }, { "entropy": 1.7600055237611134, "epoch": 0.09162066408502925, "grad_norm": 0.7253756523132324, "learning_rate": 1.9988352883533883e-05, "loss": 1.5197, "mean_token_accuracy": 0.6373974829912186, "num_tokens": 139188419.0, "step": 834 }, { "entropy": 1.7518123586972554, "epoch": 0.09173052099640219, "grad_norm": 0.735378086566925, "learning_rate": 1.9988271310500686e-05, "loss": 1.3998, "mean_token_accuracy": 0.6474562138319016, "num_tokens": 139335708.0, "step": 835 }, { "entropy": 1.795258621374766, "epoch": 0.09184037790777512, "grad_norm": 0.659813404083252, "learning_rate": 1.9988189452992755e-05, "loss": 1.4814, "mean_token_accuracy": 0.6482225656509399, "num_tokens": 139570367.0, "step": 836 }, { "entropy": 1.8820240795612335, "epoch": 0.09195023481914806, "grad_norm": 0.6837143301963806, "learning_rate": 1.9988107311012675e-05, "loss": 1.4863, "mean_token_accuracy": 0.6410119732220968, "num_tokens": 139719920.0, "step": 837 }, { "entropy": 1.7456907431284587, "epoch": 0.092060091730521, "grad_norm": 0.6721776127815247, "learning_rate": 1.9988024884563054e-05, "loss": 1.339, "mean_token_accuracy": 0.6614281088113785, "num_tokens": 139892505.0, "step": 838 }, { "entropy": 1.7585683763027191, "epoch": 0.09216994864189393, "grad_norm": 0.713774561882019, "learning_rate": 1.9987942173646488e-05, "loss": 1.3296, "mean_token_accuracy": 0.6708425531784693, "num_tokens": 140041887.0, "step": 839 }, { "entropy": 1.753789484500885, "epoch": 0.09227980555326687, "grad_norm": 0.9725476503372192, "learning_rate": 1.998785917826561e-05, "loss": 1.5496, "mean_token_accuracy": 0.6513581027587255, "num_tokens": 140250586.0, "step": 840 }, { "entropy": 1.8161666095256805, "epoch": 0.09238966246463981, "grad_norm": 0.657247006893158, "learning_rate": 1.9987775898423036e-05, "loss": 1.3999, "mean_token_accuracy": 0.6466280668973923, "num_tokens": 140445865.0, "step": 841 }, { "entropy": 1.843330095211665, "epoch": 0.09249951937601274, "grad_norm": 0.7465802431106567, "learning_rate": 1.9987692334121402e-05, "loss": 1.4241, "mean_token_accuracy": 0.6497205942869186, "num_tokens": 140638169.0, "step": 842 }, { "entropy": 1.737547109524409, "epoch": 0.09260937628738568, "grad_norm": 0.6497205495834351, "learning_rate": 1.998760848536336e-05, "loss": 1.4405, "mean_token_accuracy": 0.6456060359875361, "num_tokens": 140833553.0, "step": 843 }, { "entropy": 1.6892894407113392, "epoch": 0.09271923319875862, "grad_norm": 0.6569632887840271, "learning_rate": 1.9987524352151556e-05, "loss": 1.438, "mean_token_accuracy": 0.6561574091513952, "num_tokens": 141038298.0, "step": 844 }, { "entropy": 1.7608444193998973, "epoch": 0.09282909011013156, "grad_norm": 0.7499677538871765, "learning_rate": 1.9987439934488656e-05, "loss": 1.3569, "mean_token_accuracy": 0.6573406010866165, "num_tokens": 141167798.0, "step": 845 }, { "entropy": 1.7646370430787404, "epoch": 0.09293894702150449, "grad_norm": 0.8782757520675659, "learning_rate": 1.9987355232377334e-05, "loss": 1.4156, "mean_token_accuracy": 0.655216450492541, "num_tokens": 141305587.0, "step": 846 }, { "entropy": 1.6969274083773296, "epoch": 0.09304880393287743, "grad_norm": 0.7114330530166626, "learning_rate": 1.9987270245820266e-05, "loss": 1.4441, "mean_token_accuracy": 0.6574098318815231, "num_tokens": 141494915.0, "step": 847 }, { "entropy": 1.7382513582706451, "epoch": 0.09315866084425037, "grad_norm": 0.633026659488678, "learning_rate": 1.998718497482015e-05, "loss": 1.4415, "mean_token_accuracy": 0.6520839134852091, "num_tokens": 141665803.0, "step": 848 }, { "entropy": 1.7377788325150807, "epoch": 0.0932685177556233, "grad_norm": 0.8016728162765503, "learning_rate": 1.9987099419379674e-05, "loss": 1.2266, "mean_token_accuracy": 0.6830306202173233, "num_tokens": 141771140.0, "step": 849 }, { "entropy": 1.7722203036149342, "epoch": 0.09337837466699624, "grad_norm": 0.816390335559845, "learning_rate": 1.998701357950155e-05, "loss": 1.4453, "mean_token_accuracy": 0.6510142137606939, "num_tokens": 141921548.0, "step": 850 }, { "entropy": 1.797320306301117, "epoch": 0.09348823157836918, "grad_norm": 0.7288161516189575, "learning_rate": 1.9986927455188503e-05, "loss": 1.4359, "mean_token_accuracy": 0.647617906332016, "num_tokens": 142051031.0, "step": 851 }, { "entropy": 1.7711346745491028, "epoch": 0.09359808848974212, "grad_norm": 0.6476997137069702, "learning_rate": 1.9986841046443245e-05, "loss": 1.4482, "mean_token_accuracy": 0.6498673806587855, "num_tokens": 142227046.0, "step": 852 }, { "entropy": 1.7939196527004242, "epoch": 0.09370794540111504, "grad_norm": 0.6596160531044006, "learning_rate": 1.9986754353268522e-05, "loss": 1.4829, "mean_token_accuracy": 0.6461114784081777, "num_tokens": 142400594.0, "step": 853 }, { "entropy": 1.723371555407842, "epoch": 0.09381780231248799, "grad_norm": 0.8292514681816101, "learning_rate": 1.9986667375667067e-05, "loss": 1.3327, "mean_token_accuracy": 0.6621809701124827, "num_tokens": 142541930.0, "step": 854 }, { "entropy": 1.7200664281845093, "epoch": 0.09392765922386093, "grad_norm": 0.6569286584854126, "learning_rate": 1.9986580113641645e-05, "loss": 1.4073, "mean_token_accuracy": 0.6560418456792831, "num_tokens": 142707884.0, "step": 855 }, { "entropy": 1.763913631439209, "epoch": 0.09403751613523385, "grad_norm": 0.7746670842170715, "learning_rate": 1.998649256719501e-05, "loss": 1.257, "mean_token_accuracy": 0.680213044087092, "num_tokens": 142845415.0, "step": 856 }, { "entropy": 1.7523279786109924, "epoch": 0.0941473730466068, "grad_norm": 0.8351315259933472, "learning_rate": 1.9986404736329935e-05, "loss": 1.3057, "mean_token_accuracy": 0.6638447294632593, "num_tokens": 142991596.0, "step": 857 }, { "entropy": 1.7808737655480702, "epoch": 0.09425722995797974, "grad_norm": 0.7735828161239624, "learning_rate": 1.9986316621049198e-05, "loss": 1.3841, "mean_token_accuracy": 0.6511393884817759, "num_tokens": 143141446.0, "step": 858 }, { "entropy": 1.8006083170572917, "epoch": 0.09436708686935266, "grad_norm": 0.6989002227783203, "learning_rate": 1.9986228221355593e-05, "loss": 1.4593, "mean_token_accuracy": 0.6463498423496882, "num_tokens": 143304040.0, "step": 859 }, { "entropy": 1.7336869637171428, "epoch": 0.0944769437807256, "grad_norm": 0.6207464337348938, "learning_rate": 1.998613953725191e-05, "loss": 1.4264, "mean_token_accuracy": 0.6408118307590485, "num_tokens": 143521739.0, "step": 860 }, { "entropy": 1.7804212868213654, "epoch": 0.09458680069209854, "grad_norm": 0.7322898507118225, "learning_rate": 1.998605056874096e-05, "loss": 1.2565, "mean_token_accuracy": 0.6800819089015325, "num_tokens": 143669420.0, "step": 861 }, { "entropy": 1.8095572888851166, "epoch": 0.09469665760347148, "grad_norm": 0.6634199619293213, "learning_rate": 1.998596131582556e-05, "loss": 1.3476, "mean_token_accuracy": 0.6624392867088318, "num_tokens": 143890464.0, "step": 862 }, { "entropy": 1.7374099691708882, "epoch": 0.09480651451484441, "grad_norm": 0.5917066931724548, "learning_rate": 1.9985871778508536e-05, "loss": 1.3316, "mean_token_accuracy": 0.6642079999049505, "num_tokens": 144071612.0, "step": 863 }, { "entropy": 1.7235216995080311, "epoch": 0.09491637142621735, "grad_norm": 0.6594040989875793, "learning_rate": 1.9985781956792712e-05, "loss": 1.5624, "mean_token_accuracy": 0.6464419017235438, "num_tokens": 144275211.0, "step": 864 }, { "entropy": 1.7869758109251659, "epoch": 0.0950262283375903, "grad_norm": 0.6814702153205872, "learning_rate": 1.9985691850680945e-05, "loss": 1.3704, "mean_token_accuracy": 0.6551233877738317, "num_tokens": 144461229.0, "step": 865 }, { "entropy": 1.7437097529570262, "epoch": 0.09513608524896322, "grad_norm": 0.6789867281913757, "learning_rate": 1.998560146017608e-05, "loss": 1.4037, "mean_token_accuracy": 0.6533855001131693, "num_tokens": 144626703.0, "step": 866 }, { "entropy": 1.797066976626714, "epoch": 0.09524594216033616, "grad_norm": 0.7511526346206665, "learning_rate": 1.9985510785280973e-05, "loss": 1.4707, "mean_token_accuracy": 0.6475979636112849, "num_tokens": 144816688.0, "step": 867 }, { "entropy": 1.8556463519732158, "epoch": 0.0953557990717091, "grad_norm": 0.729511559009552, "learning_rate": 1.99854198259985e-05, "loss": 1.4624, "mean_token_accuracy": 0.6474516242742538, "num_tokens": 144974185.0, "step": 868 }, { "entropy": 1.7502427001794179, "epoch": 0.09546565598308203, "grad_norm": 0.7686792016029358, "learning_rate": 1.9985328582331543e-05, "loss": 1.3928, "mean_token_accuracy": 0.6506832788387934, "num_tokens": 145131985.0, "step": 869 }, { "entropy": 1.8473133345444996, "epoch": 0.09557551289445497, "grad_norm": 0.7290443181991577, "learning_rate": 1.998523705428298e-05, "loss": 1.4376, "mean_token_accuracy": 0.6440041263898214, "num_tokens": 145300425.0, "step": 870 }, { "entropy": 1.7909137805302937, "epoch": 0.09568536980582791, "grad_norm": 1.0265684127807617, "learning_rate": 1.9985145241855715e-05, "loss": 1.3176, "mean_token_accuracy": 0.6611884931723276, "num_tokens": 145434375.0, "step": 871 }, { "entropy": 1.8016241292158763, "epoch": 0.09579522671720085, "grad_norm": 0.8837740421295166, "learning_rate": 1.998505314505265e-05, "loss": 1.4417, "mean_token_accuracy": 0.6421179672082266, "num_tokens": 145595112.0, "step": 872 }, { "entropy": 1.7559907833735149, "epoch": 0.09590508362857378, "grad_norm": 0.7442582249641418, "learning_rate": 1.9984960763876707e-05, "loss": 1.4568, "mean_token_accuracy": 0.6378819495439529, "num_tokens": 145809619.0, "step": 873 }, { "entropy": 1.7758004764715831, "epoch": 0.09601494053994672, "grad_norm": 0.7401770353317261, "learning_rate": 1.99848680983308e-05, "loss": 1.3542, "mean_token_accuracy": 0.6558797508478165, "num_tokens": 145962185.0, "step": 874 }, { "entropy": 1.701437811056773, "epoch": 0.09612479745131966, "grad_norm": 0.6981661319732666, "learning_rate": 1.998477514841787e-05, "loss": 1.272, "mean_token_accuracy": 0.6712963829437891, "num_tokens": 146122219.0, "step": 875 }, { "entropy": 1.8346630732218425, "epoch": 0.09623465436269259, "grad_norm": 1.0015085935592651, "learning_rate": 1.998468191414085e-05, "loss": 1.4049, "mean_token_accuracy": 0.6612652838230133, "num_tokens": 146286262.0, "step": 876 }, { "entropy": 1.8047532737255096, "epoch": 0.09634451127406553, "grad_norm": 0.6116564869880676, "learning_rate": 1.99845883955027e-05, "loss": 1.4939, "mean_token_accuracy": 0.6393746683994929, "num_tokens": 146482748.0, "step": 877 }, { "entropy": 1.8028401136398315, "epoch": 0.09645436818543847, "grad_norm": 0.8170140981674194, "learning_rate": 1.9984494592506375e-05, "loss": 1.4695, "mean_token_accuracy": 0.6653387248516083, "num_tokens": 146623953.0, "step": 878 }, { "entropy": 1.8251157999038696, "epoch": 0.09656422509681141, "grad_norm": 0.7140014171600342, "learning_rate": 1.9984400505154845e-05, "loss": 1.3936, "mean_token_accuracy": 0.6512231677770615, "num_tokens": 146771851.0, "step": 879 }, { "entropy": 1.7021491626898448, "epoch": 0.09667408200818434, "grad_norm": 0.7085208296775818, "learning_rate": 1.9984306133451085e-05, "loss": 1.5546, "mean_token_accuracy": 0.6158442795276642, "num_tokens": 147018276.0, "step": 880 }, { "entropy": 1.7345736026763916, "epoch": 0.09678393891955728, "grad_norm": 0.6248441934585571, "learning_rate": 1.9984211477398087e-05, "loss": 1.3856, "mean_token_accuracy": 0.6412098606427511, "num_tokens": 147164089.0, "step": 881 }, { "entropy": 1.7824423809846242, "epoch": 0.09689379583093022, "grad_norm": 0.7416592240333557, "learning_rate": 1.9984116536998842e-05, "loss": 1.3108, "mean_token_accuracy": 0.6656797031561533, "num_tokens": 147277397.0, "step": 882 }, { "entropy": 1.7290455897649128, "epoch": 0.09700365274230315, "grad_norm": 0.5920156836509705, "learning_rate": 1.998402131225636e-05, "loss": 1.464, "mean_token_accuracy": 0.646095464626948, "num_tokens": 147526287.0, "step": 883 }, { "entropy": 1.8121102452278137, "epoch": 0.09711350965367609, "grad_norm": 0.6940451264381409, "learning_rate": 1.998392580317365e-05, "loss": 1.5421, "mean_token_accuracy": 0.6315521448850632, "num_tokens": 147680370.0, "step": 884 }, { "entropy": 1.7699245909849803, "epoch": 0.09722336656504903, "grad_norm": 0.6525464653968811, "learning_rate": 1.9983830009753736e-05, "loss": 1.3875, "mean_token_accuracy": 0.6589068621397018, "num_tokens": 147840870.0, "step": 885 }, { "entropy": 1.7928034762541454, "epoch": 0.09733322347642195, "grad_norm": 0.6272910833358765, "learning_rate": 1.9983733931999652e-05, "loss": 1.4885, "mean_token_accuracy": 0.6306471476952235, "num_tokens": 148027455.0, "step": 886 }, { "entropy": 1.7910848359266918, "epoch": 0.0974430803877949, "grad_norm": 0.7335218191146851, "learning_rate": 1.9983637569914434e-05, "loss": 1.609, "mean_token_accuracy": 0.619565524160862, "num_tokens": 148195323.0, "step": 887 }, { "entropy": 1.8034427464008331, "epoch": 0.09755293729916784, "grad_norm": 1.0481171607971191, "learning_rate": 1.9983540923501136e-05, "loss": 1.3526, "mean_token_accuracy": 0.6704971541961035, "num_tokens": 148342254.0, "step": 888 }, { "entropy": 1.799752136071523, "epoch": 0.09766279421054078, "grad_norm": 0.8031909465789795, "learning_rate": 1.9983443992762818e-05, "loss": 1.4918, "mean_token_accuracy": 0.660582959651947, "num_tokens": 148456069.0, "step": 889 }, { "entropy": 1.7524477740128834, "epoch": 0.0977726511219137, "grad_norm": 0.66947340965271, "learning_rate": 1.9983346777702546e-05, "loss": 1.422, "mean_token_accuracy": 0.6511275370915731, "num_tokens": 148639070.0, "step": 890 }, { "entropy": 1.760518193244934, "epoch": 0.09788250803328664, "grad_norm": 0.71653151512146, "learning_rate": 1.9983249278323394e-05, "loss": 1.4249, "mean_token_accuracy": 0.64531609416008, "num_tokens": 148808597.0, "step": 891 }, { "entropy": 1.7460968097050984, "epoch": 0.09799236494465959, "grad_norm": 0.7005469799041748, "learning_rate": 1.9983151494628452e-05, "loss": 1.4135, "mean_token_accuracy": 0.6504943122466406, "num_tokens": 148986736.0, "step": 892 }, { "entropy": 1.6886422137419383, "epoch": 0.09810222185603251, "grad_norm": 0.7256191968917847, "learning_rate": 1.9983053426620812e-05, "loss": 1.282, "mean_token_accuracy": 0.6670690476894379, "num_tokens": 149123138.0, "step": 893 }, { "entropy": 1.764316588640213, "epoch": 0.09821207876740545, "grad_norm": 0.7242082357406616, "learning_rate": 1.998295507430358e-05, "loss": 1.4177, "mean_token_accuracy": 0.6532685707012812, "num_tokens": 149304620.0, "step": 894 }, { "entropy": 1.709983338912328, "epoch": 0.0983219356787784, "grad_norm": 0.760214626789093, "learning_rate": 1.998285643767987e-05, "loss": 1.3549, "mean_token_accuracy": 0.6476463874181112, "num_tokens": 149458167.0, "step": 895 }, { "entropy": 1.7316644787788391, "epoch": 0.09843179259015133, "grad_norm": 0.7626999020576477, "learning_rate": 1.99827575167528e-05, "loss": 1.3188, "mean_token_accuracy": 0.6596734821796417, "num_tokens": 149637723.0, "step": 896 }, { "entropy": 1.737456738948822, "epoch": 0.09854164950152426, "grad_norm": 0.7550251483917236, "learning_rate": 1.9982658311525497e-05, "loss": 1.488, "mean_token_accuracy": 0.6350032538175583, "num_tokens": 149820282.0, "step": 897 }, { "entropy": 1.7865496476491292, "epoch": 0.0986515064128972, "grad_norm": 0.7146956324577332, "learning_rate": 1.9982558822001107e-05, "loss": 1.3048, "mean_token_accuracy": 0.6644372095664343, "num_tokens": 149984370.0, "step": 898 }, { "entropy": 1.7460536360740662, "epoch": 0.09876136332427014, "grad_norm": 0.7782373428344727, "learning_rate": 1.9982459048182787e-05, "loss": 1.2861, "mean_token_accuracy": 0.6806688259045283, "num_tokens": 150100533.0, "step": 899 }, { "entropy": 1.7238269646962483, "epoch": 0.09887122023564307, "grad_norm": 0.9449548125267029, "learning_rate": 1.9982358990073677e-05, "loss": 1.3353, "mean_token_accuracy": 0.6628151287635168, "num_tokens": 150251335.0, "step": 900 }, { "entropy": 1.808404137690862, "epoch": 0.09898107714701601, "grad_norm": 0.7466446161270142, "learning_rate": 1.9982258647676955e-05, "loss": 1.5167, "mean_token_accuracy": 0.6374181012312571, "num_tokens": 150410855.0, "step": 901 }, { "entropy": 1.7830796142419179, "epoch": 0.09909093405838895, "grad_norm": 0.6440333127975464, "learning_rate": 1.9982158020995797e-05, "loss": 1.611, "mean_token_accuracy": 0.6330165167649587, "num_tokens": 150600761.0, "step": 902 }, { "entropy": 1.792793979247411, "epoch": 0.09920079096976188, "grad_norm": 0.6835723519325256, "learning_rate": 1.998205711003338e-05, "loss": 1.6041, "mean_token_accuracy": 0.6333948771158854, "num_tokens": 150845548.0, "step": 903 }, { "entropy": 1.7442950308322906, "epoch": 0.09931064788113482, "grad_norm": 0.7617843151092529, "learning_rate": 1.9981955914792906e-05, "loss": 1.3282, "mean_token_accuracy": 0.6637561370929083, "num_tokens": 150972304.0, "step": 904 }, { "entropy": 1.7536637882391612, "epoch": 0.09942050479250776, "grad_norm": 0.7179109454154968, "learning_rate": 1.9981854435277577e-05, "loss": 1.5067, "mean_token_accuracy": 0.6481045782566071, "num_tokens": 151143396.0, "step": 905 }, { "entropy": 1.7627435723940532, "epoch": 0.0995303617038807, "grad_norm": 0.7069958448410034, "learning_rate": 1.9981752671490598e-05, "loss": 1.3833, "mean_token_accuracy": 0.6542354027430216, "num_tokens": 151325547.0, "step": 906 }, { "entropy": 1.7693799436092377, "epoch": 0.09964021861525363, "grad_norm": 0.6431506872177124, "learning_rate": 1.9981650623435194e-05, "loss": 1.3678, "mean_token_accuracy": 0.6576612442731857, "num_tokens": 151536185.0, "step": 907 }, { "entropy": 1.7635951141516368, "epoch": 0.09975007552662657, "grad_norm": 0.7126159071922302, "learning_rate": 1.9981548291114595e-05, "loss": 1.3731, "mean_token_accuracy": 0.6596753050883611, "num_tokens": 151683858.0, "step": 908 }, { "entropy": 1.7573548754056294, "epoch": 0.09985993243799951, "grad_norm": 0.8124845027923584, "learning_rate": 1.9981445674532046e-05, "loss": 1.2352, "mean_token_accuracy": 0.6722429444392523, "num_tokens": 151803918.0, "step": 909 }, { "entropy": 1.7850454151630402, "epoch": 0.09996978934937244, "grad_norm": 0.7685918211936951, "learning_rate": 1.9981342773690783e-05, "loss": 1.512, "mean_token_accuracy": 0.6380599588155746, "num_tokens": 151971626.0, "step": 910 }, { "entropy": 1.805126855770747, "epoch": 0.10007964626074538, "grad_norm": 0.6546812057495117, "learning_rate": 1.9981239588594072e-05, "loss": 1.5592, "mean_token_accuracy": 0.6306341290473938, "num_tokens": 152201475.0, "step": 911 }, { "entropy": 1.7827772498130798, "epoch": 0.10018950317211832, "grad_norm": 0.6877257823944092, "learning_rate": 1.998113611924517e-05, "loss": 1.4371, "mean_token_accuracy": 0.6537010818719864, "num_tokens": 152351189.0, "step": 912 }, { "entropy": 1.7649867534637451, "epoch": 0.10029936008349125, "grad_norm": 0.7598903179168701, "learning_rate": 1.998103236564736e-05, "loss": 1.372, "mean_token_accuracy": 0.6497568885485331, "num_tokens": 152524073.0, "step": 913 }, { "entropy": 1.7680945495764415, "epoch": 0.10040921699486419, "grad_norm": 0.7872158885002136, "learning_rate": 1.9980928327803923e-05, "loss": 1.5074, "mean_token_accuracy": 0.6456826428572336, "num_tokens": 152670080.0, "step": 914 }, { "entropy": 1.7560344437758129, "epoch": 0.10051907390623713, "grad_norm": 0.5883249640464783, "learning_rate": 1.998082400571815e-05, "loss": 1.3791, "mean_token_accuracy": 0.64297587176164, "num_tokens": 152845888.0, "step": 915 }, { "entropy": 1.7675274113814037, "epoch": 0.10062893081761007, "grad_norm": 0.7053165435791016, "learning_rate": 1.9980719399393343e-05, "loss": 1.4325, "mean_token_accuracy": 0.6427458872397741, "num_tokens": 152978290.0, "step": 916 }, { "entropy": 1.7399719854195912, "epoch": 0.100738787728983, "grad_norm": 0.7673996686935425, "learning_rate": 1.9980614508832815e-05, "loss": 1.425, "mean_token_accuracy": 0.6594684372345606, "num_tokens": 153149847.0, "step": 917 }, { "entropy": 1.762896368900935, "epoch": 0.10084864464035594, "grad_norm": 0.8135155439376831, "learning_rate": 1.9980509334039885e-05, "loss": 1.5537, "mean_token_accuracy": 0.6251424799362818, "num_tokens": 153411080.0, "step": 918 }, { "entropy": 1.817118614912033, "epoch": 0.10095850155172888, "grad_norm": 1.1829191446304321, "learning_rate": 1.998040387501788e-05, "loss": 1.4132, "mean_token_accuracy": 0.6469329843918482, "num_tokens": 153647902.0, "step": 919 }, { "entropy": 1.723706712325414, "epoch": 0.1010683584631018, "grad_norm": 0.6135080456733704, "learning_rate": 1.998029813177014e-05, "loss": 1.4235, "mean_token_accuracy": 0.6411093920469284, "num_tokens": 153887618.0, "step": 920 }, { "entropy": 1.7523813048998516, "epoch": 0.10117821537447474, "grad_norm": 0.6740756034851074, "learning_rate": 1.998019210430001e-05, "loss": 1.4545, "mean_token_accuracy": 0.6468822509050369, "num_tokens": 154040907.0, "step": 921 }, { "entropy": 1.7299329936504364, "epoch": 0.10128807228584769, "grad_norm": 0.670302152633667, "learning_rate": 1.998008579261085e-05, "loss": 1.4126, "mean_token_accuracy": 0.6671505371729533, "num_tokens": 154192950.0, "step": 922 }, { "entropy": 1.763699213663737, "epoch": 0.10139792919722063, "grad_norm": 0.803800106048584, "learning_rate": 1.9979979196706015e-05, "loss": 1.478, "mean_token_accuracy": 0.6557512432336807, "num_tokens": 154343446.0, "step": 923 }, { "entropy": 1.7053393324216206, "epoch": 0.10150778610859355, "grad_norm": 0.9171890020370483, "learning_rate": 1.997987231658889e-05, "loss": 1.4155, "mean_token_accuracy": 0.6599433819452921, "num_tokens": 154542170.0, "step": 924 }, { "entropy": 1.8018841644128163, "epoch": 0.1016176430199665, "grad_norm": 0.7261690497398376, "learning_rate": 1.997976515226285e-05, "loss": 1.3835, "mean_token_accuracy": 0.6623082359631857, "num_tokens": 154683041.0, "step": 925 }, { "entropy": 1.8124282856782277, "epoch": 0.10172749993133944, "grad_norm": 0.8906520009040833, "learning_rate": 1.9979657703731293e-05, "loss": 1.424, "mean_token_accuracy": 0.6509944200515747, "num_tokens": 154823935.0, "step": 926 }, { "entropy": 1.680219441652298, "epoch": 0.10183735684271236, "grad_norm": 0.7079525589942932, "learning_rate": 1.9979549970997613e-05, "loss": 1.35, "mean_token_accuracy": 0.6591299722592036, "num_tokens": 154979227.0, "step": 927 }, { "entropy": 1.7787267863750458, "epoch": 0.1019472137540853, "grad_norm": 0.8107025623321533, "learning_rate": 1.9979441954065222e-05, "loss": 1.3122, "mean_token_accuracy": 0.6689551870028178, "num_tokens": 155096447.0, "step": 928 }, { "entropy": 1.70075523853302, "epoch": 0.10205707066545824, "grad_norm": 0.642701268196106, "learning_rate": 1.997933365293754e-05, "loss": 1.4148, "mean_token_accuracy": 0.6453291227420171, "num_tokens": 155272639.0, "step": 929 }, { "entropy": 1.778489778439204, "epoch": 0.10216692757683117, "grad_norm": 0.7005394101142883, "learning_rate": 1.9979225067617995e-05, "loss": 1.3191, "mean_token_accuracy": 0.662499725818634, "num_tokens": 155389573.0, "step": 930 }, { "entropy": 1.7405783832073212, "epoch": 0.10227678448820411, "grad_norm": 0.7797371745109558, "learning_rate": 1.9979116198110022e-05, "loss": 1.4646, "mean_token_accuracy": 0.6487743357817332, "num_tokens": 155582478.0, "step": 931 }, { "entropy": 1.8176141182581584, "epoch": 0.10238664139957705, "grad_norm": 0.6615299582481384, "learning_rate": 1.9979007044417068e-05, "loss": 1.5006, "mean_token_accuracy": 0.6347943594058355, "num_tokens": 155734907.0, "step": 932 }, { "entropy": 1.801711489756902, "epoch": 0.10249649831095, "grad_norm": 0.9806280136108398, "learning_rate": 1.9978897606542585e-05, "loss": 1.532, "mean_token_accuracy": 0.636087437470754, "num_tokens": 155891281.0, "step": 933 }, { "entropy": 1.719766726096471, "epoch": 0.10260635522232292, "grad_norm": 0.6877168416976929, "learning_rate": 1.9978787884490042e-05, "loss": 1.4065, "mean_token_accuracy": 0.6426805555820465, "num_tokens": 156096557.0, "step": 934 }, { "entropy": 1.808685193459193, "epoch": 0.10271621213369586, "grad_norm": 0.8238107562065125, "learning_rate": 1.997867787826291e-05, "loss": 1.2805, "mean_token_accuracy": 0.6664846589167913, "num_tokens": 156197634.0, "step": 935 }, { "entropy": 1.6944943865140278, "epoch": 0.1028260690450688, "grad_norm": 0.5987890958786011, "learning_rate": 1.9978567587864662e-05, "loss": 1.2886, "mean_token_accuracy": 0.6632740994294485, "num_tokens": 156353948.0, "step": 936 }, { "entropy": 1.7655479113260906, "epoch": 0.10293592595644173, "grad_norm": 0.6797613501548767, "learning_rate": 1.99784570132988e-05, "loss": 1.4642, "mean_token_accuracy": 0.6423184126615524, "num_tokens": 156535094.0, "step": 937 }, { "entropy": 1.7759600281715393, "epoch": 0.10304578286781467, "grad_norm": 0.7226538062095642, "learning_rate": 1.9978346154568816e-05, "loss": 1.4255, "mean_token_accuracy": 0.6578917105992635, "num_tokens": 156662107.0, "step": 938 }, { "entropy": 1.7849325835704803, "epoch": 0.10315563977918761, "grad_norm": 0.7838888764381409, "learning_rate": 1.9978235011678227e-05, "loss": 1.457, "mean_token_accuracy": 0.6387642721335093, "num_tokens": 156816532.0, "step": 939 }, { "entropy": 1.8332215547561646, "epoch": 0.10326549669056055, "grad_norm": 1.3963543176651, "learning_rate": 1.9978123584630543e-05, "loss": 1.5022, "mean_token_accuracy": 0.6504113674163818, "num_tokens": 157001513.0, "step": 940 }, { "entropy": 1.7760482331116993, "epoch": 0.10337535360193348, "grad_norm": 0.7974473237991333, "learning_rate": 1.9978011873429293e-05, "loss": 1.5326, "mean_token_accuracy": 0.6438284814357758, "num_tokens": 157140942.0, "step": 941 }, { "entropy": 1.739799976348877, "epoch": 0.10348521051330642, "grad_norm": 0.6732959151268005, "learning_rate": 1.9977899878078014e-05, "loss": 1.3565, "mean_token_accuracy": 0.6677434494098028, "num_tokens": 157322095.0, "step": 942 }, { "entropy": 1.7245545585950215, "epoch": 0.10359506742467936, "grad_norm": 0.6832246780395508, "learning_rate": 1.997778759858025e-05, "loss": 1.4486, "mean_token_accuracy": 0.6488750129938126, "num_tokens": 157489321.0, "step": 943 }, { "entropy": 1.7096454600493114, "epoch": 0.10370492433605229, "grad_norm": 0.669666588306427, "learning_rate": 1.9977675034939552e-05, "loss": 1.398, "mean_token_accuracy": 0.6563703020413717, "num_tokens": 157648478.0, "step": 944 }, { "entropy": 1.7183943092823029, "epoch": 0.10381478124742523, "grad_norm": 0.6543197631835938, "learning_rate": 1.9977562187159485e-05, "loss": 1.5301, "mean_token_accuracy": 0.6386249661445618, "num_tokens": 157843823.0, "step": 945 }, { "entropy": 1.8044803241888683, "epoch": 0.10392463815879817, "grad_norm": 0.8148881793022156, "learning_rate": 1.997744905524362e-05, "loss": 1.3916, "mean_token_accuracy": 0.6454024910926819, "num_tokens": 157990593.0, "step": 946 }, { "entropy": 1.6993337571620941, "epoch": 0.1040344950701711, "grad_norm": 0.6236430406570435, "learning_rate": 1.997733563919554e-05, "loss": 1.3219, "mean_token_accuracy": 0.6601608147223791, "num_tokens": 158132629.0, "step": 947 }, { "entropy": 1.8001770675182343, "epoch": 0.10414435198154404, "grad_norm": 0.884178638458252, "learning_rate": 1.9977221939018828e-05, "loss": 1.2416, "mean_token_accuracy": 0.6784195502599081, "num_tokens": 158246943.0, "step": 948 }, { "entropy": 1.6941777964433034, "epoch": 0.10425420889291698, "grad_norm": 0.6711726784706116, "learning_rate": 1.997710795471709e-05, "loss": 1.3108, "mean_token_accuracy": 0.6635520309209824, "num_tokens": 158415493.0, "step": 949 }, { "entropy": 1.7117989857991536, "epoch": 0.10436406580428992, "grad_norm": 0.6808717846870422, "learning_rate": 1.997699368629393e-05, "loss": 1.2102, "mean_token_accuracy": 0.6949316064516703, "num_tokens": 158540142.0, "step": 950 }, { "entropy": 1.6830700536568959, "epoch": 0.10447392271566285, "grad_norm": 0.5724490880966187, "learning_rate": 1.9976879133752968e-05, "loss": 1.3332, "mean_token_accuracy": 0.6712360580762228, "num_tokens": 158724951.0, "step": 951 }, { "entropy": 1.7850287755330403, "epoch": 0.10458377962703579, "grad_norm": 0.6788071393966675, "learning_rate": 1.9976764297097822e-05, "loss": 1.5117, "mean_token_accuracy": 0.6374179919560751, "num_tokens": 158927317.0, "step": 952 }, { "entropy": 1.7926728427410126, "epoch": 0.10469363653840873, "grad_norm": 0.6966988444328308, "learning_rate": 1.997664917633213e-05, "loss": 1.4233, "mean_token_accuracy": 0.6491448630889257, "num_tokens": 159079596.0, "step": 953 }, { "entropy": 1.8218385577201843, "epoch": 0.10480349344978165, "grad_norm": 0.6010218262672424, "learning_rate": 1.997653377145954e-05, "loss": 1.5112, "mean_token_accuracy": 0.6297382464011511, "num_tokens": 159274755.0, "step": 954 }, { "entropy": 1.7728229264418285, "epoch": 0.1049133503611546, "grad_norm": 0.7513667345046997, "learning_rate": 1.9976418082483702e-05, "loss": 1.4339, "mean_token_accuracy": 0.6446366707483927, "num_tokens": 159482155.0, "step": 955 }, { "entropy": 1.7381539046764374, "epoch": 0.10502320727252754, "grad_norm": 0.7744698524475098, "learning_rate": 1.9976302109408274e-05, "loss": 1.2582, "mean_token_accuracy": 0.6699808637301127, "num_tokens": 159597958.0, "step": 956 }, { "entropy": 1.7696404854456584, "epoch": 0.10513306418390048, "grad_norm": 0.7949146032333374, "learning_rate": 1.997618585223693e-05, "loss": 1.5213, "mean_token_accuracy": 0.644510825475057, "num_tokens": 159798577.0, "step": 957 }, { "entropy": 1.7655962506930034, "epoch": 0.1052429210952734, "grad_norm": 0.8974981904029846, "learning_rate": 1.9976069310973346e-05, "loss": 1.5157, "mean_token_accuracy": 0.6608059157927831, "num_tokens": 159983233.0, "step": 958 }, { "entropy": 1.8399652739365895, "epoch": 0.10535277800664634, "grad_norm": 0.7629010081291199, "learning_rate": 1.9975952485621216e-05, "loss": 1.5352, "mean_token_accuracy": 0.6394019822279612, "num_tokens": 160163249.0, "step": 959 }, { "entropy": 1.791842778523763, "epoch": 0.10546263491801929, "grad_norm": 0.6020023822784424, "learning_rate": 1.9975835376184234e-05, "loss": 1.5612, "mean_token_accuracy": 0.636442189415296, "num_tokens": 160345525.0, "step": 960 }, { "entropy": 1.7129618525505066, "epoch": 0.10557249182939221, "grad_norm": 0.6331304311752319, "learning_rate": 1.9975717982666106e-05, "loss": 1.3834, "mean_token_accuracy": 0.6419539799292883, "num_tokens": 160529851.0, "step": 961 }, { "entropy": 1.719476044178009, "epoch": 0.10568234874076515, "grad_norm": 0.6035734415054321, "learning_rate": 1.997560030507055e-05, "loss": 1.3994, "mean_token_accuracy": 0.6515898952881495, "num_tokens": 160716656.0, "step": 962 }, { "entropy": 1.7441074351469676, "epoch": 0.1057922056521381, "grad_norm": 0.7362382411956787, "learning_rate": 1.9975482343401288e-05, "loss": 1.4038, "mean_token_accuracy": 0.6557846516370773, "num_tokens": 160877936.0, "step": 963 }, { "entropy": 1.7481865584850311, "epoch": 0.10590206256351102, "grad_norm": 0.7199084162712097, "learning_rate": 1.9975364097662052e-05, "loss": 1.4177, "mean_token_accuracy": 0.6450687150160471, "num_tokens": 161095705.0, "step": 964 }, { "entropy": 1.8243679702281952, "epoch": 0.10601191947488396, "grad_norm": 0.7219046950340271, "learning_rate": 1.9975245567856588e-05, "loss": 1.3518, "mean_token_accuracy": 0.6582407156626383, "num_tokens": 161210783.0, "step": 965 }, { "entropy": 1.7808333535989125, "epoch": 0.1061217763862569, "grad_norm": 2.4213051795959473, "learning_rate": 1.9975126753988647e-05, "loss": 1.1241, "mean_token_accuracy": 0.6723516434431076, "num_tokens": 161360356.0, "step": 966 }, { "entropy": 1.7639015515645344, "epoch": 0.10623163329762984, "grad_norm": 0.7218215465545654, "learning_rate": 1.997500765606199e-05, "loss": 1.3598, "mean_token_accuracy": 0.6689661492904028, "num_tokens": 161492497.0, "step": 967 }, { "entropy": 1.7146398623784382, "epoch": 0.10634149020900277, "grad_norm": 0.6931177377700806, "learning_rate": 1.997488827408038e-05, "loss": 1.253, "mean_token_accuracy": 0.6601031124591827, "num_tokens": 161610533.0, "step": 968 }, { "entropy": 1.8045812646547954, "epoch": 0.10645134712037571, "grad_norm": 0.7326215505599976, "learning_rate": 1.997476860804761e-05, "loss": 1.417, "mean_token_accuracy": 0.6494764337937037, "num_tokens": 161748491.0, "step": 969 }, { "entropy": 1.8294326663017273, "epoch": 0.10656120403174865, "grad_norm": 1.0038849115371704, "learning_rate": 1.9974648657967446e-05, "loss": 1.6506, "mean_token_accuracy": 0.6360284189383189, "num_tokens": 161920941.0, "step": 970 }, { "entropy": 1.7476938267548878, "epoch": 0.10667106094312158, "grad_norm": 0.6628261804580688, "learning_rate": 1.9974528423843703e-05, "loss": 1.3734, "mean_token_accuracy": 0.6495839109023412, "num_tokens": 162065785.0, "step": 971 }, { "entropy": 1.724196970462799, "epoch": 0.10678091785449452, "grad_norm": 0.7884043455123901, "learning_rate": 1.9974407905680176e-05, "loss": 1.2882, "mean_token_accuracy": 0.6736412594715754, "num_tokens": 162210428.0, "step": 972 }, { "entropy": 1.7328240772088368, "epoch": 0.10689077476586746, "grad_norm": 0.7030888795852661, "learning_rate": 1.9974287103480684e-05, "loss": 1.4312, "mean_token_accuracy": 0.6504674156506857, "num_tokens": 162358621.0, "step": 973 }, { "entropy": 1.7429528137048085, "epoch": 0.10700063167724039, "grad_norm": 0.782061755657196, "learning_rate": 1.997416601724905e-05, "loss": 1.2338, "mean_token_accuracy": 0.6769704719384512, "num_tokens": 162479846.0, "step": 974 }, { "entropy": 1.7454247772693634, "epoch": 0.10711048858861333, "grad_norm": 0.6557988524436951, "learning_rate": 1.9974044646989104e-05, "loss": 1.4533, "mean_token_accuracy": 0.6591964761416117, "num_tokens": 162650080.0, "step": 975 }, { "entropy": 1.7760846018791199, "epoch": 0.10722034549998627, "grad_norm": 0.6828281879425049, "learning_rate": 1.997392299270469e-05, "loss": 1.407, "mean_token_accuracy": 0.651309072971344, "num_tokens": 162793424.0, "step": 976 }, { "entropy": 1.6812183062235515, "epoch": 0.10733020241135921, "grad_norm": 0.6630516052246094, "learning_rate": 1.997380105439966e-05, "loss": 1.3284, "mean_token_accuracy": 0.6731794824202856, "num_tokens": 162959826.0, "step": 977 }, { "entropy": 1.6819258431593578, "epoch": 0.10744005932273214, "grad_norm": 0.7564113736152649, "learning_rate": 1.9973678832077864e-05, "loss": 1.3177, "mean_token_accuracy": 0.6723869691292444, "num_tokens": 163115903.0, "step": 978 }, { "entropy": 1.7295123835404713, "epoch": 0.10754991623410508, "grad_norm": 0.7460121512413025, "learning_rate": 1.997355632574318e-05, "loss": 1.3707, "mean_token_accuracy": 0.671826089421908, "num_tokens": 163258000.0, "step": 979 }, { "entropy": 1.739086627960205, "epoch": 0.10765977314547802, "grad_norm": 0.7044259905815125, "learning_rate": 1.997343353539948e-05, "loss": 1.243, "mean_token_accuracy": 0.670919140179952, "num_tokens": 163390903.0, "step": 980 }, { "entropy": 1.7413827578226726, "epoch": 0.10776963005685095, "grad_norm": 0.6770658493041992, "learning_rate": 1.9973310461050656e-05, "loss": 1.5073, "mean_token_accuracy": 0.6495876014232635, "num_tokens": 163623202.0, "step": 981 }, { "entropy": 1.7152122557163239, "epoch": 0.10787948696822389, "grad_norm": 0.6223786473274231, "learning_rate": 1.99731871027006e-05, "loss": 1.3162, "mean_token_accuracy": 0.6673212250073751, "num_tokens": 163747993.0, "step": 982 }, { "entropy": 1.76864688595136, "epoch": 0.10798934387959683, "grad_norm": 0.6262992024421692, "learning_rate": 1.9973063460353207e-05, "loss": 1.4522, "mean_token_accuracy": 0.645324652393659, "num_tokens": 163992793.0, "step": 983 }, { "entropy": 1.76500407854716, "epoch": 0.10809920079096977, "grad_norm": 0.5834012627601624, "learning_rate": 1.997293953401241e-05, "loss": 1.4136, "mean_token_accuracy": 0.6545668741067251, "num_tokens": 164176810.0, "step": 984 }, { "entropy": 1.7618548075358074, "epoch": 0.1082090577023427, "grad_norm": 0.6489772200584412, "learning_rate": 1.997281532368211e-05, "loss": 1.3492, "mean_token_accuracy": 0.65315709511439, "num_tokens": 164342656.0, "step": 985 }, { "entropy": 1.7967805167039235, "epoch": 0.10831891461371564, "grad_norm": 0.8636541962623596, "learning_rate": 1.9972690829366254e-05, "loss": 1.4586, "mean_token_accuracy": 0.6364806840817133, "num_tokens": 164534044.0, "step": 986 }, { "entropy": 1.79604172706604, "epoch": 0.10842877152508858, "grad_norm": 0.6198572516441345, "learning_rate": 1.9972566051068775e-05, "loss": 1.4286, "mean_token_accuracy": 0.6450680047273636, "num_tokens": 164692093.0, "step": 987 }, { "entropy": 1.74147434035937, "epoch": 0.1085386284364615, "grad_norm": 0.9842249155044556, "learning_rate": 1.9972440988793623e-05, "loss": 1.4988, "mean_token_accuracy": 0.6356190939744314, "num_tokens": 164968178.0, "step": 988 }, { "entropy": 1.699428141117096, "epoch": 0.10864848534783444, "grad_norm": 0.6791302561759949, "learning_rate": 1.997231564254476e-05, "loss": 1.3922, "mean_token_accuracy": 0.6636618773142496, "num_tokens": 165128695.0, "step": 989 }, { "entropy": 1.832966188589732, "epoch": 0.10875834225920739, "grad_norm": 0.7480493187904358, "learning_rate": 1.9972190012326146e-05, "loss": 1.505, "mean_token_accuracy": 0.6330814162890116, "num_tokens": 165312932.0, "step": 990 }, { "entropy": 1.7748298744360607, "epoch": 0.10886819917058031, "grad_norm": 0.8279789090156555, "learning_rate": 1.9972064098141763e-05, "loss": 1.5419, "mean_token_accuracy": 0.6653905312220255, "num_tokens": 165499258.0, "step": 991 }, { "entropy": 1.750197817881902, "epoch": 0.10897805608195325, "grad_norm": 0.8178195953369141, "learning_rate": 1.9971937899995595e-05, "loss": 1.2939, "mean_token_accuracy": 0.6684920241435369, "num_tokens": 165641236.0, "step": 992 }, { "entropy": 1.6943954626719158, "epoch": 0.1090879129933262, "grad_norm": 0.6711166501045227, "learning_rate": 1.9971811417891634e-05, "loss": 1.2783, "mean_token_accuracy": 0.6719668805599213, "num_tokens": 165795977.0, "step": 993 }, { "entropy": 1.7269932230313618, "epoch": 0.10919776990469914, "grad_norm": 0.6310712099075317, "learning_rate": 1.9971684651833886e-05, "loss": 1.3686, "mean_token_accuracy": 0.6621618568897247, "num_tokens": 165955938.0, "step": 994 }, { "entropy": 1.7690664927164714, "epoch": 0.10930762681607206, "grad_norm": 0.7837558388710022, "learning_rate": 1.9971557601826358e-05, "loss": 1.3947, "mean_token_accuracy": 0.6452465703090032, "num_tokens": 166087991.0, "step": 995 }, { "entropy": 1.7119774222373962, "epoch": 0.109417483727445, "grad_norm": 0.5863208770751953, "learning_rate": 1.9971430267873077e-05, "loss": 1.535, "mean_token_accuracy": 0.6391033828258514, "num_tokens": 166281825.0, "step": 996 }, { "entropy": 1.748874545097351, "epoch": 0.10952734063881794, "grad_norm": 0.5901543498039246, "learning_rate": 1.997130264997807e-05, "loss": 1.3562, "mean_token_accuracy": 0.6514041026433309, "num_tokens": 166481277.0, "step": 997 }, { "entropy": 1.7872538765271504, "epoch": 0.10963719755019087, "grad_norm": 0.7273993492126465, "learning_rate": 1.9971174748145376e-05, "loss": 1.3969, "mean_token_accuracy": 0.6459978073835373, "num_tokens": 166638635.0, "step": 998 }, { "entropy": 1.7589583992958069, "epoch": 0.10974705446156381, "grad_norm": 0.7236528992652893, "learning_rate": 1.997104656237905e-05, "loss": 1.443, "mean_token_accuracy": 0.6443432023127874, "num_tokens": 166842013.0, "step": 999 }, { "entropy": 1.716710348924001, "epoch": 0.10985691137293675, "grad_norm": 0.6968439817428589, "learning_rate": 1.9970918092683133e-05, "loss": 1.3732, "mean_token_accuracy": 0.6586572974920273, "num_tokens": 166983909.0, "step": 1000 }, { "entropy": 1.7281900147596996, "epoch": 0.1099667682843097, "grad_norm": 0.7102954983711243, "learning_rate": 1.9970789339061707e-05, "loss": 1.4898, "mean_token_accuracy": 0.6502237866322199, "num_tokens": 167207507.0, "step": 1001 }, { "entropy": 1.7563473085562389, "epoch": 0.11007662519568262, "grad_norm": 0.687754213809967, "learning_rate": 1.997066030151884e-05, "loss": 1.4765, "mean_token_accuracy": 0.6534829437732697, "num_tokens": 167361838.0, "step": 1002 }, { "entropy": 1.7929190198580425, "epoch": 0.11018648210705556, "grad_norm": 0.671809732913971, "learning_rate": 1.9970530980058614e-05, "loss": 1.518, "mean_token_accuracy": 0.6443095902601877, "num_tokens": 167562959.0, "step": 1003 }, { "entropy": 1.7678324580192566, "epoch": 0.1102963390184285, "grad_norm": 0.602029025554657, "learning_rate": 1.9970401374685126e-05, "loss": 1.4024, "mean_token_accuracy": 0.6431177506844202, "num_tokens": 167755692.0, "step": 1004 }, { "entropy": 1.7461299399534862, "epoch": 0.11040619592980143, "grad_norm": 0.564751386642456, "learning_rate": 1.9970271485402478e-05, "loss": 1.458, "mean_token_accuracy": 0.6464813202619553, "num_tokens": 167945870.0, "step": 1005 }, { "entropy": 1.7465039889017742, "epoch": 0.11051605284117437, "grad_norm": 0.6950850486755371, "learning_rate": 1.9970141312214778e-05, "loss": 1.3697, "mean_token_accuracy": 0.654125397404035, "num_tokens": 168123031.0, "step": 1006 }, { "entropy": 1.7230841716130574, "epoch": 0.11062590975254731, "grad_norm": 0.5795581340789795, "learning_rate": 1.9970010855126148e-05, "loss": 1.3064, "mean_token_accuracy": 0.6747977683941523, "num_tokens": 168344827.0, "step": 1007 }, { "entropy": 1.768018126487732, "epoch": 0.11073576666392024, "grad_norm": 0.727882444858551, "learning_rate": 1.9969880114140717e-05, "loss": 1.3756, "mean_token_accuracy": 0.6502227435509363, "num_tokens": 168493595.0, "step": 1008 }, { "entropy": 1.7701697448889415, "epoch": 0.11084562357529318, "grad_norm": 0.7508238554000854, "learning_rate": 1.9969749089262623e-05, "loss": 1.2765, "mean_token_accuracy": 0.6652316004037857, "num_tokens": 168603351.0, "step": 1009 }, { "entropy": 1.7724807858467102, "epoch": 0.11095548048666612, "grad_norm": 0.6007605791091919, "learning_rate": 1.9969617780496008e-05, "loss": 1.3051, "mean_token_accuracy": 0.666801263888677, "num_tokens": 168780284.0, "step": 1010 }, { "entropy": 1.7377927700678508, "epoch": 0.11106533739803906, "grad_norm": 0.7759512662887573, "learning_rate": 1.9969486187845037e-05, "loss": 1.4752, "mean_token_accuracy": 0.6457094500462214, "num_tokens": 168976335.0, "step": 1011 }, { "entropy": 1.7481233775615692, "epoch": 0.11117519430941199, "grad_norm": 0.6538453698158264, "learning_rate": 1.9969354311313868e-05, "loss": 1.4398, "mean_token_accuracy": 0.646173338095347, "num_tokens": 169130001.0, "step": 1012 }, { "entropy": 1.7797774871190388, "epoch": 0.11128505122078493, "grad_norm": 0.8638194799423218, "learning_rate": 1.9969222150906677e-05, "loss": 1.4131, "mean_token_accuracy": 0.6468067765235901, "num_tokens": 169258271.0, "step": 1013 }, { "entropy": 1.7161829272905986, "epoch": 0.11139490813215787, "grad_norm": 0.8480156064033508, "learning_rate": 1.9969089706627646e-05, "loss": 1.351, "mean_token_accuracy": 0.668408066034317, "num_tokens": 169448988.0, "step": 1014 }, { "entropy": 1.721673975388209, "epoch": 0.1115047650435308, "grad_norm": 0.6855505108833313, "learning_rate": 1.996895697848097e-05, "loss": 1.4182, "mean_token_accuracy": 0.6560923904180527, "num_tokens": 169617296.0, "step": 1015 }, { "entropy": 1.754950036605199, "epoch": 0.11161462195490374, "grad_norm": 0.5711190104484558, "learning_rate": 1.9968823966470844e-05, "loss": 1.5817, "mean_token_accuracy": 0.6348544011513392, "num_tokens": 169828139.0, "step": 1016 }, { "entropy": 1.8216430644194286, "epoch": 0.11172447886627668, "grad_norm": 0.7866774201393127, "learning_rate": 1.9968690670601483e-05, "loss": 1.4799, "mean_token_accuracy": 0.6281097233295441, "num_tokens": 169993288.0, "step": 1017 }, { "entropy": 1.7923205494880676, "epoch": 0.11183433577764962, "grad_norm": 0.7133738398551941, "learning_rate": 1.99685570908771e-05, "loss": 1.5161, "mean_token_accuracy": 0.6534708514809608, "num_tokens": 170185324.0, "step": 1018 }, { "entropy": 1.7083572149276733, "epoch": 0.11194419268902255, "grad_norm": 0.6993825435638428, "learning_rate": 1.9968423227301928e-05, "loss": 1.3804, "mean_token_accuracy": 0.6609247972567877, "num_tokens": 170347764.0, "step": 1019 }, { "entropy": 1.7944194575150807, "epoch": 0.11205404960039549, "grad_norm": 0.6917293071746826, "learning_rate": 1.9968289079880204e-05, "loss": 1.4405, "mean_token_accuracy": 0.6581203043460846, "num_tokens": 170498990.0, "step": 1020 }, { "entropy": 1.7612830102443695, "epoch": 0.11216390651176843, "grad_norm": 0.6517383456230164, "learning_rate": 1.9968154648616174e-05, "loss": 1.4844, "mean_token_accuracy": 0.6349473794301351, "num_tokens": 170659958.0, "step": 1021 }, { "entropy": 1.7332187394301097, "epoch": 0.11227376342314135, "grad_norm": 0.7480264902114868, "learning_rate": 1.996801993351408e-05, "loss": 1.3936, "mean_token_accuracy": 0.6647941619157791, "num_tokens": 170819653.0, "step": 1022 }, { "entropy": 1.7455625931421916, "epoch": 0.1123836203345143, "grad_norm": 0.6116794943809509, "learning_rate": 1.996788493457821e-05, "loss": 1.4525, "mean_token_accuracy": 0.6416516304016113, "num_tokens": 170994301.0, "step": 1023 }, { "entropy": 1.731279730796814, "epoch": 0.11249347724588724, "grad_norm": 0.6867764592170715, "learning_rate": 1.9967749651812815e-05, "loss": 1.321, "mean_token_accuracy": 0.6761416296164194, "num_tokens": 171174374.0, "step": 1024 }, { "entropy": 1.7446598211924236, "epoch": 0.11260333415726016, "grad_norm": 0.7553854584693909, "learning_rate": 1.9967614085222187e-05, "loss": 1.5306, "mean_token_accuracy": 0.6366226524114609, "num_tokens": 171329470.0, "step": 1025 }, { "entropy": 1.7547922631104786, "epoch": 0.1127131910686331, "grad_norm": 0.7220216393470764, "learning_rate": 1.996747823481061e-05, "loss": 1.3455, "mean_token_accuracy": 0.6626160144805908, "num_tokens": 171493059.0, "step": 1026 }, { "entropy": 1.690296709537506, "epoch": 0.11282304798000604, "grad_norm": 0.6687548160552979, "learning_rate": 1.9967342100582394e-05, "loss": 1.3581, "mean_token_accuracy": 0.6656670471032461, "num_tokens": 171646415.0, "step": 1027 }, { "entropy": 1.7647127310434978, "epoch": 0.11293290489137899, "grad_norm": 0.637204110622406, "learning_rate": 1.9967205682541834e-05, "loss": 1.4116, "mean_token_accuracy": 0.6447415898243586, "num_tokens": 171828506.0, "step": 1028 }, { "entropy": 1.7863658169905345, "epoch": 0.11304276180275191, "grad_norm": 0.6792446374893188, "learning_rate": 1.9967068980693262e-05, "loss": 1.3788, "mean_token_accuracy": 0.6593380073706309, "num_tokens": 171976616.0, "step": 1029 }, { "entropy": 1.7621726393699646, "epoch": 0.11315261871412485, "grad_norm": 0.7082951068878174, "learning_rate": 1.9966931995040992e-05, "loss": 1.4682, "mean_token_accuracy": 0.6501838515202204, "num_tokens": 172158583.0, "step": 1030 }, { "entropy": 1.8039735853672028, "epoch": 0.1132624756254978, "grad_norm": 0.6356441974639893, "learning_rate": 1.9966794725589368e-05, "loss": 1.4936, "mean_token_accuracy": 0.6384439915418625, "num_tokens": 172320260.0, "step": 1031 }, { "entropy": 1.731901486714681, "epoch": 0.11337233253687072, "grad_norm": 0.7175825238227844, "learning_rate": 1.9966657172342733e-05, "loss": 1.2825, "mean_token_accuracy": 0.6734850654999415, "num_tokens": 172454571.0, "step": 1032 }, { "entropy": 1.7200752000013988, "epoch": 0.11348218944824366, "grad_norm": 0.9528830647468567, "learning_rate": 1.9966519335305434e-05, "loss": 1.3735, "mean_token_accuracy": 0.653776541352272, "num_tokens": 172605672.0, "step": 1033 }, { "entropy": 1.7589248915513356, "epoch": 0.1135920463596166, "grad_norm": 0.7243776917457581, "learning_rate": 1.996638121448184e-05, "loss": 1.4101, "mean_token_accuracy": 0.6624555786450704, "num_tokens": 172817225.0, "step": 1034 }, { "entropy": 1.7110880215962727, "epoch": 0.11370190327098953, "grad_norm": 0.7603755593299866, "learning_rate": 1.9966242809876323e-05, "loss": 1.3095, "mean_token_accuracy": 0.6720947672923406, "num_tokens": 172991993.0, "step": 1035 }, { "entropy": 1.70220681031545, "epoch": 0.11381176018236247, "grad_norm": 0.6258305311203003, "learning_rate": 1.9966104121493262e-05, "loss": 1.4045, "mean_token_accuracy": 0.6527692576249441, "num_tokens": 173175089.0, "step": 1036 }, { "entropy": 1.7945917348066966, "epoch": 0.11392161709373541, "grad_norm": 0.7107659578323364, "learning_rate": 1.9965965149337044e-05, "loss": 1.4265, "mean_token_accuracy": 0.6489067127307256, "num_tokens": 173367208.0, "step": 1037 }, { "entropy": 1.7529179851214092, "epoch": 0.11403147400510835, "grad_norm": 0.6977424621582031, "learning_rate": 1.9965825893412066e-05, "loss": 1.4642, "mean_token_accuracy": 0.6490010867516199, "num_tokens": 173529488.0, "step": 1038 }, { "entropy": 1.7823486824830372, "epoch": 0.11414133091648128, "grad_norm": 0.7429659366607666, "learning_rate": 1.9965686353722744e-05, "loss": 1.4706, "mean_token_accuracy": 0.6589999397595724, "num_tokens": 173743382.0, "step": 1039 }, { "entropy": 1.7522972325483959, "epoch": 0.11425118782785422, "grad_norm": 0.6352205276489258, "learning_rate": 1.9965546530273484e-05, "loss": 1.4292, "mean_token_accuracy": 0.6400622725486755, "num_tokens": 173912436.0, "step": 1040 }, { "entropy": 1.7382917702198029, "epoch": 0.11436104473922716, "grad_norm": 0.7589994072914124, "learning_rate": 1.9965406423068722e-05, "loss": 1.3622, "mean_token_accuracy": 0.6633311361074448, "num_tokens": 174093358.0, "step": 1041 }, { "entropy": 1.7625388304392497, "epoch": 0.11447090165060009, "grad_norm": 0.6129192113876343, "learning_rate": 1.9965266032112883e-05, "loss": 1.4215, "mean_token_accuracy": 0.6418607930342356, "num_tokens": 174317864.0, "step": 1042 }, { "entropy": 1.6891125738620758, "epoch": 0.11458075856197303, "grad_norm": 0.7192829251289368, "learning_rate": 1.9965125357410415e-05, "loss": 1.437, "mean_token_accuracy": 0.6587806989749273, "num_tokens": 174456645.0, "step": 1043 }, { "entropy": 1.7558738390604656, "epoch": 0.11469061547334597, "grad_norm": 0.8326495289802551, "learning_rate": 1.9964984398965768e-05, "loss": 1.3175, "mean_token_accuracy": 0.6619067142407099, "num_tokens": 174574320.0, "step": 1044 }, { "entropy": 1.7502717077732086, "epoch": 0.11480047238471891, "grad_norm": 0.6890818476676941, "learning_rate": 1.9964843156783406e-05, "loss": 1.456, "mean_token_accuracy": 0.6418903172016144, "num_tokens": 174761804.0, "step": 1045 }, { "entropy": 1.7364663283030193, "epoch": 0.11491032929609184, "grad_norm": 0.6975388526916504, "learning_rate": 1.99647016308678e-05, "loss": 1.4514, "mean_token_accuracy": 0.6475592801968256, "num_tokens": 174937492.0, "step": 1046 }, { "entropy": 1.7429968516031902, "epoch": 0.11502018620746478, "grad_norm": 0.8240295052528381, "learning_rate": 1.9964559821223423e-05, "loss": 1.3563, "mean_token_accuracy": 0.6688429315884908, "num_tokens": 175073825.0, "step": 1047 }, { "entropy": 1.765154093503952, "epoch": 0.11513004311883772, "grad_norm": 0.742708146572113, "learning_rate": 1.9964417727854766e-05, "loss": 1.6561, "mean_token_accuracy": 0.629709780216217, "num_tokens": 175240948.0, "step": 1048 }, { "entropy": 1.7909338076909382, "epoch": 0.11523990003021065, "grad_norm": 10.230766296386719, "learning_rate": 1.9964275350766328e-05, "loss": 1.2824, "mean_token_accuracy": 0.6872695883115133, "num_tokens": 175389639.0, "step": 1049 }, { "entropy": 1.7785474856694539, "epoch": 0.11534975694158359, "grad_norm": 0.7796043157577515, "learning_rate": 1.996413268996262e-05, "loss": 1.3464, "mean_token_accuracy": 0.6501271277666092, "num_tokens": 175507065.0, "step": 1050 }, { "entropy": 1.70500651995341, "epoch": 0.11545961385295653, "grad_norm": 0.7728234529495239, "learning_rate": 1.9963989745448148e-05, "loss": 1.314, "mean_token_accuracy": 0.6640622218449911, "num_tokens": 175645190.0, "step": 1051 }, { "entropy": 1.778702090183894, "epoch": 0.11556947076432945, "grad_norm": 0.7463306784629822, "learning_rate": 1.9963846517227438e-05, "loss": 1.4866, "mean_token_accuracy": 0.6476029555002848, "num_tokens": 175787142.0, "step": 1052 }, { "entropy": 1.7820600767930348, "epoch": 0.1156793276757024, "grad_norm": 0.8117401003837585, "learning_rate": 1.9963703005305026e-05, "loss": 1.6304, "mean_token_accuracy": 0.6415733198324839, "num_tokens": 175978725.0, "step": 1053 }, { "entropy": 1.767964760462443, "epoch": 0.11578918458707534, "grad_norm": 0.7086589336395264, "learning_rate": 1.9963559209685453e-05, "loss": 1.488, "mean_token_accuracy": 0.6404663970073065, "num_tokens": 176151940.0, "step": 1054 }, { "entropy": 1.7119649251302083, "epoch": 0.11589904149844828, "grad_norm": 0.6427537798881531, "learning_rate": 1.9963415130373272e-05, "loss": 1.4038, "mean_token_accuracy": 0.6565761119127274, "num_tokens": 176346366.0, "step": 1055 }, { "entropy": 1.6838423609733582, "epoch": 0.1160088984098212, "grad_norm": 0.6571147441864014, "learning_rate": 1.996327076737304e-05, "loss": 1.4475, "mean_token_accuracy": 0.657142753402392, "num_tokens": 176531787.0, "step": 1056 }, { "entropy": 1.7079651753107707, "epoch": 0.11611875532119414, "grad_norm": 0.7071012258529663, "learning_rate": 1.9963126120689327e-05, "loss": 1.245, "mean_token_accuracy": 0.6794669379790624, "num_tokens": 176644581.0, "step": 1057 }, { "entropy": 1.7078370153903961, "epoch": 0.11622861223256709, "grad_norm": 0.6504570841789246, "learning_rate": 1.996298119032671e-05, "loss": 1.4807, "mean_token_accuracy": 0.6452956398328146, "num_tokens": 176800118.0, "step": 1058 }, { "entropy": 1.7567949692408245, "epoch": 0.11633846914394001, "grad_norm": 0.6000586152076721, "learning_rate": 1.996283597628978e-05, "loss": 1.5075, "mean_token_accuracy": 0.6261270443598429, "num_tokens": 176997873.0, "step": 1059 }, { "entropy": 1.737035463253657, "epoch": 0.11644832605531295, "grad_norm": 0.785273551940918, "learning_rate": 1.996269047858313e-05, "loss": 1.4997, "mean_token_accuracy": 0.6675494114557902, "num_tokens": 177170996.0, "step": 1060 }, { "entropy": 1.778021514415741, "epoch": 0.1165581829666859, "grad_norm": 0.6912639141082764, "learning_rate": 1.996254469721136e-05, "loss": 1.3567, "mean_token_accuracy": 0.6619671285152435, "num_tokens": 177310138.0, "step": 1061 }, { "entropy": 1.7289757231871288, "epoch": 0.11666803987805884, "grad_norm": 0.6175736784934998, "learning_rate": 1.9962398632179095e-05, "loss": 1.4535, "mean_token_accuracy": 0.6459249506394068, "num_tokens": 177492607.0, "step": 1062 }, { "entropy": 1.7960573335488637, "epoch": 0.11677789678943176, "grad_norm": 0.64888596534729, "learning_rate": 1.996225228349095e-05, "loss": 1.3823, "mean_token_accuracy": 0.6632821957270304, "num_tokens": 177682018.0, "step": 1063 }, { "entropy": 1.7856767276922862, "epoch": 0.1168877537008047, "grad_norm": 0.8114152550697327, "learning_rate": 1.9962105651151554e-05, "loss": 1.507, "mean_token_accuracy": 0.6501527229944865, "num_tokens": 177849461.0, "step": 1064 }, { "entropy": 1.7049864828586578, "epoch": 0.11699761061217764, "grad_norm": 0.6832014918327332, "learning_rate": 1.9961958735165558e-05, "loss": 1.4545, "mean_token_accuracy": 0.6496352106332779, "num_tokens": 178013404.0, "step": 1065 }, { "entropy": 1.766481727361679, "epoch": 0.11710746752355057, "grad_norm": 0.925279438495636, "learning_rate": 1.9961811535537607e-05, "loss": 1.5581, "mean_token_accuracy": 0.6499437689781189, "num_tokens": 178180175.0, "step": 1066 }, { "entropy": 1.8251918852329254, "epoch": 0.11721732443492351, "grad_norm": 0.7407745718955994, "learning_rate": 1.9961664052272355e-05, "loss": 1.4513, "mean_token_accuracy": 0.6350584477186203, "num_tokens": 178338753.0, "step": 1067 }, { "entropy": 1.7265767951806386, "epoch": 0.11732718134629645, "grad_norm": 0.7054161429405212, "learning_rate": 1.996151628537448e-05, "loss": 1.3095, "mean_token_accuracy": 0.6645342856645584, "num_tokens": 178489201.0, "step": 1068 }, { "entropy": 1.687457690636317, "epoch": 0.11743703825766938, "grad_norm": 0.6299598813056946, "learning_rate": 1.9961368234848647e-05, "loss": 1.4949, "mean_token_accuracy": 0.6444557011127472, "num_tokens": 178682246.0, "step": 1069 }, { "entropy": 1.736327697833379, "epoch": 0.11754689516904232, "grad_norm": 0.7318028211593628, "learning_rate": 1.9961219900699545e-05, "loss": 1.5979, "mean_token_accuracy": 0.6583315829435984, "num_tokens": 178856231.0, "step": 1070 }, { "entropy": 1.7476358910401661, "epoch": 0.11765675208041526, "grad_norm": 0.5873830318450928, "learning_rate": 1.996107128293188e-05, "loss": 1.5281, "mean_token_accuracy": 0.6360836823781332, "num_tokens": 179036805.0, "step": 1071 }, { "entropy": 1.6975926160812378, "epoch": 0.1177666089917882, "grad_norm": 0.7855924367904663, "learning_rate": 1.9960922381550342e-05, "loss": 1.3234, "mean_token_accuracy": 0.6804987043142319, "num_tokens": 179179515.0, "step": 1072 }, { "entropy": 1.746078997850418, "epoch": 0.11787646590316113, "grad_norm": 0.7362106442451477, "learning_rate": 1.9960773196559647e-05, "loss": 1.3612, "mean_token_accuracy": 0.6657725870609283, "num_tokens": 179309228.0, "step": 1073 }, { "entropy": 1.6905361711978912, "epoch": 0.11798632281453407, "grad_norm": 0.5651462078094482, "learning_rate": 1.9960623727964522e-05, "loss": 1.379, "mean_token_accuracy": 0.6537969360748926, "num_tokens": 179544581.0, "step": 1074 }, { "entropy": 1.7257480025291443, "epoch": 0.11809617972590701, "grad_norm": 0.9071959257125854, "learning_rate": 1.9960473975769693e-05, "loss": 1.3199, "mean_token_accuracy": 0.6615277131398519, "num_tokens": 179650333.0, "step": 1075 }, { "entropy": 1.7202151914437611, "epoch": 0.11820603663727994, "grad_norm": 0.5977594256401062, "learning_rate": 1.9960323939979894e-05, "loss": 1.4065, "mean_token_accuracy": 0.6557470411062241, "num_tokens": 179832640.0, "step": 1076 }, { "entropy": 1.7765184839566548, "epoch": 0.11831589354865288, "grad_norm": 0.7578040361404419, "learning_rate": 1.9960173620599887e-05, "loss": 1.3649, "mean_token_accuracy": 0.6576367566982905, "num_tokens": 179984410.0, "step": 1077 }, { "entropy": 1.8273439009984334, "epoch": 0.11842575046002582, "grad_norm": 0.8141303062438965, "learning_rate": 1.996002301763442e-05, "loss": 1.444, "mean_token_accuracy": 0.6420343518257141, "num_tokens": 180146615.0, "step": 1078 }, { "entropy": 1.7694277266661327, "epoch": 0.11853560737139875, "grad_norm": 0.6545711755752563, "learning_rate": 1.9959872131088264e-05, "loss": 1.417, "mean_token_accuracy": 0.6598865836858749, "num_tokens": 180292276.0, "step": 1079 }, { "entropy": 1.7825441459814708, "epoch": 0.11864546428277169, "grad_norm": 0.5675626397132874, "learning_rate": 1.995972096096619e-05, "loss": 1.46, "mean_token_accuracy": 0.6376805355151495, "num_tokens": 180499401.0, "step": 1080 }, { "entropy": 1.738411416610082, "epoch": 0.11875532119414463, "grad_norm": 0.7769458889961243, "learning_rate": 1.9959569507272985e-05, "loss": 1.5524, "mean_token_accuracy": 0.6482310444116592, "num_tokens": 180664053.0, "step": 1081 }, { "entropy": 1.7660705149173737, "epoch": 0.11886517810551757, "grad_norm": 0.6864280700683594, "learning_rate": 1.9959417770013445e-05, "loss": 1.4833, "mean_token_accuracy": 0.6475979934136072, "num_tokens": 180832870.0, "step": 1082 }, { "entropy": 1.7743451297283173, "epoch": 0.1189750350168905, "grad_norm": 0.7256821990013123, "learning_rate": 1.995926574919237e-05, "loss": 1.3349, "mean_token_accuracy": 0.6730091770490011, "num_tokens": 180944514.0, "step": 1083 }, { "entropy": 1.791404128074646, "epoch": 0.11908489192826344, "grad_norm": 0.8112614154815674, "learning_rate": 1.9959113444814567e-05, "loss": 1.3597, "mean_token_accuracy": 0.6570585270722707, "num_tokens": 181104580.0, "step": 1084 }, { "entropy": 1.6880301733811696, "epoch": 0.11919474883963638, "grad_norm": 0.6577028632164001, "learning_rate": 1.9958960856884862e-05, "loss": 1.4622, "mean_token_accuracy": 0.6448287467161814, "num_tokens": 181300738.0, "step": 1085 }, { "entropy": 1.722762902577718, "epoch": 0.1193046057510093, "grad_norm": 0.7643953561782837, "learning_rate": 1.9958807985408083e-05, "loss": 1.4948, "mean_token_accuracy": 0.647366444269816, "num_tokens": 181454434.0, "step": 1086 }, { "entropy": 1.7054348190625508, "epoch": 0.11941446266238225, "grad_norm": 0.7724200487136841, "learning_rate": 1.995865483038907e-05, "loss": 1.4808, "mean_token_accuracy": 0.662505899866422, "num_tokens": 181651668.0, "step": 1087 }, { "entropy": 1.7870188256104786, "epoch": 0.11952431957375519, "grad_norm": 0.8176293969154358, "learning_rate": 1.995850139183267e-05, "loss": 1.3453, "mean_token_accuracy": 0.6639283945163091, "num_tokens": 181788460.0, "step": 1088 }, { "entropy": 1.7554938594500225, "epoch": 0.11963417648512813, "grad_norm": 0.7022227644920349, "learning_rate": 1.995834766974373e-05, "loss": 1.3982, "mean_token_accuracy": 0.6482542703549067, "num_tokens": 182038105.0, "step": 1089 }, { "entropy": 1.7522353132565816, "epoch": 0.11974403339650105, "grad_norm": 0.9576560854911804, "learning_rate": 1.995819366412713e-05, "loss": 1.4416, "mean_token_accuracy": 0.6475793421268463, "num_tokens": 182216542.0, "step": 1090 }, { "entropy": 1.7578060527642567, "epoch": 0.119853890307874, "grad_norm": 0.6407710313796997, "learning_rate": 1.9958039374987738e-05, "loss": 1.5203, "mean_token_accuracy": 0.6500546783208847, "num_tokens": 182385751.0, "step": 1091 }, { "entropy": 1.777414192756017, "epoch": 0.11996374721924694, "grad_norm": 0.8721911311149597, "learning_rate": 1.995788480233043e-05, "loss": 1.5271, "mean_token_accuracy": 0.635983943939209, "num_tokens": 182566662.0, "step": 1092 }, { "entropy": 1.7177048722902934, "epoch": 0.12007360413061986, "grad_norm": 0.6299294829368591, "learning_rate": 1.9957729946160108e-05, "loss": 1.2945, "mean_token_accuracy": 0.6612618813912073, "num_tokens": 182701870.0, "step": 1093 }, { "entropy": 1.6905933519204457, "epoch": 0.1201834610419928, "grad_norm": 0.7173838019371033, "learning_rate": 1.995757480648167e-05, "loss": 1.4643, "mean_token_accuracy": 0.6514955560366312, "num_tokens": 182853909.0, "step": 1094 }, { "entropy": 1.8044182658195496, "epoch": 0.12029331795336574, "grad_norm": 0.7805381417274475, "learning_rate": 1.995741938330003e-05, "loss": 1.4079, "mean_token_accuracy": 0.6487281521161398, "num_tokens": 182996153.0, "step": 1095 }, { "entropy": 1.7813920577367146, "epoch": 0.12040317486473867, "grad_norm": 0.6752137541770935, "learning_rate": 1.9957263676620094e-05, "loss": 1.5411, "mean_token_accuracy": 0.6410238941510519, "num_tokens": 183196179.0, "step": 1096 }, { "entropy": 1.7193073829015095, "epoch": 0.12051303177611161, "grad_norm": 0.7395818829536438, "learning_rate": 1.9957107686446805e-05, "loss": 1.2063, "mean_token_accuracy": 0.678206260005633, "num_tokens": 183324746.0, "step": 1097 }, { "entropy": 1.7467433512210846, "epoch": 0.12062288868748455, "grad_norm": 0.7600111365318298, "learning_rate": 1.995695141278509e-05, "loss": 1.402, "mean_token_accuracy": 0.6539099762837092, "num_tokens": 183455165.0, "step": 1098 }, { "entropy": 1.766270915667216, "epoch": 0.1207327455988575, "grad_norm": 0.6678837537765503, "learning_rate": 1.9956794855639902e-05, "loss": 1.5132, "mean_token_accuracy": 0.6394678006569544, "num_tokens": 183653879.0, "step": 1099 }, { "entropy": 1.70885169506073, "epoch": 0.12084260251023042, "grad_norm": 0.6890634894371033, "learning_rate": 1.9956638015016192e-05, "loss": 1.4759, "mean_token_accuracy": 0.6423639605442683, "num_tokens": 183865460.0, "step": 1100 }, { "entropy": 1.7804962793986003, "epoch": 0.12095245942160336, "grad_norm": 0.6882741451263428, "learning_rate": 1.9956480890918923e-05, "loss": 1.3699, "mean_token_accuracy": 0.6530927171309789, "num_tokens": 184012429.0, "step": 1101 }, { "entropy": 1.7800053457419078, "epoch": 0.1210623163329763, "grad_norm": 0.7321879863739014, "learning_rate": 1.9956323483353073e-05, "loss": 1.3303, "mean_token_accuracy": 0.6687973191340765, "num_tokens": 184132575.0, "step": 1102 }, { "entropy": 1.7981793681780498, "epoch": 0.12117217324434923, "grad_norm": 0.7735952138900757, "learning_rate": 1.995616579232362e-05, "loss": 1.5259, "mean_token_accuracy": 0.6438850810130438, "num_tokens": 184294442.0, "step": 1103 }, { "entropy": 1.6824164589246113, "epoch": 0.12128203015572217, "grad_norm": 0.8133576512336731, "learning_rate": 1.995600781783555e-05, "loss": 1.3995, "mean_token_accuracy": 0.6544992427031199, "num_tokens": 184454040.0, "step": 1104 }, { "entropy": 1.8094957967599232, "epoch": 0.12139188706709511, "grad_norm": 0.7161358594894409, "learning_rate": 1.9955849559893878e-05, "loss": 1.3367, "mean_token_accuracy": 0.6558258583148321, "num_tokens": 184576608.0, "step": 1105 }, { "entropy": 1.6985692779223125, "epoch": 0.12150174397846805, "grad_norm": 0.7241420745849609, "learning_rate": 1.9955691018503592e-05, "loss": 1.5111, "mean_token_accuracy": 0.6411018818616867, "num_tokens": 184743688.0, "step": 1106 }, { "entropy": 1.8171222706635792, "epoch": 0.12161160088984098, "grad_norm": 0.7848587036132812, "learning_rate": 1.995553219366973e-05, "loss": 1.3863, "mean_token_accuracy": 0.6589597115914027, "num_tokens": 184908891.0, "step": 1107 }, { "entropy": 1.7540997366110485, "epoch": 0.12172145780121392, "grad_norm": 0.709570050239563, "learning_rate": 1.9955373085397304e-05, "loss": 1.3756, "mean_token_accuracy": 0.6544551948706309, "num_tokens": 185037502.0, "step": 1108 }, { "entropy": 1.7399804890155792, "epoch": 0.12183131471258686, "grad_norm": 2.1236889362335205, "learning_rate": 1.9955213693691358e-05, "loss": 1.2212, "mean_token_accuracy": 0.6755783557891846, "num_tokens": 185228287.0, "step": 1109 }, { "entropy": 1.7062017718950908, "epoch": 0.12194117162395979, "grad_norm": 0.6356903910636902, "learning_rate": 1.9955054018556936e-05, "loss": 1.4158, "mean_token_accuracy": 0.647825613617897, "num_tokens": 185421671.0, "step": 1110 }, { "entropy": 1.8161241014798482, "epoch": 0.12205102853533273, "grad_norm": 0.9130175709724426, "learning_rate": 1.9954894059999082e-05, "loss": 1.2078, "mean_token_accuracy": 0.671512246131897, "num_tokens": 185538648.0, "step": 1111 }, { "entropy": 1.6999003887176514, "epoch": 0.12216088544670567, "grad_norm": 0.6050575375556946, "learning_rate": 1.9954733818022873e-05, "loss": 1.3409, "mean_token_accuracy": 0.6672930121421814, "num_tokens": 185718319.0, "step": 1112 }, { "entropy": 1.7451957364877064, "epoch": 0.1222707423580786, "grad_norm": 0.7081188559532166, "learning_rate": 1.995457329263337e-05, "loss": 1.4324, "mean_token_accuracy": 0.6614968578020731, "num_tokens": 185890214.0, "step": 1113 }, { "entropy": 1.7832291225592296, "epoch": 0.12238059926945154, "grad_norm": 0.6266985535621643, "learning_rate": 1.9954412483835658e-05, "loss": 1.4527, "mean_token_accuracy": 0.6494892338911692, "num_tokens": 186067267.0, "step": 1114 }, { "entropy": 1.7837847769260406, "epoch": 0.12249045618082448, "grad_norm": 0.8108149170875549, "learning_rate": 1.995425139163483e-05, "loss": 1.394, "mean_token_accuracy": 0.6565323745210966, "num_tokens": 186187770.0, "step": 1115 }, { "entropy": 1.7373790740966797, "epoch": 0.12260031309219742, "grad_norm": 0.6153585910797119, "learning_rate": 1.9954090016035975e-05, "loss": 1.4611, "mean_token_accuracy": 0.6373415837685267, "num_tokens": 186378770.0, "step": 1116 }, { "entropy": 1.8052891790866852, "epoch": 0.12271017000357035, "grad_norm": 0.7305307388305664, "learning_rate": 1.9953928357044207e-05, "loss": 1.584, "mean_token_accuracy": 0.6416770915190378, "num_tokens": 186558752.0, "step": 1117 }, { "entropy": 1.8447977602481842, "epoch": 0.12282002691494329, "grad_norm": 0.6544065475463867, "learning_rate": 1.9953766414664643e-05, "loss": 1.5598, "mean_token_accuracy": 0.6420470277468363, "num_tokens": 186735295.0, "step": 1118 }, { "entropy": 1.7785147627194722, "epoch": 0.12292988382631623, "grad_norm": 0.9531629681587219, "learning_rate": 1.9953604188902407e-05, "loss": 1.4761, "mean_token_accuracy": 0.6378699193398157, "num_tokens": 186921485.0, "step": 1119 }, { "entropy": 1.732460230588913, "epoch": 0.12303974073768915, "grad_norm": 0.6433352828025818, "learning_rate": 1.995344167976263e-05, "loss": 1.4005, "mean_token_accuracy": 0.6507207999626795, "num_tokens": 187083038.0, "step": 1120 }, { "entropy": 1.7945673267046611, "epoch": 0.1231495976490621, "grad_norm": 0.7739344239234924, "learning_rate": 1.995327888725046e-05, "loss": 1.3051, "mean_token_accuracy": 0.6669691900412241, "num_tokens": 187232544.0, "step": 1121 }, { "entropy": 1.735634684562683, "epoch": 0.12325945456043504, "grad_norm": 0.6914976239204407, "learning_rate": 1.995311581137105e-05, "loss": 1.3125, "mean_token_accuracy": 0.6667511413494746, "num_tokens": 187355432.0, "step": 1122 }, { "entropy": 1.7393087248007457, "epoch": 0.12336931147180798, "grad_norm": 0.6433930993080139, "learning_rate": 1.9952952452129557e-05, "loss": 1.3744, "mean_token_accuracy": 0.6738745719194412, "num_tokens": 187522816.0, "step": 1123 }, { "entropy": 1.7415795028209686, "epoch": 0.1234791683831809, "grad_norm": 0.632103443145752, "learning_rate": 1.995278880953115e-05, "loss": 1.4548, "mean_token_accuracy": 0.6446659713983536, "num_tokens": 187709230.0, "step": 1124 }, { "entropy": 1.7378952999909718, "epoch": 0.12358902529455384, "grad_norm": 0.6405538320541382, "learning_rate": 1.9952624883581015e-05, "loss": 1.5702, "mean_token_accuracy": 0.6246414035558701, "num_tokens": 187965508.0, "step": 1125 }, { "entropy": 1.7145747939745586, "epoch": 0.12369888220592679, "grad_norm": 0.7018551826477051, "learning_rate": 1.9952460674284335e-05, "loss": 1.2834, "mean_token_accuracy": 0.6730683247248331, "num_tokens": 188080441.0, "step": 1126 }, { "entropy": 1.6676550805568695, "epoch": 0.12380873911729971, "grad_norm": 0.5789968967437744, "learning_rate": 1.995229618164631e-05, "loss": 1.36, "mean_token_accuracy": 0.6580460617939631, "num_tokens": 188292733.0, "step": 1127 }, { "entropy": 1.7675324380397797, "epoch": 0.12391859602867265, "grad_norm": 1.9231761693954468, "learning_rate": 1.9952131405672145e-05, "loss": 1.3133, "mean_token_accuracy": 0.6596612135569254, "num_tokens": 188507601.0, "step": 1128 }, { "entropy": 1.7851240833600361, "epoch": 0.1240284529400456, "grad_norm": 0.6579344272613525, "learning_rate": 1.9951966346367054e-05, "loss": 1.3917, "mean_token_accuracy": 0.6518159955739975, "num_tokens": 188647743.0, "step": 1129 }, { "entropy": 1.8078663349151611, "epoch": 0.12413830985141852, "grad_norm": 0.9145793318748474, "learning_rate": 1.9951801003736263e-05, "loss": 1.198, "mean_token_accuracy": 0.6740387082099915, "num_tokens": 188748896.0, "step": 1130 }, { "entropy": 1.7432547012964885, "epoch": 0.12424816676279146, "grad_norm": 0.6863495707511902, "learning_rate": 1.9951635377785002e-05, "loss": 1.4592, "mean_token_accuracy": 0.6456627547740936, "num_tokens": 188953780.0, "step": 1131 }, { "entropy": 1.7238787710666656, "epoch": 0.1243580236741644, "grad_norm": 0.8133369088172913, "learning_rate": 1.9951469468518516e-05, "loss": 1.3483, "mean_token_accuracy": 0.6647701015075048, "num_tokens": 189125417.0, "step": 1132 }, { "entropy": 1.6680325170358021, "epoch": 0.12446788058553734, "grad_norm": 0.6783432364463806, "learning_rate": 1.9951303275942055e-05, "loss": 1.2737, "mean_token_accuracy": 0.6636984546979269, "num_tokens": 189260690.0, "step": 1133 }, { "entropy": 1.722198059161504, "epoch": 0.12457773749691027, "grad_norm": 0.784504234790802, "learning_rate": 1.995113680006088e-05, "loss": 1.3104, "mean_token_accuracy": 0.6713638504346212, "num_tokens": 189388782.0, "step": 1134 }, { "entropy": 1.7054710189501445, "epoch": 0.12468759440828321, "grad_norm": 0.7793455123901367, "learning_rate": 1.995097004088026e-05, "loss": 1.3697, "mean_token_accuracy": 0.66965984304746, "num_tokens": 189574036.0, "step": 1135 }, { "entropy": 1.7583240966002147, "epoch": 0.12479745131965615, "grad_norm": 0.7912724018096924, "learning_rate": 1.9950802998405468e-05, "loss": 1.4305, "mean_token_accuracy": 0.6560654044151306, "num_tokens": 189702401.0, "step": 1136 }, { "entropy": 1.8047844966252644, "epoch": 0.12490730823102908, "grad_norm": 0.9431250691413879, "learning_rate": 1.9950635672641797e-05, "loss": 1.4202, "mean_token_accuracy": 0.6618384718894958, "num_tokens": 189861738.0, "step": 1137 }, { "entropy": 1.764178196589152, "epoch": 0.12501716514240202, "grad_norm": 0.7213315367698669, "learning_rate": 1.995046806359454e-05, "loss": 1.4782, "mean_token_accuracy": 0.6546398401260376, "num_tokens": 190010964.0, "step": 1138 }, { "entropy": 1.7561284104983013, "epoch": 0.12512702205377496, "grad_norm": 0.7379279136657715, "learning_rate": 1.9950300171269e-05, "loss": 1.3491, "mean_token_accuracy": 0.6550362805525461, "num_tokens": 190186627.0, "step": 1139 }, { "entropy": 1.6882170836130779, "epoch": 0.1252368789651479, "grad_norm": 0.6673735976219177, "learning_rate": 1.9950131995670494e-05, "loss": 1.414, "mean_token_accuracy": 0.6644150763750076, "num_tokens": 190355893.0, "step": 1140 }, { "entropy": 1.696480651696523, "epoch": 0.12534673587652084, "grad_norm": 2.3444578647613525, "learning_rate": 1.994996353680434e-05, "loss": 0.9837, "mean_token_accuracy": 0.6970398674408594, "num_tokens": 190506476.0, "step": 1141 }, { "entropy": 1.7339637279510498, "epoch": 0.12545659278789376, "grad_norm": 0.6325587630271912, "learning_rate": 1.994979479467588e-05, "loss": 1.4113, "mean_token_accuracy": 0.6466666658719381, "num_tokens": 190693618.0, "step": 1142 }, { "entropy": 1.7287168403466542, "epoch": 0.1255664496992667, "grad_norm": 0.7597485184669495, "learning_rate": 1.9949625769290442e-05, "loss": 1.3352, "mean_token_accuracy": 0.6628235826889673, "num_tokens": 190805076.0, "step": 1143 }, { "entropy": 1.70640030503273, "epoch": 0.12567630661063964, "grad_norm": 0.6051517128944397, "learning_rate": 1.9949456460653382e-05, "loss": 1.3608, "mean_token_accuracy": 0.656550352772077, "num_tokens": 190980015.0, "step": 1144 }, { "entropy": 1.7363159358501434, "epoch": 0.12578616352201258, "grad_norm": 0.7932606339454651, "learning_rate": 1.9949286868770063e-05, "loss": 1.4355, "mean_token_accuracy": 0.6533750792344412, "num_tokens": 191172729.0, "step": 1145 }, { "entropy": 1.7515104512373607, "epoch": 0.12589602043338552, "grad_norm": 0.5937777161598206, "learning_rate": 1.9949116993645842e-05, "loss": 1.3832, "mean_token_accuracy": 0.6598383535941442, "num_tokens": 191371147.0, "step": 1146 }, { "entropy": 1.8300546904404957, "epoch": 0.12600587734475846, "grad_norm": 0.7814804315567017, "learning_rate": 1.9948946835286102e-05, "loss": 1.4727, "mean_token_accuracy": 0.630996306737264, "num_tokens": 191552878.0, "step": 1147 }, { "entropy": 1.696203629175822, "epoch": 0.1261157342561314, "grad_norm": 0.6297745704650879, "learning_rate": 1.9948776393696227e-05, "loss": 1.3124, "mean_token_accuracy": 0.667043482263883, "num_tokens": 191733626.0, "step": 1148 }, { "entropy": 1.710612674554189, "epoch": 0.12622559116750431, "grad_norm": 0.803044855594635, "learning_rate": 1.9948605668881608e-05, "loss": 1.4977, "mean_token_accuracy": 0.6440107375383377, "num_tokens": 191888183.0, "step": 1149 }, { "entropy": 1.7293962438901265, "epoch": 0.12633544807887725, "grad_norm": 0.7975369095802307, "learning_rate": 1.9948434660847658e-05, "loss": 1.2799, "mean_token_accuracy": 0.6679138342539469, "num_tokens": 192005132.0, "step": 1150 }, { "entropy": 1.7680123845736186, "epoch": 0.1264453049902502, "grad_norm": 0.7152736186981201, "learning_rate": 1.994826336959978e-05, "loss": 1.4389, "mean_token_accuracy": 0.6575280626614889, "num_tokens": 192155335.0, "step": 1151 }, { "entropy": 1.7631124357382457, "epoch": 0.12655516190162314, "grad_norm": 0.7395944595336914, "learning_rate": 1.99480917951434e-05, "loss": 1.2721, "mean_token_accuracy": 0.6726651241381963, "num_tokens": 192270766.0, "step": 1152 }, { "entropy": 1.727259635925293, "epoch": 0.12666501881299608, "grad_norm": 0.6742376089096069, "learning_rate": 1.9947919937483944e-05, "loss": 1.43, "mean_token_accuracy": 0.6421197056770325, "num_tokens": 192433846.0, "step": 1153 }, { "entropy": 1.81740140914917, "epoch": 0.12677487572436902, "grad_norm": 0.8998958468437195, "learning_rate": 1.9947747796626854e-05, "loss": 1.3927, "mean_token_accuracy": 0.653491660952568, "num_tokens": 192557670.0, "step": 1154 }, { "entropy": 1.7740411162376404, "epoch": 0.12688473263574196, "grad_norm": 0.7846980690956116, "learning_rate": 1.9947575372577583e-05, "loss": 1.3235, "mean_token_accuracy": 0.6758840531110764, "num_tokens": 192690479.0, "step": 1155 }, { "entropy": 1.765354762474696, "epoch": 0.12699458954711487, "grad_norm": 0.7348425388336182, "learning_rate": 1.994740266534158e-05, "loss": 1.3549, "mean_token_accuracy": 0.661831850806872, "num_tokens": 192830194.0, "step": 1156 }, { "entropy": 1.7395083606243134, "epoch": 0.1271044464584878, "grad_norm": 0.6277830600738525, "learning_rate": 1.9947229674924316e-05, "loss": 1.5185, "mean_token_accuracy": 0.6336728284756342, "num_tokens": 193034894.0, "step": 1157 }, { "entropy": 1.7037833829720814, "epoch": 0.12721430336986075, "grad_norm": 0.6423079967498779, "learning_rate": 1.9947056401331265e-05, "loss": 1.4111, "mean_token_accuracy": 0.6632378300031027, "num_tokens": 193280199.0, "step": 1158 }, { "entropy": 1.7469445566336315, "epoch": 0.1273241602812337, "grad_norm": 0.7091412544250488, "learning_rate": 1.9946882844567906e-05, "loss": 1.3312, "mean_token_accuracy": 0.6669044842322668, "num_tokens": 193443768.0, "step": 1159 }, { "entropy": 1.7258818745613098, "epoch": 0.12743401719260664, "grad_norm": 0.6195393800735474, "learning_rate": 1.994670900463974e-05, "loss": 1.3969, "mean_token_accuracy": 0.6519241978724798, "num_tokens": 193645217.0, "step": 1160 }, { "entropy": 1.7719605664412181, "epoch": 0.12754387410397958, "grad_norm": 0.6821731925010681, "learning_rate": 1.9946534881552266e-05, "loss": 1.3772, "mean_token_accuracy": 0.6459381332000097, "num_tokens": 193758088.0, "step": 1161 }, { "entropy": 1.7489538192749023, "epoch": 0.1276537310153525, "grad_norm": 0.6906759738922119, "learning_rate": 1.9946360475310993e-05, "loss": 1.4159, "mean_token_accuracy": 0.662546748916308, "num_tokens": 193951389.0, "step": 1162 }, { "entropy": 1.7279592752456665, "epoch": 0.12776358792672543, "grad_norm": 0.6182255148887634, "learning_rate": 1.9946185785921442e-05, "loss": 1.4738, "mean_token_accuracy": 0.6493389358123144, "num_tokens": 194166434.0, "step": 1163 }, { "entropy": 1.6567539076010387, "epoch": 0.12787344483809837, "grad_norm": 0.659775972366333, "learning_rate": 1.9946010813389143e-05, "loss": 1.1995, "mean_token_accuracy": 0.689688558379809, "num_tokens": 194303583.0, "step": 1164 }, { "entropy": 1.807248870531718, "epoch": 0.1279833017494713, "grad_norm": 0.6941498517990112, "learning_rate": 1.9945835557719632e-05, "loss": 1.4976, "mean_token_accuracy": 0.6428570051987966, "num_tokens": 194473880.0, "step": 1165 }, { "entropy": 1.7196752826372783, "epoch": 0.12809315866084425, "grad_norm": 0.6665788888931274, "learning_rate": 1.9945660018918456e-05, "loss": 1.467, "mean_token_accuracy": 0.6387737194697062, "num_tokens": 194655840.0, "step": 1166 }, { "entropy": 1.70614160100619, "epoch": 0.1282030155722172, "grad_norm": 0.7460064888000488, "learning_rate": 1.9945484196991173e-05, "loss": 1.4486, "mean_token_accuracy": 0.6422029336293539, "num_tokens": 194826668.0, "step": 1167 }, { "entropy": 1.713955005009969, "epoch": 0.12831287248359013, "grad_norm": 0.7533404231071472, "learning_rate": 1.9945308091943348e-05, "loss": 1.5628, "mean_token_accuracy": 0.656499852736791, "num_tokens": 195019203.0, "step": 1168 }, { "entropy": 1.7552619874477386, "epoch": 0.12842272939496305, "grad_norm": 0.9428662061691284, "learning_rate": 1.994513170378055e-05, "loss": 1.4442, "mean_token_accuracy": 0.6546032627423605, "num_tokens": 195199499.0, "step": 1169 }, { "entropy": 1.7821119626363118, "epoch": 0.128532586306336, "grad_norm": 0.7639563679695129, "learning_rate": 1.9944955032508365e-05, "loss": 1.4387, "mean_token_accuracy": 0.6538108189900717, "num_tokens": 195356654.0, "step": 1170 }, { "entropy": 1.7284887731075287, "epoch": 0.12864244321770893, "grad_norm": 2.3883559703826904, "learning_rate": 1.994477807813238e-05, "loss": 0.9387, "mean_token_accuracy": 0.6993002146482468, "num_tokens": 195492267.0, "step": 1171 }, { "entropy": 1.7171109517415364, "epoch": 0.12875230012908187, "grad_norm": 0.6380852460861206, "learning_rate": 1.9944600840658207e-05, "loss": 1.2273, "mean_token_accuracy": 0.6765789190928141, "num_tokens": 195647384.0, "step": 1172 }, { "entropy": 1.7350752850373585, "epoch": 0.1288621570404548, "grad_norm": 0.6649433374404907, "learning_rate": 1.9944423320091445e-05, "loss": 1.4263, "mean_token_accuracy": 0.6540283511082331, "num_tokens": 195798088.0, "step": 1173 }, { "entropy": 1.6744823853174846, "epoch": 0.12897201395182775, "grad_norm": 0.6647149920463562, "learning_rate": 1.9944245516437714e-05, "loss": 1.2886, "mean_token_accuracy": 0.6681593358516693, "num_tokens": 195964314.0, "step": 1174 }, { "entropy": 1.7713862359523773, "epoch": 0.1290818708632007, "grad_norm": 0.8901455998420715, "learning_rate": 1.9944067429702644e-05, "loss": 1.2277, "mean_token_accuracy": 0.6852303644021353, "num_tokens": 196078305.0, "step": 1175 }, { "entropy": 1.793348143498103, "epoch": 0.1291917277745736, "grad_norm": 0.8101892471313477, "learning_rate": 1.994388905989187e-05, "loss": 1.3155, "mean_token_accuracy": 0.6616990566253662, "num_tokens": 196179334.0, "step": 1176 }, { "entropy": 1.6949261128902435, "epoch": 0.12930158468594655, "grad_norm": 0.5549296736717224, "learning_rate": 1.9943710407011038e-05, "loss": 1.3279, "mean_token_accuracy": 0.6614242494106293, "num_tokens": 196345485.0, "step": 1177 }, { "entropy": 1.7543793419996898, "epoch": 0.1294114415973195, "grad_norm": 0.8616530299186707, "learning_rate": 1.9943531471065798e-05, "loss": 1.5046, "mean_token_accuracy": 0.6579241951306661, "num_tokens": 196497507.0, "step": 1178 }, { "entropy": 1.741701563199361, "epoch": 0.12952129850869243, "grad_norm": 0.7313582897186279, "learning_rate": 1.9943352252061818e-05, "loss": 1.418, "mean_token_accuracy": 0.6414368947347006, "num_tokens": 196674374.0, "step": 1179 }, { "entropy": 1.7809857726097107, "epoch": 0.12963115542006537, "grad_norm": 0.7206242680549622, "learning_rate": 1.9943172750004773e-05, "loss": 1.5279, "mean_token_accuracy": 0.6377990394830704, "num_tokens": 196852648.0, "step": 1180 }, { "entropy": 1.7742149730523427, "epoch": 0.1297410123314383, "grad_norm": 0.712735116481781, "learning_rate": 1.994299296490034e-05, "loss": 1.4867, "mean_token_accuracy": 0.6416104336579641, "num_tokens": 197005775.0, "step": 1181 }, { "entropy": 1.801190088192622, "epoch": 0.12985086924281125, "grad_norm": 0.7176236510276794, "learning_rate": 1.9942812896754206e-05, "loss": 1.535, "mean_token_accuracy": 0.6445636649926504, "num_tokens": 197172268.0, "step": 1182 }, { "entropy": 1.7565678854783375, "epoch": 0.12996072615418416, "grad_norm": 0.8221584558486938, "learning_rate": 1.9942632545572073e-05, "loss": 1.442, "mean_token_accuracy": 0.6304828822612762, "num_tokens": 197381956.0, "step": 1183 }, { "entropy": 1.7492178777853649, "epoch": 0.1300705830655571, "grad_norm": 0.569448709487915, "learning_rate": 1.9942451911359655e-05, "loss": 1.5449, "mean_token_accuracy": 0.6221815447012583, "num_tokens": 197666755.0, "step": 1184 }, { "entropy": 1.7311831414699554, "epoch": 0.13018043997693005, "grad_norm": 0.652637779712677, "learning_rate": 1.994227099412266e-05, "loss": 1.5645, "mean_token_accuracy": 0.637279137969017, "num_tokens": 197885096.0, "step": 1185 }, { "entropy": 1.7549742658933003, "epoch": 0.130290296888303, "grad_norm": 0.6172470450401306, "learning_rate": 1.994208979386682e-05, "loss": 1.471, "mean_token_accuracy": 0.6358410517374674, "num_tokens": 198047424.0, "step": 1186 }, { "entropy": 1.69119127591451, "epoch": 0.13040015379967593, "grad_norm": 0.572778582572937, "learning_rate": 1.9941908310597862e-05, "loss": 1.4412, "mean_token_accuracy": 0.6451299836238226, "num_tokens": 198258790.0, "step": 1187 }, { "entropy": 1.7269008060296376, "epoch": 0.13051001071104887, "grad_norm": 0.6882736682891846, "learning_rate": 1.994172654432154e-05, "loss": 1.4044, "mean_token_accuracy": 0.6471477945645651, "num_tokens": 198405031.0, "step": 1188 }, { "entropy": 1.8405869603157043, "epoch": 0.1306198676224218, "grad_norm": 0.7171852588653564, "learning_rate": 1.99415444950436e-05, "loss": 1.4245, "mean_token_accuracy": 0.6366102347771326, "num_tokens": 198536441.0, "step": 1189 }, { "entropy": 1.708700180053711, "epoch": 0.13072972453379472, "grad_norm": 0.6238212585449219, "learning_rate": 1.994136216276981e-05, "loss": 1.3952, "mean_token_accuracy": 0.6541391809781393, "num_tokens": 198771632.0, "step": 1190 }, { "entropy": 1.686782290538152, "epoch": 0.13083958144516766, "grad_norm": 0.6770201921463013, "learning_rate": 1.994117954750593e-05, "loss": 1.4392, "mean_token_accuracy": 0.6496217797199885, "num_tokens": 198940548.0, "step": 1191 }, { "entropy": 1.732615441083908, "epoch": 0.1309494383565406, "grad_norm": 0.635962724685669, "learning_rate": 1.994099664925775e-05, "loss": 1.4054, "mean_token_accuracy": 0.6503652880589167, "num_tokens": 199146118.0, "step": 1192 }, { "entropy": 1.6821688016255696, "epoch": 0.13105929526791354, "grad_norm": 0.6260414719581604, "learning_rate": 1.9940813468031056e-05, "loss": 1.5164, "mean_token_accuracy": 0.639798546830813, "num_tokens": 199337245.0, "step": 1193 }, { "entropy": 1.751666744550069, "epoch": 0.13116915217928649, "grad_norm": 0.6123976707458496, "learning_rate": 1.9940630003831644e-05, "loss": 1.6292, "mean_token_accuracy": 0.6407297352949778, "num_tokens": 199596254.0, "step": 1194 }, { "entropy": 1.74623238046964, "epoch": 0.13127900909065943, "grad_norm": 0.7188962697982788, "learning_rate": 1.9940446256665317e-05, "loss": 1.4197, "mean_token_accuracy": 0.6419958025217056, "num_tokens": 199772672.0, "step": 1195 }, { "entropy": 1.7535964945952098, "epoch": 0.13138886600203234, "grad_norm": 0.6890870928764343, "learning_rate": 1.99402622265379e-05, "loss": 1.3874, "mean_token_accuracy": 0.6490504443645477, "num_tokens": 199920027.0, "step": 1196 }, { "entropy": 1.7611814439296722, "epoch": 0.13149872291340528, "grad_norm": 0.8501882553100586, "learning_rate": 1.994007791345521e-05, "loss": 1.4224, "mean_token_accuracy": 0.6603156328201294, "num_tokens": 200084942.0, "step": 1197 }, { "entropy": 1.7293813327948253, "epoch": 0.13160857982477822, "grad_norm": 0.6405790448188782, "learning_rate": 1.9939893317423086e-05, "loss": 1.5273, "mean_token_accuracy": 0.6357795844475428, "num_tokens": 200288548.0, "step": 1198 }, { "entropy": 1.68119282523791, "epoch": 0.13171843673615116, "grad_norm": 0.6990883350372314, "learning_rate": 1.9939708438447357e-05, "loss": 1.1898, "mean_token_accuracy": 0.685491551955541, "num_tokens": 200429774.0, "step": 1199 }, { "entropy": 1.764894962310791, "epoch": 0.1318282936475241, "grad_norm": 0.629033088684082, "learning_rate": 1.9939523276533893e-05, "loss": 1.3889, "mean_token_accuracy": 0.6700173169374466, "num_tokens": 200570651.0, "step": 1200 }, { "entropy": 1.7846981485684712, "epoch": 0.13193815055889704, "grad_norm": 0.646364688873291, "learning_rate": 1.9939337831688544e-05, "loss": 1.5205, "mean_token_accuracy": 0.6368842373291651, "num_tokens": 200748458.0, "step": 1201 }, { "entropy": 1.7097224394480388, "epoch": 0.13204800747026998, "grad_norm": 0.641553521156311, "learning_rate": 1.993915210391718e-05, "loss": 1.5153, "mean_token_accuracy": 0.650927260518074, "num_tokens": 200931234.0, "step": 1202 }, { "entropy": 1.7454093396663666, "epoch": 0.1321578643816429, "grad_norm": 0.8706952333450317, "learning_rate": 1.9938966093225683e-05, "loss": 1.3199, "mean_token_accuracy": 0.6697538246711096, "num_tokens": 201066141.0, "step": 1203 }, { "entropy": 1.7411855657895405, "epoch": 0.13226772129301584, "grad_norm": 0.6181418895721436, "learning_rate": 1.993877979961993e-05, "loss": 1.3747, "mean_token_accuracy": 0.6520515978336334, "num_tokens": 201257542.0, "step": 1204 }, { "entropy": 1.7680325210094452, "epoch": 0.13237757820438878, "grad_norm": 0.7731313109397888, "learning_rate": 1.993859322310583e-05, "loss": 1.5037, "mean_token_accuracy": 0.6352165639400482, "num_tokens": 201434347.0, "step": 1205 }, { "entropy": 1.710806429386139, "epoch": 0.13248743511576172, "grad_norm": 0.7883396148681641, "learning_rate": 1.993840636368928e-05, "loss": 1.4203, "mean_token_accuracy": 0.6624042391777039, "num_tokens": 201612352.0, "step": 1206 }, { "entropy": 1.7501719494660695, "epoch": 0.13259729202713466, "grad_norm": 0.6519463062286377, "learning_rate": 1.9938219221376198e-05, "loss": 1.343, "mean_token_accuracy": 0.656904548406601, "num_tokens": 201805098.0, "step": 1207 }, { "entropy": 1.6922560433546703, "epoch": 0.1327071489385076, "grad_norm": 0.6619210243225098, "learning_rate": 1.9938031796172504e-05, "loss": 1.4414, "mean_token_accuracy": 0.6696604192256927, "num_tokens": 201974466.0, "step": 1208 }, { "entropy": 1.7348144352436066, "epoch": 0.13281700584988054, "grad_norm": 0.7165763974189758, "learning_rate": 1.993784408808413e-05, "loss": 1.422, "mean_token_accuracy": 0.6480233718951544, "num_tokens": 202120925.0, "step": 1209 }, { "entropy": 1.6581164697806041, "epoch": 0.13292686276125346, "grad_norm": 0.6778578162193298, "learning_rate": 1.993765609711702e-05, "loss": 1.4294, "mean_token_accuracy": 0.6534278045097986, "num_tokens": 202282750.0, "step": 1210 }, { "entropy": 1.6809436281522114, "epoch": 0.1330367196726264, "grad_norm": 0.7304653525352478, "learning_rate": 1.9937467823277122e-05, "loss": 1.4323, "mean_token_accuracy": 0.6725066850582758, "num_tokens": 202462082.0, "step": 1211 }, { "entropy": 1.6749194264411926, "epoch": 0.13314657658399934, "grad_norm": 0.8034223914146423, "learning_rate": 1.9937279266570395e-05, "loss": 1.4945, "mean_token_accuracy": 0.6492439856131872, "num_tokens": 202629513.0, "step": 1212 }, { "entropy": 1.7574256658554077, "epoch": 0.13325643349537228, "grad_norm": 0.741016149520874, "learning_rate": 1.9937090427002806e-05, "loss": 1.3436, "mean_token_accuracy": 0.6710640490055084, "num_tokens": 202774386.0, "step": 1213 }, { "entropy": 1.7507571478684743, "epoch": 0.13336629040674522, "grad_norm": 0.7227981686592102, "learning_rate": 1.993690130458033e-05, "loss": 1.4202, "mean_token_accuracy": 0.6539425303538641, "num_tokens": 202919220.0, "step": 1214 }, { "entropy": 1.784160981575648, "epoch": 0.13347614731811816, "grad_norm": 0.7705137133598328, "learning_rate": 1.993671189930896e-05, "loss": 1.3467, "mean_token_accuracy": 0.6598477313915888, "num_tokens": 203027863.0, "step": 1215 }, { "entropy": 1.642378608385722, "epoch": 0.1335860042294911, "grad_norm": 0.5922091603279114, "learning_rate": 1.993652221119468e-05, "loss": 1.232, "mean_token_accuracy": 0.6796272893746694, "num_tokens": 203157757.0, "step": 1216 }, { "entropy": 1.8348711729049683, "epoch": 0.13369586114086401, "grad_norm": 0.6379344463348389, "learning_rate": 1.9936332240243503e-05, "loss": 1.516, "mean_token_accuracy": 0.6400525023539861, "num_tokens": 203348155.0, "step": 1217 }, { "entropy": 1.695642501115799, "epoch": 0.13380571805223695, "grad_norm": 0.6818593740463257, "learning_rate": 1.9936141986461434e-05, "loss": 1.3798, "mean_token_accuracy": 0.6709787100553513, "num_tokens": 203499917.0, "step": 1218 }, { "entropy": 1.7197688619295757, "epoch": 0.1339155749636099, "grad_norm": 0.6506553292274475, "learning_rate": 1.9935951449854502e-05, "loss": 1.5437, "mean_token_accuracy": 0.6586721042792002, "num_tokens": 203678606.0, "step": 1219 }, { "entropy": 1.6951692700386047, "epoch": 0.13402543187498284, "grad_norm": 0.7173891067504883, "learning_rate": 1.993576063042873e-05, "loss": 1.3165, "mean_token_accuracy": 0.6613740225632986, "num_tokens": 203802059.0, "step": 1220 }, { "entropy": 1.7536791861057281, "epoch": 0.13413528878635578, "grad_norm": 0.8501359820365906, "learning_rate": 1.993556952819016e-05, "loss": 1.3935, "mean_token_accuracy": 0.6629123538732529, "num_tokens": 203961628.0, "step": 1221 }, { "entropy": 1.7680688103040059, "epoch": 0.13424514569772872, "grad_norm": 0.5954765677452087, "learning_rate": 1.993537814314484e-05, "loss": 1.386, "mean_token_accuracy": 0.660725419720014, "num_tokens": 204138349.0, "step": 1222 }, { "entropy": 1.8008166750272114, "epoch": 0.13435500260910163, "grad_norm": 0.7124298214912415, "learning_rate": 1.993518647529883e-05, "loss": 1.5329, "mean_token_accuracy": 0.6388672788937887, "num_tokens": 204301649.0, "step": 1223 }, { "entropy": 1.780760755141576, "epoch": 0.13446485952047457, "grad_norm": 0.6755843162536621, "learning_rate": 1.9934994524658196e-05, "loss": 1.3597, "mean_token_accuracy": 0.6550761957963308, "num_tokens": 204474630.0, "step": 1224 }, { "entropy": 1.7628767291704814, "epoch": 0.1345747164318475, "grad_norm": 0.6415227651596069, "learning_rate": 1.993480229122901e-05, "loss": 1.5455, "mean_token_accuracy": 0.6402927239735922, "num_tokens": 204680151.0, "step": 1225 }, { "entropy": 1.723549763361613, "epoch": 0.13468457334322045, "grad_norm": 0.605971097946167, "learning_rate": 1.9934609775017357e-05, "loss": 1.5025, "mean_token_accuracy": 0.6315073023239771, "num_tokens": 204876536.0, "step": 1226 }, { "entropy": 1.711145242055257, "epoch": 0.1347944302545934, "grad_norm": 0.696202278137207, "learning_rate": 1.993441697602933e-05, "loss": 1.4026, "mean_token_accuracy": 0.6470880806446075, "num_tokens": 205090640.0, "step": 1227 }, { "entropy": 1.7900481621424358, "epoch": 0.13490428716596634, "grad_norm": 0.7666972875595093, "learning_rate": 1.9934223894271035e-05, "loss": 1.4558, "mean_token_accuracy": 0.6359467854102453, "num_tokens": 205354906.0, "step": 1228 }, { "entropy": 1.7724877794583638, "epoch": 0.13501414407733928, "grad_norm": 0.6311783790588379, "learning_rate": 1.993403052974858e-05, "loss": 1.444, "mean_token_accuracy": 0.6428662339846293, "num_tokens": 205529189.0, "step": 1229 }, { "entropy": 1.7515977422396343, "epoch": 0.1351240009887122, "grad_norm": 0.706529974937439, "learning_rate": 1.993383688246808e-05, "loss": 1.3015, "mean_token_accuracy": 0.6625747283299764, "num_tokens": 205683718.0, "step": 1230 }, { "entropy": 1.7582630415757496, "epoch": 0.13523385790008513, "grad_norm": 0.6772528886795044, "learning_rate": 1.993364295243567e-05, "loss": 1.405, "mean_token_accuracy": 0.6459440638621649, "num_tokens": 205812951.0, "step": 1231 }, { "entropy": 1.7285764515399933, "epoch": 0.13534371481145807, "grad_norm": 0.6248936057090759, "learning_rate": 1.9933448739657487e-05, "loss": 1.3699, "mean_token_accuracy": 0.6622784286737442, "num_tokens": 205944308.0, "step": 1232 }, { "entropy": 1.6804889539877574, "epoch": 0.135453571722831, "grad_norm": 0.8179889917373657, "learning_rate": 1.9933254244139675e-05, "loss": 1.341, "mean_token_accuracy": 0.6590113043785095, "num_tokens": 206136941.0, "step": 1233 }, { "entropy": 1.7611981630325317, "epoch": 0.13556342863420395, "grad_norm": 0.7938576340675354, "learning_rate": 1.9933059465888394e-05, "loss": 1.4829, "mean_token_accuracy": 0.6553379346927007, "num_tokens": 206305282.0, "step": 1234 }, { "entropy": 1.7320065299669902, "epoch": 0.1356732855455769, "grad_norm": 0.6579363942146301, "learning_rate": 1.9932864404909808e-05, "loss": 1.399, "mean_token_accuracy": 0.6568387846151987, "num_tokens": 206530940.0, "step": 1235 }, { "entropy": 1.8249189853668213, "epoch": 0.13578314245694983, "grad_norm": 0.6946649551391602, "learning_rate": 1.9932669061210082e-05, "loss": 1.4977, "mean_token_accuracy": 0.6561338355143865, "num_tokens": 206728889.0, "step": 1236 }, { "entropy": 1.7712652484575908, "epoch": 0.13589299936832275, "grad_norm": 0.8788438439369202, "learning_rate": 1.993247343479541e-05, "loss": 1.2986, "mean_token_accuracy": 0.6657395313183466, "num_tokens": 206838081.0, "step": 1237 }, { "entropy": 1.7870614627997081, "epoch": 0.1360028562796957, "grad_norm": 0.9152439832687378, "learning_rate": 1.993227752567198e-05, "loss": 1.4701, "mean_token_accuracy": 0.648246243596077, "num_tokens": 206974371.0, "step": 1238 }, { "entropy": 1.7998821139335632, "epoch": 0.13611271319106863, "grad_norm": 0.6228598952293396, "learning_rate": 1.9932081333845988e-05, "loss": 1.3967, "mean_token_accuracy": 0.6436318705479304, "num_tokens": 207125473.0, "step": 1239 }, { "entropy": 1.7932134866714478, "epoch": 0.13622257010244157, "grad_norm": 0.8686763048171997, "learning_rate": 1.993188485932365e-05, "loss": 1.6097, "mean_token_accuracy": 0.6425013393163681, "num_tokens": 207272468.0, "step": 1240 }, { "entropy": 1.7816320955753326, "epoch": 0.1363324270138145, "grad_norm": 0.8052454590797424, "learning_rate": 1.993168810211118e-05, "loss": 1.2604, "mean_token_accuracy": 0.6674903134504954, "num_tokens": 207394408.0, "step": 1241 }, { "entropy": 1.7591705024242401, "epoch": 0.13644228392518745, "grad_norm": 0.8733900189399719, "learning_rate": 1.9931491062214806e-05, "loss": 1.326, "mean_token_accuracy": 0.670804500579834, "num_tokens": 207520568.0, "step": 1242 }, { "entropy": 1.7159309188524883, "epoch": 0.1365521408365604, "grad_norm": 0.6903548240661621, "learning_rate": 1.993129373964076e-05, "loss": 1.4192, "mean_token_accuracy": 0.668173685669899, "num_tokens": 207663994.0, "step": 1243 }, { "entropy": 1.7172527611255646, "epoch": 0.1366619977479333, "grad_norm": 0.8254104256629944, "learning_rate": 1.9931096134395298e-05, "loss": 1.366, "mean_token_accuracy": 0.664167582988739, "num_tokens": 207861655.0, "step": 1244 }, { "entropy": 1.7482962310314178, "epoch": 0.13677185465930625, "grad_norm": 0.805140495300293, "learning_rate": 1.9930898246484664e-05, "loss": 1.4019, "mean_token_accuracy": 0.6474734991788864, "num_tokens": 208035591.0, "step": 1245 }, { "entropy": 1.7081841230392456, "epoch": 0.1368817115706792, "grad_norm": 0.7142578959465027, "learning_rate": 1.9930700075915127e-05, "loss": 1.4685, "mean_token_accuracy": 0.6366176108519236, "num_tokens": 208215389.0, "step": 1246 }, { "entropy": 1.7701348463694255, "epoch": 0.13699156848205213, "grad_norm": 0.6305694580078125, "learning_rate": 1.9930501622692955e-05, "loss": 1.4108, "mean_token_accuracy": 0.6463577598333359, "num_tokens": 208432404.0, "step": 1247 }, { "entropy": 1.805136779944102, "epoch": 0.13710142539342507, "grad_norm": 0.9272505640983582, "learning_rate": 1.9930302886824434e-05, "loss": 1.4094, "mean_token_accuracy": 0.641325443983078, "num_tokens": 208584833.0, "step": 1248 }, { "entropy": 1.760273923476537, "epoch": 0.137211282304798, "grad_norm": 0.6633432507514954, "learning_rate": 1.9930103868315845e-05, "loss": 1.3002, "mean_token_accuracy": 0.6639900704224905, "num_tokens": 208717845.0, "step": 1249 }, { "entropy": 1.688180943330129, "epoch": 0.13732113921617095, "grad_norm": 0.662477433681488, "learning_rate": 1.99299045671735e-05, "loss": 1.5009, "mean_token_accuracy": 0.6593229522307714, "num_tokens": 208926014.0, "step": 1250 }, { "entropy": 1.8261633117993672, "epoch": 0.13743099612754386, "grad_norm": 0.8224329948425293, "learning_rate": 1.9929704983403694e-05, "loss": 1.2925, "mean_token_accuracy": 0.6615334004163742, "num_tokens": 209031355.0, "step": 1251 }, { "entropy": 1.7323819696903229, "epoch": 0.1375408530389168, "grad_norm": 0.5979375243186951, "learning_rate": 1.9929505117012753e-05, "loss": 1.5184, "mean_token_accuracy": 0.6293915957212448, "num_tokens": 209224580.0, "step": 1252 }, { "entropy": 1.6968580385049183, "epoch": 0.13765070995028975, "grad_norm": 0.6671169996261597, "learning_rate": 1.9929304968006996e-05, "loss": 1.3719, "mean_token_accuracy": 0.6572729150454203, "num_tokens": 209431303.0, "step": 1253 }, { "entropy": 1.7308607598145802, "epoch": 0.1377605668616627, "grad_norm": 0.7142148017883301, "learning_rate": 1.992910453639276e-05, "loss": 1.4229, "mean_token_accuracy": 0.6546590526898702, "num_tokens": 209596420.0, "step": 1254 }, { "entropy": 1.787158230940501, "epoch": 0.13787042377303563, "grad_norm": 0.6323195695877075, "learning_rate": 1.9928903822176392e-05, "loss": 1.5243, "mean_token_accuracy": 0.6278480341037115, "num_tokens": 209779786.0, "step": 1255 }, { "entropy": 1.6387408177057903, "epoch": 0.13798028068440857, "grad_norm": 3.0804104804992676, "learning_rate": 1.992870282536424e-05, "loss": 1.4143, "mean_token_accuracy": 0.6523379882176717, "num_tokens": 209938901.0, "step": 1256 }, { "entropy": 1.7122711837291718, "epoch": 0.13809013759578148, "grad_norm": 0.669571042060852, "learning_rate": 1.9928501545962666e-05, "loss": 1.3667, "mean_token_accuracy": 0.6676834921042124, "num_tokens": 210133192.0, "step": 1257 }, { "entropy": 1.8118088046709697, "epoch": 0.13819999450715442, "grad_norm": 0.7653446197509766, "learning_rate": 1.992829998397804e-05, "loss": 1.5617, "mean_token_accuracy": 0.6499693269530932, "num_tokens": 210305749.0, "step": 1258 }, { "entropy": 1.7411625186602275, "epoch": 0.13830985141852736, "grad_norm": 0.7534335255622864, "learning_rate": 1.9928098139416745e-05, "loss": 1.457, "mean_token_accuracy": 0.6617699215809504, "num_tokens": 210448909.0, "step": 1259 }, { "entropy": 1.742474267880122, "epoch": 0.1384197083299003, "grad_norm": 0.6153781414031982, "learning_rate": 1.9927896012285168e-05, "loss": 1.4539, "mean_token_accuracy": 0.6413547496000925, "num_tokens": 210620811.0, "step": 1260 }, { "entropy": 1.7018092572689056, "epoch": 0.13852956524127324, "grad_norm": 0.6760329008102417, "learning_rate": 1.99276936025897e-05, "loss": 1.3766, "mean_token_accuracy": 0.6578912138938904, "num_tokens": 210781045.0, "step": 1261 }, { "entropy": 1.7072451611359913, "epoch": 0.13863942215264619, "grad_norm": 0.6856552958488464, "learning_rate": 1.992749091033676e-05, "loss": 1.3279, "mean_token_accuracy": 0.6731646209955215, "num_tokens": 210906125.0, "step": 1262 }, { "entropy": 1.758314996957779, "epoch": 0.13874927906401913, "grad_norm": 0.7295485138893127, "learning_rate": 1.9927287935532748e-05, "loss": 1.6247, "mean_token_accuracy": 0.6324874858061472, "num_tokens": 211094941.0, "step": 1263 }, { "entropy": 1.7821235358715057, "epoch": 0.13885913597539204, "grad_norm": 0.7965490221977234, "learning_rate": 1.99270846781841e-05, "loss": 1.4094, "mean_token_accuracy": 0.654320701956749, "num_tokens": 211215450.0, "step": 1264 }, { "entropy": 1.7328162292639415, "epoch": 0.13896899288676498, "grad_norm": 0.7370543479919434, "learning_rate": 1.9926881138297246e-05, "loss": 1.4465, "mean_token_accuracy": 0.6546304225921631, "num_tokens": 211399399.0, "step": 1265 }, { "entropy": 1.7225729425748189, "epoch": 0.13907884979813792, "grad_norm": 0.7675600051879883, "learning_rate": 1.9926677315878624e-05, "loss": 1.2386, "mean_token_accuracy": 0.6786759148041407, "num_tokens": 211565381.0, "step": 1266 }, { "entropy": 1.8305182953675587, "epoch": 0.13918870670951086, "grad_norm": 0.710245668888092, "learning_rate": 1.9926473210934686e-05, "loss": 1.5366, "mean_token_accuracy": 0.6416638592878977, "num_tokens": 211706810.0, "step": 1267 }, { "entropy": 1.773509681224823, "epoch": 0.1392985636208838, "grad_norm": 0.6955118775367737, "learning_rate": 1.9926268823471894e-05, "loss": 1.4023, "mean_token_accuracy": 0.6475071211655935, "num_tokens": 211901076.0, "step": 1268 }, { "entropy": 1.7048786679903667, "epoch": 0.13940842053225674, "grad_norm": 0.6570739150047302, "learning_rate": 1.992606415349672e-05, "loss": 1.3676, "mean_token_accuracy": 0.6701171100139618, "num_tokens": 212066863.0, "step": 1269 }, { "entropy": 1.7483859062194824, "epoch": 0.13951827744362968, "grad_norm": 0.6274306774139404, "learning_rate": 1.9925859201015633e-05, "loss": 1.4419, "mean_token_accuracy": 0.649980386098226, "num_tokens": 212264417.0, "step": 1270 }, { "entropy": 1.6661617755889893, "epoch": 0.1396281343550026, "grad_norm": 0.8265875577926636, "learning_rate": 1.9925653966035126e-05, "loss": 1.1776, "mean_token_accuracy": 0.6817067364851633, "num_tokens": 212372748.0, "step": 1271 }, { "entropy": 1.7644007603327434, "epoch": 0.13973799126637554, "grad_norm": 0.653523862361908, "learning_rate": 1.992544844856169e-05, "loss": 1.3228, "mean_token_accuracy": 0.6687373667955399, "num_tokens": 212509269.0, "step": 1272 }, { "entropy": 1.750530868768692, "epoch": 0.13984784817774848, "grad_norm": 0.7181108593940735, "learning_rate": 1.9925242648601837e-05, "loss": 1.5806, "mean_token_accuracy": 0.6340227698286375, "num_tokens": 212692270.0, "step": 1273 }, { "entropy": 1.7549054125944774, "epoch": 0.13995770508912142, "grad_norm": 0.7157692909240723, "learning_rate": 1.992503656616208e-05, "loss": 1.3865, "mean_token_accuracy": 0.6512027333180109, "num_tokens": 212868340.0, "step": 1274 }, { "entropy": 1.7521459460258484, "epoch": 0.14006756200049436, "grad_norm": 0.8038479089736938, "learning_rate": 1.9924830201248928e-05, "loss": 1.3245, "mean_token_accuracy": 0.6748414585987726, "num_tokens": 212980640.0, "step": 1275 }, { "entropy": 1.7466616133848827, "epoch": 0.1401774189118673, "grad_norm": 0.6825430989265442, "learning_rate": 1.9924623553868927e-05, "loss": 1.3675, "mean_token_accuracy": 0.6585159202416738, "num_tokens": 213130813.0, "step": 1276 }, { "entropy": 1.7830199599266052, "epoch": 0.14028727582324024, "grad_norm": 0.7503454089164734, "learning_rate": 1.992441662402861e-05, "loss": 1.3358, "mean_token_accuracy": 0.6596743067105612, "num_tokens": 213304286.0, "step": 1277 }, { "entropy": 1.7447825769583385, "epoch": 0.14039713273461316, "grad_norm": 0.7130751013755798, "learning_rate": 1.9924209411734526e-05, "loss": 1.5629, "mean_token_accuracy": 0.6291346848011017, "num_tokens": 213532550.0, "step": 1278 }, { "entropy": 1.8167424301306407, "epoch": 0.1405069896459861, "grad_norm": 0.7561038732528687, "learning_rate": 1.9924001916993238e-05, "loss": 1.4062, "mean_token_accuracy": 0.6401105572779974, "num_tokens": 213737731.0, "step": 1279 }, { "entropy": 1.7469572722911835, "epoch": 0.14061684655735904, "grad_norm": 0.7100579142570496, "learning_rate": 1.9923794139811313e-05, "loss": 1.4572, "mean_token_accuracy": 0.6476842015981674, "num_tokens": 213912106.0, "step": 1280 }, { "entropy": 1.7979302604993184, "epoch": 0.14072670346873198, "grad_norm": 0.6927300691604614, "learning_rate": 1.9923586080195323e-05, "loss": 1.36, "mean_token_accuracy": 0.6531442552804947, "num_tokens": 214069371.0, "step": 1281 }, { "entropy": 1.6450412273406982, "epoch": 0.14083656038010492, "grad_norm": 0.689548671245575, "learning_rate": 1.9923377738151856e-05, "loss": 1.2596, "mean_token_accuracy": 0.6867117385069529, "num_tokens": 214201260.0, "step": 1282 }, { "entropy": 1.762027770280838, "epoch": 0.14094641729147786, "grad_norm": 0.709928572177887, "learning_rate": 1.9923169113687503e-05, "loss": 1.5263, "mean_token_accuracy": 0.6340511639912924, "num_tokens": 214406160.0, "step": 1283 }, { "entropy": 1.7095829248428345, "epoch": 0.14105627420285077, "grad_norm": 8.520343780517578, "learning_rate": 1.9922960206808867e-05, "loss": 1.0589, "mean_token_accuracy": 0.6897023518880209, "num_tokens": 214592081.0, "step": 1284 }, { "entropy": 1.7550367613633473, "epoch": 0.14116613111422371, "grad_norm": 0.6775065064430237, "learning_rate": 1.992275101752256e-05, "loss": 1.4911, "mean_token_accuracy": 0.6530732462803522, "num_tokens": 214764303.0, "step": 1285 }, { "entropy": 1.7507551113764446, "epoch": 0.14127598802559665, "grad_norm": 0.6907753944396973, "learning_rate": 1.992254154583521e-05, "loss": 1.4727, "mean_token_accuracy": 0.6412150710821152, "num_tokens": 214920143.0, "step": 1286 }, { "entropy": 1.7401640017827351, "epoch": 0.1413858449369696, "grad_norm": 0.7834397554397583, "learning_rate": 1.9922331791753435e-05, "loss": 1.3464, "mean_token_accuracy": 0.6618963032960892, "num_tokens": 215046344.0, "step": 1287 }, { "entropy": 1.7570099631945293, "epoch": 0.14149570184834254, "grad_norm": 0.70011967420578, "learning_rate": 1.992212175528388e-05, "loss": 1.3707, "mean_token_accuracy": 0.6539260894060135, "num_tokens": 215180311.0, "step": 1288 }, { "entropy": 1.6938535173734028, "epoch": 0.14160555875971548, "grad_norm": 0.7015315294265747, "learning_rate": 1.9921911436433194e-05, "loss": 1.5209, "mean_token_accuracy": 0.6413632233937582, "num_tokens": 215372204.0, "step": 1289 }, { "entropy": 1.7938569088776906, "epoch": 0.14171541567108842, "grad_norm": 0.6672449707984924, "learning_rate": 1.992170083520803e-05, "loss": 1.4559, "mean_token_accuracy": 0.6347246567408243, "num_tokens": 215554411.0, "step": 1290 }, { "entropy": 1.7035801708698273, "epoch": 0.14182527258246133, "grad_norm": 0.8814971446990967, "learning_rate": 1.9921489951615057e-05, "loss": 1.3534, "mean_token_accuracy": 0.6513211578130722, "num_tokens": 215689080.0, "step": 1291 }, { "entropy": 1.7192172209421794, "epoch": 0.14193512949383427, "grad_norm": 0.690263569355011, "learning_rate": 1.9921278785660946e-05, "loss": 1.4102, "mean_token_accuracy": 0.6569246202707291, "num_tokens": 215849803.0, "step": 1292 }, { "entropy": 1.7479885419209797, "epoch": 0.1420449864052072, "grad_norm": 0.6556616425514221, "learning_rate": 1.9921067337352384e-05, "loss": 1.4046, "mean_token_accuracy": 0.6546976268291473, "num_tokens": 216019645.0, "step": 1293 }, { "entropy": 1.7502675553162892, "epoch": 0.14215484331658015, "grad_norm": 0.7258479595184326, "learning_rate": 1.9920855606696054e-05, "loss": 1.5266, "mean_token_accuracy": 0.6518742392460505, "num_tokens": 216187505.0, "step": 1294 }, { "entropy": 1.7436367273330688, "epoch": 0.1422647002279531, "grad_norm": 0.8124620914459229, "learning_rate": 1.992064359369867e-05, "loss": 1.3587, "mean_token_accuracy": 0.6601897577444712, "num_tokens": 216323904.0, "step": 1295 }, { "entropy": 1.706505278746287, "epoch": 0.14237455713932604, "grad_norm": 0.7671903371810913, "learning_rate": 1.992043129836693e-05, "loss": 1.2957, "mean_token_accuracy": 0.6700823853413264, "num_tokens": 216450218.0, "step": 1296 }, { "entropy": 1.6708903312683105, "epoch": 0.14248441405069898, "grad_norm": 0.6247614026069641, "learning_rate": 1.9920218720707563e-05, "loss": 1.2455, "mean_token_accuracy": 0.683778112133344, "num_tokens": 216583167.0, "step": 1297 }, { "entropy": 1.755852033694585, "epoch": 0.1425942709620719, "grad_norm": 0.8267444968223572, "learning_rate": 1.992000586072729e-05, "loss": 1.2248, "mean_token_accuracy": 0.6767070343097051, "num_tokens": 216706610.0, "step": 1298 }, { "entropy": 1.7532562911510468, "epoch": 0.14270412787344483, "grad_norm": 0.7160501480102539, "learning_rate": 1.9919792718432858e-05, "loss": 1.4451, "mean_token_accuracy": 0.6385903209447861, "num_tokens": 216851311.0, "step": 1299 }, { "entropy": 1.7782372931639354, "epoch": 0.14281398478481777, "grad_norm": 0.8023732900619507, "learning_rate": 1.9919579293831e-05, "loss": 1.4043, "mean_token_accuracy": 0.6392123301823934, "num_tokens": 217002266.0, "step": 1300 }, { "entropy": 1.7266008655230205, "epoch": 0.1429238416961907, "grad_norm": 0.706461489200592, "learning_rate": 1.9919365586928477e-05, "loss": 1.431, "mean_token_accuracy": 0.6418863981962204, "num_tokens": 217151987.0, "step": 1301 }, { "entropy": 1.6731161773204803, "epoch": 0.14303369860756365, "grad_norm": 0.5926313400268555, "learning_rate": 1.9919151597732055e-05, "loss": 1.5354, "mean_token_accuracy": 0.6458988140026728, "num_tokens": 217370579.0, "step": 1302 }, { "entropy": 1.7368605931599934, "epoch": 0.1431435555189366, "grad_norm": 0.7459754943847656, "learning_rate": 1.9918937326248503e-05, "loss": 1.3562, "mean_token_accuracy": 0.6788886686166128, "num_tokens": 217495749.0, "step": 1303 }, { "entropy": 1.77315154671669, "epoch": 0.14325341243030953, "grad_norm": 0.9817308187484741, "learning_rate": 1.99187227724846e-05, "loss": 1.4833, "mean_token_accuracy": 0.6524508446455002, "num_tokens": 217617122.0, "step": 1304 }, { "entropy": 1.6556137005488079, "epoch": 0.14336326934168245, "grad_norm": 0.6126701235771179, "learning_rate": 1.9918507936447146e-05, "loss": 1.3457, "mean_token_accuracy": 0.657344122727712, "num_tokens": 217822669.0, "step": 1305 }, { "entropy": 1.7017977635065715, "epoch": 0.1434731262530554, "grad_norm": 0.8157427906990051, "learning_rate": 1.9918292818142934e-05, "loss": 1.3017, "mean_token_accuracy": 0.6645906120538712, "num_tokens": 217935144.0, "step": 1306 }, { "entropy": 1.6964992980162303, "epoch": 0.14358298316442833, "grad_norm": 0.664656400680542, "learning_rate": 1.9918077417578768e-05, "loss": 1.3651, "mean_token_accuracy": 0.6545301824808121, "num_tokens": 218130755.0, "step": 1307 }, { "entropy": 1.7069012820720673, "epoch": 0.14369284007580127, "grad_norm": 0.5952320694923401, "learning_rate": 1.9917861734761476e-05, "loss": 1.3134, "mean_token_accuracy": 0.6620885580778122, "num_tokens": 218293252.0, "step": 1308 }, { "entropy": 1.7563343445460002, "epoch": 0.1438026969871742, "grad_norm": 0.710450291633606, "learning_rate": 1.9917645769697874e-05, "loss": 1.2887, "mean_token_accuracy": 0.6759348313013712, "num_tokens": 218437112.0, "step": 1309 }, { "entropy": 1.6977879603703816, "epoch": 0.14391255389854715, "grad_norm": 0.5810772776603699, "learning_rate": 1.99174295223948e-05, "loss": 1.4859, "mean_token_accuracy": 0.6420779774586359, "num_tokens": 218652082.0, "step": 1310 }, { "entropy": 1.729242612918218, "epoch": 0.1440224108099201, "grad_norm": 0.6845049858093262, "learning_rate": 1.9917212992859104e-05, "loss": 1.5988, "mean_token_accuracy": 0.6266419490178426, "num_tokens": 218867489.0, "step": 1311 }, { "entropy": 1.7926994264125824, "epoch": 0.144132267721293, "grad_norm": 0.6462848782539368, "learning_rate": 1.9916996181097635e-05, "loss": 1.4226, "mean_token_accuracy": 0.6535110026597977, "num_tokens": 219003067.0, "step": 1312 }, { "entropy": 1.7463144659996033, "epoch": 0.14424212463266595, "grad_norm": 0.7083527445793152, "learning_rate": 1.9916779087117255e-05, "loss": 1.4073, "mean_token_accuracy": 0.654004101951917, "num_tokens": 219137606.0, "step": 1313 }, { "entropy": 1.7422666052977245, "epoch": 0.1443519815440389, "grad_norm": 0.6640010476112366, "learning_rate": 1.9916561710924834e-05, "loss": 1.4153, "mean_token_accuracy": 0.6418075213829676, "num_tokens": 219327443.0, "step": 1314 }, { "entropy": 1.7220148543516796, "epoch": 0.14446183845541183, "grad_norm": 0.7796133756637573, "learning_rate": 1.9916344052527252e-05, "loss": 1.2958, "mean_token_accuracy": 0.6594913254181544, "num_tokens": 219472432.0, "step": 1315 }, { "entropy": 1.7227765917778015, "epoch": 0.14457169536678477, "grad_norm": 0.8143858909606934, "learning_rate": 1.99161261119314e-05, "loss": 1.5051, "mean_token_accuracy": 0.6485525220632553, "num_tokens": 219634962.0, "step": 1316 }, { "entropy": 1.687673379977544, "epoch": 0.1446815522781577, "grad_norm": 0.7880772352218628, "learning_rate": 1.9915907889144175e-05, "loss": 1.2464, "mean_token_accuracy": 0.6796143154303232, "num_tokens": 219760215.0, "step": 1317 }, { "entropy": 1.69181493918101, "epoch": 0.14479140918953062, "grad_norm": 0.6202834248542786, "learning_rate": 1.991568938417248e-05, "loss": 1.3949, "mean_token_accuracy": 0.6500192880630493, "num_tokens": 220040624.0, "step": 1318 }, { "entropy": 1.7221740285555522, "epoch": 0.14490126610090356, "grad_norm": 0.6479632258415222, "learning_rate": 1.9915470597023235e-05, "loss": 1.4433, "mean_token_accuracy": 0.6498556931813558, "num_tokens": 220187439.0, "step": 1319 }, { "entropy": 1.8096940517425537, "epoch": 0.1450111230122765, "grad_norm": 0.8540252447128296, "learning_rate": 1.9915251527703364e-05, "loss": 1.4752, "mean_token_accuracy": 0.6390475134054819, "num_tokens": 220368796.0, "step": 1320 }, { "entropy": 1.7524564564228058, "epoch": 0.14512097992364945, "grad_norm": 0.6428526043891907, "learning_rate": 1.9915032176219796e-05, "loss": 1.4522, "mean_token_accuracy": 0.638989175359408, "num_tokens": 220553587.0, "step": 1321 }, { "entropy": 1.7355947295824687, "epoch": 0.1452308368350224, "grad_norm": 0.6749572157859802, "learning_rate": 1.991481254257948e-05, "loss": 1.4713, "mean_token_accuracy": 0.6340489635864893, "num_tokens": 220841767.0, "step": 1322 }, { "entropy": 1.7015680869420369, "epoch": 0.14534069374639533, "grad_norm": 0.6925226449966431, "learning_rate": 1.9914592626789364e-05, "loss": 1.5523, "mean_token_accuracy": 0.6562323371569315, "num_tokens": 221024974.0, "step": 1323 }, { "entropy": 1.712952087322871, "epoch": 0.14545055065776827, "grad_norm": 0.6849307417869568, "learning_rate": 1.9914372428856407e-05, "loss": 1.3889, "mean_token_accuracy": 0.652979368964831, "num_tokens": 221204468.0, "step": 1324 }, { "entropy": 1.6939956446488698, "epoch": 0.14556040756914118, "grad_norm": 0.877672553062439, "learning_rate": 1.991415194878758e-05, "loss": 1.2828, "mean_token_accuracy": 0.6566557884216309, "num_tokens": 221341403.0, "step": 1325 }, { "entropy": 1.674074947834015, "epoch": 0.14567026448051412, "grad_norm": 0.655870795249939, "learning_rate": 1.9913931186589863e-05, "loss": 1.3431, "mean_token_accuracy": 0.6632597943147024, "num_tokens": 221473989.0, "step": 1326 }, { "entropy": 1.8000195523103077, "epoch": 0.14578012139188706, "grad_norm": 0.6739341616630554, "learning_rate": 1.991371014227024e-05, "loss": 1.4806, "mean_token_accuracy": 0.6396994342406591, "num_tokens": 221646735.0, "step": 1327 }, { "entropy": 1.7230163713296254, "epoch": 0.14588997830326, "grad_norm": 0.6686187386512756, "learning_rate": 1.9913488815835703e-05, "loss": 1.3263, "mean_token_accuracy": 0.65727499127388, "num_tokens": 221804801.0, "step": 1328 }, { "entropy": 1.7675328155358632, "epoch": 0.14599983521463294, "grad_norm": 0.7255124449729919, "learning_rate": 1.9913267207293266e-05, "loss": 1.4374, "mean_token_accuracy": 0.665775845448176, "num_tokens": 221967384.0, "step": 1329 }, { "entropy": 1.689390778541565, "epoch": 0.14610969212600589, "grad_norm": 0.6025556921958923, "learning_rate": 1.991304531664994e-05, "loss": 1.3426, "mean_token_accuracy": 0.6495958268642426, "num_tokens": 222153590.0, "step": 1330 }, { "entropy": 1.7810916304588318, "epoch": 0.14621954903737883, "grad_norm": 0.7592765688896179, "learning_rate": 1.991282314391274e-05, "loss": 1.4854, "mean_token_accuracy": 0.6385663896799088, "num_tokens": 222342691.0, "step": 1331 }, { "entropy": 1.7684324781099956, "epoch": 0.14632940594875174, "grad_norm": 0.6830516457557678, "learning_rate": 1.9912600689088706e-05, "loss": 1.4813, "mean_token_accuracy": 0.6428666114807129, "num_tokens": 222508014.0, "step": 1332 }, { "entropy": 1.7351989547411601, "epoch": 0.14643926286012468, "grad_norm": 0.632705569267273, "learning_rate": 1.9912377952184877e-05, "loss": 1.3404, "mean_token_accuracy": 0.6745259712139765, "num_tokens": 222669382.0, "step": 1333 }, { "entropy": 1.7773430248101552, "epoch": 0.14654911977149762, "grad_norm": 0.7237338423728943, "learning_rate": 1.9912154933208304e-05, "loss": 1.3179, "mean_token_accuracy": 0.669169470667839, "num_tokens": 222795017.0, "step": 1334 }, { "entropy": 1.718500792980194, "epoch": 0.14665897668287056, "grad_norm": 0.756379246711731, "learning_rate": 1.991193163216604e-05, "loss": 1.3977, "mean_token_accuracy": 0.6572215805451075, "num_tokens": 222920812.0, "step": 1335 }, { "entropy": 1.7016732394695282, "epoch": 0.1467688335942435, "grad_norm": 0.6574013829231262, "learning_rate": 1.9911708049065156e-05, "loss": 1.4359, "mean_token_accuracy": 0.6623116632302603, "num_tokens": 223102526.0, "step": 1336 }, { "entropy": 1.7057139774163563, "epoch": 0.14687869050561644, "grad_norm": 0.6829228401184082, "learning_rate": 1.991148418391273e-05, "loss": 1.4427, "mean_token_accuracy": 0.6601560066143671, "num_tokens": 223284435.0, "step": 1337 }, { "entropy": 1.7417578597863514, "epoch": 0.14698854741698938, "grad_norm": 0.7731585502624512, "learning_rate": 1.9911260036715847e-05, "loss": 1.2842, "mean_token_accuracy": 0.6613179345925649, "num_tokens": 223436682.0, "step": 1338 }, { "entropy": 1.7527393798033397, "epoch": 0.1470984043283623, "grad_norm": 0.741357147693634, "learning_rate": 1.9911035607481593e-05, "loss": 1.4087, "mean_token_accuracy": 0.6580107063055038, "num_tokens": 223598162.0, "step": 1339 }, { "entropy": 1.7267196973164876, "epoch": 0.14720826123973524, "grad_norm": 0.5621564388275146, "learning_rate": 1.991081089621708e-05, "loss": 1.3998, "mean_token_accuracy": 0.6478342314561208, "num_tokens": 223756258.0, "step": 1340 }, { "entropy": 1.741438736518224, "epoch": 0.14731811815110818, "grad_norm": 0.7508729100227356, "learning_rate": 1.991058590292942e-05, "loss": 1.6231, "mean_token_accuracy": 0.6208352545897166, "num_tokens": 223971677.0, "step": 1341 }, { "entropy": 1.6777463555335999, "epoch": 0.14742797506248112, "grad_norm": 0.7933493852615356, "learning_rate": 1.9910360627625727e-05, "loss": 1.3107, "mean_token_accuracy": 0.6797795047362646, "num_tokens": 224146005.0, "step": 1342 }, { "entropy": 1.7229306896527607, "epoch": 0.14753783197385406, "grad_norm": 0.8197740316390991, "learning_rate": 1.991013507031314e-05, "loss": 1.4139, "mean_token_accuracy": 0.6649947216113409, "num_tokens": 224323644.0, "step": 1343 }, { "entropy": 1.748872071504593, "epoch": 0.147647688885227, "grad_norm": 0.6963921785354614, "learning_rate": 1.9909909230998792e-05, "loss": 1.3859, "mean_token_accuracy": 0.6595577448606491, "num_tokens": 224447585.0, "step": 1344 }, { "entropy": 1.778872420390447, "epoch": 0.14775754579659992, "grad_norm": 0.7228877544403076, "learning_rate": 1.9909683109689832e-05, "loss": 1.4321, "mean_token_accuracy": 0.6432789415121078, "num_tokens": 224622414.0, "step": 1345 }, { "entropy": 1.7511253654956818, "epoch": 0.14786740270797286, "grad_norm": 0.6408666968345642, "learning_rate": 1.9909456706393412e-05, "loss": 1.432, "mean_token_accuracy": 0.6518001953760783, "num_tokens": 224786040.0, "step": 1346 }, { "entropy": 1.710090051094691, "epoch": 0.1479772596193458, "grad_norm": 0.5858021974563599, "learning_rate": 1.990923002111671e-05, "loss": 1.4401, "mean_token_accuracy": 0.6551679819822311, "num_tokens": 225004135.0, "step": 1347 }, { "entropy": 1.7244882980982463, "epoch": 0.14808711653071874, "grad_norm": 0.6452533006668091, "learning_rate": 1.9909003053866884e-05, "loss": 1.3192, "mean_token_accuracy": 0.6712134927511215, "num_tokens": 225190143.0, "step": 1348 }, { "entropy": 1.7229234476884205, "epoch": 0.14819697344209168, "grad_norm": 0.6897783875465393, "learning_rate": 1.990877580465113e-05, "loss": 1.3271, "mean_token_accuracy": 0.6669703423976898, "num_tokens": 225357614.0, "step": 1349 }, { "entropy": 1.7231020828088124, "epoch": 0.14830683035346462, "grad_norm": 0.7694151997566223, "learning_rate": 1.9908548273476634e-05, "loss": 1.3355, "mean_token_accuracy": 0.6548380752404531, "num_tokens": 225467576.0, "step": 1350 }, { "entropy": 1.6866406897703807, "epoch": 0.14841668726483756, "grad_norm": 0.6810007095336914, "learning_rate": 1.9908320460350604e-05, "loss": 1.3045, "mean_token_accuracy": 0.681728353103002, "num_tokens": 225624930.0, "step": 1351 }, { "entropy": 1.7498182157675426, "epoch": 0.14852654417621047, "grad_norm": 0.6989384293556213, "learning_rate": 1.990809236528024e-05, "loss": 1.527, "mean_token_accuracy": 0.6339794049660364, "num_tokens": 225834299.0, "step": 1352 }, { "entropy": 1.7136845489343007, "epoch": 0.14863640108758341, "grad_norm": 0.8216297626495361, "learning_rate": 1.990786398827277e-05, "loss": 1.3575, "mean_token_accuracy": 0.6578425218661627, "num_tokens": 226001216.0, "step": 1353 }, { "entropy": 1.7596096694469452, "epoch": 0.14874625799895635, "grad_norm": 0.7053200006484985, "learning_rate": 1.9907635329335417e-05, "loss": 1.39, "mean_token_accuracy": 0.6508052945137024, "num_tokens": 226164348.0, "step": 1354 }, { "entropy": 1.7615208824475606, "epoch": 0.1488561149103293, "grad_norm": 0.6843299269676208, "learning_rate": 1.990740638847542e-05, "loss": 1.443, "mean_token_accuracy": 0.648722713192304, "num_tokens": 226324003.0, "step": 1355 }, { "entropy": 1.73859507838885, "epoch": 0.14896597182170224, "grad_norm": 0.7353606820106506, "learning_rate": 1.9907177165700026e-05, "loss": 1.4766, "mean_token_accuracy": 0.6503659536441168, "num_tokens": 226517283.0, "step": 1356 }, { "entropy": 1.681709756453832, "epoch": 0.14907582873307518, "grad_norm": 0.709335207939148, "learning_rate": 1.9906947661016488e-05, "loss": 1.3196, "mean_token_accuracy": 0.6770564814408621, "num_tokens": 226653702.0, "step": 1357 }, { "entropy": 1.7128639618555705, "epoch": 0.14918568564444812, "grad_norm": 0.6673030257225037, "learning_rate": 1.9906717874432068e-05, "loss": 1.3176, "mean_token_accuracy": 0.6593418667713801, "num_tokens": 226783131.0, "step": 1358 }, { "entropy": 1.6683493653933208, "epoch": 0.14929554255582103, "grad_norm": 0.7545623183250427, "learning_rate": 1.9906487805954046e-05, "loss": 1.332, "mean_token_accuracy": 0.6640367060899734, "num_tokens": 226903165.0, "step": 1359 }, { "entropy": 1.7147560715675354, "epoch": 0.14940539946719397, "grad_norm": 0.6294198632240295, "learning_rate": 1.9906257455589693e-05, "loss": 1.3845, "mean_token_accuracy": 0.6489193687836329, "num_tokens": 227123559.0, "step": 1360 }, { "entropy": 1.7493448158105214, "epoch": 0.1495152563785669, "grad_norm": 0.7184653878211975, "learning_rate": 1.9906026823346304e-05, "loss": 1.3539, "mean_token_accuracy": 0.6593509018421173, "num_tokens": 227249956.0, "step": 1361 }, { "entropy": 1.8085836668809254, "epoch": 0.14962511328993985, "grad_norm": 0.751181960105896, "learning_rate": 1.9905795909231184e-05, "loss": 1.4853, "mean_token_accuracy": 0.648095632592837, "num_tokens": 227436120.0, "step": 1362 }, { "entropy": 1.8024127682050068, "epoch": 0.1497349702013128, "grad_norm": 0.6900661587715149, "learning_rate": 1.990556471325163e-05, "loss": 1.477, "mean_token_accuracy": 0.6429435362418493, "num_tokens": 227581787.0, "step": 1363 }, { "entropy": 1.761184275150299, "epoch": 0.14984482711268574, "grad_norm": 0.6370431184768677, "learning_rate": 1.9905333235414974e-05, "loss": 1.4608, "mean_token_accuracy": 0.6300752957661947, "num_tokens": 227798907.0, "step": 1364 }, { "entropy": 1.729689121246338, "epoch": 0.14995468402405868, "grad_norm": 0.6400964856147766, "learning_rate": 1.990510147572853e-05, "loss": 1.3613, "mean_token_accuracy": 0.6598215152819952, "num_tokens": 227928221.0, "step": 1365 }, { "entropy": 1.7528914511203766, "epoch": 0.1500645409354316, "grad_norm": 0.6673919558525085, "learning_rate": 1.9904869434199638e-05, "loss": 1.3918, "mean_token_accuracy": 0.6615985929965973, "num_tokens": 228047390.0, "step": 1366 }, { "entropy": 1.7323197424411774, "epoch": 0.15017439784680453, "grad_norm": 0.9341157674789429, "learning_rate": 1.9904637110835637e-05, "loss": 1.4983, "mean_token_accuracy": 0.651843269666036, "num_tokens": 228240212.0, "step": 1367 }, { "entropy": 1.7418803771336873, "epoch": 0.15028425475817747, "grad_norm": 0.7539012432098389, "learning_rate": 1.990440450564389e-05, "loss": 1.3946, "mean_token_accuracy": 0.652935266494751, "num_tokens": 228398736.0, "step": 1368 }, { "entropy": 1.7218880355358124, "epoch": 0.1503941116695504, "grad_norm": 0.6365805268287659, "learning_rate": 1.9904171618631745e-05, "loss": 1.3038, "mean_token_accuracy": 0.6718200296163559, "num_tokens": 228572349.0, "step": 1369 }, { "entropy": 1.7416918476422627, "epoch": 0.15050396858092335, "grad_norm": 0.6741893887519836, "learning_rate": 1.990393844980659e-05, "loss": 1.4381, "mean_token_accuracy": 0.6531198918819427, "num_tokens": 228733406.0, "step": 1370 }, { "entropy": 1.7413328488667805, "epoch": 0.1506138254922963, "grad_norm": 0.7957093715667725, "learning_rate": 1.9903704999175787e-05, "loss": 1.4825, "mean_token_accuracy": 0.6500815153121948, "num_tokens": 228933666.0, "step": 1371 }, { "entropy": 1.7816022833188374, "epoch": 0.15072368240366923, "grad_norm": 0.7574257850646973, "learning_rate": 1.990347126674674e-05, "loss": 1.4431, "mean_token_accuracy": 0.6395841191212336, "num_tokens": 229105787.0, "step": 1372 }, { "entropy": 1.7355500161647797, "epoch": 0.15083353931504215, "grad_norm": 0.7089441418647766, "learning_rate": 1.9903237252526834e-05, "loss": 1.3002, "mean_token_accuracy": 0.665576363603274, "num_tokens": 229232132.0, "step": 1373 }, { "entropy": 1.7314164439837139, "epoch": 0.1509433962264151, "grad_norm": 0.6388537287712097, "learning_rate": 1.9903002956523483e-05, "loss": 1.378, "mean_token_accuracy": 0.649604876836141, "num_tokens": 229425376.0, "step": 1374 }, { "entropy": 1.7354730864365895, "epoch": 0.15105325313778803, "grad_norm": 0.6285607218742371, "learning_rate": 1.99027683787441e-05, "loss": 1.3263, "mean_token_accuracy": 0.6639270832141241, "num_tokens": 229590289.0, "step": 1375 }, { "entropy": 1.7280907134215038, "epoch": 0.15116311004916097, "grad_norm": 0.6440637111663818, "learning_rate": 1.990253351919611e-05, "loss": 1.3824, "mean_token_accuracy": 0.6578231900930405, "num_tokens": 229803918.0, "step": 1376 }, { "entropy": 1.7382831076780956, "epoch": 0.1512729669605339, "grad_norm": 0.6672256588935852, "learning_rate": 1.9902298377886946e-05, "loss": 1.3339, "mean_token_accuracy": 0.6681515922149023, "num_tokens": 229969814.0, "step": 1377 }, { "entropy": 1.7324320872624714, "epoch": 0.15138282387190685, "grad_norm": 0.7574326992034912, "learning_rate": 1.990206295482405e-05, "loss": 1.4158, "mean_token_accuracy": 0.6632709354162216, "num_tokens": 230122719.0, "step": 1378 }, { "entropy": 1.6780237257480621, "epoch": 0.15149268078327977, "grad_norm": 0.6588215231895447, "learning_rate": 1.990182725001487e-05, "loss": 1.2503, "mean_token_accuracy": 0.6718401412169138, "num_tokens": 230236303.0, "step": 1379 }, { "entropy": 1.711432198683421, "epoch": 0.1516025376946527, "grad_norm": 0.6185994744300842, "learning_rate": 1.9901591263466872e-05, "loss": 1.4058, "mean_token_accuracy": 0.6546655098597208, "num_tokens": 230396650.0, "step": 1380 }, { "entropy": 1.7140692472457886, "epoch": 0.15171239460602565, "grad_norm": 0.6763261556625366, "learning_rate": 1.9901354995187517e-05, "loss": 1.3125, "mean_token_accuracy": 0.6666051745414734, "num_tokens": 230512660.0, "step": 1381 }, { "entropy": 1.7215290268262227, "epoch": 0.1518222515173986, "grad_norm": 0.6929253935813904, "learning_rate": 1.9901118445184292e-05, "loss": 1.2935, "mean_token_accuracy": 0.6653975496689478, "num_tokens": 230644801.0, "step": 1382 }, { "entropy": 1.744094043970108, "epoch": 0.15193210842877153, "grad_norm": 0.67906653881073, "learning_rate": 1.990088161346468e-05, "loss": 1.5015, "mean_token_accuracy": 0.633465126156807, "num_tokens": 230843687.0, "step": 1383 }, { "entropy": 1.714278946320216, "epoch": 0.15204196534014447, "grad_norm": 0.6904776692390442, "learning_rate": 1.9900644500036174e-05, "loss": 1.3465, "mean_token_accuracy": 0.6527802546819051, "num_tokens": 231002297.0, "step": 1384 }, { "entropy": 1.7312476833661397, "epoch": 0.1521518222515174, "grad_norm": 0.6808450818061829, "learning_rate": 1.990040710490628e-05, "loss": 1.3135, "mean_token_accuracy": 0.6642954846223196, "num_tokens": 231185181.0, "step": 1385 }, { "entropy": 1.7672143479188283, "epoch": 0.15226167916289032, "grad_norm": 0.7930114269256592, "learning_rate": 1.990016942808251e-05, "loss": 1.3884, "mean_token_accuracy": 0.6474938144286474, "num_tokens": 231335247.0, "step": 1386 }, { "entropy": 1.7301386694113414, "epoch": 0.15237153607426326, "grad_norm": 0.7413761615753174, "learning_rate": 1.989993146957239e-05, "loss": 1.5081, "mean_token_accuracy": 0.6514854778846105, "num_tokens": 231490527.0, "step": 1387 }, { "entropy": 1.6940825978914897, "epoch": 0.1524813929856362, "grad_norm": 0.6954035758972168, "learning_rate": 1.9899693229383447e-05, "loss": 1.3801, "mean_token_accuracy": 0.6533026595910391, "num_tokens": 231707365.0, "step": 1388 }, { "entropy": 1.7286064724127452, "epoch": 0.15259124989700915, "grad_norm": 0.7006916999816895, "learning_rate": 1.9899454707523228e-05, "loss": 1.3657, "mean_token_accuracy": 0.6559472481409708, "num_tokens": 231838489.0, "step": 1389 }, { "entropy": 1.689795325199763, "epoch": 0.1527011068083821, "grad_norm": 0.7353735566139221, "learning_rate": 1.9899215903999272e-05, "loss": 1.4271, "mean_token_accuracy": 0.6647296249866486, "num_tokens": 231989155.0, "step": 1390 }, { "entropy": 1.8015301525592804, "epoch": 0.15281096371975503, "grad_norm": 0.8433383703231812, "learning_rate": 1.989897681881915e-05, "loss": 1.4108, "mean_token_accuracy": 0.659388080239296, "num_tokens": 232130833.0, "step": 1391 }, { "entropy": 1.7052685618400574, "epoch": 0.15292082063112797, "grad_norm": 0.7113513946533203, "learning_rate": 1.989873745199042e-05, "loss": 1.3169, "mean_token_accuracy": 0.6690341283877691, "num_tokens": 232299980.0, "step": 1392 }, { "entropy": 1.7220211327075958, "epoch": 0.15303067754250088, "grad_norm": 0.6374592781066895, "learning_rate": 1.9898497803520652e-05, "loss": 1.3122, "mean_token_accuracy": 0.6635673840840658, "num_tokens": 232454219.0, "step": 1393 }, { "entropy": 1.6884993215401967, "epoch": 0.15314053445387382, "grad_norm": 0.5745070576667786, "learning_rate": 1.9898257873417445e-05, "loss": 1.4229, "mean_token_accuracy": 0.6347446690003077, "num_tokens": 232668443.0, "step": 1394 }, { "entropy": 1.7229583462079365, "epoch": 0.15325039136524676, "grad_norm": 0.6316061615943909, "learning_rate": 1.9898017661688384e-05, "loss": 1.4632, "mean_token_accuracy": 0.651705930630366, "num_tokens": 232827806.0, "step": 1395 }, { "entropy": 1.7420443991820018, "epoch": 0.1533602482766197, "grad_norm": 0.8270453810691833, "learning_rate": 1.9897777168341078e-05, "loss": 1.3791, "mean_token_accuracy": 0.6554538011550903, "num_tokens": 232976688.0, "step": 1396 }, { "entropy": 1.7458803057670593, "epoch": 0.15347010518799264, "grad_norm": 0.725121021270752, "learning_rate": 1.9897536393383126e-05, "loss": 1.4105, "mean_token_accuracy": 0.6579022953907648, "num_tokens": 233112726.0, "step": 1397 }, { "entropy": 1.7431099613507588, "epoch": 0.15357996209936559, "grad_norm": 0.7982557415962219, "learning_rate": 1.9897295336822163e-05, "loss": 1.2854, "mean_token_accuracy": 0.6716776788234711, "num_tokens": 233223044.0, "step": 1398 }, { "entropy": 1.7481550176938374, "epoch": 0.15368981901073853, "grad_norm": 0.7132703065872192, "learning_rate": 1.989705399866581e-05, "loss": 1.5079, "mean_token_accuracy": 0.6489944805701574, "num_tokens": 233380580.0, "step": 1399 }, { "entropy": 1.7383308410644531, "epoch": 0.15379967592211144, "grad_norm": 0.7662017941474915, "learning_rate": 1.9896812378921705e-05, "loss": 1.5184, "mean_token_accuracy": 0.6418096820513407, "num_tokens": 233544584.0, "step": 1400 }, { "entropy": 1.7894498109817505, "epoch": 0.15390953283348438, "grad_norm": 0.6829231977462769, "learning_rate": 1.98965704775975e-05, "loss": 1.4861, "mean_token_accuracy": 0.6324234555164973, "num_tokens": 233743461.0, "step": 1401 }, { "entropy": 1.7661231060822804, "epoch": 0.15401938974485732, "grad_norm": 0.758860170841217, "learning_rate": 1.989632829470085e-05, "loss": 1.3311, "mean_token_accuracy": 0.6602408438920975, "num_tokens": 233886963.0, "step": 1402 }, { "entropy": 1.8040996094544728, "epoch": 0.15412924665623026, "grad_norm": 0.7048920392990112, "learning_rate": 1.989608583023941e-05, "loss": 1.4562, "mean_token_accuracy": 0.6474844366312027, "num_tokens": 234039711.0, "step": 1403 }, { "entropy": 1.6952104270458221, "epoch": 0.1542391035676032, "grad_norm": 0.6951699256896973, "learning_rate": 1.989584308422087e-05, "loss": 1.5166, "mean_token_accuracy": 0.666146586338679, "num_tokens": 234184971.0, "step": 1404 }, { "entropy": 1.7021946410338085, "epoch": 0.15434896047897614, "grad_norm": 0.6730145812034607, "learning_rate": 1.9895600056652904e-05, "loss": 1.3321, "mean_token_accuracy": 0.6629207084576288, "num_tokens": 234305549.0, "step": 1405 }, { "entropy": 1.7574187914530437, "epoch": 0.15445881739034906, "grad_norm": 0.6781946420669556, "learning_rate": 1.98953567475432e-05, "loss": 1.3731, "mean_token_accuracy": 0.6600083112716675, "num_tokens": 234429812.0, "step": 1406 }, { "entropy": 1.7185521523157756, "epoch": 0.154568674301722, "grad_norm": 0.6366341710090637, "learning_rate": 1.9895113156899468e-05, "loss": 1.4108, "mean_token_accuracy": 0.6553384065628052, "num_tokens": 234591666.0, "step": 1407 }, { "entropy": 1.7048729260762532, "epoch": 0.15467853121309494, "grad_norm": 0.7763659358024597, "learning_rate": 1.989486928472941e-05, "loss": 1.2446, "mean_token_accuracy": 0.682253509759903, "num_tokens": 234729738.0, "step": 1408 }, { "entropy": 1.7599883476893108, "epoch": 0.15478838812446788, "grad_norm": 0.6926746368408203, "learning_rate": 1.9894625131040746e-05, "loss": 1.3447, "mean_token_accuracy": 0.658067504564921, "num_tokens": 234885495.0, "step": 1409 }, { "entropy": 1.7993106842041016, "epoch": 0.15489824503584082, "grad_norm": 0.895706295967102, "learning_rate": 1.9894380695841207e-05, "loss": 1.6742, "mean_token_accuracy": 0.6284699141979218, "num_tokens": 235065714.0, "step": 1410 }, { "entropy": 1.7256481846173604, "epoch": 0.15500810194721376, "grad_norm": 0.6546118855476379, "learning_rate": 1.989413597913853e-05, "loss": 1.3875, "mean_token_accuracy": 0.6590452939271927, "num_tokens": 235210267.0, "step": 1411 }, { "entropy": 1.7416211764017742, "epoch": 0.1551179588585867, "grad_norm": 0.79004967212677, "learning_rate": 1.9893890980940456e-05, "loss": 1.388, "mean_token_accuracy": 0.6537879854440689, "num_tokens": 235364422.0, "step": 1412 }, { "entropy": 1.7044924398263295, "epoch": 0.15522781576995962, "grad_norm": 0.5850828289985657, "learning_rate": 1.9893645701254737e-05, "loss": 1.3368, "mean_token_accuracy": 0.666097084681193, "num_tokens": 235548464.0, "step": 1413 }, { "entropy": 1.757189800341924, "epoch": 0.15533767268133256, "grad_norm": 0.6978262066841125, "learning_rate": 1.9893400140089138e-05, "loss": 1.3671, "mean_token_accuracy": 0.651861180861791, "num_tokens": 235702467.0, "step": 1414 }, { "entropy": 1.7016917367776234, "epoch": 0.1554475295927055, "grad_norm": 0.934744656085968, "learning_rate": 1.9893154297451437e-05, "loss": 1.4425, "mean_token_accuracy": 0.6469552119572958, "num_tokens": 235865582.0, "step": 1415 }, { "entropy": 1.6989723841349285, "epoch": 0.15555738650407844, "grad_norm": 0.7889364361763, "learning_rate": 1.9892908173349405e-05, "loss": 1.3199, "mean_token_accuracy": 0.6727441449960073, "num_tokens": 235991724.0, "step": 1416 }, { "entropy": 1.6890127261479695, "epoch": 0.15566724341545138, "grad_norm": 0.8580669164657593, "learning_rate": 1.989266176779084e-05, "loss": 1.3835, "mean_token_accuracy": 0.6598442941904068, "num_tokens": 236149820.0, "step": 1417 }, { "entropy": 1.765044758717219, "epoch": 0.15577710032682432, "grad_norm": 0.7060015797615051, "learning_rate": 1.9892415080783535e-05, "loss": 1.397, "mean_token_accuracy": 0.6596352259318033, "num_tokens": 236275889.0, "step": 1418 }, { "entropy": 1.7240539093812306, "epoch": 0.15588695723819726, "grad_norm": 0.6542495489120483, "learning_rate": 1.9892168112335303e-05, "loss": 1.4078, "mean_token_accuracy": 0.6570458362499872, "num_tokens": 236448746.0, "step": 1419 }, { "entropy": 1.686035692691803, "epoch": 0.15599681414957017, "grad_norm": 0.7507334351539612, "learning_rate": 1.9891920862453954e-05, "loss": 1.5029, "mean_token_accuracy": 0.6390158931414286, "num_tokens": 236653518.0, "step": 1420 }, { "entropy": 1.751685917377472, "epoch": 0.15610667106094311, "grad_norm": 0.7609370946884155, "learning_rate": 1.9891673331147315e-05, "loss": 1.3791, "mean_token_accuracy": 0.6601094206174215, "num_tokens": 236785623.0, "step": 1421 }, { "entropy": 1.7528755863507588, "epoch": 0.15621652797231605, "grad_norm": 0.6571503281593323, "learning_rate": 1.9891425518423225e-05, "loss": 1.329, "mean_token_accuracy": 0.6659893939892451, "num_tokens": 236959510.0, "step": 1422 }, { "entropy": 1.8252331515153248, "epoch": 0.156326384883689, "grad_norm": 0.6614378094673157, "learning_rate": 1.9891177424289524e-05, "loss": 1.4472, "mean_token_accuracy": 0.6394238173961639, "num_tokens": 237176421.0, "step": 1423 }, { "entropy": 1.7615261673927307, "epoch": 0.15643624179506194, "grad_norm": 0.6956297755241394, "learning_rate": 1.989092904875406e-05, "loss": 1.5164, "mean_token_accuracy": 0.6591284970442454, "num_tokens": 237347871.0, "step": 1424 }, { "entropy": 1.7877203325430553, "epoch": 0.15654609870643488, "grad_norm": 0.6468766331672668, "learning_rate": 1.9890680391824703e-05, "loss": 1.5098, "mean_token_accuracy": 0.6396308938662211, "num_tokens": 237543259.0, "step": 1425 }, { "entropy": 1.7254225611686707, "epoch": 0.15665595561780782, "grad_norm": 0.6719499826431274, "learning_rate": 1.9890431453509317e-05, "loss": 1.5274, "mean_token_accuracy": 0.6330312093098959, "num_tokens": 237710891.0, "step": 1426 }, { "entropy": 1.6991487741470337, "epoch": 0.15676581252918073, "grad_norm": 0.7204530835151672, "learning_rate": 1.9890182233815777e-05, "loss": 1.2901, "mean_token_accuracy": 0.6699622919162115, "num_tokens": 237850728.0, "step": 1427 }, { "entropy": 1.7319742838541667, "epoch": 0.15687566944055367, "grad_norm": 0.7595884203910828, "learning_rate": 1.988993273275198e-05, "loss": 1.3026, "mean_token_accuracy": 0.6677504330873489, "num_tokens": 238048528.0, "step": 1428 }, { "entropy": 1.7359434564908345, "epoch": 0.1569855263519266, "grad_norm": 0.6204552054405212, "learning_rate": 1.9889682950325814e-05, "loss": 1.2607, "mean_token_accuracy": 0.6708816637595495, "num_tokens": 238201749.0, "step": 1429 }, { "entropy": 1.7319020132223766, "epoch": 0.15709538326329955, "grad_norm": 0.7870994806289673, "learning_rate": 1.988943288654519e-05, "loss": 1.4783, "mean_token_accuracy": 0.6482026080290476, "num_tokens": 238388738.0, "step": 1430 }, { "entropy": 1.7651021579901378, "epoch": 0.1572052401746725, "grad_norm": 0.6856090426445007, "learning_rate": 1.9889182541418025e-05, "loss": 1.4452, "mean_token_accuracy": 0.6410435736179352, "num_tokens": 238573579.0, "step": 1431 }, { "entropy": 1.7478333910306294, "epoch": 0.15731509708604544, "grad_norm": 0.8158244490623474, "learning_rate": 1.9888931914952233e-05, "loss": 1.391, "mean_token_accuracy": 0.6592821230491003, "num_tokens": 238712138.0, "step": 1432 }, { "entropy": 1.7397787074247997, "epoch": 0.15742495399741835, "grad_norm": 0.7736004590988159, "learning_rate": 1.9888681007155754e-05, "loss": 1.3865, "mean_token_accuracy": 0.6538245578606924, "num_tokens": 238883329.0, "step": 1433 }, { "entropy": 1.7427138984203339, "epoch": 0.1575348109087913, "grad_norm": 0.6095617413520813, "learning_rate": 1.9888429818036526e-05, "loss": 1.456, "mean_token_accuracy": 0.6447610855102539, "num_tokens": 239158770.0, "step": 1434 }, { "entropy": 1.7088470856348674, "epoch": 0.15764466782016423, "grad_norm": 0.6134941577911377, "learning_rate": 1.98881783476025e-05, "loss": 1.3449, "mean_token_accuracy": 0.6597955723603567, "num_tokens": 239303140.0, "step": 1435 }, { "entropy": 1.731537361939748, "epoch": 0.15775452473153717, "grad_norm": 0.6490273475646973, "learning_rate": 1.988792659586163e-05, "loss": 1.2962, "mean_token_accuracy": 0.6675442407528559, "num_tokens": 239429104.0, "step": 1436 }, { "entropy": 1.7000961601734161, "epoch": 0.1578643816429101, "grad_norm": 0.6515488028526306, "learning_rate": 1.9887674562821892e-05, "loss": 1.4389, "mean_token_accuracy": 0.659287025531133, "num_tokens": 239630290.0, "step": 1437 }, { "entropy": 1.6991292238235474, "epoch": 0.15797423855428305, "grad_norm": 0.630832850933075, "learning_rate": 1.9887422248491263e-05, "loss": 1.3143, "mean_token_accuracy": 0.6765478601058325, "num_tokens": 239751335.0, "step": 1438 }, { "entropy": 1.7824784815311432, "epoch": 0.158084095465656, "grad_norm": 0.7180033326148987, "learning_rate": 1.988716965287772e-05, "loss": 1.373, "mean_token_accuracy": 0.6581806441148123, "num_tokens": 239933924.0, "step": 1439 }, { "entropy": 1.732376217842102, "epoch": 0.1581939523770289, "grad_norm": 0.7710111141204834, "learning_rate": 1.9886916775989263e-05, "loss": 1.3055, "mean_token_accuracy": 0.674410010377566, "num_tokens": 240093624.0, "step": 1440 }, { "entropy": 1.6590780516465504, "epoch": 0.15830380928840185, "grad_norm": 0.702910840511322, "learning_rate": 1.988666361783389e-05, "loss": 1.233, "mean_token_accuracy": 0.6768196622530619, "num_tokens": 240232409.0, "step": 1441 }, { "entropy": 1.7165300846099854, "epoch": 0.1584136661997748, "grad_norm": 0.6681031584739685, "learning_rate": 1.9886410178419624e-05, "loss": 1.32, "mean_token_accuracy": 0.6700728883345922, "num_tokens": 240390864.0, "step": 1442 }, { "entropy": 1.7243541578451793, "epoch": 0.15852352311114773, "grad_norm": 0.6899517178535461, "learning_rate": 1.9886156457754476e-05, "loss": 1.2152, "mean_token_accuracy": 0.6867374628782272, "num_tokens": 240554611.0, "step": 1443 }, { "entropy": 1.6928447286287944, "epoch": 0.15863338002252067, "grad_norm": 0.698421835899353, "learning_rate": 1.9885902455846486e-05, "loss": 1.3928, "mean_token_accuracy": 0.6612624774376551, "num_tokens": 240762372.0, "step": 1444 }, { "entropy": 1.6924518247445424, "epoch": 0.1587432369338936, "grad_norm": 0.6793832182884216, "learning_rate": 1.988564817270368e-05, "loss": 1.3986, "mean_token_accuracy": 0.649079958597819, "num_tokens": 240915721.0, "step": 1445 }, { "entropy": 1.7608232696851094, "epoch": 0.15885309384526655, "grad_norm": 0.7623583078384399, "learning_rate": 1.988539360833412e-05, "loss": 1.4448, "mean_token_accuracy": 0.6457181026538213, "num_tokens": 241082642.0, "step": 1446 }, { "entropy": 1.709537297487259, "epoch": 0.15896295075663947, "grad_norm": 0.7644019722938538, "learning_rate": 1.988513876274585e-05, "loss": 1.5179, "mean_token_accuracy": 0.6708864470322927, "num_tokens": 241260001.0, "step": 1447 }, { "entropy": 1.7161897718906403, "epoch": 0.1590728076680124, "grad_norm": 0.7442562580108643, "learning_rate": 1.9884883635946946e-05, "loss": 1.4027, "mean_token_accuracy": 0.6529113352298737, "num_tokens": 241416125.0, "step": 1448 }, { "entropy": 1.784531682729721, "epoch": 0.15918266457938535, "grad_norm": 0.7441882491111755, "learning_rate": 1.988462822794548e-05, "loss": 1.5471, "mean_token_accuracy": 0.6351829022169113, "num_tokens": 241538589.0, "step": 1449 }, { "entropy": 1.8008837799231212, "epoch": 0.1592925214907583, "grad_norm": 0.6900771260261536, "learning_rate": 1.988437253874953e-05, "loss": 1.3957, "mean_token_accuracy": 0.6521695852279663, "num_tokens": 241676766.0, "step": 1450 }, { "entropy": 1.7292368113994598, "epoch": 0.15940237840213123, "grad_norm": 0.7718958854675293, "learning_rate": 1.9884116568367197e-05, "loss": 1.3787, "mean_token_accuracy": 0.6554831564426422, "num_tokens": 241893683.0, "step": 1451 }, { "entropy": 1.722067544857661, "epoch": 0.15951223531350417, "grad_norm": 0.7179570198059082, "learning_rate": 1.9883860316806574e-05, "loss": 1.3341, "mean_token_accuracy": 0.6695725172758102, "num_tokens": 242045379.0, "step": 1452 }, { "entropy": 1.7238109707832336, "epoch": 0.1596220922248771, "grad_norm": 0.7079585790634155, "learning_rate": 1.9883603784075775e-05, "loss": 1.2653, "mean_token_accuracy": 0.668131892879804, "num_tokens": 242161184.0, "step": 1453 }, { "entropy": 1.7084789176781972, "epoch": 0.15973194913625002, "grad_norm": 0.7071990966796875, "learning_rate": 1.988334697018292e-05, "loss": 1.2958, "mean_token_accuracy": 0.6706744233767191, "num_tokens": 242318729.0, "step": 1454 }, { "entropy": 1.6312975188096364, "epoch": 0.15984180604762296, "grad_norm": 0.6232081055641174, "learning_rate": 1.9883089875136138e-05, "loss": 1.524, "mean_token_accuracy": 0.6439757943153381, "num_tokens": 242585751.0, "step": 1455 }, { "entropy": 1.730732500553131, "epoch": 0.1599516629589959, "grad_norm": 0.8543137311935425, "learning_rate": 1.9882832498943565e-05, "loss": 1.5473, "mean_token_accuracy": 0.650491843620936, "num_tokens": 242744635.0, "step": 1456 }, { "entropy": 1.7197604576746623, "epoch": 0.16006151987036885, "grad_norm": 0.6170863509178162, "learning_rate": 1.9882574841613343e-05, "loss": 1.3721, "mean_token_accuracy": 0.6532426675160726, "num_tokens": 242906122.0, "step": 1457 }, { "entropy": 1.6996217370033264, "epoch": 0.1601713767817418, "grad_norm": 0.6345753073692322, "learning_rate": 1.988231690315363e-05, "loss": 1.4325, "mean_token_accuracy": 0.6547591636578242, "num_tokens": 243090125.0, "step": 1458 }, { "entropy": 1.7933961947758992, "epoch": 0.16028123369311473, "grad_norm": 0.6274416446685791, "learning_rate": 1.9882058683572592e-05, "loss": 1.5511, "mean_token_accuracy": 0.6325584451357523, "num_tokens": 243343304.0, "step": 1459 }, { "entropy": 1.6818243861198425, "epoch": 0.16039109060448767, "grad_norm": 0.8472453355789185, "learning_rate": 1.9881800182878398e-05, "loss": 1.3233, "mean_token_accuracy": 0.6590960721174876, "num_tokens": 243460895.0, "step": 1460 }, { "entropy": 1.7377244929472606, "epoch": 0.16050094751586058, "grad_norm": 0.6753596067428589, "learning_rate": 1.988154140107923e-05, "loss": 1.3971, "mean_token_accuracy": 0.6536543518304825, "num_tokens": 243660279.0, "step": 1461 }, { "entropy": 1.7900232076644897, "epoch": 0.16061080442723352, "grad_norm": 0.7756820917129517, "learning_rate": 1.9881282338183277e-05, "loss": 1.3319, "mean_token_accuracy": 0.6654796799023946, "num_tokens": 243799068.0, "step": 1462 }, { "entropy": 1.659742573897044, "epoch": 0.16072066133860646, "grad_norm": 0.6284655332565308, "learning_rate": 1.9881022994198744e-05, "loss": 1.4305, "mean_token_accuracy": 0.65923244257768, "num_tokens": 243991348.0, "step": 1463 }, { "entropy": 1.7306521832942963, "epoch": 0.1608305182499794, "grad_norm": 0.6783806681632996, "learning_rate": 1.988076336913383e-05, "loss": 1.347, "mean_token_accuracy": 0.6588092744350433, "num_tokens": 244144588.0, "step": 1464 }, { "entropy": 1.690244237581889, "epoch": 0.16094037516135234, "grad_norm": 0.7397460341453552, "learning_rate": 1.9880503462996763e-05, "loss": 1.5079, "mean_token_accuracy": 0.6375847011804581, "num_tokens": 244393970.0, "step": 1465 }, { "entropy": 1.6876225968201954, "epoch": 0.16105023207272529, "grad_norm": 0.7748399376869202, "learning_rate": 1.9880243275795758e-05, "loss": 1.1538, "mean_token_accuracy": 0.6858840386072794, "num_tokens": 244502713.0, "step": 1466 }, { "entropy": 1.6742197672526042, "epoch": 0.1611600889840982, "grad_norm": 0.6609341502189636, "learning_rate": 1.987998280753906e-05, "loss": 1.2905, "mean_token_accuracy": 0.6699156562487284, "num_tokens": 244680505.0, "step": 1467 }, { "entropy": 1.8177895247936249, "epoch": 0.16126994589547114, "grad_norm": 0.6985216736793518, "learning_rate": 1.9879722058234903e-05, "loss": 1.419, "mean_token_accuracy": 0.6576731552680334, "num_tokens": 244816173.0, "step": 1468 }, { "entropy": 1.7414447963237762, "epoch": 0.16137980280684408, "grad_norm": 0.7506465911865234, "learning_rate": 1.9879461027891546e-05, "loss": 1.3904, "mean_token_accuracy": 0.6590597579876581, "num_tokens": 244951775.0, "step": 1469 }, { "entropy": 1.676265945037206, "epoch": 0.16148965971821702, "grad_norm": 0.5871666669845581, "learning_rate": 1.9879199716517247e-05, "loss": 1.2981, "mean_token_accuracy": 0.6679906199375788, "num_tokens": 245154813.0, "step": 1470 }, { "entropy": 1.7278977930545807, "epoch": 0.16159951662958996, "grad_norm": 0.6722689270973206, "learning_rate": 1.987893812412028e-05, "loss": 1.5139, "mean_token_accuracy": 0.6363293901085854, "num_tokens": 245349545.0, "step": 1471 }, { "entropy": 1.6537324488162994, "epoch": 0.1617093735409629, "grad_norm": 0.7102640867233276, "learning_rate": 1.9878676250708922e-05, "loss": 1.3051, "mean_token_accuracy": 0.6738253484169642, "num_tokens": 245523769.0, "step": 1472 }, { "entropy": 1.7391831477483113, "epoch": 0.16181923045233584, "grad_norm": 0.7415077090263367, "learning_rate": 1.9878414096291462e-05, "loss": 1.3022, "mean_token_accuracy": 0.6666351109743118, "num_tokens": 245682976.0, "step": 1473 }, { "entropy": 1.7627001007397969, "epoch": 0.16192908736370876, "grad_norm": 0.6851019263267517, "learning_rate": 1.9878151660876195e-05, "loss": 1.4372, "mean_token_accuracy": 0.6531008581320444, "num_tokens": 245828968.0, "step": 1474 }, { "entropy": 1.7610424359639485, "epoch": 0.1620389442750817, "grad_norm": 0.7833350896835327, "learning_rate": 1.9877888944471432e-05, "loss": 1.3799, "mean_token_accuracy": 0.6726367622613907, "num_tokens": 245963753.0, "step": 1475 }, { "entropy": 1.7145276069641113, "epoch": 0.16214880118645464, "grad_norm": 0.7759976983070374, "learning_rate": 1.9877625947085478e-05, "loss": 1.5535, "mean_token_accuracy": 0.6358107725779215, "num_tokens": 246109245.0, "step": 1476 }, { "entropy": 1.717542956272761, "epoch": 0.16225865809782758, "grad_norm": 0.6734223961830139, "learning_rate": 1.987736266872667e-05, "loss": 1.535, "mean_token_accuracy": 0.643081416686376, "num_tokens": 246266085.0, "step": 1477 }, { "entropy": 1.7120660841464996, "epoch": 0.16236851500920052, "grad_norm": 0.6512724161148071, "learning_rate": 1.987709910940333e-05, "loss": 1.3746, "mean_token_accuracy": 0.6628182381391525, "num_tokens": 246450821.0, "step": 1478 }, { "entropy": 1.7386384705702465, "epoch": 0.16247837192057346, "grad_norm": 1.0366206169128418, "learning_rate": 1.9876835269123806e-05, "loss": 1.4757, "mean_token_accuracy": 0.6452366163333257, "num_tokens": 246654303.0, "step": 1479 }, { "entropy": 1.7971782286961873, "epoch": 0.1625882288319464, "grad_norm": 0.6871124505996704, "learning_rate": 1.987657114789644e-05, "loss": 1.3246, "mean_token_accuracy": 0.6647598246733347, "num_tokens": 246766857.0, "step": 1480 }, { "entropy": 1.7070013185342152, "epoch": 0.16269808574331932, "grad_norm": 0.7218469381332397, "learning_rate": 1.98763067457296e-05, "loss": 1.4415, "mean_token_accuracy": 0.6488531132539114, "num_tokens": 246930724.0, "step": 1481 }, { "entropy": 1.6882566809654236, "epoch": 0.16280794265469226, "grad_norm": 0.6939437389373779, "learning_rate": 1.9876042062631655e-05, "loss": 1.3713, "mean_token_accuracy": 0.6539578934510549, "num_tokens": 247155211.0, "step": 1482 }, { "entropy": 1.6750567058722179, "epoch": 0.1629177995660652, "grad_norm": 0.5919098258018494, "learning_rate": 1.9875777098610973e-05, "loss": 1.3869, "mean_token_accuracy": 0.6595859378576279, "num_tokens": 247370725.0, "step": 1483 }, { "entropy": 1.6641695896784465, "epoch": 0.16302765647743814, "grad_norm": 0.6180868148803711, "learning_rate": 1.9875511853675952e-05, "loss": 1.5215, "mean_token_accuracy": 0.6372140099604925, "num_tokens": 247577638.0, "step": 1484 }, { "entropy": 1.641531765460968, "epoch": 0.16313751338881108, "grad_norm": 0.6315323710441589, "learning_rate": 1.9875246327834973e-05, "loss": 1.4135, "mean_token_accuracy": 0.647536481420199, "num_tokens": 247794309.0, "step": 1485 }, { "entropy": 1.7311479051907857, "epoch": 0.16324737030018402, "grad_norm": 0.6436353921890259, "learning_rate": 1.987498052109645e-05, "loss": 1.3109, "mean_token_accuracy": 0.6649970014890035, "num_tokens": 247932675.0, "step": 1486 }, { "entropy": 1.6989285945892334, "epoch": 0.16335722721155696, "grad_norm": 0.7341669201850891, "learning_rate": 1.9874714433468792e-05, "loss": 1.3223, "mean_token_accuracy": 0.6625909308592478, "num_tokens": 248085847.0, "step": 1487 }, { "entropy": 1.729597012201945, "epoch": 0.16346708412292987, "grad_norm": 0.6755861639976501, "learning_rate": 1.9874448064960422e-05, "loss": 1.502, "mean_token_accuracy": 0.6398074775934219, "num_tokens": 248277427.0, "step": 1488 }, { "entropy": 1.6631129284699757, "epoch": 0.16357694103430281, "grad_norm": 0.6498894095420837, "learning_rate": 1.987418141557977e-05, "loss": 1.2334, "mean_token_accuracy": 0.6779984682798386, "num_tokens": 248423875.0, "step": 1489 }, { "entropy": 1.7226392328739166, "epoch": 0.16368679794567575, "grad_norm": 0.5937130451202393, "learning_rate": 1.9873914485335274e-05, "loss": 1.4005, "mean_token_accuracy": 0.6503377507130305, "num_tokens": 248638269.0, "step": 1490 }, { "entropy": 1.7222268283367157, "epoch": 0.1637966548570487, "grad_norm": 0.7151001691818237, "learning_rate": 1.9873647274235384e-05, "loss": 1.4244, "mean_token_accuracy": 0.6669965138038, "num_tokens": 248764528.0, "step": 1491 }, { "entropy": 1.7631146212418873, "epoch": 0.16390651176842164, "grad_norm": 0.7749632000923157, "learning_rate": 1.9873379782288555e-05, "loss": 1.3909, "mean_token_accuracy": 0.6470388472080231, "num_tokens": 248911474.0, "step": 1492 }, { "entropy": 1.7954902946949005, "epoch": 0.16401636867979458, "grad_norm": 0.6423087120056152, "learning_rate": 1.9873112009503256e-05, "loss": 1.4206, "mean_token_accuracy": 0.649329255024592, "num_tokens": 249077416.0, "step": 1493 }, { "entropy": 1.6648548245429993, "epoch": 0.1641262255911675, "grad_norm": 0.9466790556907654, "learning_rate": 1.987284395588796e-05, "loss": 1.4267, "mean_token_accuracy": 0.6575258076190948, "num_tokens": 249211722.0, "step": 1494 }, { "entropy": 1.767043113708496, "epoch": 0.16423608250254043, "grad_norm": 0.6665019989013672, "learning_rate": 1.987257562145115e-05, "loss": 1.3619, "mean_token_accuracy": 0.6605860988299052, "num_tokens": 249442176.0, "step": 1495 }, { "entropy": 1.7210952043533325, "epoch": 0.16434593941391337, "grad_norm": 0.769982099533081, "learning_rate": 1.987230700620132e-05, "loss": 1.3586, "mean_token_accuracy": 0.6576197892427444, "num_tokens": 249569258.0, "step": 1496 }, { "entropy": 1.7499938607215881, "epoch": 0.1644557963252863, "grad_norm": 0.8687669634819031, "learning_rate": 1.987203811014697e-05, "loss": 1.4874, "mean_token_accuracy": 0.6517137040694555, "num_tokens": 249739841.0, "step": 1497 }, { "entropy": 1.7234773536523182, "epoch": 0.16456565323665925, "grad_norm": 0.7829402089118958, "learning_rate": 1.9871768933296616e-05, "loss": 1.3424, "mean_token_accuracy": 0.6686098178227743, "num_tokens": 249887148.0, "step": 1498 }, { "entropy": 1.7844958702723186, "epoch": 0.1646755101480322, "grad_norm": 0.7004623413085938, "learning_rate": 1.987149947565877e-05, "loss": 1.5753, "mean_token_accuracy": 0.6275525540113449, "num_tokens": 250106989.0, "step": 1499 }, { "entropy": 1.7767977714538574, "epoch": 0.16478536705940514, "grad_norm": 0.8283874988555908, "learning_rate": 1.9871229737241963e-05, "loss": 1.4238, "mean_token_accuracy": 0.6419435540835062, "num_tokens": 250244200.0, "step": 1500 }, { "entropy": 1.7311366498470306, "epoch": 0.16489522397077805, "grad_norm": 0.6668105125427246, "learning_rate": 1.9870959718054733e-05, "loss": 1.4291, "mean_token_accuracy": 0.6505243728558222, "num_tokens": 250418231.0, "step": 1501 }, { "entropy": 1.7808765669663746, "epoch": 0.165005080882151, "grad_norm": 0.6634812951087952, "learning_rate": 1.9870689418105623e-05, "loss": 1.3597, "mean_token_accuracy": 0.6567443857590357, "num_tokens": 250544059.0, "step": 1502 }, { "entropy": 1.7454225818316143, "epoch": 0.16511493779352393, "grad_norm": 0.7223145961761475, "learning_rate": 1.9870418837403194e-05, "loss": 1.4386, "mean_token_accuracy": 0.6378096987803777, "num_tokens": 250731948.0, "step": 1503 }, { "entropy": 1.7013043363889058, "epoch": 0.16522479470489687, "grad_norm": 0.712053656578064, "learning_rate": 1.9870147975956004e-05, "loss": 1.3979, "mean_token_accuracy": 0.6579237480958303, "num_tokens": 250889151.0, "step": 1504 }, { "entropy": 1.692932019631068, "epoch": 0.1653346516162698, "grad_norm": 0.7106614112854004, "learning_rate": 1.9869876833772625e-05, "loss": 1.5012, "mean_token_accuracy": 0.6438654214143753, "num_tokens": 251113024.0, "step": 1505 }, { "entropy": 1.7009440064430237, "epoch": 0.16544450852764275, "grad_norm": 0.7013898491859436, "learning_rate": 1.9869605410861646e-05, "loss": 1.2613, "mean_token_accuracy": 0.6773126920064291, "num_tokens": 251247569.0, "step": 1506 }, { "entropy": 1.7212568124135335, "epoch": 0.1655543654390157, "grad_norm": 0.6236050724983215, "learning_rate": 1.986933370723165e-05, "loss": 1.3193, "mean_token_accuracy": 0.6535242249568304, "num_tokens": 251385146.0, "step": 1507 }, { "entropy": 1.669597287972768, "epoch": 0.1656642223503886, "grad_norm": 0.7364504933357239, "learning_rate": 1.9869061722891235e-05, "loss": 1.2212, "mean_token_accuracy": 0.6872139424085617, "num_tokens": 251535729.0, "step": 1508 }, { "entropy": 1.7369881371657054, "epoch": 0.16577407926176155, "grad_norm": 0.7119384407997131, "learning_rate": 1.9868789457849018e-05, "loss": 1.4191, "mean_token_accuracy": 0.6483266254266103, "num_tokens": 251685078.0, "step": 1509 }, { "entropy": 1.7074172000090282, "epoch": 0.1658839361731345, "grad_norm": 0.5950348377227783, "learning_rate": 1.986851691211361e-05, "loss": 1.3744, "mean_token_accuracy": 0.6569475283225378, "num_tokens": 251863638.0, "step": 1510 }, { "entropy": 1.7139769693215687, "epoch": 0.16599379308450743, "grad_norm": 0.7062231302261353, "learning_rate": 1.986824408569364e-05, "loss": 1.3153, "mean_token_accuracy": 0.6584917455911636, "num_tokens": 252028124.0, "step": 1511 }, { "entropy": 1.774364709854126, "epoch": 0.16610364999588037, "grad_norm": 0.6657449007034302, "learning_rate": 1.9867970978597738e-05, "loss": 1.3523, "mean_token_accuracy": 0.6527550766865412, "num_tokens": 252189853.0, "step": 1512 }, { "entropy": 1.645888904730479, "epoch": 0.1662135069072533, "grad_norm": 1.011468768119812, "learning_rate": 1.9867697590834552e-05, "loss": 1.4475, "mean_token_accuracy": 0.6760109663009644, "num_tokens": 252391083.0, "step": 1513 }, { "entropy": 1.7124705612659454, "epoch": 0.16632336381862625, "grad_norm": 0.866172194480896, "learning_rate": 1.9867423922412732e-05, "loss": 1.3624, "mean_token_accuracy": 0.6526060750087103, "num_tokens": 252555620.0, "step": 1514 }, { "entropy": 1.7539417843023937, "epoch": 0.16643322072999917, "grad_norm": 0.7852609753608704, "learning_rate": 1.986714997334094e-05, "loss": 1.3325, "mean_token_accuracy": 0.6687429994344711, "num_tokens": 252705040.0, "step": 1515 }, { "entropy": 1.7840530971686046, "epoch": 0.1665430776413721, "grad_norm": 0.7107824087142944, "learning_rate": 1.9866875743627845e-05, "loss": 1.5625, "mean_token_accuracy": 0.6361605624357859, "num_tokens": 252910429.0, "step": 1516 }, { "entropy": 1.6945801079273224, "epoch": 0.16665293455274505, "grad_norm": 0.7252150177955627, "learning_rate": 1.9866601233282133e-05, "loss": 1.3175, "mean_token_accuracy": 0.6691045463085175, "num_tokens": 253035185.0, "step": 1517 }, { "entropy": 1.738978087902069, "epoch": 0.166762791464118, "grad_norm": 1.032089352607727, "learning_rate": 1.9866326442312485e-05, "loss": 1.574, "mean_token_accuracy": 0.6497288842995962, "num_tokens": 253196871.0, "step": 1518 }, { "entropy": 1.7240648766358693, "epoch": 0.16687264837549093, "grad_norm": 0.6749817132949829, "learning_rate": 1.9866051370727604e-05, "loss": 1.3699, "mean_token_accuracy": 0.6552683015664419, "num_tokens": 253352031.0, "step": 1519 }, { "entropy": 1.7319549322128296, "epoch": 0.16698250528686387, "grad_norm": 0.6335762143135071, "learning_rate": 1.9865776018536188e-05, "loss": 1.4646, "mean_token_accuracy": 0.6430019934972128, "num_tokens": 253511981.0, "step": 1520 }, { "entropy": 1.7338798642158508, "epoch": 0.1670923621982368, "grad_norm": 0.6900236010551453, "learning_rate": 1.9865500385746954e-05, "loss": 1.3692, "mean_token_accuracy": 0.6590522130330404, "num_tokens": 253663727.0, "step": 1521 }, { "entropy": 1.7445284326871235, "epoch": 0.16720221910960972, "grad_norm": 0.7063678503036499, "learning_rate": 1.9865224472368634e-05, "loss": 1.5004, "mean_token_accuracy": 0.6508728663126627, "num_tokens": 253860814.0, "step": 1522 }, { "entropy": 1.7685929238796234, "epoch": 0.16731207602098266, "grad_norm": 0.7194231152534485, "learning_rate": 1.986494827840995e-05, "loss": 1.3029, "mean_token_accuracy": 0.6638128211100897, "num_tokens": 253980941.0, "step": 1523 }, { "entropy": 1.750822017590205, "epoch": 0.1674219329323556, "grad_norm": 0.6481338143348694, "learning_rate": 1.9864671803879648e-05, "loss": 1.3924, "mean_token_accuracy": 0.6541791011889776, "num_tokens": 254148839.0, "step": 1524 }, { "entropy": 1.6678146918614705, "epoch": 0.16753178984372855, "grad_norm": 0.9855983257293701, "learning_rate": 1.9864395048786477e-05, "loss": 1.6043, "mean_token_accuracy": 0.6395136813322703, "num_tokens": 254354163.0, "step": 1525 }, { "entropy": 1.718589961528778, "epoch": 0.1676416467551015, "grad_norm": 0.8905704617500305, "learning_rate": 1.98641180131392e-05, "loss": 1.2566, "mean_token_accuracy": 0.6782409648100535, "num_tokens": 254504599.0, "step": 1526 }, { "entropy": 1.7314582268397014, "epoch": 0.16775150366647443, "grad_norm": 0.6675595641136169, "learning_rate": 1.986384069694658e-05, "loss": 1.4248, "mean_token_accuracy": 0.660760889450709, "num_tokens": 254671668.0, "step": 1527 }, { "entropy": 1.819986879825592, "epoch": 0.16786136057784734, "grad_norm": 0.7095764875411987, "learning_rate": 1.9863563100217397e-05, "loss": 1.5101, "mean_token_accuracy": 0.6234359592199326, "num_tokens": 254837716.0, "step": 1528 }, { "entropy": 1.6930086314678192, "epoch": 0.16797121748922028, "grad_norm": 0.7223114371299744, "learning_rate": 1.9863285222960436e-05, "loss": 1.3947, "mean_token_accuracy": 0.6587740182876587, "num_tokens": 255038553.0, "step": 1529 }, { "entropy": 1.7511506875356038, "epoch": 0.16808107440059322, "grad_norm": 0.6249548196792603, "learning_rate": 1.986300706518449e-05, "loss": 1.4064, "mean_token_accuracy": 0.6461884180704752, "num_tokens": 255246012.0, "step": 1530 }, { "entropy": 1.7595790127913158, "epoch": 0.16819093131196616, "grad_norm": 0.8911905288696289, "learning_rate": 1.9862728626898363e-05, "loss": 1.3936, "mean_token_accuracy": 0.6536350101232529, "num_tokens": 255406852.0, "step": 1531 }, { "entropy": 1.7233379284540813, "epoch": 0.1683007882233391, "grad_norm": 0.6526165008544922, "learning_rate": 1.9862449908110876e-05, "loss": 1.5453, "mean_token_accuracy": 0.6314892421166102, "num_tokens": 255600884.0, "step": 1532 }, { "entropy": 1.7559981842835743, "epoch": 0.16841064513471204, "grad_norm": 0.7608028054237366, "learning_rate": 1.9862170908830837e-05, "loss": 1.5887, "mean_token_accuracy": 0.6392476956049601, "num_tokens": 255762853.0, "step": 1533 }, { "entropy": 1.7382706304391224, "epoch": 0.16852050204608499, "grad_norm": 0.651728630065918, "learning_rate": 1.986189162906708e-05, "loss": 1.5546, "mean_token_accuracy": 0.6259034971396128, "num_tokens": 255989620.0, "step": 1534 }, { "entropy": 1.7760844230651855, "epoch": 0.1686303589574579, "grad_norm": 0.8464280366897583, "learning_rate": 1.986161206882845e-05, "loss": 1.309, "mean_token_accuracy": 0.6692384978135427, "num_tokens": 256139502.0, "step": 1535 }, { "entropy": 1.7130251824855804, "epoch": 0.16874021586883084, "grad_norm": 0.6570628881454468, "learning_rate": 1.986133222812379e-05, "loss": 1.3684, "mean_token_accuracy": 0.6575349122285843, "num_tokens": 256303217.0, "step": 1536 }, { "entropy": 1.746040016412735, "epoch": 0.16885007278020378, "grad_norm": 0.7733869552612305, "learning_rate": 1.986105210696196e-05, "loss": 1.4488, "mean_token_accuracy": 0.6521646479765574, "num_tokens": 256480152.0, "step": 1537 }, { "entropy": 1.7764933109283447, "epoch": 0.16895992969157672, "grad_norm": 0.8233133554458618, "learning_rate": 1.9860771705351822e-05, "loss": 1.453, "mean_token_accuracy": 0.6571964025497437, "num_tokens": 256621819.0, "step": 1538 }, { "entropy": 1.70499520500501, "epoch": 0.16906978660294966, "grad_norm": 0.7217221260070801, "learning_rate": 1.9860491023302252e-05, "loss": 1.4539, "mean_token_accuracy": 0.6460290650526682, "num_tokens": 256864065.0, "step": 1539 }, { "entropy": 1.6891511678695679, "epoch": 0.1691796435143226, "grad_norm": 0.7945336103439331, "learning_rate": 1.9860210060822137e-05, "loss": 1.4004, "mean_token_accuracy": 0.6728538970152537, "num_tokens": 257076992.0, "step": 1540 }, { "entropy": 1.7396195034186046, "epoch": 0.16928950042569554, "grad_norm": 0.7375956773757935, "learning_rate": 1.9859928817920363e-05, "loss": 1.3562, "mean_token_accuracy": 0.6567103415727615, "num_tokens": 257245992.0, "step": 1541 }, { "entropy": 1.7729649543762207, "epoch": 0.16939935733706846, "grad_norm": 0.5919457674026489, "learning_rate": 1.9859647294605832e-05, "loss": 1.3707, "mean_token_accuracy": 0.6635322074095408, "num_tokens": 257484248.0, "step": 1542 }, { "entropy": 1.7026795248190563, "epoch": 0.1695092142484414, "grad_norm": 0.7219969630241394, "learning_rate": 1.985936549088746e-05, "loss": 1.3216, "mean_token_accuracy": 0.6651143580675125, "num_tokens": 257612083.0, "step": 1543 }, { "entropy": 1.6986474494139354, "epoch": 0.16961907115981434, "grad_norm": 0.6384711861610413, "learning_rate": 1.985908340677416e-05, "loss": 1.2761, "mean_token_accuracy": 0.6821579784154892, "num_tokens": 257801207.0, "step": 1544 }, { "entropy": 1.740968902905782, "epoch": 0.16972892807118728, "grad_norm": 1.006049633026123, "learning_rate": 1.9858801042274865e-05, "loss": 1.6709, "mean_token_accuracy": 0.6359262764453888, "num_tokens": 257943835.0, "step": 1545 }, { "entropy": 1.761252890030543, "epoch": 0.16983878498256022, "grad_norm": 0.678752601146698, "learning_rate": 1.9858518397398506e-05, "loss": 1.3871, "mean_token_accuracy": 0.6560403803984324, "num_tokens": 258112452.0, "step": 1546 }, { "entropy": 1.7248517572879791, "epoch": 0.16994864189393316, "grad_norm": 0.6440653204917908, "learning_rate": 1.9858235472154035e-05, "loss": 1.3485, "mean_token_accuracy": 0.6657489885886511, "num_tokens": 258272795.0, "step": 1547 }, { "entropy": 1.7436510026454926, "epoch": 0.1700584988053061, "grad_norm": 0.6682828068733215, "learning_rate": 1.98579522665504e-05, "loss": 1.2871, "mean_token_accuracy": 0.6653337130943934, "num_tokens": 258421193.0, "step": 1548 }, { "entropy": 1.768191655476888, "epoch": 0.17016835571667902, "grad_norm": 0.6639819145202637, "learning_rate": 1.9857668780596566e-05, "loss": 1.3542, "mean_token_accuracy": 0.6570809185504913, "num_tokens": 258586838.0, "step": 1549 }, { "entropy": 1.7020961840947468, "epoch": 0.17027821262805196, "grad_norm": 0.713425874710083, "learning_rate": 1.985738501430151e-05, "loss": 1.4274, "mean_token_accuracy": 0.6487694978713989, "num_tokens": 258785508.0, "step": 1550 }, { "entropy": 1.7292237877845764, "epoch": 0.1703880695394249, "grad_norm": 0.6867907047271729, "learning_rate": 1.9857100967674207e-05, "loss": 1.4103, "mean_token_accuracy": 0.646657998363177, "num_tokens": 258951979.0, "step": 1551 }, { "entropy": 1.7403623362382252, "epoch": 0.17049792645079784, "grad_norm": 0.6921045780181885, "learning_rate": 1.985681664072365e-05, "loss": 1.4913, "mean_token_accuracy": 0.6297862927118937, "num_tokens": 259176216.0, "step": 1552 }, { "entropy": 1.7630130350589752, "epoch": 0.17060778336217078, "grad_norm": 0.6721217632293701, "learning_rate": 1.9856532033458838e-05, "loss": 1.349, "mean_token_accuracy": 0.6710087656974792, "num_tokens": 259352224.0, "step": 1553 }, { "entropy": 1.8647314310073853, "epoch": 0.17071764027354372, "grad_norm": 0.8853358626365662, "learning_rate": 1.985624714588878e-05, "loss": 1.4459, "mean_token_accuracy": 0.6719841261704763, "num_tokens": 259450105.0, "step": 1554 }, { "entropy": 1.7859836916128795, "epoch": 0.17082749718491663, "grad_norm": 0.7029029726982117, "learning_rate": 1.9855961978022487e-05, "loss": 1.3664, "mean_token_accuracy": 0.6556002298990885, "num_tokens": 259563607.0, "step": 1555 }, { "entropy": 1.7396026750405629, "epoch": 0.17093735409628957, "grad_norm": 0.9122359156608582, "learning_rate": 1.9855676529868987e-05, "loss": 1.4244, "mean_token_accuracy": 0.6562978277603785, "num_tokens": 259724551.0, "step": 1556 }, { "entropy": 1.6989874243736267, "epoch": 0.17104721100766251, "grad_norm": 0.7166799306869507, "learning_rate": 1.985539080143732e-05, "loss": 1.4114, "mean_token_accuracy": 0.6575746287902197, "num_tokens": 259892519.0, "step": 1557 }, { "entropy": 1.763812651236852, "epoch": 0.17115706791903545, "grad_norm": 0.8969507217407227, "learning_rate": 1.9855104792736523e-05, "loss": 1.2682, "mean_token_accuracy": 0.6701969256003698, "num_tokens": 260020525.0, "step": 1558 }, { "entropy": 1.6851574281851451, "epoch": 0.1712669248304084, "grad_norm": 0.6716583967208862, "learning_rate": 1.985481850377565e-05, "loss": 1.2937, "mean_token_accuracy": 0.6693290968736013, "num_tokens": 260173052.0, "step": 1559 }, { "entropy": 1.7508944670359294, "epoch": 0.17137678174178134, "grad_norm": 0.7718809247016907, "learning_rate": 1.9854531934563756e-05, "loss": 1.3227, "mean_token_accuracy": 0.6782904316981634, "num_tokens": 260302029.0, "step": 1560 }, { "entropy": 1.725020448366801, "epoch": 0.17148663865315428, "grad_norm": 0.6612850427627563, "learning_rate": 1.985424508510992e-05, "loss": 1.5275, "mean_token_accuracy": 0.6432696729898453, "num_tokens": 260518768.0, "step": 1561 }, { "entropy": 1.706325650215149, "epoch": 0.1715964955645272, "grad_norm": 0.6583466529846191, "learning_rate": 1.985395795542322e-05, "loss": 1.382, "mean_token_accuracy": 0.6552939414978027, "num_tokens": 260747412.0, "step": 1562 }, { "entropy": 1.7528244455655415, "epoch": 0.17170635247590013, "grad_norm": 0.6246992349624634, "learning_rate": 1.985367054551274e-05, "loss": 1.4976, "mean_token_accuracy": 0.6324788878361384, "num_tokens": 260942417.0, "step": 1563 }, { "entropy": 1.658085564772288, "epoch": 0.17181620938727307, "grad_norm": 0.8413445353507996, "learning_rate": 1.985338285538757e-05, "loss": 1.2427, "mean_token_accuracy": 0.6698757459719976, "num_tokens": 261082216.0, "step": 1564 }, { "entropy": 1.7026270429293315, "epoch": 0.171926066298646, "grad_norm": 0.6878015995025635, "learning_rate": 1.9853094885056824e-05, "loss": 1.3236, "mean_token_accuracy": 0.6607059886058172, "num_tokens": 261265341.0, "step": 1565 }, { "entropy": 1.7412429749965668, "epoch": 0.17203592321001895, "grad_norm": 0.6753911375999451, "learning_rate": 1.9852806634529617e-05, "loss": 1.5171, "mean_token_accuracy": 0.645161176721255, "num_tokens": 261451694.0, "step": 1566 }, { "entropy": 1.7042547861735027, "epoch": 0.1721457801213919, "grad_norm": 0.6290127635002136, "learning_rate": 1.985251810381507e-05, "loss": 1.3632, "mean_token_accuracy": 0.6557406087716421, "num_tokens": 261608260.0, "step": 1567 }, { "entropy": 1.707568456729253, "epoch": 0.17225563703276484, "grad_norm": 0.7598758935928345, "learning_rate": 1.985222929292231e-05, "loss": 1.5189, "mean_token_accuracy": 0.643217921257019, "num_tokens": 261830010.0, "step": 1568 }, { "entropy": 1.6917735834916432, "epoch": 0.17236549394413775, "grad_norm": 0.7184498906135559, "learning_rate": 1.9851940201860486e-05, "loss": 1.3412, "mean_token_accuracy": 0.656810333331426, "num_tokens": 261946266.0, "step": 1569 }, { "entropy": 1.723334978024165, "epoch": 0.1724753508555107, "grad_norm": 0.5953544974327087, "learning_rate": 1.985165083063874e-05, "loss": 1.437, "mean_token_accuracy": 0.638761967420578, "num_tokens": 262116192.0, "step": 1570 }, { "entropy": 1.7972069382667542, "epoch": 0.17258520776688363, "grad_norm": 0.8096028566360474, "learning_rate": 1.985136117926624e-05, "loss": 1.3684, "mean_token_accuracy": 0.6510176906983057, "num_tokens": 262310188.0, "step": 1571 }, { "entropy": 1.7401937345663707, "epoch": 0.17269506467825657, "grad_norm": 0.699908435344696, "learning_rate": 1.9851071247752144e-05, "loss": 1.512, "mean_token_accuracy": 0.6426756829023361, "num_tokens": 262485269.0, "step": 1572 }, { "entropy": 1.739349255959193, "epoch": 0.1728049215896295, "grad_norm": 0.7645807266235352, "learning_rate": 1.9850781036105628e-05, "loss": 1.3933, "mean_token_accuracy": 0.6666643818219503, "num_tokens": 262634754.0, "step": 1573 }, { "entropy": 1.7648814817269642, "epoch": 0.17291477850100245, "grad_norm": 0.5633745789527893, "learning_rate": 1.9850490544335883e-05, "loss": 1.4968, "mean_token_accuracy": 0.6367639104525248, "num_tokens": 262848023.0, "step": 1574 }, { "entropy": 1.7256119847297668, "epoch": 0.1730246354123754, "grad_norm": 0.7680408358573914, "learning_rate": 1.9850199772452102e-05, "loss": 1.2459, "mean_token_accuracy": 0.6759609977404276, "num_tokens": 262952079.0, "step": 1575 }, { "entropy": 1.7688967287540436, "epoch": 0.1731344923237483, "grad_norm": 0.7607701420783997, "learning_rate": 1.9849908720463483e-05, "loss": 1.686, "mean_token_accuracy": 0.6304403940836588, "num_tokens": 263123172.0, "step": 1576 }, { "entropy": 1.7065779368082683, "epoch": 0.17324434923512125, "grad_norm": 0.7314710021018982, "learning_rate": 1.9849617388379243e-05, "loss": 1.3961, "mean_token_accuracy": 0.662748172879219, "num_tokens": 263287696.0, "step": 1577 }, { "entropy": 1.7852273086706798, "epoch": 0.1733542061464942, "grad_norm": 0.7075253129005432, "learning_rate": 1.9849325776208597e-05, "loss": 1.5109, "mean_token_accuracy": 0.6389070451259613, "num_tokens": 263463542.0, "step": 1578 }, { "entropy": 1.759129822254181, "epoch": 0.17346406305786713, "grad_norm": 0.6782553791999817, "learning_rate": 1.984903388396078e-05, "loss": 1.3754, "mean_token_accuracy": 0.66612375775973, "num_tokens": 263611324.0, "step": 1579 }, { "entropy": 1.7275327742099762, "epoch": 0.17357391996924007, "grad_norm": 0.6759166121482849, "learning_rate": 1.984874171164503e-05, "loss": 1.4564, "mean_token_accuracy": 0.6484881341457367, "num_tokens": 263803481.0, "step": 1580 }, { "entropy": 1.7481872042020161, "epoch": 0.173683776880613, "grad_norm": 0.7830897569656372, "learning_rate": 1.9848449259270594e-05, "loss": 1.4403, "mean_token_accuracy": 0.6442924290895462, "num_tokens": 263980547.0, "step": 1581 }, { "entropy": 1.6898111701011658, "epoch": 0.17379363379198595, "grad_norm": 0.7016710638999939, "learning_rate": 1.984815652684672e-05, "loss": 1.2634, "mean_token_accuracy": 0.677551324168841, "num_tokens": 264100476.0, "step": 1582 }, { "entropy": 1.756488412618637, "epoch": 0.17390349070335887, "grad_norm": 0.725829005241394, "learning_rate": 1.9847863514382684e-05, "loss": 1.3736, "mean_token_accuracy": 0.6665743341048559, "num_tokens": 264267717.0, "step": 1583 }, { "entropy": 1.7239744464556377, "epoch": 0.1740133476147318, "grad_norm": 0.7309584617614746, "learning_rate": 1.9847570221887752e-05, "loss": 1.4512, "mean_token_accuracy": 0.6631620625654856, "num_tokens": 264435736.0, "step": 1584 }, { "entropy": 1.754454771677653, "epoch": 0.17412320452610475, "grad_norm": 0.7535955905914307, "learning_rate": 1.984727664937121e-05, "loss": 1.3803, "mean_token_accuracy": 0.6569480895996094, "num_tokens": 264575234.0, "step": 1585 }, { "entropy": 1.7318914433320363, "epoch": 0.1742330614374777, "grad_norm": 0.7644667625427246, "learning_rate": 1.9846982796842348e-05, "loss": 1.3139, "mean_token_accuracy": 0.6594865024089813, "num_tokens": 264726884.0, "step": 1586 }, { "entropy": 1.716256360212962, "epoch": 0.17434291834885063, "grad_norm": 0.9378094673156738, "learning_rate": 1.9846688664310466e-05, "loss": 1.3089, "mean_token_accuracy": 0.6705767214298248, "num_tokens": 264888577.0, "step": 1587 }, { "entropy": 1.7499485909938812, "epoch": 0.17445277526022357, "grad_norm": 0.6963350772857666, "learning_rate": 1.9846394251784878e-05, "loss": 1.3224, "mean_token_accuracy": 0.6636629452308019, "num_tokens": 265017805.0, "step": 1588 }, { "entropy": 1.6987595359484355, "epoch": 0.17456263217159648, "grad_norm": 0.549950122833252, "learning_rate": 1.9846099559274896e-05, "loss": 1.445, "mean_token_accuracy": 0.6353604396184286, "num_tokens": 265263719.0, "step": 1589 }, { "entropy": 1.663997044165929, "epoch": 0.17467248908296942, "grad_norm": 0.6349430680274963, "learning_rate": 1.9845804586789846e-05, "loss": 1.497, "mean_token_accuracy": 0.6526202807823817, "num_tokens": 265452023.0, "step": 1590 }, { "entropy": 1.7305479149023693, "epoch": 0.17478234599434236, "grad_norm": 0.641980767250061, "learning_rate": 1.984550933433907e-05, "loss": 1.5094, "mean_token_accuracy": 0.6398686319589615, "num_tokens": 265639435.0, "step": 1591 }, { "entropy": 1.7175764441490173, "epoch": 0.1748922029057153, "grad_norm": 0.6882075071334839, "learning_rate": 1.9845213801931912e-05, "loss": 1.5512, "mean_token_accuracy": 0.6555089851220449, "num_tokens": 265805955.0, "step": 1592 }, { "entropy": 1.7594363292058308, "epoch": 0.17500205981708825, "grad_norm": 0.6819611191749573, "learning_rate": 1.984491798957772e-05, "loss": 1.6119, "mean_token_accuracy": 0.623213991522789, "num_tokens": 265979623.0, "step": 1593 }, { "entropy": 1.6993054151535034, "epoch": 0.1751119167284612, "grad_norm": 0.6733065843582153, "learning_rate": 1.9844621897285857e-05, "loss": 1.3148, "mean_token_accuracy": 0.6543001731236776, "num_tokens": 266115341.0, "step": 1594 }, { "entropy": 1.633053998152415, "epoch": 0.17522177363983413, "grad_norm": 0.7140861749649048, "learning_rate": 1.9844325525065703e-05, "loss": 1.3898, "mean_token_accuracy": 0.6798456112543741, "num_tokens": 266245546.0, "step": 1595 }, { "entropy": 1.7286945780118306, "epoch": 0.17533163055120704, "grad_norm": 0.7508774399757385, "learning_rate": 1.9844028872926624e-05, "loss": 1.5096, "mean_token_accuracy": 0.6422994434833527, "num_tokens": 266441953.0, "step": 1596 }, { "entropy": 1.6760883927345276, "epoch": 0.17544148746257998, "grad_norm": 0.6171284317970276, "learning_rate": 1.984373194087802e-05, "loss": 1.3983, "mean_token_accuracy": 0.6639473040898641, "num_tokens": 266596780.0, "step": 1597 }, { "entropy": 1.6912609040737152, "epoch": 0.17555134437395292, "grad_norm": 0.6284732818603516, "learning_rate": 1.9843434728929287e-05, "loss": 1.2327, "mean_token_accuracy": 0.6864048341910044, "num_tokens": 266738616.0, "step": 1598 }, { "entropy": 1.6915510594844818, "epoch": 0.17566120128532586, "grad_norm": 0.6584773063659668, "learning_rate": 1.9843137237089825e-05, "loss": 1.3557, "mean_token_accuracy": 0.6557959119478861, "num_tokens": 266897766.0, "step": 1599 }, { "entropy": 1.7651262879371643, "epoch": 0.1757710581966988, "grad_norm": 0.8828444480895996, "learning_rate": 1.984283946536906e-05, "loss": 1.4619, "mean_token_accuracy": 0.6481021742026011, "num_tokens": 267065155.0, "step": 1600 }, { "entropy": 1.8013077477614086, "epoch": 0.17588091510807174, "grad_norm": 0.685353696346283, "learning_rate": 1.9842541413776405e-05, "loss": 1.3999, "mean_token_accuracy": 0.6464814196030298, "num_tokens": 267229641.0, "step": 1601 }, { "entropy": 1.8050518830617268, "epoch": 0.17599077201944469, "grad_norm": 0.8125794529914856, "learning_rate": 1.98422430823213e-05, "loss": 1.5728, "mean_token_accuracy": 0.6321427176396052, "num_tokens": 267378156.0, "step": 1602 }, { "entropy": 1.651296724875768, "epoch": 0.1761006289308176, "grad_norm": 0.6428789496421814, "learning_rate": 1.984194447101319e-05, "loss": 1.3641, "mean_token_accuracy": 0.6623906741539637, "num_tokens": 267590605.0, "step": 1603 }, { "entropy": 1.7778548002243042, "epoch": 0.17621048584219054, "grad_norm": 0.6255530118942261, "learning_rate": 1.984164557986152e-05, "loss": 1.4539, "mean_token_accuracy": 0.6396249979734421, "num_tokens": 267756215.0, "step": 1604 }, { "entropy": 1.6877602239449818, "epoch": 0.17632034275356348, "grad_norm": 0.7188239693641663, "learning_rate": 1.984134640887575e-05, "loss": 1.4997, "mean_token_accuracy": 0.6404122064510981, "num_tokens": 268034363.0, "step": 1605 }, { "entropy": 1.760389010111491, "epoch": 0.17643019966493642, "grad_norm": 0.758726954460144, "learning_rate": 1.984104695806535e-05, "loss": 1.3262, "mean_token_accuracy": 0.6760849605003992, "num_tokens": 268157355.0, "step": 1606 }, { "entropy": 1.6769930223623912, "epoch": 0.17654005657630936, "grad_norm": 0.6320821642875671, "learning_rate": 1.98407472274398e-05, "loss": 1.4481, "mean_token_accuracy": 0.6526689926783243, "num_tokens": 268370854.0, "step": 1607 }, { "entropy": 1.7059557735919952, "epoch": 0.1766499134876823, "grad_norm": 0.5866220593452454, "learning_rate": 1.9840447217008583e-05, "loss": 1.4575, "mean_token_accuracy": 0.6406320333480835, "num_tokens": 268603959.0, "step": 1608 }, { "entropy": 1.6889616250991821, "epoch": 0.17675977039905524, "grad_norm": 0.802364706993103, "learning_rate": 1.9840146926781193e-05, "loss": 1.3417, "mean_token_accuracy": 0.6644609669844309, "num_tokens": 268784211.0, "step": 1609 }, { "entropy": 1.7760377724965413, "epoch": 0.17686962731042816, "grad_norm": 0.760371744632721, "learning_rate": 1.9839846356767135e-05, "loss": 1.5012, "mean_token_accuracy": 0.6282084981600443, "num_tokens": 268997493.0, "step": 1610 }, { "entropy": 1.750706394513448, "epoch": 0.1769794842218011, "grad_norm": 0.8299148678779602, "learning_rate": 1.983954550697593e-05, "loss": 1.3053, "mean_token_accuracy": 0.6595065792401632, "num_tokens": 269102555.0, "step": 1611 }, { "entropy": 1.7330009837945302, "epoch": 0.17708934113317404, "grad_norm": 0.6234577298164368, "learning_rate": 1.9839244377417087e-05, "loss": 1.4342, "mean_token_accuracy": 0.6495455453793207, "num_tokens": 269270569.0, "step": 1612 }, { "entropy": 1.712340384721756, "epoch": 0.17719919804454698, "grad_norm": 0.6255055069923401, "learning_rate": 1.9838942968100145e-05, "loss": 1.4671, "mean_token_accuracy": 0.6483869006236395, "num_tokens": 269488489.0, "step": 1613 }, { "entropy": 1.6904551486174266, "epoch": 0.17730905495591992, "grad_norm": 1.2697219848632812, "learning_rate": 1.983864127903464e-05, "loss": 1.229, "mean_token_accuracy": 0.6800280114014944, "num_tokens": 269745519.0, "step": 1614 }, { "entropy": 1.7634007533391316, "epoch": 0.17741891186729286, "grad_norm": 0.6574758291244507, "learning_rate": 1.9838339310230123e-05, "loss": 1.4662, "mean_token_accuracy": 0.6376509219408035, "num_tokens": 269926677.0, "step": 1615 }, { "entropy": 1.6982669830322266, "epoch": 0.17752876877866577, "grad_norm": 1.7531147003173828, "learning_rate": 1.983803706169615e-05, "loss": 1.0775, "mean_token_accuracy": 0.6754929721355438, "num_tokens": 270145004.0, "step": 1616 }, { "entropy": 1.7733216385046642, "epoch": 0.17763862569003872, "grad_norm": 0.7520893216133118, "learning_rate": 1.983773453344228e-05, "loss": 1.4172, "mean_token_accuracy": 0.6514776547749838, "num_tokens": 270295428.0, "step": 1617 }, { "entropy": 1.7757883270581563, "epoch": 0.17774848260141166, "grad_norm": 0.6765945553779602, "learning_rate": 1.98374317254781e-05, "loss": 1.3595, "mean_token_accuracy": 0.661077231168747, "num_tokens": 270507846.0, "step": 1618 }, { "entropy": 1.7714926997820537, "epoch": 0.1778583395127846, "grad_norm": 0.8542430400848389, "learning_rate": 1.9837128637813187e-05, "loss": 1.4031, "mean_token_accuracy": 0.6585122595230738, "num_tokens": 270682825.0, "step": 1619 }, { "entropy": 1.7776615619659424, "epoch": 0.17796819642415754, "grad_norm": 0.7186983823776245, "learning_rate": 1.9836825270457133e-05, "loss": 1.3677, "mean_token_accuracy": 0.6544285813967387, "num_tokens": 270818145.0, "step": 1620 }, { "entropy": 1.7131075461705525, "epoch": 0.17807805333553048, "grad_norm": 0.5991750359535217, "learning_rate": 1.9836521623419546e-05, "loss": 1.3429, "mean_token_accuracy": 0.6644314974546432, "num_tokens": 270978762.0, "step": 1621 }, { "entropy": 1.704333871603012, "epoch": 0.17818791024690342, "grad_norm": 0.9044831395149231, "learning_rate": 1.983621769671003e-05, "loss": 1.4033, "mean_token_accuracy": 0.6686131457487742, "num_tokens": 271125197.0, "step": 1622 }, { "entropy": 1.7072937885920207, "epoch": 0.17829776715827633, "grad_norm": 0.6216189861297607, "learning_rate": 1.98359134903382e-05, "loss": 1.4799, "mean_token_accuracy": 0.6512039552132288, "num_tokens": 271319205.0, "step": 1623 }, { "entropy": 1.6969341238339741, "epoch": 0.17840762406964927, "grad_norm": 0.8598399758338928, "learning_rate": 1.9835609004313693e-05, "loss": 1.3197, "mean_token_accuracy": 0.6660919090112051, "num_tokens": 271482991.0, "step": 1624 }, { "entropy": 1.7475373148918152, "epoch": 0.17851748098102221, "grad_norm": 0.8220011591911316, "learning_rate": 1.9835304238646146e-05, "loss": 1.3325, "mean_token_accuracy": 0.6602373421192169, "num_tokens": 271615079.0, "step": 1625 }, { "entropy": 1.7845442990461986, "epoch": 0.17862733789239515, "grad_norm": 0.7265953421592712, "learning_rate": 1.9834999193345197e-05, "loss": 1.2799, "mean_token_accuracy": 0.6676252981026968, "num_tokens": 271737120.0, "step": 1626 }, { "entropy": 1.7501579523086548, "epoch": 0.1787371948037681, "grad_norm": 0.6195200681686401, "learning_rate": 1.9834693868420505e-05, "loss": 1.4889, "mean_token_accuracy": 0.643327941497167, "num_tokens": 271926352.0, "step": 1627 }, { "entropy": 1.7393087645371754, "epoch": 0.17884705171514104, "grad_norm": 0.8133971095085144, "learning_rate": 1.9834388263881736e-05, "loss": 1.4181, "mean_token_accuracy": 0.6791380792856216, "num_tokens": 272038740.0, "step": 1628 }, { "entropy": 1.6529111862182617, "epoch": 0.17895690862651398, "grad_norm": 0.653113603591919, "learning_rate": 1.9834082379738556e-05, "loss": 1.4357, "mean_token_accuracy": 0.6570485532283783, "num_tokens": 272226850.0, "step": 1629 }, { "entropy": 1.701356291770935, "epoch": 0.1790667655378869, "grad_norm": 0.7334955334663391, "learning_rate": 1.983377621600065e-05, "loss": 1.3291, "mean_token_accuracy": 0.6579341193040212, "num_tokens": 272393860.0, "step": 1630 }, { "entropy": 1.7137031455834706, "epoch": 0.17917662244925983, "grad_norm": 0.6799026727676392, "learning_rate": 1.983346977267771e-05, "loss": 1.4359, "mean_token_accuracy": 0.6577520171801249, "num_tokens": 272577442.0, "step": 1631 }, { "entropy": 1.7033151189486186, "epoch": 0.17928647936063277, "grad_norm": 0.7878915071487427, "learning_rate": 1.983316304977943e-05, "loss": 1.4819, "mean_token_accuracy": 0.6430552899837494, "num_tokens": 272770738.0, "step": 1632 }, { "entropy": 1.6933635870615642, "epoch": 0.1793963362720057, "grad_norm": 0.7030259966850281, "learning_rate": 1.9832856047315522e-05, "loss": 1.3218, "mean_token_accuracy": 0.6660377085208893, "num_tokens": 272972512.0, "step": 1633 }, { "entropy": 1.767316649357478, "epoch": 0.17950619318337865, "grad_norm": 0.6197423934936523, "learning_rate": 1.9832548765295696e-05, "loss": 1.4417, "mean_token_accuracy": 0.6372250666220983, "num_tokens": 273161984.0, "step": 1634 }, { "entropy": 1.6606941322485607, "epoch": 0.1796160500947516, "grad_norm": 0.6114673614501953, "learning_rate": 1.9832241203729684e-05, "loss": 1.4749, "mean_token_accuracy": 0.649383544921875, "num_tokens": 273362338.0, "step": 1635 }, { "entropy": 1.757198413213094, "epoch": 0.17972590700612454, "grad_norm": 0.7329999208450317, "learning_rate": 1.9831933362627215e-05, "loss": 1.5256, "mean_token_accuracy": 0.6449063618977865, "num_tokens": 273550420.0, "step": 1636 }, { "entropy": 1.7132985492547352, "epoch": 0.17983576391749745, "grad_norm": 0.72648024559021, "learning_rate": 1.983162524199804e-05, "loss": 1.5848, "mean_token_accuracy": 0.6244159291187922, "num_tokens": 273767744.0, "step": 1637 }, { "entropy": 1.7032330830891926, "epoch": 0.1799456208288704, "grad_norm": 0.7133116126060486, "learning_rate": 1.9831316841851906e-05, "loss": 1.4667, "mean_token_accuracy": 0.6442908545335134, "num_tokens": 274002746.0, "step": 1638 }, { "entropy": 1.700139433145523, "epoch": 0.18005547774024333, "grad_norm": 0.662652850151062, "learning_rate": 1.9831008162198565e-05, "loss": 1.3707, "mean_token_accuracy": 0.6429425726334254, "num_tokens": 274183020.0, "step": 1639 }, { "entropy": 1.700132042169571, "epoch": 0.18016533465161627, "grad_norm": 0.7111302018165588, "learning_rate": 1.9830699203047804e-05, "loss": 1.4556, "mean_token_accuracy": 0.6517406602700552, "num_tokens": 274383586.0, "step": 1640 }, { "entropy": 1.7377035915851593, "epoch": 0.1802751915629892, "grad_norm": 0.7069066762924194, "learning_rate": 1.983038996440939e-05, "loss": 1.3404, "mean_token_accuracy": 0.6645782341559728, "num_tokens": 274524820.0, "step": 1641 }, { "entropy": 1.7640187640984852, "epoch": 0.18038504847436215, "grad_norm": 0.7231025099754333, "learning_rate": 1.983008044629311e-05, "loss": 1.3854, "mean_token_accuracy": 0.6507704704999924, "num_tokens": 274656672.0, "step": 1642 }, { "entropy": 1.728807379802068, "epoch": 0.1804949053857351, "grad_norm": 0.6958527565002441, "learning_rate": 1.9829770648708764e-05, "loss": 1.4108, "mean_token_accuracy": 0.6552976568539938, "num_tokens": 274853709.0, "step": 1643 }, { "entropy": 1.7781054377555847, "epoch": 0.180604762297108, "grad_norm": 0.8837335705757141, "learning_rate": 1.9829460571666156e-05, "loss": 1.4283, "mean_token_accuracy": 0.6518157124519348, "num_tokens": 274988556.0, "step": 1644 }, { "entropy": 1.7898385723431904, "epoch": 0.18071461920848095, "grad_norm": 0.801447331905365, "learning_rate": 1.9829150215175103e-05, "loss": 1.5257, "mean_token_accuracy": 0.6439861307541529, "num_tokens": 275141877.0, "step": 1645 }, { "entropy": 1.6892934044202168, "epoch": 0.1808244761198539, "grad_norm": 0.621210515499115, "learning_rate": 1.982883957924542e-05, "loss": 1.337, "mean_token_accuracy": 0.6619320660829544, "num_tokens": 275286921.0, "step": 1646 }, { "entropy": 1.7738712231318157, "epoch": 0.18093433303122683, "grad_norm": 0.7250021696090698, "learning_rate": 1.9828528663886946e-05, "loss": 1.4375, "mean_token_accuracy": 0.6405630757411321, "num_tokens": 275465643.0, "step": 1647 }, { "entropy": 1.7501648565133412, "epoch": 0.18104418994259977, "grad_norm": 0.7406297326087952, "learning_rate": 1.9828217469109514e-05, "loss": 1.6333, "mean_token_accuracy": 0.631665957470735, "num_tokens": 275636335.0, "step": 1648 }, { "entropy": 1.7328572471936543, "epoch": 0.1811540468539727, "grad_norm": 0.7023396492004395, "learning_rate": 1.982790599492298e-05, "loss": 1.3489, "mean_token_accuracy": 0.6668912867705027, "num_tokens": 275778888.0, "step": 1649 }, { "entropy": 1.7373796900113423, "epoch": 0.18126390376534562, "grad_norm": 0.6394554376602173, "learning_rate": 1.9827594241337196e-05, "loss": 1.2949, "mean_token_accuracy": 0.6552981982628504, "num_tokens": 275936865.0, "step": 1650 }, { "entropy": 1.6960271497567494, "epoch": 0.18137376067671857, "grad_norm": 0.6193222403526306, "learning_rate": 1.9827282208362034e-05, "loss": 1.3971, "mean_token_accuracy": 0.6560710817575455, "num_tokens": 276084652.0, "step": 1651 }, { "entropy": 1.747694154580434, "epoch": 0.1814836175880915, "grad_norm": 0.6936965584754944, "learning_rate": 1.982696989600737e-05, "loss": 1.3516, "mean_token_accuracy": 0.6600579669078191, "num_tokens": 276264605.0, "step": 1652 }, { "entropy": 1.7094822824001312, "epoch": 0.18159347449946445, "grad_norm": 0.7348982095718384, "learning_rate": 1.9826657304283085e-05, "loss": 1.3374, "mean_token_accuracy": 0.6641669621070226, "num_tokens": 276425397.0, "step": 1653 }, { "entropy": 1.7678433259328206, "epoch": 0.1817033314108374, "grad_norm": 0.6787462830543518, "learning_rate": 1.982634443319907e-05, "loss": 1.4428, "mean_token_accuracy": 0.6486629645029703, "num_tokens": 276586282.0, "step": 1654 }, { "entropy": 1.7211858630180359, "epoch": 0.18181318832221033, "grad_norm": 0.6997633576393127, "learning_rate": 1.9826031282765233e-05, "loss": 1.3731, "mean_token_accuracy": 0.6508075048526129, "num_tokens": 276786867.0, "step": 1655 }, { "entropy": 1.7509803275267284, "epoch": 0.18192304523358327, "grad_norm": 0.7596442699432373, "learning_rate": 1.9825717852991487e-05, "loss": 1.4368, "mean_token_accuracy": 0.6510319958130518, "num_tokens": 276926610.0, "step": 1656 }, { "entropy": 1.7846331695715587, "epoch": 0.18203290214495618, "grad_norm": 0.7824363708496094, "learning_rate": 1.9825404143887746e-05, "loss": 1.5756, "mean_token_accuracy": 0.6446571896473566, "num_tokens": 277112438.0, "step": 1657 }, { "entropy": 1.7570526401201885, "epoch": 0.18214275905632912, "grad_norm": 0.7908049821853638, "learning_rate": 1.9825090155463936e-05, "loss": 1.3862, "mean_token_accuracy": 0.6663278043270111, "num_tokens": 277257253.0, "step": 1658 }, { "entropy": 1.6966181596120198, "epoch": 0.18225261596770206, "grad_norm": 0.7348120808601379, "learning_rate": 1.9824775887730006e-05, "loss": 1.2518, "mean_token_accuracy": 0.6801349520683289, "num_tokens": 277366545.0, "step": 1659 }, { "entropy": 1.7101906538009644, "epoch": 0.182362472879075, "grad_norm": 0.7664896845817566, "learning_rate": 1.9824461340695892e-05, "loss": 1.6055, "mean_token_accuracy": 0.6423424060146014, "num_tokens": 277553846.0, "step": 1660 }, { "entropy": 1.6806841989358265, "epoch": 0.18247232979044795, "grad_norm": 0.5992354154586792, "learning_rate": 1.9824146514371553e-05, "loss": 1.4604, "mean_token_accuracy": 0.6359513700008392, "num_tokens": 277779533.0, "step": 1661 }, { "entropy": 1.7670801480611165, "epoch": 0.1825821867018209, "grad_norm": 0.8071349263191223, "learning_rate": 1.9823831408766953e-05, "loss": 1.703, "mean_token_accuracy": 0.6325116107861201, "num_tokens": 277985352.0, "step": 1662 }, { "entropy": 1.6910718381404877, "epoch": 0.18269204361319383, "grad_norm": 0.6777242422103882, "learning_rate": 1.9823516023892067e-05, "loss": 1.5038, "mean_token_accuracy": 0.6417184472084045, "num_tokens": 278220879.0, "step": 1663 }, { "entropy": 1.728562315305074, "epoch": 0.18280190052456674, "grad_norm": 0.6362787485122681, "learning_rate": 1.9823200359756875e-05, "loss": 1.4164, "mean_token_accuracy": 0.6453818678855896, "num_tokens": 278368023.0, "step": 1664 }, { "entropy": 1.7313311994075775, "epoch": 0.18291175743593968, "grad_norm": 0.6767556071281433, "learning_rate": 1.9822884416371364e-05, "loss": 1.3056, "mean_token_accuracy": 0.6659288257360458, "num_tokens": 278496388.0, "step": 1665 }, { "entropy": 1.6881966690222423, "epoch": 0.18302161434731262, "grad_norm": 0.6294616460800171, "learning_rate": 1.982256819374554e-05, "loss": 1.4921, "mean_token_accuracy": 0.6462472081184387, "num_tokens": 278674023.0, "step": 1666 }, { "entropy": 1.6751320759455364, "epoch": 0.18313147125868556, "grad_norm": 0.5912817120552063, "learning_rate": 1.9822251691889408e-05, "loss": 1.3839, "mean_token_accuracy": 0.6660457054773966, "num_tokens": 278837833.0, "step": 1667 }, { "entropy": 1.7794308066368103, "epoch": 0.1832413281700585, "grad_norm": 1.0613802671432495, "learning_rate": 1.9821934910812984e-05, "loss": 1.5629, "mean_token_accuracy": 0.6530888924996058, "num_tokens": 279020010.0, "step": 1668 }, { "entropy": 1.6890638172626495, "epoch": 0.18335118508143144, "grad_norm": 0.9429357647895813, "learning_rate": 1.9821617850526297e-05, "loss": 1.7154, "mean_token_accuracy": 0.6422553857167562, "num_tokens": 279192730.0, "step": 1669 }, { "entropy": 1.7527574300765991, "epoch": 0.18346104199280439, "grad_norm": 0.6539662480354309, "learning_rate": 1.9821300511039378e-05, "loss": 1.3789, "mean_token_accuracy": 0.6461327920357386, "num_tokens": 279341934.0, "step": 1670 }, { "entropy": 1.7701294422149658, "epoch": 0.1835708989041773, "grad_norm": 0.7200759053230286, "learning_rate": 1.9820982892362274e-05, "loss": 1.3941, "mean_token_accuracy": 0.65191750228405, "num_tokens": 279491511.0, "step": 1671 }, { "entropy": 1.8013378481070201, "epoch": 0.18368075581555024, "grad_norm": 0.7267040610313416, "learning_rate": 1.9820664994505035e-05, "loss": 1.4202, "mean_token_accuracy": 0.6435056875149409, "num_tokens": 279678965.0, "step": 1672 }, { "entropy": 1.723004271586736, "epoch": 0.18379061272692318, "grad_norm": 0.7009273171424866, "learning_rate": 1.9820346817477725e-05, "loss": 1.4127, "mean_token_accuracy": 0.6442108601331711, "num_tokens": 279853720.0, "step": 1673 }, { "entropy": 1.710123598575592, "epoch": 0.18390046963829612, "grad_norm": 0.8877512812614441, "learning_rate": 1.982002836129041e-05, "loss": 1.4214, "mean_token_accuracy": 0.6699336767196655, "num_tokens": 279997611.0, "step": 1674 }, { "entropy": 1.7392498552799225, "epoch": 0.18401032654966906, "grad_norm": 0.8386234641075134, "learning_rate": 1.9819709625953174e-05, "loss": 1.285, "mean_token_accuracy": 0.6787472317616144, "num_tokens": 280114602.0, "step": 1675 }, { "entropy": 1.763936976591746, "epoch": 0.184120183461042, "grad_norm": 0.8790518641471863, "learning_rate": 1.9819390611476105e-05, "loss": 1.3905, "mean_token_accuracy": 0.6481430331865946, "num_tokens": 280325517.0, "step": 1676 }, { "entropy": 1.725318839152654, "epoch": 0.18423004037241492, "grad_norm": 0.5728416442871094, "learning_rate": 1.9819071317869295e-05, "loss": 1.5904, "mean_token_accuracy": 0.6158607254425684, "num_tokens": 280595902.0, "step": 1677 }, { "entropy": 1.7371946076552074, "epoch": 0.18433989728378786, "grad_norm": 0.5821994543075562, "learning_rate": 1.9818751745142853e-05, "loss": 1.4596, "mean_token_accuracy": 0.645810733238856, "num_tokens": 280830692.0, "step": 1678 }, { "entropy": 1.8042426307996113, "epoch": 0.1844497541951608, "grad_norm": 0.6637836694717407, "learning_rate": 1.9818431893306887e-05, "loss": 1.5292, "mean_token_accuracy": 0.6280421316623688, "num_tokens": 281004533.0, "step": 1679 }, { "entropy": 1.7169641653696697, "epoch": 0.18455961110653374, "grad_norm": 0.7053066492080688, "learning_rate": 1.981811176237153e-05, "loss": 1.3631, "mean_token_accuracy": 0.6582474460204443, "num_tokens": 281160638.0, "step": 1680 }, { "entropy": 1.777447024981181, "epoch": 0.18466946801790668, "grad_norm": 0.7402387857437134, "learning_rate": 1.981779135234691e-05, "loss": 1.5166, "mean_token_accuracy": 0.6345723768075308, "num_tokens": 281338443.0, "step": 1681 }, { "entropy": 1.7175538738568623, "epoch": 0.18477932492927962, "grad_norm": 0.5362944602966309, "learning_rate": 1.9817470663243165e-05, "loss": 0.9837, "mean_token_accuracy": 0.6940766374270121, "num_tokens": 281502174.0, "step": 1682 }, { "entropy": 1.77180611093839, "epoch": 0.18488918184065256, "grad_norm": 0.9495187401771545, "learning_rate": 1.9817149695070447e-05, "loss": 1.4066, "mean_token_accuracy": 0.6637826462586721, "num_tokens": 281636795.0, "step": 1683 }, { "entropy": 1.7006933093070984, "epoch": 0.18499903875202547, "grad_norm": 0.5615609884262085, "learning_rate": 1.9816828447838913e-05, "loss": 1.3676, "mean_token_accuracy": 0.6498903632164001, "num_tokens": 281795185.0, "step": 1684 }, { "entropy": 1.8015896479288738, "epoch": 0.18510889566339842, "grad_norm": 0.7910170555114746, "learning_rate": 1.9816506921558733e-05, "loss": 1.4873, "mean_token_accuracy": 0.6395314981540045, "num_tokens": 281967043.0, "step": 1685 }, { "entropy": 1.7635705371697743, "epoch": 0.18521875257477136, "grad_norm": 0.6194027066230774, "learning_rate": 1.9816185116240084e-05, "loss": 1.4286, "mean_token_accuracy": 0.6360458632310232, "num_tokens": 282209695.0, "step": 1686 }, { "entropy": 1.727480669816335, "epoch": 0.1853286094861443, "grad_norm": 0.6732087135314941, "learning_rate": 1.981586303189315e-05, "loss": 1.2988, "mean_token_accuracy": 0.6633017708857855, "num_tokens": 282393134.0, "step": 1687 }, { "entropy": 1.7175672849019368, "epoch": 0.18543846639751724, "grad_norm": 0.7200150489807129, "learning_rate": 1.9815540668528116e-05, "loss": 1.4865, "mean_token_accuracy": 0.652409682671229, "num_tokens": 282586386.0, "step": 1688 }, { "entropy": 1.7157348195711772, "epoch": 0.18554832330889018, "grad_norm": 0.6159201264381409, "learning_rate": 1.9815218026155194e-05, "loss": 1.4005, "mean_token_accuracy": 0.6451119929552078, "num_tokens": 282769139.0, "step": 1689 }, { "entropy": 1.7268279194831848, "epoch": 0.18565818022026312, "grad_norm": 0.8370085954666138, "learning_rate": 1.9814895104784598e-05, "loss": 1.3224, "mean_token_accuracy": 0.6635098308324814, "num_tokens": 282906774.0, "step": 1690 }, { "entropy": 1.7369357744852703, "epoch": 0.18576803713163603, "grad_norm": 0.8012304306030273, "learning_rate": 1.9814571904426543e-05, "loss": 1.3494, "mean_token_accuracy": 0.6629086136817932, "num_tokens": 283026160.0, "step": 1691 }, { "entropy": 1.766795575618744, "epoch": 0.18587789404300897, "grad_norm": 0.6900972127914429, "learning_rate": 1.9814248425091256e-05, "loss": 1.4208, "mean_token_accuracy": 0.6468196511268616, "num_tokens": 283225811.0, "step": 1692 }, { "entropy": 1.739597777525584, "epoch": 0.18598775095438191, "grad_norm": 0.6812617778778076, "learning_rate": 1.981392466678898e-05, "loss": 1.4982, "mean_token_accuracy": 0.6387915263573328, "num_tokens": 283443567.0, "step": 1693 }, { "entropy": 1.6632297138373058, "epoch": 0.18609760786575485, "grad_norm": 0.7416886687278748, "learning_rate": 1.981360062952996e-05, "loss": 1.4409, "mean_token_accuracy": 0.6323288530111313, "num_tokens": 283658416.0, "step": 1694 }, { "entropy": 1.7458237608273823, "epoch": 0.1862074647771278, "grad_norm": 0.5728136897087097, "learning_rate": 1.9813276313324453e-05, "loss": 1.3206, "mean_token_accuracy": 0.6567148516575495, "num_tokens": 283807479.0, "step": 1695 }, { "entropy": 1.7193138301372528, "epoch": 0.18631732168850074, "grad_norm": 0.6321941018104553, "learning_rate": 1.981295171818272e-05, "loss": 1.3395, "mean_token_accuracy": 0.6582736521959305, "num_tokens": 284013422.0, "step": 1696 }, { "entropy": 1.7184888124465942, "epoch": 0.18642717859987368, "grad_norm": 0.579247236251831, "learning_rate": 1.981262684411504e-05, "loss": 1.4342, "mean_token_accuracy": 0.6520635535319647, "num_tokens": 284242706.0, "step": 1697 }, { "entropy": 1.7161762118339539, "epoch": 0.1865370355112466, "grad_norm": 0.6291903853416443, "learning_rate": 1.9812301691131688e-05, "loss": 1.3629, "mean_token_accuracy": 0.663032611211141, "num_tokens": 284413353.0, "step": 1698 }, { "entropy": 1.7629437744617462, "epoch": 0.18664689242261953, "grad_norm": 0.7114847898483276, "learning_rate": 1.981197625924296e-05, "loss": 1.3757, "mean_token_accuracy": 0.6550849924484888, "num_tokens": 284543844.0, "step": 1699 }, { "entropy": 1.7381982902685802, "epoch": 0.18675674933399247, "grad_norm": 0.7382696866989136, "learning_rate": 1.9811650548459155e-05, "loss": 1.4664, "mean_token_accuracy": 0.6513356864452362, "num_tokens": 284731517.0, "step": 1700 }, { "entropy": 1.674417903025945, "epoch": 0.1868666062453654, "grad_norm": 0.7865607142448425, "learning_rate": 1.9811324558790573e-05, "loss": 1.3178, "mean_token_accuracy": 0.6622059692939123, "num_tokens": 284848458.0, "step": 1701 }, { "entropy": 1.6967013478279114, "epoch": 0.18697646315673835, "grad_norm": 0.6472486853599548, "learning_rate": 1.9810998290247547e-05, "loss": 1.4338, "mean_token_accuracy": 0.6450558453798294, "num_tokens": 285063455.0, "step": 1702 }, { "entropy": 1.701272616783778, "epoch": 0.1870863200681113, "grad_norm": 0.6370506882667542, "learning_rate": 1.9810671742840394e-05, "loss": 1.3558, "mean_token_accuracy": 0.6611084739367167, "num_tokens": 285219438.0, "step": 1703 }, { "entropy": 1.7011124789714813, "epoch": 0.18719617697948424, "grad_norm": 0.7365835309028625, "learning_rate": 1.981034491657945e-05, "loss": 1.4027, "mean_token_accuracy": 0.6517395476500193, "num_tokens": 285421503.0, "step": 1704 }, { "entropy": 1.7084963818391163, "epoch": 0.18730603389085715, "grad_norm": 0.7742033004760742, "learning_rate": 1.9810017811475058e-05, "loss": 1.3874, "mean_token_accuracy": 0.6629381775856018, "num_tokens": 285546599.0, "step": 1705 }, { "entropy": 1.7513247827688854, "epoch": 0.1874158908022301, "grad_norm": 0.6956959962844849, "learning_rate": 1.9809690427537577e-05, "loss": 1.436, "mean_token_accuracy": 0.6494058966636658, "num_tokens": 285726907.0, "step": 1706 }, { "entropy": 1.6769114037354786, "epoch": 0.18752574771360303, "grad_norm": 0.6903713345527649, "learning_rate": 1.9809362764777357e-05, "loss": 1.3839, "mean_token_accuracy": 0.656981165210406, "num_tokens": 285913198.0, "step": 1707 }, { "entropy": 1.7068938712279003, "epoch": 0.18763560462497597, "grad_norm": 0.6433144807815552, "learning_rate": 1.980903482320478e-05, "loss": 1.4031, "mean_token_accuracy": 0.6512850423653921, "num_tokens": 286084913.0, "step": 1708 }, { "entropy": 1.7556909918785095, "epoch": 0.1877454615363489, "grad_norm": 0.6782954335212708, "learning_rate": 1.980870660283022e-05, "loss": 1.3747, "mean_token_accuracy": 0.6523662805557251, "num_tokens": 286225973.0, "step": 1709 }, { "entropy": 1.7448440194129944, "epoch": 0.18785531844772185, "grad_norm": 0.7479444742202759, "learning_rate": 1.9808378103664064e-05, "loss": 1.3416, "mean_token_accuracy": 0.6604155600070953, "num_tokens": 286384384.0, "step": 1710 }, { "entropy": 1.7512960731983185, "epoch": 0.18796517535909477, "grad_norm": 0.7333407402038574, "learning_rate": 1.980804932571671e-05, "loss": 1.3877, "mean_token_accuracy": 0.6567022105058035, "num_tokens": 286546675.0, "step": 1711 }, { "entropy": 1.6740979949633281, "epoch": 0.1880750322704677, "grad_norm": 0.8082526922225952, "learning_rate": 1.9807720268998563e-05, "loss": 1.3267, "mean_token_accuracy": 0.6659936855236689, "num_tokens": 286679729.0, "step": 1712 }, { "entropy": 1.7247611383597057, "epoch": 0.18818488918184065, "grad_norm": 0.6447880268096924, "learning_rate": 1.980739093352004e-05, "loss": 1.3205, "mean_token_accuracy": 0.6574007123708725, "num_tokens": 286806107.0, "step": 1713 }, { "entropy": 1.683200756708781, "epoch": 0.1882947460932136, "grad_norm": 0.7221059203147888, "learning_rate": 1.9807061319291562e-05, "loss": 1.4732, "mean_token_accuracy": 0.6614238594969114, "num_tokens": 286932425.0, "step": 1714 }, { "entropy": 1.733568549156189, "epoch": 0.18840460300458653, "grad_norm": 0.8345680832862854, "learning_rate": 1.980673142632356e-05, "loss": 1.3026, "mean_token_accuracy": 0.6688729921976725, "num_tokens": 287051397.0, "step": 1715 }, { "entropy": 1.700208157300949, "epoch": 0.18851445991595947, "grad_norm": 0.7973415851593018, "learning_rate": 1.9806401254626483e-05, "loss": 1.3171, "mean_token_accuracy": 0.6919489403565725, "num_tokens": 287216101.0, "step": 1716 }, { "entropy": 1.7363602022329967, "epoch": 0.1886243168273324, "grad_norm": 0.798485517501831, "learning_rate": 1.9806070804210768e-05, "loss": 1.4979, "mean_token_accuracy": 0.6549982378880183, "num_tokens": 287384291.0, "step": 1717 }, { "entropy": 1.6927721202373505, "epoch": 0.18873417373870532, "grad_norm": 0.6318584084510803, "learning_rate": 1.9805740075086884e-05, "loss": 1.2625, "mean_token_accuracy": 0.6866245418787003, "num_tokens": 287521194.0, "step": 1718 }, { "entropy": 1.7616549928983052, "epoch": 0.18884403065007827, "grad_norm": 0.6952632069587708, "learning_rate": 1.980540906726529e-05, "loss": 1.5152, "mean_token_accuracy": 0.6423781365156174, "num_tokens": 287695130.0, "step": 1719 }, { "entropy": 1.7475587129592896, "epoch": 0.1889538875614512, "grad_norm": 0.8023023009300232, "learning_rate": 1.9805077780756473e-05, "loss": 1.2237, "mean_token_accuracy": 0.6784818867842356, "num_tokens": 287806319.0, "step": 1720 }, { "entropy": 1.7310992081960042, "epoch": 0.18906374447282415, "grad_norm": 0.7073454856872559, "learning_rate": 1.9804746215570908e-05, "loss": 1.4297, "mean_token_accuracy": 0.6600957165161768, "num_tokens": 288006487.0, "step": 1721 }, { "entropy": 1.6791634062925975, "epoch": 0.1891736013841971, "grad_norm": 0.7400916218757629, "learning_rate": 1.9804414371719096e-05, "loss": 1.2141, "mean_token_accuracy": 0.6767335186402003, "num_tokens": 288109036.0, "step": 1722 }, { "entropy": 1.7825380861759186, "epoch": 0.18928345829557003, "grad_norm": 0.786323070526123, "learning_rate": 1.9804082249211533e-05, "loss": 1.4554, "mean_token_accuracy": 0.6546699553728104, "num_tokens": 288234316.0, "step": 1723 }, { "entropy": 1.7622264524300892, "epoch": 0.18939331520694297, "grad_norm": 0.7630921602249146, "learning_rate": 1.9803749848058733e-05, "loss": 1.2852, "mean_token_accuracy": 0.6784159690141678, "num_tokens": 288358675.0, "step": 1724 }, { "entropy": 1.6970917185147603, "epoch": 0.18950317211831588, "grad_norm": 0.9257987141609192, "learning_rate": 1.980341716827122e-05, "loss": 1.3535, "mean_token_accuracy": 0.658329596122106, "num_tokens": 288525891.0, "step": 1725 }, { "entropy": 1.7701091667016347, "epoch": 0.18961302902968882, "grad_norm": 0.7394087910652161, "learning_rate": 1.980308420985952e-05, "loss": 1.3935, "mean_token_accuracy": 0.6555753747622172, "num_tokens": 288695061.0, "step": 1726 }, { "entropy": 1.7800631125768025, "epoch": 0.18972288594106176, "grad_norm": 0.8137099742889404, "learning_rate": 1.980275097283417e-05, "loss": 1.5755, "mean_token_accuracy": 0.6337922463814417, "num_tokens": 288879071.0, "step": 1727 }, { "entropy": 1.6966053247451782, "epoch": 0.1898327428524347, "grad_norm": 0.6805859804153442, "learning_rate": 1.980241745720572e-05, "loss": 1.56, "mean_token_accuracy": 0.6277511119842529, "num_tokens": 289092246.0, "step": 1728 }, { "entropy": 1.7272930939992268, "epoch": 0.18994259976380765, "grad_norm": 0.6892310976982117, "learning_rate": 1.9802083662984727e-05, "loss": 1.5014, "mean_token_accuracy": 0.645158996184667, "num_tokens": 289246411.0, "step": 1729 }, { "entropy": 1.718581090370814, "epoch": 0.1900524566751806, "grad_norm": 0.7332895994186401, "learning_rate": 1.9801749590181747e-05, "loss": 1.4741, "mean_token_accuracy": 0.6542643109957377, "num_tokens": 289450051.0, "step": 1730 }, { "entropy": 1.7375418742497761, "epoch": 0.19016231358655353, "grad_norm": 0.6508983969688416, "learning_rate": 1.980141523880736e-05, "loss": 1.4436, "mean_token_accuracy": 0.6606499453385671, "num_tokens": 289635594.0, "step": 1731 }, { "entropy": 1.749743362267812, "epoch": 0.19027217049792644, "grad_norm": 0.6622723937034607, "learning_rate": 1.980108060887215e-05, "loss": 1.3431, "mean_token_accuracy": 0.6567785541216532, "num_tokens": 289785489.0, "step": 1732 }, { "entropy": 1.7038879295190175, "epoch": 0.19038202740929938, "grad_norm": 0.7151694297790527, "learning_rate": 1.98007457003867e-05, "loss": 1.4463, "mean_token_accuracy": 0.6599321961402893, "num_tokens": 289955840.0, "step": 1733 }, { "entropy": 1.6714328130086262, "epoch": 0.19049188432067232, "grad_norm": 0.5718501210212708, "learning_rate": 1.980041051336162e-05, "loss": 1.3941, "mean_token_accuracy": 0.6486310015122095, "num_tokens": 290155918.0, "step": 1734 }, { "entropy": 1.7431990305582683, "epoch": 0.19060174123204526, "grad_norm": 0.7136338949203491, "learning_rate": 1.9800075047807507e-05, "loss": 1.4286, "mean_token_accuracy": 0.6443975865840912, "num_tokens": 290318477.0, "step": 1735 }, { "entropy": 1.7279701729615529, "epoch": 0.1907115981434182, "grad_norm": 0.8319575786590576, "learning_rate": 1.9799739303734986e-05, "loss": 1.3872, "mean_token_accuracy": 0.6527506609757742, "num_tokens": 290442722.0, "step": 1736 }, { "entropy": 1.6454756160577138, "epoch": 0.19082145505479114, "grad_norm": 0.6253258585929871, "learning_rate": 1.9799403281154684e-05, "loss": 1.2394, "mean_token_accuracy": 0.6801058252652487, "num_tokens": 290589905.0, "step": 1737 }, { "entropy": 1.7290644546349843, "epoch": 0.19093131196616406, "grad_norm": 0.6695640087127686, "learning_rate": 1.9799066980077227e-05, "loss": 1.4237, "mean_token_accuracy": 0.6563388605912527, "num_tokens": 290762031.0, "step": 1738 }, { "entropy": 1.7278599540392559, "epoch": 0.191041168877537, "grad_norm": 0.7530442476272583, "learning_rate": 1.979873040051327e-05, "loss": 1.5137, "mean_token_accuracy": 0.6513047764698664, "num_tokens": 290924823.0, "step": 1739 }, { "entropy": 1.79681396484375, "epoch": 0.19115102578890994, "grad_norm": 0.7993313074111938, "learning_rate": 1.9798393542473456e-05, "loss": 1.4511, "mean_token_accuracy": 0.6647708465655645, "num_tokens": 291080098.0, "step": 1740 }, { "entropy": 1.7253247797489166, "epoch": 0.19126088270028288, "grad_norm": 0.6776132583618164, "learning_rate": 1.9798056405968457e-05, "loss": 1.3921, "mean_token_accuracy": 0.6470450113217036, "num_tokens": 291206890.0, "step": 1741 }, { "entropy": 1.7353723645210266, "epoch": 0.19137073961165582, "grad_norm": 0.8562172055244446, "learning_rate": 1.9797718991008936e-05, "loss": 1.3435, "mean_token_accuracy": 0.6580035636822382, "num_tokens": 291343320.0, "step": 1742 }, { "entropy": 1.7603330214818318, "epoch": 0.19148059652302876, "grad_norm": 0.7309443950653076, "learning_rate": 1.979738129760557e-05, "loss": 1.5365, "mean_token_accuracy": 0.622960185011228, "num_tokens": 291550070.0, "step": 1743 }, { "entropy": 1.7653774221738179, "epoch": 0.1915904534344017, "grad_norm": 0.833625316619873, "learning_rate": 1.9797043325769056e-05, "loss": 1.3869, "mean_token_accuracy": 0.6533713638782501, "num_tokens": 291680108.0, "step": 1744 }, { "entropy": 1.715358128150304, "epoch": 0.19170031034577462, "grad_norm": 0.7196187973022461, "learning_rate": 1.979670507551008e-05, "loss": 1.4072, "mean_token_accuracy": 0.6509930094083151, "num_tokens": 291852862.0, "step": 1745 }, { "entropy": 1.7125201920668285, "epoch": 0.19181016725714756, "grad_norm": 0.6884719729423523, "learning_rate": 1.9796366546839354e-05, "loss": 1.3614, "mean_token_accuracy": 0.655213917295138, "num_tokens": 292033459.0, "step": 1746 }, { "entropy": 1.6897228856881459, "epoch": 0.1919200241685205, "grad_norm": 0.6630612015724182, "learning_rate": 1.9796027739767587e-05, "loss": 1.5917, "mean_token_accuracy": 0.6360281805197397, "num_tokens": 292222658.0, "step": 1747 }, { "entropy": 1.6932558019955952, "epoch": 0.19202988107989344, "grad_norm": 0.6871110200881958, "learning_rate": 1.979568865430551e-05, "loss": 1.3225, "mean_token_accuracy": 0.6635150760412216, "num_tokens": 292398658.0, "step": 1748 }, { "entropy": 1.7077955702940624, "epoch": 0.19213973799126638, "grad_norm": 0.6830503344535828, "learning_rate": 1.979534929046385e-05, "loss": 1.4689, "mean_token_accuracy": 0.6499410420656204, "num_tokens": 292556109.0, "step": 1749 }, { "entropy": 1.6917288800080617, "epoch": 0.19224959490263932, "grad_norm": 0.7428691983222961, "learning_rate": 1.9795009648253346e-05, "loss": 1.4188, "mean_token_accuracy": 0.6616235027710596, "num_tokens": 292701727.0, "step": 1750 }, { "entropy": 1.7118379374345143, "epoch": 0.19235945181401226, "grad_norm": 0.7000189423561096, "learning_rate": 1.979466972768475e-05, "loss": 1.399, "mean_token_accuracy": 0.6520606428384781, "num_tokens": 292871798.0, "step": 1751 }, { "entropy": 1.6876471141974132, "epoch": 0.19246930872538517, "grad_norm": 0.7701053619384766, "learning_rate": 1.9794329528768822e-05, "loss": 1.2992, "mean_token_accuracy": 0.665013869603475, "num_tokens": 293021064.0, "step": 1752 }, { "entropy": 1.7469976941744487, "epoch": 0.19257916563675812, "grad_norm": 0.6615880727767944, "learning_rate": 1.9793989051516327e-05, "loss": 1.4366, "mean_token_accuracy": 0.6483164032300314, "num_tokens": 293149174.0, "step": 1753 }, { "entropy": 1.7197850545247395, "epoch": 0.19268902254813106, "grad_norm": 0.714011549949646, "learning_rate": 1.979364829593804e-05, "loss": 1.4344, "mean_token_accuracy": 0.639839842915535, "num_tokens": 293300752.0, "step": 1754 }, { "entropy": 1.7304006119569142, "epoch": 0.192798879459504, "grad_norm": 0.7182620763778687, "learning_rate": 1.9793307262044748e-05, "loss": 1.4202, "mean_token_accuracy": 0.6542019993066788, "num_tokens": 293456002.0, "step": 1755 }, { "entropy": 1.7409031490484874, "epoch": 0.19290873637087694, "grad_norm": 0.6725859045982361, "learning_rate": 1.9792965949847242e-05, "loss": 1.3865, "mean_token_accuracy": 0.6448834588130316, "num_tokens": 293628935.0, "step": 1756 }, { "entropy": 1.6798570553461711, "epoch": 0.19301859328224988, "grad_norm": 0.7474890351295471, "learning_rate": 1.9792624359356326e-05, "loss": 1.2733, "mean_token_accuracy": 0.6787517368793488, "num_tokens": 293755756.0, "step": 1757 }, { "entropy": 1.7863287031650543, "epoch": 0.19312845019362282, "grad_norm": 0.8425063490867615, "learning_rate": 1.9792282490582812e-05, "loss": 1.4917, "mean_token_accuracy": 0.6436772296826044, "num_tokens": 293919313.0, "step": 1758 }, { "entropy": 1.6916013459364574, "epoch": 0.19323830710499573, "grad_norm": 0.7149840593338013, "learning_rate": 1.9791940343537517e-05, "loss": 1.4658, "mean_token_accuracy": 0.653274287780126, "num_tokens": 294076984.0, "step": 1759 }, { "entropy": 1.7235734164714813, "epoch": 0.19334816401636867, "grad_norm": 0.7532820701599121, "learning_rate": 1.9791597918231278e-05, "loss": 1.2797, "mean_token_accuracy": 0.6706577440102895, "num_tokens": 294259181.0, "step": 1760 }, { "entropy": 1.727324555317561, "epoch": 0.19345802092774161, "grad_norm": 0.711613655090332, "learning_rate": 1.9791255214674922e-05, "loss": 1.4411, "mean_token_accuracy": 0.6571811487277349, "num_tokens": 294419560.0, "step": 1761 }, { "entropy": 1.7226892411708832, "epoch": 0.19356787783911455, "grad_norm": 0.6647672653198242, "learning_rate": 1.97909122328793e-05, "loss": 1.5226, "mean_token_accuracy": 0.6560903539260229, "num_tokens": 294579172.0, "step": 1762 }, { "entropy": 1.719457467397054, "epoch": 0.1936777347504875, "grad_norm": 0.6270412802696228, "learning_rate": 1.9790568972855266e-05, "loss": 1.3127, "mean_token_accuracy": 0.6619095156590143, "num_tokens": 294728030.0, "step": 1763 }, { "entropy": 1.6665961543718975, "epoch": 0.19378759166186044, "grad_norm": 0.7340520620346069, "learning_rate": 1.9790225434613687e-05, "loss": 1.3513, "mean_token_accuracy": 0.681754027803739, "num_tokens": 294866688.0, "step": 1764 }, { "entropy": 1.8049577971299489, "epoch": 0.19389744857323335, "grad_norm": 0.7559868097305298, "learning_rate": 1.9789881618165434e-05, "loss": 1.3979, "mean_token_accuracy": 0.6505992064873377, "num_tokens": 295021152.0, "step": 1765 }, { "entropy": 1.6748623251914978, "epoch": 0.1940073054846063, "grad_norm": 0.5827370882034302, "learning_rate": 1.9789537523521387e-05, "loss": 1.3721, "mean_token_accuracy": 0.6543336113293966, "num_tokens": 295229519.0, "step": 1766 }, { "entropy": 1.7147534688313801, "epoch": 0.19411716239597923, "grad_norm": 6.890192031860352, "learning_rate": 1.9789193150692438e-05, "loss": 1.4899, "mean_token_accuracy": 0.6559451967477798, "num_tokens": 295384686.0, "step": 1767 }, { "entropy": 1.707640786965688, "epoch": 0.19422701930735217, "grad_norm": 0.6139290928840637, "learning_rate": 1.978884849968949e-05, "loss": 1.2937, "mean_token_accuracy": 0.6562622785568237, "num_tokens": 295563230.0, "step": 1768 }, { "entropy": 1.7182945013046265, "epoch": 0.1943368762187251, "grad_norm": 0.6669062972068787, "learning_rate": 1.9788503570523443e-05, "loss": 1.4615, "mean_token_accuracy": 0.6506092747052511, "num_tokens": 295726034.0, "step": 1769 }, { "entropy": 1.7365097900231679, "epoch": 0.19444673313009805, "grad_norm": 0.7182427048683167, "learning_rate": 1.978815836320522e-05, "loss": 1.4676, "mean_token_accuracy": 0.651889423529307, "num_tokens": 295918492.0, "step": 1770 }, { "entropy": 1.7327903906504314, "epoch": 0.194556590041471, "grad_norm": 0.6817887425422668, "learning_rate": 1.9787812877745745e-05, "loss": 1.3728, "mean_token_accuracy": 0.6570483843485514, "num_tokens": 296085849.0, "step": 1771 }, { "entropy": 1.693399171034495, "epoch": 0.1946664469528439, "grad_norm": 0.715168833732605, "learning_rate": 1.978746711415595e-05, "loss": 1.3975, "mean_token_accuracy": 0.6593023041884104, "num_tokens": 296266624.0, "step": 1772 }, { "entropy": 1.7591918508211772, "epoch": 0.19477630386421685, "grad_norm": 0.7243099808692932, "learning_rate": 1.9787121072446785e-05, "loss": 1.4259, "mean_token_accuracy": 0.6468930890162786, "num_tokens": 296396972.0, "step": 1773 }, { "entropy": 1.6918930610020955, "epoch": 0.1948861607755898, "grad_norm": 0.7280333042144775, "learning_rate": 1.9786774752629195e-05, "loss": 1.3678, "mean_token_accuracy": 0.6707959572474161, "num_tokens": 296548243.0, "step": 1774 }, { "entropy": 1.7696085572242737, "epoch": 0.19499601768696273, "grad_norm": 0.5832480788230896, "learning_rate": 1.9786428154714143e-05, "loss": 1.5862, "mean_token_accuracy": 0.6292986472447714, "num_tokens": 296747351.0, "step": 1775 }, { "entropy": 1.7673150698343914, "epoch": 0.19510587459833567, "grad_norm": 0.6457276940345764, "learning_rate": 1.9786081278712598e-05, "loss": 1.4639, "mean_token_accuracy": 0.6444855431715647, "num_tokens": 296935015.0, "step": 1776 }, { "entropy": 1.6158926784992218, "epoch": 0.1952157315097086, "grad_norm": 0.5862371921539307, "learning_rate": 1.9785734124635544e-05, "loss": 1.3359, "mean_token_accuracy": 0.6693507929642996, "num_tokens": 297145524.0, "step": 1777 }, { "entropy": 1.7320951322714488, "epoch": 0.19532558842108155, "grad_norm": 0.7013179659843445, "learning_rate": 1.978538669249396e-05, "loss": 1.4895, "mean_token_accuracy": 0.6407797584931055, "num_tokens": 297288935.0, "step": 1778 }, { "entropy": 1.7831117709477742, "epoch": 0.19543544533245447, "grad_norm": 0.7159624099731445, "learning_rate": 1.978503898229885e-05, "loss": 1.4075, "mean_token_accuracy": 0.6433553198973337, "num_tokens": 297459113.0, "step": 1779 }, { "entropy": 1.6679442922274272, "epoch": 0.1955453022438274, "grad_norm": 0.725671112537384, "learning_rate": 1.978469099406121e-05, "loss": 1.3892, "mean_token_accuracy": 0.655972421169281, "num_tokens": 297624207.0, "step": 1780 }, { "entropy": 1.7021668950716655, "epoch": 0.19565515915520035, "grad_norm": 0.6634953022003174, "learning_rate": 1.978434272779206e-05, "loss": 1.3119, "mean_token_accuracy": 0.6691978275775909, "num_tokens": 297786571.0, "step": 1781 }, { "entropy": 1.7528183460235596, "epoch": 0.1957650160665733, "grad_norm": 0.7583564519882202, "learning_rate": 1.9783994183502423e-05, "loss": 1.5388, "mean_token_accuracy": 0.626950333515803, "num_tokens": 298018826.0, "step": 1782 }, { "entropy": 1.6614128748575847, "epoch": 0.19587487297794623, "grad_norm": 0.5320108532905579, "learning_rate": 1.9783645361203324e-05, "loss": 1.3674, "mean_token_accuracy": 0.6552481253941854, "num_tokens": 298245649.0, "step": 1783 }, { "entropy": 1.770830233891805, "epoch": 0.19598472988931917, "grad_norm": 0.6707810163497925, "learning_rate": 1.9783296260905812e-05, "loss": 1.3718, "mean_token_accuracy": 0.6541836063067118, "num_tokens": 298382824.0, "step": 1784 }, { "entropy": 1.759553889433543, "epoch": 0.1960945868006921, "grad_norm": 0.7541280388832092, "learning_rate": 1.978294688262093e-05, "loss": 1.2791, "mean_token_accuracy": 0.6692831069231033, "num_tokens": 298517297.0, "step": 1785 }, { "entropy": 1.7564668953418732, "epoch": 0.19620444371206502, "grad_norm": 0.6617575287818909, "learning_rate": 1.9782597226359737e-05, "loss": 1.4138, "mean_token_accuracy": 0.6551504383484522, "num_tokens": 298666505.0, "step": 1786 }, { "entropy": 1.760979433854421, "epoch": 0.19631430062343797, "grad_norm": 0.8420041799545288, "learning_rate": 1.97822472921333e-05, "loss": 1.2352, "mean_token_accuracy": 0.6695485363403956, "num_tokens": 298824770.0, "step": 1787 }, { "entropy": 1.7239550054073334, "epoch": 0.1964241575348109, "grad_norm": 0.7422584295272827, "learning_rate": 1.9781897079952693e-05, "loss": 1.4415, "mean_token_accuracy": 0.6553243845701218, "num_tokens": 298993460.0, "step": 1788 }, { "entropy": 1.7003070811430614, "epoch": 0.19653401444618385, "grad_norm": 0.6796644926071167, "learning_rate": 1.9781546589828993e-05, "loss": 1.4076, "mean_token_accuracy": 0.6579789767662684, "num_tokens": 299145182.0, "step": 1789 }, { "entropy": 1.7247399985790253, "epoch": 0.1966438713575568, "grad_norm": 0.7494299411773682, "learning_rate": 1.9781195821773313e-05, "loss": 1.2761, "mean_token_accuracy": 0.6815857142210007, "num_tokens": 299274678.0, "step": 1790 }, { "entropy": 1.7042510112126668, "epoch": 0.19675372826892973, "grad_norm": 0.6861709952354431, "learning_rate": 1.9780844775796733e-05, "loss": 1.3178, "mean_token_accuracy": 0.6655194312334061, "num_tokens": 299407876.0, "step": 1791 }, { "entropy": 1.6969355642795563, "epoch": 0.19686358518030267, "grad_norm": 0.7215892672538757, "learning_rate": 1.978049345191038e-05, "loss": 1.3921, "mean_token_accuracy": 0.6600968192021052, "num_tokens": 299560247.0, "step": 1792 }, { "entropy": 1.7007923424243927, "epoch": 0.19697344209167558, "grad_norm": 0.5989768505096436, "learning_rate": 1.9780141850125362e-05, "loss": 1.3689, "mean_token_accuracy": 0.6636965026458105, "num_tokens": 299788837.0, "step": 1793 }, { "entropy": 1.672113170226415, "epoch": 0.19708329900304852, "grad_norm": 0.751150906085968, "learning_rate": 1.977978997045281e-05, "loss": 1.3912, "mean_token_accuracy": 0.6588180909554163, "num_tokens": 299966636.0, "step": 1794 }, { "entropy": 1.6974561214447021, "epoch": 0.19719315591442146, "grad_norm": 0.6662459969520569, "learning_rate": 1.9779437812903862e-05, "loss": 1.3124, "mean_token_accuracy": 0.6676592777172724, "num_tokens": 300121520.0, "step": 1795 }, { "entropy": 1.7895707885424297, "epoch": 0.1973030128257944, "grad_norm": 0.6926854252815247, "learning_rate": 1.9779085377489663e-05, "loss": 1.4074, "mean_token_accuracy": 0.6511611789464951, "num_tokens": 300249227.0, "step": 1796 }, { "entropy": 1.6819771826267242, "epoch": 0.19741286973716735, "grad_norm": 3.198624849319458, "learning_rate": 1.977873266422137e-05, "loss": 1.1345, "mean_token_accuracy": 0.6746688683827718, "num_tokens": 300417964.0, "step": 1797 }, { "entropy": 1.7427258292833965, "epoch": 0.1975227266485403, "grad_norm": 0.7579461336135864, "learning_rate": 1.977837967311014e-05, "loss": 1.3167, "mean_token_accuracy": 0.6665651847918829, "num_tokens": 300538034.0, "step": 1798 }, { "entropy": 1.7113410731156666, "epoch": 0.1976325835599132, "grad_norm": 0.7278785705566406, "learning_rate": 1.977802640416715e-05, "loss": 1.3031, "mean_token_accuracy": 0.6744206001361212, "num_tokens": 300652123.0, "step": 1799 }, { "entropy": 1.7057754894097645, "epoch": 0.19774244047128614, "grad_norm": 0.6383393406867981, "learning_rate": 1.9777672857403584e-05, "loss": 1.5941, "mean_token_accuracy": 0.6359094778696696, "num_tokens": 300918339.0, "step": 1800 }, { "entropy": 1.796320726474126, "epoch": 0.19785229738265908, "grad_norm": 0.8345091342926025, "learning_rate": 1.9777319032830624e-05, "loss": 1.433, "mean_token_accuracy": 0.6534850498040518, "num_tokens": 301084973.0, "step": 1801 }, { "entropy": 1.7382001678148906, "epoch": 0.19796215429403202, "grad_norm": 0.8948132395744324, "learning_rate": 1.9776964930459474e-05, "loss": 1.3021, "mean_token_accuracy": 0.6653626014788946, "num_tokens": 301208519.0, "step": 1802 }, { "entropy": 1.7888563871383667, "epoch": 0.19807201120540496, "grad_norm": 0.6303662061691284, "learning_rate": 1.9776610550301338e-05, "loss": 1.4763, "mean_token_accuracy": 0.6427715172370275, "num_tokens": 301351718.0, "step": 1803 }, { "entropy": 1.6515491306781769, "epoch": 0.1981818681167779, "grad_norm": 0.7126901745796204, "learning_rate": 1.977625589236743e-05, "loss": 1.2714, "mean_token_accuracy": 0.6703294515609741, "num_tokens": 301507032.0, "step": 1804 }, { "entropy": 1.7583887577056885, "epoch": 0.19829172502815084, "grad_norm": 0.68550044298172, "learning_rate": 1.977590095666898e-05, "loss": 1.39, "mean_token_accuracy": 0.6463887542486191, "num_tokens": 301646202.0, "step": 1805 }, { "entropy": 1.68940003712972, "epoch": 0.19840158193952376, "grad_norm": 0.6771380305290222, "learning_rate": 1.977554574321722e-05, "loss": 1.482, "mean_token_accuracy": 0.6526958445707957, "num_tokens": 301804123.0, "step": 1806 }, { "entropy": 1.7353551983833313, "epoch": 0.1985114388508967, "grad_norm": 0.7520933747291565, "learning_rate": 1.977519025202339e-05, "loss": 1.5235, "mean_token_accuracy": 0.6432921489079794, "num_tokens": 301974575.0, "step": 1807 }, { "entropy": 1.7340351243813832, "epoch": 0.19862129576226964, "grad_norm": 0.6563875675201416, "learning_rate": 1.9774834483098745e-05, "loss": 1.3366, "mean_token_accuracy": 0.6615680456161499, "num_tokens": 302144787.0, "step": 1808 }, { "entropy": 1.6780883272488911, "epoch": 0.19873115267364258, "grad_norm": 0.6213021278381348, "learning_rate": 1.977447843645454e-05, "loss": 1.3939, "mean_token_accuracy": 0.6571485896905264, "num_tokens": 302332010.0, "step": 1809 }, { "entropy": 1.7402231593926747, "epoch": 0.19884100958501552, "grad_norm": 0.7091407775878906, "learning_rate": 1.9774122112102047e-05, "loss": 1.4103, "mean_token_accuracy": 0.6477878441413244, "num_tokens": 302472018.0, "step": 1810 }, { "entropy": 1.6822114984194438, "epoch": 0.19895086649638846, "grad_norm": 0.7527471780776978, "learning_rate": 1.9773765510052546e-05, "loss": 1.5214, "mean_token_accuracy": 0.6414872830112776, "num_tokens": 302690599.0, "step": 1811 }, { "entropy": 1.7382448216279347, "epoch": 0.1990607234077614, "grad_norm": 0.8488646149635315, "learning_rate": 1.9773408630317316e-05, "loss": 1.4812, "mean_token_accuracy": 0.6391682376464208, "num_tokens": 302884159.0, "step": 1812 }, { "entropy": 1.7655917604764302, "epoch": 0.19917058031913432, "grad_norm": 0.8058510422706604, "learning_rate": 1.9773051472907657e-05, "loss": 1.558, "mean_token_accuracy": 0.6351256171862284, "num_tokens": 303045104.0, "step": 1813 }, { "entropy": 1.7527148922284443, "epoch": 0.19928043723050726, "grad_norm": 0.7928998470306396, "learning_rate": 1.9772694037834873e-05, "loss": 1.5035, "mean_token_accuracy": 0.64784603814284, "num_tokens": 303195849.0, "step": 1814 }, { "entropy": 1.7500303983688354, "epoch": 0.1993902941418802, "grad_norm": 0.6854771375656128, "learning_rate": 1.977233632511028e-05, "loss": 1.3672, "mean_token_accuracy": 0.6633496433496475, "num_tokens": 303362204.0, "step": 1815 }, { "entropy": 1.7209295729796092, "epoch": 0.19950015105325314, "grad_norm": 0.7351916432380676, "learning_rate": 1.9771978334745184e-05, "loss": 1.4907, "mean_token_accuracy": 0.6455176422993342, "num_tokens": 303576314.0, "step": 1816 }, { "entropy": 1.7865357597668965, "epoch": 0.19961000796462608, "grad_norm": 0.79336017370224, "learning_rate": 1.9771620066750937e-05, "loss": 1.4667, "mean_token_accuracy": 0.6478342215220133, "num_tokens": 303792834.0, "step": 1817 }, { "entropy": 1.6429633895556133, "epoch": 0.19971986487599902, "grad_norm": 0.6706152558326721, "learning_rate": 1.9771261521138862e-05, "loss": 1.2351, "mean_token_accuracy": 0.6809234966834387, "num_tokens": 303906864.0, "step": 1818 }, { "entropy": 1.8072155614693959, "epoch": 0.19982972178737196, "grad_norm": 0.8159286975860596, "learning_rate": 1.9770902697920315e-05, "loss": 1.5045, "mean_token_accuracy": 0.6369387259085973, "num_tokens": 304084742.0, "step": 1819 }, { "entropy": 1.7179032564163208, "epoch": 0.19993957869874487, "grad_norm": 0.770172655582428, "learning_rate": 1.977054359710665e-05, "loss": 1.2617, "mean_token_accuracy": 0.6699782609939575, "num_tokens": 304223214.0, "step": 1820 }, { "entropy": 1.712960034608841, "epoch": 0.20004943561011782, "grad_norm": 0.9042562246322632, "learning_rate": 1.977018421870923e-05, "loss": 1.3977, "mean_token_accuracy": 0.6594905803600947, "num_tokens": 304405961.0, "step": 1821 }, { "entropy": 1.6442652344703674, "epoch": 0.20015929252149076, "grad_norm": 0.609815776348114, "learning_rate": 1.976982456273943e-05, "loss": 1.3585, "mean_token_accuracy": 0.6593890488147736, "num_tokens": 304574118.0, "step": 1822 }, { "entropy": 1.6892712612946827, "epoch": 0.2002691494328637, "grad_norm": 0.7102997303009033, "learning_rate": 1.9769464629208643e-05, "loss": 1.2489, "mean_token_accuracy": 0.6777701675891876, "num_tokens": 304742861.0, "step": 1823 }, { "entropy": 1.6933867732683818, "epoch": 0.20037900634423664, "grad_norm": 0.596263587474823, "learning_rate": 1.976910441812824e-05, "loss": 1.5056, "mean_token_accuracy": 0.6288742969433466, "num_tokens": 305020688.0, "step": 1824 }, { "entropy": 1.7255509893099468, "epoch": 0.20048886325560958, "grad_norm": 0.730722188949585, "learning_rate": 1.9768743929509643e-05, "loss": 1.425, "mean_token_accuracy": 0.6644620796044668, "num_tokens": 305151455.0, "step": 1825 }, { "entropy": 1.7349488337834675, "epoch": 0.2005987201669825, "grad_norm": 0.6531474590301514, "learning_rate": 1.9768383163364248e-05, "loss": 1.6278, "mean_token_accuracy": 0.6154864877462387, "num_tokens": 305348902.0, "step": 1826 }, { "entropy": 1.7766931653022766, "epoch": 0.20070857707835543, "grad_norm": 0.7823039293289185, "learning_rate": 1.9768022119703477e-05, "loss": 1.3906, "mean_token_accuracy": 0.6641748547554016, "num_tokens": 305467608.0, "step": 1827 }, { "entropy": 1.7294628620147705, "epoch": 0.20081843398972837, "grad_norm": 0.600829541683197, "learning_rate": 1.9767660798538757e-05, "loss": 1.3125, "mean_token_accuracy": 0.6706470201412836, "num_tokens": 305626175.0, "step": 1828 }, { "entropy": 1.7608105738957722, "epoch": 0.20092829090110131, "grad_norm": 0.6462728977203369, "learning_rate": 1.9767299199881524e-05, "loss": 1.4496, "mean_token_accuracy": 0.6452651371558508, "num_tokens": 305863805.0, "step": 1829 }, { "entropy": 1.7059124012788136, "epoch": 0.20103814781247425, "grad_norm": 0.5904053449630737, "learning_rate": 1.9766937323743226e-05, "loss": 1.2778, "mean_token_accuracy": 0.6797713836034139, "num_tokens": 306001168.0, "step": 1830 }, { "entropy": 1.6253532270590465, "epoch": 0.2011480047238472, "grad_norm": 0.7917356491088867, "learning_rate": 1.976657517013531e-05, "loss": 1.2863, "mean_token_accuracy": 0.6701359699169794, "num_tokens": 306199106.0, "step": 1831 }, { "entropy": 1.7297336757183075, "epoch": 0.20125786163522014, "grad_norm": 0.7113462686538696, "learning_rate": 1.9766212739069233e-05, "loss": 1.4775, "mean_token_accuracy": 0.6459426383177439, "num_tokens": 306362651.0, "step": 1832 }, { "entropy": 1.7070247530937195, "epoch": 0.20136771854659305, "grad_norm": 0.6649527549743652, "learning_rate": 1.976585003055648e-05, "loss": 1.4208, "mean_token_accuracy": 0.6574066330989202, "num_tokens": 306544467.0, "step": 1833 }, { "entropy": 1.7177915970484416, "epoch": 0.201477575457966, "grad_norm": 0.742743730545044, "learning_rate": 1.976548704460852e-05, "loss": 1.2361, "mean_token_accuracy": 0.6762651801109314, "num_tokens": 306659110.0, "step": 1834 }, { "entropy": 1.706160436073939, "epoch": 0.20158743236933893, "grad_norm": 0.907835841178894, "learning_rate": 1.976512378123685e-05, "loss": 1.3505, "mean_token_accuracy": 0.6621433893839518, "num_tokens": 306810908.0, "step": 1835 }, { "entropy": 1.700301080942154, "epoch": 0.20169728928071187, "grad_norm": 0.7097511291503906, "learning_rate": 1.9764760240452957e-05, "loss": 1.4589, "mean_token_accuracy": 0.6501483097672462, "num_tokens": 306986279.0, "step": 1836 }, { "entropy": 1.7191430628299713, "epoch": 0.2018071461920848, "grad_norm": 0.607349157333374, "learning_rate": 1.9764396422268356e-05, "loss": 1.3485, "mean_token_accuracy": 0.662805438041687, "num_tokens": 307116996.0, "step": 1837 }, { "entropy": 1.7494115829467773, "epoch": 0.20191700310345775, "grad_norm": 0.7002793550491333, "learning_rate": 1.976403232669455e-05, "loss": 1.5679, "mean_token_accuracy": 0.6226314206918081, "num_tokens": 307295127.0, "step": 1838 }, { "entropy": 1.7843216558297474, "epoch": 0.2020268600148307, "grad_norm": 0.6927679777145386, "learning_rate": 1.9763667953743078e-05, "loss": 1.4433, "mean_token_accuracy": 0.650288388133049, "num_tokens": 307474847.0, "step": 1839 }, { "entropy": 1.7510626216729481, "epoch": 0.2021367169262036, "grad_norm": 0.652125895023346, "learning_rate": 1.9763303303425463e-05, "loss": 1.3464, "mean_token_accuracy": 0.6607447812954584, "num_tokens": 307652843.0, "step": 1840 }, { "entropy": 1.7755654851595561, "epoch": 0.20224657383757655, "grad_norm": 0.9156058430671692, "learning_rate": 1.9762938375753245e-05, "loss": 1.3372, "mean_token_accuracy": 0.6591128408908844, "num_tokens": 307769467.0, "step": 1841 }, { "entropy": 1.7441391746203105, "epoch": 0.2023564307489495, "grad_norm": 0.7158201932907104, "learning_rate": 1.976257317073798e-05, "loss": 1.366, "mean_token_accuracy": 0.6583238691091537, "num_tokens": 307913812.0, "step": 1842 }, { "entropy": 1.7281207144260406, "epoch": 0.20246628766032243, "grad_norm": 0.5468607544898987, "learning_rate": 1.9762207688391216e-05, "loss": 1.4142, "mean_token_accuracy": 0.6494698971509933, "num_tokens": 308126776.0, "step": 1843 }, { "entropy": 1.7128788232803345, "epoch": 0.20257614457169537, "grad_norm": 0.6986768841743469, "learning_rate": 1.976184192872453e-05, "loss": 1.3821, "mean_token_accuracy": 0.650952065984408, "num_tokens": 308296663.0, "step": 1844 }, { "entropy": 1.7056085566679637, "epoch": 0.2026860014830683, "grad_norm": 0.6520532369613647, "learning_rate": 1.9761475891749496e-05, "loss": 1.3298, "mean_token_accuracy": 0.6659070352713267, "num_tokens": 308484182.0, "step": 1845 }, { "entropy": 1.7646136184533436, "epoch": 0.20279585839444125, "grad_norm": 0.6946887373924255, "learning_rate": 1.9761109577477696e-05, "loss": 1.5495, "mean_token_accuracy": 0.6431872049967448, "num_tokens": 308660541.0, "step": 1846 }, { "entropy": 1.716064860423406, "epoch": 0.20290571530581417, "grad_norm": 0.8077802062034607, "learning_rate": 1.9760742985920726e-05, "loss": 1.5907, "mean_token_accuracy": 0.6451023171345392, "num_tokens": 308825957.0, "step": 1847 }, { "entropy": 1.7179016371568043, "epoch": 0.2030155722171871, "grad_norm": 0.7027926445007324, "learning_rate": 1.976037611709019e-05, "loss": 1.4141, "mean_token_accuracy": 0.6579538484414419, "num_tokens": 308979266.0, "step": 1848 }, { "entropy": 1.724165548880895, "epoch": 0.20312542912856005, "grad_norm": 0.5816169381141663, "learning_rate": 1.9760008970997702e-05, "loss": 1.3984, "mean_token_accuracy": 0.6541108936071396, "num_tokens": 309144227.0, "step": 1849 }, { "entropy": 1.7344149947166443, "epoch": 0.203235286039933, "grad_norm": 0.7658873796463013, "learning_rate": 1.975964154765487e-05, "loss": 1.5109, "mean_token_accuracy": 0.6461022893587748, "num_tokens": 309307786.0, "step": 1850 }, { "entropy": 1.7166369756062825, "epoch": 0.20334514295130593, "grad_norm": 0.6772670149803162, "learning_rate": 1.975927384707333e-05, "loss": 1.4384, "mean_token_accuracy": 0.6566036691268285, "num_tokens": 309459208.0, "step": 1851 }, { "entropy": 1.6875809729099274, "epoch": 0.20345499986267887, "grad_norm": 0.7443994283676147, "learning_rate": 1.9758905869264725e-05, "loss": 1.5135, "mean_token_accuracy": 0.6605640351772308, "num_tokens": 309607725.0, "step": 1852 }, { "entropy": 1.775184839963913, "epoch": 0.2035648567740518, "grad_norm": 0.7913832664489746, "learning_rate": 1.9758537614240692e-05, "loss": 1.3856, "mean_token_accuracy": 0.6432444254557291, "num_tokens": 309776782.0, "step": 1853 }, { "entropy": 1.7546232839425404, "epoch": 0.20367471368542472, "grad_norm": 0.7907248139381409, "learning_rate": 1.9758169082012893e-05, "loss": 1.3165, "mean_token_accuracy": 0.6588219453891119, "num_tokens": 309925121.0, "step": 1854 }, { "entropy": 1.7209657728672028, "epoch": 0.20378457059679767, "grad_norm": 0.770335853099823, "learning_rate": 1.975780027259299e-05, "loss": 1.4543, "mean_token_accuracy": 0.6663737197717031, "num_tokens": 310094118.0, "step": 1855 }, { "entropy": 1.6761006116867065, "epoch": 0.2038944275081706, "grad_norm": 0.7291706204414368, "learning_rate": 1.975743118599265e-05, "loss": 1.322, "mean_token_accuracy": 0.672456423441569, "num_tokens": 310234093.0, "step": 1856 }, { "entropy": 1.7500018080075581, "epoch": 0.20400428441954355, "grad_norm": 0.7051176428794861, "learning_rate": 1.975706182222356e-05, "loss": 1.4993, "mean_token_accuracy": 0.6471735189358393, "num_tokens": 310438381.0, "step": 1857 }, { "entropy": 1.7876022458076477, "epoch": 0.2041141413309165, "grad_norm": 0.7112467288970947, "learning_rate": 1.9756692181297412e-05, "loss": 1.5468, "mean_token_accuracy": 0.6419008473555247, "num_tokens": 310624466.0, "step": 1858 }, { "entropy": 1.7213526765505474, "epoch": 0.20422399824228943, "grad_norm": 0.6644498705863953, "learning_rate": 1.9756322263225903e-05, "loss": 1.3655, "mean_token_accuracy": 0.6711994310220083, "num_tokens": 310774375.0, "step": 1859 }, { "entropy": 1.6958336333433788, "epoch": 0.20433385515366234, "grad_norm": 0.6671877503395081, "learning_rate": 1.9755952068020737e-05, "loss": 1.3116, "mean_token_accuracy": 0.6657513082027435, "num_tokens": 310940187.0, "step": 1860 }, { "entropy": 1.6833363076051076, "epoch": 0.20444371206503528, "grad_norm": 0.7353310585021973, "learning_rate": 1.9755581595693636e-05, "loss": 1.4442, "mean_token_accuracy": 0.6834416141112646, "num_tokens": 311079868.0, "step": 1861 }, { "entropy": 1.7643102804819744, "epoch": 0.20455356897640822, "grad_norm": 0.7783114910125732, "learning_rate": 1.975521084625632e-05, "loss": 1.3208, "mean_token_accuracy": 0.6558940261602402, "num_tokens": 311220945.0, "step": 1862 }, { "entropy": 1.7230792840321858, "epoch": 0.20466342588778116, "grad_norm": 0.6682336926460266, "learning_rate": 1.975483981972053e-05, "loss": 1.426, "mean_token_accuracy": 0.6528936872879664, "num_tokens": 311384579.0, "step": 1863 }, { "entropy": 1.7676608264446259, "epoch": 0.2047732827991541, "grad_norm": 0.7327151298522949, "learning_rate": 1.9754468516098003e-05, "loss": 1.3778, "mean_token_accuracy": 0.6655658980210623, "num_tokens": 311529478.0, "step": 1864 }, { "entropy": 1.773693968852361, "epoch": 0.20488313971052705, "grad_norm": 0.8596065044403076, "learning_rate": 1.975409693540049e-05, "loss": 1.2687, "mean_token_accuracy": 0.6674908697605133, "num_tokens": 311648679.0, "step": 1865 }, { "entropy": 1.682075430949529, "epoch": 0.2049929966219, "grad_norm": 0.7088510990142822, "learning_rate": 1.9753725077639757e-05, "loss": 1.3837, "mean_token_accuracy": 0.6538609862327576, "num_tokens": 311807028.0, "step": 1866 }, { "entropy": 1.7813390990098317, "epoch": 0.2051028535332729, "grad_norm": 0.7097972631454468, "learning_rate": 1.9753352942827568e-05, "loss": 1.6051, "mean_token_accuracy": 0.6367628425359726, "num_tokens": 312009929.0, "step": 1867 }, { "entropy": 1.7026053667068481, "epoch": 0.20521271044464584, "grad_norm": 0.7793052196502686, "learning_rate": 1.9752980530975702e-05, "loss": 1.3475, "mean_token_accuracy": 0.6695610036452612, "num_tokens": 312142042.0, "step": 1868 }, { "entropy": 1.7217031021912892, "epoch": 0.20532256735601878, "grad_norm": 0.6053064465522766, "learning_rate": 1.975260784209595e-05, "loss": 1.4688, "mean_token_accuracy": 0.6375043392181396, "num_tokens": 312356696.0, "step": 1869 }, { "entropy": 1.7617976367473602, "epoch": 0.20543242426739172, "grad_norm": 1.0582380294799805, "learning_rate": 1.9752234876200097e-05, "loss": 1.5011, "mean_token_accuracy": 0.6502614468336105, "num_tokens": 312534643.0, "step": 1870 }, { "entropy": 1.6304903427759807, "epoch": 0.20554228117876466, "grad_norm": 0.5571168065071106, "learning_rate": 1.975186163329996e-05, "loss": 1.2778, "mean_token_accuracy": 0.6664902319510778, "num_tokens": 312700073.0, "step": 1871 }, { "entropy": 1.6912108063697815, "epoch": 0.2056521380901376, "grad_norm": 0.6295304894447327, "learning_rate": 1.9751488113407343e-05, "loss": 1.4227, "mean_token_accuracy": 0.6689607550700506, "num_tokens": 312847666.0, "step": 1872 }, { "entropy": 1.7157114446163177, "epoch": 0.20576199500151054, "grad_norm": 0.8937650918960571, "learning_rate": 1.975111431653407e-05, "loss": 1.5174, "mean_token_accuracy": 0.6386149227619171, "num_tokens": 313025199.0, "step": 1873 }, { "entropy": 1.7388854622840881, "epoch": 0.20587185191288346, "grad_norm": 0.5921317934989929, "learning_rate": 1.9750740242691978e-05, "loss": 1.5385, "mean_token_accuracy": 0.6445744981368383, "num_tokens": 313268338.0, "step": 1874 }, { "entropy": 1.6713208258152008, "epoch": 0.2059817088242564, "grad_norm": 0.7296470403671265, "learning_rate": 1.9750365891892894e-05, "loss": 1.4052, "mean_token_accuracy": 0.6561514039834341, "num_tokens": 313477471.0, "step": 1875 }, { "entropy": 1.6889991958936055, "epoch": 0.20609156573562934, "grad_norm": 0.591461181640625, "learning_rate": 1.9749991264148676e-05, "loss": 1.3788, "mean_token_accuracy": 0.6569076975186666, "num_tokens": 313703842.0, "step": 1876 }, { "entropy": 1.710012932618459, "epoch": 0.20620142264700228, "grad_norm": 0.7003285884857178, "learning_rate": 1.9749616359471176e-05, "loss": 1.2286, "mean_token_accuracy": 0.6813757121562958, "num_tokens": 313828662.0, "step": 1877 }, { "entropy": 1.7340795695781708, "epoch": 0.20631127955837522, "grad_norm": 0.7556050419807434, "learning_rate": 1.974924117787226e-05, "loss": 1.3818, "mean_token_accuracy": 0.6705377250909805, "num_tokens": 313992143.0, "step": 1878 }, { "entropy": 1.6594361861546834, "epoch": 0.20642113646974816, "grad_norm": 0.67914217710495, "learning_rate": 1.974886571936381e-05, "loss": 1.4848, "mean_token_accuracy": 0.655074879527092, "num_tokens": 314195437.0, "step": 1879 }, { "entropy": 1.7556609710057576, "epoch": 0.2065309933811211, "grad_norm": 0.7271071672439575, "learning_rate": 1.9748489983957692e-05, "loss": 1.4298, "mean_token_accuracy": 0.641119142373403, "num_tokens": 314352057.0, "step": 1880 }, { "entropy": 1.747815767923991, "epoch": 0.20664085029249402, "grad_norm": 0.6552515625953674, "learning_rate": 1.9748113971665816e-05, "loss": 1.3886, "mean_token_accuracy": 0.6543693294127783, "num_tokens": 314532136.0, "step": 1881 }, { "entropy": 1.723093291123708, "epoch": 0.20675070720386696, "grad_norm": 0.6786137223243713, "learning_rate": 1.9747737682500072e-05, "loss": 1.5003, "mean_token_accuracy": 0.6459396133820215, "num_tokens": 314696517.0, "step": 1882 }, { "entropy": 1.7882917523384094, "epoch": 0.2068605641152399, "grad_norm": 0.7087535262107849, "learning_rate": 1.9747361116472373e-05, "loss": 1.4855, "mean_token_accuracy": 0.6424766977628072, "num_tokens": 314906715.0, "step": 1883 }, { "entropy": 1.7414319415887196, "epoch": 0.20697042102661284, "grad_norm": 0.8721282482147217, "learning_rate": 1.9746984273594632e-05, "loss": 1.4097, "mean_token_accuracy": 0.654596209526062, "num_tokens": 315155191.0, "step": 1884 }, { "entropy": 1.7770436803499858, "epoch": 0.20708027793798578, "grad_norm": 0.7305892705917358, "learning_rate": 1.9746607153878786e-05, "loss": 1.4086, "mean_token_accuracy": 0.6574834038813909, "num_tokens": 315321135.0, "step": 1885 }, { "entropy": 1.7179120083649952, "epoch": 0.20719013484935872, "grad_norm": 0.7217333912849426, "learning_rate": 1.9746229757336763e-05, "loss": 1.4068, "mean_token_accuracy": 0.6624855548143387, "num_tokens": 315492089.0, "step": 1886 }, { "entropy": 1.7504285176595051, "epoch": 0.20729999176073163, "grad_norm": 0.6315979957580566, "learning_rate": 1.9745852083980507e-05, "loss": 1.5327, "mean_token_accuracy": 0.6477139939864477, "num_tokens": 315664077.0, "step": 1887 }, { "entropy": 1.6375745435555775, "epoch": 0.20740984867210457, "grad_norm": 0.7350934147834778, "learning_rate": 1.9745474133821978e-05, "loss": 1.379, "mean_token_accuracy": 0.6684954961140951, "num_tokens": 315862163.0, "step": 1888 }, { "entropy": 1.6826703051726024, "epoch": 0.20751970558347752, "grad_norm": 0.6202029585838318, "learning_rate": 1.974509590687313e-05, "loss": 1.3122, "mean_token_accuracy": 0.6682248959938685, "num_tokens": 316025052.0, "step": 1889 }, { "entropy": 1.724346548318863, "epoch": 0.20762956249485046, "grad_norm": 0.7694988250732422, "learning_rate": 1.9744717403145935e-05, "loss": 1.3031, "mean_token_accuracy": 0.6729477494955063, "num_tokens": 316128499.0, "step": 1890 }, { "entropy": 1.7621622681617737, "epoch": 0.2077394194062234, "grad_norm": 0.6370652318000793, "learning_rate": 1.974433862265238e-05, "loss": 1.4028, "mean_token_accuracy": 0.653249795238177, "num_tokens": 316363618.0, "step": 1891 }, { "entropy": 1.733237236738205, "epoch": 0.20784927631759634, "grad_norm": 0.7276230454444885, "learning_rate": 1.9743959565404444e-05, "loss": 1.2583, "mean_token_accuracy": 0.6862328201532364, "num_tokens": 316514946.0, "step": 1892 }, { "entropy": 1.6722540160020192, "epoch": 0.20795913322896928, "grad_norm": 0.6568346619606018, "learning_rate": 1.974358023141413e-05, "loss": 1.3413, "mean_token_accuracy": 0.6587617894013723, "num_tokens": 316683060.0, "step": 1893 }, { "entropy": 1.7204617460568745, "epoch": 0.2080689901403422, "grad_norm": 0.7202989459037781, "learning_rate": 1.9743200620693442e-05, "loss": 1.3252, "mean_token_accuracy": 0.6609460512797037, "num_tokens": 316879147.0, "step": 1894 }, { "entropy": 1.703949401775996, "epoch": 0.20817884705171513, "grad_norm": 0.7430747747421265, "learning_rate": 1.9742820733254394e-05, "loss": 1.3856, "mean_token_accuracy": 0.6529827465613683, "num_tokens": 317073037.0, "step": 1895 }, { "entropy": 1.7440508604049683, "epoch": 0.20828870396308807, "grad_norm": 0.6155939698219299, "learning_rate": 1.9742440569109008e-05, "loss": 1.4088, "mean_token_accuracy": 0.6512663116057714, "num_tokens": 317300611.0, "step": 1896 }, { "entropy": 1.7047240138053894, "epoch": 0.20839856087446101, "grad_norm": 0.7423590421676636, "learning_rate": 1.974206012826932e-05, "loss": 1.4194, "mean_token_accuracy": 0.6572458644707998, "num_tokens": 317448579.0, "step": 1897 }, { "entropy": 1.7306395471096039, "epoch": 0.20850841778583395, "grad_norm": 0.644149661064148, "learning_rate": 1.9741679410747364e-05, "loss": 1.4372, "mean_token_accuracy": 0.6524844119946162, "num_tokens": 317628610.0, "step": 1898 }, { "entropy": 1.7001280784606934, "epoch": 0.2086182746972069, "grad_norm": 0.6855803728103638, "learning_rate": 1.9741298416555196e-05, "loss": 1.4569, "mean_token_accuracy": 0.6421359926462173, "num_tokens": 317810190.0, "step": 1899 }, { "entropy": 1.7195194760958354, "epoch": 0.20872813160857984, "grad_norm": 0.7779269218444824, "learning_rate": 1.974091714570487e-05, "loss": 1.5581, "mean_token_accuracy": 0.6334304213523865, "num_tokens": 318001402.0, "step": 1900 }, { "entropy": 1.7282605667908986, "epoch": 0.20883798851995275, "grad_norm": 0.6376639604568481, "learning_rate": 1.9740535598208458e-05, "loss": 1.3576, "mean_token_accuracy": 0.6543524712324142, "num_tokens": 318167363.0, "step": 1901 }, { "entropy": 1.7092109819253285, "epoch": 0.2089478454313257, "grad_norm": 0.6729239821434021, "learning_rate": 1.9740153774078033e-05, "loss": 1.346, "mean_token_accuracy": 0.658411035935084, "num_tokens": 318317612.0, "step": 1902 }, { "entropy": 1.7186111609141033, "epoch": 0.20905770234269863, "grad_norm": 0.8159520030021667, "learning_rate": 1.9739771673325678e-05, "loss": 1.4808, "mean_token_accuracy": 0.6539090524117152, "num_tokens": 318481892.0, "step": 1903 }, { "entropy": 1.7617724239826202, "epoch": 0.20916755925407157, "grad_norm": 0.6777756214141846, "learning_rate": 1.9739389295963486e-05, "loss": 1.5622, "mean_token_accuracy": 0.6342104425032934, "num_tokens": 318709241.0, "step": 1904 }, { "entropy": 1.719315081834793, "epoch": 0.2092774161654445, "grad_norm": 0.663506805896759, "learning_rate": 1.9739006642003566e-05, "loss": 1.3675, "mean_token_accuracy": 0.668515016635259, "num_tokens": 318948897.0, "step": 1905 }, { "entropy": 1.7113699913024902, "epoch": 0.20938727307681745, "grad_norm": 0.6316990256309509, "learning_rate": 1.973862371145802e-05, "loss": 1.4644, "mean_token_accuracy": 0.6517230321963629, "num_tokens": 319134577.0, "step": 1906 }, { "entropy": 1.7475859622160594, "epoch": 0.2094971299881904, "grad_norm": 0.7789897322654724, "learning_rate": 1.973824050433897e-05, "loss": 1.4932, "mean_token_accuracy": 0.6495751440525055, "num_tokens": 319295735.0, "step": 1907 }, { "entropy": 1.685575932264328, "epoch": 0.2096069868995633, "grad_norm": 0.5883545279502869, "learning_rate": 1.973785702065855e-05, "loss": 1.4238, "mean_token_accuracy": 0.6530423561731974, "num_tokens": 319516027.0, "step": 1908 }, { "entropy": 1.7320783734321594, "epoch": 0.20971684381093625, "grad_norm": 0.8050070405006409, "learning_rate": 1.9737473260428894e-05, "loss": 1.337, "mean_token_accuracy": 0.6576566646496455, "num_tokens": 319616804.0, "step": 1909 }, { "entropy": 1.7201977968215942, "epoch": 0.2098267007223092, "grad_norm": 0.7059934139251709, "learning_rate": 1.973708922366214e-05, "loss": 1.2972, "mean_token_accuracy": 0.6773047844568888, "num_tokens": 319738775.0, "step": 1910 }, { "entropy": 1.7452878654003143, "epoch": 0.20993655763368213, "grad_norm": 0.6112817525863647, "learning_rate": 1.973670491037045e-05, "loss": 1.4088, "mean_token_accuracy": 0.6482658833265305, "num_tokens": 319930662.0, "step": 1911 }, { "entropy": 1.710121254126231, "epoch": 0.21004641454505507, "grad_norm": 0.7919174432754517, "learning_rate": 1.973632032056599e-05, "loss": 1.4035, "mean_token_accuracy": 0.6597762157519659, "num_tokens": 320076366.0, "step": 1912 }, { "entropy": 1.7030129532019298, "epoch": 0.210156271456428, "grad_norm": 0.700587272644043, "learning_rate": 1.9735935454260925e-05, "loss": 1.3965, "mean_token_accuracy": 0.6516723334789276, "num_tokens": 320247392.0, "step": 1913 }, { "entropy": 1.7211789786815643, "epoch": 0.21026612836780095, "grad_norm": 0.7128605842590332, "learning_rate": 1.9735550311467443e-05, "loss": 1.4136, "mean_token_accuracy": 0.6487771173318228, "num_tokens": 320408383.0, "step": 1914 }, { "entropy": 1.7001396020253499, "epoch": 0.21037598527917387, "grad_norm": 0.7244299650192261, "learning_rate": 1.973516489219773e-05, "loss": 1.499, "mean_token_accuracy": 0.6356903513272604, "num_tokens": 320659297.0, "step": 1915 }, { "entropy": 1.7240975697835286, "epoch": 0.2104858421905468, "grad_norm": 0.5836021900177002, "learning_rate": 1.973477919646398e-05, "loss": 1.316, "mean_token_accuracy": 0.6605116327603658, "num_tokens": 320823387.0, "step": 1916 }, { "entropy": 1.724854737520218, "epoch": 0.21059569910191975, "grad_norm": 0.7972742319107056, "learning_rate": 1.9734393224278406e-05, "loss": 1.3694, "mean_token_accuracy": 0.6639392127593359, "num_tokens": 320951543.0, "step": 1917 }, { "entropy": 1.696036696434021, "epoch": 0.2107055560132927, "grad_norm": 0.8939576745033264, "learning_rate": 1.9734006975653224e-05, "loss": 1.2696, "mean_token_accuracy": 0.6718998452027639, "num_tokens": 321087040.0, "step": 1918 }, { "entropy": 1.6345807611942291, "epoch": 0.21081541292466563, "grad_norm": 0.5838897824287415, "learning_rate": 1.9733620450600655e-05, "loss": 1.3427, "mean_token_accuracy": 0.6704151580731074, "num_tokens": 321269169.0, "step": 1919 }, { "entropy": 1.7143397529919941, "epoch": 0.21092526983603857, "grad_norm": 0.7261598110198975, "learning_rate": 1.9733233649132938e-05, "loss": 1.4234, "mean_token_accuracy": 0.6666987985372543, "num_tokens": 321418860.0, "step": 1920 }, { "entropy": 1.7327088514963787, "epoch": 0.21103512674741148, "grad_norm": 0.7012075185775757, "learning_rate": 1.9732846571262304e-05, "loss": 1.4299, "mean_token_accuracy": 0.6525350759426752, "num_tokens": 321598118.0, "step": 1921 }, { "entropy": 1.7444495658079784, "epoch": 0.21114498365878442, "grad_norm": 0.6507946252822876, "learning_rate": 1.9732459217001017e-05, "loss": 1.4639, "mean_token_accuracy": 0.6573313424984614, "num_tokens": 321804284.0, "step": 1922 }, { "entropy": 1.724996030330658, "epoch": 0.21125484057015737, "grad_norm": 0.620772123336792, "learning_rate": 1.9732071586361334e-05, "loss": 1.5714, "mean_token_accuracy": 0.625721663236618, "num_tokens": 322021779.0, "step": 1923 }, { "entropy": 1.6913380126158397, "epoch": 0.2113646974815303, "grad_norm": 0.5675688982009888, "learning_rate": 1.973168367935551e-05, "loss": 1.4439, "mean_token_accuracy": 0.6470164060592651, "num_tokens": 322200625.0, "step": 1924 }, { "entropy": 1.647382875283559, "epoch": 0.21147455439290325, "grad_norm": 0.6393111348152161, "learning_rate": 1.9731295495995838e-05, "loss": 1.3366, "mean_token_accuracy": 0.6661768307288488, "num_tokens": 322380416.0, "step": 1925 }, { "entropy": 1.6876880129178364, "epoch": 0.2115844113042762, "grad_norm": 2.3705599308013916, "learning_rate": 1.97309070362946e-05, "loss": 1.175, "mean_token_accuracy": 0.6751070966323217, "num_tokens": 322560122.0, "step": 1926 }, { "entropy": 1.7260343929131825, "epoch": 0.21169426821564913, "grad_norm": 0.689630389213562, "learning_rate": 1.9730518300264086e-05, "loss": 1.4034, "mean_token_accuracy": 0.66618379453818, "num_tokens": 322702668.0, "step": 1927 }, { "entropy": 1.6774284541606903, "epoch": 0.21180412512702204, "grad_norm": 0.6801313757896423, "learning_rate": 1.97301292879166e-05, "loss": 1.3852, "mean_token_accuracy": 0.6520472516616186, "num_tokens": 322920370.0, "step": 1928 }, { "entropy": 1.6716736455758412, "epoch": 0.21191398203839498, "grad_norm": 0.7043926119804382, "learning_rate": 1.9729739999264458e-05, "loss": 1.4412, "mean_token_accuracy": 0.6420286248127619, "num_tokens": 323090618.0, "step": 1929 }, { "entropy": 1.7062031924724579, "epoch": 0.21202383894976792, "grad_norm": 0.6019328832626343, "learning_rate": 1.9729350434319977e-05, "loss": 1.5193, "mean_token_accuracy": 0.6340185602506002, "num_tokens": 323303523.0, "step": 1930 }, { "entropy": 1.71775417526563, "epoch": 0.21213369586114086, "grad_norm": 0.655889093875885, "learning_rate": 1.9728960593095493e-05, "loss": 1.3497, "mean_token_accuracy": 0.6655166298151016, "num_tokens": 323446877.0, "step": 1931 }, { "entropy": 1.6558316747347515, "epoch": 0.2122435527725138, "grad_norm": 0.703708291053772, "learning_rate": 1.9728570475603336e-05, "loss": 1.3943, "mean_token_accuracy": 0.6581457406282425, "num_tokens": 323604718.0, "step": 1932 }, { "entropy": 1.7211906711260478, "epoch": 0.21235340968388675, "grad_norm": 0.7779010534286499, "learning_rate": 1.9728180081855855e-05, "loss": 1.4799, "mean_token_accuracy": 0.6504116902748743, "num_tokens": 323744606.0, "step": 1933 }, { "entropy": 1.636175235112508, "epoch": 0.2124632665952597, "grad_norm": 0.6399657726287842, "learning_rate": 1.972778941186541e-05, "loss": 1.2702, "mean_token_accuracy": 0.6657491226991018, "num_tokens": 323939422.0, "step": 1934 }, { "entropy": 1.7119918167591095, "epoch": 0.2125731235066326, "grad_norm": 0.6368902921676636, "learning_rate": 1.9727398465644363e-05, "loss": 1.3036, "mean_token_accuracy": 0.668227881193161, "num_tokens": 324097047.0, "step": 1935 }, { "entropy": 1.6312094032764435, "epoch": 0.21268298041800554, "grad_norm": 0.6294612884521484, "learning_rate": 1.972700724320509e-05, "loss": 1.2651, "mean_token_accuracy": 0.6779163281122843, "num_tokens": 324248984.0, "step": 1936 }, { "entropy": 1.7897210717201233, "epoch": 0.21279283732937848, "grad_norm": 0.7651355266571045, "learning_rate": 1.9726615744559965e-05, "loss": 1.4585, "mean_token_accuracy": 0.6460629999637604, "num_tokens": 324427567.0, "step": 1937 }, { "entropy": 1.760125567515691, "epoch": 0.21290269424075142, "grad_norm": 0.9342473745346069, "learning_rate": 1.9726223969721384e-05, "loss": 1.3453, "mean_token_accuracy": 0.6497061004241308, "num_tokens": 324608248.0, "step": 1938 }, { "entropy": 1.7319878935813904, "epoch": 0.21301255115212436, "grad_norm": 0.6311997771263123, "learning_rate": 1.972583191870175e-05, "loss": 1.3187, "mean_token_accuracy": 0.6541954825321833, "num_tokens": 324767775.0, "step": 1939 }, { "entropy": 1.681753158569336, "epoch": 0.2131224080634973, "grad_norm": 0.7324146032333374, "learning_rate": 1.9725439591513467e-05, "loss": 1.3592, "mean_token_accuracy": 0.6560372710227966, "num_tokens": 324951412.0, "step": 1940 }, { "entropy": 1.7503413657347362, "epoch": 0.21323226497487024, "grad_norm": 0.7484403252601624, "learning_rate": 1.972504698816895e-05, "loss": 1.3736, "mean_token_accuracy": 0.662136490146319, "num_tokens": 325081774.0, "step": 1941 }, { "entropy": 1.6506099005540211, "epoch": 0.21334212188624316, "grad_norm": 0.6231799125671387, "learning_rate": 1.972465410868063e-05, "loss": 1.2779, "mean_token_accuracy": 0.6839319914579391, "num_tokens": 325248232.0, "step": 1942 }, { "entropy": 1.7160189251104991, "epoch": 0.2134519787976161, "grad_norm": 0.7348440289497375, "learning_rate": 1.972426095306094e-05, "loss": 1.488, "mean_token_accuracy": 0.6483329683542252, "num_tokens": 325456430.0, "step": 1943 }, { "entropy": 1.6814146141211193, "epoch": 0.21356183570898904, "grad_norm": 0.6065512299537659, "learning_rate": 1.972386752132232e-05, "loss": 1.3807, "mean_token_accuracy": 0.6708929588397344, "num_tokens": 325625943.0, "step": 1944 }, { "entropy": 1.7090165813763936, "epoch": 0.21367169262036198, "grad_norm": 0.6108605861663818, "learning_rate": 1.9723473813477223e-05, "loss": 1.363, "mean_token_accuracy": 0.6590655495723089, "num_tokens": 325806626.0, "step": 1945 }, { "entropy": 1.7288100719451904, "epoch": 0.21378154953173492, "grad_norm": 0.814892590045929, "learning_rate": 1.9723079829538115e-05, "loss": 1.4412, "mean_token_accuracy": 0.6585568189620972, "num_tokens": 325963539.0, "step": 1946 }, { "entropy": 1.7768865327040355, "epoch": 0.21389140644310786, "grad_norm": 0.8345417976379395, "learning_rate": 1.9722685569517455e-05, "loss": 1.4554, "mean_token_accuracy": 0.6512434879938761, "num_tokens": 326093531.0, "step": 1947 }, { "entropy": 1.6884494721889496, "epoch": 0.21400126335448078, "grad_norm": 0.6763792634010315, "learning_rate": 1.9722291033427733e-05, "loss": 1.3025, "mean_token_accuracy": 0.6711457918087641, "num_tokens": 326244680.0, "step": 1948 }, { "entropy": 1.6721834540367126, "epoch": 0.21411112026585372, "grad_norm": 0.7668681144714355, "learning_rate": 1.9721896221281426e-05, "loss": 1.3331, "mean_token_accuracy": 0.6610443890094757, "num_tokens": 326351781.0, "step": 1949 }, { "entropy": 1.6889389057954152, "epoch": 0.21422097717722666, "grad_norm": 0.6436994075775146, "learning_rate": 1.9721501133091035e-05, "loss": 1.3498, "mean_token_accuracy": 0.6683029731114706, "num_tokens": 326496856.0, "step": 1950 }, { "entropy": 1.726958990097046, "epoch": 0.2143308340885996, "grad_norm": 0.7337366342544556, "learning_rate": 1.9721105768869066e-05, "loss": 1.5077, "mean_token_accuracy": 0.6476754397153854, "num_tokens": 326642845.0, "step": 1951 }, { "entropy": 1.750323196252187, "epoch": 0.21444069099997254, "grad_norm": 0.6473729610443115, "learning_rate": 1.972071012862802e-05, "loss": 1.5035, "mean_token_accuracy": 0.6414787570635477, "num_tokens": 326801852.0, "step": 1952 }, { "entropy": 1.6994662880897522, "epoch": 0.21455054791134548, "grad_norm": 0.6995154023170471, "learning_rate": 1.9720314212380437e-05, "loss": 1.3645, "mean_token_accuracy": 0.6681473056475321, "num_tokens": 327002498.0, "step": 1953 }, { "entropy": 1.7458200256029766, "epoch": 0.21466040482271842, "grad_norm": 0.7394571900367737, "learning_rate": 1.971991802013884e-05, "loss": 1.6005, "mean_token_accuracy": 0.6400333990653356, "num_tokens": 327189288.0, "step": 1954 }, { "entropy": 1.7399512827396393, "epoch": 0.21477026173409133, "grad_norm": 1.1217888593673706, "learning_rate": 1.9719521551915763e-05, "loss": 1.3252, "mean_token_accuracy": 0.6774489680926005, "num_tokens": 327317080.0, "step": 1955 }, { "entropy": 1.7320358057816823, "epoch": 0.21488011864546427, "grad_norm": 0.6957964897155762, "learning_rate": 1.971912480772376e-05, "loss": 1.4162, "mean_token_accuracy": 0.6480669528245926, "num_tokens": 327514283.0, "step": 1956 }, { "entropy": 1.6627892553806305, "epoch": 0.21498997555683722, "grad_norm": 0.7532937526702881, "learning_rate": 1.9718727787575383e-05, "loss": 1.3621, "mean_token_accuracy": 0.6672651420036951, "num_tokens": 327668545.0, "step": 1957 }, { "entropy": 1.6754935681819916, "epoch": 0.21509983246821016, "grad_norm": 0.6984850764274597, "learning_rate": 1.97183304914832e-05, "loss": 1.3214, "mean_token_accuracy": 0.6585142463445663, "num_tokens": 327835589.0, "step": 1958 }, { "entropy": 1.7026499410470326, "epoch": 0.2152096893795831, "grad_norm": 0.558738112449646, "learning_rate": 1.9717932919459784e-05, "loss": 1.4541, "mean_token_accuracy": 0.6451480984687805, "num_tokens": 328089156.0, "step": 1959 }, { "entropy": 1.6809870799382527, "epoch": 0.21531954629095604, "grad_norm": 0.6252606511116028, "learning_rate": 1.9717535071517724e-05, "loss": 1.4261, "mean_token_accuracy": 0.6565803388754526, "num_tokens": 328295200.0, "step": 1960 }, { "entropy": 1.736928681532542, "epoch": 0.21542940320232898, "grad_norm": 0.7609971165657043, "learning_rate": 1.9717136947669606e-05, "loss": 1.3809, "mean_token_accuracy": 0.6587243974208832, "num_tokens": 328509731.0, "step": 1961 }, { "entropy": 1.6860435704390209, "epoch": 0.2155392601137019, "grad_norm": 0.751956045627594, "learning_rate": 1.971673854792803e-05, "loss": 1.2261, "mean_token_accuracy": 0.6756429572900137, "num_tokens": 328612622.0, "step": 1962 }, { "entropy": 1.7120301028092701, "epoch": 0.21564911702507483, "grad_norm": 0.700073778629303, "learning_rate": 1.971633987230561e-05, "loss": 1.4035, "mean_token_accuracy": 0.6547966102759043, "num_tokens": 328845263.0, "step": 1963 }, { "entropy": 1.7697708209355671, "epoch": 0.21575897393644777, "grad_norm": 0.7105919718742371, "learning_rate": 1.971594092081496e-05, "loss": 1.312, "mean_token_accuracy": 0.6595413237810135, "num_tokens": 328975866.0, "step": 1964 }, { "entropy": 1.7790792485078175, "epoch": 0.21586883084782071, "grad_norm": 0.8269145488739014, "learning_rate": 1.9715541693468703e-05, "loss": 1.3614, "mean_token_accuracy": 0.6634324043989182, "num_tokens": 329095429.0, "step": 1965 }, { "entropy": 1.6846925516923268, "epoch": 0.21597868775919365, "grad_norm": 0.5915414094924927, "learning_rate": 1.9715142190279482e-05, "loss": 1.3213, "mean_token_accuracy": 0.6567167490720749, "num_tokens": 329249605.0, "step": 1966 }, { "entropy": 1.744320313135783, "epoch": 0.2160885446705666, "grad_norm": 0.6608003973960876, "learning_rate": 1.971474241125994e-05, "loss": 1.4533, "mean_token_accuracy": 0.6462043275435766, "num_tokens": 329411071.0, "step": 1967 }, { "entropy": 1.6898845732212067, "epoch": 0.21619840158193954, "grad_norm": 0.6290837526321411, "learning_rate": 1.9714342356422723e-05, "loss": 1.4013, "mean_token_accuracy": 0.6717150410016378, "num_tokens": 329570535.0, "step": 1968 }, { "entropy": 1.7154962023099263, "epoch": 0.21630825849331245, "grad_norm": 0.7625136971473694, "learning_rate": 1.97139420257805e-05, "loss": 1.3062, "mean_token_accuracy": 0.6675903101762136, "num_tokens": 329703050.0, "step": 1969 }, { "entropy": 1.7551279962062836, "epoch": 0.2164181154046854, "grad_norm": 0.736630380153656, "learning_rate": 1.971354141934594e-05, "loss": 1.4062, "mean_token_accuracy": 0.6460110992193222, "num_tokens": 329872834.0, "step": 1970 }, { "entropy": 1.7381382485230763, "epoch": 0.21652797231605833, "grad_norm": 0.6895781755447388, "learning_rate": 1.9713140537131715e-05, "loss": 1.4274, "mean_token_accuracy": 0.6558477779229482, "num_tokens": 330013651.0, "step": 1971 }, { "entropy": 1.7030009031295776, "epoch": 0.21663782922743127, "grad_norm": 0.7224587798118591, "learning_rate": 1.9712739379150523e-05, "loss": 1.4991, "mean_token_accuracy": 0.6588802685340246, "num_tokens": 330178313.0, "step": 1972 }, { "entropy": 1.681588480869929, "epoch": 0.2167476861388042, "grad_norm": 0.5660827159881592, "learning_rate": 1.9712337945415054e-05, "loss": 1.4877, "mean_token_accuracy": 0.6458199421564738, "num_tokens": 330385757.0, "step": 1973 }, { "entropy": 1.6638726989428203, "epoch": 0.21685754305017715, "grad_norm": 0.623779296875, "learning_rate": 1.9711936235938014e-05, "loss": 1.3621, "mean_token_accuracy": 0.659187431136767, "num_tokens": 330589361.0, "step": 1974 }, { "entropy": 1.691465973854065, "epoch": 0.2169673999615501, "grad_norm": 0.6252658367156982, "learning_rate": 1.971153425073212e-05, "loss": 1.3112, "mean_token_accuracy": 0.6599445144335429, "num_tokens": 330753043.0, "step": 1975 }, { "entropy": 1.7143527666727703, "epoch": 0.217077256872923, "grad_norm": 0.7959213256835938, "learning_rate": 1.971113198981009e-05, "loss": 1.4267, "mean_token_accuracy": 0.6643683314323425, "num_tokens": 330914792.0, "step": 1976 }, { "entropy": 1.7403099636236827, "epoch": 0.21718711378429595, "grad_norm": 0.7242742776870728, "learning_rate": 1.9710729453184663e-05, "loss": 1.4078, "mean_token_accuracy": 0.6562838604052862, "num_tokens": 331075502.0, "step": 1977 }, { "entropy": 1.721234291791916, "epoch": 0.2172969706956689, "grad_norm": 0.6983941197395325, "learning_rate": 1.9710326640868568e-05, "loss": 1.429, "mean_token_accuracy": 0.6565836171309153, "num_tokens": 331268768.0, "step": 1978 }, { "entropy": 1.728889485200246, "epoch": 0.21740682760704183, "grad_norm": 0.7118070125579834, "learning_rate": 1.9709923552874565e-05, "loss": 1.3784, "mean_token_accuracy": 0.6568760176499685, "num_tokens": 331436142.0, "step": 1979 }, { "entropy": 1.7155893842379253, "epoch": 0.21751668451841477, "grad_norm": 0.738287091255188, "learning_rate": 1.9709520189215403e-05, "loss": 1.4332, "mean_token_accuracy": 0.6453232914209366, "num_tokens": 331580092.0, "step": 1980 }, { "entropy": 1.7075227002302806, "epoch": 0.2176265414297877, "grad_norm": 0.6078463792800903, "learning_rate": 1.970911654990385e-05, "loss": 1.501, "mean_token_accuracy": 0.6402676453193029, "num_tokens": 331803746.0, "step": 1981 }, { "entropy": 1.6620681583881378, "epoch": 0.21773639834116063, "grad_norm": 0.6076948046684265, "learning_rate": 1.9708712634952688e-05, "loss": 1.4018, "mean_token_accuracy": 0.6624077359835306, "num_tokens": 331958945.0, "step": 1982 }, { "entropy": 1.6899429162343342, "epoch": 0.21784625525253357, "grad_norm": 0.6573540568351746, "learning_rate": 1.970830844437469e-05, "loss": 1.4148, "mean_token_accuracy": 0.6478807330131531, "num_tokens": 332191045.0, "step": 1983 }, { "entropy": 1.705435295899709, "epoch": 0.2179561121639065, "grad_norm": 0.6898738145828247, "learning_rate": 1.970790397818266e-05, "loss": 1.4029, "mean_token_accuracy": 0.6507929762204488, "num_tokens": 332347935.0, "step": 1984 }, { "entropy": 1.7328907350699108, "epoch": 0.21806596907527945, "grad_norm": 0.6054257750511169, "learning_rate": 1.9707499236389384e-05, "loss": 1.4292, "mean_token_accuracy": 0.6487182925144831, "num_tokens": 332523846.0, "step": 1985 }, { "entropy": 1.6892162561416626, "epoch": 0.2181758259866524, "grad_norm": 0.7589190006256104, "learning_rate": 1.9707094219007687e-05, "loss": 1.2616, "mean_token_accuracy": 0.6733472148577372, "num_tokens": 332684862.0, "step": 1986 }, { "entropy": 1.6447477738062541, "epoch": 0.21828568289802533, "grad_norm": 0.7766509056091309, "learning_rate": 1.970668892605038e-05, "loss": 1.3476, "mean_token_accuracy": 0.6714780976374944, "num_tokens": 332864922.0, "step": 1987 }, { "entropy": 1.7524027526378632, "epoch": 0.21839553980939827, "grad_norm": 0.6996143460273743, "learning_rate": 1.9706283357530294e-05, "loss": 1.3252, "mean_token_accuracy": 0.66605643928051, "num_tokens": 333030584.0, "step": 1988 }, { "entropy": 1.7636100550492604, "epoch": 0.21850539672077118, "grad_norm": 0.8666709661483765, "learning_rate": 1.9705877513460257e-05, "loss": 1.4356, "mean_token_accuracy": 0.64682570596536, "num_tokens": 333186942.0, "step": 1989 }, { "entropy": 1.7297316590944927, "epoch": 0.21861525363214412, "grad_norm": 0.8126122951507568, "learning_rate": 1.9705471393853126e-05, "loss": 1.3266, "mean_token_accuracy": 0.6661059657732645, "num_tokens": 333316991.0, "step": 1990 }, { "entropy": 1.7401694059371948, "epoch": 0.21872511054351707, "grad_norm": 0.7024965286254883, "learning_rate": 1.9705064998721742e-05, "loss": 1.2493, "mean_token_accuracy": 0.6787941058476766, "num_tokens": 333439176.0, "step": 1991 }, { "entropy": 1.708668867746989, "epoch": 0.21883496745489, "grad_norm": 0.6795996427536011, "learning_rate": 1.970465832807898e-05, "loss": 1.4659, "mean_token_accuracy": 0.660365030169487, "num_tokens": 333613726.0, "step": 1992 }, { "entropy": 1.696395069360733, "epoch": 0.21894482436626295, "grad_norm": 0.7904760837554932, "learning_rate": 1.9704251381937703e-05, "loss": 1.2613, "mean_token_accuracy": 0.6752869784832001, "num_tokens": 333778956.0, "step": 1993 }, { "entropy": 1.698264519373576, "epoch": 0.2190546812776359, "grad_norm": 0.5874459147453308, "learning_rate": 1.970384416031079e-05, "loss": 1.4253, "mean_token_accuracy": 0.6463806182146072, "num_tokens": 333982509.0, "step": 1994 }, { "entropy": 1.7498473624388378, "epoch": 0.21916453818900883, "grad_norm": 0.7056718468666077, "learning_rate": 1.970343666321113e-05, "loss": 1.3865, "mean_token_accuracy": 0.6528016924858093, "num_tokens": 334137289.0, "step": 1995 }, { "entropy": 1.7133673230806987, "epoch": 0.21927439510038174, "grad_norm": 0.7165104150772095, "learning_rate": 1.9703028890651625e-05, "loss": 1.3948, "mean_token_accuracy": 0.6589706838130951, "num_tokens": 334332222.0, "step": 1996 }, { "entropy": 1.704959104458491, "epoch": 0.21938425201175468, "grad_norm": 0.6553063988685608, "learning_rate": 1.9702620842645176e-05, "loss": 1.5619, "mean_token_accuracy": 0.6511781016985575, "num_tokens": 334525106.0, "step": 1997 }, { "entropy": 1.7078428268432617, "epoch": 0.21949410892312762, "grad_norm": 0.7418580651283264, "learning_rate": 1.9702212519204697e-05, "loss": 1.3736, "mean_token_accuracy": 0.669340506196022, "num_tokens": 334729517.0, "step": 1998 }, { "entropy": 1.71206929286321, "epoch": 0.21960396583450056, "grad_norm": 2.2254767417907715, "learning_rate": 1.9701803920343117e-05, "loss": 1.1656, "mean_token_accuracy": 0.6783639788627625, "num_tokens": 334926935.0, "step": 1999 }, { "entropy": 1.6726875305175781, "epoch": 0.2197138227458735, "grad_norm": 0.6199320554733276, "learning_rate": 1.9701395046073358e-05, "loss": 1.4867, "mean_token_accuracy": 0.6402423232793808, "num_tokens": 335119572.0, "step": 2000 }, { "entropy": 1.6810939411322277, "epoch": 0.21982367965724645, "grad_norm": 0.6892693638801575, "learning_rate": 1.970098589640837e-05, "loss": 1.4422, "mean_token_accuracy": 0.6712821374336878, "num_tokens": 335300516.0, "step": 2001 }, { "entropy": 1.7373451888561249, "epoch": 0.2199335365686194, "grad_norm": 0.652580201625824, "learning_rate": 1.9700576471361103e-05, "loss": 1.383, "mean_token_accuracy": 0.6453498254219691, "num_tokens": 335441164.0, "step": 2002 }, { "entropy": 1.7873657743136089, "epoch": 0.2200433934799923, "grad_norm": 0.6044803261756897, "learning_rate": 1.9700166770944505e-05, "loss": 1.4454, "mean_token_accuracy": 0.6438331256310145, "num_tokens": 335614767.0, "step": 2003 }, { "entropy": 1.7439679205417633, "epoch": 0.22015325039136524, "grad_norm": 0.718855619430542, "learning_rate": 1.9699756795171553e-05, "loss": 1.5, "mean_token_accuracy": 0.6593141506115595, "num_tokens": 335782527.0, "step": 2004 }, { "entropy": 1.680442641178767, "epoch": 0.22026310730273818, "grad_norm": 6.6189284324646, "learning_rate": 1.9699346544055217e-05, "loss": 1.3119, "mean_token_accuracy": 0.6821538011233012, "num_tokens": 335921470.0, "step": 2005 }, { "entropy": 1.7134496867656708, "epoch": 0.22037296421411112, "grad_norm": 0.749874472618103, "learning_rate": 1.9698936017608484e-05, "loss": 1.4309, "mean_token_accuracy": 0.6579453547795614, "num_tokens": 336076125.0, "step": 2006 }, { "entropy": 1.6650103032588959, "epoch": 0.22048282112548406, "grad_norm": 0.6054286956787109, "learning_rate": 1.9698525215844347e-05, "loss": 1.3048, "mean_token_accuracy": 0.6650077700614929, "num_tokens": 336246010.0, "step": 2007 }, { "entropy": 1.7242847979068756, "epoch": 0.220592678036857, "grad_norm": 0.8481932878494263, "learning_rate": 1.96981141387758e-05, "loss": 1.1978, "mean_token_accuracy": 0.6881664743026098, "num_tokens": 336384043.0, "step": 2008 }, { "entropy": 1.6481478810310364, "epoch": 0.22070253494822992, "grad_norm": 0.8285883665084839, "learning_rate": 1.9697702786415866e-05, "loss": 1.4015, "mean_token_accuracy": 0.6562249759833018, "num_tokens": 336584871.0, "step": 2009 }, { "entropy": 1.7549095054467518, "epoch": 0.22081239185960286, "grad_norm": 0.6210580468177795, "learning_rate": 1.969729115877756e-05, "loss": 1.4517, "mean_token_accuracy": 0.6447333445151647, "num_tokens": 336785253.0, "step": 2010 }, { "entropy": 1.7175431450208027, "epoch": 0.2209222487709758, "grad_norm": 0.8552317023277283, "learning_rate": 1.9696879255873902e-05, "loss": 1.5219, "mean_token_accuracy": 0.6434709678093592, "num_tokens": 336988668.0, "step": 2011 }, { "entropy": 1.7095571756362915, "epoch": 0.22103210568234874, "grad_norm": 0.6405702233314514, "learning_rate": 1.969646707771794e-05, "loss": 1.3564, "mean_token_accuracy": 0.6549317836761475, "num_tokens": 337175572.0, "step": 2012 }, { "entropy": 1.7163640260696411, "epoch": 0.22114196259372168, "grad_norm": 0.8766898512840271, "learning_rate": 1.969605462432271e-05, "loss": 1.3813, "mean_token_accuracy": 0.6668533583482107, "num_tokens": 337325813.0, "step": 2013 }, { "entropy": 1.6816040774186451, "epoch": 0.22125181950509462, "grad_norm": 0.7182164192199707, "learning_rate": 1.969564189570127e-05, "loss": 1.4426, "mean_token_accuracy": 0.6525221814711889, "num_tokens": 337460554.0, "step": 2014 }, { "entropy": 1.768685221672058, "epoch": 0.22136167641646756, "grad_norm": 0.7436334490776062, "learning_rate": 1.9695228891866683e-05, "loss": 1.611, "mean_token_accuracy": 0.6214992552995682, "num_tokens": 337641696.0, "step": 2015 }, { "entropy": 1.656598299741745, "epoch": 0.22147153332784048, "grad_norm": 0.7697328925132751, "learning_rate": 1.9694815612832018e-05, "loss": 1.3634, "mean_token_accuracy": 0.6659995466470718, "num_tokens": 337771813.0, "step": 2016 }, { "entropy": 1.7032875816027324, "epoch": 0.22158139023921342, "grad_norm": 0.6816899180412292, "learning_rate": 1.969440205861036e-05, "loss": 1.483, "mean_token_accuracy": 0.6475236068169276, "num_tokens": 337973121.0, "step": 2017 }, { "entropy": 1.7433029512564342, "epoch": 0.22169124715058636, "grad_norm": 0.6455709934234619, "learning_rate": 1.969398822921479e-05, "loss": 1.4009, "mean_token_accuracy": 0.6455973982810974, "num_tokens": 338137737.0, "step": 2018 }, { "entropy": 1.7247349818547566, "epoch": 0.2218011040619593, "grad_norm": 0.6982224583625793, "learning_rate": 1.9693574124658414e-05, "loss": 1.4222, "mean_token_accuracy": 0.6396381010611852, "num_tokens": 338369135.0, "step": 2019 }, { "entropy": 1.67547611395518, "epoch": 0.22191096097333224, "grad_norm": 0.6213436722755432, "learning_rate": 1.9693159744954335e-05, "loss": 1.4448, "mean_token_accuracy": 0.6563169062137604, "num_tokens": 338550921.0, "step": 2020 }, { "entropy": 1.7012183169523876, "epoch": 0.22202081788470518, "grad_norm": 0.6709868311882019, "learning_rate": 1.9692745090115664e-05, "loss": 1.3577, "mean_token_accuracy": 0.6605485628048579, "num_tokens": 338739755.0, "step": 2021 }, { "entropy": 1.7212641338507335, "epoch": 0.22213067479607812, "grad_norm": 0.6741979122161865, "learning_rate": 1.969233016015553e-05, "loss": 1.4985, "mean_token_accuracy": 0.6394399156173071, "num_tokens": 338942812.0, "step": 2022 }, { "entropy": 1.7291043400764465, "epoch": 0.22224053170745103, "grad_norm": 0.7105062007904053, "learning_rate": 1.9691914955087065e-05, "loss": 1.4693, "mean_token_accuracy": 0.6584506978591284, "num_tokens": 339081060.0, "step": 2023 }, { "entropy": 1.7811701993147533, "epoch": 0.22235038861882397, "grad_norm": 0.7212976217269897, "learning_rate": 1.9691499474923405e-05, "loss": 1.4486, "mean_token_accuracy": 0.644217719634374, "num_tokens": 339226880.0, "step": 2024 }, { "entropy": 1.6993672450383503, "epoch": 0.22246024553019692, "grad_norm": 0.7805929780006409, "learning_rate": 1.9691083719677707e-05, "loss": 1.381, "mean_token_accuracy": 0.672341451048851, "num_tokens": 339366403.0, "step": 2025 }, { "entropy": 1.6640028357505798, "epoch": 0.22257010244156986, "grad_norm": 0.643774151802063, "learning_rate": 1.969066768936312e-05, "loss": 1.3309, "mean_token_accuracy": 0.6650595118602117, "num_tokens": 339535229.0, "step": 2026 }, { "entropy": 1.7151845892270405, "epoch": 0.2226799593529428, "grad_norm": 0.7019052505493164, "learning_rate": 1.969025138399282e-05, "loss": 1.3497, "mean_token_accuracy": 0.6575596183538437, "num_tokens": 339704603.0, "step": 2027 }, { "entropy": 1.6844234863917034, "epoch": 0.22278981626431574, "grad_norm": 0.7261092066764832, "learning_rate": 1.9689834803579983e-05, "loss": 1.4324, "mean_token_accuracy": 0.652221143245697, "num_tokens": 339868917.0, "step": 2028 }, { "entropy": 1.7535183529059093, "epoch": 0.22289967317568868, "grad_norm": 0.8210894465446472, "learning_rate": 1.9689417948137786e-05, "loss": 1.5589, "mean_token_accuracy": 0.6526251584291458, "num_tokens": 340013342.0, "step": 2029 }, { "entropy": 1.8341341416041057, "epoch": 0.2230095300870616, "grad_norm": 0.8437470197677612, "learning_rate": 1.9689000817679428e-05, "loss": 1.496, "mean_token_accuracy": 0.6356715758641561, "num_tokens": 340169704.0, "step": 2030 }, { "entropy": 1.6810695727666218, "epoch": 0.22311938699843453, "grad_norm": 0.6405393481254578, "learning_rate": 1.9688583412218108e-05, "loss": 1.36, "mean_token_accuracy": 0.6580146849155426, "num_tokens": 340359004.0, "step": 2031 }, { "entropy": 1.7621925572554271, "epoch": 0.22322924390980747, "grad_norm": 0.7428179979324341, "learning_rate": 1.9688165731767037e-05, "loss": 1.5521, "mean_token_accuracy": 0.6292354067166647, "num_tokens": 340577217.0, "step": 2032 }, { "entropy": 1.6683639585971832, "epoch": 0.22333910082118041, "grad_norm": 0.6185237765312195, "learning_rate": 1.968774777633944e-05, "loss": 1.3931, "mean_token_accuracy": 0.6481966078281403, "num_tokens": 340762287.0, "step": 2033 }, { "entropy": 1.6244226296742756, "epoch": 0.22344895773255335, "grad_norm": 0.6757908463478088, "learning_rate": 1.9687329545948533e-05, "loss": 1.3674, "mean_token_accuracy": 0.6705109626054764, "num_tokens": 340905555.0, "step": 2034 }, { "entropy": 1.6744134823481243, "epoch": 0.2235588146439263, "grad_norm": 0.6213631629943848, "learning_rate": 1.968691104060757e-05, "loss": 1.4006, "mean_token_accuracy": 0.6517485429843267, "num_tokens": 341092632.0, "step": 2035 }, { "entropy": 1.6906062265237172, "epoch": 0.22366867155529924, "grad_norm": 0.6319667100906372, "learning_rate": 1.9686492260329783e-05, "loss": 1.3007, "mean_token_accuracy": 0.6612733155488968, "num_tokens": 341223672.0, "step": 2036 }, { "entropy": 1.7172020475069683, "epoch": 0.22377852846667215, "grad_norm": 0.6903420090675354, "learning_rate": 1.968607320512843e-05, "loss": 1.3112, "mean_token_accuracy": 0.6633612463871638, "num_tokens": 341388402.0, "step": 2037 }, { "entropy": 1.7287559310595195, "epoch": 0.2238883853780451, "grad_norm": 0.7884252071380615, "learning_rate": 1.9685653875016773e-05, "loss": 1.252, "mean_token_accuracy": 0.6740682969490687, "num_tokens": 341510142.0, "step": 2038 }, { "entropy": 1.741033395131429, "epoch": 0.22399824228941803, "grad_norm": 0.6842942237854004, "learning_rate": 1.9685234270008085e-05, "loss": 1.378, "mean_token_accuracy": 0.6639875521262487, "num_tokens": 341675965.0, "step": 2039 }, { "entropy": 1.7549268503983815, "epoch": 0.22410809920079097, "grad_norm": 0.7328823208808899, "learning_rate": 1.9684814390115644e-05, "loss": 1.3624, "mean_token_accuracy": 0.6532031744718552, "num_tokens": 341811346.0, "step": 2040 }, { "entropy": 1.7577114800612132, "epoch": 0.2242179561121639, "grad_norm": 0.6929943561553955, "learning_rate": 1.9684394235352744e-05, "loss": 1.5978, "mean_token_accuracy": 0.6406635567545891, "num_tokens": 341991608.0, "step": 2041 }, { "entropy": 1.7216146389643352, "epoch": 0.22432781302353685, "grad_norm": 0.7165713310241699, "learning_rate": 1.9683973805732684e-05, "loss": 1.4438, "mean_token_accuracy": 0.6414127250512441, "num_tokens": 342162750.0, "step": 2042 }, { "entropy": 1.6384899119536083, "epoch": 0.22443766993490977, "grad_norm": 0.694940984249115, "learning_rate": 1.9683553101268756e-05, "loss": 1.3885, "mean_token_accuracy": 0.6634632100661596, "num_tokens": 342342619.0, "step": 2043 }, { "entropy": 1.721029241879781, "epoch": 0.2245475268462827, "grad_norm": 2.877837657928467, "learning_rate": 1.968313212197429e-05, "loss": 1.3462, "mean_token_accuracy": 0.6564084043105444, "num_tokens": 342549662.0, "step": 2044 }, { "entropy": 1.7353296478589375, "epoch": 0.22465738375765565, "grad_norm": 0.7393618226051331, "learning_rate": 1.968271086786261e-05, "loss": 1.3049, "mean_token_accuracy": 0.6696875343720118, "num_tokens": 342671498.0, "step": 2045 }, { "entropy": 1.6773792306582134, "epoch": 0.2247672406690286, "grad_norm": 0.6500130295753479, "learning_rate": 1.9682289338947037e-05, "loss": 1.3325, "mean_token_accuracy": 0.6623169581095377, "num_tokens": 342824666.0, "step": 2046 }, { "entropy": 1.647678832213084, "epoch": 0.22487709758040153, "grad_norm": 0.6050171256065369, "learning_rate": 1.9681867535240924e-05, "loss": 1.35, "mean_token_accuracy": 0.665855829914411, "num_tokens": 343013627.0, "step": 2047 }, { "entropy": 1.703715850909551, "epoch": 0.22498695449177447, "grad_norm": 0.7125741839408875, "learning_rate": 1.968144545675761e-05, "loss": 1.4769, "mean_token_accuracy": 0.650515486796697, "num_tokens": 343157329.0, "step": 2048 }, { "entropy": 1.717505931854248, "epoch": 0.2250968114031474, "grad_norm": 0.7336664795875549, "learning_rate": 1.9681023103510465e-05, "loss": 1.3677, "mean_token_accuracy": 0.6719395915667216, "num_tokens": 343305654.0, "step": 2049 }, { "entropy": 1.7339052259922028, "epoch": 0.22520666831452033, "grad_norm": 0.7247207164764404, "learning_rate": 1.9680600475512844e-05, "loss": 1.3452, "mean_token_accuracy": 0.6586270729700724, "num_tokens": 343467941.0, "step": 2050 }, { "entropy": 1.6390206515789032, "epoch": 0.22531652522589327, "grad_norm": 0.7296788096427917, "learning_rate": 1.9680177572778135e-05, "loss": 1.2363, "mean_token_accuracy": 0.6724939694007238, "num_tokens": 343596667.0, "step": 2051 }, { "entropy": 1.7341953416665394, "epoch": 0.2254263821372662, "grad_norm": 0.6788911819458008, "learning_rate": 1.9679754395319714e-05, "loss": 1.3616, "mean_token_accuracy": 0.6549450208743414, "num_tokens": 343744356.0, "step": 2052 }, { "entropy": 1.7105887234210968, "epoch": 0.22553623904863915, "grad_norm": 0.6413341760635376, "learning_rate": 1.9679330943150982e-05, "loss": 1.3892, "mean_token_accuracy": 0.6598889281352361, "num_tokens": 343920401.0, "step": 2053 }, { "entropy": 1.7920573552449544, "epoch": 0.2256460959600121, "grad_norm": 0.7013512849807739, "learning_rate": 1.967890721628533e-05, "loss": 1.4534, "mean_token_accuracy": 0.6496995538473129, "num_tokens": 344058015.0, "step": 2054 }, { "entropy": 1.6886359850565593, "epoch": 0.22575595287138503, "grad_norm": 0.6699264049530029, "learning_rate": 1.967848321473618e-05, "loss": 1.4349, "mean_token_accuracy": 0.6515084008375803, "num_tokens": 344246715.0, "step": 2055 }, { "entropy": 1.7019972801208496, "epoch": 0.22586580978275797, "grad_norm": 0.6247515678405762, "learning_rate": 1.9678058938516946e-05, "loss": 1.3938, "mean_token_accuracy": 0.6596282968918482, "num_tokens": 344416664.0, "step": 2056 }, { "entropy": 1.6917652984460194, "epoch": 0.22597566669413088, "grad_norm": 0.7000340223312378, "learning_rate": 1.9677634387641056e-05, "loss": 1.4938, "mean_token_accuracy": 0.6429780870676041, "num_tokens": 344609791.0, "step": 2057 }, { "entropy": 1.729089339574178, "epoch": 0.22608552360550382, "grad_norm": 0.7346123456954956, "learning_rate": 1.967720956212195e-05, "loss": 1.4291, "mean_token_accuracy": 0.6589990357557932, "num_tokens": 344777080.0, "step": 2058 }, { "entropy": 1.7153427302837372, "epoch": 0.22619538051687677, "grad_norm": 0.6257114410400391, "learning_rate": 1.9676784461973068e-05, "loss": 1.4968, "mean_token_accuracy": 0.6334412743647894, "num_tokens": 344995489.0, "step": 2059 }, { "entropy": 1.766806423664093, "epoch": 0.2263052374282497, "grad_norm": 0.7080755233764648, "learning_rate": 1.967635908720787e-05, "loss": 1.3861, "mean_token_accuracy": 0.6605810771385828, "num_tokens": 345155660.0, "step": 2060 }, { "entropy": 1.659007489681244, "epoch": 0.22641509433962265, "grad_norm": 0.728387713432312, "learning_rate": 1.9675933437839817e-05, "loss": 1.3944, "mean_token_accuracy": 0.6693829894065857, "num_tokens": 345282543.0, "step": 2061 }, { "entropy": 1.757192333539327, "epoch": 0.2265249512509956, "grad_norm": 0.6841723322868347, "learning_rate": 1.967550751388238e-05, "loss": 1.4359, "mean_token_accuracy": 0.645816907286644, "num_tokens": 345459681.0, "step": 2062 }, { "entropy": 1.7275842030843098, "epoch": 0.22663480816236853, "grad_norm": 0.7722262740135193, "learning_rate": 1.9675081315349037e-05, "loss": 1.3776, "mean_token_accuracy": 0.6634769191344579, "num_tokens": 345643276.0, "step": 2063 }, { "entropy": 1.6928351819515228, "epoch": 0.22674466507374144, "grad_norm": 0.7217462658882141, "learning_rate": 1.9674654842253283e-05, "loss": 1.3029, "mean_token_accuracy": 0.6618945797284445, "num_tokens": 345780782.0, "step": 2064 }, { "entropy": 1.7282683352629344, "epoch": 0.22685452198511438, "grad_norm": 0.6660979390144348, "learning_rate": 1.967422809460861e-05, "loss": 1.4675, "mean_token_accuracy": 0.6451341956853867, "num_tokens": 345949242.0, "step": 2065 }, { "entropy": 1.6747341950734456, "epoch": 0.22696437889648732, "grad_norm": 0.632645308971405, "learning_rate": 1.9673801072428528e-05, "loss": 1.402, "mean_token_accuracy": 0.6539940188328425, "num_tokens": 346131283.0, "step": 2066 }, { "entropy": 1.823501318693161, "epoch": 0.22707423580786026, "grad_norm": 0.8174815773963928, "learning_rate": 1.967337377572655e-05, "loss": 1.5313, "mean_token_accuracy": 0.6354220509529114, "num_tokens": 346348047.0, "step": 2067 }, { "entropy": 1.7198161383469899, "epoch": 0.2271840927192332, "grad_norm": 0.6577022671699524, "learning_rate": 1.96729462045162e-05, "loss": 1.3033, "mean_token_accuracy": 0.6620743771394094, "num_tokens": 346500237.0, "step": 2068 }, { "entropy": 1.7309946616490681, "epoch": 0.22729394963060615, "grad_norm": 0.641176164150238, "learning_rate": 1.967251835881101e-05, "loss": 1.4701, "mean_token_accuracy": 0.6394395977258682, "num_tokens": 346672848.0, "step": 2069 }, { "entropy": 1.758286048968633, "epoch": 0.22740380654197906, "grad_norm": 0.7229630947113037, "learning_rate": 1.967209023862452e-05, "loss": 1.3428, "mean_token_accuracy": 0.6649421552817026, "num_tokens": 346808156.0, "step": 2070 }, { "entropy": 1.6775102814038594, "epoch": 0.227513663453352, "grad_norm": 0.6443026661872864, "learning_rate": 1.9671661843970283e-05, "loss": 1.4133, "mean_token_accuracy": 0.6403845498959223, "num_tokens": 346997057.0, "step": 2071 }, { "entropy": 1.7017942468325298, "epoch": 0.22762352036472494, "grad_norm": 0.7220076322555542, "learning_rate": 1.967123317486186e-05, "loss": 1.4647, "mean_token_accuracy": 0.6385684708754221, "num_tokens": 347201593.0, "step": 2072 }, { "entropy": 1.729232649008433, "epoch": 0.22773337727609788, "grad_norm": 0.7669538259506226, "learning_rate": 1.967080423131281e-05, "loss": 1.28, "mean_token_accuracy": 0.6778450608253479, "num_tokens": 347340620.0, "step": 2073 }, { "entropy": 1.7673300007979076, "epoch": 0.22784323418747082, "grad_norm": 0.7315675616264343, "learning_rate": 1.9670375013336716e-05, "loss": 1.3434, "mean_token_accuracy": 0.661798839767774, "num_tokens": 347537266.0, "step": 2074 }, { "entropy": 1.7187786897023518, "epoch": 0.22795309109884376, "grad_norm": 0.6596832275390625, "learning_rate": 1.966994552094716e-05, "loss": 1.372, "mean_token_accuracy": 0.6497376809517542, "num_tokens": 347677741.0, "step": 2075 }, { "entropy": 1.6955298682053883, "epoch": 0.2280629480102167, "grad_norm": 0.698900580406189, "learning_rate": 1.9669515754157732e-05, "loss": 1.3391, "mean_token_accuracy": 0.6492668986320496, "num_tokens": 347890197.0, "step": 2076 }, { "entropy": 1.7692882418632507, "epoch": 0.22817280492158962, "grad_norm": 0.669717013835907, "learning_rate": 1.9669085712982038e-05, "loss": 1.6084, "mean_token_accuracy": 0.6258754978577296, "num_tokens": 348110513.0, "step": 2077 }, { "entropy": 1.7276891966660817, "epoch": 0.22828266183296256, "grad_norm": 0.6462763547897339, "learning_rate": 1.966865539743369e-05, "loss": 1.3711, "mean_token_accuracy": 0.6671418696641922, "num_tokens": 348238587.0, "step": 2078 }, { "entropy": 1.708987295627594, "epoch": 0.2283925187443355, "grad_norm": 0.7623677253723145, "learning_rate": 1.9668224807526306e-05, "loss": 1.3025, "mean_token_accuracy": 0.6712601681550344, "num_tokens": 348356342.0, "step": 2079 }, { "entropy": 1.6536230842272441, "epoch": 0.22850237565570844, "grad_norm": 0.5909548997879028, "learning_rate": 1.9667793943273507e-05, "loss": 1.3718, "mean_token_accuracy": 0.6658143649498621, "num_tokens": 348573661.0, "step": 2080 }, { "entropy": 1.6918116807937622, "epoch": 0.22861223256708138, "grad_norm": 0.6436260938644409, "learning_rate": 1.966736280468894e-05, "loss": 1.4668, "mean_token_accuracy": 0.6581264610091845, "num_tokens": 348730332.0, "step": 2081 }, { "entropy": 1.7079047163327534, "epoch": 0.22872208947845432, "grad_norm": 0.680583119392395, "learning_rate": 1.966693139178624e-05, "loss": 1.353, "mean_token_accuracy": 0.6601024568080902, "num_tokens": 348882457.0, "step": 2082 }, { "entropy": 1.655688891808192, "epoch": 0.22883194638982726, "grad_norm": 0.6544623970985413, "learning_rate": 1.9666499704579074e-05, "loss": 1.3916, "mean_token_accuracy": 0.6687876433134079, "num_tokens": 349018470.0, "step": 2083 }, { "entropy": 1.6593633393446605, "epoch": 0.22894180330120018, "grad_norm": 0.7558723092079163, "learning_rate": 1.9666067743081094e-05, "loss": 1.2681, "mean_token_accuracy": 0.681985874970754, "num_tokens": 349155041.0, "step": 2084 }, { "entropy": 1.755085527896881, "epoch": 0.22905166021257312, "grad_norm": 0.7257434725761414, "learning_rate": 1.9665635507305975e-05, "loss": 1.368, "mean_token_accuracy": 0.6560467928647995, "num_tokens": 349303770.0, "step": 2085 }, { "entropy": 1.7510084907213848, "epoch": 0.22916151712394606, "grad_norm": 0.7767149209976196, "learning_rate": 1.9665202997267398e-05, "loss": 1.33, "mean_token_accuracy": 0.666937862833341, "num_tokens": 349421045.0, "step": 2086 }, { "entropy": 1.7155856092770894, "epoch": 0.229271374035319, "grad_norm": 0.6380376219749451, "learning_rate": 1.9664770212979048e-05, "loss": 1.3557, "mean_token_accuracy": 0.6549844940503439, "num_tokens": 349573846.0, "step": 2087 }, { "entropy": 1.7313959101835887, "epoch": 0.22938123094669194, "grad_norm": 0.7793395519256592, "learning_rate": 1.966433715445463e-05, "loss": 1.4454, "mean_token_accuracy": 0.6458447525898615, "num_tokens": 349733574.0, "step": 2088 }, { "entropy": 1.7679253121217091, "epoch": 0.22949108785806488, "grad_norm": 0.7481524348258972, "learning_rate": 1.9663903821707843e-05, "loss": 1.4977, "mean_token_accuracy": 0.646313488483429, "num_tokens": 349914696.0, "step": 2089 }, { "entropy": 1.7201211750507355, "epoch": 0.22960094476943782, "grad_norm": 0.6787753701210022, "learning_rate": 1.9663470214752404e-05, "loss": 1.2731, "mean_token_accuracy": 0.6699012964963913, "num_tokens": 350019115.0, "step": 2090 }, { "entropy": 1.744313398996989, "epoch": 0.22971080168081073, "grad_norm": 0.8407695293426514, "learning_rate": 1.966303633360204e-05, "loss": 1.5798, "mean_token_accuracy": 0.6501995325088501, "num_tokens": 350204871.0, "step": 2091 }, { "entropy": 1.66973876953125, "epoch": 0.22982065859218367, "grad_norm": 0.6919146180152893, "learning_rate": 1.9662602178270473e-05, "loss": 1.4768, "mean_token_accuracy": 0.6421976536512375, "num_tokens": 350414385.0, "step": 2092 }, { "entropy": 1.710203657547633, "epoch": 0.22993051550355662, "grad_norm": 0.6580731272697449, "learning_rate": 1.9662167748771456e-05, "loss": 1.4226, "mean_token_accuracy": 0.6441041479508082, "num_tokens": 350563611.0, "step": 2093 }, { "entropy": 1.7262790699799855, "epoch": 0.23004037241492956, "grad_norm": 0.6261684894561768, "learning_rate": 1.966173304511873e-05, "loss": 1.3752, "mean_token_accuracy": 0.6569176514943441, "num_tokens": 350733461.0, "step": 2094 }, { "entropy": 1.7107460002104442, "epoch": 0.2301502293263025, "grad_norm": 0.722224235534668, "learning_rate": 1.9661298067326057e-05, "loss": 1.4182, "mean_token_accuracy": 0.6536510388056437, "num_tokens": 350896912.0, "step": 2095 }, { "entropy": 1.7100590467453003, "epoch": 0.23026008623767544, "grad_norm": 0.9301192760467529, "learning_rate": 1.9660862815407203e-05, "loss": 1.503, "mean_token_accuracy": 0.6540075987577438, "num_tokens": 351056942.0, "step": 2096 }, { "entropy": 1.6752377649148305, "epoch": 0.23036994314904835, "grad_norm": 0.6664115190505981, "learning_rate": 1.9660427289375945e-05, "loss": 1.4077, "mean_token_accuracy": 0.6520341883103052, "num_tokens": 351293001.0, "step": 2097 }, { "entropy": 1.7409682472546895, "epoch": 0.2304798000604213, "grad_norm": 0.6675509810447693, "learning_rate": 1.965999148924606e-05, "loss": 1.5078, "mean_token_accuracy": 0.6427519768476486, "num_tokens": 351525219.0, "step": 2098 }, { "entropy": 1.7048235932985942, "epoch": 0.23058965697179423, "grad_norm": 0.6759990453720093, "learning_rate": 1.9659555415031352e-05, "loss": 1.4459, "mean_token_accuracy": 0.6511163661877314, "num_tokens": 351698289.0, "step": 2099 }, { "entropy": 1.7245367964108784, "epoch": 0.23069951388316717, "grad_norm": 0.6386352181434631, "learning_rate": 1.965911906674562e-05, "loss": 1.477, "mean_token_accuracy": 0.646999349196752, "num_tokens": 351893291.0, "step": 2100 }, { "entropy": 1.6794813175996144, "epoch": 0.23080937079454011, "grad_norm": 0.7493569850921631, "learning_rate": 1.9658682444402666e-05, "loss": 1.0951, "mean_token_accuracy": 0.6764346112807592, "num_tokens": 352074217.0, "step": 2101 }, { "entropy": 1.6989248593648274, "epoch": 0.23091922770591305, "grad_norm": 0.6547640562057495, "learning_rate": 1.9658245548016314e-05, "loss": 1.3522, "mean_token_accuracy": 0.661203866203626, "num_tokens": 352207973.0, "step": 2102 }, { "entropy": 1.7575849791367848, "epoch": 0.231029084617286, "grad_norm": 0.75108802318573, "learning_rate": 1.9657808377600395e-05, "loss": 1.3971, "mean_token_accuracy": 0.6579893529415131, "num_tokens": 352402101.0, "step": 2103 }, { "entropy": 1.6805921494960785, "epoch": 0.2311389415286589, "grad_norm": 0.6265072226524353, "learning_rate": 1.965737093316874e-05, "loss": 1.246, "mean_token_accuracy": 0.6822475343942642, "num_tokens": 352530008.0, "step": 2104 }, { "entropy": 1.7218637466430664, "epoch": 0.23124879844003185, "grad_norm": 0.7253437042236328, "learning_rate": 1.96569332147352e-05, "loss": 1.3307, "mean_token_accuracy": 0.6561064024766287, "num_tokens": 352674554.0, "step": 2105 }, { "entropy": 1.6980949739615123, "epoch": 0.2313586553514048, "grad_norm": 0.5350558757781982, "learning_rate": 1.965649522231362e-05, "loss": 1.4637, "mean_token_accuracy": 0.6409613688786825, "num_tokens": 352954153.0, "step": 2106 }, { "entropy": 1.6509924431641896, "epoch": 0.23146851226277773, "grad_norm": 0.6137299537658691, "learning_rate": 1.965605695591787e-05, "loss": 1.3385, "mean_token_accuracy": 0.6657126645247141, "num_tokens": 353113713.0, "step": 2107 }, { "entropy": 1.690576394399007, "epoch": 0.23157836917415067, "grad_norm": 0.6963815689086914, "learning_rate": 1.9655618415561816e-05, "loss": 1.3486, "mean_token_accuracy": 0.660049964984258, "num_tokens": 353263381.0, "step": 2108 }, { "entropy": 1.7236856520175934, "epoch": 0.2316882260855236, "grad_norm": 0.678913950920105, "learning_rate": 1.965517960125934e-05, "loss": 1.3219, "mean_token_accuracy": 0.6630937109390894, "num_tokens": 353390958.0, "step": 2109 }, { "entropy": 1.8335295915603638, "epoch": 0.23179808299689655, "grad_norm": 0.8892464637756348, "learning_rate": 1.965474051302433e-05, "loss": 1.4938, "mean_token_accuracy": 0.6548814475536346, "num_tokens": 353538619.0, "step": 2110 }, { "entropy": 1.7106240193049114, "epoch": 0.23190793990826947, "grad_norm": 0.7260220646858215, "learning_rate": 1.965430115087068e-05, "loss": 1.3023, "mean_token_accuracy": 0.6652000844478607, "num_tokens": 353691828.0, "step": 2111 }, { "entropy": 1.7223450640837352, "epoch": 0.2320177968196424, "grad_norm": 0.6633872389793396, "learning_rate": 1.9653861514812305e-05, "loss": 1.439, "mean_token_accuracy": 0.6730594833691915, "num_tokens": 353900581.0, "step": 2112 }, { "entropy": 1.6289326747258503, "epoch": 0.23212765373101535, "grad_norm": 0.6929929256439209, "learning_rate": 1.965342160486311e-05, "loss": 1.3896, "mean_token_accuracy": 0.6601720203955969, "num_tokens": 354106680.0, "step": 2113 }, { "entropy": 1.6527433395385742, "epoch": 0.2322375106423883, "grad_norm": 0.6912239193916321, "learning_rate": 1.9652981421037016e-05, "loss": 1.3321, "mean_token_accuracy": 0.6719396263360977, "num_tokens": 354240940.0, "step": 2114 }, { "entropy": 1.7169695695241292, "epoch": 0.23234736755376123, "grad_norm": 0.682310938835144, "learning_rate": 1.965254096334796e-05, "loss": 1.3884, "mean_token_accuracy": 0.6444130092859268, "num_tokens": 354378597.0, "step": 2115 }, { "entropy": 1.794925073782603, "epoch": 0.23245722446513417, "grad_norm": 0.8051143288612366, "learning_rate": 1.9652100231809886e-05, "loss": 1.5086, "mean_token_accuracy": 0.6416242470343908, "num_tokens": 354547066.0, "step": 2116 }, { "entropy": 1.7723517616589863, "epoch": 0.2325670813765071, "grad_norm": 0.8096024990081787, "learning_rate": 1.9651659226436736e-05, "loss": 1.4075, "mean_token_accuracy": 0.6405593703190485, "num_tokens": 354728773.0, "step": 2117 }, { "entropy": 1.7235789597034454, "epoch": 0.23267693828788003, "grad_norm": 0.7707192897796631, "learning_rate": 1.965121794724247e-05, "loss": 1.6905, "mean_token_accuracy": 0.6104727784792582, "num_tokens": 354986725.0, "step": 2118 }, { "entropy": 1.6637963851292927, "epoch": 0.23278679519925297, "grad_norm": 0.7707030177116394, "learning_rate": 1.9650776394241053e-05, "loss": 1.244, "mean_token_accuracy": 0.6805417140324911, "num_tokens": 355113452.0, "step": 2119 }, { "entropy": 1.7366061508655548, "epoch": 0.2328966521106259, "grad_norm": 0.7351778149604797, "learning_rate": 1.9650334567446464e-05, "loss": 1.2731, "mean_token_accuracy": 0.6736204822858175, "num_tokens": 355213687.0, "step": 2120 }, { "entropy": 1.7788714170455933, "epoch": 0.23300650902199885, "grad_norm": 0.7859005928039551, "learning_rate": 1.964989246687268e-05, "loss": 1.3793, "mean_token_accuracy": 0.651082048813502, "num_tokens": 355321358.0, "step": 2121 }, { "entropy": 1.737849046786626, "epoch": 0.2331163659333718, "grad_norm": 0.7442585825920105, "learning_rate": 1.96494500925337e-05, "loss": 1.311, "mean_token_accuracy": 0.6615156581004461, "num_tokens": 355425618.0, "step": 2122 }, { "entropy": 1.7283195753892262, "epoch": 0.23322622284474473, "grad_norm": 0.638728141784668, "learning_rate": 1.964900744444352e-05, "loss": 1.4032, "mean_token_accuracy": 0.6568774382273356, "num_tokens": 355597017.0, "step": 2123 }, { "entropy": 1.6358279883861542, "epoch": 0.23333607975611767, "grad_norm": 0.6525757312774658, "learning_rate": 1.9648564522616156e-05, "loss": 1.2853, "mean_token_accuracy": 0.6706103881200155, "num_tokens": 355760536.0, "step": 2124 }, { "entropy": 1.737657070159912, "epoch": 0.23344593666749058, "grad_norm": 0.6344397664070129, "learning_rate": 1.9648121327065618e-05, "loss": 1.4552, "mean_token_accuracy": 0.6427590002616247, "num_tokens": 355950982.0, "step": 2125 }, { "entropy": 1.7366611162821453, "epoch": 0.23355579357886352, "grad_norm": 0.9384574294090271, "learning_rate": 1.964767785780594e-05, "loss": 1.3528, "mean_token_accuracy": 0.6612386802832285, "num_tokens": 356072860.0, "step": 2126 }, { "entropy": 1.696535994609197, "epoch": 0.23366565049023647, "grad_norm": 0.7266330718994141, "learning_rate": 1.9647234114851152e-05, "loss": 1.275, "mean_token_accuracy": 0.6714907536904017, "num_tokens": 356205844.0, "step": 2127 }, { "entropy": 1.7079964379469554, "epoch": 0.2337755074016094, "grad_norm": 0.7104028463363647, "learning_rate": 1.9646790098215302e-05, "loss": 1.4331, "mean_token_accuracy": 0.6543066402276357, "num_tokens": 356376146.0, "step": 2128 }, { "entropy": 1.6866773664951324, "epoch": 0.23388536431298235, "grad_norm": 1.0117084980010986, "learning_rate": 1.964634580791244e-05, "loss": 1.3217, "mean_token_accuracy": 0.6756196220715841, "num_tokens": 356535528.0, "step": 2129 }, { "entropy": 1.7522582213083904, "epoch": 0.2339952212243553, "grad_norm": 0.6908559799194336, "learning_rate": 1.964590124395663e-05, "loss": 1.4413, "mean_token_accuracy": 0.6561037798722585, "num_tokens": 356697797.0, "step": 2130 }, { "entropy": 1.7344152132670085, "epoch": 0.2341050781357282, "grad_norm": 0.6250041127204895, "learning_rate": 1.9645456406361945e-05, "loss": 1.4769, "mean_token_accuracy": 0.6356958995262781, "num_tokens": 356880210.0, "step": 2131 }, { "entropy": 1.76920285820961, "epoch": 0.23421493504710114, "grad_norm": 0.7835322618484497, "learning_rate": 1.9645011295142456e-05, "loss": 1.5479, "mean_token_accuracy": 0.6402261257171631, "num_tokens": 357043606.0, "step": 2132 }, { "entropy": 1.6776104768117268, "epoch": 0.23432479195847408, "grad_norm": 0.745183527469635, "learning_rate": 1.9644565910312257e-05, "loss": 1.4785, "mean_token_accuracy": 0.6556303252776464, "num_tokens": 357235188.0, "step": 2133 }, { "entropy": 1.7570060988267262, "epoch": 0.23443464886984702, "grad_norm": 1.126607060432434, "learning_rate": 1.9644120251885442e-05, "loss": 1.4158, "mean_token_accuracy": 0.6616190771261851, "num_tokens": 357369106.0, "step": 2134 }, { "entropy": 1.7477031548817952, "epoch": 0.23454450578121996, "grad_norm": 0.5550586581230164, "learning_rate": 1.9643674319876116e-05, "loss": 1.5428, "mean_token_accuracy": 0.6342013676961263, "num_tokens": 357630109.0, "step": 2135 }, { "entropy": 1.7409445345401764, "epoch": 0.2346543626925929, "grad_norm": 0.6409521102905273, "learning_rate": 1.9643228114298394e-05, "loss": 1.3992, "mean_token_accuracy": 0.6574912518262863, "num_tokens": 357876601.0, "step": 2136 }, { "entropy": 1.755222608645757, "epoch": 0.23476421960396585, "grad_norm": 0.8085409998893738, "learning_rate": 1.9642781635166394e-05, "loss": 1.3566, "mean_token_accuracy": 0.6657391488552094, "num_tokens": 358031405.0, "step": 2137 }, { "entropy": 1.7329784830411274, "epoch": 0.23487407651533876, "grad_norm": 0.7524353265762329, "learning_rate": 1.9642334882494252e-05, "loss": 1.2204, "mean_token_accuracy": 0.6737546324729919, "num_tokens": 358140931.0, "step": 2138 }, { "entropy": 1.7543505827585857, "epoch": 0.2349839334267117, "grad_norm": 0.673270046710968, "learning_rate": 1.9641887856296103e-05, "loss": 1.4576, "mean_token_accuracy": 0.6485131829977036, "num_tokens": 358374623.0, "step": 2139 }, { "entropy": 1.7349696457386017, "epoch": 0.23509379033808464, "grad_norm": 0.6543999314308167, "learning_rate": 1.9641440556586103e-05, "loss": 1.2942, "mean_token_accuracy": 0.6671332617600759, "num_tokens": 358489422.0, "step": 2140 }, { "entropy": 1.7005629340807598, "epoch": 0.23520364724945758, "grad_norm": 0.6191766262054443, "learning_rate": 1.9640992983378396e-05, "loss": 1.4521, "mean_token_accuracy": 0.6530391176541647, "num_tokens": 358675459.0, "step": 2141 }, { "entropy": 1.7502335608005524, "epoch": 0.23531350416083052, "grad_norm": 0.7959282994270325, "learning_rate": 1.9640545136687163e-05, "loss": 1.5671, "mean_token_accuracy": 0.6383850276470184, "num_tokens": 358841502.0, "step": 2142 }, { "entropy": 1.7522724668184917, "epoch": 0.23542336107220346, "grad_norm": 0.8718724846839905, "learning_rate": 1.9640097016526562e-05, "loss": 1.5196, "mean_token_accuracy": 0.6397239714860916, "num_tokens": 359026040.0, "step": 2143 }, { "entropy": 1.7374042570590973, "epoch": 0.2355332179835764, "grad_norm": 0.6595825552940369, "learning_rate": 1.9639648622910786e-05, "loss": 1.4752, "mean_token_accuracy": 0.6529039045174917, "num_tokens": 359254305.0, "step": 2144 }, { "entropy": 1.643284171819687, "epoch": 0.23564307489494932, "grad_norm": 0.6858879327774048, "learning_rate": 1.963919995585403e-05, "loss": 1.3472, "mean_token_accuracy": 0.6681111405293146, "num_tokens": 359394696.0, "step": 2145 }, { "entropy": 1.722949246565501, "epoch": 0.23575293180632226, "grad_norm": 0.687818706035614, "learning_rate": 1.9638751015370482e-05, "loss": 1.2756, "mean_token_accuracy": 0.6662940879662832, "num_tokens": 359529099.0, "step": 2146 }, { "entropy": 1.6889635523160298, "epoch": 0.2358627887176952, "grad_norm": 0.643974781036377, "learning_rate": 1.963830180147436e-05, "loss": 1.4641, "mean_token_accuracy": 0.651663934191068, "num_tokens": 359724038.0, "step": 2147 }, { "entropy": 1.6901886363824208, "epoch": 0.23597264562906814, "grad_norm": 0.5987870097160339, "learning_rate": 1.9637852314179874e-05, "loss": 1.4741, "mean_token_accuracy": 0.6606296797593435, "num_tokens": 359880863.0, "step": 2148 }, { "entropy": 1.716208666563034, "epoch": 0.23608250254044108, "grad_norm": 0.7272748351097107, "learning_rate": 1.963740255350126e-05, "loss": 1.5086, "mean_token_accuracy": 0.6366155793269476, "num_tokens": 360047860.0, "step": 2149 }, { "entropy": 1.7702193856239319, "epoch": 0.23619235945181402, "grad_norm": 0.664681613445282, "learning_rate": 1.9636952519452744e-05, "loss": 1.3891, "mean_token_accuracy": 0.6544702003399531, "num_tokens": 360202676.0, "step": 2150 }, { "entropy": 1.7394584218660991, "epoch": 0.23630221636318696, "grad_norm": 0.6830300688743591, "learning_rate": 1.9636502212048572e-05, "loss": 1.3563, "mean_token_accuracy": 0.6578590472539266, "num_tokens": 360355267.0, "step": 2151 }, { "entropy": 1.7491403818130493, "epoch": 0.23641207327455988, "grad_norm": 0.7265613675117493, "learning_rate": 1.9636051631303e-05, "loss": 1.477, "mean_token_accuracy": 0.6517923126618067, "num_tokens": 360478966.0, "step": 2152 }, { "entropy": 1.7051890293757122, "epoch": 0.23652193018593282, "grad_norm": 0.7303532361984253, "learning_rate": 1.9635600777230282e-05, "loss": 1.3862, "mean_token_accuracy": 0.6609994073708853, "num_tokens": 360660424.0, "step": 2153 }, { "entropy": 1.7975957592328389, "epoch": 0.23663178709730576, "grad_norm": 0.5887877345085144, "learning_rate": 1.9635149649844692e-05, "loss": 1.5907, "mean_token_accuracy": 0.6157426983118057, "num_tokens": 360924995.0, "step": 2154 }, { "entropy": 1.6898219386736553, "epoch": 0.2367416440086787, "grad_norm": 0.6663623452186584, "learning_rate": 1.963469824916051e-05, "loss": 1.3366, "mean_token_accuracy": 0.6628438780705134, "num_tokens": 361077147.0, "step": 2155 }, { "entropy": 1.6904780368010204, "epoch": 0.23685150092005164, "grad_norm": 0.6058080196380615, "learning_rate": 1.9634246575192016e-05, "loss": 1.5434, "mean_token_accuracy": 0.6356114248434702, "num_tokens": 361294273.0, "step": 2156 }, { "entropy": 1.7322813769181569, "epoch": 0.23696135783142458, "grad_norm": 0.6489390730857849, "learning_rate": 1.963379462795351e-05, "loss": 1.2153, "mean_token_accuracy": 0.6865619271993637, "num_tokens": 361438046.0, "step": 2157 }, { "entropy": 1.7179724077383678, "epoch": 0.2370712147427975, "grad_norm": 0.8556140661239624, "learning_rate": 1.9633342407459293e-05, "loss": 1.4153, "mean_token_accuracy": 0.6623386641343435, "num_tokens": 361588264.0, "step": 2158 }, { "entropy": 1.728331635395686, "epoch": 0.23718107165417043, "grad_norm": 0.8043140172958374, "learning_rate": 1.963288991372368e-05, "loss": 1.4934, "mean_token_accuracy": 0.6493467340866724, "num_tokens": 361753598.0, "step": 2159 }, { "entropy": 1.732284684975942, "epoch": 0.23729092856554337, "grad_norm": 0.7125091552734375, "learning_rate": 1.963243714676099e-05, "loss": 1.4851, "mean_token_accuracy": 0.6488629430532455, "num_tokens": 361902073.0, "step": 2160 }, { "entropy": 1.78290989001592, "epoch": 0.23740078547691632, "grad_norm": 0.7010405659675598, "learning_rate": 1.9631984106585555e-05, "loss": 1.3971, "mean_token_accuracy": 0.6474642306566238, "num_tokens": 362076912.0, "step": 2161 }, { "entropy": 1.7021795014540355, "epoch": 0.23751064238828926, "grad_norm": 0.6787356734275818, "learning_rate": 1.9631530793211714e-05, "loss": 1.4077, "mean_token_accuracy": 0.648701603213946, "num_tokens": 362238050.0, "step": 2162 }, { "entropy": 1.7377806107203166, "epoch": 0.2376204992996622, "grad_norm": 0.5525624752044678, "learning_rate": 1.9631077206653813e-05, "loss": 1.3933, "mean_token_accuracy": 0.6540517012278239, "num_tokens": 362480190.0, "step": 2163 }, { "entropy": 1.7550107041994731, "epoch": 0.23773035621103514, "grad_norm": 0.7044647336006165, "learning_rate": 1.9630623346926204e-05, "loss": 1.5253, "mean_token_accuracy": 0.635568325718244, "num_tokens": 362670764.0, "step": 2164 }, { "entropy": 1.7480690081914265, "epoch": 0.23784021312240805, "grad_norm": 0.7633349895477295, "learning_rate": 1.9630169214043256e-05, "loss": 1.4851, "mean_token_accuracy": 0.6614658435185751, "num_tokens": 362833342.0, "step": 2165 }, { "entropy": 1.7279286682605743, "epoch": 0.237950070033781, "grad_norm": 0.6929425597190857, "learning_rate": 1.9629714808019346e-05, "loss": 1.3254, "mean_token_accuracy": 0.668560599287351, "num_tokens": 363009578.0, "step": 2166 }, { "entropy": 1.6972400446732838, "epoch": 0.23805992694515393, "grad_norm": 0.6299599409103394, "learning_rate": 1.9629260128868845e-05, "loss": 1.4519, "mean_token_accuracy": 0.6551623294750849, "num_tokens": 363187419.0, "step": 2167 }, { "entropy": 1.7553909023602803, "epoch": 0.23816978385652687, "grad_norm": 0.8156332969665527, "learning_rate": 1.9628805176606154e-05, "loss": 1.4541, "mean_token_accuracy": 0.647150124112765, "num_tokens": 363328352.0, "step": 2168 }, { "entropy": 1.7071846425533295, "epoch": 0.23827964076789981, "grad_norm": 0.856622040271759, "learning_rate": 1.9628349951245664e-05, "loss": 1.3319, "mean_token_accuracy": 0.6602184077103933, "num_tokens": 363489818.0, "step": 2169 }, { "entropy": 1.739296982685725, "epoch": 0.23838949767927275, "grad_norm": 0.6858103275299072, "learning_rate": 1.962789445280179e-05, "loss": 1.2651, "mean_token_accuracy": 0.6659305195013682, "num_tokens": 363619436.0, "step": 2170 }, { "entropy": 1.6668515503406525, "epoch": 0.2384993545906457, "grad_norm": 0.6311803460121155, "learning_rate": 1.962743868128894e-05, "loss": 1.4142, "mean_token_accuracy": 0.6425204674402872, "num_tokens": 363811652.0, "step": 2171 }, { "entropy": 1.7442655265331268, "epoch": 0.2386092115020186, "grad_norm": 0.6944742202758789, "learning_rate": 1.9626982636721545e-05, "loss": 1.4477, "mean_token_accuracy": 0.6392138053973516, "num_tokens": 363977169.0, "step": 2172 }, { "entropy": 1.7125125726064045, "epoch": 0.23871906841339155, "grad_norm": 0.7651383876800537, "learning_rate": 1.9626526319114036e-05, "loss": 1.3283, "mean_token_accuracy": 0.6706061810255051, "num_tokens": 364116621.0, "step": 2173 }, { "entropy": 1.7354065477848053, "epoch": 0.2388289253247645, "grad_norm": 0.6315815448760986, "learning_rate": 1.9626069728480858e-05, "loss": 1.443, "mean_token_accuracy": 0.6499165147542953, "num_tokens": 364276980.0, "step": 2174 }, { "entropy": 1.735385040442149, "epoch": 0.23893878223613743, "grad_norm": 0.7463067173957825, "learning_rate": 1.962561286483646e-05, "loss": 1.3995, "mean_token_accuracy": 0.6562738716602325, "num_tokens": 364459519.0, "step": 2175 }, { "entropy": 1.7214009960492451, "epoch": 0.23904863914751037, "grad_norm": 0.7005197405815125, "learning_rate": 1.9625155728195302e-05, "loss": 1.3058, "mean_token_accuracy": 0.666901707649231, "num_tokens": 364587858.0, "step": 2176 }, { "entropy": 1.712921271721522, "epoch": 0.2391584960588833, "grad_norm": 0.6100747585296631, "learning_rate": 1.962469831857185e-05, "loss": 1.4452, "mean_token_accuracy": 0.6609046856562296, "num_tokens": 364753196.0, "step": 2177 }, { "entropy": 1.7237416009108226, "epoch": 0.23926835297025625, "grad_norm": 0.7957034111022949, "learning_rate": 1.9624240635980584e-05, "loss": 1.2262, "mean_token_accuracy": 0.674635981520017, "num_tokens": 364910423.0, "step": 2178 }, { "entropy": 1.700117399295171, "epoch": 0.23937820988162917, "grad_norm": 0.6685667037963867, "learning_rate": 1.9623782680435987e-05, "loss": 1.5193, "mean_token_accuracy": 0.6441571215788523, "num_tokens": 365077021.0, "step": 2179 }, { "entropy": 1.7847689390182495, "epoch": 0.2394880667930021, "grad_norm": 0.7147764563560486, "learning_rate": 1.9623324451952553e-05, "loss": 1.5083, "mean_token_accuracy": 0.6502701590458552, "num_tokens": 365244591.0, "step": 2180 }, { "entropy": 1.7398345371087391, "epoch": 0.23959792370437505, "grad_norm": 0.8563188910484314, "learning_rate": 1.962286595054479e-05, "loss": 1.3004, "mean_token_accuracy": 0.6682943751414617, "num_tokens": 365376513.0, "step": 2181 }, { "entropy": 1.7507529258728027, "epoch": 0.239707780615748, "grad_norm": 0.693526566028595, "learning_rate": 1.9622407176227203e-05, "loss": 1.2758, "mean_token_accuracy": 0.6713242183128992, "num_tokens": 365498732.0, "step": 2182 }, { "entropy": 1.7492507894833882, "epoch": 0.23981763752712093, "grad_norm": 0.6851478219032288, "learning_rate": 1.9621948129014313e-05, "loss": 1.4017, "mean_token_accuracy": 0.6537040372689565, "num_tokens": 365646829.0, "step": 2183 }, { "entropy": 1.6749079823493958, "epoch": 0.23992749443849387, "grad_norm": 0.6201784610748291, "learning_rate": 1.962148880892065e-05, "loss": 1.283, "mean_token_accuracy": 0.6736765950918198, "num_tokens": 365806583.0, "step": 2184 }, { "entropy": 1.709314078092575, "epoch": 0.2400373513498668, "grad_norm": 0.758945643901825, "learning_rate": 1.9621029215960754e-05, "loss": 1.4324, "mean_token_accuracy": 0.6545686274766922, "num_tokens": 365961748.0, "step": 2185 }, { "entropy": 1.6410066386063893, "epoch": 0.24014720826123973, "grad_norm": 0.5668028593063354, "learning_rate": 1.9620569350149165e-05, "loss": 1.4312, "mean_token_accuracy": 0.6502448171377182, "num_tokens": 366235040.0, "step": 2186 }, { "entropy": 1.6869204839070637, "epoch": 0.24025706517261267, "grad_norm": 0.648259162902832, "learning_rate": 1.962010921150044e-05, "loss": 1.4071, "mean_token_accuracy": 0.653958131869634, "num_tokens": 366404008.0, "step": 2187 }, { "entropy": 1.762471745411555, "epoch": 0.2403669220839856, "grad_norm": 0.730883002281189, "learning_rate": 1.9619648800029147e-05, "loss": 1.507, "mean_token_accuracy": 0.6531516114870707, "num_tokens": 366593272.0, "step": 2188 }, { "entropy": 1.7137305339177449, "epoch": 0.24047677899535855, "grad_norm": 0.6646022796630859, "learning_rate": 1.961918811574985e-05, "loss": 1.3821, "mean_token_accuracy": 0.6573305775721868, "num_tokens": 366745378.0, "step": 2189 }, { "entropy": 1.724080502986908, "epoch": 0.2405866359067315, "grad_norm": 0.7894087433815002, "learning_rate": 1.9618727158677135e-05, "loss": 1.2611, "mean_token_accuracy": 0.6693530778090159, "num_tokens": 366856467.0, "step": 2190 }, { "entropy": 1.6957077880700429, "epoch": 0.24069649281810443, "grad_norm": 0.7428924441337585, "learning_rate": 1.9618265928825585e-05, "loss": 1.478, "mean_token_accuracy": 0.6377401451269785, "num_tokens": 367087055.0, "step": 2191 }, { "entropy": 1.7296242912610371, "epoch": 0.24080634972947734, "grad_norm": 0.820216178894043, "learning_rate": 1.9617804426209806e-05, "loss": 1.4666, "mean_token_accuracy": 0.6516137719154358, "num_tokens": 367269105.0, "step": 2192 }, { "entropy": 1.7189955015977223, "epoch": 0.24091620664085028, "grad_norm": 0.6569713950157166, "learning_rate": 1.96173426508444e-05, "loss": 1.3206, "mean_token_accuracy": 0.6575327118237814, "num_tokens": 367475382.0, "step": 2193 }, { "entropy": 1.6480527619520824, "epoch": 0.24102606355222322, "grad_norm": 0.6472831964492798, "learning_rate": 1.961688060274398e-05, "loss": 1.3378, "mean_token_accuracy": 0.6720782270034155, "num_tokens": 367684314.0, "step": 2194 }, { "entropy": 1.7488195300102234, "epoch": 0.24113592046359617, "grad_norm": 0.6956831812858582, "learning_rate": 1.9616418281923173e-05, "loss": 1.3187, "mean_token_accuracy": 0.6687667121489843, "num_tokens": 367816401.0, "step": 2195 }, { "entropy": 1.7608485917250316, "epoch": 0.2412457773749691, "grad_norm": 0.7031329274177551, "learning_rate": 1.9615955688396612e-05, "loss": 1.3447, "mean_token_accuracy": 0.6478245705366135, "num_tokens": 367957070.0, "step": 2196 }, { "entropy": 1.6865754624207814, "epoch": 0.24135563428634205, "grad_norm": 0.604003369808197, "learning_rate": 1.961549282217893e-05, "loss": 1.4346, "mean_token_accuracy": 0.650801420211792, "num_tokens": 368155745.0, "step": 2197 }, { "entropy": 1.7110398511091869, "epoch": 0.241465491197715, "grad_norm": 0.6684320569038391, "learning_rate": 1.961502968328479e-05, "loss": 1.3022, "mean_token_accuracy": 0.6626110126574835, "num_tokens": 368276371.0, "step": 2198 }, { "entropy": 1.7122747302055359, "epoch": 0.2415753481090879, "grad_norm": 0.6755695343017578, "learning_rate": 1.9614566271728837e-05, "loss": 1.3599, "mean_token_accuracy": 0.648558442791303, "num_tokens": 368465965.0, "step": 2199 }, { "entropy": 1.8012695610523224, "epoch": 0.24168520502046084, "grad_norm": 0.684968888759613, "learning_rate": 1.9614102587525747e-05, "loss": 1.4568, "mean_token_accuracy": 0.6433351039886475, "num_tokens": 368636338.0, "step": 2200 }, { "entropy": 1.7449529965718586, "epoch": 0.24179506193183378, "grad_norm": 0.7659276127815247, "learning_rate": 1.961363863069019e-05, "loss": 1.5253, "mean_token_accuracy": 0.633417159318924, "num_tokens": 368846134.0, "step": 2201 }, { "entropy": 1.6875906387964885, "epoch": 0.24190491884320672, "grad_norm": 0.5927108526229858, "learning_rate": 1.9613174401236854e-05, "loss": 1.3356, "mean_token_accuracy": 0.6597074568271637, "num_tokens": 369033966.0, "step": 2202 }, { "entropy": 1.6861758530139923, "epoch": 0.24201477575457966, "grad_norm": 0.6627983450889587, "learning_rate": 1.9612709899180426e-05, "loss": 1.2911, "mean_token_accuracy": 0.6747742146253586, "num_tokens": 369165869.0, "step": 2203 }, { "entropy": 1.6657120088736217, "epoch": 0.2421246326659526, "grad_norm": 0.6609204411506653, "learning_rate": 1.961224512453561e-05, "loss": 1.3563, "mean_token_accuracy": 0.6647985180219015, "num_tokens": 369339600.0, "step": 2204 }, { "entropy": 1.7627909282843273, "epoch": 0.24223448957732555, "grad_norm": 0.6282365322113037, "learning_rate": 1.961178007731712e-05, "loss": 1.4089, "mean_token_accuracy": 0.6540078123410543, "num_tokens": 369538959.0, "step": 2205 }, { "entropy": 1.722537229458491, "epoch": 0.24234434648869846, "grad_norm": 0.7731828093528748, "learning_rate": 1.961131475753967e-05, "loss": 1.4734, "mean_token_accuracy": 0.648734783132871, "num_tokens": 369681113.0, "step": 2206 }, { "entropy": 1.7212282319863637, "epoch": 0.2424542034000714, "grad_norm": 0.8537726402282715, "learning_rate": 1.9610849165217987e-05, "loss": 1.4472, "mean_token_accuracy": 0.6453748544057211, "num_tokens": 369894724.0, "step": 2207 }, { "entropy": 1.6924510598182678, "epoch": 0.24256406031144434, "grad_norm": 0.6755207777023315, "learning_rate": 1.9610383300366805e-05, "loss": 1.4163, "mean_token_accuracy": 0.6647496223449707, "num_tokens": 370034227.0, "step": 2208 }, { "entropy": 1.649043579896291, "epoch": 0.24267391722281728, "grad_norm": 0.7188857793807983, "learning_rate": 1.960991716300088e-05, "loss": 1.3476, "mean_token_accuracy": 0.6680084963639578, "num_tokens": 370255912.0, "step": 2209 }, { "entropy": 1.6831135253111522, "epoch": 0.24278377413419022, "grad_norm": 0.6619141101837158, "learning_rate": 1.960945075313495e-05, "loss": 1.4331, "mean_token_accuracy": 0.652848685781161, "num_tokens": 370441362.0, "step": 2210 }, { "entropy": 1.7197281420230865, "epoch": 0.24289363104556316, "grad_norm": 0.710666298866272, "learning_rate": 1.9608984070783783e-05, "loss": 1.3113, "mean_token_accuracy": 0.6746131976445516, "num_tokens": 370578910.0, "step": 2211 }, { "entropy": 1.6992753148078918, "epoch": 0.2430034879569361, "grad_norm": 0.6412252187728882, "learning_rate": 1.9608517115962155e-05, "loss": 1.4755, "mean_token_accuracy": 0.6443726023038229, "num_tokens": 370759506.0, "step": 2212 }, { "entropy": 1.7628650764624278, "epoch": 0.24311334486830902, "grad_norm": 0.8076620101928711, "learning_rate": 1.9608049888684834e-05, "loss": 1.369, "mean_token_accuracy": 0.6632248312234879, "num_tokens": 370888869.0, "step": 2213 }, { "entropy": 1.7496830423672993, "epoch": 0.24322320177968196, "grad_norm": 0.6189358234405518, "learning_rate": 1.9607582388966616e-05, "loss": 1.3547, "mean_token_accuracy": 0.6613173534472784, "num_tokens": 371046402.0, "step": 2214 }, { "entropy": 1.7203446328639984, "epoch": 0.2433330586910549, "grad_norm": 0.6774999499320984, "learning_rate": 1.960711461682229e-05, "loss": 1.411, "mean_token_accuracy": 0.6542786955833435, "num_tokens": 371304026.0, "step": 2215 }, { "entropy": 1.6830947597821553, "epoch": 0.24344291560242784, "grad_norm": 0.6460142731666565, "learning_rate": 1.960664657226667e-05, "loss": 1.3369, "mean_token_accuracy": 0.6676846394936243, "num_tokens": 371470000.0, "step": 2216 }, { "entropy": 1.7166868448257446, "epoch": 0.24355277251380078, "grad_norm": 0.6478844285011292, "learning_rate": 1.960617825531456e-05, "loss": 1.2987, "mean_token_accuracy": 0.6614114989837011, "num_tokens": 371579838.0, "step": 2217 }, { "entropy": 1.7828513085842133, "epoch": 0.24366262942517372, "grad_norm": 1.131135106086731, "learning_rate": 1.960570966598079e-05, "loss": 1.5558, "mean_token_accuracy": 0.6534903893868128, "num_tokens": 371739465.0, "step": 2218 }, { "entropy": 1.728717068831126, "epoch": 0.24377248633654663, "grad_norm": 0.6150972247123718, "learning_rate": 1.9605240804280185e-05, "loss": 1.4331, "mean_token_accuracy": 0.6512966354688009, "num_tokens": 371937968.0, "step": 2219 }, { "entropy": 1.7227512498696644, "epoch": 0.24388234324791958, "grad_norm": 0.7741029262542725, "learning_rate": 1.9604771670227586e-05, "loss": 1.5728, "mean_token_accuracy": 0.6268777251243591, "num_tokens": 372118632.0, "step": 2220 }, { "entropy": 1.7896570165952046, "epoch": 0.24399220015929252, "grad_norm": 0.6654759049415588, "learning_rate": 1.960430226383784e-05, "loss": 1.4358, "mean_token_accuracy": 0.6406222383181254, "num_tokens": 372328145.0, "step": 2221 }, { "entropy": 1.7172939280668895, "epoch": 0.24410205707066546, "grad_norm": 0.759647011756897, "learning_rate": 1.9603832585125807e-05, "loss": 1.472, "mean_token_accuracy": 0.6392781734466553, "num_tokens": 372540925.0, "step": 2222 }, { "entropy": 1.7888973255952199, "epoch": 0.2442119139820384, "grad_norm": 0.7657260298728943, "learning_rate": 1.960336263410635e-05, "loss": 1.3073, "mean_token_accuracy": 0.671954408288002, "num_tokens": 372674433.0, "step": 2223 }, { "entropy": 1.6941000918547313, "epoch": 0.24432177089341134, "grad_norm": 0.7540859580039978, "learning_rate": 1.960289241079434e-05, "loss": 1.3743, "mean_token_accuracy": 0.6615285774072012, "num_tokens": 372864530.0, "step": 2224 }, { "entropy": 1.7387097477912903, "epoch": 0.24443162780478428, "grad_norm": 0.629091739654541, "learning_rate": 1.960242191520466e-05, "loss": 1.4295, "mean_token_accuracy": 0.6414004961649576, "num_tokens": 373026958.0, "step": 2225 }, { "entropy": 1.6648336052894592, "epoch": 0.2445414847161572, "grad_norm": 0.5851943492889404, "learning_rate": 1.960195114735221e-05, "loss": 1.4927, "mean_token_accuracy": 0.6401997953653336, "num_tokens": 373255586.0, "step": 2226 }, { "entropy": 1.6632899244626362, "epoch": 0.24465134162753013, "grad_norm": 0.5614564418792725, "learning_rate": 1.9601480107251875e-05, "loss": 1.3345, "mean_token_accuracy": 0.6726627051830292, "num_tokens": 373415841.0, "step": 2227 }, { "entropy": 1.7338934938112895, "epoch": 0.24476119853890307, "grad_norm": 0.6574128866195679, "learning_rate": 1.960100879491857e-05, "loss": 1.4711, "mean_token_accuracy": 0.6444676717122396, "num_tokens": 373600936.0, "step": 2228 }, { "entropy": 1.7459152539571126, "epoch": 0.24487105545027602, "grad_norm": 0.7730247974395752, "learning_rate": 1.960053721036722e-05, "loss": 1.3288, "mean_token_accuracy": 0.6706405679384867, "num_tokens": 373739586.0, "step": 2229 }, { "entropy": 1.7264153758684795, "epoch": 0.24498091236164896, "grad_norm": 0.6619213819503784, "learning_rate": 1.9600065353612735e-05, "loss": 1.5545, "mean_token_accuracy": 0.6263764947652817, "num_tokens": 373943250.0, "step": 2230 }, { "entropy": 1.7214798033237457, "epoch": 0.2450907692730219, "grad_norm": 0.7965298295021057, "learning_rate": 1.959959322467006e-05, "loss": 1.5338, "mean_token_accuracy": 0.6338108479976654, "num_tokens": 374145099.0, "step": 2231 }, { "entropy": 1.7392083803812664, "epoch": 0.24520062618439484, "grad_norm": 0.6499489545822144, "learning_rate": 1.9599120823554137e-05, "loss": 1.3013, "mean_token_accuracy": 0.6649557749430338, "num_tokens": 374267307.0, "step": 2232 }, { "entropy": 1.6417719821135204, "epoch": 0.24531048309576775, "grad_norm": 0.6339417099952698, "learning_rate": 1.959864815027991e-05, "loss": 1.4139, "mean_token_accuracy": 0.6582342187563578, "num_tokens": 374443719.0, "step": 2233 }, { "entropy": 1.7121461629867554, "epoch": 0.2454203400071407, "grad_norm": 0.6677350997924805, "learning_rate": 1.9598175204862348e-05, "loss": 1.3063, "mean_token_accuracy": 0.6706344981988271, "num_tokens": 374593607.0, "step": 2234 }, { "entropy": 1.7524688243865967, "epoch": 0.24553019691851363, "grad_norm": 0.7025663256645203, "learning_rate": 1.959770198731641e-05, "loss": 1.4846, "mean_token_accuracy": 0.6432475497325262, "num_tokens": 374766690.0, "step": 2235 }, { "entropy": 1.7159065902233124, "epoch": 0.24564005382988657, "grad_norm": 0.7714294791221619, "learning_rate": 1.9597228497657084e-05, "loss": 1.3186, "mean_token_accuracy": 0.6638337969779968, "num_tokens": 374890263.0, "step": 2236 }, { "entropy": 1.722405840953191, "epoch": 0.24574991074125951, "grad_norm": 0.863430380821228, "learning_rate": 1.9596754735899347e-05, "loss": 1.2672, "mean_token_accuracy": 0.669483408331871, "num_tokens": 375022168.0, "step": 2237 }, { "entropy": 1.7640716234842937, "epoch": 0.24585976765263245, "grad_norm": 0.790648877620697, "learning_rate": 1.95962807020582e-05, "loss": 1.5312, "mean_token_accuracy": 0.6371087779601415, "num_tokens": 375212695.0, "step": 2238 }, { "entropy": 1.6626634697119396, "epoch": 0.2459696245640054, "grad_norm": 0.6195372343063354, "learning_rate": 1.959580639614864e-05, "loss": 1.5048, "mean_token_accuracy": 0.631678581237793, "num_tokens": 375458846.0, "step": 2239 }, { "entropy": 1.7087614436944325, "epoch": 0.2460794814753783, "grad_norm": 0.7846135497093201, "learning_rate": 1.959533181818568e-05, "loss": 1.3327, "mean_token_accuracy": 0.6647358934084574, "num_tokens": 375608137.0, "step": 2240 }, { "entropy": 1.6494195957978566, "epoch": 0.24618933838675125, "grad_norm": 0.9152674674987793, "learning_rate": 1.9594856968184338e-05, "loss": 1.4468, "mean_token_accuracy": 0.664206475019455, "num_tokens": 375798556.0, "step": 2241 }, { "entropy": 1.7339473962783813, "epoch": 0.2462991952981242, "grad_norm": 0.7858838438987732, "learning_rate": 1.959438184615965e-05, "loss": 1.4387, "mean_token_accuracy": 0.660494844118754, "num_tokens": 375947922.0, "step": 2242 }, { "entropy": 1.7143746713797252, "epoch": 0.24640905220949713, "grad_norm": 0.6970986127853394, "learning_rate": 1.9593906452126646e-05, "loss": 1.6092, "mean_token_accuracy": 0.6282972743113836, "num_tokens": 376216590.0, "step": 2243 }, { "entropy": 1.6953730583190918, "epoch": 0.24651890912087007, "grad_norm": 0.6785817742347717, "learning_rate": 1.9593430786100382e-05, "loss": 1.2938, "mean_token_accuracy": 0.6653183003266653, "num_tokens": 376336959.0, "step": 2244 }, { "entropy": 1.722437173128128, "epoch": 0.246628766032243, "grad_norm": 0.6590582132339478, "learning_rate": 1.9592954848095904e-05, "loss": 1.65, "mean_token_accuracy": 0.6332317143678665, "num_tokens": 376538380.0, "step": 2245 }, { "entropy": 1.721213052670161, "epoch": 0.24673862294361595, "grad_norm": 0.8105208277702332, "learning_rate": 1.9592478638128272e-05, "loss": 1.3964, "mean_token_accuracy": 0.6566237409909567, "num_tokens": 376704949.0, "step": 2246 }, { "entropy": 1.7157046496868134, "epoch": 0.24684847985498887, "grad_norm": 0.6640613675117493, "learning_rate": 1.9592002156212568e-05, "loss": 1.3865, "mean_token_accuracy": 0.6623821159203848, "num_tokens": 376867333.0, "step": 2247 }, { "entropy": 1.6208436489105225, "epoch": 0.2469583367663618, "grad_norm": 0.6282928586006165, "learning_rate": 1.9591525402363864e-05, "loss": 1.309, "mean_token_accuracy": 0.67738905052344, "num_tokens": 377061118.0, "step": 2248 }, { "entropy": 1.7082207401593525, "epoch": 0.24706819367773475, "grad_norm": 0.6272217035293579, "learning_rate": 1.9591048376597253e-05, "loss": 1.5739, "mean_token_accuracy": 0.636579230427742, "num_tokens": 377247432.0, "step": 2249 }, { "entropy": 1.7537776231765747, "epoch": 0.2471780505891077, "grad_norm": 0.7704234719276428, "learning_rate": 1.959057107892783e-05, "loss": 1.6269, "mean_token_accuracy": 0.626529390613238, "num_tokens": 377454531.0, "step": 2250 }, { "entropy": 1.681376536687215, "epoch": 0.24728790750048063, "grad_norm": 0.65434330701828, "learning_rate": 1.9590093509370708e-05, "loss": 1.3295, "mean_token_accuracy": 0.6573110024134318, "num_tokens": 377664696.0, "step": 2251 }, { "entropy": 1.69319083293279, "epoch": 0.24739776441185357, "grad_norm": 0.7122017741203308, "learning_rate": 1.9589615667940994e-05, "loss": 1.48, "mean_token_accuracy": 0.645169585943222, "num_tokens": 377844199.0, "step": 2252 }, { "entropy": 1.7546374201774597, "epoch": 0.24750762132322648, "grad_norm": 0.6402483582496643, "learning_rate": 1.958913755465382e-05, "loss": 1.3988, "mean_token_accuracy": 0.6497417340675989, "num_tokens": 378018473.0, "step": 2253 }, { "entropy": 1.82759756843249, "epoch": 0.24761747823459943, "grad_norm": 0.7866818308830261, "learning_rate": 1.958865916952431e-05, "loss": 1.6695, "mean_token_accuracy": 0.6300811717907587, "num_tokens": 378212776.0, "step": 2254 }, { "entropy": 1.7436250348885853, "epoch": 0.24772733514597237, "grad_norm": 0.6628825068473816, "learning_rate": 1.9588180512567604e-05, "loss": 1.4227, "mean_token_accuracy": 0.6468610117832819, "num_tokens": 378394493.0, "step": 2255 }, { "entropy": 1.7446848253409069, "epoch": 0.2478371920573453, "grad_norm": 0.6798176765441895, "learning_rate": 1.958770158379886e-05, "loss": 1.4955, "mean_token_accuracy": 0.6422973871231079, "num_tokens": 378575118.0, "step": 2256 }, { "entropy": 1.7074936429659526, "epoch": 0.24794704896871825, "grad_norm": 1.042772650718689, "learning_rate": 1.9587222383233228e-05, "loss": 1.5275, "mean_token_accuracy": 0.6391404122114182, "num_tokens": 378807469.0, "step": 2257 }, { "entropy": 1.72696053981781, "epoch": 0.2480569058800912, "grad_norm": 0.766135573387146, "learning_rate": 1.9586742910885874e-05, "loss": 1.4886, "mean_token_accuracy": 0.6411213676134745, "num_tokens": 378956471.0, "step": 2258 }, { "entropy": 1.6875308553377788, "epoch": 0.24816676279146413, "grad_norm": 0.6208860874176025, "learning_rate": 1.9586263166771976e-05, "loss": 1.3516, "mean_token_accuracy": 0.6571200539668401, "num_tokens": 379099761.0, "step": 2259 }, { "entropy": 1.6892028748989105, "epoch": 0.24827661970283704, "grad_norm": 0.871165931224823, "learning_rate": 1.958578315090672e-05, "loss": 1.5336, "mean_token_accuracy": 0.6406076997518539, "num_tokens": 379257672.0, "step": 2260 }, { "entropy": 1.7612777749697368, "epoch": 0.24838647661420998, "grad_norm": 0.7290350198745728, "learning_rate": 1.95853028633053e-05, "loss": 1.3248, "mean_token_accuracy": 0.6579936047395071, "num_tokens": 379388678.0, "step": 2261 }, { "entropy": 1.7003148396809895, "epoch": 0.24849633352558292, "grad_norm": 0.652847170829773, "learning_rate": 1.958482230398291e-05, "loss": 1.3746, "mean_token_accuracy": 0.6677578836679459, "num_tokens": 379561397.0, "step": 2262 }, { "entropy": 1.7450473109881084, "epoch": 0.24860619043695587, "grad_norm": 0.6660063862800598, "learning_rate": 1.958434147295476e-05, "loss": 1.4158, "mean_token_accuracy": 0.6531000037988027, "num_tokens": 379754068.0, "step": 2263 }, { "entropy": 1.738626629114151, "epoch": 0.2487160473483288, "grad_norm": 0.6657839417457581, "learning_rate": 1.9583860370236073e-05, "loss": 1.401, "mean_token_accuracy": 0.6500038256247839, "num_tokens": 379929662.0, "step": 2264 }, { "entropy": 1.6692744890848796, "epoch": 0.24882590425970175, "grad_norm": 0.5950648188591003, "learning_rate": 1.9583378995842073e-05, "loss": 1.359, "mean_token_accuracy": 0.6600983838240305, "num_tokens": 380102552.0, "step": 2265 }, { "entropy": 1.6895807385444641, "epoch": 0.2489357611710747, "grad_norm": 0.6088550686836243, "learning_rate": 1.9582897349788e-05, "loss": 1.49, "mean_token_accuracy": 0.6435723503430685, "num_tokens": 380318259.0, "step": 2266 }, { "entropy": 1.660117010275523, "epoch": 0.2490456180824476, "grad_norm": 0.5880101919174194, "learning_rate": 1.9582415432089086e-05, "loss": 1.3985, "mean_token_accuracy": 0.6589946647485098, "num_tokens": 380484218.0, "step": 2267 }, { "entropy": 1.7047088046868641, "epoch": 0.24915547499382054, "grad_norm": 0.7071229815483093, "learning_rate": 1.9581933242760595e-05, "loss": 1.5435, "mean_token_accuracy": 0.6445372601350149, "num_tokens": 380657830.0, "step": 2268 }, { "entropy": 1.6842391391595204, "epoch": 0.24926533190519348, "grad_norm": 0.6728826761245728, "learning_rate": 1.9581450781817782e-05, "loss": 1.4207, "mean_token_accuracy": 0.6447325150171915, "num_tokens": 380870392.0, "step": 2269 }, { "entropy": 1.7044761975606282, "epoch": 0.24937518881656642, "grad_norm": 0.6908706426620483, "learning_rate": 1.9580968049275918e-05, "loss": 1.379, "mean_token_accuracy": 0.6554999053478241, "num_tokens": 381045560.0, "step": 2270 }, { "entropy": 1.7423875729242961, "epoch": 0.24948504572793936, "grad_norm": 0.6786364912986755, "learning_rate": 1.9580485045150284e-05, "loss": 1.3842, "mean_token_accuracy": 0.6539370367924372, "num_tokens": 381184792.0, "step": 2271 }, { "entropy": 1.7371763586997986, "epoch": 0.2495949026393123, "grad_norm": 0.6156378984451294, "learning_rate": 1.9580001769456166e-05, "loss": 1.5311, "mean_token_accuracy": 0.6272181322177252, "num_tokens": 381470099.0, "step": 2272 }, { "entropy": 1.7165015836556752, "epoch": 0.24970475955068525, "grad_norm": 0.6726257801055908, "learning_rate": 1.9579518222208855e-05, "loss": 1.3107, "mean_token_accuracy": 0.6664568881193796, "num_tokens": 381608195.0, "step": 2273 }, { "entropy": 1.7067488332589467, "epoch": 0.24981461646205816, "grad_norm": 0.7491520643234253, "learning_rate": 1.957903440342366e-05, "loss": 1.5999, "mean_token_accuracy": 0.6128611117601395, "num_tokens": 381845892.0, "step": 2274 }, { "entropy": 1.742664744456609, "epoch": 0.2499244733734311, "grad_norm": 0.6049597263336182, "learning_rate": 1.9578550313115892e-05, "loss": 1.3704, "mean_token_accuracy": 0.6534209748109182, "num_tokens": 382016391.0, "step": 2275 }, { "entropy": 1.6452626784642537, "epoch": 0.25003433028480404, "grad_norm": 0.6422023773193359, "learning_rate": 1.9578065951300873e-05, "loss": 1.4084, "mean_token_accuracy": 0.6613962203264236, "num_tokens": 382212478.0, "step": 2276 }, { "entropy": 1.6521940728028615, "epoch": 0.250144187196177, "grad_norm": 0.816700279712677, "learning_rate": 1.957758131799393e-05, "loss": 1.3941, "mean_token_accuracy": 0.6591680943965912, "num_tokens": 382412188.0, "step": 2277 }, { "entropy": 1.6978717148303986, "epoch": 0.2502540441075499, "grad_norm": 0.6817605495452881, "learning_rate": 1.9577096413210405e-05, "loss": 1.4719, "mean_token_accuracy": 0.6562831451495489, "num_tokens": 382571361.0, "step": 2278 }, { "entropy": 1.7319901784261067, "epoch": 0.25036390101892286, "grad_norm": 0.9123170971870422, "learning_rate": 1.9576611236965644e-05, "loss": 1.3404, "mean_token_accuracy": 0.6749412715435028, "num_tokens": 382719176.0, "step": 2279 }, { "entropy": 1.680397629737854, "epoch": 0.2504737579302958, "grad_norm": 0.6984215974807739, "learning_rate": 1.9576125789275e-05, "loss": 1.3374, "mean_token_accuracy": 0.6687712669372559, "num_tokens": 382873172.0, "step": 2280 }, { "entropy": 1.7036446233590443, "epoch": 0.25058361484166874, "grad_norm": 0.5962554216384888, "learning_rate": 1.957564007015384e-05, "loss": 1.464, "mean_token_accuracy": 0.6451049596071243, "num_tokens": 383057241.0, "step": 2281 }, { "entropy": 1.771241287390391, "epoch": 0.2506934717530417, "grad_norm": 0.7492452263832092, "learning_rate": 1.9575154079617535e-05, "loss": 1.3027, "mean_token_accuracy": 0.6861212154229482, "num_tokens": 383177887.0, "step": 2282 }, { "entropy": 1.6081528663635254, "epoch": 0.2508033286644146, "grad_norm": 0.6222009062767029, "learning_rate": 1.957466781768147e-05, "loss": 1.3991, "mean_token_accuracy": 0.6638480375210444, "num_tokens": 383382040.0, "step": 2283 }, { "entropy": 1.736664613087972, "epoch": 0.2509131855757875, "grad_norm": 0.7736158967018127, "learning_rate": 1.957418128436103e-05, "loss": 1.3175, "mean_token_accuracy": 0.672918826341629, "num_tokens": 383497583.0, "step": 2284 }, { "entropy": 1.7848374644915264, "epoch": 0.25102304248716045, "grad_norm": 0.70754075050354, "learning_rate": 1.957369447967162e-05, "loss": 1.3359, "mean_token_accuracy": 0.6519061873356501, "num_tokens": 383651660.0, "step": 2285 }, { "entropy": 1.7001720269521077, "epoch": 0.2511328993985334, "grad_norm": 0.6765170693397522, "learning_rate": 1.9573207403628638e-05, "loss": 1.2694, "mean_token_accuracy": 0.6806812932093939, "num_tokens": 383782790.0, "step": 2286 }, { "entropy": 1.723912199338277, "epoch": 0.25124275630990633, "grad_norm": 0.6241678595542908, "learning_rate": 1.957272005624751e-05, "loss": 1.4251, "mean_token_accuracy": 0.6483699729045233, "num_tokens": 383980808.0, "step": 2287 }, { "entropy": 1.7109817663828533, "epoch": 0.2513526132212793, "grad_norm": 0.6277598142623901, "learning_rate": 1.957223243754365e-05, "loss": 1.3538, "mean_token_accuracy": 0.6669852336247762, "num_tokens": 384136251.0, "step": 2288 }, { "entropy": 1.7253372172514598, "epoch": 0.2514624701326522, "grad_norm": 0.6478981375694275, "learning_rate": 1.95717445475325e-05, "loss": 1.3742, "mean_token_accuracy": 0.6614843010902405, "num_tokens": 384350465.0, "step": 2289 }, { "entropy": 1.7166267931461334, "epoch": 0.25157232704402516, "grad_norm": 0.8400107026100159, "learning_rate": 1.9571256386229494e-05, "loss": 1.3264, "mean_token_accuracy": 0.6640830139319102, "num_tokens": 384468885.0, "step": 2290 }, { "entropy": 1.663007269303004, "epoch": 0.2516821839553981, "grad_norm": 0.6767208576202393, "learning_rate": 1.9570767953650088e-05, "loss": 1.4499, "mean_token_accuracy": 0.6597578575213751, "num_tokens": 384655353.0, "step": 2291 }, { "entropy": 1.7039824028809865, "epoch": 0.25179204086677104, "grad_norm": 0.6444749236106873, "learning_rate": 1.957027924980974e-05, "loss": 1.2652, "mean_token_accuracy": 0.6771525144577026, "num_tokens": 384792003.0, "step": 2292 }, { "entropy": 1.7554666300614674, "epoch": 0.251901897778144, "grad_norm": 0.6999207139015198, "learning_rate": 1.956979027472391e-05, "loss": 1.4989, "mean_token_accuracy": 0.6585345417261124, "num_tokens": 384962519.0, "step": 2293 }, { "entropy": 1.7081598440806072, "epoch": 0.2520117546895169, "grad_norm": 0.6971555352210999, "learning_rate": 1.9569301028408084e-05, "loss": 1.5249, "mean_token_accuracy": 0.6455909609794617, "num_tokens": 385179673.0, "step": 2294 }, { "entropy": 1.7678824067115784, "epoch": 0.25212161160088986, "grad_norm": 0.6623879075050354, "learning_rate": 1.9568811510877742e-05, "loss": 1.461, "mean_token_accuracy": 0.6422907660404841, "num_tokens": 385361347.0, "step": 2295 }, { "entropy": 1.7030884822209675, "epoch": 0.2522314685122628, "grad_norm": 0.6273277401924133, "learning_rate": 1.9568321722148376e-05, "loss": 1.3993, "mean_token_accuracy": 0.6431881437699, "num_tokens": 385552421.0, "step": 2296 }, { "entropy": 1.710710922876994, "epoch": 0.2523413254236357, "grad_norm": 0.633685290813446, "learning_rate": 1.9567831662235485e-05, "loss": 1.4126, "mean_token_accuracy": 0.6471965213616689, "num_tokens": 385726239.0, "step": 2297 }, { "entropy": 1.7225467264652252, "epoch": 0.25245118233500863, "grad_norm": 0.6174577474594116, "learning_rate": 1.956734133115459e-05, "loss": 1.5379, "mean_token_accuracy": 0.618008534113566, "num_tokens": 386036858.0, "step": 2298 }, { "entropy": 1.6922851900259654, "epoch": 0.25256103924638157, "grad_norm": 0.6310000419616699, "learning_rate": 1.9566850728921196e-05, "loss": 1.5632, "mean_token_accuracy": 0.6343136032422384, "num_tokens": 386284124.0, "step": 2299 }, { "entropy": 1.7784220079580944, "epoch": 0.2526708961577545, "grad_norm": 0.6629165410995483, "learning_rate": 1.9566359855550837e-05, "loss": 1.4848, "mean_token_accuracy": 0.642065703868866, "num_tokens": 386461755.0, "step": 2300 }, { "entropy": 1.7046751876672108, "epoch": 0.25278075306912745, "grad_norm": 0.6529744863510132, "learning_rate": 1.9565868711059054e-05, "loss": 1.319, "mean_token_accuracy": 0.675203874707222, "num_tokens": 386579521.0, "step": 2301 }, { "entropy": 1.7891161839167278, "epoch": 0.2528906099805004, "grad_norm": 0.7346999645233154, "learning_rate": 1.956537729546138e-05, "loss": 1.3972, "mean_token_accuracy": 0.6581176420052847, "num_tokens": 386772847.0, "step": 2302 }, { "entropy": 1.6651329696178436, "epoch": 0.25300046689187333, "grad_norm": 0.7511558532714844, "learning_rate": 1.956488560877338e-05, "loss": 1.5864, "mean_token_accuracy": 0.6338248377044996, "num_tokens": 386982952.0, "step": 2303 }, { "entropy": 1.7310482561588287, "epoch": 0.2531103238032463, "grad_norm": 0.788306713104248, "learning_rate": 1.9564393651010603e-05, "loss": 1.5, "mean_token_accuracy": 0.6546668658653895, "num_tokens": 387175533.0, "step": 2304 }, { "entropy": 1.7673071126143138, "epoch": 0.2532201807146192, "grad_norm": 0.6239743828773499, "learning_rate": 1.9563901422188635e-05, "loss": 1.3626, "mean_token_accuracy": 0.6721245894829432, "num_tokens": 387353943.0, "step": 2305 }, { "entropy": 1.7524100144704182, "epoch": 0.25333003762599215, "grad_norm": 0.7127106189727783, "learning_rate": 1.956340892232304e-05, "loss": 1.4402, "mean_token_accuracy": 0.6585116336743037, "num_tokens": 387549620.0, "step": 2306 }, { "entropy": 1.7112055122852325, "epoch": 0.2534398945373651, "grad_norm": 0.8389919400215149, "learning_rate": 1.956291615142941e-05, "loss": 1.4082, "mean_token_accuracy": 0.6524738470713297, "num_tokens": 387680799.0, "step": 2307 }, { "entropy": 1.698311318953832, "epoch": 0.25354975144873804, "grad_norm": 0.7265674471855164, "learning_rate": 1.9562423109523346e-05, "loss": 1.3202, "mean_token_accuracy": 0.6675619333982468, "num_tokens": 387830972.0, "step": 2308 }, { "entropy": 1.741395463546117, "epoch": 0.253659608360111, "grad_norm": 0.7003577351570129, "learning_rate": 1.956192979662045e-05, "loss": 1.4395, "mean_token_accuracy": 0.6541390617688497, "num_tokens": 387975525.0, "step": 2309 }, { "entropy": 1.7452284097671509, "epoch": 0.2537694652714839, "grad_norm": 0.6919611096382141, "learning_rate": 1.956143621273633e-05, "loss": 1.4696, "mean_token_accuracy": 0.6432512650887171, "num_tokens": 388168464.0, "step": 2310 }, { "entropy": 1.7431021829446156, "epoch": 0.2538793221828568, "grad_norm": 0.7462813854217529, "learning_rate": 1.9560942357886612e-05, "loss": 1.4758, "mean_token_accuracy": 0.6642291049162546, "num_tokens": 388318737.0, "step": 2311 }, { "entropy": 1.7515191932519276, "epoch": 0.25398917909422974, "grad_norm": 0.7343306541442871, "learning_rate": 1.9560448232086927e-05, "loss": 1.2869, "mean_token_accuracy": 0.6709674050410589, "num_tokens": 388467029.0, "step": 2312 }, { "entropy": 1.7023037274678547, "epoch": 0.2540990360056027, "grad_norm": 0.7146450281143188, "learning_rate": 1.9559953835352916e-05, "loss": 1.2726, "mean_token_accuracy": 0.6788142820199331, "num_tokens": 388662606.0, "step": 2313 }, { "entropy": 1.755051185687383, "epoch": 0.2542088929169756, "grad_norm": 0.7248855233192444, "learning_rate": 1.955945916770022e-05, "loss": 1.36, "mean_token_accuracy": 0.6611029158035914, "num_tokens": 388797070.0, "step": 2314 }, { "entropy": 1.7301162481307983, "epoch": 0.25431874982834857, "grad_norm": 0.6761884689331055, "learning_rate": 1.9558964229144498e-05, "loss": 1.2911, "mean_token_accuracy": 0.6739692836999893, "num_tokens": 388932368.0, "step": 2315 }, { "entropy": 1.7212721010049183, "epoch": 0.2544286067397215, "grad_norm": 0.9040817022323608, "learning_rate": 1.9558469019701415e-05, "loss": 1.2838, "mean_token_accuracy": 0.6708403180042902, "num_tokens": 389061358.0, "step": 2316 }, { "entropy": 1.6814491947491963, "epoch": 0.25453846365109445, "grad_norm": 0.5825700759887695, "learning_rate": 1.9557973539386648e-05, "loss": 1.1823, "mean_token_accuracy": 0.6723542312781016, "num_tokens": 389254558.0, "step": 2317 }, { "entropy": 1.7602062424023945, "epoch": 0.2546483205624674, "grad_norm": 0.8323972821235657, "learning_rate": 1.955747778821587e-05, "loss": 1.5388, "mean_token_accuracy": 0.636329710483551, "num_tokens": 389451108.0, "step": 2318 }, { "entropy": 1.7078562676906586, "epoch": 0.25475817747384033, "grad_norm": 0.6664144992828369, "learning_rate": 1.9556981766204778e-05, "loss": 1.4438, "mean_token_accuracy": 0.6484460184971491, "num_tokens": 389610942.0, "step": 2319 }, { "entropy": 1.7442236840724945, "epoch": 0.25486803438521327, "grad_norm": 0.7855071425437927, "learning_rate": 1.955648547336907e-05, "loss": 1.4623, "mean_token_accuracy": 0.660668358206749, "num_tokens": 389720189.0, "step": 2320 }, { "entropy": 1.7352332770824432, "epoch": 0.2549778912965862, "grad_norm": 0.7771610021591187, "learning_rate": 1.9555988909724452e-05, "loss": 1.3173, "mean_token_accuracy": 0.65856105585893, "num_tokens": 389873275.0, "step": 2321 }, { "entropy": 1.7454373637835185, "epoch": 0.25508774820795915, "grad_norm": 0.8752892017364502, "learning_rate": 1.9555492075286637e-05, "loss": 1.2962, "mean_token_accuracy": 0.6755866954723994, "num_tokens": 389978431.0, "step": 2322 }, { "entropy": 1.752275397380193, "epoch": 0.2551976051193321, "grad_norm": 0.7649509906768799, "learning_rate": 1.955499497007136e-05, "loss": 1.5054, "mean_token_accuracy": 0.6548398286104202, "num_tokens": 390126888.0, "step": 2323 }, { "entropy": 1.7507530748844147, "epoch": 0.255307462030705, "grad_norm": 0.9283920526504517, "learning_rate": 1.955449759409434e-05, "loss": 1.5272, "mean_token_accuracy": 0.6637701342503229, "num_tokens": 390347120.0, "step": 2324 }, { "entropy": 1.7015658716360729, "epoch": 0.2554173189420779, "grad_norm": 0.6118226647377014, "learning_rate": 1.955399994737133e-05, "loss": 1.3858, "mean_token_accuracy": 0.6548377076784769, "num_tokens": 390538791.0, "step": 2325 }, { "entropy": 1.6828657388687134, "epoch": 0.25552717585345086, "grad_norm": 0.7431015968322754, "learning_rate": 1.9553502029918075e-05, "loss": 1.2795, "mean_token_accuracy": 0.67412897447745, "num_tokens": 390704930.0, "step": 2326 }, { "entropy": 1.7173655331134796, "epoch": 0.2556370327648238, "grad_norm": 0.6374660730361938, "learning_rate": 1.9553003841750334e-05, "loss": 1.4074, "mean_token_accuracy": 0.6585131883621216, "num_tokens": 390836480.0, "step": 2327 }, { "entropy": 1.6791688601175945, "epoch": 0.25574688967619674, "grad_norm": 0.6104090213775635, "learning_rate": 1.9552505382883876e-05, "loss": 1.4401, "mean_token_accuracy": 0.6437318821748098, "num_tokens": 391014632.0, "step": 2328 }, { "entropy": 1.7703735729058583, "epoch": 0.2558567465875697, "grad_norm": 0.7664080858230591, "learning_rate": 1.9552006653334478e-05, "loss": 1.3211, "mean_token_accuracy": 0.6642909646034241, "num_tokens": 391148660.0, "step": 2329 }, { "entropy": 1.6809141437212627, "epoch": 0.2559666034989426, "grad_norm": 0.5534673929214478, "learning_rate": 1.955150765311792e-05, "loss": 1.346, "mean_token_accuracy": 0.6547142614920934, "num_tokens": 391358170.0, "step": 2330 }, { "entropy": 1.7062424222628276, "epoch": 0.25607646041031557, "grad_norm": 0.7570271492004395, "learning_rate": 1.9551008382250002e-05, "loss": 1.435, "mean_token_accuracy": 0.671400730808576, "num_tokens": 391484679.0, "step": 2331 }, { "entropy": 1.7625846366087596, "epoch": 0.2561863173216885, "grad_norm": 0.7675605416297913, "learning_rate": 1.955050884074652e-05, "loss": 1.2715, "mean_token_accuracy": 0.6735413372516632, "num_tokens": 391603471.0, "step": 2332 }, { "entropy": 1.7328182061513264, "epoch": 0.25629617423306145, "grad_norm": 0.6863852739334106, "learning_rate": 1.955000902862329e-05, "loss": 1.4326, "mean_token_accuracy": 0.6669427951176962, "num_tokens": 391764718.0, "step": 2333 }, { "entropy": 1.685337871313095, "epoch": 0.2564060311444344, "grad_norm": 0.8060728311538696, "learning_rate": 1.954950894589612e-05, "loss": 1.3938, "mean_token_accuracy": 0.658026655515035, "num_tokens": 391928953.0, "step": 2334 }, { "entropy": 1.705341676870982, "epoch": 0.25651588805580733, "grad_norm": 0.7532424330711365, "learning_rate": 1.9549008592580845e-05, "loss": 1.3471, "mean_token_accuracy": 0.6698052088419596, "num_tokens": 392067702.0, "step": 2335 }, { "entropy": 1.6493215759595234, "epoch": 0.25662574496718027, "grad_norm": 0.715596616268158, "learning_rate": 1.9548507968693306e-05, "loss": 1.4487, "mean_token_accuracy": 0.655690461397171, "num_tokens": 392257962.0, "step": 2336 }, { "entropy": 1.6872890889644623, "epoch": 0.2567356018785532, "grad_norm": 0.8979227542877197, "learning_rate": 1.954800707424934e-05, "loss": 1.3481, "mean_token_accuracy": 0.6613185554742813, "num_tokens": 392388267.0, "step": 2337 }, { "entropy": 1.7475295166174571, "epoch": 0.2568454587899261, "grad_norm": 0.6289818286895752, "learning_rate": 1.95475059092648e-05, "loss": 1.3003, "mean_token_accuracy": 0.6711251934369405, "num_tokens": 392512381.0, "step": 2338 }, { "entropy": 1.7208695312341054, "epoch": 0.25695531570129904, "grad_norm": 0.6243081092834473, "learning_rate": 1.9547004473755548e-05, "loss": 1.4753, "mean_token_accuracy": 0.6438407252232233, "num_tokens": 392734191.0, "step": 2339 }, { "entropy": 1.715543230374654, "epoch": 0.257065172612672, "grad_norm": 0.5962228775024414, "learning_rate": 1.954650276773746e-05, "loss": 1.3443, "mean_token_accuracy": 0.6543434709310532, "num_tokens": 392910062.0, "step": 2340 }, { "entropy": 1.763334850470225, "epoch": 0.2571750295240449, "grad_norm": 0.7142077684402466, "learning_rate": 1.9546000791226407e-05, "loss": 1.4651, "mean_token_accuracy": 0.634983961780866, "num_tokens": 393052254.0, "step": 2341 }, { "entropy": 1.6993257999420166, "epoch": 0.25728488643541786, "grad_norm": 0.6107556819915771, "learning_rate": 1.954549854423828e-05, "loss": 1.3589, "mean_token_accuracy": 0.6691757639249166, "num_tokens": 393172773.0, "step": 2342 }, { "entropy": 1.6871144970258076, "epoch": 0.2573947433467908, "grad_norm": 0.8920505046844482, "learning_rate": 1.9544996026788978e-05, "loss": 1.2929, "mean_token_accuracy": 0.6790098696947098, "num_tokens": 393294814.0, "step": 2343 }, { "entropy": 1.7936709821224213, "epoch": 0.25750460025816374, "grad_norm": 0.8346240520477295, "learning_rate": 1.95444932388944e-05, "loss": 1.4746, "mean_token_accuracy": 0.6549756973981857, "num_tokens": 393466030.0, "step": 2344 }, { "entropy": 1.613057146469752, "epoch": 0.2576144571695367, "grad_norm": 0.8011279106140137, "learning_rate": 1.9543990180570464e-05, "loss": 1.2318, "mean_token_accuracy": 0.6732364992300669, "num_tokens": 393633963.0, "step": 2345 }, { "entropy": 1.6945912341276805, "epoch": 0.2577243140809096, "grad_norm": 0.6898323893547058, "learning_rate": 1.9543486851833085e-05, "loss": 1.3876, "mean_token_accuracy": 0.6581351061662039, "num_tokens": 393811077.0, "step": 2346 }, { "entropy": 1.7314487596352894, "epoch": 0.25783417099228256, "grad_norm": 0.6320956945419312, "learning_rate": 1.9542983252698198e-05, "loss": 1.4795, "mean_token_accuracy": 0.627448558807373, "num_tokens": 394006293.0, "step": 2347 }, { "entropy": 1.7345199584960938, "epoch": 0.2579440279036555, "grad_norm": 0.864216148853302, "learning_rate": 1.954247938318174e-05, "loss": 1.5121, "mean_token_accuracy": 0.6495955387751261, "num_tokens": 394196329.0, "step": 2348 }, { "entropy": 1.692550351222356, "epoch": 0.25805388481502844, "grad_norm": 2.546358108520508, "learning_rate": 1.954197524329966e-05, "loss": 1.189, "mean_token_accuracy": 0.6847096085548401, "num_tokens": 394355250.0, "step": 2349 }, { "entropy": 1.738300661245982, "epoch": 0.2581637417264014, "grad_norm": 0.6396709680557251, "learning_rate": 1.9541470833067916e-05, "loss": 1.3458, "mean_token_accuracy": 0.6599492281675339, "num_tokens": 394485210.0, "step": 2350 }, { "entropy": 1.7182751496632893, "epoch": 0.2582735986377743, "grad_norm": 0.7549358606338501, "learning_rate": 1.9540966152502463e-05, "loss": 1.2255, "mean_token_accuracy": 0.6786756366491318, "num_tokens": 394596219.0, "step": 2351 }, { "entropy": 1.7194179395834606, "epoch": 0.2583834555491472, "grad_norm": 0.6862432956695557, "learning_rate": 1.9540461201619283e-05, "loss": 1.223, "mean_token_accuracy": 0.680388276775678, "num_tokens": 394749092.0, "step": 2352 }, { "entropy": 1.72928582628568, "epoch": 0.25849331246052015, "grad_norm": 0.6295669078826904, "learning_rate": 1.9539955980434354e-05, "loss": 1.4289, "mean_token_accuracy": 0.6479186564683914, "num_tokens": 394905639.0, "step": 2353 }, { "entropy": 1.7089947859446208, "epoch": 0.2586031693718931, "grad_norm": 0.6928002238273621, "learning_rate": 1.9539450488963665e-05, "loss": 1.5692, "mean_token_accuracy": 0.6375485310951868, "num_tokens": 395055391.0, "step": 2354 }, { "entropy": 1.7104221880435944, "epoch": 0.25871302628326603, "grad_norm": 0.7009070515632629, "learning_rate": 1.953894472722322e-05, "loss": 1.4035, "mean_token_accuracy": 0.6675273527701696, "num_tokens": 395197551.0, "step": 2355 }, { "entropy": 1.6930966973304749, "epoch": 0.258822883194639, "grad_norm": 0.6557918787002563, "learning_rate": 1.9538438695229017e-05, "loss": 1.5066, "mean_token_accuracy": 0.657018855214119, "num_tokens": 395393178.0, "step": 2356 }, { "entropy": 1.7057071129480998, "epoch": 0.2589327401060119, "grad_norm": 0.6957089900970459, "learning_rate": 1.9537932392997083e-05, "loss": 1.4734, "mean_token_accuracy": 0.6418667435646057, "num_tokens": 395564794.0, "step": 2357 }, { "entropy": 1.766910860935847, "epoch": 0.25904259701738486, "grad_norm": 0.6810352802276611, "learning_rate": 1.9537425820543427e-05, "loss": 1.5344, "mean_token_accuracy": 0.6410610576470693, "num_tokens": 395751580.0, "step": 2358 }, { "entropy": 1.6266848941644032, "epoch": 0.2591524539287578, "grad_norm": 0.6686707139015198, "learning_rate": 1.95369189778841e-05, "loss": 1.2843, "mean_token_accuracy": 0.6765497128168741, "num_tokens": 395922802.0, "step": 2359 }, { "entropy": 1.7174728314081829, "epoch": 0.25926231084013074, "grad_norm": 0.6954838633537292, "learning_rate": 1.9536411865035126e-05, "loss": 1.5106, "mean_token_accuracy": 0.6519478385647138, "num_tokens": 396112141.0, "step": 2360 }, { "entropy": 1.7171052594979603, "epoch": 0.2593721677515037, "grad_norm": 0.715525209903717, "learning_rate": 1.953590448201257e-05, "loss": 1.3681, "mean_token_accuracy": 0.6634097794691721, "num_tokens": 396273450.0, "step": 2361 }, { "entropy": 1.7169589797655742, "epoch": 0.2594820246628766, "grad_norm": 0.7676208019256592, "learning_rate": 1.953539682883248e-05, "loss": 1.4953, "mean_token_accuracy": 0.6550164272387823, "num_tokens": 396436428.0, "step": 2362 }, { "entropy": 1.7600337266921997, "epoch": 0.25959188157424956, "grad_norm": 0.665710985660553, "learning_rate": 1.953488890551093e-05, "loss": 1.4444, "mean_token_accuracy": 0.6462709407011668, "num_tokens": 396612360.0, "step": 2363 }, { "entropy": 1.772403875986735, "epoch": 0.2597017384856225, "grad_norm": 0.7826092839241028, "learning_rate": 1.953438071206399e-05, "loss": 1.6427, "mean_token_accuracy": 0.607773964603742, "num_tokens": 396858383.0, "step": 2364 }, { "entropy": 1.7608717381954193, "epoch": 0.2598115953969954, "grad_norm": 0.7619585990905762, "learning_rate": 1.9533872248507743e-05, "loss": 1.4618, "mean_token_accuracy": 0.6620588004589081, "num_tokens": 396976485.0, "step": 2365 }, { "entropy": 1.7266217470169067, "epoch": 0.25992145230836833, "grad_norm": 0.7657930850982666, "learning_rate": 1.9533363514858285e-05, "loss": 1.4424, "mean_token_accuracy": 0.655510276556015, "num_tokens": 397162060.0, "step": 2366 }, { "entropy": 1.7765484750270844, "epoch": 0.26003130921974127, "grad_norm": 0.827907145023346, "learning_rate": 1.9532854511131723e-05, "loss": 1.4477, "mean_token_accuracy": 0.6488782366116842, "num_tokens": 397310390.0, "step": 2367 }, { "entropy": 1.7494306067625682, "epoch": 0.2601411661311142, "grad_norm": 0.808404803276062, "learning_rate": 1.9532345237344154e-05, "loss": 1.6514, "mean_token_accuracy": 0.637022852897644, "num_tokens": 397545166.0, "step": 2368 }, { "entropy": 1.7077105244000752, "epoch": 0.26025102304248715, "grad_norm": 0.6834400296211243, "learning_rate": 1.9531835693511706e-05, "loss": 1.4438, "mean_token_accuracy": 0.6453288247187933, "num_tokens": 397702370.0, "step": 2369 }, { "entropy": 1.7838773429393768, "epoch": 0.2603608799538601, "grad_norm": 0.6579210758209229, "learning_rate": 1.95313258796505e-05, "loss": 1.5525, "mean_token_accuracy": 0.6188159386316935, "num_tokens": 397913924.0, "step": 2370 }, { "entropy": 1.6815649668375652, "epoch": 0.26047073686523303, "grad_norm": 0.6755560636520386, "learning_rate": 1.953081579577668e-05, "loss": 1.3892, "mean_token_accuracy": 0.6632423549890518, "num_tokens": 398065506.0, "step": 2371 }, { "entropy": 1.7138336102167766, "epoch": 0.260580593776606, "grad_norm": 0.6806965470314026, "learning_rate": 1.9530305441906384e-05, "loss": 1.5041, "mean_token_accuracy": 0.634550929069519, "num_tokens": 398251769.0, "step": 2372 }, { "entropy": 1.7888704140981038, "epoch": 0.2606904506879789, "grad_norm": 0.7154719829559326, "learning_rate": 1.952979481805576e-05, "loss": 1.3385, "mean_token_accuracy": 0.6612465778986613, "num_tokens": 398355980.0, "step": 2373 }, { "entropy": 1.6614461243152618, "epoch": 0.26080030759935185, "grad_norm": 0.6078544855117798, "learning_rate": 1.9529283924240976e-05, "loss": 1.362, "mean_token_accuracy": 0.660592312614123, "num_tokens": 398553184.0, "step": 2374 }, { "entropy": 1.6379695137341816, "epoch": 0.2609101645107248, "grad_norm": 0.6618489623069763, "learning_rate": 1.95287727604782e-05, "loss": 1.3702, "mean_token_accuracy": 0.6538491994142532, "num_tokens": 398771887.0, "step": 2375 }, { "entropy": 1.7040774722894032, "epoch": 0.26102002142209774, "grad_norm": 0.6134635806083679, "learning_rate": 1.9528261326783608e-05, "loss": 1.2476, "mean_token_accuracy": 0.6677992393573126, "num_tokens": 398964496.0, "step": 2376 }, { "entropy": 1.6660463015238445, "epoch": 0.2611298783334707, "grad_norm": 0.8864745497703552, "learning_rate": 1.9527749623173388e-05, "loss": 1.2948, "mean_token_accuracy": 0.6655771185954412, "num_tokens": 399113614.0, "step": 2377 }, { "entropy": 1.721436192591985, "epoch": 0.2612397352448436, "grad_norm": 0.7737887501716614, "learning_rate": 1.9527237649663736e-05, "loss": 1.3916, "mean_token_accuracy": 0.655788873632749, "num_tokens": 399281508.0, "step": 2378 }, { "entropy": 1.6972166001796722, "epoch": 0.2613495921562165, "grad_norm": 0.6834520697593689, "learning_rate": 1.952672540627085e-05, "loss": 1.2625, "mean_token_accuracy": 0.6793500383694967, "num_tokens": 399438374.0, "step": 2379 }, { "entropy": 1.718246688445409, "epoch": 0.26145944906758944, "grad_norm": 0.5676518082618713, "learning_rate": 1.9526212893010955e-05, "loss": 1.4461, "mean_token_accuracy": 0.6427881369988123, "num_tokens": 399631632.0, "step": 2380 }, { "entropy": 1.731793224811554, "epoch": 0.2615693059789624, "grad_norm": 0.6785540580749512, "learning_rate": 1.9525700109900257e-05, "loss": 1.6035, "mean_token_accuracy": 0.6242391665776571, "num_tokens": 399854358.0, "step": 2381 }, { "entropy": 1.729330152273178, "epoch": 0.2616791628903353, "grad_norm": 0.675688624382019, "learning_rate": 1.9525187056955e-05, "loss": 1.428, "mean_token_accuracy": 0.6407847801844279, "num_tokens": 400026748.0, "step": 2382 }, { "entropy": 1.7342185775438945, "epoch": 0.26178901980170827, "grad_norm": 0.6857423782348633, "learning_rate": 1.9524673734191407e-05, "loss": 1.5219, "mean_token_accuracy": 0.6373167236646017, "num_tokens": 400228664.0, "step": 2383 }, { "entropy": 1.71209650238355, "epoch": 0.2618988767130812, "grad_norm": 0.8391352891921997, "learning_rate": 1.952416014162573e-05, "loss": 1.3521, "mean_token_accuracy": 0.6658279597759247, "num_tokens": 400383096.0, "step": 2384 }, { "entropy": 1.6978493134180705, "epoch": 0.26200873362445415, "grad_norm": 0.6963003873825073, "learning_rate": 1.952364627927423e-05, "loss": 1.3041, "mean_token_accuracy": 0.6771473834911982, "num_tokens": 400554043.0, "step": 2385 }, { "entropy": 1.7559028963247936, "epoch": 0.2621185905358271, "grad_norm": 0.6737483739852905, "learning_rate": 1.9523132147153167e-05, "loss": 1.3949, "mean_token_accuracy": 0.6481351753075918, "num_tokens": 400803149.0, "step": 2386 }, { "entropy": 1.7359568774700165, "epoch": 0.26222844744720003, "grad_norm": 0.8350893259048462, "learning_rate": 1.952261774527881e-05, "loss": 1.5246, "mean_token_accuracy": 0.6469388355811437, "num_tokens": 400981816.0, "step": 2387 }, { "entropy": 1.6978191137313843, "epoch": 0.26233830435857297, "grad_norm": 0.5784783959388733, "learning_rate": 1.9522103073667444e-05, "loss": 1.3718, "mean_token_accuracy": 0.6644446154435476, "num_tokens": 401135137.0, "step": 2388 }, { "entropy": 1.7019244929154713, "epoch": 0.2624481612699459, "grad_norm": 0.6432830095291138, "learning_rate": 1.9521588132335352e-05, "loss": 1.303, "mean_token_accuracy": 0.6735948820908865, "num_tokens": 401289759.0, "step": 2389 }, { "entropy": 1.7148842712243397, "epoch": 0.26255801818131885, "grad_norm": 0.7368968725204468, "learning_rate": 1.952107292129884e-05, "loss": 1.4875, "mean_token_accuracy": 0.6443512886762619, "num_tokens": 401476228.0, "step": 2390 }, { "entropy": 1.6623288591702778, "epoch": 0.2626678750926918, "grad_norm": 0.6117868423461914, "learning_rate": 1.952055744057421e-05, "loss": 1.316, "mean_token_accuracy": 0.6717942307392756, "num_tokens": 401697757.0, "step": 2391 }, { "entropy": 1.7370295623938243, "epoch": 0.2627777320040647, "grad_norm": 0.5878593921661377, "learning_rate": 1.9520041690177775e-05, "loss": 1.3522, "mean_token_accuracy": 0.6460974911848704, "num_tokens": 401882412.0, "step": 2392 }, { "entropy": 1.7986371119817097, "epoch": 0.2628875889154376, "grad_norm": 0.6486433744430542, "learning_rate": 1.9519525670125857e-05, "loss": 1.3154, "mean_token_accuracy": 0.6704276005427042, "num_tokens": 402013794.0, "step": 2393 }, { "entropy": 1.6841194530328114, "epoch": 0.26299744582681056, "grad_norm": 0.5892508625984192, "learning_rate": 1.951900938043479e-05, "loss": 1.4242, "mean_token_accuracy": 0.6558033525943756, "num_tokens": 402207088.0, "step": 2394 }, { "entropy": 1.7395052810509999, "epoch": 0.2631073027381835, "grad_norm": 0.7096725106239319, "learning_rate": 1.951849282112092e-05, "loss": 1.4639, "mean_token_accuracy": 0.6478641132513682, "num_tokens": 402393514.0, "step": 2395 }, { "entropy": 1.7166326642036438, "epoch": 0.26321715964955644, "grad_norm": 0.6767532229423523, "learning_rate": 1.9517975992200588e-05, "loss": 1.4591, "mean_token_accuracy": 0.6498099068800608, "num_tokens": 402598639.0, "step": 2396 }, { "entropy": 1.679230511188507, "epoch": 0.2633270165609294, "grad_norm": 0.6896412372589111, "learning_rate": 1.9517458893690154e-05, "loss": 1.4353, "mean_token_accuracy": 0.6464412113030752, "num_tokens": 402787828.0, "step": 2397 }, { "entropy": 1.7297282814979553, "epoch": 0.2634368734723023, "grad_norm": 0.8391108512878418, "learning_rate": 1.9516941525605985e-05, "loss": 1.5188, "mean_token_accuracy": 0.6511440972487131, "num_tokens": 402958227.0, "step": 2398 }, { "entropy": 1.7749987840652466, "epoch": 0.26354673038367527, "grad_norm": 0.7650160193443298, "learning_rate": 1.9516423887964454e-05, "loss": 1.3217, "mean_token_accuracy": 0.6643730302651724, "num_tokens": 403099927.0, "step": 2399 }, { "entropy": 1.6371342937151592, "epoch": 0.2636565872950482, "grad_norm": 0.6480301022529602, "learning_rate": 1.9515905980781944e-05, "loss": 1.3876, "mean_token_accuracy": 0.6590510805447897, "num_tokens": 403265885.0, "step": 2400 }, { "entropy": 1.686698744694392, "epoch": 0.26376644420642115, "grad_norm": 0.6983146667480469, "learning_rate": 1.9515387804074845e-05, "loss": 1.4885, "mean_token_accuracy": 0.6543626685937246, "num_tokens": 403427044.0, "step": 2401 }, { "entropy": 1.7273488442103069, "epoch": 0.2638763011177941, "grad_norm": 0.7281332015991211, "learning_rate": 1.9514869357859565e-05, "loss": 1.4179, "mean_token_accuracy": 0.651305079460144, "num_tokens": 403589146.0, "step": 2402 }, { "entropy": 1.6473909517129262, "epoch": 0.26398615802916703, "grad_norm": 0.6067411303520203, "learning_rate": 1.95143506421525e-05, "loss": 1.4189, "mean_token_accuracy": 0.6576470931371053, "num_tokens": 403788427.0, "step": 2403 }, { "entropy": 1.6839656432469685, "epoch": 0.26409601494053997, "grad_norm": 0.7861200571060181, "learning_rate": 1.9513831656970078e-05, "loss": 1.2481, "mean_token_accuracy": 0.6709416210651398, "num_tokens": 403935089.0, "step": 2404 }, { "entropy": 1.656613161166509, "epoch": 0.2642058718519129, "grad_norm": 0.5541518926620483, "learning_rate": 1.951331240232872e-05, "loss": 1.5037, "mean_token_accuracy": 0.6444850116968155, "num_tokens": 404176482.0, "step": 2405 }, { "entropy": 1.6887876590092976, "epoch": 0.2643157287632858, "grad_norm": 0.6820752620697021, "learning_rate": 1.9512792878244863e-05, "loss": 1.3964, "mean_token_accuracy": 0.6626077542702357, "num_tokens": 404329820.0, "step": 2406 }, { "entropy": 1.681986967722575, "epoch": 0.26442558567465874, "grad_norm": 0.6239963173866272, "learning_rate": 1.9512273084734942e-05, "loss": 1.3412, "mean_token_accuracy": 0.6608551541964213, "num_tokens": 404576812.0, "step": 2407 }, { "entropy": 1.695441444714864, "epoch": 0.2645354425860317, "grad_norm": 0.8196799755096436, "learning_rate": 1.9511753021815418e-05, "loss": 1.53, "mean_token_accuracy": 0.6412807106971741, "num_tokens": 404766940.0, "step": 2408 }, { "entropy": 1.6683510939280193, "epoch": 0.2646452994974046, "grad_norm": 0.6213725805282593, "learning_rate": 1.9511232689502744e-05, "loss": 1.403, "mean_token_accuracy": 0.6513712704181671, "num_tokens": 404960575.0, "step": 2409 }, { "entropy": 1.6807579199473064, "epoch": 0.26475515640877756, "grad_norm": 0.6994633078575134, "learning_rate": 1.9510712087813392e-05, "loss": 1.4742, "mean_token_accuracy": 0.6587166041135788, "num_tokens": 405117547.0, "step": 2410 }, { "entropy": 1.7289150754610698, "epoch": 0.2648650133201505, "grad_norm": 0.7993577122688293, "learning_rate": 1.9510191216763836e-05, "loss": 1.4124, "mean_token_accuracy": 0.6584879656632742, "num_tokens": 405246798.0, "step": 2411 }, { "entropy": 1.664443125327428, "epoch": 0.26497487023152344, "grad_norm": 0.6626906991004944, "learning_rate": 1.9509670076370563e-05, "loss": 1.3149, "mean_token_accuracy": 0.6721779108047485, "num_tokens": 405377290.0, "step": 2412 }, { "entropy": 1.7044403950373332, "epoch": 0.2650847271428964, "grad_norm": 0.7034863829612732, "learning_rate": 1.9509148666650065e-05, "loss": 1.3123, "mean_token_accuracy": 0.6630191802978516, "num_tokens": 405550836.0, "step": 2413 }, { "entropy": 1.7661688923835754, "epoch": 0.2651945840542693, "grad_norm": 0.7401782274246216, "learning_rate": 1.9508626987618847e-05, "loss": 1.3626, "mean_token_accuracy": 0.6758743226528168, "num_tokens": 405714724.0, "step": 2414 }, { "entropy": 1.65723983446757, "epoch": 0.26530444096564226, "grad_norm": 0.7574884295463562, "learning_rate": 1.9508105039293422e-05, "loss": 1.385, "mean_token_accuracy": 0.6691581656535467, "num_tokens": 405882381.0, "step": 2415 }, { "entropy": 1.7135234475135803, "epoch": 0.2654142978770152, "grad_norm": 0.670354425907135, "learning_rate": 1.9507582821690308e-05, "loss": 1.3597, "mean_token_accuracy": 0.6573469589153925, "num_tokens": 406058875.0, "step": 2416 }, { "entropy": 1.6622610290845234, "epoch": 0.26552415478838814, "grad_norm": 0.6127861142158508, "learning_rate": 1.9507060334826024e-05, "loss": 1.3034, "mean_token_accuracy": 0.6667560587326685, "num_tokens": 406201318.0, "step": 2417 }, { "entropy": 1.692472666501999, "epoch": 0.2656340116997611, "grad_norm": 0.6307097673416138, "learning_rate": 1.9506537578717116e-05, "loss": 1.3596, "mean_token_accuracy": 0.6713347236315409, "num_tokens": 406390930.0, "step": 2418 }, { "entropy": 1.7068506876627605, "epoch": 0.26574386861113397, "grad_norm": 0.6524941325187683, "learning_rate": 1.9506014553380134e-05, "loss": 1.3678, "mean_token_accuracy": 0.6618810991446177, "num_tokens": 406554961.0, "step": 2419 }, { "entropy": 1.7632996737957, "epoch": 0.2658537255225069, "grad_norm": 0.5874424576759338, "learning_rate": 1.9505491258831615e-05, "loss": 1.4096, "mean_token_accuracy": 0.6508981684843699, "num_tokens": 406756062.0, "step": 2420 }, { "entropy": 1.6941703458627064, "epoch": 0.26596358243387985, "grad_norm": 0.7010703086853027, "learning_rate": 1.9504967695088135e-05, "loss": 1.2203, "mean_token_accuracy": 0.6803840696811676, "num_tokens": 406885417.0, "step": 2421 }, { "entropy": 1.6123863955338795, "epoch": 0.2660734393452528, "grad_norm": 0.7297987341880798, "learning_rate": 1.9504443862166258e-05, "loss": 1.3876, "mean_token_accuracy": 0.664091577132543, "num_tokens": 407036264.0, "step": 2422 }, { "entropy": 1.6742206513881683, "epoch": 0.26618329625662573, "grad_norm": 0.6762830018997192, "learning_rate": 1.9503919760082566e-05, "loss": 1.4989, "mean_token_accuracy": 0.6637553547819456, "num_tokens": 407194733.0, "step": 2423 }, { "entropy": 1.6779598693052928, "epoch": 0.2662931531679987, "grad_norm": 0.6086080074310303, "learning_rate": 1.9503395388853646e-05, "loss": 1.3268, "mean_token_accuracy": 0.6625054279963175, "num_tokens": 407344026.0, "step": 2424 }, { "entropy": 1.7654085954030354, "epoch": 0.2664030100793716, "grad_norm": 0.7018662691116333, "learning_rate": 1.950287074849609e-05, "loss": 1.5278, "mean_token_accuracy": 0.6402523169914881, "num_tokens": 407520433.0, "step": 2425 }, { "entropy": 1.6793859700361888, "epoch": 0.26651286699074456, "grad_norm": 0.7431981563568115, "learning_rate": 1.9502345839026508e-05, "loss": 1.4142, "mean_token_accuracy": 0.6648030032714208, "num_tokens": 407670492.0, "step": 2426 }, { "entropy": 1.7211259802182515, "epoch": 0.2666227239021175, "grad_norm": 0.7325606346130371, "learning_rate": 1.9501820660461515e-05, "loss": 1.377, "mean_token_accuracy": 0.6672718872626623, "num_tokens": 407799323.0, "step": 2427 }, { "entropy": 1.6678573687871296, "epoch": 0.26673258081349044, "grad_norm": 0.7462669014930725, "learning_rate": 1.9501295212817725e-05, "loss": 1.3763, "mean_token_accuracy": 0.6571807414293289, "num_tokens": 408006005.0, "step": 2428 }, { "entropy": 1.7572451035181682, "epoch": 0.2668424377248634, "grad_norm": 0.5830179452896118, "learning_rate": 1.9500769496111774e-05, "loss": 1.5124, "mean_token_accuracy": 0.640462522705396, "num_tokens": 408199992.0, "step": 2429 }, { "entropy": 1.6478756566842396, "epoch": 0.2669522946362363, "grad_norm": 0.8301806449890137, "learning_rate": 1.95002435103603e-05, "loss": 1.2939, "mean_token_accuracy": 0.6621855149666468, "num_tokens": 408334984.0, "step": 2430 }, { "entropy": 1.7445407410462697, "epoch": 0.26706215154760926, "grad_norm": 0.67365562915802, "learning_rate": 1.949971725557995e-05, "loss": 1.3452, "mean_token_accuracy": 0.6596545328696569, "num_tokens": 408455952.0, "step": 2431 }, { "entropy": 1.6797068814436595, "epoch": 0.2671720084589822, "grad_norm": 0.7494860291481018, "learning_rate": 1.9499190731787376e-05, "loss": 1.4513, "mean_token_accuracy": 0.6530092904965082, "num_tokens": 408607105.0, "step": 2432 }, { "entropy": 1.7337973912556965, "epoch": 0.2672818653703551, "grad_norm": 0.6328655481338501, "learning_rate": 1.9498663938999244e-05, "loss": 1.4147, "mean_token_accuracy": 0.6540708690881729, "num_tokens": 408779812.0, "step": 2433 }, { "entropy": 1.7009416421254475, "epoch": 0.26739172228172803, "grad_norm": 0.6963172554969788, "learning_rate": 1.949813687723223e-05, "loss": 1.2986, "mean_token_accuracy": 0.6816080609957377, "num_tokens": 408938627.0, "step": 2434 }, { "entropy": 1.693843275308609, "epoch": 0.26750157919310097, "grad_norm": 0.7338165640830994, "learning_rate": 1.9497609546503017e-05, "loss": 1.2886, "mean_token_accuracy": 0.6752390662829081, "num_tokens": 409084769.0, "step": 2435 }, { "entropy": 1.7020284434159596, "epoch": 0.2676114361044739, "grad_norm": 0.6143628358840942, "learning_rate": 1.9497081946828287e-05, "loss": 1.5255, "mean_token_accuracy": 0.648758257428805, "num_tokens": 409265453.0, "step": 2436 }, { "entropy": 1.727043906847636, "epoch": 0.26772129301584685, "grad_norm": 0.5915109515190125, "learning_rate": 1.9496554078224743e-05, "loss": 1.5126, "mean_token_accuracy": 0.6246416866779327, "num_tokens": 409486969.0, "step": 2437 }, { "entropy": 1.6795673767725627, "epoch": 0.2678311499272198, "grad_norm": 0.6236563920974731, "learning_rate": 1.949602594070909e-05, "loss": 1.4403, "mean_token_accuracy": 0.6531639695167542, "num_tokens": 409666706.0, "step": 2438 }, { "entropy": 1.6891235609849293, "epoch": 0.26794100683859273, "grad_norm": 0.774161696434021, "learning_rate": 1.949549753429804e-05, "loss": 1.3583, "mean_token_accuracy": 0.6626383264859518, "num_tokens": 409838047.0, "step": 2439 }, { "entropy": 1.737920731306076, "epoch": 0.2680508637499657, "grad_norm": 0.9070193767547607, "learning_rate": 1.949496885900833e-05, "loss": 1.4545, "mean_token_accuracy": 0.6440466195344925, "num_tokens": 410038258.0, "step": 2440 }, { "entropy": 1.6907021900018055, "epoch": 0.2681607206613386, "grad_norm": 0.7839999198913574, "learning_rate": 1.949443991485668e-05, "loss": 1.2923, "mean_token_accuracy": 0.6767375022172928, "num_tokens": 410213875.0, "step": 2441 }, { "entropy": 1.6959629952907562, "epoch": 0.26827057757271155, "grad_norm": 0.7793353199958801, "learning_rate": 1.9493910701859832e-05, "loss": 1.2393, "mean_token_accuracy": 0.6860839327176412, "num_tokens": 410367133.0, "step": 2442 }, { "entropy": 1.7073424855868022, "epoch": 0.2683804344840845, "grad_norm": 0.688703179359436, "learning_rate": 1.949338122003454e-05, "loss": 1.4447, "mean_token_accuracy": 0.6503096967935562, "num_tokens": 410551050.0, "step": 2443 }, { "entropy": 1.7885093291600545, "epoch": 0.26849029139545744, "grad_norm": 0.6733722686767578, "learning_rate": 1.949285146939756e-05, "loss": 1.5393, "mean_token_accuracy": 0.6366397589445114, "num_tokens": 410772639.0, "step": 2444 }, { "entropy": 1.7265475789705913, "epoch": 0.2686001483068304, "grad_norm": 0.7780793905258179, "learning_rate": 1.9492321449965657e-05, "loss": 1.3903, "mean_token_accuracy": 0.6643284608920416, "num_tokens": 410931367.0, "step": 2445 }, { "entropy": 1.6977720061937969, "epoch": 0.26871000521820326, "grad_norm": 0.7139464020729065, "learning_rate": 1.949179116175561e-05, "loss": 1.3582, "mean_token_accuracy": 0.6607271184523901, "num_tokens": 411120350.0, "step": 2446 }, { "entropy": 1.7429304122924805, "epoch": 0.2688198621295762, "grad_norm": 0.7446684241294861, "learning_rate": 1.9491260604784196e-05, "loss": 1.5354, "mean_token_accuracy": 0.6431114623943964, "num_tokens": 411333636.0, "step": 2447 }, { "entropy": 1.6738013923168182, "epoch": 0.26892971904094914, "grad_norm": 0.7107298970222473, "learning_rate": 1.949072977906821e-05, "loss": 1.3707, "mean_token_accuracy": 0.6652788172165552, "num_tokens": 411496037.0, "step": 2448 }, { "entropy": 1.7873223821322124, "epoch": 0.2690395759523221, "grad_norm": 0.749606728553772, "learning_rate": 1.9490198684624456e-05, "loss": 1.5195, "mean_token_accuracy": 0.6423113072911898, "num_tokens": 411652722.0, "step": 2449 }, { "entropy": 1.7064524590969086, "epoch": 0.269149432863695, "grad_norm": 0.6809104084968567, "learning_rate": 1.9489667321469733e-05, "loss": 1.492, "mean_token_accuracy": 0.6354218969742457, "num_tokens": 411817928.0, "step": 2450 }, { "entropy": 1.6833800375461578, "epoch": 0.26925928977506797, "grad_norm": 0.7017293572425842, "learning_rate": 1.948913568962087e-05, "loss": 1.4219, "mean_token_accuracy": 0.6560679723819097, "num_tokens": 411959682.0, "step": 2451 }, { "entropy": 1.7220069666703541, "epoch": 0.2693691466864409, "grad_norm": 0.7343468070030212, "learning_rate": 1.9488603789094687e-05, "loss": 1.1667, "mean_token_accuracy": 0.6866755535205206, "num_tokens": 412080212.0, "step": 2452 }, { "entropy": 1.694863756497701, "epoch": 0.26947900359781385, "grad_norm": 0.5863001942634583, "learning_rate": 1.9488071619908016e-05, "loss": 1.3702, "mean_token_accuracy": 0.6538634697596232, "num_tokens": 412248760.0, "step": 2453 }, { "entropy": 1.7061160604159038, "epoch": 0.2695888605091868, "grad_norm": 0.6747974753379822, "learning_rate": 1.9487539182077707e-05, "loss": 1.4749, "mean_token_accuracy": 0.6355902552604675, "num_tokens": 412431548.0, "step": 2454 }, { "entropy": 1.7680587371190388, "epoch": 0.26969871742055973, "grad_norm": 0.6596987843513489, "learning_rate": 1.9487006475620606e-05, "loss": 1.4807, "mean_token_accuracy": 0.6433413873116175, "num_tokens": 412608344.0, "step": 2455 }, { "entropy": 1.6938173572222393, "epoch": 0.26980857433193267, "grad_norm": 0.6757233738899231, "learning_rate": 1.9486473500553575e-05, "loss": 1.4271, "mean_token_accuracy": 0.6577209134896597, "num_tokens": 412769029.0, "step": 2456 }, { "entropy": 1.666108379761378, "epoch": 0.2699184312433056, "grad_norm": 0.8024738430976868, "learning_rate": 1.9485940256893477e-05, "loss": 1.4351, "mean_token_accuracy": 0.6482095420360565, "num_tokens": 412906545.0, "step": 2457 }, { "entropy": 1.7094764014085133, "epoch": 0.27002828815467855, "grad_norm": 0.6070849299430847, "learning_rate": 1.94854067446572e-05, "loss": 1.3914, "mean_token_accuracy": 0.658433347940445, "num_tokens": 413051352.0, "step": 2458 }, { "entropy": 1.740122099717458, "epoch": 0.2701381450660515, "grad_norm": 0.7562992572784424, "learning_rate": 1.948487296386162e-05, "loss": 1.4434, "mean_token_accuracy": 0.664603571097056, "num_tokens": 413255124.0, "step": 2459 }, { "entropy": 1.7619945506254833, "epoch": 0.2702480019774244, "grad_norm": 0.6791596412658691, "learning_rate": 1.9484338914523634e-05, "loss": 1.4725, "mean_token_accuracy": 0.6506190747022629, "num_tokens": 413466529.0, "step": 2460 }, { "entropy": 1.6560695469379425, "epoch": 0.2703578588887973, "grad_norm": 0.6612438559532166, "learning_rate": 1.9483804596660144e-05, "loss": 1.2345, "mean_token_accuracy": 0.6793079773585001, "num_tokens": 413646644.0, "step": 2461 }, { "entropy": 1.7678084075450897, "epoch": 0.27046771580017026, "grad_norm": 0.7152573466300964, "learning_rate": 1.9483270010288064e-05, "loss": 1.5106, "mean_token_accuracy": 0.6360708425442377, "num_tokens": 413830566.0, "step": 2462 }, { "entropy": 1.715853621562322, "epoch": 0.2705775727115432, "grad_norm": 0.700097382068634, "learning_rate": 1.948273515542431e-05, "loss": 1.4053, "mean_token_accuracy": 0.6485730955998102, "num_tokens": 413993907.0, "step": 2463 }, { "entropy": 1.741620510816574, "epoch": 0.27068742962291614, "grad_norm": 0.6623098850250244, "learning_rate": 1.948220003208581e-05, "loss": 1.5757, "mean_token_accuracy": 0.6438944588104883, "num_tokens": 414233249.0, "step": 2464 }, { "entropy": 1.755933254957199, "epoch": 0.2707972865342891, "grad_norm": 0.7296027541160583, "learning_rate": 1.9481664640289503e-05, "loss": 1.6268, "mean_token_accuracy": 0.628890261054039, "num_tokens": 414414584.0, "step": 2465 }, { "entropy": 1.784269521633784, "epoch": 0.270907143445662, "grad_norm": 0.6351885795593262, "learning_rate": 1.9481128980052328e-05, "loss": 1.4174, "mean_token_accuracy": 0.6424904266993204, "num_tokens": 414551766.0, "step": 2466 }, { "entropy": 1.6945275962352753, "epoch": 0.27101700035703497, "grad_norm": 0.7583399415016174, "learning_rate": 1.948059305139125e-05, "loss": 1.2475, "mean_token_accuracy": 0.6736636360486349, "num_tokens": 414688590.0, "step": 2467 }, { "entropy": 1.7745845814545949, "epoch": 0.2711268572684079, "grad_norm": 0.7532066106796265, "learning_rate": 1.9480056854323214e-05, "loss": 1.3369, "mean_token_accuracy": 0.6628710478544235, "num_tokens": 414788942.0, "step": 2468 }, { "entropy": 1.6538518170515697, "epoch": 0.27123671417978085, "grad_norm": 0.5937896370887756, "learning_rate": 1.9479520388865206e-05, "loss": 1.4212, "mean_token_accuracy": 0.6629629383484522, "num_tokens": 414982964.0, "step": 2469 }, { "entropy": 1.7156404356161754, "epoch": 0.2713465710911538, "grad_norm": 0.7552958726882935, "learning_rate": 1.9478983655034195e-05, "loss": 1.4249, "mean_token_accuracy": 0.6556862344344457, "num_tokens": 415179129.0, "step": 2470 }, { "entropy": 1.677052636941274, "epoch": 0.27145642800252673, "grad_norm": 0.6952337622642517, "learning_rate": 1.9478446652847177e-05, "loss": 1.404, "mean_token_accuracy": 0.6581928680340449, "num_tokens": 415343003.0, "step": 2471 }, { "entropy": 1.7252871096134186, "epoch": 0.27156628491389967, "grad_norm": 0.6775010824203491, "learning_rate": 1.9477909382321138e-05, "loss": 1.2959, "mean_token_accuracy": 0.6630641867717108, "num_tokens": 415454975.0, "step": 2472 }, { "entropy": 1.7001695533593495, "epoch": 0.2716761418252726, "grad_norm": 0.6910304427146912, "learning_rate": 1.947737184347309e-05, "loss": 1.4267, "mean_token_accuracy": 0.6569666018088659, "num_tokens": 415659032.0, "step": 2473 }, { "entropy": 1.6541123191515605, "epoch": 0.2717859987366455, "grad_norm": 0.6405661106109619, "learning_rate": 1.9476834036320044e-05, "loss": 1.3477, "mean_token_accuracy": 0.6658432185649872, "num_tokens": 415812784.0, "step": 2474 }, { "entropy": 1.6903918882211049, "epoch": 0.27189585564801844, "grad_norm": 0.6007583141326904, "learning_rate": 1.9476295960879015e-05, "loss": 1.4172, "mean_token_accuracy": 0.6355639646450678, "num_tokens": 416109941.0, "step": 2475 }, { "entropy": 1.7218912939230602, "epoch": 0.2720057125593914, "grad_norm": 0.693682074546814, "learning_rate": 1.947575761716704e-05, "loss": 1.3423, "mean_token_accuracy": 0.6594141821066538, "num_tokens": 416216952.0, "step": 2476 }, { "entropy": 1.699045052131017, "epoch": 0.2721155694707643, "grad_norm": 0.884042501449585, "learning_rate": 1.947521900520116e-05, "loss": 1.4059, "mean_token_accuracy": 0.6671479294697443, "num_tokens": 416341238.0, "step": 2477 }, { "entropy": 1.7600041528542836, "epoch": 0.27222542638213726, "grad_norm": 0.7523099184036255, "learning_rate": 1.9474680124998414e-05, "loss": 1.4538, "mean_token_accuracy": 0.645432690779368, "num_tokens": 416550006.0, "step": 2478 }, { "entropy": 1.7305284440517426, "epoch": 0.2723352832935102, "grad_norm": 0.8291406631469727, "learning_rate": 1.9474140976575862e-05, "loss": 1.4125, "mean_token_accuracy": 0.6581102510293325, "num_tokens": 416699068.0, "step": 2479 }, { "entropy": 1.7285153965155284, "epoch": 0.27244514020488314, "grad_norm": 0.6339166164398193, "learning_rate": 1.9473601559950566e-05, "loss": 1.3232, "mean_token_accuracy": 0.6745259314775467, "num_tokens": 416836056.0, "step": 2480 }, { "entropy": 1.6818051934242249, "epoch": 0.2725549971162561, "grad_norm": 0.6953151226043701, "learning_rate": 1.9473061875139603e-05, "loss": 1.3799, "mean_token_accuracy": 0.6700175007184347, "num_tokens": 417010817.0, "step": 2481 }, { "entropy": 1.744279553492864, "epoch": 0.272664854027629, "grad_norm": 0.6217046976089478, "learning_rate": 1.9472521922160044e-05, "loss": 1.395, "mean_token_accuracy": 0.6566085666418076, "num_tokens": 417189250.0, "step": 2482 }, { "entropy": 1.7224168479442596, "epoch": 0.27277471093900196, "grad_norm": 0.6365125775337219, "learning_rate": 1.9471981701028988e-05, "loss": 1.4695, "mean_token_accuracy": 0.6413618673880895, "num_tokens": 417406226.0, "step": 2483 }, { "entropy": 1.7924610773722331, "epoch": 0.2728845678503749, "grad_norm": 0.9400225281715393, "learning_rate": 1.9471441211763526e-05, "loss": 1.4758, "mean_token_accuracy": 0.6626063287258148, "num_tokens": 417552783.0, "step": 2484 }, { "entropy": 1.718948523203532, "epoch": 0.27299442476174784, "grad_norm": 0.7052686214447021, "learning_rate": 1.947090045438077e-05, "loss": 1.418, "mean_token_accuracy": 0.6460021386543909, "num_tokens": 417764247.0, "step": 2485 }, { "entropy": 1.7755232155323029, "epoch": 0.2731042816731208, "grad_norm": 0.8216487169265747, "learning_rate": 1.9470359428897827e-05, "loss": 1.4258, "mean_token_accuracy": 0.6543919444084167, "num_tokens": 417922392.0, "step": 2486 }, { "entropy": 1.6645058989524841, "epoch": 0.27321413858449367, "grad_norm": 0.5918928980827332, "learning_rate": 1.946981813533183e-05, "loss": 1.449, "mean_token_accuracy": 0.6550898949305216, "num_tokens": 418126736.0, "step": 2487 }, { "entropy": 1.7120232780774434, "epoch": 0.2733239954958666, "grad_norm": 0.6520063877105713, "learning_rate": 1.9469276573699902e-05, "loss": 1.5254, "mean_token_accuracy": 0.6442107409238815, "num_tokens": 418302170.0, "step": 2488 }, { "entropy": 1.690334975719452, "epoch": 0.27343385240723955, "grad_norm": 0.6218593120574951, "learning_rate": 1.9468734744019187e-05, "loss": 1.5628, "mean_token_accuracy": 0.6418820967276891, "num_tokens": 418522765.0, "step": 2489 }, { "entropy": 1.771299680074056, "epoch": 0.2735437093186125, "grad_norm": 0.6886900663375854, "learning_rate": 1.9468192646306836e-05, "loss": 1.4371, "mean_token_accuracy": 0.6507747322320938, "num_tokens": 418675411.0, "step": 2490 }, { "entropy": 1.7038741906483967, "epoch": 0.27365356622998543, "grad_norm": 0.7144287824630737, "learning_rate": 1.9467650280580002e-05, "loss": 1.5871, "mean_token_accuracy": 0.6539332419633865, "num_tokens": 418883702.0, "step": 2491 }, { "entropy": 1.6580975651741028, "epoch": 0.2737634231413584, "grad_norm": 0.6173264384269714, "learning_rate": 1.946710764685585e-05, "loss": 1.4252, "mean_token_accuracy": 0.6434942533572515, "num_tokens": 419136995.0, "step": 2492 }, { "entropy": 1.6869386335213978, "epoch": 0.2738732800527313, "grad_norm": 8.725295066833496, "learning_rate": 1.946656474515156e-05, "loss": 1.3962, "mean_token_accuracy": 0.6559255520502726, "num_tokens": 419314327.0, "step": 2493 }, { "entropy": 1.664640615383784, "epoch": 0.27398313696410426, "grad_norm": 0.7240090370178223, "learning_rate": 1.946602157548431e-05, "loss": 1.2949, "mean_token_accuracy": 0.6713322947422663, "num_tokens": 419461578.0, "step": 2494 }, { "entropy": 1.6624565223852794, "epoch": 0.2740929938754772, "grad_norm": 1.0136929750442505, "learning_rate": 1.946547813787129e-05, "loss": 1.3312, "mean_token_accuracy": 0.6643867244323095, "num_tokens": 419636649.0, "step": 2495 }, { "entropy": 1.6829350888729095, "epoch": 0.27420285078685014, "grad_norm": 0.6138503551483154, "learning_rate": 1.9464934432329706e-05, "loss": 1.4834, "mean_token_accuracy": 0.6375181674957275, "num_tokens": 419815875.0, "step": 2496 }, { "entropy": 1.7264296412467957, "epoch": 0.2743127076982231, "grad_norm": 0.6041257977485657, "learning_rate": 1.9464390458876757e-05, "loss": 1.3408, "mean_token_accuracy": 0.6562155981858572, "num_tokens": 419975058.0, "step": 2497 }, { "entropy": 1.7106037835280101, "epoch": 0.274422564609596, "grad_norm": 0.7350174784660339, "learning_rate": 1.9463846217529666e-05, "loss": 1.4776, "mean_token_accuracy": 0.6583420137564341, "num_tokens": 420145254.0, "step": 2498 }, { "entropy": 1.7087813913822174, "epoch": 0.27453242152096896, "grad_norm": 0.652927577495575, "learning_rate": 1.9463301708305654e-05, "loss": 1.3691, "mean_token_accuracy": 0.6640399495760599, "num_tokens": 420326840.0, "step": 2499 }, { "entropy": 1.702909102042516, "epoch": 0.2746422784323419, "grad_norm": 0.6692368984222412, "learning_rate": 1.946275693122196e-05, "loss": 1.3357, "mean_token_accuracy": 0.6595581869284312, "num_tokens": 420463074.0, "step": 2500 }, { "entropy": 1.7041659752527873, "epoch": 0.2747521353437148, "grad_norm": 0.8389718532562256, "learning_rate": 1.9462211886295823e-05, "loss": 1.4709, "mean_token_accuracy": 0.6703790177901586, "num_tokens": 420626300.0, "step": 2501 }, { "entropy": 1.750393122434616, "epoch": 0.27486199225508773, "grad_norm": 0.6642520427703857, "learning_rate": 1.9461666573544488e-05, "loss": 1.3781, "mean_token_accuracy": 0.6633904526631037, "num_tokens": 420765456.0, "step": 2502 }, { "entropy": 1.7149128119150798, "epoch": 0.27497184916646067, "grad_norm": 0.6768452525138855, "learning_rate": 1.9461120992985222e-05, "loss": 1.4081, "mean_token_accuracy": 0.6513208548227946, "num_tokens": 420919832.0, "step": 2503 }, { "entropy": 1.7376088599363964, "epoch": 0.2750817060778336, "grad_norm": 0.6658154129981995, "learning_rate": 1.946057514463529e-05, "loss": 1.3253, "mean_token_accuracy": 0.6688035577535629, "num_tokens": 421060527.0, "step": 2504 }, { "entropy": 1.7594562570254009, "epoch": 0.27519156298920655, "grad_norm": 0.7218078374862671, "learning_rate": 1.9460029028511965e-05, "loss": 1.3467, "mean_token_accuracy": 0.6551917244990667, "num_tokens": 421228365.0, "step": 2505 }, { "entropy": 1.7568883796532948, "epoch": 0.2753014199005795, "grad_norm": 0.9153607487678528, "learning_rate": 1.9459482644632537e-05, "loss": 1.4104, "mean_token_accuracy": 0.6578507423400879, "num_tokens": 421371677.0, "step": 2506 }, { "entropy": 1.6713534692923229, "epoch": 0.27541127681195243, "grad_norm": 0.782477617263794, "learning_rate": 1.9458935993014292e-05, "loss": 1.219, "mean_token_accuracy": 0.680430273214976, "num_tokens": 421521194.0, "step": 2507 }, { "entropy": 1.7415608565012615, "epoch": 0.2755211337233254, "grad_norm": 0.8416798710823059, "learning_rate": 1.9458389073674536e-05, "loss": 1.3152, "mean_token_accuracy": 0.6562491556008657, "num_tokens": 421672704.0, "step": 2508 }, { "entropy": 1.6363226175308228, "epoch": 0.2756309906346983, "grad_norm": 0.730694055557251, "learning_rate": 1.9457841886630576e-05, "loss": 1.3778, "mean_token_accuracy": 0.6615554342667261, "num_tokens": 421828497.0, "step": 2509 }, { "entropy": 1.6957507530848186, "epoch": 0.27574084754607125, "grad_norm": 0.6770949363708496, "learning_rate": 1.9457294431899733e-05, "loss": 1.3599, "mean_token_accuracy": 0.6570597738027573, "num_tokens": 421983856.0, "step": 2510 }, { "entropy": 1.6833610932032268, "epoch": 0.2758507044574442, "grad_norm": 0.7206348776817322, "learning_rate": 1.9456746709499332e-05, "loss": 1.2937, "mean_token_accuracy": 0.6710290809472402, "num_tokens": 422128478.0, "step": 2511 }, { "entropy": 1.6595212817192078, "epoch": 0.27596056136881714, "grad_norm": 0.6251102685928345, "learning_rate": 1.945619871944671e-05, "loss": 1.3831, "mean_token_accuracy": 0.6632163723309835, "num_tokens": 422334158.0, "step": 2512 }, { "entropy": 1.7431277732054393, "epoch": 0.2760704182801901, "grad_norm": 0.7098563313484192, "learning_rate": 1.9455650461759202e-05, "loss": 1.3254, "mean_token_accuracy": 0.6785684078931808, "num_tokens": 422495092.0, "step": 2513 }, { "entropy": 1.7170985639095306, "epoch": 0.27618027519156296, "grad_norm": 0.6277499198913574, "learning_rate": 1.9455101936454174e-05, "loss": 1.2778, "mean_token_accuracy": 0.6751286735137304, "num_tokens": 422625201.0, "step": 2514 }, { "entropy": 1.7012461324532826, "epoch": 0.2762901321029359, "grad_norm": 0.608238697052002, "learning_rate": 1.9454553143548977e-05, "loss": 1.3602, "mean_token_accuracy": 0.6591375966866811, "num_tokens": 422785134.0, "step": 2515 }, { "entropy": 1.729185124238332, "epoch": 0.27639998901430884, "grad_norm": 0.6700869202613831, "learning_rate": 1.945400408306098e-05, "loss": 1.4432, "mean_token_accuracy": 0.6543838481108347, "num_tokens": 422931936.0, "step": 2516 }, { "entropy": 1.747061401605606, "epoch": 0.2765098459256818, "grad_norm": 0.9989476203918457, "learning_rate": 1.945345475500757e-05, "loss": 1.1749, "mean_token_accuracy": 0.6789939254522324, "num_tokens": 423050412.0, "step": 2517 }, { "entropy": 1.7638193666934967, "epoch": 0.2766197028370547, "grad_norm": 0.7275906205177307, "learning_rate": 1.9452905159406124e-05, "loss": 1.4777, "mean_token_accuracy": 0.641241709391276, "num_tokens": 423224244.0, "step": 2518 }, { "entropy": 1.686012178659439, "epoch": 0.27672955974842767, "grad_norm": 0.7584397792816162, "learning_rate": 1.9452355296274036e-05, "loss": 1.363, "mean_token_accuracy": 0.6610731234153112, "num_tokens": 423354122.0, "step": 2519 }, { "entropy": 1.7558989524841309, "epoch": 0.2768394166598006, "grad_norm": 0.8733357787132263, "learning_rate": 1.9451805165628713e-05, "loss": 1.4204, "mean_token_accuracy": 0.6505479166905085, "num_tokens": 423498784.0, "step": 2520 }, { "entropy": 1.7407618463039398, "epoch": 0.27694927357117355, "grad_norm": 0.6735062599182129, "learning_rate": 1.9451254767487564e-05, "loss": 1.3931, "mean_token_accuracy": 0.6727576404809952, "num_tokens": 423624180.0, "step": 2521 }, { "entropy": 1.7527087032794952, "epoch": 0.2770591304825465, "grad_norm": 0.7081560492515564, "learning_rate": 1.9450704101868012e-05, "loss": 1.3928, "mean_token_accuracy": 0.652918224533399, "num_tokens": 423776308.0, "step": 2522 }, { "entropy": 1.7728693286577861, "epoch": 0.27716898739391943, "grad_norm": 0.706057071685791, "learning_rate": 1.945015316878748e-05, "loss": 1.4191, "mean_token_accuracy": 0.6447743972142538, "num_tokens": 423957168.0, "step": 2523 }, { "entropy": 1.718264530102412, "epoch": 0.27727884430529237, "grad_norm": 0.666571319103241, "learning_rate": 1.9449601968263413e-05, "loss": 1.4477, "mean_token_accuracy": 0.6623519708712896, "num_tokens": 424136873.0, "step": 2524 }, { "entropy": 1.753323624531428, "epoch": 0.2773887012166653, "grad_norm": 0.7180684208869934, "learning_rate": 1.9449050500313247e-05, "loss": 1.2229, "mean_token_accuracy": 0.680413618683815, "num_tokens": 424229190.0, "step": 2525 }, { "entropy": 1.6851735214392345, "epoch": 0.27749855812803825, "grad_norm": 0.8312351703643799, "learning_rate": 1.944849876495444e-05, "loss": 1.318, "mean_token_accuracy": 0.6703576147556305, "num_tokens": 424367123.0, "step": 2526 }, { "entropy": 1.6827231248219807, "epoch": 0.2776084150394112, "grad_norm": 0.6143300533294678, "learning_rate": 1.9447946762204454e-05, "loss": 1.4498, "mean_token_accuracy": 0.634101668993632, "num_tokens": 424587988.0, "step": 2527 }, { "entropy": 1.6629830300807953, "epoch": 0.2777182719507841, "grad_norm": 0.6683552265167236, "learning_rate": 1.944739449208076e-05, "loss": 1.4396, "mean_token_accuracy": 0.6639639983574549, "num_tokens": 424786286.0, "step": 2528 }, { "entropy": 1.6775904496510823, "epoch": 0.277828128862157, "grad_norm": 0.6482076048851013, "learning_rate": 1.944684195460084e-05, "loss": 1.4997, "mean_token_accuracy": 0.6430053263902664, "num_tokens": 424967840.0, "step": 2529 }, { "entropy": 1.7745787998040516, "epoch": 0.27793798577352996, "grad_norm": 0.6843352913856506, "learning_rate": 1.9446289149782175e-05, "loss": 1.4148, "mean_token_accuracy": 0.6597040891647339, "num_tokens": 425137083.0, "step": 2530 }, { "entropy": 1.6696482102076213, "epoch": 0.2780478426849029, "grad_norm": 0.6596343517303467, "learning_rate": 1.9445736077642266e-05, "loss": 1.1965, "mean_token_accuracy": 0.6828029155731201, "num_tokens": 425253600.0, "step": 2531 }, { "entropy": 1.7446503738562267, "epoch": 0.27815769959627584, "grad_norm": 0.6544666290283203, "learning_rate": 1.9445182738198614e-05, "loss": 1.4899, "mean_token_accuracy": 0.6364033321539561, "num_tokens": 425425203.0, "step": 2532 }, { "entropy": 1.674493948618571, "epoch": 0.2782675565076488, "grad_norm": 0.6259612441062927, "learning_rate": 1.944462913146874e-05, "loss": 1.2664, "mean_token_accuracy": 0.6673529297113419, "num_tokens": 425549724.0, "step": 2533 }, { "entropy": 1.7295575936635335, "epoch": 0.2783774134190217, "grad_norm": 0.7398607730865479, "learning_rate": 1.944407525747015e-05, "loss": 1.4909, "mean_token_accuracy": 0.6412216623624166, "num_tokens": 425749328.0, "step": 2534 }, { "entropy": 1.759381393591563, "epoch": 0.27848727033039467, "grad_norm": 0.7434036135673523, "learning_rate": 1.9443521116220386e-05, "loss": 1.4622, "mean_token_accuracy": 0.6621341158946356, "num_tokens": 425963889.0, "step": 2535 }, { "entropy": 1.7146111925443013, "epoch": 0.2785971272417676, "grad_norm": 0.6938877105712891, "learning_rate": 1.9442966707736987e-05, "loss": 1.355, "mean_token_accuracy": 0.657206580042839, "num_tokens": 426114600.0, "step": 2536 }, { "entropy": 1.7992856403191884, "epoch": 0.27870698415314055, "grad_norm": 0.7209751009941101, "learning_rate": 1.944241203203749e-05, "loss": 1.5263, "mean_token_accuracy": 0.6411692102750143, "num_tokens": 426320358.0, "step": 2537 }, { "entropy": 1.688951204220454, "epoch": 0.2788168410645135, "grad_norm": 1.081633448600769, "learning_rate": 1.9441857089139464e-05, "loss": 1.2315, "mean_token_accuracy": 0.6716073205073675, "num_tokens": 426498576.0, "step": 2538 }, { "entropy": 1.7655569116274517, "epoch": 0.27892669797588643, "grad_norm": 0.8024057745933533, "learning_rate": 1.944130187906046e-05, "loss": 1.389, "mean_token_accuracy": 0.6557512134313583, "num_tokens": 426645970.0, "step": 2539 }, { "entropy": 1.6769147912661235, "epoch": 0.27903655488725937, "grad_norm": 0.7822548151016235, "learning_rate": 1.944074640181806e-05, "loss": 1.4512, "mean_token_accuracy": 0.646538108587265, "num_tokens": 426806650.0, "step": 2540 }, { "entropy": 1.7401223282019298, "epoch": 0.27914641179863225, "grad_norm": 0.753135085105896, "learning_rate": 1.9440190657429833e-05, "loss": 1.392, "mean_token_accuracy": 0.661638930439949, "num_tokens": 426943210.0, "step": 2541 }, { "entropy": 1.792117138703664, "epoch": 0.2792562687100052, "grad_norm": 1.012791633605957, "learning_rate": 1.943963464591338e-05, "loss": 1.4671, "mean_token_accuracy": 0.6386220256487528, "num_tokens": 427126900.0, "step": 2542 }, { "entropy": 1.6871282557646434, "epoch": 0.27936612562137814, "grad_norm": 0.6758045554161072, "learning_rate": 1.943907836728629e-05, "loss": 1.5413, "mean_token_accuracy": 0.6594254424174627, "num_tokens": 427294304.0, "step": 2543 }, { "entropy": 1.7226394315560658, "epoch": 0.2794759825327511, "grad_norm": 0.6298893094062805, "learning_rate": 1.9438521821566178e-05, "loss": 1.4598, "mean_token_accuracy": 0.635049377878507, "num_tokens": 427491263.0, "step": 2544 }, { "entropy": 1.7836333811283112, "epoch": 0.279585839444124, "grad_norm": 0.8018297553062439, "learning_rate": 1.9437965008770647e-05, "loss": 1.6433, "mean_token_accuracy": 0.625967395802339, "num_tokens": 427671894.0, "step": 2545 }, { "entropy": 1.731053650379181, "epoch": 0.27969569635549696, "grad_norm": 0.6557754278182983, "learning_rate": 1.9437407928917327e-05, "loss": 1.4259, "mean_token_accuracy": 0.6569652110338211, "num_tokens": 427880951.0, "step": 2546 }, { "entropy": 1.7691397269566853, "epoch": 0.2798055532668699, "grad_norm": 0.717713475227356, "learning_rate": 1.943685058202385e-05, "loss": 1.5615, "mean_token_accuracy": 0.6480028629302979, "num_tokens": 428026306.0, "step": 2547 }, { "entropy": 1.722131739060084, "epoch": 0.27991541017824284, "grad_norm": 0.706473708152771, "learning_rate": 1.9436292968107854e-05, "loss": 1.4702, "mean_token_accuracy": 0.6523783256610235, "num_tokens": 428229212.0, "step": 2548 }, { "entropy": 1.7450311680634816, "epoch": 0.2800252670896158, "grad_norm": 0.6654791831970215, "learning_rate": 1.9435735087186985e-05, "loss": 1.2768, "mean_token_accuracy": 0.6689060380061468, "num_tokens": 428410903.0, "step": 2549 }, { "entropy": 1.7161929905414581, "epoch": 0.2801351240009887, "grad_norm": 0.722743570804596, "learning_rate": 1.9435176939278902e-05, "loss": 1.4746, "mean_token_accuracy": 0.6436196118593216, "num_tokens": 428586998.0, "step": 2550 }, { "entropy": 1.7112940152486165, "epoch": 0.28024498091236166, "grad_norm": 0.6534221172332764, "learning_rate": 1.9434618524401273e-05, "loss": 1.3635, "mean_token_accuracy": 0.654671644171079, "num_tokens": 428723631.0, "step": 2551 }, { "entropy": 1.741028368473053, "epoch": 0.2803548378237346, "grad_norm": 0.763145387172699, "learning_rate": 1.9434059842571766e-05, "loss": 1.5508, "mean_token_accuracy": 0.635222981373469, "num_tokens": 428984871.0, "step": 2552 }, { "entropy": 1.7181775569915771, "epoch": 0.28046469473510754, "grad_norm": 0.6733216047286987, "learning_rate": 1.9433500893808064e-05, "loss": 1.3059, "mean_token_accuracy": 0.6693119158347448, "num_tokens": 429135765.0, "step": 2553 }, { "entropy": 1.7046631177266438, "epoch": 0.2805745516464805, "grad_norm": 0.7447198629379272, "learning_rate": 1.9432941678127863e-05, "loss": 1.2777, "mean_token_accuracy": 0.6620823442935944, "num_tokens": 429255761.0, "step": 2554 }, { "entropy": 1.7015548547108967, "epoch": 0.28068440855785337, "grad_norm": 0.7502123117446899, "learning_rate": 1.943238219554885e-05, "loss": 1.3332, "mean_token_accuracy": 0.6637969613075256, "num_tokens": 429404829.0, "step": 2555 }, { "entropy": 1.6994233131408691, "epoch": 0.2807942654692263, "grad_norm": 0.5920188426971436, "learning_rate": 1.943182244608875e-05, "loss": 1.4957, "mean_token_accuracy": 0.6309523532787958, "num_tokens": 429608954.0, "step": 2556 }, { "entropy": 1.6985510190327961, "epoch": 0.28090412238059925, "grad_norm": 0.6346762180328369, "learning_rate": 1.943126242976526e-05, "loss": 1.4234, "mean_token_accuracy": 0.6668014178673426, "num_tokens": 429760077.0, "step": 2557 }, { "entropy": 1.716838429371516, "epoch": 0.2810139792919722, "grad_norm": 0.6307840347290039, "learning_rate": 1.943070214659612e-05, "loss": 1.3829, "mean_token_accuracy": 0.6482276519139608, "num_tokens": 429933647.0, "step": 2558 }, { "entropy": 1.7555852731068928, "epoch": 0.28112383620334513, "grad_norm": 0.7939680218696594, "learning_rate": 1.9430141596599055e-05, "loss": 1.5112, "mean_token_accuracy": 0.6550355777144432, "num_tokens": 430065355.0, "step": 2559 }, { "entropy": 1.7047178248564403, "epoch": 0.2812336931147181, "grad_norm": 0.6013801097869873, "learning_rate": 1.9429580779791806e-05, "loss": 1.4673, "mean_token_accuracy": 0.6566463013490041, "num_tokens": 430241848.0, "step": 2560 }, { "entropy": 1.7068589230378468, "epoch": 0.281343550026091, "grad_norm": 0.6323118209838867, "learning_rate": 1.9429019696192122e-05, "loss": 1.5224, "mean_token_accuracy": 0.6419420739014944, "num_tokens": 430428484.0, "step": 2561 }, { "entropy": 1.6809816559155781, "epoch": 0.28145340693746396, "grad_norm": 0.754179060459137, "learning_rate": 1.9428458345817762e-05, "loss": 1.4271, "mean_token_accuracy": 0.6452597826719284, "num_tokens": 430591922.0, "step": 2562 }, { "entropy": 1.652672717968623, "epoch": 0.2815632638488369, "grad_norm": 0.7418878674507141, "learning_rate": 1.94278967286865e-05, "loss": 1.1637, "mean_token_accuracy": 0.6897034098704656, "num_tokens": 430712080.0, "step": 2563 }, { "entropy": 1.6429332792758942, "epoch": 0.28167312076020984, "grad_norm": 0.6739898324012756, "learning_rate": 1.94273348448161e-05, "loss": 1.3419, "mean_token_accuracy": 0.6748927334944407, "num_tokens": 430883503.0, "step": 2564 }, { "entropy": 1.697869877020518, "epoch": 0.2817829776715828, "grad_norm": 0.6139808297157288, "learning_rate": 1.9426772694224346e-05, "loss": 1.37, "mean_token_accuracy": 0.656757061680158, "num_tokens": 431038787.0, "step": 2565 }, { "entropy": 1.8296188414096832, "epoch": 0.2818928345829557, "grad_norm": 0.6441859006881714, "learning_rate": 1.9426210276929038e-05, "loss": 1.6558, "mean_token_accuracy": 0.6172501345475515, "num_tokens": 431232258.0, "step": 2566 }, { "entropy": 1.729474276304245, "epoch": 0.28200269149432866, "grad_norm": 0.6634087562561035, "learning_rate": 1.942564759294797e-05, "loss": 1.5779, "mean_token_accuracy": 0.6198930492003759, "num_tokens": 431501611.0, "step": 2567 }, { "entropy": 1.693379670381546, "epoch": 0.28211254840570155, "grad_norm": 0.7475607395172119, "learning_rate": 1.9425084642298956e-05, "loss": 1.3763, "mean_token_accuracy": 0.668298656741778, "num_tokens": 431651634.0, "step": 2568 }, { "entropy": 1.7199491361776988, "epoch": 0.2822224053170745, "grad_norm": 0.6126656532287598, "learning_rate": 1.9424521424999805e-05, "loss": 1.3842, "mean_token_accuracy": 0.6540129085381826, "num_tokens": 431811528.0, "step": 2569 }, { "entropy": 1.711053987344106, "epoch": 0.28233226222844743, "grad_norm": 0.8134360909461975, "learning_rate": 1.942395794106835e-05, "loss": 1.2594, "mean_token_accuracy": 0.6801566729942957, "num_tokens": 431973926.0, "step": 2570 }, { "entropy": 1.715971678495407, "epoch": 0.28244211913982037, "grad_norm": 0.623103678226471, "learning_rate": 1.942339419052242e-05, "loss": 1.5081, "mean_token_accuracy": 0.6435102721055349, "num_tokens": 432176408.0, "step": 2571 }, { "entropy": 1.6517931123574574, "epoch": 0.2825519760511933, "grad_norm": 0.7378969192504883, "learning_rate": 1.942283017337986e-05, "loss": 1.3061, "mean_token_accuracy": 0.6691179027160009, "num_tokens": 432306283.0, "step": 2572 }, { "entropy": 1.671871801217397, "epoch": 0.28266183296256625, "grad_norm": 0.6152805685997009, "learning_rate": 1.942226588965852e-05, "loss": 1.2702, "mean_token_accuracy": 0.6683402210474014, "num_tokens": 432466149.0, "step": 2573 }, { "entropy": 1.7728142738342285, "epoch": 0.2827716898739392, "grad_norm": 0.6616373658180237, "learning_rate": 1.9421701339376263e-05, "loss": 1.407, "mean_token_accuracy": 0.6552805304527283, "num_tokens": 432602942.0, "step": 2574 }, { "entropy": 1.7376613914966583, "epoch": 0.28288154678531213, "grad_norm": 0.6483553647994995, "learning_rate": 1.942113652255095e-05, "loss": 1.432, "mean_token_accuracy": 0.6605344464381536, "num_tokens": 432771571.0, "step": 2575 }, { "entropy": 1.67216690381368, "epoch": 0.2829914036966851, "grad_norm": 0.6230313181877136, "learning_rate": 1.9420571439200463e-05, "loss": 1.4043, "mean_token_accuracy": 0.6436713586250941, "num_tokens": 432970686.0, "step": 2576 }, { "entropy": 1.6591029067834218, "epoch": 0.283101260608058, "grad_norm": 0.6028016209602356, "learning_rate": 1.942000608934268e-05, "loss": 1.3898, "mean_token_accuracy": 0.6501336942116419, "num_tokens": 433158122.0, "step": 2577 }, { "entropy": 1.756819248199463, "epoch": 0.28321111751943095, "grad_norm": 0.6819401979446411, "learning_rate": 1.9419440472995502e-05, "loss": 1.2936, "mean_token_accuracy": 0.6685625910758972, "num_tokens": 433336335.0, "step": 2578 }, { "entropy": 1.7187687456607819, "epoch": 0.2833209744308039, "grad_norm": 0.8300583362579346, "learning_rate": 1.9418874590176827e-05, "loss": 1.5282, "mean_token_accuracy": 0.6508554766575495, "num_tokens": 433457129.0, "step": 2579 }, { "entropy": 1.6912387907505035, "epoch": 0.28343083134217684, "grad_norm": 0.8252399563789368, "learning_rate": 1.9418308440904564e-05, "loss": 1.4709, "mean_token_accuracy": 0.6553937296072642, "num_tokens": 433624991.0, "step": 2580 }, { "entropy": 1.7515579263369243, "epoch": 0.2835406882535498, "grad_norm": 0.7480166554450989, "learning_rate": 1.9417742025196635e-05, "loss": 1.5038, "mean_token_accuracy": 0.6306808292865753, "num_tokens": 433839600.0, "step": 2581 }, { "entropy": 1.724341442187627, "epoch": 0.28365054516492266, "grad_norm": 0.8125796914100647, "learning_rate": 1.9417175343070962e-05, "loss": 1.3742, "mean_token_accuracy": 0.6564011871814728, "num_tokens": 433995970.0, "step": 2582 }, { "entropy": 1.6599280138810475, "epoch": 0.2837604020762956, "grad_norm": 0.6576691269874573, "learning_rate": 1.941660839454548e-05, "loss": 1.365, "mean_token_accuracy": 0.6600429564714432, "num_tokens": 434197426.0, "step": 2583 }, { "entropy": 1.659876714150111, "epoch": 0.28387025898766854, "grad_norm": 0.6102942824363708, "learning_rate": 1.9416041179638138e-05, "loss": 1.3061, "mean_token_accuracy": 0.67480997244517, "num_tokens": 434399328.0, "step": 2584 }, { "entropy": 1.6985367238521576, "epoch": 0.2839801158990415, "grad_norm": 0.6151925921440125, "learning_rate": 1.941547369836688e-05, "loss": 1.2711, "mean_token_accuracy": 0.6702224761247635, "num_tokens": 434537957.0, "step": 2585 }, { "entropy": 1.673358827829361, "epoch": 0.2840899728104144, "grad_norm": 0.6189048886299133, "learning_rate": 1.941490595074968e-05, "loss": 1.3391, "mean_token_accuracy": 0.6615988264481226, "num_tokens": 434758718.0, "step": 2586 }, { "entropy": 1.7454047600428264, "epoch": 0.28419982972178737, "grad_norm": 0.6552925109863281, "learning_rate": 1.941433793680449e-05, "loss": 1.4267, "mean_token_accuracy": 0.6499452342589697, "num_tokens": 434917375.0, "step": 2587 }, { "entropy": 1.7055931588013966, "epoch": 0.2843096866331603, "grad_norm": 0.8626599311828613, "learning_rate": 1.94137696565493e-05, "loss": 1.4959, "mean_token_accuracy": 0.6463074237108231, "num_tokens": 435103637.0, "step": 2588 }, { "entropy": 1.7163316309452057, "epoch": 0.28441954354453325, "grad_norm": 0.6372457146644592, "learning_rate": 1.9413201110002094e-05, "loss": 1.5373, "mean_token_accuracy": 0.6396234631538391, "num_tokens": 435295478.0, "step": 2589 }, { "entropy": 1.7566253244876862, "epoch": 0.2845294004559062, "grad_norm": 0.6608404517173767, "learning_rate": 1.941263229718086e-05, "loss": 1.4931, "mean_token_accuracy": 0.6418844411770502, "num_tokens": 435479005.0, "step": 2590 }, { "entropy": 1.6568923095862071, "epoch": 0.28463925736727913, "grad_norm": 0.7189907431602478, "learning_rate": 1.9412063218103607e-05, "loss": 1.2423, "mean_token_accuracy": 0.6729675034681956, "num_tokens": 435628265.0, "step": 2591 }, { "entropy": 1.6527254382769268, "epoch": 0.28474911427865207, "grad_norm": 0.6984722018241882, "learning_rate": 1.9411493872788342e-05, "loss": 1.4279, "mean_token_accuracy": 0.6531344701846441, "num_tokens": 435898622.0, "step": 2592 }, { "entropy": 1.7152815461158752, "epoch": 0.284858971190025, "grad_norm": 0.7125999331474304, "learning_rate": 1.941092426125309e-05, "loss": 1.5202, "mean_token_accuracy": 0.6567995101213455, "num_tokens": 436075360.0, "step": 2593 }, { "entropy": 1.6898160974184673, "epoch": 0.28496882810139795, "grad_norm": 0.6797850728034973, "learning_rate": 1.9410354383515872e-05, "loss": 1.3609, "mean_token_accuracy": 0.667990709344546, "num_tokens": 436269293.0, "step": 2594 }, { "entropy": 1.6990408897399902, "epoch": 0.28507868501277084, "grad_norm": 0.6356927752494812, "learning_rate": 1.9409784239594726e-05, "loss": 1.4249, "mean_token_accuracy": 0.659389058748881, "num_tokens": 436420010.0, "step": 2595 }, { "entropy": 1.7738368213176727, "epoch": 0.2851885419241438, "grad_norm": 0.5506088137626648, "learning_rate": 1.94092138295077e-05, "loss": 1.5087, "mean_token_accuracy": 0.621019164721171, "num_tokens": 436673612.0, "step": 2596 }, { "entropy": 1.7091120680173237, "epoch": 0.2852983988355167, "grad_norm": 0.6980639100074768, "learning_rate": 1.9408643153272845e-05, "loss": 1.3243, "mean_token_accuracy": 0.6670518765846888, "num_tokens": 436835491.0, "step": 2597 }, { "entropy": 1.779990682999293, "epoch": 0.28540825574688966, "grad_norm": 0.6934612393379211, "learning_rate": 1.9408072210908224e-05, "loss": 1.4711, "mean_token_accuracy": 0.6409854739904404, "num_tokens": 437001428.0, "step": 2598 }, { "entropy": 1.6988802353541057, "epoch": 0.2855181126582626, "grad_norm": 0.7314718961715698, "learning_rate": 1.9407501002431906e-05, "loss": 1.5671, "mean_token_accuracy": 0.6539310614267985, "num_tokens": 437139054.0, "step": 2599 }, { "entropy": 1.7167824109395344, "epoch": 0.28562796956963554, "grad_norm": 0.8039875030517578, "learning_rate": 1.940692952786197e-05, "loss": 1.4672, "mean_token_accuracy": 0.6653132339318594, "num_tokens": 437270459.0, "step": 2600 }, { "entropy": 1.6449208458264668, "epoch": 0.2857378264810085, "grad_norm": 0.7214610576629639, "learning_rate": 1.9406357787216504e-05, "loss": 1.4112, "mean_token_accuracy": 0.6606322924296061, "num_tokens": 437421392.0, "step": 2601 }, { "entropy": 1.680547167857488, "epoch": 0.2858476833923814, "grad_norm": 0.7055097222328186, "learning_rate": 1.94057857805136e-05, "loss": 1.3738, "mean_token_accuracy": 0.6694934616486231, "num_tokens": 437561791.0, "step": 2602 }, { "entropy": 1.6792938709259033, "epoch": 0.28595754030375437, "grad_norm": 0.6724585890769958, "learning_rate": 1.9405213507771363e-05, "loss": 1.4334, "mean_token_accuracy": 0.6348255177338918, "num_tokens": 437791784.0, "step": 2603 }, { "entropy": 1.6977178752422333, "epoch": 0.2860673972151273, "grad_norm": 0.8410064578056335, "learning_rate": 1.9404640969007907e-05, "loss": 1.6249, "mean_token_accuracy": 0.6277847041686376, "num_tokens": 437997002.0, "step": 2604 }, { "entropy": 1.7250556250413258, "epoch": 0.28617725412650025, "grad_norm": 0.7040321230888367, "learning_rate": 1.9404068164241354e-05, "loss": 1.3341, "mean_token_accuracy": 0.6668216039737066, "num_tokens": 438129008.0, "step": 2605 }, { "entropy": 1.6219845215479534, "epoch": 0.2862871110378732, "grad_norm": 0.5418662428855896, "learning_rate": 1.940349509348983e-05, "loss": 1.3108, "mean_token_accuracy": 0.6690922429164251, "num_tokens": 438323789.0, "step": 2606 }, { "entropy": 1.6409521003564198, "epoch": 0.28639696794924613, "grad_norm": 0.6410753130912781, "learning_rate": 1.9402921756771467e-05, "loss": 1.3104, "mean_token_accuracy": 0.6703230490287145, "num_tokens": 438483486.0, "step": 2607 }, { "entropy": 1.7267462313175201, "epoch": 0.28650682486061907, "grad_norm": 0.8248845338821411, "learning_rate": 1.940234815410442e-05, "loss": 1.2461, "mean_token_accuracy": 0.6791750093301138, "num_tokens": 438633786.0, "step": 2608 }, { "entropy": 1.7135661741097767, "epoch": 0.28661668177199195, "grad_norm": 0.6371444463729858, "learning_rate": 1.9401774285506844e-05, "loss": 1.4584, "mean_token_accuracy": 0.6644980758428574, "num_tokens": 438804168.0, "step": 2609 }, { "entropy": 1.73434716463089, "epoch": 0.2867265386833649, "grad_norm": 0.7363563179969788, "learning_rate": 1.9401200150996897e-05, "loss": 1.4111, "mean_token_accuracy": 0.6546731541554133, "num_tokens": 439014763.0, "step": 2610 }, { "entropy": 1.6565505663553874, "epoch": 0.28683639559473784, "grad_norm": 0.9262635111808777, "learning_rate": 1.940062575059275e-05, "loss": 1.4191, "mean_token_accuracy": 0.6618759582440058, "num_tokens": 439230751.0, "step": 2611 }, { "entropy": 1.6508768200874329, "epoch": 0.2869462525061108, "grad_norm": 0.8240963220596313, "learning_rate": 1.9400051084312582e-05, "loss": 1.2587, "mean_token_accuracy": 0.6758377949396769, "num_tokens": 439369696.0, "step": 2612 }, { "entropy": 1.7037352323532104, "epoch": 0.2870561094174837, "grad_norm": 0.6460655927658081, "learning_rate": 1.9399476152174582e-05, "loss": 1.3585, "mean_token_accuracy": 0.6592906763156255, "num_tokens": 439513162.0, "step": 2613 }, { "entropy": 1.689726283152898, "epoch": 0.28716596632885666, "grad_norm": 0.621805727481842, "learning_rate": 1.939890095419695e-05, "loss": 1.5793, "mean_token_accuracy": 0.6374916980663935, "num_tokens": 439714986.0, "step": 2614 }, { "entropy": 1.7064528862635295, "epoch": 0.2872758232402296, "grad_norm": 0.6384214162826538, "learning_rate": 1.9398325490397882e-05, "loss": 1.3932, "mean_token_accuracy": 0.6516889532407125, "num_tokens": 439906645.0, "step": 2615 }, { "entropy": 1.80901434024175, "epoch": 0.28738568015160254, "grad_norm": 0.7344052195549011, "learning_rate": 1.93977497607956e-05, "loss": 1.3694, "mean_token_accuracy": 0.6523802032073339, "num_tokens": 440005633.0, "step": 2616 }, { "entropy": 1.7248846590518951, "epoch": 0.2874955370629755, "grad_norm": 0.7670570611953735, "learning_rate": 1.939717376540832e-05, "loss": 1.5491, "mean_token_accuracy": 0.6424847940603892, "num_tokens": 440210280.0, "step": 2617 }, { "entropy": 1.7063851058483124, "epoch": 0.2876053939743484, "grad_norm": 0.8078677654266357, "learning_rate": 1.939659750425428e-05, "loss": 1.4602, "mean_token_accuracy": 0.6642357558012009, "num_tokens": 440382539.0, "step": 2618 }, { "entropy": 1.7136432727177937, "epoch": 0.28771525088572136, "grad_norm": 0.6970177888870239, "learning_rate": 1.9396020977351707e-05, "loss": 1.456, "mean_token_accuracy": 0.6549165745576223, "num_tokens": 440542147.0, "step": 2619 }, { "entropy": 1.769452879826228, "epoch": 0.2878251077970943, "grad_norm": 0.6893923282623291, "learning_rate": 1.9395444184718856e-05, "loss": 1.361, "mean_token_accuracy": 0.6599644472201666, "num_tokens": 440687660.0, "step": 2620 }, { "entropy": 1.730559726556142, "epoch": 0.28793496470846724, "grad_norm": 0.6542354822158813, "learning_rate": 1.9394867126373978e-05, "loss": 1.473, "mean_token_accuracy": 0.639530157049497, "num_tokens": 440882125.0, "step": 2621 }, { "entropy": 1.7187020281950633, "epoch": 0.2880448216198402, "grad_norm": 0.6793704628944397, "learning_rate": 1.939428980233534e-05, "loss": 1.3705, "mean_token_accuracy": 0.6617551843325297, "num_tokens": 441024849.0, "step": 2622 }, { "entropy": 1.8135265906651814, "epoch": 0.28815467853121307, "grad_norm": 0.9341657757759094, "learning_rate": 1.939371221262121e-05, "loss": 1.4504, "mean_token_accuracy": 0.6392282346884409, "num_tokens": 441207195.0, "step": 2623 }, { "entropy": 1.705136905113856, "epoch": 0.288264535442586, "grad_norm": 0.623195469379425, "learning_rate": 1.9393134357249873e-05, "loss": 1.5089, "mean_token_accuracy": 0.6382016837596893, "num_tokens": 441406980.0, "step": 2624 }, { "entropy": 1.7127378980318706, "epoch": 0.28837439235395895, "grad_norm": 0.8345064520835876, "learning_rate": 1.939255623623961e-05, "loss": 1.4068, "mean_token_accuracy": 0.6660717676083246, "num_tokens": 441559825.0, "step": 2625 }, { "entropy": 1.658822198708852, "epoch": 0.2884842492653319, "grad_norm": 0.7187853455543518, "learning_rate": 1.939197784960873e-05, "loss": 1.3836, "mean_token_accuracy": 0.6508085081974665, "num_tokens": 441766932.0, "step": 2626 }, { "entropy": 1.704751859108607, "epoch": 0.28859410617670483, "grad_norm": 0.6862353682518005, "learning_rate": 1.9391399197375532e-05, "loss": 1.3871, "mean_token_accuracy": 0.6600909431775411, "num_tokens": 441913413.0, "step": 2627 }, { "entropy": 1.6826223929723103, "epoch": 0.2887039630880778, "grad_norm": 0.6673212647438049, "learning_rate": 1.939082027955833e-05, "loss": 1.3096, "mean_token_accuracy": 0.6611541360616684, "num_tokens": 442074956.0, "step": 2628 }, { "entropy": 1.7774581213792164, "epoch": 0.2888138199994507, "grad_norm": 0.7454673051834106, "learning_rate": 1.9390241096175446e-05, "loss": 1.3841, "mean_token_accuracy": 0.6583873877922694, "num_tokens": 442194901.0, "step": 2629 }, { "entropy": 1.7498468160629272, "epoch": 0.28892367691082366, "grad_norm": 0.6279725432395935, "learning_rate": 1.9389661647245216e-05, "loss": 1.4632, "mean_token_accuracy": 0.6340119491020838, "num_tokens": 442375203.0, "step": 2630 }, { "entropy": 1.6489406327406566, "epoch": 0.2890335338221966, "grad_norm": 2.050079345703125, "learning_rate": 1.9389081932785972e-05, "loss": 1.1596, "mean_token_accuracy": 0.6808596501747767, "num_tokens": 442579070.0, "step": 2631 }, { "entropy": 1.6880771319071453, "epoch": 0.28914339073356954, "grad_norm": 0.7001965045928955, "learning_rate": 1.9388501952816065e-05, "loss": 1.4539, "mean_token_accuracy": 0.6618387450774511, "num_tokens": 442764662.0, "step": 2632 }, { "entropy": 1.7235000828901927, "epoch": 0.2892532476449425, "grad_norm": 0.7411707639694214, "learning_rate": 1.9387921707353852e-05, "loss": 1.3913, "mean_token_accuracy": 0.661471888422966, "num_tokens": 442937159.0, "step": 2633 }, { "entropy": 1.7213360567887623, "epoch": 0.2893631045563154, "grad_norm": 0.7139886021614075, "learning_rate": 1.9387341196417693e-05, "loss": 1.369, "mean_token_accuracy": 0.6659169793128967, "num_tokens": 443095204.0, "step": 2634 }, { "entropy": 1.7081999878088634, "epoch": 0.28947296146768836, "grad_norm": 0.6855827569961548, "learning_rate": 1.938676042002597e-05, "loss": 1.4566, "mean_token_accuracy": 0.6534863263368607, "num_tokens": 443244399.0, "step": 2635 }, { "entropy": 1.732550968726476, "epoch": 0.28958281837906125, "grad_norm": 0.7183929681777954, "learning_rate": 1.9386179378197057e-05, "loss": 1.4231, "mean_token_accuracy": 0.6524986227353414, "num_tokens": 443443407.0, "step": 2636 }, { "entropy": 1.7156991362571716, "epoch": 0.2896926752904342, "grad_norm": 0.8171935677528381, "learning_rate": 1.9385598070949344e-05, "loss": 1.2888, "mean_token_accuracy": 0.6588062097628912, "num_tokens": 443554465.0, "step": 2637 }, { "entropy": 1.7135216891765594, "epoch": 0.28980253220180713, "grad_norm": 0.6728255748748779, "learning_rate": 1.938501649830123e-05, "loss": 1.5447, "mean_token_accuracy": 0.6269475817680359, "num_tokens": 443758902.0, "step": 2638 }, { "entropy": 1.6548403700192769, "epoch": 0.28991238911318007, "grad_norm": 0.5925636887550354, "learning_rate": 1.9384434660271127e-05, "loss": 1.4282, "mean_token_accuracy": 0.6455462525288264, "num_tokens": 443975978.0, "step": 2639 }, { "entropy": 1.6577099462350209, "epoch": 0.290022246024553, "grad_norm": 0.6581486463546753, "learning_rate": 1.9383852556877442e-05, "loss": 1.4118, "mean_token_accuracy": 0.6570763885974884, "num_tokens": 444185925.0, "step": 2640 }, { "entropy": 1.6643743515014648, "epoch": 0.29013210293592595, "grad_norm": 0.581012487411499, "learning_rate": 1.93832701881386e-05, "loss": 1.2362, "mean_token_accuracy": 0.6839745144049326, "num_tokens": 444347755.0, "step": 2641 }, { "entropy": 1.6794247229894002, "epoch": 0.2902419598472989, "grad_norm": 0.6780709028244019, "learning_rate": 1.9382687554073037e-05, "loss": 1.494, "mean_token_accuracy": 0.6450832734505335, "num_tokens": 444602974.0, "step": 2642 }, { "entropy": 1.732841948668162, "epoch": 0.29035181675867183, "grad_norm": 0.6316855549812317, "learning_rate": 1.9382104654699188e-05, "loss": 1.4737, "mean_token_accuracy": 0.6366277585426966, "num_tokens": 444861153.0, "step": 2643 }, { "entropy": 1.7282691299915314, "epoch": 0.2904616736700448, "grad_norm": 0.6324109435081482, "learning_rate": 1.9381521490035507e-05, "loss": 1.4287, "mean_token_accuracy": 0.64259501794974, "num_tokens": 445064095.0, "step": 2644 }, { "entropy": 1.7289496064186096, "epoch": 0.2905715305814177, "grad_norm": 0.6488730311393738, "learning_rate": 1.9380938060100444e-05, "loss": 1.4234, "mean_token_accuracy": 0.6523800839980444, "num_tokens": 445277916.0, "step": 2645 }, { "entropy": 1.7048724194367726, "epoch": 0.29068138749279065, "grad_norm": 0.8301265835762024, "learning_rate": 1.938035436491247e-05, "loss": 1.4233, "mean_token_accuracy": 0.6592496484518051, "num_tokens": 445421134.0, "step": 2646 }, { "entropy": 1.6666546662648518, "epoch": 0.2907912444041636, "grad_norm": 0.6903906464576721, "learning_rate": 1.9379770404490055e-05, "loss": 1.434, "mean_token_accuracy": 0.6567947318156561, "num_tokens": 445598132.0, "step": 2647 }, { "entropy": 1.6847756405671437, "epoch": 0.29090110131553654, "grad_norm": 0.7805249691009521, "learning_rate": 1.9379186178851682e-05, "loss": 1.56, "mean_token_accuracy": 0.6398202478885651, "num_tokens": 445778211.0, "step": 2648 }, { "entropy": 1.7226019004980724, "epoch": 0.2910109582269095, "grad_norm": 0.7891348004341125, "learning_rate": 1.9378601688015844e-05, "loss": 1.3392, "mean_token_accuracy": 0.6544974197944006, "num_tokens": 445908005.0, "step": 2649 }, { "entropy": 1.7356652915477753, "epoch": 0.29112081513828236, "grad_norm": 0.6843798160552979, "learning_rate": 1.9378016932001038e-05, "loss": 1.4653, "mean_token_accuracy": 0.6467985957860947, "num_tokens": 446039417.0, "step": 2650 }, { "entropy": 1.6962276101112366, "epoch": 0.2912306720496553, "grad_norm": 0.8159408569335938, "learning_rate": 1.937743191082577e-05, "loss": 1.2746, "mean_token_accuracy": 0.6776145696640015, "num_tokens": 446183804.0, "step": 2651 }, { "entropy": 1.6974614063898723, "epoch": 0.29134052896102824, "grad_norm": 0.6378005743026733, "learning_rate": 1.937684662450856e-05, "loss": 1.4558, "mean_token_accuracy": 0.6375326613585154, "num_tokens": 446395323.0, "step": 2652 }, { "entropy": 1.708219935496648, "epoch": 0.2914503858724012, "grad_norm": 0.6746168732643127, "learning_rate": 1.9376261073067924e-05, "loss": 1.4131, "mean_token_accuracy": 0.6394032041231791, "num_tokens": 446585024.0, "step": 2653 }, { "entropy": 1.7100668549537659, "epoch": 0.2915602427837741, "grad_norm": 0.7768794298171997, "learning_rate": 1.9375675256522407e-05, "loss": 1.4283, "mean_token_accuracy": 0.6499427556991577, "num_tokens": 446745055.0, "step": 2654 }, { "entropy": 1.6924604872862499, "epoch": 0.29167009969514707, "grad_norm": 0.8306671977043152, "learning_rate": 1.9375089174890535e-05, "loss": 1.3603, "mean_token_accuracy": 0.6594620595375696, "num_tokens": 446914924.0, "step": 2655 }, { "entropy": 1.7755940457185109, "epoch": 0.29177995660652, "grad_norm": 0.7717196941375732, "learning_rate": 1.937450282819087e-05, "loss": 1.485, "mean_token_accuracy": 0.6404218624035517, "num_tokens": 447059653.0, "step": 2656 }, { "entropy": 1.7116881906986237, "epoch": 0.29188981351789295, "grad_norm": 0.7639763951301575, "learning_rate": 1.937391621644196e-05, "loss": 1.3773, "mean_token_accuracy": 0.653444285194079, "num_tokens": 447200194.0, "step": 2657 }, { "entropy": 1.6779767175515492, "epoch": 0.2919996704292659, "grad_norm": 0.6334971189498901, "learning_rate": 1.9373329339662376e-05, "loss": 1.4107, "mean_token_accuracy": 0.6518335590759913, "num_tokens": 447396983.0, "step": 2658 }, { "entropy": 1.682463566462199, "epoch": 0.29210952734063883, "grad_norm": 0.673891007900238, "learning_rate": 1.9372742197870694e-05, "loss": 1.3052, "mean_token_accuracy": 0.6681603988011678, "num_tokens": 447551504.0, "step": 2659 }, { "entropy": 1.6600935558478038, "epoch": 0.29221938425201177, "grad_norm": 0.6958986520767212, "learning_rate": 1.9372154791085494e-05, "loss": 1.3451, "mean_token_accuracy": 0.6691215733687083, "num_tokens": 447691507.0, "step": 2660 }, { "entropy": 1.6572713057200115, "epoch": 0.2923292411633847, "grad_norm": 0.6040387153625488, "learning_rate": 1.9371567119325366e-05, "loss": 1.4619, "mean_token_accuracy": 0.641932855049769, "num_tokens": 447887087.0, "step": 2661 }, { "entropy": 1.6765493253866832, "epoch": 0.29243909807475765, "grad_norm": 0.6503280401229858, "learning_rate": 1.937097918260891e-05, "loss": 1.3431, "mean_token_accuracy": 0.6661610007286072, "num_tokens": 448043292.0, "step": 2662 }, { "entropy": 1.681261380513509, "epoch": 0.29254895498613054, "grad_norm": 0.7642120718955994, "learning_rate": 1.9370390980954734e-05, "loss": 1.4735, "mean_token_accuracy": 0.653852661450704, "num_tokens": 448207574.0, "step": 2663 }, { "entropy": 1.6833062370618184, "epoch": 0.2926588118975035, "grad_norm": 0.670602023601532, "learning_rate": 1.936980251438146e-05, "loss": 1.3351, "mean_token_accuracy": 0.6685332159201304, "num_tokens": 448357135.0, "step": 2664 }, { "entropy": 1.7469529509544373, "epoch": 0.2927686688088764, "grad_norm": 0.7529553771018982, "learning_rate": 1.9369213782907704e-05, "loss": 1.5406, "mean_token_accuracy": 0.6347383807102839, "num_tokens": 448561405.0, "step": 2665 }, { "entropy": 1.7639080087343852, "epoch": 0.29287852572024936, "grad_norm": 0.7545667290687561, "learning_rate": 1.9368624786552103e-05, "loss": 1.5237, "mean_token_accuracy": 0.6413746724526087, "num_tokens": 448745327.0, "step": 2666 }, { "entropy": 1.694331020116806, "epoch": 0.2929883826316223, "grad_norm": 0.6679127216339111, "learning_rate": 1.93680355253333e-05, "loss": 1.3199, "mean_token_accuracy": 0.6663316388924917, "num_tokens": 448887664.0, "step": 2667 }, { "entropy": 1.7083205878734589, "epoch": 0.29309823954299524, "grad_norm": 0.6043710708618164, "learning_rate": 1.9367445999269942e-05, "loss": 1.3662, "mean_token_accuracy": 0.6552055925130844, "num_tokens": 449053784.0, "step": 2668 }, { "entropy": 1.7981708248456318, "epoch": 0.2932080964543682, "grad_norm": 0.5984474420547485, "learning_rate": 1.9366856208380692e-05, "loss": 1.3922, "mean_token_accuracy": 0.6492073982954025, "num_tokens": 449226912.0, "step": 2669 }, { "entropy": 1.7615818778673809, "epoch": 0.2933179533657411, "grad_norm": 0.7663693428039551, "learning_rate": 1.936626615268421e-05, "loss": 1.2619, "mean_token_accuracy": 0.6588031202554703, "num_tokens": 449331942.0, "step": 2670 }, { "entropy": 1.7315702736377716, "epoch": 0.29342781027711407, "grad_norm": 0.7021830677986145, "learning_rate": 1.9365675832199173e-05, "loss": 1.3047, "mean_token_accuracy": 0.6822420656681061, "num_tokens": 449455287.0, "step": 2671 }, { "entropy": 1.6805367469787598, "epoch": 0.293537667188487, "grad_norm": 0.5745053887367249, "learning_rate": 1.936508524694427e-05, "loss": 1.3666, "mean_token_accuracy": 0.6640961915254593, "num_tokens": 449670325.0, "step": 2672 }, { "entropy": 1.6514671444892883, "epoch": 0.29364752409985995, "grad_norm": 0.727182924747467, "learning_rate": 1.9364494396938183e-05, "loss": 1.2007, "mean_token_accuracy": 0.6855193028847376, "num_tokens": 449786535.0, "step": 2673 }, { "entropy": 1.7635166545708973, "epoch": 0.2937573810112329, "grad_norm": 0.7026004791259766, "learning_rate": 1.9363903282199622e-05, "loss": 1.5577, "mean_token_accuracy": 0.6341749678055445, "num_tokens": 449959641.0, "step": 2674 }, { "entropy": 1.6938972075780232, "epoch": 0.29386723792260583, "grad_norm": 0.7433229088783264, "learning_rate": 1.936331190274729e-05, "loss": 1.4235, "mean_token_accuracy": 0.6720124930143356, "num_tokens": 450131900.0, "step": 2675 }, { "entropy": 1.683308909336726, "epoch": 0.29397709483397877, "grad_norm": 0.7515206933021545, "learning_rate": 1.9362720258599906e-05, "loss": 1.433, "mean_token_accuracy": 0.6503423452377319, "num_tokens": 450296861.0, "step": 2676 }, { "entropy": 1.6832621296246846, "epoch": 0.29408695174535165, "grad_norm": 0.7027547955513, "learning_rate": 1.936212834977619e-05, "loss": 1.2631, "mean_token_accuracy": 0.6792778372764587, "num_tokens": 450446299.0, "step": 2677 }, { "entropy": 1.7150346239407857, "epoch": 0.2941968086567246, "grad_norm": 0.6275519132614136, "learning_rate": 1.9361536176294884e-05, "loss": 1.4966, "mean_token_accuracy": 0.6483021924893061, "num_tokens": 450598627.0, "step": 2678 }, { "entropy": 1.7051092684268951, "epoch": 0.29430666556809754, "grad_norm": 0.7069133520126343, "learning_rate": 1.9360943738174723e-05, "loss": 1.4622, "mean_token_accuracy": 0.6559437364339828, "num_tokens": 450730927.0, "step": 2679 }, { "entropy": 1.684261292219162, "epoch": 0.2944165224794705, "grad_norm": 1.7670702934265137, "learning_rate": 1.9360351035434462e-05, "loss": 1.3459, "mean_token_accuracy": 0.6596929530302683, "num_tokens": 450949743.0, "step": 2680 }, { "entropy": 1.711377779642741, "epoch": 0.2945263793908434, "grad_norm": 0.6422090530395508, "learning_rate": 1.9359758068092856e-05, "loss": 1.4483, "mean_token_accuracy": 0.6548082033793131, "num_tokens": 451108520.0, "step": 2681 }, { "entropy": 1.6563451290130615, "epoch": 0.29463623630221636, "grad_norm": 0.7376955151557922, "learning_rate": 1.9359164836168673e-05, "loss": 1.1897, "mean_token_accuracy": 0.6866026818752289, "num_tokens": 451255541.0, "step": 2682 }, { "entropy": 1.7390979429086049, "epoch": 0.2947460932135893, "grad_norm": 0.78875732421875, "learning_rate": 1.9358571339680695e-05, "loss": 1.3601, "mean_token_accuracy": 0.6453971515099207, "num_tokens": 451456051.0, "step": 2683 }, { "entropy": 1.7441952129205067, "epoch": 0.29485595012496224, "grad_norm": 0.7171877026557922, "learning_rate": 1.93579775786477e-05, "loss": 1.4548, "mean_token_accuracy": 0.6368372937043508, "num_tokens": 451669627.0, "step": 2684 }, { "entropy": 1.75757697224617, "epoch": 0.2949658070363352, "grad_norm": 0.7782573103904724, "learning_rate": 1.9357383553088475e-05, "loss": 1.4544, "mean_token_accuracy": 0.645165205001831, "num_tokens": 451796991.0, "step": 2685 }, { "entropy": 1.6625976363817851, "epoch": 0.2950756639477081, "grad_norm": 0.8164569735527039, "learning_rate": 1.935678926302183e-05, "loss": 1.3995, "mean_token_accuracy": 0.6670281787713369, "num_tokens": 451954998.0, "step": 2686 }, { "entropy": 1.7122354706128438, "epoch": 0.29518552085908106, "grad_norm": 0.6264376044273376, "learning_rate": 1.935619470846657e-05, "loss": 1.3778, "mean_token_accuracy": 0.6613652606805166, "num_tokens": 452135263.0, "step": 2687 }, { "entropy": 1.7165345946947734, "epoch": 0.295295377770454, "grad_norm": 0.6703044176101685, "learning_rate": 1.9355599889441514e-05, "loss": 1.3914, "mean_token_accuracy": 0.6461621175209681, "num_tokens": 452312831.0, "step": 2688 }, { "entropy": 1.6509647866090138, "epoch": 0.29540523468182694, "grad_norm": 0.6027291417121887, "learning_rate": 1.9355004805965488e-05, "loss": 1.4686, "mean_token_accuracy": 0.6490070174137751, "num_tokens": 452490265.0, "step": 2689 }, { "entropy": 1.7152677079041798, "epoch": 0.29551509159319983, "grad_norm": 0.6414744257926941, "learning_rate": 1.935440945805732e-05, "loss": 1.4622, "mean_token_accuracy": 0.6658103515704473, "num_tokens": 452695410.0, "step": 2690 }, { "entropy": 1.7520911594231923, "epoch": 0.29562494850457277, "grad_norm": 0.701274037361145, "learning_rate": 1.935381384573586e-05, "loss": 1.3646, "mean_token_accuracy": 0.6607696761687597, "num_tokens": 452872274.0, "step": 2691 }, { "entropy": 1.646621435880661, "epoch": 0.2957348054159457, "grad_norm": 0.6311284303665161, "learning_rate": 1.9353217969019955e-05, "loss": 1.3512, "mean_token_accuracy": 0.6649795571962992, "num_tokens": 453021211.0, "step": 2692 }, { "entropy": 1.7599443395932515, "epoch": 0.29584466232731865, "grad_norm": 0.6941083073616028, "learning_rate": 1.9352621827928467e-05, "loss": 1.3329, "mean_token_accuracy": 0.6627233326435089, "num_tokens": 453146565.0, "step": 2693 }, { "entropy": 1.7353703478972118, "epoch": 0.2959545192386916, "grad_norm": 0.659747838973999, "learning_rate": 1.9352025422480263e-05, "loss": 1.4226, "mean_token_accuracy": 0.6492767333984375, "num_tokens": 453362565.0, "step": 2694 }, { "entropy": 1.7668009400367737, "epoch": 0.29606437615006453, "grad_norm": 0.6181442737579346, "learning_rate": 1.9351428752694215e-05, "loss": 1.4072, "mean_token_accuracy": 0.6527328540881475, "num_tokens": 453547775.0, "step": 2695 }, { "entropy": 1.7386986712614696, "epoch": 0.2961742330614375, "grad_norm": 0.7841033339500427, "learning_rate": 1.9350831818589207e-05, "loss": 1.4427, "mean_token_accuracy": 0.6478182226419449, "num_tokens": 453707336.0, "step": 2696 }, { "entropy": 1.7775764167308807, "epoch": 0.2962840899728104, "grad_norm": 0.7095794081687927, "learning_rate": 1.935023462018414e-05, "loss": 1.3005, "mean_token_accuracy": 0.6672998617092768, "num_tokens": 453824420.0, "step": 2697 }, { "entropy": 1.6585584084192913, "epoch": 0.29639394688418336, "grad_norm": 0.6653675436973572, "learning_rate": 1.9349637157497912e-05, "loss": 1.4212, "mean_token_accuracy": 0.6473642687002817, "num_tokens": 454019843.0, "step": 2698 }, { "entropy": 1.6676256358623505, "epoch": 0.2965038037955563, "grad_norm": 0.5587428212165833, "learning_rate": 1.934903943054943e-05, "loss": 1.3297, "mean_token_accuracy": 0.6603167007366816, "num_tokens": 454209292.0, "step": 2699 }, { "entropy": 1.7065544823805492, "epoch": 0.29661366070692924, "grad_norm": 0.6772942543029785, "learning_rate": 1.9348441439357607e-05, "loss": 1.3898, "mean_token_accuracy": 0.6574710955222448, "num_tokens": 454350359.0, "step": 2700 }, { "entropy": 1.7034912804762523, "epoch": 0.2967235176183022, "grad_norm": 0.8810101747512817, "learning_rate": 1.9347843183941376e-05, "loss": 1.2346, "mean_token_accuracy": 0.6790671199560165, "num_tokens": 454487540.0, "step": 2701 }, { "entropy": 1.791587918996811, "epoch": 0.2968333745296751, "grad_norm": 0.707517147064209, "learning_rate": 1.9347244664319674e-05, "loss": 1.4353, "mean_token_accuracy": 0.637357547879219, "num_tokens": 454670144.0, "step": 2702 }, { "entropy": 1.65616970260938, "epoch": 0.29694323144104806, "grad_norm": 0.6979170441627502, "learning_rate": 1.9346645880511435e-05, "loss": 1.3765, "mean_token_accuracy": 0.6787627389033636, "num_tokens": 454805453.0, "step": 2703 }, { "entropy": 1.7858235935370128, "epoch": 0.29705308835242095, "grad_norm": 0.7254014015197754, "learning_rate": 1.9346046832535616e-05, "loss": 1.3475, "mean_token_accuracy": 0.6682693660259247, "num_tokens": 454926309.0, "step": 2704 }, { "entropy": 1.7087645729382832, "epoch": 0.2971629452637939, "grad_norm": 0.6836649775505066, "learning_rate": 1.9345447520411176e-05, "loss": 1.4221, "mean_token_accuracy": 0.6326878815889359, "num_tokens": 455124796.0, "step": 2705 }, { "entropy": 1.6126576364040375, "epoch": 0.29727280217516683, "grad_norm": 0.6172579526901245, "learning_rate": 1.9344847944157082e-05, "loss": 1.4618, "mean_token_accuracy": 0.6497344275315603, "num_tokens": 455349822.0, "step": 2706 }, { "entropy": 1.6935412486394246, "epoch": 0.29738265908653977, "grad_norm": 0.7592183351516724, "learning_rate": 1.9344248103792312e-05, "loss": 1.5196, "mean_token_accuracy": 0.6431877315044403, "num_tokens": 455536405.0, "step": 2707 }, { "entropy": 1.8123325010140736, "epoch": 0.2974925159979127, "grad_norm": 0.6768981218338013, "learning_rate": 1.9343647999335852e-05, "loss": 1.5576, "mean_token_accuracy": 0.6386434634526571, "num_tokens": 455720583.0, "step": 2708 }, { "entropy": 1.7398807009061177, "epoch": 0.29760237290928565, "grad_norm": 0.5949780941009521, "learning_rate": 1.9343047630806686e-05, "loss": 1.4819, "mean_token_accuracy": 0.6428021887938181, "num_tokens": 455956692.0, "step": 2709 }, { "entropy": 1.7306031584739685, "epoch": 0.2977122298206586, "grad_norm": 0.7073639035224915, "learning_rate": 1.9342446998223828e-05, "loss": 1.4921, "mean_token_accuracy": 0.6601720154285431, "num_tokens": 456096324.0, "step": 2710 }, { "entropy": 1.6887525916099548, "epoch": 0.29782208673203153, "grad_norm": 0.7725453972816467, "learning_rate": 1.934184610160628e-05, "loss": 1.2753, "mean_token_accuracy": 0.6711432288090388, "num_tokens": 456218859.0, "step": 2711 }, { "entropy": 1.6401757498582203, "epoch": 0.2979319436434045, "grad_norm": 0.7324026823043823, "learning_rate": 1.934124494097306e-05, "loss": 1.3085, "mean_token_accuracy": 0.6673613637685776, "num_tokens": 456346809.0, "step": 2712 }, { "entropy": 1.734503875176112, "epoch": 0.2980418005547774, "grad_norm": 0.7018632888793945, "learning_rate": 1.9340643516343197e-05, "loss": 1.3471, "mean_token_accuracy": 0.6849260876576105, "num_tokens": 456517652.0, "step": 2713 }, { "entropy": 1.6619239548842113, "epoch": 0.29815165746615035, "grad_norm": 0.7496260404586792, "learning_rate": 1.9340041827735724e-05, "loss": 1.3782, "mean_token_accuracy": 0.6606259942054749, "num_tokens": 456674429.0, "step": 2714 }, { "entropy": 1.7683907647927601, "epoch": 0.2982615143775233, "grad_norm": 0.7211847305297852, "learning_rate": 1.9339439875169688e-05, "loss": 1.3487, "mean_token_accuracy": 0.6543639997641245, "num_tokens": 456820642.0, "step": 2715 }, { "entropy": 1.6940323412418365, "epoch": 0.29837137128889624, "grad_norm": 0.8029798865318298, "learning_rate": 1.933883765866414e-05, "loss": 1.284, "mean_token_accuracy": 0.6674282451470693, "num_tokens": 456965638.0, "step": 2716 }, { "entropy": 1.7033016582330067, "epoch": 0.2984812282002691, "grad_norm": 0.6672521829605103, "learning_rate": 1.933823517823813e-05, "loss": 1.416, "mean_token_accuracy": 0.6518148928880692, "num_tokens": 457158175.0, "step": 2717 }, { "entropy": 1.7121039628982544, "epoch": 0.29859108511164206, "grad_norm": 0.9051281213760376, "learning_rate": 1.933763243391074e-05, "loss": 1.4112, "mean_token_accuracy": 0.666137158870697, "num_tokens": 457327515.0, "step": 2718 }, { "entropy": 1.699680785338084, "epoch": 0.298700942023015, "grad_norm": 0.6100730299949646, "learning_rate": 1.933702942570104e-05, "loss": 1.2842, "mean_token_accuracy": 0.6708463281393051, "num_tokens": 457486212.0, "step": 2719 }, { "entropy": 1.7419918080170949, "epoch": 0.29881079893438794, "grad_norm": 0.711141049861908, "learning_rate": 1.9336426153628112e-05, "loss": 1.4956, "mean_token_accuracy": 0.6444249103466669, "num_tokens": 457693330.0, "step": 2720 }, { "entropy": 1.6464999218781788, "epoch": 0.2989206558457609, "grad_norm": 0.8733800649642944, "learning_rate": 1.9335822617711054e-05, "loss": 1.2148, "mean_token_accuracy": 0.6803951313098272, "num_tokens": 457821397.0, "step": 2721 }, { "entropy": 1.6942930221557617, "epoch": 0.2990305127571338, "grad_norm": 0.7114554643630981, "learning_rate": 1.9335218817968967e-05, "loss": 1.3726, "mean_token_accuracy": 0.664305662115415, "num_tokens": 457983544.0, "step": 2722 }, { "entropy": 1.6836686829725902, "epoch": 0.29914036966850677, "grad_norm": 0.708476722240448, "learning_rate": 1.9334614754420958e-05, "loss": 1.2791, "mean_token_accuracy": 0.6689166078964869, "num_tokens": 458134039.0, "step": 2723 }, { "entropy": 1.7399452825387318, "epoch": 0.2992502265798797, "grad_norm": 0.7190913558006287, "learning_rate": 1.9334010427086154e-05, "loss": 1.3825, "mean_token_accuracy": 0.6503856033086777, "num_tokens": 458282778.0, "step": 2724 }, { "entropy": 1.752762794494629, "epoch": 0.29936008349125265, "grad_norm": 0.6258200407028198, "learning_rate": 1.933340583598367e-05, "loss": 1.4285, "mean_token_accuracy": 0.6458547860383987, "num_tokens": 458475221.0, "step": 2725 }, { "entropy": 1.732886830965678, "epoch": 0.2994699404026256, "grad_norm": 0.6532291769981384, "learning_rate": 1.9332800981132648e-05, "loss": 1.3873, "mean_token_accuracy": 0.6579280296961466, "num_tokens": 458642845.0, "step": 2726 }, { "entropy": 1.7705637713273366, "epoch": 0.29957979731399853, "grad_norm": 0.7122387290000916, "learning_rate": 1.933219586255223e-05, "loss": 1.574, "mean_token_accuracy": 0.6305630256732305, "num_tokens": 458841123.0, "step": 2727 }, { "entropy": 1.736388514439265, "epoch": 0.29968965422537147, "grad_norm": 0.6985000967979431, "learning_rate": 1.9331590480261568e-05, "loss": 1.3021, "mean_token_accuracy": 0.6691931088765463, "num_tokens": 458985028.0, "step": 2728 }, { "entropy": 1.7540164987246196, "epoch": 0.2997995111367444, "grad_norm": 0.7186359763145447, "learning_rate": 1.933098483427982e-05, "loss": 1.5224, "mean_token_accuracy": 0.6380279958248138, "num_tokens": 459144300.0, "step": 2729 }, { "entropy": 1.7715636988480885, "epoch": 0.29990936804811735, "grad_norm": 0.8147019147872925, "learning_rate": 1.9330378924626156e-05, "loss": 1.4505, "mean_token_accuracy": 0.6575749566157659, "num_tokens": 459280365.0, "step": 2730 }, { "entropy": 1.6673146188259125, "epoch": 0.30001922495949024, "grad_norm": 0.6811074018478394, "learning_rate": 1.9329772751319755e-05, "loss": 1.4948, "mean_token_accuracy": 0.6632718841234843, "num_tokens": 459456372.0, "step": 2731 }, { "entropy": 1.7491925756136577, "epoch": 0.3001290818708632, "grad_norm": 0.7324425578117371, "learning_rate": 1.93291663143798e-05, "loss": 1.4991, "mean_token_accuracy": 0.652273048957189, "num_tokens": 459617699.0, "step": 2732 }, { "entropy": 1.6765054762363434, "epoch": 0.3002389387822361, "grad_norm": 0.7338621616363525, "learning_rate": 1.9328559613825483e-05, "loss": 1.2981, "mean_token_accuracy": 0.6762462556362152, "num_tokens": 459785217.0, "step": 2733 }, { "entropy": 1.648837725321452, "epoch": 0.30034879569360906, "grad_norm": 0.6431924104690552, "learning_rate": 1.9327952649676006e-05, "loss": 1.2079, "mean_token_accuracy": 0.676081563035647, "num_tokens": 459913674.0, "step": 2734 }, { "entropy": 1.6991098026434581, "epoch": 0.300458652604982, "grad_norm": 0.6374147534370422, "learning_rate": 1.932734542195058e-05, "loss": 1.4266, "mean_token_accuracy": 0.6573264350493749, "num_tokens": 460110173.0, "step": 2735 }, { "entropy": 1.6661823689937592, "epoch": 0.30056850951635494, "grad_norm": 3.0219857692718506, "learning_rate": 1.9326737930668425e-05, "loss": 1.3622, "mean_token_accuracy": 0.6620204945405325, "num_tokens": 460326181.0, "step": 2736 }, { "entropy": 1.769929716984431, "epoch": 0.3006783664277279, "grad_norm": 0.822551429271698, "learning_rate": 1.932613017584877e-05, "loss": 1.4545, "mean_token_accuracy": 0.6456111868222555, "num_tokens": 460473005.0, "step": 2737 }, { "entropy": 1.7742596765359242, "epoch": 0.3007882233391008, "grad_norm": 0.6372090578079224, "learning_rate": 1.9325522157510842e-05, "loss": 1.5462, "mean_token_accuracy": 0.6321751674016317, "num_tokens": 460711663.0, "step": 2738 }, { "entropy": 1.7386666536331177, "epoch": 0.30089808025047377, "grad_norm": 0.7106104493141174, "learning_rate": 1.9324913875673893e-05, "loss": 1.48, "mean_token_accuracy": 0.6593476285537084, "num_tokens": 460896697.0, "step": 2739 }, { "entropy": 1.6937275826931, "epoch": 0.3010079371618467, "grad_norm": 0.654400646686554, "learning_rate": 1.932430533035717e-05, "loss": 1.5314, "mean_token_accuracy": 0.627138485511144, "num_tokens": 461179979.0, "step": 2740 }, { "entropy": 1.6967305839061737, "epoch": 0.30111779407321965, "grad_norm": 0.7618654370307922, "learning_rate": 1.9323696521579933e-05, "loss": 1.3714, "mean_token_accuracy": 0.6593584269285202, "num_tokens": 461381167.0, "step": 2741 }, { "entropy": 1.7463171482086182, "epoch": 0.3012276509845926, "grad_norm": 0.7259137034416199, "learning_rate": 1.932308744936145e-05, "loss": 1.3643, "mean_token_accuracy": 0.6586255977551142, "num_tokens": 461596991.0, "step": 2742 }, { "entropy": 1.7076434095700581, "epoch": 0.30133750789596553, "grad_norm": 0.6767429709434509, "learning_rate": 1.9322478113721e-05, "loss": 1.3569, "mean_token_accuracy": 0.670208474000295, "num_tokens": 461746827.0, "step": 2743 }, { "entropy": 1.6913744111855824, "epoch": 0.30144736480733847, "grad_norm": 0.7813226580619812, "learning_rate": 1.9321868514677874e-05, "loss": 1.3386, "mean_token_accuracy": 0.6839630007743835, "num_tokens": 461894088.0, "step": 2744 }, { "entropy": 1.7275190949440002, "epoch": 0.30155722171871135, "grad_norm": 0.7166203856468201, "learning_rate": 1.9321258652251354e-05, "loss": 1.2261, "mean_token_accuracy": 0.6746133218208948, "num_tokens": 462015371.0, "step": 2745 }, { "entropy": 1.7810499270757039, "epoch": 0.3016670786300843, "grad_norm": 0.6879790425300598, "learning_rate": 1.932064852646075e-05, "loss": 1.4334, "mean_token_accuracy": 0.6484188586473465, "num_tokens": 462235970.0, "step": 2746 }, { "entropy": 1.7113142510255177, "epoch": 0.30177693554145724, "grad_norm": 0.9017158150672913, "learning_rate": 1.9320038137325364e-05, "loss": 1.4883, "mean_token_accuracy": 0.6399280428886414, "num_tokens": 462409137.0, "step": 2747 }, { "entropy": 1.7389302551746368, "epoch": 0.3018867924528302, "grad_norm": 0.7917147874832153, "learning_rate": 1.9319427484864526e-05, "loss": 1.3114, "mean_token_accuracy": 0.6665030618508657, "num_tokens": 462590216.0, "step": 2748 }, { "entropy": 1.6455481847127278, "epoch": 0.3019966493642031, "grad_norm": 0.7206093072891235, "learning_rate": 1.9318816569097557e-05, "loss": 1.3726, "mean_token_accuracy": 0.6634992212057114, "num_tokens": 462764694.0, "step": 2749 }, { "entropy": 1.6946881413459778, "epoch": 0.30210650627557606, "grad_norm": 0.6624197959899902, "learning_rate": 1.9318205390043786e-05, "loss": 1.3401, "mean_token_accuracy": 0.6626093486944834, "num_tokens": 462918633.0, "step": 2750 }, { "entropy": 1.754339079062144, "epoch": 0.302216363186949, "grad_norm": 0.6414199471473694, "learning_rate": 1.931759394772257e-05, "loss": 1.3364, "mean_token_accuracy": 0.6651070167620977, "num_tokens": 463065938.0, "step": 2751 }, { "entropy": 1.7131946782271068, "epoch": 0.30232622009832194, "grad_norm": 0.7476382255554199, "learning_rate": 1.931698224215325e-05, "loss": 1.3628, "mean_token_accuracy": 0.6677864193916321, "num_tokens": 463202521.0, "step": 2752 }, { "entropy": 1.6731769144535065, "epoch": 0.3024360770096949, "grad_norm": 1.3017513751983643, "learning_rate": 1.931637027335519e-05, "loss": 1.1589, "mean_token_accuracy": 0.6780026058355967, "num_tokens": 463403149.0, "step": 2753 }, { "entropy": 1.6853074034055073, "epoch": 0.3025459339210678, "grad_norm": 0.5546422600746155, "learning_rate": 1.9315758041347758e-05, "loss": 1.3004, "mean_token_accuracy": 0.6725151340166727, "num_tokens": 463590458.0, "step": 2754 }, { "entropy": 1.6593196491400402, "epoch": 0.30265579083244076, "grad_norm": 0.699647843837738, "learning_rate": 1.931514554615033e-05, "loss": 1.31, "mean_token_accuracy": 0.6586611072222391, "num_tokens": 463747655.0, "step": 2755 }, { "entropy": 1.6894031167030334, "epoch": 0.3027656477438137, "grad_norm": 0.6589730978012085, "learning_rate": 1.9314532787782295e-05, "loss": 1.3531, "mean_token_accuracy": 0.6681742072105408, "num_tokens": 463913959.0, "step": 2756 }, { "entropy": 1.7593967119852703, "epoch": 0.30287550465518664, "grad_norm": 0.6436064839363098, "learning_rate": 1.9313919766263043e-05, "loss": 1.3402, "mean_token_accuracy": 0.6659826586643854, "num_tokens": 464060668.0, "step": 2757 }, { "entropy": 1.746421605348587, "epoch": 0.30298536156655953, "grad_norm": 0.7272017598152161, "learning_rate": 1.9313306481611977e-05, "loss": 1.3655, "mean_token_accuracy": 0.661940743525823, "num_tokens": 464204000.0, "step": 2758 }, { "entropy": 1.710692157347997, "epoch": 0.30309521847793247, "grad_norm": 0.6198656558990479, "learning_rate": 1.9312692933848505e-05, "loss": 1.3947, "mean_token_accuracy": 0.6594121058781942, "num_tokens": 464384758.0, "step": 2759 }, { "entropy": 1.7099157869815826, "epoch": 0.3032050753893054, "grad_norm": 0.5952613353729248, "learning_rate": 1.931207912299205e-05, "loss": 1.44, "mean_token_accuracy": 0.6447249750296274, "num_tokens": 464582558.0, "step": 2760 }, { "entropy": 1.6782328983147938, "epoch": 0.30331493230067835, "grad_norm": 0.7183821201324463, "learning_rate": 1.9311465049062036e-05, "loss": 1.2911, "mean_token_accuracy": 0.6735262523094813, "num_tokens": 464715667.0, "step": 2761 }, { "entropy": 1.6694311400254567, "epoch": 0.3034247892120513, "grad_norm": 0.8863995671272278, "learning_rate": 1.9310850712077897e-05, "loss": 1.1808, "mean_token_accuracy": 0.6926176349322001, "num_tokens": 464851884.0, "step": 2762 }, { "entropy": 1.6931216617425282, "epoch": 0.30353464612342423, "grad_norm": 0.6631143689155579, "learning_rate": 1.9310236112059076e-05, "loss": 1.5559, "mean_token_accuracy": 0.6472532153129578, "num_tokens": 465012783.0, "step": 2763 }, { "entropy": 1.7171483039855957, "epoch": 0.3036445030347972, "grad_norm": 0.8214719295501709, "learning_rate": 1.9309621249025033e-05, "loss": 1.4744, "mean_token_accuracy": 0.6650175104538599, "num_tokens": 465166374.0, "step": 2764 }, { "entropy": 1.6988566915194194, "epoch": 0.3037543599461701, "grad_norm": 0.7386065125465393, "learning_rate": 1.930900612299522e-05, "loss": 1.5162, "mean_token_accuracy": 0.6555062482754389, "num_tokens": 465343817.0, "step": 2765 }, { "entropy": 1.721668581167857, "epoch": 0.30386421685754306, "grad_norm": 0.6086675524711609, "learning_rate": 1.93083907339891e-05, "loss": 1.3695, "mean_token_accuracy": 0.6555673032999039, "num_tokens": 465495475.0, "step": 2766 }, { "entropy": 1.7061657309532166, "epoch": 0.303974073768916, "grad_norm": 0.9607113599777222, "learning_rate": 1.930777508202617e-05, "loss": 1.294, "mean_token_accuracy": 0.6664957702159882, "num_tokens": 465650381.0, "step": 2767 }, { "entropy": 1.6989998122056325, "epoch": 0.30408393068028894, "grad_norm": 0.7101160883903503, "learning_rate": 1.9307159167125887e-05, "loss": 1.3634, "mean_token_accuracy": 0.6705865065256754, "num_tokens": 465802895.0, "step": 2768 }, { "entropy": 1.7586182653903961, "epoch": 0.3041937875916619, "grad_norm": 0.7714918851852417, "learning_rate": 1.9306542989307774e-05, "loss": 1.5817, "mean_token_accuracy": 0.6417257438103358, "num_tokens": 465991579.0, "step": 2769 }, { "entropy": 1.6654569506645203, "epoch": 0.3043036445030348, "grad_norm": 0.7866225242614746, "learning_rate": 1.930592654859131e-05, "loss": 1.5116, "mean_token_accuracy": 0.6589531550804774, "num_tokens": 466160734.0, "step": 2770 }, { "entropy": 1.7186169922351837, "epoch": 0.30441350141440776, "grad_norm": 0.8639843463897705, "learning_rate": 1.9305309844996014e-05, "loss": 1.4303, "mean_token_accuracy": 0.6495883216460546, "num_tokens": 466326061.0, "step": 2771 }, { "entropy": 1.7337853809197743, "epoch": 0.30452335832578065, "grad_norm": 0.7233784794807434, "learning_rate": 1.9304692878541407e-05, "loss": 1.3016, "mean_token_accuracy": 0.6698676447073618, "num_tokens": 466463745.0, "step": 2772 }, { "entropy": 1.6232518255710602, "epoch": 0.3046332152371536, "grad_norm": 0.6475121378898621, "learning_rate": 1.930407564924701e-05, "loss": 1.4022, "mean_token_accuracy": 0.6630886395772299, "num_tokens": 466593450.0, "step": 2773 }, { "entropy": 1.6947415073712666, "epoch": 0.30474307214852653, "grad_norm": 0.6529332399368286, "learning_rate": 1.930345815713236e-05, "loss": 1.489, "mean_token_accuracy": 0.6480614195267359, "num_tokens": 466750956.0, "step": 2774 }, { "entropy": 1.7117355068524678, "epoch": 0.30485292905989947, "grad_norm": 0.7926428914070129, "learning_rate": 1.9302840402217004e-05, "loss": 1.5965, "mean_token_accuracy": 0.638613685965538, "num_tokens": 466927278.0, "step": 2775 }, { "entropy": 1.6899478038152058, "epoch": 0.3049627859712724, "grad_norm": 0.7616437673568726, "learning_rate": 1.930222238452049e-05, "loss": 1.4495, "mean_token_accuracy": 0.6520648350318273, "num_tokens": 467105233.0, "step": 2776 }, { "entropy": 1.6401771505673726, "epoch": 0.30507264288264535, "grad_norm": 0.6086208820343018, "learning_rate": 1.9301604104062378e-05, "loss": 1.383, "mean_token_accuracy": 0.6533524294694265, "num_tokens": 467265754.0, "step": 2777 }, { "entropy": 1.6210876603921254, "epoch": 0.3051824997940183, "grad_norm": 0.717979907989502, "learning_rate": 1.9300985560862235e-05, "loss": 1.3769, "mean_token_accuracy": 0.6591470589240392, "num_tokens": 467411535.0, "step": 2778 }, { "entropy": 1.7154032389322917, "epoch": 0.30529235670539123, "grad_norm": 0.6699005365371704, "learning_rate": 1.9300366754939642e-05, "loss": 1.4056, "mean_token_accuracy": 0.639826089143753, "num_tokens": 467624066.0, "step": 2779 }, { "entropy": 1.7107741832733154, "epoch": 0.3054022136167642, "grad_norm": 0.6464679837226868, "learning_rate": 1.9299747686314178e-05, "loss": 1.3999, "mean_token_accuracy": 0.6531073401371638, "num_tokens": 467762610.0, "step": 2780 }, { "entropy": 1.723695029815038, "epoch": 0.3055120705281371, "grad_norm": 0.645092785358429, "learning_rate": 1.9299128355005443e-05, "loss": 1.2986, "mean_token_accuracy": 0.6608155220746994, "num_tokens": 467932748.0, "step": 2781 }, { "entropy": 1.7858980596065521, "epoch": 0.30562192743951005, "grad_norm": 0.7578333020210266, "learning_rate": 1.9298508761033035e-05, "loss": 1.4476, "mean_token_accuracy": 0.6441960881153742, "num_tokens": 468085168.0, "step": 2782 }, { "entropy": 1.6949599981307983, "epoch": 0.305731784350883, "grad_norm": 0.7087303400039673, "learning_rate": 1.929788890441656e-05, "loss": 1.324, "mean_token_accuracy": 0.6659413874149323, "num_tokens": 468243090.0, "step": 2783 }, { "entropy": 1.7236262162526448, "epoch": 0.30584164126225594, "grad_norm": 0.652289628982544, "learning_rate": 1.9297268785175647e-05, "loss": 1.3338, "mean_token_accuracy": 0.66497070590655, "num_tokens": 468357807.0, "step": 2784 }, { "entropy": 1.7121999263763428, "epoch": 0.3059514981736288, "grad_norm": 0.7053149938583374, "learning_rate": 1.9296648403329915e-05, "loss": 1.5589, "mean_token_accuracy": 0.6464631706476212, "num_tokens": 468557805.0, "step": 2785 }, { "entropy": 1.7528938154379528, "epoch": 0.30606135508500176, "grad_norm": 0.8355051279067993, "learning_rate": 1.9296027758898993e-05, "loss": 1.5363, "mean_token_accuracy": 0.6388404369354248, "num_tokens": 468705510.0, "step": 2786 }, { "entropy": 1.6805029014746349, "epoch": 0.3061712119963747, "grad_norm": 0.7608030438423157, "learning_rate": 1.9295406851902538e-05, "loss": 1.4483, "mean_token_accuracy": 0.6516820987065634, "num_tokens": 468892051.0, "step": 2787 }, { "entropy": 1.6512704094250996, "epoch": 0.30628106890774764, "grad_norm": 0.789167046546936, "learning_rate": 1.929478568236019e-05, "loss": 1.2027, "mean_token_accuracy": 0.6777097682158152, "num_tokens": 469054572.0, "step": 2788 }, { "entropy": 1.6551719705263774, "epoch": 0.3063909258191206, "grad_norm": 0.6679402589797974, "learning_rate": 1.9294164250291613e-05, "loss": 1.4609, "mean_token_accuracy": 0.6612751533587774, "num_tokens": 469218937.0, "step": 2789 }, { "entropy": 1.6761693557103474, "epoch": 0.3065007827304935, "grad_norm": 0.6211634278297424, "learning_rate": 1.9293542555716476e-05, "loss": 1.3792, "mean_token_accuracy": 0.6515985727310181, "num_tokens": 469472537.0, "step": 2790 }, { "entropy": 1.685540109872818, "epoch": 0.30661063964186647, "grad_norm": 0.7555694580078125, "learning_rate": 1.9292920598654455e-05, "loss": 1.2874, "mean_token_accuracy": 0.6632583638032278, "num_tokens": 469610592.0, "step": 2791 }, { "entropy": 1.760539670785268, "epoch": 0.3067204965532394, "grad_norm": 0.6980460286140442, "learning_rate": 1.9292298379125235e-05, "loss": 1.3597, "mean_token_accuracy": 0.6550954182942709, "num_tokens": 469752029.0, "step": 2792 }, { "entropy": 1.7144795854886372, "epoch": 0.30683035346461235, "grad_norm": 0.6551751494407654, "learning_rate": 1.9291675897148504e-05, "loss": 1.3658, "mean_token_accuracy": 0.6568573415279388, "num_tokens": 469937147.0, "step": 2793 }, { "entropy": 1.7677671909332275, "epoch": 0.3069402103759853, "grad_norm": 0.7456969618797302, "learning_rate": 1.9291053152743968e-05, "loss": 1.5226, "mean_token_accuracy": 0.6467147767543793, "num_tokens": 470105109.0, "step": 2794 }, { "entropy": 1.7279229164123535, "epoch": 0.30705006728735823, "grad_norm": 0.7341436743736267, "learning_rate": 1.929043014593134e-05, "loss": 1.3933, "mean_token_accuracy": 0.6459408948818842, "num_tokens": 470253996.0, "step": 2795 }, { "entropy": 1.7426668008168538, "epoch": 0.30715992419873117, "grad_norm": 0.7237141132354736, "learning_rate": 1.9289806876730328e-05, "loss": 1.3304, "mean_token_accuracy": 0.6563834051291147, "num_tokens": 470407684.0, "step": 2796 }, { "entropy": 1.7966994444529216, "epoch": 0.3072697811101041, "grad_norm": 0.6734642386436462, "learning_rate": 1.9289183345160666e-05, "loss": 1.368, "mean_token_accuracy": 0.6494917968908945, "num_tokens": 470537389.0, "step": 2797 }, { "entropy": 1.7864558796087902, "epoch": 0.30737963802147705, "grad_norm": 0.7371609210968018, "learning_rate": 1.9288559551242084e-05, "loss": 1.4408, "mean_token_accuracy": 0.651896004875501, "num_tokens": 470702673.0, "step": 2798 }, { "entropy": 1.7101215819517772, "epoch": 0.30748949493284994, "grad_norm": 0.9700989127159119, "learning_rate": 1.9287935494994333e-05, "loss": 1.4321, "mean_token_accuracy": 0.6406466712554296, "num_tokens": 470848334.0, "step": 2799 }, { "entropy": 1.66282253464063, "epoch": 0.3075993518442229, "grad_norm": 0.6381438970565796, "learning_rate": 1.9287311176437154e-05, "loss": 1.3278, "mean_token_accuracy": 0.6706396341323853, "num_tokens": 470980854.0, "step": 2800 }, { "entropy": 1.7123548885186513, "epoch": 0.3077092087555958, "grad_norm": 0.6777034401893616, "learning_rate": 1.928668659559031e-05, "loss": 1.2528, "mean_token_accuracy": 0.6732690334320068, "num_tokens": 471106170.0, "step": 2801 }, { "entropy": 1.6131497025489807, "epoch": 0.30781906566696876, "grad_norm": 0.6792572736740112, "learning_rate": 1.9286061752473575e-05, "loss": 1.3078, "mean_token_accuracy": 0.6727622449398041, "num_tokens": 471244822.0, "step": 2802 }, { "entropy": 1.731354941924413, "epoch": 0.3079289225783417, "grad_norm": 0.6826562881469727, "learning_rate": 1.9285436647106716e-05, "loss": 1.4002, "mean_token_accuracy": 0.6561285803715388, "num_tokens": 471422667.0, "step": 2803 }, { "entropy": 1.6713635822137196, "epoch": 0.30803877948971464, "grad_norm": 0.592697262763977, "learning_rate": 1.9284811279509518e-05, "loss": 1.46, "mean_token_accuracy": 0.649135539929072, "num_tokens": 471606192.0, "step": 2804 }, { "entropy": 1.702138513326645, "epoch": 0.3081486364010876, "grad_norm": 0.8116854429244995, "learning_rate": 1.928418564970178e-05, "loss": 1.2933, "mean_token_accuracy": 0.6709979226191839, "num_tokens": 471749051.0, "step": 2805 }, { "entropy": 1.6859862705071766, "epoch": 0.3082584933124605, "grad_norm": 0.670845091342926, "learning_rate": 1.9283559757703295e-05, "loss": 1.2985, "mean_token_accuracy": 0.6729146291812261, "num_tokens": 471883696.0, "step": 2806 }, { "entropy": 1.7104649444421132, "epoch": 0.30836835022383347, "grad_norm": 0.7185712456703186, "learning_rate": 1.928293360353388e-05, "loss": 1.4908, "mean_token_accuracy": 0.6591552595297495, "num_tokens": 472068070.0, "step": 2807 }, { "entropy": 1.6668047904968262, "epoch": 0.3084782071352064, "grad_norm": 0.6307313442230225, "learning_rate": 1.9282307187213346e-05, "loss": 1.2885, "mean_token_accuracy": 0.6734770586093267, "num_tokens": 472204796.0, "step": 2808 }, { "entropy": 1.756018191576004, "epoch": 0.30858806404657935, "grad_norm": 0.6855459213256836, "learning_rate": 1.928168050876152e-05, "loss": 1.4125, "mean_token_accuracy": 0.6466716329256693, "num_tokens": 472354680.0, "step": 2809 }, { "entropy": 1.7116002043088276, "epoch": 0.3086979209579523, "grad_norm": 0.7074426412582397, "learning_rate": 1.9281053568198245e-05, "loss": 1.3565, "mean_token_accuracy": 0.6588050077358881, "num_tokens": 472525501.0, "step": 2810 }, { "entropy": 1.6833085417747498, "epoch": 0.30880777786932523, "grad_norm": 0.7049890160560608, "learning_rate": 1.928042636554335e-05, "loss": 1.3164, "mean_token_accuracy": 0.6683538804451624, "num_tokens": 472719716.0, "step": 2811 }, { "entropy": 1.6787909964720409, "epoch": 0.3089176347806981, "grad_norm": 0.703137218952179, "learning_rate": 1.9279798900816696e-05, "loss": 1.3727, "mean_token_accuracy": 0.6661172757546107, "num_tokens": 472881173.0, "step": 2812 }, { "entropy": 1.7261568506558735, "epoch": 0.30902749169207105, "grad_norm": 0.6603509783744812, "learning_rate": 1.9279171174038132e-05, "loss": 1.3456, "mean_token_accuracy": 0.6602363437414169, "num_tokens": 473034728.0, "step": 2813 }, { "entropy": 1.685040682554245, "epoch": 0.309137348603444, "grad_norm": 0.743989109992981, "learning_rate": 1.9278543185227535e-05, "loss": 1.3698, "mean_token_accuracy": 0.6495349953571955, "num_tokens": 473229426.0, "step": 2814 }, { "entropy": 1.7586186130841572, "epoch": 0.30924720551481694, "grad_norm": 0.7787970900535583, "learning_rate": 1.9277914934404774e-05, "loss": 1.4331, "mean_token_accuracy": 0.6600687354803085, "num_tokens": 473377396.0, "step": 2815 }, { "entropy": 1.6828500429789226, "epoch": 0.3093570624261899, "grad_norm": 0.6871913075447083, "learning_rate": 1.927728642158974e-05, "loss": 1.3833, "mean_token_accuracy": 0.6608909120162328, "num_tokens": 473539223.0, "step": 2816 }, { "entropy": 1.749051849047343, "epoch": 0.3094669193375628, "grad_norm": 0.7910235524177551, "learning_rate": 1.9276657646802318e-05, "loss": 1.4661, "mean_token_accuracy": 0.6595475325981776, "num_tokens": 473693265.0, "step": 2817 }, { "entropy": 1.6784795920054119, "epoch": 0.30957677624893576, "grad_norm": 0.5966885089874268, "learning_rate": 1.9276028610062412e-05, "loss": 1.3034, "mean_token_accuracy": 0.6713368693987528, "num_tokens": 473845058.0, "step": 2818 }, { "entropy": 1.662716676791509, "epoch": 0.3096866331603087, "grad_norm": 0.6432068347930908, "learning_rate": 1.927539931138993e-05, "loss": 1.3785, "mean_token_accuracy": 0.6545740862687429, "num_tokens": 474043963.0, "step": 2819 }, { "entropy": 1.7873293658097584, "epoch": 0.30979649007168164, "grad_norm": 0.733140766620636, "learning_rate": 1.9274769750804786e-05, "loss": 1.4616, "mean_token_accuracy": 0.6570829351743063, "num_tokens": 474264410.0, "step": 2820 }, { "entropy": 1.6941826542218525, "epoch": 0.3099063469830546, "grad_norm": 0.6925363540649414, "learning_rate": 1.9274139928326913e-05, "loss": 1.206, "mean_token_accuracy": 0.6815744390090307, "num_tokens": 474365234.0, "step": 2821 }, { "entropy": 1.690369079510371, "epoch": 0.3100162038944275, "grad_norm": 0.5778855681419373, "learning_rate": 1.927350984397623e-05, "loss": 1.3985, "mean_token_accuracy": 0.655535156528155, "num_tokens": 474574557.0, "step": 2822 }, { "entropy": 1.639299343029658, "epoch": 0.31012606080580046, "grad_norm": 0.6779983639717102, "learning_rate": 1.92728794977727e-05, "loss": 1.3839, "mean_token_accuracy": 0.6478785822788874, "num_tokens": 474785559.0, "step": 2823 }, { "entropy": 1.6963231166203816, "epoch": 0.3102359177171734, "grad_norm": 0.6037493944168091, "learning_rate": 1.9272248889736255e-05, "loss": 1.4553, "mean_token_accuracy": 0.6422694027423859, "num_tokens": 474970243.0, "step": 2824 }, { "entropy": 1.7773303886254628, "epoch": 0.31034577462854634, "grad_norm": 0.6794271469116211, "learning_rate": 1.927161801988686e-05, "loss": 1.454, "mean_token_accuracy": 0.6485249102115631, "num_tokens": 475167904.0, "step": 2825 }, { "entropy": 1.7010780572891235, "epoch": 0.31045563153991923, "grad_norm": 0.7082254886627197, "learning_rate": 1.9270986888244486e-05, "loss": 1.3759, "mean_token_accuracy": 0.6544408549865087, "num_tokens": 475314141.0, "step": 2826 }, { "entropy": 1.7761707603931427, "epoch": 0.31056548845129217, "grad_norm": 0.6975213289260864, "learning_rate": 1.92703554948291e-05, "loss": 1.5001, "mean_token_accuracy": 0.6315238277117411, "num_tokens": 475504348.0, "step": 2827 }, { "entropy": 1.7582517365614574, "epoch": 0.3106753453626651, "grad_norm": 0.7386556267738342, "learning_rate": 1.926972383966069e-05, "loss": 1.548, "mean_token_accuracy": 0.6362548967202505, "num_tokens": 475668498.0, "step": 2828 }, { "entropy": 1.7129935026168823, "epoch": 0.31078520227403805, "grad_norm": 0.6855206489562988, "learning_rate": 1.9269091922759248e-05, "loss": 1.4024, "mean_token_accuracy": 0.6538207034269968, "num_tokens": 475810430.0, "step": 2829 }, { "entropy": 1.6759546200434368, "epoch": 0.310895059185411, "grad_norm": 0.7332449555397034, "learning_rate": 1.9268459744144775e-05, "loss": 1.3902, "mean_token_accuracy": 0.6686356763044993, "num_tokens": 476027236.0, "step": 2830 }, { "entropy": 1.7742613156636555, "epoch": 0.31100491609678393, "grad_norm": 0.6197798252105713, "learning_rate": 1.9267827303837277e-05, "loss": 1.4344, "mean_token_accuracy": 0.6507979234059652, "num_tokens": 476190360.0, "step": 2831 }, { "entropy": 1.6519160469373066, "epoch": 0.3111147730081569, "grad_norm": 0.5655571818351746, "learning_rate": 1.9267194601856765e-05, "loss": 1.3515, "mean_token_accuracy": 0.6520318339268366, "num_tokens": 476376327.0, "step": 2832 }, { "entropy": 1.7403542399406433, "epoch": 0.3112246299195298, "grad_norm": 0.7627872228622437, "learning_rate": 1.9266561638223272e-05, "loss": 1.2572, "mean_token_accuracy": 0.6759726454814275, "num_tokens": 476486395.0, "step": 2833 }, { "entropy": 1.767777254184087, "epoch": 0.31133448683090276, "grad_norm": 0.7002077102661133, "learning_rate": 1.926592841295683e-05, "loss": 1.4234, "mean_token_accuracy": 0.6400815695524216, "num_tokens": 476665343.0, "step": 2834 }, { "entropy": 1.6697245140870411, "epoch": 0.3114443437422757, "grad_norm": 0.6324102282524109, "learning_rate": 1.9265294926077476e-05, "loss": 1.4785, "mean_token_accuracy": 0.6443575223286947, "num_tokens": 476858023.0, "step": 2835 }, { "entropy": 1.6648296117782593, "epoch": 0.31155420065364864, "grad_norm": 0.6571188569068909, "learning_rate": 1.9264661177605264e-05, "loss": 1.281, "mean_token_accuracy": 0.6755544741948446, "num_tokens": 477049028.0, "step": 2836 }, { "entropy": 1.7388223707675934, "epoch": 0.3116640575650216, "grad_norm": 0.7424740195274353, "learning_rate": 1.926402716756025e-05, "loss": 1.4497, "mean_token_accuracy": 0.6438274731238683, "num_tokens": 477173446.0, "step": 2837 }, { "entropy": 1.7497480809688568, "epoch": 0.3117739144763945, "grad_norm": 0.7240679860115051, "learning_rate": 1.9263392895962497e-05, "loss": 1.4083, "mean_token_accuracy": 0.6496013253927231, "num_tokens": 477314695.0, "step": 2838 }, { "entropy": 1.6946297883987427, "epoch": 0.3118837713877674, "grad_norm": 0.680553674697876, "learning_rate": 1.9262758362832082e-05, "loss": 1.272, "mean_token_accuracy": 0.6792226930459341, "num_tokens": 477478810.0, "step": 2839 }, { "entropy": 1.7206788162390392, "epoch": 0.31199362829914035, "grad_norm": 0.7550413012504578, "learning_rate": 1.9262123568189094e-05, "loss": 1.4304, "mean_token_accuracy": 0.6598779608805975, "num_tokens": 477598456.0, "step": 2840 }, { "entropy": 1.655482719341914, "epoch": 0.3121034852105133, "grad_norm": 0.6688495874404907, "learning_rate": 1.9261488512053615e-05, "loss": 1.2902, "mean_token_accuracy": 0.667375867565473, "num_tokens": 477733015.0, "step": 2841 }, { "entropy": 1.727291077375412, "epoch": 0.31221334212188623, "grad_norm": 0.7079230546951294, "learning_rate": 1.9260853194445743e-05, "loss": 1.4079, "mean_token_accuracy": 0.6517117569843928, "num_tokens": 477897769.0, "step": 2842 }, { "entropy": 1.7456689874331157, "epoch": 0.31232319903325917, "grad_norm": 0.6888191103935242, "learning_rate": 1.9260217615385593e-05, "loss": 1.5706, "mean_token_accuracy": 0.6395171880722046, "num_tokens": 478082329.0, "step": 2843 }, { "entropy": 1.7743544578552246, "epoch": 0.3124330559446321, "grad_norm": 0.7306809425354004, "learning_rate": 1.9259581774893278e-05, "loss": 1.3308, "mean_token_accuracy": 0.660937691728274, "num_tokens": 478250478.0, "step": 2844 }, { "entropy": 1.7033919493357341, "epoch": 0.31254291285600505, "grad_norm": 0.7696589827537537, "learning_rate": 1.9258945672988926e-05, "loss": 1.3718, "mean_token_accuracy": 0.6629279802242914, "num_tokens": 478402917.0, "step": 2845 }, { "entropy": 1.6651720503966014, "epoch": 0.312652769767378, "grad_norm": 0.7585199475288391, "learning_rate": 1.925830930969266e-05, "loss": 1.2988, "mean_token_accuracy": 0.675694132844607, "num_tokens": 478585743.0, "step": 2846 }, { "entropy": 1.7346419990062714, "epoch": 0.31276262667875093, "grad_norm": 0.7175132036209106, "learning_rate": 1.9257672685024625e-05, "loss": 1.5035, "mean_token_accuracy": 0.6487277994553248, "num_tokens": 478737605.0, "step": 2847 }, { "entropy": 1.714732418457667, "epoch": 0.3128724835901239, "grad_norm": 0.7112360596656799, "learning_rate": 1.9257035799004974e-05, "loss": 1.5867, "mean_token_accuracy": 0.6398107608159384, "num_tokens": 478931966.0, "step": 2848 }, { "entropy": 1.6614445745944977, "epoch": 0.3129823405014968, "grad_norm": 0.6705912351608276, "learning_rate": 1.925639865165386e-05, "loss": 1.2835, "mean_token_accuracy": 0.67622738579909, "num_tokens": 479049933.0, "step": 2849 }, { "entropy": 1.7204997936884563, "epoch": 0.31309219741286975, "grad_norm": 0.5869386196136475, "learning_rate": 1.9255761242991445e-05, "loss": 1.4058, "mean_token_accuracy": 0.6542830715576807, "num_tokens": 479252205.0, "step": 2850 }, { "entropy": 1.7119795382022858, "epoch": 0.3132020543242427, "grad_norm": 0.6954273581504822, "learning_rate": 1.925512357303791e-05, "loss": 1.3548, "mean_token_accuracy": 0.6612391769886017, "num_tokens": 479422251.0, "step": 2851 }, { "entropy": 1.6646559834480286, "epoch": 0.31331191123561564, "grad_norm": 0.8026860952377319, "learning_rate": 1.9254485641813434e-05, "loss": 1.4965, "mean_token_accuracy": 0.6526677558819453, "num_tokens": 479628173.0, "step": 2852 }, { "entropy": 1.7153493762016296, "epoch": 0.3134217681469885, "grad_norm": 0.6951051950454712, "learning_rate": 1.9253847449338202e-05, "loss": 1.3979, "mean_token_accuracy": 0.6515944103399912, "num_tokens": 479815276.0, "step": 2853 }, { "entropy": 1.6854738493760426, "epoch": 0.31353162505836146, "grad_norm": 0.6943417191505432, "learning_rate": 1.9253208995632426e-05, "loss": 1.3106, "mean_token_accuracy": 0.6591322422027588, "num_tokens": 480010470.0, "step": 2854 }, { "entropy": 1.7210654417673747, "epoch": 0.3136414819697344, "grad_norm": 0.7324227690696716, "learning_rate": 1.9252570280716298e-05, "loss": 1.2533, "mean_token_accuracy": 0.6761431097984314, "num_tokens": 480149477.0, "step": 2855 }, { "entropy": 1.7122070292631786, "epoch": 0.31375133888110734, "grad_norm": 0.723173975944519, "learning_rate": 1.9251931304610042e-05, "loss": 1.3591, "mean_token_accuracy": 0.6570076793432236, "num_tokens": 480358379.0, "step": 2856 }, { "entropy": 1.7481829424699147, "epoch": 0.3138611957924803, "grad_norm": 0.7296705842018127, "learning_rate": 1.925129206733388e-05, "loss": 1.352, "mean_token_accuracy": 0.6633712897698084, "num_tokens": 480463609.0, "step": 2857 }, { "entropy": 1.6913323799769084, "epoch": 0.3139710527038532, "grad_norm": 0.6928101181983948, "learning_rate": 1.925065256890804e-05, "loss": 1.4533, "mean_token_accuracy": 0.6504810303449631, "num_tokens": 480692043.0, "step": 2858 }, { "entropy": 1.6184549927711487, "epoch": 0.31408090961522617, "grad_norm": 0.6742601990699768, "learning_rate": 1.9250012809352764e-05, "loss": 1.2693, "mean_token_accuracy": 0.6729711244503657, "num_tokens": 480841939.0, "step": 2859 }, { "entropy": 1.763518790404002, "epoch": 0.3141907665265991, "grad_norm": 0.9082183241844177, "learning_rate": 1.92493727886883e-05, "loss": 1.4003, "mean_token_accuracy": 0.6487570206324259, "num_tokens": 480978037.0, "step": 2860 }, { "entropy": 1.6946383118629456, "epoch": 0.31430062343797205, "grad_norm": 0.613614559173584, "learning_rate": 1.9248732506934902e-05, "loss": 1.3008, "mean_token_accuracy": 0.6719754189252853, "num_tokens": 481144692.0, "step": 2861 }, { "entropy": 1.6866445640722911, "epoch": 0.314410480349345, "grad_norm": 0.6680022478103638, "learning_rate": 1.924809196411284e-05, "loss": 1.3679, "mean_token_accuracy": 0.6613704959551493, "num_tokens": 481335314.0, "step": 2862 }, { "entropy": 1.710518628358841, "epoch": 0.31452033726071793, "grad_norm": 0.5925086140632629, "learning_rate": 1.9247451160242385e-05, "loss": 1.4802, "mean_token_accuracy": 0.6449962158997854, "num_tokens": 481534394.0, "step": 2863 }, { "entropy": 1.7207396229108174, "epoch": 0.31463019417209087, "grad_norm": 0.7651747465133667, "learning_rate": 1.9246810095343815e-05, "loss": 1.5563, "mean_token_accuracy": 0.6396962677439054, "num_tokens": 481754072.0, "step": 2864 }, { "entropy": 1.6443149745464325, "epoch": 0.3147400510834638, "grad_norm": 0.6283020973205566, "learning_rate": 1.9246168769437426e-05, "loss": 1.2379, "mean_token_accuracy": 0.6734432826439539, "num_tokens": 481925605.0, "step": 2865 }, { "entropy": 1.7003800670305889, "epoch": 0.3148499079948367, "grad_norm": 0.6588708162307739, "learning_rate": 1.9245527182543506e-05, "loss": 1.4739, "mean_token_accuracy": 0.6499484032392502, "num_tokens": 482134082.0, "step": 2866 }, { "entropy": 1.6552692552407582, "epoch": 0.31495976490620964, "grad_norm": 0.7375697493553162, "learning_rate": 1.9244885334682367e-05, "loss": 1.2952, "mean_token_accuracy": 0.6863390256961187, "num_tokens": 482280461.0, "step": 2867 }, { "entropy": 1.682984471321106, "epoch": 0.3150696218175826, "grad_norm": 0.6565489172935486, "learning_rate": 1.9244243225874328e-05, "loss": 1.321, "mean_token_accuracy": 0.6630292187134424, "num_tokens": 482426569.0, "step": 2868 }, { "entropy": 1.7070794304211934, "epoch": 0.3151794787289555, "grad_norm": 0.6953569650650024, "learning_rate": 1.92436008561397e-05, "loss": 1.2777, "mean_token_accuracy": 0.6620368659496307, "num_tokens": 482561769.0, "step": 2869 }, { "entropy": 1.7198736766974132, "epoch": 0.31528933564032846, "grad_norm": 0.6899498701095581, "learning_rate": 1.924295822549882e-05, "loss": 1.3307, "mean_token_accuracy": 0.6546566337347031, "num_tokens": 482694606.0, "step": 2870 }, { "entropy": 1.7433649897575378, "epoch": 0.3153991925517014, "grad_norm": 0.624694287776947, "learning_rate": 1.9242315333972028e-05, "loss": 1.464, "mean_token_accuracy": 0.6367517908414205, "num_tokens": 482880755.0, "step": 2871 }, { "entropy": 1.6819024284680684, "epoch": 0.31550904946307434, "grad_norm": 1.0012410879135132, "learning_rate": 1.924167218157967e-05, "loss": 1.3411, "mean_token_accuracy": 0.6579955021540324, "num_tokens": 483037033.0, "step": 2872 }, { "entropy": 1.7648041447003682, "epoch": 0.3156189063744473, "grad_norm": 0.772260844707489, "learning_rate": 1.9241028768342097e-05, "loss": 1.4098, "mean_token_accuracy": 0.6752532223860422, "num_tokens": 483178611.0, "step": 2873 }, { "entropy": 1.741791735092799, "epoch": 0.3157287632858202, "grad_norm": 0.6674953103065491, "learning_rate": 1.9240385094279682e-05, "loss": 1.3883, "mean_token_accuracy": 0.6550341794888178, "num_tokens": 483365090.0, "step": 2874 }, { "entropy": 1.7017056147257488, "epoch": 0.31583862019719317, "grad_norm": 0.7384848594665527, "learning_rate": 1.923974115941279e-05, "loss": 1.4, "mean_token_accuracy": 0.6497112860282263, "num_tokens": 483590442.0, "step": 2875 }, { "entropy": 1.7605169018109639, "epoch": 0.3159484771085661, "grad_norm": 0.6867170929908752, "learning_rate": 1.92390969637618e-05, "loss": 1.4002, "mean_token_accuracy": 0.6571317712465922, "num_tokens": 483711913.0, "step": 2876 }, { "entropy": 1.7703985174496968, "epoch": 0.31605833401993905, "grad_norm": 0.7674762010574341, "learning_rate": 1.9238452507347112e-05, "loss": 1.3849, "mean_token_accuracy": 0.659190704425176, "num_tokens": 483868222.0, "step": 2877 }, { "entropy": 1.7050415376822154, "epoch": 0.316168190931312, "grad_norm": 0.69243985414505, "learning_rate": 1.9237807790189108e-05, "loss": 1.4312, "mean_token_accuracy": 0.6475951820611954, "num_tokens": 484031399.0, "step": 2878 }, { "entropy": 1.6624840299288433, "epoch": 0.31627804784268493, "grad_norm": 0.670750617980957, "learning_rate": 1.9237162812308204e-05, "loss": 1.3691, "mean_token_accuracy": 0.657655676205953, "num_tokens": 484182873.0, "step": 2879 }, { "entropy": 1.748474011818568, "epoch": 0.3163879047540578, "grad_norm": 0.7633766531944275, "learning_rate": 1.9236517573724808e-05, "loss": 1.4173, "mean_token_accuracy": 0.6639850089947382, "num_tokens": 484326382.0, "step": 2880 }, { "entropy": 1.7452166378498077, "epoch": 0.31649776166543075, "grad_norm": 0.6374519467353821, "learning_rate": 1.923587207445934e-05, "loss": 1.428, "mean_token_accuracy": 0.643037294348081, "num_tokens": 484478175.0, "step": 2881 }, { "entropy": 1.7703827420870464, "epoch": 0.3166076185768037, "grad_norm": 0.6338856816291809, "learning_rate": 1.923522631453223e-05, "loss": 1.4608, "mean_token_accuracy": 0.6400000900030136, "num_tokens": 484684598.0, "step": 2882 }, { "entropy": 1.6763095458348591, "epoch": 0.31671747548817664, "grad_norm": 0.7417557835578918, "learning_rate": 1.9234580293963922e-05, "loss": 1.377, "mean_token_accuracy": 0.6634032080570856, "num_tokens": 484851276.0, "step": 2883 }, { "entropy": 1.7742979725201924, "epoch": 0.3168273323995496, "grad_norm": 0.8382861018180847, "learning_rate": 1.9233934012774855e-05, "loss": 1.3599, "mean_token_accuracy": 0.6617914984623591, "num_tokens": 485027045.0, "step": 2884 }, { "entropy": 1.6628169218699138, "epoch": 0.3169371893109225, "grad_norm": 0.6727002263069153, "learning_rate": 1.923328747098549e-05, "loss": 1.2461, "mean_token_accuracy": 0.6721131453911463, "num_tokens": 485188142.0, "step": 2885 }, { "entropy": 1.6688977181911469, "epoch": 0.31704704622229546, "grad_norm": 0.6404067277908325, "learning_rate": 1.9232640668616284e-05, "loss": 1.4282, "mean_token_accuracy": 0.6530584941307703, "num_tokens": 485375508.0, "step": 2886 }, { "entropy": 1.717422644297282, "epoch": 0.3171569031336684, "grad_norm": 0.6284343004226685, "learning_rate": 1.923199360568771e-05, "loss": 1.3367, "mean_token_accuracy": 0.6689218978087107, "num_tokens": 485534089.0, "step": 2887 }, { "entropy": 1.6929986675580342, "epoch": 0.31726676004504134, "grad_norm": 0.6857246160507202, "learning_rate": 1.923134628222025e-05, "loss": 1.357, "mean_token_accuracy": 0.663006509343783, "num_tokens": 485680262.0, "step": 2888 }, { "entropy": 1.680146853129069, "epoch": 0.3173766169564143, "grad_norm": 0.6327770352363586, "learning_rate": 1.923069869823439e-05, "loss": 1.4815, "mean_token_accuracy": 0.6378313849369684, "num_tokens": 485910473.0, "step": 2889 }, { "entropy": 1.752420961856842, "epoch": 0.3174864738677872, "grad_norm": 0.7673040628433228, "learning_rate": 1.9230050853750624e-05, "loss": 1.4938, "mean_token_accuracy": 0.6509816845258077, "num_tokens": 486064919.0, "step": 2890 }, { "entropy": 1.7698513368765514, "epoch": 0.31759633077916016, "grad_norm": 0.9164318442344666, "learning_rate": 1.9229402748789456e-05, "loss": 1.2388, "mean_token_accuracy": 0.6785450875759125, "num_tokens": 486196513.0, "step": 2891 }, { "entropy": 1.7393431663513184, "epoch": 0.3177061876905331, "grad_norm": 0.6718737483024597, "learning_rate": 1.92287543833714e-05, "loss": 1.3484, "mean_token_accuracy": 0.6540123621622721, "num_tokens": 486378261.0, "step": 2892 }, { "entropy": 1.707463949918747, "epoch": 0.31781604460190604, "grad_norm": 0.8465375304222107, "learning_rate": 1.9228105757516974e-05, "loss": 1.3403, "mean_token_accuracy": 0.6693908423185349, "num_tokens": 486592874.0, "step": 2893 }, { "entropy": 1.6605522235234578, "epoch": 0.31792590151327893, "grad_norm": 0.6796486377716064, "learning_rate": 1.9227456871246714e-05, "loss": 1.3849, "mean_token_accuracy": 0.6787869185209274, "num_tokens": 486712525.0, "step": 2894 }, { "entropy": 1.7513254880905151, "epoch": 0.31803575842465187, "grad_norm": 0.7111853957176208, "learning_rate": 1.9226807724581148e-05, "loss": 1.4619, "mean_token_accuracy": 0.6655588646729788, "num_tokens": 486866331.0, "step": 2895 }, { "entropy": 1.7475620210170746, "epoch": 0.3181456153360248, "grad_norm": 0.737848699092865, "learning_rate": 1.922615831754082e-05, "loss": 1.3467, "mean_token_accuracy": 0.6666428198417028, "num_tokens": 487025593.0, "step": 2896 }, { "entropy": 1.6630546947320302, "epoch": 0.31825547224739775, "grad_norm": 0.8860530853271484, "learning_rate": 1.9225508650146294e-05, "loss": 1.5313, "mean_token_accuracy": 0.6354395796855291, "num_tokens": 487258804.0, "step": 2897 }, { "entropy": 1.753849446773529, "epoch": 0.3183653291587707, "grad_norm": 0.8779124617576599, "learning_rate": 1.9224858722418122e-05, "loss": 1.581, "mean_token_accuracy": 0.6377290387948354, "num_tokens": 487413807.0, "step": 2898 }, { "entropy": 1.6524033447106679, "epoch": 0.31847518607014363, "grad_norm": 0.7529869079589844, "learning_rate": 1.922420853437688e-05, "loss": 1.1943, "mean_token_accuracy": 0.679450144370397, "num_tokens": 487517690.0, "step": 2899 }, { "entropy": 1.6763293743133545, "epoch": 0.3185850429815166, "grad_norm": 0.656287670135498, "learning_rate": 1.9223558086043147e-05, "loss": 1.3652, "mean_token_accuracy": 0.6669528832038244, "num_tokens": 487677299.0, "step": 2900 }, { "entropy": 1.6773990790049236, "epoch": 0.3186948998928895, "grad_norm": 0.7613338828086853, "learning_rate": 1.92229073774375e-05, "loss": 1.5412, "mean_token_accuracy": 0.6458795020977656, "num_tokens": 487835814.0, "step": 2901 }, { "entropy": 1.736249138911565, "epoch": 0.31880475680426246, "grad_norm": 0.5995625853538513, "learning_rate": 1.9222256408580545e-05, "loss": 1.3598, "mean_token_accuracy": 0.6582354704538981, "num_tokens": 487995593.0, "step": 2902 }, { "entropy": 1.715472271045049, "epoch": 0.3189146137156354, "grad_norm": 0.6768810153007507, "learning_rate": 1.9221605179492878e-05, "loss": 1.3586, "mean_token_accuracy": 0.6697869201501211, "num_tokens": 488186812.0, "step": 2903 }, { "entropy": 1.7300209204355876, "epoch": 0.31902447062700834, "grad_norm": 0.7611496448516846, "learning_rate": 1.922095369019511e-05, "loss": 1.4155, "mean_token_accuracy": 0.6667229980230331, "num_tokens": 488359701.0, "step": 2904 }, { "entropy": 1.7256126403808594, "epoch": 0.3191343275383813, "grad_norm": 0.6969589591026306, "learning_rate": 1.922030194070786e-05, "loss": 1.4225, "mean_token_accuracy": 0.6631851394971212, "num_tokens": 488511590.0, "step": 2905 }, { "entropy": 1.678377350171407, "epoch": 0.3192441844497542, "grad_norm": 0.6946367025375366, "learning_rate": 1.9219649931051764e-05, "loss": 1.3259, "mean_token_accuracy": 0.6693602552016577, "num_tokens": 488646530.0, "step": 2906 }, { "entropy": 1.6788996458053589, "epoch": 0.3193540413611271, "grad_norm": 0.5399008989334106, "learning_rate": 1.9218997661247446e-05, "loss": 1.3327, "mean_token_accuracy": 0.6513966371615728, "num_tokens": 488871769.0, "step": 2907 }, { "entropy": 1.695472886164983, "epoch": 0.31946389827250005, "grad_norm": 0.7106685638427734, "learning_rate": 1.921834513131556e-05, "loss": 1.3448, "mean_token_accuracy": 0.6767653375864029, "num_tokens": 489036941.0, "step": 2908 }, { "entropy": 1.750376472870509, "epoch": 0.319573755183873, "grad_norm": 0.6660662889480591, "learning_rate": 1.921769234127675e-05, "loss": 1.3957, "mean_token_accuracy": 0.6495022475719452, "num_tokens": 489212402.0, "step": 2909 }, { "entropy": 1.6709170639514923, "epoch": 0.31968361209524593, "grad_norm": 0.6726402044296265, "learning_rate": 1.9217039291151684e-05, "loss": 1.2903, "mean_token_accuracy": 0.6766814192136129, "num_tokens": 489359701.0, "step": 2910 }, { "entropy": 1.772289474805196, "epoch": 0.31979346900661887, "grad_norm": 0.8132745027542114, "learning_rate": 1.9216385980961027e-05, "loss": 1.3114, "mean_token_accuracy": 0.6673167099555334, "num_tokens": 489509117.0, "step": 2911 }, { "entropy": 1.6465057233969371, "epoch": 0.3199033259179918, "grad_norm": 0.6829494833946228, "learning_rate": 1.9215732410725453e-05, "loss": 1.4098, "mean_token_accuracy": 0.6528383443752924, "num_tokens": 489687106.0, "step": 2912 }, { "entropy": 1.6819744805494945, "epoch": 0.32001318282936475, "grad_norm": 0.6408959031105042, "learning_rate": 1.9215078580465653e-05, "loss": 1.3749, "mean_token_accuracy": 0.6629981398582458, "num_tokens": 489879747.0, "step": 2913 }, { "entropy": 1.7291929125785828, "epoch": 0.3201230397407377, "grad_norm": 0.7077094912528992, "learning_rate": 1.9214424490202316e-05, "loss": 1.4732, "mean_token_accuracy": 0.6318852504094442, "num_tokens": 490037336.0, "step": 2914 }, { "entropy": 1.7284752825895946, "epoch": 0.32023289665211063, "grad_norm": 0.6826415657997131, "learning_rate": 1.9213770139956145e-05, "loss": 1.413, "mean_token_accuracy": 0.6539119978745779, "num_tokens": 490202718.0, "step": 2915 }, { "entropy": 1.6593516568342845, "epoch": 0.3203427535634836, "grad_norm": 0.6397992372512817, "learning_rate": 1.921311552974785e-05, "loss": 1.3014, "mean_token_accuracy": 0.6723136901855469, "num_tokens": 490349621.0, "step": 2916 }, { "entropy": 1.7860759397347767, "epoch": 0.3204526104748565, "grad_norm": 0.6551001071929932, "learning_rate": 1.9212460659598153e-05, "loss": 1.3407, "mean_token_accuracy": 0.6589092761278152, "num_tokens": 490457337.0, "step": 2917 }, { "entropy": 1.730087826649348, "epoch": 0.32056246738622945, "grad_norm": 0.7016686201095581, "learning_rate": 1.9211805529527775e-05, "loss": 1.484, "mean_token_accuracy": 0.6437141746282578, "num_tokens": 490669111.0, "step": 2918 }, { "entropy": 1.6922438045342763, "epoch": 0.3206723242976024, "grad_norm": 0.7173215746879578, "learning_rate": 1.921115013955745e-05, "loss": 1.4722, "mean_token_accuracy": 0.6466464251279831, "num_tokens": 490849927.0, "step": 2919 }, { "entropy": 1.7310430804888408, "epoch": 0.32078218120897534, "grad_norm": 0.7087364196777344, "learning_rate": 1.9210494489707926e-05, "loss": 1.3491, "mean_token_accuracy": 0.66358715792497, "num_tokens": 490961402.0, "step": 2920 }, { "entropy": 1.6876995464166005, "epoch": 0.3208920381203482, "grad_norm": 0.697143018245697, "learning_rate": 1.9209838579999947e-05, "loss": 1.3488, "mean_token_accuracy": 0.6534036248922348, "num_tokens": 491129298.0, "step": 2921 }, { "entropy": 1.680976579586665, "epoch": 0.32100189503172116, "grad_norm": 0.7552234530448914, "learning_rate": 1.920918241045428e-05, "loss": 1.5304, "mean_token_accuracy": 0.6349671731392542, "num_tokens": 491341947.0, "step": 2922 }, { "entropy": 1.6714160442352295, "epoch": 0.3211117519430941, "grad_norm": 0.6923167109489441, "learning_rate": 1.920852598109169e-05, "loss": 1.2721, "mean_token_accuracy": 0.678156390786171, "num_tokens": 491468492.0, "step": 2923 }, { "entropy": 1.7526885271072388, "epoch": 0.32122160885446704, "grad_norm": 0.6262015700340271, "learning_rate": 1.920786929193295e-05, "loss": 1.4856, "mean_token_accuracy": 0.6357933630545934, "num_tokens": 491655738.0, "step": 2924 }, { "entropy": 1.799785594145457, "epoch": 0.32133146576584, "grad_norm": 0.7483623623847961, "learning_rate": 1.920721234299884e-05, "loss": 1.4297, "mean_token_accuracy": 0.645599807302157, "num_tokens": 491883634.0, "step": 2925 }, { "entropy": 1.6690879464149475, "epoch": 0.3214413226772129, "grad_norm": 0.7060349583625793, "learning_rate": 1.9206555134310166e-05, "loss": 1.2896, "mean_token_accuracy": 0.6781369696060816, "num_tokens": 492049115.0, "step": 2926 }, { "entropy": 1.7402922709782918, "epoch": 0.32155117958858587, "grad_norm": 0.8359885215759277, "learning_rate": 1.9205897665887718e-05, "loss": 1.5249, "mean_token_accuracy": 0.644447940091292, "num_tokens": 492183166.0, "step": 2927 }, { "entropy": 1.7350502908229828, "epoch": 0.3216610364999588, "grad_norm": 0.7333374619483948, "learning_rate": 1.9205239937752304e-05, "loss": 1.3024, "mean_token_accuracy": 0.6708205292622248, "num_tokens": 492369207.0, "step": 2928 }, { "entropy": 1.665328135093053, "epoch": 0.32177089341133175, "grad_norm": 0.7489623427391052, "learning_rate": 1.9204581949924744e-05, "loss": 1.2896, "mean_token_accuracy": 0.6738118877013525, "num_tokens": 492533237.0, "step": 2929 }, { "entropy": 1.7022863527139027, "epoch": 0.3218807503227047, "grad_norm": 0.7714312076568604, "learning_rate": 1.9203923702425863e-05, "loss": 1.4599, "mean_token_accuracy": 0.6606474220752716, "num_tokens": 492733024.0, "step": 2930 }, { "entropy": 1.732410063346227, "epoch": 0.32199060723407763, "grad_norm": 0.7454637885093689, "learning_rate": 1.9203265195276494e-05, "loss": 1.2414, "mean_token_accuracy": 0.6792856454849243, "num_tokens": 492879920.0, "step": 2931 }, { "entropy": 1.6901346445083618, "epoch": 0.32210046414545057, "grad_norm": 0.6151790618896484, "learning_rate": 1.9202606428497476e-05, "loss": 1.3307, "mean_token_accuracy": 0.6711449126402537, "num_tokens": 493008404.0, "step": 2932 }, { "entropy": 1.7341221272945404, "epoch": 0.3222103210568235, "grad_norm": 0.6119834780693054, "learning_rate": 1.9201947402109663e-05, "loss": 1.5834, "mean_token_accuracy": 0.6300620784362158, "num_tokens": 493215482.0, "step": 2933 }, { "entropy": 1.6682228247324626, "epoch": 0.3223201779681964, "grad_norm": 0.6601076722145081, "learning_rate": 1.920128811613391e-05, "loss": 1.4085, "mean_token_accuracy": 0.651889776190122, "num_tokens": 493409342.0, "step": 2934 }, { "entropy": 1.701788494984309, "epoch": 0.32243003487956934, "grad_norm": 0.7248215079307556, "learning_rate": 1.9200628570591084e-05, "loss": 1.3979, "mean_token_accuracy": 0.6579019526640574, "num_tokens": 493560199.0, "step": 2935 }, { "entropy": 1.684028019507726, "epoch": 0.3225398917909423, "grad_norm": 0.6509939432144165, "learning_rate": 1.919996876550206e-05, "loss": 1.4448, "mean_token_accuracy": 0.6362205495436987, "num_tokens": 493753256.0, "step": 2936 }, { "entropy": 1.720295896132787, "epoch": 0.3226497487023152, "grad_norm": 0.7366635203361511, "learning_rate": 1.919930870088772e-05, "loss": 1.5125, "mean_token_accuracy": 0.6424083262681961, "num_tokens": 493932673.0, "step": 2937 }, { "entropy": 1.6438338458538055, "epoch": 0.32275960561368816, "grad_norm": 0.6553351879119873, "learning_rate": 1.919864837676895e-05, "loss": 1.2257, "mean_token_accuracy": 0.6853679070870081, "num_tokens": 494050760.0, "step": 2938 }, { "entropy": 1.7294786274433136, "epoch": 0.3228694625250611, "grad_norm": 0.655874490737915, "learning_rate": 1.9197987793166655e-05, "loss": 1.3517, "mean_token_accuracy": 0.658056045571963, "num_tokens": 494240349.0, "step": 2939 }, { "entropy": 1.7637418508529663, "epoch": 0.32297931943643404, "grad_norm": 0.7153424620628357, "learning_rate": 1.9197326950101744e-05, "loss": 1.4253, "mean_token_accuracy": 0.655969480673472, "num_tokens": 494379250.0, "step": 2940 }, { "entropy": 1.696879784266154, "epoch": 0.323089176347807, "grad_norm": 0.6477358937263489, "learning_rate": 1.9196665847595126e-05, "loss": 1.3708, "mean_token_accuracy": 0.6583545009295145, "num_tokens": 494535109.0, "step": 2941 }, { "entropy": 1.716434359550476, "epoch": 0.3231990332591799, "grad_norm": 0.6917335987091064, "learning_rate": 1.9196004485667728e-05, "loss": 1.5066, "mean_token_accuracy": 0.6468228300412496, "num_tokens": 494667235.0, "step": 2942 }, { "entropy": 1.7584334413210552, "epoch": 0.32330889017055287, "grad_norm": 0.6509451866149902, "learning_rate": 1.9195342864340477e-05, "loss": 1.4719, "mean_token_accuracy": 0.6429315656423569, "num_tokens": 494841807.0, "step": 2943 }, { "entropy": 1.7075772682825725, "epoch": 0.3234187470819258, "grad_norm": 0.682874858379364, "learning_rate": 1.9194680983634323e-05, "loss": 1.3857, "mean_token_accuracy": 0.6495272219181061, "num_tokens": 494983967.0, "step": 2944 }, { "entropy": 1.6379812856515248, "epoch": 0.32352860399329875, "grad_norm": 0.8594545722007751, "learning_rate": 1.9194018843570208e-05, "loss": 1.3222, "mean_token_accuracy": 0.6915220071872076, "num_tokens": 495111243.0, "step": 2945 }, { "entropy": 1.6541229287783306, "epoch": 0.3236384609046717, "grad_norm": 0.6572254300117493, "learning_rate": 1.9193356444169086e-05, "loss": 1.3144, "mean_token_accuracy": 0.6642016619443893, "num_tokens": 495250273.0, "step": 2946 }, { "entropy": 1.721500555674235, "epoch": 0.32374831781604463, "grad_norm": 0.643337070941925, "learning_rate": 1.9192693785451925e-05, "loss": 1.388, "mean_token_accuracy": 0.6487238456805547, "num_tokens": 495413516.0, "step": 2947 }, { "entropy": 1.6922288636366527, "epoch": 0.3238581747274175, "grad_norm": 0.6867654919624329, "learning_rate": 1.91920308674397e-05, "loss": 1.4196, "mean_token_accuracy": 0.6719547808170319, "num_tokens": 495576233.0, "step": 2948 }, { "entropy": 1.6915934085845947, "epoch": 0.32396803163879045, "grad_norm": 0.680091142654419, "learning_rate": 1.919136769015339e-05, "loss": 1.3689, "mean_token_accuracy": 0.6606242706378301, "num_tokens": 495789140.0, "step": 2949 }, { "entropy": 1.6307895680268605, "epoch": 0.3240778885501634, "grad_norm": 0.650629460811615, "learning_rate": 1.919070425361398e-05, "loss": 1.2485, "mean_token_accuracy": 0.678497518102328, "num_tokens": 495930360.0, "step": 2950 }, { "entropy": 1.7259167035420735, "epoch": 0.32418774546153634, "grad_norm": 0.8071044683456421, "learning_rate": 1.9190040557842472e-05, "loss": 1.4053, "mean_token_accuracy": 0.6503327190876007, "num_tokens": 496059841.0, "step": 2951 }, { "entropy": 1.6474729379018147, "epoch": 0.3242976023729093, "grad_norm": 0.6372878551483154, "learning_rate": 1.918937660285987e-05, "loss": 1.3471, "mean_token_accuracy": 0.673143744468689, "num_tokens": 496224948.0, "step": 2952 }, { "entropy": 1.6423666775226593, "epoch": 0.3244074592842822, "grad_norm": 0.6071237325668335, "learning_rate": 1.918871238868719e-05, "loss": 1.4078, "mean_token_accuracy": 0.6574894885222117, "num_tokens": 496408432.0, "step": 2953 }, { "entropy": 1.7058760623137157, "epoch": 0.32451731619565516, "grad_norm": 0.7525854706764221, "learning_rate": 1.9188047915345455e-05, "loss": 1.3487, "mean_token_accuracy": 0.6681368251641592, "num_tokens": 496548095.0, "step": 2954 }, { "entropy": 1.6724230746428173, "epoch": 0.3246271731070281, "grad_norm": 0.615999698638916, "learning_rate": 1.9187383182855693e-05, "loss": 1.5475, "mean_token_accuracy": 0.65153868496418, "num_tokens": 496777867.0, "step": 2955 }, { "entropy": 1.7171376744906108, "epoch": 0.32473703001840104, "grad_norm": 0.5861404538154602, "learning_rate": 1.918671819123894e-05, "loss": 1.2934, "mean_token_accuracy": 0.6681883285442988, "num_tokens": 496927140.0, "step": 2956 }, { "entropy": 1.7224361499150593, "epoch": 0.324846886929774, "grad_norm": 0.6110925674438477, "learning_rate": 1.9186052940516245e-05, "loss": 1.3695, "mean_token_accuracy": 0.6483077257871628, "num_tokens": 497121766.0, "step": 2957 }, { "entropy": 1.7283575534820557, "epoch": 0.3249567438411469, "grad_norm": 0.6756021976470947, "learning_rate": 1.9185387430708663e-05, "loss": 1.3304, "mean_token_accuracy": 0.6640335768461227, "num_tokens": 497257864.0, "step": 2958 }, { "entropy": 1.6929436028003693, "epoch": 0.32506660075251986, "grad_norm": 0.6733092069625854, "learning_rate": 1.918472166183726e-05, "loss": 1.5546, "mean_token_accuracy": 0.6462220996618271, "num_tokens": 497457514.0, "step": 2959 }, { "entropy": 1.7729289134343464, "epoch": 0.3251764576638928, "grad_norm": 0.7560225129127502, "learning_rate": 1.9184055633923105e-05, "loss": 1.3431, "mean_token_accuracy": 0.6635515093803406, "num_tokens": 497622838.0, "step": 2960 }, { "entropy": 1.7306861976782482, "epoch": 0.3252863145752657, "grad_norm": 0.6715700626373291, "learning_rate": 1.9183389346987274e-05, "loss": 1.3844, "mean_token_accuracy": 0.6618200093507767, "num_tokens": 497773384.0, "step": 2961 }, { "entropy": 1.664503941933314, "epoch": 0.32539617148663863, "grad_norm": 0.6396395564079285, "learning_rate": 1.9182722801050858e-05, "loss": 1.3481, "mean_token_accuracy": 0.6635024050871531, "num_tokens": 497959238.0, "step": 2962 }, { "entropy": 1.6557064652442932, "epoch": 0.32550602839801157, "grad_norm": 0.6370712518692017, "learning_rate": 1.9182055996134955e-05, "loss": 1.4172, "mean_token_accuracy": 0.6532147924105326, "num_tokens": 498113737.0, "step": 2963 }, { "entropy": 1.7849931518236797, "epoch": 0.3256158853093845, "grad_norm": 0.7333866357803345, "learning_rate": 1.9181388932260663e-05, "loss": 1.4514, "mean_token_accuracy": 0.6394390016794205, "num_tokens": 498298336.0, "step": 2964 }, { "entropy": 1.697540670633316, "epoch": 0.32572574222075745, "grad_norm": 0.6613360047340393, "learning_rate": 1.91807216094491e-05, "loss": 1.3528, "mean_token_accuracy": 0.6626348445812861, "num_tokens": 498469831.0, "step": 2965 }, { "entropy": 1.788981705904007, "epoch": 0.3258355991321304, "grad_norm": 0.7449756264686584, "learning_rate": 1.9180054027721386e-05, "loss": 1.3713, "mean_token_accuracy": 0.6606344183286031, "num_tokens": 498594651.0, "step": 2966 }, { "entropy": 1.749136467774709, "epoch": 0.32594545604350333, "grad_norm": 0.7143117785453796, "learning_rate": 1.9179386187098648e-05, "loss": 1.3748, "mean_token_accuracy": 0.6651021838188171, "num_tokens": 498725535.0, "step": 2967 }, { "entropy": 1.714682827393214, "epoch": 0.3260553129548763, "grad_norm": 0.7331691980361938, "learning_rate": 1.917871808760202e-05, "loss": 1.4736, "mean_token_accuracy": 0.664309561252594, "num_tokens": 498953694.0, "step": 2968 }, { "entropy": 1.630650371313095, "epoch": 0.3261651698662492, "grad_norm": 0.7111721038818359, "learning_rate": 1.917804972925265e-05, "loss": 1.3526, "mean_token_accuracy": 0.6674729784329733, "num_tokens": 499166088.0, "step": 2969 }, { "entropy": 1.695541262626648, "epoch": 0.32627502677762216, "grad_norm": 0.5924942493438721, "learning_rate": 1.9177381112071693e-05, "loss": 1.3686, "mean_token_accuracy": 0.6531450500090917, "num_tokens": 499373411.0, "step": 2970 }, { "entropy": 1.6883581181367238, "epoch": 0.3263848836889951, "grad_norm": 0.6998016834259033, "learning_rate": 1.917671223608031e-05, "loss": 1.3799, "mean_token_accuracy": 0.6497906893491745, "num_tokens": 499509178.0, "step": 2971 }, { "entropy": 1.7307297984759014, "epoch": 0.32649474060036804, "grad_norm": 0.6792302131652832, "learning_rate": 1.9176043101299664e-05, "loss": 1.4943, "mean_token_accuracy": 0.6375556737184525, "num_tokens": 499681437.0, "step": 2972 }, { "entropy": 1.7387321988741558, "epoch": 0.326604597511741, "grad_norm": 0.7723334431648254, "learning_rate": 1.917537370775094e-05, "loss": 1.5843, "mean_token_accuracy": 0.6392184148232142, "num_tokens": 499865905.0, "step": 2973 }, { "entropy": 1.7423201700051625, "epoch": 0.3267144544231139, "grad_norm": 0.721889853477478, "learning_rate": 1.9174704055455327e-05, "loss": 1.4046, "mean_token_accuracy": 0.6562129308780035, "num_tokens": 499992743.0, "step": 2974 }, { "entropy": 1.711453249057134, "epoch": 0.3268243113344868, "grad_norm": 0.6114857196807861, "learning_rate": 1.917403414443401e-05, "loss": 1.4313, "mean_token_accuracy": 0.6468845208485922, "num_tokens": 500185819.0, "step": 2975 }, { "entropy": 1.6875855922698975, "epoch": 0.32693416824585975, "grad_norm": 0.6063688397407532, "learning_rate": 1.9173363974708196e-05, "loss": 1.4764, "mean_token_accuracy": 0.6517787824074427, "num_tokens": 500420510.0, "step": 2976 }, { "entropy": 1.6635367274284363, "epoch": 0.3270440251572327, "grad_norm": 0.7195242047309875, "learning_rate": 1.9172693546299094e-05, "loss": 1.296, "mean_token_accuracy": 0.6765902439753214, "num_tokens": 500555525.0, "step": 2977 }, { "entropy": 1.717705875635147, "epoch": 0.32715388206860563, "grad_norm": 0.6923717856407166, "learning_rate": 1.9172022859227927e-05, "loss": 1.5837, "mean_token_accuracy": 0.6424253235260645, "num_tokens": 500746134.0, "step": 2978 }, { "entropy": 1.7490998009840648, "epoch": 0.32726373897997857, "grad_norm": 0.747678279876709, "learning_rate": 1.9171351913515916e-05, "loss": 1.4725, "mean_token_accuracy": 0.6539937580625216, "num_tokens": 500921039.0, "step": 2979 }, { "entropy": 1.6731195151805878, "epoch": 0.3273735958913515, "grad_norm": 0.8201509118080139, "learning_rate": 1.91706807091843e-05, "loss": 1.4356, "mean_token_accuracy": 0.6559693316618601, "num_tokens": 501079984.0, "step": 2980 }, { "entropy": 1.7330588301022847, "epoch": 0.32748345280272445, "grad_norm": 0.777803897857666, "learning_rate": 1.9170009246254323e-05, "loss": 1.3363, "mean_token_accuracy": 0.6543682813644409, "num_tokens": 501218988.0, "step": 2981 }, { "entropy": 1.7402281661828358, "epoch": 0.3275933097140974, "grad_norm": 0.7554537653923035, "learning_rate": 1.9169337524747232e-05, "loss": 1.3441, "mean_token_accuracy": 0.6577061663071314, "num_tokens": 501333041.0, "step": 2982 }, { "entropy": 1.794272631406784, "epoch": 0.32770316662547033, "grad_norm": 0.6783401966094971, "learning_rate": 1.9168665544684292e-05, "loss": 1.568, "mean_token_accuracy": 0.6587167580922445, "num_tokens": 501481324.0, "step": 2983 }, { "entropy": 1.822703331708908, "epoch": 0.3278130235368433, "grad_norm": 0.7269600629806519, "learning_rate": 1.9167993306086768e-05, "loss": 1.6219, "mean_token_accuracy": 0.613858292500178, "num_tokens": 501706864.0, "step": 2984 }, { "entropy": 1.7719651063283284, "epoch": 0.3279228804482162, "grad_norm": 0.6666971445083618, "learning_rate": 1.9167320808975936e-05, "loss": 1.4524, "mean_token_accuracy": 0.6489970783392588, "num_tokens": 501867433.0, "step": 2985 }, { "entropy": 1.6982284088929493, "epoch": 0.32803273735958915, "grad_norm": 0.655606210231781, "learning_rate": 1.916664805337308e-05, "loss": 1.4744, "mean_token_accuracy": 0.6488128999869028, "num_tokens": 502050616.0, "step": 2986 }, { "entropy": 1.676661233107249, "epoch": 0.3281425942709621, "grad_norm": 1.2920039892196655, "learning_rate": 1.9165975039299497e-05, "loss": 1.41, "mean_token_accuracy": 0.6578782151142756, "num_tokens": 502183884.0, "step": 2987 }, { "entropy": 1.7440513372421265, "epoch": 0.328252451182335, "grad_norm": 0.6458728909492493, "learning_rate": 1.9165301766776478e-05, "loss": 1.3387, "mean_token_accuracy": 0.6633184005816778, "num_tokens": 502312558.0, "step": 2988 }, { "entropy": 1.7916107575098674, "epoch": 0.3283623080937079, "grad_norm": 0.6091529130935669, "learning_rate": 1.916462823582534e-05, "loss": 1.4744, "mean_token_accuracy": 0.6307684083779653, "num_tokens": 502501943.0, "step": 2989 }, { "entropy": 1.7316680351893108, "epoch": 0.32847216500508086, "grad_norm": 0.8422166705131531, "learning_rate": 1.9163954446467396e-05, "loss": 1.4116, "mean_token_accuracy": 0.6442118336757024, "num_tokens": 502643511.0, "step": 2990 }, { "entropy": 1.7342032194137573, "epoch": 0.3285820219164538, "grad_norm": 0.7511305212974548, "learning_rate": 1.9163280398723974e-05, "loss": 1.497, "mean_token_accuracy": 0.6555758366982142, "num_tokens": 502797590.0, "step": 2991 }, { "entropy": 1.7133084932963054, "epoch": 0.32869187882782674, "grad_norm": 0.6199161410331726, "learning_rate": 1.9162606092616407e-05, "loss": 1.4054, "mean_token_accuracy": 0.6550638278325399, "num_tokens": 503013251.0, "step": 2992 }, { "entropy": 1.6971174776554108, "epoch": 0.3288017357391997, "grad_norm": 0.7281301617622375, "learning_rate": 1.9161931528166034e-05, "loss": 1.4092, "mean_token_accuracy": 0.6583978980779648, "num_tokens": 503184291.0, "step": 2993 }, { "entropy": 1.6845273971557617, "epoch": 0.3289115926505726, "grad_norm": 0.720726728439331, "learning_rate": 1.9161256705394204e-05, "loss": 1.3414, "mean_token_accuracy": 0.6659517834583918, "num_tokens": 503315433.0, "step": 2994 }, { "entropy": 1.729546884695689, "epoch": 0.32902144956194557, "grad_norm": 0.7851258516311646, "learning_rate": 1.916058162432228e-05, "loss": 1.2136, "mean_token_accuracy": 0.6868036687374115, "num_tokens": 503460120.0, "step": 2995 }, { "entropy": 1.6761589348316193, "epoch": 0.3291313064733185, "grad_norm": 0.742510974407196, "learning_rate": 1.9159906284971627e-05, "loss": 1.2834, "mean_token_accuracy": 0.6747480084498724, "num_tokens": 503610117.0, "step": 2996 }, { "entropy": 1.7127369443575542, "epoch": 0.32924116338469145, "grad_norm": 0.6928642392158508, "learning_rate": 1.915923068736361e-05, "loss": 1.3747, "mean_token_accuracy": 0.6500995755195618, "num_tokens": 503752043.0, "step": 2997 }, { "entropy": 1.7473743855953217, "epoch": 0.3293510202960644, "grad_norm": 0.7999443411827087, "learning_rate": 1.915855483151962e-05, "loss": 1.3799, "mean_token_accuracy": 0.6510451088349024, "num_tokens": 503902442.0, "step": 2998 }, { "entropy": 1.7298793097337086, "epoch": 0.32946087720743733, "grad_norm": 0.7435027956962585, "learning_rate": 1.9157878717461048e-05, "loss": 1.362, "mean_token_accuracy": 0.6558040330807368, "num_tokens": 504050985.0, "step": 2999 }, { "entropy": 1.752679854631424, "epoch": 0.32957073411881027, "grad_norm": 0.7258424758911133, "learning_rate": 1.9157202345209293e-05, "loss": 1.3282, "mean_token_accuracy": 0.6720992128054301, "num_tokens": 504192687.0, "step": 3000 }, { "entropy": 1.7436956961949666, "epoch": 0.3296805910301832, "grad_norm": 0.7615739703178406, "learning_rate": 1.9156525714785758e-05, "loss": 1.5075, "mean_token_accuracy": 0.6405996978282928, "num_tokens": 504369108.0, "step": 3001 }, { "entropy": 1.6998974084854126, "epoch": 0.3297904479415561, "grad_norm": 0.6445051431655884, "learning_rate": 1.9155848826211856e-05, "loss": 1.3417, "mean_token_accuracy": 0.6661799550056458, "num_tokens": 504538330.0, "step": 3002 }, { "entropy": 1.7470042010148366, "epoch": 0.32990030485292904, "grad_norm": 0.729354202747345, "learning_rate": 1.915517167950902e-05, "loss": 1.3582, "mean_token_accuracy": 0.669076090057691, "num_tokens": 504667747.0, "step": 3003 }, { "entropy": 1.7464979787667592, "epoch": 0.330010161764302, "grad_norm": 0.61955326795578, "learning_rate": 1.9154494274698668e-05, "loss": 1.6182, "mean_token_accuracy": 0.6314500818649927, "num_tokens": 504866089.0, "step": 3004 }, { "entropy": 1.7116199831167858, "epoch": 0.3301200186756749, "grad_norm": 0.675858736038208, "learning_rate": 1.9153816611802252e-05, "loss": 1.438, "mean_token_accuracy": 0.652592346072197, "num_tokens": 505020897.0, "step": 3005 }, { "entropy": 1.7077987988789876, "epoch": 0.33022987558704786, "grad_norm": 0.674115002155304, "learning_rate": 1.9153138690841212e-05, "loss": 1.3522, "mean_token_accuracy": 0.6594054301579794, "num_tokens": 505194598.0, "step": 3006 }, { "entropy": 1.7043022612730663, "epoch": 0.3303397324984208, "grad_norm": 0.8764580488204956, "learning_rate": 1.9152460511837006e-05, "loss": 1.4887, "mean_token_accuracy": 0.6579779237508774, "num_tokens": 505360115.0, "step": 3007 }, { "entropy": 1.6655643979708354, "epoch": 0.33044958940979374, "grad_norm": 0.7459789514541626, "learning_rate": 1.9151782074811098e-05, "loss": 1.2533, "mean_token_accuracy": 0.6698919186989466, "num_tokens": 505487058.0, "step": 3008 }, { "entropy": 1.6778320570786793, "epoch": 0.3305594463211667, "grad_norm": 0.6878098845481873, "learning_rate": 1.9151103379784964e-05, "loss": 1.2724, "mean_token_accuracy": 0.6792107174793879, "num_tokens": 505668993.0, "step": 3009 }, { "entropy": 1.7397213677565257, "epoch": 0.3306693032325396, "grad_norm": 0.6972684264183044, "learning_rate": 1.915042442678008e-05, "loss": 1.4157, "mean_token_accuracy": 0.6579104761282603, "num_tokens": 505806107.0, "step": 3010 }, { "entropy": 1.790239155292511, "epoch": 0.33077916014391257, "grad_norm": 0.6503934264183044, "learning_rate": 1.914974521581793e-05, "loss": 1.4499, "mean_token_accuracy": 0.6496985306342443, "num_tokens": 505975483.0, "step": 3011 }, { "entropy": 1.7129294872283936, "epoch": 0.3308890170552855, "grad_norm": 0.6534547805786133, "learning_rate": 1.9149065746920023e-05, "loss": 1.6048, "mean_token_accuracy": 0.640040377775828, "num_tokens": 506158617.0, "step": 3012 }, { "entropy": 1.7680010497570038, "epoch": 0.33099887396665845, "grad_norm": 0.6420508027076721, "learning_rate": 1.9148386020107857e-05, "loss": 1.422, "mean_token_accuracy": 0.6478531509637833, "num_tokens": 506352143.0, "step": 3013 }, { "entropy": 1.693307230869929, "epoch": 0.3311087308780314, "grad_norm": 0.6774839758872986, "learning_rate": 1.914770603540294e-05, "loss": 1.4863, "mean_token_accuracy": 0.6670309404532114, "num_tokens": 506573320.0, "step": 3014 }, { "entropy": 1.6939273178577423, "epoch": 0.33121858778940433, "grad_norm": 0.6691749691963196, "learning_rate": 1.9147025792826803e-05, "loss": 1.4095, "mean_token_accuracy": 0.6504554947217306, "num_tokens": 506733027.0, "step": 3015 }, { "entropy": 1.7147826254367828, "epoch": 0.3313284447007772, "grad_norm": 0.6580132246017456, "learning_rate": 1.914634529240097e-05, "loss": 1.3658, "mean_token_accuracy": 0.6597993324200312, "num_tokens": 506878473.0, "step": 3016 }, { "entropy": 1.6655152241388957, "epoch": 0.33143830161215015, "grad_norm": 0.6758162379264832, "learning_rate": 1.914566453414698e-05, "loss": 1.2672, "mean_token_accuracy": 0.6674109697341919, "num_tokens": 507007095.0, "step": 3017 }, { "entropy": 1.6784224609533946, "epoch": 0.3315481585235231, "grad_norm": 0.7197324633598328, "learning_rate": 1.9144983518086378e-05, "loss": 1.41, "mean_token_accuracy": 0.6585833777983984, "num_tokens": 507154336.0, "step": 3018 }, { "entropy": 1.6712388892968495, "epoch": 0.33165801543489604, "grad_norm": 0.7722300887107849, "learning_rate": 1.9144302244240715e-05, "loss": 1.4964, "mean_token_accuracy": 0.6578239550193151, "num_tokens": 507338056.0, "step": 3019 }, { "entropy": 1.6774245699246724, "epoch": 0.331767872346269, "grad_norm": 0.7282935976982117, "learning_rate": 1.9143620712631555e-05, "loss": 1.2622, "mean_token_accuracy": 0.6662948727607727, "num_tokens": 507455716.0, "step": 3020 }, { "entropy": 1.6499692300955455, "epoch": 0.3318777292576419, "grad_norm": 0.6513247489929199, "learning_rate": 1.914293892328047e-05, "loss": 1.4438, "mean_token_accuracy": 0.6459585577249527, "num_tokens": 507658074.0, "step": 3021 }, { "entropy": 1.7383529146512349, "epoch": 0.33198758616901486, "grad_norm": 0.629367470741272, "learning_rate": 1.9142256876209046e-05, "loss": 1.308, "mean_token_accuracy": 0.666439284880956, "num_tokens": 507828153.0, "step": 3022 }, { "entropy": 1.732998142639796, "epoch": 0.3320974430803878, "grad_norm": 0.890418529510498, "learning_rate": 1.914157457143885e-05, "loss": 1.2594, "mean_token_accuracy": 0.6755412022272745, "num_tokens": 507958961.0, "step": 3023 }, { "entropy": 1.7181305587291718, "epoch": 0.33220729999176074, "grad_norm": 0.6836814880371094, "learning_rate": 1.914089200899149e-05, "loss": 1.4408, "mean_token_accuracy": 0.6476282527049383, "num_tokens": 508133687.0, "step": 3024 }, { "entropy": 1.7462326188882191, "epoch": 0.3323171569031337, "grad_norm": 0.6367360949516296, "learning_rate": 1.914020918888857e-05, "loss": 1.4157, "mean_token_accuracy": 0.6456411679585775, "num_tokens": 508330078.0, "step": 3025 }, { "entropy": 1.611125926176707, "epoch": 0.3324270138145066, "grad_norm": 0.5883477330207825, "learning_rate": 1.9139526111151695e-05, "loss": 1.3606, "mean_token_accuracy": 0.6693080514669418, "num_tokens": 508519565.0, "step": 3026 }, { "entropy": 1.7090338071187336, "epoch": 0.33253687072587956, "grad_norm": 0.7724202275276184, "learning_rate": 1.9138842775802483e-05, "loss": 1.3513, "mean_token_accuracy": 0.6597883005936941, "num_tokens": 508705625.0, "step": 3027 }, { "entropy": 1.6161086161931355, "epoch": 0.3326467276372525, "grad_norm": 0.6500586867332458, "learning_rate": 1.913815918286257e-05, "loss": 1.2911, "mean_token_accuracy": 0.6685569137334824, "num_tokens": 508844283.0, "step": 3028 }, { "entropy": 1.7713151176770527, "epoch": 0.3327565845486254, "grad_norm": 0.8597967624664307, "learning_rate": 1.9137475332353586e-05, "loss": 1.5667, "mean_token_accuracy": 0.6257463147242864, "num_tokens": 509093149.0, "step": 3029 }, { "entropy": 1.6782112022240956, "epoch": 0.33286644145999833, "grad_norm": 0.6756134033203125, "learning_rate": 1.913679122429717e-05, "loss": 1.592, "mean_token_accuracy": 0.6362019727627436, "num_tokens": 509269750.0, "step": 3030 }, { "entropy": 1.7369333803653717, "epoch": 0.33297629837137127, "grad_norm": 0.5684821605682373, "learning_rate": 1.9136106858714983e-05, "loss": 1.4523, "mean_token_accuracy": 0.6342654128869375, "num_tokens": 509476830.0, "step": 3031 }, { "entropy": 1.7342596749464672, "epoch": 0.3330861552827442, "grad_norm": 0.6759097576141357, "learning_rate": 1.9135422235628676e-05, "loss": 1.1854, "mean_token_accuracy": 0.6867292175690333, "num_tokens": 509569400.0, "step": 3032 }, { "entropy": 1.671131859223048, "epoch": 0.33319601219411715, "grad_norm": 0.7441453337669373, "learning_rate": 1.913473735505992e-05, "loss": 1.5021, "mean_token_accuracy": 0.6428874333699545, "num_tokens": 509764136.0, "step": 3033 }, { "entropy": 1.7323594490687053, "epoch": 0.3333058691054901, "grad_norm": 0.6669164299964905, "learning_rate": 1.91340522170304e-05, "loss": 1.3045, "mean_token_accuracy": 0.6643939961989721, "num_tokens": 509890442.0, "step": 3034 }, { "entropy": 1.7155201435089111, "epoch": 0.33341572601686303, "grad_norm": 0.6839675307273865, "learning_rate": 1.9133366821561788e-05, "loss": 1.3481, "mean_token_accuracy": 0.6636628260215124, "num_tokens": 510052207.0, "step": 3035 }, { "entropy": 1.7188059786955516, "epoch": 0.333525582928236, "grad_norm": 0.7544474601745605, "learning_rate": 1.9132681168675778e-05, "loss": 1.4636, "mean_token_accuracy": 0.6669894407192866, "num_tokens": 510227328.0, "step": 3036 }, { "entropy": 1.6821933786074321, "epoch": 0.3336354398396089, "grad_norm": 0.6986051797866821, "learning_rate": 1.9131995258394077e-05, "loss": 1.3955, "mean_token_accuracy": 0.6599144538243612, "num_tokens": 510424924.0, "step": 3037 }, { "entropy": 1.7697338461875916, "epoch": 0.33374529675098186, "grad_norm": 0.6908696293830872, "learning_rate": 1.913130909073839e-05, "loss": 1.2912, "mean_token_accuracy": 0.6725708792606989, "num_tokens": 510524846.0, "step": 3038 }, { "entropy": 1.7425800959269206, "epoch": 0.3338551536623548, "grad_norm": 0.6675323247909546, "learning_rate": 1.9130622665730434e-05, "loss": 1.5795, "mean_token_accuracy": 0.6429319083690643, "num_tokens": 510715188.0, "step": 3039 }, { "entropy": 1.6848439772923787, "epoch": 0.33396501057372774, "grad_norm": 0.6058026552200317, "learning_rate": 1.9129935983391933e-05, "loss": 1.435, "mean_token_accuracy": 0.6421783665815989, "num_tokens": 510913269.0, "step": 3040 }, { "entropy": 1.718187967936198, "epoch": 0.3340748674851007, "grad_norm": 0.6798617839813232, "learning_rate": 1.9129249043744627e-05, "loss": 1.4205, "mean_token_accuracy": 0.6531337300936381, "num_tokens": 511088529.0, "step": 3041 }, { "entropy": 1.7231755753358204, "epoch": 0.3341847243964736, "grad_norm": 0.821942150592804, "learning_rate": 1.9128561846810247e-05, "loss": 1.4694, "mean_token_accuracy": 0.6571696201960245, "num_tokens": 511284719.0, "step": 3042 }, { "entropy": 1.722469339768092, "epoch": 0.3342945813078465, "grad_norm": 0.6946660876274109, "learning_rate": 1.9127874392610548e-05, "loss": 1.4006, "mean_token_accuracy": 0.6625101615985235, "num_tokens": 511411692.0, "step": 3043 }, { "entropy": 1.7435683111349742, "epoch": 0.33440443821921945, "grad_norm": 0.7410975098609924, "learning_rate": 1.9127186681167288e-05, "loss": 1.4159, "mean_token_accuracy": 0.6528633783260981, "num_tokens": 511551200.0, "step": 3044 }, { "entropy": 1.7684580485026042, "epoch": 0.3345142951305924, "grad_norm": 0.7469737529754639, "learning_rate": 1.912649871250223e-05, "loss": 1.4624, "mean_token_accuracy": 0.6433129956324896, "num_tokens": 511779579.0, "step": 3045 }, { "entropy": 1.7564850250879924, "epoch": 0.33462415204196533, "grad_norm": 0.6813525557518005, "learning_rate": 1.9125810486637152e-05, "loss": 1.3189, "mean_token_accuracy": 0.6654183914264044, "num_tokens": 511907802.0, "step": 3046 }, { "entropy": 1.739985744158427, "epoch": 0.33473400895333827, "grad_norm": 0.7431362271308899, "learning_rate": 1.9125122003593833e-05, "loss": 1.3795, "mean_token_accuracy": 0.6528779665629069, "num_tokens": 512047494.0, "step": 3047 }, { "entropy": 1.7247369190057118, "epoch": 0.3348438658647112, "grad_norm": 0.6911116242408752, "learning_rate": 1.9124433263394063e-05, "loss": 1.6181, "mean_token_accuracy": 0.6445044080416361, "num_tokens": 512268944.0, "step": 3048 }, { "entropy": 1.7096697489420574, "epoch": 0.33495372277608415, "grad_norm": 0.7822225689888, "learning_rate": 1.9123744266059644e-05, "loss": 1.3366, "mean_token_accuracy": 0.6592263529698054, "num_tokens": 512381190.0, "step": 3049 }, { "entropy": 1.7738013366858165, "epoch": 0.3350635796874571, "grad_norm": 0.9529100656509399, "learning_rate": 1.9123055011612375e-05, "loss": 1.4496, "mean_token_accuracy": 0.6615629196166992, "num_tokens": 512579620.0, "step": 3050 }, { "entropy": 1.7056050995985668, "epoch": 0.33517343659883003, "grad_norm": 0.7717422842979431, "learning_rate": 1.912236550007408e-05, "loss": 1.4488, "mean_token_accuracy": 0.6731387178103129, "num_tokens": 512725425.0, "step": 3051 }, { "entropy": 1.7472182114919026, "epoch": 0.335283293510203, "grad_norm": 0.687759518623352, "learning_rate": 1.9121675731466572e-05, "loss": 1.4012, "mean_token_accuracy": 0.6478379468123118, "num_tokens": 512904679.0, "step": 3052 }, { "entropy": 1.7227794031302135, "epoch": 0.3353931504215759, "grad_norm": 0.6436380743980408, "learning_rate": 1.912098570581169e-05, "loss": 1.3285, "mean_token_accuracy": 0.6682673941055933, "num_tokens": 513037137.0, "step": 3053 }, { "entropy": 1.7546689013640087, "epoch": 0.33550300733294885, "grad_norm": 0.7144925594329834, "learning_rate": 1.912029542313127e-05, "loss": 1.5938, "mean_token_accuracy": 0.6321324606736501, "num_tokens": 513241089.0, "step": 3054 }, { "entropy": 1.7048328717549641, "epoch": 0.3356128642443218, "grad_norm": 0.5735582709312439, "learning_rate": 1.9119604883447155e-05, "loss": 1.4212, "mean_token_accuracy": 0.644182562828064, "num_tokens": 513485301.0, "step": 3055 }, { "entropy": 1.710164765516917, "epoch": 0.3357227211556947, "grad_norm": 0.6873478293418884, "learning_rate": 1.9118914086781208e-05, "loss": 1.3381, "mean_token_accuracy": 0.6514832923809687, "num_tokens": 513653369.0, "step": 3056 }, { "entropy": 1.6649628281593323, "epoch": 0.3358325780670676, "grad_norm": 0.573425829410553, "learning_rate": 1.911822303315529e-05, "loss": 1.4072, "mean_token_accuracy": 0.6597304493188858, "num_tokens": 513852950.0, "step": 3057 }, { "entropy": 1.686522215604782, "epoch": 0.33594243497844056, "grad_norm": 0.62139493227005, "learning_rate": 1.9117531722591267e-05, "loss": 1.4055, "mean_token_accuracy": 0.6543814688920975, "num_tokens": 513995038.0, "step": 3058 }, { "entropy": 1.6593074301878612, "epoch": 0.3360522918898135, "grad_norm": 0.7323905229568481, "learning_rate": 1.9116840155111024e-05, "loss": 1.2915, "mean_token_accuracy": 0.6618063052495321, "num_tokens": 514138366.0, "step": 3059 }, { "entropy": 1.738914539416631, "epoch": 0.33616214880118644, "grad_norm": 0.6564517021179199, "learning_rate": 1.911614833073645e-05, "loss": 1.4708, "mean_token_accuracy": 0.657853235801061, "num_tokens": 514286217.0, "step": 3060 }, { "entropy": 1.7365315755208333, "epoch": 0.3362720057125594, "grad_norm": 0.7343533039093018, "learning_rate": 1.9115456249489438e-05, "loss": 1.368, "mean_token_accuracy": 0.6527054756879807, "num_tokens": 514488524.0, "step": 3061 }, { "entropy": 1.6972340444723766, "epoch": 0.3363818626239323, "grad_norm": 0.705672562122345, "learning_rate": 1.911476391139189e-05, "loss": 1.3701, "mean_token_accuracy": 0.6614405562480291, "num_tokens": 514625795.0, "step": 3062 }, { "entropy": 1.729969580968221, "epoch": 0.33649171953530527, "grad_norm": 0.7283251881599426, "learning_rate": 1.9114071316465724e-05, "loss": 1.3981, "mean_token_accuracy": 0.6484886904557546, "num_tokens": 514758846.0, "step": 3063 }, { "entropy": 1.7162734270095825, "epoch": 0.3366015764466782, "grad_norm": 0.7314303517341614, "learning_rate": 1.9113378464732855e-05, "loss": 1.3198, "mean_token_accuracy": 0.6710415830214819, "num_tokens": 514886650.0, "step": 3064 }, { "entropy": 1.7744275629520416, "epoch": 0.33671143335805115, "grad_norm": 0.6890711784362793, "learning_rate": 1.9112685356215213e-05, "loss": 1.5018, "mean_token_accuracy": 0.6435980846484503, "num_tokens": 515096800.0, "step": 3065 }, { "entropy": 1.7427007655302684, "epoch": 0.3368212902694241, "grad_norm": 0.8294128775596619, "learning_rate": 1.9111991990934736e-05, "loss": 1.2624, "mean_token_accuracy": 0.668077364563942, "num_tokens": 515220739.0, "step": 3066 }, { "entropy": 1.7589279413223267, "epoch": 0.33693114718079703, "grad_norm": 0.6529168486595154, "learning_rate": 1.9111298368913368e-05, "loss": 1.4271, "mean_token_accuracy": 0.6581545720497767, "num_tokens": 515409312.0, "step": 3067 }, { "entropy": 1.7398897409439087, "epoch": 0.33704100409216997, "grad_norm": 0.705767035484314, "learning_rate": 1.9110604490173065e-05, "loss": 1.4301, "mean_token_accuracy": 0.6575960069894791, "num_tokens": 515532475.0, "step": 3068 }, { "entropy": 1.6691156526406605, "epoch": 0.3371508610035429, "grad_norm": 0.6828332543373108, "learning_rate": 1.9109910354735778e-05, "loss": 1.2612, "mean_token_accuracy": 0.6719839175542196, "num_tokens": 515668637.0, "step": 3069 }, { "entropy": 1.7897346119085948, "epoch": 0.3372607179149158, "grad_norm": 0.7696541547775269, "learning_rate": 1.910921596262349e-05, "loss": 1.5465, "mean_token_accuracy": 0.6406855061650276, "num_tokens": 515800127.0, "step": 3070 }, { "entropy": 1.7390386561552684, "epoch": 0.33737057482628874, "grad_norm": 0.8148576617240906, "learning_rate": 1.9108521313858164e-05, "loss": 1.5137, "mean_token_accuracy": 0.640427882472674, "num_tokens": 516004144.0, "step": 3071 }, { "entropy": 1.645291765530904, "epoch": 0.3374804317376617, "grad_norm": 0.7999487519264221, "learning_rate": 1.9107826408461796e-05, "loss": 1.3468, "mean_token_accuracy": 0.668777272105217, "num_tokens": 516178099.0, "step": 3072 }, { "entropy": 1.7338153024514515, "epoch": 0.3375902886490346, "grad_norm": 0.6503975987434387, "learning_rate": 1.9107131246456372e-05, "loss": 1.4304, "mean_token_accuracy": 0.6472113927205404, "num_tokens": 516343512.0, "step": 3073 }, { "entropy": 1.7268471519152324, "epoch": 0.33770014556040756, "grad_norm": 0.8366050124168396, "learning_rate": 1.9106435827863903e-05, "loss": 1.372, "mean_token_accuracy": 0.6583438813686371, "num_tokens": 516489688.0, "step": 3074 }, { "entropy": 1.7441972096761067, "epoch": 0.3378100024717805, "grad_norm": 0.6372826099395752, "learning_rate": 1.9105740152706388e-05, "loss": 1.4328, "mean_token_accuracy": 0.6417555212974548, "num_tokens": 516658774.0, "step": 3075 }, { "entropy": 1.660355657339096, "epoch": 0.33791985938315344, "grad_norm": 0.6428292989730835, "learning_rate": 1.9105044221005852e-05, "loss": 1.3697, "mean_token_accuracy": 0.6568067520856857, "num_tokens": 516796670.0, "step": 3076 }, { "entropy": 1.69850026567777, "epoch": 0.3380297162945264, "grad_norm": 0.7151433825492859, "learning_rate": 1.910434803278432e-05, "loss": 1.3114, "mean_token_accuracy": 0.6674358497063319, "num_tokens": 516955225.0, "step": 3077 }, { "entropy": 1.6989915072917938, "epoch": 0.3381395732058993, "grad_norm": 0.7246211767196655, "learning_rate": 1.9103651588063822e-05, "loss": 1.3039, "mean_token_accuracy": 0.6708632856607437, "num_tokens": 517099008.0, "step": 3078 }, { "entropy": 1.7012667655944824, "epoch": 0.33824943011727227, "grad_norm": 0.8172794580459595, "learning_rate": 1.9102954886866404e-05, "loss": 1.3946, "mean_token_accuracy": 0.6592543323834738, "num_tokens": 517245097.0, "step": 3079 }, { "entropy": 1.7260870933532715, "epoch": 0.3383592870286452, "grad_norm": 0.8274314999580383, "learning_rate": 1.9102257929214114e-05, "loss": 1.4324, "mean_token_accuracy": 0.6518008708953857, "num_tokens": 517406042.0, "step": 3080 }, { "entropy": 1.7201440036296844, "epoch": 0.33846914394001815, "grad_norm": 0.7528727054595947, "learning_rate": 1.9101560715129013e-05, "loss": 1.4812, "mean_token_accuracy": 0.6550854941209158, "num_tokens": 517555565.0, "step": 3081 }, { "entropy": 1.6386935810248058, "epoch": 0.3385790008513911, "grad_norm": 0.6425924897193909, "learning_rate": 1.9100863244633165e-05, "loss": 1.3557, "mean_token_accuracy": 0.6622224648793539, "num_tokens": 517724677.0, "step": 3082 }, { "entropy": 1.7015309532483418, "epoch": 0.338688857762764, "grad_norm": 0.6084007620811462, "learning_rate": 1.9100165517748647e-05, "loss": 1.4147, "mean_token_accuracy": 0.6466822971900305, "num_tokens": 517907631.0, "step": 3083 }, { "entropy": 1.7300503849983215, "epoch": 0.3387987146741369, "grad_norm": 0.6377536058425903, "learning_rate": 1.909946753449754e-05, "loss": 1.5793, "mean_token_accuracy": 0.641609787940979, "num_tokens": 518073726.0, "step": 3084 }, { "entropy": 1.7079397439956665, "epoch": 0.33890857158550985, "grad_norm": 0.6480128169059753, "learning_rate": 1.9098769294901933e-05, "loss": 1.3289, "mean_token_accuracy": 0.661668727795283, "num_tokens": 518225623.0, "step": 3085 }, { "entropy": 1.7619627118110657, "epoch": 0.3390184284968828, "grad_norm": 0.8301718831062317, "learning_rate": 1.909807079898393e-05, "loss": 1.5109, "mean_token_accuracy": 0.644140308101972, "num_tokens": 518415989.0, "step": 3086 }, { "entropy": 1.7237402101357777, "epoch": 0.33912828540825574, "grad_norm": 0.7291299104690552, "learning_rate": 1.9097372046765632e-05, "loss": 1.3323, "mean_token_accuracy": 0.6596636722485224, "num_tokens": 518567833.0, "step": 3087 }, { "entropy": 1.7579568723837535, "epoch": 0.3392381423196287, "grad_norm": 0.9242023825645447, "learning_rate": 1.909667303826916e-05, "loss": 1.3778, "mean_token_accuracy": 0.6666527688503265, "num_tokens": 518740218.0, "step": 3088 }, { "entropy": 1.7019338707129161, "epoch": 0.3393479992310016, "grad_norm": 0.7077080607414246, "learning_rate": 1.9095973773516634e-05, "loss": 1.407, "mean_token_accuracy": 0.6584417273600897, "num_tokens": 518915130.0, "step": 3089 }, { "entropy": 1.8156996667385101, "epoch": 0.33945785614237456, "grad_norm": 0.7829475998878479, "learning_rate": 1.9095274252530187e-05, "loss": 1.42, "mean_token_accuracy": 0.65616142253081, "num_tokens": 519088777.0, "step": 3090 }, { "entropy": 1.7283440331617992, "epoch": 0.3395677130537475, "grad_norm": 0.7416720986366272, "learning_rate": 1.9094574475331956e-05, "loss": 1.2568, "mean_token_accuracy": 0.6756617377201716, "num_tokens": 519255742.0, "step": 3091 }, { "entropy": 1.6994885007540386, "epoch": 0.33967756996512044, "grad_norm": 0.6388126611709595, "learning_rate": 1.9093874441944095e-05, "loss": 1.3153, "mean_token_accuracy": 0.6684769292672476, "num_tokens": 519396522.0, "step": 3092 }, { "entropy": 1.7258747617403667, "epoch": 0.3397874268764934, "grad_norm": 0.7206714153289795, "learning_rate": 1.909317415238875e-05, "loss": 1.4058, "mean_token_accuracy": 0.6619496643543243, "num_tokens": 519590462.0, "step": 3093 }, { "entropy": 1.7497619986534119, "epoch": 0.3398972837878663, "grad_norm": 0.7609780430793762, "learning_rate": 1.909247360668809e-05, "loss": 1.3087, "mean_token_accuracy": 0.6699084391196569, "num_tokens": 519720103.0, "step": 3094 }, { "entropy": 1.6820762356122334, "epoch": 0.34000714069923926, "grad_norm": 0.8035679459571838, "learning_rate": 1.9091772804864292e-05, "loss": 1.4856, "mean_token_accuracy": 0.6475498353441557, "num_tokens": 519923412.0, "step": 3095 }, { "entropy": 1.6931442022323608, "epoch": 0.3401169976106122, "grad_norm": 0.6897434592247009, "learning_rate": 1.9091071746939526e-05, "loss": 1.3514, "mean_token_accuracy": 0.6593603193759918, "num_tokens": 520072967.0, "step": 3096 }, { "entropy": 1.7068798343340557, "epoch": 0.3402268545219851, "grad_norm": 0.6774353981018066, "learning_rate": 1.909037043293599e-05, "loss": 1.4233, "mean_token_accuracy": 0.6536633421977361, "num_tokens": 520255988.0, "step": 3097 }, { "entropy": 1.7121065855026245, "epoch": 0.34033671143335803, "grad_norm": 0.7132356762886047, "learning_rate": 1.908966886287587e-05, "loss": 1.3878, "mean_token_accuracy": 0.6654232740402222, "num_tokens": 520488918.0, "step": 3098 }, { "entropy": 1.6239934662977855, "epoch": 0.34044656834473097, "grad_norm": 0.8887202739715576, "learning_rate": 1.908896703678138e-05, "loss": 1.4214, "mean_token_accuracy": 0.6726511965195338, "num_tokens": 520660679.0, "step": 3099 }, { "entropy": 1.7160980502764385, "epoch": 0.3405564252561039, "grad_norm": 0.6955882906913757, "learning_rate": 1.9088264954674724e-05, "loss": 1.3324, "mean_token_accuracy": 0.6596625298261642, "num_tokens": 520838220.0, "step": 3100 }, { "entropy": 1.6648767292499542, "epoch": 0.34066628216747685, "grad_norm": 0.6133254170417786, "learning_rate": 1.908756261657813e-05, "loss": 1.4626, "mean_token_accuracy": 0.6427382330099741, "num_tokens": 521036582.0, "step": 3101 }, { "entropy": 1.7698853611946106, "epoch": 0.3407761390788498, "grad_norm": 0.7591292262077332, "learning_rate": 1.9086860022513823e-05, "loss": 1.3989, "mean_token_accuracy": 0.646201545993487, "num_tokens": 521195519.0, "step": 3102 }, { "entropy": 1.6912595828374226, "epoch": 0.34088599599022273, "grad_norm": 0.6244848966598511, "learning_rate": 1.9086157172504036e-05, "loss": 1.4302, "mean_token_accuracy": 0.6519865940014521, "num_tokens": 521348443.0, "step": 3103 }, { "entropy": 1.7508087356885274, "epoch": 0.3409958529015957, "grad_norm": 0.6853779554367065, "learning_rate": 1.9085454066571023e-05, "loss": 1.4347, "mean_token_accuracy": 0.6526261965433756, "num_tokens": 521505284.0, "step": 3104 }, { "entropy": 1.6787457764148712, "epoch": 0.3411057098129686, "grad_norm": 0.7456424236297607, "learning_rate": 1.908475070473703e-05, "loss": 1.3914, "mean_token_accuracy": 0.6657826354106268, "num_tokens": 521671735.0, "step": 3105 }, { "entropy": 1.7271059552828472, "epoch": 0.34121556672434156, "grad_norm": 0.7938264608383179, "learning_rate": 1.9084047087024325e-05, "loss": 1.4352, "mean_token_accuracy": 0.657663548986117, "num_tokens": 521839358.0, "step": 3106 }, { "entropy": 1.6707509557406108, "epoch": 0.3413254236357145, "grad_norm": 0.6704132556915283, "learning_rate": 1.9083343213455167e-05, "loss": 1.4081, "mean_token_accuracy": 0.655000850558281, "num_tokens": 522070055.0, "step": 3107 }, { "entropy": 1.6707326571146648, "epoch": 0.34143528054708744, "grad_norm": 0.7726478576660156, "learning_rate": 1.908263908405184e-05, "loss": 1.1888, "mean_token_accuracy": 0.6913275470336279, "num_tokens": 522186687.0, "step": 3108 }, { "entropy": 1.7188027401765187, "epoch": 0.3415451374584604, "grad_norm": 0.673206627368927, "learning_rate": 1.908193469883663e-05, "loss": 1.3749, "mean_token_accuracy": 0.6531964292128881, "num_tokens": 522337959.0, "step": 3109 }, { "entropy": 1.7751700381437938, "epoch": 0.34165499436983326, "grad_norm": 0.6284109950065613, "learning_rate": 1.9081230057831827e-05, "loss": 1.3603, "mean_token_accuracy": 0.6515864779551824, "num_tokens": 522509221.0, "step": 3110 }, { "entropy": 1.66496338446935, "epoch": 0.3417648512812062, "grad_norm": 0.7413091063499451, "learning_rate": 1.9080525161059737e-05, "loss": 1.3669, "mean_token_accuracy": 0.6810042262077332, "num_tokens": 522637505.0, "step": 3111 }, { "entropy": 1.7317347327868144, "epoch": 0.34187470819257915, "grad_norm": 0.7235397100448608, "learning_rate": 1.907982000854266e-05, "loss": 1.2779, "mean_token_accuracy": 0.6744700570901235, "num_tokens": 522731744.0, "step": 3112 }, { "entropy": 1.6716107825438182, "epoch": 0.3419845651039521, "grad_norm": 0.7283676266670227, "learning_rate": 1.9079114600302926e-05, "loss": 1.3479, "mean_token_accuracy": 0.6658121645450592, "num_tokens": 522907464.0, "step": 3113 }, { "entropy": 1.6335497895876567, "epoch": 0.34209442201532503, "grad_norm": 0.7072910070419312, "learning_rate": 1.9078408936362857e-05, "loss": 1.2444, "mean_token_accuracy": 0.6787208517392477, "num_tokens": 523057454.0, "step": 3114 }, { "entropy": 1.7214987377325695, "epoch": 0.34220427892669797, "grad_norm": 0.7019241452217102, "learning_rate": 1.907770301674478e-05, "loss": 1.3578, "mean_token_accuracy": 0.6487798243761063, "num_tokens": 523240620.0, "step": 3115 }, { "entropy": 1.646393616994222, "epoch": 0.3423141358380709, "grad_norm": 0.7203688621520996, "learning_rate": 1.9076996841471047e-05, "loss": 1.3311, "mean_token_accuracy": 0.6660736699899038, "num_tokens": 523391299.0, "step": 3116 }, { "entropy": 1.7005057732264202, "epoch": 0.34242399274944385, "grad_norm": 0.636923611164093, "learning_rate": 1.9076290410564e-05, "loss": 1.4847, "mean_token_accuracy": 0.6475814878940582, "num_tokens": 523564225.0, "step": 3117 }, { "entropy": 1.7076924443244934, "epoch": 0.3425338496608168, "grad_norm": 0.7187374234199524, "learning_rate": 1.9075583724046004e-05, "loss": 1.4605, "mean_token_accuracy": 0.6543427258729935, "num_tokens": 523745353.0, "step": 3118 }, { "entropy": 1.7326221863428752, "epoch": 0.34264370657218973, "grad_norm": 0.6638636589050293, "learning_rate": 1.907487678193942e-05, "loss": 1.432, "mean_token_accuracy": 0.6483491808176041, "num_tokens": 523914991.0, "step": 3119 }, { "entropy": 1.692298283179601, "epoch": 0.3427535634835627, "grad_norm": 0.7324991822242737, "learning_rate": 1.9074169584266627e-05, "loss": 1.4448, "mean_token_accuracy": 0.6582270761330923, "num_tokens": 524095746.0, "step": 3120 }, { "entropy": 1.733394632736842, "epoch": 0.3428634203949356, "grad_norm": 0.7477669715881348, "learning_rate": 1.9073462131050002e-05, "loss": 1.5347, "mean_token_accuracy": 0.6465493490298589, "num_tokens": 524332980.0, "step": 3121 }, { "entropy": 1.7090126971403758, "epoch": 0.34297327730630855, "grad_norm": 0.6135408878326416, "learning_rate": 1.9072754422311937e-05, "loss": 1.3248, "mean_token_accuracy": 0.6581533948580424, "num_tokens": 524465802.0, "step": 3122 }, { "entropy": 1.715635746717453, "epoch": 0.3430831342176815, "grad_norm": 0.5586002469062805, "learning_rate": 1.9072046458074834e-05, "loss": 1.5206, "mean_token_accuracy": 0.6238716145356497, "num_tokens": 524743379.0, "step": 3123 }, { "entropy": 1.7447759707768757, "epoch": 0.3431929911290544, "grad_norm": 0.8412876129150391, "learning_rate": 1.90713382383611e-05, "loss": 1.4483, "mean_token_accuracy": 0.6630438417196274, "num_tokens": 524892897.0, "step": 3124 }, { "entropy": 1.7658887306849163, "epoch": 0.3433028480404273, "grad_norm": 0.7582389116287231, "learning_rate": 1.9070629763193148e-05, "loss": 1.5115, "mean_token_accuracy": 0.6470319529374441, "num_tokens": 525073268.0, "step": 3125 }, { "entropy": 1.7027207911014557, "epoch": 0.34341270495180026, "grad_norm": 0.7003147006034851, "learning_rate": 1.90699210325934e-05, "loss": 1.3185, "mean_token_accuracy": 0.6545501202344894, "num_tokens": 525223000.0, "step": 3126 }, { "entropy": 1.6880733569463093, "epoch": 0.3435225618631732, "grad_norm": 0.8535897731781006, "learning_rate": 1.9069212046584288e-05, "loss": 1.2261, "mean_token_accuracy": 0.6908506006002426, "num_tokens": 525346110.0, "step": 3127 }, { "entropy": 1.6838291088740032, "epoch": 0.34363241877454614, "grad_norm": 0.7103913426399231, "learning_rate": 1.9068502805188247e-05, "loss": 1.4846, "mean_token_accuracy": 0.6660540848970413, "num_tokens": 525510089.0, "step": 3128 }, { "entropy": 1.6504852771759033, "epoch": 0.3437422756859191, "grad_norm": 0.6084645390510559, "learning_rate": 1.9067793308427734e-05, "loss": 1.3674, "mean_token_accuracy": 0.6550189206997553, "num_tokens": 525680601.0, "step": 3129 }, { "entropy": 1.66959352294604, "epoch": 0.343852132597292, "grad_norm": 0.9643108248710632, "learning_rate": 1.906708355632519e-05, "loss": 1.186, "mean_token_accuracy": 0.684942439198494, "num_tokens": 525819677.0, "step": 3130 }, { "entropy": 1.7390025953451793, "epoch": 0.34396198950866497, "grad_norm": 0.8256139755249023, "learning_rate": 1.9066373548903097e-05, "loss": 1.4807, "mean_token_accuracy": 0.6392683138449987, "num_tokens": 526016590.0, "step": 3131 }, { "entropy": 1.6989341179529827, "epoch": 0.3440718464200379, "grad_norm": 0.6582311987876892, "learning_rate": 1.906566328618391e-05, "loss": 1.3251, "mean_token_accuracy": 0.6610842347145081, "num_tokens": 526146630.0, "step": 3132 }, { "entropy": 1.6985157827536266, "epoch": 0.34418170333141085, "grad_norm": 0.7080286741256714, "learning_rate": 1.9064952768190114e-05, "loss": 1.2785, "mean_token_accuracy": 0.6769105891386668, "num_tokens": 526281535.0, "step": 3133 }, { "entropy": 1.7005742291609447, "epoch": 0.3442915602427838, "grad_norm": 0.741958737373352, "learning_rate": 1.9064241994944197e-05, "loss": 1.2767, "mean_token_accuracy": 0.6701702376206716, "num_tokens": 526426500.0, "step": 3134 }, { "entropy": 1.6816225151220958, "epoch": 0.34440141715415673, "grad_norm": 0.6872779130935669, "learning_rate": 1.9063530966468655e-05, "loss": 1.4314, "mean_token_accuracy": 0.6407319158315659, "num_tokens": 526647271.0, "step": 3135 }, { "entropy": 1.6521427631378174, "epoch": 0.34451127406552967, "grad_norm": 0.7281518578529358, "learning_rate": 1.9062819682785993e-05, "loss": 1.3395, "mean_token_accuracy": 0.6678001085917155, "num_tokens": 526775519.0, "step": 3136 }, { "entropy": 1.6584477225939434, "epoch": 0.3446211309769026, "grad_norm": 0.7105032205581665, "learning_rate": 1.906210814391872e-05, "loss": 1.5403, "mean_token_accuracy": 0.6434331585963567, "num_tokens": 526960781.0, "step": 3137 }, { "entropy": 1.7096496224403381, "epoch": 0.3447309878882755, "grad_norm": 1.1132054328918457, "learning_rate": 1.9061396349889357e-05, "loss": 1.3313, "mean_token_accuracy": 0.6592603524525961, "num_tokens": 527077059.0, "step": 3138 }, { "entropy": 1.7816942930221558, "epoch": 0.34484084479964844, "grad_norm": 0.623199462890625, "learning_rate": 1.9060684300720435e-05, "loss": 1.4786, "mean_token_accuracy": 0.6451230843861898, "num_tokens": 527242392.0, "step": 3139 }, { "entropy": 1.7357947031656902, "epoch": 0.3449507017110214, "grad_norm": 0.6942022442817688, "learning_rate": 1.9059971996434483e-05, "loss": 1.6755, "mean_token_accuracy": 0.6327243894338608, "num_tokens": 527421563.0, "step": 3140 }, { "entropy": 1.719354470570882, "epoch": 0.3450605586223943, "grad_norm": 0.6363817453384399, "learning_rate": 1.9059259437054052e-05, "loss": 1.2542, "mean_token_accuracy": 0.6765570292870203, "num_tokens": 527542290.0, "step": 3141 }, { "entropy": 1.6994706292947133, "epoch": 0.34517041553376726, "grad_norm": 0.6537553071975708, "learning_rate": 1.9058546622601688e-05, "loss": 1.522, "mean_token_accuracy": 0.6443347980578741, "num_tokens": 527721230.0, "step": 3142 }, { "entropy": 1.7906754612922668, "epoch": 0.3452802724451402, "grad_norm": 0.7728573679924011, "learning_rate": 1.9057833553099957e-05, "loss": 1.444, "mean_token_accuracy": 0.6405621866385142, "num_tokens": 527925672.0, "step": 3143 }, { "entropy": 1.635061929623286, "epoch": 0.34539012935651314, "grad_norm": 0.7225202918052673, "learning_rate": 1.9057120228571426e-05, "loss": 1.2604, "mean_token_accuracy": 0.6803568998972574, "num_tokens": 528045373.0, "step": 3144 }, { "entropy": 1.7567674815654755, "epoch": 0.3454999862678861, "grad_norm": 0.7282200455665588, "learning_rate": 1.905640664903867e-05, "loss": 1.6159, "mean_token_accuracy": 0.6386793802181879, "num_tokens": 528235110.0, "step": 3145 }, { "entropy": 1.6713021596272786, "epoch": 0.345609843179259, "grad_norm": 0.6087730526924133, "learning_rate": 1.9055692814524273e-05, "loss": 1.2903, "mean_token_accuracy": 0.6718141039212545, "num_tokens": 528415004.0, "step": 3146 }, { "entropy": 1.6759739617506664, "epoch": 0.34571970009063197, "grad_norm": 0.8467540144920349, "learning_rate": 1.9054978725050827e-05, "loss": 1.3079, "mean_token_accuracy": 0.6630802005529404, "num_tokens": 528553683.0, "step": 3147 }, { "entropy": 1.709736426671346, "epoch": 0.3458295570020049, "grad_norm": 0.6705769896507263, "learning_rate": 1.9054264380640936e-05, "loss": 1.3064, "mean_token_accuracy": 0.6646785189708074, "num_tokens": 528671808.0, "step": 3148 }, { "entropy": 1.747974932193756, "epoch": 0.34593941391337785, "grad_norm": 0.7119439244270325, "learning_rate": 1.9053549781317208e-05, "loss": 1.2835, "mean_token_accuracy": 0.6721477657556534, "num_tokens": 528778625.0, "step": 3149 }, { "entropy": 1.741389234860738, "epoch": 0.3460492708247508, "grad_norm": 0.6632856130599976, "learning_rate": 1.9052834927102255e-05, "loss": 1.3884, "mean_token_accuracy": 0.6711229979991913, "num_tokens": 528918469.0, "step": 3150 }, { "entropy": 1.7209580143292744, "epoch": 0.3461591277361237, "grad_norm": 0.6064859628677368, "learning_rate": 1.905211981801871e-05, "loss": 1.2897, "mean_token_accuracy": 0.6677672813336054, "num_tokens": 529046367.0, "step": 3151 }, { "entropy": 1.7029893298943837, "epoch": 0.3462689846474966, "grad_norm": 0.6776720285415649, "learning_rate": 1.9051404454089196e-05, "loss": 1.473, "mean_token_accuracy": 0.6507180581490198, "num_tokens": 529254785.0, "step": 3152 }, { "entropy": 1.699689010779063, "epoch": 0.34637884155886955, "grad_norm": 0.7268986701965332, "learning_rate": 1.9050688835336358e-05, "loss": 1.3269, "mean_token_accuracy": 0.674399678905805, "num_tokens": 529399490.0, "step": 3153 }, { "entropy": 1.7162836492061615, "epoch": 0.3464886984702425, "grad_norm": 0.7248696088790894, "learning_rate": 1.904997296178285e-05, "loss": 1.3693, "mean_token_accuracy": 0.6586166570583979, "num_tokens": 529527378.0, "step": 3154 }, { "entropy": 1.6862310767173767, "epoch": 0.34659855538161544, "grad_norm": 0.7161970138549805, "learning_rate": 1.9049256833451327e-05, "loss": 1.481, "mean_token_accuracy": 0.6531454250216484, "num_tokens": 529734726.0, "step": 3155 }, { "entropy": 1.6877350012461345, "epoch": 0.3467084122929884, "grad_norm": 0.6686804294586182, "learning_rate": 1.904854045036445e-05, "loss": 1.3981, "mean_token_accuracy": 0.65309705833594, "num_tokens": 529949884.0, "step": 3156 }, { "entropy": 1.7140614589055378, "epoch": 0.3468182692043613, "grad_norm": 0.6998611092567444, "learning_rate": 1.9047823812544893e-05, "loss": 1.2816, "mean_token_accuracy": 0.6777733812729517, "num_tokens": 530095707.0, "step": 3157 }, { "entropy": 1.7329360047976177, "epoch": 0.34692812611573426, "grad_norm": 0.7617079615592957, "learning_rate": 1.904710692001534e-05, "loss": 1.27, "mean_token_accuracy": 0.6669184813896815, "num_tokens": 530233076.0, "step": 3158 }, { "entropy": 1.7423981527487438, "epoch": 0.3470379830271072, "grad_norm": 0.7219134569168091, "learning_rate": 1.904638977279848e-05, "loss": 1.4189, "mean_token_accuracy": 0.6537288725376129, "num_tokens": 530361395.0, "step": 3159 }, { "entropy": 1.7932091653347015, "epoch": 0.34714783993848014, "grad_norm": 0.7211331129074097, "learning_rate": 1.9045672370917008e-05, "loss": 1.3775, "mean_token_accuracy": 0.6503029266993204, "num_tokens": 530476029.0, "step": 3160 }, { "entropy": 1.6707605421543121, "epoch": 0.3472576968498531, "grad_norm": 0.6406380534172058, "learning_rate": 1.904495471439363e-05, "loss": 1.2947, "mean_token_accuracy": 0.6745659758647283, "num_tokens": 530620082.0, "step": 3161 }, { "entropy": 1.635949860016505, "epoch": 0.347367553761226, "grad_norm": 0.5812481641769409, "learning_rate": 1.9044236803251063e-05, "loss": 1.318, "mean_token_accuracy": 0.6674651255210241, "num_tokens": 530800009.0, "step": 3162 }, { "entropy": 1.686038355032603, "epoch": 0.34747741067259896, "grad_norm": 0.65413898229599, "learning_rate": 1.9043518637512027e-05, "loss": 1.39, "mean_token_accuracy": 0.6651994735002518, "num_tokens": 530939319.0, "step": 3163 }, { "entropy": 1.7452342510223389, "epoch": 0.3475872675839719, "grad_norm": 0.711044192314148, "learning_rate": 1.9042800217199248e-05, "loss": 1.3807, "mean_token_accuracy": 0.6522654493649801, "num_tokens": 531069878.0, "step": 3164 }, { "entropy": 1.672851413488388, "epoch": 0.3476971244953448, "grad_norm": 0.5760392546653748, "learning_rate": 1.9042081542335467e-05, "loss": 1.4053, "mean_token_accuracy": 0.6590311825275421, "num_tokens": 531295489.0, "step": 3165 }, { "entropy": 1.7025360067685444, "epoch": 0.34780698140671773, "grad_norm": 0.6651199460029602, "learning_rate": 1.9041362612943432e-05, "loss": 1.4792, "mean_token_accuracy": 0.6444245874881744, "num_tokens": 531499724.0, "step": 3166 }, { "entropy": 1.7628304362297058, "epoch": 0.34791683831809067, "grad_norm": 0.6585642099380493, "learning_rate": 1.9040643429045887e-05, "loss": 1.4042, "mean_token_accuracy": 0.6575342814127604, "num_tokens": 531731480.0, "step": 3167 }, { "entropy": 1.740038514137268, "epoch": 0.3480266952294636, "grad_norm": 0.7570586800575256, "learning_rate": 1.9039923990665605e-05, "loss": 1.439, "mean_token_accuracy": 0.6459324061870575, "num_tokens": 531912616.0, "step": 3168 }, { "entropy": 1.7111516793568928, "epoch": 0.34813655214083655, "grad_norm": 0.6636490225791931, "learning_rate": 1.903920429782535e-05, "loss": 1.3603, "mean_token_accuracy": 0.6567764480908712, "num_tokens": 532036362.0, "step": 3169 }, { "entropy": 1.7178413569927216, "epoch": 0.3482464090522095, "grad_norm": 0.8525426387786865, "learning_rate": 1.9038484350547903e-05, "loss": 1.3025, "mean_token_accuracy": 0.6610483030478159, "num_tokens": 532188392.0, "step": 3170 }, { "entropy": 1.7293661733468373, "epoch": 0.34835626596358243, "grad_norm": 0.7002199292182922, "learning_rate": 1.903776414885605e-05, "loss": 1.4114, "mean_token_accuracy": 0.6590321709712347, "num_tokens": 532327314.0, "step": 3171 }, { "entropy": 1.691053032875061, "epoch": 0.3484661228749554, "grad_norm": 0.7545453310012817, "learning_rate": 1.903704369277258e-05, "loss": 1.336, "mean_token_accuracy": 0.6653313388427099, "num_tokens": 532475041.0, "step": 3172 }, { "entropy": 1.7258902490139008, "epoch": 0.3485759797863283, "grad_norm": 0.7080891132354736, "learning_rate": 1.90363229823203e-05, "loss": 1.3609, "mean_token_accuracy": 0.6499852339426676, "num_tokens": 532608570.0, "step": 3173 }, { "entropy": 1.7066673735777538, "epoch": 0.34868583669770126, "grad_norm": 0.7277325391769409, "learning_rate": 1.9035602017522018e-05, "loss": 1.3951, "mean_token_accuracy": 0.6554910639921824, "num_tokens": 532771155.0, "step": 3174 }, { "entropy": 1.6777300437291462, "epoch": 0.3487956936090742, "grad_norm": 0.6466101408004761, "learning_rate": 1.9034880798400556e-05, "loss": 1.5736, "mean_token_accuracy": 0.6363010754187902, "num_tokens": 532958303.0, "step": 3175 }, { "entropy": 1.7415178914864857, "epoch": 0.34890555052044714, "grad_norm": 0.7584067583084106, "learning_rate": 1.9034159324978735e-05, "loss": 1.2576, "mean_token_accuracy": 0.6761174450318018, "num_tokens": 533125729.0, "step": 3176 }, { "entropy": 1.7105094691117604, "epoch": 0.3490154074318201, "grad_norm": 0.7372577786445618, "learning_rate": 1.9033437597279392e-05, "loss": 1.3823, "mean_token_accuracy": 0.6669119844834009, "num_tokens": 533287732.0, "step": 3177 }, { "entropy": 1.6837959190209706, "epoch": 0.34912526434319296, "grad_norm": 0.8528002500534058, "learning_rate": 1.903271561532537e-05, "loss": 1.3608, "mean_token_accuracy": 0.6598199556271235, "num_tokens": 533488739.0, "step": 3178 }, { "entropy": 1.7475207646687825, "epoch": 0.3492351212545659, "grad_norm": 0.7588545083999634, "learning_rate": 1.9031993379139517e-05, "loss": 1.4702, "mean_token_accuracy": 0.6513977944850922, "num_tokens": 533632736.0, "step": 3179 }, { "entropy": 1.676239550113678, "epoch": 0.34934497816593885, "grad_norm": 0.6096740365028381, "learning_rate": 1.903127088874469e-05, "loss": 1.221, "mean_token_accuracy": 0.6817640314499537, "num_tokens": 533766233.0, "step": 3180 }, { "entropy": 1.734337071577708, "epoch": 0.3494548350773118, "grad_norm": 0.6904963850975037, "learning_rate": 1.9030548144163766e-05, "loss": 1.4203, "mean_token_accuracy": 0.6599701891342798, "num_tokens": 533970894.0, "step": 3181 }, { "entropy": 1.7419179677963257, "epoch": 0.34956469198868473, "grad_norm": 0.8853073120117188, "learning_rate": 1.9029825145419606e-05, "loss": 1.3835, "mean_token_accuracy": 0.6622406442960104, "num_tokens": 534094496.0, "step": 3182 }, { "entropy": 1.6786328554153442, "epoch": 0.34967454890005767, "grad_norm": 0.6363751292228699, "learning_rate": 1.90291018925351e-05, "loss": 1.4218, "mean_token_accuracy": 0.661681205034256, "num_tokens": 534283850.0, "step": 3183 }, { "entropy": 1.6927332083384197, "epoch": 0.3497844058114306, "grad_norm": 0.6172245144844055, "learning_rate": 1.902837838553314e-05, "loss": 1.3543, "mean_token_accuracy": 0.6617726981639862, "num_tokens": 534467200.0, "step": 3184 }, { "entropy": 1.7397380471229553, "epoch": 0.34989426272280355, "grad_norm": 0.7281948328018188, "learning_rate": 1.9027654624436617e-05, "loss": 1.3724, "mean_token_accuracy": 0.6596235682566961, "num_tokens": 534630747.0, "step": 3185 }, { "entropy": 1.7327102224032085, "epoch": 0.3500041196341765, "grad_norm": 0.6819478869438171, "learning_rate": 1.9026930609268445e-05, "loss": 1.3829, "mean_token_accuracy": 0.6533329288164774, "num_tokens": 534801851.0, "step": 3186 }, { "entropy": 1.630759596824646, "epoch": 0.35011397654554943, "grad_norm": 0.6898466944694519, "learning_rate": 1.9026206340051535e-05, "loss": 1.2503, "mean_token_accuracy": 0.6898584812879562, "num_tokens": 534974937.0, "step": 3187 }, { "entropy": 1.7490450243155162, "epoch": 0.3502238334569224, "grad_norm": 0.6831504106521606, "learning_rate": 1.902548181680881e-05, "loss": 1.3835, "mean_token_accuracy": 0.6639950623114904, "num_tokens": 535144941.0, "step": 3188 }, { "entropy": 1.7011112074057262, "epoch": 0.3503336903682953, "grad_norm": 0.6328549385070801, "learning_rate": 1.902475703956321e-05, "loss": 1.4859, "mean_token_accuracy": 0.6441772828499476, "num_tokens": 535306022.0, "step": 3189 }, { "entropy": 1.7117084761460621, "epoch": 0.35044354727966825, "grad_norm": 0.6839233040809631, "learning_rate": 1.9024032008337654e-05, "loss": 1.3128, "mean_token_accuracy": 0.6650984783967336, "num_tokens": 535457085.0, "step": 3190 }, { "entropy": 1.639816661675771, "epoch": 0.3505534041910412, "grad_norm": 0.6154528856277466, "learning_rate": 1.9023306723155108e-05, "loss": 1.4108, "mean_token_accuracy": 0.645085021853447, "num_tokens": 535644965.0, "step": 3191 }, { "entropy": 1.7268753548463185, "epoch": 0.3506632611024141, "grad_norm": 0.7392616271972656, "learning_rate": 1.902258118403852e-05, "loss": 1.5011, "mean_token_accuracy": 0.6435932318369547, "num_tokens": 535819854.0, "step": 3192 }, { "entropy": 1.7294295032819111, "epoch": 0.350773118013787, "grad_norm": 0.750583827495575, "learning_rate": 1.9021855391010848e-05, "loss": 1.4695, "mean_token_accuracy": 0.6535586913426717, "num_tokens": 535986320.0, "step": 3193 }, { "entropy": 1.6876760522524517, "epoch": 0.35088297492515996, "grad_norm": 0.733034610748291, "learning_rate": 1.902112934409507e-05, "loss": 1.5651, "mean_token_accuracy": 0.6665263374646505, "num_tokens": 536133774.0, "step": 3194 }, { "entropy": 1.724027395248413, "epoch": 0.3509928318365329, "grad_norm": 0.705089271068573, "learning_rate": 1.9020403043314165e-05, "loss": 1.3844, "mean_token_accuracy": 0.6622153123219808, "num_tokens": 536301152.0, "step": 3195 }, { "entropy": 1.6385993957519531, "epoch": 0.35110268874790584, "grad_norm": 0.6220270991325378, "learning_rate": 1.9019676488691113e-05, "loss": 1.3943, "mean_token_accuracy": 0.6637493073940277, "num_tokens": 536466359.0, "step": 3196 }, { "entropy": 1.647113859653473, "epoch": 0.3512125456592788, "grad_norm": 0.6425076723098755, "learning_rate": 1.9018949680248913e-05, "loss": 1.2825, "mean_token_accuracy": 0.6719297617673874, "num_tokens": 536602915.0, "step": 3197 }, { "entropy": 1.6742305755615234, "epoch": 0.3513224025706517, "grad_norm": 0.683866024017334, "learning_rate": 1.9018222618010577e-05, "loss": 1.3446, "mean_token_accuracy": 0.6559995263814926, "num_tokens": 536785708.0, "step": 3198 }, { "entropy": 1.6582418382167816, "epoch": 0.35143225948202467, "grad_norm": 0.5620256066322327, "learning_rate": 1.90174953019991e-05, "loss": 1.3776, "mean_token_accuracy": 0.6558230916659037, "num_tokens": 537010057.0, "step": 3199 }, { "entropy": 1.7322443127632141, "epoch": 0.3515421163933976, "grad_norm": 0.7158662676811218, "learning_rate": 1.9016767732237517e-05, "loss": 1.4791, "mean_token_accuracy": 0.6457269241412481, "num_tokens": 537170570.0, "step": 3200 }, { "entropy": 1.7214918732643127, "epoch": 0.35165197330477055, "grad_norm": 0.7073965072631836, "learning_rate": 1.901603990874884e-05, "loss": 1.3967, "mean_token_accuracy": 0.6600227405627569, "num_tokens": 537366594.0, "step": 3201 }, { "entropy": 1.6927407383918762, "epoch": 0.3517618302161435, "grad_norm": 0.6808587312698364, "learning_rate": 1.9015311831556115e-05, "loss": 1.326, "mean_token_accuracy": 0.6713967969020208, "num_tokens": 537506637.0, "step": 3202 }, { "entropy": 1.719231108824412, "epoch": 0.35187168712751643, "grad_norm": 0.8534165024757385, "learning_rate": 1.9014583500682384e-05, "loss": 1.3823, "mean_token_accuracy": 0.6730857292811075, "num_tokens": 537656682.0, "step": 3203 }, { "entropy": 1.684444894393285, "epoch": 0.35198154403888937, "grad_norm": 0.74547278881073, "learning_rate": 1.90138549161507e-05, "loss": 1.2741, "mean_token_accuracy": 0.670419305562973, "num_tokens": 537803818.0, "step": 3204 }, { "entropy": 1.7365160286426544, "epoch": 0.35209140095026226, "grad_norm": 0.8732142448425293, "learning_rate": 1.901312607798411e-05, "loss": 1.5498, "mean_token_accuracy": 0.6439683735370636, "num_tokens": 537990938.0, "step": 3205 }, { "entropy": 1.7507870495319366, "epoch": 0.3522012578616352, "grad_norm": 0.7832520008087158, "learning_rate": 1.9012396986205695e-05, "loss": 1.5008, "mean_token_accuracy": 0.6420899679263433, "num_tokens": 538123491.0, "step": 3206 }, { "entropy": 1.6451840698719025, "epoch": 0.35231111477300814, "grad_norm": 0.6481114029884338, "learning_rate": 1.9011667640838527e-05, "loss": 1.4059, "mean_token_accuracy": 0.6524686167637507, "num_tokens": 538325290.0, "step": 3207 }, { "entropy": 1.645903656880061, "epoch": 0.3524209716843811, "grad_norm": 0.6290945410728455, "learning_rate": 1.901093804190569e-05, "loss": 1.3211, "mean_token_accuracy": 0.6677955438693365, "num_tokens": 538458740.0, "step": 3208 }, { "entropy": 1.7137588659922283, "epoch": 0.352530828595754, "grad_norm": 0.6638056635856628, "learning_rate": 1.901020818943027e-05, "loss": 1.2841, "mean_token_accuracy": 0.6764950404564539, "num_tokens": 538596800.0, "step": 3209 }, { "entropy": 1.7618902027606964, "epoch": 0.35264068550712696, "grad_norm": 0.7724607586860657, "learning_rate": 1.9009478083435372e-05, "loss": 1.473, "mean_token_accuracy": 0.6430220901966095, "num_tokens": 538746035.0, "step": 3210 }, { "entropy": 1.6959488193194072, "epoch": 0.3527505424184999, "grad_norm": 0.7325376272201538, "learning_rate": 1.90087477239441e-05, "loss": 1.4151, "mean_token_accuracy": 0.6453822106122971, "num_tokens": 538922341.0, "step": 3211 }, { "entropy": 1.743853767712911, "epoch": 0.35286039932987284, "grad_norm": 0.7420296669006348, "learning_rate": 1.9008017110979573e-05, "loss": 1.4575, "mean_token_accuracy": 0.6492985039949417, "num_tokens": 539055514.0, "step": 3212 }, { "entropy": 1.6643742322921753, "epoch": 0.3529702562412458, "grad_norm": 0.616858959197998, "learning_rate": 1.9007286244564912e-05, "loss": 1.3077, "mean_token_accuracy": 0.6636150479316711, "num_tokens": 539195690.0, "step": 3213 }, { "entropy": 1.7134017944335938, "epoch": 0.3530801131526187, "grad_norm": 0.6087394952774048, "learning_rate": 1.900655512472325e-05, "loss": 1.4671, "mean_token_accuracy": 0.642287035783132, "num_tokens": 539452254.0, "step": 3214 }, { "entropy": 1.6220239003499348, "epoch": 0.35318997006399167, "grad_norm": 0.6543572545051575, "learning_rate": 1.9005823751477727e-05, "loss": 1.4195, "mean_token_accuracy": 0.6753101100524267, "num_tokens": 539651551.0, "step": 3215 }, { "entropy": 1.7377901673316956, "epoch": 0.3532998269753646, "grad_norm": 0.6597190499305725, "learning_rate": 1.9005092124851488e-05, "loss": 1.4769, "mean_token_accuracy": 0.6341644277175268, "num_tokens": 539839472.0, "step": 3216 }, { "entropy": 1.636192907889684, "epoch": 0.35340968388673755, "grad_norm": 0.7926854491233826, "learning_rate": 1.9004360244867692e-05, "loss": 1.4022, "mean_token_accuracy": 0.6752565801143646, "num_tokens": 539974405.0, "step": 3217 }, { "entropy": 1.7549065450827281, "epoch": 0.3535195407981105, "grad_norm": 0.733349621295929, "learning_rate": 1.90036281115495e-05, "loss": 1.309, "mean_token_accuracy": 0.6662448445955912, "num_tokens": 540115247.0, "step": 3218 }, { "entropy": 1.7415490448474884, "epoch": 0.3536293977094834, "grad_norm": 0.6571947336196899, "learning_rate": 1.9002895724920084e-05, "loss": 1.4145, "mean_token_accuracy": 0.6497042328119278, "num_tokens": 540260715.0, "step": 3219 }, { "entropy": 1.7305750052134197, "epoch": 0.3537392546208563, "grad_norm": 0.6744175553321838, "learning_rate": 1.9002163085002627e-05, "loss": 1.2965, "mean_token_accuracy": 0.6661575684944788, "num_tokens": 540413230.0, "step": 3220 }, { "entropy": 1.7879510422547658, "epoch": 0.35384911153222925, "grad_norm": 0.7192217707633972, "learning_rate": 1.900143019182031e-05, "loss": 1.5031, "mean_token_accuracy": 0.6537016083796819, "num_tokens": 540599645.0, "step": 3221 }, { "entropy": 1.7366569141546886, "epoch": 0.3539589684436022, "grad_norm": 0.728387713432312, "learning_rate": 1.9000697045396335e-05, "loss": 1.6104, "mean_token_accuracy": 0.6425615598758062, "num_tokens": 540770142.0, "step": 3222 }, { "entropy": 1.6701840062936146, "epoch": 0.35406882535497514, "grad_norm": 0.6737568974494934, "learning_rate": 1.8999963645753907e-05, "loss": 1.4392, "mean_token_accuracy": 0.6645657767852148, "num_tokens": 540924432.0, "step": 3223 }, { "entropy": 1.7021212875843048, "epoch": 0.3541786822663481, "grad_norm": 1.4555866718292236, "learning_rate": 1.8999229992916234e-05, "loss": 1.2265, "mean_token_accuracy": 0.688769077261289, "num_tokens": 541139968.0, "step": 3224 }, { "entropy": 1.6931440830230713, "epoch": 0.354288539177721, "grad_norm": 0.7598428726196289, "learning_rate": 1.8998496086906536e-05, "loss": 1.415, "mean_token_accuracy": 0.6580548882484436, "num_tokens": 541332213.0, "step": 3225 }, { "entropy": 1.7305771907170613, "epoch": 0.35439839608909396, "grad_norm": 0.6824182271957397, "learning_rate": 1.8997761927748038e-05, "loss": 1.3613, "mean_token_accuracy": 0.6714488168557485, "num_tokens": 541503362.0, "step": 3226 }, { "entropy": 1.7079228858153026, "epoch": 0.3545082530004669, "grad_norm": 0.663765013217926, "learning_rate": 1.8997027515463982e-05, "loss": 1.5137, "mean_token_accuracy": 0.6328500509262085, "num_tokens": 541703771.0, "step": 3227 }, { "entropy": 1.7136943340301514, "epoch": 0.35461810991183984, "grad_norm": 0.6330761313438416, "learning_rate": 1.8996292850077605e-05, "loss": 1.3637, "mean_token_accuracy": 0.6594545394182205, "num_tokens": 541901051.0, "step": 3228 }, { "entropy": 1.6656453013420105, "epoch": 0.3547279668232128, "grad_norm": 0.7125634551048279, "learning_rate": 1.8995557931612162e-05, "loss": 1.4978, "mean_token_accuracy": 0.6360595971345901, "num_tokens": 542105364.0, "step": 3229 }, { "entropy": 1.6696421404679616, "epoch": 0.3548378237345857, "grad_norm": 0.6307772994041443, "learning_rate": 1.8994822760090917e-05, "loss": 1.3209, "mean_token_accuracy": 0.6642138212919235, "num_tokens": 542278837.0, "step": 3230 }, { "entropy": 1.7220669488112132, "epoch": 0.35494768064595866, "grad_norm": 0.6235775947570801, "learning_rate": 1.8994087335537136e-05, "loss": 1.4231, "mean_token_accuracy": 0.6583664764960607, "num_tokens": 542477234.0, "step": 3231 }, { "entropy": 1.767273376385371, "epoch": 0.35505753755733155, "grad_norm": 0.75313800573349, "learning_rate": 1.8993351657974088e-05, "loss": 1.3379, "mean_token_accuracy": 0.6546075393756231, "num_tokens": 542579859.0, "step": 3232 }, { "entropy": 1.7123860716819763, "epoch": 0.3551673944687045, "grad_norm": 0.7081466317176819, "learning_rate": 1.8992615727425064e-05, "loss": 1.4038, "mean_token_accuracy": 0.6437129030625025, "num_tokens": 542748402.0, "step": 3233 }, { "entropy": 1.7314150631427765, "epoch": 0.35527725138007743, "grad_norm": 0.7206461429595947, "learning_rate": 1.8991879543913353e-05, "loss": 1.4612, "mean_token_accuracy": 0.6518258800109228, "num_tokens": 542947728.0, "step": 3234 }, { "entropy": 1.7437797288099925, "epoch": 0.35538710829145037, "grad_norm": 0.6989411115646362, "learning_rate": 1.8991143107462256e-05, "loss": 1.4143, "mean_token_accuracy": 0.6447295347849528, "num_tokens": 543108539.0, "step": 3235 }, { "entropy": 1.739637513955434, "epoch": 0.3554969652028233, "grad_norm": 0.8311676979064941, "learning_rate": 1.8990406418095083e-05, "loss": 1.3188, "mean_token_accuracy": 0.6700087090333303, "num_tokens": 543256908.0, "step": 3236 }, { "entropy": 1.6945122977097828, "epoch": 0.35560682211419625, "grad_norm": 0.7516961097717285, "learning_rate": 1.8989669475835145e-05, "loss": 1.3135, "mean_token_accuracy": 0.6661293009916941, "num_tokens": 543373158.0, "step": 3237 }, { "entropy": 1.7841602961222331, "epoch": 0.3557166790255692, "grad_norm": 0.8799614310264587, "learning_rate": 1.898893228070577e-05, "loss": 1.2702, "mean_token_accuracy": 0.6760559976100922, "num_tokens": 543513318.0, "step": 3238 }, { "entropy": 1.7264136672019958, "epoch": 0.35582653593694213, "grad_norm": 0.7641453742980957, "learning_rate": 1.8988194832730283e-05, "loss": 1.301, "mean_token_accuracy": 0.6664343724648157, "num_tokens": 543645245.0, "step": 3239 }, { "entropy": 1.6593830386797588, "epoch": 0.3559363928483151, "grad_norm": 0.7157340049743652, "learning_rate": 1.8987457131932036e-05, "loss": 1.4008, "mean_token_accuracy": 0.6607535431782404, "num_tokens": 543795740.0, "step": 3240 }, { "entropy": 1.7583003342151642, "epoch": 0.356046249759688, "grad_norm": 0.8725547194480896, "learning_rate": 1.898671917833437e-05, "loss": 1.5383, "mean_token_accuracy": 0.6420815885066986, "num_tokens": 543958970.0, "step": 3241 }, { "entropy": 1.6769887109597523, "epoch": 0.35615610667106096, "grad_norm": 0.6710975766181946, "learning_rate": 1.8985980971960637e-05, "loss": 1.5089, "mean_token_accuracy": 0.6415905406077703, "num_tokens": 544152972.0, "step": 3242 }, { "entropy": 1.6808498601118724, "epoch": 0.3562659635824339, "grad_norm": 0.6576784253120422, "learning_rate": 1.8985242512834205e-05, "loss": 1.4222, "mean_token_accuracy": 0.6540278444687525, "num_tokens": 544305414.0, "step": 3243 }, { "entropy": 1.7014685571193695, "epoch": 0.35637582049380684, "grad_norm": 0.67486572265625, "learning_rate": 1.8984503800978444e-05, "loss": 1.4781, "mean_token_accuracy": 0.6487467388312022, "num_tokens": 544497707.0, "step": 3244 }, { "entropy": 1.7330358525117238, "epoch": 0.3564856774051798, "grad_norm": 0.6918492317199707, "learning_rate": 1.898376483641674e-05, "loss": 1.3808, "mean_token_accuracy": 0.6515394548575083, "num_tokens": 544661597.0, "step": 3245 }, { "entropy": 1.7030868232250214, "epoch": 0.35659553431655266, "grad_norm": 0.6160433292388916, "learning_rate": 1.898302561917247e-05, "loss": 1.3579, "mean_token_accuracy": 0.662481889128685, "num_tokens": 544814617.0, "step": 3246 }, { "entropy": 1.7345664103825886, "epoch": 0.3567053912279256, "grad_norm": 0.7806865572929382, "learning_rate": 1.8982286149269043e-05, "loss": 1.505, "mean_token_accuracy": 0.6493661950031916, "num_tokens": 544950318.0, "step": 3247 }, { "entropy": 1.7066125174363453, "epoch": 0.35681524813929855, "grad_norm": 0.6025816202163696, "learning_rate": 1.8981546426729856e-05, "loss": 1.3322, "mean_token_accuracy": 0.6618035733699799, "num_tokens": 545107729.0, "step": 3248 }, { "entropy": 1.7195513546466827, "epoch": 0.3569251050506715, "grad_norm": 0.7217980027198792, "learning_rate": 1.898080645157832e-05, "loss": 1.4037, "mean_token_accuracy": 0.6431277443965276, "num_tokens": 545338954.0, "step": 3249 }, { "entropy": 1.6746398607889812, "epoch": 0.35703496196204443, "grad_norm": 0.5947571992874146, "learning_rate": 1.8980066223837857e-05, "loss": 1.3889, "mean_token_accuracy": 0.6642505377531052, "num_tokens": 545502181.0, "step": 3250 }, { "entropy": 1.782493571440379, "epoch": 0.35714481887341737, "grad_norm": 0.6762712001800537, "learning_rate": 1.8979325743531892e-05, "loss": 1.3322, "mean_token_accuracy": 0.6563690652449926, "num_tokens": 545647976.0, "step": 3251 }, { "entropy": 1.6596081058184307, "epoch": 0.3572546757847903, "grad_norm": 0.665545642375946, "learning_rate": 1.897858501068386e-05, "loss": 1.3157, "mean_token_accuracy": 0.6745046228170395, "num_tokens": 545789298.0, "step": 3252 }, { "entropy": 1.7242101629575093, "epoch": 0.35736453269616325, "grad_norm": 0.6829879879951477, "learning_rate": 1.8977844025317212e-05, "loss": 1.4886, "mean_token_accuracy": 0.645786871512731, "num_tokens": 546005021.0, "step": 3253 }, { "entropy": 1.6587198774019878, "epoch": 0.3574743896075362, "grad_norm": 0.645124614238739, "learning_rate": 1.897710278745539e-05, "loss": 1.4629, "mean_token_accuracy": 0.6476227790117264, "num_tokens": 546211606.0, "step": 3254 }, { "entropy": 1.77561150987943, "epoch": 0.35758424651890913, "grad_norm": 0.7814483642578125, "learning_rate": 1.897636129712187e-05, "loss": 1.5806, "mean_token_accuracy": 0.6428438226381937, "num_tokens": 546374799.0, "step": 3255 }, { "entropy": 1.6502399047215779, "epoch": 0.3576941034302821, "grad_norm": 0.6323907971382141, "learning_rate": 1.8975619554340103e-05, "loss": 1.3035, "mean_token_accuracy": 0.6714171419541041, "num_tokens": 546556026.0, "step": 3256 }, { "entropy": 1.622281789779663, "epoch": 0.357803960341655, "grad_norm": 0.6249427795410156, "learning_rate": 1.8974877559133568e-05, "loss": 1.4739, "mean_token_accuracy": 0.6602053095897039, "num_tokens": 546763855.0, "step": 3257 }, { "entropy": 1.7764336963494618, "epoch": 0.35791381725302795, "grad_norm": 0.7319939136505127, "learning_rate": 1.8974135311525756e-05, "loss": 1.3925, "mean_token_accuracy": 0.6508530924717585, "num_tokens": 546905288.0, "step": 3258 }, { "entropy": 1.6988587478796642, "epoch": 0.35802367416440084, "grad_norm": 0.5522320866584778, "learning_rate": 1.897339281154015e-05, "loss": 1.3956, "mean_token_accuracy": 0.6408476581176122, "num_tokens": 547116190.0, "step": 3259 }, { "entropy": 1.717311978340149, "epoch": 0.3581335310757738, "grad_norm": 0.6752801537513733, "learning_rate": 1.897265005920026e-05, "loss": 1.4233, "mean_token_accuracy": 0.6433817644913992, "num_tokens": 547287521.0, "step": 3260 }, { "entropy": 1.65370711684227, "epoch": 0.3582433879871467, "grad_norm": 0.6644560098648071, "learning_rate": 1.8971907054529585e-05, "loss": 1.5168, "mean_token_accuracy": 0.6519752393166224, "num_tokens": 547490966.0, "step": 3261 }, { "entropy": 1.7368830641110737, "epoch": 0.35835324489851966, "grad_norm": 0.6558582782745361, "learning_rate": 1.8971163797551645e-05, "loss": 1.4857, "mean_token_accuracy": 0.6533776869376501, "num_tokens": 547688075.0, "step": 3262 }, { "entropy": 1.6827348172664642, "epoch": 0.3584631018098926, "grad_norm": 0.6018016934394836, "learning_rate": 1.8970420288289963e-05, "loss": 1.4116, "mean_token_accuracy": 0.6425057997306188, "num_tokens": 547881243.0, "step": 3263 }, { "entropy": 1.7337345282236736, "epoch": 0.35857295872126554, "grad_norm": 0.6800892353057861, "learning_rate": 1.8969676526768072e-05, "loss": 1.4148, "mean_token_accuracy": 0.6537298361460367, "num_tokens": 548022572.0, "step": 3264 }, { "entropy": 1.7320611973603566, "epoch": 0.3586828156326385, "grad_norm": 0.6835919618606567, "learning_rate": 1.8968932513009507e-05, "loss": 1.4243, "mean_token_accuracy": 0.65031631787618, "num_tokens": 548206869.0, "step": 3265 }, { "entropy": 1.6919034918149312, "epoch": 0.3587926725440114, "grad_norm": 0.703696608543396, "learning_rate": 1.8968188247037823e-05, "loss": 1.411, "mean_token_accuracy": 0.655804713567098, "num_tokens": 548390855.0, "step": 3266 }, { "entropy": 1.7181176046530406, "epoch": 0.35890252945538437, "grad_norm": 0.7795320153236389, "learning_rate": 1.8967443728876566e-05, "loss": 1.2687, "mean_token_accuracy": 0.6653892497221628, "num_tokens": 548546869.0, "step": 3267 }, { "entropy": 1.7424982289473216, "epoch": 0.3590123863667573, "grad_norm": 0.7345746755599976, "learning_rate": 1.896669895854931e-05, "loss": 1.6389, "mean_token_accuracy": 0.6303468098243078, "num_tokens": 548770908.0, "step": 3268 }, { "entropy": 1.7184557716051738, "epoch": 0.35912224327813025, "grad_norm": 0.7089744806289673, "learning_rate": 1.8965953936079616e-05, "loss": 1.5394, "mean_token_accuracy": 0.6407269140084585, "num_tokens": 548980049.0, "step": 3269 }, { "entropy": 1.768985648949941, "epoch": 0.3592321001895032, "grad_norm": 0.7779526114463806, "learning_rate": 1.8965208661491073e-05, "loss": 1.452, "mean_token_accuracy": 0.6549462129672369, "num_tokens": 549138218.0, "step": 3270 }, { "entropy": 1.734433690706889, "epoch": 0.35934195710087613, "grad_norm": 0.67804354429245, "learning_rate": 1.8964463134807265e-05, "loss": 1.4667, "mean_token_accuracy": 0.6576692014932632, "num_tokens": 549297807.0, "step": 3271 }, { "entropy": 1.6583465834458668, "epoch": 0.35945181401224907, "grad_norm": 0.6600108742713928, "learning_rate": 1.896371735605179e-05, "loss": 1.442, "mean_token_accuracy": 0.651568760474523, "num_tokens": 549471832.0, "step": 3272 }, { "entropy": 1.7016997933387756, "epoch": 0.35956167092362196, "grad_norm": 0.5997833013534546, "learning_rate": 1.8962971325248246e-05, "loss": 1.5253, "mean_token_accuracy": 0.6380040893952051, "num_tokens": 549645821.0, "step": 3273 }, { "entropy": 1.728733738263448, "epoch": 0.3596715278349949, "grad_norm": 0.7934627532958984, "learning_rate": 1.8962225042420248e-05, "loss": 1.5075, "mean_token_accuracy": 0.6416665812333425, "num_tokens": 549809498.0, "step": 3274 }, { "entropy": 1.7281453907489777, "epoch": 0.35978138474636784, "grad_norm": 0.6657528877258301, "learning_rate": 1.8961478507591417e-05, "loss": 1.3891, "mean_token_accuracy": 0.6498565276463827, "num_tokens": 549995261.0, "step": 3275 }, { "entropy": 1.7271487216154735, "epoch": 0.3598912416577408, "grad_norm": 0.6277911067008972, "learning_rate": 1.8960731720785378e-05, "loss": 1.3812, "mean_token_accuracy": 0.6533424854278564, "num_tokens": 550156327.0, "step": 3276 }, { "entropy": 1.7464400331179302, "epoch": 0.3600010985691137, "grad_norm": 0.6984190940856934, "learning_rate": 1.8959984682025767e-05, "loss": 1.3108, "mean_token_accuracy": 0.6629662662744522, "num_tokens": 550289813.0, "step": 3277 }, { "entropy": 1.7461791435877483, "epoch": 0.36011095548048666, "grad_norm": 0.8859475255012512, "learning_rate": 1.8959237391336226e-05, "loss": 1.3565, "mean_token_accuracy": 0.6585030903418859, "num_tokens": 550433398.0, "step": 3278 }, { "entropy": 1.7174023687839508, "epoch": 0.3602208123918596, "grad_norm": 0.6496213674545288, "learning_rate": 1.895848984874041e-05, "loss": 1.3238, "mean_token_accuracy": 0.6703794449567795, "num_tokens": 550571694.0, "step": 3279 }, { "entropy": 1.6957313120365143, "epoch": 0.36033066930323254, "grad_norm": 0.7161815762519836, "learning_rate": 1.8957742054261976e-05, "loss": 1.4328, "mean_token_accuracy": 0.6538204352060953, "num_tokens": 550742742.0, "step": 3280 }, { "entropy": 1.7091784179210663, "epoch": 0.3604405262146055, "grad_norm": 0.6954407095909119, "learning_rate": 1.8956994007924595e-05, "loss": 1.4701, "mean_token_accuracy": 0.6483365694681803, "num_tokens": 550915669.0, "step": 3281 }, { "entropy": 1.759674459695816, "epoch": 0.3605503831259784, "grad_norm": 0.8163644671440125, "learning_rate": 1.8956245709751932e-05, "loss": 1.4494, "mean_token_accuracy": 0.6482079128424326, "num_tokens": 551072402.0, "step": 3282 }, { "entropy": 1.6610101958115895, "epoch": 0.36066024003735137, "grad_norm": 0.6455697417259216, "learning_rate": 1.8955497159767683e-05, "loss": 1.3841, "mean_token_accuracy": 0.6573386738697687, "num_tokens": 551243907.0, "step": 3283 }, { "entropy": 1.765950342019399, "epoch": 0.3607700969487243, "grad_norm": 0.6208654046058655, "learning_rate": 1.8954748357995532e-05, "loss": 1.472, "mean_token_accuracy": 0.6405810018380483, "num_tokens": 551437093.0, "step": 3284 }, { "entropy": 1.6940280695756276, "epoch": 0.36087995386009725, "grad_norm": 0.7773803472518921, "learning_rate": 1.8953999304459182e-05, "loss": 1.306, "mean_token_accuracy": 0.670986607670784, "num_tokens": 551578647.0, "step": 3285 }, { "entropy": 1.7210556169350941, "epoch": 0.3609898107714702, "grad_norm": 0.6685793399810791, "learning_rate": 1.8953249999182336e-05, "loss": 1.3721, "mean_token_accuracy": 0.6593438486258189, "num_tokens": 551716346.0, "step": 3286 }, { "entropy": 1.7289181451002757, "epoch": 0.3610996676828431, "grad_norm": 0.6698284149169922, "learning_rate": 1.895250044218871e-05, "loss": 1.4681, "mean_token_accuracy": 0.6375825703144073, "num_tokens": 551901315.0, "step": 3287 }, { "entropy": 1.7202289899190266, "epoch": 0.361209524594216, "grad_norm": 0.7790493369102478, "learning_rate": 1.895175063350203e-05, "loss": 1.4252, "mean_token_accuracy": 0.6556966801484426, "num_tokens": 552055231.0, "step": 3288 }, { "entropy": 1.638626625140508, "epoch": 0.36131938150558895, "grad_norm": 0.6459670662879944, "learning_rate": 1.8951000573146028e-05, "loss": 1.1587, "mean_token_accuracy": 0.6860545178254446, "num_tokens": 552154538.0, "step": 3289 }, { "entropy": 1.7047406236330669, "epoch": 0.3614292384169619, "grad_norm": 0.6823393702507019, "learning_rate": 1.895025026114444e-05, "loss": 1.3642, "mean_token_accuracy": 0.6609081079562505, "num_tokens": 552306200.0, "step": 3290 }, { "entropy": 1.7296662827332814, "epoch": 0.36153909532833484, "grad_norm": 0.6827804446220398, "learning_rate": 1.8949499697521013e-05, "loss": 1.5255, "mean_token_accuracy": 0.6456420173247656, "num_tokens": 552495063.0, "step": 3291 }, { "entropy": 1.7409346004327138, "epoch": 0.3616489522397078, "grad_norm": 0.6589847803115845, "learning_rate": 1.89487488822995e-05, "loss": 1.569, "mean_token_accuracy": 0.6369834740956625, "num_tokens": 552680325.0, "step": 3292 }, { "entropy": 1.7292551795641582, "epoch": 0.3617588091510807, "grad_norm": 0.683055579662323, "learning_rate": 1.8947997815503668e-05, "loss": 1.3601, "mean_token_accuracy": 0.6582022855679194, "num_tokens": 552842160.0, "step": 3293 }, { "entropy": 1.6185003022352855, "epoch": 0.36186866606245366, "grad_norm": 0.5870607495307922, "learning_rate": 1.8947246497157287e-05, "loss": 1.2843, "mean_token_accuracy": 0.6818203230698904, "num_tokens": 553002411.0, "step": 3294 }, { "entropy": 1.7373617390791576, "epoch": 0.3619785229738266, "grad_norm": 0.8702647089958191, "learning_rate": 1.8946494927284134e-05, "loss": 1.4134, "mean_token_accuracy": 0.6574988017479578, "num_tokens": 553134679.0, "step": 3295 }, { "entropy": 1.6635343730449677, "epoch": 0.36208837988519954, "grad_norm": 0.6780598163604736, "learning_rate": 1.8945743105908004e-05, "loss": 1.2698, "mean_token_accuracy": 0.672525574763616, "num_tokens": 553279210.0, "step": 3296 }, { "entropy": 1.7142191926638286, "epoch": 0.3621982367965725, "grad_norm": 0.6832349896430969, "learning_rate": 1.894499103305268e-05, "loss": 1.388, "mean_token_accuracy": 0.6695650964975357, "num_tokens": 553454135.0, "step": 3297 }, { "entropy": 1.7063461641470592, "epoch": 0.3623080937079454, "grad_norm": 0.6330212354660034, "learning_rate": 1.894423870874197e-05, "loss": 1.4071, "mean_token_accuracy": 0.6505293697118759, "num_tokens": 553628489.0, "step": 3298 }, { "entropy": 1.705621709426244, "epoch": 0.36241795061931836, "grad_norm": 0.732349693775177, "learning_rate": 1.894348613299968e-05, "loss": 1.3747, "mean_token_accuracy": 0.6729957262674967, "num_tokens": 553793405.0, "step": 3299 }, { "entropy": 1.7087404429912567, "epoch": 0.36252780753069125, "grad_norm": 0.6913777589797974, "learning_rate": 1.8942733305849643e-05, "loss": 1.4367, "mean_token_accuracy": 0.6505131224791209, "num_tokens": 554019155.0, "step": 3300 }, { "entropy": 1.723453958829244, "epoch": 0.3626376644420642, "grad_norm": 0.6385790705680847, "learning_rate": 1.8941980227315672e-05, "loss": 1.3893, "mean_token_accuracy": 0.6500000605980555, "num_tokens": 554154296.0, "step": 3301 }, { "entropy": 1.7123183111349742, "epoch": 0.36274752135343713, "grad_norm": 0.7006135582923889, "learning_rate": 1.89412268974216e-05, "loss": 1.368, "mean_token_accuracy": 0.6440122773249944, "num_tokens": 554283476.0, "step": 3302 }, { "entropy": 1.7936599254608154, "epoch": 0.36285737826481007, "grad_norm": 0.6872299909591675, "learning_rate": 1.8940473316191282e-05, "loss": 1.4587, "mean_token_accuracy": 0.646611750125885, "num_tokens": 554451283.0, "step": 3303 }, { "entropy": 1.69118133187294, "epoch": 0.362967235176183, "grad_norm": 0.6158702969551086, "learning_rate": 1.893971948364856e-05, "loss": 1.3139, "mean_token_accuracy": 0.6713261753320694, "num_tokens": 554591528.0, "step": 3304 }, { "entropy": 1.707411030928294, "epoch": 0.36307709208755595, "grad_norm": 0.7171065807342529, "learning_rate": 1.8938965399817295e-05, "loss": 1.4017, "mean_token_accuracy": 0.6603502780199051, "num_tokens": 554776939.0, "step": 3305 }, { "entropy": 1.6777910987536113, "epoch": 0.3631869489989289, "grad_norm": 0.7720420360565186, "learning_rate": 1.8938211064721348e-05, "loss": 1.3903, "mean_token_accuracy": 0.67117311557134, "num_tokens": 554967794.0, "step": 3306 }, { "entropy": 1.745880534251531, "epoch": 0.36329680591030183, "grad_norm": 0.7731246948242188, "learning_rate": 1.89374564783846e-05, "loss": 1.5085, "mean_token_accuracy": 0.6443192313114802, "num_tokens": 555120674.0, "step": 3307 }, { "entropy": 1.7017800112565358, "epoch": 0.3634066628216748, "grad_norm": 0.6351495981216431, "learning_rate": 1.8936701640830932e-05, "loss": 1.3872, "mean_token_accuracy": 0.6458842406670252, "num_tokens": 555372792.0, "step": 3308 }, { "entropy": 1.731082151333491, "epoch": 0.3635165197330477, "grad_norm": 0.6268585324287415, "learning_rate": 1.8935946552084235e-05, "loss": 1.3286, "mean_token_accuracy": 0.6559985081354777, "num_tokens": 555557204.0, "step": 3309 }, { "entropy": 1.7254813611507416, "epoch": 0.36362637664442066, "grad_norm": 0.7122596502304077, "learning_rate": 1.8935191212168404e-05, "loss": 1.3135, "mean_token_accuracy": 0.6687471518913904, "num_tokens": 555714696.0, "step": 3310 }, { "entropy": 1.7088161011536915, "epoch": 0.3637362335557936, "grad_norm": 0.6561578512191772, "learning_rate": 1.8934435621107348e-05, "loss": 1.4729, "mean_token_accuracy": 0.6405731240908304, "num_tokens": 555929585.0, "step": 3311 }, { "entropy": 1.6857821742693584, "epoch": 0.36384609046716654, "grad_norm": 0.6950667500495911, "learning_rate": 1.8933679778924977e-05, "loss": 1.4068, "mean_token_accuracy": 0.6570146431525549, "num_tokens": 556083537.0, "step": 3312 }, { "entropy": 1.6994845469792683, "epoch": 0.3639559473785395, "grad_norm": 0.6382138133049011, "learning_rate": 1.8932923685645218e-05, "loss": 1.5331, "mean_token_accuracy": 0.6430691679318746, "num_tokens": 556268816.0, "step": 3313 }, { "entropy": 1.7211009760697682, "epoch": 0.36406580428991236, "grad_norm": 0.753278911113739, "learning_rate": 1.8932167341291998e-05, "loss": 1.416, "mean_token_accuracy": 0.6640914579232534, "num_tokens": 556411187.0, "step": 3314 }, { "entropy": 1.7074837684631348, "epoch": 0.3641756612012853, "grad_norm": 0.7115225195884705, "learning_rate": 1.893141074588926e-05, "loss": 1.2538, "mean_token_accuracy": 0.6794544955094656, "num_tokens": 556560154.0, "step": 3315 }, { "entropy": 1.6474250952402751, "epoch": 0.36428551811265825, "grad_norm": 0.6773630976676941, "learning_rate": 1.893065389946094e-05, "loss": 1.4965, "mean_token_accuracy": 0.6429052402575811, "num_tokens": 556758287.0, "step": 3316 }, { "entropy": 1.7411263982454936, "epoch": 0.3643953750240312, "grad_norm": 0.7541442513465881, "learning_rate": 1.8929896802031e-05, "loss": 1.3983, "mean_token_accuracy": 0.6654303272565206, "num_tokens": 556906714.0, "step": 3317 }, { "entropy": 1.6828208565711975, "epoch": 0.36450523193540413, "grad_norm": 0.5869950652122498, "learning_rate": 1.89291394536234e-05, "loss": 1.5068, "mean_token_accuracy": 0.638722355167071, "num_tokens": 557114352.0, "step": 3318 }, { "entropy": 1.6942188839117687, "epoch": 0.36461508884677707, "grad_norm": 0.7280264496803284, "learning_rate": 1.8928381854262107e-05, "loss": 1.4158, "mean_token_accuracy": 0.6560780803362528, "num_tokens": 557283476.0, "step": 3319 }, { "entropy": 1.7298544545968373, "epoch": 0.36472494575815, "grad_norm": 0.6916755437850952, "learning_rate": 1.8927624003971104e-05, "loss": 1.4664, "mean_token_accuracy": 0.6391513794660568, "num_tokens": 557482545.0, "step": 3320 }, { "entropy": 1.6934645175933838, "epoch": 0.36483480266952295, "grad_norm": 0.6088461875915527, "learning_rate": 1.892686590277437e-05, "loss": 1.3194, "mean_token_accuracy": 0.6680668840805689, "num_tokens": 557629627.0, "step": 3321 }, { "entropy": 1.7159304022789001, "epoch": 0.3649446595808959, "grad_norm": 0.6701193451881409, "learning_rate": 1.8926107550695907e-05, "loss": 1.4102, "mean_token_accuracy": 0.661454955736796, "num_tokens": 557788278.0, "step": 3322 }, { "entropy": 1.6919244428475697, "epoch": 0.36505451649226883, "grad_norm": 0.606071949005127, "learning_rate": 1.892534894775971e-05, "loss": 1.3828, "mean_token_accuracy": 0.6501694321632385, "num_tokens": 557979234.0, "step": 3323 }, { "entropy": 1.7174591918786366, "epoch": 0.3651643734036418, "grad_norm": 0.6546566486358643, "learning_rate": 1.892459009398979e-05, "loss": 1.3609, "mean_token_accuracy": 0.6671615242958069, "num_tokens": 558163091.0, "step": 3324 }, { "entropy": 1.7426952123641968, "epoch": 0.3652742303150147, "grad_norm": 0.6608150005340576, "learning_rate": 1.8923830989410165e-05, "loss": 1.4249, "mean_token_accuracy": 0.6464882989724478, "num_tokens": 558294742.0, "step": 3325 }, { "entropy": 1.7673422197500865, "epoch": 0.36538408722638765, "grad_norm": 0.5727324485778809, "learning_rate": 1.8923071634044855e-05, "loss": 1.4988, "mean_token_accuracy": 0.6281411349773407, "num_tokens": 558479560.0, "step": 3326 }, { "entropy": 1.7090481917063396, "epoch": 0.36549394413776054, "grad_norm": 0.7117367386817932, "learning_rate": 1.89223120279179e-05, "loss": 1.3147, "mean_token_accuracy": 0.6621511876583099, "num_tokens": 558594005.0, "step": 3327 }, { "entropy": 1.6214572985967, "epoch": 0.3656038010491335, "grad_norm": 0.8425611853599548, "learning_rate": 1.8921552171053344e-05, "loss": 1.4351, "mean_token_accuracy": 0.6586425652106603, "num_tokens": 558773354.0, "step": 3328 }, { "entropy": 1.6668421924114227, "epoch": 0.3657136579605064, "grad_norm": 0.6190313100814819, "learning_rate": 1.8920792063475228e-05, "loss": 1.3001, "mean_token_accuracy": 0.6651297012964884, "num_tokens": 558940163.0, "step": 3329 }, { "entropy": 1.7648253838221233, "epoch": 0.36582351487187936, "grad_norm": 0.7122219204902649, "learning_rate": 1.892003170520761e-05, "loss": 1.3979, "mean_token_accuracy": 0.6517335921525955, "num_tokens": 559094548.0, "step": 3330 }, { "entropy": 1.738343745470047, "epoch": 0.3659333717832523, "grad_norm": 0.6885458827018738, "learning_rate": 1.8919271096274562e-05, "loss": 1.3878, "mean_token_accuracy": 0.6585352619489034, "num_tokens": 559293154.0, "step": 3331 }, { "entropy": 1.7013998627662659, "epoch": 0.36604322869462524, "grad_norm": 0.5736583471298218, "learning_rate": 1.8918510236700148e-05, "loss": 1.4884, "mean_token_accuracy": 0.6527662177880605, "num_tokens": 559486360.0, "step": 3332 }, { "entropy": 1.7290991048018138, "epoch": 0.3661530856059982, "grad_norm": 0.6130762100219727, "learning_rate": 1.8917749126508454e-05, "loss": 1.3987, "mean_token_accuracy": 0.649641344944636, "num_tokens": 559646780.0, "step": 3333 }, { "entropy": 1.665138175090154, "epoch": 0.3662629425173711, "grad_norm": 0.7527748346328735, "learning_rate": 1.891698776572357e-05, "loss": 1.4943, "mean_token_accuracy": 0.6480821569760641, "num_tokens": 559842312.0, "step": 3334 }, { "entropy": 1.7401958505312602, "epoch": 0.36637279942874407, "grad_norm": 0.7090706825256348, "learning_rate": 1.891622615436959e-05, "loss": 1.5787, "mean_token_accuracy": 0.6238933056592941, "num_tokens": 560034055.0, "step": 3335 }, { "entropy": 1.6688226958115895, "epoch": 0.366482656340117, "grad_norm": 0.6134145259857178, "learning_rate": 1.891546429247062e-05, "loss": 1.443, "mean_token_accuracy": 0.6504640529553095, "num_tokens": 560205068.0, "step": 3336 }, { "entropy": 1.5998762051264446, "epoch": 0.36659251325148995, "grad_norm": 0.5931162238121033, "learning_rate": 1.891470218005077e-05, "loss": 1.2993, "mean_token_accuracy": 0.6780025462309519, "num_tokens": 560354488.0, "step": 3337 }, { "entropy": 1.6810278395811717, "epoch": 0.3667023701628629, "grad_norm": 0.7367040514945984, "learning_rate": 1.8913939817134167e-05, "loss": 1.4098, "mean_token_accuracy": 0.6589592695236206, "num_tokens": 560507374.0, "step": 3338 }, { "entropy": 1.6817876795927684, "epoch": 0.36681222707423583, "grad_norm": 0.8555600047111511, "learning_rate": 1.8913177203744927e-05, "loss": 1.3993, "mean_token_accuracy": 0.6524364600578944, "num_tokens": 560662070.0, "step": 3339 }, { "entropy": 1.6980493466059368, "epoch": 0.36692208398560877, "grad_norm": 0.6242254972457886, "learning_rate": 1.89124143399072e-05, "loss": 1.3962, "mean_token_accuracy": 0.6619236518939337, "num_tokens": 560866103.0, "step": 3340 }, { "entropy": 1.676784485578537, "epoch": 0.36703194089698166, "grad_norm": 0.633709192276001, "learning_rate": 1.891165122564512e-05, "loss": 1.386, "mean_token_accuracy": 0.6684582183758417, "num_tokens": 561025085.0, "step": 3341 }, { "entropy": 1.6602611144383748, "epoch": 0.3671417978083546, "grad_norm": 0.5967673659324646, "learning_rate": 1.891088786098285e-05, "loss": 1.3113, "mean_token_accuracy": 0.6616858939329783, "num_tokens": 561194282.0, "step": 3342 }, { "entropy": 1.7069261968135834, "epoch": 0.36725165471972754, "grad_norm": 0.7268139719963074, "learning_rate": 1.8910124245944544e-05, "loss": 1.4762, "mean_token_accuracy": 0.641165554523468, "num_tokens": 561399006.0, "step": 3343 }, { "entropy": 1.640328695376714, "epoch": 0.3673615116311005, "grad_norm": 0.6956763863563538, "learning_rate": 1.8909360380554366e-05, "loss": 1.1811, "mean_token_accuracy": 0.6891622543334961, "num_tokens": 561511064.0, "step": 3344 }, { "entropy": 1.7172695597012837, "epoch": 0.3674713685424734, "grad_norm": 0.6288443207740784, "learning_rate": 1.8908596264836496e-05, "loss": 1.3749, "mean_token_accuracy": 0.6524456491072973, "num_tokens": 561693661.0, "step": 3345 }, { "entropy": 1.7315253218015034, "epoch": 0.36758122545384636, "grad_norm": 0.6713343858718872, "learning_rate": 1.8907831898815118e-05, "loss": 1.5466, "mean_token_accuracy": 0.6562629292408625, "num_tokens": 561882529.0, "step": 3346 }, { "entropy": 1.7233446737130482, "epoch": 0.3676910823652193, "grad_norm": 0.6376742124557495, "learning_rate": 1.8907067282514426e-05, "loss": 1.4229, "mean_token_accuracy": 0.6561285456021627, "num_tokens": 562071206.0, "step": 3347 }, { "entropy": 1.775407483180364, "epoch": 0.36780093927659224, "grad_norm": 0.7270674705505371, "learning_rate": 1.8906302415958617e-05, "loss": 1.4222, "mean_token_accuracy": 0.6504105776548386, "num_tokens": 562217092.0, "step": 3348 }, { "entropy": 1.6748477617899578, "epoch": 0.3679107961879652, "grad_norm": 0.7032795548439026, "learning_rate": 1.89055372991719e-05, "loss": 1.4193, "mean_token_accuracy": 0.6519673566023508, "num_tokens": 562405459.0, "step": 3349 }, { "entropy": 1.694932798544566, "epoch": 0.3680206530993381, "grad_norm": 0.5827463269233704, "learning_rate": 1.8904771932178484e-05, "loss": 1.3895, "mean_token_accuracy": 0.6607838769753774, "num_tokens": 562585278.0, "step": 3350 }, { "entropy": 1.7321241994698842, "epoch": 0.36813051001071107, "grad_norm": 0.7316332459449768, "learning_rate": 1.8904006315002605e-05, "loss": 1.4072, "mean_token_accuracy": 0.6589037328958511, "num_tokens": 562743792.0, "step": 3351 }, { "entropy": 1.7170052528381348, "epoch": 0.368240366922084, "grad_norm": 0.9120453596115112, "learning_rate": 1.8903240447668485e-05, "loss": 1.4018, "mean_token_accuracy": 0.6695507715145746, "num_tokens": 562855638.0, "step": 3352 }, { "entropy": 1.7425238887468975, "epoch": 0.36835022383345695, "grad_norm": 0.6906775832176208, "learning_rate": 1.8902474330200368e-05, "loss": 1.4636, "mean_token_accuracy": 0.6504637797673544, "num_tokens": 563050942.0, "step": 3353 }, { "entropy": 1.7821686168511708, "epoch": 0.36846008074482983, "grad_norm": 0.7374799847602844, "learning_rate": 1.8901707962622497e-05, "loss": 1.4039, "mean_token_accuracy": 0.6607334365447363, "num_tokens": 563186855.0, "step": 3354 }, { "entropy": 1.6392212013403575, "epoch": 0.3685699376562028, "grad_norm": 0.5854918360710144, "learning_rate": 1.890094134495913e-05, "loss": 1.4505, "mean_token_accuracy": 0.6548234969377518, "num_tokens": 563366408.0, "step": 3355 }, { "entropy": 1.7537512481212616, "epoch": 0.3686797945675757, "grad_norm": 0.6592661738395691, "learning_rate": 1.890017447723453e-05, "loss": 1.3427, "mean_token_accuracy": 0.6637191027402878, "num_tokens": 563500860.0, "step": 3356 }, { "entropy": 1.7331876854101818, "epoch": 0.36878965147894865, "grad_norm": 0.7768663763999939, "learning_rate": 1.8899407359472966e-05, "loss": 1.4831, "mean_token_accuracy": 0.6461548010508219, "num_tokens": 563724218.0, "step": 3357 }, { "entropy": 1.7273361086845398, "epoch": 0.3688995083903216, "grad_norm": 0.6514448523521423, "learning_rate": 1.8898639991698723e-05, "loss": 1.5468, "mean_token_accuracy": 0.6472266266743342, "num_tokens": 563920710.0, "step": 3358 }, { "entropy": 1.7227802574634552, "epoch": 0.36900936530169454, "grad_norm": 0.753607988357544, "learning_rate": 1.889787237393608e-05, "loss": 1.3811, "mean_token_accuracy": 0.6565194974342982, "num_tokens": 564108660.0, "step": 3359 }, { "entropy": 1.6839437087376912, "epoch": 0.3691192222130675, "grad_norm": 0.7818706631660461, "learning_rate": 1.8897104506209336e-05, "loss": 1.5772, "mean_token_accuracy": 0.6447829628984133, "num_tokens": 564274275.0, "step": 3360 }, { "entropy": 1.6813337802886963, "epoch": 0.3692290791244404, "grad_norm": 0.6889169812202454, "learning_rate": 1.8896336388542794e-05, "loss": 1.503, "mean_token_accuracy": 0.6593608756860098, "num_tokens": 564465967.0, "step": 3361 }, { "entropy": 1.6902830203374226, "epoch": 0.36933893603581336, "grad_norm": 0.6168293952941895, "learning_rate": 1.889556802096076e-05, "loss": 1.4396, "mean_token_accuracy": 0.6531057059764862, "num_tokens": 564676289.0, "step": 3362 }, { "entropy": 1.7228737076123555, "epoch": 0.3694487929471863, "grad_norm": 0.7138974666595459, "learning_rate": 1.889479940348756e-05, "loss": 1.3175, "mean_token_accuracy": 0.6605416287978491, "num_tokens": 564820337.0, "step": 3363 }, { "entropy": 1.6877204477787018, "epoch": 0.36955864985855924, "grad_norm": 0.8381320834159851, "learning_rate": 1.8894030536147513e-05, "loss": 1.5089, "mean_token_accuracy": 0.658082976937294, "num_tokens": 564968781.0, "step": 3364 }, { "entropy": 1.7128780285517375, "epoch": 0.3696685067699322, "grad_norm": 0.6636347770690918, "learning_rate": 1.889326141896496e-05, "loss": 1.4628, "mean_token_accuracy": 0.6563326021035513, "num_tokens": 565162237.0, "step": 3365 }, { "entropy": 1.6565657357374828, "epoch": 0.3697783636813051, "grad_norm": 0.7915597558021545, "learning_rate": 1.889249205196424e-05, "loss": 1.4628, "mean_token_accuracy": 0.6564295887947083, "num_tokens": 565318530.0, "step": 3366 }, { "entropy": 1.7151463528474171, "epoch": 0.36988822059267806, "grad_norm": 0.647465169429779, "learning_rate": 1.8891722435169703e-05, "loss": 1.4666, "mean_token_accuracy": 0.656380852063497, "num_tokens": 565493389.0, "step": 3367 }, { "entropy": 1.7164535621802013, "epoch": 0.36999807750405095, "grad_norm": 0.7960186004638672, "learning_rate": 1.8890952568605704e-05, "loss": 1.6024, "mean_token_accuracy": 0.6565567404031754, "num_tokens": 565649508.0, "step": 3368 }, { "entropy": 1.7425066431363423, "epoch": 0.3701079344154239, "grad_norm": 0.7645494937896729, "learning_rate": 1.8890182452296612e-05, "loss": 1.5191, "mean_token_accuracy": 0.6238025277853012, "num_tokens": 565856767.0, "step": 3369 }, { "entropy": 1.7707313100496929, "epoch": 0.37021779132679683, "grad_norm": 0.7492119669914246, "learning_rate": 1.88894120862668e-05, "loss": 1.4832, "mean_token_accuracy": 0.649360736211141, "num_tokens": 566027002.0, "step": 3370 }, { "entropy": 1.67772110303243, "epoch": 0.37032764823816977, "grad_norm": 0.6959682106971741, "learning_rate": 1.8888641470540652e-05, "loss": 1.4024, "mean_token_accuracy": 0.6710788160562515, "num_tokens": 566189471.0, "step": 3371 }, { "entropy": 1.7469698985417683, "epoch": 0.3704375051495427, "grad_norm": 0.6655913591384888, "learning_rate": 1.8887870605142557e-05, "loss": 1.3536, "mean_token_accuracy": 0.6533726006746292, "num_tokens": 566349741.0, "step": 3372 }, { "entropy": 1.6737736264864604, "epoch": 0.37054736206091565, "grad_norm": 0.7479509115219116, "learning_rate": 1.8887099490096914e-05, "loss": 1.2131, "mean_token_accuracy": 0.6783681710561117, "num_tokens": 566478311.0, "step": 3373 }, { "entropy": 1.6851101120313008, "epoch": 0.3706572189722886, "grad_norm": 0.6885315179824829, "learning_rate": 1.8886328125428123e-05, "loss": 1.3959, "mean_token_accuracy": 0.6712721387545267, "num_tokens": 566626451.0, "step": 3374 }, { "entropy": 1.6743651628494263, "epoch": 0.37076707588366153, "grad_norm": 0.6012775897979736, "learning_rate": 1.88855565111606e-05, "loss": 1.4005, "mean_token_accuracy": 0.650342067082723, "num_tokens": 566793333.0, "step": 3375 }, { "entropy": 1.6919374863306682, "epoch": 0.3708769327950345, "grad_norm": 0.6600878238677979, "learning_rate": 1.888478464731877e-05, "loss": 1.2489, "mean_token_accuracy": 0.6797458678483963, "num_tokens": 566944348.0, "step": 3376 }, { "entropy": 1.6906745433807373, "epoch": 0.3709867897064074, "grad_norm": 0.6562890410423279, "learning_rate": 1.8884012533927056e-05, "loss": 1.2859, "mean_token_accuracy": 0.6720538040002187, "num_tokens": 567083801.0, "step": 3377 }, { "entropy": 1.7041854957739513, "epoch": 0.37109664661778036, "grad_norm": 0.6055631637573242, "learning_rate": 1.88832401710099e-05, "loss": 1.4635, "mean_token_accuracy": 0.652542233467102, "num_tokens": 567280452.0, "step": 3378 }, { "entropy": 1.7743679384390514, "epoch": 0.3712065035291533, "grad_norm": 0.7774965763092041, "learning_rate": 1.8882467558591744e-05, "loss": 1.3845, "mean_token_accuracy": 0.656991238395373, "num_tokens": 567482039.0, "step": 3379 }, { "entropy": 1.6754270593325298, "epoch": 0.37131636044052624, "grad_norm": 0.8339126110076904, "learning_rate": 1.8881694696697043e-05, "loss": 1.4245, "mean_token_accuracy": 0.6630517592032751, "num_tokens": 567662177.0, "step": 3380 }, { "entropy": 1.7435904542605083, "epoch": 0.3714262173518991, "grad_norm": 0.591063916683197, "learning_rate": 1.888092158535025e-05, "loss": 1.4174, "mean_token_accuracy": 0.6464981784423193, "num_tokens": 567855805.0, "step": 3381 }, { "entropy": 1.6988468567530315, "epoch": 0.37153607426327206, "grad_norm": 0.6442577242851257, "learning_rate": 1.8880148224575845e-05, "loss": 1.2865, "mean_token_accuracy": 0.6738250454266866, "num_tokens": 567992063.0, "step": 3382 }, { "entropy": 1.7117958962917328, "epoch": 0.371645931174645, "grad_norm": 0.663324236869812, "learning_rate": 1.8879374614398302e-05, "loss": 1.4257, "mean_token_accuracy": 0.6571160405874252, "num_tokens": 568189030.0, "step": 3383 }, { "entropy": 1.7384320894877117, "epoch": 0.37175578808601795, "grad_norm": 0.6145911812782288, "learning_rate": 1.8878600754842097e-05, "loss": 1.4549, "mean_token_accuracy": 0.6452573786179224, "num_tokens": 568426788.0, "step": 3384 }, { "entropy": 1.746810535589854, "epoch": 0.3718656449973909, "grad_norm": 0.7367507815361023, "learning_rate": 1.8877826645931735e-05, "loss": 1.4924, "mean_token_accuracy": 0.6439058085282644, "num_tokens": 568575666.0, "step": 3385 }, { "entropy": 1.7746360798676808, "epoch": 0.37197550190876383, "grad_norm": 0.6903072595596313, "learning_rate": 1.8877052287691703e-05, "loss": 1.3686, "mean_token_accuracy": 0.6597933818896612, "num_tokens": 568727841.0, "step": 3386 }, { "entropy": 1.7943654855092366, "epoch": 0.37208535882013677, "grad_norm": 0.7477875351905823, "learning_rate": 1.887627768014652e-05, "loss": 1.5748, "mean_token_accuracy": 0.6329491684834162, "num_tokens": 568951489.0, "step": 3387 }, { "entropy": 1.6621526181697845, "epoch": 0.3721952157315097, "grad_norm": 0.694236159324646, "learning_rate": 1.8875502823320695e-05, "loss": 1.3952, "mean_token_accuracy": 0.661028265953064, "num_tokens": 569116054.0, "step": 3388 }, { "entropy": 1.72781902551651, "epoch": 0.37230507264288265, "grad_norm": 0.7133349180221558, "learning_rate": 1.8874727717238756e-05, "loss": 1.4526, "mean_token_accuracy": 0.6458436846733093, "num_tokens": 569290226.0, "step": 3389 }, { "entropy": 1.7413524389266968, "epoch": 0.3724149295542556, "grad_norm": 0.6716554164886475, "learning_rate": 1.8873952361925233e-05, "loss": 1.3317, "mean_token_accuracy": 0.6584653854370117, "num_tokens": 569445157.0, "step": 3390 }, { "entropy": 1.676645815372467, "epoch": 0.37252478646562853, "grad_norm": 0.6800124645233154, "learning_rate": 1.8873176757404666e-05, "loss": 1.5388, "mean_token_accuracy": 0.6283597896496455, "num_tokens": 569724434.0, "step": 3391 }, { "entropy": 1.6982823014259338, "epoch": 0.3726346433770015, "grad_norm": 0.6546007394790649, "learning_rate": 1.8872400903701602e-05, "loss": 1.3469, "mean_token_accuracy": 0.6600322326024374, "num_tokens": 569872986.0, "step": 3392 }, { "entropy": 1.716494898001353, "epoch": 0.3727445002883744, "grad_norm": 0.711789608001709, "learning_rate": 1.8871624800840595e-05, "loss": 1.3059, "mean_token_accuracy": 0.664145290851593, "num_tokens": 569984629.0, "step": 3393 }, { "entropy": 1.723552147547404, "epoch": 0.37285435719974735, "grad_norm": 0.7755283713340759, "learning_rate": 1.887084844884621e-05, "loss": 1.3659, "mean_token_accuracy": 0.6677046219507853, "num_tokens": 570215230.0, "step": 3394 }, { "entropy": 1.7178972562154133, "epoch": 0.37296421411112024, "grad_norm": 0.7582644820213318, "learning_rate": 1.8870071847743023e-05, "loss": 1.395, "mean_token_accuracy": 0.65921584268411, "num_tokens": 570419958.0, "step": 3395 }, { "entropy": 1.7200748125712078, "epoch": 0.3730740710224932, "grad_norm": 0.9002476930618286, "learning_rate": 1.8869294997555604e-05, "loss": 1.3866, "mean_token_accuracy": 0.6646259625752767, "num_tokens": 570546912.0, "step": 3396 }, { "entropy": 1.6706381837526958, "epoch": 0.3731839279338661, "grad_norm": 0.820124626159668, "learning_rate": 1.8868517898308548e-05, "loss": 1.4343, "mean_token_accuracy": 0.6535915782054266, "num_tokens": 570738384.0, "step": 3397 }, { "entropy": 1.700315882762273, "epoch": 0.37329378484523906, "grad_norm": 0.6420239210128784, "learning_rate": 1.8867740550026443e-05, "loss": 1.4069, "mean_token_accuracy": 0.6586662083864212, "num_tokens": 570966515.0, "step": 3398 }, { "entropy": 1.7011422216892242, "epoch": 0.373403641756612, "grad_norm": 0.774379312992096, "learning_rate": 1.8866962952733898e-05, "loss": 1.4374, "mean_token_accuracy": 0.6431511243184408, "num_tokens": 571157353.0, "step": 3399 }, { "entropy": 1.7771065930525463, "epoch": 0.37351349866798494, "grad_norm": 0.6903269290924072, "learning_rate": 1.886618510645552e-05, "loss": 1.2827, "mean_token_accuracy": 0.6651173532009125, "num_tokens": 571271399.0, "step": 3400 }, { "entropy": 1.7323172986507416, "epoch": 0.3736233555793579, "grad_norm": 0.5979676246643066, "learning_rate": 1.8865407011215922e-05, "loss": 1.3776, "mean_token_accuracy": 0.6612506260474523, "num_tokens": 571452904.0, "step": 3401 }, { "entropy": 1.7338752647240956, "epoch": 0.3737332124907308, "grad_norm": 0.7211189270019531, "learning_rate": 1.8864628667039742e-05, "loss": 1.3743, "mean_token_accuracy": 0.658988431096077, "num_tokens": 571594720.0, "step": 3402 }, { "entropy": 1.6841518382231395, "epoch": 0.37384306940210377, "grad_norm": 0.5563689470291138, "learning_rate": 1.8863850073951608e-05, "loss": 1.3286, "mean_token_accuracy": 0.6501269191503525, "num_tokens": 571787515.0, "step": 3403 }, { "entropy": 1.7327661911646526, "epoch": 0.3739529263134767, "grad_norm": 0.6619580984115601, "learning_rate": 1.886307123197616e-05, "loss": 1.5635, "mean_token_accuracy": 0.6362739006678263, "num_tokens": 571974947.0, "step": 3404 }, { "entropy": 1.7072784701983135, "epoch": 0.37406278322484965, "grad_norm": 0.6384603381156921, "learning_rate": 1.8862292141138053e-05, "loss": 1.3928, "mean_token_accuracy": 0.6528366059064865, "num_tokens": 572153036.0, "step": 3405 }, { "entropy": 1.7138684292634327, "epoch": 0.3741726401362226, "grad_norm": 0.6947295069694519, "learning_rate": 1.8861512801461943e-05, "loss": 1.3127, "mean_token_accuracy": 0.6598065594832102, "num_tokens": 572292952.0, "step": 3406 }, { "entropy": 1.7150700986385345, "epoch": 0.37428249704759553, "grad_norm": 0.711796760559082, "learning_rate": 1.8860733212972497e-05, "loss": 1.3518, "mean_token_accuracy": 0.6606613347927729, "num_tokens": 572445917.0, "step": 3407 }, { "entropy": 1.7485672036806743, "epoch": 0.37439235395896847, "grad_norm": 0.6744566559791565, "learning_rate": 1.8859953375694383e-05, "loss": 1.4157, "mean_token_accuracy": 0.6547067513068517, "num_tokens": 572594708.0, "step": 3408 }, { "entropy": 1.7242934902509053, "epoch": 0.37450221087034136, "grad_norm": 0.7786602973937988, "learning_rate": 1.8859173289652288e-05, "loss": 1.4001, "mean_token_accuracy": 0.6599243432283401, "num_tokens": 572783684.0, "step": 3409 }, { "entropy": 1.606506069501241, "epoch": 0.3746120677817143, "grad_norm": 0.6144716143608093, "learning_rate": 1.88583929548709e-05, "loss": 1.3857, "mean_token_accuracy": 0.6582548320293427, "num_tokens": 573015722.0, "step": 3410 }, { "entropy": 1.774994472662608, "epoch": 0.37472192469308724, "grad_norm": 0.6956934332847595, "learning_rate": 1.8857612371374914e-05, "loss": 1.4739, "mean_token_accuracy": 0.6409755696853002, "num_tokens": 573214915.0, "step": 3411 }, { "entropy": 1.7437300086021423, "epoch": 0.3748317816044602, "grad_norm": 0.7915884852409363, "learning_rate": 1.885683153918904e-05, "loss": 1.4826, "mean_token_accuracy": 0.6374993075927099, "num_tokens": 573455789.0, "step": 3412 }, { "entropy": 1.663759668668111, "epoch": 0.3749416385158331, "grad_norm": 0.7212685942649841, "learning_rate": 1.8856050458337985e-05, "loss": 1.3996, "mean_token_accuracy": 0.6592791775862376, "num_tokens": 573633232.0, "step": 3413 }, { "entropy": 1.7127246956030528, "epoch": 0.37505149542720606, "grad_norm": 0.5660611391067505, "learning_rate": 1.885526912884648e-05, "loss": 1.4225, "mean_token_accuracy": 0.6511427859465281, "num_tokens": 573834729.0, "step": 3414 }, { "entropy": 1.7536275585492451, "epoch": 0.375161352338579, "grad_norm": 0.6715665459632874, "learning_rate": 1.885448755073924e-05, "loss": 1.2663, "mean_token_accuracy": 0.6831430196762085, "num_tokens": 573966929.0, "step": 3415 }, { "entropy": 1.6836529274781544, "epoch": 0.37527120924995194, "grad_norm": 0.6396788358688354, "learning_rate": 1.8853705724041008e-05, "loss": 1.5899, "mean_token_accuracy": 0.6472560266653696, "num_tokens": 574171428.0, "step": 3416 }, { "entropy": 1.701483239730199, "epoch": 0.3753810661613249, "grad_norm": 0.6624711155891418, "learning_rate": 1.8852923648776534e-05, "loss": 1.3481, "mean_token_accuracy": 0.6699345856904984, "num_tokens": 574356626.0, "step": 3417 }, { "entropy": 1.7148587902386982, "epoch": 0.3754909230726978, "grad_norm": 0.6820365786552429, "learning_rate": 1.885214132497056e-05, "loss": 1.3628, "mean_token_accuracy": 0.6586700628201166, "num_tokens": 574479005.0, "step": 3418 }, { "entropy": 1.6891990701357524, "epoch": 0.37560077998407077, "grad_norm": 0.8026529550552368, "learning_rate": 1.8851358752647855e-05, "loss": 1.483, "mean_token_accuracy": 0.6601535677909851, "num_tokens": 574600042.0, "step": 3419 }, { "entropy": 1.6742752293745677, "epoch": 0.3757106368954437, "grad_norm": 0.7593013048171997, "learning_rate": 1.885057593183318e-05, "loss": 1.2939, "mean_token_accuracy": 0.6685616920391718, "num_tokens": 574733331.0, "step": 3420 }, { "entropy": 1.6507653097311656, "epoch": 0.37582049380681665, "grad_norm": 0.5844452977180481, "learning_rate": 1.8849792862551318e-05, "loss": 1.3512, "mean_token_accuracy": 0.6571490665276846, "num_tokens": 574935739.0, "step": 3421 }, { "entropy": 1.739424576361974, "epoch": 0.37593035071818953, "grad_norm": 0.6515421867370605, "learning_rate": 1.8849009544827048e-05, "loss": 1.5581, "mean_token_accuracy": 0.6339648912350336, "num_tokens": 575141640.0, "step": 3422 }, { "entropy": 1.7240214546521504, "epoch": 0.3760402076295625, "grad_norm": 0.8800962567329407, "learning_rate": 1.8848225978685163e-05, "loss": 1.4257, "mean_token_accuracy": 0.6527638087670008, "num_tokens": 575274148.0, "step": 3423 }, { "entropy": 1.728806068499883, "epoch": 0.3761500645409354, "grad_norm": 0.6778604388237, "learning_rate": 1.884744216415046e-05, "loss": 1.4071, "mean_token_accuracy": 0.6551901549100876, "num_tokens": 575411261.0, "step": 3424 }, { "entropy": 1.6978506247202556, "epoch": 0.37625992145230835, "grad_norm": 0.7038581967353821, "learning_rate": 1.8846658101247748e-05, "loss": 1.3712, "mean_token_accuracy": 0.6615935812393824, "num_tokens": 575587362.0, "step": 3425 }, { "entropy": 1.6618886888027191, "epoch": 0.3763697783636813, "grad_norm": 0.6956599354743958, "learning_rate": 1.8845873790001848e-05, "loss": 1.308, "mean_token_accuracy": 0.6621120274066925, "num_tokens": 575763635.0, "step": 3426 }, { "entropy": 1.7019036809603374, "epoch": 0.37647963527505424, "grad_norm": 0.7013808488845825, "learning_rate": 1.8845089230437573e-05, "loss": 1.3993, "mean_token_accuracy": 0.6591120461622874, "num_tokens": 575906834.0, "step": 3427 }, { "entropy": 1.6568001906077068, "epoch": 0.3765894921864272, "grad_norm": 0.654579758644104, "learning_rate": 1.8844304422579756e-05, "loss": 1.4497, "mean_token_accuracy": 0.6660924007495245, "num_tokens": 576101135.0, "step": 3428 }, { "entropy": 1.734635551770528, "epoch": 0.3766993490978001, "grad_norm": 0.7906395792961121, "learning_rate": 1.884351936645325e-05, "loss": 1.3475, "mean_token_accuracy": 0.6772653212149938, "num_tokens": 576231678.0, "step": 3429 }, { "entropy": 1.6919464965661366, "epoch": 0.37680920600917306, "grad_norm": 0.7029792666435242, "learning_rate": 1.8842734062082878e-05, "loss": 1.5751, "mean_token_accuracy": 0.6377601400017738, "num_tokens": 576394578.0, "step": 3430 }, { "entropy": 1.74019859234492, "epoch": 0.376919062920546, "grad_norm": 0.6424586772918701, "learning_rate": 1.8841948509493517e-05, "loss": 1.4304, "mean_token_accuracy": 0.6536309023698171, "num_tokens": 576599050.0, "step": 3431 }, { "entropy": 1.7083908418814342, "epoch": 0.37702891983191894, "grad_norm": 0.6925437450408936, "learning_rate": 1.8841162708710015e-05, "loss": 1.4701, "mean_token_accuracy": 0.6461886862913767, "num_tokens": 576788717.0, "step": 3432 }, { "entropy": 1.7433116436004639, "epoch": 0.3771387767432919, "grad_norm": 0.6714274287223816, "learning_rate": 1.8840376659757247e-05, "loss": 1.4304, "mean_token_accuracy": 0.6540505588054657, "num_tokens": 576958988.0, "step": 3433 }, { "entropy": 1.765092372894287, "epoch": 0.3772486336546648, "grad_norm": 0.7673705220222473, "learning_rate": 1.8839590362660088e-05, "loss": 1.5501, "mean_token_accuracy": 0.6285836895306905, "num_tokens": 577128512.0, "step": 3434 }, { "entropy": 1.7895864645640056, "epoch": 0.37735849056603776, "grad_norm": 0.6441873908042908, "learning_rate": 1.8838803817443428e-05, "loss": 1.5039, "mean_token_accuracy": 0.6534687529007593, "num_tokens": 577271620.0, "step": 3435 }, { "entropy": 1.6606577336788177, "epoch": 0.37746834747741065, "grad_norm": 0.6333370804786682, "learning_rate": 1.8838017024132163e-05, "loss": 1.4591, "mean_token_accuracy": 0.6549960821866989, "num_tokens": 577514456.0, "step": 3436 }, { "entropy": 1.6238328516483307, "epoch": 0.3775782043887836, "grad_norm": 0.6960654854774475, "learning_rate": 1.883722998275119e-05, "loss": 1.5595, "mean_token_accuracy": 0.6362607727448145, "num_tokens": 577746103.0, "step": 3437 }, { "entropy": 1.7373960614204407, "epoch": 0.37768806130015653, "grad_norm": 0.7334326505661011, "learning_rate": 1.8836442693325415e-05, "loss": 1.2604, "mean_token_accuracy": 0.6687419414520264, "num_tokens": 577906176.0, "step": 3438 }, { "entropy": 1.651681274175644, "epoch": 0.37779791821152947, "grad_norm": 0.6971881985664368, "learning_rate": 1.8835655155879765e-05, "loss": 1.3592, "mean_token_accuracy": 0.6670136153697968, "num_tokens": 578070962.0, "step": 3439 }, { "entropy": 1.7302599747975667, "epoch": 0.3779077751229024, "grad_norm": 0.6110210418701172, "learning_rate": 1.8834867370439158e-05, "loss": 1.3591, "mean_token_accuracy": 0.6587302833795547, "num_tokens": 578248089.0, "step": 3440 }, { "entropy": 1.7012074490388234, "epoch": 0.37801763203427535, "grad_norm": 0.5965494513511658, "learning_rate": 1.883407933702853e-05, "loss": 1.4663, "mean_token_accuracy": 0.6482534607251486, "num_tokens": 578431091.0, "step": 3441 }, { "entropy": 1.7487133642037709, "epoch": 0.3781274889456483, "grad_norm": 0.7280961275100708, "learning_rate": 1.8833291055672823e-05, "loss": 1.3637, "mean_token_accuracy": 0.6615277727444967, "num_tokens": 578597636.0, "step": 3442 }, { "entropy": 1.6576574742794037, "epoch": 0.37823734585702123, "grad_norm": 0.7443664073944092, "learning_rate": 1.883250252639698e-05, "loss": 1.508, "mean_token_accuracy": 0.6484198073546091, "num_tokens": 578781033.0, "step": 3443 }, { "entropy": 1.7675111095110576, "epoch": 0.3783472027683942, "grad_norm": 0.818128228187561, "learning_rate": 1.883171374922596e-05, "loss": 1.4656, "mean_token_accuracy": 0.6324175000190735, "num_tokens": 578991493.0, "step": 3444 }, { "entropy": 1.7102212806542714, "epoch": 0.3784570596797671, "grad_norm": 0.5803321599960327, "learning_rate": 1.8830924724184735e-05, "loss": 1.5241, "mean_token_accuracy": 0.6512432843446732, "num_tokens": 579180046.0, "step": 3445 }, { "entropy": 1.6907643973827362, "epoch": 0.37856691659114006, "grad_norm": 0.6002610921859741, "learning_rate": 1.8830135451298267e-05, "loss": 1.3832, "mean_token_accuracy": 0.6592213263114294, "num_tokens": 579369176.0, "step": 3446 }, { "entropy": 1.6708094378312428, "epoch": 0.378676773502513, "grad_norm": 0.7741876840591431, "learning_rate": 1.882934593059154e-05, "loss": 1.3733, "mean_token_accuracy": 0.6698874334494272, "num_tokens": 579516065.0, "step": 3447 }, { "entropy": 1.6863604684670765, "epoch": 0.37878663041388594, "grad_norm": 0.5927191972732544, "learning_rate": 1.8828556162089544e-05, "loss": 1.3393, "mean_token_accuracy": 0.6622706055641174, "num_tokens": 579667258.0, "step": 3448 }, { "entropy": 1.6480213006337483, "epoch": 0.3788964873252588, "grad_norm": 0.6203927397727966, "learning_rate": 1.882776614581727e-05, "loss": 1.3284, "mean_token_accuracy": 0.6719114383061727, "num_tokens": 579833662.0, "step": 3449 }, { "entropy": 1.6983853876590729, "epoch": 0.37900634423663176, "grad_norm": 0.7154219150543213, "learning_rate": 1.882697588179973e-05, "loss": 1.2398, "mean_token_accuracy": 0.6767720828453699, "num_tokens": 579961516.0, "step": 3450 }, { "entropy": 1.6762764751911163, "epoch": 0.3791162011480047, "grad_norm": 0.8314480781555176, "learning_rate": 1.882618537006193e-05, "loss": 1.331, "mean_token_accuracy": 0.6671194086472193, "num_tokens": 580068709.0, "step": 3451 }, { "entropy": 1.6201636294523876, "epoch": 0.37922605805937765, "grad_norm": 0.666167676448822, "learning_rate": 1.8825394610628885e-05, "loss": 1.2919, "mean_token_accuracy": 0.6708478977282842, "num_tokens": 580270556.0, "step": 3452 }, { "entropy": 1.6574621001879375, "epoch": 0.3793359149707506, "grad_norm": 0.6271048188209534, "learning_rate": 1.882460360352563e-05, "loss": 1.4808, "mean_token_accuracy": 0.6487952421108881, "num_tokens": 580449275.0, "step": 3453 }, { "entropy": 1.6838637391726177, "epoch": 0.37944577188212353, "grad_norm": 0.7442733645439148, "learning_rate": 1.8823812348777194e-05, "loss": 1.4904, "mean_token_accuracy": 0.6588171670834223, "num_tokens": 580589870.0, "step": 3454 }, { "entropy": 1.7033161123593648, "epoch": 0.37955562879349647, "grad_norm": 0.6353382468223572, "learning_rate": 1.8823020846408624e-05, "loss": 1.3264, "mean_token_accuracy": 0.6653269082307816, "num_tokens": 580750981.0, "step": 3455 }, { "entropy": 1.6577220559120178, "epoch": 0.3796654857048694, "grad_norm": 0.7376974821090698, "learning_rate": 1.8822229096444974e-05, "loss": 1.3135, "mean_token_accuracy": 0.6758194168408712, "num_tokens": 580903947.0, "step": 3456 }, { "entropy": 1.713914414246877, "epoch": 0.37977534261624235, "grad_norm": 0.6808127164840698, "learning_rate": 1.882143709891129e-05, "loss": 1.4825, "mean_token_accuracy": 0.6571186631917953, "num_tokens": 581099884.0, "step": 3457 }, { "entropy": 1.7346510489781697, "epoch": 0.3798851995276153, "grad_norm": 0.8213891983032227, "learning_rate": 1.882064485383265e-05, "loss": 1.656, "mean_token_accuracy": 0.6358093395829201, "num_tokens": 581266800.0, "step": 3458 }, { "entropy": 1.6873709658781688, "epoch": 0.37999505643898823, "grad_norm": 0.6150254607200623, "learning_rate": 1.8819852361234122e-05, "loss": 1.3555, "mean_token_accuracy": 0.6584896544615427, "num_tokens": 581444967.0, "step": 3459 }, { "entropy": 1.65604763229688, "epoch": 0.3801049133503612, "grad_norm": 0.6561197638511658, "learning_rate": 1.8819059621140795e-05, "loss": 1.2817, "mean_token_accuracy": 0.6694964021444321, "num_tokens": 581564226.0, "step": 3460 }, { "entropy": 1.727463076512019, "epoch": 0.3802147702617341, "grad_norm": 0.8681771159172058, "learning_rate": 1.8818266633577754e-05, "loss": 1.505, "mean_token_accuracy": 0.633656973640124, "num_tokens": 581760317.0, "step": 3461 }, { "entropy": 1.6862981617450714, "epoch": 0.38032462717310705, "grad_norm": 0.7154708504676819, "learning_rate": 1.8817473398570093e-05, "loss": 1.3987, "mean_token_accuracy": 0.6559828768173853, "num_tokens": 581924558.0, "step": 3462 }, { "entropy": 1.7164887289206188, "epoch": 0.38043448408447994, "grad_norm": 0.6370391249656677, "learning_rate": 1.8816679916142926e-05, "loss": 1.4157, "mean_token_accuracy": 0.6538357237974802, "num_tokens": 582107450.0, "step": 3463 }, { "entropy": 1.7019615570704143, "epoch": 0.3805443409958529, "grad_norm": 0.6402043700218201, "learning_rate": 1.881588618632136e-05, "loss": 1.3387, "mean_token_accuracy": 0.6574279069900513, "num_tokens": 582239856.0, "step": 3464 }, { "entropy": 1.7193419933319092, "epoch": 0.3806541979072258, "grad_norm": 0.6499764323234558, "learning_rate": 1.8815092209130517e-05, "loss": 1.3768, "mean_token_accuracy": 0.6483793556690216, "num_tokens": 582387148.0, "step": 3465 }, { "entropy": 1.7380633453528087, "epoch": 0.38076405481859876, "grad_norm": 0.637168824672699, "learning_rate": 1.881429798459553e-05, "loss": 1.549, "mean_token_accuracy": 0.6460103044907252, "num_tokens": 582581215.0, "step": 3466 }, { "entropy": 1.7223777274290721, "epoch": 0.3808739117299717, "grad_norm": 0.8232377171516418, "learning_rate": 1.881350351274153e-05, "loss": 1.4829, "mean_token_accuracy": 0.6414266675710678, "num_tokens": 582737460.0, "step": 3467 }, { "entropy": 1.715603917837143, "epoch": 0.38098376864134464, "grad_norm": 0.7367724776268005, "learning_rate": 1.8812708793593665e-05, "loss": 1.4147, "mean_token_accuracy": 0.6520026822884878, "num_tokens": 582887078.0, "step": 3468 }, { "entropy": 1.6945099035898845, "epoch": 0.3810936255527176, "grad_norm": 0.7187338471412659, "learning_rate": 1.8811913827177086e-05, "loss": 1.3723, "mean_token_accuracy": 0.6605968276659647, "num_tokens": 583095650.0, "step": 3469 }, { "entropy": 1.7937320371468861, "epoch": 0.3812034824640905, "grad_norm": 0.7050454020500183, "learning_rate": 1.8811118613516958e-05, "loss": 1.3214, "mean_token_accuracy": 0.6556558360656103, "num_tokens": 583233702.0, "step": 3470 }, { "entropy": 1.739583859841029, "epoch": 0.38131333937546347, "grad_norm": 4.8021721839904785, "learning_rate": 1.8810323152638442e-05, "loss": 1.2322, "mean_token_accuracy": 0.657580296198527, "num_tokens": 583395812.0, "step": 3471 }, { "entropy": 1.6864960491657257, "epoch": 0.3814231962868364, "grad_norm": 0.767795979976654, "learning_rate": 1.8809527444566724e-05, "loss": 1.4271, "mean_token_accuracy": 0.6574305593967438, "num_tokens": 583581686.0, "step": 3472 }, { "entropy": 1.7561549345652263, "epoch": 0.38153305319820935, "grad_norm": 0.7138762474060059, "learning_rate": 1.8808731489326976e-05, "loss": 1.4787, "mean_token_accuracy": 0.6449342767397562, "num_tokens": 583820312.0, "step": 3473 }, { "entropy": 1.7409043808778126, "epoch": 0.3816429101095823, "grad_norm": 0.6710432171821594, "learning_rate": 1.8807935286944397e-05, "loss": 1.5292, "mean_token_accuracy": 0.6325879693031311, "num_tokens": 584045229.0, "step": 3474 }, { "entropy": 1.7295817732810974, "epoch": 0.38175276702095523, "grad_norm": 0.7256639003753662, "learning_rate": 1.880713883744418e-05, "loss": 1.5294, "mean_token_accuracy": 0.6477811336517334, "num_tokens": 584199841.0, "step": 3475 }, { "entropy": 1.6922647754351299, "epoch": 0.3818626239323281, "grad_norm": 0.7137476801872253, "learning_rate": 1.8806342140851545e-05, "loss": 1.2719, "mean_token_accuracy": 0.6746822595596313, "num_tokens": 584302842.0, "step": 3476 }, { "entropy": 1.741408884525299, "epoch": 0.38197248084370106, "grad_norm": 0.6547417044639587, "learning_rate": 1.880554519719169e-05, "loss": 1.4459, "mean_token_accuracy": 0.6518658250570297, "num_tokens": 584533354.0, "step": 3477 }, { "entropy": 1.6792203883330028, "epoch": 0.382082337755074, "grad_norm": 0.6794640421867371, "learning_rate": 1.8804748006489852e-05, "loss": 1.4004, "mean_token_accuracy": 0.6527031362056732, "num_tokens": 584699604.0, "step": 3478 }, { "entropy": 1.7532505889733632, "epoch": 0.38219219466644694, "grad_norm": 0.7683124542236328, "learning_rate": 1.880395056877126e-05, "loss": 1.4457, "mean_token_accuracy": 0.6515724509954453, "num_tokens": 584869532.0, "step": 3479 }, { "entropy": 1.7253733774026234, "epoch": 0.3823020515778199, "grad_norm": 0.6482527256011963, "learning_rate": 1.880315288406114e-05, "loss": 1.5189, "mean_token_accuracy": 0.6325220863024393, "num_tokens": 585042675.0, "step": 3480 }, { "entropy": 1.7039113640785217, "epoch": 0.3824119084891928, "grad_norm": 0.6514295935630798, "learning_rate": 1.8802354952384753e-05, "loss": 1.4191, "mean_token_accuracy": 0.6613591512044271, "num_tokens": 585205823.0, "step": 3481 }, { "entropy": 1.6615497569243114, "epoch": 0.38252176540056576, "grad_norm": 0.7592434883117676, "learning_rate": 1.8801556773767348e-05, "loss": 1.2488, "mean_token_accuracy": 0.6740445991357168, "num_tokens": 585359134.0, "step": 3482 }, { "entropy": 1.730480541785558, "epoch": 0.3826316223119387, "grad_norm": 0.7494388818740845, "learning_rate": 1.8800758348234184e-05, "loss": 1.4356, "mean_token_accuracy": 0.6558689872423807, "num_tokens": 585562435.0, "step": 3483 }, { "entropy": 1.7431610922018688, "epoch": 0.38274147922331164, "grad_norm": 0.6161172986030579, "learning_rate": 1.8799959675810537e-05, "loss": 1.3557, "mean_token_accuracy": 0.655944844086965, "num_tokens": 585766828.0, "step": 3484 }, { "entropy": 1.7082973023255665, "epoch": 0.3828513361346846, "grad_norm": 1.0127819776535034, "learning_rate": 1.8799160756521678e-05, "loss": 1.2512, "mean_token_accuracy": 0.6891203025976816, "num_tokens": 585899197.0, "step": 3485 }, { "entropy": 1.7385966678460438, "epoch": 0.3829611930460575, "grad_norm": 0.7442635893821716, "learning_rate": 1.8798361590392894e-05, "loss": 1.4916, "mean_token_accuracy": 0.6403134316205978, "num_tokens": 586067153.0, "step": 3486 }, { "entropy": 1.72092600663503, "epoch": 0.38307104995743047, "grad_norm": 0.6867280006408691, "learning_rate": 1.8797562177449483e-05, "loss": 1.3125, "mean_token_accuracy": 0.6737407147884369, "num_tokens": 586200744.0, "step": 3487 }, { "entropy": 1.6985297699769337, "epoch": 0.3831809068688034, "grad_norm": 0.6545002460479736, "learning_rate": 1.879676251771674e-05, "loss": 1.5108, "mean_token_accuracy": 0.632008487979571, "num_tokens": 586382656.0, "step": 3488 }, { "entropy": 1.665940374135971, "epoch": 0.38329076378017635, "grad_norm": 0.7648383975028992, "learning_rate": 1.879596261121998e-05, "loss": 1.4246, "mean_token_accuracy": 0.6555665085713068, "num_tokens": 586614246.0, "step": 3489 }, { "entropy": 1.6751268605391185, "epoch": 0.38340062069154923, "grad_norm": 0.6534166932106018, "learning_rate": 1.8795162457984516e-05, "loss": 1.4129, "mean_token_accuracy": 0.6518707672754923, "num_tokens": 586793947.0, "step": 3490 }, { "entropy": 1.6860232551892598, "epoch": 0.3835104776029222, "grad_norm": 0.697482705116272, "learning_rate": 1.8794362058035665e-05, "loss": 1.1756, "mean_token_accuracy": 0.6882057338953018, "num_tokens": 586907081.0, "step": 3491 }, { "entropy": 1.6990710695584614, "epoch": 0.3836203345142951, "grad_norm": 0.559978187084198, "learning_rate": 1.879356141139878e-05, "loss": 1.5079, "mean_token_accuracy": 0.6403456131617228, "num_tokens": 587108420.0, "step": 3492 }, { "entropy": 1.7045027613639832, "epoch": 0.38373019142566805, "grad_norm": 0.6749347448348999, "learning_rate": 1.879276051809918e-05, "loss": 1.3578, "mean_token_accuracy": 0.6670193572839102, "num_tokens": 587270046.0, "step": 3493 }, { "entropy": 1.6886097590128581, "epoch": 0.383840048337041, "grad_norm": 0.7157772779464722, "learning_rate": 1.879195937816222e-05, "loss": 1.3459, "mean_token_accuracy": 0.6764027178287506, "num_tokens": 587451691.0, "step": 3494 }, { "entropy": 1.696417550245921, "epoch": 0.38394990524841394, "grad_norm": 0.7377708554267883, "learning_rate": 1.8791157991613258e-05, "loss": 1.506, "mean_token_accuracy": 0.6467360059420267, "num_tokens": 587615528.0, "step": 3495 }, { "entropy": 1.7199425995349884, "epoch": 0.3840597621597869, "grad_norm": 0.7708967328071594, "learning_rate": 1.879035635847766e-05, "loss": 1.4605, "mean_token_accuracy": 0.6508774061997732, "num_tokens": 587779213.0, "step": 3496 }, { "entropy": 1.719101478656133, "epoch": 0.3841696190711598, "grad_norm": 0.7188828587532043, "learning_rate": 1.878955447878079e-05, "loss": 1.6502, "mean_token_accuracy": 0.6291324868798256, "num_tokens": 587963491.0, "step": 3497 }, { "entropy": 1.6726809938748677, "epoch": 0.38427947598253276, "grad_norm": 0.8379467725753784, "learning_rate": 1.8788752352548032e-05, "loss": 1.4745, "mean_token_accuracy": 0.6412243594725927, "num_tokens": 588138029.0, "step": 3498 }, { "entropy": 1.7113063037395477, "epoch": 0.3843893328939057, "grad_norm": 0.6474940180778503, "learning_rate": 1.8787949979804773e-05, "loss": 1.4364, "mean_token_accuracy": 0.6462200383345286, "num_tokens": 588299515.0, "step": 3499 }, { "entropy": 1.7047918836275737, "epoch": 0.38449918980527864, "grad_norm": 0.8152151703834534, "learning_rate": 1.8787147360576407e-05, "loss": 1.421, "mean_token_accuracy": 0.6530329436063766, "num_tokens": 588460227.0, "step": 3500 }, { "entropy": 1.68595157066981, "epoch": 0.3846090467166516, "grad_norm": 0.6358811855316162, "learning_rate": 1.8786344494888334e-05, "loss": 1.3389, "mean_token_accuracy": 0.6701284398635229, "num_tokens": 588603997.0, "step": 3501 }, { "entropy": 1.6604685087998707, "epoch": 0.3847189036280245, "grad_norm": 0.6849839091300964, "learning_rate": 1.8785541382765963e-05, "loss": 1.2876, "mean_token_accuracy": 0.6715737382570902, "num_tokens": 588756310.0, "step": 3502 }, { "entropy": 1.7432369391123455, "epoch": 0.3848287605393974, "grad_norm": 0.7382224202156067, "learning_rate": 1.8784738024234724e-05, "loss": 1.335, "mean_token_accuracy": 0.6696681876977285, "num_tokens": 588952647.0, "step": 3503 }, { "entropy": 1.7226824462413788, "epoch": 0.38493861745077035, "grad_norm": 0.7350408434867859, "learning_rate": 1.8783934419320026e-05, "loss": 1.4502, "mean_token_accuracy": 0.652747223774592, "num_tokens": 589164790.0, "step": 3504 }, { "entropy": 1.7092638711134593, "epoch": 0.3850484743621433, "grad_norm": 0.7419540286064148, "learning_rate": 1.8783130568047317e-05, "loss": 1.2935, "mean_token_accuracy": 0.6710209945837656, "num_tokens": 589299732.0, "step": 3505 }, { "entropy": 1.7275305191675823, "epoch": 0.38515833127351623, "grad_norm": 0.6263718008995056, "learning_rate": 1.878232647044203e-05, "loss": 1.395, "mean_token_accuracy": 0.6440123667319616, "num_tokens": 589477221.0, "step": 3506 }, { "entropy": 1.691060076157252, "epoch": 0.38526818818488917, "grad_norm": 0.6086033582687378, "learning_rate": 1.8781522126529615e-05, "loss": 1.3346, "mean_token_accuracy": 0.6638441930214564, "num_tokens": 589632490.0, "step": 3507 }, { "entropy": 1.7170771658420563, "epoch": 0.3853780450962621, "grad_norm": 0.6135653853416443, "learning_rate": 1.8780717536335534e-05, "loss": 1.3926, "mean_token_accuracy": 0.6520104904969534, "num_tokens": 589837072.0, "step": 3508 }, { "entropy": 1.7135487794876099, "epoch": 0.38548790200763505, "grad_norm": 0.8644580841064453, "learning_rate": 1.877991269988525e-05, "loss": 1.5439, "mean_token_accuracy": 0.6479515383640925, "num_tokens": 590037145.0, "step": 3509 }, { "entropy": 1.656055251757304, "epoch": 0.385597758919008, "grad_norm": 0.6589810252189636, "learning_rate": 1.8779107617204232e-05, "loss": 1.3376, "mean_token_accuracy": 0.6675926595926285, "num_tokens": 590181728.0, "step": 3510 }, { "entropy": 1.6446336209774017, "epoch": 0.38570761583038093, "grad_norm": 0.7715820074081421, "learning_rate": 1.8778302288317965e-05, "loss": 1.42, "mean_token_accuracy": 0.6649827063083649, "num_tokens": 590345528.0, "step": 3511 }, { "entropy": 1.7644979854424794, "epoch": 0.3858174727417539, "grad_norm": 0.6795924305915833, "learning_rate": 1.8777496713251937e-05, "loss": 1.5448, "mean_token_accuracy": 0.6330472528934479, "num_tokens": 590543297.0, "step": 3512 }, { "entropy": 1.7632849017779033, "epoch": 0.3859273296531268, "grad_norm": 0.8066057562828064, "learning_rate": 1.8776690892031642e-05, "loss": 1.2179, "mean_token_accuracy": 0.6771250069141388, "num_tokens": 590649907.0, "step": 3513 }, { "entropy": 1.6934345563252766, "epoch": 0.38603718656449976, "grad_norm": 0.6281071901321411, "learning_rate": 1.877588482468258e-05, "loss": 1.3695, "mean_token_accuracy": 0.6520146181186041, "num_tokens": 590816034.0, "step": 3514 }, { "entropy": 1.711225817600886, "epoch": 0.3861470434758727, "grad_norm": 0.8094905614852905, "learning_rate": 1.8775078511230275e-05, "loss": 1.3598, "mean_token_accuracy": 0.6714527507623037, "num_tokens": 590967779.0, "step": 3515 }, { "entropy": 1.7611852586269379, "epoch": 0.38625690038724564, "grad_norm": 0.7497817873954773, "learning_rate": 1.877427195170023e-05, "loss": 1.4466, "mean_token_accuracy": 0.6378799378871918, "num_tokens": 591115206.0, "step": 3516 }, { "entropy": 1.7012092570463817, "epoch": 0.3863667572986185, "grad_norm": 0.7083910703659058, "learning_rate": 1.8773465146117988e-05, "loss": 1.3734, "mean_token_accuracy": 0.6618959506352743, "num_tokens": 591263829.0, "step": 3517 }, { "entropy": 1.697861025730769, "epoch": 0.38647661420999146, "grad_norm": 0.6678640842437744, "learning_rate": 1.8772658094509072e-05, "loss": 1.419, "mean_token_accuracy": 0.6634480754534403, "num_tokens": 591459207.0, "step": 3518 }, { "entropy": 1.7712201476097107, "epoch": 0.3865864711213644, "grad_norm": 0.7492165565490723, "learning_rate": 1.8771850796899034e-05, "loss": 1.2713, "mean_token_accuracy": 0.6765512228012085, "num_tokens": 591593988.0, "step": 3519 }, { "entropy": 1.6945832471052806, "epoch": 0.38669632803273735, "grad_norm": 0.7029894590377808, "learning_rate": 1.877104325331342e-05, "loss": 1.4178, "mean_token_accuracy": 0.6622582574685415, "num_tokens": 591758058.0, "step": 3520 }, { "entropy": 1.6844372848669689, "epoch": 0.3868061849441103, "grad_norm": 0.6502472758293152, "learning_rate": 1.8770235463777784e-05, "loss": 1.3107, "mean_token_accuracy": 0.6579713672399521, "num_tokens": 591902067.0, "step": 3521 }, { "entropy": 1.698314368724823, "epoch": 0.38691604185548323, "grad_norm": 0.8369100093841553, "learning_rate": 1.87694274283177e-05, "loss": 1.3315, "mean_token_accuracy": 0.6667650043964386, "num_tokens": 592026730.0, "step": 3522 }, { "entropy": 1.6990590989589691, "epoch": 0.38702589876685617, "grad_norm": 0.757598876953125, "learning_rate": 1.8768619146958736e-05, "loss": 1.3108, "mean_token_accuracy": 0.6711003084977468, "num_tokens": 592159351.0, "step": 3523 }, { "entropy": 1.6836991906166077, "epoch": 0.3871357556782291, "grad_norm": 0.6812123656272888, "learning_rate": 1.8767810619726486e-05, "loss": 1.5014, "mean_token_accuracy": 0.6574613849322001, "num_tokens": 592313020.0, "step": 3524 }, { "entropy": 1.6621138453483582, "epoch": 0.38724561258960205, "grad_norm": 0.7579445242881775, "learning_rate": 1.8767001846646522e-05, "loss": 1.3334, "mean_token_accuracy": 0.6703773736953735, "num_tokens": 592465715.0, "step": 3525 }, { "entropy": 1.7132685979207356, "epoch": 0.387355469500975, "grad_norm": 0.683297872543335, "learning_rate": 1.876619282774445e-05, "loss": 1.5687, "mean_token_accuracy": 0.6472751895586649, "num_tokens": 592714454.0, "step": 3526 }, { "entropy": 1.6917611062526703, "epoch": 0.38746532641234793, "grad_norm": 0.7978048920631409, "learning_rate": 1.876538356304588e-05, "loss": 1.4326, "mean_token_accuracy": 0.6595585942268372, "num_tokens": 592855814.0, "step": 3527 }, { "entropy": 1.7584986786047618, "epoch": 0.3875751833237209, "grad_norm": 0.6933776140213013, "learning_rate": 1.876457405257641e-05, "loss": 1.2829, "mean_token_accuracy": 0.6754846076170603, "num_tokens": 592996822.0, "step": 3528 }, { "entropy": 1.679332544406255, "epoch": 0.3876850402350938, "grad_norm": 0.6294096112251282, "learning_rate": 1.8763764296361676e-05, "loss": 1.2627, "mean_token_accuracy": 0.6760277499755224, "num_tokens": 593154964.0, "step": 3529 }, { "entropy": 1.704353282848994, "epoch": 0.3877948971464667, "grad_norm": 0.65788334608078, "learning_rate": 1.8762954294427298e-05, "loss": 1.41, "mean_token_accuracy": 0.6482875148455302, "num_tokens": 593295801.0, "step": 3530 }, { "entropy": 1.7058403293291728, "epoch": 0.38790475405783964, "grad_norm": 0.7336824536323547, "learning_rate": 1.8762144046798917e-05, "loss": 1.4683, "mean_token_accuracy": 0.6580928464730581, "num_tokens": 593505141.0, "step": 3531 }, { "entropy": 1.7358074982961018, "epoch": 0.3880146109692126, "grad_norm": 0.7603702545166016, "learning_rate": 1.8761333553502173e-05, "loss": 1.3445, "mean_token_accuracy": 0.6589676340421041, "num_tokens": 593635482.0, "step": 3532 }, { "entropy": 1.7584581673145294, "epoch": 0.3881244678805855, "grad_norm": 0.6100241541862488, "learning_rate": 1.8760522814562723e-05, "loss": 1.5353, "mean_token_accuracy": 0.6196905672550201, "num_tokens": 593889864.0, "step": 3533 }, { "entropy": 1.709738661845525, "epoch": 0.38823432479195846, "grad_norm": 0.6657153964042664, "learning_rate": 1.875971183000622e-05, "loss": 1.3076, "mean_token_accuracy": 0.6685143858194351, "num_tokens": 594063330.0, "step": 3534 }, { "entropy": 1.6752463181813557, "epoch": 0.3883441817033314, "grad_norm": 0.62481689453125, "learning_rate": 1.8758900599858333e-05, "loss": 1.299, "mean_token_accuracy": 0.6609266599019369, "num_tokens": 594265034.0, "step": 3535 }, { "entropy": 1.7667845884958904, "epoch": 0.38845403861470434, "grad_norm": 0.7150773406028748, "learning_rate": 1.875808912414474e-05, "loss": 1.4817, "mean_token_accuracy": 0.6347835808992386, "num_tokens": 594428055.0, "step": 3536 }, { "entropy": 1.74485116203626, "epoch": 0.3885638955260773, "grad_norm": 0.6251989006996155, "learning_rate": 1.8757277402891118e-05, "loss": 1.405, "mean_token_accuracy": 0.6552664488554001, "num_tokens": 594605592.0, "step": 3537 }, { "entropy": 1.7133037547270458, "epoch": 0.3886737524374502, "grad_norm": 0.695165753364563, "learning_rate": 1.8756465436123167e-05, "loss": 1.3105, "mean_token_accuracy": 0.670314704378446, "num_tokens": 594744857.0, "step": 3538 }, { "entropy": 1.7227947811285655, "epoch": 0.38878360934882317, "grad_norm": 0.7755094766616821, "learning_rate": 1.875565322386658e-05, "loss": 1.3068, "mean_token_accuracy": 0.6800702015558878, "num_tokens": 594938776.0, "step": 3539 }, { "entropy": 1.6865267256895702, "epoch": 0.3888934662601961, "grad_norm": 0.671947181224823, "learning_rate": 1.875484076614706e-05, "loss": 1.4058, "mean_token_accuracy": 0.6469751199086508, "num_tokens": 595122731.0, "step": 3540 }, { "entropy": 1.6733566025892894, "epoch": 0.38900332317156905, "grad_norm": 0.6642799377441406, "learning_rate": 1.8754028062990327e-05, "loss": 1.3554, "mean_token_accuracy": 0.6744746913512548, "num_tokens": 595306659.0, "step": 3541 }, { "entropy": 1.7339465618133545, "epoch": 0.389113180082942, "grad_norm": 0.7320308089256287, "learning_rate": 1.8753215114422096e-05, "loss": 1.3023, "mean_token_accuracy": 0.6688550561666489, "num_tokens": 595470855.0, "step": 3542 }, { "entropy": 1.7501880327860515, "epoch": 0.38922303699431493, "grad_norm": 0.8129941821098328, "learning_rate": 1.8752401920468105e-05, "loss": 1.4079, "mean_token_accuracy": 0.656624640027682, "num_tokens": 595645644.0, "step": 3543 }, { "entropy": 1.7234038313229878, "epoch": 0.3893328939056878, "grad_norm": 0.6668652892112732, "learning_rate": 1.8751588481154083e-05, "loss": 1.4884, "mean_token_accuracy": 0.6411069482564926, "num_tokens": 595842197.0, "step": 3544 }, { "entropy": 1.6663442055384319, "epoch": 0.38944275081706076, "grad_norm": 0.6142482757568359, "learning_rate": 1.875077479650578e-05, "loss": 1.4848, "mean_token_accuracy": 0.6513569702704748, "num_tokens": 596060521.0, "step": 3545 }, { "entropy": 1.6824649969736736, "epoch": 0.3895526077284337, "grad_norm": 0.6500999331474304, "learning_rate": 1.8749960866548948e-05, "loss": 1.3553, "mean_token_accuracy": 0.6778768996397654, "num_tokens": 596237180.0, "step": 3546 }, { "entropy": 1.6939981679121654, "epoch": 0.38966246463980664, "grad_norm": 0.6637330055236816, "learning_rate": 1.8749146691309347e-05, "loss": 1.4655, "mean_token_accuracy": 0.6524067719777426, "num_tokens": 596402651.0, "step": 3547 }, { "entropy": 1.7055251995722454, "epoch": 0.3897723215511796, "grad_norm": 0.7360928058624268, "learning_rate": 1.8748332270812746e-05, "loss": 1.3932, "mean_token_accuracy": 0.6488986412684122, "num_tokens": 596604743.0, "step": 3548 }, { "entropy": 1.7154695093631744, "epoch": 0.3898821784625525, "grad_norm": 0.7440617084503174, "learning_rate": 1.8747517605084914e-05, "loss": 1.3314, "mean_token_accuracy": 0.6643383254607519, "num_tokens": 596728567.0, "step": 3549 }, { "entropy": 1.6730522513389587, "epoch": 0.38999203537392546, "grad_norm": 0.6638359427452087, "learning_rate": 1.8746702694151645e-05, "loss": 1.3219, "mean_token_accuracy": 0.6653886139392853, "num_tokens": 596933860.0, "step": 3550 }, { "entropy": 1.6828788320223491, "epoch": 0.3901018922852984, "grad_norm": 0.6851414442062378, "learning_rate": 1.8745887538038727e-05, "loss": 1.3891, "mean_token_accuracy": 0.6566885908444723, "num_tokens": 597094207.0, "step": 3551 }, { "entropy": 1.7113615274429321, "epoch": 0.39021174919667134, "grad_norm": 0.7445501089096069, "learning_rate": 1.874507213677196e-05, "loss": 1.3147, "mean_token_accuracy": 0.6675726721684138, "num_tokens": 597232948.0, "step": 3552 }, { "entropy": 1.7263106803099315, "epoch": 0.3903216061080443, "grad_norm": 0.5610165596008301, "learning_rate": 1.8744256490377147e-05, "loss": 1.4161, "mean_token_accuracy": 0.6461490740378698, "num_tokens": 597413908.0, "step": 3553 }, { "entropy": 1.6911349991957347, "epoch": 0.3904314630194172, "grad_norm": 0.7104760408401489, "learning_rate": 1.874344059888011e-05, "loss": 1.433, "mean_token_accuracy": 0.6454216440518697, "num_tokens": 597659679.0, "step": 3554 }, { "entropy": 1.7260303298632305, "epoch": 0.39054131993079017, "grad_norm": 0.7172141671180725, "learning_rate": 1.874262446230666e-05, "loss": 1.295, "mean_token_accuracy": 0.6699913293123245, "num_tokens": 597778281.0, "step": 3555 }, { "entropy": 1.6977204084396362, "epoch": 0.3906511768421631, "grad_norm": 0.636026918888092, "learning_rate": 1.8741808080682642e-05, "loss": 1.3092, "mean_token_accuracy": 0.6655734032392502, "num_tokens": 597910822.0, "step": 3556 }, { "entropy": 1.7228349049886067, "epoch": 0.39076103375353605, "grad_norm": 0.7579364776611328, "learning_rate": 1.8740991454033883e-05, "loss": 1.45, "mean_token_accuracy": 0.6555042515198389, "num_tokens": 598051246.0, "step": 3557 }, { "entropy": 1.715238094329834, "epoch": 0.39087089066490893, "grad_norm": 0.7158708572387695, "learning_rate": 1.8740174582386234e-05, "loss": 1.3264, "mean_token_accuracy": 0.6576440383990606, "num_tokens": 598170261.0, "step": 3558 }, { "entropy": 1.7376553813616435, "epoch": 0.3909807475762819, "grad_norm": 0.8242320418357849, "learning_rate": 1.8739357465765547e-05, "loss": 1.3275, "mean_token_accuracy": 0.6688285072644552, "num_tokens": 598289904.0, "step": 3559 }, { "entropy": 1.6719888945420582, "epoch": 0.3910906044876548, "grad_norm": 0.603971004486084, "learning_rate": 1.8738540104197683e-05, "loss": 1.5734, "mean_token_accuracy": 0.6278845717509588, "num_tokens": 598516225.0, "step": 3560 }, { "entropy": 1.697850485642751, "epoch": 0.39120046139902775, "grad_norm": 0.61806720495224, "learning_rate": 1.873772249770851e-05, "loss": 1.5395, "mean_token_accuracy": 0.6368564814329147, "num_tokens": 598787097.0, "step": 3561 }, { "entropy": 1.6998209357261658, "epoch": 0.3913103183104007, "grad_norm": 0.6823562979698181, "learning_rate": 1.873690464632391e-05, "loss": 1.4176, "mean_token_accuracy": 0.6531643867492676, "num_tokens": 598950071.0, "step": 3562 }, { "entropy": 1.7084301312764485, "epoch": 0.39142017522177364, "grad_norm": 0.7508410811424255, "learning_rate": 1.8736086550069766e-05, "loss": 1.5139, "mean_token_accuracy": 0.6545840700467428, "num_tokens": 599121424.0, "step": 3563 }, { "entropy": 1.7422731916109722, "epoch": 0.3915300321331466, "grad_norm": 0.6909976601600647, "learning_rate": 1.8735268208971965e-05, "loss": 1.496, "mean_token_accuracy": 0.639715259273847, "num_tokens": 599284329.0, "step": 3564 }, { "entropy": 1.6988299985726674, "epoch": 0.3916398890445195, "grad_norm": 0.728016197681427, "learning_rate": 1.873444962305641e-05, "loss": 1.274, "mean_token_accuracy": 0.6804704517126083, "num_tokens": 599418243.0, "step": 3565 }, { "entropy": 1.7136310239632924, "epoch": 0.39174974595589246, "grad_norm": 0.623084545135498, "learning_rate": 1.8733630792349014e-05, "loss": 1.5038, "mean_token_accuracy": 0.6375333170096079, "num_tokens": 599602975.0, "step": 3566 }, { "entropy": 1.686454842487971, "epoch": 0.3918596028672654, "grad_norm": 0.6495208144187927, "learning_rate": 1.8732811716875684e-05, "loss": 1.4385, "mean_token_accuracy": 0.6662272214889526, "num_tokens": 599821930.0, "step": 3567 }, { "entropy": 1.7124264140923817, "epoch": 0.39196945977863834, "grad_norm": 0.7537272572517395, "learning_rate": 1.873199239666235e-05, "loss": 1.5257, "mean_token_accuracy": 0.6516513874133428, "num_tokens": 600017465.0, "step": 3568 }, { "entropy": 1.6850533187389374, "epoch": 0.3920793166900113, "grad_norm": 0.6643959879875183, "learning_rate": 1.8731172831734937e-05, "loss": 1.2957, "mean_token_accuracy": 0.6703493893146515, "num_tokens": 600164676.0, "step": 3569 }, { "entropy": 1.699459304412206, "epoch": 0.3921891736013842, "grad_norm": 0.6547852754592896, "learning_rate": 1.8730353022119392e-05, "loss": 1.4598, "mean_token_accuracy": 0.652552917599678, "num_tokens": 600314512.0, "step": 3570 }, { "entropy": 1.6793744961420696, "epoch": 0.3922990305127571, "grad_norm": 0.7872046828269958, "learning_rate": 1.8729532967841657e-05, "loss": 1.5209, "mean_token_accuracy": 0.6407067527373632, "num_tokens": 600560727.0, "step": 3571 }, { "entropy": 1.7016756534576416, "epoch": 0.39240888742413005, "grad_norm": 0.804166853427887, "learning_rate": 1.8728712668927684e-05, "loss": 1.5712, "mean_token_accuracy": 0.6527331074078878, "num_tokens": 600701171.0, "step": 3572 }, { "entropy": 1.6971095005671184, "epoch": 0.392518744335503, "grad_norm": 0.6559096574783325, "learning_rate": 1.8727892125403437e-05, "loss": 1.4343, "mean_token_accuracy": 0.6504131704568863, "num_tokens": 600853204.0, "step": 3573 }, { "entropy": 1.740959644317627, "epoch": 0.39262860124687593, "grad_norm": 0.7399430871009827, "learning_rate": 1.8727071337294892e-05, "loss": 1.404, "mean_token_accuracy": 0.6460892607768377, "num_tokens": 601018363.0, "step": 3574 }, { "entropy": 1.70658544699351, "epoch": 0.39273845815824887, "grad_norm": 0.6616029143333435, "learning_rate": 1.8726250304628017e-05, "loss": 1.4447, "mean_token_accuracy": 0.6470039238532385, "num_tokens": 601166522.0, "step": 3575 }, { "entropy": 1.7305179238319397, "epoch": 0.3928483150696218, "grad_norm": 0.693975031375885, "learning_rate": 1.8725429027428802e-05, "loss": 1.3161, "mean_token_accuracy": 0.6667521148920059, "num_tokens": 601345354.0, "step": 3576 }, { "entropy": 1.7331166168053944, "epoch": 0.39295817198099475, "grad_norm": 0.6473891139030457, "learning_rate": 1.8724607505723236e-05, "loss": 1.3952, "mean_token_accuracy": 0.6563832859198252, "num_tokens": 601489345.0, "step": 3577 }, { "entropy": 1.7098148167133331, "epoch": 0.3930680288923677, "grad_norm": 0.7081977725028992, "learning_rate": 1.8723785739537328e-05, "loss": 1.4453, "mean_token_accuracy": 0.6467587898174921, "num_tokens": 601633917.0, "step": 3578 }, { "entropy": 1.6743212342262268, "epoch": 0.39317788580374063, "grad_norm": 0.7473645210266113, "learning_rate": 1.8722963728897078e-05, "loss": 1.2851, "mean_token_accuracy": 0.6733796795209249, "num_tokens": 601771977.0, "step": 3579 }, { "entropy": 1.6344492137432098, "epoch": 0.3932877427151136, "grad_norm": 0.6567934155464172, "learning_rate": 1.872214147382851e-05, "loss": 1.2201, "mean_token_accuracy": 0.6826841433842977, "num_tokens": 601917258.0, "step": 3580 }, { "entropy": 1.7242592175801594, "epoch": 0.3933975996264865, "grad_norm": 0.7916681170463562, "learning_rate": 1.872131897435764e-05, "loss": 1.4052, "mean_token_accuracy": 0.6629040241241455, "num_tokens": 602070528.0, "step": 3581 }, { "entropy": 1.7825438876946766, "epoch": 0.39350745653785946, "grad_norm": 0.6252172589302063, "learning_rate": 1.872049623051051e-05, "loss": 1.5612, "mean_token_accuracy": 0.6375692586104075, "num_tokens": 602269702.0, "step": 3582 }, { "entropy": 1.7129474182923634, "epoch": 0.3936173134492324, "grad_norm": 0.6330097913742065, "learning_rate": 1.871967324231315e-05, "loss": 1.3636, "mean_token_accuracy": 0.6603029817342758, "num_tokens": 602439795.0, "step": 3583 }, { "entropy": 1.7473669946193695, "epoch": 0.39372717036060534, "grad_norm": 0.6051161885261536, "learning_rate": 1.871885000979161e-05, "loss": 1.4629, "mean_token_accuracy": 0.6398325165112814, "num_tokens": 602620971.0, "step": 3584 }, { "entropy": 1.6432409286499023, "epoch": 0.3938370272719782, "grad_norm": 0.7886459231376648, "learning_rate": 1.8718026532971945e-05, "loss": 1.4551, "mean_token_accuracy": 0.6793592671553293, "num_tokens": 602787338.0, "step": 3585 }, { "entropy": 1.696602314710617, "epoch": 0.39394688418335116, "grad_norm": 0.7194052338600159, "learning_rate": 1.871720281188022e-05, "loss": 1.3187, "mean_token_accuracy": 0.6622123072544733, "num_tokens": 602910036.0, "step": 3586 }, { "entropy": 1.6780929962793987, "epoch": 0.3940567410947241, "grad_norm": 1.4930285215377808, "learning_rate": 1.87163788465425e-05, "loss": 1.5072, "mean_token_accuracy": 0.6442895332972208, "num_tokens": 603118858.0, "step": 3587 }, { "entropy": 1.6930700143178303, "epoch": 0.39416659800609705, "grad_norm": 0.7970458269119263, "learning_rate": 1.8715554636984868e-05, "loss": 1.3497, "mean_token_accuracy": 0.6615445464849472, "num_tokens": 603300138.0, "step": 3588 }, { "entropy": 1.676286409298579, "epoch": 0.39427645491747, "grad_norm": 0.7440655827522278, "learning_rate": 1.871473018323341e-05, "loss": 1.5556, "mean_token_accuracy": 0.6397054543097814, "num_tokens": 603524712.0, "step": 3589 }, { "entropy": 1.677784413099289, "epoch": 0.39438631182884293, "grad_norm": 0.6356014609336853, "learning_rate": 1.8713905485314216e-05, "loss": 1.3834, "mean_token_accuracy": 0.6559326549371084, "num_tokens": 603680062.0, "step": 3590 }, { "entropy": 1.7333962221940358, "epoch": 0.39449616874021587, "grad_norm": 0.7091386914253235, "learning_rate": 1.871308054325339e-05, "loss": 1.3963, "mean_token_accuracy": 0.6534087806940079, "num_tokens": 603863458.0, "step": 3591 }, { "entropy": 1.6834155718485515, "epoch": 0.3946060256515888, "grad_norm": 0.6516834497451782, "learning_rate": 1.871225535707704e-05, "loss": 1.3878, "mean_token_accuracy": 0.6710058401028315, "num_tokens": 603994946.0, "step": 3592 }, { "entropy": 1.645541141430537, "epoch": 0.39471588256296175, "grad_norm": 0.6310259103775024, "learning_rate": 1.8711429926811285e-05, "loss": 1.2944, "mean_token_accuracy": 0.6635814557472864, "num_tokens": 604168355.0, "step": 3593 }, { "entropy": 1.7525591452916462, "epoch": 0.3948257394743347, "grad_norm": 0.7263670563697815, "learning_rate": 1.8710604252482244e-05, "loss": 1.2925, "mean_token_accuracy": 0.667170450091362, "num_tokens": 604272321.0, "step": 3594 }, { "entropy": 1.673738161722819, "epoch": 0.39493559638570763, "grad_norm": 0.6252807378768921, "learning_rate": 1.8709778334116057e-05, "loss": 1.3787, "mean_token_accuracy": 0.6551141440868378, "num_tokens": 604478407.0, "step": 3595 }, { "entropy": 1.7633110185464222, "epoch": 0.3950454532970806, "grad_norm": 0.6537090539932251, "learning_rate": 1.8708952171738856e-05, "loss": 1.5101, "mean_token_accuracy": 0.6308721353610357, "num_tokens": 604695750.0, "step": 3596 }, { "entropy": 1.7199938992659252, "epoch": 0.3951553102084535, "grad_norm": 0.7916152477264404, "learning_rate": 1.87081257653768e-05, "loss": 1.4183, "mean_token_accuracy": 0.6466862559318542, "num_tokens": 604842866.0, "step": 3597 }, { "entropy": 1.6832565764586132, "epoch": 0.3952651671198264, "grad_norm": 0.6747387051582336, "learning_rate": 1.870729911505603e-05, "loss": 1.2859, "mean_token_accuracy": 0.6659845014413198, "num_tokens": 604984494.0, "step": 3598 }, { "entropy": 1.7077111999193828, "epoch": 0.39537502403119934, "grad_norm": 0.6704530715942383, "learning_rate": 1.8706472220802717e-05, "loss": 1.4066, "mean_token_accuracy": 0.6525115470091502, "num_tokens": 605147587.0, "step": 3599 }, { "entropy": 1.6967969636122386, "epoch": 0.3954848809425723, "grad_norm": 0.7108339071273804, "learning_rate": 1.8705645082643032e-05, "loss": 1.3964, "mean_token_accuracy": 0.6673119068145752, "num_tokens": 605284305.0, "step": 3600 }, { "entropy": 1.6884620984395344, "epoch": 0.3955947378539452, "grad_norm": 0.6969875693321228, "learning_rate": 1.8704817700603154e-05, "loss": 1.406, "mean_token_accuracy": 0.6541502624750137, "num_tokens": 605428461.0, "step": 3601 }, { "entropy": 1.6805487771828969, "epoch": 0.39570459476531816, "grad_norm": 0.6379789710044861, "learning_rate": 1.8703990074709263e-05, "loss": 1.4033, "mean_token_accuracy": 0.6565418342749277, "num_tokens": 605654217.0, "step": 3602 }, { "entropy": 1.6334502398967743, "epoch": 0.3958144516766911, "grad_norm": 0.7022704482078552, "learning_rate": 1.870316220498756e-05, "loss": 1.3101, "mean_token_accuracy": 0.6683827390273412, "num_tokens": 605810204.0, "step": 3603 }, { "entropy": 1.6775756180286407, "epoch": 0.39592430858806404, "grad_norm": 0.6165929436683655, "learning_rate": 1.8702334091464246e-05, "loss": 1.4418, "mean_token_accuracy": 0.6538349191347758, "num_tokens": 605976026.0, "step": 3604 }, { "entropy": 1.6821360886096954, "epoch": 0.396034165499437, "grad_norm": 0.6474902033805847, "learning_rate": 1.8701505734165527e-05, "loss": 1.3874, "mean_token_accuracy": 0.6578802863756815, "num_tokens": 606145063.0, "step": 3605 }, { "entropy": 1.6627052525679271, "epoch": 0.3961440224108099, "grad_norm": 0.6694169044494629, "learning_rate": 1.870067713311762e-05, "loss": 1.4774, "mean_token_accuracy": 0.6556628793478012, "num_tokens": 606323836.0, "step": 3606 }, { "entropy": 1.7426823377609253, "epoch": 0.39625387932218287, "grad_norm": 0.7143035531044006, "learning_rate": 1.8699848288346754e-05, "loss": 1.5342, "mean_token_accuracy": 0.6395404686530431, "num_tokens": 606527328.0, "step": 3607 }, { "entropy": 1.6447638769944508, "epoch": 0.3963637362335558, "grad_norm": 0.6995284557342529, "learning_rate": 1.869901919987916e-05, "loss": 1.2323, "mean_token_accuracy": 0.681049590309461, "num_tokens": 606652449.0, "step": 3608 }, { "entropy": 1.7393419643243153, "epoch": 0.39647359314492875, "grad_norm": 0.7996697425842285, "learning_rate": 1.8698189867741076e-05, "loss": 1.312, "mean_token_accuracy": 0.6633258064587911, "num_tokens": 606786412.0, "step": 3609 }, { "entropy": 1.7173935075600941, "epoch": 0.3965834500563017, "grad_norm": 0.8304112553596497, "learning_rate": 1.8697360291958754e-05, "loss": 1.3365, "mean_token_accuracy": 0.6675741821527481, "num_tokens": 606941342.0, "step": 3610 }, { "entropy": 1.699825793504715, "epoch": 0.39669330696767463, "grad_norm": 0.6240324378013611, "learning_rate": 1.8696530472558443e-05, "loss": 1.5215, "mean_token_accuracy": 0.641850084066391, "num_tokens": 607137523.0, "step": 3611 }, { "entropy": 1.7180300255616505, "epoch": 0.3968031638790475, "grad_norm": 0.730658769607544, "learning_rate": 1.8695700409566415e-05, "loss": 1.4504, "mean_token_accuracy": 0.6601553956667582, "num_tokens": 607275218.0, "step": 3612 }, { "entropy": 1.7043689092000325, "epoch": 0.39691302079042046, "grad_norm": 0.7190737128257751, "learning_rate": 1.8694870103008935e-05, "loss": 1.3154, "mean_token_accuracy": 0.6722518901030222, "num_tokens": 607422791.0, "step": 3613 }, { "entropy": 1.710739016532898, "epoch": 0.3970228777017934, "grad_norm": 0.6662858128547668, "learning_rate": 1.8694039552912284e-05, "loss": 1.3454, "mean_token_accuracy": 0.6655974884827932, "num_tokens": 607574450.0, "step": 3614 }, { "entropy": 1.7331662873427074, "epoch": 0.39713273461316634, "grad_norm": 0.6620702743530273, "learning_rate": 1.8693208759302747e-05, "loss": 1.3848, "mean_token_accuracy": 0.64388441046079, "num_tokens": 607709925.0, "step": 3615 }, { "entropy": 1.6359326243400574, "epoch": 0.3972425915245393, "grad_norm": 0.6715786457061768, "learning_rate": 1.869237772220662e-05, "loss": 1.32, "mean_token_accuracy": 0.6786454369624456, "num_tokens": 607877472.0, "step": 3616 }, { "entropy": 1.742279092470805, "epoch": 0.3973524484359122, "grad_norm": 0.7671318054199219, "learning_rate": 1.8691546441650207e-05, "loss": 1.5367, "mean_token_accuracy": 0.6638617217540741, "num_tokens": 608006491.0, "step": 3617 }, { "entropy": 1.6939302285512288, "epoch": 0.39746230534728516, "grad_norm": 0.7670230269432068, "learning_rate": 1.8690714917659814e-05, "loss": 1.3973, "mean_token_accuracy": 0.6581595738728842, "num_tokens": 608179437.0, "step": 3618 }, { "entropy": 1.6857011218865712, "epoch": 0.3975721622586581, "grad_norm": 0.787647008895874, "learning_rate": 1.8689883150261757e-05, "loss": 1.3985, "mean_token_accuracy": 0.6678627133369446, "num_tokens": 608310733.0, "step": 3619 }, { "entropy": 1.623725155989329, "epoch": 0.39768201917003104, "grad_norm": 0.8164381980895996, "learning_rate": 1.8689051139482365e-05, "loss": 1.5179, "mean_token_accuracy": 0.6581759800513586, "num_tokens": 608498736.0, "step": 3620 }, { "entropy": 1.6751043697198231, "epoch": 0.397791876081404, "grad_norm": 0.652132511138916, "learning_rate": 1.8688218885347965e-05, "loss": 1.2893, "mean_token_accuracy": 0.6741900146007538, "num_tokens": 608661402.0, "step": 3621 }, { "entropy": 1.7041152914365132, "epoch": 0.3979017329927769, "grad_norm": 0.7432838082313538, "learning_rate": 1.868738638788491e-05, "loss": 1.4467, "mean_token_accuracy": 0.6503884643316269, "num_tokens": 608825180.0, "step": 3622 }, { "entropy": 1.737585683663686, "epoch": 0.39801158990414987, "grad_norm": 0.6796532869338989, "learning_rate": 1.868655364711953e-05, "loss": 1.4838, "mean_token_accuracy": 0.6391591926415762, "num_tokens": 609006138.0, "step": 3623 }, { "entropy": 1.7053319811820984, "epoch": 0.3981214468155228, "grad_norm": 0.7604497671127319, "learning_rate": 1.86857206630782e-05, "loss": 1.6561, "mean_token_accuracy": 0.6299934685230255, "num_tokens": 609212521.0, "step": 3624 }, { "entropy": 1.6852892835934956, "epoch": 0.3982313037268957, "grad_norm": 0.6800695061683655, "learning_rate": 1.868488743578727e-05, "loss": 1.2921, "mean_token_accuracy": 0.6707666118939718, "num_tokens": 609346073.0, "step": 3625 }, { "entropy": 1.667622039715449, "epoch": 0.39834116063826863, "grad_norm": 0.7599472403526306, "learning_rate": 1.8684053965273113e-05, "loss": 1.3797, "mean_token_accuracy": 0.6550516585508982, "num_tokens": 609528585.0, "step": 3626 }, { "entropy": 1.7041009267171223, "epoch": 0.3984510175496416, "grad_norm": 0.6519821882247925, "learning_rate": 1.8683220251562116e-05, "loss": 1.4673, "mean_token_accuracy": 0.649181400736173, "num_tokens": 609699624.0, "step": 3627 }, { "entropy": 1.7341491381327312, "epoch": 0.3985608744610145, "grad_norm": 0.6558710932731628, "learning_rate": 1.8682386294680656e-05, "loss": 1.4285, "mean_token_accuracy": 0.6576865861813227, "num_tokens": 609851288.0, "step": 3628 }, { "entropy": 1.6496534844239552, "epoch": 0.39867073137238745, "grad_norm": 0.7691988348960876, "learning_rate": 1.8681552094655132e-05, "loss": 1.4595, "mean_token_accuracy": 0.6498380750417709, "num_tokens": 610005266.0, "step": 3629 }, { "entropy": 1.7265863120555878, "epoch": 0.3987805882837604, "grad_norm": 0.683956503868103, "learning_rate": 1.8680717651511948e-05, "loss": 1.3603, "mean_token_accuracy": 0.6562889615694681, "num_tokens": 610131391.0, "step": 3630 }, { "entropy": 1.6970125834147136, "epoch": 0.39889044519513334, "grad_norm": 1.811918020248413, "learning_rate": 1.8679882965277508e-05, "loss": 1.1718, "mean_token_accuracy": 0.6783278286457062, "num_tokens": 610327332.0, "step": 3631 }, { "entropy": 1.7868920266628265, "epoch": 0.3990003021065063, "grad_norm": 0.8308510184288025, "learning_rate": 1.8679048035978236e-05, "loss": 1.4282, "mean_token_accuracy": 0.6506891945997874, "num_tokens": 610455251.0, "step": 3632 }, { "entropy": 1.6640632251898448, "epoch": 0.3991101590178792, "grad_norm": 0.6303699612617493, "learning_rate": 1.8678212863640552e-05, "loss": 1.4084, "mean_token_accuracy": 0.6633955190579096, "num_tokens": 610614206.0, "step": 3633 }, { "entropy": 1.6598373850186665, "epoch": 0.39922001592925216, "grad_norm": 0.5654789209365845, "learning_rate": 1.8677377448290892e-05, "loss": 1.2886, "mean_token_accuracy": 0.6726368019978205, "num_tokens": 610776600.0, "step": 3634 }, { "entropy": 1.771477371454239, "epoch": 0.3993298728406251, "grad_norm": 0.6862777471542358, "learning_rate": 1.8676541789955692e-05, "loss": 1.4854, "mean_token_accuracy": 0.6504343748092651, "num_tokens": 610950858.0, "step": 3635 }, { "entropy": 1.7534089088439941, "epoch": 0.39943972975199804, "grad_norm": 0.7172439098358154, "learning_rate": 1.867570588866141e-05, "loss": 1.5362, "mean_token_accuracy": 0.6323390305042267, "num_tokens": 611180246.0, "step": 3636 }, { "entropy": 1.6689714988072712, "epoch": 0.399549586663371, "grad_norm": 0.5847604870796204, "learning_rate": 1.867486974443449e-05, "loss": 1.394, "mean_token_accuracy": 0.6602125515540441, "num_tokens": 611354464.0, "step": 3637 }, { "entropy": 1.7680908838907878, "epoch": 0.3996594435747439, "grad_norm": 0.7248014807701111, "learning_rate": 1.8674033357301402e-05, "loss": 1.4446, "mean_token_accuracy": 0.6542030622561773, "num_tokens": 611522664.0, "step": 3638 }, { "entropy": 1.714485635360082, "epoch": 0.3997693004861168, "grad_norm": 0.6951132416725159, "learning_rate": 1.8673196727288616e-05, "loss": 1.2956, "mean_token_accuracy": 0.6643576820691427, "num_tokens": 611647329.0, "step": 3639 }, { "entropy": 1.6996821860472362, "epoch": 0.39987915739748975, "grad_norm": 0.7710621953010559, "learning_rate": 1.8672359854422614e-05, "loss": 1.4314, "mean_token_accuracy": 0.6488352914651235, "num_tokens": 611814510.0, "step": 3640 }, { "entropy": 1.680074393749237, "epoch": 0.3999890143088627, "grad_norm": 0.7552538514137268, "learning_rate": 1.867152273872988e-05, "loss": 1.3634, "mean_token_accuracy": 0.662238617738088, "num_tokens": 611955456.0, "step": 3641 }, { "entropy": 1.715712159872055, "epoch": 0.40009887122023563, "grad_norm": 0.612894594669342, "learning_rate": 1.86706853802369e-05, "loss": 1.387, "mean_token_accuracy": 0.6484750012556711, "num_tokens": 612145076.0, "step": 3642 }, { "entropy": 1.7193404138088226, "epoch": 0.40020872813160857, "grad_norm": 0.6383233070373535, "learning_rate": 1.866984777897019e-05, "loss": 1.3055, "mean_token_accuracy": 0.6797666301329931, "num_tokens": 612270224.0, "step": 3643 }, { "entropy": 1.6991442839304607, "epoch": 0.4003185850429815, "grad_norm": 0.6264839172363281, "learning_rate": 1.8669009934956256e-05, "loss": 1.376, "mean_token_accuracy": 0.6612722476323446, "num_tokens": 612439048.0, "step": 3644 }, { "entropy": 1.700903097788493, "epoch": 0.40042844195435445, "grad_norm": 0.7194094061851501, "learning_rate": 1.866817184822161e-05, "loss": 1.4956, "mean_token_accuracy": 0.6383681247631708, "num_tokens": 612610609.0, "step": 3645 }, { "entropy": 1.6729531685511272, "epoch": 0.4005382988657274, "grad_norm": 0.6744722127914429, "learning_rate": 1.8667333518792786e-05, "loss": 1.3917, "mean_token_accuracy": 0.674171636501948, "num_tokens": 612762506.0, "step": 3646 }, { "entropy": 1.7685239613056183, "epoch": 0.40064815577710033, "grad_norm": 0.7136297821998596, "learning_rate": 1.8666494946696306e-05, "loss": 1.5135, "mean_token_accuracy": 0.6325026253859202, "num_tokens": 612936758.0, "step": 3647 }, { "entropy": 1.702545295159022, "epoch": 0.4007580126884733, "grad_norm": 0.6659870147705078, "learning_rate": 1.8665656131958717e-05, "loss": 1.3334, "mean_token_accuracy": 0.6652649194002151, "num_tokens": 613057363.0, "step": 3648 }, { "entropy": 1.7028338809808095, "epoch": 0.4008678695998462, "grad_norm": 0.6186485290527344, "learning_rate": 1.8664817074606565e-05, "loss": 1.6413, "mean_token_accuracy": 0.6314787616332372, "num_tokens": 613307099.0, "step": 3649 }, { "entropy": 1.7399461170037587, "epoch": 0.40097772651121916, "grad_norm": 0.7118646502494812, "learning_rate": 1.8663977774666403e-05, "loss": 1.3848, "mean_token_accuracy": 0.6634780565897623, "num_tokens": 613478548.0, "step": 3650 }, { "entropy": 1.7087344527244568, "epoch": 0.4010875834225921, "grad_norm": 0.6944850087165833, "learning_rate": 1.8663138232164804e-05, "loss": 1.5035, "mean_token_accuracy": 0.6449535042047501, "num_tokens": 613651212.0, "step": 3651 }, { "entropy": 1.679858426253001, "epoch": 0.401197440333965, "grad_norm": 0.7054488062858582, "learning_rate": 1.866229844712833e-05, "loss": 1.3144, "mean_token_accuracy": 0.6721477111180624, "num_tokens": 613780246.0, "step": 3652 }, { "entropy": 1.715953419605891, "epoch": 0.4013072972453379, "grad_norm": 0.7255501747131348, "learning_rate": 1.8661458419583563e-05, "loss": 1.3845, "mean_token_accuracy": 0.6507025410731634, "num_tokens": 613943278.0, "step": 3653 }, { "entropy": 1.7305433750152588, "epoch": 0.40141715415671086, "grad_norm": 0.6626403331756592, "learning_rate": 1.866061814955709e-05, "loss": 1.5169, "mean_token_accuracy": 0.6476994504531225, "num_tokens": 614082100.0, "step": 3654 }, { "entropy": 1.7705755233764648, "epoch": 0.4015270110680838, "grad_norm": 0.8271293640136719, "learning_rate": 1.8659777637075503e-05, "loss": 1.6316, "mean_token_accuracy": 0.6363982160886129, "num_tokens": 614323862.0, "step": 3655 }, { "entropy": 1.7688746849695842, "epoch": 0.40163686797945675, "grad_norm": 0.6675280928611755, "learning_rate": 1.8658936882165408e-05, "loss": 1.4183, "mean_token_accuracy": 0.6585695048173269, "num_tokens": 614479973.0, "step": 3656 }, { "entropy": 1.6870961685975392, "epoch": 0.4017467248908297, "grad_norm": 0.7141885161399841, "learning_rate": 1.8658095884853412e-05, "loss": 1.369, "mean_token_accuracy": 0.6499126503864924, "num_tokens": 614624882.0, "step": 3657 }, { "entropy": 1.7352818648020427, "epoch": 0.40185658180220263, "grad_norm": 0.8047354221343994, "learning_rate": 1.865725464516613e-05, "loss": 1.3427, "mean_token_accuracy": 0.6598058293263117, "num_tokens": 614770100.0, "step": 3658 }, { "entropy": 1.702322781085968, "epoch": 0.40196643871357557, "grad_norm": 0.5614283680915833, "learning_rate": 1.865641316313019e-05, "loss": 1.3603, "mean_token_accuracy": 0.6561979601780573, "num_tokens": 614947375.0, "step": 3659 }, { "entropy": 1.68387637535731, "epoch": 0.4020762956249485, "grad_norm": 0.6392550468444824, "learning_rate": 1.865557143877222e-05, "loss": 1.3327, "mean_token_accuracy": 0.6635664403438568, "num_tokens": 615118931.0, "step": 3660 }, { "entropy": 1.8135263323783875, "epoch": 0.40218615253632145, "grad_norm": 0.586390495300293, "learning_rate": 1.8654729472118867e-05, "loss": 1.3753, "mean_token_accuracy": 0.6475066045920054, "num_tokens": 615330975.0, "step": 3661 }, { "entropy": 1.6571489373842876, "epoch": 0.4022960094476944, "grad_norm": 0.6854637861251831, "learning_rate": 1.8653887263196775e-05, "loss": 1.4143, "mean_token_accuracy": 0.6672212878863016, "num_tokens": 615511217.0, "step": 3662 }, { "entropy": 1.7072084446748097, "epoch": 0.40240586635906733, "grad_norm": 0.6002740859985352, "learning_rate": 1.86530448120326e-05, "loss": 1.3311, "mean_token_accuracy": 0.6561245868603388, "num_tokens": 615670287.0, "step": 3663 }, { "entropy": 1.709785560766856, "epoch": 0.4025157232704403, "grad_norm": 0.6385271549224854, "learning_rate": 1.8652202118653005e-05, "loss": 1.4953, "mean_token_accuracy": 0.6401058932145437, "num_tokens": 615857571.0, "step": 3664 }, { "entropy": 1.6435896158218384, "epoch": 0.4026255801818132, "grad_norm": 0.66823810338974, "learning_rate": 1.8651359183084664e-05, "loss": 1.3375, "mean_token_accuracy": 0.6652998874584833, "num_tokens": 616018039.0, "step": 3665 }, { "entropy": 1.6801452438036601, "epoch": 0.4027354370931861, "grad_norm": 0.5973647832870483, "learning_rate": 1.8650516005354245e-05, "loss": 1.44, "mean_token_accuracy": 0.6624323775370916, "num_tokens": 616182931.0, "step": 3666 }, { "entropy": 1.7209701438744862, "epoch": 0.40284529400455904, "grad_norm": 0.7251614332199097, "learning_rate": 1.864967258548845e-05, "loss": 1.3669, "mean_token_accuracy": 0.6691777855157852, "num_tokens": 616300469.0, "step": 3667 }, { "entropy": 1.7649596532185872, "epoch": 0.402955150915932, "grad_norm": 0.7085322737693787, "learning_rate": 1.864882892351396e-05, "loss": 1.4425, "mean_token_accuracy": 0.6525527884562811, "num_tokens": 616507234.0, "step": 3668 }, { "entropy": 1.7601352433363597, "epoch": 0.4030650078273049, "grad_norm": 0.8236812353134155, "learning_rate": 1.8647985019457482e-05, "loss": 1.3432, "mean_token_accuracy": 0.6626549661159515, "num_tokens": 616629147.0, "step": 3669 }, { "entropy": 1.6714808940887451, "epoch": 0.40317486473867786, "grad_norm": 0.7395771145820618, "learning_rate": 1.8647140873345727e-05, "loss": 1.3773, "mean_token_accuracy": 0.6534648189942042, "num_tokens": 616778411.0, "step": 3670 }, { "entropy": 1.6952777008215587, "epoch": 0.4032847216500508, "grad_norm": 0.6873879432678223, "learning_rate": 1.864629648520541e-05, "loss": 1.3186, "mean_token_accuracy": 0.66050224006176, "num_tokens": 616922969.0, "step": 3671 }, { "entropy": 1.6377867658933003, "epoch": 0.40339457856142374, "grad_norm": 0.6935714483261108, "learning_rate": 1.8645451855063252e-05, "loss": 1.3697, "mean_token_accuracy": 0.662156730890274, "num_tokens": 617068169.0, "step": 3672 }, { "entropy": 1.704243501027425, "epoch": 0.4035044354727967, "grad_norm": 1.5501242876052856, "learning_rate": 1.8644606982945988e-05, "loss": 1.2649, "mean_token_accuracy": 0.6755559096733729, "num_tokens": 617234636.0, "step": 3673 }, { "entropy": 1.7551777064800262, "epoch": 0.4036142923841696, "grad_norm": 0.7300415635108948, "learning_rate": 1.8643761868880356e-05, "loss": 1.531, "mean_token_accuracy": 0.6428199609120687, "num_tokens": 617415408.0, "step": 3674 }, { "entropy": 1.7334574957688649, "epoch": 0.40372414929554257, "grad_norm": 0.6666220426559448, "learning_rate": 1.8642916512893108e-05, "loss": 1.4268, "mean_token_accuracy": 0.6364853282769521, "num_tokens": 617605330.0, "step": 3675 }, { "entropy": 1.766120086113612, "epoch": 0.4038340062069155, "grad_norm": 0.8933466076850891, "learning_rate": 1.8642070915010994e-05, "loss": 1.5728, "mean_token_accuracy": 0.6363476316134135, "num_tokens": 617780089.0, "step": 3676 }, { "entropy": 1.730029950539271, "epoch": 0.40394386311828845, "grad_norm": 0.7603272795677185, "learning_rate": 1.8641225075260784e-05, "loss": 1.3936, "mean_token_accuracy": 0.6469805290301641, "num_tokens": 617927420.0, "step": 3677 }, { "entropy": 1.689517339070638, "epoch": 0.4040537200296614, "grad_norm": 0.6826215386390686, "learning_rate": 1.864037899366924e-05, "loss": 1.4607, "mean_token_accuracy": 0.6500324904918671, "num_tokens": 618092883.0, "step": 3678 }, { "entropy": 1.663270503282547, "epoch": 0.40416357694103433, "grad_norm": 0.7083759307861328, "learning_rate": 1.8639532670263142e-05, "loss": 1.3416, "mean_token_accuracy": 0.663003941377004, "num_tokens": 618253174.0, "step": 3679 }, { "entropy": 1.6893990735212963, "epoch": 0.4042734338524072, "grad_norm": 0.6897298693656921, "learning_rate": 1.863868610506928e-05, "loss": 1.3883, "mean_token_accuracy": 0.6646070778369904, "num_tokens": 618398292.0, "step": 3680 }, { "entropy": 1.7317078014214833, "epoch": 0.40438329076378016, "grad_norm": 0.7244861721992493, "learning_rate": 1.8637839298114445e-05, "loss": 1.3297, "mean_token_accuracy": 0.6592196226119995, "num_tokens": 618534629.0, "step": 3681 }, { "entropy": 1.7509274383385975, "epoch": 0.4044931476751531, "grad_norm": 0.7792545557022095, "learning_rate": 1.8636992249425436e-05, "loss": 1.2519, "mean_token_accuracy": 0.6701816469430923, "num_tokens": 618627524.0, "step": 3682 }, { "entropy": 1.659875859816869, "epoch": 0.40460300458652604, "grad_norm": 0.7609748244285583, "learning_rate": 1.8636144959029063e-05, "loss": 1.4746, "mean_token_accuracy": 0.6501151074965795, "num_tokens": 618774107.0, "step": 3683 }, { "entropy": 1.7485106388727825, "epoch": 0.404712861497899, "grad_norm": 0.8140855431556702, "learning_rate": 1.8635297426952147e-05, "loss": 1.4954, "mean_token_accuracy": 0.6546749224265417, "num_tokens": 618906293.0, "step": 3684 }, { "entropy": 1.6669574677944183, "epoch": 0.4048227184092719, "grad_norm": 0.6584160923957825, "learning_rate": 1.8634449653221505e-05, "loss": 1.3567, "mean_token_accuracy": 0.6570019920667013, "num_tokens": 619075570.0, "step": 3685 }, { "entropy": 1.6605021357536316, "epoch": 0.40493257532064486, "grad_norm": 0.5621921420097351, "learning_rate": 1.863360163786397e-05, "loss": 1.4514, "mean_token_accuracy": 0.6414414793252945, "num_tokens": 619360684.0, "step": 3686 }, { "entropy": 1.6636900802453358, "epoch": 0.4050424322320178, "grad_norm": 0.6727263331413269, "learning_rate": 1.8632753380906387e-05, "loss": 1.3516, "mean_token_accuracy": 0.6557752440373102, "num_tokens": 619555666.0, "step": 3687 }, { "entropy": 1.7114156087239583, "epoch": 0.40515228914339074, "grad_norm": 0.6485511064529419, "learning_rate": 1.8631904882375595e-05, "loss": 1.5021, "mean_token_accuracy": 0.6425280173619589, "num_tokens": 619733822.0, "step": 3688 }, { "entropy": 1.6890954673290253, "epoch": 0.4052621460547637, "grad_norm": 0.6665855646133423, "learning_rate": 1.8631056142298457e-05, "loss": 1.3798, "mean_token_accuracy": 0.6521730422973633, "num_tokens": 619927481.0, "step": 3689 }, { "entropy": 1.6435322761535645, "epoch": 0.4053720029661366, "grad_norm": 0.6213128566741943, "learning_rate": 1.8630207160701827e-05, "loss": 1.3392, "mean_token_accuracy": 0.6614899933338165, "num_tokens": 620096992.0, "step": 3690 }, { "entropy": 1.7578924596309662, "epoch": 0.40548185987750957, "grad_norm": 0.7708293795585632, "learning_rate": 1.862935793761258e-05, "loss": 1.1762, "mean_token_accuracy": 0.6814229289690653, "num_tokens": 620243412.0, "step": 3691 }, { "entropy": 1.7739320397377014, "epoch": 0.4055917167888825, "grad_norm": 0.8655069470405579, "learning_rate": 1.8628508473057592e-05, "loss": 1.5393, "mean_token_accuracy": 0.648578479886055, "num_tokens": 620390926.0, "step": 3692 }, { "entropy": 1.70758917927742, "epoch": 0.4057015737002554, "grad_norm": 1.9093140363693237, "learning_rate": 1.862765876706375e-05, "loss": 1.098, "mean_token_accuracy": 0.6809806078672409, "num_tokens": 620557421.0, "step": 3693 }, { "entropy": 1.6794420282046, "epoch": 0.40581143061162833, "grad_norm": 0.679584801197052, "learning_rate": 1.862680881965794e-05, "loss": 1.3353, "mean_token_accuracy": 0.6540734767913818, "num_tokens": 620718410.0, "step": 3694 }, { "entropy": 1.73444531361262, "epoch": 0.4059212875230013, "grad_norm": 0.6241161227226257, "learning_rate": 1.8625958630867072e-05, "loss": 1.3099, "mean_token_accuracy": 0.6632533123095831, "num_tokens": 620863809.0, "step": 3695 }, { "entropy": 1.7056554555892944, "epoch": 0.4060311444343742, "grad_norm": 0.7893586158752441, "learning_rate": 1.862510820071805e-05, "loss": 1.3113, "mean_token_accuracy": 0.6655853390693665, "num_tokens": 620989409.0, "step": 3696 }, { "entropy": 1.6801581581433613, "epoch": 0.40614100134574715, "grad_norm": 0.7889465689659119, "learning_rate": 1.862425752923779e-05, "loss": 1.4217, "mean_token_accuracy": 0.6615896672010422, "num_tokens": 621155720.0, "step": 3697 }, { "entropy": 1.7147592107454936, "epoch": 0.4062508582571201, "grad_norm": 0.7899035215377808, "learning_rate": 1.8623406616453213e-05, "loss": 1.271, "mean_token_accuracy": 0.6728020161390305, "num_tokens": 621271342.0, "step": 3698 }, { "entropy": 1.707566757996877, "epoch": 0.40636071516849304, "grad_norm": 0.7400942444801331, "learning_rate": 1.862255546239125e-05, "loss": 1.2851, "mean_token_accuracy": 0.6638195067644119, "num_tokens": 621403853.0, "step": 3699 }, { "entropy": 1.7490671475728352, "epoch": 0.406470572079866, "grad_norm": 0.6932427287101746, "learning_rate": 1.8621704067078842e-05, "loss": 1.3392, "mean_token_accuracy": 0.6685472031434377, "num_tokens": 621515651.0, "step": 3700 }, { "entropy": 1.7084580659866333, "epoch": 0.4065804289912389, "grad_norm": 0.6444189548492432, "learning_rate": 1.8620852430542936e-05, "loss": 1.3692, "mean_token_accuracy": 0.6660451342662176, "num_tokens": 621678160.0, "step": 3701 }, { "entropy": 1.734459678332011, "epoch": 0.40669028590261186, "grad_norm": 0.7342776656150818, "learning_rate": 1.8620000552810488e-05, "loss": 1.3653, "mean_token_accuracy": 0.6498020191987356, "num_tokens": 621814935.0, "step": 3702 }, { "entropy": 1.642607440551122, "epoch": 0.4068001428139848, "grad_norm": 0.6483259201049805, "learning_rate": 1.861914843390845e-05, "loss": 1.3316, "mean_token_accuracy": 0.6714847981929779, "num_tokens": 621967804.0, "step": 3703 }, { "entropy": 1.7405053277810414, "epoch": 0.40690999972535774, "grad_norm": 0.6480836868286133, "learning_rate": 1.86182960738638e-05, "loss": 1.459, "mean_token_accuracy": 0.6403814305861791, "num_tokens": 622156492.0, "step": 3704 }, { "entropy": 1.6267158389091492, "epoch": 0.4070198566367307, "grad_norm": 0.7061080932617188, "learning_rate": 1.8617443472703514e-05, "loss": 1.2146, "mean_token_accuracy": 0.684728279709816, "num_tokens": 622266394.0, "step": 3705 }, { "entropy": 1.7066716353098552, "epoch": 0.4071297135481036, "grad_norm": 0.8183558583259583, "learning_rate": 1.861659063045457e-05, "loss": 1.4803, "mean_token_accuracy": 0.6585542360941569, "num_tokens": 622446280.0, "step": 3706 }, { "entropy": 1.7405121624469757, "epoch": 0.4072395704594765, "grad_norm": 0.7175570726394653, "learning_rate": 1.8615737547143968e-05, "loss": 1.5347, "mean_token_accuracy": 0.6411835898955663, "num_tokens": 622610924.0, "step": 3707 }, { "entropy": 1.7092544833819072, "epoch": 0.40734942737084945, "grad_norm": 0.9672302007675171, "learning_rate": 1.8614884222798705e-05, "loss": 1.4165, "mean_token_accuracy": 0.6539698441823324, "num_tokens": 622775599.0, "step": 3708 }, { "entropy": 1.7085430522759755, "epoch": 0.4074592842822224, "grad_norm": 0.6571292281150818, "learning_rate": 1.8614030657445785e-05, "loss": 1.4122, "mean_token_accuracy": 0.6727810104688009, "num_tokens": 622962845.0, "step": 3709 }, { "entropy": 1.7726508279641469, "epoch": 0.40756914119359533, "grad_norm": 0.6792343854904175, "learning_rate": 1.861317685111223e-05, "loss": 1.5236, "mean_token_accuracy": 0.6427791565656662, "num_tokens": 623157911.0, "step": 3710 }, { "entropy": 1.7251368661721547, "epoch": 0.40767899810496827, "grad_norm": 0.7350800037384033, "learning_rate": 1.8612322803825053e-05, "loss": 1.2582, "mean_token_accuracy": 0.678009644150734, "num_tokens": 623285497.0, "step": 3711 }, { "entropy": 1.6766076783339183, "epoch": 0.4077888550163412, "grad_norm": 0.6149843335151672, "learning_rate": 1.861146851561129e-05, "loss": 1.51, "mean_token_accuracy": 0.6478391562898954, "num_tokens": 623448272.0, "step": 3712 }, { "entropy": 1.6991031368573506, "epoch": 0.40789871192771415, "grad_norm": 0.6349066495895386, "learning_rate": 1.861061398649798e-05, "loss": 1.2536, "mean_token_accuracy": 0.6752463430166245, "num_tokens": 623590450.0, "step": 3713 }, { "entropy": 1.7172236144542694, "epoch": 0.4080085688390871, "grad_norm": 0.6747751832008362, "learning_rate": 1.860975921651217e-05, "loss": 1.5217, "mean_token_accuracy": 0.6311314900716146, "num_tokens": 623802101.0, "step": 3714 }, { "entropy": 1.7139861385027568, "epoch": 0.40811842575046003, "grad_norm": 0.6247215270996094, "learning_rate": 1.8608904205680906e-05, "loss": 1.4003, "mean_token_accuracy": 0.6497706820567449, "num_tokens": 623950911.0, "step": 3715 }, { "entropy": 1.749284307161967, "epoch": 0.408228282661833, "grad_norm": 0.5963855981826782, "learning_rate": 1.8608048954031254e-05, "loss": 1.4282, "mean_token_accuracy": 0.6429949502150217, "num_tokens": 624154271.0, "step": 3716 }, { "entropy": 1.7446833749612172, "epoch": 0.4083381395732059, "grad_norm": 0.6486496925354004, "learning_rate": 1.8607193461590277e-05, "loss": 1.3916, "mean_token_accuracy": 0.6541141470273336, "num_tokens": 624326903.0, "step": 3717 }, { "entropy": 1.6626928846041362, "epoch": 0.40844799648457886, "grad_norm": 0.7017315030097961, "learning_rate": 1.860633772838506e-05, "loss": 1.3665, "mean_token_accuracy": 0.6529461542765299, "num_tokens": 624533835.0, "step": 3718 }, { "entropy": 1.6866288880507152, "epoch": 0.4085578533959518, "grad_norm": 0.7589619159698486, "learning_rate": 1.860548175444268e-05, "loss": 1.3975, "mean_token_accuracy": 0.6569940547148386, "num_tokens": 624695576.0, "step": 3719 }, { "entropy": 1.7539471586545308, "epoch": 0.4086677103073247, "grad_norm": 0.7970021963119507, "learning_rate": 1.8604625539790228e-05, "loss": 1.6079, "mean_token_accuracy": 0.640375425418218, "num_tokens": 624842347.0, "step": 3720 }, { "entropy": 1.6378857394059498, "epoch": 0.4087775672186976, "grad_norm": 0.7092182040214539, "learning_rate": 1.8603769084454804e-05, "loss": 1.4028, "mean_token_accuracy": 0.6625880300998688, "num_tokens": 625027646.0, "step": 3721 }, { "entropy": 1.618373692035675, "epoch": 0.40888742413007056, "grad_norm": 0.6855277419090271, "learning_rate": 1.8602912388463517e-05, "loss": 1.4195, "mean_token_accuracy": 0.6505888452132543, "num_tokens": 625206719.0, "step": 3722 }, { "entropy": 1.674377828836441, "epoch": 0.4089972810414435, "grad_norm": 0.6490065455436707, "learning_rate": 1.8602055451843478e-05, "loss": 1.2185, "mean_token_accuracy": 0.6873969038327535, "num_tokens": 625327376.0, "step": 3723 }, { "entropy": 1.664337585369746, "epoch": 0.40910713795281645, "grad_norm": 0.7654528021812439, "learning_rate": 1.860119827462181e-05, "loss": 1.4033, "mean_token_accuracy": 0.6536544114351273, "num_tokens": 625505992.0, "step": 3724 }, { "entropy": 1.7024443646272023, "epoch": 0.4092169948641894, "grad_norm": 0.6893821954727173, "learning_rate": 1.860034085682564e-05, "loss": 1.437, "mean_token_accuracy": 0.6580548087755839, "num_tokens": 625700752.0, "step": 3725 }, { "entropy": 1.6648207604885101, "epoch": 0.40932685177556233, "grad_norm": 0.7746477723121643, "learning_rate": 1.859948319848211e-05, "loss": 1.1891, "mean_token_accuracy": 0.6822925706704458, "num_tokens": 625814896.0, "step": 3726 }, { "entropy": 1.7079435586929321, "epoch": 0.40943670868693527, "grad_norm": 0.6155371069908142, "learning_rate": 1.859862529961836e-05, "loss": 1.3543, "mean_token_accuracy": 0.668289711078008, "num_tokens": 625962535.0, "step": 3727 }, { "entropy": 1.7189550697803497, "epoch": 0.4095465655983082, "grad_norm": 0.7430447936058044, "learning_rate": 1.859776716026154e-05, "loss": 1.3355, "mean_token_accuracy": 0.6572021146615347, "num_tokens": 626096526.0, "step": 3728 }, { "entropy": 1.6810458799203236, "epoch": 0.40965642250968115, "grad_norm": 1.085142731666565, "learning_rate": 1.8596908780438814e-05, "loss": 1.0983, "mean_token_accuracy": 0.6744260291258494, "num_tokens": 626249920.0, "step": 3729 }, { "entropy": 1.7445210615793865, "epoch": 0.4097662794210541, "grad_norm": 0.7419488430023193, "learning_rate": 1.8596050160177352e-05, "loss": 1.3508, "mean_token_accuracy": 0.66064981619517, "num_tokens": 626378039.0, "step": 3730 }, { "entropy": 1.760192632675171, "epoch": 0.40987613633242703, "grad_norm": 0.6193534135818481, "learning_rate": 1.859519129950432e-05, "loss": 1.3936, "mean_token_accuracy": 0.6515404631694158, "num_tokens": 626544591.0, "step": 3731 }, { "entropy": 1.7204260925451915, "epoch": 0.4099859932438, "grad_norm": 1.0320173501968384, "learning_rate": 1.859433219844691e-05, "loss": 1.5482, "mean_token_accuracy": 0.6566920280456543, "num_tokens": 626683443.0, "step": 3732 }, { "entropy": 1.7170347174008687, "epoch": 0.4100958501551729, "grad_norm": 0.747575044631958, "learning_rate": 1.8593472857032308e-05, "loss": 1.4343, "mean_token_accuracy": 0.6497025390466055, "num_tokens": 626814527.0, "step": 3733 }, { "entropy": 1.6637854278087616, "epoch": 0.4102057070665458, "grad_norm": 0.6838406324386597, "learning_rate": 1.859261327528771e-05, "loss": 1.3744, "mean_token_accuracy": 0.6690096110105515, "num_tokens": 626996166.0, "step": 3734 }, { "entropy": 1.6826592286427815, "epoch": 0.41031556397791874, "grad_norm": 0.6222158670425415, "learning_rate": 1.8591753453240325e-05, "loss": 1.4596, "mean_token_accuracy": 0.6647855440775553, "num_tokens": 627196669.0, "step": 3735 }, { "entropy": 1.678474356730779, "epoch": 0.4104254208892917, "grad_norm": 0.6561827659606934, "learning_rate": 1.8590893390917363e-05, "loss": 1.3236, "mean_token_accuracy": 0.6612599492073059, "num_tokens": 627341643.0, "step": 3736 }, { "entropy": 1.6994609435399373, "epoch": 0.4105352778006646, "grad_norm": 0.6723978519439697, "learning_rate": 1.8590033088346045e-05, "loss": 1.4024, "mean_token_accuracy": 0.6625661303599676, "num_tokens": 627504221.0, "step": 3737 }, { "entropy": 1.7354566156864166, "epoch": 0.41064513471203756, "grad_norm": 0.6468124985694885, "learning_rate": 1.85891725455536e-05, "loss": 1.3671, "mean_token_accuracy": 0.6559626658757528, "num_tokens": 627629476.0, "step": 3738 }, { "entropy": 1.7572604417800903, "epoch": 0.4107549916234105, "grad_norm": 0.6548281908035278, "learning_rate": 1.8588311762567265e-05, "loss": 1.4999, "mean_token_accuracy": 0.649440790216128, "num_tokens": 627838130.0, "step": 3739 }, { "entropy": 1.6867552896340687, "epoch": 0.41086484853478344, "grad_norm": 0.6936057806015015, "learning_rate": 1.8587450739414282e-05, "loss": 1.3552, "mean_token_accuracy": 0.6642784823973974, "num_tokens": 628019384.0, "step": 3740 }, { "entropy": 1.7941297590732574, "epoch": 0.4109747054461564, "grad_norm": 0.6662188768386841, "learning_rate": 1.85865894761219e-05, "loss": 1.4409, "mean_token_accuracy": 0.6453611056009928, "num_tokens": 628160091.0, "step": 3741 }, { "entropy": 1.677331139643987, "epoch": 0.4110845623575293, "grad_norm": 0.7274539470672607, "learning_rate": 1.858572797271738e-05, "loss": 1.2365, "mean_token_accuracy": 0.6719293495019277, "num_tokens": 628273375.0, "step": 3742 }, { "entropy": 1.7725926240285237, "epoch": 0.41119441926890227, "grad_norm": 0.7785374522209167, "learning_rate": 1.8584866229227992e-05, "loss": 1.2674, "mean_token_accuracy": 0.6794732809066772, "num_tokens": 628386434.0, "step": 3743 }, { "entropy": 1.7157474358876545, "epoch": 0.4113042761802752, "grad_norm": 0.6738847494125366, "learning_rate": 1.8584004245681e-05, "loss": 1.3154, "mean_token_accuracy": 0.6582375268141428, "num_tokens": 628503456.0, "step": 3744 }, { "entropy": 1.6891884605089824, "epoch": 0.41141413309164815, "grad_norm": 0.706418514251709, "learning_rate": 1.8583142022103694e-05, "loss": 1.5105, "mean_token_accuracy": 0.6367523421843847, "num_tokens": 628722141.0, "step": 3745 }, { "entropy": 1.771670550107956, "epoch": 0.4115239900030211, "grad_norm": 0.8060712814331055, "learning_rate": 1.858227955852336e-05, "loss": 1.3965, "mean_token_accuracy": 0.6502161224683126, "num_tokens": 628870665.0, "step": 3746 }, { "entropy": 1.6912760337193806, "epoch": 0.411633846914394, "grad_norm": 0.6966044902801514, "learning_rate": 1.8581416854967293e-05, "loss": 1.4553, "mean_token_accuracy": 0.6377379248539606, "num_tokens": 629086050.0, "step": 3747 }, { "entropy": 1.7380321621894836, "epoch": 0.4117437038257669, "grad_norm": 0.6073872447013855, "learning_rate": 1.85805539114628e-05, "loss": 1.4282, "mean_token_accuracy": 0.6448562443256378, "num_tokens": 629258009.0, "step": 3748 }, { "entropy": 1.6827170252799988, "epoch": 0.41185356073713986, "grad_norm": 0.6829259395599365, "learning_rate": 1.8579690728037195e-05, "loss": 1.3912, "mean_token_accuracy": 0.6493991017341614, "num_tokens": 629397477.0, "step": 3749 }, { "entropy": 1.72429492076238, "epoch": 0.4119634176485128, "grad_norm": 0.6729731559753418, "learning_rate": 1.857882730471779e-05, "loss": 1.3648, "mean_token_accuracy": 0.6580958614746729, "num_tokens": 629596068.0, "step": 3750 }, { "entropy": 1.7216622432072957, "epoch": 0.41207327455988574, "grad_norm": 0.7398632764816284, "learning_rate": 1.8577963641531915e-05, "loss": 1.2278, "mean_token_accuracy": 0.6858376761277517, "num_tokens": 629741637.0, "step": 3751 }, { "entropy": 1.7678700387477875, "epoch": 0.4121831314712587, "grad_norm": 1.0076205730438232, "learning_rate": 1.857709973850691e-05, "loss": 1.4064, "mean_token_accuracy": 0.6636515309413274, "num_tokens": 629853316.0, "step": 3752 }, { "entropy": 1.7365649839242299, "epoch": 0.4122929883826316, "grad_norm": 0.673788845539093, "learning_rate": 1.8576235595670105e-05, "loss": 1.2618, "mean_token_accuracy": 0.6891842633485794, "num_tokens": 629972831.0, "step": 3753 }, { "entropy": 1.7559735278288524, "epoch": 0.41240284529400456, "grad_norm": 0.9235732555389404, "learning_rate": 1.8575371213048867e-05, "loss": 1.4149, "mean_token_accuracy": 0.6592928121487299, "num_tokens": 630109076.0, "step": 3754 }, { "entropy": 1.7470454176266987, "epoch": 0.4125127022053775, "grad_norm": 0.6771215796470642, "learning_rate": 1.8574506590670534e-05, "loss": 1.4049, "mean_token_accuracy": 0.6512129505475363, "num_tokens": 630262653.0, "step": 3755 }, { "entropy": 1.6346890528996785, "epoch": 0.41262255911675044, "grad_norm": 0.6646851897239685, "learning_rate": 1.8573641728562488e-05, "loss": 1.5104, "mean_token_accuracy": 0.64119320611159, "num_tokens": 630444943.0, "step": 3756 }, { "entropy": 1.7178200781345367, "epoch": 0.4127324160281234, "grad_norm": 0.6681410670280457, "learning_rate": 1.8572776626752092e-05, "loss": 1.3195, "mean_token_accuracy": 0.6758150657018026, "num_tokens": 630608705.0, "step": 3757 }, { "entropy": 1.7018628120422363, "epoch": 0.4128422729394963, "grad_norm": 0.8846207857131958, "learning_rate": 1.857191128526673e-05, "loss": 1.5712, "mean_token_accuracy": 0.6433374732732773, "num_tokens": 630828140.0, "step": 3758 }, { "entropy": 1.7014219065507252, "epoch": 0.41295212985086927, "grad_norm": 0.6484429836273193, "learning_rate": 1.857104570413378e-05, "loss": 1.342, "mean_token_accuracy": 0.692674994468689, "num_tokens": 630979788.0, "step": 3759 }, { "entropy": 1.6751723786195118, "epoch": 0.4130619867622422, "grad_norm": 0.6921824812889099, "learning_rate": 1.8570179883380652e-05, "loss": 1.4416, "mean_token_accuracy": 0.6488529096047083, "num_tokens": 631180442.0, "step": 3760 }, { "entropy": 1.680382361014684, "epoch": 0.4131718436736151, "grad_norm": 0.7185025215148926, "learning_rate": 1.8569313823034743e-05, "loss": 1.4073, "mean_token_accuracy": 0.6465843468904495, "num_tokens": 631387922.0, "step": 3761 }, { "entropy": 1.6748826801776886, "epoch": 0.41328170058498803, "grad_norm": 0.592786967754364, "learning_rate": 1.8568447523123457e-05, "loss": 1.5418, "mean_token_accuracy": 0.6322498073180517, "num_tokens": 631598046.0, "step": 3762 }, { "entropy": 1.7133816381295521, "epoch": 0.413391557496361, "grad_norm": 0.7715355157852173, "learning_rate": 1.856758098367422e-05, "loss": 1.2644, "mean_token_accuracy": 0.6847187926371893, "num_tokens": 631744350.0, "step": 3763 }, { "entropy": 1.7190758188565571, "epoch": 0.4135014144077339, "grad_norm": 0.7023261189460754, "learning_rate": 1.8566714204714454e-05, "loss": 1.3741, "mean_token_accuracy": 0.6586879988511404, "num_tokens": 631894493.0, "step": 3764 }, { "entropy": 1.7163704931735992, "epoch": 0.41361127131910685, "grad_norm": 0.8502719402313232, "learning_rate": 1.8565847186271594e-05, "loss": 1.5034, "mean_token_accuracy": 0.6552699059247971, "num_tokens": 632056905.0, "step": 3765 }, { "entropy": 1.679179718097051, "epoch": 0.4137211282304798, "grad_norm": 0.7581773400306702, "learning_rate": 1.8564979928373083e-05, "loss": 1.2715, "mean_token_accuracy": 0.6689160714546839, "num_tokens": 632218501.0, "step": 3766 }, { "entropy": 1.6978221833705902, "epoch": 0.41383098514185274, "grad_norm": 0.7345089316368103, "learning_rate": 1.856411243104636e-05, "loss": 1.345, "mean_token_accuracy": 0.6770609468221664, "num_tokens": 632370942.0, "step": 3767 }, { "entropy": 1.727396120627721, "epoch": 0.4139408420532257, "grad_norm": 0.6835429072380066, "learning_rate": 1.856324469431889e-05, "loss": 1.2722, "mean_token_accuracy": 0.6730792969465256, "num_tokens": 632518943.0, "step": 3768 }, { "entropy": 1.7132271230220795, "epoch": 0.4140506989645986, "grad_norm": 0.7705096006393433, "learning_rate": 1.8562376718218133e-05, "loss": 1.3787, "mean_token_accuracy": 0.6688175052404404, "num_tokens": 632642148.0, "step": 3769 }, { "entropy": 1.6458029548327129, "epoch": 0.41416055587597156, "grad_norm": 0.5635362267494202, "learning_rate": 1.856150850277156e-05, "loss": 1.4653, "mean_token_accuracy": 0.6459923932949702, "num_tokens": 632871437.0, "step": 3770 }, { "entropy": 1.7273524105548859, "epoch": 0.4142704127873445, "grad_norm": 0.7517685890197754, "learning_rate": 1.8560640048006652e-05, "loss": 1.347, "mean_token_accuracy": 0.6664700706799825, "num_tokens": 633005488.0, "step": 3771 }, { "entropy": 1.6999563177426655, "epoch": 0.41438026969871744, "grad_norm": 0.6293025016784668, "learning_rate": 1.8559771353950893e-05, "loss": 1.385, "mean_token_accuracy": 0.6580439954996109, "num_tokens": 633211034.0, "step": 3772 }, { "entropy": 1.737761527299881, "epoch": 0.4144901266100904, "grad_norm": 0.7360339164733887, "learning_rate": 1.8558902420631776e-05, "loss": 1.4929, "mean_token_accuracy": 0.6542394310235977, "num_tokens": 633363034.0, "step": 3773 }, { "entropy": 1.7658338248729706, "epoch": 0.41459998352146327, "grad_norm": 0.6309468150138855, "learning_rate": 1.85580332480768e-05, "loss": 1.3943, "mean_token_accuracy": 0.6525468230247498, "num_tokens": 633528700.0, "step": 3774 }, { "entropy": 1.677381157875061, "epoch": 0.4147098404328362, "grad_norm": 0.8243128657341003, "learning_rate": 1.8557163836313486e-05, "loss": 1.4033, "mean_token_accuracy": 0.660991777976354, "num_tokens": 633681640.0, "step": 3775 }, { "entropy": 1.7099827925364177, "epoch": 0.41481969734420915, "grad_norm": 0.5814919471740723, "learning_rate": 1.8556294185369336e-05, "loss": 1.3706, "mean_token_accuracy": 0.6490457753340403, "num_tokens": 633876385.0, "step": 3776 }, { "entropy": 1.7081574300924938, "epoch": 0.4149295542555821, "grad_norm": 0.7149261236190796, "learning_rate": 1.855542429527188e-05, "loss": 1.3391, "mean_token_accuracy": 0.6678998519976934, "num_tokens": 634025843.0, "step": 3777 }, { "entropy": 1.743098219235738, "epoch": 0.41503941116695503, "grad_norm": 0.6656703352928162, "learning_rate": 1.8554554166048654e-05, "loss": 1.4263, "mean_token_accuracy": 0.6521339118480682, "num_tokens": 634164379.0, "step": 3778 }, { "entropy": 1.7285428146521251, "epoch": 0.41514926807832797, "grad_norm": 0.6746429800987244, "learning_rate": 1.8553683797727188e-05, "loss": 1.4432, "mean_token_accuracy": 0.6566011756658554, "num_tokens": 634307434.0, "step": 3779 }, { "entropy": 1.6698183516661327, "epoch": 0.4152591249897009, "grad_norm": 0.8133582472801208, "learning_rate": 1.8552813190335034e-05, "loss": 1.301, "mean_token_accuracy": 0.6644681245088577, "num_tokens": 634444248.0, "step": 3780 }, { "entropy": 1.699441949526469, "epoch": 0.41536898190107385, "grad_norm": 0.8208682537078857, "learning_rate": 1.855194234389975e-05, "loss": 1.2526, "mean_token_accuracy": 0.6720673541227976, "num_tokens": 634586931.0, "step": 3781 }, { "entropy": 1.7422561248143513, "epoch": 0.4154788388124468, "grad_norm": 0.6837664246559143, "learning_rate": 1.8551071258448892e-05, "loss": 1.6275, "mean_token_accuracy": 0.6357202082872391, "num_tokens": 634794197.0, "step": 3782 }, { "entropy": 1.6618089973926544, "epoch": 0.41558869572381973, "grad_norm": 0.7491135001182556, "learning_rate": 1.855019993401003e-05, "loss": 1.3817, "mean_token_accuracy": 0.6542213608821233, "num_tokens": 634995369.0, "step": 3783 }, { "entropy": 1.7044015129407246, "epoch": 0.4156985526351927, "grad_norm": 0.613198459148407, "learning_rate": 1.854932837061074e-05, "loss": 1.4134, "mean_token_accuracy": 0.6356542706489563, "num_tokens": 635184727.0, "step": 3784 }, { "entropy": 1.671468476454417, "epoch": 0.4158084095465656, "grad_norm": 0.6908861994743347, "learning_rate": 1.8548456568278616e-05, "loss": 1.4499, "mean_token_accuracy": 0.6709446410338084, "num_tokens": 635343732.0, "step": 3785 }, { "entropy": 1.723008821407954, "epoch": 0.41591826645793856, "grad_norm": 0.6988131403923035, "learning_rate": 1.8547584527041235e-05, "loss": 1.4046, "mean_token_accuracy": 0.6638121704260508, "num_tokens": 635507468.0, "step": 3786 }, { "entropy": 1.6880267560482025, "epoch": 0.4160281233693115, "grad_norm": 0.6676950454711914, "learning_rate": 1.8546712246926207e-05, "loss": 1.2988, "mean_token_accuracy": 0.6631821393966675, "num_tokens": 635649868.0, "step": 3787 }, { "entropy": 1.7301316161950429, "epoch": 0.4161379802806844, "grad_norm": 0.7677087783813477, "learning_rate": 1.854583972796114e-05, "loss": 1.5427, "mean_token_accuracy": 0.652143269777298, "num_tokens": 635819810.0, "step": 3788 }, { "entropy": 1.6811153590679169, "epoch": 0.4162478371920573, "grad_norm": 0.6712203621864319, "learning_rate": 1.8544966970173645e-05, "loss": 1.3512, "mean_token_accuracy": 0.6549779176712036, "num_tokens": 635984503.0, "step": 3789 }, { "entropy": 1.7444909314314525, "epoch": 0.41635769410343026, "grad_norm": 0.6587154865264893, "learning_rate": 1.8544093973591343e-05, "loss": 1.4814, "mean_token_accuracy": 0.6409175097942352, "num_tokens": 636134931.0, "step": 3790 }, { "entropy": 1.6592991352081299, "epoch": 0.4164675510148032, "grad_norm": 0.5977832078933716, "learning_rate": 1.854322073824187e-05, "loss": 1.4304, "mean_token_accuracy": 0.6406304885943731, "num_tokens": 636318464.0, "step": 3791 }, { "entropy": 1.6651886999607086, "epoch": 0.41657740792617615, "grad_norm": 0.6955407857894897, "learning_rate": 1.8542347264152855e-05, "loss": 1.3467, "mean_token_accuracy": 0.6668408364057541, "num_tokens": 636459849.0, "step": 3792 }, { "entropy": 1.7133266230424244, "epoch": 0.4166872648375491, "grad_norm": 0.7298943996429443, "learning_rate": 1.854147355135195e-05, "loss": 1.569, "mean_token_accuracy": 0.6349954207738241, "num_tokens": 636634478.0, "step": 3793 }, { "entropy": 1.6949812173843384, "epoch": 0.41679712174892203, "grad_norm": 0.9010490775108337, "learning_rate": 1.8540599599866806e-05, "loss": 1.3849, "mean_token_accuracy": 0.6672961960236231, "num_tokens": 636753414.0, "step": 3794 }, { "entropy": 1.700540026028951, "epoch": 0.41690697866029497, "grad_norm": 0.7625768184661865, "learning_rate": 1.853972540972508e-05, "loss": 1.1909, "mean_token_accuracy": 0.6855234503746033, "num_tokens": 636855409.0, "step": 3795 }, { "entropy": 1.790819029013316, "epoch": 0.4170168355716679, "grad_norm": 0.6526350975036621, "learning_rate": 1.8538850980954446e-05, "loss": 1.5692, "mean_token_accuracy": 0.6258653302987417, "num_tokens": 637050334.0, "step": 3796 }, { "entropy": 1.7093652784824371, "epoch": 0.41712669248304085, "grad_norm": 0.5973331332206726, "learning_rate": 1.8537976313582573e-05, "loss": 1.5218, "mean_token_accuracy": 0.6466895639896393, "num_tokens": 637235606.0, "step": 3797 }, { "entropy": 1.6295418043931325, "epoch": 0.4172365493944138, "grad_norm": 0.6998474597930908, "learning_rate": 1.853710140763715e-05, "loss": 1.4507, "mean_token_accuracy": 0.6505574633677801, "num_tokens": 637439394.0, "step": 3798 }, { "entropy": 1.7180461982885997, "epoch": 0.41734640630578673, "grad_norm": 0.6476940512657166, "learning_rate": 1.8536226263145857e-05, "loss": 1.6517, "mean_token_accuracy": 0.6157064388195673, "num_tokens": 637652827.0, "step": 3799 }, { "entropy": 1.7250114878018696, "epoch": 0.4174562632171597, "grad_norm": 0.7295282483100891, "learning_rate": 1.8535350880136403e-05, "loss": 1.4413, "mean_token_accuracy": 0.6517095665136973, "num_tokens": 637839760.0, "step": 3800 }, { "entropy": 1.7239097158114116, "epoch": 0.41756612012853256, "grad_norm": 0.6322441697120667, "learning_rate": 1.8534475258636488e-05, "loss": 1.4804, "mean_token_accuracy": 0.6419643859068552, "num_tokens": 638075885.0, "step": 3801 }, { "entropy": 1.6781774560610454, "epoch": 0.4176759770399055, "grad_norm": 0.6442082524299622, "learning_rate": 1.8533599398673826e-05, "loss": 1.5032, "mean_token_accuracy": 0.6423897991577784, "num_tokens": 638270735.0, "step": 3802 }, { "entropy": 1.713065505027771, "epoch": 0.41778583395127844, "grad_norm": 0.5872926712036133, "learning_rate": 1.853272330027614e-05, "loss": 1.4092, "mean_token_accuracy": 0.645249143242836, "num_tokens": 638459204.0, "step": 3803 }, { "entropy": 1.6904393831888835, "epoch": 0.4178956908626514, "grad_norm": 0.6932823061943054, "learning_rate": 1.8531846963471155e-05, "loss": 1.2647, "mean_token_accuracy": 0.674810583392779, "num_tokens": 638574639.0, "step": 3804 }, { "entropy": 1.7162601053714752, "epoch": 0.4180055477740243, "grad_norm": 0.716964840888977, "learning_rate": 1.8530970388286605e-05, "loss": 1.4352, "mean_token_accuracy": 0.6317101766665777, "num_tokens": 638765017.0, "step": 3805 }, { "entropy": 1.6731017033259075, "epoch": 0.41811540468539726, "grad_norm": 0.6435312628746033, "learning_rate": 1.853009357475024e-05, "loss": 1.3129, "mean_token_accuracy": 0.6804841359456381, "num_tokens": 638896095.0, "step": 3806 }, { "entropy": 1.7432553172111511, "epoch": 0.4182252615967702, "grad_norm": 0.668488621711731, "learning_rate": 1.8529216522889802e-05, "loss": 1.2866, "mean_token_accuracy": 0.6711077938477198, "num_tokens": 639039152.0, "step": 3807 }, { "entropy": 1.715178112188975, "epoch": 0.41833511850814314, "grad_norm": 0.8195774555206299, "learning_rate": 1.852833923273306e-05, "loss": 1.3363, "mean_token_accuracy": 0.664946511387825, "num_tokens": 639171512.0, "step": 3808 }, { "entropy": 1.6638068159421284, "epoch": 0.4184449754195161, "grad_norm": 0.6470533013343811, "learning_rate": 1.852746170430777e-05, "loss": 1.2879, "mean_token_accuracy": 0.6690946668386459, "num_tokens": 639301924.0, "step": 3809 }, { "entropy": 1.7037453750769298, "epoch": 0.418554832330889, "grad_norm": 0.6436436772346497, "learning_rate": 1.8526583937641708e-05, "loss": 1.3852, "mean_token_accuracy": 0.6688676526149114, "num_tokens": 639463966.0, "step": 3810 }, { "entropy": 1.7554031908512115, "epoch": 0.41866468924226197, "grad_norm": 0.736873984336853, "learning_rate": 1.8525705932762658e-05, "loss": 1.6376, "mean_token_accuracy": 0.6553641508022944, "num_tokens": 639647344.0, "step": 3811 }, { "entropy": 1.7499909301598866, "epoch": 0.4187745461536349, "grad_norm": 0.6630018949508667, "learning_rate": 1.8524827689698403e-05, "loss": 1.3634, "mean_token_accuracy": 0.6541923681894938, "num_tokens": 639786116.0, "step": 3812 }, { "entropy": 1.65069513519605, "epoch": 0.41888440306500785, "grad_norm": 0.5848079919815063, "learning_rate": 1.8523949208476744e-05, "loss": 1.3979, "mean_token_accuracy": 0.6475146114826202, "num_tokens": 640023361.0, "step": 3813 }, { "entropy": 1.7403970857461293, "epoch": 0.4189942599763808, "grad_norm": 0.7221378684043884, "learning_rate": 1.8523070489125484e-05, "loss": 1.3454, "mean_token_accuracy": 0.6649908721446991, "num_tokens": 640193104.0, "step": 3814 }, { "entropy": 1.700227975845337, "epoch": 0.4191041168877537, "grad_norm": 0.6060642004013062, "learning_rate": 1.8522191531672433e-05, "loss": 1.2714, "mean_token_accuracy": 0.6907776196797689, "num_tokens": 640348073.0, "step": 3815 }, { "entropy": 1.7109603087107341, "epoch": 0.4192139737991266, "grad_norm": 0.650009036064148, "learning_rate": 1.8521312336145406e-05, "loss": 1.4307, "mean_token_accuracy": 0.6513733565807343, "num_tokens": 640537136.0, "step": 3816 }, { "entropy": 1.7343104382356007, "epoch": 0.41932383071049956, "grad_norm": 0.6624605655670166, "learning_rate": 1.8520432902572238e-05, "loss": 1.5207, "mean_token_accuracy": 0.6583341757456461, "num_tokens": 640755737.0, "step": 3817 }, { "entropy": 1.6712620953718822, "epoch": 0.4194336876218725, "grad_norm": 0.8070269823074341, "learning_rate": 1.8519553230980755e-05, "loss": 1.5578, "mean_token_accuracy": 0.6397651135921478, "num_tokens": 640971967.0, "step": 3818 }, { "entropy": 1.6985514958699544, "epoch": 0.41954354453324544, "grad_norm": 0.7821574211120605, "learning_rate": 1.85186733213988e-05, "loss": 1.2669, "mean_token_accuracy": 0.6710014641284943, "num_tokens": 641114791.0, "step": 3819 }, { "entropy": 1.7274354596932728, "epoch": 0.4196534014446184, "grad_norm": 0.7574983835220337, "learning_rate": 1.8517793173854222e-05, "loss": 1.4655, "mean_token_accuracy": 0.6479006856679916, "num_tokens": 641240015.0, "step": 3820 }, { "entropy": 1.773825873931249, "epoch": 0.4197632583559913, "grad_norm": 0.8301964998245239, "learning_rate": 1.851691278837488e-05, "loss": 1.3614, "mean_token_accuracy": 0.6692739625771841, "num_tokens": 641334970.0, "step": 3821 }, { "entropy": 1.6614450514316559, "epoch": 0.41987311526736426, "grad_norm": 0.6518927216529846, "learning_rate": 1.8516032164988633e-05, "loss": 1.4603, "mean_token_accuracy": 0.6561418920755386, "num_tokens": 641505654.0, "step": 3822 }, { "entropy": 1.7007905542850494, "epoch": 0.4199829721787372, "grad_norm": 0.6501317024230957, "learning_rate": 1.8515151303723356e-05, "loss": 1.4902, "mean_token_accuracy": 0.6453222384055456, "num_tokens": 641729379.0, "step": 3823 }, { "entropy": 1.7722167372703552, "epoch": 0.42009282909011014, "grad_norm": 0.7290734648704529, "learning_rate": 1.851427020460693e-05, "loss": 1.3669, "mean_token_accuracy": 0.6568130205074946, "num_tokens": 641850231.0, "step": 3824 }, { "entropy": 1.6922112007935841, "epoch": 0.4202026860014831, "grad_norm": 0.6940089464187622, "learning_rate": 1.851338886766723e-05, "loss": 1.3394, "mean_token_accuracy": 0.6545537859201431, "num_tokens": 642018167.0, "step": 3825 }, { "entropy": 1.7041266858577728, "epoch": 0.420312542912856, "grad_norm": 0.6359767317771912, "learning_rate": 1.8512507292932164e-05, "loss": 1.3692, "mean_token_accuracy": 0.6600731213887533, "num_tokens": 642225951.0, "step": 3826 }, { "entropy": 1.7149433890978496, "epoch": 0.42042239982422897, "grad_norm": 0.7300947308540344, "learning_rate": 1.8511625480429626e-05, "loss": 1.3976, "mean_token_accuracy": 0.6626198341449102, "num_tokens": 642380453.0, "step": 3827 }, { "entropy": 1.6327539483706157, "epoch": 0.4205322567356019, "grad_norm": 0.9952742457389832, "learning_rate": 1.851074343018753e-05, "loss": 1.2784, "mean_token_accuracy": 0.6821300486723582, "num_tokens": 642539722.0, "step": 3828 }, { "entropy": 1.700599084297816, "epoch": 0.4206421136469748, "grad_norm": 0.7428448796272278, "learning_rate": 1.8509861142233783e-05, "loss": 1.4237, "mean_token_accuracy": 0.6547687749067942, "num_tokens": 642698818.0, "step": 3829 }, { "entropy": 1.7721853852272034, "epoch": 0.42075197055834773, "grad_norm": 0.6790313124656677, "learning_rate": 1.8508978616596318e-05, "loss": 1.3894, "mean_token_accuracy": 0.6521950215101242, "num_tokens": 642883257.0, "step": 3830 }, { "entropy": 1.6714214185873668, "epoch": 0.4208618274697207, "grad_norm": 0.6689066290855408, "learning_rate": 1.8508095853303064e-05, "loss": 1.2784, "mean_token_accuracy": 0.6672136187553406, "num_tokens": 643011025.0, "step": 3831 }, { "entropy": 1.7504200140635173, "epoch": 0.4209716843810936, "grad_norm": 0.6306473016738892, "learning_rate": 1.8507212852381958e-05, "loss": 1.3652, "mean_token_accuracy": 0.6601114968458811, "num_tokens": 643175908.0, "step": 3832 }, { "entropy": 1.6559196809927623, "epoch": 0.42108154129246655, "grad_norm": 0.6528786420822144, "learning_rate": 1.8506329613860944e-05, "loss": 1.3326, "mean_token_accuracy": 0.664582168062528, "num_tokens": 643328930.0, "step": 3833 }, { "entropy": 1.7539990444978077, "epoch": 0.4211913982038395, "grad_norm": 0.910399854183197, "learning_rate": 1.8505446137767984e-05, "loss": 1.4594, "mean_token_accuracy": 0.6541984180609385, "num_tokens": 643510121.0, "step": 3834 }, { "entropy": 1.7388847768306732, "epoch": 0.42130125511521244, "grad_norm": 0.6952354907989502, "learning_rate": 1.8504562424131035e-05, "loss": 1.5782, "mean_token_accuracy": 0.6242658942937851, "num_tokens": 643682378.0, "step": 3835 }, { "entropy": 1.7046812276045482, "epoch": 0.4214111120265854, "grad_norm": 0.7153732180595398, "learning_rate": 1.8503678472978072e-05, "loss": 1.5552, "mean_token_accuracy": 0.6502560079097748, "num_tokens": 643904701.0, "step": 3836 }, { "entropy": 1.7115220228830974, "epoch": 0.4215209689379583, "grad_norm": 0.7493833899497986, "learning_rate": 1.8502794284337063e-05, "loss": 1.3032, "mean_token_accuracy": 0.6713364919026693, "num_tokens": 644010789.0, "step": 3837 }, { "entropy": 1.7032727301120758, "epoch": 0.42163082584933126, "grad_norm": 0.7664533257484436, "learning_rate": 1.8501909858235996e-05, "loss": 1.2455, "mean_token_accuracy": 0.6849518169959387, "num_tokens": 644123734.0, "step": 3838 }, { "entropy": 1.7397787670294445, "epoch": 0.4217406827607042, "grad_norm": 0.5922208428382874, "learning_rate": 1.850102519470286e-05, "loss": 1.3723, "mean_token_accuracy": 0.6501923749844233, "num_tokens": 644283398.0, "step": 3839 }, { "entropy": 1.6781314810117085, "epoch": 0.42185053967207714, "grad_norm": 0.6052653193473816, "learning_rate": 1.8500140293765655e-05, "loss": 1.4668, "mean_token_accuracy": 0.6537490636110306, "num_tokens": 644454020.0, "step": 3840 }, { "entropy": 1.6567552785078685, "epoch": 0.4219603965834501, "grad_norm": 0.5396919846534729, "learning_rate": 1.8499255155452397e-05, "loss": 1.5281, "mean_token_accuracy": 0.6358696967363358, "num_tokens": 644708504.0, "step": 3841 }, { "entropy": 1.7547682126363118, "epoch": 0.42207025349482297, "grad_norm": 0.6936992406845093, "learning_rate": 1.8498369779791085e-05, "loss": 1.3401, "mean_token_accuracy": 0.6526836852232615, "num_tokens": 644878677.0, "step": 3842 }, { "entropy": 1.693197379509608, "epoch": 0.4221801104061959, "grad_norm": 0.7279648780822754, "learning_rate": 1.8497484166809752e-05, "loss": 1.3146, "mean_token_accuracy": 0.6758704036474228, "num_tokens": 645019940.0, "step": 3843 }, { "entropy": 1.69556125998497, "epoch": 0.42228996731756885, "grad_norm": 0.6791149377822876, "learning_rate": 1.8496598316536425e-05, "loss": 1.3299, "mean_token_accuracy": 0.6537687480449677, "num_tokens": 645159667.0, "step": 3844 }, { "entropy": 1.6956780850887299, "epoch": 0.4223998242289418, "grad_norm": 0.782292902469635, "learning_rate": 1.8495712228999138e-05, "loss": 1.3682, "mean_token_accuracy": 0.6713423679272333, "num_tokens": 645279821.0, "step": 3845 }, { "entropy": 1.728989193836848, "epoch": 0.42250968114031473, "grad_norm": 0.6862851977348328, "learning_rate": 1.8494825904225933e-05, "loss": 1.3393, "mean_token_accuracy": 0.663465549548467, "num_tokens": 645414111.0, "step": 3846 }, { "entropy": 1.7368880609671276, "epoch": 0.42261953805168767, "grad_norm": 1.014003872871399, "learning_rate": 1.8493939342244868e-05, "loss": 1.394, "mean_token_accuracy": 0.6592635711034139, "num_tokens": 645560546.0, "step": 3847 }, { "entropy": 1.675550679365794, "epoch": 0.4227293949630606, "grad_norm": 0.7420448660850525, "learning_rate": 1.8493052543084e-05, "loss": 1.2888, "mean_token_accuracy": 0.6641269127527872, "num_tokens": 645690733.0, "step": 3848 }, { "entropy": 1.6391872266928356, "epoch": 0.42283925187443355, "grad_norm": 0.7968411445617676, "learning_rate": 1.84921655067714e-05, "loss": 1.3562, "mean_token_accuracy": 0.6737810671329498, "num_tokens": 645852109.0, "step": 3849 }, { "entropy": 1.6818280915419261, "epoch": 0.4229491087858065, "grad_norm": 0.7735076546669006, "learning_rate": 1.849127823333513e-05, "loss": 1.4612, "mean_token_accuracy": 0.6462418337663015, "num_tokens": 646007934.0, "step": 3850 }, { "entropy": 1.7462623516718547, "epoch": 0.42305896569717943, "grad_norm": 0.7541219592094421, "learning_rate": 1.849039072280328e-05, "loss": 1.4529, "mean_token_accuracy": 0.6520050664742788, "num_tokens": 646170910.0, "step": 3851 }, { "entropy": 1.6965330342451732, "epoch": 0.4231688226085524, "grad_norm": 0.7076205611228943, "learning_rate": 1.8489502975203945e-05, "loss": 1.6429, "mean_token_accuracy": 0.6315357536077499, "num_tokens": 646372868.0, "step": 3852 }, { "entropy": 1.6979803641637166, "epoch": 0.4232786795199253, "grad_norm": 0.7357332706451416, "learning_rate": 1.8488614990565214e-05, "loss": 1.3529, "mean_token_accuracy": 0.6634115974108378, "num_tokens": 646543074.0, "step": 3853 }, { "entropy": 1.722920149564743, "epoch": 0.42338853643129826, "grad_norm": 0.8576663732528687, "learning_rate": 1.8487726768915192e-05, "loss": 1.5067, "mean_token_accuracy": 0.6446111053228378, "num_tokens": 646707309.0, "step": 3854 }, { "entropy": 1.7091480791568756, "epoch": 0.4234983933426712, "grad_norm": 0.6144663095474243, "learning_rate": 1.848683831028199e-05, "loss": 1.4944, "mean_token_accuracy": 0.6384020894765854, "num_tokens": 646886585.0, "step": 3855 }, { "entropy": 1.6747375428676605, "epoch": 0.4236082502540441, "grad_norm": 0.81898033618927, "learning_rate": 1.8485949614693727e-05, "loss": 1.1378, "mean_token_accuracy": 0.6897122313578924, "num_tokens": 647014518.0, "step": 3856 }, { "entropy": 1.7361929814020793, "epoch": 0.423718107165417, "grad_norm": 0.7163565158843994, "learning_rate": 1.8485060682178537e-05, "loss": 1.3322, "mean_token_accuracy": 0.6657196134328842, "num_tokens": 647213559.0, "step": 3857 }, { "entropy": 1.7410944600900014, "epoch": 0.42382796407678996, "grad_norm": 0.7472032904624939, "learning_rate": 1.848417151276455e-05, "loss": 1.3762, "mean_token_accuracy": 0.6659711500008901, "num_tokens": 647367678.0, "step": 3858 }, { "entropy": 1.681091417868932, "epoch": 0.4239378209881629, "grad_norm": 1.138753890991211, "learning_rate": 1.8483282106479902e-05, "loss": 1.4024, "mean_token_accuracy": 0.6617006063461304, "num_tokens": 647554358.0, "step": 3859 }, { "entropy": 1.7275851269563038, "epoch": 0.42404767789953585, "grad_norm": 0.6139320135116577, "learning_rate": 1.848239246335275e-05, "loss": 1.4733, "mean_token_accuracy": 0.6429044504960378, "num_tokens": 647728748.0, "step": 3860 }, { "entropy": 1.6915673911571503, "epoch": 0.4241575348109088, "grad_norm": 0.6831756234169006, "learning_rate": 1.8481502583411247e-05, "loss": 1.3334, "mean_token_accuracy": 0.6636428534984589, "num_tokens": 647878607.0, "step": 3861 }, { "entropy": 1.750975062449773, "epoch": 0.42426739172228173, "grad_norm": 0.7183418273925781, "learning_rate": 1.848061246668356e-05, "loss": 1.5977, "mean_token_accuracy": 0.6374113808075587, "num_tokens": 648024338.0, "step": 3862 }, { "entropy": 1.7088340322176616, "epoch": 0.42437724863365467, "grad_norm": 0.7100759744644165, "learning_rate": 1.847972211319786e-05, "loss": 1.3605, "mean_token_accuracy": 0.6605943193038305, "num_tokens": 648196447.0, "step": 3863 }, { "entropy": 1.6845079759756725, "epoch": 0.4244871055450276, "grad_norm": 0.687178909778595, "learning_rate": 1.8478831522982324e-05, "loss": 1.416, "mean_token_accuracy": 0.6500076999266943, "num_tokens": 648383053.0, "step": 3864 }, { "entropy": 1.7246264616648357, "epoch": 0.42459696245640055, "grad_norm": 0.7078330516815186, "learning_rate": 1.847794069606514e-05, "loss": 1.5448, "mean_token_accuracy": 0.6318613439798355, "num_tokens": 648584553.0, "step": 3865 }, { "entropy": 1.7536778251330059, "epoch": 0.4247068193677735, "grad_norm": 0.7490545511245728, "learning_rate": 1.8477049632474508e-05, "loss": 1.4708, "mean_token_accuracy": 0.6561292608579, "num_tokens": 648745917.0, "step": 3866 }, { "entropy": 1.6898792386054993, "epoch": 0.42481667627914643, "grad_norm": 0.7261310815811157, "learning_rate": 1.8476158332238617e-05, "loss": 1.3375, "mean_token_accuracy": 0.6690275917450587, "num_tokens": 648890450.0, "step": 3867 }, { "entropy": 1.710007667541504, "epoch": 0.4249265331905194, "grad_norm": 0.7561746835708618, "learning_rate": 1.8475266795385685e-05, "loss": 1.3708, "mean_token_accuracy": 0.6579129894574484, "num_tokens": 649015823.0, "step": 3868 }, { "entropy": 1.719689855972926, "epoch": 0.42503639010189226, "grad_norm": 0.7121495604515076, "learning_rate": 1.8474375021943932e-05, "loss": 1.2898, "mean_token_accuracy": 0.6680507610241572, "num_tokens": 649190115.0, "step": 3869 }, { "entropy": 1.7542536358038585, "epoch": 0.4251462470132652, "grad_norm": 0.7546373605728149, "learning_rate": 1.8473483011941574e-05, "loss": 1.3253, "mean_token_accuracy": 0.6552920093139013, "num_tokens": 649304401.0, "step": 3870 }, { "entropy": 1.716896414756775, "epoch": 0.42525610392463814, "grad_norm": 0.6879488825798035, "learning_rate": 1.8472590765406845e-05, "loss": 1.4352, "mean_token_accuracy": 0.6727662235498428, "num_tokens": 649459866.0, "step": 3871 }, { "entropy": 1.6816608607769012, "epoch": 0.4253659608360111, "grad_norm": 0.8443351984024048, "learning_rate": 1.847169828236799e-05, "loss": 1.3892, "mean_token_accuracy": 0.6620204697052637, "num_tokens": 649626901.0, "step": 3872 }, { "entropy": 1.740911195675532, "epoch": 0.425475817747384, "grad_norm": 0.7434075474739075, "learning_rate": 1.8470805562853244e-05, "loss": 1.5953, "mean_token_accuracy": 0.6165072818597158, "num_tokens": 649809343.0, "step": 3873 }, { "entropy": 1.7113256255785625, "epoch": 0.42558567465875696, "grad_norm": 0.8285733461380005, "learning_rate": 1.846991260689087e-05, "loss": 1.3399, "mean_token_accuracy": 0.665142834186554, "num_tokens": 649991640.0, "step": 3874 }, { "entropy": 1.6987220545609791, "epoch": 0.4256955315701299, "grad_norm": 0.6770405173301697, "learning_rate": 1.8469019414509136e-05, "loss": 1.4514, "mean_token_accuracy": 0.6441337664922079, "num_tokens": 650164246.0, "step": 3875 }, { "entropy": 1.7160173257191975, "epoch": 0.42580538848150284, "grad_norm": 0.7959917187690735, "learning_rate": 1.8468125985736295e-05, "loss": 1.5868, "mean_token_accuracy": 0.6459453006585439, "num_tokens": 650305891.0, "step": 3876 }, { "entropy": 1.6745306452115376, "epoch": 0.4259152453928758, "grad_norm": 0.6555696725845337, "learning_rate": 1.8467232320600638e-05, "loss": 1.4623, "mean_token_accuracy": 0.6556547085444132, "num_tokens": 650507562.0, "step": 3877 }, { "entropy": 1.7178467512130737, "epoch": 0.4260251023042487, "grad_norm": 0.6489704847335815, "learning_rate": 1.846633841913044e-05, "loss": 1.3224, "mean_token_accuracy": 0.6665374338626862, "num_tokens": 650667834.0, "step": 3878 }, { "entropy": 1.6714246372381847, "epoch": 0.42613495921562167, "grad_norm": 0.7068523168563843, "learning_rate": 1.8465444281353992e-05, "loss": 1.4546, "mean_token_accuracy": 0.6516213566064835, "num_tokens": 650840788.0, "step": 3879 }, { "entropy": 1.7186284760634105, "epoch": 0.4262448161269946, "grad_norm": 0.7457088828086853, "learning_rate": 1.84645499072996e-05, "loss": 1.2781, "mean_token_accuracy": 0.672358974814415, "num_tokens": 650941361.0, "step": 3880 }, { "entropy": 1.6859534084796906, "epoch": 0.42635467303836755, "grad_norm": 0.679847240447998, "learning_rate": 1.8463655296995567e-05, "loss": 1.3869, "mean_token_accuracy": 0.6544978270928065, "num_tokens": 651105965.0, "step": 3881 }, { "entropy": 1.7387334704399109, "epoch": 0.4264645299497405, "grad_norm": 0.8297735452651978, "learning_rate": 1.8462760450470207e-05, "loss": 1.4147, "mean_token_accuracy": 0.6685324857632319, "num_tokens": 651242560.0, "step": 3882 }, { "entropy": 1.808528443177541, "epoch": 0.4265743868611134, "grad_norm": 0.7231261730194092, "learning_rate": 1.846186536775184e-05, "loss": 1.5466, "mean_token_accuracy": 0.6377104272445043, "num_tokens": 651430287.0, "step": 3883 }, { "entropy": 1.7021079659461975, "epoch": 0.4266842437724863, "grad_norm": 0.7810244560241699, "learning_rate": 1.84609700488688e-05, "loss": 1.2778, "mean_token_accuracy": 0.6832303404808044, "num_tokens": 651559583.0, "step": 3884 }, { "entropy": 1.689590334892273, "epoch": 0.42679410068385926, "grad_norm": 0.6838991045951843, "learning_rate": 1.8460074493849416e-05, "loss": 1.2951, "mean_token_accuracy": 0.6713648786147436, "num_tokens": 651691973.0, "step": 3885 }, { "entropy": 1.714994877576828, "epoch": 0.4269039575952322, "grad_norm": 0.6699170470237732, "learning_rate": 1.8459178702722037e-05, "loss": 1.2707, "mean_token_accuracy": 0.6692277739445368, "num_tokens": 651816035.0, "step": 3886 }, { "entropy": 1.729461799065272, "epoch": 0.42701381450660514, "grad_norm": 0.6075051426887512, "learning_rate": 1.8458282675515016e-05, "loss": 1.3528, "mean_token_accuracy": 0.6616584012905756, "num_tokens": 652024333.0, "step": 3887 }, { "entropy": 1.6672471364339192, "epoch": 0.4271236714179781, "grad_norm": 0.6613723039627075, "learning_rate": 1.8457386412256704e-05, "loss": 1.3707, "mean_token_accuracy": 0.6597268283367157, "num_tokens": 652168775.0, "step": 3888 }, { "entropy": 1.662774880727132, "epoch": 0.427233528329351, "grad_norm": 0.7188613414764404, "learning_rate": 1.8456489912975477e-05, "loss": 1.4895, "mean_token_accuracy": 0.6598201990127563, "num_tokens": 652318630.0, "step": 3889 }, { "entropy": 1.7089114785194397, "epoch": 0.42734338524072396, "grad_norm": 0.7041028141975403, "learning_rate": 1.8455593177699704e-05, "loss": 1.4792, "mean_token_accuracy": 0.6515317956606547, "num_tokens": 652480939.0, "step": 3890 }, { "entropy": 1.706920713186264, "epoch": 0.4274532421520969, "grad_norm": 0.7197327613830566, "learning_rate": 1.845469620645776e-05, "loss": 1.441, "mean_token_accuracy": 0.6517567286888758, "num_tokens": 652646579.0, "step": 3891 }, { "entropy": 1.7612548073132832, "epoch": 0.42756309906346984, "grad_norm": 0.78521329164505, "learning_rate": 1.8453798999278047e-05, "loss": 1.5184, "mean_token_accuracy": 0.6430316617091497, "num_tokens": 652805441.0, "step": 3892 }, { "entropy": 1.711044450600942, "epoch": 0.4276729559748428, "grad_norm": 0.624224066734314, "learning_rate": 1.8452901556188952e-05, "loss": 1.4991, "mean_token_accuracy": 0.6483441591262817, "num_tokens": 652968733.0, "step": 3893 }, { "entropy": 1.7002309560775757, "epoch": 0.4277828128862157, "grad_norm": 0.6248944997787476, "learning_rate": 1.845200387721888e-05, "loss": 1.4019, "mean_token_accuracy": 0.6617890248696009, "num_tokens": 653141796.0, "step": 3894 }, { "entropy": 1.705695738395055, "epoch": 0.42789266979758867, "grad_norm": 0.6925023198127747, "learning_rate": 1.8451105962396247e-05, "loss": 1.3145, "mean_token_accuracy": 0.6632231523593267, "num_tokens": 653294696.0, "step": 3895 }, { "entropy": 1.751973956823349, "epoch": 0.42800252670896155, "grad_norm": 0.6819112300872803, "learning_rate": 1.845020781174947e-05, "loss": 1.3375, "mean_token_accuracy": 0.6607561757167181, "num_tokens": 653435104.0, "step": 3896 }, { "entropy": 1.686410774787267, "epoch": 0.4281123836203345, "grad_norm": 0.6627749800682068, "learning_rate": 1.8449309425306963e-05, "loss": 1.2681, "mean_token_accuracy": 0.6718742549419403, "num_tokens": 653540901.0, "step": 3897 }, { "entropy": 1.7340122958024342, "epoch": 0.42822224053170743, "grad_norm": 0.7688063979148865, "learning_rate": 1.8448410803097177e-05, "loss": 1.3647, "mean_token_accuracy": 0.6578405052423477, "num_tokens": 653723766.0, "step": 3898 }, { "entropy": 1.71702042222023, "epoch": 0.4283320974430804, "grad_norm": 0.7477086186408997, "learning_rate": 1.8447511945148544e-05, "loss": 1.3483, "mean_token_accuracy": 0.65413269897302, "num_tokens": 653885687.0, "step": 3899 }, { "entropy": 1.7188432812690735, "epoch": 0.4284419543544533, "grad_norm": 0.7725921273231506, "learning_rate": 1.8446612851489513e-05, "loss": 1.3839, "mean_token_accuracy": 0.6639880041281382, "num_tokens": 654016294.0, "step": 3900 }, { "entropy": 1.6381129622459412, "epoch": 0.42855181126582625, "grad_norm": 0.6253584027290344, "learning_rate": 1.844571352214854e-05, "loss": 1.3036, "mean_token_accuracy": 0.6645344942808151, "num_tokens": 654174210.0, "step": 3901 }, { "entropy": 1.7396024366219838, "epoch": 0.4286616681771992, "grad_norm": 0.6562190055847168, "learning_rate": 1.8444813957154094e-05, "loss": 1.4815, "mean_token_accuracy": 0.6449996630350748, "num_tokens": 654341703.0, "step": 3902 }, { "entropy": 1.7696809967358906, "epoch": 0.42877152508857214, "grad_norm": 0.7089744806289673, "learning_rate": 1.8443914156534636e-05, "loss": 1.338, "mean_token_accuracy": 0.664988378683726, "num_tokens": 654454220.0, "step": 3903 }, { "entropy": 1.7581712106863658, "epoch": 0.4288813819999451, "grad_norm": 0.7849205732345581, "learning_rate": 1.8443014120318653e-05, "loss": 1.2782, "mean_token_accuracy": 0.6888795097668966, "num_tokens": 654565966.0, "step": 3904 }, { "entropy": 1.6995848814646404, "epoch": 0.428991238911318, "grad_norm": 0.7430975437164307, "learning_rate": 1.844211384853462e-05, "loss": 1.5033, "mean_token_accuracy": 0.6458527992169062, "num_tokens": 654706153.0, "step": 3905 }, { "entropy": 1.7555510600407918, "epoch": 0.42910109582269096, "grad_norm": 0.6673111319541931, "learning_rate": 1.8441213341211042e-05, "loss": 1.3924, "mean_token_accuracy": 0.6652990728616714, "num_tokens": 654923563.0, "step": 3906 }, { "entropy": 1.6870744427045186, "epoch": 0.4292109527340639, "grad_norm": 0.6765327453613281, "learning_rate": 1.8440312598376417e-05, "loss": 1.4085, "mean_token_accuracy": 0.6474004884560903, "num_tokens": 655082747.0, "step": 3907 }, { "entropy": 1.8040563662846882, "epoch": 0.42932080964543684, "grad_norm": 0.7210556268692017, "learning_rate": 1.843941162005925e-05, "loss": 1.4306, "mean_token_accuracy": 0.648560548822085, "num_tokens": 655255550.0, "step": 3908 }, { "entropy": 1.730605661869049, "epoch": 0.4294306665568098, "grad_norm": 0.7293521165847778, "learning_rate": 1.8438510406288054e-05, "loss": 1.4941, "mean_token_accuracy": 0.6423317690690359, "num_tokens": 655410501.0, "step": 3909 }, { "entropy": 1.6763539016246796, "epoch": 0.42954052346818267, "grad_norm": 0.5762035250663757, "learning_rate": 1.8437608957091356e-05, "loss": 1.4347, "mean_token_accuracy": 0.6584045539299647, "num_tokens": 655612794.0, "step": 3910 }, { "entropy": 1.6941528419653575, "epoch": 0.4296503803795556, "grad_norm": 0.7608525156974792, "learning_rate": 1.8436707272497687e-05, "loss": 1.4372, "mean_token_accuracy": 0.6504789739847183, "num_tokens": 655775219.0, "step": 3911 }, { "entropy": 1.6654709080855052, "epoch": 0.42976023729092855, "grad_norm": 0.6557295918464661, "learning_rate": 1.8435805352535588e-05, "loss": 1.3395, "mean_token_accuracy": 0.6681905190149943, "num_tokens": 655919108.0, "step": 3912 }, { "entropy": 1.6901763478914897, "epoch": 0.4298700942023015, "grad_norm": 0.8068464994430542, "learning_rate": 1.8434903197233594e-05, "loss": 1.2511, "mean_token_accuracy": 0.6827605366706848, "num_tokens": 656042827.0, "step": 3913 }, { "entropy": 1.7511335114638011, "epoch": 0.42997995111367443, "grad_norm": 0.7122650146484375, "learning_rate": 1.843400080662027e-05, "loss": 1.4982, "mean_token_accuracy": 0.6384455660978953, "num_tokens": 656224813.0, "step": 3914 }, { "entropy": 1.7839158376057942, "epoch": 0.43008980802504737, "grad_norm": 0.581892192363739, "learning_rate": 1.8433098180724165e-05, "loss": 1.4154, "mean_token_accuracy": 0.6490354090929031, "num_tokens": 656425344.0, "step": 3915 }, { "entropy": 1.7053211430708568, "epoch": 0.4301996649364203, "grad_norm": 0.8363472819328308, "learning_rate": 1.8432195319573855e-05, "loss": 1.399, "mean_token_accuracy": 0.6475622256596884, "num_tokens": 656602663.0, "step": 3916 }, { "entropy": 1.711295525232951, "epoch": 0.43030952184779325, "grad_norm": 0.8222143054008484, "learning_rate": 1.843129222319791e-05, "loss": 1.5267, "mean_token_accuracy": 0.6486119305094084, "num_tokens": 656744412.0, "step": 3917 }, { "entropy": 1.703042854865392, "epoch": 0.4304193787591662, "grad_norm": 0.612709641456604, "learning_rate": 1.8430388891624915e-05, "loss": 1.505, "mean_token_accuracy": 0.6417644868294398, "num_tokens": 656950229.0, "step": 3918 }, { "entropy": 1.6889925201733906, "epoch": 0.43052923567053913, "grad_norm": 0.711890459060669, "learning_rate": 1.8429485324883464e-05, "loss": 1.4388, "mean_token_accuracy": 0.6587661306063334, "num_tokens": 657105618.0, "step": 3919 }, { "entropy": 1.688792844613393, "epoch": 0.4306390925819121, "grad_norm": 0.6642475128173828, "learning_rate": 1.8428581523002146e-05, "loss": 1.3509, "mean_token_accuracy": 0.6492424011230469, "num_tokens": 657267221.0, "step": 3920 }, { "entropy": 1.6864960094292958, "epoch": 0.430748949493285, "grad_norm": 0.6970334053039551, "learning_rate": 1.842767748600957e-05, "loss": 1.5943, "mean_token_accuracy": 0.6339181611935297, "num_tokens": 657467480.0, "step": 3921 }, { "entropy": 1.7039388716220856, "epoch": 0.43085880640465796, "grad_norm": 0.6595334410667419, "learning_rate": 1.842677321393435e-05, "loss": 1.4437, "mean_token_accuracy": 0.6527088582515717, "num_tokens": 657643535.0, "step": 3922 }, { "entropy": 1.7325763603051503, "epoch": 0.43096866331603084, "grad_norm": 0.7706548571586609, "learning_rate": 1.8425868706805103e-05, "loss": 1.5503, "mean_token_accuracy": 0.6559620996316274, "num_tokens": 657831508.0, "step": 3923 }, { "entropy": 1.6889927089214325, "epoch": 0.4310785202274038, "grad_norm": 0.8752990365028381, "learning_rate": 1.842496396465046e-05, "loss": 1.4015, "mean_token_accuracy": 0.6509895920753479, "num_tokens": 658002447.0, "step": 3924 }, { "entropy": 1.6617620189984639, "epoch": 0.4311883771387767, "grad_norm": 1.0145397186279297, "learning_rate": 1.842405898749905e-05, "loss": 1.3561, "mean_token_accuracy": 0.6678018321593603, "num_tokens": 658178958.0, "step": 3925 }, { "entropy": 1.7041081885496776, "epoch": 0.43129823405014966, "grad_norm": 0.8633971214294434, "learning_rate": 1.842315377537952e-05, "loss": 1.2597, "mean_token_accuracy": 0.6791882663965225, "num_tokens": 658312769.0, "step": 3926 }, { "entropy": 1.6727077662944794, "epoch": 0.4314080909615226, "grad_norm": 0.7873262166976929, "learning_rate": 1.842224832832052e-05, "loss": 1.3808, "mean_token_accuracy": 0.6544581105311712, "num_tokens": 658453178.0, "step": 3927 }, { "entropy": 1.7058905164400737, "epoch": 0.43151794787289555, "grad_norm": 0.6460711359977722, "learning_rate": 1.8421342646350704e-05, "loss": 1.36, "mean_token_accuracy": 0.6649837543567022, "num_tokens": 658667304.0, "step": 3928 }, { "entropy": 1.6966310739517212, "epoch": 0.4316278047842685, "grad_norm": 0.675844669342041, "learning_rate": 1.8420436729498736e-05, "loss": 1.4495, "mean_token_accuracy": 0.6551729142665863, "num_tokens": 658824408.0, "step": 3929 }, { "entropy": 1.7804734110832214, "epoch": 0.43173766169564143, "grad_norm": 0.5847294926643372, "learning_rate": 1.841953057779329e-05, "loss": 1.4555, "mean_token_accuracy": 0.6308817764123281, "num_tokens": 659070341.0, "step": 3930 }, { "entropy": 1.775219549735387, "epoch": 0.43184751860701437, "grad_norm": 0.6830585598945618, "learning_rate": 1.8418624191263047e-05, "loss": 1.552, "mean_token_accuracy": 0.6412175844113032, "num_tokens": 659256021.0, "step": 3931 }, { "entropy": 1.711199273665746, "epoch": 0.4319573755183873, "grad_norm": 0.7030532956123352, "learning_rate": 1.8417717569936688e-05, "loss": 1.4169, "mean_token_accuracy": 0.654847651720047, "num_tokens": 659433299.0, "step": 3932 }, { "entropy": 1.7225093841552734, "epoch": 0.43206723242976025, "grad_norm": 0.7642560601234436, "learning_rate": 1.841681071384291e-05, "loss": 1.5811, "mean_token_accuracy": 0.6175984516739845, "num_tokens": 659652811.0, "step": 3933 }, { "entropy": 1.6770412425200145, "epoch": 0.4321770893411332, "grad_norm": 0.634323000907898, "learning_rate": 1.8415903623010415e-05, "loss": 1.5018, "mean_token_accuracy": 0.6280664106210073, "num_tokens": 659865805.0, "step": 3934 }, { "entropy": 1.7322843472162883, "epoch": 0.43228694625250613, "grad_norm": 0.9006372094154358, "learning_rate": 1.8414996297467917e-05, "loss": 1.3899, "mean_token_accuracy": 0.6665350049734116, "num_tokens": 660052306.0, "step": 3935 }, { "entropy": 1.7359480261802673, "epoch": 0.4323968031638791, "grad_norm": 0.6146532297134399, "learning_rate": 1.841408873724412e-05, "loss": 1.5926, "mean_token_accuracy": 0.6407150129477183, "num_tokens": 660243568.0, "step": 3936 }, { "entropy": 1.7125855485598247, "epoch": 0.43250666007525196, "grad_norm": 0.718245267868042, "learning_rate": 1.841318094236776e-05, "loss": 1.4883, "mean_token_accuracy": 0.6494996398687363, "num_tokens": 660530282.0, "step": 3937 }, { "entropy": 1.749464641014735, "epoch": 0.4326165169866249, "grad_norm": 0.7096315622329712, "learning_rate": 1.8412272912867563e-05, "loss": 1.3765, "mean_token_accuracy": 0.6703631083170573, "num_tokens": 660667046.0, "step": 3938 }, { "entropy": 1.7773485879103343, "epoch": 0.43272637389799784, "grad_norm": 0.9372892379760742, "learning_rate": 1.8411364648772268e-05, "loss": 1.6595, "mean_token_accuracy": 0.6321366230646769, "num_tokens": 660833411.0, "step": 3939 }, { "entropy": 1.7520179847876232, "epoch": 0.4328362308093708, "grad_norm": 0.6285591721534729, "learning_rate": 1.841045615011062e-05, "loss": 1.3919, "mean_token_accuracy": 0.6557580778996149, "num_tokens": 661011775.0, "step": 3940 }, { "entropy": 1.736931214729945, "epoch": 0.4329460877207437, "grad_norm": 0.7110692262649536, "learning_rate": 1.8409547416911378e-05, "loss": 1.4228, "mean_token_accuracy": 0.6697671810785929, "num_tokens": 661139457.0, "step": 3941 }, { "entropy": 1.7010645965735118, "epoch": 0.43305594463211666, "grad_norm": 0.6413223743438721, "learning_rate": 1.8408638449203296e-05, "loss": 1.3613, "mean_token_accuracy": 0.6522543032964071, "num_tokens": 661303901.0, "step": 3942 }, { "entropy": 1.7435734967390697, "epoch": 0.4331658015434896, "grad_norm": 0.7069315910339355, "learning_rate": 1.8407729247015146e-05, "loss": 1.6427, "mean_token_accuracy": 0.619743749499321, "num_tokens": 661521048.0, "step": 3943 }, { "entropy": 1.6970693071683247, "epoch": 0.43327565845486254, "grad_norm": 0.7432729601860046, "learning_rate": 1.8406819810375706e-05, "loss": 1.4804, "mean_token_accuracy": 0.6531406243642172, "num_tokens": 661687623.0, "step": 3944 }, { "entropy": 1.770354559024175, "epoch": 0.4333855153662355, "grad_norm": 0.6008591651916504, "learning_rate": 1.840591013931375e-05, "loss": 1.3533, "mean_token_accuracy": 0.655795618891716, "num_tokens": 661876251.0, "step": 3945 }, { "entropy": 1.770843635002772, "epoch": 0.4334953722776084, "grad_norm": 0.7439899444580078, "learning_rate": 1.8405000233858083e-05, "loss": 1.4859, "mean_token_accuracy": 0.6457716375589371, "num_tokens": 662002220.0, "step": 3946 }, { "entropy": 1.7036712368329365, "epoch": 0.43360522918898137, "grad_norm": 0.6265702247619629, "learning_rate": 1.8404090094037488e-05, "loss": 1.536, "mean_token_accuracy": 0.6389161348342896, "num_tokens": 662199953.0, "step": 3947 }, { "entropy": 1.6811943848927815, "epoch": 0.4337150861003543, "grad_norm": 0.6257259249687195, "learning_rate": 1.8403179719880782e-05, "loss": 1.3708, "mean_token_accuracy": 0.6628421694040298, "num_tokens": 662393764.0, "step": 3948 }, { "entropy": 1.6686974863211315, "epoch": 0.43382494301172725, "grad_norm": 0.6457135081291199, "learning_rate": 1.8402269111416776e-05, "loss": 1.5583, "mean_token_accuracy": 0.6230059290925661, "num_tokens": 662600456.0, "step": 3949 }, { "entropy": 1.7240578730901082, "epoch": 0.4339347999231002, "grad_norm": 0.6716305017471313, "learning_rate": 1.8401358268674282e-05, "loss": 1.5154, "mean_token_accuracy": 0.6316413730382919, "num_tokens": 662798175.0, "step": 3950 }, { "entropy": 1.711452802022298, "epoch": 0.4340446568344731, "grad_norm": 0.7926602363586426, "learning_rate": 1.840044719168214e-05, "loss": 1.4417, "mean_token_accuracy": 0.6603543410698572, "num_tokens": 662943808.0, "step": 3951 }, { "entropy": 1.692206621170044, "epoch": 0.434154513745846, "grad_norm": 0.7351948022842407, "learning_rate": 1.8399535880469174e-05, "loss": 1.3929, "mean_token_accuracy": 0.6664231171210607, "num_tokens": 663096189.0, "step": 3952 }, { "entropy": 1.6513939301172893, "epoch": 0.43426437065721896, "grad_norm": 0.9081413149833679, "learning_rate": 1.8398624335064234e-05, "loss": 1.5123, "mean_token_accuracy": 0.6428524355093638, "num_tokens": 663262484.0, "step": 3953 }, { "entropy": 1.6838249266147614, "epoch": 0.4343742275685919, "grad_norm": 0.6735399961471558, "learning_rate": 1.839771255549617e-05, "loss": 1.3138, "mean_token_accuracy": 0.6686491419871649, "num_tokens": 663412036.0, "step": 3954 }, { "entropy": 1.705833335717519, "epoch": 0.43448408447996484, "grad_norm": 0.6810485124588013, "learning_rate": 1.8396800541793837e-05, "loss": 1.3139, "mean_token_accuracy": 0.6651433457930883, "num_tokens": 663526186.0, "step": 3955 }, { "entropy": 1.729738712310791, "epoch": 0.4345939413913378, "grad_norm": 0.7022128701210022, "learning_rate": 1.8395888293986096e-05, "loss": 1.3078, "mean_token_accuracy": 0.6703850577274958, "num_tokens": 663670981.0, "step": 3956 }, { "entropy": 1.701034019390742, "epoch": 0.4347037983027107, "grad_norm": 0.6978190541267395, "learning_rate": 1.8394975812101824e-05, "loss": 1.4422, "mean_token_accuracy": 0.6494102279345194, "num_tokens": 663868779.0, "step": 3957 }, { "entropy": 1.7295649846394856, "epoch": 0.43481365521408366, "grad_norm": 0.6927241683006287, "learning_rate": 1.8394063096169904e-05, "loss": 1.3859, "mean_token_accuracy": 0.6548526287078857, "num_tokens": 664041829.0, "step": 3958 }, { "entropy": 1.7171918253103893, "epoch": 0.4349235121254566, "grad_norm": 0.745561420917511, "learning_rate": 1.8393150146219214e-05, "loss": 1.4569, "mean_token_accuracy": 0.6500047942002615, "num_tokens": 664221934.0, "step": 3959 }, { "entropy": 1.6659683287143707, "epoch": 0.43503336903682954, "grad_norm": 0.6515949368476868, "learning_rate": 1.8392236962278656e-05, "loss": 1.3887, "mean_token_accuracy": 0.6540011912584305, "num_tokens": 664382086.0, "step": 3960 }, { "entropy": 1.6935789982477825, "epoch": 0.4351432259482025, "grad_norm": 0.6703075766563416, "learning_rate": 1.839132354437713e-05, "loss": 1.4385, "mean_token_accuracy": 0.6582238326470057, "num_tokens": 664527955.0, "step": 3961 }, { "entropy": 1.7064592937628429, "epoch": 0.4352530828595754, "grad_norm": 0.6275252103805542, "learning_rate": 1.839040989254354e-05, "loss": 1.346, "mean_token_accuracy": 0.6551967660586039, "num_tokens": 664697710.0, "step": 3962 }, { "entropy": 1.7683529754479725, "epoch": 0.43536293977094837, "grad_norm": 0.7211574912071228, "learning_rate": 1.838949600680681e-05, "loss": 1.374, "mean_token_accuracy": 0.6625560075044632, "num_tokens": 664850356.0, "step": 3963 }, { "entropy": 1.7406555513540904, "epoch": 0.43547279668232125, "grad_norm": 0.6910867094993591, "learning_rate": 1.838858188719586e-05, "loss": 1.2189, "mean_token_accuracy": 0.6768646488587061, "num_tokens": 664945901.0, "step": 3964 }, { "entropy": 1.746801644563675, "epoch": 0.4355826535936942, "grad_norm": 0.7130513787269592, "learning_rate": 1.8387667533739627e-05, "loss": 1.3185, "mean_token_accuracy": 0.6610147307316462, "num_tokens": 665097327.0, "step": 3965 }, { "entropy": 1.7367882827917736, "epoch": 0.43569251050506713, "grad_norm": 0.7409621477127075, "learning_rate": 1.8386752946467043e-05, "loss": 1.3773, "mean_token_accuracy": 0.6539178440968195, "num_tokens": 665328976.0, "step": 3966 }, { "entropy": 1.6604821781317394, "epoch": 0.4358023674164401, "grad_norm": 0.6217142343521118, "learning_rate": 1.8385838125407053e-05, "loss": 1.3066, "mean_token_accuracy": 0.6670824587345123, "num_tokens": 665484819.0, "step": 3967 }, { "entropy": 1.637445976336797, "epoch": 0.435912224327813, "grad_norm": 0.6176252365112305, "learning_rate": 1.838492307058862e-05, "loss": 1.409, "mean_token_accuracy": 0.6582680841286978, "num_tokens": 665638564.0, "step": 3968 }, { "entropy": 1.7330170969168346, "epoch": 0.43602208123918595, "grad_norm": 0.8453408479690552, "learning_rate": 1.8384007782040693e-05, "loss": 1.3667, "mean_token_accuracy": 0.6572331438461939, "num_tokens": 665789481.0, "step": 3969 }, { "entropy": 1.7294095953305562, "epoch": 0.4361319381505589, "grad_norm": 0.7358590960502625, "learning_rate": 1.8383092259792254e-05, "loss": 1.3653, "mean_token_accuracy": 0.6577903230985006, "num_tokens": 665932917.0, "step": 3970 }, { "entropy": 1.735178271929423, "epoch": 0.43624179506193184, "grad_norm": 0.6520756483078003, "learning_rate": 1.8382176503872266e-05, "loss": 1.3795, "mean_token_accuracy": 0.6685875505208969, "num_tokens": 666070750.0, "step": 3971 }, { "entropy": 1.7012029190858204, "epoch": 0.4363516519733048, "grad_norm": 0.640774130821228, "learning_rate": 1.8381260514309722e-05, "loss": 1.6331, "mean_token_accuracy": 0.6277043521404266, "num_tokens": 666299387.0, "step": 3972 }, { "entropy": 1.7609250446160634, "epoch": 0.4364615088846777, "grad_norm": 0.6770412921905518, "learning_rate": 1.838034429113361e-05, "loss": 1.3659, "mean_token_accuracy": 0.6573913991451263, "num_tokens": 666416424.0, "step": 3973 }, { "entropy": 1.7060511807600658, "epoch": 0.43657136579605066, "grad_norm": 0.7520270943641663, "learning_rate": 1.837942783437292e-05, "loss": 1.527, "mean_token_accuracy": 0.670257126291593, "num_tokens": 666584190.0, "step": 3974 }, { "entropy": 1.722565899292628, "epoch": 0.4366812227074236, "grad_norm": 0.6892067193984985, "learning_rate": 1.8378511144056673e-05, "loss": 1.4379, "mean_token_accuracy": 0.6610093315442404, "num_tokens": 666777502.0, "step": 3975 }, { "entropy": 1.7519932488600414, "epoch": 0.43679107961879654, "grad_norm": 0.701661229133606, "learning_rate": 1.8377594220213867e-05, "loss": 1.5047, "mean_token_accuracy": 0.6375877310832342, "num_tokens": 666962588.0, "step": 3976 }, { "entropy": 1.7207889755566914, "epoch": 0.4369009365301695, "grad_norm": 0.705245316028595, "learning_rate": 1.837667706287353e-05, "loss": 1.4913, "mean_token_accuracy": 0.6380893290042877, "num_tokens": 667138508.0, "step": 3977 }, { "entropy": 1.7350668410460155, "epoch": 0.43701079344154237, "grad_norm": 0.6259267330169678, "learning_rate": 1.837575967206469e-05, "loss": 1.3351, "mean_token_accuracy": 0.6648585498332977, "num_tokens": 667306171.0, "step": 3978 }, { "entropy": 1.6554445624351501, "epoch": 0.4371206503529153, "grad_norm": 0.7273040413856506, "learning_rate": 1.837484204781638e-05, "loss": 1.5635, "mean_token_accuracy": 0.639080340663592, "num_tokens": 667481940.0, "step": 3979 }, { "entropy": 1.6817783216635387, "epoch": 0.43723050726428825, "grad_norm": 0.6706110835075378, "learning_rate": 1.837392419015764e-05, "loss": 1.2797, "mean_token_accuracy": 0.6793971409400305, "num_tokens": 667638137.0, "step": 3980 }, { "entropy": 1.6714449326197307, "epoch": 0.4373403641756612, "grad_norm": 0.6944328546524048, "learning_rate": 1.837300609911752e-05, "loss": 1.4153, "mean_token_accuracy": 0.6490057408809662, "num_tokens": 667801298.0, "step": 3981 }, { "entropy": 1.7301402886708577, "epoch": 0.43745022108703413, "grad_norm": 0.6663272380828857, "learning_rate": 1.8372087774725086e-05, "loss": 1.3182, "mean_token_accuracy": 0.6672502309083939, "num_tokens": 667935582.0, "step": 3982 }, { "entropy": 1.6425547401110332, "epoch": 0.43756007799840707, "grad_norm": 0.6449568271636963, "learning_rate": 1.837116921700939e-05, "loss": 1.3805, "mean_token_accuracy": 0.6671392023563385, "num_tokens": 668108676.0, "step": 3983 }, { "entropy": 1.7166595160961151, "epoch": 0.43766993490978, "grad_norm": 0.7882794737815857, "learning_rate": 1.8370250425999513e-05, "loss": 1.5201, "mean_token_accuracy": 0.6302592655022939, "num_tokens": 668335947.0, "step": 3984 }, { "entropy": 1.6976383328437805, "epoch": 0.43777979182115295, "grad_norm": 0.7272980809211731, "learning_rate": 1.836933140172453e-05, "loss": 1.3725, "mean_token_accuracy": 0.6644467264413834, "num_tokens": 668487476.0, "step": 3985 }, { "entropy": 1.708019107580185, "epoch": 0.4378896487325259, "grad_norm": 0.6897531151771545, "learning_rate": 1.8368412144213527e-05, "loss": 1.3867, "mean_token_accuracy": 0.6588212251663208, "num_tokens": 668685652.0, "step": 3986 }, { "entropy": 1.7161248624324799, "epoch": 0.43799950564389883, "grad_norm": 0.6213241815567017, "learning_rate": 1.8367492653495603e-05, "loss": 1.3569, "mean_token_accuracy": 0.6537472208340963, "num_tokens": 668831434.0, "step": 3987 }, { "entropy": 1.6518361568450928, "epoch": 0.4381093625552718, "grad_norm": 0.6153174042701721, "learning_rate": 1.8366572929599853e-05, "loss": 1.418, "mean_token_accuracy": 0.6647394746541977, "num_tokens": 669014444.0, "step": 3988 }, { "entropy": 1.764929711818695, "epoch": 0.4382192194666447, "grad_norm": 0.789162278175354, "learning_rate": 1.8365652972555395e-05, "loss": 1.214, "mean_token_accuracy": 0.6773978173732758, "num_tokens": 669117485.0, "step": 3989 }, { "entropy": 1.732424944639206, "epoch": 0.43832907637801766, "grad_norm": 0.7520033121109009, "learning_rate": 1.836473278239133e-05, "loss": 1.3209, "mean_token_accuracy": 0.6744516342878342, "num_tokens": 669273942.0, "step": 3990 }, { "entropy": 1.7452230354150136, "epoch": 0.43843893328939054, "grad_norm": 1.1524012088775635, "learning_rate": 1.83638123591368e-05, "loss": 1.1884, "mean_token_accuracy": 0.662539561589559, "num_tokens": 669456919.0, "step": 3991 }, { "entropy": 1.7065231601397197, "epoch": 0.4385487902007635, "grad_norm": 0.8054308891296387, "learning_rate": 1.8362891702820928e-05, "loss": 1.4485, "mean_token_accuracy": 0.6653054704268774, "num_tokens": 669595872.0, "step": 3992 }, { "entropy": 1.6997679869333904, "epoch": 0.4386586471121364, "grad_norm": 0.6387143731117249, "learning_rate": 1.8361970813472847e-05, "loss": 1.4909, "mean_token_accuracy": 0.6452397058407465, "num_tokens": 669834698.0, "step": 3993 }, { "entropy": 1.6552885274092357, "epoch": 0.43876850402350936, "grad_norm": 0.659257173538208, "learning_rate": 1.8361049691121703e-05, "loss": 1.3508, "mean_token_accuracy": 0.6857950339714686, "num_tokens": 670012914.0, "step": 3994 }, { "entropy": 1.7258447209994, "epoch": 0.4388783609348823, "grad_norm": 0.6504175066947937, "learning_rate": 1.836012833579666e-05, "loss": 1.3421, "mean_token_accuracy": 0.6709899504979452, "num_tokens": 670167144.0, "step": 3995 }, { "entropy": 1.6821411649386089, "epoch": 0.43898821784625525, "grad_norm": 0.7438737750053406, "learning_rate": 1.835920674752687e-05, "loss": 1.4678, "mean_token_accuracy": 0.6426598926385244, "num_tokens": 670364528.0, "step": 3996 }, { "entropy": 1.6863566239674885, "epoch": 0.4390980747576282, "grad_norm": 0.6272345185279846, "learning_rate": 1.8358284926341502e-05, "loss": 1.3699, "mean_token_accuracy": 0.6651715586582819, "num_tokens": 670559676.0, "step": 3997 }, { "entropy": 1.7127248346805573, "epoch": 0.43920793166900113, "grad_norm": 0.7947668433189392, "learning_rate": 1.835736287226973e-05, "loss": 1.5277, "mean_token_accuracy": 0.6522035598754883, "num_tokens": 670751739.0, "step": 3998 }, { "entropy": 1.6697716514269512, "epoch": 0.43931778858037407, "grad_norm": 0.7031921744346619, "learning_rate": 1.835644058534074e-05, "loss": 1.4262, "mean_token_accuracy": 0.668352390329043, "num_tokens": 670926584.0, "step": 3999 }, { "entropy": 1.7474531829357147, "epoch": 0.439427645491747, "grad_norm": 0.8411499261856079, "learning_rate": 1.8355518065583725e-05, "loss": 1.3682, "mean_token_accuracy": 0.6587245215972265, "num_tokens": 671068877.0, "step": 4000 }, { "entropy": 1.7535376648108165, "epoch": 0.43953750240311995, "grad_norm": 0.6565456986427307, "learning_rate": 1.835459531302787e-05, "loss": 1.4578, "mean_token_accuracy": 0.636372705300649, "num_tokens": 671261086.0, "step": 4001 }, { "entropy": 1.7117730776468914, "epoch": 0.4396473593144929, "grad_norm": 0.7486881613731384, "learning_rate": 1.835367232770239e-05, "loss": 1.4796, "mean_token_accuracy": 0.6628977358341217, "num_tokens": 671432376.0, "step": 4002 }, { "entropy": 1.6681561470031738, "epoch": 0.43975721622586583, "grad_norm": 0.7439094185829163, "learning_rate": 1.8352749109636498e-05, "loss": 1.5121, "mean_token_accuracy": 0.6407729138930639, "num_tokens": 671633326.0, "step": 4003 }, { "entropy": 1.726431320110957, "epoch": 0.4398670731372388, "grad_norm": 0.6899568438529968, "learning_rate": 1.8351825658859405e-05, "loss": 1.3619, "mean_token_accuracy": 0.6614230573177338, "num_tokens": 671759712.0, "step": 4004 }, { "entropy": 1.7370514472325642, "epoch": 0.43997693004861166, "grad_norm": 0.6520119905471802, "learning_rate": 1.8350901975400347e-05, "loss": 1.3351, "mean_token_accuracy": 0.6576328774293264, "num_tokens": 671896888.0, "step": 4005 }, { "entropy": 1.6817982296148937, "epoch": 0.4400867869599846, "grad_norm": 0.6815257668495178, "learning_rate": 1.834997805928855e-05, "loss": 1.3744, "mean_token_accuracy": 0.6502506881952286, "num_tokens": 672042565.0, "step": 4006 }, { "entropy": 1.7248376111189525, "epoch": 0.44019664387135754, "grad_norm": 0.5518606901168823, "learning_rate": 1.8349053910553264e-05, "loss": 1.4485, "mean_token_accuracy": 0.6401482870181402, "num_tokens": 672283150.0, "step": 4007 }, { "entropy": 1.7700274089972179, "epoch": 0.4403065007827305, "grad_norm": 0.85428386926651, "learning_rate": 1.834812952922373e-05, "loss": 1.4366, "mean_token_accuracy": 0.64705158273379, "num_tokens": 672418954.0, "step": 4008 }, { "entropy": 1.574070413907369, "epoch": 0.4404163576941034, "grad_norm": 0.7439473867416382, "learning_rate": 1.8347204915329207e-05, "loss": 1.2231, "mean_token_accuracy": 0.6847544858853022, "num_tokens": 672559063.0, "step": 4009 }, { "entropy": 1.7344237864017487, "epoch": 0.44052621460547636, "grad_norm": 0.6516211628913879, "learning_rate": 1.834628006889896e-05, "loss": 1.3989, "mean_token_accuracy": 0.6455797751744589, "num_tokens": 672704524.0, "step": 4010 }, { "entropy": 1.7220177451769512, "epoch": 0.4406360715168493, "grad_norm": 0.6377677321434021, "learning_rate": 1.8345354989962262e-05, "loss": 1.4016, "mean_token_accuracy": 0.6649409184853236, "num_tokens": 672869556.0, "step": 4011 }, { "entropy": 1.7253966728846233, "epoch": 0.44074592842822224, "grad_norm": 0.7619969844818115, "learning_rate": 1.834442967854838e-05, "loss": 1.3548, "mean_token_accuracy": 0.6682330717643102, "num_tokens": 673023121.0, "step": 4012 }, { "entropy": 1.6592314541339874, "epoch": 0.4408557853395952, "grad_norm": 0.8172975778579712, "learning_rate": 1.834350413468662e-05, "loss": 1.3785, "mean_token_accuracy": 0.6685704290866852, "num_tokens": 673202298.0, "step": 4013 }, { "entropy": 1.6940802733103435, "epoch": 0.4409656422509681, "grad_norm": 0.8062915802001953, "learning_rate": 1.8342578358406253e-05, "loss": 1.446, "mean_token_accuracy": 0.6594598790009817, "num_tokens": 673377211.0, "step": 4014 }, { "entropy": 1.7303107678890228, "epoch": 0.44107549916234107, "grad_norm": 0.7482844591140747, "learning_rate": 1.8341652349736593e-05, "loss": 1.4081, "mean_token_accuracy": 0.6585040787855784, "num_tokens": 673529505.0, "step": 4015 }, { "entropy": 1.7489991386731465, "epoch": 0.441185356073714, "grad_norm": 0.7208905220031738, "learning_rate": 1.8340726108706948e-05, "loss": 1.4499, "mean_token_accuracy": 0.6458181291818619, "num_tokens": 673705981.0, "step": 4016 }, { "entropy": 1.708936224381129, "epoch": 0.44129521298508695, "grad_norm": 0.6100684404373169, "learning_rate": 1.8339799635346624e-05, "loss": 1.4808, "mean_token_accuracy": 0.6417776246865591, "num_tokens": 673975367.0, "step": 4017 }, { "entropy": 1.6850675543149312, "epoch": 0.44140506989645983, "grad_norm": 0.5926774740219116, "learning_rate": 1.8338872929684953e-05, "loss": 1.4223, "mean_token_accuracy": 0.651220291852951, "num_tokens": 674168873.0, "step": 4018 }, { "entropy": 1.709786633650462, "epoch": 0.4415149268078328, "grad_norm": 0.8183510899543762, "learning_rate": 1.833794599175126e-05, "loss": 1.3911, "mean_token_accuracy": 0.6542472541332245, "num_tokens": 674343048.0, "step": 4019 }, { "entropy": 1.7094794114430745, "epoch": 0.4416247837192057, "grad_norm": 0.7141227126121521, "learning_rate": 1.833701882157488e-05, "loss": 1.43, "mean_token_accuracy": 0.6631141652663549, "num_tokens": 674505930.0, "step": 4020 }, { "entropy": 1.7598174810409546, "epoch": 0.44173464063057866, "grad_norm": 0.6930931210517883, "learning_rate": 1.833609141918516e-05, "loss": 1.4248, "mean_token_accuracy": 0.6471636444330215, "num_tokens": 674681600.0, "step": 4021 }, { "entropy": 1.7052448689937592, "epoch": 0.4418444975419516, "grad_norm": 0.748052716255188, "learning_rate": 1.833516378461146e-05, "loss": 1.4185, "mean_token_accuracy": 0.6527615735928217, "num_tokens": 674852988.0, "step": 4022 }, { "entropy": 1.735913723707199, "epoch": 0.44195435445332454, "grad_norm": 0.8284699320793152, "learning_rate": 1.8334235917883124e-05, "loss": 1.5755, "mean_token_accuracy": 0.6278869633873304, "num_tokens": 675030743.0, "step": 4023 }, { "entropy": 1.718154142300288, "epoch": 0.4420642113646975, "grad_norm": 0.8008006811141968, "learning_rate": 1.833330781902953e-05, "loss": 1.258, "mean_token_accuracy": 0.6701284448305765, "num_tokens": 675149913.0, "step": 4024 }, { "entropy": 1.6997297902901967, "epoch": 0.4421740682760704, "grad_norm": 0.9283497929573059, "learning_rate": 1.8332379488080046e-05, "loss": 1.1681, "mean_token_accuracy": 0.6950256576140722, "num_tokens": 675290297.0, "step": 4025 }, { "entropy": 1.7370579838752747, "epoch": 0.44228392518744336, "grad_norm": 0.7015495896339417, "learning_rate": 1.8331450925064057e-05, "loss": 1.3353, "mean_token_accuracy": 0.6572895298401514, "num_tokens": 675441895.0, "step": 4026 }, { "entropy": 1.638562301794688, "epoch": 0.4423937820988163, "grad_norm": 0.6312068700790405, "learning_rate": 1.833052213001095e-05, "loss": 1.271, "mean_token_accuracy": 0.6829714129368464, "num_tokens": 675571939.0, "step": 4027 }, { "entropy": 1.763380487759908, "epoch": 0.44250363901018924, "grad_norm": 0.7643721699714661, "learning_rate": 1.8329593102950115e-05, "loss": 1.4964, "mean_token_accuracy": 0.6557734707991282, "num_tokens": 675760546.0, "step": 4028 }, { "entropy": 1.675294816493988, "epoch": 0.4426134959215622, "grad_norm": 0.7117913365364075, "learning_rate": 1.832866384391097e-05, "loss": 1.3794, "mean_token_accuracy": 0.6646661460399628, "num_tokens": 675909939.0, "step": 4029 }, { "entropy": 1.7222477793693542, "epoch": 0.4427233528329351, "grad_norm": 0.609602153301239, "learning_rate": 1.8327734352922912e-05, "loss": 1.385, "mean_token_accuracy": 0.6593077381451925, "num_tokens": 676074765.0, "step": 4030 }, { "entropy": 1.7006352543830872, "epoch": 0.44283320974430807, "grad_norm": 0.6742071509361267, "learning_rate": 1.8326804630015364e-05, "loss": 1.4513, "mean_token_accuracy": 0.6537482092777888, "num_tokens": 676233903.0, "step": 4031 }, { "entropy": 1.672971785068512, "epoch": 0.44294306665568095, "grad_norm": 0.7731028199195862, "learning_rate": 1.8325874675217747e-05, "loss": 1.293, "mean_token_accuracy": 0.6704902996619543, "num_tokens": 676397595.0, "step": 4032 }, { "entropy": 1.7384453018506367, "epoch": 0.4430529235670539, "grad_norm": 0.9834579825401306, "learning_rate": 1.8324944488559505e-05, "loss": 1.5189, "mean_token_accuracy": 0.6412886679172516, "num_tokens": 676566969.0, "step": 4033 }, { "entropy": 1.7224073906739552, "epoch": 0.44316278047842683, "grad_norm": 0.6622791290283203, "learning_rate": 1.8324014070070063e-05, "loss": 1.563, "mean_token_accuracy": 0.6388835261265436, "num_tokens": 676803944.0, "step": 4034 }, { "entropy": 1.7606834868590038, "epoch": 0.4432726373897998, "grad_norm": 0.6880962252616882, "learning_rate": 1.832308341977888e-05, "loss": 1.3521, "mean_token_accuracy": 0.6607374300559362, "num_tokens": 676954542.0, "step": 4035 }, { "entropy": 1.6744478940963745, "epoch": 0.4433824943011727, "grad_norm": 0.7972778081893921, "learning_rate": 1.8322152537715408e-05, "loss": 1.4395, "mean_token_accuracy": 0.6546510507663091, "num_tokens": 677134397.0, "step": 4036 }, { "entropy": 1.6546966234842937, "epoch": 0.44349235121254565, "grad_norm": 0.7038325667381287, "learning_rate": 1.8321221423909105e-05, "loss": 1.2629, "mean_token_accuracy": 0.6734778136014938, "num_tokens": 677303311.0, "step": 4037 }, { "entropy": 1.744826744000117, "epoch": 0.4436022081239186, "grad_norm": 0.739396333694458, "learning_rate": 1.8320290078389448e-05, "loss": 1.5118, "mean_token_accuracy": 0.65053657690684, "num_tokens": 677507834.0, "step": 4038 }, { "entropy": 1.6409766773382823, "epoch": 0.44371206503529154, "grad_norm": 0.7920038104057312, "learning_rate": 1.8319358501185903e-05, "loss": 1.5389, "mean_token_accuracy": 0.6472673763831457, "num_tokens": 677656509.0, "step": 4039 }, { "entropy": 1.7347593108812969, "epoch": 0.4438219219466645, "grad_norm": 0.7497395873069763, "learning_rate": 1.8318426692327958e-05, "loss": 1.5772, "mean_token_accuracy": 0.6470949848492941, "num_tokens": 677792553.0, "step": 4040 }, { "entropy": 1.7003744939963024, "epoch": 0.4439317788580374, "grad_norm": 0.6454471945762634, "learning_rate": 1.8317494651845113e-05, "loss": 1.3954, "mean_token_accuracy": 0.6603303998708725, "num_tokens": 677975581.0, "step": 4041 }, { "entropy": 1.724554717540741, "epoch": 0.44404163576941036, "grad_norm": 0.7715175747871399, "learning_rate": 1.8316562379766855e-05, "loss": 1.6706, "mean_token_accuracy": 0.6182306359211603, "num_tokens": 678156142.0, "step": 4042 }, { "entropy": 1.7756297886371613, "epoch": 0.4441514926807833, "grad_norm": 0.7351700663566589, "learning_rate": 1.83156298761227e-05, "loss": 1.5132, "mean_token_accuracy": 0.6380604902903239, "num_tokens": 678314615.0, "step": 4043 }, { "entropy": 1.6692375938097637, "epoch": 0.44426134959215624, "grad_norm": 0.7419458627700806, "learning_rate": 1.831469714094215e-05, "loss": 1.3315, "mean_token_accuracy": 0.6649828652540842, "num_tokens": 678494083.0, "step": 4044 }, { "entropy": 1.7391583820184071, "epoch": 0.4443712065035291, "grad_norm": 0.6613411903381348, "learning_rate": 1.831376417425473e-05, "loss": 1.4028, "mean_token_accuracy": 0.6531075437863668, "num_tokens": 678688690.0, "step": 4045 }, { "entropy": 1.7318035662174225, "epoch": 0.44448106341490207, "grad_norm": 0.6976780295372009, "learning_rate": 1.831283097608997e-05, "loss": 1.429, "mean_token_accuracy": 0.6665694663921992, "num_tokens": 678819678.0, "step": 4046 }, { "entropy": 1.6295676430066426, "epoch": 0.444590920326275, "grad_norm": 0.6185345649719238, "learning_rate": 1.8311897546477412e-05, "loss": 1.3514, "mean_token_accuracy": 0.6613521029551824, "num_tokens": 679001480.0, "step": 4047 }, { "entropy": 1.7244457403818767, "epoch": 0.44470077723764795, "grad_norm": 0.7285200953483582, "learning_rate": 1.831096388544659e-05, "loss": 1.5472, "mean_token_accuracy": 0.6468717704216639, "num_tokens": 679229552.0, "step": 4048 }, { "entropy": 1.6660625040531158, "epoch": 0.4448106341490209, "grad_norm": 0.7275331020355225, "learning_rate": 1.831002999302705e-05, "loss": 1.3838, "mean_token_accuracy": 0.6649215320746104, "num_tokens": 679408172.0, "step": 4049 }, { "entropy": 1.6905015210310619, "epoch": 0.44492049106039383, "grad_norm": 0.7201270461082458, "learning_rate": 1.8309095869248355e-05, "loss": 1.3025, "mean_token_accuracy": 0.671828548113505, "num_tokens": 679548700.0, "step": 4050 }, { "entropy": 1.6812767088413239, "epoch": 0.44503034797176677, "grad_norm": 0.666533887386322, "learning_rate": 1.8308161514140073e-05, "loss": 1.2311, "mean_token_accuracy": 0.6841448297103246, "num_tokens": 679682542.0, "step": 4051 }, { "entropy": 1.6990590194861095, "epoch": 0.4451402048831397, "grad_norm": 0.7694803476333618, "learning_rate": 1.8307226927731773e-05, "loss": 1.508, "mean_token_accuracy": 0.653964231411616, "num_tokens": 679890013.0, "step": 4052 }, { "entropy": 1.6936483283837636, "epoch": 0.44525006179451265, "grad_norm": 0.7189439535140991, "learning_rate": 1.830629211005303e-05, "loss": 1.3196, "mean_token_accuracy": 0.6643483489751816, "num_tokens": 680032399.0, "step": 4053 }, { "entropy": 1.7283145984013875, "epoch": 0.4453599187058856, "grad_norm": 0.6931325793266296, "learning_rate": 1.8305357061133432e-05, "loss": 1.2627, "mean_token_accuracy": 0.6824038575092951, "num_tokens": 680186468.0, "step": 4054 }, { "entropy": 1.7289324204126995, "epoch": 0.44546977561725853, "grad_norm": 0.7332465052604675, "learning_rate": 1.830442178100258e-05, "loss": 1.3448, "mean_token_accuracy": 0.6571111728747686, "num_tokens": 680332193.0, "step": 4055 }, { "entropy": 1.7295528848965962, "epoch": 0.4455796325286315, "grad_norm": 0.6440022587776184, "learning_rate": 1.830348626969007e-05, "loss": 1.3409, "mean_token_accuracy": 0.6647644688685735, "num_tokens": 680522384.0, "step": 4056 }, { "entropy": 1.6799738903840382, "epoch": 0.4456894894400044, "grad_norm": 0.6439666152000427, "learning_rate": 1.8302550527225507e-05, "loss": 1.4989, "mean_token_accuracy": 0.6489834437767664, "num_tokens": 680717915.0, "step": 4057 }, { "entropy": 1.7007416983445485, "epoch": 0.44579934635137736, "grad_norm": 0.5994968414306641, "learning_rate": 1.830161455363851e-05, "loss": 1.3362, "mean_token_accuracy": 0.6613183865944544, "num_tokens": 680932364.0, "step": 4058 }, { "entropy": 1.7307079831759136, "epoch": 0.44590920326275024, "grad_norm": 0.772515058517456, "learning_rate": 1.8300678348958708e-05, "loss": 1.5584, "mean_token_accuracy": 0.6598212644457817, "num_tokens": 681086770.0, "step": 4059 }, { "entropy": 1.7124249339103699, "epoch": 0.4460190601741232, "grad_norm": 0.6902133822441101, "learning_rate": 1.829974191321572e-05, "loss": 1.3396, "mean_token_accuracy": 0.6813353697458903, "num_tokens": 681208149.0, "step": 4060 }, { "entropy": 1.6109780669212341, "epoch": 0.4461289170854961, "grad_norm": 0.7003684639930725, "learning_rate": 1.8298805246439197e-05, "loss": 1.3003, "mean_token_accuracy": 0.6696944236755371, "num_tokens": 681407580.0, "step": 4061 }, { "entropy": 1.68620361884435, "epoch": 0.44623877399686906, "grad_norm": 0.8141494393348694, "learning_rate": 1.829786834865877e-05, "loss": 1.3527, "mean_token_accuracy": 0.6631912092367808, "num_tokens": 681612026.0, "step": 4062 }, { "entropy": 1.6939865350723267, "epoch": 0.446348630908242, "grad_norm": 0.75359046459198, "learning_rate": 1.82969312199041e-05, "loss": 1.34, "mean_token_accuracy": 0.6642651607592901, "num_tokens": 681769299.0, "step": 4063 }, { "entropy": 1.7100327412287395, "epoch": 0.44645848781961495, "grad_norm": 0.5752301812171936, "learning_rate": 1.8295993860204845e-05, "loss": 1.5232, "mean_token_accuracy": 0.6351617823044459, "num_tokens": 682005797.0, "step": 4064 }, { "entropy": 1.6280939678351085, "epoch": 0.4465683447309879, "grad_norm": 0.8501309156417847, "learning_rate": 1.8295056269590675e-05, "loss": 1.3887, "mean_token_accuracy": 0.6680960903565089, "num_tokens": 682239395.0, "step": 4065 }, { "entropy": 1.6829663415749867, "epoch": 0.44667820164236083, "grad_norm": 0.7033583521842957, "learning_rate": 1.8294118448091255e-05, "loss": 1.3061, "mean_token_accuracy": 0.6647171477476755, "num_tokens": 682383725.0, "step": 4066 }, { "entropy": 1.7133256395657857, "epoch": 0.44678805855373377, "grad_norm": 0.630029559135437, "learning_rate": 1.8293180395736278e-05, "loss": 1.5028, "mean_token_accuracy": 0.6593478719393412, "num_tokens": 682577842.0, "step": 4067 }, { "entropy": 1.6594026386737823, "epoch": 0.4468979154651067, "grad_norm": 0.7004885077476501, "learning_rate": 1.8292242112555428e-05, "loss": 1.4813, "mean_token_accuracy": 0.6722660760084788, "num_tokens": 682739696.0, "step": 4068 }, { "entropy": 1.6464967628320057, "epoch": 0.44700777237647965, "grad_norm": 0.6789165139198303, "learning_rate": 1.82913035985784e-05, "loss": 1.3377, "mean_token_accuracy": 0.6721230993668238, "num_tokens": 682906514.0, "step": 4069 }, { "entropy": 1.6850634415944417, "epoch": 0.4471176292878526, "grad_norm": 0.6883268356323242, "learning_rate": 1.8290364853834898e-05, "loss": 1.4961, "mean_token_accuracy": 0.6561469584703445, "num_tokens": 683089692.0, "step": 4070 }, { "entropy": 1.722246805826823, "epoch": 0.44722748619922553, "grad_norm": 0.7800368070602417, "learning_rate": 1.8289425878354633e-05, "loss": 1.495, "mean_token_accuracy": 0.6425358355045319, "num_tokens": 683271437.0, "step": 4071 }, { "entropy": 1.6442756354808807, "epoch": 0.4473373431105985, "grad_norm": 0.5757925510406494, "learning_rate": 1.8288486672167327e-05, "loss": 1.4154, "mean_token_accuracy": 0.6556447048981985, "num_tokens": 683521616.0, "step": 4072 }, { "entropy": 1.7113324999809265, "epoch": 0.44744720002197136, "grad_norm": 0.6120255589485168, "learning_rate": 1.82875472353027e-05, "loss": 1.3365, "mean_token_accuracy": 0.6572584211826324, "num_tokens": 683677858.0, "step": 4073 }, { "entropy": 1.756430298089981, "epoch": 0.4475570569333443, "grad_norm": 0.9076440334320068, "learning_rate": 1.8286607567790485e-05, "loss": 1.3104, "mean_token_accuracy": 0.6704193005959193, "num_tokens": 683788448.0, "step": 4074 }, { "entropy": 1.6399111052354176, "epoch": 0.44766691384471724, "grad_norm": 0.6672569513320923, "learning_rate": 1.8285667669660426e-05, "loss": 1.3935, "mean_token_accuracy": 0.6672036250432333, "num_tokens": 683930395.0, "step": 4075 }, { "entropy": 1.6177968084812164, "epoch": 0.4477767707560902, "grad_norm": 0.5370674729347229, "learning_rate": 1.8284727540942266e-05, "loss": 1.3163, "mean_token_accuracy": 0.6624527275562286, "num_tokens": 684129876.0, "step": 4076 }, { "entropy": 1.7107476492722828, "epoch": 0.4478866276674631, "grad_norm": 0.6975926756858826, "learning_rate": 1.8283787181665766e-05, "loss": 1.4658, "mean_token_accuracy": 0.6649006853501002, "num_tokens": 684294327.0, "step": 4077 }, { "entropy": 1.7037384510040283, "epoch": 0.44799648457883606, "grad_norm": 0.7005517482757568, "learning_rate": 1.828284659186068e-05, "loss": 1.2692, "mean_token_accuracy": 0.6762935618559519, "num_tokens": 684409485.0, "step": 4078 }, { "entropy": 1.7332588632901509, "epoch": 0.448106341490209, "grad_norm": 0.7170990109443665, "learning_rate": 1.828190577155678e-05, "loss": 1.3511, "mean_token_accuracy": 0.655609572927157, "num_tokens": 684573662.0, "step": 4079 }, { "entropy": 1.656719873348872, "epoch": 0.44821619840158194, "grad_norm": 0.7201644778251648, "learning_rate": 1.8280964720783847e-05, "loss": 1.4354, "mean_token_accuracy": 0.6612338771422704, "num_tokens": 684745344.0, "step": 4080 }, { "entropy": 1.6805977523326874, "epoch": 0.4483260553129549, "grad_norm": 0.6354820728302002, "learning_rate": 1.8280023439571662e-05, "loss": 1.5477, "mean_token_accuracy": 0.6504618128140768, "num_tokens": 684920877.0, "step": 4081 }, { "entropy": 1.6839341123898823, "epoch": 0.4484359122243278, "grad_norm": 0.6117289662361145, "learning_rate": 1.8279081927950012e-05, "loss": 1.3716, "mean_token_accuracy": 0.6628950238227844, "num_tokens": 685094960.0, "step": 4082 }, { "entropy": 1.6351311802864075, "epoch": 0.44854576913570077, "grad_norm": 0.8039343357086182, "learning_rate": 1.8278140185948706e-05, "loss": 1.4611, "mean_token_accuracy": 0.6522148499886194, "num_tokens": 685258463.0, "step": 4083 }, { "entropy": 1.7667676905790966, "epoch": 0.4486556260470737, "grad_norm": 0.9782058000564575, "learning_rate": 1.8277198213597535e-05, "loss": 1.3842, "mean_token_accuracy": 0.655416414141655, "num_tokens": 685386946.0, "step": 4084 }, { "entropy": 1.6758897999922435, "epoch": 0.44876548295844665, "grad_norm": 0.6297981142997742, "learning_rate": 1.8276256010926325e-05, "loss": 1.2551, "mean_token_accuracy": 0.6806664168834686, "num_tokens": 685520402.0, "step": 4085 }, { "entropy": 1.729985237121582, "epoch": 0.44887533986981953, "grad_norm": 0.7002821564674377, "learning_rate": 1.8275313577964885e-05, "loss": 1.2529, "mean_token_accuracy": 0.6838051875432333, "num_tokens": 685648637.0, "step": 4086 }, { "entropy": 1.7272369960943859, "epoch": 0.4489851967811925, "grad_norm": 0.5778807401657104, "learning_rate": 1.8274370914743054e-05, "loss": 1.3942, "mean_token_accuracy": 0.6614800641934077, "num_tokens": 685844253.0, "step": 4087 }, { "entropy": 1.717577338218689, "epoch": 0.4490950536925654, "grad_norm": 0.8240344524383545, "learning_rate": 1.8273428021290658e-05, "loss": 1.5095, "mean_token_accuracy": 0.6495123704274496, "num_tokens": 686028134.0, "step": 4088 }, { "entropy": 1.7241438726584117, "epoch": 0.44920491060393836, "grad_norm": 0.6883000731468201, "learning_rate": 1.8272484897637546e-05, "loss": 1.3816, "mean_token_accuracy": 0.6627028236786524, "num_tokens": 686191080.0, "step": 4089 }, { "entropy": 1.6412847638130188, "epoch": 0.4493147675153113, "grad_norm": 0.6836743950843811, "learning_rate": 1.827154154381356e-05, "loss": 1.5132, "mean_token_accuracy": 0.653800850113233, "num_tokens": 686350524.0, "step": 4090 }, { "entropy": 1.6573506991068523, "epoch": 0.44942462442668424, "grad_norm": 0.6218001246452332, "learning_rate": 1.8270597959848563e-05, "loss": 1.3456, "mean_token_accuracy": 0.6675258924563726, "num_tokens": 686518644.0, "step": 4091 }, { "entropy": 1.709700067838033, "epoch": 0.4495344813380572, "grad_norm": 0.6943197846412659, "learning_rate": 1.826965414577242e-05, "loss": 1.2814, "mean_token_accuracy": 0.6749483694632848, "num_tokens": 686666877.0, "step": 4092 }, { "entropy": 1.6798059542973836, "epoch": 0.4496443382494301, "grad_norm": 0.8047642707824707, "learning_rate": 1.8268710101614996e-05, "loss": 1.4749, "mean_token_accuracy": 0.6598973522583643, "num_tokens": 686831962.0, "step": 4093 }, { "entropy": 1.720873127381007, "epoch": 0.44975419516080306, "grad_norm": 0.7131839394569397, "learning_rate": 1.8267765827406173e-05, "loss": 1.3472, "mean_token_accuracy": 0.6661649147669474, "num_tokens": 686961662.0, "step": 4094 }, { "entropy": 1.7046670416990917, "epoch": 0.449864052072176, "grad_norm": 0.6924872398376465, "learning_rate": 1.8266821323175833e-05, "loss": 1.3393, "mean_token_accuracy": 0.6566531558831533, "num_tokens": 687125536.0, "step": 4095 }, { "entropy": 1.6233412722746532, "epoch": 0.44997390898354894, "grad_norm": 0.6866830587387085, "learning_rate": 1.826587658895388e-05, "loss": 1.3427, "mean_token_accuracy": 0.6586346874634424, "num_tokens": 687304023.0, "step": 4096 }, { "entropy": 1.659420023361842, "epoch": 0.4500837658949219, "grad_norm": 0.7031760811805725, "learning_rate": 1.8264931624770198e-05, "loss": 1.4316, "mean_token_accuracy": 0.6656246980031332, "num_tokens": 687457564.0, "step": 4097 }, { "entropy": 1.7746712168057759, "epoch": 0.4501936228062948, "grad_norm": 0.6770200729370117, "learning_rate": 1.8263986430654713e-05, "loss": 1.4262, "mean_token_accuracy": 0.654616062839826, "num_tokens": 687663560.0, "step": 4098 }, { "entropy": 1.6433165371418, "epoch": 0.45030347971766777, "grad_norm": 0.6647341251373291, "learning_rate": 1.8263041006637326e-05, "loss": 1.3283, "mean_token_accuracy": 0.6662708769241968, "num_tokens": 687804204.0, "step": 4099 }, { "entropy": 1.6864981253941853, "epoch": 0.45041333662904065, "grad_norm": 0.8306163549423218, "learning_rate": 1.8262095352747964e-05, "loss": 1.4174, "mean_token_accuracy": 0.673849806189537, "num_tokens": 687948844.0, "step": 4100 }, { "entropy": 1.6836271584033966, "epoch": 0.4505231935404136, "grad_norm": 0.712296187877655, "learning_rate": 1.8261149469016554e-05, "loss": 1.435, "mean_token_accuracy": 0.6426206976175308, "num_tokens": 688141646.0, "step": 4101 }, { "entropy": 1.702450027068456, "epoch": 0.45063305045178653, "grad_norm": 0.6855894923210144, "learning_rate": 1.826020335547304e-05, "loss": 1.4706, "mean_token_accuracy": 0.6506734440724055, "num_tokens": 688304546.0, "step": 4102 }, { "entropy": 1.7510944306850433, "epoch": 0.4507429073631595, "grad_norm": 0.7090582251548767, "learning_rate": 1.825925701214736e-05, "loss": 1.5314, "mean_token_accuracy": 0.6348803093036016, "num_tokens": 688491140.0, "step": 4103 }, { "entropy": 1.6313609679539998, "epoch": 0.4508527642745324, "grad_norm": 0.6523467898368835, "learning_rate": 1.8258310439069464e-05, "loss": 1.2844, "mean_token_accuracy": 0.6678755730390549, "num_tokens": 688670616.0, "step": 4104 }, { "entropy": 1.6910007297992706, "epoch": 0.45096262118590535, "grad_norm": 0.7028467655181885, "learning_rate": 1.8257363636269315e-05, "loss": 1.6276, "mean_token_accuracy": 0.622983917593956, "num_tokens": 688883798.0, "step": 4105 }, { "entropy": 1.735917756954829, "epoch": 0.4510724780972783, "grad_norm": 0.6216316819190979, "learning_rate": 1.825641660377688e-05, "loss": 1.39, "mean_token_accuracy": 0.643417959411939, "num_tokens": 689099236.0, "step": 4106 }, { "entropy": 1.7116466561953227, "epoch": 0.45118233500865124, "grad_norm": 0.639937698841095, "learning_rate": 1.8255469341622127e-05, "loss": 1.2285, "mean_token_accuracy": 0.6806729783614477, "num_tokens": 689222746.0, "step": 4107 }, { "entropy": 1.7362925708293915, "epoch": 0.4512921919200242, "grad_norm": 0.6595767140388489, "learning_rate": 1.8254521849835038e-05, "loss": 1.4364, "mean_token_accuracy": 0.6543554663658142, "num_tokens": 689427218.0, "step": 4108 }, { "entropy": 1.6418420473734539, "epoch": 0.4514020488313971, "grad_norm": 0.7125058770179749, "learning_rate": 1.82535741284456e-05, "loss": 1.4369, "mean_token_accuracy": 0.6766296525796255, "num_tokens": 689584306.0, "step": 4109 }, { "entropy": 1.709335704644521, "epoch": 0.45151190574277006, "grad_norm": 0.6635110974311829, "learning_rate": 1.825262617748381e-05, "loss": 1.3638, "mean_token_accuracy": 0.668940449754397, "num_tokens": 689719332.0, "step": 4110 }, { "entropy": 1.6440646350383759, "epoch": 0.451621762654143, "grad_norm": 0.6006796956062317, "learning_rate": 1.8251677996979674e-05, "loss": 1.3163, "mean_token_accuracy": 0.671658530831337, "num_tokens": 689892119.0, "step": 4111 }, { "entropy": 1.6859375437100728, "epoch": 0.45173161956551594, "grad_norm": 0.8752461075782776, "learning_rate": 1.825072958696319e-05, "loss": 1.4466, "mean_token_accuracy": 0.6580664763847986, "num_tokens": 690084462.0, "step": 4112 }, { "entropy": 1.6572500467300415, "epoch": 0.4518414764768888, "grad_norm": 0.7441008687019348, "learning_rate": 1.8249780947464388e-05, "loss": 1.2938, "mean_token_accuracy": 0.6691722124814987, "num_tokens": 690248438.0, "step": 4113 }, { "entropy": 1.738774597644806, "epoch": 0.45195133338826177, "grad_norm": 0.6293894648551941, "learning_rate": 1.8248832078513284e-05, "loss": 1.5194, "mean_token_accuracy": 0.6342577387889227, "num_tokens": 690441913.0, "step": 4114 }, { "entropy": 1.7527674436569214, "epoch": 0.4520611902996347, "grad_norm": 1.1343533992767334, "learning_rate": 1.824788298013991e-05, "loss": 1.3726, "mean_token_accuracy": 0.6623023301362991, "num_tokens": 690558299.0, "step": 4115 }, { "entropy": 1.6640233397483826, "epoch": 0.45217104721100765, "grad_norm": 0.6992958784103394, "learning_rate": 1.8246933652374307e-05, "loss": 1.2844, "mean_token_accuracy": 0.6788427929083506, "num_tokens": 690739947.0, "step": 4116 }, { "entropy": 1.7419516444206238, "epoch": 0.4522809041223806, "grad_norm": 0.5939506888389587, "learning_rate": 1.8245984095246518e-05, "loss": 1.4716, "mean_token_accuracy": 0.6305443296829859, "num_tokens": 690997943.0, "step": 4117 }, { "entropy": 1.748884916305542, "epoch": 0.45239076103375353, "grad_norm": 1.0354598760604858, "learning_rate": 1.8245034308786598e-05, "loss": 1.4619, "mean_token_accuracy": 0.6558974186579386, "num_tokens": 691153064.0, "step": 4118 }, { "entropy": 1.6696610649426777, "epoch": 0.45250061794512647, "grad_norm": 0.8129194974899292, "learning_rate": 1.8244084293024607e-05, "loss": 1.3371, "mean_token_accuracy": 0.6734591573476791, "num_tokens": 691283368.0, "step": 4119 }, { "entropy": 1.7136721312999725, "epoch": 0.4526104748564994, "grad_norm": 0.7549412250518799, "learning_rate": 1.8243134047990615e-05, "loss": 1.5517, "mean_token_accuracy": 0.6566175570090612, "num_tokens": 691452676.0, "step": 4120 }, { "entropy": 1.6813490390777588, "epoch": 0.45272033176787235, "grad_norm": 0.7577283978462219, "learning_rate": 1.824218357371469e-05, "loss": 1.2974, "mean_token_accuracy": 0.6722496549288431, "num_tokens": 691597131.0, "step": 4121 }, { "entropy": 1.713281015555064, "epoch": 0.4528301886792453, "grad_norm": 0.7372899651527405, "learning_rate": 1.824123287022692e-05, "loss": 1.4402, "mean_token_accuracy": 0.6518440991640091, "num_tokens": 691743841.0, "step": 4122 }, { "entropy": 1.6881952385107677, "epoch": 0.45294004559061823, "grad_norm": 0.5627362728118896, "learning_rate": 1.824028193755739e-05, "loss": 1.4554, "mean_token_accuracy": 0.6475944221019745, "num_tokens": 691960827.0, "step": 4123 }, { "entropy": 1.7053532501061757, "epoch": 0.4530499025019912, "grad_norm": 0.7508504986763, "learning_rate": 1.8239330775736208e-05, "loss": 1.4518, "mean_token_accuracy": 0.660346490641435, "num_tokens": 692117432.0, "step": 4124 }, { "entropy": 1.7332323094209034, "epoch": 0.4531597594133641, "grad_norm": 0.7479596734046936, "learning_rate": 1.823837938479346e-05, "loss": 1.3183, "mean_token_accuracy": 0.6600077897310257, "num_tokens": 692233378.0, "step": 4125 }, { "entropy": 1.7280583083629608, "epoch": 0.45326961632473706, "grad_norm": 0.7284284234046936, "learning_rate": 1.8237427764759268e-05, "loss": 1.2877, "mean_token_accuracy": 0.6728298515081406, "num_tokens": 692352078.0, "step": 4126 }, { "entropy": 1.733269860347112, "epoch": 0.45337947323610994, "grad_norm": 0.6703977584838867, "learning_rate": 1.823647591566375e-05, "loss": 1.318, "mean_token_accuracy": 0.6564703285694122, "num_tokens": 692524229.0, "step": 4127 }, { "entropy": 1.7730626364549, "epoch": 0.4534893301474829, "grad_norm": 0.720513105392456, "learning_rate": 1.823552383753703e-05, "loss": 1.5289, "mean_token_accuracy": 0.6443274269501368, "num_tokens": 692654697.0, "step": 4128 }, { "entropy": 1.6843830545743306, "epoch": 0.4535991870588558, "grad_norm": 0.6629505157470703, "learning_rate": 1.823457153040924e-05, "loss": 1.4347, "mean_token_accuracy": 0.6500236590703329, "num_tokens": 692822773.0, "step": 4129 }, { "entropy": 1.6885010202725728, "epoch": 0.45370904397022876, "grad_norm": 0.7359088659286499, "learning_rate": 1.823361899431052e-05, "loss": 1.1937, "mean_token_accuracy": 0.6897448152303696, "num_tokens": 692937863.0, "step": 4130 }, { "entropy": 1.6834450860818226, "epoch": 0.4538189008816017, "grad_norm": 0.6505681276321411, "learning_rate": 1.8232666229271022e-05, "loss": 1.4981, "mean_token_accuracy": 0.6411355634530386, "num_tokens": 693128486.0, "step": 4131 }, { "entropy": 1.67915278673172, "epoch": 0.45392875779297465, "grad_norm": 0.6337352991104126, "learning_rate": 1.8231713235320897e-05, "loss": 1.4664, "mean_token_accuracy": 0.6389060864845911, "num_tokens": 693290525.0, "step": 4132 }, { "entropy": 1.7464499572912853, "epoch": 0.4540386147043476, "grad_norm": 0.6891757249832153, "learning_rate": 1.8230760012490303e-05, "loss": 1.4274, "mean_token_accuracy": 0.6466375986735026, "num_tokens": 693428652.0, "step": 4133 }, { "entropy": 1.6625401278336842, "epoch": 0.45414847161572053, "grad_norm": 0.6760391592979431, "learning_rate": 1.8229806560809414e-05, "loss": 1.2088, "mean_token_accuracy": 0.6958808700243632, "num_tokens": 693542006.0, "step": 4134 }, { "entropy": 1.6763292451699574, "epoch": 0.45425832852709347, "grad_norm": 0.6257210373878479, "learning_rate": 1.8228852880308406e-05, "loss": 1.3335, "mean_token_accuracy": 0.655666912595431, "num_tokens": 693691484.0, "step": 4135 }, { "entropy": 1.6853256324927013, "epoch": 0.4543681854384664, "grad_norm": 0.7661891579627991, "learning_rate": 1.8227898971017463e-05, "loss": 1.3239, "mean_token_accuracy": 0.6566215753555298, "num_tokens": 693824382.0, "step": 4136 }, { "entropy": 1.7048971951007843, "epoch": 0.45447804234983935, "grad_norm": 0.5919292569160461, "learning_rate": 1.822694483296677e-05, "loss": 1.4844, "mean_token_accuracy": 0.6445303509632746, "num_tokens": 694012674.0, "step": 4137 }, { "entropy": 1.7726793487866719, "epoch": 0.4545878992612123, "grad_norm": 0.8488749265670776, "learning_rate": 1.8225990466186535e-05, "loss": 1.3119, "mean_token_accuracy": 0.6763729850451151, "num_tokens": 694131834.0, "step": 4138 }, { "entropy": 1.6792495449384053, "epoch": 0.45469775617258523, "grad_norm": 0.6812113523483276, "learning_rate": 1.8225035870706954e-05, "loss": 1.3836, "mean_token_accuracy": 0.6785061955451965, "num_tokens": 694265200.0, "step": 4139 }, { "entropy": 1.7176821033159893, "epoch": 0.4548076130839581, "grad_norm": 0.6587532162666321, "learning_rate": 1.8224081046558245e-05, "loss": 1.2966, "mean_token_accuracy": 0.6683917393287023, "num_tokens": 694386638.0, "step": 4140 }, { "entropy": 1.720909317334493, "epoch": 0.45491746999533106, "grad_norm": 0.6936853528022766, "learning_rate": 1.8223125993770628e-05, "loss": 1.2505, "mean_token_accuracy": 0.6704763223727545, "num_tokens": 694537910.0, "step": 4141 }, { "entropy": 1.616556574900945, "epoch": 0.455027326906704, "grad_norm": 0.6663881540298462, "learning_rate": 1.8222170712374324e-05, "loss": 1.4531, "mean_token_accuracy": 0.6384978145360947, "num_tokens": 694748185.0, "step": 4142 }, { "entropy": 1.7091066340605419, "epoch": 0.45513718381807694, "grad_norm": 0.6039077043533325, "learning_rate": 1.8221215202399575e-05, "loss": 1.4285, "mean_token_accuracy": 0.6513800273338953, "num_tokens": 694907770.0, "step": 4143 }, { "entropy": 1.7064509391784668, "epoch": 0.4552470407294499, "grad_norm": 0.6601234674453735, "learning_rate": 1.8220259463876618e-05, "loss": 1.4402, "mean_token_accuracy": 0.6421432644128799, "num_tokens": 695075241.0, "step": 4144 }, { "entropy": 1.7203948597113292, "epoch": 0.4553568976408228, "grad_norm": 1.0562800168991089, "learning_rate": 1.8219303496835698e-05, "loss": 1.3034, "mean_token_accuracy": 0.6741875658432642, "num_tokens": 695192463.0, "step": 4145 }, { "entropy": 1.7525762518246968, "epoch": 0.45546675455219576, "grad_norm": 0.6298994421958923, "learning_rate": 1.8218347301307082e-05, "loss": 1.4266, "mean_token_accuracy": 0.6470825970172882, "num_tokens": 695375939.0, "step": 4146 }, { "entropy": 1.7427566250165303, "epoch": 0.4555766114635687, "grad_norm": 0.5814052224159241, "learning_rate": 1.8217390877321025e-05, "loss": 1.3958, "mean_token_accuracy": 0.6584447820981344, "num_tokens": 695578543.0, "step": 4147 }, { "entropy": 1.7967002391815186, "epoch": 0.45568646837494164, "grad_norm": 0.8577557802200317, "learning_rate": 1.8216434224907797e-05, "loss": 1.36, "mean_token_accuracy": 0.6580723375082016, "num_tokens": 695749141.0, "step": 4148 }, { "entropy": 1.7214231391747792, "epoch": 0.4557963252863146, "grad_norm": 0.6104874610900879, "learning_rate": 1.8215477344097678e-05, "loss": 1.4512, "mean_token_accuracy": 0.6513601044813792, "num_tokens": 695926344.0, "step": 4149 }, { "entropy": 1.693449040253957, "epoch": 0.4559061821976875, "grad_norm": 0.5731639862060547, "learning_rate": 1.821452023492095e-05, "loss": 1.2681, "mean_token_accuracy": 0.6703798075517019, "num_tokens": 696083860.0, "step": 4150 }, { "entropy": 1.7001817524433136, "epoch": 0.45601603910906047, "grad_norm": 0.6833996176719666, "learning_rate": 1.8213562897407915e-05, "loss": 1.2219, "mean_token_accuracy": 0.6803639133771261, "num_tokens": 696195681.0, "step": 4151 }, { "entropy": 1.718630462884903, "epoch": 0.4561258960204334, "grad_norm": 0.6625598073005676, "learning_rate": 1.8212605331588858e-05, "loss": 1.4388, "mean_token_accuracy": 0.6539454261461893, "num_tokens": 696368212.0, "step": 4152 }, { "entropy": 1.7498544255892436, "epoch": 0.45623575293180635, "grad_norm": 0.6957936882972717, "learning_rate": 1.8211647537494093e-05, "loss": 1.2725, "mean_token_accuracy": 0.6697985430558523, "num_tokens": 696502071.0, "step": 4153 }, { "entropy": 1.6797556082407634, "epoch": 0.45634560984317923, "grad_norm": 0.8322230577468872, "learning_rate": 1.8210689515153934e-05, "loss": 1.4798, "mean_token_accuracy": 0.6528271933396658, "num_tokens": 696684855.0, "step": 4154 }, { "entropy": 1.7090383271376293, "epoch": 0.4564554667545522, "grad_norm": 0.6606098413467407, "learning_rate": 1.82097312645987e-05, "loss": 1.2863, "mean_token_accuracy": 0.6675911794106165, "num_tokens": 696875468.0, "step": 4155 }, { "entropy": 1.6464302639166515, "epoch": 0.4565653236659251, "grad_norm": 0.7480058670043945, "learning_rate": 1.8208772785858724e-05, "loss": 1.3633, "mean_token_accuracy": 0.6650909036397934, "num_tokens": 697063206.0, "step": 4156 }, { "entropy": 1.6582618256409962, "epoch": 0.45667518057729806, "grad_norm": 0.5773199796676636, "learning_rate": 1.8207814078964335e-05, "loss": 1.4054, "mean_token_accuracy": 0.6530261288086573, "num_tokens": 697347872.0, "step": 4157 }, { "entropy": 1.7507151464621227, "epoch": 0.456785037488671, "grad_norm": 0.6207919716835022, "learning_rate": 1.820685514394588e-05, "loss": 1.4224, "mean_token_accuracy": 0.6475935826698939, "num_tokens": 697519220.0, "step": 4158 }, { "entropy": 1.7405341863632202, "epoch": 0.45689489440004394, "grad_norm": 0.7598936557769775, "learning_rate": 1.8205895980833708e-05, "loss": 1.4921, "mean_token_accuracy": 0.6516546607017517, "num_tokens": 697663160.0, "step": 4159 }, { "entropy": 1.7247182031472523, "epoch": 0.4570047513114169, "grad_norm": 0.7689334750175476, "learning_rate": 1.8204936589658172e-05, "loss": 1.3283, "mean_token_accuracy": 0.6829536060492197, "num_tokens": 697796527.0, "step": 4160 }, { "entropy": 1.6589301824569702, "epoch": 0.4571146082227898, "grad_norm": 0.6805022358894348, "learning_rate": 1.820397697044964e-05, "loss": 1.3969, "mean_token_accuracy": 0.6660082787275314, "num_tokens": 698022883.0, "step": 4161 }, { "entropy": 1.7611735065778096, "epoch": 0.45722446513416276, "grad_norm": 0.774599015712738, "learning_rate": 1.8203017123238484e-05, "loss": 1.4106, "mean_token_accuracy": 0.6451850136121114, "num_tokens": 698259147.0, "step": 4162 }, { "entropy": 1.7202747464179993, "epoch": 0.4573343220455357, "grad_norm": 0.6693452000617981, "learning_rate": 1.820205704805508e-05, "loss": 1.3082, "mean_token_accuracy": 0.6705300956964493, "num_tokens": 698409150.0, "step": 4163 }, { "entropy": 1.6769547561804454, "epoch": 0.45744417895690864, "grad_norm": 0.5999146699905396, "learning_rate": 1.820109674492982e-05, "loss": 1.4154, "mean_token_accuracy": 0.6518602818250656, "num_tokens": 698583103.0, "step": 4164 }, { "entropy": 1.7415876587231953, "epoch": 0.4575540358682816, "grad_norm": 0.6980689167976379, "learning_rate": 1.820013621389309e-05, "loss": 1.4015, "mean_token_accuracy": 0.6452522526184717, "num_tokens": 698776410.0, "step": 4165 }, { "entropy": 1.7366366783777873, "epoch": 0.4576638927796545, "grad_norm": 0.6637096405029297, "learning_rate": 1.8199175454975293e-05, "loss": 1.3677, "mean_token_accuracy": 0.6576673090457916, "num_tokens": 698964243.0, "step": 4166 }, { "entropy": 1.7057179510593414, "epoch": 0.4577737496910274, "grad_norm": 1.1949498653411865, "learning_rate": 1.8198214468206836e-05, "loss": 1.3636, "mean_token_accuracy": 0.6599676311016083, "num_tokens": 699149054.0, "step": 4167 }, { "entropy": 1.6758360266685486, "epoch": 0.45788360660240035, "grad_norm": 0.6972344517707825, "learning_rate": 1.819725325361814e-05, "loss": 1.2711, "mean_token_accuracy": 0.6723342637221018, "num_tokens": 699279792.0, "step": 4168 }, { "entropy": 1.7160559793313344, "epoch": 0.4579934635137733, "grad_norm": 0.6513280868530273, "learning_rate": 1.8196291811239614e-05, "loss": 1.6112, "mean_token_accuracy": 0.6390035400787989, "num_tokens": 699483735.0, "step": 4169 }, { "entropy": 1.6684886713822682, "epoch": 0.45810332042514623, "grad_norm": 0.6418635249137878, "learning_rate": 1.81953301411017e-05, "loss": 1.4819, "mean_token_accuracy": 0.6575515071551005, "num_tokens": 699650354.0, "step": 4170 }, { "entropy": 1.7136259178320568, "epoch": 0.4582131773365192, "grad_norm": 0.8453693389892578, "learning_rate": 1.819436824323483e-05, "loss": 1.3379, "mean_token_accuracy": 0.6703378160794576, "num_tokens": 699778940.0, "step": 4171 }, { "entropy": 1.688475747903188, "epoch": 0.4583230342478921, "grad_norm": 0.6598884463310242, "learning_rate": 1.8193406117669442e-05, "loss": 1.3524, "mean_token_accuracy": 0.654664620757103, "num_tokens": 699941197.0, "step": 4172 }, { "entropy": 1.703117161989212, "epoch": 0.45843289115926505, "grad_norm": 0.6558519601821899, "learning_rate": 1.8192443764435996e-05, "loss": 1.3037, "mean_token_accuracy": 0.6665119081735611, "num_tokens": 700098267.0, "step": 4173 }, { "entropy": 1.699433147907257, "epoch": 0.458542748070638, "grad_norm": 0.7802864909172058, "learning_rate": 1.8191481183564947e-05, "loss": 1.5111, "mean_token_accuracy": 0.6549594352642695, "num_tokens": 700262148.0, "step": 4174 }, { "entropy": 1.7704039216041565, "epoch": 0.45865260498201094, "grad_norm": 0.8732252717018127, "learning_rate": 1.8190518375086756e-05, "loss": 1.4362, "mean_token_accuracy": 0.6573774913946787, "num_tokens": 700446932.0, "step": 4175 }, { "entropy": 1.6488378445307414, "epoch": 0.4587624618933839, "grad_norm": 0.702743649482727, "learning_rate": 1.81895553390319e-05, "loss": 1.3467, "mean_token_accuracy": 0.6733838816483816, "num_tokens": 700593036.0, "step": 4176 }, { "entropy": 1.73904745777448, "epoch": 0.4588723188047568, "grad_norm": 0.6903389096260071, "learning_rate": 1.8188592075430854e-05, "loss": 1.5451, "mean_token_accuracy": 0.6281401266654333, "num_tokens": 700807095.0, "step": 4177 }, { "entropy": 1.7598382830619812, "epoch": 0.45898217571612976, "grad_norm": 0.7444609999656677, "learning_rate": 1.8187628584314113e-05, "loss": 1.4259, "mean_token_accuracy": 0.6544857770204544, "num_tokens": 701000480.0, "step": 4178 }, { "entropy": 1.7172856330871582, "epoch": 0.4590920326275027, "grad_norm": 0.7468621134757996, "learning_rate": 1.8186664865712163e-05, "loss": 1.4648, "mean_token_accuracy": 0.6561927249034246, "num_tokens": 701148536.0, "step": 4179 }, { "entropy": 1.7054723699887593, "epoch": 0.45920188953887564, "grad_norm": 0.7430470585823059, "learning_rate": 1.818570091965551e-05, "loss": 1.3712, "mean_token_accuracy": 0.662121370434761, "num_tokens": 701303541.0, "step": 4180 }, { "entropy": 1.7247373064359028, "epoch": 0.4593117464502485, "grad_norm": 0.6628074645996094, "learning_rate": 1.8184736746174658e-05, "loss": 1.4219, "mean_token_accuracy": 0.6571998844544092, "num_tokens": 701471247.0, "step": 4181 }, { "entropy": 1.6875219146410625, "epoch": 0.45942160336162147, "grad_norm": 0.7151501178741455, "learning_rate": 1.818377234530013e-05, "loss": 1.3622, "mean_token_accuracy": 0.6559572865565618, "num_tokens": 701645326.0, "step": 4182 }, { "entropy": 1.7306556105613708, "epoch": 0.4595314602729944, "grad_norm": 0.6144996881484985, "learning_rate": 1.818280771706244e-05, "loss": 1.4024, "mean_token_accuracy": 0.6534335116545359, "num_tokens": 701805577.0, "step": 4183 }, { "entropy": 1.7050406138102214, "epoch": 0.45964131718436735, "grad_norm": 0.6486047506332397, "learning_rate": 1.8181842861492126e-05, "loss": 1.3204, "mean_token_accuracy": 0.6658004969358444, "num_tokens": 701938888.0, "step": 4184 }, { "entropy": 1.6983507772286732, "epoch": 0.4597511740957403, "grad_norm": 0.6919155716896057, "learning_rate": 1.818087777861972e-05, "loss": 1.4086, "mean_token_accuracy": 0.6503052040934563, "num_tokens": 702099604.0, "step": 4185 }, { "entropy": 1.6433574159940083, "epoch": 0.45986103100711323, "grad_norm": 0.8726625442504883, "learning_rate": 1.8179912468475768e-05, "loss": 1.2663, "mean_token_accuracy": 0.6762971927722295, "num_tokens": 702232628.0, "step": 4186 }, { "entropy": 1.6802456776301067, "epoch": 0.45997088791848617, "grad_norm": 0.8037099242210388, "learning_rate": 1.8178946931090822e-05, "loss": 1.3511, "mean_token_accuracy": 0.6654014339049658, "num_tokens": 702373194.0, "step": 4187 }, { "entropy": 1.6643012166023254, "epoch": 0.4600807448298591, "grad_norm": 0.794750452041626, "learning_rate": 1.817798116649544e-05, "loss": 1.3445, "mean_token_accuracy": 0.6783427347739538, "num_tokens": 702492194.0, "step": 4188 }, { "entropy": 1.7596985697746277, "epoch": 0.46019060174123205, "grad_norm": 0.6531470417976379, "learning_rate": 1.8177015174720186e-05, "loss": 1.5094, "mean_token_accuracy": 0.642010380824407, "num_tokens": 702706325.0, "step": 4189 }, { "entropy": 1.7269844611485798, "epoch": 0.460300458652605, "grad_norm": 0.7435563206672668, "learning_rate": 1.817604895579564e-05, "loss": 1.4141, "mean_token_accuracy": 0.6406523485978445, "num_tokens": 702871807.0, "step": 4190 }, { "entropy": 1.7121829390525818, "epoch": 0.46041031556397793, "grad_norm": 0.6904016137123108, "learning_rate": 1.817508250975238e-05, "loss": 1.4689, "mean_token_accuracy": 0.6557297557592392, "num_tokens": 703015510.0, "step": 4191 }, { "entropy": 1.6617660621802013, "epoch": 0.4605201724753509, "grad_norm": 0.5894798636436462, "learning_rate": 1.8174115836620985e-05, "loss": 1.385, "mean_token_accuracy": 0.6519947598377863, "num_tokens": 703225072.0, "step": 4192 }, { "entropy": 1.717599133650462, "epoch": 0.4606300293867238, "grad_norm": 0.6478140354156494, "learning_rate": 1.8173148936432062e-05, "loss": 1.4113, "mean_token_accuracy": 0.6411587198575338, "num_tokens": 703450060.0, "step": 4193 }, { "entropy": 1.69918089111646, "epoch": 0.4607398862980967, "grad_norm": 0.6551547050476074, "learning_rate": 1.8172181809216206e-05, "loss": 1.4039, "mean_token_accuracy": 0.6578977555036545, "num_tokens": 703686905.0, "step": 4194 }, { "entropy": 1.7954679628213246, "epoch": 0.46084974320946964, "grad_norm": 0.7491250038146973, "learning_rate": 1.8171214455004024e-05, "loss": 1.4899, "mean_token_accuracy": 0.6477667888005575, "num_tokens": 703859777.0, "step": 4195 }, { "entropy": 1.6918367048104603, "epoch": 0.4609596001208426, "grad_norm": 0.6992884874343872, "learning_rate": 1.817024687382614e-05, "loss": 1.3772, "mean_token_accuracy": 0.6759544163942337, "num_tokens": 703980670.0, "step": 4196 }, { "entropy": 1.6738916039466858, "epoch": 0.4610694570322155, "grad_norm": 0.9104806184768677, "learning_rate": 1.8169279065713173e-05, "loss": 1.3039, "mean_token_accuracy": 0.6699705421924591, "num_tokens": 704127133.0, "step": 4197 }, { "entropy": 1.731379359960556, "epoch": 0.46117931394358846, "grad_norm": 0.6715532541275024, "learning_rate": 1.8168311030695753e-05, "loss": 1.4613, "mean_token_accuracy": 0.6445593535900116, "num_tokens": 704309900.0, "step": 4198 }, { "entropy": 1.6891814172267914, "epoch": 0.4612891708549614, "grad_norm": 0.5850751399993896, "learning_rate": 1.8167342768804518e-05, "loss": 1.4346, "mean_token_accuracy": 0.6435820659001669, "num_tokens": 704530111.0, "step": 4199 }, { "entropy": 1.6919045945008595, "epoch": 0.46139902776633435, "grad_norm": 0.7237669825553894, "learning_rate": 1.8166374280070118e-05, "loss": 1.5484, "mean_token_accuracy": 0.627090315024058, "num_tokens": 704732824.0, "step": 4200 }, { "entropy": 1.7176773647467296, "epoch": 0.4615088846777073, "grad_norm": 0.7041877508163452, "learning_rate": 1.81654055645232e-05, "loss": 1.5581, "mean_token_accuracy": 0.6256896555423737, "num_tokens": 704992928.0, "step": 4201 }, { "entropy": 1.6486956278483074, "epoch": 0.46161874158908023, "grad_norm": 0.7475844621658325, "learning_rate": 1.8164436622194425e-05, "loss": 1.4561, "mean_token_accuracy": 0.6535168488820394, "num_tokens": 705157798.0, "step": 4202 }, { "entropy": 1.7114735345045726, "epoch": 0.46172859850045317, "grad_norm": 0.657800555229187, "learning_rate": 1.8163467453114454e-05, "loss": 1.2966, "mean_token_accuracy": 0.6700218071540197, "num_tokens": 705312195.0, "step": 4203 }, { "entropy": 1.7229937215646107, "epoch": 0.4618384554118261, "grad_norm": 0.6623897552490234, "learning_rate": 1.816249805731397e-05, "loss": 1.393, "mean_token_accuracy": 0.6550327589114507, "num_tokens": 705484259.0, "step": 4204 }, { "entropy": 1.7269688149293263, "epoch": 0.46194831232319905, "grad_norm": 0.6905580163002014, "learning_rate": 1.816152843482365e-05, "loss": 1.4828, "mean_token_accuracy": 0.6393428792556127, "num_tokens": 705693058.0, "step": 4205 }, { "entropy": 1.6766071319580078, "epoch": 0.462058169234572, "grad_norm": 0.6602928638458252, "learning_rate": 1.816055858567418e-05, "loss": 1.3148, "mean_token_accuracy": 0.6683008025089899, "num_tokens": 705870511.0, "step": 4206 }, { "entropy": 1.6405730545520782, "epoch": 0.46216802614594493, "grad_norm": 0.7517810463905334, "learning_rate": 1.8159588509896262e-05, "loss": 1.2918, "mean_token_accuracy": 0.6697532882293066, "num_tokens": 706068879.0, "step": 4207 }, { "entropy": 1.7424190441767375, "epoch": 0.4622778830573178, "grad_norm": 0.7065527439117432, "learning_rate": 1.815861820752059e-05, "loss": 1.3579, "mean_token_accuracy": 0.6661938230196635, "num_tokens": 706280061.0, "step": 4208 }, { "entropy": 1.648894727230072, "epoch": 0.46238773996869076, "grad_norm": 0.5504491925239563, "learning_rate": 1.815764767857788e-05, "loss": 1.3697, "mean_token_accuracy": 0.6563327610492706, "num_tokens": 706477220.0, "step": 4209 }, { "entropy": 1.6771936317284901, "epoch": 0.4624975968800637, "grad_norm": 0.6242183446884155, "learning_rate": 1.8156676923098847e-05, "loss": 1.435, "mean_token_accuracy": 0.6519751648108164, "num_tokens": 706659831.0, "step": 4210 }, { "entropy": 1.6414129038651784, "epoch": 0.46260745379143664, "grad_norm": 0.6485480070114136, "learning_rate": 1.815570594111421e-05, "loss": 1.2985, "mean_token_accuracy": 0.6742209245761236, "num_tokens": 706803926.0, "step": 4211 }, { "entropy": 1.7177407443523407, "epoch": 0.4627173107028096, "grad_norm": 0.6429049968719482, "learning_rate": 1.8154734732654708e-05, "loss": 1.4829, "mean_token_accuracy": 0.6438324997822443, "num_tokens": 707004084.0, "step": 4212 }, { "entropy": 1.7026935319105785, "epoch": 0.4628271676141825, "grad_norm": 0.5772531628608704, "learning_rate": 1.8153763297751072e-05, "loss": 1.3852, "mean_token_accuracy": 0.6560295174519221, "num_tokens": 707183877.0, "step": 4213 }, { "entropy": 1.7273212869962056, "epoch": 0.46293702452555546, "grad_norm": 0.6105433702468872, "learning_rate": 1.8152791636434057e-05, "loss": 1.596, "mean_token_accuracy": 0.6389935463666916, "num_tokens": 707371269.0, "step": 4214 }, { "entropy": 1.7505205670992534, "epoch": 0.4630468814369284, "grad_norm": 0.6398033499717712, "learning_rate": 1.8151819748734404e-05, "loss": 1.4717, "mean_token_accuracy": 0.6463633726040522, "num_tokens": 707525669.0, "step": 4215 }, { "entropy": 1.7202585935592651, "epoch": 0.46315673834830134, "grad_norm": 0.6473333835601807, "learning_rate": 1.8150847634682883e-05, "loss": 1.4063, "mean_token_accuracy": 0.643185963233312, "num_tokens": 707735780.0, "step": 4216 }, { "entropy": 1.7204928199450176, "epoch": 0.4632665952596743, "grad_norm": 0.7724722027778625, "learning_rate": 1.8149875294310253e-05, "loss": 1.4856, "mean_token_accuracy": 0.6558432877063751, "num_tokens": 707897505.0, "step": 4217 }, { "entropy": 1.7396677335103352, "epoch": 0.4633764521710472, "grad_norm": 0.7432756423950195, "learning_rate": 1.8148902727647293e-05, "loss": 1.3591, "mean_token_accuracy": 0.651182030638059, "num_tokens": 708040117.0, "step": 4218 }, { "entropy": 1.716854214668274, "epoch": 0.46348630908242017, "grad_norm": 0.7438036799430847, "learning_rate": 1.8147929934724783e-05, "loss": 1.4251, "mean_token_accuracy": 0.6668249318997065, "num_tokens": 708197909.0, "step": 4219 }, { "entropy": 1.770167201757431, "epoch": 0.4635961659937931, "grad_norm": 0.7183382511138916, "learning_rate": 1.8146956915573512e-05, "loss": 1.5752, "mean_token_accuracy": 0.6293011605739594, "num_tokens": 708386792.0, "step": 4220 }, { "entropy": 1.8206903139750164, "epoch": 0.46370602290516605, "grad_norm": 0.8068307638168335, "learning_rate": 1.8145983670224278e-05, "loss": 1.3832, "mean_token_accuracy": 0.6531219184398651, "num_tokens": 708535932.0, "step": 4221 }, { "entropy": 1.7339389224847157, "epoch": 0.46381587981653893, "grad_norm": 0.5660984516143799, "learning_rate": 1.8145010198707875e-05, "loss": 1.4435, "mean_token_accuracy": 0.6397424240907034, "num_tokens": 708781487.0, "step": 4222 }, { "entropy": 1.7772690852483113, "epoch": 0.4639257367279119, "grad_norm": 0.7798457145690918, "learning_rate": 1.8144036501055123e-05, "loss": 1.5243, "mean_token_accuracy": 0.6494849175214767, "num_tokens": 708920912.0, "step": 4223 }, { "entropy": 1.7303331792354584, "epoch": 0.4640355936392848, "grad_norm": 0.6171626448631287, "learning_rate": 1.8143062577296835e-05, "loss": 1.4185, "mean_token_accuracy": 0.6400276025136312, "num_tokens": 709104156.0, "step": 4224 }, { "entropy": 1.6459304491678874, "epoch": 0.46414545055065776, "grad_norm": 0.6023601293563843, "learning_rate": 1.814208842746383e-05, "loss": 1.3254, "mean_token_accuracy": 0.6633083323637644, "num_tokens": 709240703.0, "step": 4225 }, { "entropy": 1.6952888270219166, "epoch": 0.4642553074620307, "grad_norm": 0.7373741865158081, "learning_rate": 1.814111405158695e-05, "loss": 1.4096, "mean_token_accuracy": 0.641968791683515, "num_tokens": 709451932.0, "step": 4226 }, { "entropy": 1.745063195625941, "epoch": 0.46436516437340364, "grad_norm": 0.7500935792922974, "learning_rate": 1.8140139449697028e-05, "loss": 1.4981, "mean_token_accuracy": 0.65280049542586, "num_tokens": 709691349.0, "step": 4227 }, { "entropy": 1.8040038843949635, "epoch": 0.4644750212847766, "grad_norm": 0.8563746213912964, "learning_rate": 1.8139164621824907e-05, "loss": 1.4651, "mean_token_accuracy": 0.6291163365046183, "num_tokens": 709840840.0, "step": 4228 }, { "entropy": 1.665471722682317, "epoch": 0.4645848781961495, "grad_norm": 0.6224023103713989, "learning_rate": 1.8138189568001445e-05, "loss": 1.3097, "mean_token_accuracy": 0.6575134992599487, "num_tokens": 709985259.0, "step": 4229 }, { "entropy": 1.605366716782252, "epoch": 0.46469473510752246, "grad_norm": 0.6934778690338135, "learning_rate": 1.8137214288257497e-05, "loss": 1.352, "mean_token_accuracy": 0.6759183506170908, "num_tokens": 710148873.0, "step": 4230 }, { "entropy": 1.7328666150569916, "epoch": 0.4648045920188954, "grad_norm": 0.6883534789085388, "learning_rate": 1.8136238782623937e-05, "loss": 1.4765, "mean_token_accuracy": 0.6576277663310369, "num_tokens": 710287781.0, "step": 4231 }, { "entropy": 1.7534802854061127, "epoch": 0.46491444893026834, "grad_norm": 0.7432095408439636, "learning_rate": 1.813526305113163e-05, "loss": 1.386, "mean_token_accuracy": 0.6680583655834198, "num_tokens": 710462445.0, "step": 4232 }, { "entropy": 1.7207884788513184, "epoch": 0.4650243058416413, "grad_norm": 0.6437093615531921, "learning_rate": 1.813428709381147e-05, "loss": 1.2856, "mean_token_accuracy": 0.6679480870564779, "num_tokens": 710589866.0, "step": 4233 }, { "entropy": 1.667872816324234, "epoch": 0.4651341627530142, "grad_norm": 0.6963672637939453, "learning_rate": 1.813331091069433e-05, "loss": 1.3454, "mean_token_accuracy": 0.6745727012554804, "num_tokens": 710746216.0, "step": 4234 }, { "entropy": 1.7533026337623596, "epoch": 0.4652440196643871, "grad_norm": 0.9982355833053589, "learning_rate": 1.813233450181112e-05, "loss": 1.5738, "mean_token_accuracy": 0.6681816776593527, "num_tokens": 710904614.0, "step": 4235 }, { "entropy": 1.6505067149798076, "epoch": 0.46535387657576005, "grad_norm": 0.9331510066986084, "learning_rate": 1.8131357867192738e-05, "loss": 1.0794, "mean_token_accuracy": 0.6999476154645284, "num_tokens": 711051143.0, "step": 4236 }, { "entropy": 1.690159449974696, "epoch": 0.465463733487133, "grad_norm": 0.7899004817008972, "learning_rate": 1.8130381006870087e-05, "loss": 1.544, "mean_token_accuracy": 0.6387489885091782, "num_tokens": 711244954.0, "step": 4237 }, { "entropy": 1.726913332939148, "epoch": 0.46557359039850593, "grad_norm": 0.6733956933021545, "learning_rate": 1.8129403920874093e-05, "loss": 1.4999, "mean_token_accuracy": 0.6343253751595815, "num_tokens": 711452273.0, "step": 4238 }, { "entropy": 1.7664103507995605, "epoch": 0.4656834473098789, "grad_norm": 0.7927049398422241, "learning_rate": 1.8128426609235673e-05, "loss": 1.3575, "mean_token_accuracy": 0.6616076578696569, "num_tokens": 711585439.0, "step": 4239 }, { "entropy": 1.729611297448476, "epoch": 0.4657933042212518, "grad_norm": 0.7896009683609009, "learning_rate": 1.812744907198577e-05, "loss": 1.3908, "mean_token_accuracy": 0.6635908087094625, "num_tokens": 711773155.0, "step": 4240 }, { "entropy": 1.6715228458245595, "epoch": 0.46590316113262475, "grad_norm": 0.7059125304222107, "learning_rate": 1.8126471309155314e-05, "loss": 1.4095, "mean_token_accuracy": 0.6561342726151148, "num_tokens": 711914804.0, "step": 4241 }, { "entropy": 1.7312528987725575, "epoch": 0.4660130180439977, "grad_norm": 0.6025689244270325, "learning_rate": 1.812549332077525e-05, "loss": 1.546, "mean_token_accuracy": 0.6508001486460367, "num_tokens": 712129813.0, "step": 4242 }, { "entropy": 1.7039500772953033, "epoch": 0.46612287495537064, "grad_norm": 0.8072606921195984, "learning_rate": 1.8124515106876534e-05, "loss": 1.5661, "mean_token_accuracy": 0.6641567001740137, "num_tokens": 712276945.0, "step": 4243 }, { "entropy": 1.716274122397105, "epoch": 0.4662327318667436, "grad_norm": 0.7391960620880127, "learning_rate": 1.8123536667490127e-05, "loss": 1.2449, "mean_token_accuracy": 0.6780173579851786, "num_tokens": 712433226.0, "step": 4244 }, { "entropy": 1.7017336984475453, "epoch": 0.4663425887781165, "grad_norm": 0.6935653686523438, "learning_rate": 1.812255800264699e-05, "loss": 1.3873, "mean_token_accuracy": 0.6610340823729833, "num_tokens": 712584310.0, "step": 4245 }, { "entropy": 1.6905235250790913, "epoch": 0.46645244568948946, "grad_norm": 0.6185707449913025, "learning_rate": 1.8121579112378106e-05, "loss": 1.5186, "mean_token_accuracy": 0.6343037039041519, "num_tokens": 712824746.0, "step": 4246 }, { "entropy": 1.7747002641359966, "epoch": 0.4665623026008624, "grad_norm": 0.7528815269470215, "learning_rate": 1.812059999671445e-05, "loss": 1.5891, "mean_token_accuracy": 0.6364632795254389, "num_tokens": 713007981.0, "step": 4247 }, { "entropy": 1.7169700662295024, "epoch": 0.46667215951223534, "grad_norm": 0.6410589218139648, "learning_rate": 1.811962065568702e-05, "loss": 1.3224, "mean_token_accuracy": 0.6646380325158437, "num_tokens": 713151048.0, "step": 4248 }, { "entropy": 1.7213744620482128, "epoch": 0.4667820164236082, "grad_norm": 0.6539469957351685, "learning_rate": 1.8118641089326795e-05, "loss": 1.3819, "mean_token_accuracy": 0.6464784244696299, "num_tokens": 713338388.0, "step": 4249 }, { "entropy": 1.69509752591451, "epoch": 0.46689187333498117, "grad_norm": 0.6454856395721436, "learning_rate": 1.811766129766479e-05, "loss": 1.3495, "mean_token_accuracy": 0.663901224732399, "num_tokens": 713520267.0, "step": 4250 }, { "entropy": 1.7052615284919739, "epoch": 0.4670017302463541, "grad_norm": 0.6770211458206177, "learning_rate": 1.811668128073201e-05, "loss": 1.5541, "mean_token_accuracy": 0.6339400360981623, "num_tokens": 713708802.0, "step": 4251 }, { "entropy": 1.6545226871967316, "epoch": 0.46711158715772705, "grad_norm": 0.7631199359893799, "learning_rate": 1.811570103855948e-05, "loss": 1.1884, "mean_token_accuracy": 0.6902492394049963, "num_tokens": 713859057.0, "step": 4252 }, { "entropy": 1.755904217561086, "epoch": 0.4672214440691, "grad_norm": 0.7086718082427979, "learning_rate": 1.8114720571178215e-05, "loss": 1.3183, "mean_token_accuracy": 0.6702389965454737, "num_tokens": 713975952.0, "step": 4253 }, { "entropy": 1.7080712616443634, "epoch": 0.46733130098047293, "grad_norm": 0.7158058881759644, "learning_rate": 1.811373987861925e-05, "loss": 1.5163, "mean_token_accuracy": 0.649703840414683, "num_tokens": 714152247.0, "step": 4254 }, { "entropy": 1.6785069803396861, "epoch": 0.46744115789184587, "grad_norm": 0.8712213039398193, "learning_rate": 1.8112758960913622e-05, "loss": 1.4157, "mean_token_accuracy": 0.6431390146414439, "num_tokens": 714309228.0, "step": 4255 }, { "entropy": 1.6953572432200115, "epoch": 0.4675510148032188, "grad_norm": 0.7182803750038147, "learning_rate": 1.811177781809238e-05, "loss": 1.4312, "mean_token_accuracy": 0.6548483719428381, "num_tokens": 714477884.0, "step": 4256 }, { "entropy": 1.6632357239723206, "epoch": 0.46766087171459175, "grad_norm": 0.6293473839759827, "learning_rate": 1.8110796450186575e-05, "loss": 1.4182, "mean_token_accuracy": 0.6520185619592667, "num_tokens": 714659406.0, "step": 4257 }, { "entropy": 1.7589415311813354, "epoch": 0.4677707286259647, "grad_norm": 0.6381205916404724, "learning_rate": 1.810981485722727e-05, "loss": 1.3121, "mean_token_accuracy": 0.6649649838606516, "num_tokens": 714782279.0, "step": 4258 }, { "entropy": 1.7126728395620983, "epoch": 0.46788058553733763, "grad_norm": 0.7846829295158386, "learning_rate": 1.8108833039245522e-05, "loss": 1.3273, "mean_token_accuracy": 0.6554552515347799, "num_tokens": 714981056.0, "step": 4259 }, { "entropy": 1.702657401561737, "epoch": 0.4679904424487106, "grad_norm": 0.9136094450950623, "learning_rate": 1.8107850996272414e-05, "loss": 1.5338, "mean_token_accuracy": 0.654092272122701, "num_tokens": 715155260.0, "step": 4260 }, { "entropy": 1.6792535583178203, "epoch": 0.4681002993600835, "grad_norm": 0.6011461019515991, "learning_rate": 1.8106868728339024e-05, "loss": 1.3942, "mean_token_accuracy": 0.6538469940423965, "num_tokens": 715359586.0, "step": 4261 }, { "entropy": 1.7443041900793712, "epoch": 0.4682101562714564, "grad_norm": 0.8767224550247192, "learning_rate": 1.810588623547644e-05, "loss": 1.5, "mean_token_accuracy": 0.650155504544576, "num_tokens": 715519166.0, "step": 4262 }, { "entropy": 1.7525466084480286, "epoch": 0.46832001318282934, "grad_norm": 0.707300066947937, "learning_rate": 1.8104903517715765e-05, "loss": 1.4655, "mean_token_accuracy": 0.6447683721780777, "num_tokens": 715712649.0, "step": 4263 }, { "entropy": 1.756022532780965, "epoch": 0.4684298700942023, "grad_norm": 0.6850044131278992, "learning_rate": 1.8103920575088092e-05, "loss": 1.3964, "mean_token_accuracy": 0.6608478824297587, "num_tokens": 715836727.0, "step": 4264 }, { "entropy": 1.7747258146603901, "epoch": 0.4685397270055752, "grad_norm": 0.8568280935287476, "learning_rate": 1.810293740762453e-05, "loss": 1.3805, "mean_token_accuracy": 0.6580488632122675, "num_tokens": 715980752.0, "step": 4265 }, { "entropy": 1.7230225205421448, "epoch": 0.46864958391694816, "grad_norm": 0.7651719450950623, "learning_rate": 1.8101954015356204e-05, "loss": 1.3571, "mean_token_accuracy": 0.6602768997351328, "num_tokens": 716150567.0, "step": 4266 }, { "entropy": 1.6723913550376892, "epoch": 0.4687594408283211, "grad_norm": 0.9501248002052307, "learning_rate": 1.810097039831423e-05, "loss": 1.3772, "mean_token_accuracy": 0.6587880253791809, "num_tokens": 716307037.0, "step": 4267 }, { "entropy": 1.7002276877562206, "epoch": 0.46886929773969405, "grad_norm": 0.6281111836433411, "learning_rate": 1.8099986556529748e-05, "loss": 1.5066, "mean_token_accuracy": 0.630008652806282, "num_tokens": 716473812.0, "step": 4268 }, { "entropy": 1.7168980836868286, "epoch": 0.468979154651067, "grad_norm": 0.6951456665992737, "learning_rate": 1.8099002490033886e-05, "loss": 1.3071, "mean_token_accuracy": 0.6706042140722275, "num_tokens": 716625498.0, "step": 4269 }, { "entropy": 1.6649049123128254, "epoch": 0.46908901156243993, "grad_norm": 0.783587634563446, "learning_rate": 1.8098018198857797e-05, "loss": 1.5879, "mean_token_accuracy": 0.6355616301298141, "num_tokens": 716806833.0, "step": 4270 }, { "entropy": 1.6906762719154358, "epoch": 0.46919886847381287, "grad_norm": 0.7415374517440796, "learning_rate": 1.8097033683032627e-05, "loss": 1.2876, "mean_token_accuracy": 0.6685866812864939, "num_tokens": 716934086.0, "step": 4271 }, { "entropy": 1.6387253900369008, "epoch": 0.4693087253851858, "grad_norm": 0.7111901640892029, "learning_rate": 1.8096048942589545e-05, "loss": 1.4057, "mean_token_accuracy": 0.6655648052692413, "num_tokens": 717123686.0, "step": 4272 }, { "entropy": 1.7091521223386128, "epoch": 0.46941858229655875, "grad_norm": 0.8598929047584534, "learning_rate": 1.8095063977559706e-05, "loss": 1.4529, "mean_token_accuracy": 0.6519492069880167, "num_tokens": 717281760.0, "step": 4273 }, { "entropy": 1.7045681079228718, "epoch": 0.4695284392079317, "grad_norm": 0.6577821969985962, "learning_rate": 1.809407878797429e-05, "loss": 1.4014, "mean_token_accuracy": 0.6432823886473974, "num_tokens": 717548049.0, "step": 4274 }, { "entropy": 1.7074416081110637, "epoch": 0.46963829611930463, "grad_norm": 0.7430242300033569, "learning_rate": 1.809309337386448e-05, "loss": 1.216, "mean_token_accuracy": 0.6894436677296957, "num_tokens": 717666119.0, "step": 4275 }, { "entropy": 1.7699640194574993, "epoch": 0.4697481530306775, "grad_norm": 0.635347306728363, "learning_rate": 1.8092107735261456e-05, "loss": 1.3919, "mean_token_accuracy": 0.6452954411506653, "num_tokens": 717850472.0, "step": 4276 }, { "entropy": 1.6869786580403645, "epoch": 0.46985800994205046, "grad_norm": 0.6386687755584717, "learning_rate": 1.8091121872196424e-05, "loss": 1.396, "mean_token_accuracy": 0.6727607051531473, "num_tokens": 718040207.0, "step": 4277 }, { "entropy": 1.741450657447179, "epoch": 0.4699678668534234, "grad_norm": 0.6524372696876526, "learning_rate": 1.8090135784700573e-05, "loss": 1.4028, "mean_token_accuracy": 0.6533336142698923, "num_tokens": 718201275.0, "step": 4278 }, { "entropy": 1.712234725554784, "epoch": 0.47007772376479634, "grad_norm": 0.6172965168952942, "learning_rate": 1.8089149472805124e-05, "loss": 1.3392, "mean_token_accuracy": 0.6610304166873296, "num_tokens": 718387782.0, "step": 4279 }, { "entropy": 1.7065201203028362, "epoch": 0.4701875806761693, "grad_norm": 1.2049860954284668, "learning_rate": 1.808816293654129e-05, "loss": 1.3301, "mean_token_accuracy": 0.6645681808392206, "num_tokens": 718584128.0, "step": 4280 }, { "entropy": 1.7023885548114777, "epoch": 0.4702974375875422, "grad_norm": 0.7760981917381287, "learning_rate": 1.808717617594029e-05, "loss": 1.459, "mean_token_accuracy": 0.6542429427305857, "num_tokens": 718796876.0, "step": 4281 }, { "entropy": 1.6860670546690624, "epoch": 0.47040729449891516, "grad_norm": 0.5433915257453918, "learning_rate": 1.808618919103336e-05, "loss": 1.3945, "mean_token_accuracy": 0.6503080328305563, "num_tokens": 719018527.0, "step": 4282 }, { "entropy": 1.6572819451491039, "epoch": 0.4705171514102881, "grad_norm": 0.6897891759872437, "learning_rate": 1.8085201981851736e-05, "loss": 1.2418, "mean_token_accuracy": 0.6722082744042078, "num_tokens": 719147052.0, "step": 4283 }, { "entropy": 1.6799413760503132, "epoch": 0.47062700832166104, "grad_norm": 0.6752045154571533, "learning_rate": 1.8084214548426654e-05, "loss": 1.4649, "mean_token_accuracy": 0.6453558802604675, "num_tokens": 719329449.0, "step": 4284 }, { "entropy": 1.722192605336507, "epoch": 0.470736865233034, "grad_norm": 0.6820024847984314, "learning_rate": 1.808322689078938e-05, "loss": 1.4336, "mean_token_accuracy": 0.6577473928531011, "num_tokens": 719487213.0, "step": 4285 }, { "entropy": 1.74460373322169, "epoch": 0.4708467221444069, "grad_norm": 0.6988834142684937, "learning_rate": 1.808223900897117e-05, "loss": 1.4352, "mean_token_accuracy": 0.6543379972378413, "num_tokens": 719694255.0, "step": 4286 }, { "entropy": 1.7346366445223491, "epoch": 0.47095657905577987, "grad_norm": 0.583162248134613, "learning_rate": 1.808125090300328e-05, "loss": 1.5286, "mean_token_accuracy": 0.641033207376798, "num_tokens": 719893163.0, "step": 4287 }, { "entropy": 1.7112852732340496, "epoch": 0.4710664359671528, "grad_norm": 0.7566839456558228, "learning_rate": 1.8080262572916995e-05, "loss": 1.4578, "mean_token_accuracy": 0.6502961864074072, "num_tokens": 720100241.0, "step": 4288 }, { "entropy": 1.7262468834718068, "epoch": 0.4711762928785257, "grad_norm": 0.7071588039398193, "learning_rate": 1.8079274018743586e-05, "loss": 1.4254, "mean_token_accuracy": 0.6600110133488973, "num_tokens": 720246867.0, "step": 4289 }, { "entropy": 1.7163510620594025, "epoch": 0.47128614978989863, "grad_norm": 0.6831440925598145, "learning_rate": 1.8078285240514346e-05, "loss": 1.4533, "mean_token_accuracy": 0.6454131652911504, "num_tokens": 720458244.0, "step": 4290 }, { "entropy": 1.728024274110794, "epoch": 0.4713960067012716, "grad_norm": 0.7538992762565613, "learning_rate": 1.8077296238260566e-05, "loss": 1.411, "mean_token_accuracy": 0.6426265041033427, "num_tokens": 720649523.0, "step": 4291 }, { "entropy": 1.7086349626382191, "epoch": 0.4715058636126445, "grad_norm": 0.7316638231277466, "learning_rate": 1.807630701201355e-05, "loss": 1.5146, "mean_token_accuracy": 0.6412151654561361, "num_tokens": 720851857.0, "step": 4292 }, { "entropy": 1.7693568468093872, "epoch": 0.47161572052401746, "grad_norm": 0.632696807384491, "learning_rate": 1.8075317561804607e-05, "loss": 1.4909, "mean_token_accuracy": 0.6514314661423365, "num_tokens": 721060765.0, "step": 4293 }, { "entropy": 1.6974764168262482, "epoch": 0.4717255774353904, "grad_norm": 0.6965688467025757, "learning_rate": 1.8074327887665055e-05, "loss": 1.2898, "mean_token_accuracy": 0.6642700731754303, "num_tokens": 721194660.0, "step": 4294 }, { "entropy": 1.6968635221322377, "epoch": 0.47183543434676334, "grad_norm": 0.6898081302642822, "learning_rate": 1.8073337989626204e-05, "loss": 1.3322, "mean_token_accuracy": 0.6627618571122488, "num_tokens": 721383484.0, "step": 4295 }, { "entropy": 1.6964893241723378, "epoch": 0.4719452912581363, "grad_norm": 0.8017412424087524, "learning_rate": 1.80723478677194e-05, "loss": 1.5026, "mean_token_accuracy": 0.6530030220746994, "num_tokens": 721549670.0, "step": 4296 }, { "entropy": 1.6955971519152324, "epoch": 0.4720551481695092, "grad_norm": 0.6565828919410706, "learning_rate": 1.8071357521975973e-05, "loss": 1.3502, "mean_token_accuracy": 0.65217158695062, "num_tokens": 721745653.0, "step": 4297 }, { "entropy": 1.7536290884017944, "epoch": 0.47216500508088216, "grad_norm": 0.8173093795776367, "learning_rate": 1.8070366952427264e-05, "loss": 1.2962, "mean_token_accuracy": 0.6676636189222336, "num_tokens": 721858073.0, "step": 4298 }, { "entropy": 1.7875964442888896, "epoch": 0.4722748619922551, "grad_norm": 0.7748461961746216, "learning_rate": 1.8069376159104627e-05, "loss": 1.3616, "mean_token_accuracy": 0.6534435500701269, "num_tokens": 721959036.0, "step": 4299 }, { "entropy": 1.6941121816635132, "epoch": 0.47238471890362804, "grad_norm": 0.6175353527069092, "learning_rate": 1.8068385142039423e-05, "loss": 1.422, "mean_token_accuracy": 0.6544578274091085, "num_tokens": 722165019.0, "step": 4300 }, { "entropy": 1.7405302226543427, "epoch": 0.472494575815001, "grad_norm": 0.7650004625320435, "learning_rate": 1.8067393901263012e-05, "loss": 1.4173, "mean_token_accuracy": 0.6511756877104441, "num_tokens": 722308976.0, "step": 4301 }, { "entropy": 1.676590492328008, "epoch": 0.4726044327263739, "grad_norm": 0.6478630900382996, "learning_rate": 1.806640243680677e-05, "loss": 1.4806, "mean_token_accuracy": 0.6535717646280924, "num_tokens": 722469917.0, "step": 4302 }, { "entropy": 1.7782863477865856, "epoch": 0.4727142896377468, "grad_norm": 0.719274640083313, "learning_rate": 1.8065410748702074e-05, "loss": 1.2816, "mean_token_accuracy": 0.65965636074543, "num_tokens": 722610621.0, "step": 4303 }, { "entropy": 1.710064172744751, "epoch": 0.47282414654911975, "grad_norm": 0.7524754405021667, "learning_rate": 1.8064418836980308e-05, "loss": 1.309, "mean_token_accuracy": 0.6599444597959518, "num_tokens": 722755392.0, "step": 4304 }, { "entropy": 1.7238652805487316, "epoch": 0.4729340034604927, "grad_norm": 0.7304325699806213, "learning_rate": 1.8063426701672873e-05, "loss": 1.4739, "mean_token_accuracy": 0.6304669479529063, "num_tokens": 722957312.0, "step": 4305 }, { "entropy": 1.7300621767838795, "epoch": 0.47304386037186563, "grad_norm": 0.6810824275016785, "learning_rate": 1.8062434342811162e-05, "loss": 1.2365, "mean_token_accuracy": 0.676527221997579, "num_tokens": 723083250.0, "step": 4306 }, { "entropy": 1.6840552985668182, "epoch": 0.4731537172832386, "grad_norm": 0.6959050297737122, "learning_rate": 1.806144176042659e-05, "loss": 1.2747, "mean_token_accuracy": 0.6719006498654684, "num_tokens": 723200410.0, "step": 4307 }, { "entropy": 1.674494077761968, "epoch": 0.4732635741946115, "grad_norm": 0.821858823299408, "learning_rate": 1.806044895455057e-05, "loss": 1.5042, "mean_token_accuracy": 0.6550217668215433, "num_tokens": 723415334.0, "step": 4308 }, { "entropy": 1.7024798194567363, "epoch": 0.47337343110598445, "grad_norm": 0.6039428114891052, "learning_rate": 1.805945592521452e-05, "loss": 1.529, "mean_token_accuracy": 0.6412127415339152, "num_tokens": 723682772.0, "step": 4309 }, { "entropy": 1.6901743511358898, "epoch": 0.4734832880173574, "grad_norm": 0.6753347516059875, "learning_rate": 1.805846267244987e-05, "loss": 1.4604, "mean_token_accuracy": 0.6477407167355219, "num_tokens": 723833718.0, "step": 4310 }, { "entropy": 1.7576901117960613, "epoch": 0.47359314492873034, "grad_norm": 0.6248276233673096, "learning_rate": 1.805746919628806e-05, "loss": 1.4668, "mean_token_accuracy": 0.6350358178218206, "num_tokens": 724044003.0, "step": 4311 }, { "entropy": 1.8473595082759857, "epoch": 0.4737030018401033, "grad_norm": 0.74982750415802, "learning_rate": 1.805647549676053e-05, "loss": 1.3729, "mean_token_accuracy": 0.6502262949943542, "num_tokens": 724178065.0, "step": 4312 }, { "entropy": 1.701841801404953, "epoch": 0.4738128587514762, "grad_norm": 0.6254753470420837, "learning_rate": 1.805548157389873e-05, "loss": 1.4934, "mean_token_accuracy": 0.6539788742860159, "num_tokens": 724368669.0, "step": 4313 }, { "entropy": 1.706893652677536, "epoch": 0.47392271566284916, "grad_norm": 0.7036319971084595, "learning_rate": 1.8054487427734114e-05, "loss": 1.3255, "mean_token_accuracy": 0.6660927186409632, "num_tokens": 724504066.0, "step": 4314 }, { "entropy": 1.7539520064989726, "epoch": 0.4740325725742221, "grad_norm": 0.7536458373069763, "learning_rate": 1.805349305829815e-05, "loss": 1.4542, "mean_token_accuracy": 0.655565415819486, "num_tokens": 724665861.0, "step": 4315 }, { "entropy": 1.6917597552140553, "epoch": 0.474142429485595, "grad_norm": 0.7118553519248962, "learning_rate": 1.8052498465622314e-05, "loss": 1.3756, "mean_token_accuracy": 0.6556372493505478, "num_tokens": 724831820.0, "step": 4316 }, { "entropy": 1.6829906304677327, "epoch": 0.4742522863969679, "grad_norm": 0.6873073577880859, "learning_rate": 1.8051503649738072e-05, "loss": 1.2962, "mean_token_accuracy": 0.6665286769469579, "num_tokens": 724954659.0, "step": 4317 }, { "entropy": 1.7101400991280873, "epoch": 0.47436214330834087, "grad_norm": 0.6849009990692139, "learning_rate": 1.8050508610676922e-05, "loss": 1.3413, "mean_token_accuracy": 0.6543218890825907, "num_tokens": 725144933.0, "step": 4318 }, { "entropy": 1.7625108063220978, "epoch": 0.4744720002197138, "grad_norm": 0.6787395477294922, "learning_rate": 1.804951334847035e-05, "loss": 1.4429, "mean_token_accuracy": 0.6538337916135788, "num_tokens": 725328948.0, "step": 4319 }, { "entropy": 1.7518400251865387, "epoch": 0.47458185713108675, "grad_norm": 0.7725258469581604, "learning_rate": 1.804851786314986e-05, "loss": 1.4116, "mean_token_accuracy": 0.6575403213500977, "num_tokens": 725488681.0, "step": 4320 }, { "entropy": 1.7236202557881672, "epoch": 0.4746917140424597, "grad_norm": 0.6077833771705627, "learning_rate": 1.8047522154746953e-05, "loss": 1.5031, "mean_token_accuracy": 0.6396900862455368, "num_tokens": 725679245.0, "step": 4321 }, { "entropy": 1.6955150763193767, "epoch": 0.47480157095383263, "grad_norm": 0.628399670124054, "learning_rate": 1.8046526223293147e-05, "loss": 1.4053, "mean_token_accuracy": 0.6565594325462977, "num_tokens": 725865088.0, "step": 4322 }, { "entropy": 1.6883783340454102, "epoch": 0.47491142786520557, "grad_norm": 0.7310377359390259, "learning_rate": 1.804553006881996e-05, "loss": 1.479, "mean_token_accuracy": 0.6422811150550842, "num_tokens": 726050927.0, "step": 4323 }, { "entropy": 1.709149529536565, "epoch": 0.4750212847765785, "grad_norm": 0.683785080909729, "learning_rate": 1.8044533691358924e-05, "loss": 1.3372, "mean_token_accuracy": 0.655320425828298, "num_tokens": 726212417.0, "step": 4324 }, { "entropy": 1.694841782251994, "epoch": 0.47513114168795145, "grad_norm": 0.6588259339332581, "learning_rate": 1.8043537090941566e-05, "loss": 1.447, "mean_token_accuracy": 0.6471510380506516, "num_tokens": 726413076.0, "step": 4325 }, { "entropy": 1.632349779208501, "epoch": 0.4752409985993244, "grad_norm": 0.7610387802124023, "learning_rate": 1.8042540267599434e-05, "loss": 1.2092, "mean_token_accuracy": 0.6765281210343043, "num_tokens": 726564146.0, "step": 4326 }, { "entropy": 1.6315424640973408, "epoch": 0.47535085551069733, "grad_norm": 0.660910427570343, "learning_rate": 1.804154322136408e-05, "loss": 1.4702, "mean_token_accuracy": 0.652957613269488, "num_tokens": 726780422.0, "step": 4327 }, { "entropy": 1.766084998846054, "epoch": 0.4754607124220703, "grad_norm": 0.7246162295341492, "learning_rate": 1.8040545952267054e-05, "loss": 1.3591, "mean_token_accuracy": 0.6496909161408743, "num_tokens": 726905268.0, "step": 4328 }, { "entropy": 1.6654809912045796, "epoch": 0.4755705693334432, "grad_norm": 0.5605107545852661, "learning_rate": 1.803954846033992e-05, "loss": 1.4205, "mean_token_accuracy": 0.6426665484905243, "num_tokens": 727121679.0, "step": 4329 }, { "entropy": 1.651865929365158, "epoch": 0.4756804262448161, "grad_norm": 0.7960909605026245, "learning_rate": 1.803855074561425e-05, "loss": 1.4079, "mean_token_accuracy": 0.6638611356417338, "num_tokens": 727277692.0, "step": 4330 }, { "entropy": 1.6862823764483135, "epoch": 0.47579028315618904, "grad_norm": 0.7021883726119995, "learning_rate": 1.8037552808121623e-05, "loss": 1.3779, "mean_token_accuracy": 0.6561082353194555, "num_tokens": 727419140.0, "step": 4331 }, { "entropy": 1.6958340108394623, "epoch": 0.475900140067562, "grad_norm": 0.5929916501045227, "learning_rate": 1.8036554647893614e-05, "loss": 1.4561, "mean_token_accuracy": 0.6269871642192205, "num_tokens": 727672721.0, "step": 4332 }, { "entropy": 1.667265961567561, "epoch": 0.4760099969789349, "grad_norm": 0.7513339519500732, "learning_rate": 1.8035556264961827e-05, "loss": 1.4436, "mean_token_accuracy": 0.6567800442377726, "num_tokens": 727874717.0, "step": 4333 }, { "entropy": 1.768438736597697, "epoch": 0.47611985389030786, "grad_norm": 0.893416702747345, "learning_rate": 1.8034557659357854e-05, "loss": 1.3738, "mean_token_accuracy": 0.6622404058774313, "num_tokens": 728038303.0, "step": 4334 }, { "entropy": 1.6817525227864583, "epoch": 0.4762297108016808, "grad_norm": 0.6666687726974487, "learning_rate": 1.8033558831113296e-05, "loss": 1.4322, "mean_token_accuracy": 0.6520895212888718, "num_tokens": 728211654.0, "step": 4335 }, { "entropy": 1.7423338790734608, "epoch": 0.47633956771305375, "grad_norm": 0.7516040205955505, "learning_rate": 1.8032559780259777e-05, "loss": 1.4885, "mean_token_accuracy": 0.6466931303342184, "num_tokens": 728382033.0, "step": 4336 }, { "entropy": 1.714792827765147, "epoch": 0.4764494246244267, "grad_norm": 0.6818469762802124, "learning_rate": 1.803156050682891e-05, "loss": 1.4962, "mean_token_accuracy": 0.6401876310507456, "num_tokens": 728568806.0, "step": 4337 }, { "entropy": 1.6942576467990875, "epoch": 0.47655928153579963, "grad_norm": 0.7585091590881348, "learning_rate": 1.8030561010852318e-05, "loss": 1.417, "mean_token_accuracy": 0.6545371363560358, "num_tokens": 728696501.0, "step": 4338 }, { "entropy": 1.6838724116484325, "epoch": 0.47666913844717257, "grad_norm": 0.6353939771652222, "learning_rate": 1.8029561292361636e-05, "loss": 1.3464, "mean_token_accuracy": 0.6659842431545258, "num_tokens": 728827471.0, "step": 4339 }, { "entropy": 1.736908346414566, "epoch": 0.4767789953585455, "grad_norm": 0.6974917650222778, "learning_rate": 1.802856135138851e-05, "loss": 1.3136, "mean_token_accuracy": 0.6713606069485346, "num_tokens": 728948450.0, "step": 4340 }, { "entropy": 1.7290816803773243, "epoch": 0.47688885226991845, "grad_norm": 0.6494654417037964, "learning_rate": 1.8027561187964583e-05, "loss": 1.3931, "mean_token_accuracy": 0.6582505901654562, "num_tokens": 729130424.0, "step": 4341 }, { "entropy": 1.7498342792193096, "epoch": 0.4769987091812914, "grad_norm": 0.7365612983703613, "learning_rate": 1.8026560802121514e-05, "loss": 1.4257, "mean_token_accuracy": 0.656251793106397, "num_tokens": 729273974.0, "step": 4342 }, { "entropy": 1.6831127107143402, "epoch": 0.47710856609266433, "grad_norm": 0.6563628315925598, "learning_rate": 1.8025560193890957e-05, "loss": 1.2273, "mean_token_accuracy": 0.6860732932885488, "num_tokens": 729412026.0, "step": 4343 }, { "entropy": 1.7000919779141743, "epoch": 0.4772184230040372, "grad_norm": 0.6804105639457703, "learning_rate": 1.802455936330459e-05, "loss": 1.4399, "mean_token_accuracy": 0.6513183464606603, "num_tokens": 729599255.0, "step": 4344 }, { "entropy": 1.6972023944060008, "epoch": 0.47732827991541016, "grad_norm": 0.7301200032234192, "learning_rate": 1.8023558310394085e-05, "loss": 1.4833, "mean_token_accuracy": 0.6496182779471079, "num_tokens": 729748405.0, "step": 4345 }, { "entropy": 1.6879161496957142, "epoch": 0.4774381368267831, "grad_norm": 0.7359111309051514, "learning_rate": 1.802255703519112e-05, "loss": 1.3136, "mean_token_accuracy": 0.6658755342165629, "num_tokens": 729884748.0, "step": 4346 }, { "entropy": 1.6763985455036163, "epoch": 0.47754799373815604, "grad_norm": 0.6414416432380676, "learning_rate": 1.802155553772739e-05, "loss": 1.4129, "mean_token_accuracy": 0.6613647441069285, "num_tokens": 730055896.0, "step": 4347 }, { "entropy": 1.670947680870692, "epoch": 0.477657850649529, "grad_norm": 0.6289905905723572, "learning_rate": 1.8020553818034598e-05, "loss": 1.3287, "mean_token_accuracy": 0.6603012681007385, "num_tokens": 730249265.0, "step": 4348 }, { "entropy": 1.7056255042552948, "epoch": 0.4777677075609019, "grad_norm": 0.6382940411567688, "learning_rate": 1.801955187614443e-05, "loss": 1.288, "mean_token_accuracy": 0.6747541030248007, "num_tokens": 730391443.0, "step": 4349 }, { "entropy": 1.6851585308710735, "epoch": 0.47787756447227486, "grad_norm": 0.735909640789032, "learning_rate": 1.8018549712088616e-05, "loss": 1.4657, "mean_token_accuracy": 0.6491023351748785, "num_tokens": 730555554.0, "step": 4350 }, { "entropy": 1.7253076136112213, "epoch": 0.4779874213836478, "grad_norm": 0.6947519183158875, "learning_rate": 1.8017547325898867e-05, "loss": 1.5152, "mean_token_accuracy": 0.6418954481681188, "num_tokens": 730748007.0, "step": 4351 }, { "entropy": 1.7013379335403442, "epoch": 0.47809727829502074, "grad_norm": 0.7011768221855164, "learning_rate": 1.8016544717606902e-05, "loss": 1.4242, "mean_token_accuracy": 0.6465904712677002, "num_tokens": 730952979.0, "step": 4352 }, { "entropy": 1.6715512077013652, "epoch": 0.4782071352063937, "grad_norm": 0.7021841406822205, "learning_rate": 1.8015541887244464e-05, "loss": 1.5118, "mean_token_accuracy": 0.6352577755848566, "num_tokens": 731173855.0, "step": 4353 }, { "entropy": 1.6683913866678874, "epoch": 0.4783169921177666, "grad_norm": 0.8116162419319153, "learning_rate": 1.801453883484328e-05, "loss": 1.5141, "mean_token_accuracy": 0.6527464812000593, "num_tokens": 731356672.0, "step": 4354 }, { "entropy": 1.7470175723234813, "epoch": 0.47842684902913957, "grad_norm": 0.72096848487854, "learning_rate": 1.801353556043511e-05, "loss": 1.3687, "mean_token_accuracy": 0.653298462430636, "num_tokens": 731479511.0, "step": 4355 }, { "entropy": 1.6899640957514446, "epoch": 0.4785367059405125, "grad_norm": 0.7514692544937134, "learning_rate": 1.8012532064051695e-05, "loss": 1.4348, "mean_token_accuracy": 0.6547542959451675, "num_tokens": 731618070.0, "step": 4356 }, { "entropy": 1.7060795327027638, "epoch": 0.4786465628518854, "grad_norm": 0.6599856019020081, "learning_rate": 1.8011528345724804e-05, "loss": 1.4117, "mean_token_accuracy": 0.6612872232993444, "num_tokens": 731758018.0, "step": 4357 }, { "entropy": 1.7181631028652191, "epoch": 0.47875641976325833, "grad_norm": 0.8530781865119934, "learning_rate": 1.8010524405486197e-05, "loss": 1.6284, "mean_token_accuracy": 0.6449073478579521, "num_tokens": 731909530.0, "step": 4358 }, { "entropy": 1.7219158411026, "epoch": 0.4788662766746313, "grad_norm": 0.6865781545639038, "learning_rate": 1.8009520243367652e-05, "loss": 1.4611, "mean_token_accuracy": 0.6500067412853241, "num_tokens": 732098400.0, "step": 4359 }, { "entropy": 1.6570659577846527, "epoch": 0.4789761335860042, "grad_norm": 0.6869319677352905, "learning_rate": 1.800851585940095e-05, "loss": 1.4586, "mean_token_accuracy": 0.6624219765265783, "num_tokens": 732254430.0, "step": 4360 }, { "entropy": 1.719583551088969, "epoch": 0.47908599049737716, "grad_norm": 0.6726648807525635, "learning_rate": 1.800751125361788e-05, "loss": 1.5502, "mean_token_accuracy": 0.643532986442248, "num_tokens": 732476478.0, "step": 4361 }, { "entropy": 1.714612752199173, "epoch": 0.4791958474087501, "grad_norm": 0.654414713382721, "learning_rate": 1.8006506426050235e-05, "loss": 1.4203, "mean_token_accuracy": 0.6522560815016428, "num_tokens": 732620031.0, "step": 4362 }, { "entropy": 1.7117332716782887, "epoch": 0.47930570432012304, "grad_norm": 0.7520214319229126, "learning_rate": 1.8005501376729818e-05, "loss": 1.4046, "mean_token_accuracy": 0.6597649057706197, "num_tokens": 732780049.0, "step": 4363 }, { "entropy": 1.845967213312785, "epoch": 0.479415561231496, "grad_norm": 0.6956945061683655, "learning_rate": 1.800449610568844e-05, "loss": 1.525, "mean_token_accuracy": 0.6418049583832423, "num_tokens": 732964821.0, "step": 4364 }, { "entropy": 1.7225728332996368, "epoch": 0.4795254181428689, "grad_norm": 1.2998478412628174, "learning_rate": 1.800349061295792e-05, "loss": 1.3252, "mean_token_accuracy": 0.6638927906751633, "num_tokens": 733081631.0, "step": 4365 }, { "entropy": 1.6589208642641704, "epoch": 0.47963527505424186, "grad_norm": 0.5991522073745728, "learning_rate": 1.8002484898570073e-05, "loss": 1.4725, "mean_token_accuracy": 0.6591239819924036, "num_tokens": 733257026.0, "step": 4366 }, { "entropy": 1.6971391240755718, "epoch": 0.4797451319656148, "grad_norm": 0.6521365642547607, "learning_rate": 1.800147896255674e-05, "loss": 1.3539, "mean_token_accuracy": 0.6691495180130005, "num_tokens": 733446044.0, "step": 4367 }, { "entropy": 1.7168017029762268, "epoch": 0.47985498887698774, "grad_norm": 0.6156492233276367, "learning_rate": 1.800047280494975e-05, "loss": 1.4011, "mean_token_accuracy": 0.6625112245480219, "num_tokens": 733656891.0, "step": 4368 }, { "entropy": 1.7018579840660095, "epoch": 0.4799648457883607, "grad_norm": 0.7289679050445557, "learning_rate": 1.7999466425780948e-05, "loss": 1.1951, "mean_token_accuracy": 0.6875071277221044, "num_tokens": 733770636.0, "step": 4369 }, { "entropy": 1.71112060546875, "epoch": 0.4800747026997336, "grad_norm": 0.7808387279510498, "learning_rate": 1.7998459825082192e-05, "loss": 1.2365, "mean_token_accuracy": 0.6787021855513254, "num_tokens": 733883779.0, "step": 4370 }, { "entropy": 1.778799831867218, "epoch": 0.4801845596111065, "grad_norm": 0.736013650894165, "learning_rate": 1.799745300288533e-05, "loss": 1.5431, "mean_token_accuracy": 0.6357332865397135, "num_tokens": 734086295.0, "step": 4371 }, { "entropy": 1.648956149816513, "epoch": 0.48029441652247945, "grad_norm": 0.5882608294487, "learning_rate": 1.7996445959222237e-05, "loss": 1.4369, "mean_token_accuracy": 0.6544050325949987, "num_tokens": 734312987.0, "step": 4372 }, { "entropy": 1.6493046085039775, "epoch": 0.4804042734338524, "grad_norm": 0.7231053709983826, "learning_rate": 1.7995438694124782e-05, "loss": 1.2697, "mean_token_accuracy": 0.6721268246571223, "num_tokens": 734444526.0, "step": 4373 }, { "entropy": 1.7278722524642944, "epoch": 0.48051413034522533, "grad_norm": 0.7068110108375549, "learning_rate": 1.7994431207624845e-05, "loss": 1.4124, "mean_token_accuracy": 0.654108315706253, "num_tokens": 734630234.0, "step": 4374 }, { "entropy": 1.7103682061036427, "epoch": 0.4806239872565983, "grad_norm": 0.605778157711029, "learning_rate": 1.7993423499754314e-05, "loss": 1.372, "mean_token_accuracy": 0.6507877210776011, "num_tokens": 734822527.0, "step": 4375 }, { "entropy": 1.7161585489908855, "epoch": 0.4807338441679712, "grad_norm": 0.7217233777046204, "learning_rate": 1.7992415570545078e-05, "loss": 1.2415, "mean_token_accuracy": 0.6822354594866434, "num_tokens": 734943030.0, "step": 4376 }, { "entropy": 1.6399229069550831, "epoch": 0.48084370107934415, "grad_norm": 0.5977116823196411, "learning_rate": 1.799140742002904e-05, "loss": 1.3927, "mean_token_accuracy": 0.6513981918493906, "num_tokens": 735178947.0, "step": 4377 }, { "entropy": 1.6955311596393585, "epoch": 0.4809535579907171, "grad_norm": 0.8311605453491211, "learning_rate": 1.7990399048238107e-05, "loss": 1.3563, "mean_token_accuracy": 0.6646893272797266, "num_tokens": 735359480.0, "step": 4378 }, { "entropy": 1.7144683202107747, "epoch": 0.48106341490209004, "grad_norm": 0.6836698055267334, "learning_rate": 1.7989390455204193e-05, "loss": 1.3922, "mean_token_accuracy": 0.6572039127349854, "num_tokens": 735565677.0, "step": 4379 }, { "entropy": 1.7366363008817036, "epoch": 0.481173271813463, "grad_norm": 0.8185579180717468, "learning_rate": 1.7988381640959223e-05, "loss": 1.4593, "mean_token_accuracy": 0.6663338194290797, "num_tokens": 735749818.0, "step": 4380 }, { "entropy": 1.712560087442398, "epoch": 0.4812831287248359, "grad_norm": 0.7879271507263184, "learning_rate": 1.7987372605535123e-05, "loss": 1.5386, "mean_token_accuracy": 0.655731255809466, "num_tokens": 735916786.0, "step": 4381 }, { "entropy": 1.7204462985197704, "epoch": 0.48139298563620886, "grad_norm": 0.8967468738555908, "learning_rate": 1.798636334896383e-05, "loss": 1.3121, "mean_token_accuracy": 0.665294274687767, "num_tokens": 736095507.0, "step": 4382 }, { "entropy": 1.6719180544217427, "epoch": 0.4815028425475818, "grad_norm": 0.7155001163482666, "learning_rate": 1.7985353871277284e-05, "loss": 1.4705, "mean_token_accuracy": 0.6568170140186945, "num_tokens": 736227693.0, "step": 4383 }, { "entropy": 1.6862878203392029, "epoch": 0.4816126994589547, "grad_norm": 0.7051180601119995, "learning_rate": 1.798434417250743e-05, "loss": 1.4039, "mean_token_accuracy": 0.658642495671908, "num_tokens": 736398354.0, "step": 4384 }, { "entropy": 1.7053897380828857, "epoch": 0.4817225563703276, "grad_norm": 0.6290838122367859, "learning_rate": 1.7983334252686236e-05, "loss": 1.3671, "mean_token_accuracy": 0.6522834599018097, "num_tokens": 736528732.0, "step": 4385 }, { "entropy": 1.7519411742687225, "epoch": 0.48183241328170057, "grad_norm": 0.7291600704193115, "learning_rate": 1.798232411184566e-05, "loss": 1.3714, "mean_token_accuracy": 0.6766092479228973, "num_tokens": 736658095.0, "step": 4386 }, { "entropy": 1.6966538329919179, "epoch": 0.4819422701930735, "grad_norm": 0.8086454272270203, "learning_rate": 1.7981313750017672e-05, "loss": 1.405, "mean_token_accuracy": 0.6665053268273672, "num_tokens": 736795590.0, "step": 4387 }, { "entropy": 1.7400443057219188, "epoch": 0.48205212710444645, "grad_norm": 0.6428614854812622, "learning_rate": 1.798030316723425e-05, "loss": 1.4453, "mean_token_accuracy": 0.6424688597520193, "num_tokens": 736984070.0, "step": 4388 }, { "entropy": 1.6672666768232982, "epoch": 0.4821619840158194, "grad_norm": 0.7134124040603638, "learning_rate": 1.7979292363527375e-05, "loss": 1.469, "mean_token_accuracy": 0.6481978793938955, "num_tokens": 737202061.0, "step": 4389 }, { "entropy": 1.7022729615370433, "epoch": 0.48227184092719233, "grad_norm": 0.6730693578720093, "learning_rate": 1.7978281338929048e-05, "loss": 1.3695, "mean_token_accuracy": 0.655126636226972, "num_tokens": 737403611.0, "step": 4390 }, { "entropy": 1.6466123759746552, "epoch": 0.48238169783856527, "grad_norm": 0.7256969809532166, "learning_rate": 1.7977270093471254e-05, "loss": 1.4186, "mean_token_accuracy": 0.6552423536777496, "num_tokens": 737593766.0, "step": 4391 }, { "entropy": 1.7008427878220875, "epoch": 0.4824915547499382, "grad_norm": 0.9718957543373108, "learning_rate": 1.7976258627186008e-05, "loss": 1.2792, "mean_token_accuracy": 0.6838826090097427, "num_tokens": 737759579.0, "step": 4392 }, { "entropy": 1.7387726704279582, "epoch": 0.48260141166131115, "grad_norm": 0.8315878510475159, "learning_rate": 1.797524694010532e-05, "loss": 1.4845, "mean_token_accuracy": 0.658362532655398, "num_tokens": 737911150.0, "step": 4393 }, { "entropy": 1.7291166086991627, "epoch": 0.4827112685726841, "grad_norm": 0.6660434603691101, "learning_rate": 1.797423503226121e-05, "loss": 1.3869, "mean_token_accuracy": 0.642819325129191, "num_tokens": 738088880.0, "step": 4394 }, { "entropy": 1.772065391143163, "epoch": 0.48282112548405703, "grad_norm": 0.6660803556442261, "learning_rate": 1.7973222903685702e-05, "loss": 1.359, "mean_token_accuracy": 0.6438145389159521, "num_tokens": 738234766.0, "step": 4395 }, { "entropy": 1.731687754392624, "epoch": 0.48293098239543, "grad_norm": 0.6683153510093689, "learning_rate": 1.7972210554410834e-05, "loss": 1.2882, "mean_token_accuracy": 0.6693693796793619, "num_tokens": 738386427.0, "step": 4396 }, { "entropy": 1.710336794455846, "epoch": 0.4830408393068029, "grad_norm": 0.6479591727256775, "learning_rate": 1.797119798446864e-05, "loss": 1.4377, "mean_token_accuracy": 0.6659507850805918, "num_tokens": 738579020.0, "step": 4397 }, { "entropy": 1.7027187943458557, "epoch": 0.4831506962181758, "grad_norm": 0.6037660837173462, "learning_rate": 1.7970185193891176e-05, "loss": 1.439, "mean_token_accuracy": 0.6502730449040731, "num_tokens": 738777017.0, "step": 4398 }, { "entropy": 1.7039030492305756, "epoch": 0.48326055312954874, "grad_norm": 0.673244059085846, "learning_rate": 1.796917218271049e-05, "loss": 1.3637, "mean_token_accuracy": 0.6623422205448151, "num_tokens": 738963981.0, "step": 4399 }, { "entropy": 1.753426233927409, "epoch": 0.4833704100409217, "grad_norm": 0.706851065158844, "learning_rate": 1.7968158950958642e-05, "loss": 1.4578, "mean_token_accuracy": 0.6659545401732127, "num_tokens": 739139126.0, "step": 4400 }, { "entropy": 1.6917196214199066, "epoch": 0.4834802669522946, "grad_norm": 0.7611788511276245, "learning_rate": 1.7967145498667706e-05, "loss": 1.3534, "mean_token_accuracy": 0.6647525678078333, "num_tokens": 739309024.0, "step": 4401 }, { "entropy": 1.7070514857769012, "epoch": 0.48359012386366756, "grad_norm": 1.02863347530365, "learning_rate": 1.7966131825869753e-05, "loss": 1.5179, "mean_token_accuracy": 0.6395848045746485, "num_tokens": 739513763.0, "step": 4402 }, { "entropy": 1.6623660226662953, "epoch": 0.4836999807750405, "grad_norm": 0.628810703754425, "learning_rate": 1.7965117932596866e-05, "loss": 1.2627, "mean_token_accuracy": 0.6694687008857727, "num_tokens": 739651066.0, "step": 4403 }, { "entropy": 1.68434273203214, "epoch": 0.48380983768641345, "grad_norm": 0.7587992548942566, "learning_rate": 1.7964103818881138e-05, "loss": 1.3369, "mean_token_accuracy": 0.6658577223618826, "num_tokens": 739786817.0, "step": 4404 }, { "entropy": 1.664050579071045, "epoch": 0.4839196945977864, "grad_norm": 0.6519520878791809, "learning_rate": 1.7963089484754663e-05, "loss": 1.3324, "mean_token_accuracy": 0.6710617194573084, "num_tokens": 739941289.0, "step": 4405 }, { "entropy": 1.716511865456899, "epoch": 0.48402955150915933, "grad_norm": 0.659646213054657, "learning_rate": 1.7962074930249537e-05, "loss": 1.5393, "mean_token_accuracy": 0.6423588742812475, "num_tokens": 740142557.0, "step": 4406 }, { "entropy": 1.682478795448939, "epoch": 0.48413940842053227, "grad_norm": 0.5458212494850159, "learning_rate": 1.796106015539788e-05, "loss": 1.4259, "mean_token_accuracy": 0.637840062379837, "num_tokens": 740392429.0, "step": 4407 }, { "entropy": 1.7216579516728718, "epoch": 0.4842492653319052, "grad_norm": 0.66231369972229, "learning_rate": 1.796004516023181e-05, "loss": 1.4048, "mean_token_accuracy": 0.6517205735047659, "num_tokens": 740528700.0, "step": 4408 }, { "entropy": 1.7604460815588634, "epoch": 0.48435912224327815, "grad_norm": 0.6421491503715515, "learning_rate": 1.795902994478344e-05, "loss": 1.3674, "mean_token_accuracy": 0.6596113989750544, "num_tokens": 740723647.0, "step": 4409 }, { "entropy": 1.6752793689568837, "epoch": 0.4844689791546511, "grad_norm": 0.6714054346084595, "learning_rate": 1.7958014509084912e-05, "loss": 1.4342, "mean_token_accuracy": 0.6613290458917618, "num_tokens": 740917843.0, "step": 4410 }, { "entropy": 1.6832947333653767, "epoch": 0.484578836066024, "grad_norm": 0.7269577980041504, "learning_rate": 1.795699885316836e-05, "loss": 1.3856, "mean_token_accuracy": 0.6611769000689188, "num_tokens": 741057925.0, "step": 4411 }, { "entropy": 1.7017952899138133, "epoch": 0.4846886929773969, "grad_norm": 0.6696016788482666, "learning_rate": 1.7955982977065928e-05, "loss": 1.3691, "mean_token_accuracy": 0.6629203210274378, "num_tokens": 741243418.0, "step": 4412 }, { "entropy": 1.745782047510147, "epoch": 0.48479854988876986, "grad_norm": 1.2233498096466064, "learning_rate": 1.7954966880809772e-05, "loss": 1.5333, "mean_token_accuracy": 0.6656019041935602, "num_tokens": 741355751.0, "step": 4413 }, { "entropy": 1.6869251827398937, "epoch": 0.4849084068001428, "grad_norm": 0.7494601607322693, "learning_rate": 1.7953950564432044e-05, "loss": 1.4045, "mean_token_accuracy": 0.6549041916926702, "num_tokens": 741529089.0, "step": 4414 }, { "entropy": 1.6820165514945984, "epoch": 0.48501826371151574, "grad_norm": 0.7255376577377319, "learning_rate": 1.7952934027964917e-05, "loss": 1.2338, "mean_token_accuracy": 0.675830195347468, "num_tokens": 741679225.0, "step": 4415 }, { "entropy": 1.7140280703703563, "epoch": 0.4851281206228887, "grad_norm": 0.6368587613105774, "learning_rate": 1.795191727144056e-05, "loss": 1.3813, "mean_token_accuracy": 0.6670277963081995, "num_tokens": 741825088.0, "step": 4416 }, { "entropy": 1.6830491324265797, "epoch": 0.4852379775342616, "grad_norm": 0.6319524645805359, "learning_rate": 1.7950900294891154e-05, "loss": 1.3061, "mean_token_accuracy": 0.6556812673807144, "num_tokens": 742051866.0, "step": 4417 }, { "entropy": 1.7843901813030243, "epoch": 0.48534783444563456, "grad_norm": 0.6758350729942322, "learning_rate": 1.794988309834889e-05, "loss": 1.4969, "mean_token_accuracy": 0.637373631199201, "num_tokens": 742230190.0, "step": 4418 }, { "entropy": 1.6947405536969502, "epoch": 0.4854576913570075, "grad_norm": 0.5639503598213196, "learning_rate": 1.7948865681845952e-05, "loss": 1.3941, "mean_token_accuracy": 0.647869884967804, "num_tokens": 742475673.0, "step": 4419 }, { "entropy": 1.7732278009255726, "epoch": 0.48556754826838044, "grad_norm": 0.7282741665840149, "learning_rate": 1.7947848045414548e-05, "loss": 1.3657, "mean_token_accuracy": 0.6630784372488657, "num_tokens": 742612272.0, "step": 4420 }, { "entropy": 1.6553172965844472, "epoch": 0.4856774051797534, "grad_norm": 0.629048228263855, "learning_rate": 1.7946830189086883e-05, "loss": 1.5002, "mean_token_accuracy": 0.6393060237169266, "num_tokens": 742798477.0, "step": 4421 }, { "entropy": 1.7043809791405995, "epoch": 0.4857872620911263, "grad_norm": 0.7646244168281555, "learning_rate": 1.7945812112895177e-05, "loss": 1.2612, "mean_token_accuracy": 0.6775466799736023, "num_tokens": 742897713.0, "step": 4422 }, { "entropy": 1.7202276587486267, "epoch": 0.48589711900249927, "grad_norm": 0.7730578780174255, "learning_rate": 1.794479381687164e-05, "loss": 1.4446, "mean_token_accuracy": 0.6524418741464615, "num_tokens": 743076435.0, "step": 4423 }, { "entropy": 1.7322371204694111, "epoch": 0.4860069759138722, "grad_norm": 0.781139612197876, "learning_rate": 1.7943775301048517e-05, "loss": 1.4211, "mean_token_accuracy": 0.6420817424853643, "num_tokens": 743201684.0, "step": 4424 }, { "entropy": 1.6999266147613525, "epoch": 0.4861168328252451, "grad_norm": 0.7373039126396179, "learning_rate": 1.7942756565458027e-05, "loss": 1.4017, "mean_token_accuracy": 0.6548285136620203, "num_tokens": 743370658.0, "step": 4425 }, { "entropy": 1.6692801713943481, "epoch": 0.48622668973661803, "grad_norm": 0.7501516342163086, "learning_rate": 1.7941737610132424e-05, "loss": 1.1894, "mean_token_accuracy": 0.6914908140897751, "num_tokens": 743481925.0, "step": 4426 }, { "entropy": 1.6509476502736409, "epoch": 0.486336546647991, "grad_norm": 0.6218562126159668, "learning_rate": 1.7940718435103954e-05, "loss": 1.3426, "mean_token_accuracy": 0.6639609535535177, "num_tokens": 743628666.0, "step": 4427 }, { "entropy": 1.747880756855011, "epoch": 0.4864464035593639, "grad_norm": 0.6217747926712036, "learning_rate": 1.7939699040404875e-05, "loss": 1.4158, "mean_token_accuracy": 0.6518111626307169, "num_tokens": 743787470.0, "step": 4428 }, { "entropy": 1.756358911593755, "epoch": 0.48655626047073686, "grad_norm": 0.7568204402923584, "learning_rate": 1.7938679426067446e-05, "loss": 1.5833, "mean_token_accuracy": 0.6492966512838999, "num_tokens": 743928141.0, "step": 4429 }, { "entropy": 1.673755685488383, "epoch": 0.4866661173821098, "grad_norm": 0.7150989174842834, "learning_rate": 1.7937659592123935e-05, "loss": 1.3197, "mean_token_accuracy": 0.6532700707515081, "num_tokens": 744042694.0, "step": 4430 }, { "entropy": 1.7202748954296112, "epoch": 0.48677597429348274, "grad_norm": 0.636058509349823, "learning_rate": 1.7936639538606632e-05, "loss": 1.4411, "mean_token_accuracy": 0.6541072924931844, "num_tokens": 744207583.0, "step": 4431 }, { "entropy": 1.6693733135859172, "epoch": 0.4868858312048557, "grad_norm": 0.7030401229858398, "learning_rate": 1.793561926554781e-05, "loss": 1.2652, "mean_token_accuracy": 0.673239087065061, "num_tokens": 744323763.0, "step": 4432 }, { "entropy": 1.720289280017217, "epoch": 0.4869956881162286, "grad_norm": 0.7169565558433533, "learning_rate": 1.7934598772979764e-05, "loss": 1.375, "mean_token_accuracy": 0.6588334242502848, "num_tokens": 744476040.0, "step": 4433 }, { "entropy": 1.723343511422475, "epoch": 0.48710554502760156, "grad_norm": 0.6646847724914551, "learning_rate": 1.7933578060934788e-05, "loss": 1.405, "mean_token_accuracy": 0.645707756280899, "num_tokens": 744626251.0, "step": 4434 }, { "entropy": 1.7332356572151184, "epoch": 0.4872154019389745, "grad_norm": 0.7763069272041321, "learning_rate": 1.7932557129445195e-05, "loss": 1.3344, "mean_token_accuracy": 0.6678619384765625, "num_tokens": 744754753.0, "step": 4435 }, { "entropy": 1.6924299697081249, "epoch": 0.48732525885034744, "grad_norm": 0.7134109735488892, "learning_rate": 1.7931535978543295e-05, "loss": 1.3427, "mean_token_accuracy": 0.6666603734095892, "num_tokens": 744889940.0, "step": 4436 }, { "entropy": 1.6620939671993256, "epoch": 0.4874351157617204, "grad_norm": 0.5839754939079285, "learning_rate": 1.79305146082614e-05, "loss": 1.3568, "mean_token_accuracy": 0.6570223172505697, "num_tokens": 745065068.0, "step": 4437 }, { "entropy": 1.693219780921936, "epoch": 0.48754497267309327, "grad_norm": 0.7458364963531494, "learning_rate": 1.792949301863184e-05, "loss": 1.3768, "mean_token_accuracy": 0.6642978092034658, "num_tokens": 745247547.0, "step": 4438 }, { "entropy": 1.724270612001419, "epoch": 0.4876548295844662, "grad_norm": 0.7193183898925781, "learning_rate": 1.792847120968695e-05, "loss": 1.3431, "mean_token_accuracy": 0.6732023855050405, "num_tokens": 745370035.0, "step": 4439 }, { "entropy": 1.695033311843872, "epoch": 0.48776468649583915, "grad_norm": 0.6847495436668396, "learning_rate": 1.792744918145907e-05, "loss": 1.4098, "mean_token_accuracy": 0.6749068647623062, "num_tokens": 745555069.0, "step": 4440 }, { "entropy": 1.6993265946706135, "epoch": 0.4878745434072121, "grad_norm": 0.7159141302108765, "learning_rate": 1.7926426933980543e-05, "loss": 1.5518, "mean_token_accuracy": 0.6463808168967565, "num_tokens": 745735255.0, "step": 4441 }, { "entropy": 1.7386601070563, "epoch": 0.48798440031858503, "grad_norm": 0.7798255085945129, "learning_rate": 1.7925404467283727e-05, "loss": 1.5025, "mean_token_accuracy": 0.655009463429451, "num_tokens": 745875552.0, "step": 4442 }, { "entropy": 1.7042417724927266, "epoch": 0.488094257229958, "grad_norm": 0.6437844634056091, "learning_rate": 1.7924381781400978e-05, "loss": 1.4309, "mean_token_accuracy": 0.6641700814167658, "num_tokens": 746048420.0, "step": 4443 }, { "entropy": 1.7275199890136719, "epoch": 0.4882041141413309, "grad_norm": 0.7542555928230286, "learning_rate": 1.7923358876364665e-05, "loss": 1.5449, "mean_token_accuracy": 0.6298695877194405, "num_tokens": 746276936.0, "step": 4444 }, { "entropy": 1.7163825233777363, "epoch": 0.48831397105270385, "grad_norm": 0.6992437243461609, "learning_rate": 1.7922335752207164e-05, "loss": 1.3827, "mean_token_accuracy": 0.6514418671528498, "num_tokens": 746413460.0, "step": 4445 }, { "entropy": 1.75390621026357, "epoch": 0.4884238279640768, "grad_norm": 0.7331777811050415, "learning_rate": 1.792131240896086e-05, "loss": 1.3556, "mean_token_accuracy": 0.6733482579390208, "num_tokens": 746529153.0, "step": 4446 }, { "entropy": 1.6997243563334148, "epoch": 0.48853368487544974, "grad_norm": 0.763002336025238, "learning_rate": 1.792028884665813e-05, "loss": 1.4874, "mean_token_accuracy": 0.6523915976285934, "num_tokens": 746715032.0, "step": 4447 }, { "entropy": 1.6606244643529255, "epoch": 0.4886435417868227, "grad_norm": 0.5833660364151001, "learning_rate": 1.791926506533138e-05, "loss": 1.3891, "mean_token_accuracy": 0.6538758873939514, "num_tokens": 746927795.0, "step": 4448 }, { "entropy": 1.7194021840890248, "epoch": 0.4887533986981956, "grad_norm": 0.7860300540924072, "learning_rate": 1.791824106501301e-05, "loss": 1.5285, "mean_token_accuracy": 0.6494088371594747, "num_tokens": 747073603.0, "step": 4449 }, { "entropy": 1.7067668239275615, "epoch": 0.48886325560956856, "grad_norm": 0.6611225605010986, "learning_rate": 1.7917216845735427e-05, "loss": 1.4851, "mean_token_accuracy": 0.6474853207667669, "num_tokens": 747276179.0, "step": 4450 }, { "entropy": 1.6850255330403645, "epoch": 0.4889731125209415, "grad_norm": 0.8468097448348999, "learning_rate": 1.7916192407531045e-05, "loss": 1.3876, "mean_token_accuracy": 0.6508926798899969, "num_tokens": 747433017.0, "step": 4451 }, { "entropy": 1.7228013674418132, "epoch": 0.4890829694323144, "grad_norm": 0.6649202704429626, "learning_rate": 1.7915167750432293e-05, "loss": 1.4806, "mean_token_accuracy": 0.641407698392868, "num_tokens": 747595215.0, "step": 4452 }, { "entropy": 1.7136965990066528, "epoch": 0.4891928263436873, "grad_norm": 0.6682122945785522, "learning_rate": 1.7914142874471597e-05, "loss": 1.5594, "mean_token_accuracy": 0.642529770731926, "num_tokens": 747794900.0, "step": 4453 }, { "entropy": 1.7261795302232106, "epoch": 0.48930268325506027, "grad_norm": 0.634833037853241, "learning_rate": 1.7913117779681396e-05, "loss": 1.4382, "mean_token_accuracy": 0.6446546812852224, "num_tokens": 748022714.0, "step": 4454 }, { "entropy": 1.6885162591934204, "epoch": 0.4894125401664332, "grad_norm": 0.7759763598442078, "learning_rate": 1.791209246609413e-05, "loss": 1.2402, "mean_token_accuracy": 0.6778834859530131, "num_tokens": 748177827.0, "step": 4455 }, { "entropy": 1.6844123204549153, "epoch": 0.48952239707780615, "grad_norm": 0.7225537896156311, "learning_rate": 1.791106693374225e-05, "loss": 1.485, "mean_token_accuracy": 0.6536487142244974, "num_tokens": 748361870.0, "step": 4456 }, { "entropy": 1.6790739993254344, "epoch": 0.4896322539891791, "grad_norm": 0.728463351726532, "learning_rate": 1.791004118265822e-05, "loss": 1.5057, "mean_token_accuracy": 0.6347111016511917, "num_tokens": 748650354.0, "step": 4457 }, { "entropy": 1.695796012878418, "epoch": 0.48974211090055203, "grad_norm": 0.6192063689231873, "learning_rate": 1.79090152128745e-05, "loss": 1.4496, "mean_token_accuracy": 0.6385799000660578, "num_tokens": 748830552.0, "step": 4458 }, { "entropy": 1.7009925544261932, "epoch": 0.48985196781192497, "grad_norm": 0.6495517492294312, "learning_rate": 1.7907989024423557e-05, "loss": 1.2719, "mean_token_accuracy": 0.6756831457217535, "num_tokens": 748964018.0, "step": 4459 }, { "entropy": 1.7054378390312195, "epoch": 0.4899618247232979, "grad_norm": 0.6384367346763611, "learning_rate": 1.790696261733788e-05, "loss": 1.4366, "mean_token_accuracy": 0.6499034762382507, "num_tokens": 749180374.0, "step": 4460 }, { "entropy": 1.728455811738968, "epoch": 0.49007168163467085, "grad_norm": 0.6712200045585632, "learning_rate": 1.790593599164994e-05, "loss": 1.3554, "mean_token_accuracy": 0.658992608388265, "num_tokens": 749318103.0, "step": 4461 }, { "entropy": 1.7136681576569874, "epoch": 0.4901815385460438, "grad_norm": 0.6368706226348877, "learning_rate": 1.7904909147392247e-05, "loss": 1.3982, "mean_token_accuracy": 0.6673699120680491, "num_tokens": 749529503.0, "step": 4462 }, { "entropy": 1.6937820812066395, "epoch": 0.49029139545741673, "grad_norm": 0.6878102421760559, "learning_rate": 1.7903882084597287e-05, "loss": 1.4045, "mean_token_accuracy": 0.6586268643538157, "num_tokens": 749703151.0, "step": 4463 }, { "entropy": 1.7233433425426483, "epoch": 0.4904012523687897, "grad_norm": 0.7706781625747681, "learning_rate": 1.7902854803297575e-05, "loss": 1.45, "mean_token_accuracy": 0.6549730747938156, "num_tokens": 749851059.0, "step": 4464 }, { "entropy": 1.701189527908961, "epoch": 0.49051110928016256, "grad_norm": 0.7401324510574341, "learning_rate": 1.7901827303525613e-05, "loss": 1.3076, "mean_token_accuracy": 0.6692301680644354, "num_tokens": 749986623.0, "step": 4465 }, { "entropy": 1.7433607876300812, "epoch": 0.4906209661915355, "grad_norm": 0.8140077590942383, "learning_rate": 1.790079958531393e-05, "loss": 1.4647, "mean_token_accuracy": 0.6411833713452021, "num_tokens": 750134250.0, "step": 4466 }, { "entropy": 1.778177946805954, "epoch": 0.49073082310290844, "grad_norm": 0.6582893133163452, "learning_rate": 1.7899771648695048e-05, "loss": 1.3474, "mean_token_accuracy": 0.656017060081164, "num_tokens": 750276063.0, "step": 4467 }, { "entropy": 1.713607092698415, "epoch": 0.4908406800142814, "grad_norm": 0.7441067695617676, "learning_rate": 1.78987434937015e-05, "loss": 1.4452, "mean_token_accuracy": 0.6515307128429413, "num_tokens": 750403743.0, "step": 4468 }, { "entropy": 1.602983335653941, "epoch": 0.4909505369256543, "grad_norm": 0.59381502866745, "learning_rate": 1.7897715120365836e-05, "loss": 1.2882, "mean_token_accuracy": 0.6720236440499624, "num_tokens": 750542516.0, "step": 4469 }, { "entropy": 1.6924793124198914, "epoch": 0.49106039383702726, "grad_norm": 0.977676272392273, "learning_rate": 1.789668652872059e-05, "loss": 1.3781, "mean_token_accuracy": 0.6760386377573013, "num_tokens": 750699705.0, "step": 4470 }, { "entropy": 1.7474435865879059, "epoch": 0.4911702507484002, "grad_norm": 0.6933272480964661, "learning_rate": 1.7895657718798327e-05, "loss": 1.2821, "mean_token_accuracy": 0.6641086836655935, "num_tokens": 750805798.0, "step": 4471 }, { "entropy": 1.7326118846734364, "epoch": 0.49128010765977315, "grad_norm": 0.6269978880882263, "learning_rate": 1.7894628690631603e-05, "loss": 1.3857, "mean_token_accuracy": 0.6626192231973013, "num_tokens": 750959285.0, "step": 4472 }, { "entropy": 1.6883742014567058, "epoch": 0.4913899645711461, "grad_norm": 0.7878180146217346, "learning_rate": 1.7893599444252987e-05, "loss": 1.4636, "mean_token_accuracy": 0.6607634474833807, "num_tokens": 751144530.0, "step": 4473 }, { "entropy": 1.6989558438460033, "epoch": 0.49149982148251903, "grad_norm": 0.6729726195335388, "learning_rate": 1.789256997969506e-05, "loss": 1.4828, "mean_token_accuracy": 0.6353020220994949, "num_tokens": 751357135.0, "step": 4474 }, { "entropy": 1.7190554738044739, "epoch": 0.49160967839389197, "grad_norm": 0.6713739037513733, "learning_rate": 1.789154029699039e-05, "loss": 1.419, "mean_token_accuracy": 0.654554526011149, "num_tokens": 751494654.0, "step": 4475 }, { "entropy": 1.7132868468761444, "epoch": 0.4917195353052649, "grad_norm": 0.6637577414512634, "learning_rate": 1.7890510396171582e-05, "loss": 1.2547, "mean_token_accuracy": 0.6833054423332214, "num_tokens": 751625039.0, "step": 4476 }, { "entropy": 1.6934907635052998, "epoch": 0.49182939221663785, "grad_norm": 0.7464581727981567, "learning_rate": 1.788948027727122e-05, "loss": 1.3886, "mean_token_accuracy": 0.6680949032306671, "num_tokens": 751844906.0, "step": 4477 }, { "entropy": 1.7225241959095001, "epoch": 0.4919392491280108, "grad_norm": 0.6509267091751099, "learning_rate": 1.7888449940321917e-05, "loss": 1.3403, "mean_token_accuracy": 0.6601742456356684, "num_tokens": 751975242.0, "step": 4478 }, { "entropy": 1.6977383097012837, "epoch": 0.4920491060393837, "grad_norm": 0.6957575678825378, "learning_rate": 1.7887419385356273e-05, "loss": 1.3555, "mean_token_accuracy": 0.6612063000599543, "num_tokens": 752113545.0, "step": 4479 }, { "entropy": 1.7447110712528229, "epoch": 0.4921589629507566, "grad_norm": 0.7940369248390198, "learning_rate": 1.788638861240691e-05, "loss": 1.5439, "mean_token_accuracy": 0.640469511349996, "num_tokens": 752291398.0, "step": 4480 }, { "entropy": 1.679489274819692, "epoch": 0.49226881986212956, "grad_norm": 0.7236880660057068, "learning_rate": 1.7885357621506453e-05, "loss": 1.2744, "mean_token_accuracy": 0.6717104216416677, "num_tokens": 752428595.0, "step": 4481 }, { "entropy": 1.721798598766327, "epoch": 0.4923786767735025, "grad_norm": 0.7670096755027771, "learning_rate": 1.788432641268753e-05, "loss": 1.293, "mean_token_accuracy": 0.6676533122857412, "num_tokens": 752569748.0, "step": 4482 }, { "entropy": 1.6857047577699025, "epoch": 0.49248853368487544, "grad_norm": 0.6117812395095825, "learning_rate": 1.7883294985982772e-05, "loss": 1.3615, "mean_token_accuracy": 0.6617532074451447, "num_tokens": 752779194.0, "step": 4483 }, { "entropy": 1.7368919948736827, "epoch": 0.4925983905962484, "grad_norm": 0.6931723356246948, "learning_rate": 1.7882263341424838e-05, "loss": 1.3838, "mean_token_accuracy": 0.6611840128898621, "num_tokens": 752931824.0, "step": 4484 }, { "entropy": 1.7122445404529572, "epoch": 0.4927082475076213, "grad_norm": 0.5953887104988098, "learning_rate": 1.7881231479046364e-05, "loss": 1.3601, "mean_token_accuracy": 0.6550240367650986, "num_tokens": 753122621.0, "step": 4485 }, { "entropy": 1.6985189219315846, "epoch": 0.49281810441899426, "grad_norm": 0.6507508158683777, "learning_rate": 1.7880199398880018e-05, "loss": 1.4812, "mean_token_accuracy": 0.6505904843409857, "num_tokens": 753270777.0, "step": 4486 }, { "entropy": 1.7590989172458649, "epoch": 0.4929279613303672, "grad_norm": 0.6733105182647705, "learning_rate": 1.787916710095846e-05, "loss": 1.49, "mean_token_accuracy": 0.6299261252085367, "num_tokens": 753456920.0, "step": 4487 }, { "entropy": 1.6458527743816376, "epoch": 0.49303781824174014, "grad_norm": 0.7507854700088501, "learning_rate": 1.7878134585314363e-05, "loss": 1.4018, "mean_token_accuracy": 0.6749738603830338, "num_tokens": 753613788.0, "step": 4488 }, { "entropy": 1.6525565882523854, "epoch": 0.4931476751531131, "grad_norm": 0.5717189311981201, "learning_rate": 1.7877101851980404e-05, "loss": 1.4242, "mean_token_accuracy": 0.6573587109645208, "num_tokens": 753800915.0, "step": 4489 }, { "entropy": 1.692369411389033, "epoch": 0.493257532064486, "grad_norm": 0.7131385803222656, "learning_rate": 1.7876068900989274e-05, "loss": 1.2987, "mean_token_accuracy": 0.6656559258699417, "num_tokens": 753944837.0, "step": 4490 }, { "entropy": 1.6467917760213215, "epoch": 0.49336738897585897, "grad_norm": 0.6811399459838867, "learning_rate": 1.7875035732373658e-05, "loss": 1.301, "mean_token_accuracy": 0.671561042467753, "num_tokens": 754086477.0, "step": 4491 }, { "entropy": 1.781138926744461, "epoch": 0.4934772458872319, "grad_norm": 0.6936325430870056, "learning_rate": 1.7874002346166263e-05, "loss": 1.4253, "mean_token_accuracy": 0.650108148654302, "num_tokens": 754275449.0, "step": 4492 }, { "entropy": 1.7359409630298615, "epoch": 0.4935871027986048, "grad_norm": 0.7305500507354736, "learning_rate": 1.7872968742399786e-05, "loss": 1.4184, "mean_token_accuracy": 0.6582159698009491, "num_tokens": 754439599.0, "step": 4493 }, { "entropy": 1.6950959861278534, "epoch": 0.49369695970997773, "grad_norm": 0.5642681121826172, "learning_rate": 1.787193492110695e-05, "loss": 1.6434, "mean_token_accuracy": 0.633182168006897, "num_tokens": 754640289.0, "step": 4494 }, { "entropy": 1.706775536139806, "epoch": 0.4938068166213507, "grad_norm": 0.6591259241104126, "learning_rate": 1.7870900882320467e-05, "loss": 1.46, "mean_token_accuracy": 0.6489850531021754, "num_tokens": 754801543.0, "step": 4495 }, { "entropy": 1.724029650290807, "epoch": 0.4939166735327236, "grad_norm": 0.7175586819648743, "learning_rate": 1.786986662607307e-05, "loss": 1.3789, "mean_token_accuracy": 0.6690233945846558, "num_tokens": 754964581.0, "step": 4496 }, { "entropy": 1.6978013416131337, "epoch": 0.49402653044409656, "grad_norm": 0.5896649956703186, "learning_rate": 1.786883215239749e-05, "loss": 1.3869, "mean_token_accuracy": 0.6515233715375265, "num_tokens": 755162191.0, "step": 4497 }, { "entropy": 1.7095728317896526, "epoch": 0.4941363873554695, "grad_norm": 0.671416163444519, "learning_rate": 1.7867797461326466e-05, "loss": 1.4268, "mean_token_accuracy": 0.6535071631272634, "num_tokens": 755335730.0, "step": 4498 }, { "entropy": 1.708453506231308, "epoch": 0.49424624426684244, "grad_norm": 0.7201808094978333, "learning_rate": 1.786676255289275e-05, "loss": 1.4291, "mean_token_accuracy": 0.662381132443746, "num_tokens": 755479598.0, "step": 4499 }, { "entropy": 1.6969383358955383, "epoch": 0.4943561011782154, "grad_norm": 0.6979060769081116, "learning_rate": 1.7865727427129087e-05, "loss": 1.3646, "mean_token_accuracy": 0.6659565716981888, "num_tokens": 755646726.0, "step": 4500 }, { "entropy": 1.7146336535612743, "epoch": 0.4944659580895883, "grad_norm": 0.6201717853546143, "learning_rate": 1.786469208406825e-05, "loss": 1.2754, "mean_token_accuracy": 0.6736994286378225, "num_tokens": 755793537.0, "step": 4501 }, { "entropy": 1.6687723497549694, "epoch": 0.49457581500096126, "grad_norm": 0.6654130816459656, "learning_rate": 1.7863656523743002e-05, "loss": 1.3844, "mean_token_accuracy": 0.6587973336378733, "num_tokens": 755989928.0, "step": 4502 }, { "entropy": 1.6637324293454487, "epoch": 0.4946856719123342, "grad_norm": 0.8360695242881775, "learning_rate": 1.7862620746186115e-05, "loss": 1.3913, "mean_token_accuracy": 0.6531608452399572, "num_tokens": 756136563.0, "step": 4503 }, { "entropy": 1.7622207999229431, "epoch": 0.49479552882370714, "grad_norm": 0.6121542453765869, "learning_rate": 1.7861584751430373e-05, "loss": 1.5139, "mean_token_accuracy": 0.6296129673719406, "num_tokens": 756322688.0, "step": 4504 }, { "entropy": 1.6936173935731251, "epoch": 0.4949053857350801, "grad_norm": 0.7181201577186584, "learning_rate": 1.786054853950857e-05, "loss": 1.4316, "mean_token_accuracy": 0.6570375859737396, "num_tokens": 756472507.0, "step": 4505 }, { "entropy": 1.7213451365629833, "epoch": 0.49501524264645297, "grad_norm": 0.7521805763244629, "learning_rate": 1.7859512110453493e-05, "loss": 1.3723, "mean_token_accuracy": 0.6491851558287939, "num_tokens": 756630765.0, "step": 4506 }, { "entropy": 1.7246767083803813, "epoch": 0.4951250995578259, "grad_norm": 0.6681076288223267, "learning_rate": 1.7858475464297952e-05, "loss": 1.5371, "mean_token_accuracy": 0.6519037286440531, "num_tokens": 756811520.0, "step": 4507 }, { "entropy": 1.724827955166499, "epoch": 0.49523495646919885, "grad_norm": 0.7627193331718445, "learning_rate": 1.785743860107475e-05, "loss": 1.3533, "mean_token_accuracy": 0.6570751518011093, "num_tokens": 756947022.0, "step": 4508 }, { "entropy": 1.7522861162821453, "epoch": 0.4953448133805718, "grad_norm": 0.7629287242889404, "learning_rate": 1.7856401520816707e-05, "loss": 1.4731, "mean_token_accuracy": 0.6540786474943161, "num_tokens": 757085562.0, "step": 4509 }, { "entropy": 1.7017335096995037, "epoch": 0.49545467029194473, "grad_norm": 0.6842020153999329, "learning_rate": 1.7855364223556647e-05, "loss": 1.4837, "mean_token_accuracy": 0.6524115850528082, "num_tokens": 757267752.0, "step": 4510 }, { "entropy": 1.754370888074239, "epoch": 0.4955645272033177, "grad_norm": 0.7372490763664246, "learning_rate": 1.78543267093274e-05, "loss": 1.3641, "mean_token_accuracy": 0.6588715563217798, "num_tokens": 757385616.0, "step": 4511 }, { "entropy": 1.7240610619386036, "epoch": 0.4956743841146906, "grad_norm": 0.7373293042182922, "learning_rate": 1.7853288978161797e-05, "loss": 1.4162, "mean_token_accuracy": 0.6595638593037924, "num_tokens": 757553063.0, "step": 4512 }, { "entropy": 1.6941333214441936, "epoch": 0.49578424102606355, "grad_norm": 0.6690158843994141, "learning_rate": 1.7852251030092686e-05, "loss": 1.5101, "mean_token_accuracy": 0.6566884964704514, "num_tokens": 757694092.0, "step": 4513 }, { "entropy": 1.7551434238751729, "epoch": 0.4958940979374365, "grad_norm": 0.5994437336921692, "learning_rate": 1.785121286515292e-05, "loss": 1.438, "mean_token_accuracy": 0.6497039496898651, "num_tokens": 757875258.0, "step": 4514 }, { "entropy": 1.7067551612854004, "epoch": 0.49600395484880944, "grad_norm": 0.7220733761787415, "learning_rate": 1.7850174483375353e-05, "loss": 1.2979, "mean_token_accuracy": 0.6739718317985535, "num_tokens": 758004198.0, "step": 4515 }, { "entropy": 1.6703088978926341, "epoch": 0.4961138117601824, "grad_norm": 0.6553620100021362, "learning_rate": 1.7849135884792853e-05, "loss": 1.4163, "mean_token_accuracy": 0.6570161531368891, "num_tokens": 758196716.0, "step": 4516 }, { "entropy": 1.7132277488708496, "epoch": 0.4962236686715553, "grad_norm": 0.690665066242218, "learning_rate": 1.784809706943829e-05, "loss": 1.4286, "mean_token_accuracy": 0.6602544039487839, "num_tokens": 758382983.0, "step": 4517 }, { "entropy": 1.7164062758286793, "epoch": 0.49633352558292826, "grad_norm": 0.6790395975112915, "learning_rate": 1.784705803734453e-05, "loss": 1.4582, "mean_token_accuracy": 0.6742985943953196, "num_tokens": 758572656.0, "step": 4518 }, { "entropy": 1.67890664935112, "epoch": 0.4964433824943012, "grad_norm": 1.2717398405075073, "learning_rate": 1.784601878854448e-05, "loss": 1.4087, "mean_token_accuracy": 0.660420835018158, "num_tokens": 758793304.0, "step": 4519 }, { "entropy": 1.7838424642880757, "epoch": 0.4965532394056741, "grad_norm": 0.6524944305419922, "learning_rate": 1.7844979323071016e-05, "loss": 1.4004, "mean_token_accuracy": 0.6474776168664297, "num_tokens": 758916623.0, "step": 4520 }, { "entropy": 1.6456107993920643, "epoch": 0.496663096317047, "grad_norm": 0.7245497703552246, "learning_rate": 1.7843939640957038e-05, "loss": 1.4214, "mean_token_accuracy": 0.6555730899175009, "num_tokens": 759097278.0, "step": 4521 }, { "entropy": 1.7413156827290852, "epoch": 0.49677295322841997, "grad_norm": 0.7097477316856384, "learning_rate": 1.7842899742235458e-05, "loss": 1.3122, "mean_token_accuracy": 0.664093608657519, "num_tokens": 759245582.0, "step": 4522 }, { "entropy": 1.6793685257434845, "epoch": 0.4968828101397929, "grad_norm": 0.717451810836792, "learning_rate": 1.7841859626939185e-05, "loss": 1.4663, "mean_token_accuracy": 0.6448543965816498, "num_tokens": 759450714.0, "step": 4523 }, { "entropy": 1.7351475755373638, "epoch": 0.49699266705116585, "grad_norm": 0.7763004899024963, "learning_rate": 1.784081929510113e-05, "loss": 1.4956, "mean_token_accuracy": 0.6306874205668768, "num_tokens": 759665939.0, "step": 4524 }, { "entropy": 1.6486627856890361, "epoch": 0.4971025239625388, "grad_norm": 0.6308198571205139, "learning_rate": 1.783977874675424e-05, "loss": 1.3943, "mean_token_accuracy": 0.6578763922055563, "num_tokens": 759846721.0, "step": 4525 }, { "entropy": 1.702409029006958, "epoch": 0.49721238087391173, "grad_norm": 0.7921629548072815, "learning_rate": 1.7838737981931425e-05, "loss": 1.4551, "mean_token_accuracy": 0.6458623309930166, "num_tokens": 760104447.0, "step": 4526 }, { "entropy": 1.685009628534317, "epoch": 0.49732223778528467, "grad_norm": 0.6673221588134766, "learning_rate": 1.7837697000665638e-05, "loss": 1.3185, "mean_token_accuracy": 0.6742521325747172, "num_tokens": 760293720.0, "step": 4527 }, { "entropy": 1.7403136988480885, "epoch": 0.4974320946966576, "grad_norm": 0.6920185685157776, "learning_rate": 1.783665580298982e-05, "loss": 1.2648, "mean_token_accuracy": 0.6715550472338995, "num_tokens": 760387875.0, "step": 4528 }, { "entropy": 1.6765425205230713, "epoch": 0.49754195160803055, "grad_norm": 0.6350904107093811, "learning_rate": 1.7835614388936927e-05, "loss": 1.4115, "mean_token_accuracy": 0.6627595176299413, "num_tokens": 760546642.0, "step": 4529 }, { "entropy": 1.7340703904628754, "epoch": 0.4976518085194035, "grad_norm": 0.7262073755264282, "learning_rate": 1.7834572758539922e-05, "loss": 1.2949, "mean_token_accuracy": 0.6761989891529083, "num_tokens": 760670647.0, "step": 4530 }, { "entropy": 1.733684519926707, "epoch": 0.49776166543077643, "grad_norm": 0.6469461917877197, "learning_rate": 1.7833530911831767e-05, "loss": 1.4896, "mean_token_accuracy": 0.6465209424495697, "num_tokens": 760823361.0, "step": 4531 }, { "entropy": 1.7105149626731873, "epoch": 0.4978715223421494, "grad_norm": 0.7094139456748962, "learning_rate": 1.7832488848845438e-05, "loss": 1.3516, "mean_token_accuracy": 0.6633161505063375, "num_tokens": 760986995.0, "step": 4532 }, { "entropy": 1.7280435959498088, "epoch": 0.49798137925352226, "grad_norm": 0.5853270888328552, "learning_rate": 1.7831446569613915e-05, "loss": 1.4263, "mean_token_accuracy": 0.6446743756532669, "num_tokens": 761178147.0, "step": 4533 }, { "entropy": 1.6787182490030925, "epoch": 0.4980912361648952, "grad_norm": 0.6534310579299927, "learning_rate": 1.7830404074170188e-05, "loss": 1.4564, "mean_token_accuracy": 0.6509255568186442, "num_tokens": 761373044.0, "step": 4534 }, { "entropy": 1.7558597127596538, "epoch": 0.49820109307626814, "grad_norm": 0.6207196116447449, "learning_rate": 1.7829361362547248e-05, "loss": 1.5756, "mean_token_accuracy": 0.628658264875412, "num_tokens": 761576148.0, "step": 4535 }, { "entropy": 1.6844909886519115, "epoch": 0.4983109499876411, "grad_norm": 0.7019922733306885, "learning_rate": 1.7828318434778098e-05, "loss": 1.1964, "mean_token_accuracy": 0.679168184598287, "num_tokens": 761706398.0, "step": 4536 }, { "entropy": 1.6976350645224254, "epoch": 0.498420806899014, "grad_norm": 0.7925885915756226, "learning_rate": 1.7827275290895745e-05, "loss": 1.4483, "mean_token_accuracy": 0.6481008778015772, "num_tokens": 761941759.0, "step": 4537 }, { "entropy": 1.7484288016955059, "epoch": 0.49853066381038696, "grad_norm": 0.8223802447319031, "learning_rate": 1.782623193093321e-05, "loss": 1.4108, "mean_token_accuracy": 0.6608029355605444, "num_tokens": 762094461.0, "step": 4538 }, { "entropy": 1.6935599744319916, "epoch": 0.4986405207217599, "grad_norm": 0.6052656769752502, "learning_rate": 1.782518835492351e-05, "loss": 1.299, "mean_token_accuracy": 0.6712607592344284, "num_tokens": 762245616.0, "step": 4539 }, { "entropy": 1.768530507882436, "epoch": 0.49875037763313285, "grad_norm": 0.6017442941665649, "learning_rate": 1.782414456289967e-05, "loss": 1.5665, "mean_token_accuracy": 0.6245706081390381, "num_tokens": 762425328.0, "step": 4540 }, { "entropy": 1.743304302295049, "epoch": 0.4988602345445058, "grad_norm": 0.7569601535797119, "learning_rate": 1.782310055489473e-05, "loss": 1.6859, "mean_token_accuracy": 0.6368949313958486, "num_tokens": 762582323.0, "step": 4541 }, { "entropy": 1.7565892438093822, "epoch": 0.49897009145587873, "grad_norm": 0.7533148527145386, "learning_rate": 1.782205633094174e-05, "loss": 1.5002, "mean_token_accuracy": 0.6462213893731436, "num_tokens": 762732218.0, "step": 4542 }, { "entropy": 1.7235056459903717, "epoch": 0.49907994836725167, "grad_norm": 0.7903603315353394, "learning_rate": 1.7821011891073732e-05, "loss": 1.5051, "mean_token_accuracy": 0.6527835627396902, "num_tokens": 762921443.0, "step": 4543 }, { "entropy": 1.6993493934472401, "epoch": 0.4991898052786246, "grad_norm": 0.8247820138931274, "learning_rate": 1.7819967235323773e-05, "loss": 1.2088, "mean_token_accuracy": 0.6772788912057877, "num_tokens": 763015220.0, "step": 4544 }, { "entropy": 1.7162687877813976, "epoch": 0.49929966218999755, "grad_norm": 0.8071532249450684, "learning_rate": 1.7818922363724926e-05, "loss": 1.3451, "mean_token_accuracy": 0.6634253213802973, "num_tokens": 763167117.0, "step": 4545 }, { "entropy": 1.6914705137411754, "epoch": 0.4994095191013705, "grad_norm": 0.787642240524292, "learning_rate": 1.7817877276310257e-05, "loss": 1.4834, "mean_token_accuracy": 0.6583209584156672, "num_tokens": 763346825.0, "step": 4546 }, { "entropy": 1.6890590290228527, "epoch": 0.4995193760127434, "grad_norm": 0.6730331778526306, "learning_rate": 1.781683197311285e-05, "loss": 1.488, "mean_token_accuracy": 0.6388949304819107, "num_tokens": 763552338.0, "step": 4547 }, { "entropy": 1.752114752928416, "epoch": 0.4996292329241163, "grad_norm": 0.7115664482116699, "learning_rate": 1.7815786454165776e-05, "loss": 1.4952, "mean_token_accuracy": 0.6519523759682974, "num_tokens": 763708637.0, "step": 4548 }, { "entropy": 1.7708389262358348, "epoch": 0.49973908983548926, "grad_norm": 0.8620888590812683, "learning_rate": 1.7814740719502135e-05, "loss": 1.434, "mean_token_accuracy": 0.6429966588815054, "num_tokens": 763848626.0, "step": 4549 }, { "entropy": 1.7015598714351654, "epoch": 0.4998489467468622, "grad_norm": 0.6294053196907043, "learning_rate": 1.7813694769155022e-05, "loss": 1.4717, "mean_token_accuracy": 0.6426868637402853, "num_tokens": 764021913.0, "step": 4550 }, { "entropy": 1.6892776091893513, "epoch": 0.49995880365823514, "grad_norm": 0.6385858058929443, "learning_rate": 1.781264860315754e-05, "loss": 1.2928, "mean_token_accuracy": 0.6771899660428365, "num_tokens": 764190314.0, "step": 4551 }, { "entropy": 1.7139320472876232, "epoch": 0.5000686605696081, "grad_norm": 0.6113952994346619, "learning_rate": 1.7811602221542795e-05, "loss": 1.3597, "mean_token_accuracy": 0.658218597372373, "num_tokens": 764343722.0, "step": 4552 }, { "entropy": 1.698420782883962, "epoch": 0.5001785174809811, "grad_norm": 0.7712813019752502, "learning_rate": 1.781055562434391e-05, "loss": 1.5078, "mean_token_accuracy": 0.6449939161539078, "num_tokens": 764543119.0, "step": 4553 }, { "entropy": 1.6849484543005626, "epoch": 0.500288374392354, "grad_norm": 0.8930643200874329, "learning_rate": 1.7809508811594015e-05, "loss": 1.2912, "mean_token_accuracy": 0.6756371855735779, "num_tokens": 764718997.0, "step": 4554 }, { "entropy": 1.669805000225703, "epoch": 0.5003982313037268, "grad_norm": 0.6647879481315613, "learning_rate": 1.7808461783326228e-05, "loss": 1.3159, "mean_token_accuracy": 0.6644556125005087, "num_tokens": 764865095.0, "step": 4555 }, { "entropy": 1.6393884023030598, "epoch": 0.5005080882150998, "grad_norm": 0.6310734152793884, "learning_rate": 1.7807414539573696e-05, "loss": 1.3232, "mean_token_accuracy": 0.6677381098270416, "num_tokens": 765049052.0, "step": 4556 }, { "entropy": 1.6223057607809703, "epoch": 0.5006179451264727, "grad_norm": 0.9187425971031189, "learning_rate": 1.780636708036956e-05, "loss": 1.4747, "mean_token_accuracy": 0.6508963604768118, "num_tokens": 765223488.0, "step": 4557 }, { "entropy": 1.6926488975683849, "epoch": 0.5007278020378457, "grad_norm": 0.7379801869392395, "learning_rate": 1.780531940574697e-05, "loss": 1.3283, "mean_token_accuracy": 0.6766380667686462, "num_tokens": 765383670.0, "step": 4558 }, { "entropy": 1.74741593003273, "epoch": 0.5008376589492186, "grad_norm": 0.6469904780387878, "learning_rate": 1.7804271515739096e-05, "loss": 1.3902, "mean_token_accuracy": 0.653177946805954, "num_tokens": 765523127.0, "step": 4559 }, { "entropy": 1.701665033896764, "epoch": 0.5009475158605916, "grad_norm": 0.6925680041313171, "learning_rate": 1.780322341037909e-05, "loss": 1.4267, "mean_token_accuracy": 0.6503228594859441, "num_tokens": 765700770.0, "step": 4560 }, { "entropy": 1.7469732860724132, "epoch": 0.5010573727719645, "grad_norm": 0.7270328998565674, "learning_rate": 1.780217508970013e-05, "loss": 1.584, "mean_token_accuracy": 0.6263004789749781, "num_tokens": 765875041.0, "step": 4561 }, { "entropy": 1.6818542679150899, "epoch": 0.5011672296833375, "grad_norm": 0.7486089468002319, "learning_rate": 1.780112655373539e-05, "loss": 1.4211, "mean_token_accuracy": 0.6588012526432673, "num_tokens": 766026284.0, "step": 4562 }, { "entropy": 1.7353224456310272, "epoch": 0.5012770865947104, "grad_norm": 0.6791040301322937, "learning_rate": 1.7800077802518062e-05, "loss": 1.4015, "mean_token_accuracy": 0.6505940506855646, "num_tokens": 766178332.0, "step": 4563 }, { "entropy": 1.6911177138487499, "epoch": 0.5013869435060834, "grad_norm": 0.6527238488197327, "learning_rate": 1.7799028836081333e-05, "loss": 1.321, "mean_token_accuracy": 0.6581740925709406, "num_tokens": 766368332.0, "step": 4564 }, { "entropy": 1.7469729781150818, "epoch": 0.5014968004174563, "grad_norm": 0.6890334486961365, "learning_rate": 1.7797979654458408e-05, "loss": 1.415, "mean_token_accuracy": 0.6579999874035517, "num_tokens": 766550333.0, "step": 4565 }, { "entropy": 1.7444235583146412, "epoch": 0.5016066573288293, "grad_norm": 0.6815066933631897, "learning_rate": 1.7796930257682487e-05, "loss": 1.3465, "mean_token_accuracy": 0.6652761151393255, "num_tokens": 766711842.0, "step": 4566 }, { "entropy": 1.745679686466853, "epoch": 0.5017165142402021, "grad_norm": 0.6902961730957031, "learning_rate": 1.7795880645786788e-05, "loss": 1.2986, "mean_token_accuracy": 0.6666453431049982, "num_tokens": 766838368.0, "step": 4567 }, { "entropy": 1.7069720129172008, "epoch": 0.501826371151575, "grad_norm": 0.6801702380180359, "learning_rate": 1.779483081880453e-05, "loss": 1.495, "mean_token_accuracy": 0.6321636736392975, "num_tokens": 767011476.0, "step": 4568 }, { "entropy": 1.6371342142422993, "epoch": 0.501936228062948, "grad_norm": 0.6808404326438904, "learning_rate": 1.779378077676894e-05, "loss": 1.5138, "mean_token_accuracy": 0.6529216965039571, "num_tokens": 767228694.0, "step": 4569 }, { "entropy": 1.624429355065028, "epoch": 0.5020460849743209, "grad_norm": 0.7168002724647522, "learning_rate": 1.7792730519713245e-05, "loss": 1.3463, "mean_token_accuracy": 0.6598889629046122, "num_tokens": 767386847.0, "step": 4570 }, { "entropy": 1.6681628028551738, "epoch": 0.5021559418856939, "grad_norm": 0.7332079410552979, "learning_rate": 1.7791680047670696e-05, "loss": 1.5026, "mean_token_accuracy": 0.6463381548722585, "num_tokens": 767562231.0, "step": 4571 }, { "entropy": 1.7176694869995117, "epoch": 0.5022657987970668, "grad_norm": 0.6463919878005981, "learning_rate": 1.7790629360674528e-05, "loss": 1.336, "mean_token_accuracy": 0.6605811516443888, "num_tokens": 767694393.0, "step": 4572 }, { "entropy": 1.7141607999801636, "epoch": 0.5023756557084398, "grad_norm": 0.6464807391166687, "learning_rate": 1.7789578458758004e-05, "loss": 1.4295, "mean_token_accuracy": 0.6666828741629919, "num_tokens": 767842803.0, "step": 4573 }, { "entropy": 1.6756874024868011, "epoch": 0.5024855126198127, "grad_norm": 0.6780061721801758, "learning_rate": 1.7788527341954378e-05, "loss": 1.3292, "mean_token_accuracy": 0.6682515839735667, "num_tokens": 767995295.0, "step": 4574 }, { "entropy": 1.7120186189810436, "epoch": 0.5025953695311857, "grad_norm": 0.6508592963218689, "learning_rate": 1.7787476010296922e-05, "loss": 1.5077, "mean_token_accuracy": 0.6511927644411722, "num_tokens": 768165030.0, "step": 4575 }, { "entropy": 1.7303796609242756, "epoch": 0.5027052264425586, "grad_norm": 0.7258087992668152, "learning_rate": 1.778642446381891e-05, "loss": 1.3302, "mean_token_accuracy": 0.664924239118894, "num_tokens": 768276477.0, "step": 4576 }, { "entropy": 1.735611488421758, "epoch": 0.5028150833539315, "grad_norm": 0.6726675629615784, "learning_rate": 1.7785372702553624e-05, "loss": 1.5562, "mean_token_accuracy": 0.6415247122446696, "num_tokens": 768471537.0, "step": 4577 }, { "entropy": 1.6898790796597798, "epoch": 0.5029249402653044, "grad_norm": 0.6226996779441833, "learning_rate": 1.7784320726534345e-05, "loss": 1.3483, "mean_token_accuracy": 0.6662185192108154, "num_tokens": 768638518.0, "step": 4578 }, { "entropy": 1.6402369737625122, "epoch": 0.5030347971766774, "grad_norm": 0.5572636127471924, "learning_rate": 1.7783268535794376e-05, "loss": 1.4847, "mean_token_accuracy": 0.6341389069954554, "num_tokens": 768861174.0, "step": 4579 }, { "entropy": 1.6863535543282826, "epoch": 0.5031446540880503, "grad_norm": 0.634425938129425, "learning_rate": 1.778221613036701e-05, "loss": 1.4483, "mean_token_accuracy": 0.6415148476759592, "num_tokens": 769041187.0, "step": 4580 }, { "entropy": 1.706571986277898, "epoch": 0.5032545109994232, "grad_norm": 0.659997284412384, "learning_rate": 1.7781163510285564e-05, "loss": 1.2811, "mean_token_accuracy": 0.6710793773333231, "num_tokens": 769218716.0, "step": 4581 }, { "entropy": 1.6553989350795746, "epoch": 0.5033643679107962, "grad_norm": 0.7408038377761841, "learning_rate": 1.7780110675583345e-05, "loss": 1.4047, "mean_token_accuracy": 0.6643812855084738, "num_tokens": 769399208.0, "step": 4582 }, { "entropy": 1.7550034324328105, "epoch": 0.5034742248221691, "grad_norm": 0.8029376268386841, "learning_rate": 1.777905762629368e-05, "loss": 1.4544, "mean_token_accuracy": 0.6457452674706777, "num_tokens": 769538565.0, "step": 4583 }, { "entropy": 1.701194703578949, "epoch": 0.5035840817335421, "grad_norm": 0.6260977387428284, "learning_rate": 1.7778004362449897e-05, "loss": 1.5601, "mean_token_accuracy": 0.6248839298884074, "num_tokens": 769785297.0, "step": 4584 }, { "entropy": 1.6856548488140106, "epoch": 0.503693938644915, "grad_norm": 0.8055636882781982, "learning_rate": 1.7776950884085325e-05, "loss": 1.4075, "mean_token_accuracy": 0.6510484715302786, "num_tokens": 769974441.0, "step": 4585 }, { "entropy": 1.696067899465561, "epoch": 0.503803795556288, "grad_norm": 0.6942035555839539, "learning_rate": 1.777589719123332e-05, "loss": 1.398, "mean_token_accuracy": 0.6512449930111567, "num_tokens": 770158357.0, "step": 4586 }, { "entropy": 1.7241803507010143, "epoch": 0.5039136524676608, "grad_norm": 0.7550591230392456, "learning_rate": 1.7774843283927215e-05, "loss": 1.5366, "mean_token_accuracy": 0.6322035938501358, "num_tokens": 770356874.0, "step": 4587 }, { "entropy": 1.7421184480190277, "epoch": 0.5040235093790338, "grad_norm": 0.6920149326324463, "learning_rate": 1.7773789162200378e-05, "loss": 1.4192, "mean_token_accuracy": 0.6601304560899734, "num_tokens": 770542678.0, "step": 4588 }, { "entropy": 1.7092435161272685, "epoch": 0.5041333662904067, "grad_norm": 0.6304470896720886, "learning_rate": 1.777273482608616e-05, "loss": 1.5378, "mean_token_accuracy": 0.6353190094232559, "num_tokens": 770728579.0, "step": 4589 }, { "entropy": 1.6159135500590007, "epoch": 0.5042432232017797, "grad_norm": 0.680613100528717, "learning_rate": 1.777168027561794e-05, "loss": 1.2946, "mean_token_accuracy": 0.6721889674663544, "num_tokens": 770911912.0, "step": 4590 }, { "entropy": 1.7247054477532704, "epoch": 0.5043530801131526, "grad_norm": 0.7321951389312744, "learning_rate": 1.777062551082909e-05, "loss": 1.4008, "mean_token_accuracy": 0.6659555484851202, "num_tokens": 771043582.0, "step": 4591 }, { "entropy": 1.641870786746343, "epoch": 0.5044629370245256, "grad_norm": 0.658308744430542, "learning_rate": 1.7769570531752995e-05, "loss": 1.2654, "mean_token_accuracy": 0.688631405433019, "num_tokens": 771194901.0, "step": 4592 }, { "entropy": 1.7228473524252574, "epoch": 0.5045727939358985, "grad_norm": 0.7475135326385498, "learning_rate": 1.7768515338423044e-05, "loss": 1.3101, "mean_token_accuracy": 0.6785301913817724, "num_tokens": 771320509.0, "step": 4593 }, { "entropy": 1.7426639099915822, "epoch": 0.5046826508472714, "grad_norm": 0.7170320749282837, "learning_rate": 1.776745993087263e-05, "loss": 1.6534, "mean_token_accuracy": 0.6144102613131205, "num_tokens": 771526316.0, "step": 4594 }, { "entropy": 1.6618753671646118, "epoch": 0.5047925077586444, "grad_norm": 0.7222857475280762, "learning_rate": 1.776640430913516e-05, "loss": 1.4575, "mean_token_accuracy": 0.6568075368801752, "num_tokens": 771684867.0, "step": 4595 }, { "entropy": 1.7489372690518696, "epoch": 0.5049023646700173, "grad_norm": 0.8467748761177063, "learning_rate": 1.7765348473244042e-05, "loss": 1.4878, "mean_token_accuracy": 0.6579713523387909, "num_tokens": 771824096.0, "step": 4596 }, { "entropy": 1.7483091453711193, "epoch": 0.5050122215813903, "grad_norm": 0.6738401651382446, "learning_rate": 1.7764292423232694e-05, "loss": 1.4071, "mean_token_accuracy": 0.6498169700304667, "num_tokens": 771969053.0, "step": 4597 }, { "entropy": 1.6820040146509807, "epoch": 0.5051220784927631, "grad_norm": 0.7400838136672974, "learning_rate": 1.7763236159134538e-05, "loss": 1.3708, "mean_token_accuracy": 0.6718896230061849, "num_tokens": 772120605.0, "step": 4598 }, { "entropy": 1.694365570942561, "epoch": 0.5052319354041361, "grad_norm": 0.6368020176887512, "learning_rate": 1.7762179680983007e-05, "loss": 1.4105, "mean_token_accuracy": 0.6443684299786886, "num_tokens": 772319397.0, "step": 4599 }, { "entropy": 1.7446305255095165, "epoch": 0.505341792315509, "grad_norm": 0.7226773500442505, "learning_rate": 1.7761122988811533e-05, "loss": 1.4982, "mean_token_accuracy": 0.6516734858353933, "num_tokens": 772512021.0, "step": 4600 }, { "entropy": 1.6962950527668, "epoch": 0.505451649226882, "grad_norm": 0.6551104784011841, "learning_rate": 1.7760066082653566e-05, "loss": 1.4639, "mean_token_accuracy": 0.6513085216283798, "num_tokens": 772707795.0, "step": 4601 }, { "entropy": 1.7322811285654705, "epoch": 0.5055615061382549, "grad_norm": 0.7607578635215759, "learning_rate": 1.775900896254255e-05, "loss": 1.4611, "mean_token_accuracy": 0.653966099023819, "num_tokens": 772888030.0, "step": 4602 }, { "entropy": 1.7565113206704457, "epoch": 0.5056713630496279, "grad_norm": 0.780271053314209, "learning_rate": 1.7757951628511953e-05, "loss": 1.3276, "mean_token_accuracy": 0.6591301510731379, "num_tokens": 773074743.0, "step": 4603 }, { "entropy": 1.7075756589571636, "epoch": 0.5057812199610008, "grad_norm": 0.8063814640045166, "learning_rate": 1.7756894080595225e-05, "loss": 1.2822, "mean_token_accuracy": 0.672131285071373, "num_tokens": 773213092.0, "step": 4604 }, { "entropy": 1.7614882191022236, "epoch": 0.5058910768723738, "grad_norm": 0.7280179858207703, "learning_rate": 1.7755836318825846e-05, "loss": 1.5566, "mean_token_accuracy": 0.6349164942900339, "num_tokens": 773418841.0, "step": 4605 }, { "entropy": 1.7605247398217518, "epoch": 0.5060009337837467, "grad_norm": 0.6884891390800476, "learning_rate": 1.7754778343237294e-05, "loss": 1.5146, "mean_token_accuracy": 0.6434789101282755, "num_tokens": 773579477.0, "step": 4606 }, { "entropy": 1.7038259605566661, "epoch": 0.5061107906951197, "grad_norm": 0.6832484602928162, "learning_rate": 1.775372015386305e-05, "loss": 1.2357, "mean_token_accuracy": 0.6903716921806335, "num_tokens": 773730733.0, "step": 4607 }, { "entropy": 1.6778667668501537, "epoch": 0.5062206476064925, "grad_norm": 0.7103332281112671, "learning_rate": 1.7752661750736608e-05, "loss": 1.3739, "mean_token_accuracy": 0.6615954885880152, "num_tokens": 773881901.0, "step": 4608 }, { "entropy": 1.747862070798874, "epoch": 0.5063305045178654, "grad_norm": 0.6749265789985657, "learning_rate": 1.7751603133891463e-05, "loss": 1.5966, "mean_token_accuracy": 0.622282862663269, "num_tokens": 774103203.0, "step": 4609 }, { "entropy": 1.7335962454477947, "epoch": 0.5064403614292384, "grad_norm": 0.7897728085517883, "learning_rate": 1.775054430336112e-05, "loss": 1.4646, "mean_token_accuracy": 0.6427331467469534, "num_tokens": 774268686.0, "step": 4610 }, { "entropy": 1.7124591569105785, "epoch": 0.5065502183406113, "grad_norm": 0.6754243969917297, "learning_rate": 1.774948525917909e-05, "loss": 1.316, "mean_token_accuracy": 0.6721870998541514, "num_tokens": 774395456.0, "step": 4611 }, { "entropy": 1.6793800294399261, "epoch": 0.5066600752519843, "grad_norm": 0.6710286736488342, "learning_rate": 1.7748426001378897e-05, "loss": 1.2881, "mean_token_accuracy": 0.6775974581638972, "num_tokens": 774532094.0, "step": 4612 }, { "entropy": 1.6808149913946788, "epoch": 0.5067699321633572, "grad_norm": 0.6567005515098572, "learning_rate": 1.774736652999406e-05, "loss": 1.3745, "mean_token_accuracy": 0.6517610251903534, "num_tokens": 774685619.0, "step": 4613 }, { "entropy": 1.751223752895991, "epoch": 0.5068797890747302, "grad_norm": 0.6677860021591187, "learning_rate": 1.7746306845058113e-05, "loss": 1.4449, "mean_token_accuracy": 0.6329387575387955, "num_tokens": 774885515.0, "step": 4614 }, { "entropy": 1.674304574728012, "epoch": 0.5069896459861031, "grad_norm": 0.779396653175354, "learning_rate": 1.7745246946604594e-05, "loss": 1.1997, "mean_token_accuracy": 0.6830140401919683, "num_tokens": 774995865.0, "step": 4615 }, { "entropy": 1.689636766910553, "epoch": 0.5070995028974761, "grad_norm": 0.872032642364502, "learning_rate": 1.774418683466705e-05, "loss": 1.5374, "mean_token_accuracy": 0.6345034042994181, "num_tokens": 775247345.0, "step": 4616 }, { "entropy": 1.6756864488124847, "epoch": 0.507209359808849, "grad_norm": 0.7993032932281494, "learning_rate": 1.7743126509279028e-05, "loss": 1.2167, "mean_token_accuracy": 0.6787678301334381, "num_tokens": 775362640.0, "step": 4617 }, { "entropy": 1.7568728228410084, "epoch": 0.507319216720222, "grad_norm": 0.6227509379386902, "learning_rate": 1.7742065970474096e-05, "loss": 1.3976, "mean_token_accuracy": 0.6522872199614843, "num_tokens": 775565203.0, "step": 4618 }, { "entropy": 1.706842044989268, "epoch": 0.5074290736315948, "grad_norm": 0.7736787796020508, "learning_rate": 1.774100521828581e-05, "loss": 1.5008, "mean_token_accuracy": 0.6404502143462499, "num_tokens": 775781749.0, "step": 4619 }, { "entropy": 1.6234171589215596, "epoch": 0.5075389305429678, "grad_norm": 0.7655417919158936, "learning_rate": 1.773994425274775e-05, "loss": 1.2916, "mean_token_accuracy": 0.6704551080862681, "num_tokens": 775925323.0, "step": 4620 }, { "entropy": 1.7151438395182292, "epoch": 0.5076487874543407, "grad_norm": 0.7209942936897278, "learning_rate": 1.7738883073893488e-05, "loss": 1.2973, "mean_token_accuracy": 0.6704816569884618, "num_tokens": 776032961.0, "step": 4621 }, { "entropy": 1.7023293673992157, "epoch": 0.5077586443657136, "grad_norm": 0.7943500280380249, "learning_rate": 1.7737821681756615e-05, "loss": 1.505, "mean_token_accuracy": 0.655063678820928, "num_tokens": 776200041.0, "step": 4622 }, { "entropy": 1.72823366522789, "epoch": 0.5078685012770866, "grad_norm": 0.7248928546905518, "learning_rate": 1.773676007637072e-05, "loss": 1.4479, "mean_token_accuracy": 0.6552989184856415, "num_tokens": 776358646.0, "step": 4623 }, { "entropy": 1.6740071376164753, "epoch": 0.5079783581884595, "grad_norm": 0.6074709296226501, "learning_rate": 1.7735698257769407e-05, "loss": 1.3812, "mean_token_accuracy": 0.6505249341328939, "num_tokens": 776571031.0, "step": 4624 }, { "entropy": 1.6604996422926586, "epoch": 0.5080882150998325, "grad_norm": 0.7322157025337219, "learning_rate": 1.7734636225986276e-05, "loss": 1.3079, "mean_token_accuracy": 0.6671365002791086, "num_tokens": 776722724.0, "step": 4625 }, { "entropy": 1.6912861963113148, "epoch": 0.5081980720112054, "grad_norm": 0.6166011691093445, "learning_rate": 1.7733573981054947e-05, "loss": 1.4829, "mean_token_accuracy": 0.6378809263308843, "num_tokens": 776971710.0, "step": 4626 }, { "entropy": 1.707468460003535, "epoch": 0.5083079289225784, "grad_norm": 0.6257423162460327, "learning_rate": 1.773251152300903e-05, "loss": 1.4643, "mean_token_accuracy": 0.6333738813797632, "num_tokens": 777202200.0, "step": 4627 }, { "entropy": 1.6994255880514781, "epoch": 0.5084177858339513, "grad_norm": 0.5768167972564697, "learning_rate": 1.7731448851882162e-05, "loss": 1.432, "mean_token_accuracy": 0.6558799743652344, "num_tokens": 777407416.0, "step": 4628 }, { "entropy": 1.7263440589110057, "epoch": 0.5085276427453242, "grad_norm": 0.809219241142273, "learning_rate": 1.7730385967707974e-05, "loss": 1.4374, "mean_token_accuracy": 0.6372658809026083, "num_tokens": 777583936.0, "step": 4629 }, { "entropy": 1.7062489092350006, "epoch": 0.5086374996566971, "grad_norm": 0.7165677547454834, "learning_rate": 1.7729322870520097e-05, "loss": 1.3335, "mean_token_accuracy": 0.6688279807567596, "num_tokens": 777709835.0, "step": 4630 }, { "entropy": 1.7441291213035583, "epoch": 0.5087473565680701, "grad_norm": 0.7295101881027222, "learning_rate": 1.7728259560352185e-05, "loss": 1.5241, "mean_token_accuracy": 0.6376437743504842, "num_tokens": 777845178.0, "step": 4631 }, { "entropy": 1.7269198099772136, "epoch": 0.508857213479443, "grad_norm": 0.858462929725647, "learning_rate": 1.772719603723789e-05, "loss": 1.329, "mean_token_accuracy": 0.6706622143586477, "num_tokens": 777971047.0, "step": 4632 }, { "entropy": 1.730207492907842, "epoch": 0.508967070390816, "grad_norm": 0.9941986203193665, "learning_rate": 1.7726132301210873e-05, "loss": 1.4901, "mean_token_accuracy": 0.6505331347386042, "num_tokens": 778156638.0, "step": 4633 }, { "entropy": 1.6990565558274586, "epoch": 0.5090769273021889, "grad_norm": 0.7046946287155151, "learning_rate": 1.7725068352304797e-05, "loss": 1.359, "mean_token_accuracy": 0.6794573764006296, "num_tokens": 778290929.0, "step": 4634 }, { "entropy": 1.649871587753296, "epoch": 0.5091867842135618, "grad_norm": 0.703970193862915, "learning_rate": 1.772400419055334e-05, "loss": 1.4921, "mean_token_accuracy": 0.6583941678206126, "num_tokens": 778452940.0, "step": 4635 }, { "entropy": 1.6483552952607472, "epoch": 0.5092966411249348, "grad_norm": 0.74805748462677, "learning_rate": 1.7722939815990182e-05, "loss": 1.3589, "mean_token_accuracy": 0.6700867811838785, "num_tokens": 778598364.0, "step": 4636 }, { "entropy": 1.6974958876768749, "epoch": 0.5094064980363077, "grad_norm": 0.735685408115387, "learning_rate": 1.7721875228649004e-05, "loss": 1.3238, "mean_token_accuracy": 0.6773294111092886, "num_tokens": 778717920.0, "step": 4637 }, { "entropy": 1.6946069101492565, "epoch": 0.5095163549476807, "grad_norm": 0.6127060651779175, "learning_rate": 1.7720810428563505e-05, "loss": 1.44, "mean_token_accuracy": 0.639764870206515, "num_tokens": 778896607.0, "step": 4638 }, { "entropy": 1.733677049477895, "epoch": 0.5096262118590535, "grad_norm": 0.6817284822463989, "learning_rate": 1.7719745415767388e-05, "loss": 1.3651, "mean_token_accuracy": 0.6538938681284586, "num_tokens": 779050088.0, "step": 4639 }, { "entropy": 1.6518004635969799, "epoch": 0.5097360687704265, "grad_norm": 0.7022289633750916, "learning_rate": 1.7718680190294353e-05, "loss": 1.4326, "mean_token_accuracy": 0.6522137075662613, "num_tokens": 779254586.0, "step": 4640 }, { "entropy": 1.7444305717945099, "epoch": 0.5098459256817994, "grad_norm": 0.6511925458908081, "learning_rate": 1.7717614752178118e-05, "loss": 1.4276, "mean_token_accuracy": 0.6486201186974844, "num_tokens": 779421316.0, "step": 4641 }, { "entropy": 1.6676433086395264, "epoch": 0.5099557825931724, "grad_norm": 0.7357949018478394, "learning_rate": 1.7716549101452402e-05, "loss": 1.3955, "mean_token_accuracy": 0.6542571783065796, "num_tokens": 779594236.0, "step": 4642 }, { "entropy": 1.7073079347610474, "epoch": 0.5100656395045453, "grad_norm": 0.5698668360710144, "learning_rate": 1.7715483238150937e-05, "loss": 1.4319, "mean_token_accuracy": 0.6443966527779897, "num_tokens": 779786398.0, "step": 4643 }, { "entropy": 1.7269446750481923, "epoch": 0.5101754964159183, "grad_norm": 0.7325721979141235, "learning_rate": 1.771441716230745e-05, "loss": 1.3253, "mean_token_accuracy": 0.6597764392693838, "num_tokens": 779919839.0, "step": 4644 }, { "entropy": 1.671245684226354, "epoch": 0.5102853533272912, "grad_norm": 0.5406301021575928, "learning_rate": 1.7713350873955688e-05, "loss": 1.587, "mean_token_accuracy": 0.6348774433135986, "num_tokens": 780150472.0, "step": 4645 }, { "entropy": 1.6824797888596852, "epoch": 0.5103952102386642, "grad_norm": 0.6999531388282776, "learning_rate": 1.7712284373129397e-05, "loss": 1.352, "mean_token_accuracy": 0.6638096670309702, "num_tokens": 780343092.0, "step": 4646 }, { "entropy": 1.7021546860535939, "epoch": 0.5105050671500371, "grad_norm": 0.6942962408065796, "learning_rate": 1.771121765986233e-05, "loss": 1.4937, "mean_token_accuracy": 0.6427315473556519, "num_tokens": 780531400.0, "step": 4647 }, { "entropy": 1.7465800046920776, "epoch": 0.51061492406141, "grad_norm": 0.7457360625267029, "learning_rate": 1.7710150734188242e-05, "loss": 1.4176, "mean_token_accuracy": 0.6418644239505132, "num_tokens": 780693513.0, "step": 4648 }, { "entropy": 1.7026427487532299, "epoch": 0.510724780972783, "grad_norm": 0.8374441862106323, "learning_rate": 1.7709083596140914e-05, "loss": 1.4585, "mean_token_accuracy": 0.6526228909691175, "num_tokens": 780839738.0, "step": 4649 }, { "entropy": 1.6698659559090931, "epoch": 0.5108346378841558, "grad_norm": 0.7197142839431763, "learning_rate": 1.770801624575411e-05, "loss": 1.4273, "mean_token_accuracy": 0.6493804206450781, "num_tokens": 781048603.0, "step": 4650 }, { "entropy": 1.6799350480238597, "epoch": 0.5109444947955288, "grad_norm": 0.7679303288459778, "learning_rate": 1.7706948683061612e-05, "loss": 1.3085, "mean_token_accuracy": 0.6658252626657486, "num_tokens": 781188858.0, "step": 4651 }, { "entropy": 1.6943186322848003, "epoch": 0.5110543517069017, "grad_norm": 0.6792766451835632, "learning_rate": 1.7705880908097214e-05, "loss": 1.3816, "mean_token_accuracy": 0.6598533739646276, "num_tokens": 781320802.0, "step": 4652 }, { "entropy": 1.7791239122549694, "epoch": 0.5111642086182747, "grad_norm": 0.706912100315094, "learning_rate": 1.7704812920894708e-05, "loss": 1.3435, "mean_token_accuracy": 0.658470019698143, "num_tokens": 781474836.0, "step": 4653 }, { "entropy": 1.7638680239518483, "epoch": 0.5112740655296476, "grad_norm": 0.6044894456863403, "learning_rate": 1.770374472148789e-05, "loss": 1.5903, "mean_token_accuracy": 0.6270778377850851, "num_tokens": 781724073.0, "step": 4654 }, { "entropy": 1.7358222007751465, "epoch": 0.5113839224410206, "grad_norm": 0.6887006759643555, "learning_rate": 1.770267630991058e-05, "loss": 1.4788, "mean_token_accuracy": 0.6502551734447479, "num_tokens": 781917542.0, "step": 4655 }, { "entropy": 1.6472338835398357, "epoch": 0.5114937793523935, "grad_norm": 0.5894920229911804, "learning_rate": 1.770160768619658e-05, "loss": 1.4373, "mean_token_accuracy": 0.6528671483198801, "num_tokens": 782156130.0, "step": 4656 }, { "entropy": 1.6651289065678914, "epoch": 0.5116036362637665, "grad_norm": 0.6169312000274658, "learning_rate": 1.7700538850379715e-05, "loss": 1.4626, "mean_token_accuracy": 0.6592658758163452, "num_tokens": 782327215.0, "step": 4657 }, { "entropy": 1.7671948075294495, "epoch": 0.5117134931751394, "grad_norm": 0.7001243233680725, "learning_rate": 1.7699469802493818e-05, "loss": 1.3362, "mean_token_accuracy": 0.6645391583442688, "num_tokens": 782488955.0, "step": 4658 }, { "entropy": 1.6598977148532867, "epoch": 0.5118233500865124, "grad_norm": 0.6964993476867676, "learning_rate": 1.7698400542572717e-05, "loss": 1.3115, "mean_token_accuracy": 0.6722627530495325, "num_tokens": 782611825.0, "step": 4659 }, { "entropy": 1.727137674887975, "epoch": 0.5119332069978852, "grad_norm": 0.7142112851142883, "learning_rate": 1.769733107065026e-05, "loss": 1.231, "mean_token_accuracy": 0.6792268306016922, "num_tokens": 782755942.0, "step": 4660 }, { "entropy": 1.7172054847081502, "epoch": 0.5120430639092582, "grad_norm": 0.6606463193893433, "learning_rate": 1.769626138676029e-05, "loss": 1.4273, "mean_token_accuracy": 0.6487047125895818, "num_tokens": 782910156.0, "step": 4661 }, { "entropy": 1.687313159306844, "epoch": 0.5121529208206311, "grad_norm": 0.5523031949996948, "learning_rate": 1.7695191490936666e-05, "loss": 1.4415, "mean_token_accuracy": 0.649812196691831, "num_tokens": 783153434.0, "step": 4662 }, { "entropy": 1.6816494663556416, "epoch": 0.512262777732004, "grad_norm": 0.7296652793884277, "learning_rate": 1.769412138321325e-05, "loss": 1.3972, "mean_token_accuracy": 0.6516829133033752, "num_tokens": 783299870.0, "step": 4663 }, { "entropy": 1.6838775873184204, "epoch": 0.512372634643377, "grad_norm": 0.6139092445373535, "learning_rate": 1.769305106362391e-05, "loss": 1.4849, "mean_token_accuracy": 0.6372493157784144, "num_tokens": 783502449.0, "step": 4664 }, { "entropy": 1.6079521874586742, "epoch": 0.5124824915547499, "grad_norm": 0.7152103781700134, "learning_rate": 1.7691980532202515e-05, "loss": 1.2326, "mean_token_accuracy": 0.6814493189255396, "num_tokens": 783649568.0, "step": 4665 }, { "entropy": 1.7108632425467174, "epoch": 0.5125923484661229, "grad_norm": 0.6728825569152832, "learning_rate": 1.7690909788982955e-05, "loss": 1.5291, "mean_token_accuracy": 0.640701100230217, "num_tokens": 783821716.0, "step": 4666 }, { "entropy": 1.7542012830575306, "epoch": 0.5127022053774958, "grad_norm": 0.7167527675628662, "learning_rate": 1.7689838833999114e-05, "loss": 1.4436, "mean_token_accuracy": 0.6508532166481018, "num_tokens": 783965529.0, "step": 4667 }, { "entropy": 1.7339794039726257, "epoch": 0.5128120622888688, "grad_norm": 0.7113919854164124, "learning_rate": 1.768876766728489e-05, "loss": 1.4262, "mean_token_accuracy": 0.6611630270878474, "num_tokens": 784156298.0, "step": 4668 }, { "entropy": 1.7534903983275096, "epoch": 0.5129219192002417, "grad_norm": 0.7554349899291992, "learning_rate": 1.7687696288874182e-05, "loss": 1.5153, "mean_token_accuracy": 0.6601629306872686, "num_tokens": 784364848.0, "step": 4669 }, { "entropy": 1.6630991399288177, "epoch": 0.5130317761116147, "grad_norm": 0.7319965958595276, "learning_rate": 1.7686624698800897e-05, "loss": 1.432, "mean_token_accuracy": 0.6549634039402008, "num_tokens": 784550039.0, "step": 4670 }, { "entropy": 1.721706211566925, "epoch": 0.5131416330229875, "grad_norm": 0.6252912878990173, "learning_rate": 1.7685552897098955e-05, "loss": 1.3013, "mean_token_accuracy": 0.674846296509107, "num_tokens": 784672000.0, "step": 4671 }, { "entropy": 1.6824569404125214, "epoch": 0.5132514899343605, "grad_norm": 0.6189214587211609, "learning_rate": 1.768448088380228e-05, "loss": 1.3107, "mean_token_accuracy": 0.6635235399007797, "num_tokens": 784879713.0, "step": 4672 }, { "entropy": 1.7048685650030773, "epoch": 0.5133613468457334, "grad_norm": 0.5870686769485474, "learning_rate": 1.7683408658944795e-05, "loss": 1.4357, "mean_token_accuracy": 0.6394040137529373, "num_tokens": 785094291.0, "step": 4673 }, { "entropy": 1.7368608117103577, "epoch": 0.5134712037571064, "grad_norm": 0.7474855184555054, "learning_rate": 1.7682336222560438e-05, "loss": 1.4872, "mean_token_accuracy": 0.6371827771266302, "num_tokens": 785282608.0, "step": 4674 }, { "entropy": 1.718473623196284, "epoch": 0.5135810606684793, "grad_norm": 0.5720936059951782, "learning_rate": 1.768126357468315e-05, "loss": 1.4686, "mean_token_accuracy": 0.6401006182034811, "num_tokens": 785508116.0, "step": 4675 }, { "entropy": 1.712001125017802, "epoch": 0.5136909175798522, "grad_norm": 0.8043569922447205, "learning_rate": 1.7680190715346876e-05, "loss": 1.502, "mean_token_accuracy": 0.6602791597445806, "num_tokens": 785672334.0, "step": 4676 }, { "entropy": 1.7085146109263103, "epoch": 0.5138007744912252, "grad_norm": 0.855053722858429, "learning_rate": 1.7679117644585583e-05, "loss": 1.5407, "mean_token_accuracy": 0.6532570545872053, "num_tokens": 785838017.0, "step": 4677 }, { "entropy": 1.6705704132715862, "epoch": 0.5139106314025981, "grad_norm": 0.659695029258728, "learning_rate": 1.7678044362433224e-05, "loss": 1.2934, "mean_token_accuracy": 0.6710864454507828, "num_tokens": 786031568.0, "step": 4678 }, { "entropy": 1.6983959476153057, "epoch": 0.5140204883139711, "grad_norm": 0.6366593241691589, "learning_rate": 1.767697086892377e-05, "loss": 1.4668, "mean_token_accuracy": 0.6328398436307907, "num_tokens": 786232555.0, "step": 4679 }, { "entropy": 1.7187410493691762, "epoch": 0.514130345225344, "grad_norm": 0.7009495496749878, "learning_rate": 1.7675897164091197e-05, "loss": 1.365, "mean_token_accuracy": 0.6644991288582484, "num_tokens": 786446202.0, "step": 4680 }, { "entropy": 1.6954729159673054, "epoch": 0.514240202136717, "grad_norm": 0.6604549288749695, "learning_rate": 1.7674823247969487e-05, "loss": 1.4347, "mean_token_accuracy": 0.6466411848862966, "num_tokens": 786658386.0, "step": 4681 }, { "entropy": 1.6737544735272725, "epoch": 0.5143500590480898, "grad_norm": 0.7725059986114502, "learning_rate": 1.7673749120592627e-05, "loss": 1.2401, "mean_token_accuracy": 0.679726297656695, "num_tokens": 786790667.0, "step": 4682 }, { "entropy": 1.7507797380288441, "epoch": 0.5144599159594628, "grad_norm": 0.7108690142631531, "learning_rate": 1.7672674781994617e-05, "loss": 1.4978, "mean_token_accuracy": 0.6364447275797526, "num_tokens": 786961389.0, "step": 4683 }, { "entropy": 1.708246519168218, "epoch": 0.5145697728708357, "grad_norm": 0.6990996599197388, "learning_rate": 1.7671600232209456e-05, "loss": 1.3944, "mean_token_accuracy": 0.660065030058225, "num_tokens": 787090202.0, "step": 4684 }, { "entropy": 1.725678304831187, "epoch": 0.5146796297822087, "grad_norm": 0.6911423206329346, "learning_rate": 1.7670525471271152e-05, "loss": 1.5114, "mean_token_accuracy": 0.6476559440294901, "num_tokens": 787250677.0, "step": 4685 }, { "entropy": 1.673937330643336, "epoch": 0.5147894866935816, "grad_norm": 0.5852583050727844, "learning_rate": 1.7669450499213725e-05, "loss": 1.395, "mean_token_accuracy": 0.6683137913544973, "num_tokens": 787421903.0, "step": 4686 }, { "entropy": 1.6368590195973713, "epoch": 0.5148993436049546, "grad_norm": 0.5973182320594788, "learning_rate": 1.7668375316071195e-05, "loss": 1.3752, "mean_token_accuracy": 0.6662927816311518, "num_tokens": 787611043.0, "step": 4687 }, { "entropy": 1.7234513560930889, "epoch": 0.5150092005163275, "grad_norm": 0.6610357165336609, "learning_rate": 1.7667299921877588e-05, "loss": 1.4776, "mean_token_accuracy": 0.6609990646441778, "num_tokens": 787769655.0, "step": 4688 }, { "entropy": 1.7650385200977325, "epoch": 0.5151190574277004, "grad_norm": 0.6733184456825256, "learning_rate": 1.766622431666695e-05, "loss": 1.5617, "mean_token_accuracy": 0.6246377180020014, "num_tokens": 788020214.0, "step": 4689 }, { "entropy": 1.7412991126378377, "epoch": 0.5152289143390734, "grad_norm": 0.7549744248390198, "learning_rate": 1.766514850047331e-05, "loss": 1.4773, "mean_token_accuracy": 0.6487500021855036, "num_tokens": 788192824.0, "step": 4690 }, { "entropy": 1.73355237642924, "epoch": 0.5153387712504462, "grad_norm": 0.6612739562988281, "learning_rate": 1.7664072473330724e-05, "loss": 1.5076, "mean_token_accuracy": 0.6572160919507345, "num_tokens": 788373703.0, "step": 4691 }, { "entropy": 1.7703491747379303, "epoch": 0.5154486281618192, "grad_norm": 0.780145525932312, "learning_rate": 1.766299623527325e-05, "loss": 1.3967, "mean_token_accuracy": 0.6519978841145834, "num_tokens": 788511272.0, "step": 4692 }, { "entropy": 1.7383658389250438, "epoch": 0.5155584850731921, "grad_norm": 0.6199609041213989, "learning_rate": 1.7661919786334945e-05, "loss": 1.3815, "mean_token_accuracy": 0.6616942385832468, "num_tokens": 788658018.0, "step": 4693 }, { "entropy": 1.6493331988652546, "epoch": 0.5156683419845651, "grad_norm": 0.672944962978363, "learning_rate": 1.766084312654988e-05, "loss": 1.3136, "mean_token_accuracy": 0.6737960278987885, "num_tokens": 788789174.0, "step": 4694 }, { "entropy": 1.7152611513932545, "epoch": 0.515778198895938, "grad_norm": 0.5982018709182739, "learning_rate": 1.7659766255952134e-05, "loss": 1.3505, "mean_token_accuracy": 0.6602517565091451, "num_tokens": 788936963.0, "step": 4695 }, { "entropy": 1.651655336221059, "epoch": 0.515888055807311, "grad_norm": 0.6493039131164551, "learning_rate": 1.7658689174575785e-05, "loss": 1.2292, "mean_token_accuracy": 0.673387145002683, "num_tokens": 789087968.0, "step": 4696 }, { "entropy": 1.7118739585081737, "epoch": 0.5159979127186839, "grad_norm": 0.634550929069519, "learning_rate": 1.7657611882454925e-05, "loss": 1.2787, "mean_token_accuracy": 0.6688071837027868, "num_tokens": 789233586.0, "step": 4697 }, { "entropy": 1.7723990778128307, "epoch": 0.5161077696300569, "grad_norm": 0.7632473707199097, "learning_rate": 1.7656534379623652e-05, "loss": 1.5881, "mean_token_accuracy": 0.635076088209947, "num_tokens": 789379455.0, "step": 4698 }, { "entropy": 1.7294781108697255, "epoch": 0.5162176265414298, "grad_norm": 0.6788251996040344, "learning_rate": 1.765545666611606e-05, "loss": 1.344, "mean_token_accuracy": 0.660454789797465, "num_tokens": 789515444.0, "step": 4699 }, { "entropy": 1.7380349238713582, "epoch": 0.5163274834528028, "grad_norm": 0.6425085663795471, "learning_rate": 1.7654378741966264e-05, "loss": 1.5548, "mean_token_accuracy": 0.627402106920878, "num_tokens": 789708134.0, "step": 4700 }, { "entropy": 1.7113960087299347, "epoch": 0.5164373403641757, "grad_norm": 0.7413278818130493, "learning_rate": 1.7653300607208385e-05, "loss": 1.3543, "mean_token_accuracy": 0.6587297916412354, "num_tokens": 789853557.0, "step": 4701 }, { "entropy": 1.7466691235701244, "epoch": 0.5165471972755487, "grad_norm": 0.6979295015335083, "learning_rate": 1.7652222261876536e-05, "loss": 1.4306, "mean_token_accuracy": 0.6643926600615183, "num_tokens": 790024665.0, "step": 4702 }, { "entropy": 1.7214144865671794, "epoch": 0.5166570541869215, "grad_norm": 0.9388607740402222, "learning_rate": 1.7651143706004847e-05, "loss": 1.3205, "mean_token_accuracy": 0.6683923502763113, "num_tokens": 790147552.0, "step": 4703 }, { "entropy": 1.734945813814799, "epoch": 0.5167669110982944, "grad_norm": 0.7507519721984863, "learning_rate": 1.765006493962746e-05, "loss": 1.3504, "mean_token_accuracy": 0.6659105022748312, "num_tokens": 790333823.0, "step": 4704 }, { "entropy": 1.6679200232028961, "epoch": 0.5168767680096674, "grad_norm": 0.6735995411872864, "learning_rate": 1.7648985962778514e-05, "loss": 1.3773, "mean_token_accuracy": 0.6531980137030283, "num_tokens": 790525621.0, "step": 4705 }, { "entropy": 1.7311872939268749, "epoch": 0.5169866249210403, "grad_norm": 0.6073651909828186, "learning_rate": 1.764790677549216e-05, "loss": 1.4879, "mean_token_accuracy": 0.6515211214621862, "num_tokens": 790711418.0, "step": 4706 }, { "entropy": 1.6866132219632466, "epoch": 0.5170964818324133, "grad_norm": 0.5769153237342834, "learning_rate": 1.764682737780255e-05, "loss": 1.4447, "mean_token_accuracy": 0.6449514329433441, "num_tokens": 790912827.0, "step": 4707 }, { "entropy": 1.701870898405711, "epoch": 0.5172063387437862, "grad_norm": 0.6132122874259949, "learning_rate": 1.7645747769743852e-05, "loss": 1.4732, "mean_token_accuracy": 0.6592078804969788, "num_tokens": 791053561.0, "step": 4708 }, { "entropy": 1.6868782341480255, "epoch": 0.5173161956551592, "grad_norm": 0.7153650522232056, "learning_rate": 1.764466795135023e-05, "loss": 1.4347, "mean_token_accuracy": 0.6551420340935389, "num_tokens": 791206764.0, "step": 4709 }, { "entropy": 1.6517098446687062, "epoch": 0.5174260525665321, "grad_norm": 0.6770890355110168, "learning_rate": 1.7643587922655855e-05, "loss": 1.4078, "mean_token_accuracy": 0.6538704832394918, "num_tokens": 791388634.0, "step": 4710 }, { "entropy": 1.6130631566047668, "epoch": 0.5175359094779051, "grad_norm": 0.6374915838241577, "learning_rate": 1.7642507683694924e-05, "loss": 1.3895, "mean_token_accuracy": 0.6648249477148056, "num_tokens": 791576408.0, "step": 4711 }, { "entropy": 1.6589768826961517, "epoch": 0.517645766389278, "grad_norm": 0.5480639338493347, "learning_rate": 1.7641427234501614e-05, "loss": 1.3822, "mean_token_accuracy": 0.6536247779925665, "num_tokens": 791848186.0, "step": 4712 }, { "entropy": 1.7161799172560375, "epoch": 0.517755623300651, "grad_norm": 0.6144800782203674, "learning_rate": 1.7640346575110127e-05, "loss": 1.4613, "mean_token_accuracy": 0.6567795326312383, "num_tokens": 792008443.0, "step": 4713 }, { "entropy": 1.7252692977587383, "epoch": 0.5178654802120238, "grad_norm": 0.6382774114608765, "learning_rate": 1.7639265705554664e-05, "loss": 1.4068, "mean_token_accuracy": 0.6472566872835159, "num_tokens": 792193171.0, "step": 4714 }, { "entropy": 1.6851609845956166, "epoch": 0.5179753371233968, "grad_norm": 0.5510247945785522, "learning_rate": 1.763818462586943e-05, "loss": 1.4839, "mean_token_accuracy": 0.6341405063867569, "num_tokens": 792463312.0, "step": 4715 }, { "entropy": 1.712292383114497, "epoch": 0.5180851940347697, "grad_norm": 0.666677713394165, "learning_rate": 1.7637103336088642e-05, "loss": 1.4422, "mean_token_accuracy": 0.6612060219049454, "num_tokens": 792584786.0, "step": 4716 }, { "entropy": 1.6544869939486186, "epoch": 0.5181950509461426, "grad_norm": 0.7078261375427246, "learning_rate": 1.7636021836246527e-05, "loss": 1.3223, "mean_token_accuracy": 0.6622943033774694, "num_tokens": 792722437.0, "step": 4717 }, { "entropy": 1.7123637199401855, "epoch": 0.5183049078575156, "grad_norm": 0.6480149030685425, "learning_rate": 1.7634940126377315e-05, "loss": 1.3032, "mean_token_accuracy": 0.6672448466221491, "num_tokens": 792887386.0, "step": 4718 }, { "entropy": 1.7484397490819295, "epoch": 0.5184147647688885, "grad_norm": 0.782447099685669, "learning_rate": 1.7633858206515234e-05, "loss": 1.3804, "mean_token_accuracy": 0.6604089935620626, "num_tokens": 793053286.0, "step": 4719 }, { "entropy": 1.6887016395727794, "epoch": 0.5185246216802615, "grad_norm": 0.7643845081329346, "learning_rate": 1.763277607669453e-05, "loss": 1.277, "mean_token_accuracy": 0.6705976724624634, "num_tokens": 793189872.0, "step": 4720 }, { "entropy": 1.689270446697871, "epoch": 0.5186344785916344, "grad_norm": 0.6115739941596985, "learning_rate": 1.7631693736949452e-05, "loss": 1.3857, "mean_token_accuracy": 0.6669471363226572, "num_tokens": 793367014.0, "step": 4721 }, { "entropy": 1.6571108798185985, "epoch": 0.5187443355030074, "grad_norm": 0.6382631659507751, "learning_rate": 1.7630611187314255e-05, "loss": 1.3474, "mean_token_accuracy": 0.6653915196657181, "num_tokens": 793566430.0, "step": 4722 }, { "entropy": 1.7158975899219513, "epoch": 0.5188541924143802, "grad_norm": 0.6732120513916016, "learning_rate": 1.7629528427823204e-05, "loss": 1.4011, "mean_token_accuracy": 0.6585634350776672, "num_tokens": 793742006.0, "step": 4723 }, { "entropy": 1.7122439642747243, "epoch": 0.5189640493257532, "grad_norm": 0.672660231590271, "learning_rate": 1.7628445458510564e-05, "loss": 1.347, "mean_token_accuracy": 0.6541797667741776, "num_tokens": 793896843.0, "step": 4724 }, { "entropy": 1.6989329655965169, "epoch": 0.5190739062371261, "grad_norm": 0.6647095084190369, "learning_rate": 1.7627362279410612e-05, "loss": 1.3124, "mean_token_accuracy": 0.6610298504432043, "num_tokens": 794009999.0, "step": 4725 }, { "entropy": 1.6795764764149983, "epoch": 0.5191837631484991, "grad_norm": 0.6302659511566162, "learning_rate": 1.7626278890557634e-05, "loss": 1.3697, "mean_token_accuracy": 0.6671308130025864, "num_tokens": 794196147.0, "step": 4726 }, { "entropy": 1.6657175024350483, "epoch": 0.519293620059872, "grad_norm": 0.6287113428115845, "learning_rate": 1.762519529198591e-05, "loss": 1.3875, "mean_token_accuracy": 0.6727512627840042, "num_tokens": 794431033.0, "step": 4727 }, { "entropy": 1.678794761498769, "epoch": 0.519403476971245, "grad_norm": 0.7044976949691772, "learning_rate": 1.762411148372974e-05, "loss": 1.3174, "mean_token_accuracy": 0.654426708817482, "num_tokens": 794584379.0, "step": 4728 }, { "entropy": 1.6541787485281627, "epoch": 0.5195133338826179, "grad_norm": 0.5799862146377563, "learning_rate": 1.762302746582343e-05, "loss": 1.3385, "mean_token_accuracy": 0.6738729576269785, "num_tokens": 794760900.0, "step": 4729 }, { "entropy": 1.7091464201609294, "epoch": 0.5196231907939908, "grad_norm": 0.664982259273529, "learning_rate": 1.762194323830128e-05, "loss": 1.3998, "mean_token_accuracy": 0.6595932294925054, "num_tokens": 794928750.0, "step": 4730 }, { "entropy": 1.6746362348397572, "epoch": 0.5197330477053638, "grad_norm": 0.5980947613716125, "learning_rate": 1.7620858801197617e-05, "loss": 1.3872, "mean_token_accuracy": 0.6583947539329529, "num_tokens": 795101737.0, "step": 4731 }, { "entropy": 1.713165243466695, "epoch": 0.5198429046167367, "grad_norm": 0.7423360347747803, "learning_rate": 1.761977415454675e-05, "loss": 1.3698, "mean_token_accuracy": 0.649875541528066, "num_tokens": 795282151.0, "step": 4732 }, { "entropy": 1.7285096148649852, "epoch": 0.5199527615281097, "grad_norm": 0.7760996222496033, "learning_rate": 1.761868929838302e-05, "loss": 1.2879, "mean_token_accuracy": 0.6752708901961645, "num_tokens": 795420946.0, "step": 4733 }, { "entropy": 1.730550895134608, "epoch": 0.5200626184394825, "grad_norm": 0.6987181901931763, "learning_rate": 1.761760423274075e-05, "loss": 1.36, "mean_token_accuracy": 0.6588483999172846, "num_tokens": 795599685.0, "step": 4734 }, { "entropy": 1.6970980167388916, "epoch": 0.5201724753508555, "grad_norm": 0.6524776220321655, "learning_rate": 1.761651895765429e-05, "loss": 1.4112, "mean_token_accuracy": 0.6538165758053461, "num_tokens": 795780156.0, "step": 4735 }, { "entropy": 1.6695838073889415, "epoch": 0.5202823322622284, "grad_norm": 0.7201446890830994, "learning_rate": 1.7615433473157993e-05, "loss": 1.4756, "mean_token_accuracy": 0.6536561946074168, "num_tokens": 795962820.0, "step": 4736 }, { "entropy": 1.7082595427831013, "epoch": 0.5203921891736014, "grad_norm": 0.6136282682418823, "learning_rate": 1.76143477792862e-05, "loss": 1.3876, "mean_token_accuracy": 0.6512712786595026, "num_tokens": 796138721.0, "step": 4737 }, { "entropy": 1.6985305150349934, "epoch": 0.5205020460849743, "grad_norm": 0.7493578791618347, "learning_rate": 1.7613261876073285e-05, "loss": 1.3506, "mean_token_accuracy": 0.6512027581532797, "num_tokens": 796276157.0, "step": 4738 }, { "entropy": 1.7088461021582286, "epoch": 0.5206119029963473, "grad_norm": 0.7467851042747498, "learning_rate": 1.7612175763553607e-05, "loss": 1.4494, "mean_token_accuracy": 0.6403765877087911, "num_tokens": 796476359.0, "step": 4739 }, { "entropy": 1.6997049450874329, "epoch": 0.5207217599077202, "grad_norm": 0.7162910103797913, "learning_rate": 1.7611089441761548e-05, "loss": 1.3843, "mean_token_accuracy": 0.647352010011673, "num_tokens": 796648058.0, "step": 4740 }, { "entropy": 1.7611575225989025, "epoch": 0.5208316168190932, "grad_norm": 0.7801529765129089, "learning_rate": 1.7610002910731486e-05, "loss": 1.4835, "mean_token_accuracy": 0.6569034606218338, "num_tokens": 796791732.0, "step": 4741 }, { "entropy": 1.7292684316635132, "epoch": 0.5209414737304661, "grad_norm": 0.6974871754646301, "learning_rate": 1.7608916170497812e-05, "loss": 1.4167, "mean_token_accuracy": 0.6439520965019861, "num_tokens": 796942236.0, "step": 4742 }, { "entropy": 1.7139594753583272, "epoch": 0.521051330641839, "grad_norm": 0.7737529277801514, "learning_rate": 1.7607829221094922e-05, "loss": 1.5007, "mean_token_accuracy": 0.6438900580008825, "num_tokens": 797114208.0, "step": 4743 }, { "entropy": 1.653033008178075, "epoch": 0.521161187553212, "grad_norm": 0.7125444412231445, "learning_rate": 1.760674206255721e-05, "loss": 1.3416, "mean_token_accuracy": 0.6721793164809545, "num_tokens": 797292358.0, "step": 4744 }, { "entropy": 1.7628530263900757, "epoch": 0.5212710444645848, "grad_norm": 0.7119945883750916, "learning_rate": 1.760565469491909e-05, "loss": 1.2987, "mean_token_accuracy": 0.6636313299338022, "num_tokens": 797431327.0, "step": 4745 }, { "entropy": 1.748626043399175, "epoch": 0.5213809013759578, "grad_norm": 0.7918199300765991, "learning_rate": 1.7604567118214975e-05, "loss": 1.4559, "mean_token_accuracy": 0.6555203547080358, "num_tokens": 797555640.0, "step": 4746 }, { "entropy": 1.6953304906686146, "epoch": 0.5214907582873307, "grad_norm": 0.9008361101150513, "learning_rate": 1.7603479332479284e-05, "loss": 1.3674, "mean_token_accuracy": 0.6686960806449255, "num_tokens": 797751389.0, "step": 4747 }, { "entropy": 1.6819122731685638, "epoch": 0.5216006151987037, "grad_norm": 0.6910074949264526, "learning_rate": 1.7602391337746458e-05, "loss": 1.2831, "mean_token_accuracy": 0.6620205889145533, "num_tokens": 797870103.0, "step": 4748 }, { "entropy": 1.7419504225254059, "epoch": 0.5217104721100766, "grad_norm": 0.6273844242095947, "learning_rate": 1.760130313405091e-05, "loss": 1.3587, "mean_token_accuracy": 0.6500416547060013, "num_tokens": 798027954.0, "step": 4749 }, { "entropy": 1.7702117661635082, "epoch": 0.5218203290214496, "grad_norm": 0.8262366652488708, "learning_rate": 1.76002147214271e-05, "loss": 1.52, "mean_token_accuracy": 0.6454491962989172, "num_tokens": 798183172.0, "step": 4750 }, { "entropy": 1.6769113938013713, "epoch": 0.5219301859328225, "grad_norm": 0.6600481271743774, "learning_rate": 1.7599126099909464e-05, "loss": 1.6274, "mean_token_accuracy": 0.6358497887849808, "num_tokens": 798411244.0, "step": 4751 }, { "entropy": 1.712651213010152, "epoch": 0.5220400428441955, "grad_norm": 0.8367064595222473, "learning_rate": 1.759803726953246e-05, "loss": 1.3546, "mean_token_accuracy": 0.6678621719280878, "num_tokens": 798620785.0, "step": 4752 }, { "entropy": 1.7246687213579814, "epoch": 0.5221498997555684, "grad_norm": 0.7160963416099548, "learning_rate": 1.759694823033055e-05, "loss": 1.4249, "mean_token_accuracy": 0.6564734677473704, "num_tokens": 798805644.0, "step": 4753 }, { "entropy": 1.7259255250295003, "epoch": 0.5222597566669414, "grad_norm": 0.7170692682266235, "learning_rate": 1.7595858982338204e-05, "loss": 1.3938, "mean_token_accuracy": 0.6538248707850774, "num_tokens": 798956926.0, "step": 4754 }, { "entropy": 1.7542922695477803, "epoch": 0.5223696135783142, "grad_norm": 0.6757575869560242, "learning_rate": 1.759476952558989e-05, "loss": 1.5494, "mean_token_accuracy": 0.6471764942010244, "num_tokens": 799127230.0, "step": 4755 }, { "entropy": 1.6838472684224446, "epoch": 0.5224794704896872, "grad_norm": 0.8514281511306763, "learning_rate": 1.7593679860120097e-05, "loss": 1.3779, "mean_token_accuracy": 0.6497372736533483, "num_tokens": 799315432.0, "step": 4756 }, { "entropy": 1.6587632795174916, "epoch": 0.5225893274010601, "grad_norm": 0.6651864647865295, "learning_rate": 1.7592589985963303e-05, "loss": 1.4102, "mean_token_accuracy": 0.6655519803365072, "num_tokens": 799512409.0, "step": 4757 }, { "entropy": 1.7543583710988362, "epoch": 0.522699184312433, "grad_norm": 0.798579216003418, "learning_rate": 1.759149990315401e-05, "loss": 1.368, "mean_token_accuracy": 0.6612423459688822, "num_tokens": 799651240.0, "step": 4758 }, { "entropy": 1.6546010772387187, "epoch": 0.522809041223806, "grad_norm": 0.7455418109893799, "learning_rate": 1.759040961172671e-05, "loss": 1.3556, "mean_token_accuracy": 0.67067651450634, "num_tokens": 799845563.0, "step": 4759 }, { "entropy": 1.7300900121529896, "epoch": 0.5229188981351789, "grad_norm": 0.7077800631523132, "learning_rate": 1.758931911171592e-05, "loss": 1.4215, "mean_token_accuracy": 0.6541052609682083, "num_tokens": 800017346.0, "step": 4760 }, { "entropy": 1.7226456105709076, "epoch": 0.5230287550465519, "grad_norm": 0.5930922627449036, "learning_rate": 1.758822840315615e-05, "loss": 1.4411, "mean_token_accuracy": 0.6517085035641988, "num_tokens": 800192442.0, "step": 4761 }, { "entropy": 1.7274209260940552, "epoch": 0.5231386119579248, "grad_norm": 0.6994463205337524, "learning_rate": 1.7587137486081916e-05, "loss": 1.4818, "mean_token_accuracy": 0.6374549319346746, "num_tokens": 800395066.0, "step": 4762 }, { "entropy": 1.7726040482521057, "epoch": 0.5232484688692978, "grad_norm": 0.8898850083351135, "learning_rate": 1.7586046360527753e-05, "loss": 1.5346, "mean_token_accuracy": 0.6310683737198511, "num_tokens": 800573953.0, "step": 4763 }, { "entropy": 1.7151733040809631, "epoch": 0.5233583257806707, "grad_norm": 0.756864070892334, "learning_rate": 1.758495502652819e-05, "loss": 1.3788, "mean_token_accuracy": 0.658622587720553, "num_tokens": 800712722.0, "step": 4764 }, { "entropy": 1.747101644674937, "epoch": 0.5234681826920436, "grad_norm": 0.7063867449760437, "learning_rate": 1.7583863484117766e-05, "loss": 1.3973, "mean_token_accuracy": 0.652265245715777, "num_tokens": 800860313.0, "step": 4765 }, { "entropy": 1.7522001167138417, "epoch": 0.5235780396034165, "grad_norm": 0.5486934781074524, "learning_rate": 1.7582771733331027e-05, "loss": 1.4967, "mean_token_accuracy": 0.6310158222913742, "num_tokens": 801086371.0, "step": 4766 }, { "entropy": 1.716380735238393, "epoch": 0.5236878965147895, "grad_norm": 0.6977860927581787, "learning_rate": 1.7581679774202534e-05, "loss": 1.43, "mean_token_accuracy": 0.652380645275116, "num_tokens": 801271543.0, "step": 4767 }, { "entropy": 1.7060090104738872, "epoch": 0.5237977534261624, "grad_norm": 0.6787402033805847, "learning_rate": 1.7580587606766838e-05, "loss": 1.543, "mean_token_accuracy": 0.6341428657372793, "num_tokens": 801457586.0, "step": 4768 }, { "entropy": 1.682382086912791, "epoch": 0.5239076103375354, "grad_norm": 0.7111299633979797, "learning_rate": 1.757949523105851e-05, "loss": 1.244, "mean_token_accuracy": 0.6785935560862223, "num_tokens": 801599366.0, "step": 4769 }, { "entropy": 1.7559874653816223, "epoch": 0.5240174672489083, "grad_norm": 0.6998342275619507, "learning_rate": 1.7578402647112124e-05, "loss": 1.4426, "mean_token_accuracy": 0.6572673618793488, "num_tokens": 801751007.0, "step": 4770 }, { "entropy": 1.7509790360927582, "epoch": 0.5241273241602812, "grad_norm": 1.142534613609314, "learning_rate": 1.7577309854962256e-05, "loss": 1.1564, "mean_token_accuracy": 0.6708057522773743, "num_tokens": 801943593.0, "step": 4771 }, { "entropy": 1.6971227129300435, "epoch": 0.5242371810716542, "grad_norm": 0.6250020861625671, "learning_rate": 1.75762168546435e-05, "loss": 1.5585, "mean_token_accuracy": 0.6346791485945383, "num_tokens": 802173777.0, "step": 4772 }, { "entropy": 1.695990224679311, "epoch": 0.5243470379830271, "grad_norm": 0.7434117794036865, "learning_rate": 1.757512364619044e-05, "loss": 1.311, "mean_token_accuracy": 0.6738946636517843, "num_tokens": 802347486.0, "step": 4773 }, { "entropy": 1.6820709705352783, "epoch": 0.5244568948944001, "grad_norm": 0.6679350137710571, "learning_rate": 1.757403022963768e-05, "loss": 1.3477, "mean_token_accuracy": 0.6635241111119589, "num_tokens": 802506390.0, "step": 4774 }, { "entropy": 1.6601552367210388, "epoch": 0.524566751805773, "grad_norm": 0.7635940313339233, "learning_rate": 1.757293660501983e-05, "loss": 1.4112, "mean_token_accuracy": 0.6675299257040024, "num_tokens": 802666473.0, "step": 4775 }, { "entropy": 1.651892900466919, "epoch": 0.5246766087171459, "grad_norm": 0.6960279941558838, "learning_rate": 1.757184277237149e-05, "loss": 1.3256, "mean_token_accuracy": 0.6727364957332611, "num_tokens": 802810524.0, "step": 4776 }, { "entropy": 1.704396516084671, "epoch": 0.5247864656285188, "grad_norm": 0.5887051820755005, "learning_rate": 1.7570748731727293e-05, "loss": 1.3548, "mean_token_accuracy": 0.6513074586788813, "num_tokens": 803037321.0, "step": 4777 }, { "entropy": 1.7361102004845936, "epoch": 0.5248963225398918, "grad_norm": 0.8553687930107117, "learning_rate": 1.7569654483121857e-05, "loss": 1.4555, "mean_token_accuracy": 0.652128721276919, "num_tokens": 803197712.0, "step": 4778 }, { "entropy": 1.7074143290519714, "epoch": 0.5250061794512647, "grad_norm": 0.6320570111274719, "learning_rate": 1.7568560026589818e-05, "loss": 1.3462, "mean_token_accuracy": 0.6641202121973038, "num_tokens": 803369072.0, "step": 4779 }, { "entropy": 1.7333478927612305, "epoch": 0.5251160363626377, "grad_norm": 0.776484489440918, "learning_rate": 1.7567465362165818e-05, "loss": 1.6121, "mean_token_accuracy": 0.62413057188193, "num_tokens": 803557142.0, "step": 4780 }, { "entropy": 1.7478283047676086, "epoch": 0.5252258932740106, "grad_norm": 0.6890655755996704, "learning_rate": 1.756637048988449e-05, "loss": 1.5187, "mean_token_accuracy": 0.6530665705601374, "num_tokens": 803710112.0, "step": 4781 }, { "entropy": 1.6944151123364766, "epoch": 0.5253357501853836, "grad_norm": 0.8545740842819214, "learning_rate": 1.7565275409780504e-05, "loss": 1.5678, "mean_token_accuracy": 0.6560395757357279, "num_tokens": 803873255.0, "step": 4782 }, { "entropy": 1.754347950220108, "epoch": 0.5254456070967565, "grad_norm": 0.717082679271698, "learning_rate": 1.7564180121888504e-05, "loss": 1.4302, "mean_token_accuracy": 0.6466685732205709, "num_tokens": 804002011.0, "step": 4783 }, { "entropy": 1.6676461199919383, "epoch": 0.5255554640081294, "grad_norm": 0.7122258543968201, "learning_rate": 1.756308462624316e-05, "loss": 1.2871, "mean_token_accuracy": 0.681659941871961, "num_tokens": 804132887.0, "step": 4784 }, { "entropy": 1.6842567523320515, "epoch": 0.5256653209195024, "grad_norm": 0.7058034539222717, "learning_rate": 1.7561988922879147e-05, "loss": 1.2925, "mean_token_accuracy": 0.667713056008021, "num_tokens": 804271045.0, "step": 4785 }, { "entropy": 1.6645110448201497, "epoch": 0.5257751778308752, "grad_norm": 0.6072272062301636, "learning_rate": 1.756089301183114e-05, "loss": 1.413, "mean_token_accuracy": 0.6601279675960541, "num_tokens": 804484901.0, "step": 4786 }, { "entropy": 1.6867012182871501, "epoch": 0.5258850347422482, "grad_norm": 0.734171986579895, "learning_rate": 1.755979689313383e-05, "loss": 1.4347, "mean_token_accuracy": 0.6705892930428187, "num_tokens": 804631499.0, "step": 4787 }, { "entropy": 1.7348099152247112, "epoch": 0.5259948916536211, "grad_norm": 0.6457310914993286, "learning_rate": 1.75587005668219e-05, "loss": 1.3843, "mean_token_accuracy": 0.6592828581730524, "num_tokens": 804800057.0, "step": 4788 }, { "entropy": 1.7324989934762318, "epoch": 0.5261047485649941, "grad_norm": 0.7020387053489685, "learning_rate": 1.7557604032930056e-05, "loss": 1.3377, "mean_token_accuracy": 0.6654490580161413, "num_tokens": 804932809.0, "step": 4789 }, { "entropy": 1.7356117367744446, "epoch": 0.526214605476367, "grad_norm": 0.8487410545349121, "learning_rate": 1.7556507291493e-05, "loss": 1.5411, "mean_token_accuracy": 0.638851081331571, "num_tokens": 805093928.0, "step": 4790 }, { "entropy": 1.66506223877271, "epoch": 0.52632446238774, "grad_norm": 0.6197313070297241, "learning_rate": 1.755541034254544e-05, "loss": 1.4225, "mean_token_accuracy": 0.6506545394659042, "num_tokens": 805275885.0, "step": 4791 }, { "entropy": 1.6665216783682506, "epoch": 0.5264343192991129, "grad_norm": 0.7226223945617676, "learning_rate": 1.7554313186122095e-05, "loss": 1.3719, "mean_token_accuracy": 0.659637118379275, "num_tokens": 805455227.0, "step": 4792 }, { "entropy": 1.7102013031641643, "epoch": 0.5265441762104859, "grad_norm": 0.7095229625701904, "learning_rate": 1.7553215822257692e-05, "loss": 1.4586, "mean_token_accuracy": 0.6529026329517365, "num_tokens": 805613449.0, "step": 4793 }, { "entropy": 1.6817038357257843, "epoch": 0.5266540331218588, "grad_norm": 0.6859667301177979, "learning_rate": 1.7552118250986962e-05, "loss": 1.3303, "mean_token_accuracy": 0.6643107682466507, "num_tokens": 805728020.0, "step": 4794 }, { "entropy": 1.66180619597435, "epoch": 0.5267638900332318, "grad_norm": 0.6296705007553101, "learning_rate": 1.7551020472344643e-05, "loss": 1.3457, "mean_token_accuracy": 0.6573961029450098, "num_tokens": 805862681.0, "step": 4795 }, { "entropy": 1.699836363395055, "epoch": 0.5268737469446046, "grad_norm": 0.6737419962882996, "learning_rate": 1.7549922486365478e-05, "loss": 1.537, "mean_token_accuracy": 0.6582231894135475, "num_tokens": 806037799.0, "step": 4796 }, { "entropy": 1.7254907389481862, "epoch": 0.5269836038559775, "grad_norm": 0.7271363735198975, "learning_rate": 1.7548824293084214e-05, "loss": 1.4136, "mean_token_accuracy": 0.6628665079673132, "num_tokens": 806221209.0, "step": 4797 }, { "entropy": 1.6994514266649883, "epoch": 0.5270934607673505, "grad_norm": 0.7013587951660156, "learning_rate": 1.7547725892535615e-05, "loss": 1.3026, "mean_token_accuracy": 0.6708128750324249, "num_tokens": 806383779.0, "step": 4798 }, { "entropy": 1.681582232316335, "epoch": 0.5272033176787234, "grad_norm": 0.6710511445999146, "learning_rate": 1.754662728475444e-05, "loss": 1.3914, "mean_token_accuracy": 0.6664837151765823, "num_tokens": 806556636.0, "step": 4799 }, { "entropy": 1.7152946889400482, "epoch": 0.5273131745900964, "grad_norm": 0.6160458326339722, "learning_rate": 1.7545528469775467e-05, "loss": 1.3605, "mean_token_accuracy": 0.6646227290232977, "num_tokens": 806699683.0, "step": 4800 }, { "entropy": 1.6824420094490051, "epoch": 0.5274230315014693, "grad_norm": 0.7939539551734924, "learning_rate": 1.7544429447633464e-05, "loss": 1.3189, "mean_token_accuracy": 0.6681206673383713, "num_tokens": 806867720.0, "step": 4801 }, { "entropy": 1.6991462310155232, "epoch": 0.5275328884128423, "grad_norm": 0.6454995274543762, "learning_rate": 1.7543330218363214e-05, "loss": 1.4584, "mean_token_accuracy": 0.6686960806449255, "num_tokens": 807035383.0, "step": 4802 }, { "entropy": 1.7097290853659313, "epoch": 0.5276427453242152, "grad_norm": 0.6477057933807373, "learning_rate": 1.7542230781999518e-05, "loss": 1.2847, "mean_token_accuracy": 0.671577995022138, "num_tokens": 807190570.0, "step": 4803 }, { "entropy": 1.7435030043125153, "epoch": 0.5277526022355882, "grad_norm": 0.72170490026474, "learning_rate": 1.754113113857716e-05, "loss": 1.4119, "mean_token_accuracy": 0.6534734964370728, "num_tokens": 807344375.0, "step": 4804 }, { "entropy": 1.6681690216064453, "epoch": 0.5278624591469611, "grad_norm": 0.6512613296508789, "learning_rate": 1.754003128813095e-05, "loss": 1.3037, "mean_token_accuracy": 0.6649158795674642, "num_tokens": 807482406.0, "step": 4805 }, { "entropy": 1.7332804004351299, "epoch": 0.5279723160583341, "grad_norm": 0.6607586741447449, "learning_rate": 1.75389312306957e-05, "loss": 1.5653, "mean_token_accuracy": 0.6444868743419647, "num_tokens": 807672269.0, "step": 4806 }, { "entropy": 1.722442050774892, "epoch": 0.5280821729697069, "grad_norm": 0.7250016927719116, "learning_rate": 1.7537830966306224e-05, "loss": 1.3925, "mean_token_accuracy": 0.6732650498549143, "num_tokens": 807822790.0, "step": 4807 }, { "entropy": 1.7573895851771038, "epoch": 0.5281920298810799, "grad_norm": 0.7589662671089172, "learning_rate": 1.753673049499734e-05, "loss": 1.3706, "mean_token_accuracy": 0.6612446457147598, "num_tokens": 807990370.0, "step": 4808 }, { "entropy": 1.6575620273749034, "epoch": 0.5283018867924528, "grad_norm": 0.5413112044334412, "learning_rate": 1.753562981680388e-05, "loss": 1.3177, "mean_token_accuracy": 0.649698426326116, "num_tokens": 808176218.0, "step": 4809 }, { "entropy": 1.728501945734024, "epoch": 0.5284117437038258, "grad_norm": 0.6827234029769897, "learning_rate": 1.7534528931760683e-05, "loss": 1.3945, "mean_token_accuracy": 0.6664670258760452, "num_tokens": 808328962.0, "step": 4810 }, { "entropy": 1.6973057091236115, "epoch": 0.5285216006151987, "grad_norm": 0.6527639627456665, "learning_rate": 1.753342783990259e-05, "loss": 1.3681, "mean_token_accuracy": 0.6585773775974909, "num_tokens": 808462565.0, "step": 4811 }, { "entropy": 1.7292284766832988, "epoch": 0.5286314575265716, "grad_norm": 0.8234806656837463, "learning_rate": 1.7532326541264454e-05, "loss": 1.5218, "mean_token_accuracy": 0.6624507009983063, "num_tokens": 808601980.0, "step": 4812 }, { "entropy": 1.6868124802907307, "epoch": 0.5287413144379446, "grad_norm": 0.5748288631439209, "learning_rate": 1.753122503588112e-05, "loss": 1.4779, "mean_token_accuracy": 0.6543243726094564, "num_tokens": 808866293.0, "step": 4813 }, { "entropy": 1.7639289100964863, "epoch": 0.5288511713493175, "grad_norm": 0.7285853624343872, "learning_rate": 1.753012332378746e-05, "loss": 1.4627, "mean_token_accuracy": 0.6443410267432531, "num_tokens": 809048139.0, "step": 4814 }, { "entropy": 1.7024679978688557, "epoch": 0.5289610282606905, "grad_norm": 0.7826334238052368, "learning_rate": 1.752902140501834e-05, "loss": 1.4016, "mean_token_accuracy": 0.6619109660387039, "num_tokens": 809214733.0, "step": 4815 }, { "entropy": 1.630326509475708, "epoch": 0.5290708851720634, "grad_norm": 0.7884056568145752, "learning_rate": 1.7527919279608633e-05, "loss": 1.3239, "mean_token_accuracy": 0.6573766022920609, "num_tokens": 809357746.0, "step": 4816 }, { "entropy": 1.7573048671086628, "epoch": 0.5291807420834364, "grad_norm": 0.8989459276199341, "learning_rate": 1.7526816947593224e-05, "loss": 1.4258, "mean_token_accuracy": 0.6477284530798594, "num_tokens": 809518204.0, "step": 4817 }, { "entropy": 1.635603408018748, "epoch": 0.5292905989948092, "grad_norm": 0.6966229677200317, "learning_rate": 1.7525714409006998e-05, "loss": 1.3025, "mean_token_accuracy": 0.6680015424887339, "num_tokens": 809662973.0, "step": 4818 }, { "entropy": 1.6825711230436962, "epoch": 0.5294004559061822, "grad_norm": 0.6718734502792358, "learning_rate": 1.7524611663884852e-05, "loss": 1.4107, "mean_token_accuracy": 0.6541168093681335, "num_tokens": 809835552.0, "step": 4819 }, { "entropy": 1.707334001859029, "epoch": 0.5295103128175551, "grad_norm": 0.687263548374176, "learning_rate": 1.7523508712261685e-05, "loss": 1.3849, "mean_token_accuracy": 0.6574449588855108, "num_tokens": 810024901.0, "step": 4820 }, { "entropy": 1.713168462117513, "epoch": 0.5296201697289281, "grad_norm": 0.6556559801101685, "learning_rate": 1.752240555417241e-05, "loss": 1.425, "mean_token_accuracy": 0.6502045691013336, "num_tokens": 810199634.0, "step": 4821 }, { "entropy": 1.7425429324309032, "epoch": 0.529730026640301, "grad_norm": 0.6769330501556396, "learning_rate": 1.7521302189651937e-05, "loss": 1.332, "mean_token_accuracy": 0.6587035904328028, "num_tokens": 810362916.0, "step": 4822 }, { "entropy": 1.6665611068407695, "epoch": 0.529839883551674, "grad_norm": 0.8234541416168213, "learning_rate": 1.752019861873519e-05, "loss": 1.3859, "mean_token_accuracy": 0.6658773571252823, "num_tokens": 810514991.0, "step": 4823 }, { "entropy": 1.6548964281876881, "epoch": 0.5299497404630469, "grad_norm": 0.6913493275642395, "learning_rate": 1.7519094841457092e-05, "loss": 1.4466, "mean_token_accuracy": 0.6507799476385117, "num_tokens": 810675171.0, "step": 4824 }, { "entropy": 1.73293998837471, "epoch": 0.5300595973744198, "grad_norm": 0.7102120518684387, "learning_rate": 1.751799085785258e-05, "loss": 1.4008, "mean_token_accuracy": 0.661358987291654, "num_tokens": 810802681.0, "step": 4825 }, { "entropy": 1.7738582690556843, "epoch": 0.5301694542857928, "grad_norm": 0.6953791379928589, "learning_rate": 1.7516886667956596e-05, "loss": 1.4149, "mean_token_accuracy": 0.6516300787528356, "num_tokens": 811021221.0, "step": 4826 }, { "entropy": 1.7118199865023296, "epoch": 0.5302793111971656, "grad_norm": 0.6161823272705078, "learning_rate": 1.7515782271804084e-05, "loss": 1.4182, "mean_token_accuracy": 0.6468035380045573, "num_tokens": 811208787.0, "step": 4827 }, { "entropy": 1.7987407644589741, "epoch": 0.5303891681085386, "grad_norm": 0.8192143440246582, "learning_rate": 1.7514677669430003e-05, "loss": 1.3789, "mean_token_accuracy": 0.6606917083263397, "num_tokens": 811338291.0, "step": 4828 }, { "entropy": 1.6597908238569896, "epoch": 0.5304990250199115, "grad_norm": 0.833269476890564, "learning_rate": 1.7513572860869306e-05, "loss": 1.1746, "mean_token_accuracy": 0.6920550564924876, "num_tokens": 811441985.0, "step": 4829 }, { "entropy": 1.7635074357191722, "epoch": 0.5306088819312845, "grad_norm": 0.7363488078117371, "learning_rate": 1.751246784615696e-05, "loss": 1.4692, "mean_token_accuracy": 0.6375128527482351, "num_tokens": 811634058.0, "step": 4830 }, { "entropy": 1.7353008687496185, "epoch": 0.5307187388426574, "grad_norm": 0.7418941259384155, "learning_rate": 1.7511362625327947e-05, "loss": 1.4245, "mean_token_accuracy": 0.6573583434025446, "num_tokens": 811760632.0, "step": 4831 }, { "entropy": 1.7234211166699727, "epoch": 0.5308285957540304, "grad_norm": 0.6825436353683472, "learning_rate": 1.751025719841724e-05, "loss": 1.3955, "mean_token_accuracy": 0.6559055695931116, "num_tokens": 811922986.0, "step": 4832 }, { "entropy": 1.7144565085570018, "epoch": 0.5309384526654033, "grad_norm": 0.6612206101417542, "learning_rate": 1.7509151565459823e-05, "loss": 1.5228, "mean_token_accuracy": 0.6367465257644653, "num_tokens": 812112649.0, "step": 4833 }, { "entropy": 1.7186235984166462, "epoch": 0.5310483095767763, "grad_norm": 0.6164059638977051, "learning_rate": 1.7508045726490695e-05, "loss": 1.3859, "mean_token_accuracy": 0.655776783823967, "num_tokens": 812281703.0, "step": 4834 }, { "entropy": 1.7127485771973927, "epoch": 0.5311581664881492, "grad_norm": 0.8158262372016907, "learning_rate": 1.750693968154485e-05, "loss": 1.4447, "mean_token_accuracy": 0.6493441561857859, "num_tokens": 812448324.0, "step": 4835 }, { "entropy": 1.696526567141215, "epoch": 0.5312680233995222, "grad_norm": 0.7266950011253357, "learning_rate": 1.7505833430657298e-05, "loss": 1.3453, "mean_token_accuracy": 0.6664767960707346, "num_tokens": 812601651.0, "step": 4836 }, { "entropy": 1.7216827968756359, "epoch": 0.5313778803108951, "grad_norm": 0.7393192052841187, "learning_rate": 1.7504726973863053e-05, "loss": 1.4682, "mean_token_accuracy": 0.6517121444145838, "num_tokens": 812800925.0, "step": 4837 }, { "entropy": 1.7090055843194325, "epoch": 0.5314877372222679, "grad_norm": 0.7309879064559937, "learning_rate": 1.7503620311197124e-05, "loss": 1.3463, "mean_token_accuracy": 0.6595309128363928, "num_tokens": 812928476.0, "step": 4838 }, { "entropy": 1.6507777372996013, "epoch": 0.5315975941336409, "grad_norm": 0.5989612340927124, "learning_rate": 1.7502513442694546e-05, "loss": 1.3661, "mean_token_accuracy": 0.6650850723187128, "num_tokens": 813101231.0, "step": 4839 }, { "entropy": 1.780005156993866, "epoch": 0.5317074510450138, "grad_norm": 0.8584796786308289, "learning_rate": 1.7501406368390344e-05, "loss": 1.6108, "mean_token_accuracy": 0.6370650803049406, "num_tokens": 813269613.0, "step": 4840 }, { "entropy": 1.6397046148777008, "epoch": 0.5318173079563868, "grad_norm": 0.7476561665534973, "learning_rate": 1.7500299088319566e-05, "loss": 1.4002, "mean_token_accuracy": 0.6559847990671793, "num_tokens": 813481357.0, "step": 4841 }, { "entropy": 1.7163714965184529, "epoch": 0.5319271648677597, "grad_norm": 0.7873140573501587, "learning_rate": 1.7499191602517245e-05, "loss": 1.4229, "mean_token_accuracy": 0.6485390017429987, "num_tokens": 813693879.0, "step": 4842 }, { "entropy": 1.6578922768433888, "epoch": 0.5320370217791327, "grad_norm": 0.7017808556556702, "learning_rate": 1.749808391101844e-05, "loss": 1.2177, "mean_token_accuracy": 0.6817097862561544, "num_tokens": 813842919.0, "step": 4843 }, { "entropy": 1.6423076788584392, "epoch": 0.5321468786905056, "grad_norm": 0.685593843460083, "learning_rate": 1.7496976013858207e-05, "loss": 1.2957, "mean_token_accuracy": 0.6747591296831766, "num_tokens": 813995390.0, "step": 4844 }, { "entropy": 1.7144256333510082, "epoch": 0.5322567356018786, "grad_norm": 0.7866110801696777, "learning_rate": 1.749586791107162e-05, "loss": 1.3694, "mean_token_accuracy": 0.6647010346253713, "num_tokens": 814119386.0, "step": 4845 }, { "entropy": 1.7141908307870228, "epoch": 0.5323665925132515, "grad_norm": 0.6247113943099976, "learning_rate": 1.749475960269373e-05, "loss": 1.4549, "mean_token_accuracy": 0.647007574637731, "num_tokens": 814297783.0, "step": 4846 }, { "entropy": 1.7318035264809926, "epoch": 0.5324764494246245, "grad_norm": 0.8848351240158081, "learning_rate": 1.7493651088759628e-05, "loss": 1.5185, "mean_token_accuracy": 0.6626861343781153, "num_tokens": 814495884.0, "step": 4847 }, { "entropy": 1.7968285183111827, "epoch": 0.5325863063359974, "grad_norm": 0.774684727191925, "learning_rate": 1.7492542369304394e-05, "loss": 1.4586, "mean_token_accuracy": 0.6544144451618195, "num_tokens": 814629185.0, "step": 4848 }, { "entropy": 1.788926084836324, "epoch": 0.5326961632473703, "grad_norm": 0.7674810886383057, "learning_rate": 1.749143344436312e-05, "loss": 1.4702, "mean_token_accuracy": 0.6507733265558878, "num_tokens": 814789810.0, "step": 4849 }, { "entropy": 1.7028450568517048, "epoch": 0.5328060201587432, "grad_norm": 0.8451623320579529, "learning_rate": 1.7490324313970905e-05, "loss": 1.3885, "mean_token_accuracy": 0.6632057080666224, "num_tokens": 814936262.0, "step": 4850 }, { "entropy": 1.7443354924519856, "epoch": 0.5329158770701162, "grad_norm": 0.7909703254699707, "learning_rate": 1.748921497816285e-05, "loss": 1.4668, "mean_token_accuracy": 0.6472253054380417, "num_tokens": 815079833.0, "step": 4851 }, { "entropy": 1.7053726116816204, "epoch": 0.5330257339814891, "grad_norm": 0.7654147148132324, "learning_rate": 1.7488105436974062e-05, "loss": 1.5746, "mean_token_accuracy": 0.6418164720137914, "num_tokens": 815251335.0, "step": 4852 }, { "entropy": 1.7527342240015666, "epoch": 0.533135590892862, "grad_norm": 0.6501696705818176, "learning_rate": 1.7486995690439666e-05, "loss": 1.5457, "mean_token_accuracy": 0.6359410037597021, "num_tokens": 815454559.0, "step": 4853 }, { "entropy": 1.719238390525182, "epoch": 0.533245447804235, "grad_norm": 2.1015734672546387, "learning_rate": 1.7485885738594773e-05, "loss": 1.2862, "mean_token_accuracy": 0.663156678279241, "num_tokens": 815642372.0, "step": 4854 }, { "entropy": 1.7204244335492451, "epoch": 0.5333553047156079, "grad_norm": 0.8685169816017151, "learning_rate": 1.748477558147452e-05, "loss": 1.3431, "mean_token_accuracy": 0.6598953902721405, "num_tokens": 815782219.0, "step": 4855 }, { "entropy": 1.7100018362204235, "epoch": 0.5334651616269809, "grad_norm": 1.181349515914917, "learning_rate": 1.7483665219114045e-05, "loss": 1.1781, "mean_token_accuracy": 0.6599796116352081, "num_tokens": 815931852.0, "step": 4856 }, { "entropy": 1.6601063509782155, "epoch": 0.5335750185383538, "grad_norm": 0.7519736289978027, "learning_rate": 1.7482554651548485e-05, "loss": 1.4403, "mean_token_accuracy": 0.6680620610713959, "num_tokens": 816123952.0, "step": 4857 }, { "entropy": 1.69512935479482, "epoch": 0.5336848754497268, "grad_norm": 0.747626781463623, "learning_rate": 1.7481443878812996e-05, "loss": 1.3925, "mean_token_accuracy": 0.6561353007952372, "num_tokens": 816256226.0, "step": 4858 }, { "entropy": 1.726113458474477, "epoch": 0.5337947323610996, "grad_norm": 0.671970784664154, "learning_rate": 1.7480332900942722e-05, "loss": 1.3526, "mean_token_accuracy": 0.6597483803828558, "num_tokens": 816387859.0, "step": 4859 }, { "entropy": 1.6943202714125316, "epoch": 0.5339045892724726, "grad_norm": 0.7271941304206848, "learning_rate": 1.747922171797284e-05, "loss": 1.2837, "mean_token_accuracy": 0.6677757650613785, "num_tokens": 816532923.0, "step": 4860 }, { "entropy": 1.7412850956122081, "epoch": 0.5340144461838455, "grad_norm": 0.7533559203147888, "learning_rate": 1.74781103299385e-05, "loss": 1.4504, "mean_token_accuracy": 0.664714311559995, "num_tokens": 816655245.0, "step": 4861 }, { "entropy": 1.7081489165623982, "epoch": 0.5341243030952185, "grad_norm": 0.6002654433250427, "learning_rate": 1.7476998736874896e-05, "loss": 1.42, "mean_token_accuracy": 0.6530174712340037, "num_tokens": 816838841.0, "step": 4862 }, { "entropy": 1.7771287858486176, "epoch": 0.5342341600065914, "grad_norm": 0.9644994139671326, "learning_rate": 1.74758869388172e-05, "loss": 1.2972, "mean_token_accuracy": 0.6680352141459783, "num_tokens": 817007159.0, "step": 4863 }, { "entropy": 1.7283466557661693, "epoch": 0.5343440169179644, "grad_norm": 0.7296448945999146, "learning_rate": 1.7474774935800594e-05, "loss": 1.474, "mean_token_accuracy": 0.6520171463489532, "num_tokens": 817134109.0, "step": 4864 }, { "entropy": 1.6883817911148071, "epoch": 0.5344538738293373, "grad_norm": 0.6540996432304382, "learning_rate": 1.7473662727860285e-05, "loss": 1.2972, "mean_token_accuracy": 0.6820466021696726, "num_tokens": 817272978.0, "step": 4865 }, { "entropy": 1.7053470313549042, "epoch": 0.5345637307407102, "grad_norm": 0.5956873297691345, "learning_rate": 1.747255031503146e-05, "loss": 1.3342, "mean_token_accuracy": 0.6530234664678574, "num_tokens": 817430466.0, "step": 4866 }, { "entropy": 1.725131352742513, "epoch": 0.5346735876520832, "grad_norm": 0.6326978206634521, "learning_rate": 1.7471437697349342e-05, "loss": 1.4458, "mean_token_accuracy": 0.6490624397993088, "num_tokens": 817587619.0, "step": 4867 }, { "entropy": 1.7081331610679626, "epoch": 0.5347834445634561, "grad_norm": 0.7116954922676086, "learning_rate": 1.7470324874849133e-05, "loss": 1.5435, "mean_token_accuracy": 0.6464557300011317, "num_tokens": 817794229.0, "step": 4868 }, { "entropy": 1.7534505824247997, "epoch": 0.534893301474829, "grad_norm": 0.7030560970306396, "learning_rate": 1.7469211847566062e-05, "loss": 1.3342, "mean_token_accuracy": 0.6696716099977493, "num_tokens": 817920264.0, "step": 4869 }, { "entropy": 1.7026815017064412, "epoch": 0.5350031583862019, "grad_norm": 0.6979672908782959, "learning_rate": 1.7468098615535347e-05, "loss": 1.5933, "mean_token_accuracy": 0.635742649435997, "num_tokens": 818085089.0, "step": 4870 }, { "entropy": 1.6528734962145488, "epoch": 0.5351130152975749, "grad_norm": 0.7067160606384277, "learning_rate": 1.7466985178792222e-05, "loss": 1.2836, "mean_token_accuracy": 0.6861815551916758, "num_tokens": 818278281.0, "step": 4871 }, { "entropy": 1.7629452645778656, "epoch": 0.5352228722089478, "grad_norm": 0.709534227848053, "learning_rate": 1.7465871537371938e-05, "loss": 1.3458, "mean_token_accuracy": 0.669313962260882, "num_tokens": 818428687.0, "step": 4872 }, { "entropy": 1.7045076290766399, "epoch": 0.5353327291203208, "grad_norm": 0.7088239789009094, "learning_rate": 1.746475769130973e-05, "loss": 1.3678, "mean_token_accuracy": 0.6577753275632858, "num_tokens": 818577943.0, "step": 4873 }, { "entropy": 1.7641015152136486, "epoch": 0.5354425860316937, "grad_norm": 0.7473663687705994, "learning_rate": 1.746364364064085e-05, "loss": 1.3615, "mean_token_accuracy": 0.6545535524686178, "num_tokens": 818704762.0, "step": 4874 }, { "entropy": 1.73165625333786, "epoch": 0.5355524429430667, "grad_norm": 0.6683910489082336, "learning_rate": 1.7462529385400567e-05, "loss": 1.2848, "mean_token_accuracy": 0.6737756431102753, "num_tokens": 818836769.0, "step": 4875 }, { "entropy": 1.727175106604894, "epoch": 0.5356622998544396, "grad_norm": 0.7146100997924805, "learning_rate": 1.7461414925624144e-05, "loss": 1.2557, "mean_token_accuracy": 0.6819527049859365, "num_tokens": 819013897.0, "step": 4876 }, { "entropy": 1.6949097514152527, "epoch": 0.5357721567658126, "grad_norm": 0.7949661016464233, "learning_rate": 1.7460300261346842e-05, "loss": 1.4601, "mean_token_accuracy": 0.6500726789236069, "num_tokens": 819200845.0, "step": 4877 }, { "entropy": 1.7253201305866241, "epoch": 0.5358820136771855, "grad_norm": 0.6605319976806641, "learning_rate": 1.745918539260395e-05, "loss": 1.5041, "mean_token_accuracy": 0.6472121477127075, "num_tokens": 819388584.0, "step": 4878 }, { "entropy": 1.7595091660817463, "epoch": 0.5359918705885584, "grad_norm": 0.9308416247367859, "learning_rate": 1.7458070319430754e-05, "loss": 1.5802, "mean_token_accuracy": 0.6386895179748535, "num_tokens": 819596639.0, "step": 4879 }, { "entropy": 1.6913528839747112, "epoch": 0.5361017274999313, "grad_norm": 0.7783805727958679, "learning_rate": 1.7456955041862543e-05, "loss": 1.2217, "mean_token_accuracy": 0.6871163348356882, "num_tokens": 819708937.0, "step": 4880 }, { "entropy": 1.708279420932134, "epoch": 0.5362115844113042, "grad_norm": 0.6577259302139282, "learning_rate": 1.745583955993461e-05, "loss": 1.3714, "mean_token_accuracy": 0.6517359912395477, "num_tokens": 819915470.0, "step": 4881 }, { "entropy": 1.703675111134847, "epoch": 0.5363214413226772, "grad_norm": 0.5665971636772156, "learning_rate": 1.7454723873682268e-05, "loss": 1.4554, "mean_token_accuracy": 0.6373900771141052, "num_tokens": 820172132.0, "step": 4882 }, { "entropy": 1.7301917870839436, "epoch": 0.5364312982340501, "grad_norm": 0.5813365578651428, "learning_rate": 1.7453607983140823e-05, "loss": 1.5125, "mean_token_accuracy": 0.6375814924637476, "num_tokens": 820379791.0, "step": 4883 }, { "entropy": 1.7420111298561096, "epoch": 0.5365411551454231, "grad_norm": 0.6530336141586304, "learning_rate": 1.745249188834559e-05, "loss": 1.456, "mean_token_accuracy": 0.6411418666442236, "num_tokens": 820565833.0, "step": 4884 }, { "entropy": 1.801695555448532, "epoch": 0.536651012056796, "grad_norm": 0.7685750722885132, "learning_rate": 1.74513755893319e-05, "loss": 1.5374, "mean_token_accuracy": 0.634370227654775, "num_tokens": 820688906.0, "step": 4885 }, { "entropy": 1.751669466495514, "epoch": 0.536760868968169, "grad_norm": 0.7663411498069763, "learning_rate": 1.7450259086135078e-05, "loss": 1.4194, "mean_token_accuracy": 0.6512367278337479, "num_tokens": 820836128.0, "step": 4886 }, { "entropy": 1.7377658585707347, "epoch": 0.5368707258795419, "grad_norm": 0.7554279565811157, "learning_rate": 1.744914237879046e-05, "loss": 1.198, "mean_token_accuracy": 0.6807886908451716, "num_tokens": 820951980.0, "step": 4887 }, { "entropy": 1.782357394695282, "epoch": 0.5369805827909149, "grad_norm": 0.672864556312561, "learning_rate": 1.74480254673334e-05, "loss": 1.3533, "mean_token_accuracy": 0.65904101729393, "num_tokens": 821101205.0, "step": 4888 }, { "entropy": 1.742653727531433, "epoch": 0.5370904397022878, "grad_norm": 0.6784942150115967, "learning_rate": 1.7446908351799233e-05, "loss": 1.2928, "mean_token_accuracy": 0.6659032354752222, "num_tokens": 821266573.0, "step": 4889 }, { "entropy": 1.679027299086253, "epoch": 0.5372002966136608, "grad_norm": 0.6892912983894348, "learning_rate": 1.7445791032223322e-05, "loss": 1.3674, "mean_token_accuracy": 0.6666253606478373, "num_tokens": 821437038.0, "step": 4890 }, { "entropy": 1.7067703604698181, "epoch": 0.5373101535250336, "grad_norm": 0.6747919917106628, "learning_rate": 1.744467350864103e-05, "loss": 1.2386, "mean_token_accuracy": 0.6692210485537847, "num_tokens": 821571344.0, "step": 4891 }, { "entropy": 1.7276716828346252, "epoch": 0.5374200104364065, "grad_norm": 0.9028007388114929, "learning_rate": 1.7443555781087726e-05, "loss": 1.5016, "mean_token_accuracy": 0.6459170381228129, "num_tokens": 821732320.0, "step": 4892 }, { "entropy": 1.6987963616847992, "epoch": 0.5375298673477795, "grad_norm": 0.7619015574455261, "learning_rate": 1.7442437849598785e-05, "loss": 1.3314, "mean_token_accuracy": 0.6694723268349966, "num_tokens": 821925047.0, "step": 4893 }, { "entropy": 1.74809134999911, "epoch": 0.5376397242591524, "grad_norm": 0.6891080141067505, "learning_rate": 1.744131971420959e-05, "loss": 1.6016, "mean_token_accuracy": 0.633436476190885, "num_tokens": 822098417.0, "step": 4894 }, { "entropy": 1.7045816977818806, "epoch": 0.5377495811705254, "grad_norm": 0.603262186050415, "learning_rate": 1.7440201374955528e-05, "loss": 1.5185, "mean_token_accuracy": 0.6483029425144196, "num_tokens": 822284175.0, "step": 4895 }, { "entropy": 1.7040814061959584, "epoch": 0.5378594380818983, "grad_norm": 0.6909913420677185, "learning_rate": 1.7439082831871997e-05, "loss": 1.3295, "mean_token_accuracy": 0.6752724895874659, "num_tokens": 822404812.0, "step": 4896 }, { "entropy": 1.6904057959715526, "epoch": 0.5379692949932713, "grad_norm": 0.6549181342124939, "learning_rate": 1.743796408499439e-05, "loss": 1.3216, "mean_token_accuracy": 0.6638427078723907, "num_tokens": 822590204.0, "step": 4897 }, { "entropy": 1.6527547438939412, "epoch": 0.5380791519046442, "grad_norm": 0.7167640924453735, "learning_rate": 1.7436845134358123e-05, "loss": 1.3751, "mean_token_accuracy": 0.6618984391291937, "num_tokens": 822782283.0, "step": 4898 }, { "entropy": 1.6621145009994507, "epoch": 0.5381890088160172, "grad_norm": 0.7170586585998535, "learning_rate": 1.743572597999861e-05, "loss": 1.3813, "mean_token_accuracy": 0.657960906624794, "num_tokens": 822958457.0, "step": 4899 }, { "entropy": 1.6679618457953136, "epoch": 0.53829886572739, "grad_norm": 0.6240289807319641, "learning_rate": 1.743460662195127e-05, "loss": 1.3381, "mean_token_accuracy": 0.6678373962640762, "num_tokens": 823116527.0, "step": 4900 }, { "entropy": 1.7151422599951427, "epoch": 0.538408722638763, "grad_norm": 0.6621232628822327, "learning_rate": 1.7433487060251527e-05, "loss": 1.5341, "mean_token_accuracy": 0.6566172788540522, "num_tokens": 823296593.0, "step": 4901 }, { "entropy": 1.6715179483095806, "epoch": 0.5385185795501359, "grad_norm": 0.663271427154541, "learning_rate": 1.743236729493482e-05, "loss": 1.3851, "mean_token_accuracy": 0.6610744049151739, "num_tokens": 823449082.0, "step": 4902 }, { "entropy": 1.7014525334040325, "epoch": 0.5386284364615089, "grad_norm": 0.6124004125595093, "learning_rate": 1.7431247326036583e-05, "loss": 1.4697, "mean_token_accuracy": 0.6529516379038492, "num_tokens": 823615779.0, "step": 4903 }, { "entropy": 1.6754214763641357, "epoch": 0.5387382933728818, "grad_norm": 0.7338191270828247, "learning_rate": 1.7430127153592272e-05, "loss": 1.3501, "mean_token_accuracy": 0.6698144127925237, "num_tokens": 823744244.0, "step": 4904 }, { "entropy": 1.7061218818028767, "epoch": 0.5388481502842548, "grad_norm": 0.6320319771766663, "learning_rate": 1.742900677763733e-05, "loss": 1.333, "mean_token_accuracy": 0.66542187333107, "num_tokens": 823896705.0, "step": 4905 }, { "entropy": 1.70430921514829, "epoch": 0.5389580071956277, "grad_norm": 0.7169397473335266, "learning_rate": 1.742788619820722e-05, "loss": 1.4334, "mean_token_accuracy": 0.6706308672825495, "num_tokens": 824071474.0, "step": 4906 }, { "entropy": 1.7034543951352437, "epoch": 0.5390678641070006, "grad_norm": 0.6652551293373108, "learning_rate": 1.7426765415337406e-05, "loss": 1.5371, "mean_token_accuracy": 0.6554579238096873, "num_tokens": 824216176.0, "step": 4907 }, { "entropy": 1.739420880873998, "epoch": 0.5391777210183736, "grad_norm": 0.9010681509971619, "learning_rate": 1.7425644429063372e-05, "loss": 1.5553, "mean_token_accuracy": 0.6332442959149679, "num_tokens": 824346870.0, "step": 4908 }, { "entropy": 1.7290275891621907, "epoch": 0.5392875779297465, "grad_norm": 0.6607105135917664, "learning_rate": 1.742452323942058e-05, "loss": 1.4219, "mean_token_accuracy": 0.6464213828245798, "num_tokens": 824523488.0, "step": 4909 }, { "entropy": 1.7586493094762166, "epoch": 0.5393974348411195, "grad_norm": 0.7161458730697632, "learning_rate": 1.742340184644452e-05, "loss": 1.3448, "mean_token_accuracy": 0.6701912134885788, "num_tokens": 824669157.0, "step": 4910 }, { "entropy": 1.679528295993805, "epoch": 0.5395072917524923, "grad_norm": 0.6938197612762451, "learning_rate": 1.7422280250170693e-05, "loss": 1.3921, "mean_token_accuracy": 0.6523070633411407, "num_tokens": 824861531.0, "step": 4911 }, { "entropy": 1.6967064638932545, "epoch": 0.5396171486638653, "grad_norm": 0.6822254657745361, "learning_rate": 1.7421158450634586e-05, "loss": 1.448, "mean_token_accuracy": 0.6576181898514429, "num_tokens": 825039311.0, "step": 4912 }, { "entropy": 1.6692471305529277, "epoch": 0.5397270055752382, "grad_norm": 0.7633799910545349, "learning_rate": 1.742003644787171e-05, "loss": 1.3524, "mean_token_accuracy": 0.6830050398906072, "num_tokens": 825165157.0, "step": 4913 }, { "entropy": 1.684964507818222, "epoch": 0.5398368624866112, "grad_norm": 0.5935234427452087, "learning_rate": 1.7418914241917572e-05, "loss": 1.3683, "mean_token_accuracy": 0.6735412726799647, "num_tokens": 825335892.0, "step": 4914 }, { "entropy": 1.7406889696915944, "epoch": 0.5399467193979841, "grad_norm": 0.7186749577522278, "learning_rate": 1.741779183280769e-05, "loss": 1.594, "mean_token_accuracy": 0.6328186293443044, "num_tokens": 825567908.0, "step": 4915 }, { "entropy": 1.6296118994553883, "epoch": 0.5400565763093571, "grad_norm": 0.7475072741508484, "learning_rate": 1.741666922057759e-05, "loss": 1.3133, "mean_token_accuracy": 0.6770787388086319, "num_tokens": 825723520.0, "step": 4916 }, { "entropy": 1.6742859582106273, "epoch": 0.54016643322073, "grad_norm": 0.7436842918395996, "learning_rate": 1.7415546405262797e-05, "loss": 1.3603, "mean_token_accuracy": 0.6628256142139435, "num_tokens": 825860111.0, "step": 4917 }, { "entropy": 1.6383836766084034, "epoch": 0.540276290132103, "grad_norm": 0.6304495930671692, "learning_rate": 1.7414423386898857e-05, "loss": 1.3379, "mean_token_accuracy": 0.664565180738767, "num_tokens": 826041019.0, "step": 4918 }, { "entropy": 1.6786586443583171, "epoch": 0.5403861470434759, "grad_norm": 0.6277565956115723, "learning_rate": 1.74133001655213e-05, "loss": 1.4681, "mean_token_accuracy": 0.6444205145041147, "num_tokens": 826262462.0, "step": 4919 }, { "entropy": 1.6958061456680298, "epoch": 0.5404960039548488, "grad_norm": 0.8898590803146362, "learning_rate": 1.7412176741165687e-05, "loss": 1.4818, "mean_token_accuracy": 0.6551410953203837, "num_tokens": 826420243.0, "step": 4920 }, { "entropy": 1.7218329807122548, "epoch": 0.5406058608662218, "grad_norm": 0.6511401534080505, "learning_rate": 1.741105311386757e-05, "loss": 1.3571, "mean_token_accuracy": 0.6615760376056036, "num_tokens": 826616257.0, "step": 4921 }, { "entropy": 1.7913587391376495, "epoch": 0.5407157177775946, "grad_norm": 0.6388662457466125, "learning_rate": 1.740992928366251e-05, "loss": 1.3981, "mean_token_accuracy": 0.6544657200574875, "num_tokens": 826805321.0, "step": 4922 }, { "entropy": 1.6517898738384247, "epoch": 0.5408255746889676, "grad_norm": 0.6503629684448242, "learning_rate": 1.7408805250586077e-05, "loss": 1.4909, "mean_token_accuracy": 0.6562631527582804, "num_tokens": 826976397.0, "step": 4923 }, { "entropy": 1.7049082815647125, "epoch": 0.5409354316003405, "grad_norm": 0.7112172842025757, "learning_rate": 1.7407681014673844e-05, "loss": 1.3882, "mean_token_accuracy": 0.6503375222285589, "num_tokens": 827134334.0, "step": 4924 }, { "entropy": 1.721593697865804, "epoch": 0.5410452885117135, "grad_norm": 0.6750729084014893, "learning_rate": 1.7406556575961394e-05, "loss": 1.3612, "mean_token_accuracy": 0.6642037878433863, "num_tokens": 827268866.0, "step": 4925 }, { "entropy": 1.743778149286906, "epoch": 0.5411551454230864, "grad_norm": 0.8075199723243713, "learning_rate": 1.7405431934484318e-05, "loss": 1.5953, "mean_token_accuracy": 0.6419996519883474, "num_tokens": 827456250.0, "step": 4926 }, { "entropy": 1.701025813817978, "epoch": 0.5412650023344594, "grad_norm": 0.6938318014144897, "learning_rate": 1.7404307090278206e-05, "loss": 1.2758, "mean_token_accuracy": 0.6731408586104711, "num_tokens": 827569239.0, "step": 4927 }, { "entropy": 1.6912222007910411, "epoch": 0.5413748592458323, "grad_norm": 0.7330572009086609, "learning_rate": 1.7403182043378662e-05, "loss": 1.3019, "mean_token_accuracy": 0.6731463919083277, "num_tokens": 827733160.0, "step": 4928 }, { "entropy": 1.6323689023653667, "epoch": 0.5414847161572053, "grad_norm": 0.6897679567337036, "learning_rate": 1.740205679382129e-05, "loss": 1.3017, "mean_token_accuracy": 0.6699829796950022, "num_tokens": 827857219.0, "step": 4929 }, { "entropy": 1.6964257657527924, "epoch": 0.5415945730685782, "grad_norm": 0.6199988722801208, "learning_rate": 1.7400931341641706e-05, "loss": 1.4628, "mean_token_accuracy": 0.6376723100741705, "num_tokens": 828046638.0, "step": 4930 }, { "entropy": 1.6805628935496013, "epoch": 0.5417044299799512, "grad_norm": 0.7456372976303101, "learning_rate": 1.7399805686875527e-05, "loss": 1.2353, "mean_token_accuracy": 0.6760278890530268, "num_tokens": 828160151.0, "step": 4931 }, { "entropy": 1.7248408893744152, "epoch": 0.541814286891324, "grad_norm": 0.69236159324646, "learning_rate": 1.7398679829558386e-05, "loss": 1.4688, "mean_token_accuracy": 0.6398780643939972, "num_tokens": 828331278.0, "step": 4932 }, { "entropy": 1.6821700930595398, "epoch": 0.5419241438026969, "grad_norm": 0.7130243182182312, "learning_rate": 1.739755376972591e-05, "loss": 1.4732, "mean_token_accuracy": 0.6641266047954559, "num_tokens": 828474991.0, "step": 4933 }, { "entropy": 1.6392736335595448, "epoch": 0.5420340007140699, "grad_norm": 0.7200448513031006, "learning_rate": 1.7396427507413737e-05, "loss": 1.3565, "mean_token_accuracy": 0.6657882034778595, "num_tokens": 828615538.0, "step": 4934 }, { "entropy": 1.6556443572044373, "epoch": 0.5421438576254428, "grad_norm": 0.7323468327522278, "learning_rate": 1.739530104265752e-05, "loss": 1.1859, "mean_token_accuracy": 0.6867117136716843, "num_tokens": 828766143.0, "step": 4935 }, { "entropy": 1.7083774209022522, "epoch": 0.5422537145368158, "grad_norm": 0.77711021900177, "learning_rate": 1.7394174375492906e-05, "loss": 1.2845, "mean_token_accuracy": 0.6667287697394689, "num_tokens": 828880028.0, "step": 4936 }, { "entropy": 1.6991042792797089, "epoch": 0.5423635714481887, "grad_norm": 0.694174587726593, "learning_rate": 1.739304750595555e-05, "loss": 1.2199, "mean_token_accuracy": 0.6751233587662379, "num_tokens": 828983386.0, "step": 4937 }, { "entropy": 1.7104702393213909, "epoch": 0.5424734283595617, "grad_norm": 0.6545060873031616, "learning_rate": 1.7391920434081126e-05, "loss": 1.4173, "mean_token_accuracy": 0.6476683566967646, "num_tokens": 829168137.0, "step": 4938 }, { "entropy": 1.693225493033727, "epoch": 0.5425832852709346, "grad_norm": 0.7685295939445496, "learning_rate": 1.73907931599053e-05, "loss": 1.2846, "mean_token_accuracy": 0.6751909504334132, "num_tokens": 829405939.0, "step": 4939 }, { "entropy": 1.7319706281026204, "epoch": 0.5426931421823076, "grad_norm": 0.8708465695381165, "learning_rate": 1.7389665683463748e-05, "loss": 1.558, "mean_token_accuracy": 0.6340043346087137, "num_tokens": 829636248.0, "step": 4940 }, { "entropy": 1.7094059487183888, "epoch": 0.5428029990936805, "grad_norm": 0.9573303461074829, "learning_rate": 1.738853800479216e-05, "loss": 1.4207, "mean_token_accuracy": 0.6643887509902319, "num_tokens": 829784445.0, "step": 4941 }, { "entropy": 1.748300055662791, "epoch": 0.5429128560050535, "grad_norm": 0.8324919939041138, "learning_rate": 1.738741012392622e-05, "loss": 1.3749, "mean_token_accuracy": 0.6539400120576223, "num_tokens": 829953714.0, "step": 4942 }, { "entropy": 1.699168602625529, "epoch": 0.5430227129164263, "grad_norm": 0.6905611157417297, "learning_rate": 1.7386282040901626e-05, "loss": 1.487, "mean_token_accuracy": 0.6424149125814438, "num_tokens": 830112166.0, "step": 4943 }, { "entropy": 1.683763285477956, "epoch": 0.5431325698277993, "grad_norm": 0.8066513538360596, "learning_rate": 1.7385153755754087e-05, "loss": 1.3186, "mean_token_accuracy": 0.6723660826683044, "num_tokens": 830229332.0, "step": 4944 }, { "entropy": 1.7084158559640248, "epoch": 0.5432424267391722, "grad_norm": 0.6088327765464783, "learning_rate": 1.7384025268519307e-05, "loss": 1.5253, "mean_token_accuracy": 0.62581634024779, "num_tokens": 830507166.0, "step": 4945 }, { "entropy": 1.7765875260035198, "epoch": 0.5433522836505452, "grad_norm": 0.8466908931732178, "learning_rate": 1.7382896579233003e-05, "loss": 1.5159, "mean_token_accuracy": 0.6420450657606125, "num_tokens": 830700696.0, "step": 4946 }, { "entropy": 1.7295196950435638, "epoch": 0.5434621405619181, "grad_norm": 0.7594314813613892, "learning_rate": 1.7381767687930903e-05, "loss": 1.3716, "mean_token_accuracy": 0.6728833566109339, "num_tokens": 830855018.0, "step": 4947 }, { "entropy": 1.6678162415822346, "epoch": 0.543571997473291, "grad_norm": 0.5881645083427429, "learning_rate": 1.7380638594648728e-05, "loss": 1.4235, "mean_token_accuracy": 0.6409558157126108, "num_tokens": 831035506.0, "step": 4948 }, { "entropy": 1.744312971830368, "epoch": 0.543681854384664, "grad_norm": 0.6560190916061401, "learning_rate": 1.7379509299422216e-05, "loss": 1.3315, "mean_token_accuracy": 0.6596087117989858, "num_tokens": 831167690.0, "step": 4949 }, { "entropy": 1.7742221355438232, "epoch": 0.5437917112960369, "grad_norm": 0.7557669281959534, "learning_rate": 1.7378379802287113e-05, "loss": 1.3269, "mean_token_accuracy": 0.6664220293362936, "num_tokens": 831302737.0, "step": 4950 }, { "entropy": 1.6314336359500885, "epoch": 0.5439015682074099, "grad_norm": 2.5049948692321777, "learning_rate": 1.737725010327916e-05, "loss": 1.4156, "mean_token_accuracy": 0.6630344639221827, "num_tokens": 831506832.0, "step": 4951 }, { "entropy": 1.6986377537250519, "epoch": 0.5440114251187828, "grad_norm": 0.6551359295845032, "learning_rate": 1.737612020243411e-05, "loss": 1.3055, "mean_token_accuracy": 0.659782146414121, "num_tokens": 831643698.0, "step": 4952 }, { "entropy": 1.6850547790527344, "epoch": 0.5441212820301558, "grad_norm": 0.6227862238883972, "learning_rate": 1.7374990099787737e-05, "loss": 1.3241, "mean_token_accuracy": 0.6679957658052444, "num_tokens": 831809561.0, "step": 4953 }, { "entropy": 1.7365097105503082, "epoch": 0.5442311389415286, "grad_norm": 0.7013810873031616, "learning_rate": 1.7373859795375797e-05, "loss": 1.5199, "mean_token_accuracy": 0.6493206669886907, "num_tokens": 831954609.0, "step": 4954 }, { "entropy": 1.6703599095344543, "epoch": 0.5443409958529016, "grad_norm": 0.782053530216217, "learning_rate": 1.7372729289234064e-05, "loss": 1.2796, "mean_token_accuracy": 0.6680347323417664, "num_tokens": 832063290.0, "step": 4955 }, { "entropy": 1.7207246720790863, "epoch": 0.5444508527642745, "grad_norm": 0.5964549779891968, "learning_rate": 1.7371598581398325e-05, "loss": 1.3947, "mean_token_accuracy": 0.6506600578625997, "num_tokens": 832266378.0, "step": 4956 }, { "entropy": 1.690430869658788, "epoch": 0.5445607096756475, "grad_norm": 0.690424382686615, "learning_rate": 1.737046767190436e-05, "loss": 1.3475, "mean_token_accuracy": 0.6578344404697418, "num_tokens": 832434529.0, "step": 4957 }, { "entropy": 1.7267669141292572, "epoch": 0.5446705665870204, "grad_norm": 0.7527792453765869, "learning_rate": 1.7369336560787966e-05, "loss": 1.3702, "mean_token_accuracy": 0.6688602467377981, "num_tokens": 832555033.0, "step": 4958 }, { "entropy": 1.6294583181540172, "epoch": 0.5447804234983934, "grad_norm": 0.8316755890846252, "learning_rate": 1.736820524808494e-05, "loss": 1.3852, "mean_token_accuracy": 0.6587762931982676, "num_tokens": 832758932.0, "step": 4959 }, { "entropy": 1.7353256543477376, "epoch": 0.5448902804097663, "grad_norm": 0.6854872107505798, "learning_rate": 1.7367073733831085e-05, "loss": 1.4275, "mean_token_accuracy": 0.6549768050511678, "num_tokens": 832967800.0, "step": 4960 }, { "entropy": 1.666805108388265, "epoch": 0.5450001373211392, "grad_norm": 0.6198726892471313, "learning_rate": 1.7365942018062216e-05, "loss": 1.567, "mean_token_accuracy": 0.6273992757002512, "num_tokens": 833157652.0, "step": 4961 }, { "entropy": 1.7501900096734364, "epoch": 0.5451099942325122, "grad_norm": 0.701242208480835, "learning_rate": 1.736481010081415e-05, "loss": 1.3403, "mean_token_accuracy": 0.6606934070587158, "num_tokens": 833284661.0, "step": 4962 }, { "entropy": 1.7042124271392822, "epoch": 0.545219851143885, "grad_norm": 0.7061309814453125, "learning_rate": 1.7363677982122713e-05, "loss": 1.4459, "mean_token_accuracy": 0.6599131673574448, "num_tokens": 833426934.0, "step": 4963 }, { "entropy": 1.670636922121048, "epoch": 0.545329708055258, "grad_norm": 0.6328370571136475, "learning_rate": 1.7362545662023735e-05, "loss": 1.3507, "mean_token_accuracy": 0.6591466615597407, "num_tokens": 833573679.0, "step": 4964 }, { "entropy": 1.6602637072404225, "epoch": 0.5454395649666309, "grad_norm": 0.6920685172080994, "learning_rate": 1.7361413140553058e-05, "loss": 1.2574, "mean_token_accuracy": 0.6768335302670797, "num_tokens": 833695555.0, "step": 4965 }, { "entropy": 1.740365246931712, "epoch": 0.5455494218780039, "grad_norm": 0.7330303192138672, "learning_rate": 1.7360280417746515e-05, "loss": 1.3766, "mean_token_accuracy": 0.6667214135328928, "num_tokens": 833814908.0, "step": 4966 }, { "entropy": 1.6844541629155476, "epoch": 0.5456592787893768, "grad_norm": 0.6612024307250977, "learning_rate": 1.7359147493639966e-05, "loss": 1.3186, "mean_token_accuracy": 0.662054200967153, "num_tokens": 833944687.0, "step": 4967 }, { "entropy": 1.706677258014679, "epoch": 0.5457691357007498, "grad_norm": 0.7018133401870728, "learning_rate": 1.7358014368269265e-05, "loss": 1.4785, "mean_token_accuracy": 0.6449083437522253, "num_tokens": 834118400.0, "step": 4968 }, { "entropy": 1.6965554157892864, "epoch": 0.5458789926121227, "grad_norm": 0.6533994078636169, "learning_rate": 1.735688104167027e-05, "loss": 1.4246, "mean_token_accuracy": 0.648577556014061, "num_tokens": 834269641.0, "step": 4969 }, { "entropy": 1.6984079976876576, "epoch": 0.5459888495234957, "grad_norm": 0.8152681589126587, "learning_rate": 1.735574751387886e-05, "loss": 1.2825, "mean_token_accuracy": 0.6712081631024679, "num_tokens": 834417936.0, "step": 4970 }, { "entropy": 1.630674570798874, "epoch": 0.5460987064348686, "grad_norm": 0.6994221806526184, "learning_rate": 1.7354613784930904e-05, "loss": 1.4286, "mean_token_accuracy": 0.6537977854410807, "num_tokens": 834607646.0, "step": 4971 }, { "entropy": 1.7504248122374217, "epoch": 0.5462085633462416, "grad_norm": 0.8393651247024536, "learning_rate": 1.7353479854862285e-05, "loss": 1.4681, "mean_token_accuracy": 0.6475894500811895, "num_tokens": 834760505.0, "step": 4972 }, { "entropy": 1.6946745415528615, "epoch": 0.5463184202576145, "grad_norm": 0.6594904661178589, "learning_rate": 1.735234572370889e-05, "loss": 1.4675, "mean_token_accuracy": 0.6494403878847758, "num_tokens": 834951630.0, "step": 4973 }, { "entropy": 1.677021066347758, "epoch": 0.5464282771689873, "grad_norm": 0.6898400187492371, "learning_rate": 1.735121139150662e-05, "loss": 1.4039, "mean_token_accuracy": 0.6618401060501734, "num_tokens": 835132695.0, "step": 4974 }, { "entropy": 1.6862121025721233, "epoch": 0.5465381340803603, "grad_norm": 0.7192554473876953, "learning_rate": 1.7350076858291363e-05, "loss": 1.452, "mean_token_accuracy": 0.6425764660040537, "num_tokens": 835332699.0, "step": 4975 }, { "entropy": 1.7146854003270466, "epoch": 0.5466479909917332, "grad_norm": 0.6000041365623474, "learning_rate": 1.734894212409904e-05, "loss": 1.38, "mean_token_accuracy": 0.6442343592643738, "num_tokens": 835491957.0, "step": 4976 }, { "entropy": 1.7366932928562164, "epoch": 0.5467578479031062, "grad_norm": 1.8402376174926758, "learning_rate": 1.734780718896556e-05, "loss": 1.1827, "mean_token_accuracy": 0.6654492169618607, "num_tokens": 835675120.0, "step": 4977 }, { "entropy": 1.7179711163043976, "epoch": 0.5468677048144791, "grad_norm": 0.7776644825935364, "learning_rate": 1.7346672052926838e-05, "loss": 1.3917, "mean_token_accuracy": 0.6652675569057465, "num_tokens": 835855578.0, "step": 4978 }, { "entropy": 1.7254438002904255, "epoch": 0.5469775617258521, "grad_norm": 0.7210192084312439, "learning_rate": 1.734553671601881e-05, "loss": 1.4128, "mean_token_accuracy": 0.6519081691900889, "num_tokens": 836044866.0, "step": 4979 }, { "entropy": 1.7057076493899028, "epoch": 0.547087418637225, "grad_norm": 0.7255688309669495, "learning_rate": 1.7344401178277405e-05, "loss": 1.3313, "mean_token_accuracy": 0.6629131535689036, "num_tokens": 836172342.0, "step": 4980 }, { "entropy": 1.764757553736369, "epoch": 0.547197275548598, "grad_norm": 0.9099174737930298, "learning_rate": 1.734326543973856e-05, "loss": 1.556, "mean_token_accuracy": 0.6452460636695226, "num_tokens": 836337102.0, "step": 4981 }, { "entropy": 1.7524670163790386, "epoch": 0.5473071324599709, "grad_norm": 0.8093464374542236, "learning_rate": 1.734212950043822e-05, "loss": 1.4775, "mean_token_accuracy": 0.6518103977044424, "num_tokens": 836471055.0, "step": 4982 }, { "entropy": 1.7805779079596202, "epoch": 0.5474169893713439, "grad_norm": 0.7849537134170532, "learning_rate": 1.7340993360412343e-05, "loss": 1.4111, "mean_token_accuracy": 0.6559980014959971, "num_tokens": 836607555.0, "step": 4983 }, { "entropy": 1.7531798581282299, "epoch": 0.5475268462827168, "grad_norm": 0.7503484487533569, "learning_rate": 1.733985701969688e-05, "loss": 1.5242, "mean_token_accuracy": 0.64345849553744, "num_tokens": 836777172.0, "step": 4984 }, { "entropy": 1.6708122690518696, "epoch": 0.5476367031940897, "grad_norm": 0.7763621807098389, "learning_rate": 1.73387204783278e-05, "loss": 1.3054, "mean_token_accuracy": 0.6630136370658875, "num_tokens": 836904265.0, "step": 4985 }, { "entropy": 1.6611828605333965, "epoch": 0.5477465601054626, "grad_norm": 0.7332755327224731, "learning_rate": 1.7337583736341077e-05, "loss": 1.4492, "mean_token_accuracy": 0.6425531009833018, "num_tokens": 837153610.0, "step": 4986 }, { "entropy": 1.6737177173296611, "epoch": 0.5478564170168355, "grad_norm": 0.7409669756889343, "learning_rate": 1.7336446793772682e-05, "loss": 1.3822, "mean_token_accuracy": 0.6608980546394984, "num_tokens": 837313064.0, "step": 4987 }, { "entropy": 1.687143345673879, "epoch": 0.5479662739282085, "grad_norm": 0.8636589646339417, "learning_rate": 1.73353096506586e-05, "loss": 1.2404, "mean_token_accuracy": 0.6764566948016485, "num_tokens": 837451066.0, "step": 4988 }, { "entropy": 1.649695744117101, "epoch": 0.5480761308395814, "grad_norm": 0.7815621495246887, "learning_rate": 1.733417230703482e-05, "loss": 1.3502, "mean_token_accuracy": 0.658309539159139, "num_tokens": 837614506.0, "step": 4989 }, { "entropy": 1.7112309634685516, "epoch": 0.5481859877509544, "grad_norm": 0.6810916066169739, "learning_rate": 1.7333034762937346e-05, "loss": 1.31, "mean_token_accuracy": 0.6706115355094274, "num_tokens": 837773388.0, "step": 4990 }, { "entropy": 1.719188928604126, "epoch": 0.5482958446623273, "grad_norm": 0.6780290007591248, "learning_rate": 1.7331897018402175e-05, "loss": 1.3669, "mean_token_accuracy": 0.6579122791687647, "num_tokens": 837943358.0, "step": 4991 }, { "entropy": 1.731730043888092, "epoch": 0.5484057015737003, "grad_norm": 0.6477614045143127, "learning_rate": 1.7330759073465317e-05, "loss": 1.4375, "mean_token_accuracy": 0.6566148449977239, "num_tokens": 838148527.0, "step": 4992 }, { "entropy": 1.7366572121779125, "epoch": 0.5485155584850732, "grad_norm": 0.7073691487312317, "learning_rate": 1.7329620928162785e-05, "loss": 1.4436, "mean_token_accuracy": 0.6603938837846121, "num_tokens": 838351432.0, "step": 4993 }, { "entropy": 1.710929661989212, "epoch": 0.5486254153964462, "grad_norm": 0.6895067691802979, "learning_rate": 1.7328482582530598e-05, "loss": 1.3865, "mean_token_accuracy": 0.660644123951594, "num_tokens": 838525802.0, "step": 4994 }, { "entropy": 1.7042444845040639, "epoch": 0.548735272307819, "grad_norm": 0.767922580242157, "learning_rate": 1.7327344036604796e-05, "loss": 1.3309, "mean_token_accuracy": 0.6714604794979095, "num_tokens": 838674351.0, "step": 4995 }, { "entropy": 1.6385613679885864, "epoch": 0.548845129219192, "grad_norm": 0.7014147639274597, "learning_rate": 1.7326205290421405e-05, "loss": 1.315, "mean_token_accuracy": 0.6713072061538696, "num_tokens": 838818241.0, "step": 4996 }, { "entropy": 1.731259047985077, "epoch": 0.5489549861305649, "grad_norm": 0.7416298389434814, "learning_rate": 1.7325066344016467e-05, "loss": 1.3877, "mean_token_accuracy": 0.6583247681458791, "num_tokens": 838993948.0, "step": 4997 }, { "entropy": 1.6808960835138957, "epoch": 0.5490648430419379, "grad_norm": 0.6791642308235168, "learning_rate": 1.732392719742603e-05, "loss": 1.425, "mean_token_accuracy": 0.6509095182021459, "num_tokens": 839149363.0, "step": 4998 }, { "entropy": 1.6175450483957927, "epoch": 0.5491746999533108, "grad_norm": 0.653907060623169, "learning_rate": 1.7322787850686143e-05, "loss": 1.3148, "mean_token_accuracy": 0.6778159439563751, "num_tokens": 839305980.0, "step": 4999 }, { "entropy": 1.6603448390960693, "epoch": 0.5492845568646838, "grad_norm": 0.6586391925811768, "learning_rate": 1.732164830383287e-05, "loss": 1.576, "mean_token_accuracy": 0.6371851215759913, "num_tokens": 839486072.0, "step": 5000 }, { "entropy": 1.6676330765088399, "epoch": 0.5493944137760567, "grad_norm": 0.7712739109992981, "learning_rate": 1.732050855690228e-05, "loss": 1.4334, "mean_token_accuracy": 0.654510036110878, "num_tokens": 839662761.0, "step": 5001 }, { "entropy": 1.78163543343544, "epoch": 0.5495042706874296, "grad_norm": 4.66588020324707, "learning_rate": 1.7319368609930442e-05, "loss": 1.4213, "mean_token_accuracy": 0.654133602976799, "num_tokens": 839814998.0, "step": 5002 }, { "entropy": 1.672917405764262, "epoch": 0.5496141275988026, "grad_norm": 0.6977851390838623, "learning_rate": 1.7318228462953436e-05, "loss": 1.3021, "mean_token_accuracy": 0.6800949474175771, "num_tokens": 839946153.0, "step": 5003 }, { "entropy": 1.7138892312844594, "epoch": 0.5497239845101755, "grad_norm": 0.6364522576332092, "learning_rate": 1.7317088116007347e-05, "loss": 1.2984, "mean_token_accuracy": 0.6754194498062134, "num_tokens": 840104761.0, "step": 5004 }, { "entropy": 1.7337297697861989, "epoch": 0.5498338414215485, "grad_norm": 0.6527485251426697, "learning_rate": 1.731594756912826e-05, "loss": 1.4891, "mean_token_accuracy": 0.6498339672883352, "num_tokens": 840271652.0, "step": 5005 }, { "entropy": 1.698456237713496, "epoch": 0.5499436983329213, "grad_norm": 0.7499955892562866, "learning_rate": 1.7314806822352283e-05, "loss": 1.2699, "mean_token_accuracy": 0.6654329647620519, "num_tokens": 840397079.0, "step": 5006 }, { "entropy": 1.6331544518470764, "epoch": 0.5500535552442943, "grad_norm": 0.6753321290016174, "learning_rate": 1.7313665875715513e-05, "loss": 1.2348, "mean_token_accuracy": 0.6808192729949951, "num_tokens": 840503621.0, "step": 5007 }, { "entropy": 1.7092650135358174, "epoch": 0.5501634121556672, "grad_norm": 0.7636615037918091, "learning_rate": 1.7312524729254066e-05, "loss": 1.5449, "mean_token_accuracy": 0.6385166347026825, "num_tokens": 840684309.0, "step": 5008 }, { "entropy": 1.702101041873296, "epoch": 0.5502732690670402, "grad_norm": 0.7472963333129883, "learning_rate": 1.7311383383004052e-05, "loss": 1.4984, "mean_token_accuracy": 0.6420136094093323, "num_tokens": 840943695.0, "step": 5009 }, { "entropy": 1.710230439901352, "epoch": 0.5503831259784131, "grad_norm": 0.7253463864326477, "learning_rate": 1.73102418370016e-05, "loss": 1.4516, "mean_token_accuracy": 0.6447204500436783, "num_tokens": 841164320.0, "step": 5010 }, { "entropy": 1.7154650886853535, "epoch": 0.5504929828897861, "grad_norm": 2.171661376953125, "learning_rate": 1.7309100091282837e-05, "loss": 1.4222, "mean_token_accuracy": 0.6575359304745992, "num_tokens": 841288359.0, "step": 5011 }, { "entropy": 1.7397755086421967, "epoch": 0.550602839801159, "grad_norm": 0.7591625452041626, "learning_rate": 1.7307958145883898e-05, "loss": 1.402, "mean_token_accuracy": 0.65840412179629, "num_tokens": 841437589.0, "step": 5012 }, { "entropy": 1.6358317236105602, "epoch": 0.550712696712532, "grad_norm": 0.617072582244873, "learning_rate": 1.730681600084093e-05, "loss": 1.3611, "mean_token_accuracy": 0.6660978297392527, "num_tokens": 841609979.0, "step": 5013 }, { "entropy": 1.6907884379227955, "epoch": 0.5508225536239049, "grad_norm": 0.6868788599967957, "learning_rate": 1.7305673656190074e-05, "loss": 1.4394, "mean_token_accuracy": 0.6435293157895406, "num_tokens": 841771066.0, "step": 5014 }, { "entropy": 1.7315944532553356, "epoch": 0.5509324105352778, "grad_norm": 0.6620607376098633, "learning_rate": 1.730453111196749e-05, "loss": 1.4044, "mean_token_accuracy": 0.6534575472275416, "num_tokens": 841932098.0, "step": 5015 }, { "entropy": 1.6816769540309906, "epoch": 0.5510422674466507, "grad_norm": 0.8678973913192749, "learning_rate": 1.7303388368209337e-05, "loss": 1.2427, "mean_token_accuracy": 0.6835728486378988, "num_tokens": 842056049.0, "step": 5016 }, { "entropy": 1.6632795631885529, "epoch": 0.5511521243580236, "grad_norm": 0.6091153025627136, "learning_rate": 1.7302245424951783e-05, "loss": 1.4623, "mean_token_accuracy": 0.6500665346781412, "num_tokens": 842275188.0, "step": 5017 }, { "entropy": 1.6769650379816692, "epoch": 0.5512619812693966, "grad_norm": 0.5689995288848877, "learning_rate": 1.7301102282231e-05, "loss": 1.4242, "mean_token_accuracy": 0.649879202246666, "num_tokens": 842487986.0, "step": 5018 }, { "entropy": 1.8732970456282299, "epoch": 0.5513718381807695, "grad_norm": 0.833006739616394, "learning_rate": 1.7299958940083168e-05, "loss": 1.5442, "mean_token_accuracy": 0.6393305758635203, "num_tokens": 842638311.0, "step": 5019 }, { "entropy": 1.7400578459103901, "epoch": 0.5514816950921425, "grad_norm": 0.6837904453277588, "learning_rate": 1.7298815398544474e-05, "loss": 1.3732, "mean_token_accuracy": 0.6732922891775767, "num_tokens": 842788496.0, "step": 5020 }, { "entropy": 1.735822359720866, "epoch": 0.5515915520035154, "grad_norm": 0.6274124979972839, "learning_rate": 1.729767165765111e-05, "loss": 1.3365, "mean_token_accuracy": 0.673115094502767, "num_tokens": 842980742.0, "step": 5021 }, { "entropy": 1.7332566777865093, "epoch": 0.5517014089148884, "grad_norm": 0.6682114601135254, "learning_rate": 1.7296527717439285e-05, "loss": 1.3757, "mean_token_accuracy": 0.6619204978148142, "num_tokens": 843135183.0, "step": 5022 }, { "entropy": 1.6546105941136677, "epoch": 0.5518112658262613, "grad_norm": 0.64713054895401, "learning_rate": 1.7295383577945183e-05, "loss": 1.4332, "mean_token_accuracy": 0.6506260534127554, "num_tokens": 843357807.0, "step": 5023 }, { "entropy": 1.6766623953978221, "epoch": 0.5519211227376343, "grad_norm": 0.6391339898109436, "learning_rate": 1.7294239239205036e-05, "loss": 1.4106, "mean_token_accuracy": 0.6604893952608109, "num_tokens": 843511591.0, "step": 5024 }, { "entropy": 1.7070300082365673, "epoch": 0.5520309796490072, "grad_norm": 0.6645405888557434, "learning_rate": 1.7293094701255052e-05, "loss": 1.4886, "mean_token_accuracy": 0.6455263296763102, "num_tokens": 843686858.0, "step": 5025 }, { "entropy": 1.7414699892203014, "epoch": 0.5521408365603802, "grad_norm": 0.7096126079559326, "learning_rate": 1.7291949964131454e-05, "loss": 1.5641, "mean_token_accuracy": 0.6493834306796392, "num_tokens": 843852202.0, "step": 5026 }, { "entropy": 1.7158599992593129, "epoch": 0.552250693471753, "grad_norm": 0.5695939660072327, "learning_rate": 1.7290805027870475e-05, "loss": 1.4462, "mean_token_accuracy": 0.6488019227981567, "num_tokens": 844059832.0, "step": 5027 }, { "entropy": 1.6897384027640026, "epoch": 0.5523605503831259, "grad_norm": 0.6646971702575684, "learning_rate": 1.7289659892508353e-05, "loss": 1.4142, "mean_token_accuracy": 0.6486612806717554, "num_tokens": 844275183.0, "step": 5028 }, { "entropy": 1.7032889624436696, "epoch": 0.5524704072944989, "grad_norm": 0.7060292959213257, "learning_rate": 1.728851455808133e-05, "loss": 1.2169, "mean_token_accuracy": 0.6796109775702158, "num_tokens": 844383058.0, "step": 5029 }, { "entropy": 1.7134819130102794, "epoch": 0.5525802642058718, "grad_norm": 0.7380111217498779, "learning_rate": 1.7287369024625652e-05, "loss": 1.5068, "mean_token_accuracy": 0.6495644648869833, "num_tokens": 844531120.0, "step": 5030 }, { "entropy": 1.6543967723846436, "epoch": 0.5526901211172448, "grad_norm": 0.6035121083259583, "learning_rate": 1.728622329217758e-05, "loss": 1.4167, "mean_token_accuracy": 0.6516775141159693, "num_tokens": 844707979.0, "step": 5031 }, { "entropy": 1.7068756620089214, "epoch": 0.5527999780286177, "grad_norm": 0.8700978755950928, "learning_rate": 1.7285077360773374e-05, "loss": 1.3426, "mean_token_accuracy": 0.6678166339794794, "num_tokens": 844852167.0, "step": 5032 }, { "entropy": 1.7389213939507802, "epoch": 0.5529098349399907, "grad_norm": 0.6411224603652954, "learning_rate": 1.7283931230449297e-05, "loss": 1.4793, "mean_token_accuracy": 0.6375206708908081, "num_tokens": 845043801.0, "step": 5033 }, { "entropy": 1.657431811094284, "epoch": 0.5530196918513636, "grad_norm": 0.6940959692001343, "learning_rate": 1.7282784901241632e-05, "loss": 1.2937, "mean_token_accuracy": 0.6709872682889303, "num_tokens": 845167525.0, "step": 5034 }, { "entropy": 1.7134600778420765, "epoch": 0.5531295487627366, "grad_norm": 0.7191624641418457, "learning_rate": 1.7281638373186655e-05, "loss": 1.4491, "mean_token_accuracy": 0.6502556055784225, "num_tokens": 845367554.0, "step": 5035 }, { "entropy": 1.6771671672662098, "epoch": 0.5532394056741095, "grad_norm": 0.6489148139953613, "learning_rate": 1.7280491646320654e-05, "loss": 1.3884, "mean_token_accuracy": 0.6516650716463724, "num_tokens": 845533357.0, "step": 5036 }, { "entropy": 1.716915915409724, "epoch": 0.5533492625854824, "grad_norm": 0.688566267490387, "learning_rate": 1.7279344720679924e-05, "loss": 1.4762, "mean_token_accuracy": 0.6542300681273142, "num_tokens": 845713492.0, "step": 5037 }, { "entropy": 1.6464048027992249, "epoch": 0.5534591194968553, "grad_norm": 0.6653256416320801, "learning_rate": 1.727819759630076e-05, "loss": 1.4109, "mean_token_accuracy": 0.662084236741066, "num_tokens": 845908415.0, "step": 5038 }, { "entropy": 1.663993815581004, "epoch": 0.5535689764082283, "grad_norm": 0.651810884475708, "learning_rate": 1.7277050273219477e-05, "loss": 1.4258, "mean_token_accuracy": 0.6560534288485845, "num_tokens": 846110806.0, "step": 5039 }, { "entropy": 1.7180224458376567, "epoch": 0.5536788333196012, "grad_norm": 0.6927412748336792, "learning_rate": 1.7275902751472375e-05, "loss": 1.34, "mean_token_accuracy": 0.6658701201279958, "num_tokens": 846291645.0, "step": 5040 }, { "entropy": 1.7129448254903157, "epoch": 0.5537886902309741, "grad_norm": 0.8613117337226868, "learning_rate": 1.7274755031095782e-05, "loss": 1.5887, "mean_token_accuracy": 0.6217222909132639, "num_tokens": 846451007.0, "step": 5041 }, { "entropy": 1.7207149465878804, "epoch": 0.5538985471423471, "grad_norm": 0.7448726892471313, "learning_rate": 1.727360711212602e-05, "loss": 1.3178, "mean_token_accuracy": 0.6566254794597626, "num_tokens": 846581772.0, "step": 5042 }, { "entropy": 1.67302605509758, "epoch": 0.55400840405372, "grad_norm": 0.7350447177886963, "learning_rate": 1.727245899459942e-05, "loss": 1.5016, "mean_token_accuracy": 0.6568493197361628, "num_tokens": 846760830.0, "step": 5043 }, { "entropy": 1.675746778647105, "epoch": 0.554118260965093, "grad_norm": 0.799920916557312, "learning_rate": 1.7271310678552316e-05, "loss": 1.3671, "mean_token_accuracy": 0.661032055815061, "num_tokens": 846907065.0, "step": 5044 }, { "entropy": 1.7543572783470154, "epoch": 0.5542281178764659, "grad_norm": 0.7802977561950684, "learning_rate": 1.7270162164021058e-05, "loss": 1.3284, "mean_token_accuracy": 0.6583187431097031, "num_tokens": 847073867.0, "step": 5045 }, { "entropy": 1.6700835426648457, "epoch": 0.5543379747878389, "grad_norm": 0.6491420269012451, "learning_rate": 1.726901345104199e-05, "loss": 1.4677, "mean_token_accuracy": 0.6603880474964777, "num_tokens": 847231745.0, "step": 5046 }, { "entropy": 1.7336505154768627, "epoch": 0.5544478316992117, "grad_norm": 0.6150977611541748, "learning_rate": 1.7267864539651476e-05, "loss": 1.4324, "mean_token_accuracy": 0.6436419288317362, "num_tokens": 847459685.0, "step": 5047 }, { "entropy": 1.6739269097646077, "epoch": 0.5545576886105847, "grad_norm": 0.6661935448646545, "learning_rate": 1.726671542988587e-05, "loss": 1.3945, "mean_token_accuracy": 0.6670573254426321, "num_tokens": 847611418.0, "step": 5048 }, { "entropy": 1.6247097651163738, "epoch": 0.5546675455219576, "grad_norm": 0.6128849983215332, "learning_rate": 1.7265566121781545e-05, "loss": 1.3081, "mean_token_accuracy": 0.6608265737692515, "num_tokens": 847802471.0, "step": 5049 }, { "entropy": 1.7174389859040577, "epoch": 0.5547774024333306, "grad_norm": 0.7090808153152466, "learning_rate": 1.7264416615374875e-05, "loss": 1.4592, "mean_token_accuracy": 0.640943189462026, "num_tokens": 847976449.0, "step": 5050 }, { "entropy": 1.7501269181569417, "epoch": 0.5548872593447035, "grad_norm": 0.7363408207893372, "learning_rate": 1.7263266910702247e-05, "loss": 1.3542, "mean_token_accuracy": 0.6537466496229172, "num_tokens": 848079456.0, "step": 5051 }, { "entropy": 1.6778157651424408, "epoch": 0.5549971162560765, "grad_norm": 0.600395917892456, "learning_rate": 1.7262117007800033e-05, "loss": 1.4896, "mean_token_accuracy": 0.6465341796477636, "num_tokens": 848310668.0, "step": 5052 }, { "entropy": 1.7230773468812306, "epoch": 0.5551069731674494, "grad_norm": 0.6006616950035095, "learning_rate": 1.726096690670465e-05, "loss": 1.3888, "mean_token_accuracy": 0.6528857400019964, "num_tokens": 848468063.0, "step": 5053 }, { "entropy": 1.7066966394583385, "epoch": 0.5552168300788224, "grad_norm": 0.69357830286026, "learning_rate": 1.7259816607452477e-05, "loss": 1.3105, "mean_token_accuracy": 0.6618163386980692, "num_tokens": 848617851.0, "step": 5054 }, { "entropy": 1.754871626694997, "epoch": 0.5553266869901953, "grad_norm": 0.7361278533935547, "learning_rate": 1.7258666110079933e-05, "loss": 1.4104, "mean_token_accuracy": 0.6681383550167084, "num_tokens": 848741009.0, "step": 5055 }, { "entropy": 1.7040483752886455, "epoch": 0.5554365439015682, "grad_norm": 0.6013309359550476, "learning_rate": 1.7257515414623427e-05, "loss": 1.3651, "mean_token_accuracy": 0.6659112522999445, "num_tokens": 848888733.0, "step": 5056 }, { "entropy": 1.6922965149084728, "epoch": 0.5555464008129412, "grad_norm": 0.7231853008270264, "learning_rate": 1.7256364521119377e-05, "loss": 1.4536, "mean_token_accuracy": 0.6473261117935181, "num_tokens": 849101982.0, "step": 5057 }, { "entropy": 1.6854231754938762, "epoch": 0.555656257724314, "grad_norm": 0.6253160238265991, "learning_rate": 1.7255213429604204e-05, "loss": 1.3522, "mean_token_accuracy": 0.6646686444679896, "num_tokens": 849240891.0, "step": 5058 }, { "entropy": 1.675849974155426, "epoch": 0.555766114635687, "grad_norm": 0.7990770936012268, "learning_rate": 1.725406214011435e-05, "loss": 1.3278, "mean_token_accuracy": 0.6695520381132761, "num_tokens": 849394620.0, "step": 5059 }, { "entropy": 1.7733904123306274, "epoch": 0.5558759715470599, "grad_norm": 0.6922385096549988, "learning_rate": 1.7252910652686248e-05, "loss": 1.459, "mean_token_accuracy": 0.6450273891290029, "num_tokens": 849547571.0, "step": 5060 }, { "entropy": 1.7009385426839192, "epoch": 0.5559858284584329, "grad_norm": 0.7180578112602234, "learning_rate": 1.725175896735634e-05, "loss": 1.4134, "mean_token_accuracy": 0.6490115920702616, "num_tokens": 849694441.0, "step": 5061 }, { "entropy": 1.6811227997144063, "epoch": 0.5560956853698058, "grad_norm": 0.8859359622001648, "learning_rate": 1.7250607084161078e-05, "loss": 1.467, "mean_token_accuracy": 0.636049841841062, "num_tokens": 849891733.0, "step": 5062 }, { "entropy": 1.700390100479126, "epoch": 0.5562055422811788, "grad_norm": 0.6678200364112854, "learning_rate": 1.724945500313692e-05, "loss": 1.3254, "mean_token_accuracy": 0.6696620285511017, "num_tokens": 850015651.0, "step": 5063 }, { "entropy": 1.6472013394037883, "epoch": 0.5563153991925517, "grad_norm": 0.6910000443458557, "learning_rate": 1.7248302724320324e-05, "loss": 1.4257, "mean_token_accuracy": 0.6657865395148596, "num_tokens": 850187479.0, "step": 5064 }, { "entropy": 1.6618298788865407, "epoch": 0.5564252561039247, "grad_norm": 0.6528242826461792, "learning_rate": 1.7247150247747765e-05, "loss": 1.3154, "mean_token_accuracy": 0.6672501713037491, "num_tokens": 850354624.0, "step": 5065 }, { "entropy": 1.718745857477188, "epoch": 0.5565351130152976, "grad_norm": 0.7377060651779175, "learning_rate": 1.724599757345571e-05, "loss": 1.5872, "mean_token_accuracy": 0.6395946790774664, "num_tokens": 850531179.0, "step": 5066 }, { "entropy": 1.6819844444592793, "epoch": 0.5566449699266706, "grad_norm": 0.6145383715629578, "learning_rate": 1.7244844701480654e-05, "loss": 1.3237, "mean_token_accuracy": 0.6742121378580729, "num_tokens": 850668747.0, "step": 5067 }, { "entropy": 1.6457071900367737, "epoch": 0.5567548268380434, "grad_norm": 0.6610442399978638, "learning_rate": 1.7243691631859075e-05, "loss": 1.3572, "mean_token_accuracy": 0.669839675227801, "num_tokens": 850875419.0, "step": 5068 }, { "entropy": 1.6283689141273499, "epoch": 0.5568646837494163, "grad_norm": 0.6071202754974365, "learning_rate": 1.7242538364627467e-05, "loss": 1.2843, "mean_token_accuracy": 0.6711755692958832, "num_tokens": 850996689.0, "step": 5069 }, { "entropy": 1.7722647686799367, "epoch": 0.5569745406607893, "grad_norm": 0.6648354530334473, "learning_rate": 1.7241384899822334e-05, "loss": 1.4833, "mean_token_accuracy": 0.6351625472307205, "num_tokens": 851177217.0, "step": 5070 }, { "entropy": 1.7500501374403636, "epoch": 0.5570843975721622, "grad_norm": 0.6843627691268921, "learning_rate": 1.724023123748018e-05, "loss": 1.3283, "mean_token_accuracy": 0.6687018970648447, "num_tokens": 851312948.0, "step": 5071 }, { "entropy": 1.730874131123225, "epoch": 0.5571942544835352, "grad_norm": 0.752149224281311, "learning_rate": 1.723907737763752e-05, "loss": 1.2589, "mean_token_accuracy": 0.6734604885180792, "num_tokens": 851434762.0, "step": 5072 }, { "entropy": 1.7467269003391266, "epoch": 0.5573041113949081, "grad_norm": 0.8023228049278259, "learning_rate": 1.7237923320330875e-05, "loss": 1.3556, "mean_token_accuracy": 0.6621319899956385, "num_tokens": 851601105.0, "step": 5073 }, { "entropy": 1.684466113646825, "epoch": 0.5574139683062811, "grad_norm": 0.8053759932518005, "learning_rate": 1.7236769065596765e-05, "loss": 1.3469, "mean_token_accuracy": 0.6632759322722753, "num_tokens": 851761746.0, "step": 5074 }, { "entropy": 1.7283134460449219, "epoch": 0.557523825217654, "grad_norm": 0.7793658971786499, "learning_rate": 1.7235614613471726e-05, "loss": 1.2974, "mean_token_accuracy": 0.6666330446799597, "num_tokens": 851894994.0, "step": 5075 }, { "entropy": 1.6891380151112874, "epoch": 0.557633682129027, "grad_norm": 0.6810115575790405, "learning_rate": 1.723445996399229e-05, "loss": 1.4283, "mean_token_accuracy": 0.6489190608263016, "num_tokens": 852019579.0, "step": 5076 }, { "entropy": 1.720542977253596, "epoch": 0.5577435390403999, "grad_norm": 0.7062191963195801, "learning_rate": 1.723330511719501e-05, "loss": 1.3266, "mean_token_accuracy": 0.6564305424690247, "num_tokens": 852170454.0, "step": 5077 }, { "entropy": 1.7275803287823994, "epoch": 0.5578533959517729, "grad_norm": 0.6995865702629089, "learning_rate": 1.7232150073116434e-05, "loss": 1.31, "mean_token_accuracy": 0.6622872352600098, "num_tokens": 852276883.0, "step": 5078 }, { "entropy": 1.735807627439499, "epoch": 0.5579632528631457, "grad_norm": 0.7578923106193542, "learning_rate": 1.7230994831793112e-05, "loss": 1.5665, "mean_token_accuracy": 0.6405892074108124, "num_tokens": 852553346.0, "step": 5079 }, { "entropy": 1.7293440699577332, "epoch": 0.5580731097745187, "grad_norm": 0.6468039751052856, "learning_rate": 1.722983939326161e-05, "loss": 1.3335, "mean_token_accuracy": 0.6611098150412241, "num_tokens": 852716230.0, "step": 5080 }, { "entropy": 1.6878082553545635, "epoch": 0.5581829666858916, "grad_norm": 0.6970882415771484, "learning_rate": 1.7228683757558506e-05, "loss": 1.4551, "mean_token_accuracy": 0.6598817507425944, "num_tokens": 852879187.0, "step": 5081 }, { "entropy": 1.6392480432987213, "epoch": 0.5582928235972645, "grad_norm": 0.6725665926933289, "learning_rate": 1.722752792472036e-05, "loss": 1.2592, "mean_token_accuracy": 0.6828918804725012, "num_tokens": 853012620.0, "step": 5082 }, { "entropy": 1.6959756811459858, "epoch": 0.5584026805086375, "grad_norm": 0.7385476231575012, "learning_rate": 1.7226371894783768e-05, "loss": 1.231, "mean_token_accuracy": 0.6803264965613683, "num_tokens": 853167262.0, "step": 5083 }, { "entropy": 1.6905551254749298, "epoch": 0.5585125374200104, "grad_norm": 0.6331995725631714, "learning_rate": 1.7225215667785305e-05, "loss": 1.3169, "mean_token_accuracy": 0.6700136860211691, "num_tokens": 853326108.0, "step": 5084 }, { "entropy": 1.7254391411940257, "epoch": 0.5586223943313834, "grad_norm": 0.7044715285301208, "learning_rate": 1.7224059243761572e-05, "loss": 1.4471, "mean_token_accuracy": 0.6458353300889333, "num_tokens": 853488209.0, "step": 5085 }, { "entropy": 1.7297268311182659, "epoch": 0.5587322512427563, "grad_norm": 0.7728154063224792, "learning_rate": 1.7222902622749173e-05, "loss": 1.281, "mean_token_accuracy": 0.6573230673869451, "num_tokens": 853610205.0, "step": 5086 }, { "entropy": 1.735485980908076, "epoch": 0.5588421081541293, "grad_norm": 0.837343692779541, "learning_rate": 1.7221745804784707e-05, "loss": 1.3709, "mean_token_accuracy": 0.6560803353786469, "num_tokens": 853751392.0, "step": 5087 }, { "entropy": 1.6720333397388458, "epoch": 0.5589519650655022, "grad_norm": 0.6489691138267517, "learning_rate": 1.722058878990479e-05, "loss": 1.4012, "mean_token_accuracy": 0.6520512402057648, "num_tokens": 853991504.0, "step": 5088 }, { "entropy": 1.694252997636795, "epoch": 0.5590618219768752, "grad_norm": 0.8448305130004883, "learning_rate": 1.721943157814604e-05, "loss": 1.4325, "mean_token_accuracy": 0.6456566154956818, "num_tokens": 854165712.0, "step": 5089 }, { "entropy": 1.750052273273468, "epoch": 0.559171678888248, "grad_norm": 0.6790991425514221, "learning_rate": 1.7218274169545082e-05, "loss": 1.4332, "mean_token_accuracy": 0.6550088077783585, "num_tokens": 854287722.0, "step": 5090 }, { "entropy": 1.7848297357559204, "epoch": 0.559281535799621, "grad_norm": 0.6880961060523987, "learning_rate": 1.721711656413855e-05, "loss": 1.3807, "mean_token_accuracy": 0.655860627690951, "num_tokens": 854402682.0, "step": 5091 }, { "entropy": 1.7680182953675587, "epoch": 0.5593913927109939, "grad_norm": 0.7541852593421936, "learning_rate": 1.7215958761963085e-05, "loss": 1.4438, "mean_token_accuracy": 0.659633050362269, "num_tokens": 854582653.0, "step": 5092 }, { "entropy": 1.743993620077769, "epoch": 0.5595012496223669, "grad_norm": 0.7386744618415833, "learning_rate": 1.7214800763055323e-05, "loss": 1.3355, "mean_token_accuracy": 0.655690461397171, "num_tokens": 854733417.0, "step": 5093 }, { "entropy": 1.7236657241980236, "epoch": 0.5596111065337398, "grad_norm": 0.6876170039176941, "learning_rate": 1.7213642567451917e-05, "loss": 1.5466, "mean_token_accuracy": 0.6431126991907755, "num_tokens": 854876880.0, "step": 5094 }, { "entropy": 1.6323457062244415, "epoch": 0.5597209634451128, "grad_norm": 0.6821046471595764, "learning_rate": 1.7212484175189522e-05, "loss": 1.5167, "mean_token_accuracy": 0.6490619430939356, "num_tokens": 855089649.0, "step": 5095 }, { "entropy": 1.6829971273740132, "epoch": 0.5598308203564857, "grad_norm": 0.698646605014801, "learning_rate": 1.7211325586304802e-05, "loss": 1.4024, "mean_token_accuracy": 0.6564787675937017, "num_tokens": 855301819.0, "step": 5096 }, { "entropy": 1.6667365928490956, "epoch": 0.5599406772678586, "grad_norm": 0.6255282759666443, "learning_rate": 1.721016680083443e-05, "loss": 1.4059, "mean_token_accuracy": 0.6555192569891611, "num_tokens": 855474924.0, "step": 5097 }, { "entropy": 1.7034937342007954, "epoch": 0.5600505341792316, "grad_norm": 0.6177133321762085, "learning_rate": 1.7209007818815074e-05, "loss": 1.4076, "mean_token_accuracy": 0.6450115591287613, "num_tokens": 855642413.0, "step": 5098 }, { "entropy": 1.6558322707811992, "epoch": 0.5601603910906044, "grad_norm": 0.7308685779571533, "learning_rate": 1.720784864028342e-05, "loss": 1.4158, "mean_token_accuracy": 0.6546875933806101, "num_tokens": 855823504.0, "step": 5099 }, { "entropy": 1.7279444734255474, "epoch": 0.5602702480019774, "grad_norm": 0.6921755075454712, "learning_rate": 1.720668926527615e-05, "loss": 1.3391, "mean_token_accuracy": 0.6614320774873098, "num_tokens": 855945864.0, "step": 5100 }, { "entropy": 1.678322970867157, "epoch": 0.5603801049133503, "grad_norm": 0.57282555103302, "learning_rate": 1.7205529693829965e-05, "loss": 1.4324, "mean_token_accuracy": 0.6524877349535624, "num_tokens": 856171543.0, "step": 5101 }, { "entropy": 1.712640792131424, "epoch": 0.5604899618247233, "grad_norm": 0.6682943105697632, "learning_rate": 1.720436992598156e-05, "loss": 1.4408, "mean_token_accuracy": 0.6717189103364944, "num_tokens": 856322857.0, "step": 5102 }, { "entropy": 1.7849741280078888, "epoch": 0.5605998187360962, "grad_norm": 0.8261640667915344, "learning_rate": 1.7203209961767646e-05, "loss": 1.4446, "mean_token_accuracy": 0.6582596053679785, "num_tokens": 856486604.0, "step": 5103 }, { "entropy": 1.7254948616027832, "epoch": 0.5607096756474692, "grad_norm": 0.6827483177185059, "learning_rate": 1.720204980122493e-05, "loss": 1.6162, "mean_token_accuracy": 0.6409385999043783, "num_tokens": 856730292.0, "step": 5104 }, { "entropy": 1.7715973059336345, "epoch": 0.5608195325588421, "grad_norm": 0.8116368055343628, "learning_rate": 1.720088944439013e-05, "loss": 1.3775, "mean_token_accuracy": 0.6552746097246805, "num_tokens": 856885143.0, "step": 5105 }, { "entropy": 1.7679732938607533, "epoch": 0.5609293894702151, "grad_norm": 0.6571024060249329, "learning_rate": 1.7199728891299974e-05, "loss": 1.3537, "mean_token_accuracy": 0.6583919723828634, "num_tokens": 857019977.0, "step": 5106 }, { "entropy": 1.6773741841316223, "epoch": 0.561039246381588, "grad_norm": 0.5981674790382385, "learning_rate": 1.7198568141991193e-05, "loss": 1.4617, "mean_token_accuracy": 0.6360589961210886, "num_tokens": 857227441.0, "step": 5107 }, { "entropy": 1.6567221482594807, "epoch": 0.561149103292961, "grad_norm": 0.7425564527511597, "learning_rate": 1.7197407196500525e-05, "loss": 1.3022, "mean_token_accuracy": 0.6754785428444544, "num_tokens": 857391082.0, "step": 5108 }, { "entropy": 1.7228084901968639, "epoch": 0.5612589602043339, "grad_norm": 0.6551631093025208, "learning_rate": 1.7196246054864708e-05, "loss": 1.593, "mean_token_accuracy": 0.6327783366044363, "num_tokens": 857596607.0, "step": 5109 }, { "entropy": 1.7758533358573914, "epoch": 0.5613688171157067, "grad_norm": 0.6851291656494141, "learning_rate": 1.71950847171205e-05, "loss": 1.4648, "mean_token_accuracy": 0.6309924423694611, "num_tokens": 857772597.0, "step": 5110 }, { "entropy": 1.76680189371109, "epoch": 0.5614786740270797, "grad_norm": 0.7714706063270569, "learning_rate": 1.719392318330465e-05, "loss": 1.3437, "mean_token_accuracy": 0.6618274201949438, "num_tokens": 857905051.0, "step": 5111 }, { "entropy": 1.645568698644638, "epoch": 0.5615885309384526, "grad_norm": 0.589038074016571, "learning_rate": 1.7192761453453924e-05, "loss": 1.3992, "mean_token_accuracy": 0.6517779429753622, "num_tokens": 858079797.0, "step": 5112 }, { "entropy": 1.7456736266613007, "epoch": 0.5616983878498256, "grad_norm": 0.7510016560554504, "learning_rate": 1.719159952760509e-05, "loss": 1.3539, "mean_token_accuracy": 0.6672643373409907, "num_tokens": 858237023.0, "step": 5113 }, { "entropy": 1.6943009197711945, "epoch": 0.5618082447611985, "grad_norm": 0.646049976348877, "learning_rate": 1.7190437405794917e-05, "loss": 1.3997, "mean_token_accuracy": 0.6529371738433838, "num_tokens": 858452256.0, "step": 5114 }, { "entropy": 1.7592908143997192, "epoch": 0.5619181016725715, "grad_norm": 0.6745445728302002, "learning_rate": 1.718927508806019e-05, "loss": 1.3, "mean_token_accuracy": 0.6665283391873041, "num_tokens": 858604338.0, "step": 5115 }, { "entropy": 1.74533345301946, "epoch": 0.5620279585839444, "grad_norm": 0.7311209440231323, "learning_rate": 1.7188112574437696e-05, "loss": 1.3826, "mean_token_accuracy": 0.6617701351642609, "num_tokens": 858738333.0, "step": 5116 }, { "entropy": 1.6821747322877247, "epoch": 0.5621378154953174, "grad_norm": 0.6801566481590271, "learning_rate": 1.7186949864964225e-05, "loss": 1.4588, "mean_token_accuracy": 0.6743607322374979, "num_tokens": 858926628.0, "step": 5117 }, { "entropy": 1.6699562072753906, "epoch": 0.5622476724066903, "grad_norm": 0.7358706593513489, "learning_rate": 1.718578695967658e-05, "loss": 1.3404, "mean_token_accuracy": 0.6704433461030325, "num_tokens": 859072049.0, "step": 5118 }, { "entropy": 1.6408792237440746, "epoch": 0.5623575293180633, "grad_norm": 0.717157781124115, "learning_rate": 1.718462385861157e-05, "loss": 1.2962, "mean_token_accuracy": 0.6739445279041926, "num_tokens": 859230044.0, "step": 5119 }, { "entropy": 1.7437108953793843, "epoch": 0.5624673862294362, "grad_norm": 1.0900483131408691, "learning_rate": 1.7183460561806e-05, "loss": 1.5626, "mean_token_accuracy": 0.6493689517180125, "num_tokens": 859372507.0, "step": 5120 }, { "entropy": 1.6718702812989552, "epoch": 0.5625772431408091, "grad_norm": 0.7479756474494934, "learning_rate": 1.718229706929669e-05, "loss": 1.2736, "mean_token_accuracy": 0.6695879648129145, "num_tokens": 859517643.0, "step": 5121 }, { "entropy": 1.6360397239526112, "epoch": 0.562687100052182, "grad_norm": 0.7399976849555969, "learning_rate": 1.718113338112046e-05, "loss": 1.379, "mean_token_accuracy": 0.6537938465674719, "num_tokens": 859682232.0, "step": 5122 }, { "entropy": 1.6917523245016735, "epoch": 0.5627969569635549, "grad_norm": 0.7607491612434387, "learning_rate": 1.7179969497314145e-05, "loss": 1.4321, "mean_token_accuracy": 0.6621414522329966, "num_tokens": 859806144.0, "step": 5123 }, { "entropy": 1.7270215352376301, "epoch": 0.5629068138749279, "grad_norm": 0.663026750087738, "learning_rate": 1.7178805417914576e-05, "loss": 1.3397, "mean_token_accuracy": 0.666211391488711, "num_tokens": 859949673.0, "step": 5124 }, { "entropy": 1.6595263083775837, "epoch": 0.5630166707863008, "grad_norm": 0.6144124865531921, "learning_rate": 1.7177641142958604e-05, "loss": 1.2972, "mean_token_accuracy": 0.6727373351653417, "num_tokens": 860090516.0, "step": 5125 }, { "entropy": 1.7171042064825695, "epoch": 0.5631265276976738, "grad_norm": 0.7884184122085571, "learning_rate": 1.7176476672483077e-05, "loss": 1.4505, "mean_token_accuracy": 0.6440162112315496, "num_tokens": 860271153.0, "step": 5126 }, { "entropy": 1.6738516787687938, "epoch": 0.5632363846090467, "grad_norm": 0.67924964427948, "learning_rate": 1.717531200652484e-05, "loss": 1.3207, "mean_token_accuracy": 0.6723797023296356, "num_tokens": 860404625.0, "step": 5127 }, { "entropy": 1.7053898572921753, "epoch": 0.5633462415204197, "grad_norm": 0.6389914155006409, "learning_rate": 1.7174147145120766e-05, "loss": 1.3741, "mean_token_accuracy": 0.6581660558780035, "num_tokens": 860625943.0, "step": 5128 }, { "entropy": 1.6907204886277516, "epoch": 0.5634560984317926, "grad_norm": 0.7094506621360779, "learning_rate": 1.7172982088307715e-05, "loss": 1.3378, "mean_token_accuracy": 0.6622174034516016, "num_tokens": 860803832.0, "step": 5129 }, { "entropy": 1.6670372982819874, "epoch": 0.5635659553431656, "grad_norm": 0.8901845216751099, "learning_rate": 1.717181683612256e-05, "loss": 1.4248, "mean_token_accuracy": 0.6624594082434972, "num_tokens": 860978869.0, "step": 5130 }, { "entropy": 1.667426864306132, "epoch": 0.5636758122545384, "grad_norm": 0.7643829584121704, "learning_rate": 1.717065138860219e-05, "loss": 1.2617, "mean_token_accuracy": 0.6825516323248545, "num_tokens": 861113828.0, "step": 5131 }, { "entropy": 1.790122111638387, "epoch": 0.5637856691659114, "grad_norm": 0.8605037331581116, "learning_rate": 1.7169485745783475e-05, "loss": 1.433, "mean_token_accuracy": 0.6626231670379639, "num_tokens": 861276229.0, "step": 5132 }, { "entropy": 1.7115785876909893, "epoch": 0.5638955260772843, "grad_norm": 0.7499393820762634, "learning_rate": 1.716831990770332e-05, "loss": 1.3592, "mean_token_accuracy": 0.6678246607383093, "num_tokens": 861403520.0, "step": 5133 }, { "entropy": 1.7232838968435924, "epoch": 0.5640053829886573, "grad_norm": 0.6720132827758789, "learning_rate": 1.7167153874398622e-05, "loss": 1.4723, "mean_token_accuracy": 0.6535212695598602, "num_tokens": 861577011.0, "step": 5134 }, { "entropy": 1.7426664630572002, "epoch": 0.5641152399000302, "grad_norm": 0.7186594605445862, "learning_rate": 1.716598764590628e-05, "loss": 1.4079, "mean_token_accuracy": 0.6688724607229233, "num_tokens": 861725353.0, "step": 5135 }, { "entropy": 1.6872510810693104, "epoch": 0.5642250968114031, "grad_norm": 0.7637690305709839, "learning_rate": 1.7164821222263207e-05, "loss": 1.186, "mean_token_accuracy": 0.6860415786504745, "num_tokens": 861893900.0, "step": 5136 }, { "entropy": 1.7718837360541027, "epoch": 0.5643349537227761, "grad_norm": 0.7247793674468994, "learning_rate": 1.7163654603506327e-05, "loss": 1.6064, "mean_token_accuracy": 0.6367716689904531, "num_tokens": 862109571.0, "step": 5137 }, { "entropy": 1.7636443078517914, "epoch": 0.564444810634149, "grad_norm": 0.7421050071716309, "learning_rate": 1.716248778967255e-05, "loss": 1.4571, "mean_token_accuracy": 0.6570049126942953, "num_tokens": 862250376.0, "step": 5138 }, { "entropy": 1.7156391243139903, "epoch": 0.564554667545522, "grad_norm": 0.7368531227111816, "learning_rate": 1.7161320780798812e-05, "loss": 1.4297, "mean_token_accuracy": 0.6491710195938746, "num_tokens": 862402788.0, "step": 5139 }, { "entropy": 1.7167876561482747, "epoch": 0.5646645244568949, "grad_norm": 0.6028063297271729, "learning_rate": 1.716015357692205e-05, "loss": 1.4445, "mean_token_accuracy": 0.6677204618851343, "num_tokens": 862608011.0, "step": 5140 }, { "entropy": 1.6989375948905945, "epoch": 0.5647743813682679, "grad_norm": 0.8523213267326355, "learning_rate": 1.71589861780792e-05, "loss": 1.3444, "mean_token_accuracy": 0.6717568387587866, "num_tokens": 862778996.0, "step": 5141 }, { "entropy": 1.6621976296106975, "epoch": 0.5648842382796407, "grad_norm": 0.6297332048416138, "learning_rate": 1.715781858430721e-05, "loss": 1.4134, "mean_token_accuracy": 0.6557877908150355, "num_tokens": 862939813.0, "step": 5142 }, { "entropy": 1.6616567373275757, "epoch": 0.5649940951910137, "grad_norm": 0.6319537162780762, "learning_rate": 1.7156650795643043e-05, "loss": 1.3247, "mean_token_accuracy": 0.6654583762089411, "num_tokens": 863113346.0, "step": 5143 }, { "entropy": 1.7592324515183766, "epoch": 0.5651039521023866, "grad_norm": 0.6727480888366699, "learning_rate": 1.715548281212365e-05, "loss": 1.4165, "mean_token_accuracy": 0.6434768736362457, "num_tokens": 863291829.0, "step": 5144 }, { "entropy": 1.7069471180438995, "epoch": 0.5652138090137596, "grad_norm": 0.6831556558609009, "learning_rate": 1.7154314633785997e-05, "loss": 1.4489, "mean_token_accuracy": 0.6467359215021133, "num_tokens": 863514790.0, "step": 5145 }, { "entropy": 1.6704001724720001, "epoch": 0.5653236659251325, "grad_norm": 0.6945511102676392, "learning_rate": 1.7153146260667064e-05, "loss": 1.2975, "mean_token_accuracy": 0.6630304008722305, "num_tokens": 863686919.0, "step": 5146 }, { "entropy": 1.6741840541362762, "epoch": 0.5654335228365055, "grad_norm": 0.7052369713783264, "learning_rate": 1.7151977692803824e-05, "loss": 1.3397, "mean_token_accuracy": 0.6724216043949127, "num_tokens": 863810676.0, "step": 5147 }, { "entropy": 1.6889431178569794, "epoch": 0.5655433797478784, "grad_norm": 0.7486838698387146, "learning_rate": 1.715080893023326e-05, "loss": 1.3683, "mean_token_accuracy": 0.654551774263382, "num_tokens": 863981409.0, "step": 5148 }, { "entropy": 1.7260994116465251, "epoch": 0.5656532366592514, "grad_norm": 0.776213526725769, "learning_rate": 1.7149639972992363e-05, "loss": 1.3205, "mean_token_accuracy": 0.6820149670044581, "num_tokens": 864179470.0, "step": 5149 }, { "entropy": 1.614399919907252, "epoch": 0.5657630935706243, "grad_norm": 0.7526430487632751, "learning_rate": 1.7148470821118135e-05, "loss": 1.118, "mean_token_accuracy": 0.6992814292510351, "num_tokens": 864298750.0, "step": 5150 }, { "entropy": 1.708221822977066, "epoch": 0.5658729504819972, "grad_norm": 0.7361465692520142, "learning_rate": 1.7147301474647577e-05, "loss": 1.2939, "mean_token_accuracy": 0.6688221096992493, "num_tokens": 864423445.0, "step": 5151 }, { "entropy": 1.673103392124176, "epoch": 0.5659828073933701, "grad_norm": 0.7637960314750671, "learning_rate": 1.7146131933617695e-05, "loss": 1.4075, "mean_token_accuracy": 0.6531636367241541, "num_tokens": 864600118.0, "step": 5152 }, { "entropy": 1.6958302358786266, "epoch": 0.566092664304743, "grad_norm": 0.6391355395317078, "learning_rate": 1.7144962198065507e-05, "loss": 1.323, "mean_token_accuracy": 0.6684149752060572, "num_tokens": 864750929.0, "step": 5153 }, { "entropy": 1.6995809276898701, "epoch": 0.566202521216116, "grad_norm": 0.7472272515296936, "learning_rate": 1.7143792268028036e-05, "loss": 1.3566, "mean_token_accuracy": 0.6624608635902405, "num_tokens": 864923982.0, "step": 5154 }, { "entropy": 1.708970695734024, "epoch": 0.5663123781274889, "grad_norm": 0.7742936611175537, "learning_rate": 1.7142622143542307e-05, "loss": 1.3687, "mean_token_accuracy": 0.657701775431633, "num_tokens": 865069429.0, "step": 5155 }, { "entropy": 1.6366569598515828, "epoch": 0.5664222350388619, "grad_norm": 0.6137021780014038, "learning_rate": 1.7141451824645356e-05, "loss": 1.3238, "mean_token_accuracy": 0.6683499167362849, "num_tokens": 865216437.0, "step": 5156 }, { "entropy": 1.698869526386261, "epoch": 0.5665320919502348, "grad_norm": 0.7175676822662354, "learning_rate": 1.714028131137422e-05, "loss": 1.4583, "mean_token_accuracy": 0.6476651877164841, "num_tokens": 865468974.0, "step": 5157 }, { "entropy": 1.7646108369032543, "epoch": 0.5666419488616078, "grad_norm": 0.6280926465988159, "learning_rate": 1.713911060376595e-05, "loss": 1.3422, "mean_token_accuracy": 0.6493685891230901, "num_tokens": 865631225.0, "step": 5158 }, { "entropy": 1.701049913962682, "epoch": 0.5667518057729807, "grad_norm": 0.764488935470581, "learning_rate": 1.7137939701857593e-05, "loss": 1.4099, "mean_token_accuracy": 0.671028807759285, "num_tokens": 865778041.0, "step": 5159 }, { "entropy": 1.7382206519444783, "epoch": 0.5668616626843537, "grad_norm": 0.8662286996841431, "learning_rate": 1.713676860568621e-05, "loss": 1.5012, "mean_token_accuracy": 0.6548273215691248, "num_tokens": 865997091.0, "step": 5160 }, { "entropy": 1.7540892759958904, "epoch": 0.5669715195957266, "grad_norm": 0.759167492389679, "learning_rate": 1.7135597315288873e-05, "loss": 1.2949, "mean_token_accuracy": 0.6608439882596334, "num_tokens": 866148237.0, "step": 5161 }, { "entropy": 1.7543078362941742, "epoch": 0.5670813765070996, "grad_norm": 0.6145092844963074, "learning_rate": 1.7134425830702638e-05, "loss": 1.4108, "mean_token_accuracy": 0.6426503856976827, "num_tokens": 866347643.0, "step": 5162 }, { "entropy": 1.7160977522532146, "epoch": 0.5671912334184724, "grad_norm": 0.6957924365997314, "learning_rate": 1.7133254151964594e-05, "loss": 1.4157, "mean_token_accuracy": 0.6433221797148386, "num_tokens": 866546498.0, "step": 5163 }, { "entropy": 1.7012445231278737, "epoch": 0.5673010903298453, "grad_norm": 0.6757133603096008, "learning_rate": 1.7132082279111816e-05, "loss": 1.3596, "mean_token_accuracy": 0.6617651581764221, "num_tokens": 866681108.0, "step": 5164 }, { "entropy": 1.6479481756687164, "epoch": 0.5674109472412183, "grad_norm": 0.6679365634918213, "learning_rate": 1.71309102121814e-05, "loss": 1.2512, "mean_token_accuracy": 0.6751443793376287, "num_tokens": 866783889.0, "step": 5165 }, { "entropy": 1.6679150362809498, "epoch": 0.5675208041525912, "grad_norm": 0.6778741478919983, "learning_rate": 1.712973795121044e-05, "loss": 1.4702, "mean_token_accuracy": 0.6485533167918524, "num_tokens": 866960522.0, "step": 5166 }, { "entropy": 1.7731144726276398, "epoch": 0.5676306610639642, "grad_norm": 0.8737553358078003, "learning_rate": 1.712856549623603e-05, "loss": 1.5449, "mean_token_accuracy": 0.646806518236796, "num_tokens": 867142270.0, "step": 5167 }, { "entropy": 1.7592595716317494, "epoch": 0.5677405179753371, "grad_norm": 0.6949407458305359, "learning_rate": 1.7127392847295286e-05, "loss": 1.4931, "mean_token_accuracy": 0.6439671516418457, "num_tokens": 867304938.0, "step": 5168 }, { "entropy": 1.6744161943594615, "epoch": 0.5678503748867101, "grad_norm": 0.7399938702583313, "learning_rate": 1.7126220004425324e-05, "loss": 1.3912, "mean_token_accuracy": 0.6706068366765976, "num_tokens": 867469987.0, "step": 5169 }, { "entropy": 1.6979783276716869, "epoch": 0.567960231798083, "grad_norm": 0.7100719809532166, "learning_rate": 1.7125046967663255e-05, "loss": 1.3621, "mean_token_accuracy": 0.6856355915466944, "num_tokens": 867633028.0, "step": 5170 }, { "entropy": 1.6883835395177205, "epoch": 0.568070088709456, "grad_norm": 0.6703440546989441, "learning_rate": 1.7123873737046207e-05, "loss": 1.4014, "mean_token_accuracy": 0.6576645423968633, "num_tokens": 867826775.0, "step": 5171 }, { "entropy": 1.6956737736860912, "epoch": 0.5681799456208289, "grad_norm": 0.5624609589576721, "learning_rate": 1.7122700312611324e-05, "loss": 1.435, "mean_token_accuracy": 0.6521128962437311, "num_tokens": 868003547.0, "step": 5172 }, { "entropy": 1.7059629559516907, "epoch": 0.5682898025322018, "grad_norm": 0.6375492215156555, "learning_rate": 1.7121526694395726e-05, "loss": 1.4911, "mean_token_accuracy": 0.6470068991184235, "num_tokens": 868185524.0, "step": 5173 }, { "entropy": 1.7254150609175365, "epoch": 0.5683996594435747, "grad_norm": 0.7593937516212463, "learning_rate": 1.712035288243657e-05, "loss": 1.4746, "mean_token_accuracy": 0.6589858829975128, "num_tokens": 868345630.0, "step": 5174 }, { "entropy": 1.7823002735773723, "epoch": 0.5685095163549477, "grad_norm": 0.6700795888900757, "learning_rate": 1.7119178876771004e-05, "loss": 1.4431, "mean_token_accuracy": 0.6534734417994817, "num_tokens": 868487484.0, "step": 5175 }, { "entropy": 1.7280798256397247, "epoch": 0.5686193732663206, "grad_norm": 0.6447996497154236, "learning_rate": 1.711800467743618e-05, "loss": 1.43, "mean_token_accuracy": 0.649931788444519, "num_tokens": 868665888.0, "step": 5176 }, { "entropy": 1.6930799186229706, "epoch": 0.5687292301776935, "grad_norm": 0.6100133061408997, "learning_rate": 1.711683028446927e-05, "loss": 1.4271, "mean_token_accuracy": 0.6684134354194006, "num_tokens": 868854573.0, "step": 5177 }, { "entropy": 1.6725496153036754, "epoch": 0.5688390870890665, "grad_norm": 0.6703057885169983, "learning_rate": 1.7115655697907437e-05, "loss": 1.3392, "mean_token_accuracy": 0.6644681443770727, "num_tokens": 869022423.0, "step": 5178 }, { "entropy": 1.6850597560405731, "epoch": 0.5689489440004394, "grad_norm": 0.7215490937232971, "learning_rate": 1.7114480917787854e-05, "loss": 1.3246, "mean_token_accuracy": 0.675572469830513, "num_tokens": 869164793.0, "step": 5179 }, { "entropy": 1.7287063002586365, "epoch": 0.5690588009118124, "grad_norm": 0.7284601330757141, "learning_rate": 1.7113305944147705e-05, "loss": 1.3834, "mean_token_accuracy": 0.6688296049833298, "num_tokens": 869319413.0, "step": 5180 }, { "entropy": 1.7300913234551747, "epoch": 0.5691686578231853, "grad_norm": 0.8334223031997681, "learning_rate": 1.711213077702418e-05, "loss": 1.4058, "mean_token_accuracy": 0.6505585461854935, "num_tokens": 869499491.0, "step": 5181 }, { "entropy": 1.6942098836104076, "epoch": 0.5692785147345583, "grad_norm": 0.5407121181488037, "learning_rate": 1.711095541645447e-05, "loss": 1.4164, "mean_token_accuracy": 0.6361008981863657, "num_tokens": 869772386.0, "step": 5182 }, { "entropy": 1.768281082312266, "epoch": 0.5693883716459311, "grad_norm": 0.7168862819671631, "learning_rate": 1.7109779862475773e-05, "loss": 1.3441, "mean_token_accuracy": 0.6586054215828577, "num_tokens": 869944947.0, "step": 5183 }, { "entropy": 1.5970544119675953, "epoch": 0.5694982285573041, "grad_norm": 0.6595725417137146, "learning_rate": 1.7108604115125298e-05, "loss": 1.3401, "mean_token_accuracy": 0.6841567407051722, "num_tokens": 870131433.0, "step": 5184 }, { "entropy": 1.7234369615713756, "epoch": 0.569608085468677, "grad_norm": 0.687660276889801, "learning_rate": 1.7107428174440254e-05, "loss": 1.2686, "mean_token_accuracy": 0.6720225811004639, "num_tokens": 870280419.0, "step": 5185 }, { "entropy": 1.667283058166504, "epoch": 0.56971794238005, "grad_norm": 0.6603217124938965, "learning_rate": 1.710625204045786e-05, "loss": 1.4857, "mean_token_accuracy": 0.6498481879631678, "num_tokens": 870435215.0, "step": 5186 }, { "entropy": 1.6752402385075886, "epoch": 0.5698277992914229, "grad_norm": 0.6785051226615906, "learning_rate": 1.7105075713215343e-05, "loss": 1.4667, "mean_token_accuracy": 0.6571814368168513, "num_tokens": 870635341.0, "step": 5187 }, { "entropy": 1.761198987563451, "epoch": 0.5699376562027959, "grad_norm": 0.8082736134529114, "learning_rate": 1.710389919274993e-05, "loss": 1.4905, "mean_token_accuracy": 0.6487453877925873, "num_tokens": 870824878.0, "step": 5188 }, { "entropy": 1.6660157044728596, "epoch": 0.5700475131141688, "grad_norm": 0.6341331601142883, "learning_rate": 1.7102722479098855e-05, "loss": 1.5113, "mean_token_accuracy": 0.6298314034938812, "num_tokens": 871034276.0, "step": 5189 }, { "entropy": 1.6461931069691975, "epoch": 0.5701573700255417, "grad_norm": 0.6263504028320312, "learning_rate": 1.7101545572299368e-05, "loss": 1.3666, "mean_token_accuracy": 0.6588836163282394, "num_tokens": 871199755.0, "step": 5190 }, { "entropy": 1.7178466320037842, "epoch": 0.5702672269369147, "grad_norm": 0.7136973142623901, "learning_rate": 1.710036847238871e-05, "loss": 1.3612, "mean_token_accuracy": 0.6669484178225199, "num_tokens": 871378421.0, "step": 5191 }, { "entropy": 1.7081640462080638, "epoch": 0.5703770838482876, "grad_norm": 0.6606138348579407, "learning_rate": 1.709919117940414e-05, "loss": 1.3281, "mean_token_accuracy": 0.6701463560263315, "num_tokens": 871577120.0, "step": 5192 }, { "entropy": 1.7162544826666515, "epoch": 0.5704869407596606, "grad_norm": 0.6978714466094971, "learning_rate": 1.709801369338292e-05, "loss": 1.1938, "mean_token_accuracy": 0.6883192261060079, "num_tokens": 871681735.0, "step": 5193 }, { "entropy": 1.7487797538439434, "epoch": 0.5705967976710334, "grad_norm": 0.6483967900276184, "learning_rate": 1.709683601436231e-05, "loss": 1.4388, "mean_token_accuracy": 0.6695059786240259, "num_tokens": 871823015.0, "step": 5194 }, { "entropy": 1.6930171847343445, "epoch": 0.5707066545824064, "grad_norm": 0.6266820430755615, "learning_rate": 1.709565814237959e-05, "loss": 1.4659, "mean_token_accuracy": 0.6464860786994299, "num_tokens": 871993586.0, "step": 5195 }, { "entropy": 1.6980145176251729, "epoch": 0.5708165114937793, "grad_norm": 0.7094162106513977, "learning_rate": 1.7094480077472035e-05, "loss": 1.3551, "mean_token_accuracy": 0.6648579289515814, "num_tokens": 872131292.0, "step": 5196 }, { "entropy": 1.694219281276067, "epoch": 0.5709263684051523, "grad_norm": 0.7052621841430664, "learning_rate": 1.7093301819676935e-05, "loss": 1.3833, "mean_token_accuracy": 0.6764810482660929, "num_tokens": 872314113.0, "step": 5197 }, { "entropy": 1.7130983074506123, "epoch": 0.5710362253165252, "grad_norm": 0.6613668203353882, "learning_rate": 1.7092123369031575e-05, "loss": 1.4329, "mean_token_accuracy": 0.6563561856746674, "num_tokens": 872505265.0, "step": 5198 }, { "entropy": 1.7371846238772075, "epoch": 0.5711460822278982, "grad_norm": 0.6612892746925354, "learning_rate": 1.7090944725573254e-05, "loss": 1.4961, "mean_token_accuracy": 0.6479866852362951, "num_tokens": 872658236.0, "step": 5199 }, { "entropy": 1.6984902322292328, "epoch": 0.5712559391392711, "grad_norm": 0.7463762164115906, "learning_rate": 1.708976588933928e-05, "loss": 1.5082, "mean_token_accuracy": 0.6461683760086695, "num_tokens": 872821473.0, "step": 5200 }, { "entropy": 1.696275144815445, "epoch": 0.5713657960506441, "grad_norm": 0.7056490778923035, "learning_rate": 1.708858686036696e-05, "loss": 1.4607, "mean_token_accuracy": 0.6508284409840902, "num_tokens": 872984515.0, "step": 5201 }, { "entropy": 1.626963605483373, "epoch": 0.571475652962017, "grad_norm": 0.7029443383216858, "learning_rate": 1.7087407638693607e-05, "loss": 1.3804, "mean_token_accuracy": 0.6720445652802786, "num_tokens": 873183647.0, "step": 5202 }, { "entropy": 1.7299024264017742, "epoch": 0.57158550987339, "grad_norm": 0.586719810962677, "learning_rate": 1.7086228224356543e-05, "loss": 1.3943, "mean_token_accuracy": 0.6482478181521097, "num_tokens": 873348548.0, "step": 5203 }, { "entropy": 1.71010688940684, "epoch": 0.5716953667847628, "grad_norm": 0.7238386869430542, "learning_rate": 1.7085048617393104e-05, "loss": 1.365, "mean_token_accuracy": 0.6639541685581207, "num_tokens": 873549913.0, "step": 5204 }, { "entropy": 1.6785256763299305, "epoch": 0.5718052236961357, "grad_norm": 0.6056387424468994, "learning_rate": 1.7083868817840617e-05, "loss": 1.3858, "mean_token_accuracy": 0.6524512271086375, "num_tokens": 873733597.0, "step": 5205 }, { "entropy": 1.6776171326637268, "epoch": 0.5719150806075087, "grad_norm": 0.7004038095474243, "learning_rate": 1.7082688825736424e-05, "loss": 1.256, "mean_token_accuracy": 0.6724594185749689, "num_tokens": 873854790.0, "step": 5206 }, { "entropy": 1.6850430766741435, "epoch": 0.5720249375188816, "grad_norm": 0.6626861691474915, "learning_rate": 1.7081508641117866e-05, "loss": 1.3037, "mean_token_accuracy": 0.6690275172392527, "num_tokens": 874031885.0, "step": 5207 }, { "entropy": 1.68301260471344, "epoch": 0.5721347944302546, "grad_norm": 1.1614986658096313, "learning_rate": 1.7080328264022307e-05, "loss": 1.5599, "mean_token_accuracy": 0.6682318995396296, "num_tokens": 874203630.0, "step": 5208 }, { "entropy": 1.791142870982488, "epoch": 0.5722446513416275, "grad_norm": 0.736549973487854, "learning_rate": 1.7079147694487093e-05, "loss": 1.3728, "mean_token_accuracy": 0.6522929718097051, "num_tokens": 874342919.0, "step": 5209 }, { "entropy": 1.6703368723392487, "epoch": 0.5723545082530005, "grad_norm": 0.6671043634414673, "learning_rate": 1.7077966932549595e-05, "loss": 1.401, "mean_token_accuracy": 0.6502569168806076, "num_tokens": 874581234.0, "step": 5210 }, { "entropy": 1.7057646413644154, "epoch": 0.5724643651643734, "grad_norm": 0.652962863445282, "learning_rate": 1.707678597824718e-05, "loss": 1.4256, "mean_token_accuracy": 0.648739273349444, "num_tokens": 874765878.0, "step": 5211 }, { "entropy": 1.754818469285965, "epoch": 0.5725742220757464, "grad_norm": 0.6058507561683655, "learning_rate": 1.707560483161723e-05, "loss": 1.5231, "mean_token_accuracy": 0.6460634718338648, "num_tokens": 874977004.0, "step": 5212 }, { "entropy": 1.7265916963418324, "epoch": 0.5726840789871193, "grad_norm": 0.6572061777114868, "learning_rate": 1.7074423492697127e-05, "loss": 1.5681, "mean_token_accuracy": 0.6427283038695654, "num_tokens": 875176239.0, "step": 5213 }, { "entropy": 1.6362906793753307, "epoch": 0.5727939358984923, "grad_norm": 0.7420666217803955, "learning_rate": 1.7073241961524253e-05, "loss": 1.455, "mean_token_accuracy": 0.6566400279601415, "num_tokens": 875350911.0, "step": 5214 }, { "entropy": 1.7721679508686066, "epoch": 0.5729037928098651, "grad_norm": 0.6848008632659912, "learning_rate": 1.707206023813601e-05, "loss": 1.4325, "mean_token_accuracy": 0.6503975490729014, "num_tokens": 875533134.0, "step": 5215 }, { "entropy": 1.7172233561674755, "epoch": 0.5730136497212381, "grad_norm": 0.7289333343505859, "learning_rate": 1.7070878322569797e-05, "loss": 1.4098, "mean_token_accuracy": 0.6530760476986567, "num_tokens": 875694665.0, "step": 5216 }, { "entropy": 1.7005958954493205, "epoch": 0.573123506632611, "grad_norm": 0.7095157504081726, "learning_rate": 1.706969621486302e-05, "loss": 1.4573, "mean_token_accuracy": 0.6517948259909948, "num_tokens": 875849820.0, "step": 5217 }, { "entropy": 1.7994357645511627, "epoch": 0.5732333635439839, "grad_norm": 0.638963520526886, "learning_rate": 1.706851391505309e-05, "loss": 1.5509, "mean_token_accuracy": 0.6139777153730392, "num_tokens": 876084718.0, "step": 5218 }, { "entropy": 1.7117552955945332, "epoch": 0.5733432204553569, "grad_norm": 0.6520084738731384, "learning_rate": 1.7067331423177433e-05, "loss": 1.4279, "mean_token_accuracy": 0.649882584810257, "num_tokens": 876279619.0, "step": 5219 }, { "entropy": 1.6188062528769176, "epoch": 0.5734530773667298, "grad_norm": 0.6222274303436279, "learning_rate": 1.706614873927347e-05, "loss": 1.2127, "mean_token_accuracy": 0.6836750755707423, "num_tokens": 876402605.0, "step": 5220 }, { "entropy": 1.7084797322750092, "epoch": 0.5735629342781028, "grad_norm": 0.7118093967437744, "learning_rate": 1.7064965863378634e-05, "loss": 1.4423, "mean_token_accuracy": 0.6552711973587672, "num_tokens": 876580269.0, "step": 5221 }, { "entropy": 1.6655645966529846, "epoch": 0.5736727911894757, "grad_norm": 0.6326475143432617, "learning_rate": 1.7063782795530357e-05, "loss": 1.3397, "mean_token_accuracy": 0.6672480752070745, "num_tokens": 876767038.0, "step": 5222 }, { "entropy": 1.7708354194959004, "epoch": 0.5737826481008487, "grad_norm": 0.7146939635276794, "learning_rate": 1.7062599535766092e-05, "loss": 1.4182, "mean_token_accuracy": 0.6546067396799723, "num_tokens": 876905753.0, "step": 5223 }, { "entropy": 1.6421829263369243, "epoch": 0.5738925050122216, "grad_norm": 0.7064340710639954, "learning_rate": 1.706141608412328e-05, "loss": 1.5359, "mean_token_accuracy": 0.6482568581899008, "num_tokens": 877073242.0, "step": 5224 }, { "entropy": 1.7002604206403096, "epoch": 0.5740023619235946, "grad_norm": 0.7547617554664612, "learning_rate": 1.706023244063938e-05, "loss": 1.4337, "mean_token_accuracy": 0.6474874764680862, "num_tokens": 877218384.0, "step": 5225 }, { "entropy": 1.792248547077179, "epoch": 0.5741122188349674, "grad_norm": 0.7492245435714722, "learning_rate": 1.7059048605351857e-05, "loss": 1.5136, "mean_token_accuracy": 0.6328116208314896, "num_tokens": 877364436.0, "step": 5226 }, { "entropy": 1.7168182233969371, "epoch": 0.5742220757463404, "grad_norm": 0.5984399914741516, "learning_rate": 1.7057864578298175e-05, "loss": 1.3634, "mean_token_accuracy": 0.6506437808275223, "num_tokens": 877520851.0, "step": 5227 }, { "entropy": 1.6336182951927185, "epoch": 0.5743319326577133, "grad_norm": 0.6459986567497253, "learning_rate": 1.7056680359515807e-05, "loss": 1.4619, "mean_token_accuracy": 0.658488447467486, "num_tokens": 877714817.0, "step": 5228 }, { "entropy": 1.755718320608139, "epoch": 0.5744417895690863, "grad_norm": 0.691868782043457, "learning_rate": 1.7055495949042236e-05, "loss": 1.4811, "mean_token_accuracy": 0.6512856880823771, "num_tokens": 877896899.0, "step": 5229 }, { "entropy": 1.7979524632294972, "epoch": 0.5745516464804592, "grad_norm": 0.6676319241523743, "learning_rate": 1.7054311346914948e-05, "loss": 1.3411, "mean_token_accuracy": 0.6650152256091436, "num_tokens": 878029570.0, "step": 5230 }, { "entropy": 1.7446431517601013, "epoch": 0.5746615033918321, "grad_norm": 0.810004711151123, "learning_rate": 1.705312655317143e-05, "loss": 1.5819, "mean_token_accuracy": 0.6404779901107153, "num_tokens": 878204154.0, "step": 5231 }, { "entropy": 1.6578894356886547, "epoch": 0.5747713603032051, "grad_norm": 0.6192182898521423, "learning_rate": 1.7051941567849188e-05, "loss": 1.4529, "mean_token_accuracy": 0.6516250371932983, "num_tokens": 878372472.0, "step": 5232 }, { "entropy": 1.694698413213094, "epoch": 0.574881217214578, "grad_norm": 0.7715234160423279, "learning_rate": 1.7050756390985722e-05, "loss": 1.3658, "mean_token_accuracy": 0.6625189731518427, "num_tokens": 878498037.0, "step": 5233 }, { "entropy": 1.6993319789568584, "epoch": 0.574991074125951, "grad_norm": 0.8013604283332825, "learning_rate": 1.7049571022618542e-05, "loss": 1.255, "mean_token_accuracy": 0.6749153534571329, "num_tokens": 878628141.0, "step": 5234 }, { "entropy": 1.7042160034179688, "epoch": 0.5751009310373238, "grad_norm": 0.6807699203491211, "learning_rate": 1.7048385462785165e-05, "loss": 1.3983, "mean_token_accuracy": 0.6614261368910471, "num_tokens": 878808460.0, "step": 5235 }, { "entropy": 1.7180274625619252, "epoch": 0.5752107879486968, "grad_norm": 0.7537125945091248, "learning_rate": 1.7047199711523114e-05, "loss": 1.3341, "mean_token_accuracy": 0.659697949886322, "num_tokens": 878953257.0, "step": 5236 }, { "entropy": 1.7314948936303456, "epoch": 0.5753206448600697, "grad_norm": 0.6685804128646851, "learning_rate": 1.7046013768869917e-05, "loss": 1.3889, "mean_token_accuracy": 0.6570734431346258, "num_tokens": 879172278.0, "step": 5237 }, { "entropy": 1.6946994264920552, "epoch": 0.5754305017714427, "grad_norm": 0.691840648651123, "learning_rate": 1.7044827634863114e-05, "loss": 1.4046, "mean_token_accuracy": 0.6577261487642924, "num_tokens": 879297522.0, "step": 5238 }, { "entropy": 1.6562682489554088, "epoch": 0.5755403586828156, "grad_norm": 0.6487468481063843, "learning_rate": 1.704364130954023e-05, "loss": 1.3072, "mean_token_accuracy": 0.6770086338122686, "num_tokens": 879456037.0, "step": 5239 }, { "entropy": 1.7011775175730388, "epoch": 0.5756502155941886, "grad_norm": 0.7802998423576355, "learning_rate": 1.7042454792938827e-05, "loss": 1.3878, "mean_token_accuracy": 0.656680092215538, "num_tokens": 879654239.0, "step": 5240 }, { "entropy": 1.788252015908559, "epoch": 0.5757600725055615, "grad_norm": 0.8169171214103699, "learning_rate": 1.704126808509645e-05, "loss": 1.631, "mean_token_accuracy": 0.627186248699824, "num_tokens": 879815488.0, "step": 5241 }, { "entropy": 1.724605659643809, "epoch": 0.5758699294169345, "grad_norm": 0.624187707901001, "learning_rate": 1.7040081186050666e-05, "loss": 1.4769, "mean_token_accuracy": 0.6461042215426763, "num_tokens": 879993713.0, "step": 5242 }, { "entropy": 1.7722249925136566, "epoch": 0.5759797863283074, "grad_norm": 0.7050033211708069, "learning_rate": 1.703889409583903e-05, "loss": 1.5217, "mean_token_accuracy": 0.6346140801906586, "num_tokens": 880182622.0, "step": 5243 }, { "entropy": 1.7021553913752239, "epoch": 0.5760896432396804, "grad_norm": 0.6425755023956299, "learning_rate": 1.703770681449912e-05, "loss": 1.4137, "mean_token_accuracy": 0.6676051765680313, "num_tokens": 880327373.0, "step": 5244 }, { "entropy": 1.7722231149673462, "epoch": 0.5761995001510533, "grad_norm": 0.7244229316711426, "learning_rate": 1.7036519342068507e-05, "loss": 1.4358, "mean_token_accuracy": 0.6465661724408468, "num_tokens": 880481323.0, "step": 5245 }, { "entropy": 1.7024872402350109, "epoch": 0.5763093570624261, "grad_norm": 0.909506618976593, "learning_rate": 1.7035331678584776e-05, "loss": 1.4059, "mean_token_accuracy": 0.6582985470692316, "num_tokens": 880643104.0, "step": 5246 }, { "entropy": 1.688640296459198, "epoch": 0.5764192139737991, "grad_norm": 0.8793448805809021, "learning_rate": 1.703414382408552e-05, "loss": 1.5182, "mean_token_accuracy": 0.6507097780704498, "num_tokens": 880824149.0, "step": 5247 }, { "entropy": 1.7103163798650105, "epoch": 0.576529070885172, "grad_norm": 0.6955628395080566, "learning_rate": 1.703295577860833e-05, "loss": 1.4729, "mean_token_accuracy": 0.6492475817600886, "num_tokens": 881005364.0, "step": 5248 }, { "entropy": 1.7185686628023784, "epoch": 0.576638927796545, "grad_norm": 0.6877673268318176, "learning_rate": 1.703176754219081e-05, "loss": 1.36, "mean_token_accuracy": 0.6541955421368281, "num_tokens": 881203802.0, "step": 5249 }, { "entropy": 1.6697140634059906, "epoch": 0.5767487847079179, "grad_norm": 0.6812824010848999, "learning_rate": 1.703057911487056e-05, "loss": 1.3598, "mean_token_accuracy": 0.6645796249310175, "num_tokens": 881348954.0, "step": 5250 }, { "entropy": 1.7107038895289104, "epoch": 0.5768586416192909, "grad_norm": 0.676230788230896, "learning_rate": 1.70293904966852e-05, "loss": 1.4258, "mean_token_accuracy": 0.6471339017152786, "num_tokens": 881511658.0, "step": 5251 }, { "entropy": 1.693604399760564, "epoch": 0.5769684985306638, "grad_norm": 0.6729671955108643, "learning_rate": 1.702820168767235e-05, "loss": 1.3252, "mean_token_accuracy": 0.6672259618838629, "num_tokens": 881661578.0, "step": 5252 }, { "entropy": 1.7389824489752452, "epoch": 0.5770783554420368, "grad_norm": 0.7641178369522095, "learning_rate": 1.7027012687869637e-05, "loss": 1.32, "mean_token_accuracy": 0.6692226231098175, "num_tokens": 881774550.0, "step": 5253 }, { "entropy": 1.7137807210286458, "epoch": 0.5771882123534097, "grad_norm": 0.7289935350418091, "learning_rate": 1.7025823497314682e-05, "loss": 1.3686, "mean_token_accuracy": 0.6578000535567602, "num_tokens": 881928763.0, "step": 5254 }, { "entropy": 1.6480054656664531, "epoch": 0.5772980692647827, "grad_norm": 0.8076556921005249, "learning_rate": 1.7024634116045133e-05, "loss": 1.3671, "mean_token_accuracy": 0.669852688908577, "num_tokens": 882123111.0, "step": 5255 }, { "entropy": 1.6956392228603363, "epoch": 0.5774079261761556, "grad_norm": 0.7042696475982666, "learning_rate": 1.7023444544098624e-05, "loss": 1.3784, "mean_token_accuracy": 0.6528898576895396, "num_tokens": 882287513.0, "step": 5256 }, { "entropy": 1.6527445713678997, "epoch": 0.5775177830875285, "grad_norm": 0.5778684616088867, "learning_rate": 1.702225478151281e-05, "loss": 1.403, "mean_token_accuracy": 0.660379151503245, "num_tokens": 882511864.0, "step": 5257 }, { "entropy": 1.7085080047448475, "epoch": 0.5776276399989014, "grad_norm": 0.8756363391876221, "learning_rate": 1.7021064828325347e-05, "loss": 1.437, "mean_token_accuracy": 0.6682372838258743, "num_tokens": 882708567.0, "step": 5258 }, { "entropy": 1.7350213130315144, "epoch": 0.5777374969102743, "grad_norm": 0.7906478047370911, "learning_rate": 1.7019874684573897e-05, "loss": 1.4177, "mean_token_accuracy": 0.6588336328665415, "num_tokens": 882865802.0, "step": 5259 }, { "entropy": 1.7045749227205913, "epoch": 0.5778473538216473, "grad_norm": 0.6288008689880371, "learning_rate": 1.7018684350296123e-05, "loss": 1.3069, "mean_token_accuracy": 0.6615212808052698, "num_tokens": 882992111.0, "step": 5260 }, { "entropy": 1.7101500928401947, "epoch": 0.5779572107330202, "grad_norm": 0.6722090840339661, "learning_rate": 1.7017493825529703e-05, "loss": 1.3803, "mean_token_accuracy": 0.6603521555662155, "num_tokens": 883145631.0, "step": 5261 }, { "entropy": 1.7001839975516002, "epoch": 0.5780670676443932, "grad_norm": 0.6493961811065674, "learning_rate": 1.7016303110312316e-05, "loss": 1.3758, "mean_token_accuracy": 0.6508415341377258, "num_tokens": 883353534.0, "step": 5262 }, { "entropy": 1.7026556134223938, "epoch": 0.5781769245557661, "grad_norm": 0.5992609858512878, "learning_rate": 1.7015112204681644e-05, "loss": 1.4741, "mean_token_accuracy": 0.6399723639090856, "num_tokens": 883605305.0, "step": 5263 }, { "entropy": 1.7291531364123027, "epoch": 0.5782867814671391, "grad_norm": 0.8137235641479492, "learning_rate": 1.7013921108675385e-05, "loss": 1.4096, "mean_token_accuracy": 0.6661647657553355, "num_tokens": 883813260.0, "step": 5264 }, { "entropy": 1.672690361738205, "epoch": 0.578396638378512, "grad_norm": 0.5643144845962524, "learning_rate": 1.701272982233123e-05, "loss": 1.3007, "mean_token_accuracy": 0.6663571248451868, "num_tokens": 883975166.0, "step": 5265 }, { "entropy": 1.7014889319737752, "epoch": 0.578506495289885, "grad_norm": 0.8740093111991882, "learning_rate": 1.7011538345686887e-05, "loss": 1.3405, "mean_token_accuracy": 0.6774832854668299, "num_tokens": 884141235.0, "step": 5266 }, { "entropy": 1.709878146648407, "epoch": 0.5786163522012578, "grad_norm": 0.8314535021781921, "learning_rate": 1.7010346678780062e-05, "loss": 1.4373, "mean_token_accuracy": 0.6721083223819733, "num_tokens": 884279533.0, "step": 5267 }, { "entropy": 1.7554621994495392, "epoch": 0.5787262091126308, "grad_norm": 0.6525247097015381, "learning_rate": 1.7009154821648478e-05, "loss": 1.418, "mean_token_accuracy": 0.6456648210684458, "num_tokens": 884459831.0, "step": 5268 }, { "entropy": 1.6662676533063252, "epoch": 0.5788360660240037, "grad_norm": 0.765891969203949, "learning_rate": 1.7007962774329846e-05, "loss": 1.4096, "mean_token_accuracy": 0.650817280014356, "num_tokens": 884612746.0, "step": 5269 }, { "entropy": 1.6593515574932098, "epoch": 0.5789459229353767, "grad_norm": 0.5619024038314819, "learning_rate": 1.7006770536861902e-05, "loss": 1.444, "mean_token_accuracy": 0.6437307397524515, "num_tokens": 884817763.0, "step": 5270 }, { "entropy": 1.7293136517206829, "epoch": 0.5790557798467496, "grad_norm": 0.7005648612976074, "learning_rate": 1.7005578109282377e-05, "loss": 1.4625, "mean_token_accuracy": 0.6423446436723074, "num_tokens": 885024512.0, "step": 5271 }, { "entropy": 1.7019110818703969, "epoch": 0.5791656367581225, "grad_norm": 0.6571354269981384, "learning_rate": 1.700438549162901e-05, "loss": 1.3648, "mean_token_accuracy": 0.6714861591657003, "num_tokens": 885150107.0, "step": 5272 }, { "entropy": 1.683734953403473, "epoch": 0.5792754936694955, "grad_norm": 0.6158427596092224, "learning_rate": 1.7003192683939547e-05, "loss": 1.6153, "mean_token_accuracy": 0.6282309715946516, "num_tokens": 885329600.0, "step": 5273 }, { "entropy": 1.7012614409128826, "epoch": 0.5793853505808684, "grad_norm": 0.6291089057922363, "learning_rate": 1.7001999686251743e-05, "loss": 1.2531, "mean_token_accuracy": 0.6708623866240183, "num_tokens": 885477430.0, "step": 5274 }, { "entropy": 1.7729269862174988, "epoch": 0.5794952074922414, "grad_norm": 0.6474009156227112, "learning_rate": 1.7000806498603354e-05, "loss": 1.4934, "mean_token_accuracy": 0.6346175720294317, "num_tokens": 885718460.0, "step": 5275 }, { "entropy": 1.646110604206721, "epoch": 0.5796050644036143, "grad_norm": 1.444273829460144, "learning_rate": 1.6999613121032143e-05, "loss": 1.169, "mean_token_accuracy": 0.6865449994802475, "num_tokens": 885930047.0, "step": 5276 }, { "entropy": 1.733050415913264, "epoch": 0.5797149213149873, "grad_norm": 1.0887868404388428, "learning_rate": 1.6998419553575877e-05, "loss": 1.5272, "mean_token_accuracy": 0.6553502380847931, "num_tokens": 886082958.0, "step": 5277 }, { "entropy": 1.7067347665627797, "epoch": 0.5798247782263601, "grad_norm": 0.7280968427658081, "learning_rate": 1.6997225796272342e-05, "loss": 1.3507, "mean_token_accuracy": 0.6697677026192347, "num_tokens": 886202974.0, "step": 5278 }, { "entropy": 1.6995122532049816, "epoch": 0.5799346351377331, "grad_norm": 0.6713830232620239, "learning_rate": 1.6996031849159304e-05, "loss": 1.3817, "mean_token_accuracy": 0.6518794447183609, "num_tokens": 886396571.0, "step": 5279 }, { "entropy": 1.7079652845859528, "epoch": 0.580044492049106, "grad_norm": 0.7544903755187988, "learning_rate": 1.6994837712274566e-05, "loss": 1.3505, "mean_token_accuracy": 0.663679818312327, "num_tokens": 886534124.0, "step": 5280 }, { "entropy": 1.718712459007899, "epoch": 0.580154348960479, "grad_norm": 0.6247344613075256, "learning_rate": 1.6993643385655914e-05, "loss": 1.3006, "mean_token_accuracy": 0.6607035100460052, "num_tokens": 886670601.0, "step": 5281 }, { "entropy": 1.730804314215978, "epoch": 0.5802642058718519, "grad_norm": 0.5982744693756104, "learning_rate": 1.6992448869341147e-05, "loss": 1.4283, "mean_token_accuracy": 0.6605020463466644, "num_tokens": 886849250.0, "step": 5282 }, { "entropy": 1.6990150213241577, "epoch": 0.5803740627832249, "grad_norm": 0.7501466274261475, "learning_rate": 1.6991254163368077e-05, "loss": 1.4045, "mean_token_accuracy": 0.6659552901983261, "num_tokens": 886994164.0, "step": 5283 }, { "entropy": 1.7572198311487834, "epoch": 0.5804839196945978, "grad_norm": 0.7074136734008789, "learning_rate": 1.699005926777451e-05, "loss": 1.6011, "mean_token_accuracy": 0.6400540322065353, "num_tokens": 887194351.0, "step": 5284 }, { "entropy": 1.6333990295728047, "epoch": 0.5805937766059707, "grad_norm": 0.8395227193832397, "learning_rate": 1.698886418259827e-05, "loss": 1.2298, "mean_token_accuracy": 0.68232361972332, "num_tokens": 887338240.0, "step": 5285 }, { "entropy": 1.7285989026228588, "epoch": 0.5807036335173437, "grad_norm": 0.6749279499053955, "learning_rate": 1.6987668907877176e-05, "loss": 1.5952, "mean_token_accuracy": 0.637232648829619, "num_tokens": 887543389.0, "step": 5286 }, { "entropy": 1.6935764054457347, "epoch": 0.5808134904287166, "grad_norm": 0.6975464224815369, "learning_rate": 1.6986473443649058e-05, "loss": 1.3784, "mean_token_accuracy": 0.6656419287125269, "num_tokens": 887737148.0, "step": 5287 }, { "entropy": 1.7259495158990223, "epoch": 0.5809233473400895, "grad_norm": 0.6545588970184326, "learning_rate": 1.698527778995175e-05, "loss": 1.4611, "mean_token_accuracy": 0.6462489118178686, "num_tokens": 887956818.0, "step": 5288 }, { "entropy": 1.7162467340628307, "epoch": 0.5810332042514624, "grad_norm": 0.7039337158203125, "learning_rate": 1.6984081946823102e-05, "loss": 1.416, "mean_token_accuracy": 0.6598154058059057, "num_tokens": 888168154.0, "step": 5289 }, { "entropy": 1.7691878577073414, "epoch": 0.5811430611628354, "grad_norm": 0.7095263004302979, "learning_rate": 1.698288591430096e-05, "loss": 1.3154, "mean_token_accuracy": 0.661227265993754, "num_tokens": 888288120.0, "step": 5290 }, { "entropy": 1.66457137465477, "epoch": 0.5812529180742083, "grad_norm": 0.7228137850761414, "learning_rate": 1.6981689692423166e-05, "loss": 1.3557, "mean_token_accuracy": 0.668318991859754, "num_tokens": 888435126.0, "step": 5291 }, { "entropy": 1.7254151900609334, "epoch": 0.5813627749855813, "grad_norm": 0.6591452360153198, "learning_rate": 1.6980493281227595e-05, "loss": 1.426, "mean_token_accuracy": 0.6703950862089793, "num_tokens": 888628454.0, "step": 5292 }, { "entropy": 1.7352023720741272, "epoch": 0.5814726318969542, "grad_norm": 0.7988258004188538, "learning_rate": 1.6979296680752103e-05, "loss": 1.4883, "mean_token_accuracy": 0.6490569015343984, "num_tokens": 888814092.0, "step": 5293 }, { "entropy": 1.6795012454191844, "epoch": 0.5815824888083272, "grad_norm": 0.8168243169784546, "learning_rate": 1.6978099891034564e-05, "loss": 1.3261, "mean_token_accuracy": 0.6650880227486292, "num_tokens": 888976522.0, "step": 5294 }, { "entropy": 1.7140100101629894, "epoch": 0.5816923457197001, "grad_norm": 0.6555765867233276, "learning_rate": 1.6976902912112862e-05, "loss": 1.3571, "mean_token_accuracy": 0.668232391277949, "num_tokens": 889116020.0, "step": 5295 }, { "entropy": 1.6489605208237965, "epoch": 0.5818022026310731, "grad_norm": 0.5820825695991516, "learning_rate": 1.6975705744024875e-05, "loss": 1.3159, "mean_token_accuracy": 0.6601169308026632, "num_tokens": 889297392.0, "step": 5296 }, { "entropy": 1.726332853237788, "epoch": 0.581912059542446, "grad_norm": 0.7054926156997681, "learning_rate": 1.697450838680849e-05, "loss": 1.5213, "mean_token_accuracy": 0.6465317706267039, "num_tokens": 889440904.0, "step": 5297 }, { "entropy": 1.675535907347997, "epoch": 0.582021916453819, "grad_norm": 0.6241676211357117, "learning_rate": 1.697331084050161e-05, "loss": 1.3933, "mean_token_accuracy": 0.6527293970187505, "num_tokens": 889625922.0, "step": 5298 }, { "entropy": 1.6990026930967967, "epoch": 0.5821317733651918, "grad_norm": 0.7096611261367798, "learning_rate": 1.6972113105142134e-05, "loss": 1.3727, "mean_token_accuracy": 0.6669509063164393, "num_tokens": 889747954.0, "step": 5299 }, { "entropy": 1.7199326157569885, "epoch": 0.5822416302765647, "grad_norm": 0.7114792466163635, "learning_rate": 1.697091518076797e-05, "loss": 1.7058, "mean_token_accuracy": 0.6259814451138178, "num_tokens": 889963566.0, "step": 5300 }, { "entropy": 1.728680282831192, "epoch": 0.5823514871879377, "grad_norm": 0.6678593158721924, "learning_rate": 1.6969717067417027e-05, "loss": 1.3901, "mean_token_accuracy": 0.6541063139835993, "num_tokens": 890097701.0, "step": 5301 }, { "entropy": 1.7306519746780396, "epoch": 0.5824613440993106, "grad_norm": 0.621885359287262, "learning_rate": 1.6968518765127234e-05, "loss": 1.458, "mean_token_accuracy": 0.6357903728882471, "num_tokens": 890300403.0, "step": 5302 }, { "entropy": 1.7454725603262584, "epoch": 0.5825712010106836, "grad_norm": 0.7045135498046875, "learning_rate": 1.696732027393651e-05, "loss": 1.5412, "mean_token_accuracy": 0.6473953574895859, "num_tokens": 890504802.0, "step": 5303 }, { "entropy": 1.7602245509624481, "epoch": 0.5826810579220565, "grad_norm": 0.6078582406044006, "learning_rate": 1.6966121593882783e-05, "loss": 1.4724, "mean_token_accuracy": 0.6533922801415125, "num_tokens": 890684992.0, "step": 5304 }, { "entropy": 1.6792764365673065, "epoch": 0.5827909148334295, "grad_norm": 0.626315176486969, "learning_rate": 1.6964922725004e-05, "loss": 1.2841, "mean_token_accuracy": 0.6726724753777186, "num_tokens": 890831358.0, "step": 5305 }, { "entropy": 1.681016316016515, "epoch": 0.5829007717448024, "grad_norm": 0.6014984846115112, "learning_rate": 1.6963723667338104e-05, "loss": 1.3824, "mean_token_accuracy": 0.6497796426216761, "num_tokens": 891039826.0, "step": 5306 }, { "entropy": 1.6937636534372966, "epoch": 0.5830106286561754, "grad_norm": 0.616007924079895, "learning_rate": 1.696252442092304e-05, "loss": 1.3501, "mean_token_accuracy": 0.6541839092969894, "num_tokens": 891155706.0, "step": 5307 }, { "entropy": 1.689687172571818, "epoch": 0.5831204855675483, "grad_norm": 0.5852237343788147, "learning_rate": 1.696132498579676e-05, "loss": 1.3242, "mean_token_accuracy": 0.6588374376296997, "num_tokens": 891334512.0, "step": 5308 }, { "entropy": 1.7258604069550831, "epoch": 0.5832303424789212, "grad_norm": 0.6953949332237244, "learning_rate": 1.6960125361997232e-05, "loss": 1.4621, "mean_token_accuracy": 0.6593173642953237, "num_tokens": 891501515.0, "step": 5309 }, { "entropy": 1.710837850968043, "epoch": 0.5833401993902941, "grad_norm": 0.6133494973182678, "learning_rate": 1.6958925549562423e-05, "loss": 1.5322, "mean_token_accuracy": 0.6437032918135325, "num_tokens": 891691280.0, "step": 5310 }, { "entropy": 1.637757400671641, "epoch": 0.5834500563016671, "grad_norm": 0.6457231044769287, "learning_rate": 1.6957725548530307e-05, "loss": 1.2902, "mean_token_accuracy": 0.6730124702056249, "num_tokens": 891841543.0, "step": 5311 }, { "entropy": 1.676180859406789, "epoch": 0.58355991321304, "grad_norm": 0.6418126225471497, "learning_rate": 1.6956525358938866e-05, "loss": 1.5549, "mean_token_accuracy": 0.6378814553221067, "num_tokens": 892038000.0, "step": 5312 }, { "entropy": 1.7321598728497822, "epoch": 0.5836697701244129, "grad_norm": 0.6593100428581238, "learning_rate": 1.6955324980826073e-05, "loss": 1.4845, "mean_token_accuracy": 0.6519307891527811, "num_tokens": 892184783.0, "step": 5313 }, { "entropy": 1.7277933657169342, "epoch": 0.5837796270357859, "grad_norm": 0.7424345016479492, "learning_rate": 1.695412441422993e-05, "loss": 1.455, "mean_token_accuracy": 0.6451116353273392, "num_tokens": 892375390.0, "step": 5314 }, { "entropy": 1.7113395134607952, "epoch": 0.5838894839471588, "grad_norm": 0.705172061920166, "learning_rate": 1.6952923659188437e-05, "loss": 1.276, "mean_token_accuracy": 0.6679862240950266, "num_tokens": 892535909.0, "step": 5315 }, { "entropy": 1.589612990617752, "epoch": 0.5839993408585318, "grad_norm": 0.5760466456413269, "learning_rate": 1.6951722715739584e-05, "loss": 1.3377, "mean_token_accuracy": 0.6666668653488159, "num_tokens": 892712710.0, "step": 5316 }, { "entropy": 1.6956795851389568, "epoch": 0.5841091977699047, "grad_norm": 0.8694625496864319, "learning_rate": 1.6950521583921397e-05, "loss": 1.4102, "mean_token_accuracy": 0.6633008221785227, "num_tokens": 892862269.0, "step": 5317 }, { "entropy": 1.7499979138374329, "epoch": 0.5842190546812777, "grad_norm": 0.6729763150215149, "learning_rate": 1.694932026377188e-05, "loss": 1.5164, "mean_token_accuracy": 0.6467028011878332, "num_tokens": 893046579.0, "step": 5318 }, { "entropy": 1.7273075977961223, "epoch": 0.5843289115926505, "grad_norm": 0.7413749098777771, "learning_rate": 1.6948118755329058e-05, "loss": 1.6088, "mean_token_accuracy": 0.6405004958311716, "num_tokens": 893236120.0, "step": 5319 }, { "entropy": 1.763388415177663, "epoch": 0.5844387685040235, "grad_norm": 0.7787388563156128, "learning_rate": 1.6946917058630955e-05, "loss": 1.3354, "mean_token_accuracy": 0.6630240182081858, "num_tokens": 893371667.0, "step": 5320 }, { "entropy": 1.7534803748130798, "epoch": 0.5845486254153964, "grad_norm": 0.8445398211479187, "learning_rate": 1.6945715173715613e-05, "loss": 0.913, "mean_token_accuracy": 0.6981311688820521, "num_tokens": 893492510.0, "step": 5321 }, { "entropy": 1.7531782786051433, "epoch": 0.5846584823267694, "grad_norm": 0.7116334438323975, "learning_rate": 1.694451310062106e-05, "loss": 1.3404, "mean_token_accuracy": 0.6567851354678472, "num_tokens": 893645939.0, "step": 5322 }, { "entropy": 1.7355563342571259, "epoch": 0.5847683392381423, "grad_norm": 0.6662753224372864, "learning_rate": 1.6943310839385346e-05, "loss": 1.3407, "mean_token_accuracy": 0.6634679039319357, "num_tokens": 893779093.0, "step": 5323 }, { "entropy": 1.7133808135986328, "epoch": 0.5848781961495153, "grad_norm": 0.7315980792045593, "learning_rate": 1.6942108390046523e-05, "loss": 1.5366, "mean_token_accuracy": 0.6463624636332194, "num_tokens": 893920200.0, "step": 5324 }, { "entropy": 1.733003169298172, "epoch": 0.5849880530608882, "grad_norm": 0.7187158465385437, "learning_rate": 1.6940905752642648e-05, "loss": 1.3796, "mean_token_accuracy": 0.6481821984052658, "num_tokens": 894070681.0, "step": 5325 }, { "entropy": 1.7708965142567952, "epoch": 0.5850979099722611, "grad_norm": 0.8311673998832703, "learning_rate": 1.693970292721178e-05, "loss": 1.518, "mean_token_accuracy": 0.6394187857707342, "num_tokens": 894259155.0, "step": 5326 }, { "entropy": 1.7388906975587208, "epoch": 0.5852077668836341, "grad_norm": 0.7034797072410583, "learning_rate": 1.6938499913791996e-05, "loss": 1.4722, "mean_token_accuracy": 0.6401112427314123, "num_tokens": 894413541.0, "step": 5327 }, { "entropy": 1.6831568082173665, "epoch": 0.585317623795007, "grad_norm": 0.6965767741203308, "learning_rate": 1.6937296712421364e-05, "loss": 1.2915, "mean_token_accuracy": 0.6696919500827789, "num_tokens": 894541182.0, "step": 5328 }, { "entropy": 1.6878312130769093, "epoch": 0.58542748070638, "grad_norm": 0.6519395709037781, "learning_rate": 1.6936093323137963e-05, "loss": 1.2841, "mean_token_accuracy": 0.6752176831165949, "num_tokens": 894678154.0, "step": 5329 }, { "entropy": 1.6015850404898326, "epoch": 0.5855373376177528, "grad_norm": 0.7328821420669556, "learning_rate": 1.6934889745979886e-05, "loss": 1.2998, "mean_token_accuracy": 0.6716904441515604, "num_tokens": 894839877.0, "step": 5330 }, { "entropy": 1.6900799870491028, "epoch": 0.5856471945291258, "grad_norm": 0.6380017399787903, "learning_rate": 1.6933685980985224e-05, "loss": 1.4289, "mean_token_accuracy": 0.6551753083864847, "num_tokens": 894977460.0, "step": 5331 }, { "entropy": 1.7345438599586487, "epoch": 0.5857570514404987, "grad_norm": 0.7761285901069641, "learning_rate": 1.6932482028192074e-05, "loss": 1.2974, "mean_token_accuracy": 0.6700637092192968, "num_tokens": 895105165.0, "step": 5332 }, { "entropy": 1.6849959095319111, "epoch": 0.5858669083518717, "grad_norm": 0.6113293170928955, "learning_rate": 1.6931277887638537e-05, "loss": 1.4208, "mean_token_accuracy": 0.6713494658470154, "num_tokens": 895276035.0, "step": 5333 }, { "entropy": 1.7273829380671184, "epoch": 0.5859767652632446, "grad_norm": 0.6847710013389587, "learning_rate": 1.6930073559362732e-05, "loss": 1.3862, "mean_token_accuracy": 0.6546449114878973, "num_tokens": 895459349.0, "step": 5334 }, { "entropy": 1.7116826176643372, "epoch": 0.5860866221746176, "grad_norm": 0.6733460426330566, "learning_rate": 1.692886904340277e-05, "loss": 1.3885, "mean_token_accuracy": 0.6552889744440714, "num_tokens": 895648082.0, "step": 5335 }, { "entropy": 1.7135928471883137, "epoch": 0.5861964790859905, "grad_norm": 0.7363564968109131, "learning_rate": 1.6927664339796773e-05, "loss": 1.2452, "mean_token_accuracy": 0.6762679914633433, "num_tokens": 895792045.0, "step": 5336 }, { "entropy": 1.708931068579356, "epoch": 0.5863063359973635, "grad_norm": 0.6197008490562439, "learning_rate": 1.692645944858287e-05, "loss": 1.5095, "mean_token_accuracy": 0.6281079649925232, "num_tokens": 896003794.0, "step": 5337 }, { "entropy": 1.7107830742994945, "epoch": 0.5864161929087364, "grad_norm": 0.7372785210609436, "learning_rate": 1.69252543697992e-05, "loss": 1.4289, "mean_token_accuracy": 0.6556178480386734, "num_tokens": 896152637.0, "step": 5338 }, { "entropy": 1.6730316678682964, "epoch": 0.5865260498201094, "grad_norm": 0.7120349407196045, "learning_rate": 1.6924049103483896e-05, "loss": 1.4772, "mean_token_accuracy": 0.6575778424739838, "num_tokens": 896352786.0, "step": 5339 }, { "entropy": 1.7376192808151245, "epoch": 0.5866359067314822, "grad_norm": 0.7281277775764465, "learning_rate": 1.692284364967511e-05, "loss": 1.4657, "mean_token_accuracy": 0.6524485051631927, "num_tokens": 896493812.0, "step": 5340 }, { "entropy": 1.7086794475714366, "epoch": 0.5867457636428551, "grad_norm": 0.6899880766868591, "learning_rate": 1.6921638008410984e-05, "loss": 1.3757, "mean_token_accuracy": 0.6638755599657694, "num_tokens": 896656202.0, "step": 5341 }, { "entropy": 1.7046737869580586, "epoch": 0.5868556205542281, "grad_norm": 0.6136064529418945, "learning_rate": 1.692043217972969e-05, "loss": 1.4449, "mean_token_accuracy": 0.6559189210335413, "num_tokens": 896874423.0, "step": 5342 }, { "entropy": 1.7691023747126262, "epoch": 0.586965477465601, "grad_norm": 0.6861811876296997, "learning_rate": 1.6919226163669385e-05, "loss": 1.5529, "mean_token_accuracy": 0.6326283564170202, "num_tokens": 897049573.0, "step": 5343 }, { "entropy": 1.703104058901469, "epoch": 0.587075334376974, "grad_norm": 0.6944000720977783, "learning_rate": 1.691801996026824e-05, "loss": 1.3242, "mean_token_accuracy": 0.6579422255357107, "num_tokens": 897206230.0, "step": 5344 }, { "entropy": 1.766764263312022, "epoch": 0.5871851912883469, "grad_norm": 0.6695455312728882, "learning_rate": 1.6916813569564428e-05, "loss": 1.3056, "mean_token_accuracy": 0.6696594009796778, "num_tokens": 897363683.0, "step": 5345 }, { "entropy": 1.779427985350291, "epoch": 0.5872950481997199, "grad_norm": 0.7089075446128845, "learning_rate": 1.6915606991596132e-05, "loss": 1.3939, "mean_token_accuracy": 0.6523445149262747, "num_tokens": 897537706.0, "step": 5346 }, { "entropy": 1.7377333045005798, "epoch": 0.5874049051110928, "grad_norm": 0.7925550937652588, "learning_rate": 1.691440022640154e-05, "loss": 1.4406, "mean_token_accuracy": 0.651837890346845, "num_tokens": 897722349.0, "step": 5347 }, { "entropy": 1.673417071501414, "epoch": 0.5875147620224658, "grad_norm": 0.77927565574646, "learning_rate": 1.6913193274018848e-05, "loss": 1.3656, "mean_token_accuracy": 0.6586094995339712, "num_tokens": 897887572.0, "step": 5348 }, { "entropy": 1.6529791951179504, "epoch": 0.5876246189338387, "grad_norm": 0.7137171626091003, "learning_rate": 1.6911986134486252e-05, "loss": 1.3981, "mean_token_accuracy": 0.6643014947573344, "num_tokens": 898057976.0, "step": 5349 }, { "entropy": 1.7111935218175252, "epoch": 0.5877344758452117, "grad_norm": 0.6308448910713196, "learning_rate": 1.691077880784196e-05, "loss": 1.2921, "mean_token_accuracy": 0.6741099208593369, "num_tokens": 898208741.0, "step": 5350 }, { "entropy": 1.7605046530564625, "epoch": 0.5878443327565845, "grad_norm": 0.6674914956092834, "learning_rate": 1.6909571294124184e-05, "loss": 1.5404, "mean_token_accuracy": 0.6369107812643051, "num_tokens": 898382084.0, "step": 5351 }, { "entropy": 1.723831405242284, "epoch": 0.5879541896679575, "grad_norm": 0.8111042976379395, "learning_rate": 1.6908363593371134e-05, "loss": 1.4523, "mean_token_accuracy": 0.6531597375869751, "num_tokens": 898528919.0, "step": 5352 }, { "entropy": 1.7647210558255513, "epoch": 0.5880640465793304, "grad_norm": 0.6686208844184875, "learning_rate": 1.6907155705621044e-05, "loss": 1.452, "mean_token_accuracy": 0.6530319501956304, "num_tokens": 898678535.0, "step": 5353 }, { "entropy": 1.7188012103239696, "epoch": 0.5881739034907033, "grad_norm": 0.6231117248535156, "learning_rate": 1.6905947630912137e-05, "loss": 1.4291, "mean_token_accuracy": 0.6495647728443146, "num_tokens": 898862122.0, "step": 5354 }, { "entropy": 1.7267694274584453, "epoch": 0.5882837604020763, "grad_norm": 0.6327919960021973, "learning_rate": 1.6904739369282646e-05, "loss": 1.3621, "mean_token_accuracy": 0.6660894205172857, "num_tokens": 899026487.0, "step": 5355 }, { "entropy": 1.732596476872762, "epoch": 0.5883936173134492, "grad_norm": 0.6500836610794067, "learning_rate": 1.6903530920770818e-05, "loss": 1.3427, "mean_token_accuracy": 0.660812055071195, "num_tokens": 899202132.0, "step": 5356 }, { "entropy": 1.7914599478244781, "epoch": 0.5885034742248222, "grad_norm": 0.7583115100860596, "learning_rate": 1.6902322285414893e-05, "loss": 1.3587, "mean_token_accuracy": 0.6623788376649221, "num_tokens": 899361031.0, "step": 5357 }, { "entropy": 1.6479543348153431, "epoch": 0.5886133311361951, "grad_norm": 0.6269407868385315, "learning_rate": 1.6901113463253126e-05, "loss": 1.2791, "mean_token_accuracy": 0.6785383919874827, "num_tokens": 899498069.0, "step": 5358 }, { "entropy": 1.6799676318963368, "epoch": 0.5887231880475681, "grad_norm": 0.6469370722770691, "learning_rate": 1.6899904454323782e-05, "loss": 1.3679, "mean_token_accuracy": 0.6630029827356339, "num_tokens": 899695117.0, "step": 5359 }, { "entropy": 1.7061224579811096, "epoch": 0.588833044958941, "grad_norm": 0.6230675578117371, "learning_rate": 1.689869525866512e-05, "loss": 1.3548, "mean_token_accuracy": 0.6579580803712209, "num_tokens": 899856534.0, "step": 5360 }, { "entropy": 1.725221852461497, "epoch": 0.588942901870314, "grad_norm": 0.6036698818206787, "learning_rate": 1.689748587631541e-05, "loss": 1.392, "mean_token_accuracy": 0.6504359195629755, "num_tokens": 900066575.0, "step": 5361 }, { "entropy": 1.7007695138454437, "epoch": 0.5890527587816868, "grad_norm": 0.7017976641654968, "learning_rate": 1.689627630731293e-05, "loss": 1.3983, "mean_token_accuracy": 0.6593217353026072, "num_tokens": 900232711.0, "step": 5362 }, { "entropy": 1.6633349259694417, "epoch": 0.5891626156930598, "grad_norm": 0.6416093111038208, "learning_rate": 1.6895066551695958e-05, "loss": 1.2961, "mean_token_accuracy": 0.6652289082606634, "num_tokens": 900425804.0, "step": 5363 }, { "entropy": 1.7397405008474986, "epoch": 0.5892724726044327, "grad_norm": 0.7236183881759644, "learning_rate": 1.689385660950279e-05, "loss": 1.317, "mean_token_accuracy": 0.6659122854471207, "num_tokens": 900551096.0, "step": 5364 }, { "entropy": 1.753376970688502, "epoch": 0.5893823295158057, "grad_norm": 0.7134180068969727, "learning_rate": 1.6892646480771714e-05, "loss": 1.409, "mean_token_accuracy": 0.6489014178514481, "num_tokens": 900751781.0, "step": 5365 }, { "entropy": 1.7056757907072704, "epoch": 0.5894921864271786, "grad_norm": 0.7589015960693359, "learning_rate": 1.6891436165541033e-05, "loss": 1.3076, "mean_token_accuracy": 0.6677842835585276, "num_tokens": 900921952.0, "step": 5366 }, { "entropy": 1.6635324656963348, "epoch": 0.5896020433385515, "grad_norm": 0.6845018267631531, "learning_rate": 1.6890225663849053e-05, "loss": 1.4531, "mean_token_accuracy": 0.6488099843263626, "num_tokens": 901075512.0, "step": 5367 }, { "entropy": 1.7018173734347026, "epoch": 0.5897119002499245, "grad_norm": 0.6907767653465271, "learning_rate": 1.6889014975734086e-05, "loss": 1.4226, "mean_token_accuracy": 0.6496013949314753, "num_tokens": 901238201.0, "step": 5368 }, { "entropy": 1.618338406085968, "epoch": 0.5898217571612974, "grad_norm": 0.707096517086029, "learning_rate": 1.6887804101234442e-05, "loss": 1.1916, "mean_token_accuracy": 0.6871578395366669, "num_tokens": 901386796.0, "step": 5369 }, { "entropy": 1.7351085146268208, "epoch": 0.5899316140726704, "grad_norm": 0.7170294523239136, "learning_rate": 1.6886593040388458e-05, "loss": 1.4842, "mean_token_accuracy": 0.6426356732845306, "num_tokens": 901541333.0, "step": 5370 }, { "entropy": 1.6429972449938457, "epoch": 0.5900414709840432, "grad_norm": 0.5985599756240845, "learning_rate": 1.6885381793234457e-05, "loss": 1.3731, "mean_token_accuracy": 0.6494886229435602, "num_tokens": 901705291.0, "step": 5371 }, { "entropy": 1.7603021562099457, "epoch": 0.5901513278954162, "grad_norm": 0.6294792890548706, "learning_rate": 1.688417035981077e-05, "loss": 1.5715, "mean_token_accuracy": 0.6342183103164037, "num_tokens": 901907928.0, "step": 5372 }, { "entropy": 1.7004207074642181, "epoch": 0.5902611848067891, "grad_norm": 0.6703773140907288, "learning_rate": 1.688295874015575e-05, "loss": 1.315, "mean_token_accuracy": 0.667037362853686, "num_tokens": 902036622.0, "step": 5373 }, { "entropy": 1.7578519781430562, "epoch": 0.5903710417181621, "grad_norm": 0.7326668500900269, "learning_rate": 1.688174693430773e-05, "loss": 1.5802, "mean_token_accuracy": 0.6321331361929575, "num_tokens": 902229056.0, "step": 5374 }, { "entropy": 1.6765822271505992, "epoch": 0.590480898629535, "grad_norm": 0.7106185555458069, "learning_rate": 1.688053494230507e-05, "loss": 1.3502, "mean_token_accuracy": 0.6803947786490122, "num_tokens": 902370906.0, "step": 5375 }, { "entropy": 1.670622855424881, "epoch": 0.590590755540908, "grad_norm": 0.6781371831893921, "learning_rate": 1.687932276418613e-05, "loss": 1.327, "mean_token_accuracy": 0.6617625802755356, "num_tokens": 902526144.0, "step": 5376 }, { "entropy": 1.6448892652988434, "epoch": 0.5907006124522809, "grad_norm": 0.6684084534645081, "learning_rate": 1.6878110399989274e-05, "loss": 1.436, "mean_token_accuracy": 0.648414189616839, "num_tokens": 902715944.0, "step": 5377 }, { "entropy": 1.6711215178171794, "epoch": 0.5908104693636539, "grad_norm": 0.6007981896400452, "learning_rate": 1.6876897849752875e-05, "loss": 1.3683, "mean_token_accuracy": 0.6540864855051041, "num_tokens": 902883356.0, "step": 5378 }, { "entropy": 1.6590981384118397, "epoch": 0.5909203262750268, "grad_norm": 0.7310410737991333, "learning_rate": 1.6875685113515304e-05, "loss": 1.5214, "mean_token_accuracy": 0.6452625741561254, "num_tokens": 903075535.0, "step": 5379 }, { "entropy": 1.7468369503815968, "epoch": 0.5910301831863997, "grad_norm": 0.6988155841827393, "learning_rate": 1.6874472191314947e-05, "loss": 1.3991, "mean_token_accuracy": 0.6527721385161082, "num_tokens": 903201717.0, "step": 5380 }, { "entropy": 1.670701116323471, "epoch": 0.5911400400977727, "grad_norm": 0.8909992575645447, "learning_rate": 1.6873259083190193e-05, "loss": 1.2956, "mean_token_accuracy": 0.6799812763929367, "num_tokens": 903322154.0, "step": 5381 }, { "entropy": 1.6859600047270458, "epoch": 0.5912498970091455, "grad_norm": 0.694366991519928, "learning_rate": 1.6872045789179435e-05, "loss": 1.2805, "mean_token_accuracy": 0.6731400340795517, "num_tokens": 903459004.0, "step": 5382 }, { "entropy": 1.7458996375401814, "epoch": 0.5913597539205185, "grad_norm": 0.6606221795082092, "learning_rate": 1.6870832309321076e-05, "loss": 1.5232, "mean_token_accuracy": 0.6352545966704687, "num_tokens": 903637357.0, "step": 5383 }, { "entropy": 1.7042790353298187, "epoch": 0.5914696108318914, "grad_norm": 0.7215031385421753, "learning_rate": 1.6869618643653517e-05, "loss": 1.3872, "mean_token_accuracy": 0.6490479856729507, "num_tokens": 903786222.0, "step": 5384 }, { "entropy": 1.6865171492099762, "epoch": 0.5915794677432644, "grad_norm": 0.6201615333557129, "learning_rate": 1.6868404792215177e-05, "loss": 1.3853, "mean_token_accuracy": 0.6442459026972452, "num_tokens": 903972153.0, "step": 5385 }, { "entropy": 1.7092716892560322, "epoch": 0.5916893246546373, "grad_norm": 0.7146571278572083, "learning_rate": 1.686719075504447e-05, "loss": 1.4497, "mean_token_accuracy": 0.648409311970075, "num_tokens": 904151519.0, "step": 5386 }, { "entropy": 1.7341859141985576, "epoch": 0.5917991815660103, "grad_norm": 0.7208521962165833, "learning_rate": 1.6865976532179815e-05, "loss": 1.4917, "mean_token_accuracy": 0.6417253216107687, "num_tokens": 904338083.0, "step": 5387 }, { "entropy": 1.7087008953094482, "epoch": 0.5919090384773832, "grad_norm": 0.614952027797699, "learning_rate": 1.6864762123659645e-05, "loss": 1.6023, "mean_token_accuracy": 0.6175388197104136, "num_tokens": 904544903.0, "step": 5388 }, { "entropy": 1.6603162388006847, "epoch": 0.5920188953887562, "grad_norm": 0.702585756778717, "learning_rate": 1.68635475295224e-05, "loss": 1.3771, "mean_token_accuracy": 0.6522153516610464, "num_tokens": 904681339.0, "step": 5389 }, { "entropy": 1.7155030568440754, "epoch": 0.5921287523001291, "grad_norm": 0.6054456830024719, "learning_rate": 1.6862332749806515e-05, "loss": 1.5448, "mean_token_accuracy": 0.6397515883048376, "num_tokens": 904863737.0, "step": 5390 }, { "entropy": 1.630701909462611, "epoch": 0.5922386092115021, "grad_norm": 0.6318526864051819, "learning_rate": 1.6861117784550444e-05, "loss": 1.3956, "mean_token_accuracy": 0.6545381247997284, "num_tokens": 905036452.0, "step": 5391 }, { "entropy": 1.7371818919976552, "epoch": 0.592348466122875, "grad_norm": 0.642082691192627, "learning_rate": 1.6859902633792633e-05, "loss": 1.4205, "mean_token_accuracy": 0.6491349885861079, "num_tokens": 905230942.0, "step": 5392 }, { "entropy": 1.6382679243882496, "epoch": 0.592458323034248, "grad_norm": 0.7798934578895569, "learning_rate": 1.6858687297571544e-05, "loss": 1.3769, "mean_token_accuracy": 0.6772749076286951, "num_tokens": 905357564.0, "step": 5393 }, { "entropy": 1.726676990588506, "epoch": 0.5925681799456208, "grad_norm": 0.7513710260391235, "learning_rate": 1.6857471775925646e-05, "loss": 1.4649, "mean_token_accuracy": 0.6569699744383494, "num_tokens": 905517686.0, "step": 5394 }, { "entropy": 1.6619043449560802, "epoch": 0.5926780368569937, "grad_norm": 0.5446877479553223, "learning_rate": 1.68562560688934e-05, "loss": 1.5784, "mean_token_accuracy": 0.6213619013627371, "num_tokens": 905793640.0, "step": 5395 }, { "entropy": 1.6814130246639252, "epoch": 0.5927878937683667, "grad_norm": 0.7586706876754761, "learning_rate": 1.6855040176513294e-05, "loss": 1.3387, "mean_token_accuracy": 0.6601586639881134, "num_tokens": 905963864.0, "step": 5396 }, { "entropy": 1.6784147222836812, "epoch": 0.5928977506797396, "grad_norm": 0.6829437613487244, "learning_rate": 1.68538240988238e-05, "loss": 1.3593, "mean_token_accuracy": 0.656915009021759, "num_tokens": 906143020.0, "step": 5397 }, { "entropy": 1.6552779972553253, "epoch": 0.5930076075911126, "grad_norm": 0.6679306626319885, "learning_rate": 1.6852607835863416e-05, "loss": 1.3276, "mean_token_accuracy": 0.6578433761994044, "num_tokens": 906300883.0, "step": 5398 }, { "entropy": 1.64952618877093, "epoch": 0.5931174645024855, "grad_norm": 0.5992361307144165, "learning_rate": 1.6851391387670627e-05, "loss": 1.3282, "mean_token_accuracy": 0.6626505106687546, "num_tokens": 906476656.0, "step": 5399 }, { "entropy": 1.722427527109782, "epoch": 0.5932273214138585, "grad_norm": 0.7228727340698242, "learning_rate": 1.685017475428394e-05, "loss": 1.435, "mean_token_accuracy": 0.6515922645727793, "num_tokens": 906654249.0, "step": 5400 }, { "entropy": 1.7722227871418, "epoch": 0.5933371783252314, "grad_norm": 0.7820917963981628, "learning_rate": 1.6848957935741854e-05, "loss": 1.3257, "mean_token_accuracy": 0.6733630647261938, "num_tokens": 906818481.0, "step": 5401 }, { "entropy": 1.6531193753083546, "epoch": 0.5934470352366044, "grad_norm": 0.7379235625267029, "learning_rate": 1.684774093208289e-05, "loss": 1.3019, "mean_token_accuracy": 0.6712036629517873, "num_tokens": 906975820.0, "step": 5402 }, { "entropy": 1.6952547132968903, "epoch": 0.5935568921479772, "grad_norm": 0.7943975925445557, "learning_rate": 1.684652374334556e-05, "loss": 1.2802, "mean_token_accuracy": 0.6676329722007116, "num_tokens": 907096991.0, "step": 5403 }, { "entropy": 1.6849872171878815, "epoch": 0.5936667490593502, "grad_norm": 0.7202121019363403, "learning_rate": 1.6845306369568382e-05, "loss": 1.2806, "mean_token_accuracy": 0.6844563235839208, "num_tokens": 907235353.0, "step": 5404 }, { "entropy": 1.7030098736286163, "epoch": 0.5937766059707231, "grad_norm": 0.6924136877059937, "learning_rate": 1.68440888107899e-05, "loss": 1.4624, "mean_token_accuracy": 0.6487593750158945, "num_tokens": 907394416.0, "step": 5405 }, { "entropy": 1.688938041528066, "epoch": 0.5938864628820961, "grad_norm": 0.7259036898612976, "learning_rate": 1.6842871067048633e-05, "loss": 1.5779, "mean_token_accuracy": 0.6511958241462708, "num_tokens": 907558715.0, "step": 5406 }, { "entropy": 1.723609745502472, "epoch": 0.593996319793469, "grad_norm": 0.6626251339912415, "learning_rate": 1.6841653138383137e-05, "loss": 1.4507, "mean_token_accuracy": 0.6560012847185135, "num_tokens": 907730055.0, "step": 5407 }, { "entropy": 1.7112099329630535, "epoch": 0.5941061767048419, "grad_norm": 0.6510260701179504, "learning_rate": 1.6840435024831944e-05, "loss": 1.556, "mean_token_accuracy": 0.6182466248671213, "num_tokens": 907988991.0, "step": 5408 }, { "entropy": 1.7646510402361553, "epoch": 0.5942160336162149, "grad_norm": 0.9172123670578003, "learning_rate": 1.6839216726433616e-05, "loss": 1.3719, "mean_token_accuracy": 0.6614715158939362, "num_tokens": 908184419.0, "step": 5409 }, { "entropy": 1.7178015510241191, "epoch": 0.5943258905275878, "grad_norm": 0.7179532647132874, "learning_rate": 1.6837998243226712e-05, "loss": 1.415, "mean_token_accuracy": 0.6340557535489401, "num_tokens": 908434034.0, "step": 5410 }, { "entropy": 1.7165958086649578, "epoch": 0.5944357474389608, "grad_norm": 0.6283994317054749, "learning_rate": 1.6836779575249796e-05, "loss": 1.3807, "mean_token_accuracy": 0.6633997658888499, "num_tokens": 908589399.0, "step": 5411 }, { "entropy": 1.7015175918738048, "epoch": 0.5945456043503337, "grad_norm": 0.6560877561569214, "learning_rate": 1.6835560722541434e-05, "loss": 1.3933, "mean_token_accuracy": 0.653649906317393, "num_tokens": 908770306.0, "step": 5412 }, { "entropy": 1.6911265850067139, "epoch": 0.5946554612617067, "grad_norm": 0.6705179214477539, "learning_rate": 1.6834341685140205e-05, "loss": 1.4012, "mean_token_accuracy": 0.6415108740329742, "num_tokens": 908992577.0, "step": 5413 }, { "entropy": 1.6993640164534252, "epoch": 0.5947653181730795, "grad_norm": 0.6580267548561096, "learning_rate": 1.683312246308469e-05, "loss": 1.4285, "mean_token_accuracy": 0.6524773985147476, "num_tokens": 909181844.0, "step": 5414 }, { "entropy": 1.730893741051356, "epoch": 0.5948751750844525, "grad_norm": 0.6987996697425842, "learning_rate": 1.6831903056413477e-05, "loss": 1.3224, "mean_token_accuracy": 0.6611655751864115, "num_tokens": 909304767.0, "step": 5415 }, { "entropy": 1.7225580215454102, "epoch": 0.5949850319958254, "grad_norm": 0.8656018376350403, "learning_rate": 1.683068346516516e-05, "loss": 1.3047, "mean_token_accuracy": 0.6596146573623022, "num_tokens": 909433583.0, "step": 5416 }, { "entropy": 1.7331815858681996, "epoch": 0.5950948889071984, "grad_norm": 0.6861293911933899, "learning_rate": 1.682946368937834e-05, "loss": 1.4714, "mean_token_accuracy": 0.6402883330980936, "num_tokens": 909643336.0, "step": 5417 }, { "entropy": 1.7125715414683025, "epoch": 0.5952047458185713, "grad_norm": 0.6575915813446045, "learning_rate": 1.6828243729091626e-05, "loss": 1.4151, "mean_token_accuracy": 0.66132952272892, "num_tokens": 909778117.0, "step": 5418 }, { "entropy": 1.657194048166275, "epoch": 0.5953146027299443, "grad_norm": 0.7357683777809143, "learning_rate": 1.6827023584343615e-05, "loss": 1.2943, "mean_token_accuracy": 0.6715851227442423, "num_tokens": 909929876.0, "step": 5419 }, { "entropy": 1.662133087714513, "epoch": 0.5954244596413172, "grad_norm": 0.6675532460212708, "learning_rate": 1.682580325517294e-05, "loss": 1.3832, "mean_token_accuracy": 0.6506559252738953, "num_tokens": 910095389.0, "step": 5420 }, { "entropy": 1.7440082132816315, "epoch": 0.5955343165526901, "grad_norm": 0.7201164960861206, "learning_rate": 1.6824582741618215e-05, "loss": 1.4109, "mean_token_accuracy": 0.6457555194695791, "num_tokens": 910265221.0, "step": 5421 }, { "entropy": 1.7047818501790364, "epoch": 0.5956441734640631, "grad_norm": 0.5827786922454834, "learning_rate": 1.682336204371807e-05, "loss": 1.4126, "mean_token_accuracy": 0.6369202633698782, "num_tokens": 910458062.0, "step": 5422 }, { "entropy": 1.6782648364702861, "epoch": 0.595754030375436, "grad_norm": 0.8056417107582092, "learning_rate": 1.682214116151114e-05, "loss": 1.591, "mean_token_accuracy": 0.6487656235694885, "num_tokens": 910712200.0, "step": 5423 }, { "entropy": 1.6988307734330494, "epoch": 0.595863887286809, "grad_norm": 0.561825692653656, "learning_rate": 1.6820920095036068e-05, "loss": 1.3425, "mean_token_accuracy": 0.6547723909219106, "num_tokens": 910893187.0, "step": 5424 }, { "entropy": 1.611896812915802, "epoch": 0.5959737441981818, "grad_norm": 0.633956789970398, "learning_rate": 1.6819698844331497e-05, "loss": 1.3578, "mean_token_accuracy": 0.667489156126976, "num_tokens": 911126313.0, "step": 5425 }, { "entropy": 1.6960194905598958, "epoch": 0.5960836011095548, "grad_norm": 0.7324186563491821, "learning_rate": 1.6818477409436078e-05, "loss": 1.4714, "mean_token_accuracy": 0.645858551065127, "num_tokens": 911338849.0, "step": 5426 }, { "entropy": 1.655552089214325, "epoch": 0.5961934580209277, "grad_norm": 0.7095927000045776, "learning_rate": 1.6817255790388472e-05, "loss": 1.4871, "mean_token_accuracy": 0.656248539686203, "num_tokens": 911516630.0, "step": 5427 }, { "entropy": 1.7007441520690918, "epoch": 0.5963033149323007, "grad_norm": 0.7084668874740601, "learning_rate": 1.6816033987227342e-05, "loss": 1.5267, "mean_token_accuracy": 0.6427704642216364, "num_tokens": 911699013.0, "step": 5428 }, { "entropy": 1.7174124121665955, "epoch": 0.5964131718436736, "grad_norm": 0.6124807596206665, "learning_rate": 1.6814811999991357e-05, "loss": 1.4031, "mean_token_accuracy": 0.6490861674149832, "num_tokens": 911857669.0, "step": 5429 }, { "entropy": 1.6959696312745411, "epoch": 0.5965230287550466, "grad_norm": 0.9495405554771423, "learning_rate": 1.6813589828719195e-05, "loss": 1.1984, "mean_token_accuracy": 0.678948904077212, "num_tokens": 911990431.0, "step": 5430 }, { "entropy": 1.7092759013175964, "epoch": 0.5966328856664195, "grad_norm": 0.6147773861885071, "learning_rate": 1.6812367473449528e-05, "loss": 1.396, "mean_token_accuracy": 0.6636812587579092, "num_tokens": 912200494.0, "step": 5431 }, { "entropy": 1.709659606218338, "epoch": 0.5967427425777925, "grad_norm": 0.748707115650177, "learning_rate": 1.6811144934221057e-05, "loss": 1.2594, "mean_token_accuracy": 0.668513630827268, "num_tokens": 912323384.0, "step": 5432 }, { "entropy": 1.6889064808686574, "epoch": 0.5968525994891654, "grad_norm": 0.65199875831604, "learning_rate": 1.6809922211072462e-05, "loss": 1.3911, "mean_token_accuracy": 0.659337967634201, "num_tokens": 912475513.0, "step": 5433 }, { "entropy": 1.6920311848322551, "epoch": 0.5969624564005382, "grad_norm": 0.6664896607398987, "learning_rate": 1.680869930404245e-05, "loss": 1.5205, "mean_token_accuracy": 0.648727094133695, "num_tokens": 912661477.0, "step": 5434 }, { "entropy": 1.7712519864241283, "epoch": 0.5970723133119112, "grad_norm": 0.6912421584129333, "learning_rate": 1.680747621316972e-05, "loss": 1.3873, "mean_token_accuracy": 0.6598270038763682, "num_tokens": 912808322.0, "step": 5435 }, { "entropy": 1.6839702626069386, "epoch": 0.5971821702232841, "grad_norm": 0.6543471813201904, "learning_rate": 1.680625293849299e-05, "loss": 1.3244, "mean_token_accuracy": 0.6574779450893402, "num_tokens": 912985667.0, "step": 5436 }, { "entropy": 1.7119268377621968, "epoch": 0.5972920271346571, "grad_norm": 0.6256197690963745, "learning_rate": 1.6805029480050965e-05, "loss": 1.3939, "mean_token_accuracy": 0.6478220820426941, "num_tokens": 913142770.0, "step": 5437 }, { "entropy": 1.6727095246315002, "epoch": 0.59740188404603, "grad_norm": 0.6122837662696838, "learning_rate": 1.6803805837882373e-05, "loss": 1.3201, "mean_token_accuracy": 0.6651313950618108, "num_tokens": 913284604.0, "step": 5438 }, { "entropy": 1.6385413606961567, "epoch": 0.597511740957403, "grad_norm": 0.7011650800704956, "learning_rate": 1.6802582012025948e-05, "loss": 1.2644, "mean_token_accuracy": 0.6758585671583811, "num_tokens": 913418447.0, "step": 5439 }, { "entropy": 1.7271398703257244, "epoch": 0.5976215978687759, "grad_norm": 0.8002787232398987, "learning_rate": 1.680135800252041e-05, "loss": 1.475, "mean_token_accuracy": 0.6593929280837377, "num_tokens": 913595488.0, "step": 5440 }, { "entropy": 1.7234635253747304, "epoch": 0.5977314547801489, "grad_norm": 0.6236562728881836, "learning_rate": 1.6800133809404507e-05, "loss": 1.621, "mean_token_accuracy": 0.6271956562995911, "num_tokens": 913811760.0, "step": 5441 }, { "entropy": 1.663605233033498, "epoch": 0.5978413116915218, "grad_norm": 0.686917781829834, "learning_rate": 1.6798909432716987e-05, "loss": 1.3732, "mean_token_accuracy": 0.659597580631574, "num_tokens": 913955134.0, "step": 5442 }, { "entropy": 1.6567329565684001, "epoch": 0.5979511686028948, "grad_norm": 0.8035587668418884, "learning_rate": 1.679768487249659e-05, "loss": 1.4351, "mean_token_accuracy": 0.6736761331558228, "num_tokens": 914122511.0, "step": 5443 }, { "entropy": 1.7484365304311116, "epoch": 0.5980610255142677, "grad_norm": 0.6443024277687073, "learning_rate": 1.6796460128782084e-05, "loss": 1.6444, "mean_token_accuracy": 0.618500699599584, "num_tokens": 914346097.0, "step": 5444 }, { "entropy": 1.687730719645818, "epoch": 0.5981708824256406, "grad_norm": 0.6121844053268433, "learning_rate": 1.6795235201612226e-05, "loss": 1.4166, "mean_token_accuracy": 0.6520683070023855, "num_tokens": 914609850.0, "step": 5445 }, { "entropy": 1.6414126654465993, "epoch": 0.5982807393370135, "grad_norm": 0.7661187052726746, "learning_rate": 1.6794010091025785e-05, "loss": 1.206, "mean_token_accuracy": 0.6839122573534647, "num_tokens": 914733707.0, "step": 5446 }, { "entropy": 1.6634565393129985, "epoch": 0.5983905962483865, "grad_norm": 0.6488881707191467, "learning_rate": 1.6792784797061533e-05, "loss": 1.5076, "mean_token_accuracy": 0.641075556476911, "num_tokens": 914935285.0, "step": 5447 }, { "entropy": 1.713909884293874, "epoch": 0.5985004531597594, "grad_norm": 0.690524697303772, "learning_rate": 1.6791559319758256e-05, "loss": 1.3711, "mean_token_accuracy": 0.658517986536026, "num_tokens": 915077128.0, "step": 5448 }, { "entropy": 1.6755700409412384, "epoch": 0.5986103100711323, "grad_norm": 0.5590389370918274, "learning_rate": 1.6790333659154735e-05, "loss": 0.9613, "mean_token_accuracy": 0.689162497719129, "num_tokens": 915211004.0, "step": 5449 }, { "entropy": 1.7446764012177784, "epoch": 0.5987201669825053, "grad_norm": 0.6876842975616455, "learning_rate": 1.678910781528976e-05, "loss": 1.3854, "mean_token_accuracy": 0.6610653201738993, "num_tokens": 915326622.0, "step": 5450 }, { "entropy": 1.690832147995631, "epoch": 0.5988300238938782, "grad_norm": 0.5802832841873169, "learning_rate": 1.6787881788202135e-05, "loss": 1.4026, "mean_token_accuracy": 0.6581792682409286, "num_tokens": 915547066.0, "step": 5451 }, { "entropy": 1.728159526983897, "epoch": 0.5989398808052512, "grad_norm": 0.7079800367355347, "learning_rate": 1.6786655577930658e-05, "loss": 1.5643, "mean_token_accuracy": 0.6469894895950953, "num_tokens": 915692436.0, "step": 5452 }, { "entropy": 1.6673387587070465, "epoch": 0.5990497377166241, "grad_norm": 0.6151366829872131, "learning_rate": 1.678542918451414e-05, "loss": 1.4437, "mean_token_accuracy": 0.6431677391131719, "num_tokens": 915919308.0, "step": 5453 }, { "entropy": 1.7032847106456757, "epoch": 0.5991595946279971, "grad_norm": 0.7041369676589966, "learning_rate": 1.6784202607991396e-05, "loss": 1.4604, "mean_token_accuracy": 0.6400700211524963, "num_tokens": 916121360.0, "step": 5454 }, { "entropy": 1.759365479151408, "epoch": 0.59926945153937, "grad_norm": 0.745280385017395, "learning_rate": 1.6782975848401244e-05, "loss": 1.3423, "mean_token_accuracy": 0.6642535577217737, "num_tokens": 916264675.0, "step": 5455 }, { "entropy": 1.792235126097997, "epoch": 0.5993793084507429, "grad_norm": 0.69027179479599, "learning_rate": 1.6781748905782512e-05, "loss": 1.5797, "mean_token_accuracy": 0.6236337820688883, "num_tokens": 916506247.0, "step": 5456 }, { "entropy": 1.687427928050359, "epoch": 0.5994891653621158, "grad_norm": 0.7181799411773682, "learning_rate": 1.6780521780174032e-05, "loss": 1.4468, "mean_token_accuracy": 0.6493152479330698, "num_tokens": 916654838.0, "step": 5457 }, { "entropy": 1.7065490186214447, "epoch": 0.5995990222734888, "grad_norm": 0.7162134051322937, "learning_rate": 1.6779294471614647e-05, "loss": 1.322, "mean_token_accuracy": 0.662808025876681, "num_tokens": 916801918.0, "step": 5458 }, { "entropy": 1.6874915262063344, "epoch": 0.5997088791848617, "grad_norm": 0.6525692343711853, "learning_rate": 1.6778066980143194e-05, "loss": 1.3566, "mean_token_accuracy": 0.6498881727457047, "num_tokens": 916998423.0, "step": 5459 }, { "entropy": 1.747247964143753, "epoch": 0.5998187360962347, "grad_norm": 0.6652222275733948, "learning_rate": 1.6776839305798523e-05, "loss": 1.5106, "mean_token_accuracy": 0.6408848663171133, "num_tokens": 917183535.0, "step": 5460 }, { "entropy": 1.6877395709355671, "epoch": 0.5999285930076076, "grad_norm": 0.6594512462615967, "learning_rate": 1.6775611448619494e-05, "loss": 1.4626, "mean_token_accuracy": 0.6533342649539312, "num_tokens": 917332369.0, "step": 5461 }, { "entropy": 1.6684443255265553, "epoch": 0.6000384499189805, "grad_norm": 0.5910911560058594, "learning_rate": 1.6774383408644957e-05, "loss": 1.402, "mean_token_accuracy": 0.664065291484197, "num_tokens": 917501787.0, "step": 5462 }, { "entropy": 1.7669705549875896, "epoch": 0.6001483068303535, "grad_norm": 0.6818894743919373, "learning_rate": 1.6773155185913795e-05, "loss": 1.5071, "mean_token_accuracy": 0.6319067428509394, "num_tokens": 917694774.0, "step": 5463 }, { "entropy": 1.640412410100301, "epoch": 0.6002581637417264, "grad_norm": 0.6774711012840271, "learning_rate": 1.6771926780464874e-05, "loss": 1.3312, "mean_token_accuracy": 0.6684557646512985, "num_tokens": 917895672.0, "step": 5464 }, { "entropy": 1.6497747401396434, "epoch": 0.6003680206530994, "grad_norm": 0.6946020722389221, "learning_rate": 1.677069819233707e-05, "loss": 1.4321, "mean_token_accuracy": 0.6589450190464655, "num_tokens": 918106673.0, "step": 5465 }, { "entropy": 1.751279056072235, "epoch": 0.6004778775644722, "grad_norm": 0.7799018025398254, "learning_rate": 1.6769469421569265e-05, "loss": 1.3527, "mean_token_accuracy": 0.6531901061534882, "num_tokens": 918293869.0, "step": 5466 }, { "entropy": 1.7017297645409901, "epoch": 0.6005877344758452, "grad_norm": 0.7740174531936646, "learning_rate": 1.6768240468200354e-05, "loss": 1.3378, "mean_token_accuracy": 0.6581835597753525, "num_tokens": 918457119.0, "step": 5467 }, { "entropy": 1.6877376039822896, "epoch": 0.6006975913872181, "grad_norm": 0.6579194664955139, "learning_rate": 1.6767011332269233e-05, "loss": 1.4362, "mean_token_accuracy": 0.6469058791796366, "num_tokens": 918644483.0, "step": 5468 }, { "entropy": 1.7218892375628154, "epoch": 0.6008074482985911, "grad_norm": 0.63138747215271, "learning_rate": 1.67657820138148e-05, "loss": 1.4232, "mean_token_accuracy": 0.6666744997104009, "num_tokens": 918799975.0, "step": 5469 }, { "entropy": 1.7323795755704243, "epoch": 0.600917305209964, "grad_norm": 0.7608441710472107, "learning_rate": 1.6764552512875967e-05, "loss": 1.4874, "mean_token_accuracy": 0.6452402472496033, "num_tokens": 918920586.0, "step": 5470 }, { "entropy": 1.6932736833890278, "epoch": 0.601027162121337, "grad_norm": 0.6323309540748596, "learning_rate": 1.6763322829491643e-05, "loss": 1.4062, "mean_token_accuracy": 0.6532955567042033, "num_tokens": 919121611.0, "step": 5471 }, { "entropy": 1.6725764473279316, "epoch": 0.6011370190327099, "grad_norm": 0.8088108897209167, "learning_rate": 1.6762092963700746e-05, "loss": 1.2678, "mean_token_accuracy": 0.6674557526906332, "num_tokens": 919245911.0, "step": 5472 }, { "entropy": 1.758307288090388, "epoch": 0.6012468759440829, "grad_norm": 0.7137458324432373, "learning_rate": 1.676086291554221e-05, "loss": 1.3491, "mean_token_accuracy": 0.6584253559509913, "num_tokens": 919384474.0, "step": 5473 }, { "entropy": 1.6837130685647328, "epoch": 0.6013567328554558, "grad_norm": 1.0152652263641357, "learning_rate": 1.675963268505495e-05, "loss": 1.3494, "mean_token_accuracy": 0.6644057482481003, "num_tokens": 919528252.0, "step": 5474 }, { "entropy": 1.654979646205902, "epoch": 0.6014665897668287, "grad_norm": 0.7153851985931396, "learning_rate": 1.6758402272277915e-05, "loss": 1.4471, "mean_token_accuracy": 0.6493087609608968, "num_tokens": 919760402.0, "step": 5475 }, { "entropy": 1.7279058396816254, "epoch": 0.6015764466782016, "grad_norm": 0.6236484050750732, "learning_rate": 1.6757171677250045e-05, "loss": 1.4912, "mean_token_accuracy": 0.6396552622318268, "num_tokens": 919969841.0, "step": 5476 }, { "entropy": 1.710862507422765, "epoch": 0.6016863035895745, "grad_norm": 0.7031605839729309, "learning_rate": 1.675594090001028e-05, "loss": 1.3442, "mean_token_accuracy": 0.6595994979143143, "num_tokens": 920101743.0, "step": 5477 }, { "entropy": 1.682221661011378, "epoch": 0.6017961605009475, "grad_norm": 0.6580245494842529, "learning_rate": 1.6754709940597584e-05, "loss": 1.3735, "mean_token_accuracy": 0.6630414674679438, "num_tokens": 920264770.0, "step": 5478 }, { "entropy": 1.731015940507253, "epoch": 0.6019060174123204, "grad_norm": 0.6210698485374451, "learning_rate": 1.675347879905091e-05, "loss": 1.3219, "mean_token_accuracy": 0.6625956445932388, "num_tokens": 920391189.0, "step": 5479 }, { "entropy": 1.682970017194748, "epoch": 0.6020158743236934, "grad_norm": 0.7015461325645447, "learning_rate": 1.6752247475409226e-05, "loss": 1.6362, "mean_token_accuracy": 0.6371884370843569, "num_tokens": 920593115.0, "step": 5480 }, { "entropy": 1.6944019198417664, "epoch": 0.6021257312350663, "grad_norm": 0.6100954413414001, "learning_rate": 1.67510159697115e-05, "loss": 1.3737, "mean_token_accuracy": 0.6664390613635381, "num_tokens": 920749916.0, "step": 5481 }, { "entropy": 1.7003353436787922, "epoch": 0.6022355881464393, "grad_norm": 0.7275522351264954, "learning_rate": 1.6749784281996712e-05, "loss": 1.414, "mean_token_accuracy": 0.6477395196755728, "num_tokens": 920929325.0, "step": 5482 }, { "entropy": 1.687132587035497, "epoch": 0.6023454450578122, "grad_norm": 0.6985853910446167, "learning_rate": 1.674855241230384e-05, "loss": 1.3656, "mean_token_accuracy": 0.6509382625420889, "num_tokens": 921084679.0, "step": 5483 }, { "entropy": 1.6675028403600056, "epoch": 0.6024553019691852, "grad_norm": 0.722407877445221, "learning_rate": 1.6747320360671873e-05, "loss": 1.2328, "mean_token_accuracy": 0.6737691164016724, "num_tokens": 921194486.0, "step": 5484 }, { "entropy": 1.712370087703069, "epoch": 0.6025651588805581, "grad_norm": 0.7098826169967651, "learning_rate": 1.674608812713981e-05, "loss": 1.3426, "mean_token_accuracy": 0.6622453580300013, "num_tokens": 921331468.0, "step": 5485 }, { "entropy": 1.7794211904207866, "epoch": 0.6026750157919311, "grad_norm": 0.8509989380836487, "learning_rate": 1.6744855711746647e-05, "loss": 1.4642, "mean_token_accuracy": 0.6489201337099075, "num_tokens": 921468742.0, "step": 5486 }, { "entropy": 1.7174873848756154, "epoch": 0.6027848727033039, "grad_norm": 0.6344740986824036, "learning_rate": 1.674362311453139e-05, "loss": 1.3383, "mean_token_accuracy": 0.662666474779447, "num_tokens": 921629136.0, "step": 5487 }, { "entropy": 1.7565912504990895, "epoch": 0.6028947296146769, "grad_norm": 0.6211560964584351, "learning_rate": 1.6742390335533044e-05, "loss": 1.59, "mean_token_accuracy": 0.6265095522006353, "num_tokens": 921833335.0, "step": 5488 }, { "entropy": 1.6729480028152466, "epoch": 0.6030045865260498, "grad_norm": 0.6941152215003967, "learning_rate": 1.6741157374790636e-05, "loss": 1.3861, "mean_token_accuracy": 0.6670657147963842, "num_tokens": 922007520.0, "step": 5489 }, { "entropy": 1.7290611962477367, "epoch": 0.6031144434374227, "grad_norm": 0.6478551030158997, "learning_rate": 1.673992423234318e-05, "loss": 1.4021, "mean_token_accuracy": 0.6645165433486303, "num_tokens": 922188846.0, "step": 5490 }, { "entropy": 1.6348287463188171, "epoch": 0.6032243003487957, "grad_norm": 0.5874102115631104, "learning_rate": 1.6738690908229714e-05, "loss": 1.4658, "mean_token_accuracy": 0.645725334684054, "num_tokens": 922380313.0, "step": 5491 }, { "entropy": 1.7524794340133667, "epoch": 0.6033341572601686, "grad_norm": 0.8978797793388367, "learning_rate": 1.6737457402489266e-05, "loss": 1.53, "mean_token_accuracy": 0.6540175626675288, "num_tokens": 922512596.0, "step": 5492 }, { "entropy": 1.6377544303735097, "epoch": 0.6034440141715416, "grad_norm": 0.7050206661224365, "learning_rate": 1.673622371516087e-05, "loss": 1.4592, "mean_token_accuracy": 0.6444994062185287, "num_tokens": 922750463.0, "step": 5493 }, { "entropy": 1.6732242107391357, "epoch": 0.6035538710829145, "grad_norm": 0.738034188747406, "learning_rate": 1.673498984628359e-05, "loss": 1.3387, "mean_token_accuracy": 0.66953673462073, "num_tokens": 922907666.0, "step": 5494 }, { "entropy": 1.6891792019208272, "epoch": 0.6036637279942875, "grad_norm": 0.6616693735122681, "learning_rate": 1.673375579589646e-05, "loss": 1.376, "mean_token_accuracy": 0.6620072374741236, "num_tokens": 923087081.0, "step": 5495 }, { "entropy": 1.740149160226186, "epoch": 0.6037735849056604, "grad_norm": 0.7156772613525391, "learning_rate": 1.673252156403854e-05, "loss": 1.2699, "mean_token_accuracy": 0.6738031804561615, "num_tokens": 923216649.0, "step": 5496 }, { "entropy": 1.640553206205368, "epoch": 0.6038834418170334, "grad_norm": 0.5988604426383972, "learning_rate": 1.6731287150748894e-05, "loss": 1.3356, "mean_token_accuracy": 0.6715440601110458, "num_tokens": 923402586.0, "step": 5497 }, { "entropy": 1.646197388569514, "epoch": 0.6039932987284062, "grad_norm": 0.6301091313362122, "learning_rate": 1.67300525560666e-05, "loss": 1.4681, "mean_token_accuracy": 0.6478618681430817, "num_tokens": 923608007.0, "step": 5498 }, { "entropy": 1.7597610652446747, "epoch": 0.6041031556397792, "grad_norm": 0.6319087147712708, "learning_rate": 1.6728817780030718e-05, "loss": 1.4828, "mean_token_accuracy": 0.6508717288573583, "num_tokens": 923815695.0, "step": 5499 }, { "entropy": 1.7612847288449605, "epoch": 0.6042130125511521, "grad_norm": 0.7959107160568237, "learning_rate": 1.6727582822680336e-05, "loss": 1.5721, "mean_token_accuracy": 0.6293148795763651, "num_tokens": 923990182.0, "step": 5500 }, { "entropy": 1.705323855082194, "epoch": 0.6043228694625251, "grad_norm": 0.6401039361953735, "learning_rate": 1.672634768405454e-05, "loss": 1.5592, "mean_token_accuracy": 0.6398047258456548, "num_tokens": 924204209.0, "step": 5501 }, { "entropy": 1.726824293533961, "epoch": 0.604432726373898, "grad_norm": 0.7112311720848083, "learning_rate": 1.672511236419242e-05, "loss": 1.3496, "mean_token_accuracy": 0.6554846813281378, "num_tokens": 924376901.0, "step": 5502 }, { "entropy": 1.6738229095935822, "epoch": 0.6045425832852709, "grad_norm": 0.6750335693359375, "learning_rate": 1.672387686313307e-05, "loss": 1.4294, "mean_token_accuracy": 0.6564183880885442, "num_tokens": 924580944.0, "step": 5503 }, { "entropy": 1.7278579473495483, "epoch": 0.6046524401966439, "grad_norm": 0.7934318780899048, "learning_rate": 1.6722641180915602e-05, "loss": 1.3263, "mean_token_accuracy": 0.6604352543751398, "num_tokens": 924708411.0, "step": 5504 }, { "entropy": 1.7267205317815144, "epoch": 0.6047622971080168, "grad_norm": 0.636107325553894, "learning_rate": 1.6721405317579116e-05, "loss": 1.3168, "mean_token_accuracy": 0.6644595215717951, "num_tokens": 924842526.0, "step": 5505 }, { "entropy": 1.719021886587143, "epoch": 0.6048721540193898, "grad_norm": 0.7312912940979004, "learning_rate": 1.6720169273162733e-05, "loss": 1.1689, "mean_token_accuracy": 0.6934764335552851, "num_tokens": 924950234.0, "step": 5506 }, { "entropy": 1.6559423406918843, "epoch": 0.6049820109307626, "grad_norm": 0.6602075099945068, "learning_rate": 1.6718933047705567e-05, "loss": 1.3758, "mean_token_accuracy": 0.6585381329059601, "num_tokens": 925108872.0, "step": 5507 }, { "entropy": 1.6688361366589863, "epoch": 0.6050918678421356, "grad_norm": 0.7089828848838806, "learning_rate": 1.6717696641246747e-05, "loss": 1.4099, "mean_token_accuracy": 0.6570945431788763, "num_tokens": 925316481.0, "step": 5508 }, { "entropy": 1.7144095798333485, "epoch": 0.6052017247535085, "grad_norm": 0.6471745371818542, "learning_rate": 1.6716460053825405e-05, "loss": 1.4705, "mean_token_accuracy": 0.6423929333686829, "num_tokens": 925487174.0, "step": 5509 }, { "entropy": 1.6315881411234539, "epoch": 0.6053115816648815, "grad_norm": 0.6560130715370178, "learning_rate": 1.671522328548068e-05, "loss": 1.2288, "mean_token_accuracy": 0.6750545849402746, "num_tokens": 925663600.0, "step": 5510 }, { "entropy": 1.6976039409637451, "epoch": 0.6054214385762544, "grad_norm": 0.6420303583145142, "learning_rate": 1.6713986336251712e-05, "loss": 1.3936, "mean_token_accuracy": 0.6477069954077402, "num_tokens": 925858263.0, "step": 5511 }, { "entropy": 1.6715827286243439, "epoch": 0.6055312954876274, "grad_norm": 0.6958088874816895, "learning_rate": 1.671274920617765e-05, "loss": 1.3601, "mean_token_accuracy": 0.6639330287774404, "num_tokens": 925989227.0, "step": 5512 }, { "entropy": 1.7799000144004822, "epoch": 0.6056411523990003, "grad_norm": 0.7925567030906677, "learning_rate": 1.671151189529765e-05, "loss": 1.4278, "mean_token_accuracy": 0.6552622616291046, "num_tokens": 926130198.0, "step": 5513 }, { "entropy": 1.7007222870985668, "epoch": 0.6057510093103733, "grad_norm": 0.7391920685768127, "learning_rate": 1.6710274403650878e-05, "loss": 1.3117, "mean_token_accuracy": 0.6625027805566788, "num_tokens": 926257462.0, "step": 5514 }, { "entropy": 1.7189228733380635, "epoch": 0.6058608662217462, "grad_norm": 0.6808819770812988, "learning_rate": 1.6709036731276487e-05, "loss": 1.5554, "mean_token_accuracy": 0.6345362067222595, "num_tokens": 926453546.0, "step": 5515 }, { "entropy": 1.7309414744377136, "epoch": 0.6059707231331191, "grad_norm": 0.7075436115264893, "learning_rate": 1.670779887821366e-05, "loss": 1.5652, "mean_token_accuracy": 0.6544856876134872, "num_tokens": 926613311.0, "step": 5516 }, { "entropy": 1.7191075285275776, "epoch": 0.6060805800444921, "grad_norm": 0.6736297011375427, "learning_rate": 1.670656084450157e-05, "loss": 1.4221, "mean_token_accuracy": 0.6376722504695257, "num_tokens": 926779157.0, "step": 5517 }, { "entropy": 1.776821494102478, "epoch": 0.6061904369558649, "grad_norm": 0.7885594964027405, "learning_rate": 1.6705322630179398e-05, "loss": 1.3079, "mean_token_accuracy": 0.6540821691354116, "num_tokens": 926900181.0, "step": 5518 }, { "entropy": 1.6601528028647106, "epoch": 0.6063002938672379, "grad_norm": 1.221449613571167, "learning_rate": 1.6704084235286336e-05, "loss": 1.4191, "mean_token_accuracy": 0.6419506842891375, "num_tokens": 927094727.0, "step": 5519 }, { "entropy": 1.6796456972757976, "epoch": 0.6064101507786108, "grad_norm": 0.695866048336029, "learning_rate": 1.6702845659861585e-05, "loss": 1.4795, "mean_token_accuracy": 0.664507215221723, "num_tokens": 927270945.0, "step": 5520 }, { "entropy": 1.752757598956426, "epoch": 0.6065200076899838, "grad_norm": 0.6824564337730408, "learning_rate": 1.6701606903944328e-05, "loss": 1.4799, "mean_token_accuracy": 0.6392989705006281, "num_tokens": 927413568.0, "step": 5521 }, { "entropy": 1.7567788263161976, "epoch": 0.6066298646013567, "grad_norm": 0.6957651376724243, "learning_rate": 1.6700367967573786e-05, "loss": 1.4415, "mean_token_accuracy": 0.6470659871896108, "num_tokens": 927553465.0, "step": 5522 }, { "entropy": 1.7466229895750682, "epoch": 0.6067397215127297, "grad_norm": 0.6938997507095337, "learning_rate": 1.669912885078917e-05, "loss": 1.3197, "mean_token_accuracy": 0.6793912301460902, "num_tokens": 927687114.0, "step": 5523 }, { "entropy": 1.7211142977078755, "epoch": 0.6068495784241026, "grad_norm": 0.7023671865463257, "learning_rate": 1.669788955362969e-05, "loss": 1.4972, "mean_token_accuracy": 0.6480685224135717, "num_tokens": 927872297.0, "step": 5524 }, { "entropy": 1.7373531758785248, "epoch": 0.6069594353354756, "grad_norm": 0.7833293080329895, "learning_rate": 1.6696650076134576e-05, "loss": 1.449, "mean_token_accuracy": 0.652032271027565, "num_tokens": 928010124.0, "step": 5525 }, { "entropy": 1.6578099131584167, "epoch": 0.6070692922468485, "grad_norm": 0.6297708749771118, "learning_rate": 1.6695410418343054e-05, "loss": 1.283, "mean_token_accuracy": 0.6739129473765691, "num_tokens": 928139059.0, "step": 5526 }, { "entropy": 1.7742358942826588, "epoch": 0.6071791491582215, "grad_norm": 0.6884430646896362, "learning_rate": 1.6694170580294356e-05, "loss": 1.4999, "mean_token_accuracy": 0.622523158788681, "num_tokens": 928385655.0, "step": 5527 }, { "entropy": 1.7077501217524211, "epoch": 0.6072890060695944, "grad_norm": 0.7137337923049927, "learning_rate": 1.6692930562027725e-05, "loss": 1.4385, "mean_token_accuracy": 0.6593621472517649, "num_tokens": 928531980.0, "step": 5528 }, { "entropy": 1.7548049290974934, "epoch": 0.6073988629809672, "grad_norm": 0.5949137806892395, "learning_rate": 1.6691690363582412e-05, "loss": 1.3751, "mean_token_accuracy": 0.6646410326162974, "num_tokens": 928686528.0, "step": 5529 }, { "entropy": 1.7064041197299957, "epoch": 0.6075087198923402, "grad_norm": 0.660309910774231, "learning_rate": 1.669044998499766e-05, "loss": 1.4325, "mean_token_accuracy": 0.6318048238754272, "num_tokens": 928910481.0, "step": 5530 }, { "entropy": 1.6907892227172852, "epoch": 0.6076185768037131, "grad_norm": 0.6489412784576416, "learning_rate": 1.668920942631273e-05, "loss": 1.4504, "mean_token_accuracy": 0.6479069888591766, "num_tokens": 929134057.0, "step": 5531 }, { "entropy": 1.7279614110787709, "epoch": 0.6077284337150861, "grad_norm": 0.6888580322265625, "learning_rate": 1.6687968687566885e-05, "loss": 1.4058, "mean_token_accuracy": 0.6593583077192307, "num_tokens": 929299191.0, "step": 5532 }, { "entropy": 1.7116707563400269, "epoch": 0.607838290626459, "grad_norm": 0.5436456203460693, "learning_rate": 1.6686727768799393e-05, "loss": 1.2332, "mean_token_accuracy": 0.684593141078949, "num_tokens": 929531505.0, "step": 5533 }, { "entropy": 1.675088753302892, "epoch": 0.607948147537832, "grad_norm": 0.6057883501052856, "learning_rate": 1.6685486670049533e-05, "loss": 1.4831, "mean_token_accuracy": 0.6440512239933014, "num_tokens": 929694373.0, "step": 5534 }, { "entropy": 1.7029797037442524, "epoch": 0.6080580044492049, "grad_norm": 0.695314347743988, "learning_rate": 1.668424539135658e-05, "loss": 1.4127, "mean_token_accuracy": 0.6551998356978098, "num_tokens": 929877449.0, "step": 5535 }, { "entropy": 1.6930132706960042, "epoch": 0.6081678613605779, "grad_norm": 0.718914806842804, "learning_rate": 1.668300393275982e-05, "loss": 1.3504, "mean_token_accuracy": 0.6599302838246027, "num_tokens": 930056568.0, "step": 5536 }, { "entropy": 1.699955701828003, "epoch": 0.6082777182719508, "grad_norm": 0.8041271567344666, "learning_rate": 1.6681762294298548e-05, "loss": 1.3738, "mean_token_accuracy": 0.6663634926080704, "num_tokens": 930193126.0, "step": 5537 }, { "entropy": 1.6799223522345226, "epoch": 0.6083875751833238, "grad_norm": 0.6648184061050415, "learning_rate": 1.6680520476012064e-05, "loss": 1.5068, "mean_token_accuracy": 0.6448338876167933, "num_tokens": 930414595.0, "step": 5538 }, { "entropy": 1.7127573291460674, "epoch": 0.6084974320946966, "grad_norm": 0.7391481399536133, "learning_rate": 1.667927847793966e-05, "loss": 1.3562, "mean_token_accuracy": 0.6645816365877787, "num_tokens": 930543870.0, "step": 5539 }, { "entropy": 1.6624590853850048, "epoch": 0.6086072890060696, "grad_norm": 0.6188486814498901, "learning_rate": 1.6678036300120653e-05, "loss": 1.37, "mean_token_accuracy": 0.6666365414857864, "num_tokens": 930747185.0, "step": 5540 }, { "entropy": 1.7099547684192657, "epoch": 0.6087171459174425, "grad_norm": 0.5991356372833252, "learning_rate": 1.6676793942594357e-05, "loss": 1.3754, "mean_token_accuracy": 0.6520405461390814, "num_tokens": 930921087.0, "step": 5541 }, { "entropy": 1.6891375382741292, "epoch": 0.6088270028288155, "grad_norm": 0.8991249799728394, "learning_rate": 1.667555140540009e-05, "loss": 1.3934, "mean_token_accuracy": 0.6690708696842194, "num_tokens": 931109978.0, "step": 5542 }, { "entropy": 1.7334303557872772, "epoch": 0.6089368597401884, "grad_norm": 0.694080114364624, "learning_rate": 1.667430868857718e-05, "loss": 1.6048, "mean_token_accuracy": 0.6428815325101217, "num_tokens": 931323499.0, "step": 5543 }, { "entropy": 1.7433924575646718, "epoch": 0.6090467166515613, "grad_norm": 0.7323341965675354, "learning_rate": 1.6673065792164954e-05, "loss": 1.3905, "mean_token_accuracy": 0.6570341090361277, "num_tokens": 931470697.0, "step": 5544 }, { "entropy": 1.7255782683690388, "epoch": 0.6091565735629343, "grad_norm": 0.6894171237945557, "learning_rate": 1.6671822716202754e-05, "loss": 1.236, "mean_token_accuracy": 0.6878236333529154, "num_tokens": 931601393.0, "step": 5545 }, { "entropy": 1.706501563390096, "epoch": 0.6092664304743072, "grad_norm": 0.7516751289367676, "learning_rate": 1.667057946072992e-05, "loss": 1.3684, "mean_token_accuracy": 0.6665105720361074, "num_tokens": 931776426.0, "step": 5546 }, { "entropy": 1.7160039345423381, "epoch": 0.6093762873856802, "grad_norm": 1.0031617879867554, "learning_rate": 1.6669336025785802e-05, "loss": 1.3054, "mean_token_accuracy": 0.6578457355499268, "num_tokens": 931890991.0, "step": 5547 }, { "entropy": 1.6809919476509094, "epoch": 0.6094861442970531, "grad_norm": 0.5633423924446106, "learning_rate": 1.6668092411409752e-05, "loss": 1.3895, "mean_token_accuracy": 0.6533411145210266, "num_tokens": 932072346.0, "step": 5548 }, { "entropy": 1.6685432493686676, "epoch": 0.609596001208426, "grad_norm": 0.7651619911193848, "learning_rate": 1.6666848617641134e-05, "loss": 1.3651, "mean_token_accuracy": 0.6608080416917801, "num_tokens": 932234097.0, "step": 5549 }, { "entropy": 1.6282340387503307, "epoch": 0.6097058581197989, "grad_norm": 0.5770187377929688, "learning_rate": 1.666560464451931e-05, "loss": 1.3481, "mean_token_accuracy": 0.6698790689309438, "num_tokens": 932407066.0, "step": 5550 }, { "entropy": 1.7054801086584728, "epoch": 0.6098157150311719, "grad_norm": 0.7385131120681763, "learning_rate": 1.666436049208365e-05, "loss": 1.276, "mean_token_accuracy": 0.668360099196434, "num_tokens": 932523952.0, "step": 5551 }, { "entropy": 1.743846187988917, "epoch": 0.6099255719425448, "grad_norm": 0.7189563512802124, "learning_rate": 1.6663116160373532e-05, "loss": 1.4179, "mean_token_accuracy": 0.6598916351795197, "num_tokens": 932686362.0, "step": 5552 }, { "entropy": 1.7578631341457367, "epoch": 0.6100354288539178, "grad_norm": 0.7211261987686157, "learning_rate": 1.6661871649428344e-05, "loss": 1.2996, "mean_token_accuracy": 0.6601702322562536, "num_tokens": 932795312.0, "step": 5553 }, { "entropy": 1.7131268680095673, "epoch": 0.6101452857652907, "grad_norm": 0.6948407888412476, "learning_rate": 1.6660626959287468e-05, "loss": 1.4821, "mean_token_accuracy": 0.6617609262466431, "num_tokens": 932979185.0, "step": 5554 }, { "entropy": 1.7209480007489522, "epoch": 0.6102551426766637, "grad_norm": 0.6682031750679016, "learning_rate": 1.66593820899903e-05, "loss": 1.3365, "mean_token_accuracy": 0.6623590737581253, "num_tokens": 933132863.0, "step": 5555 }, { "entropy": 1.7183450758457184, "epoch": 0.6103649995880366, "grad_norm": 0.8200631737709045, "learning_rate": 1.6658137041576236e-05, "loss": 1.2704, "mean_token_accuracy": 0.6729947527249655, "num_tokens": 933246415.0, "step": 5556 }, { "entropy": 1.7393971184889476, "epoch": 0.6104748564994095, "grad_norm": 0.6439294815063477, "learning_rate": 1.6656891814084685e-05, "loss": 1.3755, "mean_token_accuracy": 0.6616205821434656, "num_tokens": 933432973.0, "step": 5557 }, { "entropy": 1.6542038420836132, "epoch": 0.6105847134107825, "grad_norm": 0.631920337677002, "learning_rate": 1.665564640755506e-05, "loss": 1.3404, "mean_token_accuracy": 0.6656839350859324, "num_tokens": 933603655.0, "step": 5558 }, { "entropy": 1.7492648462454479, "epoch": 0.6106945703221554, "grad_norm": 0.79639732837677, "learning_rate": 1.6654400822026774e-05, "loss": 1.3435, "mean_token_accuracy": 0.656333123644193, "num_tokens": 933731281.0, "step": 5559 }, { "entropy": 1.6991788546244304, "epoch": 0.6108044272335283, "grad_norm": 0.6638270020484924, "learning_rate": 1.6653155057539248e-05, "loss": 1.26, "mean_token_accuracy": 0.6817802836497625, "num_tokens": 933874973.0, "step": 5560 }, { "entropy": 1.6869306564331055, "epoch": 0.6109142841449012, "grad_norm": 0.7141402959823608, "learning_rate": 1.665190911413191e-05, "loss": 1.3595, "mean_token_accuracy": 0.6624280711015066, "num_tokens": 934074045.0, "step": 5561 }, { "entropy": 1.723333050807317, "epoch": 0.6110241410562742, "grad_norm": 0.6675291657447815, "learning_rate": 1.6650662991844196e-05, "loss": 1.6021, "mean_token_accuracy": 0.6266117841005325, "num_tokens": 934258442.0, "step": 5562 }, { "entropy": 1.6694819529851277, "epoch": 0.6111339979676471, "grad_norm": 0.7414196133613586, "learning_rate": 1.6649416690715552e-05, "loss": 1.3676, "mean_token_accuracy": 0.6675353596607844, "num_tokens": 934488821.0, "step": 5563 }, { "entropy": 1.683532973130544, "epoch": 0.6112438548790201, "grad_norm": 0.9000768065452576, "learning_rate": 1.6648170210785405e-05, "loss": 1.1238, "mean_token_accuracy": 0.6990447590748469, "num_tokens": 934594435.0, "step": 5564 }, { "entropy": 1.6689512928326924, "epoch": 0.611353711790393, "grad_norm": 0.6170567274093628, "learning_rate": 1.664692355209322e-05, "loss": 1.3279, "mean_token_accuracy": 0.6679205298423767, "num_tokens": 934723939.0, "step": 5565 }, { "entropy": 1.6680874923865001, "epoch": 0.611463568701766, "grad_norm": 0.6829505562782288, "learning_rate": 1.6645676714678455e-05, "loss": 1.5437, "mean_token_accuracy": 0.6364776839812597, "num_tokens": 934951400.0, "step": 5566 }, { "entropy": 1.6776273846626282, "epoch": 0.6115734256131389, "grad_norm": 0.7855686545372009, "learning_rate": 1.664442969858056e-05, "loss": 1.4367, "mean_token_accuracy": 0.6567247211933136, "num_tokens": 935139302.0, "step": 5567 }, { "entropy": 1.7481131454308827, "epoch": 0.6116832825245119, "grad_norm": 0.7802530527114868, "learning_rate": 1.664318250383901e-05, "loss": 1.449, "mean_token_accuracy": 0.6351684182882309, "num_tokens": 935338558.0, "step": 5568 }, { "entropy": 1.6226297517617543, "epoch": 0.6117931394358848, "grad_norm": 1.6184296607971191, "learning_rate": 1.6641935130493276e-05, "loss": 1.3117, "mean_token_accuracy": 0.6598619123299917, "num_tokens": 935512009.0, "step": 5569 }, { "entropy": 1.7038521965344746, "epoch": 0.6119029963472576, "grad_norm": 0.6111783981323242, "learning_rate": 1.6640687578582835e-05, "loss": 1.3716, "mean_token_accuracy": 0.665923555692037, "num_tokens": 935663636.0, "step": 5570 }, { "entropy": 1.724490185578664, "epoch": 0.6120128532586306, "grad_norm": 0.601383626461029, "learning_rate": 1.6639439848147177e-05, "loss": 1.4497, "mean_token_accuracy": 0.6473473062117895, "num_tokens": 935844408.0, "step": 5571 }, { "entropy": 1.6795639892419179, "epoch": 0.6121227101700035, "grad_norm": 0.7149496078491211, "learning_rate": 1.6638191939225787e-05, "loss": 1.2712, "mean_token_accuracy": 0.6758081962664922, "num_tokens": 935969137.0, "step": 5572 }, { "entropy": 1.6430913706620534, "epoch": 0.6122325670813765, "grad_norm": 0.6253581643104553, "learning_rate": 1.6636943851858166e-05, "loss": 1.4073, "mean_token_accuracy": 0.6706066131591797, "num_tokens": 936162195.0, "step": 5573 }, { "entropy": 1.6803169250488281, "epoch": 0.6123424239927494, "grad_norm": 0.646298348903656, "learning_rate": 1.6635695586083808e-05, "loss": 1.3594, "mean_token_accuracy": 0.6621912568807602, "num_tokens": 936321911.0, "step": 5574 }, { "entropy": 1.7561656534671783, "epoch": 0.6124522809041224, "grad_norm": 0.6321209669113159, "learning_rate": 1.663444714194223e-05, "loss": 1.499, "mean_token_accuracy": 0.6410553604364395, "num_tokens": 936510584.0, "step": 5575 }, { "entropy": 1.6951843996842701, "epoch": 0.6125621378154953, "grad_norm": 0.7173265218734741, "learning_rate": 1.6633198519472933e-05, "loss": 1.4487, "mean_token_accuracy": 0.651951809724172, "num_tokens": 936713499.0, "step": 5576 }, { "entropy": 1.7019581099351246, "epoch": 0.6126719947268683, "grad_norm": 0.5802896022796631, "learning_rate": 1.6631949718715445e-05, "loss": 1.3271, "mean_token_accuracy": 0.6597871532042822, "num_tokens": 936940791.0, "step": 5577 }, { "entropy": 1.6554476817448933, "epoch": 0.6127818516382412, "grad_norm": 0.7390364408493042, "learning_rate": 1.6630700739709282e-05, "loss": 1.4072, "mean_token_accuracy": 0.6636865039666494, "num_tokens": 937082308.0, "step": 5578 }, { "entropy": 1.6889912883440654, "epoch": 0.6128917085496142, "grad_norm": 0.686810314655304, "learning_rate": 1.6629451582493983e-05, "loss": 1.4255, "mean_token_accuracy": 0.6632570077975591, "num_tokens": 937229279.0, "step": 5579 }, { "entropy": 1.7240253388881683, "epoch": 0.613001565460987, "grad_norm": 0.6423957347869873, "learning_rate": 1.6628202247109072e-05, "loss": 1.4287, "mean_token_accuracy": 0.6554444034894308, "num_tokens": 937395643.0, "step": 5580 }, { "entropy": 1.7453702688217163, "epoch": 0.61311142237236, "grad_norm": 0.7353253960609436, "learning_rate": 1.66269527335941e-05, "loss": 1.4087, "mean_token_accuracy": 0.6449921876192093, "num_tokens": 937539093.0, "step": 5581 }, { "entropy": 1.6367293000221252, "epoch": 0.6132212792837329, "grad_norm": 0.608282744884491, "learning_rate": 1.662570304198861e-05, "loss": 1.4691, "mean_token_accuracy": 0.6490232745806376, "num_tokens": 937756052.0, "step": 5582 }, { "entropy": 1.686044067144394, "epoch": 0.6133311361951058, "grad_norm": 0.6353262066841125, "learning_rate": 1.6624453172332154e-05, "loss": 1.4763, "mean_token_accuracy": 0.6541228095690409, "num_tokens": 937904214.0, "step": 5583 }, { "entropy": 1.69670374194781, "epoch": 0.6134409931064788, "grad_norm": 0.6989722847938538, "learning_rate": 1.662320312466429e-05, "loss": 1.2754, "mean_token_accuracy": 0.6773638278245926, "num_tokens": 938031279.0, "step": 5584 }, { "entropy": 1.724222093820572, "epoch": 0.6135508500178517, "grad_norm": 0.6987892985343933, "learning_rate": 1.6621952899024578e-05, "loss": 1.4692, "mean_token_accuracy": 0.6530686269203821, "num_tokens": 938194279.0, "step": 5585 }, { "entropy": 1.777810384829839, "epoch": 0.6136607069292247, "grad_norm": 0.6809293627738953, "learning_rate": 1.662070249545259e-05, "loss": 1.5097, "mean_token_accuracy": 0.6327020525932312, "num_tokens": 938368428.0, "step": 5586 }, { "entropy": 1.6900599499543507, "epoch": 0.6137705638405976, "grad_norm": 0.5528995990753174, "learning_rate": 1.6619451913987905e-05, "loss": 1.401, "mean_token_accuracy": 0.6550226360559464, "num_tokens": 938568730.0, "step": 5587 }, { "entropy": 1.7115299503008525, "epoch": 0.6138804207519706, "grad_norm": 0.7030913233757019, "learning_rate": 1.6618201154670096e-05, "loss": 1.2342, "mean_token_accuracy": 0.6826610863208771, "num_tokens": 938711244.0, "step": 5588 }, { "entropy": 1.674224187930425, "epoch": 0.6139902776633435, "grad_norm": 0.6814902424812317, "learning_rate": 1.6616950217538752e-05, "loss": 1.3691, "mean_token_accuracy": 0.6572864949703217, "num_tokens": 938882831.0, "step": 5589 }, { "entropy": 1.677575667699178, "epoch": 0.6141001345747165, "grad_norm": 0.7386172413825989, "learning_rate": 1.6615699102633466e-05, "loss": 1.4307, "mean_token_accuracy": 0.6639702320098877, "num_tokens": 939044197.0, "step": 5590 }, { "entropy": 1.6663434406121571, "epoch": 0.6142099914860893, "grad_norm": 0.7466452717781067, "learning_rate": 1.6614447809993833e-05, "loss": 1.3802, "mean_token_accuracy": 0.657066822052002, "num_tokens": 939214259.0, "step": 5591 }, { "entropy": 1.665649155775706, "epoch": 0.6143198483974623, "grad_norm": 0.701424777507782, "learning_rate": 1.6613196339659454e-05, "loss": 1.214, "mean_token_accuracy": 0.6855147878328959, "num_tokens": 939396313.0, "step": 5592 }, { "entropy": 1.6514354248841603, "epoch": 0.6144297053088352, "grad_norm": 0.6594340801239014, "learning_rate": 1.6611944691669944e-05, "loss": 1.5042, "mean_token_accuracy": 0.6493265976508459, "num_tokens": 939601857.0, "step": 5593 }, { "entropy": 1.7074782649676006, "epoch": 0.6145395622202082, "grad_norm": 0.6715470552444458, "learning_rate": 1.6610692866064912e-05, "loss": 1.5188, "mean_token_accuracy": 0.6534653852383295, "num_tokens": 939812970.0, "step": 5594 }, { "entropy": 1.7174999515215557, "epoch": 0.6146494191315811, "grad_norm": 0.7222854495048523, "learning_rate": 1.660944086288398e-05, "loss": 1.2172, "mean_token_accuracy": 0.6784104257822037, "num_tokens": 939938382.0, "step": 5595 }, { "entropy": 1.7256175378958385, "epoch": 0.6147592760429541, "grad_norm": 0.6657436490058899, "learning_rate": 1.660818868216677e-05, "loss": 1.3559, "mean_token_accuracy": 0.6577880332867304, "num_tokens": 940127967.0, "step": 5596 }, { "entropy": 1.6247844000657399, "epoch": 0.614869132954327, "grad_norm": 0.6978784203529358, "learning_rate": 1.660693632395292e-05, "loss": 1.3566, "mean_token_accuracy": 0.6696316401163737, "num_tokens": 940261869.0, "step": 5597 }, { "entropy": 1.6865403950214386, "epoch": 0.6149789898656999, "grad_norm": 0.5856928825378418, "learning_rate": 1.6605683788282057e-05, "loss": 1.4175, "mean_token_accuracy": 0.652377262711525, "num_tokens": 940444933.0, "step": 5598 }, { "entropy": 1.7653738756974537, "epoch": 0.6150888467770729, "grad_norm": 0.6951313614845276, "learning_rate": 1.6604431075193833e-05, "loss": 1.4452, "mean_token_accuracy": 0.6424042185147604, "num_tokens": 940576065.0, "step": 5599 }, { "entropy": 1.6887817184130351, "epoch": 0.6151987036884458, "grad_norm": 0.7151344418525696, "learning_rate": 1.6603178184727888e-05, "loss": 1.3428, "mean_token_accuracy": 0.6740106294552485, "num_tokens": 940717460.0, "step": 5600 }, { "entropy": 1.7164513369401295, "epoch": 0.6153085605998188, "grad_norm": 0.6633734703063965, "learning_rate": 1.6601925116923875e-05, "loss": 1.4313, "mean_token_accuracy": 0.647556280096372, "num_tokens": 940948397.0, "step": 5601 }, { "entropy": 1.6911031305789948, "epoch": 0.6154184175111916, "grad_norm": 0.687568187713623, "learning_rate": 1.660067187182146e-05, "loss": 1.4035, "mean_token_accuracy": 0.6575697958469391, "num_tokens": 941105151.0, "step": 5602 }, { "entropy": 1.7053539156913757, "epoch": 0.6155282744225646, "grad_norm": 0.700892448425293, "learning_rate": 1.6599418449460305e-05, "loss": 1.3932, "mean_token_accuracy": 0.6440112143754959, "num_tokens": 941275369.0, "step": 5603 }, { "entropy": 1.691804975271225, "epoch": 0.6156381313339375, "grad_norm": 0.6623601317405701, "learning_rate": 1.6598164849880077e-05, "loss": 1.4609, "mean_token_accuracy": 0.6414239406585693, "num_tokens": 941451129.0, "step": 5604 }, { "entropy": 1.6720061600208282, "epoch": 0.6157479882453105, "grad_norm": 0.7391043901443481, "learning_rate": 1.6596911073120455e-05, "loss": 1.3943, "mean_token_accuracy": 0.6600146691004435, "num_tokens": 941580404.0, "step": 5605 }, { "entropy": 1.7468764384587605, "epoch": 0.6158578451566834, "grad_norm": 0.5990471839904785, "learning_rate": 1.6595657119221124e-05, "loss": 1.3827, "mean_token_accuracy": 0.6457860618829727, "num_tokens": 941801078.0, "step": 5606 }, { "entropy": 1.7029780944188435, "epoch": 0.6159677020680564, "grad_norm": 0.6301769614219666, "learning_rate": 1.659440298822176e-05, "loss": 1.4816, "mean_token_accuracy": 0.6631426165501276, "num_tokens": 941977723.0, "step": 5607 }, { "entropy": 1.7141645848751068, "epoch": 0.6160775589794293, "grad_norm": 0.7286873459815979, "learning_rate": 1.6593148680162063e-05, "loss": 1.3945, "mean_token_accuracy": 0.6584265381097794, "num_tokens": 942126547.0, "step": 5608 }, { "entropy": 1.7053893009821575, "epoch": 0.6161874158908023, "grad_norm": 0.7642189860343933, "learning_rate": 1.659189419508173e-05, "loss": 1.5418, "mean_token_accuracy": 0.6277876098950704, "num_tokens": 942298233.0, "step": 5609 }, { "entropy": 1.6716360052426655, "epoch": 0.6162972728021752, "grad_norm": 0.6552688479423523, "learning_rate": 1.659063953302047e-05, "loss": 1.402, "mean_token_accuracy": 0.6565060516198477, "num_tokens": 942466556.0, "step": 5610 }, { "entropy": 1.7290156185626984, "epoch": 0.616407129713548, "grad_norm": 0.650862991809845, "learning_rate": 1.6589384694017984e-05, "loss": 1.48, "mean_token_accuracy": 0.6433103134234747, "num_tokens": 942692434.0, "step": 5611 }, { "entropy": 1.7001596788565319, "epoch": 0.616516986624921, "grad_norm": 0.6333754062652588, "learning_rate": 1.6588129678113992e-05, "loss": 1.5025, "mean_token_accuracy": 0.6430030663808187, "num_tokens": 942845785.0, "step": 5612 }, { "entropy": 1.7286210159460704, "epoch": 0.6166268435362939, "grad_norm": 0.8158491849899292, "learning_rate": 1.6586874485348216e-05, "loss": 1.2876, "mean_token_accuracy": 0.6615132391452789, "num_tokens": 942966008.0, "step": 5613 }, { "entropy": 1.6635615924994152, "epoch": 0.6167367004476669, "grad_norm": 0.7491524815559387, "learning_rate": 1.658561911576038e-05, "loss": 1.3309, "mean_token_accuracy": 0.6790938824415207, "num_tokens": 943116395.0, "step": 5614 }, { "entropy": 1.686743954817454, "epoch": 0.6168465573590398, "grad_norm": 0.6865391135215759, "learning_rate": 1.6584363569390213e-05, "loss": 1.4197, "mean_token_accuracy": 0.6580019642909368, "num_tokens": 943271670.0, "step": 5615 }, { "entropy": 1.6392175356547039, "epoch": 0.6169564142704128, "grad_norm": 0.6303647756576538, "learning_rate": 1.6583107846277455e-05, "loss": 1.2924, "mean_token_accuracy": 0.6750722229480743, "num_tokens": 943457679.0, "step": 5616 }, { "entropy": 1.6606386701265972, "epoch": 0.6170662711817857, "grad_norm": 0.7184935808181763, "learning_rate": 1.658185194646185e-05, "loss": 1.4147, "mean_token_accuracy": 0.6562165568272272, "num_tokens": 943637649.0, "step": 5617 }, { "entropy": 1.6875253518422444, "epoch": 0.6171761280931587, "grad_norm": 0.5899779200553894, "learning_rate": 1.658059586998315e-05, "loss": 1.3247, "mean_token_accuracy": 0.6731048425038656, "num_tokens": 943799454.0, "step": 5618 }, { "entropy": 1.6526349087556202, "epoch": 0.6172859850045316, "grad_norm": 0.6579746603965759, "learning_rate": 1.65793396168811e-05, "loss": 1.4656, "mean_token_accuracy": 0.6476482550303141, "num_tokens": 944009136.0, "step": 5619 }, { "entropy": 1.669644723335902, "epoch": 0.6173958419159046, "grad_norm": 1.451614499092102, "learning_rate": 1.6578083187195467e-05, "loss": 1.3359, "mean_token_accuracy": 0.6453322917222977, "num_tokens": 944230639.0, "step": 5620 }, { "entropy": 1.6753608882427216, "epoch": 0.6175056988272775, "grad_norm": 0.6836093068122864, "learning_rate": 1.6576826580966015e-05, "loss": 1.3984, "mean_token_accuracy": 0.655582994222641, "num_tokens": 944397785.0, "step": 5621 }, { "entropy": 1.710491806268692, "epoch": 0.6176155557386505, "grad_norm": 0.7552167177200317, "learning_rate": 1.657556979823252e-05, "loss": 1.5175, "mean_token_accuracy": 0.6515548129876455, "num_tokens": 944603736.0, "step": 5622 }, { "entropy": 1.7582048177719116, "epoch": 0.6177254126500233, "grad_norm": 0.5882629752159119, "learning_rate": 1.6574312839034745e-05, "loss": 1.4611, "mean_token_accuracy": 0.6304621398448944, "num_tokens": 944782321.0, "step": 5623 }, { "entropy": 1.7265077730019887, "epoch": 0.6178352695613962, "grad_norm": 0.6855106353759766, "learning_rate": 1.6573055703412486e-05, "loss": 1.4187, "mean_token_accuracy": 0.6633595625559489, "num_tokens": 944970965.0, "step": 5624 }, { "entropy": 1.6792520582675934, "epoch": 0.6179451264727692, "grad_norm": 0.628589928150177, "learning_rate": 1.6571798391405523e-05, "loss": 1.417, "mean_token_accuracy": 0.652607669432958, "num_tokens": 945171431.0, "step": 5625 }, { "entropy": 1.61854421099027, "epoch": 0.6180549833841421, "grad_norm": 0.8567890524864197, "learning_rate": 1.6570540903053653e-05, "loss": 1.4852, "mean_token_accuracy": 0.6646982729434967, "num_tokens": 945370917.0, "step": 5626 }, { "entropy": 1.742018034060796, "epoch": 0.6181648402955151, "grad_norm": 0.7597964406013489, "learning_rate": 1.6569283238396672e-05, "loss": 1.4677, "mean_token_accuracy": 0.6531796753406525, "num_tokens": 945532898.0, "step": 5627 }, { "entropy": 1.7409979899724324, "epoch": 0.618274697206888, "grad_norm": 0.6351725459098816, "learning_rate": 1.6568025397474388e-05, "loss": 1.4706, "mean_token_accuracy": 0.6394537637631098, "num_tokens": 945718529.0, "step": 5628 }, { "entropy": 1.779521683851878, "epoch": 0.618384554118261, "grad_norm": 0.6235055923461914, "learning_rate": 1.6566767380326604e-05, "loss": 1.3624, "mean_token_accuracy": 0.6629078437884649, "num_tokens": 945854934.0, "step": 5629 }, { "entropy": 1.704407960176468, "epoch": 0.6184944110296339, "grad_norm": 0.7060872316360474, "learning_rate": 1.656550918699315e-05, "loss": 1.4615, "mean_token_accuracy": 0.6496474295854568, "num_tokens": 946015936.0, "step": 5630 }, { "entropy": 1.680654654900233, "epoch": 0.6186042679410069, "grad_norm": 0.6293652057647705, "learning_rate": 1.656425081751383e-05, "loss": 1.3839, "mean_token_accuracy": 0.6551440358161926, "num_tokens": 946154876.0, "step": 5631 }, { "entropy": 1.6304692129294078, "epoch": 0.6187141248523798, "grad_norm": 0.6845012903213501, "learning_rate": 1.656299227192848e-05, "loss": 1.4222, "mean_token_accuracy": 0.6561487466096878, "num_tokens": 946303938.0, "step": 5632 }, { "entropy": 1.6892323593298595, "epoch": 0.6188239817637528, "grad_norm": 0.6096817255020142, "learning_rate": 1.6561733550276934e-05, "loss": 1.3853, "mean_token_accuracy": 0.6653565714756647, "num_tokens": 946472387.0, "step": 5633 }, { "entropy": 1.740951379140218, "epoch": 0.6189338386751256, "grad_norm": 0.8123107552528381, "learning_rate": 1.6560474652599025e-05, "loss": 1.5593, "mean_token_accuracy": 0.6589376678069433, "num_tokens": 946664473.0, "step": 5634 }, { "entropy": 1.7017356554667156, "epoch": 0.6190436955864986, "grad_norm": 0.5863003730773926, "learning_rate": 1.6559215578934602e-05, "loss": 1.3029, "mean_token_accuracy": 0.6699723253647486, "num_tokens": 946817763.0, "step": 5635 }, { "entropy": 1.7194437483946483, "epoch": 0.6191535524978715, "grad_norm": 0.8233284950256348, "learning_rate": 1.655795632932351e-05, "loss": 1.4635, "mean_token_accuracy": 0.6592583407958349, "num_tokens": 946962720.0, "step": 5636 }, { "entropy": 1.6594391167163849, "epoch": 0.6192634094092445, "grad_norm": 0.5975894927978516, "learning_rate": 1.6556696903805604e-05, "loss": 1.4232, "mean_token_accuracy": 0.6385360260804495, "num_tokens": 947145238.0, "step": 5637 }, { "entropy": 1.7108287413914998, "epoch": 0.6193732663206174, "grad_norm": 0.6864363551139832, "learning_rate": 1.6555437302420746e-05, "loss": 1.5409, "mean_token_accuracy": 0.6319515456755956, "num_tokens": 947358881.0, "step": 5638 }, { "entropy": 1.6026353538036346, "epoch": 0.6194831232319903, "grad_norm": 0.6471695899963379, "learning_rate": 1.6554177525208798e-05, "loss": 1.4167, "mean_token_accuracy": 0.6550784210364023, "num_tokens": 947560663.0, "step": 5639 }, { "entropy": 1.7613280514876049, "epoch": 0.6195929801433633, "grad_norm": 0.7547305822372437, "learning_rate": 1.6552917572209637e-05, "loss": 1.3111, "mean_token_accuracy": 0.6676426778237025, "num_tokens": 947677967.0, "step": 5640 }, { "entropy": 1.6681643426418304, "epoch": 0.6197028370547362, "grad_norm": 0.5885694622993469, "learning_rate": 1.6551657443463132e-05, "loss": 1.3086, "mean_token_accuracy": 0.6593044847249985, "num_tokens": 947840975.0, "step": 5641 }, { "entropy": 1.684307485818863, "epoch": 0.6198126939661092, "grad_norm": 0.6141138076782227, "learning_rate": 1.6550397139009174e-05, "loss": 1.3726, "mean_token_accuracy": 0.6515480875968933, "num_tokens": 948049432.0, "step": 5642 }, { "entropy": 1.698849121729533, "epoch": 0.619922550877482, "grad_norm": 0.6681082844734192, "learning_rate": 1.654913665888765e-05, "loss": 1.5047, "mean_token_accuracy": 0.6400974442561468, "num_tokens": 948241513.0, "step": 5643 }, { "entropy": 1.6541813611984253, "epoch": 0.620032407788855, "grad_norm": 0.6509910821914673, "learning_rate": 1.654787600313845e-05, "loss": 1.3013, "mean_token_accuracy": 0.6745069374640783, "num_tokens": 948397238.0, "step": 5644 }, { "entropy": 1.7138587733109791, "epoch": 0.6201422647002279, "grad_norm": 0.6395068764686584, "learning_rate": 1.654661517180147e-05, "loss": 1.3956, "mean_token_accuracy": 0.6503481864929199, "num_tokens": 948558440.0, "step": 5645 }, { "entropy": 1.7701470851898193, "epoch": 0.6202521216116009, "grad_norm": 0.7288310527801514, "learning_rate": 1.6545354164916624e-05, "loss": 1.3433, "mean_token_accuracy": 0.6570984820524851, "num_tokens": 948686589.0, "step": 5646 }, { "entropy": 1.68064480026563, "epoch": 0.6203619785229738, "grad_norm": 0.6537264585494995, "learning_rate": 1.6544092982523817e-05, "loss": 1.4848, "mean_token_accuracy": 0.6528479357560476, "num_tokens": 948869305.0, "step": 5647 }, { "entropy": 1.7192702094713848, "epoch": 0.6204718354343468, "grad_norm": 0.7808331251144409, "learning_rate": 1.654283162466296e-05, "loss": 1.4711, "mean_token_accuracy": 0.6476468493541082, "num_tokens": 949060303.0, "step": 5648 }, { "entropy": 1.6918814182281494, "epoch": 0.6205816923457197, "grad_norm": 0.6144691705703735, "learning_rate": 1.654157009137399e-05, "loss": 1.3763, "mean_token_accuracy": 0.6593097994724909, "num_tokens": 949221427.0, "step": 5649 }, { "entropy": 1.7113316158453624, "epoch": 0.6206915492570927, "grad_norm": 0.7773605585098267, "learning_rate": 1.6540308382696814e-05, "loss": 1.2532, "mean_token_accuracy": 0.6766321261723837, "num_tokens": 949344612.0, "step": 5650 }, { "entropy": 1.686803976694743, "epoch": 0.6208014061684656, "grad_norm": 0.8215593099594116, "learning_rate": 1.6539046498671377e-05, "loss": 1.398, "mean_token_accuracy": 0.6599339644114176, "num_tokens": 949479081.0, "step": 5651 }, { "entropy": 1.7015974322954814, "epoch": 0.6209112630798385, "grad_norm": 0.6959190368652344, "learning_rate": 1.6537784439337618e-05, "loss": 1.3426, "mean_token_accuracy": 0.6654083828131357, "num_tokens": 949657684.0, "step": 5652 }, { "entropy": 1.6332708994547527, "epoch": 0.6210211199912115, "grad_norm": 0.6165181398391724, "learning_rate": 1.6536522204735473e-05, "loss": 1.3202, "mean_token_accuracy": 0.6605967779954275, "num_tokens": 949808395.0, "step": 5653 }, { "entropy": 1.7021582822004955, "epoch": 0.6211309769025843, "grad_norm": 0.736033022403717, "learning_rate": 1.6535259794904895e-05, "loss": 1.284, "mean_token_accuracy": 0.6535018235445023, "num_tokens": 949936209.0, "step": 5654 }, { "entropy": 1.7629179656505585, "epoch": 0.6212408338139573, "grad_norm": 0.6508721113204956, "learning_rate": 1.6533997209885843e-05, "loss": 1.4651, "mean_token_accuracy": 0.647722914814949, "num_tokens": 950155281.0, "step": 5655 }, { "entropy": 1.7487357060114543, "epoch": 0.6213506907253302, "grad_norm": 0.6706015467643738, "learning_rate": 1.653273444971827e-05, "loss": 1.4312, "mean_token_accuracy": 0.6624927769104639, "num_tokens": 950338044.0, "step": 5656 }, { "entropy": 1.699324498573939, "epoch": 0.6214605476367032, "grad_norm": 0.6364463567733765, "learning_rate": 1.6531471514442143e-05, "loss": 1.4349, "mean_token_accuracy": 0.6458380470673243, "num_tokens": 950510346.0, "step": 5657 }, { "entropy": 1.7458198368549347, "epoch": 0.6215704045480761, "grad_norm": 0.702688992023468, "learning_rate": 1.653020840409744e-05, "loss": 1.331, "mean_token_accuracy": 0.6683972229560217, "num_tokens": 950676071.0, "step": 5658 }, { "entropy": 1.6945001284281414, "epoch": 0.6216802614594491, "grad_norm": 0.722691535949707, "learning_rate": 1.652894511872413e-05, "loss": 1.3648, "mean_token_accuracy": 0.6641036917765936, "num_tokens": 950826008.0, "step": 5659 }, { "entropy": 1.6684882044792175, "epoch": 0.621790118370822, "grad_norm": 0.5960844159126282, "learning_rate": 1.6527681658362195e-05, "loss": 1.4113, "mean_token_accuracy": 0.6465084751447042, "num_tokens": 951009559.0, "step": 5660 }, { "entropy": 1.7376012802124023, "epoch": 0.621899975282195, "grad_norm": 0.9526035785675049, "learning_rate": 1.652641802305163e-05, "loss": 1.2984, "mean_token_accuracy": 0.6551149984200796, "num_tokens": 951117465.0, "step": 5661 }, { "entropy": 1.6809014678001404, "epoch": 0.6220098321935679, "grad_norm": 0.6386826038360596, "learning_rate": 1.6525154212832427e-05, "loss": 1.5054, "mean_token_accuracy": 0.6454970935980479, "num_tokens": 951316564.0, "step": 5662 }, { "entropy": 1.747914433479309, "epoch": 0.6221196891049409, "grad_norm": 0.637520432472229, "learning_rate": 1.652389022774458e-05, "loss": 1.4074, "mean_token_accuracy": 0.660906101266543, "num_tokens": 951550546.0, "step": 5663 }, { "entropy": 1.6579786936442058, "epoch": 0.6222295460163138, "grad_norm": 0.7350385189056396, "learning_rate": 1.6522626067828096e-05, "loss": 1.4238, "mean_token_accuracy": 0.6841726005077362, "num_tokens": 951679384.0, "step": 5664 }, { "entropy": 1.7201216916243236, "epoch": 0.6223394029276866, "grad_norm": 0.7172040343284607, "learning_rate": 1.6521361733122988e-05, "loss": 1.473, "mean_token_accuracy": 0.6475165237983068, "num_tokens": 951851937.0, "step": 5665 }, { "entropy": 1.6948122183481853, "epoch": 0.6224492598390596, "grad_norm": 0.7633089423179626, "learning_rate": 1.6520097223669265e-05, "loss": 1.3301, "mean_token_accuracy": 0.6592757950226465, "num_tokens": 952008420.0, "step": 5666 }, { "entropy": 1.7121588389078777, "epoch": 0.6225591167504325, "grad_norm": 0.8232876062393188, "learning_rate": 1.6518832539506956e-05, "loss": 1.2325, "mean_token_accuracy": 0.67981685201327, "num_tokens": 952133734.0, "step": 5667 }, { "entropy": 1.6649379134178162, "epoch": 0.6226689736618055, "grad_norm": 0.6807710528373718, "learning_rate": 1.6517567680676082e-05, "loss": 1.3521, "mean_token_accuracy": 0.6662793705860773, "num_tokens": 952280589.0, "step": 5668 }, { "entropy": 1.7094309329986572, "epoch": 0.6227788305731784, "grad_norm": 0.6411172151565552, "learning_rate": 1.6516302647216678e-05, "loss": 1.3567, "mean_token_accuracy": 0.6611761103073756, "num_tokens": 952461092.0, "step": 5669 }, { "entropy": 1.686687747637431, "epoch": 0.6228886874845514, "grad_norm": 0.6205955743789673, "learning_rate": 1.651503743916878e-05, "loss": 1.395, "mean_token_accuracy": 0.6670850316683451, "num_tokens": 952633888.0, "step": 5670 }, { "entropy": 1.7188594837983449, "epoch": 0.6229985443959243, "grad_norm": 0.795706570148468, "learning_rate": 1.6513772056572434e-05, "loss": 1.436, "mean_token_accuracy": 0.6592790633440018, "num_tokens": 952787296.0, "step": 5671 }, { "entropy": 1.6902817885080974, "epoch": 0.6231084013072973, "grad_norm": 0.5516853928565979, "learning_rate": 1.6512506499467683e-05, "loss": 1.4364, "mean_token_accuracy": 0.6347800940275192, "num_tokens": 953006969.0, "step": 5672 }, { "entropy": 1.7436016698678334, "epoch": 0.6232182582186702, "grad_norm": 0.8173310160636902, "learning_rate": 1.651124076789459e-05, "loss": 1.1976, "mean_token_accuracy": 0.6767911414305369, "num_tokens": 953116217.0, "step": 5673 }, { "entropy": 1.7197916905085247, "epoch": 0.6233281151300432, "grad_norm": 0.7681940793991089, "learning_rate": 1.6509974861893207e-05, "loss": 1.382, "mean_token_accuracy": 0.658002108335495, "num_tokens": 953280585.0, "step": 5674 }, { "entropy": 1.7034264703591664, "epoch": 0.623437972041416, "grad_norm": 0.6472874879837036, "learning_rate": 1.6508708781503604e-05, "loss": 1.2812, "mean_token_accuracy": 0.6792033066352209, "num_tokens": 953424705.0, "step": 5675 }, { "entropy": 1.676049013932546, "epoch": 0.623547828952789, "grad_norm": 0.6681833863258362, "learning_rate": 1.650744252676585e-05, "loss": 1.2813, "mean_token_accuracy": 0.6753066728512446, "num_tokens": 953580943.0, "step": 5676 }, { "entropy": 1.6816561023394268, "epoch": 0.6236576858641619, "grad_norm": 0.910898745059967, "learning_rate": 1.6506176097720025e-05, "loss": 1.238, "mean_token_accuracy": 0.6695930411418279, "num_tokens": 953734061.0, "step": 5677 }, { "entropy": 1.735485553741455, "epoch": 0.6237675427755348, "grad_norm": 0.6783795952796936, "learning_rate": 1.6504909494406202e-05, "loss": 1.3318, "mean_token_accuracy": 0.6575172245502472, "num_tokens": 953846716.0, "step": 5678 }, { "entropy": 1.8034160832564037, "epoch": 0.6238773996869078, "grad_norm": 0.689171314239502, "learning_rate": 1.6503642716864475e-05, "loss": 1.401, "mean_token_accuracy": 0.6448919673760732, "num_tokens": 953994690.0, "step": 5679 }, { "entropy": 1.6796988149483998, "epoch": 0.6239872565982807, "grad_norm": 0.7800900936126709, "learning_rate": 1.650237576513494e-05, "loss": 1.2953, "mean_token_accuracy": 0.6639833003282547, "num_tokens": 954154318.0, "step": 5680 }, { "entropy": 1.7101227541764576, "epoch": 0.6240971135096537, "grad_norm": 0.7329297065734863, "learning_rate": 1.650110863925769e-05, "loss": 1.3773, "mean_token_accuracy": 0.6709673305352529, "num_tokens": 954305906.0, "step": 5681 }, { "entropy": 1.683126191298167, "epoch": 0.6242069704210266, "grad_norm": 0.7751897573471069, "learning_rate": 1.6499841339272826e-05, "loss": 1.2098, "mean_token_accuracy": 0.6865563889344534, "num_tokens": 954448481.0, "step": 5682 }, { "entropy": 1.686317543188731, "epoch": 0.6243168273323996, "grad_norm": 0.6909394860267639, "learning_rate": 1.649857386522047e-05, "loss": 1.4806, "mean_token_accuracy": 0.658604254325231, "num_tokens": 954588846.0, "step": 5683 }, { "entropy": 1.738376796245575, "epoch": 0.6244266842437725, "grad_norm": 0.6977959871292114, "learning_rate": 1.6497306217140723e-05, "loss": 1.3653, "mean_token_accuracy": 0.6561718732118607, "num_tokens": 954703540.0, "step": 5684 }, { "entropy": 1.7150452435016632, "epoch": 0.6245365411551455, "grad_norm": 0.7359358072280884, "learning_rate": 1.6496038395073714e-05, "loss": 1.3309, "mean_token_accuracy": 0.6774442195892334, "num_tokens": 954823216.0, "step": 5685 }, { "entropy": 1.6885569989681244, "epoch": 0.6246463980665183, "grad_norm": 0.6183052659034729, "learning_rate": 1.649477039905956e-05, "loss": 1.4629, "mean_token_accuracy": 0.6438476542631785, "num_tokens": 955010837.0, "step": 5686 }, { "entropy": 1.7190197507540386, "epoch": 0.6247562549778913, "grad_norm": 0.731508731842041, "learning_rate": 1.6493502229138404e-05, "loss": 1.3833, "mean_token_accuracy": 0.6536182264486948, "num_tokens": 955149407.0, "step": 5687 }, { "entropy": 1.735072563091914, "epoch": 0.6248661118892642, "grad_norm": 0.6480312943458557, "learning_rate": 1.6492233885350378e-05, "loss": 1.4092, "mean_token_accuracy": 0.6455465306838354, "num_tokens": 955323698.0, "step": 5688 }, { "entropy": 1.7098636428515117, "epoch": 0.6249759688006372, "grad_norm": 0.6097153425216675, "learning_rate": 1.6490965367735627e-05, "loss": 1.4695, "mean_token_accuracy": 0.6430481324593226, "num_tokens": 955565112.0, "step": 5689 }, { "entropy": 1.6718948781490326, "epoch": 0.6250858257120101, "grad_norm": 0.8378857374191284, "learning_rate": 1.6489696676334292e-05, "loss": 1.4418, "mean_token_accuracy": 0.6423271497090658, "num_tokens": 955780285.0, "step": 5690 }, { "entropy": 1.782025973002116, "epoch": 0.6251956826233831, "grad_norm": 0.7587185502052307, "learning_rate": 1.6488427811186533e-05, "loss": 1.6231, "mean_token_accuracy": 0.6388049274682999, "num_tokens": 955942572.0, "step": 5691 }, { "entropy": 1.7340028285980225, "epoch": 0.625305539534756, "grad_norm": 0.6405612230300903, "learning_rate": 1.6487158772332504e-05, "loss": 1.5302, "mean_token_accuracy": 0.6268052359422048, "num_tokens": 956175191.0, "step": 5692 }, { "entropy": 1.7250635226567586, "epoch": 0.6254153964461289, "grad_norm": 0.748051106929779, "learning_rate": 1.6485889559812377e-05, "loss": 1.3172, "mean_token_accuracy": 0.6627877404292425, "num_tokens": 956336559.0, "step": 5693 }, { "entropy": 1.6929832597573597, "epoch": 0.6255252533575019, "grad_norm": 0.631056010723114, "learning_rate": 1.6484620173666314e-05, "loss": 1.5161, "mean_token_accuracy": 0.646266758441925, "num_tokens": 956509627.0, "step": 5694 }, { "entropy": 1.692749907573064, "epoch": 0.6256351102688748, "grad_norm": 0.6480849981307983, "learning_rate": 1.6483350613934497e-05, "loss": 1.2836, "mean_token_accuracy": 0.6649422496557236, "num_tokens": 956687120.0, "step": 5695 }, { "entropy": 1.6549834311008453, "epoch": 0.6257449671802477, "grad_norm": 0.7371950149536133, "learning_rate": 1.64820808806571e-05, "loss": 1.3504, "mean_token_accuracy": 0.6728880554437637, "num_tokens": 956826533.0, "step": 5696 }, { "entropy": 1.7084954679012299, "epoch": 0.6258548240916206, "grad_norm": 0.7008212208747864, "learning_rate": 1.6480810973874316e-05, "loss": 1.357, "mean_token_accuracy": 0.6564209510882696, "num_tokens": 956956502.0, "step": 5697 }, { "entropy": 1.7462473213672638, "epoch": 0.6259646810029936, "grad_norm": 0.716570258140564, "learning_rate": 1.6479540893626332e-05, "loss": 1.4223, "mean_token_accuracy": 0.6491911063591639, "num_tokens": 957127208.0, "step": 5698 }, { "entropy": 1.7282981077829997, "epoch": 0.6260745379143665, "grad_norm": 0.8346961736679077, "learning_rate": 1.647827063995335e-05, "loss": 1.5449, "mean_token_accuracy": 0.6457051436106364, "num_tokens": 957332479.0, "step": 5699 }, { "entropy": 1.6583941678206127, "epoch": 0.6261843948257395, "grad_norm": 0.7575819492340088, "learning_rate": 1.6477000212895573e-05, "loss": 1.3436, "mean_token_accuracy": 0.6612271418174108, "num_tokens": 957498700.0, "step": 5700 }, { "entropy": 1.7637586295604706, "epoch": 0.6262942517371124, "grad_norm": 0.5901393294334412, "learning_rate": 1.6475729612493202e-05, "loss": 1.389, "mean_token_accuracy": 0.6556883603334427, "num_tokens": 957713899.0, "step": 5701 }, { "entropy": 1.7112191021442413, "epoch": 0.6264041086484854, "grad_norm": 0.6072997450828552, "learning_rate": 1.647445883878646e-05, "loss": 1.349, "mean_token_accuracy": 0.6535293956597646, "num_tokens": 957877257.0, "step": 5702 }, { "entropy": 1.6826953887939453, "epoch": 0.6265139655598583, "grad_norm": 0.7811943292617798, "learning_rate": 1.6473187891815563e-05, "loss": 1.3923, "mean_token_accuracy": 0.6466284741957983, "num_tokens": 958124142.0, "step": 5703 }, { "entropy": 1.7394831478595734, "epoch": 0.6266238224712313, "grad_norm": 0.7273539900779724, "learning_rate": 1.6471916771620734e-05, "loss": 1.4109, "mean_token_accuracy": 0.6454877008994421, "num_tokens": 958342923.0, "step": 5704 }, { "entropy": 1.7591275970141094, "epoch": 0.6267336793826042, "grad_norm": 0.641975998878479, "learning_rate": 1.6470645478242203e-05, "loss": 1.4495, "mean_token_accuracy": 0.6493928283452988, "num_tokens": 958589574.0, "step": 5705 }, { "entropy": 1.6759761174519856, "epoch": 0.626843536293977, "grad_norm": 0.677434504032135, "learning_rate": 1.6469374011720213e-05, "loss": 1.2512, "mean_token_accuracy": 0.68131522834301, "num_tokens": 958743999.0, "step": 5706 }, { "entropy": 1.7099489271640778, "epoch": 0.62695339320535, "grad_norm": 0.6134753823280334, "learning_rate": 1.6468102372094995e-05, "loss": 1.3581, "mean_token_accuracy": 0.659825325012207, "num_tokens": 958909549.0, "step": 5707 }, { "entropy": 1.7220982710520427, "epoch": 0.6270632501167229, "grad_norm": 0.7816711664199829, "learning_rate": 1.6466830559406805e-05, "loss": 1.3075, "mean_token_accuracy": 0.6649517863988876, "num_tokens": 959021472.0, "step": 5708 }, { "entropy": 1.6756068567434947, "epoch": 0.6271731070280959, "grad_norm": 0.6301890015602112, "learning_rate": 1.6465558573695888e-05, "loss": 1.3314, "mean_token_accuracy": 0.6648290057977041, "num_tokens": 959159822.0, "step": 5709 }, { "entropy": 1.7089245716730754, "epoch": 0.6272829639394688, "grad_norm": 0.7451719045639038, "learning_rate": 1.6464286415002504e-05, "loss": 1.2651, "mean_token_accuracy": 0.6750001311302185, "num_tokens": 959259070.0, "step": 5710 }, { "entropy": 1.7253338595231373, "epoch": 0.6273928208508418, "grad_norm": 0.7505415081977844, "learning_rate": 1.646301408336692e-05, "loss": 1.3964, "mean_token_accuracy": 0.6479530483484268, "num_tokens": 959434654.0, "step": 5711 }, { "entropy": 1.708227703968684, "epoch": 0.6275026777622147, "grad_norm": 0.6948199272155762, "learning_rate": 1.64617415788294e-05, "loss": 1.3378, "mean_token_accuracy": 0.6625229269266129, "num_tokens": 959588905.0, "step": 5712 }, { "entropy": 1.7235769430796306, "epoch": 0.6276125346735877, "grad_norm": 0.6836848855018616, "learning_rate": 1.6460468901430225e-05, "loss": 1.4183, "mean_token_accuracy": 0.6595809658368429, "num_tokens": 959737641.0, "step": 5713 }, { "entropy": 1.704572280248006, "epoch": 0.6277223915849606, "grad_norm": 0.6386615633964539, "learning_rate": 1.6459196051209663e-05, "loss": 1.521, "mean_token_accuracy": 0.63822074731191, "num_tokens": 960007106.0, "step": 5714 }, { "entropy": 1.6550373832384746, "epoch": 0.6278322484963336, "grad_norm": 0.6144903898239136, "learning_rate": 1.645792302820801e-05, "loss": 1.3233, "mean_token_accuracy": 0.6593523075183233, "num_tokens": 960197922.0, "step": 5715 }, { "entropy": 1.6816544930140178, "epoch": 0.6279421054077065, "grad_norm": 0.7425700426101685, "learning_rate": 1.645664983246555e-05, "loss": 1.2967, "mean_token_accuracy": 0.6748471558094025, "num_tokens": 960332207.0, "step": 5716 }, { "entropy": 1.6723963618278503, "epoch": 0.6280519623190794, "grad_norm": 0.7503743767738342, "learning_rate": 1.6455376464022585e-05, "loss": 1.2984, "mean_token_accuracy": 0.6709683686494827, "num_tokens": 960458091.0, "step": 5717 }, { "entropy": 1.6904148161411285, "epoch": 0.6281618192304523, "grad_norm": 0.70115727186203, "learning_rate": 1.645410292291941e-05, "loss": 1.5021, "mean_token_accuracy": 0.6557003756364187, "num_tokens": 960652785.0, "step": 5718 }, { "entropy": 1.7101349135239918, "epoch": 0.6282716761418252, "grad_norm": 0.7017475962638855, "learning_rate": 1.6452829209196337e-05, "loss": 1.3333, "mean_token_accuracy": 0.6754453778266907, "num_tokens": 960782480.0, "step": 5719 }, { "entropy": 1.7187654972076416, "epoch": 0.6283815330531982, "grad_norm": 0.7320044636726379, "learning_rate": 1.6451555322893676e-05, "loss": 1.5041, "mean_token_accuracy": 0.6363671620686849, "num_tokens": 960975432.0, "step": 5720 }, { "entropy": 1.629583348830541, "epoch": 0.6284913899645711, "grad_norm": 0.6926023364067078, "learning_rate": 1.6450281264051746e-05, "loss": 1.3311, "mean_token_accuracy": 0.6710839569568634, "num_tokens": 961145110.0, "step": 5721 }, { "entropy": 1.6936370134353638, "epoch": 0.6286012468759441, "grad_norm": 0.776731014251709, "learning_rate": 1.644900703271087e-05, "loss": 1.2384, "mean_token_accuracy": 0.6770918766657511, "num_tokens": 961334917.0, "step": 5722 }, { "entropy": 1.7333524624506633, "epoch": 0.628711103787317, "grad_norm": 0.6435438990592957, "learning_rate": 1.6447732628911375e-05, "loss": 1.4127, "mean_token_accuracy": 0.6506403088569641, "num_tokens": 961515935.0, "step": 5723 }, { "entropy": 1.729872743288676, "epoch": 0.62882096069869, "grad_norm": 0.6495027542114258, "learning_rate": 1.64464580526936e-05, "loss": 1.3786, "mean_token_accuracy": 0.6543400337298712, "num_tokens": 961656876.0, "step": 5724 }, { "entropy": 1.734206845362981, "epoch": 0.6289308176100629, "grad_norm": 0.6567356586456299, "learning_rate": 1.6445183304097882e-05, "loss": 1.5658, "mean_token_accuracy": 0.6441596001386642, "num_tokens": 961848097.0, "step": 5725 }, { "entropy": 1.7201481660207112, "epoch": 0.6290406745214359, "grad_norm": 0.7097489833831787, "learning_rate": 1.6443908383164565e-05, "loss": 1.4798, "mean_token_accuracy": 0.6604090680678686, "num_tokens": 961977227.0, "step": 5726 }, { "entropy": 1.6500455737113953, "epoch": 0.6291505314328087, "grad_norm": 0.5988762378692627, "learning_rate": 1.6442633289934e-05, "loss": 1.4845, "mean_token_accuracy": 0.6477457582950592, "num_tokens": 962218485.0, "step": 5727 }, { "entropy": 1.6345641314983368, "epoch": 0.6292603883441817, "grad_norm": 0.6215258240699768, "learning_rate": 1.6441358024446543e-05, "loss": 1.3618, "mean_token_accuracy": 0.6687282770872116, "num_tokens": 962381448.0, "step": 5728 }, { "entropy": 1.696586012840271, "epoch": 0.6293702452555546, "grad_norm": 0.6843472719192505, "learning_rate": 1.6440082586742558e-05, "loss": 1.3164, "mean_token_accuracy": 0.6669615209102631, "num_tokens": 962529434.0, "step": 5729 }, { "entropy": 1.6618889768918355, "epoch": 0.6294801021669276, "grad_norm": 0.7052629590034485, "learning_rate": 1.643880697686241e-05, "loss": 1.5781, "mean_token_accuracy": 0.638151670495669, "num_tokens": 962704605.0, "step": 5730 }, { "entropy": 1.707265595595042, "epoch": 0.6295899590783005, "grad_norm": 0.7040795683860779, "learning_rate": 1.6437531194846473e-05, "loss": 1.2903, "mean_token_accuracy": 0.6720566848913828, "num_tokens": 962824714.0, "step": 5731 }, { "entropy": 1.6971515615781148, "epoch": 0.6296998159896734, "grad_norm": 0.6653143763542175, "learning_rate": 1.6436255240735123e-05, "loss": 1.2856, "mean_token_accuracy": 0.6751369287570318, "num_tokens": 962983287.0, "step": 5732 }, { "entropy": 1.7146797279516857, "epoch": 0.6298096729010464, "grad_norm": 0.6092875003814697, "learning_rate": 1.643497911456874e-05, "loss": 1.3847, "mean_token_accuracy": 0.6554857790470123, "num_tokens": 963192937.0, "step": 5733 }, { "entropy": 1.723339209953944, "epoch": 0.6299195298124193, "grad_norm": 0.7470570802688599, "learning_rate": 1.6433702816387726e-05, "loss": 1.379, "mean_token_accuracy": 0.6623584628105164, "num_tokens": 963342862.0, "step": 5734 }, { "entropy": 1.7229750553766887, "epoch": 0.6300293867237923, "grad_norm": 0.6485400795936584, "learning_rate": 1.643242634623246e-05, "loss": 1.365, "mean_token_accuracy": 0.647914802034696, "num_tokens": 963467152.0, "step": 5735 }, { "entropy": 1.7394628127415974, "epoch": 0.6301392436351652, "grad_norm": 0.7105817794799805, "learning_rate": 1.643114970414335e-05, "loss": 1.4991, "mean_token_accuracy": 0.6525513231754303, "num_tokens": 963653661.0, "step": 5736 }, { "entropy": 1.677705059448878, "epoch": 0.6302491005465382, "grad_norm": 0.6255174875259399, "learning_rate": 1.64298728901608e-05, "loss": 1.2898, "mean_token_accuracy": 0.6665566811958948, "num_tokens": 963820917.0, "step": 5737 }, { "entropy": 1.6519030431906383, "epoch": 0.630358957457911, "grad_norm": 0.6910622119903564, "learning_rate": 1.6428595904325216e-05, "loss": 1.326, "mean_token_accuracy": 0.6713027606407801, "num_tokens": 963981593.0, "step": 5738 }, { "entropy": 1.750054806470871, "epoch": 0.630468814369284, "grad_norm": 0.7107712030410767, "learning_rate": 1.642731874667702e-05, "loss": 1.3285, "mean_token_accuracy": 0.6556506305932999, "num_tokens": 964125721.0, "step": 5739 }, { "entropy": 1.6838609278202057, "epoch": 0.6305786712806569, "grad_norm": 0.6880468726158142, "learning_rate": 1.6426041417256633e-05, "loss": 1.412, "mean_token_accuracy": 0.6564341684182485, "num_tokens": 964285108.0, "step": 5740 }, { "entropy": 1.6792670687039692, "epoch": 0.6306885281920299, "grad_norm": 0.6007390022277832, "learning_rate": 1.6424763916104477e-05, "loss": 1.3549, "mean_token_accuracy": 0.6552864263455073, "num_tokens": 964458866.0, "step": 5741 }, { "entropy": 1.7109587788581848, "epoch": 0.6307983851034028, "grad_norm": 0.730143129825592, "learning_rate": 1.6423486243260993e-05, "loss": 1.5021, "mean_token_accuracy": 0.663687601685524, "num_tokens": 964577666.0, "step": 5742 }, { "entropy": 1.6826227903366089, "epoch": 0.6309082420147758, "grad_norm": 0.662407636642456, "learning_rate": 1.642220839876661e-05, "loss": 1.3491, "mean_token_accuracy": 0.6631719022989273, "num_tokens": 964731662.0, "step": 5743 }, { "entropy": 1.6952384213606517, "epoch": 0.6310180989261487, "grad_norm": 0.638367772102356, "learning_rate": 1.6420930382661773e-05, "loss": 1.4605, "mean_token_accuracy": 0.6421840240557989, "num_tokens": 964979467.0, "step": 5744 }, { "entropy": 1.6700897018114726, "epoch": 0.6311279558375217, "grad_norm": 0.692187488079071, "learning_rate": 1.641965219498693e-05, "loss": 1.3674, "mean_token_accuracy": 0.6602755586306254, "num_tokens": 965202445.0, "step": 5745 }, { "entropy": 1.6474164128303528, "epoch": 0.6312378127488946, "grad_norm": 0.5569962859153748, "learning_rate": 1.6418373835782542e-05, "loss": 1.4016, "mean_token_accuracy": 0.6398782779773077, "num_tokens": 965414482.0, "step": 5746 }, { "entropy": 1.7786280512809753, "epoch": 0.6313476696602675, "grad_norm": 0.7615206241607666, "learning_rate": 1.6417095305089062e-05, "loss": 1.3846, "mean_token_accuracy": 0.6566232194503149, "num_tokens": 965560099.0, "step": 5747 }, { "entropy": 1.7956977883974712, "epoch": 0.6314575265716404, "grad_norm": 0.7831846475601196, "learning_rate": 1.641581660294696e-05, "loss": 1.4214, "mean_token_accuracy": 0.6555542002121607, "num_tokens": 965726148.0, "step": 5748 }, { "entropy": 1.6914733946323395, "epoch": 0.6315673834830133, "grad_norm": 0.6474779844284058, "learning_rate": 1.6414537729396698e-05, "loss": 1.4031, "mean_token_accuracy": 0.6505423734585444, "num_tokens": 965937834.0, "step": 5749 }, { "entropy": 1.7051812211672466, "epoch": 0.6316772403943863, "grad_norm": 0.7757088541984558, "learning_rate": 1.641325868447876e-05, "loss": 1.276, "mean_token_accuracy": 0.6699098100264868, "num_tokens": 966046845.0, "step": 5750 }, { "entropy": 1.6874237755934398, "epoch": 0.6317870973057592, "grad_norm": 0.8079215884208679, "learning_rate": 1.641197946823362e-05, "loss": 1.4308, "mean_token_accuracy": 0.6719500770171484, "num_tokens": 966238160.0, "step": 5751 }, { "entropy": 1.719059665997823, "epoch": 0.6318969542171322, "grad_norm": 0.6266113519668579, "learning_rate": 1.641070008070177e-05, "loss": 1.3024, "mean_token_accuracy": 0.6625022441148758, "num_tokens": 966383153.0, "step": 5752 }, { "entropy": 1.6473219096660614, "epoch": 0.6320068111285051, "grad_norm": 0.7965893149375916, "learning_rate": 1.6409420521923705e-05, "loss": 1.3459, "mean_token_accuracy": 0.6579365134239197, "num_tokens": 966589655.0, "step": 5753 }, { "entropy": 1.6831459005673726, "epoch": 0.6321166680398781, "grad_norm": 0.6689232587814331, "learning_rate": 1.6408140791939914e-05, "loss": 1.2523, "mean_token_accuracy": 0.6745945314566294, "num_tokens": 966736286.0, "step": 5754 }, { "entropy": 1.7096228897571564, "epoch": 0.632226524951251, "grad_norm": 0.6370944380760193, "learning_rate": 1.6406860890790904e-05, "loss": 1.4404, "mean_token_accuracy": 0.6629820168018341, "num_tokens": 966930006.0, "step": 5755 }, { "entropy": 1.643300195535024, "epoch": 0.632336381862624, "grad_norm": 0.5664902925491333, "learning_rate": 1.6405580818517183e-05, "loss": 1.4674, "mean_token_accuracy": 0.6461287786563238, "num_tokens": 967215614.0, "step": 5756 }, { "entropy": 1.6857891182104747, "epoch": 0.6324462387739969, "grad_norm": 0.6653056740760803, "learning_rate": 1.6404300575159266e-05, "loss": 1.3096, "mean_token_accuracy": 0.6652368158102036, "num_tokens": 967351993.0, "step": 5757 }, { "entropy": 1.6991856694221497, "epoch": 0.6325560956853699, "grad_norm": 0.7402186989784241, "learning_rate": 1.640302016075767e-05, "loss": 1.4144, "mean_token_accuracy": 0.6631848861773809, "num_tokens": 967491867.0, "step": 5758 }, { "entropy": 1.7615701655546825, "epoch": 0.6326659525967427, "grad_norm": 0.8464493751525879, "learning_rate": 1.6401739575352922e-05, "loss": 1.4789, "mean_token_accuracy": 0.6543097992738088, "num_tokens": 967645170.0, "step": 5759 }, { "entropy": 1.6885365744431813, "epoch": 0.6327758095081156, "grad_norm": 0.7495446801185608, "learning_rate": 1.640045881898555e-05, "loss": 1.3634, "mean_token_accuracy": 0.669794961810112, "num_tokens": 967769391.0, "step": 5760 }, { "entropy": 1.7515502472718556, "epoch": 0.6328856664194886, "grad_norm": 0.6737968325614929, "learning_rate": 1.639917789169609e-05, "loss": 1.376, "mean_token_accuracy": 0.6509286512931188, "num_tokens": 967905792.0, "step": 5761 }, { "entropy": 1.6751770774523418, "epoch": 0.6329955233308615, "grad_norm": 0.6613587141036987, "learning_rate": 1.639789679352508e-05, "loss": 1.3934, "mean_token_accuracy": 0.6460143725077311, "num_tokens": 968068487.0, "step": 5762 }, { "entropy": 1.7160434822241466, "epoch": 0.6331053802422345, "grad_norm": 0.7124457359313965, "learning_rate": 1.639661552451307e-05, "loss": 1.2378, "mean_token_accuracy": 0.6820594320694605, "num_tokens": 968185469.0, "step": 5763 }, { "entropy": 1.7458227773507435, "epoch": 0.6332152371536074, "grad_norm": 0.6111082434654236, "learning_rate": 1.6395334084700613e-05, "loss": 1.4604, "mean_token_accuracy": 0.6464193016290665, "num_tokens": 968396545.0, "step": 5764 }, { "entropy": 1.7225351532300313, "epoch": 0.6333250940649804, "grad_norm": 0.7520685791969299, "learning_rate": 1.6394052474128262e-05, "loss": 1.3592, "mean_token_accuracy": 0.6575525949398676, "num_tokens": 968577101.0, "step": 5765 }, { "entropy": 1.7581463356812794, "epoch": 0.6334349509763533, "grad_norm": 0.6961262822151184, "learning_rate": 1.639277069283658e-05, "loss": 1.4337, "mean_token_accuracy": 0.6417265683412552, "num_tokens": 968738011.0, "step": 5766 }, { "entropy": 1.6993624071280162, "epoch": 0.6335448078877263, "grad_norm": 0.7254540920257568, "learning_rate": 1.6391488740866137e-05, "loss": 1.6107, "mean_token_accuracy": 0.6395700896779696, "num_tokens": 968951008.0, "step": 5767 }, { "entropy": 1.712354451417923, "epoch": 0.6336546647990992, "grad_norm": 0.8201763033866882, "learning_rate": 1.6390206618257504e-05, "loss": 1.3487, "mean_token_accuracy": 0.6735559155543646, "num_tokens": 969087698.0, "step": 5768 }, { "entropy": 1.6842971344788868, "epoch": 0.6337645217104722, "grad_norm": 0.6519754528999329, "learning_rate": 1.6388924325051262e-05, "loss": 1.401, "mean_token_accuracy": 0.6581608355045319, "num_tokens": 969272928.0, "step": 5769 }, { "entropy": 1.673642873764038, "epoch": 0.633874378621845, "grad_norm": 0.6902750134468079, "learning_rate": 1.6387641861287988e-05, "loss": 1.401, "mean_token_accuracy": 0.6717785199483236, "num_tokens": 969452469.0, "step": 5770 }, { "entropy": 1.6944104433059692, "epoch": 0.633984235533218, "grad_norm": 0.7268493175506592, "learning_rate": 1.6386359227008283e-05, "loss": 1.4569, "mean_token_accuracy": 0.6476317048072815, "num_tokens": 969642687.0, "step": 5771 }, { "entropy": 1.6414244969685872, "epoch": 0.6340940924445909, "grad_norm": 0.5762456655502319, "learning_rate": 1.6385076422252735e-05, "loss": 1.4876, "mean_token_accuracy": 0.6473345657189687, "num_tokens": 969895358.0, "step": 5772 }, { "entropy": 1.7236258288224537, "epoch": 0.6342039493559638, "grad_norm": 0.773324728012085, "learning_rate": 1.638379344706194e-05, "loss": 1.4069, "mean_token_accuracy": 0.6589711755514145, "num_tokens": 970022354.0, "step": 5773 }, { "entropy": 1.6633692582448323, "epoch": 0.6343138062673368, "grad_norm": 0.6954154968261719, "learning_rate": 1.6382510301476514e-05, "loss": 1.3966, "mean_token_accuracy": 0.6627478500207266, "num_tokens": 970183041.0, "step": 5774 }, { "entropy": 1.722163478533427, "epoch": 0.6344236631787097, "grad_norm": 0.6416156888008118, "learning_rate": 1.638122698553706e-05, "loss": 1.3794, "mean_token_accuracy": 0.6767031103372574, "num_tokens": 970325617.0, "step": 5775 }, { "entropy": 1.7074719667434692, "epoch": 0.6345335200900827, "grad_norm": 0.6638447046279907, "learning_rate": 1.6379943499284194e-05, "loss": 1.366, "mean_token_accuracy": 0.6629780729611715, "num_tokens": 970503187.0, "step": 5776 }, { "entropy": 1.7575515409310658, "epoch": 0.6346433770014556, "grad_norm": 0.6173842549324036, "learning_rate": 1.6378659842758545e-05, "loss": 1.4809, "mean_token_accuracy": 0.6375833203395208, "num_tokens": 970734107.0, "step": 5777 }, { "entropy": 1.6983816027641296, "epoch": 0.6347532339128286, "grad_norm": 0.638751208782196, "learning_rate": 1.6377376016000735e-05, "loss": 1.3994, "mean_token_accuracy": 0.6642539451519648, "num_tokens": 970909962.0, "step": 5778 }, { "entropy": 1.681634436051051, "epoch": 0.6348630908242014, "grad_norm": 0.7870696187019348, "learning_rate": 1.6376092019051396e-05, "loss": 1.5343, "mean_token_accuracy": 0.6600727339585623, "num_tokens": 971073686.0, "step": 5779 }, { "entropy": 1.6831410626570384, "epoch": 0.6349729477355744, "grad_norm": 0.7157701849937439, "learning_rate": 1.6374807851951166e-05, "loss": 1.4874, "mean_token_accuracy": 0.6633025457461675, "num_tokens": 971233291.0, "step": 5780 }, { "entropy": 1.6157586574554443, "epoch": 0.6350828046469473, "grad_norm": 0.6183757185935974, "learning_rate": 1.637352351474069e-05, "loss": 1.318, "mean_token_accuracy": 0.6748292644818624, "num_tokens": 971387042.0, "step": 5781 }, { "entropy": 1.7227964301904042, "epoch": 0.6351926615583203, "grad_norm": 0.7202012538909912, "learning_rate": 1.6372239007460618e-05, "loss": 1.38, "mean_token_accuracy": 0.6581563999255499, "num_tokens": 971608939.0, "step": 5782 }, { "entropy": 1.6987042327721913, "epoch": 0.6353025184696932, "grad_norm": 0.598973274230957, "learning_rate": 1.63709543301516e-05, "loss": 1.5169, "mean_token_accuracy": 0.6307604809602102, "num_tokens": 971829170.0, "step": 5783 }, { "entropy": 1.6923502782980602, "epoch": 0.6354123753810662, "grad_norm": 0.6629777550697327, "learning_rate": 1.6369669482854298e-05, "loss": 1.3937, "mean_token_accuracy": 0.6485069692134857, "num_tokens": 971977025.0, "step": 5784 }, { "entropy": 1.6766403019428253, "epoch": 0.6355222322924391, "grad_norm": 0.7367562055587769, "learning_rate": 1.6368384465609376e-05, "loss": 1.4578, "mean_token_accuracy": 0.6594801992177963, "num_tokens": 972163071.0, "step": 5785 }, { "entropy": 1.7118110756079357, "epoch": 0.6356320892038121, "grad_norm": 0.6576919555664062, "learning_rate": 1.636709927845751e-05, "loss": 1.4844, "mean_token_accuracy": 0.6427266945441564, "num_tokens": 972368158.0, "step": 5786 }, { "entropy": 1.712723731994629, "epoch": 0.635741946115185, "grad_norm": 0.6602995991706848, "learning_rate": 1.6365813921439365e-05, "loss": 1.3296, "mean_token_accuracy": 0.6641115595897039, "num_tokens": 972525592.0, "step": 5787 }, { "entropy": 1.7280906836191814, "epoch": 0.6358518030265579, "grad_norm": 0.7627761960029602, "learning_rate": 1.6364528394595627e-05, "loss": 1.3437, "mean_token_accuracy": 0.6574230591456095, "num_tokens": 972716798.0, "step": 5788 }, { "entropy": 1.7323518792788188, "epoch": 0.6359616599379309, "grad_norm": 0.714829683303833, "learning_rate": 1.6363242697966984e-05, "loss": 1.495, "mean_token_accuracy": 0.6422079453865687, "num_tokens": 972883401.0, "step": 5789 }, { "entropy": 1.6686233679453533, "epoch": 0.6360715168493037, "grad_norm": 0.6240226030349731, "learning_rate": 1.636195683159413e-05, "loss": 1.4466, "mean_token_accuracy": 0.662479097644488, "num_tokens": 973067632.0, "step": 5790 }, { "entropy": 1.6985422770182292, "epoch": 0.6361813737606767, "grad_norm": 0.7517790198326111, "learning_rate": 1.6360670795517754e-05, "loss": 1.2413, "mean_token_accuracy": 0.6770857026179632, "num_tokens": 973199330.0, "step": 5791 }, { "entropy": 1.7671143412590027, "epoch": 0.6362912306720496, "grad_norm": 0.6133571863174438, "learning_rate": 1.6359384589778563e-05, "loss": 1.3633, "mean_token_accuracy": 0.6553646673758825, "num_tokens": 973335730.0, "step": 5792 }, { "entropy": 1.7020770212014515, "epoch": 0.6364010875834226, "grad_norm": 0.800118088722229, "learning_rate": 1.6358098214417263e-05, "loss": 1.5184, "mean_token_accuracy": 0.6433244993289312, "num_tokens": 973529438.0, "step": 5793 }, { "entropy": 1.6572450300057728, "epoch": 0.6365109444947955, "grad_norm": 0.6743999123573303, "learning_rate": 1.635681166947457e-05, "loss": 1.3541, "mean_token_accuracy": 0.665309856335322, "num_tokens": 973686576.0, "step": 5794 }, { "entropy": 1.7047623197237651, "epoch": 0.6366208014061685, "grad_norm": 0.7407748699188232, "learning_rate": 1.6355524954991205e-05, "loss": 1.3828, "mean_token_accuracy": 0.6678665081659952, "num_tokens": 973906181.0, "step": 5795 }, { "entropy": 1.6628866891066234, "epoch": 0.6367306583175414, "grad_norm": 0.5496436953544617, "learning_rate": 1.6354238071007887e-05, "loss": 1.2899, "mean_token_accuracy": 0.6707668304443359, "num_tokens": 974089134.0, "step": 5796 }, { "entropy": 1.7366754313309987, "epoch": 0.6368405152289144, "grad_norm": 0.5878070592880249, "learning_rate": 1.6352951017565346e-05, "loss": 1.5668, "mean_token_accuracy": 0.6261717478434244, "num_tokens": 974292842.0, "step": 5797 }, { "entropy": 1.6523142755031586, "epoch": 0.6369503721402873, "grad_norm": 0.6288219690322876, "learning_rate": 1.6351663794704316e-05, "loss": 1.4322, "mean_token_accuracy": 0.6486099511384964, "num_tokens": 974493940.0, "step": 5798 }, { "entropy": 1.6582373181978862, "epoch": 0.6370602290516603, "grad_norm": 0.7235569953918457, "learning_rate": 1.635037640246554e-05, "loss": 1.2853, "mean_token_accuracy": 0.6691482861836752, "num_tokens": 974630137.0, "step": 5799 }, { "entropy": 1.744762162367503, "epoch": 0.6371700859630332, "grad_norm": 0.7221118211746216, "learning_rate": 1.634908884088976e-05, "loss": 1.4468, "mean_token_accuracy": 0.6466242223978043, "num_tokens": 974816980.0, "step": 5800 }, { "entropy": 1.7268809576829274, "epoch": 0.637279942874406, "grad_norm": 0.6569739580154419, "learning_rate": 1.634780111001773e-05, "loss": 1.3366, "mean_token_accuracy": 0.6608653118213018, "num_tokens": 974959442.0, "step": 5801 }, { "entropy": 1.7274185717105865, "epoch": 0.637389799785779, "grad_norm": 0.6722861528396606, "learning_rate": 1.6346513209890206e-05, "loss": 1.2967, "mean_token_accuracy": 0.6615037868420283, "num_tokens": 975083415.0, "step": 5802 }, { "entropy": 1.6528538862864177, "epoch": 0.6374996566971519, "grad_norm": 0.6227561235427856, "learning_rate": 1.6345225140547946e-05, "loss": 1.3146, "mean_token_accuracy": 0.6666281570990881, "num_tokens": 975209878.0, "step": 5803 }, { "entropy": 1.6657472550868988, "epoch": 0.6376095136085249, "grad_norm": 0.6815557479858398, "learning_rate": 1.634393690203172e-05, "loss": 1.3838, "mean_token_accuracy": 0.6573799202839533, "num_tokens": 975397134.0, "step": 5804 }, { "entropy": 1.6994928816954296, "epoch": 0.6377193705198978, "grad_norm": 0.6876154541969299, "learning_rate": 1.63426484943823e-05, "loss": 1.3021, "mean_token_accuracy": 0.6681809027989706, "num_tokens": 975549830.0, "step": 5805 }, { "entropy": 1.6880747079849243, "epoch": 0.6378292274312708, "grad_norm": 0.649737536907196, "learning_rate": 1.6341359917640462e-05, "loss": 1.2683, "mean_token_accuracy": 0.6772895157337189, "num_tokens": 975724955.0, "step": 5806 }, { "entropy": 1.7019491692384083, "epoch": 0.6379390843426437, "grad_norm": 0.680798351764679, "learning_rate": 1.634007117184699e-05, "loss": 1.3433, "mean_token_accuracy": 0.6644106159607569, "num_tokens": 975899143.0, "step": 5807 }, { "entropy": 1.8168910245100658, "epoch": 0.6380489412540167, "grad_norm": 1.041638970375061, "learning_rate": 1.633878225704267e-05, "loss": 1.4986, "mean_token_accuracy": 0.640401303768158, "num_tokens": 976090892.0, "step": 5808 }, { "entropy": 1.7052318652470906, "epoch": 0.6381587981653896, "grad_norm": 0.5862450003623962, "learning_rate": 1.63374931732683e-05, "loss": 1.3986, "mean_token_accuracy": 0.6688533673683802, "num_tokens": 976245143.0, "step": 5809 }, { "entropy": 1.6716302533944447, "epoch": 0.6382686550767626, "grad_norm": 0.6785920858383179, "learning_rate": 1.633620392056467e-05, "loss": 1.3501, "mean_token_accuracy": 0.6693478226661682, "num_tokens": 976369806.0, "step": 5810 }, { "entropy": 1.6544945339361827, "epoch": 0.6383785119881354, "grad_norm": 0.5717010498046875, "learning_rate": 1.6334914498972595e-05, "loss": 1.3667, "mean_token_accuracy": 0.6694445610046387, "num_tokens": 976553064.0, "step": 5811 }, { "entropy": 1.7117507060368855, "epoch": 0.6384883688995084, "grad_norm": 0.6321550607681274, "learning_rate": 1.633362490853288e-05, "loss": 1.5681, "mean_token_accuracy": 0.6327792455752691, "num_tokens": 976731552.0, "step": 5812 }, { "entropy": 1.70002148548762, "epoch": 0.6385982258108813, "grad_norm": 0.634379506111145, "learning_rate": 1.633233514928634e-05, "loss": 1.3679, "mean_token_accuracy": 0.6579567342996597, "num_tokens": 976914840.0, "step": 5813 }, { "entropy": 1.7061622142791748, "epoch": 0.6387080827222542, "grad_norm": 0.6758495569229126, "learning_rate": 1.6331045221273795e-05, "loss": 1.3905, "mean_token_accuracy": 0.6592559516429901, "num_tokens": 977102576.0, "step": 5814 }, { "entropy": 1.6816378434499104, "epoch": 0.6388179396336272, "grad_norm": 0.6217886805534363, "learning_rate": 1.6329755124536074e-05, "loss": 1.3363, "mean_token_accuracy": 0.6602163165807724, "num_tokens": 977243005.0, "step": 5815 }, { "entropy": 1.6444752017656963, "epoch": 0.6389277965450001, "grad_norm": 0.7023751735687256, "learning_rate": 1.6328464859113998e-05, "loss": 1.3488, "mean_token_accuracy": 0.6646647155284882, "num_tokens": 977397257.0, "step": 5816 }, { "entropy": 1.756317913532257, "epoch": 0.6390376534563731, "grad_norm": 0.753580629825592, "learning_rate": 1.6327174425048415e-05, "loss": 1.34, "mean_token_accuracy": 0.6718757003545761, "num_tokens": 977535347.0, "step": 5817 }, { "entropy": 1.6862174967924755, "epoch": 0.639147510367746, "grad_norm": 0.6223485469818115, "learning_rate": 1.632588382238016e-05, "loss": 1.2357, "mean_token_accuracy": 0.6681343664725622, "num_tokens": 977680666.0, "step": 5818 }, { "entropy": 1.6936693688233693, "epoch": 0.639257367279119, "grad_norm": 0.6414377689361572, "learning_rate": 1.6324593051150084e-05, "loss": 1.3485, "mean_token_accuracy": 0.6657200207312902, "num_tokens": 977840285.0, "step": 5819 }, { "entropy": 1.6185721854368846, "epoch": 0.6393672241904919, "grad_norm": 0.5484555959701538, "learning_rate": 1.632330211139904e-05, "loss": 1.2784, "mean_token_accuracy": 0.6813697318236033, "num_tokens": 978022560.0, "step": 5820 }, { "entropy": 1.6399174928665161, "epoch": 0.6394770811018649, "grad_norm": 0.6649972796440125, "learning_rate": 1.6322011003167877e-05, "loss": 1.3435, "mean_token_accuracy": 0.6716059247652689, "num_tokens": 978163797.0, "step": 5821 }, { "entropy": 1.761791964371999, "epoch": 0.6395869380132377, "grad_norm": 0.7113659381866455, "learning_rate": 1.6320719726497465e-05, "loss": 1.3612, "mean_token_accuracy": 0.655978669722875, "num_tokens": 978316603.0, "step": 5822 }, { "entropy": 1.7087388435999553, "epoch": 0.6396967949246107, "grad_norm": 0.6851733326911926, "learning_rate": 1.6319428281428674e-05, "loss": 1.3932, "mean_token_accuracy": 0.6515816897153854, "num_tokens": 978468692.0, "step": 5823 }, { "entropy": 1.7219915489355724, "epoch": 0.6398066518359836, "grad_norm": 0.6899517774581909, "learning_rate": 1.6318136668002374e-05, "loss": 1.5738, "mean_token_accuracy": 0.6320692549149195, "num_tokens": 978646354.0, "step": 5824 }, { "entropy": 1.705000917116801, "epoch": 0.6399165087473566, "grad_norm": 0.7402633428573608, "learning_rate": 1.6316844886259443e-05, "loss": 1.2827, "mean_token_accuracy": 0.6657779663801193, "num_tokens": 978781744.0, "step": 5825 }, { "entropy": 1.7203875084718068, "epoch": 0.6400263656587295, "grad_norm": 0.8369673490524292, "learning_rate": 1.631555293624077e-05, "loss": 1.4431, "mean_token_accuracy": 0.6510690748691559, "num_tokens": 978918388.0, "step": 5826 }, { "entropy": 1.6911202172438304, "epoch": 0.6401362225701024, "grad_norm": 0.7420011162757874, "learning_rate": 1.6314260817987237e-05, "loss": 1.3414, "mean_token_accuracy": 0.6616135090589523, "num_tokens": 979058916.0, "step": 5827 }, { "entropy": 1.7454005479812622, "epoch": 0.6402460794814754, "grad_norm": 0.6640864610671997, "learning_rate": 1.6312968531539748e-05, "loss": 1.4186, "mean_token_accuracy": 0.6549846281607946, "num_tokens": 979208637.0, "step": 5828 }, { "entropy": 1.7146745920181274, "epoch": 0.6403559363928483, "grad_norm": 0.736393392086029, "learning_rate": 1.6311676076939197e-05, "loss": 1.4239, "mean_token_accuracy": 0.6446640143791834, "num_tokens": 979366751.0, "step": 5829 }, { "entropy": 1.7976812819639842, "epoch": 0.6404657933042213, "grad_norm": 0.7656483054161072, "learning_rate": 1.6310383454226496e-05, "loss": 1.5592, "mean_token_accuracy": 0.633850152293841, "num_tokens": 979523642.0, "step": 5830 }, { "entropy": 1.7306662797927856, "epoch": 0.6405756502155942, "grad_norm": 0.7166682481765747, "learning_rate": 1.6309090663442546e-05, "loss": 1.416, "mean_token_accuracy": 0.6429750323295593, "num_tokens": 979684950.0, "step": 5831 }, { "entropy": 1.6555648644765217, "epoch": 0.6406855071269671, "grad_norm": 0.6912305951118469, "learning_rate": 1.6307797704628272e-05, "loss": 1.2916, "mean_token_accuracy": 0.6630461364984512, "num_tokens": 979822099.0, "step": 5832 }, { "entropy": 1.7157772084077199, "epoch": 0.64079536403834, "grad_norm": 0.5763871073722839, "learning_rate": 1.6306504577824594e-05, "loss": 1.4631, "mean_token_accuracy": 0.6457369774580002, "num_tokens": 980073550.0, "step": 5833 }, { "entropy": 1.6614607473214467, "epoch": 0.640905220949713, "grad_norm": 0.6631821990013123, "learning_rate": 1.6305211283072432e-05, "loss": 1.4923, "mean_token_accuracy": 0.6427841186523438, "num_tokens": 980267997.0, "step": 5834 }, { "entropy": 1.7480806112289429, "epoch": 0.6410150778610859, "grad_norm": 0.6579576730728149, "learning_rate": 1.6303917820412726e-05, "loss": 1.4038, "mean_token_accuracy": 0.6576637079318365, "num_tokens": 980443398.0, "step": 5835 }, { "entropy": 1.757957011461258, "epoch": 0.6411249347724589, "grad_norm": 0.8210548758506775, "learning_rate": 1.630262418988641e-05, "loss": 1.6428, "mean_token_accuracy": 0.6485659529765447, "num_tokens": 980594525.0, "step": 5836 }, { "entropy": 1.736217776934306, "epoch": 0.6412347916838318, "grad_norm": 0.6790369749069214, "learning_rate": 1.6301330391534432e-05, "loss": 1.5209, "mean_token_accuracy": 0.6406088074048361, "num_tokens": 980755445.0, "step": 5837 }, { "entropy": 1.7437133093674977, "epoch": 0.6413446485952048, "grad_norm": 0.7322378754615784, "learning_rate": 1.6300036425397732e-05, "loss": 1.4433, "mean_token_accuracy": 0.6456714073816935, "num_tokens": 980925437.0, "step": 5838 }, { "entropy": 1.7406888504823048, "epoch": 0.6414545055065777, "grad_norm": 0.8085801601409912, "learning_rate": 1.629874229151727e-05, "loss": 1.4783, "mean_token_accuracy": 0.6532542854547501, "num_tokens": 981085144.0, "step": 5839 }, { "entropy": 1.6952376067638397, "epoch": 0.6415643624179507, "grad_norm": 0.7089763879776001, "learning_rate": 1.6297447989934e-05, "loss": 1.3368, "mean_token_accuracy": 0.6644267588853836, "num_tokens": 981266976.0, "step": 5840 }, { "entropy": 1.6870313982168834, "epoch": 0.6416742193293236, "grad_norm": 0.6560161709785461, "learning_rate": 1.6296153520688886e-05, "loss": 1.4609, "mean_token_accuracy": 0.6392157872517904, "num_tokens": 981482379.0, "step": 5841 }, { "entropy": 1.713809609413147, "epoch": 0.6417840762406964, "grad_norm": 0.6234894394874573, "learning_rate": 1.6294858883822902e-05, "loss": 1.3206, "mean_token_accuracy": 0.6720296243826548, "num_tokens": 981646187.0, "step": 5842 }, { "entropy": 1.7012270887692769, "epoch": 0.6418939331520694, "grad_norm": 0.6372230052947998, "learning_rate": 1.6293564079377024e-05, "loss": 1.399, "mean_token_accuracy": 0.646788035829862, "num_tokens": 981809605.0, "step": 5843 }, { "entropy": 1.6442164182662964, "epoch": 0.6420037900634423, "grad_norm": 0.6384155750274658, "learning_rate": 1.6292269107392223e-05, "loss": 1.3745, "mean_token_accuracy": 0.6641842971245447, "num_tokens": 981981077.0, "step": 5844 }, { "entropy": 1.723905752102534, "epoch": 0.6421136469748153, "grad_norm": 0.6278502345085144, "learning_rate": 1.6290973967909492e-05, "loss": 1.3378, "mean_token_accuracy": 0.66019007563591, "num_tokens": 982155308.0, "step": 5845 }, { "entropy": 1.704160491625468, "epoch": 0.6422235038861882, "grad_norm": 0.6128730177879333, "learning_rate": 1.6289678660969818e-05, "loss": 1.4123, "mean_token_accuracy": 0.663148025671641, "num_tokens": 982334839.0, "step": 5846 }, { "entropy": 1.6866064369678497, "epoch": 0.6423333607975612, "grad_norm": 0.5298371911048889, "learning_rate": 1.6288383186614198e-05, "loss": 1.4093, "mean_token_accuracy": 0.6518398175636927, "num_tokens": 982563768.0, "step": 5847 }, { "entropy": 1.716744065284729, "epoch": 0.6424432177089341, "grad_norm": 0.6819528937339783, "learning_rate": 1.6287087544883633e-05, "loss": 1.354, "mean_token_accuracy": 0.6657714794079462, "num_tokens": 982751166.0, "step": 5848 }, { "entropy": 1.6699997087319691, "epoch": 0.6425530746203071, "grad_norm": 0.8962133526802063, "learning_rate": 1.628579173581913e-05, "loss": 1.3544, "mean_token_accuracy": 0.6710561861594518, "num_tokens": 982903845.0, "step": 5849 }, { "entropy": 1.6850681801637013, "epoch": 0.64266293153168, "grad_norm": 0.6950148940086365, "learning_rate": 1.62844957594617e-05, "loss": 1.4295, "mean_token_accuracy": 0.6709064096212387, "num_tokens": 983059833.0, "step": 5850 }, { "entropy": 1.6466986139615376, "epoch": 0.642772788443053, "grad_norm": 0.7298344373703003, "learning_rate": 1.6283199615852364e-05, "loss": 1.2233, "mean_token_accuracy": 0.6797444274028143, "num_tokens": 983193013.0, "step": 5851 }, { "entropy": 1.6685850421587627, "epoch": 0.6428826453544259, "grad_norm": 0.7035362124443054, "learning_rate": 1.6281903305032135e-05, "loss": 1.3384, "mean_token_accuracy": 0.6661973843971888, "num_tokens": 983321092.0, "step": 5852 }, { "entropy": 1.722305456797282, "epoch": 0.6429925022657988, "grad_norm": 0.6722835302352905, "learning_rate": 1.6280606827042053e-05, "loss": 1.3151, "mean_token_accuracy": 0.6609906901915868, "num_tokens": 983476645.0, "step": 5853 }, { "entropy": 1.7051582833131154, "epoch": 0.6431023591771717, "grad_norm": 0.591584324836731, "learning_rate": 1.6279310181923137e-05, "loss": 1.4214, "mean_token_accuracy": 0.65077872077624, "num_tokens": 983641414.0, "step": 5854 }, { "entropy": 1.7542580962181091, "epoch": 0.6432122160885446, "grad_norm": 0.7439432144165039, "learning_rate": 1.627801336971644e-05, "loss": 1.3505, "mean_token_accuracy": 0.6534154663483301, "num_tokens": 983766157.0, "step": 5855 }, { "entropy": 1.7984492977460225, "epoch": 0.6433220729999176, "grad_norm": 0.7941092848777771, "learning_rate": 1.627671639046299e-05, "loss": 1.7088, "mean_token_accuracy": 0.6389260292053223, "num_tokens": 983977429.0, "step": 5856 }, { "entropy": 1.720232754945755, "epoch": 0.6434319299112905, "grad_norm": 0.8250980973243713, "learning_rate": 1.6275419244203853e-05, "loss": 1.519, "mean_token_accuracy": 0.6374199092388153, "num_tokens": 984183890.0, "step": 5857 }, { "entropy": 1.736306478579839, "epoch": 0.6435417868226635, "grad_norm": 0.6806954741477966, "learning_rate": 1.627412193098007e-05, "loss": 1.5107, "mean_token_accuracy": 0.6463406682014465, "num_tokens": 984373464.0, "step": 5858 }, { "entropy": 1.7540569305419922, "epoch": 0.6436516437340364, "grad_norm": 0.7410135865211487, "learning_rate": 1.62728244508327e-05, "loss": 1.2862, "mean_token_accuracy": 0.6681255847215652, "num_tokens": 984513422.0, "step": 5859 }, { "entropy": 1.6755077838897705, "epoch": 0.6437615006454094, "grad_norm": 0.6239484548568726, "learning_rate": 1.6271526803802818e-05, "loss": 1.3604, "mean_token_accuracy": 0.6560343901316324, "num_tokens": 984693720.0, "step": 5860 }, { "entropy": 1.645962009827296, "epoch": 0.6438713575567823, "grad_norm": 0.5675744414329529, "learning_rate": 1.6270228989931487e-05, "loss": 1.3538, "mean_token_accuracy": 0.6722413251797358, "num_tokens": 984879033.0, "step": 5861 }, { "entropy": 1.7259068687756856, "epoch": 0.6439812144681553, "grad_norm": 0.6161162853240967, "learning_rate": 1.6268931009259782e-05, "loss": 1.4002, "mean_token_accuracy": 0.6652526358763377, "num_tokens": 985044446.0, "step": 5862 }, { "entropy": 1.7283404767513275, "epoch": 0.6440910713795281, "grad_norm": 0.7567533254623413, "learning_rate": 1.6267632861828784e-05, "loss": 1.3376, "mean_token_accuracy": 0.6561006804307302, "num_tokens": 985207012.0, "step": 5863 }, { "entropy": 1.6944616238276164, "epoch": 0.6442009282909011, "grad_norm": 0.676316499710083, "learning_rate": 1.6266334547679584e-05, "loss": 1.4184, "mean_token_accuracy": 0.65648120145003, "num_tokens": 985355826.0, "step": 5864 }, { "entropy": 1.7600885530312855, "epoch": 0.644310785202274, "grad_norm": 0.672926127910614, "learning_rate": 1.626503606685326e-05, "loss": 1.4699, "mean_token_accuracy": 0.6459670712550482, "num_tokens": 985505942.0, "step": 5865 }, { "entropy": 1.7543242474397023, "epoch": 0.644420642113647, "grad_norm": 0.7283417582511902, "learning_rate": 1.6263737419390924e-05, "loss": 1.3375, "mean_token_accuracy": 0.6589344541231791, "num_tokens": 985632534.0, "step": 5866 }, { "entropy": 1.7102359334627788, "epoch": 0.6445304990250199, "grad_norm": 0.6508517265319824, "learning_rate": 1.626243860533367e-05, "loss": 1.3067, "mean_token_accuracy": 0.6687440226475397, "num_tokens": 985812243.0, "step": 5867 }, { "entropy": 1.651482840379079, "epoch": 0.6446403559363928, "grad_norm": 0.8385653495788574, "learning_rate": 1.6261139624722607e-05, "loss": 1.3541, "mean_token_accuracy": 0.6784233748912811, "num_tokens": 986033493.0, "step": 5868 }, { "entropy": 1.8157791793346405, "epoch": 0.6447502128477658, "grad_norm": 0.7561879754066467, "learning_rate": 1.6259840477598842e-05, "loss": 1.6814, "mean_token_accuracy": 0.6107426683108012, "num_tokens": 986264905.0, "step": 5869 }, { "entropy": 1.7169578472773235, "epoch": 0.6448600697591387, "grad_norm": 0.6490621566772461, "learning_rate": 1.6258541164003497e-05, "loss": 1.48, "mean_token_accuracy": 0.6457877457141876, "num_tokens": 986432715.0, "step": 5870 }, { "entropy": 1.694730967283249, "epoch": 0.6449699266705117, "grad_norm": 0.6287488341331482, "learning_rate": 1.6257241683977695e-05, "loss": 1.3854, "mean_token_accuracy": 0.6665887037913004, "num_tokens": 986596620.0, "step": 5871 }, { "entropy": 1.7151194314161937, "epoch": 0.6450797835818846, "grad_norm": 0.7435504794120789, "learning_rate": 1.625594203756256e-05, "loss": 1.5081, "mean_token_accuracy": 0.6542889624834061, "num_tokens": 986763978.0, "step": 5872 }, { "entropy": 1.7020180424054463, "epoch": 0.6451896404932576, "grad_norm": 0.6360806226730347, "learning_rate": 1.625464222479923e-05, "loss": 1.3542, "mean_token_accuracy": 0.664187510808309, "num_tokens": 986915228.0, "step": 5873 }, { "entropy": 1.6966914137204487, "epoch": 0.6452994974046304, "grad_norm": 0.8072082996368408, "learning_rate": 1.625334224572884e-05, "loss": 1.4327, "mean_token_accuracy": 0.6710225045681, "num_tokens": 987091477.0, "step": 5874 }, { "entropy": 1.6498130361239116, "epoch": 0.6454093543160034, "grad_norm": 0.7897175550460815, "learning_rate": 1.6252042100392535e-05, "loss": 1.4103, "mean_token_accuracy": 0.6601500709851583, "num_tokens": 987264375.0, "step": 5875 }, { "entropy": 1.687047004699707, "epoch": 0.6455192112273763, "grad_norm": 0.607562243938446, "learning_rate": 1.6250741788831466e-05, "loss": 1.4087, "mean_token_accuracy": 0.655857135852178, "num_tokens": 987426445.0, "step": 5876 }, { "entropy": 1.7364888588587444, "epoch": 0.6456290681387493, "grad_norm": 0.5785548090934753, "learning_rate": 1.6249441311086788e-05, "loss": 1.5631, "mean_token_accuracy": 0.6313952604929606, "num_tokens": 987658927.0, "step": 5877 }, { "entropy": 1.6707642773787181, "epoch": 0.6457389250501222, "grad_norm": 0.8916179537773132, "learning_rate": 1.624814066719965e-05, "loss": 1.3344, "mean_token_accuracy": 0.6687265535195669, "num_tokens": 987785339.0, "step": 5878 }, { "entropy": 1.6992899874846141, "epoch": 0.6458487819614952, "grad_norm": 0.6406787633895874, "learning_rate": 1.624683985721123e-05, "loss": 1.3642, "mean_token_accuracy": 0.6527419139941534, "num_tokens": 987956404.0, "step": 5879 }, { "entropy": 1.6912338038285573, "epoch": 0.6459586388728681, "grad_norm": 0.7404332160949707, "learning_rate": 1.6245538881162693e-05, "loss": 1.2955, "mean_token_accuracy": 0.6724090029795965, "num_tokens": 988096917.0, "step": 5880 }, { "entropy": 1.7176282107830048, "epoch": 0.6460684957842411, "grad_norm": 0.737346887588501, "learning_rate": 1.624423773909521e-05, "loss": 1.3986, "mean_token_accuracy": 0.6630405435959498, "num_tokens": 988245509.0, "step": 5881 }, { "entropy": 1.709346890449524, "epoch": 0.646178352695614, "grad_norm": 0.6211323738098145, "learning_rate": 1.6242936431049973e-05, "loss": 1.4867, "mean_token_accuracy": 0.6363749404748281, "num_tokens": 988448841.0, "step": 5882 }, { "entropy": 1.7240298589070637, "epoch": 0.6462882096069869, "grad_norm": 0.6329193115234375, "learning_rate": 1.6241634957068155e-05, "loss": 1.4412, "mean_token_accuracy": 0.6437714745601019, "num_tokens": 988619793.0, "step": 5883 }, { "entropy": 1.724859396616618, "epoch": 0.6463980665183598, "grad_norm": 0.6122381687164307, "learning_rate": 1.6240333317190953e-05, "loss": 1.4146, "mean_token_accuracy": 0.6419190764427185, "num_tokens": 988803888.0, "step": 5884 }, { "entropy": 1.7188538114229839, "epoch": 0.6465079234297327, "grad_norm": 0.6661576628684998, "learning_rate": 1.6239031511459564e-05, "loss": 1.381, "mean_token_accuracy": 0.657758911450704, "num_tokens": 988978211.0, "step": 5885 }, { "entropy": 1.7248981595039368, "epoch": 0.6466177803411057, "grad_norm": 0.7169018387794495, "learning_rate": 1.6237729539915187e-05, "loss": 1.4442, "mean_token_accuracy": 0.6563170303901037, "num_tokens": 989143411.0, "step": 5886 }, { "entropy": 1.7199460367361705, "epoch": 0.6467276372524786, "grad_norm": 0.5790041089057922, "learning_rate": 1.6236427402599032e-05, "loss": 1.5012, "mean_token_accuracy": 0.6406532824039459, "num_tokens": 989369510.0, "step": 5887 }, { "entropy": 1.7657626469930012, "epoch": 0.6468374941638516, "grad_norm": 0.8561536073684692, "learning_rate": 1.623512509955231e-05, "loss": 1.4471, "mean_token_accuracy": 0.659746582309405, "num_tokens": 989548102.0, "step": 5888 }, { "entropy": 1.6926214396953583, "epoch": 0.6469473510752245, "grad_norm": 0.6689512133598328, "learning_rate": 1.6233822630816234e-05, "loss": 1.2409, "mean_token_accuracy": 0.6762634168068568, "num_tokens": 989679599.0, "step": 5889 }, { "entropy": 1.7333985964457195, "epoch": 0.6470572079865975, "grad_norm": 0.7683215141296387, "learning_rate": 1.6232519996432035e-05, "loss": 1.3786, "mean_token_accuracy": 0.6628393729527792, "num_tokens": 989827866.0, "step": 5890 }, { "entropy": 1.7282644311587017, "epoch": 0.6471670648979704, "grad_norm": 0.731306254863739, "learning_rate": 1.623121719644093e-05, "loss": 1.3069, "mean_token_accuracy": 0.6695700138807297, "num_tokens": 989936813.0, "step": 5891 }, { "entropy": 1.6467431485652924, "epoch": 0.6472769218093434, "grad_norm": 0.7342338562011719, "learning_rate": 1.6229914230884163e-05, "loss": 1.4412, "mean_token_accuracy": 0.6496950487295786, "num_tokens": 990122059.0, "step": 5892 }, { "entropy": 1.7180868089199066, "epoch": 0.6473867787207163, "grad_norm": 0.7283189296722412, "learning_rate": 1.6228611099802964e-05, "loss": 1.2448, "mean_token_accuracy": 0.6772717932860056, "num_tokens": 990230529.0, "step": 5893 }, { "entropy": 1.7020895679791768, "epoch": 0.6474966356320893, "grad_norm": 0.6616482734680176, "learning_rate": 1.6227307803238585e-05, "loss": 1.6071, "mean_token_accuracy": 0.6459571321805319, "num_tokens": 990458221.0, "step": 5894 }, { "entropy": 1.7780593534310658, "epoch": 0.6476064925434621, "grad_norm": 0.686166524887085, "learning_rate": 1.6226004341232265e-05, "loss": 1.3715, "mean_token_accuracy": 0.6415807555119196, "num_tokens": 990660990.0, "step": 5895 }, { "entropy": 1.6996767024199169, "epoch": 0.647716349454835, "grad_norm": 0.7599523067474365, "learning_rate": 1.622470071382526e-05, "loss": 1.326, "mean_token_accuracy": 0.6618270923693975, "num_tokens": 990797885.0, "step": 5896 }, { "entropy": 1.6373738249142964, "epoch": 0.647826206366208, "grad_norm": 0.60347580909729, "learning_rate": 1.622339692105884e-05, "loss": 1.315, "mean_token_accuracy": 0.6724487642447153, "num_tokens": 990944305.0, "step": 5897 }, { "entropy": 1.648904373248418, "epoch": 0.6479360632775809, "grad_norm": 0.6465940475463867, "learning_rate": 1.6222092962974255e-05, "loss": 1.3056, "mean_token_accuracy": 0.6669845134019852, "num_tokens": 991127904.0, "step": 5898 }, { "entropy": 1.7032305300235748, "epoch": 0.6480459201889539, "grad_norm": 0.7024495601654053, "learning_rate": 1.622078883961278e-05, "loss": 1.3523, "mean_token_accuracy": 0.6614350527524948, "num_tokens": 991310770.0, "step": 5899 }, { "entropy": 1.7386066218217213, "epoch": 0.6481557771003268, "grad_norm": 0.7756279110908508, "learning_rate": 1.6219484551015694e-05, "loss": 1.4412, "mean_token_accuracy": 0.6557717273632685, "num_tokens": 991453676.0, "step": 5900 }, { "entropy": 1.6870729128519695, "epoch": 0.6482656340116998, "grad_norm": 0.7938646078109741, "learning_rate": 1.6218180097224273e-05, "loss": 1.4533, "mean_token_accuracy": 0.6631547510623932, "num_tokens": 991615628.0, "step": 5901 }, { "entropy": 1.6689063012599945, "epoch": 0.6483754909230727, "grad_norm": 0.7687073945999146, "learning_rate": 1.6216875478279802e-05, "loss": 1.2669, "mean_token_accuracy": 0.6779245138168335, "num_tokens": 991759251.0, "step": 5902 }, { "entropy": 1.7391593058904011, "epoch": 0.6484853478344457, "grad_norm": 0.7042144536972046, "learning_rate": 1.6215570694223574e-05, "loss": 1.5169, "mean_token_accuracy": 0.6359234601259232, "num_tokens": 992007896.0, "step": 5903 }, { "entropy": 1.6700084805488586, "epoch": 0.6485952047458186, "grad_norm": 0.7073227167129517, "learning_rate": 1.6214265745096885e-05, "loss": 1.337, "mean_token_accuracy": 0.6727247337500254, "num_tokens": 992143508.0, "step": 5904 }, { "entropy": 1.7103844285011292, "epoch": 0.6487050616571916, "grad_norm": 1.0630773305892944, "learning_rate": 1.6212960630941035e-05, "loss": 1.5428, "mean_token_accuracy": 0.6607689758141836, "num_tokens": 992350316.0, "step": 5905 }, { "entropy": 1.6851065456867218, "epoch": 0.6488149185685644, "grad_norm": 0.7298637628555298, "learning_rate": 1.6211655351797326e-05, "loss": 1.4339, "mean_token_accuracy": 0.6611693799495697, "num_tokens": 992508315.0, "step": 5906 }, { "entropy": 1.7009667754173279, "epoch": 0.6489247754799374, "grad_norm": 0.7233312726020813, "learning_rate": 1.6210349907707076e-05, "loss": 1.5053, "mean_token_accuracy": 0.6501431415478388, "num_tokens": 992700402.0, "step": 5907 }, { "entropy": 1.6802937785784404, "epoch": 0.6490346323913103, "grad_norm": 0.6345446109771729, "learning_rate": 1.62090442987116e-05, "loss": 1.3397, "mean_token_accuracy": 0.6666805297136307, "num_tokens": 992846905.0, "step": 5908 }, { "entropy": 1.7321950197219849, "epoch": 0.6491444893026832, "grad_norm": 0.6220639944076538, "learning_rate": 1.6207738524852217e-05, "loss": 1.4515, "mean_token_accuracy": 0.6630930602550507, "num_tokens": 993004017.0, "step": 5909 }, { "entropy": 1.714138279358546, "epoch": 0.6492543462140562, "grad_norm": 0.6821590065956116, "learning_rate": 1.620643258617026e-05, "loss": 1.4696, "mean_token_accuracy": 0.6569486111402512, "num_tokens": 993141938.0, "step": 5910 }, { "entropy": 1.7272202372550964, "epoch": 0.6493642031254291, "grad_norm": 0.798577606678009, "learning_rate": 1.6205126482707058e-05, "loss": 1.3874, "mean_token_accuracy": 0.6723422755797704, "num_tokens": 993281834.0, "step": 5911 }, { "entropy": 1.7264328002929688, "epoch": 0.6494740600368021, "grad_norm": 0.7377980351448059, "learning_rate": 1.6203820214503942e-05, "loss": 1.4137, "mean_token_accuracy": 0.6667486826578776, "num_tokens": 993403525.0, "step": 5912 }, { "entropy": 1.6910780568917592, "epoch": 0.649583916948175, "grad_norm": 4.774479389190674, "learning_rate": 1.6202513781602266e-05, "loss": 1.1981, "mean_token_accuracy": 0.6723181953032812, "num_tokens": 993567459.0, "step": 5913 }, { "entropy": 1.7071150839328766, "epoch": 0.649693773859548, "grad_norm": 0.7636151313781738, "learning_rate": 1.6201207184043372e-05, "loss": 1.4607, "mean_token_accuracy": 0.658385788400968, "num_tokens": 993712733.0, "step": 5914 }, { "entropy": 1.790613979101181, "epoch": 0.6498036307709208, "grad_norm": 0.7202587723731995, "learning_rate": 1.6199900421868616e-05, "loss": 1.5591, "mean_token_accuracy": 0.6297978659470876, "num_tokens": 993897946.0, "step": 5915 }, { "entropy": 1.6419591108957927, "epoch": 0.6499134876822938, "grad_norm": 0.6573414206504822, "learning_rate": 1.6198593495119352e-05, "loss": 1.4063, "mean_token_accuracy": 0.6666153768698374, "num_tokens": 994054233.0, "step": 5916 }, { "entropy": 1.7275590499242146, "epoch": 0.6500233445936667, "grad_norm": 0.6958465576171875, "learning_rate": 1.6197286403836947e-05, "loss": 1.499, "mean_token_accuracy": 0.6515706777572632, "num_tokens": 994235070.0, "step": 5917 }, { "entropy": 1.7174121638139088, "epoch": 0.6501332015050397, "grad_norm": 0.677852213382721, "learning_rate": 1.619597914806277e-05, "loss": 1.5725, "mean_token_accuracy": 0.6482388724883398, "num_tokens": 994427969.0, "step": 5918 }, { "entropy": 1.6932408213615417, "epoch": 0.6502430584164126, "grad_norm": 0.8987300395965576, "learning_rate": 1.6194671727838193e-05, "loss": 1.445, "mean_token_accuracy": 0.6716097990671793, "num_tokens": 994573905.0, "step": 5919 }, { "entropy": 1.694073627392451, "epoch": 0.6503529153277856, "grad_norm": 0.7888936400413513, "learning_rate": 1.61933641432046e-05, "loss": 1.382, "mean_token_accuracy": 0.6525902102390925, "num_tokens": 994713145.0, "step": 5920 }, { "entropy": 1.7361188928286235, "epoch": 0.6504627722391585, "grad_norm": 0.6191293001174927, "learning_rate": 1.619205639420337e-05, "loss": 1.418, "mean_token_accuracy": 0.6470876733462015, "num_tokens": 994913257.0, "step": 5921 }, { "entropy": 1.7108994523684184, "epoch": 0.6505726291505314, "grad_norm": 0.7681605815887451, "learning_rate": 1.6190748480875893e-05, "loss": 1.3487, "mean_token_accuracy": 0.661737248301506, "num_tokens": 995089384.0, "step": 5922 }, { "entropy": 1.6949030856291454, "epoch": 0.6506824860619044, "grad_norm": 0.6882128119468689, "learning_rate": 1.6189440403263568e-05, "loss": 1.3357, "mean_token_accuracy": 0.6652708401282629, "num_tokens": 995250744.0, "step": 5923 }, { "entropy": 1.7186113198598225, "epoch": 0.6507923429732773, "grad_norm": 0.6824566721916199, "learning_rate": 1.618813216140779e-05, "loss": 1.5937, "mean_token_accuracy": 0.6373369594415029, "num_tokens": 995448756.0, "step": 5924 }, { "entropy": 1.7350513140360515, "epoch": 0.6509021998846503, "grad_norm": 0.6943637132644653, "learning_rate": 1.618682375534997e-05, "loss": 1.3293, "mean_token_accuracy": 0.6592056502898535, "num_tokens": 995579361.0, "step": 5925 }, { "entropy": 1.688408652941386, "epoch": 0.6510120567960231, "grad_norm": 0.6207675337791443, "learning_rate": 1.6185515185131516e-05, "loss": 1.3164, "mean_token_accuracy": 0.6692043642203013, "num_tokens": 995739892.0, "step": 5926 }, { "entropy": 1.720192591349284, "epoch": 0.6511219137073961, "grad_norm": 0.7376704216003418, "learning_rate": 1.6184206450793838e-05, "loss": 1.37, "mean_token_accuracy": 0.6484977056582769, "num_tokens": 995890285.0, "step": 5927 }, { "entropy": 1.7388510406017303, "epoch": 0.651231770618769, "grad_norm": 0.69366455078125, "learning_rate": 1.6182897552378366e-05, "loss": 1.3361, "mean_token_accuracy": 0.6686133096615473, "num_tokens": 996020909.0, "step": 5928 }, { "entropy": 1.7152843674023945, "epoch": 0.651341627530142, "grad_norm": 0.7456363439559937, "learning_rate": 1.618158848992652e-05, "loss": 1.341, "mean_token_accuracy": 0.6697397083044052, "num_tokens": 996149476.0, "step": 5929 }, { "entropy": 1.7538359761238098, "epoch": 0.6514514844415149, "grad_norm": 0.8604733943939209, "learning_rate": 1.6180279263479736e-05, "loss": 1.7488, "mean_token_accuracy": 0.6189997419714928, "num_tokens": 996320305.0, "step": 5930 }, { "entropy": 1.7112461129824321, "epoch": 0.6515613413528879, "grad_norm": 0.711691677570343, "learning_rate": 1.6178969873079445e-05, "loss": 1.6228, "mean_token_accuracy": 0.6372633626063665, "num_tokens": 996484824.0, "step": 5931 }, { "entropy": 1.709233562151591, "epoch": 0.6516711982642608, "grad_norm": 0.6133729219436646, "learning_rate": 1.6177660318767094e-05, "loss": 1.4192, "mean_token_accuracy": 0.6441813757022222, "num_tokens": 996650733.0, "step": 5932 }, { "entropy": 1.6950431764125824, "epoch": 0.6517810551756338, "grad_norm": 0.796097457408905, "learning_rate": 1.6176350600584127e-05, "loss": 1.3982, "mean_token_accuracy": 0.654624213774999, "num_tokens": 996826862.0, "step": 5933 }, { "entropy": 1.797725111246109, "epoch": 0.6518909120870067, "grad_norm": 0.6709982752799988, "learning_rate": 1.617504071857199e-05, "loss": 1.4719, "mean_token_accuracy": 0.6494367867708206, "num_tokens": 996948195.0, "step": 5934 }, { "entropy": 1.729837417602539, "epoch": 0.6520007689983797, "grad_norm": 0.5902916789054871, "learning_rate": 1.6173730672772154e-05, "loss": 1.4126, "mean_token_accuracy": 0.6551411052544912, "num_tokens": 997153677.0, "step": 5935 }, { "entropy": 1.6588162382443745, "epoch": 0.6521106259097526, "grad_norm": 0.5839589238166809, "learning_rate": 1.617242046322607e-05, "loss": 1.443, "mean_token_accuracy": 0.6415523837010065, "num_tokens": 997350676.0, "step": 5936 }, { "entropy": 1.7090435028076172, "epoch": 0.6522204828211254, "grad_norm": 1.012498378753662, "learning_rate": 1.6171110089975203e-05, "loss": 1.3634, "mean_token_accuracy": 0.6589536915222803, "num_tokens": 997514467.0, "step": 5937 }, { "entropy": 1.6660225788752239, "epoch": 0.6523303397324984, "grad_norm": 0.7169985175132751, "learning_rate": 1.616979955306104e-05, "loss": 1.4925, "mean_token_accuracy": 0.6534644216299057, "num_tokens": 997686163.0, "step": 5938 }, { "entropy": 1.7207870185375214, "epoch": 0.6524401966438713, "grad_norm": 0.8183472752571106, "learning_rate": 1.6168488852525048e-05, "loss": 1.41, "mean_token_accuracy": 0.658767968416214, "num_tokens": 997864009.0, "step": 5939 }, { "entropy": 1.682630827029546, "epoch": 0.6525500535552443, "grad_norm": 0.6713327169418335, "learning_rate": 1.6167177988408713e-05, "loss": 1.429, "mean_token_accuracy": 0.6574962337811788, "num_tokens": 998095171.0, "step": 5940 }, { "entropy": 1.6624448994795482, "epoch": 0.6526599104666172, "grad_norm": 0.6571035385131836, "learning_rate": 1.6165866960753525e-05, "loss": 1.5789, "mean_token_accuracy": 0.6504167219003042, "num_tokens": 998273104.0, "step": 5941 }, { "entropy": 1.70240314801534, "epoch": 0.6527697673779902, "grad_norm": 0.8170379400253296, "learning_rate": 1.6164555769600974e-05, "loss": 1.4324, "mean_token_accuracy": 0.649359330534935, "num_tokens": 998471102.0, "step": 5942 }, { "entropy": 1.7425579031308491, "epoch": 0.6528796242893631, "grad_norm": 0.6321161985397339, "learning_rate": 1.616324441499256e-05, "loss": 1.3006, "mean_token_accuracy": 0.6682200183471044, "num_tokens": 998615680.0, "step": 5943 }, { "entropy": 1.7166448334852855, "epoch": 0.6529894812007361, "grad_norm": 0.6595907211303711, "learning_rate": 1.6161932896969784e-05, "loss": 1.3832, "mean_token_accuracy": 0.6572774350643158, "num_tokens": 998779804.0, "step": 5944 }, { "entropy": 1.6734323004881542, "epoch": 0.653099338112109, "grad_norm": 0.7008593678474426, "learning_rate": 1.616062121557416e-05, "loss": 1.3083, "mean_token_accuracy": 0.6746059507131577, "num_tokens": 998937935.0, "step": 5945 }, { "entropy": 1.7303222517172496, "epoch": 0.653209195023482, "grad_norm": 0.7347795367240906, "learning_rate": 1.6159309370847204e-05, "loss": 1.4708, "mean_token_accuracy": 0.6639884759982427, "num_tokens": 999103145.0, "step": 5946 }, { "entropy": 1.6455318927764893, "epoch": 0.6533190519348548, "grad_norm": 0.6177557706832886, "learning_rate": 1.6157997362830427e-05, "loss": 1.364, "mean_token_accuracy": 0.6617141962051392, "num_tokens": 999262435.0, "step": 5947 }, { "entropy": 1.690677026907603, "epoch": 0.6534289088462278, "grad_norm": 0.7298170924186707, "learning_rate": 1.6156685191565357e-05, "loss": 1.3552, "mean_token_accuracy": 0.6585644831260046, "num_tokens": 999440995.0, "step": 5948 }, { "entropy": 1.7055202027161915, "epoch": 0.6535387657576007, "grad_norm": 0.7108672261238098, "learning_rate": 1.6155372857093528e-05, "loss": 1.3826, "mean_token_accuracy": 0.6667246073484421, "num_tokens": 999613356.0, "step": 5949 }, { "entropy": 1.6879318157831829, "epoch": 0.6536486226689736, "grad_norm": 0.7076107859611511, "learning_rate": 1.615406035945647e-05, "loss": 1.54, "mean_token_accuracy": 0.6409845153490702, "num_tokens": 999816032.0, "step": 5950 }, { "entropy": 1.7342401643594105, "epoch": 0.6537584795803466, "grad_norm": 0.9219125509262085, "learning_rate": 1.615274769869572e-05, "loss": 1.4078, "mean_token_accuracy": 0.6552455176909765, "num_tokens": 999966674.0, "step": 5951 }, { "entropy": 1.7484307487805684, "epoch": 0.6538683364917195, "grad_norm": 0.64826899766922, "learning_rate": 1.615143487485283e-05, "loss": 1.4302, "mean_token_accuracy": 0.6445176502068838, "num_tokens": 1000183638.0, "step": 5952 }, { "entropy": 1.735739419857661, "epoch": 0.6539781934030925, "grad_norm": 0.823397159576416, "learning_rate": 1.615012188796935e-05, "loss": 1.283, "mean_token_accuracy": 0.6683905571699142, "num_tokens": 1000379339.0, "step": 5953 }, { "entropy": 1.7015184263388317, "epoch": 0.6540880503144654, "grad_norm": 0.6577404737472534, "learning_rate": 1.614880873808683e-05, "loss": 1.3707, "mean_token_accuracy": 0.6538449923197428, "num_tokens": 1000517761.0, "step": 5954 }, { "entropy": 1.6893195311228435, "epoch": 0.6541979072258384, "grad_norm": 0.7297143340110779, "learning_rate": 1.6147495425246834e-05, "loss": 1.3336, "mean_token_accuracy": 0.6523545185724894, "num_tokens": 1000677812.0, "step": 5955 }, { "entropy": 1.6598160068194072, "epoch": 0.6543077641372113, "grad_norm": 0.6622530221939087, "learning_rate": 1.6146181949490926e-05, "loss": 1.3761, "mean_token_accuracy": 0.6585600723822912, "num_tokens": 1000849086.0, "step": 5956 }, { "entropy": 1.700823446114858, "epoch": 0.6544176210485843, "grad_norm": 0.866723895072937, "learning_rate": 1.6144868310860683e-05, "loss": 1.3303, "mean_token_accuracy": 0.6645294477542242, "num_tokens": 1000997207.0, "step": 5957 }, { "entropy": 1.7490895291169484, "epoch": 0.6545274779599571, "grad_norm": 0.6628153324127197, "learning_rate": 1.6143554509397673e-05, "loss": 1.3853, "mean_token_accuracy": 0.6574032058318456, "num_tokens": 1001126891.0, "step": 5958 }, { "entropy": 1.695980042219162, "epoch": 0.6546373348713301, "grad_norm": 0.6430051326751709, "learning_rate": 1.6142240545143478e-05, "loss": 1.5197, "mean_token_accuracy": 0.6624879688024521, "num_tokens": 1001291749.0, "step": 5959 }, { "entropy": 1.7520456314086914, "epoch": 0.654747191782703, "grad_norm": 0.7919518351554871, "learning_rate": 1.614092641813969e-05, "loss": 1.4127, "mean_token_accuracy": 0.6570608119169871, "num_tokens": 1001420291.0, "step": 5960 }, { "entropy": 1.7864876786867778, "epoch": 0.654857048694076, "grad_norm": 0.7036291360855103, "learning_rate": 1.61396121284279e-05, "loss": 1.4129, "mean_token_accuracy": 0.6543021847804388, "num_tokens": 1001557544.0, "step": 5961 }, { "entropy": 1.7022630870342255, "epoch": 0.6549669056054489, "grad_norm": 0.6970117092132568, "learning_rate": 1.6138297676049697e-05, "loss": 1.3648, "mean_token_accuracy": 0.6602020363012949, "num_tokens": 1001702592.0, "step": 5962 }, { "entropy": 1.676472932100296, "epoch": 0.6550767625168218, "grad_norm": 0.7215110063552856, "learning_rate": 1.613698306104669e-05, "loss": 1.3701, "mean_token_accuracy": 0.6555512299140295, "num_tokens": 1001865740.0, "step": 5963 }, { "entropy": 1.686583936214447, "epoch": 0.6551866194281948, "grad_norm": 0.6436832547187805, "learning_rate": 1.6135668283460485e-05, "loss": 1.3576, "mean_token_accuracy": 0.6521365145842234, "num_tokens": 1002008262.0, "step": 5964 }, { "entropy": 1.7136501669883728, "epoch": 0.6552964763395677, "grad_norm": 0.8774862289428711, "learning_rate": 1.613435334333269e-05, "loss": 1.3359, "mean_token_accuracy": 0.654481420914332, "num_tokens": 1002180351.0, "step": 5965 }, { "entropy": 1.6534929970900218, "epoch": 0.6554063332509407, "grad_norm": 0.7280681133270264, "learning_rate": 1.6133038240704927e-05, "loss": 1.2625, "mean_token_accuracy": 0.6773078391949335, "num_tokens": 1002293818.0, "step": 5966 }, { "entropy": 1.649037887652715, "epoch": 0.6555161901623136, "grad_norm": 0.696632981300354, "learning_rate": 1.6131722975618817e-05, "loss": 1.2936, "mean_token_accuracy": 0.6795135736465454, "num_tokens": 1002424977.0, "step": 5967 }, { "entropy": 1.7681506077448528, "epoch": 0.6556260470736865, "grad_norm": 0.7785711288452148, "learning_rate": 1.6130407548115986e-05, "loss": 1.2863, "mean_token_accuracy": 0.6662353525559107, "num_tokens": 1002526808.0, "step": 5968 }, { "entropy": 1.6666353146235149, "epoch": 0.6557359039850594, "grad_norm": 0.7321269512176514, "learning_rate": 1.612909195823807e-05, "loss": 1.3184, "mean_token_accuracy": 0.6688967347145081, "num_tokens": 1002680478.0, "step": 5969 }, { "entropy": 1.6807195643583934, "epoch": 0.6558457608964324, "grad_norm": 1.010912299156189, "learning_rate": 1.6127776206026706e-05, "loss": 1.3433, "mean_token_accuracy": 0.6613064755996069, "num_tokens": 1002807257.0, "step": 5970 }, { "entropy": 1.7417626976966858, "epoch": 0.6559556178078053, "grad_norm": 0.7049437165260315, "learning_rate": 1.612646029152353e-05, "loss": 1.4176, "mean_token_accuracy": 0.6610104193290075, "num_tokens": 1002995627.0, "step": 5971 }, { "entropy": 1.7317078411579132, "epoch": 0.6560654747191783, "grad_norm": 0.7525424957275391, "learning_rate": 1.61251442147702e-05, "loss": 1.3605, "mean_token_accuracy": 0.6603756298621496, "num_tokens": 1003115921.0, "step": 5972 }, { "entropy": 1.7446248630682628, "epoch": 0.6561753316305512, "grad_norm": 0.7605143189430237, "learning_rate": 1.6123827975808366e-05, "loss": 1.4968, "mean_token_accuracy": 0.646631787220637, "num_tokens": 1003238280.0, "step": 5973 }, { "entropy": 1.6948024133841197, "epoch": 0.6562851885419242, "grad_norm": 0.635215699672699, "learning_rate": 1.612251157467968e-05, "loss": 1.4391, "mean_token_accuracy": 0.6588515788316727, "num_tokens": 1003439468.0, "step": 5974 }, { "entropy": 1.695201168457667, "epoch": 0.6563950454532971, "grad_norm": 0.7155786752700806, "learning_rate": 1.6121195011425818e-05, "loss": 1.2564, "mean_token_accuracy": 0.6733829925457636, "num_tokens": 1003559915.0, "step": 5975 }, { "entropy": 1.7206557989120483, "epoch": 0.65650490236467, "grad_norm": 0.6989328861236572, "learning_rate": 1.611987828608844e-05, "loss": 1.4135, "mean_token_accuracy": 0.6583918134371439, "num_tokens": 1003742377.0, "step": 5976 }, { "entropy": 1.6666361689567566, "epoch": 0.656614759276043, "grad_norm": 0.605663001537323, "learning_rate": 1.6118561398709218e-05, "loss": 1.498, "mean_token_accuracy": 0.6402202894290289, "num_tokens": 1003918193.0, "step": 5977 }, { "entropy": 1.659655769666036, "epoch": 0.6567246161874158, "grad_norm": 2.3435916900634766, "learning_rate": 1.6117244349329837e-05, "loss": 1.258, "mean_token_accuracy": 0.6593276808659235, "num_tokens": 1004142681.0, "step": 5978 }, { "entropy": 1.6928699215253193, "epoch": 0.6568344730987888, "grad_norm": 0.7714124917984009, "learning_rate": 1.6115927137991977e-05, "loss": 1.3056, "mean_token_accuracy": 0.6578250130017599, "num_tokens": 1004262570.0, "step": 5979 }, { "entropy": 1.6898990372816722, "epoch": 0.6569443300101617, "grad_norm": 0.607197642326355, "learning_rate": 1.6114609764737324e-05, "loss": 1.4254, "mean_token_accuracy": 0.6577767829100291, "num_tokens": 1004466029.0, "step": 5980 }, { "entropy": 1.7032975753148396, "epoch": 0.6570541869215347, "grad_norm": 0.7361001372337341, "learning_rate": 1.611329222960758e-05, "loss": 1.3741, "mean_token_accuracy": 0.6660144229729971, "num_tokens": 1004631550.0, "step": 5981 }, { "entropy": 1.775945911804835, "epoch": 0.6571640438329076, "grad_norm": 0.7697334885597229, "learning_rate": 1.6111974532644444e-05, "loss": 1.5169, "mean_token_accuracy": 0.6399200161298116, "num_tokens": 1004854848.0, "step": 5982 }, { "entropy": 1.674217273791631, "epoch": 0.6572739007442806, "grad_norm": 0.7080762386322021, "learning_rate": 1.6110656673889615e-05, "loss": 1.4399, "mean_token_accuracy": 0.6500117778778076, "num_tokens": 1005128683.0, "step": 5983 }, { "entropy": 1.7122255663077037, "epoch": 0.6573837576556535, "grad_norm": 0.6646968722343445, "learning_rate": 1.6109338653384806e-05, "loss": 1.4163, "mean_token_accuracy": 0.6497417688369751, "num_tokens": 1005307086.0, "step": 5984 }, { "entropy": 1.8177510897318523, "epoch": 0.6574936145670265, "grad_norm": 0.7551073431968689, "learning_rate": 1.6108020471171733e-05, "loss": 1.3673, "mean_token_accuracy": 0.6538062343994776, "num_tokens": 1005454014.0, "step": 5985 }, { "entropy": 1.69020011027654, "epoch": 0.6576034714783994, "grad_norm": 0.7540128827095032, "learning_rate": 1.610670212729211e-05, "loss": 1.3295, "mean_token_accuracy": 0.6638794293006262, "num_tokens": 1005580386.0, "step": 5986 }, { "entropy": 1.6580841739972432, "epoch": 0.6577133283897724, "grad_norm": 0.6165665984153748, "learning_rate": 1.610538362178767e-05, "loss": 1.3407, "mean_token_accuracy": 0.6651915510495504, "num_tokens": 1005745720.0, "step": 5987 }, { "entropy": 1.7150371372699738, "epoch": 0.6578231853011453, "grad_norm": 0.7540215849876404, "learning_rate": 1.6104064954700137e-05, "loss": 1.4733, "mean_token_accuracy": 0.6606999586025873, "num_tokens": 1005910475.0, "step": 5988 }, { "entropy": 1.704669823249181, "epoch": 0.6579330422125182, "grad_norm": 0.7729107141494751, "learning_rate": 1.6102746126071245e-05, "loss": 1.5419, "mean_token_accuracy": 0.6427391221125921, "num_tokens": 1006067481.0, "step": 5989 }, { "entropy": 1.676388919353485, "epoch": 0.6580428991238911, "grad_norm": 0.7432935833930969, "learning_rate": 1.610142713594274e-05, "loss": 1.263, "mean_token_accuracy": 0.6768547048171362, "num_tokens": 1006217348.0, "step": 5990 }, { "entropy": 1.726919690767924, "epoch": 0.658152756035264, "grad_norm": 0.73004549741745, "learning_rate": 1.6100107984356362e-05, "loss": 1.4621, "mean_token_accuracy": 0.6482563465833664, "num_tokens": 1006448796.0, "step": 5991 }, { "entropy": 1.6418760021527607, "epoch": 0.658262612946637, "grad_norm": 0.6615251898765564, "learning_rate": 1.6098788671353872e-05, "loss": 1.3493, "mean_token_accuracy": 0.6650320092837015, "num_tokens": 1006618329.0, "step": 5992 }, { "entropy": 1.619303782780965, "epoch": 0.6583724698580099, "grad_norm": 0.6965547800064087, "learning_rate": 1.6097469196977012e-05, "loss": 1.2817, "mean_token_accuracy": 0.6767748643954595, "num_tokens": 1006751014.0, "step": 5993 }, { "entropy": 1.6654784083366394, "epoch": 0.6584823267693829, "grad_norm": 0.7190507054328918, "learning_rate": 1.609614956126755e-05, "loss": 1.3126, "mean_token_accuracy": 0.6592614303032557, "num_tokens": 1006902725.0, "step": 5994 }, { "entropy": 1.689567784468333, "epoch": 0.6585921836807558, "grad_norm": 1.1087243556976318, "learning_rate": 1.6094829764267254e-05, "loss": 1.3326, "mean_token_accuracy": 0.6678081601858139, "num_tokens": 1007055528.0, "step": 5995 }, { "entropy": 1.7132113973299663, "epoch": 0.6587020405921288, "grad_norm": 0.6450228095054626, "learning_rate": 1.6093509806017883e-05, "loss": 1.3825, "mean_token_accuracy": 0.645787293712298, "num_tokens": 1007254824.0, "step": 5996 }, { "entropy": 1.6614897946516674, "epoch": 0.6588118975035017, "grad_norm": 0.6441484093666077, "learning_rate": 1.609218968656123e-05, "loss": 1.4582, "mean_token_accuracy": 0.6584440817435583, "num_tokens": 1007429624.0, "step": 5997 }, { "entropy": 1.6524133781592052, "epoch": 0.6589217544148747, "grad_norm": 0.6148684024810791, "learning_rate": 1.6090869405939067e-05, "loss": 1.3715, "mean_token_accuracy": 0.6544036467870077, "num_tokens": 1007626408.0, "step": 5998 }, { "entropy": 1.6548963785171509, "epoch": 0.6590316113262475, "grad_norm": 0.6921628713607788, "learning_rate": 1.608954896419318e-05, "loss": 1.3684, "mean_token_accuracy": 0.6596562564373016, "num_tokens": 1007779392.0, "step": 5999 }, { "entropy": 1.6788422763347626, "epoch": 0.6591414682376205, "grad_norm": 0.7359298467636108, "learning_rate": 1.608822836136536e-05, "loss": 1.3503, "mean_token_accuracy": 0.6542917539676031, "num_tokens": 1007942114.0, "step": 6000 }, { "entropy": 1.6935390035311382, "epoch": 0.6592513251489934, "grad_norm": 0.5704781413078308, "learning_rate": 1.6086907597497406e-05, "loss": 1.4471, "mean_token_accuracy": 0.648592452208201, "num_tokens": 1008134066.0, "step": 6001 }, { "entropy": 1.6664865513642628, "epoch": 0.6593611820603664, "grad_norm": 0.6393500566482544, "learning_rate": 1.608558667263112e-05, "loss": 1.4475, "mean_token_accuracy": 0.6586243808269501, "num_tokens": 1008313195.0, "step": 6002 }, { "entropy": 1.7087785402933757, "epoch": 0.6594710389717393, "grad_norm": 0.7804690599441528, "learning_rate": 1.6084265586808304e-05, "loss": 1.5634, "mean_token_accuracy": 0.6387995928525925, "num_tokens": 1008491339.0, "step": 6003 }, { "entropy": 1.6799262662728627, "epoch": 0.6595808958831122, "grad_norm": 1.2888646125793457, "learning_rate": 1.6082944340070777e-05, "loss": 1.2409, "mean_token_accuracy": 0.6644798517227173, "num_tokens": 1008699500.0, "step": 6004 }, { "entropy": 1.6629238029321034, "epoch": 0.6596907527944852, "grad_norm": 1.3199480772018433, "learning_rate": 1.6081622932460352e-05, "loss": 1.0079, "mean_token_accuracy": 0.6921218782663345, "num_tokens": 1008848456.0, "step": 6005 }, { "entropy": 1.6802177329858143, "epoch": 0.6598006097058581, "grad_norm": 0.7184219360351562, "learning_rate": 1.6080301364018852e-05, "loss": 1.3617, "mean_token_accuracy": 0.6671679069598516, "num_tokens": 1008991473.0, "step": 6006 }, { "entropy": 1.7172418534755707, "epoch": 0.6599104666172311, "grad_norm": 0.7799577116966248, "learning_rate": 1.6078979634788102e-05, "loss": 1.4283, "mean_token_accuracy": 0.645437479019165, "num_tokens": 1009116554.0, "step": 6007 }, { "entropy": 1.7622934381167095, "epoch": 0.660020323528604, "grad_norm": 0.7395654916763306, "learning_rate": 1.607765774480993e-05, "loss": 1.5234, "mean_token_accuracy": 0.6265956362088522, "num_tokens": 1009265660.0, "step": 6008 }, { "entropy": 1.7269906798998516, "epoch": 0.660130180439977, "grad_norm": 0.7985388040542603, "learning_rate": 1.6076335694126187e-05, "loss": 1.4323, "mean_token_accuracy": 0.6677784125010172, "num_tokens": 1009437681.0, "step": 6009 }, { "entropy": 1.7253870368003845, "epoch": 0.6602400373513498, "grad_norm": 0.6861431002616882, "learning_rate": 1.60750134827787e-05, "loss": 1.3246, "mean_token_accuracy": 0.666627456744512, "num_tokens": 1009607245.0, "step": 6010 }, { "entropy": 1.714435617129008, "epoch": 0.6603498942627228, "grad_norm": 0.8335552215576172, "learning_rate": 1.6073691110809325e-05, "loss": 1.4504, "mean_token_accuracy": 0.6543582628170649, "num_tokens": 1009790328.0, "step": 6011 }, { "entropy": 1.6384719610214233, "epoch": 0.6604597511740957, "grad_norm": 0.6027631759643555, "learning_rate": 1.6072368578259914e-05, "loss": 1.379, "mean_token_accuracy": 0.6624562293291092, "num_tokens": 1009931118.0, "step": 6012 }, { "entropy": 1.652736137310664, "epoch": 0.6605696080854687, "grad_norm": 0.6054850220680237, "learning_rate": 1.6071045885172322e-05, "loss": 1.2664, "mean_token_accuracy": 0.681120495001475, "num_tokens": 1010055574.0, "step": 6013 }, { "entropy": 1.6163685818513234, "epoch": 0.6606794649968416, "grad_norm": 0.5509641766548157, "learning_rate": 1.6069723031588412e-05, "loss": 1.2797, "mean_token_accuracy": 0.6819182386000952, "num_tokens": 1010220664.0, "step": 6014 }, { "entropy": 1.6701125005880992, "epoch": 0.6607893219082146, "grad_norm": 0.7778674364089966, "learning_rate": 1.6068400017550055e-05, "loss": 1.4248, "mean_token_accuracy": 0.6782306134700775, "num_tokens": 1010398174.0, "step": 6015 }, { "entropy": 1.7769503196080525, "epoch": 0.6608991788195875, "grad_norm": 0.7546883225440979, "learning_rate": 1.6067076843099125e-05, "loss": 1.4241, "mean_token_accuracy": 0.642538994550705, "num_tokens": 1010594125.0, "step": 6016 }, { "entropy": 1.6197692056496937, "epoch": 0.6610090357309604, "grad_norm": 0.7641433477401733, "learning_rate": 1.6065753508277488e-05, "loss": 1.2113, "mean_token_accuracy": 0.6883720109860102, "num_tokens": 1010715132.0, "step": 6017 }, { "entropy": 1.6694122155507405, "epoch": 0.6611188926423334, "grad_norm": 0.6795452237129211, "learning_rate": 1.6064430013127036e-05, "loss": 1.476, "mean_token_accuracy": 0.6509568393230438, "num_tokens": 1010891272.0, "step": 6018 }, { "entropy": 1.6499930421511333, "epoch": 0.6612287495537063, "grad_norm": 0.6633215546607971, "learning_rate": 1.6063106357689662e-05, "loss": 1.5788, "mean_token_accuracy": 0.6357754915952682, "num_tokens": 1011164958.0, "step": 6019 }, { "entropy": 1.7087593972682953, "epoch": 0.6613386064650792, "grad_norm": 0.7861476540565491, "learning_rate": 1.606178254200725e-05, "loss": 1.5698, "mean_token_accuracy": 0.6314490288496017, "num_tokens": 1011325949.0, "step": 6020 }, { "entropy": 1.6702162524064381, "epoch": 0.6614484633764521, "grad_norm": 0.6822460293769836, "learning_rate": 1.60604585661217e-05, "loss": 1.3802, "mean_token_accuracy": 0.6540361742178599, "num_tokens": 1011516122.0, "step": 6021 }, { "entropy": 1.7486283381779988, "epoch": 0.6615583202878251, "grad_norm": 0.7449422478675842, "learning_rate": 1.6059134430074917e-05, "loss": 1.3651, "mean_token_accuracy": 0.6583732018868128, "num_tokens": 1011658572.0, "step": 6022 }, { "entropy": 1.7109368344148, "epoch": 0.661668177199198, "grad_norm": 0.6396523118019104, "learning_rate": 1.6057810133908812e-05, "loss": 1.5154, "mean_token_accuracy": 0.6504169950882593, "num_tokens": 1011820872.0, "step": 6023 }, { "entropy": 1.6798317929108937, "epoch": 0.661778034110571, "grad_norm": 0.7457844614982605, "learning_rate": 1.605648567766529e-05, "loss": 1.3378, "mean_token_accuracy": 0.6725800782442093, "num_tokens": 1011939184.0, "step": 6024 }, { "entropy": 1.7137371897697449, "epoch": 0.6618878910219439, "grad_norm": 0.7281384468078613, "learning_rate": 1.6055161061386282e-05, "loss": 1.3466, "mean_token_accuracy": 0.6607188185056051, "num_tokens": 1012063344.0, "step": 6025 }, { "entropy": 1.7295754949251811, "epoch": 0.6619977479333169, "grad_norm": 0.9593626856803894, "learning_rate": 1.6053836285113703e-05, "loss": 1.5888, "mean_token_accuracy": 0.639353816707929, "num_tokens": 1012202910.0, "step": 6026 }, { "entropy": 1.74645792444547, "epoch": 0.6621076048446898, "grad_norm": 0.6115472912788391, "learning_rate": 1.6052511348889475e-05, "loss": 1.272, "mean_token_accuracy": 0.6679097364346186, "num_tokens": 1012353481.0, "step": 6027 }, { "entropy": 1.6415168742338817, "epoch": 0.6622174617560628, "grad_norm": 0.6827746629714966, "learning_rate": 1.6051186252755548e-05, "loss": 1.3812, "mean_token_accuracy": 0.663339634736379, "num_tokens": 1012531799.0, "step": 6028 }, { "entropy": 1.7094530860582988, "epoch": 0.6623273186674357, "grad_norm": 0.6740764379501343, "learning_rate": 1.604986099675385e-05, "loss": 1.2458, "mean_token_accuracy": 0.6794106811285019, "num_tokens": 1012644516.0, "step": 6029 }, { "entropy": 1.6730037033557892, "epoch": 0.6624371755788087, "grad_norm": 0.6922171115875244, "learning_rate": 1.604853558092632e-05, "loss": 1.4073, "mean_token_accuracy": 0.6618214547634125, "num_tokens": 1012839703.0, "step": 6030 }, { "entropy": 1.6856864591439564, "epoch": 0.6625470324901815, "grad_norm": 0.6982408761978149, "learning_rate": 1.6047210005314927e-05, "loss": 1.3656, "mean_token_accuracy": 0.6610573281844457, "num_tokens": 1012997627.0, "step": 6031 }, { "entropy": 1.7156126201152802, "epoch": 0.6626568894015544, "grad_norm": 0.8553928136825562, "learning_rate": 1.6045884269961602e-05, "loss": 1.4574, "mean_token_accuracy": 0.6595203479131063, "num_tokens": 1013141489.0, "step": 6032 }, { "entropy": 1.7379735112190247, "epoch": 0.6627667463129274, "grad_norm": 0.6452311873435974, "learning_rate": 1.6044558374908313e-05, "loss": 1.3783, "mean_token_accuracy": 0.6580146799484888, "num_tokens": 1013350551.0, "step": 6033 }, { "entropy": 1.6629939476648967, "epoch": 0.6628766032243003, "grad_norm": 0.7060854434967041, "learning_rate": 1.604323232019703e-05, "loss": 1.3811, "mean_token_accuracy": 0.6662647575139999, "num_tokens": 1013522519.0, "step": 6034 }, { "entropy": 1.6722846726576488, "epoch": 0.6629864601356733, "grad_norm": 0.581295371055603, "learning_rate": 1.6041906105869716e-05, "loss": 1.3575, "mean_token_accuracy": 0.6602567632993063, "num_tokens": 1013698530.0, "step": 6035 }, { "entropy": 1.8078128496805828, "epoch": 0.6630963170470462, "grad_norm": 0.7791680693626404, "learning_rate": 1.6040579731968342e-05, "loss": 1.3535, "mean_token_accuracy": 0.6552262306213379, "num_tokens": 1013835125.0, "step": 6036 }, { "entropy": 1.6595743894577026, "epoch": 0.6632061739584192, "grad_norm": 0.758542001247406, "learning_rate": 1.6039253198534893e-05, "loss": 1.4194, "mean_token_accuracy": 0.6629291425148646, "num_tokens": 1013973848.0, "step": 6037 }, { "entropy": 1.6578982969125111, "epoch": 0.6633160308697921, "grad_norm": 0.7612101435661316, "learning_rate": 1.6037926505611353e-05, "loss": 1.3237, "mean_token_accuracy": 0.6753773540258408, "num_tokens": 1014105985.0, "step": 6038 }, { "entropy": 1.7778889040152233, "epoch": 0.6634258877811651, "grad_norm": 0.6914167404174805, "learning_rate": 1.6036599653239705e-05, "loss": 1.4301, "mean_token_accuracy": 0.6623369753360748, "num_tokens": 1014286159.0, "step": 6039 }, { "entropy": 1.730593462785085, "epoch": 0.663535744692538, "grad_norm": 0.6559991240501404, "learning_rate": 1.6035272641461953e-05, "loss": 1.5694, "mean_token_accuracy": 0.6481152127186457, "num_tokens": 1014489852.0, "step": 6040 }, { "entropy": 1.7553011178970337, "epoch": 0.663645601603911, "grad_norm": 0.6367934942245483, "learning_rate": 1.6033945470320088e-05, "loss": 1.3236, "mean_token_accuracy": 0.6699994951486588, "num_tokens": 1014642102.0, "step": 6041 }, { "entropy": 1.692327857017517, "epoch": 0.6637554585152838, "grad_norm": 0.6805416941642761, "learning_rate": 1.6032618139856116e-05, "loss": 1.3413, "mean_token_accuracy": 0.6565508594115576, "num_tokens": 1014788802.0, "step": 6042 }, { "entropy": 1.7228071590264638, "epoch": 0.6638653154266568, "grad_norm": 0.5855072140693665, "learning_rate": 1.6031290650112047e-05, "loss": 1.2014, "mean_token_accuracy": 0.6715217183033625, "num_tokens": 1014978143.0, "step": 6043 }, { "entropy": 1.7340465486049652, "epoch": 0.6639751723380297, "grad_norm": 0.7417824864387512, "learning_rate": 1.6029963001129897e-05, "loss": 1.4479, "mean_token_accuracy": 0.6491431444883347, "num_tokens": 1015185187.0, "step": 6044 }, { "entropy": 1.722789963086446, "epoch": 0.6640850292494026, "grad_norm": 0.6903808116912842, "learning_rate": 1.6028635192951686e-05, "loss": 1.4496, "mean_token_accuracy": 0.6459407409032186, "num_tokens": 1015347694.0, "step": 6045 }, { "entropy": 1.6786029835542042, "epoch": 0.6641948861607756, "grad_norm": 0.7279839515686035, "learning_rate": 1.6027307225619434e-05, "loss": 1.2846, "mean_token_accuracy": 0.6784352113803228, "num_tokens": 1015469969.0, "step": 6046 }, { "entropy": 1.719456136226654, "epoch": 0.6643047430721485, "grad_norm": 0.7053220868110657, "learning_rate": 1.6025979099175176e-05, "loss": 1.3669, "mean_token_accuracy": 0.6673380633195242, "num_tokens": 1015623741.0, "step": 6047 }, { "entropy": 1.7085906167825062, "epoch": 0.6644145999835215, "grad_norm": 0.6075806021690369, "learning_rate": 1.6024650813660946e-05, "loss": 1.4022, "mean_token_accuracy": 0.6560228218634924, "num_tokens": 1015805903.0, "step": 6048 }, { "entropy": 1.718571404616038, "epoch": 0.6645244568948944, "grad_norm": 0.7968881130218506, "learning_rate": 1.6023322369118777e-05, "loss": 1.5538, "mean_token_accuracy": 0.6444092392921448, "num_tokens": 1015977088.0, "step": 6049 }, { "entropy": 1.6368082066377003, "epoch": 0.6646343138062674, "grad_norm": 0.6820107102394104, "learning_rate": 1.6021993765590724e-05, "loss": 1.3924, "mean_token_accuracy": 0.6599597285191218, "num_tokens": 1016132000.0, "step": 6050 }, { "entropy": 1.7395052810509999, "epoch": 0.6647441707176402, "grad_norm": 1.213407039642334, "learning_rate": 1.6020665003118828e-05, "loss": 1.4726, "mean_token_accuracy": 0.625076542297999, "num_tokens": 1016336850.0, "step": 6051 }, { "entropy": 1.6540366212526958, "epoch": 0.6648540276290132, "grad_norm": 0.6258625388145447, "learning_rate": 1.6019336081745143e-05, "loss": 1.4585, "mean_token_accuracy": 0.6550316015879313, "num_tokens": 1016548471.0, "step": 6052 }, { "entropy": 1.7461306750774384, "epoch": 0.6649638845403861, "grad_norm": 0.7281144857406616, "learning_rate": 1.601800700151174e-05, "loss": 1.4278, "mean_token_accuracy": 0.6685409446557363, "num_tokens": 1016688609.0, "step": 6053 }, { "entropy": 1.6394382019837697, "epoch": 0.6650737414517591, "grad_norm": 0.7716277241706848, "learning_rate": 1.6016677762460677e-05, "loss": 1.2538, "mean_token_accuracy": 0.6766884575287501, "num_tokens": 1016848324.0, "step": 6054 }, { "entropy": 1.6753660937150319, "epoch": 0.665183598363132, "grad_norm": 0.721605122089386, "learning_rate": 1.601534836463402e-05, "loss": 1.4277, "mean_token_accuracy": 0.65249036749204, "num_tokens": 1017024377.0, "step": 6055 }, { "entropy": 1.709774265686671, "epoch": 0.665293455274505, "grad_norm": 0.7922242283821106, "learning_rate": 1.601401880807385e-05, "loss": 1.523, "mean_token_accuracy": 0.667452315489451, "num_tokens": 1017182257.0, "step": 6056 }, { "entropy": 1.7641392350196838, "epoch": 0.6654033121858779, "grad_norm": 0.8408894538879395, "learning_rate": 1.601268909282224e-05, "loss": 1.3981, "mean_token_accuracy": 0.6577701171239217, "num_tokens": 1017329247.0, "step": 6057 }, { "entropy": 1.6443076431751251, "epoch": 0.6655131690972508, "grad_norm": 0.7164208292961121, "learning_rate": 1.601135921892128e-05, "loss": 1.4252, "mean_token_accuracy": 0.659592812259992, "num_tokens": 1017531523.0, "step": 6058 }, { "entropy": 1.7700778742631276, "epoch": 0.6656230260086238, "grad_norm": 0.8573195338249207, "learning_rate": 1.601002918641306e-05, "loss": 1.451, "mean_token_accuracy": 0.6490327517191569, "num_tokens": 1017706081.0, "step": 6059 }, { "entropy": 1.6549913088480632, "epoch": 0.6657328829199967, "grad_norm": 0.6733830571174622, "learning_rate": 1.6008698995339674e-05, "loss": 1.4763, "mean_token_accuracy": 0.6520965496699015, "num_tokens": 1017895248.0, "step": 6060 }, { "entropy": 1.689163068930308, "epoch": 0.6658427398313697, "grad_norm": 0.7045179605484009, "learning_rate": 1.6007368645743222e-05, "loss": 1.3279, "mean_token_accuracy": 0.6655579805374146, "num_tokens": 1018012601.0, "step": 6061 }, { "entropy": 1.664953072865804, "epoch": 0.6659525967427425, "grad_norm": 0.5830453634262085, "learning_rate": 1.6006038137665808e-05, "loss": 1.4024, "mean_token_accuracy": 0.6456159402926763, "num_tokens": 1018229159.0, "step": 6062 }, { "entropy": 1.687490314245224, "epoch": 0.6660624536541155, "grad_norm": 0.6919242143630981, "learning_rate": 1.600470747114954e-05, "loss": 1.4013, "mean_token_accuracy": 0.6645840257406235, "num_tokens": 1018361148.0, "step": 6063 }, { "entropy": 1.7308449447154999, "epoch": 0.6661723105654884, "grad_norm": 0.7102833986282349, "learning_rate": 1.600337664623654e-05, "loss": 1.3756, "mean_token_accuracy": 0.6476593216260275, "num_tokens": 1018503933.0, "step": 6064 }, { "entropy": 1.7429296175638835, "epoch": 0.6662821674768614, "grad_norm": 0.6323913931846619, "learning_rate": 1.6002045662968924e-05, "loss": 1.5065, "mean_token_accuracy": 0.6419780949751536, "num_tokens": 1018704772.0, "step": 6065 }, { "entropy": 1.7037050426006317, "epoch": 0.6663920243882343, "grad_norm": 0.6635475158691406, "learning_rate": 1.6000714521388812e-05, "loss": 1.5844, "mean_token_accuracy": 0.6312363793452581, "num_tokens": 1018884802.0, "step": 6066 }, { "entropy": 1.672195961078008, "epoch": 0.6665018812996073, "grad_norm": 0.647525429725647, "learning_rate": 1.599938322153834e-05, "loss": 1.4732, "mean_token_accuracy": 0.6414629220962524, "num_tokens": 1019090452.0, "step": 6067 }, { "entropy": 1.6700976292292278, "epoch": 0.6666117382109802, "grad_norm": 0.6400693655014038, "learning_rate": 1.5998051763459646e-05, "loss": 1.3905, "mean_token_accuracy": 0.6553190549214681, "num_tokens": 1019247192.0, "step": 6068 }, { "entropy": 1.6819157501061757, "epoch": 0.6667215951223532, "grad_norm": 0.7235324382781982, "learning_rate": 1.5996720147194865e-05, "loss": 1.29, "mean_token_accuracy": 0.6759899059931437, "num_tokens": 1019407725.0, "step": 6069 }, { "entropy": 1.6707827945550282, "epoch": 0.6668314520337261, "grad_norm": 0.6338719129562378, "learning_rate": 1.599538837278614e-05, "loss": 1.455, "mean_token_accuracy": 0.6488498498996099, "num_tokens": 1019603082.0, "step": 6070 }, { "entropy": 1.7216089765230815, "epoch": 0.666941308945099, "grad_norm": 0.7594712376594543, "learning_rate": 1.5994056440275626e-05, "loss": 1.4081, "mean_token_accuracy": 0.6522118002176285, "num_tokens": 1019850616.0, "step": 6071 }, { "entropy": 1.701182136933009, "epoch": 0.667051165856472, "grad_norm": 0.6392696499824524, "learning_rate": 1.5992724349705476e-05, "loss": 1.4646, "mean_token_accuracy": 0.648558313647906, "num_tokens": 1020043487.0, "step": 6072 }, { "entropy": 1.6712729334831238, "epoch": 0.6671610227678448, "grad_norm": 0.7950101494789124, "learning_rate": 1.5991392101117847e-05, "loss": 1.1902, "mean_token_accuracy": 0.6798241138458252, "num_tokens": 1020178559.0, "step": 6073 }, { "entropy": 1.6639493604501088, "epoch": 0.6672708796792178, "grad_norm": 0.7325677871704102, "learning_rate": 1.599005969455491e-05, "loss": 1.5157, "mean_token_accuracy": 0.6516855508089066, "num_tokens": 1020341544.0, "step": 6074 }, { "entropy": 1.7209535439809163, "epoch": 0.6673807365905907, "grad_norm": 0.7139765620231628, "learning_rate": 1.598872713005883e-05, "loss": 1.3183, "mean_token_accuracy": 0.6776246974865595, "num_tokens": 1020498782.0, "step": 6075 }, { "entropy": 1.7369226813316345, "epoch": 0.6674905935019637, "grad_norm": 0.7224074602127075, "learning_rate": 1.598739440767179e-05, "loss": 1.4519, "mean_token_accuracy": 0.6571163336435953, "num_tokens": 1020617813.0, "step": 6076 }, { "entropy": 1.7044867078463237, "epoch": 0.6676004504133366, "grad_norm": 0.8380149006843567, "learning_rate": 1.598606152743596e-05, "loss": 1.4734, "mean_token_accuracy": 0.6627581169207891, "num_tokens": 1020769525.0, "step": 6077 }, { "entropy": 1.7417923708756764, "epoch": 0.6677103073247096, "grad_norm": 0.5832151770591736, "learning_rate": 1.598472848939353e-05, "loss": 1.377, "mean_token_accuracy": 0.6470159838596979, "num_tokens": 1020952842.0, "step": 6078 }, { "entropy": 1.7149604658285778, "epoch": 0.6678201642360825, "grad_norm": 0.7015511393547058, "learning_rate": 1.598339529358669e-05, "loss": 1.3452, "mean_token_accuracy": 0.6668038119872411, "num_tokens": 1021135830.0, "step": 6079 }, { "entropy": 1.7343719899654388, "epoch": 0.6679300211474555, "grad_norm": 0.5889387726783752, "learning_rate": 1.5982061940057633e-05, "loss": 1.4322, "mean_token_accuracy": 0.6490494459867477, "num_tokens": 1021313312.0, "step": 6080 }, { "entropy": 1.7713063756624858, "epoch": 0.6680398780588284, "grad_norm": 0.6595631241798401, "learning_rate": 1.598072842884856e-05, "loss": 1.419, "mean_token_accuracy": 0.6418772985537847, "num_tokens": 1021484880.0, "step": 6081 }, { "entropy": 1.695546378691991, "epoch": 0.6681497349702014, "grad_norm": 0.7553979754447937, "learning_rate": 1.597939476000168e-05, "loss": 1.2275, "mean_token_accuracy": 0.6762440800666809, "num_tokens": 1021614110.0, "step": 6082 }, { "entropy": 1.7010157803694408, "epoch": 0.6682595918815742, "grad_norm": 0.6879470348358154, "learning_rate": 1.59780609335592e-05, "loss": 1.5772, "mean_token_accuracy": 0.6345613052447637, "num_tokens": 1021797865.0, "step": 6083 }, { "entropy": 1.6960657437642415, "epoch": 0.6683694487929472, "grad_norm": 0.6763353943824768, "learning_rate": 1.597672694956333e-05, "loss": 1.3334, "mean_token_accuracy": 0.6662516544262568, "num_tokens": 1021966095.0, "step": 6084 }, { "entropy": 1.7035949130853016, "epoch": 0.6684793057043201, "grad_norm": 0.6424010396003723, "learning_rate": 1.5975392808056297e-05, "loss": 1.412, "mean_token_accuracy": 0.653632586201032, "num_tokens": 1022124115.0, "step": 6085 }, { "entropy": 1.7454820175965626, "epoch": 0.668589162615693, "grad_norm": 0.7985848188400269, "learning_rate": 1.5974058509080322e-05, "loss": 1.5415, "mean_token_accuracy": 0.6329482396443685, "num_tokens": 1022284660.0, "step": 6086 }, { "entropy": 1.7059528827667236, "epoch": 0.668699019527066, "grad_norm": 0.6430754065513611, "learning_rate": 1.5972724052677636e-05, "loss": 1.3376, "mean_token_accuracy": 0.6498004595438639, "num_tokens": 1022462814.0, "step": 6087 }, { "entropy": 1.6707301139831543, "epoch": 0.6688088764384389, "grad_norm": 0.7058833837509155, "learning_rate": 1.597138943889048e-05, "loss": 1.3133, "mean_token_accuracy": 0.6808335582415262, "num_tokens": 1022597570.0, "step": 6088 }, { "entropy": 1.7404019236564636, "epoch": 0.6689187333498119, "grad_norm": 0.8028521537780762, "learning_rate": 1.5970054667761086e-05, "loss": 1.5094, "mean_token_accuracy": 0.65455295642217, "num_tokens": 1022767945.0, "step": 6089 }, { "entropy": 1.6390142341454823, "epoch": 0.6690285902611848, "grad_norm": 0.7467424273490906, "learning_rate": 1.59687197393317e-05, "loss": 1.2249, "mean_token_accuracy": 0.684510534008344, "num_tokens": 1022885224.0, "step": 6090 }, { "entropy": 1.698186457157135, "epoch": 0.6691384471725578, "grad_norm": 0.6739535927772522, "learning_rate": 1.5967384653644573e-05, "loss": 1.5205, "mean_token_accuracy": 0.6390059242645899, "num_tokens": 1023125924.0, "step": 6091 }, { "entropy": 1.7658388912677765, "epoch": 0.6692483040839307, "grad_norm": 0.6793767213821411, "learning_rate": 1.596604941074196e-05, "loss": 1.3172, "mean_token_accuracy": 0.6668230046828588, "num_tokens": 1023261228.0, "step": 6092 }, { "entropy": 1.6249745587507884, "epoch": 0.6693581609953037, "grad_norm": 0.6603535413742065, "learning_rate": 1.596471401066612e-05, "loss": 1.383, "mean_token_accuracy": 0.6608263403177261, "num_tokens": 1023451278.0, "step": 6093 }, { "entropy": 1.6451916893323262, "epoch": 0.6694680179066765, "grad_norm": 0.6392114758491516, "learning_rate": 1.5963378453459322e-05, "loss": 1.4267, "mean_token_accuracy": 0.6546541998783747, "num_tokens": 1023641335.0, "step": 6094 }, { "entropy": 1.663988600174586, "epoch": 0.6695778748180495, "grad_norm": 0.5604124069213867, "learning_rate": 1.596204273916383e-05, "loss": 1.4705, "mean_token_accuracy": 0.6506281395753225, "num_tokens": 1023914875.0, "step": 6095 }, { "entropy": 1.7463338673114777, "epoch": 0.6696877317294224, "grad_norm": 0.693678617477417, "learning_rate": 1.5960706867821922e-05, "loss": 1.4098, "mean_token_accuracy": 0.6399700790643692, "num_tokens": 1024096467.0, "step": 6096 }, { "entropy": 1.6922602653503418, "epoch": 0.6697975886407954, "grad_norm": 0.6785783767700195, "learning_rate": 1.5959370839475878e-05, "loss": 1.3892, "mean_token_accuracy": 0.6647334198156992, "num_tokens": 1024245472.0, "step": 6097 }, { "entropy": 1.7134245534737904, "epoch": 0.6699074455521683, "grad_norm": 0.7681015133857727, "learning_rate": 1.595803465416798e-05, "loss": 1.2404, "mean_token_accuracy": 0.6760171254475912, "num_tokens": 1024351814.0, "step": 6098 }, { "entropy": 1.7330508331457775, "epoch": 0.6700173024635412, "grad_norm": 0.7412785291671753, "learning_rate": 1.595669831194052e-05, "loss": 1.4319, "mean_token_accuracy": 0.662334273258845, "num_tokens": 1024520510.0, "step": 6099 }, { "entropy": 1.7534303267796834, "epoch": 0.6701271593749142, "grad_norm": 0.6362935900688171, "learning_rate": 1.595536181283579e-05, "loss": 1.3547, "mean_token_accuracy": 0.6595286975304285, "num_tokens": 1024702512.0, "step": 6100 }, { "entropy": 1.7748811344305675, "epoch": 0.6702370162862871, "grad_norm": 0.771950364112854, "learning_rate": 1.5954025156896094e-05, "loss": 1.4267, "mean_token_accuracy": 0.6493061731259028, "num_tokens": 1024826301.0, "step": 6101 }, { "entropy": 1.6831330458323162, "epoch": 0.6703468731976601, "grad_norm": 0.5883938670158386, "learning_rate": 1.5952688344163738e-05, "loss": 1.3333, "mean_token_accuracy": 0.6650530050198237, "num_tokens": 1025013672.0, "step": 6102 }, { "entropy": 1.6784860491752625, "epoch": 0.670456730109033, "grad_norm": 0.6166484355926514, "learning_rate": 1.595135137468102e-05, "loss": 1.3632, "mean_token_accuracy": 0.6598242670297623, "num_tokens": 1025172109.0, "step": 6103 }, { "entropy": 1.7556925614674885, "epoch": 0.670566587020406, "grad_norm": 0.7905380725860596, "learning_rate": 1.5950014248490268e-05, "loss": 1.347, "mean_token_accuracy": 0.673460324605306, "num_tokens": 1025321964.0, "step": 6104 }, { "entropy": 1.742532879114151, "epoch": 0.6706764439317788, "grad_norm": 0.7387831807136536, "learning_rate": 1.5948676965633792e-05, "loss": 1.303, "mean_token_accuracy": 0.6769290367762247, "num_tokens": 1025430147.0, "step": 6105 }, { "entropy": 1.7955981294314067, "epoch": 0.6707863008431518, "grad_norm": 0.8614792227745056, "learning_rate": 1.594733952615392e-05, "loss": 1.4031, "mean_token_accuracy": 0.6521740754445394, "num_tokens": 1025548085.0, "step": 6106 }, { "entropy": 1.7102086047331493, "epoch": 0.6708961577545247, "grad_norm": 0.6578072309494019, "learning_rate": 1.5946001930092983e-05, "loss": 1.4516, "mean_token_accuracy": 0.6479578018188477, "num_tokens": 1025705026.0, "step": 6107 }, { "entropy": 1.635363906621933, "epoch": 0.6710060146658977, "grad_norm": 0.6915444731712341, "learning_rate": 1.5944664177493313e-05, "loss": 1.4012, "mean_token_accuracy": 0.6578799436489741, "num_tokens": 1025840042.0, "step": 6108 }, { "entropy": 1.7048865755399067, "epoch": 0.6711158715772706, "grad_norm": 0.666670024394989, "learning_rate": 1.594332626839725e-05, "loss": 1.5948, "mean_token_accuracy": 0.6330678189794222, "num_tokens": 1026035070.0, "step": 6109 }, { "entropy": 1.711029291152954, "epoch": 0.6712257284886436, "grad_norm": 0.7791758179664612, "learning_rate": 1.594198820284714e-05, "loss": 1.4302, "mean_token_accuracy": 0.6709011346101761, "num_tokens": 1026175979.0, "step": 6110 }, { "entropy": 1.6606249113877614, "epoch": 0.6713355854000165, "grad_norm": 0.7085611820220947, "learning_rate": 1.5940649980885324e-05, "loss": 1.5086, "mean_token_accuracy": 0.645029549797376, "num_tokens": 1026404722.0, "step": 6111 }, { "entropy": 1.7165546814600627, "epoch": 0.6714454423113894, "grad_norm": 0.7058658599853516, "learning_rate": 1.5939311602554168e-05, "loss": 1.3256, "mean_token_accuracy": 0.6548676739136378, "num_tokens": 1026548934.0, "step": 6112 }, { "entropy": 1.7104195555051167, "epoch": 0.6715552992227624, "grad_norm": 0.6687393188476562, "learning_rate": 1.5937973067896025e-05, "loss": 1.4702, "mean_token_accuracy": 0.6464681526025137, "num_tokens": 1026779833.0, "step": 6113 }, { "entropy": 1.6975898842016857, "epoch": 0.6716651561341352, "grad_norm": 0.575406014919281, "learning_rate": 1.593663437695326e-05, "loss": 1.3619, "mean_token_accuracy": 0.6549033125241598, "num_tokens": 1026985027.0, "step": 6114 }, { "entropy": 1.7248026132583618, "epoch": 0.6717750130455082, "grad_norm": 0.7494511008262634, "learning_rate": 1.593529552976824e-05, "loss": 1.3871, "mean_token_accuracy": 0.6491140872240067, "num_tokens": 1027159348.0, "step": 6115 }, { "entropy": 1.629348337650299, "epoch": 0.6718848699568811, "grad_norm": 0.650229275226593, "learning_rate": 1.593395652638334e-05, "loss": 1.4643, "mean_token_accuracy": 0.6616547207037607, "num_tokens": 1027322685.0, "step": 6116 }, { "entropy": 1.6977149446805317, "epoch": 0.6719947268682541, "grad_norm": 0.7905436754226685, "learning_rate": 1.593261736684094e-05, "loss": 1.4179, "mean_token_accuracy": 0.6655702938636144, "num_tokens": 1027467183.0, "step": 6117 }, { "entropy": 1.6418705681959789, "epoch": 0.672104583779627, "grad_norm": 0.6807728409767151, "learning_rate": 1.593127805118342e-05, "loss": 1.4285, "mean_token_accuracy": 0.6624650160471598, "num_tokens": 1027669991.0, "step": 6118 }, { "entropy": 1.693480223417282, "epoch": 0.672214440691, "grad_norm": 0.6987659335136414, "learning_rate": 1.5929938579453178e-05, "loss": 1.4377, "mean_token_accuracy": 0.6526973893245062, "num_tokens": 1027818472.0, "step": 6119 }, { "entropy": 1.7187944451967876, "epoch": 0.6723242976023729, "grad_norm": 0.6811079382896423, "learning_rate": 1.5928598951692596e-05, "loss": 1.277, "mean_token_accuracy": 0.6688454498847326, "num_tokens": 1027933443.0, "step": 6120 }, { "entropy": 1.6929615139961243, "epoch": 0.6724341545137459, "grad_norm": 0.6487001776695251, "learning_rate": 1.592725916794408e-05, "loss": 1.4681, "mean_token_accuracy": 0.6509808599948883, "num_tokens": 1028117633.0, "step": 6121 }, { "entropy": 1.700806051492691, "epoch": 0.6725440114251188, "grad_norm": 0.6837039589881897, "learning_rate": 1.5925919228250034e-05, "loss": 1.3143, "mean_token_accuracy": 0.6903966218233109, "num_tokens": 1028242732.0, "step": 6122 }, { "entropy": 1.6499686141808827, "epoch": 0.6726538683364918, "grad_norm": 0.6767922043800354, "learning_rate": 1.592457913265286e-05, "loss": 1.3945, "mean_token_accuracy": 0.6531237810850143, "num_tokens": 1028426037.0, "step": 6123 }, { "entropy": 1.6716104646523793, "epoch": 0.6727637252478647, "grad_norm": 0.6955103874206543, "learning_rate": 1.5923238881194976e-05, "loss": 1.4092, "mean_token_accuracy": 0.649432510137558, "num_tokens": 1028628426.0, "step": 6124 }, { "entropy": 1.6994199852148693, "epoch": 0.6728735821592375, "grad_norm": 2.9740710258483887, "learning_rate": 1.5921898473918802e-05, "loss": 1.017, "mean_token_accuracy": 0.6964697390794754, "num_tokens": 1028793307.0, "step": 6125 }, { "entropy": 1.6781040628751118, "epoch": 0.6729834390706105, "grad_norm": 0.6323825120925903, "learning_rate": 1.592055791086676e-05, "loss": 1.406, "mean_token_accuracy": 0.6498600840568542, "num_tokens": 1029003857.0, "step": 6126 }, { "entropy": 1.691665271917979, "epoch": 0.6730932959819834, "grad_norm": 0.6065126657485962, "learning_rate": 1.5919217192081273e-05, "loss": 1.4436, "mean_token_accuracy": 0.6468114952246348, "num_tokens": 1029184665.0, "step": 6127 }, { "entropy": 1.7233734627564747, "epoch": 0.6732031528933564, "grad_norm": 0.8082062005996704, "learning_rate": 1.5917876317604785e-05, "loss": 1.4065, "mean_token_accuracy": 0.6604608694712321, "num_tokens": 1029348548.0, "step": 6128 }, { "entropy": 1.6546673774719238, "epoch": 0.6733130098047293, "grad_norm": 0.6178780198097229, "learning_rate": 1.591653528747972e-05, "loss": 1.2925, "mean_token_accuracy": 0.6696785638729731, "num_tokens": 1029529896.0, "step": 6129 }, { "entropy": 1.7272561589876811, "epoch": 0.6734228667161023, "grad_norm": 0.6660004258155823, "learning_rate": 1.591519410174853e-05, "loss": 1.4051, "mean_token_accuracy": 0.6591061949729919, "num_tokens": 1029681501.0, "step": 6130 }, { "entropy": 1.7080715497334797, "epoch": 0.6735327236274752, "grad_norm": 0.5781081318855286, "learning_rate": 1.5913852760453667e-05, "loss": 1.394, "mean_token_accuracy": 0.6472490082184473, "num_tokens": 1029910927.0, "step": 6131 }, { "entropy": 1.6690656940142314, "epoch": 0.6736425805388482, "grad_norm": 0.6107634902000427, "learning_rate": 1.5912511263637576e-05, "loss": 1.3232, "mean_token_accuracy": 0.66473917166392, "num_tokens": 1030091804.0, "step": 6132 }, { "entropy": 1.7400188446044922, "epoch": 0.6737524374502211, "grad_norm": 0.7845686674118042, "learning_rate": 1.5911169611342716e-05, "loss": 1.3731, "mean_token_accuracy": 0.6524705936511358, "num_tokens": 1030234959.0, "step": 6133 }, { "entropy": 1.6985375185807545, "epoch": 0.6738622943615941, "grad_norm": 0.6205952763557434, "learning_rate": 1.5909827803611553e-05, "loss": 1.3825, "mean_token_accuracy": 0.6545093754927317, "num_tokens": 1030412195.0, "step": 6134 }, { "entropy": 1.7195107738176982, "epoch": 0.673972151272967, "grad_norm": 0.8514347672462463, "learning_rate": 1.590848584048655e-05, "loss": 1.4261, "mean_token_accuracy": 0.6498820533355077, "num_tokens": 1030556047.0, "step": 6135 }, { "entropy": 1.717844436566035, "epoch": 0.6740820081843399, "grad_norm": 0.8321981430053711, "learning_rate": 1.5907143722010183e-05, "loss": 1.4249, "mean_token_accuracy": 0.6527943263451258, "num_tokens": 1030723162.0, "step": 6136 }, { "entropy": 1.7162880500157673, "epoch": 0.6741918650957128, "grad_norm": 0.6410884857177734, "learning_rate": 1.590580144822493e-05, "loss": 1.3525, "mean_token_accuracy": 0.6689903736114502, "num_tokens": 1030890509.0, "step": 6137 }, { "entropy": 1.7301206290721893, "epoch": 0.6743017220070858, "grad_norm": 0.735442578792572, "learning_rate": 1.5904459019173266e-05, "loss": 1.2337, "mean_token_accuracy": 0.6780840853850046, "num_tokens": 1030998724.0, "step": 6138 }, { "entropy": 1.693600704272588, "epoch": 0.6744115789184587, "grad_norm": 0.6858952641487122, "learning_rate": 1.590311643489769e-05, "loss": 1.3579, "mean_token_accuracy": 0.6619731138149897, "num_tokens": 1031180321.0, "step": 6139 }, { "entropy": 1.691060831149419, "epoch": 0.6745214358298316, "grad_norm": 0.7244452238082886, "learning_rate": 1.5901773695440684e-05, "loss": 1.5419, "mean_token_accuracy": 0.638955608010292, "num_tokens": 1031349446.0, "step": 6140 }, { "entropy": 1.673604021469752, "epoch": 0.6746312927412046, "grad_norm": 0.5594522953033447, "learning_rate": 1.5900430800844752e-05, "loss": 1.3823, "mean_token_accuracy": 0.6514309992392858, "num_tokens": 1031556297.0, "step": 6141 }, { "entropy": 1.69135985771815, "epoch": 0.6747411496525775, "grad_norm": 0.7993478775024414, "learning_rate": 1.5899087751152395e-05, "loss": 1.2584, "mean_token_accuracy": 0.675132155418396, "num_tokens": 1031680407.0, "step": 6142 }, { "entropy": 1.6793262263139088, "epoch": 0.6748510065639505, "grad_norm": 0.7275417447090149, "learning_rate": 1.5897744546406117e-05, "loss": 1.3664, "mean_token_accuracy": 0.6670361111561457, "num_tokens": 1031833401.0, "step": 6143 }, { "entropy": 1.7409860094388325, "epoch": 0.6749608634753234, "grad_norm": 0.6921661496162415, "learning_rate": 1.5896401186648428e-05, "loss": 1.3467, "mean_token_accuracy": 0.6612462997436523, "num_tokens": 1031967537.0, "step": 6144 }, { "entropy": 1.7186478873093922, "epoch": 0.6750707203866964, "grad_norm": 0.5948835611343384, "learning_rate": 1.589505767192185e-05, "loss": 1.4484, "mean_token_accuracy": 0.651375338435173, "num_tokens": 1032134696.0, "step": 6145 }, { "entropy": 1.753745198249817, "epoch": 0.6751805772980692, "grad_norm": 0.7294279932975769, "learning_rate": 1.58937140022689e-05, "loss": 1.4038, "mean_token_accuracy": 0.6501768082380295, "num_tokens": 1032292941.0, "step": 6146 }, { "entropy": 1.7068160772323608, "epoch": 0.6752904342094422, "grad_norm": 0.8932915925979614, "learning_rate": 1.5892370177732112e-05, "loss": 1.4417, "mean_token_accuracy": 0.6559292525053024, "num_tokens": 1032429050.0, "step": 6147 }, { "entropy": 1.7068482637405396, "epoch": 0.6754002911208151, "grad_norm": 0.7544932961463928, "learning_rate": 1.5891026198354007e-05, "loss": 1.4954, "mean_token_accuracy": 0.6591473271449407, "num_tokens": 1032581362.0, "step": 6148 }, { "entropy": 1.7536579171816509, "epoch": 0.6755101480321881, "grad_norm": 0.7992092967033386, "learning_rate": 1.588968206417713e-05, "loss": 1.3389, "mean_token_accuracy": 0.6687672038873037, "num_tokens": 1032691286.0, "step": 6149 }, { "entropy": 1.7206375002861023, "epoch": 0.675620004943561, "grad_norm": 0.7990248799324036, "learning_rate": 1.588833777524402e-05, "loss": 1.3008, "mean_token_accuracy": 0.6754638602336248, "num_tokens": 1032812888.0, "step": 6150 }, { "entropy": 1.7509271105130513, "epoch": 0.675729861854934, "grad_norm": 0.6977331638336182, "learning_rate": 1.588699333159722e-05, "loss": 1.5671, "mean_token_accuracy": 0.6463130315144857, "num_tokens": 1032974721.0, "step": 6151 }, { "entropy": 1.6518239478270214, "epoch": 0.6758397187663069, "grad_norm": 0.8337535262107849, "learning_rate": 1.5885648733279286e-05, "loss": 1.5285, "mean_token_accuracy": 0.6606726894776026, "num_tokens": 1033125949.0, "step": 6152 }, { "entropy": 1.7431319256623585, "epoch": 0.6759495756776798, "grad_norm": 0.6203035116195679, "learning_rate": 1.588430398033277e-05, "loss": 1.4083, "mean_token_accuracy": 0.6556287507216135, "num_tokens": 1033291109.0, "step": 6153 }, { "entropy": 1.7404470642407734, "epoch": 0.6760594325890528, "grad_norm": 0.6632646322250366, "learning_rate": 1.588295907280023e-05, "loss": 1.49, "mean_token_accuracy": 0.6481446127096812, "num_tokens": 1033508731.0, "step": 6154 }, { "entropy": 1.6975667675336201, "epoch": 0.6761692895004257, "grad_norm": 0.6166266202926636, "learning_rate": 1.588161401072424e-05, "loss": 1.3478, "mean_token_accuracy": 0.6563719709714254, "num_tokens": 1033700041.0, "step": 6155 }, { "entropy": 1.6918539802233379, "epoch": 0.6762791464117986, "grad_norm": 0.6156592965126038, "learning_rate": 1.5880268794147365e-05, "loss": 1.4967, "mean_token_accuracy": 0.6441917419433594, "num_tokens": 1033902009.0, "step": 6156 }, { "entropy": 1.6845496892929077, "epoch": 0.6763890033231715, "grad_norm": 0.7185717821121216, "learning_rate": 1.587892342311218e-05, "loss": 1.3168, "mean_token_accuracy": 0.6657861719528834, "num_tokens": 1034057209.0, "step": 6157 }, { "entropy": 1.716551125049591, "epoch": 0.6764988602345445, "grad_norm": 0.8153448104858398, "learning_rate": 1.587757789766127e-05, "loss": 1.482, "mean_token_accuracy": 0.6406472225983938, "num_tokens": 1034276379.0, "step": 6158 }, { "entropy": 1.6670089562733967, "epoch": 0.6766087171459174, "grad_norm": 0.6375927925109863, "learning_rate": 1.5876232217837216e-05, "loss": 1.3635, "mean_token_accuracy": 0.667756125330925, "num_tokens": 1034474837.0, "step": 6159 }, { "entropy": 1.762067049741745, "epoch": 0.6767185740572904, "grad_norm": 0.6941974759101868, "learning_rate": 1.587488638368261e-05, "loss": 1.4817, "mean_token_accuracy": 0.6408074299494425, "num_tokens": 1034653291.0, "step": 6160 }, { "entropy": 1.6927312711874645, "epoch": 0.6768284309686633, "grad_norm": 0.6574122309684753, "learning_rate": 1.5873540395240046e-05, "loss": 1.2565, "mean_token_accuracy": 0.67339259882768, "num_tokens": 1034797989.0, "step": 6161 }, { "entropy": 1.6968045830726624, "epoch": 0.6769382878800363, "grad_norm": 0.8542110919952393, "learning_rate": 1.5872194252552127e-05, "loss": 1.6373, "mean_token_accuracy": 0.6415252710382143, "num_tokens": 1034991775.0, "step": 6162 }, { "entropy": 1.6781950394312541, "epoch": 0.6770481447914092, "grad_norm": 0.6783519983291626, "learning_rate": 1.587084795566145e-05, "loss": 1.2693, "mean_token_accuracy": 0.6693742970625559, "num_tokens": 1035130150.0, "step": 6163 }, { "entropy": 1.6896279752254486, "epoch": 0.6771580017027822, "grad_norm": 0.5958226323127747, "learning_rate": 1.5869501504610636e-05, "loss": 1.449, "mean_token_accuracy": 0.6530238687992096, "num_tokens": 1035318883.0, "step": 6164 }, { "entropy": 1.7746345500151317, "epoch": 0.6772678586141551, "grad_norm": 0.7527851462364197, "learning_rate": 1.5868154899442293e-05, "loss": 1.4133, "mean_token_accuracy": 0.6448677480220795, "num_tokens": 1035489150.0, "step": 6165 }, { "entropy": 1.610521674156189, "epoch": 0.677377715525528, "grad_norm": 0.6777754426002502, "learning_rate": 1.5866808140199037e-05, "loss": 1.4785, "mean_token_accuracy": 0.65462859471639, "num_tokens": 1035662720.0, "step": 6166 }, { "entropy": 1.7118818759918213, "epoch": 0.6774875724369009, "grad_norm": 0.6027317047119141, "learning_rate": 1.5865461226923497e-05, "loss": 1.4702, "mean_token_accuracy": 0.6561297823985418, "num_tokens": 1035828628.0, "step": 6167 }, { "entropy": 1.6369926929473877, "epoch": 0.6775974293482738, "grad_norm": 0.707374095916748, "learning_rate": 1.5864114159658305e-05, "loss": 1.4544, "mean_token_accuracy": 0.6614964008331299, "num_tokens": 1036021623.0, "step": 6168 }, { "entropy": 1.6904229422410328, "epoch": 0.6777072862596468, "grad_norm": 0.6496714949607849, "learning_rate": 1.5862766938446092e-05, "loss": 1.499, "mean_token_accuracy": 0.6475720703601837, "num_tokens": 1036206904.0, "step": 6169 }, { "entropy": 1.6665221452713013, "epoch": 0.6778171431710197, "grad_norm": 0.6841485500335693, "learning_rate": 1.5861419563329493e-05, "loss": 1.4886, "mean_token_accuracy": 0.6600009699662527, "num_tokens": 1036450534.0, "step": 6170 }, { "entropy": 1.6735211809476216, "epoch": 0.6779270000823927, "grad_norm": 0.6442497968673706, "learning_rate": 1.586007203435115e-05, "loss": 1.2632, "mean_token_accuracy": 0.6747845361630121, "num_tokens": 1036623414.0, "step": 6171 }, { "entropy": 1.6664145290851593, "epoch": 0.6780368569937656, "grad_norm": 0.7533520460128784, "learning_rate": 1.585872435155373e-05, "loss": 1.4729, "mean_token_accuracy": 0.662231961886088, "num_tokens": 1036784462.0, "step": 6172 }, { "entropy": 1.7120993534723918, "epoch": 0.6781467139051386, "grad_norm": 0.6117445826530457, "learning_rate": 1.5857376514979866e-05, "loss": 1.3504, "mean_token_accuracy": 0.659230629603068, "num_tokens": 1036938244.0, "step": 6173 }, { "entropy": 1.7340730726718903, "epoch": 0.6782565708165115, "grad_norm": 0.7913200259208679, "learning_rate": 1.5856028524672227e-05, "loss": 1.452, "mean_token_accuracy": 0.668962687253952, "num_tokens": 1037128398.0, "step": 6174 }, { "entropy": 1.7015940447648366, "epoch": 0.6783664277278845, "grad_norm": 0.6943639516830444, "learning_rate": 1.585468038067347e-05, "loss": 1.5163, "mean_token_accuracy": 0.6409951796134313, "num_tokens": 1037288363.0, "step": 6175 }, { "entropy": 1.673763672510783, "epoch": 0.6784762846392574, "grad_norm": 0.6866292357444763, "learning_rate": 1.5853332083026268e-05, "loss": 1.3081, "mean_token_accuracy": 0.6622492522001266, "num_tokens": 1037442624.0, "step": 6176 }, { "entropy": 1.703792671362559, "epoch": 0.6785861415506304, "grad_norm": 0.6945456266403198, "learning_rate": 1.5851983631773297e-05, "loss": 1.4855, "mean_token_accuracy": 0.6414697915315628, "num_tokens": 1037590916.0, "step": 6177 }, { "entropy": 1.652319351832072, "epoch": 0.6786959984620032, "grad_norm": 0.6403668522834778, "learning_rate": 1.5850635026957226e-05, "loss": 1.5006, "mean_token_accuracy": 0.6437227378288904, "num_tokens": 1037788898.0, "step": 6178 }, { "entropy": 1.675025353829066, "epoch": 0.6788058553733762, "grad_norm": 0.663935124874115, "learning_rate": 1.5849286268620744e-05, "loss": 1.3069, "mean_token_accuracy": 0.6672319124142329, "num_tokens": 1037951523.0, "step": 6179 }, { "entropy": 1.7284078498681386, "epoch": 0.6789157122847491, "grad_norm": 0.7808162569999695, "learning_rate": 1.5847937356806536e-05, "loss": 1.4562, "mean_token_accuracy": 0.651724100112915, "num_tokens": 1038103992.0, "step": 6180 }, { "entropy": 1.6787553032239277, "epoch": 0.679025569196122, "grad_norm": 0.6067541837692261, "learning_rate": 1.584658829155729e-05, "loss": 1.4918, "mean_token_accuracy": 0.646204670270284, "num_tokens": 1038323458.0, "step": 6181 }, { "entropy": 1.6700343191623688, "epoch": 0.679135426107495, "grad_norm": 0.919571042060852, "learning_rate": 1.5845239072915715e-05, "loss": 1.3506, "mean_token_accuracy": 0.669276679555575, "num_tokens": 1038462666.0, "step": 6182 }, { "entropy": 1.720471332470576, "epoch": 0.6792452830188679, "grad_norm": 0.7604859471321106, "learning_rate": 1.5843889700924503e-05, "loss": 1.414, "mean_token_accuracy": 0.6454981962839762, "num_tokens": 1038631829.0, "step": 6183 }, { "entropy": 1.734667807817459, "epoch": 0.6793551399302409, "grad_norm": 0.7525255084037781, "learning_rate": 1.5842540175626368e-05, "loss": 1.3427, "mean_token_accuracy": 0.6558729112148285, "num_tokens": 1038806094.0, "step": 6184 }, { "entropy": 1.7814012865225475, "epoch": 0.6794649968416138, "grad_norm": 0.678103506565094, "learning_rate": 1.584119049706402e-05, "loss": 1.568, "mean_token_accuracy": 0.6203633447488149, "num_tokens": 1039038882.0, "step": 6185 }, { "entropy": 1.7217522263526917, "epoch": 0.6795748537529868, "grad_norm": 0.8400986790657043, "learning_rate": 1.5839840665280168e-05, "loss": 1.2395, "mean_token_accuracy": 0.6749545534451803, "num_tokens": 1039160977.0, "step": 6186 }, { "entropy": 1.76739635070165, "epoch": 0.6796847106643596, "grad_norm": 0.7183486819267273, "learning_rate": 1.583849068031754e-05, "loss": 1.3549, "mean_token_accuracy": 0.6573175837596258, "num_tokens": 1039304153.0, "step": 6187 }, { "entropy": 1.7237287163734436, "epoch": 0.6797945675757326, "grad_norm": 0.7184303402900696, "learning_rate": 1.583714054221887e-05, "loss": 1.2839, "mean_token_accuracy": 0.6642322937647501, "num_tokens": 1039447894.0, "step": 6188 }, { "entropy": 1.7578026056289673, "epoch": 0.6799044244871055, "grad_norm": 0.7254183292388916, "learning_rate": 1.5835790251026875e-05, "loss": 1.3413, "mean_token_accuracy": 0.6657581379016241, "num_tokens": 1039613902.0, "step": 6189 }, { "entropy": 1.7511154313882191, "epoch": 0.6800142813984785, "grad_norm": 0.6806755065917969, "learning_rate": 1.5834439806784302e-05, "loss": 1.3657, "mean_token_accuracy": 0.656483938296636, "num_tokens": 1039774043.0, "step": 6190 }, { "entropy": 1.7452365458011627, "epoch": 0.6801241383098514, "grad_norm": 0.654483437538147, "learning_rate": 1.5833089209533883e-05, "loss": 1.3834, "mean_token_accuracy": 0.6558238019545873, "num_tokens": 1039942032.0, "step": 6191 }, { "entropy": 1.7572171986103058, "epoch": 0.6802339952212244, "grad_norm": 0.6855461597442627, "learning_rate": 1.583173845931837e-05, "loss": 1.3906, "mean_token_accuracy": 0.6548371364672979, "num_tokens": 1040063994.0, "step": 6192 }, { "entropy": 1.6424880524476368, "epoch": 0.6803438521325973, "grad_norm": 0.6944648623466492, "learning_rate": 1.5830387556180513e-05, "loss": 1.3699, "mean_token_accuracy": 0.6685495773951212, "num_tokens": 1040204031.0, "step": 6193 }, { "entropy": 1.6730513870716095, "epoch": 0.6804537090439702, "grad_norm": 0.6423060894012451, "learning_rate": 1.5829036500163068e-05, "loss": 1.489, "mean_token_accuracy": 0.6339628795782725, "num_tokens": 1040432452.0, "step": 6194 }, { "entropy": 1.7136758367220561, "epoch": 0.6805635659553432, "grad_norm": 1.136176347732544, "learning_rate": 1.582768529130879e-05, "loss": 1.3342, "mean_token_accuracy": 0.667153442899386, "num_tokens": 1040602648.0, "step": 6195 }, { "entropy": 1.6480139096577961, "epoch": 0.6806734228667161, "grad_norm": 0.6092087626457214, "learning_rate": 1.582633392966045e-05, "loss": 1.4523, "mean_token_accuracy": 0.6621314485867819, "num_tokens": 1040799318.0, "step": 6196 }, { "entropy": 1.7159779965877533, "epoch": 0.6807832797780891, "grad_norm": 0.7131394743919373, "learning_rate": 1.5824982415260815e-05, "loss": 1.3849, "mean_token_accuracy": 0.6469999005397161, "num_tokens": 1040951208.0, "step": 6197 }, { "entropy": 1.6891703208287556, "epoch": 0.6808931366894619, "grad_norm": 0.6422286033630371, "learning_rate": 1.5823630748152663e-05, "loss": 1.4928, "mean_token_accuracy": 0.6543713063001633, "num_tokens": 1041111600.0, "step": 6198 }, { "entropy": 1.6836450199286144, "epoch": 0.6810029936008349, "grad_norm": 0.6734887361526489, "learning_rate": 1.582227892837877e-05, "loss": 1.3554, "mean_token_accuracy": 0.6502887507279714, "num_tokens": 1041265524.0, "step": 6199 }, { "entropy": 1.7192556262016296, "epoch": 0.6811128505122078, "grad_norm": 0.6905456781387329, "learning_rate": 1.582092695598192e-05, "loss": 1.4584, "mean_token_accuracy": 0.6490669349829356, "num_tokens": 1041441178.0, "step": 6200 }, { "entropy": 1.7496082484722137, "epoch": 0.6812227074235808, "grad_norm": 0.7641483545303345, "learning_rate": 1.5819574831004908e-05, "loss": 1.5886, "mean_token_accuracy": 0.6521731615066528, "num_tokens": 1041595991.0, "step": 6201 }, { "entropy": 1.6868942181269329, "epoch": 0.6813325643349537, "grad_norm": 0.7057383060455322, "learning_rate": 1.5818222553490522e-05, "loss": 1.5181, "mean_token_accuracy": 0.6546740233898163, "num_tokens": 1041778203.0, "step": 6202 }, { "entropy": 1.7335337499777477, "epoch": 0.6814424212463267, "grad_norm": 0.6150233149528503, "learning_rate": 1.5816870123481563e-05, "loss": 1.5563, "mean_token_accuracy": 0.6352181782325109, "num_tokens": 1041979155.0, "step": 6203 }, { "entropy": 1.7341649134953816, "epoch": 0.6815522781576996, "grad_norm": 0.6595054864883423, "learning_rate": 1.5815517541020832e-05, "loss": 1.32, "mean_token_accuracy": 0.6687678645054499, "num_tokens": 1042146615.0, "step": 6204 }, { "entropy": 1.7071249385674794, "epoch": 0.6816621350690726, "grad_norm": 0.6596662998199463, "learning_rate": 1.5814164806151146e-05, "loss": 1.36, "mean_token_accuracy": 0.6612856537103653, "num_tokens": 1042321366.0, "step": 6205 }, { "entropy": 1.6722217202186584, "epoch": 0.6817719919804455, "grad_norm": 0.7822057008743286, "learning_rate": 1.5812811918915313e-05, "loss": 1.2883, "mean_token_accuracy": 0.6672490139802297, "num_tokens": 1042459772.0, "step": 6206 }, { "entropy": 1.7275211314360301, "epoch": 0.6818818488918184, "grad_norm": 0.6742196679115295, "learning_rate": 1.581145887935615e-05, "loss": 1.331, "mean_token_accuracy": 0.6632985124985377, "num_tokens": 1042570398.0, "step": 6207 }, { "entropy": 1.6800503234068553, "epoch": 0.6819917058031914, "grad_norm": 0.6366540193557739, "learning_rate": 1.581010568751648e-05, "loss": 1.3628, "mean_token_accuracy": 0.6498023221890131, "num_tokens": 1042718102.0, "step": 6208 }, { "entropy": 1.7179384032885234, "epoch": 0.6821015627145642, "grad_norm": 0.6339711546897888, "learning_rate": 1.5808752343439133e-05, "loss": 1.4198, "mean_token_accuracy": 0.6559414863586426, "num_tokens": 1042906564.0, "step": 6209 }, { "entropy": 1.6670476098855336, "epoch": 0.6822114196259372, "grad_norm": 0.6430389881134033, "learning_rate": 1.5807398847166943e-05, "loss": 1.3518, "mean_token_accuracy": 0.6729106456041336, "num_tokens": 1043114289.0, "step": 6210 }, { "entropy": 1.714191863934199, "epoch": 0.6823212765373101, "grad_norm": 0.7927173376083374, "learning_rate": 1.5806045198742743e-05, "loss": 1.4358, "mean_token_accuracy": 0.6493388712406158, "num_tokens": 1043303392.0, "step": 6211 }, { "entropy": 1.7568532327810924, "epoch": 0.6824311334486831, "grad_norm": 0.7937549352645874, "learning_rate": 1.5804691398209386e-05, "loss": 1.3607, "mean_token_accuracy": 0.673745925227801, "num_tokens": 1043483335.0, "step": 6212 }, { "entropy": 1.7293648322423298, "epoch": 0.682540990360056, "grad_norm": 0.6161656975746155, "learning_rate": 1.5803337445609705e-05, "loss": 1.4712, "mean_token_accuracy": 0.6608046044905981, "num_tokens": 1043684540.0, "step": 6213 }, { "entropy": 1.6890058120091755, "epoch": 0.682650847271429, "grad_norm": 0.7689279913902283, "learning_rate": 1.5801983340986556e-05, "loss": 1.3705, "mean_token_accuracy": 0.6688697884480158, "num_tokens": 1043864929.0, "step": 6214 }, { "entropy": 1.6985827287038167, "epoch": 0.6827607041828019, "grad_norm": 0.7502182126045227, "learning_rate": 1.58006290843828e-05, "loss": 1.4028, "mean_token_accuracy": 0.6728604584932327, "num_tokens": 1044030942.0, "step": 6215 }, { "entropy": 1.7380037407080333, "epoch": 0.6828705610941749, "grad_norm": 0.7070371508598328, "learning_rate": 1.57992746758413e-05, "loss": 1.3499, "mean_token_accuracy": 0.6579258392254511, "num_tokens": 1044225669.0, "step": 6216 }, { "entropy": 1.7195264895757039, "epoch": 0.6829804180055478, "grad_norm": 0.6869642734527588, "learning_rate": 1.5797920115404913e-05, "loss": 1.5184, "mean_token_accuracy": 0.639784961938858, "num_tokens": 1044399612.0, "step": 6217 }, { "entropy": 1.7750992178916931, "epoch": 0.6830902749169208, "grad_norm": 0.7000865936279297, "learning_rate": 1.579656540311652e-05, "loss": 1.5207, "mean_token_accuracy": 0.6384162952502569, "num_tokens": 1044622029.0, "step": 6218 }, { "entropy": 1.7211995124816895, "epoch": 0.6832001318282936, "grad_norm": 0.6937360763549805, "learning_rate": 1.5795210539018996e-05, "loss": 1.3323, "mean_token_accuracy": 0.6666232148806254, "num_tokens": 1044760684.0, "step": 6219 }, { "entropy": 1.723791241645813, "epoch": 0.6833099887396665, "grad_norm": 0.6360770463943481, "learning_rate": 1.5793855523155214e-05, "loss": 1.4283, "mean_token_accuracy": 0.6450154383977255, "num_tokens": 1044965265.0, "step": 6220 }, { "entropy": 1.6446198523044586, "epoch": 0.6834198456510395, "grad_norm": 0.7213537096977234, "learning_rate": 1.5792500355568068e-05, "loss": 1.2592, "mean_token_accuracy": 0.683139756321907, "num_tokens": 1045099732.0, "step": 6221 }, { "entropy": 1.6635667781035106, "epoch": 0.6835297025624124, "grad_norm": 0.7380548119544983, "learning_rate": 1.5791145036300442e-05, "loss": 1.4444, "mean_token_accuracy": 0.6551113228003184, "num_tokens": 1045308158.0, "step": 6222 }, { "entropy": 1.6907614171504974, "epoch": 0.6836395594737854, "grad_norm": 0.7286980748176575, "learning_rate": 1.578978956539524e-05, "loss": 1.4878, "mean_token_accuracy": 0.6524398873249689, "num_tokens": 1045510413.0, "step": 6223 }, { "entropy": 1.662454883257548, "epoch": 0.6837494163851583, "grad_norm": 0.6473574042320251, "learning_rate": 1.5788433942895355e-05, "loss": 1.2923, "mean_token_accuracy": 0.6852824489275614, "num_tokens": 1045657586.0, "step": 6224 }, { "entropy": 1.670077880223592, "epoch": 0.6838592732965313, "grad_norm": 0.7075192332267761, "learning_rate": 1.5787078168843692e-05, "loss": 1.2946, "mean_token_accuracy": 0.6709181269009908, "num_tokens": 1045801180.0, "step": 6225 }, { "entropy": 1.6907998820145924, "epoch": 0.6839691302079042, "grad_norm": 0.8029499053955078, "learning_rate": 1.578572224328316e-05, "loss": 1.5165, "mean_token_accuracy": 0.6446396013100942, "num_tokens": 1046004096.0, "step": 6226 }, { "entropy": 1.6825261414051056, "epoch": 0.6840789871192772, "grad_norm": 0.6166555285453796, "learning_rate": 1.578436616625668e-05, "loss": 1.4492, "mean_token_accuracy": 0.6457721889019012, "num_tokens": 1046213155.0, "step": 6227 }, { "entropy": 1.6593104998270671, "epoch": 0.6841888440306501, "grad_norm": 0.6207210421562195, "learning_rate": 1.5783009937807163e-05, "loss": 1.438, "mean_token_accuracy": 0.64504507680734, "num_tokens": 1046418954.0, "step": 6228 }, { "entropy": 1.6873585283756256, "epoch": 0.684298700942023, "grad_norm": 0.7427178621292114, "learning_rate": 1.578165355797754e-05, "loss": 1.414, "mean_token_accuracy": 0.6532572110493978, "num_tokens": 1046565869.0, "step": 6229 }, { "entropy": 1.7066354652245839, "epoch": 0.6844085578533959, "grad_norm": 0.6317359209060669, "learning_rate": 1.5780297026810735e-05, "loss": 1.3242, "mean_token_accuracy": 0.6639684538046519, "num_tokens": 1046700589.0, "step": 6230 }, { "entropy": 1.677226612965266, "epoch": 0.6845184147647689, "grad_norm": 0.6120962500572205, "learning_rate": 1.5778940344349683e-05, "loss": 1.369, "mean_token_accuracy": 0.6590713312228521, "num_tokens": 1046882958.0, "step": 6231 }, { "entropy": 1.7622087995211284, "epoch": 0.6846282716761418, "grad_norm": 0.7370443940162659, "learning_rate": 1.5777583510637322e-05, "loss": 1.4434, "mean_token_accuracy": 0.6658334483702978, "num_tokens": 1047045020.0, "step": 6232 }, { "entropy": 1.6845203638076782, "epoch": 0.6847381285875148, "grad_norm": 0.7158510088920593, "learning_rate": 1.5776226525716597e-05, "loss": 1.5932, "mean_token_accuracy": 0.6466062217950821, "num_tokens": 1047231565.0, "step": 6233 }, { "entropy": 1.679573267698288, "epoch": 0.6848479854988877, "grad_norm": 0.6586665511131287, "learning_rate": 1.5774869389630452e-05, "loss": 1.4108, "mean_token_accuracy": 0.6435726036628088, "num_tokens": 1047446628.0, "step": 6234 }, { "entropy": 1.7317407031853993, "epoch": 0.6849578424102606, "grad_norm": 0.7508777976036072, "learning_rate": 1.5773512102421845e-05, "loss": 1.2569, "mean_token_accuracy": 0.6819585313399633, "num_tokens": 1047551424.0, "step": 6235 }, { "entropy": 1.718717743953069, "epoch": 0.6850676993216336, "grad_norm": 0.6454728841781616, "learning_rate": 1.5772154664133728e-05, "loss": 1.3644, "mean_token_accuracy": 0.6561121046543121, "num_tokens": 1047686224.0, "step": 6236 }, { "entropy": 1.662040372689565, "epoch": 0.6851775562330065, "grad_norm": 0.7368992567062378, "learning_rate": 1.5770797074809072e-05, "loss": 1.3664, "mean_token_accuracy": 0.6641696294148763, "num_tokens": 1047865517.0, "step": 6237 }, { "entropy": 1.6964517335096996, "epoch": 0.6852874131443795, "grad_norm": 0.7247259020805359, "learning_rate": 1.5769439334490836e-05, "loss": 1.4358, "mean_token_accuracy": 0.6385401288668314, "num_tokens": 1048103495.0, "step": 6238 }, { "entropy": 1.6373733182748158, "epoch": 0.6853972700557524, "grad_norm": 0.664681613445282, "learning_rate": 1.576808144322199e-05, "loss": 1.5413, "mean_token_accuracy": 0.6597782919804255, "num_tokens": 1048314364.0, "step": 6239 }, { "entropy": 1.689589689175288, "epoch": 0.6855071269671253, "grad_norm": 0.7749432921409607, "learning_rate": 1.576672340104552e-05, "loss": 1.331, "mean_token_accuracy": 0.6749097357193629, "num_tokens": 1048518232.0, "step": 6240 }, { "entropy": 1.64166193207105, "epoch": 0.6856169838784982, "grad_norm": 0.6244519352912903, "learning_rate": 1.57653652080044e-05, "loss": 1.4521, "mean_token_accuracy": 0.64791539311409, "num_tokens": 1048744637.0, "step": 6241 }, { "entropy": 1.7174479762713115, "epoch": 0.6857268407898712, "grad_norm": 0.6479034423828125, "learning_rate": 1.576400686414162e-05, "loss": 1.2893, "mean_token_accuracy": 0.6633307288090388, "num_tokens": 1048870167.0, "step": 6242 }, { "entropy": 1.6295473476250966, "epoch": 0.6858366977012441, "grad_norm": 0.6615036725997925, "learning_rate": 1.5762648369500168e-05, "loss": 1.2225, "mean_token_accuracy": 0.6706995218992233, "num_tokens": 1049004380.0, "step": 6243 }, { "entropy": 1.6601495742797852, "epoch": 0.6859465546126171, "grad_norm": 0.6963624358177185, "learning_rate": 1.576128972412304e-05, "loss": 1.4818, "mean_token_accuracy": 0.6445038865009943, "num_tokens": 1049221111.0, "step": 6244 }, { "entropy": 1.740762710571289, "epoch": 0.68605641152399, "grad_norm": 0.746908962726593, "learning_rate": 1.575993092805324e-05, "loss": 1.3535, "mean_token_accuracy": 0.6617808093627294, "num_tokens": 1049354467.0, "step": 6245 }, { "entropy": 1.7130144536495209, "epoch": 0.686166268435363, "grad_norm": 0.6983469724655151, "learning_rate": 1.575857198133377e-05, "loss": 1.3761, "mean_token_accuracy": 0.6857681721448898, "num_tokens": 1049515088.0, "step": 6246 }, { "entropy": 1.6818876961867015, "epoch": 0.6862761253467359, "grad_norm": 0.6924529671669006, "learning_rate": 1.575721288400764e-05, "loss": 1.3247, "mean_token_accuracy": 0.661658505598704, "num_tokens": 1049683399.0, "step": 6247 }, { "entropy": 1.683126876751582, "epoch": 0.6863859822581088, "grad_norm": 0.7934529185295105, "learning_rate": 1.5755853636117868e-05, "loss": 1.27, "mean_token_accuracy": 0.6863173047701517, "num_tokens": 1049839321.0, "step": 6248 }, { "entropy": 1.7363257110118866, "epoch": 0.6864958391694818, "grad_norm": 0.8478215932846069, "learning_rate": 1.575449423770747e-05, "loss": 1.4775, "mean_token_accuracy": 0.641467904051145, "num_tokens": 1050004112.0, "step": 6249 }, { "entropy": 1.7177755236625671, "epoch": 0.6866056960808546, "grad_norm": 0.7801303863525391, "learning_rate": 1.575313468881947e-05, "loss": 1.2044, "mean_token_accuracy": 0.6855588108301163, "num_tokens": 1050126993.0, "step": 6250 }, { "entropy": 1.6946475009123485, "epoch": 0.6867155529922276, "grad_norm": 0.6409901976585388, "learning_rate": 1.5751774989496905e-05, "loss": 1.4604, "mean_token_accuracy": 0.6573443065087, "num_tokens": 1050290667.0, "step": 6251 }, { "entropy": 1.7695001463095348, "epoch": 0.6868254099036005, "grad_norm": 0.8223657608032227, "learning_rate": 1.5750415139782796e-05, "loss": 1.3509, "mean_token_accuracy": 0.661173606912295, "num_tokens": 1050391272.0, "step": 6252 }, { "entropy": 1.7316470444202423, "epoch": 0.6869352668149735, "grad_norm": 0.6836245656013489, "learning_rate": 1.5749055139720194e-05, "loss": 1.2609, "mean_token_accuracy": 0.6763073106606802, "num_tokens": 1050517210.0, "step": 6253 }, { "entropy": 1.7469671567281086, "epoch": 0.6870451237263464, "grad_norm": 0.7463729381561279, "learning_rate": 1.5747694989352133e-05, "loss": 1.3966, "mean_token_accuracy": 0.650151307384173, "num_tokens": 1050692998.0, "step": 6254 }, { "entropy": 1.6787622570991516, "epoch": 0.6871549806377194, "grad_norm": 0.6898899078369141, "learning_rate": 1.5746334688721668e-05, "loss": 1.4101, "mean_token_accuracy": 0.6534204135338465, "num_tokens": 1050859750.0, "step": 6255 }, { "entropy": 1.677261749903361, "epoch": 0.6872648375490923, "grad_norm": 0.6710191369056702, "learning_rate": 1.5744974237871844e-05, "loss": 1.4762, "mean_token_accuracy": 0.6464169124762217, "num_tokens": 1051093727.0, "step": 6256 }, { "entropy": 1.6988216042518616, "epoch": 0.6873746944604653, "grad_norm": 0.6617063879966736, "learning_rate": 1.5743613636845728e-05, "loss": 1.4268, "mean_token_accuracy": 0.6438167144854864, "num_tokens": 1051229009.0, "step": 6257 }, { "entropy": 1.6750045617421467, "epoch": 0.6874845513718382, "grad_norm": 0.83284592628479, "learning_rate": 1.5742252885686376e-05, "loss": 1.3649, "mean_token_accuracy": 0.6614518413941065, "num_tokens": 1051365725.0, "step": 6258 }, { "entropy": 1.7287048399448395, "epoch": 0.6875944082832112, "grad_norm": 0.7290322780609131, "learning_rate": 1.574089198443686e-05, "loss": 1.4128, "mean_token_accuracy": 0.6501226375500361, "num_tokens": 1051502305.0, "step": 6259 }, { "entropy": 1.6979427337646484, "epoch": 0.687704265194584, "grad_norm": 0.8345046043395996, "learning_rate": 1.5739530933140246e-05, "loss": 1.3527, "mean_token_accuracy": 0.6696512003739675, "num_tokens": 1051651829.0, "step": 6260 }, { "entropy": 1.7011751234531403, "epoch": 0.6878141221059569, "grad_norm": 0.6528536677360535, "learning_rate": 1.5738169731839614e-05, "loss": 1.49, "mean_token_accuracy": 0.6397085040807724, "num_tokens": 1051851383.0, "step": 6261 }, { "entropy": 1.6753086646397908, "epoch": 0.6879239790173299, "grad_norm": 0.7235574126243591, "learning_rate": 1.5736808380578046e-05, "loss": 1.262, "mean_token_accuracy": 0.6736620018879572, "num_tokens": 1051992386.0, "step": 6262 }, { "entropy": 1.710617204507192, "epoch": 0.6880338359287028, "grad_norm": 0.6896214485168457, "learning_rate": 1.5735446879398623e-05, "loss": 1.3153, "mean_token_accuracy": 0.6659458925326666, "num_tokens": 1052110869.0, "step": 6263 }, { "entropy": 1.7205885648727417, "epoch": 0.6881436928400758, "grad_norm": 0.7014085650444031, "learning_rate": 1.5734085228344444e-05, "loss": 1.5599, "mean_token_accuracy": 0.6437405745188395, "num_tokens": 1052279865.0, "step": 6264 }, { "entropy": 1.7538027067979176, "epoch": 0.6882535497514487, "grad_norm": 0.7048670053482056, "learning_rate": 1.57327234274586e-05, "loss": 1.4151, "mean_token_accuracy": 0.6546515574057897, "num_tokens": 1052427858.0, "step": 6265 }, { "entropy": 1.7071273426214855, "epoch": 0.6883634066628217, "grad_norm": 0.7168692350387573, "learning_rate": 1.5731361476784194e-05, "loss": 1.4215, "mean_token_accuracy": 0.6494481960932413, "num_tokens": 1052601924.0, "step": 6266 }, { "entropy": 1.7125795582930248, "epoch": 0.6884732635741946, "grad_norm": 0.9502230882644653, "learning_rate": 1.5729999376364325e-05, "loss": 1.4322, "mean_token_accuracy": 0.6579029063383738, "num_tokens": 1052754664.0, "step": 6267 }, { "entropy": 1.7782972554365795, "epoch": 0.6885831204855676, "grad_norm": 0.7288692593574524, "learning_rate": 1.572863712624211e-05, "loss": 1.4447, "mean_token_accuracy": 0.6494678606589636, "num_tokens": 1052874157.0, "step": 6268 }, { "entropy": 1.762984275817871, "epoch": 0.6886929773969405, "grad_norm": 0.8050958514213562, "learning_rate": 1.5727274726460663e-05, "loss": 1.3848, "mean_token_accuracy": 0.6562901983658472, "num_tokens": 1053025611.0, "step": 6269 }, { "entropy": 1.7107179462909698, "epoch": 0.6888028343083135, "grad_norm": 0.6803024411201477, "learning_rate": 1.57259121770631e-05, "loss": 1.4613, "mean_token_accuracy": 0.6524020483096441, "num_tokens": 1053217911.0, "step": 6270 }, { "entropy": 1.7233806550502777, "epoch": 0.6889126912196863, "grad_norm": 0.811576783657074, "learning_rate": 1.5724549478092544e-05, "loss": 1.446, "mean_token_accuracy": 0.6530528217554092, "num_tokens": 1053439531.0, "step": 6271 }, { "entropy": 1.7882909178733826, "epoch": 0.6890225481310593, "grad_norm": 1.5311130285263062, "learning_rate": 1.572318662959213e-05, "loss": 1.5377, "mean_token_accuracy": 0.6330806364615759, "num_tokens": 1053607774.0, "step": 6272 }, { "entropy": 1.714803675810496, "epoch": 0.6891324050424322, "grad_norm": 0.6682481169700623, "learning_rate": 1.572182363160498e-05, "loss": 1.4491, "mean_token_accuracy": 0.6306456079085668, "num_tokens": 1053808448.0, "step": 6273 }, { "entropy": 1.7131713926792145, "epoch": 0.6892422619538052, "grad_norm": 0.6053488254547119, "learning_rate": 1.5720460484174248e-05, "loss": 1.3343, "mean_token_accuracy": 0.6529037654399872, "num_tokens": 1053985060.0, "step": 6274 }, { "entropy": 1.7150470713774364, "epoch": 0.6893521188651781, "grad_norm": 0.5887476801872253, "learning_rate": 1.571909718734306e-05, "loss": 1.4858, "mean_token_accuracy": 0.638887827595075, "num_tokens": 1054203107.0, "step": 6275 }, { "entropy": 1.6366633176803589, "epoch": 0.689461975776551, "grad_norm": 0.7320172786712646, "learning_rate": 1.5717733741154578e-05, "loss": 1.3864, "mean_token_accuracy": 0.6679045160611471, "num_tokens": 1054457206.0, "step": 6276 }, { "entropy": 1.7303629020849864, "epoch": 0.689571832687924, "grad_norm": 0.7379525303840637, "learning_rate": 1.5716370145651952e-05, "loss": 1.2187, "mean_token_accuracy": 0.6834086825450262, "num_tokens": 1054595268.0, "step": 6277 }, { "entropy": 1.688062181075414, "epoch": 0.6896816895992969, "grad_norm": 0.620086669921875, "learning_rate": 1.571500640087833e-05, "loss": 1.4843, "mean_token_accuracy": 0.6472740769386292, "num_tokens": 1054820248.0, "step": 6278 }, { "entropy": 1.6793719629446666, "epoch": 0.6897915465106699, "grad_norm": 0.7746139764785767, "learning_rate": 1.5713642506876882e-05, "loss": 1.3796, "mean_token_accuracy": 0.6590060293674469, "num_tokens": 1054990280.0, "step": 6279 }, { "entropy": 1.7257548173268635, "epoch": 0.6899014034220428, "grad_norm": 0.5978219509124756, "learning_rate": 1.5712278463690774e-05, "loss": 1.5817, "mean_token_accuracy": 0.6256022801001867, "num_tokens": 1055207569.0, "step": 6280 }, { "entropy": 1.7200091977914174, "epoch": 0.6900112603334158, "grad_norm": 0.6994427442550659, "learning_rate": 1.5710914271363177e-05, "loss": 1.2819, "mean_token_accuracy": 0.6736390839020411, "num_tokens": 1055332726.0, "step": 6281 }, { "entropy": 1.6921034355958302, "epoch": 0.6901211172447886, "grad_norm": 0.6004651784896851, "learning_rate": 1.5709549929937263e-05, "loss": 1.355, "mean_token_accuracy": 0.6671501100063324, "num_tokens": 1055490790.0, "step": 6282 }, { "entropy": 1.683781623840332, "epoch": 0.6902309741561616, "grad_norm": 0.7334898114204407, "learning_rate": 1.5708185439456216e-05, "loss": 1.3117, "mean_token_accuracy": 0.6710262993971506, "num_tokens": 1055607208.0, "step": 6283 }, { "entropy": 1.6216355661551158, "epoch": 0.6903408310675345, "grad_norm": 0.6549416780471802, "learning_rate": 1.570682079996322e-05, "loss": 1.3981, "mean_token_accuracy": 0.6610698650280634, "num_tokens": 1055755985.0, "step": 6284 }, { "entropy": 1.6326968371868134, "epoch": 0.6904506879789075, "grad_norm": 0.7157843708992004, "learning_rate": 1.570545601150147e-05, "loss": 1.4508, "mean_token_accuracy": 0.6708127508560816, "num_tokens": 1055946453.0, "step": 6285 }, { "entropy": 1.7335582971572876, "epoch": 0.6905605448902804, "grad_norm": 0.8362358212471008, "learning_rate": 1.570409107411416e-05, "loss": 1.3361, "mean_token_accuracy": 0.654664600888888, "num_tokens": 1056076709.0, "step": 6286 }, { "entropy": 1.7172273596127827, "epoch": 0.6906704018016534, "grad_norm": 0.6369051933288574, "learning_rate": 1.5702725987844483e-05, "loss": 1.4074, "mean_token_accuracy": 0.6473657737175623, "num_tokens": 1056239339.0, "step": 6287 }, { "entropy": 1.7123624682426453, "epoch": 0.6907802587130263, "grad_norm": 0.7884210348129272, "learning_rate": 1.5701360752735648e-05, "loss": 1.3425, "mean_token_accuracy": 0.6655222127834955, "num_tokens": 1056418414.0, "step": 6288 }, { "entropy": 1.6548800269762676, "epoch": 0.6908901156243992, "grad_norm": 0.5178220868110657, "learning_rate": 1.5699995368830866e-05, "loss": 1.3778, "mean_token_accuracy": 0.6649152934551239, "num_tokens": 1056649643.0, "step": 6289 }, { "entropy": 1.7105699678262074, "epoch": 0.6909999725357722, "grad_norm": 0.7892933487892151, "learning_rate": 1.5698629836173346e-05, "loss": 1.4091, "mean_token_accuracy": 0.6649167090654373, "num_tokens": 1056777360.0, "step": 6290 }, { "entropy": 1.7512224813302357, "epoch": 0.691109829447145, "grad_norm": 0.893334686756134, "learning_rate": 1.5697264154806307e-05, "loss": 1.4202, "mean_token_accuracy": 0.6645146906375885, "num_tokens": 1056975900.0, "step": 6291 }, { "entropy": 1.7694937487443287, "epoch": 0.691219686358518, "grad_norm": 0.6863387823104858, "learning_rate": 1.569589832477298e-05, "loss": 1.3598, "mean_token_accuracy": 0.653782253464063, "num_tokens": 1057120050.0, "step": 6292 }, { "entropy": 1.7361672918001811, "epoch": 0.6913295432698909, "grad_norm": 0.8135111927986145, "learning_rate": 1.5694532346116583e-05, "loss": 1.4873, "mean_token_accuracy": 0.6366982012987137, "num_tokens": 1057309306.0, "step": 6293 }, { "entropy": 1.6364782353242238, "epoch": 0.6914394001812639, "grad_norm": 0.6994947195053101, "learning_rate": 1.5693166218880352e-05, "loss": 1.3949, "mean_token_accuracy": 0.6615471492211024, "num_tokens": 1057484747.0, "step": 6294 }, { "entropy": 1.683074374993642, "epoch": 0.6915492570926368, "grad_norm": 0.6820729970932007, "learning_rate": 1.5691799943107525e-05, "loss": 1.3869, "mean_token_accuracy": 0.6594087183475494, "num_tokens": 1057653783.0, "step": 6295 }, { "entropy": 1.7118146419525146, "epoch": 0.6916591140040098, "grad_norm": 0.6727958917617798, "learning_rate": 1.569043351884135e-05, "loss": 1.3533, "mean_token_accuracy": 0.6669302682081858, "num_tokens": 1057793762.0, "step": 6296 }, { "entropy": 1.7263545592625935, "epoch": 0.6917689709153827, "grad_norm": 0.6792665123939514, "learning_rate": 1.568906694612506e-05, "loss": 1.4398, "mean_token_accuracy": 0.6504052480061849, "num_tokens": 1057966395.0, "step": 6297 }, { "entropy": 1.6999529401461284, "epoch": 0.6918788278267557, "grad_norm": 0.689646303653717, "learning_rate": 1.5687700225001918e-05, "loss": 1.227, "mean_token_accuracy": 0.6813174386819204, "num_tokens": 1058117252.0, "step": 6298 }, { "entropy": 1.6544790466626484, "epoch": 0.6919886847381286, "grad_norm": 0.7752074003219604, "learning_rate": 1.5686333355515174e-05, "loss": 1.3064, "mean_token_accuracy": 0.6794366339842478, "num_tokens": 1058273397.0, "step": 6299 }, { "entropy": 1.655057470003764, "epoch": 0.6920985416495016, "grad_norm": 0.6356586217880249, "learning_rate": 1.5684966337708092e-05, "loss": 1.3602, "mean_token_accuracy": 0.6644938240448633, "num_tokens": 1058443764.0, "step": 6300 }, { "entropy": 1.7445928851763408, "epoch": 0.6922083985608745, "grad_norm": 0.6769724488258362, "learning_rate": 1.568359917162394e-05, "loss": 1.3361, "mean_token_accuracy": 0.6576990932226181, "num_tokens": 1058589677.0, "step": 6301 }, { "entropy": 1.7161558667818706, "epoch": 0.6923182554722473, "grad_norm": 0.5942689180374146, "learning_rate": 1.5682231857305978e-05, "loss": 1.439, "mean_token_accuracy": 0.6340092917283376, "num_tokens": 1058821342.0, "step": 6302 }, { "entropy": 1.6530260841051738, "epoch": 0.6924281123836203, "grad_norm": 0.7436016798019409, "learning_rate": 1.5680864394797492e-05, "loss": 1.2778, "mean_token_accuracy": 0.6772982229789098, "num_tokens": 1058986237.0, "step": 6303 }, { "entropy": 1.7283445000648499, "epoch": 0.6925379692949932, "grad_norm": 0.588995635509491, "learning_rate": 1.5679496784141757e-05, "loss": 1.4262, "mean_token_accuracy": 0.6482555766900381, "num_tokens": 1059187307.0, "step": 6304 }, { "entropy": 1.6520406504472096, "epoch": 0.6926478262063662, "grad_norm": 0.5398334860801697, "learning_rate": 1.5678129025382055e-05, "loss": 1.3553, "mean_token_accuracy": 0.6467950393756231, "num_tokens": 1059396490.0, "step": 6305 }, { "entropy": 1.739362935225169, "epoch": 0.6927576831177391, "grad_norm": 0.7606070041656494, "learning_rate": 1.5676761118561677e-05, "loss": 1.2727, "mean_token_accuracy": 0.6647885292768478, "num_tokens": 1059501468.0, "step": 6306 }, { "entropy": 1.7537157237529755, "epoch": 0.6928675400291121, "grad_norm": 0.7173927426338196, "learning_rate": 1.567539306372392e-05, "loss": 1.3187, "mean_token_accuracy": 0.664209653933843, "num_tokens": 1059610640.0, "step": 6307 }, { "entropy": 1.699594388405482, "epoch": 0.692977396940485, "grad_norm": 0.6624416708946228, "learning_rate": 1.5674024860912082e-05, "loss": 1.3283, "mean_token_accuracy": 0.6614074061314265, "num_tokens": 1059746715.0, "step": 6308 }, { "entropy": 1.6853571037451427, "epoch": 0.693087253851858, "grad_norm": 0.6250735521316528, "learning_rate": 1.5672656510169458e-05, "loss": 1.4415, "mean_token_accuracy": 0.6549326082070669, "num_tokens": 1059928735.0, "step": 6309 }, { "entropy": 1.7501811683177948, "epoch": 0.6931971107632309, "grad_norm": 0.6659023761749268, "learning_rate": 1.5671288011539364e-05, "loss": 1.4479, "mean_token_accuracy": 0.6498491813739141, "num_tokens": 1060079413.0, "step": 6310 }, { "entropy": 1.6326484680175781, "epoch": 0.6933069676746039, "grad_norm": 0.5456228256225586, "learning_rate": 1.5669919365065108e-05, "loss": 1.4142, "mean_token_accuracy": 0.6575722495714823, "num_tokens": 1060259112.0, "step": 6311 }, { "entropy": 1.615660309791565, "epoch": 0.6934168245859768, "grad_norm": 0.8097618222236633, "learning_rate": 1.5668550570790005e-05, "loss": 1.5787, "mean_token_accuracy": 0.6560301234324774, "num_tokens": 1060428273.0, "step": 6312 }, { "entropy": 1.7176474730173747, "epoch": 0.6935266814973498, "grad_norm": 0.8573592901229858, "learning_rate": 1.5667181628757388e-05, "loss": 1.3698, "mean_token_accuracy": 0.6745945662260056, "num_tokens": 1060607857.0, "step": 6313 }, { "entropy": 1.6333107848962147, "epoch": 0.6936365384087226, "grad_norm": 0.6240670680999756, "learning_rate": 1.566581253901057e-05, "loss": 1.2348, "mean_token_accuracy": 0.6824917644262314, "num_tokens": 1060749584.0, "step": 6314 }, { "entropy": 1.7455682655175526, "epoch": 0.6937463953200955, "grad_norm": 0.6654044985771179, "learning_rate": 1.5664443301592887e-05, "loss": 1.4652, "mean_token_accuracy": 0.6368297090133032, "num_tokens": 1060897523.0, "step": 6315 }, { "entropy": 1.7202888826529186, "epoch": 0.6938562522314685, "grad_norm": 0.6972677707672119, "learning_rate": 1.5663073916547676e-05, "loss": 1.3784, "mean_token_accuracy": 0.6670991877714793, "num_tokens": 1061067739.0, "step": 6316 }, { "entropy": 1.6854785978794098, "epoch": 0.6939661091428414, "grad_norm": 1.7558538913726807, "learning_rate": 1.5661704383918277e-05, "loss": 1.4532, "mean_token_accuracy": 0.6418699026107788, "num_tokens": 1061300533.0, "step": 6317 }, { "entropy": 1.6643561919530232, "epoch": 0.6940759660542144, "grad_norm": 0.7051677107810974, "learning_rate": 1.5660334703748037e-05, "loss": 1.3727, "mean_token_accuracy": 0.6603148529926935, "num_tokens": 1061439950.0, "step": 6318 }, { "entropy": 1.7029017508029938, "epoch": 0.6941858229655873, "grad_norm": 0.7083843946456909, "learning_rate": 1.5658964876080304e-05, "loss": 1.3186, "mean_token_accuracy": 0.6736210584640503, "num_tokens": 1061611747.0, "step": 6319 }, { "entropy": 1.6688139041264851, "epoch": 0.6942956798769603, "grad_norm": 0.6067943572998047, "learning_rate": 1.565759490095843e-05, "loss": 1.5312, "mean_token_accuracy": 0.6481608798106512, "num_tokens": 1061835071.0, "step": 6320 }, { "entropy": 1.7166197299957275, "epoch": 0.6944055367883332, "grad_norm": 0.6531895995140076, "learning_rate": 1.5656224778425776e-05, "loss": 1.4703, "mean_token_accuracy": 0.6500556915998459, "num_tokens": 1062056631.0, "step": 6321 }, { "entropy": 1.6937325994173686, "epoch": 0.6945153936997062, "grad_norm": 0.6659431457519531, "learning_rate": 1.565485450852571e-05, "loss": 1.5078, "mean_token_accuracy": 0.6421338419119517, "num_tokens": 1062239900.0, "step": 6322 }, { "entropy": 1.7239821255207062, "epoch": 0.694625250611079, "grad_norm": 0.7423164248466492, "learning_rate": 1.5653484091301588e-05, "loss": 1.2976, "mean_token_accuracy": 0.6711178521315256, "num_tokens": 1062353554.0, "step": 6323 }, { "entropy": 1.633136639992396, "epoch": 0.694735107522452, "grad_norm": 0.6296790242195129, "learning_rate": 1.5652113526796798e-05, "loss": 1.3606, "mean_token_accuracy": 0.6649264395236969, "num_tokens": 1062555732.0, "step": 6324 }, { "entropy": 1.7429419159889221, "epoch": 0.6948449644338249, "grad_norm": 0.910716712474823, "learning_rate": 1.5650742815054706e-05, "loss": 1.4169, "mean_token_accuracy": 0.6470295091470083, "num_tokens": 1062773381.0, "step": 6325 }, { "entropy": 1.6380923291047413, "epoch": 0.6949548213451979, "grad_norm": 0.5968899726867676, "learning_rate": 1.564937195611871e-05, "loss": 1.3611, "mean_token_accuracy": 0.6606245140234629, "num_tokens": 1062970446.0, "step": 6326 }, { "entropy": 1.7097805937131245, "epoch": 0.6950646782565708, "grad_norm": 0.8309935331344604, "learning_rate": 1.5648000950032177e-05, "loss": 1.3135, "mean_token_accuracy": 0.6708834419647852, "num_tokens": 1063075251.0, "step": 6327 }, { "entropy": 1.687490463256836, "epoch": 0.6951745351679438, "grad_norm": 0.6703983545303345, "learning_rate": 1.564662979683851e-05, "loss": 1.3279, "mean_token_accuracy": 0.6707485318183899, "num_tokens": 1063191541.0, "step": 6328 }, { "entropy": 1.6575310031572978, "epoch": 0.6952843920793167, "grad_norm": 0.6347379088401794, "learning_rate": 1.5645258496581105e-05, "loss": 1.4302, "mean_token_accuracy": 0.6440733820199966, "num_tokens": 1063388167.0, "step": 6329 }, { "entropy": 1.6861818035443623, "epoch": 0.6953942489906896, "grad_norm": 0.6363089680671692, "learning_rate": 1.564388704930336e-05, "loss": 1.3444, "mean_token_accuracy": 0.662903368473053, "num_tokens": 1063543659.0, "step": 6330 }, { "entropy": 1.7344481647014618, "epoch": 0.6955041059020626, "grad_norm": 0.8041152358055115, "learning_rate": 1.5642515455048684e-05, "loss": 1.3584, "mean_token_accuracy": 0.686911458770434, "num_tokens": 1063676616.0, "step": 6331 }, { "entropy": 1.7182823022206624, "epoch": 0.6956139628134355, "grad_norm": 0.7374937534332275, "learning_rate": 1.5641143713860485e-05, "loss": 1.406, "mean_token_accuracy": 0.6564443459113439, "num_tokens": 1063821043.0, "step": 6332 }, { "entropy": 1.6801859041055043, "epoch": 0.6957238197248085, "grad_norm": 0.8506401181221008, "learning_rate": 1.563977182578218e-05, "loss": 1.4541, "mean_token_accuracy": 0.6540651917457581, "num_tokens": 1063987781.0, "step": 6333 }, { "entropy": 1.6580947836240132, "epoch": 0.6958336766361813, "grad_norm": 0.5996966361999512, "learning_rate": 1.563839979085719e-05, "loss": 1.3718, "mean_token_accuracy": 0.663277710477511, "num_tokens": 1064197609.0, "step": 6334 }, { "entropy": 1.7448161741097767, "epoch": 0.6959435335475543, "grad_norm": 0.6750478148460388, "learning_rate": 1.563702760912893e-05, "loss": 1.3593, "mean_token_accuracy": 0.6488836805025736, "num_tokens": 1064346954.0, "step": 6335 }, { "entropy": 1.6830095052719116, "epoch": 0.6960533904589272, "grad_norm": 0.7102033495903015, "learning_rate": 1.5635655280640844e-05, "loss": 1.4087, "mean_token_accuracy": 0.6654968212048212, "num_tokens": 1064517282.0, "step": 6336 }, { "entropy": 1.7004373967647552, "epoch": 0.6961632473703002, "grad_norm": 0.6220065355300903, "learning_rate": 1.563428280543635e-05, "loss": 1.363, "mean_token_accuracy": 0.6625643819570541, "num_tokens": 1064668244.0, "step": 6337 }, { "entropy": 1.6941338976224263, "epoch": 0.6962731042816731, "grad_norm": 0.6489022970199585, "learning_rate": 1.5632910183558895e-05, "loss": 1.3424, "mean_token_accuracy": 0.6700575947761536, "num_tokens": 1064823692.0, "step": 6338 }, { "entropy": 1.676298052072525, "epoch": 0.6963829611930461, "grad_norm": 0.7490513920783997, "learning_rate": 1.5631537415051927e-05, "loss": 1.3607, "mean_token_accuracy": 0.6670024891694387, "num_tokens": 1064991450.0, "step": 6339 }, { "entropy": 1.6780159771442413, "epoch": 0.696492818104419, "grad_norm": 0.8336478471755981, "learning_rate": 1.5630164499958876e-05, "loss": 1.5349, "mean_token_accuracy": 0.6541972657044729, "num_tokens": 1065167869.0, "step": 6340 }, { "entropy": 1.693702240784963, "epoch": 0.696602675015792, "grad_norm": 0.6953732967376709, "learning_rate": 1.562879143832321e-05, "loss": 1.2524, "mean_token_accuracy": 0.6819103260835012, "num_tokens": 1065309858.0, "step": 6341 }, { "entropy": 1.684336523214976, "epoch": 0.6967125319271649, "grad_norm": 0.6483939290046692, "learning_rate": 1.562741823018838e-05, "loss": 1.2624, "mean_token_accuracy": 0.6634860585133234, "num_tokens": 1065503318.0, "step": 6342 }, { "entropy": 1.7288965284824371, "epoch": 0.6968223888385378, "grad_norm": 0.7463001012802124, "learning_rate": 1.562604487559785e-05, "loss": 1.5298, "mean_token_accuracy": 0.6451147546370825, "num_tokens": 1065693529.0, "step": 6343 }, { "entropy": 1.7293485403060913, "epoch": 0.6969322457499108, "grad_norm": 0.6564697623252869, "learning_rate": 1.5624671374595083e-05, "loss": 1.3069, "mean_token_accuracy": 0.6628097891807556, "num_tokens": 1065829037.0, "step": 6344 }, { "entropy": 1.738810787598292, "epoch": 0.6970421026612836, "grad_norm": 0.6779906749725342, "learning_rate": 1.5623297727223554e-05, "loss": 1.3215, "mean_token_accuracy": 0.6662501196066538, "num_tokens": 1065959965.0, "step": 6345 }, { "entropy": 1.7129548887411754, "epoch": 0.6971519595726566, "grad_norm": 0.814060628414154, "learning_rate": 1.5621923933526734e-05, "loss": 1.3439, "mean_token_accuracy": 0.6748589227596918, "num_tokens": 1066076653.0, "step": 6346 }, { "entropy": 1.7468621532122295, "epoch": 0.6972618164840295, "grad_norm": 0.6097841858863831, "learning_rate": 1.56205499935481e-05, "loss": 1.4377, "mean_token_accuracy": 0.6586494793494543, "num_tokens": 1066260701.0, "step": 6347 }, { "entropy": 1.6852293213208516, "epoch": 0.6973716733954025, "grad_norm": 0.6476978063583374, "learning_rate": 1.561917590733115e-05, "loss": 1.332, "mean_token_accuracy": 0.6707625389099121, "num_tokens": 1066460345.0, "step": 6348 }, { "entropy": 1.7005867660045624, "epoch": 0.6974815303067754, "grad_norm": 0.6457695364952087, "learning_rate": 1.5617801674919353e-05, "loss": 1.4474, "mean_token_accuracy": 0.649574855963389, "num_tokens": 1066634701.0, "step": 6349 }, { "entropy": 1.6940444807211559, "epoch": 0.6975913872181484, "grad_norm": 0.7139136791229248, "learning_rate": 1.5616427296356217e-05, "loss": 1.3646, "mean_token_accuracy": 0.6607652654250463, "num_tokens": 1066769091.0, "step": 6350 }, { "entropy": 1.6971173187096913, "epoch": 0.6977012441295213, "grad_norm": 0.7305136919021606, "learning_rate": 1.561505277168524e-05, "loss": 1.3967, "mean_token_accuracy": 0.6508905241886774, "num_tokens": 1066944238.0, "step": 6351 }, { "entropy": 1.6950480441252391, "epoch": 0.6978111010408943, "grad_norm": 0.8133467435836792, "learning_rate": 1.561367810094992e-05, "loss": 1.4793, "mean_token_accuracy": 0.6544815003871918, "num_tokens": 1067126672.0, "step": 6352 }, { "entropy": 1.7079233924547832, "epoch": 0.6979209579522672, "grad_norm": 0.7765207290649414, "learning_rate": 1.5612303284193765e-05, "loss": 1.4357, "mean_token_accuracy": 0.6562918275594711, "num_tokens": 1067302213.0, "step": 6353 }, { "entropy": 1.7212556799252827, "epoch": 0.6980308148636402, "grad_norm": 0.7137874364852905, "learning_rate": 1.5610928321460296e-05, "loss": 1.3325, "mean_token_accuracy": 0.6592159370581309, "num_tokens": 1067451247.0, "step": 6354 }, { "entropy": 1.7126038074493408, "epoch": 0.698140671775013, "grad_norm": 0.8555010557174683, "learning_rate": 1.5609553212793018e-05, "loss": 1.4302, "mean_token_accuracy": 0.6475658317406973, "num_tokens": 1067578152.0, "step": 6355 }, { "entropy": 1.695282369852066, "epoch": 0.6982505286863859, "grad_norm": 0.6748037338256836, "learning_rate": 1.5608177958235462e-05, "loss": 1.267, "mean_token_accuracy": 0.6690774112939835, "num_tokens": 1067684477.0, "step": 6356 }, { "entropy": 1.69649139046669, "epoch": 0.6983603855977589, "grad_norm": 0.7423410415649414, "learning_rate": 1.560680255783115e-05, "loss": 1.2596, "mean_token_accuracy": 0.6732803036769232, "num_tokens": 1067800400.0, "step": 6357 }, { "entropy": 1.7296584745248158, "epoch": 0.6984702425091318, "grad_norm": 0.6657732129096985, "learning_rate": 1.560542701162361e-05, "loss": 1.4479, "mean_token_accuracy": 0.6483340859413147, "num_tokens": 1068016618.0, "step": 6358 }, { "entropy": 1.7768322229385376, "epoch": 0.6985800994205048, "grad_norm": 0.7509652972221375, "learning_rate": 1.5604051319656378e-05, "loss": 1.5285, "mean_token_accuracy": 0.6426715403795242, "num_tokens": 1068197524.0, "step": 6359 }, { "entropy": 1.729514628648758, "epoch": 0.6986899563318777, "grad_norm": 0.8853446841239929, "learning_rate": 1.5602675481973003e-05, "loss": 1.3558, "mean_token_accuracy": 0.6598193844159445, "num_tokens": 1068352214.0, "step": 6360 }, { "entropy": 1.714291383822759, "epoch": 0.6987998132432507, "grad_norm": 0.6338637471199036, "learning_rate": 1.5601299498617017e-05, "loss": 1.5695, "mean_token_accuracy": 0.6288912991682688, "num_tokens": 1068538787.0, "step": 6361 }, { "entropy": 1.7057184378306072, "epoch": 0.6989096701546236, "grad_norm": 0.7257465124130249, "learning_rate": 1.5599923369631977e-05, "loss": 1.3388, "mean_token_accuracy": 0.661540021498998, "num_tokens": 1068693499.0, "step": 6362 }, { "entropy": 1.749087264140447, "epoch": 0.6990195270659966, "grad_norm": 0.7464898228645325, "learning_rate": 1.559854709506144e-05, "loss": 1.2842, "mean_token_accuracy": 0.6702013909816742, "num_tokens": 1068847863.0, "step": 6363 }, { "entropy": 1.7329839169979095, "epoch": 0.6991293839773695, "grad_norm": 0.6883919835090637, "learning_rate": 1.5597170674948956e-05, "loss": 1.4929, "mean_token_accuracy": 0.6517574687798818, "num_tokens": 1069021234.0, "step": 6364 }, { "entropy": 1.7091910441716511, "epoch": 0.6992392408887425, "grad_norm": 0.5777117013931274, "learning_rate": 1.5595794109338087e-05, "loss": 1.4065, "mean_token_accuracy": 0.6439725557963053, "num_tokens": 1069203920.0, "step": 6365 }, { "entropy": 1.6956571837266285, "epoch": 0.6993490978001153, "grad_norm": 0.6748632192611694, "learning_rate": 1.559441739827241e-05, "loss": 1.4705, "mean_token_accuracy": 0.6408219436804453, "num_tokens": 1069401303.0, "step": 6366 }, { "entropy": 1.768018513917923, "epoch": 0.6994589547114883, "grad_norm": 0.6776396036148071, "learning_rate": 1.5593040541795494e-05, "loss": 1.415, "mean_token_accuracy": 0.6665412137905756, "num_tokens": 1069527841.0, "step": 6367 }, { "entropy": 1.6495687067508698, "epoch": 0.6995688116228612, "grad_norm": 0.6302627921104431, "learning_rate": 1.559166353995091e-05, "loss": 1.421, "mean_token_accuracy": 0.6526271998882294, "num_tokens": 1069725307.0, "step": 6368 }, { "entropy": 1.7440635164578755, "epoch": 0.6996786685342341, "grad_norm": 0.6958877444267273, "learning_rate": 1.559028639278225e-05, "loss": 1.4643, "mean_token_accuracy": 0.6413827786842982, "num_tokens": 1069924701.0, "step": 6369 }, { "entropy": 1.7379199266433716, "epoch": 0.6997885254456071, "grad_norm": 0.7230368256568909, "learning_rate": 1.5588909100333093e-05, "loss": 1.4683, "mean_token_accuracy": 0.6515718946854273, "num_tokens": 1070076085.0, "step": 6370 }, { "entropy": 1.6385211845239003, "epoch": 0.69989838235698, "grad_norm": 0.628541886806488, "learning_rate": 1.5587531662647025e-05, "loss": 1.4062, "mean_token_accuracy": 0.6495350897312164, "num_tokens": 1070269052.0, "step": 6371 }, { "entropy": 1.7529702385266621, "epoch": 0.700008239268353, "grad_norm": 0.6730430126190186, "learning_rate": 1.558615407976765e-05, "loss": 1.3968, "mean_token_accuracy": 0.6596626192331314, "num_tokens": 1070390227.0, "step": 6372 }, { "entropy": 1.6995338002840679, "epoch": 0.7001180961797259, "grad_norm": 0.600246250629425, "learning_rate": 1.5584776351738568e-05, "loss": 1.4458, "mean_token_accuracy": 0.6408328165610632, "num_tokens": 1070624225.0, "step": 6373 }, { "entropy": 1.6173172891139984, "epoch": 0.7002279530910989, "grad_norm": 0.7701708674430847, "learning_rate": 1.5583398478603375e-05, "loss": 1.3347, "mean_token_accuracy": 0.6675042559703191, "num_tokens": 1070802383.0, "step": 6374 }, { "entropy": 1.6439649661382039, "epoch": 0.7003378100024718, "grad_norm": 0.6842703819274902, "learning_rate": 1.558202046040569e-05, "loss": 1.3865, "mean_token_accuracy": 0.6518467565377554, "num_tokens": 1070971297.0, "step": 6375 }, { "entropy": 1.7387069861094158, "epoch": 0.7004476669138447, "grad_norm": 0.7097147107124329, "learning_rate": 1.5580642297189122e-05, "loss": 1.3293, "mean_token_accuracy": 0.6582437654336294, "num_tokens": 1071111781.0, "step": 6376 }, { "entropy": 1.7173890272776287, "epoch": 0.7005575238252176, "grad_norm": 0.706751823425293, "learning_rate": 1.5579263988997286e-05, "loss": 1.4515, "mean_token_accuracy": 0.6454547345638275, "num_tokens": 1071299496.0, "step": 6377 }, { "entropy": 1.7025466759999592, "epoch": 0.7006673807365906, "grad_norm": 0.7652823328971863, "learning_rate": 1.5577885535873813e-05, "loss": 1.3607, "mean_token_accuracy": 0.6740467697381973, "num_tokens": 1071422802.0, "step": 6378 }, { "entropy": 1.7247331937154133, "epoch": 0.7007772376479635, "grad_norm": 0.6709319353103638, "learning_rate": 1.5576506937862322e-05, "loss": 1.3397, "mean_token_accuracy": 0.6705234696467718, "num_tokens": 1071594636.0, "step": 6379 }, { "entropy": 1.7250055472056072, "epoch": 0.7008870945593365, "grad_norm": 0.6866453289985657, "learning_rate": 1.5575128195006452e-05, "loss": 1.4093, "mean_token_accuracy": 0.6612274398406347, "num_tokens": 1071753971.0, "step": 6380 }, { "entropy": 1.7455697258313496, "epoch": 0.7009969514707094, "grad_norm": 0.8073441982269287, "learning_rate": 1.5573749307349832e-05, "loss": 1.5399, "mean_token_accuracy": 0.629800001780192, "num_tokens": 1071920504.0, "step": 6381 }, { "entropy": 1.7188852628072102, "epoch": 0.7011068083820824, "grad_norm": 0.7286099195480347, "learning_rate": 1.5572370274936112e-05, "loss": 1.3478, "mean_token_accuracy": 0.66085384786129, "num_tokens": 1072063218.0, "step": 6382 }, { "entropy": 1.7210610608259838, "epoch": 0.7012166652934553, "grad_norm": 0.886602520942688, "learning_rate": 1.5570991097808926e-05, "loss": 1.3156, "mean_token_accuracy": 0.6739104390144348, "num_tokens": 1072190834.0, "step": 6383 }, { "entropy": 1.6681243975957234, "epoch": 0.7013265222048282, "grad_norm": 0.7629004716873169, "learning_rate": 1.5569611776011936e-05, "loss": 1.3262, "mean_token_accuracy": 0.6660947451988856, "num_tokens": 1072319190.0, "step": 6384 }, { "entropy": 1.7308462460835774, "epoch": 0.7014363791162012, "grad_norm": 0.7029445767402649, "learning_rate": 1.5568232309588793e-05, "loss": 1.5264, "mean_token_accuracy": 0.6421166161696116, "num_tokens": 1072545984.0, "step": 6385 }, { "entropy": 1.7237797677516937, "epoch": 0.701546236027574, "grad_norm": 0.6271055936813354, "learning_rate": 1.5566852698583156e-05, "loss": 1.4193, "mean_token_accuracy": 0.6527849485476812, "num_tokens": 1072742663.0, "step": 6386 }, { "entropy": 1.7033430834611256, "epoch": 0.701656092938947, "grad_norm": 0.851382851600647, "learning_rate": 1.5565472943038686e-05, "loss": 1.3205, "mean_token_accuracy": 0.6494818925857544, "num_tokens": 1072892465.0, "step": 6387 }, { "entropy": 1.7225984930992126, "epoch": 0.7017659498503199, "grad_norm": 0.8029654622077942, "learning_rate": 1.5564093042999058e-05, "loss": 1.2164, "mean_token_accuracy": 0.6834103514750799, "num_tokens": 1073004684.0, "step": 6388 }, { "entropy": 1.660697062810262, "epoch": 0.7018758067616929, "grad_norm": 0.654461145401001, "learning_rate": 1.556271299850794e-05, "loss": 1.2874, "mean_token_accuracy": 0.6679888367652893, "num_tokens": 1073149632.0, "step": 6389 }, { "entropy": 1.7544045547644298, "epoch": 0.7019856636730658, "grad_norm": 0.7389849424362183, "learning_rate": 1.5561332809609013e-05, "loss": 1.4401, "mean_token_accuracy": 0.6510027199983597, "num_tokens": 1073278621.0, "step": 6390 }, { "entropy": 1.713613510131836, "epoch": 0.7020955205844388, "grad_norm": 0.6665468215942383, "learning_rate": 1.5559952476345958e-05, "loss": 1.3568, "mean_token_accuracy": 0.6602018525203069, "num_tokens": 1073419861.0, "step": 6391 }, { "entropy": 1.7007086873054504, "epoch": 0.7022053774958117, "grad_norm": 0.5884419083595276, "learning_rate": 1.555857199876246e-05, "loss": 1.4787, "mean_token_accuracy": 0.63471091290315, "num_tokens": 1073629064.0, "step": 6392 }, { "entropy": 1.6845936278502147, "epoch": 0.7023152344071847, "grad_norm": 0.6721514463424683, "learning_rate": 1.5557191376902214e-05, "loss": 1.5321, "mean_token_accuracy": 0.6354875167210897, "num_tokens": 1073831920.0, "step": 6393 }, { "entropy": 1.6878847082455952, "epoch": 0.7024250913185576, "grad_norm": 0.787539005279541, "learning_rate": 1.5555810610808914e-05, "loss": 1.3595, "mean_token_accuracy": 0.6564808338880539, "num_tokens": 1073990510.0, "step": 6394 }, { "entropy": 1.734977275133133, "epoch": 0.7025349482299306, "grad_norm": 0.7654755711555481, "learning_rate": 1.555442970052626e-05, "loss": 1.4424, "mean_token_accuracy": 0.666431744893392, "num_tokens": 1074150701.0, "step": 6395 }, { "entropy": 1.6856712996959686, "epoch": 0.7026448051413035, "grad_norm": 0.7252474427223206, "learning_rate": 1.5553048646097958e-05, "loss": 1.4068, "mean_token_accuracy": 0.6496947507063547, "num_tokens": 1074315075.0, "step": 6396 }, { "entropy": 1.7249629298845928, "epoch": 0.7027546620526763, "grad_norm": 0.7137119174003601, "learning_rate": 1.555166744756772e-05, "loss": 1.4618, "mean_token_accuracy": 0.6392665853103002, "num_tokens": 1074445490.0, "step": 6397 }, { "entropy": 1.6709438264369965, "epoch": 0.7028645189640493, "grad_norm": 0.6605518460273743, "learning_rate": 1.555028610497926e-05, "loss": 1.4832, "mean_token_accuracy": 0.6422385623057684, "num_tokens": 1074664978.0, "step": 6398 }, { "entropy": 1.6293116410573323, "epoch": 0.7029743758754222, "grad_norm": 0.5970544815063477, "learning_rate": 1.554890461837629e-05, "loss": 1.3538, "mean_token_accuracy": 0.6604219327370325, "num_tokens": 1074807024.0, "step": 6399 }, { "entropy": 1.7081689337889354, "epoch": 0.7030842327867952, "grad_norm": 0.759528636932373, "learning_rate": 1.5547522987802542e-05, "loss": 1.4654, "mean_token_accuracy": 0.6531898428996404, "num_tokens": 1074948098.0, "step": 6400 }, { "entropy": 1.6969364682833354, "epoch": 0.7031940896981681, "grad_norm": 0.7736058235168457, "learning_rate": 1.554614121330174e-05, "loss": 1.3684, "mean_token_accuracy": 0.6516063958406448, "num_tokens": 1075134536.0, "step": 6401 }, { "entropy": 1.7077515522638957, "epoch": 0.7033039466095411, "grad_norm": 0.5851559042930603, "learning_rate": 1.5544759294917616e-05, "loss": 1.3913, "mean_token_accuracy": 0.6567753752072653, "num_tokens": 1075319222.0, "step": 6402 }, { "entropy": 1.6978352069854736, "epoch": 0.703413803520914, "grad_norm": 0.7662501931190491, "learning_rate": 1.554337723269391e-05, "loss": 1.3474, "mean_token_accuracy": 0.666194369395574, "num_tokens": 1075447222.0, "step": 6403 }, { "entropy": 1.7218878070513408, "epoch": 0.703523660432287, "grad_norm": 0.6417670249938965, "learning_rate": 1.5541995026674363e-05, "loss": 1.4205, "mean_token_accuracy": 0.6567677110433578, "num_tokens": 1075603408.0, "step": 6404 }, { "entropy": 1.6632341345151265, "epoch": 0.7036335173436599, "grad_norm": 0.7193872332572937, "learning_rate": 1.5540612676902715e-05, "loss": 1.328, "mean_token_accuracy": 0.6630524943272272, "num_tokens": 1075726060.0, "step": 6405 }, { "entropy": 1.794555813074112, "epoch": 0.7037433742550329, "grad_norm": 0.7477422952651978, "learning_rate": 1.5539230183422725e-05, "loss": 1.3365, "mean_token_accuracy": 0.6674585938453674, "num_tokens": 1075847782.0, "step": 6406 }, { "entropy": 1.7614405552546184, "epoch": 0.7038532311664057, "grad_norm": 0.624266505241394, "learning_rate": 1.5537847546278145e-05, "loss": 1.3842, "mean_token_accuracy": 0.653992493947347, "num_tokens": 1076038754.0, "step": 6407 }, { "entropy": 1.741749346256256, "epoch": 0.7039630880777787, "grad_norm": 0.7164651155471802, "learning_rate": 1.553646476551274e-05, "loss": 1.6044, "mean_token_accuracy": 0.6402417123317719, "num_tokens": 1076211380.0, "step": 6408 }, { "entropy": 1.7253131071726482, "epoch": 0.7040729449891516, "grad_norm": 0.669684648513794, "learning_rate": 1.5535081841170257e-05, "loss": 1.5255, "mean_token_accuracy": 0.6500623474518458, "num_tokens": 1076421027.0, "step": 6409 }, { "entropy": 1.7519688804944356, "epoch": 0.7041828019005245, "grad_norm": 0.5820850133895874, "learning_rate": 1.553369877329449e-05, "loss": 1.3844, "mean_token_accuracy": 0.650462418794632, "num_tokens": 1076583424.0, "step": 6410 }, { "entropy": 1.6753594875335693, "epoch": 0.7042926588118975, "grad_norm": 0.6438754200935364, "learning_rate": 1.5532315561929194e-05, "loss": 1.3457, "mean_token_accuracy": 0.6627227415641149, "num_tokens": 1076765313.0, "step": 6411 }, { "entropy": 1.7350413004557292, "epoch": 0.7044025157232704, "grad_norm": 0.7367886900901794, "learning_rate": 1.553093220711815e-05, "loss": 1.5004, "mean_token_accuracy": 0.6472184459368387, "num_tokens": 1076924086.0, "step": 6412 }, { "entropy": 1.6722463369369507, "epoch": 0.7045123726346434, "grad_norm": 0.7393024563789368, "learning_rate": 1.552954870890515e-05, "loss": 1.357, "mean_token_accuracy": 0.663534477353096, "num_tokens": 1077094084.0, "step": 6413 }, { "entropy": 1.665319134791692, "epoch": 0.7046222295460163, "grad_norm": 0.8351560235023499, "learning_rate": 1.5528165067333972e-05, "loss": 1.4145, "mean_token_accuracy": 0.6641974002122879, "num_tokens": 1077301938.0, "step": 6414 }, { "entropy": 1.665222058693568, "epoch": 0.7047320864573893, "grad_norm": 0.6075441837310791, "learning_rate": 1.5526781282448408e-05, "loss": 1.3895, "mean_token_accuracy": 0.6595604221026102, "num_tokens": 1077518144.0, "step": 6415 }, { "entropy": 1.6636256277561188, "epoch": 0.7048419433687622, "grad_norm": 0.6443570852279663, "learning_rate": 1.5525397354292256e-05, "loss": 1.2649, "mean_token_accuracy": 0.6825617849826813, "num_tokens": 1077663050.0, "step": 6416 }, { "entropy": 1.7030467987060547, "epoch": 0.7049518002801352, "grad_norm": 0.6067739129066467, "learning_rate": 1.5524013282909317e-05, "loss": 1.4999, "mean_token_accuracy": 0.6428120483954748, "num_tokens": 1077865926.0, "step": 6417 }, { "entropy": 1.6580960551897685, "epoch": 0.705061657191508, "grad_norm": 0.657632052898407, "learning_rate": 1.5522629068343398e-05, "loss": 1.2896, "mean_token_accuracy": 0.6659079343080521, "num_tokens": 1078018210.0, "step": 6418 }, { "entropy": 1.743414322535197, "epoch": 0.705171514102881, "grad_norm": 0.7279876470565796, "learning_rate": 1.5521244710638308e-05, "loss": 1.3474, "mean_token_accuracy": 0.6605549802382787, "num_tokens": 1078149814.0, "step": 6419 }, { "entropy": 1.6969486773014069, "epoch": 0.7052813710142539, "grad_norm": 0.8344591856002808, "learning_rate": 1.5519860209837858e-05, "loss": 1.3424, "mean_token_accuracy": 0.670002485315005, "num_tokens": 1078317881.0, "step": 6420 }, { "entropy": 1.6994330783685048, "epoch": 0.7053912279256269, "grad_norm": 0.6680699586868286, "learning_rate": 1.551847556598587e-05, "loss": 1.3801, "mean_token_accuracy": 0.669528936346372, "num_tokens": 1078477165.0, "step": 6421 }, { "entropy": 1.666198472181956, "epoch": 0.7055010848369998, "grad_norm": 0.6620866656303406, "learning_rate": 1.5517090779126164e-05, "loss": 1.3215, "mean_token_accuracy": 0.6595088789860407, "num_tokens": 1078619965.0, "step": 6422 }, { "entropy": 1.7299580574035645, "epoch": 0.7056109417483728, "grad_norm": 0.7888288497924805, "learning_rate": 1.5515705849302574e-05, "loss": 1.2851, "mean_token_accuracy": 0.6729756246010462, "num_tokens": 1078758890.0, "step": 6423 }, { "entropy": 1.7276004652182262, "epoch": 0.7057207986597457, "grad_norm": 0.7002907991409302, "learning_rate": 1.5514320776558928e-05, "loss": 1.4228, "mean_token_accuracy": 0.6579409589370092, "num_tokens": 1078958010.0, "step": 6424 }, { "entropy": 1.6907508472601573, "epoch": 0.7058306555711186, "grad_norm": 0.632900595664978, "learning_rate": 1.551293556093906e-05, "loss": 1.4111, "mean_token_accuracy": 0.6516719460487366, "num_tokens": 1079164270.0, "step": 6425 }, { "entropy": 1.7010155816872914, "epoch": 0.7059405124824916, "grad_norm": 0.6720937490463257, "learning_rate": 1.551155020248682e-05, "loss": 1.2768, "mean_token_accuracy": 0.6753781239191691, "num_tokens": 1079285399.0, "step": 6426 }, { "entropy": 1.6723153193791707, "epoch": 0.7060503693938645, "grad_norm": 0.8205432295799255, "learning_rate": 1.5510164701246045e-05, "loss": 1.4409, "mean_token_accuracy": 0.655280739068985, "num_tokens": 1079479151.0, "step": 6427 }, { "entropy": 1.731699009736379, "epoch": 0.7061602263052374, "grad_norm": 0.6112235188484192, "learning_rate": 1.550877905726059e-05, "loss": 1.4701, "mean_token_accuracy": 0.6529090950886408, "num_tokens": 1079685790.0, "step": 6428 }, { "entropy": 1.746723433335622, "epoch": 0.7062700832166103, "grad_norm": 0.7355782985687256, "learning_rate": 1.5507393270574315e-05, "loss": 1.3453, "mean_token_accuracy": 0.6606174210707346, "num_tokens": 1079837134.0, "step": 6429 }, { "entropy": 1.7096926669279735, "epoch": 0.7063799401279833, "grad_norm": 0.7809394001960754, "learning_rate": 1.5506007341231068e-05, "loss": 1.3517, "mean_token_accuracy": 0.6668333212534586, "num_tokens": 1079974934.0, "step": 6430 }, { "entropy": 1.7233172257741292, "epoch": 0.7064897970393562, "grad_norm": 0.9166316390037537, "learning_rate": 1.550462126927472e-05, "loss": 1.3369, "mean_token_accuracy": 0.6595128228267034, "num_tokens": 1080104310.0, "step": 6431 }, { "entropy": 1.7122790416081746, "epoch": 0.7065996539507292, "grad_norm": 0.8295903205871582, "learning_rate": 1.550323505474914e-05, "loss": 1.5162, "mean_token_accuracy": 0.6423207471768061, "num_tokens": 1080270460.0, "step": 6432 }, { "entropy": 1.6761127014954884, "epoch": 0.7067095108621021, "grad_norm": 0.9063708782196045, "learning_rate": 1.55018486976982e-05, "loss": 1.5275, "mean_token_accuracy": 0.6545315235853195, "num_tokens": 1080457268.0, "step": 6433 }, { "entropy": 1.7378019988536835, "epoch": 0.7068193677734751, "grad_norm": 0.6509607434272766, "learning_rate": 1.5500462198165778e-05, "loss": 1.4512, "mean_token_accuracy": 0.6503981401522955, "num_tokens": 1080644349.0, "step": 6434 }, { "entropy": 1.60017196337382, "epoch": 0.706929224684848, "grad_norm": 0.6047476530075073, "learning_rate": 1.5499075556195752e-05, "loss": 1.3142, "mean_token_accuracy": 0.6764021714528402, "num_tokens": 1080833259.0, "step": 6435 }, { "entropy": 1.7449369231859844, "epoch": 0.707039081596221, "grad_norm": 0.7219707369804382, "learning_rate": 1.5497688771832017e-05, "loss": 1.3236, "mean_token_accuracy": 0.6659722030162811, "num_tokens": 1080997620.0, "step": 6436 }, { "entropy": 1.7626902063687642, "epoch": 0.7071489385075939, "grad_norm": 0.7538440823554993, "learning_rate": 1.549630184511845e-05, "loss": 1.5468, "mean_token_accuracy": 0.6329626242319742, "num_tokens": 1081153212.0, "step": 6437 }, { "entropy": 1.6917728781700134, "epoch": 0.7072587954189667, "grad_norm": 0.7224356532096863, "learning_rate": 1.5494914776098967e-05, "loss": 1.6, "mean_token_accuracy": 0.6502460787693659, "num_tokens": 1081310174.0, "step": 6438 }, { "entropy": 1.7162715196609497, "epoch": 0.7073686523303397, "grad_norm": 0.7522397637367249, "learning_rate": 1.549352756481745e-05, "loss": 1.3803, "mean_token_accuracy": 0.6606674641370773, "num_tokens": 1081482148.0, "step": 6439 }, { "entropy": 1.6705755194028218, "epoch": 0.7074785092417126, "grad_norm": 0.7202532291412354, "learning_rate": 1.5492140211317813e-05, "loss": 1.355, "mean_token_accuracy": 0.6613196780284246, "num_tokens": 1081621256.0, "step": 6440 }, { "entropy": 1.6998901466528575, "epoch": 0.7075883661530856, "grad_norm": 0.6419969797134399, "learning_rate": 1.549075271564396e-05, "loss": 1.3242, "mean_token_accuracy": 0.6660476873318354, "num_tokens": 1081781478.0, "step": 6441 }, { "entropy": 1.714255303144455, "epoch": 0.7076982230644585, "grad_norm": 0.6204527020454407, "learning_rate": 1.548936507783981e-05, "loss": 1.3608, "mean_token_accuracy": 0.655594398578008, "num_tokens": 1081946139.0, "step": 6442 }, { "entropy": 1.6719560623168945, "epoch": 0.7078080799758315, "grad_norm": 0.6580803394317627, "learning_rate": 1.5487977297949276e-05, "loss": 1.4012, "mean_token_accuracy": 0.6551670630772909, "num_tokens": 1082090613.0, "step": 6443 }, { "entropy": 1.6676383117834728, "epoch": 0.7079179368872044, "grad_norm": 0.6761298179626465, "learning_rate": 1.5486589376016284e-05, "loss": 1.3466, "mean_token_accuracy": 0.671358272433281, "num_tokens": 1082206340.0, "step": 6444 }, { "entropy": 1.7442449033260345, "epoch": 0.7080277937985774, "grad_norm": 0.6918967962265015, "learning_rate": 1.548520131208476e-05, "loss": 1.3748, "mean_token_accuracy": 0.6563903441031774, "num_tokens": 1082338856.0, "step": 6445 }, { "entropy": 1.6667213837305705, "epoch": 0.7081376507099503, "grad_norm": 0.5648940205574036, "learning_rate": 1.5483813106198634e-05, "loss": 1.3901, "mean_token_accuracy": 0.6636083672444025, "num_tokens": 1082552762.0, "step": 6446 }, { "entropy": 1.6465531090895336, "epoch": 0.7082475076213233, "grad_norm": 0.7094516754150391, "learning_rate": 1.5482424758401847e-05, "loss": 1.3485, "mean_token_accuracy": 0.6755081762870153, "num_tokens": 1082720364.0, "step": 6447 }, { "entropy": 1.7297605971495311, "epoch": 0.7083573645326962, "grad_norm": 0.6798611283302307, "learning_rate": 1.5481036268738334e-05, "loss": 1.3216, "mean_token_accuracy": 0.6573556611935297, "num_tokens": 1082836557.0, "step": 6448 }, { "entropy": 1.6604599058628082, "epoch": 0.7084672214440692, "grad_norm": 0.659517228603363, "learning_rate": 1.547964763725204e-05, "loss": 1.3595, "mean_token_accuracy": 0.6587564200162888, "num_tokens": 1082997407.0, "step": 6449 }, { "entropy": 1.7796454230944316, "epoch": 0.708577078355442, "grad_norm": 0.7438165545463562, "learning_rate": 1.547825886398692e-05, "loss": 1.4302, "mean_token_accuracy": 0.6503714273373286, "num_tokens": 1083170024.0, "step": 6450 }, { "entropy": 1.6703903377056122, "epoch": 0.7086869352668149, "grad_norm": 0.7155895233154297, "learning_rate": 1.5476869948986925e-05, "loss": 1.5655, "mean_token_accuracy": 0.6468634754419327, "num_tokens": 1083357698.0, "step": 6451 }, { "entropy": 1.7359613676865895, "epoch": 0.7087967921781879, "grad_norm": 0.6262257695198059, "learning_rate": 1.5475480892296013e-05, "loss": 1.5612, "mean_token_accuracy": 0.628671204050382, "num_tokens": 1083543537.0, "step": 6452 }, { "entropy": 1.678429941336314, "epoch": 0.7089066490895608, "grad_norm": 0.7694371342658997, "learning_rate": 1.5474091693958146e-05, "loss": 1.3765, "mean_token_accuracy": 0.6632258395353953, "num_tokens": 1083695268.0, "step": 6453 }, { "entropy": 1.6886884073416393, "epoch": 0.7090165060009338, "grad_norm": 0.733223021030426, "learning_rate": 1.5472702354017296e-05, "loss": 1.3159, "mean_token_accuracy": 0.6743916422128677, "num_tokens": 1083825522.0, "step": 6454 }, { "entropy": 1.7432435353597004, "epoch": 0.7091263629123067, "grad_norm": 0.7304172515869141, "learning_rate": 1.547131287251743e-05, "loss": 1.2952, "mean_token_accuracy": 0.6635237882534663, "num_tokens": 1084003481.0, "step": 6455 }, { "entropy": 1.6784548958142598, "epoch": 0.7092362198236797, "grad_norm": 0.7074428200721741, "learning_rate": 1.5469923249502525e-05, "loss": 1.5661, "mean_token_accuracy": 0.6370118310054144, "num_tokens": 1084177783.0, "step": 6456 }, { "entropy": 1.6755680044492085, "epoch": 0.7093460767350526, "grad_norm": 0.6471802592277527, "learning_rate": 1.5468533485016564e-05, "loss": 1.4748, "mean_token_accuracy": 0.634057030081749, "num_tokens": 1084376301.0, "step": 6457 }, { "entropy": 1.6814270714918773, "epoch": 0.7094559336464256, "grad_norm": 0.6327021718025208, "learning_rate": 1.5467143579103535e-05, "loss": 1.3108, "mean_token_accuracy": 0.6754618585109711, "num_tokens": 1084510548.0, "step": 6458 }, { "entropy": 1.6858037908871968, "epoch": 0.7095657905577984, "grad_norm": 0.7395240664482117, "learning_rate": 1.546575353180742e-05, "loss": 1.239, "mean_token_accuracy": 0.6764448136091232, "num_tokens": 1084615745.0, "step": 6459 }, { "entropy": 1.7345422605673473, "epoch": 0.7096756474691714, "grad_norm": 0.628578245639801, "learning_rate": 1.5464363343172223e-05, "loss": 1.6155, "mean_token_accuracy": 0.6141057461500168, "num_tokens": 1084869481.0, "step": 6460 }, { "entropy": 1.6727848052978516, "epoch": 0.7097855043805443, "grad_norm": 0.7392853498458862, "learning_rate": 1.5462973013241934e-05, "loss": 1.4858, "mean_token_accuracy": 0.6492108752330145, "num_tokens": 1085037273.0, "step": 6461 }, { "entropy": 1.7004869282245636, "epoch": 0.7098953612919173, "grad_norm": 0.7187851071357727, "learning_rate": 1.546158254206056e-05, "loss": 1.478, "mean_token_accuracy": 0.6374183098475138, "num_tokens": 1085232886.0, "step": 6462 }, { "entropy": 1.6797158320744832, "epoch": 0.7100052182032902, "grad_norm": 0.7203065752983093, "learning_rate": 1.546019192967211e-05, "loss": 1.5025, "mean_token_accuracy": 0.6410057172179222, "num_tokens": 1085413273.0, "step": 6463 }, { "entropy": 1.742477943499883, "epoch": 0.7101150751146631, "grad_norm": 0.7050339579582214, "learning_rate": 1.5458801176120597e-05, "loss": 1.4738, "mean_token_accuracy": 0.6423740684986115, "num_tokens": 1085552171.0, "step": 6464 }, { "entropy": 1.7188653250535328, "epoch": 0.7102249320260361, "grad_norm": 0.7164783477783203, "learning_rate": 1.5457410281450034e-05, "loss": 1.506, "mean_token_accuracy": 0.6500173856814703, "num_tokens": 1085724958.0, "step": 6465 }, { "entropy": 1.76152570048968, "epoch": 0.710334788937409, "grad_norm": 0.7042224407196045, "learning_rate": 1.5456019245704445e-05, "loss": 1.451, "mean_token_accuracy": 0.6454960157473882, "num_tokens": 1085889597.0, "step": 6466 }, { "entropy": 1.666865090529124, "epoch": 0.710444645848782, "grad_norm": 0.6657201051712036, "learning_rate": 1.5454628068927854e-05, "loss": 1.3749, "mean_token_accuracy": 0.6552201559146246, "num_tokens": 1086027428.0, "step": 6467 }, { "entropy": 1.7004301051298778, "epoch": 0.7105545027601549, "grad_norm": 0.6426774263381958, "learning_rate": 1.5453236751164293e-05, "loss": 1.4649, "mean_token_accuracy": 0.6523342033227285, "num_tokens": 1086215530.0, "step": 6468 }, { "entropy": 1.680479904015859, "epoch": 0.7106643596715279, "grad_norm": 0.6741653680801392, "learning_rate": 1.5451845292457793e-05, "loss": 1.3898, "mean_token_accuracy": 0.6664047390222549, "num_tokens": 1086377606.0, "step": 6469 }, { "entropy": 1.6704957087834675, "epoch": 0.7107742165829007, "grad_norm": 0.7036837339401245, "learning_rate": 1.54504536928524e-05, "loss": 1.3221, "mean_token_accuracy": 0.6773081024487814, "num_tokens": 1086513650.0, "step": 6470 }, { "entropy": 1.6654736300309498, "epoch": 0.7108840734942737, "grad_norm": 0.642352819442749, "learning_rate": 1.5449061952392148e-05, "loss": 1.366, "mean_token_accuracy": 0.6635124981403351, "num_tokens": 1086681158.0, "step": 6471 }, { "entropy": 1.6999973754088085, "epoch": 0.7109939304056466, "grad_norm": 0.5569196939468384, "learning_rate": 1.5447670071121093e-05, "loss": 1.3281, "mean_token_accuracy": 0.6624209682146708, "num_tokens": 1086840513.0, "step": 6472 }, { "entropy": 1.7996805508931477, "epoch": 0.7111037873170196, "grad_norm": 0.66071617603302, "learning_rate": 1.5446278049083284e-05, "loss": 1.6934, "mean_token_accuracy": 0.6085737546284994, "num_tokens": 1087130957.0, "step": 6473 }, { "entropy": 1.734180857737859, "epoch": 0.7112136442283925, "grad_norm": 0.7350216507911682, "learning_rate": 1.5444885886322778e-05, "loss": 1.5484, "mean_token_accuracy": 0.6372219175100327, "num_tokens": 1087298330.0, "step": 6474 }, { "entropy": 1.7409184575080872, "epoch": 0.7113235011397655, "grad_norm": 0.6457538604736328, "learning_rate": 1.544349358288364e-05, "loss": 1.3968, "mean_token_accuracy": 0.6600100994110107, "num_tokens": 1087439869.0, "step": 6475 }, { "entropy": 1.747595449288686, "epoch": 0.7114333580511384, "grad_norm": 0.8765959739685059, "learning_rate": 1.5442101138809928e-05, "loss": 1.3413, "mean_token_accuracy": 0.6616929272810618, "num_tokens": 1087554965.0, "step": 6476 }, { "entropy": 1.775346169869105, "epoch": 0.7115432149625114, "grad_norm": 0.7475898265838623, "learning_rate": 1.5440708554145713e-05, "loss": 1.546, "mean_token_accuracy": 0.6328155199686686, "num_tokens": 1087769878.0, "step": 6477 }, { "entropy": 1.6867960194746654, "epoch": 0.7116530718738843, "grad_norm": 0.6512202620506287, "learning_rate": 1.5439315828935083e-05, "loss": 1.3724, "mean_token_accuracy": 0.6561179707447687, "num_tokens": 1087958250.0, "step": 6478 }, { "entropy": 1.7009850045045216, "epoch": 0.7117629287852572, "grad_norm": 0.6758841872215271, "learning_rate": 1.54379229632221e-05, "loss": 1.422, "mean_token_accuracy": 0.6556328684091568, "num_tokens": 1088149996.0, "step": 6479 }, { "entropy": 1.7897284130255382, "epoch": 0.7118727856966302, "grad_norm": 0.84705650806427, "learning_rate": 1.5436529957050858e-05, "loss": 1.2395, "mean_token_accuracy": 0.6755161037047704, "num_tokens": 1088256500.0, "step": 6480 }, { "entropy": 1.6992632150650024, "epoch": 0.711982642608003, "grad_norm": 0.8954823613166809, "learning_rate": 1.543513681046544e-05, "loss": 1.4168, "mean_token_accuracy": 0.6409216324488322, "num_tokens": 1088463599.0, "step": 6481 }, { "entropy": 1.7095726033051808, "epoch": 0.712092499519376, "grad_norm": 0.88639235496521, "learning_rate": 1.5433743523509945e-05, "loss": 1.365, "mean_token_accuracy": 0.6730570693810781, "num_tokens": 1088601030.0, "step": 6482 }, { "entropy": 1.7022682825724285, "epoch": 0.7122023564307489, "grad_norm": 0.5840624570846558, "learning_rate": 1.543235009622846e-05, "loss": 1.5923, "mean_token_accuracy": 0.6384105285008749, "num_tokens": 1088900721.0, "step": 6483 }, { "entropy": 1.6645398636658986, "epoch": 0.7123122133421219, "grad_norm": 0.5561464428901672, "learning_rate": 1.5430956528665095e-05, "loss": 1.4083, "mean_token_accuracy": 0.6542864640553793, "num_tokens": 1089140437.0, "step": 6484 }, { "entropy": 1.7146425247192383, "epoch": 0.7124220702534948, "grad_norm": 0.6508673429489136, "learning_rate": 1.5429562820863954e-05, "loss": 1.4767, "mean_token_accuracy": 0.6512588014205297, "num_tokens": 1089324853.0, "step": 6485 }, { "entropy": 1.6801698704560597, "epoch": 0.7125319271648678, "grad_norm": 0.6502287983894348, "learning_rate": 1.542816897286914e-05, "loss": 1.2781, "mean_token_accuracy": 0.675328845779101, "num_tokens": 1089463656.0, "step": 6486 }, { "entropy": 1.6508076985677083, "epoch": 0.7126417840762407, "grad_norm": 0.7556900978088379, "learning_rate": 1.5426774984724775e-05, "loss": 1.3662, "mean_token_accuracy": 0.6651237408320109, "num_tokens": 1089678421.0, "step": 6487 }, { "entropy": 1.651640792687734, "epoch": 0.7127516409876137, "grad_norm": 0.8668679594993591, "learning_rate": 1.542538085647498e-05, "loss": 1.4303, "mean_token_accuracy": 0.6480504920085272, "num_tokens": 1089818077.0, "step": 6488 }, { "entropy": 1.7374186714490254, "epoch": 0.7128614978989866, "grad_norm": 0.7118296027183533, "learning_rate": 1.542398658816387e-05, "loss": 1.4924, "mean_token_accuracy": 0.6559328337510427, "num_tokens": 1090014597.0, "step": 6489 }, { "entropy": 1.7288208802541096, "epoch": 0.7129713548103596, "grad_norm": 0.796561062335968, "learning_rate": 1.5422592179835586e-05, "loss": 1.5408, "mean_token_accuracy": 0.6503265549739202, "num_tokens": 1090158154.0, "step": 6490 }, { "entropy": 1.76965993642807, "epoch": 0.7130812117217324, "grad_norm": 0.6486510634422302, "learning_rate": 1.5421197631534246e-05, "loss": 1.4316, "mean_token_accuracy": 0.6440114875634512, "num_tokens": 1090373107.0, "step": 6491 }, { "entropy": 1.771783361832301, "epoch": 0.7131910686331053, "grad_norm": 0.6636711359024048, "learning_rate": 1.5419802943303995e-05, "loss": 1.3121, "mean_token_accuracy": 0.6703186631202698, "num_tokens": 1090523447.0, "step": 6492 }, { "entropy": 1.7251704931259155, "epoch": 0.7133009255444783, "grad_norm": 0.6942716836929321, "learning_rate": 1.5418408115188973e-05, "loss": 1.3039, "mean_token_accuracy": 0.6602616558472315, "num_tokens": 1090656225.0, "step": 6493 }, { "entropy": 1.6484019656976063, "epoch": 0.7134107824558512, "grad_norm": 0.7084615230560303, "learning_rate": 1.5417013147233324e-05, "loss": 1.2269, "mean_token_accuracy": 0.6825538575649261, "num_tokens": 1090806963.0, "step": 6494 }, { "entropy": 1.7776933411757152, "epoch": 0.7135206393672242, "grad_norm": 0.6260969042778015, "learning_rate": 1.5415618039481196e-05, "loss": 1.445, "mean_token_accuracy": 0.6529321223497391, "num_tokens": 1091004761.0, "step": 6495 }, { "entropy": 1.6919648945331573, "epoch": 0.7136304962785971, "grad_norm": 0.7151902914047241, "learning_rate": 1.5414222791976753e-05, "loss": 1.5192, "mean_token_accuracy": 0.6683632185061773, "num_tokens": 1091163822.0, "step": 6496 }, { "entropy": 1.7350868582725525, "epoch": 0.7137403531899701, "grad_norm": 0.7141695618629456, "learning_rate": 1.5412827404764146e-05, "loss": 1.3278, "mean_token_accuracy": 0.6679353018601736, "num_tokens": 1091293463.0, "step": 6497 }, { "entropy": 1.7138068477312725, "epoch": 0.713850210101343, "grad_norm": 0.7359333038330078, "learning_rate": 1.5411431877887536e-05, "loss": 1.4096, "mean_token_accuracy": 0.6525428295135498, "num_tokens": 1091461909.0, "step": 6498 }, { "entropy": 1.6973837018013, "epoch": 0.713960067012716, "grad_norm": 15.46304702758789, "learning_rate": 1.54100362113911e-05, "loss": 1.4589, "mean_token_accuracy": 0.6513221810261408, "num_tokens": 1091655585.0, "step": 6499 }, { "entropy": 1.7354335486888885, "epoch": 0.7140699239240889, "grad_norm": 0.6850374341011047, "learning_rate": 1.5408640405319004e-05, "loss": 1.4875, "mean_token_accuracy": 0.6419627815485001, "num_tokens": 1091795441.0, "step": 6500 }, { "entropy": 1.6926626861095428, "epoch": 0.7141797808354619, "grad_norm": 0.6201228499412537, "learning_rate": 1.5407244459715424e-05, "loss": 1.3315, "mean_token_accuracy": 0.6752594908078512, "num_tokens": 1091959354.0, "step": 6501 }, { "entropy": 1.6494509776433308, "epoch": 0.7142896377468347, "grad_norm": 0.5546920299530029, "learning_rate": 1.5405848374624545e-05, "loss": 1.514, "mean_token_accuracy": 0.6437575320402781, "num_tokens": 1092182556.0, "step": 6502 }, { "entropy": 1.7340616683165233, "epoch": 0.7143994946582077, "grad_norm": 0.7549980282783508, "learning_rate": 1.540445215009055e-05, "loss": 1.4259, "mean_token_accuracy": 0.6449489444494247, "num_tokens": 1092358579.0, "step": 6503 }, { "entropy": 1.6930972735087078, "epoch": 0.7145093515695806, "grad_norm": 0.6418382525444031, "learning_rate": 1.5403055786157626e-05, "loss": 1.3439, "mean_token_accuracy": 0.6560649822155634, "num_tokens": 1092489483.0, "step": 6504 }, { "entropy": 1.6835120022296906, "epoch": 0.7146192084809535, "grad_norm": 0.6219762563705444, "learning_rate": 1.5401659282869973e-05, "loss": 1.2959, "mean_token_accuracy": 0.6700324018796285, "num_tokens": 1092617242.0, "step": 6505 }, { "entropy": 1.7238627175490062, "epoch": 0.7147290653923265, "grad_norm": 0.734199583530426, "learning_rate": 1.5400262640271786e-05, "loss": 1.5356, "mean_token_accuracy": 0.6461377541224161, "num_tokens": 1092778452.0, "step": 6506 }, { "entropy": 1.668906291325887, "epoch": 0.7148389223036994, "grad_norm": 0.6679801344871521, "learning_rate": 1.5398865858407272e-05, "loss": 1.5205, "mean_token_accuracy": 0.6480654130379359, "num_tokens": 1092994198.0, "step": 6507 }, { "entropy": 1.7181902726491292, "epoch": 0.7149487792150724, "grad_norm": 0.7857739925384521, "learning_rate": 1.539746893732063e-05, "loss": 1.376, "mean_token_accuracy": 0.6500228643417358, "num_tokens": 1093171424.0, "step": 6508 }, { "entropy": 1.6858000059922535, "epoch": 0.7150586361264453, "grad_norm": 0.6876581907272339, "learning_rate": 1.539607187705608e-05, "loss": 1.3498, "mean_token_accuracy": 0.6653678317864736, "num_tokens": 1093348791.0, "step": 6509 }, { "entropy": 1.714595099290212, "epoch": 0.7151684930378183, "grad_norm": 0.5665342807769775, "learning_rate": 1.5394674677657843e-05, "loss": 1.3115, "mean_token_accuracy": 0.655341257651647, "num_tokens": 1093555039.0, "step": 6510 }, { "entropy": 1.6599359611670177, "epoch": 0.7152783499491912, "grad_norm": 0.7044631242752075, "learning_rate": 1.5393277339170126e-05, "loss": 1.3531, "mean_token_accuracy": 0.6782778998215994, "num_tokens": 1093719986.0, "step": 6511 }, { "entropy": 1.6935375332832336, "epoch": 0.7153882068605641, "grad_norm": 0.7041305303573608, "learning_rate": 1.539187986163716e-05, "loss": 1.445, "mean_token_accuracy": 0.6444460153579712, "num_tokens": 1093903707.0, "step": 6512 }, { "entropy": 1.6425399382909138, "epoch": 0.715498063771937, "grad_norm": 0.783403217792511, "learning_rate": 1.5390482245103178e-05, "loss": 1.4552, "mean_token_accuracy": 0.6741680949926376, "num_tokens": 1094040376.0, "step": 6513 }, { "entropy": 1.8000925381978352, "epoch": 0.71560792068331, "grad_norm": 0.6443293690681458, "learning_rate": 1.538908448961241e-05, "loss": 1.3937, "mean_token_accuracy": 0.645576020081838, "num_tokens": 1094221651.0, "step": 6514 }, { "entropy": 1.7409981389840443, "epoch": 0.7157177775946829, "grad_norm": 0.712158203125, "learning_rate": 1.5387686595209097e-05, "loss": 1.3776, "mean_token_accuracy": 0.664316713809967, "num_tokens": 1094377410.0, "step": 6515 }, { "entropy": 1.69818913936615, "epoch": 0.7158276345060559, "grad_norm": 0.681666910648346, "learning_rate": 1.5386288561937482e-05, "loss": 1.3804, "mean_token_accuracy": 0.6551361183325449, "num_tokens": 1094534065.0, "step": 6516 }, { "entropy": 1.718794455130895, "epoch": 0.7159374914174288, "grad_norm": 0.6347442865371704, "learning_rate": 1.5384890389841803e-05, "loss": 1.3203, "mean_token_accuracy": 0.6684871315956116, "num_tokens": 1094666102.0, "step": 6517 }, { "entropy": 1.7151610056559246, "epoch": 0.7160473483288017, "grad_norm": 0.6894080638885498, "learning_rate": 1.5383492078966328e-05, "loss": 1.3328, "mean_token_accuracy": 0.667813797791799, "num_tokens": 1094812585.0, "step": 6518 }, { "entropy": 1.6722050309181213, "epoch": 0.7161572052401747, "grad_norm": 0.6094774007797241, "learning_rate": 1.5382093629355303e-05, "loss": 1.4698, "mean_token_accuracy": 0.6564723700284958, "num_tokens": 1094988414.0, "step": 6519 }, { "entropy": 1.7040863831837971, "epoch": 0.7162670621515476, "grad_norm": 0.6441994905471802, "learning_rate": 1.5380695041052983e-05, "loss": 1.3583, "mean_token_accuracy": 0.6653302560249964, "num_tokens": 1095160094.0, "step": 6520 }, { "entropy": 1.7137998640537262, "epoch": 0.7163769190629206, "grad_norm": 0.6766939163208008, "learning_rate": 1.5379296314103645e-05, "loss": 1.0347, "mean_token_accuracy": 0.6831163018941879, "num_tokens": 1095308938.0, "step": 6521 }, { "entropy": 1.773234248161316, "epoch": 0.7164867759742934, "grad_norm": 0.6776031255722046, "learning_rate": 1.5377897448551548e-05, "loss": 1.489, "mean_token_accuracy": 0.647934744755427, "num_tokens": 1095487258.0, "step": 6522 }, { "entropy": 1.7177372376124065, "epoch": 0.7165966328856664, "grad_norm": 0.629857063293457, "learning_rate": 1.537649844444097e-05, "loss": 1.3285, "mean_token_accuracy": 0.6603048046429952, "num_tokens": 1095675612.0, "step": 6523 }, { "entropy": 1.7716063459714253, "epoch": 0.7167064897970393, "grad_norm": 0.7026961445808411, "learning_rate": 1.537509930181619e-05, "loss": 1.3509, "mean_token_accuracy": 0.6556298683087031, "num_tokens": 1095820438.0, "step": 6524 }, { "entropy": 1.7175563077131908, "epoch": 0.7168163467084123, "grad_norm": 0.765849232673645, "learning_rate": 1.537370002072149e-05, "loss": 1.4239, "mean_token_accuracy": 0.6597336481014887, "num_tokens": 1095962592.0, "step": 6525 }, { "entropy": 1.6588062246640523, "epoch": 0.7169262036197852, "grad_norm": 0.8724434971809387, "learning_rate": 1.5372300601201152e-05, "loss": 1.454, "mean_token_accuracy": 0.6489892651637396, "num_tokens": 1096119263.0, "step": 6526 }, { "entropy": 1.752094993988673, "epoch": 0.7170360605311582, "grad_norm": 0.7585995197296143, "learning_rate": 1.537090104329947e-05, "loss": 1.348, "mean_token_accuracy": 0.677097295721372, "num_tokens": 1096272863.0, "step": 6527 }, { "entropy": 1.7106123467286427, "epoch": 0.7171459174425311, "grad_norm": 0.691969633102417, "learning_rate": 1.5369501347060744e-05, "loss": 1.4732, "mean_token_accuracy": 0.6419112334648768, "num_tokens": 1096429369.0, "step": 6528 }, { "entropy": 1.6728369891643524, "epoch": 0.7172557743539041, "grad_norm": 0.6538815498352051, "learning_rate": 1.5368101512529264e-05, "loss": 1.3228, "mean_token_accuracy": 0.6591041833162308, "num_tokens": 1096543106.0, "step": 6529 }, { "entropy": 1.7897494733333588, "epoch": 0.717365631265277, "grad_norm": 0.8466572761535645, "learning_rate": 1.536670153974934e-05, "loss": 1.3401, "mean_token_accuracy": 0.6769247204065323, "num_tokens": 1096698071.0, "step": 6530 }, { "entropy": 1.7067344685395558, "epoch": 0.71747548817665, "grad_norm": 0.695894181728363, "learning_rate": 1.5365301428765286e-05, "loss": 1.4081, "mean_token_accuracy": 0.6609604756037394, "num_tokens": 1096850429.0, "step": 6531 }, { "entropy": 1.7233928342660267, "epoch": 0.7175853450880229, "grad_norm": 0.6567378044128418, "learning_rate": 1.5363901179621403e-05, "loss": 1.4852, "mean_token_accuracy": 0.6497254719336828, "num_tokens": 1097059613.0, "step": 6532 }, { "entropy": 1.7379729052384694, "epoch": 0.7176952019993957, "grad_norm": 0.7090989351272583, "learning_rate": 1.5362500792362013e-05, "loss": 1.3564, "mean_token_accuracy": 0.6722868382930756, "num_tokens": 1097197673.0, "step": 6533 }, { "entropy": 1.689707726240158, "epoch": 0.7178050589107687, "grad_norm": 0.6554761528968811, "learning_rate": 1.5361100267031444e-05, "loss": 1.29, "mean_token_accuracy": 0.6656525383392969, "num_tokens": 1097348154.0, "step": 6534 }, { "entropy": 1.7208319107691448, "epoch": 0.7179149158221416, "grad_norm": 0.6578717827796936, "learning_rate": 1.5359699603674014e-05, "loss": 1.3293, "mean_token_accuracy": 0.6660318821668625, "num_tokens": 1097489928.0, "step": 6535 }, { "entropy": 1.7080370386441548, "epoch": 0.7180247727335146, "grad_norm": 0.5738018155097961, "learning_rate": 1.5358298802334053e-05, "loss": 1.4117, "mean_token_accuracy": 0.6500293960173925, "num_tokens": 1097700016.0, "step": 6536 }, { "entropy": 1.6456352074940999, "epoch": 0.7181346296448875, "grad_norm": 0.612850546836853, "learning_rate": 1.53568978630559e-05, "loss": 1.4237, "mean_token_accuracy": 0.6467505743106207, "num_tokens": 1097920130.0, "step": 6537 }, { "entropy": 1.7054031888643901, "epoch": 0.7182444865562605, "grad_norm": 0.6343128681182861, "learning_rate": 1.53554967858839e-05, "loss": 1.31, "mean_token_accuracy": 0.6704280972480774, "num_tokens": 1098071482.0, "step": 6538 }, { "entropy": 1.739743580420812, "epoch": 0.7183543434676334, "grad_norm": 0.7319331169128418, "learning_rate": 1.535409557086238e-05, "loss": 1.2582, "mean_token_accuracy": 0.6795140455166498, "num_tokens": 1098233791.0, "step": 6539 }, { "entropy": 1.724317838748296, "epoch": 0.7184642003790064, "grad_norm": 0.6251640915870667, "learning_rate": 1.5352694218035703e-05, "loss": 1.4219, "mean_token_accuracy": 0.6388923674821854, "num_tokens": 1098447007.0, "step": 6540 }, { "entropy": 1.7108652492364247, "epoch": 0.7185740572903793, "grad_norm": 0.7806166410446167, "learning_rate": 1.5351292727448214e-05, "loss": 1.3493, "mean_token_accuracy": 0.6638698279857635, "num_tokens": 1098660254.0, "step": 6541 }, { "entropy": 1.6957137882709503, "epoch": 0.7186839142017523, "grad_norm": 0.6760042309761047, "learning_rate": 1.534989109914427e-05, "loss": 1.2932, "mean_token_accuracy": 0.6657718569040298, "num_tokens": 1098799723.0, "step": 6542 }, { "entropy": 1.6598234574000041, "epoch": 0.7187937711131251, "grad_norm": 0.6579678058624268, "learning_rate": 1.5348489333168233e-05, "loss": 1.3738, "mean_token_accuracy": 0.65997414290905, "num_tokens": 1098964913.0, "step": 6543 }, { "entropy": 1.6748213072617848, "epoch": 0.7189036280244981, "grad_norm": 0.693806529045105, "learning_rate": 1.534708742956447e-05, "loss": 1.3726, "mean_token_accuracy": 0.658714180191358, "num_tokens": 1099132726.0, "step": 6544 }, { "entropy": 1.7303595145543416, "epoch": 0.719013484935871, "grad_norm": 0.9831480979919434, "learning_rate": 1.5345685388377342e-05, "loss": 1.4561, "mean_token_accuracy": 0.6566809763511022, "num_tokens": 1099260996.0, "step": 6545 }, { "entropy": 1.6850965122381847, "epoch": 0.7191233418472439, "grad_norm": 0.6742528080940247, "learning_rate": 1.5344283209651237e-05, "loss": 1.4858, "mean_token_accuracy": 0.650047724445661, "num_tokens": 1099473098.0, "step": 6546 }, { "entropy": 1.726267506678899, "epoch": 0.7192331987586169, "grad_norm": 0.7677439451217651, "learning_rate": 1.5342880893430526e-05, "loss": 1.4177, "mean_token_accuracy": 0.6506613542636236, "num_tokens": 1099657221.0, "step": 6547 }, { "entropy": 1.6909742454687755, "epoch": 0.7193430556699898, "grad_norm": 0.6657822132110596, "learning_rate": 1.534147843975959e-05, "loss": 1.4182, "mean_token_accuracy": 0.659657746553421, "num_tokens": 1099834559.0, "step": 6548 }, { "entropy": 1.701712469259898, "epoch": 0.7194529125813628, "grad_norm": 0.7202948927879333, "learning_rate": 1.5340075848682812e-05, "loss": 1.296, "mean_token_accuracy": 0.6672409772872925, "num_tokens": 1099955050.0, "step": 6549 }, { "entropy": 1.711505303780238, "epoch": 0.7195627694927357, "grad_norm": 0.7047147154808044, "learning_rate": 1.53386731202446e-05, "loss": 1.4693, "mean_token_accuracy": 0.6619550883769989, "num_tokens": 1100138035.0, "step": 6550 }, { "entropy": 1.7319445709387462, "epoch": 0.7196726264041087, "grad_norm": 0.8525048494338989, "learning_rate": 1.533727025448933e-05, "loss": 1.3342, "mean_token_accuracy": 0.6697620848814646, "num_tokens": 1100271260.0, "step": 6551 }, { "entropy": 1.7205536564191182, "epoch": 0.7197824833154816, "grad_norm": 0.8322303891181946, "learning_rate": 1.5335867251461415e-05, "loss": 1.3912, "mean_token_accuracy": 0.6587785333395004, "num_tokens": 1100450079.0, "step": 6552 }, { "entropy": 1.7100428342819214, "epoch": 0.7198923402268546, "grad_norm": 0.7583587169647217, "learning_rate": 1.5334464111205253e-05, "loss": 1.4353, "mean_token_accuracy": 0.6676417837540308, "num_tokens": 1100568632.0, "step": 6553 }, { "entropy": 1.6718673606713612, "epoch": 0.7200021971382274, "grad_norm": 0.6357502937316895, "learning_rate": 1.5333060833765255e-05, "loss": 1.3762, "mean_token_accuracy": 0.6590482493241628, "num_tokens": 1100788024.0, "step": 6554 }, { "entropy": 1.7551223436991374, "epoch": 0.7201120540496004, "grad_norm": 0.7153880000114441, "learning_rate": 1.5331657419185838e-05, "loss": 1.3293, "mean_token_accuracy": 0.6558401187260946, "num_tokens": 1100984331.0, "step": 6555 }, { "entropy": 1.6930421988169353, "epoch": 0.7202219109609733, "grad_norm": 0.6794725656509399, "learning_rate": 1.5330253867511415e-05, "loss": 1.3988, "mean_token_accuracy": 0.6546763380368551, "num_tokens": 1101147632.0, "step": 6556 }, { "entropy": 1.679756999015808, "epoch": 0.7203317678723463, "grad_norm": 0.77561354637146, "learning_rate": 1.5328850178786403e-05, "loss": 1.3257, "mean_token_accuracy": 0.6894521017869314, "num_tokens": 1101300740.0, "step": 6557 }, { "entropy": 1.7333133816719055, "epoch": 0.7204416247837192, "grad_norm": 0.7283507585525513, "learning_rate": 1.532744635305524e-05, "loss": 1.3716, "mean_token_accuracy": 0.6582729518413544, "num_tokens": 1101477366.0, "step": 6558 }, { "entropy": 1.7297575374444325, "epoch": 0.7205514816950921, "grad_norm": 0.7023628950119019, "learning_rate": 1.5326042390362347e-05, "loss": 1.4311, "mean_token_accuracy": 0.651911993821462, "num_tokens": 1101638434.0, "step": 6559 }, { "entropy": 1.7308916648228962, "epoch": 0.7206613386064651, "grad_norm": 0.6646205186843872, "learning_rate": 1.532463829075216e-05, "loss": 1.4038, "mean_token_accuracy": 0.6484263290961584, "num_tokens": 1101821514.0, "step": 6560 }, { "entropy": 1.746131847302119, "epoch": 0.720771195517838, "grad_norm": 0.6808525323867798, "learning_rate": 1.532323405426912e-05, "loss": 1.4595, "mean_token_accuracy": 0.6446107079585394, "num_tokens": 1102044527.0, "step": 6561 }, { "entropy": 1.7083693246046703, "epoch": 0.720881052429211, "grad_norm": 0.7566711902618408, "learning_rate": 1.5321829680957673e-05, "loss": 1.2904, "mean_token_accuracy": 0.680796946088473, "num_tokens": 1102175522.0, "step": 6562 }, { "entropy": 1.7244884669780731, "epoch": 0.7209909093405839, "grad_norm": 0.6379838585853577, "learning_rate": 1.532042517086226e-05, "loss": 1.3748, "mean_token_accuracy": 0.6597454150517782, "num_tokens": 1102364222.0, "step": 6563 }, { "entropy": 1.7356492678324382, "epoch": 0.7211007662519568, "grad_norm": 0.6635233163833618, "learning_rate": 1.531902052402734e-05, "loss": 1.4433, "mean_token_accuracy": 0.64767458041509, "num_tokens": 1102498467.0, "step": 6564 }, { "entropy": 1.7014682590961456, "epoch": 0.7212106231633297, "grad_norm": 0.6852055788040161, "learning_rate": 1.5317615740497366e-05, "loss": 1.4805, "mean_token_accuracy": 0.6671850581963857, "num_tokens": 1102659134.0, "step": 6565 }, { "entropy": 1.7902653813362122, "epoch": 0.7213204800747027, "grad_norm": 0.6710415482521057, "learning_rate": 1.53162108203168e-05, "loss": 1.4562, "mean_token_accuracy": 0.6423666675885519, "num_tokens": 1102819522.0, "step": 6566 }, { "entropy": 1.6991690297921498, "epoch": 0.7214303369860756, "grad_norm": 0.592832088470459, "learning_rate": 1.5314805763530106e-05, "loss": 1.4588, "mean_token_accuracy": 0.6513507117827734, "num_tokens": 1103004741.0, "step": 6567 }, { "entropy": 1.7278131941954296, "epoch": 0.7215401938974486, "grad_norm": 0.6481438875198364, "learning_rate": 1.5313400570181755e-05, "loss": 1.3835, "mean_token_accuracy": 0.6460322539011637, "num_tokens": 1103174131.0, "step": 6568 }, { "entropy": 1.7156480550765991, "epoch": 0.7216500508088215, "grad_norm": 0.5778603553771973, "learning_rate": 1.531199524031622e-05, "loss": 1.5037, "mean_token_accuracy": 0.6403802782297134, "num_tokens": 1103460063.0, "step": 6569 }, { "entropy": 1.7245876292387645, "epoch": 0.7217599077201945, "grad_norm": 0.6985930800437927, "learning_rate": 1.5310589773977974e-05, "loss": 1.6586, "mean_token_accuracy": 0.6096041947603226, "num_tokens": 1103734337.0, "step": 6570 }, { "entropy": 1.6952235698699951, "epoch": 0.7218697646315674, "grad_norm": 0.7454794049263, "learning_rate": 1.530918417121151e-05, "loss": 1.4224, "mean_token_accuracy": 0.6474892646074295, "num_tokens": 1103880813.0, "step": 6571 }, { "entropy": 1.6598933935165405, "epoch": 0.7219796215429404, "grad_norm": 0.647855818271637, "learning_rate": 1.5307778432061307e-05, "loss": 1.5853, "mean_token_accuracy": 0.6572377036015192, "num_tokens": 1104135140.0, "step": 6572 }, { "entropy": 1.6665717562039692, "epoch": 0.7220894784543133, "grad_norm": 0.695473849773407, "learning_rate": 1.5306372556571854e-05, "loss": 1.3111, "mean_token_accuracy": 0.6695192058881124, "num_tokens": 1104325457.0, "step": 6573 }, { "entropy": 1.6647779544194539, "epoch": 0.7221993353656861, "grad_norm": 0.7145929336547852, "learning_rate": 1.5304966544787655e-05, "loss": 1.4169, "mean_token_accuracy": 0.6621694614489874, "num_tokens": 1104482078.0, "step": 6574 }, { "entropy": 1.705965260664622, "epoch": 0.7223091922770591, "grad_norm": 0.6520200371742249, "learning_rate": 1.53035603967532e-05, "loss": 1.3999, "mean_token_accuracy": 0.6532257596651713, "num_tokens": 1104709750.0, "step": 6575 }, { "entropy": 1.7335670789082844, "epoch": 0.722419049188432, "grad_norm": 0.6884702444076538, "learning_rate": 1.5302154112513e-05, "loss": 1.4801, "mean_token_accuracy": 0.6404634515444437, "num_tokens": 1104882328.0, "step": 6576 }, { "entropy": 1.7464761237303417, "epoch": 0.722528906099805, "grad_norm": 0.680620014667511, "learning_rate": 1.5300747692111562e-05, "loss": 1.3174, "mean_token_accuracy": 0.6647460460662842, "num_tokens": 1105029762.0, "step": 6577 }, { "entropy": 1.771151453256607, "epoch": 0.7226387630111779, "grad_norm": 0.6430426836013794, "learning_rate": 1.5299341135593397e-05, "loss": 1.416, "mean_token_accuracy": 0.6499434957901636, "num_tokens": 1105183987.0, "step": 6578 }, { "entropy": 1.7155382533868153, "epoch": 0.7227486199225509, "grad_norm": 0.589311957359314, "learning_rate": 1.5297934443003023e-05, "loss": 1.4938, "mean_token_accuracy": 0.6517259627580643, "num_tokens": 1105391076.0, "step": 6579 }, { "entropy": 1.6656960149606068, "epoch": 0.7228584768339238, "grad_norm": 0.7947255373001099, "learning_rate": 1.529652761438496e-05, "loss": 1.3701, "mean_token_accuracy": 0.6710091133912405, "num_tokens": 1105518257.0, "step": 6580 }, { "entropy": 1.695709377527237, "epoch": 0.7229683337452968, "grad_norm": 0.7417528033256531, "learning_rate": 1.529512064978373e-05, "loss": 1.5117, "mean_token_accuracy": 0.6543413400650024, "num_tokens": 1105681961.0, "step": 6581 }, { "entropy": 1.6597294012705486, "epoch": 0.7230781906566697, "grad_norm": 0.6216127872467041, "learning_rate": 1.5293713549243872e-05, "loss": 1.3874, "mean_token_accuracy": 0.6591449032227198, "num_tokens": 1105851772.0, "step": 6582 }, { "entropy": 1.677135815223058, "epoch": 0.7231880475680427, "grad_norm": 0.7040608525276184, "learning_rate": 1.5292306312809914e-05, "loss": 1.2326, "mean_token_accuracy": 0.6805572162071863, "num_tokens": 1106010660.0, "step": 6583 }, { "entropy": 1.6714986264705658, "epoch": 0.7232979044794156, "grad_norm": 0.7146863341331482, "learning_rate": 1.52908989405264e-05, "loss": 1.3671, "mean_token_accuracy": 0.6705887715021769, "num_tokens": 1106194910.0, "step": 6584 }, { "entropy": 1.7175088028113048, "epoch": 0.7234077613907886, "grad_norm": 0.5585004687309265, "learning_rate": 1.5289491432437857e-05, "loss": 1.4606, "mean_token_accuracy": 0.6325180033842722, "num_tokens": 1106435793.0, "step": 6585 }, { "entropy": 1.7380537887414296, "epoch": 0.7235176183021614, "grad_norm": 0.8111526966094971, "learning_rate": 1.528808378858885e-05, "loss": 1.4263, "mean_token_accuracy": 0.6445368727048238, "num_tokens": 1106592659.0, "step": 6586 }, { "entropy": 1.7423686981201172, "epoch": 0.7236274752135343, "grad_norm": 0.6948752403259277, "learning_rate": 1.528667600902392e-05, "loss": 1.354, "mean_token_accuracy": 0.65903340280056, "num_tokens": 1106736777.0, "step": 6587 }, { "entropy": 1.7096178233623505, "epoch": 0.7237373321249073, "grad_norm": 0.8738431334495544, "learning_rate": 1.528526809378763e-05, "loss": 1.302, "mean_token_accuracy": 0.6691812723875046, "num_tokens": 1106864737.0, "step": 6588 }, { "entropy": 1.7112048765023549, "epoch": 0.7238471890362802, "grad_norm": 0.7258959412574768, "learning_rate": 1.5283860042924538e-05, "loss": 1.419, "mean_token_accuracy": 0.6464797953764597, "num_tokens": 1107049074.0, "step": 6589 }, { "entropy": 1.6991894841194153, "epoch": 0.7239570459476532, "grad_norm": 0.6656256914138794, "learning_rate": 1.5282451856479202e-05, "loss": 1.3974, "mean_token_accuracy": 0.6695433159669241, "num_tokens": 1107213955.0, "step": 6590 }, { "entropy": 1.6625440021355946, "epoch": 0.7240669028590261, "grad_norm": 0.6745372414588928, "learning_rate": 1.5281043534496193e-05, "loss": 1.2792, "mean_token_accuracy": 0.6713592559099197, "num_tokens": 1107357394.0, "step": 6591 }, { "entropy": 1.695921152830124, "epoch": 0.7241767597703991, "grad_norm": 0.7285088896751404, "learning_rate": 1.5279635077020087e-05, "loss": 1.2813, "mean_token_accuracy": 0.6724530756473541, "num_tokens": 1107477762.0, "step": 6592 }, { "entropy": 1.691762089729309, "epoch": 0.724286616681772, "grad_norm": 0.730449914932251, "learning_rate": 1.527822648409546e-05, "loss": 1.3207, "mean_token_accuracy": 0.6625142047802607, "num_tokens": 1107709610.0, "step": 6593 }, { "entropy": 1.7176588773727417, "epoch": 0.724396473593145, "grad_norm": 0.6654216051101685, "learning_rate": 1.5276817755766894e-05, "loss": 1.3717, "mean_token_accuracy": 0.6538281142711639, "num_tokens": 1107913145.0, "step": 6594 }, { "entropy": 1.7409211297829945, "epoch": 0.7245063305045178, "grad_norm": 0.6343408823013306, "learning_rate": 1.5275408892078967e-05, "loss": 1.4509, "mean_token_accuracy": 0.6523208022117615, "num_tokens": 1108067869.0, "step": 6595 }, { "entropy": 1.7824281652768452, "epoch": 0.7246161874158908, "grad_norm": 0.7533055543899536, "learning_rate": 1.527399989307628e-05, "loss": 1.4195, "mean_token_accuracy": 0.6448671966791153, "num_tokens": 1108219158.0, "step": 6596 }, { "entropy": 1.7014999488989513, "epoch": 0.7247260443272637, "grad_norm": 0.5905172824859619, "learning_rate": 1.5272590758803423e-05, "loss": 1.4628, "mean_token_accuracy": 0.6379047979911169, "num_tokens": 1108422283.0, "step": 6597 }, { "entropy": 1.6868136525154114, "epoch": 0.7248359012386367, "grad_norm": 0.7195769548416138, "learning_rate": 1.527118148930499e-05, "loss": 1.4474, "mean_token_accuracy": 0.6536327004432678, "num_tokens": 1108576791.0, "step": 6598 }, { "entropy": 1.7102086345354717, "epoch": 0.7249457581500096, "grad_norm": 0.6632969975471497, "learning_rate": 1.526977208462559e-05, "loss": 1.3659, "mean_token_accuracy": 0.6555024435122808, "num_tokens": 1108758285.0, "step": 6599 }, { "entropy": 1.684918999671936, "epoch": 0.7250556150613825, "grad_norm": 1.003880262374878, "learning_rate": 1.526836254480983e-05, "loss": 1.523, "mean_token_accuracy": 0.657150665918986, "num_tokens": 1108915576.0, "step": 6600 }, { "entropy": 1.760528455177943, "epoch": 0.7251654719727555, "grad_norm": 0.6639096140861511, "learning_rate": 1.5266952869902315e-05, "loss": 1.3828, "mean_token_accuracy": 0.6530559410651525, "num_tokens": 1109037423.0, "step": 6601 }, { "entropy": 1.646875262260437, "epoch": 0.7252753288841284, "grad_norm": 0.7310366630554199, "learning_rate": 1.526554305994766e-05, "loss": 1.2516, "mean_token_accuracy": 0.6824596722920736, "num_tokens": 1109183215.0, "step": 6602 }, { "entropy": 1.6977095107237499, "epoch": 0.7253851857955014, "grad_norm": 0.6350131630897522, "learning_rate": 1.5264133114990498e-05, "loss": 1.4548, "mean_token_accuracy": 0.6472870657841364, "num_tokens": 1109397845.0, "step": 6603 }, { "entropy": 1.6626974542935689, "epoch": 0.7254950427068743, "grad_norm": 0.6890853047370911, "learning_rate": 1.526272303507544e-05, "loss": 1.3251, "mean_token_accuracy": 0.6578503499428431, "num_tokens": 1109534813.0, "step": 6604 }, { "entropy": 1.7217269043127696, "epoch": 0.7256048996182473, "grad_norm": 0.7920450568199158, "learning_rate": 1.526131282024712e-05, "loss": 1.4178, "mean_token_accuracy": 0.6627766042947769, "num_tokens": 1109701662.0, "step": 6605 }, { "entropy": 1.7304079035917919, "epoch": 0.7257147565296201, "grad_norm": 0.6736690402030945, "learning_rate": 1.525990247055017e-05, "loss": 1.582, "mean_token_accuracy": 0.6449010322491328, "num_tokens": 1109891057.0, "step": 6606 }, { "entropy": 1.7143594821294148, "epoch": 0.7258246134409931, "grad_norm": 0.813389778137207, "learning_rate": 1.5258491986029224e-05, "loss": 1.5102, "mean_token_accuracy": 0.6494471182425817, "num_tokens": 1110042199.0, "step": 6607 }, { "entropy": 1.712285617987315, "epoch": 0.725934470352366, "grad_norm": 0.651279628276825, "learning_rate": 1.5257081366728928e-05, "loss": 1.3512, "mean_token_accuracy": 0.6540268957614899, "num_tokens": 1110181003.0, "step": 6608 }, { "entropy": 1.7569693525632222, "epoch": 0.726044327263739, "grad_norm": 0.6964418292045593, "learning_rate": 1.5255670612693925e-05, "loss": 1.4252, "mean_token_accuracy": 0.652028406659762, "num_tokens": 1110325250.0, "step": 6609 }, { "entropy": 1.7216349244117737, "epoch": 0.7261541841751119, "grad_norm": 0.8492372035980225, "learning_rate": 1.5254259723968865e-05, "loss": 1.406, "mean_token_accuracy": 0.6618664065996805, "num_tokens": 1110484997.0, "step": 6610 }, { "entropy": 1.6996253232161205, "epoch": 0.7262640410864849, "grad_norm": 0.6752820014953613, "learning_rate": 1.52528487005984e-05, "loss": 1.3428, "mean_token_accuracy": 0.6618599245945612, "num_tokens": 1110628770.0, "step": 6611 }, { "entropy": 1.7417829434076946, "epoch": 0.7263738979978578, "grad_norm": 0.651860237121582, "learning_rate": 1.525143754262719e-05, "loss": 1.5468, "mean_token_accuracy": 0.6249453624089559, "num_tokens": 1110855352.0, "step": 6612 }, { "entropy": 1.6957463920116425, "epoch": 0.7264837549092307, "grad_norm": 0.6212682127952576, "learning_rate": 1.5250026250099896e-05, "loss": 1.328, "mean_token_accuracy": 0.6674534380435944, "num_tokens": 1111037352.0, "step": 6613 }, { "entropy": 1.721895823876063, "epoch": 0.7265936118206037, "grad_norm": 0.5673272013664246, "learning_rate": 1.5248614823061191e-05, "loss": 1.3398, "mean_token_accuracy": 0.6586939742167791, "num_tokens": 1111222031.0, "step": 6614 }, { "entropy": 1.6733955939610798, "epoch": 0.7267034687319766, "grad_norm": 0.696190595626831, "learning_rate": 1.524720326155574e-05, "loss": 1.3081, "mean_token_accuracy": 0.670863464474678, "num_tokens": 1111351621.0, "step": 6615 }, { "entropy": 1.7015343010425568, "epoch": 0.7268133256433496, "grad_norm": 0.8680276870727539, "learning_rate": 1.5245791565628219e-05, "loss": 1.554, "mean_token_accuracy": 0.6325680613517761, "num_tokens": 1111559646.0, "step": 6616 }, { "entropy": 1.6907628178596497, "epoch": 0.7269231825547224, "grad_norm": 0.8936082124710083, "learning_rate": 1.5244379735323305e-05, "loss": 1.4222, "mean_token_accuracy": 0.6638303697109222, "num_tokens": 1111676130.0, "step": 6617 }, { "entropy": 1.6800566116968791, "epoch": 0.7270330394660954, "grad_norm": 0.6707502603530884, "learning_rate": 1.5242967770685688e-05, "loss": 1.475, "mean_token_accuracy": 0.6595744838317236, "num_tokens": 1111839395.0, "step": 6618 }, { "entropy": 1.6874233186244965, "epoch": 0.7271428963774683, "grad_norm": 0.7718756198883057, "learning_rate": 1.5241555671760053e-05, "loss": 1.296, "mean_token_accuracy": 0.673300489783287, "num_tokens": 1111977599.0, "step": 6619 }, { "entropy": 1.69405393799146, "epoch": 0.7272527532888413, "grad_norm": 0.6836499571800232, "learning_rate": 1.5240143438591091e-05, "loss": 1.52, "mean_token_accuracy": 0.6380777706702551, "num_tokens": 1112169745.0, "step": 6620 }, { "entropy": 1.6723734835783641, "epoch": 0.7273626102002142, "grad_norm": 0.7240423560142517, "learning_rate": 1.52387310712235e-05, "loss": 1.445, "mean_token_accuracy": 0.6521526724100113, "num_tokens": 1112355144.0, "step": 6621 }, { "entropy": 1.6805169483025868, "epoch": 0.7274724671115872, "grad_norm": 0.6674152612686157, "learning_rate": 1.5237318569701982e-05, "loss": 1.4642, "mean_token_accuracy": 0.6453837553660074, "num_tokens": 1112530036.0, "step": 6622 }, { "entropy": 1.7313962876796722, "epoch": 0.7275823240229601, "grad_norm": 0.6953855156898499, "learning_rate": 1.523590593407124e-05, "loss": 1.3161, "mean_token_accuracy": 0.6699342131614685, "num_tokens": 1112683314.0, "step": 6623 }, { "entropy": 1.6776468753814697, "epoch": 0.7276921809343331, "grad_norm": 0.7314421534538269, "learning_rate": 1.5234493164375983e-05, "loss": 1.3186, "mean_token_accuracy": 0.6604682207107544, "num_tokens": 1112849572.0, "step": 6624 }, { "entropy": 1.774099330107371, "epoch": 0.727802037845706, "grad_norm": 0.9143105149269104, "learning_rate": 1.5233080260660929e-05, "loss": 1.3862, "mean_token_accuracy": 0.6627939840157827, "num_tokens": 1112960638.0, "step": 6625 }, { "entropy": 1.74024565021197, "epoch": 0.727911894757079, "grad_norm": 0.7166746854782104, "learning_rate": 1.5231667222970788e-05, "loss": 1.5266, "mean_token_accuracy": 0.6335213532050451, "num_tokens": 1113149401.0, "step": 6626 }, { "entropy": 1.6582687099774678, "epoch": 0.7280217516684518, "grad_norm": 0.6647648215293884, "learning_rate": 1.5230254051350288e-05, "loss": 1.3167, "mean_token_accuracy": 0.6622982124487559, "num_tokens": 1113296414.0, "step": 6627 }, { "entropy": 1.7777949670950572, "epoch": 0.7281316085798247, "grad_norm": 0.7266597151756287, "learning_rate": 1.5228840745844154e-05, "loss": 1.4685, "mean_token_accuracy": 0.6511821498473486, "num_tokens": 1113480827.0, "step": 6628 }, { "entropy": 1.702185720205307, "epoch": 0.7282414654911977, "grad_norm": 0.7669931650161743, "learning_rate": 1.5227427306497113e-05, "loss": 1.4266, "mean_token_accuracy": 0.6498915751775106, "num_tokens": 1113636000.0, "step": 6629 }, { "entropy": 1.69204247991244, "epoch": 0.7283513224025706, "grad_norm": 0.6504570841789246, "learning_rate": 1.5226013733353906e-05, "loss": 1.2188, "mean_token_accuracy": 0.681893065571785, "num_tokens": 1113756966.0, "step": 6630 }, { "entropy": 1.7312454879283905, "epoch": 0.7284611793139436, "grad_norm": 0.622042179107666, "learning_rate": 1.5224600026459266e-05, "loss": 1.4279, "mean_token_accuracy": 0.6588011731704077, "num_tokens": 1113941442.0, "step": 6631 }, { "entropy": 1.6893315315246582, "epoch": 0.7285710362253165, "grad_norm": 0.7400401830673218, "learning_rate": 1.5223186185857941e-05, "loss": 1.314, "mean_token_accuracy": 0.6682803531487783, "num_tokens": 1114137617.0, "step": 6632 }, { "entropy": 1.695889800786972, "epoch": 0.7286808931366895, "grad_norm": 0.7676869034767151, "learning_rate": 1.5221772211594674e-05, "loss": 1.4751, "mean_token_accuracy": 0.6432386587063471, "num_tokens": 1114363719.0, "step": 6633 }, { "entropy": 1.7293170789877574, "epoch": 0.7287907500480624, "grad_norm": 0.628367006778717, "learning_rate": 1.5220358103714223e-05, "loss": 1.472, "mean_token_accuracy": 0.6526259730259577, "num_tokens": 1114546800.0, "step": 6634 }, { "entropy": 1.6271907488505046, "epoch": 0.7289006069594354, "grad_norm": 0.5525145530700684, "learning_rate": 1.5218943862261334e-05, "loss": 1.3507, "mean_token_accuracy": 0.6575760791699091, "num_tokens": 1114731169.0, "step": 6635 }, { "entropy": 1.70510795712471, "epoch": 0.7290104638708083, "grad_norm": 0.5792953372001648, "learning_rate": 1.5217529487280777e-05, "loss": 1.3888, "mean_token_accuracy": 0.6515243798494339, "num_tokens": 1114929310.0, "step": 6636 }, { "entropy": 1.772267738978068, "epoch": 0.7291203207821813, "grad_norm": 0.7042428851127625, "learning_rate": 1.5216114978817311e-05, "loss": 1.393, "mean_token_accuracy": 0.6505443006753922, "num_tokens": 1115053969.0, "step": 6637 }, { "entropy": 1.7222981949647267, "epoch": 0.7292301776935541, "grad_norm": 0.7960110902786255, "learning_rate": 1.5214700336915707e-05, "loss": 1.3253, "mean_token_accuracy": 0.6682598739862442, "num_tokens": 1115182490.0, "step": 6638 }, { "entropy": 1.656973163286845, "epoch": 0.7293400346049271, "grad_norm": 0.8022185564041138, "learning_rate": 1.5213285561620735e-05, "loss": 1.3047, "mean_token_accuracy": 0.678027073542277, "num_tokens": 1115345148.0, "step": 6639 }, { "entropy": 1.6861250897248585, "epoch": 0.7294498915163, "grad_norm": 0.6490318179130554, "learning_rate": 1.5211870652977174e-05, "loss": 1.4897, "mean_token_accuracy": 0.63862211505572, "num_tokens": 1115591859.0, "step": 6640 }, { "entropy": 1.6945749620596569, "epoch": 0.7295597484276729, "grad_norm": 0.6802365779876709, "learning_rate": 1.5210455611029805e-05, "loss": 1.2691, "mean_token_accuracy": 0.6764042377471924, "num_tokens": 1115753223.0, "step": 6641 }, { "entropy": 1.6721658011277516, "epoch": 0.7296696053390459, "grad_norm": 0.7338157892227173, "learning_rate": 1.5209040435823412e-05, "loss": 1.2733, "mean_token_accuracy": 0.6703089773654938, "num_tokens": 1115947252.0, "step": 6642 }, { "entropy": 1.7122320334116619, "epoch": 0.7297794622504188, "grad_norm": 0.6685588955879211, "learning_rate": 1.5207625127402788e-05, "loss": 1.3575, "mean_token_accuracy": 0.6568591793378195, "num_tokens": 1116088045.0, "step": 6643 }, { "entropy": 1.7311648031075795, "epoch": 0.7298893191617918, "grad_norm": 0.635732114315033, "learning_rate": 1.5206209685812723e-05, "loss": 1.4057, "mean_token_accuracy": 0.6452435304721197, "num_tokens": 1116281246.0, "step": 6644 }, { "entropy": 1.6677932838598888, "epoch": 0.7299991760731647, "grad_norm": 0.6619640588760376, "learning_rate": 1.5204794111098016e-05, "loss": 1.4132, "mean_token_accuracy": 0.6569789250691732, "num_tokens": 1116507132.0, "step": 6645 }, { "entropy": 1.692247857650121, "epoch": 0.7301090329845377, "grad_norm": 0.6537544131278992, "learning_rate": 1.5203378403303473e-05, "loss": 1.3817, "mean_token_accuracy": 0.6712391922871271, "num_tokens": 1116674442.0, "step": 6646 }, { "entropy": 1.676435798406601, "epoch": 0.7302188898959106, "grad_norm": 0.5650108456611633, "learning_rate": 1.5201962562473893e-05, "loss": 1.4422, "mean_token_accuracy": 0.6645029336214066, "num_tokens": 1116857222.0, "step": 6647 }, { "entropy": 1.7497854729493458, "epoch": 0.7303287468072835, "grad_norm": 0.6814372539520264, "learning_rate": 1.5200546588654097e-05, "loss": 1.5072, "mean_token_accuracy": 0.641438439488411, "num_tokens": 1117030729.0, "step": 6648 }, { "entropy": 1.6965225736300151, "epoch": 0.7304386037186564, "grad_norm": 0.7143113017082214, "learning_rate": 1.519913048188889e-05, "loss": 1.4258, "mean_token_accuracy": 0.6576728324095408, "num_tokens": 1117220830.0, "step": 6649 }, { "entropy": 1.6916110416253407, "epoch": 0.7305484606300294, "grad_norm": 0.8342239260673523, "learning_rate": 1.5197714242223098e-05, "loss": 1.4201, "mean_token_accuracy": 0.6518561790386835, "num_tokens": 1117356587.0, "step": 6650 }, { "entropy": 1.6956707139809926, "epoch": 0.7306583175414023, "grad_norm": 0.6960749626159668, "learning_rate": 1.519629786970154e-05, "loss": 1.5202, "mean_token_accuracy": 0.6355159282684326, "num_tokens": 1117540824.0, "step": 6651 }, { "entropy": 1.6752854486306508, "epoch": 0.7307681744527753, "grad_norm": 0.6621298789978027, "learning_rate": 1.5194881364369048e-05, "loss": 1.198, "mean_token_accuracy": 0.6848858445882797, "num_tokens": 1117657941.0, "step": 6652 }, { "entropy": 1.7179746131102245, "epoch": 0.7308780313641482, "grad_norm": 0.6317608952522278, "learning_rate": 1.5193464726270448e-05, "loss": 1.3602, "mean_token_accuracy": 0.6555174241463343, "num_tokens": 1117821762.0, "step": 6653 }, { "entropy": 1.7452267309029896, "epoch": 0.7309878882755211, "grad_norm": 0.695980429649353, "learning_rate": 1.519204795545058e-05, "loss": 1.2723, "mean_token_accuracy": 0.670079380273819, "num_tokens": 1117938624.0, "step": 6654 }, { "entropy": 1.685364653666814, "epoch": 0.7310977451868941, "grad_norm": 0.6580285429954529, "learning_rate": 1.5190631051954285e-05, "loss": 1.4589, "mean_token_accuracy": 0.6547621041536331, "num_tokens": 1118134302.0, "step": 6655 }, { "entropy": 1.701217790444692, "epoch": 0.731207602098267, "grad_norm": 0.6681925058364868, "learning_rate": 1.5189214015826406e-05, "loss": 1.2259, "mean_token_accuracy": 0.6868906915187836, "num_tokens": 1118254275.0, "step": 6656 }, { "entropy": 1.7343595921993256, "epoch": 0.73131745900964, "grad_norm": 0.7051990628242493, "learning_rate": 1.5187796847111787e-05, "loss": 1.2701, "mean_token_accuracy": 0.6716774702072144, "num_tokens": 1118370267.0, "step": 6657 }, { "entropy": 1.7071207165718079, "epoch": 0.7314273159210128, "grad_norm": 0.6622605919837952, "learning_rate": 1.5186379545855287e-05, "loss": 1.4456, "mean_token_accuracy": 0.6554889182249705, "num_tokens": 1118535104.0, "step": 6658 }, { "entropy": 1.7222228546937306, "epoch": 0.7315371728323858, "grad_norm": 0.7932170033454895, "learning_rate": 1.5184962112101762e-05, "loss": 1.4314, "mean_token_accuracy": 0.6692963143189748, "num_tokens": 1118679063.0, "step": 6659 }, { "entropy": 1.6951739291350048, "epoch": 0.7316470297437587, "grad_norm": 0.7392410039901733, "learning_rate": 1.5183544545896067e-05, "loss": 1.4841, "mean_token_accuracy": 0.6538377950588862, "num_tokens": 1118849347.0, "step": 6660 }, { "entropy": 1.6979187230269115, "epoch": 0.7317568866551317, "grad_norm": 0.7129770517349243, "learning_rate": 1.5182126847283079e-05, "loss": 1.4111, "mean_token_accuracy": 0.6520341485738754, "num_tokens": 1119029143.0, "step": 6661 }, { "entropy": 1.738596349954605, "epoch": 0.7318667435665046, "grad_norm": 0.707251250743866, "learning_rate": 1.5180709016307657e-05, "loss": 1.3563, "mean_token_accuracy": 0.6612697641054789, "num_tokens": 1119159190.0, "step": 6662 }, { "entropy": 1.760772128899892, "epoch": 0.7319766004778776, "grad_norm": 0.9078652262687683, "learning_rate": 1.5179291053014678e-05, "loss": 1.5109, "mean_token_accuracy": 0.648401752114296, "num_tokens": 1119289035.0, "step": 6663 }, { "entropy": 1.6850820978482564, "epoch": 0.7320864573892505, "grad_norm": 0.6886321902275085, "learning_rate": 1.5177872957449022e-05, "loss": 1.2989, "mean_token_accuracy": 0.6829408456881841, "num_tokens": 1119453081.0, "step": 6664 }, { "entropy": 1.6729042033354442, "epoch": 0.7321963143006235, "grad_norm": 0.6785295605659485, "learning_rate": 1.517645472965557e-05, "loss": 1.4541, "mean_token_accuracy": 0.6468785454829534, "num_tokens": 1119599099.0, "step": 6665 }, { "entropy": 1.7227231860160828, "epoch": 0.7323061712119964, "grad_norm": 0.7047486305236816, "learning_rate": 1.5175036369679207e-05, "loss": 1.3855, "mean_token_accuracy": 0.6632230083147684, "num_tokens": 1119738114.0, "step": 6666 }, { "entropy": 1.6837018032868702, "epoch": 0.7324160281233693, "grad_norm": 0.6316218972206116, "learning_rate": 1.5173617877564824e-05, "loss": 1.4426, "mean_token_accuracy": 0.6650246977806091, "num_tokens": 1119931974.0, "step": 6667 }, { "entropy": 1.7190644443035126, "epoch": 0.7325258850347423, "grad_norm": 0.6559016108512878, "learning_rate": 1.5172199253357317e-05, "loss": 1.4886, "mean_token_accuracy": 0.647669846812884, "num_tokens": 1120121272.0, "step": 6668 }, { "entropy": 1.7384677827358246, "epoch": 0.7326357419461151, "grad_norm": 0.689016580581665, "learning_rate": 1.517078049710158e-05, "loss": 1.5965, "mean_token_accuracy": 0.6388440877199173, "num_tokens": 1120326088.0, "step": 6669 }, { "entropy": 1.7198711037635803, "epoch": 0.7327455988574881, "grad_norm": 0.7162541747093201, "learning_rate": 1.5169361608842526e-05, "loss": 1.4903, "mean_token_accuracy": 0.6306491643190384, "num_tokens": 1120527353.0, "step": 6670 }, { "entropy": 1.7130565146605174, "epoch": 0.732855455768861, "grad_norm": 0.7176745533943176, "learning_rate": 1.5167942588625051e-05, "loss": 1.4091, "mean_token_accuracy": 0.663971463839213, "num_tokens": 1120649569.0, "step": 6671 }, { "entropy": 1.691343088944753, "epoch": 0.732965312680234, "grad_norm": 0.7076248526573181, "learning_rate": 1.516652343649407e-05, "loss": 1.2405, "mean_token_accuracy": 0.684244821468989, "num_tokens": 1120793554.0, "step": 6672 }, { "entropy": 1.7118805746237438, "epoch": 0.7330751695916069, "grad_norm": 0.6533283591270447, "learning_rate": 1.51651041524945e-05, "loss": 1.3292, "mean_token_accuracy": 0.6786542683839798, "num_tokens": 1120953250.0, "step": 6673 }, { "entropy": 1.717886209487915, "epoch": 0.7331850265029799, "grad_norm": 0.6892806887626648, "learning_rate": 1.5163684736671268e-05, "loss": 1.4064, "mean_token_accuracy": 0.6795545766750971, "num_tokens": 1121130654.0, "step": 6674 }, { "entropy": 1.6847498218218486, "epoch": 0.7332948834143528, "grad_norm": 0.6819150447845459, "learning_rate": 1.516226518906928e-05, "loss": 1.4565, "mean_token_accuracy": 0.635857825477918, "num_tokens": 1121314947.0, "step": 6675 }, { "entropy": 1.7709390620390575, "epoch": 0.7334047403257258, "grad_norm": 0.7383188605308533, "learning_rate": 1.5160845509733481e-05, "loss": 1.4823, "mean_token_accuracy": 0.6510516554117203, "num_tokens": 1121457352.0, "step": 6676 }, { "entropy": 1.7243086993694305, "epoch": 0.7335145972370987, "grad_norm": 0.7374492287635803, "learning_rate": 1.5159425698708794e-05, "loss": 1.3823, "mean_token_accuracy": 0.6554542581240336, "num_tokens": 1121595888.0, "step": 6677 }, { "entropy": 1.7116446495056152, "epoch": 0.7336244541484717, "grad_norm": 0.7225131988525391, "learning_rate": 1.515800575604016e-05, "loss": 1.3005, "mean_token_accuracy": 0.6611469139655431, "num_tokens": 1121715635.0, "step": 6678 }, { "entropy": 1.7593301236629486, "epoch": 0.7337343110598445, "grad_norm": 0.6853554844856262, "learning_rate": 1.5156585681772513e-05, "loss": 1.4137, "mean_token_accuracy": 0.652967189749082, "num_tokens": 1121900551.0, "step": 6679 }, { "entropy": 1.6201580166816711, "epoch": 0.7338441679712175, "grad_norm": 0.5897448658943176, "learning_rate": 1.5155165475950808e-05, "loss": 1.3441, "mean_token_accuracy": 0.6597137997547785, "num_tokens": 1122074093.0, "step": 6680 }, { "entropy": 1.6828961670398712, "epoch": 0.7339540248825904, "grad_norm": 0.6277378797531128, "learning_rate": 1.5153745138619984e-05, "loss": 1.447, "mean_token_accuracy": 0.6535770297050476, "num_tokens": 1122262412.0, "step": 6681 }, { "entropy": 1.6912165582180023, "epoch": 0.7340638817939633, "grad_norm": 0.6900094747543335, "learning_rate": 1.5152324669825001e-05, "loss": 1.3992, "mean_token_accuracy": 0.6562529653310776, "num_tokens": 1122393860.0, "step": 6682 }, { "entropy": 1.7647963762283325, "epoch": 0.7341737387053363, "grad_norm": 0.7445904612541199, "learning_rate": 1.515090406961081e-05, "loss": 1.4102, "mean_token_accuracy": 0.6538730363051096, "num_tokens": 1122528985.0, "step": 6683 }, { "entropy": 1.6255736549695332, "epoch": 0.7342835956167092, "grad_norm": 0.7534268498420715, "learning_rate": 1.514948333802238e-05, "loss": 1.509, "mean_token_accuracy": 0.6601062913735708, "num_tokens": 1122731476.0, "step": 6684 }, { "entropy": 1.744109223286311, "epoch": 0.7343934525280822, "grad_norm": 0.6984800100326538, "learning_rate": 1.5148062475104667e-05, "loss": 1.3975, "mean_token_accuracy": 0.6600144853194555, "num_tokens": 1122910393.0, "step": 6685 }, { "entropy": 1.706167111794154, "epoch": 0.7345033094394551, "grad_norm": 0.7030271887779236, "learning_rate": 1.5146641480902648e-05, "loss": 1.3823, "mean_token_accuracy": 0.6593709588050842, "num_tokens": 1123088442.0, "step": 6686 }, { "entropy": 1.6892346441745758, "epoch": 0.7346131663508281, "grad_norm": 0.7129636406898499, "learning_rate": 1.5145220355461296e-05, "loss": 1.4505, "mean_token_accuracy": 0.6492991894483566, "num_tokens": 1123239118.0, "step": 6687 }, { "entropy": 1.6550804773966472, "epoch": 0.734723023262201, "grad_norm": 0.5693633556365967, "learning_rate": 1.5143799098825587e-05, "loss": 1.5521, "mean_token_accuracy": 0.634413423637549, "num_tokens": 1123485150.0, "step": 6688 }, { "entropy": 1.6616708040237427, "epoch": 0.734832880173574, "grad_norm": 0.5824199318885803, "learning_rate": 1.5142377711040503e-05, "loss": 1.3501, "mean_token_accuracy": 0.65452907482783, "num_tokens": 1123694935.0, "step": 6689 }, { "entropy": 1.6947866678237915, "epoch": 0.7349427370849468, "grad_norm": 0.6208266019821167, "learning_rate": 1.5140956192151031e-05, "loss": 1.581, "mean_token_accuracy": 0.6196437428394953, "num_tokens": 1123927009.0, "step": 6690 }, { "entropy": 1.6772996087869008, "epoch": 0.7350525939963198, "grad_norm": 0.6417631506919861, "learning_rate": 1.513953454220216e-05, "loss": 1.3748, "mean_token_accuracy": 0.6572922120491663, "num_tokens": 1124106818.0, "step": 6691 }, { "entropy": 1.721063772837321, "epoch": 0.7351624509076927, "grad_norm": 0.7408942580223083, "learning_rate": 1.513811276123889e-05, "loss": 1.4452, "mean_token_accuracy": 0.6464604238669077, "num_tokens": 1124260837.0, "step": 6692 }, { "entropy": 1.7039151688416798, "epoch": 0.7352723078190657, "grad_norm": 0.7227491736412048, "learning_rate": 1.5136690849306212e-05, "loss": 1.3876, "mean_token_accuracy": 0.6548017660776774, "num_tokens": 1124411691.0, "step": 6693 }, { "entropy": 1.6541140377521515, "epoch": 0.7353821647304386, "grad_norm": 0.5854305624961853, "learning_rate": 1.5135268806449135e-05, "loss": 1.4233, "mean_token_accuracy": 0.6561457067728043, "num_tokens": 1124624577.0, "step": 6694 }, { "entropy": 1.6717216869195302, "epoch": 0.7354920216418115, "grad_norm": 0.6732227206230164, "learning_rate": 1.5133846632712663e-05, "loss": 1.3833, "mean_token_accuracy": 0.6642757703860601, "num_tokens": 1124813586.0, "step": 6695 }, { "entropy": 1.6729457378387451, "epoch": 0.7356018785531845, "grad_norm": 0.7442759871482849, "learning_rate": 1.5132424328141809e-05, "loss": 1.3488, "mean_token_accuracy": 0.660913089911143, "num_tokens": 1124978782.0, "step": 6696 }, { "entropy": 1.6723896364370983, "epoch": 0.7357117354645574, "grad_norm": 0.663541853427887, "learning_rate": 1.5131001892781582e-05, "loss": 1.3012, "mean_token_accuracy": 0.6680503934621811, "num_tokens": 1125122428.0, "step": 6697 }, { "entropy": 1.746447930733363, "epoch": 0.7358215923759304, "grad_norm": 0.8296138048171997, "learning_rate": 1.5129579326677014e-05, "loss": 1.3793, "mean_token_accuracy": 0.6552826712528864, "num_tokens": 1125290284.0, "step": 6698 }, { "entropy": 1.7190218269824982, "epoch": 0.7359314492873033, "grad_norm": 0.7751715779304504, "learning_rate": 1.5128156629873119e-05, "loss": 1.406, "mean_token_accuracy": 0.6542918781439463, "num_tokens": 1125443248.0, "step": 6699 }, { "entropy": 1.7631977200508118, "epoch": 0.7360413061986762, "grad_norm": 0.6478453874588013, "learning_rate": 1.5126733802414923e-05, "loss": 1.3893, "mean_token_accuracy": 0.6554517845312754, "num_tokens": 1125635355.0, "step": 6700 }, { "entropy": 1.6814933717250824, "epoch": 0.7361511631100491, "grad_norm": 0.6561465859413147, "learning_rate": 1.5125310844347465e-05, "loss": 1.4587, "mean_token_accuracy": 0.6471677968899409, "num_tokens": 1125845108.0, "step": 6701 }, { "entropy": 1.5653251310189564, "epoch": 0.7362610200214221, "grad_norm": 0.704756498336792, "learning_rate": 1.5123887755715776e-05, "loss": 1.288, "mean_token_accuracy": 0.6854538271824518, "num_tokens": 1125981759.0, "step": 6702 }, { "entropy": 1.7314506371815999, "epoch": 0.736370876932795, "grad_norm": 0.724329948425293, "learning_rate": 1.5122464536564899e-05, "loss": 1.4032, "mean_token_accuracy": 0.6662786255280176, "num_tokens": 1126133016.0, "step": 6703 }, { "entropy": 1.6579938729604085, "epoch": 0.736480733844168, "grad_norm": 0.6255090236663818, "learning_rate": 1.5121041186939877e-05, "loss": 1.3151, "mean_token_accuracy": 0.6845894455909729, "num_tokens": 1126274029.0, "step": 6704 }, { "entropy": 1.7049907743930817, "epoch": 0.7365905907555409, "grad_norm": 0.6977643370628357, "learning_rate": 1.5119617706885759e-05, "loss": 1.455, "mean_token_accuracy": 0.6570224811633428, "num_tokens": 1126430889.0, "step": 6705 }, { "entropy": 1.6987931430339813, "epoch": 0.7367004476669139, "grad_norm": 0.5982023477554321, "learning_rate": 1.5118194096447595e-05, "loss": 1.5509, "mean_token_accuracy": 0.6404446264108022, "num_tokens": 1126622602.0, "step": 6706 }, { "entropy": 1.678564767042796, "epoch": 0.7368103045782868, "grad_norm": 0.7278105020523071, "learning_rate": 1.5116770355670443e-05, "loss": 1.2952, "mean_token_accuracy": 0.67676875491937, "num_tokens": 1126798370.0, "step": 6707 }, { "entropy": 1.6989206870396931, "epoch": 0.7369201614896597, "grad_norm": 0.6880453824996948, "learning_rate": 1.5115346484599369e-05, "loss": 1.382, "mean_token_accuracy": 0.6712295562028885, "num_tokens": 1126918091.0, "step": 6708 }, { "entropy": 1.7392151554425557, "epoch": 0.7370300184010327, "grad_norm": 1.0246655941009521, "learning_rate": 1.5113922483279428e-05, "loss": 1.6634, "mean_token_accuracy": 0.6349669992923737, "num_tokens": 1127107995.0, "step": 6709 }, { "entropy": 1.707019825776418, "epoch": 0.7371398753124055, "grad_norm": 0.5890080332756042, "learning_rate": 1.5112498351755698e-05, "loss": 1.4471, "mean_token_accuracy": 0.6488803972800573, "num_tokens": 1127258053.0, "step": 6710 }, { "entropy": 1.7253030637900035, "epoch": 0.7372497322237785, "grad_norm": 0.8130516409873962, "learning_rate": 1.5111074090073245e-05, "loss": 1.3519, "mean_token_accuracy": 0.6647070497274399, "num_tokens": 1127396234.0, "step": 6711 }, { "entropy": 1.673587401707967, "epoch": 0.7373595891351514, "grad_norm": 0.7452590465545654, "learning_rate": 1.5109649698277154e-05, "loss": 1.3056, "mean_token_accuracy": 0.6725350320339203, "num_tokens": 1127561050.0, "step": 6712 }, { "entropy": 1.679761916399002, "epoch": 0.7374694460465244, "grad_norm": 0.6038820743560791, "learning_rate": 1.5108225176412494e-05, "loss": 1.3374, "mean_token_accuracy": 0.6583557625611623, "num_tokens": 1127773253.0, "step": 6713 }, { "entropy": 1.717817982037862, "epoch": 0.7375793029578973, "grad_norm": 0.7298420667648315, "learning_rate": 1.5106800524524367e-05, "loss": 1.2114, "mean_token_accuracy": 0.685623566309611, "num_tokens": 1127909193.0, "step": 6714 }, { "entropy": 1.695088545481364, "epoch": 0.7376891598692703, "grad_norm": 0.6675203442573547, "learning_rate": 1.510537574265785e-05, "loss": 1.3353, "mean_token_accuracy": 0.6633873581886292, "num_tokens": 1128068140.0, "step": 6715 }, { "entropy": 1.734484702348709, "epoch": 0.7377990167806432, "grad_norm": 0.7346919178962708, "learning_rate": 1.5103950830858041e-05, "loss": 1.3236, "mean_token_accuracy": 0.6552731692790985, "num_tokens": 1128212239.0, "step": 6716 }, { "entropy": 1.7688794334729512, "epoch": 0.7379088736920162, "grad_norm": 0.6720606684684753, "learning_rate": 1.5102525789170038e-05, "loss": 1.5391, "mean_token_accuracy": 0.6279580891132355, "num_tokens": 1128447592.0, "step": 6717 }, { "entropy": 1.6898958086967468, "epoch": 0.7380187306033891, "grad_norm": 0.6676945090293884, "learning_rate": 1.5101100617638943e-05, "loss": 1.4134, "mean_token_accuracy": 0.6584409524997076, "num_tokens": 1128590607.0, "step": 6718 }, { "entropy": 1.6703394452730815, "epoch": 0.7381285875147621, "grad_norm": 0.6979921460151672, "learning_rate": 1.5099675316309857e-05, "loss": 1.3771, "mean_token_accuracy": 0.6564718584219614, "num_tokens": 1128763833.0, "step": 6719 }, { "entropy": 1.6774761080741882, "epoch": 0.738238444426135, "grad_norm": 0.6202614903450012, "learning_rate": 1.50982498852279e-05, "loss": 1.4414, "mean_token_accuracy": 0.6443298210700353, "num_tokens": 1128979972.0, "step": 6720 }, { "entropy": 1.6933607856432598, "epoch": 0.738348301337508, "grad_norm": 0.6849549412727356, "learning_rate": 1.5096824324438178e-05, "loss": 1.3212, "mean_token_accuracy": 0.6674212664365768, "num_tokens": 1129105832.0, "step": 6721 }, { "entropy": 1.6850066979726155, "epoch": 0.7384581582488808, "grad_norm": 0.6466884613037109, "learning_rate": 1.5095398633985812e-05, "loss": 1.3967, "mean_token_accuracy": 0.6724070111910502, "num_tokens": 1129257945.0, "step": 6722 }, { "entropy": 1.751002699136734, "epoch": 0.7385680151602537, "grad_norm": 0.8040537238121033, "learning_rate": 1.5093972813915927e-05, "loss": 1.3518, "mean_token_accuracy": 0.6617087076107661, "num_tokens": 1129373817.0, "step": 6723 }, { "entropy": 1.7356711030006409, "epoch": 0.7386778720716267, "grad_norm": 0.7240248918533325, "learning_rate": 1.5092546864273648e-05, "loss": 1.268, "mean_token_accuracy": 0.6819742123285929, "num_tokens": 1129493491.0, "step": 6724 }, { "entropy": 1.7143625020980835, "epoch": 0.7387877289829996, "grad_norm": 0.6687737107276917, "learning_rate": 1.50911207851041e-05, "loss": 1.3479, "mean_token_accuracy": 0.655364657441775, "num_tokens": 1129641923.0, "step": 6725 }, { "entropy": 1.776674618323644, "epoch": 0.7388975858943726, "grad_norm": 0.7817396521568298, "learning_rate": 1.5089694576452425e-05, "loss": 1.3725, "mean_token_accuracy": 0.6603845258553823, "num_tokens": 1129787182.0, "step": 6726 }, { "entropy": 1.681551843881607, "epoch": 0.7390074428057455, "grad_norm": 0.643803596496582, "learning_rate": 1.5088268238363762e-05, "loss": 1.358, "mean_token_accuracy": 0.6494590491056442, "num_tokens": 1129974180.0, "step": 6727 }, { "entropy": 1.7054549753665924, "epoch": 0.7391172997171185, "grad_norm": 0.7394840121269226, "learning_rate": 1.5086841770883249e-05, "loss": 1.479, "mean_token_accuracy": 0.6421494533618292, "num_tokens": 1130137129.0, "step": 6728 }, { "entropy": 1.661314348379771, "epoch": 0.7392271566284914, "grad_norm": 0.8410232663154602, "learning_rate": 1.5085415174056035e-05, "loss": 1.2465, "mean_token_accuracy": 0.6789174030224482, "num_tokens": 1130250429.0, "step": 6729 }, { "entropy": 1.718008428812027, "epoch": 0.7393370135398644, "grad_norm": 0.6935721039772034, "learning_rate": 1.5083988447927276e-05, "loss": 1.2534, "mean_token_accuracy": 0.6772228926420212, "num_tokens": 1130373436.0, "step": 6730 }, { "entropy": 1.6716387967268627, "epoch": 0.7394468704512372, "grad_norm": 0.7610313892364502, "learning_rate": 1.5082561592542115e-05, "loss": 1.2726, "mean_token_accuracy": 0.674076090256373, "num_tokens": 1130495850.0, "step": 6731 }, { "entropy": 1.725304255882899, "epoch": 0.7395567273626102, "grad_norm": 0.8041244149208069, "learning_rate": 1.5081134607945726e-05, "loss": 1.274, "mean_token_accuracy": 0.6769175430138906, "num_tokens": 1130644766.0, "step": 6732 }, { "entropy": 1.7322080036004384, "epoch": 0.7396665842739831, "grad_norm": 0.6417209506034851, "learning_rate": 1.5079707494183265e-05, "loss": 1.3872, "mean_token_accuracy": 0.6669703970352808, "num_tokens": 1130810397.0, "step": 6733 }, { "entropy": 1.6604767839113872, "epoch": 0.7397764411853561, "grad_norm": 0.6830531358718872, "learning_rate": 1.5078280251299898e-05, "loss": 1.3999, "mean_token_accuracy": 0.657826155424118, "num_tokens": 1130962006.0, "step": 6734 }, { "entropy": 1.7110504806041718, "epoch": 0.739886298096729, "grad_norm": 0.7035920023918152, "learning_rate": 1.5076852879340798e-05, "loss": 1.3654, "mean_token_accuracy": 0.6586341708898544, "num_tokens": 1131090714.0, "step": 6735 }, { "entropy": 1.6222879389921825, "epoch": 0.7399961550081019, "grad_norm": 0.7651909589767456, "learning_rate": 1.5075425378351143e-05, "loss": 1.4796, "mean_token_accuracy": 0.6476244777441025, "num_tokens": 1131286901.0, "step": 6736 }, { "entropy": 1.6963482002417247, "epoch": 0.7401060119194749, "grad_norm": 0.6514533162117004, "learning_rate": 1.507399774837611e-05, "loss": 1.4149, "mean_token_accuracy": 0.6608110070228577, "num_tokens": 1131442624.0, "step": 6737 }, { "entropy": 1.6943640112876892, "epoch": 0.7402158688308478, "grad_norm": 0.7217374444007874, "learning_rate": 1.5072569989460887e-05, "loss": 1.4165, "mean_token_accuracy": 0.664705902338028, "num_tokens": 1131640297.0, "step": 6738 }, { "entropy": 1.7367797791957855, "epoch": 0.7403257257422208, "grad_norm": 0.6871684789657593, "learning_rate": 1.5071142101650657e-05, "loss": 1.4446, "mean_token_accuracy": 0.6475637157758077, "num_tokens": 1131801764.0, "step": 6739 }, { "entropy": 1.6569513181845348, "epoch": 0.7404355826535937, "grad_norm": 0.6715342998504639, "learning_rate": 1.5069714084990614e-05, "loss": 1.2538, "mean_token_accuracy": 0.6771769026915232, "num_tokens": 1131961068.0, "step": 6740 }, { "entropy": 1.648532897233963, "epoch": 0.7405454395649667, "grad_norm": 0.724288821220398, "learning_rate": 1.5068285939525953e-05, "loss": 1.3426, "mean_token_accuracy": 0.6620112607876459, "num_tokens": 1132131553.0, "step": 6741 }, { "entropy": 1.7068750858306885, "epoch": 0.7406552964763395, "grad_norm": 0.7151614427566528, "learning_rate": 1.506685766530188e-05, "loss": 1.328, "mean_token_accuracy": 0.6632406115531921, "num_tokens": 1132259340.0, "step": 6742 }, { "entropy": 1.713490217924118, "epoch": 0.7407651533877125, "grad_norm": 0.7277394533157349, "learning_rate": 1.506542926236359e-05, "loss": 1.3877, "mean_token_accuracy": 0.6614874800046285, "num_tokens": 1132391138.0, "step": 6743 }, { "entropy": 1.700178434451421, "epoch": 0.7408750102990854, "grad_norm": 0.6973790526390076, "learning_rate": 1.5064000730756295e-05, "loss": 1.4149, "mean_token_accuracy": 0.6597117880980173, "num_tokens": 1132531234.0, "step": 6744 }, { "entropy": 1.6388778189818065, "epoch": 0.7409848672104584, "grad_norm": 0.6643558144569397, "learning_rate": 1.5062572070525207e-05, "loss": 1.4025, "mean_token_accuracy": 0.6725161075592041, "num_tokens": 1132734802.0, "step": 6745 }, { "entropy": 1.7185613016287486, "epoch": 0.7410947241218313, "grad_norm": 0.8322924971580505, "learning_rate": 1.5061143281715552e-05, "loss": 1.4067, "mean_token_accuracy": 0.6537605971097946, "num_tokens": 1132895353.0, "step": 6746 }, { "entropy": 1.7120015025138855, "epoch": 0.7412045810332043, "grad_norm": 0.725736677646637, "learning_rate": 1.5059714364372531e-05, "loss": 1.4964, "mean_token_accuracy": 0.6400942405064901, "num_tokens": 1133050277.0, "step": 6747 }, { "entropy": 1.677657534678777, "epoch": 0.7413144379445772, "grad_norm": 0.7027127742767334, "learning_rate": 1.5058285318541389e-05, "loss": 1.5079, "mean_token_accuracy": 0.6412277817726135, "num_tokens": 1133231985.0, "step": 6748 }, { "entropy": 1.6989895105361938, "epoch": 0.7414242948559501, "grad_norm": 0.6422317028045654, "learning_rate": 1.505685614426734e-05, "loss": 1.398, "mean_token_accuracy": 0.6447745362917582, "num_tokens": 1133401721.0, "step": 6749 }, { "entropy": 1.7608485122521718, "epoch": 0.7415341517673231, "grad_norm": 0.6693912744522095, "learning_rate": 1.5055426841595624e-05, "loss": 1.3246, "mean_token_accuracy": 0.6594583491484324, "num_tokens": 1133512280.0, "step": 6750 }, { "entropy": 1.6934813757737477, "epoch": 0.741644008678696, "grad_norm": 0.692389965057373, "learning_rate": 1.5053997410571474e-05, "loss": 1.6025, "mean_token_accuracy": 0.6274192283550898, "num_tokens": 1133772590.0, "step": 6751 }, { "entropy": 1.7201940218607585, "epoch": 0.741753865590069, "grad_norm": 0.700623095035553, "learning_rate": 1.5052567851240138e-05, "loss": 1.3729, "mean_token_accuracy": 0.6719879905382792, "num_tokens": 1133893711.0, "step": 6752 }, { "entropy": 1.7405705253283184, "epoch": 0.7418637225014418, "grad_norm": 0.7587204575538635, "learning_rate": 1.5051138163646848e-05, "loss": 1.5049, "mean_token_accuracy": 0.6506867110729218, "num_tokens": 1134043053.0, "step": 6753 }, { "entropy": 1.7238063216209412, "epoch": 0.7419735794128148, "grad_norm": 0.6716615557670593, "learning_rate": 1.5049708347836866e-05, "loss": 1.6108, "mean_token_accuracy": 0.6179195394118627, "num_tokens": 1134248453.0, "step": 6754 }, { "entropy": 1.6737729807694752, "epoch": 0.7420834363241877, "grad_norm": 0.7254316210746765, "learning_rate": 1.5048278403855439e-05, "loss": 1.3895, "mean_token_accuracy": 0.6687343964974085, "num_tokens": 1134417667.0, "step": 6755 }, { "entropy": 1.6903795301914215, "epoch": 0.7421932932355607, "grad_norm": 0.6885725855827332, "learning_rate": 1.5046848331747822e-05, "loss": 1.324, "mean_token_accuracy": 0.6620573401451111, "num_tokens": 1134596695.0, "step": 6756 }, { "entropy": 1.6956494649251301, "epoch": 0.7423031501469336, "grad_norm": 0.7012706398963928, "learning_rate": 1.5045418131559281e-05, "loss": 1.3519, "mean_token_accuracy": 0.6658426324526469, "num_tokens": 1134747187.0, "step": 6757 }, { "entropy": 1.7318655947844188, "epoch": 0.7424130070583066, "grad_norm": 0.8121592402458191, "learning_rate": 1.5043987803335081e-05, "loss": 1.4545, "mean_token_accuracy": 0.6543787519137064, "num_tokens": 1134884449.0, "step": 6758 }, { "entropy": 1.7426222761472066, "epoch": 0.7425228639696795, "grad_norm": 0.7535271644592285, "learning_rate": 1.5042557347120486e-05, "loss": 1.3212, "mean_token_accuracy": 0.6727963835000992, "num_tokens": 1135027665.0, "step": 6759 }, { "entropy": 1.6979150076707203, "epoch": 0.7426327208810525, "grad_norm": 0.8373980522155762, "learning_rate": 1.5041126762960774e-05, "loss": 1.3267, "mean_token_accuracy": 0.6588234305381775, "num_tokens": 1135172577.0, "step": 6760 }, { "entropy": 1.700280745824178, "epoch": 0.7427425777924254, "grad_norm": 0.7200369834899902, "learning_rate": 1.503969605090122e-05, "loss": 1.3094, "mean_token_accuracy": 0.6659737030665079, "num_tokens": 1135299828.0, "step": 6761 }, { "entropy": 1.7104643682638805, "epoch": 0.7428524347037982, "grad_norm": 0.8385793566703796, "learning_rate": 1.5038265210987109e-05, "loss": 1.3116, "mean_token_accuracy": 0.677546814084053, "num_tokens": 1135459026.0, "step": 6762 }, { "entropy": 1.6308595538139343, "epoch": 0.7429622916151712, "grad_norm": 0.6230477690696716, "learning_rate": 1.5036834243263718e-05, "loss": 1.4281, "mean_token_accuracy": 0.6566774696111679, "num_tokens": 1135670612.0, "step": 6763 }, { "entropy": 1.709012786547343, "epoch": 0.7430721485265441, "grad_norm": 0.9405829906463623, "learning_rate": 1.5035403147776348e-05, "loss": 1.3462, "mean_token_accuracy": 0.662533774971962, "num_tokens": 1135792652.0, "step": 6764 }, { "entropy": 1.6998238166173298, "epoch": 0.7431820054379171, "grad_norm": 0.7661788463592529, "learning_rate": 1.5033971924570283e-05, "loss": 1.3654, "mean_token_accuracy": 0.6749661912520727, "num_tokens": 1135970182.0, "step": 6765 }, { "entropy": 1.6655668715635936, "epoch": 0.74329186234929, "grad_norm": 0.7080719470977783, "learning_rate": 1.5032540573690828e-05, "loss": 1.4305, "mean_token_accuracy": 0.651120533545812, "num_tokens": 1136142615.0, "step": 6766 }, { "entropy": 1.7483255763848622, "epoch": 0.743401719260663, "grad_norm": 0.7917311191558838, "learning_rate": 1.5031109095183278e-05, "loss": 1.5119, "mean_token_accuracy": 0.6534301191568375, "num_tokens": 1136350269.0, "step": 6767 }, { "entropy": 1.633179912964503, "epoch": 0.7435115761720359, "grad_norm": 0.5961988568305969, "learning_rate": 1.5029677489092944e-05, "loss": 1.3725, "mean_token_accuracy": 0.6645344644784927, "num_tokens": 1136531760.0, "step": 6768 }, { "entropy": 1.7083572447299957, "epoch": 0.7436214330834089, "grad_norm": 0.7430605292320251, "learning_rate": 1.5028245755465129e-05, "loss": 1.3712, "mean_token_accuracy": 0.6828558494647344, "num_tokens": 1136674495.0, "step": 6769 }, { "entropy": 1.6955423951148987, "epoch": 0.7437312899947818, "grad_norm": 0.6523454785346985, "learning_rate": 1.5026813894345159e-05, "loss": 1.4493, "mean_token_accuracy": 0.6469675749540329, "num_tokens": 1136869151.0, "step": 6770 }, { "entropy": 1.7560264070828755, "epoch": 0.7438411469061548, "grad_norm": 0.6248143911361694, "learning_rate": 1.5025381905778336e-05, "loss": 1.392, "mean_token_accuracy": 0.6438594659169515, "num_tokens": 1137053577.0, "step": 6771 }, { "entropy": 1.7154277463754017, "epoch": 0.7439510038175277, "grad_norm": 0.6773508191108704, "learning_rate": 1.5023949789809991e-05, "loss": 1.3957, "mean_token_accuracy": 0.6574154595534006, "num_tokens": 1137208972.0, "step": 6772 }, { "entropy": 1.6735303401947021, "epoch": 0.7440608607289007, "grad_norm": 0.7054452896118164, "learning_rate": 1.5022517546485451e-05, "loss": 1.2731, "mean_token_accuracy": 0.6664699663718542, "num_tokens": 1137320522.0, "step": 6773 }, { "entropy": 1.7122906744480133, "epoch": 0.7441707176402735, "grad_norm": 0.6373655796051025, "learning_rate": 1.502108517585004e-05, "loss": 1.4198, "mean_token_accuracy": 0.6484536776940028, "num_tokens": 1137490263.0, "step": 6774 }, { "entropy": 1.6470833718776703, "epoch": 0.7442805745516465, "grad_norm": 0.6568727493286133, "learning_rate": 1.50196526779491e-05, "loss": 1.441, "mean_token_accuracy": 0.647697259982427, "num_tokens": 1137726786.0, "step": 6775 }, { "entropy": 1.6590462823708851, "epoch": 0.7443904314630194, "grad_norm": 0.8282439112663269, "learning_rate": 1.501822005282796e-05, "loss": 1.3672, "mean_token_accuracy": 0.6522834698359171, "num_tokens": 1137919330.0, "step": 6776 }, { "entropy": 1.6876604358355205, "epoch": 0.7445002883743923, "grad_norm": 0.6368933320045471, "learning_rate": 1.5016787300531965e-05, "loss": 1.3694, "mean_token_accuracy": 0.6590426663557688, "num_tokens": 1138061019.0, "step": 6777 }, { "entropy": 1.6871869663397472, "epoch": 0.7446101452857653, "grad_norm": 0.6821638345718384, "learning_rate": 1.5015354421106464e-05, "loss": 1.5103, "mean_token_accuracy": 0.6524971077839533, "num_tokens": 1138230811.0, "step": 6778 }, { "entropy": 1.6363123655319214, "epoch": 0.7447200021971382, "grad_norm": 0.6159114837646484, "learning_rate": 1.5013921414596806e-05, "loss": 1.3712, "mean_token_accuracy": 0.6576385100682577, "num_tokens": 1138422996.0, "step": 6779 }, { "entropy": 1.6642758349577587, "epoch": 0.7448298591085112, "grad_norm": 0.6397916674613953, "learning_rate": 1.5012488281048344e-05, "loss": 1.3105, "mean_token_accuracy": 0.6616799881060919, "num_tokens": 1138559976.0, "step": 6780 }, { "entropy": 1.6806075970331829, "epoch": 0.7449397160198841, "grad_norm": 0.6554465889930725, "learning_rate": 1.5011055020506432e-05, "loss": 1.3143, "mean_token_accuracy": 0.6696526308854421, "num_tokens": 1138716321.0, "step": 6781 }, { "entropy": 1.7737302879492443, "epoch": 0.7450495729312571, "grad_norm": 0.7125297784805298, "learning_rate": 1.500962163301644e-05, "loss": 1.3339, "mean_token_accuracy": 0.6506142367919286, "num_tokens": 1138844468.0, "step": 6782 }, { "entropy": 1.7082662880420685, "epoch": 0.74515942984263, "grad_norm": 0.690348744392395, "learning_rate": 1.500818811862373e-05, "loss": 1.5161, "mean_token_accuracy": 0.643420398235321, "num_tokens": 1138993642.0, "step": 6783 }, { "entropy": 1.7463260293006897, "epoch": 0.745269286754003, "grad_norm": 1.0192396640777588, "learning_rate": 1.500675447737367e-05, "loss": 1.5818, "mean_token_accuracy": 0.6677199751138687, "num_tokens": 1139162652.0, "step": 6784 }, { "entropy": 1.6664335330327351, "epoch": 0.7453791436653758, "grad_norm": 0.6164513826370239, "learning_rate": 1.5005320709311638e-05, "loss": 1.4417, "mean_token_accuracy": 0.6483336140712103, "num_tokens": 1139341551.0, "step": 6785 }, { "entropy": 1.7031769156455994, "epoch": 0.7454890005767488, "grad_norm": 0.8442233204841614, "learning_rate": 1.5003886814483011e-05, "loss": 1.302, "mean_token_accuracy": 0.6629199633995692, "num_tokens": 1139484565.0, "step": 6786 }, { "entropy": 1.7150315344333649, "epoch": 0.7455988574881217, "grad_norm": 0.7291525602340698, "learning_rate": 1.5002452792933166e-05, "loss": 1.3973, "mean_token_accuracy": 0.6579045653343201, "num_tokens": 1139653727.0, "step": 6787 }, { "entropy": 1.6477097769578297, "epoch": 0.7457087143994947, "grad_norm": 0.6176000833511353, "learning_rate": 1.50010186447075e-05, "loss": 1.2534, "mean_token_accuracy": 0.6797188719113668, "num_tokens": 1139808285.0, "step": 6788 }, { "entropy": 1.6606711745262146, "epoch": 0.7458185713108676, "grad_norm": 0.705842912197113, "learning_rate": 1.4999584369851392e-05, "loss": 1.4349, "mean_token_accuracy": 0.6436782528956732, "num_tokens": 1140008061.0, "step": 6789 }, { "entropy": 1.7517486015955608, "epoch": 0.7459284282222405, "grad_norm": 0.625238835811615, "learning_rate": 1.4998149968410243e-05, "loss": 1.4634, "mean_token_accuracy": 0.625215545296669, "num_tokens": 1140198927.0, "step": 6790 }, { "entropy": 1.7464244266351063, "epoch": 0.7460382851336135, "grad_norm": 0.7666485905647278, "learning_rate": 1.4996715440429447e-05, "loss": 1.4979, "mean_token_accuracy": 0.6369941085577011, "num_tokens": 1140359162.0, "step": 6791 }, { "entropy": 1.667342593272527, "epoch": 0.7461481420449864, "grad_norm": 0.6632201671600342, "learning_rate": 1.4995280785954413e-05, "loss": 1.3564, "mean_token_accuracy": 0.6578471561272939, "num_tokens": 1140549770.0, "step": 6792 }, { "entropy": 1.7453742424647014, "epoch": 0.7462579989563594, "grad_norm": 0.7090116739273071, "learning_rate": 1.4993846005030537e-05, "loss": 1.4562, "mean_token_accuracy": 0.653822178641955, "num_tokens": 1140699614.0, "step": 6793 }, { "entropy": 1.7000041206677754, "epoch": 0.7463678558677322, "grad_norm": 0.7104377150535583, "learning_rate": 1.4992411097703237e-05, "loss": 1.3719, "mean_token_accuracy": 0.6573426475127538, "num_tokens": 1140867840.0, "step": 6794 }, { "entropy": 1.6723633507887523, "epoch": 0.7464777127791052, "grad_norm": 0.7225411534309387, "learning_rate": 1.4990976064017925e-05, "loss": 1.2767, "mean_token_accuracy": 0.6692603131135305, "num_tokens": 1140998012.0, "step": 6795 }, { "entropy": 1.6979938050111134, "epoch": 0.7465875696904781, "grad_norm": 0.6646689176559448, "learning_rate": 1.4989540904020018e-05, "loss": 1.2908, "mean_token_accuracy": 0.6694482167561849, "num_tokens": 1141117803.0, "step": 6796 }, { "entropy": 1.68376824259758, "epoch": 0.7466974266018511, "grad_norm": 0.6983800530433655, "learning_rate": 1.4988105617754942e-05, "loss": 1.5562, "mean_token_accuracy": 0.6386436770359675, "num_tokens": 1141368730.0, "step": 6797 }, { "entropy": 1.6934349636236827, "epoch": 0.746807283513224, "grad_norm": 0.9968954920768738, "learning_rate": 1.498667020526812e-05, "loss": 1.4534, "mean_token_accuracy": 0.6503161440292994, "num_tokens": 1141576192.0, "step": 6798 }, { "entropy": 1.7412570118904114, "epoch": 0.746917140424597, "grad_norm": 0.5900823473930359, "learning_rate": 1.4985234666604978e-05, "loss": 1.5797, "mean_token_accuracy": 0.6377448340257009, "num_tokens": 1141774836.0, "step": 6799 }, { "entropy": 1.659286359945933, "epoch": 0.7470269973359699, "grad_norm": 0.6186773180961609, "learning_rate": 1.4983799001810957e-05, "loss": 1.39, "mean_token_accuracy": 0.6724599103132883, "num_tokens": 1141944431.0, "step": 6800 }, { "entropy": 1.6813781360785167, "epoch": 0.7471368542473429, "grad_norm": 0.7697616219520569, "learning_rate": 1.4982363210931495e-05, "loss": 1.3546, "mean_token_accuracy": 0.6622022340695063, "num_tokens": 1142128048.0, "step": 6801 }, { "entropy": 1.7053708632787068, "epoch": 0.7472467111587158, "grad_norm": 0.625506579875946, "learning_rate": 1.498092729401203e-05, "loss": 1.3282, "mean_token_accuracy": 0.6647703299919764, "num_tokens": 1142326201.0, "step": 6802 }, { "entropy": 1.668170581261317, "epoch": 0.7473565680700887, "grad_norm": 0.6538956761360168, "learning_rate": 1.4979491251098008e-05, "loss": 1.3052, "mean_token_accuracy": 0.6595268547534943, "num_tokens": 1142502759.0, "step": 6803 }, { "entropy": 1.6516262590885162, "epoch": 0.7474664249814617, "grad_norm": 0.5898981690406799, "learning_rate": 1.4978055082234883e-05, "loss": 1.3253, "mean_token_accuracy": 0.6630469312270483, "num_tokens": 1142677014.0, "step": 6804 }, { "entropy": 1.7548083265622456, "epoch": 0.7475762818928345, "grad_norm": 0.7785349488258362, "learning_rate": 1.4976618787468109e-05, "loss": 1.3407, "mean_token_accuracy": 0.6611214627822241, "num_tokens": 1142821725.0, "step": 6805 }, { "entropy": 1.6554772853851318, "epoch": 0.7476861388042075, "grad_norm": 0.6662288308143616, "learning_rate": 1.497518236684314e-05, "loss": 1.4193, "mean_token_accuracy": 0.6535109380880991, "num_tokens": 1143014793.0, "step": 6806 }, { "entropy": 1.6847680111726124, "epoch": 0.7477959957155804, "grad_norm": 0.7195385098457336, "learning_rate": 1.4973745820405442e-05, "loss": 1.3784, "mean_token_accuracy": 0.6672850747903188, "num_tokens": 1143166310.0, "step": 6807 }, { "entropy": 1.7305493354797363, "epoch": 0.7479058526269534, "grad_norm": 0.7656927704811096, "learning_rate": 1.497230914820048e-05, "loss": 1.4317, "mean_token_accuracy": 0.656554693977038, "num_tokens": 1143316576.0, "step": 6808 }, { "entropy": 1.6923631529013317, "epoch": 0.7480157095383263, "grad_norm": 0.7529177069664001, "learning_rate": 1.4970872350273717e-05, "loss": 1.1828, "mean_token_accuracy": 0.688283234834671, "num_tokens": 1143455530.0, "step": 6809 }, { "entropy": 1.7461569805939992, "epoch": 0.7481255664496993, "grad_norm": 0.5634061694145203, "learning_rate": 1.496943542667064e-05, "loss": 1.3979, "mean_token_accuracy": 0.6512090861797333, "num_tokens": 1143626650.0, "step": 6810 }, { "entropy": 1.6781017482280731, "epoch": 0.7482354233610722, "grad_norm": 0.7326881885528564, "learning_rate": 1.4967998377436717e-05, "loss": 1.4369, "mean_token_accuracy": 0.6579112311204275, "num_tokens": 1143767777.0, "step": 6811 }, { "entropy": 1.6676452855269115, "epoch": 0.7483452802724452, "grad_norm": 0.8091786503791809, "learning_rate": 1.4966561202617435e-05, "loss": 1.4482, "mean_token_accuracy": 0.6634651124477386, "num_tokens": 1143955831.0, "step": 6812 }, { "entropy": 1.7290521661440532, "epoch": 0.7484551371838181, "grad_norm": 0.6584422588348389, "learning_rate": 1.4965123902258279e-05, "loss": 1.3347, "mean_token_accuracy": 0.6581239700317383, "num_tokens": 1144071834.0, "step": 6813 }, { "entropy": 1.6535277366638184, "epoch": 0.7485649940951911, "grad_norm": 0.6501061916351318, "learning_rate": 1.4963686476404737e-05, "loss": 1.4189, "mean_token_accuracy": 0.6524655272563299, "num_tokens": 1144258749.0, "step": 6814 }, { "entropy": 1.6799985071023305, "epoch": 0.748674851006564, "grad_norm": 0.8732005953788757, "learning_rate": 1.4962248925102305e-05, "loss": 1.4393, "mean_token_accuracy": 0.6538581599791845, "num_tokens": 1144394205.0, "step": 6815 }, { "entropy": 1.7935606241226196, "epoch": 0.7487847079179369, "grad_norm": 0.7209190726280212, "learning_rate": 1.496081124839648e-05, "loss": 1.4205, "mean_token_accuracy": 0.6454559167226156, "num_tokens": 1144522534.0, "step": 6816 }, { "entropy": 1.6710129082202911, "epoch": 0.7488945648293098, "grad_norm": 0.5794231295585632, "learning_rate": 1.4959373446332762e-05, "loss": 1.3592, "mean_token_accuracy": 0.6566446522871653, "num_tokens": 1144710283.0, "step": 6817 }, { "entropy": 1.6727528870105743, "epoch": 0.7490044217406827, "grad_norm": 1.6803122758865356, "learning_rate": 1.4957935518956658e-05, "loss": 1.3172, "mean_token_accuracy": 0.6655105352401733, "num_tokens": 1144865494.0, "step": 6818 }, { "entropy": 1.6749801139036815, "epoch": 0.7491142786520557, "grad_norm": 0.6018057465553284, "learning_rate": 1.4956497466313682e-05, "loss": 1.3718, "mean_token_accuracy": 0.6562728782494863, "num_tokens": 1145060596.0, "step": 6819 }, { "entropy": 1.6236327687899272, "epoch": 0.7492241355634286, "grad_norm": 0.7080875635147095, "learning_rate": 1.4955059288449343e-05, "loss": 1.4342, "mean_token_accuracy": 0.6543013006448746, "num_tokens": 1145299929.0, "step": 6820 }, { "entropy": 1.7417059342066448, "epoch": 0.7493339924748016, "grad_norm": 0.6856288313865662, "learning_rate": 1.4953620985409156e-05, "loss": 1.4524, "mean_token_accuracy": 0.6499587247769038, "num_tokens": 1145473231.0, "step": 6821 }, { "entropy": 1.7538205881913502, "epoch": 0.7494438493861745, "grad_norm": 0.6540340781211853, "learning_rate": 1.495218255723865e-05, "loss": 1.4127, "mean_token_accuracy": 0.6443704416354498, "num_tokens": 1145635824.0, "step": 6822 }, { "entropy": 1.8043759365876515, "epoch": 0.7495537062975475, "grad_norm": 0.7414424419403076, "learning_rate": 1.4950744003983346e-05, "loss": 1.2803, "mean_token_accuracy": 0.6785912364721298, "num_tokens": 1145771367.0, "step": 6823 }, { "entropy": 1.6939500371615093, "epoch": 0.7496635632089204, "grad_norm": 0.7015360593795776, "learning_rate": 1.4949305325688776e-05, "loss": 1.2973, "mean_token_accuracy": 0.6717601070801417, "num_tokens": 1145898356.0, "step": 6824 }, { "entropy": 1.719683289527893, "epoch": 0.7497734201202934, "grad_norm": 0.7223207950592041, "learning_rate": 1.4947866522400469e-05, "loss": 1.3485, "mean_token_accuracy": 0.6610392481088638, "num_tokens": 1146043519.0, "step": 6825 }, { "entropy": 1.726772169272105, "epoch": 0.7498832770316662, "grad_norm": 0.7367146611213684, "learning_rate": 1.494642759416397e-05, "loss": 1.3865, "mean_token_accuracy": 0.6567636330922445, "num_tokens": 1146210946.0, "step": 6826 }, { "entropy": 1.6836354335149128, "epoch": 0.7499931339430392, "grad_norm": 0.6234486103057861, "learning_rate": 1.494498854102481e-05, "loss": 1.4224, "mean_token_accuracy": 0.6549923866987228, "num_tokens": 1146431560.0, "step": 6827 }, { "entropy": 1.7541552980740864, "epoch": 0.7501029908544121, "grad_norm": 0.6722849607467651, "learning_rate": 1.4943549363028544e-05, "loss": 1.4151, "mean_token_accuracy": 0.6468459516763687, "num_tokens": 1146608356.0, "step": 6828 }, { "entropy": 1.7377861142158508, "epoch": 0.7502128477657851, "grad_norm": 0.6956329941749573, "learning_rate": 1.4942110060220718e-05, "loss": 1.492, "mean_token_accuracy": 0.645117849111557, "num_tokens": 1146763510.0, "step": 6829 }, { "entropy": 1.7091150482495625, "epoch": 0.750322704677158, "grad_norm": 0.5567704439163208, "learning_rate": 1.4940670632646886e-05, "loss": 1.512, "mean_token_accuracy": 0.6451121767361959, "num_tokens": 1146972167.0, "step": 6830 }, { "entropy": 1.7323314944903057, "epoch": 0.7504325615885309, "grad_norm": 0.6515035629272461, "learning_rate": 1.49392310803526e-05, "loss": 1.4759, "mean_token_accuracy": 0.6642122765382131, "num_tokens": 1147157436.0, "step": 6831 }, { "entropy": 1.6438543101151784, "epoch": 0.7505424184999039, "grad_norm": 0.826503574848175, "learning_rate": 1.4937791403383429e-05, "loss": 1.2688, "mean_token_accuracy": 0.6719188491503397, "num_tokens": 1147301711.0, "step": 6832 }, { "entropy": 1.7450671792030334, "epoch": 0.7506522754112768, "grad_norm": 0.7970007658004761, "learning_rate": 1.4936351601784936e-05, "loss": 1.4347, "mean_token_accuracy": 0.6608226199944814, "num_tokens": 1147448757.0, "step": 6833 }, { "entropy": 1.7097887794176738, "epoch": 0.7507621323226498, "grad_norm": 0.6782563328742981, "learning_rate": 1.4934911675602684e-05, "loss": 1.4367, "mean_token_accuracy": 0.6495286400119463, "num_tokens": 1147583520.0, "step": 6834 }, { "entropy": 1.6653445859750111, "epoch": 0.7508719892340227, "grad_norm": 0.6241797804832458, "learning_rate": 1.4933471624882252e-05, "loss": 1.4375, "mean_token_accuracy": 0.6648624936739603, "num_tokens": 1147772528.0, "step": 6835 }, { "entropy": 1.6597016155719757, "epoch": 0.7509818461453956, "grad_norm": 0.6311061382293701, "learning_rate": 1.4932031449669216e-05, "loss": 1.3328, "mean_token_accuracy": 0.6625900516907374, "num_tokens": 1147914543.0, "step": 6836 }, { "entropy": 1.7220198810100555, "epoch": 0.7510917030567685, "grad_norm": 0.6737807393074036, "learning_rate": 1.4930591150009153e-05, "loss": 1.3936, "mean_token_accuracy": 0.6545198758443197, "num_tokens": 1148060878.0, "step": 6837 }, { "entropy": 1.7228451172510784, "epoch": 0.7512015599681415, "grad_norm": 0.5968870520591736, "learning_rate": 1.4929150725947657e-05, "loss": 1.4637, "mean_token_accuracy": 0.6343479951222738, "num_tokens": 1148284690.0, "step": 6838 }, { "entropy": 1.7416771451632183, "epoch": 0.7513114168795144, "grad_norm": 0.6965200901031494, "learning_rate": 1.4927710177530308e-05, "loss": 1.553, "mean_token_accuracy": 0.6420968150099119, "num_tokens": 1148448260.0, "step": 6839 }, { "entropy": 1.6946365038553874, "epoch": 0.7514212737908874, "grad_norm": 0.6708457469940186, "learning_rate": 1.4926269504802702e-05, "loss": 1.3724, "mean_token_accuracy": 0.6507139702637991, "num_tokens": 1148660677.0, "step": 6840 }, { "entropy": 1.7274916470050812, "epoch": 0.7515311307022603, "grad_norm": 0.8043560981750488, "learning_rate": 1.4924828707810434e-05, "loss": 1.4065, "mean_token_accuracy": 0.6550286362568537, "num_tokens": 1148845422.0, "step": 6841 }, { "entropy": 1.6953127483526866, "epoch": 0.7516409876136333, "grad_norm": 0.615106463432312, "learning_rate": 1.4923387786599111e-05, "loss": 1.3215, "mean_token_accuracy": 0.6688494135936102, "num_tokens": 1148998827.0, "step": 6842 }, { "entropy": 1.6872650881608326, "epoch": 0.7517508445250062, "grad_norm": 0.6760332584381104, "learning_rate": 1.4921946741214328e-05, "loss": 1.4611, "mean_token_accuracy": 0.6474284629027048, "num_tokens": 1149163526.0, "step": 6843 }, { "entropy": 1.7162937223911285, "epoch": 0.7518607014363791, "grad_norm": 0.6299443244934082, "learning_rate": 1.49205055717017e-05, "loss": 1.4696, "mean_token_accuracy": 0.6503280848264694, "num_tokens": 1149379885.0, "step": 6844 }, { "entropy": 1.707916518052419, "epoch": 0.7519705583477521, "grad_norm": 0.6752136945724487, "learning_rate": 1.4919064278106837e-05, "loss": 1.3816, "mean_token_accuracy": 0.66462242603302, "num_tokens": 1149545097.0, "step": 6845 }, { "entropy": 1.708972801764806, "epoch": 0.752080415259125, "grad_norm": 0.746783435344696, "learning_rate": 1.4917622860475355e-05, "loss": 1.292, "mean_token_accuracy": 0.6678305218617121, "num_tokens": 1149699714.0, "step": 6846 }, { "entropy": 1.7258944114049275, "epoch": 0.7521902721704979, "grad_norm": 0.7096854448318481, "learning_rate": 1.4916181318852872e-05, "loss": 1.5354, "mean_token_accuracy": 0.6471205502748489, "num_tokens": 1149911864.0, "step": 6847 }, { "entropy": 1.7283643583456676, "epoch": 0.7523001290818708, "grad_norm": 0.8113722801208496, "learning_rate": 1.491473965328502e-05, "loss": 1.5158, "mean_token_accuracy": 0.6531914075215658, "num_tokens": 1150075121.0, "step": 6848 }, { "entropy": 1.6454201638698578, "epoch": 0.7524099859932438, "grad_norm": 0.6000940203666687, "learning_rate": 1.4913297863817417e-05, "loss": 1.3858, "mean_token_accuracy": 0.6618871788183848, "num_tokens": 1150257842.0, "step": 6849 }, { "entropy": 1.7098387082417805, "epoch": 0.7525198429046167, "grad_norm": 0.6875969767570496, "learning_rate": 1.4911855950495707e-05, "loss": 1.5528, "mean_token_accuracy": 0.6489702612161636, "num_tokens": 1150479789.0, "step": 6850 }, { "entropy": 1.7067073086897533, "epoch": 0.7526296998159897, "grad_norm": 0.6003955602645874, "learning_rate": 1.4910413913365511e-05, "loss": 1.4514, "mean_token_accuracy": 0.6396404554446539, "num_tokens": 1150666377.0, "step": 6851 }, { "entropy": 1.7154656648635864, "epoch": 0.7527395567273626, "grad_norm": 0.7262822389602661, "learning_rate": 1.490897175247248e-05, "loss": 1.3599, "mean_token_accuracy": 0.6746832331021627, "num_tokens": 1150801744.0, "step": 6852 }, { "entropy": 1.7396377523740132, "epoch": 0.7528494136387356, "grad_norm": 0.6769723892211914, "learning_rate": 1.4907529467862254e-05, "loss": 1.6661, "mean_token_accuracy": 0.6230086013674736, "num_tokens": 1151017918.0, "step": 6853 }, { "entropy": 1.6834344764550526, "epoch": 0.7529592705501085, "grad_norm": 0.580190122127533, "learning_rate": 1.4906087059580483e-05, "loss": 1.3398, "mean_token_accuracy": 0.6577950765689214, "num_tokens": 1151202898.0, "step": 6854 }, { "entropy": 1.7219790021578472, "epoch": 0.7530691274614815, "grad_norm": 0.7568797469139099, "learning_rate": 1.4904644527672813e-05, "loss": 1.3304, "mean_token_accuracy": 0.657778725028038, "num_tokens": 1151353607.0, "step": 6855 }, { "entropy": 1.7640255590279896, "epoch": 0.7531789843728544, "grad_norm": 0.7201240658760071, "learning_rate": 1.4903201872184909e-05, "loss": 1.5171, "mean_token_accuracy": 0.6397636433442434, "num_tokens": 1151519388.0, "step": 6856 }, { "entropy": 1.6551097631454468, "epoch": 0.7532888412842272, "grad_norm": 0.6681106686592102, "learning_rate": 1.4901759093162423e-05, "loss": 1.2624, "mean_token_accuracy": 0.6762852072715759, "num_tokens": 1151672154.0, "step": 6857 }, { "entropy": 1.6735802292823792, "epoch": 0.7533986981956002, "grad_norm": 0.635806679725647, "learning_rate": 1.4900316190651013e-05, "loss": 1.422, "mean_token_accuracy": 0.6669272085030874, "num_tokens": 1151816351.0, "step": 6858 }, { "entropy": 1.7274777193864186, "epoch": 0.7535085551069731, "grad_norm": 0.7612840533256531, "learning_rate": 1.4898873164696361e-05, "loss": 1.2724, "mean_token_accuracy": 0.668310264746348, "num_tokens": 1151944344.0, "step": 6859 }, { "entropy": 1.7404861251513164, "epoch": 0.7536184120183461, "grad_norm": 0.7002642750740051, "learning_rate": 1.4897430015344128e-05, "loss": 1.376, "mean_token_accuracy": 0.6616505285104116, "num_tokens": 1152096710.0, "step": 6860 }, { "entropy": 1.7473096946875255, "epoch": 0.753728268929719, "grad_norm": 0.9548206925392151, "learning_rate": 1.489598674263999e-05, "loss": 1.5039, "mean_token_accuracy": 0.6459807008504868, "num_tokens": 1152222865.0, "step": 6861 }, { "entropy": 1.7407631874084473, "epoch": 0.753838125841092, "grad_norm": 0.7236599326133728, "learning_rate": 1.4894543346629628e-05, "loss": 1.4074, "mean_token_accuracy": 0.6461221228043238, "num_tokens": 1152370024.0, "step": 6862 }, { "entropy": 1.706801136334737, "epoch": 0.7539479827524649, "grad_norm": 0.6131123304367065, "learning_rate": 1.4893099827358725e-05, "loss": 1.4282, "mean_token_accuracy": 0.640210434794426, "num_tokens": 1152551295.0, "step": 6863 }, { "entropy": 1.7038409014542897, "epoch": 0.7540578396638379, "grad_norm": 0.849195122718811, "learning_rate": 1.4891656184872967e-05, "loss": 1.4797, "mean_token_accuracy": 0.6482670257488886, "num_tokens": 1152715725.0, "step": 6864 }, { "entropy": 1.671141008536021, "epoch": 0.7541676965752108, "grad_norm": 0.7273076772689819, "learning_rate": 1.4890212419218042e-05, "loss": 1.3456, "mean_token_accuracy": 0.6643229325612386, "num_tokens": 1152856152.0, "step": 6865 }, { "entropy": 1.7077520688374836, "epoch": 0.7542775534865838, "grad_norm": 0.6826111078262329, "learning_rate": 1.4888768530439648e-05, "loss": 1.3934, "mean_token_accuracy": 0.6760113835334778, "num_tokens": 1153011787.0, "step": 6866 }, { "entropy": 1.7440832058588664, "epoch": 0.7543874103979566, "grad_norm": 0.766875684261322, "learning_rate": 1.4887324518583482e-05, "loss": 1.5279, "mean_token_accuracy": 0.6481309731801351, "num_tokens": 1153194581.0, "step": 6867 }, { "entropy": 1.696602463722229, "epoch": 0.7544972673093296, "grad_norm": 0.6051673293113708, "learning_rate": 1.4885880383695245e-05, "loss": 1.3711, "mean_token_accuracy": 0.6530511478583018, "num_tokens": 1153426371.0, "step": 6868 }, { "entropy": 1.6467144290606182, "epoch": 0.7546071242207025, "grad_norm": 0.7292453646659851, "learning_rate": 1.4884436125820647e-05, "loss": 1.4731, "mean_token_accuracy": 0.6530012140671412, "num_tokens": 1153573291.0, "step": 6869 }, { "entropy": 1.7314301331837971, "epoch": 0.7547169811320755, "grad_norm": 0.6091862916946411, "learning_rate": 1.4882991745005398e-05, "loss": 1.4244, "mean_token_accuracy": 0.6590274671713511, "num_tokens": 1153732528.0, "step": 6870 }, { "entropy": 1.7819582720597584, "epoch": 0.7548268380434484, "grad_norm": 0.7857415676116943, "learning_rate": 1.4881547241295207e-05, "loss": 1.491, "mean_token_accuracy": 0.650216872493426, "num_tokens": 1153921075.0, "step": 6871 }, { "entropy": 1.723918507496516, "epoch": 0.7549366949548213, "grad_norm": 0.8788068294525146, "learning_rate": 1.4880102614735793e-05, "loss": 1.3552, "mean_token_accuracy": 0.6704768786827723, "num_tokens": 1154081656.0, "step": 6872 }, { "entropy": 1.762155642112096, "epoch": 0.7550465518661943, "grad_norm": 0.6839030981063843, "learning_rate": 1.4878657865372885e-05, "loss": 1.4846, "mean_token_accuracy": 0.6490776985883713, "num_tokens": 1154251448.0, "step": 6873 }, { "entropy": 1.69782950480779, "epoch": 0.7551564087775672, "grad_norm": 0.710850715637207, "learning_rate": 1.48772129932522e-05, "loss": 1.3765, "mean_token_accuracy": 0.6583685626586279, "num_tokens": 1154429721.0, "step": 6874 }, { "entropy": 1.6837405562400818, "epoch": 0.7552662656889402, "grad_norm": 0.6713438630104065, "learning_rate": 1.487576799841947e-05, "loss": 1.3999, "mean_token_accuracy": 0.6706645538409551, "num_tokens": 1154576709.0, "step": 6875 }, { "entropy": 1.697175920009613, "epoch": 0.7553761226003131, "grad_norm": 0.6505449414253235, "learning_rate": 1.4874322880920433e-05, "loss": 1.4259, "mean_token_accuracy": 0.6693024138609568, "num_tokens": 1154713459.0, "step": 6876 }, { "entropy": 1.6716215113798778, "epoch": 0.7554859795116861, "grad_norm": 0.6013683080673218, "learning_rate": 1.4872877640800818e-05, "loss": 1.4185, "mean_token_accuracy": 0.6550382524728775, "num_tokens": 1154916105.0, "step": 6877 }, { "entropy": 1.727581520875295, "epoch": 0.7555958364230589, "grad_norm": 0.634597897529602, "learning_rate": 1.4871432278106376e-05, "loss": 1.4924, "mean_token_accuracy": 0.6589693377415339, "num_tokens": 1155124626.0, "step": 6878 }, { "entropy": 1.6853844324747722, "epoch": 0.7557056933344319, "grad_norm": 0.7113041281700134, "learning_rate": 1.4869986792882842e-05, "loss": 1.393, "mean_token_accuracy": 0.6586426496505737, "num_tokens": 1155315768.0, "step": 6879 }, { "entropy": 1.720129370689392, "epoch": 0.7558155502458048, "grad_norm": 0.758216381072998, "learning_rate": 1.4868541185175973e-05, "loss": 1.2764, "mean_token_accuracy": 0.6894825349251429, "num_tokens": 1155458375.0, "step": 6880 }, { "entropy": 1.7519591550032299, "epoch": 0.7559254071571778, "grad_norm": 0.785953164100647, "learning_rate": 1.4867095455031515e-05, "loss": 1.4353, "mean_token_accuracy": 0.6612924883762995, "num_tokens": 1155625543.0, "step": 6881 }, { "entropy": 1.7473149696985881, "epoch": 0.7560352640685507, "grad_norm": 0.7062848210334778, "learning_rate": 1.4865649602495233e-05, "loss": 1.4931, "mean_token_accuracy": 0.6371675978104273, "num_tokens": 1155805083.0, "step": 6882 }, { "entropy": 1.6948178907235463, "epoch": 0.7561451209799237, "grad_norm": 0.6578991413116455, "learning_rate": 1.4864203627612878e-05, "loss": 1.2472, "mean_token_accuracy": 0.6706608285506567, "num_tokens": 1155948315.0, "step": 6883 }, { "entropy": 1.7165430684884389, "epoch": 0.7562549778912966, "grad_norm": 0.6664071083068848, "learning_rate": 1.4862757530430228e-05, "loss": 1.3434, "mean_token_accuracy": 0.6707089493672053, "num_tokens": 1156127831.0, "step": 6884 }, { "entropy": 1.643526017665863, "epoch": 0.7563648348026695, "grad_norm": 0.7331792116165161, "learning_rate": 1.4861311310993037e-05, "loss": 1.3877, "mean_token_accuracy": 0.6548557827870051, "num_tokens": 1156308750.0, "step": 6885 }, { "entropy": 1.709174503882726, "epoch": 0.7564746917140425, "grad_norm": 0.6824013590812683, "learning_rate": 1.485986496934708e-05, "loss": 1.3598, "mean_token_accuracy": 0.6605821500221888, "num_tokens": 1156454330.0, "step": 6886 }, { "entropy": 1.6876440346240997, "epoch": 0.7565845486254154, "grad_norm": 0.7022239565849304, "learning_rate": 1.485841850553814e-05, "loss": 1.4559, "mean_token_accuracy": 0.659210721651713, "num_tokens": 1156623224.0, "step": 6887 }, { "entropy": 1.670984039704005, "epoch": 0.7566944055367884, "grad_norm": 0.8806616067886353, "learning_rate": 1.4856971919611993e-05, "loss": 1.5488, "mean_token_accuracy": 0.6296228965123495, "num_tokens": 1156899766.0, "step": 6888 }, { "entropy": 1.6433234910170238, "epoch": 0.7568042624481612, "grad_norm": 0.6585554480552673, "learning_rate": 1.485552521161442e-05, "loss": 1.2847, "mean_token_accuracy": 0.663401777545611, "num_tokens": 1157043630.0, "step": 6889 }, { "entropy": 1.7140738268693287, "epoch": 0.7569141193595342, "grad_norm": 0.6763997077941895, "learning_rate": 1.4854078381591215e-05, "loss": 1.3165, "mean_token_accuracy": 0.6577816307544708, "num_tokens": 1157198891.0, "step": 6890 }, { "entropy": 1.7322425842285156, "epoch": 0.7570239762709071, "grad_norm": 0.6586790680885315, "learning_rate": 1.4852631429588164e-05, "loss": 1.4056, "mean_token_accuracy": 0.6535748243331909, "num_tokens": 1157367746.0, "step": 6891 }, { "entropy": 1.7426089147726695, "epoch": 0.7571338331822801, "grad_norm": 0.6329144835472107, "learning_rate": 1.4851184355651063e-05, "loss": 1.347, "mean_token_accuracy": 0.6626821060975393, "num_tokens": 1157518999.0, "step": 6892 }, { "entropy": 1.727944056193034, "epoch": 0.757243690093653, "grad_norm": 0.6308918595314026, "learning_rate": 1.4849737159825714e-05, "loss": 1.3709, "mean_token_accuracy": 0.6626657843589783, "num_tokens": 1157679031.0, "step": 6893 }, { "entropy": 1.7070819934209187, "epoch": 0.757353547005026, "grad_norm": 0.6136558055877686, "learning_rate": 1.4848289842157922e-05, "loss": 1.3357, "mean_token_accuracy": 0.6673356592655182, "num_tokens": 1157823732.0, "step": 6894 }, { "entropy": 1.7385432024796803, "epoch": 0.7574634039163989, "grad_norm": 0.6450533270835876, "learning_rate": 1.4846842402693485e-05, "loss": 1.3576, "mean_token_accuracy": 0.6694497863451639, "num_tokens": 1157968223.0, "step": 6895 }, { "entropy": 1.7011185189088185, "epoch": 0.7575732608277719, "grad_norm": 0.7629391551017761, "learning_rate": 1.4845394841478223e-05, "loss": 1.417, "mean_token_accuracy": 0.6586320847272873, "num_tokens": 1158093485.0, "step": 6896 }, { "entropy": 1.7235964337984722, "epoch": 0.7576831177391448, "grad_norm": 0.6074236631393433, "learning_rate": 1.4843947158557943e-05, "loss": 1.5449, "mean_token_accuracy": 0.6520265738169352, "num_tokens": 1158302727.0, "step": 6897 }, { "entropy": 1.6836207310358684, "epoch": 0.7577929746505176, "grad_norm": 0.6065682172775269, "learning_rate": 1.484249935397847e-05, "loss": 1.3707, "mean_token_accuracy": 0.6710223456223806, "num_tokens": 1158455241.0, "step": 6898 }, { "entropy": 1.7246917287508647, "epoch": 0.7579028315618906, "grad_norm": 0.7016457915306091, "learning_rate": 1.4841051427785625e-05, "loss": 1.4724, "mean_token_accuracy": 0.6549848715464274, "num_tokens": 1158625284.0, "step": 6899 }, { "entropy": 1.6922398805618286, "epoch": 0.7580126884732635, "grad_norm": 0.6993584036827087, "learning_rate": 1.4839603380025236e-05, "loss": 1.37, "mean_token_accuracy": 0.662392814954122, "num_tokens": 1158788784.0, "step": 6900 }, { "entropy": 1.6558389564355214, "epoch": 0.7581225453846365, "grad_norm": 0.7079626321792603, "learning_rate": 1.4838155210743124e-05, "loss": 1.2161, "mean_token_accuracy": 0.68675068517526, "num_tokens": 1158934601.0, "step": 6901 }, { "entropy": 1.7894011040528615, "epoch": 0.7582324022960094, "grad_norm": 0.7190823554992676, "learning_rate": 1.4836706919985131e-05, "loss": 1.517, "mean_token_accuracy": 0.6554691096146902, "num_tokens": 1159073338.0, "step": 6902 }, { "entropy": 1.7235852877298992, "epoch": 0.7583422592073824, "grad_norm": 0.6851528882980347, "learning_rate": 1.4835258507797094e-05, "loss": 1.3269, "mean_token_accuracy": 0.6659359286228815, "num_tokens": 1159221997.0, "step": 6903 }, { "entropy": 1.6805242598056793, "epoch": 0.7584521161187553, "grad_norm": 0.5390611886978149, "learning_rate": 1.4833809974224853e-05, "loss": 1.3438, "mean_token_accuracy": 0.6687274475892385, "num_tokens": 1159431862.0, "step": 6904 }, { "entropy": 1.7057210902372997, "epoch": 0.7585619730301283, "grad_norm": 0.6051385402679443, "learning_rate": 1.4832361319314252e-05, "loss": 1.4902, "mean_token_accuracy": 0.6540891279776891, "num_tokens": 1159648613.0, "step": 6905 }, { "entropy": 1.6478383739789326, "epoch": 0.7586718299415012, "grad_norm": 0.6398272514343262, "learning_rate": 1.4830912543111146e-05, "loss": 1.4191, "mean_token_accuracy": 0.6588562329610189, "num_tokens": 1159819964.0, "step": 6906 }, { "entropy": 1.7082193195819855, "epoch": 0.7587816868528742, "grad_norm": 0.7104562520980835, "learning_rate": 1.4829463645661382e-05, "loss": 1.3546, "mean_token_accuracy": 0.6578278988599777, "num_tokens": 1159996948.0, "step": 6907 }, { "entropy": 1.7297697563966115, "epoch": 0.7588915437642471, "grad_norm": 0.6430516242980957, "learning_rate": 1.4828014627010819e-05, "loss": 1.3365, "mean_token_accuracy": 0.6678340236345927, "num_tokens": 1160156500.0, "step": 6908 }, { "entropy": 1.639371891816457, "epoch": 0.75900140067562, "grad_norm": 0.6583812832832336, "learning_rate": 1.4826565487205319e-05, "loss": 1.3132, "mean_token_accuracy": 0.6856881082057953, "num_tokens": 1160300199.0, "step": 6909 }, { "entropy": 1.7579985360304515, "epoch": 0.7591112575869929, "grad_norm": 0.8110305666923523, "learning_rate": 1.4825116226290746e-05, "loss": 1.6068, "mean_token_accuracy": 0.6306049029032389, "num_tokens": 1160545710.0, "step": 6910 }, { "entropy": 1.6936591267585754, "epoch": 0.7592211144983658, "grad_norm": 0.6707553863525391, "learning_rate": 1.4823666844312962e-05, "loss": 1.29, "mean_token_accuracy": 0.6731418470541636, "num_tokens": 1160680981.0, "step": 6911 }, { "entropy": 1.705474744240443, "epoch": 0.7593309714097388, "grad_norm": 0.764737069606781, "learning_rate": 1.4822217341317852e-05, "loss": 1.4305, "mean_token_accuracy": 0.6555500676234564, "num_tokens": 1160819936.0, "step": 6912 }, { "entropy": 1.6776454746723175, "epoch": 0.7594408283211117, "grad_norm": 0.6505389213562012, "learning_rate": 1.4820767717351285e-05, "loss": 1.3035, "mean_token_accuracy": 0.6752181301514307, "num_tokens": 1161006775.0, "step": 6913 }, { "entropy": 1.6627070903778076, "epoch": 0.7595506852324847, "grad_norm": 0.6468442678451538, "learning_rate": 1.481931797245914e-05, "loss": 1.4297, "mean_token_accuracy": 0.6586330334345499, "num_tokens": 1161241820.0, "step": 6914 }, { "entropy": 1.6682479977607727, "epoch": 0.7596605421438576, "grad_norm": 0.6730937361717224, "learning_rate": 1.4817868106687303e-05, "loss": 1.4197, "mean_token_accuracy": 0.6521937002738317, "num_tokens": 1161416726.0, "step": 6915 }, { "entropy": 1.6636716326077778, "epoch": 0.7597703990552306, "grad_norm": 0.6824373006820679, "learning_rate": 1.4816418120081662e-05, "loss": 1.3944, "mean_token_accuracy": 0.6513000329335531, "num_tokens": 1161583847.0, "step": 6916 }, { "entropy": 1.7242677907148998, "epoch": 0.7598802559666035, "grad_norm": 0.686144232749939, "learning_rate": 1.4814968012688102e-05, "loss": 1.4005, "mean_token_accuracy": 0.656681497891744, "num_tokens": 1161736676.0, "step": 6917 }, { "entropy": 1.6589552164077759, "epoch": 0.7599901128779765, "grad_norm": 0.6163228154182434, "learning_rate": 1.4813517784552529e-05, "loss": 1.4136, "mean_token_accuracy": 0.6552829394737879, "num_tokens": 1161954113.0, "step": 6918 }, { "entropy": 1.678790142138799, "epoch": 0.7600999697893494, "grad_norm": 0.6521669030189514, "learning_rate": 1.4812067435720834e-05, "loss": 1.1138, "mean_token_accuracy": 0.6720156023899714, "num_tokens": 1162176443.0, "step": 6919 }, { "entropy": 1.6968080500761669, "epoch": 0.7602098267007223, "grad_norm": 0.6458204388618469, "learning_rate": 1.4810616966238922e-05, "loss": 1.4491, "mean_token_accuracy": 0.6454138110081354, "num_tokens": 1162348356.0, "step": 6920 }, { "entropy": 1.6829048295815785, "epoch": 0.7603196836120952, "grad_norm": 0.7455350756645203, "learning_rate": 1.4809166376152701e-05, "loss": 1.3664, "mean_token_accuracy": 0.6672768096129099, "num_tokens": 1162488148.0, "step": 6921 }, { "entropy": 1.6659102042516072, "epoch": 0.7604295405234682, "grad_norm": 0.7293592095375061, "learning_rate": 1.4807715665508083e-05, "loss": 1.3741, "mean_token_accuracy": 0.6664891839027405, "num_tokens": 1162650930.0, "step": 6922 }, { "entropy": 1.6555348932743073, "epoch": 0.7605393974348411, "grad_norm": 0.727997899055481, "learning_rate": 1.4806264834350976e-05, "loss": 1.3346, "mean_token_accuracy": 0.6639738827943802, "num_tokens": 1162797968.0, "step": 6923 }, { "entropy": 1.7288571496804555, "epoch": 0.7606492543462141, "grad_norm": 0.7030077576637268, "learning_rate": 1.4804813882727305e-05, "loss": 1.3352, "mean_token_accuracy": 0.6633950720230738, "num_tokens": 1163010773.0, "step": 6924 }, { "entropy": 1.6670528650283813, "epoch": 0.760759111257587, "grad_norm": 0.8227211236953735, "learning_rate": 1.4803362810682988e-05, "loss": 1.2828, "mean_token_accuracy": 0.678699125846227, "num_tokens": 1163144830.0, "step": 6925 }, { "entropy": 1.7120730479558308, "epoch": 0.7608689681689599, "grad_norm": 0.6343841552734375, "learning_rate": 1.480191161826395e-05, "loss": 1.4498, "mean_token_accuracy": 0.638987218340238, "num_tokens": 1163435423.0, "step": 6926 }, { "entropy": 1.7124398946762085, "epoch": 0.7609788250803329, "grad_norm": 0.5454217791557312, "learning_rate": 1.4800460305516125e-05, "loss": 1.523, "mean_token_accuracy": 0.6400202016035715, "num_tokens": 1163644758.0, "step": 6927 }, { "entropy": 1.7238081296284993, "epoch": 0.7610886819917058, "grad_norm": 0.9346860647201538, "learning_rate": 1.4799008872485442e-05, "loss": 1.4065, "mean_token_accuracy": 0.6679123987754186, "num_tokens": 1163827583.0, "step": 6928 }, { "entropy": 1.7018209397792816, "epoch": 0.7611985389030788, "grad_norm": 0.7219953536987305, "learning_rate": 1.4797557319217844e-05, "loss": 1.3688, "mean_token_accuracy": 0.6602154572804769, "num_tokens": 1163970324.0, "step": 6929 }, { "entropy": 1.668403019507726, "epoch": 0.7613083958144516, "grad_norm": 0.7923089861869812, "learning_rate": 1.4796105645759265e-05, "loss": 1.3472, "mean_token_accuracy": 0.6825543294350306, "num_tokens": 1164133261.0, "step": 6930 }, { "entropy": 1.717042436202367, "epoch": 0.7614182527258246, "grad_norm": 0.6521219611167908, "learning_rate": 1.4794653852155652e-05, "loss": 1.3194, "mean_token_accuracy": 0.679710810383161, "num_tokens": 1164291576.0, "step": 6931 }, { "entropy": 1.6769898136456807, "epoch": 0.7615281096371975, "grad_norm": 0.720014214515686, "learning_rate": 1.4793201938452954e-05, "loss": 1.2698, "mean_token_accuracy": 0.6756969839334488, "num_tokens": 1164403028.0, "step": 6932 }, { "entropy": 1.6898697714010875, "epoch": 0.7616379665485705, "grad_norm": 0.7772789001464844, "learning_rate": 1.4791749904697126e-05, "loss": 1.4018, "mean_token_accuracy": 0.6722168525060018, "num_tokens": 1164542964.0, "step": 6933 }, { "entropy": 1.7483848134676616, "epoch": 0.7617478234599434, "grad_norm": 0.7039276957511902, "learning_rate": 1.4790297750934122e-05, "loss": 1.5323, "mean_token_accuracy": 0.6407303462425867, "num_tokens": 1164715324.0, "step": 6934 }, { "entropy": 1.7075275778770447, "epoch": 0.7618576803713164, "grad_norm": 0.8316227197647095, "learning_rate": 1.4788845477209902e-05, "loss": 1.2911, "mean_token_accuracy": 0.6719946066538492, "num_tokens": 1164865136.0, "step": 6935 }, { "entropy": 1.7587328751881917, "epoch": 0.7619675372826893, "grad_norm": 0.7186470031738281, "learning_rate": 1.478739308357043e-05, "loss": 1.5645, "mean_token_accuracy": 0.6294473161300024, "num_tokens": 1165022876.0, "step": 6936 }, { "entropy": 1.6595724324385326, "epoch": 0.7620773941940623, "grad_norm": 0.7300217151641846, "learning_rate": 1.4785940570061674e-05, "loss": 1.2741, "mean_token_accuracy": 0.6697218616803488, "num_tokens": 1165153628.0, "step": 6937 }, { "entropy": 1.7758424580097198, "epoch": 0.7621872511054352, "grad_norm": 0.7241067886352539, "learning_rate": 1.4784487936729603e-05, "loss": 1.4515, "mean_token_accuracy": 0.6553726196289062, "num_tokens": 1165289807.0, "step": 6938 }, { "entropy": 1.6955066323280334, "epoch": 0.7622971080168081, "grad_norm": 0.7136008143424988, "learning_rate": 1.4783035183620195e-05, "loss": 1.3052, "mean_token_accuracy": 0.6689305007457733, "num_tokens": 1165443874.0, "step": 6939 }, { "entropy": 1.6818625926971436, "epoch": 0.762406964928181, "grad_norm": 0.7151510119438171, "learning_rate": 1.478158231077943e-05, "loss": 1.3418, "mean_token_accuracy": 0.6611978759368261, "num_tokens": 1165609704.0, "step": 6940 }, { "entropy": 1.6454001367092133, "epoch": 0.7625168218395539, "grad_norm": 0.7376065850257874, "learning_rate": 1.4780129318253287e-05, "loss": 1.262, "mean_token_accuracy": 0.6749721119801203, "num_tokens": 1165748280.0, "step": 6941 }, { "entropy": 1.7588698168595631, "epoch": 0.7626266787509269, "grad_norm": 0.8318473100662231, "learning_rate": 1.4778676206087757e-05, "loss": 1.3082, "mean_token_accuracy": 0.6613359103600184, "num_tokens": 1165874711.0, "step": 6942 }, { "entropy": 1.6534738838672638, "epoch": 0.7627365356622998, "grad_norm": 0.7632639408111572, "learning_rate": 1.4777222974328823e-05, "loss": 1.2516, "mean_token_accuracy": 0.6722172896067301, "num_tokens": 1166003519.0, "step": 6943 }, { "entropy": 1.716422309478124, "epoch": 0.7628463925736728, "grad_norm": 0.6576639413833618, "learning_rate": 1.4775769623022488e-05, "loss": 1.3884, "mean_token_accuracy": 0.6642766098181406, "num_tokens": 1166160623.0, "step": 6944 }, { "entropy": 1.6622655391693115, "epoch": 0.7629562494850457, "grad_norm": 0.619766116142273, "learning_rate": 1.477431615221474e-05, "loss": 1.3108, "mean_token_accuracy": 0.6632679601510366, "num_tokens": 1166325809.0, "step": 6945 }, { "entropy": 1.6929753025372822, "epoch": 0.7630661063964187, "grad_norm": 0.6698241829872131, "learning_rate": 1.4772862561951595e-05, "loss": 1.3187, "mean_token_accuracy": 0.6679337720076243, "num_tokens": 1166463053.0, "step": 6946 }, { "entropy": 1.6616682608922322, "epoch": 0.7631759633077916, "grad_norm": 0.5817018747329712, "learning_rate": 1.4771408852279045e-05, "loss": 1.358, "mean_token_accuracy": 0.6618844419717789, "num_tokens": 1166652937.0, "step": 6947 }, { "entropy": 1.6677273412545521, "epoch": 0.7632858202191646, "grad_norm": 0.7584317326545715, "learning_rate": 1.4769955023243104e-05, "loss": 1.2932, "mean_token_accuracy": 0.6745211482048035, "num_tokens": 1166780191.0, "step": 6948 }, { "entropy": 1.6915989518165588, "epoch": 0.7633956771305375, "grad_norm": 0.6527446508407593, "learning_rate": 1.4768501074889787e-05, "loss": 1.431, "mean_token_accuracy": 0.641153042515119, "num_tokens": 1166943637.0, "step": 6949 }, { "entropy": 1.722637156645457, "epoch": 0.7635055340419105, "grad_norm": 0.712783694267273, "learning_rate": 1.476704700726511e-05, "loss": 1.4764, "mean_token_accuracy": 0.6418820122877756, "num_tokens": 1167072984.0, "step": 6950 }, { "entropy": 1.6762286921342213, "epoch": 0.7636153909532833, "grad_norm": 0.7010881900787354, "learning_rate": 1.4765592820415087e-05, "loss": 1.3241, "mean_token_accuracy": 0.6702330708503723, "num_tokens": 1167222121.0, "step": 6951 }, { "entropy": 1.6860096454620361, "epoch": 0.7637252478646562, "grad_norm": 0.720114529132843, "learning_rate": 1.4764138514385755e-05, "loss": 1.3242, "mean_token_accuracy": 0.6637054880460104, "num_tokens": 1167366067.0, "step": 6952 }, { "entropy": 1.685429056485494, "epoch": 0.7638351047760292, "grad_norm": 0.6480314135551453, "learning_rate": 1.4762684089223133e-05, "loss": 1.4365, "mean_token_accuracy": 0.6541512509187063, "num_tokens": 1167560550.0, "step": 6953 }, { "entropy": 1.7416847745577495, "epoch": 0.7639449616874021, "grad_norm": 0.7204356789588928, "learning_rate": 1.4761229544973253e-05, "loss": 1.3083, "mean_token_accuracy": 0.6680977592865626, "num_tokens": 1167682119.0, "step": 6954 }, { "entropy": 1.7352920869986217, "epoch": 0.7640548185987751, "grad_norm": 0.6415309906005859, "learning_rate": 1.4759774881682154e-05, "loss": 1.4291, "mean_token_accuracy": 0.6526644130547842, "num_tokens": 1167920712.0, "step": 6955 }, { "entropy": 1.7168652017911274, "epoch": 0.764164675510148, "grad_norm": 0.7147775292396545, "learning_rate": 1.4758320099395878e-05, "loss": 1.4244, "mean_token_accuracy": 0.6459483454624811, "num_tokens": 1168098347.0, "step": 6956 }, { "entropy": 1.716566542784373, "epoch": 0.764274532421521, "grad_norm": 0.6769205331802368, "learning_rate": 1.475686519816046e-05, "loss": 1.3462, "mean_token_accuracy": 0.6656129608551661, "num_tokens": 1168227173.0, "step": 6957 }, { "entropy": 1.7634214858214061, "epoch": 0.7643843893328939, "grad_norm": 0.6557965874671936, "learning_rate": 1.475541017802195e-05, "loss": 1.3621, "mean_token_accuracy": 0.6519608447949091, "num_tokens": 1168383497.0, "step": 6958 }, { "entropy": 1.7473195095856984, "epoch": 0.7644942462442669, "grad_norm": 0.7062838673591614, "learning_rate": 1.4753955039026404e-05, "loss": 1.3984, "mean_token_accuracy": 0.6513441602389017, "num_tokens": 1168533793.0, "step": 6959 }, { "entropy": 1.6654168864091237, "epoch": 0.7646041031556398, "grad_norm": 0.6767547726631165, "learning_rate": 1.4752499781219872e-05, "loss": 1.2874, "mean_token_accuracy": 0.6723661124706268, "num_tokens": 1168672243.0, "step": 6960 }, { "entropy": 1.6357990304629009, "epoch": 0.7647139600670128, "grad_norm": 0.7614895105361938, "learning_rate": 1.4751044404648408e-05, "loss": 1.2983, "mean_token_accuracy": 0.6715351541837057, "num_tokens": 1168857075.0, "step": 6961 }, { "entropy": 1.7005026539166768, "epoch": 0.7648238169783856, "grad_norm": 0.666313648223877, "learning_rate": 1.4749588909358083e-05, "loss": 1.3576, "mean_token_accuracy": 0.6531829734643301, "num_tokens": 1169003694.0, "step": 6962 }, { "entropy": 1.6825850903987885, "epoch": 0.7649336738897586, "grad_norm": 0.6583350896835327, "learning_rate": 1.474813329539496e-05, "loss": 1.4258, "mean_token_accuracy": 0.6811217963695526, "num_tokens": 1169167340.0, "step": 6963 }, { "entropy": 1.6577100853125255, "epoch": 0.7650435308011315, "grad_norm": 0.9120500087738037, "learning_rate": 1.4746677562805105e-05, "loss": 1.1814, "mean_token_accuracy": 0.6874327609936396, "num_tokens": 1169279246.0, "step": 6964 }, { "entropy": 1.7242598036924999, "epoch": 0.7651533877125045, "grad_norm": 0.9223476052284241, "learning_rate": 1.4745221711634595e-05, "loss": 1.2861, "mean_token_accuracy": 0.6691893190145493, "num_tokens": 1169427675.0, "step": 6965 }, { "entropy": 1.727004200220108, "epoch": 0.7652632446238774, "grad_norm": 0.7496103644371033, "learning_rate": 1.4743765741929503e-05, "loss": 1.4509, "mean_token_accuracy": 0.646850789586703, "num_tokens": 1169595749.0, "step": 6966 }, { "entropy": 1.644486020008723, "epoch": 0.7653731015352503, "grad_norm": 0.5539238452911377, "learning_rate": 1.4742309653735911e-05, "loss": 1.3391, "mean_token_accuracy": 0.655859500169754, "num_tokens": 1169788192.0, "step": 6967 }, { "entropy": 1.7506780723730724, "epoch": 0.7654829584466233, "grad_norm": 0.7031539678573608, "learning_rate": 1.4740853447099912e-05, "loss": 1.4266, "mean_token_accuracy": 0.6525140305360159, "num_tokens": 1169919266.0, "step": 6968 }, { "entropy": 1.7322252094745636, "epoch": 0.7655928153579962, "grad_norm": 0.712948203086853, "learning_rate": 1.4739397122067583e-05, "loss": 1.4078, "mean_token_accuracy": 0.6520160535971323, "num_tokens": 1170084069.0, "step": 6969 }, { "entropy": 1.6913042962551117, "epoch": 0.7657026722693692, "grad_norm": 0.6860613226890564, "learning_rate": 1.4737940678685016e-05, "loss": 1.3426, "mean_token_accuracy": 0.6700414170821508, "num_tokens": 1170236546.0, "step": 6970 }, { "entropy": 1.7662197053432465, "epoch": 0.765812529180742, "grad_norm": 0.696919858455658, "learning_rate": 1.4736484116998315e-05, "loss": 1.4445, "mean_token_accuracy": 0.6526474754015604, "num_tokens": 1170412715.0, "step": 6971 }, { "entropy": 1.703002353509267, "epoch": 0.765922386092115, "grad_norm": 0.6996424198150635, "learning_rate": 1.4735027437053576e-05, "loss": 1.2528, "mean_token_accuracy": 0.6738860954840978, "num_tokens": 1170516111.0, "step": 6972 }, { "entropy": 1.7598189612229664, "epoch": 0.7660322430034879, "grad_norm": 0.6697201132774353, "learning_rate": 1.47335706388969e-05, "loss": 1.4846, "mean_token_accuracy": 0.6326592018206915, "num_tokens": 1170683915.0, "step": 6973 }, { "entropy": 1.744869331518809, "epoch": 0.7661420999148609, "grad_norm": 0.6344386339187622, "learning_rate": 1.4732113722574395e-05, "loss": 1.3379, "mean_token_accuracy": 0.6556073526541392, "num_tokens": 1170823388.0, "step": 6974 }, { "entropy": 1.6862431168556213, "epoch": 0.7662519568262338, "grad_norm": 0.6660764217376709, "learning_rate": 1.4730656688132173e-05, "loss": 1.4486, "mean_token_accuracy": 0.6581073055664698, "num_tokens": 1170965077.0, "step": 6975 }, { "entropy": 1.7214731673399608, "epoch": 0.7663618137376068, "grad_norm": 0.6826305389404297, "learning_rate": 1.472919953561635e-05, "loss": 1.3649, "mean_token_accuracy": 0.6637348333994547, "num_tokens": 1171116343.0, "step": 6976 }, { "entropy": 1.7036389410495758, "epoch": 0.7664716706489797, "grad_norm": 0.6430275440216064, "learning_rate": 1.472774226507304e-05, "loss": 1.3781, "mean_token_accuracy": 0.6487658222516378, "num_tokens": 1171264476.0, "step": 6977 }, { "entropy": 1.6874217987060547, "epoch": 0.7665815275603527, "grad_norm": 0.6152638792991638, "learning_rate": 1.4726284876548367e-05, "loss": 1.3798, "mean_token_accuracy": 0.6498878498872122, "num_tokens": 1171468339.0, "step": 6978 }, { "entropy": 1.7156427005926769, "epoch": 0.7666913844717256, "grad_norm": 0.6952628493309021, "learning_rate": 1.4724827370088457e-05, "loss": 1.4389, "mean_token_accuracy": 0.6459887872139612, "num_tokens": 1171683447.0, "step": 6979 }, { "entropy": 1.6867701709270477, "epoch": 0.7668012413830985, "grad_norm": 0.6468155980110168, "learning_rate": 1.472336974573944e-05, "loss": 1.4697, "mean_token_accuracy": 0.6447963615258535, "num_tokens": 1171853214.0, "step": 6980 }, { "entropy": 1.710288276274999, "epoch": 0.7669110982944715, "grad_norm": 0.5480025410652161, "learning_rate": 1.4721912003547447e-05, "loss": 1.4186, "mean_token_accuracy": 0.648127923409144, "num_tokens": 1172038628.0, "step": 6981 }, { "entropy": 1.6937748491764069, "epoch": 0.7670209552058443, "grad_norm": 0.6765931844711304, "learning_rate": 1.4720454143558618e-05, "loss": 1.4487, "mean_token_accuracy": 0.6704505582650503, "num_tokens": 1172249770.0, "step": 6982 }, { "entropy": 1.7202177445093791, "epoch": 0.7671308121172173, "grad_norm": 0.7446685433387756, "learning_rate": 1.4718996165819093e-05, "loss": 1.3326, "mean_token_accuracy": 0.6584400335947672, "num_tokens": 1172391864.0, "step": 6983 }, { "entropy": 1.6548251410325368, "epoch": 0.7672406690285902, "grad_norm": 0.7036621570587158, "learning_rate": 1.471753807037501e-05, "loss": 1.2375, "mean_token_accuracy": 0.680665984749794, "num_tokens": 1172535215.0, "step": 6984 }, { "entropy": 1.6808474858601887, "epoch": 0.7673505259399632, "grad_norm": 0.6487118601799011, "learning_rate": 1.4716079857272527e-05, "loss": 1.3899, "mean_token_accuracy": 0.6563947548468908, "num_tokens": 1172692357.0, "step": 6985 }, { "entropy": 1.695367197195689, "epoch": 0.7674603828513361, "grad_norm": 1.2500559091567993, "learning_rate": 1.4714621526557788e-05, "loss": 1.3457, "mean_token_accuracy": 0.6743065714836121, "num_tokens": 1172820590.0, "step": 6986 }, { "entropy": 1.6751854817072551, "epoch": 0.7675702397627091, "grad_norm": 0.5937096476554871, "learning_rate": 1.4713163078276953e-05, "loss": 1.4562, "mean_token_accuracy": 0.6525317927201589, "num_tokens": 1173063549.0, "step": 6987 }, { "entropy": 1.6620129545529683, "epoch": 0.767680096674082, "grad_norm": 0.7316376566886902, "learning_rate": 1.471170451247618e-05, "loss": 1.3667, "mean_token_accuracy": 0.666183148821195, "num_tokens": 1173292913.0, "step": 6988 }, { "entropy": 1.7417064011096954, "epoch": 0.767789953585455, "grad_norm": 0.6786331534385681, "learning_rate": 1.471024582920163e-05, "loss": 1.3281, "mean_token_accuracy": 0.6628505686918894, "num_tokens": 1173481347.0, "step": 6989 }, { "entropy": 1.6276845037937164, "epoch": 0.7678998104968279, "grad_norm": 0.8080840706825256, "learning_rate": 1.4708787028499475e-05, "loss": 1.2766, "mean_token_accuracy": 0.670835038026174, "num_tokens": 1173610214.0, "step": 6990 }, { "entropy": 1.725032518307368, "epoch": 0.7680096674082009, "grad_norm": 0.9934976696968079, "learning_rate": 1.470732811041588e-05, "loss": 1.3939, "mean_token_accuracy": 0.6700426588455836, "num_tokens": 1173748546.0, "step": 6991 }, { "entropy": 1.7025299568970997, "epoch": 0.7681195243195738, "grad_norm": 0.6891453266143799, "learning_rate": 1.4705869074997022e-05, "loss": 1.5514, "mean_token_accuracy": 0.6387580533822378, "num_tokens": 1173940859.0, "step": 6992 }, { "entropy": 1.7296819686889648, "epoch": 0.7682293812309466, "grad_norm": 0.6920235753059387, "learning_rate": 1.4704409922289074e-05, "loss": 1.3289, "mean_token_accuracy": 0.6593838532765707, "num_tokens": 1174139950.0, "step": 6993 }, { "entropy": 1.7431517243385315, "epoch": 0.7683392381423196, "grad_norm": 0.6889626979827881, "learning_rate": 1.4702950652338224e-05, "loss": 1.5143, "mean_token_accuracy": 0.6448341409365336, "num_tokens": 1174281330.0, "step": 6994 }, { "entropy": 1.6172963281472523, "epoch": 0.7684490950536925, "grad_norm": 0.6094475388526917, "learning_rate": 1.4701491265190652e-05, "loss": 1.3748, "mean_token_accuracy": 0.6714093685150146, "num_tokens": 1174444111.0, "step": 6995 }, { "entropy": 1.7066385547320049, "epoch": 0.7685589519650655, "grad_norm": 0.5568619966506958, "learning_rate": 1.4700031760892552e-05, "loss": 1.3955, "mean_token_accuracy": 0.6417905241250992, "num_tokens": 1174677634.0, "step": 6996 }, { "entropy": 1.7028604447841644, "epoch": 0.7686688088764384, "grad_norm": 0.8622896075248718, "learning_rate": 1.4698572139490113e-05, "loss": 1.3625, "mean_token_accuracy": 0.6595305403073629, "num_tokens": 1174829291.0, "step": 6997 }, { "entropy": 1.748704065879186, "epoch": 0.7687786657878114, "grad_norm": 0.7469777464866638, "learning_rate": 1.4697112401029532e-05, "loss": 1.6273, "mean_token_accuracy": 0.6403237904111544, "num_tokens": 1175037549.0, "step": 6998 }, { "entropy": 1.6959306299686432, "epoch": 0.7688885226991843, "grad_norm": 0.7172491550445557, "learning_rate": 1.4695652545557009e-05, "loss": 1.3416, "mean_token_accuracy": 0.6578481743733088, "num_tokens": 1175213614.0, "step": 6999 }, { "entropy": 1.7542682588100433, "epoch": 0.7689983796105573, "grad_norm": 0.665587306022644, "learning_rate": 1.469419257311875e-05, "loss": 1.3778, "mean_token_accuracy": 0.6552835355202357, "num_tokens": 1175337289.0, "step": 7000 }, { "entropy": 1.6769481201966603, "epoch": 0.7691082365219302, "grad_norm": 0.8862442374229431, "learning_rate": 1.4692732483760958e-05, "loss": 1.347, "mean_token_accuracy": 0.6755407452583313, "num_tokens": 1175483112.0, "step": 7001 }, { "entropy": 1.719422310590744, "epoch": 0.7692180934333032, "grad_norm": 0.755631148815155, "learning_rate": 1.4691272277529852e-05, "loss": 1.3332, "mean_token_accuracy": 0.6593481749296188, "num_tokens": 1175630991.0, "step": 7002 }, { "entropy": 1.6748768985271454, "epoch": 0.769327950344676, "grad_norm": 0.6282011270523071, "learning_rate": 1.4689811954471638e-05, "loss": 1.3524, "mean_token_accuracy": 0.6604679971933365, "num_tokens": 1175801098.0, "step": 7003 }, { "entropy": 1.6717474361260731, "epoch": 0.769437807256049, "grad_norm": 0.7004813551902771, "learning_rate": 1.4688351514632539e-05, "loss": 1.255, "mean_token_accuracy": 0.6758040388425192, "num_tokens": 1175949273.0, "step": 7004 }, { "entropy": 1.7221704920132954, "epoch": 0.7695476641674219, "grad_norm": 0.6526414752006531, "learning_rate": 1.4686890958058774e-05, "loss": 1.3306, "mean_token_accuracy": 0.6652916769186655, "num_tokens": 1176111924.0, "step": 7005 }, { "entropy": 1.7045084337393444, "epoch": 0.7696575210787948, "grad_norm": 0.6869589686393738, "learning_rate": 1.4685430284796575e-05, "loss": 1.3229, "mean_token_accuracy": 0.6639479349056879, "num_tokens": 1176252812.0, "step": 7006 }, { "entropy": 1.6703706979751587, "epoch": 0.7697673779901678, "grad_norm": 0.6456193923950195, "learning_rate": 1.4683969494892168e-05, "loss": 1.3546, "mean_token_accuracy": 0.6619254897038142, "num_tokens": 1176420918.0, "step": 7007 }, { "entropy": 1.7053408324718475, "epoch": 0.7698772349015407, "grad_norm": 0.724219560623169, "learning_rate": 1.4682508588391786e-05, "loss": 1.3881, "mean_token_accuracy": 0.6580935915311178, "num_tokens": 1176594950.0, "step": 7008 }, { "entropy": 1.6640128095944722, "epoch": 0.7699870918129137, "grad_norm": 0.7082245349884033, "learning_rate": 1.4681047565341664e-05, "loss": 1.4806, "mean_token_accuracy": 0.6520075996716818, "num_tokens": 1176779499.0, "step": 7009 }, { "entropy": 1.6447947323322296, "epoch": 0.7700969487242866, "grad_norm": 0.7441786527633667, "learning_rate": 1.4679586425788051e-05, "loss": 1.3688, "mean_token_accuracy": 0.657518689831098, "num_tokens": 1176947062.0, "step": 7010 }, { "entropy": 1.7106069127718608, "epoch": 0.7702068056356596, "grad_norm": 0.8191734552383423, "learning_rate": 1.467812516977718e-05, "loss": 1.4011, "mean_token_accuracy": 0.6573437452316284, "num_tokens": 1177110501.0, "step": 7011 }, { "entropy": 1.7322260042031605, "epoch": 0.7703166625470325, "grad_norm": 0.7164145708084106, "learning_rate": 1.4676663797355307e-05, "loss": 1.3584, "mean_token_accuracy": 0.6518987119197845, "num_tokens": 1177271454.0, "step": 7012 }, { "entropy": 1.6704432566960652, "epoch": 0.7704265194584055, "grad_norm": 0.7696778774261475, "learning_rate": 1.4675202308568682e-05, "loss": 1.2934, "mean_token_accuracy": 0.6695187787214915, "num_tokens": 1177410556.0, "step": 7013 }, { "entropy": 1.6978066364924114, "epoch": 0.7705363763697783, "grad_norm": 0.8118786811828613, "learning_rate": 1.4673740703463559e-05, "loss": 1.2362, "mean_token_accuracy": 0.6846217463413874, "num_tokens": 1177543278.0, "step": 7014 }, { "entropy": 1.6863240996996562, "epoch": 0.7706462332811513, "grad_norm": 0.7545218467712402, "learning_rate": 1.46722789820862e-05, "loss": 1.3926, "mean_token_accuracy": 0.6479389071464539, "num_tokens": 1177711545.0, "step": 7015 }, { "entropy": 1.6915642023086548, "epoch": 0.7707560901925242, "grad_norm": 0.6925393342971802, "learning_rate": 1.4670817144482864e-05, "loss": 1.2654, "mean_token_accuracy": 0.6772432029247284, "num_tokens": 1177877618.0, "step": 7016 }, { "entropy": 1.7470574875672658, "epoch": 0.7708659471038972, "grad_norm": 0.67853844165802, "learning_rate": 1.466935519069982e-05, "loss": 1.6348, "mean_token_accuracy": 0.6177881682912508, "num_tokens": 1178092438.0, "step": 7017 }, { "entropy": 1.7246305743853252, "epoch": 0.7709758040152701, "grad_norm": 0.6471522450447083, "learning_rate": 1.4667893120783337e-05, "loss": 1.4376, "mean_token_accuracy": 0.6511177718639374, "num_tokens": 1178263593.0, "step": 7018 }, { "entropy": 1.7667359312375386, "epoch": 0.7710856609266431, "grad_norm": 0.7176552414894104, "learning_rate": 1.4666430934779692e-05, "loss": 1.3189, "mean_token_accuracy": 0.6782967547575632, "num_tokens": 1178415863.0, "step": 7019 }, { "entropy": 1.7315512498219807, "epoch": 0.771195517838016, "grad_norm": 0.6310048699378967, "learning_rate": 1.4664968632735157e-05, "loss": 1.5482, "mean_token_accuracy": 0.6332679738601049, "num_tokens": 1178636690.0, "step": 7020 }, { "entropy": 1.6622104545434315, "epoch": 0.7713053747493889, "grad_norm": 0.5793983340263367, "learning_rate": 1.4663506214696019e-05, "loss": 1.5111, "mean_token_accuracy": 0.6406310300032297, "num_tokens": 1178883992.0, "step": 7021 }, { "entropy": 1.6803521513938904, "epoch": 0.7714152316607619, "grad_norm": 0.5114834308624268, "learning_rate": 1.4662043680708557e-05, "loss": 1.55, "mean_token_accuracy": 0.6302276899417242, "num_tokens": 1179137785.0, "step": 7022 }, { "entropy": 1.7061661183834076, "epoch": 0.7715250885721348, "grad_norm": 0.6063534617424011, "learning_rate": 1.4660581030819063e-05, "loss": 1.3733, "mean_token_accuracy": 0.6526365379492441, "num_tokens": 1179295006.0, "step": 7023 }, { "entropy": 1.7167406380176544, "epoch": 0.7716349454835078, "grad_norm": 0.6147019267082214, "learning_rate": 1.4659118265073832e-05, "loss": 1.394, "mean_token_accuracy": 0.6613097737232844, "num_tokens": 1179510820.0, "step": 7024 }, { "entropy": 1.6936827500661213, "epoch": 0.7717448023948806, "grad_norm": 0.7615833878517151, "learning_rate": 1.4657655383519157e-05, "loss": 1.3649, "mean_token_accuracy": 0.6567181398471197, "num_tokens": 1179665016.0, "step": 7025 }, { "entropy": 1.7460854351520538, "epoch": 0.7718546593062536, "grad_norm": 0.8454451560974121, "learning_rate": 1.4656192386201333e-05, "loss": 1.4548, "mean_token_accuracy": 0.6643540759881338, "num_tokens": 1179813832.0, "step": 7026 }, { "entropy": 1.6930663386980693, "epoch": 0.7719645162176265, "grad_norm": 0.7514932155609131, "learning_rate": 1.465472927316667e-05, "loss": 1.5265, "mean_token_accuracy": 0.6348920861879984, "num_tokens": 1180021268.0, "step": 7027 }, { "entropy": 1.7139446039994557, "epoch": 0.7720743731289995, "grad_norm": 0.6872261762619019, "learning_rate": 1.4653266044461474e-05, "loss": 1.4672, "mean_token_accuracy": 0.648734375834465, "num_tokens": 1180163008.0, "step": 7028 }, { "entropy": 1.7607576847076416, "epoch": 0.7721842300403724, "grad_norm": 0.6853843927383423, "learning_rate": 1.465180270013205e-05, "loss": 1.4754, "mean_token_accuracy": 0.6421893537044525, "num_tokens": 1180316292.0, "step": 7029 }, { "entropy": 1.6858061254024506, "epoch": 0.7722940869517454, "grad_norm": 0.684089720249176, "learning_rate": 1.465033924022472e-05, "loss": 1.4033, "mean_token_accuracy": 0.6613581776618958, "num_tokens": 1180452591.0, "step": 7030 }, { "entropy": 1.692560573418935, "epoch": 0.7724039438631183, "grad_norm": 0.6754666566848755, "learning_rate": 1.4648875664785797e-05, "loss": 1.4735, "mean_token_accuracy": 0.6475921819607416, "num_tokens": 1180654465.0, "step": 7031 }, { "entropy": 1.7072215179602306, "epoch": 0.7725138007744913, "grad_norm": 0.6174911260604858, "learning_rate": 1.4647411973861601e-05, "loss": 1.5394, "mean_token_accuracy": 0.6355844636758169, "num_tokens": 1180883593.0, "step": 7032 }, { "entropy": 1.7164014180501301, "epoch": 0.7726236576858642, "grad_norm": 0.760221004486084, "learning_rate": 1.464594816749846e-05, "loss": 1.3237, "mean_token_accuracy": 0.6593389511108398, "num_tokens": 1181049298.0, "step": 7033 }, { "entropy": 1.7105709115664165, "epoch": 0.772733514597237, "grad_norm": 0.7115726470947266, "learning_rate": 1.4644484245742704e-05, "loss": 1.568, "mean_token_accuracy": 0.642639140288035, "num_tokens": 1181262923.0, "step": 7034 }, { "entropy": 1.7091182271639507, "epoch": 0.77284337150861, "grad_norm": 0.7895721197128296, "learning_rate": 1.4643020208640664e-05, "loss": 1.4203, "mean_token_accuracy": 0.652391200264295, "num_tokens": 1181432024.0, "step": 7035 }, { "entropy": 1.6460919280846913, "epoch": 0.7729532284199829, "grad_norm": 0.5189076662063599, "learning_rate": 1.4641556056238675e-05, "loss": 1.532, "mean_token_accuracy": 0.6269241819779078, "num_tokens": 1181703567.0, "step": 7036 }, { "entropy": 1.738191584746043, "epoch": 0.7730630853313559, "grad_norm": 0.8178884983062744, "learning_rate": 1.4640091788583079e-05, "loss": 1.5218, "mean_token_accuracy": 0.6404098868370056, "num_tokens": 1181891405.0, "step": 7037 }, { "entropy": 1.7580572366714478, "epoch": 0.7731729422427288, "grad_norm": 0.594333291053772, "learning_rate": 1.4638627405720216e-05, "loss": 1.5078, "mean_token_accuracy": 0.6458245019117991, "num_tokens": 1182086324.0, "step": 7038 }, { "entropy": 1.7033535142739613, "epoch": 0.7732827991541018, "grad_norm": 0.6162420511245728, "learning_rate": 1.4637162907696438e-05, "loss": 1.2653, "mean_token_accuracy": 0.6843320180972418, "num_tokens": 1182290496.0, "step": 7039 }, { "entropy": 1.7195685009161632, "epoch": 0.7733926560654747, "grad_norm": 0.7166089415550232, "learning_rate": 1.4635698294558092e-05, "loss": 1.4405, "mean_token_accuracy": 0.6487057308355967, "num_tokens": 1182489457.0, "step": 7040 }, { "entropy": 1.7417178054650624, "epoch": 0.7735025129768477, "grad_norm": 0.8565784096717834, "learning_rate": 1.463423356635153e-05, "loss": 1.4438, "mean_token_accuracy": 0.643385499715805, "num_tokens": 1182731014.0, "step": 7041 }, { "entropy": 1.7084386845429738, "epoch": 0.7736123698882206, "grad_norm": 0.5510115027427673, "learning_rate": 1.4632768723123119e-05, "loss": 1.5365, "mean_token_accuracy": 0.642001653711001, "num_tokens": 1182947354.0, "step": 7042 }, { "entropy": 1.7248138189315796, "epoch": 0.7737222267995936, "grad_norm": 0.7305212616920471, "learning_rate": 1.4631303764919208e-05, "loss": 1.1954, "mean_token_accuracy": 0.6862581819295883, "num_tokens": 1183074561.0, "step": 7043 }, { "entropy": 1.7128371099630992, "epoch": 0.7738320837109665, "grad_norm": 0.7132003307342529, "learning_rate": 1.4629838691786176e-05, "loss": 1.3073, "mean_token_accuracy": 0.6683625827232996, "num_tokens": 1183198631.0, "step": 7044 }, { "entropy": 1.7525557577610016, "epoch": 0.7739419406223395, "grad_norm": 0.724991500377655, "learning_rate": 1.462837350377038e-05, "loss": 1.5135, "mean_token_accuracy": 0.6378008325894674, "num_tokens": 1183375784.0, "step": 7045 }, { "entropy": 1.717278391122818, "epoch": 0.7740517975337123, "grad_norm": 0.674461841583252, "learning_rate": 1.4626908200918201e-05, "loss": 1.3914, "mean_token_accuracy": 0.6517154922087988, "num_tokens": 1183544082.0, "step": 7046 }, { "entropy": 1.5977116922537486, "epoch": 0.7741616544450852, "grad_norm": 0.6767549514770508, "learning_rate": 1.4625442783276012e-05, "loss": 1.2301, "mean_token_accuracy": 0.6752993414799372, "num_tokens": 1183679452.0, "step": 7047 }, { "entropy": 1.7543257574240367, "epoch": 0.7742715113564582, "grad_norm": 0.6589480638504028, "learning_rate": 1.462397725089019e-05, "loss": 1.4896, "mean_token_accuracy": 0.65002969900767, "num_tokens": 1183809390.0, "step": 7048 }, { "entropy": 1.6917062997817993, "epoch": 0.7743813682678311, "grad_norm": 0.6566320061683655, "learning_rate": 1.462251160380712e-05, "loss": 1.3941, "mean_token_accuracy": 0.6552205433448156, "num_tokens": 1183967409.0, "step": 7049 }, { "entropy": 1.6994734903176625, "epoch": 0.7744912251792041, "grad_norm": 0.7120063900947571, "learning_rate": 1.4621045842073194e-05, "loss": 1.4939, "mean_token_accuracy": 0.6527921060721079, "num_tokens": 1184183390.0, "step": 7050 }, { "entropy": 1.6695085167884827, "epoch": 0.774601082090577, "grad_norm": 0.8632370829582214, "learning_rate": 1.4619579965734797e-05, "loss": 1.4014, "mean_token_accuracy": 0.6676080425580343, "num_tokens": 1184346247.0, "step": 7051 }, { "entropy": 1.6956228117148082, "epoch": 0.77471093900195, "grad_norm": 0.6049069166183472, "learning_rate": 1.4618113974838324e-05, "loss": 1.3815, "mean_token_accuracy": 0.6569184164206187, "num_tokens": 1184519454.0, "step": 7052 }, { "entropy": 1.6839020152886708, "epoch": 0.7748207959133229, "grad_norm": 0.6516950726509094, "learning_rate": 1.4616647869430174e-05, "loss": 1.3496, "mean_token_accuracy": 0.665029858549436, "num_tokens": 1184698002.0, "step": 7053 }, { "entropy": 1.706792841355006, "epoch": 0.7749306528246959, "grad_norm": 0.6899370551109314, "learning_rate": 1.4615181649556751e-05, "loss": 1.5164, "mean_token_accuracy": 0.6479889204104742, "num_tokens": 1184859559.0, "step": 7054 }, { "entropy": 1.726404498020808, "epoch": 0.7750405097360688, "grad_norm": 0.5813102126121521, "learning_rate": 1.4613715315264453e-05, "loss": 1.4043, "mean_token_accuracy": 0.6609879980484644, "num_tokens": 1185057723.0, "step": 7055 }, { "entropy": 1.7247681121031444, "epoch": 0.7751503666474417, "grad_norm": 0.6459679007530212, "learning_rate": 1.4612248866599698e-05, "loss": 1.438, "mean_token_accuracy": 0.6407895038525263, "num_tokens": 1185236310.0, "step": 7056 }, { "entropy": 1.696563959121704, "epoch": 0.7752602235588146, "grad_norm": 0.8122643828392029, "learning_rate": 1.4610782303608895e-05, "loss": 1.5251, "mean_token_accuracy": 0.662423754731814, "num_tokens": 1185411741.0, "step": 7057 }, { "entropy": 1.6828083793322246, "epoch": 0.7753700804701876, "grad_norm": 0.7139939665794373, "learning_rate": 1.4609315626338455e-05, "loss": 1.2945, "mean_token_accuracy": 0.671471560994784, "num_tokens": 1185540548.0, "step": 7058 }, { "entropy": 1.6743756433327992, "epoch": 0.7754799373815605, "grad_norm": 0.6616625785827637, "learning_rate": 1.4607848834834808e-05, "loss": 1.307, "mean_token_accuracy": 0.6813914626836777, "num_tokens": 1185668124.0, "step": 7059 }, { "entropy": 1.6928254266579945, "epoch": 0.7755897942929334, "grad_norm": 0.7479018568992615, "learning_rate": 1.4606381929144366e-05, "loss": 1.373, "mean_token_accuracy": 0.6539994676907858, "num_tokens": 1185800426.0, "step": 7060 }, { "entropy": 1.6936748921871185, "epoch": 0.7756996512043064, "grad_norm": 0.8162828087806702, "learning_rate": 1.4604914909313562e-05, "loss": 1.211, "mean_token_accuracy": 0.6736436436573664, "num_tokens": 1185908583.0, "step": 7061 }, { "entropy": 1.713008721669515, "epoch": 0.7758095081156793, "grad_norm": 0.6247701644897461, "learning_rate": 1.4603447775388825e-05, "loss": 1.5041, "mean_token_accuracy": 0.6550228893756866, "num_tokens": 1186056753.0, "step": 7062 }, { "entropy": 1.752008448044459, "epoch": 0.7759193650270523, "grad_norm": 0.6326964497566223, "learning_rate": 1.4601980527416593e-05, "loss": 1.4098, "mean_token_accuracy": 0.6500469148159027, "num_tokens": 1186254903.0, "step": 7063 }, { "entropy": 1.71945525209109, "epoch": 0.7760292219384252, "grad_norm": 0.6365431547164917, "learning_rate": 1.4600513165443298e-05, "loss": 1.4776, "mean_token_accuracy": 0.6456809441248575, "num_tokens": 1186449812.0, "step": 7064 }, { "entropy": 1.7330725888411205, "epoch": 0.7761390788497982, "grad_norm": 0.7301865816116333, "learning_rate": 1.4599045689515383e-05, "loss": 1.2947, "mean_token_accuracy": 0.6618055999279022, "num_tokens": 1186602119.0, "step": 7065 }, { "entropy": 1.732138842344284, "epoch": 0.776248935761171, "grad_norm": 0.639707624912262, "learning_rate": 1.4597578099679293e-05, "loss": 1.3862, "mean_token_accuracy": 0.6675709386666616, "num_tokens": 1186752684.0, "step": 7066 }, { "entropy": 1.7195107837518055, "epoch": 0.776358792672544, "grad_norm": 0.6329506635665894, "learning_rate": 1.4596110395981477e-05, "loss": 1.5057, "mean_token_accuracy": 0.6421088526646296, "num_tokens": 1186923028.0, "step": 7067 }, { "entropy": 1.7601061860720317, "epoch": 0.7764686495839169, "grad_norm": 0.7304174304008484, "learning_rate": 1.459464257846839e-05, "loss": 1.4543, "mean_token_accuracy": 0.6482264697551727, "num_tokens": 1187126827.0, "step": 7068 }, { "entropy": 1.6720358630021412, "epoch": 0.7765785064952899, "grad_norm": 0.6124829649925232, "learning_rate": 1.4593174647186484e-05, "loss": 1.3228, "mean_token_accuracy": 0.6597183843453726, "num_tokens": 1187279117.0, "step": 7069 }, { "entropy": 1.7097432514031727, "epoch": 0.7766883634066628, "grad_norm": 0.6492157578468323, "learning_rate": 1.459170660218222e-05, "loss": 1.4168, "mean_token_accuracy": 0.6490695029497147, "num_tokens": 1187477267.0, "step": 7070 }, { "entropy": 1.6731528639793396, "epoch": 0.7767982203180358, "grad_norm": 0.6846409440040588, "learning_rate": 1.4590238443502062e-05, "loss": 1.3208, "mean_token_accuracy": 0.6802329818407694, "num_tokens": 1187640442.0, "step": 7071 }, { "entropy": 1.694802353779475, "epoch": 0.7769080772294087, "grad_norm": 0.6815143823623657, "learning_rate": 1.458877017119247e-05, "loss": 1.442, "mean_token_accuracy": 0.6708350131909052, "num_tokens": 1187819222.0, "step": 7072 }, { "entropy": 1.7885936399300892, "epoch": 0.7770179341407817, "grad_norm": 0.6548523902893066, "learning_rate": 1.4587301785299925e-05, "loss": 1.4269, "mean_token_accuracy": 0.6466637452443441, "num_tokens": 1188034722.0, "step": 7073 }, { "entropy": 1.7135749161243439, "epoch": 0.7771277910521546, "grad_norm": 0.7812349796295166, "learning_rate": 1.4585833285870891e-05, "loss": 1.3671, "mean_token_accuracy": 0.6483140687147776, "num_tokens": 1188216014.0, "step": 7074 }, { "entropy": 1.6287512878576915, "epoch": 0.7772376479635275, "grad_norm": 0.7831335663795471, "learning_rate": 1.4584364672951851e-05, "loss": 1.3522, "mean_token_accuracy": 0.6784834712743759, "num_tokens": 1188410952.0, "step": 7075 }, { "entropy": 1.7114653885364532, "epoch": 0.7773475048749005, "grad_norm": 0.7758569121360779, "learning_rate": 1.4582895946589287e-05, "loss": 1.3488, "mean_token_accuracy": 0.6737766712903976, "num_tokens": 1188548783.0, "step": 7076 }, { "entropy": 1.7152994672457378, "epoch": 0.7774573617862733, "grad_norm": 0.6986987590789795, "learning_rate": 1.4581427106829675e-05, "loss": 1.4113, "mean_token_accuracy": 0.6604458590348562, "num_tokens": 1188715804.0, "step": 7077 }, { "entropy": 1.7589812874794006, "epoch": 0.7775672186976463, "grad_norm": 0.9140593409538269, "learning_rate": 1.4579958153719513e-05, "loss": 1.2027, "mean_token_accuracy": 0.6834556410710017, "num_tokens": 1188807708.0, "step": 7078 }, { "entropy": 1.6676159103711445, "epoch": 0.7776770756090192, "grad_norm": 0.6604957580566406, "learning_rate": 1.4578489087305286e-05, "loss": 1.395, "mean_token_accuracy": 0.6646223912636439, "num_tokens": 1189019253.0, "step": 7079 }, { "entropy": 1.701384961605072, "epoch": 0.7777869325203922, "grad_norm": 0.6634955406188965, "learning_rate": 1.4577019907633494e-05, "loss": 1.3598, "mean_token_accuracy": 0.6663598666588465, "num_tokens": 1189162079.0, "step": 7080 }, { "entropy": 1.7295205891132355, "epoch": 0.7778967894317651, "grad_norm": 0.7290459275245667, "learning_rate": 1.4575550614750636e-05, "loss": 1.4276, "mean_token_accuracy": 0.6518198847770691, "num_tokens": 1189348501.0, "step": 7081 }, { "entropy": 1.6786798735459645, "epoch": 0.7780066463431381, "grad_norm": 0.8376657962799072, "learning_rate": 1.4574081208703205e-05, "loss": 1.5544, "mean_token_accuracy": 0.6468896766503652, "num_tokens": 1189516193.0, "step": 7082 }, { "entropy": 1.6363192001978557, "epoch": 0.778116503254511, "grad_norm": 0.7634339332580566, "learning_rate": 1.457261168953772e-05, "loss": 1.4173, "mean_token_accuracy": 0.6621982008218765, "num_tokens": 1189666179.0, "step": 7083 }, { "entropy": 1.6522767841815948, "epoch": 0.778226360165884, "grad_norm": 0.6967000365257263, "learning_rate": 1.4571142057300683e-05, "loss": 1.2449, "mean_token_accuracy": 0.677667478720347, "num_tokens": 1189784559.0, "step": 7084 }, { "entropy": 1.667193869749705, "epoch": 0.7783362170772569, "grad_norm": 0.6377148032188416, "learning_rate": 1.4569672312038607e-05, "loss": 1.4232, "mean_token_accuracy": 0.6659876654545466, "num_tokens": 1189964900.0, "step": 7085 }, { "entropy": 1.7085439264774323, "epoch": 0.7784460739886299, "grad_norm": 0.743248701095581, "learning_rate": 1.4568202453798014e-05, "loss": 1.372, "mean_token_accuracy": 0.6602604488531748, "num_tokens": 1190156829.0, "step": 7086 }, { "entropy": 1.6639246940612793, "epoch": 0.7785559309000027, "grad_norm": 0.718908429145813, "learning_rate": 1.4566732482625423e-05, "loss": 1.3575, "mean_token_accuracy": 0.6681891083717346, "num_tokens": 1190316623.0, "step": 7087 }, { "entropy": 1.6937816043694813, "epoch": 0.7786657878113756, "grad_norm": 0.7970039248466492, "learning_rate": 1.4565262398567352e-05, "loss": 1.2439, "mean_token_accuracy": 0.6761472771565119, "num_tokens": 1190464941.0, "step": 7088 }, { "entropy": 1.639806220928828, "epoch": 0.7787756447227486, "grad_norm": 0.5538852214813232, "learning_rate": 1.4563792201670334e-05, "loss": 1.3917, "mean_token_accuracy": 0.6550086786349615, "num_tokens": 1190671240.0, "step": 7089 }, { "entropy": 1.7520277798175812, "epoch": 0.7788855016341215, "grad_norm": 0.7734472751617432, "learning_rate": 1.45623218919809e-05, "loss": 1.3325, "mean_token_accuracy": 0.6582324057817459, "num_tokens": 1190786339.0, "step": 7090 }, { "entropy": 1.715962419907252, "epoch": 0.7789953585454945, "grad_norm": 0.682809054851532, "learning_rate": 1.456085146954558e-05, "loss": 1.3316, "mean_token_accuracy": 0.6533773044745127, "num_tokens": 1190936395.0, "step": 7091 }, { "entropy": 1.6548383732636769, "epoch": 0.7791052154568674, "grad_norm": 0.7674005627632141, "learning_rate": 1.4559380934410918e-05, "loss": 1.4276, "mean_token_accuracy": 0.6647045860687891, "num_tokens": 1191063187.0, "step": 7092 }, { "entropy": 1.7424963613351185, "epoch": 0.7792150723682404, "grad_norm": 0.8039253950119019, "learning_rate": 1.4557910286623456e-05, "loss": 1.4081, "mean_token_accuracy": 0.6619451393683752, "num_tokens": 1191216376.0, "step": 7093 }, { "entropy": 1.673016995191574, "epoch": 0.7793249292796133, "grad_norm": 0.750651478767395, "learning_rate": 1.455643952622973e-05, "loss": 1.4322, "mean_token_accuracy": 0.6569622804721197, "num_tokens": 1191362087.0, "step": 7094 }, { "entropy": 1.7055266002813976, "epoch": 0.7794347861909863, "grad_norm": 0.74098801612854, "learning_rate": 1.4554968653276303e-05, "loss": 1.4917, "mean_token_accuracy": 0.6539320250352224, "num_tokens": 1191506306.0, "step": 7095 }, { "entropy": 1.6832468211650848, "epoch": 0.7795446431023592, "grad_norm": 0.6400901675224304, "learning_rate": 1.4553497667809716e-05, "loss": 1.5095, "mean_token_accuracy": 0.637568806608518, "num_tokens": 1191702148.0, "step": 7096 }, { "entropy": 1.6865523755550385, "epoch": 0.7796545000137322, "grad_norm": 0.7597500681877136, "learning_rate": 1.455202656987653e-05, "loss": 1.5815, "mean_token_accuracy": 0.6512685567140579, "num_tokens": 1191868681.0, "step": 7097 }, { "entropy": 1.7562141319115956, "epoch": 0.779764356925105, "grad_norm": 0.7682708501815796, "learning_rate": 1.4550555359523303e-05, "loss": 1.3168, "mean_token_accuracy": 0.6706572075684866, "num_tokens": 1192009219.0, "step": 7098 }, { "entropy": 1.7148883839448292, "epoch": 0.779874213836478, "grad_norm": 0.7574900984764099, "learning_rate": 1.45490840367966e-05, "loss": 1.6325, "mean_token_accuracy": 0.6384792327880859, "num_tokens": 1192191135.0, "step": 7099 }, { "entropy": 1.6949690977732341, "epoch": 0.7799840707478509, "grad_norm": 0.643990695476532, "learning_rate": 1.4547612601742984e-05, "loss": 1.3238, "mean_token_accuracy": 0.6610787808895111, "num_tokens": 1192365786.0, "step": 7100 }, { "entropy": 1.6968937317530315, "epoch": 0.7800939276592238, "grad_norm": 0.6925724744796753, "learning_rate": 1.4546141054409026e-05, "loss": 1.3673, "mean_token_accuracy": 0.6582049876451492, "num_tokens": 1192499916.0, "step": 7101 }, { "entropy": 1.6771467129389446, "epoch": 0.7802037845705968, "grad_norm": 6.278994560241699, "learning_rate": 1.4544669394841307e-05, "loss": 1.3181, "mean_token_accuracy": 0.676389808456103, "num_tokens": 1192644459.0, "step": 7102 }, { "entropy": 1.7438280681769054, "epoch": 0.7803136414819697, "grad_norm": 0.6767034530639648, "learning_rate": 1.4543197623086398e-05, "loss": 1.2602, "mean_token_accuracy": 0.6726950407028198, "num_tokens": 1192759155.0, "step": 7103 }, { "entropy": 1.6500231822331746, "epoch": 0.7804234983933427, "grad_norm": 0.642570972442627, "learning_rate": 1.454172573919088e-05, "loss": 1.3393, "mean_token_accuracy": 0.6680295219024023, "num_tokens": 1192903425.0, "step": 7104 }, { "entropy": 1.6823652784029643, "epoch": 0.7805333553047156, "grad_norm": 0.7715455889701843, "learning_rate": 1.4540253743201336e-05, "loss": 1.1928, "mean_token_accuracy": 0.6783884565035502, "num_tokens": 1193030964.0, "step": 7105 }, { "entropy": 1.67123677333196, "epoch": 0.7806432122160886, "grad_norm": 0.6010688543319702, "learning_rate": 1.4538781635164359e-05, "loss": 1.4498, "mean_token_accuracy": 0.6451991299788157, "num_tokens": 1193238226.0, "step": 7106 }, { "entropy": 1.7276023924350739, "epoch": 0.7807530691274615, "grad_norm": 0.7072981595993042, "learning_rate": 1.4537309415126535e-05, "loss": 1.2735, "mean_token_accuracy": 0.677293395002683, "num_tokens": 1193368968.0, "step": 7107 }, { "entropy": 1.7027284701665242, "epoch": 0.7808629260388344, "grad_norm": 1.048176884651184, "learning_rate": 1.4535837083134465e-05, "loss": 1.5693, "mean_token_accuracy": 0.6457755664984385, "num_tokens": 1193493783.0, "step": 7108 }, { "entropy": 1.710277110338211, "epoch": 0.7809727829502073, "grad_norm": 0.7665229439735413, "learning_rate": 1.4534364639234744e-05, "loss": 1.3597, "mean_token_accuracy": 0.6668632626533508, "num_tokens": 1193646619.0, "step": 7109 }, { "entropy": 1.7380196849505107, "epoch": 0.7810826398615803, "grad_norm": 0.6514268517494202, "learning_rate": 1.4532892083473973e-05, "loss": 1.4335, "mean_token_accuracy": 0.6352319270372391, "num_tokens": 1193853184.0, "step": 7110 }, { "entropy": 1.659955104192098, "epoch": 0.7811924967729532, "grad_norm": 0.6248233914375305, "learning_rate": 1.4531419415898762e-05, "loss": 1.4731, "mean_token_accuracy": 0.6436507304509481, "num_tokens": 1194058800.0, "step": 7111 }, { "entropy": 1.766392429669698, "epoch": 0.7813023536843262, "grad_norm": 0.6671506762504578, "learning_rate": 1.4529946636555716e-05, "loss": 1.4857, "mean_token_accuracy": 0.642402226726214, "num_tokens": 1194250202.0, "step": 7112 }, { "entropy": 1.737959663073222, "epoch": 0.7814122105956991, "grad_norm": 0.5975798964500427, "learning_rate": 1.452847374549145e-05, "loss": 1.3968, "mean_token_accuracy": 0.6597791264454523, "num_tokens": 1194441641.0, "step": 7113 }, { "entropy": 1.7000750998655956, "epoch": 0.7815220675070721, "grad_norm": 0.6063849329948425, "learning_rate": 1.452700074275258e-05, "loss": 1.4046, "mean_token_accuracy": 0.6620944837729136, "num_tokens": 1194625979.0, "step": 7114 }, { "entropy": 1.7005607883135478, "epoch": 0.781631924418445, "grad_norm": 0.5850129127502441, "learning_rate": 1.4525527628385728e-05, "loss": 1.3936, "mean_token_accuracy": 0.6443201154470444, "num_tokens": 1194826016.0, "step": 7115 }, { "entropy": 1.728336493174235, "epoch": 0.7817417813298179, "grad_norm": 0.6136082410812378, "learning_rate": 1.4524054402437511e-05, "loss": 1.4068, "mean_token_accuracy": 0.6534546116987864, "num_tokens": 1194994162.0, "step": 7116 }, { "entropy": 1.7266682982444763, "epoch": 0.7818516382411909, "grad_norm": 0.6669444441795349, "learning_rate": 1.4522581064954563e-05, "loss": 1.3809, "mean_token_accuracy": 0.6540538171927134, "num_tokens": 1195189176.0, "step": 7117 }, { "entropy": 1.756045748790105, "epoch": 0.7819614951525637, "grad_norm": 0.674505889415741, "learning_rate": 1.4521107615983511e-05, "loss": 1.3878, "mean_token_accuracy": 0.6560290704170862, "num_tokens": 1195346345.0, "step": 7118 }, { "entropy": 1.6180338263511658, "epoch": 0.7820713520639367, "grad_norm": 0.6228286623954773, "learning_rate": 1.4519634055570988e-05, "loss": 1.2565, "mean_token_accuracy": 0.6812761723995209, "num_tokens": 1195480456.0, "step": 7119 }, { "entropy": 1.7572944561640422, "epoch": 0.7821812089753096, "grad_norm": 0.6308412551879883, "learning_rate": 1.4518160383763635e-05, "loss": 1.4338, "mean_token_accuracy": 0.6442923347155253, "num_tokens": 1195630768.0, "step": 7120 }, { "entropy": 1.7162999709447224, "epoch": 0.7822910658866826, "grad_norm": 0.7594370245933533, "learning_rate": 1.4516686600608089e-05, "loss": 1.4204, "mean_token_accuracy": 0.656131515900294, "num_tokens": 1195813359.0, "step": 7121 }, { "entropy": 1.721927394469579, "epoch": 0.7824009227980555, "grad_norm": 0.7334505915641785, "learning_rate": 1.4515212706151001e-05, "loss": 1.2533, "mean_token_accuracy": 0.6717882007360458, "num_tokens": 1195918100.0, "step": 7122 }, { "entropy": 1.7734851737817128, "epoch": 0.7825107797094285, "grad_norm": 0.949683427810669, "learning_rate": 1.4513738700439014e-05, "loss": 1.5294, "mean_token_accuracy": 0.642572283744812, "num_tokens": 1196061449.0, "step": 7123 }, { "entropy": 1.7282946904500325, "epoch": 0.7826206366208014, "grad_norm": 0.7295317649841309, "learning_rate": 1.4512264583518776e-05, "loss": 1.4733, "mean_token_accuracy": 0.6531463364760081, "num_tokens": 1196213722.0, "step": 7124 }, { "entropy": 1.6672942737738292, "epoch": 0.7827304935321744, "grad_norm": 0.620469868183136, "learning_rate": 1.451079035543695e-05, "loss": 1.3513, "mean_token_accuracy": 0.6611681828896204, "num_tokens": 1196354639.0, "step": 7125 }, { "entropy": 1.743516246477763, "epoch": 0.7828403504435473, "grad_norm": 0.7476531863212585, "learning_rate": 1.4509316016240189e-05, "loss": 1.4161, "mean_token_accuracy": 0.6624071647723516, "num_tokens": 1196531954.0, "step": 7126 }, { "entropy": 1.6648136377334595, "epoch": 0.7829502073549203, "grad_norm": 0.6160597801208496, "learning_rate": 1.4507841565975163e-05, "loss": 1.3679, "mean_token_accuracy": 0.6603780339161555, "num_tokens": 1196696042.0, "step": 7127 }, { "entropy": 1.6777517398198445, "epoch": 0.7830600642662932, "grad_norm": 0.667905330657959, "learning_rate": 1.4506367004688526e-05, "loss": 1.2421, "mean_token_accuracy": 0.6805467208226522, "num_tokens": 1196869852.0, "step": 7128 }, { "entropy": 1.773158888022105, "epoch": 0.783169921177666, "grad_norm": 0.8235062956809998, "learning_rate": 1.4504892332426954e-05, "loss": 1.497, "mean_token_accuracy": 0.6440630505482355, "num_tokens": 1197042159.0, "step": 7129 }, { "entropy": 1.67943408091863, "epoch": 0.783279778089039, "grad_norm": 0.6340872049331665, "learning_rate": 1.450341754923712e-05, "loss": 1.3416, "mean_token_accuracy": 0.6593078672885895, "num_tokens": 1197192533.0, "step": 7130 }, { "entropy": 1.6706339716911316, "epoch": 0.7833896350004119, "grad_norm": 0.5731471180915833, "learning_rate": 1.4501942655165701e-05, "loss": 1.4853, "mean_token_accuracy": 0.6310961991548538, "num_tokens": 1197393580.0, "step": 7131 }, { "entropy": 1.698110560576121, "epoch": 0.7834994919117849, "grad_norm": 0.7156584858894348, "learning_rate": 1.4500467650259373e-05, "loss": 1.3617, "mean_token_accuracy": 0.6580530057350794, "num_tokens": 1197545276.0, "step": 7132 }, { "entropy": 1.6475163499514263, "epoch": 0.7836093488231578, "grad_norm": 0.8746734261512756, "learning_rate": 1.4498992534564823e-05, "loss": 1.3337, "mean_token_accuracy": 0.6697489966948827, "num_tokens": 1197712691.0, "step": 7133 }, { "entropy": 1.7037550906340282, "epoch": 0.7837192057345308, "grad_norm": 0.6587815284729004, "learning_rate": 1.4497517308128734e-05, "loss": 1.4479, "mean_token_accuracy": 0.6581203639507294, "num_tokens": 1197877900.0, "step": 7134 }, { "entropy": 1.6985561152299244, "epoch": 0.7838290626459037, "grad_norm": 0.6671653985977173, "learning_rate": 1.44960419709978e-05, "loss": 1.2729, "mean_token_accuracy": 0.6706394900878271, "num_tokens": 1198012365.0, "step": 7135 }, { "entropy": 1.68305508295695, "epoch": 0.7839389195572767, "grad_norm": 0.6679463982582092, "learning_rate": 1.449456652321871e-05, "loss": 1.2377, "mean_token_accuracy": 0.6831518908341726, "num_tokens": 1198152717.0, "step": 7136 }, { "entropy": 1.7280430893103282, "epoch": 0.7840487764686496, "grad_norm": 0.8825252056121826, "learning_rate": 1.4493090964838167e-05, "loss": 1.3264, "mean_token_accuracy": 0.6597543060779572, "num_tokens": 1198339044.0, "step": 7137 }, { "entropy": 1.7101606527964275, "epoch": 0.7841586333800226, "grad_norm": 0.6220462322235107, "learning_rate": 1.449161529590287e-05, "loss": 1.3644, "mean_token_accuracy": 0.6578847219546636, "num_tokens": 1198461351.0, "step": 7138 }, { "entropy": 1.6869953870773315, "epoch": 0.7842684902913954, "grad_norm": 0.6071659922599792, "learning_rate": 1.449013951645952e-05, "loss": 1.552, "mean_token_accuracy": 0.636215329170227, "num_tokens": 1198670100.0, "step": 7139 }, { "entropy": 1.6908225218454997, "epoch": 0.7843783472027684, "grad_norm": 0.72711580991745, "learning_rate": 1.4488663626554826e-05, "loss": 1.4751, "mean_token_accuracy": 0.6628256092468897, "num_tokens": 1198842301.0, "step": 7140 }, { "entropy": 1.7508656183878581, "epoch": 0.7844882041141413, "grad_norm": 0.6913683414459229, "learning_rate": 1.4487187626235504e-05, "loss": 1.4579, "mean_token_accuracy": 0.6502855817476908, "num_tokens": 1198992313.0, "step": 7141 }, { "entropy": 1.6920421818892162, "epoch": 0.7845980610255142, "grad_norm": 0.711681604385376, "learning_rate": 1.4485711515548261e-05, "loss": 1.37, "mean_token_accuracy": 0.6575459539890289, "num_tokens": 1199136804.0, "step": 7142 }, { "entropy": 1.6301299730936687, "epoch": 0.7847079179368872, "grad_norm": 0.7222129106521606, "learning_rate": 1.4484235294539824e-05, "loss": 1.3653, "mean_token_accuracy": 0.669685035943985, "num_tokens": 1199302100.0, "step": 7143 }, { "entropy": 1.6759273211161296, "epoch": 0.7848177748482601, "grad_norm": 0.727353036403656, "learning_rate": 1.4482758963256904e-05, "loss": 1.2621, "mean_token_accuracy": 0.6747185587882996, "num_tokens": 1199431204.0, "step": 7144 }, { "entropy": 1.7488112548987071, "epoch": 0.7849276317596331, "grad_norm": 0.6163308024406433, "learning_rate": 1.4481282521746236e-05, "loss": 1.5528, "mean_token_accuracy": 0.6342363655567169, "num_tokens": 1199615484.0, "step": 7145 }, { "entropy": 1.7138899366060893, "epoch": 0.785037488671006, "grad_norm": 0.619773268699646, "learning_rate": 1.4479805970054544e-05, "loss": 1.4131, "mean_token_accuracy": 0.6630217432975769, "num_tokens": 1199768737.0, "step": 7146 }, { "entropy": 1.7128514150778453, "epoch": 0.785147345582379, "grad_norm": 0.7631600499153137, "learning_rate": 1.447832930822856e-05, "loss": 1.4182, "mean_token_accuracy": 0.6703273256619772, "num_tokens": 1199917429.0, "step": 7147 }, { "entropy": 1.7253048022588093, "epoch": 0.7852572024937519, "grad_norm": 0.6738438010215759, "learning_rate": 1.4476852536315022e-05, "loss": 1.2802, "mean_token_accuracy": 0.6651460230350494, "num_tokens": 1200032163.0, "step": 7148 }, { "entropy": 1.673990160226822, "epoch": 0.7853670594051249, "grad_norm": 0.6391650438308716, "learning_rate": 1.4475375654360669e-05, "loss": 1.3706, "mean_token_accuracy": 0.6688804576794306, "num_tokens": 1200202637.0, "step": 7149 }, { "entropy": 1.7560782929261525, "epoch": 0.7854769163164977, "grad_norm": 0.683594286441803, "learning_rate": 1.447389866241224e-05, "loss": 1.4088, "mean_token_accuracy": 0.6569011211395264, "num_tokens": 1200359101.0, "step": 7150 }, { "entropy": 1.7138656278451283, "epoch": 0.7855867732278707, "grad_norm": 0.6900503635406494, "learning_rate": 1.4472421560516485e-05, "loss": 1.4651, "mean_token_accuracy": 0.6426846434672674, "num_tokens": 1200560872.0, "step": 7151 }, { "entropy": 1.7244854867458344, "epoch": 0.7856966301392436, "grad_norm": 0.6141315698623657, "learning_rate": 1.4470944348720155e-05, "loss": 1.4302, "mean_token_accuracy": 0.6623003830512365, "num_tokens": 1200716958.0, "step": 7152 }, { "entropy": 1.712331473827362, "epoch": 0.7858064870506166, "grad_norm": 0.6378352046012878, "learning_rate": 1.4469467027069996e-05, "loss": 1.4881, "mean_token_accuracy": 0.6507144321997961, "num_tokens": 1200953835.0, "step": 7153 }, { "entropy": 1.7112789849440257, "epoch": 0.7859163439619895, "grad_norm": 0.6243149042129517, "learning_rate": 1.446798959561277e-05, "loss": 1.3715, "mean_token_accuracy": 0.6659561494986216, "num_tokens": 1201154401.0, "step": 7154 }, { "entropy": 1.6723881363868713, "epoch": 0.7860262008733624, "grad_norm": 0.7144157290458679, "learning_rate": 1.4466512054395238e-05, "loss": 1.2879, "mean_token_accuracy": 0.672190397977829, "num_tokens": 1201292259.0, "step": 7155 }, { "entropy": 1.6763904094696045, "epoch": 0.7861360577847354, "grad_norm": 0.6170639395713806, "learning_rate": 1.446503440346416e-05, "loss": 1.2683, "mean_token_accuracy": 0.6757438133160273, "num_tokens": 1201434618.0, "step": 7156 }, { "entropy": 1.7109374403953552, "epoch": 0.7862459146961083, "grad_norm": 0.7301081418991089, "learning_rate": 1.4463556642866305e-05, "loss": 1.3605, "mean_token_accuracy": 0.6616794069608053, "num_tokens": 1201592457.0, "step": 7157 }, { "entropy": 1.6911123394966125, "epoch": 0.7863557716074813, "grad_norm": 0.6597540974617004, "learning_rate": 1.4462078772648445e-05, "loss": 1.4674, "mean_token_accuracy": 0.6407536615928014, "num_tokens": 1201781440.0, "step": 7158 }, { "entropy": 1.6666575372219086, "epoch": 0.7864656285188542, "grad_norm": 0.5943217873573303, "learning_rate": 1.4460600792857349e-05, "loss": 1.4631, "mean_token_accuracy": 0.6552510807911555, "num_tokens": 1201987350.0, "step": 7159 }, { "entropy": 1.736344705025355, "epoch": 0.7865754854302272, "grad_norm": 0.6110522150993347, "learning_rate": 1.4459122703539796e-05, "loss": 1.5263, "mean_token_accuracy": 0.6426798502604166, "num_tokens": 1202207575.0, "step": 7160 }, { "entropy": 1.7043259739875793, "epoch": 0.7866853423416, "grad_norm": 0.763015866279602, "learning_rate": 1.4457644504742572e-05, "loss": 1.4422, "mean_token_accuracy": 0.654599666595459, "num_tokens": 1202348000.0, "step": 7161 }, { "entropy": 1.7122917970021565, "epoch": 0.786795199252973, "grad_norm": 0.8961231112480164, "learning_rate": 1.4456166196512453e-05, "loss": 1.4143, "mean_token_accuracy": 0.6518704841534296, "num_tokens": 1202509044.0, "step": 7162 }, { "entropy": 1.6719779272874196, "epoch": 0.7869050561643459, "grad_norm": 0.6332946419715881, "learning_rate": 1.4454687778896235e-05, "loss": 1.4086, "mean_token_accuracy": 0.6405781507492065, "num_tokens": 1202724539.0, "step": 7163 }, { "entropy": 1.7168918947378795, "epoch": 0.7870149130757189, "grad_norm": 0.7975315451622009, "learning_rate": 1.4453209251940706e-05, "loss": 1.3936, "mean_token_accuracy": 0.6588208178679148, "num_tokens": 1202860338.0, "step": 7164 }, { "entropy": 1.7201037506262462, "epoch": 0.7871247699870918, "grad_norm": 0.7638601660728455, "learning_rate": 1.4451730615692658e-05, "loss": 1.3508, "mean_token_accuracy": 0.6692859182755152, "num_tokens": 1202992554.0, "step": 7165 }, { "entropy": 1.6601012448469799, "epoch": 0.7872346268984648, "grad_norm": 0.5404378175735474, "learning_rate": 1.445025187019889e-05, "loss": 1.3115, "mean_token_accuracy": 0.6680645495653152, "num_tokens": 1203174793.0, "step": 7166 }, { "entropy": 1.7655375202496846, "epoch": 0.7873444838098377, "grad_norm": 0.7130011320114136, "learning_rate": 1.444877301550621e-05, "loss": 1.4636, "mean_token_accuracy": 0.6438860942920049, "num_tokens": 1203369080.0, "step": 7167 }, { "entropy": 1.7369131445884705, "epoch": 0.7874543407212107, "grad_norm": 0.6770559549331665, "learning_rate": 1.4447294051661414e-05, "loss": 1.3676, "mean_token_accuracy": 0.6571057687203089, "num_tokens": 1203515924.0, "step": 7168 }, { "entropy": 1.7329241931438446, "epoch": 0.7875641976325836, "grad_norm": 0.6496652960777283, "learning_rate": 1.4445814978711317e-05, "loss": 1.5801, "mean_token_accuracy": 0.6215295642614365, "num_tokens": 1203718060.0, "step": 7169 }, { "entropy": 1.7130872507890065, "epoch": 0.7876740545439564, "grad_norm": 0.6565669178962708, "learning_rate": 1.4444335796702726e-05, "loss": 1.449, "mean_token_accuracy": 0.6431066493193308, "num_tokens": 1203949259.0, "step": 7170 }, { "entropy": 1.7333206037680309, "epoch": 0.7877839114553294, "grad_norm": 0.7088605165481567, "learning_rate": 1.4442856505682462e-05, "loss": 1.4145, "mean_token_accuracy": 0.6694990048805872, "num_tokens": 1204132043.0, "step": 7171 }, { "entropy": 1.697807510693868, "epoch": 0.7878937683667023, "grad_norm": 0.8567109704017639, "learning_rate": 1.4441377105697339e-05, "loss": 1.4784, "mean_token_accuracy": 0.6585593720277151, "num_tokens": 1204333339.0, "step": 7172 }, { "entropy": 1.6664861639340718, "epoch": 0.7880036252780753, "grad_norm": 0.711167573928833, "learning_rate": 1.443989759679418e-05, "loss": 1.3998, "mean_token_accuracy": 0.6541877388954163, "num_tokens": 1204476596.0, "step": 7173 }, { "entropy": 1.7098850707213085, "epoch": 0.7881134821894482, "grad_norm": 0.6929422616958618, "learning_rate": 1.4438417979019817e-05, "loss": 1.5313, "mean_token_accuracy": 0.634042297800382, "num_tokens": 1204657222.0, "step": 7174 }, { "entropy": 1.7105639080206554, "epoch": 0.7882233391008212, "grad_norm": 0.6769076585769653, "learning_rate": 1.443693825242107e-05, "loss": 1.4749, "mean_token_accuracy": 0.6523736665646235, "num_tokens": 1204808131.0, "step": 7175 }, { "entropy": 1.6820484797159831, "epoch": 0.7883331960121941, "grad_norm": 0.6611175537109375, "learning_rate": 1.4435458417044777e-05, "loss": 1.3882, "mean_token_accuracy": 0.6505205978949865, "num_tokens": 1204971165.0, "step": 7176 }, { "entropy": 1.6880492369333904, "epoch": 0.7884430529235671, "grad_norm": 0.7320232391357422, "learning_rate": 1.4433978472937776e-05, "loss": 1.312, "mean_token_accuracy": 0.6604448159535726, "num_tokens": 1205093123.0, "step": 7177 }, { "entropy": 1.736960728963216, "epoch": 0.78855290983494, "grad_norm": 0.6706869602203369, "learning_rate": 1.44324984201469e-05, "loss": 1.3149, "mean_token_accuracy": 0.6695601592461268, "num_tokens": 1205218514.0, "step": 7178 }, { "entropy": 1.7094795008500416, "epoch": 0.788662766746313, "grad_norm": 0.6586654186248779, "learning_rate": 1.4431018258718996e-05, "loss": 1.3724, "mean_token_accuracy": 0.649998739361763, "num_tokens": 1205350578.0, "step": 7179 }, { "entropy": 1.6713014940420787, "epoch": 0.7887726236576859, "grad_norm": 0.6533616781234741, "learning_rate": 1.4429537988700913e-05, "loss": 1.3621, "mean_token_accuracy": 0.6759164482355118, "num_tokens": 1205503254.0, "step": 7180 }, { "entropy": 1.683766891558965, "epoch": 0.7888824805690589, "grad_norm": 0.67979496717453, "learning_rate": 1.4428057610139495e-05, "loss": 1.3932, "mean_token_accuracy": 0.6734537233908972, "num_tokens": 1205639667.0, "step": 7181 }, { "entropy": 1.7113385399182637, "epoch": 0.7889923374804317, "grad_norm": 0.712390124797821, "learning_rate": 1.4426577123081597e-05, "loss": 1.2857, "mean_token_accuracy": 0.6757234086592993, "num_tokens": 1205769605.0, "step": 7182 }, { "entropy": 1.6981197694937389, "epoch": 0.7891021943918046, "grad_norm": 0.6102924942970276, "learning_rate": 1.4425096527574082e-05, "loss": 1.3276, "mean_token_accuracy": 0.6542087992032369, "num_tokens": 1205933495.0, "step": 7183 }, { "entropy": 1.6887332499027252, "epoch": 0.7892120513031776, "grad_norm": 0.745152473449707, "learning_rate": 1.4423615823663804e-05, "loss": 1.3603, "mean_token_accuracy": 0.6610602786143621, "num_tokens": 1206050631.0, "step": 7184 }, { "entropy": 1.7006172637144725, "epoch": 0.7893219082145505, "grad_norm": 0.6827918887138367, "learning_rate": 1.4422135011397627e-05, "loss": 1.2546, "mean_token_accuracy": 0.6747910380363464, "num_tokens": 1206167560.0, "step": 7185 }, { "entropy": 1.7094734410444896, "epoch": 0.7894317651259235, "grad_norm": 0.7063429951667786, "learning_rate": 1.4420654090822416e-05, "loss": 1.3133, "mean_token_accuracy": 0.6641270716985067, "num_tokens": 1206342612.0, "step": 7186 }, { "entropy": 1.6872341831525166, "epoch": 0.7895416220372964, "grad_norm": 0.7216169238090515, "learning_rate": 1.4419173061985048e-05, "loss": 1.3068, "mean_token_accuracy": 0.6675632099310557, "num_tokens": 1206480850.0, "step": 7187 }, { "entropy": 1.7280444105466206, "epoch": 0.7896514789486694, "grad_norm": 0.7706807851791382, "learning_rate": 1.4417691924932394e-05, "loss": 1.3474, "mean_token_accuracy": 0.6651838620503744, "num_tokens": 1206625169.0, "step": 7188 }, { "entropy": 1.719985653956731, "epoch": 0.7897613358600423, "grad_norm": 0.7672920227050781, "learning_rate": 1.441621067971133e-05, "loss": 1.3996, "mean_token_accuracy": 0.6526385049025217, "num_tokens": 1206774389.0, "step": 7189 }, { "entropy": 1.7050624787807465, "epoch": 0.7898711927714153, "grad_norm": 0.8137822151184082, "learning_rate": 1.4414729326368736e-05, "loss": 1.4153, "mean_token_accuracy": 0.657172903418541, "num_tokens": 1206920548.0, "step": 7190 }, { "entropy": 1.7352370421091716, "epoch": 0.7899810496827882, "grad_norm": 0.6931704878807068, "learning_rate": 1.4413247864951499e-05, "loss": 1.4766, "mean_token_accuracy": 0.6547851413488388, "num_tokens": 1207111742.0, "step": 7191 }, { "entropy": 1.6695275406042736, "epoch": 0.7900909065941611, "grad_norm": 0.7764499187469482, "learning_rate": 1.4411766295506502e-05, "loss": 1.1244, "mean_token_accuracy": 0.7065702676773071, "num_tokens": 1207238801.0, "step": 7192 }, { "entropy": 1.651973952849706, "epoch": 0.790200763505534, "grad_norm": 0.8136328458786011, "learning_rate": 1.4410284618080644e-05, "loss": 1.3584, "mean_token_accuracy": 0.6729863931735357, "num_tokens": 1207392700.0, "step": 7193 }, { "entropy": 1.6933129529158275, "epoch": 0.790310620416907, "grad_norm": 0.7404091954231262, "learning_rate": 1.440880283272081e-05, "loss": 1.407, "mean_token_accuracy": 0.6515365193287531, "num_tokens": 1207583914.0, "step": 7194 }, { "entropy": 1.7414989471435547, "epoch": 0.7904204773282799, "grad_norm": 0.7047650814056396, "learning_rate": 1.4407320939473903e-05, "loss": 1.386, "mean_token_accuracy": 0.6629294902086258, "num_tokens": 1207747681.0, "step": 7195 }, { "entropy": 1.7473087112108867, "epoch": 0.7905303342396528, "grad_norm": 0.6341284513473511, "learning_rate": 1.4405838938386827e-05, "loss": 1.4854, "mean_token_accuracy": 0.6382193118333817, "num_tokens": 1207993633.0, "step": 7196 }, { "entropy": 1.7010388871033986, "epoch": 0.7906401911510258, "grad_norm": 0.7494300603866577, "learning_rate": 1.440435682950648e-05, "loss": 1.3331, "mean_token_accuracy": 0.669193853934606, "num_tokens": 1208164918.0, "step": 7197 }, { "entropy": 1.7257753908634186, "epoch": 0.7907500480623987, "grad_norm": 0.7151653170585632, "learning_rate": 1.4402874612879774e-05, "loss": 1.2647, "mean_token_accuracy": 0.6737553824981054, "num_tokens": 1208275864.0, "step": 7198 }, { "entropy": 1.7049545844395955, "epoch": 0.7908599049737717, "grad_norm": 0.8426851630210876, "learning_rate": 1.4401392288553622e-05, "loss": 1.4961, "mean_token_accuracy": 0.6478038181861242, "num_tokens": 1208466701.0, "step": 7199 }, { "entropy": 1.7350122928619385, "epoch": 0.7909697618851446, "grad_norm": 0.816241979598999, "learning_rate": 1.4399909856574931e-05, "loss": 1.3144, "mean_token_accuracy": 0.6634030193090439, "num_tokens": 1208595401.0, "step": 7200 }, { "entropy": 1.7607790033022563, "epoch": 0.7910796187965176, "grad_norm": 0.6901513934135437, "learning_rate": 1.4398427316990633e-05, "loss": 1.348, "mean_token_accuracy": 0.6634863515694936, "num_tokens": 1208778298.0, "step": 7201 }, { "entropy": 1.7141740421454112, "epoch": 0.7911894757078904, "grad_norm": 0.6500587463378906, "learning_rate": 1.4396944669847637e-05, "loss": 1.4433, "mean_token_accuracy": 0.6416673759619395, "num_tokens": 1208970192.0, "step": 7202 }, { "entropy": 1.68813360730807, "epoch": 0.7912993326192634, "grad_norm": 0.6901952624320984, "learning_rate": 1.4395461915192875e-05, "loss": 1.366, "mean_token_accuracy": 0.6568211714426676, "num_tokens": 1209101142.0, "step": 7203 }, { "entropy": 1.6981943150361378, "epoch": 0.7914091895306363, "grad_norm": 0.7170037627220154, "learning_rate": 1.439397905307327e-05, "loss": 1.5666, "mean_token_accuracy": 0.6461018125216166, "num_tokens": 1209262785.0, "step": 7204 }, { "entropy": 1.5867635409037273, "epoch": 0.7915190464420093, "grad_norm": 0.7785094976425171, "learning_rate": 1.4392496083535764e-05, "loss": 1.3767, "mean_token_accuracy": 0.6633408665657043, "num_tokens": 1209442179.0, "step": 7205 }, { "entropy": 1.7308302025000255, "epoch": 0.7916289033533822, "grad_norm": 0.6985065937042236, "learning_rate": 1.4391013006627276e-05, "loss": 1.4598, "mean_token_accuracy": 0.6464549154043198, "num_tokens": 1209665973.0, "step": 7206 }, { "entropy": 1.7229714790980022, "epoch": 0.7917387602647552, "grad_norm": 0.6461872458457947, "learning_rate": 1.438952982239476e-05, "loss": 1.4052, "mean_token_accuracy": 0.6459956765174866, "num_tokens": 1209842142.0, "step": 7207 }, { "entropy": 1.6630838414033253, "epoch": 0.7918486171761281, "grad_norm": 0.7045498490333557, "learning_rate": 1.4388046530885156e-05, "loss": 1.2883, "mean_token_accuracy": 0.6830503195524216, "num_tokens": 1209971675.0, "step": 7208 }, { "entropy": 1.712927410999934, "epoch": 0.7919584740875011, "grad_norm": 0.8392392992973328, "learning_rate": 1.43865631321454e-05, "loss": 1.3648, "mean_token_accuracy": 0.6657893657684326, "num_tokens": 1210119563.0, "step": 7209 }, { "entropy": 1.67608709136645, "epoch": 0.792068330998874, "grad_norm": 0.6663906574249268, "learning_rate": 1.438507962622245e-05, "loss": 1.2885, "mean_token_accuracy": 0.6710440864165624, "num_tokens": 1210253870.0, "step": 7210 }, { "entropy": 1.7072576979796092, "epoch": 0.7921781879102469, "grad_norm": 0.6991833448410034, "learning_rate": 1.4383596013163254e-05, "loss": 1.5431, "mean_token_accuracy": 0.641920750339826, "num_tokens": 1210486487.0, "step": 7211 }, { "entropy": 1.6564313073952992, "epoch": 0.7922880448216199, "grad_norm": 0.8407886028289795, "learning_rate": 1.4382112293014767e-05, "loss": 1.2964, "mean_token_accuracy": 0.6637519697348276, "num_tokens": 1210607227.0, "step": 7212 }, { "entropy": 1.72148593266805, "epoch": 0.7923979017329927, "grad_norm": 0.8114281892776489, "learning_rate": 1.4380628465823954e-05, "loss": 1.371, "mean_token_accuracy": 0.6630014181137085, "num_tokens": 1210763126.0, "step": 7213 }, { "entropy": 1.6811268826325734, "epoch": 0.7925077586443657, "grad_norm": 0.6577451825141907, "learning_rate": 1.4379144531637773e-05, "loss": 1.4008, "mean_token_accuracy": 0.6475434551636378, "num_tokens": 1210944851.0, "step": 7214 }, { "entropy": 1.6897524297237396, "epoch": 0.7926176155557386, "grad_norm": 0.6809960603713989, "learning_rate": 1.4377660490503187e-05, "loss": 1.3815, "mean_token_accuracy": 0.665686676899592, "num_tokens": 1211129884.0, "step": 7215 }, { "entropy": 1.7260896265506744, "epoch": 0.7927274724671116, "grad_norm": 0.6479694247245789, "learning_rate": 1.437617634246717e-05, "loss": 1.3552, "mean_token_accuracy": 0.6604562699794769, "num_tokens": 1211273138.0, "step": 7216 }, { "entropy": 1.704200655221939, "epoch": 0.7928373293784845, "grad_norm": 0.8002914190292358, "learning_rate": 1.4374692087576694e-05, "loss": 1.2478, "mean_token_accuracy": 0.6780698845783869, "num_tokens": 1211401771.0, "step": 7217 }, { "entropy": 1.7003964483737946, "epoch": 0.7929471862898575, "grad_norm": 0.6824570894241333, "learning_rate": 1.4373207725878736e-05, "loss": 1.417, "mean_token_accuracy": 0.6604044139385223, "num_tokens": 1211525125.0, "step": 7218 }, { "entropy": 1.756047526995341, "epoch": 0.7930570432012304, "grad_norm": 0.7336795926094055, "learning_rate": 1.437172325742027e-05, "loss": 1.4541, "mean_token_accuracy": 0.6522673020760218, "num_tokens": 1211688977.0, "step": 7219 }, { "entropy": 1.674008419116338, "epoch": 0.7931669001126034, "grad_norm": 0.5697746872901917, "learning_rate": 1.4370238682248284e-05, "loss": 1.4325, "mean_token_accuracy": 0.6417450805505117, "num_tokens": 1211915500.0, "step": 7220 }, { "entropy": 1.669522186120351, "epoch": 0.7932767570239763, "grad_norm": 0.7043665051460266, "learning_rate": 1.4368754000409759e-05, "loss": 1.2599, "mean_token_accuracy": 0.6736998210350672, "num_tokens": 1212019533.0, "step": 7221 }, { "entropy": 1.7614585657914479, "epoch": 0.7933866139353493, "grad_norm": 0.6837732791900635, "learning_rate": 1.4367269211951688e-05, "loss": 1.3755, "mean_token_accuracy": 0.6557194739580154, "num_tokens": 1212154651.0, "step": 7222 }, { "entropy": 1.7001692553361256, "epoch": 0.7934964708467221, "grad_norm": 0.7408974170684814, "learning_rate": 1.436578431692107e-05, "loss": 1.4553, "mean_token_accuracy": 0.6416794806718826, "num_tokens": 1212298701.0, "step": 7223 }, { "entropy": 1.7324201961358388, "epoch": 0.793606327758095, "grad_norm": 0.7774471640586853, "learning_rate": 1.436429931536489e-05, "loss": 1.4997, "mean_token_accuracy": 0.6496329059203466, "num_tokens": 1212477054.0, "step": 7224 }, { "entropy": 1.6642400324344635, "epoch": 0.793716184669468, "grad_norm": 0.5790720582008362, "learning_rate": 1.4362814207330154e-05, "loss": 1.372, "mean_token_accuracy": 0.6617165555556616, "num_tokens": 1212697010.0, "step": 7225 }, { "entropy": 1.662980963786443, "epoch": 0.7938260415808409, "grad_norm": 0.6963479518890381, "learning_rate": 1.4361328992863863e-05, "loss": 1.3915, "mean_token_accuracy": 0.6623529940843582, "num_tokens": 1212835136.0, "step": 7226 }, { "entropy": 1.697847972313563, "epoch": 0.7939358984922139, "grad_norm": 0.676906943321228, "learning_rate": 1.4359843672013025e-05, "loss": 1.2866, "mean_token_accuracy": 0.669852097829183, "num_tokens": 1212964012.0, "step": 7227 }, { "entropy": 1.7281867067019145, "epoch": 0.7940457554035868, "grad_norm": 0.7022289633750916, "learning_rate": 1.4358358244824646e-05, "loss": 1.356, "mean_token_accuracy": 0.6543090840180715, "num_tokens": 1213170176.0, "step": 7228 }, { "entropy": 1.726097176472346, "epoch": 0.7941556123149598, "grad_norm": 0.799540638923645, "learning_rate": 1.4356872711345746e-05, "loss": 1.5969, "mean_token_accuracy": 0.6627245545387268, "num_tokens": 1213333780.0, "step": 7229 }, { "entropy": 1.6588218410809834, "epoch": 0.7942654692263327, "grad_norm": 0.7514909505844116, "learning_rate": 1.4355387071623335e-05, "loss": 1.4477, "mean_token_accuracy": 0.6484199364980062, "num_tokens": 1213525486.0, "step": 7230 }, { "entropy": 1.7335455020268757, "epoch": 0.7943753261377057, "grad_norm": 0.6512316465377808, "learning_rate": 1.4353901325704439e-05, "loss": 1.3723, "mean_token_accuracy": 0.6515757242838541, "num_tokens": 1213713590.0, "step": 7231 }, { "entropy": 1.7147459487120311, "epoch": 0.7944851830490786, "grad_norm": 0.7964367270469666, "learning_rate": 1.4352415473636071e-05, "loss": 1.2251, "mean_token_accuracy": 0.6848846276601156, "num_tokens": 1213837877.0, "step": 7232 }, { "entropy": 1.680985818306605, "epoch": 0.7945950399604516, "grad_norm": 1.0966331958770752, "learning_rate": 1.4350929515465269e-05, "loss": 1.447, "mean_token_accuracy": 0.6554000427325567, "num_tokens": 1213994774.0, "step": 7233 }, { "entropy": 1.7075146635373433, "epoch": 0.7947048968718244, "grad_norm": 0.6517575979232788, "learning_rate": 1.4349443451239052e-05, "loss": 1.3756, "mean_token_accuracy": 0.6637918055057526, "num_tokens": 1214150146.0, "step": 7234 }, { "entropy": 1.7310162385304768, "epoch": 0.7948147537831974, "grad_norm": 0.6323092579841614, "learning_rate": 1.4347957281004466e-05, "loss": 1.5208, "mean_token_accuracy": 0.6204556177059809, "num_tokens": 1214404039.0, "step": 7235 }, { "entropy": 1.722548524538676, "epoch": 0.7949246106945703, "grad_norm": 0.7502648234367371, "learning_rate": 1.4346471004808536e-05, "loss": 1.2681, "mean_token_accuracy": 0.6832453906536102, "num_tokens": 1214523130.0, "step": 7236 }, { "entropy": 1.7112720509370167, "epoch": 0.7950344676059432, "grad_norm": 0.71775221824646, "learning_rate": 1.4344984622698308e-05, "loss": 1.2868, "mean_token_accuracy": 0.6676936894655228, "num_tokens": 1214672632.0, "step": 7237 }, { "entropy": 1.6758712430795033, "epoch": 0.7951443245173162, "grad_norm": 0.7727818489074707, "learning_rate": 1.4343498134720823e-05, "loss": 1.3304, "mean_token_accuracy": 0.6693208316961924, "num_tokens": 1214805275.0, "step": 7238 }, { "entropy": 1.7423097888628643, "epoch": 0.7952541814286891, "grad_norm": 0.6385967135429382, "learning_rate": 1.434201154092313e-05, "loss": 1.5397, "mean_token_accuracy": 0.6438859502474467, "num_tokens": 1215023577.0, "step": 7239 }, { "entropy": 1.7130355834960938, "epoch": 0.7953640383400621, "grad_norm": 0.6498627662658691, "learning_rate": 1.4340524841352278e-05, "loss": 1.343, "mean_token_accuracy": 0.6625998119513193, "num_tokens": 1215162413.0, "step": 7240 }, { "entropy": 1.6157074769337971, "epoch": 0.795473895251435, "grad_norm": 0.669402003288269, "learning_rate": 1.433903803605532e-05, "loss": 1.298, "mean_token_accuracy": 0.6726977676153183, "num_tokens": 1215324282.0, "step": 7241 }, { "entropy": 1.679003765185674, "epoch": 0.795583752162808, "grad_norm": 0.6385429501533508, "learning_rate": 1.4337551125079315e-05, "loss": 1.5047, "mean_token_accuracy": 0.6625163654486338, "num_tokens": 1215524603.0, "step": 7242 }, { "entropy": 1.7556110223134358, "epoch": 0.7956936090741809, "grad_norm": 0.7021380662918091, "learning_rate": 1.4336064108471315e-05, "loss": 1.3767, "mean_token_accuracy": 0.6701732029517492, "num_tokens": 1215652188.0, "step": 7243 }, { "entropy": 1.7281469702720642, "epoch": 0.7958034659855538, "grad_norm": 0.6799027323722839, "learning_rate": 1.4334576986278392e-05, "loss": 1.431, "mean_token_accuracy": 0.6460000276565552, "num_tokens": 1215885275.0, "step": 7244 }, { "entropy": 1.7195940514405568, "epoch": 0.7959133228969267, "grad_norm": 0.720520555973053, "learning_rate": 1.4333089758547611e-05, "loss": 1.3475, "mean_token_accuracy": 0.6720231225093206, "num_tokens": 1216073716.0, "step": 7245 }, { "entropy": 1.7394606570402782, "epoch": 0.7960231798082997, "grad_norm": 0.7158997058868408, "learning_rate": 1.4331602425326038e-05, "loss": 1.5034, "mean_token_accuracy": 0.6467889149983724, "num_tokens": 1216232381.0, "step": 7246 }, { "entropy": 1.6945938964684804, "epoch": 0.7961330367196726, "grad_norm": 0.5890840291976929, "learning_rate": 1.4330114986660755e-05, "loss": 1.5163, "mean_token_accuracy": 0.6420510311921438, "num_tokens": 1216438001.0, "step": 7247 }, { "entropy": 1.7162805596987407, "epoch": 0.7962428936310456, "grad_norm": 0.7011001110076904, "learning_rate": 1.4328627442598827e-05, "loss": 1.4178, "mean_token_accuracy": 0.6583675543467203, "num_tokens": 1216579926.0, "step": 7248 }, { "entropy": 1.7298036813735962, "epoch": 0.7963527505424185, "grad_norm": 0.6295740008354187, "learning_rate": 1.4327139793187343e-05, "loss": 1.3503, "mean_token_accuracy": 0.6574052224556605, "num_tokens": 1216725414.0, "step": 7249 }, { "entropy": 1.6822136640548706, "epoch": 0.7964626074537914, "grad_norm": 0.8162563443183899, "learning_rate": 1.4325652038473386e-05, "loss": 1.516, "mean_token_accuracy": 0.6439789732297262, "num_tokens": 1216913110.0, "step": 7250 }, { "entropy": 1.68292702237765, "epoch": 0.7965724643651644, "grad_norm": 0.6158664226531982, "learning_rate": 1.432416417850404e-05, "loss": 1.3995, "mean_token_accuracy": 0.6543232848246893, "num_tokens": 1217101697.0, "step": 7251 }, { "entropy": 1.7199612259864807, "epoch": 0.7966823212765373, "grad_norm": 0.6690497398376465, "learning_rate": 1.4322676213326392e-05, "loss": 1.3827, "mean_token_accuracy": 0.6606669773658117, "num_tokens": 1217278700.0, "step": 7252 }, { "entropy": 1.6890186369419098, "epoch": 0.7967921781879103, "grad_norm": 0.6910893321037292, "learning_rate": 1.4321188142987545e-05, "loss": 1.3698, "mean_token_accuracy": 0.656900112827619, "num_tokens": 1217443444.0, "step": 7253 }, { "entropy": 1.7530109186967213, "epoch": 0.7969020350992831, "grad_norm": 0.6086611747741699, "learning_rate": 1.4319699967534584e-05, "loss": 1.6023, "mean_token_accuracy": 0.6309465765953064, "num_tokens": 1217637360.0, "step": 7254 }, { "entropy": 1.7479777733484905, "epoch": 0.7970118920106561, "grad_norm": 0.8288069367408752, "learning_rate": 1.4318211687014618e-05, "loss": 1.5313, "mean_token_accuracy": 0.6323782056570053, "num_tokens": 1217803903.0, "step": 7255 }, { "entropy": 1.7118703424930573, "epoch": 0.797121748922029, "grad_norm": 0.6841630339622498, "learning_rate": 1.4316723301474744e-05, "loss": 1.3312, "mean_token_accuracy": 0.6639546205600103, "num_tokens": 1217950265.0, "step": 7256 }, { "entropy": 1.6801489094893138, "epoch": 0.797231605833402, "grad_norm": 0.785036027431488, "learning_rate": 1.4315234810962077e-05, "loss": 1.5764, "mean_token_accuracy": 0.6401002655426661, "num_tokens": 1218123645.0, "step": 7257 }, { "entropy": 1.7071903347969055, "epoch": 0.7973414627447749, "grad_norm": 0.6923706531524658, "learning_rate": 1.431374621552372e-05, "loss": 1.3916, "mean_token_accuracy": 0.6654524803161621, "num_tokens": 1218284414.0, "step": 7258 }, { "entropy": 1.6701487104098003, "epoch": 0.7974513196561479, "grad_norm": 0.6866686344146729, "learning_rate": 1.4312257515206788e-05, "loss": 1.4887, "mean_token_accuracy": 0.6344787627458572, "num_tokens": 1218489559.0, "step": 7259 }, { "entropy": 1.6827106575171153, "epoch": 0.7975611765675208, "grad_norm": 0.6273086667060852, "learning_rate": 1.4310768710058398e-05, "loss": 1.4382, "mean_token_accuracy": 0.6519757409890493, "num_tokens": 1218759113.0, "step": 7260 }, { "entropy": 1.703356256087621, "epoch": 0.7976710334788938, "grad_norm": 0.7796327471733093, "learning_rate": 1.4309279800125673e-05, "loss": 1.4105, "mean_token_accuracy": 0.6624956379334132, "num_tokens": 1218906762.0, "step": 7261 }, { "entropy": 1.661956379810969, "epoch": 0.7977808903902667, "grad_norm": 0.5531797409057617, "learning_rate": 1.4307790785455729e-05, "loss": 1.4883, "mean_token_accuracy": 0.6515692820151647, "num_tokens": 1219102172.0, "step": 7262 }, { "entropy": 1.7754519681135814, "epoch": 0.7978907473016397, "grad_norm": 0.7891526222229004, "learning_rate": 1.4306301666095702e-05, "loss": 1.4952, "mean_token_accuracy": 0.644075925151507, "num_tokens": 1219278956.0, "step": 7263 }, { "entropy": 1.6872022251288097, "epoch": 0.7980006042130126, "grad_norm": 0.6932452321052551, "learning_rate": 1.4304812442092713e-05, "loss": 1.1952, "mean_token_accuracy": 0.6833833257357279, "num_tokens": 1219417575.0, "step": 7264 }, { "entropy": 1.7105295658111572, "epoch": 0.7981104611243854, "grad_norm": 0.6524655818939209, "learning_rate": 1.43033231134939e-05, "loss": 1.3186, "mean_token_accuracy": 0.6636832803487778, "num_tokens": 1219558072.0, "step": 7265 }, { "entropy": 1.6755134363969166, "epoch": 0.7982203180357584, "grad_norm": 0.6954984664916992, "learning_rate": 1.43018336803464e-05, "loss": 1.3372, "mean_token_accuracy": 0.6699787775675455, "num_tokens": 1219718681.0, "step": 7266 }, { "entropy": 1.7518522143363953, "epoch": 0.7983301749471313, "grad_norm": 0.6694498658180237, "learning_rate": 1.4300344142697353e-05, "loss": 1.4487, "mean_token_accuracy": 0.6395488778750101, "num_tokens": 1219883865.0, "step": 7267 }, { "entropy": 1.6969805459181468, "epoch": 0.7984400318585043, "grad_norm": 0.6084674000740051, "learning_rate": 1.4298854500593897e-05, "loss": 1.3776, "mean_token_accuracy": 0.6505701790253321, "num_tokens": 1220093180.0, "step": 7268 }, { "entropy": 1.7364482978979747, "epoch": 0.7985498887698772, "grad_norm": 0.7209011912345886, "learning_rate": 1.4297364754083187e-05, "loss": 1.5423, "mean_token_accuracy": 0.6337501257658005, "num_tokens": 1220270122.0, "step": 7269 }, { "entropy": 1.7650366127490997, "epoch": 0.7986597456812502, "grad_norm": 0.7382558584213257, "learning_rate": 1.4295874903212365e-05, "loss": 1.4628, "mean_token_accuracy": 0.6457639882961909, "num_tokens": 1220450090.0, "step": 7270 }, { "entropy": 1.7180620034535725, "epoch": 0.7987696025926231, "grad_norm": 0.7159477472305298, "learning_rate": 1.4294384948028592e-05, "loss": 1.4328, "mean_token_accuracy": 0.6519068032503128, "num_tokens": 1220627437.0, "step": 7271 }, { "entropy": 1.7323042750358582, "epoch": 0.7988794595039961, "grad_norm": 0.6927447319030762, "learning_rate": 1.4292894888579014e-05, "loss": 1.3658, "mean_token_accuracy": 0.6517662604649862, "num_tokens": 1220748500.0, "step": 7272 }, { "entropy": 1.7364622453848522, "epoch": 0.798989316415369, "grad_norm": 0.6226632595062256, "learning_rate": 1.4291404724910803e-05, "loss": 1.4278, "mean_token_accuracy": 0.6461608906586965, "num_tokens": 1220930228.0, "step": 7273 }, { "entropy": 1.7141484121481578, "epoch": 0.799099173326742, "grad_norm": 0.6916367411613464, "learning_rate": 1.428991445707111e-05, "loss": 1.2965, "mean_token_accuracy": 0.669938306013743, "num_tokens": 1221053000.0, "step": 7274 }, { "entropy": 1.7483568787574768, "epoch": 0.7992090302381148, "grad_norm": 0.7176331877708435, "learning_rate": 1.428842408510711e-05, "loss": 1.2919, "mean_token_accuracy": 0.6726898650328318, "num_tokens": 1221164118.0, "step": 7275 }, { "entropy": 1.7342201670010884, "epoch": 0.7993188871494878, "grad_norm": 0.6409656405448914, "learning_rate": 1.4286933609065967e-05, "loss": 1.4841, "mean_token_accuracy": 0.6403429557879766, "num_tokens": 1221324965.0, "step": 7276 }, { "entropy": 1.6586043238639832, "epoch": 0.7994287440608607, "grad_norm": 0.8449923396110535, "learning_rate": 1.4285443028994859e-05, "loss": 1.2365, "mean_token_accuracy": 0.6813416828711828, "num_tokens": 1221465842.0, "step": 7277 }, { "entropy": 1.696493277947108, "epoch": 0.7995386009722336, "grad_norm": 0.7281336784362793, "learning_rate": 1.4283952344940957e-05, "loss": 1.247, "mean_token_accuracy": 0.681158721446991, "num_tokens": 1221591240.0, "step": 7278 }, { "entropy": 1.7007083594799042, "epoch": 0.7996484578836066, "grad_norm": 0.6635676622390747, "learning_rate": 1.4282461556951445e-05, "loss": 1.3977, "mean_token_accuracy": 0.6511793335278829, "num_tokens": 1221772023.0, "step": 7279 }, { "entropy": 1.7213706970214844, "epoch": 0.7997583147949795, "grad_norm": 0.7881171107292175, "learning_rate": 1.4280970665073503e-05, "loss": 1.1846, "mean_token_accuracy": 0.6822384099165598, "num_tokens": 1221870566.0, "step": 7280 }, { "entropy": 1.658156931400299, "epoch": 0.7998681717063525, "grad_norm": 0.7354137897491455, "learning_rate": 1.4279479669354319e-05, "loss": 1.362, "mean_token_accuracy": 0.6667267928520838, "num_tokens": 1222061004.0, "step": 7281 }, { "entropy": 1.7199231286843617, "epoch": 0.7999780286177254, "grad_norm": 0.6585229635238647, "learning_rate": 1.4277988569841082e-05, "loss": 1.4833, "mean_token_accuracy": 0.6446650822957357, "num_tokens": 1222247129.0, "step": 7282 }, { "entropy": 1.6953574518362682, "epoch": 0.8000878855290984, "grad_norm": 0.581791341304779, "learning_rate": 1.4276497366580982e-05, "loss": 1.4653, "mean_token_accuracy": 0.6526039590438207, "num_tokens": 1222436035.0, "step": 7283 }, { "entropy": 1.7320642570654552, "epoch": 0.8001977424404713, "grad_norm": 0.7845410704612732, "learning_rate": 1.4275006059621217e-05, "loss": 1.5006, "mean_token_accuracy": 0.6406663705905279, "num_tokens": 1222625280.0, "step": 7284 }, { "entropy": 1.7656051715215046, "epoch": 0.8003075993518443, "grad_norm": 0.8226374983787537, "learning_rate": 1.4273514649008989e-05, "loss": 1.3163, "mean_token_accuracy": 0.6708792199691137, "num_tokens": 1222743645.0, "step": 7285 }, { "entropy": 1.764061023791631, "epoch": 0.8004174562632171, "grad_norm": 0.6624506115913391, "learning_rate": 1.4272023134791493e-05, "loss": 1.4877, "mean_token_accuracy": 0.6504695763190588, "num_tokens": 1222947630.0, "step": 7286 }, { "entropy": 1.738725354274114, "epoch": 0.8005273131745901, "grad_norm": 0.827363133430481, "learning_rate": 1.4270531517015943e-05, "loss": 1.4878, "mean_token_accuracy": 0.6427379300196966, "num_tokens": 1223111073.0, "step": 7287 }, { "entropy": 1.7238669991493225, "epoch": 0.800637170085963, "grad_norm": 0.665775716304779, "learning_rate": 1.426903979572954e-05, "loss": 1.4706, "mean_token_accuracy": 0.6543847819169363, "num_tokens": 1223269067.0, "step": 7288 }, { "entropy": 1.659266859292984, "epoch": 0.800747026997336, "grad_norm": 0.6303220391273499, "learning_rate": 1.4267547970979502e-05, "loss": 1.2609, "mean_token_accuracy": 0.679823304216067, "num_tokens": 1223420407.0, "step": 7289 }, { "entropy": 1.672978659470876, "epoch": 0.8008568839087089, "grad_norm": 0.5509341359138489, "learning_rate": 1.4266056042813043e-05, "loss": 1.3959, "mean_token_accuracy": 0.6600791364908218, "num_tokens": 1223639283.0, "step": 7290 }, { "entropy": 1.6871724128723145, "epoch": 0.8009667408200818, "grad_norm": 0.6795254349708557, "learning_rate": 1.4264564011277384e-05, "loss": 1.3352, "mean_token_accuracy": 0.6592608243227005, "num_tokens": 1223817100.0, "step": 7291 }, { "entropy": 1.7118334273497264, "epoch": 0.8010765977314548, "grad_norm": 0.7024778723716736, "learning_rate": 1.4263071876419744e-05, "loss": 1.6732, "mean_token_accuracy": 0.6494659408926964, "num_tokens": 1224030111.0, "step": 7292 }, { "entropy": 1.6586161156495411, "epoch": 0.8011864546428277, "grad_norm": 0.7682591676712036, "learning_rate": 1.4261579638287351e-05, "loss": 1.201, "mean_token_accuracy": 0.6980761736631393, "num_tokens": 1224163835.0, "step": 7293 }, { "entropy": 1.7308415472507477, "epoch": 0.8012963115542007, "grad_norm": 0.7771059274673462, "learning_rate": 1.4260087296927427e-05, "loss": 1.4017, "mean_token_accuracy": 0.6600347012281418, "num_tokens": 1224300825.0, "step": 7294 }, { "entropy": 1.695349782705307, "epoch": 0.8014061684655736, "grad_norm": 13.370857238769531, "learning_rate": 1.4258594852387213e-05, "loss": 1.5951, "mean_token_accuracy": 0.6378213365872701, "num_tokens": 1224469121.0, "step": 7295 }, { "entropy": 1.7092136939366658, "epoch": 0.8015160253769466, "grad_norm": 0.7703883647918701, "learning_rate": 1.425710230471394e-05, "loss": 1.3407, "mean_token_accuracy": 0.6662224382162094, "num_tokens": 1224620305.0, "step": 7296 }, { "entropy": 1.7132171392440796, "epoch": 0.8016258822883194, "grad_norm": 0.6171491146087646, "learning_rate": 1.4255609653954847e-05, "loss": 1.3658, "mean_token_accuracy": 0.6527489374081293, "num_tokens": 1224785259.0, "step": 7297 }, { "entropy": 1.6952051520347595, "epoch": 0.8017357391996924, "grad_norm": 0.7840876579284668, "learning_rate": 1.4254116900157173e-05, "loss": 1.4597, "mean_token_accuracy": 0.6597426682710648, "num_tokens": 1224930212.0, "step": 7298 }, { "entropy": 1.785047431786855, "epoch": 0.8018455961110653, "grad_norm": 1.3836613893508911, "learning_rate": 1.4252624043368169e-05, "loss": 1.4614, "mean_token_accuracy": 0.6522940744956335, "num_tokens": 1225052106.0, "step": 7299 }, { "entropy": 1.6921504139900208, "epoch": 0.8019554530224383, "grad_norm": 0.6270791888237, "learning_rate": 1.4251131083635079e-05, "loss": 1.4451, "mean_token_accuracy": 0.6468443423509598, "num_tokens": 1225221791.0, "step": 7300 }, { "entropy": 1.745924452940623, "epoch": 0.8020653099338112, "grad_norm": 0.8254175186157227, "learning_rate": 1.4249638021005154e-05, "loss": 1.4149, "mean_token_accuracy": 0.6477925777435303, "num_tokens": 1225366690.0, "step": 7301 }, { "entropy": 1.6810812751452129, "epoch": 0.8021751668451842, "grad_norm": 0.6022759675979614, "learning_rate": 1.4248144855525649e-05, "loss": 1.2906, "mean_token_accuracy": 0.683276375134786, "num_tokens": 1225516260.0, "step": 7302 }, { "entropy": 1.647108296553294, "epoch": 0.8022850237565571, "grad_norm": 0.7280488610267639, "learning_rate": 1.4246651587243825e-05, "loss": 1.3632, "mean_token_accuracy": 0.6681808729966482, "num_tokens": 1225722689.0, "step": 7303 }, { "entropy": 1.6735007365544636, "epoch": 0.80239488066793, "grad_norm": 0.6589364409446716, "learning_rate": 1.424515821620694e-05, "loss": 1.3376, "mean_token_accuracy": 0.6602647950251898, "num_tokens": 1225891479.0, "step": 7304 }, { "entropy": 1.7428459525108337, "epoch": 0.802504737579303, "grad_norm": 0.7700157165527344, "learning_rate": 1.424366474246226e-05, "loss": 1.3954, "mean_token_accuracy": 0.6708898593982061, "num_tokens": 1226034151.0, "step": 7305 }, { "entropy": 1.7126306494077046, "epoch": 0.8026145944906758, "grad_norm": 0.7041934728622437, "learning_rate": 1.4242171166057053e-05, "loss": 1.551, "mean_token_accuracy": 0.6539329538742701, "num_tokens": 1226215865.0, "step": 7306 }, { "entropy": 1.6650786697864532, "epoch": 0.8027244514020488, "grad_norm": 0.6044019460678101, "learning_rate": 1.4240677487038593e-05, "loss": 1.3191, "mean_token_accuracy": 0.6622636218865713, "num_tokens": 1226372708.0, "step": 7307 }, { "entropy": 1.723981390396754, "epoch": 0.8028343083134217, "grad_norm": 0.8162484765052795, "learning_rate": 1.4239183705454142e-05, "loss": 1.4615, "mean_token_accuracy": 0.6413914859294891, "num_tokens": 1226535459.0, "step": 7308 }, { "entropy": 1.7149596611658733, "epoch": 0.8029441652247947, "grad_norm": 0.73653644323349, "learning_rate": 1.4237689821350992e-05, "loss": 1.3971, "mean_token_accuracy": 0.6685678660869598, "num_tokens": 1226686426.0, "step": 7309 }, { "entropy": 1.6466976702213287, "epoch": 0.8030540221361676, "grad_norm": 0.743812084197998, "learning_rate": 1.4236195834776418e-05, "loss": 1.3838, "mean_token_accuracy": 0.6726710299650828, "num_tokens": 1226842592.0, "step": 7310 }, { "entropy": 1.8227874239285786, "epoch": 0.8031638790475406, "grad_norm": 0.9089652299880981, "learning_rate": 1.4234701745777704e-05, "loss": 1.6301, "mean_token_accuracy": 0.6158707390228907, "num_tokens": 1227021023.0, "step": 7311 }, { "entropy": 1.6785000363985698, "epoch": 0.8032737359589135, "grad_norm": 0.6702415347099304, "learning_rate": 1.4233207554402138e-05, "loss": 1.4478, "mean_token_accuracy": 0.6375938355922699, "num_tokens": 1227242256.0, "step": 7312 }, { "entropy": 1.719922512769699, "epoch": 0.8033835928702865, "grad_norm": 0.8157113790512085, "learning_rate": 1.423171326069701e-05, "loss": 1.4502, "mean_token_accuracy": 0.6464798400799433, "num_tokens": 1227418743.0, "step": 7313 }, { "entropy": 1.6604024668534596, "epoch": 0.8034934497816594, "grad_norm": 0.7159737348556519, "learning_rate": 1.4230218864709612e-05, "loss": 1.4431, "mean_token_accuracy": 0.6605499237775803, "num_tokens": 1227635822.0, "step": 7314 }, { "entropy": 1.706661621729533, "epoch": 0.8036033066930324, "grad_norm": 0.6185526251792908, "learning_rate": 1.4228724366487242e-05, "loss": 1.3767, "mean_token_accuracy": 0.67093226313591, "num_tokens": 1227818074.0, "step": 7315 }, { "entropy": 1.6483195424079895, "epoch": 0.8037131636044053, "grad_norm": 0.6785904765129089, "learning_rate": 1.4227229766077202e-05, "loss": 1.3335, "mean_token_accuracy": 0.67606753607591, "num_tokens": 1227982171.0, "step": 7316 }, { "entropy": 1.6934054692586262, "epoch": 0.8038230205157783, "grad_norm": 0.6464650630950928, "learning_rate": 1.4225735063526792e-05, "loss": 1.3717, "mean_token_accuracy": 0.6636403550704321, "num_tokens": 1228125554.0, "step": 7317 }, { "entropy": 1.636114815870921, "epoch": 0.8039328774271511, "grad_norm": 0.6265885233879089, "learning_rate": 1.4224240258883324e-05, "loss": 1.2675, "mean_token_accuracy": 0.6773168394962946, "num_tokens": 1228305404.0, "step": 7318 }, { "entropy": 1.73605677485466, "epoch": 0.804042734338524, "grad_norm": 0.659631073474884, "learning_rate": 1.4222745352194102e-05, "loss": 1.4094, "mean_token_accuracy": 0.6566009968519211, "num_tokens": 1228493229.0, "step": 7319 }, { "entropy": 1.6943889657656352, "epoch": 0.804152591249897, "grad_norm": 0.7117233276367188, "learning_rate": 1.4221250343506445e-05, "loss": 1.1594, "mean_token_accuracy": 0.6870453854401907, "num_tokens": 1228603511.0, "step": 7320 }, { "entropy": 1.6649406949679058, "epoch": 0.8042624481612699, "grad_norm": 0.7140738368034363, "learning_rate": 1.4219755232867662e-05, "loss": 1.2535, "mean_token_accuracy": 0.6795340776443481, "num_tokens": 1228720218.0, "step": 7321 }, { "entropy": 1.7426091035207112, "epoch": 0.8043723050726429, "grad_norm": 0.6925419569015503, "learning_rate": 1.4218260020325079e-05, "loss": 1.3582, "mean_token_accuracy": 0.655068372686704, "num_tokens": 1228895991.0, "step": 7322 }, { "entropy": 1.7241126894950867, "epoch": 0.8044821619840158, "grad_norm": 0.6894976496696472, "learning_rate": 1.4216764705926019e-05, "loss": 1.3149, "mean_token_accuracy": 0.6623408049345016, "num_tokens": 1229066649.0, "step": 7323 }, { "entropy": 1.725894719362259, "epoch": 0.8045920188953888, "grad_norm": 0.669735848903656, "learning_rate": 1.4215269289717802e-05, "loss": 1.3299, "mean_token_accuracy": 0.6665193190177282, "num_tokens": 1229214694.0, "step": 7324 }, { "entropy": 1.6697326302528381, "epoch": 0.8047018758067617, "grad_norm": 0.9352332353591919, "learning_rate": 1.4213773771747763e-05, "loss": 1.3784, "mean_token_accuracy": 0.6521992981433868, "num_tokens": 1229407333.0, "step": 7325 }, { "entropy": 1.6702334781487782, "epoch": 0.8048117327181347, "grad_norm": 0.7152570486068726, "learning_rate": 1.4212278152063228e-05, "loss": 1.3232, "mean_token_accuracy": 0.6703629096349081, "num_tokens": 1229536389.0, "step": 7326 }, { "entropy": 1.714383860429128, "epoch": 0.8049215896295076, "grad_norm": 0.7807464599609375, "learning_rate": 1.4210782430711541e-05, "loss": 1.2982, "mean_token_accuracy": 0.6688077251116434, "num_tokens": 1229662562.0, "step": 7327 }, { "entropy": 1.699068009853363, "epoch": 0.8050314465408805, "grad_norm": 0.6622336506843567, "learning_rate": 1.4209286607740036e-05, "loss": 1.3098, "mean_token_accuracy": 0.6640769392251968, "num_tokens": 1229832222.0, "step": 7328 }, { "entropy": 1.7409149905045826, "epoch": 0.8051413034522534, "grad_norm": 0.6264408826828003, "learning_rate": 1.4207790683196056e-05, "loss": 1.3233, "mean_token_accuracy": 0.6729765981435776, "num_tokens": 1229979504.0, "step": 7329 }, { "entropy": 1.696464866399765, "epoch": 0.8052511603636264, "grad_norm": 0.6128476858139038, "learning_rate": 1.4206294657126944e-05, "loss": 1.3835, "mean_token_accuracy": 0.6473323603471121, "num_tokens": 1230154418.0, "step": 7330 }, { "entropy": 1.7256540358066559, "epoch": 0.8053610172749993, "grad_norm": 0.6184810400009155, "learning_rate": 1.4204798529580055e-05, "loss": 1.424, "mean_token_accuracy": 0.6607218682765961, "num_tokens": 1230355844.0, "step": 7331 }, { "entropy": 1.6991771360238392, "epoch": 0.8054708741863722, "grad_norm": 0.6512514352798462, "learning_rate": 1.4203302300602735e-05, "loss": 1.3036, "mean_token_accuracy": 0.6798295130332311, "num_tokens": 1230493084.0, "step": 7332 }, { "entropy": 1.6824211478233337, "epoch": 0.8055807310977452, "grad_norm": 0.6564586758613586, "learning_rate": 1.420180597024234e-05, "loss": 1.3671, "mean_token_accuracy": 0.6743087867895762, "num_tokens": 1230629324.0, "step": 7333 }, { "entropy": 1.691230148077011, "epoch": 0.8056905880091181, "grad_norm": 0.6371413469314575, "learning_rate": 1.420030953854623e-05, "loss": 1.3799, "mean_token_accuracy": 0.649422844250997, "num_tokens": 1230812422.0, "step": 7334 }, { "entropy": 1.6784875591595967, "epoch": 0.8058004449204911, "grad_norm": 0.8957354426383972, "learning_rate": 1.4198813005561765e-05, "loss": 1.4552, "mean_token_accuracy": 0.6490340381860733, "num_tokens": 1231007507.0, "step": 7335 }, { "entropy": 1.7226787110169728, "epoch": 0.805910301831864, "grad_norm": 0.7002930641174316, "learning_rate": 1.4197316371336307e-05, "loss": 1.3037, "mean_token_accuracy": 0.6610642572244009, "num_tokens": 1231148630.0, "step": 7336 }, { "entropy": 1.6683934728304546, "epoch": 0.806020158743237, "grad_norm": 0.6079908013343811, "learning_rate": 1.419581963591723e-05, "loss": 1.3604, "mean_token_accuracy": 0.6693431635697683, "num_tokens": 1231354998.0, "step": 7337 }, { "entropy": 1.7245097557703655, "epoch": 0.8061300156546098, "grad_norm": 0.5906463265419006, "learning_rate": 1.41943227993519e-05, "loss": 1.4765, "mean_token_accuracy": 0.6449787418047587, "num_tokens": 1231548058.0, "step": 7338 }, { "entropy": 1.757252832253774, "epoch": 0.8062398725659828, "grad_norm": 0.6691707968711853, "learning_rate": 1.4192825861687694e-05, "loss": 1.2934, "mean_token_accuracy": 0.6613676349322001, "num_tokens": 1231674981.0, "step": 7339 }, { "entropy": 1.7177879710992177, "epoch": 0.8063497294773557, "grad_norm": 0.7445343136787415, "learning_rate": 1.4191328822971988e-05, "loss": 1.2699, "mean_token_accuracy": 0.6711633503437042, "num_tokens": 1231810236.0, "step": 7340 }, { "entropy": 1.7406736811002095, "epoch": 0.8064595863887287, "grad_norm": 0.5835939645767212, "learning_rate": 1.4189831683252162e-05, "loss": 1.3592, "mean_token_accuracy": 0.6513313700755438, "num_tokens": 1231967375.0, "step": 7341 }, { "entropy": 1.714879075686137, "epoch": 0.8065694433001016, "grad_norm": 0.723407506942749, "learning_rate": 1.41883344425756e-05, "loss": 1.4315, "mean_token_accuracy": 0.6485263953606287, "num_tokens": 1232124514.0, "step": 7342 }, { "entropy": 1.7313550611337025, "epoch": 0.8066793002114746, "grad_norm": 0.671328067779541, "learning_rate": 1.4186837100989693e-05, "loss": 1.4934, "mean_token_accuracy": 0.6444969574610392, "num_tokens": 1232311836.0, "step": 7343 }, { "entropy": 1.7535496056079865, "epoch": 0.8067891571228475, "grad_norm": 0.7109101414680481, "learning_rate": 1.4185339658541824e-05, "loss": 1.5029, "mean_token_accuracy": 0.6288647800683975, "num_tokens": 1232511838.0, "step": 7344 }, { "entropy": 1.6992291112740834, "epoch": 0.8068990140342204, "grad_norm": 0.6878111958503723, "learning_rate": 1.4183842115279391e-05, "loss": 1.3329, "mean_token_accuracy": 0.6704870462417603, "num_tokens": 1232683222.0, "step": 7345 }, { "entropy": 1.664991666873296, "epoch": 0.8070088709455934, "grad_norm": 0.6333096623420715, "learning_rate": 1.4182344471249789e-05, "loss": 1.4144, "mean_token_accuracy": 0.6521175851424535, "num_tokens": 1232883037.0, "step": 7346 }, { "entropy": 1.7921419044335682, "epoch": 0.8071187278569663, "grad_norm": 0.7302199602127075, "learning_rate": 1.4180846726500422e-05, "loss": 1.417, "mean_token_accuracy": 0.6555136690537134, "num_tokens": 1233037865.0, "step": 7347 }, { "entropy": 1.7335894107818604, "epoch": 0.8072285847683393, "grad_norm": 0.6250348687171936, "learning_rate": 1.4179348881078687e-05, "loss": 1.4014, "mean_token_accuracy": 0.6503031303485235, "num_tokens": 1233208316.0, "step": 7348 }, { "entropy": 1.7149858474731445, "epoch": 0.8073384416797121, "grad_norm": 0.6429965496063232, "learning_rate": 1.4177850935031991e-05, "loss": 1.4741, "mean_token_accuracy": 0.6448526183764139, "num_tokens": 1233386088.0, "step": 7349 }, { "entropy": 1.7311444580554962, "epoch": 0.8074482985910851, "grad_norm": 0.6727426052093506, "learning_rate": 1.4176352888407748e-05, "loss": 1.4227, "mean_token_accuracy": 0.6663500418265661, "num_tokens": 1233510643.0, "step": 7350 }, { "entropy": 1.7784233887990315, "epoch": 0.807558155502458, "grad_norm": 0.7476586699485779, "learning_rate": 1.4174854741253368e-05, "loss": 1.3135, "mean_token_accuracy": 0.6639601538578669, "num_tokens": 1233678361.0, "step": 7351 }, { "entropy": 1.6666496098041534, "epoch": 0.807668012413831, "grad_norm": 0.7939883470535278, "learning_rate": 1.417335649361626e-05, "loss": 1.3468, "mean_token_accuracy": 0.6685838301976522, "num_tokens": 1233827347.0, "step": 7352 }, { "entropy": 1.6903888583183289, "epoch": 0.8077778693252039, "grad_norm": 0.6927181482315063, "learning_rate": 1.4171858145543856e-05, "loss": 1.4467, "mean_token_accuracy": 0.6569238354762396, "num_tokens": 1233951478.0, "step": 7353 }, { "entropy": 1.6132766505082448, "epoch": 0.8078877262365769, "grad_norm": 0.6065834164619446, "learning_rate": 1.4170359697083564e-05, "loss": 1.378, "mean_token_accuracy": 0.6582773874203364, "num_tokens": 1234135918.0, "step": 7354 }, { "entropy": 1.667845626672109, "epoch": 0.8079975831479498, "grad_norm": 0.5912481546401978, "learning_rate": 1.416886114828282e-05, "loss": 1.3259, "mean_token_accuracy": 0.6613271286090215, "num_tokens": 1234318765.0, "step": 7355 }, { "entropy": 1.7094935675462086, "epoch": 0.8081074400593228, "grad_norm": 0.6756147742271423, "learning_rate": 1.416736249918905e-05, "loss": 1.3621, "mean_token_accuracy": 0.6520873159170151, "num_tokens": 1234459417.0, "step": 7356 }, { "entropy": 1.7112588385740917, "epoch": 0.8082172969706957, "grad_norm": 0.703292965888977, "learning_rate": 1.4165863749849684e-05, "loss": 1.4601, "mean_token_accuracy": 0.6652803619702657, "num_tokens": 1234622325.0, "step": 7357 }, { "entropy": 1.733866771062215, "epoch": 0.8083271538820687, "grad_norm": 0.6714462637901306, "learning_rate": 1.4164364900312152e-05, "loss": 1.429, "mean_token_accuracy": 0.6481720258792242, "num_tokens": 1234820943.0, "step": 7358 }, { "entropy": 1.6990663806597393, "epoch": 0.8084370107934415, "grad_norm": 0.7425878643989563, "learning_rate": 1.4162865950623903e-05, "loss": 1.4332, "mean_token_accuracy": 0.6520752906799316, "num_tokens": 1234974817.0, "step": 7359 }, { "entropy": 1.6746714909871419, "epoch": 0.8085468677048144, "grad_norm": 0.7591057419776917, "learning_rate": 1.416136690083237e-05, "loss": 1.4892, "mean_token_accuracy": 0.6340252707401911, "num_tokens": 1235191091.0, "step": 7360 }, { "entropy": 1.7674211462338765, "epoch": 0.8086567246161874, "grad_norm": 0.7490597367286682, "learning_rate": 1.4159867750984998e-05, "loss": 1.523, "mean_token_accuracy": 0.6239955872297287, "num_tokens": 1235418947.0, "step": 7361 }, { "entropy": 1.758747826019923, "epoch": 0.8087665815275603, "grad_norm": 0.7145038843154907, "learning_rate": 1.4158368501129234e-05, "loss": 1.3235, "mean_token_accuracy": 0.6654741416374842, "num_tokens": 1235562850.0, "step": 7362 }, { "entropy": 1.6892236868540447, "epoch": 0.8088764384389333, "grad_norm": 0.6746348142623901, "learning_rate": 1.4156869151312536e-05, "loss": 1.3841, "mean_token_accuracy": 0.6534732679526011, "num_tokens": 1235704318.0, "step": 7363 }, { "entropy": 1.7318583031495411, "epoch": 0.8089862953503062, "grad_norm": 0.7424976825714111, "learning_rate": 1.4155369701582344e-05, "loss": 1.5392, "mean_token_accuracy": 0.6394655803839365, "num_tokens": 1235921680.0, "step": 7364 }, { "entropy": 1.7026897370815277, "epoch": 0.8090961522616792, "grad_norm": 0.6459017395973206, "learning_rate": 1.4153870151986127e-05, "loss": 1.4672, "mean_token_accuracy": 0.6501971036195755, "num_tokens": 1236149444.0, "step": 7365 }, { "entropy": 1.7260268131891887, "epoch": 0.8092060091730521, "grad_norm": 0.6344119906425476, "learning_rate": 1.4152370502571343e-05, "loss": 1.3406, "mean_token_accuracy": 0.6541385352611542, "num_tokens": 1236288713.0, "step": 7366 }, { "entropy": 1.6736855705579121, "epoch": 0.8093158660844251, "grad_norm": 0.7475732564926147, "learning_rate": 1.415087075338545e-05, "loss": 1.3069, "mean_token_accuracy": 0.6776044766108195, "num_tokens": 1236442000.0, "step": 7367 }, { "entropy": 1.6757760147253673, "epoch": 0.809425722995798, "grad_norm": 0.6088912487030029, "learning_rate": 1.4149370904475916e-05, "loss": 1.4567, "mean_token_accuracy": 0.6608283271392187, "num_tokens": 1236620361.0, "step": 7368 }, { "entropy": 1.635620504617691, "epoch": 0.809535579907171, "grad_norm": 1.5183919668197632, "learning_rate": 1.4147870955890217e-05, "loss": 1.3127, "mean_token_accuracy": 0.6584126055240631, "num_tokens": 1236884490.0, "step": 7369 }, { "entropy": 1.6411939958731334, "epoch": 0.8096454368185438, "grad_norm": 0.6386780142784119, "learning_rate": 1.4146370907675816e-05, "loss": 1.3192, "mean_token_accuracy": 0.6725454529126486, "num_tokens": 1237042264.0, "step": 7370 }, { "entropy": 1.7076645493507385, "epoch": 0.8097552937299168, "grad_norm": 0.7984034419059753, "learning_rate": 1.4144870759880196e-05, "loss": 1.5304, "mean_token_accuracy": 0.633898084362348, "num_tokens": 1237194352.0, "step": 7371 }, { "entropy": 1.756166120370229, "epoch": 0.8098651506412897, "grad_norm": 0.7380567789077759, "learning_rate": 1.4143370512550831e-05, "loss": 1.4426, "mean_token_accuracy": 0.6379890193541845, "num_tokens": 1237324596.0, "step": 7372 }, { "entropy": 1.7159120738506317, "epoch": 0.8099750075526626, "grad_norm": 0.6627910733222961, "learning_rate": 1.414187016573521e-05, "loss": 1.3909, "mean_token_accuracy": 0.6649558494488398, "num_tokens": 1237483038.0, "step": 7373 }, { "entropy": 1.7520929177602131, "epoch": 0.8100848644640356, "grad_norm": 0.7027316093444824, "learning_rate": 1.4140369719480812e-05, "loss": 1.665, "mean_token_accuracy": 0.6250251233577728, "num_tokens": 1237657133.0, "step": 7374 }, { "entropy": 1.700265755256017, "epoch": 0.8101947213754085, "grad_norm": 0.6161781549453735, "learning_rate": 1.4138869173835128e-05, "loss": 1.4111, "mean_token_accuracy": 0.661478283504645, "num_tokens": 1237839093.0, "step": 7375 }, { "entropy": 1.666181892156601, "epoch": 0.8103045782867815, "grad_norm": 0.7483380436897278, "learning_rate": 1.4137368528845648e-05, "loss": 1.3713, "mean_token_accuracy": 0.6747563034296036, "num_tokens": 1237973528.0, "step": 7376 }, { "entropy": 1.7163873811562855, "epoch": 0.8104144351981544, "grad_norm": 0.6816152930259705, "learning_rate": 1.4135867784559867e-05, "loss": 1.4682, "mean_token_accuracy": 0.645609254638354, "num_tokens": 1238112424.0, "step": 7377 }, { "entropy": 1.6794928908348083, "epoch": 0.8105242921095274, "grad_norm": 0.7110916376113892, "learning_rate": 1.4134366941025283e-05, "loss": 1.4644, "mean_token_accuracy": 0.6717881063620249, "num_tokens": 1238254177.0, "step": 7378 }, { "entropy": 1.6711277862389882, "epoch": 0.8106341490209003, "grad_norm": 0.6988908648490906, "learning_rate": 1.4132865998289402e-05, "loss": 1.3469, "mean_token_accuracy": 0.652699887752533, "num_tokens": 1238443003.0, "step": 7379 }, { "entropy": 1.679229776064555, "epoch": 0.8107440059322732, "grad_norm": 0.6412124037742615, "learning_rate": 1.413136495639972e-05, "loss": 1.4704, "mean_token_accuracy": 0.6530571530262629, "num_tokens": 1238623416.0, "step": 7380 }, { "entropy": 1.6649984618028004, "epoch": 0.8108538628436461, "grad_norm": 0.6781518459320068, "learning_rate": 1.412986381540375e-05, "loss": 1.2339, "mean_token_accuracy": 0.6802275578180949, "num_tokens": 1238797372.0, "step": 7381 }, { "entropy": 1.739145537217458, "epoch": 0.8109637197550191, "grad_norm": 0.7031247019767761, "learning_rate": 1.4128362575349e-05, "loss": 1.2742, "mean_token_accuracy": 0.6698156992594401, "num_tokens": 1238931134.0, "step": 7382 }, { "entropy": 1.6609701414903004, "epoch": 0.811073576666392, "grad_norm": 0.6347212791442871, "learning_rate": 1.4126861236282985e-05, "loss": 1.3583, "mean_token_accuracy": 0.6621866772572199, "num_tokens": 1239089834.0, "step": 7383 }, { "entropy": 1.6577509045600891, "epoch": 0.811183433577765, "grad_norm": 1.0662569999694824, "learning_rate": 1.412535979825322e-05, "loss": 1.3994, "mean_token_accuracy": 0.6656260589758555, "num_tokens": 1239250886.0, "step": 7384 }, { "entropy": 1.73176771402359, "epoch": 0.8112932904891379, "grad_norm": 0.8225398659706116, "learning_rate": 1.4123858261307227e-05, "loss": 1.2648, "mean_token_accuracy": 0.6719925204912821, "num_tokens": 1239372148.0, "step": 7385 }, { "entropy": 1.6916013360023499, "epoch": 0.8114031474005108, "grad_norm": 0.7312498092651367, "learning_rate": 1.4122356625492524e-05, "loss": 1.4389, "mean_token_accuracy": 0.6483626465002695, "num_tokens": 1239549194.0, "step": 7386 }, { "entropy": 1.715633491675059, "epoch": 0.8115130043118838, "grad_norm": 0.7113534808158875, "learning_rate": 1.4120854890856643e-05, "loss": 1.4785, "mean_token_accuracy": 0.6499229570229849, "num_tokens": 1239715171.0, "step": 7387 }, { "entropy": 1.6997787555058796, "epoch": 0.8116228612232567, "grad_norm": 0.7462812662124634, "learning_rate": 1.4119353057447112e-05, "loss": 1.2767, "mean_token_accuracy": 0.677174707253774, "num_tokens": 1239820052.0, "step": 7388 }, { "entropy": 1.7284424602985382, "epoch": 0.8117327181346297, "grad_norm": 0.6025354862213135, "learning_rate": 1.4117851125311462e-05, "loss": 1.4262, "mean_token_accuracy": 0.6419772803783417, "num_tokens": 1239981429.0, "step": 7389 }, { "entropy": 1.7345656553904216, "epoch": 0.8118425750460025, "grad_norm": 0.6548384428024292, "learning_rate": 1.4116349094497228e-05, "loss": 1.3457, "mean_token_accuracy": 0.6590167085329691, "num_tokens": 1240144145.0, "step": 7390 }, { "entropy": 1.7846961518128712, "epoch": 0.8119524319573755, "grad_norm": 0.7144470810890198, "learning_rate": 1.4114846965051952e-05, "loss": 1.5711, "mean_token_accuracy": 0.643833170334498, "num_tokens": 1240324650.0, "step": 7391 }, { "entropy": 1.7715802987416585, "epoch": 0.8120622888687484, "grad_norm": 0.8290162682533264, "learning_rate": 1.4113344737023167e-05, "loss": 1.2688, "mean_token_accuracy": 0.6774558126926422, "num_tokens": 1240451300.0, "step": 7392 }, { "entropy": 1.658871442079544, "epoch": 0.8121721457801214, "grad_norm": 0.6699681878089905, "learning_rate": 1.411184241045843e-05, "loss": 1.4638, "mean_token_accuracy": 0.6388088216384252, "num_tokens": 1240657822.0, "step": 7393 }, { "entropy": 1.733129362265269, "epoch": 0.8122820026914943, "grad_norm": 0.7511906623840332, "learning_rate": 1.411033998540528e-05, "loss": 1.3968, "mean_token_accuracy": 0.657474105556806, "num_tokens": 1240814705.0, "step": 7394 }, { "entropy": 1.7295902868111928, "epoch": 0.8123918596028673, "grad_norm": 0.7865959405899048, "learning_rate": 1.4108837461911273e-05, "loss": 1.477, "mean_token_accuracy": 0.6377530843019485, "num_tokens": 1240969971.0, "step": 7395 }, { "entropy": 1.695414235194524, "epoch": 0.8125017165142402, "grad_norm": 0.6054057478904724, "learning_rate": 1.410733484002396e-05, "loss": 1.4033, "mean_token_accuracy": 0.6436779797077179, "num_tokens": 1241149681.0, "step": 7396 }, { "entropy": 1.6878659625848134, "epoch": 0.8126115734256132, "grad_norm": 0.6600765585899353, "learning_rate": 1.4105832119790898e-05, "loss": 1.2499, "mean_token_accuracy": 0.68401571114858, "num_tokens": 1241269068.0, "step": 7397 }, { "entropy": 1.68837175766627, "epoch": 0.8127214303369861, "grad_norm": 0.788350522518158, "learning_rate": 1.4104329301259652e-05, "loss": 1.447, "mean_token_accuracy": 0.6587880005439123, "num_tokens": 1241425698.0, "step": 7398 }, { "entropy": 1.7728902697563171, "epoch": 0.812831287248359, "grad_norm": 0.839996337890625, "learning_rate": 1.4102826384477782e-05, "loss": 1.4142, "mean_token_accuracy": 0.6632864475250244, "num_tokens": 1241542424.0, "step": 7399 }, { "entropy": 1.679862250884374, "epoch": 0.812941144159732, "grad_norm": 0.7468640804290771, "learning_rate": 1.4101323369492854e-05, "loss": 1.3749, "mean_token_accuracy": 0.6650107949972153, "num_tokens": 1241699390.0, "step": 7400 }, { "entropy": 1.7201584080855052, "epoch": 0.8130510010711048, "grad_norm": 0.633568525314331, "learning_rate": 1.4099820256352436e-05, "loss": 1.4956, "mean_token_accuracy": 0.6278330336014429, "num_tokens": 1241877531.0, "step": 7401 }, { "entropy": 1.6532767017682393, "epoch": 0.8131608579824778, "grad_norm": 1.0218884944915771, "learning_rate": 1.4098317045104106e-05, "loss": 1.47, "mean_token_accuracy": 0.6716248542070389, "num_tokens": 1241999071.0, "step": 7402 }, { "entropy": 1.7018433213233948, "epoch": 0.8132707148938507, "grad_norm": 0.7204262018203735, "learning_rate": 1.4096813735795443e-05, "loss": 1.3745, "mean_token_accuracy": 0.6768209586540858, "num_tokens": 1242125538.0, "step": 7403 }, { "entropy": 1.6862289508183796, "epoch": 0.8133805718052237, "grad_norm": 0.6623913049697876, "learning_rate": 1.4095310328474015e-05, "loss": 1.4319, "mean_token_accuracy": 0.6565775275230408, "num_tokens": 1242318229.0, "step": 7404 }, { "entropy": 1.6885381937026978, "epoch": 0.8134904287165966, "grad_norm": 0.7589840888977051, "learning_rate": 1.4093806823187408e-05, "loss": 1.3628, "mean_token_accuracy": 0.6577220807472864, "num_tokens": 1242448793.0, "step": 7405 }, { "entropy": 1.6763391296068828, "epoch": 0.8136002856279696, "grad_norm": 0.8346347212791443, "learning_rate": 1.4092303219983215e-05, "loss": 1.3303, "mean_token_accuracy": 0.6764725148677826, "num_tokens": 1242553987.0, "step": 7406 }, { "entropy": 1.657248059908549, "epoch": 0.8137101425393425, "grad_norm": 0.7094236612319946, "learning_rate": 1.4090799518909015e-05, "loss": 1.3725, "mean_token_accuracy": 0.6620489905277888, "num_tokens": 1242749089.0, "step": 7407 }, { "entropy": 1.7001424332459767, "epoch": 0.8138199994507155, "grad_norm": 0.7831171751022339, "learning_rate": 1.4089295720012402e-05, "loss": 1.3146, "mean_token_accuracy": 0.6626470337311426, "num_tokens": 1242871762.0, "step": 7408 }, { "entropy": 1.7205629646778107, "epoch": 0.8139298563620884, "grad_norm": 0.744288980960846, "learning_rate": 1.4087791823340975e-05, "loss": 1.4054, "mean_token_accuracy": 0.6607886006434759, "num_tokens": 1243041832.0, "step": 7409 }, { "entropy": 1.688971887032191, "epoch": 0.8140397132734614, "grad_norm": 0.6575775742530823, "learning_rate": 1.4086287828942326e-05, "loss": 1.5004, "mean_token_accuracy": 0.6478760689496994, "num_tokens": 1243182302.0, "step": 7410 }, { "entropy": 1.6879553596178691, "epoch": 0.8141495701848342, "grad_norm": 0.6473801136016846, "learning_rate": 1.4084783736864055e-05, "loss": 1.273, "mean_token_accuracy": 0.6795784085988998, "num_tokens": 1243333276.0, "step": 7411 }, { "entropy": 1.771759420633316, "epoch": 0.8142594270962072, "grad_norm": 0.8150290250778198, "learning_rate": 1.4083279547153774e-05, "loss": 1.5647, "mean_token_accuracy": 0.6433297594388326, "num_tokens": 1243491601.0, "step": 7412 }, { "entropy": 1.6955374677975972, "epoch": 0.8143692840075801, "grad_norm": 0.5659390091896057, "learning_rate": 1.4081775259859083e-05, "loss": 1.5195, "mean_token_accuracy": 0.6323159287373225, "num_tokens": 1243669031.0, "step": 7413 }, { "entropy": 1.6806012392044067, "epoch": 0.814479140918953, "grad_norm": 0.7607001066207886, "learning_rate": 1.408027087502759e-05, "loss": 1.3539, "mean_token_accuracy": 0.6575177957614263, "num_tokens": 1243784885.0, "step": 7414 }, { "entropy": 1.735370695590973, "epoch": 0.814588997830326, "grad_norm": 0.6500069499015808, "learning_rate": 1.4078766392706919e-05, "loss": 1.3712, "mean_token_accuracy": 0.6685160199801127, "num_tokens": 1243908604.0, "step": 7415 }, { "entropy": 1.7190530995527904, "epoch": 0.8146988547416989, "grad_norm": 0.6073886156082153, "learning_rate": 1.4077261812944675e-05, "loss": 1.4499, "mean_token_accuracy": 0.6379290819168091, "num_tokens": 1244149442.0, "step": 7416 }, { "entropy": 1.7028957704703014, "epoch": 0.8148087116530719, "grad_norm": 0.7936030626296997, "learning_rate": 1.4075757135788481e-05, "loss": 1.3337, "mean_token_accuracy": 0.663459782799085, "num_tokens": 1244299370.0, "step": 7417 }, { "entropy": 1.6539308826128643, "epoch": 0.8149185685644448, "grad_norm": 0.6710909605026245, "learning_rate": 1.4074252361285961e-05, "loss": 1.3521, "mean_token_accuracy": 0.6716783146063486, "num_tokens": 1244465781.0, "step": 7418 }, { "entropy": 1.7069568435351055, "epoch": 0.8150284254758178, "grad_norm": 0.8002110123634338, "learning_rate": 1.4072747489484736e-05, "loss": 1.2018, "mean_token_accuracy": 0.6825656940539678, "num_tokens": 1244599995.0, "step": 7419 }, { "entropy": 1.663703719774882, "epoch": 0.8151382823871907, "grad_norm": 0.7054318785667419, "learning_rate": 1.407124252043244e-05, "loss": 1.3063, "mean_token_accuracy": 0.6676364541053772, "num_tokens": 1244739287.0, "step": 7420 }, { "entropy": 1.7169875999291737, "epoch": 0.8152481392985637, "grad_norm": 0.7952367067337036, "learning_rate": 1.4069737454176704e-05, "loss": 1.5895, "mean_token_accuracy": 0.6387151479721069, "num_tokens": 1244927614.0, "step": 7421 }, { "entropy": 1.7815453708171844, "epoch": 0.8153579962099365, "grad_norm": 0.959420919418335, "learning_rate": 1.4068232290765158e-05, "loss": 1.3701, "mean_token_accuracy": 0.6549298316240311, "num_tokens": 1245061586.0, "step": 7422 }, { "entropy": 1.681764543056488, "epoch": 0.8154678531213095, "grad_norm": 0.6878554224967957, "learning_rate": 1.4066727030245442e-05, "loss": 1.384, "mean_token_accuracy": 0.6570885529120764, "num_tokens": 1245214907.0, "step": 7423 }, { "entropy": 1.7041710217793782, "epoch": 0.8155777100326824, "grad_norm": 0.8578292727470398, "learning_rate": 1.4065221672665199e-05, "loss": 1.4803, "mean_token_accuracy": 0.6547748496135076, "num_tokens": 1245366976.0, "step": 7424 }, { "entropy": 1.7255032062530518, "epoch": 0.8156875669440554, "grad_norm": 0.6222707033157349, "learning_rate": 1.4063716218072072e-05, "loss": 1.3239, "mean_token_accuracy": 0.6556070099274317, "num_tokens": 1245497096.0, "step": 7425 }, { "entropy": 1.721158226331075, "epoch": 0.8157974238554283, "grad_norm": 0.7585559487342834, "learning_rate": 1.4062210666513705e-05, "loss": 1.55, "mean_token_accuracy": 0.6495650957028071, "num_tokens": 1245700324.0, "step": 7426 }, { "entropy": 1.6588152348995209, "epoch": 0.8159072807668012, "grad_norm": 0.6037783622741699, "learning_rate": 1.4060705018037752e-05, "loss": 1.2474, "mean_token_accuracy": 0.6797231733798981, "num_tokens": 1245829906.0, "step": 7427 }, { "entropy": 1.7529727220535278, "epoch": 0.8160171376781742, "grad_norm": 0.6627152562141418, "learning_rate": 1.4059199272691864e-05, "loss": 1.274, "mean_token_accuracy": 0.6731006652116776, "num_tokens": 1245965202.0, "step": 7428 }, { "entropy": 1.7085862557093303, "epoch": 0.8161269945895471, "grad_norm": 0.7288098931312561, "learning_rate": 1.4057693430523696e-05, "loss": 1.405, "mean_token_accuracy": 0.6680357307195663, "num_tokens": 1246151767.0, "step": 7429 }, { "entropy": 1.690618246793747, "epoch": 0.8162368515009201, "grad_norm": 0.8623577952384949, "learning_rate": 1.4056187491580911e-05, "loss": 1.4846, "mean_token_accuracy": 0.6495842784643173, "num_tokens": 1246308268.0, "step": 7430 }, { "entropy": 1.7508464058240254, "epoch": 0.816346708412293, "grad_norm": 0.716978132724762, "learning_rate": 1.4054681455911168e-05, "loss": 1.337, "mean_token_accuracy": 0.6598278482755026, "num_tokens": 1246427848.0, "step": 7431 }, { "entropy": 1.6642480889956157, "epoch": 0.816456565323666, "grad_norm": 0.632388174533844, "learning_rate": 1.4053175323562132e-05, "loss": 1.3534, "mean_token_accuracy": 0.6599838187297186, "num_tokens": 1246615964.0, "step": 7432 }, { "entropy": 1.6865338583787282, "epoch": 0.8165664222350388, "grad_norm": 0.7921580672264099, "learning_rate": 1.4051669094581478e-05, "loss": 1.4469, "mean_token_accuracy": 0.6657046775023142, "num_tokens": 1246761618.0, "step": 7433 }, { "entropy": 1.7106922467549641, "epoch": 0.8166762791464118, "grad_norm": 0.6639615893363953, "learning_rate": 1.4050162769016867e-05, "loss": 1.3718, "mean_token_accuracy": 0.6516933192809423, "num_tokens": 1246914824.0, "step": 7434 }, { "entropy": 1.6909891565640767, "epoch": 0.8167861360577847, "grad_norm": 0.6576387882232666, "learning_rate": 1.4048656346915984e-05, "loss": 1.4314, "mean_token_accuracy": 0.6656709363063177, "num_tokens": 1247129112.0, "step": 7435 }, { "entropy": 1.7483769953250885, "epoch": 0.8168959929691577, "grad_norm": 0.9580649733543396, "learning_rate": 1.4047149828326491e-05, "loss": 1.4487, "mean_token_accuracy": 0.648655946056048, "num_tokens": 1247272520.0, "step": 7436 }, { "entropy": 1.76641050974528, "epoch": 0.8170058498805306, "grad_norm": 0.7349089980125427, "learning_rate": 1.404564321329609e-05, "loss": 1.5273, "mean_token_accuracy": 0.6450001696745554, "num_tokens": 1247422308.0, "step": 7437 }, { "entropy": 1.7038010954856873, "epoch": 0.8171157067919036, "grad_norm": 0.612480878829956, "learning_rate": 1.4044136501872447e-05, "loss": 1.4251, "mean_token_accuracy": 0.6478384385506312, "num_tokens": 1247602434.0, "step": 7438 }, { "entropy": 1.7296733955542247, "epoch": 0.8172255637032765, "grad_norm": 0.6623514294624329, "learning_rate": 1.4042629694103259e-05, "loss": 1.4849, "mean_token_accuracy": 0.6379480262597402, "num_tokens": 1247765918.0, "step": 7439 }, { "entropy": 1.7219422558943431, "epoch": 0.8173354206146494, "grad_norm": 0.756436288356781, "learning_rate": 1.404112279003621e-05, "loss": 1.3645, "mean_token_accuracy": 0.6643207172552744, "num_tokens": 1247913353.0, "step": 7440 }, { "entropy": 1.6891942123572032, "epoch": 0.8174452775260224, "grad_norm": 0.7340204119682312, "learning_rate": 1.4039615789719e-05, "loss": 1.4962, "mean_token_accuracy": 0.6556605597337087, "num_tokens": 1248126159.0, "step": 7441 }, { "entropy": 1.7491665681203206, "epoch": 0.8175551344373952, "grad_norm": 0.7347027659416199, "learning_rate": 1.4038108693199313e-05, "loss": 1.3362, "mean_token_accuracy": 0.6711843659480413, "num_tokens": 1248263141.0, "step": 7442 }, { "entropy": 1.705857555071513, "epoch": 0.8176649913487682, "grad_norm": 0.8173395991325378, "learning_rate": 1.4036601500524858e-05, "loss": 1.5044, "mean_token_accuracy": 0.6398077656825384, "num_tokens": 1248453485.0, "step": 7443 }, { "entropy": 1.7155713438987732, "epoch": 0.8177748482601411, "grad_norm": 0.6897460222244263, "learning_rate": 1.4035094211743335e-05, "loss": 1.4384, "mean_token_accuracy": 0.6557023028532664, "num_tokens": 1248605375.0, "step": 7444 }, { "entropy": 1.7846886813640594, "epoch": 0.8178847051715141, "grad_norm": 0.7669224739074707, "learning_rate": 1.4033586826902446e-05, "loss": 1.4515, "mean_token_accuracy": 0.6553824543952942, "num_tokens": 1248829634.0, "step": 7445 }, { "entropy": 1.7502902050813038, "epoch": 0.817994562082887, "grad_norm": 0.9004881978034973, "learning_rate": 1.40320793460499e-05, "loss": 1.4205, "mean_token_accuracy": 0.6514177868763605, "num_tokens": 1249052664.0, "step": 7446 }, { "entropy": 1.7154946823914845, "epoch": 0.81810441899426, "grad_norm": 0.7069644927978516, "learning_rate": 1.4030571769233411e-05, "loss": 1.3711, "mean_token_accuracy": 0.6522306303183237, "num_tokens": 1249220107.0, "step": 7447 }, { "entropy": 1.7312849462032318, "epoch": 0.8182142759056329, "grad_norm": 0.7484097480773926, "learning_rate": 1.4029064096500689e-05, "loss": 1.2812, "mean_token_accuracy": 0.6704812347888947, "num_tokens": 1249347873.0, "step": 7448 }, { "entropy": 1.7800405323505402, "epoch": 0.8183241328170059, "grad_norm": 0.635948657989502, "learning_rate": 1.4027556327899456e-05, "loss": 1.3237, "mean_token_accuracy": 0.6623754402001699, "num_tokens": 1249477584.0, "step": 7449 }, { "entropy": 1.7426054279009502, "epoch": 0.8184339897283788, "grad_norm": 0.6542919874191284, "learning_rate": 1.402604846347743e-05, "loss": 1.5097, "mean_token_accuracy": 0.6329874048630396, "num_tokens": 1249683768.0, "step": 7450 }, { "entropy": 1.7369756003220875, "epoch": 0.8185438466397518, "grad_norm": 0.7994239330291748, "learning_rate": 1.402454050328233e-05, "loss": 1.4815, "mean_token_accuracy": 0.6522703667481741, "num_tokens": 1249832625.0, "step": 7451 }, { "entropy": 1.6854838530222576, "epoch": 0.8186537035511247, "grad_norm": 0.6902982592582703, "learning_rate": 1.4023032447361888e-05, "loss": 1.3764, "mean_token_accuracy": 0.6690236181020737, "num_tokens": 1249979361.0, "step": 7452 }, { "entropy": 1.717117150624593, "epoch": 0.8187635604624975, "grad_norm": 0.7572975158691406, "learning_rate": 1.4021524295763832e-05, "loss": 1.5046, "mean_token_accuracy": 0.6426510065793991, "num_tokens": 1250141814.0, "step": 7453 }, { "entropy": 1.6960802574952443, "epoch": 0.8188734173738705, "grad_norm": 0.6628535985946655, "learning_rate": 1.4020016048535894e-05, "loss": 1.4133, "mean_token_accuracy": 0.6609309216340383, "num_tokens": 1250311504.0, "step": 7454 }, { "entropy": 1.7238394518693287, "epoch": 0.8189832742852434, "grad_norm": 0.6855142712593079, "learning_rate": 1.401850770572581e-05, "loss": 1.3652, "mean_token_accuracy": 0.6540437589089075, "num_tokens": 1250445247.0, "step": 7455 }, { "entropy": 1.7303275068600972, "epoch": 0.8190931311966164, "grad_norm": 0.6414943933486938, "learning_rate": 1.4016999267381312e-05, "loss": 1.4394, "mean_token_accuracy": 0.6512100994586945, "num_tokens": 1250642583.0, "step": 7456 }, { "entropy": 1.6663293739159901, "epoch": 0.8192029881079893, "grad_norm": 0.6580475568771362, "learning_rate": 1.401549073355015e-05, "loss": 1.3234, "mean_token_accuracy": 0.6663972685734431, "num_tokens": 1250818999.0, "step": 7457 }, { "entropy": 1.7176962395509083, "epoch": 0.8193128450193623, "grad_norm": 0.5929837822914124, "learning_rate": 1.4013982104280063e-05, "loss": 1.5187, "mean_token_accuracy": 0.6359325100978216, "num_tokens": 1251021224.0, "step": 7458 }, { "entropy": 1.7315525313218434, "epoch": 0.8194227019307352, "grad_norm": 0.7245919108390808, "learning_rate": 1.4012473379618804e-05, "loss": 1.4878, "mean_token_accuracy": 0.6452689071496328, "num_tokens": 1251185042.0, "step": 7459 }, { "entropy": 1.7084216177463531, "epoch": 0.8195325588421082, "grad_norm": 0.713031530380249, "learning_rate": 1.4010964559614118e-05, "loss": 1.492, "mean_token_accuracy": 0.6423763384421667, "num_tokens": 1251383890.0, "step": 7460 }, { "entropy": 1.7116627792517345, "epoch": 0.8196424157534811, "grad_norm": 0.7990723848342896, "learning_rate": 1.4009455644313764e-05, "loss": 1.462, "mean_token_accuracy": 0.6500266542037328, "num_tokens": 1251560097.0, "step": 7461 }, { "entropy": 1.659277429183324, "epoch": 0.8197522726648541, "grad_norm": 0.6893109083175659, "learning_rate": 1.400794663376549e-05, "loss": 1.4211, "mean_token_accuracy": 0.6517892877260844, "num_tokens": 1251745264.0, "step": 7462 }, { "entropy": 1.6505893170833588, "epoch": 0.819862129576227, "grad_norm": 0.806117057800293, "learning_rate": 1.4006437528017063e-05, "loss": 1.4743, "mean_token_accuracy": 0.6712833146254221, "num_tokens": 1251939883.0, "step": 7463 }, { "entropy": 1.6510307888189952, "epoch": 0.8199719864876, "grad_norm": 0.6745132803916931, "learning_rate": 1.400492832711624e-05, "loss": 1.3437, "mean_token_accuracy": 0.6679824143648148, "num_tokens": 1252128793.0, "step": 7464 }, { "entropy": 1.706537942091624, "epoch": 0.8200818433989728, "grad_norm": 0.6287968158721924, "learning_rate": 1.4003419031110794e-05, "loss": 1.3509, "mean_token_accuracy": 0.6736375490824381, "num_tokens": 1252315132.0, "step": 7465 }, { "entropy": 1.6979309916496277, "epoch": 0.8201917003103458, "grad_norm": 0.7194148302078247, "learning_rate": 1.4001909640048485e-05, "loss": 1.2779, "mean_token_accuracy": 0.6610195636749268, "num_tokens": 1252483018.0, "step": 7466 }, { "entropy": 1.6625094612439473, "epoch": 0.8203015572217187, "grad_norm": 0.7005951404571533, "learning_rate": 1.4000400153977092e-05, "loss": 1.2451, "mean_token_accuracy": 0.6797448396682739, "num_tokens": 1252621050.0, "step": 7467 }, { "entropy": 1.635416607062022, "epoch": 0.8204114141330916, "grad_norm": 0.5949429273605347, "learning_rate": 1.3998890572944383e-05, "loss": 1.3522, "mean_token_accuracy": 0.6722677995761236, "num_tokens": 1252787245.0, "step": 7468 }, { "entropy": 1.6553764442602794, "epoch": 0.8205212710444646, "grad_norm": 0.7361176013946533, "learning_rate": 1.3997380896998141e-05, "loss": 1.4988, "mean_token_accuracy": 0.6553371498982111, "num_tokens": 1252949574.0, "step": 7469 }, { "entropy": 1.6532206336657207, "epoch": 0.8206311279558375, "grad_norm": 0.7868068814277649, "learning_rate": 1.3995871126186142e-05, "loss": 1.3155, "mean_token_accuracy": 0.6697768618663152, "num_tokens": 1253093237.0, "step": 7470 }, { "entropy": 1.6919066905975342, "epoch": 0.8207409848672105, "grad_norm": 0.7258913516998291, "learning_rate": 1.3994361260556175e-05, "loss": 1.4628, "mean_token_accuracy": 0.6440849602222443, "num_tokens": 1253248303.0, "step": 7471 }, { "entropy": 1.7257492740948994, "epoch": 0.8208508417785834, "grad_norm": 0.6986476182937622, "learning_rate": 1.3992851300156024e-05, "loss": 1.3161, "mean_token_accuracy": 0.6773944149414698, "num_tokens": 1253458286.0, "step": 7472 }, { "entropy": 1.7134167353312175, "epoch": 0.8209606986899564, "grad_norm": 0.6301187872886658, "learning_rate": 1.3991341245033474e-05, "loss": 1.2863, "mean_token_accuracy": 0.6769869873921076, "num_tokens": 1253628175.0, "step": 7473 }, { "entropy": 1.7503137389818828, "epoch": 0.8210705556013292, "grad_norm": 0.706662654876709, "learning_rate": 1.3989831095236321e-05, "loss": 1.4009, "mean_token_accuracy": 0.639786938826243, "num_tokens": 1253775675.0, "step": 7474 }, { "entropy": 1.678730736176173, "epoch": 0.8211804125127022, "grad_norm": 0.6688277721405029, "learning_rate": 1.3988320850812367e-05, "loss": 1.2569, "mean_token_accuracy": 0.6795259167750677, "num_tokens": 1253917761.0, "step": 7475 }, { "entropy": 1.6687143941720326, "epoch": 0.8212902694240751, "grad_norm": 0.7596714496612549, "learning_rate": 1.3986810511809396e-05, "loss": 1.2865, "mean_token_accuracy": 0.679057906071345, "num_tokens": 1254076456.0, "step": 7476 }, { "entropy": 1.7445678611596425, "epoch": 0.8214001263354481, "grad_norm": 0.7357975244522095, "learning_rate": 1.3985300078275226e-05, "loss": 1.3714, "mean_token_accuracy": 0.6583857784668604, "num_tokens": 1254202651.0, "step": 7477 }, { "entropy": 1.7018981575965881, "epoch": 0.821509983246821, "grad_norm": 0.7750345468521118, "learning_rate": 1.398378955025765e-05, "loss": 1.3008, "mean_token_accuracy": 0.6796058019002279, "num_tokens": 1254359741.0, "step": 7478 }, { "entropy": 1.8074349462985992, "epoch": 0.821619840158194, "grad_norm": 0.9239285588264465, "learning_rate": 1.398227892780448e-05, "loss": 1.4871, "mean_token_accuracy": 0.6412616769472758, "num_tokens": 1254504105.0, "step": 7479 }, { "entropy": 1.8056731621424358, "epoch": 0.8217296970695669, "grad_norm": 0.8136707544326782, "learning_rate": 1.3980768210963524e-05, "loss": 1.4746, "mean_token_accuracy": 0.6449335664510727, "num_tokens": 1254705954.0, "step": 7480 }, { "entropy": 1.6646238962809246, "epoch": 0.8218395539809398, "grad_norm": 0.6531665921211243, "learning_rate": 1.3979257399782603e-05, "loss": 1.4227, "mean_token_accuracy": 0.6651426901419958, "num_tokens": 1254843961.0, "step": 7481 }, { "entropy": 1.7323362529277802, "epoch": 0.8219494108923128, "grad_norm": 0.9482001662254333, "learning_rate": 1.3977746494309521e-05, "loss": 1.3124, "mean_token_accuracy": 0.6612970530986786, "num_tokens": 1254957021.0, "step": 7482 }, { "entropy": 1.7369599243005116, "epoch": 0.8220592678036857, "grad_norm": 0.7881601452827454, "learning_rate": 1.3976235494592107e-05, "loss": 1.3393, "mean_token_accuracy": 0.6736653447151184, "num_tokens": 1255084387.0, "step": 7483 }, { "entropy": 1.7011582553386688, "epoch": 0.8221691247150587, "grad_norm": 0.6895524859428406, "learning_rate": 1.3974724400678183e-05, "loss": 1.4111, "mean_token_accuracy": 0.6459124386310577, "num_tokens": 1255274711.0, "step": 7484 }, { "entropy": 1.767829418182373, "epoch": 0.8222789816264315, "grad_norm": 0.6690332293510437, "learning_rate": 1.3973213212615569e-05, "loss": 1.5124, "mean_token_accuracy": 0.6429779479900996, "num_tokens": 1255410443.0, "step": 7485 }, { "entropy": 1.756785641113917, "epoch": 0.8223888385378045, "grad_norm": 0.6052453517913818, "learning_rate": 1.3971701930452097e-05, "loss": 1.4162, "mean_token_accuracy": 0.6490946312745413, "num_tokens": 1255596887.0, "step": 7486 }, { "entropy": 1.6892687578996022, "epoch": 0.8224986954491774, "grad_norm": 0.7892059683799744, "learning_rate": 1.39701905542356e-05, "loss": 1.2558, "mean_token_accuracy": 0.6721896727879842, "num_tokens": 1255744213.0, "step": 7487 }, { "entropy": 1.7740124762058258, "epoch": 0.8226085523605504, "grad_norm": 0.6680493950843811, "learning_rate": 1.3968679084013905e-05, "loss": 1.3391, "mean_token_accuracy": 0.6599594056606293, "num_tokens": 1255886708.0, "step": 7488 }, { "entropy": 1.6903804937998455, "epoch": 0.8227184092719233, "grad_norm": 0.6269848346710205, "learning_rate": 1.396716751983486e-05, "loss": 1.4869, "mean_token_accuracy": 0.6487141301234564, "num_tokens": 1256060601.0, "step": 7489 }, { "entropy": 1.74964839220047, "epoch": 0.8228282661832963, "grad_norm": 0.6463726758956909, "learning_rate": 1.3965655861746302e-05, "loss": 1.3761, "mean_token_accuracy": 0.6572670241196951, "num_tokens": 1256223609.0, "step": 7490 }, { "entropy": 1.6653203467528026, "epoch": 0.8229381230946692, "grad_norm": 0.701028048992157, "learning_rate": 1.3964144109796067e-05, "loss": 1.3633, "mean_token_accuracy": 0.6773179620504379, "num_tokens": 1256361202.0, "step": 7491 }, { "entropy": 1.7040152450402577, "epoch": 0.8230479800060422, "grad_norm": 0.7073589563369751, "learning_rate": 1.396263226403201e-05, "loss": 1.4705, "mean_token_accuracy": 0.664089247584343, "num_tokens": 1256517961.0, "step": 7492 }, { "entropy": 1.6980151931444805, "epoch": 0.8231578369174151, "grad_norm": 0.5834183692932129, "learning_rate": 1.3961120324501978e-05, "loss": 1.4236, "mean_token_accuracy": 0.6516111840804418, "num_tokens": 1256713181.0, "step": 7493 }, { "entropy": 1.6546641091505687, "epoch": 0.823267693828788, "grad_norm": 0.686537504196167, "learning_rate": 1.3959608291253815e-05, "loss": 1.2607, "mean_token_accuracy": 0.6846804320812225, "num_tokens": 1256923364.0, "step": 7494 }, { "entropy": 1.7012316783269246, "epoch": 0.823377550740161, "grad_norm": 0.6142180562019348, "learning_rate": 1.3958096164335391e-05, "loss": 1.4143, "mean_token_accuracy": 0.6444648404916128, "num_tokens": 1257106231.0, "step": 7495 }, { "entropy": 1.707035501797994, "epoch": 0.8234874076515338, "grad_norm": 0.7143438458442688, "learning_rate": 1.395658394379455e-05, "loss": 1.4001, "mean_token_accuracy": 0.6408938119808832, "num_tokens": 1257276313.0, "step": 7496 }, { "entropy": 1.7746020754178364, "epoch": 0.8235972645629068, "grad_norm": 0.7095411419868469, "learning_rate": 1.3955071629679164e-05, "loss": 1.4486, "mean_token_accuracy": 0.6568672706683477, "num_tokens": 1257442953.0, "step": 7497 }, { "entropy": 1.683940976858139, "epoch": 0.8237071214742797, "grad_norm": 0.7230114340782166, "learning_rate": 1.395355922203709e-05, "loss": 1.3716, "mean_token_accuracy": 0.6584843943516413, "num_tokens": 1257589278.0, "step": 7498 }, { "entropy": 1.7272027730941772, "epoch": 0.8238169783856527, "grad_norm": 0.8744773268699646, "learning_rate": 1.39520467209162e-05, "loss": 1.4112, "mean_token_accuracy": 0.6432114988565445, "num_tokens": 1257732387.0, "step": 7499 }, { "entropy": 1.7044003407160442, "epoch": 0.8239268352970256, "grad_norm": 0.659590482711792, "learning_rate": 1.395053412636436e-05, "loss": 1.4211, "mean_token_accuracy": 0.652682383855184, "num_tokens": 1257916824.0, "step": 7500 }, { "entropy": 1.7109976410865784, "epoch": 0.8240366922083986, "grad_norm": 0.6942122578620911, "learning_rate": 1.3949021438429445e-05, "loss": 1.4207, "mean_token_accuracy": 0.6466724226872126, "num_tokens": 1258102470.0, "step": 7501 }, { "entropy": 1.6839348375797272, "epoch": 0.8241465491197715, "grad_norm": 0.6320016980171204, "learning_rate": 1.3947508657159328e-05, "loss": 1.3188, "mean_token_accuracy": 0.671158974369367, "num_tokens": 1258260131.0, "step": 7502 }, { "entropy": 1.7140422860781352, "epoch": 0.8242564060311445, "grad_norm": 0.9178858995437622, "learning_rate": 1.3945995782601893e-05, "loss": 1.3831, "mean_token_accuracy": 0.6665874371925989, "num_tokens": 1258411004.0, "step": 7503 }, { "entropy": 1.682992508014043, "epoch": 0.8243662629425174, "grad_norm": 0.6360436081886292, "learning_rate": 1.3944482814805018e-05, "loss": 1.2866, "mean_token_accuracy": 0.675972451766332, "num_tokens": 1258560249.0, "step": 7504 }, { "entropy": 1.7603969275951385, "epoch": 0.8244761198538904, "grad_norm": 0.6871092915534973, "learning_rate": 1.3942969753816589e-05, "loss": 1.295, "mean_token_accuracy": 0.6693955262502035, "num_tokens": 1258691545.0, "step": 7505 }, { "entropy": 1.7000905573368073, "epoch": 0.8245859767652632, "grad_norm": 0.6371673941612244, "learning_rate": 1.3941456599684493e-05, "loss": 1.4154, "mean_token_accuracy": 0.644666830698649, "num_tokens": 1258870586.0, "step": 7506 }, { "entropy": 1.6680241823196411, "epoch": 0.8246958336766362, "grad_norm": 0.6976217031478882, "learning_rate": 1.3939943352456623e-05, "loss": 1.3481, "mean_token_accuracy": 0.6589303016662598, "num_tokens": 1259046324.0, "step": 7507 }, { "entropy": 1.6960498293240864, "epoch": 0.8248056905880091, "grad_norm": 0.7053700685501099, "learning_rate": 1.3938430012180868e-05, "loss": 1.4329, "mean_token_accuracy": 0.6613789896170298, "num_tokens": 1259217515.0, "step": 7508 }, { "entropy": 1.712292194366455, "epoch": 0.824915547499382, "grad_norm": 0.6611363291740417, "learning_rate": 1.393691657890513e-05, "loss": 1.4571, "mean_token_accuracy": 0.6506155629952749, "num_tokens": 1259405409.0, "step": 7509 }, { "entropy": 1.698621819416682, "epoch": 0.825025404410755, "grad_norm": 0.7005683183670044, "learning_rate": 1.39354030526773e-05, "loss": 1.3047, "mean_token_accuracy": 0.6643372923135757, "num_tokens": 1259524807.0, "step": 7510 }, { "entropy": 1.612492283185323, "epoch": 0.8251352613221279, "grad_norm": 0.6348177194595337, "learning_rate": 1.3933889433545292e-05, "loss": 1.3432, "mean_token_accuracy": 0.6589316080013911, "num_tokens": 1259689565.0, "step": 7511 }, { "entropy": 1.6569429437319438, "epoch": 0.8252451182335009, "grad_norm": 0.6412181258201599, "learning_rate": 1.3932375721557004e-05, "loss": 1.3133, "mean_token_accuracy": 0.6661182244618734, "num_tokens": 1259862375.0, "step": 7512 }, { "entropy": 1.7173986732959747, "epoch": 0.8253549751448738, "grad_norm": 0.673636257648468, "learning_rate": 1.3930861916760343e-05, "loss": 1.5394, "mean_token_accuracy": 0.6368442674477895, "num_tokens": 1260043863.0, "step": 7513 }, { "entropy": 1.7598693370819092, "epoch": 0.8254648320562468, "grad_norm": 0.7226663827896118, "learning_rate": 1.3929348019203223e-05, "loss": 1.4727, "mean_token_accuracy": 0.6370566139618555, "num_tokens": 1260163201.0, "step": 7514 }, { "entropy": 1.7205652197202046, "epoch": 0.8255746889676197, "grad_norm": 1.0138691663742065, "learning_rate": 1.3927834028933565e-05, "loss": 1.2895, "mean_token_accuracy": 0.6636170645554861, "num_tokens": 1260282440.0, "step": 7515 }, { "entropy": 1.700068513552348, "epoch": 0.8256845458789926, "grad_norm": 0.7420987486839294, "learning_rate": 1.3926319945999272e-05, "loss": 1.4072, "mean_token_accuracy": 0.6525601297616959, "num_tokens": 1260443719.0, "step": 7516 }, { "entropy": 1.69573841492335, "epoch": 0.8257944027903655, "grad_norm": 0.5588994026184082, "learning_rate": 1.3924805770448275e-05, "loss": 1.3833, "mean_token_accuracy": 0.6473828703165054, "num_tokens": 1260653846.0, "step": 7517 }, { "entropy": 1.7026591698328655, "epoch": 0.8259042597017385, "grad_norm": 0.6840011477470398, "learning_rate": 1.3923291502328493e-05, "loss": 1.3299, "mean_token_accuracy": 0.6716416925191879, "num_tokens": 1260834311.0, "step": 7518 }, { "entropy": 1.7089744905630748, "epoch": 0.8260141166131114, "grad_norm": 0.8017948865890503, "learning_rate": 1.3921777141687851e-05, "loss": 1.3692, "mean_token_accuracy": 0.6594879478216171, "num_tokens": 1260978152.0, "step": 7519 }, { "entropy": 1.717678815126419, "epoch": 0.8261239735244844, "grad_norm": 0.6748639941215515, "learning_rate": 1.392026268857428e-05, "loss": 1.5332, "mean_token_accuracy": 0.6416458636522293, "num_tokens": 1261161530.0, "step": 7520 }, { "entropy": 1.7014433046181996, "epoch": 0.8262338304358573, "grad_norm": 0.5452734231948853, "learning_rate": 1.3918748143035712e-05, "loss": 1.4928, "mean_token_accuracy": 0.6255062818527222, "num_tokens": 1261392764.0, "step": 7521 }, { "entropy": 1.6911316414674122, "epoch": 0.8263436873472302, "grad_norm": 0.694558322429657, "learning_rate": 1.3917233505120073e-05, "loss": 1.4584, "mean_token_accuracy": 0.6488548169533411, "num_tokens": 1261521560.0, "step": 7522 }, { "entropy": 1.6891414125760396, "epoch": 0.8264535442586032, "grad_norm": 0.6259676814079285, "learning_rate": 1.3915718774875317e-05, "loss": 1.4898, "mean_token_accuracy": 0.632416253288587, "num_tokens": 1261714723.0, "step": 7523 }, { "entropy": 1.7080417772134144, "epoch": 0.8265634011699761, "grad_norm": 0.7242565751075745, "learning_rate": 1.3914203952349374e-05, "loss": 1.3847, "mean_token_accuracy": 0.6655266831318537, "num_tokens": 1261850914.0, "step": 7524 }, { "entropy": 1.6633085012435913, "epoch": 0.8266732580813491, "grad_norm": 0.7950205206871033, "learning_rate": 1.3912689037590189e-05, "loss": 1.5695, "mean_token_accuracy": 0.6606140186389288, "num_tokens": 1261993356.0, "step": 7525 }, { "entropy": 1.7180237174034119, "epoch": 0.826783114992722, "grad_norm": 0.6543428301811218, "learning_rate": 1.3911174030645705e-05, "loss": 1.4543, "mean_token_accuracy": 0.660189817349116, "num_tokens": 1262170004.0, "step": 7526 }, { "entropy": 1.6681218047936757, "epoch": 0.8268929719040949, "grad_norm": 0.6747716069221497, "learning_rate": 1.390965893156388e-05, "loss": 1.3851, "mean_token_accuracy": 0.650188018878301, "num_tokens": 1262325407.0, "step": 7527 }, { "entropy": 1.69178702433904, "epoch": 0.8270028288154678, "grad_norm": 0.7375442385673523, "learning_rate": 1.3908143740392657e-05, "loss": 1.4235, "mean_token_accuracy": 0.6644584635893503, "num_tokens": 1262479364.0, "step": 7528 }, { "entropy": 1.7722974220911663, "epoch": 0.8271126857268408, "grad_norm": 0.720845103263855, "learning_rate": 1.3906628457179994e-05, "loss": 1.3381, "mean_token_accuracy": 0.66179092725118, "num_tokens": 1262577801.0, "step": 7529 }, { "entropy": 1.7099088231722515, "epoch": 0.8272225426382137, "grad_norm": 0.8065167665481567, "learning_rate": 1.3905113081973854e-05, "loss": 1.3695, "mean_token_accuracy": 0.656641498208046, "num_tokens": 1262761100.0, "step": 7530 }, { "entropy": 1.665325830380122, "epoch": 0.8273323995495867, "grad_norm": 0.7184627056121826, "learning_rate": 1.390359761482219e-05, "loss": 1.3796, "mean_token_accuracy": 0.6679128209749857, "num_tokens": 1262899915.0, "step": 7531 }, { "entropy": 1.7003116806348164, "epoch": 0.8274422564609596, "grad_norm": 0.7257794141769409, "learning_rate": 1.390208205577297e-05, "loss": 1.3098, "mean_token_accuracy": 0.6723635445038477, "num_tokens": 1263025397.0, "step": 7532 }, { "entropy": 1.6903511186440785, "epoch": 0.8275521133723326, "grad_norm": 0.7771725058555603, "learning_rate": 1.3900566404874165e-05, "loss": 1.3125, "mean_token_accuracy": 0.6623782813549042, "num_tokens": 1263172104.0, "step": 7533 }, { "entropy": 1.738791823387146, "epoch": 0.8276619702837055, "grad_norm": 0.6048402786254883, "learning_rate": 1.3899050662173736e-05, "loss": 1.3476, "mean_token_accuracy": 0.6545044680436453, "num_tokens": 1263335359.0, "step": 7534 }, { "entropy": 1.7750772138436635, "epoch": 0.8277718271950784, "grad_norm": 0.7775545120239258, "learning_rate": 1.3897534827719663e-05, "loss": 1.6391, "mean_token_accuracy": 0.6482603698968887, "num_tokens": 1263472462.0, "step": 7535 }, { "entropy": 1.7368451058864594, "epoch": 0.8278816841064514, "grad_norm": 0.7810788750648499, "learning_rate": 1.3896018901559915e-05, "loss": 1.5206, "mean_token_accuracy": 0.6479092240333557, "num_tokens": 1263615854.0, "step": 7536 }, { "entropy": 1.7175723016262054, "epoch": 0.8279915410178242, "grad_norm": 0.675439178943634, "learning_rate": 1.389450288374248e-05, "loss": 1.4315, "mean_token_accuracy": 0.6542692532142004, "num_tokens": 1263767613.0, "step": 7537 }, { "entropy": 1.6717278858025868, "epoch": 0.8281013979291972, "grad_norm": 0.8180225491523743, "learning_rate": 1.3892986774315325e-05, "loss": 1.2958, "mean_token_accuracy": 0.6851775397857031, "num_tokens": 1263942570.0, "step": 7538 }, { "entropy": 1.7367408871650696, "epoch": 0.8282112548405701, "grad_norm": 0.7362589240074158, "learning_rate": 1.3891470573326446e-05, "loss": 1.5338, "mean_token_accuracy": 0.6389251748720804, "num_tokens": 1264115819.0, "step": 7539 }, { "entropy": 1.7383471826712291, "epoch": 0.8283211117519431, "grad_norm": 0.6163108348846436, "learning_rate": 1.3889954280823828e-05, "loss": 1.4486, "mean_token_accuracy": 0.6470398008823395, "num_tokens": 1264290334.0, "step": 7540 }, { "entropy": 1.697384923696518, "epoch": 0.828430968663316, "grad_norm": 0.6724095344543457, "learning_rate": 1.3888437896855456e-05, "loss": 1.2777, "mean_token_accuracy": 0.6817297836144766, "num_tokens": 1264414382.0, "step": 7541 }, { "entropy": 1.6797985633214314, "epoch": 0.828540825574689, "grad_norm": 0.6427711844444275, "learning_rate": 1.3886921421469329e-05, "loss": 1.2919, "mean_token_accuracy": 0.6724950323502222, "num_tokens": 1264550399.0, "step": 7542 }, { "entropy": 1.7129569550355275, "epoch": 0.8286506824860619, "grad_norm": 0.6985865831375122, "learning_rate": 1.3885404854713437e-05, "loss": 1.2449, "mean_token_accuracy": 0.6730537166198095, "num_tokens": 1264690996.0, "step": 7543 }, { "entropy": 1.7042374809583027, "epoch": 0.8287605393974349, "grad_norm": 0.6594078540802002, "learning_rate": 1.3883888196635785e-05, "loss": 1.4256, "mean_token_accuracy": 0.643621101975441, "num_tokens": 1264927827.0, "step": 7544 }, { "entropy": 1.7009220818678539, "epoch": 0.8288703963088078, "grad_norm": 0.8057886362075806, "learning_rate": 1.3882371447284369e-05, "loss": 1.4477, "mean_token_accuracy": 0.6611939668655396, "num_tokens": 1265053926.0, "step": 7545 }, { "entropy": 1.6498080094655354, "epoch": 0.8289802532201808, "grad_norm": 0.6930307149887085, "learning_rate": 1.3880854606707195e-05, "loss": 1.4983, "mean_token_accuracy": 0.6612697939078013, "num_tokens": 1265206127.0, "step": 7546 }, { "entropy": 1.694092224041621, "epoch": 0.8290901101315536, "grad_norm": 0.6370811462402344, "learning_rate": 1.3879337674952274e-05, "loss": 1.3453, "mean_token_accuracy": 0.6520874202251434, "num_tokens": 1265360777.0, "step": 7547 }, { "entropy": 1.7223928372065227, "epoch": 0.8291999670429265, "grad_norm": 0.6785632967948914, "learning_rate": 1.3877820652067609e-05, "loss": 1.4052, "mean_token_accuracy": 0.6524649461110433, "num_tokens": 1265517050.0, "step": 7548 }, { "entropy": 1.7133225500583649, "epoch": 0.8293098239542995, "grad_norm": 0.6474944353103638, "learning_rate": 1.3876303538101218e-05, "loss": 1.3002, "mean_token_accuracy": 0.6698809812466303, "num_tokens": 1265665927.0, "step": 7549 }, { "entropy": 1.7734433313210805, "epoch": 0.8294196808656724, "grad_norm": 0.7001741528511047, "learning_rate": 1.3874786333101117e-05, "loss": 1.3274, "mean_token_accuracy": 0.6608901371558508, "num_tokens": 1265774674.0, "step": 7550 }, { "entropy": 1.633638968070348, "epoch": 0.8295295377770454, "grad_norm": 0.6309921741485596, "learning_rate": 1.3873269037115325e-05, "loss": 1.4088, "mean_token_accuracy": 0.6554737389087677, "num_tokens": 1265997082.0, "step": 7551 }, { "entropy": 1.677928477525711, "epoch": 0.8296393946884183, "grad_norm": 2.2038040161132812, "learning_rate": 1.3871751650191861e-05, "loss": 1.1266, "mean_token_accuracy": 0.6781335373719534, "num_tokens": 1266160041.0, "step": 7552 }, { "entropy": 1.7169209221998851, "epoch": 0.8297492515997913, "grad_norm": 0.6631376147270203, "learning_rate": 1.387023417237875e-05, "loss": 1.4185, "mean_token_accuracy": 0.6488533268372217, "num_tokens": 1266332720.0, "step": 7553 }, { "entropy": 1.698218435049057, "epoch": 0.8298591085111642, "grad_norm": 0.6376110911369324, "learning_rate": 1.3868716603724024e-05, "loss": 1.4032, "mean_token_accuracy": 0.6554437925418218, "num_tokens": 1266512357.0, "step": 7554 }, { "entropy": 1.7614865104357402, "epoch": 0.8299689654225372, "grad_norm": 0.6769449710845947, "learning_rate": 1.386719894427571e-05, "loss": 1.457, "mean_token_accuracy": 0.640682727098465, "num_tokens": 1266656391.0, "step": 7555 }, { "entropy": 1.6298018097877502, "epoch": 0.8300788223339101, "grad_norm": 0.6778908371925354, "learning_rate": 1.386568119408184e-05, "loss": 1.3012, "mean_token_accuracy": 0.6756581912438074, "num_tokens": 1266798263.0, "step": 7556 }, { "entropy": 1.725428541501363, "epoch": 0.8301886792452831, "grad_norm": 0.6709782481193542, "learning_rate": 1.3864163353190453e-05, "loss": 1.3906, "mean_token_accuracy": 0.66632479429245, "num_tokens": 1266924812.0, "step": 7557 }, { "entropy": 1.6883962154388428, "epoch": 0.8302985361566559, "grad_norm": 0.68055260181427, "learning_rate": 1.3862645421649582e-05, "loss": 1.3803, "mean_token_accuracy": 0.6522450596094131, "num_tokens": 1267073930.0, "step": 7558 }, { "entropy": 1.7249091962973278, "epoch": 0.8304083930680289, "grad_norm": 0.669360339641571, "learning_rate": 1.386112739950728e-05, "loss": 1.3908, "mean_token_accuracy": 0.6687298119068146, "num_tokens": 1267263523.0, "step": 7559 }, { "entropy": 1.7766032218933105, "epoch": 0.8305182499794018, "grad_norm": 0.8653247952461243, "learning_rate": 1.3859609286811576e-05, "loss": 1.4976, "mean_token_accuracy": 0.6408978551626205, "num_tokens": 1267408031.0, "step": 7560 }, { "entropy": 1.7056670884291332, "epoch": 0.8306281068907748, "grad_norm": 0.7807538509368896, "learning_rate": 1.3858091083610537e-05, "loss": 1.4544, "mean_token_accuracy": 0.658196692665418, "num_tokens": 1267607695.0, "step": 7561 }, { "entropy": 1.7037642896175385, "epoch": 0.8307379638021477, "grad_norm": 0.6015084981918335, "learning_rate": 1.3856572789952197e-05, "loss": 1.4094, "mean_token_accuracy": 0.6588554928700129, "num_tokens": 1267778360.0, "step": 7562 }, { "entropy": 1.677244524161021, "epoch": 0.8308478207135206, "grad_norm": 0.6811591982841492, "learning_rate": 1.3855054405884619e-05, "loss": 1.3449, "mean_token_accuracy": 0.6656729827324549, "num_tokens": 1267937117.0, "step": 7563 }, { "entropy": 1.6949976682662964, "epoch": 0.8309576776248936, "grad_norm": 0.8715330362319946, "learning_rate": 1.385353593145585e-05, "loss": 1.4748, "mean_token_accuracy": 0.6535067111253738, "num_tokens": 1268150134.0, "step": 7564 }, { "entropy": 1.742262860139211, "epoch": 0.8310675345362665, "grad_norm": 0.7902666330337524, "learning_rate": 1.3852017366713962e-05, "loss": 1.6062, "mean_token_accuracy": 0.642037237683932, "num_tokens": 1268318445.0, "step": 7565 }, { "entropy": 1.707626740137736, "epoch": 0.8311773914476395, "grad_norm": 0.7191417813301086, "learning_rate": 1.3850498711707001e-05, "loss": 1.3055, "mean_token_accuracy": 0.6673828760782877, "num_tokens": 1268422008.0, "step": 7566 }, { "entropy": 1.689712017774582, "epoch": 0.8312872483590124, "grad_norm": 0.6316739916801453, "learning_rate": 1.3848979966483048e-05, "loss": 1.5149, "mean_token_accuracy": 0.6431446621815363, "num_tokens": 1268691031.0, "step": 7567 }, { "entropy": 1.6952175498008728, "epoch": 0.8313971052703854, "grad_norm": 0.6542701125144958, "learning_rate": 1.3847461131090159e-05, "loss": 1.3766, "mean_token_accuracy": 0.6535580505927404, "num_tokens": 1268869666.0, "step": 7568 }, { "entropy": 1.6646797557671864, "epoch": 0.8315069621817582, "grad_norm": 0.7026156187057495, "learning_rate": 1.3845942205576408e-05, "loss": 1.3871, "mean_token_accuracy": 0.6553316861391068, "num_tokens": 1269053305.0, "step": 7569 }, { "entropy": 1.6775665481885274, "epoch": 0.8316168190931312, "grad_norm": 0.7035835981369019, "learning_rate": 1.3844423189989868e-05, "loss": 1.2915, "mean_token_accuracy": 0.6790016442537308, "num_tokens": 1269181271.0, "step": 7570 }, { "entropy": 1.6944467822710674, "epoch": 0.8317266760045041, "grad_norm": 0.6992266178131104, "learning_rate": 1.3842904084378619e-05, "loss": 1.2448, "mean_token_accuracy": 0.6773143957058588, "num_tokens": 1269291180.0, "step": 7571 }, { "entropy": 1.7388213972250621, "epoch": 0.8318365329158771, "grad_norm": 0.7684722542762756, "learning_rate": 1.3841384888790734e-05, "loss": 1.5707, "mean_token_accuracy": 0.6360517491896948, "num_tokens": 1269452126.0, "step": 7572 }, { "entropy": 1.749897877375285, "epoch": 0.83194638982725, "grad_norm": 0.6144039630889893, "learning_rate": 1.38398656032743e-05, "loss": 1.4208, "mean_token_accuracy": 0.6549445589383444, "num_tokens": 1269616125.0, "step": 7573 }, { "entropy": 1.7409840126832326, "epoch": 0.832056246738623, "grad_norm": 0.687271773815155, "learning_rate": 1.3838346227877398e-05, "loss": 1.3635, "mean_token_accuracy": 0.6573161135117213, "num_tokens": 1269735340.0, "step": 7574 }, { "entropy": 1.650204559167226, "epoch": 0.8321661036499959, "grad_norm": 0.6359143257141113, "learning_rate": 1.3836826762648117e-05, "loss": 1.3908, "mean_token_accuracy": 0.6477119276920954, "num_tokens": 1269901539.0, "step": 7575 }, { "entropy": 1.7002926965554555, "epoch": 0.8322759605613688, "grad_norm": 0.6047689914703369, "learning_rate": 1.3835307207634545e-05, "loss": 1.3467, "mean_token_accuracy": 0.6575921724239985, "num_tokens": 1270036658.0, "step": 7576 }, { "entropy": 1.699533224105835, "epoch": 0.8323858174727418, "grad_norm": 0.690969705581665, "learning_rate": 1.3833787562884784e-05, "loss": 1.3226, "mean_token_accuracy": 0.6634237319231033, "num_tokens": 1270178148.0, "step": 7577 }, { "entropy": 1.664102743069331, "epoch": 0.8324956743841146, "grad_norm": 0.663180947303772, "learning_rate": 1.3832267828446914e-05, "loss": 1.2973, "mean_token_accuracy": 0.6731946070988973, "num_tokens": 1270323103.0, "step": 7578 }, { "entropy": 1.6971265574296315, "epoch": 0.8326055312954876, "grad_norm": 0.9087331295013428, "learning_rate": 1.383074800436905e-05, "loss": 1.4288, "mean_token_accuracy": 0.6556025594472885, "num_tokens": 1270488548.0, "step": 7579 }, { "entropy": 1.6653367976347606, "epoch": 0.8327153882068605, "grad_norm": 0.6120437979698181, "learning_rate": 1.3829228090699286e-05, "loss": 1.4193, "mean_token_accuracy": 0.6514114042123159, "num_tokens": 1270694474.0, "step": 7580 }, { "entropy": 1.7614048818747203, "epoch": 0.8328252451182335, "grad_norm": 0.6478108763694763, "learning_rate": 1.3827708087485727e-05, "loss": 1.5128, "mean_token_accuracy": 0.6446801622708639, "num_tokens": 1270918826.0, "step": 7581 }, { "entropy": 1.7623259325822194, "epoch": 0.8329351020296064, "grad_norm": 0.6244803667068481, "learning_rate": 1.3826187994776484e-05, "loss": 1.3796, "mean_token_accuracy": 0.6532119462887446, "num_tokens": 1271092214.0, "step": 7582 }, { "entropy": 1.6713003118832905, "epoch": 0.8330449589409794, "grad_norm": 0.8507984280586243, "learning_rate": 1.382466781261966e-05, "loss": 1.3142, "mean_token_accuracy": 0.6736680517594019, "num_tokens": 1271207600.0, "step": 7583 }, { "entropy": 1.7317315141359966, "epoch": 0.8331548158523523, "grad_norm": 0.6434891819953918, "learning_rate": 1.3823147541063376e-05, "loss": 1.5426, "mean_token_accuracy": 0.6401833097139994, "num_tokens": 1271386603.0, "step": 7584 }, { "entropy": 1.7154659231503804, "epoch": 0.8332646727637253, "grad_norm": 0.6992514133453369, "learning_rate": 1.3821627180155743e-05, "loss": 1.4521, "mean_token_accuracy": 0.6397911409536997, "num_tokens": 1271565473.0, "step": 7585 }, { "entropy": 1.6124655902385712, "epoch": 0.8333745296750982, "grad_norm": 0.5322008728981018, "learning_rate": 1.3820106729944882e-05, "loss": 1.411, "mean_token_accuracy": 0.6569860825935999, "num_tokens": 1271800335.0, "step": 7586 }, { "entropy": 1.693232387304306, "epoch": 0.8334843865864712, "grad_norm": 0.6390825510025024, "learning_rate": 1.3818586190478916e-05, "loss": 1.5123, "mean_token_accuracy": 0.6511318882306417, "num_tokens": 1271963559.0, "step": 7587 }, { "entropy": 1.7264248430728912, "epoch": 0.8335942434978441, "grad_norm": 0.8136751651763916, "learning_rate": 1.3817065561805962e-05, "loss": 1.3956, "mean_token_accuracy": 0.6575086663166682, "num_tokens": 1272120985.0, "step": 7588 }, { "entropy": 1.7141720652580261, "epoch": 0.8337041004092169, "grad_norm": 0.8287689089775085, "learning_rate": 1.3815544843974156e-05, "loss": 1.4675, "mean_token_accuracy": 0.661083827416102, "num_tokens": 1272246029.0, "step": 7589 }, { "entropy": 1.712748219569524, "epoch": 0.8338139573205899, "grad_norm": 0.6662017703056335, "learning_rate": 1.3814024037031624e-05, "loss": 1.4793, "mean_token_accuracy": 0.6412945588429769, "num_tokens": 1272428836.0, "step": 7590 }, { "entropy": 1.694316158692042, "epoch": 0.8339238142319628, "grad_norm": 0.7065073251724243, "learning_rate": 1.3812503141026497e-05, "loss": 1.4966, "mean_token_accuracy": 0.6386247078577677, "num_tokens": 1272662052.0, "step": 7591 }, { "entropy": 1.6363686819871266, "epoch": 0.8340336711433358, "grad_norm": 0.7142933011054993, "learning_rate": 1.3810982156006914e-05, "loss": 1.2562, "mean_token_accuracy": 0.6731893370548884, "num_tokens": 1272810815.0, "step": 7592 }, { "entropy": 1.705398678779602, "epoch": 0.8341435280547087, "grad_norm": 0.7638614773750305, "learning_rate": 1.3809461082021015e-05, "loss": 1.3403, "mean_token_accuracy": 0.6670798907677332, "num_tokens": 1272950592.0, "step": 7593 }, { "entropy": 1.7166258593400319, "epoch": 0.8342533849660817, "grad_norm": 0.7492454648017883, "learning_rate": 1.3807939919116935e-05, "loss": 1.4619, "mean_token_accuracy": 0.6594860553741455, "num_tokens": 1273083671.0, "step": 7594 }, { "entropy": 1.7036760747432709, "epoch": 0.8343632418774546, "grad_norm": 0.6306270360946655, "learning_rate": 1.3806418667342825e-05, "loss": 1.4087, "mean_token_accuracy": 0.6539557129144669, "num_tokens": 1273249788.0, "step": 7595 }, { "entropy": 1.7021053830782573, "epoch": 0.8344730987888276, "grad_norm": 0.7657412886619568, "learning_rate": 1.3804897326746826e-05, "loss": 1.2697, "mean_token_accuracy": 0.664386381705602, "num_tokens": 1273375149.0, "step": 7596 }, { "entropy": 1.7494820058345795, "epoch": 0.8345829557002005, "grad_norm": 0.6962859630584717, "learning_rate": 1.3803375897377091e-05, "loss": 1.3636, "mean_token_accuracy": 0.6591188112894694, "num_tokens": 1273491406.0, "step": 7597 }, { "entropy": 1.727548082669576, "epoch": 0.8346928126115735, "grad_norm": 0.5763877034187317, "learning_rate": 1.3801854379281772e-05, "loss": 1.4143, "mean_token_accuracy": 0.6519613862037659, "num_tokens": 1273691352.0, "step": 7598 }, { "entropy": 1.7307646075884502, "epoch": 0.8348026695229464, "grad_norm": 0.6695159077644348, "learning_rate": 1.3800332772509028e-05, "loss": 1.3111, "mean_token_accuracy": 0.6668645044167837, "num_tokens": 1273843898.0, "step": 7599 }, { "entropy": 1.7181670566399891, "epoch": 0.8349125264343193, "grad_norm": 0.675504207611084, "learning_rate": 1.3798811077107008e-05, "loss": 1.3693, "mean_token_accuracy": 0.6546612332264582, "num_tokens": 1273988852.0, "step": 7600 }, { "entropy": 1.7171874046325684, "epoch": 0.8350223833456922, "grad_norm": 0.8511648774147034, "learning_rate": 1.3797289293123884e-05, "loss": 1.4898, "mean_token_accuracy": 0.6442197859287262, "num_tokens": 1274195068.0, "step": 7601 }, { "entropy": 1.7084606885910034, "epoch": 0.8351322402570651, "grad_norm": 0.6916182637214661, "learning_rate": 1.379576742060781e-05, "loss": 1.5412, "mean_token_accuracy": 0.6330763747294744, "num_tokens": 1274428299.0, "step": 7602 }, { "entropy": 1.7000287473201752, "epoch": 0.8352420971684381, "grad_norm": 0.7107706069946289, "learning_rate": 1.379424545960696e-05, "loss": 1.2805, "mean_token_accuracy": 0.6748431076606115, "num_tokens": 1274598715.0, "step": 7603 }, { "entropy": 1.6960639754931133, "epoch": 0.835351954079811, "grad_norm": 0.6091739535331726, "learning_rate": 1.3792723410169498e-05, "loss": 1.4367, "mean_token_accuracy": 0.6490481595198313, "num_tokens": 1274769825.0, "step": 7604 }, { "entropy": 1.6969983875751495, "epoch": 0.835461810991184, "grad_norm": 0.6014200448989868, "learning_rate": 1.3791201272343602e-05, "loss": 1.4189, "mean_token_accuracy": 0.657557855049769, "num_tokens": 1274963060.0, "step": 7605 }, { "entropy": 1.7252587974071503, "epoch": 0.8355716679025569, "grad_norm": 0.6048182249069214, "learning_rate": 1.3789679046177438e-05, "loss": 1.3069, "mean_token_accuracy": 0.6655952880779902, "num_tokens": 1275104494.0, "step": 7606 }, { "entropy": 1.7161117593447368, "epoch": 0.8356815248139299, "grad_norm": 0.7150284647941589, "learning_rate": 1.3788156731719196e-05, "loss": 1.3054, "mean_token_accuracy": 0.6586286971966425, "num_tokens": 1275219815.0, "step": 7607 }, { "entropy": 1.7322356899579365, "epoch": 0.8357913817253028, "grad_norm": 0.719291627407074, "learning_rate": 1.3786634329017044e-05, "loss": 1.4993, "mean_token_accuracy": 0.6435506194829941, "num_tokens": 1275361651.0, "step": 7608 }, { "entropy": 1.7530653476715088, "epoch": 0.8359012386366758, "grad_norm": 0.6821619868278503, "learning_rate": 1.3785111838119174e-05, "loss": 1.4983, "mean_token_accuracy": 0.6553416550159454, "num_tokens": 1275492520.0, "step": 7609 }, { "entropy": 1.691613495349884, "epoch": 0.8360110955480486, "grad_norm": 0.6883498430252075, "learning_rate": 1.3783589259073766e-05, "loss": 1.3471, "mean_token_accuracy": 0.6675893068313599, "num_tokens": 1275630876.0, "step": 7610 }, { "entropy": 1.7405516107877095, "epoch": 0.8361209524594216, "grad_norm": 0.7199444770812988, "learning_rate": 1.3782066591929017e-05, "loss": 1.2501, "mean_token_accuracy": 0.6688971618811289, "num_tokens": 1275742939.0, "step": 7611 }, { "entropy": 1.6530592640240986, "epoch": 0.8362308093707945, "grad_norm": 0.7949721813201904, "learning_rate": 1.3780543836733112e-05, "loss": 1.4701, "mean_token_accuracy": 0.6404084165891012, "num_tokens": 1275941141.0, "step": 7612 }, { "entropy": 1.7587747077147167, "epoch": 0.8363406662821675, "grad_norm": 0.7122784852981567, "learning_rate": 1.3779020993534249e-05, "loss": 1.3668, "mean_token_accuracy": 0.6711858014265696, "num_tokens": 1276087415.0, "step": 7613 }, { "entropy": 1.6879003842671711, "epoch": 0.8364505231935404, "grad_norm": 0.7349809408187866, "learning_rate": 1.3777498062380622e-05, "loss": 1.4567, "mean_token_accuracy": 0.661365215977033, "num_tokens": 1276239252.0, "step": 7614 }, { "entropy": 1.6400221586227417, "epoch": 0.8365603801049134, "grad_norm": 0.7023922204971313, "learning_rate": 1.3775975043320433e-05, "loss": 1.2416, "mean_token_accuracy": 0.6837769548098246, "num_tokens": 1276403009.0, "step": 7615 }, { "entropy": 1.7130279938379924, "epoch": 0.8366702370162863, "grad_norm": 0.7748481631278992, "learning_rate": 1.3774451936401882e-05, "loss": 1.4081, "mean_token_accuracy": 0.670517255862554, "num_tokens": 1276574324.0, "step": 7616 }, { "entropy": 1.762073000272115, "epoch": 0.8367800939276592, "grad_norm": 0.7048318386077881, "learning_rate": 1.3772928741673184e-05, "loss": 1.5452, "mean_token_accuracy": 0.6333042333523432, "num_tokens": 1276765168.0, "step": 7617 }, { "entropy": 1.6817299922307332, "epoch": 0.8368899508390322, "grad_norm": 0.6088959574699402, "learning_rate": 1.3771405459182536e-05, "loss": 1.3579, "mean_token_accuracy": 0.6705379237731298, "num_tokens": 1276951991.0, "step": 7618 }, { "entropy": 1.6863965789477031, "epoch": 0.8369998077504051, "grad_norm": 0.7418268918991089, "learning_rate": 1.3769882088978154e-05, "loss": 1.2244, "mean_token_accuracy": 0.6815223594506582, "num_tokens": 1277059043.0, "step": 7619 }, { "entropy": 1.6903445621331532, "epoch": 0.837109664661778, "grad_norm": 0.6564303636550903, "learning_rate": 1.3768358631108254e-05, "loss": 1.4957, "mean_token_accuracy": 0.6512309859196345, "num_tokens": 1277229644.0, "step": 7620 }, { "entropy": 1.7503305276234944, "epoch": 0.8372195215731509, "grad_norm": 0.6145588159561157, "learning_rate": 1.376683508562105e-05, "loss": 1.3732, "mean_token_accuracy": 0.6656107902526855, "num_tokens": 1277367439.0, "step": 7621 }, { "entropy": 1.6609105666478474, "epoch": 0.8373293784845239, "grad_norm": 0.635491669178009, "learning_rate": 1.376531145256476e-05, "loss": 1.3981, "mean_token_accuracy": 0.6671904375155767, "num_tokens": 1277528410.0, "step": 7622 }, { "entropy": 1.6996264060338337, "epoch": 0.8374392353958968, "grad_norm": 0.6683711409568787, "learning_rate": 1.3763787731987614e-05, "loss": 1.3574, "mean_token_accuracy": 0.6552396714687347, "num_tokens": 1277707936.0, "step": 7623 }, { "entropy": 1.7298993468284607, "epoch": 0.8375490923072698, "grad_norm": 0.7171658873558044, "learning_rate": 1.3762263923937829e-05, "loss": 1.3435, "mean_token_accuracy": 0.6661288539568583, "num_tokens": 1277857298.0, "step": 7624 }, { "entropy": 1.6921402116616566, "epoch": 0.8376589492186427, "grad_norm": 0.6446428894996643, "learning_rate": 1.3760740028463632e-05, "loss": 1.3402, "mean_token_accuracy": 0.6615449984868368, "num_tokens": 1277989822.0, "step": 7625 }, { "entropy": 1.6879879732926686, "epoch": 0.8377688061300157, "grad_norm": 0.6671029925346375, "learning_rate": 1.3759216045613262e-05, "loss": 1.3044, "mean_token_accuracy": 0.6745457847913107, "num_tokens": 1278178157.0, "step": 7626 }, { "entropy": 1.6387710173924763, "epoch": 0.8378786630413886, "grad_norm": 0.5973528027534485, "learning_rate": 1.3757691975434949e-05, "loss": 1.4271, "mean_token_accuracy": 0.6483223338921865, "num_tokens": 1278350974.0, "step": 7627 }, { "entropy": 1.726877639691035, "epoch": 0.8379885199527616, "grad_norm": 0.7956101894378662, "learning_rate": 1.375616781797692e-05, "loss": 1.3057, "mean_token_accuracy": 0.6639814128478368, "num_tokens": 1278481024.0, "step": 7628 }, { "entropy": 1.6962241232395172, "epoch": 0.8380983768641345, "grad_norm": 0.7772151827812195, "learning_rate": 1.3754643573287428e-05, "loss": 1.3155, "mean_token_accuracy": 0.6763526697953542, "num_tokens": 1278635659.0, "step": 7629 }, { "entropy": 1.7717609802881877, "epoch": 0.8382082337755073, "grad_norm": 0.6711469292640686, "learning_rate": 1.3753119241414706e-05, "loss": 1.3777, "mean_token_accuracy": 0.6482406208912531, "num_tokens": 1278798975.0, "step": 7630 }, { "entropy": 1.7128020922342937, "epoch": 0.8383180906868803, "grad_norm": 0.6757957339286804, "learning_rate": 1.3751594822407e-05, "loss": 1.3464, "mean_token_accuracy": 0.6667918612559637, "num_tokens": 1278927300.0, "step": 7631 }, { "entropy": 1.7163095275561016, "epoch": 0.8384279475982532, "grad_norm": 0.6230423450469971, "learning_rate": 1.3750070316312559e-05, "loss": 1.4484, "mean_token_accuracy": 0.6352711419264475, "num_tokens": 1279138435.0, "step": 7632 }, { "entropy": 1.7445255815982819, "epoch": 0.8385378045096262, "grad_norm": 0.6208248734474182, "learning_rate": 1.374854572317963e-05, "loss": 1.594, "mean_token_accuracy": 0.6226391047239304, "num_tokens": 1279332164.0, "step": 7633 }, { "entropy": 1.6468991041183472, "epoch": 0.8386476614209991, "grad_norm": 0.7001860737800598, "learning_rate": 1.3747021043056468e-05, "loss": 1.4056, "mean_token_accuracy": 0.674930676817894, "num_tokens": 1279498135.0, "step": 7634 }, { "entropy": 1.7082558274269104, "epoch": 0.8387575183323721, "grad_norm": 0.6932383179664612, "learning_rate": 1.3745496275991328e-05, "loss": 1.329, "mean_token_accuracy": 0.6606937795877457, "num_tokens": 1279642711.0, "step": 7635 }, { "entropy": 1.7408236265182495, "epoch": 0.838867375243745, "grad_norm": 0.6592848896980286, "learning_rate": 1.374397142203247e-05, "loss": 1.4983, "mean_token_accuracy": 0.6435133467117945, "num_tokens": 1279837041.0, "step": 7636 }, { "entropy": 1.6659258703390758, "epoch": 0.838977232155118, "grad_norm": 0.7573028802871704, "learning_rate": 1.3742446481228149e-05, "loss": 1.5325, "mean_token_accuracy": 0.6281823118527731, "num_tokens": 1280043404.0, "step": 7637 }, { "entropy": 1.7001129885514576, "epoch": 0.8390870890664909, "grad_norm": 0.7068085670471191, "learning_rate": 1.3740921453626635e-05, "loss": 1.4459, "mean_token_accuracy": 0.6530873229106268, "num_tokens": 1280220340.0, "step": 7638 }, { "entropy": 1.7399956981341045, "epoch": 0.8391969459778639, "grad_norm": 0.7076330184936523, "learning_rate": 1.3739396339276194e-05, "loss": 1.5227, "mean_token_accuracy": 0.6424537748098373, "num_tokens": 1280364296.0, "step": 7639 }, { "entropy": 1.653020828962326, "epoch": 0.8393068028892368, "grad_norm": 0.7728797793388367, "learning_rate": 1.373787113822509e-05, "loss": 1.3846, "mean_token_accuracy": 0.6617040187120438, "num_tokens": 1280503851.0, "step": 7640 }, { "entropy": 1.7436749835809071, "epoch": 0.8394166598006098, "grad_norm": 0.7593557238578796, "learning_rate": 1.3736345850521602e-05, "loss": 1.4094, "mean_token_accuracy": 0.6662583450476328, "num_tokens": 1280648876.0, "step": 7641 }, { "entropy": 1.7310488323370616, "epoch": 0.8395265167119826, "grad_norm": 0.6699831485748291, "learning_rate": 1.3734820476213997e-05, "loss": 1.3641, "mean_token_accuracy": 0.6698733866214752, "num_tokens": 1280785864.0, "step": 7642 }, { "entropy": 1.6108634571234386, "epoch": 0.8396363736233555, "grad_norm": 0.667095959186554, "learning_rate": 1.3733295015350557e-05, "loss": 1.2481, "mean_token_accuracy": 0.6830354034900665, "num_tokens": 1280910220.0, "step": 7643 }, { "entropy": 1.8118035594622295, "epoch": 0.8397462305347285, "grad_norm": 0.7681687474250793, "learning_rate": 1.373176946797956e-05, "loss": 1.476, "mean_token_accuracy": 0.6538631469011307, "num_tokens": 1281025428.0, "step": 7644 }, { "entropy": 1.7167300780614216, "epoch": 0.8398560874461014, "grad_norm": 0.5978860259056091, "learning_rate": 1.3730243834149295e-05, "loss": 1.5872, "mean_token_accuracy": 0.6373479117949804, "num_tokens": 1281203179.0, "step": 7645 }, { "entropy": 1.613272448380788, "epoch": 0.8399659443574744, "grad_norm": 0.657454252243042, "learning_rate": 1.3728718113908039e-05, "loss": 1.3732, "mean_token_accuracy": 0.666471059123675, "num_tokens": 1281375107.0, "step": 7646 }, { "entropy": 1.699168860912323, "epoch": 0.8400758012688473, "grad_norm": 0.6724218726158142, "learning_rate": 1.3727192307304085e-05, "loss": 1.3107, "mean_token_accuracy": 0.6698317726453146, "num_tokens": 1281502914.0, "step": 7647 }, { "entropy": 1.6800562342007954, "epoch": 0.8401856581802203, "grad_norm": 0.6762789487838745, "learning_rate": 1.3725666414385723e-05, "loss": 1.3332, "mean_token_accuracy": 0.6533271272977194, "num_tokens": 1281663636.0, "step": 7648 }, { "entropy": 1.774364064137141, "epoch": 0.8402955150915932, "grad_norm": 0.7857850193977356, "learning_rate": 1.372414043520125e-05, "loss": 1.4153, "mean_token_accuracy": 0.6500428368647894, "num_tokens": 1281789745.0, "step": 7649 }, { "entropy": 1.7148446440696716, "epoch": 0.8404053720029662, "grad_norm": 0.650869607925415, "learning_rate": 1.3722614369798957e-05, "loss": 1.439, "mean_token_accuracy": 0.6369368185599645, "num_tokens": 1282005721.0, "step": 7650 }, { "entropy": 1.6923074920972188, "epoch": 0.840515228914339, "grad_norm": 0.7095004916191101, "learning_rate": 1.3721088218227148e-05, "loss": 1.3425, "mean_token_accuracy": 0.6514080464839935, "num_tokens": 1282166997.0, "step": 7651 }, { "entropy": 1.6772983868916829, "epoch": 0.840625085825712, "grad_norm": 0.6236726641654968, "learning_rate": 1.3719561980534122e-05, "loss": 1.4042, "mean_token_accuracy": 0.6637339144945145, "num_tokens": 1282356185.0, "step": 7652 }, { "entropy": 1.7177359561125438, "epoch": 0.8407349427370849, "grad_norm": 0.7458381652832031, "learning_rate": 1.3718035656768182e-05, "loss": 1.4507, "mean_token_accuracy": 0.6659137606620789, "num_tokens": 1282520253.0, "step": 7653 }, { "entropy": 1.6357039312521617, "epoch": 0.8408447996484579, "grad_norm": 0.5765164494514465, "learning_rate": 1.3716509246977643e-05, "loss": 1.4195, "mean_token_accuracy": 0.6570479621489843, "num_tokens": 1282709467.0, "step": 7654 }, { "entropy": 1.7260218759377797, "epoch": 0.8409546565598308, "grad_norm": 0.7507497668266296, "learning_rate": 1.3714982751210808e-05, "loss": 1.314, "mean_token_accuracy": 0.6629079331954321, "num_tokens": 1282831662.0, "step": 7655 }, { "entropy": 1.675975243250529, "epoch": 0.8410645134712038, "grad_norm": 0.7367669343948364, "learning_rate": 1.371345616951599e-05, "loss": 1.2233, "mean_token_accuracy": 0.6800348659356436, "num_tokens": 1282976248.0, "step": 7656 }, { "entropy": 1.7002997398376465, "epoch": 0.8411743703825767, "grad_norm": 0.6870225071907043, "learning_rate": 1.3711929501941512e-05, "loss": 1.3712, "mean_token_accuracy": 0.6616632044315338, "num_tokens": 1283105621.0, "step": 7657 }, { "entropy": 1.710231105486552, "epoch": 0.8412842272939496, "grad_norm": 0.6416940093040466, "learning_rate": 1.3710402748535688e-05, "loss": 1.3102, "mean_token_accuracy": 0.6693031589190165, "num_tokens": 1283251344.0, "step": 7658 }, { "entropy": 1.6835933824380238, "epoch": 0.8413940842053226, "grad_norm": 0.6878907680511475, "learning_rate": 1.3708875909346832e-05, "loss": 1.4185, "mean_token_accuracy": 0.6552811364332835, "num_tokens": 1283435304.0, "step": 7659 }, { "entropy": 1.671914945046107, "epoch": 0.8415039411166955, "grad_norm": 0.6930204033851624, "learning_rate": 1.3707348984423277e-05, "loss": 1.3017, "mean_token_accuracy": 0.6702569822470347, "num_tokens": 1283566399.0, "step": 7660 }, { "entropy": 1.6549534698327382, "epoch": 0.8416137980280685, "grad_norm": 0.6953391432762146, "learning_rate": 1.3705821973813352e-05, "loss": 1.4282, "mean_token_accuracy": 0.6581354439258575, "num_tokens": 1283720803.0, "step": 7661 }, { "entropy": 1.735606461763382, "epoch": 0.8417236549394413, "grad_norm": 0.8534516096115112, "learning_rate": 1.3704294877565372e-05, "loss": 1.3774, "mean_token_accuracy": 0.6662740260362625, "num_tokens": 1283849961.0, "step": 7662 }, { "entropy": 1.7239322364330292, "epoch": 0.8418335118508143, "grad_norm": 0.6426288485527039, "learning_rate": 1.3702767695727684e-05, "loss": 1.4996, "mean_token_accuracy": 0.6409449676672617, "num_tokens": 1284040809.0, "step": 7663 }, { "entropy": 1.6683675050735474, "epoch": 0.8419433687621872, "grad_norm": 0.7720414400100708, "learning_rate": 1.3701240428348612e-05, "loss": 1.482, "mean_token_accuracy": 0.6555820008118948, "num_tokens": 1284206147.0, "step": 7664 }, { "entropy": 1.7035513420899708, "epoch": 0.8420532256735602, "grad_norm": 0.5820039510726929, "learning_rate": 1.36997130754765e-05, "loss": 1.421, "mean_token_accuracy": 0.6452493071556091, "num_tokens": 1284370393.0, "step": 7665 }, { "entropy": 1.681229054927826, "epoch": 0.8421630825849331, "grad_norm": 0.7429522275924683, "learning_rate": 1.3698185637159682e-05, "loss": 1.235, "mean_token_accuracy": 0.6775188346703848, "num_tokens": 1284493127.0, "step": 7666 }, { "entropy": 1.7138707240422566, "epoch": 0.8422729394963061, "grad_norm": 0.5457119345664978, "learning_rate": 1.369665811344651e-05, "loss": 1.4761, "mean_token_accuracy": 0.6490335464477539, "num_tokens": 1284771528.0, "step": 7667 }, { "entropy": 1.6924820840358734, "epoch": 0.842382796407679, "grad_norm": 0.6924734115600586, "learning_rate": 1.369513050438532e-05, "loss": 1.3606, "mean_token_accuracy": 0.6642278035481771, "num_tokens": 1284923425.0, "step": 7668 }, { "entropy": 1.6925741334756215, "epoch": 0.842492653319052, "grad_norm": 0.6529973745346069, "learning_rate": 1.3693602810024466e-05, "loss": 1.2482, "mean_token_accuracy": 0.6726948221524557, "num_tokens": 1285060828.0, "step": 7669 }, { "entropy": 1.6858268876870472, "epoch": 0.8426025102304249, "grad_norm": 0.649381160736084, "learning_rate": 1.3692075030412295e-05, "loss": 1.462, "mean_token_accuracy": 0.6515221893787384, "num_tokens": 1285247826.0, "step": 7670 }, { "entropy": 1.7777485251426697, "epoch": 0.8427123671417978, "grad_norm": 0.713453471660614, "learning_rate": 1.3690547165597166e-05, "loss": 1.4854, "mean_token_accuracy": 0.643087034424146, "num_tokens": 1285378746.0, "step": 7671 }, { "entropy": 1.6306644082069397, "epoch": 0.8428222240531708, "grad_norm": 0.6652552485466003, "learning_rate": 1.3689019215627428e-05, "loss": 1.3156, "mean_token_accuracy": 0.671681821346283, "num_tokens": 1285560412.0, "step": 7672 }, { "entropy": 1.7075538237889607, "epoch": 0.8429320809645436, "grad_norm": 0.7357656359672546, "learning_rate": 1.3687491180551447e-05, "loss": 1.4037, "mean_token_accuracy": 0.6523735970258713, "num_tokens": 1285702229.0, "step": 7673 }, { "entropy": 1.7711325983206432, "epoch": 0.8430419378759166, "grad_norm": 0.686625599861145, "learning_rate": 1.3685963060417576e-05, "loss": 1.4509, "mean_token_accuracy": 0.6421075165271759, "num_tokens": 1285900255.0, "step": 7674 }, { "entropy": 1.6846754550933838, "epoch": 0.8431517947872895, "grad_norm": 0.7092203497886658, "learning_rate": 1.3684434855274189e-05, "loss": 1.2795, "mean_token_accuracy": 0.6742515216271082, "num_tokens": 1286027859.0, "step": 7675 }, { "entropy": 1.6374373237291973, "epoch": 0.8432616516986625, "grad_norm": 0.6417721509933472, "learning_rate": 1.3682906565169646e-05, "loss": 1.3225, "mean_token_accuracy": 0.6675249536832174, "num_tokens": 1286181159.0, "step": 7676 }, { "entropy": 1.6539149185021718, "epoch": 0.8433715086100354, "grad_norm": 0.6134423017501831, "learning_rate": 1.3681378190152321e-05, "loss": 1.4416, "mean_token_accuracy": 0.6583320200443268, "num_tokens": 1286380359.0, "step": 7677 }, { "entropy": 1.7651469906171162, "epoch": 0.8434813655214084, "grad_norm": 0.6425126791000366, "learning_rate": 1.3679849730270582e-05, "loss": 1.4183, "mean_token_accuracy": 0.6462546785672506, "num_tokens": 1286545480.0, "step": 7678 }, { "entropy": 1.6872264842192333, "epoch": 0.8435912224327813, "grad_norm": 0.6594815254211426, "learning_rate": 1.367832118557281e-05, "loss": 1.3546, "mean_token_accuracy": 0.6645703117052714, "num_tokens": 1286686590.0, "step": 7679 }, { "entropy": 1.7343334058920543, "epoch": 0.8437010793441543, "grad_norm": 0.7362040877342224, "learning_rate": 1.3676792556107376e-05, "loss": 1.3422, "mean_token_accuracy": 0.667659322420756, "num_tokens": 1286859906.0, "step": 7680 }, { "entropy": 1.7099198997020721, "epoch": 0.8438109362555272, "grad_norm": 0.6804381608963013, "learning_rate": 1.3675263841922665e-05, "loss": 1.643, "mean_token_accuracy": 0.6239674588044485, "num_tokens": 1287079553.0, "step": 7681 }, { "entropy": 1.7008231182893117, "epoch": 0.8439207931669002, "grad_norm": 0.7834773063659668, "learning_rate": 1.367373504306706e-05, "loss": 1.3961, "mean_token_accuracy": 0.6471186677614847, "num_tokens": 1287215979.0, "step": 7682 }, { "entropy": 1.7217775185902913, "epoch": 0.844030650078273, "grad_norm": 0.6311613917350769, "learning_rate": 1.3672206159588945e-05, "loss": 1.4119, "mean_token_accuracy": 0.6476258685191473, "num_tokens": 1287372294.0, "step": 7683 }, { "entropy": 1.7093331813812256, "epoch": 0.8441405069896459, "grad_norm": 2.1464595794677734, "learning_rate": 1.3670677191536707e-05, "loss": 1.2492, "mean_token_accuracy": 0.664307658871015, "num_tokens": 1287584672.0, "step": 7684 }, { "entropy": 1.7259198725223541, "epoch": 0.8442503639010189, "grad_norm": 0.6909459829330444, "learning_rate": 1.3669148138958744e-05, "loss": 1.4728, "mean_token_accuracy": 0.6467719525098801, "num_tokens": 1287755964.0, "step": 7685 }, { "entropy": 1.72645503282547, "epoch": 0.8443602208123918, "grad_norm": 0.6276677846908569, "learning_rate": 1.3667619001903442e-05, "loss": 1.4365, "mean_token_accuracy": 0.6519429683685303, "num_tokens": 1287962476.0, "step": 7686 }, { "entropy": 1.7283104161421459, "epoch": 0.8444700777237648, "grad_norm": 0.7658132314682007, "learning_rate": 1.3666089780419201e-05, "loss": 1.497, "mean_token_accuracy": 0.6409247318903605, "num_tokens": 1288111416.0, "step": 7687 }, { "entropy": 1.6543226341406505, "epoch": 0.8445799346351377, "grad_norm": 0.686872124671936, "learning_rate": 1.3664560474554419e-05, "loss": 1.4009, "mean_token_accuracy": 0.655271073182424, "num_tokens": 1288308554.0, "step": 7688 }, { "entropy": 1.6488666733105977, "epoch": 0.8446897915465107, "grad_norm": 0.6725640296936035, "learning_rate": 1.3663031084357501e-05, "loss": 1.3845, "mean_token_accuracy": 0.658675899108251, "num_tokens": 1288486606.0, "step": 7689 }, { "entropy": 1.7219010492165883, "epoch": 0.8447996484578836, "grad_norm": 0.6540157794952393, "learning_rate": 1.3661501609876847e-05, "loss": 1.3808, "mean_token_accuracy": 0.6512012432018915, "num_tokens": 1288658497.0, "step": 7690 }, { "entropy": 1.7627086639404297, "epoch": 0.8449095053692566, "grad_norm": 0.7566828727722168, "learning_rate": 1.3659972051160868e-05, "loss": 1.4124, "mean_token_accuracy": 0.6677108506361643, "num_tokens": 1288811315.0, "step": 7691 }, { "entropy": 1.7626505196094513, "epoch": 0.8450193622806295, "grad_norm": 0.6381642818450928, "learning_rate": 1.3658442408257972e-05, "loss": 1.4573, "mean_token_accuracy": 0.6391281684239706, "num_tokens": 1289035418.0, "step": 7692 }, { "entropy": 1.731861154238383, "epoch": 0.8451292191920025, "grad_norm": 0.6614934802055359, "learning_rate": 1.365691268121657e-05, "loss": 1.4221, "mean_token_accuracy": 0.6379889895518621, "num_tokens": 1289211468.0, "step": 7693 }, { "entropy": 1.737843285004298, "epoch": 0.8452390761033753, "grad_norm": 0.6590113043785095, "learning_rate": 1.3655382870085078e-05, "loss": 1.4666, "mean_token_accuracy": 0.6468397031227747, "num_tokens": 1289389121.0, "step": 7694 }, { "entropy": 1.723410467306773, "epoch": 0.8453489330147483, "grad_norm": 0.7802287936210632, "learning_rate": 1.3653852974911919e-05, "loss": 1.4251, "mean_token_accuracy": 0.6476560135682424, "num_tokens": 1289559256.0, "step": 7695 }, { "entropy": 1.7518675525983174, "epoch": 0.8454587899261212, "grad_norm": 0.7318578958511353, "learning_rate": 1.3652322995745504e-05, "loss": 1.2606, "mean_token_accuracy": 0.6652724295854568, "num_tokens": 1289658783.0, "step": 7696 }, { "entropy": 1.749257892370224, "epoch": 0.8455686468374941, "grad_norm": 0.7955240607261658, "learning_rate": 1.3650792932634268e-05, "loss": 1.2613, "mean_token_accuracy": 0.6822487364212672, "num_tokens": 1289795148.0, "step": 7697 }, { "entropy": 1.7356181144714355, "epoch": 0.8456785037488671, "grad_norm": 0.7357754707336426, "learning_rate": 1.3649262785626624e-05, "loss": 1.5575, "mean_token_accuracy": 0.6548448453346888, "num_tokens": 1289948148.0, "step": 7698 }, { "entropy": 1.7077325284481049, "epoch": 0.84578836066024, "grad_norm": 0.7098826169967651, "learning_rate": 1.3647732554771009e-05, "loss": 1.502, "mean_token_accuracy": 0.6426471124092737, "num_tokens": 1290138416.0, "step": 7699 }, { "entropy": 1.735207627216975, "epoch": 0.845898217571613, "grad_norm": 0.6340279579162598, "learning_rate": 1.3646202240115852e-05, "loss": 1.3897, "mean_token_accuracy": 0.6579069246848425, "num_tokens": 1290340173.0, "step": 7700 }, { "entropy": 1.6785156230131786, "epoch": 0.8460080744829859, "grad_norm": 0.8312824964523315, "learning_rate": 1.3644671841709586e-05, "loss": 1.2704, "mean_token_accuracy": 0.6747389038403829, "num_tokens": 1290456610.0, "step": 7701 }, { "entropy": 1.6480493446191151, "epoch": 0.8461179313943589, "grad_norm": 0.6698850989341736, "learning_rate": 1.3643141359600647e-05, "loss": 1.267, "mean_token_accuracy": 0.6753464639186859, "num_tokens": 1290626267.0, "step": 7702 }, { "entropy": 1.7196992834409077, "epoch": 0.8462277883057318, "grad_norm": 0.7034914493560791, "learning_rate": 1.3641610793837478e-05, "loss": 1.4121, "mean_token_accuracy": 0.6590729554494222, "num_tokens": 1290772213.0, "step": 7703 }, { "entropy": 1.7197033961613972, "epoch": 0.8463376452171048, "grad_norm": 0.6949339509010315, "learning_rate": 1.3640080144468515e-05, "loss": 1.3447, "mean_token_accuracy": 0.6591121902068456, "num_tokens": 1290899259.0, "step": 7704 }, { "entropy": 1.7233955065409343, "epoch": 0.8464475021284776, "grad_norm": 0.7986577749252319, "learning_rate": 1.3638549411542205e-05, "loss": 1.4605, "mean_token_accuracy": 0.660988504687945, "num_tokens": 1291113194.0, "step": 7705 }, { "entropy": 1.6643975575764973, "epoch": 0.8465573590398506, "grad_norm": 0.6427856683731079, "learning_rate": 1.3637018595106996e-05, "loss": 1.4165, "mean_token_accuracy": 0.6534582326809565, "num_tokens": 1291263837.0, "step": 7706 }, { "entropy": 1.7111331125100453, "epoch": 0.8466672159512235, "grad_norm": 0.7272197008132935, "learning_rate": 1.3635487695211337e-05, "loss": 1.3851, "mean_token_accuracy": 0.6659311503171921, "num_tokens": 1291412707.0, "step": 7707 }, { "entropy": 1.7255754868189495, "epoch": 0.8467770728625965, "grad_norm": 0.6733617782592773, "learning_rate": 1.3633956711903682e-05, "loss": 1.4219, "mean_token_accuracy": 0.6492577840884527, "num_tokens": 1291572306.0, "step": 7708 }, { "entropy": 1.7116366227467854, "epoch": 0.8468869297739694, "grad_norm": 0.6349593997001648, "learning_rate": 1.363242564523248e-05, "loss": 1.4648, "mean_token_accuracy": 0.6612848242123922, "num_tokens": 1291738907.0, "step": 7709 }, { "entropy": 1.782869964838028, "epoch": 0.8469967866853424, "grad_norm": 0.8094156384468079, "learning_rate": 1.3630894495246194e-05, "loss": 1.3299, "mean_token_accuracy": 0.6685720980167389, "num_tokens": 1291866880.0, "step": 7710 }, { "entropy": 1.6088589231173198, "epoch": 0.8471066435967153, "grad_norm": 1.1662250757217407, "learning_rate": 1.3629363261993285e-05, "loss": 1.2702, "mean_token_accuracy": 0.6650574405988058, "num_tokens": 1292039473.0, "step": 7711 }, { "entropy": 1.7531055708726246, "epoch": 0.8472165005080882, "grad_norm": 0.680927038192749, "learning_rate": 1.362783194552221e-05, "loss": 1.4834, "mean_token_accuracy": 0.6375831713279089, "num_tokens": 1292229376.0, "step": 7712 }, { "entropy": 1.694258709748586, "epoch": 0.8473263574194612, "grad_norm": 0.7578868865966797, "learning_rate": 1.3626300545881442e-05, "loss": 1.2226, "mean_token_accuracy": 0.6796438743670782, "num_tokens": 1292349842.0, "step": 7713 }, { "entropy": 1.6762607991695404, "epoch": 0.847436214330834, "grad_norm": 0.6741758584976196, "learning_rate": 1.362476906311944e-05, "loss": 1.4122, "mean_token_accuracy": 0.6557339429855347, "num_tokens": 1292545481.0, "step": 7714 }, { "entropy": 1.7098297476768494, "epoch": 0.847546071242207, "grad_norm": 0.6798667907714844, "learning_rate": 1.3623237497284683e-05, "loss": 1.4471, "mean_token_accuracy": 0.6456852555274963, "num_tokens": 1292734742.0, "step": 7715 }, { "entropy": 1.7433710793654125, "epoch": 0.8476559281535799, "grad_norm": 0.7019221186637878, "learning_rate": 1.3621705848425641e-05, "loss": 1.4745, "mean_token_accuracy": 0.6478450198968252, "num_tokens": 1292893188.0, "step": 7716 }, { "entropy": 1.731772820154826, "epoch": 0.8477657850649529, "grad_norm": 0.7611411213874817, "learning_rate": 1.3620174116590791e-05, "loss": 1.3669, "mean_token_accuracy": 0.6490048070748647, "num_tokens": 1293055710.0, "step": 7717 }, { "entropy": 1.7181940376758575, "epoch": 0.8478756419763258, "grad_norm": 0.8934732675552368, "learning_rate": 1.361864230182861e-05, "loss": 1.5196, "mean_token_accuracy": 0.6444053202867508, "num_tokens": 1293253973.0, "step": 7718 }, { "entropy": 1.685392697652181, "epoch": 0.8479854988876988, "grad_norm": 0.6858906149864197, "learning_rate": 1.361711040418758e-05, "loss": 1.2893, "mean_token_accuracy": 0.6633900205294291, "num_tokens": 1293387605.0, "step": 7719 }, { "entropy": 1.713352640469869, "epoch": 0.8480953557990717, "grad_norm": 0.6775051355361938, "learning_rate": 1.3615578423716187e-05, "loss": 1.513, "mean_token_accuracy": 0.6740926851828893, "num_tokens": 1293564698.0, "step": 7720 }, { "entropy": 1.767835130294164, "epoch": 0.8482052127104447, "grad_norm": 0.6613144278526306, "learning_rate": 1.3614046360462912e-05, "loss": 1.5051, "mean_token_accuracy": 0.6474483261505762, "num_tokens": 1293727382.0, "step": 7721 }, { "entropy": 1.7436497310797374, "epoch": 0.8483150696218176, "grad_norm": 0.6383576989173889, "learning_rate": 1.3612514214476249e-05, "loss": 1.2954, "mean_token_accuracy": 0.6786330391963323, "num_tokens": 1293878593.0, "step": 7722 }, { "entropy": 1.7230326632658641, "epoch": 0.8484249265331906, "grad_norm": 0.7083463668823242, "learning_rate": 1.361098198580469e-05, "loss": 1.3808, "mean_token_accuracy": 0.6482375711202621, "num_tokens": 1294025781.0, "step": 7723 }, { "entropy": 1.7545043329397838, "epoch": 0.8485347834445635, "grad_norm": 0.5937564969062805, "learning_rate": 1.3609449674496726e-05, "loss": 1.501, "mean_token_accuracy": 0.6370914578437805, "num_tokens": 1294227008.0, "step": 7724 }, { "entropy": 1.700508326292038, "epoch": 0.8486446403559363, "grad_norm": 0.8746032118797302, "learning_rate": 1.3607917280600855e-05, "loss": 1.3259, "mean_token_accuracy": 0.6724284738302231, "num_tokens": 1294393254.0, "step": 7725 }, { "entropy": 1.661549021800359, "epoch": 0.8487544972673093, "grad_norm": 0.7372915744781494, "learning_rate": 1.360638480416558e-05, "loss": 1.4659, "mean_token_accuracy": 0.6515500744183859, "num_tokens": 1294571064.0, "step": 7726 }, { "entropy": 1.7442650695641835, "epoch": 0.8488643541786822, "grad_norm": 0.6306323409080505, "learning_rate": 1.3604852245239397e-05, "loss": 1.5477, "mean_token_accuracy": 0.6327639867862066, "num_tokens": 1294744598.0, "step": 7727 }, { "entropy": 1.712537129720052, "epoch": 0.8489742110900552, "grad_norm": 0.7366087436676025, "learning_rate": 1.3603319603870818e-05, "loss": 1.3154, "mean_token_accuracy": 0.67984339594841, "num_tokens": 1294886218.0, "step": 7728 }, { "entropy": 1.7154980301856995, "epoch": 0.8490840680014281, "grad_norm": 0.8025618195533752, "learning_rate": 1.3601786880108343e-05, "loss": 1.5105, "mean_token_accuracy": 0.6657672872145971, "num_tokens": 1295017541.0, "step": 7729 }, { "entropy": 1.711490790049235, "epoch": 0.8491939249128011, "grad_norm": 0.6536463499069214, "learning_rate": 1.3600254074000488e-05, "loss": 1.4221, "mean_token_accuracy": 0.6637669056653976, "num_tokens": 1295227688.0, "step": 7730 }, { "entropy": 1.6762659549713135, "epoch": 0.849303781824174, "grad_norm": 0.7155306339263916, "learning_rate": 1.359872118559576e-05, "loss": 1.3234, "mean_token_accuracy": 0.6701004455486933, "num_tokens": 1295379203.0, "step": 7731 }, { "entropy": 1.6928722262382507, "epoch": 0.849413638735547, "grad_norm": 0.7518654465675354, "learning_rate": 1.359718821494268e-05, "loss": 1.5937, "mean_token_accuracy": 0.6596247951189677, "num_tokens": 1295542003.0, "step": 7732 }, { "entropy": 1.6721904973189037, "epoch": 0.8495234956469199, "grad_norm": 0.7368571758270264, "learning_rate": 1.3595655162089763e-05, "loss": 1.4228, "mean_token_accuracy": 0.6764175544182459, "num_tokens": 1295695454.0, "step": 7733 }, { "entropy": 1.7590387463569641, "epoch": 0.8496333525582929, "grad_norm": 0.7963206768035889, "learning_rate": 1.359412202708553e-05, "loss": 1.4675, "mean_token_accuracy": 0.647629976272583, "num_tokens": 1295827314.0, "step": 7734 }, { "entropy": 1.731603890657425, "epoch": 0.8497432094696658, "grad_norm": 0.6758211851119995, "learning_rate": 1.3592588809978506e-05, "loss": 1.3838, "mean_token_accuracy": 0.659120092789332, "num_tokens": 1295996986.0, "step": 7735 }, { "entropy": 1.6921556492646534, "epoch": 0.8498530663810387, "grad_norm": 0.5895377993583679, "learning_rate": 1.3591055510817213e-05, "loss": 1.3931, "mean_token_accuracy": 0.652939553062121, "num_tokens": 1296164645.0, "step": 7736 }, { "entropy": 1.6915649970372517, "epoch": 0.8499629232924116, "grad_norm": 0.7645225524902344, "learning_rate": 1.358952212965018e-05, "loss": 1.3265, "mean_token_accuracy": 0.6632872621218363, "num_tokens": 1296327910.0, "step": 7737 }, { "entropy": 1.7159571647644043, "epoch": 0.8500727802037845, "grad_norm": 0.7446976900100708, "learning_rate": 1.3587988666525935e-05, "loss": 1.4285, "mean_token_accuracy": 0.6510045429070791, "num_tokens": 1296466186.0, "step": 7738 }, { "entropy": 1.6824923356374104, "epoch": 0.8501826371151575, "grad_norm": 0.6190294027328491, "learning_rate": 1.358645512149302e-05, "loss": 1.4375, "mean_token_accuracy": 0.6470278948545456, "num_tokens": 1296637794.0, "step": 7739 }, { "entropy": 1.7388030588626862, "epoch": 0.8502924940265304, "grad_norm": 0.7785733938217163, "learning_rate": 1.3584921494599963e-05, "loss": 1.3741, "mean_token_accuracy": 0.657695472240448, "num_tokens": 1296760052.0, "step": 7740 }, { "entropy": 1.7327676912148793, "epoch": 0.8504023509379034, "grad_norm": 0.7766647338867188, "learning_rate": 1.3583387785895307e-05, "loss": 1.2975, "mean_token_accuracy": 0.673372263709704, "num_tokens": 1296869236.0, "step": 7741 }, { "entropy": 1.718622773885727, "epoch": 0.8505122078492763, "grad_norm": 0.690539538860321, "learning_rate": 1.3581853995427591e-05, "loss": 1.3776, "mean_token_accuracy": 0.6660670936107635, "num_tokens": 1297011769.0, "step": 7742 }, { "entropy": 1.6976758639017742, "epoch": 0.8506220647606493, "grad_norm": 0.6688826084136963, "learning_rate": 1.3580320123245361e-05, "loss": 1.3505, "mean_token_accuracy": 0.6525122026602427, "num_tokens": 1297178554.0, "step": 7743 }, { "entropy": 1.691978245973587, "epoch": 0.8507319216720222, "grad_norm": 0.6021746397018433, "learning_rate": 1.3578786169397158e-05, "loss": 1.3599, "mean_token_accuracy": 0.6527169843514761, "num_tokens": 1297364819.0, "step": 7744 }, { "entropy": 1.6742048561573029, "epoch": 0.8508417785833952, "grad_norm": 0.8681425452232361, "learning_rate": 1.357725213393154e-05, "loss": 1.2843, "mean_token_accuracy": 0.6764674683411916, "num_tokens": 1297516573.0, "step": 7745 }, { "entropy": 1.7442771196365356, "epoch": 0.850951635494768, "grad_norm": 0.6213224530220032, "learning_rate": 1.3575718016897046e-05, "loss": 1.4259, "mean_token_accuracy": 0.6503079384565353, "num_tokens": 1297672956.0, "step": 7746 }, { "entropy": 1.7606963614622753, "epoch": 0.851061492406141, "grad_norm": 0.7436356544494629, "learning_rate": 1.3574183818342245e-05, "loss": 1.3349, "mean_token_accuracy": 0.6708455085754395, "num_tokens": 1297818809.0, "step": 7747 }, { "entropy": 1.672513614098231, "epoch": 0.8511713493175139, "grad_norm": 0.73287034034729, "learning_rate": 1.3572649538315683e-05, "loss": 1.3939, "mean_token_accuracy": 0.6704998711744944, "num_tokens": 1297980662.0, "step": 7748 }, { "entropy": 1.7294066945711772, "epoch": 0.8512812062288869, "grad_norm": 0.6251292824745178, "learning_rate": 1.3571115176865923e-05, "loss": 1.542, "mean_token_accuracy": 0.643743579586347, "num_tokens": 1298143653.0, "step": 7749 }, { "entropy": 1.6557518442471821, "epoch": 0.8513910631402598, "grad_norm": 0.6958547830581665, "learning_rate": 1.3569580734041524e-05, "loss": 1.3905, "mean_token_accuracy": 0.6798481444517771, "num_tokens": 1298331907.0, "step": 7750 }, { "entropy": 1.6850103636582692, "epoch": 0.8515009200516328, "grad_norm": 0.7102126479148865, "learning_rate": 1.3568046209891055e-05, "loss": 1.2097, "mean_token_accuracy": 0.6820806463559469, "num_tokens": 1298488338.0, "step": 7751 }, { "entropy": 1.7033019761244457, "epoch": 0.8516107769630057, "grad_norm": 0.639173686504364, "learning_rate": 1.356651160446308e-05, "loss": 1.4144, "mean_token_accuracy": 0.6469381103912989, "num_tokens": 1298684159.0, "step": 7752 }, { "entropy": 1.7016167442003887, "epoch": 0.8517206338743786, "grad_norm": 0.9110562801361084, "learning_rate": 1.356497691780617e-05, "loss": 1.7517, "mean_token_accuracy": 0.6370747834444046, "num_tokens": 1298844311.0, "step": 7753 }, { "entropy": 1.6738309760888417, "epoch": 0.8518304907857516, "grad_norm": 0.7459472417831421, "learning_rate": 1.3563442149968896e-05, "loss": 1.3617, "mean_token_accuracy": 0.6658263305823008, "num_tokens": 1298991771.0, "step": 7754 }, { "entropy": 1.6745908459027607, "epoch": 0.8519403476971245, "grad_norm": 0.8173218369483948, "learning_rate": 1.356190730099983e-05, "loss": 1.3996, "mean_token_accuracy": 0.6685936997334162, "num_tokens": 1299145840.0, "step": 7755 }, { "entropy": 1.7424539625644684, "epoch": 0.8520502046084975, "grad_norm": 0.6466085314750671, "learning_rate": 1.3560372370947557e-05, "loss": 1.3801, "mean_token_accuracy": 0.6733126441637675, "num_tokens": 1299315556.0, "step": 7756 }, { "entropy": 1.6781065960725148, "epoch": 0.8521600615198703, "grad_norm": 0.6531357765197754, "learning_rate": 1.3558837359860651e-05, "loss": 1.3956, "mean_token_accuracy": 0.6521165718634924, "num_tokens": 1299491636.0, "step": 7757 }, { "entropy": 1.7538845141728718, "epoch": 0.8522699184312433, "grad_norm": 0.6810640692710876, "learning_rate": 1.3557302267787691e-05, "loss": 1.5143, "mean_token_accuracy": 0.6484838575124741, "num_tokens": 1299682671.0, "step": 7758 }, { "entropy": 1.734057645003001, "epoch": 0.8523797753426162, "grad_norm": 0.6916408538818359, "learning_rate": 1.3555767094777272e-05, "loss": 1.3975, "mean_token_accuracy": 0.6555085331201553, "num_tokens": 1299884491.0, "step": 7759 }, { "entropy": 1.7141314844290416, "epoch": 0.8524896322539892, "grad_norm": 0.6095522046089172, "learning_rate": 1.3554231840877973e-05, "loss": 1.3404, "mean_token_accuracy": 0.653201217452685, "num_tokens": 1300048240.0, "step": 7760 }, { "entropy": 1.73670361439387, "epoch": 0.8525994891653621, "grad_norm": 0.615277111530304, "learning_rate": 1.355269650613839e-05, "loss": 1.3983, "mean_token_accuracy": 0.653611977895101, "num_tokens": 1300238035.0, "step": 7761 }, { "entropy": 1.7244941194852192, "epoch": 0.8527093460767351, "grad_norm": 0.689967155456543, "learning_rate": 1.3551161090607113e-05, "loss": 1.3408, "mean_token_accuracy": 0.668989489475886, "num_tokens": 1300408112.0, "step": 7762 }, { "entropy": 1.7187703053156536, "epoch": 0.852819202988108, "grad_norm": 0.7365146279335022, "learning_rate": 1.3549625594332734e-05, "loss": 1.4606, "mean_token_accuracy": 0.6453435768683752, "num_tokens": 1300635927.0, "step": 7763 }, { "entropy": 1.7673336962858837, "epoch": 0.852929059899481, "grad_norm": 0.7960333824157715, "learning_rate": 1.3548090017363853e-05, "loss": 1.3389, "mean_token_accuracy": 0.658059557278951, "num_tokens": 1300768798.0, "step": 7764 }, { "entropy": 1.7308455010255177, "epoch": 0.8530389168108539, "grad_norm": 0.6843191385269165, "learning_rate": 1.3546554359749078e-05, "loss": 1.3368, "mean_token_accuracy": 0.6665947139263153, "num_tokens": 1300927812.0, "step": 7765 }, { "entropy": 1.708117683728536, "epoch": 0.8531487737222267, "grad_norm": 0.7319220900535583, "learning_rate": 1.3545018621537e-05, "loss": 1.4025, "mean_token_accuracy": 0.6578193108240763, "num_tokens": 1301110237.0, "step": 7766 }, { "entropy": 1.7092965046564739, "epoch": 0.8532586306335997, "grad_norm": 0.7056390047073364, "learning_rate": 1.354348280277623e-05, "loss": 1.4762, "mean_token_accuracy": 0.645611047744751, "num_tokens": 1301280209.0, "step": 7767 }, { "entropy": 1.6814217766125996, "epoch": 0.8533684875449726, "grad_norm": 0.6106694936752319, "learning_rate": 1.3541946903515373e-05, "loss": 1.4101, "mean_token_accuracy": 0.6508079369862875, "num_tokens": 1301471786.0, "step": 7768 }, { "entropy": 1.7410283883412678, "epoch": 0.8534783444563456, "grad_norm": 0.5932704210281372, "learning_rate": 1.3540410923803047e-05, "loss": 1.322, "mean_token_accuracy": 0.65825983385245, "num_tokens": 1301608122.0, "step": 7769 }, { "entropy": 1.654642830292384, "epoch": 0.8535882013677185, "grad_norm": 0.6340963840484619, "learning_rate": 1.3538874863687857e-05, "loss": 1.3904, "mean_token_accuracy": 0.6750276188055674, "num_tokens": 1301774068.0, "step": 7770 }, { "entropy": 1.8286733229955037, "epoch": 0.8536980582790915, "grad_norm": 0.8947479128837585, "learning_rate": 1.353733872321842e-05, "loss": 1.4883, "mean_token_accuracy": 0.6447364389896393, "num_tokens": 1301942077.0, "step": 7771 }, { "entropy": 1.734379122654597, "epoch": 0.8538079151904644, "grad_norm": 0.7082586884498596, "learning_rate": 1.3535802502443358e-05, "loss": 1.424, "mean_token_accuracy": 0.6567316949367523, "num_tokens": 1302111046.0, "step": 7772 }, { "entropy": 1.7233747939268749, "epoch": 0.8539177721018374, "grad_norm": 0.7988469004631042, "learning_rate": 1.353426620141129e-05, "loss": 1.2831, "mean_token_accuracy": 0.6733775039513906, "num_tokens": 1302216832.0, "step": 7773 }, { "entropy": 1.6604902148246765, "epoch": 0.8540276290132103, "grad_norm": 0.780096173286438, "learning_rate": 1.3532729820170835e-05, "loss": 1.2723, "mean_token_accuracy": 0.6720686207214991, "num_tokens": 1302337836.0, "step": 7774 }, { "entropy": 1.6743205388387044, "epoch": 0.8541374859245833, "grad_norm": 0.9057300090789795, "learning_rate": 1.353119335877063e-05, "loss": 1.4191, "mean_token_accuracy": 0.6672864605983099, "num_tokens": 1302489417.0, "step": 7775 }, { "entropy": 1.6898978352546692, "epoch": 0.8542473428359562, "grad_norm": 0.7002508044242859, "learning_rate": 1.3529656817259287e-05, "loss": 1.4303, "mean_token_accuracy": 0.6622383644183477, "num_tokens": 1302686661.0, "step": 7776 }, { "entropy": 1.6237229605515797, "epoch": 0.8543571997473292, "grad_norm": 0.7121983170509338, "learning_rate": 1.3528120195685451e-05, "loss": 1.3871, "mean_token_accuracy": 0.6706487536430359, "num_tokens": 1302863425.0, "step": 7777 }, { "entropy": 1.7381801307201385, "epoch": 0.854467056658702, "grad_norm": 0.7030956149101257, "learning_rate": 1.3526583494097749e-05, "loss": 1.3846, "mean_token_accuracy": 0.6603255172570547, "num_tokens": 1303005875.0, "step": 7778 }, { "entropy": 1.7050376236438751, "epoch": 0.8545769135700749, "grad_norm": 0.737881600856781, "learning_rate": 1.3525046712544818e-05, "loss": 1.4434, "mean_token_accuracy": 0.650563602646192, "num_tokens": 1303199958.0, "step": 7779 }, { "entropy": 1.65973166624705, "epoch": 0.8546867704814479, "grad_norm": 0.8631945848464966, "learning_rate": 1.3523509851075293e-05, "loss": 1.3929, "mean_token_accuracy": 0.6690235982338587, "num_tokens": 1303349109.0, "step": 7780 }, { "entropy": 1.7056522568066914, "epoch": 0.8547966273928208, "grad_norm": 0.6538403630256653, "learning_rate": 1.3521972909737824e-05, "loss": 1.4684, "mean_token_accuracy": 0.6520558893680573, "num_tokens": 1303526369.0, "step": 7781 }, { "entropy": 1.664311518271764, "epoch": 0.8549064843041938, "grad_norm": 0.5570957064628601, "learning_rate": 1.3520435888581044e-05, "loss": 1.3669, "mean_token_accuracy": 0.6615254829327265, "num_tokens": 1303777805.0, "step": 7782 }, { "entropy": 1.7221255699793498, "epoch": 0.8550163412155667, "grad_norm": 0.6258386969566345, "learning_rate": 1.351889878765361e-05, "loss": 1.3535, "mean_token_accuracy": 0.6568758289019266, "num_tokens": 1303942723.0, "step": 7783 }, { "entropy": 1.716312845547994, "epoch": 0.8551261981269397, "grad_norm": 0.6711044907569885, "learning_rate": 1.3517361607004158e-05, "loss": 1.3727, "mean_token_accuracy": 0.6513389696677526, "num_tokens": 1304117546.0, "step": 7784 }, { "entropy": 1.7806439300378163, "epoch": 0.8552360550383126, "grad_norm": 0.7212101817131042, "learning_rate": 1.3515824346681348e-05, "loss": 1.4373, "mean_token_accuracy": 0.6462565610806147, "num_tokens": 1304261647.0, "step": 7785 }, { "entropy": 1.5951051115989685, "epoch": 0.8553459119496856, "grad_norm": 0.6845982074737549, "learning_rate": 1.351428700673383e-05, "loss": 1.2047, "mean_token_accuracy": 0.6924866537253062, "num_tokens": 1304411452.0, "step": 7786 }, { "entropy": 1.709506740172704, "epoch": 0.8554557688610585, "grad_norm": 0.5833786129951477, "learning_rate": 1.3512749587210264e-05, "loss": 1.3802, "mean_token_accuracy": 0.6559178431828817, "num_tokens": 1304591192.0, "step": 7787 }, { "entropy": 1.6945497194925945, "epoch": 0.8555656257724314, "grad_norm": 0.7337885499000549, "learning_rate": 1.3511212088159302e-05, "loss": 1.426, "mean_token_accuracy": 0.6539691934982935, "num_tokens": 1304787587.0, "step": 7788 }, { "entropy": 1.7732653816541035, "epoch": 0.8556754826838043, "grad_norm": 0.7243953347206116, "learning_rate": 1.3509674509629612e-05, "loss": 1.5344, "mean_token_accuracy": 0.6324852307637533, "num_tokens": 1304969243.0, "step": 7789 }, { "entropy": 1.7157978514830272, "epoch": 0.8557853395951773, "grad_norm": 0.6849737763404846, "learning_rate": 1.3508136851669853e-05, "loss": 1.3162, "mean_token_accuracy": 0.6636256823937098, "num_tokens": 1305132204.0, "step": 7790 }, { "entropy": 1.6959392031033833, "epoch": 0.8558951965065502, "grad_norm": 0.65585857629776, "learning_rate": 1.3506599114328695e-05, "loss": 1.262, "mean_token_accuracy": 0.6834282577037811, "num_tokens": 1305276090.0, "step": 7791 }, { "entropy": 1.6683409810066223, "epoch": 0.8560050534179231, "grad_norm": 0.7357686758041382, "learning_rate": 1.35050612976548e-05, "loss": 1.5049, "mean_token_accuracy": 0.6626105507214864, "num_tokens": 1305470955.0, "step": 7792 }, { "entropy": 1.6899990141391754, "epoch": 0.8561149103292961, "grad_norm": 0.7392531633377075, "learning_rate": 1.3503523401696849e-05, "loss": 1.4496, "mean_token_accuracy": 0.6497561434904734, "num_tokens": 1305631140.0, "step": 7793 }, { "entropy": 1.6974543333053589, "epoch": 0.856224767240669, "grad_norm": 0.6308239102363586, "learning_rate": 1.3501985426503508e-05, "loss": 1.3712, "mean_token_accuracy": 0.6657331734895706, "num_tokens": 1305789255.0, "step": 7794 }, { "entropy": 1.6489758292833965, "epoch": 0.856334624152042, "grad_norm": 0.5670278668403625, "learning_rate": 1.3500447372123455e-05, "loss": 1.4481, "mean_token_accuracy": 0.6598306248585383, "num_tokens": 1305971505.0, "step": 7795 }, { "entropy": 1.640490214029948, "epoch": 0.8564444810634149, "grad_norm": 0.7669674754142761, "learning_rate": 1.3498909238605371e-05, "loss": 1.2363, "mean_token_accuracy": 0.68501316010952, "num_tokens": 1306085158.0, "step": 7796 }, { "entropy": 1.697544554869334, "epoch": 0.8565543379747879, "grad_norm": 0.7241058349609375, "learning_rate": 1.3497371025997938e-05, "loss": 1.3508, "mean_token_accuracy": 0.6685928404331207, "num_tokens": 1306257026.0, "step": 7797 }, { "entropy": 1.694200575351715, "epoch": 0.8566641948861607, "grad_norm": 0.7969628572463989, "learning_rate": 1.3495832734349831e-05, "loss": 1.3376, "mean_token_accuracy": 0.6603265057007471, "num_tokens": 1306376894.0, "step": 7798 }, { "entropy": 1.7276227374871571, "epoch": 0.8567740517975337, "grad_norm": 0.7449766397476196, "learning_rate": 1.3494294363709746e-05, "loss": 1.4243, "mean_token_accuracy": 0.6657720059156418, "num_tokens": 1306503412.0, "step": 7799 }, { "entropy": 1.6635934511820476, "epoch": 0.8568839087089066, "grad_norm": 0.7158152461051941, "learning_rate": 1.349275591412637e-05, "loss": 1.3208, "mean_token_accuracy": 0.6728008190790812, "num_tokens": 1306649874.0, "step": 7800 }, { "entropy": 1.7274696131547291, "epoch": 0.8569937656202796, "grad_norm": 0.7230932116508484, "learning_rate": 1.3491217385648392e-05, "loss": 1.5066, "mean_token_accuracy": 0.6431404302517573, "num_tokens": 1306837527.0, "step": 7801 }, { "entropy": 1.7124834557374318, "epoch": 0.8571036225316525, "grad_norm": 0.6679414510726929, "learning_rate": 1.3489678778324501e-05, "loss": 1.343, "mean_token_accuracy": 0.660656655828158, "num_tokens": 1306992011.0, "step": 7802 }, { "entropy": 1.730410397052765, "epoch": 0.8572134794430255, "grad_norm": 0.676726758480072, "learning_rate": 1.3488140092203405e-05, "loss": 1.4971, "mean_token_accuracy": 0.6543196365237236, "num_tokens": 1307160616.0, "step": 7803 }, { "entropy": 1.6937486827373505, "epoch": 0.8573233363543984, "grad_norm": 0.8226978182792664, "learning_rate": 1.3486601327333795e-05, "loss": 1.3941, "mean_token_accuracy": 0.6554071108500162, "num_tokens": 1307301087.0, "step": 7804 }, { "entropy": 1.6323599517345428, "epoch": 0.8574331932657714, "grad_norm": 0.7059171199798584, "learning_rate": 1.3485062483764372e-05, "loss": 1.3001, "mean_token_accuracy": 0.6734863370656967, "num_tokens": 1307429854.0, "step": 7805 }, { "entropy": 1.6826396882534027, "epoch": 0.8575430501771443, "grad_norm": 0.6876824498176575, "learning_rate": 1.3483523561543842e-05, "loss": 1.4278, "mean_token_accuracy": 0.6498329937458038, "num_tokens": 1307587225.0, "step": 7806 }, { "entropy": 1.7240065733591716, "epoch": 0.8576529070885172, "grad_norm": 0.7715162634849548, "learning_rate": 1.348198456072091e-05, "loss": 1.4212, "mean_token_accuracy": 0.6564840972423553, "num_tokens": 1307748629.0, "step": 7807 }, { "entropy": 1.7205755809942882, "epoch": 0.8577627639998902, "grad_norm": 0.68276047706604, "learning_rate": 1.3480445481344282e-05, "loss": 1.467, "mean_token_accuracy": 0.6505002329746882, "num_tokens": 1307912570.0, "step": 7808 }, { "entropy": 1.754823088645935, "epoch": 0.857872620911263, "grad_norm": 0.6763650178909302, "learning_rate": 1.3478906323462677e-05, "loss": 1.5182, "mean_token_accuracy": 0.6330088227987289, "num_tokens": 1308180580.0, "step": 7809 }, { "entropy": 1.722066303094228, "epoch": 0.857982477822636, "grad_norm": 0.7227879166603088, "learning_rate": 1.3477367087124801e-05, "loss": 1.3975, "mean_token_accuracy": 0.6556669274965922, "num_tokens": 1308325653.0, "step": 7810 }, { "entropy": 1.6812674701213837, "epoch": 0.8580923347340089, "grad_norm": 0.5938608050346375, "learning_rate": 1.3475827772379374e-05, "loss": 1.421, "mean_token_accuracy": 0.6548441002766291, "num_tokens": 1308530259.0, "step": 7811 }, { "entropy": 1.7212364772955577, "epoch": 0.8582021916453819, "grad_norm": 0.7832656502723694, "learning_rate": 1.3474288379275116e-05, "loss": 1.375, "mean_token_accuracy": 0.6673020124435425, "num_tokens": 1308688364.0, "step": 7812 }, { "entropy": 1.6957217554251354, "epoch": 0.8583120485567548, "grad_norm": 0.8879761695861816, "learning_rate": 1.3472748907860745e-05, "loss": 1.3826, "mean_token_accuracy": 0.6552250782648722, "num_tokens": 1308826808.0, "step": 7813 }, { "entropy": 1.6911301414171855, "epoch": 0.8584219054681278, "grad_norm": 0.7592836022377014, "learning_rate": 1.347120935818498e-05, "loss": 1.3259, "mean_token_accuracy": 0.6610475679238638, "num_tokens": 1308965132.0, "step": 7814 }, { "entropy": 1.6509084304173787, "epoch": 0.8585317623795007, "grad_norm": 0.6882309317588806, "learning_rate": 1.3469669730296558e-05, "loss": 1.5456, "mean_token_accuracy": 0.6430366585652033, "num_tokens": 1309185918.0, "step": 7815 }, { "entropy": 1.6446500718593597, "epoch": 0.8586416192908737, "grad_norm": 0.7358518838882446, "learning_rate": 1.34681300242442e-05, "loss": 1.277, "mean_token_accuracy": 0.6738730818033218, "num_tokens": 1309312422.0, "step": 7816 }, { "entropy": 1.7559974590937297, "epoch": 0.8587514762022466, "grad_norm": 0.682422399520874, "learning_rate": 1.346659024007664e-05, "loss": 1.3794, "mean_token_accuracy": 0.6634985208511353, "num_tokens": 1309441403.0, "step": 7817 }, { "entropy": 1.663654625415802, "epoch": 0.8588613331136196, "grad_norm": 0.6666421890258789, "learning_rate": 1.3465050377842608e-05, "loss": 1.4686, "mean_token_accuracy": 0.659173255165418, "num_tokens": 1309669132.0, "step": 7818 }, { "entropy": 1.6507892608642578, "epoch": 0.8589711900249924, "grad_norm": 0.6008228659629822, "learning_rate": 1.3463510437590846e-05, "loss": 1.4239, "mean_token_accuracy": 0.6523040185372034, "num_tokens": 1309852500.0, "step": 7819 }, { "entropy": 1.7078345616658528, "epoch": 0.8590810469363653, "grad_norm": 0.6348268985748291, "learning_rate": 1.3461970419370083e-05, "loss": 1.4027, "mean_token_accuracy": 0.6630667001008987, "num_tokens": 1310008578.0, "step": 7820 }, { "entropy": 1.7023847003777821, "epoch": 0.8591909038477383, "grad_norm": 0.8338757157325745, "learning_rate": 1.3460430323229071e-05, "loss": 1.3093, "mean_token_accuracy": 0.6611761053403219, "num_tokens": 1310151323.0, "step": 7821 }, { "entropy": 1.6935268541177113, "epoch": 0.8593007607591112, "grad_norm": 0.7700740098953247, "learning_rate": 1.3458890149216546e-05, "loss": 1.4202, "mean_token_accuracy": 0.6526497304439545, "num_tokens": 1310313628.0, "step": 7822 }, { "entropy": 1.7246541380882263, "epoch": 0.8594106176704842, "grad_norm": 0.6157558560371399, "learning_rate": 1.3457349897381256e-05, "loss": 1.2788, "mean_token_accuracy": 0.6698776682217916, "num_tokens": 1310453120.0, "step": 7823 }, { "entropy": 1.7128772636254628, "epoch": 0.8595204745818571, "grad_norm": 0.8066511750221252, "learning_rate": 1.345580956777195e-05, "loss": 1.3368, "mean_token_accuracy": 0.6596921930710474, "num_tokens": 1310607539.0, "step": 7824 }, { "entropy": 1.7081545094648998, "epoch": 0.8596303314932301, "grad_norm": 0.754356324672699, "learning_rate": 1.3454269160437377e-05, "loss": 1.4662, "mean_token_accuracy": 0.638428787390391, "num_tokens": 1310768768.0, "step": 7825 }, { "entropy": 1.6387466490268707, "epoch": 0.859740188404603, "grad_norm": 0.6813954710960388, "learning_rate": 1.345272867542629e-05, "loss": 1.2403, "mean_token_accuracy": 0.6748148997624716, "num_tokens": 1310885892.0, "step": 7826 }, { "entropy": 1.7912492752075195, "epoch": 0.859850045315976, "grad_norm": 0.7757691144943237, "learning_rate": 1.3451188112787446e-05, "loss": 1.3154, "mean_token_accuracy": 0.6780353983243307, "num_tokens": 1311037679.0, "step": 7827 }, { "entropy": 1.6761020123958588, "epoch": 0.8599599022273489, "grad_norm": 0.8084965348243713, "learning_rate": 1.3449647472569603e-05, "loss": 1.3014, "mean_token_accuracy": 0.6743810077508291, "num_tokens": 1311198055.0, "step": 7828 }, { "entropy": 1.708401362101237, "epoch": 0.8600697591387219, "grad_norm": 0.6399450898170471, "learning_rate": 1.344810675482152e-05, "loss": 1.2311, "mean_token_accuracy": 0.6786706000566483, "num_tokens": 1311322715.0, "step": 7829 }, { "entropy": 1.662269651889801, "epoch": 0.8601796160500947, "grad_norm": 0.6289361715316772, "learning_rate": 1.3446565959591963e-05, "loss": 1.2845, "mean_token_accuracy": 0.6679496963818868, "num_tokens": 1311461506.0, "step": 7830 }, { "entropy": 1.682697872320811, "epoch": 0.8602894729614677, "grad_norm": 0.613720715045929, "learning_rate": 1.3445025086929698e-05, "loss": 1.4083, "mean_token_accuracy": 0.65741033355395, "num_tokens": 1311626696.0, "step": 7831 }, { "entropy": 1.7680631478627522, "epoch": 0.8603993298728406, "grad_norm": 0.7231320142745972, "learning_rate": 1.3443484136883486e-05, "loss": 1.3911, "mean_token_accuracy": 0.6568551162878672, "num_tokens": 1311757726.0, "step": 7832 }, { "entropy": 1.722759485244751, "epoch": 0.8605091867842135, "grad_norm": 0.8545400500297546, "learning_rate": 1.3441943109502105e-05, "loss": 1.2789, "mean_token_accuracy": 0.6633422871430715, "num_tokens": 1311870074.0, "step": 7833 }, { "entropy": 1.7299232880274455, "epoch": 0.8606190436955865, "grad_norm": 0.765442430973053, "learning_rate": 1.3440402004834323e-05, "loss": 1.5995, "mean_token_accuracy": 0.633381262421608, "num_tokens": 1312052301.0, "step": 7834 }, { "entropy": 1.7414843638737996, "epoch": 0.8607289006069594, "grad_norm": 0.72737717628479, "learning_rate": 1.343886082292892e-05, "loss": 1.4679, "mean_token_accuracy": 0.6508069137732188, "num_tokens": 1312225655.0, "step": 7835 }, { "entropy": 1.7260840733846028, "epoch": 0.8608387575183324, "grad_norm": 0.7150377035140991, "learning_rate": 1.343731956383467e-05, "loss": 1.5002, "mean_token_accuracy": 0.6485694497823715, "num_tokens": 1312371137.0, "step": 7836 }, { "entropy": 1.7633921603361766, "epoch": 0.8609486144297053, "grad_norm": 0.7518701553344727, "learning_rate": 1.3435778227600354e-05, "loss": 1.4145, "mean_token_accuracy": 0.6541777650515238, "num_tokens": 1312486083.0, "step": 7837 }, { "entropy": 1.7039452989896138, "epoch": 0.8610584713410783, "grad_norm": 0.744445264339447, "learning_rate": 1.3434236814274752e-05, "loss": 1.3822, "mean_token_accuracy": 0.6768287618954977, "num_tokens": 1312646037.0, "step": 7838 }, { "entropy": 1.691188375155131, "epoch": 0.8611683282524512, "grad_norm": 0.6668843030929565, "learning_rate": 1.3432695323906657e-05, "loss": 1.3382, "mean_token_accuracy": 0.66710098584493, "num_tokens": 1312772973.0, "step": 7839 }, { "entropy": 1.7686751286188762, "epoch": 0.8612781851638242, "grad_norm": 0.8265035152435303, "learning_rate": 1.3431153756544849e-05, "loss": 1.4093, "mean_token_accuracy": 0.654551645119985, "num_tokens": 1312935895.0, "step": 7840 }, { "entropy": 1.7481578489144642, "epoch": 0.861388042075197, "grad_norm": 0.6910483241081238, "learning_rate": 1.3429612112238119e-05, "loss": 1.4032, "mean_token_accuracy": 0.6643613328536352, "num_tokens": 1313100695.0, "step": 7841 }, { "entropy": 1.7362829943497975, "epoch": 0.86149789898657, "grad_norm": 0.7029606699943542, "learning_rate": 1.342807039103526e-05, "loss": 1.4679, "mean_token_accuracy": 0.6628567526737849, "num_tokens": 1313272040.0, "step": 7842 }, { "entropy": 1.756723831097285, "epoch": 0.8616077558979429, "grad_norm": 3.727766513824463, "learning_rate": 1.3426528592985068e-05, "loss": 1.2346, "mean_token_accuracy": 0.6736096292734146, "num_tokens": 1313460210.0, "step": 7843 }, { "entropy": 1.678599238395691, "epoch": 0.8617176128093159, "grad_norm": 0.5941556692123413, "learning_rate": 1.342498671813634e-05, "loss": 1.423, "mean_token_accuracy": 0.6411783198515574, "num_tokens": 1313628236.0, "step": 7844 }, { "entropy": 1.6739432116349537, "epoch": 0.8618274697206888, "grad_norm": 0.6052295565605164, "learning_rate": 1.3423444766537874e-05, "loss": 1.3497, "mean_token_accuracy": 0.6628371526797613, "num_tokens": 1313794912.0, "step": 7845 }, { "entropy": 1.7657102247079213, "epoch": 0.8619373266320617, "grad_norm": 0.6851087212562561, "learning_rate": 1.3421902738238473e-05, "loss": 1.5192, "mean_token_accuracy": 0.6398663818836212, "num_tokens": 1313957418.0, "step": 7846 }, { "entropy": 1.7496927479902904, "epoch": 0.8620471835434347, "grad_norm": 0.675603449344635, "learning_rate": 1.3420360633286944e-05, "loss": 1.3619, "mean_token_accuracy": 0.6570636580387751, "num_tokens": 1314089818.0, "step": 7847 }, { "entropy": 1.7162054479122162, "epoch": 0.8621570404548076, "grad_norm": 0.8953336477279663, "learning_rate": 1.3418818451732087e-05, "loss": 1.4036, "mean_token_accuracy": 0.6674930676817894, "num_tokens": 1314213824.0, "step": 7848 }, { "entropy": 1.6898426910241444, "epoch": 0.8622668973661806, "grad_norm": 0.721627414226532, "learning_rate": 1.3417276193622721e-05, "loss": 1.5284, "mean_token_accuracy": 0.6341640055179596, "num_tokens": 1314457495.0, "step": 7849 }, { "entropy": 1.7005638281504314, "epoch": 0.8623767542775534, "grad_norm": 0.8751857876777649, "learning_rate": 1.3415733859007652e-05, "loss": 1.1988, "mean_token_accuracy": 0.6919720123211542, "num_tokens": 1314566939.0, "step": 7850 }, { "entropy": 1.7131429314613342, "epoch": 0.8624866111889264, "grad_norm": 0.7577322125434875, "learning_rate": 1.3414191447935695e-05, "loss": 1.3949, "mean_token_accuracy": 0.6738038708766302, "num_tokens": 1314734429.0, "step": 7851 }, { "entropy": 1.6478227376937866, "epoch": 0.8625964681002993, "grad_norm": 0.6248055696487427, "learning_rate": 1.341264896045566e-05, "loss": 1.4491, "mean_token_accuracy": 0.6442533234755198, "num_tokens": 1314953465.0, "step": 7852 }, { "entropy": 1.7321637471516926, "epoch": 0.8627063250116723, "grad_norm": 0.7030457258224487, "learning_rate": 1.3411106396616382e-05, "loss": 1.3662, "mean_token_accuracy": 0.6582097162803014, "num_tokens": 1315141404.0, "step": 7853 }, { "entropy": 1.7090543111165364, "epoch": 0.8628161819230452, "grad_norm": 0.6939349174499512, "learning_rate": 1.3409563756466667e-05, "loss": 1.4836, "mean_token_accuracy": 0.63978943725427, "num_tokens": 1315296574.0, "step": 7854 }, { "entropy": 1.7191306352615356, "epoch": 0.8629260388344182, "grad_norm": 0.654860258102417, "learning_rate": 1.3408021040055348e-05, "loss": 1.2846, "mean_token_accuracy": 0.6714579413334528, "num_tokens": 1315419882.0, "step": 7855 }, { "entropy": 1.6892946660518646, "epoch": 0.8630358957457911, "grad_norm": 0.7134132385253906, "learning_rate": 1.3406478247431246e-05, "loss": 1.4268, "mean_token_accuracy": 0.6599131226539612, "num_tokens": 1315635983.0, "step": 7856 }, { "entropy": 1.7296896080176036, "epoch": 0.8631457526571641, "grad_norm": 0.7645989656448364, "learning_rate": 1.340493537864319e-05, "loss": 1.3553, "mean_token_accuracy": 0.6631773064533869, "num_tokens": 1315842488.0, "step": 7857 }, { "entropy": 1.7573328018188477, "epoch": 0.863255609568537, "grad_norm": 0.6863840222358704, "learning_rate": 1.3403392433740017e-05, "loss": 1.406, "mean_token_accuracy": 0.6623306075731913, "num_tokens": 1316019789.0, "step": 7858 }, { "entropy": 1.680842439333598, "epoch": 0.86336546647991, "grad_norm": 0.6738454699516296, "learning_rate": 1.3401849412770556e-05, "loss": 1.3635, "mean_token_accuracy": 0.6600955078999201, "num_tokens": 1316184981.0, "step": 7859 }, { "entropy": 1.7413969735304515, "epoch": 0.8634753233912829, "grad_norm": 0.7007496953010559, "learning_rate": 1.3400306315783641e-05, "loss": 1.4063, "mean_token_accuracy": 0.6600519170363744, "num_tokens": 1316341745.0, "step": 7860 }, { "entropy": 1.6591811577479045, "epoch": 0.8635851803026557, "grad_norm": 0.711081326007843, "learning_rate": 1.3398763142828115e-05, "loss": 1.33, "mean_token_accuracy": 0.6715270678202311, "num_tokens": 1316453593.0, "step": 7861 }, { "entropy": 1.7344895700613658, "epoch": 0.8636950372140287, "grad_norm": 0.6897302865982056, "learning_rate": 1.3397219893952816e-05, "loss": 1.3221, "mean_token_accuracy": 0.6635162134965261, "num_tokens": 1316620759.0, "step": 7862 }, { "entropy": 1.7110367218653362, "epoch": 0.8638048941254016, "grad_norm": 0.7375456690788269, "learning_rate": 1.3395676569206587e-05, "loss": 1.3048, "mean_token_accuracy": 0.6686635613441467, "num_tokens": 1316744902.0, "step": 7863 }, { "entropy": 1.7058011094729106, "epoch": 0.8639147510367746, "grad_norm": 0.7151663303375244, "learning_rate": 1.3394133168638274e-05, "loss": 1.3693, "mean_token_accuracy": 0.6559457530577978, "num_tokens": 1316872149.0, "step": 7864 }, { "entropy": 1.6610862414042156, "epoch": 0.8640246079481475, "grad_norm": 0.660163402557373, "learning_rate": 1.3392589692296727e-05, "loss": 1.3023, "mean_token_accuracy": 0.6779667536417643, "num_tokens": 1317040405.0, "step": 7865 }, { "entropy": 1.7064382135868073, "epoch": 0.8641344648595205, "grad_norm": 0.7105300426483154, "learning_rate": 1.3391046140230792e-05, "loss": 1.2392, "mean_token_accuracy": 0.6803303956985474, "num_tokens": 1317174030.0, "step": 7866 }, { "entropy": 1.7627345124880474, "epoch": 0.8642443217708934, "grad_norm": 0.7074387073516846, "learning_rate": 1.3389502512489326e-05, "loss": 1.5413, "mean_token_accuracy": 0.6326200217008591, "num_tokens": 1317350387.0, "step": 7867 }, { "entropy": 1.7101947367191315, "epoch": 0.8643541786822664, "grad_norm": 0.6427745819091797, "learning_rate": 1.3387958809121177e-05, "loss": 1.3858, "mean_token_accuracy": 0.6644566704829534, "num_tokens": 1317502704.0, "step": 7868 }, { "entropy": 1.6640800833702087, "epoch": 0.8644640355936393, "grad_norm": 0.618799090385437, "learning_rate": 1.3386415030175212e-05, "loss": 1.4887, "mean_token_accuracy": 0.6454523503780365, "num_tokens": 1317730335.0, "step": 7869 }, { "entropy": 1.7683025399843852, "epoch": 0.8645738925050123, "grad_norm": 0.6431897282600403, "learning_rate": 1.3384871175700287e-05, "loss": 1.4189, "mean_token_accuracy": 0.6522246897220612, "num_tokens": 1317876056.0, "step": 7870 }, { "entropy": 1.6496396660804749, "epoch": 0.8646837494163852, "grad_norm": 0.6853657364845276, "learning_rate": 1.3383327245745266e-05, "loss": 1.3756, "mean_token_accuracy": 0.6567689329385757, "num_tokens": 1318062538.0, "step": 7871 }, { "entropy": 1.6585274438063304, "epoch": 0.8647936063277581, "grad_norm": 0.5792921185493469, "learning_rate": 1.3381783240359007e-05, "loss": 1.427, "mean_token_accuracy": 0.6579019178946813, "num_tokens": 1318242979.0, "step": 7872 }, { "entropy": 1.7485062181949615, "epoch": 0.864903463239131, "grad_norm": 1.0194803476333618, "learning_rate": 1.3380239159590385e-05, "loss": 1.7003, "mean_token_accuracy": 0.648332287867864, "num_tokens": 1318393747.0, "step": 7873 }, { "entropy": 1.6931703289349873, "epoch": 0.8650133201505039, "grad_norm": 0.665524423122406, "learning_rate": 1.3378695003488264e-05, "loss": 1.3505, "mean_token_accuracy": 0.6695401221513748, "num_tokens": 1318562851.0, "step": 7874 }, { "entropy": 1.7432339489459991, "epoch": 0.8651231770618769, "grad_norm": 0.70815509557724, "learning_rate": 1.3377150772101517e-05, "loss": 1.4095, "mean_token_accuracy": 0.6534250229597092, "num_tokens": 1318737443.0, "step": 7875 }, { "entropy": 1.6439895927906036, "epoch": 0.8652330339732498, "grad_norm": 0.6177237629890442, "learning_rate": 1.3375606465479024e-05, "loss": 1.3875, "mean_token_accuracy": 0.6528783192237219, "num_tokens": 1318906562.0, "step": 7876 }, { "entropy": 1.672441154718399, "epoch": 0.8653428908846228, "grad_norm": 0.6379650235176086, "learning_rate": 1.3374062083669653e-05, "loss": 1.3043, "mean_token_accuracy": 0.6697641412417094, "num_tokens": 1319050646.0, "step": 7877 }, { "entropy": 1.668338378270467, "epoch": 0.8654527477959957, "grad_norm": 0.7436346411705017, "learning_rate": 1.3372517626722288e-05, "loss": 1.3871, "mean_token_accuracy": 0.6683712204297384, "num_tokens": 1319219239.0, "step": 7878 }, { "entropy": 1.642588605483373, "epoch": 0.8655626047073687, "grad_norm": 0.7161867022514343, "learning_rate": 1.3370973094685809e-05, "loss": 1.4992, "mean_token_accuracy": 0.6446088055769602, "num_tokens": 1319395389.0, "step": 7879 }, { "entropy": 1.639831284681956, "epoch": 0.8656724616187416, "grad_norm": 0.6735728979110718, "learning_rate": 1.33694284876091e-05, "loss": 1.3289, "mean_token_accuracy": 0.6666930864254633, "num_tokens": 1319561485.0, "step": 7880 }, { "entropy": 1.6760977109273274, "epoch": 0.8657823185301146, "grad_norm": 0.5775339007377625, "learning_rate": 1.3367883805541048e-05, "loss": 1.4283, "mean_token_accuracy": 0.6341107288996378, "num_tokens": 1319788535.0, "step": 7881 }, { "entropy": 1.6838768422603607, "epoch": 0.8658921754414874, "grad_norm": 0.8022451400756836, "learning_rate": 1.3366339048530537e-05, "loss": 1.6205, "mean_token_accuracy": 0.6297398805618286, "num_tokens": 1320001016.0, "step": 7882 }, { "entropy": 1.636765331029892, "epoch": 0.8660020323528604, "grad_norm": 0.6422226428985596, "learning_rate": 1.3364794216626467e-05, "loss": 1.3239, "mean_token_accuracy": 0.6695540249347687, "num_tokens": 1320153921.0, "step": 7883 }, { "entropy": 1.7224363684654236, "epoch": 0.8661118892642333, "grad_norm": 0.6930742859840393, "learning_rate": 1.3363249309877719e-05, "loss": 1.373, "mean_token_accuracy": 0.6712667942047119, "num_tokens": 1320321110.0, "step": 7884 }, { "entropy": 1.6932853261629741, "epoch": 0.8662217461756063, "grad_norm": 0.771900475025177, "learning_rate": 1.3361704328333198e-05, "loss": 1.4568, "mean_token_accuracy": 0.6484440217415491, "num_tokens": 1320473455.0, "step": 7885 }, { "entropy": 1.7034966945648193, "epoch": 0.8663316030869792, "grad_norm": 0.6940920352935791, "learning_rate": 1.3360159272041801e-05, "loss": 1.394, "mean_token_accuracy": 0.6586278776327769, "num_tokens": 1320651894.0, "step": 7886 }, { "entropy": 1.701320121685664, "epoch": 0.8664414599983521, "grad_norm": 0.614683985710144, "learning_rate": 1.3358614141052429e-05, "loss": 1.4261, "mean_token_accuracy": 0.6411514133214951, "num_tokens": 1320841977.0, "step": 7887 }, { "entropy": 1.6840573747952778, "epoch": 0.8665513169097251, "grad_norm": 0.7097548842430115, "learning_rate": 1.3357068935413975e-05, "loss": 1.4517, "mean_token_accuracy": 0.6532600124677023, "num_tokens": 1321017646.0, "step": 7888 }, { "entropy": 1.7255522906780243, "epoch": 0.866661173821098, "grad_norm": 0.7355175614356995, "learning_rate": 1.3355523655175357e-05, "loss": 1.2392, "mean_token_accuracy": 0.682395468155543, "num_tokens": 1321169484.0, "step": 7889 }, { "entropy": 1.6489202578862507, "epoch": 0.866771030732471, "grad_norm": 0.6251848936080933, "learning_rate": 1.3353978300385472e-05, "loss": 1.2982, "mean_token_accuracy": 0.6671447803576788, "num_tokens": 1321319826.0, "step": 7890 }, { "entropy": 1.6924934685230255, "epoch": 0.8668808876438439, "grad_norm": 0.6230257749557495, "learning_rate": 1.3352432871093239e-05, "loss": 1.3966, "mean_token_accuracy": 0.6617010831832886, "num_tokens": 1321482330.0, "step": 7891 }, { "entropy": 1.7230217059453328, "epoch": 0.8669907445552169, "grad_norm": 0.6096069812774658, "learning_rate": 1.3350887367347565e-05, "loss": 1.5194, "mean_token_accuracy": 0.6552851547797521, "num_tokens": 1321660671.0, "step": 7892 }, { "entropy": 1.6864555577437084, "epoch": 0.8671006014665897, "grad_norm": 0.604369044303894, "learning_rate": 1.3349341789197365e-05, "loss": 1.3665, "mean_token_accuracy": 0.6564571112394333, "num_tokens": 1321849446.0, "step": 7893 }, { "entropy": 1.7439285119374592, "epoch": 0.8672104583779627, "grad_norm": 0.802845299243927, "learning_rate": 1.3347796136691553e-05, "loss": 1.4116, "mean_token_accuracy": 0.6643748581409454, "num_tokens": 1322003294.0, "step": 7894 }, { "entropy": 1.6520594159762065, "epoch": 0.8673203152893356, "grad_norm": 1.0191586017608643, "learning_rate": 1.3346250409879056e-05, "loss": 1.4608, "mean_token_accuracy": 0.6599269956350327, "num_tokens": 1322132928.0, "step": 7895 }, { "entropy": 1.6756224830945332, "epoch": 0.8674301722007086, "grad_norm": 0.5737661123275757, "learning_rate": 1.3344704608808787e-05, "loss": 0.9707, "mean_token_accuracy": 0.696823646624883, "num_tokens": 1322293646.0, "step": 7896 }, { "entropy": 1.720400442679723, "epoch": 0.8675400291120815, "grad_norm": 0.7352355122566223, "learning_rate": 1.3343158733529673e-05, "loss": 1.3443, "mean_token_accuracy": 0.6619482586781184, "num_tokens": 1322472406.0, "step": 7897 }, { "entropy": 1.726184109846751, "epoch": 0.8676498860234545, "grad_norm": 0.7784338593482971, "learning_rate": 1.3341612784090643e-05, "loss": 1.2061, "mean_token_accuracy": 0.683728352189064, "num_tokens": 1322582080.0, "step": 7898 }, { "entropy": 1.754507710536321, "epoch": 0.8677597429348274, "grad_norm": 0.8482814431190491, "learning_rate": 1.3340066760540624e-05, "loss": 1.4338, "mean_token_accuracy": 0.6534205625454584, "num_tokens": 1322768188.0, "step": 7899 }, { "entropy": 1.7251827617486317, "epoch": 0.8678695998462004, "grad_norm": 0.6221253871917725, "learning_rate": 1.3338520662928545e-05, "loss": 1.525, "mean_token_accuracy": 0.6393624295790991, "num_tokens": 1322947115.0, "step": 7900 }, { "entropy": 1.673731615146001, "epoch": 0.8679794567575733, "grad_norm": 0.748742938041687, "learning_rate": 1.3336974491303343e-05, "loss": 1.2827, "mean_token_accuracy": 0.6780743896961212, "num_tokens": 1323119216.0, "step": 7901 }, { "entropy": 1.6825636823972066, "epoch": 0.8680893136689461, "grad_norm": 0.5840120911598206, "learning_rate": 1.3335428245713949e-05, "loss": 1.2393, "mean_token_accuracy": 0.6743812263011932, "num_tokens": 1323274126.0, "step": 7902 }, { "entropy": 1.7099784115950267, "epoch": 0.8681991705803191, "grad_norm": 0.6230765581130981, "learning_rate": 1.3333881926209304e-05, "loss": 1.5362, "mean_token_accuracy": 0.6441142161687216, "num_tokens": 1323465046.0, "step": 7903 }, { "entropy": 1.7780559460322063, "epoch": 0.868309027491692, "grad_norm": 0.778548538684845, "learning_rate": 1.3332335532838347e-05, "loss": 1.2601, "mean_token_accuracy": 0.6791570882002512, "num_tokens": 1323603989.0, "step": 7904 }, { "entropy": 1.6974503993988037, "epoch": 0.868418884403065, "grad_norm": 0.771642804145813, "learning_rate": 1.3330789065650025e-05, "loss": 1.4817, "mean_token_accuracy": 0.6580607742071152, "num_tokens": 1323792589.0, "step": 7905 }, { "entropy": 1.7237968544165294, "epoch": 0.8685287413144379, "grad_norm": 0.674707293510437, "learning_rate": 1.3329242524693278e-05, "loss": 1.3211, "mean_token_accuracy": 0.657488743464152, "num_tokens": 1323964176.0, "step": 7906 }, { "entropy": 1.68796439965566, "epoch": 0.8686385982258109, "grad_norm": 0.6906165480613708, "learning_rate": 1.3327695910017051e-05, "loss": 1.3029, "mean_token_accuracy": 0.6650421073039373, "num_tokens": 1324089399.0, "step": 7907 }, { "entropy": 1.6761383811632793, "epoch": 0.8687484551371838, "grad_norm": 0.6780588626861572, "learning_rate": 1.33261492216703e-05, "loss": 1.3755, "mean_token_accuracy": 0.6589695960283279, "num_tokens": 1324248925.0, "step": 7908 }, { "entropy": 1.6557303369045258, "epoch": 0.8688583120485568, "grad_norm": 0.6261764764785767, "learning_rate": 1.3324602459701973e-05, "loss": 1.3346, "mean_token_accuracy": 0.6587485869725546, "num_tokens": 1324440175.0, "step": 7909 }, { "entropy": 1.700795332590739, "epoch": 0.8689681689599297, "grad_norm": 0.6909230351448059, "learning_rate": 1.332305562416103e-05, "loss": 1.3137, "mean_token_accuracy": 0.6629070142904917, "num_tokens": 1324596366.0, "step": 7910 }, { "entropy": 1.7026448547840118, "epoch": 0.8690780258713027, "grad_norm": 0.6621904373168945, "learning_rate": 1.3321508715096418e-05, "loss": 1.3886, "mean_token_accuracy": 0.6658767014741898, "num_tokens": 1324743986.0, "step": 7911 }, { "entropy": 1.7210921943187714, "epoch": 0.8691878827826756, "grad_norm": 0.660092294216156, "learning_rate": 1.3319961732557105e-05, "loss": 1.3884, "mean_token_accuracy": 0.6560011406739553, "num_tokens": 1324875317.0, "step": 7912 }, { "entropy": 1.6727829774220784, "epoch": 0.8692977396940486, "grad_norm": 0.6397646069526672, "learning_rate": 1.3318414676592047e-05, "loss": 1.3876, "mean_token_accuracy": 0.6527626812458038, "num_tokens": 1325061131.0, "step": 7913 }, { "entropy": 1.671280860900879, "epoch": 0.8694075966054214, "grad_norm": 0.6057349443435669, "learning_rate": 1.3316867547250207e-05, "loss": 1.3217, "mean_token_accuracy": 0.6597979366779327, "num_tokens": 1325235395.0, "step": 7914 }, { "entropy": 1.7142386734485626, "epoch": 0.8695174535167943, "grad_norm": 0.6590214967727661, "learning_rate": 1.3315320344580556e-05, "loss": 1.3788, "mean_token_accuracy": 0.6602257490158081, "num_tokens": 1325394926.0, "step": 7915 }, { "entropy": 1.687685916821162, "epoch": 0.8696273104281673, "grad_norm": 0.8642633557319641, "learning_rate": 1.3313773068632058e-05, "loss": 1.2531, "mean_token_accuracy": 0.6789219677448273, "num_tokens": 1325582463.0, "step": 7916 }, { "entropy": 1.6729052861531575, "epoch": 0.8697371673395402, "grad_norm": 0.6398195028305054, "learning_rate": 1.3312225719453688e-05, "loss": 1.386, "mean_token_accuracy": 0.6651468873023987, "num_tokens": 1325770243.0, "step": 7917 }, { "entropy": 1.6908937791983287, "epoch": 0.8698470242509132, "grad_norm": 0.5825358033180237, "learning_rate": 1.3310678297094412e-05, "loss": 1.4217, "mean_token_accuracy": 0.658950557311376, "num_tokens": 1325994900.0, "step": 7918 }, { "entropy": 1.717313547929128, "epoch": 0.8699568811622861, "grad_norm": 0.6195710897445679, "learning_rate": 1.3309130801603209e-05, "loss": 1.4411, "mean_token_accuracy": 0.651375080148379, "num_tokens": 1326175599.0, "step": 7919 }, { "entropy": 1.7254052360852559, "epoch": 0.8700667380736591, "grad_norm": 0.6985616087913513, "learning_rate": 1.330758323302906e-05, "loss": 1.5164, "mean_token_accuracy": 0.6510032018025717, "num_tokens": 1326384995.0, "step": 7920 }, { "entropy": 1.7191411058108013, "epoch": 0.870176594985032, "grad_norm": 0.7221682667732239, "learning_rate": 1.330603559142094e-05, "loss": 1.593, "mean_token_accuracy": 0.6362573454777399, "num_tokens": 1326565564.0, "step": 7921 }, { "entropy": 1.6968744397163391, "epoch": 0.870286451896405, "grad_norm": 0.6443734765052795, "learning_rate": 1.3304487876827831e-05, "loss": 1.418, "mean_token_accuracy": 0.6536405632893244, "num_tokens": 1326738893.0, "step": 7922 }, { "entropy": 1.71379288037618, "epoch": 0.8703963088077779, "grad_norm": 0.6184552311897278, "learning_rate": 1.3302940089298722e-05, "loss": 1.3953, "mean_token_accuracy": 0.661503846446673, "num_tokens": 1326894910.0, "step": 7923 }, { "entropy": 1.7957302431265514, "epoch": 0.8705061657191508, "grad_norm": 0.6591717600822449, "learning_rate": 1.3301392228882598e-05, "loss": 1.5458, "mean_token_accuracy": 0.6335903803507487, "num_tokens": 1327137563.0, "step": 7924 }, { "entropy": 1.6927340527375538, "epoch": 0.8706160226305237, "grad_norm": 0.7157540917396545, "learning_rate": 1.3299844295628442e-05, "loss": 1.4384, "mean_token_accuracy": 0.6635022660096487, "num_tokens": 1327287689.0, "step": 7925 }, { "entropy": 1.6452916463216145, "epoch": 0.8707258795418967, "grad_norm": 1.110607624053955, "learning_rate": 1.3298296289585254e-05, "loss": 1.2155, "mean_token_accuracy": 0.6726205994685491, "num_tokens": 1327496091.0, "step": 7926 }, { "entropy": 1.6707601845264435, "epoch": 0.8708357364532696, "grad_norm": 1.3014352321624756, "learning_rate": 1.3296748210802022e-05, "loss": 1.361, "mean_token_accuracy": 0.6518186579147974, "num_tokens": 1327691641.0, "step": 7927 }, { "entropy": 1.6890023251374562, "epoch": 0.8709455933646425, "grad_norm": 0.759329080581665, "learning_rate": 1.3295200059327744e-05, "loss": 1.2898, "mean_token_accuracy": 0.6639624188343684, "num_tokens": 1327858295.0, "step": 7928 }, { "entropy": 1.765195220708847, "epoch": 0.8710554502760155, "grad_norm": 0.8240900039672852, "learning_rate": 1.329365183521142e-05, "loss": 1.6451, "mean_token_accuracy": 0.6218532472848892, "num_tokens": 1328056850.0, "step": 7929 }, { "entropy": 1.7044878403345745, "epoch": 0.8711653071873884, "grad_norm": 0.7272043824195862, "learning_rate": 1.3292103538502048e-05, "loss": 1.4744, "mean_token_accuracy": 0.6585461994012197, "num_tokens": 1328221991.0, "step": 7930 }, { "entropy": 1.6718167662620544, "epoch": 0.8712751640987614, "grad_norm": 0.7868055701255798, "learning_rate": 1.3290555169248631e-05, "loss": 1.2644, "mean_token_accuracy": 0.683634286125501, "num_tokens": 1328351299.0, "step": 7931 }, { "entropy": 1.7744904160499573, "epoch": 0.8713850210101343, "grad_norm": 1.1038771867752075, "learning_rate": 1.3289006727500179e-05, "loss": 1.5812, "mean_token_accuracy": 0.6369357059399287, "num_tokens": 1328547843.0, "step": 7932 }, { "entropy": 1.731363942225774, "epoch": 0.8714948779215073, "grad_norm": 0.6608824729919434, "learning_rate": 1.3287458213305693e-05, "loss": 1.3958, "mean_token_accuracy": 0.6688510825236639, "num_tokens": 1328733249.0, "step": 7933 }, { "entropy": 1.6438096066315968, "epoch": 0.8716047348328801, "grad_norm": 0.6863547563552856, "learning_rate": 1.3285909626714184e-05, "loss": 1.394, "mean_token_accuracy": 0.6732639074325562, "num_tokens": 1328944621.0, "step": 7934 }, { "entropy": 1.752169926961263, "epoch": 0.8717145917442531, "grad_norm": 0.7345601320266724, "learning_rate": 1.3284360967774668e-05, "loss": 1.5279, "mean_token_accuracy": 0.639055406053861, "num_tokens": 1329117591.0, "step": 7935 }, { "entropy": 1.7430291771888733, "epoch": 0.871824448655626, "grad_norm": 0.684219241142273, "learning_rate": 1.3282812236536153e-05, "loss": 1.2381, "mean_token_accuracy": 0.6767359425624212, "num_tokens": 1329257421.0, "step": 7936 }, { "entropy": 1.7502950926621754, "epoch": 0.871934305566999, "grad_norm": 0.6932651996612549, "learning_rate": 1.328126343304766e-05, "loss": 1.4658, "mean_token_accuracy": 0.6520059059063593, "num_tokens": 1329399517.0, "step": 7937 }, { "entropy": 1.6875435908635457, "epoch": 0.8720441624783719, "grad_norm": 0.7229261994361877, "learning_rate": 1.3279714557358207e-05, "loss": 1.2595, "mean_token_accuracy": 0.678600956996282, "num_tokens": 1329537962.0, "step": 7938 }, { "entropy": 1.6786061922709148, "epoch": 0.8721540193897449, "grad_norm": 0.707375168800354, "learning_rate": 1.327816560951682e-05, "loss": 1.3806, "mean_token_accuracy": 0.679164374868075, "num_tokens": 1329678550.0, "step": 7939 }, { "entropy": 1.695389598608017, "epoch": 0.8722638763011178, "grad_norm": 0.7458509206771851, "learning_rate": 1.3276616589572516e-05, "loss": 1.3762, "mean_token_accuracy": 0.646119033296903, "num_tokens": 1329866491.0, "step": 7940 }, { "entropy": 1.7952686448891957, "epoch": 0.8723737332124907, "grad_norm": 0.9156729578971863, "learning_rate": 1.3275067497574323e-05, "loss": 1.4037, "mean_token_accuracy": 0.6545374641815821, "num_tokens": 1330008220.0, "step": 7941 }, { "entropy": 1.704797516266505, "epoch": 0.8724835901238637, "grad_norm": 0.7488060593605042, "learning_rate": 1.3273518333571267e-05, "loss": 1.4559, "mean_token_accuracy": 0.6578322052955627, "num_tokens": 1330177595.0, "step": 7942 }, { "entropy": 1.7436169187227886, "epoch": 0.8725934470352366, "grad_norm": 0.6806449294090271, "learning_rate": 1.3271969097612381e-05, "loss": 1.5533, "mean_token_accuracy": 0.6280863881111145, "num_tokens": 1330355286.0, "step": 7943 }, { "entropy": 1.7313541571299236, "epoch": 0.8727033039466096, "grad_norm": 0.7117105722427368, "learning_rate": 1.3270419789746696e-05, "loss": 1.4541, "mean_token_accuracy": 0.6451993534962336, "num_tokens": 1330539558.0, "step": 7944 }, { "entropy": 1.8234418034553528, "epoch": 0.8728131608579824, "grad_norm": 0.8532108068466187, "learning_rate": 1.326887041002325e-05, "loss": 1.5098, "mean_token_accuracy": 0.6385622421900431, "num_tokens": 1330714292.0, "step": 7945 }, { "entropy": 1.7574726343154907, "epoch": 0.8729230177693554, "grad_norm": 0.7637962102890015, "learning_rate": 1.3267320958491078e-05, "loss": 1.299, "mean_token_accuracy": 0.6635063340266546, "num_tokens": 1330836527.0, "step": 7946 }, { "entropy": 1.7145869135856628, "epoch": 0.8730328746807283, "grad_norm": 0.6418426036834717, "learning_rate": 1.3265771435199214e-05, "loss": 1.3584, "mean_token_accuracy": 0.6642368783553442, "num_tokens": 1330995034.0, "step": 7947 }, { "entropy": 1.7334729234377544, "epoch": 0.8731427315921013, "grad_norm": 0.7519394159317017, "learning_rate": 1.3264221840196712e-05, "loss": 1.3698, "mean_token_accuracy": 0.6525163898865382, "num_tokens": 1331131359.0, "step": 7948 }, { "entropy": 1.720908761024475, "epoch": 0.8732525885034742, "grad_norm": 0.7024477124214172, "learning_rate": 1.3262672173532607e-05, "loss": 1.3505, "mean_token_accuracy": 0.6521359930435816, "num_tokens": 1331265467.0, "step": 7949 }, { "entropy": 1.6730608840783436, "epoch": 0.8733624454148472, "grad_norm": 0.6420222520828247, "learning_rate": 1.3261122435255946e-05, "loss": 1.338, "mean_token_accuracy": 0.6632480919361115, "num_tokens": 1331421892.0, "step": 7950 }, { "entropy": 1.7371685206890106, "epoch": 0.8734723023262201, "grad_norm": 0.7718887329101562, "learning_rate": 1.3259572625415778e-05, "loss": 1.3936, "mean_token_accuracy": 0.65486179292202, "num_tokens": 1331550141.0, "step": 7951 }, { "entropy": 1.6655554672082264, "epoch": 0.8735821592375931, "grad_norm": 0.646058201789856, "learning_rate": 1.3258022744061157e-05, "loss": 1.3398, "mean_token_accuracy": 0.6610534985860189, "num_tokens": 1331802988.0, "step": 7952 }, { "entropy": 1.7078120807806652, "epoch": 0.873692016148966, "grad_norm": 0.8628278374671936, "learning_rate": 1.3256472791241131e-05, "loss": 1.4658, "mean_token_accuracy": 0.6470666378736496, "num_tokens": 1331979281.0, "step": 7953 }, { "entropy": 1.7339671850204468, "epoch": 0.873801873060339, "grad_norm": 0.8818228244781494, "learning_rate": 1.3254922767004759e-05, "loss": 1.2851, "mean_token_accuracy": 0.6744043976068497, "num_tokens": 1332118565.0, "step": 7954 }, { "entropy": 1.6598787903785706, "epoch": 0.8739117299717118, "grad_norm": 0.7136387228965759, "learning_rate": 1.3253372671401099e-05, "loss": 1.3051, "mean_token_accuracy": 0.6756090174118677, "num_tokens": 1332233805.0, "step": 7955 }, { "entropy": 1.7392461498578389, "epoch": 0.8740215868830847, "grad_norm": 0.6263501644134521, "learning_rate": 1.3251822504479207e-05, "loss": 1.3962, "mean_token_accuracy": 0.6539953947067261, "num_tokens": 1332404169.0, "step": 7956 }, { "entropy": 1.7147627174854279, "epoch": 0.8741314437944577, "grad_norm": 0.7884214520454407, "learning_rate": 1.3250272266288149e-05, "loss": 1.577, "mean_token_accuracy": 0.6330409099658331, "num_tokens": 1332570136.0, "step": 7957 }, { "entropy": 1.7000277042388916, "epoch": 0.8742413007058306, "grad_norm": 0.9027677178382874, "learning_rate": 1.324872195687699e-05, "loss": 1.3856, "mean_token_accuracy": 0.6580591748158137, "num_tokens": 1332706178.0, "step": 7958 }, { "entropy": 1.7484715183575947, "epoch": 0.8743511576172036, "grad_norm": 0.8269484639167786, "learning_rate": 1.3247171576294791e-05, "loss": 1.4001, "mean_token_accuracy": 0.6555562863747278, "num_tokens": 1332886733.0, "step": 7959 }, { "entropy": 1.698500504096349, "epoch": 0.8744610145285765, "grad_norm": 0.7188910245895386, "learning_rate": 1.3245621124590625e-05, "loss": 1.4849, "mean_token_accuracy": 0.6570742378632227, "num_tokens": 1333091324.0, "step": 7960 }, { "entropy": 1.691120167573293, "epoch": 0.8745708714399495, "grad_norm": 0.6104452610015869, "learning_rate": 1.3244070601813564e-05, "loss": 1.4192, "mean_token_accuracy": 0.6446485817432404, "num_tokens": 1333253616.0, "step": 7961 }, { "entropy": 1.6805418034394581, "epoch": 0.8746807283513224, "grad_norm": 1.033692717552185, "learning_rate": 1.3242520008012676e-05, "loss": 1.5897, "mean_token_accuracy": 0.6430460214614868, "num_tokens": 1333449657.0, "step": 7962 }, { "entropy": 1.650507350762685, "epoch": 0.8747905852626954, "grad_norm": 0.6361247301101685, "learning_rate": 1.3240969343237042e-05, "loss": 1.2264, "mean_token_accuracy": 0.6698981175820032, "num_tokens": 1333679417.0, "step": 7963 }, { "entropy": 1.6824390292167664, "epoch": 0.8749004421740683, "grad_norm": 0.7842257618904114, "learning_rate": 1.3239418607535737e-05, "loss": 1.2012, "mean_token_accuracy": 0.6809289256731669, "num_tokens": 1333808944.0, "step": 7964 }, { "entropy": 1.6636536419391632, "epoch": 0.8750102990854413, "grad_norm": 0.6498377919197083, "learning_rate": 1.3237867800957843e-05, "loss": 1.2811, "mean_token_accuracy": 0.6815899511178335, "num_tokens": 1334018959.0, "step": 7965 }, { "entropy": 1.7166621784369152, "epoch": 0.8751201559968141, "grad_norm": 0.678538978099823, "learning_rate": 1.3236316923552443e-05, "loss": 1.4701, "mean_token_accuracy": 0.6503576040267944, "num_tokens": 1334162080.0, "step": 7966 }, { "entropy": 1.6881644229094188, "epoch": 0.8752300129081871, "grad_norm": 0.8077633380889893, "learning_rate": 1.3234765975368622e-05, "loss": 1.515, "mean_token_accuracy": 0.6426454931497574, "num_tokens": 1334344960.0, "step": 7967 }, { "entropy": 1.7116271654764812, "epoch": 0.87533986981956, "grad_norm": 0.6946110725402832, "learning_rate": 1.3233214956455461e-05, "loss": 1.3035, "mean_token_accuracy": 0.6665644148985544, "num_tokens": 1334564958.0, "step": 7968 }, { "entropy": 1.699356774489085, "epoch": 0.8754497267309329, "grad_norm": 0.7314174175262451, "learning_rate": 1.3231663866862052e-05, "loss": 1.4564, "mean_token_accuracy": 0.6603659292062124, "num_tokens": 1334742349.0, "step": 7969 }, { "entropy": 1.7104867994785309, "epoch": 0.8755595836423059, "grad_norm": 0.6361663341522217, "learning_rate": 1.323011270663749e-05, "loss": 1.546, "mean_token_accuracy": 0.6363288114468256, "num_tokens": 1334924134.0, "step": 7970 }, { "entropy": 1.703242838382721, "epoch": 0.8756694405536788, "grad_norm": 0.6855658888816833, "learning_rate": 1.3228561475830866e-05, "loss": 1.256, "mean_token_accuracy": 0.6767214983701706, "num_tokens": 1335041877.0, "step": 7971 }, { "entropy": 1.7060719430446625, "epoch": 0.8757792974650518, "grad_norm": 0.6562889218330383, "learning_rate": 1.3227010174491272e-05, "loss": 1.5151, "mean_token_accuracy": 0.6551367143789927, "num_tokens": 1335195526.0, "step": 7972 }, { "entropy": 1.6942510604858398, "epoch": 0.8758891543764247, "grad_norm": 0.680708646774292, "learning_rate": 1.3225458802667814e-05, "loss": 1.3885, "mean_token_accuracy": 0.6581521232922872, "num_tokens": 1335342529.0, "step": 7973 }, { "entropy": 1.6612178683280945, "epoch": 0.8759990112877977, "grad_norm": 0.641156017780304, "learning_rate": 1.3223907360409585e-05, "loss": 1.4741, "mean_token_accuracy": 0.6471780588229498, "num_tokens": 1335590113.0, "step": 7974 }, { "entropy": 1.7183683415253956, "epoch": 0.8761088681991706, "grad_norm": 0.72384113073349, "learning_rate": 1.3222355847765691e-05, "loss": 1.2578, "mean_token_accuracy": 0.6796244730552038, "num_tokens": 1335743664.0, "step": 7975 }, { "entropy": 1.6416078209877014, "epoch": 0.8762187251105436, "grad_norm": 0.6154013276100159, "learning_rate": 1.3220804264785233e-05, "loss": 1.4331, "mean_token_accuracy": 0.6597521007061005, "num_tokens": 1335926381.0, "step": 7976 }, { "entropy": 1.7263556321461995, "epoch": 0.8763285820219164, "grad_norm": 0.6627217531204224, "learning_rate": 1.3219252611517326e-05, "loss": 1.3985, "mean_token_accuracy": 0.6437351852655411, "num_tokens": 1336093870.0, "step": 7977 }, { "entropy": 1.7286728123823802, "epoch": 0.8764384389332894, "grad_norm": 0.7835181355476379, "learning_rate": 1.3217700888011072e-05, "loss": 1.3356, "mean_token_accuracy": 0.6737496505180994, "num_tokens": 1336228546.0, "step": 7978 }, { "entropy": 1.6983933846155803, "epoch": 0.8765482958446623, "grad_norm": 0.8094499707221985, "learning_rate": 1.3216149094315585e-05, "loss": 1.3539, "mean_token_accuracy": 0.6737185815970103, "num_tokens": 1336384984.0, "step": 7979 }, { "entropy": 1.6982168157895405, "epoch": 0.8766581527560353, "grad_norm": 0.7010941505432129, "learning_rate": 1.3214597230479973e-05, "loss": 1.4708, "mean_token_accuracy": 0.6522993097702662, "num_tokens": 1336542390.0, "step": 7980 }, { "entropy": 1.6157186627388, "epoch": 0.8767680096674082, "grad_norm": 0.6266676187515259, "learning_rate": 1.321304529655336e-05, "loss": 1.2673, "mean_token_accuracy": 0.6801381210486094, "num_tokens": 1336752692.0, "step": 7981 }, { "entropy": 1.671266903479894, "epoch": 0.8768778665787811, "grad_norm": 0.7309592962265015, "learning_rate": 1.3211493292584861e-05, "loss": 1.3975, "mean_token_accuracy": 0.6700055301189423, "num_tokens": 1336903638.0, "step": 7982 }, { "entropy": 1.677875409523646, "epoch": 0.8769877234901541, "grad_norm": 0.7530861496925354, "learning_rate": 1.3209941218623594e-05, "loss": 1.4529, "mean_token_accuracy": 0.6435393045345942, "num_tokens": 1337079040.0, "step": 7983 }, { "entropy": 1.695750226577123, "epoch": 0.877097580401527, "grad_norm": 9.608741760253906, "learning_rate": 1.3208389074718686e-05, "loss": 1.3589, "mean_token_accuracy": 0.6756584992011389, "num_tokens": 1337261289.0, "step": 7984 }, { "entropy": 1.7192882398764293, "epoch": 0.8772074373129, "grad_norm": 0.6504213213920593, "learning_rate": 1.3206836860919258e-05, "loss": 1.4516, "mean_token_accuracy": 0.6443294088045756, "num_tokens": 1337445749.0, "step": 7985 }, { "entropy": 1.6834936439990997, "epoch": 0.8773172942242728, "grad_norm": 0.6355934739112854, "learning_rate": 1.3205284577274438e-05, "loss": 1.365, "mean_token_accuracy": 0.6607343057791392, "num_tokens": 1337641026.0, "step": 7986 }, { "entropy": 1.6866546074549358, "epoch": 0.8774271511356458, "grad_norm": 0.5846768021583557, "learning_rate": 1.3203732223833352e-05, "loss": 1.4117, "mean_token_accuracy": 0.6505334079265594, "num_tokens": 1337845504.0, "step": 7987 }, { "entropy": 1.6987277269363403, "epoch": 0.8775370080470187, "grad_norm": 0.6802120804786682, "learning_rate": 1.3202179800645137e-05, "loss": 1.3893, "mean_token_accuracy": 0.6545873979727427, "num_tokens": 1338027155.0, "step": 7988 }, { "entropy": 1.8231212794780731, "epoch": 0.8776468649583917, "grad_norm": 0.7370648980140686, "learning_rate": 1.3200627307758922e-05, "loss": 1.5404, "mean_token_accuracy": 0.6349399189154307, "num_tokens": 1338177966.0, "step": 7989 }, { "entropy": 1.7170870800813038, "epoch": 0.8777567218697646, "grad_norm": 0.6052808165550232, "learning_rate": 1.3199074745223849e-05, "loss": 1.3193, "mean_token_accuracy": 0.6577565719683965, "num_tokens": 1338338646.0, "step": 7990 }, { "entropy": 1.6922166148821514, "epoch": 0.8778665787811376, "grad_norm": 0.5780960917472839, "learning_rate": 1.3197522113089045e-05, "loss": 1.3872, "mean_token_accuracy": 0.6465161889791489, "num_tokens": 1338520517.0, "step": 7991 }, { "entropy": 1.7829012076059978, "epoch": 0.8779764356925105, "grad_norm": 0.6546869277954102, "learning_rate": 1.3195969411403657e-05, "loss": 1.5343, "mean_token_accuracy": 0.6399548500776291, "num_tokens": 1338703551.0, "step": 7992 }, { "entropy": 1.7429544230302174, "epoch": 0.8780862926038835, "grad_norm": 0.6422476172447205, "learning_rate": 1.319441664021683e-05, "loss": 1.421, "mean_token_accuracy": 0.6439397037029266, "num_tokens": 1338876260.0, "step": 7993 }, { "entropy": 1.7216396530469258, "epoch": 0.8781961495152564, "grad_norm": 0.7357894778251648, "learning_rate": 1.3192863799577702e-05, "loss": 1.5756, "mean_token_accuracy": 0.6361222863197327, "num_tokens": 1339067416.0, "step": 7994 }, { "entropy": 1.712314208348592, "epoch": 0.8783060064266293, "grad_norm": 0.6917596459388733, "learning_rate": 1.3191310889535425e-05, "loss": 1.4794, "mean_token_accuracy": 0.6355889936288198, "num_tokens": 1339228303.0, "step": 7995 }, { "entropy": 1.7267231245835621, "epoch": 0.8784158633380023, "grad_norm": 0.7089694738388062, "learning_rate": 1.3189757910139144e-05, "loss": 1.4635, "mean_token_accuracy": 0.6534610986709595, "num_tokens": 1339364859.0, "step": 7996 }, { "entropy": 1.7212253610293071, "epoch": 0.8785257202493751, "grad_norm": 0.782818078994751, "learning_rate": 1.3188204861438014e-05, "loss": 1.3417, "mean_token_accuracy": 0.6555100381374359, "num_tokens": 1339513208.0, "step": 7997 }, { "entropy": 1.7673552135626476, "epoch": 0.8786355771607481, "grad_norm": 0.6749922633171082, "learning_rate": 1.3186651743481185e-05, "loss": 1.366, "mean_token_accuracy": 0.6576452553272247, "num_tokens": 1339679215.0, "step": 7998 }, { "entropy": 1.7615666389465332, "epoch": 0.878745434072121, "grad_norm": 0.7322090268135071, "learning_rate": 1.3185098556317814e-05, "loss": 1.5279, "mean_token_accuracy": 0.644319606324037, "num_tokens": 1339867935.0, "step": 7999 }, { "entropy": 1.7344570557276409, "epoch": 0.878855290983494, "grad_norm": 0.6841682195663452, "learning_rate": 1.3183545299997059e-05, "loss": 1.465, "mean_token_accuracy": 0.6409310499827067, "num_tokens": 1340072053.0, "step": 8000 }, { "entropy": 1.6895175874233246, "epoch": 0.8789651478948669, "grad_norm": 0.622738242149353, "learning_rate": 1.3181991974568078e-05, "loss": 1.371, "mean_token_accuracy": 0.6549272984266281, "num_tokens": 1340258625.0, "step": 8001 }, { "entropy": 1.7024616301059723, "epoch": 0.8790750048062399, "grad_norm": 0.6197877526283264, "learning_rate": 1.3180438580080035e-05, "loss": 1.3511, "mean_token_accuracy": 0.6649558196465174, "num_tokens": 1340444952.0, "step": 8002 }, { "entropy": 1.6654754877090454, "epoch": 0.8791848617176128, "grad_norm": 1.9219154119491577, "learning_rate": 1.3178885116582092e-05, "loss": 1.2438, "mean_token_accuracy": 0.6720800052086512, "num_tokens": 1340670642.0, "step": 8003 }, { "entropy": 1.7420108218987782, "epoch": 0.8792947186289858, "grad_norm": 0.6763916015625, "learning_rate": 1.3177331584123415e-05, "loss": 1.3347, "mean_token_accuracy": 0.6596690714359283, "num_tokens": 1340838356.0, "step": 8004 }, { "entropy": 1.7406767507394154, "epoch": 0.8794045755403587, "grad_norm": 0.6398903727531433, "learning_rate": 1.3175777982753181e-05, "loss": 1.4552, "mean_token_accuracy": 0.6480192442735037, "num_tokens": 1340984070.0, "step": 8005 }, { "entropy": 1.6723910371462505, "epoch": 0.8795144324517317, "grad_norm": 0.6995456218719482, "learning_rate": 1.317422431252055e-05, "loss": 1.2322, "mean_token_accuracy": 0.684855322043101, "num_tokens": 1341178363.0, "step": 8006 }, { "entropy": 1.656785657008489, "epoch": 0.8796242893631046, "grad_norm": 0.6289657354354858, "learning_rate": 1.3172670573474702e-05, "loss": 1.3367, "mean_token_accuracy": 0.6745680520931879, "num_tokens": 1341321368.0, "step": 8007 }, { "entropy": 1.6708788673082988, "epoch": 0.8797341462744775, "grad_norm": 0.6338051557540894, "learning_rate": 1.3171116765664806e-05, "loss": 1.5384, "mean_token_accuracy": 0.6278170545895895, "num_tokens": 1341560129.0, "step": 8008 }, { "entropy": 1.6958947479724884, "epoch": 0.8798440031858504, "grad_norm": 0.8811983466148376, "learning_rate": 1.3169562889140044e-05, "loss": 1.3398, "mean_token_accuracy": 0.6620439837376276, "num_tokens": 1341696606.0, "step": 8009 }, { "entropy": 1.7053867677847545, "epoch": 0.8799538600972233, "grad_norm": 1.529905915260315, "learning_rate": 1.3168008943949595e-05, "loss": 1.3059, "mean_token_accuracy": 0.6612508594989777, "num_tokens": 1341886733.0, "step": 8010 }, { "entropy": 1.6900759637355804, "epoch": 0.8800637170085963, "grad_norm": 0.6369670629501343, "learning_rate": 1.3166454930142638e-05, "loss": 1.4512, "mean_token_accuracy": 0.6452312916517258, "num_tokens": 1342084201.0, "step": 8011 }, { "entropy": 1.6731635133425395, "epoch": 0.8801735739199692, "grad_norm": 0.772038996219635, "learning_rate": 1.316490084776836e-05, "loss": 1.2925, "mean_token_accuracy": 0.6838527768850327, "num_tokens": 1342258577.0, "step": 8012 }, { "entropy": 1.7299912571907043, "epoch": 0.8802834308313422, "grad_norm": 0.6461442708969116, "learning_rate": 1.3163346696875948e-05, "loss": 1.4096, "mean_token_accuracy": 0.6662061562140783, "num_tokens": 1342489971.0, "step": 8013 }, { "entropy": 1.6888580024242401, "epoch": 0.8803932877427151, "grad_norm": 0.6851293444633484, "learning_rate": 1.3161792477514581e-05, "loss": 1.5714, "mean_token_accuracy": 0.648188849290212, "num_tokens": 1342667835.0, "step": 8014 }, { "entropy": 1.7008213798205059, "epoch": 0.8805031446540881, "grad_norm": 0.5513099431991577, "learning_rate": 1.3160238189733461e-05, "loss": 1.4225, "mean_token_accuracy": 0.6414727667967478, "num_tokens": 1342901022.0, "step": 8015 }, { "entropy": 1.6794636050860088, "epoch": 0.880613001565461, "grad_norm": 0.6192083358764648, "learning_rate": 1.3158683833581776e-05, "loss": 1.3251, "mean_token_accuracy": 0.664734274148941, "num_tokens": 1343050141.0, "step": 8016 }, { "entropy": 1.6627166867256165, "epoch": 0.880722858476834, "grad_norm": 0.7238250970840454, "learning_rate": 1.315712940910872e-05, "loss": 1.3705, "mean_token_accuracy": 0.6606160700321198, "num_tokens": 1343257130.0, "step": 8017 }, { "entropy": 1.6578301588694255, "epoch": 0.8808327153882068, "grad_norm": 0.644854724407196, "learning_rate": 1.3155574916363489e-05, "loss": 1.4382, "mean_token_accuracy": 0.6555332243442535, "num_tokens": 1343435487.0, "step": 8018 }, { "entropy": 1.662944386402766, "epoch": 0.8809425722995798, "grad_norm": 0.7258864641189575, "learning_rate": 1.3154020355395285e-05, "loss": 1.352, "mean_token_accuracy": 0.6717381527026495, "num_tokens": 1343561950.0, "step": 8019 }, { "entropy": 1.701594094435374, "epoch": 0.8810524292109527, "grad_norm": 0.7201105952262878, "learning_rate": 1.3152465726253307e-05, "loss": 1.3787, "mean_token_accuracy": 0.6671847403049469, "num_tokens": 1343705589.0, "step": 8020 }, { "entropy": 1.704057554403941, "epoch": 0.8811622861223257, "grad_norm": 0.6907951831817627, "learning_rate": 1.3150911028986756e-05, "loss": 1.2673, "mean_token_accuracy": 0.6720318496227264, "num_tokens": 1343840657.0, "step": 8021 }, { "entropy": 1.6859318315982819, "epoch": 0.8812721430336986, "grad_norm": 0.7433066368103027, "learning_rate": 1.3149356263644844e-05, "loss": 1.386, "mean_token_accuracy": 0.6604318271080653, "num_tokens": 1343965621.0, "step": 8022 }, { "entropy": 1.730026255051295, "epoch": 0.8813819999450715, "grad_norm": 0.9503698348999023, "learning_rate": 1.3147801430276771e-05, "loss": 1.4897, "mean_token_accuracy": 0.6525371472040812, "num_tokens": 1344127435.0, "step": 8023 }, { "entropy": 1.6777367393175762, "epoch": 0.8814918568564445, "grad_norm": 0.6132991909980774, "learning_rate": 1.3146246528931757e-05, "loss": 1.4217, "mean_token_accuracy": 0.6546296526988348, "num_tokens": 1344296015.0, "step": 8024 }, { "entropy": 1.6886170705159504, "epoch": 0.8816017137678174, "grad_norm": 0.6643725633621216, "learning_rate": 1.3144691559659e-05, "loss": 1.4503, "mean_token_accuracy": 0.6464731891949972, "num_tokens": 1344487057.0, "step": 8025 }, { "entropy": 1.694665402173996, "epoch": 0.8817115706791904, "grad_norm": 0.8616427183151245, "learning_rate": 1.3143136522507727e-05, "loss": 1.3552, "mean_token_accuracy": 0.6591685314973196, "num_tokens": 1344651179.0, "step": 8026 }, { "entropy": 1.705785721540451, "epoch": 0.8818214275905633, "grad_norm": 0.6112991571426392, "learning_rate": 1.3141581417527142e-05, "loss": 1.4192, "mean_token_accuracy": 0.6488917469978333, "num_tokens": 1344852823.0, "step": 8027 }, { "entropy": 1.6365499794483185, "epoch": 0.8819312845019363, "grad_norm": 0.6156476736068726, "learning_rate": 1.3140026244766474e-05, "loss": 1.407, "mean_token_accuracy": 0.6569693684577942, "num_tokens": 1345024193.0, "step": 8028 }, { "entropy": 1.7127653062343597, "epoch": 0.8820411414133091, "grad_norm": 0.6506058573722839, "learning_rate": 1.3138471004274942e-05, "loss": 1.3516, "mean_token_accuracy": 0.6794911821683248, "num_tokens": 1345221187.0, "step": 8029 }, { "entropy": 1.709245463212331, "epoch": 0.8821509983246821, "grad_norm": 0.7298224568367004, "learning_rate": 1.3136915696101768e-05, "loss": 1.5062, "mean_token_accuracy": 0.6443512787421545, "num_tokens": 1345408682.0, "step": 8030 }, { "entropy": 1.753902445236842, "epoch": 0.882260855236055, "grad_norm": 0.6582000255584717, "learning_rate": 1.3135360320296172e-05, "loss": 1.2243, "mean_token_accuracy": 0.6782107502222061, "num_tokens": 1345548857.0, "step": 8031 }, { "entropy": 1.689902792374293, "epoch": 0.882370712147428, "grad_norm": 0.6262725591659546, "learning_rate": 1.3133804876907381e-05, "loss": 1.4091, "mean_token_accuracy": 0.669882799188296, "num_tokens": 1345724951.0, "step": 8032 }, { "entropy": 1.7034416993459065, "epoch": 0.8824805690588009, "grad_norm": 0.6859605312347412, "learning_rate": 1.313224936598463e-05, "loss": 1.4119, "mean_token_accuracy": 0.6529233107964197, "num_tokens": 1345847774.0, "step": 8033 }, { "entropy": 1.6871616741021473, "epoch": 0.8825904259701739, "grad_norm": 0.7056890726089478, "learning_rate": 1.3130693787577149e-05, "loss": 1.4314, "mean_token_accuracy": 0.6448503235975901, "num_tokens": 1346052041.0, "step": 8034 }, { "entropy": 1.6991152167320251, "epoch": 0.8827002828815468, "grad_norm": 0.7343994975090027, "learning_rate": 1.312913814173417e-05, "loss": 1.465, "mean_token_accuracy": 0.636786495645841, "num_tokens": 1346230977.0, "step": 8035 }, { "entropy": 1.740164339542389, "epoch": 0.8828101397929197, "grad_norm": 0.6870355606079102, "learning_rate": 1.3127582428504924e-05, "loss": 1.3112, "mean_token_accuracy": 0.6617578764756521, "num_tokens": 1346377875.0, "step": 8036 }, { "entropy": 1.6937303443749745, "epoch": 0.8829199967042927, "grad_norm": 0.6445454359054565, "learning_rate": 1.3126026647938656e-05, "loss": 1.4059, "mean_token_accuracy": 0.6537498732407888, "num_tokens": 1346547610.0, "step": 8037 }, { "entropy": 1.7122799456119537, "epoch": 0.8830298536156655, "grad_norm": 0.6448954343795776, "learning_rate": 1.3124470800084602e-05, "loss": 1.4434, "mean_token_accuracy": 0.6424995213747025, "num_tokens": 1346735778.0, "step": 8038 }, { "entropy": 1.7327117224534352, "epoch": 0.8831397105270385, "grad_norm": 0.6899316310882568, "learning_rate": 1.3122914884992001e-05, "loss": 1.3337, "mean_token_accuracy": 0.6604535380999247, "num_tokens": 1346857791.0, "step": 8039 }, { "entropy": 1.6908073723316193, "epoch": 0.8832495674384114, "grad_norm": 0.6724409461021423, "learning_rate": 1.3121358902710106e-05, "loss": 1.3755, "mean_token_accuracy": 0.6658162524302801, "num_tokens": 1347035627.0, "step": 8040 }, { "entropy": 1.6952118575572968, "epoch": 0.8833594243497844, "grad_norm": 1.6206833124160767, "learning_rate": 1.3119802853288157e-05, "loss": 1.176, "mean_token_accuracy": 0.6623029261827469, "num_tokens": 1347231666.0, "step": 8041 }, { "entropy": 1.6843286454677582, "epoch": 0.8834692812611573, "grad_norm": 0.6928609609603882, "learning_rate": 1.31182467367754e-05, "loss": 1.3903, "mean_token_accuracy": 0.6516619374354681, "num_tokens": 1347406884.0, "step": 8042 }, { "entropy": 1.6303186118602753, "epoch": 0.8835791381725303, "grad_norm": 0.6562328934669495, "learning_rate": 1.311669055322109e-05, "loss": 1.3083, "mean_token_accuracy": 0.6617482751607895, "num_tokens": 1347573182.0, "step": 8043 }, { "entropy": 1.7110784550507863, "epoch": 0.8836889950839032, "grad_norm": 0.6911236643791199, "learning_rate": 1.3115134302674476e-05, "loss": 1.3642, "mean_token_accuracy": 0.6580260396003723, "num_tokens": 1347729826.0, "step": 8044 }, { "entropy": 1.7337822914123535, "epoch": 0.8837988519952762, "grad_norm": 0.840054988861084, "learning_rate": 1.3113577985184815e-05, "loss": 1.3266, "mean_token_accuracy": 0.6658614228169123, "num_tokens": 1347900052.0, "step": 8045 }, { "entropy": 1.7084493140379589, "epoch": 0.8839087089066491, "grad_norm": 0.749947726726532, "learning_rate": 1.3112021600801367e-05, "loss": 1.474, "mean_token_accuracy": 0.6458921432495117, "num_tokens": 1348105613.0, "step": 8046 }, { "entropy": 1.620888243118922, "epoch": 0.8840185658180221, "grad_norm": 0.6167489290237427, "learning_rate": 1.3110465149573384e-05, "loss": 1.398, "mean_token_accuracy": 0.6520049870014191, "num_tokens": 1348336198.0, "step": 8047 }, { "entropy": 1.7000917494297028, "epoch": 0.884128422729395, "grad_norm": 0.6212296485900879, "learning_rate": 1.3108908631550128e-05, "loss": 1.491, "mean_token_accuracy": 0.6476211200157801, "num_tokens": 1348499904.0, "step": 8048 }, { "entropy": 1.6660768489042919, "epoch": 0.884238279640768, "grad_norm": 0.8931158781051636, "learning_rate": 1.3107352046780865e-05, "loss": 1.0585, "mean_token_accuracy": 0.6905455191930135, "num_tokens": 1348668149.0, "step": 8049 }, { "entropy": 1.7104520897070568, "epoch": 0.8843481365521408, "grad_norm": 0.7820631861686707, "learning_rate": 1.3105795395314863e-05, "loss": 1.3984, "mean_token_accuracy": 0.6557039568821589, "num_tokens": 1348803873.0, "step": 8050 }, { "entropy": 1.7667625844478607, "epoch": 0.8844579934635137, "grad_norm": 0.6352094411849976, "learning_rate": 1.3104238677201382e-05, "loss": 1.3466, "mean_token_accuracy": 0.6517351716756821, "num_tokens": 1348947762.0, "step": 8051 }, { "entropy": 1.6937001744906108, "epoch": 0.8845678503748867, "grad_norm": 0.700639009475708, "learning_rate": 1.3102681892489698e-05, "loss": 1.4141, "mean_token_accuracy": 0.6588475555181503, "num_tokens": 1349135921.0, "step": 8052 }, { "entropy": 1.7913443545500438, "epoch": 0.8846777072862596, "grad_norm": 0.719153881072998, "learning_rate": 1.3101125041229077e-05, "loss": 1.3666, "mean_token_accuracy": 0.654579242070516, "num_tokens": 1349279170.0, "step": 8053 }, { "entropy": 1.7504811882972717, "epoch": 0.8847875641976326, "grad_norm": 0.7166516184806824, "learning_rate": 1.3099568123468796e-05, "loss": 1.6457, "mean_token_accuracy": 0.6370598326126734, "num_tokens": 1349463834.0, "step": 8054 }, { "entropy": 1.7077268064022064, "epoch": 0.8848974211090055, "grad_norm": 0.6966634392738342, "learning_rate": 1.309801113925813e-05, "loss": 1.3574, "mean_token_accuracy": 0.6640727718671163, "num_tokens": 1349625453.0, "step": 8055 }, { "entropy": 1.718926727771759, "epoch": 0.8850072780203785, "grad_norm": 0.6505473852157593, "learning_rate": 1.3096454088646355e-05, "loss": 1.2966, "mean_token_accuracy": 0.6705836703379949, "num_tokens": 1349759088.0, "step": 8056 }, { "entropy": 1.659464915593465, "epoch": 0.8851171349317514, "grad_norm": 0.5969595909118652, "learning_rate": 1.3094896971682756e-05, "loss": 1.3489, "mean_token_accuracy": 0.6619338194529215, "num_tokens": 1349958910.0, "step": 8057 }, { "entropy": 1.7118416329224904, "epoch": 0.8852269918431244, "grad_norm": 0.7195928692817688, "learning_rate": 1.3093339788416611e-05, "loss": 1.3853, "mean_token_accuracy": 0.6494811822970709, "num_tokens": 1350096253.0, "step": 8058 }, { "entropy": 1.7227883338928223, "epoch": 0.8853368487544973, "grad_norm": 0.7288689613342285, "learning_rate": 1.3091782538897204e-05, "loss": 1.5027, "mean_token_accuracy": 0.6574197262525558, "num_tokens": 1350239891.0, "step": 8059 }, { "entropy": 1.6495947043100994, "epoch": 0.8854467056658702, "grad_norm": 0.6621578335762024, "learning_rate": 1.3090225223173822e-05, "loss": 1.463, "mean_token_accuracy": 0.6442839155594507, "num_tokens": 1350454281.0, "step": 8060 }, { "entropy": 1.7141701777776082, "epoch": 0.8855565625772431, "grad_norm": 0.7444977164268494, "learning_rate": 1.3088667841295755e-05, "loss": 1.3837, "mean_token_accuracy": 0.6679119020700455, "num_tokens": 1350628019.0, "step": 8061 }, { "entropy": 1.7080074946085613, "epoch": 0.8856664194886161, "grad_norm": 0.6999690532684326, "learning_rate": 1.308711039331229e-05, "loss": 1.3976, "mean_token_accuracy": 0.6628955900669098, "num_tokens": 1350818224.0, "step": 8062 }, { "entropy": 1.7163402338822682, "epoch": 0.885776276399989, "grad_norm": 0.7445178627967834, "learning_rate": 1.3085552879272723e-05, "loss": 1.5502, "mean_token_accuracy": 0.6500421464443207, "num_tokens": 1350961323.0, "step": 8063 }, { "entropy": 1.6648909350236256, "epoch": 0.8858861333113619, "grad_norm": 0.6955971717834473, "learning_rate": 1.3083995299226349e-05, "loss": 1.222, "mean_token_accuracy": 0.6757313311100006, "num_tokens": 1351090851.0, "step": 8064 }, { "entropy": 1.7097779909769695, "epoch": 0.8859959902227349, "grad_norm": 0.7316083908081055, "learning_rate": 1.308243765322246e-05, "loss": 1.4142, "mean_token_accuracy": 0.6512270569801331, "num_tokens": 1351338598.0, "step": 8065 }, { "entropy": 1.7168799837430317, "epoch": 0.8861058471341078, "grad_norm": 0.8173125386238098, "learning_rate": 1.3080879941310357e-05, "loss": 1.431, "mean_token_accuracy": 0.6548661192258199, "num_tokens": 1351471043.0, "step": 8066 }, { "entropy": 1.7480222483476002, "epoch": 0.8862157040454808, "grad_norm": 3.3873794078826904, "learning_rate": 1.3079322163539343e-05, "loss": 1.1677, "mean_token_accuracy": 0.677946095665296, "num_tokens": 1351661275.0, "step": 8067 }, { "entropy": 1.75862056016922, "epoch": 0.8863255609568537, "grad_norm": 0.7313094735145569, "learning_rate": 1.307776431995872e-05, "loss": 1.5896, "mean_token_accuracy": 0.6547629435857137, "num_tokens": 1351818032.0, "step": 8068 }, { "entropy": 1.736459106206894, "epoch": 0.8864354178682267, "grad_norm": 0.6745466589927673, "learning_rate": 1.3076206410617792e-05, "loss": 1.3345, "mean_token_accuracy": 0.6557362129290899, "num_tokens": 1351960721.0, "step": 8069 }, { "entropy": 1.6622845729192097, "epoch": 0.8865452747795995, "grad_norm": 0.6415925621986389, "learning_rate": 1.3074648435565866e-05, "loss": 1.4075, "mean_token_accuracy": 0.6481207013130188, "num_tokens": 1352103987.0, "step": 8070 }, { "entropy": 1.760219156742096, "epoch": 0.8866551316909725, "grad_norm": 0.6308138370513916, "learning_rate": 1.3073090394852253e-05, "loss": 1.3635, "mean_token_accuracy": 0.6509590496619543, "num_tokens": 1352236836.0, "step": 8071 }, { "entropy": 1.665820409854253, "epoch": 0.8867649886023454, "grad_norm": 0.7212702035903931, "learning_rate": 1.307153228852626e-05, "loss": 1.3673, "mean_token_accuracy": 0.6518625418345133, "num_tokens": 1352428604.0, "step": 8072 }, { "entropy": 1.7342944145202637, "epoch": 0.8868748455137184, "grad_norm": 0.7079007029533386, "learning_rate": 1.3069974116637207e-05, "loss": 1.2633, "mean_token_accuracy": 0.6728782703479131, "num_tokens": 1352562805.0, "step": 8073 }, { "entropy": 1.728011429309845, "epoch": 0.8869847024250913, "grad_norm": 0.6237488389015198, "learning_rate": 1.3068415879234409e-05, "loss": 1.389, "mean_token_accuracy": 0.6614675124486288, "num_tokens": 1352798623.0, "step": 8074 }, { "entropy": 1.6492702066898346, "epoch": 0.8870945593364643, "grad_norm": 0.744462251663208, "learning_rate": 1.3066857576367173e-05, "loss": 1.4776, "mean_token_accuracy": 0.6488187313079834, "num_tokens": 1352998143.0, "step": 8075 }, { "entropy": 1.7421591877937317, "epoch": 0.8872044162478372, "grad_norm": 0.6387677788734436, "learning_rate": 1.306529920808483e-05, "loss": 1.5294, "mean_token_accuracy": 0.6411878218253454, "num_tokens": 1353150077.0, "step": 8076 }, { "entropy": 1.7408295770486195, "epoch": 0.8873142731592101, "grad_norm": 0.7120410799980164, "learning_rate": 1.3063740774436699e-05, "loss": 1.3272, "mean_token_accuracy": 0.6632676968971888, "num_tokens": 1353315861.0, "step": 8077 }, { "entropy": 1.6731611490249634, "epoch": 0.8874241300705831, "grad_norm": 0.706117570400238, "learning_rate": 1.3062182275472097e-05, "loss": 1.316, "mean_token_accuracy": 0.6600356449683508, "num_tokens": 1353424738.0, "step": 8078 }, { "entropy": 1.6679266492525737, "epoch": 0.887533986981956, "grad_norm": 0.7776505351066589, "learning_rate": 1.3060623711240362e-05, "loss": 1.3721, "mean_token_accuracy": 0.669564555088679, "num_tokens": 1353592283.0, "step": 8079 }, { "entropy": 1.6982711652914684, "epoch": 0.887643843893329, "grad_norm": 0.7552779316902161, "learning_rate": 1.3059065081790814e-05, "loss": 1.4374, "mean_token_accuracy": 0.6687319328387579, "num_tokens": 1353771761.0, "step": 8080 }, { "entropy": 1.638779918352763, "epoch": 0.8877537008047018, "grad_norm": 0.5680516362190247, "learning_rate": 1.305750638717278e-05, "loss": 1.3348, "mean_token_accuracy": 0.6778454432884852, "num_tokens": 1353939394.0, "step": 8081 }, { "entropy": 1.6758286853631337, "epoch": 0.8878635577160748, "grad_norm": 0.6949761509895325, "learning_rate": 1.3055947627435597e-05, "loss": 1.3651, "mean_token_accuracy": 0.6731551140546799, "num_tokens": 1354080326.0, "step": 8082 }, { "entropy": 1.688368280728658, "epoch": 0.8879734146274477, "grad_norm": 0.6399317979812622, "learning_rate": 1.30543888026286e-05, "loss": 1.357, "mean_token_accuracy": 0.6511105100313822, "num_tokens": 1354217646.0, "step": 8083 }, { "entropy": 1.691909670829773, "epoch": 0.8880832715388207, "grad_norm": 0.6731947660446167, "learning_rate": 1.3052829912801121e-05, "loss": 1.5381, "mean_token_accuracy": 0.6489096581935883, "num_tokens": 1354406834.0, "step": 8084 }, { "entropy": 1.6758221685886383, "epoch": 0.8881931284501936, "grad_norm": 0.6500033140182495, "learning_rate": 1.3051270958002503e-05, "loss": 1.3698, "mean_token_accuracy": 0.6664744565884272, "num_tokens": 1354556751.0, "step": 8085 }, { "entropy": 1.6546663045883179, "epoch": 0.8883029853615666, "grad_norm": 0.6402091979980469, "learning_rate": 1.3049711938282084e-05, "loss": 1.35, "mean_token_accuracy": 0.6814102729161581, "num_tokens": 1354717236.0, "step": 8086 }, { "entropy": 1.6440533498922985, "epoch": 0.8884128422729395, "grad_norm": 0.7296947836875916, "learning_rate": 1.3048152853689202e-05, "loss": 1.3896, "mean_token_accuracy": 0.6694160799185435, "num_tokens": 1354912353.0, "step": 8087 }, { "entropy": 1.7287603914737701, "epoch": 0.8885226991843125, "grad_norm": 0.8662500977516174, "learning_rate": 1.3046593704273205e-05, "loss": 1.2404, "mean_token_accuracy": 0.686885267496109, "num_tokens": 1355040756.0, "step": 8088 }, { "entropy": 1.741082489490509, "epoch": 0.8886325560956854, "grad_norm": 0.7242109775543213, "learning_rate": 1.3045034490083442e-05, "loss": 1.4916, "mean_token_accuracy": 0.6341162770986557, "num_tokens": 1355257903.0, "step": 8089 }, { "entropy": 1.7150556246439617, "epoch": 0.8887424130070583, "grad_norm": 0.67889803647995, "learning_rate": 1.3043475211169257e-05, "loss": 1.4755, "mean_token_accuracy": 0.6414446582396826, "num_tokens": 1355428402.0, "step": 8090 }, { "entropy": 1.6610381305217743, "epoch": 0.8888522699184312, "grad_norm": 0.6266405582427979, "learning_rate": 1.3041915867580004e-05, "loss": 1.3219, "mean_token_accuracy": 0.6615271915992101, "num_tokens": 1355587479.0, "step": 8091 }, { "entropy": 1.7193395793437958, "epoch": 0.8889621268298041, "grad_norm": 0.6784216165542603, "learning_rate": 1.3040356459365035e-05, "loss": 1.3691, "mean_token_accuracy": 0.6642310122648875, "num_tokens": 1355710721.0, "step": 8092 }, { "entropy": 1.671286831299464, "epoch": 0.8890719837411771, "grad_norm": 0.6728245615959167, "learning_rate": 1.30387969865737e-05, "loss": 1.3102, "mean_token_accuracy": 0.666391134262085, "num_tokens": 1355856077.0, "step": 8093 }, { "entropy": 1.6652600566546123, "epoch": 0.88918184065255, "grad_norm": 0.8366493582725525, "learning_rate": 1.3037237449255363e-05, "loss": 1.1922, "mean_token_accuracy": 0.6832515945037206, "num_tokens": 1356007976.0, "step": 8094 }, { "entropy": 1.7026143074035645, "epoch": 0.889291697563923, "grad_norm": 0.7122969031333923, "learning_rate": 1.3035677847459376e-05, "loss": 1.3912, "mean_token_accuracy": 0.6449934641520182, "num_tokens": 1356202823.0, "step": 8095 }, { "entropy": 1.712339609861374, "epoch": 0.8894015544752959, "grad_norm": 0.6235902309417725, "learning_rate": 1.3034118181235103e-05, "loss": 1.3057, "mean_token_accuracy": 0.6651495695114136, "num_tokens": 1356362943.0, "step": 8096 }, { "entropy": 1.6995584865411122, "epoch": 0.8895114113866689, "grad_norm": 0.6809194684028625, "learning_rate": 1.3032558450631905e-05, "loss": 1.4245, "mean_token_accuracy": 0.6478584508101145, "num_tokens": 1356528585.0, "step": 8097 }, { "entropy": 1.6661972502867382, "epoch": 0.8896212682980418, "grad_norm": 0.7444778680801392, "learning_rate": 1.3030998655699152e-05, "loss": 1.4135, "mean_token_accuracy": 0.6626001720627149, "num_tokens": 1356696607.0, "step": 8098 }, { "entropy": 1.666017969449361, "epoch": 0.8897311252094148, "grad_norm": 0.6592122912406921, "learning_rate": 1.3029438796486205e-05, "loss": 1.4551, "mean_token_accuracy": 0.6414574682712555, "num_tokens": 1356874909.0, "step": 8099 }, { "entropy": 1.7365792989730835, "epoch": 0.8898409821207877, "grad_norm": 0.7328019738197327, "learning_rate": 1.3027878873042431e-05, "loss": 1.4733, "mean_token_accuracy": 0.6426637371381124, "num_tokens": 1357089568.0, "step": 8100 }, { "entropy": 1.7083971202373505, "epoch": 0.8899508390321607, "grad_norm": 0.6374284625053406, "learning_rate": 1.3026318885417208e-05, "loss": 1.2617, "mean_token_accuracy": 0.6822344164053599, "num_tokens": 1357196677.0, "step": 8101 }, { "entropy": 1.7009641925493877, "epoch": 0.8900606959435335, "grad_norm": 0.6936139464378357, "learning_rate": 1.3024758833659906e-05, "loss": 1.4522, "mean_token_accuracy": 0.654137596487999, "num_tokens": 1357358345.0, "step": 8102 }, { "entropy": 1.6674350996812184, "epoch": 0.8901705528549065, "grad_norm": 0.6887747049331665, "learning_rate": 1.3023198717819896e-05, "loss": 1.2265, "mean_token_accuracy": 0.675381526350975, "num_tokens": 1357473542.0, "step": 8103 }, { "entropy": 1.7373623251914978, "epoch": 0.8902804097662794, "grad_norm": 0.8778982162475586, "learning_rate": 1.3021638537946562e-05, "loss": 1.4434, "mean_token_accuracy": 0.6643926252921423, "num_tokens": 1357646759.0, "step": 8104 }, { "entropy": 1.7153649926185608, "epoch": 0.8903902666776523, "grad_norm": 0.6064153909683228, "learning_rate": 1.3020078294089276e-05, "loss": 1.379, "mean_token_accuracy": 0.6452821493148804, "num_tokens": 1357837941.0, "step": 8105 }, { "entropy": 1.7316668430964153, "epoch": 0.8905001235890253, "grad_norm": 0.7943192720413208, "learning_rate": 1.3018517986297423e-05, "loss": 1.3593, "mean_token_accuracy": 0.6662193487087885, "num_tokens": 1358041225.0, "step": 8106 }, { "entropy": 1.7015369435151417, "epoch": 0.8906099805003982, "grad_norm": 0.6925376057624817, "learning_rate": 1.3016957614620385e-05, "loss": 1.4367, "mean_token_accuracy": 0.6497325003147125, "num_tokens": 1358238077.0, "step": 8107 }, { "entropy": 1.7462484240531921, "epoch": 0.8907198374117712, "grad_norm": 0.6663040518760681, "learning_rate": 1.301539717910755e-05, "loss": 1.4275, "mean_token_accuracy": 0.663551022609075, "num_tokens": 1358404375.0, "step": 8108 }, { "entropy": 1.7458167274792988, "epoch": 0.8908296943231441, "grad_norm": 0.7102859020233154, "learning_rate": 1.3013836679808299e-05, "loss": 1.4161, "mean_token_accuracy": 0.6619683603445689, "num_tokens": 1358590647.0, "step": 8109 }, { "entropy": 1.6818938553333282, "epoch": 0.8909395512345171, "grad_norm": 0.6789277195930481, "learning_rate": 1.3012276116772027e-05, "loss": 1.2391, "mean_token_accuracy": 0.6869035313526789, "num_tokens": 1358735171.0, "step": 8110 }, { "entropy": 1.7530201375484467, "epoch": 0.89104940814589, "grad_norm": 0.849226713180542, "learning_rate": 1.301071549004812e-05, "loss": 1.4003, "mean_token_accuracy": 0.6840375413497289, "num_tokens": 1358913878.0, "step": 8111 }, { "entropy": 1.766763836145401, "epoch": 0.891159265057263, "grad_norm": 0.6677963733673096, "learning_rate": 1.3009154799685977e-05, "loss": 1.4964, "mean_token_accuracy": 0.6554523011048635, "num_tokens": 1359064147.0, "step": 8112 }, { "entropy": 1.7489437560240428, "epoch": 0.8912691219686358, "grad_norm": 0.7236900329589844, "learning_rate": 1.3007594045734986e-05, "loss": 1.439, "mean_token_accuracy": 0.6437687029441198, "num_tokens": 1359239467.0, "step": 8113 }, { "entropy": 1.7261870900789897, "epoch": 0.8913789788800088, "grad_norm": 0.6887776851654053, "learning_rate": 1.3006033228244551e-05, "loss": 1.4056, "mean_token_accuracy": 0.6591099550326666, "num_tokens": 1359378786.0, "step": 8114 }, { "entropy": 1.6371654470761616, "epoch": 0.8914888357913817, "grad_norm": 0.8251991868019104, "learning_rate": 1.300447234726407e-05, "loss": 1.3108, "mean_token_accuracy": 0.68764096001784, "num_tokens": 1359534184.0, "step": 8115 }, { "entropy": 1.6558915674686432, "epoch": 0.8915986927027547, "grad_norm": 0.7391266822814941, "learning_rate": 1.3002911402842941e-05, "loss": 1.3898, "mean_token_accuracy": 0.6650058180093765, "num_tokens": 1359685889.0, "step": 8116 }, { "entropy": 1.7229611972967784, "epoch": 0.8917085496141276, "grad_norm": 0.8075942993164062, "learning_rate": 1.3001350395030568e-05, "loss": 1.3773, "mean_token_accuracy": 0.6721263627211252, "num_tokens": 1359898074.0, "step": 8117 }, { "entropy": 1.6780574719111125, "epoch": 0.8918184065255005, "grad_norm": 0.6064956784248352, "learning_rate": 1.2999789323876355e-05, "loss": 1.3074, "mean_token_accuracy": 0.6772323052088419, "num_tokens": 1360031925.0, "step": 8118 }, { "entropy": 1.7141193449497223, "epoch": 0.8919282634368735, "grad_norm": 0.7515255808830261, "learning_rate": 1.2998228189429713e-05, "loss": 1.411, "mean_token_accuracy": 0.6621668885151545, "num_tokens": 1360194172.0, "step": 8119 }, { "entropy": 1.7168916761875153, "epoch": 0.8920381203482464, "grad_norm": 0.6676003932952881, "learning_rate": 1.299666699174005e-05, "loss": 1.4152, "mean_token_accuracy": 0.6431319614251455, "num_tokens": 1360408050.0, "step": 8120 }, { "entropy": 1.7273538609345753, "epoch": 0.8921479772596194, "grad_norm": 0.7413110136985779, "learning_rate": 1.2995105730856774e-05, "loss": 1.3913, "mean_token_accuracy": 0.6430693517128626, "num_tokens": 1360572401.0, "step": 8121 }, { "entropy": 1.6615086793899536, "epoch": 0.8922578341709922, "grad_norm": 0.7433538436889648, "learning_rate": 1.2993544406829303e-05, "loss": 1.473, "mean_token_accuracy": 0.6483894636233648, "num_tokens": 1360765523.0, "step": 8122 }, { "entropy": 1.708322823047638, "epoch": 0.8923676910823652, "grad_norm": 0.6540583372116089, "learning_rate": 1.299198301970705e-05, "loss": 1.3412, "mean_token_accuracy": 0.6675945669412613, "num_tokens": 1360935268.0, "step": 8123 }, { "entropy": 1.6495771209398906, "epoch": 0.8924775479937381, "grad_norm": 0.6546026468276978, "learning_rate": 1.2990421569539429e-05, "loss": 1.4705, "mean_token_accuracy": 0.6375894794861475, "num_tokens": 1361161749.0, "step": 8124 }, { "entropy": 1.6485347251097362, "epoch": 0.8925874049051111, "grad_norm": 0.6614772081375122, "learning_rate": 1.2988860056375864e-05, "loss": 1.4092, "mean_token_accuracy": 0.660191277662913, "num_tokens": 1361334599.0, "step": 8125 }, { "entropy": 1.7235966821511586, "epoch": 0.892697261816484, "grad_norm": 0.6788547039031982, "learning_rate": 1.2987298480265775e-05, "loss": 1.5024, "mean_token_accuracy": 0.6518261929353079, "num_tokens": 1361511927.0, "step": 8126 }, { "entropy": 1.7020771602789562, "epoch": 0.892807118727857, "grad_norm": 0.7183151841163635, "learning_rate": 1.2985736841258585e-05, "loss": 1.4419, "mean_token_accuracy": 0.6394909024238586, "num_tokens": 1361690858.0, "step": 8127 }, { "entropy": 1.68422997991244, "epoch": 0.8929169756392299, "grad_norm": 0.6331420540809631, "learning_rate": 1.2984175139403719e-05, "loss": 1.3114, "mean_token_accuracy": 0.6713191568851471, "num_tokens": 1361842250.0, "step": 8128 }, { "entropy": 1.728828767935435, "epoch": 0.8930268325506029, "grad_norm": 0.7083820700645447, "learning_rate": 1.29826133747506e-05, "loss": 1.6225, "mean_token_accuracy": 0.6279341727495193, "num_tokens": 1362061841.0, "step": 8129 }, { "entropy": 1.718500663836797, "epoch": 0.8931366894619758, "grad_norm": 0.6595919132232666, "learning_rate": 1.2981051547348667e-05, "loss": 1.5593, "mean_token_accuracy": 0.6272151817878088, "num_tokens": 1362318836.0, "step": 8130 }, { "entropy": 1.7139343520005543, "epoch": 0.8932465463733487, "grad_norm": 0.7923753261566162, "learning_rate": 1.297948965724734e-05, "loss": 1.534, "mean_token_accuracy": 0.6324710150559744, "num_tokens": 1362525771.0, "step": 8131 }, { "entropy": 1.722178190946579, "epoch": 0.8933564032847217, "grad_norm": 0.6958953142166138, "learning_rate": 1.2977927704496063e-05, "loss": 1.4105, "mean_token_accuracy": 0.6546609650055567, "num_tokens": 1362697980.0, "step": 8132 }, { "entropy": 1.7218119998772938, "epoch": 0.8934662601960945, "grad_norm": 0.720320463180542, "learning_rate": 1.2976365689144262e-05, "loss": 1.3803, "mean_token_accuracy": 0.6600370605786642, "num_tokens": 1362841672.0, "step": 8133 }, { "entropy": 1.6862787107626598, "epoch": 0.8935761171074675, "grad_norm": 0.6662365794181824, "learning_rate": 1.2974803611241375e-05, "loss": 1.5426, "mean_token_accuracy": 0.6379824380079905, "num_tokens": 1363031283.0, "step": 8134 }, { "entropy": 1.7841876844565074, "epoch": 0.8936859740188404, "grad_norm": 0.6615442633628845, "learning_rate": 1.2973241470836844e-05, "loss": 1.3344, "mean_token_accuracy": 0.6595032413800558, "num_tokens": 1363152561.0, "step": 8135 }, { "entropy": 1.7041483422120411, "epoch": 0.8937958309302134, "grad_norm": 0.7564711570739746, "learning_rate": 1.2971679267980115e-05, "loss": 1.3051, "mean_token_accuracy": 0.6767140378554662, "num_tokens": 1363281399.0, "step": 8136 }, { "entropy": 1.6851592659950256, "epoch": 0.8939056878415863, "grad_norm": 0.6683154702186584, "learning_rate": 1.2970117002720619e-05, "loss": 1.4669, "mean_token_accuracy": 0.6413289060195287, "num_tokens": 1363470077.0, "step": 8137 }, { "entropy": 1.7018636564413707, "epoch": 0.8940155447529593, "grad_norm": 0.6740677356719971, "learning_rate": 1.2968554675107811e-05, "loss": 1.4038, "mean_token_accuracy": 0.6529013961553574, "num_tokens": 1363608254.0, "step": 8138 }, { "entropy": 1.6942344903945923, "epoch": 0.8941254016643322, "grad_norm": 0.6417088508605957, "learning_rate": 1.2966992285191136e-05, "loss": 1.3952, "mean_token_accuracy": 0.6635211457808813, "num_tokens": 1363799347.0, "step": 8139 }, { "entropy": 1.7088079651196797, "epoch": 0.8942352585757052, "grad_norm": 0.6937258243560791, "learning_rate": 1.296542983302004e-05, "loss": 1.4418, "mean_token_accuracy": 0.6464910159508387, "num_tokens": 1363948205.0, "step": 8140 }, { "entropy": 1.7441412607828777, "epoch": 0.8943451154870781, "grad_norm": 0.6750649809837341, "learning_rate": 1.2963867318643977e-05, "loss": 1.3852, "mean_token_accuracy": 0.6618605355421702, "num_tokens": 1364070807.0, "step": 8141 }, { "entropy": 1.7100872000058491, "epoch": 0.8944549723984511, "grad_norm": 0.6978124976158142, "learning_rate": 1.2962304742112398e-05, "loss": 1.3172, "mean_token_accuracy": 0.6667628437280655, "num_tokens": 1364214371.0, "step": 8142 }, { "entropy": 1.7487525542577107, "epoch": 0.894564829309824, "grad_norm": 0.746597945690155, "learning_rate": 1.2960742103474752e-05, "loss": 1.3387, "mean_token_accuracy": 0.6738084952036539, "num_tokens": 1364319641.0, "step": 8143 }, { "entropy": 1.7394179999828339, "epoch": 0.894674686221197, "grad_norm": 0.676131546497345, "learning_rate": 1.2959179402780508e-05, "loss": 1.4124, "mean_token_accuracy": 0.6497220148642858, "num_tokens": 1364460921.0, "step": 8144 }, { "entropy": 1.7177764972050984, "epoch": 0.8947845431325698, "grad_norm": 0.9208407402038574, "learning_rate": 1.2957616640079118e-05, "loss": 1.515, "mean_token_accuracy": 0.6533168703317642, "num_tokens": 1364628805.0, "step": 8145 }, { "entropy": 1.7679544786612194, "epoch": 0.8948944000439427, "grad_norm": 0.6939182281494141, "learning_rate": 1.2956053815420044e-05, "loss": 1.3703, "mean_token_accuracy": 0.6553449034690857, "num_tokens": 1364794603.0, "step": 8146 }, { "entropy": 1.687473217646281, "epoch": 0.8950042569553157, "grad_norm": 0.7051041722297668, "learning_rate": 1.2954490928852746e-05, "loss": 1.4056, "mean_token_accuracy": 0.6483729779720306, "num_tokens": 1365029285.0, "step": 8147 }, { "entropy": 1.7038420736789703, "epoch": 0.8951141138666886, "grad_norm": 0.8011882901191711, "learning_rate": 1.2952927980426696e-05, "loss": 1.3818, "mean_token_accuracy": 0.6608738501866659, "num_tokens": 1365145593.0, "step": 8148 }, { "entropy": 1.6690380970637004, "epoch": 0.8952239707780616, "grad_norm": 0.6747339963912964, "learning_rate": 1.2951364970191347e-05, "loss": 1.4883, "mean_token_accuracy": 0.6363983005285263, "num_tokens": 1365353607.0, "step": 8149 }, { "entropy": 1.7097953756650288, "epoch": 0.8953338276894345, "grad_norm": 0.7147229909896851, "learning_rate": 1.2949801898196182e-05, "loss": 1.4018, "mean_token_accuracy": 0.6539677331844965, "num_tokens": 1365499388.0, "step": 8150 }, { "entropy": 1.7601737678050995, "epoch": 0.8954436846008075, "grad_norm": 0.6297820806503296, "learning_rate": 1.2948238764490664e-05, "loss": 1.5235, "mean_token_accuracy": 0.6257789582014084, "num_tokens": 1365714612.0, "step": 8151 }, { "entropy": 1.6906922459602356, "epoch": 0.8955535415121804, "grad_norm": 0.837054431438446, "learning_rate": 1.2946675569124266e-05, "loss": 1.3727, "mean_token_accuracy": 0.6722191870212555, "num_tokens": 1365845342.0, "step": 8152 }, { "entropy": 1.7662516037623088, "epoch": 0.8956633984235534, "grad_norm": 0.6832341551780701, "learning_rate": 1.2945112312146464e-05, "loss": 1.4975, "mean_token_accuracy": 0.6488762050867081, "num_tokens": 1365980955.0, "step": 8153 }, { "entropy": 1.6681772371133168, "epoch": 0.8957732553349262, "grad_norm": 0.5820019245147705, "learning_rate": 1.2943548993606736e-05, "loss": 1.3885, "mean_token_accuracy": 0.6611274381478628, "num_tokens": 1366185654.0, "step": 8154 }, { "entropy": 1.773647129535675, "epoch": 0.8958831122462992, "grad_norm": 0.6890908479690552, "learning_rate": 1.2941985613554558e-05, "loss": 1.4015, "mean_token_accuracy": 0.6438634345928828, "num_tokens": 1366332030.0, "step": 8155 }, { "entropy": 1.7235575517018635, "epoch": 0.8959929691576721, "grad_norm": 0.7061694264411926, "learning_rate": 1.294042217203941e-05, "loss": 1.4915, "mean_token_accuracy": 0.6430180122454962, "num_tokens": 1366525544.0, "step": 8156 }, { "entropy": 1.670232355594635, "epoch": 0.8961028260690451, "grad_norm": 0.5871284604072571, "learning_rate": 1.293885866911077e-05, "loss": 1.3359, "mean_token_accuracy": 0.6684425920248032, "num_tokens": 1366765534.0, "step": 8157 }, { "entropy": 1.7217474579811096, "epoch": 0.896212682980418, "grad_norm": 0.6503912806510925, "learning_rate": 1.293729510481813e-05, "loss": 1.2649, "mean_token_accuracy": 0.6772298713525137, "num_tokens": 1366890588.0, "step": 8158 }, { "entropy": 1.686649481455485, "epoch": 0.8963225398917909, "grad_norm": 0.8139302730560303, "learning_rate": 1.293573147921097e-05, "loss": 1.3724, "mean_token_accuracy": 0.6652147769927979, "num_tokens": 1367071167.0, "step": 8159 }, { "entropy": 1.7288777728875477, "epoch": 0.8964323968031639, "grad_norm": 0.6936602592468262, "learning_rate": 1.2934167792338788e-05, "loss": 1.5265, "mean_token_accuracy": 0.6469365855058035, "num_tokens": 1367276133.0, "step": 8160 }, { "entropy": 1.689979334672292, "epoch": 0.8965422537145368, "grad_norm": 0.7575037479400635, "learning_rate": 1.2932604044251063e-05, "loss": 1.5474, "mean_token_accuracy": 0.6251169790824255, "num_tokens": 1367539260.0, "step": 8161 }, { "entropy": 1.6986994842688243, "epoch": 0.8966521106259098, "grad_norm": 0.6705021858215332, "learning_rate": 1.293104023499729e-05, "loss": 1.4051, "mean_token_accuracy": 0.6545246789852778, "num_tokens": 1367699048.0, "step": 8162 }, { "entropy": 1.685945173104604, "epoch": 0.8967619675372827, "grad_norm": 0.7093241214752197, "learning_rate": 1.2929476364626965e-05, "loss": 1.362, "mean_token_accuracy": 0.6544206788142523, "num_tokens": 1367871465.0, "step": 8163 }, { "entropy": 1.7687697807947795, "epoch": 0.8968718244486557, "grad_norm": 0.6797177195549011, "learning_rate": 1.2927912433189583e-05, "loss": 1.523, "mean_token_accuracy": 0.6424828718105952, "num_tokens": 1368046329.0, "step": 8164 }, { "entropy": 1.7541530827681224, "epoch": 0.8969816813600285, "grad_norm": 0.6629700660705566, "learning_rate": 1.2926348440734637e-05, "loss": 1.369, "mean_token_accuracy": 0.6573603600263596, "num_tokens": 1368231299.0, "step": 8165 }, { "entropy": 1.7120748162269592, "epoch": 0.8970915382714015, "grad_norm": 0.6065205335617065, "learning_rate": 1.2924784387311638e-05, "loss": 1.5206, "mean_token_accuracy": 0.6434388856093088, "num_tokens": 1368425603.0, "step": 8166 }, { "entropy": 1.6880040764808655, "epoch": 0.8972013951827744, "grad_norm": 0.6545516848564148, "learning_rate": 1.2923220272970074e-05, "loss": 1.5294, "mean_token_accuracy": 0.6539272020260493, "num_tokens": 1368582508.0, "step": 8167 }, { "entropy": 1.7425115207831066, "epoch": 0.8973112520941474, "grad_norm": 0.8306770920753479, "learning_rate": 1.2921656097759459e-05, "loss": 1.4239, "mean_token_accuracy": 0.6608427713314692, "num_tokens": 1368707001.0, "step": 8168 }, { "entropy": 1.6598342955112457, "epoch": 0.8974211090055203, "grad_norm": 0.5864236950874329, "learning_rate": 1.2920091861729291e-05, "loss": 1.3178, "mean_token_accuracy": 0.6688061058521271, "num_tokens": 1368866831.0, "step": 8169 }, { "entropy": 1.7307129402955372, "epoch": 0.8975309659168933, "grad_norm": 0.7228249907493591, "learning_rate": 1.2918527564929084e-05, "loss": 1.4023, "mean_token_accuracy": 0.6562465329964956, "num_tokens": 1369012025.0, "step": 8170 }, { "entropy": 1.6857167681058247, "epoch": 0.8976408228282662, "grad_norm": 0.7698543071746826, "learning_rate": 1.2916963207408339e-05, "loss": 1.3763, "mean_token_accuracy": 0.6605077634255091, "num_tokens": 1369220961.0, "step": 8171 }, { "entropy": 1.6822535892327626, "epoch": 0.8977506797396391, "grad_norm": 0.6768351197242737, "learning_rate": 1.291539878921658e-05, "loss": 1.1921, "mean_token_accuracy": 0.6810629268487295, "num_tokens": 1369395347.0, "step": 8172 }, { "entropy": 1.6895845532417297, "epoch": 0.8978605366510121, "grad_norm": 0.591072678565979, "learning_rate": 1.2913834310403309e-05, "loss": 1.3772, "mean_token_accuracy": 0.6566944519678751, "num_tokens": 1369551940.0, "step": 8173 }, { "entropy": 1.6599336862564087, "epoch": 0.897970393562385, "grad_norm": 0.5289608836174011, "learning_rate": 1.2912269771018042e-05, "loss": 1.4677, "mean_token_accuracy": 0.6440421094497045, "num_tokens": 1369765218.0, "step": 8174 }, { "entropy": 1.7176273266474407, "epoch": 0.898080250473758, "grad_norm": 0.9386351108551025, "learning_rate": 1.29107051711103e-05, "loss": 1.4368, "mean_token_accuracy": 0.6671102990706762, "num_tokens": 1369906301.0, "step": 8175 }, { "entropy": 1.674050102631251, "epoch": 0.8981901073851308, "grad_norm": 0.659980833530426, "learning_rate": 1.2909140510729602e-05, "loss": 1.4214, "mean_token_accuracy": 0.6602593511343002, "num_tokens": 1370085506.0, "step": 8176 }, { "entropy": 1.6822616755962372, "epoch": 0.8982999642965038, "grad_norm": 0.7302301526069641, "learning_rate": 1.2907575789925464e-05, "loss": 1.2561, "mean_token_accuracy": 0.6732948124408722, "num_tokens": 1370218678.0, "step": 8177 }, { "entropy": 1.6883311371008556, "epoch": 0.8984098212078767, "grad_norm": 0.679049551486969, "learning_rate": 1.2906011008747416e-05, "loss": 1.533, "mean_token_accuracy": 0.6458878070116043, "num_tokens": 1370432098.0, "step": 8178 }, { "entropy": 1.7072526613871257, "epoch": 0.8985196781192497, "grad_norm": 0.6430801153182983, "learning_rate": 1.2904446167244975e-05, "loss": 1.246, "mean_token_accuracy": 0.6781556854645411, "num_tokens": 1370555624.0, "step": 8179 }, { "entropy": 1.7674343287944794, "epoch": 0.8986295350306226, "grad_norm": 0.6533283591270447, "learning_rate": 1.2902881265467672e-05, "loss": 1.44, "mean_token_accuracy": 0.6452242086331049, "num_tokens": 1370727544.0, "step": 8180 }, { "entropy": 1.737773100535075, "epoch": 0.8987393919419956, "grad_norm": 0.6868041157722473, "learning_rate": 1.2901316303465034e-05, "loss": 1.3688, "mean_token_accuracy": 0.6619212180376053, "num_tokens": 1370861919.0, "step": 8181 }, { "entropy": 1.6641343732674916, "epoch": 0.8988492488533685, "grad_norm": 1.105683445930481, "learning_rate": 1.2899751281286595e-05, "loss": 1.4682, "mean_token_accuracy": 0.6598964184522629, "num_tokens": 1371063236.0, "step": 8182 }, { "entropy": 1.756370743115743, "epoch": 0.8989591057647415, "grad_norm": 0.854651927947998, "learning_rate": 1.289818619898188e-05, "loss": 1.5702, "mean_token_accuracy": 0.6351617326339086, "num_tokens": 1371203907.0, "step": 8183 }, { "entropy": 1.7830684284369152, "epoch": 0.8990689626761144, "grad_norm": 0.6945520639419556, "learning_rate": 1.2896621056600429e-05, "loss": 1.4435, "mean_token_accuracy": 0.644214446345965, "num_tokens": 1371354131.0, "step": 8184 }, { "entropy": 1.672329713900884, "epoch": 0.8991788195874872, "grad_norm": 0.6429153084754944, "learning_rate": 1.2895055854191776e-05, "loss": 1.2707, "mean_token_accuracy": 0.6696565896272659, "num_tokens": 1371497398.0, "step": 8185 }, { "entropy": 1.6738866865634918, "epoch": 0.8992886764988602, "grad_norm": 0.676530122756958, "learning_rate": 1.2893490591805458e-05, "loss": 1.3334, "mean_token_accuracy": 0.6663380612929662, "num_tokens": 1371708217.0, "step": 8186 }, { "entropy": 1.704796036084493, "epoch": 0.8993985334102331, "grad_norm": 0.7546509504318237, "learning_rate": 1.2891925269491018e-05, "loss": 1.2849, "mean_token_accuracy": 0.6688467363516489, "num_tokens": 1371894274.0, "step": 8187 }, { "entropy": 1.7164516548315685, "epoch": 0.8995083903216061, "grad_norm": 0.6668331027030945, "learning_rate": 1.2890359887297996e-05, "loss": 1.5352, "mean_token_accuracy": 0.6357905914386114, "num_tokens": 1372102280.0, "step": 8188 }, { "entropy": 1.7005331714948018, "epoch": 0.899618247232979, "grad_norm": 0.6698519587516785, "learning_rate": 1.2888794445275931e-05, "loss": 1.2379, "mean_token_accuracy": 0.6748972535133362, "num_tokens": 1372240684.0, "step": 8189 }, { "entropy": 1.7654169201850891, "epoch": 0.899728104144352, "grad_norm": 0.8310354948043823, "learning_rate": 1.2887228943474376e-05, "loss": 1.4978, "mean_token_accuracy": 0.6487491776545843, "num_tokens": 1372380490.0, "step": 8190 }, { "entropy": 1.7207094430923462, "epoch": 0.8998379610557249, "grad_norm": 0.6810332536697388, "learning_rate": 1.2885663381942877e-05, "loss": 1.506, "mean_token_accuracy": 0.6450515190760294, "num_tokens": 1372567918.0, "step": 8191 }, { "entropy": 1.6392219463984172, "epoch": 0.8999478179670979, "grad_norm": 0.6061927676200867, "learning_rate": 1.288409776073098e-05, "loss": 1.4508, "mean_token_accuracy": 0.6553038557370504, "num_tokens": 1372767417.0, "step": 8192 }, { "entropy": 1.6560143133004506, "epoch": 0.9000576748784708, "grad_norm": 0.7693495750427246, "learning_rate": 1.2882532079888234e-05, "loss": 1.2824, "mean_token_accuracy": 0.6741581360499064, "num_tokens": 1372908182.0, "step": 8193 }, { "entropy": 1.7727676530679066, "epoch": 0.9001675317898438, "grad_norm": 0.7517789602279663, "learning_rate": 1.2880966339464203e-05, "loss": 1.3932, "mean_token_accuracy": 0.6643371681372324, "num_tokens": 1373084118.0, "step": 8194 }, { "entropy": 1.6854754785696666, "epoch": 0.9002773887012167, "grad_norm": 0.7738332152366638, "learning_rate": 1.2879400539508431e-05, "loss": 1.3219, "mean_token_accuracy": 0.6660276005665461, "num_tokens": 1373209452.0, "step": 8195 }, { "entropy": 1.6973048547903697, "epoch": 0.9003872456125896, "grad_norm": 0.6942301392555237, "learning_rate": 1.287783468007048e-05, "loss": 1.294, "mean_token_accuracy": 0.6708059559265772, "num_tokens": 1373356174.0, "step": 8196 }, { "entropy": 1.6481784184773762, "epoch": 0.9004971025239625, "grad_norm": 0.6286212801933289, "learning_rate": 1.2876268761199905e-05, "loss": 1.3796, "mean_token_accuracy": 0.6566238403320312, "num_tokens": 1373512786.0, "step": 8197 }, { "entropy": 1.657067855199178, "epoch": 0.9006069594353355, "grad_norm": 0.6861938238143921, "learning_rate": 1.2874702782946273e-05, "loss": 1.3017, "mean_token_accuracy": 0.6696779529253641, "num_tokens": 1373631016.0, "step": 8198 }, { "entropy": 1.6833013196786244, "epoch": 0.9007168163467084, "grad_norm": 0.6281445026397705, "learning_rate": 1.2873136745359138e-05, "loss": 1.4777, "mean_token_accuracy": 0.6407529513041178, "num_tokens": 1373814798.0, "step": 8199 }, { "entropy": 1.7312106589476268, "epoch": 0.9008266732580813, "grad_norm": 0.6315418481826782, "learning_rate": 1.2871570648488074e-05, "loss": 1.409, "mean_token_accuracy": 0.6527961442867914, "num_tokens": 1373963715.0, "step": 8200 }, { "entropy": 1.636249562104543, "epoch": 0.9009365301694543, "grad_norm": 0.614751935005188, "learning_rate": 1.2870004492382639e-05, "loss": 1.464, "mean_token_accuracy": 0.6453719884157181, "num_tokens": 1374167035.0, "step": 8201 }, { "entropy": 1.7170550723870595, "epoch": 0.9010463870808272, "grad_norm": 0.7123568058013916, "learning_rate": 1.2868438277092408e-05, "loss": 1.355, "mean_token_accuracy": 0.6694580415884653, "num_tokens": 1374299248.0, "step": 8202 }, { "entropy": 1.8127153019110362, "epoch": 0.9011562439922002, "grad_norm": 0.6031003594398499, "learning_rate": 1.2866872002666943e-05, "loss": 1.4627, "mean_token_accuracy": 0.6451161354780197, "num_tokens": 1374494985.0, "step": 8203 }, { "entropy": 1.7356827060381572, "epoch": 0.9012661009035731, "grad_norm": 0.8380510807037354, "learning_rate": 1.2865305669155822e-05, "loss": 1.4517, "mean_token_accuracy": 0.6483780195315679, "num_tokens": 1374700001.0, "step": 8204 }, { "entropy": 1.7188110053539276, "epoch": 0.9013759578149461, "grad_norm": 0.6875895261764526, "learning_rate": 1.2863739276608618e-05, "loss": 1.3714, "mean_token_accuracy": 0.6474323074022929, "num_tokens": 1374861646.0, "step": 8205 }, { "entropy": 1.6347064077854156, "epoch": 0.901485814726319, "grad_norm": 0.6865116953849792, "learning_rate": 1.2862172825074906e-05, "loss": 1.5043, "mean_token_accuracy": 0.6302382349967957, "num_tokens": 1375086655.0, "step": 8206 }, { "entropy": 1.664256900548935, "epoch": 0.9015956716376919, "grad_norm": 0.6529141664505005, "learning_rate": 1.2860606314604262e-05, "loss": 1.4591, "mean_token_accuracy": 0.6522035201390585, "num_tokens": 1375273760.0, "step": 8207 }, { "entropy": 1.783077895641327, "epoch": 0.9017055285490648, "grad_norm": 0.7015360593795776, "learning_rate": 1.2859039745246267e-05, "loss": 1.3206, "mean_token_accuracy": 0.6642241428295771, "num_tokens": 1375397296.0, "step": 8208 }, { "entropy": 1.677474598089854, "epoch": 0.9018153854604378, "grad_norm": 0.6711921095848083, "learning_rate": 1.28574731170505e-05, "loss": 1.5791, "mean_token_accuracy": 0.6554554551839828, "num_tokens": 1375570200.0, "step": 8209 }, { "entropy": 1.7532115777333577, "epoch": 0.9019252423718107, "grad_norm": 0.7770174741744995, "learning_rate": 1.2855906430066552e-05, "loss": 1.4795, "mean_token_accuracy": 0.6456655959288279, "num_tokens": 1375729714.0, "step": 8210 }, { "entropy": 1.7668093641599019, "epoch": 0.9020350992831837, "grad_norm": 0.6325072050094604, "learning_rate": 1.2854339684343993e-05, "loss": 1.5966, "mean_token_accuracy": 0.6250222822030386, "num_tokens": 1375963527.0, "step": 8211 }, { "entropy": 1.6405591766039531, "epoch": 0.9021449561945566, "grad_norm": 0.6002046465873718, "learning_rate": 1.2852772879932425e-05, "loss": 1.3697, "mean_token_accuracy": 0.6608653912941614, "num_tokens": 1376118353.0, "step": 8212 }, { "entropy": 1.7306797703107197, "epoch": 0.9022548131059295, "grad_norm": 0.8113459944725037, "learning_rate": 1.285120601688143e-05, "loss": 1.4127, "mean_token_accuracy": 0.6562537600596746, "num_tokens": 1376286365.0, "step": 8213 }, { "entropy": 1.6529719134171803, "epoch": 0.9023646700173025, "grad_norm": 0.6751854419708252, "learning_rate": 1.2849639095240596e-05, "loss": 1.3637, "mean_token_accuracy": 0.6638490408658981, "num_tokens": 1376444760.0, "step": 8214 }, { "entropy": 1.6793536742528279, "epoch": 0.9024745269286754, "grad_norm": 0.6569497585296631, "learning_rate": 1.284807211505952e-05, "loss": 1.4346, "mean_token_accuracy": 0.643970454732577, "num_tokens": 1376679188.0, "step": 8215 }, { "entropy": 1.65372101465861, "epoch": 0.9025843838400484, "grad_norm": 0.7767542004585266, "learning_rate": 1.2846505076387794e-05, "loss": 1.3946, "mean_token_accuracy": 0.6664103666941324, "num_tokens": 1376842688.0, "step": 8216 }, { "entropy": 1.682920217514038, "epoch": 0.9026942407514212, "grad_norm": 0.6500419974327087, "learning_rate": 1.284493797927501e-05, "loss": 1.488, "mean_token_accuracy": 0.641838863492012, "num_tokens": 1377060077.0, "step": 8217 }, { "entropy": 1.6799386739730835, "epoch": 0.9028040976627942, "grad_norm": 0.7521069049835205, "learning_rate": 1.2843370823770776e-05, "loss": 1.4117, "mean_token_accuracy": 0.6611147572596868, "num_tokens": 1377218796.0, "step": 8218 }, { "entropy": 1.6649762590726216, "epoch": 0.9029139545741671, "grad_norm": 0.8042888641357422, "learning_rate": 1.2841803609924684e-05, "loss": 1.4148, "mean_token_accuracy": 0.6492630541324615, "num_tokens": 1377444988.0, "step": 8219 }, { "entropy": 1.6619918942451477, "epoch": 0.9030238114855401, "grad_norm": 0.6244032382965088, "learning_rate": 1.284023633778634e-05, "loss": 1.2809, "mean_token_accuracy": 0.6721317023038864, "num_tokens": 1377588973.0, "step": 8220 }, { "entropy": 1.7610027194023132, "epoch": 0.903133668396913, "grad_norm": 0.7928817272186279, "learning_rate": 1.2838669007405343e-05, "loss": 1.4796, "mean_token_accuracy": 0.6414338201284409, "num_tokens": 1377760628.0, "step": 8221 }, { "entropy": 1.666873186826706, "epoch": 0.903243525308286, "grad_norm": 0.7573736310005188, "learning_rate": 1.2837101618831298e-05, "loss": 1.2919, "mean_token_accuracy": 0.6718289206425349, "num_tokens": 1377896394.0, "step": 8222 }, { "entropy": 1.659334381421407, "epoch": 0.9033533822196589, "grad_norm": 0.6099801659584045, "learning_rate": 1.2835534172113818e-05, "loss": 1.3683, "mean_token_accuracy": 0.6715717862049738, "num_tokens": 1378074075.0, "step": 8223 }, { "entropy": 1.7113625705242157, "epoch": 0.9034632391310319, "grad_norm": 0.8202866911888123, "learning_rate": 1.2833966667302507e-05, "loss": 1.3022, "mean_token_accuracy": 0.6690480063358942, "num_tokens": 1378201013.0, "step": 8224 }, { "entropy": 1.6659850974877675, "epoch": 0.9035730960424048, "grad_norm": 0.6271844506263733, "learning_rate": 1.283239910444698e-05, "loss": 1.3199, "mean_token_accuracy": 0.659883846839269, "num_tokens": 1378393507.0, "step": 8225 }, { "entropy": 1.6229958931605022, "epoch": 0.9036829529537777, "grad_norm": 0.6993024945259094, "learning_rate": 1.2830831483596843e-05, "loss": 1.3524, "mean_token_accuracy": 0.6596012363831202, "num_tokens": 1378561370.0, "step": 8226 }, { "entropy": 1.7114948133627574, "epoch": 0.9037928098651506, "grad_norm": 0.7609866857528687, "learning_rate": 1.2829263804801717e-05, "loss": 1.2955, "mean_token_accuracy": 0.6641099601984024, "num_tokens": 1378671487.0, "step": 8227 }, { "entropy": 1.6923895478248596, "epoch": 0.9039026667765235, "grad_norm": 0.6936889290809631, "learning_rate": 1.2827696068111215e-05, "loss": 1.3362, "mean_token_accuracy": 0.6670081863800684, "num_tokens": 1378865187.0, "step": 8228 }, { "entropy": 1.6573287546634674, "epoch": 0.9040125236878965, "grad_norm": 0.768925666809082, "learning_rate": 1.2826128273574956e-05, "loss": 1.2579, "mean_token_accuracy": 0.6781423836946487, "num_tokens": 1378998735.0, "step": 8229 }, { "entropy": 1.7605082790056865, "epoch": 0.9041223805992694, "grad_norm": 0.8298249244689941, "learning_rate": 1.2824560421242561e-05, "loss": 1.3718, "mean_token_accuracy": 0.6664842814207077, "num_tokens": 1379115376.0, "step": 8230 }, { "entropy": 1.7072460353374481, "epoch": 0.9042322375106424, "grad_norm": 0.6020426154136658, "learning_rate": 1.282299251116365e-05, "loss": 1.4243, "mean_token_accuracy": 0.6609440296888351, "num_tokens": 1379263910.0, "step": 8231 }, { "entropy": 1.684839407602946, "epoch": 0.9043420944220153, "grad_norm": 0.6894782185554504, "learning_rate": 1.2821424543387847e-05, "loss": 1.2821, "mean_token_accuracy": 0.6726427723964056, "num_tokens": 1379417480.0, "step": 8232 }, { "entropy": 1.6503340899944305, "epoch": 0.9044519513333883, "grad_norm": 0.5822688937187195, "learning_rate": 1.281985651796478e-05, "loss": 1.4296, "mean_token_accuracy": 0.649740070104599, "num_tokens": 1379665136.0, "step": 8233 }, { "entropy": 1.6690570612748463, "epoch": 0.9045618082447612, "grad_norm": 0.6308638453483582, "learning_rate": 1.2818288434944072e-05, "loss": 1.3888, "mean_token_accuracy": 0.6529064277807871, "num_tokens": 1379883176.0, "step": 8234 }, { "entropy": 1.7349829475084941, "epoch": 0.9046716651561342, "grad_norm": 0.6335077881813049, "learning_rate": 1.2816720294375356e-05, "loss": 1.5175, "mean_token_accuracy": 0.6491954425970713, "num_tokens": 1380137225.0, "step": 8235 }, { "entropy": 1.754395325978597, "epoch": 0.9047815220675071, "grad_norm": 0.5818184018135071, "learning_rate": 1.281515209630826e-05, "loss": 1.4973, "mean_token_accuracy": 0.6262113898992538, "num_tokens": 1380356095.0, "step": 8236 }, { "entropy": 1.7043922344843547, "epoch": 0.9048913789788801, "grad_norm": 0.6699923276901245, "learning_rate": 1.281358384079242e-05, "loss": 1.4521, "mean_token_accuracy": 0.6527075817187628, "num_tokens": 1380514717.0, "step": 8237 }, { "entropy": 1.690778245528539, "epoch": 0.9050012358902529, "grad_norm": 0.6731590628623962, "learning_rate": 1.2812015527877468e-05, "loss": 1.3295, "mean_token_accuracy": 0.6692969848712286, "num_tokens": 1380638078.0, "step": 8238 }, { "entropy": 1.7137603163719177, "epoch": 0.9051110928016258, "grad_norm": 0.9683634042739868, "learning_rate": 1.281044715761304e-05, "loss": 1.0955, "mean_token_accuracy": 0.6890260974566141, "num_tokens": 1380809601.0, "step": 8239 }, { "entropy": 1.709503750006358, "epoch": 0.9052209497129988, "grad_norm": 0.6515042781829834, "learning_rate": 1.2808878730048776e-05, "loss": 1.3787, "mean_token_accuracy": 0.6661019821961721, "num_tokens": 1380961836.0, "step": 8240 }, { "entropy": 1.7124955157438915, "epoch": 0.9053308066243717, "grad_norm": 0.6768700480461121, "learning_rate": 1.2807310245234315e-05, "loss": 1.3232, "mean_token_accuracy": 0.6700140833854675, "num_tokens": 1381120067.0, "step": 8241 }, { "entropy": 1.7447414994239807, "epoch": 0.9054406635357447, "grad_norm": 0.6183107495307922, "learning_rate": 1.2805741703219298e-05, "loss": 1.394, "mean_token_accuracy": 0.6493855814139048, "num_tokens": 1381295366.0, "step": 8242 }, { "entropy": 1.6751858790715535, "epoch": 0.9055505204471176, "grad_norm": 0.7140679359436035, "learning_rate": 1.280417310405337e-05, "loss": 1.2097, "mean_token_accuracy": 0.678791751464208, "num_tokens": 1381414709.0, "step": 8243 }, { "entropy": 1.7946178317070007, "epoch": 0.9056603773584906, "grad_norm": 0.7667945027351379, "learning_rate": 1.280260444778618e-05, "loss": 1.6662, "mean_token_accuracy": 0.6264889935652415, "num_tokens": 1381601348.0, "step": 8244 }, { "entropy": 1.733892410993576, "epoch": 0.9057702342698635, "grad_norm": 5.535741806030273, "learning_rate": 1.2801035734467367e-05, "loss": 1.5474, "mean_token_accuracy": 0.6549767504135767, "num_tokens": 1381785359.0, "step": 8245 }, { "entropy": 1.6751560469468434, "epoch": 0.9058800911812365, "grad_norm": 0.6716073751449585, "learning_rate": 1.2799466964146588e-05, "loss": 1.2718, "mean_token_accuracy": 0.6745875130097071, "num_tokens": 1381912751.0, "step": 8246 }, { "entropy": 1.6946379244327545, "epoch": 0.9059899480926094, "grad_norm": 0.7089009284973145, "learning_rate": 1.2797898136873488e-05, "loss": 1.3159, "mean_token_accuracy": 0.6636594186226527, "num_tokens": 1382073482.0, "step": 8247 }, { "entropy": 1.6982887486616771, "epoch": 0.9060998050039824, "grad_norm": 0.7040889263153076, "learning_rate": 1.2796329252697723e-05, "loss": 1.4122, "mean_token_accuracy": 0.6548338035742441, "num_tokens": 1382261132.0, "step": 8248 }, { "entropy": 1.7026695410410564, "epoch": 0.9062096619153552, "grad_norm": 0.6209987998008728, "learning_rate": 1.2794760311668946e-05, "loss": 1.4881, "mean_token_accuracy": 0.6293542782465616, "num_tokens": 1382448970.0, "step": 8249 }, { "entropy": 1.7288841704527538, "epoch": 0.9063195188267282, "grad_norm": 0.6209704875946045, "learning_rate": 1.2793191313836815e-05, "loss": 1.3709, "mean_token_accuracy": 0.6561005115509033, "num_tokens": 1382661028.0, "step": 8250 }, { "entropy": 1.6416561702887218, "epoch": 0.9064293757381011, "grad_norm": 0.8076834678649902, "learning_rate": 1.2791622259250986e-05, "loss": 1.2379, "mean_token_accuracy": 0.6792215506235758, "num_tokens": 1382780205.0, "step": 8251 }, { "entropy": 1.6922811170419056, "epoch": 0.9065392326494741, "grad_norm": 0.5980085730552673, "learning_rate": 1.2790053147961119e-05, "loss": 1.3521, "mean_token_accuracy": 0.6647952993710836, "num_tokens": 1382952918.0, "step": 8252 }, { "entropy": 1.6755750874678295, "epoch": 0.906649089560847, "grad_norm": 0.7372617721557617, "learning_rate": 1.2788483980016878e-05, "loss": 1.367, "mean_token_accuracy": 0.6559625367323557, "num_tokens": 1383126488.0, "step": 8253 }, { "entropy": 1.7112425963083904, "epoch": 0.9067589464722199, "grad_norm": 0.634032666683197, "learning_rate": 1.2786914755467924e-05, "loss": 1.4346, "mean_token_accuracy": 0.6412641257047653, "num_tokens": 1383322709.0, "step": 8254 }, { "entropy": 1.6903300682703655, "epoch": 0.9068688033835929, "grad_norm": 0.6346539855003357, "learning_rate": 1.2785345474363922e-05, "loss": 1.457, "mean_token_accuracy": 0.6424979070822397, "num_tokens": 1383520924.0, "step": 8255 }, { "entropy": 1.7225427826245625, "epoch": 0.9069786602949658, "grad_norm": 0.8833540678024292, "learning_rate": 1.2783776136754544e-05, "loss": 1.1687, "mean_token_accuracy": 0.690707857410113, "num_tokens": 1383642044.0, "step": 8256 }, { "entropy": 1.660976231098175, "epoch": 0.9070885172063388, "grad_norm": 0.6287848353385925, "learning_rate": 1.2782206742689453e-05, "loss": 1.434, "mean_token_accuracy": 0.6516825159390768, "num_tokens": 1383861090.0, "step": 8257 }, { "entropy": 1.784266173839569, "epoch": 0.9071983741177116, "grad_norm": 0.7297434210777283, "learning_rate": 1.278063729221832e-05, "loss": 1.4181, "mean_token_accuracy": 0.6553197354078293, "num_tokens": 1384011860.0, "step": 8258 }, { "entropy": 1.704436033964157, "epoch": 0.9073082310290846, "grad_norm": 0.7265962362289429, "learning_rate": 1.2779067785390822e-05, "loss": 1.5012, "mean_token_accuracy": 0.6423581590255102, "num_tokens": 1384221197.0, "step": 8259 }, { "entropy": 1.7151046693325043, "epoch": 0.9074180879404575, "grad_norm": 0.6915941834449768, "learning_rate": 1.277749822225663e-05, "loss": 1.2919, "mean_token_accuracy": 0.6690166046222051, "num_tokens": 1384407824.0, "step": 8260 }, { "entropy": 1.71536985039711, "epoch": 0.9075279448518305, "grad_norm": 0.6239339709281921, "learning_rate": 1.2775928602865418e-05, "loss": 1.4164, "mean_token_accuracy": 0.646524965763092, "num_tokens": 1384587139.0, "step": 8261 }, { "entropy": 1.7479670147101085, "epoch": 0.9076378017632034, "grad_norm": 0.6657982468605042, "learning_rate": 1.2774358927266869e-05, "loss": 1.399, "mean_token_accuracy": 0.6403181304534277, "num_tokens": 1384776964.0, "step": 8262 }, { "entropy": 1.6458578010400136, "epoch": 0.9077476586745764, "grad_norm": 0.6543890833854675, "learning_rate": 1.2772789195510658e-05, "loss": 1.382, "mean_token_accuracy": 0.6550763497749964, "num_tokens": 1384967491.0, "step": 8263 }, { "entropy": 1.7436320980389912, "epoch": 0.9078575155859493, "grad_norm": 0.683189332485199, "learning_rate": 1.2771219407646465e-05, "loss": 1.4977, "mean_token_accuracy": 0.6435463974873225, "num_tokens": 1385186079.0, "step": 8264 }, { "entropy": 1.6882221698760986, "epoch": 0.9079673724973223, "grad_norm": 0.9108843803405762, "learning_rate": 1.2769649563723979e-05, "loss": 1.3097, "mean_token_accuracy": 0.6750803043444952, "num_tokens": 1385311026.0, "step": 8265 }, { "entropy": 1.7177577217419941, "epoch": 0.9080772294086952, "grad_norm": 0.681003987789154, "learning_rate": 1.276807966379288e-05, "loss": 1.2956, "mean_token_accuracy": 0.6633460720380148, "num_tokens": 1385507602.0, "step": 8266 }, { "entropy": 1.7337321539719899, "epoch": 0.9081870863200681, "grad_norm": 27.118099212646484, "learning_rate": 1.2766509707902856e-05, "loss": 1.382, "mean_token_accuracy": 0.6552288780609766, "num_tokens": 1385687771.0, "step": 8267 }, { "entropy": 1.7292255461215973, "epoch": 0.9082969432314411, "grad_norm": 0.722960352897644, "learning_rate": 1.27649396961036e-05, "loss": 1.4267, "mean_token_accuracy": 0.6583873132864634, "num_tokens": 1385851761.0, "step": 8268 }, { "entropy": 1.6824017763137817, "epoch": 0.9084068001428139, "grad_norm": 0.74224454164505, "learning_rate": 1.2763369628444793e-05, "loss": 1.4162, "mean_token_accuracy": 0.6561558942000071, "num_tokens": 1385995094.0, "step": 8269 }, { "entropy": 1.774072657028834, "epoch": 0.9085166570541869, "grad_norm": 0.6898522973060608, "learning_rate": 1.2761799504976133e-05, "loss": 1.6522, "mean_token_accuracy": 0.6376588419079781, "num_tokens": 1386198262.0, "step": 8270 }, { "entropy": 1.7188159724076588, "epoch": 0.9086265139655598, "grad_norm": 0.6473353505134583, "learning_rate": 1.2760229325747316e-05, "loss": 1.3344, "mean_token_accuracy": 0.6641741444667181, "num_tokens": 1386372553.0, "step": 8271 }, { "entropy": 1.6474638481934865, "epoch": 0.9087363708769328, "grad_norm": 0.6398204565048218, "learning_rate": 1.2758659090808032e-05, "loss": 1.4653, "mean_token_accuracy": 0.6392107456922531, "num_tokens": 1386547723.0, "step": 8272 }, { "entropy": 1.7262985209623973, "epoch": 0.9088462277883057, "grad_norm": 0.7074971199035645, "learning_rate": 1.2757088800207977e-05, "loss": 1.4144, "mean_token_accuracy": 0.6551804691553116, "num_tokens": 1386692276.0, "step": 8273 }, { "entropy": 1.7006110846996307, "epoch": 0.9089560846996787, "grad_norm": 0.7677414417266846, "learning_rate": 1.275551845399686e-05, "loss": 1.3142, "mean_token_accuracy": 0.6622739533583323, "num_tokens": 1386841086.0, "step": 8274 }, { "entropy": 1.727352688709895, "epoch": 0.9090659416110516, "grad_norm": 0.660779595375061, "learning_rate": 1.275394805222437e-05, "loss": 1.3278, "mean_token_accuracy": 0.6708957056204478, "num_tokens": 1386990369.0, "step": 8275 }, { "entropy": 1.6799138486385345, "epoch": 0.9091757985224246, "grad_norm": 0.6254904270172119, "learning_rate": 1.2752377594940215e-05, "loss": 1.337, "mean_token_accuracy": 0.6697245140870413, "num_tokens": 1387149561.0, "step": 8276 }, { "entropy": 1.7286913692951202, "epoch": 0.9092856554337975, "grad_norm": 0.6610144376754761, "learning_rate": 1.27508070821941e-05, "loss": 1.3642, "mean_token_accuracy": 0.6627133886019388, "num_tokens": 1387344643.0, "step": 8277 }, { "entropy": 1.764141748348872, "epoch": 0.9093955123451705, "grad_norm": 0.7539360523223877, "learning_rate": 1.2749236514035727e-05, "loss": 1.3591, "mean_token_accuracy": 0.6495264520247778, "num_tokens": 1387478751.0, "step": 8278 }, { "entropy": 1.7027659912904103, "epoch": 0.9095053692565434, "grad_norm": 0.6264234185218811, "learning_rate": 1.2747665890514808e-05, "loss": 1.3784, "mean_token_accuracy": 0.6494586914777756, "num_tokens": 1387632284.0, "step": 8279 }, { "entropy": 1.7547483344872792, "epoch": 0.9096152261679162, "grad_norm": 0.8345460295677185, "learning_rate": 1.2746095211681053e-05, "loss": 1.3302, "mean_token_accuracy": 0.6681412657101949, "num_tokens": 1387757487.0, "step": 8280 }, { "entropy": 1.6923251152038574, "epoch": 0.9097250830792892, "grad_norm": 0.6497990489006042, "learning_rate": 1.2744524477584171e-05, "loss": 1.4038, "mean_token_accuracy": 0.6624845961729685, "num_tokens": 1387931911.0, "step": 8281 }, { "entropy": 1.682264655828476, "epoch": 0.9098349399906621, "grad_norm": 1.6480847597122192, "learning_rate": 1.2742953688273877e-05, "loss": 1.2411, "mean_token_accuracy": 0.6727783133586248, "num_tokens": 1388125678.0, "step": 8282 }, { "entropy": 1.6904015044371288, "epoch": 0.9099447969020351, "grad_norm": 0.67786705493927, "learning_rate": 1.2741382843799879e-05, "loss": 1.3375, "mean_token_accuracy": 0.6675763030846914, "num_tokens": 1388267967.0, "step": 8283 }, { "entropy": 1.7203630308310192, "epoch": 0.910054653813408, "grad_norm": 0.7089915871620178, "learning_rate": 1.2739811944211902e-05, "loss": 1.5562, "mean_token_accuracy": 0.6400385747353236, "num_tokens": 1388461097.0, "step": 8284 }, { "entropy": 1.6640840868155162, "epoch": 0.910164510724781, "grad_norm": 0.7004643678665161, "learning_rate": 1.273824098955966e-05, "loss": 1.2948, "mean_token_accuracy": 0.6744209975004196, "num_tokens": 1388587063.0, "step": 8285 }, { "entropy": 1.7536252836386363, "epoch": 0.9102743676361539, "grad_norm": 0.7096135020256042, "learning_rate": 1.2736669979892874e-05, "loss": 1.5139, "mean_token_accuracy": 0.6507594784100851, "num_tokens": 1388788368.0, "step": 8286 }, { "entropy": 1.7375941177209218, "epoch": 0.9103842245475269, "grad_norm": 0.6821257472038269, "learning_rate": 1.2735098915261264e-05, "loss": 1.348, "mean_token_accuracy": 0.6525353888670603, "num_tokens": 1388976824.0, "step": 8287 }, { "entropy": 1.7107720772425334, "epoch": 0.9104940814588998, "grad_norm": 0.717570960521698, "learning_rate": 1.2733527795714558e-05, "loss": 1.4769, "mean_token_accuracy": 0.6409533818562826, "num_tokens": 1389192824.0, "step": 8288 }, { "entropy": 1.7379231850306194, "epoch": 0.9106039383702728, "grad_norm": 0.8190452456474304, "learning_rate": 1.2731956621302477e-05, "loss": 1.4556, "mean_token_accuracy": 0.6462369511524836, "num_tokens": 1389373761.0, "step": 8289 }, { "entropy": 1.7158324718475342, "epoch": 0.9107137952816456, "grad_norm": 0.7872406840324402, "learning_rate": 1.2730385392074751e-05, "loss": 1.63, "mean_token_accuracy": 0.6238044649362564, "num_tokens": 1389557573.0, "step": 8290 }, { "entropy": 1.7453622718652089, "epoch": 0.9108236521930186, "grad_norm": 0.7404714226722717, "learning_rate": 1.2728814108081105e-05, "loss": 1.2521, "mean_token_accuracy": 0.6764761656522751, "num_tokens": 1389670922.0, "step": 8291 }, { "entropy": 1.7301382223765056, "epoch": 0.9109335091043915, "grad_norm": 0.6327300071716309, "learning_rate": 1.272724276937127e-05, "loss": 1.3361, "mean_token_accuracy": 0.6668089230855306, "num_tokens": 1389851239.0, "step": 8292 }, { "entropy": 1.719651500384013, "epoch": 0.9110433660157645, "grad_norm": 0.564914882183075, "learning_rate": 1.2725671375994984e-05, "loss": 1.4253, "mean_token_accuracy": 0.6573879073063532, "num_tokens": 1390084857.0, "step": 8293 }, { "entropy": 1.695727248986562, "epoch": 0.9111532229271374, "grad_norm": 0.6507130861282349, "learning_rate": 1.2724099928001977e-05, "loss": 1.3804, "mean_token_accuracy": 0.6489260047674179, "num_tokens": 1390242291.0, "step": 8294 }, { "entropy": 1.7323060234387715, "epoch": 0.9112630798385103, "grad_norm": 0.6349548101425171, "learning_rate": 1.2722528425441978e-05, "loss": 1.4971, "mean_token_accuracy": 0.6366077115138372, "num_tokens": 1390456782.0, "step": 8295 }, { "entropy": 1.6607350210348766, "epoch": 0.9113729367498833, "grad_norm": 0.6189599633216858, "learning_rate": 1.2720956868364737e-05, "loss": 1.3644, "mean_token_accuracy": 0.6537392934163412, "num_tokens": 1390624854.0, "step": 8296 }, { "entropy": 1.676759531100591, "epoch": 0.9114827936612562, "grad_norm": 0.682950496673584, "learning_rate": 1.2719385256819983e-05, "loss": 1.2863, "mean_token_accuracy": 0.6645957181851069, "num_tokens": 1390765067.0, "step": 8297 }, { "entropy": 1.7330308457215626, "epoch": 0.9115926505726292, "grad_norm": 0.7536049485206604, "learning_rate": 1.2717813590857462e-05, "loss": 1.4905, "mean_token_accuracy": 0.6402916212876638, "num_tokens": 1390972898.0, "step": 8298 }, { "entropy": 1.706084320942561, "epoch": 0.9117025074840021, "grad_norm": 0.61712247133255, "learning_rate": 1.2716241870526913e-05, "loss": 1.412, "mean_token_accuracy": 0.6601520677407583, "num_tokens": 1391173481.0, "step": 8299 }, { "entropy": 1.7238514224688213, "epoch": 0.911812364395375, "grad_norm": 0.7173047661781311, "learning_rate": 1.2714670095878085e-05, "loss": 1.3898, "mean_token_accuracy": 0.6515658646821976, "num_tokens": 1391342776.0, "step": 8300 }, { "entropy": 1.6747208436330159, "epoch": 0.9119222213067479, "grad_norm": 0.6746057868003845, "learning_rate": 1.2713098266960717e-05, "loss": 1.4247, "mean_token_accuracy": 0.6679030358791351, "num_tokens": 1391480315.0, "step": 8301 }, { "entropy": 1.70952441294988, "epoch": 0.9120320782181209, "grad_norm": 0.6282344460487366, "learning_rate": 1.2711526383824567e-05, "loss": 1.3414, "mean_token_accuracy": 0.6575956245263418, "num_tokens": 1391643523.0, "step": 8302 }, { "entropy": 1.7618720829486847, "epoch": 0.9121419351294938, "grad_norm": 0.6925609707832336, "learning_rate": 1.2709954446519372e-05, "loss": 1.3791, "mean_token_accuracy": 0.6538346409797668, "num_tokens": 1391809693.0, "step": 8303 }, { "entropy": 1.7483843763669331, "epoch": 0.9122517920408668, "grad_norm": 0.807517945766449, "learning_rate": 1.2708382455094893e-05, "loss": 1.4168, "mean_token_accuracy": 0.6449888050556183, "num_tokens": 1391968941.0, "step": 8304 }, { "entropy": 1.6495538353919983, "epoch": 0.9123616489522397, "grad_norm": 0.5905596017837524, "learning_rate": 1.2706810409600877e-05, "loss": 1.4587, "mean_token_accuracy": 0.6530623485644659, "num_tokens": 1392185953.0, "step": 8305 }, { "entropy": 1.638418326775233, "epoch": 0.9124715058636127, "grad_norm": 0.7588011622428894, "learning_rate": 1.2705238310087082e-05, "loss": 1.4184, "mean_token_accuracy": 0.659953753153483, "num_tokens": 1392375019.0, "step": 8306 }, { "entropy": 1.7082325716813405, "epoch": 0.9125813627749856, "grad_norm": 0.586554229259491, "learning_rate": 1.270366615660326e-05, "loss": 1.4373, "mean_token_accuracy": 0.6499194204807281, "num_tokens": 1392615284.0, "step": 8307 }, { "entropy": 1.7017434040705364, "epoch": 0.9126912196863585, "grad_norm": 0.7278909683227539, "learning_rate": 1.2702093949199177e-05, "loss": 1.4675, "mean_token_accuracy": 0.6527448892593384, "num_tokens": 1392790159.0, "step": 8308 }, { "entropy": 1.6655905544757843, "epoch": 0.9128010765977315, "grad_norm": 1.0901970863342285, "learning_rate": 1.2700521687924583e-05, "loss": 1.3658, "mean_token_accuracy": 0.6630384723345438, "num_tokens": 1392957262.0, "step": 8309 }, { "entropy": 1.7326476275920868, "epoch": 0.9129109335091043, "grad_norm": 0.6388905048370361, "learning_rate": 1.2698949372829248e-05, "loss": 1.3687, "mean_token_accuracy": 0.6470983376105627, "num_tokens": 1393116396.0, "step": 8310 }, { "entropy": 1.6326172451178234, "epoch": 0.9130207904204773, "grad_norm": 0.7294114828109741, "learning_rate": 1.2697377003962925e-05, "loss": 1.3318, "mean_token_accuracy": 0.6786777973175049, "num_tokens": 1393326810.0, "step": 8311 }, { "entropy": 1.6858830153942108, "epoch": 0.9131306473318502, "grad_norm": 0.7388503551483154, "learning_rate": 1.269580458137539e-05, "loss": 1.4234, "mean_token_accuracy": 0.6717520157496134, "num_tokens": 1393522621.0, "step": 8312 }, { "entropy": 1.7482396562894185, "epoch": 0.9132405042432232, "grad_norm": 0.9013400077819824, "learning_rate": 1.2694232105116401e-05, "loss": 1.5036, "mean_token_accuracy": 0.6550756047169367, "num_tokens": 1393690191.0, "step": 8313 }, { "entropy": 1.7518030802408855, "epoch": 0.9133503611545961, "grad_norm": 0.6710026264190674, "learning_rate": 1.269265957523573e-05, "loss": 1.6336, "mean_token_accuracy": 0.629910779496034, "num_tokens": 1393926208.0, "step": 8314 }, { "entropy": 1.7153498927752178, "epoch": 0.9134602180659691, "grad_norm": 0.6707825064659119, "learning_rate": 1.2691086991783147e-05, "loss": 1.6033, "mean_token_accuracy": 0.6374608427286148, "num_tokens": 1394147685.0, "step": 8315 }, { "entropy": 1.7012030879656475, "epoch": 0.913570074977342, "grad_norm": 0.6023955345153809, "learning_rate": 1.2689514354808425e-05, "loss": 1.4334, "mean_token_accuracy": 0.6374744673569998, "num_tokens": 1394333296.0, "step": 8316 }, { "entropy": 1.7281060020128887, "epoch": 0.913679931888715, "grad_norm": 0.714945912361145, "learning_rate": 1.268794166436133e-05, "loss": 1.5645, "mean_token_accuracy": 0.6323638061682383, "num_tokens": 1394552789.0, "step": 8317 }, { "entropy": 1.6209270258744557, "epoch": 0.9137897888000879, "grad_norm": 0.6666091084480286, "learning_rate": 1.2686368920491648e-05, "loss": 1.2327, "mean_token_accuracy": 0.6780135631561279, "num_tokens": 1394674019.0, "step": 8318 }, { "entropy": 1.736515998840332, "epoch": 0.9138996457114609, "grad_norm": 0.7664490938186646, "learning_rate": 1.2684796123249145e-05, "loss": 1.3157, "mean_token_accuracy": 0.6623199184735616, "num_tokens": 1394823043.0, "step": 8319 }, { "entropy": 1.6709323624769847, "epoch": 0.9140095026228338, "grad_norm": 0.6247878670692444, "learning_rate": 1.2683223272683604e-05, "loss": 1.3596, "mean_token_accuracy": 0.6630469262599945, "num_tokens": 1395012780.0, "step": 8320 }, { "entropy": 1.7542273203531902, "epoch": 0.9141193595342066, "grad_norm": 0.6201014518737793, "learning_rate": 1.2681650368844804e-05, "loss": 1.5317, "mean_token_accuracy": 0.6245150317748388, "num_tokens": 1395180061.0, "step": 8321 }, { "entropy": 1.7659225364526112, "epoch": 0.9142292164455796, "grad_norm": 0.8412492275238037, "learning_rate": 1.2680077411782533e-05, "loss": 1.4629, "mean_token_accuracy": 0.6547584036986033, "num_tokens": 1395322290.0, "step": 8322 }, { "entropy": 1.7514819204807281, "epoch": 0.9143390733569525, "grad_norm": 0.6405079960823059, "learning_rate": 1.2678504401546563e-05, "loss": 1.4131, "mean_token_accuracy": 0.6564811915159225, "num_tokens": 1395491553.0, "step": 8323 }, { "entropy": 1.692853420972824, "epoch": 0.9144489302683255, "grad_norm": 0.7179692387580872, "learning_rate": 1.2676931338186688e-05, "loss": 1.3103, "mean_token_accuracy": 0.6628242333730062, "num_tokens": 1395638800.0, "step": 8324 }, { "entropy": 1.7319222788016002, "epoch": 0.9145587871796984, "grad_norm": 0.621341347694397, "learning_rate": 1.2675358221752691e-05, "loss": 1.3553, "mean_token_accuracy": 0.6674741456906, "num_tokens": 1395825432.0, "step": 8325 }, { "entropy": 1.7467030783494313, "epoch": 0.9146686440910714, "grad_norm": 0.712310791015625, "learning_rate": 1.2673785052294364e-05, "loss": 1.4939, "mean_token_accuracy": 0.6631985902786255, "num_tokens": 1395995766.0, "step": 8326 }, { "entropy": 1.6573481957117717, "epoch": 0.9147785010024443, "grad_norm": 0.6593137979507446, "learning_rate": 1.267221182986149e-05, "loss": 1.3545, "mean_token_accuracy": 0.6750344733397166, "num_tokens": 1396168861.0, "step": 8327 }, { "entropy": 1.6669391791025798, "epoch": 0.9148883579138173, "grad_norm": 0.58598792552948, "learning_rate": 1.2670638554503867e-05, "loss": 1.3189, "mean_token_accuracy": 0.6607841104269028, "num_tokens": 1396310664.0, "step": 8328 }, { "entropy": 1.7750846942265828, "epoch": 0.9149982148251902, "grad_norm": 0.6623988747596741, "learning_rate": 1.2669065226271284e-05, "loss": 1.4966, "mean_token_accuracy": 0.6353505253791809, "num_tokens": 1396515959.0, "step": 8329 }, { "entropy": 1.6843581199645996, "epoch": 0.9151080717365632, "grad_norm": 0.6799822449684143, "learning_rate": 1.2667491845213545e-05, "loss": 1.4068, "mean_token_accuracy": 0.6600462645292282, "num_tokens": 1396680133.0, "step": 8330 }, { "entropy": 1.6392850279808044, "epoch": 0.915217928647936, "grad_norm": 0.5755462050437927, "learning_rate": 1.2665918411380434e-05, "loss": 1.38, "mean_token_accuracy": 0.6573647956053416, "num_tokens": 1396852839.0, "step": 8331 }, { "entropy": 1.6700959205627441, "epoch": 0.915327785559309, "grad_norm": 0.6844523549079895, "learning_rate": 1.2664344924821758e-05, "loss": 1.4268, "mean_token_accuracy": 0.6432247956593832, "num_tokens": 1397067247.0, "step": 8332 }, { "entropy": 1.8182924290498097, "epoch": 0.9154376424706819, "grad_norm": 0.6894211173057556, "learning_rate": 1.2662771385587317e-05, "loss": 1.3992, "mean_token_accuracy": 0.649507686495781, "num_tokens": 1397187287.0, "step": 8333 }, { "entropy": 1.7245789070924122, "epoch": 0.9155474993820548, "grad_norm": 0.7190035581588745, "learning_rate": 1.266119779372691e-05, "loss": 1.5355, "mean_token_accuracy": 0.6330128163099289, "num_tokens": 1397345435.0, "step": 8334 }, { "entropy": 1.71918981273969, "epoch": 0.9156573562934278, "grad_norm": 0.782472550868988, "learning_rate": 1.2659624149290337e-05, "loss": 1.4884, "mean_token_accuracy": 0.6548773149649302, "num_tokens": 1397524068.0, "step": 8335 }, { "entropy": 1.7045840620994568, "epoch": 0.9157672132048007, "grad_norm": 0.6515925526618958, "learning_rate": 1.2658050452327415e-05, "loss": 1.3979, "mean_token_accuracy": 0.6536910384893417, "num_tokens": 1397679482.0, "step": 8336 }, { "entropy": 1.6657692591349285, "epoch": 0.9158770701161737, "grad_norm": 0.6342126131057739, "learning_rate": 1.2656476702887939e-05, "loss": 1.3015, "mean_token_accuracy": 0.6769297222296397, "num_tokens": 1397858550.0, "step": 8337 }, { "entropy": 1.7451824148495991, "epoch": 0.9159869270275466, "grad_norm": 0.7401023507118225, "learning_rate": 1.2654902901021725e-05, "loss": 1.2909, "mean_token_accuracy": 0.6768914808829626, "num_tokens": 1397963039.0, "step": 8338 }, { "entropy": 1.7285672624905903, "epoch": 0.9160967839389196, "grad_norm": 0.727428138256073, "learning_rate": 1.2653329046778576e-05, "loss": 1.4568, "mean_token_accuracy": 0.665610060095787, "num_tokens": 1398148299.0, "step": 8339 }, { "entropy": 1.7707529664039612, "epoch": 0.9162066408502925, "grad_norm": 0.8654743432998657, "learning_rate": 1.265175514020831e-05, "loss": 1.3726, "mean_token_accuracy": 0.6515340109666189, "num_tokens": 1398290632.0, "step": 8340 }, { "entropy": 1.7380510866641998, "epoch": 0.9163164977616655, "grad_norm": 0.6711382269859314, "learning_rate": 1.2650181181360734e-05, "loss": 1.3634, "mean_token_accuracy": 0.6581040819485983, "num_tokens": 1398430925.0, "step": 8341 }, { "entropy": 1.734023739894231, "epoch": 0.9164263546730383, "grad_norm": 0.6661319732666016, "learning_rate": 1.2648607170285671e-05, "loss": 1.3642, "mean_token_accuracy": 0.659072607755661, "num_tokens": 1398612350.0, "step": 8342 }, { "entropy": 1.70571368932724, "epoch": 0.9165362115844113, "grad_norm": 0.6126469373703003, "learning_rate": 1.2647033107032936e-05, "loss": 1.5186, "mean_token_accuracy": 0.6387844830751419, "num_tokens": 1398842688.0, "step": 8343 }, { "entropy": 1.6804417272408803, "epoch": 0.9166460684957842, "grad_norm": 0.5771108865737915, "learning_rate": 1.2645458991652342e-05, "loss": 1.2489, "mean_token_accuracy": 0.6804888198773066, "num_tokens": 1398998228.0, "step": 8344 }, { "entropy": 1.741776704788208, "epoch": 0.9167559254071572, "grad_norm": 0.6767376065254211, "learning_rate": 1.264388482419371e-05, "loss": 1.3365, "mean_token_accuracy": 0.6583043287197748, "num_tokens": 1399121579.0, "step": 8345 }, { "entropy": 1.6522979438304901, "epoch": 0.9168657823185301, "grad_norm": 0.7042422294616699, "learning_rate": 1.2642310604706868e-05, "loss": 1.2792, "mean_token_accuracy": 0.6725463817516962, "num_tokens": 1399288987.0, "step": 8346 }, { "entropy": 1.7250507672627766, "epoch": 0.9169756392299031, "grad_norm": 0.5657103657722473, "learning_rate": 1.2640736333241634e-05, "loss": 1.4876, "mean_token_accuracy": 0.6186676770448685, "num_tokens": 1399528479.0, "step": 8347 }, { "entropy": 1.7386068999767303, "epoch": 0.917085496141276, "grad_norm": 0.638064444065094, "learning_rate": 1.2639162009847836e-05, "loss": 1.4766, "mean_token_accuracy": 0.6609574755032858, "num_tokens": 1399691992.0, "step": 8348 }, { "entropy": 1.7201377550760906, "epoch": 0.9171953530526489, "grad_norm": 0.7226953506469727, "learning_rate": 1.2637587634575297e-05, "loss": 1.4676, "mean_token_accuracy": 0.6478322297334671, "num_tokens": 1399958317.0, "step": 8349 }, { "entropy": 1.6978058218955994, "epoch": 0.9173052099640219, "grad_norm": 0.5818222761154175, "learning_rate": 1.2636013207473849e-05, "loss": 1.4523, "mean_token_accuracy": 0.6427704046169916, "num_tokens": 1400192788.0, "step": 8350 }, { "entropy": 1.6792748073736827, "epoch": 0.9174150668753948, "grad_norm": 0.5890861749649048, "learning_rate": 1.2634438728593319e-05, "loss": 1.3267, "mean_token_accuracy": 0.6607906967401505, "num_tokens": 1400378162.0, "step": 8351 }, { "entropy": 1.7638347347577412, "epoch": 0.9175249237867678, "grad_norm": 0.7139198184013367, "learning_rate": 1.263286419798354e-05, "loss": 1.3686, "mean_token_accuracy": 0.6581098288297653, "num_tokens": 1400548924.0, "step": 8352 }, { "entropy": 1.7151235540707905, "epoch": 0.9176347806981406, "grad_norm": 0.6000536680221558, "learning_rate": 1.2631289615694347e-05, "loss": 1.4582, "mean_token_accuracy": 0.6504177699486414, "num_tokens": 1400749113.0, "step": 8353 }, { "entropy": 1.727899005015691, "epoch": 0.9177446376095136, "grad_norm": 0.8415667414665222, "learning_rate": 1.262971498177557e-05, "loss": 1.3531, "mean_token_accuracy": 0.6797670821348826, "num_tokens": 1400926683.0, "step": 8354 }, { "entropy": 1.73043093085289, "epoch": 0.9178544945208865, "grad_norm": 0.7654650211334229, "learning_rate": 1.2628140296277049e-05, "loss": 1.5241, "mean_token_accuracy": 0.6519339581330618, "num_tokens": 1401120570.0, "step": 8355 }, { "entropy": 1.7113446791966755, "epoch": 0.9179643514322595, "grad_norm": 0.7105302810668945, "learning_rate": 1.2626565559248622e-05, "loss": 1.3434, "mean_token_accuracy": 0.6649382462104162, "num_tokens": 1401267560.0, "step": 8356 }, { "entropy": 1.7008299330870311, "epoch": 0.9180742083436324, "grad_norm": 0.5741628408432007, "learning_rate": 1.2624990770740123e-05, "loss": 1.3885, "mean_token_accuracy": 0.6473373621702194, "num_tokens": 1401462451.0, "step": 8357 }, { "entropy": 1.7151657938957214, "epoch": 0.9181840652550054, "grad_norm": 0.6269608736038208, "learning_rate": 1.2623415930801405e-05, "loss": 1.5253, "mean_token_accuracy": 0.6376299858093262, "num_tokens": 1401646187.0, "step": 8358 }, { "entropy": 1.7247498830159504, "epoch": 0.9182939221663783, "grad_norm": 0.7287866473197937, "learning_rate": 1.2621841039482303e-05, "loss": 1.2987, "mean_token_accuracy": 0.6703514059384664, "num_tokens": 1401771042.0, "step": 8359 }, { "entropy": 1.6538776357968648, "epoch": 0.9184037790777513, "grad_norm": 0.6443969011306763, "learning_rate": 1.2620266096832663e-05, "loss": 1.3982, "mean_token_accuracy": 0.6559311151504517, "num_tokens": 1401999604.0, "step": 8360 }, { "entropy": 1.7296237647533417, "epoch": 0.9185136359891242, "grad_norm": 0.8798031210899353, "learning_rate": 1.261869110290233e-05, "loss": 1.4636, "mean_token_accuracy": 0.6591440141201019, "num_tokens": 1402164177.0, "step": 8361 }, { "entropy": 1.712800492842992, "epoch": 0.918623492900497, "grad_norm": 0.7003832459449768, "learning_rate": 1.2617116057741152e-05, "loss": 1.3247, "mean_token_accuracy": 0.6562914500633875, "num_tokens": 1402297927.0, "step": 8362 }, { "entropy": 1.6861789226531982, "epoch": 0.91873334981187, "grad_norm": 0.6626542806625366, "learning_rate": 1.261554096139898e-05, "loss": 1.5779, "mean_token_accuracy": 0.6270742913087209, "num_tokens": 1402514465.0, "step": 8363 }, { "entropy": 1.7360760072867076, "epoch": 0.9188432067232429, "grad_norm": 0.8311833739280701, "learning_rate": 1.2613965813925666e-05, "loss": 1.3172, "mean_token_accuracy": 0.6613505631685257, "num_tokens": 1402618003.0, "step": 8364 }, { "entropy": 1.7023787597815196, "epoch": 0.9189530636346159, "grad_norm": 0.6417757868766785, "learning_rate": 1.261239061537106e-05, "loss": 1.3442, "mean_token_accuracy": 0.6616579592227936, "num_tokens": 1402818781.0, "step": 8365 }, { "entropy": 1.732607791821162, "epoch": 0.9190629205459888, "grad_norm": 0.6801357865333557, "learning_rate": 1.261081536578502e-05, "loss": 1.3579, "mean_token_accuracy": 0.6601632038752238, "num_tokens": 1402973852.0, "step": 8366 }, { "entropy": 1.6984934012095134, "epoch": 0.9191727774573618, "grad_norm": 0.6982681751251221, "learning_rate": 1.2609240065217396e-05, "loss": 1.4012, "mean_token_accuracy": 0.6618342697620392, "num_tokens": 1403131167.0, "step": 8367 }, { "entropy": 1.6751105586687725, "epoch": 0.9192826343687347, "grad_norm": 0.7063464522361755, "learning_rate": 1.260766471371805e-05, "loss": 1.256, "mean_token_accuracy": 0.6785000711679459, "num_tokens": 1403267829.0, "step": 8368 }, { "entropy": 1.7225077946980794, "epoch": 0.9193924912801077, "grad_norm": 0.6618905067443848, "learning_rate": 1.260608931133684e-05, "loss": 1.4058, "mean_token_accuracy": 0.6538095970948538, "num_tokens": 1403431242.0, "step": 8369 }, { "entropy": 1.6800328195095062, "epoch": 0.9195023481914806, "grad_norm": 0.6762068867683411, "learning_rate": 1.2604513858123629e-05, "loss": 1.4314, "mean_token_accuracy": 0.6620854735374451, "num_tokens": 1403605335.0, "step": 8370 }, { "entropy": 1.700233409802119, "epoch": 0.9196122051028536, "grad_norm": 0.6993805170059204, "learning_rate": 1.2602938354128276e-05, "loss": 1.5316, "mean_token_accuracy": 0.6513668298721313, "num_tokens": 1403787016.0, "step": 8371 }, { "entropy": 1.6806213955084484, "epoch": 0.9197220620142265, "grad_norm": 0.9308298826217651, "learning_rate": 1.2601362799400648e-05, "loss": 1.3191, "mean_token_accuracy": 0.6591364145278931, "num_tokens": 1403994148.0, "step": 8372 }, { "entropy": 1.6756692230701447, "epoch": 0.9198319189255995, "grad_norm": 0.6425023674964905, "learning_rate": 1.2599787193990605e-05, "loss": 1.2729, "mean_token_accuracy": 0.6760301639636358, "num_tokens": 1404117376.0, "step": 8373 }, { "entropy": 1.6797867914040883, "epoch": 0.9199417758369723, "grad_norm": 0.6142690777778625, "learning_rate": 1.2598211537948022e-05, "loss": 1.3994, "mean_token_accuracy": 0.6605397313833237, "num_tokens": 1404310009.0, "step": 8374 }, { "entropy": 1.6622991561889648, "epoch": 0.9200516327483452, "grad_norm": 0.7478998899459839, "learning_rate": 1.2596635831322761e-05, "loss": 1.5679, "mean_token_accuracy": 0.6572754432757696, "num_tokens": 1404484363.0, "step": 8375 }, { "entropy": 1.6799915730953217, "epoch": 0.9201614896597182, "grad_norm": 0.6949113607406616, "learning_rate": 1.2595060074164698e-05, "loss": 1.2796, "mean_token_accuracy": 0.6649729063113531, "num_tokens": 1404661640.0, "step": 8376 }, { "entropy": 1.7269630233446758, "epoch": 0.9202713465710911, "grad_norm": 0.7026051878929138, "learning_rate": 1.2593484266523701e-05, "loss": 1.3694, "mean_token_accuracy": 0.6497256408135096, "num_tokens": 1404821227.0, "step": 8377 }, { "entropy": 1.7466710011164348, "epoch": 0.9203812034824641, "grad_norm": 0.9665287733078003, "learning_rate": 1.2591908408449647e-05, "loss": 1.3825, "mean_token_accuracy": 0.6634558041890463, "num_tokens": 1404954222.0, "step": 8378 }, { "entropy": 1.6914934416611989, "epoch": 0.920491060393837, "grad_norm": 0.7472662925720215, "learning_rate": 1.2590332499992406e-05, "loss": 1.286, "mean_token_accuracy": 0.672187735637029, "num_tokens": 1405065197.0, "step": 8379 }, { "entropy": 1.6020830969015758, "epoch": 0.92060091730521, "grad_norm": 0.635012149810791, "learning_rate": 1.2588756541201861e-05, "loss": 1.2781, "mean_token_accuracy": 0.6772213528553644, "num_tokens": 1405219589.0, "step": 8380 }, { "entropy": 1.7316095232963562, "epoch": 0.9207107742165829, "grad_norm": 0.7221272587776184, "learning_rate": 1.2587180532127886e-05, "loss": 1.6001, "mean_token_accuracy": 0.6357069065173467, "num_tokens": 1405416476.0, "step": 8381 }, { "entropy": 1.7943975925445557, "epoch": 0.9208206311279559, "grad_norm": 0.77412348985672, "learning_rate": 1.258560447282036e-05, "loss": 1.5053, "mean_token_accuracy": 0.6331639190514883, "num_tokens": 1405601890.0, "step": 8382 }, { "entropy": 1.7066338161627452, "epoch": 0.9209304880393288, "grad_norm": 0.6691288352012634, "learning_rate": 1.2584028363329172e-05, "loss": 1.3199, "mean_token_accuracy": 0.6606237838665644, "num_tokens": 1405752844.0, "step": 8383 }, { "entropy": 1.713592956463496, "epoch": 0.9210403449507018, "grad_norm": 0.6735867857933044, "learning_rate": 1.2582452203704196e-05, "loss": 1.3706, "mean_token_accuracy": 0.6515240619579951, "num_tokens": 1405978966.0, "step": 8384 }, { "entropy": 1.6773227254549663, "epoch": 0.9211502018620746, "grad_norm": 0.6591025590896606, "learning_rate": 1.2580875993995324e-05, "loss": 1.3694, "mean_token_accuracy": 0.665874645113945, "num_tokens": 1406131263.0, "step": 8385 }, { "entropy": 1.7191874583562214, "epoch": 0.9212600587734476, "grad_norm": 0.6563531160354614, "learning_rate": 1.2579299734252435e-05, "loss": 1.6292, "mean_token_accuracy": 0.6292891552050909, "num_tokens": 1406323110.0, "step": 8386 }, { "entropy": 1.7289370795090993, "epoch": 0.9213699156848205, "grad_norm": 0.659126341342926, "learning_rate": 1.2577723424525425e-05, "loss": 1.2939, "mean_token_accuracy": 0.6766804109017054, "num_tokens": 1406466231.0, "step": 8387 }, { "entropy": 1.690926472345988, "epoch": 0.9214797725961934, "grad_norm": 0.7042950391769409, "learning_rate": 1.2576147064864177e-05, "loss": 1.4388, "mean_token_accuracy": 0.6502556999524435, "num_tokens": 1406626762.0, "step": 8388 }, { "entropy": 1.6028278172016144, "epoch": 0.9215896295075664, "grad_norm": 0.6516503691673279, "learning_rate": 1.2574570655318586e-05, "loss": 1.2367, "mean_token_accuracy": 0.6695101857185364, "num_tokens": 1406811606.0, "step": 8389 }, { "entropy": 1.642892986536026, "epoch": 0.9216994864189393, "grad_norm": 0.6159811615943909, "learning_rate": 1.2572994195938543e-05, "loss": 1.4283, "mean_token_accuracy": 0.6557525595029196, "num_tokens": 1406972657.0, "step": 8390 }, { "entropy": 1.7573666175206502, "epoch": 0.9218093433303123, "grad_norm": 0.6652973890304565, "learning_rate": 1.2571417686773942e-05, "loss": 1.354, "mean_token_accuracy": 0.6559064388275146, "num_tokens": 1407093499.0, "step": 8391 }, { "entropy": 1.7429068982601166, "epoch": 0.9219192002416852, "grad_norm": 0.7107627987861633, "learning_rate": 1.256984112787468e-05, "loss": 1.4118, "mean_token_accuracy": 0.6419812937577566, "num_tokens": 1407253457.0, "step": 8392 }, { "entropy": 1.7409753501415253, "epoch": 0.9220290571530582, "grad_norm": 0.7587690949440002, "learning_rate": 1.2568264519290654e-05, "loss": 1.3945, "mean_token_accuracy": 0.65499414006869, "num_tokens": 1407450974.0, "step": 8393 }, { "entropy": 1.7279663681983948, "epoch": 0.922138914064431, "grad_norm": 0.6342840194702148, "learning_rate": 1.2566687861071762e-05, "loss": 1.3915, "mean_token_accuracy": 0.660114253560702, "num_tokens": 1407606742.0, "step": 8394 }, { "entropy": 1.6986714601516724, "epoch": 0.922248770975804, "grad_norm": 0.6908047795295715, "learning_rate": 1.2565111153267904e-05, "loss": 1.5053, "mean_token_accuracy": 0.6442679464817047, "num_tokens": 1407769641.0, "step": 8395 }, { "entropy": 1.7808184325695038, "epoch": 0.9223586278871769, "grad_norm": 0.6698216199874878, "learning_rate": 1.2563534395928987e-05, "loss": 1.5286, "mean_token_accuracy": 0.642250527938207, "num_tokens": 1407933465.0, "step": 8396 }, { "entropy": 1.733227163553238, "epoch": 0.9224684847985499, "grad_norm": 0.7015756964683533, "learning_rate": 1.2561957589104908e-05, "loss": 1.2428, "mean_token_accuracy": 0.6744599491357803, "num_tokens": 1408037394.0, "step": 8397 }, { "entropy": 1.7421282827854156, "epoch": 0.9225783417099228, "grad_norm": 0.6361960768699646, "learning_rate": 1.2560380732845577e-05, "loss": 1.2879, "mean_token_accuracy": 0.6739509999752045, "num_tokens": 1408162511.0, "step": 8398 }, { "entropy": 1.7272718846797943, "epoch": 0.9226881986212958, "grad_norm": 0.8491674661636353, "learning_rate": 1.2558803827200896e-05, "loss": 1.4679, "mean_token_accuracy": 0.6472969353199005, "num_tokens": 1408337595.0, "step": 8399 }, { "entropy": 1.724097619454066, "epoch": 0.9227980555326687, "grad_norm": 0.6793266534805298, "learning_rate": 1.255722687222078e-05, "loss": 1.4947, "mean_token_accuracy": 0.6494862536589304, "num_tokens": 1408563108.0, "step": 8400 }, { "entropy": 1.734597235918045, "epoch": 0.9229079124440417, "grad_norm": 0.695158064365387, "learning_rate": 1.2555649867955128e-05, "loss": 1.4444, "mean_token_accuracy": 0.6569420943657557, "num_tokens": 1408721968.0, "step": 8401 }, { "entropy": 1.715299169222514, "epoch": 0.9230177693554146, "grad_norm": 0.7081811428070068, "learning_rate": 1.2554072814453865e-05, "loss": 1.4605, "mean_token_accuracy": 0.6608149409294128, "num_tokens": 1408873539.0, "step": 8402 }, { "entropy": 1.6958219408988953, "epoch": 0.9231276262667875, "grad_norm": 0.7010018825531006, "learning_rate": 1.2552495711766897e-05, "loss": 1.2639, "mean_token_accuracy": 0.6718494196732839, "num_tokens": 1409024445.0, "step": 8403 }, { "entropy": 1.6861001054445903, "epoch": 0.9232374831781605, "grad_norm": 0.8214607834815979, "learning_rate": 1.2550918559944138e-05, "loss": 1.3134, "mean_token_accuracy": 0.6580562740564346, "num_tokens": 1409185013.0, "step": 8404 }, { "entropy": 1.6605658928553264, "epoch": 0.9233473400895333, "grad_norm": 0.6449036598205566, "learning_rate": 1.2549341359035507e-05, "loss": 1.4082, "mean_token_accuracy": 0.6663586348295212, "num_tokens": 1409335022.0, "step": 8405 }, { "entropy": 1.763859748840332, "epoch": 0.9234571970009063, "grad_norm": 0.7659093141555786, "learning_rate": 1.254776410909092e-05, "loss": 1.3732, "mean_token_accuracy": 0.6532298525174459, "num_tokens": 1409453428.0, "step": 8406 }, { "entropy": 1.696772535641988, "epoch": 0.9235670539122792, "grad_norm": 0.7647340893745422, "learning_rate": 1.2546186810160294e-05, "loss": 1.3433, "mean_token_accuracy": 0.667681892712911, "num_tokens": 1409572864.0, "step": 8407 }, { "entropy": 1.7054913242657979, "epoch": 0.9236769108236522, "grad_norm": 0.6106885075569153, "learning_rate": 1.2544609462293555e-05, "loss": 1.4098, "mean_token_accuracy": 0.6489782730738322, "num_tokens": 1409739736.0, "step": 8408 }, { "entropy": 1.7002596755822499, "epoch": 0.9237867677350251, "grad_norm": 0.599602997303009, "learning_rate": 1.2543032065540622e-05, "loss": 1.343, "mean_token_accuracy": 0.6687337110439936, "num_tokens": 1409932692.0, "step": 8409 }, { "entropy": 1.6986660559972127, "epoch": 0.9238966246463981, "grad_norm": 0.6248365640640259, "learning_rate": 1.2541454619951416e-05, "loss": 1.391, "mean_token_accuracy": 0.6515335639317831, "num_tokens": 1410120104.0, "step": 8410 }, { "entropy": 1.686285485823949, "epoch": 0.924006481557771, "grad_norm": 0.7016609311103821, "learning_rate": 1.253987712557587e-05, "loss": 1.358, "mean_token_accuracy": 0.6637587447961172, "num_tokens": 1410281992.0, "step": 8411 }, { "entropy": 1.740788499514262, "epoch": 0.924116338469144, "grad_norm": 0.7246219515800476, "learning_rate": 1.2538299582463906e-05, "loss": 1.4015, "mean_token_accuracy": 0.6567526757717133, "num_tokens": 1410440576.0, "step": 8412 }, { "entropy": 1.7054814994335175, "epoch": 0.9242261953805169, "grad_norm": 0.7429696917533875, "learning_rate": 1.253672199066545e-05, "loss": 1.4615, "mean_token_accuracy": 0.6404254684845606, "num_tokens": 1410634470.0, "step": 8413 }, { "entropy": 1.6572390894095104, "epoch": 0.9243360522918899, "grad_norm": 0.713073194026947, "learning_rate": 1.2535144350230441e-05, "loss": 1.345, "mean_token_accuracy": 0.6785429567098618, "num_tokens": 1410752529.0, "step": 8414 }, { "entropy": 1.6576418578624725, "epoch": 0.9244459092032628, "grad_norm": 0.5704638361930847, "learning_rate": 1.2533566661208803e-05, "loss": 1.3617, "mean_token_accuracy": 0.6624558568000793, "num_tokens": 1410940518.0, "step": 8415 }, { "entropy": 1.6944686770439148, "epoch": 0.9245557661146356, "grad_norm": 0.7132385969161987, "learning_rate": 1.2531988923650469e-05, "loss": 1.3251, "mean_token_accuracy": 0.6656810740629832, "num_tokens": 1411091487.0, "step": 8416 }, { "entropy": 1.6068796714146931, "epoch": 0.9246656230260086, "grad_norm": 0.5596727132797241, "learning_rate": 1.2530411137605376e-05, "loss": 1.3988, "mean_token_accuracy": 0.651663064956665, "num_tokens": 1411349098.0, "step": 8417 }, { "entropy": 1.6926358838876088, "epoch": 0.9247754799373815, "grad_norm": 0.6509303450584412, "learning_rate": 1.2528833303123464e-05, "loss": 1.4295, "mean_token_accuracy": 0.6421109537283579, "num_tokens": 1411553106.0, "step": 8418 }, { "entropy": 1.6756529609362285, "epoch": 0.9248853368487545, "grad_norm": 0.7943989038467407, "learning_rate": 1.2527255420254663e-05, "loss": 1.4821, "mean_token_accuracy": 0.6490869422753652, "num_tokens": 1411745801.0, "step": 8419 }, { "entropy": 1.6821991205215454, "epoch": 0.9249951937601274, "grad_norm": 0.6343573331832886, "learning_rate": 1.2525677489048919e-05, "loss": 1.3901, "mean_token_accuracy": 0.6549980839093527, "num_tokens": 1411971218.0, "step": 8420 }, { "entropy": 1.7167380253473918, "epoch": 0.9251050506715004, "grad_norm": 0.6473302841186523, "learning_rate": 1.252409950955617e-05, "loss": 1.2781, "mean_token_accuracy": 0.6656585186719894, "num_tokens": 1412093664.0, "step": 8421 }, { "entropy": 1.6299481391906738, "epoch": 0.9252149075828733, "grad_norm": 0.5957537293434143, "learning_rate": 1.2522521481826355e-05, "loss": 1.5059, "mean_token_accuracy": 0.6492985685666403, "num_tokens": 1412312361.0, "step": 8422 }, { "entropy": 1.6707092622915904, "epoch": 0.9253247644942463, "grad_norm": 0.6398028135299683, "learning_rate": 1.2520943405909423e-05, "loss": 1.3445, "mean_token_accuracy": 0.6681879659493765, "num_tokens": 1412486757.0, "step": 8423 }, { "entropy": 1.6841243704160054, "epoch": 0.9254346214056192, "grad_norm": 0.6823419332504272, "learning_rate": 1.251936528185532e-05, "loss": 1.4074, "mean_token_accuracy": 0.6703370014826456, "num_tokens": 1412624400.0, "step": 8424 }, { "entropy": 1.7717651029427846, "epoch": 0.9255444783169922, "grad_norm": 0.699639618396759, "learning_rate": 1.2517787109713986e-05, "loss": 1.4845, "mean_token_accuracy": 0.6359593768914541, "num_tokens": 1412807209.0, "step": 8425 }, { "entropy": 1.7056340873241425, "epoch": 0.925654335228365, "grad_norm": 0.7243174314498901, "learning_rate": 1.2516208889535377e-05, "loss": 1.4097, "mean_token_accuracy": 0.668630967537562, "num_tokens": 1412947855.0, "step": 8426 }, { "entropy": 1.73410764336586, "epoch": 0.925764192139738, "grad_norm": 0.764216959476471, "learning_rate": 1.2514630621369437e-05, "loss": 1.368, "mean_token_accuracy": 0.670150876045227, "num_tokens": 1413110693.0, "step": 8427 }, { "entropy": 1.7153330743312836, "epoch": 0.9258740490511109, "grad_norm": 0.6585131287574768, "learning_rate": 1.2513052305266123e-05, "loss": 1.2796, "mean_token_accuracy": 0.6731938471396764, "num_tokens": 1413242626.0, "step": 8428 }, { "entropy": 1.7023490965366364, "epoch": 0.9259839059624838, "grad_norm": 0.6882517337799072, "learning_rate": 1.2511473941275385e-05, "loss": 1.2992, "mean_token_accuracy": 0.6625126004219055, "num_tokens": 1413400164.0, "step": 8429 }, { "entropy": 1.705989311138789, "epoch": 0.9260937628738568, "grad_norm": 0.8120949864387512, "learning_rate": 1.2509895529447178e-05, "loss": 1.3778, "mean_token_accuracy": 0.6520531823237737, "num_tokens": 1413522941.0, "step": 8430 }, { "entropy": 1.7116830845673878, "epoch": 0.9262036197852297, "grad_norm": 0.7705047130584717, "learning_rate": 1.250831706983146e-05, "loss": 1.2424, "mean_token_accuracy": 0.681115910410881, "num_tokens": 1413645220.0, "step": 8431 }, { "entropy": 1.7691873808701832, "epoch": 0.9263134766966027, "grad_norm": 0.7861184477806091, "learning_rate": 1.250673856247818e-05, "loss": 1.4448, "mean_token_accuracy": 0.6577897220849991, "num_tokens": 1413789562.0, "step": 8432 }, { "entropy": 1.7770712574323018, "epoch": 0.9264233336079756, "grad_norm": 0.6887810826301575, "learning_rate": 1.2505160007437309e-05, "loss": 1.4101, "mean_token_accuracy": 0.6699302395184835, "num_tokens": 1413947460.0, "step": 8433 }, { "entropy": 1.6620681683222454, "epoch": 0.9265331905193486, "grad_norm": 0.6765469908714294, "learning_rate": 1.25035814047588e-05, "loss": 1.382, "mean_token_accuracy": 0.6652428507804871, "num_tokens": 1414106635.0, "step": 8434 }, { "entropy": 1.6912939846515656, "epoch": 0.9266430474307215, "grad_norm": 0.6659488081932068, "learning_rate": 1.2502002754492614e-05, "loss": 1.3485, "mean_token_accuracy": 0.6623717993497849, "num_tokens": 1414283009.0, "step": 8435 }, { "entropy": 1.7182398637135823, "epoch": 0.9267529043420945, "grad_norm": 0.6735560894012451, "learning_rate": 1.2500424056688722e-05, "loss": 1.3758, "mean_token_accuracy": 0.6657535483439764, "num_tokens": 1414423522.0, "step": 8436 }, { "entropy": 1.6874533692995708, "epoch": 0.9268627612534673, "grad_norm": 0.5966618061065674, "learning_rate": 1.2498845311397083e-05, "loss": 1.5429, "mean_token_accuracy": 0.6355781530340513, "num_tokens": 1414625227.0, "step": 8437 }, { "entropy": 1.6916421949863434, "epoch": 0.9269726181648403, "grad_norm": 0.6825304627418518, "learning_rate": 1.2497266518667667e-05, "loss": 1.4306, "mean_token_accuracy": 0.6556826333204905, "num_tokens": 1414808666.0, "step": 8438 }, { "entropy": 1.7351886530717213, "epoch": 0.9270824750762132, "grad_norm": 0.8129728436470032, "learning_rate": 1.249568767855044e-05, "loss": 1.3367, "mean_token_accuracy": 0.6633518934249878, "num_tokens": 1414917866.0, "step": 8439 }, { "entropy": 1.6401391724745433, "epoch": 0.9271923319875862, "grad_norm": 0.7099363207817078, "learning_rate": 1.2494108791095372e-05, "loss": 1.3524, "mean_token_accuracy": 0.6650873670975367, "num_tokens": 1415083152.0, "step": 8440 }, { "entropy": 1.69183216492335, "epoch": 0.9273021888989591, "grad_norm": 0.637658953666687, "learning_rate": 1.2492529856352431e-05, "loss": 1.436, "mean_token_accuracy": 0.6460116654634476, "num_tokens": 1415302375.0, "step": 8441 }, { "entropy": 1.6242200930913289, "epoch": 0.9274120458103321, "grad_norm": 0.6604406237602234, "learning_rate": 1.2490950874371594e-05, "loss": 1.2643, "mean_token_accuracy": 0.6734424084424973, "num_tokens": 1415474824.0, "step": 8442 }, { "entropy": 1.6695034007231395, "epoch": 0.927521902721705, "grad_norm": 0.7424845695495605, "learning_rate": 1.2489371845202836e-05, "loss": 1.3691, "mean_token_accuracy": 0.6602280388275782, "num_tokens": 1415661133.0, "step": 8443 }, { "entropy": 1.7420236865679424, "epoch": 0.9276317596330779, "grad_norm": 0.8569214940071106, "learning_rate": 1.2487792768896127e-05, "loss": 1.5441, "mean_token_accuracy": 0.6413151572148005, "num_tokens": 1415805898.0, "step": 8444 }, { "entropy": 1.686678260564804, "epoch": 0.9277416165444509, "grad_norm": 0.760896623134613, "learning_rate": 1.248621364550145e-05, "loss": 1.3362, "mean_token_accuracy": 0.667745237549146, "num_tokens": 1415925173.0, "step": 8445 }, { "entropy": 1.694368968407313, "epoch": 0.9278514734558237, "grad_norm": 0.8224613666534424, "learning_rate": 1.2484634475068781e-05, "loss": 1.4879, "mean_token_accuracy": 0.6645511214931806, "num_tokens": 1416074408.0, "step": 8446 }, { "entropy": 1.6848430434862773, "epoch": 0.9279613303671967, "grad_norm": 0.6078700423240662, "learning_rate": 1.2483055257648098e-05, "loss": 1.3897, "mean_token_accuracy": 0.6586156040430069, "num_tokens": 1416257726.0, "step": 8447 }, { "entropy": 1.6660497585932414, "epoch": 0.9280711872785696, "grad_norm": 0.7581548094749451, "learning_rate": 1.2481475993289385e-05, "loss": 1.4777, "mean_token_accuracy": 0.6510738035043081, "num_tokens": 1416421922.0, "step": 8448 }, { "entropy": 1.7178461253643036, "epoch": 0.9281810441899426, "grad_norm": 0.706211507320404, "learning_rate": 1.2479896682042625e-05, "loss": 1.3219, "mean_token_accuracy": 0.6680372059345245, "num_tokens": 1416593459.0, "step": 8449 }, { "entropy": 1.684712787469228, "epoch": 0.9282909011013155, "grad_norm": 0.6404665112495422, "learning_rate": 1.24783173239578e-05, "loss": 1.3882, "mean_token_accuracy": 0.6511215766270956, "num_tokens": 1416740179.0, "step": 8450 }, { "entropy": 1.6592655877272289, "epoch": 0.9284007580126885, "grad_norm": 0.6992117762565613, "learning_rate": 1.2476737919084898e-05, "loss": 1.3854, "mean_token_accuracy": 0.653921420375506, "num_tokens": 1416916822.0, "step": 8451 }, { "entropy": 1.698452393213908, "epoch": 0.9285106149240614, "grad_norm": 0.6352612972259521, "learning_rate": 1.2475158467473911e-05, "loss": 1.4618, "mean_token_accuracy": 0.6499315698941549, "num_tokens": 1417107907.0, "step": 8452 }, { "entropy": 1.7192625999450684, "epoch": 0.9286204718354344, "grad_norm": 0.7419793605804443, "learning_rate": 1.2473578969174817e-05, "loss": 1.478, "mean_token_accuracy": 0.6512725353240967, "num_tokens": 1417291830.0, "step": 8453 }, { "entropy": 1.684527148803075, "epoch": 0.9287303287468073, "grad_norm": 0.6986659169197083, "learning_rate": 1.2471999424237615e-05, "loss": 1.3782, "mean_token_accuracy": 0.658308207988739, "num_tokens": 1417446239.0, "step": 8454 }, { "entropy": 1.6121302247047424, "epoch": 0.9288401856581803, "grad_norm": 0.7182373404502869, "learning_rate": 1.2470419832712295e-05, "loss": 1.3144, "mean_token_accuracy": 0.6786510099967321, "num_tokens": 1417581558.0, "step": 8455 }, { "entropy": 1.6011373102664948, "epoch": 0.9289500425695532, "grad_norm": 0.7618659138679504, "learning_rate": 1.246884019464885e-05, "loss": 1.2432, "mean_token_accuracy": 0.6746308306852976, "num_tokens": 1417763013.0, "step": 8456 }, { "entropy": 1.662847876548767, "epoch": 0.929059899480926, "grad_norm": 0.6191786527633667, "learning_rate": 1.2467260510097275e-05, "loss": 1.4174, "mean_token_accuracy": 0.6469977349042892, "num_tokens": 1418005665.0, "step": 8457 }, { "entropy": 1.713506688674291, "epoch": 0.929169756392299, "grad_norm": 0.7612412571907043, "learning_rate": 1.2465680779107564e-05, "loss": 1.5276, "mean_token_accuracy": 0.6622031579415003, "num_tokens": 1418151137.0, "step": 8458 }, { "entropy": 1.7254696488380432, "epoch": 0.9292796133036719, "grad_norm": 0.6934106945991516, "learning_rate": 1.246410100172972e-05, "loss": 1.3767, "mean_token_accuracy": 0.6779943505922953, "num_tokens": 1418282753.0, "step": 8459 }, { "entropy": 1.7169914940992992, "epoch": 0.9293894702150449, "grad_norm": 0.6493316292762756, "learning_rate": 1.2462521178013736e-05, "loss": 1.3592, "mean_token_accuracy": 0.6591555128494898, "num_tokens": 1418439711.0, "step": 8460 }, { "entropy": 1.6799816985925038, "epoch": 0.9294993271264178, "grad_norm": 0.8348252773284912, "learning_rate": 1.2460941308009615e-05, "loss": 1.2885, "mean_token_accuracy": 0.6671053916215897, "num_tokens": 1418565133.0, "step": 8461 }, { "entropy": 1.6939114530881245, "epoch": 0.9296091840377908, "grad_norm": 0.8337905406951904, "learning_rate": 1.2459361391767366e-05, "loss": 1.5956, "mean_token_accuracy": 0.6368262519439062, "num_tokens": 1418758270.0, "step": 8462 }, { "entropy": 1.6878935396671295, "epoch": 0.9297190409491637, "grad_norm": 0.7053253054618835, "learning_rate": 1.245778142933698e-05, "loss": 1.3431, "mean_token_accuracy": 0.6594057977199554, "num_tokens": 1418943386.0, "step": 8463 }, { "entropy": 1.7375274399916332, "epoch": 0.9298288978605367, "grad_norm": 0.7320691347122192, "learning_rate": 1.2456201420768472e-05, "loss": 1.4361, "mean_token_accuracy": 0.6541687101125717, "num_tokens": 1419098061.0, "step": 8464 }, { "entropy": 1.6313360234101613, "epoch": 0.9299387547719096, "grad_norm": 0.5476987957954407, "learning_rate": 1.2454621366111843e-05, "loss": 1.436, "mean_token_accuracy": 0.6428209195534388, "num_tokens": 1419319095.0, "step": 8465 }, { "entropy": 1.713592936595281, "epoch": 0.9300486116832826, "grad_norm": 0.7491887807846069, "learning_rate": 1.2453041265417105e-05, "loss": 1.4515, "mean_token_accuracy": 0.6389258007208506, "num_tokens": 1419463995.0, "step": 8466 }, { "entropy": 1.6619866987069447, "epoch": 0.9301584685946555, "grad_norm": 0.5975055694580078, "learning_rate": 1.2451461118734267e-05, "loss": 1.4631, "mean_token_accuracy": 0.6633708626031876, "num_tokens": 1419650890.0, "step": 8467 }, { "entropy": 1.6842903196811676, "epoch": 0.9302683255060284, "grad_norm": 0.7102713584899902, "learning_rate": 1.2449880926113339e-05, "loss": 1.552, "mean_token_accuracy": 0.6366306593020757, "num_tokens": 1419817492.0, "step": 8468 }, { "entropy": 1.6764464179674785, "epoch": 0.9303781824174013, "grad_norm": 0.648377001285553, "learning_rate": 1.2448300687604327e-05, "loss": 1.5442, "mean_token_accuracy": 0.6397795329491297, "num_tokens": 1420015035.0, "step": 8469 }, { "entropy": 1.736095021168391, "epoch": 0.9304880393287742, "grad_norm": 0.7206259965896606, "learning_rate": 1.2446720403257255e-05, "loss": 1.4128, "mean_token_accuracy": 0.6519557138284048, "num_tokens": 1420202744.0, "step": 8470 }, { "entropy": 1.671694815158844, "epoch": 0.9305978962401472, "grad_norm": 0.7011840343475342, "learning_rate": 1.2445140073122135e-05, "loss": 1.452, "mean_token_accuracy": 0.6552790006001791, "num_tokens": 1420346561.0, "step": 8471 }, { "entropy": 1.649581750233968, "epoch": 0.9307077531515201, "grad_norm": 0.7680811285972595, "learning_rate": 1.244355969724898e-05, "loss": 1.2785, "mean_token_accuracy": 0.6835348854462305, "num_tokens": 1420520670.0, "step": 8472 }, { "entropy": 1.6625533699989319, "epoch": 0.9308176100628931, "grad_norm": 0.7054336667060852, "learning_rate": 1.2441979275687813e-05, "loss": 1.3931, "mean_token_accuracy": 0.6612143218517303, "num_tokens": 1420685237.0, "step": 8473 }, { "entropy": 1.692489633957545, "epoch": 0.930927466974266, "grad_norm": 0.6488029360771179, "learning_rate": 1.2440398808488654e-05, "loss": 1.3779, "mean_token_accuracy": 0.679279754559199, "num_tokens": 1420901387.0, "step": 8474 }, { "entropy": 1.7313298384348552, "epoch": 0.931037323885639, "grad_norm": 0.7841524481773376, "learning_rate": 1.2438818295701515e-05, "loss": 1.3249, "mean_token_accuracy": 0.6554968257745107, "num_tokens": 1421099246.0, "step": 8475 }, { "entropy": 1.715612788995107, "epoch": 0.9311471807970119, "grad_norm": 0.7147516012191772, "learning_rate": 1.2437237737376431e-05, "loss": 1.3202, "mean_token_accuracy": 0.6654301732778549, "num_tokens": 1421270600.0, "step": 8476 }, { "entropy": 1.6151216328144073, "epoch": 0.9312570377083849, "grad_norm": 0.6839093565940857, "learning_rate": 1.2435657133563419e-05, "loss": 1.3194, "mean_token_accuracy": 0.6828833172718684, "num_tokens": 1421443338.0, "step": 8477 }, { "entropy": 1.681669036547343, "epoch": 0.9313668946197577, "grad_norm": 0.6480600237846375, "learning_rate": 1.2434076484312507e-05, "loss": 1.3436, "mean_token_accuracy": 0.6785709311564764, "num_tokens": 1421703636.0, "step": 8478 }, { "entropy": 1.7277530829111736, "epoch": 0.9314767515311307, "grad_norm": 0.7484097480773926, "learning_rate": 1.2432495789673717e-05, "loss": 1.3296, "mean_token_accuracy": 0.669383779168129, "num_tokens": 1421860755.0, "step": 8479 }, { "entropy": 1.694298009077708, "epoch": 0.9315866084425036, "grad_norm": 0.6591439247131348, "learning_rate": 1.2430915049697086e-05, "loss": 1.3729, "mean_token_accuracy": 0.6702051361401876, "num_tokens": 1422006872.0, "step": 8480 }, { "entropy": 1.654652992884318, "epoch": 0.9316964653538766, "grad_norm": 0.6514762043952942, "learning_rate": 1.2429334264432632e-05, "loss": 1.2161, "mean_token_accuracy": 0.6821036289135615, "num_tokens": 1422135857.0, "step": 8481 }, { "entropy": 1.7302643954753876, "epoch": 0.9318063222652495, "grad_norm": 0.6303740739822388, "learning_rate": 1.2427753433930398e-05, "loss": 1.3769, "mean_token_accuracy": 0.6573646614948908, "num_tokens": 1422283990.0, "step": 8482 }, { "entropy": 1.7143929402033489, "epoch": 0.9319161791766224, "grad_norm": 0.7011592984199524, "learning_rate": 1.2426172558240408e-05, "loss": 1.6449, "mean_token_accuracy": 0.626821535329024, "num_tokens": 1422527659.0, "step": 8483 }, { "entropy": 1.6688818732897441, "epoch": 0.9320260360879954, "grad_norm": 0.707831621170044, "learning_rate": 1.24245916374127e-05, "loss": 1.4702, "mean_token_accuracy": 0.6559558510780334, "num_tokens": 1422709784.0, "step": 8484 }, { "entropy": 1.7655527591705322, "epoch": 0.9321358929993683, "grad_norm": 0.7450686693191528, "learning_rate": 1.2423010671497309e-05, "loss": 1.363, "mean_token_accuracy": 0.6644783665736517, "num_tokens": 1422827575.0, "step": 8485 }, { "entropy": 1.7146152754624684, "epoch": 0.9322457499107413, "grad_norm": 0.7006492614746094, "learning_rate": 1.2421429660544274e-05, "loss": 1.2541, "mean_token_accuracy": 0.6772527098655701, "num_tokens": 1422967424.0, "step": 8486 }, { "entropy": 1.6500997145970662, "epoch": 0.9323556068221142, "grad_norm": 152.7211151123047, "learning_rate": 1.2419848604603624e-05, "loss": 1.3616, "mean_token_accuracy": 0.6649878074725469, "num_tokens": 1423124348.0, "step": 8487 }, { "entropy": 1.6900402406851451, "epoch": 0.9324654637334872, "grad_norm": 0.6380817294120789, "learning_rate": 1.2418267503725409e-05, "loss": 1.5177, "mean_token_accuracy": 0.6421713878711065, "num_tokens": 1423284404.0, "step": 8488 }, { "entropy": 1.6533196369806926, "epoch": 0.93257532064486, "grad_norm": 0.9251922965049744, "learning_rate": 1.2416686357959668e-05, "loss": 1.1989, "mean_token_accuracy": 0.6831783403952917, "num_tokens": 1423413353.0, "step": 8489 }, { "entropy": 1.6796255509058635, "epoch": 0.932685177556233, "grad_norm": 0.6227523684501648, "learning_rate": 1.2415105167356442e-05, "loss": 1.4729, "mean_token_accuracy": 0.6428764114777247, "num_tokens": 1423591192.0, "step": 8490 }, { "entropy": 1.7086295584837596, "epoch": 0.9327950344676059, "grad_norm": 0.6465691924095154, "learning_rate": 1.2413523931965775e-05, "loss": 1.4636, "mean_token_accuracy": 0.6566512435674667, "num_tokens": 1423756354.0, "step": 8491 }, { "entropy": 1.7025008797645569, "epoch": 0.9329048913789789, "grad_norm": 0.6250014901161194, "learning_rate": 1.2411942651837712e-05, "loss": 1.3253, "mean_token_accuracy": 0.6653555085261663, "num_tokens": 1423920837.0, "step": 8492 }, { "entropy": 1.7105981409549713, "epoch": 0.9330147482903518, "grad_norm": 0.7055547833442688, "learning_rate": 1.24103613270223e-05, "loss": 1.4082, "mean_token_accuracy": 0.6560359050830206, "num_tokens": 1424106336.0, "step": 8493 }, { "entropy": 1.7495214740435283, "epoch": 0.9331246052017248, "grad_norm": 0.7928077578544617, "learning_rate": 1.2408779957569586e-05, "loss": 1.3455, "mean_token_accuracy": 0.6681791841983795, "num_tokens": 1424236467.0, "step": 8494 }, { "entropy": 1.6718592544396718, "epoch": 0.9332344621130977, "grad_norm": 1.34169602394104, "learning_rate": 1.2407198543529624e-05, "loss": 1.4863, "mean_token_accuracy": 0.6318272079030672, "num_tokens": 1424400906.0, "step": 8495 }, { "entropy": 1.8002726435661316, "epoch": 0.9333443190244707, "grad_norm": 0.8875370025634766, "learning_rate": 1.2405617084952461e-05, "loss": 1.4144, "mean_token_accuracy": 0.6550916383663813, "num_tokens": 1424538191.0, "step": 8496 }, { "entropy": 1.6871886054674785, "epoch": 0.9334541759358436, "grad_norm": 0.6395008563995361, "learning_rate": 1.2404035581888149e-05, "loss": 1.4748, "mean_token_accuracy": 0.6380327840646108, "num_tokens": 1424765006.0, "step": 8497 }, { "entropy": 1.750147004922231, "epoch": 0.9335640328472165, "grad_norm": 0.7137428522109985, "learning_rate": 1.2402454034386747e-05, "loss": 1.485, "mean_token_accuracy": 0.6527098168929418, "num_tokens": 1424918563.0, "step": 8498 }, { "entropy": 1.7322723865509033, "epoch": 0.9336738897585894, "grad_norm": 0.7444048523902893, "learning_rate": 1.2400872442498306e-05, "loss": 1.4352, "mean_token_accuracy": 0.6530717114607493, "num_tokens": 1425078392.0, "step": 8499 }, { "entropy": 1.7369141379992168, "epoch": 0.9337837466699623, "grad_norm": 0.6727978587150574, "learning_rate": 1.239929080627288e-05, "loss": 1.4268, "mean_token_accuracy": 0.654167448480924, "num_tokens": 1425227908.0, "step": 8500 }, { "entropy": 1.7379600306351979, "epoch": 0.9338936035813353, "grad_norm": 0.8184369802474976, "learning_rate": 1.2397709125760533e-05, "loss": 1.3786, "mean_token_accuracy": 0.6590213775634766, "num_tokens": 1425352745.0, "step": 8501 }, { "entropy": 1.674992948770523, "epoch": 0.9340034604927082, "grad_norm": 0.6965845823287964, "learning_rate": 1.2396127401011324e-05, "loss": 1.2521, "mean_token_accuracy": 0.6741303652524948, "num_tokens": 1425458702.0, "step": 8502 }, { "entropy": 1.7390521963437398, "epoch": 0.9341133174040812, "grad_norm": 0.6826538443565369, "learning_rate": 1.2394545632075305e-05, "loss": 1.2356, "mean_token_accuracy": 0.6743427018324534, "num_tokens": 1425560277.0, "step": 8503 }, { "entropy": 1.7002219259738922, "epoch": 0.9342231743154541, "grad_norm": 0.5916033387184143, "learning_rate": 1.2392963819002555e-05, "loss": 1.3097, "mean_token_accuracy": 0.6765924940506617, "num_tokens": 1425736774.0, "step": 8504 }, { "entropy": 1.7539168000221252, "epoch": 0.9343330312268271, "grad_norm": 0.7708821296691895, "learning_rate": 1.2391381961843121e-05, "loss": 1.2834, "mean_token_accuracy": 0.6643802175919215, "num_tokens": 1425859969.0, "step": 8505 }, { "entropy": 1.7002765933672588, "epoch": 0.9344428881382, "grad_norm": 0.8281891345977783, "learning_rate": 1.2389800060647077e-05, "loss": 1.3764, "mean_token_accuracy": 0.6619481245676676, "num_tokens": 1425999977.0, "step": 8506 }, { "entropy": 1.7185891668001811, "epoch": 0.934552745049573, "grad_norm": 0.8112635612487793, "learning_rate": 1.2388218115464486e-05, "loss": 1.4361, "mean_token_accuracy": 0.6457992345094681, "num_tokens": 1426174270.0, "step": 8507 }, { "entropy": 1.7229107817014058, "epoch": 0.9346626019609459, "grad_norm": 0.6610147953033447, "learning_rate": 1.238663612634542e-05, "loss": 1.415, "mean_token_accuracy": 0.641492078701655, "num_tokens": 1426351638.0, "step": 8508 }, { "entropy": 1.7340005536874135, "epoch": 0.9347724588723189, "grad_norm": 0.729792058467865, "learning_rate": 1.2385054093339941e-05, "loss": 1.2905, "mean_token_accuracy": 0.6576346158981323, "num_tokens": 1426491965.0, "step": 8509 }, { "entropy": 1.703254113594691, "epoch": 0.9348823157836917, "grad_norm": 0.7107172012329102, "learning_rate": 1.2383472016498128e-05, "loss": 1.423, "mean_token_accuracy": 0.6514757623275121, "num_tokens": 1426659259.0, "step": 8510 }, { "entropy": 1.652273913224538, "epoch": 0.9349921726950646, "grad_norm": 0.6343944072723389, "learning_rate": 1.2381889895870047e-05, "loss": 1.4148, "mean_token_accuracy": 0.6528402169545492, "num_tokens": 1426855817.0, "step": 8511 }, { "entropy": 1.6305622259775798, "epoch": 0.9351020296064376, "grad_norm": 0.7010462880134583, "learning_rate": 1.2380307731505774e-05, "loss": 1.415, "mean_token_accuracy": 0.6715733309586843, "num_tokens": 1427018351.0, "step": 8512 }, { "entropy": 1.698022296031316, "epoch": 0.9352118865178105, "grad_norm": 0.7755676507949829, "learning_rate": 1.2378725523455385e-05, "loss": 1.4059, "mean_token_accuracy": 0.6574622690677643, "num_tokens": 1427181253.0, "step": 8513 }, { "entropy": 1.6600361963113148, "epoch": 0.9353217434291835, "grad_norm": 0.6646712422370911, "learning_rate": 1.2377143271768952e-05, "loss": 1.2889, "mean_token_accuracy": 0.6721018751462301, "num_tokens": 1427336298.0, "step": 8514 }, { "entropy": 1.7082114418347676, "epoch": 0.9354316003405564, "grad_norm": 0.681930422782898, "learning_rate": 1.2375560976496552e-05, "loss": 1.376, "mean_token_accuracy": 0.653716524442037, "num_tokens": 1427502299.0, "step": 8515 }, { "entropy": 1.7119992474714916, "epoch": 0.9355414572519294, "grad_norm": 0.6305302977561951, "learning_rate": 1.2373978637688273e-05, "loss": 1.3506, "mean_token_accuracy": 0.6574372202157974, "num_tokens": 1427667365.0, "step": 8516 }, { "entropy": 1.6821909447511036, "epoch": 0.9356513141633023, "grad_norm": 0.6896127462387085, "learning_rate": 1.2372396255394187e-05, "loss": 1.4327, "mean_token_accuracy": 0.6596807638804117, "num_tokens": 1427798152.0, "step": 8517 }, { "entropy": 1.6889863014221191, "epoch": 0.9357611710746753, "grad_norm": 0.6460300087928772, "learning_rate": 1.2370813829664378e-05, "loss": 1.3724, "mean_token_accuracy": 0.6577446510394415, "num_tokens": 1427962124.0, "step": 8518 }, { "entropy": 1.7353461384773254, "epoch": 0.9358710279860482, "grad_norm": 0.7331773638725281, "learning_rate": 1.236923136054893e-05, "loss": 1.3649, "mean_token_accuracy": 0.6644917080799738, "num_tokens": 1428118360.0, "step": 8519 }, { "entropy": 1.698060820500056, "epoch": 0.9359808848974212, "grad_norm": 0.6143467426300049, "learning_rate": 1.2367648848097926e-05, "loss": 1.3148, "mean_token_accuracy": 0.6635189453760783, "num_tokens": 1428275558.0, "step": 8520 }, { "entropy": 1.7490674356619518, "epoch": 0.936090741808794, "grad_norm": 0.7750940918922424, "learning_rate": 1.2366066292361452e-05, "loss": 1.6152, "mean_token_accuracy": 0.6309548219045004, "num_tokens": 1428429123.0, "step": 8521 }, { "entropy": 1.7576595544815063, "epoch": 0.936200598720167, "grad_norm": 0.7605588436126709, "learning_rate": 1.2364483693389595e-05, "loss": 1.3305, "mean_token_accuracy": 0.660569633046786, "num_tokens": 1428583904.0, "step": 8522 }, { "entropy": 1.6813994944095612, "epoch": 0.9363104556315399, "grad_norm": 0.6593738794326782, "learning_rate": 1.2362901051232443e-05, "loss": 1.424, "mean_token_accuracy": 0.6580062558253607, "num_tokens": 1428758635.0, "step": 8523 }, { "entropy": 1.6494923929373424, "epoch": 0.9364203125429128, "grad_norm": 0.6516050696372986, "learning_rate": 1.236131836594009e-05, "loss": 1.2905, "mean_token_accuracy": 0.6730307787656784, "num_tokens": 1428897399.0, "step": 8524 }, { "entropy": 1.626155565182368, "epoch": 0.9365301694542858, "grad_norm": 0.7014347314834595, "learning_rate": 1.235973563756262e-05, "loss": 1.2858, "mean_token_accuracy": 0.6724262833595276, "num_tokens": 1429039444.0, "step": 8525 }, { "entropy": 1.709865580002467, "epoch": 0.9366400263656587, "grad_norm": 0.61009281873703, "learning_rate": 1.2358152866150132e-05, "loss": 1.3338, "mean_token_accuracy": 0.6688801348209381, "num_tokens": 1429165058.0, "step": 8526 }, { "entropy": 1.6634798149267833, "epoch": 0.9367498832770317, "grad_norm": 0.7199569344520569, "learning_rate": 1.235657005175272e-05, "loss": 1.4218, "mean_token_accuracy": 0.6555696477492651, "num_tokens": 1429330762.0, "step": 8527 }, { "entropy": 1.7542400260766347, "epoch": 0.9368597401884046, "grad_norm": 0.6616681814193726, "learning_rate": 1.235498719442047e-05, "loss": 1.5701, "mean_token_accuracy": 0.6374204456806183, "num_tokens": 1429540856.0, "step": 8528 }, { "entropy": 1.7321417133013408, "epoch": 0.9369695970997776, "grad_norm": 0.9613889455795288, "learning_rate": 1.2353404294203493e-05, "loss": 1.4868, "mean_token_accuracy": 0.6607649475336075, "num_tokens": 1429656849.0, "step": 8529 }, { "entropy": 1.694995254278183, "epoch": 0.9370794540111504, "grad_norm": 0.6711973547935486, "learning_rate": 1.2351821351151877e-05, "loss": 1.4466, "mean_token_accuracy": 0.6372744043668112, "num_tokens": 1429878375.0, "step": 8530 }, { "entropy": 1.605004479487737, "epoch": 0.9371893109225234, "grad_norm": 0.7244718074798584, "learning_rate": 1.2350238365315725e-05, "loss": 1.1491, "mean_token_accuracy": 0.6727927128473917, "num_tokens": 1430059211.0, "step": 8531 }, { "entropy": 1.6997300287087758, "epoch": 0.9372991678338963, "grad_norm": 0.669244110584259, "learning_rate": 1.2348655336745139e-05, "loss": 1.597, "mean_token_accuracy": 0.6324973752101263, "num_tokens": 1430300402.0, "step": 8532 }, { "entropy": 1.7309175928433735, "epoch": 0.9374090247452693, "grad_norm": 0.6466419696807861, "learning_rate": 1.2347072265490217e-05, "loss": 1.3845, "mean_token_accuracy": 0.6553378701210022, "num_tokens": 1430441031.0, "step": 8533 }, { "entropy": 1.71918390194575, "epoch": 0.9375188816566422, "grad_norm": 0.7030944228172302, "learning_rate": 1.2345489151601065e-05, "loss": 1.5114, "mean_token_accuracy": 0.6413251161575317, "num_tokens": 1430640995.0, "step": 8534 }, { "entropy": 1.735251506169637, "epoch": 0.9376287385680152, "grad_norm": 0.619408369064331, "learning_rate": 1.2343905995127787e-05, "loss": 1.4836, "mean_token_accuracy": 0.6480836818615595, "num_tokens": 1430801643.0, "step": 8535 }, { "entropy": 1.7108490367730458, "epoch": 0.9377385954793881, "grad_norm": 0.6266258358955383, "learning_rate": 1.2342322796120494e-05, "loss": 1.4076, "mean_token_accuracy": 0.6552125016848246, "num_tokens": 1430964465.0, "step": 8536 }, { "entropy": 1.7700796524683635, "epoch": 0.9378484523907611, "grad_norm": 0.6809885501861572, "learning_rate": 1.2340739554629285e-05, "loss": 1.4017, "mean_token_accuracy": 0.647578035791715, "num_tokens": 1431106838.0, "step": 8537 }, { "entropy": 1.722182273864746, "epoch": 0.937958309302134, "grad_norm": 0.6057953834533691, "learning_rate": 1.2339156270704273e-05, "loss": 1.3765, "mean_token_accuracy": 0.6555658529202143, "num_tokens": 1431241799.0, "step": 8538 }, { "entropy": 1.7782465716203053, "epoch": 0.9380681662135069, "grad_norm": 0.7593392729759216, "learning_rate": 1.233757294439557e-05, "loss": 1.5713, "mean_token_accuracy": 0.6409921248753866, "num_tokens": 1431375532.0, "step": 8539 }, { "entropy": 1.6467249592145283, "epoch": 0.9381780231248799, "grad_norm": 0.6114002466201782, "learning_rate": 1.2335989575753287e-05, "loss": 1.3017, "mean_token_accuracy": 0.666813870271047, "num_tokens": 1431556355.0, "step": 8540 }, { "entropy": 1.7343460321426392, "epoch": 0.9382878800362527, "grad_norm": 0.673979640007019, "learning_rate": 1.2334406164827532e-05, "loss": 1.2794, "mean_token_accuracy": 0.6679557810227076, "num_tokens": 1431733091.0, "step": 8541 }, { "entropy": 1.6641011436780293, "epoch": 0.9383977369476257, "grad_norm": 0.7670729160308838, "learning_rate": 1.2332822711668429e-05, "loss": 1.3856, "mean_token_accuracy": 0.6573190341393153, "num_tokens": 1431889217.0, "step": 8542 }, { "entropy": 1.7148659825325012, "epoch": 0.9385075938589986, "grad_norm": 0.7064855098724365, "learning_rate": 1.233123921632608e-05, "loss": 1.3523, "mean_token_accuracy": 0.665443574388822, "num_tokens": 1432021404.0, "step": 8543 }, { "entropy": 1.6672392785549164, "epoch": 0.9386174507703716, "grad_norm": 0.6361745595932007, "learning_rate": 1.2329655678850619e-05, "loss": 1.412, "mean_token_accuracy": 0.6467155714829763, "num_tokens": 1432204022.0, "step": 8544 }, { "entropy": 1.601523111263911, "epoch": 0.9387273076817445, "grad_norm": 0.5796836018562317, "learning_rate": 1.2328072099292148e-05, "loss": 1.3599, "mean_token_accuracy": 0.6564453194538752, "num_tokens": 1432403878.0, "step": 8545 }, { "entropy": 1.699288825194041, "epoch": 0.9388371645931175, "grad_norm": 0.7255278825759888, "learning_rate": 1.2326488477700795e-05, "loss": 1.5015, "mean_token_accuracy": 0.6359589795271555, "num_tokens": 1432602079.0, "step": 8546 }, { "entropy": 1.6769676804542542, "epoch": 0.9389470215044904, "grad_norm": 0.7669406533241272, "learning_rate": 1.2324904814126682e-05, "loss": 1.4365, "mean_token_accuracy": 0.645541230837504, "num_tokens": 1432795695.0, "step": 8547 }, { "entropy": 1.7266658147176106, "epoch": 0.9390568784158634, "grad_norm": 0.6003899574279785, "learning_rate": 1.2323321108619927e-05, "loss": 1.5048, "mean_token_accuracy": 0.6312548617521921, "num_tokens": 1433015547.0, "step": 8548 }, { "entropy": 1.694272110859553, "epoch": 0.9391667353272363, "grad_norm": 0.597334623336792, "learning_rate": 1.2321737361230657e-05, "loss": 1.4965, "mean_token_accuracy": 0.6420956204334894, "num_tokens": 1433216205.0, "step": 8549 }, { "entropy": 1.750846117734909, "epoch": 0.9392765922386093, "grad_norm": 0.6369536519050598, "learning_rate": 1.232015357200899e-05, "loss": 1.4068, "mean_token_accuracy": 0.6545988370974859, "num_tokens": 1433365930.0, "step": 8550 }, { "entropy": 1.7660410205523174, "epoch": 0.9393864491499822, "grad_norm": 0.7357500195503235, "learning_rate": 1.231856974100506e-05, "loss": 1.2884, "mean_token_accuracy": 0.6664379785458246, "num_tokens": 1433474906.0, "step": 8551 }, { "entropy": 1.717143605152766, "epoch": 0.939496306061355, "grad_norm": 0.7086150050163269, "learning_rate": 1.2316985868268996e-05, "loss": 1.2126, "mean_token_accuracy": 0.6740925163030624, "num_tokens": 1433578842.0, "step": 8552 }, { "entropy": 1.689423680305481, "epoch": 0.939606162972728, "grad_norm": 0.7993782758712769, "learning_rate": 1.2315401953850915e-05, "loss": 1.3022, "mean_token_accuracy": 0.6719008336464564, "num_tokens": 1433704238.0, "step": 8553 }, { "entropy": 1.7148550947507222, "epoch": 0.9397160198841009, "grad_norm": 0.6006395220756531, "learning_rate": 1.2313817997800963e-05, "loss": 1.3841, "mean_token_accuracy": 0.6570403923590978, "num_tokens": 1433895168.0, "step": 8554 }, { "entropy": 1.7745637098948162, "epoch": 0.9398258767954739, "grad_norm": 0.6105915904045105, "learning_rate": 1.231223400016926e-05, "loss": 1.3853, "mean_token_accuracy": 0.6493383248647054, "num_tokens": 1434091545.0, "step": 8555 }, { "entropy": 1.6534299850463867, "epoch": 0.9399357337068468, "grad_norm": 0.630155622959137, "learning_rate": 1.2310649961005937e-05, "loss": 1.3915, "mean_token_accuracy": 0.6596393237511317, "num_tokens": 1434295654.0, "step": 8556 }, { "entropy": 1.6820517877737682, "epoch": 0.9400455906182198, "grad_norm": 0.6753036975860596, "learning_rate": 1.2309065880361139e-05, "loss": 1.3637, "mean_token_accuracy": 0.6623709599177042, "num_tokens": 1434465834.0, "step": 8557 }, { "entropy": 1.7782000700632732, "epoch": 0.9401554475295927, "grad_norm": 0.7968646883964539, "learning_rate": 1.2307481758284996e-05, "loss": 1.3908, "mean_token_accuracy": 0.6523198982079824, "num_tokens": 1434654878.0, "step": 8558 }, { "entropy": 1.7569174667199452, "epoch": 0.9402653044409657, "grad_norm": 0.8099093437194824, "learning_rate": 1.2305897594827642e-05, "loss": 1.4227, "mean_token_accuracy": 0.654918392499288, "num_tokens": 1434786625.0, "step": 8559 }, { "entropy": 1.6676458517710369, "epoch": 0.9403751613523386, "grad_norm": 0.7002986669540405, "learning_rate": 1.230431339003922e-05, "loss": 1.3799, "mean_token_accuracy": 0.6608104457457861, "num_tokens": 1434961024.0, "step": 8560 }, { "entropy": 1.7227271993954976, "epoch": 0.9404850182637116, "grad_norm": 0.6729583144187927, "learning_rate": 1.2302729143969864e-05, "loss": 1.4044, "mean_token_accuracy": 0.6510558873414993, "num_tokens": 1435139239.0, "step": 8561 }, { "entropy": 1.7713795006275177, "epoch": 0.9405948751750844, "grad_norm": 0.6517491340637207, "learning_rate": 1.230114485666972e-05, "loss": 1.3088, "mean_token_accuracy": 0.6703696896632513, "num_tokens": 1435306595.0, "step": 8562 }, { "entropy": 1.7000613113244374, "epoch": 0.9407047320864574, "grad_norm": 0.7172781229019165, "learning_rate": 1.2299560528188928e-05, "loss": 1.4559, "mean_token_accuracy": 0.6527749449014664, "num_tokens": 1435512818.0, "step": 8563 }, { "entropy": 1.6688750584920247, "epoch": 0.9408145889978303, "grad_norm": 0.6413210034370422, "learning_rate": 1.2297976158577632e-05, "loss": 1.4387, "mean_token_accuracy": 0.655961866180102, "num_tokens": 1435687791.0, "step": 8564 }, { "entropy": 1.7471512258052826, "epoch": 0.9409244459092032, "grad_norm": 0.7935196161270142, "learning_rate": 1.2296391747885969e-05, "loss": 1.4428, "mean_token_accuracy": 0.652156800031662, "num_tokens": 1435831770.0, "step": 8565 }, { "entropy": 1.645731806755066, "epoch": 0.9410343028205762, "grad_norm": 0.6245310306549072, "learning_rate": 1.22948072961641e-05, "loss": 1.3675, "mean_token_accuracy": 0.6590336362520853, "num_tokens": 1436053698.0, "step": 8566 }, { "entropy": 1.7025697429974873, "epoch": 0.9411441597319491, "grad_norm": 0.6795003414154053, "learning_rate": 1.2293222803462157e-05, "loss": 1.3575, "mean_token_accuracy": 0.6611177573601404, "num_tokens": 1436232464.0, "step": 8567 }, { "entropy": 1.6784963309764862, "epoch": 0.9412540166433221, "grad_norm": 0.6474406719207764, "learning_rate": 1.2291638269830296e-05, "loss": 1.5794, "mean_token_accuracy": 0.6418151060740153, "num_tokens": 1436431748.0, "step": 8568 }, { "entropy": 1.6753207445144653, "epoch": 0.941363873554695, "grad_norm": 0.7023776769638062, "learning_rate": 1.2290053695318666e-05, "loss": 1.2703, "mean_token_accuracy": 0.6644528806209564, "num_tokens": 1436594419.0, "step": 8569 }, { "entropy": 1.693762997786204, "epoch": 0.941473730466068, "grad_norm": 0.7446289658546448, "learning_rate": 1.2288469079977423e-05, "loss": 1.4171, "mean_token_accuracy": 0.6514114439487457, "num_tokens": 1436802609.0, "step": 8570 }, { "entropy": 1.7322840690612793, "epoch": 0.9415835873774409, "grad_norm": 0.7329487204551697, "learning_rate": 1.2286884423856707e-05, "loss": 1.3476, "mean_token_accuracy": 0.6640298316876093, "num_tokens": 1436947973.0, "step": 8571 }, { "entropy": 1.7172012130419414, "epoch": 0.9416934442888139, "grad_norm": 0.7047858238220215, "learning_rate": 1.2285299727006681e-05, "loss": 1.2538, "mean_token_accuracy": 0.6799323062102, "num_tokens": 1437075212.0, "step": 8572 }, { "entropy": 1.7134579122066498, "epoch": 0.9418033012001867, "grad_norm": 0.7685977816581726, "learning_rate": 1.22837149894775e-05, "loss": 1.3434, "mean_token_accuracy": 0.6664036015669504, "num_tokens": 1437204294.0, "step": 8573 }, { "entropy": 1.6684084435304005, "epoch": 0.9419131581115597, "grad_norm": 0.6283167004585266, "learning_rate": 1.2282130211319317e-05, "loss": 1.4298, "mean_token_accuracy": 0.6495349705219269, "num_tokens": 1437409005.0, "step": 8574 }, { "entropy": 1.7082226773103077, "epoch": 0.9420230150229326, "grad_norm": 0.710364043712616, "learning_rate": 1.228054539258229e-05, "loss": 1.3381, "mean_token_accuracy": 0.6665283391873041, "num_tokens": 1437559170.0, "step": 8575 }, { "entropy": 1.6750965019067128, "epoch": 0.9421328719343056, "grad_norm": 0.7653444409370422, "learning_rate": 1.227896053331658e-05, "loss": 1.2684, "mean_token_accuracy": 0.6685964713493983, "num_tokens": 1437700695.0, "step": 8576 }, { "entropy": 1.740403562784195, "epoch": 0.9422427288456785, "grad_norm": 0.6980507373809814, "learning_rate": 1.2277375633572342e-05, "loss": 1.4079, "mean_token_accuracy": 0.6575213720401129, "num_tokens": 1437824155.0, "step": 8577 }, { "entropy": 1.697914143403371, "epoch": 0.9423525857570514, "grad_norm": 1.6362974643707275, "learning_rate": 1.2275790693399742e-05, "loss": 1.1908, "mean_token_accuracy": 0.6685192883014679, "num_tokens": 1438043105.0, "step": 8578 }, { "entropy": 1.7175638178984325, "epoch": 0.9424624426684244, "grad_norm": 0.7739365696907043, "learning_rate": 1.2274205712848946e-05, "loss": 1.2582, "mean_token_accuracy": 0.6746840725342432, "num_tokens": 1438171351.0, "step": 8579 }, { "entropy": 1.7105300724506378, "epoch": 0.9425722995797973, "grad_norm": 0.6245592832565308, "learning_rate": 1.227262069197011e-05, "loss": 1.4033, "mean_token_accuracy": 0.6511557598908743, "num_tokens": 1438333063.0, "step": 8580 }, { "entropy": 1.7435839176177979, "epoch": 0.9426821564911703, "grad_norm": 1.099191665649414, "learning_rate": 1.2271035630813399e-05, "loss": 1.6261, "mean_token_accuracy": 0.6620939721663793, "num_tokens": 1438480683.0, "step": 8581 }, { "entropy": 1.6721567908922832, "epoch": 0.9427920134025431, "grad_norm": 0.7055137753486633, "learning_rate": 1.2269450529428987e-05, "loss": 1.3201, "mean_token_accuracy": 0.6689964085817337, "num_tokens": 1438677861.0, "step": 8582 }, { "entropy": 1.7083663443724315, "epoch": 0.9429018703139161, "grad_norm": 0.5708084106445312, "learning_rate": 1.2267865387867038e-05, "loss": 1.3933, "mean_token_accuracy": 0.656741683681806, "num_tokens": 1438877416.0, "step": 8583 }, { "entropy": 1.706307937701543, "epoch": 0.943011727225289, "grad_norm": 0.7216012477874756, "learning_rate": 1.2266280206177718e-05, "loss": 1.4801, "mean_token_accuracy": 0.6545088092486063, "num_tokens": 1439042236.0, "step": 8584 }, { "entropy": 1.70126873254776, "epoch": 0.943121584136662, "grad_norm": 0.7581015229225159, "learning_rate": 1.2264694984411203e-05, "loss": 1.5482, "mean_token_accuracy": 0.6490120142698288, "num_tokens": 1439220469.0, "step": 8585 }, { "entropy": 1.651670257250468, "epoch": 0.9432314410480349, "grad_norm": 0.6032636761665344, "learning_rate": 1.226310972261766e-05, "loss": 1.5364, "mean_token_accuracy": 0.6319657365481058, "num_tokens": 1439465630.0, "step": 8586 }, { "entropy": 1.6893612047036488, "epoch": 0.9433412979594079, "grad_norm": 0.5619993805885315, "learning_rate": 1.2261524420847265e-05, "loss": 1.0479, "mean_token_accuracy": 0.6818432062864304, "num_tokens": 1439603786.0, "step": 8587 }, { "entropy": 1.7372606893380482, "epoch": 0.9434511548707808, "grad_norm": 0.8364537954330444, "learning_rate": 1.225993907915019e-05, "loss": 1.433, "mean_token_accuracy": 0.6543504744768143, "num_tokens": 1439802054.0, "step": 8588 }, { "entropy": 1.6576914886633556, "epoch": 0.9435610117821538, "grad_norm": 0.6688291430473328, "learning_rate": 1.225835369757661e-05, "loss": 1.3298, "mean_token_accuracy": 0.6816399743159612, "num_tokens": 1439958813.0, "step": 8589 }, { "entropy": 1.6907293200492859, "epoch": 0.9436708686935267, "grad_norm": 0.657168984413147, "learning_rate": 1.2256768276176702e-05, "loss": 1.3813, "mean_token_accuracy": 0.6570898244778315, "num_tokens": 1440161016.0, "step": 8590 }, { "entropy": 1.6189829607804616, "epoch": 0.9437807256048997, "grad_norm": 0.7953099012374878, "learning_rate": 1.2255182815000646e-05, "loss": 1.4897, "mean_token_accuracy": 0.6546467443307241, "num_tokens": 1440331282.0, "step": 8591 }, { "entropy": 1.751821796099345, "epoch": 0.9438905825162726, "grad_norm": 0.8301904797554016, "learning_rate": 1.225359731409862e-05, "loss": 1.5474, "mean_token_accuracy": 0.6519018063942591, "num_tokens": 1440558471.0, "step": 8592 }, { "entropy": 1.741612325112025, "epoch": 0.9440004394276454, "grad_norm": 0.7195350527763367, "learning_rate": 1.22520117735208e-05, "loss": 1.4176, "mean_token_accuracy": 0.6544721672932307, "num_tokens": 1440739587.0, "step": 8593 }, { "entropy": 1.6872974336147308, "epoch": 0.9441102963390184, "grad_norm": 0.6328041553497314, "learning_rate": 1.2250426193317376e-05, "loss": 1.5012, "mean_token_accuracy": 0.645188053448995, "num_tokens": 1440907130.0, "step": 8594 }, { "entropy": 1.7358746826648712, "epoch": 0.9442201532503913, "grad_norm": 0.6817638278007507, "learning_rate": 1.2248840573538522e-05, "loss": 1.43, "mean_token_accuracy": 0.6595302472511927, "num_tokens": 1441080715.0, "step": 8595 }, { "entropy": 1.6565217673778534, "epoch": 0.9443300101617643, "grad_norm": 0.6565890312194824, "learning_rate": 1.224725491423443e-05, "loss": 1.5008, "mean_token_accuracy": 0.6534999509652456, "num_tokens": 1441287677.0, "step": 8596 }, { "entropy": 1.7144455512364705, "epoch": 0.9444398670731372, "grad_norm": 0.6671959757804871, "learning_rate": 1.224566921545528e-05, "loss": 1.4134, "mean_token_accuracy": 0.6520157555739085, "num_tokens": 1441433876.0, "step": 8597 }, { "entropy": 1.690605749686559, "epoch": 0.9445497239845102, "grad_norm": 0.723557710647583, "learning_rate": 1.2244083477251265e-05, "loss": 1.5496, "mean_token_accuracy": 0.6480442037185034, "num_tokens": 1441611098.0, "step": 8598 }, { "entropy": 1.7220774292945862, "epoch": 0.9446595808958831, "grad_norm": 0.6669568419456482, "learning_rate": 1.2242497699672562e-05, "loss": 1.547, "mean_token_accuracy": 0.6334994534651438, "num_tokens": 1441771369.0, "step": 8599 }, { "entropy": 1.7486573259035747, "epoch": 0.9447694378072561, "grad_norm": 0.6781217455863953, "learning_rate": 1.2240911882769372e-05, "loss": 1.4171, "mean_token_accuracy": 0.645118405421575, "num_tokens": 1441916756.0, "step": 8600 }, { "entropy": 1.6590690712134044, "epoch": 0.944879294718629, "grad_norm": 0.6555703282356262, "learning_rate": 1.2239326026591877e-05, "loss": 1.2832, "mean_token_accuracy": 0.6796758274237314, "num_tokens": 1442041756.0, "step": 8601 }, { "entropy": 1.7096993426481883, "epoch": 0.944989151630002, "grad_norm": 0.690627932548523, "learning_rate": 1.2237740131190275e-05, "loss": 1.3739, "mean_token_accuracy": 0.66547991335392, "num_tokens": 1442198769.0, "step": 8602 }, { "entropy": 1.728843520085017, "epoch": 0.9450990085413749, "grad_norm": 0.7249277830123901, "learning_rate": 1.2236154196614754e-05, "loss": 1.5124, "mean_token_accuracy": 0.6484755227963129, "num_tokens": 1442371906.0, "step": 8603 }, { "entropy": 1.7191427449385326, "epoch": 0.9452088654527478, "grad_norm": 0.6279011368751526, "learning_rate": 1.2234568222915511e-05, "loss": 1.5187, "mean_token_accuracy": 0.6500843664010366, "num_tokens": 1442549837.0, "step": 8604 }, { "entropy": 1.7306394279003143, "epoch": 0.9453187223641207, "grad_norm": 0.7033337950706482, "learning_rate": 1.2232982210142734e-05, "loss": 1.4231, "mean_token_accuracy": 0.649728591243426, "num_tokens": 1442708077.0, "step": 8605 }, { "entropy": 1.6703736782073975, "epoch": 0.9454285792754936, "grad_norm": 0.745704174041748, "learning_rate": 1.2231396158346631e-05, "loss": 1.4547, "mean_token_accuracy": 0.6601613610982895, "num_tokens": 1442846251.0, "step": 8606 }, { "entropy": 1.6758897602558136, "epoch": 0.9455384361868666, "grad_norm": 0.6928910613059998, "learning_rate": 1.2229810067577395e-05, "loss": 1.5418, "mean_token_accuracy": 0.6258559823036194, "num_tokens": 1443052093.0, "step": 8607 }, { "entropy": 1.685420682032903, "epoch": 0.9456482930982395, "grad_norm": 0.6015498042106628, "learning_rate": 1.2228223937885222e-05, "loss": 1.3963, "mean_token_accuracy": 0.6562847743431727, "num_tokens": 1443234288.0, "step": 8608 }, { "entropy": 1.7362493971983592, "epoch": 0.9457581500096125, "grad_norm": 0.7113436460494995, "learning_rate": 1.2226637769320316e-05, "loss": 1.3335, "mean_token_accuracy": 0.6664293905099233, "num_tokens": 1443373399.0, "step": 8609 }, { "entropy": 1.7087362408638, "epoch": 0.9458680069209854, "grad_norm": 0.6327268481254578, "learning_rate": 1.2225051561932877e-05, "loss": 1.4998, "mean_token_accuracy": 0.6323538819948832, "num_tokens": 1443518849.0, "step": 8610 }, { "entropy": 1.673716555039088, "epoch": 0.9459778638323584, "grad_norm": 0.6509339213371277, "learning_rate": 1.2223465315773109e-05, "loss": 1.3558, "mean_token_accuracy": 0.6670020073652267, "num_tokens": 1443642314.0, "step": 8611 }, { "entropy": 1.6992060740788777, "epoch": 0.9460877207437313, "grad_norm": 0.8136547207832336, "learning_rate": 1.2221879030891214e-05, "loss": 1.5503, "mean_token_accuracy": 0.6302010516325632, "num_tokens": 1443856184.0, "step": 8612 }, { "entropy": 1.7098911603291829, "epoch": 0.9461975776551043, "grad_norm": 0.7616743445396423, "learning_rate": 1.2220292707337396e-05, "loss": 1.4039, "mean_token_accuracy": 0.650201790034771, "num_tokens": 1443988081.0, "step": 8613 }, { "entropy": 1.679803490638733, "epoch": 0.9463074345664771, "grad_norm": 0.8417178988456726, "learning_rate": 1.2218706345161869e-05, "loss": 1.3653, "mean_token_accuracy": 0.6717022359371185, "num_tokens": 1444134135.0, "step": 8614 }, { "entropy": 1.7369107902050018, "epoch": 0.9464172914778501, "grad_norm": 0.7196716070175171, "learning_rate": 1.221711994441483e-05, "loss": 1.259, "mean_token_accuracy": 0.6838698089122772, "num_tokens": 1444243607.0, "step": 8615 }, { "entropy": 1.6895853380362194, "epoch": 0.946527148389223, "grad_norm": 0.6455492973327637, "learning_rate": 1.2215533505146498e-05, "loss": 1.5036, "mean_token_accuracy": 0.638408382733663, "num_tokens": 1444393001.0, "step": 8616 }, { "entropy": 1.7575147449970245, "epoch": 0.946637005300596, "grad_norm": 0.7868098020553589, "learning_rate": 1.2213947027407074e-05, "loss": 1.4593, "mean_token_accuracy": 0.6475796749194463, "num_tokens": 1444581539.0, "step": 8617 }, { "entropy": 1.6824683447678883, "epoch": 0.9467468622119689, "grad_norm": 0.7493737936019897, "learning_rate": 1.2212360511246775e-05, "loss": 1.2181, "mean_token_accuracy": 0.6755462735891342, "num_tokens": 1444696551.0, "step": 8618 }, { "entropy": 1.685210108757019, "epoch": 0.9468567191233418, "grad_norm": 0.6668775081634521, "learning_rate": 1.221077395671581e-05, "loss": 1.3536, "mean_token_accuracy": 0.6589836031198502, "num_tokens": 1444842322.0, "step": 8619 }, { "entropy": 1.695833792289098, "epoch": 0.9469665760347148, "grad_norm": 0.6757495403289795, "learning_rate": 1.2209187363864403e-05, "loss": 1.4149, "mean_token_accuracy": 0.6574927568435669, "num_tokens": 1444978361.0, "step": 8620 }, { "entropy": 1.6860649983088176, "epoch": 0.9470764329460877, "grad_norm": 0.8738111853599548, "learning_rate": 1.2207600732742753e-05, "loss": 1.4086, "mean_token_accuracy": 0.672735000650088, "num_tokens": 1445114765.0, "step": 8621 }, { "entropy": 1.7755548655986786, "epoch": 0.9471862898574607, "grad_norm": 0.7084314227104187, "learning_rate": 1.2206014063401088e-05, "loss": 1.4593, "mean_token_accuracy": 0.6394910415013632, "num_tokens": 1445280099.0, "step": 8622 }, { "entropy": 1.6687957346439362, "epoch": 0.9472961467688336, "grad_norm": 0.6261263489723206, "learning_rate": 1.2204427355889619e-05, "loss": 1.4939, "mean_token_accuracy": 0.6568110336860021, "num_tokens": 1445470949.0, "step": 8623 }, { "entropy": 1.6220212280750275, "epoch": 0.9474060036802066, "grad_norm": 0.6310474276542664, "learning_rate": 1.2202840610258567e-05, "loss": 1.4776, "mean_token_accuracy": 0.6486604412396749, "num_tokens": 1445669405.0, "step": 8624 }, { "entropy": 1.7591717044512432, "epoch": 0.9475158605915794, "grad_norm": 0.6975616812705994, "learning_rate": 1.2201253826558151e-05, "loss": 1.4629, "mean_token_accuracy": 0.6384722590446472, "num_tokens": 1445848452.0, "step": 8625 }, { "entropy": 1.7158987323443096, "epoch": 0.9476257175029524, "grad_norm": 0.7575097680091858, "learning_rate": 1.2199667004838595e-05, "loss": 1.5049, "mean_token_accuracy": 0.6423617899417877, "num_tokens": 1446025783.0, "step": 8626 }, { "entropy": 1.7229747573534648, "epoch": 0.9477355744143253, "grad_norm": 0.7464864253997803, "learning_rate": 1.2198080145150115e-05, "loss": 1.5275, "mean_token_accuracy": 0.6419810652732849, "num_tokens": 1446233918.0, "step": 8627 }, { "entropy": 1.6983900268872578, "epoch": 0.9478454313256983, "grad_norm": 0.7038105130195618, "learning_rate": 1.2196493247542945e-05, "loss": 1.4288, "mean_token_accuracy": 0.6560607403516769, "num_tokens": 1446397858.0, "step": 8628 }, { "entropy": 1.6872251629829407, "epoch": 0.9479552882370712, "grad_norm": 0.6693107485771179, "learning_rate": 1.2194906312067298e-05, "loss": 1.4111, "mean_token_accuracy": 0.6616791983445486, "num_tokens": 1446572029.0, "step": 8629 }, { "entropy": 1.734555850426356, "epoch": 0.9480651451484442, "grad_norm": 0.8772332668304443, "learning_rate": 1.2193319338773407e-05, "loss": 1.2269, "mean_token_accuracy": 0.675347904364268, "num_tokens": 1446687371.0, "step": 8630 }, { "entropy": 1.7342069049676259, "epoch": 0.9481750020598171, "grad_norm": 0.7794189453125, "learning_rate": 1.2191732327711494e-05, "loss": 1.4603, "mean_token_accuracy": 0.6412904510895411, "num_tokens": 1446848084.0, "step": 8631 }, { "entropy": 1.6514850755532582, "epoch": 0.94828485897119, "grad_norm": 0.660847544670105, "learning_rate": 1.219014527893179e-05, "loss": 1.3589, "mean_token_accuracy": 0.6614196399847666, "num_tokens": 1447020659.0, "step": 8632 }, { "entropy": 1.752412219842275, "epoch": 0.948394715882563, "grad_norm": 0.7109209895133972, "learning_rate": 1.2188558192484524e-05, "loss": 1.476, "mean_token_accuracy": 0.6500055193901062, "num_tokens": 1447193145.0, "step": 8633 }, { "entropy": 1.7354917923609416, "epoch": 0.9485045727939359, "grad_norm": 0.7011961936950684, "learning_rate": 1.2186971068419933e-05, "loss": 1.405, "mean_token_accuracy": 0.6602567285299301, "num_tokens": 1447336250.0, "step": 8634 }, { "entropy": 1.7502802014350891, "epoch": 0.9486144297053088, "grad_norm": 0.5763677358627319, "learning_rate": 1.2185383906788235e-05, "loss": 1.3808, "mean_token_accuracy": 0.6556936403115591, "num_tokens": 1447545615.0, "step": 8635 }, { "entropy": 1.6883324980735779, "epoch": 0.9487242866166817, "grad_norm": 0.6211029887199402, "learning_rate": 1.2183796707639672e-05, "loss": 1.412, "mean_token_accuracy": 0.6450077096621195, "num_tokens": 1447732873.0, "step": 8636 }, { "entropy": 1.7086971402168274, "epoch": 0.9488341435280547, "grad_norm": 0.7448198199272156, "learning_rate": 1.2182209471024478e-05, "loss": 1.2632, "mean_token_accuracy": 0.6758876889944077, "num_tokens": 1447854972.0, "step": 8637 }, { "entropy": 1.7209039429823558, "epoch": 0.9489440004394276, "grad_norm": 0.770972490310669, "learning_rate": 1.2180622196992889e-05, "loss": 1.4801, "mean_token_accuracy": 0.6674757947524389, "num_tokens": 1447990717.0, "step": 8638 }, { "entropy": 1.6779508491357167, "epoch": 0.9490538573508006, "grad_norm": 0.6987808346748352, "learning_rate": 1.2179034885595133e-05, "loss": 1.4486, "mean_token_accuracy": 0.6476506143808365, "num_tokens": 1448146980.0, "step": 8639 }, { "entropy": 1.70108496149381, "epoch": 0.9491637142621735, "grad_norm": 0.5300113558769226, "learning_rate": 1.217744753688146e-05, "loss": 1.4492, "mean_token_accuracy": 0.6409799307584763, "num_tokens": 1448398932.0, "step": 8640 }, { "entropy": 1.7490708430608113, "epoch": 0.9492735711735465, "grad_norm": 0.7466815710067749, "learning_rate": 1.2175860150902103e-05, "loss": 1.3808, "mean_token_accuracy": 0.6627901097138723, "num_tokens": 1448517817.0, "step": 8641 }, { "entropy": 1.8031253119309743, "epoch": 0.9493834280849194, "grad_norm": 0.7149515748023987, "learning_rate": 1.21742727277073e-05, "loss": 1.4257, "mean_token_accuracy": 0.6576060056686401, "num_tokens": 1448680244.0, "step": 8642 }, { "entropy": 1.6629578669865925, "epoch": 0.9494932849962924, "grad_norm": 0.8295400738716125, "learning_rate": 1.2172685267347293e-05, "loss": 1.3317, "mean_token_accuracy": 0.6723531931638718, "num_tokens": 1448837554.0, "step": 8643 }, { "entropy": 1.6949988305568695, "epoch": 0.9496031419076653, "grad_norm": 0.5842585563659668, "learning_rate": 1.2171097769872331e-05, "loss": 1.4098, "mean_token_accuracy": 0.6408194800217947, "num_tokens": 1449029085.0, "step": 8644 }, { "entropy": 1.6800251007080078, "epoch": 0.9497129988190383, "grad_norm": 0.6801996231079102, "learning_rate": 1.216951023533265e-05, "loss": 1.2726, "mean_token_accuracy": 0.6702685306469599, "num_tokens": 1449167357.0, "step": 8645 }, { "entropy": 1.7152654727300007, "epoch": 0.9498228557304111, "grad_norm": 0.6115834712982178, "learning_rate": 1.2167922663778493e-05, "loss": 1.4586, "mean_token_accuracy": 0.6457581520080566, "num_tokens": 1449365920.0, "step": 8646 }, { "entropy": 1.6668970982233684, "epoch": 0.949932712641784, "grad_norm": 0.6367796063423157, "learning_rate": 1.2166335055260112e-05, "loss": 1.544, "mean_token_accuracy": 0.6396810958782831, "num_tokens": 1449592927.0, "step": 8647 }, { "entropy": 1.6488535205523174, "epoch": 0.950042569553157, "grad_norm": 0.6653515100479126, "learning_rate": 1.2164747409827755e-05, "loss": 1.3773, "mean_token_accuracy": 0.6532280345757803, "num_tokens": 1449757980.0, "step": 8648 }, { "entropy": 1.7187113364537556, "epoch": 0.9501524264645299, "grad_norm": 0.6801130771636963, "learning_rate": 1.2163159727531664e-05, "loss": 1.4093, "mean_token_accuracy": 0.6681001136700312, "num_tokens": 1449900483.0, "step": 8649 }, { "entropy": 1.6937756339708965, "epoch": 0.9502622833759029, "grad_norm": 0.6868298053741455, "learning_rate": 1.2161572008422093e-05, "loss": 1.4056, "mean_token_accuracy": 0.6516019354263941, "num_tokens": 1450048324.0, "step": 8650 }, { "entropy": 1.7173837820688884, "epoch": 0.9503721402872758, "grad_norm": 0.6891928315162659, "learning_rate": 1.215998425254929e-05, "loss": 1.3734, "mean_token_accuracy": 0.6473551144202551, "num_tokens": 1450207740.0, "step": 8651 }, { "entropy": 1.68080539504687, "epoch": 0.9504819971986488, "grad_norm": 0.7287924289703369, "learning_rate": 1.2158396459963511e-05, "loss": 1.4223, "mean_token_accuracy": 0.6610532452662786, "num_tokens": 1450370382.0, "step": 8652 }, { "entropy": 1.7321538031101227, "epoch": 0.9505918541100217, "grad_norm": 0.8169899582862854, "learning_rate": 1.2156808630715004e-05, "loss": 1.5197, "mean_token_accuracy": 0.6509786198536555, "num_tokens": 1450555161.0, "step": 8653 }, { "entropy": 1.7048485080401103, "epoch": 0.9507017110213947, "grad_norm": 0.6728548407554626, "learning_rate": 1.2155220764854027e-05, "loss": 1.2885, "mean_token_accuracy": 0.6768156687418619, "num_tokens": 1450701110.0, "step": 8654 }, { "entropy": 1.7684779067834218, "epoch": 0.9508115679327676, "grad_norm": 0.7296202778816223, "learning_rate": 1.2153632862430828e-05, "loss": 1.4785, "mean_token_accuracy": 0.6470775653918585, "num_tokens": 1450896700.0, "step": 8655 }, { "entropy": 1.7234237790107727, "epoch": 0.9509214248441406, "grad_norm": 0.691101610660553, "learning_rate": 1.2152044923495676e-05, "loss": 1.4179, "mean_token_accuracy": 0.6461136788129807, "num_tokens": 1451079096.0, "step": 8656 }, { "entropy": 1.6851498285929363, "epoch": 0.9510312817555134, "grad_norm": 0.6487606763839722, "learning_rate": 1.215045694809882e-05, "loss": 1.3169, "mean_token_accuracy": 0.6667506843805313, "num_tokens": 1451221118.0, "step": 8657 }, { "entropy": 1.709127922852834, "epoch": 0.9511411386668864, "grad_norm": 0.7377097606658936, "learning_rate": 1.2148868936290515e-05, "loss": 1.4872, "mean_token_accuracy": 0.655120978752772, "num_tokens": 1451396693.0, "step": 8658 }, { "entropy": 1.6430183351039886, "epoch": 0.9512509955782593, "grad_norm": 0.6040515899658203, "learning_rate": 1.2147280888121026e-05, "loss": 1.5599, "mean_token_accuracy": 0.6429479469855627, "num_tokens": 1451618865.0, "step": 8659 }, { "entropy": 1.7497617801030476, "epoch": 0.9513608524896322, "grad_norm": 0.613490104675293, "learning_rate": 1.2145692803640621e-05, "loss": 1.3799, "mean_token_accuracy": 0.6475595831871033, "num_tokens": 1451744744.0, "step": 8660 }, { "entropy": 1.7006073792775471, "epoch": 0.9514707094010052, "grad_norm": 0.6991299986839294, "learning_rate": 1.2144104682899548e-05, "loss": 1.5495, "mean_token_accuracy": 0.6511161873737971, "num_tokens": 1451894113.0, "step": 8661 }, { "entropy": 1.6935534576574962, "epoch": 0.9515805663123781, "grad_norm": 0.7065954208374023, "learning_rate": 1.2142516525948083e-05, "loss": 1.3041, "mean_token_accuracy": 0.6719731688499451, "num_tokens": 1452005836.0, "step": 8662 }, { "entropy": 1.6703713536262512, "epoch": 0.9516904232237511, "grad_norm": 0.6174436211585999, "learning_rate": 1.214092833283648e-05, "loss": 1.3075, "mean_token_accuracy": 0.6695892562468847, "num_tokens": 1452146942.0, "step": 8663 }, { "entropy": 1.7062116861343384, "epoch": 0.951800280135124, "grad_norm": 0.6128714084625244, "learning_rate": 1.2139340103615011e-05, "loss": 1.398, "mean_token_accuracy": 0.6536041001478831, "num_tokens": 1452297995.0, "step": 8664 }, { "entropy": 1.691074013710022, "epoch": 0.951910137046497, "grad_norm": 0.618613600730896, "learning_rate": 1.2137751838333943e-05, "loss": 1.4527, "mean_token_accuracy": 0.6470177272955576, "num_tokens": 1452490687.0, "step": 8665 }, { "entropy": 1.7151458064715068, "epoch": 0.9520199939578698, "grad_norm": 0.6557570695877075, "learning_rate": 1.213616353704354e-05, "loss": 1.4584, "mean_token_accuracy": 0.6519462615251541, "num_tokens": 1452657863.0, "step": 8666 }, { "entropy": 1.7122306029001872, "epoch": 0.9521298508692428, "grad_norm": 0.675037682056427, "learning_rate": 1.2134575199794072e-05, "loss": 1.4007, "mean_token_accuracy": 0.6560028443733851, "num_tokens": 1452824357.0, "step": 8667 }, { "entropy": 1.7048703233400981, "epoch": 0.9522397077806157, "grad_norm": 0.7334290146827698, "learning_rate": 1.213298682663581e-05, "loss": 1.3043, "mean_token_accuracy": 0.6710883726676306, "num_tokens": 1452982914.0, "step": 8668 }, { "entropy": 1.7539819777011871, "epoch": 0.9523495646919887, "grad_norm": 0.7398406267166138, "learning_rate": 1.2131398417619029e-05, "loss": 1.3738, "mean_token_accuracy": 0.6580022970835367, "num_tokens": 1453109599.0, "step": 8669 }, { "entropy": 1.6519952714443207, "epoch": 0.9524594216033616, "grad_norm": 0.7986524701118469, "learning_rate": 1.2129809972793997e-05, "loss": 1.413, "mean_token_accuracy": 0.658644050359726, "num_tokens": 1453319483.0, "step": 8670 }, { "entropy": 1.7309541801611583, "epoch": 0.9525692785147346, "grad_norm": 0.7906885147094727, "learning_rate": 1.2128221492210986e-05, "loss": 1.3046, "mean_token_accuracy": 0.6700306981801987, "num_tokens": 1453490803.0, "step": 8671 }, { "entropy": 1.6974034408728282, "epoch": 0.9526791354261075, "grad_norm": 0.7683852314949036, "learning_rate": 1.2126632975920277e-05, "loss": 1.3621, "mean_token_accuracy": 0.6743018825848898, "num_tokens": 1453617883.0, "step": 8672 }, { "entropy": 1.7786914706230164, "epoch": 0.9527889923374804, "grad_norm": 0.7997028231620789, "learning_rate": 1.2125044423972139e-05, "loss": 1.588, "mean_token_accuracy": 0.6399167478084564, "num_tokens": 1453781186.0, "step": 8673 }, { "entropy": 1.7436818778514862, "epoch": 0.9528988492488534, "grad_norm": 0.702833890914917, "learning_rate": 1.2123455836416852e-05, "loss": 1.2924, "mean_token_accuracy": 0.6721568206946055, "num_tokens": 1453888795.0, "step": 8674 }, { "entropy": 1.7169397870699565, "epoch": 0.9530087061602263, "grad_norm": 0.7126211524009705, "learning_rate": 1.2121867213304692e-05, "loss": 1.4358, "mean_token_accuracy": 0.6485247810681661, "num_tokens": 1454039464.0, "step": 8675 }, { "entropy": 1.628256380558014, "epoch": 0.9531185630715993, "grad_norm": 0.6888135671615601, "learning_rate": 1.2120278554685944e-05, "loss": 1.3604, "mean_token_accuracy": 0.6754744102557501, "num_tokens": 1454188174.0, "step": 8676 }, { "entropy": 1.7532523274421692, "epoch": 0.9532284199829721, "grad_norm": 0.6963343620300293, "learning_rate": 1.2118689860610882e-05, "loss": 1.3401, "mean_token_accuracy": 0.6503902872403463, "num_tokens": 1454357110.0, "step": 8677 }, { "entropy": 1.7357207636038463, "epoch": 0.9533382768943451, "grad_norm": 0.6894516348838806, "learning_rate": 1.2117101131129793e-05, "loss": 1.3727, "mean_token_accuracy": 0.661634643872579, "num_tokens": 1454486942.0, "step": 8678 }, { "entropy": 1.674231469631195, "epoch": 0.953448133805718, "grad_norm": 0.5685495734214783, "learning_rate": 1.2115512366292954e-05, "loss": 1.3578, "mean_token_accuracy": 0.6616425861914953, "num_tokens": 1454666674.0, "step": 8679 }, { "entropy": 1.765285313129425, "epoch": 0.953557990717091, "grad_norm": 0.7109258770942688, "learning_rate": 1.2113923566150651e-05, "loss": 1.3287, "mean_token_accuracy": 0.6567708303531011, "num_tokens": 1454802351.0, "step": 8680 }, { "entropy": 1.709367722272873, "epoch": 0.9536678476284639, "grad_norm": 0.6101370453834534, "learning_rate": 1.211233473075317e-05, "loss": 1.3885, "mean_token_accuracy": 0.6554233133792877, "num_tokens": 1454989959.0, "step": 8681 }, { "entropy": 1.7657522161801655, "epoch": 0.9537777045398369, "grad_norm": 0.6212213039398193, "learning_rate": 1.2110745860150798e-05, "loss": 1.4819, "mean_token_accuracy": 0.6521278421084086, "num_tokens": 1455172258.0, "step": 8682 }, { "entropy": 1.7684936622778575, "epoch": 0.9538875614512098, "grad_norm": 0.8916065692901611, "learning_rate": 1.2109156954393815e-05, "loss": 1.516, "mean_token_accuracy": 0.6521298487981161, "num_tokens": 1455316700.0, "step": 8683 }, { "entropy": 1.7200209399064381, "epoch": 0.9539974183625828, "grad_norm": 0.6917714476585388, "learning_rate": 1.210756801353252e-05, "loss": 1.5067, "mean_token_accuracy": 0.6368722418944041, "num_tokens": 1455501678.0, "step": 8684 }, { "entropy": 1.7058296203613281, "epoch": 0.9541072752739557, "grad_norm": 0.6779616475105286, "learning_rate": 1.2105979037617196e-05, "loss": 1.4618, "mean_token_accuracy": 0.6522943874200186, "num_tokens": 1455659558.0, "step": 8685 }, { "entropy": 1.648918906847636, "epoch": 0.9542171321853287, "grad_norm": 0.5767722725868225, "learning_rate": 1.210439002669813e-05, "loss": 1.4618, "mean_token_accuracy": 0.6379017184178034, "num_tokens": 1455914506.0, "step": 8686 }, { "entropy": 1.7035534083843231, "epoch": 0.9543269890967016, "grad_norm": 0.7029200792312622, "learning_rate": 1.2102800980825617e-05, "loss": 1.3144, "mean_token_accuracy": 0.6641533325115839, "num_tokens": 1456036680.0, "step": 8687 }, { "entropy": 1.68134809533755, "epoch": 0.9544368460080744, "grad_norm": 0.9198618531227112, "learning_rate": 1.2101211900049954e-05, "loss": 1.3963, "mean_token_accuracy": 0.6574839899937311, "num_tokens": 1456182571.0, "step": 8688 }, { "entropy": 1.7450095514456432, "epoch": 0.9545467029194474, "grad_norm": 0.7267429232597351, "learning_rate": 1.2099622784421426e-05, "loss": 1.4871, "mean_token_accuracy": 0.6336076408624649, "num_tokens": 1456363555.0, "step": 8689 }, { "entropy": 1.749456803003947, "epoch": 0.9546565598308203, "grad_norm": 4.669123649597168, "learning_rate": 1.2098033633990336e-05, "loss": 1.02, "mean_token_accuracy": 0.6833541542291641, "num_tokens": 1456514456.0, "step": 8690 }, { "entropy": 1.7409884134928386, "epoch": 0.9547664167421933, "grad_norm": 0.7867989540100098, "learning_rate": 1.2096444448806977e-05, "loss": 1.4597, "mean_token_accuracy": 0.6389687110980352, "num_tokens": 1456692736.0, "step": 8691 }, { "entropy": 1.701568841934204, "epoch": 0.9548762736535662, "grad_norm": 0.6494891047477722, "learning_rate": 1.209485522892164e-05, "loss": 1.3752, "mean_token_accuracy": 0.6650643845399221, "num_tokens": 1456880796.0, "step": 8692 }, { "entropy": 1.7152188817660015, "epoch": 0.9549861305649392, "grad_norm": 0.7680609226226807, "learning_rate": 1.2093265974384631e-05, "loss": 1.3529, "mean_token_accuracy": 0.6595882922410965, "num_tokens": 1457008533.0, "step": 8693 }, { "entropy": 1.722943127155304, "epoch": 0.9550959874763121, "grad_norm": 0.6621650457382202, "learning_rate": 1.2091676685246252e-05, "loss": 1.5738, "mean_token_accuracy": 0.6255774199962616, "num_tokens": 1457229767.0, "step": 8694 }, { "entropy": 1.702676256497701, "epoch": 0.9552058443876851, "grad_norm": 0.5807628631591797, "learning_rate": 1.209008736155679e-05, "loss": 1.4362, "mean_token_accuracy": 0.657213474313418, "num_tokens": 1457448218.0, "step": 8695 }, { "entropy": 1.7524159948031108, "epoch": 0.955315701299058, "grad_norm": 0.9052096605300903, "learning_rate": 1.208849800336656e-05, "loss": 1.5353, "mean_token_accuracy": 0.6409603903690974, "num_tokens": 1457652077.0, "step": 8696 }, { "entropy": 1.6945769389470418, "epoch": 0.955425558210431, "grad_norm": 0.6669119596481323, "learning_rate": 1.2086908610725854e-05, "loss": 1.5198, "mean_token_accuracy": 0.6341730306545893, "num_tokens": 1457881605.0, "step": 8697 }, { "entropy": 1.757227510213852, "epoch": 0.9555354151218038, "grad_norm": 0.6839233636856079, "learning_rate": 1.2085319183684981e-05, "loss": 1.4284, "mean_token_accuracy": 0.6508975972731909, "num_tokens": 1458082960.0, "step": 8698 }, { "entropy": 1.7042691508928935, "epoch": 0.9556452720331768, "grad_norm": 0.6574342846870422, "learning_rate": 1.2083729722294246e-05, "loss": 1.5346, "mean_token_accuracy": 0.6502645313739777, "num_tokens": 1458281946.0, "step": 8699 }, { "entropy": 1.6815843482812245, "epoch": 0.9557551289445497, "grad_norm": 0.7146515846252441, "learning_rate": 1.2082140226603955e-05, "loss": 1.3785, "mean_token_accuracy": 0.6631735612948736, "num_tokens": 1458433277.0, "step": 8700 }, { "entropy": 1.6828182240327199, "epoch": 0.9558649858559226, "grad_norm": 0.6447663307189941, "learning_rate": 1.2080550696664413e-05, "loss": 1.2576, "mean_token_accuracy": 0.6774131655693054, "num_tokens": 1458590309.0, "step": 8701 }, { "entropy": 1.6610161860783894, "epoch": 0.9559748427672956, "grad_norm": 0.5871066451072693, "learning_rate": 1.2078961132525929e-05, "loss": 1.325, "mean_token_accuracy": 0.666997030377388, "num_tokens": 1458767372.0, "step": 8702 }, { "entropy": 1.6988926430543263, "epoch": 0.9560846996786685, "grad_norm": 0.6767246127128601, "learning_rate": 1.2077371534238809e-05, "loss": 1.4668, "mean_token_accuracy": 0.6547530144453049, "num_tokens": 1458929290.0, "step": 8703 }, { "entropy": 1.7858167787392933, "epoch": 0.9561945565900415, "grad_norm": 0.6854000091552734, "learning_rate": 1.2075781901853367e-05, "loss": 1.3713, "mean_token_accuracy": 0.658269797762235, "num_tokens": 1459077793.0, "step": 8704 }, { "entropy": 1.6693780521551769, "epoch": 0.9563044135014144, "grad_norm": 0.6797814965248108, "learning_rate": 1.2074192235419908e-05, "loss": 1.2583, "mean_token_accuracy": 0.6774491270383199, "num_tokens": 1459203208.0, "step": 8705 }, { "entropy": 1.7343849937121074, "epoch": 0.9564142704127874, "grad_norm": 0.588331401348114, "learning_rate": 1.2072602534988756e-05, "loss": 1.4504, "mean_token_accuracy": 0.643997256954511, "num_tokens": 1459385818.0, "step": 8706 }, { "entropy": 1.6691008905569713, "epoch": 0.9565241273241603, "grad_norm": 0.7460022568702698, "learning_rate": 1.2071012800610214e-05, "loss": 1.3452, "mean_token_accuracy": 0.6772498339414597, "num_tokens": 1459524006.0, "step": 8707 }, { "entropy": 1.6213213801383972, "epoch": 0.9566339842355333, "grad_norm": 0.6637667417526245, "learning_rate": 1.2069423032334598e-05, "loss": 1.3413, "mean_token_accuracy": 0.6586452474196752, "num_tokens": 1459689191.0, "step": 8708 }, { "entropy": 1.6628740727901459, "epoch": 0.9567438411469061, "grad_norm": 0.6381793022155762, "learning_rate": 1.2067833230212225e-05, "loss": 1.4917, "mean_token_accuracy": 0.6472151229778925, "num_tokens": 1459903129.0, "step": 8709 }, { "entropy": 1.734858940045039, "epoch": 0.9568536980582791, "grad_norm": 0.7690825462341309, "learning_rate": 1.2066243394293412e-05, "loss": 1.4222, "mean_token_accuracy": 0.6538131634394327, "num_tokens": 1460058233.0, "step": 8710 }, { "entropy": 1.7033085723718007, "epoch": 0.956963554969652, "grad_norm": 0.6803534626960754, "learning_rate": 1.2064653524628478e-05, "loss": 1.3746, "mean_token_accuracy": 0.6563105036815008, "num_tokens": 1460257861.0, "step": 8711 }, { "entropy": 1.6802352865537007, "epoch": 0.957073411881025, "grad_norm": 0.6258231401443481, "learning_rate": 1.2063063621267738e-05, "loss": 1.3747, "mean_token_accuracy": 0.6545542577902476, "num_tokens": 1460441507.0, "step": 8712 }, { "entropy": 1.7096125185489655, "epoch": 0.9571832687923979, "grad_norm": 0.7180109024047852, "learning_rate": 1.2061473684261513e-05, "loss": 1.3745, "mean_token_accuracy": 0.6694450577100118, "num_tokens": 1460625014.0, "step": 8713 }, { "entropy": 1.6804834107557933, "epoch": 0.9572931257037708, "grad_norm": 0.8495198488235474, "learning_rate": 1.2059883713660125e-05, "loss": 1.3337, "mean_token_accuracy": 0.656502236922582, "num_tokens": 1460792252.0, "step": 8714 }, { "entropy": 1.6971332728862762, "epoch": 0.9574029826151438, "grad_norm": 0.6549186706542969, "learning_rate": 1.2058293709513896e-05, "loss": 1.406, "mean_token_accuracy": 0.6549234290917715, "num_tokens": 1460980358.0, "step": 8715 }, { "entropy": 1.6848892569541931, "epoch": 0.9575128395265167, "grad_norm": 0.6425775289535522, "learning_rate": 1.2056703671873148e-05, "loss": 1.3264, "mean_token_accuracy": 0.6857404808203379, "num_tokens": 1461152259.0, "step": 8716 }, { "entropy": 1.7091786166032155, "epoch": 0.9576226964378897, "grad_norm": 8.731441497802734, "learning_rate": 1.2055113600788202e-05, "loss": 1.2535, "mean_token_accuracy": 0.6799486676851908, "num_tokens": 1461321663.0, "step": 8717 }, { "entropy": 1.748598317305247, "epoch": 0.9577325533492625, "grad_norm": 0.6042277812957764, "learning_rate": 1.205352349630939e-05, "loss": 1.4481, "mean_token_accuracy": 0.6515509237845739, "num_tokens": 1461516392.0, "step": 8718 }, { "entropy": 1.786596695582072, "epoch": 0.9578424102606355, "grad_norm": 0.636871337890625, "learning_rate": 1.2051933358487031e-05, "loss": 1.6215, "mean_token_accuracy": 0.6109706809123358, "num_tokens": 1461727681.0, "step": 8719 }, { "entropy": 1.7093546092510223, "epoch": 0.9579522671720084, "grad_norm": 0.6685346364974976, "learning_rate": 1.2050343187371457e-05, "loss": 1.3936, "mean_token_accuracy": 0.6527099361022314, "num_tokens": 1461888422.0, "step": 8720 }, { "entropy": 1.7236407697200775, "epoch": 0.9580621240833814, "grad_norm": 0.750136137008667, "learning_rate": 1.2048752983012992e-05, "loss": 1.3902, "mean_token_accuracy": 0.661915456255277, "num_tokens": 1462058752.0, "step": 8721 }, { "entropy": 1.6986550291379292, "epoch": 0.9581719809947543, "grad_norm": 0.7931959629058838, "learning_rate": 1.2047162745461974e-05, "loss": 1.255, "mean_token_accuracy": 0.6735956718524297, "num_tokens": 1462196633.0, "step": 8722 }, { "entropy": 1.6549886465072632, "epoch": 0.9582818379061273, "grad_norm": 0.6466114521026611, "learning_rate": 1.2045572474768718e-05, "loss": 1.4337, "mean_token_accuracy": 0.671045849720637, "num_tokens": 1462409134.0, "step": 8723 }, { "entropy": 1.7527413566907246, "epoch": 0.9583916948175002, "grad_norm": 0.8396289348602295, "learning_rate": 1.2043982170983568e-05, "loss": 1.3088, "mean_token_accuracy": 0.679228276014328, "num_tokens": 1462541782.0, "step": 8724 }, { "entropy": 1.735308289527893, "epoch": 0.9585015517288732, "grad_norm": 0.6846469044685364, "learning_rate": 1.2042391834156854e-05, "loss": 1.4979, "mean_token_accuracy": 0.652966578801473, "num_tokens": 1462722456.0, "step": 8725 }, { "entropy": 1.6945312122503917, "epoch": 0.9586114086402461, "grad_norm": 0.7038013339042664, "learning_rate": 1.2040801464338907e-05, "loss": 1.3764, "mean_token_accuracy": 0.6591214487950007, "num_tokens": 1462916582.0, "step": 8726 }, { "entropy": 1.7100801467895508, "epoch": 0.958721265551619, "grad_norm": 0.7070258855819702, "learning_rate": 1.2039211061580063e-05, "loss": 1.5168, "mean_token_accuracy": 0.6492930054664612, "num_tokens": 1463100418.0, "step": 8727 }, { "entropy": 1.745457837978999, "epoch": 0.958831122462992, "grad_norm": 0.7946493625640869, "learning_rate": 1.2037620625930659e-05, "loss": 1.6822, "mean_token_accuracy": 0.6400948514540991, "num_tokens": 1463263777.0, "step": 8728 }, { "entropy": 1.762039452791214, "epoch": 0.9589409793743648, "grad_norm": 0.8285095691680908, "learning_rate": 1.2036030157441026e-05, "loss": 1.3384, "mean_token_accuracy": 0.658534953991572, "num_tokens": 1463407883.0, "step": 8729 }, { "entropy": 1.7134557962417603, "epoch": 0.9590508362857378, "grad_norm": 0.6590238809585571, "learning_rate": 1.2034439656161509e-05, "loss": 1.5242, "mean_token_accuracy": 0.6427919020255407, "num_tokens": 1463608060.0, "step": 8730 }, { "entropy": 1.6973025898138683, "epoch": 0.9591606931971107, "grad_norm": 0.6781467199325562, "learning_rate": 1.203284912214244e-05, "loss": 1.4759, "mean_token_accuracy": 0.6487634430329005, "num_tokens": 1463783712.0, "step": 8731 }, { "entropy": 1.676356424887975, "epoch": 0.9592705501084837, "grad_norm": 0.5379504561424255, "learning_rate": 1.2031258555434164e-05, "loss": 1.3997, "mean_token_accuracy": 0.651170089840889, "num_tokens": 1464018941.0, "step": 8732 }, { "entropy": 1.6823839048544567, "epoch": 0.9593804070198566, "grad_norm": 0.6782552003860474, "learning_rate": 1.2029667956087017e-05, "loss": 1.3481, "mean_token_accuracy": 0.6624757548173269, "num_tokens": 1464167232.0, "step": 8733 }, { "entropy": 1.7918393512566884, "epoch": 0.9594902639312296, "grad_norm": 0.7031469345092773, "learning_rate": 1.2028077324151347e-05, "loss": 1.4965, "mean_token_accuracy": 0.6428997168938319, "num_tokens": 1464340879.0, "step": 8734 }, { "entropy": 1.6191656390825908, "epoch": 0.9596001208426025, "grad_norm": 0.7140489816665649, "learning_rate": 1.202648665967749e-05, "loss": 1.1656, "mean_token_accuracy": 0.688547745347023, "num_tokens": 1464452058.0, "step": 8735 }, { "entropy": 1.6774865587552388, "epoch": 0.9597099777539755, "grad_norm": 0.6400611996650696, "learning_rate": 1.2024895962715795e-05, "loss": 1.476, "mean_token_accuracy": 0.6562142173449198, "num_tokens": 1464656449.0, "step": 8736 }, { "entropy": 1.7151943445205688, "epoch": 0.9598198346653484, "grad_norm": 0.7991637587547302, "learning_rate": 1.2023305233316602e-05, "loss": 1.3804, "mean_token_accuracy": 0.6708137293656667, "num_tokens": 1464806740.0, "step": 8737 }, { "entropy": 1.7741727034250896, "epoch": 0.9599296915767214, "grad_norm": 0.7432534694671631, "learning_rate": 1.2021714471530262e-05, "loss": 1.4837, "mean_token_accuracy": 0.6382344514131546, "num_tokens": 1464971963.0, "step": 8738 }, { "entropy": 1.701940377553304, "epoch": 0.9600395484880943, "grad_norm": 0.6169398427009583, "learning_rate": 1.2020123677407113e-05, "loss": 1.4066, "mean_token_accuracy": 0.6589889874060949, "num_tokens": 1465162928.0, "step": 8739 }, { "entropy": 1.6859253843625386, "epoch": 0.9601494053994672, "grad_norm": 0.7215724587440491, "learning_rate": 1.2018532850997518e-05, "loss": 1.3828, "mean_token_accuracy": 0.6706574161847433, "num_tokens": 1465300333.0, "step": 8740 }, { "entropy": 1.760906199614207, "epoch": 0.9602592623108401, "grad_norm": 0.6548580527305603, "learning_rate": 1.2016941992351811e-05, "loss": 1.5413, "mean_token_accuracy": 0.6272419343392054, "num_tokens": 1465507133.0, "step": 8741 }, { "entropy": 1.7261857688426971, "epoch": 0.960369119222213, "grad_norm": 0.6962506771087646, "learning_rate": 1.2015351101520354e-05, "loss": 1.4314, "mean_token_accuracy": 0.6458855321009954, "num_tokens": 1465673287.0, "step": 8742 }, { "entropy": 1.7670903007189434, "epoch": 0.960478976133586, "grad_norm": 0.6517634987831116, "learning_rate": 1.2013760178553487e-05, "loss": 1.2565, "mean_token_accuracy": 0.6761279304822286, "num_tokens": 1465809133.0, "step": 8743 }, { "entropy": 1.718291014432907, "epoch": 0.9605888330449589, "grad_norm": 0.7264907956123352, "learning_rate": 1.2012169223501568e-05, "loss": 1.3405, "mean_token_accuracy": 0.6658484935760498, "num_tokens": 1465965362.0, "step": 8744 }, { "entropy": 1.6515865127245586, "epoch": 0.9606986899563319, "grad_norm": 0.7858138680458069, "learning_rate": 1.2010578236414949e-05, "loss": 1.4631, "mean_token_accuracy": 0.6571001460154852, "num_tokens": 1466178520.0, "step": 8745 }, { "entropy": 1.6674973865350087, "epoch": 0.9608085468677048, "grad_norm": 0.6715067625045776, "learning_rate": 1.2008987217343986e-05, "loss": 1.3843, "mean_token_accuracy": 0.6489299088716507, "num_tokens": 1466362772.0, "step": 8746 }, { "entropy": 1.6680465439955394, "epoch": 0.9609184037790778, "grad_norm": 0.6776669025421143, "learning_rate": 1.2007396166339035e-05, "loss": 1.402, "mean_token_accuracy": 0.649745578567187, "num_tokens": 1466526197.0, "step": 8747 }, { "entropy": 1.6848007043202717, "epoch": 0.9610282606904507, "grad_norm": 0.654761016368866, "learning_rate": 1.2005805083450443e-05, "loss": 1.3582, "mean_token_accuracy": 0.6620151400566101, "num_tokens": 1466659274.0, "step": 8748 }, { "entropy": 1.6908719142278035, "epoch": 0.9611381176018237, "grad_norm": 0.7975518703460693, "learning_rate": 1.2004213968728575e-05, "loss": 1.3319, "mean_token_accuracy": 0.6631641636292139, "num_tokens": 1466793078.0, "step": 8749 }, { "entropy": 1.69454359014829, "epoch": 0.9612479745131965, "grad_norm": 0.6039302349090576, "learning_rate": 1.200262282222379e-05, "loss": 1.4186, "mean_token_accuracy": 0.6558008641004562, "num_tokens": 1466937877.0, "step": 8750 }, { "entropy": 1.7083939115206401, "epoch": 0.9613578314245695, "grad_norm": 0.6897109746932983, "learning_rate": 1.200103164398644e-05, "loss": 1.3845, "mean_token_accuracy": 0.661809429526329, "num_tokens": 1467157146.0, "step": 8751 }, { "entropy": 1.6259233554204304, "epoch": 0.9614676883359424, "grad_norm": 0.6461367607116699, "learning_rate": 1.1999440434066896e-05, "loss": 1.4192, "mean_token_accuracy": 0.6577809949715933, "num_tokens": 1467315246.0, "step": 8752 }, { "entropy": 1.7572091619173686, "epoch": 0.9615775452473154, "grad_norm": 0.8533002734184265, "learning_rate": 1.199784919251551e-05, "loss": 1.6754, "mean_token_accuracy": 0.6236068258682886, "num_tokens": 1467482267.0, "step": 8753 }, { "entropy": 1.770795226097107, "epoch": 0.9616874021586883, "grad_norm": 0.7685062885284424, "learning_rate": 1.1996257919382646e-05, "loss": 1.6123, "mean_token_accuracy": 0.6360293204585711, "num_tokens": 1467666169.0, "step": 8754 }, { "entropy": 1.7153681516647339, "epoch": 0.9617972590700612, "grad_norm": 0.695287823677063, "learning_rate": 1.1994666614718667e-05, "loss": 1.3786, "mean_token_accuracy": 0.6639162302017212, "num_tokens": 1467807005.0, "step": 8755 }, { "entropy": 1.7088010211785634, "epoch": 0.9619071159814342, "grad_norm": 0.701137900352478, "learning_rate": 1.1993075278573938e-05, "loss": 1.5254, "mean_token_accuracy": 0.6290678034226099, "num_tokens": 1468014033.0, "step": 8756 }, { "entropy": 1.6555834611256917, "epoch": 0.9620169728928071, "grad_norm": 0.6522089838981628, "learning_rate": 1.1991483910998823e-05, "loss": 1.6107, "mean_token_accuracy": 0.6368126993378004, "num_tokens": 1468194921.0, "step": 8757 }, { "entropy": 1.7156870265801747, "epoch": 0.9621268298041801, "grad_norm": 0.6632962226867676, "learning_rate": 1.1989892512043693e-05, "loss": 1.379, "mean_token_accuracy": 0.6591578175624212, "num_tokens": 1468353097.0, "step": 8758 }, { "entropy": 1.7234142522017162, "epoch": 0.962236686715553, "grad_norm": 0.6855953335762024, "learning_rate": 1.1988301081758908e-05, "loss": 1.4963, "mean_token_accuracy": 0.6408663143714269, "num_tokens": 1468565163.0, "step": 8759 }, { "entropy": 1.6909594734509785, "epoch": 0.962346543626926, "grad_norm": 0.6290444731712341, "learning_rate": 1.1986709620194837e-05, "loss": 1.3152, "mean_token_accuracy": 0.6744314332803091, "num_tokens": 1468717687.0, "step": 8760 }, { "entropy": 1.7224095662434895, "epoch": 0.9624564005382988, "grad_norm": 0.7756173014640808, "learning_rate": 1.1985118127401854e-05, "loss": 1.401, "mean_token_accuracy": 0.6540177861849467, "num_tokens": 1468867338.0, "step": 8761 }, { "entropy": 1.6869498590628307, "epoch": 0.9625662574496718, "grad_norm": 0.8160791397094727, "learning_rate": 1.1983526603430328e-05, "loss": 1.4635, "mean_token_accuracy": 0.6533959607283274, "num_tokens": 1469062599.0, "step": 8762 }, { "entropy": 1.7486900488535564, "epoch": 0.9626761143610447, "grad_norm": 0.6580154895782471, "learning_rate": 1.1981935048330625e-05, "loss": 1.3756, "mean_token_accuracy": 0.648088201880455, "num_tokens": 1469193764.0, "step": 8763 }, { "entropy": 1.7289330164591472, "epoch": 0.9627859712724177, "grad_norm": 0.8532882928848267, "learning_rate": 1.1980343462153121e-05, "loss": 1.2638, "mean_token_accuracy": 0.6606898903846741, "num_tokens": 1469364686.0, "step": 8764 }, { "entropy": 1.6669448614120483, "epoch": 0.9628958281837906, "grad_norm": 0.6776072978973389, "learning_rate": 1.1978751844948188e-05, "loss": 1.2815, "mean_token_accuracy": 0.6700325111548106, "num_tokens": 1469538171.0, "step": 8765 }, { "entropy": 1.7350335617860158, "epoch": 0.9630056850951636, "grad_norm": 0.6142683029174805, "learning_rate": 1.1977160196766203e-05, "loss": 1.4018, "mean_token_accuracy": 0.6594241609176, "num_tokens": 1469706808.0, "step": 8766 }, { "entropy": 1.6608028213183086, "epoch": 0.9631155420065365, "grad_norm": 0.6166122555732727, "learning_rate": 1.1975568517657532e-05, "loss": 1.5136, "mean_token_accuracy": 0.6444362699985504, "num_tokens": 1469920924.0, "step": 8767 }, { "entropy": 1.6671899060408275, "epoch": 0.9632253989179094, "grad_norm": 0.6786704659461975, "learning_rate": 1.1973976807672563e-05, "loss": 1.5595, "mean_token_accuracy": 0.6488665342330933, "num_tokens": 1470140541.0, "step": 8768 }, { "entropy": 1.6928558846314747, "epoch": 0.9633352558292824, "grad_norm": 0.6339307427406311, "learning_rate": 1.1972385066861665e-05, "loss": 1.3924, "mean_token_accuracy": 0.6605489750703176, "num_tokens": 1470311335.0, "step": 8769 }, { "entropy": 1.7137080430984497, "epoch": 0.9634451127406553, "grad_norm": 0.6629822850227356, "learning_rate": 1.1970793295275216e-05, "loss": 1.5686, "mean_token_accuracy": 0.6393506328264872, "num_tokens": 1470533352.0, "step": 8770 }, { "entropy": 1.731887976328532, "epoch": 0.9635549696520282, "grad_norm": 0.6983689665794373, "learning_rate": 1.1969201492963599e-05, "loss": 1.3827, "mean_token_accuracy": 0.664002334078153, "num_tokens": 1470669337.0, "step": 8771 }, { "entropy": 1.6695733070373535, "epoch": 0.9636648265634011, "grad_norm": 0.6725772023200989, "learning_rate": 1.1967609659977188e-05, "loss": 1.3551, "mean_token_accuracy": 0.6574635605017344, "num_tokens": 1470818033.0, "step": 8772 }, { "entropy": 1.688662052154541, "epoch": 0.9637746834747741, "grad_norm": 0.646634042263031, "learning_rate": 1.1966017796366372e-05, "loss": 1.5005, "mean_token_accuracy": 0.652362714211146, "num_tokens": 1470969505.0, "step": 8773 }, { "entropy": 1.7582306861877441, "epoch": 0.963884540386147, "grad_norm": 0.7543734908103943, "learning_rate": 1.1964425902181526e-05, "loss": 1.4083, "mean_token_accuracy": 0.66182312866052, "num_tokens": 1471138254.0, "step": 8774 }, { "entropy": 1.6540814240773518, "epoch": 0.96399439729752, "grad_norm": 0.7629412412643433, "learning_rate": 1.1962833977473035e-05, "loss": 1.4068, "mean_token_accuracy": 0.6505512396494547, "num_tokens": 1471313832.0, "step": 8775 }, { "entropy": 1.7056627968947093, "epoch": 0.9641042542088929, "grad_norm": 0.7697269320487976, "learning_rate": 1.1961242022291281e-05, "loss": 1.3822, "mean_token_accuracy": 0.6581480453411738, "num_tokens": 1471490485.0, "step": 8776 }, { "entropy": 1.7259169320265453, "epoch": 0.9642141111202659, "grad_norm": 0.6654592156410217, "learning_rate": 1.1959650036686652e-05, "loss": 1.3186, "mean_token_accuracy": 0.659534772237142, "num_tokens": 1471647049.0, "step": 8777 }, { "entropy": 1.7011475265026093, "epoch": 0.9643239680316388, "grad_norm": 0.7446539402008057, "learning_rate": 1.195805802070953e-05, "loss": 1.4798, "mean_token_accuracy": 0.6635573208332062, "num_tokens": 1471799236.0, "step": 8778 }, { "entropy": 1.7411305209000905, "epoch": 0.9644338249430118, "grad_norm": 0.6938983201980591, "learning_rate": 1.1956465974410305e-05, "loss": 1.3509, "mean_token_accuracy": 0.6578657031059265, "num_tokens": 1471915469.0, "step": 8779 }, { "entropy": 1.732979655265808, "epoch": 0.9645436818543847, "grad_norm": 0.7012693881988525, "learning_rate": 1.1954873897839363e-05, "loss": 1.2764, "mean_token_accuracy": 0.6760233988364538, "num_tokens": 1472040376.0, "step": 8780 }, { "entropy": 1.707295298576355, "epoch": 0.9646535387657575, "grad_norm": 0.7303659319877625, "learning_rate": 1.1953281791047091e-05, "loss": 1.413, "mean_token_accuracy": 0.67343603571256, "num_tokens": 1472152558.0, "step": 8781 }, { "entropy": 1.6815251310666401, "epoch": 0.9647633956771305, "grad_norm": 0.5562863945960999, "learning_rate": 1.1951689654083883e-05, "loss": 1.3436, "mean_token_accuracy": 0.6479224115610123, "num_tokens": 1472325563.0, "step": 8782 }, { "entropy": 1.6789535880088806, "epoch": 0.9648732525885034, "grad_norm": 0.7839949131011963, "learning_rate": 1.195009748700012e-05, "loss": 1.3481, "mean_token_accuracy": 0.6642525096734365, "num_tokens": 1472504702.0, "step": 8783 }, { "entropy": 1.715220332145691, "epoch": 0.9649831094998764, "grad_norm": 0.7262733578681946, "learning_rate": 1.1948505289846205e-05, "loss": 1.421, "mean_token_accuracy": 0.6491985072692236, "num_tokens": 1472646861.0, "step": 8784 }, { "entropy": 1.7468859950701396, "epoch": 0.9650929664112493, "grad_norm": 0.6974389553070068, "learning_rate": 1.194691306267252e-05, "loss": 1.4079, "mean_token_accuracy": 0.6441583534081777, "num_tokens": 1472806006.0, "step": 8785 }, { "entropy": 1.6872367163499196, "epoch": 0.9652028233226223, "grad_norm": 0.6552119851112366, "learning_rate": 1.194532080552947e-05, "loss": 1.4084, "mean_token_accuracy": 0.6529111266136169, "num_tokens": 1472994372.0, "step": 8786 }, { "entropy": 1.6701744496822357, "epoch": 0.9653126802339952, "grad_norm": 0.7151638269424438, "learning_rate": 1.1943728518467441e-05, "loss": 1.2341, "mean_token_accuracy": 0.6837707708279291, "num_tokens": 1473126393.0, "step": 8787 }, { "entropy": 1.7374973396460216, "epoch": 0.9654225371453682, "grad_norm": 0.619019091129303, "learning_rate": 1.1942136201536827e-05, "loss": 1.4518, "mean_token_accuracy": 0.6381538957357407, "num_tokens": 1473343494.0, "step": 8788 }, { "entropy": 1.6976648370424907, "epoch": 0.9655323940567411, "grad_norm": 0.7506789565086365, "learning_rate": 1.1940543854788026e-05, "loss": 1.2836, "mean_token_accuracy": 0.67548568546772, "num_tokens": 1473474533.0, "step": 8789 }, { "entropy": 1.698626885811488, "epoch": 0.9656422509681141, "grad_norm": 0.6703222990036011, "learning_rate": 1.193895147827144e-05, "loss": 1.3658, "mean_token_accuracy": 0.6542994330326716, "num_tokens": 1473651782.0, "step": 8790 }, { "entropy": 1.7644267777601879, "epoch": 0.965752107879487, "grad_norm": 0.6878910064697266, "learning_rate": 1.1937359072037458e-05, "loss": 1.5286, "mean_token_accuracy": 0.6460073043902715, "num_tokens": 1473864651.0, "step": 8791 }, { "entropy": 1.6762928366661072, "epoch": 0.96586196479086, "grad_norm": 0.7150443196296692, "learning_rate": 1.1935766636136487e-05, "loss": 1.3087, "mean_token_accuracy": 0.6731075594822565, "num_tokens": 1474006770.0, "step": 8792 }, { "entropy": 1.689261128505071, "epoch": 0.9659718217022328, "grad_norm": 0.6254614591598511, "learning_rate": 1.1934174170618921e-05, "loss": 1.4649, "mean_token_accuracy": 0.6447116434574127, "num_tokens": 1474190445.0, "step": 8793 }, { "entropy": 1.6668556829293568, "epoch": 0.9660816786136058, "grad_norm": 0.7564082741737366, "learning_rate": 1.1932581675535167e-05, "loss": 1.3167, "mean_token_accuracy": 0.6619139909744263, "num_tokens": 1474344961.0, "step": 8794 }, { "entropy": 1.7443881531556447, "epoch": 0.9661915355249787, "grad_norm": 0.7837411761283875, "learning_rate": 1.193098915093562e-05, "loss": 1.4101, "mean_token_accuracy": 0.6615714579820633, "num_tokens": 1474529968.0, "step": 8795 }, { "entropy": 1.7280802925427754, "epoch": 0.9663013924363516, "grad_norm": 0.8796549439430237, "learning_rate": 1.1929396596870688e-05, "loss": 1.5307, "mean_token_accuracy": 0.6449608951807022, "num_tokens": 1474654137.0, "step": 8796 }, { "entropy": 1.714556525150935, "epoch": 0.9664112493477246, "grad_norm": 0.6233653426170349, "learning_rate": 1.1927804013390771e-05, "loss": 1.292, "mean_token_accuracy": 0.6737811714410782, "num_tokens": 1474821466.0, "step": 8797 }, { "entropy": 1.7329127391179402, "epoch": 0.9665211062590975, "grad_norm": 0.6161532402038574, "learning_rate": 1.1926211400546276e-05, "loss": 1.341, "mean_token_accuracy": 0.6685069849093755, "num_tokens": 1474978261.0, "step": 8798 }, { "entropy": 1.730038086573283, "epoch": 0.9666309631704705, "grad_norm": 0.7773633003234863, "learning_rate": 1.1924618758387607e-05, "loss": 1.3345, "mean_token_accuracy": 0.6633727848529816, "num_tokens": 1475124005.0, "step": 8799 }, { "entropy": 1.7478882769743602, "epoch": 0.9667408200818434, "grad_norm": 0.6906635165214539, "learning_rate": 1.1923026086965171e-05, "loss": 1.4519, "mean_token_accuracy": 0.651598185300827, "num_tokens": 1475288346.0, "step": 8800 }, { "entropy": 1.749391367038091, "epoch": 0.9668506769932164, "grad_norm": 0.7163913249969482, "learning_rate": 1.1921433386329375e-05, "loss": 1.3796, "mean_token_accuracy": 0.6630617678165436, "num_tokens": 1475422303.0, "step": 8801 }, { "entropy": 1.6840445597966511, "epoch": 0.9669605339045892, "grad_norm": 0.763927161693573, "learning_rate": 1.191984065653063e-05, "loss": 1.4901, "mean_token_accuracy": 0.6456265101830164, "num_tokens": 1475617488.0, "step": 8802 }, { "entropy": 1.7242066264152527, "epoch": 0.9670703908159622, "grad_norm": 0.6778439283370972, "learning_rate": 1.191824789761934e-05, "loss": 1.2706, "mean_token_accuracy": 0.6764429658651352, "num_tokens": 1475727115.0, "step": 8803 }, { "entropy": 1.69703604777654, "epoch": 0.9671802477273351, "grad_norm": 0.6198210120201111, "learning_rate": 1.1916655109645919e-05, "loss": 1.2735, "mean_token_accuracy": 0.6707077473402023, "num_tokens": 1475872434.0, "step": 8804 }, { "entropy": 1.7205248872439067, "epoch": 0.9672901046387081, "grad_norm": 0.8538046479225159, "learning_rate": 1.1915062292660774e-05, "loss": 1.3696, "mean_token_accuracy": 0.6752174297968546, "num_tokens": 1476017139.0, "step": 8805 }, { "entropy": 1.656046062707901, "epoch": 0.967399961550081, "grad_norm": 0.7897881865501404, "learning_rate": 1.1913469446714323e-05, "loss": 1.266, "mean_token_accuracy": 0.670650397737821, "num_tokens": 1476142714.0, "step": 8806 }, { "entropy": 1.7231378157933552, "epoch": 0.967509818461454, "grad_norm": 0.6750791668891907, "learning_rate": 1.1911876571856975e-05, "loss": 1.2979, "mean_token_accuracy": 0.6721773644288381, "num_tokens": 1476278163.0, "step": 8807 }, { "entropy": 1.720835566520691, "epoch": 0.9676196753728269, "grad_norm": 0.780296266078949, "learning_rate": 1.1910283668139147e-05, "loss": 1.4398, "mean_token_accuracy": 0.6561943292617798, "num_tokens": 1476432943.0, "step": 8808 }, { "entropy": 1.6540588239828746, "epoch": 0.9677295322841998, "grad_norm": 0.6764923334121704, "learning_rate": 1.1908690735611246e-05, "loss": 1.2985, "mean_token_accuracy": 0.6647897958755493, "num_tokens": 1476568567.0, "step": 8809 }, { "entropy": 1.6742003659407299, "epoch": 0.9678393891955728, "grad_norm": 0.6901260018348694, "learning_rate": 1.1907097774323693e-05, "loss": 1.4862, "mean_token_accuracy": 0.6387346585591634, "num_tokens": 1476792318.0, "step": 8810 }, { "entropy": 1.673385351896286, "epoch": 0.9679492461069457, "grad_norm": 0.7130966782569885, "learning_rate": 1.1905504784326907e-05, "loss": 1.2702, "mean_token_accuracy": 0.6630944808324178, "num_tokens": 1476905292.0, "step": 8811 }, { "entropy": 1.6886204878489177, "epoch": 0.9680591030183187, "grad_norm": 0.7114747166633606, "learning_rate": 1.19039117656713e-05, "loss": 1.2468, "mean_token_accuracy": 0.6765016714731852, "num_tokens": 1477053287.0, "step": 8812 }, { "entropy": 1.708604981501897, "epoch": 0.9681689599296915, "grad_norm": 0.6665516495704651, "learning_rate": 1.1902318718407295e-05, "loss": 1.3936, "mean_token_accuracy": 0.6632998138666153, "num_tokens": 1477269370.0, "step": 8813 }, { "entropy": 1.7254629333813984, "epoch": 0.9682788168410645, "grad_norm": 0.8017979860305786, "learning_rate": 1.190072564258531e-05, "loss": 1.3117, "mean_token_accuracy": 0.6620668768882751, "num_tokens": 1477459504.0, "step": 8814 }, { "entropy": 1.709552268187205, "epoch": 0.9683886737524374, "grad_norm": 0.8370741009712219, "learning_rate": 1.1899132538255764e-05, "loss": 1.3385, "mean_token_accuracy": 0.6709966957569122, "num_tokens": 1477580950.0, "step": 8815 }, { "entropy": 1.75144029657046, "epoch": 0.9684985306638104, "grad_norm": 0.7077965140342712, "learning_rate": 1.1897539405469079e-05, "loss": 1.5018, "mean_token_accuracy": 0.642456571261088, "num_tokens": 1477766228.0, "step": 8816 }, { "entropy": 1.6804983814557393, "epoch": 0.9686083875751833, "grad_norm": 0.6244013905525208, "learning_rate": 1.189594624427567e-05, "loss": 1.4332, "mean_token_accuracy": 0.6555044750372568, "num_tokens": 1477975630.0, "step": 8817 }, { "entropy": 1.700407882531484, "epoch": 0.9687182444865563, "grad_norm": 0.5928204655647278, "learning_rate": 1.1894353054725976e-05, "loss": 1.3642, "mean_token_accuracy": 0.6752937485774358, "num_tokens": 1478182533.0, "step": 8818 }, { "entropy": 1.7318575084209442, "epoch": 0.9688281013979292, "grad_norm": 0.7394362092018127, "learning_rate": 1.1892759836870402e-05, "loss": 1.4435, "mean_token_accuracy": 0.6566926191250483, "num_tokens": 1478342831.0, "step": 8819 }, { "entropy": 1.7132834196090698, "epoch": 0.9689379583093022, "grad_norm": 0.6716835498809814, "learning_rate": 1.1891166590759386e-05, "loss": 1.4787, "mean_token_accuracy": 0.6364479611317316, "num_tokens": 1478555411.0, "step": 8820 }, { "entropy": 1.66799263159434, "epoch": 0.9690478152206751, "grad_norm": 0.6324994564056396, "learning_rate": 1.1889573316443349e-05, "loss": 1.4213, "mean_token_accuracy": 0.6493504792451859, "num_tokens": 1478775775.0, "step": 8821 }, { "entropy": 1.7549628714720409, "epoch": 0.969157672132048, "grad_norm": 0.6879369616508484, "learning_rate": 1.1887980013972715e-05, "loss": 1.4407, "mean_token_accuracy": 0.6554758697748184, "num_tokens": 1478969172.0, "step": 8822 }, { "entropy": 1.626541276772817, "epoch": 0.969267529043421, "grad_norm": 0.7112913727760315, "learning_rate": 1.1886386683397917e-05, "loss": 1.4272, "mean_token_accuracy": 0.6542681405941645, "num_tokens": 1479172290.0, "step": 8823 }, { "entropy": 1.7104832927385967, "epoch": 0.9693773859547938, "grad_norm": 0.7072854042053223, "learning_rate": 1.1884793324769379e-05, "loss": 1.3244, "mean_token_accuracy": 0.6610027849674225, "num_tokens": 1479371313.0, "step": 8824 }, { "entropy": 1.621906081835429, "epoch": 0.9694872428661668, "grad_norm": 0.6670812368392944, "learning_rate": 1.1883199938137528e-05, "loss": 1.4952, "mean_token_accuracy": 0.6568761318922043, "num_tokens": 1479538947.0, "step": 8825 }, { "entropy": 1.6460838218530018, "epoch": 0.9695970997775397, "grad_norm": 0.6370675563812256, "learning_rate": 1.18816065235528e-05, "loss": 1.2052, "mean_token_accuracy": 0.6849732001622518, "num_tokens": 1479663363.0, "step": 8826 }, { "entropy": 1.7442928353945415, "epoch": 0.9697069566889127, "grad_norm": 0.7854894995689392, "learning_rate": 1.188001308106562e-05, "loss": 1.2911, "mean_token_accuracy": 0.6643334925174713, "num_tokens": 1479797528.0, "step": 8827 }, { "entropy": 1.7473202149073284, "epoch": 0.9698168136002856, "grad_norm": 0.7597824931144714, "learning_rate": 1.1878419610726423e-05, "loss": 1.3298, "mean_token_accuracy": 0.662042036652565, "num_tokens": 1479958379.0, "step": 8828 }, { "entropy": 1.729845941066742, "epoch": 0.9699266705116586, "grad_norm": 0.7279675602912903, "learning_rate": 1.1876826112585645e-05, "loss": 1.5532, "mean_token_accuracy": 0.6339255919059118, "num_tokens": 1480118370.0, "step": 8829 }, { "entropy": 1.6920412480831146, "epoch": 0.9700365274230315, "grad_norm": 0.6832351684570312, "learning_rate": 1.1875232586693712e-05, "loss": 1.4446, "mean_token_accuracy": 0.653128465016683, "num_tokens": 1480289535.0, "step": 8830 }, { "entropy": 1.634282539288203, "epoch": 0.9701463843344045, "grad_norm": 0.7434066534042358, "learning_rate": 1.1873639033101066e-05, "loss": 1.2848, "mean_token_accuracy": 0.6711482803026835, "num_tokens": 1480427662.0, "step": 8831 }, { "entropy": 1.7319086988766987, "epoch": 0.9702562412457774, "grad_norm": 0.6085994839668274, "learning_rate": 1.1872045451858132e-05, "loss": 1.3227, "mean_token_accuracy": 0.6564686596393585, "num_tokens": 1480575924.0, "step": 8832 }, { "entropy": 1.6918261349201202, "epoch": 0.9703660981571504, "grad_norm": 0.6591848731040955, "learning_rate": 1.1870451843015357e-05, "loss": 1.2071, "mean_token_accuracy": 0.6775870273510615, "num_tokens": 1480693963.0, "step": 8833 }, { "entropy": 1.6758748193581898, "epoch": 0.9704759550685232, "grad_norm": 0.7486820816993713, "learning_rate": 1.186885820662317e-05, "loss": 1.3314, "mean_token_accuracy": 0.6618000318606695, "num_tokens": 1480855735.0, "step": 8834 }, { "entropy": 1.7517311076323192, "epoch": 0.9705858119798962, "grad_norm": 0.6901195049285889, "learning_rate": 1.1867264542732013e-05, "loss": 1.4552, "mean_token_accuracy": 0.6589073191086451, "num_tokens": 1480986317.0, "step": 8835 }, { "entropy": 1.6750148733456929, "epoch": 0.9706956688912691, "grad_norm": 0.6650272011756897, "learning_rate": 1.186567085139233e-05, "loss": 1.3836, "mean_token_accuracy": 0.6477328389883041, "num_tokens": 1481124442.0, "step": 8836 }, { "entropy": 1.6886054178078969, "epoch": 0.970805525802642, "grad_norm": 0.7245535850524902, "learning_rate": 1.1864077132654547e-05, "loss": 1.246, "mean_token_accuracy": 0.6800411989291509, "num_tokens": 1481244157.0, "step": 8837 }, { "entropy": 1.755658229192098, "epoch": 0.970915382714015, "grad_norm": 0.6730127334594727, "learning_rate": 1.1862483386569116e-05, "loss": 1.3982, "mean_token_accuracy": 0.64553735156854, "num_tokens": 1481399744.0, "step": 8838 }, { "entropy": 1.726265827814738, "epoch": 0.9710252396253879, "grad_norm": 0.6427361369132996, "learning_rate": 1.1860889613186473e-05, "loss": 1.449, "mean_token_accuracy": 0.6438863674799601, "num_tokens": 1481578751.0, "step": 8839 }, { "entropy": 1.7021092474460602, "epoch": 0.9711350965367609, "grad_norm": 0.6495327949523926, "learning_rate": 1.1859295812557063e-05, "loss": 1.3721, "mean_token_accuracy": 0.6563242822885513, "num_tokens": 1481747669.0, "step": 8840 }, { "entropy": 1.7147242824236553, "epoch": 0.9712449534481338, "grad_norm": 0.6548023819923401, "learning_rate": 1.1857701984731327e-05, "loss": 1.5216, "mean_token_accuracy": 0.6520404716332754, "num_tokens": 1481960498.0, "step": 8841 }, { "entropy": 1.7369143664836884, "epoch": 0.9713548103595068, "grad_norm": 1.022463083267212, "learning_rate": 1.185610812975971e-05, "loss": 0.9632, "mean_token_accuracy": 0.6833280275265375, "num_tokens": 1482092594.0, "step": 8842 }, { "entropy": 1.693136473496755, "epoch": 0.9714646672708797, "grad_norm": 0.7875451445579529, "learning_rate": 1.1854514247692654e-05, "loss": 1.4815, "mean_token_accuracy": 0.6493118107318878, "num_tokens": 1482278952.0, "step": 8843 }, { "entropy": 1.6291471024354298, "epoch": 0.9715745241822527, "grad_norm": 0.5732967257499695, "learning_rate": 1.1852920338580612e-05, "loss": 1.3202, "mean_token_accuracy": 0.6629867007335027, "num_tokens": 1482496222.0, "step": 8844 }, { "entropy": 1.762572060028712, "epoch": 0.9716843810936255, "grad_norm": 0.7325165867805481, "learning_rate": 1.1851326402474021e-05, "loss": 1.4403, "mean_token_accuracy": 0.647186944882075, "num_tokens": 1482638902.0, "step": 8845 }, { "entropy": 1.7113976279894512, "epoch": 0.9717942380049985, "grad_norm": 0.5798704028129578, "learning_rate": 1.1849732439423336e-05, "loss": 1.2823, "mean_token_accuracy": 0.6795357465744019, "num_tokens": 1482850659.0, "step": 8846 }, { "entropy": 1.6824340323607128, "epoch": 0.9719040949163714, "grad_norm": 0.6129104495048523, "learning_rate": 1.1848138449479e-05, "loss": 1.3034, "mean_token_accuracy": 0.6713794569174448, "num_tokens": 1482991261.0, "step": 8847 }, { "entropy": 1.7262922724088032, "epoch": 0.9720139518277444, "grad_norm": 0.6804250478744507, "learning_rate": 1.1846544432691466e-05, "loss": 1.4445, "mean_token_accuracy": 0.6579902023077011, "num_tokens": 1483175630.0, "step": 8848 }, { "entropy": 1.6732697486877441, "epoch": 0.9721238087391173, "grad_norm": 0.8014521598815918, "learning_rate": 1.1844950389111182e-05, "loss": 1.4432, "mean_token_accuracy": 0.6527867317199707, "num_tokens": 1483337929.0, "step": 8849 }, { "entropy": 1.7497017979621887, "epoch": 0.9722336656504902, "grad_norm": 0.7554667592048645, "learning_rate": 1.1843356318788597e-05, "loss": 1.5084, "mean_token_accuracy": 0.6426639705896378, "num_tokens": 1483472141.0, "step": 8850 }, { "entropy": 1.6782042880853016, "epoch": 0.9723435225618632, "grad_norm": 0.6667650938034058, "learning_rate": 1.1841762221774166e-05, "loss": 1.3261, "mean_token_accuracy": 0.6682502627372742, "num_tokens": 1483655083.0, "step": 8851 }, { "entropy": 1.7021657625834148, "epoch": 0.9724533794732361, "grad_norm": 0.7699475288391113, "learning_rate": 1.1840168098118341e-05, "loss": 1.2373, "mean_token_accuracy": 0.6808784703413645, "num_tokens": 1483770294.0, "step": 8852 }, { "entropy": 1.6511710683504741, "epoch": 0.9725632363846091, "grad_norm": 0.6833035349845886, "learning_rate": 1.1838573947871572e-05, "loss": 1.3962, "mean_token_accuracy": 0.6551846663157145, "num_tokens": 1484013632.0, "step": 8853 }, { "entropy": 1.6699829399585724, "epoch": 0.972673093295982, "grad_norm": 0.6805309057235718, "learning_rate": 1.1836979771084319e-05, "loss": 1.4132, "mean_token_accuracy": 0.6588405172030131, "num_tokens": 1484203655.0, "step": 8854 }, { "entropy": 1.7369339366753895, "epoch": 0.972782950207355, "grad_norm": 0.7582236528396606, "learning_rate": 1.183538556780703e-05, "loss": 1.3637, "mean_token_accuracy": 0.6499685148398081, "num_tokens": 1484339577.0, "step": 8855 }, { "entropy": 1.6894344786802928, "epoch": 0.9728928071187278, "grad_norm": 0.6298988461494446, "learning_rate": 1.1833791338090164e-05, "loss": 1.4281, "mean_token_accuracy": 0.6555620779593786, "num_tokens": 1484533014.0, "step": 8856 }, { "entropy": 1.738153209288915, "epoch": 0.9730026640301008, "grad_norm": 0.8281303644180298, "learning_rate": 1.1832197081984178e-05, "loss": 1.4179, "mean_token_accuracy": 0.6543222516775131, "num_tokens": 1484703160.0, "step": 8857 }, { "entropy": 1.7207493782043457, "epoch": 0.9731125209414737, "grad_norm": 0.6813370585441589, "learning_rate": 1.1830602799539532e-05, "loss": 1.3754, "mean_token_accuracy": 0.6627134084701538, "num_tokens": 1484840728.0, "step": 8858 }, { "entropy": 1.6518064538637798, "epoch": 0.9732223778528467, "grad_norm": 0.6134771108627319, "learning_rate": 1.1829008490806682e-05, "loss": 1.3939, "mean_token_accuracy": 0.6519312014182409, "num_tokens": 1485038123.0, "step": 8859 }, { "entropy": 1.7453482647736867, "epoch": 0.9733322347642196, "grad_norm": 0.6196027398109436, "learning_rate": 1.1827414155836083e-05, "loss": 1.3782, "mean_token_accuracy": 0.6594426184892654, "num_tokens": 1485288940.0, "step": 8860 }, { "entropy": 1.6767084399859111, "epoch": 0.9734420916755926, "grad_norm": 0.7031689286231995, "learning_rate": 1.1825819794678201e-05, "loss": 1.3408, "mean_token_accuracy": 0.6554951965808868, "num_tokens": 1485453230.0, "step": 8861 }, { "entropy": 1.7447557151317596, "epoch": 0.9735519485869655, "grad_norm": 0.658467710018158, "learning_rate": 1.1824225407383494e-05, "loss": 1.4039, "mean_token_accuracy": 0.6553630034128824, "num_tokens": 1485639366.0, "step": 8862 }, { "entropy": 1.7522308230400085, "epoch": 0.9736618054983384, "grad_norm": 0.6083486080169678, "learning_rate": 1.1822630994002425e-05, "loss": 1.4194, "mean_token_accuracy": 0.6455821990966797, "num_tokens": 1485812666.0, "step": 8863 }, { "entropy": 1.6696155369281769, "epoch": 0.9737716624097114, "grad_norm": 0.6207438111305237, "learning_rate": 1.1821036554585457e-05, "loss": 1.4398, "mean_token_accuracy": 0.6514710088570913, "num_tokens": 1485967627.0, "step": 8864 }, { "entropy": 1.7358433306217194, "epoch": 0.9738815193210842, "grad_norm": 0.8131921291351318, "learning_rate": 1.1819442089183051e-05, "loss": 1.4363, "mean_token_accuracy": 0.6631392339865366, "num_tokens": 1486133922.0, "step": 8865 }, { "entropy": 1.711951583623886, "epoch": 0.9739913762324572, "grad_norm": 0.7035873532295227, "learning_rate": 1.181784759784567e-05, "loss": 1.403, "mean_token_accuracy": 0.6459818432728449, "num_tokens": 1486288914.0, "step": 8866 }, { "entropy": 1.7438312371571858, "epoch": 0.9741012331438301, "grad_norm": 0.810245156288147, "learning_rate": 1.1816253080623783e-05, "loss": 1.2218, "mean_token_accuracy": 0.6892569859822592, "num_tokens": 1486416868.0, "step": 8867 }, { "entropy": 1.7063380181789398, "epoch": 0.9742110900552031, "grad_norm": 0.7329069375991821, "learning_rate": 1.1814658537567851e-05, "loss": 1.3848, "mean_token_accuracy": 0.6666723837455114, "num_tokens": 1486561189.0, "step": 8868 }, { "entropy": 1.707608977953593, "epoch": 0.974320946966576, "grad_norm": 0.8022003173828125, "learning_rate": 1.1813063968728347e-05, "loss": 1.3259, "mean_token_accuracy": 0.6613700141509374, "num_tokens": 1486705794.0, "step": 8869 }, { "entropy": 1.7382404804229736, "epoch": 0.974430803877949, "grad_norm": 0.8011891841888428, "learning_rate": 1.1811469374155736e-05, "loss": 1.2486, "mean_token_accuracy": 0.6719160179297129, "num_tokens": 1486836957.0, "step": 8870 }, { "entropy": 1.7135269542535145, "epoch": 0.9745406607893219, "grad_norm": 0.7246976494789124, "learning_rate": 1.1809874753900481e-05, "loss": 1.4138, "mean_token_accuracy": 0.6471135467290878, "num_tokens": 1487021960.0, "step": 8871 }, { "entropy": 1.7171033422152202, "epoch": 0.9746505177006949, "grad_norm": 0.7895018458366394, "learning_rate": 1.1808280108013056e-05, "loss": 1.4596, "mean_token_accuracy": 0.6534997771183649, "num_tokens": 1487169618.0, "step": 8872 }, { "entropy": 1.6360890467961628, "epoch": 0.9747603746120678, "grad_norm": 0.638076901435852, "learning_rate": 1.1806685436543929e-05, "loss": 1.3767, "mean_token_accuracy": 0.6550846695899963, "num_tokens": 1487335153.0, "step": 8873 }, { "entropy": 1.7775618036588032, "epoch": 0.9748702315234408, "grad_norm": 0.7299431562423706, "learning_rate": 1.1805090739543574e-05, "loss": 1.4693, "mean_token_accuracy": 0.6526975681384405, "num_tokens": 1487516942.0, "step": 8874 }, { "entropy": 1.717639684677124, "epoch": 0.9749800884348137, "grad_norm": 0.6354700326919556, "learning_rate": 1.1803496017062458e-05, "loss": 1.3929, "mean_token_accuracy": 0.6608896454175314, "num_tokens": 1487720150.0, "step": 8875 }, { "entropy": 1.7499834895133972, "epoch": 0.9750899453461865, "grad_norm": 0.6952952742576599, "learning_rate": 1.1801901269151057e-05, "loss": 1.5107, "mean_token_accuracy": 0.6322333912054697, "num_tokens": 1487928726.0, "step": 8876 }, { "entropy": 1.7447476585706074, "epoch": 0.9751998022575595, "grad_norm": 0.7346185445785522, "learning_rate": 1.180030649585984e-05, "loss": 1.3801, "mean_token_accuracy": 0.6633228411277136, "num_tokens": 1488088706.0, "step": 8877 }, { "entropy": 1.7237921754519145, "epoch": 0.9753096591689324, "grad_norm": 0.6742929816246033, "learning_rate": 1.1798711697239281e-05, "loss": 1.542, "mean_token_accuracy": 0.6433573961257935, "num_tokens": 1488277908.0, "step": 8878 }, { "entropy": 1.7318544387817383, "epoch": 0.9754195160803054, "grad_norm": 0.7747882604598999, "learning_rate": 1.1797116873339862e-05, "loss": 1.449, "mean_token_accuracy": 0.6521434336900711, "num_tokens": 1488421687.0, "step": 8879 }, { "entropy": 1.7487476070721943, "epoch": 0.9755293729916783, "grad_norm": 0.67233806848526, "learning_rate": 1.1795522024212052e-05, "loss": 1.4115, "mean_token_accuracy": 0.6510422080755234, "num_tokens": 1488578364.0, "step": 8880 }, { "entropy": 1.7285469969113667, "epoch": 0.9756392299030513, "grad_norm": 0.714249312877655, "learning_rate": 1.1793927149906329e-05, "loss": 1.5166, "mean_token_accuracy": 0.6440831869840622, "num_tokens": 1488760116.0, "step": 8881 }, { "entropy": 1.7103685835997264, "epoch": 0.9757490868144242, "grad_norm": 0.6406890749931335, "learning_rate": 1.1792332250473167e-05, "loss": 1.3169, "mean_token_accuracy": 0.6635753909746805, "num_tokens": 1488954184.0, "step": 8882 }, { "entropy": 1.7467718720436096, "epoch": 0.9758589437257972, "grad_norm": 0.713141918182373, "learning_rate": 1.1790737325963047e-05, "loss": 1.4421, "mean_token_accuracy": 0.6507671574751536, "num_tokens": 1489118857.0, "step": 8883 }, { "entropy": 1.7769187291463215, "epoch": 0.9759688006371701, "grad_norm": 0.7039201855659485, "learning_rate": 1.1789142376426446e-05, "loss": 1.4417, "mean_token_accuracy": 0.6531537423531214, "num_tokens": 1489241521.0, "step": 8884 }, { "entropy": 1.6950764159361522, "epoch": 0.9760786575485431, "grad_norm": 0.623028039932251, "learning_rate": 1.1787547401913844e-05, "loss": 1.4874, "mean_token_accuracy": 0.6512324412663778, "num_tokens": 1489416640.0, "step": 8885 }, { "entropy": 1.739354799191157, "epoch": 0.976188514459916, "grad_norm": 0.6581858396530151, "learning_rate": 1.1785952402475722e-05, "loss": 1.4426, "mean_token_accuracy": 0.6493807832400004, "num_tokens": 1489591572.0, "step": 8886 }, { "entropy": 1.7338965435822804, "epoch": 0.9762983713712889, "grad_norm": 0.7526776790618896, "learning_rate": 1.1784357378162563e-05, "loss": 1.4659, "mean_token_accuracy": 0.6593673924605051, "num_tokens": 1489730151.0, "step": 8887 }, { "entropy": 1.7149750391642253, "epoch": 0.9764082282826618, "grad_norm": 0.6592543125152588, "learning_rate": 1.1782762329024844e-05, "loss": 1.4636, "mean_token_accuracy": 0.6369660447041193, "num_tokens": 1489951203.0, "step": 8888 }, { "entropy": 1.6767615675926208, "epoch": 0.9765180851940348, "grad_norm": 0.6977933049201965, "learning_rate": 1.178116725511305e-05, "loss": 1.3287, "mean_token_accuracy": 0.6671111832062403, "num_tokens": 1490090010.0, "step": 8889 }, { "entropy": 1.7671857078870137, "epoch": 0.9766279421054077, "grad_norm": 0.6696137189865112, "learning_rate": 1.1779572156477668e-05, "loss": 1.4625, "mean_token_accuracy": 0.6420988490184149, "num_tokens": 1490251825.0, "step": 8890 }, { "entropy": 1.7096356054147084, "epoch": 0.9767377990167806, "grad_norm": 0.7088844776153564, "learning_rate": 1.1777977033169172e-05, "loss": 1.4098, "mean_token_accuracy": 0.6524170140425364, "num_tokens": 1490465675.0, "step": 8891 }, { "entropy": 1.6997392972310383, "epoch": 0.9768476559281536, "grad_norm": 0.656985342502594, "learning_rate": 1.1776381885238061e-05, "loss": 1.2332, "mean_token_accuracy": 0.6762174765268961, "num_tokens": 1490560957.0, "step": 8892 }, { "entropy": 1.7471089363098145, "epoch": 0.9769575128395265, "grad_norm": 0.7269816994667053, "learning_rate": 1.1774786712734809e-05, "loss": 1.4095, "mean_token_accuracy": 0.6508002032836279, "num_tokens": 1490726338.0, "step": 8893 }, { "entropy": 1.7028346260388691, "epoch": 0.9770673697508995, "grad_norm": 0.6202099919319153, "learning_rate": 1.1773191515709906e-05, "loss": 1.4608, "mean_token_accuracy": 0.6557352344195048, "num_tokens": 1490880225.0, "step": 8894 }, { "entropy": 1.7463387648264568, "epoch": 0.9771772266622724, "grad_norm": 0.7726168632507324, "learning_rate": 1.1771596294213843e-05, "loss": 1.3347, "mean_token_accuracy": 0.6617359022299448, "num_tokens": 1490998821.0, "step": 8895 }, { "entropy": 1.7535992066065471, "epoch": 0.9772870835736454, "grad_norm": 0.7872083187103271, "learning_rate": 1.1770001048297102e-05, "loss": 1.4472, "mean_token_accuracy": 0.6451671719551086, "num_tokens": 1491127949.0, "step": 8896 }, { "entropy": 1.6302488346894581, "epoch": 0.9773969404850182, "grad_norm": 0.6466932892799377, "learning_rate": 1.1768405778010175e-05, "loss": 1.5216, "mean_token_accuracy": 0.6445588419834772, "num_tokens": 1491366456.0, "step": 8897 }, { "entropy": 1.703003813823064, "epoch": 0.9775067973963912, "grad_norm": 0.6098012328147888, "learning_rate": 1.1766810483403554e-05, "loss": 1.3419, "mean_token_accuracy": 0.6562597801287969, "num_tokens": 1491508523.0, "step": 8898 }, { "entropy": 1.7005487581094105, "epoch": 0.9776166543077641, "grad_norm": 0.6172084212303162, "learning_rate": 1.1765215164527724e-05, "loss": 1.3601, "mean_token_accuracy": 0.6567008445660273, "num_tokens": 1491676475.0, "step": 8899 }, { "entropy": 1.6621931393941243, "epoch": 0.9777265112191371, "grad_norm": 0.6719179153442383, "learning_rate": 1.176361982143318e-05, "loss": 1.4336, "mean_token_accuracy": 0.6611567785342535, "num_tokens": 1491840435.0, "step": 8900 }, { "entropy": 1.6821343700091045, "epoch": 0.97783636813051, "grad_norm": 0.7708391547203064, "learning_rate": 1.176202445417041e-05, "loss": 1.2368, "mean_token_accuracy": 0.6823245485623678, "num_tokens": 1491954495.0, "step": 8901 }, { "entropy": 1.7113755146662395, "epoch": 0.977946225041883, "grad_norm": 0.7206063866615295, "learning_rate": 1.1760429062789913e-05, "loss": 1.3775, "mean_token_accuracy": 0.6614246865113577, "num_tokens": 1492099534.0, "step": 8902 }, { "entropy": 1.7529734373092651, "epoch": 0.9780560819532559, "grad_norm": 0.699995219707489, "learning_rate": 1.1758833647342176e-05, "loss": 1.4319, "mean_token_accuracy": 0.6588613192240397, "num_tokens": 1492259682.0, "step": 8903 }, { "entropy": 1.7423115372657776, "epoch": 0.9781659388646288, "grad_norm": 0.6755629777908325, "learning_rate": 1.1757238207877702e-05, "loss": 1.4738, "mean_token_accuracy": 0.6427132934331894, "num_tokens": 1492394241.0, "step": 8904 }, { "entropy": 1.6748673518498738, "epoch": 0.9782757957760018, "grad_norm": 0.6021566987037659, "learning_rate": 1.1755642744446976e-05, "loss": 1.4454, "mean_token_accuracy": 0.6756737381219864, "num_tokens": 1492593369.0, "step": 8905 }, { "entropy": 1.688041518131892, "epoch": 0.9783856526873747, "grad_norm": 0.6667623519897461, "learning_rate": 1.1754047257100496e-05, "loss": 1.403, "mean_token_accuracy": 0.6555336664120356, "num_tokens": 1492797945.0, "step": 8906 }, { "entropy": 1.7394044895966847, "epoch": 0.9784955095987476, "grad_norm": 0.6365262269973755, "learning_rate": 1.175245174588876e-05, "loss": 0.9444, "mean_token_accuracy": 0.6888117839892706, "num_tokens": 1492929891.0, "step": 8907 }, { "entropy": 1.7516534825166066, "epoch": 0.9786053665101205, "grad_norm": 0.6223315596580505, "learning_rate": 1.1750856210862267e-05, "loss": 1.3887, "mean_token_accuracy": 0.6591986964146296, "num_tokens": 1493095539.0, "step": 8908 }, { "entropy": 1.7105275094509125, "epoch": 0.9787152234214935, "grad_norm": 0.8252950310707092, "learning_rate": 1.1749260652071513e-05, "loss": 1.2241, "mean_token_accuracy": 0.6796365777651469, "num_tokens": 1493206041.0, "step": 8909 }, { "entropy": 1.7503976225852966, "epoch": 0.9788250803328664, "grad_norm": 0.7552759051322937, "learning_rate": 1.1747665069566998e-05, "loss": 1.433, "mean_token_accuracy": 0.652017816901207, "num_tokens": 1493376417.0, "step": 8910 }, { "entropy": 1.7101737360159557, "epoch": 0.9789349372442394, "grad_norm": 0.636199414730072, "learning_rate": 1.174606946339922e-05, "loss": 1.363, "mean_token_accuracy": 0.6580263326565424, "num_tokens": 1493515476.0, "step": 8911 }, { "entropy": 1.7023467222849529, "epoch": 0.9790447941556123, "grad_norm": 0.6993868947029114, "learning_rate": 1.174447383361868e-05, "loss": 1.4091, "mean_token_accuracy": 0.6553865273793539, "num_tokens": 1493688665.0, "step": 8912 }, { "entropy": 1.689314067363739, "epoch": 0.9791546510669853, "grad_norm": 0.602799117565155, "learning_rate": 1.1742878180275876e-05, "loss": 1.3516, "mean_token_accuracy": 0.6648583362499872, "num_tokens": 1493896130.0, "step": 8913 }, { "entropy": 1.6816753149032593, "epoch": 0.9792645079783582, "grad_norm": 0.7375824451446533, "learning_rate": 1.1741282503421314e-05, "loss": 1.5003, "mean_token_accuracy": 0.6500919560591379, "num_tokens": 1494039815.0, "step": 8914 }, { "entropy": 1.7708214024702709, "epoch": 0.9793743648897312, "grad_norm": 0.643165647983551, "learning_rate": 1.1739686803105497e-05, "loss": 1.3811, "mean_token_accuracy": 0.6472144474585851, "num_tokens": 1494220521.0, "step": 8915 }, { "entropy": 1.7270447909832, "epoch": 0.9794842218011041, "grad_norm": 0.7264362573623657, "learning_rate": 1.1738091079378924e-05, "loss": 1.3741, "mean_token_accuracy": 0.6521051526069641, "num_tokens": 1494408989.0, "step": 8916 }, { "entropy": 1.7155033648014069, "epoch": 0.979594078712477, "grad_norm": 0.7391374111175537, "learning_rate": 1.1736495332292099e-05, "loss": 1.5278, "mean_token_accuracy": 0.6366018503904343, "num_tokens": 1494615976.0, "step": 8917 }, { "entropy": 1.7143846253554027, "epoch": 0.9797039356238499, "grad_norm": 0.702869713306427, "learning_rate": 1.1734899561895532e-05, "loss": 1.2975, "mean_token_accuracy": 0.6688247273365656, "num_tokens": 1494746252.0, "step": 8918 }, { "entropy": 1.7029621005058289, "epoch": 0.9798137925352228, "grad_norm": 0.6561051607131958, "learning_rate": 1.1733303768239721e-05, "loss": 1.3749, "mean_token_accuracy": 0.6583746274312338, "num_tokens": 1494902545.0, "step": 8919 }, { "entropy": 1.7064663370450337, "epoch": 0.9799236494465958, "grad_norm": 0.6154294013977051, "learning_rate": 1.173170795137518e-05, "loss": 1.5231, "mean_token_accuracy": 0.6378661692142487, "num_tokens": 1495137727.0, "step": 8920 }, { "entropy": 1.7173560659090679, "epoch": 0.9800335063579687, "grad_norm": 0.6912945508956909, "learning_rate": 1.1730112111352412e-05, "loss": 1.5634, "mean_token_accuracy": 0.6340258419513702, "num_tokens": 1495314972.0, "step": 8921 }, { "entropy": 1.7897585928440094, "epoch": 0.9801433632693417, "grad_norm": 0.6243012547492981, "learning_rate": 1.1728516248221921e-05, "loss": 1.4356, "mean_token_accuracy": 0.642118309934934, "num_tokens": 1495453177.0, "step": 8922 }, { "entropy": 1.726568082968394, "epoch": 0.9802532201807146, "grad_norm": 0.7456775903701782, "learning_rate": 1.1726920362034222e-05, "loss": 1.2715, "mean_token_accuracy": 0.6740024735530218, "num_tokens": 1495558366.0, "step": 8923 }, { "entropy": 1.661215364933014, "epoch": 0.9803630770920876, "grad_norm": 0.6240798830986023, "learning_rate": 1.172532445283982e-05, "loss": 1.4022, "mean_token_accuracy": 0.6461461385091146, "num_tokens": 1495729811.0, "step": 8924 }, { "entropy": 1.645288070042928, "epoch": 0.9804729340034605, "grad_norm": 0.6724650263786316, "learning_rate": 1.1723728520689226e-05, "loss": 1.4172, "mean_token_accuracy": 0.660889113942782, "num_tokens": 1495907965.0, "step": 8925 }, { "entropy": 1.6819191972414653, "epoch": 0.9805827909148335, "grad_norm": 0.8175992965698242, "learning_rate": 1.172213256563295e-05, "loss": 1.5096, "mean_token_accuracy": 0.6531357516845068, "num_tokens": 1496076569.0, "step": 8926 }, { "entropy": 1.691595862309138, "epoch": 0.9806926478262064, "grad_norm": 0.6539618372917175, "learning_rate": 1.1720536587721506e-05, "loss": 1.4512, "mean_token_accuracy": 0.6612804333368937, "num_tokens": 1496228306.0, "step": 8927 }, { "entropy": 1.6252157092094421, "epoch": 0.9808025047375794, "grad_norm": 0.6574737429618835, "learning_rate": 1.1718940587005403e-05, "loss": 1.3293, "mean_token_accuracy": 0.6706244150797526, "num_tokens": 1496384607.0, "step": 8928 }, { "entropy": 1.6941309372584026, "epoch": 0.9809123616489522, "grad_norm": 0.7366401553153992, "learning_rate": 1.171734456353515e-05, "loss": 1.3279, "mean_token_accuracy": 0.6707786669333776, "num_tokens": 1496503504.0, "step": 8929 }, { "entropy": 1.7239616513252258, "epoch": 0.9810222185603251, "grad_norm": 0.814553439617157, "learning_rate": 1.171574851736127e-05, "loss": 1.3939, "mean_token_accuracy": 0.651360089580218, "num_tokens": 1496619109.0, "step": 8930 }, { "entropy": 1.7386276920636494, "epoch": 0.9811320754716981, "grad_norm": 0.7365756034851074, "learning_rate": 1.171415244853427e-05, "loss": 1.2141, "mean_token_accuracy": 0.677468384305636, "num_tokens": 1496749789.0, "step": 8931 }, { "entropy": 1.6668656865755718, "epoch": 0.981241932383071, "grad_norm": 0.6614105105400085, "learning_rate": 1.1712556357104669e-05, "loss": 1.3721, "mean_token_accuracy": 0.66343554854393, "num_tokens": 1496896836.0, "step": 8932 }, { "entropy": 1.5968853334585826, "epoch": 0.981351789294444, "grad_norm": 0.624069094657898, "learning_rate": 1.1710960243122978e-05, "loss": 1.2155, "mean_token_accuracy": 0.681601325670878, "num_tokens": 1497046633.0, "step": 8933 }, { "entropy": 1.6975778539975483, "epoch": 0.9814616462058169, "grad_norm": 0.6668051481246948, "learning_rate": 1.1709364106639715e-05, "loss": 1.4791, "mean_token_accuracy": 0.6465711345275243, "num_tokens": 1497216701.0, "step": 8934 }, { "entropy": 1.6943465371926625, "epoch": 0.9815715031171899, "grad_norm": 0.8045838475227356, "learning_rate": 1.17077679477054e-05, "loss": 1.2983, "mean_token_accuracy": 0.6777238150437673, "num_tokens": 1497334152.0, "step": 8935 }, { "entropy": 1.6488543053468068, "epoch": 0.9816813600285628, "grad_norm": 0.6241557598114014, "learning_rate": 1.1706171766370546e-05, "loss": 1.3189, "mean_token_accuracy": 0.678161104520162, "num_tokens": 1497515242.0, "step": 8936 }, { "entropy": 1.7082229157288868, "epoch": 0.9817912169399358, "grad_norm": 0.7065755724906921, "learning_rate": 1.1704575562685674e-05, "loss": 1.3357, "mean_token_accuracy": 0.6597681244214376, "num_tokens": 1497716579.0, "step": 8937 }, { "entropy": 1.7012514372666676, "epoch": 0.9819010738513086, "grad_norm": 0.7709197402000427, "learning_rate": 1.1702979336701306e-05, "loss": 1.4173, "mean_token_accuracy": 0.6548969050248464, "num_tokens": 1497869693.0, "step": 8938 }, { "entropy": 1.6415949165821075, "epoch": 0.9820109307626816, "grad_norm": 0.5932096242904663, "learning_rate": 1.1701383088467958e-05, "loss": 1.4232, "mean_token_accuracy": 0.6544928352038065, "num_tokens": 1498106239.0, "step": 8939 }, { "entropy": 1.814208447933197, "epoch": 0.9821207876740545, "grad_norm": 0.6313380002975464, "learning_rate": 1.169978681803615e-05, "loss": 1.418, "mean_token_accuracy": 0.6428090532620748, "num_tokens": 1498265625.0, "step": 8940 }, { "entropy": 1.6680020491282146, "epoch": 0.9822306445854275, "grad_norm": 0.5956284403800964, "learning_rate": 1.1698190525456403e-05, "loss": 1.3458, "mean_token_accuracy": 0.671937977274259, "num_tokens": 1498434182.0, "step": 8941 }, { "entropy": 1.7206951081752777, "epoch": 0.9823405014968004, "grad_norm": 0.715988278388977, "learning_rate": 1.1696594210779242e-05, "loss": 1.2887, "mean_token_accuracy": 0.6786340028047562, "num_tokens": 1498582319.0, "step": 8942 }, { "entropy": 1.7263545493284862, "epoch": 0.9824503584081734, "grad_norm": 0.6619638800621033, "learning_rate": 1.169499787405519e-05, "loss": 1.2756, "mean_token_accuracy": 0.6720621436834335, "num_tokens": 1498690732.0, "step": 8943 }, { "entropy": 1.644078363974889, "epoch": 0.9825602153195463, "grad_norm": 0.6541385650634766, "learning_rate": 1.1693401515334767e-05, "loss": 1.4228, "mean_token_accuracy": 0.6626478185256323, "num_tokens": 1498865749.0, "step": 8944 }, { "entropy": 1.7186622321605682, "epoch": 0.9826700722309192, "grad_norm": 0.7101624608039856, "learning_rate": 1.1691805134668497e-05, "loss": 1.563, "mean_token_accuracy": 0.6366095294555029, "num_tokens": 1499039558.0, "step": 8945 }, { "entropy": 1.679495245218277, "epoch": 0.9827799291422922, "grad_norm": 0.6966460943222046, "learning_rate": 1.169020873210691e-05, "loss": 1.4945, "mean_token_accuracy": 0.6450996845960617, "num_tokens": 1499197333.0, "step": 8946 }, { "entropy": 1.722711722056071, "epoch": 0.9828897860536651, "grad_norm": 0.7616344094276428, "learning_rate": 1.1688612307700522e-05, "loss": 1.419, "mean_token_accuracy": 0.649802620212237, "num_tokens": 1499406096.0, "step": 8947 }, { "entropy": 1.641068955262502, "epoch": 0.9829996429650381, "grad_norm": 0.7064522504806519, "learning_rate": 1.1687015861499866e-05, "loss": 1.3548, "mean_token_accuracy": 0.6700154940287272, "num_tokens": 1499582395.0, "step": 8948 }, { "entropy": 1.709856649239858, "epoch": 0.9831094998764109, "grad_norm": 0.6546580195426941, "learning_rate": 1.1685419393555474e-05, "loss": 1.4311, "mean_token_accuracy": 0.6408520142237345, "num_tokens": 1499777610.0, "step": 8949 }, { "entropy": 1.6791809399922688, "epoch": 0.9832193567877839, "grad_norm": 0.7274378538131714, "learning_rate": 1.168382290391786e-05, "loss": 1.357, "mean_token_accuracy": 0.668040469288826, "num_tokens": 1499945571.0, "step": 8950 }, { "entropy": 1.7598425050576527, "epoch": 0.9833292136991568, "grad_norm": 1.086031198501587, "learning_rate": 1.1682226392637561e-05, "loss": 1.4119, "mean_token_accuracy": 0.6470048973957697, "num_tokens": 1500087592.0, "step": 8951 }, { "entropy": 1.682246168454488, "epoch": 0.9834390706105298, "grad_norm": 0.6415425539016724, "learning_rate": 1.1680629859765107e-05, "loss": 1.4473, "mean_token_accuracy": 0.6519815276066462, "num_tokens": 1500248684.0, "step": 8952 }, { "entropy": 1.646356741587321, "epoch": 0.9835489275219027, "grad_norm": 0.6637715697288513, "learning_rate": 1.167903330535102e-05, "loss": 1.4991, "mean_token_accuracy": 0.6322367091973623, "num_tokens": 1500476242.0, "step": 8953 }, { "entropy": 1.6522825956344604, "epoch": 0.9836587844332757, "grad_norm": 0.6415530443191528, "learning_rate": 1.1677436729445837e-05, "loss": 1.4045, "mean_token_accuracy": 0.6576189547777176, "num_tokens": 1500640691.0, "step": 8954 }, { "entropy": 1.7172890106836955, "epoch": 0.9837686413446486, "grad_norm": 0.6979526877403259, "learning_rate": 1.167584013210009e-05, "loss": 1.4555, "mean_token_accuracy": 0.6452741970618566, "num_tokens": 1500825536.0, "step": 8955 }, { "entropy": 1.726652721563975, "epoch": 0.9838784982560216, "grad_norm": 0.7327434420585632, "learning_rate": 1.1674243513364303e-05, "loss": 1.3525, "mean_token_accuracy": 0.6641982247432073, "num_tokens": 1500971763.0, "step": 8956 }, { "entropy": 1.6420506338278453, "epoch": 0.9839883551673945, "grad_norm": 0.6714190244674683, "learning_rate": 1.1672646873289014e-05, "loss": 1.4045, "mean_token_accuracy": 0.6655755738417307, "num_tokens": 1501155131.0, "step": 8957 }, { "entropy": 1.676787108182907, "epoch": 0.9840982120787674, "grad_norm": 0.6548340916633606, "learning_rate": 1.1671050211924752e-05, "loss": 1.3451, "mean_token_accuracy": 0.6712605257829031, "num_tokens": 1501282420.0, "step": 8958 }, { "entropy": 1.6804224650065105, "epoch": 0.9842080689901404, "grad_norm": 0.698941171169281, "learning_rate": 1.1669453529322056e-05, "loss": 1.2366, "mean_token_accuracy": 0.6790623267491659, "num_tokens": 1501402213.0, "step": 8959 }, { "entropy": 1.6702880064646404, "epoch": 0.9843179259015132, "grad_norm": 0.7242985963821411, "learning_rate": 1.1667856825531458e-05, "loss": 1.4095, "mean_token_accuracy": 0.6546028355757395, "num_tokens": 1501565497.0, "step": 8960 }, { "entropy": 1.736267864704132, "epoch": 0.9844277828128862, "grad_norm": 0.7420823574066162, "learning_rate": 1.1666260100603493e-05, "loss": 1.3009, "mean_token_accuracy": 0.6798698008060455, "num_tokens": 1501742697.0, "step": 8961 }, { "entropy": 1.7749883234500885, "epoch": 0.9845376397242591, "grad_norm": 0.8388869166374207, "learning_rate": 1.1664663354588694e-05, "loss": 1.6187, "mean_token_accuracy": 0.6209886992971102, "num_tokens": 1501930271.0, "step": 8962 }, { "entropy": 1.672831416130066, "epoch": 0.9846474966356321, "grad_norm": 0.6645883321762085, "learning_rate": 1.16630665875376e-05, "loss": 1.4393, "mean_token_accuracy": 0.6532560338576635, "num_tokens": 1502163533.0, "step": 8963 }, { "entropy": 1.7371017535527546, "epoch": 0.984757353547005, "grad_norm": 0.871916651725769, "learning_rate": 1.1661469799500747e-05, "loss": 1.4171, "mean_token_accuracy": 0.6553890208403269, "num_tokens": 1502320064.0, "step": 8964 }, { "entropy": 1.680338740348816, "epoch": 0.984867210458378, "grad_norm": 0.6550365686416626, "learning_rate": 1.1659872990528674e-05, "loss": 1.3864, "mean_token_accuracy": 0.6682237784067789, "num_tokens": 1502525140.0, "step": 8965 }, { "entropy": 1.7634477416674297, "epoch": 0.9849770673697509, "grad_norm": 0.6921877861022949, "learning_rate": 1.1658276160671915e-05, "loss": 1.4945, "mean_token_accuracy": 0.6510110149780909, "num_tokens": 1502673658.0, "step": 8966 }, { "entropy": 1.7780840198198955, "epoch": 0.9850869242811239, "grad_norm": 0.632499635219574, "learning_rate": 1.1656679309981017e-05, "loss": 1.5241, "mean_token_accuracy": 0.6321147382259369, "num_tokens": 1502864453.0, "step": 8967 }, { "entropy": 1.762084702650706, "epoch": 0.9851967811924968, "grad_norm": 0.6069890260696411, "learning_rate": 1.1655082438506511e-05, "loss": 1.4161, "mean_token_accuracy": 0.6416159570217133, "num_tokens": 1503015146.0, "step": 8968 }, { "entropy": 1.6979095737139385, "epoch": 0.9853066381038698, "grad_norm": 0.6491718888282776, "learning_rate": 1.1653485546298941e-05, "loss": 1.381, "mean_token_accuracy": 0.6543610692024231, "num_tokens": 1503180409.0, "step": 8969 }, { "entropy": 1.681455820798874, "epoch": 0.9854164950152426, "grad_norm": 0.8329312801361084, "learning_rate": 1.1651888633408853e-05, "loss": 1.301, "mean_token_accuracy": 0.6834056129058202, "num_tokens": 1503343958.0, "step": 8970 }, { "entropy": 1.7064136465390523, "epoch": 0.9855263519266155, "grad_norm": 0.7739664316177368, "learning_rate": 1.1650291699886778e-05, "loss": 1.4141, "mean_token_accuracy": 0.6504683097203573, "num_tokens": 1503500968.0, "step": 8971 }, { "entropy": 1.6865461766719818, "epoch": 0.9856362088379885, "grad_norm": 0.7742812633514404, "learning_rate": 1.1648694745783265e-05, "loss": 1.3123, "mean_token_accuracy": 0.6681303232908249, "num_tokens": 1503641625.0, "step": 8972 }, { "entropy": 1.708566923936208, "epoch": 0.9857460657493614, "grad_norm": 0.7213424444198608, "learning_rate": 1.1647097771148857e-05, "loss": 1.4823, "mean_token_accuracy": 0.6548313399155935, "num_tokens": 1503804043.0, "step": 8973 }, { "entropy": 1.7305469711621602, "epoch": 0.9858559226607344, "grad_norm": 0.7062889933586121, "learning_rate": 1.1645500776034096e-05, "loss": 1.3272, "mean_token_accuracy": 0.6624001910289129, "num_tokens": 1503908419.0, "step": 8974 }, { "entropy": 1.6859267055988312, "epoch": 0.9859657795721073, "grad_norm": 0.7024558186531067, "learning_rate": 1.1643903760489523e-05, "loss": 1.4326, "mean_token_accuracy": 0.654796913266182, "num_tokens": 1504084648.0, "step": 8975 }, { "entropy": 1.7165729403495789, "epoch": 0.9860756364834803, "grad_norm": 0.7313957214355469, "learning_rate": 1.1642306724565688e-05, "loss": 1.4266, "mean_token_accuracy": 0.6520107636849085, "num_tokens": 1504244529.0, "step": 8976 }, { "entropy": 1.6938693324724834, "epoch": 0.9861854933948532, "grad_norm": 0.7270261645317078, "learning_rate": 1.1640709668313137e-05, "loss": 1.2063, "mean_token_accuracy": 0.6774415969848633, "num_tokens": 1504440015.0, "step": 8977 }, { "entropy": 1.6900843977928162, "epoch": 0.9862953503062262, "grad_norm": 0.797430157661438, "learning_rate": 1.1639112591782413e-05, "loss": 1.3578, "mean_token_accuracy": 0.6580925136804581, "num_tokens": 1504572367.0, "step": 8978 }, { "entropy": 1.6583419442176819, "epoch": 0.9864052072175991, "grad_norm": 0.711826741695404, "learning_rate": 1.1637515495024062e-05, "loss": 1.4362, "mean_token_accuracy": 0.6491169184446335, "num_tokens": 1504721064.0, "step": 8979 }, { "entropy": 1.6519073247909546, "epoch": 0.986515064128972, "grad_norm": 0.6117995381355286, "learning_rate": 1.163591837808863e-05, "loss": 1.2617, "mean_token_accuracy": 0.6822675367196401, "num_tokens": 1504875817.0, "step": 8980 }, { "entropy": 1.742538849512736, "epoch": 0.9866249210403449, "grad_norm": 0.7913803458213806, "learning_rate": 1.1634321241026671e-05, "loss": 1.5257, "mean_token_accuracy": 0.6468542764584223, "num_tokens": 1505024315.0, "step": 8981 }, { "entropy": 1.7452322244644165, "epoch": 0.9867347779517179, "grad_norm": 0.8122254014015198, "learning_rate": 1.163272408388873e-05, "loss": 1.3508, "mean_token_accuracy": 0.6734697222709656, "num_tokens": 1505160858.0, "step": 8982 }, { "entropy": 1.6993557115395863, "epoch": 0.9868446348630908, "grad_norm": 0.6506452560424805, "learning_rate": 1.163112690672536e-05, "loss": 1.4257, "mean_token_accuracy": 0.6508930325508118, "num_tokens": 1505359516.0, "step": 8983 }, { "entropy": 1.7169030010700226, "epoch": 0.9869544917744638, "grad_norm": 0.8766597509384155, "learning_rate": 1.1629529709587103e-05, "loss": 1.3419, "mean_token_accuracy": 0.6719989031553268, "num_tokens": 1505479668.0, "step": 8984 }, { "entropy": 1.7049663464228313, "epoch": 0.9870643486858367, "grad_norm": 0.751346230506897, "learning_rate": 1.1627932492524519e-05, "loss": 1.4392, "mean_token_accuracy": 0.6511711478233337, "num_tokens": 1505669915.0, "step": 8985 }, { "entropy": 1.7046051720778148, "epoch": 0.9871742055972096, "grad_norm": 0.6194396615028381, "learning_rate": 1.1626335255588153e-05, "loss": 1.486, "mean_token_accuracy": 0.6346323589483897, "num_tokens": 1505848159.0, "step": 8986 }, { "entropy": 1.70768607656161, "epoch": 0.9872840625085826, "grad_norm": 0.5602802038192749, "learning_rate": 1.1624737998828556e-05, "loss": 1.414, "mean_token_accuracy": 0.6471713682015737, "num_tokens": 1506088376.0, "step": 8987 }, { "entropy": 1.62164506316185, "epoch": 0.9873939194199555, "grad_norm": 0.7139067649841309, "learning_rate": 1.1623140722296285e-05, "loss": 1.5303, "mean_token_accuracy": 0.6505985458691915, "num_tokens": 1506254740.0, "step": 8988 }, { "entropy": 1.7491505940755208, "epoch": 0.9875037763313285, "grad_norm": 0.7321544885635376, "learning_rate": 1.162154342604189e-05, "loss": 1.3151, "mean_token_accuracy": 0.6853879491488138, "num_tokens": 1506417451.0, "step": 8989 }, { "entropy": 1.6918767988681793, "epoch": 0.9876136332427013, "grad_norm": 0.6921842098236084, "learning_rate": 1.1619946110115928e-05, "loss": 1.3741, "mean_token_accuracy": 0.6635448783636093, "num_tokens": 1506573897.0, "step": 8990 }, { "entropy": 1.6910037795702617, "epoch": 0.9877234901540743, "grad_norm": 0.6403983235359192, "learning_rate": 1.1618348774568946e-05, "loss": 1.4347, "mean_token_accuracy": 0.6572843343019485, "num_tokens": 1506763992.0, "step": 8991 }, { "entropy": 1.724970320860545, "epoch": 0.9878333470654472, "grad_norm": 0.70880126953125, "learning_rate": 1.1616751419451506e-05, "loss": 1.3969, "mean_token_accuracy": 0.6549189041058222, "num_tokens": 1506902471.0, "step": 8992 }, { "entropy": 1.7925910154978435, "epoch": 0.9879432039768202, "grad_norm": 0.6777533292770386, "learning_rate": 1.1615154044814163e-05, "loss": 1.3396, "mean_token_accuracy": 0.6721122364203135, "num_tokens": 1507045033.0, "step": 8993 }, { "entropy": 1.5604525705178578, "epoch": 0.9880530608881931, "grad_norm": 0.6724399328231812, "learning_rate": 1.1613556650707474e-05, "loss": 1.2766, "mean_token_accuracy": 0.6662530352671941, "num_tokens": 1507227734.0, "step": 8994 }, { "entropy": 1.7361294726530712, "epoch": 0.9881629177995661, "grad_norm": 0.7844634652137756, "learning_rate": 1.1611959237181991e-05, "loss": 1.2964, "mean_token_accuracy": 0.6645766844352087, "num_tokens": 1507366369.0, "step": 8995 }, { "entropy": 1.7280752658843994, "epoch": 0.988272774710939, "grad_norm": 0.721837043762207, "learning_rate": 1.1610361804288273e-05, "loss": 1.3637, "mean_token_accuracy": 0.6476641943057379, "num_tokens": 1507549321.0, "step": 8996 }, { "entropy": 1.701300948858261, "epoch": 0.988382631622312, "grad_norm": 0.6382768750190735, "learning_rate": 1.1608764352076878e-05, "loss": 1.4257, "mean_token_accuracy": 0.6443998465935389, "num_tokens": 1507731929.0, "step": 8997 }, { "entropy": 1.703882485628128, "epoch": 0.9884924885336849, "grad_norm": 0.7071985006332397, "learning_rate": 1.1607166880598366e-05, "loss": 1.4985, "mean_token_accuracy": 0.6458214769760767, "num_tokens": 1507918984.0, "step": 8998 }, { "entropy": 1.7094257573286693, "epoch": 0.9886023454450578, "grad_norm": 0.8170492053031921, "learning_rate": 1.1605569389903297e-05, "loss": 1.3406, "mean_token_accuracy": 0.6753592838843664, "num_tokens": 1508052713.0, "step": 8999 }, { "entropy": 1.7019550204277039, "epoch": 0.9887122023564308, "grad_norm": 0.7124339938163757, "learning_rate": 1.1603971880042228e-05, "loss": 1.3914, "mean_token_accuracy": 0.654958705107371, "num_tokens": 1508206066.0, "step": 9000 }, { "entropy": 1.7603452901045482, "epoch": 0.9888220592678036, "grad_norm": 0.8602905869483948, "learning_rate": 1.1602374351065725e-05, "loss": 1.5332, "mean_token_accuracy": 0.6360108802715937, "num_tokens": 1508375603.0, "step": 9001 }, { "entropy": 1.7130256096522014, "epoch": 0.9889319161791766, "grad_norm": 0.7287706136703491, "learning_rate": 1.1600776803024344e-05, "loss": 1.4632, "mean_token_accuracy": 0.6579320232073466, "num_tokens": 1508573102.0, "step": 9002 }, { "entropy": 1.689418117205302, "epoch": 0.9890417730905495, "grad_norm": 0.6394194960594177, "learning_rate": 1.1599179235968646e-05, "loss": 1.4721, "mean_token_accuracy": 0.6430060019095739, "num_tokens": 1508748381.0, "step": 9003 }, { "entropy": 1.635381430387497, "epoch": 0.9891516300019225, "grad_norm": 0.741367757320404, "learning_rate": 1.1597581649949194e-05, "loss": 1.3015, "mean_token_accuracy": 0.6785789032777151, "num_tokens": 1508941115.0, "step": 9004 }, { "entropy": 1.6817569931348164, "epoch": 0.9892614869132954, "grad_norm": 0.6391214728355408, "learning_rate": 1.1595984045016557e-05, "loss": 1.4592, "mean_token_accuracy": 0.6553023606538773, "num_tokens": 1509128859.0, "step": 9005 }, { "entropy": 1.65160737435023, "epoch": 0.9893713438246684, "grad_norm": 0.723601222038269, "learning_rate": 1.1594386421221289e-05, "loss": 1.3859, "mean_token_accuracy": 0.6749422947565714, "num_tokens": 1509267167.0, "step": 9006 }, { "entropy": 1.6834927201271057, "epoch": 0.9894812007360413, "grad_norm": 0.6616725325584412, "learning_rate": 1.1592788778613962e-05, "loss": 1.4887, "mean_token_accuracy": 0.624958798289299, "num_tokens": 1509557371.0, "step": 9007 }, { "entropy": 1.7665310402711232, "epoch": 0.9895910576474143, "grad_norm": 0.7587254047393799, "learning_rate": 1.1591191117245134e-05, "loss": 1.4524, "mean_token_accuracy": 0.6576088120539983, "num_tokens": 1509705073.0, "step": 9008 }, { "entropy": 1.6970743139584858, "epoch": 0.9897009145587872, "grad_norm": 0.6766437888145447, "learning_rate": 1.1589593437165377e-05, "loss": 1.413, "mean_token_accuracy": 0.6557514518499374, "num_tokens": 1509877904.0, "step": 9009 }, { "entropy": 1.6982737878958385, "epoch": 0.9898107714701602, "grad_norm": 0.7253543138504028, "learning_rate": 1.1587995738425249e-05, "loss": 1.3851, "mean_token_accuracy": 0.6553531636794409, "num_tokens": 1510029228.0, "step": 9010 }, { "entropy": 1.6637147863705952, "epoch": 0.989920628381533, "grad_norm": 0.6019961833953857, "learning_rate": 1.1586398021075324e-05, "loss": 1.4421, "mean_token_accuracy": 0.6378505776325861, "num_tokens": 1510279098.0, "step": 9011 }, { "entropy": 1.6774966319402058, "epoch": 0.9900304852929059, "grad_norm": 0.655983567237854, "learning_rate": 1.1584800285166164e-05, "loss": 1.3409, "mean_token_accuracy": 0.650844136873881, "num_tokens": 1510440886.0, "step": 9012 }, { "entropy": 1.6945745448271434, "epoch": 0.9901403422042789, "grad_norm": 0.62674480676651, "learning_rate": 1.1583202530748341e-05, "loss": 1.3902, "mean_token_accuracy": 0.6639150381088257, "num_tokens": 1510656532.0, "step": 9013 }, { "entropy": 1.7257001300652821, "epoch": 0.9902501991156518, "grad_norm": 0.7610213160514832, "learning_rate": 1.1581604757872418e-05, "loss": 1.4418, "mean_token_accuracy": 0.642704447110494, "num_tokens": 1510830217.0, "step": 9014 }, { "entropy": 1.6689273913701375, "epoch": 0.9903600560270248, "grad_norm": 0.7523655891418457, "learning_rate": 1.1580006966588968e-05, "loss": 1.2532, "mean_token_accuracy": 0.6769355684518814, "num_tokens": 1510954384.0, "step": 9015 }, { "entropy": 1.720543771982193, "epoch": 0.9904699129383977, "grad_norm": 0.7777354121208191, "learning_rate": 1.1578409156948558e-05, "loss": 1.5624, "mean_token_accuracy": 0.6435587803522745, "num_tokens": 1511108028.0, "step": 9016 }, { "entropy": 1.6359238624572754, "epoch": 0.9905797698497707, "grad_norm": 0.8014277219772339, "learning_rate": 1.157681132900176e-05, "loss": 1.2437, "mean_token_accuracy": 0.6720960934956869, "num_tokens": 1511244389.0, "step": 9017 }, { "entropy": 1.6672624746958415, "epoch": 0.9906896267611436, "grad_norm": 0.7083525061607361, "learning_rate": 1.1575213482799144e-05, "loss": 1.4483, "mean_token_accuracy": 0.6651191810766856, "num_tokens": 1511417628.0, "step": 9018 }, { "entropy": 1.6869849264621735, "epoch": 0.9907994836725166, "grad_norm": 0.6426869630813599, "learning_rate": 1.1573615618391279e-05, "loss": 1.498, "mean_token_accuracy": 0.6442697743574778, "num_tokens": 1511656313.0, "step": 9019 }, { "entropy": 1.703253189722697, "epoch": 0.9909093405838895, "grad_norm": 0.6750736832618713, "learning_rate": 1.1572017735828738e-05, "loss": 1.3201, "mean_token_accuracy": 0.6629547973473867, "num_tokens": 1511768183.0, "step": 9020 }, { "entropy": 1.7203664779663086, "epoch": 0.9910191974952625, "grad_norm": 0.6736758947372437, "learning_rate": 1.1570419835162093e-05, "loss": 1.38, "mean_token_accuracy": 0.6648065795501074, "num_tokens": 1511913663.0, "step": 9021 }, { "entropy": 1.7273483872413635, "epoch": 0.9911290544066353, "grad_norm": 0.6232172846794128, "learning_rate": 1.1568821916441916e-05, "loss": 1.4793, "mean_token_accuracy": 0.6454748759667078, "num_tokens": 1512131107.0, "step": 9022 }, { "entropy": 1.7320756713549297, "epoch": 0.9912389113180083, "grad_norm": 0.927398145198822, "learning_rate": 1.1567223979718786e-05, "loss": 1.3629, "mean_token_accuracy": 0.6540378282467524, "num_tokens": 1512294920.0, "step": 9023 }, { "entropy": 1.7156847814718883, "epoch": 0.9913487682293812, "grad_norm": 0.6778035759925842, "learning_rate": 1.156562602504327e-05, "loss": 1.6711, "mean_token_accuracy": 0.6444185674190521, "num_tokens": 1512497502.0, "step": 9024 }, { "entropy": 1.7153818408648174, "epoch": 0.9914586251407541, "grad_norm": 0.6155846118927002, "learning_rate": 1.1564028052465945e-05, "loss": 1.3561, "mean_token_accuracy": 0.6623079578081766, "num_tokens": 1512629995.0, "step": 9025 }, { "entropy": 1.6826336582501729, "epoch": 0.9915684820521271, "grad_norm": 0.6194709539413452, "learning_rate": 1.156243006203739e-05, "loss": 1.532, "mean_token_accuracy": 0.6453428119421005, "num_tokens": 1512822020.0, "step": 9026 }, { "entropy": 1.6776429613431294, "epoch": 0.9916783389635, "grad_norm": 0.6859866976737976, "learning_rate": 1.1560832053808172e-05, "loss": 1.2609, "mean_token_accuracy": 0.6728976418574651, "num_tokens": 1512946589.0, "step": 9027 }, { "entropy": 1.6960046589374542, "epoch": 0.991788195874873, "grad_norm": 0.6310602426528931, "learning_rate": 1.1559234027828872e-05, "loss": 1.4226, "mean_token_accuracy": 0.6640596588452657, "num_tokens": 1513133048.0, "step": 9028 }, { "entropy": 1.7602422833442688, "epoch": 0.9918980527862459, "grad_norm": 0.7083531618118286, "learning_rate": 1.155763598415007e-05, "loss": 1.5015, "mean_token_accuracy": 0.6424345870812734, "num_tokens": 1513332237.0, "step": 9029 }, { "entropy": 1.6640680531660716, "epoch": 0.9920079096976189, "grad_norm": 0.5896248817443848, "learning_rate": 1.155603792282234e-05, "loss": 1.4749, "mean_token_accuracy": 0.6422792822122574, "num_tokens": 1513545562.0, "step": 9030 }, { "entropy": 1.732280304034551, "epoch": 0.9921177666089918, "grad_norm": 0.6422677040100098, "learning_rate": 1.1554439843896261e-05, "loss": 1.5216, "mean_token_accuracy": 0.6359787285327911, "num_tokens": 1513726745.0, "step": 9031 }, { "entropy": 1.6377032995224, "epoch": 0.9922276235203648, "grad_norm": 0.6851502060890198, "learning_rate": 1.1552841747422409e-05, "loss": 1.2825, "mean_token_accuracy": 0.6651904483636221, "num_tokens": 1513892498.0, "step": 9032 }, { "entropy": 1.7014533579349518, "epoch": 0.9923374804317376, "grad_norm": 0.7634040117263794, "learning_rate": 1.1551243633451365e-05, "loss": 1.3062, "mean_token_accuracy": 0.6721664518117905, "num_tokens": 1514062306.0, "step": 9033 }, { "entropy": 1.7060857713222504, "epoch": 0.9924473373431106, "grad_norm": 0.6701831221580505, "learning_rate": 1.1549645502033709e-05, "loss": 1.4121, "mean_token_accuracy": 0.6439545353253683, "num_tokens": 1514266388.0, "step": 9034 }, { "entropy": 1.6986914575099945, "epoch": 0.9925571942544835, "grad_norm": 0.678210437297821, "learning_rate": 1.154804735322002e-05, "loss": 1.3727, "mean_token_accuracy": 0.6620455632607142, "num_tokens": 1514432185.0, "step": 9035 }, { "entropy": 1.5856430729230244, "epoch": 0.9926670511658565, "grad_norm": 0.735240638256073, "learning_rate": 1.154644918706088e-05, "loss": 1.3048, "mean_token_accuracy": 0.6784227043390274, "num_tokens": 1514616077.0, "step": 9036 }, { "entropy": 1.7611307700475056, "epoch": 0.9927769080772294, "grad_norm": 0.7041000127792358, "learning_rate": 1.1544851003606867e-05, "loss": 1.2482, "mean_token_accuracy": 0.6712858428557714, "num_tokens": 1514722578.0, "step": 9037 }, { "entropy": 1.7410919765631359, "epoch": 0.9928867649886024, "grad_norm": 0.9079409837722778, "learning_rate": 1.1543252802908569e-05, "loss": 1.4019, "mean_token_accuracy": 0.6431263387203217, "num_tokens": 1514926211.0, "step": 9038 }, { "entropy": 1.7162447571754456, "epoch": 0.9929966218999753, "grad_norm": 0.8317917585372925, "learning_rate": 1.1541654585016564e-05, "loss": 1.5269, "mean_token_accuracy": 0.6505968123674393, "num_tokens": 1515076490.0, "step": 9039 }, { "entropy": 1.7665583193302155, "epoch": 0.9931064788113482, "grad_norm": 0.675299882888794, "learning_rate": 1.154005634998143e-05, "loss": 1.4275, "mean_token_accuracy": 0.6545489778121313, "num_tokens": 1515243623.0, "step": 9040 }, { "entropy": 1.6715355316797893, "epoch": 0.9932163357227212, "grad_norm": 0.6317136883735657, "learning_rate": 1.1538458097853764e-05, "loss": 1.5146, "mean_token_accuracy": 0.6456638177235922, "num_tokens": 1515452950.0, "step": 9041 }, { "entropy": 1.7157903412977855, "epoch": 0.993326192634094, "grad_norm": 0.7072700262069702, "learning_rate": 1.1536859828684134e-05, "loss": 1.318, "mean_token_accuracy": 0.6620603998502096, "num_tokens": 1515563514.0, "step": 9042 }, { "entropy": 1.7313476900259654, "epoch": 0.993436049545467, "grad_norm": 0.836708664894104, "learning_rate": 1.1535261542523137e-05, "loss": 1.2703, "mean_token_accuracy": 0.6730028490225474, "num_tokens": 1515686917.0, "step": 9043 }, { "entropy": 1.7311189671357472, "epoch": 0.9935459064568399, "grad_norm": 0.6318951845169067, "learning_rate": 1.1533663239421354e-05, "loss": 1.4732, "mean_token_accuracy": 0.6417555063962936, "num_tokens": 1515860537.0, "step": 9044 }, { "entropy": 1.7036487360795338, "epoch": 0.9936557633682129, "grad_norm": 0.7849493026733398, "learning_rate": 1.1532064919429369e-05, "loss": 1.4322, "mean_token_accuracy": 0.6610707342624664, "num_tokens": 1516011139.0, "step": 9045 }, { "entropy": 1.7278256515661876, "epoch": 0.9937656202795858, "grad_norm": 0.6660558581352234, "learning_rate": 1.1530466582597766e-05, "loss": 1.3744, "mean_token_accuracy": 0.6509429017702738, "num_tokens": 1516205732.0, "step": 9046 }, { "entropy": 1.723042756319046, "epoch": 0.9938754771909588, "grad_norm": 0.7152264714241028, "learning_rate": 1.152886822897714e-05, "loss": 1.5193, "mean_token_accuracy": 0.6414504299561182, "num_tokens": 1516391979.0, "step": 9047 }, { "entropy": 1.6957969069480896, "epoch": 0.9939853341023317, "grad_norm": 0.7662060856819153, "learning_rate": 1.152726985861807e-05, "loss": 1.3979, "mean_token_accuracy": 0.6510555545488993, "num_tokens": 1516550510.0, "step": 9048 }, { "entropy": 1.6899131039778392, "epoch": 0.9940951910137047, "grad_norm": 0.7555585503578186, "learning_rate": 1.1525671471571148e-05, "loss": 1.2768, "mean_token_accuracy": 0.6714038848876953, "num_tokens": 1516701671.0, "step": 9049 }, { "entropy": 1.7336824933687847, "epoch": 0.9942050479250776, "grad_norm": 0.7775252461433411, "learning_rate": 1.1524073067886958e-05, "loss": 1.3336, "mean_token_accuracy": 0.6652177522579829, "num_tokens": 1516815568.0, "step": 9050 }, { "entropy": 1.672436664501826, "epoch": 0.9943149048364506, "grad_norm": 0.6073688268661499, "learning_rate": 1.1522474647616095e-05, "loss": 1.3894, "mean_token_accuracy": 0.658691331744194, "num_tokens": 1516980970.0, "step": 9051 }, { "entropy": 1.7328505516052246, "epoch": 0.9944247617478235, "grad_norm": 0.6700348854064941, "learning_rate": 1.1520876210809143e-05, "loss": 1.4246, "mean_token_accuracy": 0.6411223659912745, "num_tokens": 1517164909.0, "step": 9052 }, { "entropy": 1.7129102945327759, "epoch": 0.9945346186591963, "grad_norm": 0.7768635749816895, "learning_rate": 1.1519277757516693e-05, "loss": 1.3948, "mean_token_accuracy": 0.6586629996697108, "num_tokens": 1517309049.0, "step": 9053 }, { "entropy": 1.6529461741447449, "epoch": 0.9946444755705693, "grad_norm": 0.593262255191803, "learning_rate": 1.1517679287789335e-05, "loss": 1.4001, "mean_token_accuracy": 0.6509411931037903, "num_tokens": 1517506632.0, "step": 9054 }, { "entropy": 1.6614431242148082, "epoch": 0.9947543324819422, "grad_norm": 0.6814653277397156, "learning_rate": 1.1516080801677662e-05, "loss": 1.4424, "mean_token_accuracy": 0.6618533333142599, "num_tokens": 1517654638.0, "step": 9055 }, { "entropy": 1.6925993263721466, "epoch": 0.9948641893933152, "grad_norm": 0.6455976963043213, "learning_rate": 1.1514482299232266e-05, "loss": 1.3165, "mean_token_accuracy": 0.6803247978289922, "num_tokens": 1517787177.0, "step": 9056 }, { "entropy": 1.693973034620285, "epoch": 0.9949740463046881, "grad_norm": 0.8472748398780823, "learning_rate": 1.1512883780503737e-05, "loss": 1.4849, "mean_token_accuracy": 0.6628153622150421, "num_tokens": 1517914092.0, "step": 9057 }, { "entropy": 1.6954384346803029, "epoch": 0.9950839032160611, "grad_norm": 0.6717925667762756, "learning_rate": 1.1511285245542663e-05, "loss": 1.3713, "mean_token_accuracy": 0.6591513852278391, "num_tokens": 1518076723.0, "step": 9058 }, { "entropy": 1.6373221576213837, "epoch": 0.995193760127434, "grad_norm": 0.7058557271957397, "learning_rate": 1.1509686694399647e-05, "loss": 1.2549, "mean_token_accuracy": 0.6811738759279251, "num_tokens": 1518196658.0, "step": 9059 }, { "entropy": 1.7418764730294545, "epoch": 0.995303617038807, "grad_norm": 0.642804741859436, "learning_rate": 1.1508088127125274e-05, "loss": 1.4527, "mean_token_accuracy": 0.6466216047604879, "num_tokens": 1518384038.0, "step": 9060 }, { "entropy": 1.6919652024904888, "epoch": 0.9954134739501799, "grad_norm": 2.170675039291382, "learning_rate": 1.150648954377014e-05, "loss": 1.4643, "mean_token_accuracy": 0.6461801479260126, "num_tokens": 1518542211.0, "step": 9061 }, { "entropy": 1.7008472084999084, "epoch": 0.9955233308615529, "grad_norm": 0.6365600228309631, "learning_rate": 1.150489094438484e-05, "loss": 1.482, "mean_token_accuracy": 0.6452033768097559, "num_tokens": 1518761319.0, "step": 9062 }, { "entropy": 1.753299355506897, "epoch": 0.9956331877729258, "grad_norm": 0.6528931260108948, "learning_rate": 1.1503292329019972e-05, "loss": 1.4625, "mean_token_accuracy": 0.6458161721626917, "num_tokens": 1518936829.0, "step": 9063 }, { "entropy": 1.6954053243001301, "epoch": 0.9957430446842988, "grad_norm": 0.788202166557312, "learning_rate": 1.1501693697726126e-05, "loss": 1.4025, "mean_token_accuracy": 0.6617314616839091, "num_tokens": 1519101793.0, "step": 9064 }, { "entropy": 1.6784932613372803, "epoch": 0.9958529015956716, "grad_norm": 0.5653716325759888, "learning_rate": 1.1500095050553901e-05, "loss": 1.4645, "mean_token_accuracy": 0.6363365004460017, "num_tokens": 1519305956.0, "step": 9065 }, { "entropy": 1.6899797419706981, "epoch": 0.9959627585070445, "grad_norm": 0.6082383990287781, "learning_rate": 1.1498496387553892e-05, "loss": 1.4347, "mean_token_accuracy": 0.6532058666149775, "num_tokens": 1519531519.0, "step": 9066 }, { "entropy": 1.676472971836726, "epoch": 0.9960726154184175, "grad_norm": 0.6178333759307861, "learning_rate": 1.1496897708776703e-05, "loss": 1.3604, "mean_token_accuracy": 0.6715668042500814, "num_tokens": 1519708888.0, "step": 9067 }, { "entropy": 1.7519252399603527, "epoch": 0.9961824723297904, "grad_norm": 0.8261198401451111, "learning_rate": 1.1495299014272916e-05, "loss": 1.6367, "mean_token_accuracy": 0.6203742722670237, "num_tokens": 1519896103.0, "step": 9068 }, { "entropy": 1.6828113396962483, "epoch": 0.9962923292411634, "grad_norm": 0.6337330937385559, "learning_rate": 1.1493700304093146e-05, "loss": 1.4801, "mean_token_accuracy": 0.6396347731351852, "num_tokens": 1520084614.0, "step": 9069 }, { "entropy": 1.724067787329356, "epoch": 0.9964021861525363, "grad_norm": 0.5894656181335449, "learning_rate": 1.149210157828798e-05, "loss": 1.4314, "mean_token_accuracy": 0.6514023790756861, "num_tokens": 1520279341.0, "step": 9070 }, { "entropy": 1.6811665495236714, "epoch": 0.9965120430639093, "grad_norm": 0.6790025234222412, "learning_rate": 1.1490502836908022e-05, "loss": 1.3181, "mean_token_accuracy": 0.6632300714651743, "num_tokens": 1520407513.0, "step": 9071 }, { "entropy": 1.6569512685139973, "epoch": 0.9966218999752822, "grad_norm": 0.6046280264854431, "learning_rate": 1.1488904080003868e-05, "loss": 1.3662, "mean_token_accuracy": 0.661707783738772, "num_tokens": 1520561581.0, "step": 9072 }, { "entropy": 1.7094461222489674, "epoch": 0.9967317568866552, "grad_norm": 0.6875105500221252, "learning_rate": 1.1487305307626125e-05, "loss": 1.267, "mean_token_accuracy": 0.667315478126208, "num_tokens": 1520702575.0, "step": 9073 }, { "entropy": 1.737904926141103, "epoch": 0.996841613798028, "grad_norm": 0.7653663158416748, "learning_rate": 1.1485706519825384e-05, "loss": 1.4828, "mean_token_accuracy": 0.6492100208997726, "num_tokens": 1520851927.0, "step": 9074 }, { "entropy": 1.6922054886817932, "epoch": 0.996951470709401, "grad_norm": 0.7423431277275085, "learning_rate": 1.1484107716652256e-05, "loss": 1.4411, "mean_token_accuracy": 0.6581322699785233, "num_tokens": 1520996024.0, "step": 9075 }, { "entropy": 1.7260395387808483, "epoch": 0.9970613276207739, "grad_norm": 0.6210670471191406, "learning_rate": 1.148250889815733e-05, "loss": 1.5119, "mean_token_accuracy": 0.6369271477063497, "num_tokens": 1521257653.0, "step": 9076 }, { "entropy": 1.7174350917339325, "epoch": 0.9971711845321469, "grad_norm": 0.69709712266922, "learning_rate": 1.148091006439122e-05, "loss": 1.3194, "mean_token_accuracy": 0.6692462513844172, "num_tokens": 1521373358.0, "step": 9077 }, { "entropy": 1.6322135925292969, "epoch": 0.9972810414435198, "grad_norm": 0.6395667791366577, "learning_rate": 1.1479311215404518e-05, "loss": 1.4847, "mean_token_accuracy": 0.6553497264782587, "num_tokens": 1521538626.0, "step": 9078 }, { "entropy": 1.657319446404775, "epoch": 0.9973908983548928, "grad_norm": 0.6808715462684631, "learning_rate": 1.1477712351247839e-05, "loss": 1.3141, "mean_token_accuracy": 0.6693860242764155, "num_tokens": 1521697083.0, "step": 9079 }, { "entropy": 1.6426800390084584, "epoch": 0.9975007552662657, "grad_norm": 0.5870410203933716, "learning_rate": 1.1476113471971773e-05, "loss": 1.3934, "mean_token_accuracy": 0.6434798091650009, "num_tokens": 1521871698.0, "step": 9080 }, { "entropy": 1.7074416776498158, "epoch": 0.9976106121776386, "grad_norm": 0.6354272961616516, "learning_rate": 1.1474514577626934e-05, "loss": 1.437, "mean_token_accuracy": 0.6434929817914963, "num_tokens": 1522047384.0, "step": 9081 }, { "entropy": 1.749830315510432, "epoch": 0.9977204690890116, "grad_norm": 0.7293029427528381, "learning_rate": 1.147291566826392e-05, "loss": 1.3922, "mean_token_accuracy": 0.6497508933146795, "num_tokens": 1522218336.0, "step": 9082 }, { "entropy": 1.717184990644455, "epoch": 0.9978303260003845, "grad_norm": 0.6962136626243591, "learning_rate": 1.1471316743933339e-05, "loss": 1.341, "mean_token_accuracy": 0.662678599357605, "num_tokens": 1522347386.0, "step": 9083 }, { "entropy": 1.7010120153427124, "epoch": 0.9979401829117575, "grad_norm": 0.6533600091934204, "learning_rate": 1.1469717804685795e-05, "loss": 1.142, "mean_token_accuracy": 0.6681769291559855, "num_tokens": 1522515121.0, "step": 9084 }, { "entropy": 1.7459450960159302, "epoch": 0.9980500398231303, "grad_norm": 0.655548632144928, "learning_rate": 1.1468118850571899e-05, "loss": 1.3486, "mean_token_accuracy": 0.6624786804119746, "num_tokens": 1522668009.0, "step": 9085 }, { "entropy": 1.7290511826674144, "epoch": 0.9981598967345033, "grad_norm": 0.7485929131507874, "learning_rate": 1.1466519881642246e-05, "loss": 1.3893, "mean_token_accuracy": 0.6591125130653381, "num_tokens": 1522843719.0, "step": 9086 }, { "entropy": 1.6899711390336354, "epoch": 0.9982697536458762, "grad_norm": 0.6839129328727722, "learning_rate": 1.146492089794745e-05, "loss": 1.3313, "mean_token_accuracy": 0.673391396800677, "num_tokens": 1522991970.0, "step": 9087 }, { "entropy": 1.7311389843622844, "epoch": 0.9983796105572492, "grad_norm": 0.6621695756912231, "learning_rate": 1.1463321899538117e-05, "loss": 1.4378, "mean_token_accuracy": 0.6520956506331762, "num_tokens": 1523146080.0, "step": 9088 }, { "entropy": 1.7134305437405903, "epoch": 0.9984894674686221, "grad_norm": 0.5507893562316895, "learning_rate": 1.1461722886464856e-05, "loss": 1.5207, "mean_token_accuracy": 0.6198769162098566, "num_tokens": 1523399266.0, "step": 9089 }, { "entropy": 1.7272109687328339, "epoch": 0.9985993243799951, "grad_norm": 0.9652552008628845, "learning_rate": 1.1460123858778276e-05, "loss": 1.5345, "mean_token_accuracy": 0.6346574972073237, "num_tokens": 1523564638.0, "step": 9090 }, { "entropy": 1.7457146843274434, "epoch": 0.998709181291368, "grad_norm": 0.7757859230041504, "learning_rate": 1.1458524816528981e-05, "loss": 1.5039, "mean_token_accuracy": 0.6465511868397394, "num_tokens": 1523748062.0, "step": 9091 }, { "entropy": 1.6893859306971233, "epoch": 0.998819038202741, "grad_norm": 0.697685718536377, "learning_rate": 1.1456925759767582e-05, "loss": 1.4123, "mean_token_accuracy": 0.6497365186611811, "num_tokens": 1523895192.0, "step": 9092 }, { "entropy": 1.6923041641712189, "epoch": 0.9989288951141139, "grad_norm": 0.7961398363113403, "learning_rate": 1.1455326688544688e-05, "loss": 1.3345, "mean_token_accuracy": 0.6674275547266006, "num_tokens": 1524050097.0, "step": 9093 }, { "entropy": 1.6818099617958069, "epoch": 0.9990387520254868, "grad_norm": 0.6849196553230286, "learning_rate": 1.1453727602910909e-05, "loss": 1.3496, "mean_token_accuracy": 0.6572927534580231, "num_tokens": 1524209256.0, "step": 9094 }, { "entropy": 1.6935460070768993, "epoch": 0.9991486089368598, "grad_norm": 0.6596961617469788, "learning_rate": 1.145212850291686e-05, "loss": 1.2704, "mean_token_accuracy": 0.6796439737081528, "num_tokens": 1524398541.0, "step": 9095 }, { "entropy": 1.6655145784219105, "epoch": 0.9992584658482326, "grad_norm": 0.7738831043243408, "learning_rate": 1.1450529388613144e-05, "loss": 1.4704, "mean_token_accuracy": 0.6514392644166946, "num_tokens": 1524586535.0, "step": 9096 }, { "entropy": 1.635209560394287, "epoch": 0.9993683227596056, "grad_norm": 0.6728395223617554, "learning_rate": 1.1448930260050375e-05, "loss": 1.2365, "mean_token_accuracy": 0.6900093406438828, "num_tokens": 1524762897.0, "step": 9097 }, { "entropy": 1.71544353167216, "epoch": 0.9994781796709785, "grad_norm": 0.8286552429199219, "learning_rate": 1.1447331117279168e-05, "loss": 1.3268, "mean_token_accuracy": 0.6621815711259842, "num_tokens": 1524990809.0, "step": 9098 }, { "entropy": 1.6993728578090668, "epoch": 0.9995880365823515, "grad_norm": 0.6080448031425476, "learning_rate": 1.144573196035013e-05, "loss": 1.4415, "mean_token_accuracy": 0.6439933578173319, "num_tokens": 1525244055.0, "step": 9099 }, { "entropy": 1.7140068113803864, "epoch": 0.9996978934937244, "grad_norm": 0.7794548869132996, "learning_rate": 1.144413278931388e-05, "loss": 1.3395, "mean_token_accuracy": 0.6707132905721664, "num_tokens": 1525433832.0, "step": 9100 }, { "entropy": 1.800976832707723, "epoch": 0.9998077504050974, "grad_norm": 0.6397004723548889, "learning_rate": 1.1442533604221025e-05, "loss": 1.4958, "mean_token_accuracy": 0.6379488656918207, "num_tokens": 1525669327.0, "step": 9101 }, { "entropy": 1.7914798657099407, "epoch": 0.9999176073164703, "grad_norm": 0.7445523738861084, "learning_rate": 1.144093440512218e-05, "loss": 1.3949, "mean_token_accuracy": 0.6527659147977829, "num_tokens": 1525797142.0, "step": 9102 }, { "entropy": 1.7685750590430365, "epoch": 1.0, "grad_norm": 0.8221209049224854, "learning_rate": 1.1439335192067961e-05, "loss": 1.3113, "mean_token_accuracy": 0.6720441844728258, "num_tokens": 1525864289.0, "step": 9103 }, { "entropy": 1.7487789193789165, "epoch": 1.0001098569113729, "grad_norm": 0.6469790935516357, "learning_rate": 1.1437735965108982e-05, "loss": 1.3983, "mean_token_accuracy": 0.6585537244876226, "num_tokens": 1526037039.0, "step": 9104 }, { "entropy": 1.7090687155723572, "epoch": 1.0002197138227458, "grad_norm": 0.6354356408119202, "learning_rate": 1.1436136724295855e-05, "loss": 1.3507, "mean_token_accuracy": 0.6568548729022344, "num_tokens": 1526183317.0, "step": 9105 }, { "entropy": 1.778639147679011, "epoch": 1.0003295707341189, "grad_norm": 0.6832847595214844, "learning_rate": 1.1434537469679197e-05, "loss": 1.5105, "mean_token_accuracy": 0.6421279708544413, "num_tokens": 1526347692.0, "step": 9106 }, { "entropy": 1.671216607093811, "epoch": 1.0004394276454918, "grad_norm": 0.6764916181564331, "learning_rate": 1.1432938201309627e-05, "loss": 1.4491, "mean_token_accuracy": 0.6480690489212672, "num_tokens": 1526518664.0, "step": 9107 }, { "entropy": 1.6947729587554932, "epoch": 1.0005492845568646, "grad_norm": 0.7800838947296143, "learning_rate": 1.1431338919237753e-05, "loss": 1.4244, "mean_token_accuracy": 0.6530092557271322, "num_tokens": 1526722405.0, "step": 9108 }, { "entropy": 1.76162455479304, "epoch": 1.0006591414682375, "grad_norm": 0.6866593360900879, "learning_rate": 1.1429739623514202e-05, "loss": 1.3098, "mean_token_accuracy": 0.6589891264835993, "num_tokens": 1526831942.0, "step": 9109 }, { "entropy": 1.706797569990158, "epoch": 1.0007689983796106, "grad_norm": 0.6604471802711487, "learning_rate": 1.1428140314189581e-05, "loss": 1.5449, "mean_token_accuracy": 0.6446791191895803, "num_tokens": 1527004873.0, "step": 9110 }, { "entropy": 1.7111522555351257, "epoch": 1.0008788552909835, "grad_norm": 0.7985087633132935, "learning_rate": 1.1426540991314516e-05, "loss": 1.2653, "mean_token_accuracy": 0.6687728961308798, "num_tokens": 1527118121.0, "step": 9111 }, { "entropy": 1.7522371212641399, "epoch": 1.0009887122023564, "grad_norm": 0.6903984546661377, "learning_rate": 1.1424941654939619e-05, "loss": 1.4082, "mean_token_accuracy": 0.6500783811012903, "num_tokens": 1527289842.0, "step": 9112 }, { "entropy": 1.6945828100045521, "epoch": 1.0010985691137293, "grad_norm": 0.9061050415039062, "learning_rate": 1.1423342305115512e-05, "loss": 1.3049, "mean_token_accuracy": 0.669381340344747, "num_tokens": 1527431687.0, "step": 9113 }, { "entropy": 1.6780508855978649, "epoch": 1.0012084260251024, "grad_norm": 0.631228506565094, "learning_rate": 1.1421742941892808e-05, "loss": 1.3588, "mean_token_accuracy": 0.6521518329779307, "num_tokens": 1527618917.0, "step": 9114 }, { "entropy": 1.707773486773173, "epoch": 1.0013182829364753, "grad_norm": 0.7094998359680176, "learning_rate": 1.1420143565322132e-05, "loss": 1.4832, "mean_token_accuracy": 0.6415019631385803, "num_tokens": 1527828488.0, "step": 9115 }, { "entropy": 1.6614188154538472, "epoch": 1.0014281398478482, "grad_norm": 0.7236858010292053, "learning_rate": 1.1418544175454103e-05, "loss": 1.23, "mean_token_accuracy": 0.6863870620727539, "num_tokens": 1527993879.0, "step": 9116 }, { "entropy": 1.6892668704191844, "epoch": 1.001537996759221, "grad_norm": 0.6846614480018616, "learning_rate": 1.1416944772339335e-05, "loss": 1.3371, "mean_token_accuracy": 0.6594837407271067, "num_tokens": 1528128935.0, "step": 9117 }, { "entropy": 1.7514809270699818, "epoch": 1.001647853670594, "grad_norm": 0.7638584971427917, "learning_rate": 1.1415345356028458e-05, "loss": 1.4151, "mean_token_accuracy": 0.6582140922546387, "num_tokens": 1528297300.0, "step": 9118 }, { "entropy": 1.6739897926648457, "epoch": 1.001757710581967, "grad_norm": 0.6910973787307739, "learning_rate": 1.1413745926572086e-05, "loss": 1.3261, "mean_token_accuracy": 0.6593633989493052, "num_tokens": 1528434737.0, "step": 9119 }, { "entropy": 1.6245358089605968, "epoch": 1.00186756749334, "grad_norm": 0.6937578916549683, "learning_rate": 1.1412146484020841e-05, "loss": 1.3433, "mean_token_accuracy": 0.6818140596151352, "num_tokens": 1528594319.0, "step": 9120 }, { "entropy": 1.780480186144511, "epoch": 1.0019774244047128, "grad_norm": 0.7517448663711548, "learning_rate": 1.1410547028425345e-05, "loss": 1.341, "mean_token_accuracy": 0.6648537566264471, "num_tokens": 1528698660.0, "step": 9121 }, { "entropy": 1.7274539073308308, "epoch": 1.0020872813160857, "grad_norm": 0.8150880336761475, "learning_rate": 1.140894755983622e-05, "loss": 1.4617, "mean_token_accuracy": 0.6476200868686041, "num_tokens": 1528869853.0, "step": 9122 }, { "entropy": 1.6871586740016937, "epoch": 1.0021971382274588, "grad_norm": 0.6788790225982666, "learning_rate": 1.1407348078304094e-05, "loss": 1.4679, "mean_token_accuracy": 0.6567584524552027, "num_tokens": 1529051202.0, "step": 9123 }, { "entropy": 1.6829076210657756, "epoch": 1.0023069951388317, "grad_norm": 0.5760919451713562, "learning_rate": 1.1405748583879578e-05, "loss": 1.4402, "mean_token_accuracy": 0.65225517253081, "num_tokens": 1529253732.0, "step": 9124 }, { "entropy": 1.6780024766921997, "epoch": 1.0024168520502046, "grad_norm": 0.6377182006835938, "learning_rate": 1.1404149076613307e-05, "loss": 1.3531, "mean_token_accuracy": 0.6596865554650625, "num_tokens": 1529399965.0, "step": 9125 }, { "entropy": 1.7208391726016998, "epoch": 1.0025267089615775, "grad_norm": 0.6458075046539307, "learning_rate": 1.1402549556555897e-05, "loss": 1.5011, "mean_token_accuracy": 0.6317235877116522, "num_tokens": 1529603450.0, "step": 9126 }, { "entropy": 1.6888208488623302, "epoch": 1.0026365658729506, "grad_norm": 0.9376909732818604, "learning_rate": 1.1400950023757974e-05, "loss": 1.4523, "mean_token_accuracy": 0.6374652137358984, "num_tokens": 1529778344.0, "step": 9127 }, { "entropy": 1.6820887227853139, "epoch": 1.0027464227843235, "grad_norm": 0.6910247206687927, "learning_rate": 1.1399350478270169e-05, "loss": 1.2399, "mean_token_accuracy": 0.678116371234258, "num_tokens": 1529879742.0, "step": 9128 }, { "entropy": 1.6896148324012756, "epoch": 1.0028562796956964, "grad_norm": 0.8987225890159607, "learning_rate": 1.1397750920143096e-05, "loss": 1.1534, "mean_token_accuracy": 0.6927760044733683, "num_tokens": 1530010877.0, "step": 9129 }, { "entropy": 1.6502757966518402, "epoch": 1.0029661366070692, "grad_norm": 0.7082890272140503, "learning_rate": 1.1396151349427386e-05, "loss": 1.3722, "mean_token_accuracy": 0.6598286827405294, "num_tokens": 1530152017.0, "step": 9130 }, { "entropy": 1.6588487525780995, "epoch": 1.0030759935184421, "grad_norm": 0.6771763563156128, "learning_rate": 1.1394551766173668e-05, "loss": 1.4409, "mean_token_accuracy": 0.649484987060229, "num_tokens": 1530341414.0, "step": 9131 }, { "entropy": 1.6604489584763844, "epoch": 1.0031858504298152, "grad_norm": 0.6896274089813232, "learning_rate": 1.1392952170432561e-05, "loss": 1.4544, "mean_token_accuracy": 0.6518164028724035, "num_tokens": 1530547366.0, "step": 9132 }, { "entropy": 1.7379337052504222, "epoch": 1.0032957073411881, "grad_norm": 0.7070825695991516, "learning_rate": 1.1391352562254696e-05, "loss": 1.4681, "mean_token_accuracy": 0.6329749723275503, "num_tokens": 1530710763.0, "step": 9133 }, { "entropy": 1.7117513318856556, "epoch": 1.003405564252561, "grad_norm": 0.752705991268158, "learning_rate": 1.1389752941690698e-05, "loss": 1.3212, "mean_token_accuracy": 0.6719024926424026, "num_tokens": 1530851989.0, "step": 9134 }, { "entropy": 1.6691398521264393, "epoch": 1.0035154211639339, "grad_norm": 0.6442150473594666, "learning_rate": 1.1388153308791196e-05, "loss": 1.2853, "mean_token_accuracy": 0.6734344561894735, "num_tokens": 1531026173.0, "step": 9135 }, { "entropy": 1.7129848897457123, "epoch": 1.003625278075307, "grad_norm": 0.643925130367279, "learning_rate": 1.1386553663606816e-05, "loss": 1.3813, "mean_token_accuracy": 0.6649036655823389, "num_tokens": 1531167599.0, "step": 9136 }, { "entropy": 1.7421828111012776, "epoch": 1.0037351349866799, "grad_norm": 0.7078534960746765, "learning_rate": 1.1384954006188188e-05, "loss": 1.4009, "mean_token_accuracy": 0.6498052229483923, "num_tokens": 1531284335.0, "step": 9137 }, { "entropy": 1.6620370745658875, "epoch": 1.0038449918980528, "grad_norm": 0.6959486603736877, "learning_rate": 1.1383354336585939e-05, "loss": 1.2479, "mean_token_accuracy": 0.6867090910673141, "num_tokens": 1531411606.0, "step": 9138 }, { "entropy": 1.7095261812210083, "epoch": 1.0039548488094256, "grad_norm": 0.6359619498252869, "learning_rate": 1.1381754654850696e-05, "loss": 1.4088, "mean_token_accuracy": 0.6495067228873571, "num_tokens": 1531650481.0, "step": 9139 }, { "entropy": 1.7148667971293132, "epoch": 1.0040647057207988, "grad_norm": 0.626611053943634, "learning_rate": 1.1380154961033091e-05, "loss": 1.4655, "mean_token_accuracy": 0.6441022356351217, "num_tokens": 1531860240.0, "step": 9140 }, { "entropy": 1.6672312021255493, "epoch": 1.0041745626321716, "grad_norm": 0.6994165182113647, "learning_rate": 1.1378555255183756e-05, "loss": 1.3451, "mean_token_accuracy": 0.6728704025348028, "num_tokens": 1532028027.0, "step": 9141 }, { "entropy": 1.6781473656495411, "epoch": 1.0042844195435445, "grad_norm": 0.6643490791320801, "learning_rate": 1.1376955537353314e-05, "loss": 1.3587, "mean_token_accuracy": 0.6705238421758016, "num_tokens": 1532185194.0, "step": 9142 }, { "entropy": 1.695909669001897, "epoch": 1.0043942764549174, "grad_norm": 0.7049947381019592, "learning_rate": 1.1375355807592401e-05, "loss": 1.2133, "mean_token_accuracy": 0.6815102944771448, "num_tokens": 1532292206.0, "step": 9143 }, { "entropy": 1.6645172834396362, "epoch": 1.0045041333662903, "grad_norm": 0.8290209174156189, "learning_rate": 1.1373756065951645e-05, "loss": 1.29, "mean_token_accuracy": 0.6677450140317281, "num_tokens": 1532417707.0, "step": 9144 }, { "entropy": 1.7276211281617482, "epoch": 1.0046139902776634, "grad_norm": 0.6277461051940918, "learning_rate": 1.1372156312481676e-05, "loss": 1.4016, "mean_token_accuracy": 0.6529937634865443, "num_tokens": 1532596038.0, "step": 9145 }, { "entropy": 1.6773775219917297, "epoch": 1.0047238471890363, "grad_norm": 0.5622502565383911, "learning_rate": 1.1370556547233129e-05, "loss": 1.3817, "mean_token_accuracy": 0.6580819934606552, "num_tokens": 1532837671.0, "step": 9146 }, { "entropy": 1.6747903128465016, "epoch": 1.0048337041004092, "grad_norm": 0.7736158967018127, "learning_rate": 1.1368956770256636e-05, "loss": 1.43, "mean_token_accuracy": 0.6633151968320211, "num_tokens": 1532987212.0, "step": 9147 }, { "entropy": 1.6907167434692383, "epoch": 1.004943561011782, "grad_norm": 0.6770092248916626, "learning_rate": 1.1367356981602824e-05, "loss": 1.315, "mean_token_accuracy": 0.6748213569323221, "num_tokens": 1533166077.0, "step": 9148 }, { "entropy": 1.630173772573471, "epoch": 1.0050534179231552, "grad_norm": 0.7909526824951172, "learning_rate": 1.1365757181322332e-05, "loss": 1.2256, "mean_token_accuracy": 0.676344245672226, "num_tokens": 1533285561.0, "step": 9149 }, { "entropy": 1.6531668106714885, "epoch": 1.005163274834528, "grad_norm": 0.7197229266166687, "learning_rate": 1.1364157369465791e-05, "loss": 1.3322, "mean_token_accuracy": 0.6645158727963766, "num_tokens": 1533446931.0, "step": 9150 }, { "entropy": 1.7534123659133911, "epoch": 1.005273131745901, "grad_norm": 0.7781052589416504, "learning_rate": 1.136255754608383e-05, "loss": 1.3887, "mean_token_accuracy": 0.6418828169504801, "num_tokens": 1533605019.0, "step": 9151 }, { "entropy": 1.711018721262614, "epoch": 1.0053829886572738, "grad_norm": 0.6242640018463135, "learning_rate": 1.1360957711227087e-05, "loss": 1.3331, "mean_token_accuracy": 0.658738394578298, "num_tokens": 1533756344.0, "step": 9152 }, { "entropy": 1.7103002866109211, "epoch": 1.005492845568647, "grad_norm": 0.7828008532524109, "learning_rate": 1.1359357864946197e-05, "loss": 1.4497, "mean_token_accuracy": 0.6512656211853027, "num_tokens": 1533940363.0, "step": 9153 }, { "entropy": 1.7072757482528687, "epoch": 1.0056027024800198, "grad_norm": 0.6864430904388428, "learning_rate": 1.135775800729179e-05, "loss": 1.4115, "mean_token_accuracy": 0.6535514990488688, "num_tokens": 1534129155.0, "step": 9154 }, { "entropy": 1.645534763733546, "epoch": 1.0057125593913927, "grad_norm": 0.6666370630264282, "learning_rate": 1.1356158138314504e-05, "loss": 1.3353, "mean_token_accuracy": 0.6572008977333704, "num_tokens": 1534352556.0, "step": 9155 }, { "entropy": 1.7274697025616963, "epoch": 1.0058224163027656, "grad_norm": 0.762096107006073, "learning_rate": 1.1354558258064974e-05, "loss": 1.3457, "mean_token_accuracy": 0.6579453647136688, "num_tokens": 1534462733.0, "step": 9156 }, { "entropy": 1.7067709763844807, "epoch": 1.0059322732141387, "grad_norm": 0.7625720500946045, "learning_rate": 1.1352958366593838e-05, "loss": 1.1324, "mean_token_accuracy": 0.7027974327405294, "num_tokens": 1534566983.0, "step": 9157 }, { "entropy": 1.6603530049324036, "epoch": 1.0060421301255116, "grad_norm": 0.6020463705062866, "learning_rate": 1.1351358463951722e-05, "loss": 1.5009, "mean_token_accuracy": 0.6399320860703787, "num_tokens": 1534784810.0, "step": 9158 }, { "entropy": 1.7036788860956829, "epoch": 1.0061519870368845, "grad_norm": 0.6299261450767517, "learning_rate": 1.1349758550189276e-05, "loss": 1.4234, "mean_token_accuracy": 0.6528842945893606, "num_tokens": 1534972111.0, "step": 9159 }, { "entropy": 1.69377605120341, "epoch": 1.0062618439482574, "grad_norm": 0.7959760427474976, "learning_rate": 1.1348158625357125e-05, "loss": 1.3657, "mean_token_accuracy": 0.6739930411179861, "num_tokens": 1535118835.0, "step": 9160 }, { "entropy": 1.7244078516960144, "epoch": 1.0063717008596302, "grad_norm": 0.5939955115318298, "learning_rate": 1.1346558689505911e-05, "loss": 1.5987, "mean_token_accuracy": 0.6203551987806956, "num_tokens": 1535344945.0, "step": 9161 }, { "entropy": 1.6984511812527974, "epoch": 1.0064815577710033, "grad_norm": 0.6517001390457153, "learning_rate": 1.134495874268627e-05, "loss": 1.3917, "mean_token_accuracy": 0.6546116669972738, "num_tokens": 1535547924.0, "step": 9162 }, { "entropy": 1.765064944823583, "epoch": 1.0065914146823762, "grad_norm": 0.6505841612815857, "learning_rate": 1.1343358784948841e-05, "loss": 1.4628, "mean_token_accuracy": 0.6358625143766403, "num_tokens": 1535727355.0, "step": 9163 }, { "entropy": 1.652586172024409, "epoch": 1.0067012715937491, "grad_norm": 0.6075358986854553, "learning_rate": 1.1341758816344261e-05, "loss": 1.3771, "mean_token_accuracy": 0.657783105969429, "num_tokens": 1535890845.0, "step": 9164 }, { "entropy": 1.7130601306756337, "epoch": 1.006811128505122, "grad_norm": 0.6406556963920593, "learning_rate": 1.1340158836923169e-05, "loss": 1.3664, "mean_token_accuracy": 0.6631237914164861, "num_tokens": 1536024832.0, "step": 9165 }, { "entropy": 1.6731863021850586, "epoch": 1.006920985416495, "grad_norm": 0.6789574027061462, "learning_rate": 1.1338558846736203e-05, "loss": 1.5358, "mean_token_accuracy": 0.644293467203776, "num_tokens": 1536181406.0, "step": 9166 }, { "entropy": 1.7349721789360046, "epoch": 1.007030842327868, "grad_norm": 0.754366934299469, "learning_rate": 1.1336958845834001e-05, "loss": 1.3995, "mean_token_accuracy": 0.6601565976937612, "num_tokens": 1536312544.0, "step": 9167 }, { "entropy": 1.7296982606252034, "epoch": 1.0071406992392409, "grad_norm": 0.7296738624572754, "learning_rate": 1.1335358834267202e-05, "loss": 1.4222, "mean_token_accuracy": 0.6510292192300161, "num_tokens": 1536477655.0, "step": 9168 }, { "entropy": 1.6839477519194286, "epoch": 1.0072505561506138, "grad_norm": 0.7424401044845581, "learning_rate": 1.1333758812086455e-05, "loss": 1.4959, "mean_token_accuracy": 0.6491817037264506, "num_tokens": 1536682787.0, "step": 9169 }, { "entropy": 1.7023440599441528, "epoch": 1.0073604130619869, "grad_norm": 0.6158515810966492, "learning_rate": 1.1332158779342382e-05, "loss": 1.4231, "mean_token_accuracy": 0.649179662267367, "num_tokens": 1536874196.0, "step": 9170 }, { "entropy": 1.6747990051905315, "epoch": 1.0074702699733598, "grad_norm": 0.7934166789054871, "learning_rate": 1.1330558736085639e-05, "loss": 1.4705, "mean_token_accuracy": 0.6424223830302557, "num_tokens": 1537043636.0, "step": 9171 }, { "entropy": 1.7200193206469219, "epoch": 1.0075801268847326, "grad_norm": 0.625928521156311, "learning_rate": 1.132895868236686e-05, "loss": 1.3149, "mean_token_accuracy": 0.6662678668896357, "num_tokens": 1537192653.0, "step": 9172 }, { "entropy": 1.6934810976187389, "epoch": 1.0076899837961055, "grad_norm": 0.7504294514656067, "learning_rate": 1.1327358618236686e-05, "loss": 1.4699, "mean_token_accuracy": 0.6619169364372889, "num_tokens": 1537364617.0, "step": 9173 }, { "entropy": 1.6977708041667938, "epoch": 1.0077998407074784, "grad_norm": 0.7154932618141174, "learning_rate": 1.132575854374576e-05, "loss": 1.3676, "mean_token_accuracy": 0.6595998754103979, "num_tokens": 1537500964.0, "step": 9174 }, { "entropy": 1.661940226952235, "epoch": 1.0079096976188515, "grad_norm": 0.6199997067451477, "learning_rate": 1.1324158458944724e-05, "loss": 1.3844, "mean_token_accuracy": 0.6616794715325037, "num_tokens": 1537688873.0, "step": 9175 }, { "entropy": 1.7638680438200633, "epoch": 1.0080195545302244, "grad_norm": 0.6562539339065552, "learning_rate": 1.1322558363884215e-05, "loss": 1.5134, "mean_token_accuracy": 0.6430147786935171, "num_tokens": 1537938462.0, "step": 9176 }, { "entropy": 1.706565539042155, "epoch": 1.0081294114415973, "grad_norm": 0.7883732914924622, "learning_rate": 1.1320958258614882e-05, "loss": 1.4292, "mean_token_accuracy": 0.6586494793494543, "num_tokens": 1538090142.0, "step": 9177 }, { "entropy": 1.7051230370998383, "epoch": 1.0082392683529702, "grad_norm": 0.635830283164978, "learning_rate": 1.1319358143187364e-05, "loss": 1.4372, "mean_token_accuracy": 0.6473241200049719, "num_tokens": 1538303825.0, "step": 9178 }, { "entropy": 1.6625941793123882, "epoch": 1.0083491252643433, "grad_norm": 0.6459980607032776, "learning_rate": 1.1317758017652304e-05, "loss": 1.3858, "mean_token_accuracy": 0.6522909849882126, "num_tokens": 1538515668.0, "step": 9179 }, { "entropy": 1.69016495347023, "epoch": 1.0084589821757162, "grad_norm": 0.8300599455833435, "learning_rate": 1.1316157882060347e-05, "loss": 1.4209, "mean_token_accuracy": 0.6421416252851486, "num_tokens": 1538721803.0, "step": 9180 }, { "entropy": 1.7026409308115642, "epoch": 1.008568839087089, "grad_norm": 0.7141305804252625, "learning_rate": 1.131455773646214e-05, "loss": 1.3456, "mean_token_accuracy": 0.6597028175989786, "num_tokens": 1538867063.0, "step": 9181 }, { "entropy": 1.6708631614844005, "epoch": 1.008678695998462, "grad_norm": 0.7779679298400879, "learning_rate": 1.1312957580908316e-05, "loss": 1.399, "mean_token_accuracy": 0.6662464737892151, "num_tokens": 1539025340.0, "step": 9182 }, { "entropy": 1.7861079672972362, "epoch": 1.008788552909835, "grad_norm": 0.7467420697212219, "learning_rate": 1.1311357415449527e-05, "loss": 1.409, "mean_token_accuracy": 0.6420427312453588, "num_tokens": 1539177337.0, "step": 9183 }, { "entropy": 1.6625968714555104, "epoch": 1.008898409821208, "grad_norm": 0.7059647440910339, "learning_rate": 1.1309757240136416e-05, "loss": 1.3722, "mean_token_accuracy": 0.6587095757325491, "num_tokens": 1539350824.0, "step": 9184 }, { "entropy": 1.6715769072373707, "epoch": 1.0090082667325808, "grad_norm": 0.8573365211486816, "learning_rate": 1.130815705501963e-05, "loss": 1.2702, "mean_token_accuracy": 0.6697567055622736, "num_tokens": 1539515545.0, "step": 9185 }, { "entropy": 1.6940159698327382, "epoch": 1.0091181236439537, "grad_norm": 0.7451865673065186, "learning_rate": 1.1306556860149807e-05, "loss": 1.2517, "mean_token_accuracy": 0.6819993555545807, "num_tokens": 1539654828.0, "step": 9186 }, { "entropy": 1.6895244518915813, "epoch": 1.0092279805553266, "grad_norm": 0.662607729434967, "learning_rate": 1.1304956655577603e-05, "loss": 1.498, "mean_token_accuracy": 0.6299208501974741, "num_tokens": 1539887429.0, "step": 9187 }, { "entropy": 1.7312080164750416, "epoch": 1.0093378374666997, "grad_norm": 0.7649029493331909, "learning_rate": 1.1303356441353657e-05, "loss": 1.2637, "mean_token_accuracy": 0.6721003005901972, "num_tokens": 1540007181.0, "step": 9188 }, { "entropy": 1.6537673771381378, "epoch": 1.0094476943780726, "grad_norm": 0.6279980540275574, "learning_rate": 1.1301756217528617e-05, "loss": 1.4753, "mean_token_accuracy": 0.6382663249969482, "num_tokens": 1540226678.0, "step": 9189 }, { "entropy": 1.717966636021932, "epoch": 1.0095575512894455, "grad_norm": 0.6371181607246399, "learning_rate": 1.1300155984153125e-05, "loss": 1.5536, "mean_token_accuracy": 0.6400111019611359, "num_tokens": 1540398417.0, "step": 9190 }, { "entropy": 1.7388447523117065, "epoch": 1.0096674082008184, "grad_norm": 0.6854660511016846, "learning_rate": 1.1298555741277837e-05, "loss": 1.4315, "mean_token_accuracy": 0.6481965134541193, "num_tokens": 1540574257.0, "step": 9191 }, { "entropy": 1.7373858094215393, "epoch": 1.0097772651121915, "grad_norm": 0.8039863705635071, "learning_rate": 1.1296955488953385e-05, "loss": 1.5048, "mean_token_accuracy": 0.6509123841921488, "num_tokens": 1540737286.0, "step": 9192 }, { "entropy": 1.733175406853358, "epoch": 1.0098871220235643, "grad_norm": 0.7755190134048462, "learning_rate": 1.1295355227230434e-05, "loss": 1.4932, "mean_token_accuracy": 0.6399683107932409, "num_tokens": 1540938739.0, "step": 9193 }, { "entropy": 1.7742779056231182, "epoch": 1.0099969789349372, "grad_norm": 0.8532186150550842, "learning_rate": 1.1293754956159622e-05, "loss": 1.2785, "mean_token_accuracy": 0.681659941871961, "num_tokens": 1541076692.0, "step": 9194 }, { "entropy": 1.6056585907936096, "epoch": 1.0101068358463101, "grad_norm": 0.6729063987731934, "learning_rate": 1.1292154675791596e-05, "loss": 1.3879, "mean_token_accuracy": 0.6605608214934667, "num_tokens": 1541267393.0, "step": 9195 }, { "entropy": 1.6868411600589752, "epoch": 1.0102166927576832, "grad_norm": 0.6951854825019836, "learning_rate": 1.1290554386177006e-05, "loss": 1.3287, "mean_token_accuracy": 0.6730459630489349, "num_tokens": 1541408773.0, "step": 9196 }, { "entropy": 1.70897176861763, "epoch": 1.010326549669056, "grad_norm": 0.7483338117599487, "learning_rate": 1.1288954087366504e-05, "loss": 1.5295, "mean_token_accuracy": 0.6577043558160464, "num_tokens": 1541578695.0, "step": 9197 }, { "entropy": 1.6923380196094513, "epoch": 1.010436406580429, "grad_norm": 0.6193323731422424, "learning_rate": 1.128735377941073e-05, "loss": 1.4399, "mean_token_accuracy": 0.651967058579127, "num_tokens": 1541828514.0, "step": 9198 }, { "entropy": 1.6712921659151714, "epoch": 1.0105462634918019, "grad_norm": 0.6577204465866089, "learning_rate": 1.1285753462360343e-05, "loss": 1.3324, "mean_token_accuracy": 0.6647132039070129, "num_tokens": 1541971144.0, "step": 9199 }, { "entropy": 1.6951302190621693, "epoch": 1.0106561204031748, "grad_norm": 0.67694091796875, "learning_rate": 1.1284153136265986e-05, "loss": 1.4847, "mean_token_accuracy": 0.656242623925209, "num_tokens": 1542162549.0, "step": 9200 }, { "entropy": 1.658402919769287, "epoch": 1.0107659773145479, "grad_norm": 0.6639556884765625, "learning_rate": 1.1282552801178308e-05, "loss": 1.2398, "mean_token_accuracy": 0.6856526831785837, "num_tokens": 1542331765.0, "step": 9201 }, { "entropy": 1.7573369940121968, "epoch": 1.0108758342259208, "grad_norm": 0.7384838461875916, "learning_rate": 1.1280952457147964e-05, "loss": 1.4386, "mean_token_accuracy": 0.6591017047564188, "num_tokens": 1542461418.0, "step": 9202 }, { "entropy": 1.7195665736993153, "epoch": 1.0109856911372936, "grad_norm": 0.7522571086883545, "learning_rate": 1.1279352104225603e-05, "loss": 1.2697, "mean_token_accuracy": 0.6741450677315394, "num_tokens": 1542598399.0, "step": 9203 }, { "entropy": 1.7449373702208202, "epoch": 1.0110955480486665, "grad_norm": 0.7479003071784973, "learning_rate": 1.127775174246187e-05, "loss": 1.2921, "mean_token_accuracy": 0.6684492280085882, "num_tokens": 1542703548.0, "step": 9204 }, { "entropy": 1.7740589280923207, "epoch": 1.0112054049600396, "grad_norm": 0.7149597406387329, "learning_rate": 1.1276151371907422e-05, "loss": 1.4712, "mean_token_accuracy": 0.647121841708819, "num_tokens": 1542869904.0, "step": 9205 }, { "entropy": 1.759735494852066, "epoch": 1.0113152618714125, "grad_norm": 0.616786003112793, "learning_rate": 1.1274550992612905e-05, "loss": 1.5223, "mean_token_accuracy": 0.6346758852402369, "num_tokens": 1543072195.0, "step": 9206 }, { "entropy": 1.6773889164129894, "epoch": 1.0114251187827854, "grad_norm": 0.7276062965393066, "learning_rate": 1.1272950604628974e-05, "loss": 1.465, "mean_token_accuracy": 0.6493238161007563, "num_tokens": 1543242043.0, "step": 9207 }, { "entropy": 1.7213714122772217, "epoch": 1.0115349756941583, "grad_norm": 0.5921209454536438, "learning_rate": 1.1271350208006277e-05, "loss": 1.4313, "mean_token_accuracy": 0.6454088240861893, "num_tokens": 1543424668.0, "step": 9208 }, { "entropy": 1.7043544550736744, "epoch": 1.0116448326055314, "grad_norm": 0.703683078289032, "learning_rate": 1.1269749802795475e-05, "loss": 1.5242, "mean_token_accuracy": 0.639569049080213, "num_tokens": 1543590504.0, "step": 9209 }, { "entropy": 1.7013638118902843, "epoch": 1.0117546895169043, "grad_norm": 0.651660144329071, "learning_rate": 1.1268149389047207e-05, "loss": 1.3612, "mean_token_accuracy": 0.6684914082288742, "num_tokens": 1543742205.0, "step": 9210 }, { "entropy": 1.7225966254870098, "epoch": 1.0118645464282772, "grad_norm": 0.680589497089386, "learning_rate": 1.1266548966812136e-05, "loss": 1.4609, "mean_token_accuracy": 0.6508485525846481, "num_tokens": 1543960759.0, "step": 9211 }, { "entropy": 1.690873513619105, "epoch": 1.01197440333965, "grad_norm": 0.6258131265640259, "learning_rate": 1.1264948536140908e-05, "loss": 1.2756, "mean_token_accuracy": 0.6746162871519724, "num_tokens": 1544079815.0, "step": 9212 }, { "entropy": 1.7661484678586323, "epoch": 1.012084260251023, "grad_norm": 0.7420499324798584, "learning_rate": 1.126334809708418e-05, "loss": 1.4235, "mean_token_accuracy": 0.6455812205870947, "num_tokens": 1544236306.0, "step": 9213 }, { "entropy": 1.6909300088882446, "epoch": 1.012194117162396, "grad_norm": 0.7056745886802673, "learning_rate": 1.1261747649692598e-05, "loss": 1.4225, "mean_token_accuracy": 0.6687211891015371, "num_tokens": 1544360026.0, "step": 9214 }, { "entropy": 1.7200450201829274, "epoch": 1.012303974073769, "grad_norm": 0.6737309694290161, "learning_rate": 1.1260147194016826e-05, "loss": 1.4383, "mean_token_accuracy": 0.6554515163103739, "num_tokens": 1544518333.0, "step": 9215 }, { "entropy": 1.6989065408706665, "epoch": 1.0124138309851418, "grad_norm": 0.7748240232467651, "learning_rate": 1.1258546730107511e-05, "loss": 1.3029, "mean_token_accuracy": 0.6642332822084427, "num_tokens": 1544633183.0, "step": 9216 }, { "entropy": 1.684309144814809, "epoch": 1.0125236878965147, "grad_norm": 0.7276560068130493, "learning_rate": 1.1256946258015309e-05, "loss": 1.3735, "mean_token_accuracy": 0.6629331211249033, "num_tokens": 1544766272.0, "step": 9217 }, { "entropy": 1.7121345500151317, "epoch": 1.0126335448078878, "grad_norm": 0.6448559761047363, "learning_rate": 1.1255345777790874e-05, "loss": 1.351, "mean_token_accuracy": 0.6598079651594162, "num_tokens": 1544978083.0, "step": 9218 }, { "entropy": 1.6897433201471965, "epoch": 1.0127434017192607, "grad_norm": 0.6677665114402771, "learning_rate": 1.1253745289484858e-05, "loss": 1.2992, "mean_token_accuracy": 0.6672473748524984, "num_tokens": 1545113400.0, "step": 9219 }, { "entropy": 1.730336219072342, "epoch": 1.0128532586306336, "grad_norm": 0.7854005694389343, "learning_rate": 1.1252144793147919e-05, "loss": 1.3935, "mean_token_accuracy": 0.666775236527125, "num_tokens": 1545256599.0, "step": 9220 }, { "entropy": 1.6706956028938293, "epoch": 1.0129631155420065, "grad_norm": 0.6474412083625793, "learning_rate": 1.1250544288830712e-05, "loss": 1.3071, "mean_token_accuracy": 0.6717335432767868, "num_tokens": 1545436360.0, "step": 9221 }, { "entropy": 1.670490821202596, "epoch": 1.0130729724533796, "grad_norm": 0.666471004486084, "learning_rate": 1.1248943776583892e-05, "loss": 1.3748, "mean_token_accuracy": 0.6653418590625128, "num_tokens": 1545607551.0, "step": 9222 }, { "entropy": 1.6969726085662842, "epoch": 1.0131828293647525, "grad_norm": 0.7258543968200684, "learning_rate": 1.124734325645811e-05, "loss": 1.3163, "mean_token_accuracy": 0.6724565674861273, "num_tokens": 1545796908.0, "step": 9223 }, { "entropy": 1.7342469195524852, "epoch": 1.0132926862761253, "grad_norm": 0.7631778717041016, "learning_rate": 1.1245742728504028e-05, "loss": 1.4204, "mean_token_accuracy": 0.6610995680093765, "num_tokens": 1545909631.0, "step": 9224 }, { "entropy": 1.7213138242562611, "epoch": 1.0134025431874982, "grad_norm": 0.6091188192367554, "learning_rate": 1.1244142192772301e-05, "loss": 1.4003, "mean_token_accuracy": 0.6554784874121348, "num_tokens": 1546167237.0, "step": 9225 }, { "entropy": 1.6888580818970997, "epoch": 1.0135124000988711, "grad_norm": 0.684411346912384, "learning_rate": 1.1242541649313577e-05, "loss": 1.4356, "mean_token_accuracy": 0.6582817882299423, "num_tokens": 1546363932.0, "step": 9226 }, { "entropy": 1.7218577762444813, "epoch": 1.0136222570102442, "grad_norm": 0.5998579263687134, "learning_rate": 1.1240941098178527e-05, "loss": 1.4542, "mean_token_accuracy": 0.6470504850149155, "num_tokens": 1546546664.0, "step": 9227 }, { "entropy": 1.6980265875657399, "epoch": 1.013732113921617, "grad_norm": 0.9041829109191895, "learning_rate": 1.1239340539417796e-05, "loss": 1.4107, "mean_token_accuracy": 0.651981790860494, "num_tokens": 1546699355.0, "step": 9228 }, { "entropy": 1.653926134109497, "epoch": 1.01384197083299, "grad_norm": 1.0387535095214844, "learning_rate": 1.1237739973082045e-05, "loss": 1.085, "mean_token_accuracy": 0.6834785888592402, "num_tokens": 1546888748.0, "step": 9229 }, { "entropy": 1.7255582809448242, "epoch": 1.0139518277443629, "grad_norm": 0.6586278676986694, "learning_rate": 1.123613939922193e-05, "loss": 1.3067, "mean_token_accuracy": 0.66642597814401, "num_tokens": 1547058192.0, "step": 9230 }, { "entropy": 1.6987995107968648, "epoch": 1.014061684655736, "grad_norm": 0.6259772777557373, "learning_rate": 1.1234538817888112e-05, "loss": 1.3602, "mean_token_accuracy": 0.6535337815682093, "num_tokens": 1547227451.0, "step": 9231 }, { "entropy": 1.7403975526491802, "epoch": 1.0141715415671089, "grad_norm": 0.6632811427116394, "learning_rate": 1.1232938229131243e-05, "loss": 1.3447, "mean_token_accuracy": 0.6578138470649719, "num_tokens": 1547376096.0, "step": 9232 }, { "entropy": 1.7203827500343323, "epoch": 1.0142813984784818, "grad_norm": 0.6725448369979858, "learning_rate": 1.1231337633001987e-05, "loss": 1.4274, "mean_token_accuracy": 0.6475430677334467, "num_tokens": 1547535222.0, "step": 9233 }, { "entropy": 1.7150332828362782, "epoch": 1.0143912553898546, "grad_norm": 0.6448984742164612, "learning_rate": 1.1229737029550997e-05, "loss": 1.2242, "mean_token_accuracy": 0.6787202705939611, "num_tokens": 1547643675.0, "step": 9234 }, { "entropy": 1.7125314672787983, "epoch": 1.0145011123012277, "grad_norm": 0.6407131552696228, "learning_rate": 1.1228136418828934e-05, "loss": 1.4369, "mean_token_accuracy": 0.6412733842929205, "num_tokens": 1547864654.0, "step": 9235 }, { "entropy": 1.6636697153250377, "epoch": 1.0146109692126006, "grad_norm": 0.7547443509101868, "learning_rate": 1.1226535800886456e-05, "loss": 1.2837, "mean_token_accuracy": 0.6684808333714803, "num_tokens": 1547998232.0, "step": 9236 }, { "entropy": 1.6625105440616608, "epoch": 1.0147208261239735, "grad_norm": 0.6787840723991394, "learning_rate": 1.1224935175774225e-05, "loss": 1.2093, "mean_token_accuracy": 0.6760126401980718, "num_tokens": 1548179995.0, "step": 9237 }, { "entropy": 1.7261524299780528, "epoch": 1.0148306830353464, "grad_norm": 0.7945687174797058, "learning_rate": 1.1223334543542892e-05, "loss": 1.3306, "mean_token_accuracy": 0.6550563474496206, "num_tokens": 1548324181.0, "step": 9238 }, { "entropy": 1.626392384370168, "epoch": 1.0149405399467193, "grad_norm": 0.5870576500892639, "learning_rate": 1.1221733904243126e-05, "loss": 1.314, "mean_token_accuracy": 0.6767490158478419, "num_tokens": 1548479091.0, "step": 9239 }, { "entropy": 1.62749649087588, "epoch": 1.0150503968580924, "grad_norm": 0.6715527772903442, "learning_rate": 1.1220133257925581e-05, "loss": 1.3297, "mean_token_accuracy": 0.6707568516333898, "num_tokens": 1548664070.0, "step": 9240 }, { "entropy": 1.7191180487473805, "epoch": 1.0151602537694653, "grad_norm": 0.6779627203941345, "learning_rate": 1.1218532604640912e-05, "loss": 1.4021, "mean_token_accuracy": 0.6458870420853297, "num_tokens": 1548850266.0, "step": 9241 }, { "entropy": 1.7357692917188008, "epoch": 1.0152701106808382, "grad_norm": 0.7277780175209045, "learning_rate": 1.121693194443979e-05, "loss": 1.2538, "mean_token_accuracy": 0.6712505420049032, "num_tokens": 1548990486.0, "step": 9242 }, { "entropy": 1.7432827452818553, "epoch": 1.015379967592211, "grad_norm": 0.6973701119422913, "learning_rate": 1.1215331277372869e-05, "loss": 1.3871, "mean_token_accuracy": 0.6440560271342596, "num_tokens": 1549135814.0, "step": 9243 }, { "entropy": 1.707447479168574, "epoch": 1.0154898245035842, "grad_norm": 0.7135317325592041, "learning_rate": 1.1213730603490808e-05, "loss": 1.5025, "mean_token_accuracy": 0.6496869872013727, "num_tokens": 1549283138.0, "step": 9244 }, { "entropy": 1.7765212555726368, "epoch": 1.015599681414957, "grad_norm": 0.6586817502975464, "learning_rate": 1.1212129922844275e-05, "loss": 1.3391, "mean_token_accuracy": 0.6648561110099157, "num_tokens": 1549420931.0, "step": 9245 }, { "entropy": 1.7636926869551341, "epoch": 1.01570953832633, "grad_norm": 0.6714787483215332, "learning_rate": 1.1210529235483921e-05, "loss": 1.4032, "mean_token_accuracy": 0.6477454006671906, "num_tokens": 1549548264.0, "step": 9246 }, { "entropy": 1.7096338669459026, "epoch": 1.0158193952377028, "grad_norm": 0.7031419277191162, "learning_rate": 1.1208928541460413e-05, "loss": 1.4511, "mean_token_accuracy": 0.6563627272844315, "num_tokens": 1549736242.0, "step": 9247 }, { "entropy": 1.6731836001078289, "epoch": 1.015929252149076, "grad_norm": 0.8989787697792053, "learning_rate": 1.1207327840824408e-05, "loss": 1.4856, "mean_token_accuracy": 0.6486278722683588, "num_tokens": 1549903701.0, "step": 9248 }, { "entropy": 1.6696616013844807, "epoch": 1.0160391090604488, "grad_norm": 0.6424921751022339, "learning_rate": 1.1205727133626577e-05, "loss": 1.3334, "mean_token_accuracy": 0.6658105552196503, "num_tokens": 1550064704.0, "step": 9249 }, { "entropy": 1.7057221233844757, "epoch": 1.0161489659718217, "grad_norm": 0.6833515763282776, "learning_rate": 1.1204126419917567e-05, "loss": 1.3819, "mean_token_accuracy": 0.6675165841976801, "num_tokens": 1550210298.0, "step": 9250 }, { "entropy": 1.6603924830754597, "epoch": 1.0162588228831946, "grad_norm": 0.7398085594177246, "learning_rate": 1.1202525699748053e-05, "loss": 1.4715, "mean_token_accuracy": 0.6583651875456175, "num_tokens": 1550374210.0, "step": 9251 }, { "entropy": 1.7534627715746562, "epoch": 1.0163686797945677, "grad_norm": 0.7214245796203613, "learning_rate": 1.120092497316869e-05, "loss": 1.5149, "mean_token_accuracy": 0.6404918928941091, "num_tokens": 1550522260.0, "step": 9252 }, { "entropy": 1.7200071314970653, "epoch": 1.0164785367059406, "grad_norm": 0.6468613743782043, "learning_rate": 1.1199324240230143e-05, "loss": 1.4526, "mean_token_accuracy": 0.6536131302515665, "num_tokens": 1550680157.0, "step": 9253 }, { "entropy": 1.7434161007404327, "epoch": 1.0165883936173135, "grad_norm": 0.6495728492736816, "learning_rate": 1.1197723500983069e-05, "loss": 1.3692, "mean_token_accuracy": 0.6529008895158768, "num_tokens": 1550834840.0, "step": 9254 }, { "entropy": 1.729017287492752, "epoch": 1.0166982505286863, "grad_norm": 0.6990881562232971, "learning_rate": 1.119612275547814e-05, "loss": 1.5721, "mean_token_accuracy": 0.6412505855162939, "num_tokens": 1551054269.0, "step": 9255 }, { "entropy": 1.7043547133604686, "epoch": 1.0168081074400592, "grad_norm": 0.6473329663276672, "learning_rate": 1.1194522003766013e-05, "loss": 1.315, "mean_token_accuracy": 0.6622092028458914, "num_tokens": 1551189361.0, "step": 9256 }, { "entropy": 1.7061095635096233, "epoch": 1.0169179643514323, "grad_norm": 0.7081418633460999, "learning_rate": 1.1192921245897353e-05, "loss": 1.4447, "mean_token_accuracy": 0.6577693919340769, "num_tokens": 1551368041.0, "step": 9257 }, { "entropy": 1.7616630991299946, "epoch": 1.0170278212628052, "grad_norm": 0.7121334075927734, "learning_rate": 1.1191320481922823e-05, "loss": 1.264, "mean_token_accuracy": 0.6756730278333029, "num_tokens": 1551481052.0, "step": 9258 }, { "entropy": 1.6953161557515461, "epoch": 1.017137678174178, "grad_norm": 0.6922043561935425, "learning_rate": 1.1189719711893088e-05, "loss": 1.3064, "mean_token_accuracy": 0.664389913280805, "num_tokens": 1551614812.0, "step": 9259 }, { "entropy": 1.6914753516515095, "epoch": 1.017247535085551, "grad_norm": 0.7497768402099609, "learning_rate": 1.1188118935858802e-05, "loss": 1.2366, "mean_token_accuracy": 0.6768055210510889, "num_tokens": 1551732083.0, "step": 9260 }, { "entropy": 1.6624971628189087, "epoch": 1.017357391996924, "grad_norm": 0.6488621830940247, "learning_rate": 1.1186518153870643e-05, "loss": 1.3451, "mean_token_accuracy": 0.661164661248525, "num_tokens": 1551882678.0, "step": 9261 }, { "entropy": 1.712691217660904, "epoch": 1.017467248908297, "grad_norm": 0.6433484554290771, "learning_rate": 1.1184917365979267e-05, "loss": 1.3778, "mean_token_accuracy": 0.6611688236395518, "num_tokens": 1552010626.0, "step": 9262 }, { "entropy": 1.6859545807043712, "epoch": 1.0175771058196699, "grad_norm": 0.6317065954208374, "learning_rate": 1.118331657223534e-05, "loss": 1.307, "mean_token_accuracy": 0.6647968838612238, "num_tokens": 1552155014.0, "step": 9263 }, { "entropy": 1.7237164676189423, "epoch": 1.0176869627310428, "grad_norm": 0.5941221117973328, "learning_rate": 1.1181715772689524e-05, "loss": 1.3943, "mean_token_accuracy": 0.6520277112722397, "num_tokens": 1552338624.0, "step": 9264 }, { "entropy": 1.7700924972693126, "epoch": 1.0177968196424159, "grad_norm": 0.8220915794372559, "learning_rate": 1.1180114967392488e-05, "loss": 1.5014, "mean_token_accuracy": 0.6256469786167145, "num_tokens": 1552565673.0, "step": 9265 }, { "entropy": 1.6931136548519135, "epoch": 1.0179066765537887, "grad_norm": 0.7540929913520813, "learning_rate": 1.1178514156394893e-05, "loss": 1.3396, "mean_token_accuracy": 0.6597989400227865, "num_tokens": 1552709040.0, "step": 9266 }, { "entropy": 1.7067534426848094, "epoch": 1.0180165334651616, "grad_norm": 0.720111608505249, "learning_rate": 1.1176913339747406e-05, "loss": 1.3746, "mean_token_accuracy": 0.6579870879650116, "num_tokens": 1552884550.0, "step": 9267 }, { "entropy": 1.7605009973049164, "epoch": 1.0181263903765345, "grad_norm": 0.77629554271698, "learning_rate": 1.1175312517500692e-05, "loss": 1.3439, "mean_token_accuracy": 0.6593941003084183, "num_tokens": 1553031590.0, "step": 9268 }, { "entropy": 1.7027594049771626, "epoch": 1.0182362472879074, "grad_norm": 0.5735223889350891, "learning_rate": 1.1173711689705413e-05, "loss": 1.5773, "mean_token_accuracy": 0.6314045637845993, "num_tokens": 1553236524.0, "step": 9269 }, { "entropy": 1.6763208210468292, "epoch": 1.0183461041992805, "grad_norm": 0.7643057107925415, "learning_rate": 1.117211085641224e-05, "loss": 1.2915, "mean_token_accuracy": 0.6760772267977396, "num_tokens": 1553392639.0, "step": 9270 }, { "entropy": 1.6933831572532654, "epoch": 1.0184559611106534, "grad_norm": 0.6687283515930176, "learning_rate": 1.1170510017671836e-05, "loss": 1.5079, "mean_token_accuracy": 0.636181429028511, "num_tokens": 1553611294.0, "step": 9271 }, { "entropy": 1.7319226165612538, "epoch": 1.0185658180220263, "grad_norm": 0.5630615949630737, "learning_rate": 1.1168909173534866e-05, "loss": 1.637, "mean_token_accuracy": 0.6280501782894135, "num_tokens": 1553812798.0, "step": 9272 }, { "entropy": 1.6394573052724202, "epoch": 1.0186756749333992, "grad_norm": 0.8335058093070984, "learning_rate": 1.1167308324051998e-05, "loss": 1.5129, "mean_token_accuracy": 0.6522385478019714, "num_tokens": 1553984737.0, "step": 9273 }, { "entropy": 1.6925493081410725, "epoch": 1.0187855318447723, "grad_norm": 0.6182948350906372, "learning_rate": 1.1165707469273894e-05, "loss": 1.3235, "mean_token_accuracy": 0.6617102771997452, "num_tokens": 1554122672.0, "step": 9274 }, { "entropy": 1.7493834793567657, "epoch": 1.0188953887561452, "grad_norm": 0.6503170132637024, "learning_rate": 1.116410660925123e-05, "loss": 1.4509, "mean_token_accuracy": 0.6459475109974543, "num_tokens": 1554293612.0, "step": 9275 }, { "entropy": 1.7210332651933034, "epoch": 1.019005245667518, "grad_norm": 0.6399514675140381, "learning_rate": 1.1162505744034658e-05, "loss": 1.3569, "mean_token_accuracy": 0.6521624475717545, "num_tokens": 1554536874.0, "step": 9276 }, { "entropy": 1.6515512764453888, "epoch": 1.019115102578891, "grad_norm": 0.6285982728004456, "learning_rate": 1.1160904873674855e-05, "loss": 1.275, "mean_token_accuracy": 0.6685600280761719, "num_tokens": 1554689700.0, "step": 9277 }, { "entropy": 1.7064630885918934, "epoch": 1.019224959490264, "grad_norm": 0.8239073753356934, "learning_rate": 1.1159303998222484e-05, "loss": 1.3523, "mean_token_accuracy": 0.6559799164533615, "num_tokens": 1554826581.0, "step": 9278 }, { "entropy": 1.7321425378322601, "epoch": 1.019334816401637, "grad_norm": 0.8235211968421936, "learning_rate": 1.1157703117728216e-05, "loss": 1.3853, "mean_token_accuracy": 0.6528972536325455, "num_tokens": 1554968925.0, "step": 9279 }, { "entropy": 1.6756745378176372, "epoch": 1.0194446733130098, "grad_norm": 0.808443009853363, "learning_rate": 1.1156102232242714e-05, "loss": 1.3856, "mean_token_accuracy": 0.6638946781555811, "num_tokens": 1555130010.0, "step": 9280 }, { "entropy": 1.7489099601904552, "epoch": 1.0195545302243827, "grad_norm": 0.7170142531394958, "learning_rate": 1.1154501341816648e-05, "loss": 1.3066, "mean_token_accuracy": 0.6800417453050613, "num_tokens": 1555288245.0, "step": 9281 }, { "entropy": 1.707829624414444, "epoch": 1.0196643871357556, "grad_norm": 0.7252710461616516, "learning_rate": 1.115290044650068e-05, "loss": 1.3392, "mean_token_accuracy": 0.6582317799329758, "num_tokens": 1555441544.0, "step": 9282 }, { "entropy": 1.6867429316043854, "epoch": 1.0197742440471287, "grad_norm": 0.6245051026344299, "learning_rate": 1.1151299546345487e-05, "loss": 1.3129, "mean_token_accuracy": 0.6731636921564738, "num_tokens": 1555573014.0, "step": 9283 }, { "entropy": 1.6290603975454967, "epoch": 1.0198841009585016, "grad_norm": 10.985240936279297, "learning_rate": 1.1149698641401729e-05, "loss": 1.1759, "mean_token_accuracy": 0.6769275714953741, "num_tokens": 1555740917.0, "step": 9284 }, { "entropy": 1.7704954346021016, "epoch": 1.0199939578698745, "grad_norm": 0.7116957306861877, "learning_rate": 1.1148097731720075e-05, "loss": 1.3332, "mean_token_accuracy": 0.6628136684497198, "num_tokens": 1555866851.0, "step": 9285 }, { "entropy": 1.7090757687886555, "epoch": 1.0201038147812473, "grad_norm": 0.7023559808731079, "learning_rate": 1.1146496817351198e-05, "loss": 1.378, "mean_token_accuracy": 0.6548497478167216, "num_tokens": 1556020336.0, "step": 9286 }, { "entropy": 1.6991868913173676, "epoch": 1.0202136716926204, "grad_norm": 0.6621536612510681, "learning_rate": 1.1144895898345763e-05, "loss": 1.4705, "mean_token_accuracy": 0.6551912526289622, "num_tokens": 1556223903.0, "step": 9287 }, { "entropy": 1.6916466653347015, "epoch": 1.0203235286039933, "grad_norm": 0.6319494843482971, "learning_rate": 1.1143294974754432e-05, "loss": 1.3627, "mean_token_accuracy": 0.6651053031285604, "num_tokens": 1556404805.0, "step": 9288 }, { "entropy": 1.7253743211428325, "epoch": 1.0204333855153662, "grad_norm": 0.8715941309928894, "learning_rate": 1.1141694046627887e-05, "loss": 1.3739, "mean_token_accuracy": 0.6772264291842779, "num_tokens": 1556567447.0, "step": 9289 }, { "entropy": 1.6666424969832103, "epoch": 1.020543242426739, "grad_norm": 0.7647740244865417, "learning_rate": 1.1140093114016785e-05, "loss": 1.5055, "mean_token_accuracy": 0.6350631018479665, "num_tokens": 1556756290.0, "step": 9290 }, { "entropy": 1.708291381597519, "epoch": 1.0206530993381122, "grad_norm": 0.6904531121253967, "learning_rate": 1.11384921769718e-05, "loss": 1.3246, "mean_token_accuracy": 0.6716498136520386, "num_tokens": 1556892411.0, "step": 9291 }, { "entropy": 1.7041618327299755, "epoch": 1.020762956249485, "grad_norm": 0.7279467582702637, "learning_rate": 1.1136891235543602e-05, "loss": 1.3717, "mean_token_accuracy": 0.653698722521464, "num_tokens": 1557050513.0, "step": 9292 }, { "entropy": 1.6928143699963887, "epoch": 1.020872813160858, "grad_norm": 0.6164371371269226, "learning_rate": 1.1135290289782856e-05, "loss": 1.2497, "mean_token_accuracy": 0.684166838725408, "num_tokens": 1557186695.0, "step": 9293 }, { "entropy": 1.6356267134348552, "epoch": 1.0209826700722309, "grad_norm": 0.6712630391120911, "learning_rate": 1.1133689339740232e-05, "loss": 1.2799, "mean_token_accuracy": 0.6790573745965958, "num_tokens": 1557342961.0, "step": 9294 }, { "entropy": 1.7505371868610382, "epoch": 1.0210925269836038, "grad_norm": 0.7134815454483032, "learning_rate": 1.1132088385466404e-05, "loss": 1.3846, "mean_token_accuracy": 0.6622013101975123, "num_tokens": 1557505250.0, "step": 9295 }, { "entropy": 1.730526864528656, "epoch": 1.0212023838949769, "grad_norm": 0.7285529971122742, "learning_rate": 1.1130487427012035e-05, "loss": 1.2494, "mean_token_accuracy": 0.671364982922872, "num_tokens": 1557626583.0, "step": 9296 }, { "entropy": 1.6375334958235424, "epoch": 1.0213122408063497, "grad_norm": 0.5679100155830383, "learning_rate": 1.11288864644278e-05, "loss": 1.5019, "mean_token_accuracy": 0.64617853363355, "num_tokens": 1557848562.0, "step": 9297 }, { "entropy": 1.6906728843847911, "epoch": 1.0214220977177226, "grad_norm": 0.7008233070373535, "learning_rate": 1.1127285497764366e-05, "loss": 1.4128, "mean_token_accuracy": 0.6613838970661163, "num_tokens": 1558008515.0, "step": 9298 }, { "entropy": 1.7250931958357494, "epoch": 1.0215319546290955, "grad_norm": 0.7736313343048096, "learning_rate": 1.1125684527072403e-05, "loss": 1.5166, "mean_token_accuracy": 0.6381128629048666, "num_tokens": 1558218566.0, "step": 9299 }, { "entropy": 1.7384556730588276, "epoch": 1.0216418115404686, "grad_norm": 0.7157011032104492, "learning_rate": 1.1124083552402578e-05, "loss": 1.3946, "mean_token_accuracy": 0.6412435173988342, "num_tokens": 1558416646.0, "step": 9300 }, { "entropy": 1.7145174245039623, "epoch": 1.0217516684518415, "grad_norm": 0.5953468680381775, "learning_rate": 1.1122482573805572e-05, "loss": 1.3498, "mean_token_accuracy": 0.6631358712911606, "num_tokens": 1558570368.0, "step": 9301 }, { "entropy": 1.5895135502020519, "epoch": 1.0218615253632144, "grad_norm": 0.6971820592880249, "learning_rate": 1.1120881591332042e-05, "loss": 1.3617, "mean_token_accuracy": 0.6736765454212824, "num_tokens": 1558745606.0, "step": 9302 }, { "entropy": 1.7481009860833485, "epoch": 1.0219713822745873, "grad_norm": 0.6487974524497986, "learning_rate": 1.1119280605032667e-05, "loss": 1.5241, "mean_token_accuracy": 0.629365916053454, "num_tokens": 1558940755.0, "step": 9303 }, { "entropy": 1.7577315270900726, "epoch": 1.0220812391859604, "grad_norm": 0.7471747398376465, "learning_rate": 1.111767961495811e-05, "loss": 1.5136, "mean_token_accuracy": 0.6320231805245081, "num_tokens": 1559112446.0, "step": 9304 }, { "entropy": 1.6948171555995941, "epoch": 1.0221910960973333, "grad_norm": 0.642125129699707, "learning_rate": 1.111607862115905e-05, "loss": 1.4423, "mean_token_accuracy": 0.6560766796271006, "num_tokens": 1559341272.0, "step": 9305 }, { "entropy": 1.682332714398702, "epoch": 1.0223009530087062, "grad_norm": 0.6562165021896362, "learning_rate": 1.1114477623686155e-05, "loss": 1.3956, "mean_token_accuracy": 0.6665085901816686, "num_tokens": 1559494474.0, "step": 9306 }, { "entropy": 1.724548449118932, "epoch": 1.022410809920079, "grad_norm": 0.6904338002204895, "learning_rate": 1.1112876622590091e-05, "loss": 1.4954, "mean_token_accuracy": 0.656151756644249, "num_tokens": 1559687605.0, "step": 9307 }, { "entropy": 1.7673320770263672, "epoch": 1.022520666831452, "grad_norm": 0.6390427350997925, "learning_rate": 1.1111275617921538e-05, "loss": 1.3457, "mean_token_accuracy": 0.6566664973894755, "num_tokens": 1559816863.0, "step": 9308 }, { "entropy": 1.7288634777069092, "epoch": 1.022630523742825, "grad_norm": 0.8318937420845032, "learning_rate": 1.1109674609731158e-05, "loss": 1.4264, "mean_token_accuracy": 0.6647811233997345, "num_tokens": 1559943550.0, "step": 9309 }, { "entropy": 1.6661728421847026, "epoch": 1.022740380654198, "grad_norm": 0.7150062918663025, "learning_rate": 1.1108073598069624e-05, "loss": 1.3994, "mean_token_accuracy": 0.6527441392342249, "num_tokens": 1560098393.0, "step": 9310 }, { "entropy": 1.6526323854923248, "epoch": 1.0228502375655708, "grad_norm": 0.765347421169281, "learning_rate": 1.1106472582987615e-05, "loss": 1.359, "mean_token_accuracy": 0.6697281499703726, "num_tokens": 1560239415.0, "step": 9311 }, { "entropy": 1.6831820905208588, "epoch": 1.0229600944769437, "grad_norm": 0.7733060121536255, "learning_rate": 1.1104871564535792e-05, "loss": 1.3801, "mean_token_accuracy": 0.6575480302174886, "num_tokens": 1560411622.0, "step": 9312 }, { "entropy": 1.7815355956554413, "epoch": 1.0230699513883168, "grad_norm": 0.7969313859939575, "learning_rate": 1.1103270542764832e-05, "loss": 1.3305, "mean_token_accuracy": 0.665749246875445, "num_tokens": 1560515174.0, "step": 9313 }, { "entropy": 1.7089110016822815, "epoch": 1.0231798082996897, "grad_norm": 0.6208791136741638, "learning_rate": 1.1101669517725409e-05, "loss": 1.4192, "mean_token_accuracy": 0.6394672940174738, "num_tokens": 1560678366.0, "step": 9314 }, { "entropy": 1.6642550726731618, "epoch": 1.0232896652110626, "grad_norm": 0.7100003361701965, "learning_rate": 1.110006848946819e-05, "loss": 1.3558, "mean_token_accuracy": 0.672847161690394, "num_tokens": 1560836440.0, "step": 9315 }, { "entropy": 1.6587112347284954, "epoch": 1.0233995221224355, "grad_norm": 0.631491482257843, "learning_rate": 1.1098467458043844e-05, "loss": 1.3645, "mean_token_accuracy": 0.6515796532233556, "num_tokens": 1561021294.0, "step": 9316 }, { "entropy": 1.7398035724957783, "epoch": 1.0235093790338086, "grad_norm": 0.7306511402130127, "learning_rate": 1.1096866423503054e-05, "loss": 1.5682, "mean_token_accuracy": 0.6455154716968536, "num_tokens": 1561199258.0, "step": 9317 }, { "entropy": 1.6927287181218464, "epoch": 1.0236192359451814, "grad_norm": 0.6335356831550598, "learning_rate": 1.1095265385896484e-05, "loss": 1.4913, "mean_token_accuracy": 0.6352100173632304, "num_tokens": 1561425004.0, "step": 9318 }, { "entropy": 1.7034152448177338, "epoch": 1.0237290928565543, "grad_norm": 0.6361052989959717, "learning_rate": 1.1093664345274804e-05, "loss": 1.5067, "mean_token_accuracy": 0.6494300862153372, "num_tokens": 1561637991.0, "step": 9319 }, { "entropy": 1.69016628464063, "epoch": 1.0238389497679272, "grad_norm": 0.6373481750488281, "learning_rate": 1.1092063301688691e-05, "loss": 1.4863, "mean_token_accuracy": 0.6439661830663681, "num_tokens": 1561838602.0, "step": 9320 }, { "entropy": 1.6587995290756226, "epoch": 1.0239488066793, "grad_norm": 0.8336928486824036, "learning_rate": 1.1090462255188819e-05, "loss": 1.2563, "mean_token_accuracy": 0.6774145613114039, "num_tokens": 1561963251.0, "step": 9321 }, { "entropy": 1.7257398466269176, "epoch": 1.0240586635906732, "grad_norm": 0.6993498802185059, "learning_rate": 1.1088861205825853e-05, "loss": 1.3805, "mean_token_accuracy": 0.6668401459852854, "num_tokens": 1562127473.0, "step": 9322 }, { "entropy": 1.7249635954697926, "epoch": 1.024168520502046, "grad_norm": 0.7952443957328796, "learning_rate": 1.1087260153650474e-05, "loss": 1.7184, "mean_token_accuracy": 0.638072150448958, "num_tokens": 1562317748.0, "step": 9323 }, { "entropy": 1.6867842574914296, "epoch": 1.024278377413419, "grad_norm": 0.749860405921936, "learning_rate": 1.1085659098713348e-05, "loss": 1.4717, "mean_token_accuracy": 0.6456845154364904, "num_tokens": 1562512974.0, "step": 9324 }, { "entropy": 1.7148866256078084, "epoch": 1.0243882343247919, "grad_norm": 0.8800878524780273, "learning_rate": 1.1084058041065151e-05, "loss": 1.4662, "mean_token_accuracy": 0.6518640766541163, "num_tokens": 1562660251.0, "step": 9325 }, { "entropy": 1.7287410298983257, "epoch": 1.024498091236165, "grad_norm": 0.7516173720359802, "learning_rate": 1.1082456980756553e-05, "loss": 1.4594, "mean_token_accuracy": 0.6475322792927424, "num_tokens": 1562818950.0, "step": 9326 }, { "entropy": 1.6619328260421753, "epoch": 1.0246079481475379, "grad_norm": 0.589447557926178, "learning_rate": 1.1080855917838232e-05, "loss": 1.3906, "mean_token_accuracy": 0.662062461177508, "num_tokens": 1562994934.0, "step": 9327 }, { "entropy": 1.6725085377693176, "epoch": 1.0247178050589107, "grad_norm": 0.5970123410224915, "learning_rate": 1.1079254852360852e-05, "loss": 1.4908, "mean_token_accuracy": 0.6354695955912272, "num_tokens": 1563198043.0, "step": 9328 }, { "entropy": 1.7263079186280568, "epoch": 1.0248276619702836, "grad_norm": 0.7884548902511597, "learning_rate": 1.1077653784375098e-05, "loss": 1.3299, "mean_token_accuracy": 0.6538351277510325, "num_tokens": 1563372999.0, "step": 9329 }, { "entropy": 1.755046049753825, "epoch": 1.0249375188816567, "grad_norm": 0.8799434900283813, "learning_rate": 1.1076052713931633e-05, "loss": 1.5743, "mean_token_accuracy": 0.6435807049274445, "num_tokens": 1563531399.0, "step": 9330 }, { "entropy": 1.6757254600524902, "epoch": 1.0250473757930296, "grad_norm": 0.7605611085891724, "learning_rate": 1.1074451641081135e-05, "loss": 1.3706, "mean_token_accuracy": 0.6580003201961517, "num_tokens": 1563710017.0, "step": 9331 }, { "entropy": 1.7006694972515106, "epoch": 1.0251572327044025, "grad_norm": 0.7495110630989075, "learning_rate": 1.1072850565874274e-05, "loss": 1.3559, "mean_token_accuracy": 0.6717531283696493, "num_tokens": 1563867033.0, "step": 9332 }, { "entropy": 1.6994576354821522, "epoch": 1.0252670896157754, "grad_norm": 0.7062970399856567, "learning_rate": 1.107124948836173e-05, "loss": 1.3514, "mean_token_accuracy": 0.6614094525575638, "num_tokens": 1563988943.0, "step": 9333 }, { "entropy": 1.681195815404256, "epoch": 1.0253769465271483, "grad_norm": 0.8650986552238464, "learning_rate": 1.1069648408594168e-05, "loss": 1.444, "mean_token_accuracy": 0.6560016522804896, "num_tokens": 1564179018.0, "step": 9334 }, { "entropy": 1.734657605489095, "epoch": 1.0254868034385214, "grad_norm": 0.688232958316803, "learning_rate": 1.1068047326622269e-05, "loss": 1.5821, "mean_token_accuracy": 0.6293347378571829, "num_tokens": 1564399374.0, "step": 9335 }, { "entropy": 1.7198534111181896, "epoch": 1.0255966603498943, "grad_norm": 0.712509036064148, "learning_rate": 1.1066446242496697e-05, "loss": 1.3292, "mean_token_accuracy": 0.6663158188263575, "num_tokens": 1564521995.0, "step": 9336 }, { "entropy": 1.752720485130946, "epoch": 1.0257065172612672, "grad_norm": 1.8393864631652832, "learning_rate": 1.1064845156268135e-05, "loss": 1.3724, "mean_token_accuracy": 0.6571303755044937, "num_tokens": 1564664216.0, "step": 9337 }, { "entropy": 1.6954265733559926, "epoch": 1.02581637417264, "grad_norm": 0.6746184825897217, "learning_rate": 1.1063244067987253e-05, "loss": 1.4452, "mean_token_accuracy": 0.6532609164714813, "num_tokens": 1564867480.0, "step": 9338 }, { "entropy": 1.7057288686434429, "epoch": 1.0259262310840132, "grad_norm": 0.6397607326507568, "learning_rate": 1.1061642977704726e-05, "loss": 1.5297, "mean_token_accuracy": 0.6352156102657318, "num_tokens": 1565064367.0, "step": 9339 }, { "entropy": 1.7565435369809468, "epoch": 1.026036087995386, "grad_norm": 0.7192218899726868, "learning_rate": 1.1060041885471224e-05, "loss": 1.4612, "mean_token_accuracy": 0.6381704757610956, "num_tokens": 1565271382.0, "step": 9340 }, { "entropy": 1.7230738202730815, "epoch": 1.026145944906759, "grad_norm": 0.6718366742134094, "learning_rate": 1.1058440791337424e-05, "loss": 1.5053, "mean_token_accuracy": 0.6260107507308325, "num_tokens": 1565498782.0, "step": 9341 }, { "entropy": 1.7352421184380848, "epoch": 1.0262558018181318, "grad_norm": 0.8205738663673401, "learning_rate": 1.1056839695354e-05, "loss": 1.6207, "mean_token_accuracy": 0.6656580666700999, "num_tokens": 1565694130.0, "step": 9342 }, { "entropy": 1.709681620200475, "epoch": 1.026365658729505, "grad_norm": 0.6529027223587036, "learning_rate": 1.1055238597571627e-05, "loss": 1.4073, "mean_token_accuracy": 0.656768262386322, "num_tokens": 1565886376.0, "step": 9343 }, { "entropy": 1.7247290511926014, "epoch": 1.0264755156408778, "grad_norm": 0.8053439855575562, "learning_rate": 1.1053637498040972e-05, "loss": 1.3863, "mean_token_accuracy": 0.6635664304097494, "num_tokens": 1566024854.0, "step": 9344 }, { "entropy": 1.7492102185885112, "epoch": 1.0265853725522507, "grad_norm": 0.7567391395568848, "learning_rate": 1.105203639681272e-05, "loss": 1.4823, "mean_token_accuracy": 0.6328875770171484, "num_tokens": 1566239010.0, "step": 9345 }, { "entropy": 1.6845263640085857, "epoch": 1.0266952294636236, "grad_norm": 0.658109724521637, "learning_rate": 1.1050435293937535e-05, "loss": 1.5708, "mean_token_accuracy": 0.6598745634158453, "num_tokens": 1566396108.0, "step": 9346 }, { "entropy": 1.7013383607069652, "epoch": 1.0268050863749965, "grad_norm": 0.7290819883346558, "learning_rate": 1.10488341894661e-05, "loss": 1.309, "mean_token_accuracy": 0.6661685655514399, "num_tokens": 1566506452.0, "step": 9347 }, { "entropy": 1.7153269449869792, "epoch": 1.0269149432863696, "grad_norm": 0.6383672952651978, "learning_rate": 1.104723308344908e-05, "loss": 1.5736, "mean_token_accuracy": 0.6496073206265768, "num_tokens": 1566669636.0, "step": 9348 }, { "entropy": 1.7498332460721333, "epoch": 1.0270248001977424, "grad_norm": 1.3603650331497192, "learning_rate": 1.1045631975937162e-05, "loss": 1.54, "mean_token_accuracy": 0.6541606038808823, "num_tokens": 1566846993.0, "step": 9349 }, { "entropy": 1.6750881572564442, "epoch": 1.0271346571091153, "grad_norm": 0.8021945357322693, "learning_rate": 1.1044030866981003e-05, "loss": 1.4035, "mean_token_accuracy": 0.664098913470904, "num_tokens": 1567016917.0, "step": 9350 }, { "entropy": 1.680233657360077, "epoch": 1.0272445140204882, "grad_norm": 0.6845235824584961, "learning_rate": 1.1042429756631291e-05, "loss": 1.4367, "mean_token_accuracy": 0.6555875589450201, "num_tokens": 1567176851.0, "step": 9351 }, { "entropy": 1.6933209796746571, "epoch": 1.0273543709318613, "grad_norm": 0.7372247576713562, "learning_rate": 1.1040828644938697e-05, "loss": 1.4267, "mean_token_accuracy": 0.6577855745951334, "num_tokens": 1567371811.0, "step": 9352 }, { "entropy": 1.7011990348498027, "epoch": 1.0274642278432342, "grad_norm": 0.7555881142616272, "learning_rate": 1.1039227531953896e-05, "loss": 1.2464, "mean_token_accuracy": 0.6763677497704824, "num_tokens": 1567559137.0, "step": 9353 }, { "entropy": 1.7057754397392273, "epoch": 1.027574084754607, "grad_norm": 0.7010595202445984, "learning_rate": 1.1037626417727558e-05, "loss": 1.4121, "mean_token_accuracy": 0.6522766401370367, "num_tokens": 1567756111.0, "step": 9354 }, { "entropy": 1.7608352601528168, "epoch": 1.02768394166598, "grad_norm": 0.7014980912208557, "learning_rate": 1.1036025302310364e-05, "loss": 1.4579, "mean_token_accuracy": 0.6561769843101501, "num_tokens": 1567928704.0, "step": 9355 }, { "entropy": 1.7167177398999531, "epoch": 1.027793798577353, "grad_norm": 0.7096652984619141, "learning_rate": 1.1034424185752982e-05, "loss": 1.301, "mean_token_accuracy": 0.6627550721168518, "num_tokens": 1568041208.0, "step": 9356 }, { "entropy": 1.706625332434972, "epoch": 1.027903655488726, "grad_norm": 0.7294109463691711, "learning_rate": 1.1032823068106092e-05, "loss": 1.4412, "mean_token_accuracy": 0.6564443955818812, "num_tokens": 1568214563.0, "step": 9357 }, { "entropy": 1.7395167748133342, "epoch": 1.0280135124000989, "grad_norm": 0.7195934653282166, "learning_rate": 1.1031221949420368e-05, "loss": 1.3897, "mean_token_accuracy": 0.649207149942716, "num_tokens": 1568397977.0, "step": 9358 }, { "entropy": 1.692352334658305, "epoch": 1.0281233693114717, "grad_norm": 0.6182077527046204, "learning_rate": 1.1029620829746482e-05, "loss": 1.4435, "mean_token_accuracy": 0.6503968785206476, "num_tokens": 1568599949.0, "step": 9359 }, { "entropy": 1.7287443379561107, "epoch": 1.0282332262228449, "grad_norm": 0.7307857871055603, "learning_rate": 1.102801970913511e-05, "loss": 1.3121, "mean_token_accuracy": 0.6638143807649612, "num_tokens": 1568741614.0, "step": 9360 }, { "entropy": 1.699091374874115, "epoch": 1.0283430831342177, "grad_norm": 0.6680180430412292, "learning_rate": 1.1026418587636926e-05, "loss": 1.4316, "mean_token_accuracy": 0.6453281243642172, "num_tokens": 1568903127.0, "step": 9361 }, { "entropy": 1.7088652749856312, "epoch": 1.0284529400455906, "grad_norm": 0.7400436997413635, "learning_rate": 1.1024817465302604e-05, "loss": 1.3959, "mean_token_accuracy": 0.6520246714353561, "num_tokens": 1569104488.0, "step": 9362 }, { "entropy": 1.6986040870348613, "epoch": 1.0285627969569635, "grad_norm": 0.6973623037338257, "learning_rate": 1.1023216342182825e-05, "loss": 1.3094, "mean_token_accuracy": 0.6667328427235285, "num_tokens": 1569258463.0, "step": 9363 }, { "entropy": 1.7184610863526661, "epoch": 1.0286726538683364, "grad_norm": 0.7891613245010376, "learning_rate": 1.1021615218328257e-05, "loss": 1.5252, "mean_token_accuracy": 0.6617070535818735, "num_tokens": 1569458294.0, "step": 9364 }, { "entropy": 1.6800651748975117, "epoch": 1.0287825107797095, "grad_norm": 0.6154624223709106, "learning_rate": 1.102001409378958e-05, "loss": 1.3427, "mean_token_accuracy": 0.6658696780602137, "num_tokens": 1569657703.0, "step": 9365 }, { "entropy": 1.6739746828873951, "epoch": 1.0288923676910824, "grad_norm": 0.5887352228164673, "learning_rate": 1.101841296861746e-05, "loss": 1.3319, "mean_token_accuracy": 0.664004052678744, "num_tokens": 1569812239.0, "step": 9366 }, { "entropy": 1.7404913504918416, "epoch": 1.0290022246024553, "grad_norm": 0.6177912354469299, "learning_rate": 1.1016811842862583e-05, "loss": 1.3795, "mean_token_accuracy": 0.6624182611703873, "num_tokens": 1569971143.0, "step": 9367 }, { "entropy": 1.7285182078679402, "epoch": 1.0291120815138282, "grad_norm": 0.6963897347450256, "learning_rate": 1.1015210716575614e-05, "loss": 1.2931, "mean_token_accuracy": 0.6709794253110886, "num_tokens": 1570122158.0, "step": 9368 }, { "entropy": 1.7291064659754436, "epoch": 1.0292219384252013, "grad_norm": 0.6220366358757019, "learning_rate": 1.1013609589807237e-05, "loss": 1.3733, "mean_token_accuracy": 0.6541631271441778, "num_tokens": 1570257383.0, "step": 9369 }, { "entropy": 1.6683486600716908, "epoch": 1.0293317953365742, "grad_norm": 0.6968728303909302, "learning_rate": 1.1012008462608119e-05, "loss": 1.3097, "mean_token_accuracy": 0.6743961970011393, "num_tokens": 1570389364.0, "step": 9370 }, { "entropy": 1.680118163426717, "epoch": 1.029441652247947, "grad_norm": 0.7007773518562317, "learning_rate": 1.1010407335028944e-05, "loss": 1.3684, "mean_token_accuracy": 0.6575128883123398, "num_tokens": 1570532077.0, "step": 9371 }, { "entropy": 1.697032392024994, "epoch": 1.02955150915932, "grad_norm": 0.6026526093482971, "learning_rate": 1.1008806207120376e-05, "loss": 1.3735, "mean_token_accuracy": 0.6517840176820755, "num_tokens": 1570684541.0, "step": 9372 }, { "entropy": 1.6714433928330739, "epoch": 1.029661366070693, "grad_norm": 0.6352996230125427, "learning_rate": 1.1007205078933099e-05, "loss": 1.3515, "mean_token_accuracy": 0.6700502683719, "num_tokens": 1570836377.0, "step": 9373 }, { "entropy": 1.6749180654684703, "epoch": 1.029771222982066, "grad_norm": 0.6775603890419006, "learning_rate": 1.1005603950517783e-05, "loss": 1.3308, "mean_token_accuracy": 0.6705063283443451, "num_tokens": 1571008501.0, "step": 9374 }, { "entropy": 1.6891617675622304, "epoch": 1.0298810798934388, "grad_norm": 0.6425476670265198, "learning_rate": 1.1004002821925104e-05, "loss": 1.3842, "mean_token_accuracy": 0.6577885945638021, "num_tokens": 1571178622.0, "step": 9375 }, { "entropy": 1.6456943849722545, "epoch": 1.0299909368048117, "grad_norm": 2.2865750789642334, "learning_rate": 1.1002401693205738e-05, "loss": 1.3875, "mean_token_accuracy": 0.6783433457215627, "num_tokens": 1571353888.0, "step": 9376 }, { "entropy": 1.6456839342912037, "epoch": 1.0301007937161846, "grad_norm": 0.7852012515068054, "learning_rate": 1.1000800564410362e-05, "loss": 1.3242, "mean_token_accuracy": 0.6605374266703924, "num_tokens": 1571473748.0, "step": 9377 }, { "entropy": 1.699092835187912, "epoch": 1.0302106506275577, "grad_norm": 0.6551658511161804, "learning_rate": 1.0999199435589643e-05, "loss": 1.5777, "mean_token_accuracy": 0.6421516289313635, "num_tokens": 1571656099.0, "step": 9378 }, { "entropy": 1.7373557190100353, "epoch": 1.0303205075389306, "grad_norm": 0.6427181363105774, "learning_rate": 1.0997598306794269e-05, "loss": 1.578, "mean_token_accuracy": 0.641086682677269, "num_tokens": 1571911580.0, "step": 9379 }, { "entropy": 1.6774665514628093, "epoch": 1.0304303644503034, "grad_norm": 0.6472753286361694, "learning_rate": 1.09959971780749e-05, "loss": 1.2805, "mean_token_accuracy": 0.6775522033373514, "num_tokens": 1572069418.0, "step": 9380 }, { "entropy": 1.7251865367094676, "epoch": 1.0305402213616763, "grad_norm": 0.7229856848716736, "learning_rate": 1.0994396049482221e-05, "loss": 1.2864, "mean_token_accuracy": 0.6660173137982687, "num_tokens": 1572223424.0, "step": 9381 }, { "entropy": 1.6989657084147136, "epoch": 1.0306500782730494, "grad_norm": 0.6592692732810974, "learning_rate": 1.0992794921066908e-05, "loss": 1.2797, "mean_token_accuracy": 0.6701207856337229, "num_tokens": 1572364920.0, "step": 9382 }, { "entropy": 1.703797886768977, "epoch": 1.0307599351844223, "grad_norm": 0.7170230746269226, "learning_rate": 1.0991193792879629e-05, "loss": 1.3009, "mean_token_accuracy": 0.665116066733996, "num_tokens": 1572482987.0, "step": 9383 }, { "entropy": 1.7276430229345958, "epoch": 1.0308697920957952, "grad_norm": 0.7376964092254639, "learning_rate": 1.0989592664971061e-05, "loss": 1.5201, "mean_token_accuracy": 0.6401703308025996, "num_tokens": 1572654914.0, "step": 9384 }, { "entropy": 1.7409641345342, "epoch": 1.030979649007168, "grad_norm": 0.8233011364936829, "learning_rate": 1.0987991537391884e-05, "loss": 1.4287, "mean_token_accuracy": 0.6417080561319987, "num_tokens": 1572849673.0, "step": 9385 }, { "entropy": 1.7794389128684998, "epoch": 1.0310895059185412, "grad_norm": 0.7736028432846069, "learning_rate": 1.0986390410192767e-05, "loss": 1.5405, "mean_token_accuracy": 0.6407246440649033, "num_tokens": 1573037823.0, "step": 9386 }, { "entropy": 1.7333606382211049, "epoch": 1.031199362829914, "grad_norm": 0.6458233594894409, "learning_rate": 1.0984789283424389e-05, "loss": 1.3172, "mean_token_accuracy": 0.6686715831359228, "num_tokens": 1573199853.0, "step": 9387 }, { "entropy": 1.6926281054814656, "epoch": 1.031309219741287, "grad_norm": 0.7213067412376404, "learning_rate": 1.0983188157137423e-05, "loss": 1.2975, "mean_token_accuracy": 0.6710375199715296, "num_tokens": 1573346257.0, "step": 9388 }, { "entropy": 1.7269733548164368, "epoch": 1.0314190766526599, "grad_norm": 0.6135429739952087, "learning_rate": 1.0981587031382543e-05, "loss": 1.3502, "mean_token_accuracy": 0.6585359672705332, "num_tokens": 1573502913.0, "step": 9389 }, { "entropy": 1.6842507719993591, "epoch": 1.0315289335640327, "grad_norm": 0.6283948421478271, "learning_rate": 1.0979985906210424e-05, "loss": 1.3511, "mean_token_accuracy": 0.6634029597043991, "num_tokens": 1573673503.0, "step": 9390 }, { "entropy": 1.6653286516666412, "epoch": 1.0316387904754059, "grad_norm": 0.6210644245147705, "learning_rate": 1.0978384781671747e-05, "loss": 1.4805, "mean_token_accuracy": 0.648143524924914, "num_tokens": 1573902167.0, "step": 9391 }, { "entropy": 1.6455399890740712, "epoch": 1.0317486473867787, "grad_norm": 0.6163156628608704, "learning_rate": 1.0976783657817178e-05, "loss": 1.4123, "mean_token_accuracy": 0.6596850504477819, "num_tokens": 1574126639.0, "step": 9392 }, { "entropy": 1.6833748817443848, "epoch": 1.0318585042981516, "grad_norm": 0.6697494983673096, "learning_rate": 1.0975182534697397e-05, "loss": 1.3195, "mean_token_accuracy": 0.6766639103492101, "num_tokens": 1574279133.0, "step": 9393 }, { "entropy": 1.7093348105748494, "epoch": 1.0319683612095245, "grad_norm": 0.7185570001602173, "learning_rate": 1.0973581412363078e-05, "loss": 1.381, "mean_token_accuracy": 0.6626923183600107, "num_tokens": 1574416811.0, "step": 9394 }, { "entropy": 1.6518169144789379, "epoch": 1.0320782181208976, "grad_norm": 0.6190440654754639, "learning_rate": 1.0971980290864896e-05, "loss": 1.3933, "mean_token_accuracy": 0.6565068662166595, "num_tokens": 1574590620.0, "step": 9395 }, { "entropy": 1.7660084863503773, "epoch": 1.0321880750322705, "grad_norm": 0.8229091167449951, "learning_rate": 1.0970379170253523e-05, "loss": 1.6284, "mean_token_accuracy": 0.6307843253016472, "num_tokens": 1574792296.0, "step": 9396 }, { "entropy": 1.674621005853017, "epoch": 1.0322979319436434, "grad_norm": 0.6914384961128235, "learning_rate": 1.0968778050579638e-05, "loss": 1.3486, "mean_token_accuracy": 0.6684543887774149, "num_tokens": 1574979186.0, "step": 9397 }, { "entropy": 1.700808932383855, "epoch": 1.0324077888550163, "grad_norm": 0.646267831325531, "learning_rate": 1.096717693189391e-05, "loss": 1.3497, "mean_token_accuracy": 0.65216397245725, "num_tokens": 1575130170.0, "step": 9398 }, { "entropy": 1.6885426342487335, "epoch": 1.0325176457663894, "grad_norm": 0.6223624348640442, "learning_rate": 1.096557581424702e-05, "loss": 1.3716, "mean_token_accuracy": 0.6467955311139425, "num_tokens": 1575370782.0, "step": 9399 }, { "entropy": 1.7195010880629222, "epoch": 1.0326275026777623, "grad_norm": 0.6038949489593506, "learning_rate": 1.0963974697689644e-05, "loss": 1.3991, "mean_token_accuracy": 0.6538459608952204, "num_tokens": 1575633582.0, "step": 9400 }, { "entropy": 1.7567541698614757, "epoch": 1.0327373595891352, "grad_norm": 0.770776093006134, "learning_rate": 1.0962373582272445e-05, "loss": 1.5789, "mean_token_accuracy": 0.6492965420087179, "num_tokens": 1575782782.0, "step": 9401 }, { "entropy": 1.7692484458287556, "epoch": 1.032847216500508, "grad_norm": 0.8557153344154358, "learning_rate": 1.0960772468046109e-05, "loss": 1.4258, "mean_token_accuracy": 0.6477571874856949, "num_tokens": 1575969960.0, "step": 9402 }, { "entropy": 1.681354542573293, "epoch": 1.032957073411881, "grad_norm": 0.6943827867507935, "learning_rate": 1.095917135506131e-05, "loss": 1.2469, "mean_token_accuracy": 0.6881647706031799, "num_tokens": 1576101672.0, "step": 9403 }, { "entropy": 1.7285095751285553, "epoch": 1.033066930323254, "grad_norm": 0.6692385673522949, "learning_rate": 1.0957570243368711e-05, "loss": 1.4383, "mean_token_accuracy": 0.6471123496691386, "num_tokens": 1576249549.0, "step": 9404 }, { "entropy": 1.74024698138237, "epoch": 1.033176787234627, "grad_norm": 0.9306145310401917, "learning_rate": 1.0955969133019e-05, "loss": 1.236, "mean_token_accuracy": 0.6720249801874161, "num_tokens": 1576405097.0, "step": 9405 }, { "entropy": 1.7044260899225872, "epoch": 1.0332866441459998, "grad_norm": 0.7469892501831055, "learning_rate": 1.0954368024062846e-05, "loss": 1.4853, "mean_token_accuracy": 0.6485099146763483, "num_tokens": 1576584110.0, "step": 9406 }, { "entropy": 1.704027235507965, "epoch": 1.0333965010573727, "grad_norm": 0.7305955290794373, "learning_rate": 1.0952766916550923e-05, "loss": 1.4969, "mean_token_accuracy": 0.6464851995309194, "num_tokens": 1576770943.0, "step": 9407 }, { "entropy": 1.7666937212149303, "epoch": 1.0335063579687458, "grad_norm": 0.6984846591949463, "learning_rate": 1.0951165810533903e-05, "loss": 1.3663, "mean_token_accuracy": 0.6565556079149246, "num_tokens": 1576907284.0, "step": 9408 }, { "entropy": 1.7006201644738514, "epoch": 1.0336162148801187, "grad_norm": 0.7005829215049744, "learning_rate": 1.094956470606247e-05, "loss": 1.4357, "mean_token_accuracy": 0.6611707657575607, "num_tokens": 1577102449.0, "step": 9409 }, { "entropy": 1.673817624648412, "epoch": 1.0337260717914916, "grad_norm": 0.7164878845214844, "learning_rate": 1.0947963603187284e-05, "loss": 1.436, "mean_token_accuracy": 0.6602627138296763, "num_tokens": 1577306757.0, "step": 9410 }, { "entropy": 1.6474266449610393, "epoch": 1.0338359287028644, "grad_norm": 0.723376452922821, "learning_rate": 1.094636250195903e-05, "loss": 1.4225, "mean_token_accuracy": 0.6634102612733841, "num_tokens": 1577508937.0, "step": 9411 }, { "entropy": 1.6886802514394124, "epoch": 1.0339457856142376, "grad_norm": 0.7515102624893188, "learning_rate": 1.094476140242838e-05, "loss": 1.3389, "mean_token_accuracy": 0.6653014322121938, "num_tokens": 1577668263.0, "step": 9412 }, { "entropy": 1.6901151835918427, "epoch": 1.0340556425256104, "grad_norm": 0.7329745888710022, "learning_rate": 1.0943160304646004e-05, "loss": 1.2741, "mean_token_accuracy": 0.6680738429228464, "num_tokens": 1577798964.0, "step": 9413 }, { "entropy": 1.6920847098032634, "epoch": 1.0341654994369833, "grad_norm": 0.6827352643013, "learning_rate": 1.0941559208662575e-05, "loss": 1.3344, "mean_token_accuracy": 0.6580119580030441, "num_tokens": 1577947634.0, "step": 9414 }, { "entropy": 1.7117530802885692, "epoch": 1.0342753563483562, "grad_norm": 0.6250885725021362, "learning_rate": 1.0939958114528782e-05, "loss": 1.294, "mean_token_accuracy": 0.666492278377215, "num_tokens": 1578098070.0, "step": 9415 }, { "entropy": 1.7154468695322673, "epoch": 1.034385213259729, "grad_norm": 0.6659483909606934, "learning_rate": 1.0938357022295277e-05, "loss": 1.329, "mean_token_accuracy": 0.6750149528185526, "num_tokens": 1578226451.0, "step": 9416 }, { "entropy": 1.7065132061640422, "epoch": 1.0344950701711022, "grad_norm": 0.7499461770057678, "learning_rate": 1.0936755932012748e-05, "loss": 1.4039, "mean_token_accuracy": 0.6496814092000326, "num_tokens": 1578361910.0, "step": 9417 }, { "entropy": 1.7363367676734924, "epoch": 1.034604927082475, "grad_norm": 0.6667954921722412, "learning_rate": 1.0935154843731868e-05, "loss": 1.3555, "mean_token_accuracy": 0.6621254285176595, "num_tokens": 1578520491.0, "step": 9418 }, { "entropy": 1.7204224864641826, "epoch": 1.034714783993848, "grad_norm": 0.7654107213020325, "learning_rate": 1.0933553757503306e-05, "loss": 1.4836, "mean_token_accuracy": 0.6469293584426244, "num_tokens": 1578708477.0, "step": 9419 }, { "entropy": 1.725062648455302, "epoch": 1.0348246409052209, "grad_norm": 0.6829729676246643, "learning_rate": 1.0931952673377735e-05, "loss": 1.3266, "mean_token_accuracy": 0.6637885620196661, "num_tokens": 1578888821.0, "step": 9420 }, { "entropy": 1.6738385657469432, "epoch": 1.034934497816594, "grad_norm": 0.538158655166626, "learning_rate": 1.0930351591405836e-05, "loss": 1.3432, "mean_token_accuracy": 0.6584972242514292, "num_tokens": 1579082733.0, "step": 9421 }, { "entropy": 1.7132914861043294, "epoch": 1.0350443547279669, "grad_norm": 0.6430938839912415, "learning_rate": 1.0928750511638272e-05, "loss": 1.4578, "mean_token_accuracy": 0.6506678561369578, "num_tokens": 1579299272.0, "step": 9422 }, { "entropy": 1.7596480747063954, "epoch": 1.0351542116393397, "grad_norm": 0.7233623266220093, "learning_rate": 1.0927149434125725e-05, "loss": 1.3634, "mean_token_accuracy": 0.6559812525908152, "num_tokens": 1579418957.0, "step": 9423 }, { "entropy": 1.6402805646260579, "epoch": 1.0352640685507126, "grad_norm": 0.7694196701049805, "learning_rate": 1.092554835891887e-05, "loss": 1.2518, "mean_token_accuracy": 0.6738807211319605, "num_tokens": 1579551997.0, "step": 9424 }, { "entropy": 1.6636198858420055, "epoch": 1.0353739254620857, "grad_norm": 0.7831659317016602, "learning_rate": 1.092394728606837e-05, "loss": 1.4856, "mean_token_accuracy": 0.6563919832309087, "num_tokens": 1579768552.0, "step": 9425 }, { "entropy": 1.716377208630244, "epoch": 1.0354837823734586, "grad_norm": 0.6124777793884277, "learning_rate": 1.0922346215624905e-05, "loss": 1.521, "mean_token_accuracy": 0.6268165409564972, "num_tokens": 1580016863.0, "step": 9426 }, { "entropy": 1.7089830438296, "epoch": 1.0355936392848315, "grad_norm": 0.6345846652984619, "learning_rate": 1.092074514763915e-05, "loss": 1.3273, "mean_token_accuracy": 0.655559649070104, "num_tokens": 1580174116.0, "step": 9427 }, { "entropy": 1.726669172445933, "epoch": 1.0357034961962044, "grad_norm": 0.7052589654922485, "learning_rate": 1.0919144082161773e-05, "loss": 1.3349, "mean_token_accuracy": 0.6611567487319311, "num_tokens": 1580325786.0, "step": 9428 }, { "entropy": 1.71493865052859, "epoch": 1.0358133531075775, "grad_norm": 0.6326825618743896, "learning_rate": 1.0917543019243451e-05, "loss": 1.3176, "mean_token_accuracy": 0.6714158058166504, "num_tokens": 1580469908.0, "step": 9429 }, { "entropy": 1.6959039668242137, "epoch": 1.0359232100189504, "grad_norm": 0.6697741746902466, "learning_rate": 1.0915941958934855e-05, "loss": 1.2301, "mean_token_accuracy": 0.6766091585159302, "num_tokens": 1580630331.0, "step": 9430 }, { "entropy": 1.672759582599004, "epoch": 1.0360330669303233, "grad_norm": 0.7174257636070251, "learning_rate": 1.0914340901286657e-05, "loss": 1.5949, "mean_token_accuracy": 0.646736760934194, "num_tokens": 1580830522.0, "step": 9431 }, { "entropy": 1.7258618871370952, "epoch": 1.0361429238416962, "grad_norm": 0.6316325664520264, "learning_rate": 1.0912739846349529e-05, "loss": 1.419, "mean_token_accuracy": 0.6607301781574885, "num_tokens": 1581014308.0, "step": 9432 }, { "entropy": 1.7705416182676952, "epoch": 1.036252780753069, "grad_norm": 0.6447232961654663, "learning_rate": 1.0911138794174151e-05, "loss": 1.3577, "mean_token_accuracy": 0.6522252509991328, "num_tokens": 1581141631.0, "step": 9433 }, { "entropy": 1.6815461615721385, "epoch": 1.0363626376644421, "grad_norm": 0.6908884644508362, "learning_rate": 1.0909537744811186e-05, "loss": 1.3045, "mean_token_accuracy": 0.6645476520061493, "num_tokens": 1581289338.0, "step": 9434 }, { "entropy": 1.6792431473731995, "epoch": 1.036472494575815, "grad_norm": 0.6091166734695435, "learning_rate": 1.090793669831131e-05, "loss": 1.4287, "mean_token_accuracy": 0.6663202345371246, "num_tokens": 1581475004.0, "step": 9435 }, { "entropy": 1.6384399731953938, "epoch": 1.036582351487188, "grad_norm": 0.6524921655654907, "learning_rate": 1.0906335654725199e-05, "loss": 1.443, "mean_token_accuracy": 0.6610978494087855, "num_tokens": 1581635537.0, "step": 9436 }, { "entropy": 1.7117190460364025, "epoch": 1.0366922083985608, "grad_norm": 0.5974973440170288, "learning_rate": 1.090473461410352e-05, "loss": 1.5023, "mean_token_accuracy": 0.6504662285248438, "num_tokens": 1581826389.0, "step": 9437 }, { "entropy": 1.7102805376052856, "epoch": 1.036802065309934, "grad_norm": 0.7070255279541016, "learning_rate": 1.0903133576496952e-05, "loss": 1.5332, "mean_token_accuracy": 0.640349547068278, "num_tokens": 1582049870.0, "step": 9438 }, { "entropy": 1.7368799050649006, "epoch": 1.0369119222213068, "grad_norm": 0.6297962069511414, "learning_rate": 1.0901532541956159e-05, "loss": 1.389, "mean_token_accuracy": 0.64411032696565, "num_tokens": 1582205781.0, "step": 9439 }, { "entropy": 1.7114115158716838, "epoch": 1.0370217791326797, "grad_norm": 0.6687158346176147, "learning_rate": 1.0899931510531814e-05, "loss": 1.6336, "mean_token_accuracy": 0.6240918189287186, "num_tokens": 1582406654.0, "step": 9440 }, { "entropy": 1.6156066060066223, "epoch": 1.0371316360440526, "grad_norm": 0.7124824523925781, "learning_rate": 1.0898330482274598e-05, "loss": 1.3993, "mean_token_accuracy": 0.6696003576119741, "num_tokens": 1582566705.0, "step": 9441 }, { "entropy": 1.7156145075956981, "epoch": 1.0372414929554257, "grad_norm": 0.6782826781272888, "learning_rate": 1.089672945723517e-05, "loss": 1.4816, "mean_token_accuracy": 0.6556824495395025, "num_tokens": 1582742277.0, "step": 9442 }, { "entropy": 1.593651960293452, "epoch": 1.0373513498667986, "grad_norm": 0.8009786009788513, "learning_rate": 1.089512843546421e-05, "loss": 1.3372, "mean_token_accuracy": 0.6646837691466013, "num_tokens": 1582926809.0, "step": 9443 }, { "entropy": 1.7035342653592427, "epoch": 1.0374612067781714, "grad_norm": 0.7360708713531494, "learning_rate": 1.0893527417012391e-05, "loss": 1.453, "mean_token_accuracy": 0.6582255164782206, "num_tokens": 1583084184.0, "step": 9444 }, { "entropy": 1.6633445918560028, "epoch": 1.0375710636895443, "grad_norm": 0.5686823129653931, "learning_rate": 1.0891926401930379e-05, "loss": 1.3507, "mean_token_accuracy": 0.6624014725287756, "num_tokens": 1583226432.0, "step": 9445 }, { "entropy": 1.698949267466863, "epoch": 1.0376809206009172, "grad_norm": 0.757599413394928, "learning_rate": 1.0890325390268846e-05, "loss": 1.2624, "mean_token_accuracy": 0.674264038602511, "num_tokens": 1583345570.0, "step": 9446 }, { "entropy": 1.6697109937667847, "epoch": 1.0377907775122903, "grad_norm": 0.7264062166213989, "learning_rate": 1.088872438207847e-05, "loss": 1.3789, "mean_token_accuracy": 0.6585222283999125, "num_tokens": 1583594017.0, "step": 9447 }, { "entropy": 1.664307415485382, "epoch": 1.0379006344236632, "grad_norm": 0.6853240728378296, "learning_rate": 1.0887123377409911e-05, "loss": 1.3018, "mean_token_accuracy": 0.6689639985561371, "num_tokens": 1583728618.0, "step": 9448 }, { "entropy": 1.6331247488657634, "epoch": 1.038010491335036, "grad_norm": 0.7877902984619141, "learning_rate": 1.0885522376313848e-05, "loss": 1.3652, "mean_token_accuracy": 0.6695854564507803, "num_tokens": 1583859662.0, "step": 9449 }, { "entropy": 1.788600593805313, "epoch": 1.038120348246409, "grad_norm": 0.7138171195983887, "learning_rate": 1.0883921378840954e-05, "loss": 1.3607, "mean_token_accuracy": 0.6501663575569788, "num_tokens": 1584058402.0, "step": 9450 }, { "entropy": 1.697994162638982, "epoch": 1.038230205157782, "grad_norm": 1.0916004180908203, "learning_rate": 1.0882320385041893e-05, "loss": 1.3157, "mean_token_accuracy": 0.6665289948383967, "num_tokens": 1584191366.0, "step": 9451 }, { "entropy": 1.698983242114385, "epoch": 1.038340062069155, "grad_norm": 0.7171838283538818, "learning_rate": 1.0880719394967336e-05, "loss": 1.4387, "mean_token_accuracy": 0.6508918007214864, "num_tokens": 1584338635.0, "step": 9452 }, { "entropy": 1.7182096342245738, "epoch": 1.0384499189805279, "grad_norm": 0.7586554884910583, "learning_rate": 1.0879118408667964e-05, "loss": 1.2876, "mean_token_accuracy": 0.667991022268931, "num_tokens": 1584457176.0, "step": 9453 }, { "entropy": 1.6738979419072468, "epoch": 1.0385597758919007, "grad_norm": 0.706785261631012, "learning_rate": 1.0877517426194433e-05, "loss": 1.2904, "mean_token_accuracy": 0.6816811164220175, "num_tokens": 1584625605.0, "step": 9454 }, { "entropy": 1.6561195055643718, "epoch": 1.0386696328032738, "grad_norm": 0.7076295614242554, "learning_rate": 1.0875916447597423e-05, "loss": 1.5181, "mean_token_accuracy": 0.6429178069035212, "num_tokens": 1584812931.0, "step": 9455 }, { "entropy": 1.7122123738129933, "epoch": 1.0387794897146467, "grad_norm": 0.7200018167495728, "learning_rate": 1.0874315472927601e-05, "loss": 1.4502, "mean_token_accuracy": 0.6555789758761724, "num_tokens": 1584970989.0, "step": 9456 }, { "entropy": 1.6943861742814381, "epoch": 1.0388893466260196, "grad_norm": 0.8031889796257019, "learning_rate": 1.087271450223564e-05, "loss": 1.5488, "mean_token_accuracy": 0.6436296353737513, "num_tokens": 1585140553.0, "step": 9457 }, { "entropy": 1.718834122021993, "epoch": 1.0389992035373925, "grad_norm": 0.5605930089950562, "learning_rate": 1.0871113535572203e-05, "loss": 1.4931, "mean_token_accuracy": 0.6522552420695623, "num_tokens": 1585352103.0, "step": 9458 }, { "entropy": 1.6309671302636464, "epoch": 1.0391090604487654, "grad_norm": 0.6874315142631531, "learning_rate": 1.0869512572987971e-05, "loss": 1.3105, "mean_token_accuracy": 0.6766562660535177, "num_tokens": 1585488952.0, "step": 9459 }, { "entropy": 1.7397524615128834, "epoch": 1.0392189173601385, "grad_norm": 0.7973026037216187, "learning_rate": 1.0867911614533599e-05, "loss": 1.5217, "mean_token_accuracy": 0.6329129338264465, "num_tokens": 1585683287.0, "step": 9460 }, { "entropy": 1.6979398131370544, "epoch": 1.0393287742715114, "grad_norm": 0.7028073668479919, "learning_rate": 1.0866310660259769e-05, "loss": 1.3715, "mean_token_accuracy": 0.6600656112035116, "num_tokens": 1585861769.0, "step": 9461 }, { "entropy": 1.6728238761425018, "epoch": 1.0394386311828843, "grad_norm": 0.6784250736236572, "learning_rate": 1.0864709710217149e-05, "loss": 1.4738, "mean_token_accuracy": 0.64061538875103, "num_tokens": 1586053141.0, "step": 9462 }, { "entropy": 1.7052525381247203, "epoch": 1.0395484880942572, "grad_norm": 0.6356661915779114, "learning_rate": 1.0863108764456403e-05, "loss": 1.3033, "mean_token_accuracy": 0.6688580562671026, "num_tokens": 1586187994.0, "step": 9463 }, { "entropy": 1.6879315078258514, "epoch": 1.0396583450056303, "grad_norm": 0.6088063716888428, "learning_rate": 1.0861507823028201e-05, "loss": 1.2934, "mean_token_accuracy": 0.6732538690169653, "num_tokens": 1586321606.0, "step": 9464 }, { "entropy": 1.6835426688194275, "epoch": 1.0397682019170031, "grad_norm": 0.7947443127632141, "learning_rate": 1.0859906885983221e-05, "loss": 1.4395, "mean_token_accuracy": 0.6450006117423376, "num_tokens": 1586497070.0, "step": 9465 }, { "entropy": 1.678231567144394, "epoch": 1.039878058828376, "grad_norm": 0.6062014102935791, "learning_rate": 1.0858305953372117e-05, "loss": 1.411, "mean_token_accuracy": 0.6603821168343226, "num_tokens": 1586704728.0, "step": 9466 }, { "entropy": 1.757646510998408, "epoch": 1.039987915739749, "grad_norm": 0.7607711553573608, "learning_rate": 1.0856705025245566e-05, "loss": 1.2984, "mean_token_accuracy": 0.6734205285708109, "num_tokens": 1586815108.0, "step": 9467 }, { "entropy": 1.6538205246130626, "epoch": 1.040097772651122, "grad_norm": 0.8425273895263672, "learning_rate": 1.0855104101654241e-05, "loss": 1.1927, "mean_token_accuracy": 0.6824866682291031, "num_tokens": 1586938071.0, "step": 9468 }, { "entropy": 1.678248147169749, "epoch": 1.040207629562495, "grad_norm": 0.6422735452651978, "learning_rate": 1.0853503182648806e-05, "loss": 1.5994, "mean_token_accuracy": 0.6251678119103113, "num_tokens": 1587160509.0, "step": 9469 }, { "entropy": 1.7205635011196136, "epoch": 1.0403174864738678, "grad_norm": 0.717113733291626, "learning_rate": 1.0851902268279923e-05, "loss": 1.4603, "mean_token_accuracy": 0.6581100126107534, "num_tokens": 1587294558.0, "step": 9470 }, { "entropy": 1.6604284048080444, "epoch": 1.0404273433852407, "grad_norm": 0.6005326509475708, "learning_rate": 1.0850301358598276e-05, "loss": 1.5035, "mean_token_accuracy": 0.6511695633331934, "num_tokens": 1587540147.0, "step": 9471 }, { "entropy": 1.7822232246398926, "epoch": 1.0405372002966136, "grad_norm": 0.6600624918937683, "learning_rate": 1.0848700453654517e-05, "loss": 1.3744, "mean_token_accuracy": 0.6612835874160131, "num_tokens": 1587676331.0, "step": 9472 }, { "entropy": 1.6617770393689473, "epoch": 1.0406470572079867, "grad_norm": 0.5907867550849915, "learning_rate": 1.0847099553499321e-05, "loss": 1.3851, "mean_token_accuracy": 0.6616502503554026, "num_tokens": 1587839664.0, "step": 9473 }, { "entropy": 1.7285722692807515, "epoch": 1.0407569141193596, "grad_norm": 0.6949423551559448, "learning_rate": 1.0845498658183358e-05, "loss": 1.6408, "mean_token_accuracy": 0.6250172158082327, "num_tokens": 1588028296.0, "step": 9474 }, { "entropy": 1.6520145336786907, "epoch": 1.0408667710307324, "grad_norm": 0.6275820136070251, "learning_rate": 1.084389776775729e-05, "loss": 1.3768, "mean_token_accuracy": 0.6688976238171259, "num_tokens": 1588161759.0, "step": 9475 }, { "entropy": 1.727418303489685, "epoch": 1.0409766279421053, "grad_norm": 0.690833330154419, "learning_rate": 1.0842296882271785e-05, "loss": 1.3988, "mean_token_accuracy": 0.6522354880968729, "num_tokens": 1588316836.0, "step": 9476 }, { "entropy": 1.7261015077431996, "epoch": 1.0410864848534784, "grad_norm": 0.7379058003425598, "learning_rate": 1.0840696001777519e-05, "loss": 1.4334, "mean_token_accuracy": 0.6473392049471537, "num_tokens": 1588497270.0, "step": 9477 }, { "entropy": 1.7033613324165344, "epoch": 1.0411963417648513, "grad_norm": 0.6721797585487366, "learning_rate": 1.0839095126325148e-05, "loss": 1.4466, "mean_token_accuracy": 0.6510754525661469, "num_tokens": 1588687763.0, "step": 9478 }, { "entropy": 1.7521330614884694, "epoch": 1.0413061986762242, "grad_norm": 0.7003641128540039, "learning_rate": 1.0837494255965347e-05, "loss": 1.4311, "mean_token_accuracy": 0.6499675313631693, "num_tokens": 1588893987.0, "step": 9479 }, { "entropy": 1.6936258971691132, "epoch": 1.041416055587597, "grad_norm": 0.6926009654998779, "learning_rate": 1.0835893390748777e-05, "loss": 1.3398, "mean_token_accuracy": 0.6582322865724564, "num_tokens": 1589149049.0, "step": 9480 }, { "entropy": 1.7185107568899791, "epoch": 1.0415259124989702, "grad_norm": 0.6344667077064514, "learning_rate": 1.083429253072611e-05, "loss": 1.3841, "mean_token_accuracy": 0.6531223158041636, "num_tokens": 1589317687.0, "step": 9481 }, { "entropy": 1.7045740981896718, "epoch": 1.041635769410343, "grad_norm": 0.6675848960876465, "learning_rate": 1.0832691675948004e-05, "loss": 1.3395, "mean_token_accuracy": 0.6758049378792444, "num_tokens": 1589477279.0, "step": 9482 }, { "entropy": 1.717594563961029, "epoch": 1.041745626321716, "grad_norm": 0.9027393460273743, "learning_rate": 1.0831090826465139e-05, "loss": 1.4093, "mean_token_accuracy": 0.6508564899365107, "num_tokens": 1589628011.0, "step": 9483 }, { "entropy": 1.5932787358760834, "epoch": 1.0418554832330889, "grad_norm": 0.5880972146987915, "learning_rate": 1.0829489982328168e-05, "loss": 1.3118, "mean_token_accuracy": 0.6709119379520416, "num_tokens": 1589802651.0, "step": 9484 }, { "entropy": 1.728572279214859, "epoch": 1.0419653401444617, "grad_norm": 0.629753589630127, "learning_rate": 1.0827889143587761e-05, "loss": 1.3639, "mean_token_accuracy": 0.6476372530062994, "num_tokens": 1590000495.0, "step": 9485 }, { "entropy": 1.7684464951356251, "epoch": 1.0420751970558348, "grad_norm": 0.7243369817733765, "learning_rate": 1.082628831029459e-05, "loss": 1.337, "mean_token_accuracy": 0.6554118494192759, "num_tokens": 1590147039.0, "step": 9486 }, { "entropy": 1.7246462404727936, "epoch": 1.0421850539672077, "grad_norm": 0.7119016647338867, "learning_rate": 1.0824687482499312e-05, "loss": 1.6279, "mean_token_accuracy": 0.626791646083196, "num_tokens": 1590345057.0, "step": 9487 }, { "entropy": 1.6625853478908539, "epoch": 1.0422949108785806, "grad_norm": 0.6488005518913269, "learning_rate": 1.0823086660252595e-05, "loss": 1.3786, "mean_token_accuracy": 0.6684714208046595, "num_tokens": 1590517394.0, "step": 9488 }, { "entropy": 1.6413832604885101, "epoch": 1.0424047677899535, "grad_norm": 0.6880024075508118, "learning_rate": 1.0821485843605114e-05, "loss": 1.3171, "mean_token_accuracy": 0.6710259020328522, "num_tokens": 1590676951.0, "step": 9489 }, { "entropy": 1.7233413557211559, "epoch": 1.0425146247013266, "grad_norm": 0.8669329881668091, "learning_rate": 1.0819885032607516e-05, "loss": 1.4437, "mean_token_accuracy": 0.6574445317188898, "num_tokens": 1590798961.0, "step": 9490 }, { "entropy": 1.7381323476632435, "epoch": 1.0426244816126995, "grad_norm": 0.7421051263809204, "learning_rate": 1.0818284227310479e-05, "loss": 1.3651, "mean_token_accuracy": 0.6702295790115992, "num_tokens": 1590925866.0, "step": 9491 }, { "entropy": 1.69859579205513, "epoch": 1.0427343385240724, "grad_norm": 0.6176729202270508, "learning_rate": 1.0816683427764665e-05, "loss": 1.3806, "mean_token_accuracy": 0.6713998268047968, "num_tokens": 1591079868.0, "step": 9492 }, { "entropy": 1.6749196946620941, "epoch": 1.0428441954354453, "grad_norm": 0.766416072845459, "learning_rate": 1.0815082634020737e-05, "loss": 1.3999, "mean_token_accuracy": 0.6613041559855143, "num_tokens": 1591232390.0, "step": 9493 }, { "entropy": 1.7065080106258392, "epoch": 1.0429540523468184, "grad_norm": 0.5942599177360535, "learning_rate": 1.0813481846129358e-05, "loss": 1.3596, "mean_token_accuracy": 0.6696609854698181, "num_tokens": 1591415037.0, "step": 9494 }, { "entropy": 1.6899320483207703, "epoch": 1.0430639092581913, "grad_norm": 0.6180544495582581, "learning_rate": 1.0811881064141201e-05, "loss": 1.4791, "mean_token_accuracy": 0.6475685685873032, "num_tokens": 1591633165.0, "step": 9495 }, { "entropy": 1.7490257620811462, "epoch": 1.0431737661695641, "grad_norm": 0.8704851269721985, "learning_rate": 1.0810280288106918e-05, "loss": 1.5085, "mean_token_accuracy": 0.6373362342516581, "num_tokens": 1591830572.0, "step": 9496 }, { "entropy": 1.7038207252820332, "epoch": 1.043283623080937, "grad_norm": 0.6665728092193604, "learning_rate": 1.0808679518077178e-05, "loss": 1.3859, "mean_token_accuracy": 0.6533089727163315, "num_tokens": 1592012205.0, "step": 9497 }, { "entropy": 1.6818484663963318, "epoch": 1.04339347999231, "grad_norm": 0.7445968985557556, "learning_rate": 1.0807078754102649e-05, "loss": 1.3426, "mean_token_accuracy": 0.662312775850296, "num_tokens": 1592147760.0, "step": 9498 }, { "entropy": 1.7490168611208599, "epoch": 1.043503336903683, "grad_norm": 0.6908554434776306, "learning_rate": 1.0805477996233988e-05, "loss": 1.409, "mean_token_accuracy": 0.649876077969869, "num_tokens": 1592273034.0, "step": 9499 }, { "entropy": 1.6453217168649037, "epoch": 1.043613193815056, "grad_norm": 0.7113070487976074, "learning_rate": 1.0803877244521863e-05, "loss": 1.3044, "mean_token_accuracy": 0.6699142803748449, "num_tokens": 1592419247.0, "step": 9500 }, { "entropy": 1.7486879030863445, "epoch": 1.0437230507264288, "grad_norm": 0.7228788137435913, "learning_rate": 1.0802276499016932e-05, "loss": 1.4574, "mean_token_accuracy": 0.6567816833655039, "num_tokens": 1592549064.0, "step": 9501 }, { "entropy": 1.7028538783391316, "epoch": 1.0438329076378017, "grad_norm": 0.7245208024978638, "learning_rate": 1.0800675759769861e-05, "loss": 1.4313, "mean_token_accuracy": 0.653529609243075, "num_tokens": 1592725321.0, "step": 9502 }, { "entropy": 1.6976373294989269, "epoch": 1.0439427645491748, "grad_norm": 0.5360068678855896, "learning_rate": 1.0799075026831317e-05, "loss": 1.4317, "mean_token_accuracy": 0.6476470828056335, "num_tokens": 1592938657.0, "step": 9503 }, { "entropy": 1.6830028196175892, "epoch": 1.0440526214605477, "grad_norm": 0.5777772665023804, "learning_rate": 1.0797474300251952e-05, "loss": 1.4256, "mean_token_accuracy": 0.6514229973157247, "num_tokens": 1593133783.0, "step": 9504 }, { "entropy": 1.6651501556237538, "epoch": 1.0441624783719206, "grad_norm": 0.6623477935791016, "learning_rate": 1.0795873580082434e-05, "loss": 1.4702, "mean_token_accuracy": 0.6620995352665583, "num_tokens": 1593338733.0, "step": 9505 }, { "entropy": 1.698107163111369, "epoch": 1.0442723352832934, "grad_norm": 0.7003724575042725, "learning_rate": 1.0794272866373431e-05, "loss": 1.5227, "mean_token_accuracy": 0.6455037742853165, "num_tokens": 1593586152.0, "step": 9506 }, { "entropy": 1.7461525400479634, "epoch": 1.0443821921946665, "grad_norm": 0.8416768908500671, "learning_rate": 1.0792672159175595e-05, "loss": 1.6376, "mean_token_accuracy": 0.6324817140897115, "num_tokens": 1593765469.0, "step": 9507 }, { "entropy": 1.7584339678287506, "epoch": 1.0444920491060394, "grad_norm": 0.7773275971412659, "learning_rate": 1.079107145853959e-05, "loss": 1.4037, "mean_token_accuracy": 0.6495963931083679, "num_tokens": 1593887086.0, "step": 9508 }, { "entropy": 1.715292066335678, "epoch": 1.0446019060174123, "grad_norm": 0.7239483594894409, "learning_rate": 1.0789470764516084e-05, "loss": 1.2976, "mean_token_accuracy": 0.6715022226174673, "num_tokens": 1594044689.0, "step": 9509 }, { "entropy": 1.6825797359148662, "epoch": 1.0447117629287852, "grad_norm": 0.7251731753349304, "learning_rate": 1.0787870077155728e-05, "loss": 1.2752, "mean_token_accuracy": 0.6671033104260763, "num_tokens": 1594163502.0, "step": 9510 }, { "entropy": 1.7215838134288788, "epoch": 1.044821619840158, "grad_norm": 0.7168553471565247, "learning_rate": 1.0786269396509193e-05, "loss": 1.2838, "mean_token_accuracy": 0.6646192222833633, "num_tokens": 1594317624.0, "step": 9511 }, { "entropy": 1.718077729145686, "epoch": 1.0449314767515312, "grad_norm": 0.6914111375808716, "learning_rate": 1.0784668722627134e-05, "loss": 1.3862, "mean_token_accuracy": 0.6595685482025146, "num_tokens": 1594465247.0, "step": 9512 }, { "entropy": 1.7051511108875275, "epoch": 1.045041333662904, "grad_norm": 0.7132536768913269, "learning_rate": 1.0783068055560212e-05, "loss": 1.3335, "mean_token_accuracy": 0.6642508854468664, "num_tokens": 1594646030.0, "step": 9513 }, { "entropy": 1.7331341405709584, "epoch": 1.045151190574277, "grad_norm": 0.760560929775238, "learning_rate": 1.0781467395359086e-05, "loss": 1.4119, "mean_token_accuracy": 0.6549452046553293, "num_tokens": 1594796111.0, "step": 9514 }, { "entropy": 1.6813835899035137, "epoch": 1.0452610474856499, "grad_norm": 0.6038804650306702, "learning_rate": 1.0779866742074427e-05, "loss": 1.3664, "mean_token_accuracy": 0.655574768781662, "num_tokens": 1594951516.0, "step": 9515 }, { "entropy": 1.753594805796941, "epoch": 1.045370904397023, "grad_norm": 0.6593688130378723, "learning_rate": 1.0778266095756877e-05, "loss": 1.3677, "mean_token_accuracy": 0.64958788951238, "num_tokens": 1595102148.0, "step": 9516 }, { "entropy": 1.696462760368983, "epoch": 1.0454807613083958, "grad_norm": 0.8179159164428711, "learning_rate": 1.077666545645711e-05, "loss": 1.342, "mean_token_accuracy": 0.6669958929220835, "num_tokens": 1595253275.0, "step": 9517 }, { "entropy": 1.7647280593713124, "epoch": 1.0455906182197687, "grad_norm": 0.7071588039398193, "learning_rate": 1.077506482422578e-05, "loss": 1.4662, "mean_token_accuracy": 0.6489268441994985, "num_tokens": 1595420875.0, "step": 9518 }, { "entropy": 1.661277323961258, "epoch": 1.0457004751311416, "grad_norm": 0.6176816821098328, "learning_rate": 1.0773464199113545e-05, "loss": 1.4074, "mean_token_accuracy": 0.6495037625233332, "num_tokens": 1595639530.0, "step": 9519 }, { "entropy": 1.6793854931990306, "epoch": 1.0458103320425147, "grad_norm": 0.6099405288696289, "learning_rate": 1.0771863581171067e-05, "loss": 1.4901, "mean_token_accuracy": 0.638078898191452, "num_tokens": 1595805557.0, "step": 9520 }, { "entropy": 1.6664031247297924, "epoch": 1.0459201889538876, "grad_norm": 0.7855103611946106, "learning_rate": 1.0770262970449007e-05, "loss": 1.2644, "mean_token_accuracy": 0.6843874802192053, "num_tokens": 1596024907.0, "step": 9521 }, { "entropy": 1.7892861366271973, "epoch": 1.0460300458652605, "grad_norm": 0.6650031208992004, "learning_rate": 1.0768662366998017e-05, "loss": 1.5564, "mean_token_accuracy": 0.6340744346380234, "num_tokens": 1596223113.0, "step": 9522 }, { "entropy": 1.6714712381362915, "epoch": 1.0461399027766334, "grad_norm": 0.6163212656974792, "learning_rate": 1.0767061770868758e-05, "loss": 1.4224, "mean_token_accuracy": 0.6527364104986191, "num_tokens": 1596404722.0, "step": 9523 }, { "entropy": 1.7213294704755147, "epoch": 1.0462497596880063, "grad_norm": 0.7242403030395508, "learning_rate": 1.0765461182111894e-05, "loss": 1.4046, "mean_token_accuracy": 0.6574538151423136, "num_tokens": 1596556498.0, "step": 9524 }, { "entropy": 1.70473250746727, "epoch": 1.0463596165993794, "grad_norm": 0.8077401518821716, "learning_rate": 1.0763860600778073e-05, "loss": 1.3111, "mean_token_accuracy": 0.6717382967472076, "num_tokens": 1596692410.0, "step": 9525 }, { "entropy": 1.65737051765124, "epoch": 1.0464694735107523, "grad_norm": 0.6047679781913757, "learning_rate": 1.0762260026917957e-05, "loss": 1.5421, "mean_token_accuracy": 0.6482739100853602, "num_tokens": 1596893524.0, "step": 9526 }, { "entropy": 1.6772952377796173, "epoch": 1.0465793304221251, "grad_norm": 0.6844344139099121, "learning_rate": 1.076065946058221e-05, "loss": 1.3097, "mean_token_accuracy": 0.6696969568729401, "num_tokens": 1597033357.0, "step": 9527 }, { "entropy": 1.690885325272878, "epoch": 1.046689187333498, "grad_norm": 0.722217857837677, "learning_rate": 1.0759058901821478e-05, "loss": 1.4674, "mean_token_accuracy": 0.6511874943971634, "num_tokens": 1597219302.0, "step": 9528 }, { "entropy": 1.7689124047756195, "epoch": 1.0467990442448711, "grad_norm": 0.7030705809593201, "learning_rate": 1.0757458350686423e-05, "loss": 1.3782, "mean_token_accuracy": 0.6555833717187246, "num_tokens": 1597354184.0, "step": 9529 }, { "entropy": 1.7530174255371094, "epoch": 1.046908901156244, "grad_norm": 0.6865312457084656, "learning_rate": 1.0755857807227705e-05, "loss": 1.2826, "mean_token_accuracy": 0.6668087194363276, "num_tokens": 1597483504.0, "step": 9530 }, { "entropy": 1.664690375328064, "epoch": 1.047018758067617, "grad_norm": 0.7193317413330078, "learning_rate": 1.0754257271495976e-05, "loss": 1.4993, "mean_token_accuracy": 0.6674816509087881, "num_tokens": 1597629526.0, "step": 9531 }, { "entropy": 1.6781654755274455, "epoch": 1.0471286149789898, "grad_norm": 0.8282174468040466, "learning_rate": 1.0752656743541892e-05, "loss": 1.2977, "mean_token_accuracy": 0.6744814167420069, "num_tokens": 1597772249.0, "step": 9532 }, { "entropy": 1.653449535369873, "epoch": 1.047238471890363, "grad_norm": 0.6748833060264587, "learning_rate": 1.0751056223416116e-05, "loss": 1.4044, "mean_token_accuracy": 0.6702044556538264, "num_tokens": 1597951987.0, "step": 9533 }, { "entropy": 1.6903007527192433, "epoch": 1.0473483288017358, "grad_norm": 0.7806357741355896, "learning_rate": 1.074945571116929e-05, "loss": 1.3952, "mean_token_accuracy": 0.6694519221782684, "num_tokens": 1598150824.0, "step": 9534 }, { "entropy": 1.681130548318227, "epoch": 1.0474581857131087, "grad_norm": 0.6809220314025879, "learning_rate": 1.0747855206852083e-05, "loss": 1.3198, "mean_token_accuracy": 0.6696507086356481, "num_tokens": 1598324940.0, "step": 9535 }, { "entropy": 1.7262980838616688, "epoch": 1.0475680426244816, "grad_norm": 0.763521671295166, "learning_rate": 1.0746254710515148e-05, "loss": 1.4035, "mean_token_accuracy": 0.6502132068077723, "num_tokens": 1598501856.0, "step": 9536 }, { "entropy": 1.738362580537796, "epoch": 1.0476778995358544, "grad_norm": 0.6573911309242249, "learning_rate": 1.0744654222209132e-05, "loss": 1.476, "mean_token_accuracy": 0.65400230884552, "num_tokens": 1598670504.0, "step": 9537 }, { "entropy": 1.6924460927645366, "epoch": 1.0477877564472275, "grad_norm": 0.8024819493293762, "learning_rate": 1.0743053741984692e-05, "loss": 1.3561, "mean_token_accuracy": 0.6752625207106272, "num_tokens": 1598838975.0, "step": 9538 }, { "entropy": 1.6892497539520264, "epoch": 1.0478976133586004, "grad_norm": 0.5688782930374146, "learning_rate": 1.0741453269892495e-05, "loss": 1.3445, "mean_token_accuracy": 0.6590389758348465, "num_tokens": 1599027494.0, "step": 9539 }, { "entropy": 1.6760010123252869, "epoch": 1.0480074702699733, "grad_norm": 0.6238588094711304, "learning_rate": 1.0739852805983177e-05, "loss": 1.3616, "mean_token_accuracy": 0.6570176730553309, "num_tokens": 1599237306.0, "step": 9540 }, { "entropy": 1.7172687649726868, "epoch": 1.0481173271813462, "grad_norm": 0.6968215703964233, "learning_rate": 1.0738252350307403e-05, "loss": 1.3149, "mean_token_accuracy": 0.669064129392306, "num_tokens": 1599419358.0, "step": 9541 }, { "entropy": 1.7211929361025493, "epoch": 1.0482271840927193, "grad_norm": 0.8353010416030884, "learning_rate": 1.0736651902915827e-05, "loss": 1.3156, "mean_token_accuracy": 0.6618754814068476, "num_tokens": 1599527770.0, "step": 9542 }, { "entropy": 1.6436882019042969, "epoch": 1.0483370410040922, "grad_norm": 0.6856302618980408, "learning_rate": 1.0735051463859097e-05, "loss": 1.2963, "mean_token_accuracy": 0.6674405237038931, "num_tokens": 1599705089.0, "step": 9543 }, { "entropy": 1.6677443285783131, "epoch": 1.048446897915465, "grad_norm": 0.6206963658332825, "learning_rate": 1.0733451033187866e-05, "loss": 1.3537, "mean_token_accuracy": 0.6627790033817291, "num_tokens": 1599908469.0, "step": 9544 }, { "entropy": 1.669652263323466, "epoch": 1.048556754826838, "grad_norm": 0.6658746004104614, "learning_rate": 1.0731850610952796e-05, "loss": 1.3356, "mean_token_accuracy": 0.6593130081892014, "num_tokens": 1600099601.0, "step": 9545 }, { "entropy": 1.6830444832642872, "epoch": 1.048666611738211, "grad_norm": 0.7324784994125366, "learning_rate": 1.0730250197204528e-05, "loss": 1.5043, "mean_token_accuracy": 0.6441976577043533, "num_tokens": 1600322089.0, "step": 9546 }, { "entropy": 1.6887101829051971, "epoch": 1.048776468649584, "grad_norm": 0.8754149079322815, "learning_rate": 1.0728649791993722e-05, "loss": 1.7007, "mean_token_accuracy": 0.6487985526522001, "num_tokens": 1600493611.0, "step": 9547 }, { "entropy": 1.679084559281667, "epoch": 1.0488863255609568, "grad_norm": 0.8017110228538513, "learning_rate": 1.0727049395371029e-05, "loss": 1.2833, "mean_token_accuracy": 0.6837884237368902, "num_tokens": 1600609476.0, "step": 9548 }, { "entropy": 1.7725566426912944, "epoch": 1.0489961824723297, "grad_norm": 0.7964447736740112, "learning_rate": 1.07254490073871e-05, "loss": 1.386, "mean_token_accuracy": 0.6649324297904968, "num_tokens": 1600749785.0, "step": 9549 }, { "entropy": 1.717088649670283, "epoch": 1.0491060393837026, "grad_norm": 0.7279574871063232, "learning_rate": 1.072384862809258e-05, "loss": 1.3735, "mean_token_accuracy": 0.6538225511709849, "num_tokens": 1600931046.0, "step": 9550 }, { "entropy": 1.7207684218883514, "epoch": 1.0492158962950757, "grad_norm": 0.7171173095703125, "learning_rate": 1.0722248257538135e-05, "loss": 1.384, "mean_token_accuracy": 0.6570334682861964, "num_tokens": 1601116183.0, "step": 9551 }, { "entropy": 1.6767055094242096, "epoch": 1.0493257532064486, "grad_norm": 0.7148609757423401, "learning_rate": 1.0720647895774402e-05, "loss": 1.2768, "mean_token_accuracy": 0.674123153090477, "num_tokens": 1601237406.0, "step": 9552 }, { "entropy": 1.7004509270191193, "epoch": 1.0494356101178215, "grad_norm": 0.5615423321723938, "learning_rate": 1.0719047542852037e-05, "loss": 1.3725, "mean_token_accuracy": 0.6597498307625452, "num_tokens": 1601434893.0, "step": 9553 }, { "entropy": 1.6728576719760895, "epoch": 1.0495454670291944, "grad_norm": 0.6771143078804016, "learning_rate": 1.0717447198821693e-05, "loss": 1.4928, "mean_token_accuracy": 0.6323941498994827, "num_tokens": 1601650254.0, "step": 9554 }, { "entropy": 1.6481030186017354, "epoch": 1.0496553239405675, "grad_norm": 0.6681134104728699, "learning_rate": 1.0715846863734019e-05, "loss": 1.4834, "mean_token_accuracy": 0.6682325402895609, "num_tokens": 1601806562.0, "step": 9555 }, { "entropy": 1.6618581414222717, "epoch": 1.0497651808519404, "grad_norm": 0.6858922839164734, "learning_rate": 1.071424653763966e-05, "loss": 1.3277, "mean_token_accuracy": 0.6726219256718954, "num_tokens": 1601992968.0, "step": 9556 }, { "entropy": 1.6976956526438396, "epoch": 1.0498750377633133, "grad_norm": 0.6189122200012207, "learning_rate": 1.0712646220589274e-05, "loss": 1.3589, "mean_token_accuracy": 0.6756937205791473, "num_tokens": 1602146119.0, "step": 9557 }, { "entropy": 1.7072361807028453, "epoch": 1.0499848946746861, "grad_norm": 0.8546844720840454, "learning_rate": 1.07110459126335e-05, "loss": 1.4876, "mean_token_accuracy": 0.6624875615040461, "num_tokens": 1602380098.0, "step": 9558 }, { "entropy": 1.6925819118817647, "epoch": 1.0500947515860592, "grad_norm": 0.6426894068717957, "learning_rate": 1.0709445613822997e-05, "loss": 1.3083, "mean_token_accuracy": 0.664939691623052, "num_tokens": 1602549743.0, "step": 9559 }, { "entropy": 1.731207271416982, "epoch": 1.0502046084974321, "grad_norm": 0.8742700219154358, "learning_rate": 1.0707845324208407e-05, "loss": 1.2079, "mean_token_accuracy": 0.6823851068814596, "num_tokens": 1602677648.0, "step": 9560 }, { "entropy": 1.7196010947227478, "epoch": 1.050314465408805, "grad_norm": 0.6880813241004944, "learning_rate": 1.0706245043840381e-05, "loss": 1.5014, "mean_token_accuracy": 0.6513290057579676, "num_tokens": 1602849621.0, "step": 9561 }, { "entropy": 1.6641053060690563, "epoch": 1.050424322320178, "grad_norm": 0.6426426768302917, "learning_rate": 1.070464477276957e-05, "loss": 1.5101, "mean_token_accuracy": 0.6436965962251028, "num_tokens": 1603025923.0, "step": 9562 }, { "entropy": 1.7633658051490784, "epoch": 1.0505341792315508, "grad_norm": 0.6683143973350525, "learning_rate": 1.0703044511046617e-05, "loss": 1.4956, "mean_token_accuracy": 0.6683969696362814, "num_tokens": 1603184478.0, "step": 9563 }, { "entropy": 1.6500650942325592, "epoch": 1.050644036142924, "grad_norm": 0.669915497303009, "learning_rate": 1.070144425872217e-05, "loss": 1.5075, "mean_token_accuracy": 0.6701604525248209, "num_tokens": 1603368813.0, "step": 9564 }, { "entropy": 1.7405575414498646, "epoch": 1.0507538930542968, "grad_norm": 0.683010458946228, "learning_rate": 1.069984401584688e-05, "loss": 1.4898, "mean_token_accuracy": 0.6517573595046997, "num_tokens": 1603532608.0, "step": 9565 }, { "entropy": 1.6904991964499156, "epoch": 1.0508637499656697, "grad_norm": 0.7113816738128662, "learning_rate": 1.069824378247139e-05, "loss": 1.3158, "mean_token_accuracy": 0.6657985001802444, "num_tokens": 1603688565.0, "step": 9566 }, { "entropy": 1.6928423345088959, "epoch": 1.0509736068770426, "grad_norm": 0.6442391276359558, "learning_rate": 1.0696643558646346e-05, "loss": 1.4616, "mean_token_accuracy": 0.6492231587568918, "num_tokens": 1603907279.0, "step": 9567 }, { "entropy": 1.7089947859446208, "epoch": 1.0510834637884157, "grad_norm": 0.698707103729248, "learning_rate": 1.0695043344422402e-05, "loss": 1.4134, "mean_token_accuracy": 0.6618327150742213, "num_tokens": 1604075541.0, "step": 9568 }, { "entropy": 1.672034611304601, "epoch": 1.0511933206997885, "grad_norm": 0.5879615545272827, "learning_rate": 1.0693443139850194e-05, "loss": 1.3688, "mean_token_accuracy": 0.670054112871488, "num_tokens": 1604239307.0, "step": 9569 }, { "entropy": 1.7428979178269703, "epoch": 1.0513031776111614, "grad_norm": 0.6346009969711304, "learning_rate": 1.0691842944980373e-05, "loss": 1.3989, "mean_token_accuracy": 0.6469153513511022, "num_tokens": 1604416450.0, "step": 9570 }, { "entropy": 1.6811433633168538, "epoch": 1.0514130345225343, "grad_norm": 0.6256464719772339, "learning_rate": 1.0690242759863587e-05, "loss": 1.367, "mean_token_accuracy": 0.6655552933613459, "num_tokens": 1604583184.0, "step": 9571 }, { "entropy": 1.7027230560779572, "epoch": 1.0515228914339074, "grad_norm": 0.6681546568870544, "learning_rate": 1.0688642584550477e-05, "loss": 1.3715, "mean_token_accuracy": 0.6578748822212219, "num_tokens": 1604805908.0, "step": 9572 }, { "entropy": 1.7179262538750966, "epoch": 1.0516327483452803, "grad_norm": 0.6444426774978638, "learning_rate": 1.0687042419091688e-05, "loss": 1.5337, "mean_token_accuracy": 0.6471273948748907, "num_tokens": 1605033262.0, "step": 9573 }, { "entropy": 1.7171581784884136, "epoch": 1.0517426052566532, "grad_norm": 0.6714052557945251, "learning_rate": 1.0685442263537867e-05, "loss": 1.427, "mean_token_accuracy": 0.6538668225208918, "num_tokens": 1605187982.0, "step": 9574 }, { "entropy": 1.698363830645879, "epoch": 1.051852462168026, "grad_norm": 0.6877774000167847, "learning_rate": 1.0683842117939655e-05, "loss": 1.3865, "mean_token_accuracy": 0.6598330289125443, "num_tokens": 1605342223.0, "step": 9575 }, { "entropy": 1.7184595068295796, "epoch": 1.0519623190793992, "grad_norm": 0.5890787243843079, "learning_rate": 1.0682241982347697e-05, "loss": 1.5158, "mean_token_accuracy": 0.6271322419246038, "num_tokens": 1605559031.0, "step": 9576 }, { "entropy": 1.7405591209729512, "epoch": 1.052072175990772, "grad_norm": 0.7134595513343811, "learning_rate": 1.0680641856812638e-05, "loss": 1.4408, "mean_token_accuracy": 0.6583415667215983, "num_tokens": 1605702610.0, "step": 9577 }, { "entropy": 1.6781224409739177, "epoch": 1.052182032902145, "grad_norm": 0.72991943359375, "learning_rate": 1.067904174138512e-05, "loss": 1.4711, "mean_token_accuracy": 0.6530647377173106, "num_tokens": 1605865046.0, "step": 9578 }, { "entropy": 1.7373049159844716, "epoch": 1.0522918898135178, "grad_norm": 0.9875661134719849, "learning_rate": 1.0677441636115786e-05, "loss": 1.4841, "mean_token_accuracy": 0.6617722262938818, "num_tokens": 1605989563.0, "step": 9579 }, { "entropy": 1.7485832075277965, "epoch": 1.0524017467248907, "grad_norm": 0.7092475295066833, "learning_rate": 1.0675841541055283e-05, "loss": 1.4333, "mean_token_accuracy": 0.6555873850981394, "num_tokens": 1606154356.0, "step": 9580 }, { "entropy": 1.6484754184881847, "epoch": 1.0525116036362638, "grad_norm": 0.7261121869087219, "learning_rate": 1.0674241456254244e-05, "loss": 1.4312, "mean_token_accuracy": 0.6665322482585907, "num_tokens": 1606317868.0, "step": 9581 }, { "entropy": 1.6414496501286824, "epoch": 1.0526214605476367, "grad_norm": 0.6463345289230347, "learning_rate": 1.0672641381763315e-05, "loss": 1.2341, "mean_token_accuracy": 0.6869133959213892, "num_tokens": 1606467019.0, "step": 9582 }, { "entropy": 1.7304282387097676, "epoch": 1.0527313174590096, "grad_norm": 0.6692349910736084, "learning_rate": 1.0671041317633144e-05, "loss": 1.2577, "mean_token_accuracy": 0.6683614204327265, "num_tokens": 1606592230.0, "step": 9583 }, { "entropy": 1.6495085159937541, "epoch": 1.0528411743703825, "grad_norm": 0.6861699223518372, "learning_rate": 1.0669441263914364e-05, "loss": 1.2424, "mean_token_accuracy": 0.6809816161791483, "num_tokens": 1606726297.0, "step": 9584 }, { "entropy": 1.6947397887706757, "epoch": 1.0529510312817556, "grad_norm": 0.6416590213775635, "learning_rate": 1.066784122065762e-05, "loss": 1.3894, "mean_token_accuracy": 0.6536195824543635, "num_tokens": 1606944421.0, "step": 9585 }, { "entropy": 1.7330600718657176, "epoch": 1.0530608881931285, "grad_norm": 0.7335972189903259, "learning_rate": 1.0666241187913553e-05, "loss": 1.415, "mean_token_accuracy": 0.6541456083456675, "num_tokens": 1607071607.0, "step": 9586 }, { "entropy": 1.7887234091758728, "epoch": 1.0531707451045014, "grad_norm": 0.8420895338058472, "learning_rate": 1.06646411657328e-05, "loss": 1.6066, "mean_token_accuracy": 0.6439038117726644, "num_tokens": 1607230227.0, "step": 9587 }, { "entropy": 1.7334937155246735, "epoch": 1.0532806020158743, "grad_norm": 0.7040529251098633, "learning_rate": 1.0663041154166e-05, "loss": 1.5178, "mean_token_accuracy": 0.6528173585732778, "num_tokens": 1607420300.0, "step": 9588 }, { "entropy": 1.7212029000123341, "epoch": 1.0533904589272474, "grad_norm": 0.8923618793487549, "learning_rate": 1.0661441153263801e-05, "loss": 1.6135, "mean_token_accuracy": 0.6504168063402176, "num_tokens": 1607561102.0, "step": 9589 }, { "entropy": 1.7322006324927013, "epoch": 1.0535003158386202, "grad_norm": 0.6851010918617249, "learning_rate": 1.0659841163076834e-05, "loss": 1.2743, "mean_token_accuracy": 0.6731189688046774, "num_tokens": 1607681433.0, "step": 9590 }, { "entropy": 1.6935873130957286, "epoch": 1.0536101727499931, "grad_norm": 0.6381398439407349, "learning_rate": 1.0658241183655741e-05, "loss": 1.3826, "mean_token_accuracy": 0.6610639144976934, "num_tokens": 1607883422.0, "step": 9591 }, { "entropy": 1.7271686891714733, "epoch": 1.053720029661366, "grad_norm": 0.6199973225593567, "learning_rate": 1.0656641215051165e-05, "loss": 1.5337, "mean_token_accuracy": 0.6284688164790472, "num_tokens": 1608114811.0, "step": 9592 }, { "entropy": 1.7459026277065277, "epoch": 1.053829886572739, "grad_norm": 0.9864705204963684, "learning_rate": 1.0655041257313735e-05, "loss": 1.3567, "mean_token_accuracy": 0.6629860103130341, "num_tokens": 1608251495.0, "step": 9593 }, { "entropy": 1.7121766805648804, "epoch": 1.053939743484112, "grad_norm": 0.7664281725883484, "learning_rate": 1.0653441310494092e-05, "loss": 1.287, "mean_token_accuracy": 0.6731170068184534, "num_tokens": 1608369641.0, "step": 9594 }, { "entropy": 1.6202069719632466, "epoch": 1.054049600395485, "grad_norm": 0.5870428681373596, "learning_rate": 1.0651841374642882e-05, "loss": 1.3897, "mean_token_accuracy": 0.6637488653262457, "num_tokens": 1608579940.0, "step": 9595 }, { "entropy": 1.727886547644933, "epoch": 1.0541594573068578, "grad_norm": 0.844563364982605, "learning_rate": 1.065024144981073e-05, "loss": 1.5118, "mean_token_accuracy": 0.6562522749106089, "num_tokens": 1608758095.0, "step": 9596 }, { "entropy": 1.7154951989650726, "epoch": 1.0542693142182307, "grad_norm": 0.7168102860450745, "learning_rate": 1.064864153604828e-05, "loss": 1.5547, "mean_token_accuracy": 0.641726886232694, "num_tokens": 1608920813.0, "step": 9597 }, { "entropy": 1.7120743890603383, "epoch": 1.0543791711296038, "grad_norm": 0.8167440891265869, "learning_rate": 1.0647041633406168e-05, "loss": 1.4122, "mean_token_accuracy": 0.6548740615447363, "num_tokens": 1609056254.0, "step": 9598 }, { "entropy": 1.6860410471757252, "epoch": 1.0544890280409767, "grad_norm": 0.6746973395347595, "learning_rate": 1.0645441741935029e-05, "loss": 1.4384, "mean_token_accuracy": 0.6527743736902872, "num_tokens": 1609290219.0, "step": 9599 }, { "entropy": 1.653940111398697, "epoch": 1.0545988849523495, "grad_norm": 0.7178821563720703, "learning_rate": 1.0643841861685498e-05, "loss": 1.3891, "mean_token_accuracy": 0.6595231195290884, "num_tokens": 1609491031.0, "step": 9600 }, { "entropy": 1.6904591818650563, "epoch": 1.0547087418637224, "grad_norm": 0.6442198157310486, "learning_rate": 1.0642241992708215e-05, "loss": 1.4274, "mean_token_accuracy": 0.6542298197746277, "num_tokens": 1609653550.0, "step": 9601 }, { "entropy": 1.7299334208170574, "epoch": 1.0548185987750955, "grad_norm": 0.7151714563369751, "learning_rate": 1.0640642135053807e-05, "loss": 1.354, "mean_token_accuracy": 0.6594923585653305, "num_tokens": 1609798123.0, "step": 9602 }, { "entropy": 1.7396175960699718, "epoch": 1.0549284556864684, "grad_norm": 0.7707934975624084, "learning_rate": 1.0639042288772914e-05, "loss": 1.3926, "mean_token_accuracy": 0.6571768969297409, "num_tokens": 1609941400.0, "step": 9603 }, { "entropy": 1.768477698167165, "epoch": 1.0550383125978413, "grad_norm": 0.6910679936408997, "learning_rate": 1.0637442453916173e-05, "loss": 1.5426, "mean_token_accuracy": 0.6517347097396851, "num_tokens": 1610105272.0, "step": 9604 }, { "entropy": 1.6502993007500966, "epoch": 1.0551481695092142, "grad_norm": 0.6803098320960999, "learning_rate": 1.0635842630534215e-05, "loss": 1.4061, "mean_token_accuracy": 0.6640617698431015, "num_tokens": 1610247173.0, "step": 9605 }, { "entropy": 1.6991427938143413, "epoch": 1.055258026420587, "grad_norm": 0.6311363577842712, "learning_rate": 1.063424281867767e-05, "loss": 1.4876, "mean_token_accuracy": 0.6401338577270508, "num_tokens": 1610423935.0, "step": 9606 }, { "entropy": 1.6480543613433838, "epoch": 1.0553678833319602, "grad_norm": 0.8264037370681763, "learning_rate": 1.063264301839718e-05, "loss": 1.5157, "mean_token_accuracy": 0.6646409928798676, "num_tokens": 1610616702.0, "step": 9607 }, { "entropy": 1.7201192478338878, "epoch": 1.055477740243333, "grad_norm": 0.7642366886138916, "learning_rate": 1.0631043229743367e-05, "loss": 1.3135, "mean_token_accuracy": 0.6787395030260086, "num_tokens": 1610783350.0, "step": 9608 }, { "entropy": 1.7671165466308594, "epoch": 1.055587597154706, "grad_norm": 0.737358570098877, "learning_rate": 1.0629443452766872e-05, "loss": 1.5055, "mean_token_accuracy": 0.6605880657831827, "num_tokens": 1610956661.0, "step": 9609 }, { "entropy": 1.7348363002141316, "epoch": 1.0556974540660788, "grad_norm": 0.7109798192977905, "learning_rate": 1.0627843687518326e-05, "loss": 1.4493, "mean_token_accuracy": 0.6522191713253657, "num_tokens": 1611087838.0, "step": 9610 }, { "entropy": 1.6798150340716045, "epoch": 1.055807310977452, "grad_norm": 0.6326707601547241, "learning_rate": 1.062624393404836e-05, "loss": 1.3727, "mean_token_accuracy": 0.6628447274367014, "num_tokens": 1611244410.0, "step": 9611 }, { "entropy": 1.6709414720535278, "epoch": 1.0559171678888248, "grad_norm": 0.7551990747451782, "learning_rate": 1.06246441924076e-05, "loss": 1.2086, "mean_token_accuracy": 0.6848486711581548, "num_tokens": 1611363852.0, "step": 9612 }, { "entropy": 1.6593234837055206, "epoch": 1.0560270248001977, "grad_norm": 0.7147453427314758, "learning_rate": 1.062304446264669e-05, "loss": 1.331, "mean_token_accuracy": 0.6691244542598724, "num_tokens": 1611543725.0, "step": 9613 }, { "entropy": 1.7118070423603058, "epoch": 1.0561368817115706, "grad_norm": 0.6040416359901428, "learning_rate": 1.0621444744816247e-05, "loss": 1.3854, "mean_token_accuracy": 0.6613721499840418, "num_tokens": 1611719789.0, "step": 9614 }, { "entropy": 1.693102478981018, "epoch": 1.0562467386229437, "grad_norm": 0.8449906706809998, "learning_rate": 1.061984503896691e-05, "loss": 1.4027, "mean_token_accuracy": 0.684621180097262, "num_tokens": 1611874620.0, "step": 9615 }, { "entropy": 1.6907888650894165, "epoch": 1.0563565955343166, "grad_norm": 0.7158522605895996, "learning_rate": 1.0618245345149308e-05, "loss": 1.2702, "mean_token_accuracy": 0.672495091954867, "num_tokens": 1611997469.0, "step": 9616 }, { "entropy": 1.7449814677238464, "epoch": 1.0564664524456895, "grad_norm": 0.7022985816001892, "learning_rate": 1.0616645663414064e-05, "loss": 1.3994, "mean_token_accuracy": 0.6574458430210749, "num_tokens": 1612189165.0, "step": 9617 }, { "entropy": 1.6879704495271046, "epoch": 1.0565763093570624, "grad_norm": 0.7382509112358093, "learning_rate": 1.0615045993811813e-05, "loss": 1.288, "mean_token_accuracy": 0.6710261305173238, "num_tokens": 1612327205.0, "step": 9618 }, { "entropy": 1.7922049760818481, "epoch": 1.0566861662684353, "grad_norm": 0.7976166009902954, "learning_rate": 1.0613446336393187e-05, "loss": 1.5642, "mean_token_accuracy": 0.6370315402746201, "num_tokens": 1612482869.0, "step": 9619 }, { "entropy": 1.7130910356839497, "epoch": 1.0567960231798084, "grad_norm": 0.6342209577560425, "learning_rate": 1.0611846691208805e-05, "loss": 1.3953, "mean_token_accuracy": 0.6711895515521368, "num_tokens": 1612653926.0, "step": 9620 }, { "entropy": 1.68388170003891, "epoch": 1.0569058800911812, "grad_norm": 0.6616874933242798, "learning_rate": 1.0610247058309305e-05, "loss": 1.3314, "mean_token_accuracy": 0.6681681573390961, "num_tokens": 1612792712.0, "step": 9621 }, { "entropy": 1.6615887383619945, "epoch": 1.0570157370025541, "grad_norm": 0.6471387147903442, "learning_rate": 1.0608647437745308e-05, "loss": 1.5439, "mean_token_accuracy": 0.622841422756513, "num_tokens": 1613069195.0, "step": 9622 }, { "entropy": 1.689245601495107, "epoch": 1.057125593913927, "grad_norm": 0.7069512009620667, "learning_rate": 1.0607047829567443e-05, "loss": 1.3479, "mean_token_accuracy": 0.6640896399815878, "num_tokens": 1613260754.0, "step": 9623 }, { "entropy": 1.703204204638799, "epoch": 1.0572354508253001, "grad_norm": 0.7228965759277344, "learning_rate": 1.0605448233826338e-05, "loss": 1.2755, "mean_token_accuracy": 0.6698009570439657, "num_tokens": 1613369068.0, "step": 9624 }, { "entropy": 1.6650657554467518, "epoch": 1.057345307736673, "grad_norm": 0.661736786365509, "learning_rate": 1.0603848650572616e-05, "loss": 1.5366, "mean_token_accuracy": 0.6506972561279932, "num_tokens": 1613545729.0, "step": 9625 }, { "entropy": 1.653905838727951, "epoch": 1.057455164648046, "grad_norm": 0.725866973400116, "learning_rate": 1.0602249079856905e-05, "loss": 1.4377, "mean_token_accuracy": 0.6448950469493866, "num_tokens": 1613757396.0, "step": 9626 }, { "entropy": 1.6793735524018605, "epoch": 1.0575650215594188, "grad_norm": 0.6681774854660034, "learning_rate": 1.0600649521729836e-05, "loss": 1.4342, "mean_token_accuracy": 0.6574407368898392, "num_tokens": 1613895809.0, "step": 9627 }, { "entropy": 1.7233494420846303, "epoch": 1.057674878470792, "grad_norm": 0.617106556892395, "learning_rate": 1.0599049976242028e-05, "loss": 1.3149, "mean_token_accuracy": 0.6670562823613485, "num_tokens": 1614054222.0, "step": 9628 }, { "entropy": 1.7311593393484752, "epoch": 1.0577847353821648, "grad_norm": 0.791827380657196, "learning_rate": 1.0597450443444106e-05, "loss": 1.3224, "mean_token_accuracy": 0.6638512363036474, "num_tokens": 1614246882.0, "step": 9629 }, { "entropy": 1.7481309374173482, "epoch": 1.0578945922935377, "grad_norm": 0.6565181016921997, "learning_rate": 1.0595850923386699e-05, "loss": 1.5325, "mean_token_accuracy": 0.6478005150953928, "num_tokens": 1614418530.0, "step": 9630 }, { "entropy": 1.7112793425718944, "epoch": 1.0580044492049105, "grad_norm": 0.8694975972175598, "learning_rate": 1.0594251416120426e-05, "loss": 1.5551, "mean_token_accuracy": 0.6495432009299597, "num_tokens": 1614553386.0, "step": 9631 }, { "entropy": 1.708936999241511, "epoch": 1.0581143061162837, "grad_norm": 0.6418928503990173, "learning_rate": 1.0592651921695912e-05, "loss": 1.4058, "mean_token_accuracy": 0.658669908841451, "num_tokens": 1614708336.0, "step": 9632 }, { "entropy": 1.6334985593954723, "epoch": 1.0582241630276565, "grad_norm": 0.7269279360771179, "learning_rate": 1.0591052440163783e-05, "loss": 1.2101, "mean_token_accuracy": 0.6838384419679642, "num_tokens": 1614846838.0, "step": 9633 }, { "entropy": 1.7367220322291057, "epoch": 1.0583340199390294, "grad_norm": 0.930919349193573, "learning_rate": 1.0589452971574659e-05, "loss": 1.1158, "mean_token_accuracy": 0.6759131153424581, "num_tokens": 1615008974.0, "step": 9634 }, { "entropy": 1.7149739066759746, "epoch": 1.0584438768504023, "grad_norm": 0.7244778871536255, "learning_rate": 1.0587853515979163e-05, "loss": 1.2775, "mean_token_accuracy": 0.6694569289684296, "num_tokens": 1615125425.0, "step": 9635 }, { "entropy": 1.7364940841992695, "epoch": 1.0585537337617752, "grad_norm": 0.7459930181503296, "learning_rate": 1.0586254073427919e-05, "loss": 1.5224, "mean_token_accuracy": 0.6431458294391632, "num_tokens": 1615289010.0, "step": 9636 }, { "entropy": 1.774601896603902, "epoch": 1.0586635906731483, "grad_norm": 0.6885215640068054, "learning_rate": 1.0584654643971546e-05, "loss": 1.5666, "mean_token_accuracy": 0.6418042729298273, "num_tokens": 1615450775.0, "step": 9637 }, { "entropy": 1.694488286972046, "epoch": 1.0587734475845212, "grad_norm": 0.6608237624168396, "learning_rate": 1.0583055227660666e-05, "loss": 1.3102, "mean_token_accuracy": 0.6682019531726837, "num_tokens": 1615623137.0, "step": 9638 }, { "entropy": 1.7278088827927907, "epoch": 1.058883304495894, "grad_norm": 0.697512686252594, "learning_rate": 1.05814558245459e-05, "loss": 1.4574, "mean_token_accuracy": 0.6336638778448105, "num_tokens": 1615802189.0, "step": 9639 }, { "entropy": 1.7499482830365498, "epoch": 1.058993161407267, "grad_norm": 0.6239208579063416, "learning_rate": 1.057985643467787e-05, "loss": 1.4644, "mean_token_accuracy": 0.637439583738645, "num_tokens": 1615995816.0, "step": 9640 }, { "entropy": 1.6262230277061462, "epoch": 1.05910301831864, "grad_norm": 0.8068125247955322, "learning_rate": 1.0578257058107193e-05, "loss": 1.269, "mean_token_accuracy": 0.6698660800854365, "num_tokens": 1616120440.0, "step": 9641 }, { "entropy": 1.6767154037952423, "epoch": 1.059212875230013, "grad_norm": 0.680270254611969, "learning_rate": 1.0576657694884494e-05, "loss": 1.2518, "mean_token_accuracy": 0.6656695355971655, "num_tokens": 1616265383.0, "step": 9642 }, { "entropy": 1.6900179982185364, "epoch": 1.0593227321413858, "grad_norm": 0.6536128520965576, "learning_rate": 1.0575058345060386e-05, "loss": 1.3902, "mean_token_accuracy": 0.6535529990990957, "num_tokens": 1616443547.0, "step": 9643 }, { "entropy": 1.7220917642116547, "epoch": 1.0594325890527587, "grad_norm": 0.6382746696472168, "learning_rate": 1.0573459008685485e-05, "loss": 1.5317, "mean_token_accuracy": 0.6338553329308828, "num_tokens": 1616648707.0, "step": 9644 }, { "entropy": 1.649655689795812, "epoch": 1.0595424459641318, "grad_norm": 0.6632347106933594, "learning_rate": 1.057185968581042e-05, "loss": 1.2947, "mean_token_accuracy": 0.6613847464323044, "num_tokens": 1616790214.0, "step": 9645 }, { "entropy": 1.686415046453476, "epoch": 1.0596523028755047, "grad_norm": 0.7371233105659485, "learning_rate": 1.0570260376485801e-05, "loss": 1.4037, "mean_token_accuracy": 0.6672069480021795, "num_tokens": 1616952135.0, "step": 9646 }, { "entropy": 1.651140828927358, "epoch": 1.0597621597868776, "grad_norm": 0.6713118553161621, "learning_rate": 1.0568661080762246e-05, "loss": 1.518, "mean_token_accuracy": 0.6458087265491486, "num_tokens": 1617144223.0, "step": 9647 }, { "entropy": 1.6354533036549885, "epoch": 1.0598720166982505, "grad_norm": 0.811916708946228, "learning_rate": 1.0567061798690379e-05, "loss": 1.207, "mean_token_accuracy": 0.6824970146020254, "num_tokens": 1617304698.0, "step": 9648 }, { "entropy": 1.7508118848005931, "epoch": 1.0599818736096234, "grad_norm": 0.6534659266471863, "learning_rate": 1.0565462530320806e-05, "loss": 1.4521, "mean_token_accuracy": 0.6644961486260096, "num_tokens": 1617462814.0, "step": 9649 }, { "entropy": 1.6995843648910522, "epoch": 1.0600917305209965, "grad_norm": 0.6688631772994995, "learning_rate": 1.0563863275704147e-05, "loss": 1.355, "mean_token_accuracy": 0.6675297270218531, "num_tokens": 1617591628.0, "step": 9650 }, { "entropy": 1.6866484085718791, "epoch": 1.0602015874323694, "grad_norm": 0.6986563205718994, "learning_rate": 1.0562264034891024e-05, "loss": 1.5377, "mean_token_accuracy": 0.6672149201234182, "num_tokens": 1617732897.0, "step": 9651 }, { "entropy": 1.6906297703584034, "epoch": 1.0603114443437422, "grad_norm": 0.7244104743003845, "learning_rate": 1.0560664807932041e-05, "loss": 1.6056, "mean_token_accuracy": 0.6466974268356959, "num_tokens": 1617916608.0, "step": 9652 }, { "entropy": 1.7492324809233348, "epoch": 1.0604213012551151, "grad_norm": 0.8112777471542358, "learning_rate": 1.0559065594877822e-05, "loss": 1.3205, "mean_token_accuracy": 0.6659993777672449, "num_tokens": 1618039898.0, "step": 9653 }, { "entropy": 1.704162836074829, "epoch": 1.0605311581664882, "grad_norm": 0.7873448729515076, "learning_rate": 1.055746639577898e-05, "loss": 1.1877, "mean_token_accuracy": 0.6737140913804373, "num_tokens": 1618223214.0, "step": 9654 }, { "entropy": 1.691829393307368, "epoch": 1.0606410150778611, "grad_norm": 0.9849948287010193, "learning_rate": 1.0555867210686127e-05, "loss": 1.3944, "mean_token_accuracy": 0.6720232317845026, "num_tokens": 1618379497.0, "step": 9655 }, { "entropy": 1.6794477105140686, "epoch": 1.060750871989234, "grad_norm": 0.6636627912521362, "learning_rate": 1.055426803964987e-05, "loss": 1.377, "mean_token_accuracy": 0.6529742429653803, "num_tokens": 1618604731.0, "step": 9656 }, { "entropy": 1.7792643805344899, "epoch": 1.060860728900607, "grad_norm": 0.6718061566352844, "learning_rate": 1.0552668882720836e-05, "loss": 1.3205, "mean_token_accuracy": 0.6614718536535898, "num_tokens": 1618716722.0, "step": 9657 }, { "entropy": 1.7258899211883545, "epoch": 1.06097058581198, "grad_norm": 0.7656777501106262, "learning_rate": 1.0551069739949626e-05, "loss": 1.4781, "mean_token_accuracy": 0.6475807974735895, "num_tokens": 1618904778.0, "step": 9658 }, { "entropy": 1.7146598994731903, "epoch": 1.061080442723353, "grad_norm": 0.7096765637397766, "learning_rate": 1.0549470611386861e-05, "loss": 1.4426, "mean_token_accuracy": 0.6532019923130671, "num_tokens": 1619050535.0, "step": 9659 }, { "entropy": 1.7547457814216614, "epoch": 1.0611902996347258, "grad_norm": 0.7451149225234985, "learning_rate": 1.0547871497083147e-05, "loss": 1.4693, "mean_token_accuracy": 0.6539823710918427, "num_tokens": 1619236759.0, "step": 9660 }, { "entropy": 1.675736625989278, "epoch": 1.0613001565460987, "grad_norm": 0.583570122718811, "learning_rate": 1.0546272397089094e-05, "loss": 1.4404, "mean_token_accuracy": 0.6391840279102325, "num_tokens": 1619447379.0, "step": 9661 }, { "entropy": 1.7973829209804535, "epoch": 1.0614100134574715, "grad_norm": 0.7883579134941101, "learning_rate": 1.0544673311455313e-05, "loss": 1.4437, "mean_token_accuracy": 0.6529788474241892, "num_tokens": 1619594379.0, "step": 9662 }, { "entropy": 1.7165298263231914, "epoch": 1.0615198703688447, "grad_norm": 0.7997109293937683, "learning_rate": 1.0543074240232421e-05, "loss": 1.6165, "mean_token_accuracy": 0.6245667189359665, "num_tokens": 1619800773.0, "step": 9663 }, { "entropy": 1.7447736859321594, "epoch": 1.0616297272802175, "grad_norm": 0.7188670039176941, "learning_rate": 1.0541475183471022e-05, "loss": 1.3859, "mean_token_accuracy": 0.6584334820508957, "num_tokens": 1619954114.0, "step": 9664 }, { "entropy": 1.6913307011127472, "epoch": 1.0617395841915904, "grad_norm": 0.748576283454895, "learning_rate": 1.0539876141221726e-05, "loss": 1.3055, "mean_token_accuracy": 0.6666964242855707, "num_tokens": 1620087595.0, "step": 9665 }, { "entropy": 1.6912503639856975, "epoch": 1.0618494411029633, "grad_norm": 0.6023520231246948, "learning_rate": 1.0538277113535145e-05, "loss": 1.2883, "mean_token_accuracy": 0.6658832430839539, "num_tokens": 1620223231.0, "step": 9666 }, { "entropy": 1.7426739136377971, "epoch": 1.0619592980143364, "grad_norm": 0.654004693031311, "learning_rate": 1.0536678100461885e-05, "loss": 1.4584, "mean_token_accuracy": 0.6539844125509262, "num_tokens": 1620423582.0, "step": 9667 }, { "entropy": 1.6927513976891835, "epoch": 1.0620691549257093, "grad_norm": 0.7091237902641296, "learning_rate": 1.053507910205255e-05, "loss": 1.4725, "mean_token_accuracy": 0.6411188344160715, "num_tokens": 1620653454.0, "step": 9668 }, { "entropy": 1.7462256650129955, "epoch": 1.0621790118370822, "grad_norm": 0.6891539692878723, "learning_rate": 1.0533480118357757e-05, "loss": 1.4739, "mean_token_accuracy": 0.6464346051216125, "num_tokens": 1620877012.0, "step": 9669 }, { "entropy": 1.671479304631551, "epoch": 1.062288868748455, "grad_norm": 0.6748672127723694, "learning_rate": 1.0531881149428107e-05, "loss": 1.4269, "mean_token_accuracy": 0.6441149214903513, "num_tokens": 1621057118.0, "step": 9670 }, { "entropy": 1.7517466147740681, "epoch": 1.0623987256598282, "grad_norm": 0.7266637086868286, "learning_rate": 1.0530282195314206e-05, "loss": 1.4868, "mean_token_accuracy": 0.6445014526446661, "num_tokens": 1621240078.0, "step": 9671 }, { "entropy": 1.6607401569684346, "epoch": 1.062508582571201, "grad_norm": 0.6144156455993652, "learning_rate": 1.0528683256066666e-05, "loss": 1.4003, "mean_token_accuracy": 0.646324540177981, "num_tokens": 1621410254.0, "step": 9672 }, { "entropy": 1.6924620866775513, "epoch": 1.062618439482574, "grad_norm": 0.8956554532051086, "learning_rate": 1.0527084331736084e-05, "loss": 1.3734, "mean_token_accuracy": 0.6591135859489441, "num_tokens": 1621581882.0, "step": 9673 }, { "entropy": 1.6320221523443859, "epoch": 1.0627282963939468, "grad_norm": 0.6442832350730896, "learning_rate": 1.0525485422373069e-05, "loss": 1.437, "mean_token_accuracy": 0.6561227341492971, "num_tokens": 1621796589.0, "step": 9674 }, { "entropy": 1.7225764592488606, "epoch": 1.0628381533053197, "grad_norm": 0.7406787872314453, "learning_rate": 1.0523886528028231e-05, "loss": 1.3413, "mean_token_accuracy": 0.6588184783856074, "num_tokens": 1621947807.0, "step": 9675 }, { "entropy": 1.662076324224472, "epoch": 1.0629480102166928, "grad_norm": 0.5989936590194702, "learning_rate": 1.0522287648752165e-05, "loss": 1.3468, "mean_token_accuracy": 0.670153538386027, "num_tokens": 1622139639.0, "step": 9676 }, { "entropy": 1.6708702047665913, "epoch": 1.0630578671280657, "grad_norm": 0.7162211537361145, "learning_rate": 1.0520688784595484e-05, "loss": 1.4332, "mean_token_accuracy": 0.6640763978163401, "num_tokens": 1622282259.0, "step": 9677 }, { "entropy": 1.7043314973513286, "epoch": 1.0631677240394386, "grad_norm": 0.6820145845413208, "learning_rate": 1.0519089935608786e-05, "loss": 1.4393, "mean_token_accuracy": 0.6579158157110214, "num_tokens": 1622477913.0, "step": 9678 }, { "entropy": 1.6165697475274403, "epoch": 1.0632775809508115, "grad_norm": 0.6088776588439941, "learning_rate": 1.0517491101842672e-05, "loss": 1.1827, "mean_token_accuracy": 0.6940498252709707, "num_tokens": 1622603559.0, "step": 9679 }, { "entropy": 1.7742246389389038, "epoch": 1.0633874378621846, "grad_norm": 0.635688841342926, "learning_rate": 1.0515892283347752e-05, "loss": 1.5138, "mean_token_accuracy": 0.6343726913134257, "num_tokens": 1622820459.0, "step": 9680 }, { "entropy": 1.6375213364760082, "epoch": 1.0634972947735575, "grad_norm": 0.7309091687202454, "learning_rate": 1.051429348017462e-05, "loss": 1.3063, "mean_token_accuracy": 0.6767990191777548, "num_tokens": 1623019426.0, "step": 9681 }, { "entropy": 1.6735208332538605, "epoch": 1.0636071516849304, "grad_norm": 0.8735835552215576, "learning_rate": 1.051269469237388e-05, "loss": 1.239, "mean_token_accuracy": 0.6743840475877126, "num_tokens": 1623154750.0, "step": 9682 }, { "entropy": 1.7003762324651082, "epoch": 1.0637170085963032, "grad_norm": 0.5599207878112793, "learning_rate": 1.0511095919996135e-05, "loss": 1.496, "mean_token_accuracy": 0.6529860148827235, "num_tokens": 1623356174.0, "step": 9683 }, { "entropy": 1.726138601700465, "epoch": 1.0638268655076764, "grad_norm": 0.6613774299621582, "learning_rate": 1.0509497163091983e-05, "loss": 1.3858, "mean_token_accuracy": 0.6613100071748098, "num_tokens": 1623511935.0, "step": 9684 }, { "entropy": 1.7090193430582683, "epoch": 1.0639367224190492, "grad_norm": 0.6702026724815369, "learning_rate": 1.0507898421712023e-05, "loss": 1.4125, "mean_token_accuracy": 0.6465659638245901, "num_tokens": 1623742462.0, "step": 9685 }, { "entropy": 1.7133901019891102, "epoch": 1.0640465793304221, "grad_norm": 0.732112467288971, "learning_rate": 1.0506299695906859e-05, "loss": 1.3698, "mean_token_accuracy": 0.6699735869963964, "num_tokens": 1623909947.0, "step": 9686 }, { "entropy": 1.7531900107860565, "epoch": 1.064156436241795, "grad_norm": 0.7114512324333191, "learning_rate": 1.0504700985727087e-05, "loss": 1.4496, "mean_token_accuracy": 0.6486657311518987, "num_tokens": 1624037188.0, "step": 9687 }, { "entropy": 1.6981233954429626, "epoch": 1.064266293153168, "grad_norm": 0.6429248452186584, "learning_rate": 1.0503102291223302e-05, "loss": 1.2754, "mean_token_accuracy": 0.6718081583579382, "num_tokens": 1624161312.0, "step": 9688 }, { "entropy": 1.7247523069381714, "epoch": 1.064376150064541, "grad_norm": 0.7967625856399536, "learning_rate": 1.050150361244611e-05, "loss": 1.317, "mean_token_accuracy": 0.6654373556375504, "num_tokens": 1624312357.0, "step": 9689 }, { "entropy": 1.7157942950725555, "epoch": 1.064486006975914, "grad_norm": 0.6844218373298645, "learning_rate": 1.0499904949446102e-05, "loss": 1.5019, "mean_token_accuracy": 0.6439647078514099, "num_tokens": 1624499884.0, "step": 9690 }, { "entropy": 1.7442339460055034, "epoch": 1.0645958638872868, "grad_norm": 0.8911353349685669, "learning_rate": 1.0498306302273877e-05, "loss": 1.4668, "mean_token_accuracy": 0.6469593246777853, "num_tokens": 1624631885.0, "step": 9691 }, { "entropy": 1.6330601076285045, "epoch": 1.0647057207986597, "grad_norm": 0.6595163345336914, "learning_rate": 1.0496707670980032e-05, "loss": 1.3273, "mean_token_accuracy": 0.6642039865255356, "num_tokens": 1624809064.0, "step": 9692 }, { "entropy": 1.677077313264211, "epoch": 1.0648155777100328, "grad_norm": 0.6782556176185608, "learning_rate": 1.0495109055615162e-05, "loss": 1.3071, "mean_token_accuracy": 0.6632417937119802, "num_tokens": 1624974504.0, "step": 9693 }, { "entropy": 1.6450525323549907, "epoch": 1.0649254346214057, "grad_norm": 0.7020920515060425, "learning_rate": 1.0493510456229863e-05, "loss": 1.2382, "mean_token_accuracy": 0.6796882202227911, "num_tokens": 1625109730.0, "step": 9694 }, { "entropy": 1.6747917830944061, "epoch": 1.0650352915327785, "grad_norm": 0.7329574227333069, "learning_rate": 1.0491911872874732e-05, "loss": 1.3547, "mean_token_accuracy": 0.6668652594089508, "num_tokens": 1625269037.0, "step": 9695 }, { "entropy": 1.6843490501244862, "epoch": 1.0651451484441514, "grad_norm": 0.75140780210495, "learning_rate": 1.0490313305600357e-05, "loss": 1.351, "mean_token_accuracy": 0.6596083492040634, "num_tokens": 1625423315.0, "step": 9696 }, { "entropy": 1.6949997544288635, "epoch": 1.0652550053555245, "grad_norm": 0.7336523532867432, "learning_rate": 1.0488714754457338e-05, "loss": 1.5667, "mean_token_accuracy": 0.6420057465632757, "num_tokens": 1625614264.0, "step": 9697 }, { "entropy": 1.67600150903066, "epoch": 1.0653648622668974, "grad_norm": 0.7469053864479065, "learning_rate": 1.048711621949627e-05, "loss": 1.3705, "mean_token_accuracy": 0.6591821859280268, "num_tokens": 1625755751.0, "step": 9698 }, { "entropy": 1.7073884507020314, "epoch": 1.0654747191782703, "grad_norm": 0.6153196692466736, "learning_rate": 1.048551770076774e-05, "loss": 1.4425, "mean_token_accuracy": 0.6455186804135641, "num_tokens": 1625925207.0, "step": 9699 }, { "entropy": 1.726867179075877, "epoch": 1.0655845760896432, "grad_norm": 0.9850101470947266, "learning_rate": 1.048391919832234e-05, "loss": 1.4454, "mean_token_accuracy": 0.656451866030693, "num_tokens": 1626065708.0, "step": 9700 }, { "entropy": 1.6206376453240712, "epoch": 1.065694433001016, "grad_norm": 0.6840752959251404, "learning_rate": 1.0482320712210667e-05, "loss": 1.2084, "mean_token_accuracy": 0.689173142115275, "num_tokens": 1626205586.0, "step": 9701 }, { "entropy": 1.7038061221440632, "epoch": 1.0658042899123892, "grad_norm": 0.7342552542686462, "learning_rate": 1.0480722242483311e-05, "loss": 1.2624, "mean_token_accuracy": 0.6706964919964472, "num_tokens": 1626338702.0, "step": 9702 }, { "entropy": 1.7553867201010387, "epoch": 1.065914146823762, "grad_norm": 0.6570054292678833, "learning_rate": 1.0479123789190862e-05, "loss": 1.3466, "mean_token_accuracy": 0.6499860535065333, "num_tokens": 1626493092.0, "step": 9703 }, { "entropy": 1.6693990528583527, "epoch": 1.066024003735135, "grad_norm": 0.5803617835044861, "learning_rate": 1.0477525352383913e-05, "loss": 1.3904, "mean_token_accuracy": 0.6566586941480637, "num_tokens": 1626717365.0, "step": 9704 }, { "entropy": 1.7204264203707378, "epoch": 1.0661338606465078, "grad_norm": 0.8450800776481628, "learning_rate": 1.0475926932113048e-05, "loss": 1.2561, "mean_token_accuracy": 0.6803312748670578, "num_tokens": 1626849414.0, "step": 9705 }, { "entropy": 1.6471377710501354, "epoch": 1.066243717557881, "grad_norm": 0.7381437420845032, "learning_rate": 1.0474328528428857e-05, "loss": 1.3509, "mean_token_accuracy": 0.6623278111219406, "num_tokens": 1626997472.0, "step": 9706 }, { "entropy": 1.648034284512202, "epoch": 1.0663535744692538, "grad_norm": 0.5970849394798279, "learning_rate": 1.0472730141381934e-05, "loss": 1.4102, "mean_token_accuracy": 0.6520186911026636, "num_tokens": 1627182309.0, "step": 9707 }, { "entropy": 1.6744927664597828, "epoch": 1.0664634313806267, "grad_norm": 0.7309461236000061, "learning_rate": 1.0471131771022864e-05, "loss": 1.5346, "mean_token_accuracy": 0.6382336765527725, "num_tokens": 1627436905.0, "step": 9708 }, { "entropy": 1.7173547446727753, "epoch": 1.0665732882919996, "grad_norm": 0.7076787948608398, "learning_rate": 1.0469533417402233e-05, "loss": 1.3205, "mean_token_accuracy": 0.6694023460149765, "num_tokens": 1627595498.0, "step": 9709 }, { "entropy": 1.7104172905286152, "epoch": 1.0666831452033727, "grad_norm": 0.7134943008422852, "learning_rate": 1.0467935080570635e-05, "loss": 1.4921, "mean_token_accuracy": 0.6477473825216293, "num_tokens": 1627769844.0, "step": 9710 }, { "entropy": 1.7438405752182007, "epoch": 1.0667930021147456, "grad_norm": 0.8070123791694641, "learning_rate": 1.0466336760578651e-05, "loss": 1.2289, "mean_token_accuracy": 0.6676356494426727, "num_tokens": 1627874501.0, "step": 9711 }, { "entropy": 1.6932413180669148, "epoch": 1.0669028590261185, "grad_norm": 0.6592340469360352, "learning_rate": 1.0464738457476864e-05, "loss": 1.3901, "mean_token_accuracy": 0.6631415237983068, "num_tokens": 1628013193.0, "step": 9712 }, { "entropy": 1.7657971382141113, "epoch": 1.0670127159374914, "grad_norm": 0.6301009654998779, "learning_rate": 1.0463140171315869e-05, "loss": 1.4882, "mean_token_accuracy": 0.6417807191610336, "num_tokens": 1628199079.0, "step": 9713 }, { "entropy": 1.7166667381922405, "epoch": 1.0671225728488642, "grad_norm": 0.6923064589500427, "learning_rate": 1.0461541902146242e-05, "loss": 1.4851, "mean_token_accuracy": 0.6553737769524256, "num_tokens": 1628345776.0, "step": 9714 }, { "entropy": 1.6853701770305634, "epoch": 1.0672324297602374, "grad_norm": 0.9525777697563171, "learning_rate": 1.0459943650018571e-05, "loss": 1.4003, "mean_token_accuracy": 0.6418146789073944, "num_tokens": 1628531537.0, "step": 9715 }, { "entropy": 1.6456667979558308, "epoch": 1.0673422866716102, "grad_norm": 0.7955992817878723, "learning_rate": 1.0458345414983443e-05, "loss": 1.4437, "mean_token_accuracy": 0.6676210363705953, "num_tokens": 1628676576.0, "step": 9716 }, { "entropy": 1.7199549674987793, "epoch": 1.0674521435829831, "grad_norm": 0.7301570773124695, "learning_rate": 1.0456747197091437e-05, "loss": 1.2948, "mean_token_accuracy": 0.6685295353333155, "num_tokens": 1628797611.0, "step": 9717 }, { "entropy": 1.709757685661316, "epoch": 1.067562000494356, "grad_norm": 0.6962376832962036, "learning_rate": 1.0455148996393134e-05, "loss": 1.4389, "mean_token_accuracy": 0.6500872025887171, "num_tokens": 1628968130.0, "step": 9718 }, { "entropy": 1.7100279132525127, "epoch": 1.0676718574057291, "grad_norm": 0.6749715805053711, "learning_rate": 1.0453550812939123e-05, "loss": 1.4132, "mean_token_accuracy": 0.657318522532781, "num_tokens": 1629132479.0, "step": 9719 }, { "entropy": 1.7299424409866333, "epoch": 1.067781714317102, "grad_norm": 0.7030600905418396, "learning_rate": 1.0451952646779984e-05, "loss": 1.5058, "mean_token_accuracy": 0.6384957631429037, "num_tokens": 1629288118.0, "step": 9720 }, { "entropy": 1.6266062955061595, "epoch": 1.067891571228475, "grad_norm": 0.5976377725601196, "learning_rate": 1.0450354497966296e-05, "loss": 1.252, "mean_token_accuracy": 0.6704409321149191, "num_tokens": 1629440160.0, "step": 9721 }, { "entropy": 1.7321474353472393, "epoch": 1.0680014281398478, "grad_norm": 0.7913817763328552, "learning_rate": 1.044875636654864e-05, "loss": 1.5509, "mean_token_accuracy": 0.6350030054648718, "num_tokens": 1629614991.0, "step": 9722 }, { "entropy": 1.7179767787456512, "epoch": 1.0681112850512209, "grad_norm": 0.7277995944023132, "learning_rate": 1.0447158252577595e-05, "loss": 1.397, "mean_token_accuracy": 0.6543081154425939, "num_tokens": 1629790080.0, "step": 9723 }, { "entropy": 1.7230423092842102, "epoch": 1.0682211419625938, "grad_norm": 0.6613367795944214, "learning_rate": 1.0445560156103742e-05, "loss": 1.5439, "mean_token_accuracy": 0.6340614507595698, "num_tokens": 1630005087.0, "step": 9724 }, { "entropy": 1.6821261048316956, "epoch": 1.0683309988739667, "grad_norm": 0.7301710844039917, "learning_rate": 1.0443962077177662e-05, "loss": 1.4798, "mean_token_accuracy": 0.6496898879607519, "num_tokens": 1630248651.0, "step": 9725 }, { "entropy": 1.6728065013885498, "epoch": 1.0684408557853395, "grad_norm": 0.6719411611557007, "learning_rate": 1.0442364015849933e-05, "loss": 1.5279, "mean_token_accuracy": 0.6336749543746313, "num_tokens": 1630450396.0, "step": 9726 }, { "entropy": 1.6456262568632762, "epoch": 1.0685507126967124, "grad_norm": 0.6920890808105469, "learning_rate": 1.044076597217113e-05, "loss": 1.4419, "mean_token_accuracy": 0.6407264123360316, "num_tokens": 1630661340.0, "step": 9727 }, { "entropy": 1.664272427558899, "epoch": 1.0686605696080855, "grad_norm": 0.8365959525108337, "learning_rate": 1.0439167946191833e-05, "loss": 1.2577, "mean_token_accuracy": 0.6765812089045843, "num_tokens": 1630830641.0, "step": 9728 }, { "entropy": 1.757549246152242, "epoch": 1.0687704265194584, "grad_norm": 0.8349844217300415, "learning_rate": 1.0437569937962617e-05, "loss": 1.4603, "mean_token_accuracy": 0.6479651033878326, "num_tokens": 1630990387.0, "step": 9729 }, { "entropy": 1.6663007040818532, "epoch": 1.0688802834308313, "grad_norm": 0.5842850208282471, "learning_rate": 1.0435971947534056e-05, "loss": 1.4114, "mean_token_accuracy": 0.6464737504720688, "num_tokens": 1631194523.0, "step": 9730 }, { "entropy": 1.7366156081358592, "epoch": 1.0689901403422042, "grad_norm": 0.6315268278121948, "learning_rate": 1.0434373974956732e-05, "loss": 1.3055, "mean_token_accuracy": 0.6716218789418539, "num_tokens": 1631350147.0, "step": 9731 }, { "entropy": 1.7530793845653534, "epoch": 1.0690999972535773, "grad_norm": 0.7210341691970825, "learning_rate": 1.0432776020281217e-05, "loss": 1.5249, "mean_token_accuracy": 0.6390533894300461, "num_tokens": 1631523960.0, "step": 9732 }, { "entropy": 1.6565876404444377, "epoch": 1.0692098541649502, "grad_norm": 0.5894297957420349, "learning_rate": 1.0431178083558085e-05, "loss": 1.5349, "mean_token_accuracy": 0.6446276158094406, "num_tokens": 1631694097.0, "step": 9733 }, { "entropy": 1.7121220330397289, "epoch": 1.069319711076323, "grad_norm": 0.8435656428337097, "learning_rate": 1.0429580164837912e-05, "loss": 1.5071, "mean_token_accuracy": 0.6510050147771835, "num_tokens": 1631875264.0, "step": 9734 }, { "entropy": 1.6705400049686432, "epoch": 1.069429567987696, "grad_norm": 0.7857469320297241, "learning_rate": 1.0427982264171265e-05, "loss": 1.4161, "mean_token_accuracy": 0.6750262777010599, "num_tokens": 1632003546.0, "step": 9735 }, { "entropy": 1.7056404054164886, "epoch": 1.069539424899069, "grad_norm": 0.7281525731086731, "learning_rate": 1.0426384381608723e-05, "loss": 1.3679, "mean_token_accuracy": 0.655732790629069, "num_tokens": 1632145433.0, "step": 9736 }, { "entropy": 1.6636238992214203, "epoch": 1.069649281810442, "grad_norm": 0.6364181041717529, "learning_rate": 1.042478651720086e-05, "loss": 1.421, "mean_token_accuracy": 0.6475005000829697, "num_tokens": 1632329209.0, "step": 9737 }, { "entropy": 1.724616914987564, "epoch": 1.0697591387218148, "grad_norm": 0.7049274444580078, "learning_rate": 1.0423188670998243e-05, "loss": 1.4647, "mean_token_accuracy": 0.65199646850427, "num_tokens": 1632496345.0, "step": 9738 }, { "entropy": 1.7012827197710674, "epoch": 1.0698689956331877, "grad_norm": 0.6477790474891663, "learning_rate": 1.0421590843051443e-05, "loss": 1.395, "mean_token_accuracy": 0.6610339830319086, "num_tokens": 1632699990.0, "step": 9739 }, { "entropy": 1.701954831679662, "epoch": 1.0699788525445606, "grad_norm": 0.7536341547966003, "learning_rate": 1.0419993033411036e-05, "loss": 1.4754, "mean_token_accuracy": 0.6506841977437338, "num_tokens": 1632878568.0, "step": 9740 }, { "entropy": 1.6800040304660797, "epoch": 1.0700887094559337, "grad_norm": 0.7120772004127502, "learning_rate": 1.0418395242127586e-05, "loss": 1.3509, "mean_token_accuracy": 0.6788023312886556, "num_tokens": 1633019001.0, "step": 9741 }, { "entropy": 1.6614234050114949, "epoch": 1.0701985663673066, "grad_norm": 0.5462971925735474, "learning_rate": 1.0416797469251665e-05, "loss": 1.3423, "mean_token_accuracy": 0.6563159028689066, "num_tokens": 1633205177.0, "step": 9742 }, { "entropy": 1.6736981471379597, "epoch": 1.0703084232786795, "grad_norm": 0.5824480056762695, "learning_rate": 1.0415199714833839e-05, "loss": 1.4978, "mean_token_accuracy": 0.6424353569746017, "num_tokens": 1633445240.0, "step": 9743 }, { "entropy": 1.6671138405799866, "epoch": 1.0704182801900524, "grad_norm": 0.6380411386489868, "learning_rate": 1.0413601978924679e-05, "loss": 1.2962, "mean_token_accuracy": 0.6786693433920542, "num_tokens": 1633617985.0, "step": 9744 }, { "entropy": 1.6733331382274628, "epoch": 1.0705281371014255, "grad_norm": 0.7337895035743713, "learning_rate": 1.0412004261574756e-05, "loss": 1.2254, "mean_token_accuracy": 0.6779622882604599, "num_tokens": 1633735989.0, "step": 9745 }, { "entropy": 1.7590945859750111, "epoch": 1.0706379940127984, "grad_norm": 0.7717983722686768, "learning_rate": 1.041040656283463e-05, "loss": 1.3141, "mean_token_accuracy": 0.6691213548183441, "num_tokens": 1633873474.0, "step": 9746 }, { "entropy": 1.7039412657419841, "epoch": 1.0707478509241712, "grad_norm": 0.8194169998168945, "learning_rate": 1.040880888275487e-05, "loss": 1.2822, "mean_token_accuracy": 0.670879011352857, "num_tokens": 1634023457.0, "step": 9747 }, { "entropy": 1.7141053279240925, "epoch": 1.0708577078355441, "grad_norm": 0.8548807501792908, "learning_rate": 1.0407211221386045e-05, "loss": 1.5504, "mean_token_accuracy": 0.6367798795302709, "num_tokens": 1634192980.0, "step": 9748 }, { "entropy": 1.7354576488335927, "epoch": 1.0709675647469172, "grad_norm": 0.7616019248962402, "learning_rate": 1.0405613578778715e-05, "loss": 1.4807, "mean_token_accuracy": 0.6475146313508352, "num_tokens": 1634353012.0, "step": 9749 }, { "entropy": 1.646492878595988, "epoch": 1.0710774216582901, "grad_norm": 0.6295143365859985, "learning_rate": 1.0404015954983446e-05, "loss": 1.2916, "mean_token_accuracy": 0.6755774716536204, "num_tokens": 1634499347.0, "step": 9750 }, { "entropy": 1.7243276337782543, "epoch": 1.071187278569663, "grad_norm": 0.7246981263160706, "learning_rate": 1.0402418350050807e-05, "loss": 1.3662, "mean_token_accuracy": 0.6525032967329025, "num_tokens": 1634629111.0, "step": 9751 }, { "entropy": 1.688949167728424, "epoch": 1.071297135481036, "grad_norm": 0.6447790265083313, "learning_rate": 1.0400820764031359e-05, "loss": 1.3286, "mean_token_accuracy": 0.6776885588963827, "num_tokens": 1634768536.0, "step": 9752 }, { "entropy": 1.6156230966250102, "epoch": 1.0714069923924088, "grad_norm": 0.6778178811073303, "learning_rate": 1.039922319697566e-05, "loss": 1.4044, "mean_token_accuracy": 0.6686372607946396, "num_tokens": 1634933718.0, "step": 9753 }, { "entropy": 1.7385461231072743, "epoch": 1.0715168493037819, "grad_norm": 0.7039634585380554, "learning_rate": 1.0397625648934279e-05, "loss": 1.4781, "mean_token_accuracy": 0.6421026686827341, "num_tokens": 1635106063.0, "step": 9754 }, { "entropy": 1.7002881566683452, "epoch": 1.0716267062151548, "grad_norm": 0.5606304407119751, "learning_rate": 1.0396028119957775e-05, "loss": 1.4618, "mean_token_accuracy": 0.6472335507472357, "num_tokens": 1635323066.0, "step": 9755 }, { "entropy": 1.720334788163503, "epoch": 1.0717365631265277, "grad_norm": 0.6186944842338562, "learning_rate": 1.0394430610096704e-05, "loss": 1.4535, "mean_token_accuracy": 0.648893857995669, "num_tokens": 1635538160.0, "step": 9756 }, { "entropy": 1.710259069999059, "epoch": 1.0718464200379005, "grad_norm": 0.7848376631736755, "learning_rate": 1.0392833119401635e-05, "loss": 1.2818, "mean_token_accuracy": 0.6685756246248881, "num_tokens": 1635684409.0, "step": 9757 }, { "entropy": 1.679042249917984, "epoch": 1.0719562769492736, "grad_norm": 0.6115646362304688, "learning_rate": 1.0391235647923125e-05, "loss": 1.3574, "mean_token_accuracy": 0.6604473541180292, "num_tokens": 1635818851.0, "step": 9758 }, { "entropy": 1.6841739316781361, "epoch": 1.0720661338606465, "grad_norm": 0.6064473390579224, "learning_rate": 1.0389638195711731e-05, "loss": 1.3239, "mean_token_accuracy": 0.6654881288607916, "num_tokens": 1636047560.0, "step": 9759 }, { "entropy": 1.7591745456059773, "epoch": 1.0721759907720194, "grad_norm": 0.7367409467697144, "learning_rate": 1.0388040762818015e-05, "loss": 1.4377, "mean_token_accuracy": 0.6540845880905787, "num_tokens": 1636161616.0, "step": 9760 }, { "entropy": 1.7117444177468617, "epoch": 1.0722858476833923, "grad_norm": 2.199622869491577, "learning_rate": 1.0386443349292532e-05, "loss": 1.2059, "mean_token_accuracy": 0.6679257899522781, "num_tokens": 1636339502.0, "step": 9761 }, { "entropy": 1.7388999263445537, "epoch": 1.0723957045947654, "grad_norm": 0.6870440244674683, "learning_rate": 1.0384845955185838e-05, "loss": 1.5565, "mean_token_accuracy": 0.6418974051872889, "num_tokens": 1636544108.0, "step": 9762 }, { "entropy": 1.6883673071861267, "epoch": 1.0725055615061383, "grad_norm": 0.6861622929573059, "learning_rate": 1.0383248580548495e-05, "loss": 1.2584, "mean_token_accuracy": 0.6789047420024872, "num_tokens": 1636730212.0, "step": 9763 }, { "entropy": 1.7474354803562164, "epoch": 1.0726154184175112, "grad_norm": 0.8235689401626587, "learning_rate": 1.0381651225431055e-05, "loss": 1.5772, "mean_token_accuracy": 0.6468819305300713, "num_tokens": 1636875441.0, "step": 9764 }, { "entropy": 1.7631232539812725, "epoch": 1.072725275328884, "grad_norm": 0.6376375555992126, "learning_rate": 1.0380053889884077e-05, "loss": 1.3514, "mean_token_accuracy": 0.65848508477211, "num_tokens": 1637056648.0, "step": 9765 }, { "entropy": 1.7007540861765544, "epoch": 1.072835132240257, "grad_norm": 0.6686075329780579, "learning_rate": 1.0378456573958113e-05, "loss": 1.404, "mean_token_accuracy": 0.6725321859121323, "num_tokens": 1637238698.0, "step": 9766 }, { "entropy": 1.7597143749396007, "epoch": 1.07294498915163, "grad_norm": 0.686677098274231, "learning_rate": 1.037685927770372e-05, "loss": 1.5594, "mean_token_accuracy": 0.6368110875288645, "num_tokens": 1637454987.0, "step": 9767 }, { "entropy": 1.736111968755722, "epoch": 1.073054846063003, "grad_norm": 0.7504826188087463, "learning_rate": 1.0375262001171446e-05, "loss": 1.7144, "mean_token_accuracy": 0.6141124417384466, "num_tokens": 1637622399.0, "step": 9768 }, { "entropy": 1.7867793242136638, "epoch": 1.0731647029743758, "grad_norm": 0.6543484926223755, "learning_rate": 1.0373664744411851e-05, "loss": 1.34, "mean_token_accuracy": 0.667723630865415, "num_tokens": 1637753172.0, "step": 9769 }, { "entropy": 1.6704789002736409, "epoch": 1.0732745598857487, "grad_norm": 0.6091136336326599, "learning_rate": 1.0372067507475485e-05, "loss": 1.2838, "mean_token_accuracy": 0.6668292681376139, "num_tokens": 1637948045.0, "step": 9770 }, { "entropy": 1.7835040887196858, "epoch": 1.0733844167971218, "grad_norm": 0.6930747032165527, "learning_rate": 1.0370470290412898e-05, "loss": 1.402, "mean_token_accuracy": 0.6510342458883921, "num_tokens": 1638092529.0, "step": 9771 }, { "entropy": 1.632412811120351, "epoch": 1.0734942737084947, "grad_norm": 0.6597646474838257, "learning_rate": 1.0368873093274646e-05, "loss": 1.2968, "mean_token_accuracy": 0.6696435958147049, "num_tokens": 1638232322.0, "step": 9772 }, { "entropy": 1.6681885520617168, "epoch": 1.0736041306198676, "grad_norm": 0.6119679808616638, "learning_rate": 1.0367275916111272e-05, "loss": 1.3269, "mean_token_accuracy": 0.6568154295285543, "num_tokens": 1638417721.0, "step": 9773 }, { "entropy": 1.7815796037515004, "epoch": 1.0737139875312405, "grad_norm": 0.6741944551467896, "learning_rate": 1.036567875897333e-05, "loss": 1.4554, "mean_token_accuracy": 0.6437022139628729, "num_tokens": 1638573018.0, "step": 9774 }, { "entropy": 1.658657729625702, "epoch": 1.0738238444426136, "grad_norm": 0.6922010779380798, "learning_rate": 1.0364081621911372e-05, "loss": 1.4375, "mean_token_accuracy": 0.6637585858503977, "num_tokens": 1638755544.0, "step": 9775 }, { "entropy": 1.6960961520671844, "epoch": 1.0739337013539865, "grad_norm": 0.7771033048629761, "learning_rate": 1.0362484504975943e-05, "loss": 1.3166, "mean_token_accuracy": 0.6613381505012512, "num_tokens": 1638893033.0, "step": 9776 }, { "entropy": 1.7312343815962474, "epoch": 1.0740435582653594, "grad_norm": 0.6502153873443604, "learning_rate": 1.0360887408217592e-05, "loss": 1.4751, "mean_token_accuracy": 0.6359160343805949, "num_tokens": 1639088128.0, "step": 9777 }, { "entropy": 1.7355634570121765, "epoch": 1.0741534151767322, "grad_norm": 0.6641053557395935, "learning_rate": 1.0359290331686869e-05, "loss": 1.5899, "mean_token_accuracy": 0.6462592383225759, "num_tokens": 1639267913.0, "step": 9778 }, { "entropy": 1.7013458808263142, "epoch": 1.0742632720881051, "grad_norm": 0.7097647786140442, "learning_rate": 1.0357693275434315e-05, "loss": 1.1678, "mean_token_accuracy": 0.682997981707255, "num_tokens": 1639367322.0, "step": 9779 }, { "entropy": 1.725685566663742, "epoch": 1.0743731289994782, "grad_norm": 0.6841909289360046, "learning_rate": 1.0356096239510478e-05, "loss": 1.3812, "mean_token_accuracy": 0.6562798221906027, "num_tokens": 1639546232.0, "step": 9780 }, { "entropy": 1.6580406824747722, "epoch": 1.0744829859108511, "grad_norm": 0.6318546533584595, "learning_rate": 1.035449922396591e-05, "loss": 1.3734, "mean_token_accuracy": 0.668061430255572, "num_tokens": 1639715575.0, "step": 9781 }, { "entropy": 1.750498543183009, "epoch": 1.074592842822224, "grad_norm": 0.7357514500617981, "learning_rate": 1.0352902228851147e-05, "loss": 1.2926, "mean_token_accuracy": 0.6660207162300745, "num_tokens": 1639837278.0, "step": 9782 }, { "entropy": 1.7398191094398499, "epoch": 1.0747026997335969, "grad_norm": 0.787640392780304, "learning_rate": 1.0351305254216736e-05, "loss": 1.3594, "mean_token_accuracy": 0.6593456069628397, "num_tokens": 1639989121.0, "step": 9783 }, { "entropy": 1.6935386459032695, "epoch": 1.07481255664497, "grad_norm": 0.6656416654586792, "learning_rate": 1.0349708300113228e-05, "loss": 1.3941, "mean_token_accuracy": 0.6612067172924677, "num_tokens": 1640152318.0, "step": 9784 }, { "entropy": 1.717629502216975, "epoch": 1.0749224135563429, "grad_norm": 0.7953632473945618, "learning_rate": 1.0348111366591154e-05, "loss": 1.5034, "mean_token_accuracy": 0.6524255921443304, "num_tokens": 1640280142.0, "step": 9785 }, { "entropy": 1.7047446469465892, "epoch": 1.0750322704677158, "grad_norm": 0.6888314485549927, "learning_rate": 1.034651445370106e-05, "loss": 1.4914, "mean_token_accuracy": 0.6492563138405482, "num_tokens": 1640440654.0, "step": 9786 }, { "entropy": 1.709777424732844, "epoch": 1.0751421273790887, "grad_norm": 0.5959087610244751, "learning_rate": 1.0344917561493492e-05, "loss": 1.4593, "mean_token_accuracy": 0.6443605422973633, "num_tokens": 1640638440.0, "step": 9787 }, { "entropy": 1.7329801519711812, "epoch": 1.0752519842904618, "grad_norm": 0.6564949750900269, "learning_rate": 1.0343320690018988e-05, "loss": 1.4895, "mean_token_accuracy": 0.6578076879183451, "num_tokens": 1640779771.0, "step": 9788 }, { "entropy": 1.722126881281535, "epoch": 1.0753618412018346, "grad_norm": 0.6596241593360901, "learning_rate": 1.0341723839328086e-05, "loss": 1.4626, "mean_token_accuracy": 0.6395512421925863, "num_tokens": 1640969536.0, "step": 9789 }, { "entropy": 1.6550799508889515, "epoch": 1.0754716981132075, "grad_norm": 0.6088923215866089, "learning_rate": 1.0340127009471331e-05, "loss": 1.4409, "mean_token_accuracy": 0.6460276196400324, "num_tokens": 1641181637.0, "step": 9790 }, { "entropy": 1.6901710430781047, "epoch": 1.0755815550245804, "grad_norm": 0.677692711353302, "learning_rate": 1.0338530200499258e-05, "loss": 1.3289, "mean_token_accuracy": 0.670496458808581, "num_tokens": 1641338802.0, "step": 9791 }, { "entropy": 1.7122070491313934, "epoch": 1.0756914119359535, "grad_norm": 0.8194560408592224, "learning_rate": 1.0336933412462402e-05, "loss": 1.3465, "mean_token_accuracy": 0.6500441581010818, "num_tokens": 1641506448.0, "step": 9792 }, { "entropy": 1.737431804339091, "epoch": 1.0758012688473264, "grad_norm": 0.6265955567359924, "learning_rate": 1.0335336645411309e-05, "loss": 1.3948, "mean_token_accuracy": 0.6440109014511108, "num_tokens": 1641693580.0, "step": 9793 }, { "entropy": 1.7306747833887737, "epoch": 1.0759111257586993, "grad_norm": 0.6415075063705444, "learning_rate": 1.0333739899396511e-05, "loss": 1.4351, "mean_token_accuracy": 0.6500951796770096, "num_tokens": 1641847952.0, "step": 9794 }, { "entropy": 1.720722109079361, "epoch": 1.0760209826700722, "grad_norm": 0.7167672514915466, "learning_rate": 1.0332143174468545e-05, "loss": 1.4079, "mean_token_accuracy": 0.6544150163729986, "num_tokens": 1642023634.0, "step": 9795 }, { "entropy": 1.654203087091446, "epoch": 1.076130839581445, "grad_norm": 0.581329345703125, "learning_rate": 1.0330546470677946e-05, "loss": 1.2703, "mean_token_accuracy": 0.6772001385688782, "num_tokens": 1642227381.0, "step": 9796 }, { "entropy": 1.7064690093199413, "epoch": 1.0762406964928182, "grad_norm": 0.6623792052268982, "learning_rate": 1.0328949788075249e-05, "loss": 1.4675, "mean_token_accuracy": 0.6539207597573599, "num_tokens": 1642396240.0, "step": 9797 }, { "entropy": 1.662476509809494, "epoch": 1.076350553404191, "grad_norm": 0.6676307320594788, "learning_rate": 1.0327353126710988e-05, "loss": 1.4906, "mean_token_accuracy": 0.6464128841956457, "num_tokens": 1642538098.0, "step": 9798 }, { "entropy": 1.671265075604121, "epoch": 1.076460410315564, "grad_norm": 0.6107703447341919, "learning_rate": 1.03257564866357e-05, "loss": 1.358, "mean_token_accuracy": 0.6674867620070776, "num_tokens": 1642748380.0, "step": 9799 }, { "entropy": 1.7271918257077534, "epoch": 1.0765702672269368, "grad_norm": 0.6320644617080688, "learning_rate": 1.0324159867899914e-05, "loss": 1.4831, "mean_token_accuracy": 0.6485229134559631, "num_tokens": 1642965144.0, "step": 9800 }, { "entropy": 1.6610159476598103, "epoch": 1.07668012413831, "grad_norm": 0.9616381525993347, "learning_rate": 1.0322563270554167e-05, "loss": 1.2259, "mean_token_accuracy": 0.6779639472564062, "num_tokens": 1643120751.0, "step": 9801 }, { "entropy": 1.6773190399010975, "epoch": 1.0767899810496828, "grad_norm": 0.7179288268089294, "learning_rate": 1.0320966694648984e-05, "loss": 1.2882, "mean_token_accuracy": 0.6666442155838013, "num_tokens": 1643266003.0, "step": 9802 }, { "entropy": 1.724582443634669, "epoch": 1.0768998379610557, "grad_norm": 0.6458866596221924, "learning_rate": 1.03193701402349e-05, "loss": 1.4392, "mean_token_accuracy": 0.6562464485565821, "num_tokens": 1643464964.0, "step": 9803 }, { "entropy": 1.7303006847699482, "epoch": 1.0770096948724286, "grad_norm": 0.7385509014129639, "learning_rate": 1.0317773607362445e-05, "loss": 1.5418, "mean_token_accuracy": 0.6399680574735006, "num_tokens": 1643676724.0, "step": 9804 }, { "entropy": 1.7020288407802582, "epoch": 1.0771195517838017, "grad_norm": 0.7342250347137451, "learning_rate": 1.0316177096082142e-05, "loss": 1.4731, "mean_token_accuracy": 0.6379378736019135, "num_tokens": 1643837858.0, "step": 9805 }, { "entropy": 1.6457345684369404, "epoch": 1.0772294086951746, "grad_norm": 0.6080856323242188, "learning_rate": 1.0314580606444531e-05, "loss": 1.3619, "mean_token_accuracy": 0.6683526982863744, "num_tokens": 1644029727.0, "step": 9806 }, { "entropy": 1.6607999900976818, "epoch": 1.0773392656065475, "grad_norm": 0.6070815324783325, "learning_rate": 1.0312984138500137e-05, "loss": 1.3415, "mean_token_accuracy": 0.6586224585771561, "num_tokens": 1644184892.0, "step": 9807 }, { "entropy": 1.7429456015427907, "epoch": 1.0774491225179204, "grad_norm": 0.6538578271865845, "learning_rate": 1.0311387692299481e-05, "loss": 1.4834, "mean_token_accuracy": 0.6407992839813232, "num_tokens": 1644349707.0, "step": 9808 }, { "entropy": 1.6649847229321797, "epoch": 1.0775589794292935, "grad_norm": 1.274907112121582, "learning_rate": 1.0309791267893097e-05, "loss": 1.2798, "mean_token_accuracy": 0.6626059412956238, "num_tokens": 1644577887.0, "step": 9809 }, { "entropy": 1.7314873437086742, "epoch": 1.0776688363406663, "grad_norm": 2.2484679222106934, "learning_rate": 1.030819486533151e-05, "loss": 1.0967, "mean_token_accuracy": 0.6872533162434896, "num_tokens": 1644760231.0, "step": 9810 }, { "entropy": 1.6759747962156932, "epoch": 1.0777786932520392, "grad_norm": 0.7670673131942749, "learning_rate": 1.0306598484665237e-05, "loss": 1.3782, "mean_token_accuracy": 0.6530605256557465, "num_tokens": 1644987826.0, "step": 9811 }, { "entropy": 1.702736069758733, "epoch": 1.0778885501634121, "grad_norm": 0.6072533130645752, "learning_rate": 1.0305002125944815e-05, "loss": 1.3696, "mean_token_accuracy": 0.6654748469591141, "num_tokens": 1645161886.0, "step": 9812 }, { "entropy": 1.6799784203370411, "epoch": 1.077998407074785, "grad_norm": 0.6296765804290771, "learning_rate": 1.0303405789220762e-05, "loss": 1.2927, "mean_token_accuracy": 0.6687972942988077, "num_tokens": 1645290821.0, "step": 9813 }, { "entropy": 1.7050584852695465, "epoch": 1.078108263986158, "grad_norm": 0.6202853918075562, "learning_rate": 1.03018094745436e-05, "loss": 1.3272, "mean_token_accuracy": 0.6606413920720419, "num_tokens": 1645434124.0, "step": 9814 }, { "entropy": 1.6356126467386882, "epoch": 1.078218120897531, "grad_norm": 0.6204116344451904, "learning_rate": 1.0300213181963854e-05, "loss": 1.4115, "mean_token_accuracy": 0.661191796263059, "num_tokens": 1645655256.0, "step": 9815 }, { "entropy": 1.6903501550356548, "epoch": 1.0783279778089039, "grad_norm": 0.8098730444908142, "learning_rate": 1.0298616911532047e-05, "loss": 1.3938, "mean_token_accuracy": 0.6561371485392252, "num_tokens": 1645794366.0, "step": 9816 }, { "entropy": 1.7309677203496296, "epoch": 1.0784378347202768, "grad_norm": 0.769802451133728, "learning_rate": 1.0297020663298695e-05, "loss": 1.3183, "mean_token_accuracy": 0.654662013053894, "num_tokens": 1646020983.0, "step": 9817 }, { "entropy": 1.688058316707611, "epoch": 1.0785476916316499, "grad_norm": 0.6309769749641418, "learning_rate": 1.0295424437314326e-05, "loss": 1.5407, "mean_token_accuracy": 0.6352472951014837, "num_tokens": 1646262332.0, "step": 9818 }, { "entropy": 1.6809982061386108, "epoch": 1.0786575485430228, "grad_norm": 0.7366631031036377, "learning_rate": 1.0293828233629457e-05, "loss": 1.3507, "mean_token_accuracy": 0.6608734428882599, "num_tokens": 1646417726.0, "step": 9819 }, { "entropy": 1.7148310641447704, "epoch": 1.0787674054543956, "grad_norm": 2.1722970008850098, "learning_rate": 1.0292232052294603e-05, "loss": 1.3295, "mean_token_accuracy": 0.6597887873649597, "num_tokens": 1646608030.0, "step": 9820 }, { "entropy": 1.6972604592641194, "epoch": 1.0788772623657685, "grad_norm": 0.7574262619018555, "learning_rate": 1.0290635893360288e-05, "loss": 1.3516, "mean_token_accuracy": 0.6627415219942728, "num_tokens": 1646720566.0, "step": 9821 }, { "entropy": 1.7421917816003163, "epoch": 1.0789871192771416, "grad_norm": 0.6693733930587769, "learning_rate": 1.0289039756877026e-05, "loss": 1.5256, "mean_token_accuracy": 0.6440355281035105, "num_tokens": 1646877724.0, "step": 9822 }, { "entropy": 1.7440255184968312, "epoch": 1.0790969761885145, "grad_norm": 0.8541271686553955, "learning_rate": 1.0287443642895334e-05, "loss": 1.6604, "mean_token_accuracy": 0.6371288100878397, "num_tokens": 1647087449.0, "step": 9823 }, { "entropy": 1.6293854117393494, "epoch": 1.0792068330998874, "grad_norm": 0.678485095500946, "learning_rate": 1.0285847551465731e-05, "loss": 1.3659, "mean_token_accuracy": 0.6574168552954992, "num_tokens": 1647232759.0, "step": 9824 }, { "entropy": 1.6905154486497243, "epoch": 1.0793166900112603, "grad_norm": 0.6696950197219849, "learning_rate": 1.0284251482638731e-05, "loss": 1.3161, "mean_token_accuracy": 0.6695791979630789, "num_tokens": 1647386243.0, "step": 9825 }, { "entropy": 1.7025805910428364, "epoch": 1.0794265469226332, "grad_norm": 0.6888556480407715, "learning_rate": 1.028265543646485e-05, "loss": 1.3759, "mean_token_accuracy": 0.6593132664759954, "num_tokens": 1647595259.0, "step": 9826 }, { "entropy": 1.7074210743109386, "epoch": 1.0795364038340063, "grad_norm": 0.8074763417243958, "learning_rate": 1.02810594129946e-05, "loss": 1.4319, "mean_token_accuracy": 0.6571053018172582, "num_tokens": 1647787234.0, "step": 9827 }, { "entropy": 1.6747658252716064, "epoch": 1.0796462607453792, "grad_norm": 0.6728916168212891, "learning_rate": 1.0279463412278499e-05, "loss": 1.4022, "mean_token_accuracy": 0.6656891653935114, "num_tokens": 1647936897.0, "step": 9828 }, { "entropy": 1.6993980407714844, "epoch": 1.079756117656752, "grad_norm": 0.6834884881973267, "learning_rate": 1.0277867434367052e-05, "loss": 1.4363, "mean_token_accuracy": 0.6489211916923523, "num_tokens": 1648124511.0, "step": 9829 }, { "entropy": 1.6820717453956604, "epoch": 1.079865974568125, "grad_norm": 0.7455261945724487, "learning_rate": 1.0276271479310775e-05, "loss": 1.1957, "mean_token_accuracy": 0.6947454114754995, "num_tokens": 1648246242.0, "step": 9830 }, { "entropy": 1.7340157429377239, "epoch": 1.079975831479498, "grad_norm": 0.7091799974441528, "learning_rate": 1.0274675547160184e-05, "loss": 1.3353, "mean_token_accuracy": 0.6555547267198563, "num_tokens": 1648388192.0, "step": 9831 }, { "entropy": 1.7246295909086864, "epoch": 1.080085688390871, "grad_norm": 0.6388477683067322, "learning_rate": 1.0273079637965782e-05, "loss": 1.5495, "mean_token_accuracy": 0.6348314036925634, "num_tokens": 1648607042.0, "step": 9832 }, { "entropy": 1.7295263310273488, "epoch": 1.0801955453022438, "grad_norm": 0.7163142561912537, "learning_rate": 1.0271483751778082e-05, "loss": 1.3953, "mean_token_accuracy": 0.6609020779530207, "num_tokens": 1648770793.0, "step": 9833 }, { "entropy": 1.6535666485627492, "epoch": 1.0803054022136167, "grad_norm": 0.6457258462905884, "learning_rate": 1.0269887888647594e-05, "loss": 1.2601, "mean_token_accuracy": 0.6786867479483286, "num_tokens": 1648918233.0, "step": 9834 }, { "entropy": 1.6825013260046642, "epoch": 1.0804152591249898, "grad_norm": 0.6680422425270081, "learning_rate": 1.0268292048624825e-05, "loss": 1.4099, "mean_token_accuracy": 0.6632524182399114, "num_tokens": 1649110520.0, "step": 9835 }, { "entropy": 1.700180431207021, "epoch": 1.0805251160363627, "grad_norm": 0.6605114340782166, "learning_rate": 1.026669623176028e-05, "loss": 1.4498, "mean_token_accuracy": 0.6626182099183401, "num_tokens": 1649282053.0, "step": 9836 }, { "entropy": 1.6609856685002644, "epoch": 1.0806349729477356, "grad_norm": 0.7333995699882507, "learning_rate": 1.0265100438104474e-05, "loss": 1.2677, "mean_token_accuracy": 0.6766239404678345, "num_tokens": 1649457935.0, "step": 9837 }, { "entropy": 1.693364332119624, "epoch": 1.0807448298591085, "grad_norm": 0.7352896928787231, "learning_rate": 1.0263504667707904e-05, "loss": 1.4006, "mean_token_accuracy": 0.6677973767121633, "num_tokens": 1649597344.0, "step": 9838 }, { "entropy": 1.6948012510935466, "epoch": 1.0808546867704814, "grad_norm": 0.7008348107337952, "learning_rate": 1.026190892062108e-05, "loss": 1.404, "mean_token_accuracy": 0.6567817181348801, "num_tokens": 1649740110.0, "step": 9839 }, { "entropy": 1.7075625856717427, "epoch": 1.0809645436818545, "grad_norm": 0.752145528793335, "learning_rate": 1.0260313196894509e-05, "loss": 1.3352, "mean_token_accuracy": 0.6571847250064214, "num_tokens": 1649877492.0, "step": 9840 }, { "entropy": 1.6945746143658955, "epoch": 1.0810744005932273, "grad_norm": 0.9085291624069214, "learning_rate": 1.025871749657869e-05, "loss": 1.3813, "mean_token_accuracy": 0.6665191451708475, "num_tokens": 1650073878.0, "step": 9841 }, { "entropy": 1.7060600022474925, "epoch": 1.0811842575046002, "grad_norm": 0.8114275336265564, "learning_rate": 1.0257121819724125e-05, "loss": 1.3438, "mean_token_accuracy": 0.6567393392324448, "num_tokens": 1650206487.0, "step": 9842 }, { "entropy": 1.6965892314910889, "epoch": 1.0812941144159731, "grad_norm": 0.6386088728904724, "learning_rate": 1.0255526166381326e-05, "loss": 1.4076, "mean_token_accuracy": 0.6541461398204168, "num_tokens": 1650355875.0, "step": 9843 }, { "entropy": 1.6367349326610565, "epoch": 1.0814039713273462, "grad_norm": 0.6096007823944092, "learning_rate": 1.0253930536600785e-05, "loss": 1.3704, "mean_token_accuracy": 0.6580093254645666, "num_tokens": 1650560937.0, "step": 9844 }, { "entropy": 1.6642896234989166, "epoch": 1.081513828238719, "grad_norm": 0.7645293474197388, "learning_rate": 1.0252334930433005e-05, "loss": 1.294, "mean_token_accuracy": 0.6714354753494263, "num_tokens": 1650749246.0, "step": 9845 }, { "entropy": 1.7149154146512349, "epoch": 1.081623685150092, "grad_norm": 0.6272317171096802, "learning_rate": 1.0250739347928492e-05, "loss": 1.4153, "mean_token_accuracy": 0.6595138013362885, "num_tokens": 1650926377.0, "step": 9846 }, { "entropy": 1.6645729045073192, "epoch": 1.0817335420614649, "grad_norm": 0.7638152241706848, "learning_rate": 1.0249143789137736e-05, "loss": 1.3517, "mean_token_accuracy": 0.6574498365322748, "num_tokens": 1651131120.0, "step": 9847 }, { "entropy": 1.7273275057474773, "epoch": 1.081843398972838, "grad_norm": 0.8124344944953918, "learning_rate": 1.0247548254111242e-05, "loss": 1.3278, "mean_token_accuracy": 0.6669291456540426, "num_tokens": 1651296563.0, "step": 9848 }, { "entropy": 1.7501426339149475, "epoch": 1.0819532558842109, "grad_norm": 0.8257563710212708, "learning_rate": 1.0245952742899508e-05, "loss": 1.4294, "mean_token_accuracy": 0.6583471794923147, "num_tokens": 1651419353.0, "step": 9849 }, { "entropy": 1.7237468461195629, "epoch": 1.0820631127955838, "grad_norm": 0.6573739051818848, "learning_rate": 1.024435725555303e-05, "loss": 1.4584, "mean_token_accuracy": 0.6424074321985245, "num_tokens": 1651615401.0, "step": 9850 }, { "entropy": 1.738576332728068, "epoch": 1.0821729697069566, "grad_norm": 0.7192042469978333, "learning_rate": 1.0242761792122303e-05, "loss": 1.4456, "mean_token_accuracy": 0.6536912967761358, "num_tokens": 1651759046.0, "step": 9851 }, { "entropy": 1.6815000077088673, "epoch": 1.0822828266183295, "grad_norm": 0.6549572944641113, "learning_rate": 1.0241166352657825e-05, "loss": 1.4403, "mean_token_accuracy": 0.6523531973361969, "num_tokens": 1651935854.0, "step": 9852 }, { "entropy": 1.7222835222880046, "epoch": 1.0823926835297026, "grad_norm": 0.5829499363899231, "learning_rate": 1.023957093721009e-05, "loss": 1.3928, "mean_token_accuracy": 0.6541026532649994, "num_tokens": 1652165819.0, "step": 9853 }, { "entropy": 1.7308641870816548, "epoch": 1.0825025404410755, "grad_norm": 0.5812973380088806, "learning_rate": 1.023797554582959e-05, "loss": 1.3832, "mean_token_accuracy": 0.6509318649768829, "num_tokens": 1652345735.0, "step": 9854 }, { "entropy": 1.696856160958608, "epoch": 1.0826123973524484, "grad_norm": 0.6950253844261169, "learning_rate": 1.0236380178566825e-05, "loss": 1.3401, "mean_token_accuracy": 0.6734130581219991, "num_tokens": 1652491455.0, "step": 9855 }, { "entropy": 1.7205109894275665, "epoch": 1.0827222542638213, "grad_norm": 0.7813112139701843, "learning_rate": 1.023478483547228e-05, "loss": 1.2933, "mean_token_accuracy": 0.6657413095235825, "num_tokens": 1652678524.0, "step": 9856 }, { "entropy": 1.7302239338556926, "epoch": 1.0828321111751944, "grad_norm": 0.6658751368522644, "learning_rate": 1.0233189516596452e-05, "loss": 1.3485, "mean_token_accuracy": 0.6597934563954672, "num_tokens": 1652856846.0, "step": 9857 }, { "entropy": 1.7021392385164897, "epoch": 1.0829419680865673, "grad_norm": 0.6967145800590515, "learning_rate": 1.023159422198983e-05, "loss": 1.4445, "mean_token_accuracy": 0.6574411243200302, "num_tokens": 1653065215.0, "step": 9858 }, { "entropy": 1.7235744297504425, "epoch": 1.0830518249979402, "grad_norm": 0.6974611282348633, "learning_rate": 1.0229998951702902e-05, "loss": 1.3955, "mean_token_accuracy": 0.6510575066010157, "num_tokens": 1653238703.0, "step": 9859 }, { "entropy": 1.7360788782437642, "epoch": 1.083161681909313, "grad_norm": 0.6182504892349243, "learning_rate": 1.0228403705786165e-05, "loss": 1.3991, "mean_token_accuracy": 0.656227042277654, "num_tokens": 1653453495.0, "step": 9860 }, { "entropy": 1.7091187338034313, "epoch": 1.0832715388206862, "grad_norm": 0.6004095077514648, "learning_rate": 1.0226808484290097e-05, "loss": 1.5411, "mean_token_accuracy": 0.6317160924275717, "num_tokens": 1653719905.0, "step": 9861 }, { "entropy": 1.6987085143725078, "epoch": 1.083381395732059, "grad_norm": 0.6815454959869385, "learning_rate": 1.0225213287265194e-05, "loss": 1.4007, "mean_token_accuracy": 0.646802599231402, "num_tokens": 1653908379.0, "step": 9862 }, { "entropy": 1.755209634701411, "epoch": 1.083491252643432, "grad_norm": 0.7040994167327881, "learning_rate": 1.0223618114761947e-05, "loss": 1.4096, "mean_token_accuracy": 0.6451922804117203, "num_tokens": 1654083065.0, "step": 9863 }, { "entropy": 1.680431107680003, "epoch": 1.0836011095548048, "grad_norm": 0.7204059362411499, "learning_rate": 1.022202296683083e-05, "loss": 1.4142, "mean_token_accuracy": 0.6432947367429733, "num_tokens": 1654261728.0, "step": 9864 }, { "entropy": 1.7355269491672516, "epoch": 1.0837109664661777, "grad_norm": 0.8181194067001343, "learning_rate": 1.0220427843522338e-05, "loss": 1.3979, "mean_token_accuracy": 0.6544067362944285, "num_tokens": 1654389163.0, "step": 9865 }, { "entropy": 1.6419854164123535, "epoch": 1.0838208233775508, "grad_norm": 0.6498574018478394, "learning_rate": 1.0218832744886956e-05, "loss": 1.2833, "mean_token_accuracy": 0.6642761528491974, "num_tokens": 1654574592.0, "step": 9866 }, { "entropy": 1.6292428175608318, "epoch": 1.0839306802889237, "grad_norm": 0.6409704685211182, "learning_rate": 1.0217237670975158e-05, "loss": 1.3204, "mean_token_accuracy": 0.6685640662908554, "num_tokens": 1654727006.0, "step": 9867 }, { "entropy": 1.704335480928421, "epoch": 1.0840405372002966, "grad_norm": 0.9147383570671082, "learning_rate": 1.021564262183744e-05, "loss": 1.5267, "mean_token_accuracy": 0.6415324260791143, "num_tokens": 1654887057.0, "step": 9868 }, { "entropy": 1.737158477306366, "epoch": 1.0841503941116695, "grad_norm": 0.6425780057907104, "learning_rate": 1.0214047597524281e-05, "loss": 1.3627, "mean_token_accuracy": 0.6585712929566702, "num_tokens": 1655026529.0, "step": 9869 }, { "entropy": 1.6733458836873372, "epoch": 1.0842602510230426, "grad_norm": 0.65185546875, "learning_rate": 1.021245259808616e-05, "loss": 1.5595, "mean_token_accuracy": 0.6183687796195348, "num_tokens": 1655310252.0, "step": 9870 }, { "entropy": 1.7314467032750447, "epoch": 1.0843701079344155, "grad_norm": 0.6148692965507507, "learning_rate": 1.0210857623573558e-05, "loss": 1.5472, "mean_token_accuracy": 0.623336007197698, "num_tokens": 1655485693.0, "step": 9871 }, { "entropy": 1.7008031606674194, "epoch": 1.0844799648457883, "grad_norm": 0.6279149651527405, "learning_rate": 1.0209262674036961e-05, "loss": 1.3351, "mean_token_accuracy": 0.6545123358567556, "num_tokens": 1655652876.0, "step": 9872 }, { "entropy": 1.7076662480831146, "epoch": 1.0845898217571612, "grad_norm": 0.7002870440483093, "learning_rate": 1.0207667749526838e-05, "loss": 1.5737, "mean_token_accuracy": 0.6307255576054255, "num_tokens": 1655813676.0, "step": 9873 }, { "entropy": 1.6783838669459026, "epoch": 1.0846996786685343, "grad_norm": 0.6915937662124634, "learning_rate": 1.0206072850093676e-05, "loss": 1.3763, "mean_token_accuracy": 0.6629201124111811, "num_tokens": 1655992944.0, "step": 9874 }, { "entropy": 1.6777593195438385, "epoch": 1.0848095355799072, "grad_norm": 0.8328781723976135, "learning_rate": 1.0204477975787955e-05, "loss": 1.5274, "mean_token_accuracy": 0.6462227056423823, "num_tokens": 1656134359.0, "step": 9875 }, { "entropy": 1.6952139933904011, "epoch": 1.08491939249128, "grad_norm": 0.6985744833946228, "learning_rate": 1.0202883126660142e-05, "loss": 1.4285, "mean_token_accuracy": 0.641810322801272, "num_tokens": 1656339761.0, "step": 9876 }, { "entropy": 1.6837367415428162, "epoch": 1.085029249402653, "grad_norm": 0.6384702324867249, "learning_rate": 1.020128830276072e-05, "loss": 1.4694, "mean_token_accuracy": 0.645171602567037, "num_tokens": 1656548333.0, "step": 9877 }, { "entropy": 1.650167852640152, "epoch": 1.0851391063140259, "grad_norm": 0.6957221627235413, "learning_rate": 1.0199693504140165e-05, "loss": 1.3905, "mean_token_accuracy": 0.6591930339733759, "num_tokens": 1656743652.0, "step": 9878 }, { "entropy": 1.6500220100084941, "epoch": 1.085248963225399, "grad_norm": 0.6716198325157166, "learning_rate": 1.0198098730848947e-05, "loss": 1.4271, "mean_token_accuracy": 0.6517676363388697, "num_tokens": 1656927088.0, "step": 9879 }, { "entropy": 1.7115015387535095, "epoch": 1.0853588201367719, "grad_norm": 0.6852779388427734, "learning_rate": 1.0196503982937545e-05, "loss": 1.3624, "mean_token_accuracy": 0.6609803885221481, "num_tokens": 1657077951.0, "step": 9880 }, { "entropy": 1.6878548562526703, "epoch": 1.0854686770481448, "grad_norm": 0.6171632409095764, "learning_rate": 1.0194909260456428e-05, "loss": 1.2938, "mean_token_accuracy": 0.6820201476414999, "num_tokens": 1657237315.0, "step": 9881 }, { "entropy": 1.6748429437478383, "epoch": 1.0855785339595176, "grad_norm": 0.6681820154190063, "learning_rate": 1.0193314563456074e-05, "loss": 1.3424, "mean_token_accuracy": 0.6564933856328329, "num_tokens": 1657386937.0, "step": 9882 }, { "entropy": 1.7561264435450237, "epoch": 1.0856883908708908, "grad_norm": 0.8080701231956482, "learning_rate": 1.0191719891986947e-05, "loss": 1.452, "mean_token_accuracy": 0.6485906491676966, "num_tokens": 1657566975.0, "step": 9883 }, { "entropy": 1.7150229513645172, "epoch": 1.0857982477822636, "grad_norm": 0.6068223714828491, "learning_rate": 1.0190125246099525e-05, "loss": 1.1525, "mean_token_accuracy": 0.6776071439186732, "num_tokens": 1657768813.0, "step": 9884 }, { "entropy": 1.7351139684518178, "epoch": 1.0859081046936365, "grad_norm": 0.706877589225769, "learning_rate": 1.0188530625844269e-05, "loss": 1.372, "mean_token_accuracy": 0.6516173481941223, "num_tokens": 1657909155.0, "step": 9885 }, { "entropy": 1.7090483804543812, "epoch": 1.0860179616050094, "grad_norm": 0.7106319665908813, "learning_rate": 1.0186936031271654e-05, "loss": 1.3158, "mean_token_accuracy": 0.6649338553349177, "num_tokens": 1658033454.0, "step": 9886 }, { "entropy": 1.7353846828142803, "epoch": 1.0861278185163825, "grad_norm": 0.7911872267723083, "learning_rate": 1.0185341462432152e-05, "loss": 1.3776, "mean_token_accuracy": 0.6581158141295115, "num_tokens": 1658185568.0, "step": 9887 }, { "entropy": 1.693009227514267, "epoch": 1.0862376754277554, "grad_norm": 0.7232357859611511, "learning_rate": 1.018374691937622e-05, "loss": 1.3956, "mean_token_accuracy": 0.6537212679783503, "num_tokens": 1658345153.0, "step": 9888 }, { "entropy": 1.6881878475348155, "epoch": 1.0863475323391283, "grad_norm": 0.6509016156196594, "learning_rate": 1.0182152402154332e-05, "loss": 1.4972, "mean_token_accuracy": 0.6434793770313263, "num_tokens": 1658541107.0, "step": 9889 }, { "entropy": 1.7099956174691517, "epoch": 1.0864573892505012, "grad_norm": 0.7336589694023132, "learning_rate": 1.0180557910816955e-05, "loss": 1.4108, "mean_token_accuracy": 0.6635782122612, "num_tokens": 1658686363.0, "step": 9890 }, { "entropy": 1.6696670254071553, "epoch": 1.086567246161874, "grad_norm": 0.7499677538871765, "learning_rate": 1.0178963445414546e-05, "loss": 1.297, "mean_token_accuracy": 0.6625167379776636, "num_tokens": 1658800203.0, "step": 9891 }, { "entropy": 1.7122901181379955, "epoch": 1.0866771030732472, "grad_norm": 0.7370545864105225, "learning_rate": 1.0177369005997576e-05, "loss": 1.4498, "mean_token_accuracy": 0.6478169759114584, "num_tokens": 1658963986.0, "step": 9892 }, { "entropy": 1.7717590828736622, "epoch": 1.08678695998462, "grad_norm": 0.72324138879776, "learning_rate": 1.0175774592616509e-05, "loss": 1.487, "mean_token_accuracy": 0.6404697100321451, "num_tokens": 1659147012.0, "step": 9893 }, { "entropy": 1.731563498576482, "epoch": 1.086896816895993, "grad_norm": 0.7684550881385803, "learning_rate": 1.0174180205321801e-05, "loss": 1.3065, "mean_token_accuracy": 0.675625761349996, "num_tokens": 1659280859.0, "step": 9894 }, { "entropy": 1.7210414310296376, "epoch": 1.0870066738073658, "grad_norm": 0.7703231573104858, "learning_rate": 1.017258584416392e-05, "loss": 1.3729, "mean_token_accuracy": 0.6620122243960699, "num_tokens": 1659426959.0, "step": 9895 }, { "entropy": 1.713478038708369, "epoch": 1.087116530718739, "grad_norm": 0.9372931718826294, "learning_rate": 1.0170991509193324e-05, "loss": 1.3353, "mean_token_accuracy": 0.6640975425640742, "num_tokens": 1659591324.0, "step": 9896 }, { "entropy": 1.6873964667320251, "epoch": 1.0872263876301118, "grad_norm": 0.5912502408027649, "learning_rate": 1.0169397200460469e-05, "loss": 1.3739, "mean_token_accuracy": 0.6547368913888931, "num_tokens": 1659753168.0, "step": 9897 }, { "entropy": 1.67390971382459, "epoch": 1.0873362445414847, "grad_norm": 0.6598351001739502, "learning_rate": 1.0167802918015821e-05, "loss": 1.2891, "mean_token_accuracy": 0.672528882821401, "num_tokens": 1659897374.0, "step": 9898 }, { "entropy": 1.6686325172583263, "epoch": 1.0874461014528576, "grad_norm": 0.7137023210525513, "learning_rate": 1.0166208661909837e-05, "loss": 1.2901, "mean_token_accuracy": 0.6805033435424169, "num_tokens": 1660054636.0, "step": 9899 }, { "entropy": 1.7360956966876984, "epoch": 1.0875559583642307, "grad_norm": 0.7920895218849182, "learning_rate": 1.0164614432192973e-05, "loss": 1.5854, "mean_token_accuracy": 0.6431082089742025, "num_tokens": 1660269145.0, "step": 9900 }, { "entropy": 1.7179748117923737, "epoch": 1.0876658152756036, "grad_norm": 0.738042414188385, "learning_rate": 1.0163020228915686e-05, "loss": 1.4252, "mean_token_accuracy": 0.6560932546854019, "num_tokens": 1660431180.0, "step": 9901 }, { "entropy": 1.7385500172773998, "epoch": 1.0877756721869765, "grad_norm": 0.6019150018692017, "learning_rate": 1.0161426052128432e-05, "loss": 1.4104, "mean_token_accuracy": 0.6502055029074351, "num_tokens": 1660601241.0, "step": 9902 }, { "entropy": 1.6881616115570068, "epoch": 1.0878855290983493, "grad_norm": 0.7434528470039368, "learning_rate": 1.0159831901881663e-05, "loss": 1.2115, "mean_token_accuracy": 0.6797519276539484, "num_tokens": 1660764313.0, "step": 9903 }, { "entropy": 1.7372826635837555, "epoch": 1.0879953860097222, "grad_norm": 0.7365524172782898, "learning_rate": 1.0158237778225835e-05, "loss": 1.515, "mean_token_accuracy": 0.6425711264212927, "num_tokens": 1660964668.0, "step": 9904 }, { "entropy": 1.7024028201897938, "epoch": 1.0881052429210953, "grad_norm": 0.8199495077133179, "learning_rate": 1.0156643681211404e-05, "loss": 1.3366, "mean_token_accuracy": 0.6539936810731888, "num_tokens": 1661112643.0, "step": 9905 }, { "entropy": 1.6678318579991658, "epoch": 1.0882150998324682, "grad_norm": 0.816861629486084, "learning_rate": 1.0155049610888823e-05, "loss": 1.2508, "mean_token_accuracy": 0.6761003037293752, "num_tokens": 1661236856.0, "step": 9906 }, { "entropy": 1.6942639748255413, "epoch": 1.088324956743841, "grad_norm": 0.7153278589248657, "learning_rate": 1.0153455567308537e-05, "loss": 1.4531, "mean_token_accuracy": 0.6470590929190317, "num_tokens": 1661388508.0, "step": 9907 }, { "entropy": 1.7340314586957295, "epoch": 1.088434813655214, "grad_norm": 0.6582464575767517, "learning_rate": 1.0151861550521006e-05, "loss": 1.5217, "mean_token_accuracy": 0.6363318214813868, "num_tokens": 1661604834.0, "step": 9908 }, { "entropy": 1.7097918391227722, "epoch": 1.088544670566587, "grad_norm": 0.6414450407028198, "learning_rate": 1.0150267560576667e-05, "loss": 1.5432, "mean_token_accuracy": 0.6336255719264349, "num_tokens": 1661801434.0, "step": 9909 }, { "entropy": 1.7380212744077046, "epoch": 1.08865452747796, "grad_norm": 0.7711119055747986, "learning_rate": 1.014867359752598e-05, "loss": 1.35, "mean_token_accuracy": 0.6672601054112116, "num_tokens": 1661937596.0, "step": 9910 }, { "entropy": 1.678007831176122, "epoch": 1.0887643843893329, "grad_norm": 0.8995655179023743, "learning_rate": 1.0147079661419393e-05, "loss": 1.4534, "mean_token_accuracy": 0.6521992137034734, "num_tokens": 1662131802.0, "step": 9911 }, { "entropy": 1.7503215471903484, "epoch": 1.0888742413007058, "grad_norm": 0.7043768167495728, "learning_rate": 1.0145485752307347e-05, "loss": 1.3609, "mean_token_accuracy": 0.6602404067913691, "num_tokens": 1662303751.0, "step": 9912 }, { "entropy": 1.7225368320941925, "epoch": 1.0889840982120789, "grad_norm": 0.6886836290359497, "learning_rate": 1.0143891870240293e-05, "loss": 1.5237, "mean_token_accuracy": 0.6407229552666346, "num_tokens": 1662476819.0, "step": 9913 }, { "entropy": 1.7340431312719982, "epoch": 1.0890939551234518, "grad_norm": 0.7423052787780762, "learning_rate": 1.0142298015268678e-05, "loss": 1.3407, "mean_token_accuracy": 0.6658698171377182, "num_tokens": 1662668061.0, "step": 9914 }, { "entropy": 1.6915172338485718, "epoch": 1.0892038120348246, "grad_norm": 0.640897274017334, "learning_rate": 1.0140704187442942e-05, "loss": 1.3072, "mean_token_accuracy": 0.6653468410174052, "num_tokens": 1662812165.0, "step": 9915 }, { "entropy": 1.754497468471527, "epoch": 1.0893136689461975, "grad_norm": 0.7400673627853394, "learning_rate": 1.0139110386813528e-05, "loss": 1.4021, "mean_token_accuracy": 0.6460580776135126, "num_tokens": 1662991292.0, "step": 9916 }, { "entropy": 1.70304274559021, "epoch": 1.0894235258575704, "grad_norm": 0.7350078821182251, "learning_rate": 1.0137516613430887e-05, "loss": 1.3661, "mean_token_accuracy": 0.6609525481859843, "num_tokens": 1663118044.0, "step": 9917 }, { "entropy": 1.7494067947069805, "epoch": 1.0895333827689435, "grad_norm": 0.81744784116745, "learning_rate": 1.0135922867345455e-05, "loss": 1.5288, "mean_token_accuracy": 0.6569081693887711, "num_tokens": 1663262827.0, "step": 9918 }, { "entropy": 1.7277030646800995, "epoch": 1.0896432396803164, "grad_norm": 0.9427797794342041, "learning_rate": 1.0134329148607675e-05, "loss": 1.4552, "mean_token_accuracy": 0.6570529192686081, "num_tokens": 1663396238.0, "step": 9919 }, { "entropy": 1.661819577217102, "epoch": 1.0897530965916893, "grad_norm": 0.7879918217658997, "learning_rate": 1.0132735457267988e-05, "loss": 1.3381, "mean_token_accuracy": 0.6635206490755081, "num_tokens": 1663526020.0, "step": 9920 }, { "entropy": 1.6900553206602733, "epoch": 1.0898629535030622, "grad_norm": 0.6344413161277771, "learning_rate": 1.0131141793376833e-05, "loss": 1.3869, "mean_token_accuracy": 0.6595876067876816, "num_tokens": 1663719329.0, "step": 9921 }, { "entropy": 1.7128386199474335, "epoch": 1.0899728104144353, "grad_norm": 0.658137321472168, "learning_rate": 1.012954815698465e-05, "loss": 1.5023, "mean_token_accuracy": 0.6380962332089742, "num_tokens": 1663912510.0, "step": 9922 }, { "entropy": 1.664399077494939, "epoch": 1.0900826673258082, "grad_norm": 0.7193596363067627, "learning_rate": 1.0127954548141872e-05, "loss": 1.4288, "mean_token_accuracy": 0.6621369272470474, "num_tokens": 1664042226.0, "step": 9923 }, { "entropy": 1.6875610550244649, "epoch": 1.090192524237181, "grad_norm": 0.6304190158843994, "learning_rate": 1.012636096689894e-05, "loss": 1.3007, "mean_token_accuracy": 0.658969427148501, "num_tokens": 1664197536.0, "step": 9924 }, { "entropy": 1.6750660041968028, "epoch": 1.090302381148554, "grad_norm": 0.6103596091270447, "learning_rate": 1.0124767413306294e-05, "loss": 1.5455, "mean_token_accuracy": 0.636797179778417, "num_tokens": 1664372063.0, "step": 9925 }, { "entropy": 1.7024609645207722, "epoch": 1.090412238059927, "grad_norm": 0.7331560850143433, "learning_rate": 1.0123173887414361e-05, "loss": 1.2627, "mean_token_accuracy": 0.6728994299968084, "num_tokens": 1664500629.0, "step": 9926 }, { "entropy": 1.7319872776667278, "epoch": 1.0905220949713, "grad_norm": 0.6502282619476318, "learning_rate": 1.012158038927358e-05, "loss": 1.3303, "mean_token_accuracy": 0.6671723872423172, "num_tokens": 1664704233.0, "step": 9927 }, { "entropy": 1.7720895409584045, "epoch": 1.0906319518826728, "grad_norm": 0.8043599128723145, "learning_rate": 1.0119986918934386e-05, "loss": 1.446, "mean_token_accuracy": 0.6564847181240717, "num_tokens": 1664856266.0, "step": 9928 }, { "entropy": 1.7761450012524922, "epoch": 1.0907418087940457, "grad_norm": 0.6992666721343994, "learning_rate": 1.0118393476447204e-05, "loss": 1.3832, "mean_token_accuracy": 0.6636711110671362, "num_tokens": 1665006546.0, "step": 9929 }, { "entropy": 1.6626348197460175, "epoch": 1.0908516657054186, "grad_norm": 0.6073324680328369, "learning_rate": 1.0116800061862475e-05, "loss": 1.2507, "mean_token_accuracy": 0.67312224706014, "num_tokens": 1665181716.0, "step": 9930 }, { "entropy": 1.6563841303189595, "epoch": 1.0909615226167917, "grad_norm": 0.6241437196731567, "learning_rate": 1.0115206675230626e-05, "loss": 1.3984, "mean_token_accuracy": 0.6542405039072037, "num_tokens": 1665356676.0, "step": 9931 }, { "entropy": 1.689384828011195, "epoch": 1.0910713795281646, "grad_norm": 0.7169914245605469, "learning_rate": 1.011361331660209e-05, "loss": 1.3182, "mean_token_accuracy": 0.6685070743163427, "num_tokens": 1665542132.0, "step": 9932 }, { "entropy": 1.7403077880541484, "epoch": 1.0911812364395375, "grad_norm": 0.6693525910377502, "learning_rate": 1.0112019986027289e-05, "loss": 1.5033, "mean_token_accuracy": 0.6436150471369425, "num_tokens": 1665764372.0, "step": 9933 }, { "entropy": 1.7170814077059429, "epoch": 1.0912910933509103, "grad_norm": 0.6054666638374329, "learning_rate": 1.0110426683556657e-05, "loss": 1.3651, "mean_token_accuracy": 0.6551655034224192, "num_tokens": 1665966456.0, "step": 9934 }, { "entropy": 1.7085198163986206, "epoch": 1.0914009502622835, "grad_norm": 0.6800384521484375, "learning_rate": 1.0108833409240617e-05, "loss": 1.36, "mean_token_accuracy": 0.6609861155351003, "num_tokens": 1666121414.0, "step": 9935 }, { "entropy": 1.7367458045482635, "epoch": 1.0915108071736563, "grad_norm": 0.5863933563232422, "learning_rate": 1.0107240163129599e-05, "loss": 1.4401, "mean_token_accuracy": 0.6531588186820348, "num_tokens": 1666295279.0, "step": 9936 }, { "entropy": 1.7235424220561981, "epoch": 1.0916206640850292, "grad_norm": 0.7675713896751404, "learning_rate": 1.010564694527403e-05, "loss": 1.5024, "mean_token_accuracy": 0.6675131072600683, "num_tokens": 1666457168.0, "step": 9937 }, { "entropy": 1.682725340127945, "epoch": 1.091730520996402, "grad_norm": 0.7586541175842285, "learning_rate": 1.0104053755724332e-05, "loss": 1.3828, "mean_token_accuracy": 0.6533033003409704, "num_tokens": 1666667783.0, "step": 9938 }, { "entropy": 1.736223300298055, "epoch": 1.0918403779077752, "grad_norm": 0.7098135948181152, "learning_rate": 1.0102460594530926e-05, "loss": 1.3948, "mean_token_accuracy": 0.6576603204011917, "num_tokens": 1666801846.0, "step": 9939 }, { "entropy": 1.7084941665331523, "epoch": 1.091950234819148, "grad_norm": 0.724420964717865, "learning_rate": 1.0100867461744241e-05, "loss": 1.4758, "mean_token_accuracy": 0.646695002913475, "num_tokens": 1666982440.0, "step": 9940 }, { "entropy": 1.74443985025088, "epoch": 1.092060091730521, "grad_norm": 0.7071523666381836, "learning_rate": 1.0099274357414692e-05, "loss": 1.4043, "mean_token_accuracy": 0.6590453336636225, "num_tokens": 1667133865.0, "step": 9941 }, { "entropy": 1.6637418170770009, "epoch": 1.0921699486418939, "grad_norm": 0.591380774974823, "learning_rate": 1.0097681281592706e-05, "loss": 1.3282, "mean_token_accuracy": 0.6629678010940552, "num_tokens": 1667279421.0, "step": 9942 }, { "entropy": 1.6820252339045207, "epoch": 1.0922798055532668, "grad_norm": 0.6717654466629028, "learning_rate": 1.0096088234328702e-05, "loss": 1.4755, "mean_token_accuracy": 0.6481594145298004, "num_tokens": 1667473211.0, "step": 9943 }, { "entropy": 1.709025154511134, "epoch": 1.0923896624646399, "grad_norm": 0.6753855347633362, "learning_rate": 1.0094495215673097e-05, "loss": 1.2966, "mean_token_accuracy": 0.667145162820816, "num_tokens": 1667604956.0, "step": 9944 }, { "entropy": 1.625400871038437, "epoch": 1.0924995193760128, "grad_norm": 0.64048832654953, "learning_rate": 1.009290222567631e-05, "loss": 1.3972, "mean_token_accuracy": 0.660579577088356, "num_tokens": 1667823844.0, "step": 9945 }, { "entropy": 1.7338625093301137, "epoch": 1.0926093762873856, "grad_norm": 0.7985219359397888, "learning_rate": 1.009130926438876e-05, "loss": 1.6674, "mean_token_accuracy": 0.6493135193983713, "num_tokens": 1668007284.0, "step": 9946 }, { "entropy": 1.6951390206813812, "epoch": 1.0927192331987585, "grad_norm": 0.683193027973175, "learning_rate": 1.008971633186086e-05, "loss": 1.2785, "mean_token_accuracy": 0.6708967983722687, "num_tokens": 1668145759.0, "step": 9947 }, { "entropy": 1.6314593156178792, "epoch": 1.0928290901101316, "grad_norm": 0.7132555842399597, "learning_rate": 1.0088123428143029e-05, "loss": 1.3441, "mean_token_accuracy": 0.681462566057841, "num_tokens": 1668277008.0, "step": 9948 }, { "entropy": 1.7529467344284058, "epoch": 1.0929389470215045, "grad_norm": 0.670924186706543, "learning_rate": 1.008653055328568e-05, "loss": 1.429, "mean_token_accuracy": 0.641497532526652, "num_tokens": 1668483054.0, "step": 9949 }, { "entropy": 1.6520490248998005, "epoch": 1.0930488039328774, "grad_norm": 0.8519325256347656, "learning_rate": 1.0084937707339229e-05, "loss": 1.4219, "mean_token_accuracy": 0.6672419607639313, "num_tokens": 1668700174.0, "step": 9950 }, { "entropy": 1.736632893482844, "epoch": 1.0931586608442503, "grad_norm": 0.7080869674682617, "learning_rate": 1.0083344890354086e-05, "loss": 1.4226, "mean_token_accuracy": 0.6710045486688614, "num_tokens": 1668855553.0, "step": 9951 }, { "entropy": 1.742478887240092, "epoch": 1.0932685177556234, "grad_norm": 0.6985324025154114, "learning_rate": 1.0081752102380667e-05, "loss": 1.3687, "mean_token_accuracy": 0.6526035120089849, "num_tokens": 1669025165.0, "step": 9952 }, { "entropy": 1.7326435049374898, "epoch": 1.0933783746669963, "grad_norm": 0.6467759609222412, "learning_rate": 1.0080159343469373e-05, "loss": 1.3327, "mean_token_accuracy": 0.6626055538654327, "num_tokens": 1669163361.0, "step": 9953 }, { "entropy": 1.6840360065301259, "epoch": 1.0934882315783692, "grad_norm": 0.6494070291519165, "learning_rate": 1.0078566613670626e-05, "loss": 1.4666, "mean_token_accuracy": 0.6533608982960383, "num_tokens": 1669347018.0, "step": 9954 }, { "entropy": 1.717919021844864, "epoch": 1.093598088489742, "grad_norm": 0.6406670808792114, "learning_rate": 1.0076973913034833e-05, "loss": 1.3224, "mean_token_accuracy": 0.6631946166356405, "num_tokens": 1669490134.0, "step": 9955 }, { "entropy": 1.7073165476322174, "epoch": 1.093707945401115, "grad_norm": 0.7670049667358398, "learning_rate": 1.0075381241612396e-05, "loss": 1.3305, "mean_token_accuracy": 0.6583481182654699, "num_tokens": 1669620317.0, "step": 9956 }, { "entropy": 1.7269740998744965, "epoch": 1.093817802312488, "grad_norm": 0.8087154626846313, "learning_rate": 1.0073788599453727e-05, "loss": 1.3544, "mean_token_accuracy": 0.6522675156593323, "num_tokens": 1669805743.0, "step": 9957 }, { "entropy": 1.7226569155852, "epoch": 1.093927659223861, "grad_norm": 0.6575363874435425, "learning_rate": 1.0072195986609235e-05, "loss": 1.6043, "mean_token_accuracy": 0.6334054693579674, "num_tokens": 1670020161.0, "step": 9958 }, { "entropy": 1.7135057151317596, "epoch": 1.0940375161352338, "grad_norm": 0.6211276054382324, "learning_rate": 1.0070603403129315e-05, "loss": 1.4269, "mean_token_accuracy": 0.6416071703036627, "num_tokens": 1670240465.0, "step": 9959 }, { "entropy": 1.725637008746465, "epoch": 1.0941473730466067, "grad_norm": 0.7707021236419678, "learning_rate": 1.0069010849064382e-05, "loss": 1.3579, "mean_token_accuracy": 0.6549892872571945, "num_tokens": 1670366147.0, "step": 9960 }, { "entropy": 1.7043171326319377, "epoch": 1.0942572299579798, "grad_norm": 0.7570623755455017, "learning_rate": 1.0067418324464838e-05, "loss": 1.3259, "mean_token_accuracy": 0.6776840935150782, "num_tokens": 1670481710.0, "step": 9961 }, { "entropy": 1.7652468581994374, "epoch": 1.0943670868693527, "grad_norm": 0.6389201879501343, "learning_rate": 1.0065825829381082e-05, "loss": 1.5209, "mean_token_accuracy": 0.6202053825060526, "num_tokens": 1670709151.0, "step": 9962 }, { "entropy": 1.7265647252400715, "epoch": 1.0944769437807256, "grad_norm": 0.5859116911888123, "learning_rate": 1.0064233363863519e-05, "loss": 1.4948, "mean_token_accuracy": 0.6309501181046168, "num_tokens": 1670961566.0, "step": 9963 }, { "entropy": 1.7115404605865479, "epoch": 1.0945868006920985, "grad_norm": 0.6820839047431946, "learning_rate": 1.0062640927962546e-05, "loss": 1.3777, "mean_token_accuracy": 0.6574893345435461, "num_tokens": 1671153616.0, "step": 9964 }, { "entropy": 1.6882510085900624, "epoch": 1.0946966576034716, "grad_norm": 0.6248074769973755, "learning_rate": 1.0061048521728565e-05, "loss": 1.4502, "mean_token_accuracy": 0.655212844411532, "num_tokens": 1671336660.0, "step": 9965 }, { "entropy": 1.7038983503977458, "epoch": 1.0948065145148445, "grad_norm": 0.6728511452674866, "learning_rate": 1.0059456145211976e-05, "loss": 1.3841, "mean_token_accuracy": 0.6533484607934952, "num_tokens": 1671508735.0, "step": 9966 }, { "entropy": 1.6372570097446442, "epoch": 1.0949163714262173, "grad_norm": 0.7651037573814392, "learning_rate": 1.0057863798463178e-05, "loss": 1.436, "mean_token_accuracy": 0.6575490534305573, "num_tokens": 1671716110.0, "step": 9967 }, { "entropy": 1.7102164427439372, "epoch": 1.0950262283375902, "grad_norm": 1.415974736213684, "learning_rate": 1.0056271481532565e-05, "loss": 1.4185, "mean_token_accuracy": 0.6585031648476919, "num_tokens": 1671875078.0, "step": 9968 }, { "entropy": 1.7216593126455944, "epoch": 1.095136085248963, "grad_norm": 0.6866213083267212, "learning_rate": 1.0054679194470533e-05, "loss": 1.2383, "mean_token_accuracy": 0.6811109681924185, "num_tokens": 1672000646.0, "step": 9969 }, { "entropy": 1.666219154993693, "epoch": 1.0952459421603362, "grad_norm": 0.7179189324378967, "learning_rate": 1.0053086937327481e-05, "loss": 1.4343, "mean_token_accuracy": 0.6527023464441299, "num_tokens": 1672171592.0, "step": 9970 }, { "entropy": 1.707016150156657, "epoch": 1.095355799071709, "grad_norm": 0.6981037855148315, "learning_rate": 1.0051494710153797e-05, "loss": 1.5801, "mean_token_accuracy": 0.6489623288313547, "num_tokens": 1672358507.0, "step": 9971 }, { "entropy": 1.7377264102300007, "epoch": 1.095465655983082, "grad_norm": 0.7055451273918152, "learning_rate": 1.004990251299988e-05, "loss": 1.5114, "mean_token_accuracy": 0.6383561591307322, "num_tokens": 1672529837.0, "step": 9972 }, { "entropy": 1.708618571360906, "epoch": 1.0955755128944549, "grad_norm": 0.7005475163459778, "learning_rate": 1.0048310345916123e-05, "loss": 1.3085, "mean_token_accuracy": 0.6701053728659948, "num_tokens": 1672715868.0, "step": 9973 }, { "entropy": 1.7293658057848613, "epoch": 1.095685369805828, "grad_norm": 0.7964652180671692, "learning_rate": 1.0046718208952912e-05, "loss": 1.5353, "mean_token_accuracy": 0.6405654648939768, "num_tokens": 1672895461.0, "step": 9974 }, { "entropy": 1.6681481798489888, "epoch": 1.0957952267172009, "grad_norm": 0.6238622069358826, "learning_rate": 1.0045126102160641e-05, "loss": 1.4342, "mean_token_accuracy": 0.6553277472654978, "num_tokens": 1673112425.0, "step": 9975 }, { "entropy": 1.700081080198288, "epoch": 1.0959050836285738, "grad_norm": 0.6713470816612244, "learning_rate": 1.0043534025589702e-05, "loss": 1.3626, "mean_token_accuracy": 0.6709864139556885, "num_tokens": 1673262686.0, "step": 9976 }, { "entropy": 1.6879205107688904, "epoch": 1.0960149405399466, "grad_norm": 0.6403784155845642, "learning_rate": 1.004194197929047e-05, "loss": 1.4501, "mean_token_accuracy": 0.637953132390976, "num_tokens": 1673474045.0, "step": 9977 }, { "entropy": 1.658721258242925, "epoch": 1.0961247974513197, "grad_norm": 0.6213784217834473, "learning_rate": 1.004034996331335e-05, "loss": 1.3654, "mean_token_accuracy": 0.6668369323015213, "num_tokens": 1673639563.0, "step": 9978 }, { "entropy": 1.7311672468980153, "epoch": 1.0962346543626926, "grad_norm": 0.6326048374176025, "learning_rate": 1.0038757977708722e-05, "loss": 1.4684, "mean_token_accuracy": 0.6399320314327875, "num_tokens": 1673825028.0, "step": 9979 }, { "entropy": 1.6954893171787262, "epoch": 1.0963445112740655, "grad_norm": 0.9208673238754272, "learning_rate": 1.003716602252697e-05, "loss": 1.2998, "mean_token_accuracy": 0.6691089371840159, "num_tokens": 1673951048.0, "step": 9980 }, { "entropy": 1.7091851830482483, "epoch": 1.0964543681854384, "grad_norm": 63.81305694580078, "learning_rate": 1.0035574097818478e-05, "loss": 1.4792, "mean_token_accuracy": 0.6564100285371145, "num_tokens": 1674122349.0, "step": 9981 }, { "entropy": 1.686219314734141, "epoch": 1.0965642250968115, "grad_norm": 0.6443544030189514, "learning_rate": 1.0033982203633632e-05, "loss": 1.3514, "mean_token_accuracy": 0.6670923282702764, "num_tokens": 1674297304.0, "step": 9982 }, { "entropy": 1.7111739615599315, "epoch": 1.0966740820081844, "grad_norm": 0.7431286573410034, "learning_rate": 1.0032390340022813e-05, "loss": 1.4799, "mean_token_accuracy": 0.6537399043639501, "num_tokens": 1674461127.0, "step": 9983 }, { "entropy": 1.71789946158727, "epoch": 1.0967839389195573, "grad_norm": 0.683925449848175, "learning_rate": 1.0030798507036408e-05, "loss": 1.4389, "mean_token_accuracy": 0.6607331385215124, "num_tokens": 1674594551.0, "step": 9984 }, { "entropy": 1.6557001272837322, "epoch": 1.0968937958309302, "grad_norm": 0.6890281438827515, "learning_rate": 1.0029206704724787e-05, "loss": 1.3217, "mean_token_accuracy": 0.6550944646199545, "num_tokens": 1674795787.0, "step": 9985 }, { "entropy": 1.6847777664661407, "epoch": 1.097003652742303, "grad_norm": 0.6635385751724243, "learning_rate": 1.002761493313834e-05, "loss": 1.3433, "mean_token_accuracy": 0.6774813532829285, "num_tokens": 1674947703.0, "step": 9986 }, { "entropy": 1.7151075502236683, "epoch": 1.0971135096536762, "grad_norm": 0.7632783055305481, "learning_rate": 1.0026023192327441e-05, "loss": 1.4479, "mean_token_accuracy": 0.6485897650321325, "num_tokens": 1675092387.0, "step": 9987 }, { "entropy": 1.646423081556956, "epoch": 1.097223366565049, "grad_norm": 0.7513181567192078, "learning_rate": 1.0024431482342471e-05, "loss": 1.31, "mean_token_accuracy": 0.6654202590386072, "num_tokens": 1675232012.0, "step": 9988 }, { "entropy": 1.718563437461853, "epoch": 1.097333223476422, "grad_norm": 0.621102511882782, "learning_rate": 1.0022839803233804e-05, "loss": 1.3573, "mean_token_accuracy": 0.6684616009394327, "num_tokens": 1675402739.0, "step": 9989 }, { "entropy": 1.605027476946513, "epoch": 1.0974430803877948, "grad_norm": 0.7413462996482849, "learning_rate": 1.0021248155051817e-05, "loss": 1.1547, "mean_token_accuracy": 0.6949248611927032, "num_tokens": 1675515525.0, "step": 9990 }, { "entropy": 1.6858182946840923, "epoch": 1.097552937299168, "grad_norm": 0.6468738317489624, "learning_rate": 1.0019656537846883e-05, "loss": 1.2763, "mean_token_accuracy": 0.6762718011935552, "num_tokens": 1675645268.0, "step": 9991 }, { "entropy": 1.6803157031536102, "epoch": 1.0976627942105408, "grad_norm": 0.5877875685691833, "learning_rate": 1.0018064951669377e-05, "loss": 1.3821, "mean_token_accuracy": 0.6426830291748047, "num_tokens": 1675906428.0, "step": 9992 }, { "entropy": 1.7092790802319844, "epoch": 1.0977726511219137, "grad_norm": 0.7025560140609741, "learning_rate": 1.0016473396569676e-05, "loss": 1.2588, "mean_token_accuracy": 0.6746116280555725, "num_tokens": 1676046321.0, "step": 9993 }, { "entropy": 1.7590789496898651, "epoch": 1.0978825080332866, "grad_norm": 0.792087733745575, "learning_rate": 1.0014881872598147e-05, "loss": 1.2788, "mean_token_accuracy": 0.6598645945390066, "num_tokens": 1676194845.0, "step": 9994 }, { "entropy": 1.743706077337265, "epoch": 1.0979923649446597, "grad_norm": 0.7016844749450684, "learning_rate": 1.0013290379805164e-05, "loss": 1.4946, "mean_token_accuracy": 0.6423351069291433, "num_tokens": 1676362780.0, "step": 9995 }, { "entropy": 1.7073632975419362, "epoch": 1.0981022218560326, "grad_norm": 0.8244330286979675, "learning_rate": 1.00116989182411e-05, "loss": 1.4003, "mean_token_accuracy": 0.6536079297463099, "num_tokens": 1676508306.0, "step": 9996 }, { "entropy": 1.731406440337499, "epoch": 1.0982120787674055, "grad_norm": 0.7456120252609253, "learning_rate": 1.0010107487956311e-05, "loss": 1.3884, "mean_token_accuracy": 0.6658417532841364, "num_tokens": 1676649132.0, "step": 9997 }, { "entropy": 1.7547302941481273, "epoch": 1.0983219356787783, "grad_norm": 0.6900354623794556, "learning_rate": 1.0008516089001178e-05, "loss": 1.4422, "mean_token_accuracy": 0.6387932747602463, "num_tokens": 1676829373.0, "step": 9998 }, { "entropy": 1.7983198165893555, "epoch": 1.0984317925901514, "grad_norm": 0.6247063875198364, "learning_rate": 1.0006924721426069e-05, "loss": 1.5958, "mean_token_accuracy": 0.6131992489099503, "num_tokens": 1677076619.0, "step": 9999 }, { "entropy": 1.753205378850301, "epoch": 1.0985416495015243, "grad_norm": 0.6765521764755249, "learning_rate": 1.0005333385281338e-05, "loss": 1.5413, "mean_token_accuracy": 0.6390999456246694, "num_tokens": 1677305713.0, "step": 10000 }, { "entropy": 1.6796150207519531, "epoch": 1.0986515064128972, "grad_norm": 0.7337918281555176, "learning_rate": 1.000374208061736e-05, "loss": 1.2874, "mean_token_accuracy": 0.6799869785706202, "num_tokens": 1677445019.0, "step": 10001 }, { "entropy": 1.711920936902364, "epoch": 1.09876136332427, "grad_norm": 0.5600767731666565, "learning_rate": 1.0002150807484497e-05, "loss": 1.3903, "mean_token_accuracy": 0.653274749716123, "num_tokens": 1677642988.0, "step": 10002 }, { "entropy": 1.733855148156484, "epoch": 1.098871220235643, "grad_norm": 0.7659547328948975, "learning_rate": 1.0000559565933109e-05, "loss": 1.2707, "mean_token_accuracy": 0.6706570088863373, "num_tokens": 1677754531.0, "step": 10003 }, { "entropy": 1.710139532883962, "epoch": 1.098981077147016, "grad_norm": 0.75276118516922, "learning_rate": 9.998968356013561e-06, "loss": 1.3375, "mean_token_accuracy": 0.6552453736464182, "num_tokens": 1677905273.0, "step": 10004 }, { "entropy": 1.7893259624640148, "epoch": 1.099090934058389, "grad_norm": 0.6971526145935059, "learning_rate": 9.997377177776212e-06, "loss": 1.5402, "mean_token_accuracy": 0.6377905060847601, "num_tokens": 1678053177.0, "step": 10005 }, { "entropy": 1.6512891054153442, "epoch": 1.0992007909697619, "grad_norm": 0.6249794960021973, "learning_rate": 9.995786031271428e-06, "loss": 1.4462, "mean_token_accuracy": 0.6493960867325465, "num_tokens": 1678225106.0, "step": 10006 }, { "entropy": 1.6760885218779247, "epoch": 1.0993106478811348, "grad_norm": 0.772819995880127, "learning_rate": 9.99419491654956e-06, "loss": 1.4534, "mean_token_accuracy": 0.659936378399531, "num_tokens": 1678418651.0, "step": 10007 }, { "entropy": 1.7428101301193237, "epoch": 1.0994205047925079, "grad_norm": 0.6936253905296326, "learning_rate": 9.992603833660972e-06, "loss": 1.3324, "mean_token_accuracy": 0.6534697463115057, "num_tokens": 1678577045.0, "step": 10008 }, { "entropy": 1.7122528354326885, "epoch": 1.0995303617038807, "grad_norm": 0.6630085110664368, "learning_rate": 9.991012782656015e-06, "loss": 1.6186, "mean_token_accuracy": 0.6261989126602808, "num_tokens": 1678761263.0, "step": 10009 }, { "entropy": 1.708151896794637, "epoch": 1.0996402186152536, "grad_norm": 0.673546314239502, "learning_rate": 9.989421763585052e-06, "loss": 1.4439, "mean_token_accuracy": 0.6554606457551321, "num_tokens": 1678951453.0, "step": 10010 }, { "entropy": 1.6987049877643585, "epoch": 1.0997500755266265, "grad_norm": 0.7956987023353577, "learning_rate": 9.987830776498435e-06, "loss": 1.5238, "mean_token_accuracy": 0.6471638679504395, "num_tokens": 1679144538.0, "step": 10011 }, { "entropy": 1.668361673752467, "epoch": 1.0998599324379996, "grad_norm": 0.6938173174858093, "learning_rate": 9.986239821446517e-06, "loss": 1.3803, "mean_token_accuracy": 0.6642382641633352, "num_tokens": 1679298989.0, "step": 10012 }, { "entropy": 1.6760740081469219, "epoch": 1.0999697893493725, "grad_norm": 0.8040129542350769, "learning_rate": 9.984648898479652e-06, "loss": 1.5746, "mean_token_accuracy": 0.6352566902836164, "num_tokens": 1679527067.0, "step": 10013 }, { "entropy": 1.6475440760453541, "epoch": 1.1000796462607454, "grad_norm": 0.5550295114517212, "learning_rate": 9.983058007648192e-06, "loss": 1.4691, "mean_token_accuracy": 0.6531208554903666, "num_tokens": 1679766761.0, "step": 10014 }, { "entropy": 1.679537256558736, "epoch": 1.1001895031721183, "grad_norm": 0.7648224830627441, "learning_rate": 9.981467149002486e-06, "loss": 1.3501, "mean_token_accuracy": 0.6587068686882654, "num_tokens": 1679937622.0, "step": 10015 }, { "entropy": 1.7292282382647197, "epoch": 1.1002993600834912, "grad_norm": 0.6542650461196899, "learning_rate": 9.979876322592886e-06, "loss": 1.4841, "mean_token_accuracy": 0.6392849683761597, "num_tokens": 1680115451.0, "step": 10016 }, { "entropy": 1.6712367534637451, "epoch": 1.1004092169948643, "grad_norm": 0.6149039268493652, "learning_rate": 9.978285528469744e-06, "loss": 1.3236, "mean_token_accuracy": 0.6738909035921097, "num_tokens": 1680311480.0, "step": 10017 }, { "entropy": 1.7433668871720631, "epoch": 1.1005190739062372, "grad_norm": 0.7319428324699402, "learning_rate": 9.976694766683401e-06, "loss": 1.3905, "mean_token_accuracy": 0.6612731317679087, "num_tokens": 1680488251.0, "step": 10018 }, { "entropy": 1.7435412506262462, "epoch": 1.10062893081761, "grad_norm": 0.7963857650756836, "learning_rate": 9.97510403728421e-06, "loss": 1.5602, "mean_token_accuracy": 0.6402197231849035, "num_tokens": 1680631108.0, "step": 10019 }, { "entropy": 1.6780545115470886, "epoch": 1.100738787728983, "grad_norm": 0.685632050037384, "learning_rate": 9.973513340322515e-06, "loss": 1.4263, "mean_token_accuracy": 0.6653469949960709, "num_tokens": 1680763623.0, "step": 10020 }, { "entropy": 1.6828766167163849, "epoch": 1.100848644640356, "grad_norm": 0.6240759491920471, "learning_rate": 9.971922675848655e-06, "loss": 1.308, "mean_token_accuracy": 0.6743075450261434, "num_tokens": 1680908342.0, "step": 10021 }, { "entropy": 1.705411026875178, "epoch": 1.100958501551729, "grad_norm": 0.7477165460586548, "learning_rate": 9.970332043912982e-06, "loss": 1.3975, "mean_token_accuracy": 0.6521026045084, "num_tokens": 1681071612.0, "step": 10022 }, { "entropy": 1.7263496617476146, "epoch": 1.1010683584631018, "grad_norm": 0.7266680002212524, "learning_rate": 9.968741444565839e-06, "loss": 1.2693, "mean_token_accuracy": 0.6663507620493571, "num_tokens": 1681213720.0, "step": 10023 }, { "entropy": 1.699666867653529, "epoch": 1.1011782153744747, "grad_norm": 0.7280838489532471, "learning_rate": 9.96715087785756e-06, "loss": 1.3408, "mean_token_accuracy": 0.6538164764642715, "num_tokens": 1681381420.0, "step": 10024 }, { "entropy": 1.7014712989330292, "epoch": 1.1012880722858478, "grad_norm": 0.7642046809196472, "learning_rate": 9.965560343838494e-06, "loss": 1.3778, "mean_token_accuracy": 0.65878793100516, "num_tokens": 1681534063.0, "step": 10025 }, { "entropy": 1.7197924951712291, "epoch": 1.1013979291972207, "grad_norm": 0.8084545135498047, "learning_rate": 9.963969842558979e-06, "loss": 1.4746, "mean_token_accuracy": 0.6574273506800333, "num_tokens": 1681719997.0, "step": 10026 }, { "entropy": 1.6657897333304088, "epoch": 1.1015077861085936, "grad_norm": 0.6369723081588745, "learning_rate": 9.962379374069344e-06, "loss": 1.5711, "mean_token_accuracy": 0.6259370992581049, "num_tokens": 1681973686.0, "step": 10027 }, { "entropy": 1.6844724615414937, "epoch": 1.1016176430199665, "grad_norm": 0.6429160237312317, "learning_rate": 9.960788938419938e-06, "loss": 1.4057, "mean_token_accuracy": 0.6732848683993021, "num_tokens": 1682149098.0, "step": 10028 }, { "entropy": 1.723353534936905, "epoch": 1.1017274999313393, "grad_norm": 0.6367024779319763, "learning_rate": 9.959198535661097e-06, "loss": 1.5805, "mean_token_accuracy": 0.6324650992949804, "num_tokens": 1682363134.0, "step": 10029 }, { "entropy": 1.6801141500473022, "epoch": 1.1018373568427124, "grad_norm": 0.7213335633277893, "learning_rate": 9.957608165843148e-06, "loss": 1.3366, "mean_token_accuracy": 0.6565315226713816, "num_tokens": 1682553091.0, "step": 10030 }, { "entropy": 1.6784232159455617, "epoch": 1.1019472137540853, "grad_norm": 0.7102720737457275, "learning_rate": 9.956017829016434e-06, "loss": 1.3336, "mean_token_accuracy": 0.6537942936023077, "num_tokens": 1682722356.0, "step": 10031 }, { "entropy": 1.680375188589096, "epoch": 1.1020570706654582, "grad_norm": 0.6875813007354736, "learning_rate": 9.954427525231285e-06, "loss": 1.3502, "mean_token_accuracy": 0.6625233242909113, "num_tokens": 1682842970.0, "step": 10032 }, { "entropy": 1.6692634721597035, "epoch": 1.102166927576831, "grad_norm": 0.700435996055603, "learning_rate": 9.952837254538032e-06, "loss": 1.3591, "mean_token_accuracy": 0.667348379890124, "num_tokens": 1683041864.0, "step": 10033 }, { "entropy": 1.7761492331822712, "epoch": 1.1022767844882042, "grad_norm": 0.7642155885696411, "learning_rate": 9.95124701698701e-06, "loss": 1.462, "mean_token_accuracy": 0.6416449447472891, "num_tokens": 1683184924.0, "step": 10034 }, { "entropy": 1.725660542647044, "epoch": 1.102386641399577, "grad_norm": 0.6598086357116699, "learning_rate": 9.949656812628548e-06, "loss": 1.3182, "mean_token_accuracy": 0.6549786279598871, "num_tokens": 1683372899.0, "step": 10035 }, { "entropy": 1.6732657253742218, "epoch": 1.10249649831095, "grad_norm": 0.9407162070274353, "learning_rate": 9.948066641512972e-06, "loss": 1.3752, "mean_token_accuracy": 0.6609608381986618, "num_tokens": 1683562266.0, "step": 10036 }, { "entropy": 1.7500144441922505, "epoch": 1.1026063552223229, "grad_norm": 0.6773711442947388, "learning_rate": 9.946476503690613e-06, "loss": 1.4346, "mean_token_accuracy": 0.6503184189399084, "num_tokens": 1683732368.0, "step": 10037 }, { "entropy": 1.7161312401294708, "epoch": 1.102716212133696, "grad_norm": 0.6016209125518799, "learning_rate": 9.944886399211802e-06, "loss": 1.3286, "mean_token_accuracy": 0.6561945378780365, "num_tokens": 1683895566.0, "step": 10038 }, { "entropy": 1.7138621707757313, "epoch": 1.1028260690450689, "grad_norm": 0.7198561429977417, "learning_rate": 9.943296328126855e-06, "loss": 1.3559, "mean_token_accuracy": 0.6556108146905899, "num_tokens": 1684035609.0, "step": 10039 }, { "entropy": 1.7401485840479534, "epoch": 1.1029359259564417, "grad_norm": 0.7214450240135193, "learning_rate": 9.941706290486107e-06, "loss": 1.4328, "mean_token_accuracy": 0.6506765186786652, "num_tokens": 1684183063.0, "step": 10040 }, { "entropy": 1.713246077299118, "epoch": 1.1030457828678146, "grad_norm": 0.7159080505371094, "learning_rate": 9.940116286339876e-06, "loss": 1.2452, "mean_token_accuracy": 0.67661052942276, "num_tokens": 1684309228.0, "step": 10041 }, { "entropy": 1.7424029608567555, "epoch": 1.1031556397791875, "grad_norm": 0.598591148853302, "learning_rate": 9.938526315738488e-06, "loss": 1.3506, "mean_token_accuracy": 0.6587058206399282, "num_tokens": 1684467440.0, "step": 10042 }, { "entropy": 1.737044592698415, "epoch": 1.1032654966905606, "grad_norm": 0.7071549296379089, "learning_rate": 9.936936378732264e-06, "loss": 1.3585, "mean_token_accuracy": 0.6568170885245005, "num_tokens": 1684616936.0, "step": 10043 }, { "entropy": 1.6824077864487965, "epoch": 1.1033753536019335, "grad_norm": 0.7109892964363098, "learning_rate": 9.935346475371526e-06, "loss": 1.3406, "mean_token_accuracy": 0.6530480086803436, "num_tokens": 1684799276.0, "step": 10044 }, { "entropy": 1.658627490202586, "epoch": 1.1034852105133064, "grad_norm": 0.6493405699729919, "learning_rate": 9.933756605706589e-06, "loss": 1.4513, "mean_token_accuracy": 0.6560260156790415, "num_tokens": 1684941620.0, "step": 10045 }, { "entropy": 1.7143846948941548, "epoch": 1.1035950674246793, "grad_norm": 0.7175660133361816, "learning_rate": 9.93216676978778e-06, "loss": 1.3808, "mean_token_accuracy": 0.6502297967672348, "num_tokens": 1685141645.0, "step": 10046 }, { "entropy": 1.7108404437700908, "epoch": 1.1037049243360524, "grad_norm": 0.665665864944458, "learning_rate": 9.930576967665405e-06, "loss": 1.3979, "mean_token_accuracy": 0.6681078970432281, "num_tokens": 1685310487.0, "step": 10047 }, { "entropy": 1.6906990508238475, "epoch": 1.1038147812474253, "grad_norm": 0.685772716999054, "learning_rate": 9.928987199389791e-06, "loss": 1.2396, "mean_token_accuracy": 0.6812936266263326, "num_tokens": 1685454139.0, "step": 10048 }, { "entropy": 1.6968371470769246, "epoch": 1.1039246381587982, "grad_norm": 0.6249828934669495, "learning_rate": 9.92739746501125e-06, "loss": 1.4544, "mean_token_accuracy": 0.6454744736353556, "num_tokens": 1685605872.0, "step": 10049 }, { "entropy": 1.6949062744776409, "epoch": 1.104034495070171, "grad_norm": 0.656091034412384, "learning_rate": 9.925807764580094e-06, "loss": 1.3709, "mean_token_accuracy": 0.6586327403783798, "num_tokens": 1685794273.0, "step": 10050 }, { "entropy": 1.7288226087888081, "epoch": 1.1041443519815441, "grad_norm": 0.7473734617233276, "learning_rate": 9.924218098146636e-06, "loss": 1.2089, "mean_token_accuracy": 0.6816525955994924, "num_tokens": 1685887419.0, "step": 10051 }, { "entropy": 1.6433724264303844, "epoch": 1.104254208892917, "grad_norm": 0.6661230325698853, "learning_rate": 9.922628465761197e-06, "loss": 1.2899, "mean_token_accuracy": 0.6614332050085068, "num_tokens": 1686038032.0, "step": 10052 }, { "entropy": 1.72390815615654, "epoch": 1.10436406580429, "grad_norm": 0.750630259513855, "learning_rate": 9.921038867474076e-06, "loss": 1.4065, "mean_token_accuracy": 0.6597993671894073, "num_tokens": 1686238542.0, "step": 10053 }, { "entropy": 1.7463493744532268, "epoch": 1.1044739227156628, "grad_norm": 0.7480951547622681, "learning_rate": 9.919449303335591e-06, "loss": 1.3189, "mean_token_accuracy": 0.664255807797114, "num_tokens": 1686360222.0, "step": 10054 }, { "entropy": 1.7227412561575572, "epoch": 1.1045837796270357, "grad_norm": 0.7310038208961487, "learning_rate": 9.917859773396048e-06, "loss": 1.4055, "mean_token_accuracy": 0.6522340675195059, "num_tokens": 1686517954.0, "step": 10055 }, { "entropy": 1.727901021639506, "epoch": 1.1046936365384088, "grad_norm": 0.7204070687294006, "learning_rate": 9.916270277705755e-06, "loss": 1.3779, "mean_token_accuracy": 0.6602382163206736, "num_tokens": 1686720706.0, "step": 10056 }, { "entropy": 1.6805065770943959, "epoch": 1.1048034934497817, "grad_norm": 0.747397780418396, "learning_rate": 9.914680816315018e-06, "loss": 1.4117, "mean_token_accuracy": 0.6577341059843699, "num_tokens": 1686851842.0, "step": 10057 }, { "entropy": 1.683358073234558, "epoch": 1.1049133503611546, "grad_norm": 0.8031777739524841, "learning_rate": 9.913091389274149e-06, "loss": 1.398, "mean_token_accuracy": 0.6626959492762884, "num_tokens": 1686999977.0, "step": 10058 }, { "entropy": 1.7599800129731495, "epoch": 1.1050232072725275, "grad_norm": 0.6084749698638916, "learning_rate": 9.911501996633446e-06, "loss": 1.3626, "mean_token_accuracy": 0.6569390346606573, "num_tokens": 1687162443.0, "step": 10059 }, { "entropy": 1.666684329509735, "epoch": 1.1051330641839006, "grad_norm": 0.7338747978210449, "learning_rate": 9.909912638443211e-06, "loss": 1.3339, "mean_token_accuracy": 0.6666023383537928, "num_tokens": 1687326579.0, "step": 10060 }, { "entropy": 1.7396563490231831, "epoch": 1.1052429210952734, "grad_norm": 0.5960805416107178, "learning_rate": 9.908323314753754e-06, "loss": 1.4937, "mean_token_accuracy": 0.6390989075104395, "num_tokens": 1687537916.0, "step": 10061 }, { "entropy": 1.7278473377227783, "epoch": 1.1053527780066463, "grad_norm": 0.640707790851593, "learning_rate": 9.90673402561537e-06, "loss": 1.3766, "mean_token_accuracy": 0.6638915787140528, "num_tokens": 1687698161.0, "step": 10062 }, { "entropy": 1.723745624224345, "epoch": 1.1054626349180192, "grad_norm": 0.7135167717933655, "learning_rate": 9.90514477107836e-06, "loss": 1.4669, "mean_token_accuracy": 0.6489771803220113, "num_tokens": 1687859122.0, "step": 10063 }, { "entropy": 1.650217165549596, "epoch": 1.1055724918293923, "grad_norm": 0.6993624567985535, "learning_rate": 9.90355555119303e-06, "loss": 1.2399, "mean_token_accuracy": 0.684073825677236, "num_tokens": 1687974398.0, "step": 10064 }, { "entropy": 1.7590695818265278, "epoch": 1.1056823487407652, "grad_norm": 0.6988198757171631, "learning_rate": 9.901966366009665e-06, "loss": 1.5556, "mean_token_accuracy": 0.6316021184126536, "num_tokens": 1688190329.0, "step": 10065 }, { "entropy": 1.7328562041123707, "epoch": 1.105792205652138, "grad_norm": 0.8319157958030701, "learning_rate": 9.900377215578575e-06, "loss": 1.3012, "mean_token_accuracy": 0.6607611576716105, "num_tokens": 1688300281.0, "step": 10066 }, { "entropy": 1.6762152512868245, "epoch": 1.105902062563511, "grad_norm": 0.7458414435386658, "learning_rate": 9.89878809995005e-06, "loss": 1.2462, "mean_token_accuracy": 0.6730685979127884, "num_tokens": 1688449740.0, "step": 10067 }, { "entropy": 1.6644122898578644, "epoch": 1.1060119194748839, "grad_norm": 0.5826370716094971, "learning_rate": 9.897199019174386e-06, "loss": 1.427, "mean_token_accuracy": 0.6464564104874929, "num_tokens": 1688657245.0, "step": 10068 }, { "entropy": 1.7347849011421204, "epoch": 1.106121776386257, "grad_norm": 1.0048359632492065, "learning_rate": 9.895609973301873e-06, "loss": 1.4001, "mean_token_accuracy": 0.6557883818944296, "num_tokens": 1688785006.0, "step": 10069 }, { "entropy": 1.7074712614218395, "epoch": 1.1062316332976299, "grad_norm": 0.7061405777931213, "learning_rate": 9.89402096238281e-06, "loss": 1.2721, "mean_token_accuracy": 0.6725035260121027, "num_tokens": 1688900935.0, "step": 10070 }, { "entropy": 1.7450923323631287, "epoch": 1.1063414902090027, "grad_norm": 0.6660796403884888, "learning_rate": 9.892431986467483e-06, "loss": 1.3757, "mean_token_accuracy": 0.6571687310934067, "num_tokens": 1689061066.0, "step": 10071 }, { "entropy": 1.6484363277753193, "epoch": 1.1064513471203756, "grad_norm": 0.5785127282142639, "learning_rate": 9.890843045606185e-06, "loss": 1.3894, "mean_token_accuracy": 0.6500343084335327, "num_tokens": 1689306241.0, "step": 10072 }, { "entropy": 1.7250319123268127, "epoch": 1.1065612040317487, "grad_norm": 0.6741638779640198, "learning_rate": 9.889254139849207e-06, "loss": 1.4156, "mean_token_accuracy": 0.6609347065289816, "num_tokens": 1689490952.0, "step": 10073 }, { "entropy": 1.703676551580429, "epoch": 1.1066710609431216, "grad_norm": 0.6628722548484802, "learning_rate": 9.887665269246833e-06, "loss": 1.3862, "mean_token_accuracy": 0.6559304048617681, "num_tokens": 1689668182.0, "step": 10074 }, { "entropy": 1.7785408198833466, "epoch": 1.1067809178544945, "grad_norm": 0.6795310974121094, "learning_rate": 9.886076433849352e-06, "loss": 1.3857, "mean_token_accuracy": 0.6603186577558517, "num_tokens": 1689843179.0, "step": 10075 }, { "entropy": 1.7740286191304524, "epoch": 1.1068907747658674, "grad_norm": 0.6643602848052979, "learning_rate": 9.884487633707052e-06, "loss": 1.455, "mean_token_accuracy": 0.6375104387601217, "num_tokens": 1690010012.0, "step": 10076 }, { "entropy": 1.7466478248437245, "epoch": 1.1070006316772405, "grad_norm": 0.7298927903175354, "learning_rate": 9.882898868870212e-06, "loss": 1.4145, "mean_token_accuracy": 0.6563018610080084, "num_tokens": 1690159229.0, "step": 10077 }, { "entropy": 1.705216646194458, "epoch": 1.1071104885886134, "grad_norm": 0.793967068195343, "learning_rate": 9.88131013938912e-06, "loss": 1.3992, "mean_token_accuracy": 0.6699225157499313, "num_tokens": 1690302272.0, "step": 10078 }, { "entropy": 1.70258762439092, "epoch": 1.1072203454999863, "grad_norm": 0.7318503260612488, "learning_rate": 9.87972144531406e-06, "loss": 1.3234, "mean_token_accuracy": 0.6641089816888174, "num_tokens": 1690461700.0, "step": 10079 }, { "entropy": 1.6951699952284496, "epoch": 1.1073302024113592, "grad_norm": 0.8104332089424133, "learning_rate": 9.87813278669531e-06, "loss": 1.3786, "mean_token_accuracy": 0.677444338798523, "num_tokens": 1690598643.0, "step": 10080 }, { "entropy": 1.688471108675003, "epoch": 1.107440059322732, "grad_norm": 0.5393524169921875, "learning_rate": 9.876544163583153e-06, "loss": 1.4725, "mean_token_accuracy": 0.6343822181224823, "num_tokens": 1690799088.0, "step": 10081 }, { "entropy": 1.683466762304306, "epoch": 1.1075499162341051, "grad_norm": 0.6404281854629517, "learning_rate": 9.87495557602787e-06, "loss": 1.386, "mean_token_accuracy": 0.6577825993299484, "num_tokens": 1690951222.0, "step": 10082 }, { "entropy": 1.6536591549714406, "epoch": 1.107659773145478, "grad_norm": 0.6884099245071411, "learning_rate": 9.873367024079728e-06, "loss": 1.4333, "mean_token_accuracy": 0.6448961248000463, "num_tokens": 1691125533.0, "step": 10083 }, { "entropy": 1.6448639531930287, "epoch": 1.107769630056851, "grad_norm": 0.583249032497406, "learning_rate": 9.871778507789016e-06, "loss": 1.4251, "mean_token_accuracy": 0.6626767565806707, "num_tokens": 1691309375.0, "step": 10084 }, { "entropy": 1.7260893980662029, "epoch": 1.1078794869682238, "grad_norm": 0.8317601084709167, "learning_rate": 9.870190027206009e-06, "loss": 1.4023, "mean_token_accuracy": 0.6650147537390391, "num_tokens": 1691463284.0, "step": 10085 }, { "entropy": 1.7166006167729695, "epoch": 1.107989343879597, "grad_norm": 0.6556655168533325, "learning_rate": 9.868601582380974e-06, "loss": 1.3163, "mean_token_accuracy": 0.6799081613620123, "num_tokens": 1691575414.0, "step": 10086 }, { "entropy": 1.7317289213339488, "epoch": 1.1080992007909698, "grad_norm": 0.729681134223938, "learning_rate": 9.867013173364191e-06, "loss": 1.2541, "mean_token_accuracy": 0.6766142894824346, "num_tokens": 1691669164.0, "step": 10087 }, { "entropy": 1.6821042597293854, "epoch": 1.1082090577023427, "grad_norm": 0.6675170063972473, "learning_rate": 9.865424800205931e-06, "loss": 1.3605, "mean_token_accuracy": 0.6503806213537852, "num_tokens": 1691815021.0, "step": 10088 }, { "entropy": 1.751885672410329, "epoch": 1.1083189146137156, "grad_norm": 0.5960071086883545, "learning_rate": 9.863836462956464e-06, "loss": 1.3456, "mean_token_accuracy": 0.6711924225091934, "num_tokens": 1691976476.0, "step": 10089 }, { "entropy": 1.684192289908727, "epoch": 1.1084287715250887, "grad_norm": 0.7027954459190369, "learning_rate": 9.862248161666062e-06, "loss": 1.4732, "mean_token_accuracy": 0.6473731994628906, "num_tokens": 1692149190.0, "step": 10090 }, { "entropy": 1.648518443107605, "epoch": 1.1085386284364616, "grad_norm": 0.8666717410087585, "learning_rate": 9.860659896384991e-06, "loss": 1.4343, "mean_token_accuracy": 0.661418413122495, "num_tokens": 1692355012.0, "step": 10091 }, { "entropy": 1.6971223453680675, "epoch": 1.1086484853478344, "grad_norm": 0.7207356095314026, "learning_rate": 9.859071667163523e-06, "loss": 1.2235, "mean_token_accuracy": 0.675402487317721, "num_tokens": 1692474814.0, "step": 10092 }, { "entropy": 1.699401597181956, "epoch": 1.1087583422592073, "grad_norm": 0.747488260269165, "learning_rate": 9.857483474051921e-06, "loss": 1.5966, "mean_token_accuracy": 0.6380815704663595, "num_tokens": 1692666384.0, "step": 10093 }, { "entropy": 1.6512778798739116, "epoch": 1.1088681991705802, "grad_norm": 0.5988063812255859, "learning_rate": 9.855895317100456e-06, "loss": 1.5544, "mean_token_accuracy": 0.6298639525969824, "num_tokens": 1692884550.0, "step": 10094 }, { "entropy": 1.7332661549250286, "epoch": 1.1089780560819533, "grad_norm": 0.6148350238800049, "learning_rate": 9.854307196359383e-06, "loss": 1.4389, "mean_token_accuracy": 0.6564859499533972, "num_tokens": 1693097634.0, "step": 10095 }, { "entropy": 1.637395977973938, "epoch": 1.1090879129933262, "grad_norm": 0.6527997255325317, "learning_rate": 9.852719111878973e-06, "loss": 1.495, "mean_token_accuracy": 0.6438925464948019, "num_tokens": 1693345822.0, "step": 10096 }, { "entropy": 1.7149374882380168, "epoch": 1.109197769904699, "grad_norm": 0.7964677214622498, "learning_rate": 9.851131063709488e-06, "loss": 1.3364, "mean_token_accuracy": 0.6784713963667551, "num_tokens": 1693495662.0, "step": 10097 }, { "entropy": 1.681052456299464, "epoch": 1.109307626816072, "grad_norm": 0.6391304135322571, "learning_rate": 9.849543051901187e-06, "loss": 1.3896, "mean_token_accuracy": 0.6593037992715836, "num_tokens": 1693661136.0, "step": 10098 }, { "entropy": 1.70050710439682, "epoch": 1.109417483727445, "grad_norm": 0.5992792844772339, "learning_rate": 9.847955076504327e-06, "loss": 1.3061, "mean_token_accuracy": 0.6639832506577173, "num_tokens": 1693809033.0, "step": 10099 }, { "entropy": 1.6677427391211193, "epoch": 1.109527340638818, "grad_norm": 0.6866241693496704, "learning_rate": 9.846367137569175e-06, "loss": 1.2431, "mean_token_accuracy": 0.6873250852028528, "num_tokens": 1693937839.0, "step": 10100 }, { "entropy": 1.7384430766105652, "epoch": 1.1096371975501909, "grad_norm": 0.72084641456604, "learning_rate": 9.844779235145975e-06, "loss": 1.583, "mean_token_accuracy": 0.6379265685876211, "num_tokens": 1694143956.0, "step": 10101 }, { "entropy": 1.7250956892967224, "epoch": 1.1097470544615637, "grad_norm": 0.5843518376350403, "learning_rate": 9.843191369285e-06, "loss": 1.3725, "mean_token_accuracy": 0.6559849927822748, "num_tokens": 1694333230.0, "step": 10102 }, { "entropy": 1.6686339875062306, "epoch": 1.1098569113729368, "grad_norm": 0.6618912220001221, "learning_rate": 9.841603540036493e-06, "loss": 1.2857, "mean_token_accuracy": 0.6752923329671224, "num_tokens": 1694528211.0, "step": 10103 }, { "entropy": 1.7217063804467518, "epoch": 1.1099667682843097, "grad_norm": 0.7229596972465515, "learning_rate": 9.84001574745071e-06, "loss": 1.3442, "mean_token_accuracy": 0.6586703856786092, "num_tokens": 1694668137.0, "step": 10104 }, { "entropy": 1.730820248524348, "epoch": 1.1100766251956826, "grad_norm": 0.7770981788635254, "learning_rate": 9.838427991577913e-06, "loss": 1.3819, "mean_token_accuracy": 0.6443741470575333, "num_tokens": 1694834662.0, "step": 10105 }, { "entropy": 1.7381211817264557, "epoch": 1.1101864821070555, "grad_norm": 0.6995685696601868, "learning_rate": 9.83684027246834e-06, "loss": 1.4073, "mean_token_accuracy": 0.6486761023600897, "num_tokens": 1694996990.0, "step": 10106 }, { "entropy": 1.7615481615066528, "epoch": 1.1102963390184284, "grad_norm": 0.7999100089073181, "learning_rate": 9.835252590172248e-06, "loss": 1.418, "mean_token_accuracy": 0.6523937930663427, "num_tokens": 1695161564.0, "step": 10107 }, { "entropy": 1.698384553194046, "epoch": 1.1104061959298015, "grad_norm": 0.766127347946167, "learning_rate": 9.833664944739894e-06, "loss": 1.3847, "mean_token_accuracy": 0.6614054441452026, "num_tokens": 1695302392.0, "step": 10108 }, { "entropy": 1.7386977672576904, "epoch": 1.1105160528411744, "grad_norm": 0.7259992361068726, "learning_rate": 9.832077336221511e-06, "loss": 1.4899, "mean_token_accuracy": 0.6413588871558508, "num_tokens": 1695452887.0, "step": 10109 }, { "entropy": 1.721059521039327, "epoch": 1.1106259097525473, "grad_norm": 0.6974899172782898, "learning_rate": 9.830489764667357e-06, "loss": 1.4765, "mean_token_accuracy": 0.650780513882637, "num_tokens": 1695615069.0, "step": 10110 }, { "entropy": 1.7291888693968456, "epoch": 1.1107357666639202, "grad_norm": 0.7660084366798401, "learning_rate": 9.828902230127675e-06, "loss": 1.4252, "mean_token_accuracy": 0.6462711741526922, "num_tokens": 1695760221.0, "step": 10111 }, { "entropy": 1.7433270911375682, "epoch": 1.1108456235752933, "grad_norm": 0.7739673852920532, "learning_rate": 9.827314732652708e-06, "loss": 1.4445, "mean_token_accuracy": 0.650582085053126, "num_tokens": 1695917595.0, "step": 10112 }, { "entropy": 1.6808188458283742, "epoch": 1.1109554804866661, "grad_norm": 0.7548496723175049, "learning_rate": 9.825727272292702e-06, "loss": 1.3041, "mean_token_accuracy": 0.6782331267992655, "num_tokens": 1696031484.0, "step": 10113 }, { "entropy": 1.7230580151081085, "epoch": 1.111065337398039, "grad_norm": 0.7264763116836548, "learning_rate": 9.824139849097901e-06, "loss": 1.3437, "mean_token_accuracy": 0.6618181715408961, "num_tokens": 1696143962.0, "step": 10114 }, { "entropy": 1.7376162310441334, "epoch": 1.111175194309412, "grad_norm": 0.6344247460365295, "learning_rate": 9.822552463118542e-06, "loss": 1.4366, "mean_token_accuracy": 0.6638104766607285, "num_tokens": 1696315979.0, "step": 10115 }, { "entropy": 1.7340149482091267, "epoch": 1.111285051220785, "grad_norm": 1.0014694929122925, "learning_rate": 9.820965114404866e-06, "loss": 1.327, "mean_token_accuracy": 0.6686884462833405, "num_tokens": 1696452363.0, "step": 10116 }, { "entropy": 1.6129749516646068, "epoch": 1.111394908132158, "grad_norm": 0.6101342439651489, "learning_rate": 9.819377803007117e-06, "loss": 1.3782, "mean_token_accuracy": 0.669797440369924, "num_tokens": 1696599085.0, "step": 10117 }, { "entropy": 1.7004303236802418, "epoch": 1.1115047650435308, "grad_norm": 0.7235705852508545, "learning_rate": 9.817790528975527e-06, "loss": 1.4595, "mean_token_accuracy": 0.6571828325589498, "num_tokens": 1696746504.0, "step": 10118 }, { "entropy": 1.6813008785247803, "epoch": 1.1116146219549037, "grad_norm": 0.6818208694458008, "learning_rate": 9.81620329236033e-06, "loss": 1.3373, "mean_token_accuracy": 0.6721784075101217, "num_tokens": 1696892330.0, "step": 10119 }, { "entropy": 1.716547667980194, "epoch": 1.1117244788662766, "grad_norm": 0.684902548789978, "learning_rate": 9.81461609321177e-06, "loss": 1.3562, "mean_token_accuracy": 0.6633862257003784, "num_tokens": 1697027411.0, "step": 10120 }, { "entropy": 1.7614688177903493, "epoch": 1.1118343357776497, "grad_norm": 0.6902977824211121, "learning_rate": 9.813028931580073e-06, "loss": 1.4259, "mean_token_accuracy": 0.6516207158565521, "num_tokens": 1697221211.0, "step": 10121 }, { "entropy": 1.7099326650301616, "epoch": 1.1119441926890226, "grad_norm": 0.7409700751304626, "learning_rate": 9.811441807515477e-06, "loss": 1.4281, "mean_token_accuracy": 0.6582437505324682, "num_tokens": 1697396444.0, "step": 10122 }, { "entropy": 1.6779835720856984, "epoch": 1.1120540496003954, "grad_norm": 0.621612012386322, "learning_rate": 9.809854721068213e-06, "loss": 1.4229, "mean_token_accuracy": 0.6485381374756495, "num_tokens": 1697565929.0, "step": 10123 }, { "entropy": 1.6557986438274384, "epoch": 1.1121639065117683, "grad_norm": 0.6872241497039795, "learning_rate": 9.808267672288509e-06, "loss": 1.3494, "mean_token_accuracy": 0.666273444890976, "num_tokens": 1697738024.0, "step": 10124 }, { "entropy": 1.7175208032131195, "epoch": 1.1122737634231414, "grad_norm": 0.5981009602546692, "learning_rate": 9.806680661226595e-06, "loss": 1.459, "mean_token_accuracy": 0.6348882069190344, "num_tokens": 1697938594.0, "step": 10125 }, { "entropy": 1.720637023448944, "epoch": 1.1123836203345143, "grad_norm": 0.7893303632736206, "learning_rate": 9.805093687932707e-06, "loss": 1.3752, "mean_token_accuracy": 0.6728624453147253, "num_tokens": 1698075068.0, "step": 10126 }, { "entropy": 1.7189677953720093, "epoch": 1.1124934772458872, "grad_norm": 0.7821738123893738, "learning_rate": 9.80350675245706e-06, "loss": 1.4315, "mean_token_accuracy": 0.653843825062116, "num_tokens": 1698231670.0, "step": 10127 }, { "entropy": 1.756723403930664, "epoch": 1.11260333415726, "grad_norm": 0.7259140610694885, "learning_rate": 9.801919854849884e-06, "loss": 1.4513, "mean_token_accuracy": 0.6415894875923792, "num_tokens": 1698432467.0, "step": 10128 }, { "entropy": 1.751261701186498, "epoch": 1.1127131910686332, "grad_norm": 0.6469233632087708, "learning_rate": 9.800332995161408e-06, "loss": 1.4379, "mean_token_accuracy": 0.6382074107726415, "num_tokens": 1698590937.0, "step": 10129 }, { "entropy": 1.735701670249303, "epoch": 1.112823047980006, "grad_norm": 0.667665421962738, "learning_rate": 9.798746173441852e-06, "loss": 1.3649, "mean_token_accuracy": 0.6617190291484197, "num_tokens": 1698784904.0, "step": 10130 }, { "entropy": 1.7073118388652802, "epoch": 1.112932904891379, "grad_norm": 0.8839547634124756, "learning_rate": 9.797159389741436e-06, "loss": 1.3859, "mean_token_accuracy": 0.6646972000598907, "num_tokens": 1698955976.0, "step": 10131 }, { "entropy": 1.6580509543418884, "epoch": 1.1130427618027519, "grad_norm": 0.6089791655540466, "learning_rate": 9.795572644110387e-06, "loss": 1.4237, "mean_token_accuracy": 0.6523320525884628, "num_tokens": 1699205226.0, "step": 10132 }, { "entropy": 1.7225276331106822, "epoch": 1.1131526187141247, "grad_norm": 0.6253435611724854, "learning_rate": 9.793985936598916e-06, "loss": 1.34, "mean_token_accuracy": 0.6591206341981888, "num_tokens": 1699364197.0, "step": 10133 }, { "entropy": 1.7497197190920513, "epoch": 1.1132624756254978, "grad_norm": 0.7956197261810303, "learning_rate": 9.792399267257249e-06, "loss": 1.4329, "mean_token_accuracy": 0.6548638641834259, "num_tokens": 1699500152.0, "step": 10134 }, { "entropy": 1.7095544238885243, "epoch": 1.1133723325368707, "grad_norm": 0.6860001683235168, "learning_rate": 9.790812636135603e-06, "loss": 1.5067, "mean_token_accuracy": 0.6451443135738373, "num_tokens": 1699661286.0, "step": 10135 }, { "entropy": 1.6957333187262218, "epoch": 1.1134821894482436, "grad_norm": 0.7276600003242493, "learning_rate": 9.78922604328419e-06, "loss": 1.5212, "mean_token_accuracy": 0.6416138807932535, "num_tokens": 1699844103.0, "step": 10136 }, { "entropy": 1.7566113372643788, "epoch": 1.1135920463596165, "grad_norm": 0.7104360461235046, "learning_rate": 9.787639488753224e-06, "loss": 1.4888, "mean_token_accuracy": 0.6481777926286062, "num_tokens": 1699984609.0, "step": 10137 }, { "entropy": 1.6825304627418518, "epoch": 1.1137019032709896, "grad_norm": 0.8023889064788818, "learning_rate": 9.78605297259293e-06, "loss": 1.2334, "mean_token_accuracy": 0.6820906003316244, "num_tokens": 1700098327.0, "step": 10138 }, { "entropy": 1.673277239004771, "epoch": 1.1138117601823625, "grad_norm": 0.7964149117469788, "learning_rate": 9.784466494853507e-06, "loss": 1.3369, "mean_token_accuracy": 0.6647952695687612, "num_tokens": 1700260846.0, "step": 10139 }, { "entropy": 1.6351796289285023, "epoch": 1.1139216170937354, "grad_norm": 0.6151949763298035, "learning_rate": 9.782880055585171e-06, "loss": 1.5037, "mean_token_accuracy": 0.640854095419248, "num_tokens": 1700482399.0, "step": 10140 }, { "entropy": 1.7372311453024547, "epoch": 1.1140314740051083, "grad_norm": 0.8570227026939392, "learning_rate": 9.781293654838137e-06, "loss": 1.4559, "mean_token_accuracy": 0.6682560443878174, "num_tokens": 1700618804.0, "step": 10141 }, { "entropy": 1.6593137284119923, "epoch": 1.1141413309164814, "grad_norm": 0.5903623700141907, "learning_rate": 9.779707292662605e-06, "loss": 1.3958, "mean_token_accuracy": 0.6583308031161627, "num_tokens": 1700820971.0, "step": 10142 }, { "entropy": 1.6421323815981548, "epoch": 1.1142511878278543, "grad_norm": 0.6660314798355103, "learning_rate": 9.778120969108791e-06, "loss": 1.2946, "mean_token_accuracy": 0.6711133569478989, "num_tokens": 1700977095.0, "step": 10143 }, { "entropy": 1.629296710093816, "epoch": 1.1143610447392271, "grad_norm": 0.6648311018943787, "learning_rate": 9.776534684226898e-06, "loss": 1.4811, "mean_token_accuracy": 0.6564251184463501, "num_tokens": 1701153164.0, "step": 10144 }, { "entropy": 1.746991515159607, "epoch": 1.1144709016506, "grad_norm": 0.7114366888999939, "learning_rate": 9.774948438067127e-06, "loss": 1.347, "mean_token_accuracy": 0.6591640909512838, "num_tokens": 1701290473.0, "step": 10145 }, { "entropy": 1.7070113221804302, "epoch": 1.114580758561973, "grad_norm": 0.7355979681015015, "learning_rate": 9.773362230679685e-06, "loss": 1.5335, "mean_token_accuracy": 0.6442695558071136, "num_tokens": 1701450663.0, "step": 10146 }, { "entropy": 1.7952332894007366, "epoch": 1.114690615473346, "grad_norm": 0.7171587347984314, "learning_rate": 9.771776062114782e-06, "loss": 1.5082, "mean_token_accuracy": 0.6332686841487885, "num_tokens": 1701691000.0, "step": 10147 }, { "entropy": 1.7332034011681874, "epoch": 1.114800472384719, "grad_norm": 0.6650365591049194, "learning_rate": 9.77018993242261e-06, "loss": 1.3343, "mean_token_accuracy": 0.6661591629187266, "num_tokens": 1701854437.0, "step": 10148 }, { "entropy": 1.7049450874328613, "epoch": 1.1149103292960918, "grad_norm": 0.6518258452415466, "learning_rate": 9.76860384165337e-06, "loss": 1.2154, "mean_token_accuracy": 0.6860535194476446, "num_tokens": 1702016466.0, "step": 10149 }, { "entropy": 1.6941988567511241, "epoch": 1.1150201862074647, "grad_norm": 0.6656258702278137, "learning_rate": 9.76701778985727e-06, "loss": 1.4502, "mean_token_accuracy": 0.6606006671984991, "num_tokens": 1702179287.0, "step": 10150 }, { "entropy": 1.6935183207194011, "epoch": 1.1151300431188378, "grad_norm": 0.7154098749160767, "learning_rate": 9.765431777084495e-06, "loss": 1.2172, "mean_token_accuracy": 0.6906551122665405, "num_tokens": 1702313501.0, "step": 10151 }, { "entropy": 1.7146691580613453, "epoch": 1.1152399000302107, "grad_norm": 0.7676160335540771, "learning_rate": 9.763845803385247e-06, "loss": 1.5107, "mean_token_accuracy": 0.664627286295096, "num_tokens": 1702453001.0, "step": 10152 }, { "entropy": 1.6825979848702748, "epoch": 1.1153497569415836, "grad_norm": 0.5976483225822449, "learning_rate": 9.76225986880973e-06, "loss": 1.537, "mean_token_accuracy": 0.636245513955752, "num_tokens": 1702684575.0, "step": 10153 }, { "entropy": 1.6788121958573659, "epoch": 1.1154596138529564, "grad_norm": 0.6797428131103516, "learning_rate": 9.760673973408124e-06, "loss": 1.2018, "mean_token_accuracy": 0.6839652607838312, "num_tokens": 1702832136.0, "step": 10154 }, { "entropy": 1.6775270501772563, "epoch": 1.1155694707643296, "grad_norm": 0.7173194885253906, "learning_rate": 9.75908811723063e-06, "loss": 1.3752, "mean_token_accuracy": 0.6599200914303461, "num_tokens": 1702990937.0, "step": 10155 }, { "entropy": 1.6935907403628032, "epoch": 1.1156793276757024, "grad_norm": 0.7358232140541077, "learning_rate": 9.757502300327439e-06, "loss": 1.2976, "mean_token_accuracy": 0.6701224446296692, "num_tokens": 1703131461.0, "step": 10156 }, { "entropy": 1.6839358309904735, "epoch": 1.1157891845870753, "grad_norm": 0.7180026173591614, "learning_rate": 9.755916522748738e-06, "loss": 1.3543, "mean_token_accuracy": 0.6787949800491333, "num_tokens": 1703265354.0, "step": 10157 }, { "entropy": 1.7061149676640828, "epoch": 1.1158990414984482, "grad_norm": 0.7453353404998779, "learning_rate": 9.754330784544719e-06, "loss": 1.3589, "mean_token_accuracy": 0.6774944067001343, "num_tokens": 1703403406.0, "step": 10158 }, { "entropy": 1.6841832200686138, "epoch": 1.116008898409821, "grad_norm": 0.7039199471473694, "learning_rate": 9.752745085765571e-06, "loss": 1.4147, "mean_token_accuracy": 0.6500913898150126, "num_tokens": 1703568329.0, "step": 10159 }, { "entropy": 1.7383232315381367, "epoch": 1.1161187553211942, "grad_norm": 0.6420716047286987, "learning_rate": 9.751159426461479e-06, "loss": 1.5264, "mean_token_accuracy": 0.6397146930297216, "num_tokens": 1703758280.0, "step": 10160 }, { "entropy": 1.7103537619113922, "epoch": 1.116228612232567, "grad_norm": 0.7599209547042847, "learning_rate": 9.749573806682629e-06, "loss": 1.5267, "mean_token_accuracy": 0.6386250903209051, "num_tokens": 1703928952.0, "step": 10161 }, { "entropy": 1.6944385866324108, "epoch": 1.11633846914394, "grad_norm": 0.6873871684074402, "learning_rate": 9.747988226479203e-06, "loss": 1.3201, "mean_token_accuracy": 0.6644426584243774, "num_tokens": 1704065206.0, "step": 10162 }, { "entropy": 1.65885129570961, "epoch": 1.1164483260553129, "grad_norm": 0.62119460105896, "learning_rate": 9.746402685901384e-06, "loss": 1.409, "mean_token_accuracy": 0.6450504660606384, "num_tokens": 1704239787.0, "step": 10163 }, { "entropy": 1.6746040880680084, "epoch": 1.116558182966686, "grad_norm": 0.6383149027824402, "learning_rate": 9.74481718499936e-06, "loss": 1.401, "mean_token_accuracy": 0.6668302963177363, "num_tokens": 1704427935.0, "step": 10164 }, { "entropy": 1.7026494840780895, "epoch": 1.1166680398780588, "grad_norm": 0.6862279772758484, "learning_rate": 9.743231723823301e-06, "loss": 1.3652, "mean_token_accuracy": 0.6631535540024439, "num_tokens": 1704561300.0, "step": 10165 }, { "entropy": 1.6878803571065266, "epoch": 1.1167778967894317, "grad_norm": 1.0565212965011597, "learning_rate": 9.741646302423392e-06, "loss": 1.5243, "mean_token_accuracy": 0.6648477713267008, "num_tokens": 1704716778.0, "step": 10166 }, { "entropy": 1.774049351612727, "epoch": 1.1168877537008046, "grad_norm": 0.6734504103660583, "learning_rate": 9.740060920849816e-06, "loss": 1.4133, "mean_token_accuracy": 0.6444449126720428, "num_tokens": 1704906978.0, "step": 10167 }, { "entropy": 1.7018288373947144, "epoch": 1.1169976106121777, "grad_norm": 0.5699096322059631, "learning_rate": 9.73847557915274e-06, "loss": 1.3479, "mean_token_accuracy": 0.6530384172995886, "num_tokens": 1705118342.0, "step": 10168 }, { "entropy": 1.7284921209017436, "epoch": 1.1171074675235506, "grad_norm": 0.6984654068946838, "learning_rate": 9.73689027738234e-06, "loss": 1.5368, "mean_token_accuracy": 0.6358341524998347, "num_tokens": 1705278596.0, "step": 10169 }, { "entropy": 1.7392705778280895, "epoch": 1.1172173244349235, "grad_norm": 0.6023355722427368, "learning_rate": 9.735305015588803e-06, "loss": 1.4588, "mean_token_accuracy": 0.6480032652616501, "num_tokens": 1705479803.0, "step": 10170 }, { "entropy": 1.6727214256922405, "epoch": 1.1173271813462964, "grad_norm": 0.708677351474762, "learning_rate": 9.733719793822285e-06, "loss": 1.3237, "mean_token_accuracy": 0.6617665340503057, "num_tokens": 1705612132.0, "step": 10171 }, { "entropy": 1.7155958612759907, "epoch": 1.1174370382576693, "grad_norm": 0.6951910853385925, "learning_rate": 9.732134612132967e-06, "loss": 1.5166, "mean_token_accuracy": 0.6467616135875384, "num_tokens": 1705786778.0, "step": 10172 }, { "entropy": 1.7597291270891826, "epoch": 1.1175468951690424, "grad_norm": 0.8650582432746887, "learning_rate": 9.730549470571017e-06, "loss": 1.3505, "mean_token_accuracy": 0.667744422952334, "num_tokens": 1705918795.0, "step": 10173 }, { "entropy": 1.7015631298224132, "epoch": 1.1176567520804153, "grad_norm": 0.6916826367378235, "learning_rate": 9.728964369186604e-06, "loss": 1.4126, "mean_token_accuracy": 0.6624699632326762, "num_tokens": 1706077148.0, "step": 10174 }, { "entropy": 1.6976262032985687, "epoch": 1.1177666089917881, "grad_norm": 0.6640357375144958, "learning_rate": 9.727379308029894e-06, "loss": 1.2898, "mean_token_accuracy": 0.6710817664861679, "num_tokens": 1706214005.0, "step": 10175 }, { "entropy": 1.6960356036822002, "epoch": 1.117876465903161, "grad_norm": 0.6526516675949097, "learning_rate": 9.72579428715106e-06, "loss": 1.4422, "mean_token_accuracy": 0.6493511895338694, "num_tokens": 1706416877.0, "step": 10176 }, { "entropy": 1.7513096928596497, "epoch": 1.1179863228145341, "grad_norm": 0.6430375576019287, "learning_rate": 9.724209306600259e-06, "loss": 1.365, "mean_token_accuracy": 0.668521781762441, "num_tokens": 1706557592.0, "step": 10177 }, { "entropy": 1.6746535897254944, "epoch": 1.118096179725907, "grad_norm": 0.6057274341583252, "learning_rate": 9.72262436642766e-06, "loss": 1.3255, "mean_token_accuracy": 0.6679912606875101, "num_tokens": 1706756422.0, "step": 10178 }, { "entropy": 1.7983420590559642, "epoch": 1.11820603663728, "grad_norm": 0.7294691205024719, "learning_rate": 9.721039466683425e-06, "loss": 1.4899, "mean_token_accuracy": 0.6450391262769699, "num_tokens": 1706915673.0, "step": 10179 }, { "entropy": 1.6967030266920726, "epoch": 1.1183158935486528, "grad_norm": 2.2237155437469482, "learning_rate": 9.719454607417713e-06, "loss": 1.1928, "mean_token_accuracy": 0.6813047230243683, "num_tokens": 1707094039.0, "step": 10180 }, { "entropy": 1.684626470009486, "epoch": 1.118425750460026, "grad_norm": 0.637450635433197, "learning_rate": 9.717869788680686e-06, "loss": 1.4283, "mean_token_accuracy": 0.6514883587757746, "num_tokens": 1707258674.0, "step": 10181 }, { "entropy": 1.7461927036444347, "epoch": 1.1185356073713988, "grad_norm": 0.7752982378005981, "learning_rate": 9.716285010522507e-06, "loss": 1.29, "mean_token_accuracy": 0.6688565959533056, "num_tokens": 1707367308.0, "step": 10182 }, { "entropy": 1.6707812150319417, "epoch": 1.1186454642827717, "grad_norm": 0.6544961929321289, "learning_rate": 9.71470027299332e-06, "loss": 1.5234, "mean_token_accuracy": 0.6454186936219534, "num_tokens": 1707582766.0, "step": 10183 }, { "entropy": 1.6422998011112213, "epoch": 1.1187553211941446, "grad_norm": 0.6208282709121704, "learning_rate": 9.713115576143294e-06, "loss": 1.4995, "mean_token_accuracy": 0.6404968003431956, "num_tokens": 1707802739.0, "step": 10184 }, { "entropy": 1.7650231917699177, "epoch": 1.1188651781055177, "grad_norm": 0.7994515895843506, "learning_rate": 9.711530920022583e-06, "loss": 1.3585, "mean_token_accuracy": 0.6573351373275121, "num_tokens": 1707927893.0, "step": 10185 }, { "entropy": 1.650092860062917, "epoch": 1.1189750350168906, "grad_norm": 0.6611128449440002, "learning_rate": 9.709946304681337e-06, "loss": 1.3735, "mean_token_accuracy": 0.654491126537323, "num_tokens": 1708061832.0, "step": 10186 }, { "entropy": 1.6721069812774658, "epoch": 1.1190848919282634, "grad_norm": 0.608931303024292, "learning_rate": 9.708361730169704e-06, "loss": 1.3893, "mean_token_accuracy": 0.6576156516869863, "num_tokens": 1708203863.0, "step": 10187 }, { "entropy": 1.720770001411438, "epoch": 1.1191947488396363, "grad_norm": 0.6923795342445374, "learning_rate": 9.706777196537848e-06, "loss": 1.422, "mean_token_accuracy": 0.6553168892860413, "num_tokens": 1708401936.0, "step": 10188 }, { "entropy": 1.7493318518002827, "epoch": 1.1193046057510092, "grad_norm": 0.7753176093101501, "learning_rate": 9.705192703835905e-06, "loss": 1.336, "mean_token_accuracy": 0.6591468950112661, "num_tokens": 1708521454.0, "step": 10189 }, { "entropy": 1.7393775284290314, "epoch": 1.1194144626623823, "grad_norm": 0.7612557411193848, "learning_rate": 9.703608252114032e-06, "loss": 1.3429, "mean_token_accuracy": 0.6587264835834503, "num_tokens": 1708653355.0, "step": 10190 }, { "entropy": 1.6912192503611247, "epoch": 1.1195243195737552, "grad_norm": 0.6567522883415222, "learning_rate": 9.702023841422375e-06, "loss": 1.4079, "mean_token_accuracy": 0.6546053836743037, "num_tokens": 1708820340.0, "step": 10191 }, { "entropy": 1.651652197043101, "epoch": 1.119634176485128, "grad_norm": 0.5810186862945557, "learning_rate": 9.700439471811076e-06, "loss": 1.411, "mean_token_accuracy": 0.642088994383812, "num_tokens": 1709066402.0, "step": 10192 }, { "entropy": 1.6859426498413086, "epoch": 1.119744033396501, "grad_norm": 0.7347911596298218, "learning_rate": 9.698855143330279e-06, "loss": 1.3373, "mean_token_accuracy": 0.6684578359127045, "num_tokens": 1709226238.0, "step": 10193 }, { "entropy": 1.7300080160299938, "epoch": 1.119853890307874, "grad_norm": 0.5946059823036194, "learning_rate": 9.697270856030139e-06, "loss": 1.4268, "mean_token_accuracy": 0.641315350929896, "num_tokens": 1709429497.0, "step": 10194 }, { "entropy": 1.7237819532553356, "epoch": 1.119963747219247, "grad_norm": 0.6461696624755859, "learning_rate": 9.695686609960781e-06, "loss": 1.3313, "mean_token_accuracy": 0.6617699911197027, "num_tokens": 1709578553.0, "step": 10195 }, { "entropy": 1.7087758978207905, "epoch": 1.1200736041306198, "grad_norm": 0.6898693442344666, "learning_rate": 9.694102405172359e-06, "loss": 1.5797, "mean_token_accuracy": 0.6312484840552012, "num_tokens": 1709792879.0, "step": 10196 }, { "entropy": 1.7287095288435619, "epoch": 1.1201834610419927, "grad_norm": 0.6966397166252136, "learning_rate": 9.692518241715007e-06, "loss": 1.4683, "mean_token_accuracy": 0.642145057519277, "num_tokens": 1709980761.0, "step": 10197 }, { "entropy": 1.6889378329118092, "epoch": 1.1202933179533658, "grad_norm": 0.7332677245140076, "learning_rate": 9.690934119638864e-06, "loss": 1.3267, "mean_token_accuracy": 0.670543372631073, "num_tokens": 1710162953.0, "step": 10198 }, { "entropy": 1.7048714061578114, "epoch": 1.1204031748647387, "grad_norm": 0.643816351890564, "learning_rate": 9.68935003899406e-06, "loss": 1.483, "mean_token_accuracy": 0.6404896924893061, "num_tokens": 1710319457.0, "step": 10199 }, { "entropy": 1.758160392443339, "epoch": 1.1205130317761116, "grad_norm": 0.7269825339317322, "learning_rate": 9.687765999830747e-06, "loss": 1.4181, "mean_token_accuracy": 0.6542981912692388, "num_tokens": 1710487316.0, "step": 10200 }, { "entropy": 1.6718091368675232, "epoch": 1.1206228886874845, "grad_norm": 0.6098527908325195, "learning_rate": 9.686182002199043e-06, "loss": 1.3717, "mean_token_accuracy": 0.6548734953006109, "num_tokens": 1710721505.0, "step": 10201 }, { "entropy": 1.658282607793808, "epoch": 1.1207327455988576, "grad_norm": 0.7469896078109741, "learning_rate": 9.684598046149086e-06, "loss": 1.3578, "mean_token_accuracy": 0.6641291330258051, "num_tokens": 1710872364.0, "step": 10202 }, { "entropy": 1.6753500401973724, "epoch": 1.1208426025102305, "grad_norm": 0.6354155540466309, "learning_rate": 9.68301413173101e-06, "loss": 1.3854, "mean_token_accuracy": 0.6567136198282242, "num_tokens": 1711009667.0, "step": 10203 }, { "entropy": 1.7254823247591655, "epoch": 1.1209524594216034, "grad_norm": 0.8729540109634399, "learning_rate": 9.681430258994942e-06, "loss": 1.5197, "mean_token_accuracy": 0.6521262973546982, "num_tokens": 1711175837.0, "step": 10204 }, { "entropy": 1.7196756303310394, "epoch": 1.1210623163329763, "grad_norm": 0.9252813458442688, "learning_rate": 9.67984642799101e-06, "loss": 1.5275, "mean_token_accuracy": 0.6681269109249115, "num_tokens": 1711325831.0, "step": 10205 }, { "entropy": 1.6983699103196461, "epoch": 1.1211721732443491, "grad_norm": 1.3726911544799805, "learning_rate": 9.67826263876935e-06, "loss": 1.3746, "mean_token_accuracy": 0.6633789986371994, "num_tokens": 1711546558.0, "step": 10206 }, { "entropy": 1.7186132570107777, "epoch": 1.1212820301557223, "grad_norm": 0.6234872937202454, "learning_rate": 9.676678891380075e-06, "loss": 1.564, "mean_token_accuracy": 0.643887793024381, "num_tokens": 1711737822.0, "step": 10207 }, { "entropy": 1.7215098639329274, "epoch": 1.1213918870670951, "grad_norm": 0.6350100040435791, "learning_rate": 9.67509518587332e-06, "loss": 1.4629, "mean_token_accuracy": 0.627614696820577, "num_tokens": 1711928499.0, "step": 10208 }, { "entropy": 1.6704553961753845, "epoch": 1.121501743978468, "grad_norm": 0.8866394758224487, "learning_rate": 9.673511522299206e-06, "loss": 1.2698, "mean_token_accuracy": 0.6800111383199692, "num_tokens": 1712068332.0, "step": 10209 }, { "entropy": 1.6835826337337494, "epoch": 1.121611600889841, "grad_norm": 0.787550151348114, "learning_rate": 9.671927900707853e-06, "loss": 1.3623, "mean_token_accuracy": 0.6513439963261286, "num_tokens": 1712231679.0, "step": 10210 }, { "entropy": 1.658888618151347, "epoch": 1.121721457801214, "grad_norm": 0.6265230774879456, "learning_rate": 9.670344321149382e-06, "loss": 1.3855, "mean_token_accuracy": 0.6652615120013555, "num_tokens": 1712419094.0, "step": 10211 }, { "entropy": 1.701241821050644, "epoch": 1.121831314712587, "grad_norm": 0.6618353724479675, "learning_rate": 9.66876078367392e-06, "loss": 1.2214, "mean_token_accuracy": 0.6815350999434789, "num_tokens": 1712520934.0, "step": 10212 }, { "entropy": 1.674480825662613, "epoch": 1.1219411716239598, "grad_norm": 1.002471685409546, "learning_rate": 9.667177288331575e-06, "loss": 1.4104, "mean_token_accuracy": 0.6764763842026392, "num_tokens": 1712659842.0, "step": 10213 }, { "entropy": 1.7319279114405315, "epoch": 1.1220510285353327, "grad_norm": 0.7504306435585022, "learning_rate": 9.665593835172469e-06, "loss": 1.5625, "mean_token_accuracy": 0.6419167965650558, "num_tokens": 1712808419.0, "step": 10214 }, { "entropy": 1.7441862523555756, "epoch": 1.1221608854467058, "grad_norm": 0.771921694278717, "learning_rate": 9.664010424246718e-06, "loss": 1.4994, "mean_token_accuracy": 0.6410591999689738, "num_tokens": 1713000413.0, "step": 10215 }, { "entropy": 1.680644154548645, "epoch": 1.1222707423580787, "grad_norm": 0.6322289109230042, "learning_rate": 9.662427055604433e-06, "loss": 1.4454, "mean_token_accuracy": 0.6500407656033834, "num_tokens": 1713179456.0, "step": 10216 }, { "entropy": 1.72195503115654, "epoch": 1.1223805992694516, "grad_norm": 0.7085158824920654, "learning_rate": 9.66084372929573e-06, "loss": 1.2239, "mean_token_accuracy": 0.6777483820915222, "num_tokens": 1713317703.0, "step": 10217 }, { "entropy": 1.6902472376823425, "epoch": 1.1224904561808244, "grad_norm": 0.7547399997711182, "learning_rate": 9.659260445370721e-06, "loss": 1.2484, "mean_token_accuracy": 0.6817424396673838, "num_tokens": 1713448541.0, "step": 10218 }, { "entropy": 1.719992220401764, "epoch": 1.1226003130921973, "grad_norm": 0.7051261067390442, "learning_rate": 9.65767720387951e-06, "loss": 1.408, "mean_token_accuracy": 0.6687562465667725, "num_tokens": 1713608815.0, "step": 10219 }, { "entropy": 1.7180972397327423, "epoch": 1.1227101700035704, "grad_norm": 0.7280387878417969, "learning_rate": 9.656094004872214e-06, "loss": 1.4462, "mean_token_accuracy": 0.6457540740569433, "num_tokens": 1713764991.0, "step": 10220 }, { "entropy": 1.7479403515656788, "epoch": 1.1228200269149433, "grad_norm": 0.7459531426429749, "learning_rate": 9.65451084839894e-06, "loss": 1.4455, "mean_token_accuracy": 0.6594701160987219, "num_tokens": 1713947251.0, "step": 10221 }, { "entropy": 1.662820319334666, "epoch": 1.1229298838263162, "grad_norm": 0.7663895487785339, "learning_rate": 9.652927734509785e-06, "loss": 1.2602, "mean_token_accuracy": 0.6720548172791799, "num_tokens": 1714098388.0, "step": 10222 }, { "entropy": 1.6706760227680206, "epoch": 1.123039740737689, "grad_norm": 0.7139101624488831, "learning_rate": 9.651344663254867e-06, "loss": 1.2496, "mean_token_accuracy": 0.6699910461902618, "num_tokens": 1714223112.0, "step": 10223 }, { "entropy": 1.7492280701796215, "epoch": 1.1231495976490622, "grad_norm": 0.7086498141288757, "learning_rate": 9.649761634684278e-06, "loss": 1.3808, "mean_token_accuracy": 0.6561706811189651, "num_tokens": 1714384472.0, "step": 10224 }, { "entropy": 1.7483586271603901, "epoch": 1.123259454560435, "grad_norm": 0.7158522605895996, "learning_rate": 9.648178648848124e-06, "loss": 1.336, "mean_token_accuracy": 0.6585302899281184, "num_tokens": 1714505046.0, "step": 10225 }, { "entropy": 1.727395882209142, "epoch": 1.123369311471808, "grad_norm": 0.713912844657898, "learning_rate": 9.646595705796512e-06, "loss": 1.5567, "mean_token_accuracy": 0.622910718123118, "num_tokens": 1714731130.0, "step": 10226 }, { "entropy": 1.655521293481191, "epoch": 1.1234791683831808, "grad_norm": 0.6293887495994568, "learning_rate": 9.64501280557953e-06, "loss": 1.3659, "mean_token_accuracy": 0.6622976015011469, "num_tokens": 1714907443.0, "step": 10227 }, { "entropy": 1.7207797865072887, "epoch": 1.123589025294554, "grad_norm": 0.6388752460479736, "learning_rate": 9.643429948247285e-06, "loss": 1.4711, "mean_token_accuracy": 0.6455638408660889, "num_tokens": 1715072791.0, "step": 10228 }, { "entropy": 1.7234470546245575, "epoch": 1.1236988822059268, "grad_norm": 0.733909547328949, "learning_rate": 9.641847133849871e-06, "loss": 1.3224, "mean_token_accuracy": 0.6681032180786133, "num_tokens": 1715205714.0, "step": 10229 }, { "entropy": 1.6593499183654785, "epoch": 1.1238087391172997, "grad_norm": 0.7304568290710449, "learning_rate": 9.640264362437383e-06, "loss": 1.3619, "mean_token_accuracy": 0.6740380873282751, "num_tokens": 1715356891.0, "step": 10230 }, { "entropy": 1.6985628008842468, "epoch": 1.1239185960286726, "grad_norm": 0.7651453614234924, "learning_rate": 9.638681634059912e-06, "loss": 1.329, "mean_token_accuracy": 0.6615240027507147, "num_tokens": 1715495634.0, "step": 10231 }, { "entropy": 1.7773073414961498, "epoch": 1.1240284529400455, "grad_norm": 0.8025757074356079, "learning_rate": 9.63709894876756e-06, "loss": 1.4464, "mean_token_accuracy": 0.6638560245434443, "num_tokens": 1715615936.0, "step": 10232 }, { "entropy": 1.703110893567403, "epoch": 1.1241383098514186, "grad_norm": 0.7909550666809082, "learning_rate": 9.63551630661041e-06, "loss": 1.3458, "mean_token_accuracy": 0.6680939247210821, "num_tokens": 1715769008.0, "step": 10233 }, { "entropy": 1.720233827829361, "epoch": 1.1242481667627915, "grad_norm": 0.7866725921630859, "learning_rate": 9.633933707638549e-06, "loss": 1.3118, "mean_token_accuracy": 0.6679093490044276, "num_tokens": 1715904405.0, "step": 10234 }, { "entropy": 1.7014137109120686, "epoch": 1.1243580236741644, "grad_norm": 0.9128050804138184, "learning_rate": 9.632351151902078e-06, "loss": 1.4969, "mean_token_accuracy": 0.6651032914717993, "num_tokens": 1716056585.0, "step": 10235 }, { "entropy": 1.7811519304911296, "epoch": 1.1244678805855373, "grad_norm": 0.6634161472320557, "learning_rate": 9.630768639451074e-06, "loss": 1.5714, "mean_token_accuracy": 0.6314926048119863, "num_tokens": 1716297004.0, "step": 10236 }, { "entropy": 1.6649401287237804, "epoch": 1.1245777374969104, "grad_norm": 0.9859393239021301, "learning_rate": 9.629186170335623e-06, "loss": 1.4163, "mean_token_accuracy": 0.6583981762329737, "num_tokens": 1716441728.0, "step": 10237 }, { "entropy": 1.6984333594640095, "epoch": 1.1246875944082833, "grad_norm": 0.5795386433601379, "learning_rate": 9.627603744605816e-06, "loss": 1.3759, "mean_token_accuracy": 0.6487035552660624, "num_tokens": 1716646155.0, "step": 10238 }, { "entropy": 1.7283507784207661, "epoch": 1.1247974513196561, "grad_norm": 0.6568630337715149, "learning_rate": 9.626021362311728e-06, "loss": 1.4842, "mean_token_accuracy": 0.6527669827143351, "num_tokens": 1716845898.0, "step": 10239 }, { "entropy": 1.672084202369054, "epoch": 1.124907308231029, "grad_norm": 1.0509593486785889, "learning_rate": 9.624439023503447e-06, "loss": 1.4772, "mean_token_accuracy": 0.6506913155317307, "num_tokens": 1717018504.0, "step": 10240 }, { "entropy": 1.747216780980428, "epoch": 1.1250171651424021, "grad_norm": 0.6140730977058411, "learning_rate": 9.62285672823105e-06, "loss": 1.509, "mean_token_accuracy": 0.6518428673346838, "num_tokens": 1717198945.0, "step": 10241 }, { "entropy": 1.7177268067995708, "epoch": 1.125127022053775, "grad_norm": 0.6890478730201721, "learning_rate": 9.62127447654462e-06, "loss": 1.3421, "mean_token_accuracy": 0.6564944684505463, "num_tokens": 1717368940.0, "step": 10242 }, { "entropy": 1.7818762163321178, "epoch": 1.125236878965148, "grad_norm": 0.7375714778900146, "learning_rate": 9.619692268494227e-06, "loss": 1.4952, "mean_token_accuracy": 0.653364305694898, "num_tokens": 1717525260.0, "step": 10243 }, { "entropy": 1.7333811124165852, "epoch": 1.1253467358765208, "grad_norm": 0.8810262084007263, "learning_rate": 9.618110104129959e-06, "loss": 1.2485, "mean_token_accuracy": 0.673724964261055, "num_tokens": 1717622207.0, "step": 10244 }, { "entropy": 1.7475936810175579, "epoch": 1.1254565927878937, "grad_norm": 0.7619924545288086, "learning_rate": 9.616527983501875e-06, "loss": 1.3755, "mean_token_accuracy": 0.6646767059961954, "num_tokens": 1717792417.0, "step": 10245 }, { "entropy": 1.6934519012769063, "epoch": 1.1255664496992668, "grad_norm": 0.7149166464805603, "learning_rate": 9.61494590666006e-06, "loss": 1.5686, "mean_token_accuracy": 0.6368276750048002, "num_tokens": 1717962583.0, "step": 10246 }, { "entropy": 1.7014482418696086, "epoch": 1.1256763066106397, "grad_norm": 0.7237895131111145, "learning_rate": 9.613363873654587e-06, "loss": 1.455, "mean_token_accuracy": 0.6583812286456426, "num_tokens": 1718136025.0, "step": 10247 }, { "entropy": 1.6409784257411957, "epoch": 1.1257861635220126, "grad_norm": 0.8253493905067444, "learning_rate": 9.611781884535515e-06, "loss": 1.4142, "mean_token_accuracy": 0.6619662940502167, "num_tokens": 1718279754.0, "step": 10248 }, { "entropy": 1.713402251402537, "epoch": 1.1258960204333854, "grad_norm": 0.7551915049552917, "learning_rate": 9.610199939352927e-06, "loss": 1.4834, "mean_token_accuracy": 0.6495722184578577, "num_tokens": 1718426265.0, "step": 10249 }, { "entropy": 1.701746533314387, "epoch": 1.1260058773447585, "grad_norm": 0.6706552505493164, "learning_rate": 9.608618038156885e-06, "loss": 1.2812, "mean_token_accuracy": 0.6634306162595749, "num_tokens": 1718558139.0, "step": 10250 }, { "entropy": 1.7403425474961598, "epoch": 1.1261157342561314, "grad_norm": 0.7633783221244812, "learning_rate": 9.60703618099745e-06, "loss": 1.4703, "mean_token_accuracy": 0.6518524537483851, "num_tokens": 1718700729.0, "step": 10251 }, { "entropy": 1.726717124382655, "epoch": 1.1262255911675043, "grad_norm": 0.6118089556694031, "learning_rate": 9.605454367924694e-06, "loss": 1.5331, "mean_token_accuracy": 0.6415149420499802, "num_tokens": 1718933466.0, "step": 10252 }, { "entropy": 1.6945171753565471, "epoch": 1.1263354480788772, "grad_norm": 0.8243867754936218, "learning_rate": 9.603872598988681e-06, "loss": 1.5107, "mean_token_accuracy": 0.6461230466763178, "num_tokens": 1719113356.0, "step": 10253 }, { "entropy": 1.7175917228062947, "epoch": 1.1264453049902503, "grad_norm": 0.6808179020881653, "learning_rate": 9.60229087423947e-06, "loss": 1.3696, "mean_token_accuracy": 0.6517705669005712, "num_tokens": 1719278633.0, "step": 10254 }, { "entropy": 1.7561777532100677, "epoch": 1.1265551619016232, "grad_norm": 0.7055034041404724, "learning_rate": 9.60070919372712e-06, "loss": 1.3642, "mean_token_accuracy": 0.6630295763413111, "num_tokens": 1719416802.0, "step": 10255 }, { "entropy": 1.6309455533822377, "epoch": 1.126665018812996, "grad_norm": 0.7051904797554016, "learning_rate": 9.599127557501702e-06, "loss": 1.3411, "mean_token_accuracy": 0.66075432797273, "num_tokens": 1719589328.0, "step": 10256 }, { "entropy": 1.7621269524097443, "epoch": 1.126774875724369, "grad_norm": 0.6432750821113586, "learning_rate": 9.597545965613256e-06, "loss": 1.5804, "mean_token_accuracy": 0.6246517151594162, "num_tokens": 1719767545.0, "step": 10257 }, { "entropy": 1.7293944557507832, "epoch": 1.1268847326357418, "grad_norm": 0.7736344337463379, "learning_rate": 9.595964418111852e-06, "loss": 1.4588, "mean_token_accuracy": 0.6443672925233841, "num_tokens": 1719921859.0, "step": 10258 }, { "entropy": 1.6619472404321034, "epoch": 1.126994589547115, "grad_norm": 0.6600391268730164, "learning_rate": 9.594382915047541e-06, "loss": 1.4106, "mean_token_accuracy": 0.6570078432559967, "num_tokens": 1720055743.0, "step": 10259 }, { "entropy": 1.703255335489909, "epoch": 1.1271044464584878, "grad_norm": 0.6473222374916077, "learning_rate": 9.59280145647038e-06, "loss": 1.2278, "mean_token_accuracy": 0.6826945741971334, "num_tokens": 1720194410.0, "step": 10260 }, { "entropy": 1.7209681471188862, "epoch": 1.1272143033698607, "grad_norm": 0.6969286203384399, "learning_rate": 9.591220042430413e-06, "loss": 1.3104, "mean_token_accuracy": 0.6570959637562434, "num_tokens": 1720355987.0, "step": 10261 }, { "entropy": 1.6787172555923462, "epoch": 1.1273241602812336, "grad_norm": 0.6830558180809021, "learning_rate": 9.589638672977707e-06, "loss": 1.4813, "mean_token_accuracy": 0.6633206804593405, "num_tokens": 1720540451.0, "step": 10262 }, { "entropy": 1.7526487509409587, "epoch": 1.1274340171926067, "grad_norm": 0.681906521320343, "learning_rate": 9.588057348162291e-06, "loss": 1.4437, "mean_token_accuracy": 0.6427949617306391, "num_tokens": 1720713011.0, "step": 10263 }, { "entropy": 1.6857905586560566, "epoch": 1.1275438741039796, "grad_norm": 0.7251749634742737, "learning_rate": 9.586476068034227e-06, "loss": 1.3212, "mean_token_accuracy": 0.6568873276313146, "num_tokens": 1720891843.0, "step": 10264 }, { "entropy": 1.670927365620931, "epoch": 1.1276537310153525, "grad_norm": 0.6126587986946106, "learning_rate": 9.58489483264356e-06, "loss": 1.4053, "mean_token_accuracy": 0.6479257047176361, "num_tokens": 1721080212.0, "step": 10265 }, { "entropy": 1.7660788198312123, "epoch": 1.1277635879267254, "grad_norm": 0.7999944686889648, "learning_rate": 9.583313642040334e-06, "loss": 1.6588, "mean_token_accuracy": 0.6271585474411646, "num_tokens": 1721260940.0, "step": 10266 }, { "entropy": 1.6764280597368877, "epoch": 1.1278734448380985, "grad_norm": 0.7556807398796082, "learning_rate": 9.581732496274589e-06, "loss": 1.44, "mean_token_accuracy": 0.6474957416454951, "num_tokens": 1721416623.0, "step": 10267 }, { "entropy": 1.6823217471440632, "epoch": 1.1279833017494714, "grad_norm": 0.7498142719268799, "learning_rate": 9.58015139539638e-06, "loss": 1.3952, "mean_token_accuracy": 0.6678342968225479, "num_tokens": 1721589531.0, "step": 10268 }, { "entropy": 1.6793174644311268, "epoch": 1.1280931586608443, "grad_norm": 0.9057360887527466, "learning_rate": 9.578570339455731e-06, "loss": 1.3778, "mean_token_accuracy": 0.6720141271750132, "num_tokens": 1721737220.0, "step": 10269 }, { "entropy": 1.7173177699247997, "epoch": 1.1282030155722171, "grad_norm": 0.7818934917449951, "learning_rate": 9.576989328502692e-06, "loss": 1.3202, "mean_token_accuracy": 0.675959994395574, "num_tokens": 1721852887.0, "step": 10270 }, { "entropy": 1.7108666598796844, "epoch": 1.12831287248359, "grad_norm": 0.6389757394790649, "learning_rate": 9.575408362587303e-06, "loss": 1.418, "mean_token_accuracy": 0.66609459122022, "num_tokens": 1722068639.0, "step": 10271 }, { "entropy": 1.695683737595876, "epoch": 1.1284227293949631, "grad_norm": 0.607473611831665, "learning_rate": 9.573827441759595e-06, "loss": 1.5285, "mean_token_accuracy": 0.6294114092985789, "num_tokens": 1722254953.0, "step": 10272 }, { "entropy": 1.6890762945016224, "epoch": 1.128532586306336, "grad_norm": 0.8829686045646667, "learning_rate": 9.572246566069605e-06, "loss": 1.4423, "mean_token_accuracy": 0.6620638519525528, "num_tokens": 1722387360.0, "step": 10273 }, { "entropy": 1.689795712629954, "epoch": 1.128642443217709, "grad_norm": 0.727443516254425, "learning_rate": 9.570665735567371e-06, "loss": 1.4261, "mean_token_accuracy": 0.6524536609649658, "num_tokens": 1722559707.0, "step": 10274 }, { "entropy": 1.6225083768367767, "epoch": 1.1287523001290818, "grad_norm": 0.7552088499069214, "learning_rate": 9.569084950302919e-06, "loss": 1.3415, "mean_token_accuracy": 0.6804608354965845, "num_tokens": 1722701488.0, "step": 10275 }, { "entropy": 1.6765993038813274, "epoch": 1.128862157040455, "grad_norm": 0.7234074473381042, "learning_rate": 9.567504210326282e-06, "loss": 1.4674, "mean_token_accuracy": 0.653687963883082, "num_tokens": 1722919230.0, "step": 10276 }, { "entropy": 1.641531725724538, "epoch": 1.1289720139518278, "grad_norm": 0.5420734882354736, "learning_rate": 9.565923515687496e-06, "loss": 1.4152, "mean_token_accuracy": 0.6417432824770609, "num_tokens": 1723122116.0, "step": 10277 }, { "entropy": 1.7413820525010426, "epoch": 1.1290818708632007, "grad_norm": 0.7966932654380798, "learning_rate": 9.564342866436582e-06, "loss": 1.459, "mean_token_accuracy": 0.6337345441182455, "num_tokens": 1723305443.0, "step": 10278 }, { "entropy": 1.6700741648674011, "epoch": 1.1291917277745736, "grad_norm": 0.7032240033149719, "learning_rate": 9.562762262623569e-06, "loss": 1.2664, "mean_token_accuracy": 0.6797453612089157, "num_tokens": 1723427892.0, "step": 10279 }, { "entropy": 1.7009910543759663, "epoch": 1.1293015846859467, "grad_norm": 0.6182928085327148, "learning_rate": 9.561181704298487e-06, "loss": 1.3533, "mean_token_accuracy": 0.6629110823074976, "num_tokens": 1723591055.0, "step": 10280 }, { "entropy": 1.6887823740641277, "epoch": 1.1294114415973195, "grad_norm": 0.6807572245597839, "learning_rate": 9.55960119151135e-06, "loss": 1.5122, "mean_token_accuracy": 0.6364335119724274, "num_tokens": 1723783650.0, "step": 10281 }, { "entropy": 1.7669847408930461, "epoch": 1.1295212985086924, "grad_norm": 0.8603620529174805, "learning_rate": 9.558020724312192e-06, "loss": 1.3234, "mean_token_accuracy": 0.6592119683821996, "num_tokens": 1723913943.0, "step": 10282 }, { "entropy": 1.8033428092797597, "epoch": 1.1296311554200653, "grad_norm": 0.6308757066726685, "learning_rate": 9.556440302751022e-06, "loss": 1.4724, "mean_token_accuracy": 0.6299006392558416, "num_tokens": 1724146940.0, "step": 10283 }, { "entropy": 1.6769826412200928, "epoch": 1.1297410123314382, "grad_norm": 0.7638330459594727, "learning_rate": 9.554859926877868e-06, "loss": 1.4675, "mean_token_accuracy": 0.6560545514027277, "num_tokens": 1724296007.0, "step": 10284 }, { "entropy": 1.7535623212655385, "epoch": 1.1298508692428113, "grad_norm": 0.7146167755126953, "learning_rate": 9.553279596742748e-06, "loss": 1.5313, "mean_token_accuracy": 0.6488161732753118, "num_tokens": 1724415625.0, "step": 10285 }, { "entropy": 1.7697202265262604, "epoch": 1.1299607261541842, "grad_norm": 0.7635106444358826, "learning_rate": 9.551699312395677e-06, "loss": 1.2609, "mean_token_accuracy": 0.6754236469666163, "num_tokens": 1724518575.0, "step": 10286 }, { "entropy": 1.6063568989435832, "epoch": 1.130070583065557, "grad_norm": 0.8617009520530701, "learning_rate": 9.550119073886666e-06, "loss": 1.3429, "mean_token_accuracy": 0.6730814675490061, "num_tokens": 1724716862.0, "step": 10287 }, { "entropy": 1.7419047852357228, "epoch": 1.13018043997693, "grad_norm": 0.7265612483024597, "learning_rate": 9.548538881265739e-06, "loss": 1.5776, "mean_token_accuracy": 0.6297195802132288, "num_tokens": 1724911990.0, "step": 10288 }, { "entropy": 1.7049497961997986, "epoch": 1.130290296888303, "grad_norm": 1.0515176057815552, "learning_rate": 9.546958734582897e-06, "loss": 1.333, "mean_token_accuracy": 0.6726740250984827, "num_tokens": 1725084372.0, "step": 10289 }, { "entropy": 1.6889955898125966, "epoch": 1.130400153799676, "grad_norm": 0.6568050980567932, "learning_rate": 9.545378633888158e-06, "loss": 1.4114, "mean_token_accuracy": 0.6661112556854883, "num_tokens": 1725262103.0, "step": 10290 }, { "entropy": 1.7066051562627156, "epoch": 1.1305100107110488, "grad_norm": 0.6112655997276306, "learning_rate": 9.543798579231534e-06, "loss": 1.4617, "mean_token_accuracy": 0.647816851735115, "num_tokens": 1725470273.0, "step": 10291 }, { "entropy": 1.6733331779638927, "epoch": 1.1306198676224217, "grad_norm": 0.5629302263259888, "learning_rate": 9.542218570663024e-06, "loss": 1.543, "mean_token_accuracy": 0.6284714738527933, "num_tokens": 1725724252.0, "step": 10292 }, { "entropy": 1.7002252141634624, "epoch": 1.1307297245337948, "grad_norm": 0.6977674961090088, "learning_rate": 9.540638608232637e-06, "loss": 1.3992, "mean_token_accuracy": 0.6548378119866053, "num_tokens": 1725901325.0, "step": 10293 }, { "entropy": 1.7188651661078136, "epoch": 1.1308395814451677, "grad_norm": 0.707028329372406, "learning_rate": 9.539058691990388e-06, "loss": 1.3586, "mean_token_accuracy": 0.6680986136198044, "num_tokens": 1726022572.0, "step": 10294 }, { "entropy": 1.7041955292224884, "epoch": 1.1309494383565406, "grad_norm": 0.583182692527771, "learning_rate": 9.537478821986266e-06, "loss": 1.4493, "mean_token_accuracy": 0.648201530178388, "num_tokens": 1726212529.0, "step": 10295 }, { "entropy": 1.6689602136611938, "epoch": 1.1310592952679135, "grad_norm": 0.6370671391487122, "learning_rate": 9.535898998270283e-06, "loss": 1.403, "mean_token_accuracy": 0.6426753501097361, "num_tokens": 1726394511.0, "step": 10296 }, { "entropy": 1.6657602687676747, "epoch": 1.1311691521792864, "grad_norm": 0.7945041656494141, "learning_rate": 9.534319220892438e-06, "loss": 1.4308, "mean_token_accuracy": 0.6577966163555781, "num_tokens": 1726563985.0, "step": 10297 }, { "entropy": 1.776595026254654, "epoch": 1.1312790090906595, "grad_norm": 0.7368420958518982, "learning_rate": 9.53273948990273e-06, "loss": 1.3903, "mean_token_accuracy": 0.6460030823945999, "num_tokens": 1726750625.0, "step": 10298 }, { "entropy": 1.7575420339902241, "epoch": 1.1313888660020324, "grad_norm": 0.7476623058319092, "learning_rate": 9.531159805351151e-06, "loss": 1.5496, "mean_token_accuracy": 0.6646288931369781, "num_tokens": 1726878077.0, "step": 10299 }, { "entropy": 1.6953720152378082, "epoch": 1.1314987229134053, "grad_norm": 0.6216321587562561, "learning_rate": 9.52958016728771e-06, "loss": 1.3568, "mean_token_accuracy": 0.6530263473590215, "num_tokens": 1727019274.0, "step": 10300 }, { "entropy": 1.751002957423528, "epoch": 1.1316085798247781, "grad_norm": 0.6717413663864136, "learning_rate": 9.528000575762387e-06, "loss": 1.3837, "mean_token_accuracy": 0.6547732700904211, "num_tokens": 1727175939.0, "step": 10301 }, { "entropy": 1.7171097993850708, "epoch": 1.1317184367361512, "grad_norm": 0.6593231558799744, "learning_rate": 9.526421030825186e-06, "loss": 1.504, "mean_token_accuracy": 0.6401314934094747, "num_tokens": 1727319618.0, "step": 10302 }, { "entropy": 1.730803112188975, "epoch": 1.1318282936475241, "grad_norm": 0.6500627994537354, "learning_rate": 9.524841532526095e-06, "loss": 1.3106, "mean_token_accuracy": 0.6745495200157166, "num_tokens": 1727473341.0, "step": 10303 }, { "entropy": 1.7114306290944417, "epoch": 1.131938150558897, "grad_norm": 0.6906517148017883, "learning_rate": 9.523262080915103e-06, "loss": 1.3155, "mean_token_accuracy": 0.672325387597084, "num_tokens": 1727600003.0, "step": 10304 }, { "entropy": 1.6404125392436981, "epoch": 1.13204800747027, "grad_norm": 0.7134620547294617, "learning_rate": 9.521682676042201e-06, "loss": 1.2493, "mean_token_accuracy": 0.6794395595788956, "num_tokens": 1727738890.0, "step": 10305 }, { "entropy": 1.7687196135520935, "epoch": 1.132157864381643, "grad_norm": 0.7907180190086365, "learning_rate": 9.520103317957382e-06, "loss": 1.3394, "mean_token_accuracy": 0.6640162070592245, "num_tokens": 1727889905.0, "step": 10306 }, { "entropy": 1.7121462921301525, "epoch": 1.132267721293016, "grad_norm": 0.8624327182769775, "learning_rate": 9.51852400671062e-06, "loss": 1.2975, "mean_token_accuracy": 0.6706186135609945, "num_tokens": 1728016133.0, "step": 10307 }, { "entropy": 1.7604290346304576, "epoch": 1.1323775782043888, "grad_norm": 0.680280327796936, "learning_rate": 9.516944742351905e-06, "loss": 1.4087, "mean_token_accuracy": 0.6513306001822153, "num_tokens": 1728165640.0, "step": 10308 }, { "entropy": 1.7208806375662486, "epoch": 1.1324874351157617, "grad_norm": 0.7024528384208679, "learning_rate": 9.515365524931223e-06, "loss": 1.2785, "mean_token_accuracy": 0.6805547028779984, "num_tokens": 1728291634.0, "step": 10309 }, { "entropy": 1.6809004644552867, "epoch": 1.1325972920271346, "grad_norm": 0.640552818775177, "learning_rate": 9.513786354498554e-06, "loss": 1.4281, "mean_token_accuracy": 0.6382468740145365, "num_tokens": 1728470807.0, "step": 10310 }, { "entropy": 1.6941138605276744, "epoch": 1.1327071489385077, "grad_norm": 0.6164855360984802, "learning_rate": 9.512207231103874e-06, "loss": 1.4135, "mean_token_accuracy": 0.6559168150027593, "num_tokens": 1728644963.0, "step": 10311 }, { "entropy": 1.6928605437278748, "epoch": 1.1328170058498805, "grad_norm": 0.7542420625686646, "learning_rate": 9.51062815479717e-06, "loss": 1.5822, "mean_token_accuracy": 0.6550649454196295, "num_tokens": 1728820062.0, "step": 10312 }, { "entropy": 1.686714122692744, "epoch": 1.1329268627612534, "grad_norm": 0.6460275650024414, "learning_rate": 9.509049125628407e-06, "loss": 1.3163, "mean_token_accuracy": 0.6617647508780161, "num_tokens": 1728959484.0, "step": 10313 }, { "entropy": 1.7324243982632954, "epoch": 1.1330367196726263, "grad_norm": 0.718798816204071, "learning_rate": 9.50747014364757e-06, "loss": 1.4294, "mean_token_accuracy": 0.6527051776647568, "num_tokens": 1729080689.0, "step": 10314 }, { "entropy": 1.758222073316574, "epoch": 1.1331465765839994, "grad_norm": 0.6609033942222595, "learning_rate": 9.505891208904634e-06, "loss": 1.3854, "mean_token_accuracy": 0.6570114940404892, "num_tokens": 1729234542.0, "step": 10315 }, { "entropy": 1.7237963378429413, "epoch": 1.1332564334953723, "grad_norm": 0.7275382280349731, "learning_rate": 9.504312321449565e-06, "loss": 1.3887, "mean_token_accuracy": 0.6633484015862147, "num_tokens": 1729382944.0, "step": 10316 }, { "entropy": 1.7187721331914265, "epoch": 1.1333662904067452, "grad_norm": 0.738908052444458, "learning_rate": 9.502733481332334e-06, "loss": 1.4098, "mean_token_accuracy": 0.6463323136170706, "num_tokens": 1729543587.0, "step": 10317 }, { "entropy": 1.6943478484948475, "epoch": 1.133476147318118, "grad_norm": 0.7214493155479431, "learning_rate": 9.501154688602921e-06, "loss": 1.5356, "mean_token_accuracy": 0.6358891526858012, "num_tokens": 1729722777.0, "step": 10318 }, { "entropy": 1.658001681168874, "epoch": 1.1335860042294912, "grad_norm": 0.6282760500907898, "learning_rate": 9.499575943311279e-06, "loss": 1.3592, "mean_token_accuracy": 0.6529184977213541, "num_tokens": 1729894534.0, "step": 10319 }, { "entropy": 1.6976789931456249, "epoch": 1.133695861140864, "grad_norm": 0.6558998823165894, "learning_rate": 9.497997245507387e-06, "loss": 1.4736, "mean_token_accuracy": 0.6610572139422098, "num_tokens": 1730086808.0, "step": 10320 }, { "entropy": 1.7426810363928478, "epoch": 1.133805718052237, "grad_norm": 0.6194723844528198, "learning_rate": 9.496418595241203e-06, "loss": 1.4611, "mean_token_accuracy": 0.6435061097145081, "num_tokens": 1730288789.0, "step": 10321 }, { "entropy": 1.7035086651643117, "epoch": 1.1339155749636098, "grad_norm": 0.6074888706207275, "learning_rate": 9.494839992562697e-06, "loss": 1.4105, "mean_token_accuracy": 0.6378484318653742, "num_tokens": 1730467433.0, "step": 10322 }, { "entropy": 1.7067280213038127, "epoch": 1.1340254318749827, "grad_norm": 0.6873534917831421, "learning_rate": 9.49326143752182e-06, "loss": 1.3172, "mean_token_accuracy": 0.664742906888326, "num_tokens": 1730640614.0, "step": 10323 }, { "entropy": 1.6762806077798207, "epoch": 1.1341352887863558, "grad_norm": 0.6909382939338684, "learning_rate": 9.491682930168548e-06, "loss": 1.3528, "mean_token_accuracy": 0.6638672153155009, "num_tokens": 1730807800.0, "step": 10324 }, { "entropy": 1.6831237574418385, "epoch": 1.1342451456977287, "grad_norm": 0.7416048049926758, "learning_rate": 9.490104470552823e-06, "loss": 1.4919, "mean_token_accuracy": 0.6549220134814581, "num_tokens": 1731057934.0, "step": 10325 }, { "entropy": 1.724802275498708, "epoch": 1.1343550026091016, "grad_norm": 0.7422142624855042, "learning_rate": 9.488526058724617e-06, "loss": 1.4261, "mean_token_accuracy": 0.6417362888654073, "num_tokens": 1731249169.0, "step": 10326 }, { "entropy": 1.6890951693058014, "epoch": 1.1344648595204745, "grad_norm": 0.7085136771202087, "learning_rate": 9.48694769473388e-06, "loss": 1.3171, "mean_token_accuracy": 0.6658613979816437, "num_tokens": 1731364488.0, "step": 10327 }, { "entropy": 1.797446479399999, "epoch": 1.1345747164318476, "grad_norm": 0.7604406476020813, "learning_rate": 9.485369378630564e-06, "loss": 1.3586, "mean_token_accuracy": 0.66508649289608, "num_tokens": 1731510025.0, "step": 10328 }, { "entropy": 1.69396177927653, "epoch": 1.1346845733432205, "grad_norm": 0.6808403134346008, "learning_rate": 9.483791110464624e-06, "loss": 1.3246, "mean_token_accuracy": 0.6774620612462362, "num_tokens": 1731644186.0, "step": 10329 }, { "entropy": 1.7862180769443512, "epoch": 1.1347944302545934, "grad_norm": 0.7217937707901001, "learning_rate": 9.482212890286017e-06, "loss": 1.4788, "mean_token_accuracy": 0.6508872807025909, "num_tokens": 1731839500.0, "step": 10330 }, { "entropy": 1.706006020307541, "epoch": 1.1349042871659663, "grad_norm": 0.6489999890327454, "learning_rate": 9.480634718144684e-06, "loss": 1.3578, "mean_token_accuracy": 0.6593449711799622, "num_tokens": 1731976383.0, "step": 10331 }, { "entropy": 1.7095771531263988, "epoch": 1.1350141440773394, "grad_norm": 0.6405286192893982, "learning_rate": 9.47905659409058e-06, "loss": 1.412, "mean_token_accuracy": 0.6624357551336288, "num_tokens": 1732123313.0, "step": 10332 }, { "entropy": 1.715343068043391, "epoch": 1.1351240009887122, "grad_norm": 0.7203224301338196, "learning_rate": 9.477478518173646e-06, "loss": 1.5001, "mean_token_accuracy": 0.6489410251379013, "num_tokens": 1732294493.0, "step": 10333 }, { "entropy": 1.6658440132935841, "epoch": 1.1352338579000851, "grad_norm": 0.6969232559204102, "learning_rate": 9.475900490443835e-06, "loss": 1.2221, "mean_token_accuracy": 0.6730835686127344, "num_tokens": 1732415579.0, "step": 10334 }, { "entropy": 1.7030311127503712, "epoch": 1.135343714811458, "grad_norm": 0.6568850874900818, "learning_rate": 9.474322510951082e-06, "loss": 1.3944, "mean_token_accuracy": 0.6627188473939896, "num_tokens": 1732592393.0, "step": 10335 }, { "entropy": 1.7397877375284831, "epoch": 1.135453571722831, "grad_norm": 0.7089440822601318, "learning_rate": 9.472744579745338e-06, "loss": 1.3188, "mean_token_accuracy": 0.6615829467773438, "num_tokens": 1732742092.0, "step": 10336 }, { "entropy": 1.7489128410816193, "epoch": 1.135563428634204, "grad_norm": 0.6160045862197876, "learning_rate": 9.471166696876539e-06, "loss": 1.4619, "mean_token_accuracy": 0.646346777677536, "num_tokens": 1732946886.0, "step": 10337 }, { "entropy": 1.7360928257306416, "epoch": 1.135673285545577, "grad_norm": 1.8664852380752563, "learning_rate": 9.469588862394624e-06, "loss": 1.1419, "mean_token_accuracy": 0.675204411149025, "num_tokens": 1733119323.0, "step": 10338 }, { "entropy": 1.6926367580890656, "epoch": 1.1357831424569498, "grad_norm": 0.7028651237487793, "learning_rate": 9.468011076349532e-06, "loss": 1.4818, "mean_token_accuracy": 0.6468727837006251, "num_tokens": 1733299458.0, "step": 10339 }, { "entropy": 1.7644418974717457, "epoch": 1.1358929993683227, "grad_norm": 0.7585068941116333, "learning_rate": 9.466433338791202e-06, "loss": 1.3099, "mean_token_accuracy": 0.6703908890485764, "num_tokens": 1733419610.0, "step": 10340 }, { "entropy": 1.7097221116224925, "epoch": 1.1360028562796958, "grad_norm": 0.7578231692314148, "learning_rate": 9.46485564976956e-06, "loss": 1.3979, "mean_token_accuracy": 0.6605977068344752, "num_tokens": 1733588689.0, "step": 10341 }, { "entropy": 1.7715651094913483, "epoch": 1.1361127131910687, "grad_norm": 0.6864378452301025, "learning_rate": 9.463278009334552e-06, "loss": 1.4961, "mean_token_accuracy": 0.6296594391266505, "num_tokens": 1733758712.0, "step": 10342 }, { "entropy": 1.6944345732529957, "epoch": 1.1362225701024415, "grad_norm": 1.1330305337905884, "learning_rate": 9.461700417536095e-06, "loss": 1.3299, "mean_token_accuracy": 0.6746721168359121, "num_tokens": 1733939337.0, "step": 10343 }, { "entropy": 1.672745595375697, "epoch": 1.1363324270138144, "grad_norm": 0.6669848561286926, "learning_rate": 9.460122874424136e-06, "loss": 1.5127, "mean_token_accuracy": 0.649186576406161, "num_tokens": 1734107917.0, "step": 10344 }, { "entropy": 1.693463295698166, "epoch": 1.1364422839251875, "grad_norm": 0.6603065133094788, "learning_rate": 9.458545380048585e-06, "loss": 1.2553, "mean_token_accuracy": 0.67331130305926, "num_tokens": 1734223657.0, "step": 10345 }, { "entropy": 1.7278470595677693, "epoch": 1.1365521408365604, "grad_norm": 1.3493287563323975, "learning_rate": 9.456967934459383e-06, "loss": 1.3466, "mean_token_accuracy": 0.660364697376887, "num_tokens": 1734412197.0, "step": 10346 }, { "entropy": 1.6575153172016144, "epoch": 1.1366619977479333, "grad_norm": 0.6611323952674866, "learning_rate": 9.455390537706451e-06, "loss": 1.2062, "mean_token_accuracy": 0.680364117026329, "num_tokens": 1734545860.0, "step": 10347 }, { "entropy": 1.656428058942159, "epoch": 1.1367718546593062, "grad_norm": 0.6214374303817749, "learning_rate": 9.453813189839709e-06, "loss": 1.4702, "mean_token_accuracy": 0.6467104901870092, "num_tokens": 1734744456.0, "step": 10348 }, { "entropy": 1.7128709455331166, "epoch": 1.136881711570679, "grad_norm": 0.804625928401947, "learning_rate": 9.452235890909083e-06, "loss": 1.523, "mean_token_accuracy": 0.650082861383756, "num_tokens": 1734900916.0, "step": 10349 }, { "entropy": 1.700402319431305, "epoch": 1.1369915684820522, "grad_norm": 0.7087069749832153, "learning_rate": 9.450658640964498e-06, "loss": 1.234, "mean_token_accuracy": 0.6779408504565557, "num_tokens": 1735001282.0, "step": 10350 }, { "entropy": 1.745913565158844, "epoch": 1.137101425393425, "grad_norm": 0.7145112156867981, "learning_rate": 9.449081440055865e-06, "loss": 1.4814, "mean_token_accuracy": 0.6550154387950897, "num_tokens": 1735153742.0, "step": 10351 }, { "entropy": 1.7599404752254486, "epoch": 1.137211282304798, "grad_norm": 1.112202763557434, "learning_rate": 9.447504288233104e-06, "loss": 1.5053, "mean_token_accuracy": 0.6597884198029836, "num_tokens": 1735351148.0, "step": 10352 }, { "entropy": 1.6744179526964824, "epoch": 1.137321139216171, "grad_norm": 0.5947840213775635, "learning_rate": 9.44592718554614e-06, "loss": 1.294, "mean_token_accuracy": 0.6707391689221064, "num_tokens": 1735548975.0, "step": 10353 }, { "entropy": 1.7266633212566376, "epoch": 1.137430996127544, "grad_norm": 0.6907797455787659, "learning_rate": 9.444350132044873e-06, "loss": 1.4707, "mean_token_accuracy": 0.6492672860622406, "num_tokens": 1735725525.0, "step": 10354 }, { "entropy": 1.7150587638219197, "epoch": 1.1375408530389168, "grad_norm": 0.639342725276947, "learning_rate": 9.442773127779226e-06, "loss": 1.3635, "mean_token_accuracy": 0.6725068837404251, "num_tokens": 1735894005.0, "step": 10355 }, { "entropy": 1.6278500159581502, "epoch": 1.1376507099502897, "grad_norm": 0.719607412815094, "learning_rate": 9.44119617279911e-06, "loss": 1.2805, "mean_token_accuracy": 0.67085100710392, "num_tokens": 1736025755.0, "step": 10356 }, { "entropy": 1.6743311981360118, "epoch": 1.1377605668616626, "grad_norm": 0.6438021659851074, "learning_rate": 9.439619267154428e-06, "loss": 1.3495, "mean_token_accuracy": 0.6617470035950342, "num_tokens": 1736176599.0, "step": 10357 }, { "entropy": 1.6663430829842885, "epoch": 1.1378704237730357, "grad_norm": 0.6496438384056091, "learning_rate": 9.438042410895097e-06, "loss": 1.3258, "mean_token_accuracy": 0.6678551882505417, "num_tokens": 1736314214.0, "step": 10358 }, { "entropy": 1.6967433889706929, "epoch": 1.1379802806844086, "grad_norm": 0.6643569469451904, "learning_rate": 9.436465604071019e-06, "loss": 1.3103, "mean_token_accuracy": 0.6723993321259817, "num_tokens": 1736448597.0, "step": 10359 }, { "entropy": 1.6814461847146351, "epoch": 1.1380901375957815, "grad_norm": 0.6767435073852539, "learning_rate": 9.434888846732097e-06, "loss": 1.4335, "mean_token_accuracy": 0.6468447397152582, "num_tokens": 1736668920.0, "step": 10360 }, { "entropy": 1.7169758081436157, "epoch": 1.1381999945071544, "grad_norm": 0.8308820128440857, "learning_rate": 9.43331213892824e-06, "loss": 1.3841, "mean_token_accuracy": 0.6509679108858109, "num_tokens": 1736800070.0, "step": 10361 }, { "entropy": 1.6866010129451752, "epoch": 1.1383098514185273, "grad_norm": 0.8533633351325989, "learning_rate": 9.431735480709352e-06, "loss": 1.4519, "mean_token_accuracy": 0.6826412826776505, "num_tokens": 1736939842.0, "step": 10362 }, { "entropy": 1.7029000719388325, "epoch": 1.1384197083299004, "grad_norm": 0.797741711139679, "learning_rate": 9.430158872125324e-06, "loss": 1.4602, "mean_token_accuracy": 0.6410268098115921, "num_tokens": 1737129340.0, "step": 10363 }, { "entropy": 1.7252192397912343, "epoch": 1.1385295652412732, "grad_norm": 0.7115998864173889, "learning_rate": 9.42858231322606e-06, "loss": 1.3813, "mean_token_accuracy": 0.656056766708692, "num_tokens": 1737301283.0, "step": 10364 }, { "entropy": 1.7449837823708851, "epoch": 1.1386394221526461, "grad_norm": 0.7513498663902283, "learning_rate": 9.427005804061462e-06, "loss": 1.532, "mean_token_accuracy": 0.6449931561946869, "num_tokens": 1737480963.0, "step": 10365 }, { "entropy": 1.7203444143136342, "epoch": 1.1387492790640192, "grad_norm": 0.7123376131057739, "learning_rate": 9.425429344681415e-06, "loss": 1.6309, "mean_token_accuracy": 0.6379441867272059, "num_tokens": 1737657860.0, "step": 10366 }, { "entropy": 1.6939558287461598, "epoch": 1.1388591359753921, "grad_norm": 0.6163555979728699, "learning_rate": 9.423852935135824e-06, "loss": 1.2301, "mean_token_accuracy": 0.6741809546947479, "num_tokens": 1737806597.0, "step": 10367 }, { "entropy": 1.614011029402415, "epoch": 1.138968992886765, "grad_norm": 0.7325506210327148, "learning_rate": 9.42227657547458e-06, "loss": 1.2167, "mean_token_accuracy": 0.6826542516549429, "num_tokens": 1737916809.0, "step": 10368 }, { "entropy": 1.682138333717982, "epoch": 1.139078849798138, "grad_norm": 0.5863914489746094, "learning_rate": 9.420700265747566e-06, "loss": 1.3563, "mean_token_accuracy": 0.6610041856765747, "num_tokens": 1738100933.0, "step": 10369 }, { "entropy": 1.6821511387825012, "epoch": 1.1391887067095108, "grad_norm": 0.6776061654090881, "learning_rate": 9.419124006004681e-06, "loss": 1.4566, "mean_token_accuracy": 0.6502898782491684, "num_tokens": 1738277417.0, "step": 10370 }, { "entropy": 1.6609342396259308, "epoch": 1.1392985636208839, "grad_norm": 0.617734432220459, "learning_rate": 9.417547796295807e-06, "loss": 1.4432, "mean_token_accuracy": 0.6545563538869222, "num_tokens": 1738457334.0, "step": 10371 }, { "entropy": 1.7074103355407715, "epoch": 1.1394084205322568, "grad_norm": 0.6897749900817871, "learning_rate": 9.415971636670832e-06, "loss": 1.3867, "mean_token_accuracy": 0.6595341066519419, "num_tokens": 1738604909.0, "step": 10372 }, { "entropy": 1.6968832810719807, "epoch": 1.1395182774436297, "grad_norm": 0.6201480031013489, "learning_rate": 9.41439552717964e-06, "loss": 1.4053, "mean_token_accuracy": 0.6468455741802851, "num_tokens": 1738777998.0, "step": 10373 }, { "entropy": 1.635752648115158, "epoch": 1.1396281343550025, "grad_norm": 0.5653038620948792, "learning_rate": 9.412819467872119e-06, "loss": 1.2893, "mean_token_accuracy": 0.6629117280244827, "num_tokens": 1738979425.0, "step": 10374 }, { "entropy": 1.6903326710065205, "epoch": 1.1397379912663754, "grad_norm": 0.7527031898498535, "learning_rate": 9.411243458798144e-06, "loss": 1.4398, "mean_token_accuracy": 0.6569599111874899, "num_tokens": 1739148251.0, "step": 10375 }, { "entropy": 1.7592523097991943, "epoch": 1.1398478481777485, "grad_norm": 0.6619188785552979, "learning_rate": 9.409667500007595e-06, "loss": 1.4471, "mean_token_accuracy": 0.664801706870397, "num_tokens": 1739340860.0, "step": 10376 }, { "entropy": 1.6681431134541829, "epoch": 1.1399577050891214, "grad_norm": 0.6245502829551697, "learning_rate": 9.408091591550359e-06, "loss": 1.4495, "mean_token_accuracy": 0.6568524142106374, "num_tokens": 1739547206.0, "step": 10377 }, { "entropy": 1.6391673783461254, "epoch": 1.1400675620004943, "grad_norm": 0.6753904223442078, "learning_rate": 9.406515733476302e-06, "loss": 1.4371, "mean_token_accuracy": 0.6508794724941254, "num_tokens": 1739790829.0, "step": 10378 }, { "entropy": 1.6744989454746246, "epoch": 1.1401774189118674, "grad_norm": 0.6560745239257812, "learning_rate": 9.404939925835304e-06, "loss": 1.3288, "mean_token_accuracy": 0.6673153092463812, "num_tokens": 1739931875.0, "step": 10379 }, { "entropy": 1.7254528601964314, "epoch": 1.1402872758232403, "grad_norm": 0.6491901278495789, "learning_rate": 9.403364168677242e-06, "loss": 1.3784, "mean_token_accuracy": 0.6630249718825022, "num_tokens": 1740098056.0, "step": 10380 }, { "entropy": 1.7695842186609905, "epoch": 1.1403971327346132, "grad_norm": 0.739123523235321, "learning_rate": 9.401788462051981e-06, "loss": 1.4259, "mean_token_accuracy": 0.6524376769860586, "num_tokens": 1740252560.0, "step": 10381 }, { "entropy": 1.6941548983256023, "epoch": 1.140506989645986, "grad_norm": 0.7095143795013428, "learning_rate": 9.400212806009396e-06, "loss": 1.4692, "mean_token_accuracy": 0.6424577981233597, "num_tokens": 1740446012.0, "step": 10382 }, { "entropy": 1.7164734701315563, "epoch": 1.140616846557359, "grad_norm": 0.7843037843704224, "learning_rate": 9.398637200599357e-06, "loss": 1.5502, "mean_token_accuracy": 0.6457971682151159, "num_tokens": 1740601766.0, "step": 10383 }, { "entropy": 1.7246180772781372, "epoch": 1.140726703468732, "grad_norm": 0.6460191607475281, "learning_rate": 9.397061645871728e-06, "loss": 1.4905, "mean_token_accuracy": 0.6386567503213882, "num_tokens": 1740768615.0, "step": 10384 }, { "entropy": 1.6967721978823345, "epoch": 1.140836560380105, "grad_norm": 0.6672912240028381, "learning_rate": 9.395486141876374e-06, "loss": 1.4701, "mean_token_accuracy": 0.6470180948575338, "num_tokens": 1740969706.0, "step": 10385 }, { "entropy": 1.6823700368404388, "epoch": 1.1409464172914778, "grad_norm": 0.7741503119468689, "learning_rate": 9.393910688663164e-06, "loss": 1.2931, "mean_token_accuracy": 0.6733145167430242, "num_tokens": 1741106299.0, "step": 10386 }, { "entropy": 1.7498717904090881, "epoch": 1.1410562742028507, "grad_norm": 0.7507087588310242, "learning_rate": 9.392335286281953e-06, "loss": 1.5033, "mean_token_accuracy": 0.6538749684890112, "num_tokens": 1741254646.0, "step": 10387 }, { "entropy": 1.6514920592308044, "epoch": 1.1411661311142236, "grad_norm": 0.5859827399253845, "learning_rate": 9.390759934782607e-06, "loss": 1.3249, "mean_token_accuracy": 0.6833820442358652, "num_tokens": 1741432870.0, "step": 10388 }, { "entropy": 1.684400051832199, "epoch": 1.1412759880255967, "grad_norm": 0.6740873456001282, "learning_rate": 9.389184634214985e-06, "loss": 1.3331, "mean_token_accuracy": 0.6611009438832601, "num_tokens": 1741574186.0, "step": 10389 }, { "entropy": 1.742189993460973, "epoch": 1.1413858449369696, "grad_norm": 0.7069135308265686, "learning_rate": 9.387609384628945e-06, "loss": 1.2605, "mean_token_accuracy": 0.6775266925493876, "num_tokens": 1741691346.0, "step": 10390 }, { "entropy": 1.735118528207143, "epoch": 1.1414957018483425, "grad_norm": 0.8108046054840088, "learning_rate": 9.386034186074335e-06, "loss": 1.3915, "mean_token_accuracy": 0.66219495733579, "num_tokens": 1741833271.0, "step": 10391 }, { "entropy": 1.7121002276738484, "epoch": 1.1416055587597156, "grad_norm": 0.7746871113777161, "learning_rate": 9.384459038601024e-06, "loss": 1.3267, "mean_token_accuracy": 0.6555629670619965, "num_tokens": 1741960943.0, "step": 10392 }, { "entropy": 1.7076607942581177, "epoch": 1.1417154156710885, "grad_norm": 0.6252023577690125, "learning_rate": 9.382883942258849e-06, "loss": 1.3896, "mean_token_accuracy": 0.653433953722318, "num_tokens": 1742171872.0, "step": 10393 }, { "entropy": 1.6984122693538666, "epoch": 1.1418252725824614, "grad_norm": 0.7633947134017944, "learning_rate": 9.381308897097671e-06, "loss": 1.4597, "mean_token_accuracy": 0.6523663302262624, "num_tokens": 1742335760.0, "step": 10394 }, { "entropy": 1.682322899500529, "epoch": 1.1419351294938342, "grad_norm": 0.6641841530799866, "learning_rate": 9.37973390316734e-06, "loss": 1.4247, "mean_token_accuracy": 0.6529939075311025, "num_tokens": 1742492989.0, "step": 10395 }, { "entropy": 1.7189187506834667, "epoch": 1.1420449864052071, "grad_norm": 0.6823170185089111, "learning_rate": 9.378158960517701e-06, "loss": 1.345, "mean_token_accuracy": 0.6566206763188044, "num_tokens": 1742644794.0, "step": 10396 }, { "entropy": 1.629441926876704, "epoch": 1.1421548433165802, "grad_norm": 0.654874861240387, "learning_rate": 9.376584069198593e-06, "loss": 1.3227, "mean_token_accuracy": 0.6684759259223938, "num_tokens": 1742817557.0, "step": 10397 }, { "entropy": 1.6485731303691864, "epoch": 1.1422647002279531, "grad_norm": 0.6740143895149231, "learning_rate": 9.375009229259878e-06, "loss": 1.4679, "mean_token_accuracy": 0.6339434087276459, "num_tokens": 1743044080.0, "step": 10398 }, { "entropy": 1.6696706712245941, "epoch": 1.142374557139326, "grad_norm": 0.7383650541305542, "learning_rate": 9.37343444075138e-06, "loss": 1.3688, "mean_token_accuracy": 0.6621562987565994, "num_tokens": 1743274614.0, "step": 10399 }, { "entropy": 1.6913128296534221, "epoch": 1.142484414050699, "grad_norm": 0.5945432782173157, "learning_rate": 9.371859703722952e-06, "loss": 1.3708, "mean_token_accuracy": 0.6589928964773814, "num_tokens": 1743456863.0, "step": 10400 }, { "entropy": 1.7033980588118236, "epoch": 1.1425942709620718, "grad_norm": 0.6133428812026978, "learning_rate": 9.370285018224432e-06, "loss": 1.2997, "mean_token_accuracy": 0.6748911092678705, "num_tokens": 1743617303.0, "step": 10401 }, { "entropy": 1.6469106773535411, "epoch": 1.1427041278734449, "grad_norm": 0.7003388404846191, "learning_rate": 9.368710384305656e-06, "loss": 1.2912, "mean_token_accuracy": 0.6673061301310858, "num_tokens": 1743751670.0, "step": 10402 }, { "entropy": 1.6901063521703084, "epoch": 1.1428139847848178, "grad_norm": 0.663631796836853, "learning_rate": 9.367135802016463e-06, "loss": 1.4043, "mean_token_accuracy": 0.6495856940746307, "num_tokens": 1743894039.0, "step": 10403 }, { "entropy": 1.7655975222587585, "epoch": 1.1429238416961907, "grad_norm": 0.8853231072425842, "learning_rate": 9.365561271406684e-06, "loss": 1.526, "mean_token_accuracy": 0.628182902932167, "num_tokens": 1744066506.0, "step": 10404 }, { "entropy": 1.6902997593084972, "epoch": 1.1430336986075638, "grad_norm": 0.8013792634010315, "learning_rate": 9.363986792526152e-06, "loss": 1.4213, "mean_token_accuracy": 0.6655001491308212, "num_tokens": 1744211659.0, "step": 10405 }, { "entropy": 1.6334332625071208, "epoch": 1.1431435555189366, "grad_norm": 0.6941357254981995, "learning_rate": 9.362412365424704e-06, "loss": 1.2846, "mean_token_accuracy": 0.6686849494775137, "num_tokens": 1744364432.0, "step": 10406 }, { "entropy": 1.6745346983273823, "epoch": 1.1432534124303095, "grad_norm": 0.7382486462593079, "learning_rate": 9.360837990152167e-06, "loss": 1.3855, "mean_token_accuracy": 0.6577971825997034, "num_tokens": 1744536714.0, "step": 10407 }, { "entropy": 1.6726201673348744, "epoch": 1.1433632693416824, "grad_norm": 0.7212955951690674, "learning_rate": 9.359263666758367e-06, "loss": 1.3708, "mean_token_accuracy": 0.6609119226535162, "num_tokens": 1744731550.0, "step": 10408 }, { "entropy": 1.690768967072169, "epoch": 1.1434731262530553, "grad_norm": 0.7584574818611145, "learning_rate": 9.357689395293134e-06, "loss": 1.3912, "mean_token_accuracy": 0.6541225661834081, "num_tokens": 1744924719.0, "step": 10409 }, { "entropy": 1.6714553038279216, "epoch": 1.1435829831644284, "grad_norm": 0.7702672481536865, "learning_rate": 9.356115175806292e-06, "loss": 1.3676, "mean_token_accuracy": 0.6619028945763906, "num_tokens": 1745084791.0, "step": 10410 }, { "entropy": 1.7582578659057617, "epoch": 1.1436928400758013, "grad_norm": 0.6678996086120605, "learning_rate": 9.354541008347661e-06, "loss": 1.3649, "mean_token_accuracy": 0.6453887671232224, "num_tokens": 1745250672.0, "step": 10411 }, { "entropy": 1.6818044086297352, "epoch": 1.1438026969871742, "grad_norm": 0.7201360464096069, "learning_rate": 9.352966892967072e-06, "loss": 1.478, "mean_token_accuracy": 0.6579025636116663, "num_tokens": 1745445321.0, "step": 10412 }, { "entropy": 1.7616774141788483, "epoch": 1.143912553898547, "grad_norm": 0.6419490575790405, "learning_rate": 9.351392829714332e-06, "loss": 1.5105, "mean_token_accuracy": 0.636634940902392, "num_tokens": 1745691802.0, "step": 10413 }, { "entropy": 1.7232487003008525, "epoch": 1.1440224108099202, "grad_norm": 0.6756424903869629, "learning_rate": 9.349818818639267e-06, "loss": 1.5376, "mean_token_accuracy": 0.661085287729899, "num_tokens": 1745864777.0, "step": 10414 }, { "entropy": 1.7252596020698547, "epoch": 1.144132267721293, "grad_norm": 0.6202812790870667, "learning_rate": 9.348244859791698e-06, "loss": 1.3156, "mean_token_accuracy": 0.664531409740448, "num_tokens": 1745989005.0, "step": 10415 }, { "entropy": 1.6829163233439128, "epoch": 1.144242124632666, "grad_norm": 0.7266920208930969, "learning_rate": 9.346670953221429e-06, "loss": 1.4073, "mean_token_accuracy": 0.6535108834505081, "num_tokens": 1746165103.0, "step": 10416 }, { "entropy": 1.7211932837963104, "epoch": 1.1443519815440388, "grad_norm": 0.6822185516357422, "learning_rate": 9.34509709897828e-06, "loss": 1.5674, "mean_token_accuracy": 0.6551593492428461, "num_tokens": 1746403549.0, "step": 10417 }, { "entropy": 1.6584857602914174, "epoch": 1.144461838455412, "grad_norm": 0.6132012605667114, "learning_rate": 9.343523297112066e-06, "loss": 1.3687, "mean_token_accuracy": 0.6635250995556513, "num_tokens": 1746571701.0, "step": 10418 }, { "entropy": 1.6848807831605275, "epoch": 1.1445716953667848, "grad_norm": 0.6472894549369812, "learning_rate": 9.341949547672588e-06, "loss": 1.335, "mean_token_accuracy": 0.6618951757748922, "num_tokens": 1746734240.0, "step": 10419 }, { "entropy": 1.718974103530248, "epoch": 1.1446815522781577, "grad_norm": 0.736495852470398, "learning_rate": 9.340375850709663e-06, "loss": 1.4658, "mean_token_accuracy": 0.6561418076356252, "num_tokens": 1746909958.0, "step": 10420 }, { "entropy": 1.7155489722887676, "epoch": 1.1447914091895306, "grad_norm": 0.6326528191566467, "learning_rate": 9.338802206273097e-06, "loss": 1.4693, "mean_token_accuracy": 0.6499947756528854, "num_tokens": 1747163992.0, "step": 10421 }, { "entropy": 1.6991062760353088, "epoch": 1.1449012661009035, "grad_norm": 0.6131667494773865, "learning_rate": 9.337228614412688e-06, "loss": 1.351, "mean_token_accuracy": 0.6598118593295416, "num_tokens": 1747305956.0, "step": 10422 }, { "entropy": 1.7058619757493336, "epoch": 1.1450111230122766, "grad_norm": 0.7168628573417664, "learning_rate": 9.335655075178243e-06, "loss": 1.3225, "mean_token_accuracy": 0.6609494437774023, "num_tokens": 1747424468.0, "step": 10423 }, { "entropy": 1.6946961383024852, "epoch": 1.1451209799236495, "grad_norm": 0.6358611583709717, "learning_rate": 9.33408158861957e-06, "loss": 1.3029, "mean_token_accuracy": 0.6581896990537643, "num_tokens": 1747556121.0, "step": 10424 }, { "entropy": 1.782209446032842, "epoch": 1.1452308368350224, "grad_norm": 0.7476517558097839, "learning_rate": 9.33250815478646e-06, "loss": 1.3775, "mean_token_accuracy": 0.6569018463293711, "num_tokens": 1747682025.0, "step": 10425 }, { "entropy": 1.6908418933550518, "epoch": 1.1453406937463952, "grad_norm": 0.7563366293907166, "learning_rate": 9.330934773728717e-06, "loss": 1.3391, "mean_token_accuracy": 0.6665392766396204, "num_tokens": 1747809976.0, "step": 10426 }, { "entropy": 1.6805396974086761, "epoch": 1.1454505506577684, "grad_norm": 0.5949506163597107, "learning_rate": 9.32936144549614e-06, "loss": 1.4936, "mean_token_accuracy": 0.6355966081221899, "num_tokens": 1747996141.0, "step": 10427 }, { "entropy": 1.679563969373703, "epoch": 1.1455604075691412, "grad_norm": 0.6496044397354126, "learning_rate": 9.327788170138514e-06, "loss": 1.3572, "mean_token_accuracy": 0.6697363605101904, "num_tokens": 1748139185.0, "step": 10428 }, { "entropy": 1.6627104580402374, "epoch": 1.1456702644805141, "grad_norm": 0.623802661895752, "learning_rate": 9.326214947705641e-06, "loss": 1.3569, "mean_token_accuracy": 0.6598296562830607, "num_tokens": 1748306822.0, "step": 10429 }, { "entropy": 1.713492641846339, "epoch": 1.145780121391887, "grad_norm": 0.7390007972717285, "learning_rate": 9.324641778247313e-06, "loss": 1.4243, "mean_token_accuracy": 0.6549296230077744, "num_tokens": 1748528987.0, "step": 10430 }, { "entropy": 1.6868635416030884, "epoch": 1.1458899783032601, "grad_norm": 0.6720066666603088, "learning_rate": 9.323068661813315e-06, "loss": 1.3167, "mean_token_accuracy": 0.6549607117970785, "num_tokens": 1748712876.0, "step": 10431 }, { "entropy": 1.6507586737473805, "epoch": 1.145999835214633, "grad_norm": 0.6831554174423218, "learning_rate": 9.321495598453438e-06, "loss": 1.2928, "mean_token_accuracy": 0.6722377041975657, "num_tokens": 1748849425.0, "step": 10432 }, { "entropy": 1.6795489092667897, "epoch": 1.1461096921260059, "grad_norm": 0.6272848844528198, "learning_rate": 9.319922588217472e-06, "loss": 1.4739, "mean_token_accuracy": 0.6550329575935999, "num_tokens": 1749033503.0, "step": 10433 }, { "entropy": 1.6624679764111836, "epoch": 1.1462195490373788, "grad_norm": 0.7027580738067627, "learning_rate": 9.318349631155197e-06, "loss": 1.3611, "mean_token_accuracy": 0.6647091160217921, "num_tokens": 1749269891.0, "step": 10434 }, { "entropy": 1.6587688227494557, "epoch": 1.1463294059487517, "grad_norm": 0.695829451084137, "learning_rate": 9.316776727316397e-06, "loss": 1.5764, "mean_token_accuracy": 0.6489768524964651, "num_tokens": 1749462840.0, "step": 10435 }, { "entropy": 1.72525155544281, "epoch": 1.1464392628601248, "grad_norm": 0.6153085231781006, "learning_rate": 9.31520387675086e-06, "loss": 1.4573, "mean_token_accuracy": 0.6477632522583008, "num_tokens": 1749623332.0, "step": 10436 }, { "entropy": 1.6733653446038563, "epoch": 1.1465491197714976, "grad_norm": 0.6889209747314453, "learning_rate": 9.313631079508357e-06, "loss": 1.2139, "mean_token_accuracy": 0.6837521890799204, "num_tokens": 1749756254.0, "step": 10437 }, { "entropy": 1.6137581169605255, "epoch": 1.1466589766828705, "grad_norm": 0.6261329054832458, "learning_rate": 9.312058335638669e-06, "loss": 1.2555, "mean_token_accuracy": 0.6895642032225927, "num_tokens": 1749906250.0, "step": 10438 }, { "entropy": 1.6432836850484211, "epoch": 1.1467688335942434, "grad_norm": 0.6834116578102112, "learning_rate": 9.31048564519158e-06, "loss": 1.3124, "mean_token_accuracy": 0.66798102358977, "num_tokens": 1750054646.0, "step": 10439 }, { "entropy": 1.6783512830734253, "epoch": 1.1468786905056165, "grad_norm": 0.6411421895027161, "learning_rate": 9.308913008216855e-06, "loss": 1.3029, "mean_token_accuracy": 0.6628169417381287, "num_tokens": 1750191191.0, "step": 10440 }, { "entropy": 1.7019068499406178, "epoch": 1.1469885474169894, "grad_norm": 0.8326993584632874, "learning_rate": 9.30734042476427e-06, "loss": 1.4707, "mean_token_accuracy": 0.6603338221708933, "num_tokens": 1750340860.0, "step": 10441 }, { "entropy": 1.7151079376538594, "epoch": 1.1470984043283623, "grad_norm": 0.8003994822502136, "learning_rate": 9.305767894883602e-06, "loss": 1.4788, "mean_token_accuracy": 0.6640694737434387, "num_tokens": 1750456588.0, "step": 10442 }, { "entropy": 1.743663897116979, "epoch": 1.1472082612397352, "grad_norm": 0.6902558207511902, "learning_rate": 9.304195418624614e-06, "loss": 1.4213, "mean_token_accuracy": 0.6569770723581314, "num_tokens": 1750652408.0, "step": 10443 }, { "entropy": 1.6893725295861561, "epoch": 1.1473181181511083, "grad_norm": 0.650435745716095, "learning_rate": 9.302622996037074e-06, "loss": 1.3089, "mean_token_accuracy": 0.6710364570220312, "num_tokens": 1750803978.0, "step": 10444 }, { "entropy": 1.7381121218204498, "epoch": 1.1474279750624812, "grad_norm": 0.6412340402603149, "learning_rate": 9.301050627170758e-06, "loss": 1.4465, "mean_token_accuracy": 0.653094212214152, "num_tokens": 1750997989.0, "step": 10445 }, { "entropy": 1.7343399027983348, "epoch": 1.147537831973854, "grad_norm": 0.6787511706352234, "learning_rate": 9.299478312075421e-06, "loss": 1.4958, "mean_token_accuracy": 0.6529847681522369, "num_tokens": 1751158800.0, "step": 10446 }, { "entropy": 1.6631225248177846, "epoch": 1.147647688885227, "grad_norm": 0.6875215768814087, "learning_rate": 9.297906050800824e-06, "loss": 1.1925, "mean_token_accuracy": 0.6849933316310247, "num_tokens": 1751279105.0, "step": 10447 }, { "entropy": 1.7318195203940074, "epoch": 1.1477575457965998, "grad_norm": 0.8004332780838013, "learning_rate": 9.296333843396743e-06, "loss": 1.4193, "mean_token_accuracy": 0.6542117198308309, "num_tokens": 1751478243.0, "step": 10448 }, { "entropy": 1.7312237322330475, "epoch": 1.147867402707973, "grad_norm": 0.6288403868675232, "learning_rate": 9.294761689912921e-06, "loss": 1.3694, "mean_token_accuracy": 0.6531921078761419, "num_tokens": 1751639847.0, "step": 10449 }, { "entropy": 1.665016194184621, "epoch": 1.1479772596193458, "grad_norm": 0.7054689526557922, "learning_rate": 9.293189590399126e-06, "loss": 1.5536, "mean_token_accuracy": 0.6347174296776453, "num_tokens": 1751821535.0, "step": 10450 }, { "entropy": 1.6455471416314442, "epoch": 1.1480871165307187, "grad_norm": 0.6462990641593933, "learning_rate": 9.291617544905112e-06, "loss": 1.2751, "mean_token_accuracy": 0.6752283871173859, "num_tokens": 1751950364.0, "step": 10451 }, { "entropy": 1.6657202740510304, "epoch": 1.1481969734420916, "grad_norm": 0.6472091674804688, "learning_rate": 9.29004555348063e-06, "loss": 1.4169, "mean_token_accuracy": 0.6555340985457102, "num_tokens": 1752107814.0, "step": 10452 }, { "entropy": 1.7669294873873393, "epoch": 1.1483068303534647, "grad_norm": 0.7394276857376099, "learning_rate": 9.288473616175438e-06, "loss": 1.3615, "mean_token_accuracy": 0.6475641032059988, "num_tokens": 1752250559.0, "step": 10453 }, { "entropy": 1.6974846025307972, "epoch": 1.1484166872648376, "grad_norm": 0.7621778845787048, "learning_rate": 9.286901733039286e-06, "loss": 1.4146, "mean_token_accuracy": 0.6701687673727671, "num_tokens": 1752411368.0, "step": 10454 }, { "entropy": 1.7656433582305908, "epoch": 1.1485265441762105, "grad_norm": 0.7843154072761536, "learning_rate": 9.285329904121918e-06, "loss": 1.3917, "mean_token_accuracy": 0.658236563205719, "num_tokens": 1752544724.0, "step": 10455 }, { "entropy": 1.7151671946048737, "epoch": 1.1486364010875834, "grad_norm": 0.6612775325775146, "learning_rate": 9.283758129473088e-06, "loss": 1.3915, "mean_token_accuracy": 0.6535660674174627, "num_tokens": 1752679479.0, "step": 10456 }, { "entropy": 1.7036021947860718, "epoch": 1.1487462579989565, "grad_norm": 0.6847598552703857, "learning_rate": 9.282186409142542e-06, "loss": 1.3911, "mean_token_accuracy": 0.6572980483373007, "num_tokens": 1752868566.0, "step": 10457 }, { "entropy": 1.7341304918130238, "epoch": 1.1488561149103294, "grad_norm": 0.66192227602005, "learning_rate": 9.280614743180019e-06, "loss": 1.4441, "mean_token_accuracy": 0.6463885257641474, "num_tokens": 1753043553.0, "step": 10458 }, { "entropy": 1.7356309394041698, "epoch": 1.1489659718217022, "grad_norm": 0.7760790586471558, "learning_rate": 9.279043131635266e-06, "loss": 1.483, "mean_token_accuracy": 0.6427653779586157, "num_tokens": 1753185343.0, "step": 10459 }, { "entropy": 1.7046063840389252, "epoch": 1.1490758287330751, "grad_norm": 0.6848695874214172, "learning_rate": 9.277471574558023e-06, "loss": 1.4344, "mean_token_accuracy": 0.6615995417038599, "num_tokens": 1753355296.0, "step": 10460 }, { "entropy": 1.7546161313851674, "epoch": 1.149185685644448, "grad_norm": 0.6744615435600281, "learning_rate": 9.275900071998028e-06, "loss": 1.3556, "mean_token_accuracy": 0.6578517059485117, "num_tokens": 1753553622.0, "step": 10461 }, { "entropy": 1.7109164694945018, "epoch": 1.1492955425558211, "grad_norm": 0.6636914014816284, "learning_rate": 9.274328624005019e-06, "loss": 1.3815, "mean_token_accuracy": 0.6661298722028732, "num_tokens": 1753736512.0, "step": 10462 }, { "entropy": 1.6936496595541637, "epoch": 1.149405399467194, "grad_norm": 0.7392176389694214, "learning_rate": 9.272757230628731e-06, "loss": 1.5186, "mean_token_accuracy": 0.6480444173018137, "num_tokens": 1753937725.0, "step": 10463 }, { "entropy": 1.7084623177846272, "epoch": 1.1495152563785669, "grad_norm": 0.7401105165481567, "learning_rate": 9.271185891918896e-06, "loss": 1.3537, "mean_token_accuracy": 0.6713838477929434, "num_tokens": 1754103003.0, "step": 10464 }, { "entropy": 1.686184932788213, "epoch": 1.1496251132899398, "grad_norm": 0.6153541803359985, "learning_rate": 9.269614607925255e-06, "loss": 1.5945, "mean_token_accuracy": 0.6229482889175415, "num_tokens": 1754331188.0, "step": 10465 }, { "entropy": 1.7238997519016266, "epoch": 1.1497349702013129, "grad_norm": 0.7695441246032715, "learning_rate": 9.268043378697527e-06, "loss": 1.4725, "mean_token_accuracy": 0.6536758492390314, "num_tokens": 1754507673.0, "step": 10466 }, { "entropy": 1.710547149181366, "epoch": 1.1498448271126858, "grad_norm": 0.6528117060661316, "learning_rate": 9.266472204285443e-06, "loss": 1.3943, "mean_token_accuracy": 0.6460304210583369, "num_tokens": 1754693959.0, "step": 10467 }, { "entropy": 1.6860848863919575, "epoch": 1.1499546840240586, "grad_norm": 0.6279901266098022, "learning_rate": 9.264901084738737e-06, "loss": 1.3695, "mean_token_accuracy": 0.6599749426047007, "num_tokens": 1754862001.0, "step": 10468 }, { "entropy": 1.7022302746772766, "epoch": 1.1500645409354315, "grad_norm": 0.6494450569152832, "learning_rate": 9.263330020107131e-06, "loss": 1.3224, "mean_token_accuracy": 0.6655841370423635, "num_tokens": 1755021180.0, "step": 10469 }, { "entropy": 1.7287100454171498, "epoch": 1.1501743978468046, "grad_norm": 0.8179412484169006, "learning_rate": 9.261759010440343e-06, "loss": 1.4208, "mean_token_accuracy": 0.6409466514984766, "num_tokens": 1755199159.0, "step": 10470 }, { "entropy": 1.6620949506759644, "epoch": 1.1502842547581775, "grad_norm": 0.7252711057662964, "learning_rate": 9.260188055788104e-06, "loss": 1.3515, "mean_token_accuracy": 0.6578169663747152, "num_tokens": 1755368309.0, "step": 10471 }, { "entropy": 1.6833390891551971, "epoch": 1.1503941116695504, "grad_norm": 0.6849291324615479, "learning_rate": 9.258617156200127e-06, "loss": 1.3125, "mean_token_accuracy": 0.6736855655908585, "num_tokens": 1755533771.0, "step": 10472 }, { "entropy": 1.7997891108194988, "epoch": 1.1505039685809233, "grad_norm": 0.7142224907875061, "learning_rate": 9.257046311726128e-06, "loss": 1.4591, "mean_token_accuracy": 0.6437089890241623, "num_tokens": 1755708002.0, "step": 10473 }, { "entropy": 1.7047271529833476, "epoch": 1.1506138254922962, "grad_norm": 0.700008749961853, "learning_rate": 9.255475522415834e-06, "loss": 1.4242, "mean_token_accuracy": 0.6596666872501373, "num_tokens": 1755849614.0, "step": 10474 }, { "entropy": 1.7262776792049408, "epoch": 1.1507236824036693, "grad_norm": 0.6418355703353882, "learning_rate": 9.25390478831895e-06, "loss": 1.4052, "mean_token_accuracy": 0.6510264078776041, "num_tokens": 1756022118.0, "step": 10475 }, { "entropy": 1.7786755760510762, "epoch": 1.1508335393150422, "grad_norm": 0.9499866962432861, "learning_rate": 9.252334109485193e-06, "loss": 1.6351, "mean_token_accuracy": 0.632032627860705, "num_tokens": 1756176047.0, "step": 10476 }, { "entropy": 1.7582121590773265, "epoch": 1.150943396226415, "grad_norm": 2.6958770751953125, "learning_rate": 9.250763485964276e-06, "loss": 1.1807, "mean_token_accuracy": 0.6710349669059118, "num_tokens": 1756372478.0, "step": 10477 }, { "entropy": 1.7144115070501964, "epoch": 1.151053253137788, "grad_norm": 0.6320227384567261, "learning_rate": 9.249192917805905e-06, "loss": 1.3936, "mean_token_accuracy": 0.6546385983626047, "num_tokens": 1756525920.0, "step": 10478 }, { "entropy": 1.749243050813675, "epoch": 1.151163110049161, "grad_norm": 0.7566484212875366, "learning_rate": 9.247622405059786e-06, "loss": 1.414, "mean_token_accuracy": 0.6681007444858551, "num_tokens": 1756682079.0, "step": 10479 }, { "entropy": 1.7483246127764385, "epoch": 1.151272966960534, "grad_norm": 0.7398757934570312, "learning_rate": 9.246051947775635e-06, "loss": 1.4072, "mean_token_accuracy": 0.6565983096758524, "num_tokens": 1756881456.0, "step": 10480 }, { "entropy": 1.7031634449958801, "epoch": 1.1513828238719068, "grad_norm": 0.6389073133468628, "learning_rate": 9.244481546003146e-06, "loss": 1.3583, "mean_token_accuracy": 0.660191277662913, "num_tokens": 1757043583.0, "step": 10481 }, { "entropy": 1.6907376945018768, "epoch": 1.1514926807832797, "grad_norm": 0.6452929377555847, "learning_rate": 9.242911199792024e-06, "loss": 1.3741, "mean_token_accuracy": 0.6537407586971918, "num_tokens": 1757198541.0, "step": 10482 }, { "entropy": 1.7258997162183125, "epoch": 1.1516025376946528, "grad_norm": 0.6563553810119629, "learning_rate": 9.24134090919197e-06, "loss": 1.3968, "mean_token_accuracy": 0.6483776172002157, "num_tokens": 1757403114.0, "step": 10483 }, { "entropy": 1.5953759948412578, "epoch": 1.1517123946060257, "grad_norm": 1.8490371704101562, "learning_rate": 9.239770674252689e-06, "loss": 1.0496, "mean_token_accuracy": 0.7037697086731592, "num_tokens": 1757563792.0, "step": 10484 }, { "entropy": 1.6668463846047719, "epoch": 1.1518222515173986, "grad_norm": 0.696306049823761, "learning_rate": 9.238200495023867e-06, "loss": 1.4716, "mean_token_accuracy": 0.6465398073196411, "num_tokens": 1757728133.0, "step": 10485 }, { "entropy": 1.702815721432368, "epoch": 1.1519321084287715, "grad_norm": 0.7206531167030334, "learning_rate": 9.236630371555208e-06, "loss": 1.354, "mean_token_accuracy": 0.669020434220632, "num_tokens": 1757887891.0, "step": 10486 }, { "entropy": 1.6901472806930542, "epoch": 1.1520419653401444, "grad_norm": 0.6455077528953552, "learning_rate": 9.235060303896404e-06, "loss": 1.4898, "mean_token_accuracy": 0.6457482799887657, "num_tokens": 1758055648.0, "step": 10487 }, { "entropy": 1.6960370043913524, "epoch": 1.1521518222515175, "grad_norm": 0.7689752578735352, "learning_rate": 9.233490292097143e-06, "loss": 1.475, "mean_token_accuracy": 0.6641001453002294, "num_tokens": 1758224944.0, "step": 10488 }, { "entropy": 1.6482413212458293, "epoch": 1.1522616791628904, "grad_norm": 0.6643248796463013, "learning_rate": 9.231920336207123e-06, "loss": 1.3675, "mean_token_accuracy": 0.6523696879545847, "num_tokens": 1758393832.0, "step": 10489 }, { "entropy": 1.752677987019221, "epoch": 1.1523715360742632, "grad_norm": 0.684615969657898, "learning_rate": 9.230350436276026e-06, "loss": 1.3543, "mean_token_accuracy": 0.663479283452034, "num_tokens": 1758530887.0, "step": 10490 }, { "entropy": 1.6707546810309093, "epoch": 1.1524813929856361, "grad_norm": 0.5739973783493042, "learning_rate": 9.228780592353538e-06, "loss": 1.3706, "mean_token_accuracy": 0.6499018023411433, "num_tokens": 1758710774.0, "step": 10491 }, { "entropy": 1.7167290846506755, "epoch": 1.1525912498970092, "grad_norm": 0.6053609848022461, "learning_rate": 9.227210804489348e-06, "loss": 1.4804, "mean_token_accuracy": 0.6375697354475657, "num_tokens": 1758891290.0, "step": 10492 }, { "entropy": 1.7572990953922272, "epoch": 1.1527011068083821, "grad_norm": 0.8494213819503784, "learning_rate": 9.225641072733136e-06, "loss": 1.6531, "mean_token_accuracy": 0.6410497824350992, "num_tokens": 1759083232.0, "step": 10493 }, { "entropy": 1.7099438905715942, "epoch": 1.152810963719755, "grad_norm": 0.6985329389572144, "learning_rate": 9.224071397134585e-06, "loss": 1.548, "mean_token_accuracy": 0.6346048961083094, "num_tokens": 1759304006.0, "step": 10494 }, { "entropy": 1.7394586006800334, "epoch": 1.1529208206311279, "grad_norm": 0.5961000919342041, "learning_rate": 9.222501777743375e-06, "loss": 1.2539, "mean_token_accuracy": 0.6759810944398245, "num_tokens": 1759443975.0, "step": 10495 }, { "entropy": 1.6950910985469818, "epoch": 1.153030677542501, "grad_norm": 0.6235581636428833, "learning_rate": 9.220932214609181e-06, "loss": 1.2665, "mean_token_accuracy": 0.6792470415433248, "num_tokens": 1759580860.0, "step": 10496 }, { "entropy": 1.7201534907023113, "epoch": 1.1531405344538739, "grad_norm": 0.6948989629745483, "learning_rate": 9.21936270778168e-06, "loss": 1.3425, "mean_token_accuracy": 0.6627224882443746, "num_tokens": 1759735334.0, "step": 10497 }, { "entropy": 1.655206690231959, "epoch": 1.1532503913652468, "grad_norm": 0.5705309510231018, "learning_rate": 9.217793257310552e-06, "loss": 1.3007, "mean_token_accuracy": 0.676471064488093, "num_tokens": 1759958339.0, "step": 10498 }, { "entropy": 1.708439866701762, "epoch": 1.1533602482766196, "grad_norm": 0.7828124761581421, "learning_rate": 9.216223863245459e-06, "loss": 1.5709, "mean_token_accuracy": 0.6466249401370684, "num_tokens": 1760152326.0, "step": 10499 }, { "entropy": 1.7445420026779175, "epoch": 1.1534701051879925, "grad_norm": 0.7042776346206665, "learning_rate": 9.214654525636078e-06, "loss": 1.3096, "mean_token_accuracy": 0.6746334433555603, "num_tokens": 1760290174.0, "step": 10500 }, { "entropy": 1.681450366973877, "epoch": 1.1535799620993656, "grad_norm": 0.6809564828872681, "learning_rate": 9.21308524453208e-06, "loss": 1.2993, "mean_token_accuracy": 0.6653634657462438, "num_tokens": 1760441480.0, "step": 10501 }, { "entropy": 1.7187366684277852, "epoch": 1.1536898190107385, "grad_norm": 0.57969069480896, "learning_rate": 9.211516019983127e-06, "loss": 1.3651, "mean_token_accuracy": 0.6566129624843597, "num_tokens": 1760598812.0, "step": 10502 }, { "entropy": 1.7298386891682942, "epoch": 1.1537996759221114, "grad_norm": 0.7230368256568909, "learning_rate": 9.209946852038882e-06, "loss": 1.5234, "mean_token_accuracy": 0.6430017203092575, "num_tokens": 1760754936.0, "step": 10503 }, { "entropy": 1.7077820599079132, "epoch": 1.1539095328334843, "grad_norm": 0.7134849429130554, "learning_rate": 9.20837774074902e-06, "loss": 1.3521, "mean_token_accuracy": 0.6613487799962362, "num_tokens": 1760918109.0, "step": 10504 }, { "entropy": 1.6505942145983379, "epoch": 1.1540193897448574, "grad_norm": 0.649359405040741, "learning_rate": 9.20680868616319e-06, "loss": 1.5086, "mean_token_accuracy": 0.6458548208077749, "num_tokens": 1761083688.0, "step": 10505 }, { "entropy": 1.636115938425064, "epoch": 1.1541292466562303, "grad_norm": 0.7054543495178223, "learning_rate": 9.205239688331056e-06, "loss": 1.415, "mean_token_accuracy": 0.6505183627208074, "num_tokens": 1761280525.0, "step": 10506 }, { "entropy": 1.7434692879517872, "epoch": 1.1542391035676032, "grad_norm": 0.7469452619552612, "learning_rate": 9.203670747302283e-06, "loss": 1.3334, "mean_token_accuracy": 0.6721020837624868, "num_tokens": 1761449006.0, "step": 10507 }, { "entropy": 1.71237579981486, "epoch": 1.154348960478976, "grad_norm": 0.799959659576416, "learning_rate": 9.202101863126516e-06, "loss": 1.5013, "mean_token_accuracy": 0.6525566975275675, "num_tokens": 1761612171.0, "step": 10508 }, { "entropy": 1.7190298636754353, "epoch": 1.1544588173903492, "grad_norm": 0.7386515140533447, "learning_rate": 9.200533035853414e-06, "loss": 1.3877, "mean_token_accuracy": 0.6623720477024714, "num_tokens": 1761765552.0, "step": 10509 }, { "entropy": 1.708103507757187, "epoch": 1.154568674301722, "grad_norm": 0.7165181040763855, "learning_rate": 9.198964265532638e-06, "loss": 1.372, "mean_token_accuracy": 0.6593515028556188, "num_tokens": 1761922081.0, "step": 10510 }, { "entropy": 1.682081123193105, "epoch": 1.154678531213095, "grad_norm": 0.7013752460479736, "learning_rate": 9.197395552213823e-06, "loss": 1.384, "mean_token_accuracy": 0.6498614301284155, "num_tokens": 1762114261.0, "step": 10511 }, { "entropy": 1.7438469529151917, "epoch": 1.1547883881244678, "grad_norm": 0.8890546560287476, "learning_rate": 9.195826895946629e-06, "loss": 1.6896, "mean_token_accuracy": 0.6446986744801203, "num_tokens": 1762363635.0, "step": 10512 }, { "entropy": 1.7093018889427185, "epoch": 1.1548982450358407, "grad_norm": 0.7026628255844116, "learning_rate": 9.194258296780705e-06, "loss": 1.34, "mean_token_accuracy": 0.6579982489347458, "num_tokens": 1762525972.0, "step": 10513 }, { "entropy": 1.6976705988248189, "epoch": 1.1550081019472138, "grad_norm": 0.8118287324905396, "learning_rate": 9.19268975476569e-06, "loss": 1.225, "mean_token_accuracy": 0.673242911696434, "num_tokens": 1762630219.0, "step": 10514 }, { "entropy": 1.7051588793595631, "epoch": 1.1551179588585867, "grad_norm": 0.6836156249046326, "learning_rate": 9.191121269951226e-06, "loss": 1.3528, "mean_token_accuracy": 0.6582736670970917, "num_tokens": 1762794612.0, "step": 10515 }, { "entropy": 1.7200697461764018, "epoch": 1.1552278157699596, "grad_norm": 0.6674354076385498, "learning_rate": 9.189552842386964e-06, "loss": 1.2994, "mean_token_accuracy": 0.6657363077004751, "num_tokens": 1762965830.0, "step": 10516 }, { "entropy": 1.7609045306841533, "epoch": 1.1553376726813325, "grad_norm": 0.6270747780799866, "learning_rate": 9.187984472122535e-06, "loss": 1.4016, "mean_token_accuracy": 0.6604256083567938, "num_tokens": 1763128044.0, "step": 10517 }, { "entropy": 1.659463216861089, "epoch": 1.1554475295927056, "grad_norm": 0.7091048955917358, "learning_rate": 9.186416159207582e-06, "loss": 1.4481, "mean_token_accuracy": 0.6355709036191305, "num_tokens": 1763315602.0, "step": 10518 }, { "entropy": 1.7180581390857697, "epoch": 1.1555573865040785, "grad_norm": 0.7278168797492981, "learning_rate": 9.184847903691743e-06, "loss": 1.3962, "mean_token_accuracy": 0.6445047954718272, "num_tokens": 1763462039.0, "step": 10519 }, { "entropy": 1.7248013814290364, "epoch": 1.1556672434154514, "grad_norm": 0.98234623670578, "learning_rate": 9.183279705624645e-06, "loss": 1.3433, "mean_token_accuracy": 0.6596641639868418, "num_tokens": 1763596149.0, "step": 10520 }, { "entropy": 1.7311313549677532, "epoch": 1.1557771003268242, "grad_norm": 0.6669163107872009, "learning_rate": 9.181711565055927e-06, "loss": 1.4256, "mean_token_accuracy": 0.6505987147490183, "num_tokens": 1763791721.0, "step": 10521 }, { "entropy": 1.722319593032201, "epoch": 1.1558869572381973, "grad_norm": 0.6654046177864075, "learning_rate": 9.180143482035223e-06, "loss": 1.3032, "mean_token_accuracy": 0.6619775195916494, "num_tokens": 1763927643.0, "step": 10522 }, { "entropy": 1.7048958043257396, "epoch": 1.1559968141495702, "grad_norm": 0.6961905360221863, "learning_rate": 9.178575456612154e-06, "loss": 1.2973, "mean_token_accuracy": 0.6727237900098165, "num_tokens": 1764080454.0, "step": 10523 }, { "entropy": 1.7435453335444133, "epoch": 1.1561066710609431, "grad_norm": 0.7452827095985413, "learning_rate": 9.177007488836354e-06, "loss": 1.3283, "mean_token_accuracy": 0.670777623852094, "num_tokens": 1764211025.0, "step": 10524 }, { "entropy": 1.7101693550745647, "epoch": 1.156216527972316, "grad_norm": 1.0145291090011597, "learning_rate": 9.175439578757442e-06, "loss": 1.698, "mean_token_accuracy": 0.6353831539551417, "num_tokens": 1764418930.0, "step": 10525 }, { "entropy": 1.766825556755066, "epoch": 1.1563263848836889, "grad_norm": 0.7082020044326782, "learning_rate": 9.173871726425045e-06, "loss": 1.4726, "mean_token_accuracy": 0.6487270891666412, "num_tokens": 1764572213.0, "step": 10526 }, { "entropy": 1.7133028507232666, "epoch": 1.156436241795062, "grad_norm": 0.7147353887557983, "learning_rate": 9.17230393188879e-06, "loss": 1.2518, "mean_token_accuracy": 0.6745504637559255, "num_tokens": 1764723324.0, "step": 10527 }, { "entropy": 1.6591166456540425, "epoch": 1.1565460987064349, "grad_norm": 0.7346095442771912, "learning_rate": 9.170736195198287e-06, "loss": 1.4666, "mean_token_accuracy": 0.6519047121206919, "num_tokens": 1764898490.0, "step": 10528 }, { "entropy": 1.6714920202891033, "epoch": 1.1566559556178078, "grad_norm": 0.6665278673171997, "learning_rate": 9.169168516403158e-06, "loss": 1.3278, "mean_token_accuracy": 0.6733681559562683, "num_tokens": 1765035645.0, "step": 10529 }, { "entropy": 1.7320491870244343, "epoch": 1.1567658125291806, "grad_norm": 0.726340651512146, "learning_rate": 9.167600895553024e-06, "loss": 1.3063, "mean_token_accuracy": 0.6701697111129761, "num_tokens": 1765181838.0, "step": 10530 }, { "entropy": 1.6742952664693196, "epoch": 1.1568756694405538, "grad_norm": 0.9233806729316711, "learning_rate": 9.166033332697495e-06, "loss": 1.3761, "mean_token_accuracy": 0.66909788052241, "num_tokens": 1765313129.0, "step": 10531 }, { "entropy": 1.680614411830902, "epoch": 1.1569855263519266, "grad_norm": 0.6643198132514954, "learning_rate": 9.164465827886184e-06, "loss": 1.3118, "mean_token_accuracy": 0.6700858275095621, "num_tokens": 1765454598.0, "step": 10532 }, { "entropy": 1.7060332397619884, "epoch": 1.1570953832632995, "grad_norm": 0.6335230469703674, "learning_rate": 9.162898381168705e-06, "loss": 1.3871, "mean_token_accuracy": 0.6586262285709381, "num_tokens": 1765649572.0, "step": 10533 }, { "entropy": 1.71233864625295, "epoch": 1.1572052401746724, "grad_norm": 0.6753906011581421, "learning_rate": 9.161330992594662e-06, "loss": 1.4858, "mean_token_accuracy": 0.6469430774450302, "num_tokens": 1765840368.0, "step": 10534 }, { "entropy": 1.7542717456817627, "epoch": 1.1573150970860455, "grad_norm": 0.6666431427001953, "learning_rate": 9.159763662213664e-06, "loss": 1.4361, "mean_token_accuracy": 0.648838589588801, "num_tokens": 1765994363.0, "step": 10535 }, { "entropy": 1.6961637834707897, "epoch": 1.1574249539974184, "grad_norm": 0.7073807120323181, "learning_rate": 9.158196390075319e-06, "loss": 1.3327, "mean_token_accuracy": 0.6611540814240774, "num_tokens": 1766109247.0, "step": 10536 }, { "entropy": 1.680997868378957, "epoch": 1.1575348109087913, "grad_norm": 0.7748100757598877, "learning_rate": 9.156629176229225e-06, "loss": 1.3899, "mean_token_accuracy": 0.6582418978214264, "num_tokens": 1766252569.0, "step": 10537 }, { "entropy": 1.7486574749151866, "epoch": 1.1576446678201642, "grad_norm": 0.7355571389198303, "learning_rate": 9.15506202072499e-06, "loss": 1.4158, "mean_token_accuracy": 0.6534449557463328, "num_tokens": 1766416656.0, "step": 10538 }, { "entropy": 1.7009160220623016, "epoch": 1.157754524731537, "grad_norm": 0.6310091614723206, "learning_rate": 9.153494923612212e-06, "loss": 1.4498, "mean_token_accuracy": 0.6585008750359217, "num_tokens": 1766557961.0, "step": 10539 }, { "entropy": 1.7415493031342824, "epoch": 1.1578643816429102, "grad_norm": 0.6227964758872986, "learning_rate": 9.151927884940486e-06, "loss": 1.4646, "mean_token_accuracy": 0.6527653783559799, "num_tokens": 1766779349.0, "step": 10540 }, { "entropy": 1.7468430002530415, "epoch": 1.157974238554283, "grad_norm": 0.7541377544403076, "learning_rate": 9.150360904759405e-06, "loss": 1.3462, "mean_token_accuracy": 0.668052484591802, "num_tokens": 1766924931.0, "step": 10541 }, { "entropy": 1.7139411966005962, "epoch": 1.158084095465656, "grad_norm": 0.8286843299865723, "learning_rate": 9.148793983118574e-06, "loss": 1.4628, "mean_token_accuracy": 0.6435067802667618, "num_tokens": 1767096773.0, "step": 10542 }, { "entropy": 1.732242186864217, "epoch": 1.1581939523770288, "grad_norm": 0.7068530917167664, "learning_rate": 9.147227120067576e-06, "loss": 1.3388, "mean_token_accuracy": 0.6692612071832021, "num_tokens": 1767257451.0, "step": 10543 }, { "entropy": 1.7326354285081227, "epoch": 1.158303809288402, "grad_norm": 0.8493311405181885, "learning_rate": 9.145660315656006e-06, "loss": 1.4362, "mean_token_accuracy": 0.6661918113629023, "num_tokens": 1767412405.0, "step": 10544 }, { "entropy": 1.797954519589742, "epoch": 1.1584136661997748, "grad_norm": 0.8520449995994568, "learning_rate": 9.144093569933454e-06, "loss": 1.5181, "mean_token_accuracy": 0.6419855256875356, "num_tokens": 1767572561.0, "step": 10545 }, { "entropy": 1.704055945078532, "epoch": 1.1585235231111477, "grad_norm": 0.6789255738258362, "learning_rate": 9.142526882949501e-06, "loss": 1.4423, "mean_token_accuracy": 0.6528183867534002, "num_tokens": 1767792584.0, "step": 10546 }, { "entropy": 1.7003831168015797, "epoch": 1.1586333800225206, "grad_norm": 0.6809309720993042, "learning_rate": 9.140960254753733e-06, "loss": 1.3725, "mean_token_accuracy": 0.6628607759873072, "num_tokens": 1767932953.0, "step": 10547 }, { "entropy": 1.7150506675243378, "epoch": 1.1587432369338937, "grad_norm": 0.9768050312995911, "learning_rate": 9.13939368539574e-06, "loss": 1.3804, "mean_token_accuracy": 0.6604229360818863, "num_tokens": 1768075316.0, "step": 10548 }, { "entropy": 1.6081635057926178, "epoch": 1.1588530938452666, "grad_norm": 0.6204017400741577, "learning_rate": 9.137827174925095e-06, "loss": 1.4556, "mean_token_accuracy": 0.6706610669692358, "num_tokens": 1768245765.0, "step": 10549 }, { "entropy": 1.6946631868680317, "epoch": 1.1589629507566395, "grad_norm": 0.6550582647323608, "learning_rate": 9.136260723391383e-06, "loss": 1.3747, "mean_token_accuracy": 0.6594513903061548, "num_tokens": 1768426866.0, "step": 10550 }, { "entropy": 1.6690677801767986, "epoch": 1.1590728076680124, "grad_norm": 0.7413309216499329, "learning_rate": 9.13469433084418e-06, "loss": 1.3875, "mean_token_accuracy": 0.653538167476654, "num_tokens": 1768621316.0, "step": 10551 }, { "entropy": 1.675765037536621, "epoch": 1.1591826645793852, "grad_norm": 0.7278109192848206, "learning_rate": 9.13312799733306e-06, "loss": 1.2493, "mean_token_accuracy": 0.6701826651891073, "num_tokens": 1768773290.0, "step": 10552 }, { "entropy": 1.6604806085427601, "epoch": 1.1592925214907583, "grad_norm": 0.7603628635406494, "learning_rate": 9.131561722907593e-06, "loss": 1.2599, "mean_token_accuracy": 0.6719126949707667, "num_tokens": 1768919033.0, "step": 10553 }, { "entropy": 1.7303833464781444, "epoch": 1.1594023784021312, "grad_norm": 0.71886146068573, "learning_rate": 9.129995507617362e-06, "loss": 1.6364, "mean_token_accuracy": 0.6442072639862696, "num_tokens": 1769065412.0, "step": 10554 }, { "entropy": 1.7273939549922943, "epoch": 1.1595122353135041, "grad_norm": 0.7979735732078552, "learning_rate": 9.128429351511929e-06, "loss": 1.4078, "mean_token_accuracy": 0.6563597470521927, "num_tokens": 1769204785.0, "step": 10555 }, { "entropy": 1.7173643112182617, "epoch": 1.1596220922248772, "grad_norm": 0.5847103595733643, "learning_rate": 9.126863254640863e-06, "loss": 1.4891, "mean_token_accuracy": 0.6459435870250066, "num_tokens": 1769424428.0, "step": 10556 }, { "entropy": 1.7192479570706685, "epoch": 1.15973194913625, "grad_norm": 0.7932802438735962, "learning_rate": 9.12529721705373e-06, "loss": 1.538, "mean_token_accuracy": 0.6482977941632271, "num_tokens": 1769575003.0, "step": 10557 }, { "entropy": 1.662235786517461, "epoch": 1.159841806047623, "grad_norm": 0.7710309028625488, "learning_rate": 9.123731238800098e-06, "loss": 1.4101, "mean_token_accuracy": 0.6681878517071406, "num_tokens": 1769738956.0, "step": 10558 }, { "entropy": 1.753949224948883, "epoch": 1.1599516629589959, "grad_norm": 0.7551962733268738, "learning_rate": 9.122165319929521e-06, "loss": 1.504, "mean_token_accuracy": 0.6444283723831177, "num_tokens": 1769908470.0, "step": 10559 }, { "entropy": 1.7544064223766327, "epoch": 1.1600615198703688, "grad_norm": 0.7502493262290955, "learning_rate": 9.120599460491572e-06, "loss": 1.3027, "mean_token_accuracy": 0.679939478635788, "num_tokens": 1770051861.0, "step": 10560 }, { "entropy": 1.728769302368164, "epoch": 1.1601713767817419, "grad_norm": 0.6876187920570374, "learning_rate": 9.119033660535802e-06, "loss": 1.3227, "mean_token_accuracy": 0.6703376968701681, "num_tokens": 1770199848.0, "step": 10561 }, { "entropy": 1.7336504260698955, "epoch": 1.1602812336931148, "grad_norm": 0.8140459060668945, "learning_rate": 9.117467920111767e-06, "loss": 1.2416, "mean_token_accuracy": 0.6855403482913971, "num_tokens": 1770304890.0, "step": 10562 }, { "entropy": 1.723543256521225, "epoch": 1.1603910906044876, "grad_norm": 0.8142033219337463, "learning_rate": 9.115902239269026e-06, "loss": 1.4238, "mean_token_accuracy": 0.6583587676286697, "num_tokens": 1770475091.0, "step": 10563 }, { "entropy": 1.7559408446153004, "epoch": 1.1605009475158605, "grad_norm": 0.708025336265564, "learning_rate": 9.114336618057126e-06, "loss": 1.3814, "mean_token_accuracy": 0.6672280778487524, "num_tokens": 1770627827.0, "step": 10564 }, { "entropy": 1.7441905339558919, "epoch": 1.1606108044272334, "grad_norm": 0.6231316328048706, "learning_rate": 9.112771056525625e-06, "loss": 1.3323, "mean_token_accuracy": 0.6605872611204783, "num_tokens": 1770819185.0, "step": 10565 }, { "entropy": 1.7347522576649983, "epoch": 1.1607206613386065, "grad_norm": 0.6096704602241516, "learning_rate": 9.111205554724071e-06, "loss": 1.3962, "mean_token_accuracy": 0.6540986547867457, "num_tokens": 1770986955.0, "step": 10566 }, { "entropy": 1.698427716890971, "epoch": 1.1608305182499794, "grad_norm": 0.6909480690956116, "learning_rate": 9.109640112702009e-06, "loss": 1.3849, "mean_token_accuracy": 0.6506121506293615, "num_tokens": 1771145897.0, "step": 10567 }, { "entropy": 1.6268266638120015, "epoch": 1.1609403751613523, "grad_norm": 0.5729960203170776, "learning_rate": 9.108074730508985e-06, "loss": 1.324, "mean_token_accuracy": 0.6655399600664774, "num_tokens": 1771300536.0, "step": 10568 }, { "entropy": 1.7214660545190175, "epoch": 1.1610502320727254, "grad_norm": 0.6441773772239685, "learning_rate": 9.106509408194543e-06, "loss": 1.2798, "mean_token_accuracy": 0.665215253829956, "num_tokens": 1771434573.0, "step": 10569 }, { "entropy": 1.7466843525568645, "epoch": 1.1611600889840983, "grad_norm": 0.6851255297660828, "learning_rate": 9.104944145808228e-06, "loss": 1.3357, "mean_token_accuracy": 0.6567875295877457, "num_tokens": 1771601038.0, "step": 10570 }, { "entropy": 1.7371169924736023, "epoch": 1.1612699458954712, "grad_norm": 0.8068298697471619, "learning_rate": 9.103378943399572e-06, "loss": 1.4549, "mean_token_accuracy": 0.646860788265864, "num_tokens": 1771772926.0, "step": 10571 }, { "entropy": 1.7305392722288768, "epoch": 1.161379802806844, "grad_norm": 0.7280715703964233, "learning_rate": 9.101813801018125e-06, "loss": 1.3148, "mean_token_accuracy": 0.6666086862484614, "num_tokens": 1771920615.0, "step": 10572 }, { "entropy": 1.7404690285523732, "epoch": 1.161489659718217, "grad_norm": 0.9056682586669922, "learning_rate": 9.100248718713406e-06, "loss": 1.4988, "mean_token_accuracy": 0.6431169708569845, "num_tokens": 1772118214.0, "step": 10573 }, { "entropy": 1.7107574343681335, "epoch": 1.16159951662959, "grad_norm": 0.663151204586029, "learning_rate": 9.098683696534964e-06, "loss": 1.4899, "mean_token_accuracy": 0.6537577112515768, "num_tokens": 1772305129.0, "step": 10574 }, { "entropy": 1.7315536936124165, "epoch": 1.161709373540963, "grad_norm": 0.6387749910354614, "learning_rate": 9.09711873453233e-06, "loss": 1.3131, "mean_token_accuracy": 0.659678096572558, "num_tokens": 1772445507.0, "step": 10575 }, { "entropy": 1.7294196883837383, "epoch": 1.1618192304523358, "grad_norm": 0.6368371844291687, "learning_rate": 9.095553832755026e-06, "loss": 1.4576, "mean_token_accuracy": 0.6409991731246313, "num_tokens": 1772685191.0, "step": 10576 }, { "entropy": 1.738920897245407, "epoch": 1.1619290873637087, "grad_norm": 0.5927242636680603, "learning_rate": 9.093988991252585e-06, "loss": 1.3886, "mean_token_accuracy": 0.6494887272516886, "num_tokens": 1772865464.0, "step": 10577 }, { "entropy": 1.6951357523600261, "epoch": 1.1620389442750816, "grad_norm": 0.7614024877548218, "learning_rate": 9.092424210074537e-06, "loss": 1.4212, "mean_token_accuracy": 0.6512744178374609, "num_tokens": 1773014220.0, "step": 10578 }, { "entropy": 1.6392890711625416, "epoch": 1.1621488011864547, "grad_norm": 0.6055826544761658, "learning_rate": 9.090859489270399e-06, "loss": 1.3891, "mean_token_accuracy": 0.655634676416715, "num_tokens": 1773199400.0, "step": 10579 }, { "entropy": 1.6917970776557922, "epoch": 1.1622586580978276, "grad_norm": 0.6927284002304077, "learning_rate": 9.0892948288897e-06, "loss": 1.3529, "mean_token_accuracy": 0.6633793711662292, "num_tokens": 1773382971.0, "step": 10580 }, { "entropy": 1.691469391187032, "epoch": 1.1623685150092005, "grad_norm": 0.7069520950317383, "learning_rate": 9.087730228981959e-06, "loss": 1.3401, "mean_token_accuracy": 0.6657597869634628, "num_tokens": 1773539956.0, "step": 10581 }, { "entropy": 1.6694080928961437, "epoch": 1.1624783719205736, "grad_norm": 0.6818525791168213, "learning_rate": 9.086165689596696e-06, "loss": 1.2939, "mean_token_accuracy": 0.6682254274686178, "num_tokens": 1773664225.0, "step": 10582 }, { "entropy": 1.6743863622347515, "epoch": 1.1625882288319465, "grad_norm": 0.8346628546714783, "learning_rate": 9.084601210783424e-06, "loss": 1.4255, "mean_token_accuracy": 0.6574391573667526, "num_tokens": 1773827963.0, "step": 10583 }, { "entropy": 1.6976170639197032, "epoch": 1.1626980857433193, "grad_norm": 0.6019466519355774, "learning_rate": 9.083036792591662e-06, "loss": 1.4136, "mean_token_accuracy": 0.6440401424964269, "num_tokens": 1774002026.0, "step": 10584 }, { "entropy": 1.7483911216259003, "epoch": 1.1628079426546922, "grad_norm": 0.7636407613754272, "learning_rate": 9.081472435070917e-06, "loss": 1.4012, "mean_token_accuracy": 0.6655046790838242, "num_tokens": 1774159224.0, "step": 10585 }, { "entropy": 1.6063493490219116, "epoch": 1.1629177995660651, "grad_norm": 0.6063027381896973, "learning_rate": 9.079908138270711e-06, "loss": 1.3721, "mean_token_accuracy": 0.6649445941050848, "num_tokens": 1774345751.0, "step": 10586 }, { "entropy": 1.7763068775335948, "epoch": 1.1630276564774382, "grad_norm": 0.8216478228569031, "learning_rate": 9.078343902240546e-06, "loss": 1.4016, "mean_token_accuracy": 0.6733109205961227, "num_tokens": 1774478712.0, "step": 10587 }, { "entropy": 1.6169381241003673, "epoch": 1.163137513388811, "grad_norm": 0.6574183106422424, "learning_rate": 9.076779727029929e-06, "loss": 1.1698, "mean_token_accuracy": 0.6915866086880366, "num_tokens": 1774608731.0, "step": 10588 }, { "entropy": 1.756181428829829, "epoch": 1.163247370300184, "grad_norm": 0.7906789183616638, "learning_rate": 9.075215612688369e-06, "loss": 1.3784, "mean_token_accuracy": 0.6525656481583914, "num_tokens": 1774744469.0, "step": 10589 }, { "entropy": 1.7244456708431244, "epoch": 1.1633572272115569, "grad_norm": 0.7453427910804749, "learning_rate": 9.073651559265365e-06, "loss": 1.5139, "mean_token_accuracy": 0.6516165683666865, "num_tokens": 1774919441.0, "step": 10590 }, { "entropy": 1.6759937008221943, "epoch": 1.1634670841229298, "grad_norm": 0.741671085357666, "learning_rate": 9.072087566810422e-06, "loss": 1.385, "mean_token_accuracy": 0.6523188451925913, "num_tokens": 1775078071.0, "step": 10591 }, { "entropy": 1.72449991106987, "epoch": 1.1635769410343029, "grad_norm": 0.7586898803710938, "learning_rate": 9.07052363537304e-06, "loss": 1.2781, "mean_token_accuracy": 0.684979259967804, "num_tokens": 1775227702.0, "step": 10592 }, { "entropy": 1.6677986184755962, "epoch": 1.1636867979456758, "grad_norm": 0.731613278388977, "learning_rate": 9.068959765002714e-06, "loss": 1.5139, "mean_token_accuracy": 0.6585745165745417, "num_tokens": 1775376632.0, "step": 10593 }, { "entropy": 1.7536945442358653, "epoch": 1.1637966548570486, "grad_norm": 0.9093847274780273, "learning_rate": 9.06739595574894e-06, "loss": 1.3927, "mean_token_accuracy": 0.6645366350809733, "num_tokens": 1775517987.0, "step": 10594 }, { "entropy": 1.7309378584225972, "epoch": 1.1639065117684217, "grad_norm": 0.7094044089317322, "learning_rate": 9.065832207661218e-06, "loss": 1.3985, "mean_token_accuracy": 0.6560509552558264, "num_tokens": 1775747878.0, "step": 10595 }, { "entropy": 1.7115402321020763, "epoch": 1.1640163686797946, "grad_norm": 0.655071496963501, "learning_rate": 9.06426852078903e-06, "loss": 1.2848, "mean_token_accuracy": 0.6730567514896393, "num_tokens": 1775897435.0, "step": 10596 }, { "entropy": 1.7259460389614105, "epoch": 1.1641262255911675, "grad_norm": 0.6214396357536316, "learning_rate": 9.062704895181873e-06, "loss": 1.4306, "mean_token_accuracy": 0.6395227412382761, "num_tokens": 1776133092.0, "step": 10597 }, { "entropy": 1.6866820653279622, "epoch": 1.1642360825025404, "grad_norm": 0.9714513421058655, "learning_rate": 9.061141330889234e-06, "loss": 1.3075, "mean_token_accuracy": 0.6768196622530619, "num_tokens": 1776291999.0, "step": 10598 }, { "entropy": 1.6818428039550781, "epoch": 1.1643459394139133, "grad_norm": 0.6200037002563477, "learning_rate": 9.059577827960597e-06, "loss": 1.4208, "mean_token_accuracy": 0.6529973646004995, "num_tokens": 1776497288.0, "step": 10599 }, { "entropy": 1.686517169078191, "epoch": 1.1644557963252864, "grad_norm": 0.5967657566070557, "learning_rate": 9.058014386445449e-06, "loss": 1.3172, "mean_token_accuracy": 0.6720605492591858, "num_tokens": 1776678153.0, "step": 10600 }, { "entropy": 1.7095843454202015, "epoch": 1.1645656532366593, "grad_norm": 0.6960015892982483, "learning_rate": 9.05645100639327e-06, "loss": 1.5391, "mean_token_accuracy": 0.6424980262915293, "num_tokens": 1776864339.0, "step": 10601 }, { "entropy": 1.6995967328548431, "epoch": 1.1646755101480322, "grad_norm": 0.6774857044219971, "learning_rate": 9.05488768785354e-06, "loss": 1.4723, "mean_token_accuracy": 0.6477284729480743, "num_tokens": 1777018164.0, "step": 10602 }, { "entropy": 1.6932755609353383, "epoch": 1.164785367059405, "grad_norm": 0.6286726593971252, "learning_rate": 9.053324430875734e-06, "loss": 1.3633, "mean_token_accuracy": 0.6565052568912506, "num_tokens": 1777149134.0, "step": 10603 }, { "entropy": 1.6836401224136353, "epoch": 1.164895223970778, "grad_norm": 0.8434138894081116, "learning_rate": 9.051761235509339e-06, "loss": 1.3775, "mean_token_accuracy": 0.6582860300938288, "num_tokens": 1777336064.0, "step": 10604 }, { "entropy": 1.6700752675533295, "epoch": 1.165005080882151, "grad_norm": 0.6058101058006287, "learning_rate": 9.050198101803822e-06, "loss": 1.3903, "mean_token_accuracy": 0.654156357049942, "num_tokens": 1777523437.0, "step": 10605 }, { "entropy": 1.754345069328944, "epoch": 1.165114937793524, "grad_norm": 0.7618310451507568, "learning_rate": 9.048635029808654e-06, "loss": 1.3307, "mean_token_accuracy": 0.6736029783884684, "num_tokens": 1777664563.0, "step": 10606 }, { "entropy": 1.722790112098058, "epoch": 1.1652247947048968, "grad_norm": 0.5938490033149719, "learning_rate": 9.04707201957331e-06, "loss": 1.3779, "mean_token_accuracy": 0.6630453765392303, "num_tokens": 1777833158.0, "step": 10607 }, { "entropy": 1.7870861391226451, "epoch": 1.16533465161627, "grad_norm": 0.7509839534759521, "learning_rate": 9.045509071147255e-06, "loss": 1.3353, "mean_token_accuracy": 0.665691594282786, "num_tokens": 1777949791.0, "step": 10608 }, { "entropy": 1.6829339563846588, "epoch": 1.1654445085276428, "grad_norm": 0.6705135703086853, "learning_rate": 9.043946184579957e-06, "loss": 1.2543, "mean_token_accuracy": 0.6714605540037155, "num_tokens": 1778052170.0, "step": 10609 }, { "entropy": 1.7535866002241771, "epoch": 1.1655543654390157, "grad_norm": 0.8111270666122437, "learning_rate": 9.042383359920886e-06, "loss": 1.3505, "mean_token_accuracy": 0.6616497834523519, "num_tokens": 1778182029.0, "step": 10610 }, { "entropy": 1.6741001804669697, "epoch": 1.1656642223503886, "grad_norm": 0.5639720559120178, "learning_rate": 9.040820597219493e-06, "loss": 1.4687, "mean_token_accuracy": 0.6465960890054703, "num_tokens": 1778397245.0, "step": 10611 }, { "entropy": 1.7868964572747548, "epoch": 1.1657740792617615, "grad_norm": 0.7194597125053406, "learning_rate": 9.039257896525249e-06, "loss": 1.5443, "mean_token_accuracy": 0.6437151481707891, "num_tokens": 1778582965.0, "step": 10612 }, { "entropy": 1.7630162437756856, "epoch": 1.1658839361731346, "grad_norm": 0.7208252549171448, "learning_rate": 9.037695257887608e-06, "loss": 1.4503, "mean_token_accuracy": 0.6444578021764755, "num_tokens": 1778811502.0, "step": 10613 }, { "entropy": 1.628514697154363, "epoch": 1.1659937930845075, "grad_norm": 0.6529536843299866, "learning_rate": 9.03613268135603e-06, "loss": 1.3149, "mean_token_accuracy": 0.6686781197786331, "num_tokens": 1778965602.0, "step": 10614 }, { "entropy": 1.6970041394233704, "epoch": 1.1661036499958803, "grad_norm": 0.7727194428443909, "learning_rate": 9.034570166979961e-06, "loss": 1.4644, "mean_token_accuracy": 0.6570507635672888, "num_tokens": 1779171505.0, "step": 10615 }, { "entropy": 1.7038895587126415, "epoch": 1.1662135069072532, "grad_norm": 0.6358299255371094, "learning_rate": 9.033007714808865e-06, "loss": 1.3361, "mean_token_accuracy": 0.6585201720396677, "num_tokens": 1779300269.0, "step": 10616 }, { "entropy": 1.6607412695884705, "epoch": 1.1663233638186263, "grad_norm": 0.641280472278595, "learning_rate": 9.03144532489219e-06, "loss": 1.2479, "mean_token_accuracy": 0.6724933038155237, "num_tokens": 1779431418.0, "step": 10617 }, { "entropy": 1.7358726660410564, "epoch": 1.1664332207299992, "grad_norm": 0.6991965770721436, "learning_rate": 9.029882997279383e-06, "loss": 1.3864, "mean_token_accuracy": 0.6562596013148626, "num_tokens": 1779579018.0, "step": 10618 }, { "entropy": 1.6888511975606282, "epoch": 1.166543077641372, "grad_norm": 0.6948026418685913, "learning_rate": 9.02832073201989e-06, "loss": 1.2935, "mean_token_accuracy": 0.6627761671940485, "num_tokens": 1779728921.0, "step": 10619 }, { "entropy": 1.6387386123339336, "epoch": 1.166652934552745, "grad_norm": 0.543950080871582, "learning_rate": 9.026758529163158e-06, "loss": 1.5132, "mean_token_accuracy": 0.6369695862134298, "num_tokens": 1779938084.0, "step": 10620 }, { "entropy": 1.696452538172404, "epoch": 1.166762791464118, "grad_norm": 0.8097180724143982, "learning_rate": 9.025196388758626e-06, "loss": 1.3434, "mean_token_accuracy": 0.6701660056908926, "num_tokens": 1780093963.0, "step": 10621 }, { "entropy": 1.6749595602353413, "epoch": 1.166872648375491, "grad_norm": 0.7634894251823425, "learning_rate": 9.023634310855744e-06, "loss": 1.3388, "mean_token_accuracy": 0.670145645737648, "num_tokens": 1780267420.0, "step": 10622 }, { "entropy": 1.6456829011440277, "epoch": 1.1669825052868639, "grad_norm": 0.7166178822517395, "learning_rate": 9.02207229550394e-06, "loss": 1.4062, "mean_token_accuracy": 0.6510950972636541, "num_tokens": 1780461631.0, "step": 10623 }, { "entropy": 1.731689711411794, "epoch": 1.1670923621982368, "grad_norm": 0.8911299109458923, "learning_rate": 9.020510342752662e-06, "loss": 1.3846, "mean_token_accuracy": 0.652527650197347, "num_tokens": 1780628175.0, "step": 10624 }, { "entropy": 1.7384942670663197, "epoch": 1.1672022191096096, "grad_norm": 0.7990009188652039, "learning_rate": 9.018948452651336e-06, "loss": 1.5015, "mean_token_accuracy": 0.6490287284056345, "num_tokens": 1780796578.0, "step": 10625 }, { "entropy": 1.7568972607453663, "epoch": 1.1673120760209827, "grad_norm": 0.6731627583503723, "learning_rate": 9.0173866252494e-06, "loss": 1.4538, "mean_token_accuracy": 0.6447356839974722, "num_tokens": 1780980358.0, "step": 10626 }, { "entropy": 1.7077033917109172, "epoch": 1.1674219329323556, "grad_norm": 1.3267916440963745, "learning_rate": 9.015824860596283e-06, "loss": 1.4833, "mean_token_accuracy": 0.6471607486406962, "num_tokens": 1781190806.0, "step": 10627 }, { "entropy": 1.7151707013448079, "epoch": 1.1675317898437285, "grad_norm": 0.6183844208717346, "learning_rate": 9.014263158741418e-06, "loss": 1.4084, "mean_token_accuracy": 0.6550938785076141, "num_tokens": 1781328149.0, "step": 10628 }, { "entropy": 1.7326987187067668, "epoch": 1.1676416467551014, "grad_norm": 0.7378236055374146, "learning_rate": 9.012701519734226e-06, "loss": 1.4359, "mean_token_accuracy": 0.6496442258358002, "num_tokens": 1781522900.0, "step": 10629 }, { "entropy": 1.6831368406613667, "epoch": 1.1677515036664745, "grad_norm": 0.6311535239219666, "learning_rate": 9.011139943624137e-06, "loss": 1.3995, "mean_token_accuracy": 0.6628275960683823, "num_tokens": 1781696717.0, "step": 10630 }, { "entropy": 1.754588007926941, "epoch": 1.1678613605778474, "grad_norm": 0.6265390515327454, "learning_rate": 9.009578430460572e-06, "loss": 1.5914, "mean_token_accuracy": 0.6241682320833206, "num_tokens": 1781891433.0, "step": 10631 }, { "entropy": 1.692752718925476, "epoch": 1.1679712174892203, "grad_norm": 0.6025134921073914, "learning_rate": 9.008016980292956e-06, "loss": 1.4682, "mean_token_accuracy": 0.6510246594746908, "num_tokens": 1782053228.0, "step": 10632 }, { "entropy": 1.7124259273211162, "epoch": 1.1680810744005932, "grad_norm": 0.7642148733139038, "learning_rate": 9.006455593170698e-06, "loss": 1.1932, "mean_token_accuracy": 0.6781323105096817, "num_tokens": 1782152083.0, "step": 10633 }, { "entropy": 1.6471915046374004, "epoch": 1.1681909313119663, "grad_norm": 0.6959193348884583, "learning_rate": 9.004894269143228e-06, "loss": 1.3874, "mean_token_accuracy": 0.6629950155814489, "num_tokens": 1782300299.0, "step": 10634 }, { "entropy": 1.6989375551541646, "epoch": 1.1683007882233392, "grad_norm": 0.6835771799087524, "learning_rate": 9.003333008259953e-06, "loss": 1.3153, "mean_token_accuracy": 0.6625100125869116, "num_tokens": 1782431834.0, "step": 10635 }, { "entropy": 1.7035725514094036, "epoch": 1.168410645134712, "grad_norm": 0.678627610206604, "learning_rate": 9.001771810570288e-06, "loss": 1.4835, "mean_token_accuracy": 0.6454518338044485, "num_tokens": 1782610660.0, "step": 10636 }, { "entropy": 1.7128291328748066, "epoch": 1.168520502046085, "grad_norm": 0.6007285118103027, "learning_rate": 9.000210676123648e-06, "loss": 1.3285, "mean_token_accuracy": 0.6597598244746526, "num_tokens": 1782755084.0, "step": 10637 }, { "entropy": 1.7302992641925812, "epoch": 1.1686303589574578, "grad_norm": 0.5934082865715027, "learning_rate": 8.998649604969436e-06, "loss": 1.3524, "mean_token_accuracy": 0.6561450411876043, "num_tokens": 1782932096.0, "step": 10638 }, { "entropy": 1.6810977458953857, "epoch": 1.168740215868831, "grad_norm": 0.616631269454956, "learning_rate": 8.997088597157062e-06, "loss": 1.4466, "mean_token_accuracy": 0.652444009979566, "num_tokens": 1783155895.0, "step": 10639 }, { "entropy": 1.7775660753250122, "epoch": 1.1688500727802038, "grad_norm": 0.859024703502655, "learning_rate": 8.995527652735933e-06, "loss": 1.3859, "mean_token_accuracy": 0.6765128125747045, "num_tokens": 1783280758.0, "step": 10640 }, { "entropy": 1.6725508570671082, "epoch": 1.1689599296915767, "grad_norm": 0.7639785408973694, "learning_rate": 8.99396677175545e-06, "loss": 1.409, "mean_token_accuracy": 0.6709824502468109, "num_tokens": 1783440353.0, "step": 10641 }, { "entropy": 1.7496330042680104, "epoch": 1.1690697866029496, "grad_norm": 0.6356641054153442, "learning_rate": 8.992405954265014e-06, "loss": 1.3488, "mean_token_accuracy": 0.665415291984876, "num_tokens": 1783593270.0, "step": 10642 }, { "entropy": 1.6593229870001476, "epoch": 1.1691796435143227, "grad_norm": 0.7678477168083191, "learning_rate": 8.990845200314027e-06, "loss": 1.3097, "mean_token_accuracy": 0.6617792199055353, "num_tokens": 1783719742.0, "step": 10643 }, { "entropy": 1.702727844317754, "epoch": 1.1692895004256956, "grad_norm": 0.682217538356781, "learning_rate": 8.989284509951881e-06, "loss": 1.3527, "mean_token_accuracy": 0.6565568794806799, "num_tokens": 1783873275.0, "step": 10644 }, { "entropy": 1.7255015075206757, "epoch": 1.1693993573370685, "grad_norm": 0.7633523344993591, "learning_rate": 8.98772388322798e-06, "loss": 1.3573, "mean_token_accuracy": 0.6607132703065872, "num_tokens": 1783994243.0, "step": 10645 }, { "entropy": 1.672673612833023, "epoch": 1.1695092142484413, "grad_norm": 0.7300711274147034, "learning_rate": 8.986163320191706e-06, "loss": 1.4695, "mean_token_accuracy": 0.6509375472863516, "num_tokens": 1784144916.0, "step": 10646 }, { "entropy": 1.7136195699373882, "epoch": 1.1696190711598144, "grad_norm": 0.5912143588066101, "learning_rate": 8.984602820892454e-06, "loss": 1.4903, "mean_token_accuracy": 0.6366753627856573, "num_tokens": 1784386249.0, "step": 10647 }, { "entropy": 1.6643874446551006, "epoch": 1.1697289280711873, "grad_norm": 0.8985964059829712, "learning_rate": 8.983042385379618e-06, "loss": 1.4004, "mean_token_accuracy": 0.6662670622269312, "num_tokens": 1784544876.0, "step": 10648 }, { "entropy": 1.6806229849656422, "epoch": 1.1698387849825602, "grad_norm": 0.6527777314186096, "learning_rate": 8.98148201370258e-06, "loss": 1.3905, "mean_token_accuracy": 0.6572650174299876, "num_tokens": 1784747792.0, "step": 10649 }, { "entropy": 1.7049620548884075, "epoch": 1.169948641893933, "grad_norm": 0.6680081486701965, "learning_rate": 8.979921705910729e-06, "loss": 1.4625, "mean_token_accuracy": 0.6404824604590734, "num_tokens": 1784944930.0, "step": 10650 }, { "entropy": 1.6514282921950023, "epoch": 1.170058498805306, "grad_norm": 0.6086418032646179, "learning_rate": 8.978361462053444e-06, "loss": 1.4598, "mean_token_accuracy": 0.6551013191541036, "num_tokens": 1785164307.0, "step": 10651 }, { "entropy": 1.6851195593674977, "epoch": 1.170168355716679, "grad_norm": 0.6363802552223206, "learning_rate": 8.976801282180108e-06, "loss": 1.4424, "mean_token_accuracy": 0.638342077533404, "num_tokens": 1785428083.0, "step": 10652 }, { "entropy": 1.7672754526138306, "epoch": 1.170278212628052, "grad_norm": 0.6229258179664612, "learning_rate": 8.975241166340097e-06, "loss": 1.435, "mean_token_accuracy": 0.6456956764062246, "num_tokens": 1785603506.0, "step": 10653 }, { "entropy": 1.672398070494334, "epoch": 1.1703880695394249, "grad_norm": 0.5889727473258972, "learning_rate": 8.973681114582795e-06, "loss": 1.3637, "mean_token_accuracy": 0.6640516370534897, "num_tokens": 1785801159.0, "step": 10654 }, { "entropy": 1.783601274092992, "epoch": 1.1704979264507978, "grad_norm": 0.7808632254600525, "learning_rate": 8.972121126957571e-06, "loss": 1.2955, "mean_token_accuracy": 0.6714789718389511, "num_tokens": 1785921513.0, "step": 10655 }, { "entropy": 1.7051396469275157, "epoch": 1.1706077833621709, "grad_norm": 0.7725319862365723, "learning_rate": 8.9705612035138e-06, "loss": 1.5335, "mean_token_accuracy": 0.6427340308825175, "num_tokens": 1786062181.0, "step": 10656 }, { "entropy": 1.7101481556892395, "epoch": 1.1707176402735437, "grad_norm": 0.7353886961936951, "learning_rate": 8.969001344300854e-06, "loss": 1.2933, "mean_token_accuracy": 0.6786648482084274, "num_tokens": 1786199326.0, "step": 10657 }, { "entropy": 1.7510856886704762, "epoch": 1.1708274971849166, "grad_norm": 0.7046499848365784, "learning_rate": 8.967441549368097e-06, "loss": 1.4565, "mean_token_accuracy": 0.6432525664567947, "num_tokens": 1786349644.0, "step": 10658 }, { "entropy": 1.6620566546916962, "epoch": 1.1709373540962895, "grad_norm": 0.5971559882164001, "learning_rate": 8.9658818187649e-06, "loss": 1.3713, "mean_token_accuracy": 0.6542538553476334, "num_tokens": 1786576097.0, "step": 10659 }, { "entropy": 1.7027663091818492, "epoch": 1.1710472110076626, "grad_norm": 1.0174574851989746, "learning_rate": 8.964322152540627e-06, "loss": 1.3846, "mean_token_accuracy": 0.6615005234877268, "num_tokens": 1786752269.0, "step": 10660 }, { "entropy": 1.734482745329539, "epoch": 1.1711570679190355, "grad_norm": 0.6983833909034729, "learning_rate": 8.962762550744642e-06, "loss": 1.341, "mean_token_accuracy": 0.6733351896206538, "num_tokens": 1786886030.0, "step": 10661 }, { "entropy": 1.7492407063643138, "epoch": 1.1712669248304084, "grad_norm": 0.7310764789581299, "learning_rate": 8.9612030134263e-06, "loss": 1.4714, "mean_token_accuracy": 0.6528683652480444, "num_tokens": 1787072947.0, "step": 10662 }, { "entropy": 1.5964481433232625, "epoch": 1.1713767817417813, "grad_norm": 0.6488633751869202, "learning_rate": 8.95964354063497e-06, "loss": 1.2987, "mean_token_accuracy": 0.6710949192444483, "num_tokens": 1787201005.0, "step": 10663 }, { "entropy": 1.7364132006963093, "epoch": 1.1714866386531542, "grad_norm": 0.7075624465942383, "learning_rate": 8.958084132419999e-06, "loss": 1.4657, "mean_token_accuracy": 0.6590905785560608, "num_tokens": 1787332503.0, "step": 10664 }, { "entropy": 1.7492092450459797, "epoch": 1.1715964955645273, "grad_norm": 0.6287668943405151, "learning_rate": 8.956524788830742e-06, "loss": 1.4, "mean_token_accuracy": 0.6587251722812653, "num_tokens": 1787490700.0, "step": 10665 }, { "entropy": 1.6878098646799724, "epoch": 1.1717063524759002, "grad_norm": 0.6984691023826599, "learning_rate": 8.95496550991656e-06, "loss": 1.4229, "mean_token_accuracy": 0.6428747077782949, "num_tokens": 1787656504.0, "step": 10666 }, { "entropy": 1.7307902872562408, "epoch": 1.171816209387273, "grad_norm": 0.636269748210907, "learning_rate": 8.953406295726796e-06, "loss": 1.5128, "mean_token_accuracy": 0.6517880360285441, "num_tokens": 1787851407.0, "step": 10667 }, { "entropy": 1.7220154702663422, "epoch": 1.171926066298646, "grad_norm": 0.6204155087471008, "learning_rate": 8.951847146310801e-06, "loss": 1.4483, "mean_token_accuracy": 0.6446654995282491, "num_tokens": 1788023011.0, "step": 10668 }, { "entropy": 1.7286285956700642, "epoch": 1.172035923210019, "grad_norm": 0.7028345465660095, "learning_rate": 8.950288061717924e-06, "loss": 1.5394, "mean_token_accuracy": 0.6381612122058868, "num_tokens": 1788248357.0, "step": 10669 }, { "entropy": 1.6964424749215443, "epoch": 1.172145780121392, "grad_norm": 0.780795693397522, "learning_rate": 8.948729041997502e-06, "loss": 1.4833, "mean_token_accuracy": 0.6638787587483724, "num_tokens": 1788381104.0, "step": 10670 }, { "entropy": 1.7409202357133229, "epoch": 1.1722556370327648, "grad_norm": 0.700515627861023, "learning_rate": 8.94717008719888e-06, "loss": 1.3702, "mean_token_accuracy": 0.6486127773920695, "num_tokens": 1788513068.0, "step": 10671 }, { "entropy": 1.6607150733470917, "epoch": 1.1723654939441377, "grad_norm": 0.6845481991767883, "learning_rate": 8.945611197371404e-06, "loss": 1.3526, "mean_token_accuracy": 0.6619810660680135, "num_tokens": 1788675191.0, "step": 10672 }, { "entropy": 1.6844545602798462, "epoch": 1.1724753508555108, "grad_norm": 0.5828627943992615, "learning_rate": 8.944052372564404e-06, "loss": 1.333, "mean_token_accuracy": 0.6642112135887146, "num_tokens": 1788823629.0, "step": 10673 }, { "entropy": 1.7418619493643444, "epoch": 1.1725852077668837, "grad_norm": 0.7149393558502197, "learning_rate": 8.942493612827223e-06, "loss": 1.4441, "mean_token_accuracy": 0.6542845120032629, "num_tokens": 1788993849.0, "step": 10674 }, { "entropy": 1.7660021980603535, "epoch": 1.1726950646782566, "grad_norm": 0.6694035530090332, "learning_rate": 8.940934918209193e-06, "loss": 1.4416, "mean_token_accuracy": 0.6477504769961039, "num_tokens": 1789149567.0, "step": 10675 }, { "entropy": 1.7088763415813446, "epoch": 1.1728049215896295, "grad_norm": 0.7105270028114319, "learning_rate": 8.939376288759643e-06, "loss": 1.4383, "mean_token_accuracy": 0.6585270663102468, "num_tokens": 1789297771.0, "step": 10676 }, { "entropy": 1.7306538224220276, "epoch": 1.1729147785010023, "grad_norm": 0.6987410187721252, "learning_rate": 8.937817724527901e-06, "loss": 1.4355, "mean_token_accuracy": 0.6463360438744227, "num_tokens": 1789463092.0, "step": 10677 }, { "entropy": 1.6846852699915569, "epoch": 1.1730246354123754, "grad_norm": 0.7629522085189819, "learning_rate": 8.936259225563306e-06, "loss": 1.2583, "mean_token_accuracy": 0.6747141232093176, "num_tokens": 1789581228.0, "step": 10678 }, { "entropy": 1.7274243632952373, "epoch": 1.1731344923237483, "grad_norm": 0.6886153221130371, "learning_rate": 8.934700791915171e-06, "loss": 1.3786, "mean_token_accuracy": 0.6586506168047587, "num_tokens": 1789767348.0, "step": 10679 }, { "entropy": 1.7268758118152618, "epoch": 1.1732443492351212, "grad_norm": 0.7361603379249573, "learning_rate": 8.933142423632828e-06, "loss": 1.3816, "mean_token_accuracy": 0.666677271326383, "num_tokens": 1789899146.0, "step": 10680 }, { "entropy": 1.6961700121561687, "epoch": 1.173354206146494, "grad_norm": 0.5983572006225586, "learning_rate": 8.931584120765598e-06, "loss": 1.4038, "mean_token_accuracy": 0.66066013276577, "num_tokens": 1790122194.0, "step": 10681 }, { "entropy": 1.7338594396909077, "epoch": 1.1734640630578672, "grad_norm": 0.6352316737174988, "learning_rate": 8.930025883362796e-06, "loss": 1.344, "mean_token_accuracy": 0.6731750816106796, "num_tokens": 1790289767.0, "step": 10682 }, { "entropy": 1.692486047744751, "epoch": 1.17357391996924, "grad_norm": 0.7538011074066162, "learning_rate": 8.928467711473741e-06, "loss": 1.4321, "mean_token_accuracy": 0.6721002409855524, "num_tokens": 1790441707.0, "step": 10683 }, { "entropy": 1.73250612616539, "epoch": 1.173683776880613, "grad_norm": 0.6547481417655945, "learning_rate": 8.926909605147751e-06, "loss": 1.345, "mean_token_accuracy": 0.6578451991081238, "num_tokens": 1790604605.0, "step": 10684 }, { "entropy": 1.6946922838687897, "epoch": 1.1737936337919859, "grad_norm": 0.6833810210227966, "learning_rate": 8.925351564434137e-06, "loss": 1.364, "mean_token_accuracy": 0.6670823097229004, "num_tokens": 1790749799.0, "step": 10685 }, { "entropy": 1.7278658747673035, "epoch": 1.173903490703359, "grad_norm": 0.6715664267539978, "learning_rate": 8.92379358938221e-06, "loss": 1.4199, "mean_token_accuracy": 0.6526618450880051, "num_tokens": 1790927175.0, "step": 10686 }, { "entropy": 1.7412831882635753, "epoch": 1.1740133476147319, "grad_norm": 0.8361808061599731, "learning_rate": 8.922235680041284e-06, "loss": 1.5667, "mean_token_accuracy": 0.661175494392713, "num_tokens": 1791071508.0, "step": 10687 }, { "entropy": 1.7346645096937816, "epoch": 1.1741232045261047, "grad_norm": 0.697149395942688, "learning_rate": 8.920677836460661e-06, "loss": 1.2796, "mean_token_accuracy": 0.6704900513092676, "num_tokens": 1791186740.0, "step": 10688 }, { "entropy": 1.742050697406133, "epoch": 1.1742330614374776, "grad_norm": 0.6318445801734924, "learning_rate": 8.919120058689643e-06, "loss": 1.5916, "mean_token_accuracy": 0.6259209712346395, "num_tokens": 1791436608.0, "step": 10689 }, { "entropy": 1.6182755033175151, "epoch": 1.1743429183488505, "grad_norm": 0.6061080098152161, "learning_rate": 8.917562346777544e-06, "loss": 1.4126, "mean_token_accuracy": 0.6556143959363302, "num_tokens": 1791640128.0, "step": 10690 }, { "entropy": 1.6585151453812916, "epoch": 1.1744527752602236, "grad_norm": 0.6770459413528442, "learning_rate": 8.916004700773656e-06, "loss": 1.2284, "mean_token_accuracy": 0.6863802125056585, "num_tokens": 1791778020.0, "step": 10691 }, { "entropy": 1.69165035088857, "epoch": 1.1745626321715965, "grad_norm": 0.5943127274513245, "learning_rate": 8.914447120727278e-06, "loss": 1.4704, "mean_token_accuracy": 0.6474858671426773, "num_tokens": 1792003193.0, "step": 10692 }, { "entropy": 1.6488823493321736, "epoch": 1.1746724890829694, "grad_norm": 0.6268026828765869, "learning_rate": 8.912889606687713e-06, "loss": 1.1967, "mean_token_accuracy": 0.687493771314621, "num_tokens": 1792114106.0, "step": 10693 }, { "entropy": 1.6368895769119263, "epoch": 1.1747823459943423, "grad_norm": 0.6344706416130066, "learning_rate": 8.911332158704248e-06, "loss": 1.4322, "mean_token_accuracy": 0.6635664999485016, "num_tokens": 1792283248.0, "step": 10694 }, { "entropy": 1.6913351913293202, "epoch": 1.1748922029057154, "grad_norm": 0.6644214987754822, "learning_rate": 8.909774776826179e-06, "loss": 1.4297, "mean_token_accuracy": 0.647185837229093, "num_tokens": 1792453230.0, "step": 10695 }, { "entropy": 1.7606900731722515, "epoch": 1.1750020598170883, "grad_norm": 0.6442691683769226, "learning_rate": 8.908217461102799e-06, "loss": 1.3776, "mean_token_accuracy": 0.6546642581621805, "num_tokens": 1792613034.0, "step": 10696 }, { "entropy": 1.7363630533218384, "epoch": 1.1751119167284612, "grad_norm": 0.8015692830085754, "learning_rate": 8.906660211583392e-06, "loss": 1.2139, "mean_token_accuracy": 0.6720298528671265, "num_tokens": 1792743828.0, "step": 10697 }, { "entropy": 1.7080905040105183, "epoch": 1.175221773639834, "grad_norm": 0.6090119481086731, "learning_rate": 8.905103028317245e-06, "loss": 1.4163, "mean_token_accuracy": 0.6519571195046107, "num_tokens": 1792938546.0, "step": 10698 }, { "entropy": 1.7202429076035817, "epoch": 1.1753316305512072, "grad_norm": 0.980828583240509, "learning_rate": 8.903545911353648e-06, "loss": 1.1951, "mean_token_accuracy": 0.6983717431624731, "num_tokens": 1793050711.0, "step": 10699 }, { "entropy": 1.6536558071772258, "epoch": 1.17544148746258, "grad_norm": 0.8090218305587769, "learning_rate": 8.901988860741875e-06, "loss": 1.4403, "mean_token_accuracy": 0.6533640176057816, "num_tokens": 1793218498.0, "step": 10700 }, { "entropy": 1.701753854751587, "epoch": 1.175551344373953, "grad_norm": 0.622660219669342, "learning_rate": 8.900431876531205e-06, "loss": 1.4098, "mean_token_accuracy": 0.6528751403093338, "num_tokens": 1793407396.0, "step": 10701 }, { "entropy": 1.743919461965561, "epoch": 1.1756612012853258, "grad_norm": 0.6665404438972473, "learning_rate": 8.898874958770928e-06, "loss": 1.3872, "mean_token_accuracy": 0.6619693537553152, "num_tokens": 1793570421.0, "step": 10702 }, { "entropy": 1.7298048436641693, "epoch": 1.1757710581966987, "grad_norm": 0.8393076658248901, "learning_rate": 8.897318107510307e-06, "loss": 1.5443, "mean_token_accuracy": 0.6459775815407435, "num_tokens": 1793738540.0, "step": 10703 }, { "entropy": 1.7660633722941081, "epoch": 1.1758809151080718, "grad_norm": 0.7265772223472595, "learning_rate": 8.895761322798622e-06, "loss": 1.471, "mean_token_accuracy": 0.6382889002561569, "num_tokens": 1793889951.0, "step": 10704 }, { "entropy": 1.7584838569164276, "epoch": 1.1759907720194447, "grad_norm": 0.6831865906715393, "learning_rate": 8.894204604685142e-06, "loss": 1.4035, "mean_token_accuracy": 0.6512691229581833, "num_tokens": 1794068010.0, "step": 10705 }, { "entropy": 1.7787149449189503, "epoch": 1.1761006289308176, "grad_norm": 0.8004885911941528, "learning_rate": 8.892647953219136e-06, "loss": 1.3326, "mean_token_accuracy": 0.6619590371847153, "num_tokens": 1794185919.0, "step": 10706 }, { "entropy": 1.6391962865988414, "epoch": 1.1762104858421905, "grad_norm": 0.6488991975784302, "learning_rate": 8.891091368449876e-06, "loss": 1.3778, "mean_token_accuracy": 0.6589486648639044, "num_tokens": 1794377843.0, "step": 10707 }, { "entropy": 1.73589222629865, "epoch": 1.1763203427535636, "grad_norm": 0.7107129693031311, "learning_rate": 8.88953485042662e-06, "loss": 1.3963, "mean_token_accuracy": 0.661611388127009, "num_tokens": 1794530068.0, "step": 10708 }, { "entropy": 1.691332995891571, "epoch": 1.1764301996649364, "grad_norm": 0.7042721509933472, "learning_rate": 8.887978399198636e-06, "loss": 1.2553, "mean_token_accuracy": 0.6832821269830068, "num_tokens": 1794666670.0, "step": 10709 }, { "entropy": 1.7346055905024211, "epoch": 1.1765400565763093, "grad_norm": 0.6356518268585205, "learning_rate": 8.886422014815188e-06, "loss": 1.3837, "mean_token_accuracy": 0.6597516189018885, "num_tokens": 1794846449.0, "step": 10710 }, { "entropy": 1.7051764130592346, "epoch": 1.1766499134876822, "grad_norm": 0.657356321811676, "learning_rate": 8.884865697325526e-06, "loss": 1.47, "mean_token_accuracy": 0.6433271119991938, "num_tokens": 1795086141.0, "step": 10711 }, { "entropy": 1.7161107162634532, "epoch": 1.1767597703990553, "grad_norm": 0.6534709930419922, "learning_rate": 8.883309446778914e-06, "loss": 1.4793, "mean_token_accuracy": 0.6407648821671804, "num_tokens": 1795262028.0, "step": 10712 }, { "entropy": 1.6933226088682811, "epoch": 1.1768696273104282, "grad_norm": 0.8174028396606445, "learning_rate": 8.881753263224604e-06, "loss": 1.5551, "mean_token_accuracy": 0.6449368943770727, "num_tokens": 1795415394.0, "step": 10713 }, { "entropy": 1.6775444547335308, "epoch": 1.176979484221801, "grad_norm": 0.6747733950614929, "learning_rate": 8.880197146711846e-06, "loss": 1.3102, "mean_token_accuracy": 0.6573974937200546, "num_tokens": 1795587872.0, "step": 10714 }, { "entropy": 1.641041358311971, "epoch": 1.177089341133174, "grad_norm": 0.8012470006942749, "learning_rate": 8.878641097289895e-06, "loss": 1.4218, "mean_token_accuracy": 0.6504307389259338, "num_tokens": 1795768734.0, "step": 10715 }, { "entropy": 1.740955690542857, "epoch": 1.1771991980445469, "grad_norm": 0.6754148006439209, "learning_rate": 8.877085115008e-06, "loss": 1.4466, "mean_token_accuracy": 0.6471899896860123, "num_tokens": 1795996496.0, "step": 10716 }, { "entropy": 1.6761998136838276, "epoch": 1.17730905495592, "grad_norm": 0.6564405560493469, "learning_rate": 8.875529199915403e-06, "loss": 1.4154, "mean_token_accuracy": 0.6573975533246994, "num_tokens": 1796179868.0, "step": 10717 }, { "entropy": 1.6646329561869304, "epoch": 1.1774189118672929, "grad_norm": 0.6660974025726318, "learning_rate": 8.873973352061346e-06, "loss": 1.3027, "mean_token_accuracy": 0.6660636613766352, "num_tokens": 1796375561.0, "step": 10718 }, { "entropy": 1.6921138167381287, "epoch": 1.1775287687786657, "grad_norm": 0.6286952495574951, "learning_rate": 8.87241757149508e-06, "loss": 1.3583, "mean_token_accuracy": 0.6696690519650778, "num_tokens": 1796529528.0, "step": 10719 }, { "entropy": 1.627532919247945, "epoch": 1.1776386256900386, "grad_norm": 2.4623351097106934, "learning_rate": 8.870861858265836e-06, "loss": 1.0272, "mean_token_accuracy": 0.6974131315946579, "num_tokens": 1796667856.0, "step": 10720 }, { "entropy": 1.7433740397294362, "epoch": 1.1777484826014117, "grad_norm": 0.7270897626876831, "learning_rate": 8.869306212422852e-06, "loss": 1.3554, "mean_token_accuracy": 0.6652982632319132, "num_tokens": 1796814869.0, "step": 10721 }, { "entropy": 1.6887870232264202, "epoch": 1.1778583395127846, "grad_norm": 0.6307252049446106, "learning_rate": 8.867750634015372e-06, "loss": 1.3855, "mean_token_accuracy": 0.6616760591665903, "num_tokens": 1797013937.0, "step": 10722 }, { "entropy": 1.7013648450374603, "epoch": 1.1779681964241575, "grad_norm": 0.664087176322937, "learning_rate": 8.86619512309262e-06, "loss": 1.3015, "mean_token_accuracy": 0.6622590919335684, "num_tokens": 1797176520.0, "step": 10723 }, { "entropy": 1.758970280488332, "epoch": 1.1780780533355304, "grad_norm": 0.709904670715332, "learning_rate": 8.864639679703833e-06, "loss": 1.4653, "mean_token_accuracy": 0.6567131032546362, "num_tokens": 1797330029.0, "step": 10724 }, { "entropy": 1.7270347674687703, "epoch": 1.1781879102469035, "grad_norm": 0.6952686905860901, "learning_rate": 8.863084303898238e-06, "loss": 1.4274, "mean_token_accuracy": 0.6522839615742365, "num_tokens": 1797485981.0, "step": 10725 }, { "entropy": 1.6494085093339284, "epoch": 1.1782977671582764, "grad_norm": 0.8430054783821106, "learning_rate": 8.86152899572506e-06, "loss": 1.3444, "mean_token_accuracy": 0.6608125517765681, "num_tokens": 1797666072.0, "step": 10726 }, { "entropy": 1.6977934141953785, "epoch": 1.1784076240696493, "grad_norm": 0.7214722633361816, "learning_rate": 8.859973755233525e-06, "loss": 1.4748, "mean_token_accuracy": 0.648172547419866, "num_tokens": 1797856352.0, "step": 10727 }, { "entropy": 1.6916989386081696, "epoch": 1.1785174809810222, "grad_norm": 0.7626371383666992, "learning_rate": 8.858418582472859e-06, "loss": 1.3687, "mean_token_accuracy": 0.651703084508578, "num_tokens": 1797979260.0, "step": 10728 }, { "entropy": 1.7365763584772747, "epoch": 1.178627337892395, "grad_norm": 0.7373912334442139, "learning_rate": 8.856863477492276e-06, "loss": 1.3676, "mean_token_accuracy": 0.6566950579484304, "num_tokens": 1798131140.0, "step": 10729 }, { "entropy": 1.6413574417432149, "epoch": 1.1787371948037682, "grad_norm": 0.7868739366531372, "learning_rate": 8.855308440341001e-06, "loss": 1.213, "mean_token_accuracy": 0.6854518900314966, "num_tokens": 1798266166.0, "step": 10730 }, { "entropy": 1.6832148929437, "epoch": 1.178847051715141, "grad_norm": 0.6691809892654419, "learning_rate": 8.853753471068249e-06, "loss": 1.2974, "mean_token_accuracy": 0.6681927392880121, "num_tokens": 1798393542.0, "step": 10731 }, { "entropy": 1.6947985390822093, "epoch": 1.178956908626514, "grad_norm": 0.6206928491592407, "learning_rate": 8.852198569723231e-06, "loss": 1.3536, "mean_token_accuracy": 0.6597307672103246, "num_tokens": 1798562655.0, "step": 10732 }, { "entropy": 1.6786811153093975, "epoch": 1.1790667655378868, "grad_norm": 0.6354871988296509, "learning_rate": 8.850643736355157e-06, "loss": 1.3047, "mean_token_accuracy": 0.6605040381352106, "num_tokens": 1798727276.0, "step": 10733 }, { "entropy": 1.6901488800843556, "epoch": 1.17917662244926, "grad_norm": 0.6511650681495667, "learning_rate": 8.849088971013246e-06, "loss": 1.3516, "mean_token_accuracy": 0.6556326846281687, "num_tokens": 1798878099.0, "step": 10734 }, { "entropy": 1.7377649943033855, "epoch": 1.1792864793606328, "grad_norm": 0.7267980575561523, "learning_rate": 8.847534273746696e-06, "loss": 1.515, "mean_token_accuracy": 0.6588891347249349, "num_tokens": 1799033920.0, "step": 10735 }, { "entropy": 1.6604502499103546, "epoch": 1.1793963362720057, "grad_norm": 0.6588174700737, "learning_rate": 8.845979644604716e-06, "loss": 1.4775, "mean_token_accuracy": 0.6402100125948588, "num_tokens": 1799277414.0, "step": 10736 }, { "entropy": 1.7129139800866444, "epoch": 1.1795061931833786, "grad_norm": 0.7839108109474182, "learning_rate": 8.844425083636514e-06, "loss": 1.3845, "mean_token_accuracy": 0.6433479189872742, "num_tokens": 1799462174.0, "step": 10737 }, { "entropy": 1.6579439043998718, "epoch": 1.1796160500947517, "grad_norm": 0.7659602761268616, "learning_rate": 8.842870590891284e-06, "loss": 1.3638, "mean_token_accuracy": 0.6679676622152328, "num_tokens": 1799659246.0, "step": 10738 }, { "entropy": 1.7055364549160004, "epoch": 1.1797259070061246, "grad_norm": 0.7582058906555176, "learning_rate": 8.841316166418225e-06, "loss": 1.4981, "mean_token_accuracy": 0.6421041041612625, "num_tokens": 1799856918.0, "step": 10739 }, { "entropy": 1.6829807460308075, "epoch": 1.1798357639174974, "grad_norm": 0.6783363223075867, "learning_rate": 8.83976181026654e-06, "loss": 1.3274, "mean_token_accuracy": 0.6636816610892614, "num_tokens": 1800016390.0, "step": 10740 }, { "entropy": 1.684312105178833, "epoch": 1.1799456208288703, "grad_norm": 0.7491908669471741, "learning_rate": 8.83820752248542e-06, "loss": 1.3397, "mean_token_accuracy": 0.6631839076677958, "num_tokens": 1800167294.0, "step": 10741 }, { "entropy": 1.739410251379013, "epoch": 1.1800554777402432, "grad_norm": 0.7408508062362671, "learning_rate": 8.836653303124057e-06, "loss": 1.2769, "mean_token_accuracy": 0.6693633794784546, "num_tokens": 1800278529.0, "step": 10742 }, { "entropy": 1.6683301428953807, "epoch": 1.1801653346516163, "grad_norm": 0.7159736752510071, "learning_rate": 8.835099152231645e-06, "loss": 1.5041, "mean_token_accuracy": 0.6455606669187546, "num_tokens": 1800503623.0, "step": 10743 }, { "entropy": 1.6624947686990101, "epoch": 1.1802751915629892, "grad_norm": 0.6846541166305542, "learning_rate": 8.833545069857366e-06, "loss": 1.3794, "mean_token_accuracy": 0.6705669413010279, "num_tokens": 1800684862.0, "step": 10744 }, { "entropy": 1.702225963274638, "epoch": 1.180385048474362, "grad_norm": 0.699865460395813, "learning_rate": 8.831991056050408e-06, "loss": 1.5913, "mean_token_accuracy": 0.6365682830413183, "num_tokens": 1800903631.0, "step": 10745 }, { "entropy": 1.6989426116148632, "epoch": 1.1804949053857352, "grad_norm": 0.6623237729072571, "learning_rate": 8.830437110859959e-06, "loss": 1.5188, "mean_token_accuracy": 0.6346626182397207, "num_tokens": 1801123618.0, "step": 10746 }, { "entropy": 1.7860455016295116, "epoch": 1.180604762297108, "grad_norm": 0.6535719633102417, "learning_rate": 8.828883234335197e-06, "loss": 1.3937, "mean_token_accuracy": 0.6469403405984243, "num_tokens": 1801317371.0, "step": 10747 }, { "entropy": 1.6487139264742534, "epoch": 1.180714619208481, "grad_norm": 0.6619005799293518, "learning_rate": 8.827329426525301e-06, "loss": 1.3683, "mean_token_accuracy": 0.6584922273953756, "num_tokens": 1801487160.0, "step": 10748 }, { "entropy": 1.7167851825555165, "epoch": 1.1808244761198539, "grad_norm": 0.9309948086738586, "learning_rate": 8.825775687479454e-06, "loss": 1.3891, "mean_token_accuracy": 0.6533536563316981, "num_tokens": 1801615083.0, "step": 10749 }, { "entropy": 1.7493426501750946, "epoch": 1.1809343330312267, "grad_norm": 0.8402960300445557, "learning_rate": 8.824222017246824e-06, "loss": 1.3704, "mean_token_accuracy": 0.6674526085456213, "num_tokens": 1801759170.0, "step": 10750 }, { "entropy": 1.6924077570438385, "epoch": 1.1810441899425999, "grad_norm": 0.6291844248771667, "learning_rate": 8.822668415876582e-06, "loss": 1.5256, "mean_token_accuracy": 0.6429929981629053, "num_tokens": 1801944217.0, "step": 10751 }, { "entropy": 1.7071336607138317, "epoch": 1.1811540468539727, "grad_norm": 0.7020394802093506, "learning_rate": 8.821114883417909e-06, "loss": 1.3756, "mean_token_accuracy": 0.6600728432337443, "num_tokens": 1802074925.0, "step": 10752 }, { "entropy": 1.664092222849528, "epoch": 1.1812639037653456, "grad_norm": 0.7180442810058594, "learning_rate": 8.81956141991997e-06, "loss": 1.2898, "mean_token_accuracy": 0.6710790693759918, "num_tokens": 1802243459.0, "step": 10753 }, { "entropy": 1.6833447615305583, "epoch": 1.1813737606767185, "grad_norm": 0.6144715547561646, "learning_rate": 8.818008025431925e-06, "loss": 1.434, "mean_token_accuracy": 0.6551510939995447, "num_tokens": 1802442490.0, "step": 10754 }, { "entropy": 1.7438991864522297, "epoch": 1.1814836175880914, "grad_norm": 0.755179226398468, "learning_rate": 8.816454700002946e-06, "loss": 1.343, "mean_token_accuracy": 0.6712101946274439, "num_tokens": 1802583232.0, "step": 10755 }, { "entropy": 1.6965516308943431, "epoch": 1.1815934744994645, "grad_norm": 0.7071336507797241, "learning_rate": 8.814901443682189e-06, "loss": 1.4545, "mean_token_accuracy": 0.6644222984711329, "num_tokens": 1802775291.0, "step": 10756 }, { "entropy": 1.8494134942690532, "epoch": 1.1817033314108374, "grad_norm": 0.7613623142242432, "learning_rate": 8.813348256518816e-06, "loss": 1.489, "mean_token_accuracy": 0.6528632789850235, "num_tokens": 1802943235.0, "step": 10757 }, { "entropy": 1.7241133948167164, "epoch": 1.1818131883222103, "grad_norm": 0.6806331276893616, "learning_rate": 8.811795138561989e-06, "loss": 1.3547, "mean_token_accuracy": 0.653764029343923, "num_tokens": 1803086552.0, "step": 10758 }, { "entropy": 1.6509768664836884, "epoch": 1.1819230452335834, "grad_norm": 0.7953295111656189, "learning_rate": 8.810242089860857e-06, "loss": 1.4753, "mean_token_accuracy": 0.6489654282728831, "num_tokens": 1803273180.0, "step": 10759 }, { "entropy": 1.701873242855072, "epoch": 1.1820329021449563, "grad_norm": 0.701553463935852, "learning_rate": 8.808689110464576e-06, "loss": 1.3899, "mean_token_accuracy": 0.6529113153616587, "num_tokens": 1803435603.0, "step": 10760 }, { "entropy": 1.7141969501972198, "epoch": 1.1821427590563292, "grad_norm": 0.7244220972061157, "learning_rate": 8.807136200422301e-06, "loss": 1.5109, "mean_token_accuracy": 0.6525500317414602, "num_tokens": 1803580415.0, "step": 10761 }, { "entropy": 1.6993577778339386, "epoch": 1.182252615967702, "grad_norm": 0.6596866250038147, "learning_rate": 8.805583359783175e-06, "loss": 1.4599, "mean_token_accuracy": 0.6497125774621964, "num_tokens": 1803719774.0, "step": 10762 }, { "entropy": 1.7196275393168132, "epoch": 1.182362472879075, "grad_norm": 0.5810356736183167, "learning_rate": 8.804030588596344e-06, "loss": 1.5008, "mean_token_accuracy": 0.6446505437294642, "num_tokens": 1803972288.0, "step": 10763 }, { "entropy": 1.695581078529358, "epoch": 1.182472329790448, "grad_norm": 0.6525010466575623, "learning_rate": 8.802477886910958e-06, "loss": 1.3401, "mean_token_accuracy": 0.6595296412706375, "num_tokens": 1804124653.0, "step": 10764 }, { "entropy": 1.7033733328183491, "epoch": 1.182582186701821, "grad_norm": 0.7598459124565125, "learning_rate": 8.800925254776158e-06, "loss": 1.3434, "mean_token_accuracy": 0.6662160108486811, "num_tokens": 1804257013.0, "step": 10765 }, { "entropy": 1.6759057243665059, "epoch": 1.1826920436131938, "grad_norm": 0.6761953234672546, "learning_rate": 8.799372692241082e-06, "loss": 1.428, "mean_token_accuracy": 0.669055625796318, "num_tokens": 1804452630.0, "step": 10766 }, { "entropy": 1.7233761151631672, "epoch": 1.1828019005245667, "grad_norm": 0.6960268616676331, "learning_rate": 8.797820199354868e-06, "loss": 1.5065, "mean_token_accuracy": 0.6348318805297216, "num_tokens": 1804614525.0, "step": 10767 }, { "entropy": 1.685812105735143, "epoch": 1.1829117574359396, "grad_norm": 0.7641476988792419, "learning_rate": 8.796267776166651e-06, "loss": 1.5683, "mean_token_accuracy": 0.6436462799708048, "num_tokens": 1804786476.0, "step": 10768 }, { "entropy": 1.6836872696876526, "epoch": 1.1830216143473127, "grad_norm": 0.5971675515174866, "learning_rate": 8.794715422725569e-06, "loss": 1.52, "mean_token_accuracy": 0.6463221857945124, "num_tokens": 1805022456.0, "step": 10769 }, { "entropy": 1.7146795690059662, "epoch": 1.1831314712586856, "grad_norm": 0.781304657459259, "learning_rate": 8.793163139080744e-06, "loss": 1.5337, "mean_token_accuracy": 0.6382714013258616, "num_tokens": 1805203477.0, "step": 10770 }, { "entropy": 1.641987790664037, "epoch": 1.1832413281700584, "grad_norm": 0.7032956480979919, "learning_rate": 8.791610925281315e-06, "loss": 1.3017, "mean_token_accuracy": 0.6680291642745336, "num_tokens": 1805370746.0, "step": 10771 }, { "entropy": 1.765711506207784, "epoch": 1.1833511850814316, "grad_norm": 0.9066851735115051, "learning_rate": 8.790058781376409e-06, "loss": 1.4206, "mean_token_accuracy": 0.6616054326295853, "num_tokens": 1805496309.0, "step": 10772 }, { "entropy": 1.7519434293111165, "epoch": 1.1834610419928044, "grad_norm": 0.8009188175201416, "learning_rate": 8.788506707415143e-06, "loss": 1.4096, "mean_token_accuracy": 0.6672770380973816, "num_tokens": 1805641288.0, "step": 10773 }, { "entropy": 1.768102914094925, "epoch": 1.1835708989041773, "grad_norm": 0.6128711700439453, "learning_rate": 8.786954703446643e-06, "loss": 1.5932, "mean_token_accuracy": 0.6311574280261993, "num_tokens": 1805848452.0, "step": 10774 }, { "entropy": 1.7200669348239899, "epoch": 1.1836807558155502, "grad_norm": 0.674370527267456, "learning_rate": 8.78540276952003e-06, "loss": 1.3235, "mean_token_accuracy": 0.6750156929095587, "num_tokens": 1805980538.0, "step": 10775 }, { "entropy": 1.6917518973350525, "epoch": 1.183790612726923, "grad_norm": 0.6382037997245789, "learning_rate": 8.78385090568442e-06, "loss": 1.4685, "mean_token_accuracy": 0.6526532918214798, "num_tokens": 1806141214.0, "step": 10776 }, { "entropy": 1.755267471075058, "epoch": 1.1839004696382962, "grad_norm": 0.7073934078216553, "learning_rate": 8.78229911198893e-06, "loss": 1.2182, "mean_token_accuracy": 0.6788963029781977, "num_tokens": 1806280417.0, "step": 10777 }, { "entropy": 1.6847423215707142, "epoch": 1.184010326549669, "grad_norm": 0.7584076523780823, "learning_rate": 8.780747388482678e-06, "loss": 1.2184, "mean_token_accuracy": 0.6779392212629318, "num_tokens": 1806421411.0, "step": 10778 }, { "entropy": 1.6994816462198894, "epoch": 1.184120183461042, "grad_norm": 0.6640441417694092, "learning_rate": 8.779195735214768e-06, "loss": 1.3675, "mean_token_accuracy": 0.6564560582240423, "num_tokens": 1806579038.0, "step": 10779 }, { "entropy": 1.7090040544668834, "epoch": 1.1842300403724149, "grad_norm": 0.7332303524017334, "learning_rate": 8.777644152234312e-06, "loss": 1.2549, "mean_token_accuracy": 0.6839319815238317, "num_tokens": 1806722045.0, "step": 10780 }, { "entropy": 1.7189118365446727, "epoch": 1.1843398972837877, "grad_norm": 0.6345376372337341, "learning_rate": 8.776092639590418e-06, "loss": 1.4359, "mean_token_accuracy": 0.659914493560791, "num_tokens": 1806887963.0, "step": 10781 }, { "entropy": 1.7617081105709076, "epoch": 1.1844497541951609, "grad_norm": 0.8099861741065979, "learning_rate": 8.77454119733219e-06, "loss": 1.4132, "mean_token_accuracy": 0.6468125134706497, "num_tokens": 1807042559.0, "step": 10782 }, { "entropy": 1.7302567660808563, "epoch": 1.1845596111065337, "grad_norm": 0.8026572465896606, "learning_rate": 8.77298982550873e-06, "loss": 1.5754, "mean_token_accuracy": 0.6293011705080668, "num_tokens": 1807278669.0, "step": 10783 }, { "entropy": 1.7094827393690746, "epoch": 1.1846694680179066, "grad_norm": 0.6681255102157593, "learning_rate": 8.771438524169137e-06, "loss": 1.3552, "mean_token_accuracy": 0.6640477081139883, "num_tokens": 1807442397.0, "step": 10784 }, { "entropy": 1.7145535846551259, "epoch": 1.1847793249292797, "grad_norm": 21.605440139770508, "learning_rate": 8.769887293362514e-06, "loss": 1.4412, "mean_token_accuracy": 0.6432745158672333, "num_tokens": 1807645099.0, "step": 10785 }, { "entropy": 1.7830155591169994, "epoch": 1.1848891818406526, "grad_norm": 0.6564657092094421, "learning_rate": 8.768336133137949e-06, "loss": 1.4377, "mean_token_accuracy": 0.6420264492432276, "num_tokens": 1807780953.0, "step": 10786 }, { "entropy": 1.6700923939545949, "epoch": 1.1849990387520255, "grad_norm": 0.700512707233429, "learning_rate": 8.766785043544544e-06, "loss": 1.4123, "mean_token_accuracy": 0.6515941818555196, "num_tokens": 1807943402.0, "step": 10787 }, { "entropy": 1.671025017897288, "epoch": 1.1851088956633984, "grad_norm": 0.6476449370384216, "learning_rate": 8.765234024631381e-06, "loss": 1.3315, "mean_token_accuracy": 0.6557556490103403, "num_tokens": 1808108811.0, "step": 10788 }, { "entropy": 1.7127976814905803, "epoch": 1.1852187525747713, "grad_norm": 0.542065441608429, "learning_rate": 8.763683076447558e-06, "loss": 1.5378, "mean_token_accuracy": 0.6265371342500051, "num_tokens": 1808343132.0, "step": 10789 }, { "entropy": 1.7127373119195302, "epoch": 1.1853286094861444, "grad_norm": 0.7368000745773315, "learning_rate": 8.762132199042158e-06, "loss": 1.3349, "mean_token_accuracy": 0.6565342048803965, "num_tokens": 1808552364.0, "step": 10790 }, { "entropy": 1.7234003643194835, "epoch": 1.1854384663975173, "grad_norm": 0.733325719833374, "learning_rate": 8.760581392464265e-06, "loss": 1.479, "mean_token_accuracy": 0.6574785908063253, "num_tokens": 1808736493.0, "step": 10791 }, { "entropy": 1.7298449873924255, "epoch": 1.1855483233088902, "grad_norm": 0.6609643697738647, "learning_rate": 8.759030656762961e-06, "loss": 1.5159, "mean_token_accuracy": 0.6325143476327261, "num_tokens": 1808941867.0, "step": 10792 }, { "entropy": 1.7493693828582764, "epoch": 1.185658180220263, "grad_norm": 0.7295409440994263, "learning_rate": 8.757479991987328e-06, "loss": 1.542, "mean_token_accuracy": 0.6383609374364217, "num_tokens": 1809188308.0, "step": 10793 }, { "entropy": 1.7492066224416096, "epoch": 1.185768037131636, "grad_norm": 0.7301694750785828, "learning_rate": 8.755929398186441e-06, "loss": 1.5574, "mean_token_accuracy": 0.6535097360610962, "num_tokens": 1809380493.0, "step": 10794 }, { "entropy": 1.6621710260709126, "epoch": 1.185877894043009, "grad_norm": 0.7311023473739624, "learning_rate": 8.754378875409378e-06, "loss": 1.4412, "mean_token_accuracy": 0.6498973866303762, "num_tokens": 1809561584.0, "step": 10795 }, { "entropy": 1.6603956421216328, "epoch": 1.185987750954382, "grad_norm": 0.6298139691352844, "learning_rate": 8.752828423705213e-06, "loss": 1.3381, "mean_token_accuracy": 0.6642551869153976, "num_tokens": 1809753841.0, "step": 10796 }, { "entropy": 1.7171143392721813, "epoch": 1.1860976078657548, "grad_norm": 0.635201096534729, "learning_rate": 8.751278043123015e-06, "loss": 1.4912, "mean_token_accuracy": 0.6408715645472208, "num_tokens": 1810007570.0, "step": 10797 }, { "entropy": 1.6833914419015248, "epoch": 1.186207464777128, "grad_norm": 0.6429863572120667, "learning_rate": 8.749727733711852e-06, "loss": 1.4519, "mean_token_accuracy": 0.6536713739236196, "num_tokens": 1810172296.0, "step": 10798 }, { "entropy": 1.6702220439910889, "epoch": 1.1863173216885008, "grad_norm": 0.5836479663848877, "learning_rate": 8.748177495520795e-06, "loss": 1.3426, "mean_token_accuracy": 0.6664466510216395, "num_tokens": 1810364719.0, "step": 10799 }, { "entropy": 1.6623725195725758, "epoch": 1.1864271785998737, "grad_norm": 0.7200176119804382, "learning_rate": 8.746627328598903e-06, "loss": 1.3376, "mean_token_accuracy": 0.6745273669560751, "num_tokens": 1810517478.0, "step": 10800 }, { "entropy": 1.7162803411483765, "epoch": 1.1865370355112466, "grad_norm": 0.7739757895469666, "learning_rate": 8.74507723299524e-06, "loss": 1.4352, "mean_token_accuracy": 0.636848971247673, "num_tokens": 1810687094.0, "step": 10801 }, { "entropy": 1.7077897389729817, "epoch": 1.1866468924226194, "grad_norm": 0.9581501483917236, "learning_rate": 8.74352720875887e-06, "loss": 1.3625, "mean_token_accuracy": 0.6712329884370168, "num_tokens": 1810838305.0, "step": 10802 }, { "entropy": 1.7443882822990417, "epoch": 1.1867567493339926, "grad_norm": 0.5825392007827759, "learning_rate": 8.741977255938848e-06, "loss": 1.4246, "mean_token_accuracy": 0.6382510860761007, "num_tokens": 1811036111.0, "step": 10803 }, { "entropy": 1.7159675359725952, "epoch": 1.1868666062453654, "grad_norm": 0.6169284582138062, "learning_rate": 8.740427374584225e-06, "loss": 1.353, "mean_token_accuracy": 0.650434414545695, "num_tokens": 1811216805.0, "step": 10804 }, { "entropy": 1.7108531892299652, "epoch": 1.1869764631567383, "grad_norm": 0.7137644290924072, "learning_rate": 8.73887756474406e-06, "loss": 1.3657, "mean_token_accuracy": 0.6566335658232371, "num_tokens": 1811341956.0, "step": 10805 }, { "entropy": 1.735455960035324, "epoch": 1.1870863200681112, "grad_norm": 0.5706676840782166, "learning_rate": 8.7373278264674e-06, "loss": 1.4739, "mean_token_accuracy": 0.6481334368387858, "num_tokens": 1811539451.0, "step": 10806 }, { "entropy": 1.7320310175418854, "epoch": 1.1871961769794843, "grad_norm": 0.6939385533332825, "learning_rate": 8.735778159803289e-06, "loss": 1.3383, "mean_token_accuracy": 0.6659232576688131, "num_tokens": 1811690465.0, "step": 10807 }, { "entropy": 1.6345330973466237, "epoch": 1.1873060338908572, "grad_norm": 0.6689730286598206, "learning_rate": 8.734228564800787e-06, "loss": 1.2998, "mean_token_accuracy": 0.6712810496489207, "num_tokens": 1811851641.0, "step": 10808 }, { "entropy": 1.7186749478181202, "epoch": 1.18741589080223, "grad_norm": 0.6938754916191101, "learning_rate": 8.732679041508927e-06, "loss": 1.3595, "mean_token_accuracy": 0.6612470696369807, "num_tokens": 1812004102.0, "step": 10809 }, { "entropy": 1.738366852204005, "epoch": 1.187525747713603, "grad_norm": 0.6082279682159424, "learning_rate": 8.731129589976752e-06, "loss": 1.3528, "mean_token_accuracy": 0.661902000506719, "num_tokens": 1812140283.0, "step": 10810 }, { "entropy": 1.73800332347552, "epoch": 1.187635604624976, "grad_norm": 0.7404204607009888, "learning_rate": 8.729580210253307e-06, "loss": 1.474, "mean_token_accuracy": 0.6457099169492722, "num_tokens": 1812288672.0, "step": 10811 }, { "entropy": 1.7052789727846782, "epoch": 1.187745461536349, "grad_norm": 0.6835205554962158, "learning_rate": 8.728030902387623e-06, "loss": 1.4069, "mean_token_accuracy": 0.661319280664126, "num_tokens": 1812489937.0, "step": 10812 }, { "entropy": 1.6847817699114482, "epoch": 1.1878553184477219, "grad_norm": 0.6975307464599609, "learning_rate": 8.726481666428735e-06, "loss": 1.5141, "mean_token_accuracy": 0.6451181322336197, "num_tokens": 1812717330.0, "step": 10813 }, { "entropy": 1.7484122415383656, "epoch": 1.1879651753590947, "grad_norm": 0.8225982189178467, "learning_rate": 8.724932502425681e-06, "loss": 1.3702, "mean_token_accuracy": 0.6497304985920588, "num_tokens": 1812917091.0, "step": 10814 }, { "entropy": 1.6711904605229695, "epoch": 1.1880750322704676, "grad_norm": 0.5482514500617981, "learning_rate": 8.723383410427486e-06, "loss": 1.4879, "mean_token_accuracy": 0.6315444807211558, "num_tokens": 1813125811.0, "step": 10815 }, { "entropy": 1.7230869730313618, "epoch": 1.1881848891818407, "grad_norm": 0.818645179271698, "learning_rate": 8.721834390483181e-06, "loss": 1.4077, "mean_token_accuracy": 0.6621546596288681, "num_tokens": 1813307367.0, "step": 10816 }, { "entropy": 1.682017187277476, "epoch": 1.1882947460932136, "grad_norm": 0.6782887578010559, "learning_rate": 8.720285442641794e-06, "loss": 1.5252, "mean_token_accuracy": 0.6403040736913681, "num_tokens": 1813517516.0, "step": 10817 }, { "entropy": 1.6661075949668884, "epoch": 1.1884046030045865, "grad_norm": 0.6994887590408325, "learning_rate": 8.718736566952342e-06, "loss": 1.3352, "mean_token_accuracy": 0.6600988954305649, "num_tokens": 1813674638.0, "step": 10818 }, { "entropy": 1.7056255837281544, "epoch": 1.1885144599159594, "grad_norm": 0.6169335246086121, "learning_rate": 8.717187763463848e-06, "loss": 1.4025, "mean_token_accuracy": 0.6571420232454935, "num_tokens": 1813822167.0, "step": 10819 }, { "entropy": 1.709171086549759, "epoch": 1.1886243168273325, "grad_norm": 0.6775344610214233, "learning_rate": 8.715639032225338e-06, "loss": 1.4238, "mean_token_accuracy": 0.6446866790453593, "num_tokens": 1813991064.0, "step": 10820 }, { "entropy": 1.6793767909208934, "epoch": 1.1887341737387054, "grad_norm": 0.791778564453125, "learning_rate": 8.71409037328582e-06, "loss": 1.489, "mean_token_accuracy": 0.6477701465288798, "num_tokens": 1814176897.0, "step": 10821 }, { "entropy": 1.7169764240582783, "epoch": 1.1888440306500783, "grad_norm": 0.6778224110603333, "learning_rate": 8.71254178669431e-06, "loss": 1.5503, "mean_token_accuracy": 0.6353256702423096, "num_tokens": 1814358467.0, "step": 10822 }, { "entropy": 1.6720323065916698, "epoch": 1.1889538875614512, "grad_norm": 0.6832537055015564, "learning_rate": 8.710993272499826e-06, "loss": 1.2303, "mean_token_accuracy": 0.677433043718338, "num_tokens": 1814480540.0, "step": 10823 }, { "entropy": 1.7010047535101573, "epoch": 1.1890637444728243, "grad_norm": 0.8217154145240784, "learning_rate": 8.70944483075137e-06, "loss": 1.338, "mean_token_accuracy": 0.6667589843273163, "num_tokens": 1814617055.0, "step": 10824 }, { "entropy": 1.7079274654388428, "epoch": 1.1891736013841971, "grad_norm": 0.8178585767745972, "learning_rate": 8.707896461497957e-06, "loss": 1.3209, "mean_token_accuracy": 0.6651990612347921, "num_tokens": 1814759656.0, "step": 10825 }, { "entropy": 1.690351406733195, "epoch": 1.18928345829557, "grad_norm": 0.6807016134262085, "learning_rate": 8.706348164788582e-06, "loss": 1.4074, "mean_token_accuracy": 0.6610402117172877, "num_tokens": 1814904145.0, "step": 10826 }, { "entropy": 1.7773006558418274, "epoch": 1.189393315206943, "grad_norm": 0.8337060213088989, "learning_rate": 8.704799940672257e-06, "loss": 1.3194, "mean_token_accuracy": 0.668373758594195, "num_tokens": 1815086239.0, "step": 10827 }, { "entropy": 1.6671875913937886, "epoch": 1.1895031721183158, "grad_norm": 0.7558709383010864, "learning_rate": 8.703251789197981e-06, "loss": 1.4228, "mean_token_accuracy": 0.6599519302447637, "num_tokens": 1815233304.0, "step": 10828 }, { "entropy": 1.6764814754327138, "epoch": 1.189613029029689, "grad_norm": 0.648366391658783, "learning_rate": 8.701703710414752e-06, "loss": 1.2463, "mean_token_accuracy": 0.6833883871634802, "num_tokens": 1815365343.0, "step": 10829 }, { "entropy": 1.6759169201056163, "epoch": 1.1897228859410618, "grad_norm": 0.7814769744873047, "learning_rate": 8.700155704371562e-06, "loss": 1.4332, "mean_token_accuracy": 0.6664823815226555, "num_tokens": 1815511637.0, "step": 10830 }, { "entropy": 1.7373320559660594, "epoch": 1.1898327428524347, "grad_norm": 0.8521638512611389, "learning_rate": 8.698607771117408e-06, "loss": 1.4448, "mean_token_accuracy": 0.652740036447843, "num_tokens": 1815650747.0, "step": 10831 }, { "entropy": 1.686434547106425, "epoch": 1.1899425997638076, "grad_norm": 0.707066535949707, "learning_rate": 8.697059910701283e-06, "loss": 1.1549, "mean_token_accuracy": 0.6953272720177969, "num_tokens": 1815758439.0, "step": 10832 }, { "entropy": 1.7592855592568715, "epoch": 1.1900524566751807, "grad_norm": 0.6283326745033264, "learning_rate": 8.69551212317217e-06, "loss": 1.3828, "mean_token_accuracy": 0.6532629181941351, "num_tokens": 1815916712.0, "step": 10833 }, { "entropy": 1.6812595228354137, "epoch": 1.1901623135865536, "grad_norm": 0.887874960899353, "learning_rate": 8.693964408579063e-06, "loss": 1.3895, "mean_token_accuracy": 0.6594204902648926, "num_tokens": 1816075205.0, "step": 10834 }, { "entropy": 1.6642636756102245, "epoch": 1.1902721704979264, "grad_norm": 0.6853379011154175, "learning_rate": 8.692416766970943e-06, "loss": 1.3377, "mean_token_accuracy": 0.6647604952255884, "num_tokens": 1816224025.0, "step": 10835 }, { "entropy": 1.7936367491881053, "epoch": 1.1903820274092993, "grad_norm": 0.7250938415527344, "learning_rate": 8.690869198396792e-06, "loss": 1.4598, "mean_token_accuracy": 0.6410937756299973, "num_tokens": 1816370800.0, "step": 10836 }, { "entropy": 1.7596316039562225, "epoch": 1.1904918843206724, "grad_norm": 0.7456021308898926, "learning_rate": 8.689321702905593e-06, "loss": 1.4467, "mean_token_accuracy": 0.6487318376700083, "num_tokens": 1816518599.0, "step": 10837 }, { "entropy": 1.7355043093363445, "epoch": 1.1906017412320453, "grad_norm": 0.5878375768661499, "learning_rate": 8.687774280546317e-06, "loss": 1.5659, "mean_token_accuracy": 0.6355293492476145, "num_tokens": 1816824813.0, "step": 10838 }, { "entropy": 1.6184170246124268, "epoch": 1.1907115981434182, "grad_norm": 0.7229267954826355, "learning_rate": 8.686226931367943e-06, "loss": 1.355, "mean_token_accuracy": 0.6660072356462479, "num_tokens": 1816987791.0, "step": 10839 }, { "entropy": 1.731922020514806, "epoch": 1.190821455054791, "grad_norm": 0.6348045468330383, "learning_rate": 8.684679655419445e-06, "loss": 1.4086, "mean_token_accuracy": 0.6459181507428488, "num_tokens": 1817155835.0, "step": 10840 }, { "entropy": 1.7074114779631298, "epoch": 1.190931311966164, "grad_norm": 10.530064582824707, "learning_rate": 8.683132452749796e-06, "loss": 1.5041, "mean_token_accuracy": 0.6442484011252722, "num_tokens": 1817336230.0, "step": 10841 }, { "entropy": 1.6497264802455902, "epoch": 1.191041168877537, "grad_norm": 0.6446982622146606, "learning_rate": 8.681585323407958e-06, "loss": 1.5598, "mean_token_accuracy": 0.6426790108283361, "num_tokens": 1817608365.0, "step": 10842 }, { "entropy": 1.7166444063186646, "epoch": 1.19115102578891, "grad_norm": 0.6891461610794067, "learning_rate": 8.6800382674429e-06, "loss": 1.5312, "mean_token_accuracy": 0.6481931606928507, "num_tokens": 1817825491.0, "step": 10843 }, { "entropy": 1.7337975700696309, "epoch": 1.1912608827002829, "grad_norm": 0.6657007932662964, "learning_rate": 8.678491284903583e-06, "loss": 1.4386, "mean_token_accuracy": 0.6465141177177429, "num_tokens": 1817977223.0, "step": 10844 }, { "entropy": 1.6892333626747131, "epoch": 1.1913707396116557, "grad_norm": 0.6128289103507996, "learning_rate": 8.676944375838973e-06, "loss": 1.2792, "mean_token_accuracy": 0.6714215278625488, "num_tokens": 1818149277.0, "step": 10845 }, { "entropy": 1.597786416610082, "epoch": 1.1914805965230288, "grad_norm": 0.6063182950019836, "learning_rate": 8.67539754029803e-06, "loss": 1.4619, "mean_token_accuracy": 0.6459900289773941, "num_tokens": 1818347912.0, "step": 10846 }, { "entropy": 1.6953892509142559, "epoch": 1.1915904534344017, "grad_norm": 0.8109437823295593, "learning_rate": 8.673850778329702e-06, "loss": 1.4544, "mean_token_accuracy": 0.6425779660542806, "num_tokens": 1818571841.0, "step": 10847 }, { "entropy": 1.655045618613561, "epoch": 1.1917003103457746, "grad_norm": 0.6422619819641113, "learning_rate": 8.67230408998295e-06, "loss": 1.3572, "mean_token_accuracy": 0.6548277189334234, "num_tokens": 1818751000.0, "step": 10848 }, { "entropy": 1.6748607456684113, "epoch": 1.1918101672571475, "grad_norm": 0.753288984298706, "learning_rate": 8.670757475306728e-06, "loss": 1.3551, "mean_token_accuracy": 0.6647098064422607, "num_tokens": 1818937047.0, "step": 10849 }, { "entropy": 1.70504829287529, "epoch": 1.1919200241685206, "grad_norm": 0.5776710510253906, "learning_rate": 8.669210934349978e-06, "loss": 1.4304, "mean_token_accuracy": 0.6487905929485956, "num_tokens": 1819120691.0, "step": 10850 }, { "entropy": 1.6992063224315643, "epoch": 1.1920298810798935, "grad_norm": 0.9151628017425537, "learning_rate": 8.667664467161652e-06, "loss": 1.4308, "mean_token_accuracy": 0.6610411157210668, "num_tokens": 1819289227.0, "step": 10851 }, { "entropy": 1.6430395245552063, "epoch": 1.1921397379912664, "grad_norm": 0.7337287068367004, "learning_rate": 8.666118073790699e-06, "loss": 1.4605, "mean_token_accuracy": 0.652332549293836, "num_tokens": 1819495147.0, "step": 10852 }, { "entropy": 1.698825587828954, "epoch": 1.1922495949026393, "grad_norm": 0.75420743227005, "learning_rate": 8.664571754286052e-06, "loss": 1.4167, "mean_token_accuracy": 0.6623470187187195, "num_tokens": 1819635916.0, "step": 10853 }, { "entropy": 1.6793596645196278, "epoch": 1.1923594518140122, "grad_norm": 0.618486225605011, "learning_rate": 8.663025508696658e-06, "loss": 1.3148, "mean_token_accuracy": 0.6688097268342972, "num_tokens": 1819786330.0, "step": 10854 }, { "entropy": 1.6689561307430267, "epoch": 1.1924693087253853, "grad_norm": 0.7865815758705139, "learning_rate": 8.661479337071458e-06, "loss": 1.3624, "mean_token_accuracy": 0.6614319185415903, "num_tokens": 1819922056.0, "step": 10855 }, { "entropy": 1.691734939813614, "epoch": 1.1925791656367581, "grad_norm": 0.7773484587669373, "learning_rate": 8.659933239459377e-06, "loss": 1.428, "mean_token_accuracy": 0.6591572364171346, "num_tokens": 1820169282.0, "step": 10856 }, { "entropy": 1.7865646183490753, "epoch": 1.192689022548131, "grad_norm": 0.7435487508773804, "learning_rate": 8.658387215909358e-06, "loss": 1.3392, "mean_token_accuracy": 0.6749976028998693, "num_tokens": 1820290334.0, "step": 10857 }, { "entropy": 1.6983545819918315, "epoch": 1.192798879459504, "grad_norm": 0.6907163262367249, "learning_rate": 8.656841266470328e-06, "loss": 1.2468, "mean_token_accuracy": 0.6775921235481898, "num_tokens": 1820415779.0, "step": 10858 }, { "entropy": 1.6848465104897816, "epoch": 1.192908736370877, "grad_norm": 0.6214163303375244, "learning_rate": 8.65529539119122e-06, "loss": 1.345, "mean_token_accuracy": 0.6662961939970652, "num_tokens": 1820575417.0, "step": 10859 }, { "entropy": 1.7312945226828258, "epoch": 1.19301859328225, "grad_norm": 0.5840948224067688, "learning_rate": 8.65374959012095e-06, "loss": 1.541, "mean_token_accuracy": 0.6397745758295059, "num_tokens": 1820797132.0, "step": 10860 }, { "entropy": 1.7128262619177501, "epoch": 1.1931284501936228, "grad_norm": 0.6750525832176208, "learning_rate": 8.65220386330845e-06, "loss": 1.3281, "mean_token_accuracy": 0.6626399159431458, "num_tokens": 1820916118.0, "step": 10861 }, { "entropy": 1.7520559827486675, "epoch": 1.1932383071049957, "grad_norm": 0.5905542969703674, "learning_rate": 8.650658210802638e-06, "loss": 1.4636, "mean_token_accuracy": 0.6265908926725388, "num_tokens": 1821134408.0, "step": 10862 }, { "entropy": 1.6462377607822418, "epoch": 1.1933481640163688, "grad_norm": 0.7132760286331177, "learning_rate": 8.649112632652436e-06, "loss": 1.3858, "mean_token_accuracy": 0.6677844027678171, "num_tokens": 1821314158.0, "step": 10863 }, { "entropy": 1.6968292494614918, "epoch": 1.1934580209277417, "grad_norm": 0.6396412253379822, "learning_rate": 8.647567128906764e-06, "loss": 1.3326, "mean_token_accuracy": 0.6573519359032313, "num_tokens": 1821452147.0, "step": 10864 }, { "entropy": 1.7002749343713124, "epoch": 1.1935678778391146, "grad_norm": 0.5961291790008545, "learning_rate": 8.646021699614529e-06, "loss": 1.4085, "mean_token_accuracy": 0.6624472538630167, "num_tokens": 1821639995.0, "step": 10865 }, { "entropy": 1.7443738182385762, "epoch": 1.1936777347504874, "grad_norm": 0.6922990679740906, "learning_rate": 8.644476344824646e-06, "loss": 1.3906, "mean_token_accuracy": 0.6587434560060501, "num_tokens": 1821779295.0, "step": 10866 }, { "entropy": 1.6607798635959625, "epoch": 1.1937875916618603, "grad_norm": 0.5818439722061157, "learning_rate": 8.642931064586028e-06, "loss": 1.308, "mean_token_accuracy": 0.6683350056409836, "num_tokens": 1821913214.0, "step": 10867 }, { "entropy": 1.7286332647005718, "epoch": 1.1938974485732334, "grad_norm": 0.7446157336235046, "learning_rate": 8.641385858947576e-06, "loss": 1.4779, "mean_token_accuracy": 0.6418144504229227, "num_tokens": 1822091301.0, "step": 10868 }, { "entropy": 1.748667687177658, "epoch": 1.1940073054846063, "grad_norm": 0.7008844017982483, "learning_rate": 8.6398407279582e-06, "loss": 1.5479, "mean_token_accuracy": 0.6476845939954122, "num_tokens": 1822281478.0, "step": 10869 }, { "entropy": 1.7366498708724976, "epoch": 1.1941171623959792, "grad_norm": 0.7748090028762817, "learning_rate": 8.638295671666803e-06, "loss": 1.471, "mean_token_accuracy": 0.6507512678702673, "num_tokens": 1822443339.0, "step": 10870 }, { "entropy": 1.730059305826823, "epoch": 1.194227019307352, "grad_norm": 0.675847053527832, "learning_rate": 8.636750690122282e-06, "loss": 1.4335, "mean_token_accuracy": 0.6394089609384537, "num_tokens": 1822638722.0, "step": 10871 }, { "entropy": 1.6546126703421276, "epoch": 1.1943368762187252, "grad_norm": 0.5662322640419006, "learning_rate": 8.63520578337354e-06, "loss": 1.4372, "mean_token_accuracy": 0.6361754983663559, "num_tokens": 1822865064.0, "step": 10872 }, { "entropy": 1.701521893342336, "epoch": 1.194446733130098, "grad_norm": 0.6960839033126831, "learning_rate": 8.633660951469468e-06, "loss": 1.4204, "mean_token_accuracy": 0.6519134740034739, "num_tokens": 1823055053.0, "step": 10873 }, { "entropy": 1.6866462131341298, "epoch": 1.194556590041471, "grad_norm": 0.6791787147521973, "learning_rate": 8.632116194458955e-06, "loss": 1.3331, "mean_token_accuracy": 0.6622498879830042, "num_tokens": 1823224555.0, "step": 10874 }, { "entropy": 1.7383518815040588, "epoch": 1.1946664469528439, "grad_norm": 0.7168798446655273, "learning_rate": 8.630571512390901e-06, "loss": 1.4732, "mean_token_accuracy": 0.6602436949809393, "num_tokens": 1823381692.0, "step": 10875 }, { "entropy": 1.7333435515562694, "epoch": 1.194776303864217, "grad_norm": 0.6332979798316956, "learning_rate": 8.629026905314195e-06, "loss": 1.4628, "mean_token_accuracy": 0.6372295717398325, "num_tokens": 1823557246.0, "step": 10876 }, { "entropy": 1.7318945527076721, "epoch": 1.1948861607755898, "grad_norm": 0.7273834943771362, "learning_rate": 8.627482373277715e-06, "loss": 1.5831, "mean_token_accuracy": 0.6298131893078486, "num_tokens": 1823721277.0, "step": 10877 }, { "entropy": 1.689674695332845, "epoch": 1.1949960176869627, "grad_norm": 0.6805070042610168, "learning_rate": 8.625937916330349e-06, "loss": 1.2654, "mean_token_accuracy": 0.6795346190532049, "num_tokens": 1823846743.0, "step": 10878 }, { "entropy": 1.6561415096124013, "epoch": 1.1951058745983356, "grad_norm": 0.6901777386665344, "learning_rate": 8.62439353452098e-06, "loss": 1.3924, "mean_token_accuracy": 0.6656059821446737, "num_tokens": 1824066791.0, "step": 10879 }, { "entropy": 1.6609856188297272, "epoch": 1.1952157315097085, "grad_norm": 0.6951460242271423, "learning_rate": 8.622849227898484e-06, "loss": 1.202, "mean_token_accuracy": 0.6859797437985738, "num_tokens": 1824221799.0, "step": 10880 }, { "entropy": 1.6828400393327076, "epoch": 1.1953255884210816, "grad_norm": 0.8013219237327576, "learning_rate": 8.621304996511737e-06, "loss": 1.5402, "mean_token_accuracy": 0.6594565212726593, "num_tokens": 1824404281.0, "step": 10881 }, { "entropy": 1.758839060862859, "epoch": 1.1954354453324545, "grad_norm": 0.8230046629905701, "learning_rate": 8.61976084040962e-06, "loss": 1.3892, "mean_token_accuracy": 0.6508858899275461, "num_tokens": 1824542346.0, "step": 10882 }, { "entropy": 1.7173049648602803, "epoch": 1.1955453022438274, "grad_norm": 0.6363534331321716, "learning_rate": 8.618216759640994e-06, "loss": 1.5549, "mean_token_accuracy": 0.628744641939799, "num_tokens": 1824762577.0, "step": 10883 }, { "entropy": 1.678319166103999, "epoch": 1.1956551591552003, "grad_norm": 0.7358280420303345, "learning_rate": 8.616672754254738e-06, "loss": 1.428, "mean_token_accuracy": 0.6569743702809016, "num_tokens": 1824896107.0, "step": 10884 }, { "entropy": 1.6763150095939636, "epoch": 1.1957650160665734, "grad_norm": 0.8936296701431274, "learning_rate": 8.615128824299716e-06, "loss": 1.4788, "mean_token_accuracy": 0.6430085202058157, "num_tokens": 1825169621.0, "step": 10885 }, { "entropy": 1.7438491185506184, "epoch": 1.1958748729779463, "grad_norm": 0.6828886866569519, "learning_rate": 8.613584969824789e-06, "loss": 1.5277, "mean_token_accuracy": 0.6488937735557556, "num_tokens": 1825324021.0, "step": 10886 }, { "entropy": 1.7129732171694438, "epoch": 1.1959847298893191, "grad_norm": 0.8133248090744019, "learning_rate": 8.612041190878826e-06, "loss": 1.3015, "mean_token_accuracy": 0.672540470957756, "num_tokens": 1825460259.0, "step": 10887 }, { "entropy": 1.6727862358093262, "epoch": 1.196094586800692, "grad_norm": 0.6648197174072266, "learning_rate": 8.610497487510679e-06, "loss": 1.414, "mean_token_accuracy": 0.6671945502360662, "num_tokens": 1825626619.0, "step": 10888 }, { "entropy": 1.7124856114387512, "epoch": 1.1962044437120651, "grad_norm": 0.8533644080162048, "learning_rate": 8.60895385976921e-06, "loss": 1.4938, "mean_token_accuracy": 0.652462845047315, "num_tokens": 1825775596.0, "step": 10889 }, { "entropy": 1.6300967534383137, "epoch": 1.196314300623438, "grad_norm": 0.697281002998352, "learning_rate": 8.607410307703279e-06, "loss": 1.3898, "mean_token_accuracy": 0.6632688790559769, "num_tokens": 1825950828.0, "step": 10890 }, { "entropy": 1.6891121864318848, "epoch": 1.196424157534811, "grad_norm": 0.7355936169624329, "learning_rate": 8.605866831361729e-06, "loss": 1.572, "mean_token_accuracy": 0.6447887768348058, "num_tokens": 1826134511.0, "step": 10891 }, { "entropy": 1.7174355785051982, "epoch": 1.1965340144461838, "grad_norm": 0.6898308992385864, "learning_rate": 8.604323430793416e-06, "loss": 1.4689, "mean_token_accuracy": 0.6526208321253458, "num_tokens": 1826298089.0, "step": 10892 }, { "entropy": 1.7147394319375355, "epoch": 1.1966438713575567, "grad_norm": 0.8916130661964417, "learning_rate": 8.602780106047189e-06, "loss": 1.364, "mean_token_accuracy": 0.6715402801831564, "num_tokens": 1826432931.0, "step": 10893 }, { "entropy": 1.6138789653778076, "epoch": 1.1967537282689298, "grad_norm": 0.7221713662147522, "learning_rate": 8.60123685717189e-06, "loss": 1.4328, "mean_token_accuracy": 0.6576006362835566, "num_tokens": 1826627859.0, "step": 10894 }, { "entropy": 1.7023044029871623, "epoch": 1.1968635851803027, "grad_norm": 0.571751594543457, "learning_rate": 8.59969368421636e-06, "loss": 1.3235, "mean_token_accuracy": 0.6696832726399103, "num_tokens": 1826792129.0, "step": 10895 }, { "entropy": 1.7759801348050435, "epoch": 1.1969734420916756, "grad_norm": 0.738571286201477, "learning_rate": 8.598150587229448e-06, "loss": 1.4592, "mean_token_accuracy": 0.6435786783695221, "num_tokens": 1826939218.0, "step": 10896 }, { "entropy": 1.6735620200634003, "epoch": 1.1970832990030484, "grad_norm": 0.6554346680641174, "learning_rate": 8.596607566259986e-06, "loss": 1.4253, "mean_token_accuracy": 0.6584400484959284, "num_tokens": 1827121356.0, "step": 10897 }, { "entropy": 1.6918166776498158, "epoch": 1.1971931559144215, "grad_norm": 0.7005612254142761, "learning_rate": 8.595064621356812e-06, "loss": 1.3349, "mean_token_accuracy": 0.6674779852231344, "num_tokens": 1827261219.0, "step": 10898 }, { "entropy": 1.6832281549771626, "epoch": 1.1973030128257944, "grad_norm": 0.732524573802948, "learning_rate": 8.593521752568759e-06, "loss": 1.3192, "mean_token_accuracy": 0.6616079111893972, "num_tokens": 1827424352.0, "step": 10899 }, { "entropy": 1.7052031954129536, "epoch": 1.1974128697371673, "grad_norm": 0.7440763115882874, "learning_rate": 8.591978959944657e-06, "loss": 1.2866, "mean_token_accuracy": 0.6621012737353643, "num_tokens": 1827566352.0, "step": 10900 }, { "entropy": 1.748506526152293, "epoch": 1.1975227266485402, "grad_norm": 0.6760443449020386, "learning_rate": 8.590436243533336e-06, "loss": 1.3757, "mean_token_accuracy": 0.6591590344905853, "num_tokens": 1827705988.0, "step": 10901 }, { "entropy": 1.7245979209740956, "epoch": 1.1976325835599133, "grad_norm": 0.6143633127212524, "learning_rate": 8.588893603383623e-06, "loss": 1.5103, "mean_token_accuracy": 0.6388898193836212, "num_tokens": 1827921089.0, "step": 10902 }, { "entropy": 1.6794546246528625, "epoch": 1.1977424404712862, "grad_norm": 0.6420578956604004, "learning_rate": 8.58735103954434e-06, "loss": 1.5082, "mean_token_accuracy": 0.641454761226972, "num_tokens": 1828112111.0, "step": 10903 }, { "entropy": 1.70048584540685, "epoch": 1.197852297382659, "grad_norm": 0.6062077879905701, "learning_rate": 8.585808552064312e-06, "loss": 1.3617, "mean_token_accuracy": 0.6477002501487732, "num_tokens": 1828253988.0, "step": 10904 }, { "entropy": 1.6546235779921215, "epoch": 1.197962154294032, "grad_norm": 0.6344867944717407, "learning_rate": 8.584266140992355e-06, "loss": 1.4448, "mean_token_accuracy": 0.6534637212753296, "num_tokens": 1828444002.0, "step": 10905 }, { "entropy": 1.6595034301280975, "epoch": 1.1980720112054049, "grad_norm": 0.7396848797798157, "learning_rate": 8.582723806377281e-06, "loss": 1.1545, "mean_token_accuracy": 0.6930899421374003, "num_tokens": 1828558474.0, "step": 10906 }, { "entropy": 1.7764694193998973, "epoch": 1.198181868116778, "grad_norm": 0.7311699390411377, "learning_rate": 8.581181548267914e-06, "loss": 1.4681, "mean_token_accuracy": 0.647409662604332, "num_tokens": 1828672601.0, "step": 10907 }, { "entropy": 1.778613011042277, "epoch": 1.1982917250281508, "grad_norm": 0.8004505634307861, "learning_rate": 8.579639366713062e-06, "loss": 1.4917, "mean_token_accuracy": 0.6318371693293253, "num_tokens": 1828867425.0, "step": 10908 }, { "entropy": 1.7501141329606373, "epoch": 1.1984015819395237, "grad_norm": 0.8574265241622925, "learning_rate": 8.578097261761531e-06, "loss": 1.3178, "mean_token_accuracy": 0.6586999098459879, "num_tokens": 1829025448.0, "step": 10909 }, { "entropy": 1.7619508107503254, "epoch": 1.1985114388508966, "grad_norm": 0.7897709608078003, "learning_rate": 8.57655523346213e-06, "loss": 1.5334, "mean_token_accuracy": 0.6314461479584376, "num_tokens": 1829224005.0, "step": 10910 }, { "entropy": 1.717555691798528, "epoch": 1.1986212957622697, "grad_norm": 0.6715591549873352, "learning_rate": 8.575013281863666e-06, "loss": 1.3903, "mean_token_accuracy": 0.6613827695449194, "num_tokens": 1829394320.0, "step": 10911 }, { "entropy": 1.7155894537766774, "epoch": 1.1987311526736426, "grad_norm": 0.8104733228683472, "learning_rate": 8.573471407014934e-06, "loss": 1.3106, "mean_token_accuracy": 0.663354347149531, "num_tokens": 1829539454.0, "step": 10912 }, { "entropy": 1.6335892776648204, "epoch": 1.1988410095850155, "grad_norm": 0.6717244386672974, "learning_rate": 8.571929608964743e-06, "loss": 1.1869, "mean_token_accuracy": 0.6866246312856674, "num_tokens": 1829642311.0, "step": 10913 }, { "entropy": 1.7017800013224285, "epoch": 1.1989508664963884, "grad_norm": 0.5946372151374817, "learning_rate": 8.570387887761886e-06, "loss": 1.4284, "mean_token_accuracy": 0.6471086144447327, "num_tokens": 1829869402.0, "step": 10914 }, { "entropy": 1.6842081248760223, "epoch": 1.1990607234077615, "grad_norm": 0.6334558725357056, "learning_rate": 8.568846243455156e-06, "loss": 1.3793, "mean_token_accuracy": 0.6581207563479742, "num_tokens": 1830025157.0, "step": 10915 }, { "entropy": 1.7087959746519725, "epoch": 1.1991705803191344, "grad_norm": 0.6897690296173096, "learning_rate": 8.56730467609335e-06, "loss": 1.499, "mean_token_accuracy": 0.6366796096165975, "num_tokens": 1830195791.0, "step": 10916 }, { "entropy": 1.6709490915139515, "epoch": 1.1992804372305073, "grad_norm": 0.633358895778656, "learning_rate": 8.56576318572525e-06, "loss": 1.2466, "mean_token_accuracy": 0.6887932568788528, "num_tokens": 1830355273.0, "step": 10917 }, { "entropy": 1.696038504441579, "epoch": 1.1993902941418801, "grad_norm": 0.8158591985702515, "learning_rate": 8.564221772399649e-06, "loss": 1.5133, "mean_token_accuracy": 0.6450046946605047, "num_tokens": 1830511215.0, "step": 10918 }, { "entropy": 1.7114079197247822, "epoch": 1.199500151053253, "grad_norm": 0.6601821780204773, "learning_rate": 8.562680436165334e-06, "loss": 1.4599, "mean_token_accuracy": 0.6562004834413528, "num_tokens": 1830664540.0, "step": 10919 }, { "entropy": 1.636279861132304, "epoch": 1.1996100079646261, "grad_norm": 0.6178733110427856, "learning_rate": 8.561139177071082e-06, "loss": 1.3889, "mean_token_accuracy": 0.6608523726463318, "num_tokens": 1830824816.0, "step": 10920 }, { "entropy": 1.695862223704656, "epoch": 1.199719864875999, "grad_norm": 0.7451301217079163, "learning_rate": 8.559597995165678e-06, "loss": 1.3195, "mean_token_accuracy": 0.6666155556837717, "num_tokens": 1830947228.0, "step": 10921 }, { "entropy": 1.7582411766052246, "epoch": 1.199829721787372, "grad_norm": 0.864019513130188, "learning_rate": 8.558056890497897e-06, "loss": 1.3974, "mean_token_accuracy": 0.6508052796125412, "num_tokens": 1831092466.0, "step": 10922 }, { "entropy": 1.7041733066240947, "epoch": 1.1999395786987448, "grad_norm": 0.6823435425758362, "learning_rate": 8.556515863116518e-06, "loss": 1.2998, "mean_token_accuracy": 0.6597320288419724, "num_tokens": 1831280239.0, "step": 10923 }, { "entropy": 1.7025948067506154, "epoch": 1.200049435610118, "grad_norm": 0.6404684782028198, "learning_rate": 8.554974913070306e-06, "loss": 1.4125, "mean_token_accuracy": 0.6431457748015722, "num_tokens": 1831481003.0, "step": 10924 }, { "entropy": 1.7434356113274891, "epoch": 1.2001592925214908, "grad_norm": 0.769716203212738, "learning_rate": 8.553434040408037e-06, "loss": 1.4012, "mean_token_accuracy": 0.6592916697263718, "num_tokens": 1831679505.0, "step": 10925 }, { "entropy": 1.7073861261208851, "epoch": 1.2002691494328637, "grad_norm": 0.6649128198623657, "learning_rate": 8.551893245178482e-06, "loss": 1.359, "mean_token_accuracy": 0.6671257416407267, "num_tokens": 1831833327.0, "step": 10926 }, { "entropy": 1.693003276983897, "epoch": 1.2003790063442366, "grad_norm": 0.6499382257461548, "learning_rate": 8.550352527430402e-06, "loss": 1.4374, "mean_token_accuracy": 0.6594889660676321, "num_tokens": 1832003734.0, "step": 10927 }, { "entropy": 1.644775668780009, "epoch": 1.2004888632556097, "grad_norm": 0.618766725063324, "learning_rate": 8.548811887212558e-06, "loss": 1.495, "mean_token_accuracy": 0.655649391313394, "num_tokens": 1832145698.0, "step": 10928 }, { "entropy": 1.7419310013453166, "epoch": 1.2005987201669825, "grad_norm": 0.7380454540252686, "learning_rate": 8.547271324573716e-06, "loss": 1.4547, "mean_token_accuracy": 0.6507051835457484, "num_tokens": 1832300473.0, "step": 10929 }, { "entropy": 1.7053045133749645, "epoch": 1.2007085770783554, "grad_norm": 0.6194471716880798, "learning_rate": 8.545730839562627e-06, "loss": 1.4298, "mean_token_accuracy": 0.648463194568952, "num_tokens": 1832468480.0, "step": 10930 }, { "entropy": 1.7070962289969127, "epoch": 1.2008184339897283, "grad_norm": 0.7254568934440613, "learning_rate": 8.544190432228053e-06, "loss": 1.3819, "mean_token_accuracy": 0.6639789591232935, "num_tokens": 1832639575.0, "step": 10931 }, { "entropy": 1.7318992813428242, "epoch": 1.2009282909011012, "grad_norm": 0.7872775197029114, "learning_rate": 8.542650102618748e-06, "loss": 1.3596, "mean_token_accuracy": 0.6584235628445944, "num_tokens": 1832820036.0, "step": 10932 }, { "entropy": 1.6651087601979573, "epoch": 1.2010381478124743, "grad_norm": 0.6679090857505798, "learning_rate": 8.541109850783458e-06, "loss": 1.3423, "mean_token_accuracy": 0.6579601069291433, "num_tokens": 1833005066.0, "step": 10933 }, { "entropy": 1.691778947909673, "epoch": 1.2011480047238472, "grad_norm": 0.6940400004386902, "learning_rate": 8.539569676770931e-06, "loss": 1.2484, "mean_token_accuracy": 0.6750961343447367, "num_tokens": 1833137014.0, "step": 10934 }, { "entropy": 1.754847486813863, "epoch": 1.20125786163522, "grad_norm": 0.788187563419342, "learning_rate": 8.53802958062992e-06, "loss": 1.3827, "mean_token_accuracy": 0.6513949334621429, "num_tokens": 1833284638.0, "step": 10935 }, { "entropy": 1.7428893844286601, "epoch": 1.201367718546593, "grad_norm": 0.6965903043746948, "learning_rate": 8.536489562409159e-06, "loss": 1.6019, "mean_token_accuracy": 0.627113069097201, "num_tokens": 1833452637.0, "step": 10936 }, { "entropy": 1.7226960361003876, "epoch": 1.201477575457966, "grad_norm": 0.7512861490249634, "learning_rate": 8.534949622157393e-06, "loss": 1.5185, "mean_token_accuracy": 0.6288545529047648, "num_tokens": 1833649388.0, "step": 10937 }, { "entropy": 1.711164077123006, "epoch": 1.201587432369339, "grad_norm": 0.7107270359992981, "learning_rate": 8.533409759923364e-06, "loss": 1.3231, "mean_token_accuracy": 0.6556845357020696, "num_tokens": 1833816986.0, "step": 10938 }, { "entropy": 1.7271955609321594, "epoch": 1.2016972892807118, "grad_norm": 0.6369715929031372, "learning_rate": 8.531869975755803e-06, "loss": 1.398, "mean_token_accuracy": 0.6558120846748352, "num_tokens": 1833950907.0, "step": 10939 }, { "entropy": 1.6842861076196034, "epoch": 1.2018071461920847, "grad_norm": 0.6507421135902405, "learning_rate": 8.530330269703445e-06, "loss": 1.2904, "mean_token_accuracy": 0.6633835931619009, "num_tokens": 1834127190.0, "step": 10940 }, { "entropy": 1.6939348876476288, "epoch": 1.2019170031034578, "grad_norm": 0.6615996360778809, "learning_rate": 8.52879064181502e-06, "loss": 1.433, "mean_token_accuracy": 0.661163717508316, "num_tokens": 1834286480.0, "step": 10941 }, { "entropy": 1.7181770503520966, "epoch": 1.2020268600148307, "grad_norm": 0.6543670892715454, "learning_rate": 8.52725109213926e-06, "loss": 1.2943, "mean_token_accuracy": 0.6620519210894903, "num_tokens": 1834394332.0, "step": 10942 }, { "entropy": 1.6917479634284973, "epoch": 1.2021367169262036, "grad_norm": 0.8514935374259949, "learning_rate": 8.525711620724885e-06, "loss": 1.6089, "mean_token_accuracy": 0.6384094009796778, "num_tokens": 1834567370.0, "step": 10943 }, { "entropy": 1.7038015524546306, "epoch": 1.2022465738375765, "grad_norm": 0.7576673626899719, "learning_rate": 8.524172227620628e-06, "loss": 1.4333, "mean_token_accuracy": 0.6669259319702784, "num_tokens": 1834731150.0, "step": 10944 }, { "entropy": 1.698100248972575, "epoch": 1.2023564307489494, "grad_norm": 0.7677764892578125, "learning_rate": 8.522632912875201e-06, "loss": 1.2893, "mean_token_accuracy": 0.6776777257521948, "num_tokens": 1834881903.0, "step": 10945 }, { "entropy": 1.7503166198730469, "epoch": 1.2024662876603225, "grad_norm": 0.8348533511161804, "learning_rate": 8.521093676537327e-06, "loss": 1.5078, "mean_token_accuracy": 0.64637457827727, "num_tokens": 1835009118.0, "step": 10946 }, { "entropy": 1.7325368821620941, "epoch": 1.2025761445716954, "grad_norm": 0.7055541276931763, "learning_rate": 8.519554518655719e-06, "loss": 1.3927, "mean_token_accuracy": 0.6497747053702673, "num_tokens": 1835147384.0, "step": 10947 }, { "entropy": 1.7662848830223083, "epoch": 1.2026860014830683, "grad_norm": 0.6840864419937134, "learning_rate": 8.518015439279092e-06, "loss": 1.3965, "mean_token_accuracy": 0.6465002000331879, "num_tokens": 1835316504.0, "step": 10948 }, { "entropy": 1.6147024432818096, "epoch": 1.2027958583944414, "grad_norm": 0.6623427867889404, "learning_rate": 8.516476438456164e-06, "loss": 1.3179, "mean_token_accuracy": 0.6583419640858968, "num_tokens": 1835510113.0, "step": 10949 }, { "entropy": 1.69747061530749, "epoch": 1.2029057153058142, "grad_norm": 0.8042090535163879, "learning_rate": 8.51493751623563e-06, "loss": 1.4357, "mean_token_accuracy": 0.6584860185782114, "num_tokens": 1835682732.0, "step": 10950 }, { "entropy": 1.7218117117881775, "epoch": 1.2030155722171871, "grad_norm": 0.613860547542572, "learning_rate": 8.513398672666209e-06, "loss": 1.3976, "mean_token_accuracy": 0.6480874568223953, "num_tokens": 1835857692.0, "step": 10951 }, { "entropy": 1.7380519111951191, "epoch": 1.20312542912856, "grad_norm": 0.7758024334907532, "learning_rate": 8.5118599077966e-06, "loss": 1.4748, "mean_token_accuracy": 0.6393528680006663, "num_tokens": 1836015807.0, "step": 10952 }, { "entropy": 1.6796276768048604, "epoch": 1.203235286039933, "grad_norm": 0.5999566912651062, "learning_rate": 8.5103212216755e-06, "loss": 1.3092, "mean_token_accuracy": 0.6672457307577133, "num_tokens": 1836153324.0, "step": 10953 }, { "entropy": 1.7185988624890645, "epoch": 1.203345142951306, "grad_norm": 0.8109869360923767, "learning_rate": 8.508782614351612e-06, "loss": 1.4122, "mean_token_accuracy": 0.6546374360720316, "num_tokens": 1836311706.0, "step": 10954 }, { "entropy": 1.6729619602362316, "epoch": 1.203454999862679, "grad_norm": 0.6391358971595764, "learning_rate": 8.507244085873636e-06, "loss": 1.4177, "mean_token_accuracy": 0.6587773958841959, "num_tokens": 1836484187.0, "step": 10955 }, { "entropy": 1.7137231330076854, "epoch": 1.2035648567740518, "grad_norm": 0.6148737072944641, "learning_rate": 8.505705636290256e-06, "loss": 1.4516, "mean_token_accuracy": 0.6422171841065089, "num_tokens": 1836722681.0, "step": 10956 }, { "entropy": 1.7291185359160106, "epoch": 1.2036747136854247, "grad_norm": 0.7713000178337097, "learning_rate": 8.504167265650171e-06, "loss": 1.523, "mean_token_accuracy": 0.629439448316892, "num_tokens": 1836930155.0, "step": 10957 }, { "entropy": 1.6895070970058441, "epoch": 1.2037845705967976, "grad_norm": 0.627571702003479, "learning_rate": 8.50262897400207e-06, "loss": 1.3553, "mean_token_accuracy": 0.6645805637041727, "num_tokens": 1837103755.0, "step": 10958 }, { "entropy": 1.6793027222156525, "epoch": 1.2038944275081707, "grad_norm": 0.6628625392913818, "learning_rate": 8.501090761394633e-06, "loss": 1.3049, "mean_token_accuracy": 0.6793260723352432, "num_tokens": 1837258622.0, "step": 10959 }, { "entropy": 1.7522801260153453, "epoch": 1.2040042844195435, "grad_norm": 0.7251481413841248, "learning_rate": 8.499552627876548e-06, "loss": 1.2863, "mean_token_accuracy": 0.6737864712874094, "num_tokens": 1837364398.0, "step": 10960 }, { "entropy": 1.6999558309714, "epoch": 1.2041141413309164, "grad_norm": 0.6430142521858215, "learning_rate": 8.498014573496495e-06, "loss": 1.368, "mean_token_accuracy": 0.6580288509527842, "num_tokens": 1837556811.0, "step": 10961 }, { "entropy": 1.7127414047718048, "epoch": 1.2042239982422895, "grad_norm": 0.7526107430458069, "learning_rate": 8.496476598303154e-06, "loss": 1.3032, "mean_token_accuracy": 0.6637988835573196, "num_tokens": 1837690847.0, "step": 10962 }, { "entropy": 1.7249715427557628, "epoch": 1.2043338551536624, "grad_norm": 0.7242283225059509, "learning_rate": 8.4949387023452e-06, "loss": 1.4256, "mean_token_accuracy": 0.6612179130315781, "num_tokens": 1837830879.0, "step": 10963 }, { "entropy": 1.6988115906715393, "epoch": 1.2044437120650353, "grad_norm": 0.6785094141960144, "learning_rate": 8.493400885671308e-06, "loss": 1.3657, "mean_token_accuracy": 0.6599143246809641, "num_tokens": 1837977639.0, "step": 10964 }, { "entropy": 1.6827894548575084, "epoch": 1.2045535689764082, "grad_norm": 0.6206066012382507, "learning_rate": 8.491863148330148e-06, "loss": 1.3765, "mean_token_accuracy": 0.6473558694124222, "num_tokens": 1838195501.0, "step": 10965 }, { "entropy": 1.7016201118628185, "epoch": 1.204663425887781, "grad_norm": 0.7276713252067566, "learning_rate": 8.49032549037039e-06, "loss": 1.4146, "mean_token_accuracy": 0.6548609832922617, "num_tokens": 1838330606.0, "step": 10966 }, { "entropy": 1.678989330927531, "epoch": 1.2047732827991542, "grad_norm": 0.8118691444396973, "learning_rate": 8.488787911840702e-06, "loss": 1.4573, "mean_token_accuracy": 0.642837405204773, "num_tokens": 1838496302.0, "step": 10967 }, { "entropy": 1.6498075425624847, "epoch": 1.204883139710527, "grad_norm": 0.7537748217582703, "learning_rate": 8.48725041278974e-06, "loss": 1.2895, "mean_token_accuracy": 0.6750341604153315, "num_tokens": 1838650065.0, "step": 10968 }, { "entropy": 1.6907791793346405, "epoch": 1.2049929966219, "grad_norm": 0.9500882029533386, "learning_rate": 8.48571299326617e-06, "loss": 1.4808, "mean_token_accuracy": 0.6541274686654409, "num_tokens": 1838802465.0, "step": 10969 }, { "entropy": 1.6472548147042592, "epoch": 1.2051028535332728, "grad_norm": 0.6497575044631958, "learning_rate": 8.484175653318656e-06, "loss": 1.2956, "mean_token_accuracy": 0.6712877601385117, "num_tokens": 1838940896.0, "step": 10970 }, { "entropy": 1.7175530691941578, "epoch": 1.2052127104446457, "grad_norm": 0.7882832288742065, "learning_rate": 8.482638392995845e-06, "loss": 1.3548, "mean_token_accuracy": 0.6520499388376871, "num_tokens": 1839095122.0, "step": 10971 }, { "entropy": 1.6997943917910259, "epoch": 1.2053225673560188, "grad_norm": 0.65944504737854, "learning_rate": 8.481101212346395e-06, "loss": 1.3365, "mean_token_accuracy": 0.6599059452613195, "num_tokens": 1839282288.0, "step": 10972 }, { "entropy": 1.6870457927385967, "epoch": 1.2054324242673917, "grad_norm": 0.6719939708709717, "learning_rate": 8.479564111418959e-06, "loss": 1.426, "mean_token_accuracy": 0.6513770818710327, "num_tokens": 1839448385.0, "step": 10973 }, { "entropy": 1.7068449358145397, "epoch": 1.2055422811787646, "grad_norm": 0.6699382066726685, "learning_rate": 8.47802709026218e-06, "loss": 1.4808, "mean_token_accuracy": 0.6471947580575943, "num_tokens": 1839597423.0, "step": 10974 }, { "entropy": 1.712907761335373, "epoch": 1.2056521380901377, "grad_norm": 0.6794223189353943, "learning_rate": 8.476490148924705e-06, "loss": 1.3044, "mean_token_accuracy": 0.6599731842676798, "num_tokens": 1839736794.0, "step": 10975 }, { "entropy": 1.74131045738856, "epoch": 1.2057619950015106, "grad_norm": 0.8693950772285461, "learning_rate": 8.474953287455185e-06, "loss": 1.407, "mean_token_accuracy": 0.6535183389981588, "num_tokens": 1839864797.0, "step": 10976 }, { "entropy": 1.738874187072118, "epoch": 1.2058718519128835, "grad_norm": 0.8112277984619141, "learning_rate": 8.473416505902254e-06, "loss": 1.5832, "mean_token_accuracy": 0.6483089849352837, "num_tokens": 1840003600.0, "step": 10977 }, { "entropy": 1.7444894413153331, "epoch": 1.2059817088242564, "grad_norm": 0.6465990543365479, "learning_rate": 8.471879804314552e-06, "loss": 1.5445, "mean_token_accuracy": 0.6178958763678869, "num_tokens": 1840308433.0, "step": 10978 }, { "entropy": 1.728948066631953, "epoch": 1.2060915657356293, "grad_norm": 0.6564865112304688, "learning_rate": 8.470343182740716e-06, "loss": 1.4047, "mean_token_accuracy": 0.6490548650423685, "num_tokens": 1840478644.0, "step": 10979 }, { "entropy": 1.7226575712362926, "epoch": 1.2062014226470024, "grad_norm": 0.7290470600128174, "learning_rate": 8.468806641229376e-06, "loss": 1.2962, "mean_token_accuracy": 0.6657624244689941, "num_tokens": 1840621628.0, "step": 10980 }, { "entropy": 1.7463841636975606, "epoch": 1.2063112795583752, "grad_norm": 0.6011817455291748, "learning_rate": 8.467270179829166e-06, "loss": 1.4322, "mean_token_accuracy": 0.6443581183751425, "num_tokens": 1840798045.0, "step": 10981 }, { "entropy": 1.773529440164566, "epoch": 1.2064211364697481, "grad_norm": 0.6532623767852783, "learning_rate": 8.465733798588715e-06, "loss": 1.4649, "mean_token_accuracy": 0.631449893116951, "num_tokens": 1840950614.0, "step": 10982 }, { "entropy": 1.6585228244463603, "epoch": 1.206530993381121, "grad_norm": 0.6496007442474365, "learning_rate": 8.464197497556646e-06, "loss": 1.4284, "mean_token_accuracy": 0.6490184764067332, "num_tokens": 1841136948.0, "step": 10983 }, { "entropy": 1.7097909947236378, "epoch": 1.206640850292494, "grad_norm": 0.6547970175743103, "learning_rate": 8.462661276781583e-06, "loss": 1.4973, "mean_token_accuracy": 0.6476222276687622, "num_tokens": 1841300598.0, "step": 10984 }, { "entropy": 1.7845760981241863, "epoch": 1.206750707203867, "grad_norm": 0.8349284529685974, "learning_rate": 8.46112513631215e-06, "loss": 1.5017, "mean_token_accuracy": 0.6529583881298701, "num_tokens": 1841457868.0, "step": 10985 }, { "entropy": 1.7160189151763916, "epoch": 1.20686056411524, "grad_norm": 0.746083676815033, "learning_rate": 8.459589076196957e-06, "loss": 1.2057, "mean_token_accuracy": 0.6863613526026408, "num_tokens": 1841583833.0, "step": 10986 }, { "entropy": 1.6878787875175476, "epoch": 1.2069704210266128, "grad_norm": 0.7189993858337402, "learning_rate": 8.458053096484628e-06, "loss": 1.3567, "mean_token_accuracy": 0.6640171358982722, "num_tokens": 1841780454.0, "step": 10987 }, { "entropy": 1.7334311107794445, "epoch": 1.207080277937986, "grad_norm": 0.652119517326355, "learning_rate": 8.456517197223774e-06, "loss": 1.5206, "mean_token_accuracy": 0.6425420294205347, "num_tokens": 1841985689.0, "step": 10988 }, { "entropy": 1.6652919054031372, "epoch": 1.2071901348493588, "grad_norm": 0.6829299330711365, "learning_rate": 8.454981378463006e-06, "loss": 1.4756, "mean_token_accuracy": 0.6531298210223516, "num_tokens": 1842141908.0, "step": 10989 }, { "entropy": 1.7330115834871929, "epoch": 1.2072999917607317, "grad_norm": 0.7318177819252014, "learning_rate": 8.453445640250928e-06, "loss": 1.2572, "mean_token_accuracy": 0.6711813112099966, "num_tokens": 1842253992.0, "step": 10990 }, { "entropy": 1.7475427587827046, "epoch": 1.2074098486721045, "grad_norm": 0.6801440119743347, "learning_rate": 8.451909982636148e-06, "loss": 1.4171, "mean_token_accuracy": 0.6432561924060186, "num_tokens": 1842417979.0, "step": 10991 }, { "entropy": 1.7295256853103638, "epoch": 1.2075197055834774, "grad_norm": 0.6919019222259521, "learning_rate": 8.450374405667267e-06, "loss": 1.4833, "mean_token_accuracy": 0.643854022026062, "num_tokens": 1842583946.0, "step": 10992 }, { "entropy": 1.735103686650594, "epoch": 1.2076295624948505, "grad_norm": 0.7653998136520386, "learning_rate": 8.448838909392889e-06, "loss": 1.3802, "mean_token_accuracy": 0.655050535996755, "num_tokens": 1842739381.0, "step": 10993 }, { "entropy": 1.7508669197559357, "epoch": 1.2077394194062234, "grad_norm": 0.8188372254371643, "learning_rate": 8.447303493861612e-06, "loss": 1.5111, "mean_token_accuracy": 0.6337236364682516, "num_tokens": 1842933440.0, "step": 10994 }, { "entropy": 1.6848741968472798, "epoch": 1.2078492763175963, "grad_norm": 0.5986067056655884, "learning_rate": 8.445768159122028e-06, "loss": 1.3676, "mean_token_accuracy": 0.6652501175800959, "num_tokens": 1843132075.0, "step": 10995 }, { "entropy": 1.7106738686561584, "epoch": 1.2079591332289692, "grad_norm": 0.6997463703155518, "learning_rate": 8.44423290522273e-06, "loss": 1.4677, "mean_token_accuracy": 0.6429871618747711, "num_tokens": 1843296882.0, "step": 10996 }, { "entropy": 1.7586935957272847, "epoch": 1.208068990140342, "grad_norm": 0.7393748164176941, "learning_rate": 8.44269773221231e-06, "loss": 1.447, "mean_token_accuracy": 0.6556659440199534, "num_tokens": 1843432063.0, "step": 10997 }, { "entropy": 1.685716986656189, "epoch": 1.2081788470517152, "grad_norm": 0.6346744894981384, "learning_rate": 8.441162640139354e-06, "loss": 1.4373, "mean_token_accuracy": 0.6571964671214422, "num_tokens": 1843608301.0, "step": 10998 }, { "entropy": 1.7133116920789082, "epoch": 1.208288703963088, "grad_norm": 0.7435621023178101, "learning_rate": 8.439627629052446e-06, "loss": 1.4443, "mean_token_accuracy": 0.6661744117736816, "num_tokens": 1843798714.0, "step": 10999 }, { "entropy": 1.619499186674754, "epoch": 1.208398560874461, "grad_norm": 0.6935999989509583, "learning_rate": 8.438092699000172e-06, "loss": 1.2591, "mean_token_accuracy": 0.675690621137619, "num_tokens": 1843939118.0, "step": 11000 }, { "entropy": 1.757647732893626, "epoch": 1.208508417785834, "grad_norm": 0.6426697373390198, "learning_rate": 8.436557850031109e-06, "loss": 1.4623, "mean_token_accuracy": 0.6505621820688248, "num_tokens": 1844111185.0, "step": 11001 }, { "entropy": 1.7143224676450093, "epoch": 1.208618274697207, "grad_norm": 0.7159050703048706, "learning_rate": 8.435023082193834e-06, "loss": 1.4011, "mean_token_accuracy": 0.669948066274325, "num_tokens": 1844278495.0, "step": 11002 }, { "entropy": 1.6677929162979126, "epoch": 1.2087281316085798, "grad_norm": 0.8504517674446106, "learning_rate": 8.433488395536924e-06, "loss": 1.4652, "mean_token_accuracy": 0.6526702543099722, "num_tokens": 1844426506.0, "step": 11003 }, { "entropy": 1.6635715464750926, "epoch": 1.2088379885199527, "grad_norm": 0.6439080834388733, "learning_rate": 8.431953790108946e-06, "loss": 1.3158, "mean_token_accuracy": 0.6697449535131454, "num_tokens": 1844581041.0, "step": 11004 }, { "entropy": 1.717222531636556, "epoch": 1.2089478454313256, "grad_norm": 0.5454255938529968, "learning_rate": 8.430419265958481e-06, "loss": 1.4645, "mean_token_accuracy": 0.6585534413655599, "num_tokens": 1844751255.0, "step": 11005 }, { "entropy": 1.7076788544654846, "epoch": 1.2090577023426987, "grad_norm": 0.7401055097579956, "learning_rate": 8.42888482313408e-06, "loss": 1.3668, "mean_token_accuracy": 0.6625747780005137, "num_tokens": 1844902026.0, "step": 11006 }, { "entropy": 1.707229753335317, "epoch": 1.2091675592540716, "grad_norm": 0.7380411624908447, "learning_rate": 8.42735046168432e-06, "loss": 1.2464, "mean_token_accuracy": 0.6756529162327448, "num_tokens": 1845022200.0, "step": 11007 }, { "entropy": 1.7667948305606842, "epoch": 1.2092774161654445, "grad_norm": 0.652038037776947, "learning_rate": 8.42581618165776e-06, "loss": 1.3872, "mean_token_accuracy": 0.6549219787120819, "num_tokens": 1845206105.0, "step": 11008 }, { "entropy": 1.6876067121823628, "epoch": 1.2093872730768174, "grad_norm": 0.7061187028884888, "learning_rate": 8.424281983102956e-06, "loss": 1.2664, "mean_token_accuracy": 0.6709717114766439, "num_tokens": 1845316113.0, "step": 11009 }, { "entropy": 1.7546610136826832, "epoch": 1.2094971299881905, "grad_norm": 0.7472836971282959, "learning_rate": 8.422747866068464e-06, "loss": 1.4804, "mean_token_accuracy": 0.6357733458280563, "num_tokens": 1845544449.0, "step": 11010 }, { "entropy": 1.7322336435317993, "epoch": 1.2096069868995634, "grad_norm": 0.6114717125892639, "learning_rate": 8.421213830602846e-06, "loss": 1.463, "mean_token_accuracy": 0.6512684375047684, "num_tokens": 1845756608.0, "step": 11011 }, { "entropy": 1.7614449659983318, "epoch": 1.2097168438109362, "grad_norm": 0.6005818843841553, "learning_rate": 8.419679876754643e-06, "loss": 1.5256, "mean_token_accuracy": 0.6367160379886627, "num_tokens": 1845970779.0, "step": 11012 }, { "entropy": 1.72640464703242, "epoch": 1.2098267007223091, "grad_norm": 0.7229748964309692, "learning_rate": 8.418146004572412e-06, "loss": 1.3674, "mean_token_accuracy": 0.6622246205806732, "num_tokens": 1846110227.0, "step": 11013 }, { "entropy": 1.6716107626756032, "epoch": 1.2099365576336822, "grad_norm": 0.6086723804473877, "learning_rate": 8.416612214104695e-06, "loss": 1.3978, "mean_token_accuracy": 0.6556883007287979, "num_tokens": 1846294061.0, "step": 11014 }, { "entropy": 1.6809994280338287, "epoch": 1.2100464145450551, "grad_norm": 0.7691161632537842, "learning_rate": 8.415078505400041e-06, "loss": 1.4725, "mean_token_accuracy": 0.6410997360944748, "num_tokens": 1846452635.0, "step": 11015 }, { "entropy": 1.6480069359143574, "epoch": 1.210156271456428, "grad_norm": 0.6271137595176697, "learning_rate": 8.413544878506983e-06, "loss": 1.5155, "mean_token_accuracy": 0.64403468867143, "num_tokens": 1846614016.0, "step": 11016 }, { "entropy": 1.6463837722937267, "epoch": 1.210266128367801, "grad_norm": 0.9350231885910034, "learning_rate": 8.412011333474068e-06, "loss": 1.4401, "mean_token_accuracy": 0.6404287169377009, "num_tokens": 1846826350.0, "step": 11017 }, { "entropy": 1.732763757308324, "epoch": 1.2103759852791738, "grad_norm": 0.7772718071937561, "learning_rate": 8.410477870349825e-06, "loss": 1.4685, "mean_token_accuracy": 0.6515309810638428, "num_tokens": 1846958581.0, "step": 11018 }, { "entropy": 1.721984734137853, "epoch": 1.210485842190547, "grad_norm": 0.6624974012374878, "learning_rate": 8.408944489182791e-06, "loss": 1.3843, "mean_token_accuracy": 0.6445004592339197, "num_tokens": 1847163105.0, "step": 11019 }, { "entropy": 1.690103272596995, "epoch": 1.2105956991019198, "grad_norm": 0.7604218125343323, "learning_rate": 8.4074111900215e-06, "loss": 1.448, "mean_token_accuracy": 0.6505727221568426, "num_tokens": 1847318720.0, "step": 11020 }, { "entropy": 1.673486590385437, "epoch": 1.2107055560132927, "grad_norm": 0.7544184923171997, "learning_rate": 8.405877972914472e-06, "loss": 1.5931, "mean_token_accuracy": 0.6497178276379904, "num_tokens": 1847464207.0, "step": 11021 }, { "entropy": 1.739910493294398, "epoch": 1.2108154129246655, "grad_norm": 0.82258141040802, "learning_rate": 8.404344837910237e-06, "loss": 1.5897, "mean_token_accuracy": 0.6385711828867594, "num_tokens": 1847632525.0, "step": 11022 }, { "entropy": 1.7209635078907013, "epoch": 1.2109252698360387, "grad_norm": 0.6614201068878174, "learning_rate": 8.402811785057326e-06, "loss": 1.399, "mean_token_accuracy": 0.6512503723303477, "num_tokens": 1847774532.0, "step": 11023 }, { "entropy": 1.7443317274252574, "epoch": 1.2110351267474115, "grad_norm": 0.6455691456794739, "learning_rate": 8.40127881440424e-06, "loss": 1.3867, "mean_token_accuracy": 0.6450707316398621, "num_tokens": 1847906774.0, "step": 11024 }, { "entropy": 1.6787743270397186, "epoch": 1.2111449836587844, "grad_norm": 0.7437204122543335, "learning_rate": 8.399745925999517e-06, "loss": 1.3348, "mean_token_accuracy": 0.6639335205157598, "num_tokens": 1848048858.0, "step": 11025 }, { "entropy": 1.7229706346988678, "epoch": 1.2112548405701573, "grad_norm": 0.6177759170532227, "learning_rate": 8.39821311989166e-06, "loss": 1.4536, "mean_token_accuracy": 0.6526401787996292, "num_tokens": 1848219021.0, "step": 11026 }, { "entropy": 1.7722203433513641, "epoch": 1.2113646974815304, "grad_norm": 0.6613593697547913, "learning_rate": 8.396680396129189e-06, "loss": 1.5069, "mean_token_accuracy": 0.6412953784068426, "num_tokens": 1848416777.0, "step": 11027 }, { "entropy": 1.7179987331231434, "epoch": 1.2114745543929033, "grad_norm": 0.7174702882766724, "learning_rate": 8.395147754760604e-06, "loss": 1.417, "mean_token_accuracy": 0.6555820604165395, "num_tokens": 1848589063.0, "step": 11028 }, { "entropy": 1.7602262993653615, "epoch": 1.2115844113042762, "grad_norm": 0.7947672009468079, "learning_rate": 8.393615195834425e-06, "loss": 1.4152, "mean_token_accuracy": 0.6568863987922668, "num_tokens": 1848739349.0, "step": 11029 }, { "entropy": 1.7349158922831218, "epoch": 1.211694268215649, "grad_norm": 0.6535570621490479, "learning_rate": 8.392082719399146e-06, "loss": 1.411, "mean_token_accuracy": 0.6554250419139862, "num_tokens": 1848886234.0, "step": 11030 }, { "entropy": 1.7216412425041199, "epoch": 1.211804125127022, "grad_norm": 0.6314913034439087, "learning_rate": 8.390550325503276e-06, "loss": 1.3573, "mean_token_accuracy": 0.644097218910853, "num_tokens": 1849037728.0, "step": 11031 }, { "entropy": 1.6654905676841736, "epoch": 1.211913982038395, "grad_norm": 0.5710697770118713, "learning_rate": 8.389018014195316e-06, "loss": 1.3456, "mean_token_accuracy": 0.6632640808820724, "num_tokens": 1849207967.0, "step": 11032 }, { "entropy": 1.7055143018563588, "epoch": 1.212023838949768, "grad_norm": 0.631976842880249, "learning_rate": 8.387485785523755e-06, "loss": 1.4246, "mean_token_accuracy": 0.6355055769284567, "num_tokens": 1849411860.0, "step": 11033 }, { "entropy": 1.719924658536911, "epoch": 1.2121336958611408, "grad_norm": 0.6693115830421448, "learning_rate": 8.38595363953709e-06, "loss": 1.4092, "mean_token_accuracy": 0.6476466059684753, "num_tokens": 1849617211.0, "step": 11034 }, { "entropy": 1.7284215490023296, "epoch": 1.2122435527725137, "grad_norm": 0.7452521324157715, "learning_rate": 8.384421576283819e-06, "loss": 1.4597, "mean_token_accuracy": 0.6476357032855352, "num_tokens": 1849792205.0, "step": 11035 }, { "entropy": 1.6705568730831146, "epoch": 1.2123534096838868, "grad_norm": 0.5901700854301453, "learning_rate": 8.382889595812422e-06, "loss": 1.3592, "mean_token_accuracy": 0.6637533108393351, "num_tokens": 1849966783.0, "step": 11036 }, { "entropy": 1.679235577583313, "epoch": 1.2124632665952597, "grad_norm": 0.7355685830116272, "learning_rate": 8.381357698171392e-06, "loss": 1.4727, "mean_token_accuracy": 0.6543498982985815, "num_tokens": 1850129797.0, "step": 11037 }, { "entropy": 1.6582284073034923, "epoch": 1.2125731235066326, "grad_norm": 0.7128838300704956, "learning_rate": 8.379825883409213e-06, "loss": 1.3672, "mean_token_accuracy": 0.6587399691343307, "num_tokens": 1850314612.0, "step": 11038 }, { "entropy": 1.7182820936044056, "epoch": 1.2126829804180055, "grad_norm": 0.9296267032623291, "learning_rate": 8.378294151574362e-06, "loss": 1.4561, "mean_token_accuracy": 0.6555204093456268, "num_tokens": 1850481038.0, "step": 11039 }, { "entropy": 1.6882583896319072, "epoch": 1.2127928373293786, "grad_norm": 0.7641075253486633, "learning_rate": 8.376762502715318e-06, "loss": 1.4607, "mean_token_accuracy": 0.6370103309551874, "num_tokens": 1850666045.0, "step": 11040 }, { "entropy": 1.7282833755016327, "epoch": 1.2129026942407515, "grad_norm": 0.6613611578941345, "learning_rate": 8.375230936880562e-06, "loss": 1.3988, "mean_token_accuracy": 0.6569731831550598, "num_tokens": 1850870551.0, "step": 11041 }, { "entropy": 1.7475760380427043, "epoch": 1.2130125511521244, "grad_norm": 0.7780677080154419, "learning_rate": 8.373699454118562e-06, "loss": 1.2312, "mean_token_accuracy": 0.6765096088250478, "num_tokens": 1850988764.0, "step": 11042 }, { "entropy": 1.7152721087137859, "epoch": 1.2131224080634972, "grad_norm": 0.6480224132537842, "learning_rate": 8.372168054477791e-06, "loss": 1.4921, "mean_token_accuracy": 0.6434395660956701, "num_tokens": 1851178116.0, "step": 11043 }, { "entropy": 1.6879489123821259, "epoch": 1.2132322649748701, "grad_norm": 0.6920694708824158, "learning_rate": 8.370636738006721e-06, "loss": 1.4977, "mean_token_accuracy": 0.6338366170724233, "num_tokens": 1851382859.0, "step": 11044 }, { "entropy": 1.6802996695041656, "epoch": 1.2133421218862432, "grad_norm": 0.6248618364334106, "learning_rate": 8.369105504753809e-06, "loss": 1.3379, "mean_token_accuracy": 0.6674815913041433, "num_tokens": 1851589206.0, "step": 11045 }, { "entropy": 1.7379266719023387, "epoch": 1.2134519787976161, "grad_norm": 0.7720683813095093, "learning_rate": 8.367574354767522e-06, "loss": 1.3548, "mean_token_accuracy": 0.6515401800473531, "num_tokens": 1851737929.0, "step": 11046 }, { "entropy": 1.7030988434950511, "epoch": 1.213561835708989, "grad_norm": 0.7740477919578552, "learning_rate": 8.366043288096324e-06, "loss": 1.4222, "mean_token_accuracy": 0.6508887757857641, "num_tokens": 1851939859.0, "step": 11047 }, { "entropy": 1.6766453782717388, "epoch": 1.213671692620362, "grad_norm": 0.6776142120361328, "learning_rate": 8.364512304788664e-06, "loss": 1.4908, "mean_token_accuracy": 0.6671174516280493, "num_tokens": 1852092528.0, "step": 11048 }, { "entropy": 1.6063755849997203, "epoch": 1.213781549531735, "grad_norm": 0.5421578884124756, "learning_rate": 8.362981404893005e-06, "loss": 1.5063, "mean_token_accuracy": 0.6495156238476435, "num_tokens": 1852332284.0, "step": 11049 }, { "entropy": 1.7231159309546153, "epoch": 1.213891406443108, "grad_norm": 0.8034752011299133, "learning_rate": 8.361450588457798e-06, "loss": 1.4637, "mean_token_accuracy": 0.6396220078070959, "num_tokens": 1852504941.0, "step": 11050 }, { "entropy": 1.759638677040736, "epoch": 1.2140012633544808, "grad_norm": 0.6964645981788635, "learning_rate": 8.35991985553149e-06, "loss": 1.3326, "mean_token_accuracy": 0.6753019044796625, "num_tokens": 1852671091.0, "step": 11051 }, { "entropy": 1.769709587097168, "epoch": 1.2141111202658537, "grad_norm": 0.6132997870445251, "learning_rate": 8.358389206162525e-06, "loss": 1.5146, "mean_token_accuracy": 0.6466521521409353, "num_tokens": 1852894196.0, "step": 11052 }, { "entropy": 1.7432759602864583, "epoch": 1.2142209771772268, "grad_norm": 0.7836261987686157, "learning_rate": 8.356858640399354e-06, "loss": 1.2905, "mean_token_accuracy": 0.6691566308339437, "num_tokens": 1853022071.0, "step": 11053 }, { "entropy": 1.6997772653897603, "epoch": 1.2143308340885997, "grad_norm": 0.6840148568153381, "learning_rate": 8.355328158290415e-06, "loss": 1.3656, "mean_token_accuracy": 0.6704634875059128, "num_tokens": 1853143220.0, "step": 11054 }, { "entropy": 1.725580135981242, "epoch": 1.2144406909999725, "grad_norm": 0.7722833752632141, "learning_rate": 8.35379775988415e-06, "loss": 1.571, "mean_token_accuracy": 0.6398867269357046, "num_tokens": 1853345019.0, "step": 11055 }, { "entropy": 1.6314020156860352, "epoch": 1.2145505479113454, "grad_norm": 0.892280101776123, "learning_rate": 8.352267445228994e-06, "loss": 1.4697, "mean_token_accuracy": 0.6704972585042318, "num_tokens": 1853539231.0, "step": 11056 }, { "entropy": 1.6972508529822032, "epoch": 1.2146604048227183, "grad_norm": 0.615267276763916, "learning_rate": 8.350737214373379e-06, "loss": 1.4657, "mean_token_accuracy": 0.6365112711985906, "num_tokens": 1853703622.0, "step": 11057 }, { "entropy": 1.6537209053834279, "epoch": 1.2147702617340914, "grad_norm": 0.6618078947067261, "learning_rate": 8.349207067365737e-06, "loss": 1.271, "mean_token_accuracy": 0.6827361087004343, "num_tokens": 1853882138.0, "step": 11058 }, { "entropy": 1.6730584800243378, "epoch": 1.2148801186454643, "grad_norm": 0.6180942058563232, "learning_rate": 8.347677004254498e-06, "loss": 1.332, "mean_token_accuracy": 0.6662278970082601, "num_tokens": 1854028245.0, "step": 11059 }, { "entropy": 1.6949761112531025, "epoch": 1.2149899755568372, "grad_norm": 0.6167245507240295, "learning_rate": 8.346147025088086e-06, "loss": 1.3307, "mean_token_accuracy": 0.6576797862847646, "num_tokens": 1854185316.0, "step": 11060 }, { "entropy": 1.7193231880664825, "epoch": 1.21509983246821, "grad_norm": 0.6622843146324158, "learning_rate": 8.344617129914923e-06, "loss": 1.528, "mean_token_accuracy": 0.63414998849233, "num_tokens": 1854426177.0, "step": 11061 }, { "entropy": 1.6867660681406658, "epoch": 1.2152096893795832, "grad_norm": 0.6055188775062561, "learning_rate": 8.343087318783434e-06, "loss": 1.4881, "mean_token_accuracy": 0.6526160339514414, "num_tokens": 1854610116.0, "step": 11062 }, { "entropy": 1.7061599691708882, "epoch": 1.215319546290956, "grad_norm": 0.680685818195343, "learning_rate": 8.34155759174203e-06, "loss": 1.4192, "mean_token_accuracy": 0.6544150362412134, "num_tokens": 1854755139.0, "step": 11063 }, { "entropy": 1.6967225869496663, "epoch": 1.215429403202329, "grad_norm": 0.6835984587669373, "learning_rate": 8.340027948839135e-06, "loss": 1.3267, "mean_token_accuracy": 0.6695795605580012, "num_tokens": 1854901981.0, "step": 11064 }, { "entropy": 1.7517095704873402, "epoch": 1.2155392601137018, "grad_norm": 0.8013256192207336, "learning_rate": 8.338498390123158e-06, "loss": 1.4562, "mean_token_accuracy": 0.6485675225655237, "num_tokens": 1855044987.0, "step": 11065 }, { "entropy": 1.7563750843207042, "epoch": 1.215649117025075, "grad_norm": 0.7665896415710449, "learning_rate": 8.3369689156425e-06, "loss": 1.3885, "mean_token_accuracy": 0.6616611480712891, "num_tokens": 1855188518.0, "step": 11066 }, { "entropy": 1.7391284902890523, "epoch": 1.2157589739364478, "grad_norm": 0.8238292336463928, "learning_rate": 8.335439525445586e-06, "loss": 1.3099, "mean_token_accuracy": 0.6730262041091919, "num_tokens": 1855310644.0, "step": 11067 }, { "entropy": 1.6750684281190236, "epoch": 1.2158688308478207, "grad_norm": 0.5978335738182068, "learning_rate": 8.333910219580804e-06, "loss": 1.4055, "mean_token_accuracy": 0.6556108246246973, "num_tokens": 1855483085.0, "step": 11068 }, { "entropy": 1.6777072350184123, "epoch": 1.2159786877591936, "grad_norm": 0.7173717617988586, "learning_rate": 8.332380998096561e-06, "loss": 1.4097, "mean_token_accuracy": 0.6578392386436462, "num_tokens": 1855663183.0, "step": 11069 }, { "entropy": 1.7260019779205322, "epoch": 1.2160885446705665, "grad_norm": 0.7578794956207275, "learning_rate": 8.330851861041262e-06, "loss": 1.339, "mean_token_accuracy": 0.6605818818012873, "num_tokens": 1855792392.0, "step": 11070 }, { "entropy": 1.6965550482273102, "epoch": 1.2161984015819396, "grad_norm": 0.6967483162879944, "learning_rate": 8.329322808463294e-06, "loss": 1.4111, "mean_token_accuracy": 0.660852442185084, "num_tokens": 1855970158.0, "step": 11071 }, { "entropy": 1.7621172269185383, "epoch": 1.2163082584933125, "grad_norm": 0.6754755973815918, "learning_rate": 8.327793840411056e-06, "loss": 1.3595, "mean_token_accuracy": 0.6517157753308614, "num_tokens": 1856107207.0, "step": 11072 }, { "entropy": 1.727922797203064, "epoch": 1.2164181154046854, "grad_norm": 0.7006334662437439, "learning_rate": 8.326264956932946e-06, "loss": 1.4191, "mean_token_accuracy": 0.6418725997209549, "num_tokens": 1856296668.0, "step": 11073 }, { "entropy": 1.6740870575110118, "epoch": 1.2165279723160582, "grad_norm": 0.5527358055114746, "learning_rate": 8.324736158077338e-06, "loss": 1.1741, "mean_token_accuracy": 0.6742709130048752, "num_tokens": 1856484822.0, "step": 11074 }, { "entropy": 1.6873212854067485, "epoch": 1.2166378292274314, "grad_norm": 0.6192285418510437, "learning_rate": 8.323207443892626e-06, "loss": 1.3717, "mean_token_accuracy": 0.6412427127361298, "num_tokens": 1856734436.0, "step": 11075 }, { "entropy": 1.7283975680669148, "epoch": 1.2167476861388042, "grad_norm": 0.6958233714103699, "learning_rate": 8.321678814427195e-06, "loss": 1.5502, "mean_token_accuracy": 0.6426873902479807, "num_tokens": 1856955797.0, "step": 11076 }, { "entropy": 1.7120100259780884, "epoch": 1.2168575430501771, "grad_norm": 0.7286651134490967, "learning_rate": 8.320150269729421e-06, "loss": 1.436, "mean_token_accuracy": 0.6634295533100764, "num_tokens": 1857107820.0, "step": 11077 }, { "entropy": 1.6884620587031047, "epoch": 1.21696739996155, "grad_norm": 0.8027754426002502, "learning_rate": 8.318621809847682e-06, "loss": 1.4571, "mean_token_accuracy": 0.6525517205397288, "num_tokens": 1857282314.0, "step": 11078 }, { "entropy": 1.7306037942568462, "epoch": 1.2170772568729231, "grad_norm": 0.755138635635376, "learning_rate": 8.317093434830358e-06, "loss": 1.1819, "mean_token_accuracy": 0.6871931801239649, "num_tokens": 1857387879.0, "step": 11079 }, { "entropy": 1.693780501683553, "epoch": 1.217187113784296, "grad_norm": 0.7477782368659973, "learning_rate": 8.315565144725814e-06, "loss": 1.4305, "mean_token_accuracy": 0.667605901757876, "num_tokens": 1857536614.0, "step": 11080 }, { "entropy": 1.7177151342233021, "epoch": 1.217296970695669, "grad_norm": 0.6408316493034363, "learning_rate": 8.314036939582426e-06, "loss": 1.4695, "mean_token_accuracy": 0.6495102594296137, "num_tokens": 1857700887.0, "step": 11081 }, { "entropy": 1.6869684358437855, "epoch": 1.2174068276070418, "grad_norm": 0.745124340057373, "learning_rate": 8.31250881944856e-06, "loss": 1.3735, "mean_token_accuracy": 0.6654743601878484, "num_tokens": 1857869919.0, "step": 11082 }, { "entropy": 1.690779209136963, "epoch": 1.2175166845184147, "grad_norm": 0.6019642949104309, "learning_rate": 8.310980784372576e-06, "loss": 1.3871, "mean_token_accuracy": 0.6539642065763474, "num_tokens": 1858056352.0, "step": 11083 }, { "entropy": 1.6384514768918355, "epoch": 1.2176265414297878, "grad_norm": 0.66316157579422, "learning_rate": 8.309452834402837e-06, "loss": 1.34, "mean_token_accuracy": 0.6631773859262466, "num_tokens": 1858232023.0, "step": 11084 }, { "entropy": 1.7274446388085682, "epoch": 1.2177363983411607, "grad_norm": 0.7292064428329468, "learning_rate": 8.307924969587708e-06, "loss": 1.5255, "mean_token_accuracy": 0.6418920457363129, "num_tokens": 1858412774.0, "step": 11085 }, { "entropy": 1.6863858600457509, "epoch": 1.2178462552525335, "grad_norm": 0.7607459425926208, "learning_rate": 8.306397189975537e-06, "loss": 1.3769, "mean_token_accuracy": 0.6505500276883444, "num_tokens": 1858530907.0, "step": 11086 }, { "entropy": 1.713003009557724, "epoch": 1.2179561121639064, "grad_norm": 0.6406744718551636, "learning_rate": 8.30486949561468e-06, "loss": 1.4627, "mean_token_accuracy": 0.6432670553525289, "num_tokens": 1858701857.0, "step": 11087 }, { "entropy": 1.7706784307956696, "epoch": 1.2180659690752795, "grad_norm": 0.6789109706878662, "learning_rate": 8.303341886553493e-06, "loss": 1.3834, "mean_token_accuracy": 0.6596761445204417, "num_tokens": 1858891212.0, "step": 11088 }, { "entropy": 1.6831317842006683, "epoch": 1.2181758259866524, "grad_norm": 0.6298303604125977, "learning_rate": 8.30181436284032e-06, "loss": 1.312, "mean_token_accuracy": 0.6730460574229559, "num_tokens": 1859017132.0, "step": 11089 }, { "entropy": 1.6755876143773396, "epoch": 1.2182856828980253, "grad_norm": 1.0117133855819702, "learning_rate": 8.300286924523505e-06, "loss": 1.4366, "mean_token_accuracy": 0.6528118550777435, "num_tokens": 1859168864.0, "step": 11090 }, { "entropy": 1.632968008518219, "epoch": 1.2183955398093982, "grad_norm": 0.6477782130241394, "learning_rate": 8.298759571651393e-06, "loss": 1.4088, "mean_token_accuracy": 0.6646546920140585, "num_tokens": 1859317350.0, "step": 11091 }, { "entropy": 1.697382648785909, "epoch": 1.2185053967207713, "grad_norm": 0.7350544333457947, "learning_rate": 8.297232304272322e-06, "loss": 1.4274, "mean_token_accuracy": 0.6546217650175095, "num_tokens": 1859504084.0, "step": 11092 }, { "entropy": 1.6902291178703308, "epoch": 1.2186152536321442, "grad_norm": 0.7430658340454102, "learning_rate": 8.295705122434633e-06, "loss": 1.3245, "mean_token_accuracy": 0.6620542804400126, "num_tokens": 1859649852.0, "step": 11093 }, { "entropy": 1.7104488511880238, "epoch": 1.218725110543517, "grad_norm": 0.7023297548294067, "learning_rate": 8.294178026186656e-06, "loss": 1.3796, "mean_token_accuracy": 0.6658419122298559, "num_tokens": 1859776385.0, "step": 11094 }, { "entropy": 1.6531602640946705, "epoch": 1.21883496745489, "grad_norm": 0.6301870346069336, "learning_rate": 8.292651015576725e-06, "loss": 1.3238, "mean_token_accuracy": 0.6639458288749059, "num_tokens": 1859912333.0, "step": 11095 }, { "entropy": 1.7121768792470295, "epoch": 1.2189448243662628, "grad_norm": 0.7700769901275635, "learning_rate": 8.29112409065317e-06, "loss": 1.435, "mean_token_accuracy": 0.6486354172229767, "num_tokens": 1860057917.0, "step": 11096 }, { "entropy": 1.6702754994233449, "epoch": 1.219054681277636, "grad_norm": 0.834185004234314, "learning_rate": 8.289597251464319e-06, "loss": 1.4033, "mean_token_accuracy": 0.6532324800888697, "num_tokens": 1860209091.0, "step": 11097 }, { "entropy": 1.6551378965377808, "epoch": 1.2191645381890088, "grad_norm": 0.7919728755950928, "learning_rate": 8.288070498058489e-06, "loss": 1.5648, "mean_token_accuracy": 0.6503797471523285, "num_tokens": 1860389834.0, "step": 11098 }, { "entropy": 1.687700519959132, "epoch": 1.2192743951003817, "grad_norm": 0.699600100517273, "learning_rate": 8.28654383048401e-06, "loss": 1.2284, "mean_token_accuracy": 0.6715284287929535, "num_tokens": 1860507119.0, "step": 11099 }, { "entropy": 1.6870764593283336, "epoch": 1.2193842520117546, "grad_norm": 0.6864370107650757, "learning_rate": 8.285017248789195e-06, "loss": 1.3806, "mean_token_accuracy": 0.6525691151618958, "num_tokens": 1860656756.0, "step": 11100 }, { "entropy": 1.795731355746587, "epoch": 1.2194941089231277, "grad_norm": 0.8872252106666565, "learning_rate": 8.28349075302236e-06, "loss": 1.5156, "mean_token_accuracy": 0.6348374287287394, "num_tokens": 1860800299.0, "step": 11101 }, { "entropy": 1.6798087656497955, "epoch": 1.2196039658345006, "grad_norm": 0.6114014983177185, "learning_rate": 8.281964343231817e-06, "loss": 1.3712, "mean_token_accuracy": 0.6489862948656082, "num_tokens": 1860986779.0, "step": 11102 }, { "entropy": 1.7513733704884846, "epoch": 1.2197138227458735, "grad_norm": 0.6035370826721191, "learning_rate": 8.280438019465885e-06, "loss": 1.4784, "mean_token_accuracy": 0.6494489560524622, "num_tokens": 1861163872.0, "step": 11103 }, { "entropy": 1.7338022689024608, "epoch": 1.2198236796572464, "grad_norm": 0.7351298928260803, "learning_rate": 8.278911781772853e-06, "loss": 1.3004, "mean_token_accuracy": 0.6633716921011606, "num_tokens": 1861310542.0, "step": 11104 }, { "entropy": 1.7447912494341533, "epoch": 1.2199335365686195, "grad_norm": 0.5645570755004883, "learning_rate": 8.277385630201044e-06, "loss": 1.4266, "mean_token_accuracy": 0.6452751606702805, "num_tokens": 1861542153.0, "step": 11105 }, { "entropy": 1.6788997650146484, "epoch": 1.2200433934799924, "grad_norm": 0.7873282432556152, "learning_rate": 8.275859564798753e-06, "loss": 1.5012, "mean_token_accuracy": 0.6227647066116333, "num_tokens": 1861776129.0, "step": 11106 }, { "entropy": 1.7607338031133015, "epoch": 1.2201532503913652, "grad_norm": 0.6690042614936829, "learning_rate": 8.274333585614278e-06, "loss": 1.4915, "mean_token_accuracy": 0.6434455215930939, "num_tokens": 1861967058.0, "step": 11107 }, { "entropy": 1.675374945004781, "epoch": 1.2202631073027381, "grad_norm": 0.6949226260185242, "learning_rate": 8.272807692695915e-06, "loss": 1.3347, "mean_token_accuracy": 0.6678502013285955, "num_tokens": 1862107356.0, "step": 11108 }, { "entropy": 1.6946699917316437, "epoch": 1.220372964214111, "grad_norm": 0.7122815251350403, "learning_rate": 8.271281886091964e-06, "loss": 1.349, "mean_token_accuracy": 0.6702584276596705, "num_tokens": 1862267288.0, "step": 11109 }, { "entropy": 1.6867165565490723, "epoch": 1.2204828211254841, "grad_norm": 0.7338141202926636, "learning_rate": 8.26975616585071e-06, "loss": 1.3542, "mean_token_accuracy": 0.6660454173882803, "num_tokens": 1862425941.0, "step": 11110 }, { "entropy": 1.7361581027507782, "epoch": 1.220592678036857, "grad_norm": 0.7320640087127686, "learning_rate": 8.26823053202044e-06, "loss": 1.412, "mean_token_accuracy": 0.6694223483403524, "num_tokens": 1862582085.0, "step": 11111 }, { "entropy": 1.5928312540054321, "epoch": 1.22070253494823, "grad_norm": 0.5795355439186096, "learning_rate": 8.266704984649448e-06, "loss": 1.2941, "mean_token_accuracy": 0.6710518797238668, "num_tokens": 1862754268.0, "step": 11112 }, { "entropy": 1.790160854657491, "epoch": 1.2208123918596028, "grad_norm": 0.7169445753097534, "learning_rate": 8.265179523786007e-06, "loss": 1.3867, "mean_token_accuracy": 0.655024250348409, "num_tokens": 1862879401.0, "step": 11113 }, { "entropy": 1.7564424475034077, "epoch": 1.2209222487709759, "grad_norm": 0.7270147204399109, "learning_rate": 8.263654149478404e-06, "loss": 1.4396, "mean_token_accuracy": 0.6577896674474081, "num_tokens": 1863038450.0, "step": 11114 }, { "entropy": 1.7379739979902904, "epoch": 1.2210321056823488, "grad_norm": 0.6270740032196045, "learning_rate": 8.262128861774914e-06, "loss": 1.4605, "mean_token_accuracy": 0.6530610223611196, "num_tokens": 1863206326.0, "step": 11115 }, { "entropy": 1.7211223940054576, "epoch": 1.2211419625937217, "grad_norm": 0.7160316109657288, "learning_rate": 8.260603660723809e-06, "loss": 1.3408, "mean_token_accuracy": 0.6656116793553034, "num_tokens": 1863350263.0, "step": 11116 }, { "entropy": 1.7302058239777882, "epoch": 1.2212518195050945, "grad_norm": 0.6913062334060669, "learning_rate": 8.259078546373365e-06, "loss": 1.3812, "mean_token_accuracy": 0.6563667754332224, "num_tokens": 1863492291.0, "step": 11117 }, { "entropy": 1.6666455070177715, "epoch": 1.2213616764164676, "grad_norm": 0.5929701328277588, "learning_rate": 8.257553518771853e-06, "loss": 1.3796, "mean_token_accuracy": 0.6561521291732788, "num_tokens": 1863680842.0, "step": 11118 }, { "entropy": 1.691912164290746, "epoch": 1.2214715333278405, "grad_norm": 0.6996101140975952, "learning_rate": 8.256028577967534e-06, "loss": 1.4237, "mean_token_accuracy": 0.652123952905337, "num_tokens": 1863838255.0, "step": 11119 }, { "entropy": 1.67883962392807, "epoch": 1.2215813902392134, "grad_norm": 0.6681597828865051, "learning_rate": 8.254503724008673e-06, "loss": 1.4035, "mean_token_accuracy": 0.6579047491153082, "num_tokens": 1863976314.0, "step": 11120 }, { "entropy": 1.761623462041219, "epoch": 1.2216912471505863, "grad_norm": 0.6309159398078918, "learning_rate": 8.252978956943536e-06, "loss": 1.5039, "mean_token_accuracy": 0.636713887254397, "num_tokens": 1864175243.0, "step": 11121 }, { "entropy": 1.722734143336614, "epoch": 1.2218011040619592, "grad_norm": 0.5722051858901978, "learning_rate": 8.251454276820372e-06, "loss": 1.259, "mean_token_accuracy": 0.6658644527196884, "num_tokens": 1864306155.0, "step": 11122 }, { "entropy": 1.6461964547634125, "epoch": 1.2219109609733323, "grad_norm": 0.6660195589065552, "learning_rate": 8.249929683687442e-06, "loss": 1.3394, "mean_token_accuracy": 0.669757604598999, "num_tokens": 1864445396.0, "step": 11123 }, { "entropy": 1.677109609047572, "epoch": 1.2220208178847052, "grad_norm": 0.7361236810684204, "learning_rate": 8.248405177593005e-06, "loss": 1.5038, "mean_token_accuracy": 0.6409556319316229, "num_tokens": 1864633037.0, "step": 11124 }, { "entropy": 1.7299526433149974, "epoch": 1.222130674796078, "grad_norm": 0.8260616064071655, "learning_rate": 8.246880758585299e-06, "loss": 1.3426, "mean_token_accuracy": 0.6657137821118037, "num_tokens": 1864758296.0, "step": 11125 }, { "entropy": 1.7556905547777812, "epoch": 1.222240531707451, "grad_norm": 0.9507250785827637, "learning_rate": 8.245356426712577e-06, "loss": 1.5266, "mean_token_accuracy": 0.6378592848777771, "num_tokens": 1864935775.0, "step": 11126 }, { "entropy": 1.6686455806096394, "epoch": 1.222350388618824, "grad_norm": 0.6535077691078186, "learning_rate": 8.243832182023082e-06, "loss": 1.4378, "mean_token_accuracy": 0.6565053512652715, "num_tokens": 1865122257.0, "step": 11127 }, { "entropy": 1.6927911341190338, "epoch": 1.222460245530197, "grad_norm": 0.579563319683075, "learning_rate": 8.242308024565058e-06, "loss": 1.4441, "mean_token_accuracy": 0.6382209062576294, "num_tokens": 1865351730.0, "step": 11128 }, { "entropy": 1.695607751607895, "epoch": 1.2225701024415698, "grad_norm": 0.7512062191963196, "learning_rate": 8.240783954386744e-06, "loss": 1.4194, "mean_token_accuracy": 0.6544724305470785, "num_tokens": 1865476451.0, "step": 11129 }, { "entropy": 1.7077071964740753, "epoch": 1.2226799593529427, "grad_norm": 0.6465796828269958, "learning_rate": 8.239259971536369e-06, "loss": 1.3455, "mean_token_accuracy": 0.6696978360414505, "num_tokens": 1865637091.0, "step": 11130 }, { "entropy": 1.7845915853977203, "epoch": 1.2227898162643158, "grad_norm": 0.7069242000579834, "learning_rate": 8.237736076062176e-06, "loss": 1.4593, "mean_token_accuracy": 0.6528904487689337, "num_tokens": 1865780627.0, "step": 11131 }, { "entropy": 1.643855979045232, "epoch": 1.2228996731756887, "grad_norm": 0.6371172070503235, "learning_rate": 8.23621226801239e-06, "loss": 1.3646, "mean_token_accuracy": 0.6664480765660604, "num_tokens": 1865934135.0, "step": 11132 }, { "entropy": 1.7372296055157979, "epoch": 1.2230095300870616, "grad_norm": 0.8932238817214966, "learning_rate": 8.23468854743524e-06, "loss": 1.533, "mean_token_accuracy": 0.6475943475961685, "num_tokens": 1866099365.0, "step": 11133 }, { "entropy": 1.6458615064620972, "epoch": 1.2231193869984345, "grad_norm": 0.6451045870780945, "learning_rate": 8.233164914378952e-06, "loss": 1.3967, "mean_token_accuracy": 0.661471222837766, "num_tokens": 1866274426.0, "step": 11134 }, { "entropy": 1.7578480541706085, "epoch": 1.2232292439098074, "grad_norm": 0.770330548286438, "learning_rate": 8.231641368891752e-06, "loss": 1.5281, "mean_token_accuracy": 0.6454629898071289, "num_tokens": 1866492310.0, "step": 11135 }, { "entropy": 1.7039073308308919, "epoch": 1.2233391008211805, "grad_norm": 0.7558161020278931, "learning_rate": 8.230117911021849e-06, "loss": 1.2976, "mean_token_accuracy": 0.6695977548758189, "num_tokens": 1866623727.0, "step": 11136 }, { "entropy": 1.674913187821706, "epoch": 1.2234489577325534, "grad_norm": 0.6112053394317627, "learning_rate": 8.228594540817467e-06, "loss": 1.3014, "mean_token_accuracy": 0.6781783352295557, "num_tokens": 1866761880.0, "step": 11137 }, { "entropy": 1.6501458883285522, "epoch": 1.2235588146439262, "grad_norm": 0.6094418168067932, "learning_rate": 8.227071258326823e-06, "loss": 1.4306, "mean_token_accuracy": 0.6528994739055634, "num_tokens": 1867008257.0, "step": 11138 }, { "entropy": 1.7520569463570912, "epoch": 1.2236686715552993, "grad_norm": 0.7769097089767456, "learning_rate": 8.22554806359812e-06, "loss": 1.2928, "mean_token_accuracy": 0.6684681624174118, "num_tokens": 1867113624.0, "step": 11139 }, { "entropy": 1.6467416286468506, "epoch": 1.2237785284666722, "grad_norm": 0.6554421782493591, "learning_rate": 8.224024956679568e-06, "loss": 1.2857, "mean_token_accuracy": 0.66878113647302, "num_tokens": 1867252361.0, "step": 11140 }, { "entropy": 1.75765860080719, "epoch": 1.2238883853780451, "grad_norm": 0.8180747628211975, "learning_rate": 8.222501937619385e-06, "loss": 1.4596, "mean_token_accuracy": 0.6483653237422308, "num_tokens": 1867380976.0, "step": 11141 }, { "entropy": 1.746919463078181, "epoch": 1.223998242289418, "grad_norm": 0.63518887758255, "learning_rate": 8.220979006465755e-06, "loss": 1.4453, "mean_token_accuracy": 0.6484291801850001, "num_tokens": 1867523470.0, "step": 11142 }, { "entropy": 1.7108286619186401, "epoch": 1.2241080992007909, "grad_norm": 0.7541074156761169, "learning_rate": 8.219456163266891e-06, "loss": 1.2723, "mean_token_accuracy": 0.6758786340554556, "num_tokens": 1867627150.0, "step": 11143 }, { "entropy": 1.703292191028595, "epoch": 1.224217956112164, "grad_norm": 0.6642011404037476, "learning_rate": 8.217933408070985e-06, "loss": 1.3838, "mean_token_accuracy": 0.6647111773490906, "num_tokens": 1867821226.0, "step": 11144 }, { "entropy": 1.722439835468928, "epoch": 1.2243278130235369, "grad_norm": 0.7255253791809082, "learning_rate": 8.216410740926235e-06, "loss": 1.5162, "mean_token_accuracy": 0.6462632616360983, "num_tokens": 1868008940.0, "step": 11145 }, { "entropy": 1.7716669142246246, "epoch": 1.2244376699349098, "grad_norm": 0.6904542446136475, "learning_rate": 8.214888161880827e-06, "loss": 1.3692, "mean_token_accuracy": 0.659872904419899, "num_tokens": 1868166158.0, "step": 11146 }, { "entropy": 1.7202934126059215, "epoch": 1.2245475268462827, "grad_norm": 0.6671558022499084, "learning_rate": 8.21336567098296e-06, "loss": 1.2963, "mean_token_accuracy": 0.6699869285027186, "num_tokens": 1868311826.0, "step": 11147 }, { "entropy": 1.684409538904826, "epoch": 1.2246573837576555, "grad_norm": 0.6063627004623413, "learning_rate": 8.211843268280807e-06, "loss": 1.38, "mean_token_accuracy": 0.6571643104155859, "num_tokens": 1868495561.0, "step": 11148 }, { "entropy": 1.6580710808436077, "epoch": 1.2247672406690286, "grad_norm": 0.6459930539131165, "learning_rate": 8.210320953822561e-06, "loss": 1.377, "mean_token_accuracy": 0.6583688110113144, "num_tokens": 1868664866.0, "step": 11149 }, { "entropy": 1.6917679210503895, "epoch": 1.2248770975804015, "grad_norm": 0.8134970664978027, "learning_rate": 8.208798727656404e-06, "loss": 1.3967, "mean_token_accuracy": 0.6652498145898184, "num_tokens": 1868894590.0, "step": 11150 }, { "entropy": 1.6833031276861827, "epoch": 1.2249869544917744, "grad_norm": 0.6595972180366516, "learning_rate": 8.207276589830505e-06, "loss": 1.4866, "mean_token_accuracy": 0.6401710361242294, "num_tokens": 1869068031.0, "step": 11151 }, { "entropy": 1.701577494541804, "epoch": 1.2250968114031475, "grad_norm": 0.6729449033737183, "learning_rate": 8.20575454039304e-06, "loss": 1.4412, "mean_token_accuracy": 0.6557190865278244, "num_tokens": 1869229821.0, "step": 11152 }, { "entropy": 1.7543394267559052, "epoch": 1.2252066683145204, "grad_norm": 0.7445177435874939, "learning_rate": 8.204232579392192e-06, "loss": 1.2951, "mean_token_accuracy": 0.669405405720075, "num_tokens": 1869350127.0, "step": 11153 }, { "entropy": 1.7157810529073079, "epoch": 1.2253165252258933, "grad_norm": 0.712943971157074, "learning_rate": 8.20271070687612e-06, "loss": 1.4664, "mean_token_accuracy": 0.64886274933815, "num_tokens": 1869538791.0, "step": 11154 }, { "entropy": 1.6712701618671417, "epoch": 1.2254263821372662, "grad_norm": 0.6949601769447327, "learning_rate": 8.201188922892994e-06, "loss": 1.3177, "mean_token_accuracy": 0.6590605328480402, "num_tokens": 1869677641.0, "step": 11155 }, { "entropy": 1.6515525877475739, "epoch": 1.225536239048639, "grad_norm": 0.6297810673713684, "learning_rate": 8.199667227490978e-06, "loss": 1.2314, "mean_token_accuracy": 0.6822561621665955, "num_tokens": 1869820121.0, "step": 11156 }, { "entropy": 1.72104745109876, "epoch": 1.2256460959600122, "grad_norm": 0.582068681716919, "learning_rate": 8.198145620718229e-06, "loss": 1.4739, "mean_token_accuracy": 0.6453680694103241, "num_tokens": 1870094410.0, "step": 11157 }, { "entropy": 1.7204219698905945, "epoch": 1.225755952871385, "grad_norm": 0.9038074612617493, "learning_rate": 8.19662410262291e-06, "loss": 1.4717, "mean_token_accuracy": 0.6598606556653976, "num_tokens": 1870217099.0, "step": 11158 }, { "entropy": 1.705298662185669, "epoch": 1.225865809782758, "grad_norm": 0.956987738609314, "learning_rate": 8.195102673253179e-06, "loss": 1.3267, "mean_token_accuracy": 0.674546500047048, "num_tokens": 1870363296.0, "step": 11159 }, { "entropy": 1.7120076020558674, "epoch": 1.2259756666941308, "grad_norm": 0.6030857563018799, "learning_rate": 8.19358133265718e-06, "loss": 1.4096, "mean_token_accuracy": 0.6453298330307007, "num_tokens": 1870546904.0, "step": 11160 }, { "entropy": 1.6742028892040253, "epoch": 1.2260855236055037, "grad_norm": 0.7126300930976868, "learning_rate": 8.192060080883066e-06, "loss": 1.4757, "mean_token_accuracy": 0.6451009213924408, "num_tokens": 1870746388.0, "step": 11161 }, { "entropy": 1.6565779447555542, "epoch": 1.2261953805168768, "grad_norm": 0.6220477223396301, "learning_rate": 8.19053891797899e-06, "loss": 1.5842, "mean_token_accuracy": 0.6242297689119974, "num_tokens": 1870984793.0, "step": 11162 }, { "entropy": 1.7263220647970836, "epoch": 1.2263052374282497, "grad_norm": 0.7451938390731812, "learning_rate": 8.189017843993087e-06, "loss": 1.3359, "mean_token_accuracy": 0.6607558329900106, "num_tokens": 1871172441.0, "step": 11163 }, { "entropy": 1.7222507695357006, "epoch": 1.2264150943396226, "grad_norm": 0.6210897564888, "learning_rate": 8.187496858973504e-06, "loss": 1.4375, "mean_token_accuracy": 0.6482439885536829, "num_tokens": 1871366509.0, "step": 11164 }, { "entropy": 1.6978266040484111, "epoch": 1.2265249512509957, "grad_norm": 0.6470620036125183, "learning_rate": 8.185975962968382e-06, "loss": 1.329, "mean_token_accuracy": 0.6652160336573919, "num_tokens": 1871583409.0, "step": 11165 }, { "entropy": 1.7219670116901398, "epoch": 1.2266348081623686, "grad_norm": 0.6918816566467285, "learning_rate": 8.184455156025849e-06, "loss": 1.5423, "mean_token_accuracy": 0.6453223278125128, "num_tokens": 1871795247.0, "step": 11166 }, { "entropy": 1.655881514151891, "epoch": 1.2267446650737415, "grad_norm": 0.5830437541007996, "learning_rate": 8.182934438194039e-06, "loss": 1.3384, "mean_token_accuracy": 0.6642249425252279, "num_tokens": 1872026942.0, "step": 11167 }, { "entropy": 1.6868476569652557, "epoch": 1.2268545219851144, "grad_norm": 0.7208216190338135, "learning_rate": 8.18141380952109e-06, "loss": 1.4512, "mean_token_accuracy": 0.6498266657193502, "num_tokens": 1872200498.0, "step": 11168 }, { "entropy": 1.7578080296516418, "epoch": 1.2269643788964872, "grad_norm": 0.6857250332832336, "learning_rate": 8.179893270055122e-06, "loss": 1.3811, "mean_token_accuracy": 0.6548460274934769, "num_tokens": 1872368081.0, "step": 11169 }, { "entropy": 1.6785810391108196, "epoch": 1.2270742358078603, "grad_norm": 0.6952616572380066, "learning_rate": 8.178372819844258e-06, "loss": 1.2608, "mean_token_accuracy": 0.6776244093974432, "num_tokens": 1872510704.0, "step": 11170 }, { "entropy": 1.7037639617919922, "epoch": 1.2271840927192332, "grad_norm": 0.7573713064193726, "learning_rate": 8.176852458936628e-06, "loss": 1.4666, "mean_token_accuracy": 0.6548537611961365, "num_tokens": 1872669770.0, "step": 11171 }, { "entropy": 1.6827135582764943, "epoch": 1.2272939496306061, "grad_norm": 0.6930450201034546, "learning_rate": 8.175332187380341e-06, "loss": 1.3069, "mean_token_accuracy": 0.66745425760746, "num_tokens": 1872875315.0, "step": 11172 }, { "entropy": 1.7051290174325306, "epoch": 1.227403806541979, "grad_norm": 0.6210904121398926, "learning_rate": 8.173812005223517e-06, "loss": 1.2625, "mean_token_accuracy": 0.6816918949286143, "num_tokens": 1873033536.0, "step": 11173 }, { "entropy": 1.7081526120503743, "epoch": 1.2275136634533519, "grad_norm": 0.6397086977958679, "learning_rate": 8.172291912514274e-06, "loss": 1.473, "mean_token_accuracy": 0.663579652706782, "num_tokens": 1873186734.0, "step": 11174 }, { "entropy": 1.7037063737710316, "epoch": 1.227623520364725, "grad_norm": 0.6888397336006165, "learning_rate": 8.170771909300716e-06, "loss": 1.5498, "mean_token_accuracy": 0.6241617798805237, "num_tokens": 1873388660.0, "step": 11175 }, { "entropy": 1.6468111673990886, "epoch": 1.2277333772760979, "grad_norm": 0.6850365996360779, "learning_rate": 8.169251995630948e-06, "loss": 1.3269, "mean_token_accuracy": 0.6640166540940603, "num_tokens": 1873634673.0, "step": 11176 }, { "entropy": 1.7099710702896118, "epoch": 1.2278432341874708, "grad_norm": 0.6371767520904541, "learning_rate": 8.167732171553088e-06, "loss": 1.341, "mean_token_accuracy": 0.6740860641002655, "num_tokens": 1873780882.0, "step": 11177 }, { "entropy": 1.7181882460912068, "epoch": 1.2279530910988439, "grad_norm": 0.7756669521331787, "learning_rate": 8.166212437115221e-06, "loss": 1.403, "mean_token_accuracy": 0.6458031634489695, "num_tokens": 1873933442.0, "step": 11178 }, { "entropy": 1.6694627106189728, "epoch": 1.2280629480102168, "grad_norm": 0.8991198539733887, "learning_rate": 8.164692792365456e-06, "loss": 1.3021, "mean_token_accuracy": 0.6779783020416895, "num_tokens": 1874056969.0, "step": 11179 }, { "entropy": 1.7401012182235718, "epoch": 1.2281728049215896, "grad_norm": 0.7417164444923401, "learning_rate": 8.163173237351887e-06, "loss": 1.434, "mean_token_accuracy": 0.6551551967859268, "num_tokens": 1874216033.0, "step": 11180 }, { "entropy": 1.7963026364644368, "epoch": 1.2282826618329625, "grad_norm": 0.7083638310432434, "learning_rate": 8.161653772122607e-06, "loss": 1.4688, "mean_token_accuracy": 0.6413846760988235, "num_tokens": 1874412535.0, "step": 11181 }, { "entropy": 1.6756743987401326, "epoch": 1.2283925187443354, "grad_norm": 0.6325013637542725, "learning_rate": 8.1601343967257e-06, "loss": 1.3387, "mean_token_accuracy": 0.6656516889731089, "num_tokens": 1874560478.0, "step": 11182 }, { "entropy": 1.6859131356080372, "epoch": 1.2285023756557085, "grad_norm": 0.6860812306404114, "learning_rate": 8.15861511120927e-06, "loss": 1.3272, "mean_token_accuracy": 0.6739522715409597, "num_tokens": 1874709470.0, "step": 11183 }, { "entropy": 1.7304639220237732, "epoch": 1.2286122325670814, "grad_norm": 1.0082952976226807, "learning_rate": 8.157095915621382e-06, "loss": 1.5461, "mean_token_accuracy": 0.6445205509662628, "num_tokens": 1874878019.0, "step": 11184 }, { "entropy": 1.7110270063082378, "epoch": 1.2287220894784543, "grad_norm": 0.6818872690200806, "learning_rate": 8.155576810010131e-06, "loss": 1.6461, "mean_token_accuracy": 0.6105376332998276, "num_tokens": 1875092932.0, "step": 11185 }, { "entropy": 1.6794796387354534, "epoch": 1.2288319463898272, "grad_norm": 0.5344785451889038, "learning_rate": 8.154057794423595e-06, "loss": 1.3906, "mean_token_accuracy": 0.6523840377728144, "num_tokens": 1875304235.0, "step": 11186 }, { "entropy": 1.635409543911616, "epoch": 1.2289418033012, "grad_norm": 0.5482689738273621, "learning_rate": 8.152538868909846e-06, "loss": 1.4172, "mean_token_accuracy": 0.6563707540432612, "num_tokens": 1875523446.0, "step": 11187 }, { "entropy": 1.7209465603033702, "epoch": 1.2290516602125732, "grad_norm": 0.8221262693405151, "learning_rate": 8.151020033516957e-06, "loss": 1.4545, "mean_token_accuracy": 0.6479563862085342, "num_tokens": 1875685022.0, "step": 11188 }, { "entropy": 1.7244952420393627, "epoch": 1.229161517123946, "grad_norm": 0.7386845350265503, "learning_rate": 8.149501288293e-06, "loss": 1.3956, "mean_token_accuracy": 0.6560295174519221, "num_tokens": 1875850083.0, "step": 11189 }, { "entropy": 1.6882909337679546, "epoch": 1.229271374035319, "grad_norm": 0.5964418649673462, "learning_rate": 8.147982633286043e-06, "loss": 1.4545, "mean_token_accuracy": 0.6443512588739395, "num_tokens": 1876051887.0, "step": 11190 }, { "entropy": 1.7120668391386669, "epoch": 1.229381230946692, "grad_norm": 0.7184486389160156, "learning_rate": 8.146464068544153e-06, "loss": 1.4313, "mean_token_accuracy": 0.6619121432304382, "num_tokens": 1876193353.0, "step": 11191 }, { "entropy": 1.715154270331065, "epoch": 1.229491087858065, "grad_norm": 0.7384195923805237, "learning_rate": 8.144945594115386e-06, "loss": 1.4344, "mean_token_accuracy": 0.6491910715897878, "num_tokens": 1876348250.0, "step": 11192 }, { "entropy": 1.7401968638102214, "epoch": 1.2296009447694378, "grad_norm": 0.7795338034629822, "learning_rate": 8.143427210047806e-06, "loss": 1.3542, "mean_token_accuracy": 0.6642808963855108, "num_tokens": 1876487873.0, "step": 11193 }, { "entropy": 1.6769267618656158, "epoch": 1.2297108016808107, "grad_norm": 0.7037333846092224, "learning_rate": 8.14190891638947e-06, "loss": 1.2072, "mean_token_accuracy": 0.681295191248258, "num_tokens": 1876634540.0, "step": 11194 }, { "entropy": 1.7220933934052784, "epoch": 1.2298206585921836, "grad_norm": 0.7242723107337952, "learning_rate": 8.140390713188425e-06, "loss": 1.3561, "mean_token_accuracy": 0.6651198863983154, "num_tokens": 1876792228.0, "step": 11195 }, { "entropy": 1.6575371026992798, "epoch": 1.2299305155035567, "grad_norm": 0.5735102295875549, "learning_rate": 8.138872600492725e-06, "loss": 1.4878, "mean_token_accuracy": 0.644407923022906, "num_tokens": 1877007455.0, "step": 11196 }, { "entropy": 1.711806943019231, "epoch": 1.2300403724149296, "grad_norm": 0.5998605489730835, "learning_rate": 8.137354578350422e-06, "loss": 1.5256, "mean_token_accuracy": 0.6423748483260473, "num_tokens": 1877208212.0, "step": 11197 }, { "entropy": 1.6850987871487935, "epoch": 1.2301502293263025, "grad_norm": 0.5823908448219299, "learning_rate": 8.135836646809552e-06, "loss": 1.3704, "mean_token_accuracy": 0.658658762772878, "num_tokens": 1877387686.0, "step": 11198 }, { "entropy": 1.7524065176645915, "epoch": 1.2302600862376754, "grad_norm": 0.6456050872802734, "learning_rate": 8.134318805918161e-06, "loss": 1.4089, "mean_token_accuracy": 0.6498519033193588, "num_tokens": 1877539996.0, "step": 11199 }, { "entropy": 1.786929150422414, "epoch": 1.2303699431490482, "grad_norm": 0.7778921723365784, "learning_rate": 8.132801055724296e-06, "loss": 1.6354, "mean_token_accuracy": 0.6156754593054453, "num_tokens": 1877784918.0, "step": 11200 }, { "entropy": 1.7587116559346516, "epoch": 1.2304798000604213, "grad_norm": 0.7484762668609619, "learning_rate": 8.13128339627598e-06, "loss": 1.3407, "mean_token_accuracy": 0.6659899353981018, "num_tokens": 1877989211.0, "step": 11201 }, { "entropy": 1.6641751329104106, "epoch": 1.2305896569717942, "grad_norm": 0.8541742563247681, "learning_rate": 8.12976582762125e-06, "loss": 1.4287, "mean_token_accuracy": 0.6577843030293783, "num_tokens": 1878183278.0, "step": 11202 }, { "entropy": 1.6898448566595714, "epoch": 1.2306995138831671, "grad_norm": 0.7239437103271484, "learning_rate": 8.128248349808143e-06, "loss": 1.2639, "mean_token_accuracy": 0.6711952984333038, "num_tokens": 1878308690.0, "step": 11203 }, { "entropy": 1.6802580654621124, "epoch": 1.2308093707945402, "grad_norm": 0.6512843370437622, "learning_rate": 8.12673096288468e-06, "loss": 1.5386, "mean_token_accuracy": 0.6389039307832718, "num_tokens": 1878465206.0, "step": 11204 }, { "entropy": 1.724786251783371, "epoch": 1.230919227705913, "grad_norm": 0.7077043652534485, "learning_rate": 8.125213666898886e-06, "loss": 1.3819, "mean_token_accuracy": 0.6506198197603226, "num_tokens": 1878611726.0, "step": 11205 }, { "entropy": 1.7447443306446075, "epoch": 1.231029084617286, "grad_norm": 0.6581472754478455, "learning_rate": 8.123696461898785e-06, "loss": 1.318, "mean_token_accuracy": 0.6526310493548712, "num_tokens": 1878761057.0, "step": 11206 }, { "entropy": 1.741285651922226, "epoch": 1.2311389415286589, "grad_norm": 0.7155635356903076, "learning_rate": 8.122179347932396e-06, "loss": 1.5159, "mean_token_accuracy": 0.6463326240579287, "num_tokens": 1878956011.0, "step": 11207 }, { "entropy": 1.7428237795829773, "epoch": 1.2312487984400318, "grad_norm": 0.7881234288215637, "learning_rate": 8.12066232504773e-06, "loss": 1.5306, "mean_token_accuracy": 0.6414182931184769, "num_tokens": 1879152697.0, "step": 11208 }, { "entropy": 1.6375042895476024, "epoch": 1.2313586553514049, "grad_norm": 0.622815728187561, "learning_rate": 8.119145393292808e-06, "loss": 1.3191, "mean_token_accuracy": 0.6815162648757299, "num_tokens": 1879303949.0, "step": 11209 }, { "entropy": 1.6750175754229228, "epoch": 1.2314685122627778, "grad_norm": 0.6067901253700256, "learning_rate": 8.117628552715636e-06, "loss": 1.4323, "mean_token_accuracy": 0.6625420202811559, "num_tokens": 1879486772.0, "step": 11210 }, { "entropy": 1.7117481927076976, "epoch": 1.2315783691741506, "grad_norm": 0.6861073970794678, "learning_rate": 8.116111803364218e-06, "loss": 1.325, "mean_token_accuracy": 0.6644338915745417, "num_tokens": 1879628385.0, "step": 11211 }, { "entropy": 1.7356761197249095, "epoch": 1.2316882260855235, "grad_norm": 0.7062935829162598, "learning_rate": 8.114595145286565e-06, "loss": 1.3774, "mean_token_accuracy": 0.6549742966890335, "num_tokens": 1879808474.0, "step": 11212 }, { "entropy": 1.6221475005149841, "epoch": 1.2317980829968966, "grad_norm": 0.7179040908813477, "learning_rate": 8.113078578530676e-06, "loss": 1.4577, "mean_token_accuracy": 0.6649397065242132, "num_tokens": 1879965479.0, "step": 11213 }, { "entropy": 1.6686547497908275, "epoch": 1.2319079399082695, "grad_norm": 0.6317336559295654, "learning_rate": 8.111562103144543e-06, "loss": 1.3409, "mean_token_accuracy": 0.6715045968691508, "num_tokens": 1880102350.0, "step": 11214 }, { "entropy": 1.6721225877602894, "epoch": 1.2320177968196424, "grad_norm": 0.6598741412162781, "learning_rate": 8.110045719176178e-06, "loss": 1.4653, "mean_token_accuracy": 0.650575632850329, "num_tokens": 1880285478.0, "step": 11215 }, { "entropy": 1.7288126051425934, "epoch": 1.2321276537310153, "grad_norm": 0.7576711773872375, "learning_rate": 8.108529426673555e-06, "loss": 1.4545, "mean_token_accuracy": 0.6471219807863235, "num_tokens": 1880471549.0, "step": 11216 }, { "entropy": 1.749824732542038, "epoch": 1.2322375106423884, "grad_norm": 0.7423568367958069, "learning_rate": 8.107013225684678e-06, "loss": 1.3571, "mean_token_accuracy": 0.6579922884702682, "num_tokens": 1880652106.0, "step": 11217 }, { "entropy": 1.6819796562194824, "epoch": 1.2323473675537613, "grad_norm": 0.7285211682319641, "learning_rate": 8.105497116257526e-06, "loss": 1.3604, "mean_token_accuracy": 0.6721114267905554, "num_tokens": 1880810988.0, "step": 11218 }, { "entropy": 1.7526112000147502, "epoch": 1.2324572244651342, "grad_norm": 0.7563691139221191, "learning_rate": 8.103981098440087e-06, "loss": 1.405, "mean_token_accuracy": 0.668835868438085, "num_tokens": 1880927999.0, "step": 11219 }, { "entropy": 1.678017516930898, "epoch": 1.232567081376507, "grad_norm": 0.7735137939453125, "learning_rate": 8.10246517228034e-06, "loss": 1.3019, "mean_token_accuracy": 0.675841843088468, "num_tokens": 1881086954.0, "step": 11220 }, { "entropy": 1.6915496389071147, "epoch": 1.23267693828788, "grad_norm": 0.6371824145317078, "learning_rate": 8.100949337826267e-06, "loss": 1.3869, "mean_token_accuracy": 0.6606535166501999, "num_tokens": 1881251528.0, "step": 11221 }, { "entropy": 1.6786122421423595, "epoch": 1.232786795199253, "grad_norm": 0.7070814967155457, "learning_rate": 8.099433595125838e-06, "loss": 1.3486, "mean_token_accuracy": 0.6716959228118261, "num_tokens": 1881373248.0, "step": 11222 }, { "entropy": 1.7694110969702403, "epoch": 1.232896652110626, "grad_norm": 0.6588417887687683, "learning_rate": 8.097917944227031e-06, "loss": 1.4997, "mean_token_accuracy": 0.6302092870076498, "num_tokens": 1881541933.0, "step": 11223 }, { "entropy": 1.689767171939214, "epoch": 1.2330065090219988, "grad_norm": 0.8221830129623413, "learning_rate": 8.096402385177816e-06, "loss": 1.4524, "mean_token_accuracy": 0.6705189446608225, "num_tokens": 1881695653.0, "step": 11224 }, { "entropy": 1.7141635119915009, "epoch": 1.2331163659333717, "grad_norm": 0.6804819107055664, "learning_rate": 8.094886918026153e-06, "loss": 1.305, "mean_token_accuracy": 0.6599769194920858, "num_tokens": 1881827348.0, "step": 11225 }, { "entropy": 1.6513873438040416, "epoch": 1.2332262228447448, "grad_norm": 0.6210925579071045, "learning_rate": 8.093371542820007e-06, "loss": 1.3637, "mean_token_accuracy": 0.6525876174370447, "num_tokens": 1881985777.0, "step": 11226 }, { "entropy": 1.683081477880478, "epoch": 1.2333360797561177, "grad_norm": 0.7101804614067078, "learning_rate": 8.09185625960735e-06, "loss": 1.2141, "mean_token_accuracy": 0.6824707140525182, "num_tokens": 1882099438.0, "step": 11227 }, { "entropy": 1.7311066389083862, "epoch": 1.2334459366674906, "grad_norm": 0.7459114789962769, "learning_rate": 8.090341068436125e-06, "loss": 1.3144, "mean_token_accuracy": 0.6670710841814677, "num_tokens": 1882267699.0, "step": 11228 }, { "entropy": 1.6882832149664562, "epoch": 1.2335557935788635, "grad_norm": 0.714763879776001, "learning_rate": 8.088825969354298e-06, "loss": 1.2732, "mean_token_accuracy": 0.6782094736893972, "num_tokens": 1882453057.0, "step": 11229 }, { "entropy": 1.684934099515279, "epoch": 1.2336656504902366, "grad_norm": 0.638083279132843, "learning_rate": 8.087310962409818e-06, "loss": 1.341, "mean_token_accuracy": 0.661807561914126, "num_tokens": 1882603582.0, "step": 11230 }, { "entropy": 1.7063041031360626, "epoch": 1.2337755074016095, "grad_norm": 0.6284477710723877, "learning_rate": 8.085796047650632e-06, "loss": 1.5387, "mean_token_accuracy": 0.6299227277437845, "num_tokens": 1882851492.0, "step": 11231 }, { "entropy": 1.7208701372146606, "epoch": 1.2338853643129823, "grad_norm": 0.7093353867530823, "learning_rate": 8.084281225124684e-06, "loss": 1.4154, "mean_token_accuracy": 0.6541569431622823, "num_tokens": 1882989394.0, "step": 11232 }, { "entropy": 1.798000564177831, "epoch": 1.2339952212243552, "grad_norm": 0.834276556968689, "learning_rate": 8.082766494879928e-06, "loss": 1.5977, "mean_token_accuracy": 0.6478389153877894, "num_tokens": 1883119346.0, "step": 11233 }, { "entropy": 1.7449569801489513, "epoch": 1.2341050781357281, "grad_norm": 0.6619470715522766, "learning_rate": 8.081251856964291e-06, "loss": 1.3306, "mean_token_accuracy": 0.6545126388470331, "num_tokens": 1883247560.0, "step": 11234 }, { "entropy": 1.7160409688949585, "epoch": 1.2342149350471012, "grad_norm": 0.736487865447998, "learning_rate": 8.079737311425723e-06, "loss": 1.4371, "mean_token_accuracy": 0.6482950200637182, "num_tokens": 1883402069.0, "step": 11235 }, { "entropy": 1.7530849079291027, "epoch": 1.234324791958474, "grad_norm": 0.8390946984291077, "learning_rate": 8.078222858312152e-06, "loss": 1.504, "mean_token_accuracy": 0.6466073642174403, "num_tokens": 1883577023.0, "step": 11236 }, { "entropy": 1.7000373403231304, "epoch": 1.234434648869847, "grad_norm": 0.6646814942359924, "learning_rate": 8.07670849767151e-06, "loss": 1.2625, "mean_token_accuracy": 0.6718258758385977, "num_tokens": 1883700619.0, "step": 11237 }, { "entropy": 1.745482623577118, "epoch": 1.2345445057812199, "grad_norm": 0.6695995330810547, "learning_rate": 8.075194229551726e-06, "loss": 1.3949, "mean_token_accuracy": 0.6459067513545355, "num_tokens": 1883864316.0, "step": 11238 }, { "entropy": 1.6707193851470947, "epoch": 1.234654362692593, "grad_norm": 0.6652836799621582, "learning_rate": 8.073680054000733e-06, "loss": 1.497, "mean_token_accuracy": 0.6416679819424947, "num_tokens": 1884073039.0, "step": 11239 }, { "entropy": 1.73182346423467, "epoch": 1.2347642196039659, "grad_norm": 0.7183116674423218, "learning_rate": 8.07216597106644e-06, "loss": 1.3477, "mean_token_accuracy": 0.6592358897129694, "num_tokens": 1884211467.0, "step": 11240 }, { "entropy": 1.6830492317676544, "epoch": 1.2348740765153388, "grad_norm": 0.6599522233009338, "learning_rate": 8.070651980796775e-06, "loss": 1.4568, "mean_token_accuracy": 0.6476858655611674, "num_tokens": 1884378829.0, "step": 11241 }, { "entropy": 1.686943491299947, "epoch": 1.2349839334267116, "grad_norm": 0.607049286365509, "learning_rate": 8.06913808323966e-06, "loss": 1.5163, "mean_token_accuracy": 0.6283295204242071, "num_tokens": 1884603951.0, "step": 11242 }, { "entropy": 1.705921232700348, "epoch": 1.2350937903380848, "grad_norm": 0.7713742852210999, "learning_rate": 8.067624278443e-06, "loss": 1.4968, "mean_token_accuracy": 0.6453157613674799, "num_tokens": 1884801772.0, "step": 11243 }, { "entropy": 1.751990258693695, "epoch": 1.2352036472494576, "grad_norm": 0.8362163305282593, "learning_rate": 8.06611056645471e-06, "loss": 1.446, "mean_token_accuracy": 0.653899297118187, "num_tokens": 1884960483.0, "step": 11244 }, { "entropy": 1.643377035856247, "epoch": 1.2353135041608305, "grad_norm": 0.5995488166809082, "learning_rate": 8.064596947322703e-06, "loss": 1.429, "mean_token_accuracy": 0.6459860801696777, "num_tokens": 1885182089.0, "step": 11245 }, { "entropy": 1.6707975268363953, "epoch": 1.2354233610722034, "grad_norm": 0.6349611282348633, "learning_rate": 8.063083421094875e-06, "loss": 1.3166, "mean_token_accuracy": 0.6706758588552475, "num_tokens": 1885352722.0, "step": 11246 }, { "entropy": 1.73516180117925, "epoch": 1.2355332179835763, "grad_norm": 0.6147273182868958, "learning_rate": 8.061569987819138e-06, "loss": 1.4639, "mean_token_accuracy": 0.6467922131220499, "num_tokens": 1885566345.0, "step": 11247 }, { "entropy": 1.716229885816574, "epoch": 1.2356430748949494, "grad_norm": 0.7673629522323608, "learning_rate": 8.060056647543382e-06, "loss": 1.4798, "mean_token_accuracy": 0.6467408984899521, "num_tokens": 1885762755.0, "step": 11248 }, { "entropy": 1.7514924108982086, "epoch": 1.2357529318063223, "grad_norm": 0.7376429438591003, "learning_rate": 8.058543400315511e-06, "loss": 1.336, "mean_token_accuracy": 0.6571770658095678, "num_tokens": 1885914416.0, "step": 11249 }, { "entropy": 1.7116054991881053, "epoch": 1.2358627887176952, "grad_norm": 1.5102351903915405, "learning_rate": 8.057030246183416e-06, "loss": 1.6694, "mean_token_accuracy": 0.6353745808204015, "num_tokens": 1886111948.0, "step": 11250 }, { "entropy": 1.6769183973471324, "epoch": 1.235972645629068, "grad_norm": 0.9520527720451355, "learning_rate": 8.055517185194988e-06, "loss": 1.3784, "mean_token_accuracy": 0.6644560744365057, "num_tokens": 1886245502.0, "step": 11251 }, { "entropy": 1.7583904763062794, "epoch": 1.2360825025404412, "grad_norm": 0.810713529586792, "learning_rate": 8.054004217398108e-06, "loss": 1.4918, "mean_token_accuracy": 0.6517674972613653, "num_tokens": 1886405065.0, "step": 11252 }, { "entropy": 1.6664839486281078, "epoch": 1.236192359451814, "grad_norm": 0.6370511651039124, "learning_rate": 8.052491342840677e-06, "loss": 1.3647, "mean_token_accuracy": 0.6623385399580002, "num_tokens": 1886553072.0, "step": 11253 }, { "entropy": 1.737219403187434, "epoch": 1.236302216363187, "grad_norm": 0.7130185961723328, "learning_rate": 8.05097856157056e-06, "loss": 1.3521, "mean_token_accuracy": 0.6642791330814362, "num_tokens": 1886689924.0, "step": 11254 }, { "entropy": 1.7893791596094768, "epoch": 1.2364120732745598, "grad_norm": 0.6922145485877991, "learning_rate": 8.049465873635644e-06, "loss": 1.4279, "mean_token_accuracy": 0.6482027868429819, "num_tokens": 1886819576.0, "step": 11255 }, { "entropy": 1.7212933500607808, "epoch": 1.236521930185933, "grad_norm": 0.6548290848731995, "learning_rate": 8.047953279083805e-06, "loss": 1.4333, "mean_token_accuracy": 0.650567352771759, "num_tokens": 1887004217.0, "step": 11256 }, { "entropy": 1.7187994420528412, "epoch": 1.2366317870973058, "grad_norm": 0.6271427273750305, "learning_rate": 8.046440777962914e-06, "loss": 1.4241, "mean_token_accuracy": 0.6418495823939642, "num_tokens": 1887209350.0, "step": 11257 }, { "entropy": 1.7576852043469746, "epoch": 1.2367416440086787, "grad_norm": 0.7661997675895691, "learning_rate": 8.044928370320837e-06, "loss": 1.5276, "mean_token_accuracy": 0.6394319285949072, "num_tokens": 1887413418.0, "step": 11258 }, { "entropy": 1.7255164881547291, "epoch": 1.2368515009200516, "grad_norm": 0.7709239721298218, "learning_rate": 8.043416056205453e-06, "loss": 1.4078, "mean_token_accuracy": 0.6649090001980463, "num_tokens": 1887560944.0, "step": 11259 }, { "entropy": 1.692698359489441, "epoch": 1.2369613578314245, "grad_norm": 0.8086570501327515, "learning_rate": 8.041903835664615e-06, "loss": 1.4357, "mean_token_accuracy": 0.6539787004391352, "num_tokens": 1887718710.0, "step": 11260 }, { "entropy": 1.7619259258111317, "epoch": 1.2370712147427976, "grad_norm": 0.7824429869651794, "learning_rate": 8.040391708746186e-06, "loss": 1.6336, "mean_token_accuracy": 0.6352614412705103, "num_tokens": 1887896912.0, "step": 11261 }, { "entropy": 1.6718494693438213, "epoch": 1.2371810716541705, "grad_norm": 0.7123764157295227, "learning_rate": 8.038879675498031e-06, "loss": 1.3994, "mean_token_accuracy": 0.6606058677037557, "num_tokens": 1888048701.0, "step": 11262 }, { "entropy": 1.6848260561625164, "epoch": 1.2372909285655433, "grad_norm": 0.7577449679374695, "learning_rate": 8.037367735967995e-06, "loss": 1.522, "mean_token_accuracy": 0.6498318860928217, "num_tokens": 1888223514.0, "step": 11263 }, { "entropy": 1.6410026550292969, "epoch": 1.2374007854769162, "grad_norm": 0.6510109901428223, "learning_rate": 8.035855890203934e-06, "loss": 1.4112, "mean_token_accuracy": 0.6637563705444336, "num_tokens": 1888407644.0, "step": 11264 }, { "entropy": 1.7332176466782887, "epoch": 1.2375106423882893, "grad_norm": 0.791204571723938, "learning_rate": 8.034344138253704e-06, "loss": 1.2795, "mean_token_accuracy": 0.6705978065729141, "num_tokens": 1888559368.0, "step": 11265 }, { "entropy": 1.7388821343580882, "epoch": 1.2376204992996622, "grad_norm": 0.7957805395126343, "learning_rate": 8.03283248016514e-06, "loss": 1.4571, "mean_token_accuracy": 0.6462946683168411, "num_tokens": 1888761523.0, "step": 11266 }, { "entropy": 1.749239871899287, "epoch": 1.237730356211035, "grad_norm": 0.7862349152565002, "learning_rate": 8.031320915986093e-06, "loss": 1.2856, "mean_token_accuracy": 0.6639771660168966, "num_tokens": 1888914049.0, "step": 11267 }, { "entropy": 1.637138585249583, "epoch": 1.237840213122408, "grad_norm": 0.6658058762550354, "learning_rate": 8.029809445764404e-06, "loss": 1.3354, "mean_token_accuracy": 0.661085252960523, "num_tokens": 1889102287.0, "step": 11268 }, { "entropy": 1.769773135582606, "epoch": 1.237950070033781, "grad_norm": 0.8036999702453613, "learning_rate": 8.028298069547907e-06, "loss": 1.5082, "mean_token_accuracy": 0.6557409813006719, "num_tokens": 1889224418.0, "step": 11269 }, { "entropy": 1.7286994357903798, "epoch": 1.238059926945154, "grad_norm": 0.7675381898880005, "learning_rate": 8.02678678738443e-06, "loss": 1.5319, "mean_token_accuracy": 0.6377677967151006, "num_tokens": 1889439985.0, "step": 11270 }, { "entropy": 1.681524654229482, "epoch": 1.2381697838565269, "grad_norm": 0.702340304851532, "learning_rate": 8.025275599321825e-06, "loss": 1.3766, "mean_token_accuracy": 0.6608841866254807, "num_tokens": 1889599434.0, "step": 11271 }, { "entropy": 1.6762547592322032, "epoch": 1.2382796407678998, "grad_norm": 0.6304272413253784, "learning_rate": 8.023764505407894e-06, "loss": 1.4182, "mean_token_accuracy": 0.6476298222939173, "num_tokens": 1889807142.0, "step": 11272 }, { "entropy": 1.7101092040538788, "epoch": 1.2383894976792726, "grad_norm": 0.675635814666748, "learning_rate": 8.02225350569048e-06, "loss": 1.3755, "mean_token_accuracy": 0.6752298523982366, "num_tokens": 1889984200.0, "step": 11273 }, { "entropy": 1.7226931552092235, "epoch": 1.2384993545906458, "grad_norm": 0.7080081701278687, "learning_rate": 8.020742600217403e-06, "loss": 1.4787, "mean_token_accuracy": 0.6550223429997762, "num_tokens": 1890184448.0, "step": 11274 }, { "entropy": 1.723098615805308, "epoch": 1.2386092115020186, "grad_norm": 0.7677369713783264, "learning_rate": 8.019231789036477e-06, "loss": 1.4064, "mean_token_accuracy": 0.6541879673798879, "num_tokens": 1890351047.0, "step": 11275 }, { "entropy": 1.6581893960634868, "epoch": 1.2387190684133915, "grad_norm": 0.6451082229614258, "learning_rate": 8.017721072195522e-06, "loss": 1.5279, "mean_token_accuracy": 0.6365671356519064, "num_tokens": 1890543514.0, "step": 11276 }, { "entropy": 1.7340434888998668, "epoch": 1.2388289253247644, "grad_norm": 0.7805740237236023, "learning_rate": 8.016210449742354e-06, "loss": 1.3005, "mean_token_accuracy": 0.6726719886064529, "num_tokens": 1890661486.0, "step": 11277 }, { "entropy": 1.6500552793343861, "epoch": 1.2389387822361375, "grad_norm": 0.658091127872467, "learning_rate": 8.014699921724777e-06, "loss": 1.424, "mean_token_accuracy": 0.6632284422715505, "num_tokens": 1890861914.0, "step": 11278 }, { "entropy": 1.768718143304189, "epoch": 1.2390486391475104, "grad_norm": 0.6783964037895203, "learning_rate": 8.013189488190605e-06, "loss": 1.4826, "mean_token_accuracy": 0.6467755486567816, "num_tokens": 1891080586.0, "step": 11279 }, { "entropy": 1.7763389150301616, "epoch": 1.2391584960588833, "grad_norm": 0.688116729259491, "learning_rate": 8.01167914918764e-06, "loss": 1.5137, "mean_token_accuracy": 0.6396682957808176, "num_tokens": 1891230652.0, "step": 11280 }, { "entropy": 1.7367208699385326, "epoch": 1.2392683529702562, "grad_norm": 0.688133180141449, "learning_rate": 8.010168904763681e-06, "loss": 1.5204, "mean_token_accuracy": 0.6416043788194656, "num_tokens": 1891411181.0, "step": 11281 }, { "entropy": 1.717280815045039, "epoch": 1.2393782098816293, "grad_norm": 0.5666574239730835, "learning_rate": 8.008658754966527e-06, "loss": 1.3223, "mean_token_accuracy": 0.6569070219993591, "num_tokens": 1891627961.0, "step": 11282 }, { "entropy": 1.6722883383433025, "epoch": 1.2394880667930022, "grad_norm": 0.5551705360412598, "learning_rate": 8.007148699843982e-06, "loss": 1.4714, "mean_token_accuracy": 0.6321922043959299, "num_tokens": 1891858469.0, "step": 11283 }, { "entropy": 1.6376002232233684, "epoch": 1.239597923704375, "grad_norm": 0.8324296474456787, "learning_rate": 8.00563873944383e-06, "loss": 1.3256, "mean_token_accuracy": 0.6628724733988444, "num_tokens": 1892054586.0, "step": 11284 }, { "entropy": 1.7216839094956715, "epoch": 1.239707780615748, "grad_norm": 0.6770010590553284, "learning_rate": 8.004128873813859e-06, "loss": 1.3593, "mean_token_accuracy": 0.655941034356753, "num_tokens": 1892219818.0, "step": 11285 }, { "entropy": 1.6371920903523762, "epoch": 1.2398176375271208, "grad_norm": 0.6165127158164978, "learning_rate": 8.002619103001863e-06, "loss": 1.4678, "mean_token_accuracy": 0.6445280561844507, "num_tokens": 1892423750.0, "step": 11286 }, { "entropy": 1.716288646062215, "epoch": 1.239927494438494, "grad_norm": 0.6745500564575195, "learning_rate": 8.00110942705562e-06, "loss": 1.3397, "mean_token_accuracy": 0.675658643245697, "num_tokens": 1892583215.0, "step": 11287 }, { "entropy": 1.7311961750189464, "epoch": 1.2400373513498668, "grad_norm": 0.681602954864502, "learning_rate": 7.999599846022909e-06, "loss": 1.3579, "mean_token_accuracy": 0.665436198314031, "num_tokens": 1892769578.0, "step": 11288 }, { "entropy": 1.6320242981115978, "epoch": 1.2401472082612397, "grad_norm": 0.6943032741546631, "learning_rate": 7.998090359951518e-06, "loss": 1.3245, "mean_token_accuracy": 0.6648548195759455, "num_tokens": 1892920874.0, "step": 11289 }, { "entropy": 1.7329721252123516, "epoch": 1.2402570651726126, "grad_norm": 0.6341266632080078, "learning_rate": 7.996580968889209e-06, "loss": 1.4349, "mean_token_accuracy": 0.6397968182961146, "num_tokens": 1893096196.0, "step": 11290 }, { "entropy": 1.715551386276881, "epoch": 1.2403669220839857, "grad_norm": 0.6576389670372009, "learning_rate": 7.99507167288376e-06, "loss": 1.3832, "mean_token_accuracy": 0.6513058344523112, "num_tokens": 1893260174.0, "step": 11291 }, { "entropy": 1.7113690475622814, "epoch": 1.2404767789953586, "grad_norm": 0.653464138507843, "learning_rate": 7.99356247198294e-06, "loss": 1.5356, "mean_token_accuracy": 0.6355665028095245, "num_tokens": 1893463782.0, "step": 11292 }, { "entropy": 1.7006126741568248, "epoch": 1.2405866359067315, "grad_norm": 3.044800043106079, "learning_rate": 7.992053366234513e-06, "loss": 1.2922, "mean_token_accuracy": 0.660791665315628, "num_tokens": 1893671388.0, "step": 11293 }, { "entropy": 1.662010023991267, "epoch": 1.2406964928181043, "grad_norm": 0.680200457572937, "learning_rate": 7.990544355686239e-06, "loss": 1.4566, "mean_token_accuracy": 0.6599440028270086, "num_tokens": 1893824239.0, "step": 11294 }, { "entropy": 1.6833816369374592, "epoch": 1.2408063497294775, "grad_norm": 0.6722885966300964, "learning_rate": 7.989035440385885e-06, "loss": 1.5087, "mean_token_accuracy": 0.6377679258584976, "num_tokens": 1894057681.0, "step": 11295 }, { "entropy": 1.7269285221894581, "epoch": 1.2409162066408503, "grad_norm": 0.8462622165679932, "learning_rate": 7.987526620381197e-06, "loss": 1.5224, "mean_token_accuracy": 0.6455184866984686, "num_tokens": 1894213333.0, "step": 11296 }, { "entropy": 1.720696081717809, "epoch": 1.2410260635522232, "grad_norm": 0.7249704599380493, "learning_rate": 7.986017895719934e-06, "loss": 1.3996, "mean_token_accuracy": 0.6568809896707535, "num_tokens": 1894371395.0, "step": 11297 }, { "entropy": 1.6835100750128429, "epoch": 1.241135920463596, "grad_norm": 0.6641572713851929, "learning_rate": 7.984509266449854e-06, "loss": 1.3834, "mean_token_accuracy": 0.6554353535175323, "num_tokens": 1894511956.0, "step": 11298 }, { "entropy": 1.7112232049306233, "epoch": 1.241245777374969, "grad_norm": 0.7815585732460022, "learning_rate": 7.98300073261869e-06, "loss": 1.2925, "mean_token_accuracy": 0.6678037742773691, "num_tokens": 1894618068.0, "step": 11299 }, { "entropy": 1.755595584710439, "epoch": 1.241355634286342, "grad_norm": 0.6250059604644775, "learning_rate": 7.981492294274194e-06, "loss": 1.3003, "mean_token_accuracy": 0.6646648645401001, "num_tokens": 1894758381.0, "step": 11300 }, { "entropy": 1.75324742992719, "epoch": 1.241465491197715, "grad_norm": 0.7397940754890442, "learning_rate": 7.97998395146411e-06, "loss": 1.4168, "mean_token_accuracy": 0.6665991842746735, "num_tokens": 1894892394.0, "step": 11301 }, { "entropy": 1.7075209816296895, "epoch": 1.2415753481090879, "grad_norm": 0.7101148366928101, "learning_rate": 7.978475704236169e-06, "loss": 1.3675, "mean_token_accuracy": 0.6513032168149948, "num_tokens": 1895077445.0, "step": 11302 }, { "entropy": 1.7482849955558777, "epoch": 1.2416852050204608, "grad_norm": 0.72342449426651, "learning_rate": 7.976967552638111e-06, "loss": 1.3761, "mean_token_accuracy": 0.6645904332399368, "num_tokens": 1895252858.0, "step": 11303 }, { "entropy": 1.6877289811770122, "epoch": 1.2417950619318339, "grad_norm": 0.6635198593139648, "learning_rate": 7.975459496717672e-06, "loss": 1.2438, "mean_token_accuracy": 0.6745279332002004, "num_tokens": 1895422983.0, "step": 11304 }, { "entropy": 1.686889111995697, "epoch": 1.2419049188432068, "grad_norm": 0.6177757978439331, "learning_rate": 7.973951536522574e-06, "loss": 1.4462, "mean_token_accuracy": 0.6353614429632822, "num_tokens": 1895599337.0, "step": 11305 }, { "entropy": 1.6756052076816559, "epoch": 1.2420147757545796, "grad_norm": 0.6626149415969849, "learning_rate": 7.972443672100543e-06, "loss": 1.2887, "mean_token_accuracy": 0.6680636157592138, "num_tokens": 1895731538.0, "step": 11306 }, { "entropy": 1.685206929842631, "epoch": 1.2421246326659525, "grad_norm": 0.7096571326255798, "learning_rate": 7.970935903499312e-06, "loss": 1.2293, "mean_token_accuracy": 0.679922545949618, "num_tokens": 1895859633.0, "step": 11307 }, { "entropy": 1.6315878629684448, "epoch": 1.2422344895773256, "grad_norm": 0.6607580780982971, "learning_rate": 7.96942823076659e-06, "loss": 1.2639, "mean_token_accuracy": 0.6793940017620722, "num_tokens": 1896020071.0, "step": 11308 }, { "entropy": 1.7243158320585887, "epoch": 1.2423443464886985, "grad_norm": 0.6347528696060181, "learning_rate": 7.967920653950105e-06, "loss": 1.3314, "mean_token_accuracy": 0.6642320106426874, "num_tokens": 1896228945.0, "step": 11309 }, { "entropy": 1.74300483862559, "epoch": 1.2424542034000714, "grad_norm": 0.7812113165855408, "learning_rate": 7.966413173097559e-06, "loss": 1.4193, "mean_token_accuracy": 0.6341168930133184, "num_tokens": 1896375603.0, "step": 11310 }, { "entropy": 1.741927295923233, "epoch": 1.2425640603114443, "grad_norm": 0.9015730619430542, "learning_rate": 7.96490578825667e-06, "loss": 1.3465, "mean_token_accuracy": 0.6564824233452479, "num_tokens": 1896517786.0, "step": 11311 }, { "entropy": 1.7557086944580078, "epoch": 1.2426739172228172, "grad_norm": 0.6636369824409485, "learning_rate": 7.963398499475146e-06, "loss": 1.5039, "mean_token_accuracy": 0.6422920376062393, "num_tokens": 1896712152.0, "step": 11312 }, { "entropy": 1.7586438258488972, "epoch": 1.2427837741341903, "grad_norm": 0.7631456255912781, "learning_rate": 7.961891306800691e-06, "loss": 1.4998, "mean_token_accuracy": 0.6448372304439545, "num_tokens": 1896863330.0, "step": 11313 }, { "entropy": 1.7146364947160084, "epoch": 1.2428936310455632, "grad_norm": 0.7649849653244019, "learning_rate": 7.960384210281005e-06, "loss": 1.4133, "mean_token_accuracy": 0.6560538013776144, "num_tokens": 1897024053.0, "step": 11314 }, { "entropy": 1.6867588957150776, "epoch": 1.243003487956936, "grad_norm": 0.7380170226097107, "learning_rate": 7.958877209963794e-06, "loss": 1.3173, "mean_token_accuracy": 0.6729622135559717, "num_tokens": 1897153363.0, "step": 11315 }, { "entropy": 1.7193871140480042, "epoch": 1.243113344868309, "grad_norm": 0.8186469674110413, "learning_rate": 7.957370305896744e-06, "loss": 1.3701, "mean_token_accuracy": 0.6554479797681173, "num_tokens": 1897330815.0, "step": 11316 }, { "entropy": 1.7145767311255138, "epoch": 1.243223201779682, "grad_norm": 0.625273585319519, "learning_rate": 7.955863498127555e-06, "loss": 1.4123, "mean_token_accuracy": 0.6473723153273264, "num_tokens": 1897486340.0, "step": 11317 }, { "entropy": 1.6745652059714, "epoch": 1.243333058691055, "grad_norm": 0.6851847171783447, "learning_rate": 7.954356786703916e-06, "loss": 1.3004, "mean_token_accuracy": 0.6670237829287847, "num_tokens": 1897651114.0, "step": 11318 }, { "entropy": 1.72303906083107, "epoch": 1.2434429156024278, "grad_norm": 0.6741484999656677, "learning_rate": 7.95285017167351e-06, "loss": 1.4053, "mean_token_accuracy": 0.659760649005572, "num_tokens": 1897842734.0, "step": 11319 }, { "entropy": 1.708362211783727, "epoch": 1.2435527725138007, "grad_norm": 0.5985382199287415, "learning_rate": 7.951343653084023e-06, "loss": 1.4592, "mean_token_accuracy": 0.6402342220147451, "num_tokens": 1898046316.0, "step": 11320 }, { "entropy": 1.6680605312188466, "epoch": 1.2436626294251738, "grad_norm": 0.642793595790863, "learning_rate": 7.94983723098314e-06, "loss": 1.4187, "mean_token_accuracy": 0.6495463897784551, "num_tokens": 1898222912.0, "step": 11321 }, { "entropy": 1.686878780523936, "epoch": 1.2437724863365467, "grad_norm": 0.6895222067832947, "learning_rate": 7.948330905418527e-06, "loss": 1.4837, "mean_token_accuracy": 0.6618087788422903, "num_tokens": 1898404255.0, "step": 11322 }, { "entropy": 1.7111988961696625, "epoch": 1.2438823432479196, "grad_norm": 0.7032332420349121, "learning_rate": 7.94682467643787e-06, "loss": 1.2716, "mean_token_accuracy": 0.6768457492192587, "num_tokens": 1898544016.0, "step": 11323 }, { "entropy": 1.7379381159941356, "epoch": 1.2439922001592925, "grad_norm": 0.664441704750061, "learning_rate": 7.945318544088836e-06, "loss": 1.2956, "mean_token_accuracy": 0.6739976902802786, "num_tokens": 1898669680.0, "step": 11324 }, { "entropy": 1.7190495828787486, "epoch": 1.2441020570706653, "grad_norm": 0.636641800403595, "learning_rate": 7.943812508419093e-06, "loss": 1.4763, "mean_token_accuracy": 0.6462114254633585, "num_tokens": 1898880825.0, "step": 11325 }, { "entropy": 1.6952326397101085, "epoch": 1.2442119139820385, "grad_norm": 0.7681459188461304, "learning_rate": 7.942306569476303e-06, "loss": 1.198, "mean_token_accuracy": 0.6756665309270223, "num_tokens": 1899015166.0, "step": 11326 }, { "entropy": 1.7597149014472961, "epoch": 1.2443217708934113, "grad_norm": 0.7061123251914978, "learning_rate": 7.940800727308142e-06, "loss": 1.4911, "mean_token_accuracy": 0.6365721672773361, "num_tokens": 1899174895.0, "step": 11327 }, { "entropy": 1.6776218215624492, "epoch": 1.2444316278047842, "grad_norm": 0.8109696507453918, "learning_rate": 7.93929498196225e-06, "loss": 1.279, "mean_token_accuracy": 0.6621010253826777, "num_tokens": 1899330055.0, "step": 11328 }, { "entropy": 1.685188114643097, "epoch": 1.244541484716157, "grad_norm": 0.7661949396133423, "learning_rate": 7.937789333486296e-06, "loss": 1.2638, "mean_token_accuracy": 0.684383233388265, "num_tokens": 1899458252.0, "step": 11329 }, { "entropy": 1.7264382243156433, "epoch": 1.2446513416275302, "grad_norm": 0.6619189977645874, "learning_rate": 7.936283781927934e-06, "loss": 1.4722, "mean_token_accuracy": 0.6431934088468552, "num_tokens": 1899621097.0, "step": 11330 }, { "entropy": 1.7132483919461567, "epoch": 1.244761198538903, "grad_norm": 0.6909228563308716, "learning_rate": 7.934778327334804e-06, "loss": 1.4797, "mean_token_accuracy": 0.6424340556065241, "num_tokens": 1899790828.0, "step": 11331 }, { "entropy": 1.6885711252689362, "epoch": 1.244871055450276, "grad_norm": 0.7082422375679016, "learning_rate": 7.933272969754558e-06, "loss": 1.4103, "mean_token_accuracy": 0.6677902390559515, "num_tokens": 1899958848.0, "step": 11332 }, { "entropy": 1.7759423851966858, "epoch": 1.2449809123616489, "grad_norm": 0.7639626860618591, "learning_rate": 7.931767709234848e-06, "loss": 1.4212, "mean_token_accuracy": 0.662767251332601, "num_tokens": 1900092051.0, "step": 11333 }, { "entropy": 1.7315894961357117, "epoch": 1.245090769273022, "grad_norm": 0.6064445972442627, "learning_rate": 7.9302625458233e-06, "loss": 1.4768, "mean_token_accuracy": 0.6455651024977366, "num_tokens": 1900320309.0, "step": 11334 }, { "entropy": 1.6946211953957875, "epoch": 1.2452006261843949, "grad_norm": 0.7126203179359436, "learning_rate": 7.928757479567561e-06, "loss": 1.3931, "mean_token_accuracy": 0.6527270923058192, "num_tokens": 1900534165.0, "step": 11335 }, { "entropy": 1.6968140602111816, "epoch": 1.2453104830957678, "grad_norm": 0.8074250817298889, "learning_rate": 7.927252510515266e-06, "loss": 1.5176, "mean_token_accuracy": 0.6537874937057495, "num_tokens": 1900743441.0, "step": 11336 }, { "entropy": 1.7140926122665405, "epoch": 1.2454203400071406, "grad_norm": 0.7751270532608032, "learning_rate": 7.925747638714043e-06, "loss": 1.4288, "mean_token_accuracy": 0.6500294556220373, "num_tokens": 1900930640.0, "step": 11337 }, { "entropy": 1.6379591524600983, "epoch": 1.2455301969185135, "grad_norm": 0.7415010929107666, "learning_rate": 7.92424286421152e-06, "loss": 1.2861, "mean_token_accuracy": 0.6670693109432856, "num_tokens": 1901069110.0, "step": 11338 }, { "entropy": 1.6497638821601868, "epoch": 1.2456400538298866, "grad_norm": 0.7474594116210938, "learning_rate": 7.922738187055329e-06, "loss": 1.3534, "mean_token_accuracy": 0.6680422226587931, "num_tokens": 1901229274.0, "step": 11339 }, { "entropy": 1.6657158931096394, "epoch": 1.2457499107412595, "grad_norm": 0.6230567097663879, "learning_rate": 7.921233607293084e-06, "loss": 1.3925, "mean_token_accuracy": 0.6576641102631887, "num_tokens": 1901396643.0, "step": 11340 }, { "entropy": 1.6605386932690938, "epoch": 1.2458597676526324, "grad_norm": 0.711249828338623, "learning_rate": 7.919729124972409e-06, "loss": 1.3159, "mean_token_accuracy": 0.6733442395925522, "num_tokens": 1901560464.0, "step": 11341 }, { "entropy": 1.7819677889347076, "epoch": 1.2459696245640055, "grad_norm": 0.6379202008247375, "learning_rate": 7.91822474014092e-06, "loss": 1.4941, "mean_token_accuracy": 0.6413015226523081, "num_tokens": 1901759395.0, "step": 11342 }, { "entropy": 1.7399208843708038, "epoch": 1.2460794814753784, "grad_norm": 0.811903715133667, "learning_rate": 7.916720452846229e-06, "loss": 1.538, "mean_token_accuracy": 0.6447446842988332, "num_tokens": 1901901676.0, "step": 11343 }, { "entropy": 1.7778501212596893, "epoch": 1.2461893383867513, "grad_norm": 0.7180720567703247, "learning_rate": 7.915216263135942e-06, "loss": 1.449, "mean_token_accuracy": 0.6565718402465185, "num_tokens": 1902050193.0, "step": 11344 }, { "entropy": 1.765024612347285, "epoch": 1.2462991952981242, "grad_norm": 0.6027868390083313, "learning_rate": 7.91371217105768e-06, "loss": 1.4265, "mean_token_accuracy": 0.6494091699520746, "num_tokens": 1902235835.0, "step": 11345 }, { "entropy": 1.6627511084079742, "epoch": 1.246409052209497, "grad_norm": 0.9971237182617188, "learning_rate": 7.912208176659028e-06, "loss": 1.4272, "mean_token_accuracy": 0.6701801866292953, "num_tokens": 1902389123.0, "step": 11346 }, { "entropy": 1.6802993714809418, "epoch": 1.2465189091208702, "grad_norm": 0.6501787304878235, "learning_rate": 7.9107042799876e-06, "loss": 1.4603, "mean_token_accuracy": 0.6509832988182703, "num_tokens": 1902539417.0, "step": 11347 }, { "entropy": 1.7607911229133606, "epoch": 1.246628766032243, "grad_norm": 0.7493710517883301, "learning_rate": 7.909200481090989e-06, "loss": 1.4329, "mean_token_accuracy": 0.6626504063606262, "num_tokens": 1902707782.0, "step": 11348 }, { "entropy": 1.6591468056042988, "epoch": 1.246738622943616, "grad_norm": 0.7652831673622131, "learning_rate": 7.90769678001679e-06, "loss": 1.2996, "mean_token_accuracy": 0.675150990486145, "num_tokens": 1902829430.0, "step": 11349 }, { "entropy": 1.6999635299046834, "epoch": 1.2468484798549888, "grad_norm": 0.7982178330421448, "learning_rate": 7.906193176812591e-06, "loss": 1.1053, "mean_token_accuracy": 0.7014695952335993, "num_tokens": 1902925845.0, "step": 11350 }, { "entropy": 1.739738126595815, "epoch": 1.2469583367663617, "grad_norm": 0.7161890268325806, "learning_rate": 7.904689671525992e-06, "loss": 1.3593, "mean_token_accuracy": 0.6658004621664683, "num_tokens": 1903064238.0, "step": 11351 }, { "entropy": 1.7278717656930287, "epoch": 1.2470681936777348, "grad_norm": 0.6915018558502197, "learning_rate": 7.903186264204561e-06, "loss": 1.3432, "mean_token_accuracy": 0.6522951871156693, "num_tokens": 1903220088.0, "step": 11352 }, { "entropy": 1.7226141194502513, "epoch": 1.2471780505891077, "grad_norm": 0.6510446667671204, "learning_rate": 7.901682954895893e-06, "loss": 1.5513, "mean_token_accuracy": 0.6328976154327393, "num_tokens": 1903418123.0, "step": 11353 }, { "entropy": 1.6626664996147156, "epoch": 1.2472879075004806, "grad_norm": 0.5966384410858154, "learning_rate": 7.900179743647567e-06, "loss": 1.4024, "mean_token_accuracy": 0.6576230376958847, "num_tokens": 1903595039.0, "step": 11354 }, { "entropy": 1.7284984985987346, "epoch": 1.2473977644118537, "grad_norm": 0.6498193740844727, "learning_rate": 7.898676630507152e-06, "loss": 1.3478, "mean_token_accuracy": 0.6563937862714132, "num_tokens": 1903733716.0, "step": 11355 }, { "entropy": 1.673979103565216, "epoch": 1.2475076213232266, "grad_norm": 0.776412308216095, "learning_rate": 7.89717361552222e-06, "loss": 1.2461, "mean_token_accuracy": 0.6760004907846451, "num_tokens": 1903851827.0, "step": 11356 }, { "entropy": 1.7170047760009766, "epoch": 1.2476174782345995, "grad_norm": 0.6476826667785645, "learning_rate": 7.895670698740354e-06, "loss": 1.4353, "mean_token_accuracy": 0.6535161038239797, "num_tokens": 1903994160.0, "step": 11357 }, { "entropy": 1.7057405809561412, "epoch": 1.2477273351459723, "grad_norm": 3.21343994140625, "learning_rate": 7.894167880209103e-06, "loss": 1.3062, "mean_token_accuracy": 0.6527599294980367, "num_tokens": 1904204321.0, "step": 11358 }, { "entropy": 1.7505437235037486, "epoch": 1.2478371920573452, "grad_norm": 0.704789936542511, "learning_rate": 7.892665159976042e-06, "loss": 1.4142, "mean_token_accuracy": 0.6686330437660217, "num_tokens": 1904352497.0, "step": 11359 }, { "entropy": 1.6835230986277263, "epoch": 1.2479470489687183, "grad_norm": 0.7824683785438538, "learning_rate": 7.89116253808873e-06, "loss": 1.2709, "mean_token_accuracy": 0.6724584052960078, "num_tokens": 1904462352.0, "step": 11360 }, { "entropy": 1.771289696296056, "epoch": 1.2480569058800912, "grad_norm": 0.6507266163825989, "learning_rate": 7.889660014594722e-06, "loss": 1.3916, "mean_token_accuracy": 0.6458721508582433, "num_tokens": 1904644989.0, "step": 11361 }, { "entropy": 1.6943688193957012, "epoch": 1.248166762791464, "grad_norm": 0.6878480315208435, "learning_rate": 7.888157589541571e-06, "loss": 1.3827, "mean_token_accuracy": 0.6601410458485285, "num_tokens": 1904811730.0, "step": 11362 }, { "entropy": 1.7335290908813477, "epoch": 1.248276619702837, "grad_norm": 0.6336010098457336, "learning_rate": 7.886655262976834e-06, "loss": 1.51, "mean_token_accuracy": 0.6341728915770849, "num_tokens": 1905005726.0, "step": 11363 }, { "entropy": 1.7121953169504802, "epoch": 1.2483864766142099, "grad_norm": 0.610726535320282, "learning_rate": 7.885153034948053e-06, "loss": 1.3719, "mean_token_accuracy": 0.655587320526441, "num_tokens": 1905219181.0, "step": 11364 }, { "entropy": 1.7200307448705037, "epoch": 1.248496333525583, "grad_norm": 0.6448392868041992, "learning_rate": 7.883650905502773e-06, "loss": 1.452, "mean_token_accuracy": 0.6528996278842291, "num_tokens": 1905429324.0, "step": 11365 }, { "entropy": 1.768402338027954, "epoch": 1.2486061904369559, "grad_norm": 0.6446058750152588, "learning_rate": 7.88214887468854e-06, "loss": 1.3712, "mean_token_accuracy": 0.6579476048549017, "num_tokens": 1905566991.0, "step": 11366 }, { "entropy": 1.7406864861647289, "epoch": 1.2487160473483288, "grad_norm": 0.7488144040107727, "learning_rate": 7.880646942552891e-06, "loss": 1.4148, "mean_token_accuracy": 0.6457947393258413, "num_tokens": 1905756851.0, "step": 11367 }, { "entropy": 1.7286293804645538, "epoch": 1.2488259042597019, "grad_norm": 0.7700992822647095, "learning_rate": 7.87914510914336e-06, "loss": 1.4692, "mean_token_accuracy": 0.6668734302123388, "num_tokens": 1905897609.0, "step": 11368 }, { "entropy": 1.6787353257338207, "epoch": 1.2489357611710747, "grad_norm": 0.6573531627655029, "learning_rate": 7.87764337450748e-06, "loss": 1.3161, "mean_token_accuracy": 0.6725091288487116, "num_tokens": 1906051094.0, "step": 11369 }, { "entropy": 1.7037298083305359, "epoch": 1.2490456180824476, "grad_norm": 0.7217747569084167, "learning_rate": 7.876141738692778e-06, "loss": 1.5271, "mean_token_accuracy": 0.6617752313613892, "num_tokens": 1906225865.0, "step": 11370 }, { "entropy": 1.7089182237784069, "epoch": 1.2491554749938205, "grad_norm": 0.7379319667816162, "learning_rate": 7.874640201746784e-06, "loss": 1.2766, "mean_token_accuracy": 0.6676273395617803, "num_tokens": 1906346116.0, "step": 11371 }, { "entropy": 1.6987358729044597, "epoch": 1.2492653319051934, "grad_norm": 0.756645679473877, "learning_rate": 7.87313876371702e-06, "loss": 1.3747, "mean_token_accuracy": 0.6652411719163259, "num_tokens": 1906558656.0, "step": 11372 }, { "entropy": 1.6623602509498596, "epoch": 1.2493751888165665, "grad_norm": 0.7025351524353027, "learning_rate": 7.871637424651002e-06, "loss": 1.3465, "mean_token_accuracy": 0.6570960233608881, "num_tokens": 1906710447.0, "step": 11373 }, { "entropy": 1.7358343799908955, "epoch": 1.2494850457279394, "grad_norm": 0.6764085292816162, "learning_rate": 7.870136184596253e-06, "loss": 1.41, "mean_token_accuracy": 0.6695780654748281, "num_tokens": 1906839460.0, "step": 11374 }, { "entropy": 1.7395348747571309, "epoch": 1.2495949026393123, "grad_norm": 0.6143444776535034, "learning_rate": 7.868635043600283e-06, "loss": 1.3852, "mean_token_accuracy": 0.6516972482204437, "num_tokens": 1906996950.0, "step": 11375 }, { "entropy": 1.7187215089797974, "epoch": 1.2497047595506852, "grad_norm": 0.6263564229011536, "learning_rate": 7.867134001710601e-06, "loss": 1.4024, "mean_token_accuracy": 0.6484654247760773, "num_tokens": 1907218349.0, "step": 11376 }, { "entropy": 1.7315069735050201, "epoch": 1.249814616462058, "grad_norm": 0.5838350653648376, "learning_rate": 7.865633058974718e-06, "loss": 1.4567, "mean_token_accuracy": 0.6523949603239695, "num_tokens": 1907459088.0, "step": 11377 }, { "entropy": 1.7416872481505077, "epoch": 1.2499244733734312, "grad_norm": 0.751085102558136, "learning_rate": 7.864132215440137e-06, "loss": 1.234, "mean_token_accuracy": 0.6816779424746832, "num_tokens": 1907560765.0, "step": 11378 }, { "entropy": 1.687682181596756, "epoch": 1.250034330284804, "grad_norm": 0.6116113662719727, "learning_rate": 7.862631471154357e-06, "loss": 1.2595, "mean_token_accuracy": 0.6819984763860703, "num_tokens": 1907720034.0, "step": 11379 }, { "entropy": 1.677247832218806, "epoch": 1.250144187196177, "grad_norm": 0.6379266977310181, "learning_rate": 7.861130826164878e-06, "loss": 1.4734, "mean_token_accuracy": 0.6537698358297348, "num_tokens": 1907896716.0, "step": 11380 }, { "entropy": 1.7170507113138835, "epoch": 1.25025404410755, "grad_norm": 0.6168753504753113, "learning_rate": 7.859630280519193e-06, "loss": 1.5527, "mean_token_accuracy": 0.642242968082428, "num_tokens": 1908096706.0, "step": 11381 }, { "entropy": 1.8267957270145416, "epoch": 1.250363901018923, "grad_norm": 0.7190276980400085, "learning_rate": 7.85812983426479e-06, "loss": 1.4579, "mean_token_accuracy": 0.6479291965564092, "num_tokens": 1908212863.0, "step": 11382 }, { "entropy": 1.7421042323112488, "epoch": 1.2504737579302958, "grad_norm": 0.6885977983474731, "learning_rate": 7.85662948744917e-06, "loss": 1.351, "mean_token_accuracy": 0.6630544364452362, "num_tokens": 1908387457.0, "step": 11383 }, { "entropy": 1.7527458270390828, "epoch": 1.2505836148416687, "grad_norm": 1.0296976566314697, "learning_rate": 7.855129240119808e-06, "loss": 1.4872, "mean_token_accuracy": 0.6270147214333216, "num_tokens": 1908595445.0, "step": 11384 }, { "entropy": 1.6783875326315563, "epoch": 1.2506934717530416, "grad_norm": 0.7553209066390991, "learning_rate": 7.853629092324187e-06, "loss": 1.5384, "mean_token_accuracy": 0.6514027168353399, "num_tokens": 1908751432.0, "step": 11385 }, { "entropy": 1.6871531903743744, "epoch": 1.2508033286644147, "grad_norm": 0.7314842343330383, "learning_rate": 7.852129044109788e-06, "loss": 1.2429, "mean_token_accuracy": 0.6730901698271433, "num_tokens": 1908862066.0, "step": 11386 }, { "entropy": 1.7323083678881328, "epoch": 1.2509131855757876, "grad_norm": 0.6713790893554688, "learning_rate": 7.850629095524086e-06, "loss": 1.4655, "mean_token_accuracy": 0.6579304486513138, "num_tokens": 1909003499.0, "step": 11387 }, { "entropy": 1.7376106083393097, "epoch": 1.2510230424871605, "grad_norm": 0.7411003708839417, "learning_rate": 7.849129246614552e-06, "loss": 1.3845, "mean_token_accuracy": 0.6707366009553274, "num_tokens": 1909184557.0, "step": 11388 }, { "entropy": 1.660805990298589, "epoch": 1.2511328993985333, "grad_norm": 0.6880229115486145, "learning_rate": 7.847629497428664e-06, "loss": 1.394, "mean_token_accuracy": 0.6483379105726877, "num_tokens": 1909382189.0, "step": 11389 }, { "entropy": 1.7267470955848694, "epoch": 1.2512427563099062, "grad_norm": 0.7655637860298157, "learning_rate": 7.846129848013874e-06, "loss": 1.3935, "mean_token_accuracy": 0.6489508698383967, "num_tokens": 1909567336.0, "step": 11390 }, { "entropy": 1.765625, "epoch": 1.2513526132212793, "grad_norm": 0.7343372702598572, "learning_rate": 7.844630298417657e-06, "loss": 1.2667, "mean_token_accuracy": 0.6655522038539251, "num_tokens": 1909736898.0, "step": 11391 }, { "entropy": 1.699706216653188, "epoch": 1.2514624701326522, "grad_norm": 0.6885928511619568, "learning_rate": 7.843130848687472e-06, "loss": 1.3203, "mean_token_accuracy": 0.6711514194806417, "num_tokens": 1909883541.0, "step": 11392 }, { "entropy": 1.7037067711353302, "epoch": 1.251572327044025, "grad_norm": 0.7248368263244629, "learning_rate": 7.84163149887077e-06, "loss": 1.3969, "mean_token_accuracy": 0.6453188508749008, "num_tokens": 1910013519.0, "step": 11393 }, { "entropy": 1.6497456729412079, "epoch": 1.2516821839553982, "grad_norm": 0.6989073753356934, "learning_rate": 7.840132249015005e-06, "loss": 1.2801, "mean_token_accuracy": 0.6834556013345718, "num_tokens": 1910183319.0, "step": 11394 }, { "entropy": 1.733015646537145, "epoch": 1.251792040866771, "grad_norm": 0.7218592166900635, "learning_rate": 7.838633099167636e-06, "loss": 1.2952, "mean_token_accuracy": 0.668768381079038, "num_tokens": 1910309032.0, "step": 11395 }, { "entropy": 1.6412979066371918, "epoch": 1.251901897778144, "grad_norm": 0.6749725341796875, "learning_rate": 7.837134049376101e-06, "loss": 1.4272, "mean_token_accuracy": 0.676101932922999, "num_tokens": 1910504707.0, "step": 11396 }, { "entropy": 1.6720272302627563, "epoch": 1.2520117546895169, "grad_norm": 0.6150344610214233, "learning_rate": 7.835635099687849e-06, "loss": 1.3575, "mean_token_accuracy": 0.667877584695816, "num_tokens": 1910686467.0, "step": 11397 }, { "entropy": 1.6736577153205872, "epoch": 1.2521216116008898, "grad_norm": 0.6787571907043457, "learning_rate": 7.834136250150322e-06, "loss": 1.3508, "mean_token_accuracy": 0.6710595637559891, "num_tokens": 1910821814.0, "step": 11398 }, { "entropy": 1.674732546011607, "epoch": 1.2522314685122629, "grad_norm": 0.7294467687606812, "learning_rate": 7.832637500810956e-06, "loss": 1.3117, "mean_token_accuracy": 0.6755828162034353, "num_tokens": 1910986283.0, "step": 11399 }, { "entropy": 1.7269720037778218, "epoch": 1.2523413254236357, "grad_norm": 0.6978003978729248, "learning_rate": 7.83113885171718e-06, "loss": 1.4251, "mean_token_accuracy": 0.6505262355009714, "num_tokens": 1911148949.0, "step": 11400 }, { "entropy": 1.684136559565862, "epoch": 1.2524511823350086, "grad_norm": 0.591343879699707, "learning_rate": 7.829640302916439e-06, "loss": 1.3168, "mean_token_accuracy": 0.6647894382476807, "num_tokens": 1911289617.0, "step": 11401 }, { "entropy": 1.7063394288221996, "epoch": 1.2525610392463815, "grad_norm": 0.8325570225715637, "learning_rate": 7.82814185445615e-06, "loss": 1.3085, "mean_token_accuracy": 0.6733732322851816, "num_tokens": 1911410460.0, "step": 11402 }, { "entropy": 1.7209921578566234, "epoch": 1.2526708961577544, "grad_norm": 0.6524738669395447, "learning_rate": 7.826643506383741e-06, "loss": 1.3605, "mean_token_accuracy": 0.6583642363548279, "num_tokens": 1911582978.0, "step": 11403 }, { "entropy": 1.7001774509747822, "epoch": 1.2527807530691275, "grad_norm": 0.581378698348999, "learning_rate": 7.82514525874664e-06, "loss": 1.5381, "mean_token_accuracy": 0.6198792159557343, "num_tokens": 1911779836.0, "step": 11404 }, { "entropy": 1.7446727454662323, "epoch": 1.2528906099805004, "grad_norm": 0.8353737592697144, "learning_rate": 7.823647111592257e-06, "loss": 1.505, "mean_token_accuracy": 0.6476826096574465, "num_tokens": 1911926444.0, "step": 11405 }, { "entropy": 1.6707488397757213, "epoch": 1.2530004668918733, "grad_norm": 0.7989435195922852, "learning_rate": 7.82214906496801e-06, "loss": 1.4969, "mean_token_accuracy": 0.6394098401069641, "num_tokens": 1912111911.0, "step": 11406 }, { "entropy": 1.7673610746860504, "epoch": 1.2531103238032464, "grad_norm": 0.8053948879241943, "learning_rate": 7.820651118921319e-06, "loss": 1.3536, "mean_token_accuracy": 0.6432522932688395, "num_tokens": 1912232792.0, "step": 11407 }, { "entropy": 1.6859275102615356, "epoch": 1.2532201807146193, "grad_norm": 0.7226851582527161, "learning_rate": 7.819153273499582e-06, "loss": 1.3106, "mean_token_accuracy": 0.6881605138381323, "num_tokens": 1912365835.0, "step": 11408 }, { "entropy": 1.7172163128852844, "epoch": 1.2533300376259922, "grad_norm": 0.7478646039962769, "learning_rate": 7.817655528750212e-06, "loss": 1.3927, "mean_token_accuracy": 0.6513003359238306, "num_tokens": 1912538400.0, "step": 11409 }, { "entropy": 1.7053345441818237, "epoch": 1.253439894537365, "grad_norm": 0.6886608600616455, "learning_rate": 7.816157884720612e-06, "loss": 1.4765, "mean_token_accuracy": 0.6423324594895045, "num_tokens": 1912724255.0, "step": 11410 }, { "entropy": 1.727815439303716, "epoch": 1.253549751448738, "grad_norm": 0.7457959055900574, "learning_rate": 7.81466034145818e-06, "loss": 1.4172, "mean_token_accuracy": 0.6606029123067856, "num_tokens": 1912962558.0, "step": 11411 }, { "entropy": 1.6915427148342133, "epoch": 1.253659608360111, "grad_norm": 0.6017783880233765, "learning_rate": 7.813162899010309e-06, "loss": 1.512, "mean_token_accuracy": 0.6343448410431544, "num_tokens": 1913116255.0, "step": 11412 }, { "entropy": 1.7317763566970825, "epoch": 1.253769465271484, "grad_norm": 0.6574037075042725, "learning_rate": 7.811665557424405e-06, "loss": 1.3666, "mean_token_accuracy": 0.6605449169874191, "num_tokens": 1913270950.0, "step": 11413 }, { "entropy": 1.7199652592341106, "epoch": 1.2538793221828568, "grad_norm": 0.796875, "learning_rate": 7.81016831674784e-06, "loss": 1.3238, "mean_token_accuracy": 0.6675882587830225, "num_tokens": 1913411579.0, "step": 11414 }, { "entropy": 1.6290069818496704, "epoch": 1.2539891790942297, "grad_norm": 0.680347204208374, "learning_rate": 7.808671177028013e-06, "loss": 1.4744, "mean_token_accuracy": 0.6564949949582418, "num_tokens": 1913645682.0, "step": 11415 }, { "entropy": 1.7070193191369374, "epoch": 1.2540990360056026, "grad_norm": 0.6644991636276245, "learning_rate": 7.80717413831231e-06, "loss": 1.5298, "mean_token_accuracy": 0.6586725761493047, "num_tokens": 1913872167.0, "step": 11416 }, { "entropy": 1.708907941977183, "epoch": 1.2542088929169757, "grad_norm": 0.6234670877456665, "learning_rate": 7.805677200648101e-06, "loss": 1.3705, "mean_token_accuracy": 0.6489444921414057, "num_tokens": 1914049662.0, "step": 11417 }, { "entropy": 1.6813920140266418, "epoch": 1.2543187498283486, "grad_norm": 0.6542984843254089, "learning_rate": 7.80418036408277e-06, "loss": 1.3409, "mean_token_accuracy": 0.6567636926968893, "num_tokens": 1914249075.0, "step": 11418 }, { "entropy": 1.7307861546675365, "epoch": 1.2544286067397215, "grad_norm": 0.723311185836792, "learning_rate": 7.802683628663697e-06, "loss": 1.4246, "mean_token_accuracy": 0.6489053318897883, "num_tokens": 1914392536.0, "step": 11419 }, { "entropy": 1.7004589041074116, "epoch": 1.2545384636510946, "grad_norm": 0.5945419669151306, "learning_rate": 7.801186994438236e-06, "loss": 1.4268, "mean_token_accuracy": 0.6500704089800516, "num_tokens": 1914564395.0, "step": 11420 }, { "entropy": 1.6588083505630493, "epoch": 1.2546483205624674, "grad_norm": 0.7622363567352295, "learning_rate": 7.79969046145377e-06, "loss": 1.4433, "mean_token_accuracy": 0.6651933292547861, "num_tokens": 1914736861.0, "step": 11421 }, { "entropy": 1.7121857802073162, "epoch": 1.2547581774738403, "grad_norm": 0.7922995090484619, "learning_rate": 7.798194029757661e-06, "loss": 1.3512, "mean_token_accuracy": 0.6623698522647222, "num_tokens": 1914899502.0, "step": 11422 }, { "entropy": 1.6708403130372365, "epoch": 1.2548680343852132, "grad_norm": 0.8336834907531738, "learning_rate": 7.796697699397266e-06, "loss": 1.5238, "mean_token_accuracy": 0.6434931059678396, "num_tokens": 1915090759.0, "step": 11423 }, { "entropy": 1.7130014995733898, "epoch": 1.254977891296586, "grad_norm": 0.7885116338729858, "learning_rate": 7.795201470419944e-06, "loss": 1.4998, "mean_token_accuracy": 0.6617122739553452, "num_tokens": 1915294853.0, "step": 11424 }, { "entropy": 1.695469965537389, "epoch": 1.2550877482079592, "grad_norm": 0.7806084156036377, "learning_rate": 7.793705342873057e-06, "loss": 1.5192, "mean_token_accuracy": 0.6436646829048792, "num_tokens": 1915519404.0, "step": 11425 }, { "entropy": 1.7029032309850056, "epoch": 1.255197605119332, "grad_norm": 0.6547440886497498, "learning_rate": 7.792209316803945e-06, "loss": 1.4503, "mean_token_accuracy": 0.6494365930557251, "num_tokens": 1915706424.0, "step": 11426 }, { "entropy": 1.6860091984272003, "epoch": 1.255307462030705, "grad_norm": 0.7135421633720398, "learning_rate": 7.790713392259967e-06, "loss": 1.6007, "mean_token_accuracy": 0.6431414932012558, "num_tokens": 1915915669.0, "step": 11427 }, { "entropy": 1.6765115559101105, "epoch": 1.2554173189420779, "grad_norm": 0.6755972504615784, "learning_rate": 7.78921756928846e-06, "loss": 1.3682, "mean_token_accuracy": 0.653800884882609, "num_tokens": 1916074318.0, "step": 11428 }, { "entropy": 1.6629555523395538, "epoch": 1.2555271758534507, "grad_norm": 0.6233551502227783, "learning_rate": 7.787721847936773e-06, "loss": 1.5946, "mean_token_accuracy": 0.6113560448090235, "num_tokens": 1916321807.0, "step": 11429 }, { "entropy": 1.7326057354609172, "epoch": 1.2556370327648239, "grad_norm": 0.7778398990631104, "learning_rate": 7.786226228252245e-06, "loss": 1.2951, "mean_token_accuracy": 0.6696663945913315, "num_tokens": 1916452669.0, "step": 11430 }, { "entropy": 1.6951783398787181, "epoch": 1.2557468896761967, "grad_norm": 0.7745827436447144, "learning_rate": 7.784730710282203e-06, "loss": 1.3895, "mean_token_accuracy": 0.6611627688010534, "num_tokens": 1916616875.0, "step": 11431 }, { "entropy": 1.6729782323042552, "epoch": 1.2558567465875696, "grad_norm": 0.6417363286018372, "learning_rate": 7.783235294073986e-06, "loss": 1.3102, "mean_token_accuracy": 0.6600176095962524, "num_tokens": 1916753385.0, "step": 11432 }, { "entropy": 1.72296741604805, "epoch": 1.2559666034989427, "grad_norm": 0.7033810019493103, "learning_rate": 7.781739979674922e-06, "loss": 1.3348, "mean_token_accuracy": 0.6627410103877386, "num_tokens": 1916948753.0, "step": 11433 }, { "entropy": 1.795237421989441, "epoch": 1.2560764604103156, "grad_norm": 0.8221445679664612, "learning_rate": 7.780244767132339e-06, "loss": 1.4476, "mean_token_accuracy": 0.6527186830838522, "num_tokens": 1917103771.0, "step": 11434 }, { "entropy": 1.6837720175584157, "epoch": 1.2561863173216885, "grad_norm": 0.7041736245155334, "learning_rate": 7.778749656493558e-06, "loss": 1.3005, "mean_token_accuracy": 0.6646720518668493, "num_tokens": 1917251916.0, "step": 11435 }, { "entropy": 1.7194795906543732, "epoch": 1.2562961742330614, "grad_norm": 0.7449667453765869, "learning_rate": 7.7772546478059e-06, "loss": 1.4377, "mean_token_accuracy": 0.6714818626642227, "num_tokens": 1917395245.0, "step": 11436 }, { "entropy": 1.6425227721532185, "epoch": 1.2564060311444343, "grad_norm": 0.6943098902702332, "learning_rate": 7.77575974111668e-06, "loss": 1.4591, "mean_token_accuracy": 0.6591807802518209, "num_tokens": 1917627333.0, "step": 11437 }, { "entropy": 1.6901133060455322, "epoch": 1.2565158880558074, "grad_norm": 0.6888213753700256, "learning_rate": 7.774264936473209e-06, "loss": 1.4082, "mean_token_accuracy": 0.6591382523377737, "num_tokens": 1917797362.0, "step": 11438 }, { "entropy": 1.6233469347159069, "epoch": 1.2566257449671803, "grad_norm": 0.6449564695358276, "learning_rate": 7.772770233922801e-06, "loss": 1.3172, "mean_token_accuracy": 0.6671230693658193, "num_tokens": 1917967575.0, "step": 11439 }, { "entropy": 1.7254037757714589, "epoch": 1.2567356018785532, "grad_norm": 0.7280165553092957, "learning_rate": 7.771275633512761e-06, "loss": 1.4346, "mean_token_accuracy": 0.6726182848215103, "num_tokens": 1918147690.0, "step": 11440 }, { "entropy": 1.671642541885376, "epoch": 1.256845458789926, "grad_norm": 0.7780535221099854, "learning_rate": 7.769781135290392e-06, "loss": 1.2984, "mean_token_accuracy": 0.6698754082123438, "num_tokens": 1918277327.0, "step": 11441 }, { "entropy": 1.7655751307805378, "epoch": 1.256955315701299, "grad_norm": 0.7205750346183777, "learning_rate": 7.768286739302997e-06, "loss": 1.4342, "mean_token_accuracy": 0.6538830598195394, "num_tokens": 1918451845.0, "step": 11442 }, { "entropy": 1.6561244527498882, "epoch": 1.257065172612672, "grad_norm": 0.5723996162414551, "learning_rate": 7.766792445597867e-06, "loss": 1.2961, "mean_token_accuracy": 0.6749825278917948, "num_tokens": 1918591768.0, "step": 11443 }, { "entropy": 1.7065655092398326, "epoch": 1.257175029524045, "grad_norm": 0.6684293150901794, "learning_rate": 7.765298254222295e-06, "loss": 1.3863, "mean_token_accuracy": 0.6667204201221466, "num_tokens": 1918751296.0, "step": 11444 }, { "entropy": 1.7191319068272908, "epoch": 1.2572848864354178, "grad_norm": 0.6368053555488586, "learning_rate": 7.763804165223583e-06, "loss": 1.3593, "mean_token_accuracy": 0.6531160324811935, "num_tokens": 1918911483.0, "step": 11445 }, { "entropy": 1.7349167664845784, "epoch": 1.257394743346791, "grad_norm": 0.6222125291824341, "learning_rate": 7.762310178649009e-06, "loss": 1.4368, "mean_token_accuracy": 0.6423256794611613, "num_tokens": 1919049670.0, "step": 11446 }, { "entropy": 1.6775768597920735, "epoch": 1.2575046002581638, "grad_norm": 0.682058572769165, "learning_rate": 7.760816294545859e-06, "loss": 1.5105, "mean_token_accuracy": 0.6353928248087565, "num_tokens": 1919331340.0, "step": 11447 }, { "entropy": 1.6649962762991588, "epoch": 1.2576144571695367, "grad_norm": 0.6357629299163818, "learning_rate": 7.759322512961414e-06, "loss": 1.4366, "mean_token_accuracy": 0.6457183212041855, "num_tokens": 1919563182.0, "step": 11448 }, { "entropy": 1.675305445988973, "epoch": 1.2577243140809096, "grad_norm": 0.6749522089958191, "learning_rate": 7.757828833942951e-06, "loss": 1.3358, "mean_token_accuracy": 0.6629159996906916, "num_tokens": 1919785893.0, "step": 11449 }, { "entropy": 1.7580168048540752, "epoch": 1.2578341709922825, "grad_norm": 0.6780009269714355, "learning_rate": 7.756335257537741e-06, "loss": 1.4463, "mean_token_accuracy": 0.6388568629821142, "num_tokens": 1919960622.0, "step": 11450 }, { "entropy": 1.7142368654410045, "epoch": 1.2579440279036556, "grad_norm": 0.7723596692085266, "learning_rate": 7.754841783793064e-06, "loss": 1.3538, "mean_token_accuracy": 0.6707404851913452, "num_tokens": 1920101550.0, "step": 11451 }, { "entropy": 1.741561730702718, "epoch": 1.2580538848150284, "grad_norm": 0.7125125527381897, "learning_rate": 7.753348412756179e-06, "loss": 1.466, "mean_token_accuracy": 0.6531734565893809, "num_tokens": 1920294592.0, "step": 11452 }, { "entropy": 1.6811016102631886, "epoch": 1.2581637417264013, "grad_norm": 0.6982854604721069, "learning_rate": 7.751855144474354e-06, "loss": 1.4956, "mean_token_accuracy": 0.642639954884847, "num_tokens": 1920433718.0, "step": 11453 }, { "entropy": 1.7243138253688812, "epoch": 1.2582735986377744, "grad_norm": 0.7025527954101562, "learning_rate": 7.75036197899485e-06, "loss": 1.2947, "mean_token_accuracy": 0.6688729325930277, "num_tokens": 1920552445.0, "step": 11454 }, { "entropy": 1.7009615500768025, "epoch": 1.258383455549147, "grad_norm": 0.7054716944694519, "learning_rate": 7.748868916364924e-06, "loss": 1.5373, "mean_token_accuracy": 0.6239589502414068, "num_tokens": 1920759876.0, "step": 11455 }, { "entropy": 1.7350860337416332, "epoch": 1.2584933124605202, "grad_norm": 0.6489484906196594, "learning_rate": 7.747375956631833e-06, "loss": 1.4164, "mean_token_accuracy": 0.6492180824279785, "num_tokens": 1920930265.0, "step": 11456 }, { "entropy": 1.7335429390271504, "epoch": 1.258603169371893, "grad_norm": 2.155296564102173, "learning_rate": 7.745883099842828e-06, "loss": 1.2186, "mean_token_accuracy": 0.6745936175187429, "num_tokens": 1921145852.0, "step": 11457 }, { "entropy": 1.6847576002279918, "epoch": 1.258713026283266, "grad_norm": 0.7475409507751465, "learning_rate": 7.744390346045156e-06, "loss": 1.3127, "mean_token_accuracy": 0.6665635804335276, "num_tokens": 1921288993.0, "step": 11458 }, { "entropy": 1.6852596898873646, "epoch": 1.258822883194639, "grad_norm": 0.6202402114868164, "learning_rate": 7.742897695286063e-06, "loss": 1.2607, "mean_token_accuracy": 0.6772429198026657, "num_tokens": 1921467875.0, "step": 11459 }, { "entropy": 1.7395563423633575, "epoch": 1.258932740106012, "grad_norm": 0.6076090335845947, "learning_rate": 7.741405147612791e-06, "loss": 1.3799, "mean_token_accuracy": 0.6517507483561834, "num_tokens": 1921609703.0, "step": 11460 }, { "entropy": 1.6285878519217174, "epoch": 1.2590425970173849, "grad_norm": 0.6175395846366882, "learning_rate": 7.739912703072576e-06, "loss": 1.3612, "mean_token_accuracy": 0.6656326601902643, "num_tokens": 1921784864.0, "step": 11461 }, { "entropy": 1.7589463591575623, "epoch": 1.2591524539287577, "grad_norm": 0.7153595685958862, "learning_rate": 7.738420361712654e-06, "loss": 1.4444, "mean_token_accuracy": 0.6449099431435267, "num_tokens": 1921959314.0, "step": 11462 }, { "entropy": 1.7452492415904999, "epoch": 1.2592623108401306, "grad_norm": 0.7535262107849121, "learning_rate": 7.736928123580259e-06, "loss": 1.6277, "mean_token_accuracy": 0.6322442690531412, "num_tokens": 1922134627.0, "step": 11463 }, { "entropy": 1.7327638566493988, "epoch": 1.2593721677515037, "grad_norm": 0.6867733001708984, "learning_rate": 7.73543598872262e-06, "loss": 1.2915, "mean_token_accuracy": 0.6723167101542155, "num_tokens": 1922251085.0, "step": 11464 }, { "entropy": 1.6638148029645283, "epoch": 1.2594820246628766, "grad_norm": 0.7520348429679871, "learning_rate": 7.733943957186958e-06, "loss": 1.3079, "mean_token_accuracy": 0.667260949810346, "num_tokens": 1922418678.0, "step": 11465 }, { "entropy": 1.65032497048378, "epoch": 1.2595918815742495, "grad_norm": 0.6573466062545776, "learning_rate": 7.7324520290205e-06, "loss": 1.3575, "mean_token_accuracy": 0.6559811184803644, "num_tokens": 1922574854.0, "step": 11466 }, { "entropy": 1.676581472158432, "epoch": 1.2597017384856226, "grad_norm": 0.7159135341644287, "learning_rate": 7.730960204270464e-06, "loss": 1.4979, "mean_token_accuracy": 0.6280734737714132, "num_tokens": 1922802094.0, "step": 11467 }, { "entropy": 1.643155614535014, "epoch": 1.2598115953969953, "grad_norm": 0.6512061953544617, "learning_rate": 7.729468482984062e-06, "loss": 1.5478, "mean_token_accuracy": 0.6497600624958674, "num_tokens": 1923003559.0, "step": 11468 }, { "entropy": 1.7391295929749806, "epoch": 1.2599214523083684, "grad_norm": 0.7512926459312439, "learning_rate": 7.727976865208511e-06, "loss": 1.3226, "mean_token_accuracy": 0.6600721975167593, "num_tokens": 1923121923.0, "step": 11469 }, { "entropy": 1.7174865404764812, "epoch": 1.2600313092197413, "grad_norm": 0.6891658306121826, "learning_rate": 7.726485350991016e-06, "loss": 1.2844, "mean_token_accuracy": 0.6672490239143372, "num_tokens": 1923266801.0, "step": 11470 }, { "entropy": 1.6965750257174175, "epoch": 1.2601411661311142, "grad_norm": 3.922152042388916, "learning_rate": 7.724993940378784e-06, "loss": 1.6618, "mean_token_accuracy": 0.6115802451968193, "num_tokens": 1923523178.0, "step": 11471 }, { "entropy": 1.72745943069458, "epoch": 1.2602510230424873, "grad_norm": 0.6440877318382263, "learning_rate": 7.723502633419022e-06, "loss": 1.4972, "mean_token_accuracy": 0.6545726358890533, "num_tokens": 1923703015.0, "step": 11472 }, { "entropy": 1.693722536166509, "epoch": 1.2603608799538601, "grad_norm": 0.7532528638839722, "learning_rate": 7.722011430158923e-06, "loss": 1.5349, "mean_token_accuracy": 0.6389701962471008, "num_tokens": 1923865936.0, "step": 11473 }, { "entropy": 1.72782959540685, "epoch": 1.260470736865233, "grad_norm": 0.7088679671287537, "learning_rate": 7.72052033064568e-06, "loss": 1.2202, "mean_token_accuracy": 0.6756645192702612, "num_tokens": 1923968318.0, "step": 11474 }, { "entropy": 1.6902720232804616, "epoch": 1.260580593776606, "grad_norm": 0.612113356590271, "learning_rate": 7.7190293349265e-06, "loss": 1.38, "mean_token_accuracy": 0.6533292979001999, "num_tokens": 1924177834.0, "step": 11475 }, { "entropy": 1.6937540173530579, "epoch": 1.2606904506879788, "grad_norm": 0.7892910242080688, "learning_rate": 7.717538443048556e-06, "loss": 1.4682, "mean_token_accuracy": 0.6503288199504217, "num_tokens": 1924315589.0, "step": 11476 }, { "entropy": 1.7266385753949482, "epoch": 1.260800307599352, "grad_norm": 0.7862039804458618, "learning_rate": 7.716047655059043e-06, "loss": 1.266, "mean_token_accuracy": 0.6670999377965927, "num_tokens": 1924421277.0, "step": 11477 }, { "entropy": 1.6943805813789368, "epoch": 1.2609101645107248, "grad_norm": 0.6646753549575806, "learning_rate": 7.714556971005145e-06, "loss": 1.4912, "mean_token_accuracy": 0.6547419528166453, "num_tokens": 1924585290.0, "step": 11478 }, { "entropy": 1.7027036249637604, "epoch": 1.2610200214220977, "grad_norm": 0.609962522983551, "learning_rate": 7.713066390934034e-06, "loss": 1.4166, "mean_token_accuracy": 0.6544028073549271, "num_tokens": 1924754377.0, "step": 11479 }, { "entropy": 1.7627936601638794, "epoch": 1.2611298783334708, "grad_norm": 0.6366637349128723, "learning_rate": 7.711575914892893e-06, "loss": 1.4668, "mean_token_accuracy": 0.6458509564399719, "num_tokens": 1924953930.0, "step": 11480 }, { "entropy": 1.720168004433314, "epoch": 1.2612397352448437, "grad_norm": 0.6627671122550964, "learning_rate": 7.710085542928893e-06, "loss": 1.4203, "mean_token_accuracy": 0.6433569043874741, "num_tokens": 1925151322.0, "step": 11481 }, { "entropy": 1.7099298934141796, "epoch": 1.2613495921562166, "grad_norm": 0.7178329825401306, "learning_rate": 7.708595275089202e-06, "loss": 1.5419, "mean_token_accuracy": 0.6385843257109324, "num_tokens": 1925338468.0, "step": 11482 }, { "entropy": 1.6707678933938344, "epoch": 1.2614594490675894, "grad_norm": 0.6434171795845032, "learning_rate": 7.707105111420985e-06, "loss": 1.4085, "mean_token_accuracy": 0.6535218954086304, "num_tokens": 1925559149.0, "step": 11483 }, { "entropy": 1.6994469662507374, "epoch": 1.2615693059789623, "grad_norm": 0.6782479882240295, "learning_rate": 7.705615051971413e-06, "loss": 1.3478, "mean_token_accuracy": 0.6566101660331091, "num_tokens": 1925685270.0, "step": 11484 }, { "entropy": 1.7290050586064656, "epoch": 1.2616791628903354, "grad_norm": 0.649772047996521, "learning_rate": 7.704125096787636e-06, "loss": 1.4744, "mean_token_accuracy": 0.6495188226302465, "num_tokens": 1925843285.0, "step": 11485 }, { "entropy": 1.6614007751146953, "epoch": 1.2617890198017083, "grad_norm": 0.7200374603271484, "learning_rate": 7.702635245916814e-06, "loss": 1.2959, "mean_token_accuracy": 0.6671645094950994, "num_tokens": 1925992589.0, "step": 11486 }, { "entropy": 1.7314343353112538, "epoch": 1.2618988767130812, "grad_norm": 0.7426960468292236, "learning_rate": 7.701145499406106e-06, "loss": 1.4451, "mean_token_accuracy": 0.650872215628624, "num_tokens": 1926166622.0, "step": 11487 }, { "entropy": 1.7814875145753224, "epoch": 1.262008733624454, "grad_norm": 0.7111337184906006, "learning_rate": 7.69965585730265e-06, "loss": 1.311, "mean_token_accuracy": 0.6611191133658091, "num_tokens": 1926299574.0, "step": 11488 }, { "entropy": 1.773252805074056, "epoch": 1.262118590535827, "grad_norm": 0.6960779428482056, "learning_rate": 7.698166319653604e-06, "loss": 1.3395, "mean_token_accuracy": 0.6631787866353989, "num_tokens": 1926441744.0, "step": 11489 }, { "entropy": 1.721550424893697, "epoch": 1.2622284474472, "grad_norm": 0.6645501255989075, "learning_rate": 7.696676886506102e-06, "loss": 1.4345, "mean_token_accuracy": 0.652705987294515, "num_tokens": 1926620603.0, "step": 11490 }, { "entropy": 1.624302864074707, "epoch": 1.262338304358573, "grad_norm": 0.8502170443534851, "learning_rate": 7.695187557907292e-06, "loss": 1.1604, "mean_token_accuracy": 0.6946276426315308, "num_tokens": 1926753986.0, "step": 11491 }, { "entropy": 1.7301335036754608, "epoch": 1.2624481612699459, "grad_norm": 0.9755292534828186, "learning_rate": 7.693698333904305e-06, "loss": 1.4621, "mean_token_accuracy": 0.6535843859116236, "num_tokens": 1926923605.0, "step": 11492 }, { "entropy": 1.691352754831314, "epoch": 1.262558018181319, "grad_norm": 0.6778144836425781, "learning_rate": 7.692209214544276e-06, "loss": 1.4903, "mean_token_accuracy": 0.6365737020969391, "num_tokens": 1927182509.0, "step": 11493 }, { "entropy": 1.6417991022268932, "epoch": 1.2626678750926918, "grad_norm": 0.629219651222229, "learning_rate": 7.690720199874331e-06, "loss": 1.355, "mean_token_accuracy": 0.6662428428729376, "num_tokens": 1927352842.0, "step": 11494 }, { "entropy": 1.6753190557161968, "epoch": 1.2627777320040647, "grad_norm": 0.6111719608306885, "learning_rate": 7.689231289941606e-06, "loss": 1.4178, "mean_token_accuracy": 0.6571441541115443, "num_tokens": 1927616855.0, "step": 11495 }, { "entropy": 1.6729080478350322, "epoch": 1.2628875889154376, "grad_norm": 0.6489112973213196, "learning_rate": 7.687742484793215e-06, "loss": 1.3208, "mean_token_accuracy": 0.6776315818230311, "num_tokens": 1927765143.0, "step": 11496 }, { "entropy": 1.7384801010290782, "epoch": 1.2629974458268105, "grad_norm": 0.7252342104911804, "learning_rate": 7.686253784476284e-06, "loss": 1.3797, "mean_token_accuracy": 0.6533515950043997, "num_tokens": 1927917822.0, "step": 11497 }, { "entropy": 1.6913585464159648, "epoch": 1.2631073027381836, "grad_norm": 0.8369355797767639, "learning_rate": 7.684765189037925e-06, "loss": 1.3558, "mean_token_accuracy": 0.6676561236381531, "num_tokens": 1928111621.0, "step": 11498 }, { "entropy": 1.7651902238527934, "epoch": 1.2632171596495565, "grad_norm": 0.7449557781219482, "learning_rate": 7.683276698525257e-06, "loss": 1.4675, "mean_token_accuracy": 0.6476789265871048, "num_tokens": 1928263994.0, "step": 11499 }, { "entropy": 1.715785026550293, "epoch": 1.2633270165609294, "grad_norm": 0.7928597927093506, "learning_rate": 7.681788312985383e-06, "loss": 1.3824, "mean_token_accuracy": 0.6676217714945475, "num_tokens": 1928407724.0, "step": 11500 }, { "entropy": 1.6963496307531993, "epoch": 1.2634368734723023, "grad_norm": 0.7719584703445435, "learning_rate": 7.680300032465418e-06, "loss": 1.3438, "mean_token_accuracy": 0.6627303858598074, "num_tokens": 1928529780.0, "step": 11501 }, { "entropy": 1.7370944917201996, "epoch": 1.2635467303836752, "grad_norm": 0.6030802726745605, "learning_rate": 7.678811857012461e-06, "loss": 1.5213, "mean_token_accuracy": 0.6461159139871597, "num_tokens": 1928707122.0, "step": 11502 }, { "entropy": 1.7230145931243896, "epoch": 1.2636565872950483, "grad_norm": 0.7120351791381836, "learning_rate": 7.67732378667361e-06, "loss": 1.5991, "mean_token_accuracy": 0.6233674536148707, "num_tokens": 1928938624.0, "step": 11503 }, { "entropy": 1.7156452139218648, "epoch": 1.2637664442064211, "grad_norm": 0.7055292129516602, "learning_rate": 7.675835821495965e-06, "loss": 1.3116, "mean_token_accuracy": 0.6527894685665766, "num_tokens": 1929072927.0, "step": 11504 }, { "entropy": 1.7379835744698842, "epoch": 1.263876301117794, "grad_norm": 0.6333845853805542, "learning_rate": 7.674347961526617e-06, "loss": 1.4977, "mean_token_accuracy": 0.6325055857499441, "num_tokens": 1929263786.0, "step": 11505 }, { "entropy": 1.676991045475006, "epoch": 1.2639861580291671, "grad_norm": 0.6579580903053284, "learning_rate": 7.672860206812655e-06, "loss": 1.3853, "mean_token_accuracy": 0.6635429114103317, "num_tokens": 1929431076.0, "step": 11506 }, { "entropy": 1.757521351178487, "epoch": 1.26409601494054, "grad_norm": 0.7360755205154419, "learning_rate": 7.671372557401174e-06, "loss": 1.5488, "mean_token_accuracy": 0.623514766494433, "num_tokens": 1929649046.0, "step": 11507 }, { "entropy": 1.620929052432378, "epoch": 1.264205871851913, "grad_norm": 0.7235838174819946, "learning_rate": 7.66988501333925e-06, "loss": 1.3106, "mean_token_accuracy": 0.6729168196519216, "num_tokens": 1929847207.0, "step": 11508 }, { "entropy": 1.6391695042451222, "epoch": 1.2643157287632858, "grad_norm": 0.5893858671188354, "learning_rate": 7.668397574673963e-06, "loss": 1.4936, "mean_token_accuracy": 0.6433763305346171, "num_tokens": 1930052630.0, "step": 11509 }, { "entropy": 1.675877183675766, "epoch": 1.2644255856746587, "grad_norm": 0.6175031661987305, "learning_rate": 7.666910241452395e-06, "loss": 1.3547, "mean_token_accuracy": 0.665746475259463, "num_tokens": 1930224630.0, "step": 11510 }, { "entropy": 1.7053968608379364, "epoch": 1.2645354425860318, "grad_norm": 0.7241131663322449, "learning_rate": 7.665423013721611e-06, "loss": 1.3036, "mean_token_accuracy": 0.6749317497014999, "num_tokens": 1930350724.0, "step": 11511 }, { "entropy": 1.6937896013259888, "epoch": 1.2646452994974047, "grad_norm": 0.6429654359817505, "learning_rate": 7.663935891528686e-06, "loss": 1.3792, "mean_token_accuracy": 0.6615054110685984, "num_tokens": 1930486501.0, "step": 11512 }, { "entropy": 1.655285765727361, "epoch": 1.2647551564087776, "grad_norm": 0.6734585762023926, "learning_rate": 7.662448874920692e-06, "loss": 1.3187, "mean_token_accuracy": 0.6705884784460068, "num_tokens": 1930635527.0, "step": 11513 }, { "entropy": 1.6695108612378438, "epoch": 1.2648650133201504, "grad_norm": 0.7096666693687439, "learning_rate": 7.660961963944682e-06, "loss": 1.3868, "mean_token_accuracy": 0.6595136175553004, "num_tokens": 1930815397.0, "step": 11514 }, { "entropy": 1.6726165413856506, "epoch": 1.2649748702315233, "grad_norm": 0.6741091012954712, "learning_rate": 7.659475158647724e-06, "loss": 1.4177, "mean_token_accuracy": 0.6559295405944189, "num_tokens": 1930982798.0, "step": 11515 }, { "entropy": 1.7462388277053833, "epoch": 1.2650847271428964, "grad_norm": 0.6222598552703857, "learning_rate": 7.657988459076872e-06, "loss": 1.538, "mean_token_accuracy": 0.6377201875050863, "num_tokens": 1931198546.0, "step": 11516 }, { "entropy": 1.647881656885147, "epoch": 1.2651945840542693, "grad_norm": 0.7117844223976135, "learning_rate": 7.656501865279178e-06, "loss": 1.4001, "mean_token_accuracy": 0.6614241848389307, "num_tokens": 1931360393.0, "step": 11517 }, { "entropy": 1.7693034013112385, "epoch": 1.2653044409656422, "grad_norm": 0.7837836742401123, "learning_rate": 7.655015377301693e-06, "loss": 1.3253, "mean_token_accuracy": 0.664752279718717, "num_tokens": 1931480695.0, "step": 11518 }, { "entropy": 1.6859951515992482, "epoch": 1.2654142978770153, "grad_norm": 0.8946231603622437, "learning_rate": 7.653528995191467e-06, "loss": 1.3278, "mean_token_accuracy": 0.6740283519029617, "num_tokens": 1931619467.0, "step": 11519 }, { "entropy": 1.6824671526749928, "epoch": 1.2655241547883882, "grad_norm": 0.705892026424408, "learning_rate": 7.652042718995539e-06, "loss": 1.2626, "mean_token_accuracy": 0.6893934309482574, "num_tokens": 1931782165.0, "step": 11520 }, { "entropy": 1.680429647366206, "epoch": 1.265634011699761, "grad_norm": 0.7540983557701111, "learning_rate": 7.650556548760948e-06, "loss": 1.3173, "mean_token_accuracy": 0.6678841362396876, "num_tokens": 1931937633.0, "step": 11521 }, { "entropy": 1.648360123236974, "epoch": 1.265743868611134, "grad_norm": 0.7288416624069214, "learning_rate": 7.649070484534737e-06, "loss": 1.342, "mean_token_accuracy": 0.6797003994385401, "num_tokens": 1932146802.0, "step": 11522 }, { "entropy": 1.7429245710372925, "epoch": 1.2658537255225069, "grad_norm": 0.724900484085083, "learning_rate": 7.647584526363933e-06, "loss": 1.4145, "mean_token_accuracy": 0.6517399648825327, "num_tokens": 1932307870.0, "step": 11523 }, { "entropy": 1.6724059581756592, "epoch": 1.26596358243388, "grad_norm": 0.612019419670105, "learning_rate": 7.646098674295566e-06, "loss": 1.4009, "mean_token_accuracy": 0.6560710817575455, "num_tokens": 1932481868.0, "step": 11524 }, { "entropy": 1.6238444844881694, "epoch": 1.2660734393452528, "grad_norm": 0.6354291439056396, "learning_rate": 7.644612928376666e-06, "loss": 1.5184, "mean_token_accuracy": 0.653970350821813, "num_tokens": 1932664212.0, "step": 11525 }, { "entropy": 1.7032426098982494, "epoch": 1.2661832962566257, "grad_norm": 0.6199919581413269, "learning_rate": 7.643127288654255e-06, "loss": 1.4766, "mean_token_accuracy": 0.6471427232027054, "num_tokens": 1932844482.0, "step": 11526 }, { "entropy": 1.677983929713567, "epoch": 1.2662931531679986, "grad_norm": 0.698670506477356, "learning_rate": 7.641641755175353e-06, "loss": 1.3514, "mean_token_accuracy": 0.6649612784385681, "num_tokens": 1932999488.0, "step": 11527 }, { "entropy": 1.7283147772153218, "epoch": 1.2664030100793715, "grad_norm": 0.6362758278846741, "learning_rate": 7.640156327986978e-06, "loss": 1.5035, "mean_token_accuracy": 0.6524873872598013, "num_tokens": 1933210321.0, "step": 11528 }, { "entropy": 1.6926906903584797, "epoch": 1.2665128669907446, "grad_norm": 0.6771997809410095, "learning_rate": 7.63867100713614e-06, "loss": 1.4221, "mean_token_accuracy": 0.6506749987602234, "num_tokens": 1933393700.0, "step": 11529 }, { "entropy": 1.7136725882689159, "epoch": 1.2666227239021175, "grad_norm": 0.6458131074905396, "learning_rate": 7.637185792669849e-06, "loss": 1.3923, "mean_token_accuracy": 0.6545117845137914, "num_tokens": 1933537916.0, "step": 11530 }, { "entropy": 1.678369532028834, "epoch": 1.2667325808134904, "grad_norm": 0.739032506942749, "learning_rate": 7.635700684635112e-06, "loss": 1.3112, "mean_token_accuracy": 0.6715343842903773, "num_tokens": 1933714762.0, "step": 11531 }, { "entropy": 1.65420796473821, "epoch": 1.2668424377248635, "grad_norm": 0.7117313742637634, "learning_rate": 7.634215683078934e-06, "loss": 1.446, "mean_token_accuracy": 0.6615893120567004, "num_tokens": 1933884409.0, "step": 11532 }, { "entropy": 1.7248832484086354, "epoch": 1.2669522946362364, "grad_norm": 0.6786313056945801, "learning_rate": 7.632730788048313e-06, "loss": 1.4713, "mean_token_accuracy": 0.6573885877927145, "num_tokens": 1934081855.0, "step": 11533 }, { "entropy": 1.7391786475976307, "epoch": 1.2670621515476093, "grad_norm": 0.6770562529563904, "learning_rate": 7.631245999590244e-06, "loss": 1.388, "mean_token_accuracy": 0.6576652526855469, "num_tokens": 1934265897.0, "step": 11534 }, { "entropy": 1.755203555027644, "epoch": 1.2671720084589821, "grad_norm": 0.5667737126350403, "learning_rate": 7.629761317751723e-06, "loss": 1.4356, "mean_token_accuracy": 0.6415314426024755, "num_tokens": 1934470125.0, "step": 11535 }, { "entropy": 1.7336049179236095, "epoch": 1.267281865370355, "grad_norm": 0.6754252910614014, "learning_rate": 7.628276742579732e-06, "loss": 1.3752, "mean_token_accuracy": 0.6565804481506348, "num_tokens": 1934638119.0, "step": 11536 }, { "entropy": 1.7392099499702454, "epoch": 1.2673917222817281, "grad_norm": 0.6438708305358887, "learning_rate": 7.626792274121268e-06, "loss": 1.5711, "mean_token_accuracy": 0.6428494701782862, "num_tokens": 1934809654.0, "step": 11537 }, { "entropy": 1.7270666062831879, "epoch": 1.267501579193101, "grad_norm": 0.6939952373504639, "learning_rate": 7.625307912423308e-06, "loss": 1.4309, "mean_token_accuracy": 0.6438876688480377, "num_tokens": 1934985020.0, "step": 11538 }, { "entropy": 1.6828594009081523, "epoch": 1.267611436104474, "grad_norm": 0.8025250434875488, "learning_rate": 7.6238236575328315e-06, "loss": 1.2169, "mean_token_accuracy": 0.6810509413480759, "num_tokens": 1935128210.0, "step": 11539 }, { "entropy": 1.7216303646564484, "epoch": 1.2677212930158468, "grad_norm": 1.0964614152908325, "learning_rate": 7.622339509496814e-06, "loss": 1.4948, "mean_token_accuracy": 0.6510275801022848, "num_tokens": 1935295219.0, "step": 11540 }, { "entropy": 1.7091183761755626, "epoch": 1.2678311499272197, "grad_norm": 0.7597293257713318, "learning_rate": 7.620855468362232e-06, "loss": 1.3388, "mean_token_accuracy": 0.6646958986918131, "num_tokens": 1935444915.0, "step": 11541 }, { "entropy": 1.7101080814997356, "epoch": 1.2679410068385928, "grad_norm": 0.6305139064788818, "learning_rate": 7.619371534176045e-06, "loss": 1.3943, "mean_token_accuracy": 0.6649887412786484, "num_tokens": 1935608450.0, "step": 11542 }, { "entropy": 1.776473770538966, "epoch": 1.2680508637499657, "grad_norm": 0.674400269985199, "learning_rate": 7.6178877069852344e-06, "loss": 1.4229, "mean_token_accuracy": 0.658959781130155, "num_tokens": 1935755983.0, "step": 11543 }, { "entropy": 1.6880793074766796, "epoch": 1.2681607206613386, "grad_norm": 0.8230254054069519, "learning_rate": 7.616403986836749e-06, "loss": 1.2906, "mean_token_accuracy": 0.6654263834158579, "num_tokens": 1935880326.0, "step": 11544 }, { "entropy": 1.6400333046913147, "epoch": 1.2682705775727117, "grad_norm": 0.6082746386528015, "learning_rate": 7.614920373777552e-06, "loss": 1.2296, "mean_token_accuracy": 0.6776652832825979, "num_tokens": 1936009216.0, "step": 11545 }, { "entropy": 1.7601770758628845, "epoch": 1.2683804344840846, "grad_norm": 0.8611322045326233, "learning_rate": 7.613436867854602e-06, "loss": 1.4269, "mean_token_accuracy": 0.6573955913384756, "num_tokens": 1936200547.0, "step": 11546 }, { "entropy": 1.627968817949295, "epoch": 1.2684902913954574, "grad_norm": 0.6086071729660034, "learning_rate": 7.611953469114848e-06, "loss": 1.3741, "mean_token_accuracy": 0.6606210221846899, "num_tokens": 1936426414.0, "step": 11547 }, { "entropy": 1.715288132429123, "epoch": 1.2686001483068303, "grad_norm": 0.6595588326454163, "learning_rate": 7.610470177605242e-06, "loss": 1.3391, "mean_token_accuracy": 0.6657747477293015, "num_tokens": 1936579983.0, "step": 11548 }, { "entropy": 1.7211828331152599, "epoch": 1.2687100052182032, "grad_norm": 0.7300513386726379, "learning_rate": 7.608986993372727e-06, "loss": 1.4806, "mean_token_accuracy": 0.6543687780698141, "num_tokens": 1936713738.0, "step": 11549 }, { "entropy": 1.6829596360524495, "epoch": 1.2688198621295763, "grad_norm": 0.6392272710800171, "learning_rate": 7.607503916464241e-06, "loss": 1.3133, "mean_token_accuracy": 0.6747443874677023, "num_tokens": 1936857583.0, "step": 11550 }, { "entropy": 1.682002027829488, "epoch": 1.2689297190409492, "grad_norm": 0.6379438638687134, "learning_rate": 7.606020946926731e-06, "loss": 1.4315, "mean_token_accuracy": 0.6531884868939718, "num_tokens": 1937072118.0, "step": 11551 }, { "entropy": 1.6802029808362324, "epoch": 1.269039575952322, "grad_norm": 0.6880862712860107, "learning_rate": 7.6045380848071295e-06, "loss": 1.3077, "mean_token_accuracy": 0.66480353474617, "num_tokens": 1937192796.0, "step": 11552 }, { "entropy": 1.7240610718727112, "epoch": 1.269149432863695, "grad_norm": 0.8166324496269226, "learning_rate": 7.6030553301523665e-06, "loss": 1.4164, "mean_token_accuracy": 0.653435026605924, "num_tokens": 1937370359.0, "step": 11553 }, { "entropy": 1.6760085920492809, "epoch": 1.2692592897750679, "grad_norm": 0.7545213103294373, "learning_rate": 7.601572683009373e-06, "loss": 1.2791, "mean_token_accuracy": 0.6738790373007456, "num_tokens": 1937515434.0, "step": 11554 }, { "entropy": 1.7001279195149739, "epoch": 1.269369146686441, "grad_norm": 0.6577803492546082, "learning_rate": 7.60009014342507e-06, "loss": 1.5541, "mean_token_accuracy": 0.6371362606684366, "num_tokens": 1937763386.0, "step": 11555 }, { "entropy": 1.6969635585943859, "epoch": 1.2694790035978138, "grad_norm": 0.6654831171035767, "learning_rate": 7.598607711446382e-06, "loss": 1.3728, "mean_token_accuracy": 0.6597702354192734, "num_tokens": 1937915864.0, "step": 11556 }, { "entropy": 1.7329435348510742, "epoch": 1.2695888605091867, "grad_norm": 0.7754169702529907, "learning_rate": 7.59712538712023e-06, "loss": 1.5217, "mean_token_accuracy": 0.6538184309999148, "num_tokens": 1938088629.0, "step": 11557 }, { "entropy": 1.7482871214548747, "epoch": 1.2696987174205598, "grad_norm": 0.6027451157569885, "learning_rate": 7.595643170493525e-06, "loss": 1.2809, "mean_token_accuracy": 0.6688571075598398, "num_tokens": 1938242998.0, "step": 11558 }, { "entropy": 1.6371783415476482, "epoch": 1.2698085743319327, "grad_norm": 0.7046194672584534, "learning_rate": 7.594161061613179e-06, "loss": 1.4464, "mean_token_accuracy": 0.6568540185689926, "num_tokens": 1938473136.0, "step": 11559 }, { "entropy": 1.71583757797877, "epoch": 1.2699184312433056, "grad_norm": 0.753637969493866, "learning_rate": 7.592679060526101e-06, "loss": 1.5237, "mean_token_accuracy": 0.6531053235133489, "num_tokens": 1938604756.0, "step": 11560 }, { "entropy": 1.7930570244789124, "epoch": 1.2700282881546785, "grad_norm": 0.7080893516540527, "learning_rate": 7.591197167279196e-06, "loss": 1.5274, "mean_token_accuracy": 0.6388321270545324, "num_tokens": 1938794403.0, "step": 11561 }, { "entropy": 1.6845806340376537, "epoch": 1.2701381450660514, "grad_norm": 0.6687464714050293, "learning_rate": 7.58971538191936e-06, "loss": 1.2573, "mean_token_accuracy": 0.6719970951477686, "num_tokens": 1938929740.0, "step": 11562 }, { "entropy": 1.6890028317769368, "epoch": 1.2702480019774245, "grad_norm": 0.7339609265327454, "learning_rate": 7.588233704493502e-06, "loss": 1.3484, "mean_token_accuracy": 0.6660866936047872, "num_tokens": 1939075182.0, "step": 11563 }, { "entropy": 1.7128116687138875, "epoch": 1.2703578588887974, "grad_norm": 0.8218494057655334, "learning_rate": 7.586752135048505e-06, "loss": 1.3804, "mean_token_accuracy": 0.6709433694680532, "num_tokens": 1939238859.0, "step": 11564 }, { "entropy": 1.6646581888198853, "epoch": 1.2704677158001703, "grad_norm": 0.7205196022987366, "learning_rate": 7.585270673631266e-06, "loss": 1.29, "mean_token_accuracy": 0.681825632850329, "num_tokens": 1939391199.0, "step": 11565 }, { "entropy": 1.7184071640173595, "epoch": 1.2705775727115431, "grad_norm": 0.6637095808982849, "learning_rate": 7.583789320288675e-06, "loss": 1.2919, "mean_token_accuracy": 0.6663111497958502, "num_tokens": 1939503801.0, "step": 11566 }, { "entropy": 1.684226264556249, "epoch": 1.270687429622916, "grad_norm": 0.6571996212005615, "learning_rate": 7.58230807506761e-06, "loss": 1.426, "mean_token_accuracy": 0.6591875404119492, "num_tokens": 1939662664.0, "step": 11567 }, { "entropy": 1.6475600401560466, "epoch": 1.2707972865342891, "grad_norm": 0.6276744604110718, "learning_rate": 7.580826938014953e-06, "loss": 1.3372, "mean_token_accuracy": 0.6658165256182352, "num_tokens": 1939828551.0, "step": 11568 }, { "entropy": 1.685198297103246, "epoch": 1.270907143445662, "grad_norm": 0.6816840171813965, "learning_rate": 7.579345909177586e-06, "loss": 1.4007, "mean_token_accuracy": 0.657182534535726, "num_tokens": 1939959149.0, "step": 11569 }, { "entropy": 1.7015057305494945, "epoch": 1.271017000357035, "grad_norm": 0.6778846979141235, "learning_rate": 7.577864988602377e-06, "loss": 1.4315, "mean_token_accuracy": 0.6453954130411148, "num_tokens": 1940132843.0, "step": 11570 }, { "entropy": 1.7004386285940807, "epoch": 1.271126857268408, "grad_norm": 0.7151092886924744, "learning_rate": 7.5763841763362e-06, "loss": 1.4591, "mean_token_accuracy": 0.6519459386666616, "num_tokens": 1940315082.0, "step": 11571 }, { "entropy": 1.6989735166231792, "epoch": 1.271236714179781, "grad_norm": 0.6294535994529724, "learning_rate": 7.574903472425923e-06, "loss": 1.217, "mean_token_accuracy": 0.6772444297870001, "num_tokens": 1940439043.0, "step": 11572 }, { "entropy": 1.7112750212351482, "epoch": 1.2713465710911538, "grad_norm": 0.7392633557319641, "learning_rate": 7.573422876918404e-06, "loss": 1.4047, "mean_token_accuracy": 0.6572145769993464, "num_tokens": 1940580555.0, "step": 11573 }, { "entropy": 1.783184975385666, "epoch": 1.2714564280025267, "grad_norm": 0.6250627040863037, "learning_rate": 7.571942389860507e-06, "loss": 1.4025, "mean_token_accuracy": 0.6510418156782786, "num_tokens": 1940756497.0, "step": 11574 }, { "entropy": 1.6986599067846935, "epoch": 1.2715662849138996, "grad_norm": 0.6578481197357178, "learning_rate": 7.570462011299091e-06, "loss": 1.2965, "mean_token_accuracy": 0.6748481144507726, "num_tokens": 1940890219.0, "step": 11575 }, { "entropy": 1.7359587053457897, "epoch": 1.2716761418252727, "grad_norm": 0.6191852688789368, "learning_rate": 7.568981741281007e-06, "loss": 1.468, "mean_token_accuracy": 0.6537004808584849, "num_tokens": 1941083366.0, "step": 11576 }, { "entropy": 1.6769965887069702, "epoch": 1.2717859987366456, "grad_norm": 0.639702320098877, "learning_rate": 7.567501579853103e-06, "loss": 1.5604, "mean_token_accuracy": 0.6393257280190786, "num_tokens": 1941330691.0, "step": 11577 }, { "entropy": 1.695969820022583, "epoch": 1.2718958556480184, "grad_norm": 0.6549391746520996, "learning_rate": 7.5660215270622306e-06, "loss": 1.393, "mean_token_accuracy": 0.6529108683268229, "num_tokens": 1941483759.0, "step": 11578 }, { "entropy": 1.6532461146513622, "epoch": 1.2720057125593913, "grad_norm": 0.5573631525039673, "learning_rate": 7.5645415829552275e-06, "loss": 1.424, "mean_token_accuracy": 0.6503102580706278, "num_tokens": 1941662294.0, "step": 11579 }, { "entropy": 1.720036009947459, "epoch": 1.2721155694707642, "grad_norm": 0.6423214673995972, "learning_rate": 7.56306174757893e-06, "loss": 1.3438, "mean_token_accuracy": 0.6584438482920328, "num_tokens": 1941797711.0, "step": 11580 }, { "entropy": 1.6889376938343048, "epoch": 1.2722254263821373, "grad_norm": 0.6410171389579773, "learning_rate": 7.5615820209801875e-06, "loss": 1.4084, "mean_token_accuracy": 0.6631045937538147, "num_tokens": 1941925972.0, "step": 11581 }, { "entropy": 1.7637586692969005, "epoch": 1.2723352832935102, "grad_norm": 1.9716415405273438, "learning_rate": 7.560102403205822e-06, "loss": 1.1051, "mean_token_accuracy": 0.676330178976059, "num_tokens": 1942076811.0, "step": 11582 }, { "entropy": 1.6584815084934235, "epoch": 1.272445140204883, "grad_norm": 0.6023903489112854, "learning_rate": 7.558622894302663e-06, "loss": 1.4478, "mean_token_accuracy": 0.660874476035436, "num_tokens": 1942249799.0, "step": 11583 }, { "entropy": 1.7897962033748627, "epoch": 1.2725549971162562, "grad_norm": 0.7759119868278503, "learning_rate": 7.557143494317543e-06, "loss": 1.2283, "mean_token_accuracy": 0.6802993218104044, "num_tokens": 1942363001.0, "step": 11584 }, { "entropy": 1.7277598679065704, "epoch": 1.272664854027629, "grad_norm": 0.5648651719093323, "learning_rate": 7.5556642032972774e-06, "loss": 1.3947, "mean_token_accuracy": 0.6426876882712046, "num_tokens": 1942584633.0, "step": 11585 }, { "entropy": 1.7164186437924702, "epoch": 1.272774710939002, "grad_norm": 0.7039127349853516, "learning_rate": 7.554185021288684e-06, "loss": 1.5314, "mean_token_accuracy": 0.6496036102374395, "num_tokens": 1942729133.0, "step": 11586 }, { "entropy": 1.6905947029590607, "epoch": 1.2728845678503748, "grad_norm": 0.6478644609451294, "learning_rate": 7.5527059483385875e-06, "loss": 1.3381, "mean_token_accuracy": 0.659003218015035, "num_tokens": 1942862637.0, "step": 11587 }, { "entropy": 1.6825307210286458, "epoch": 1.2729944247617477, "grad_norm": 0.6765702962875366, "learning_rate": 7.551226984493793e-06, "loss": 1.3988, "mean_token_accuracy": 0.6641071836153666, "num_tokens": 1943082999.0, "step": 11588 }, { "entropy": 1.7173330585161846, "epoch": 1.2731042816731208, "grad_norm": 0.8550540804862976, "learning_rate": 7.549748129801109e-06, "loss": 1.5485, "mean_token_accuracy": 0.6394771635532379, "num_tokens": 1943238205.0, "step": 11589 }, { "entropy": 1.6354697545369465, "epoch": 1.2732141385844937, "grad_norm": 0.688818633556366, "learning_rate": 7.548269384307345e-06, "loss": 1.2072, "mean_token_accuracy": 0.6835995813210806, "num_tokens": 1943351183.0, "step": 11590 }, { "entropy": 1.6105882823467255, "epoch": 1.2733239954958666, "grad_norm": 0.6564744710922241, "learning_rate": 7.5467907480592984e-06, "loss": 1.3681, "mean_token_accuracy": 0.6682546585798264, "num_tokens": 1943561853.0, "step": 11591 }, { "entropy": 1.6714021066824596, "epoch": 1.2734338524072395, "grad_norm": 0.6318192481994629, "learning_rate": 7.545312221103765e-06, "loss": 1.3323, "mean_token_accuracy": 0.669236014286677, "num_tokens": 1943736887.0, "step": 11592 }, { "entropy": 1.7196357150872548, "epoch": 1.2735437093186124, "grad_norm": 0.7883795499801636, "learning_rate": 7.543833803487548e-06, "loss": 1.4954, "mean_token_accuracy": 0.6526716152826945, "num_tokens": 1943908941.0, "step": 11593 }, { "entropy": 1.6909798383712769, "epoch": 1.2736535662299855, "grad_norm": 0.7004644870758057, "learning_rate": 7.542355495257432e-06, "loss": 1.4842, "mean_token_accuracy": 0.6391565153996149, "num_tokens": 1944106941.0, "step": 11594 }, { "entropy": 1.7800021568934123, "epoch": 1.2737634231413584, "grad_norm": 0.8370211124420166, "learning_rate": 7.540877296460205e-06, "loss": 1.2816, "mean_token_accuracy": 0.6755285759766897, "num_tokens": 1944294593.0, "step": 11595 }, { "entropy": 1.7417665024598439, "epoch": 1.2738732800527313, "grad_norm": 0.663817822933197, "learning_rate": 7.539399207142657e-06, "loss": 1.4015, "mean_token_accuracy": 0.6470625003178915, "num_tokens": 1944448204.0, "step": 11596 }, { "entropy": 1.717939426501592, "epoch": 1.2739831369641044, "grad_norm": 0.6097803115844727, "learning_rate": 7.537921227351561e-06, "loss": 1.3607, "mean_token_accuracy": 0.6571676184733709, "num_tokens": 1944609689.0, "step": 11597 }, { "entropy": 1.709089497725169, "epoch": 1.2740929938754773, "grad_norm": 0.7132073044776917, "learning_rate": 7.536443357133696e-06, "loss": 1.4271, "mean_token_accuracy": 0.6546765118837357, "num_tokens": 1944787935.0, "step": 11598 }, { "entropy": 1.6773851712544758, "epoch": 1.2742028507868501, "grad_norm": 0.6749030947685242, "learning_rate": 7.5349655965358415e-06, "loss": 1.4296, "mean_token_accuracy": 0.6590938319762548, "num_tokens": 1944997360.0, "step": 11599 }, { "entropy": 1.69098565975825, "epoch": 1.274312707698223, "grad_norm": 0.6707255244255066, "learning_rate": 7.533487945604765e-06, "loss": 1.322, "mean_token_accuracy": 0.6724912573893865, "num_tokens": 1945134346.0, "step": 11600 }, { "entropy": 1.7805437743663788, "epoch": 1.274422564609596, "grad_norm": 0.6537451148033142, "learning_rate": 7.532010404387231e-06, "loss": 1.4231, "mean_token_accuracy": 0.646497001250585, "num_tokens": 1945327281.0, "step": 11601 }, { "entropy": 1.666386862595876, "epoch": 1.274532421520969, "grad_norm": 0.6942588686943054, "learning_rate": 7.530532972930007e-06, "loss": 1.2446, "mean_token_accuracy": 0.6778380324443182, "num_tokens": 1945439224.0, "step": 11602 }, { "entropy": 1.7151568233966827, "epoch": 1.274642278432342, "grad_norm": 0.7166895866394043, "learning_rate": 7.529055651279851e-06, "loss": 1.3638, "mean_token_accuracy": 0.6571328192949295, "num_tokens": 1945660352.0, "step": 11603 }, { "entropy": 1.7030317882696788, "epoch": 1.2747521353437148, "grad_norm": 0.6731720566749573, "learning_rate": 7.5275784394835135e-06, "loss": 1.3623, "mean_token_accuracy": 0.6527252991994222, "num_tokens": 1945809702.0, "step": 11604 }, { "entropy": 1.7581920226414998, "epoch": 1.2748619922550877, "grad_norm": 0.6831167936325073, "learning_rate": 7.526101337587761e-06, "loss": 1.3561, "mean_token_accuracy": 0.6571609377861023, "num_tokens": 1945959339.0, "step": 11605 }, { "entropy": 1.6485190987586975, "epoch": 1.2749718491664606, "grad_norm": 0.6158422827720642, "learning_rate": 7.524624345639333e-06, "loss": 1.3174, "mean_token_accuracy": 0.6634372224410375, "num_tokens": 1946151020.0, "step": 11606 }, { "entropy": 1.6657854715983074, "epoch": 1.2750817060778337, "grad_norm": 0.622463583946228, "learning_rate": 7.5231474636849785e-06, "loss": 1.3579, "mean_token_accuracy": 0.645659883817037, "num_tokens": 1946309288.0, "step": 11607 }, { "entropy": 1.7252983450889587, "epoch": 1.2751915629892066, "grad_norm": 0.7152490615844727, "learning_rate": 7.521670691771443e-06, "loss": 1.4609, "mean_token_accuracy": 0.6484145522117615, "num_tokens": 1946497861.0, "step": 11608 }, { "entropy": 1.6993794043858845, "epoch": 1.2753014199005794, "grad_norm": 0.6890069842338562, "learning_rate": 7.52019402994546e-06, "loss": 1.4335, "mean_token_accuracy": 0.6603013724088669, "num_tokens": 1946661551.0, "step": 11609 }, { "entropy": 1.7552596231301625, "epoch": 1.2754112768119525, "grad_norm": 0.6681763529777527, "learning_rate": 7.5187174782537675e-06, "loss": 1.4473, "mean_token_accuracy": 0.6456053505341212, "num_tokens": 1946840538.0, "step": 11610 }, { "entropy": 1.7156967719395955, "epoch": 1.2755211337233254, "grad_norm": 0.629675567150116, "learning_rate": 7.517241036743097e-06, "loss": 1.5218, "mean_token_accuracy": 0.6306114296118418, "num_tokens": 1947058986.0, "step": 11611 }, { "entropy": 1.7247178852558136, "epoch": 1.2756309906346983, "grad_norm": 0.6354183554649353, "learning_rate": 7.51576470546018e-06, "loss": 1.4083, "mean_token_accuracy": 0.6475364615519842, "num_tokens": 1947200502.0, "step": 11612 }, { "entropy": 1.7191159228483837, "epoch": 1.2757408475460712, "grad_norm": 0.5886407494544983, "learning_rate": 7.514288484451742e-06, "loss": 1.4599, "mean_token_accuracy": 0.6498788446187973, "num_tokens": 1947384612.0, "step": 11613 }, { "entropy": 1.7265147765477498, "epoch": 1.275850704457444, "grad_norm": 0.6430819630622864, "learning_rate": 7.5128123737645e-06, "loss": 1.4648, "mean_token_accuracy": 0.6587880849838257, "num_tokens": 1947533842.0, "step": 11614 }, { "entropy": 1.6808937191963196, "epoch": 1.2759605613688172, "grad_norm": 0.6885290741920471, "learning_rate": 7.511336373445175e-06, "loss": 1.4076, "mean_token_accuracy": 0.6503855834404627, "num_tokens": 1947734266.0, "step": 11615 }, { "entropy": 1.7028583685557048, "epoch": 1.27607041828019, "grad_norm": 0.7654819488525391, "learning_rate": 7.5098604835404856e-06, "loss": 1.3317, "mean_token_accuracy": 0.6681941697994868, "num_tokens": 1947850050.0, "step": 11616 }, { "entropy": 1.7267645796140034, "epoch": 1.276180275191563, "grad_norm": 0.6581327319145203, "learning_rate": 7.508384704097134e-06, "loss": 1.4452, "mean_token_accuracy": 0.6505034416913986, "num_tokens": 1948033475.0, "step": 11617 }, { "entropy": 1.6980148752530415, "epoch": 1.2762901321029358, "grad_norm": 0.7092710137367249, "learning_rate": 7.506909035161833e-06, "loss": 1.3132, "mean_token_accuracy": 0.6706616580486298, "num_tokens": 1948154888.0, "step": 11618 }, { "entropy": 1.6872047583262126, "epoch": 1.2763999890143087, "grad_norm": 0.7112807035446167, "learning_rate": 7.505433476781292e-06, "loss": 1.2504, "mean_token_accuracy": 0.6689596921205521, "num_tokens": 1948286056.0, "step": 11619 }, { "entropy": 1.7642404039700825, "epoch": 1.2765098459256818, "grad_norm": 0.7342185974121094, "learning_rate": 7.5039580290022054e-06, "loss": 1.495, "mean_token_accuracy": 0.6511110663414001, "num_tokens": 1948447031.0, "step": 11620 }, { "entropy": 1.714083880186081, "epoch": 1.2766197028370547, "grad_norm": 0.8669022917747498, "learning_rate": 7.502482691871269e-06, "loss": 1.1501, "mean_token_accuracy": 0.6964519172906876, "num_tokens": 1948544481.0, "step": 11621 }, { "entropy": 1.6839018563429515, "epoch": 1.2767295597484276, "grad_norm": 0.6407862901687622, "learning_rate": 7.501007465435182e-06, "loss": 1.568, "mean_token_accuracy": 0.6368062049150467, "num_tokens": 1948753108.0, "step": 11622 }, { "entropy": 1.763452668984731, "epoch": 1.2768394166598007, "grad_norm": 0.7078571319580078, "learning_rate": 7.499532349740631e-06, "loss": 1.5233, "mean_token_accuracy": 0.6335721065600713, "num_tokens": 1948921783.0, "step": 11623 }, { "entropy": 1.6938395102818806, "epoch": 1.2769492735711736, "grad_norm": 0.6525269150733948, "learning_rate": 7.498057344834302e-06, "loss": 1.5406, "mean_token_accuracy": 0.6494365582863489, "num_tokens": 1949114943.0, "step": 11624 }, { "entropy": 1.6960388819376628, "epoch": 1.2770591304825465, "grad_norm": 0.6488698124885559, "learning_rate": 7.496582450762881e-06, "loss": 1.3803, "mean_token_accuracy": 0.6615366737047831, "num_tokens": 1949274514.0, "step": 11625 }, { "entropy": 1.6426782707373302, "epoch": 1.2771689873939194, "grad_norm": 0.6749052405357361, "learning_rate": 7.495107667573047e-06, "loss": 1.3651, "mean_token_accuracy": 0.6629842420419058, "num_tokens": 1949433286.0, "step": 11626 }, { "entropy": 1.7444765071074169, "epoch": 1.2772788443052923, "grad_norm": 0.5791497230529785, "learning_rate": 7.493632995311477e-06, "loss": 1.373, "mean_token_accuracy": 0.6536852220694224, "num_tokens": 1949597131.0, "step": 11627 }, { "entropy": 1.6915338238080342, "epoch": 1.2773887012166654, "grad_norm": 0.8342865109443665, "learning_rate": 7.492158434024846e-06, "loss": 1.6073, "mean_token_accuracy": 0.6346415231625239, "num_tokens": 1949781046.0, "step": 11628 }, { "entropy": 1.7401387890179951, "epoch": 1.2774985581280383, "grad_norm": 0.62762051820755, "learning_rate": 7.490683983759814e-06, "loss": 1.435, "mean_token_accuracy": 0.6580404887596766, "num_tokens": 1949912389.0, "step": 11629 }, { "entropy": 1.6930598020553589, "epoch": 1.2776084150394111, "grad_norm": 0.6954199075698853, "learning_rate": 7.489209644563053e-06, "loss": 1.416, "mean_token_accuracy": 0.659172311425209, "num_tokens": 1950093173.0, "step": 11630 }, { "entropy": 1.6991178691387177, "epoch": 1.277718271950784, "grad_norm": 0.712602972984314, "learning_rate": 7.487735416481227e-06, "loss": 1.306, "mean_token_accuracy": 0.6658920894066492, "num_tokens": 1950240503.0, "step": 11631 }, { "entropy": 1.7666970590750377, "epoch": 1.277828128862157, "grad_norm": 0.6580962538719177, "learning_rate": 7.486261299560993e-06, "loss": 1.4578, "mean_token_accuracy": 0.660940021276474, "num_tokens": 1950405403.0, "step": 11632 }, { "entropy": 1.684452474117279, "epoch": 1.27793798577353, "grad_norm": 0.8572995662689209, "learning_rate": 7.484787293849003e-06, "loss": 1.2695, "mean_token_accuracy": 0.6728391995032629, "num_tokens": 1950539727.0, "step": 11633 }, { "entropy": 1.717938760916392, "epoch": 1.278047842684903, "grad_norm": 0.7117380499839783, "learning_rate": 7.483313399391914e-06, "loss": 1.3573, "mean_token_accuracy": 0.6588635991017023, "num_tokens": 1950689158.0, "step": 11634 }, { "entropy": 1.703975349664688, "epoch": 1.2781576995962758, "grad_norm": 3.185786724090576, "learning_rate": 7.48183961623637e-06, "loss": 1.5718, "mean_token_accuracy": 0.6170200606187185, "num_tokens": 1950984775.0, "step": 11635 }, { "entropy": 1.7145447830359142, "epoch": 1.278267556507649, "grad_norm": 0.689428985118866, "learning_rate": 7.480365944429013e-06, "loss": 1.5036, "mean_token_accuracy": 0.6465061157941818, "num_tokens": 1951196598.0, "step": 11636 }, { "entropy": 1.7026291191577911, "epoch": 1.2783774134190218, "grad_norm": 0.6487104296684265, "learning_rate": 7.478892384016494e-06, "loss": 1.5404, "mean_token_accuracy": 0.6514692256848017, "num_tokens": 1951402964.0, "step": 11637 }, { "entropy": 1.6885337332884471, "epoch": 1.2784872703303947, "grad_norm": 0.7306270599365234, "learning_rate": 7.477418935045442e-06, "loss": 1.4906, "mean_token_accuracy": 0.6599554171164831, "num_tokens": 1951561045.0, "step": 11638 }, { "entropy": 1.6858366429805756, "epoch": 1.2785971272417676, "grad_norm": 0.6280055046081543, "learning_rate": 7.475945597562491e-06, "loss": 1.4303, "mean_token_accuracy": 0.6426028609275818, "num_tokens": 1951751395.0, "step": 11639 }, { "entropy": 1.7249255081017811, "epoch": 1.2787069841531404, "grad_norm": 0.6180586218833923, "learning_rate": 7.4744723716142785e-06, "loss": 1.5163, "mean_token_accuracy": 0.6588364889224371, "num_tokens": 1951920690.0, "step": 11640 }, { "entropy": 1.6651106576124828, "epoch": 1.2788168410645135, "grad_norm": 0.629157304763794, "learning_rate": 7.472999257247424e-06, "loss": 1.3646, "mean_token_accuracy": 0.6712455501159033, "num_tokens": 1952071357.0, "step": 11641 }, { "entropy": 1.7360005180040996, "epoch": 1.2789266979758864, "grad_norm": 0.6886469125747681, "learning_rate": 7.471526254508552e-06, "loss": 1.2119, "mean_token_accuracy": 0.6848239749670029, "num_tokens": 1952185288.0, "step": 11642 }, { "entropy": 1.7535901367664337, "epoch": 1.2790365548872593, "grad_norm": 0.8039774298667908, "learning_rate": 7.470053363444288e-06, "loss": 1.3669, "mean_token_accuracy": 0.6577004939317703, "num_tokens": 1952336655.0, "step": 11643 }, { "entropy": 1.7281550963719685, "epoch": 1.2791464117986322, "grad_norm": 0.6642824411392212, "learning_rate": 7.4685805841012414e-06, "loss": 1.3183, "mean_token_accuracy": 0.6607625285784403, "num_tokens": 1952503015.0, "step": 11644 }, { "entropy": 1.6712844371795654, "epoch": 1.279256268710005, "grad_norm": 0.6621568202972412, "learning_rate": 7.467107916526028e-06, "loss": 1.3695, "mean_token_accuracy": 0.6621130158503851, "num_tokens": 1952675470.0, "step": 11645 }, { "entropy": 1.6059234241644542, "epoch": 1.2793661256213782, "grad_norm": 0.6320291757583618, "learning_rate": 7.46563536076526e-06, "loss": 1.283, "mean_token_accuracy": 0.6788782924413681, "num_tokens": 1952802099.0, "step": 11646 }, { "entropy": 1.7441943685213726, "epoch": 1.279475982532751, "grad_norm": 0.6545817255973816, "learning_rate": 7.464162916865541e-06, "loss": 1.3842, "mean_token_accuracy": 0.6757529973983765, "num_tokens": 1952974376.0, "step": 11647 }, { "entropy": 1.6520853539307911, "epoch": 1.279585839444124, "grad_norm": 0.6548392176628113, "learning_rate": 7.462690584873467e-06, "loss": 1.355, "mean_token_accuracy": 0.6545472939809164, "num_tokens": 1953140371.0, "step": 11648 }, { "entropy": 1.7266732851664226, "epoch": 1.279695696355497, "grad_norm": 0.8059017062187195, "learning_rate": 7.461218364835645e-06, "loss": 1.2907, "mean_token_accuracy": 0.6783981472253799, "num_tokens": 1953295038.0, "step": 11649 }, { "entropy": 1.6765375832716625, "epoch": 1.27980555326687, "grad_norm": 0.6597868204116821, "learning_rate": 7.459746256798666e-06, "loss": 1.3894, "mean_token_accuracy": 0.6578025966882706, "num_tokens": 1953454565.0, "step": 11650 }, { "entropy": 1.666476051012675, "epoch": 1.2799154101782428, "grad_norm": 0.6877656579017639, "learning_rate": 7.4582742608091244e-06, "loss": 1.4281, "mean_token_accuracy": 0.6640834957361221, "num_tokens": 1953657839.0, "step": 11651 }, { "entropy": 1.6953352391719818, "epoch": 1.2800252670896157, "grad_norm": 0.6984429359436035, "learning_rate": 7.456802376913608e-06, "loss": 1.3965, "mean_token_accuracy": 0.670659194389979, "num_tokens": 1953769451.0, "step": 11652 }, { "entropy": 1.66109103957812, "epoch": 1.2801351240009886, "grad_norm": 0.6225873827934265, "learning_rate": 7.455330605158697e-06, "loss": 1.4248, "mean_token_accuracy": 0.6516261696815491, "num_tokens": 1953987279.0, "step": 11653 }, { "entropy": 1.7144280870755513, "epoch": 1.2802449809123617, "grad_norm": 0.6939162015914917, "learning_rate": 7.453858945590973e-06, "loss": 1.4259, "mean_token_accuracy": 0.6643421500921249, "num_tokens": 1954139885.0, "step": 11654 }, { "entropy": 1.6657731036345165, "epoch": 1.2803548378237346, "grad_norm": 0.7827641367912292, "learning_rate": 7.45238739825702e-06, "loss": 1.3829, "mean_token_accuracy": 0.6760758807261785, "num_tokens": 1954304470.0, "step": 11655 }, { "entropy": 1.6782557964324951, "epoch": 1.2804646947351075, "grad_norm": 0.7069709897041321, "learning_rate": 7.4509159632034045e-06, "loss": 1.3769, "mean_token_accuracy": 0.6612060517072678, "num_tokens": 1954436039.0, "step": 11656 }, { "entropy": 1.6865948935349782, "epoch": 1.2805745516464806, "grad_norm": 0.5696167945861816, "learning_rate": 7.449444640476702e-06, "loss": 1.4007, "mean_token_accuracy": 0.6477059076229731, "num_tokens": 1954623170.0, "step": 11657 }, { "entropy": 1.7937167088190715, "epoch": 1.2806844085578533, "grad_norm": 0.8088985085487366, "learning_rate": 7.447973430123476e-06, "loss": 1.5221, "mean_token_accuracy": 0.6332688679297765, "num_tokens": 1954786672.0, "step": 11658 }, { "entropy": 1.6993589500586193, "epoch": 1.2807942654692264, "grad_norm": 0.6645467877388, "learning_rate": 7.446502332190289e-06, "loss": 1.3088, "mean_token_accuracy": 0.6647295008103052, "num_tokens": 1954897607.0, "step": 11659 }, { "entropy": 1.6773101290067036, "epoch": 1.2809041223805993, "grad_norm": 0.8003481030464172, "learning_rate": 7.445031346723699e-06, "loss": 1.3166, "mean_token_accuracy": 0.6816578855117162, "num_tokens": 1955037208.0, "step": 11660 }, { "entropy": 1.6241084535916646, "epoch": 1.2810139792919721, "grad_norm": 0.8966746926307678, "learning_rate": 7.443560473770271e-06, "loss": 1.395, "mean_token_accuracy": 0.6726480275392532, "num_tokens": 1955209074.0, "step": 11661 }, { "entropy": 1.7033714254697163, "epoch": 1.2811238362033452, "grad_norm": 0.7265210151672363, "learning_rate": 7.442089713376548e-06, "loss": 1.2868, "mean_token_accuracy": 0.6727895885705948, "num_tokens": 1955344048.0, "step": 11662 }, { "entropy": 1.6858701407909393, "epoch": 1.2812336931147181, "grad_norm": 0.7443154454231262, "learning_rate": 7.440619065589083e-06, "loss": 1.4205, "mean_token_accuracy": 0.6643148511648178, "num_tokens": 1955487623.0, "step": 11663 }, { "entropy": 1.6719338993231456, "epoch": 1.281343550026091, "grad_norm": 0.7148160934448242, "learning_rate": 7.439148530454423e-06, "loss": 1.5308, "mean_token_accuracy": 0.6345583150784174, "num_tokens": 1955669240.0, "step": 11664 }, { "entropy": 1.6509188016255696, "epoch": 1.281453406937464, "grad_norm": 0.80116868019104, "learning_rate": 7.437678108019104e-06, "loss": 1.4478, "mean_token_accuracy": 0.6688994914293289, "num_tokens": 1955847696.0, "step": 11665 }, { "entropy": 1.7179848750432332, "epoch": 1.2815632638488368, "grad_norm": 0.6163962483406067, "learning_rate": 7.436207798329667e-06, "loss": 1.4411, "mean_token_accuracy": 0.6562605003515879, "num_tokens": 1956010904.0, "step": 11666 }, { "entropy": 1.695182869831721, "epoch": 1.28167312076021, "grad_norm": 0.6897042989730835, "learning_rate": 7.434737601432651e-06, "loss": 1.3903, "mean_token_accuracy": 0.6611831237872442, "num_tokens": 1956172860.0, "step": 11667 }, { "entropy": 1.6385972301165264, "epoch": 1.2817829776715828, "grad_norm": 0.5938105583190918, "learning_rate": 7.43326751737458e-06, "loss": 1.4496, "mean_token_accuracy": 0.6599717885255814, "num_tokens": 1956402001.0, "step": 11668 }, { "entropy": 1.6853100558121998, "epoch": 1.2818928345829557, "grad_norm": 0.7520754337310791, "learning_rate": 7.4317975462019885e-06, "loss": 1.4595, "mean_token_accuracy": 0.6442477852106094, "num_tokens": 1956542664.0, "step": 11669 }, { "entropy": 1.695339282353719, "epoch": 1.2820026914943288, "grad_norm": 0.6549242734909058, "learning_rate": 7.430327687961394e-06, "loss": 1.3846, "mean_token_accuracy": 0.6543597926696142, "num_tokens": 1956719717.0, "step": 11670 }, { "entropy": 1.678192138671875, "epoch": 1.2821125484057014, "grad_norm": 0.7235942482948303, "learning_rate": 7.428857942699322e-06, "loss": 1.396, "mean_token_accuracy": 0.6574389437834421, "num_tokens": 1956849343.0, "step": 11671 }, { "entropy": 1.6636808514595032, "epoch": 1.2822224053170745, "grad_norm": 0.664930522441864, "learning_rate": 7.427388310462285e-06, "loss": 1.3348, "mean_token_accuracy": 0.6659966111183167, "num_tokens": 1957000013.0, "step": 11672 }, { "entropy": 1.6923915545145671, "epoch": 1.2823322622284474, "grad_norm": 0.7998056411743164, "learning_rate": 7.425918791296798e-06, "loss": 1.4864, "mean_token_accuracy": 0.6688689639170965, "num_tokens": 1957204036.0, "step": 11673 }, { "entropy": 1.7271687885125477, "epoch": 1.2824421191398203, "grad_norm": 0.7436834573745728, "learning_rate": 7.42444938524937e-06, "loss": 1.312, "mean_token_accuracy": 0.6680330435434977, "num_tokens": 1957337123.0, "step": 11674 }, { "entropy": 1.715428461631139, "epoch": 1.2825519760511934, "grad_norm": 0.7918713688850403, "learning_rate": 7.422980092366512e-06, "loss": 1.3576, "mean_token_accuracy": 0.6627502292394638, "num_tokens": 1957532881.0, "step": 11675 }, { "entropy": 1.7627593576908112, "epoch": 1.2826618329625663, "grad_norm": 0.6629673838615417, "learning_rate": 7.421510912694716e-06, "loss": 1.3912, "mean_token_accuracy": 0.6503987908363342, "num_tokens": 1957681518.0, "step": 11676 }, { "entropy": 1.6355752150217693, "epoch": 1.2827716898739392, "grad_norm": 0.6012086868286133, "learning_rate": 7.420041846280492e-06, "loss": 1.4797, "mean_token_accuracy": 0.6419784228006998, "num_tokens": 1957868543.0, "step": 11677 }, { "entropy": 1.7351706624031067, "epoch": 1.282881546785312, "grad_norm": 0.6617944240570068, "learning_rate": 7.418572893170328e-06, "loss": 1.4835, "mean_token_accuracy": 0.646317924062411, "num_tokens": 1958073332.0, "step": 11678 }, { "entropy": 1.7856847544511159, "epoch": 1.282991403696685, "grad_norm": 0.7386542558670044, "learning_rate": 7.417104053410718e-06, "loss": 1.373, "mean_token_accuracy": 0.6571315675973892, "num_tokens": 1958228225.0, "step": 11679 }, { "entropy": 1.6699702441692352, "epoch": 1.283101260608058, "grad_norm": 0.6713958382606506, "learning_rate": 7.415635327048152e-06, "loss": 1.3583, "mean_token_accuracy": 0.6684353550275167, "num_tokens": 1958397897.0, "step": 11680 }, { "entropy": 1.7619405488173168, "epoch": 1.283211117519431, "grad_norm": 0.9027857780456543, "learning_rate": 7.414166714129112e-06, "loss": 1.3348, "mean_token_accuracy": 0.6649856468041738, "num_tokens": 1958569608.0, "step": 11681 }, { "entropy": 1.7285025020440419, "epoch": 1.2833209744308038, "grad_norm": 0.6914839744567871, "learning_rate": 7.4126982147000785e-06, "loss": 1.4919, "mean_token_accuracy": 0.6378475278615952, "num_tokens": 1958774892.0, "step": 11682 }, { "entropy": 1.7116582890351613, "epoch": 1.283430831342177, "grad_norm": 0.7318129539489746, "learning_rate": 7.411229828807531e-06, "loss": 1.3924, "mean_token_accuracy": 0.6698858588933945, "num_tokens": 1958936632.0, "step": 11683 }, { "entropy": 1.6879831353823345, "epoch": 1.2835406882535498, "grad_norm": 0.6720309257507324, "learning_rate": 7.409761556497945e-06, "loss": 1.3349, "mean_token_accuracy": 0.658750464518865, "num_tokens": 1959147194.0, "step": 11684 }, { "entropy": 1.7317336002985637, "epoch": 1.2836505451649227, "grad_norm": 0.7208735346794128, "learning_rate": 7.408293397817783e-06, "loss": 1.46, "mean_token_accuracy": 0.6475148300329844, "num_tokens": 1959348096.0, "step": 11685 }, { "entropy": 1.6827017863591511, "epoch": 1.2837604020762956, "grad_norm": 0.6627811789512634, "learning_rate": 7.406825352813516e-06, "loss": 1.3233, "mean_token_accuracy": 0.6603292127450308, "num_tokens": 1959490153.0, "step": 11686 }, { "entropy": 1.7177750865618389, "epoch": 1.2838702589876685, "grad_norm": 0.6499682664871216, "learning_rate": 7.405357421531614e-06, "loss": 1.3783, "mean_token_accuracy": 0.6555012961228689, "num_tokens": 1959626237.0, "step": 11687 }, { "entropy": 1.7713517745335896, "epoch": 1.2839801158990416, "grad_norm": 0.6956122517585754, "learning_rate": 7.403889604018524e-06, "loss": 1.496, "mean_token_accuracy": 0.6454281061887741, "num_tokens": 1959841349.0, "step": 11688 }, { "entropy": 1.7518351475397747, "epoch": 1.2840899728104145, "grad_norm": 0.6666655540466309, "learning_rate": 7.402421900320711e-06, "loss": 1.3928, "mean_token_accuracy": 0.641091987490654, "num_tokens": 1960040280.0, "step": 11689 }, { "entropy": 1.7136612335840862, "epoch": 1.2841998297217874, "grad_norm": 0.8001027703285217, "learning_rate": 7.400954310484623e-06, "loss": 1.5009, "mean_token_accuracy": 0.6399757514397303, "num_tokens": 1960222001.0, "step": 11690 }, { "entropy": 1.6805146038532257, "epoch": 1.2843096866331603, "grad_norm": 0.760985255241394, "learning_rate": 7.399486834556706e-06, "loss": 1.2747, "mean_token_accuracy": 0.6765825847784678, "num_tokens": 1960361451.0, "step": 11691 }, { "entropy": 1.6519253353277843, "epoch": 1.2844195435445331, "grad_norm": 0.6442874670028687, "learning_rate": 7.3980194725834105e-06, "loss": 1.4785, "mean_token_accuracy": 0.6515339364608129, "num_tokens": 1960537160.0, "step": 11692 }, { "entropy": 1.7215826908747356, "epoch": 1.2845294004559062, "grad_norm": 0.8578620553016663, "learning_rate": 7.3965522246111774e-06, "loss": 1.268, "mean_token_accuracy": 0.6820251246293386, "num_tokens": 1960638199.0, "step": 11693 }, { "entropy": 1.6882357994715373, "epoch": 1.2846392573672791, "grad_norm": 0.6779175996780396, "learning_rate": 7.395085090686443e-06, "loss": 1.2499, "mean_token_accuracy": 0.6728865206241608, "num_tokens": 1960770852.0, "step": 11694 }, { "entropy": 1.6696954766909282, "epoch": 1.284749114278652, "grad_norm": 0.6538259387016296, "learning_rate": 7.3936180708556375e-06, "loss": 1.2807, "mean_token_accuracy": 0.6751231253147125, "num_tokens": 1960900887.0, "step": 11695 }, { "entropy": 1.6774700582027435, "epoch": 1.2848589711900251, "grad_norm": 0.7781380414962769, "learning_rate": 7.392151165165198e-06, "loss": 1.3951, "mean_token_accuracy": 0.6492411891619364, "num_tokens": 1961094569.0, "step": 11696 }, { "entropy": 1.6870755751927693, "epoch": 1.284968828101398, "grad_norm": 0.629217803478241, "learning_rate": 7.390684373661547e-06, "loss": 1.432, "mean_token_accuracy": 0.6549626439809799, "num_tokens": 1961376840.0, "step": 11697 }, { "entropy": 1.6291709244251251, "epoch": 1.285078685012771, "grad_norm": 0.5751771926879883, "learning_rate": 7.389217696391107e-06, "loss": 1.3228, "mean_token_accuracy": 0.6684358169635137, "num_tokens": 1961548354.0, "step": 11698 }, { "entropy": 1.727874477704366, "epoch": 1.2851885419241438, "grad_norm": 0.7103152871131897, "learning_rate": 7.387751133400303e-06, "loss": 1.5097, "mean_token_accuracy": 0.6475981076558431, "num_tokens": 1961734956.0, "step": 11699 }, { "entropy": 1.68792125582695, "epoch": 1.2852983988355167, "grad_norm": 0.6444193124771118, "learning_rate": 7.386284684735547e-06, "loss": 1.3165, "mean_token_accuracy": 0.6684761742750803, "num_tokens": 1961923277.0, "step": 11700 }, { "entropy": 1.723108321428299, "epoch": 1.2854082557468898, "grad_norm": 0.7117464542388916, "learning_rate": 7.384818350443252e-06, "loss": 1.3607, "mean_token_accuracy": 0.6554534633954366, "num_tokens": 1962108819.0, "step": 11701 }, { "entropy": 1.6910544236501057, "epoch": 1.2855181126582627, "grad_norm": 0.6509753465652466, "learning_rate": 7.38335213056983e-06, "loss": 1.2883, "mean_token_accuracy": 0.6676128606001536, "num_tokens": 1962275707.0, "step": 11702 }, { "entropy": 1.6606975098450978, "epoch": 1.2856279695696355, "grad_norm": 0.7419883608818054, "learning_rate": 7.38188602516168e-06, "loss": 1.3423, "mean_token_accuracy": 0.6589969595273336, "num_tokens": 1962462866.0, "step": 11703 }, { "entropy": 1.7654975454012554, "epoch": 1.2857378264810084, "grad_norm": 0.7047287225723267, "learning_rate": 7.380420034265205e-06, "loss": 1.4466, "mean_token_accuracy": 0.6558421750863394, "num_tokens": 1962610696.0, "step": 11704 }, { "entropy": 1.6822943886121113, "epoch": 1.2858476833923813, "grad_norm": 0.6518201231956482, "learning_rate": 7.3789541579268095e-06, "loss": 1.2855, "mean_token_accuracy": 0.6662348906199137, "num_tokens": 1962770700.0, "step": 11705 }, { "entropy": 1.758181909720103, "epoch": 1.2859575403037544, "grad_norm": 0.6527379155158997, "learning_rate": 7.377488396192882e-06, "loss": 1.4032, "mean_token_accuracy": 0.6612779349088669, "num_tokens": 1962946514.0, "step": 11706 }, { "entropy": 1.764567494392395, "epoch": 1.2860673972151273, "grad_norm": 0.7921777367591858, "learning_rate": 7.376022749109812e-06, "loss": 1.3384, "mean_token_accuracy": 0.6526055236657461, "num_tokens": 1963086122.0, "step": 11707 }, { "entropy": 1.7310861845811207, "epoch": 1.2861772541265002, "grad_norm": 0.737005889415741, "learning_rate": 7.374557216723994e-06, "loss": 1.372, "mean_token_accuracy": 0.6504655679066976, "num_tokens": 1963209728.0, "step": 11708 }, { "entropy": 1.6854293247063954, "epoch": 1.2862871110378733, "grad_norm": 0.6641037464141846, "learning_rate": 7.3730917990818015e-06, "loss": 1.4029, "mean_token_accuracy": 0.6705863028764725, "num_tokens": 1963357538.0, "step": 11709 }, { "entropy": 1.6769119401772816, "epoch": 1.2863969679492462, "grad_norm": 0.6438668966293335, "learning_rate": 7.37162649622962e-06, "loss": 1.4861, "mean_token_accuracy": 0.6491279552380244, "num_tokens": 1963582021.0, "step": 11710 }, { "entropy": 1.641738514105479, "epoch": 1.286506824860619, "grad_norm": 0.6233183145523071, "learning_rate": 7.3701613082138275e-06, "loss": 1.3751, "mean_token_accuracy": 0.6620054890712103, "num_tokens": 1963776433.0, "step": 11711 }, { "entropy": 1.6426913837591808, "epoch": 1.286616681771992, "grad_norm": 0.5799766182899475, "learning_rate": 7.368696235080792e-06, "loss": 1.372, "mean_token_accuracy": 0.6623861541350683, "num_tokens": 1963956213.0, "step": 11712 }, { "entropy": 1.7183026572068532, "epoch": 1.2867265386833648, "grad_norm": 0.681439995765686, "learning_rate": 7.367231276876885e-06, "loss": 1.455, "mean_token_accuracy": 0.655020589629809, "num_tokens": 1964135822.0, "step": 11713 }, { "entropy": 1.7211709121863048, "epoch": 1.286836395594738, "grad_norm": 0.6723782420158386, "learning_rate": 7.365766433648471e-06, "loss": 1.4048, "mean_token_accuracy": 0.6689492960770925, "num_tokens": 1964301098.0, "step": 11714 }, { "entropy": 1.7698278029759724, "epoch": 1.2869462525061108, "grad_norm": 0.77666175365448, "learning_rate": 7.3643017054419146e-06, "loss": 1.5987, "mean_token_accuracy": 0.6401151369015375, "num_tokens": 1964462937.0, "step": 11715 }, { "entropy": 1.6744122505187988, "epoch": 1.2870561094174837, "grad_norm": 0.8135389685630798, "learning_rate": 7.362837092303565e-06, "loss": 1.205, "mean_token_accuracy": 0.6760056912899017, "num_tokens": 1964577164.0, "step": 11716 }, { "entropy": 1.7008947432041168, "epoch": 1.2871659663288566, "grad_norm": 0.6217523813247681, "learning_rate": 7.361372594279785e-06, "loss": 1.3901, "mean_token_accuracy": 0.6549960921208063, "num_tokens": 1964734519.0, "step": 11717 }, { "entropy": 1.7229733566443126, "epoch": 1.2872758232402295, "grad_norm": 0.6909745335578918, "learning_rate": 7.359908211416924e-06, "loss": 1.3528, "mean_token_accuracy": 0.6632678508758545, "num_tokens": 1964878772.0, "step": 11718 }, { "entropy": 1.7443588475386302, "epoch": 1.2873856801516026, "grad_norm": 0.6048434376716614, "learning_rate": 7.358443943761326e-06, "loss": 1.461, "mean_token_accuracy": 0.6371092249949774, "num_tokens": 1965083666.0, "step": 11719 }, { "entropy": 1.7583917180697124, "epoch": 1.2874955370629755, "grad_norm": 0.6534097790718079, "learning_rate": 7.35697979135934e-06, "loss": 1.3643, "mean_token_accuracy": 0.6648856898148855, "num_tokens": 1965213053.0, "step": 11720 }, { "entropy": 1.7194798986117046, "epoch": 1.2876053939743484, "grad_norm": 0.646305501461029, "learning_rate": 7.3555157542572984e-06, "loss": 1.3896, "mean_token_accuracy": 0.6472862859567007, "num_tokens": 1965354020.0, "step": 11721 }, { "entropy": 1.675745298465093, "epoch": 1.2877152508857215, "grad_norm": 0.5743793845176697, "learning_rate": 7.354051832501541e-06, "loss": 1.3386, "mean_token_accuracy": 0.6610815872748693, "num_tokens": 1965529564.0, "step": 11722 }, { "entropy": 1.6679284969965618, "epoch": 1.2878251077970944, "grad_norm": 0.553485631942749, "learning_rate": 7.352588026138401e-06, "loss": 1.464, "mean_token_accuracy": 0.6533455202976862, "num_tokens": 1965718786.0, "step": 11723 }, { "entropy": 1.6976435681184132, "epoch": 1.2879349647084672, "grad_norm": 0.6102703809738159, "learning_rate": 7.351124335214206e-06, "loss": 1.3461, "mean_token_accuracy": 0.6602204740047455, "num_tokens": 1965910642.0, "step": 11724 }, { "entropy": 1.7120192646980286, "epoch": 1.2880448216198401, "grad_norm": 0.7902237772941589, "learning_rate": 7.349660759775283e-06, "loss": 1.1653, "mean_token_accuracy": 0.6933720608552297, "num_tokens": 1966041926.0, "step": 11725 }, { "entropy": 1.6760241091251373, "epoch": 1.288154678531213, "grad_norm": 0.6319912672042847, "learning_rate": 7.348197299867952e-06, "loss": 1.3921, "mean_token_accuracy": 0.678732305765152, "num_tokens": 1966200003.0, "step": 11726 }, { "entropy": 1.7074303428332012, "epoch": 1.2882645354425861, "grad_norm": 0.7581055164337158, "learning_rate": 7.34673395553853e-06, "loss": 1.3912, "mean_token_accuracy": 0.6616330395142237, "num_tokens": 1966353610.0, "step": 11727 }, { "entropy": 1.6789619823296864, "epoch": 1.288374392353959, "grad_norm": 0.774255096912384, "learning_rate": 7.345270726833331e-06, "loss": 1.3375, "mean_token_accuracy": 0.6750166416168213, "num_tokens": 1966490169.0, "step": 11728 }, { "entropy": 1.7094947596391041, "epoch": 1.288484249265332, "grad_norm": 0.6712803840637207, "learning_rate": 7.343807613798668e-06, "loss": 1.2826, "mean_token_accuracy": 0.6736528823773066, "num_tokens": 1966605134.0, "step": 11729 }, { "entropy": 1.6739847759405773, "epoch": 1.2885941061767048, "grad_norm": 0.5984385013580322, "learning_rate": 7.342344616480848e-06, "loss": 1.3459, "mean_token_accuracy": 0.6620316952466965, "num_tokens": 1966787815.0, "step": 11730 }, { "entropy": 1.6550920108954112, "epoch": 1.2887039630880777, "grad_norm": 0.6178155541419983, "learning_rate": 7.340881734926171e-06, "loss": 1.3412, "mean_token_accuracy": 0.6700419485569, "num_tokens": 1966997480.0, "step": 11731 }, { "entropy": 1.7782117525736492, "epoch": 1.2888138199994508, "grad_norm": 0.6943917274475098, "learning_rate": 7.339418969180938e-06, "loss": 1.5025, "mean_token_accuracy": 0.6368412226438522, "num_tokens": 1967178649.0, "step": 11732 }, { "entropy": 1.7510855495929718, "epoch": 1.2889236769108237, "grad_norm": 0.6902898550033569, "learning_rate": 7.337956319291446e-06, "loss": 1.2981, "mean_token_accuracy": 0.6604495048522949, "num_tokens": 1967292655.0, "step": 11733 }, { "entropy": 1.7116970022519429, "epoch": 1.2890335338221965, "grad_norm": 0.7756577134132385, "learning_rate": 7.336493785303986e-06, "loss": 1.3434, "mean_token_accuracy": 0.6637662698825201, "num_tokens": 1967438644.0, "step": 11734 }, { "entropy": 1.7591506739457448, "epoch": 1.2891433907335696, "grad_norm": 0.5982057452201843, "learning_rate": 7.335031367264844e-06, "loss": 1.379, "mean_token_accuracy": 0.6485430747270584, "num_tokens": 1967622935.0, "step": 11735 }, { "entropy": 1.6700083116690319, "epoch": 1.2892532476449425, "grad_norm": 0.6805751919746399, "learning_rate": 7.333569065220309e-06, "loss": 1.318, "mean_token_accuracy": 0.6725479116042455, "num_tokens": 1967792881.0, "step": 11736 }, { "entropy": 1.7045224507649739, "epoch": 1.2893631045563154, "grad_norm": 0.7660247087478638, "learning_rate": 7.332106879216667e-06, "loss": 1.4916, "mean_token_accuracy": 0.652613898118337, "num_tokens": 1967956377.0, "step": 11737 }, { "entropy": 1.6230275332927704, "epoch": 1.2894729614676883, "grad_norm": 0.6637044548988342, "learning_rate": 7.3306448093001825e-06, "loss": 1.3228, "mean_token_accuracy": 0.6681044300397238, "num_tokens": 1968087519.0, "step": 11738 }, { "entropy": 1.6863668859004974, "epoch": 1.2895828183790612, "grad_norm": 0.594308614730835, "learning_rate": 7.329182855517141e-06, "loss": 1.4027, "mean_token_accuracy": 0.6514003972212473, "num_tokens": 1968251423.0, "step": 11739 }, { "entropy": 1.6702902913093567, "epoch": 1.2896926752904343, "grad_norm": 0.693254828453064, "learning_rate": 7.327721017913805e-06, "loss": 1.2496, "mean_token_accuracy": 0.6850862701733907, "num_tokens": 1968412040.0, "step": 11740 }, { "entropy": 1.662459562222163, "epoch": 1.2898025322018072, "grad_norm": 0.7011620402336121, "learning_rate": 7.326259296536442e-06, "loss": 1.5088, "mean_token_accuracy": 0.6396182477474213, "num_tokens": 1968616860.0, "step": 11741 }, { "entropy": 1.737975647052129, "epoch": 1.28991238911318, "grad_norm": 0.8035622239112854, "learning_rate": 7.32479769143132e-06, "loss": 1.2819, "mean_token_accuracy": 0.6790835956732432, "num_tokens": 1968740375.0, "step": 11742 }, { "entropy": 1.730758676926295, "epoch": 1.290022246024553, "grad_norm": 0.6591079831123352, "learning_rate": 7.323336202644698e-06, "loss": 1.3442, "mean_token_accuracy": 0.6591987013816833, "num_tokens": 1968923933.0, "step": 11743 }, { "entropy": 1.756201942761739, "epoch": 1.2901321029359258, "grad_norm": 0.6940193176269531, "learning_rate": 7.3218748302228236e-06, "loss": 1.5993, "mean_token_accuracy": 0.6265199581782023, "num_tokens": 1969109185.0, "step": 11744 }, { "entropy": 1.6942794720331829, "epoch": 1.290241959847299, "grad_norm": 0.7570939660072327, "learning_rate": 7.320413574211955e-06, "loss": 1.3312, "mean_token_accuracy": 0.6817633907000223, "num_tokens": 1969221453.0, "step": 11745 }, { "entropy": 1.6586816012859344, "epoch": 1.2903518167586718, "grad_norm": 0.6691310405731201, "learning_rate": 7.31895243465834e-06, "loss": 1.3373, "mean_token_accuracy": 0.6555156062046686, "num_tokens": 1969369427.0, "step": 11746 }, { "entropy": 1.7016185621420543, "epoch": 1.2904616736700447, "grad_norm": 0.6833151578903198, "learning_rate": 7.317491411608217e-06, "loss": 1.4382, "mean_token_accuracy": 0.6552936285734177, "num_tokens": 1969538724.0, "step": 11747 }, { "entropy": 1.6746133367220561, "epoch": 1.2905715305814178, "grad_norm": 0.5930050611495972, "learning_rate": 7.316030505107834e-06, "loss": 1.4735, "mean_token_accuracy": 0.6479932516813278, "num_tokens": 1969761041.0, "step": 11748 }, { "entropy": 1.6806008915106456, "epoch": 1.2906813874927907, "grad_norm": 0.7082892060279846, "learning_rate": 7.314569715203428e-06, "loss": 1.3426, "mean_token_accuracy": 0.6537379374106725, "num_tokens": 1969892525.0, "step": 11749 }, { "entropy": 1.784896006186803, "epoch": 1.2907912444041636, "grad_norm": 0.7892354726791382, "learning_rate": 7.3131090419412285e-06, "loss": 1.4915, "mean_token_accuracy": 0.6374368518590927, "num_tokens": 1970080563.0, "step": 11750 }, { "entropy": 1.6796510914961498, "epoch": 1.2909011013155365, "grad_norm": 0.6004863381385803, "learning_rate": 7.311648485367464e-06, "loss": 1.412, "mean_token_accuracy": 0.6581088254849116, "num_tokens": 1970248705.0, "step": 11751 }, { "entropy": 1.7254607180754344, "epoch": 1.2910109582269094, "grad_norm": 0.6974371075630188, "learning_rate": 7.310188045528368e-06, "loss": 1.4514, "mean_token_accuracy": 0.6568728238344193, "num_tokens": 1970419431.0, "step": 11752 }, { "entropy": 1.7045761744181316, "epoch": 1.2911208151382825, "grad_norm": 2.843557119369507, "learning_rate": 7.308727722470153e-06, "loss": 1.4688, "mean_token_accuracy": 0.6505365371704102, "num_tokens": 1970581754.0, "step": 11753 }, { "entropy": 1.7577880720297496, "epoch": 1.2912306720496554, "grad_norm": 0.9117422699928284, "learning_rate": 7.307267516239043e-06, "loss": 1.4026, "mean_token_accuracy": 0.6464939614137014, "num_tokens": 1970743224.0, "step": 11754 }, { "entropy": 1.7608660360177357, "epoch": 1.2913405289610282, "grad_norm": 0.7385045289993286, "learning_rate": 7.305807426881255e-06, "loss": 1.324, "mean_token_accuracy": 0.6659322182337443, "num_tokens": 1970867328.0, "step": 11755 }, { "entropy": 1.7018007536729176, "epoch": 1.2914503858724011, "grad_norm": 0.5788907408714294, "learning_rate": 7.304347454442992e-06, "loss": 1.2803, "mean_token_accuracy": 0.6538551598787308, "num_tokens": 1971134236.0, "step": 11756 }, { "entropy": 1.756419579188029, "epoch": 1.291560242783774, "grad_norm": 0.619351327419281, "learning_rate": 7.302887598970472e-06, "loss": 1.3685, "mean_token_accuracy": 0.6473907629648844, "num_tokens": 1971335443.0, "step": 11757 }, { "entropy": 1.7158561150232952, "epoch": 1.2916700996951471, "grad_norm": 1.6634712219238281, "learning_rate": 7.3014278605098934e-06, "loss": 1.2694, "mean_token_accuracy": 0.6615792512893677, "num_tokens": 1971562350.0, "step": 11758 }, { "entropy": 1.6589511632919312, "epoch": 1.29177995660652, "grad_norm": 0.6881945133209229, "learning_rate": 7.299968239107451e-06, "loss": 1.3316, "mean_token_accuracy": 0.6625782549381256, "num_tokens": 1971746704.0, "step": 11759 }, { "entropy": 1.7037721276283264, "epoch": 1.291889813517893, "grad_norm": 0.6696583032608032, "learning_rate": 7.298508734809351e-06, "loss": 1.4161, "mean_token_accuracy": 0.6640026867389679, "num_tokens": 1971956211.0, "step": 11760 }, { "entropy": 1.7227367758750916, "epoch": 1.291999670429266, "grad_norm": 0.6352359652519226, "learning_rate": 7.297049347661782e-06, "loss": 1.5067, "mean_token_accuracy": 0.6483261436223984, "num_tokens": 1972162108.0, "step": 11761 }, { "entropy": 1.7867354949315388, "epoch": 1.2921095273406389, "grad_norm": 0.6077547073364258, "learning_rate": 7.29559007771093e-06, "loss": 1.4752, "mean_token_accuracy": 0.6493107676506042, "num_tokens": 1972309946.0, "step": 11762 }, { "entropy": 1.7437163889408112, "epoch": 1.2922193842520118, "grad_norm": 0.7125455141067505, "learning_rate": 7.2941309250029845e-06, "loss": 1.4619, "mean_token_accuracy": 0.6622959723075231, "num_tokens": 1972500940.0, "step": 11763 }, { "entropy": 1.7352955440680187, "epoch": 1.2923292411633847, "grad_norm": 0.5929360389709473, "learning_rate": 7.2926718895841246e-06, "loss": 1.4278, "mean_token_accuracy": 0.646904394030571, "num_tokens": 1972691170.0, "step": 11764 }, { "entropy": 1.7026380797227223, "epoch": 1.2924390980747575, "grad_norm": 0.6624311804771423, "learning_rate": 7.291212971500527e-06, "loss": 1.3807, "mean_token_accuracy": 0.6627266258001328, "num_tokens": 1972854592.0, "step": 11765 }, { "entropy": 1.6698378721872966, "epoch": 1.2925489549861306, "grad_norm": 0.7162081599235535, "learning_rate": 7.289754170798369e-06, "loss": 1.3603, "mean_token_accuracy": 0.6661340196927389, "num_tokens": 1973042979.0, "step": 11766 }, { "entropy": 1.6535949905713399, "epoch": 1.2926588118975035, "grad_norm": 0.6275128722190857, "learning_rate": 7.288295487523822e-06, "loss": 1.2839, "mean_token_accuracy": 0.6786264330148697, "num_tokens": 1973170895.0, "step": 11767 }, { "entropy": 1.732323278983434, "epoch": 1.2927686688088764, "grad_norm": 0.8781585097312927, "learning_rate": 7.286836921723048e-06, "loss": 1.36, "mean_token_accuracy": 0.6611862430969874, "num_tokens": 1973331867.0, "step": 11768 }, { "entropy": 1.7570100327332814, "epoch": 1.2928785257202493, "grad_norm": 0.7007432579994202, "learning_rate": 7.2853784734422155e-06, "loss": 1.4099, "mean_token_accuracy": 0.6472747921943665, "num_tokens": 1973490229.0, "step": 11769 }, { "entropy": 1.711938053369522, "epoch": 1.2929883826316222, "grad_norm": 0.6697954535484314, "learning_rate": 7.283920142727479e-06, "loss": 1.5415, "mean_token_accuracy": 0.634870320558548, "num_tokens": 1973666134.0, "step": 11770 }, { "entropy": 1.6825863222281139, "epoch": 1.2930982395429953, "grad_norm": 0.7022350430488586, "learning_rate": 7.282461929624991e-06, "loss": 1.2739, "mean_token_accuracy": 0.664794052640597, "num_tokens": 1973774886.0, "step": 11771 }, { "entropy": 1.7592849830786388, "epoch": 1.2932080964543682, "grad_norm": 0.7243680953979492, "learning_rate": 7.2810038341809105e-06, "loss": 1.4997, "mean_token_accuracy": 0.6491112063328425, "num_tokens": 1973920195.0, "step": 11772 }, { "entropy": 1.6567996442317963, "epoch": 1.293317953365741, "grad_norm": 0.7638998031616211, "learning_rate": 7.279545856441385e-06, "loss": 1.2132, "mean_token_accuracy": 0.6840778191884359, "num_tokens": 1974044070.0, "step": 11773 }, { "entropy": 1.7306942145029705, "epoch": 1.2934278102771142, "grad_norm": 0.7598798274993896, "learning_rate": 7.278087996452554e-06, "loss": 1.4113, "mean_token_accuracy": 0.6556040098269781, "num_tokens": 1974195242.0, "step": 11774 }, { "entropy": 1.7199226518472035, "epoch": 1.293537667188487, "grad_norm": 0.7280505299568176, "learning_rate": 7.2766302542605615e-06, "loss": 1.4192, "mean_token_accuracy": 0.6484815229972204, "num_tokens": 1974355884.0, "step": 11775 }, { "entropy": 1.7219727238019307, "epoch": 1.29364752409986, "grad_norm": 0.8648471236228943, "learning_rate": 7.275172629911546e-06, "loss": 1.4423, "mean_token_accuracy": 0.6569175471862158, "num_tokens": 1974466512.0, "step": 11776 }, { "entropy": 1.757084995508194, "epoch": 1.2937573810112328, "grad_norm": 0.7437247037887573, "learning_rate": 7.2737151234516365e-06, "loss": 1.3887, "mean_token_accuracy": 0.6547428021828333, "num_tokens": 1974621651.0, "step": 11777 }, { "entropy": 1.7222739160060883, "epoch": 1.2938672379226057, "grad_norm": 0.682804524898529, "learning_rate": 7.2722577349269615e-06, "loss": 1.4023, "mean_token_accuracy": 0.6509612699349722, "num_tokens": 1974790569.0, "step": 11778 }, { "entropy": 1.6923163831233978, "epoch": 1.2939770948339788, "grad_norm": 0.7132525444030762, "learning_rate": 7.270800464383654e-06, "loss": 1.3487, "mean_token_accuracy": 0.6630134681860606, "num_tokens": 1974908362.0, "step": 11779 }, { "entropy": 1.7271219789981842, "epoch": 1.2940869517453517, "grad_norm": 0.6374887824058533, "learning_rate": 7.269343311867829e-06, "loss": 1.4675, "mean_token_accuracy": 0.6458124866088232, "num_tokens": 1975049557.0, "step": 11780 }, { "entropy": 1.7077939212322235, "epoch": 1.2941968086567246, "grad_norm": 0.6350587010383606, "learning_rate": 7.2678862774256065e-06, "loss": 1.337, "mean_token_accuracy": 0.6723198741674423, "num_tokens": 1975191967.0, "step": 11781 }, { "entropy": 1.7040321032206218, "epoch": 1.2943066655680975, "grad_norm": 0.6252678036689758, "learning_rate": 7.266429361103105e-06, "loss": 1.3758, "mean_token_accuracy": 0.6479005714257559, "num_tokens": 1975354337.0, "step": 11782 }, { "entropy": 1.7597824732462566, "epoch": 1.2944165224794704, "grad_norm": 0.8166074752807617, "learning_rate": 7.264972562946428e-06, "loss": 1.481, "mean_token_accuracy": 0.6362377305825552, "num_tokens": 1975526565.0, "step": 11783 }, { "entropy": 1.6754455467065175, "epoch": 1.2945263793908435, "grad_norm": 0.736605703830719, "learning_rate": 7.263515883001686e-06, "loss": 1.2794, "mean_token_accuracy": 0.671693374713262, "num_tokens": 1975678286.0, "step": 11784 }, { "entropy": 1.6512251496315002, "epoch": 1.2946362363022164, "grad_norm": 9.648917198181152, "learning_rate": 7.2620593213149874e-06, "loss": 1.3817, "mean_token_accuracy": 0.6627425750096639, "num_tokens": 1975887204.0, "step": 11785 }, { "entropy": 1.7003162701924641, "epoch": 1.2947460932135892, "grad_norm": 0.7399555444717407, "learning_rate": 7.260602877932421e-06, "loss": 1.3761, "mean_token_accuracy": 0.6608262062072754, "num_tokens": 1976048216.0, "step": 11786 }, { "entropy": 1.7178180714448292, "epoch": 1.2948559501249624, "grad_norm": 0.7446051239967346, "learning_rate": 7.259146552900094e-06, "loss": 1.4646, "mean_token_accuracy": 0.6506668627262115, "num_tokens": 1976241273.0, "step": 11787 }, { "entropy": 1.7054597040017445, "epoch": 1.2949658070363352, "grad_norm": 0.6272408366203308, "learning_rate": 7.25769034626409e-06, "loss": 1.2619, "mean_token_accuracy": 0.6702099094788233, "num_tokens": 1976386664.0, "step": 11788 }, { "entropy": 1.7981916566689808, "epoch": 1.2950756639477081, "grad_norm": 0.5923606157302856, "learning_rate": 7.256234258070501e-06, "loss": 1.5048, "mean_token_accuracy": 0.6292905509471893, "num_tokens": 1976617356.0, "step": 11789 }, { "entropy": 1.7312207321325939, "epoch": 1.295185520859081, "grad_norm": 0.6586351990699768, "learning_rate": 7.254778288365411e-06, "loss": 1.4734, "mean_token_accuracy": 0.6350182294845581, "num_tokens": 1976838422.0, "step": 11790 }, { "entropy": 1.760515828927358, "epoch": 1.295295377770454, "grad_norm": 0.7017537951469421, "learning_rate": 7.253322437194901e-06, "loss": 1.5208, "mean_token_accuracy": 0.6316369622945786, "num_tokens": 1977010019.0, "step": 11791 }, { "entropy": 1.666582852602005, "epoch": 1.295405234681827, "grad_norm": 0.5920802354812622, "learning_rate": 7.251866704605042e-06, "loss": 1.2817, "mean_token_accuracy": 0.6713870366414388, "num_tokens": 1977141644.0, "step": 11792 }, { "entropy": 1.7240260044733684, "epoch": 1.2955150915931999, "grad_norm": 0.6175614595413208, "learning_rate": 7.25041109064192e-06, "loss": 1.3674, "mean_token_accuracy": 0.6563980529705683, "num_tokens": 1977305533.0, "step": 11793 }, { "entropy": 1.7184670567512512, "epoch": 1.2956249485045728, "grad_norm": 0.8212663531303406, "learning_rate": 7.248955595351592e-06, "loss": 1.3066, "mean_token_accuracy": 0.6645220468441645, "num_tokens": 1977456661.0, "step": 11794 }, { "entropy": 1.6388721764087677, "epoch": 1.2957348054159457, "grad_norm": 0.743337869644165, "learning_rate": 7.2475002187801345e-06, "loss": 1.3813, "mean_token_accuracy": 0.6679724355538686, "num_tokens": 1977638102.0, "step": 11795 }, { "entropy": 1.7348832388718922, "epoch": 1.2958446623273185, "grad_norm": 0.7091452479362488, "learning_rate": 7.246044960973602e-06, "loss": 1.3269, "mean_token_accuracy": 0.6580028831958771, "num_tokens": 1977794927.0, "step": 11796 }, { "entropy": 1.7408220171928406, "epoch": 1.2959545192386916, "grad_norm": 0.5987675189971924, "learning_rate": 7.244589821978052e-06, "loss": 1.1792, "mean_token_accuracy": 0.6719856162865957, "num_tokens": 1977968569.0, "step": 11797 }, { "entropy": 1.7049931287765503, "epoch": 1.2960643761500645, "grad_norm": 0.8301398158073425, "learning_rate": 7.243134801839544e-06, "loss": 1.3704, "mean_token_accuracy": 0.6715128173430761, "num_tokens": 1978133116.0, "step": 11798 }, { "entropy": 1.7279678384462993, "epoch": 1.2961742330614374, "grad_norm": 0.7465829849243164, "learning_rate": 7.24167990060413e-06, "loss": 1.3674, "mean_token_accuracy": 0.6562019089857737, "num_tokens": 1978285874.0, "step": 11799 }, { "entropy": 1.662907858689626, "epoch": 1.2962840899728105, "grad_norm": 0.744123101234436, "learning_rate": 7.240225118317847e-06, "loss": 1.5223, "mean_token_accuracy": 0.6506157964468002, "num_tokens": 1978461876.0, "step": 11800 }, { "entropy": 1.7489655017852783, "epoch": 1.2963939468841834, "grad_norm": 0.7728520631790161, "learning_rate": 7.238770455026747e-06, "loss": 1.3505, "mean_token_accuracy": 0.6687692006429037, "num_tokens": 1978591043.0, "step": 11801 }, { "entropy": 1.7328561941782634, "epoch": 1.2965038037955563, "grad_norm": 0.707901120185852, "learning_rate": 7.237315910776872e-06, "loss": 1.4924, "mean_token_accuracy": 0.6454095045725504, "num_tokens": 1978750688.0, "step": 11802 }, { "entropy": 1.6640961865584056, "epoch": 1.2966136607069292, "grad_norm": 0.9605063796043396, "learning_rate": 7.235861485614248e-06, "loss": 1.2226, "mean_token_accuracy": 0.6842072506745657, "num_tokens": 1978910145.0, "step": 11803 }, { "entropy": 1.703210969765981, "epoch": 1.296723517618302, "grad_norm": 0.6342226266860962, "learning_rate": 7.234407179584912e-06, "loss": 1.3948, "mean_token_accuracy": 0.6613224347432455, "num_tokens": 1979095563.0, "step": 11804 }, { "entropy": 1.6607285638650258, "epoch": 1.2968333745296752, "grad_norm": 0.8452777862548828, "learning_rate": 7.2329529927348966e-06, "loss": 1.211, "mean_token_accuracy": 0.6795276602109274, "num_tokens": 1979225266.0, "step": 11805 }, { "entropy": 1.7239519755045574, "epoch": 1.296943231441048, "grad_norm": 0.6554011106491089, "learning_rate": 7.231498925110214e-06, "loss": 1.3701, "mean_token_accuracy": 0.6555942744016647, "num_tokens": 1979419579.0, "step": 11806 }, { "entropy": 1.7264830768108368, "epoch": 1.297053088352421, "grad_norm": 0.7340265512466431, "learning_rate": 7.230044976756898e-06, "loss": 1.6073, "mean_token_accuracy": 0.6350849618514379, "num_tokens": 1979619520.0, "step": 11807 }, { "entropy": 1.6583397487799327, "epoch": 1.2971629452637938, "grad_norm": 0.6471593379974365, "learning_rate": 7.2285911477209604e-06, "loss": 1.3271, "mean_token_accuracy": 0.6611123780409495, "num_tokens": 1979757550.0, "step": 11808 }, { "entropy": 1.7062188585599263, "epoch": 1.2972728021751667, "grad_norm": 0.7513505220413208, "learning_rate": 7.227137438048411e-06, "loss": 1.4257, "mean_token_accuracy": 0.6616204331318537, "num_tokens": 1979909535.0, "step": 11809 }, { "entropy": 1.7708937724431355, "epoch": 1.2973826590865398, "grad_norm": 1.0161291360855103, "learning_rate": 7.225683847785261e-06, "loss": 1.4273, "mean_token_accuracy": 0.6494510521491369, "num_tokens": 1980093310.0, "step": 11810 }, { "entropy": 1.7270852228005726, "epoch": 1.2974925159979127, "grad_norm": 1.1134231090545654, "learning_rate": 7.224230376977519e-06, "loss": 1.5241, "mean_token_accuracy": 0.655661274989446, "num_tokens": 1980280004.0, "step": 11811 }, { "entropy": 1.6724676191806793, "epoch": 1.2976023729092856, "grad_norm": 0.6491298079490662, "learning_rate": 7.222777025671182e-06, "loss": 1.3062, "mean_token_accuracy": 0.6757878363132477, "num_tokens": 1980485888.0, "step": 11812 }, { "entropy": 1.7291560967763264, "epoch": 1.2977122298206587, "grad_norm": 0.6538869738578796, "learning_rate": 7.221323793912247e-06, "loss": 1.4369, "mean_token_accuracy": 0.6460357258717219, "num_tokens": 1980665077.0, "step": 11813 }, { "entropy": 1.6848385234673817, "epoch": 1.2978220867320316, "grad_norm": 0.704898476600647, "learning_rate": 7.219870681746717e-06, "loss": 1.4264, "mean_token_accuracy": 0.6565316567818323, "num_tokens": 1980810801.0, "step": 11814 }, { "entropy": 1.6930330594380696, "epoch": 1.2979319436434045, "grad_norm": 0.7854775786399841, "learning_rate": 7.218417689220576e-06, "loss": 1.4475, "mean_token_accuracy": 0.6585629632075628, "num_tokens": 1980944906.0, "step": 11815 }, { "entropy": 1.6579373677571614, "epoch": 1.2980418005547774, "grad_norm": 0.697228729724884, "learning_rate": 7.216964816379805e-06, "loss": 1.3587, "mean_token_accuracy": 0.6618794798851013, "num_tokens": 1981144140.0, "step": 11816 }, { "entropy": 1.634168028831482, "epoch": 1.2981516574661502, "grad_norm": 0.6177424788475037, "learning_rate": 7.2155120632704e-06, "loss": 1.5157, "mean_token_accuracy": 0.6338949004809061, "num_tokens": 1981412853.0, "step": 11817 }, { "entropy": 1.7243984242280324, "epoch": 1.2982615143775234, "grad_norm": 0.6800485849380493, "learning_rate": 7.214059429938329e-06, "loss": 1.5578, "mean_token_accuracy": 0.64493028819561, "num_tokens": 1981563675.0, "step": 11818 }, { "entropy": 1.728604664405187, "epoch": 1.2983713712888962, "grad_norm": 0.7974774837493896, "learning_rate": 7.212606916429572e-06, "loss": 1.2656, "mean_token_accuracy": 0.6728243281443914, "num_tokens": 1981676374.0, "step": 11819 }, { "entropy": 1.7148986756801605, "epoch": 1.2984812282002691, "grad_norm": 0.5727463364601135, "learning_rate": 7.211154522790103e-06, "loss": 1.4279, "mean_token_accuracy": 0.638765682776769, "num_tokens": 1981861170.0, "step": 11820 }, { "entropy": 1.6881239612897236, "epoch": 1.298591085111642, "grad_norm": 0.7453581094741821, "learning_rate": 7.2097022490658795e-06, "loss": 1.3699, "mean_token_accuracy": 0.6761754850546519, "num_tokens": 1982007898.0, "step": 11821 }, { "entropy": 1.6406256258487701, "epoch": 1.298700942023015, "grad_norm": 0.5940481424331665, "learning_rate": 7.208250095302878e-06, "loss": 1.3569, "mean_token_accuracy": 0.6683052430550257, "num_tokens": 1982173648.0, "step": 11822 }, { "entropy": 1.6838708420594533, "epoch": 1.298810798934388, "grad_norm": 0.651563286781311, "learning_rate": 7.206798061547049e-06, "loss": 1.5306, "mean_token_accuracy": 0.6416983604431152, "num_tokens": 1982331870.0, "step": 11823 }, { "entropy": 1.7058892448743184, "epoch": 1.2989206558457609, "grad_norm": 0.6198409795761108, "learning_rate": 7.205346147844352e-06, "loss": 1.3637, "mean_token_accuracy": 0.6558132419983546, "num_tokens": 1982532371.0, "step": 11824 }, { "entropy": 1.7239612738291423, "epoch": 1.2990305127571338, "grad_norm": 0.7450637221336365, "learning_rate": 7.203894354240737e-06, "loss": 1.43, "mean_token_accuracy": 0.6622414539257685, "num_tokens": 1982696214.0, "step": 11825 }, { "entropy": 1.7213209768136342, "epoch": 1.2991403696685069, "grad_norm": 0.6577510237693787, "learning_rate": 7.20244268078216e-06, "loss": 1.3634, "mean_token_accuracy": 0.6626957158247629, "num_tokens": 1982828315.0, "step": 11826 }, { "entropy": 1.7126743793487549, "epoch": 1.2992502265798798, "grad_norm": 0.6911088228225708, "learning_rate": 7.2009911275145605e-06, "loss": 1.3489, "mean_token_accuracy": 0.6616120487451553, "num_tokens": 1982974989.0, "step": 11827 }, { "entropy": 1.742800772190094, "epoch": 1.2993600834912526, "grad_norm": 0.7396600246429443, "learning_rate": 7.1995396944838765e-06, "loss": 1.4829, "mean_token_accuracy": 0.638206327954928, "num_tokens": 1983151283.0, "step": 11828 }, { "entropy": 1.6596211989720662, "epoch": 1.2994699404026255, "grad_norm": 2.785015821456909, "learning_rate": 7.198088381736053e-06, "loss": 1.3438, "mean_token_accuracy": 0.6643916070461273, "num_tokens": 1983335044.0, "step": 11829 }, { "entropy": 1.6466420888900757, "epoch": 1.2995797973139984, "grad_norm": 0.5830391049385071, "learning_rate": 7.196637189317015e-06, "loss": 1.4721, "mean_token_accuracy": 0.6433140188455582, "num_tokens": 1983568953.0, "step": 11830 }, { "entropy": 1.7181233763694763, "epoch": 1.2996896542253715, "grad_norm": 0.8077186942100525, "learning_rate": 7.1951861172726985e-06, "loss": 1.2321, "mean_token_accuracy": 0.6756879289944967, "num_tokens": 1983674728.0, "step": 11831 }, { "entropy": 1.6590841114521027, "epoch": 1.2997995111367444, "grad_norm": 0.5984413027763367, "learning_rate": 7.193735165649027e-06, "loss": 1.5037, "mean_token_accuracy": 0.6605504155158997, "num_tokens": 1983866230.0, "step": 11832 }, { "entropy": 1.7355755269527435, "epoch": 1.2999093680481173, "grad_norm": 0.7825373411178589, "learning_rate": 7.192284334491919e-06, "loss": 1.3951, "mean_token_accuracy": 0.6593762536843618, "num_tokens": 1984017499.0, "step": 11833 }, { "entropy": 1.6846800744533539, "epoch": 1.3000192249594902, "grad_norm": 0.6214932799339294, "learning_rate": 7.190833623847302e-06, "loss": 1.2622, "mean_token_accuracy": 0.6819400539000829, "num_tokens": 1984194774.0, "step": 11834 }, { "entropy": 1.6585146188735962, "epoch": 1.300129081870863, "grad_norm": 0.5928328037261963, "learning_rate": 7.189383033761082e-06, "loss": 1.4513, "mean_token_accuracy": 0.6382601261138916, "num_tokens": 1984450421.0, "step": 11835 }, { "entropy": 1.6726765831311543, "epoch": 1.3002389387822362, "grad_norm": 0.6932438015937805, "learning_rate": 7.187932564279168e-06, "loss": 1.4706, "mean_token_accuracy": 0.6601354628801346, "num_tokens": 1984578455.0, "step": 11836 }, { "entropy": 1.7142049372196198, "epoch": 1.300348795693609, "grad_norm": 0.7112865447998047, "learning_rate": 7.186482215447472e-06, "loss": 1.5127, "mean_token_accuracy": 0.6401646981636683, "num_tokens": 1984793396.0, "step": 11837 }, { "entropy": 1.7584912180900574, "epoch": 1.300458652604982, "grad_norm": 0.7246370315551758, "learning_rate": 7.185031987311899e-06, "loss": 1.562, "mean_token_accuracy": 0.625005453824997, "num_tokens": 1984962832.0, "step": 11838 }, { "entropy": 1.6865639090538025, "epoch": 1.300568509516355, "grad_norm": 0.6186059713363647, "learning_rate": 7.183581879918344e-06, "loss": 1.5017, "mean_token_accuracy": 0.6512916932503382, "num_tokens": 1985164889.0, "step": 11839 }, { "entropy": 1.7075908879439037, "epoch": 1.300678366427728, "grad_norm": 0.6377032399177551, "learning_rate": 7.182131893312698e-06, "loss": 1.5451, "mean_token_accuracy": 0.6384754379590353, "num_tokens": 1985344290.0, "step": 11840 }, { "entropy": 1.6983333627382915, "epoch": 1.3007882233391008, "grad_norm": 0.7583761811256409, "learning_rate": 7.180682027540864e-06, "loss": 1.4311, "mean_token_accuracy": 0.6552286992470423, "num_tokens": 1985503791.0, "step": 11841 }, { "entropy": 1.6771070162455242, "epoch": 1.3008980802504737, "grad_norm": 0.7743870615959167, "learning_rate": 7.179232282648716e-06, "loss": 1.3962, "mean_token_accuracy": 0.6450282633304596, "num_tokens": 1985677205.0, "step": 11842 }, { "entropy": 1.6850054661432903, "epoch": 1.3010079371618466, "grad_norm": 0.6330224871635437, "learning_rate": 7.177782658682148e-06, "loss": 1.3411, "mean_token_accuracy": 0.6549698412418365, "num_tokens": 1985854174.0, "step": 11843 }, { "entropy": 1.7192271451155345, "epoch": 1.3011177940732197, "grad_norm": 0.7451735734939575, "learning_rate": 7.176333155687039e-06, "loss": 1.4255, "mean_token_accuracy": 0.6647992481788, "num_tokens": 1986045909.0, "step": 11844 }, { "entropy": 1.705276260773341, "epoch": 1.3012276509845926, "grad_norm": 0.7818323373794556, "learning_rate": 7.174883773709258e-06, "loss": 1.412, "mean_token_accuracy": 0.66334301729997, "num_tokens": 1986222420.0, "step": 11845 }, { "entropy": 1.7389629483222961, "epoch": 1.3013375078959655, "grad_norm": 0.7498189210891724, "learning_rate": 7.173434512794686e-06, "loss": 1.3499, "mean_token_accuracy": 0.656540701786677, "num_tokens": 1986418388.0, "step": 11846 }, { "entropy": 1.6752577722072601, "epoch": 1.3014473648073386, "grad_norm": 0.680406391620636, "learning_rate": 7.171985372989185e-06, "loss": 1.4032, "mean_token_accuracy": 0.6697799315055212, "num_tokens": 1986620372.0, "step": 11847 }, { "entropy": 1.7210516333580017, "epoch": 1.3015572217187112, "grad_norm": 0.776543378829956, "learning_rate": 7.170536354338622e-06, "loss": 1.3586, "mean_token_accuracy": 0.6546121736367544, "num_tokens": 1986743025.0, "step": 11848 }, { "entropy": 1.7157737612724304, "epoch": 1.3016670786300844, "grad_norm": 0.681416928768158, "learning_rate": 7.169087456888859e-06, "loss": 1.2704, "mean_token_accuracy": 0.6695546756188074, "num_tokens": 1986896242.0, "step": 11849 }, { "entropy": 1.7309301495552063, "epoch": 1.3017769355414572, "grad_norm": 0.6461417078971863, "learning_rate": 7.167638680685749e-06, "loss": 1.2813, "mean_token_accuracy": 0.6683636407057444, "num_tokens": 1987045305.0, "step": 11850 }, { "entropy": 1.697464495897293, "epoch": 1.3018867924528301, "grad_norm": 0.6775514483451843, "learning_rate": 7.16619002577515e-06, "loss": 1.4658, "mean_token_accuracy": 0.6477470993995667, "num_tokens": 1987223981.0, "step": 11851 }, { "entropy": 1.7292206982771556, "epoch": 1.3019966493642032, "grad_norm": 0.7780271172523499, "learning_rate": 7.164741492202911e-06, "loss": 1.4561, "mean_token_accuracy": 0.6541826476653417, "num_tokens": 1987371081.0, "step": 11852 }, { "entropy": 1.6961702009042103, "epoch": 1.3021065062755761, "grad_norm": 0.7289796471595764, "learning_rate": 7.163293080014872e-06, "loss": 1.2561, "mean_token_accuracy": 0.6818042149146398, "num_tokens": 1987473964.0, "step": 11853 }, { "entropy": 1.6992531319459279, "epoch": 1.302216363186949, "grad_norm": 0.85200035572052, "learning_rate": 7.161844789256882e-06, "loss": 1.2384, "mean_token_accuracy": 0.6741587022940317, "num_tokens": 1987601010.0, "step": 11854 }, { "entropy": 1.7139351069927216, "epoch": 1.3023262200983219, "grad_norm": 1.0030229091644287, "learning_rate": 7.160396619974772e-06, "loss": 1.3984, "mean_token_accuracy": 0.6595780551433563, "num_tokens": 1987758086.0, "step": 11855 }, { "entropy": 1.7854057649771373, "epoch": 1.3024360770096948, "grad_norm": 0.6239326000213623, "learning_rate": 7.158948572214377e-06, "loss": 1.4072, "mean_token_accuracy": 0.6497927755117416, "num_tokens": 1987956125.0, "step": 11856 }, { "entropy": 1.6467955509821575, "epoch": 1.3025459339210679, "grad_norm": 0.621613085269928, "learning_rate": 7.157500646021529e-06, "loss": 1.3393, "mean_token_accuracy": 0.6661281585693359, "num_tokens": 1988127619.0, "step": 11857 }, { "entropy": 1.6565412779649098, "epoch": 1.3026557908324408, "grad_norm": 0.7688978314399719, "learning_rate": 7.156052841442058e-06, "loss": 1.3215, "mean_token_accuracy": 0.6778760701417923, "num_tokens": 1988347207.0, "step": 11858 }, { "entropy": 1.756882220506668, "epoch": 1.3027656477438136, "grad_norm": 0.7031749486923218, "learning_rate": 7.154605158521784e-06, "loss": 1.473, "mean_token_accuracy": 0.6567084838946661, "num_tokens": 1988503513.0, "step": 11859 }, { "entropy": 1.647767146428426, "epoch": 1.3028755046551868, "grad_norm": 0.7281495928764343, "learning_rate": 7.153157597306517e-06, "loss": 1.3981, "mean_token_accuracy": 0.6550866365432739, "num_tokens": 1988676182.0, "step": 11860 }, { "entropy": 1.7764273285865784, "epoch": 1.3029853615665594, "grad_norm": 0.7380655407905579, "learning_rate": 7.1517101578420845e-06, "loss": 1.3915, "mean_token_accuracy": 0.6554965376853943, "num_tokens": 1988784119.0, "step": 11861 }, { "entropy": 1.710543821255366, "epoch": 1.3030952184779325, "grad_norm": 0.6946497559547424, "learning_rate": 7.150262840174287e-06, "loss": 1.4343, "mean_token_accuracy": 0.6613740076621374, "num_tokens": 1988976260.0, "step": 11862 }, { "entropy": 1.6800654629866283, "epoch": 1.3032050753893054, "grad_norm": 0.7925371527671814, "learning_rate": 7.148815644348939e-06, "loss": 1.4201, "mean_token_accuracy": 0.6524600485960642, "num_tokens": 1989116149.0, "step": 11863 }, { "entropy": 1.6714057624340057, "epoch": 1.3033149323006783, "grad_norm": 0.7147430777549744, "learning_rate": 7.1473685704118415e-06, "loss": 1.4243, "mean_token_accuracy": 0.6460753281911215, "num_tokens": 1989292509.0, "step": 11864 }, { "entropy": 1.6726448833942413, "epoch": 1.3034247892120514, "grad_norm": 0.8853915929794312, "learning_rate": 7.145921618408789e-06, "loss": 1.4295, "mean_token_accuracy": 0.6527641713619232, "num_tokens": 1989445522.0, "step": 11865 }, { "entropy": 1.6634302536646526, "epoch": 1.3035346461234243, "grad_norm": 0.8797194361686707, "learning_rate": 7.1444747883855825e-06, "loss": 1.441, "mean_token_accuracy": 0.6599002232154211, "num_tokens": 1989603818.0, "step": 11866 }, { "entropy": 1.733254959185918, "epoch": 1.3036445030347972, "grad_norm": 0.8028691411018372, "learning_rate": 7.1430280803880125e-06, "loss": 1.2522, "mean_token_accuracy": 0.6718244006236395, "num_tokens": 1989737439.0, "step": 11867 }, { "entropy": 1.691257268190384, "epoch": 1.30375435994617, "grad_norm": 0.6741119027137756, "learning_rate": 7.1415814944618646e-06, "loss": 1.4412, "mean_token_accuracy": 0.6452458004156748, "num_tokens": 1989925558.0, "step": 11868 }, { "entropy": 1.702443500359853, "epoch": 1.303864216857543, "grad_norm": 0.8861745595932007, "learning_rate": 7.140135030652919e-06, "loss": 1.4018, "mean_token_accuracy": 0.6592222899198532, "num_tokens": 1990085254.0, "step": 11869 }, { "entropy": 1.6728020509084065, "epoch": 1.303974073768916, "grad_norm": 0.6596800684928894, "learning_rate": 7.138688689006968e-06, "loss": 1.3176, "mean_token_accuracy": 0.6630978385607401, "num_tokens": 1990248507.0, "step": 11870 }, { "entropy": 1.7162721355756123, "epoch": 1.304083930680289, "grad_norm": 0.799435019493103, "learning_rate": 7.13724246956978e-06, "loss": 1.4751, "mean_token_accuracy": 0.6495125244061152, "num_tokens": 1990412879.0, "step": 11871 }, { "entropy": 1.6690248648325603, "epoch": 1.3041937875916618, "grad_norm": 0.7061107754707336, "learning_rate": 7.135796372387121e-06, "loss": 1.4601, "mean_token_accuracy": 0.6446433266003927, "num_tokens": 1990609780.0, "step": 11872 }, { "entropy": 1.661782403786977, "epoch": 1.304303644503035, "grad_norm": 0.620296835899353, "learning_rate": 7.13435039750477e-06, "loss": 1.4143, "mean_token_accuracy": 0.6596356878678004, "num_tokens": 1990813000.0, "step": 11873 }, { "entropy": 1.737670491139094, "epoch": 1.3044135014144078, "grad_norm": 0.6647923588752747, "learning_rate": 7.132904544968484e-06, "loss": 1.4695, "mean_token_accuracy": 0.628335619966189, "num_tokens": 1991014720.0, "step": 11874 }, { "entropy": 1.771154135465622, "epoch": 1.3045233583257807, "grad_norm": 0.7745919823646545, "learning_rate": 7.131458814824033e-06, "loss": 1.3392, "mean_token_accuracy": 0.6562488625446955, "num_tokens": 1991159959.0, "step": 11875 }, { "entropy": 1.720243752002716, "epoch": 1.3046332152371536, "grad_norm": 0.6591370105743408, "learning_rate": 7.130013207117164e-06, "loss": 1.3527, "mean_token_accuracy": 0.6506476004918417, "num_tokens": 1991284571.0, "step": 11876 }, { "entropy": 1.7519052525361378, "epoch": 1.3047430721485265, "grad_norm": 0.8525600433349609, "learning_rate": 7.128567721893629e-06, "loss": 1.3246, "mean_token_accuracy": 0.6619629363218943, "num_tokens": 1991433891.0, "step": 11877 }, { "entropy": 1.7587460080782573, "epoch": 1.3048529290598996, "grad_norm": 0.7629795074462891, "learning_rate": 7.127122359199186e-06, "loss": 1.5044, "mean_token_accuracy": 0.633780856927236, "num_tokens": 1991631013.0, "step": 11878 }, { "entropy": 1.7409328023592632, "epoch": 1.3049627859712725, "grad_norm": 1.040186882019043, "learning_rate": 7.1256771190795744e-06, "loss": 1.4168, "mean_token_accuracy": 0.6474807063738505, "num_tokens": 1991741027.0, "step": 11879 }, { "entropy": 1.6785088181495667, "epoch": 1.3050726428826454, "grad_norm": 0.6517196893692017, "learning_rate": 7.124232001580533e-06, "loss": 1.4536, "mean_token_accuracy": 0.6468540678421656, "num_tokens": 1991930281.0, "step": 11880 }, { "entropy": 1.7357937196890514, "epoch": 1.3051824997940182, "grad_norm": 0.6505614519119263, "learning_rate": 7.1227870067478025e-06, "loss": 1.5418, "mean_token_accuracy": 0.6523448824882507, "num_tokens": 1992141041.0, "step": 11881 }, { "entropy": 1.6435925761858623, "epoch": 1.3052923567053911, "grad_norm": 0.73778235912323, "learning_rate": 7.121342134627121e-06, "loss": 1.3333, "mean_token_accuracy": 0.6684698065121969, "num_tokens": 1992299737.0, "step": 11882 }, { "entropy": 1.6869693100452423, "epoch": 1.3054022136167642, "grad_norm": 0.6052371859550476, "learning_rate": 7.1198973852642094e-06, "loss": 1.4825, "mean_token_accuracy": 0.6540184319019318, "num_tokens": 1992527255.0, "step": 11883 }, { "entropy": 1.6479829649130504, "epoch": 1.3055120705281371, "grad_norm": 0.6196063756942749, "learning_rate": 7.118452758704797e-06, "loss": 1.4023, "mean_token_accuracy": 0.6664341787497202, "num_tokens": 1992722009.0, "step": 11884 }, { "entropy": 1.751798113187154, "epoch": 1.30562192743951, "grad_norm": 0.7154742479324341, "learning_rate": 7.117008254994608e-06, "loss": 1.4442, "mean_token_accuracy": 0.6428210635979971, "num_tokens": 1992926541.0, "step": 11885 }, { "entropy": 1.7372412979602814, "epoch": 1.305731784350883, "grad_norm": 0.6471896171569824, "learning_rate": 7.115563874179354e-06, "loss": 1.3508, "mean_token_accuracy": 0.6744556576013565, "num_tokens": 1993067139.0, "step": 11886 }, { "entropy": 1.6598396003246307, "epoch": 1.305841641262256, "grad_norm": 0.6070998311042786, "learning_rate": 7.114119616304758e-06, "loss": 1.4995, "mean_token_accuracy": 0.6376579652229944, "num_tokens": 1993332234.0, "step": 11887 }, { "entropy": 1.6974481840928395, "epoch": 1.3059514981736289, "grad_norm": 0.689513623714447, "learning_rate": 7.112675481416524e-06, "loss": 1.4293, "mean_token_accuracy": 0.6504635115464529, "num_tokens": 1993523750.0, "step": 11888 }, { "entropy": 1.7427086234092712, "epoch": 1.3060613550850018, "grad_norm": 0.6812959313392639, "learning_rate": 7.111231469560356e-06, "loss": 1.369, "mean_token_accuracy": 0.6677893449862798, "num_tokens": 1993677201.0, "step": 11889 }, { "entropy": 1.7141542931397755, "epoch": 1.3061712119963746, "grad_norm": 0.6561225652694702, "learning_rate": 7.109787580781964e-06, "loss": 1.4565, "mean_token_accuracy": 0.649625892440478, "num_tokens": 1993841174.0, "step": 11890 }, { "entropy": 1.7247630953788757, "epoch": 1.3062810689077478, "grad_norm": 0.7922856211662292, "learning_rate": 7.108343815127041e-06, "loss": 1.1284, "mean_token_accuracy": 0.682140568892161, "num_tokens": 1994019614.0, "step": 11891 }, { "entropy": 1.7190321187178295, "epoch": 1.3063909258191206, "grad_norm": 0.7327906489372253, "learning_rate": 7.10690017264128e-06, "loss": 1.3485, "mean_token_accuracy": 0.6538528551657995, "num_tokens": 1994170865.0, "step": 11892 }, { "entropy": 1.7219412624835968, "epoch": 1.3065007827304935, "grad_norm": 0.6950879096984863, "learning_rate": 7.105456653370373e-06, "loss": 1.6429, "mean_token_accuracy": 0.6246584728360176, "num_tokens": 1994357037.0, "step": 11893 }, { "entropy": 1.6911317110061646, "epoch": 1.3066106396418664, "grad_norm": 0.6314573884010315, "learning_rate": 7.104013257360012e-06, "loss": 1.4832, "mean_token_accuracy": 0.641195093592008, "num_tokens": 1994567440.0, "step": 11894 }, { "entropy": 1.7743210991223652, "epoch": 1.3067204965532393, "grad_norm": 0.6991893649101257, "learning_rate": 7.102569984655876e-06, "loss": 1.4349, "mean_token_accuracy": 0.6492632130781809, "num_tokens": 1994730948.0, "step": 11895 }, { "entropy": 1.6938276489575703, "epoch": 1.3068303534646124, "grad_norm": 0.6895888447761536, "learning_rate": 7.101126835303642e-06, "loss": 1.2818, "mean_token_accuracy": 0.676262636979421, "num_tokens": 1994872136.0, "step": 11896 }, { "entropy": 1.7031813363234203, "epoch": 1.3069402103759853, "grad_norm": 0.6379356980323792, "learning_rate": 7.099683809348987e-06, "loss": 1.5104, "mean_token_accuracy": 0.6480912466843923, "num_tokens": 1995100831.0, "step": 11897 }, { "entropy": 1.697776734828949, "epoch": 1.3070500672873582, "grad_norm": 0.782518208026886, "learning_rate": 7.098240906837581e-06, "loss": 1.4419, "mean_token_accuracy": 0.6553240418434143, "num_tokens": 1995309589.0, "step": 11898 }, { "entropy": 1.733527531226476, "epoch": 1.3071599241987313, "grad_norm": 0.7691713571548462, "learning_rate": 7.096798127815095e-06, "loss": 1.544, "mean_token_accuracy": 0.6410651057958603, "num_tokens": 1995483068.0, "step": 11899 }, { "entropy": 1.7282393078009288, "epoch": 1.3072697811101042, "grad_norm": 0.7292653322219849, "learning_rate": 7.095355472327188e-06, "loss": 1.5436, "mean_token_accuracy": 0.6305726369222006, "num_tokens": 1995685871.0, "step": 11900 }, { "entropy": 1.7617888549963634, "epoch": 1.307379638021477, "grad_norm": 0.607972264289856, "learning_rate": 7.093912940419518e-06, "loss": 1.4118, "mean_token_accuracy": 0.6436517437299093, "num_tokens": 1995881773.0, "step": 11901 }, { "entropy": 1.6877683500448863, "epoch": 1.30748949493285, "grad_norm": 0.8216177225112915, "learning_rate": 7.0924705321377476e-06, "loss": 1.4615, "mean_token_accuracy": 0.6518243153889974, "num_tokens": 1996016525.0, "step": 11902 }, { "entropy": 1.6974779566129048, "epoch": 1.3075993518442228, "grad_norm": 0.701296865940094, "learning_rate": 7.091028247527523e-06, "loss": 1.4144, "mean_token_accuracy": 0.6555771032969157, "num_tokens": 1996158592.0, "step": 11903 }, { "entropy": 1.681029220422109, "epoch": 1.307709208755596, "grad_norm": 0.6263594627380371, "learning_rate": 7.08958608663449e-06, "loss": 1.4482, "mean_token_accuracy": 0.6391114493211111, "num_tokens": 1996383683.0, "step": 11904 }, { "entropy": 1.6188758412996929, "epoch": 1.3078190656669688, "grad_norm": 0.5903595685958862, "learning_rate": 7.088144049504297e-06, "loss": 1.2563, "mean_token_accuracy": 0.6739430278539658, "num_tokens": 1996537882.0, "step": 11905 }, { "entropy": 1.7247794965902965, "epoch": 1.3079289225783417, "grad_norm": 0.645677924156189, "learning_rate": 7.0867021361825834e-06, "loss": 1.392, "mean_token_accuracy": 0.6563497483730316, "num_tokens": 1996661633.0, "step": 11906 }, { "entropy": 1.6579484939575195, "epoch": 1.3080387794897146, "grad_norm": 0.606181263923645, "learning_rate": 7.085260346714984e-06, "loss": 1.5362, "mean_token_accuracy": 0.6405880848566691, "num_tokens": 1996846001.0, "step": 11907 }, { "entropy": 1.699442724386851, "epoch": 1.3081486364010875, "grad_norm": 0.6131225228309631, "learning_rate": 7.083818681147128e-06, "loss": 1.3592, "mean_token_accuracy": 0.658347432812055, "num_tokens": 1997010019.0, "step": 11908 }, { "entropy": 1.688926676909129, "epoch": 1.3082584933124606, "grad_norm": 0.7150919437408447, "learning_rate": 7.08237713952465e-06, "loss": 1.1604, "mean_token_accuracy": 0.6859798580408096, "num_tokens": 1997120284.0, "step": 11909 }, { "entropy": 1.703328440586726, "epoch": 1.3083683502238335, "grad_norm": 0.6847726702690125, "learning_rate": 7.0809357218931655e-06, "loss": 1.5503, "mean_token_accuracy": 0.637129470705986, "num_tokens": 1997338833.0, "step": 11910 }, { "entropy": 1.7029491166273754, "epoch": 1.3084782071352064, "grad_norm": 0.6607728004455566, "learning_rate": 7.079494428298306e-06, "loss": 1.3826, "mean_token_accuracy": 0.6601289560397466, "num_tokens": 1997488890.0, "step": 11911 }, { "entropy": 1.779124120871226, "epoch": 1.3085880640465795, "grad_norm": 0.6851378083229065, "learning_rate": 7.078053258785675e-06, "loss": 1.5597, "mean_token_accuracy": 0.6372034152348837, "num_tokens": 1997717867.0, "step": 11912 }, { "entropy": 1.703583796819051, "epoch": 1.3086979209579523, "grad_norm": 0.7330154776573181, "learning_rate": 7.076612213400893e-06, "loss": 1.5164, "mean_token_accuracy": 0.6479217112064362, "num_tokens": 1997907725.0, "step": 11913 }, { "entropy": 1.752791404724121, "epoch": 1.3088077778693252, "grad_norm": 0.6505106687545776, "learning_rate": 7.075171292189567e-06, "loss": 1.2992, "mean_token_accuracy": 0.6648927380641302, "num_tokens": 1998051327.0, "step": 11914 }, { "entropy": 1.7556921243667603, "epoch": 1.3089176347806981, "grad_norm": 0.9044151902198792, "learning_rate": 7.073730495197302e-06, "loss": 1.3221, "mean_token_accuracy": 0.6583772599697113, "num_tokens": 1998151465.0, "step": 11915 }, { "entropy": 1.6777693728605907, "epoch": 1.309027491692071, "grad_norm": 0.5966777205467224, "learning_rate": 7.072289822469696e-06, "loss": 1.4588, "mean_token_accuracy": 0.6472314149141312, "num_tokens": 1998389856.0, "step": 11916 }, { "entropy": 1.7139520446459453, "epoch": 1.309137348603444, "grad_norm": 0.8145208358764648, "learning_rate": 7.070849274052347e-06, "loss": 1.4261, "mean_token_accuracy": 0.6561163713534673, "num_tokens": 1998569531.0, "step": 11917 }, { "entropy": 1.7513943115870159, "epoch": 1.309247205514817, "grad_norm": 0.7245900630950928, "learning_rate": 7.069408849990846e-06, "loss": 1.4398, "mean_token_accuracy": 0.6619679679473242, "num_tokens": 1998718517.0, "step": 11918 }, { "entropy": 1.7176838616530101, "epoch": 1.3093570624261899, "grad_norm": 0.6961973309516907, "learning_rate": 7.067968550330788e-06, "loss": 1.3737, "mean_token_accuracy": 0.665938675403595, "num_tokens": 1998860116.0, "step": 11919 }, { "entropy": 1.7159354587395985, "epoch": 1.3094669193375628, "grad_norm": 0.7147000432014465, "learning_rate": 7.066528375117754e-06, "loss": 1.2228, "mean_token_accuracy": 0.6914103428522745, "num_tokens": 1998983159.0, "step": 11920 }, { "entropy": 1.6717688739299774, "epoch": 1.3095767762489356, "grad_norm": 0.6504638195037842, "learning_rate": 7.06508832439732e-06, "loss": 1.3763, "mean_token_accuracy": 0.657271221280098, "num_tokens": 1999134388.0, "step": 11921 }, { "entropy": 1.7023660739262898, "epoch": 1.3096866331603088, "grad_norm": 0.7245521545410156, "learning_rate": 7.0636483982150685e-06, "loss": 1.4674, "mean_token_accuracy": 0.6482570519049963, "num_tokens": 1999333736.0, "step": 11922 }, { "entropy": 1.7503623863061268, "epoch": 1.3097964900716816, "grad_norm": 0.7295483350753784, "learning_rate": 7.0622085966165775e-06, "loss": 1.2565, "mean_token_accuracy": 0.6758219550053278, "num_tokens": 1999443030.0, "step": 11923 }, { "entropy": 1.6741431951522827, "epoch": 1.3099063469830545, "grad_norm": 0.8775436282157898, "learning_rate": 7.060768919647402e-06, "loss": 1.3264, "mean_token_accuracy": 0.6649001787106196, "num_tokens": 1999620601.0, "step": 11924 }, { "entropy": 1.6897100607554119, "epoch": 1.3100162038944276, "grad_norm": 0.5896010994911194, "learning_rate": 7.0593293673531185e-06, "loss": 1.4494, "mean_token_accuracy": 0.6483661234378815, "num_tokens": 1999852207.0, "step": 11925 }, { "entropy": 1.7433798710505168, "epoch": 1.3101260608058005, "grad_norm": 0.6538956761360168, "learning_rate": 7.057889939779284e-06, "loss": 1.3828, "mean_token_accuracy": 0.6552889595429102, "num_tokens": 2000021319.0, "step": 11926 }, { "entropy": 1.6846702595551808, "epoch": 1.3102359177171734, "grad_norm": 0.8677592873573303, "learning_rate": 7.056450636971459e-06, "loss": 1.2809, "mean_token_accuracy": 0.6779492398103079, "num_tokens": 2000178801.0, "step": 11927 }, { "entropy": 1.7277966737747192, "epoch": 1.3103457746285463, "grad_norm": 0.7624452114105225, "learning_rate": 7.055011458975189e-06, "loss": 1.4015, "mean_token_accuracy": 0.6448380748430887, "num_tokens": 2000366731.0, "step": 11928 }, { "entropy": 1.6685428619384766, "epoch": 1.3104556315399192, "grad_norm": 0.6821413636207581, "learning_rate": 7.053572405836035e-06, "loss": 1.3076, "mean_token_accuracy": 0.6730211079120636, "num_tokens": 2000503771.0, "step": 11929 }, { "entropy": 1.7650530834992726, "epoch": 1.3105654884512923, "grad_norm": 0.7161713242530823, "learning_rate": 7.0521334775995325e-06, "loss": 1.2504, "mean_token_accuracy": 0.674405058224996, "num_tokens": 2000625020.0, "step": 11930 }, { "entropy": 1.7475067675113678, "epoch": 1.3106753453626652, "grad_norm": 0.7672361135482788, "learning_rate": 7.050694674311227e-06, "loss": 1.5302, "mean_token_accuracy": 0.6399498085180918, "num_tokens": 2000778280.0, "step": 11931 }, { "entropy": 1.7035863002141316, "epoch": 1.310785202274038, "grad_norm": 0.6786298751831055, "learning_rate": 7.049255996016657e-06, "loss": 1.4849, "mean_token_accuracy": 0.6429836452007294, "num_tokens": 2000942476.0, "step": 11932 }, { "entropy": 1.6817757089932759, "epoch": 1.310895059185411, "grad_norm": 0.6696064472198486, "learning_rate": 7.047817442761351e-06, "loss": 1.32, "mean_token_accuracy": 0.668683315316836, "num_tokens": 2001089622.0, "step": 11933 }, { "entropy": 1.6976955632368724, "epoch": 1.3110049160967838, "grad_norm": 0.7553936839103699, "learning_rate": 7.046379014590847e-06, "loss": 1.2293, "mean_token_accuracy": 0.6788338373104731, "num_tokens": 2001197344.0, "step": 11934 }, { "entropy": 1.6999300718307495, "epoch": 1.311114773008157, "grad_norm": 0.6611595153808594, "learning_rate": 7.0449407115506655e-06, "loss": 1.3581, "mean_token_accuracy": 0.6565580070018768, "num_tokens": 2001349759.0, "step": 11935 }, { "entropy": 1.6997497379779816, "epoch": 1.3112246299195298, "grad_norm": 1.5599058866500854, "learning_rate": 7.043502533686321e-06, "loss": 1.3612, "mean_token_accuracy": 0.664860337972641, "num_tokens": 2001540113.0, "step": 11936 }, { "entropy": 1.6655697226524353, "epoch": 1.3113344868309027, "grad_norm": 0.7153550982475281, "learning_rate": 7.04206448104334e-06, "loss": 1.4734, "mean_token_accuracy": 0.6454818745454153, "num_tokens": 2001759838.0, "step": 11937 }, { "entropy": 1.6881613234678905, "epoch": 1.3114443437422758, "grad_norm": 0.6710202693939209, "learning_rate": 7.04062655366724e-06, "loss": 1.3767, "mean_token_accuracy": 0.6576898495356241, "num_tokens": 2001927019.0, "step": 11938 }, { "entropy": 1.6877728005250294, "epoch": 1.3115542006536487, "grad_norm": 0.7258594036102295, "learning_rate": 7.039188751603525e-06, "loss": 1.2771, "mean_token_accuracy": 0.6696644773085912, "num_tokens": 2002100598.0, "step": 11939 }, { "entropy": 1.6909307440121968, "epoch": 1.3116640575650216, "grad_norm": 0.6466124057769775, "learning_rate": 7.037751074897698e-06, "loss": 1.5839, "mean_token_accuracy": 0.6439488381147385, "num_tokens": 2002336522.0, "step": 11940 }, { "entropy": 1.7181779742240906, "epoch": 1.3117739144763945, "grad_norm": 0.6814691424369812, "learning_rate": 7.036313523595266e-06, "loss": 1.4541, "mean_token_accuracy": 0.6534381111462911, "num_tokens": 2002516915.0, "step": 11941 }, { "entropy": 1.6743408739566803, "epoch": 1.3118837713877674, "grad_norm": 0.6737117171287537, "learning_rate": 7.034876097741723e-06, "loss": 1.3741, "mean_token_accuracy": 0.6632463186979294, "num_tokens": 2002703215.0, "step": 11942 }, { "entropy": 1.7161073585351307, "epoch": 1.3119936282991405, "grad_norm": 0.6047975420951843, "learning_rate": 7.033438797382568e-06, "loss": 1.3728, "mean_token_accuracy": 0.6500318894783655, "num_tokens": 2002862792.0, "step": 11943 }, { "entropy": 1.6879894336064656, "epoch": 1.3121034852105133, "grad_norm": 0.660843551158905, "learning_rate": 7.032001622563287e-06, "loss": 1.435, "mean_token_accuracy": 0.6527181764443716, "num_tokens": 2003060623.0, "step": 11944 }, { "entropy": 1.7308319707711537, "epoch": 1.3122133421218862, "grad_norm": 0.6762115359306335, "learning_rate": 7.030564573329364e-06, "loss": 1.3298, "mean_token_accuracy": 0.6627347220977148, "num_tokens": 2003226847.0, "step": 11945 }, { "entropy": 1.6313576400279999, "epoch": 1.3123231990332591, "grad_norm": 0.647402822971344, "learning_rate": 7.029127649726286e-06, "loss": 1.46, "mean_token_accuracy": 0.6542697101831436, "num_tokens": 2003413157.0, "step": 11946 }, { "entropy": 1.6898943781852722, "epoch": 1.312433055944632, "grad_norm": 0.6319282650947571, "learning_rate": 7.027690851799529e-06, "loss": 1.4451, "mean_token_accuracy": 0.6684151142835617, "num_tokens": 2003564249.0, "step": 11947 }, { "entropy": 1.6767113904158275, "epoch": 1.312542912856005, "grad_norm": 0.794843316078186, "learning_rate": 7.026254179594563e-06, "loss": 1.3385, "mean_token_accuracy": 0.6593736608823141, "num_tokens": 2003753111.0, "step": 11948 }, { "entropy": 1.7902327179908752, "epoch": 1.312652769767378, "grad_norm": 0.7606083154678345, "learning_rate": 7.024817633156862e-06, "loss": 1.5597, "mean_token_accuracy": 0.6477732261021932, "num_tokens": 2003905456.0, "step": 11949 }, { "entropy": 1.755109578371048, "epoch": 1.3127626266787509, "grad_norm": 0.7885520458221436, "learning_rate": 7.023381212531895e-06, "loss": 1.3244, "mean_token_accuracy": 0.6673354307810465, "num_tokens": 2004044077.0, "step": 11950 }, { "entropy": 1.7376815676689148, "epoch": 1.312872483590124, "grad_norm": 0.6872356534004211, "learning_rate": 7.02194491776512e-06, "loss": 1.4115, "mean_token_accuracy": 0.6557418157656988, "num_tokens": 2004214096.0, "step": 11951 }, { "entropy": 1.7242650091648102, "epoch": 1.3129823405014969, "grad_norm": 1.8172736167907715, "learning_rate": 7.020508748901993e-06, "loss": 1.181, "mean_token_accuracy": 0.6751060833533605, "num_tokens": 2004384350.0, "step": 11952 }, { "entropy": 1.7193986773490906, "epoch": 1.3130921974128698, "grad_norm": 0.7913485169410706, "learning_rate": 7.019072705987975e-06, "loss": 1.3648, "mean_token_accuracy": 0.6586080143849055, "num_tokens": 2004533962.0, "step": 11953 }, { "entropy": 1.7227738400300343, "epoch": 1.3132020543242426, "grad_norm": 0.6674314141273499, "learning_rate": 7.017636789068507e-06, "loss": 1.439, "mean_token_accuracy": 0.6552018125851949, "num_tokens": 2004682824.0, "step": 11954 }, { "entropy": 1.696447104215622, "epoch": 1.3133119112356155, "grad_norm": 0.7668749094009399, "learning_rate": 7.0162009981890445e-06, "loss": 1.2401, "mean_token_accuracy": 0.6790489206711451, "num_tokens": 2004823503.0, "step": 11955 }, { "entropy": 1.7591257691383362, "epoch": 1.3134217681469886, "grad_norm": 0.6392584443092346, "learning_rate": 7.014765333395026e-06, "loss": 1.4618, "mean_token_accuracy": 0.640847826997439, "num_tokens": 2005055675.0, "step": 11956 }, { "entropy": 1.7323053081830342, "epoch": 1.3135316250583615, "grad_norm": 0.7856550216674805, "learning_rate": 7.0133297947318845e-06, "loss": 1.2616, "mean_token_accuracy": 0.6696621626615524, "num_tokens": 2005187034.0, "step": 11957 }, { "entropy": 1.7238508264223735, "epoch": 1.3136414819697344, "grad_norm": 0.845143735408783, "learning_rate": 7.011894382245062e-06, "loss": 1.4599, "mean_token_accuracy": 0.6531222860018412, "num_tokens": 2005330183.0, "step": 11958 }, { "entropy": 1.7025631666183472, "epoch": 1.3137513388811073, "grad_norm": 0.7193249464035034, "learning_rate": 7.0104590959799845e-06, "loss": 1.322, "mean_token_accuracy": 0.6668793509403864, "num_tokens": 2005483599.0, "step": 11959 }, { "entropy": 1.721425364414851, "epoch": 1.3138611957924802, "grad_norm": 0.8288023471832275, "learning_rate": 7.009023935982076e-06, "loss": 1.3867, "mean_token_accuracy": 0.6620455334583918, "num_tokens": 2005630137.0, "step": 11960 }, { "entropy": 1.6998872856299083, "epoch": 1.3139710527038533, "grad_norm": 0.673412024974823, "learning_rate": 7.0075889022967625e-06, "loss": 1.5038, "mean_token_accuracy": 0.6355178554852804, "num_tokens": 2005829806.0, "step": 11961 }, { "entropy": 1.792010138432185, "epoch": 1.3140809096152262, "grad_norm": 0.805509090423584, "learning_rate": 7.0061539949694645e-06, "loss": 1.4613, "mean_token_accuracy": 0.6476135204235712, "num_tokens": 2005964524.0, "step": 11962 }, { "entropy": 1.7988332509994507, "epoch": 1.314190766526599, "grad_norm": 0.7284339666366577, "learning_rate": 7.004719214045592e-06, "loss": 1.4274, "mean_token_accuracy": 0.6368361463149389, "num_tokens": 2006085615.0, "step": 11963 }, { "entropy": 1.7714728315671284, "epoch": 1.3143006234379722, "grad_norm": 0.674480676651001, "learning_rate": 7.003284559570554e-06, "loss": 1.4091, "mean_token_accuracy": 0.6386928856372833, "num_tokens": 2006276341.0, "step": 11964 }, { "entropy": 1.7038045426209767, "epoch": 1.314410480349345, "grad_norm": 0.7045182585716248, "learning_rate": 7.001850031589761e-06, "loss": 1.4416, "mean_token_accuracy": 0.6599552830060323, "num_tokens": 2006435304.0, "step": 11965 }, { "entropy": 1.7020907998085022, "epoch": 1.314520337260718, "grad_norm": 0.7166406512260437, "learning_rate": 7.0004156301486095e-06, "loss": 1.3538, "mean_token_accuracy": 0.6714149415493011, "num_tokens": 2006545361.0, "step": 11966 }, { "entropy": 1.6336172918478649, "epoch": 1.3146301941720908, "grad_norm": 0.5933431386947632, "learning_rate": 6.998981355292505e-06, "loss": 1.4167, "mean_token_accuracy": 0.6494115591049194, "num_tokens": 2006711734.0, "step": 11967 }, { "entropy": 1.704167326291402, "epoch": 1.3147400510834637, "grad_norm": 0.6579341292381287, "learning_rate": 6.997547207066836e-06, "loss": 1.2635, "mean_token_accuracy": 0.6757092028856277, "num_tokens": 2006860103.0, "step": 11968 }, { "entropy": 1.725958655277888, "epoch": 1.3148499079948368, "grad_norm": 0.7082239389419556, "learning_rate": 6.996113185516993e-06, "loss": 1.3941, "mean_token_accuracy": 0.6522109111150106, "num_tokens": 2007005193.0, "step": 11969 }, { "entropy": 1.6946588456630707, "epoch": 1.3149597649062097, "grad_norm": 0.7615059614181519, "learning_rate": 6.994679290688366e-06, "loss": 1.3615, "mean_token_accuracy": 0.6599778831005096, "num_tokens": 2007176565.0, "step": 11970 }, { "entropy": 1.64273335536321, "epoch": 1.3150696218175826, "grad_norm": 0.6892207860946655, "learning_rate": 6.993245522626335e-06, "loss": 1.1621, "mean_token_accuracy": 0.6934431493282318, "num_tokens": 2007289708.0, "step": 11971 }, { "entropy": 1.7297363777955372, "epoch": 1.3151794787289555, "grad_norm": 0.7124273180961609, "learning_rate": 6.991811881376274e-06, "loss": 1.4418, "mean_token_accuracy": 0.641096313794454, "num_tokens": 2007489730.0, "step": 11972 }, { "entropy": 1.7163341144720714, "epoch": 1.3152893356403283, "grad_norm": 0.6671984195709229, "learning_rate": 6.990378366983563e-06, "loss": 1.4064, "mean_token_accuracy": 0.6469017068545023, "num_tokens": 2007658295.0, "step": 11973 }, { "entropy": 1.7121194104353588, "epoch": 1.3153991925517015, "grad_norm": 0.7968659400939941, "learning_rate": 6.9889449794935685e-06, "loss": 1.3539, "mean_token_accuracy": 0.6690041224161783, "num_tokens": 2007827861.0, "step": 11974 }, { "entropy": 1.7029815713564556, "epoch": 1.3155090494630743, "grad_norm": 0.6681612133979797, "learning_rate": 6.987511718951661e-06, "loss": 1.4541, "mean_token_accuracy": 0.6633522013823191, "num_tokens": 2008021545.0, "step": 11975 }, { "entropy": 1.727482130130132, "epoch": 1.3156189063744472, "grad_norm": 0.6855336427688599, "learning_rate": 6.9860785854032e-06, "loss": 1.5242, "mean_token_accuracy": 0.6396484598517418, "num_tokens": 2008216504.0, "step": 11976 }, { "entropy": 1.7152353723843892, "epoch": 1.3157287632858203, "grad_norm": 0.6966313123703003, "learning_rate": 6.9846455788935376e-06, "loss": 1.3325, "mean_token_accuracy": 0.6538062343994776, "num_tokens": 2008410271.0, "step": 11977 }, { "entropy": 1.7069965600967407, "epoch": 1.3158386201971932, "grad_norm": 0.6595732569694519, "learning_rate": 6.983212699468035e-06, "loss": 1.5271, "mean_token_accuracy": 0.6532554477453232, "num_tokens": 2008581622.0, "step": 11978 }, { "entropy": 1.6654066642125447, "epoch": 1.315948477108566, "grad_norm": 0.6108769774436951, "learning_rate": 6.981779947172047e-06, "loss": 1.4571, "mean_token_accuracy": 0.6504184703032175, "num_tokens": 2008773849.0, "step": 11979 }, { "entropy": 1.65288241704305, "epoch": 1.316058334019939, "grad_norm": 0.7052204608917236, "learning_rate": 6.980347322050905e-06, "loss": 1.2769, "mean_token_accuracy": 0.6784281581640244, "num_tokens": 2008965203.0, "step": 11980 }, { "entropy": 1.6817485094070435, "epoch": 1.3161681909313119, "grad_norm": 0.5673655867576599, "learning_rate": 6.97891482414996e-06, "loss": 1.3401, "mean_token_accuracy": 0.6604795058568319, "num_tokens": 2009142441.0, "step": 11981 }, { "entropy": 1.7240809003512065, "epoch": 1.316278047842685, "grad_norm": 0.6513432860374451, "learning_rate": 6.9774824535145525e-06, "loss": 1.4674, "mean_token_accuracy": 0.6503648559252421, "num_tokens": 2009350171.0, "step": 11982 }, { "entropy": 1.687408596277237, "epoch": 1.3163879047540579, "grad_norm": 1.6337801218032837, "learning_rate": 6.976050210190013e-06, "loss": 1.3665, "mean_token_accuracy": 0.6606544703245163, "num_tokens": 2009555178.0, "step": 11983 }, { "entropy": 1.7758075793584187, "epoch": 1.3164977616654308, "grad_norm": 0.7786139249801636, "learning_rate": 6.9746180942216676e-06, "loss": 1.3912, "mean_token_accuracy": 0.657907764116923, "num_tokens": 2009757056.0, "step": 11984 }, { "entropy": 1.7262560923894246, "epoch": 1.3166076185768036, "grad_norm": 0.5811822414398193, "learning_rate": 6.973186105654849e-06, "loss": 1.4709, "mean_token_accuracy": 0.6334926833709081, "num_tokens": 2009978729.0, "step": 11985 }, { "entropy": 1.7114702463150024, "epoch": 1.3167174754881765, "grad_norm": 0.6832294464111328, "learning_rate": 6.971754244534872e-06, "loss": 1.3515, "mean_token_accuracy": 0.6603354662656784, "num_tokens": 2010165090.0, "step": 11986 }, { "entropy": 1.7641779085000355, "epoch": 1.3168273323995496, "grad_norm": 0.694060742855072, "learning_rate": 6.97032251090706e-06, "loss": 1.3409, "mean_token_accuracy": 0.6643087863922119, "num_tokens": 2010274509.0, "step": 11987 }, { "entropy": 1.6871871054172516, "epoch": 1.3169371893109225, "grad_norm": 0.7732803821563721, "learning_rate": 6.9688909048167265e-06, "loss": 1.2772, "mean_token_accuracy": 0.6672064363956451, "num_tokens": 2010382893.0, "step": 11988 }, { "entropy": 1.679459939400355, "epoch": 1.3170470462222954, "grad_norm": 0.6008415818214417, "learning_rate": 6.967459426309175e-06, "loss": 1.3141, "mean_token_accuracy": 0.6672980437676111, "num_tokens": 2010528829.0, "step": 11989 }, { "entropy": 1.6381245056788127, "epoch": 1.3171569031336685, "grad_norm": 1.6934006214141846, "learning_rate": 6.966028075429716e-06, "loss": 1.0885, "mean_token_accuracy": 0.6874684443076452, "num_tokens": 2010687460.0, "step": 11990 }, { "entropy": 1.7382714649041493, "epoch": 1.3172667600450414, "grad_norm": 0.7133601307868958, "learning_rate": 6.9645968522236576e-06, "loss": 1.4665, "mean_token_accuracy": 0.6435732394456863, "num_tokens": 2010850045.0, "step": 11991 }, { "entropy": 1.736552745103836, "epoch": 1.3173766169564143, "grad_norm": 0.8156201243400574, "learning_rate": 6.963165756736283e-06, "loss": 1.3862, "mean_token_accuracy": 0.6678305069605509, "num_tokens": 2010981868.0, "step": 11992 }, { "entropy": 1.670212835073471, "epoch": 1.3174864738677872, "grad_norm": 0.6825160384178162, "learning_rate": 6.961734789012895e-06, "loss": 1.3759, "mean_token_accuracy": 0.6739961455265681, "num_tokens": 2011137705.0, "step": 11993 }, { "entropy": 1.7107898096243541, "epoch": 1.31759633077916, "grad_norm": 0.6937064528465271, "learning_rate": 6.9603039490987834e-06, "loss": 1.3964, "mean_token_accuracy": 0.6487229913473129, "num_tokens": 2011327696.0, "step": 11994 }, { "entropy": 1.7345775763193767, "epoch": 1.3177061876905332, "grad_norm": 0.7579020857810974, "learning_rate": 6.958873237039231e-06, "loss": 1.3378, "mean_token_accuracy": 0.6619026213884354, "num_tokens": 2011466276.0, "step": 11995 }, { "entropy": 1.6762347221374512, "epoch": 1.317816044601906, "grad_norm": 0.7532185912132263, "learning_rate": 6.957442652879516e-06, "loss": 1.3609, "mean_token_accuracy": 0.6672007888555527, "num_tokens": 2011583262.0, "step": 11996 }, { "entropy": 1.6885204215844472, "epoch": 1.317925901513279, "grad_norm": 0.8010686635971069, "learning_rate": 6.956012196664925e-06, "loss": 1.4232, "mean_token_accuracy": 0.6506709555784861, "num_tokens": 2011723412.0, "step": 11997 }, { "entropy": 1.687297483285268, "epoch": 1.3180357584246518, "grad_norm": 0.6130065321922302, "learning_rate": 6.95458186844072e-06, "loss": 1.4189, "mean_token_accuracy": 0.6468682587146759, "num_tokens": 2011927534.0, "step": 11998 }, { "entropy": 1.6592112084229786, "epoch": 1.3181456153360247, "grad_norm": 0.6361923217773438, "learning_rate": 6.9531516682521805e-06, "loss": 1.3959, "mean_token_accuracy": 0.6538289586702982, "num_tokens": 2012095512.0, "step": 11999 }, { "entropy": 1.682155708471934, "epoch": 1.3182554722473978, "grad_norm": 0.6887022852897644, "learning_rate": 6.951721596144566e-06, "loss": 1.4071, "mean_token_accuracy": 0.6655903309583664, "num_tokens": 2012261571.0, "step": 12000 }, { "entropy": 1.636295755704244, "epoch": 1.3183653291587707, "grad_norm": 0.8937088847160339, "learning_rate": 6.950291652163137e-06, "loss": 1.4039, "mean_token_accuracy": 0.6635189006725947, "num_tokens": 2012434032.0, "step": 12001 }, { "entropy": 1.7253755927085876, "epoch": 1.3184751860701436, "grad_norm": 0.6961193680763245, "learning_rate": 6.9488618363531515e-06, "loss": 1.4444, "mean_token_accuracy": 0.6477092305819193, "num_tokens": 2012624662.0, "step": 12002 }, { "entropy": 1.6379418571790059, "epoch": 1.3185850429815167, "grad_norm": 0.7339573502540588, "learning_rate": 6.947432148759871e-06, "loss": 1.2261, "mean_token_accuracy": 0.6745143185059229, "num_tokens": 2012745932.0, "step": 12003 }, { "entropy": 1.6386353770891826, "epoch": 1.3186948998928896, "grad_norm": 0.6927421689033508, "learning_rate": 6.946002589428528e-06, "loss": 1.3068, "mean_token_accuracy": 0.6671117693185806, "num_tokens": 2012919590.0, "step": 12004 }, { "entropy": 1.7081229587395985, "epoch": 1.3188047568042625, "grad_norm": 0.7358942627906799, "learning_rate": 6.9445731584043776e-06, "loss": 1.4894, "mean_token_accuracy": 0.6323947161436081, "num_tokens": 2013142770.0, "step": 12005 }, { "entropy": 1.6147757669289906, "epoch": 1.3189146137156353, "grad_norm": 0.6353159546852112, "learning_rate": 6.943143855732662e-06, "loss": 1.2623, "mean_token_accuracy": 0.6711504012346268, "num_tokens": 2013315444.0, "step": 12006 }, { "entropy": 1.7047906319300334, "epoch": 1.3190244706270082, "grad_norm": 0.8190060257911682, "learning_rate": 6.941714681458617e-06, "loss": 1.3866, "mean_token_accuracy": 0.6565055847167969, "num_tokens": 2013477090.0, "step": 12007 }, { "entropy": 1.734445333480835, "epoch": 1.3191343275383813, "grad_norm": 0.5961945056915283, "learning_rate": 6.940285635627468e-06, "loss": 1.4759, "mean_token_accuracy": 0.6589976797501246, "num_tokens": 2013679313.0, "step": 12008 }, { "entropy": 1.7090435028076172, "epoch": 1.3192441844497542, "grad_norm": 0.7113987803459167, "learning_rate": 6.9388567182844545e-06, "loss": 1.5274, "mean_token_accuracy": 0.6512242555618286, "num_tokens": 2013869597.0, "step": 12009 }, { "entropy": 1.708377093076706, "epoch": 1.319354041361127, "grad_norm": 0.6466943025588989, "learning_rate": 6.9374279294747914e-06, "loss": 1.3696, "mean_token_accuracy": 0.657651330033938, "num_tokens": 2014060463.0, "step": 12010 }, { "entropy": 1.7558682362238567, "epoch": 1.3194638982725, "grad_norm": 0.6701050400733948, "learning_rate": 6.9359992692437074e-06, "loss": 1.5358, "mean_token_accuracy": 0.6455720663070679, "num_tokens": 2014230820.0, "step": 12011 }, { "entropy": 1.716192901134491, "epoch": 1.3195737551838729, "grad_norm": 0.6614550948143005, "learning_rate": 6.934570737636415e-06, "loss": 1.3733, "mean_token_accuracy": 0.6561314910650253, "num_tokens": 2014358023.0, "step": 12012 }, { "entropy": 1.7146589954694111, "epoch": 1.319683612095246, "grad_norm": 0.8411096930503845, "learning_rate": 6.933142334698126e-06, "loss": 1.504, "mean_token_accuracy": 0.6503568341334661, "num_tokens": 2014493658.0, "step": 12013 }, { "entropy": 1.656291385491689, "epoch": 1.3197934690066189, "grad_norm": 0.6897427439689636, "learning_rate": 6.931714060474051e-06, "loss": 1.3497, "mean_token_accuracy": 0.6640297720829645, "num_tokens": 2014639448.0, "step": 12014 }, { "entropy": 1.7179415325323741, "epoch": 1.3199033259179918, "grad_norm": 0.8011785745620728, "learning_rate": 6.930285915009391e-06, "loss": 1.4384, "mean_token_accuracy": 0.6595332821210226, "num_tokens": 2014781669.0, "step": 12015 }, { "entropy": 1.6988608439763386, "epoch": 1.3200131828293649, "grad_norm": 0.7006280422210693, "learning_rate": 6.928857898349347e-06, "loss": 1.4754, "mean_token_accuracy": 0.6605327129364014, "num_tokens": 2014956142.0, "step": 12016 }, { "entropy": 1.7000405689080555, "epoch": 1.3201230397407377, "grad_norm": 0.8182290196418762, "learning_rate": 6.927430010539115e-06, "loss": 1.5508, "mean_token_accuracy": 0.6524588018655777, "num_tokens": 2015126652.0, "step": 12017 }, { "entropy": 1.6618265112241108, "epoch": 1.3202328966521106, "grad_norm": 0.7501579523086548, "learning_rate": 6.9260022516238915e-06, "loss": 1.402, "mean_token_accuracy": 0.6613495101531347, "num_tokens": 2015257773.0, "step": 12018 }, { "entropy": 1.7788889408111572, "epoch": 1.3203427535634835, "grad_norm": 0.7341859936714172, "learning_rate": 6.924574621648861e-06, "loss": 1.3527, "mean_token_accuracy": 0.6608054389556249, "num_tokens": 2015397071.0, "step": 12019 }, { "entropy": 1.7242496609687805, "epoch": 1.3204526104748564, "grad_norm": 0.6629573702812195, "learning_rate": 6.923147120659204e-06, "loss": 1.4938, "mean_token_accuracy": 0.6375502049922943, "num_tokens": 2015580976.0, "step": 12020 }, { "entropy": 1.6560555597146351, "epoch": 1.3205624673862295, "grad_norm": 0.6642670035362244, "learning_rate": 6.921719748700107e-06, "loss": 1.3582, "mean_token_accuracy": 0.6535757084687551, "num_tokens": 2015763946.0, "step": 12021 }, { "entropy": 1.768319457769394, "epoch": 1.3206723242976024, "grad_norm": 0.7571823596954346, "learning_rate": 6.9202925058167395e-06, "loss": 1.41, "mean_token_accuracy": 0.64376833041509, "num_tokens": 2015909480.0, "step": 12022 }, { "entropy": 1.6888968745867412, "epoch": 1.3207821812089753, "grad_norm": 0.6024224758148193, "learning_rate": 6.918865392054276e-06, "loss": 1.4121, "mean_token_accuracy": 0.6446995933850607, "num_tokens": 2016132439.0, "step": 12023 }, { "entropy": 1.6744337181250255, "epoch": 1.3208920381203482, "grad_norm": 0.6757270097732544, "learning_rate": 6.917438407457888e-06, "loss": 1.4779, "mean_token_accuracy": 0.6477493494749069, "num_tokens": 2016327301.0, "step": 12024 }, { "entropy": 1.6705586810906727, "epoch": 1.321001895031721, "grad_norm": 0.6879779696464539, "learning_rate": 6.916011552072729e-06, "loss": 1.3824, "mean_token_accuracy": 0.6512851764758428, "num_tokens": 2016514623.0, "step": 12025 }, { "entropy": 1.6962314943472545, "epoch": 1.3211117519430942, "grad_norm": 0.7470236420631409, "learning_rate": 6.9145848259439676e-06, "loss": 1.3411, "mean_token_accuracy": 0.6559085547924042, "num_tokens": 2016664612.0, "step": 12026 }, { "entropy": 1.6964036126931508, "epoch": 1.321221608854467, "grad_norm": 0.6507591009140015, "learning_rate": 6.913158229116755e-06, "loss": 1.3099, "mean_token_accuracy": 0.6590965191523234, "num_tokens": 2016862328.0, "step": 12027 }, { "entropy": 1.6942812999089558, "epoch": 1.32133146576584, "grad_norm": 0.8158959150314331, "learning_rate": 6.911731761636241e-06, "loss": 1.2446, "mean_token_accuracy": 0.6787735968828201, "num_tokens": 2016988531.0, "step": 12028 }, { "entropy": 1.6633458336194356, "epoch": 1.321441322677213, "grad_norm": 0.7270153760910034, "learning_rate": 6.910305423547574e-06, "loss": 1.4116, "mean_token_accuracy": 0.6561925808588663, "num_tokens": 2017194398.0, "step": 12029 }, { "entropy": 1.7125836710135143, "epoch": 1.321551179588586, "grad_norm": 0.7208383083343506, "learning_rate": 6.908879214895902e-06, "loss": 1.5425, "mean_token_accuracy": 0.629949559768041, "num_tokens": 2017344053.0, "step": 12030 }, { "entropy": 1.6911011735598247, "epoch": 1.3216610364999588, "grad_norm": 0.6760180592536926, "learning_rate": 6.907453135726358e-06, "loss": 1.2465, "mean_token_accuracy": 0.6811218510071436, "num_tokens": 2017484897.0, "step": 12031 }, { "entropy": 1.7392044166723888, "epoch": 1.3217708934113317, "grad_norm": 0.6357057094573975, "learning_rate": 6.906027186084079e-06, "loss": 1.4049, "mean_token_accuracy": 0.6535494228204092, "num_tokens": 2017680899.0, "step": 12032 }, { "entropy": 1.7378877997398376, "epoch": 1.3218807503227046, "grad_norm": 0.7144643664360046, "learning_rate": 6.9046013660141895e-06, "loss": 1.4085, "mean_token_accuracy": 0.6645797441403071, "num_tokens": 2017822147.0, "step": 12033 }, { "entropy": 1.67686927318573, "epoch": 1.3219906072340777, "grad_norm": 0.6405379772186279, "learning_rate": 6.903175675561823e-06, "loss": 1.5225, "mean_token_accuracy": 0.6583962291479111, "num_tokens": 2018060459.0, "step": 12034 }, { "entropy": 1.7921959658463795, "epoch": 1.3221004641454506, "grad_norm": 0.6897278428077698, "learning_rate": 6.901750114772107e-06, "loss": 1.5251, "mean_token_accuracy": 0.6415904760360718, "num_tokens": 2018209159.0, "step": 12035 }, { "entropy": 1.748464286327362, "epoch": 1.3222103210568235, "grad_norm": 0.7679427862167358, "learning_rate": 6.900324683690145e-06, "loss": 1.2433, "mean_token_accuracy": 0.6749467998743057, "num_tokens": 2018314401.0, "step": 12036 }, { "entropy": 1.808843304713567, "epoch": 1.3223201779681963, "grad_norm": 0.6774364709854126, "learning_rate": 6.8988993823610595e-06, "loss": 1.5223, "mean_token_accuracy": 0.646757240096728, "num_tokens": 2018473445.0, "step": 12037 }, { "entropy": 1.7024830679098766, "epoch": 1.3224300348795692, "grad_norm": 0.6565459966659546, "learning_rate": 6.897474210829965e-06, "loss": 1.4297, "mean_token_accuracy": 0.6503916382789612, "num_tokens": 2018649350.0, "step": 12038 }, { "entropy": 1.6916101773579915, "epoch": 1.3225398917909423, "grad_norm": 0.7631819248199463, "learning_rate": 6.896049169141964e-06, "loss": 1.4192, "mean_token_accuracy": 0.6639653344949087, "num_tokens": 2018818121.0, "step": 12039 }, { "entropy": 1.6281659305095673, "epoch": 1.3226497487023152, "grad_norm": 0.6123877167701721, "learning_rate": 6.894624257342153e-06, "loss": 1.3443, "mean_token_accuracy": 0.6637972791989645, "num_tokens": 2018982855.0, "step": 12040 }, { "entropy": 1.6332585612932842, "epoch": 1.322759605613688, "grad_norm": 0.6782002449035645, "learning_rate": 6.893199475475638e-06, "loss": 1.4276, "mean_token_accuracy": 0.6608372827370962, "num_tokens": 2019181386.0, "step": 12041 }, { "entropy": 1.6969127257664998, "epoch": 1.3228694625250612, "grad_norm": 0.751428484916687, "learning_rate": 6.891774823587505e-06, "loss": 1.4005, "mean_token_accuracy": 0.6656929155190786, "num_tokens": 2019326811.0, "step": 12042 }, { "entropy": 1.6707496643066406, "epoch": 1.322979319436434, "grad_norm": 0.6801748871803284, "learning_rate": 6.890350301722852e-06, "loss": 1.405, "mean_token_accuracy": 0.6508347243070602, "num_tokens": 2019497430.0, "step": 12043 }, { "entropy": 1.770410180091858, "epoch": 1.323089176347807, "grad_norm": 0.6442971229553223, "learning_rate": 6.888925909926758e-06, "loss": 1.4553, "mean_token_accuracy": 0.6529978712399801, "num_tokens": 2019663455.0, "step": 12044 }, { "entropy": 1.7813760836919148, "epoch": 1.3231990332591799, "grad_norm": 0.7301626801490784, "learning_rate": 6.887501648244306e-06, "loss": 1.3938, "mean_token_accuracy": 0.6543681472539902, "num_tokens": 2019814550.0, "step": 12045 }, { "entropy": 1.7918006579081218, "epoch": 1.3233088901705528, "grad_norm": 0.7231292724609375, "learning_rate": 6.886077516720572e-06, "loss": 1.614, "mean_token_accuracy": 0.6222240428129832, "num_tokens": 2020042991.0, "step": 12046 }, { "entropy": 1.6463509897391002, "epoch": 1.3234187470819259, "grad_norm": 0.6251701712608337, "learning_rate": 6.8846535154006385e-06, "loss": 1.3859, "mean_token_accuracy": 0.667206252614657, "num_tokens": 2020213466.0, "step": 12047 }, { "entropy": 1.739404598871867, "epoch": 1.3235286039932987, "grad_norm": 0.7523561716079712, "learning_rate": 6.8832296443295585e-06, "loss": 1.4522, "mean_token_accuracy": 0.648137629032135, "num_tokens": 2020366099.0, "step": 12048 }, { "entropy": 1.6742408871650696, "epoch": 1.3236384609046716, "grad_norm": 0.7165248394012451, "learning_rate": 6.881805903552408e-06, "loss": 1.4481, "mean_token_accuracy": 0.6673527806997299, "num_tokens": 2020537306.0, "step": 12049 }, { "entropy": 1.7045758267243702, "epoch": 1.3237483178160447, "grad_norm": 0.7423866987228394, "learning_rate": 6.880382293114245e-06, "loss": 1.4574, "mean_token_accuracy": 0.6491605639457703, "num_tokens": 2020660666.0, "step": 12050 }, { "entropy": 1.7853013277053833, "epoch": 1.3238581747274174, "grad_norm": 0.6766754984855652, "learning_rate": 6.878958813060127e-06, "loss": 1.3687, "mean_token_accuracy": 0.6513369977474213, "num_tokens": 2020832651.0, "step": 12051 }, { "entropy": 1.657790740331014, "epoch": 1.3239680316387905, "grad_norm": 0.6425931453704834, "learning_rate": 6.877535463435103e-06, "loss": 1.3053, "mean_token_accuracy": 0.6669684946537018, "num_tokens": 2020962525.0, "step": 12052 }, { "entropy": 1.6860653658707936, "epoch": 1.3240778885501634, "grad_norm": 0.6832910180091858, "learning_rate": 6.876112244284228e-06, "loss": 1.2645, "mean_token_accuracy": 0.6669280380010605, "num_tokens": 2021106646.0, "step": 12053 }, { "entropy": 1.693892925977707, "epoch": 1.3241877454615363, "grad_norm": 0.5836326479911804, "learning_rate": 6.874689155652537e-06, "loss": 1.4917, "mean_token_accuracy": 0.6535281638304392, "num_tokens": 2021316957.0, "step": 12054 }, { "entropy": 1.7312416632970173, "epoch": 1.3242976023729094, "grad_norm": 0.9197054505348206, "learning_rate": 6.873266197585079e-06, "loss": 1.4773, "mean_token_accuracy": 0.6464556207259496, "num_tokens": 2021458891.0, "step": 12055 }, { "entropy": 1.6423344214757283, "epoch": 1.3244074592842823, "grad_norm": 0.5606783628463745, "learning_rate": 6.8718433701268885e-06, "loss": 1.2475, "mean_token_accuracy": 0.6767958799997965, "num_tokens": 2021632253.0, "step": 12056 }, { "entropy": 1.7067996362845104, "epoch": 1.3245173161956552, "grad_norm": 0.5830101370811462, "learning_rate": 6.870420673322988e-06, "loss": 1.393, "mean_token_accuracy": 0.6444303045670191, "num_tokens": 2021840691.0, "step": 12057 }, { "entropy": 1.745294988155365, "epoch": 1.324627173107028, "grad_norm": 0.6736421585083008, "learning_rate": 6.8689981072184166e-06, "loss": 1.402, "mean_token_accuracy": 0.6488266239563624, "num_tokens": 2021994717.0, "step": 12058 }, { "entropy": 1.7859689891338348, "epoch": 1.324737030018401, "grad_norm": 0.7754059433937073, "learning_rate": 6.867575671858197e-06, "loss": 1.4224, "mean_token_accuracy": 0.641977791984876, "num_tokens": 2022184817.0, "step": 12059 }, { "entropy": 1.6727923055489857, "epoch": 1.324846886929774, "grad_norm": 0.6476246118545532, "learning_rate": 6.86615336728734e-06, "loss": 1.3758, "mean_token_accuracy": 0.6541973451773325, "num_tokens": 2022383266.0, "step": 12060 }, { "entropy": 1.7898275355497997, "epoch": 1.324956743841147, "grad_norm": 0.8410981297492981, "learning_rate": 6.864731193550867e-06, "loss": 1.3628, "mean_token_accuracy": 0.6513276447852453, "num_tokens": 2022514087.0, "step": 12061 }, { "entropy": 1.658981204032898, "epoch": 1.3250666007525198, "grad_norm": 0.7151813507080078, "learning_rate": 6.863309150693789e-06, "loss": 1.3358, "mean_token_accuracy": 0.6637451301018397, "num_tokens": 2022697233.0, "step": 12062 }, { "entropy": 1.723794678846995, "epoch": 1.325176457663893, "grad_norm": 0.706913411617279, "learning_rate": 6.861887238761116e-06, "loss": 1.4122, "mean_token_accuracy": 0.6545346329609553, "num_tokens": 2022854156.0, "step": 12063 }, { "entropy": 1.7677522897720337, "epoch": 1.3252863145752656, "grad_norm": 0.6113898158073425, "learning_rate": 6.86046545779784e-06, "loss": 1.5288, "mean_token_accuracy": 0.6368463883797327, "num_tokens": 2023116448.0, "step": 12064 }, { "entropy": 1.7364682853221893, "epoch": 1.3253961714866387, "grad_norm": 0.6671069860458374, "learning_rate": 6.859043807848973e-06, "loss": 1.4571, "mean_token_accuracy": 0.6500351677338282, "num_tokens": 2023293074.0, "step": 12065 }, { "entropy": 1.726439744234085, "epoch": 1.3255060283980116, "grad_norm": 0.6300005912780762, "learning_rate": 6.8576222889595e-06, "loss": 1.3818, "mean_token_accuracy": 0.6553014020125071, "num_tokens": 2023473442.0, "step": 12066 }, { "entropy": 1.7023302714029949, "epoch": 1.3256158853093845, "grad_norm": 0.7470946311950684, "learning_rate": 6.856200901174417e-06, "loss": 1.5458, "mean_token_accuracy": 0.6382872660954794, "num_tokens": 2023630081.0, "step": 12067 }, { "entropy": 1.7234592040379841, "epoch": 1.3257257422207576, "grad_norm": 0.7340896129608154, "learning_rate": 6.854779644538708e-06, "loss": 1.4096, "mean_token_accuracy": 0.6467949201663336, "num_tokens": 2023806667.0, "step": 12068 }, { "entropy": 1.735681543747584, "epoch": 1.3258355991321304, "grad_norm": 0.7612413763999939, "learning_rate": 6.853358519097353e-06, "loss": 1.5394, "mean_token_accuracy": 0.6362536748250326, "num_tokens": 2023996526.0, "step": 12069 }, { "entropy": 1.6876520315806072, "epoch": 1.3259454560435033, "grad_norm": 0.7015395164489746, "learning_rate": 6.851937524895334e-06, "loss": 1.3935, "mean_token_accuracy": 0.6509424696365992, "num_tokens": 2024153887.0, "step": 12070 }, { "entropy": 1.7338077227274578, "epoch": 1.3260553129548762, "grad_norm": 0.7074576020240784, "learning_rate": 6.850516661977626e-06, "loss": 1.4231, "mean_token_accuracy": 0.6674382239580154, "num_tokens": 2024314815.0, "step": 12071 }, { "entropy": 1.7138215899467468, "epoch": 1.326165169866249, "grad_norm": 0.7921140789985657, "learning_rate": 6.849095930389193e-06, "loss": 1.3343, "mean_token_accuracy": 0.6653185288111368, "num_tokens": 2024439564.0, "step": 12072 }, { "entropy": 1.7277231812477112, "epoch": 1.3262750267776222, "grad_norm": 0.6742545366287231, "learning_rate": 6.847675330175001e-06, "loss": 1.5814, "mean_token_accuracy": 0.6412968585888544, "num_tokens": 2024634239.0, "step": 12073 }, { "entropy": 1.6866126358509064, "epoch": 1.326384883688995, "grad_norm": 0.6755991578102112, "learning_rate": 6.8462548613800176e-06, "loss": 1.3887, "mean_token_accuracy": 0.6467768748601278, "num_tokens": 2024837409.0, "step": 12074 }, { "entropy": 1.763855755329132, "epoch": 1.326494740600368, "grad_norm": 0.8013515472412109, "learning_rate": 6.844834524049198e-06, "loss": 1.4646, "mean_token_accuracy": 0.6409885436296463, "num_tokens": 2025036012.0, "step": 12075 }, { "entropy": 1.765538622935613, "epoch": 1.326604597511741, "grad_norm": 0.6882338523864746, "learning_rate": 6.843414318227486e-06, "loss": 1.3705, "mean_token_accuracy": 0.6453090657790502, "num_tokens": 2025164469.0, "step": 12076 }, { "entropy": 1.6984353959560394, "epoch": 1.326714454423114, "grad_norm": 0.7873267531394958, "learning_rate": 6.8419942439598445e-06, "loss": 1.5014, "mean_token_accuracy": 0.6472826500733694, "num_tokens": 2025344537.0, "step": 12077 }, { "entropy": 1.7276420692602794, "epoch": 1.3268243113344869, "grad_norm": 0.6308796405792236, "learning_rate": 6.8405743012912074e-06, "loss": 1.3326, "mean_token_accuracy": 0.6528024027744929, "num_tokens": 2025488514.0, "step": 12078 }, { "entropy": 1.7100607454776764, "epoch": 1.3269341682458597, "grad_norm": 0.6320695877075195, "learning_rate": 6.839154490266521e-06, "loss": 1.377, "mean_token_accuracy": 0.6606988708178202, "num_tokens": 2025632637.0, "step": 12079 }, { "entropy": 1.730517605940501, "epoch": 1.3270440251572326, "grad_norm": 0.9139355421066284, "learning_rate": 6.837734810930722e-06, "loss": 1.242, "mean_token_accuracy": 0.68810007472833, "num_tokens": 2025747502.0, "step": 12080 }, { "entropy": 1.695272147655487, "epoch": 1.3271538820686057, "grad_norm": 0.6435163021087646, "learning_rate": 6.836315263328737e-06, "loss": 1.4986, "mean_token_accuracy": 0.6545588473478953, "num_tokens": 2025962120.0, "step": 12081 }, { "entropy": 1.6857863465944927, "epoch": 1.3272637389799786, "grad_norm": 0.7062543630599976, "learning_rate": 6.834895847505496e-06, "loss": 1.3823, "mean_token_accuracy": 0.6642873833576838, "num_tokens": 2026100917.0, "step": 12082 }, { "entropy": 1.6795487602551777, "epoch": 1.3273735958913515, "grad_norm": 0.6519520282745361, "learning_rate": 6.833476563505934e-06, "loss": 1.454, "mean_token_accuracy": 0.6510899215936661, "num_tokens": 2026281990.0, "step": 12083 }, { "entropy": 1.7207885682582855, "epoch": 1.3274834528027244, "grad_norm": 0.5761967301368713, "learning_rate": 6.8320574113749535e-06, "loss": 1.4114, "mean_token_accuracy": 0.6519269794225693, "num_tokens": 2026492957.0, "step": 12084 }, { "entropy": 1.7370118896166484, "epoch": 1.3275933097140973, "grad_norm": 1.0227420330047607, "learning_rate": 6.830638391157478e-06, "loss": 1.3871, "mean_token_accuracy": 0.6556207984685898, "num_tokens": 2026647880.0, "step": 12085 }, { "entropy": 1.7096319099267323, "epoch": 1.3277031666254704, "grad_norm": 0.6812415719032288, "learning_rate": 6.829219502898421e-06, "loss": 1.3995, "mean_token_accuracy": 0.6833713253339132, "num_tokens": 2026809514.0, "step": 12086 }, { "entropy": 1.704438676436742, "epoch": 1.3278130235368433, "grad_norm": 0.7178927063941956, "learning_rate": 6.827800746642688e-06, "loss": 1.5393, "mean_token_accuracy": 0.6350291073322296, "num_tokens": 2027020403.0, "step": 12087 }, { "entropy": 1.6621620655059814, "epoch": 1.3279228804482162, "grad_norm": 0.7681390643119812, "learning_rate": 6.826382122435178e-06, "loss": 1.3886, "mean_token_accuracy": 0.6718294769525528, "num_tokens": 2027223142.0, "step": 12088 }, { "entropy": 1.6573795974254608, "epoch": 1.3280327373595893, "grad_norm": 0.6329140067100525, "learning_rate": 6.824963630320798e-06, "loss": 1.3725, "mean_token_accuracy": 0.6754241387049357, "num_tokens": 2027379460.0, "step": 12089 }, { "entropy": 1.6735499898592632, "epoch": 1.3281425942709622, "grad_norm": 0.780312180519104, "learning_rate": 6.823545270344432e-06, "loss": 1.3158, "mean_token_accuracy": 0.6761320730050405, "num_tokens": 2027511602.0, "step": 12090 }, { "entropy": 1.7244892517725627, "epoch": 1.328252451182335, "grad_norm": 0.796455442905426, "learning_rate": 6.822127042550983e-06, "loss": 1.2815, "mean_token_accuracy": 0.6747524440288544, "num_tokens": 2027691642.0, "step": 12091 }, { "entropy": 1.6727336744467418, "epoch": 1.328362308093708, "grad_norm": 0.5619511604309082, "learning_rate": 6.820708946985325e-06, "loss": 1.3715, "mean_token_accuracy": 0.6650692274173101, "num_tokens": 2027869176.0, "step": 12092 }, { "entropy": 1.6756927768389385, "epoch": 1.3284721650050808, "grad_norm": 0.6625829935073853, "learning_rate": 6.819290983692346e-06, "loss": 1.2637, "mean_token_accuracy": 0.6874003559350967, "num_tokens": 2028019901.0, "step": 12093 }, { "entropy": 1.6760556896527607, "epoch": 1.328582021916454, "grad_norm": 0.6881618499755859, "learning_rate": 6.817873152716925e-06, "loss": 1.41, "mean_token_accuracy": 0.6657233734925588, "num_tokens": 2028184898.0, "step": 12094 }, { "entropy": 1.7184965113798778, "epoch": 1.3286918788278268, "grad_norm": 0.6784566640853882, "learning_rate": 6.816455454103936e-06, "loss": 1.4383, "mean_token_accuracy": 0.6545319110155106, "num_tokens": 2028337020.0, "step": 12095 }, { "entropy": 1.6974100073178608, "epoch": 1.3288017357391997, "grad_norm": 0.623323380947113, "learning_rate": 6.815037887898243e-06, "loss": 1.5349, "mean_token_accuracy": 0.6560999403397242, "num_tokens": 2028506971.0, "step": 12096 }, { "entropy": 1.6983208358287811, "epoch": 1.3289115926505726, "grad_norm": 0.7683018445968628, "learning_rate": 6.813620454144718e-06, "loss": 1.3477, "mean_token_accuracy": 0.663595899939537, "num_tokens": 2028724476.0, "step": 12097 }, { "entropy": 1.701758086681366, "epoch": 1.3290214495619455, "grad_norm": 0.7559242248535156, "learning_rate": 6.812203152888216e-06, "loss": 1.3109, "mean_token_accuracy": 0.6690565794706345, "num_tokens": 2028875421.0, "step": 12098 }, { "entropy": 1.722754289706548, "epoch": 1.3291313064733186, "grad_norm": 0.6389537453651428, "learning_rate": 6.8107859841736e-06, "loss": 1.3986, "mean_token_accuracy": 0.6538368314504623, "num_tokens": 2029057890.0, "step": 12099 }, { "entropy": 1.7785523335138957, "epoch": 1.3292411633846914, "grad_norm": 0.7880353927612305, "learning_rate": 6.80936894804572e-06, "loss": 1.3391, "mean_token_accuracy": 0.6652881453434626, "num_tokens": 2029173437.0, "step": 12100 }, { "entropy": 1.6625278691450756, "epoch": 1.3293510202960643, "grad_norm": 0.715702474117279, "learning_rate": 6.807952044549422e-06, "loss": 1.2885, "mean_token_accuracy": 0.6675082196791967, "num_tokens": 2029315644.0, "step": 12101 }, { "entropy": 1.7816561063130696, "epoch": 1.3294608772074374, "grad_norm": 0.6899002194404602, "learning_rate": 6.806535273729551e-06, "loss": 1.5393, "mean_token_accuracy": 0.6315892537434896, "num_tokens": 2029493296.0, "step": 12102 }, { "entropy": 1.6816334029038746, "epoch": 1.3295707341188103, "grad_norm": 0.6617991328239441, "learning_rate": 6.8051186356309585e-06, "loss": 1.2528, "mean_token_accuracy": 0.6830165733893713, "num_tokens": 2029629625.0, "step": 12103 }, { "entropy": 1.750120351711909, "epoch": 1.3296805910301832, "grad_norm": 0.7644038796424866, "learning_rate": 6.803702130298462e-06, "loss": 1.3048, "mean_token_accuracy": 0.6720990637938181, "num_tokens": 2029736612.0, "step": 12104 }, { "entropy": 1.7711990475654602, "epoch": 1.329790447941556, "grad_norm": 0.712631106376648, "learning_rate": 6.802285757776903e-06, "loss": 1.4558, "mean_token_accuracy": 0.6478696018457413, "num_tokens": 2029871767.0, "step": 12105 }, { "entropy": 1.7491689622402191, "epoch": 1.329900304852929, "grad_norm": 0.6656632423400879, "learning_rate": 6.800869518111111e-06, "loss": 1.3455, "mean_token_accuracy": 0.6632706572612127, "num_tokens": 2030058925.0, "step": 12106 }, { "entropy": 1.7960249086221058, "epoch": 1.330010161764302, "grad_norm": 0.6363185048103333, "learning_rate": 6.7994534113459075e-06, "loss": 1.5365, "mean_token_accuracy": 0.61135300497214, "num_tokens": 2030274330.0, "step": 12107 }, { "entropy": 1.631099820137024, "epoch": 1.330120018675675, "grad_norm": 0.7717340588569641, "learning_rate": 6.798037437526106e-06, "loss": 1.2993, "mean_token_accuracy": 0.6846882502237955, "num_tokens": 2030417966.0, "step": 12108 }, { "entropy": 1.6861089169979095, "epoch": 1.3302298755870479, "grad_norm": 0.8972033262252808, "learning_rate": 6.796621596696531e-06, "loss": 1.7368, "mean_token_accuracy": 0.6355453704794248, "num_tokens": 2030588343.0, "step": 12109 }, { "entropy": 1.7588840822378795, "epoch": 1.3303397324984207, "grad_norm": 0.6780613660812378, "learning_rate": 6.795205888901984e-06, "loss": 1.4806, "mean_token_accuracy": 0.6419356018304825, "num_tokens": 2030766964.0, "step": 12110 }, { "entropy": 1.7469553053379059, "epoch": 1.3304495894097936, "grad_norm": 0.6792296767234802, "learning_rate": 6.793790314187281e-06, "loss": 1.3987, "mean_token_accuracy": 0.6516177902619044, "num_tokens": 2030931496.0, "step": 12111 }, { "entropy": 1.6458273430665333, "epoch": 1.3305594463211667, "grad_norm": 0.8105985522270203, "learning_rate": 6.792374872597217e-06, "loss": 1.2897, "mean_token_accuracy": 0.6808892091115316, "num_tokens": 2031079780.0, "step": 12112 }, { "entropy": 1.6807252566019695, "epoch": 1.3306693032325396, "grad_norm": 0.7525298595428467, "learning_rate": 6.79095956417659e-06, "loss": 1.3477, "mean_token_accuracy": 0.6601622154315313, "num_tokens": 2031305389.0, "step": 12113 }, { "entropy": 1.751152257124583, "epoch": 1.3307791601439125, "grad_norm": 1.0267671346664429, "learning_rate": 6.789544388970196e-06, "loss": 1.4263, "mean_token_accuracy": 0.6512612501780192, "num_tokens": 2031449086.0, "step": 12114 }, { "entropy": 1.6886617640654247, "epoch": 1.3308890170552856, "grad_norm": 0.766525149345398, "learning_rate": 6.788129347022832e-06, "loss": 1.2504, "mean_token_accuracy": 0.6706090221802393, "num_tokens": 2031571554.0, "step": 12115 }, { "entropy": 1.7444157203038533, "epoch": 1.3309988739666585, "grad_norm": 0.7540661692619324, "learning_rate": 6.786714438379269e-06, "loss": 1.3464, "mean_token_accuracy": 0.6591867109139761, "num_tokens": 2031697222.0, "step": 12116 }, { "entropy": 1.717256059249242, "epoch": 1.3311087308780314, "grad_norm": 0.7085712552070618, "learning_rate": 6.7852996630842936e-06, "loss": 1.4093, "mean_token_accuracy": 0.6617002884546915, "num_tokens": 2031842284.0, "step": 12117 }, { "entropy": 1.6935730675856273, "epoch": 1.3312185877894043, "grad_norm": 0.7506963014602661, "learning_rate": 6.7838850211826925e-06, "loss": 1.5524, "mean_token_accuracy": 0.6330506006876627, "num_tokens": 2032029021.0, "step": 12118 }, { "entropy": 1.6292231281598408, "epoch": 1.3313284447007772, "grad_norm": 0.6500567197799683, "learning_rate": 6.782470512719227e-06, "loss": 1.4116, "mean_token_accuracy": 0.6551267306009928, "num_tokens": 2032225231.0, "step": 12119 }, { "entropy": 1.7364420294761658, "epoch": 1.3314383016121503, "grad_norm": 0.6082978248596191, "learning_rate": 6.781056137738667e-06, "loss": 1.3851, "mean_token_accuracy": 0.6635348598162333, "num_tokens": 2032400791.0, "step": 12120 }, { "entropy": 1.6161598761876423, "epoch": 1.3315481585235232, "grad_norm": 0.612918496131897, "learning_rate": 6.779641896285783e-06, "loss": 1.3149, "mean_token_accuracy": 0.6781648695468903, "num_tokens": 2032538191.0, "step": 12121 }, { "entropy": 1.726882795492808, "epoch": 1.331658015434896, "grad_norm": 0.7016002535820007, "learning_rate": 6.778227788405325e-06, "loss": 1.6569, "mean_token_accuracy": 0.6355594048897425, "num_tokens": 2032731160.0, "step": 12122 }, { "entropy": 1.6399606863657634, "epoch": 1.331767872346269, "grad_norm": 0.8239472508430481, "learning_rate": 6.776813814142062e-06, "loss": 1.2364, "mean_token_accuracy": 0.6827895094950994, "num_tokens": 2032887536.0, "step": 12123 }, { "entropy": 1.7254582345485687, "epoch": 1.3318777292576418, "grad_norm": 0.6165110468864441, "learning_rate": 6.7753999735407375e-06, "loss": 1.4148, "mean_token_accuracy": 0.6540853381156921, "num_tokens": 2033034011.0, "step": 12124 }, { "entropy": 1.6723759472370148, "epoch": 1.331987586169015, "grad_norm": 0.7529140710830688, "learning_rate": 6.773986266646098e-06, "loss": 1.4075, "mean_token_accuracy": 0.66306305428346, "num_tokens": 2033173974.0, "step": 12125 }, { "entropy": 1.6746163368225098, "epoch": 1.3320974430803878, "grad_norm": 0.6699461936950684, "learning_rate": 6.772572693502887e-06, "loss": 1.4146, "mean_token_accuracy": 0.6633772750695547, "num_tokens": 2033318698.0, "step": 12126 }, { "entropy": 1.6714819769064586, "epoch": 1.3322072999917607, "grad_norm": 0.8034442663192749, "learning_rate": 6.771159254155853e-06, "loss": 1.3074, "mean_token_accuracy": 0.6753875811894735, "num_tokens": 2033446101.0, "step": 12127 }, { "entropy": 1.644019901752472, "epoch": 1.3323171569031338, "grad_norm": 0.5550585985183716, "learning_rate": 6.769745948649717e-06, "loss": 1.3786, "mean_token_accuracy": 0.6502327471971512, "num_tokens": 2033648528.0, "step": 12128 }, { "entropy": 1.7570864657560985, "epoch": 1.3324270138145067, "grad_norm": 0.7444114685058594, "learning_rate": 6.768332777029214e-06, "loss": 1.5661, "mean_token_accuracy": 0.6311159779628118, "num_tokens": 2033809795.0, "step": 12129 }, { "entropy": 1.702331284681956, "epoch": 1.3325368707258796, "grad_norm": 0.6505382061004639, "learning_rate": 6.766919739339076e-06, "loss": 1.4109, "mean_token_accuracy": 0.6532570620377859, "num_tokens": 2033966956.0, "step": 12130 }, { "entropy": 1.6757369736830394, "epoch": 1.3326467276372524, "grad_norm": 0.6705249547958374, "learning_rate": 6.76550683562402e-06, "loss": 1.3289, "mean_token_accuracy": 0.6670956462621689, "num_tokens": 2034141052.0, "step": 12131 }, { "entropy": 1.7126195033391316, "epoch": 1.3327565845486253, "grad_norm": 0.7784457802772522, "learning_rate": 6.764094065928762e-06, "loss": 1.3657, "mean_token_accuracy": 0.6621719797452291, "num_tokens": 2034413616.0, "step": 12132 }, { "entropy": 1.6480099658171337, "epoch": 1.3328664414599984, "grad_norm": 0.634157657623291, "learning_rate": 6.762681430298021e-06, "loss": 1.5479, "mean_token_accuracy": 0.6490356723467509, "num_tokens": 2034615609.0, "step": 12133 }, { "entropy": 1.7340960800647736, "epoch": 1.3329762983713713, "grad_norm": 0.7252094149589539, "learning_rate": 6.7612689287764996e-06, "loss": 1.3478, "mean_token_accuracy": 0.6707275907198588, "num_tokens": 2034723645.0, "step": 12134 }, { "entropy": 1.6428366204102833, "epoch": 1.3330861552827442, "grad_norm": 0.7468758225440979, "learning_rate": 6.759856561408912e-06, "loss": 1.3562, "mean_token_accuracy": 0.659113829334577, "num_tokens": 2034890797.0, "step": 12135 }, { "entropy": 1.743541826804479, "epoch": 1.333196012194117, "grad_norm": 0.7921319603919983, "learning_rate": 6.758444328239951e-06, "loss": 1.2967, "mean_token_accuracy": 0.6673512558142344, "num_tokens": 2035037394.0, "step": 12136 }, { "entropy": 1.7060744762420654, "epoch": 1.33330586910549, "grad_norm": 0.8504371047019958, "learning_rate": 6.757032229314314e-06, "loss": 1.2957, "mean_token_accuracy": 0.6720158954461416, "num_tokens": 2035148382.0, "step": 12137 }, { "entropy": 1.660773108402888, "epoch": 1.333415726016863, "grad_norm": 0.6613491773605347, "learning_rate": 6.7556202646766955e-06, "loss": 1.2584, "mean_token_accuracy": 0.6726939876874288, "num_tokens": 2035265813.0, "step": 12138 }, { "entropy": 1.7064545849959056, "epoch": 1.333525582928236, "grad_norm": 0.7123542428016663, "learning_rate": 6.7542084343717885e-06, "loss": 1.407, "mean_token_accuracy": 0.6450283875068029, "num_tokens": 2035442166.0, "step": 12139 }, { "entropy": 1.759802907705307, "epoch": 1.3336354398396089, "grad_norm": 0.6230959296226501, "learning_rate": 6.752796738444265e-06, "loss": 1.4219, "mean_token_accuracy": 0.640018438299497, "num_tokens": 2035628368.0, "step": 12140 }, { "entropy": 1.6767445107301076, "epoch": 1.333745296750982, "grad_norm": 0.7107008695602417, "learning_rate": 6.7513851769388105e-06, "loss": 1.5074, "mean_token_accuracy": 0.6424607733885447, "num_tokens": 2035852613.0, "step": 12141 }, { "entropy": 1.6929062108198802, "epoch": 1.3338551536623549, "grad_norm": 0.8039124011993408, "learning_rate": 6.749973749900104e-06, "loss": 1.4325, "mean_token_accuracy": 0.6694301267464956, "num_tokens": 2036027820.0, "step": 12142 }, { "entropy": 1.695325791835785, "epoch": 1.3339650105737277, "grad_norm": 0.7455415725708008, "learning_rate": 6.748562457372814e-06, "loss": 1.599, "mean_token_accuracy": 0.6340650320053101, "num_tokens": 2036191571.0, "step": 12143 }, { "entropy": 1.6283740599950154, "epoch": 1.3340748674851006, "grad_norm": 0.597327709197998, "learning_rate": 6.747151299401602e-06, "loss": 1.4393, "mean_token_accuracy": 0.6598650167385737, "num_tokens": 2036409052.0, "step": 12144 }, { "entropy": 1.70907461643219, "epoch": 1.3341847243964735, "grad_norm": 0.7348134517669678, "learning_rate": 6.74574027603114e-06, "loss": 1.3319, "mean_token_accuracy": 0.6699612587690353, "num_tokens": 2036548176.0, "step": 12145 }, { "entropy": 1.750954935948054, "epoch": 1.3342945813078466, "grad_norm": 0.6614691019058228, "learning_rate": 6.744329387306077e-06, "loss": 1.5194, "mean_token_accuracy": 0.6467989534139633, "num_tokens": 2036787561.0, "step": 12146 }, { "entropy": 1.7040914098421733, "epoch": 1.3344044382192195, "grad_norm": 0.8070111274719238, "learning_rate": 6.742918633271074e-06, "loss": 1.5117, "mean_token_accuracy": 0.6465377608935038, "num_tokens": 2036938048.0, "step": 12147 }, { "entropy": 1.744322548309962, "epoch": 1.3345142951305924, "grad_norm": 0.7317628264427185, "learning_rate": 6.741508013970779e-06, "loss": 1.3279, "mean_token_accuracy": 0.6692080895105997, "num_tokens": 2037071722.0, "step": 12148 }, { "entropy": 1.7309604982535045, "epoch": 1.3346241520419653, "grad_norm": 0.6659313440322876, "learning_rate": 6.740097529449833e-06, "loss": 1.4134, "mean_token_accuracy": 0.6467402577400208, "num_tokens": 2037281053.0, "step": 12149 }, { "entropy": 1.7567891379197438, "epoch": 1.3347340089533382, "grad_norm": 0.6446396112442017, "learning_rate": 6.7386871797528816e-06, "loss": 1.38, "mean_token_accuracy": 0.6591022710005442, "num_tokens": 2037409299.0, "step": 12150 }, { "entropy": 1.7695753872394562, "epoch": 1.3348438658647113, "grad_norm": 0.8622597455978394, "learning_rate": 6.737276964924564e-06, "loss": 1.4295, "mean_token_accuracy": 0.665856863061587, "num_tokens": 2037543858.0, "step": 12151 }, { "entropy": 1.6813296675682068, "epoch": 1.3349537227760842, "grad_norm": 0.711685299873352, "learning_rate": 6.735866885009506e-06, "loss": 1.2672, "mean_token_accuracy": 0.6845654745896658, "num_tokens": 2037666476.0, "step": 12152 }, { "entropy": 1.6936272382736206, "epoch": 1.335063579687457, "grad_norm": 0.6655703783035278, "learning_rate": 6.7344569400523404e-06, "loss": 1.348, "mean_token_accuracy": 0.670586441953977, "num_tokens": 2037839539.0, "step": 12153 }, { "entropy": 1.659144659837087, "epoch": 1.3351734365988301, "grad_norm": 0.6707544922828674, "learning_rate": 6.733047130097689e-06, "loss": 1.3097, "mean_token_accuracy": 0.670238604148229, "num_tokens": 2037978288.0, "step": 12154 }, { "entropy": 1.6983022093772888, "epoch": 1.335283293510203, "grad_norm": 0.5423887968063354, "learning_rate": 6.731637455190177e-06, "loss": 1.4855, "mean_token_accuracy": 0.6436324616273245, "num_tokens": 2038175684.0, "step": 12155 }, { "entropy": 1.6970913509527843, "epoch": 1.335393150421576, "grad_norm": 0.6358638405799866, "learning_rate": 6.730227915374414e-06, "loss": 1.3425, "mean_token_accuracy": 0.6590030988057455, "num_tokens": 2038309359.0, "step": 12156 }, { "entropy": 1.68005574742953, "epoch": 1.3355030073329488, "grad_norm": 0.6394631862640381, "learning_rate": 6.728818510695012e-06, "loss": 1.4228, "mean_token_accuracy": 0.6577940632899603, "num_tokens": 2038463898.0, "step": 12157 }, { "entropy": 1.7448046207427979, "epoch": 1.3356128642443217, "grad_norm": 0.677924394607544, "learning_rate": 6.7274092411965795e-06, "loss": 1.3776, "mean_token_accuracy": 0.6486985584100088, "num_tokens": 2038682056.0, "step": 12158 }, { "entropy": 1.706708659728368, "epoch": 1.3357227211556948, "grad_norm": 0.6977136731147766, "learning_rate": 6.7260001069237265e-06, "loss": 1.4341, "mean_token_accuracy": 0.650297815601031, "num_tokens": 2038849719.0, "step": 12159 }, { "entropy": 1.7140244444211323, "epoch": 1.3358325780670677, "grad_norm": 0.6504734754562378, "learning_rate": 6.7245911079210365e-06, "loss": 1.4013, "mean_token_accuracy": 0.6597279409567515, "num_tokens": 2039024559.0, "step": 12160 }, { "entropy": 1.7537720998128254, "epoch": 1.3359424349784406, "grad_norm": 0.6768696308135986, "learning_rate": 6.723182244233111e-06, "loss": 1.4743, "mean_token_accuracy": 0.6464989334344864, "num_tokens": 2039186877.0, "step": 12161 }, { "entropy": 1.7301738758881886, "epoch": 1.3360522918898134, "grad_norm": 0.8379589915275574, "learning_rate": 6.7217735159045434e-06, "loss": 1.301, "mean_token_accuracy": 0.6735063940286636, "num_tokens": 2039339067.0, "step": 12162 }, { "entropy": 1.6896904309590657, "epoch": 1.3361621488011863, "grad_norm": 0.7300477623939514, "learning_rate": 6.720364922979918e-06, "loss": 1.3683, "mean_token_accuracy": 0.6638319989045461, "num_tokens": 2039499002.0, "step": 12163 }, { "entropy": 1.7401621341705322, "epoch": 1.3362720057125594, "grad_norm": 0.814299464225769, "learning_rate": 6.71895646550381e-06, "loss": 1.5958, "mean_token_accuracy": 0.6319139301776886, "num_tokens": 2039683860.0, "step": 12164 }, { "entropy": 1.7155976593494415, "epoch": 1.3363818626239323, "grad_norm": 0.614666759967804, "learning_rate": 6.7175481435208045e-06, "loss": 1.4577, "mean_token_accuracy": 0.6343758553266525, "num_tokens": 2039861354.0, "step": 12165 }, { "entropy": 1.6503975987434387, "epoch": 1.3364917195353052, "grad_norm": 0.6941211223602295, "learning_rate": 6.716139957075466e-06, "loss": 1.262, "mean_token_accuracy": 0.669905404249827, "num_tokens": 2039997906.0, "step": 12166 }, { "entropy": 1.7294440964857738, "epoch": 1.3366015764466783, "grad_norm": 0.860774040222168, "learning_rate": 6.71473190621237e-06, "loss": 1.4486, "mean_token_accuracy": 0.6619910101095835, "num_tokens": 2040154720.0, "step": 12167 }, { "entropy": 1.7141015529632568, "epoch": 1.3367114333580512, "grad_norm": 0.6437616348266602, "learning_rate": 6.7133239909760815e-06, "loss": 1.3718, "mean_token_accuracy": 0.6447582989931107, "num_tokens": 2040342386.0, "step": 12168 }, { "entropy": 1.7256827255090077, "epoch": 1.336821290269424, "grad_norm": 0.8925187587738037, "learning_rate": 6.711916211411151e-06, "loss": 1.5247, "mean_token_accuracy": 0.6567995399236679, "num_tokens": 2040464956.0, "step": 12169 }, { "entropy": 1.6982427140076954, "epoch": 1.336931147180797, "grad_norm": 0.709115207195282, "learning_rate": 6.710508567562142e-06, "loss": 1.3701, "mean_token_accuracy": 0.6482947717110316, "num_tokens": 2040624818.0, "step": 12170 }, { "entropy": 1.7100390096505482, "epoch": 1.3370410040921699, "grad_norm": 0.6098870038986206, "learning_rate": 6.7091010594736096e-06, "loss": 1.2835, "mean_token_accuracy": 0.6711164067188898, "num_tokens": 2040777685.0, "step": 12171 }, { "entropy": 1.7272491256395976, "epoch": 1.337150861003543, "grad_norm": 0.6299756169319153, "learning_rate": 6.7076936871900876e-06, "loss": 1.3556, "mean_token_accuracy": 0.6612934718529383, "num_tokens": 2040945251.0, "step": 12172 }, { "entropy": 1.7412489652633667, "epoch": 1.3372607179149159, "grad_norm": 0.6856157183647156, "learning_rate": 6.706286450756129e-06, "loss": 1.2422, "mean_token_accuracy": 0.6719231804211935, "num_tokens": 2041059005.0, "step": 12173 }, { "entropy": 1.6607161959012349, "epoch": 1.3373705748262887, "grad_norm": 0.6099095940589905, "learning_rate": 6.70487935021627e-06, "loss": 1.3921, "mean_token_accuracy": 0.6561809430519739, "num_tokens": 2041292263.0, "step": 12174 }, { "entropy": 1.661214272181193, "epoch": 1.3374804317376616, "grad_norm": 0.6299693584442139, "learning_rate": 6.703472385615045e-06, "loss": 1.2867, "mean_token_accuracy": 0.6732292920351028, "num_tokens": 2041428050.0, "step": 12175 }, { "entropy": 1.7231532831986744, "epoch": 1.3375902886490345, "grad_norm": 0.7081091403961182, "learning_rate": 6.7020655569969795e-06, "loss": 1.3649, "mean_token_accuracy": 0.6584912836551666, "num_tokens": 2041583936.0, "step": 12176 }, { "entropy": 1.7170475920041401, "epoch": 1.3377001455604076, "grad_norm": 0.7208677530288696, "learning_rate": 6.700658864406607e-06, "loss": 1.3288, "mean_token_accuracy": 0.6651655087868372, "num_tokens": 2041738235.0, "step": 12177 }, { "entropy": 1.6539806723594666, "epoch": 1.3378100024717805, "grad_norm": 0.5469607710838318, "learning_rate": 6.69925230788844e-06, "loss": 1.3708, "mean_token_accuracy": 0.6626506249109904, "num_tokens": 2041949745.0, "step": 12178 }, { "entropy": 1.6960765818754833, "epoch": 1.3379198593831534, "grad_norm": 0.6942289471626282, "learning_rate": 6.697845887487002e-06, "loss": 1.3427, "mean_token_accuracy": 0.666606068611145, "num_tokens": 2042104158.0, "step": 12179 }, { "entropy": 1.7386765678723652, "epoch": 1.3380297162945265, "grad_norm": 0.7499749064445496, "learning_rate": 6.696439603246805e-06, "loss": 1.355, "mean_token_accuracy": 0.6540759851535162, "num_tokens": 2042257471.0, "step": 12180 }, { "entropy": 1.6771236062049866, "epoch": 1.3381395732058994, "grad_norm": 0.6517854928970337, "learning_rate": 6.69503345521235e-06, "loss": 1.4844, "mean_token_accuracy": 0.6465798964103063, "num_tokens": 2042424318.0, "step": 12181 }, { "entropy": 1.6540433168411255, "epoch": 1.3382494301172723, "grad_norm": 0.7354924082756042, "learning_rate": 6.693627443428146e-06, "loss": 1.2878, "mean_token_accuracy": 0.6684385339419047, "num_tokens": 2042554398.0, "step": 12182 }, { "entropy": 1.7346096734205882, "epoch": 1.3383592870286452, "grad_norm": 0.6490511894226074, "learning_rate": 6.6922215679387014e-06, "loss": 1.4537, "mean_token_accuracy": 0.6384791831175486, "num_tokens": 2042753232.0, "step": 12183 }, { "entropy": 1.6197856763998668, "epoch": 1.338469143940018, "grad_norm": 0.6182257533073425, "learning_rate": 6.690815828788495e-06, "loss": 1.3176, "mean_token_accuracy": 0.6738310505946478, "num_tokens": 2042931082.0, "step": 12184 }, { "entropy": 1.6874169707298279, "epoch": 1.3385790008513911, "grad_norm": 0.6227688193321228, "learning_rate": 6.6894102260220266e-06, "loss": 1.3646, "mean_token_accuracy": 0.6637938221295675, "num_tokens": 2043074729.0, "step": 12185 }, { "entropy": 1.7578496237595875, "epoch": 1.338688857762764, "grad_norm": 0.6879784464836121, "learning_rate": 6.688004759683784e-06, "loss": 1.4202, "mean_token_accuracy": 0.6445033997297287, "num_tokens": 2043229062.0, "step": 12186 }, { "entropy": 1.7419428924719493, "epoch": 1.338798714674137, "grad_norm": 0.6606770157814026, "learning_rate": 6.68659942981825e-06, "loss": 1.3376, "mean_token_accuracy": 0.6617419819037119, "num_tokens": 2043347488.0, "step": 12187 }, { "entropy": 1.7223469018936157, "epoch": 1.3389085715855098, "grad_norm": 0.6497092247009277, "learning_rate": 6.685194236469896e-06, "loss": 1.5492, "mean_token_accuracy": 0.6567690620819727, "num_tokens": 2043535592.0, "step": 12188 }, { "entropy": 1.748223255077998, "epoch": 1.3390184284968827, "grad_norm": 1.2254005670547485, "learning_rate": 6.683789179683203e-06, "loss": 1.4608, "mean_token_accuracy": 0.6514624655246735, "num_tokens": 2043684053.0, "step": 12189 }, { "entropy": 1.6679157416025798, "epoch": 1.3391282854082558, "grad_norm": 0.5853722095489502, "learning_rate": 6.682384259502635e-06, "loss": 1.3211, "mean_token_accuracy": 0.6666586250066757, "num_tokens": 2043841336.0, "step": 12190 }, { "entropy": 1.6924639145533245, "epoch": 1.3392381423196287, "grad_norm": 0.7584408521652222, "learning_rate": 6.680979475972664e-06, "loss": 1.3216, "mean_token_accuracy": 0.6670918017625809, "num_tokens": 2043997000.0, "step": 12191 }, { "entropy": 1.7606268525123596, "epoch": 1.3393479992310016, "grad_norm": 0.7066436409950256, "learning_rate": 6.679574829137744e-06, "loss": 1.4732, "mean_token_accuracy": 0.6344574143489202, "num_tokens": 2044206455.0, "step": 12192 }, { "entropy": 1.7649494012196858, "epoch": 1.3394578561423747, "grad_norm": 0.7756333947181702, "learning_rate": 6.678170319042332e-06, "loss": 1.5191, "mean_token_accuracy": 0.6414674917856852, "num_tokens": 2044379695.0, "step": 12193 }, { "entropy": 1.7187654276688893, "epoch": 1.3395677130537476, "grad_norm": 0.7068918347358704, "learning_rate": 6.676765945730881e-06, "loss": 1.4654, "mean_token_accuracy": 0.6458430662751198, "num_tokens": 2044525734.0, "step": 12194 }, { "entropy": 1.7495672305425007, "epoch": 1.3396775699651204, "grad_norm": 0.6518183350563049, "learning_rate": 6.675361709247847e-06, "loss": 1.4416, "mean_token_accuracy": 0.6462061703205109, "num_tokens": 2044703188.0, "step": 12195 }, { "entropy": 1.7593078414599101, "epoch": 1.3397874268764933, "grad_norm": 0.8291671276092529, "learning_rate": 6.673957609637659e-06, "loss": 1.2821, "mean_token_accuracy": 0.6755085190137228, "num_tokens": 2044844945.0, "step": 12196 }, { "entropy": 1.6297094126542409, "epoch": 1.3398972837878662, "grad_norm": 0.6503934264183044, "learning_rate": 6.672553646944764e-06, "loss": 1.5341, "mean_token_accuracy": 0.6331368734439214, "num_tokens": 2045065342.0, "step": 12197 }, { "entropy": 1.730812023083369, "epoch": 1.3400071406992393, "grad_norm": 0.6852719187736511, "learning_rate": 6.6711498212135994e-06, "loss": 1.3495, "mean_token_accuracy": 0.6606137305498123, "num_tokens": 2045262146.0, "step": 12198 }, { "entropy": 1.6728038688500722, "epoch": 1.3401169976106122, "grad_norm": 0.6521802544593811, "learning_rate": 6.669746132488591e-06, "loss": 1.3743, "mean_token_accuracy": 0.6557741363843282, "num_tokens": 2045501023.0, "step": 12199 }, { "entropy": 1.7133564253648121, "epoch": 1.340226854521985, "grad_norm": 0.8919302821159363, "learning_rate": 6.668342580814165e-06, "loss": 1.3303, "mean_token_accuracy": 0.6722137182950974, "num_tokens": 2045671599.0, "step": 12200 }, { "entropy": 1.776214490334193, "epoch": 1.340336711433358, "grad_norm": 0.8077658414840698, "learning_rate": 6.666939166234747e-06, "loss": 1.4297, "mean_token_accuracy": 0.6601533641417822, "num_tokens": 2045889947.0, "step": 12201 }, { "entropy": 1.7369691332181294, "epoch": 1.3404465683447309, "grad_norm": 0.6759545207023621, "learning_rate": 6.665535888794748e-06, "loss": 1.2795, "mean_token_accuracy": 0.664157842596372, "num_tokens": 2046039114.0, "step": 12202 }, { "entropy": 1.7115701337655385, "epoch": 1.340556425256104, "grad_norm": 0.8110707402229309, "learning_rate": 6.664132748538588e-06, "loss": 1.3392, "mean_token_accuracy": 0.6724221408367157, "num_tokens": 2046210860.0, "step": 12203 }, { "entropy": 1.6262700359026592, "epoch": 1.3406662821674769, "grad_norm": 0.6628835201263428, "learning_rate": 6.662729745510674e-06, "loss": 1.1864, "mean_token_accuracy": 0.6867125034332275, "num_tokens": 2046348059.0, "step": 12204 }, { "entropy": 1.7018965184688568, "epoch": 1.3407761390788497, "grad_norm": 0.8943867683410645, "learning_rate": 6.661326879755403e-06, "loss": 1.2828, "mean_token_accuracy": 0.6711592872937521, "num_tokens": 2046465860.0, "step": 12205 }, { "entropy": 1.7263304789861043, "epoch": 1.3408859959902228, "grad_norm": 0.6487671136856079, "learning_rate": 6.659924151317184e-06, "loss": 1.3756, "mean_token_accuracy": 0.6614161481459936, "num_tokens": 2046623387.0, "step": 12206 }, { "entropy": 1.6342344085375469, "epoch": 1.3409958529015957, "grad_norm": 0.5863806009292603, "learning_rate": 6.658521560240416e-06, "loss": 1.4543, "mean_token_accuracy": 0.6475448707739512, "num_tokens": 2046836646.0, "step": 12207 }, { "entropy": 1.6563426355520885, "epoch": 1.3411057098129686, "grad_norm": 0.7072530388832092, "learning_rate": 6.657119106569477e-06, "loss": 1.2196, "mean_token_accuracy": 0.6976286470890045, "num_tokens": 2046975202.0, "step": 12208 }, { "entropy": 1.7593169311682384, "epoch": 1.3412155667243415, "grad_norm": 0.64771568775177, "learning_rate": 6.655716790348763e-06, "loss": 1.5249, "mean_token_accuracy": 0.6462835719188055, "num_tokens": 2047176210.0, "step": 12209 }, { "entropy": 1.7476372917493184, "epoch": 1.3413254236357144, "grad_norm": 0.6780056953430176, "learning_rate": 6.654314611622656e-06, "loss": 1.3844, "mean_token_accuracy": 0.6559838354587555, "num_tokens": 2047355243.0, "step": 12210 }, { "entropy": 1.7375625570615132, "epoch": 1.3414352805470875, "grad_norm": 0.6932122707366943, "learning_rate": 6.652912570435536e-06, "loss": 1.2896, "mean_token_accuracy": 0.6642061273256937, "num_tokens": 2047471435.0, "step": 12211 }, { "entropy": 1.7208791573842366, "epoch": 1.3415451374584604, "grad_norm": 0.6981452107429504, "learning_rate": 6.651510666831772e-06, "loss": 1.3985, "mean_token_accuracy": 0.6554392377535502, "num_tokens": 2047653436.0, "step": 12212 }, { "entropy": 1.703304221232732, "epoch": 1.3416549943698333, "grad_norm": 0.7472629547119141, "learning_rate": 6.650108900855734e-06, "loss": 1.405, "mean_token_accuracy": 0.662998785575231, "num_tokens": 2047810436.0, "step": 12213 }, { "entropy": 1.6598742206891377, "epoch": 1.3417648512812062, "grad_norm": 0.6906598806381226, "learning_rate": 6.6487072725517874e-06, "loss": 1.3876, "mean_token_accuracy": 0.669889286160469, "num_tokens": 2047947809.0, "step": 12214 }, { "entropy": 1.7231373190879822, "epoch": 1.341874708192579, "grad_norm": 0.7631545662879944, "learning_rate": 6.647305781964304e-06, "loss": 1.4282, "mean_token_accuracy": 0.665252481897672, "num_tokens": 2048165206.0, "step": 12215 }, { "entropy": 1.693508545557658, "epoch": 1.3419845651039521, "grad_norm": 0.6549314260482788, "learning_rate": 6.645904429137622e-06, "loss": 1.3312, "mean_token_accuracy": 0.6650498857100805, "num_tokens": 2048331616.0, "step": 12216 }, { "entropy": 1.6807125707467396, "epoch": 1.342094422015325, "grad_norm": 0.6334341168403625, "learning_rate": 6.644503214116105e-06, "loss": 1.5014, "mean_token_accuracy": 0.6361768593390783, "num_tokens": 2048541247.0, "step": 12217 }, { "entropy": 1.7305749952793121, "epoch": 1.342204278926698, "grad_norm": 0.7204241752624512, "learning_rate": 6.6431021369441005e-06, "loss": 1.3633, "mean_token_accuracy": 0.6571338723103205, "num_tokens": 2048708995.0, "step": 12218 }, { "entropy": 1.688479075829188, "epoch": 1.342314135838071, "grad_norm": 0.65947026014328, "learning_rate": 6.64170119766595e-06, "loss": 1.4483, "mean_token_accuracy": 0.6561633894840876, "num_tokens": 2048890366.0, "step": 12219 }, { "entropy": 1.669929713010788, "epoch": 1.342423992749444, "grad_norm": 0.702551007270813, "learning_rate": 6.640300396325991e-06, "loss": 1.3, "mean_token_accuracy": 0.6672399689753851, "num_tokens": 2049007667.0, "step": 12220 }, { "entropy": 1.7009166479110718, "epoch": 1.3425338496608168, "grad_norm": 0.6616597175598145, "learning_rate": 6.638899732968562e-06, "loss": 1.3765, "mean_token_accuracy": 0.6643947462240855, "num_tokens": 2049182073.0, "step": 12221 }, { "entropy": 1.6636148790518444, "epoch": 1.3426437065721897, "grad_norm": 0.6758390665054321, "learning_rate": 6.637499207637988e-06, "loss": 1.2797, "mean_token_accuracy": 0.686865970492363, "num_tokens": 2049322365.0, "step": 12222 }, { "entropy": 1.729952871799469, "epoch": 1.3427535634835626, "grad_norm": 0.6957722902297974, "learning_rate": 6.636098820378603e-06, "loss": 1.4878, "mean_token_accuracy": 0.6524218966563543, "num_tokens": 2049500723.0, "step": 12223 }, { "entropy": 1.68058975537618, "epoch": 1.3428634203949357, "grad_norm": 0.677686870098114, "learning_rate": 6.6346985712347215e-06, "loss": 1.2679, "mean_token_accuracy": 0.6779388835032781, "num_tokens": 2049657923.0, "step": 12224 }, { "entropy": 1.629438002904256, "epoch": 1.3429732773063086, "grad_norm": 0.7106833457946777, "learning_rate": 6.633298460250661e-06, "loss": 1.363, "mean_token_accuracy": 0.6710018614927927, "num_tokens": 2049895914.0, "step": 12225 }, { "entropy": 1.692349870999654, "epoch": 1.3430831342176814, "grad_norm": 0.6746785044670105, "learning_rate": 6.631898487470736e-06, "loss": 1.4901, "mean_token_accuracy": 0.6519258220990499, "num_tokens": 2050061436.0, "step": 12226 }, { "entropy": 1.7352541486422222, "epoch": 1.3431929911290543, "grad_norm": 0.6847538948059082, "learning_rate": 6.630498652939263e-06, "loss": 1.4904, "mean_token_accuracy": 0.6462793598572413, "num_tokens": 2050247189.0, "step": 12227 }, { "entropy": 1.697327196598053, "epoch": 1.3433028480404272, "grad_norm": 0.9095365405082703, "learning_rate": 6.6290989567005325e-06, "loss": 1.8281, "mean_token_accuracy": 0.6107906103134155, "num_tokens": 2050449478.0, "step": 12228 }, { "entropy": 1.6943837304910023, "epoch": 1.3434127049518003, "grad_norm": 0.7068896293640137, "learning_rate": 6.627699398798849e-06, "loss": 1.3645, "mean_token_accuracy": 0.6637044797341028, "num_tokens": 2050599011.0, "step": 12229 }, { "entropy": 1.7469304104646046, "epoch": 1.3435225618631732, "grad_norm": 0.6389604210853577, "learning_rate": 6.626299979278514e-06, "loss": 1.3339, "mean_token_accuracy": 0.6636842538913091, "num_tokens": 2050749646.0, "step": 12230 }, { "entropy": 1.7814926008383434, "epoch": 1.343632418774546, "grad_norm": 0.6876501441001892, "learning_rate": 6.6249006981838134e-06, "loss": 1.4588, "mean_token_accuracy": 0.6435671746730804, "num_tokens": 2050899001.0, "step": 12231 }, { "entropy": 1.6366635859012604, "epoch": 1.3437422756859192, "grad_norm": 0.6466518044471741, "learning_rate": 6.623501555559031e-06, "loss": 1.2687, "mean_token_accuracy": 0.6803223540385565, "num_tokens": 2051066927.0, "step": 12232 }, { "entropy": 1.6806427439053853, "epoch": 1.343852132597292, "grad_norm": 0.6225482225418091, "learning_rate": 6.622102551448456e-06, "loss": 1.2683, "mean_token_accuracy": 0.6693209211031595, "num_tokens": 2051200497.0, "step": 12233 }, { "entropy": 1.7055828273296356, "epoch": 1.343961989508665, "grad_norm": 0.6730872988700867, "learning_rate": 6.620703685896358e-06, "loss": 1.4317, "mean_token_accuracy": 0.6518972416718801, "num_tokens": 2051333834.0, "step": 12234 }, { "entropy": 1.7186238567034404, "epoch": 1.3440718464200379, "grad_norm": 0.6576967835426331, "learning_rate": 6.619304958947019e-06, "loss": 1.4835, "mean_token_accuracy": 0.6451779951651891, "num_tokens": 2051514481.0, "step": 12235 }, { "entropy": 1.7354322572549183, "epoch": 1.3441817033314107, "grad_norm": 0.609137237071991, "learning_rate": 6.617906370644704e-06, "loss": 1.3636, "mean_token_accuracy": 0.6611924121777216, "num_tokens": 2051685341.0, "step": 12236 }, { "entropy": 1.696060409148534, "epoch": 1.3442915602427838, "grad_norm": 0.6814700961112976, "learning_rate": 6.616507921033673e-06, "loss": 1.1382, "mean_token_accuracy": 0.6968227724234263, "num_tokens": 2051839522.0, "step": 12237 }, { "entropy": 1.7019359568754833, "epoch": 1.3444014171541567, "grad_norm": 0.7261495590209961, "learning_rate": 6.615109610158194e-06, "loss": 1.3654, "mean_token_accuracy": 0.6753996213277181, "num_tokens": 2051985272.0, "step": 12238 }, { "entropy": 1.727369596560796, "epoch": 1.3445112740655296, "grad_norm": 0.6944707632064819, "learning_rate": 6.6137114380625255e-06, "loss": 1.349, "mean_token_accuracy": 0.6564425776402155, "num_tokens": 2052119479.0, "step": 12239 }, { "entropy": 1.7554010450839996, "epoch": 1.3446211309769027, "grad_norm": 0.8773122429847717, "learning_rate": 6.612313404790907e-06, "loss": 1.3218, "mean_token_accuracy": 0.6642026404539744, "num_tokens": 2052246785.0, "step": 12240 }, { "entropy": 1.6962561508019764, "epoch": 1.3447309878882754, "grad_norm": 0.6583256721496582, "learning_rate": 6.61091551038759e-06, "loss": 1.3261, "mean_token_accuracy": 0.6596464316050211, "num_tokens": 2052401863.0, "step": 12241 }, { "entropy": 1.6963837146759033, "epoch": 1.3448408447996485, "grad_norm": 0.6508731842041016, "learning_rate": 6.609517754896824e-06, "loss": 1.3197, "mean_token_accuracy": 0.6548064053058624, "num_tokens": 2052537089.0, "step": 12242 }, { "entropy": 1.6928088863690693, "epoch": 1.3449507017110214, "grad_norm": 0.6507278084754944, "learning_rate": 6.608120138362844e-06, "loss": 1.4328, "mean_token_accuracy": 0.6521178285280863, "num_tokens": 2052697099.0, "step": 12243 }, { "entropy": 1.743686467409134, "epoch": 1.3450605586223943, "grad_norm": 0.6717689037322998, "learning_rate": 6.6067226608298765e-06, "loss": 1.4188, "mean_token_accuracy": 0.6479152143001556, "num_tokens": 2052886501.0, "step": 12244 }, { "entropy": 1.6980459491411846, "epoch": 1.3451704155337674, "grad_norm": 0.7341813445091248, "learning_rate": 6.605325322342162e-06, "loss": 1.3154, "mean_token_accuracy": 0.6584825615088145, "num_tokens": 2053021245.0, "step": 12245 }, { "entropy": 1.7826583683490753, "epoch": 1.3452802724451403, "grad_norm": 0.770753026008606, "learning_rate": 6.603928122943918e-06, "loss": 1.3637, "mean_token_accuracy": 0.6621987770001093, "num_tokens": 2053162265.0, "step": 12246 }, { "entropy": 1.7585231363773346, "epoch": 1.3453901293565131, "grad_norm": 0.7951369881629944, "learning_rate": 6.602531062679371e-06, "loss": 1.426, "mean_token_accuracy": 0.6497030705213547, "num_tokens": 2053305821.0, "step": 12247 }, { "entropy": 1.6882805128892262, "epoch": 1.345499986267886, "grad_norm": 0.7582414150238037, "learning_rate": 6.6011341415927345e-06, "loss": 1.4527, "mean_token_accuracy": 0.6527374486128489, "num_tokens": 2053475006.0, "step": 12248 }, { "entropy": 1.7408703466256459, "epoch": 1.345609843179259, "grad_norm": 0.621370255947113, "learning_rate": 6.599737359728216e-06, "loss": 1.4883, "mean_token_accuracy": 0.6453725149234136, "num_tokens": 2053652199.0, "step": 12249 }, { "entropy": 1.6780038674672444, "epoch": 1.345719700090632, "grad_norm": 0.7224386930465698, "learning_rate": 6.598340717130027e-06, "loss": 1.3707, "mean_token_accuracy": 0.6695211380720139, "num_tokens": 2053819186.0, "step": 12250 }, { "entropy": 1.7335948844750722, "epoch": 1.345829557002005, "grad_norm": 0.6624288558959961, "learning_rate": 6.59694421384238e-06, "loss": 1.4288, "mean_token_accuracy": 0.6465139786402384, "num_tokens": 2053967739.0, "step": 12251 }, { "entropy": 1.7141142686208088, "epoch": 1.3459394139133778, "grad_norm": 0.6660525798797607, "learning_rate": 6.595547849909456e-06, "loss": 1.55, "mean_token_accuracy": 0.6478944619496664, "num_tokens": 2054156120.0, "step": 12252 }, { "entropy": 1.756292422612508, "epoch": 1.346049270824751, "grad_norm": 0.781925618648529, "learning_rate": 6.594151625375458e-06, "loss": 1.2002, "mean_token_accuracy": 0.6815067678689957, "num_tokens": 2054258490.0, "step": 12253 }, { "entropy": 1.7494585911432903, "epoch": 1.3461591277361236, "grad_norm": 0.8068336844444275, "learning_rate": 6.5927555402845775e-06, "loss": 1.4026, "mean_token_accuracy": 0.6626935104529063, "num_tokens": 2054424651.0, "step": 12254 }, { "entropy": 1.715184877316157, "epoch": 1.3462689846474967, "grad_norm": 0.7686038613319397, "learning_rate": 6.591359594681001e-06, "loss": 1.2625, "mean_token_accuracy": 0.6788710653781891, "num_tokens": 2054553349.0, "step": 12255 }, { "entropy": 1.7815559605757396, "epoch": 1.3463788415588696, "grad_norm": 0.8140032291412354, "learning_rate": 6.5899637886089014e-06, "loss": 1.3976, "mean_token_accuracy": 0.6547159850597382, "num_tokens": 2054696873.0, "step": 12256 }, { "entropy": 1.671088566382726, "epoch": 1.3464886984702424, "grad_norm": 0.6825757026672363, "learning_rate": 6.588568122112464e-06, "loss": 1.5188, "mean_token_accuracy": 0.6364674071470896, "num_tokens": 2054865237.0, "step": 12257 }, { "entropy": 1.7114405731360118, "epoch": 1.3465985553816155, "grad_norm": 0.7471262216567993, "learning_rate": 6.587172595235856e-06, "loss": 1.426, "mean_token_accuracy": 0.6726455787817637, "num_tokens": 2054979860.0, "step": 12258 }, { "entropy": 1.708991785844167, "epoch": 1.3467084122929884, "grad_norm": 0.641041100025177, "learning_rate": 6.585777208023249e-06, "loss": 1.4844, "mean_token_accuracy": 0.65794704357783, "num_tokens": 2055155113.0, "step": 12259 }, { "entropy": 1.7066246469815571, "epoch": 1.3468182692043613, "grad_norm": 0.6625202298164368, "learning_rate": 6.584381960518805e-06, "loss": 1.3399, "mean_token_accuracy": 0.6542827486991882, "num_tokens": 2055292902.0, "step": 12260 }, { "entropy": 1.7277962068716686, "epoch": 1.3469281261157342, "grad_norm": 0.7618191838264465, "learning_rate": 6.58298685276668e-06, "loss": 1.5263, "mean_token_accuracy": 0.6597974797089895, "num_tokens": 2055475600.0, "step": 12261 }, { "entropy": 1.7162013947963715, "epoch": 1.347037983027107, "grad_norm": 0.6941340565681458, "learning_rate": 6.581591884811029e-06, "loss": 1.3457, "mean_token_accuracy": 0.653758093714714, "num_tokens": 2055605034.0, "step": 12262 }, { "entropy": 1.6504423717657726, "epoch": 1.3471478399384802, "grad_norm": 0.6448864340782166, "learning_rate": 6.580197056696009e-06, "loss": 1.3639, "mean_token_accuracy": 0.6632355799277624, "num_tokens": 2055763298.0, "step": 12263 }, { "entropy": 1.7163707713286083, "epoch": 1.347257696849853, "grad_norm": 0.740750253200531, "learning_rate": 6.578802368465758e-06, "loss": 1.318, "mean_token_accuracy": 0.6563950031995773, "num_tokens": 2055913303.0, "step": 12264 }, { "entropy": 1.6848769783973694, "epoch": 1.347367553761226, "grad_norm": 0.7327452301979065, "learning_rate": 6.577407820164417e-06, "loss": 1.2855, "mean_token_accuracy": 0.6678441762924194, "num_tokens": 2056037104.0, "step": 12265 }, { "entropy": 1.731307754913966, "epoch": 1.347477410672599, "grad_norm": 0.7852592468261719, "learning_rate": 6.576013411836128e-06, "loss": 1.3271, "mean_token_accuracy": 0.6711703638235728, "num_tokens": 2056238659.0, "step": 12266 }, { "entropy": 1.640326350927353, "epoch": 1.347587267583972, "grad_norm": 0.6604642271995544, "learning_rate": 6.5746191435250226e-06, "loss": 1.4065, "mean_token_accuracy": 0.6636442442735037, "num_tokens": 2056418652.0, "step": 12267 }, { "entropy": 1.6644433339436848, "epoch": 1.3476971244953448, "grad_norm": 0.7689099311828613, "learning_rate": 6.5732250152752245e-06, "loss": 1.252, "mean_token_accuracy": 0.6750207046667734, "num_tokens": 2056524847.0, "step": 12268 }, { "entropy": 1.6737519601980846, "epoch": 1.3478069814067177, "grad_norm": 0.665513813495636, "learning_rate": 6.5718310271308635e-06, "loss": 1.3628, "mean_token_accuracy": 0.6609103033939997, "num_tokens": 2056685770.0, "step": 12269 }, { "entropy": 1.705015589793523, "epoch": 1.3479168383180906, "grad_norm": 0.5894766449928284, "learning_rate": 6.57043717913605e-06, "loss": 1.5859, "mean_token_accuracy": 0.6079653153816859, "num_tokens": 2056913511.0, "step": 12270 }, { "entropy": 1.7268774608771007, "epoch": 1.3480266952294637, "grad_norm": 0.7870163321495056, "learning_rate": 6.569043471334908e-06, "loss": 1.6112, "mean_token_accuracy": 0.6265291919310888, "num_tokens": 2057089554.0, "step": 12271 }, { "entropy": 1.7062111397584279, "epoch": 1.3481365521408366, "grad_norm": 0.7420828342437744, "learning_rate": 6.567649903771543e-06, "loss": 1.4877, "mean_token_accuracy": 0.638950581351916, "num_tokens": 2057257100.0, "step": 12272 }, { "entropy": 1.6089070041974385, "epoch": 1.3482464090522095, "grad_norm": 0.6262128949165344, "learning_rate": 6.56625647649006e-06, "loss": 1.2779, "mean_token_accuracy": 0.6737044056256613, "num_tokens": 2057420018.0, "step": 12273 }, { "entropy": 1.706796109676361, "epoch": 1.3483562659635824, "grad_norm": 0.6636627912521362, "learning_rate": 6.564863189534562e-06, "loss": 1.2494, "mean_token_accuracy": 0.6729203015565872, "num_tokens": 2057522688.0, "step": 12274 }, { "entropy": 1.6989140808582306, "epoch": 1.3484661228749553, "grad_norm": 0.6211623549461365, "learning_rate": 6.563470042949147e-06, "loss": 1.3388, "mean_token_accuracy": 0.6737065613269806, "num_tokens": 2057711110.0, "step": 12275 }, { "entropy": 1.6851005852222443, "epoch": 1.3485759797863284, "grad_norm": 0.7660583257675171, "learning_rate": 6.562077036777902e-06, "loss": 1.4126, "mean_token_accuracy": 0.6566516806681951, "num_tokens": 2057882659.0, "step": 12276 }, { "entropy": 1.7562197347482045, "epoch": 1.3486858366977013, "grad_norm": 0.6725447177886963, "learning_rate": 6.560684171064924e-06, "loss": 1.354, "mean_token_accuracy": 0.6481892019510269, "num_tokens": 2058055824.0, "step": 12277 }, { "entropy": 1.7564709782600403, "epoch": 1.3487956936090741, "grad_norm": 0.7089916467666626, "learning_rate": 6.5592914458542855e-06, "loss": 1.5148, "mean_token_accuracy": 0.6371178328990936, "num_tokens": 2058221303.0, "step": 12278 }, { "entropy": 1.7744645377000172, "epoch": 1.3489055505204472, "grad_norm": 0.785156786441803, "learning_rate": 6.557898861190077e-06, "loss": 1.4629, "mean_token_accuracy": 0.6464930176734924, "num_tokens": 2058427514.0, "step": 12279 }, { "entropy": 1.7491690417130787, "epoch": 1.3490154074318201, "grad_norm": 0.7399646043777466, "learning_rate": 6.556506417116368e-06, "loss": 1.3963, "mean_token_accuracy": 0.6607019901275635, "num_tokens": 2058610955.0, "step": 12280 }, { "entropy": 1.7057534257570903, "epoch": 1.349125264343193, "grad_norm": 0.8085160255432129, "learning_rate": 6.555114113677226e-06, "loss": 1.4088, "mean_token_accuracy": 0.6513328750928243, "num_tokens": 2058770075.0, "step": 12281 }, { "entropy": 1.7024572590986888, "epoch": 1.349235121254566, "grad_norm": 0.6303825378417969, "learning_rate": 6.553721950916717e-06, "loss": 1.334, "mean_token_accuracy": 0.6583151618639628, "num_tokens": 2058936370.0, "step": 12282 }, { "entropy": 1.7161445319652557, "epoch": 1.3493449781659388, "grad_norm": 0.7127066254615784, "learning_rate": 6.552329928878914e-06, "loss": 1.4262, "mean_token_accuracy": 0.6654284497102102, "num_tokens": 2059051247.0, "step": 12283 }, { "entropy": 1.7131555875142415, "epoch": 1.349454835077312, "grad_norm": 0.6429823637008667, "learning_rate": 6.550938047607855e-06, "loss": 1.3437, "mean_token_accuracy": 0.6701266666253408, "num_tokens": 2059187258.0, "step": 12284 }, { "entropy": 1.6841067373752594, "epoch": 1.3495646919886848, "grad_norm": 0.623063862323761, "learning_rate": 6.549546307147604e-06, "loss": 1.4499, "mean_token_accuracy": 0.6565073132514954, "num_tokens": 2059370768.0, "step": 12285 }, { "entropy": 1.7391627728939056, "epoch": 1.3496745489000577, "grad_norm": 0.5836269855499268, "learning_rate": 6.548154707542209e-06, "loss": 1.5104, "mean_token_accuracy": 0.6366867274045944, "num_tokens": 2059615803.0, "step": 12286 }, { "entropy": 1.725864330927531, "epoch": 1.3497844058114306, "grad_norm": 0.6897220015525818, "learning_rate": 6.546763248835713e-06, "loss": 1.3016, "mean_token_accuracy": 0.659923846522967, "num_tokens": 2059736379.0, "step": 12287 }, { "entropy": 1.6832166115442913, "epoch": 1.3498942627228034, "grad_norm": 0.7074958086013794, "learning_rate": 6.5453719310721485e-06, "loss": 1.2413, "mean_token_accuracy": 0.6812171091636022, "num_tokens": 2059877019.0, "step": 12288 }, { "entropy": 1.7349075376987457, "epoch": 1.3500041196341765, "grad_norm": 0.7303879857063293, "learning_rate": 6.543980754295559e-06, "loss": 1.2653, "mean_token_accuracy": 0.6705899288256963, "num_tokens": 2060007355.0, "step": 12289 }, { "entropy": 1.6935294369856517, "epoch": 1.3501139765455494, "grad_norm": 0.9410924911499023, "learning_rate": 6.542589718549968e-06, "loss": 1.5074, "mean_token_accuracy": 0.6501014828681946, "num_tokens": 2060194841.0, "step": 12290 }, { "entropy": 1.649632195631663, "epoch": 1.3502238334569223, "grad_norm": 0.5783915519714355, "learning_rate": 6.541198823879406e-06, "loss": 1.2677, "mean_token_accuracy": 0.6630587677160898, "num_tokens": 2060380983.0, "step": 12291 }, { "entropy": 1.7274872958660126, "epoch": 1.3503336903682954, "grad_norm": 0.67799311876297, "learning_rate": 6.5398080703278935e-06, "loss": 1.3454, "mean_token_accuracy": 0.6603363305330276, "num_tokens": 2060548780.0, "step": 12292 }, { "entropy": 1.7195112307866414, "epoch": 1.3504435472796683, "grad_norm": 0.7613741755485535, "learning_rate": 6.5384174579394435e-06, "loss": 1.2624, "mean_token_accuracy": 0.6771682302157084, "num_tokens": 2060657304.0, "step": 12293 }, { "entropy": 1.7164626916249592, "epoch": 1.3505534041910412, "grad_norm": 0.8549966216087341, "learning_rate": 6.537026986758068e-06, "loss": 1.3898, "mean_token_accuracy": 0.650303453207016, "num_tokens": 2060815540.0, "step": 12294 }, { "entropy": 1.7116701900959015, "epoch": 1.350663261102414, "grad_norm": 0.6703973412513733, "learning_rate": 6.5356366568277855e-06, "loss": 1.4342, "mean_token_accuracy": 0.6575676451126734, "num_tokens": 2060986234.0, "step": 12295 }, { "entropy": 1.6897992591063182, "epoch": 1.350773118013787, "grad_norm": 0.6119180917739868, "learning_rate": 6.534246468192582e-06, "loss": 1.3418, "mean_token_accuracy": 0.6577243904272715, "num_tokens": 2061159321.0, "step": 12296 }, { "entropy": 1.6780081788698833, "epoch": 1.35088297492516, "grad_norm": 0.6298530697822571, "learning_rate": 6.532856420896469e-06, "loss": 1.3248, "mean_token_accuracy": 0.6540986796220144, "num_tokens": 2061293448.0, "step": 12297 }, { "entropy": 1.701556493838628, "epoch": 1.350992831836533, "grad_norm": 0.6996129155158997, "learning_rate": 6.531466514983438e-06, "loss": 1.4905, "mean_token_accuracy": 0.6383817990620931, "num_tokens": 2061518965.0, "step": 12298 }, { "entropy": 1.665728211402893, "epoch": 1.3511026887479058, "grad_norm": 0.8420900106430054, "learning_rate": 6.530076750497479e-06, "loss": 1.4345, "mean_token_accuracy": 0.6632737889885902, "num_tokens": 2061659464.0, "step": 12299 }, { "entropy": 1.680874894062678, "epoch": 1.3512125456592787, "grad_norm": 0.669554591178894, "learning_rate": 6.5286871274825736e-06, "loss": 1.5223, "mean_token_accuracy": 0.642878438035647, "num_tokens": 2061862373.0, "step": 12300 }, { "entropy": 1.7252130707105, "epoch": 1.3513224025706516, "grad_norm": 0.6238853335380554, "learning_rate": 6.527297645982709e-06, "loss": 1.4893, "mean_token_accuracy": 0.6513131509224573, "num_tokens": 2062013384.0, "step": 12301 }, { "entropy": 1.6984285215536754, "epoch": 1.3514322594820247, "grad_norm": 0.7303968667984009, "learning_rate": 6.525908306041855e-06, "loss": 1.2639, "mean_token_accuracy": 0.6747083564599355, "num_tokens": 2062142981.0, "step": 12302 }, { "entropy": 1.685420423746109, "epoch": 1.3515421163933976, "grad_norm": 0.7917304039001465, "learning_rate": 6.52451910770399e-06, "loss": 1.3632, "mean_token_accuracy": 0.6628880898157755, "num_tokens": 2062320185.0, "step": 12303 }, { "entropy": 1.7349448402722676, "epoch": 1.3516519733047705, "grad_norm": 0.732818067073822, "learning_rate": 6.52313005101308e-06, "loss": 1.3378, "mean_token_accuracy": 0.661628877123197, "num_tokens": 2062477090.0, "step": 12304 }, { "entropy": 1.7262056469917297, "epoch": 1.3517618302161436, "grad_norm": 0.8141718506813049, "learning_rate": 6.5217411360130815e-06, "loss": 1.5927, "mean_token_accuracy": 0.642752543091774, "num_tokens": 2062658383.0, "step": 12305 }, { "entropy": 1.697360982497533, "epoch": 1.3518716871275165, "grad_norm": 0.685117244720459, "learning_rate": 6.520352362747959e-06, "loss": 1.2822, "mean_token_accuracy": 0.6733687619368235, "num_tokens": 2062789276.0, "step": 12306 }, { "entropy": 1.6366734206676483, "epoch": 1.3519815440388894, "grad_norm": 0.7046248316764832, "learning_rate": 6.518963731261673e-06, "loss": 1.3198, "mean_token_accuracy": 0.6753099660078684, "num_tokens": 2062938946.0, "step": 12307 }, { "entropy": 1.662851224342982, "epoch": 1.3520914009502623, "grad_norm": 0.6730368137359619, "learning_rate": 6.517575241598157e-06, "loss": 1.3058, "mean_token_accuracy": 0.6809868812561035, "num_tokens": 2063102608.0, "step": 12308 }, { "entropy": 1.6642492314179738, "epoch": 1.3522012578616351, "grad_norm": 0.6425415873527527, "learning_rate": 6.516186893801366e-06, "loss": 1.4789, "mean_token_accuracy": 0.6525690505901972, "num_tokens": 2063301963.0, "step": 12309 }, { "entropy": 1.6880824367205303, "epoch": 1.3523111147730082, "grad_norm": 0.6970797181129456, "learning_rate": 6.514798687915243e-06, "loss": 1.3369, "mean_token_accuracy": 0.6655599971612295, "num_tokens": 2063467828.0, "step": 12310 }, { "entropy": 1.7415729264418285, "epoch": 1.3524209716843811, "grad_norm": 0.7570353746414185, "learning_rate": 6.513410623983719e-06, "loss": 1.4903, "mean_token_accuracy": 0.652079368631045, "num_tokens": 2063628984.0, "step": 12311 }, { "entropy": 1.7562313973903656, "epoch": 1.352530828595754, "grad_norm": 0.834516167640686, "learning_rate": 6.512022702050726e-06, "loss": 1.2405, "mean_token_accuracy": 0.6728113840023676, "num_tokens": 2063738934.0, "step": 12312 }, { "entropy": 1.7031614283720653, "epoch": 1.352640685507127, "grad_norm": 0.6265947222709656, "learning_rate": 6.510634922160194e-06, "loss": 1.3499, "mean_token_accuracy": 0.6655804167191187, "num_tokens": 2063945293.0, "step": 12313 }, { "entropy": 1.750719130039215, "epoch": 1.3527505424184998, "grad_norm": 0.7226284742355347, "learning_rate": 6.5092472843560404e-06, "loss": 1.4446, "mean_token_accuracy": 0.6544534166653951, "num_tokens": 2064107640.0, "step": 12314 }, { "entropy": 1.6810857057571411, "epoch": 1.352860399329873, "grad_norm": 0.7839952111244202, "learning_rate": 6.507859788682191e-06, "loss": 1.3484, "mean_token_accuracy": 0.6710902700821558, "num_tokens": 2064260066.0, "step": 12315 }, { "entropy": 1.7289798359076183, "epoch": 1.3529702562412458, "grad_norm": 0.7496806979179382, "learning_rate": 6.506472435182555e-06, "loss": 1.417, "mean_token_accuracy": 0.656493753194809, "num_tokens": 2064413871.0, "step": 12316 }, { "entropy": 1.724411557118098, "epoch": 1.3530801131526187, "grad_norm": 0.7686552405357361, "learning_rate": 6.505085223901037e-06, "loss": 1.4302, "mean_token_accuracy": 0.6589695413907369, "num_tokens": 2064584214.0, "step": 12317 }, { "entropy": 1.7191261947154999, "epoch": 1.3531899700639918, "grad_norm": 0.65711510181427, "learning_rate": 6.503698154881547e-06, "loss": 1.5165, "mean_token_accuracy": 0.6463207254807154, "num_tokens": 2064812066.0, "step": 12318 }, { "entropy": 1.7270474930604298, "epoch": 1.3532998269753647, "grad_norm": 0.7155986428260803, "learning_rate": 6.50231122816799e-06, "loss": 1.4462, "mean_token_accuracy": 0.6558532069126765, "num_tokens": 2064987126.0, "step": 12319 }, { "entropy": 1.6704954504966736, "epoch": 1.3534096838867375, "grad_norm": 0.7355462312698364, "learning_rate": 6.500924443804251e-06, "loss": 1.4153, "mean_token_accuracy": 0.6554108460744222, "num_tokens": 2065163540.0, "step": 12320 }, { "entropy": 1.7073476314544678, "epoch": 1.3535195407981104, "grad_norm": 0.6781120896339417, "learning_rate": 6.499537801834224e-06, "loss": 1.4084, "mean_token_accuracy": 0.645026778181394, "num_tokens": 2065344154.0, "step": 12321 }, { "entropy": 1.6700172821680705, "epoch": 1.3536293977094833, "grad_norm": 0.6056855320930481, "learning_rate": 6.4981513023018026e-06, "loss": 1.4415, "mean_token_accuracy": 0.6474265257517496, "num_tokens": 2065592601.0, "step": 12322 }, { "entropy": 1.7153640190760295, "epoch": 1.3537392546208564, "grad_norm": 0.6348044276237488, "learning_rate": 6.4967649452508645e-06, "loss": 1.3697, "mean_token_accuracy": 0.6543361097574234, "num_tokens": 2065767977.0, "step": 12323 }, { "entropy": 1.714322954416275, "epoch": 1.3538491115322293, "grad_norm": 0.6365450024604797, "learning_rate": 6.4953787307252815e-06, "loss": 1.442, "mean_token_accuracy": 0.635169451435407, "num_tokens": 2065967643.0, "step": 12324 }, { "entropy": 1.7047211130460103, "epoch": 1.3539589684436022, "grad_norm": 0.8873185515403748, "learning_rate": 6.493992658768935e-06, "loss": 1.418, "mean_token_accuracy": 0.6530811885992686, "num_tokens": 2066111579.0, "step": 12325 }, { "entropy": 1.7306146423021953, "epoch": 1.354068825354975, "grad_norm": 0.7051663398742676, "learning_rate": 6.492606729425688e-06, "loss": 1.4218, "mean_token_accuracy": 0.6628289421399435, "num_tokens": 2066301945.0, "step": 12326 }, { "entropy": 1.7398844460646312, "epoch": 1.354178682266348, "grad_norm": 0.6335932612419128, "learning_rate": 6.491220942739411e-06, "loss": 1.276, "mean_token_accuracy": 0.6720243046681086, "num_tokens": 2066426077.0, "step": 12327 }, { "entropy": 1.707278350989024, "epoch": 1.354288539177721, "grad_norm": 0.6095598936080933, "learning_rate": 6.489835298753959e-06, "loss": 1.4107, "mean_token_accuracy": 0.6501167962948481, "num_tokens": 2066577706.0, "step": 12328 }, { "entropy": 1.7580445806185405, "epoch": 1.354398396089094, "grad_norm": 0.6501827836036682, "learning_rate": 6.488449797513183e-06, "loss": 1.4603, "mean_token_accuracy": 0.6521537154912949, "num_tokens": 2066736674.0, "step": 12329 }, { "entropy": 1.7793205082416534, "epoch": 1.3545082530004668, "grad_norm": 0.8058743476867676, "learning_rate": 6.487064439060939e-06, "loss": 1.4942, "mean_token_accuracy": 0.649241695801417, "num_tokens": 2066929361.0, "step": 12330 }, { "entropy": 1.6442664166291554, "epoch": 1.35461810991184, "grad_norm": 0.7431530356407166, "learning_rate": 6.485679223441079e-06, "loss": 1.2053, "mean_token_accuracy": 0.6872799694538116, "num_tokens": 2067059964.0, "step": 12331 }, { "entropy": 1.6199305057525635, "epoch": 1.3547279668232128, "grad_norm": 0.6863206624984741, "learning_rate": 6.48429415069743e-06, "loss": 1.3179, "mean_token_accuracy": 0.6764807005723318, "num_tokens": 2067198007.0, "step": 12332 }, { "entropy": 1.6564082105954487, "epoch": 1.3548378237345857, "grad_norm": 0.6196966767311096, "learning_rate": 6.482909220873838e-06, "loss": 1.447, "mean_token_accuracy": 0.6547676275173823, "num_tokens": 2067377864.0, "step": 12333 }, { "entropy": 1.7091114819049835, "epoch": 1.3549476806459586, "grad_norm": 0.8706151843070984, "learning_rate": 6.481524434014134e-06, "loss": 1.4952, "mean_token_accuracy": 0.6537403712670008, "num_tokens": 2067518216.0, "step": 12334 }, { "entropy": 1.685113827387492, "epoch": 1.3550575375573315, "grad_norm": 0.7392619848251343, "learning_rate": 6.480139790162146e-06, "loss": 1.4484, "mean_token_accuracy": 0.652221699555715, "num_tokens": 2067650159.0, "step": 12335 }, { "entropy": 1.7211529811223347, "epoch": 1.3551673944687046, "grad_norm": 0.7249420285224915, "learning_rate": 6.478755289361698e-06, "loss": 1.4059, "mean_token_accuracy": 0.642576590180397, "num_tokens": 2067855074.0, "step": 12336 }, { "entropy": 1.7155493001143138, "epoch": 1.3552772513800775, "grad_norm": 1.159257411956787, "learning_rate": 6.4773709316566036e-06, "loss": 1.237, "mean_token_accuracy": 0.6643270750840505, "num_tokens": 2068013117.0, "step": 12337 }, { "entropy": 1.693147212266922, "epoch": 1.3553871082914504, "grad_norm": 0.692810595035553, "learning_rate": 6.475986717090683e-06, "loss": 1.3471, "mean_token_accuracy": 0.6661613335212072, "num_tokens": 2068174867.0, "step": 12338 }, { "entropy": 1.66440216700236, "epoch": 1.3554969652028233, "grad_norm": 0.6912005543708801, "learning_rate": 6.474602645707746e-06, "loss": 1.3643, "mean_token_accuracy": 0.660924697915713, "num_tokens": 2068363011.0, "step": 12339 }, { "entropy": 1.7170608242352803, "epoch": 1.3556068221141961, "grad_norm": 0.6402304172515869, "learning_rate": 6.473218717551597e-06, "loss": 1.5135, "mean_token_accuracy": 0.6523879170417786, "num_tokens": 2068560209.0, "step": 12340 }, { "entropy": 1.7328025897343953, "epoch": 1.3557166790255692, "grad_norm": 0.6300782561302185, "learning_rate": 6.471834932666033e-06, "loss": 1.3726, "mean_token_accuracy": 0.6494940121968588, "num_tokens": 2068759721.0, "step": 12341 }, { "entropy": 1.7307655314604442, "epoch": 1.3558265359369421, "grad_norm": 0.716905951499939, "learning_rate": 6.470451291094855e-06, "loss": 1.3454, "mean_token_accuracy": 0.6711088915665945, "num_tokens": 2068930911.0, "step": 12342 }, { "entropy": 1.7667767107486725, "epoch": 1.355936392848315, "grad_norm": 0.6788172721862793, "learning_rate": 6.469067792881853e-06, "loss": 1.4581, "mean_token_accuracy": 0.6369956433773041, "num_tokens": 2069122750.0, "step": 12343 }, { "entropy": 1.7475673854351044, "epoch": 1.3560462497596881, "grad_norm": 0.744155764579773, "learning_rate": 6.467684438070809e-06, "loss": 1.3626, "mean_token_accuracy": 0.6463822772105535, "num_tokens": 2069264289.0, "step": 12344 }, { "entropy": 1.7614603241284688, "epoch": 1.356156106671061, "grad_norm": 0.6432300209999084, "learning_rate": 6.466301226705516e-06, "loss": 1.4698, "mean_token_accuracy": 0.6485941559076309, "num_tokens": 2069414632.0, "step": 12345 }, { "entropy": 1.6905387043952942, "epoch": 1.356265963582434, "grad_norm": 0.6607070565223694, "learning_rate": 6.464918158829741e-06, "loss": 1.4342, "mean_token_accuracy": 0.6547519415616989, "num_tokens": 2069600554.0, "step": 12346 }, { "entropy": 1.660075883070628, "epoch": 1.3563758204938068, "grad_norm": 0.6849450469017029, "learning_rate": 6.463535234487267e-06, "loss": 1.3493, "mean_token_accuracy": 0.6670071085294088, "num_tokens": 2069751438.0, "step": 12347 }, { "entropy": 1.7046632369359334, "epoch": 1.3564856774051797, "grad_norm": 0.6302372217178345, "learning_rate": 6.462152453721859e-06, "loss": 1.3905, "mean_token_accuracy": 0.6431731383005778, "num_tokens": 2069882559.0, "step": 12348 }, { "entropy": 1.7553166151046753, "epoch": 1.3565955343165528, "grad_norm": 0.7600220441818237, "learning_rate": 6.460769816577277e-06, "loss": 1.3602, "mean_token_accuracy": 0.6480604211489359, "num_tokens": 2070014200.0, "step": 12349 }, { "entropy": 1.6376391152540843, "epoch": 1.3567053912279257, "grad_norm": 0.7972891926765442, "learning_rate": 6.4593873230972845e-06, "loss": 1.4161, "mean_token_accuracy": 0.6628619134426117, "num_tokens": 2070220462.0, "step": 12350 }, { "entropy": 1.6964278320471446, "epoch": 1.3568152481392985, "grad_norm": 0.6905115842819214, "learning_rate": 6.458004973325643e-06, "loss": 1.2696, "mean_token_accuracy": 0.6703715721766154, "num_tokens": 2070379579.0, "step": 12351 }, { "entropy": 1.695211390654246, "epoch": 1.3569251050506714, "grad_norm": 0.6479082107543945, "learning_rate": 6.456622767306093e-06, "loss": 1.3873, "mean_token_accuracy": 0.6564722607533137, "num_tokens": 2070508597.0, "step": 12352 }, { "entropy": 1.6002402206261952, "epoch": 1.3570349619620443, "grad_norm": 0.6399569511413574, "learning_rate": 6.455240705082386e-06, "loss": 1.1791, "mean_token_accuracy": 0.6922868291536967, "num_tokens": 2070662900.0, "step": 12353 }, { "entropy": 1.667330761750539, "epoch": 1.3571448188734174, "grad_norm": 0.6267159581184387, "learning_rate": 6.453858786698264e-06, "loss": 1.4196, "mean_token_accuracy": 0.6513769179582596, "num_tokens": 2070864178.0, "step": 12354 }, { "entropy": 1.697418709595998, "epoch": 1.3572546757847903, "grad_norm": 0.615899920463562, "learning_rate": 6.4524770121974625e-06, "loss": 1.4321, "mean_token_accuracy": 0.6468136459589005, "num_tokens": 2071051168.0, "step": 12355 }, { "entropy": 1.7128571271896362, "epoch": 1.3573645326961632, "grad_norm": 0.6929111480712891, "learning_rate": 6.451095381623711e-06, "loss": 1.3526, "mean_token_accuracy": 0.6634085973103842, "num_tokens": 2071226809.0, "step": 12356 }, { "entropy": 1.7381273806095123, "epoch": 1.3574743896075363, "grad_norm": 0.6834197640419006, "learning_rate": 6.449713895020746e-06, "loss": 1.4679, "mean_token_accuracy": 0.6420772125323614, "num_tokens": 2071396186.0, "step": 12357 }, { "entropy": 1.722596416870753, "epoch": 1.3575842465189092, "grad_norm": 0.6959985494613647, "learning_rate": 6.448332552432282e-06, "loss": 1.4207, "mean_token_accuracy": 0.6518757989009222, "num_tokens": 2071595004.0, "step": 12358 }, { "entropy": 1.6781906882921855, "epoch": 1.357694103430282, "grad_norm": 0.63628751039505, "learning_rate": 6.446951353902045e-06, "loss": 1.4942, "mean_token_accuracy": 0.663748636841774, "num_tokens": 2071740366.0, "step": 12359 }, { "entropy": 1.6715950568517048, "epoch": 1.357803960341655, "grad_norm": 0.6627793312072754, "learning_rate": 6.445570299473744e-06, "loss": 1.4144, "mean_token_accuracy": 0.660917063554128, "num_tokens": 2071913108.0, "step": 12360 }, { "entropy": 1.803322861591975, "epoch": 1.3579138172530278, "grad_norm": 0.7100440859794617, "learning_rate": 6.4441893891910885e-06, "loss": 1.6403, "mean_token_accuracy": 0.6258356620868047, "num_tokens": 2072140175.0, "step": 12361 }, { "entropy": 1.7187660336494446, "epoch": 1.358023674164401, "grad_norm": 0.648104727268219, "learning_rate": 6.442808623097787e-06, "loss": 1.3935, "mean_token_accuracy": 0.6531741370757421, "num_tokens": 2072329640.0, "step": 12362 }, { "entropy": 1.6625087360541027, "epoch": 1.3581335310757738, "grad_norm": 0.6913976669311523, "learning_rate": 6.441428001237546e-06, "loss": 1.3703, "mean_token_accuracy": 0.6623245229323705, "num_tokens": 2072470021.0, "step": 12363 }, { "entropy": 1.6901950438817341, "epoch": 1.3582433879871467, "grad_norm": 0.6735898852348328, "learning_rate": 6.440047523654047e-06, "loss": 1.5483, "mean_token_accuracy": 0.6314966926972071, "num_tokens": 2072671483.0, "step": 12364 }, { "entropy": 1.7260485688845317, "epoch": 1.3583532448985196, "grad_norm": 0.7406774163246155, "learning_rate": 6.438667190390989e-06, "loss": 1.2643, "mean_token_accuracy": 0.676031157374382, "num_tokens": 2072824287.0, "step": 12365 }, { "entropy": 1.6700426439444225, "epoch": 1.3584631018098925, "grad_norm": 0.6413915157318115, "learning_rate": 6.437287001492063e-06, "loss": 1.3453, "mean_token_accuracy": 0.6662431508302689, "num_tokens": 2072998417.0, "step": 12366 }, { "entropy": 1.717922439177831, "epoch": 1.3585729587212656, "grad_norm": 0.7316557765007019, "learning_rate": 6.4359069570009455e-06, "loss": 1.5969, "mean_token_accuracy": 0.620139608780543, "num_tokens": 2073209496.0, "step": 12367 }, { "entropy": 1.6824163496494293, "epoch": 1.3586828156326385, "grad_norm": 0.667999804019928, "learning_rate": 6.434527056961315e-06, "loss": 1.3104, "mean_token_accuracy": 0.6806386361519495, "num_tokens": 2073399215.0, "step": 12368 }, { "entropy": 1.6601607302824657, "epoch": 1.3587926725440114, "grad_norm": 0.5929083228111267, "learning_rate": 6.4331473014168485e-06, "loss": 1.2936, "mean_token_accuracy": 0.6646314362684885, "num_tokens": 2073573586.0, "step": 12369 }, { "entropy": 1.8227445185184479, "epoch": 1.3589025294553845, "grad_norm": 0.6329379677772522, "learning_rate": 6.431767690411208e-06, "loss": 1.4407, "mean_token_accuracy": 0.6459532777468363, "num_tokens": 2073708654.0, "step": 12370 }, { "entropy": 1.6659250060717266, "epoch": 1.3590123863667574, "grad_norm": 0.6623415946960449, "learning_rate": 6.430388223988067e-06, "loss": 1.6464, "mean_token_accuracy": 0.6338375359773636, "num_tokens": 2073930206.0, "step": 12371 }, { "entropy": 1.730675846338272, "epoch": 1.3591222432781302, "grad_norm": 0.6786333918571472, "learning_rate": 6.429008902191077e-06, "loss": 1.3172, "mean_token_accuracy": 0.676612580815951, "num_tokens": 2074090430.0, "step": 12372 }, { "entropy": 1.723184158404668, "epoch": 1.3592321001895031, "grad_norm": 0.9140381217002869, "learning_rate": 6.4276297250638945e-06, "loss": 1.3079, "mean_token_accuracy": 0.6676105012496313, "num_tokens": 2074265822.0, "step": 12373 }, { "entropy": 1.6886428495248158, "epoch": 1.359341957100876, "grad_norm": 0.696064293384552, "learning_rate": 6.426250692650169e-06, "loss": 1.2555, "mean_token_accuracy": 0.6780825853347778, "num_tokens": 2074394539.0, "step": 12374 }, { "entropy": 1.7155614097913106, "epoch": 1.3594518140122491, "grad_norm": 0.751995861530304, "learning_rate": 6.424871804993555e-06, "loss": 1.4351, "mean_token_accuracy": 0.6506524682044983, "num_tokens": 2074589651.0, "step": 12375 }, { "entropy": 1.7856918176015217, "epoch": 1.359561670923622, "grad_norm": 0.8634830117225647, "learning_rate": 6.423493062137683e-06, "loss": 1.4097, "mean_token_accuracy": 0.6395214746395746, "num_tokens": 2074752173.0, "step": 12376 }, { "entropy": 1.6887954970200856, "epoch": 1.359671527834995, "grad_norm": 0.686133623123169, "learning_rate": 6.42211446412619e-06, "loss": 1.3559, "mean_token_accuracy": 0.665522962808609, "num_tokens": 2074914932.0, "step": 12377 }, { "entropy": 1.6841253538926442, "epoch": 1.3597813847463678, "grad_norm": 0.6983356475830078, "learning_rate": 6.420736011002715e-06, "loss": 1.3099, "mean_token_accuracy": 0.674403061469396, "num_tokens": 2075065681.0, "step": 12378 }, { "entropy": 1.7410068213939667, "epoch": 1.3598912416577407, "grad_norm": 0.798302173614502, "learning_rate": 6.419357702810882e-06, "loss": 1.4741, "mean_token_accuracy": 0.6387134939432144, "num_tokens": 2075284738.0, "step": 12379 }, { "entropy": 1.711126794417699, "epoch": 1.3600010985691138, "grad_norm": 0.6752367615699768, "learning_rate": 6.417979539594311e-06, "loss": 1.4566, "mean_token_accuracy": 0.6450911511977514, "num_tokens": 2075459304.0, "step": 12380 }, { "entropy": 1.6090798874696095, "epoch": 1.3601109554804867, "grad_norm": 0.7679362297058105, "learning_rate": 6.416601521396626e-06, "loss": 1.453, "mean_token_accuracy": 0.6455042411883672, "num_tokens": 2075657038.0, "step": 12381 }, { "entropy": 1.7894425988197327, "epoch": 1.3602208123918595, "grad_norm": 0.685226559638977, "learning_rate": 6.4152236482614336e-06, "loss": 1.6363, "mean_token_accuracy": 0.6196721792221069, "num_tokens": 2075889197.0, "step": 12382 }, { "entropy": 1.6688521007696788, "epoch": 1.3603306693032327, "grad_norm": 0.606177031993866, "learning_rate": 6.413845920232351e-06, "loss": 1.5137, "mean_token_accuracy": 0.641996776064237, "num_tokens": 2076111995.0, "step": 12383 }, { "entropy": 1.7345014313856761, "epoch": 1.3604405262146055, "grad_norm": 0.7717848420143127, "learning_rate": 6.41246833735298e-06, "loss": 1.4069, "mean_token_accuracy": 0.6651813685894012, "num_tokens": 2076293865.0, "step": 12384 }, { "entropy": 1.6873282094796498, "epoch": 1.3605503831259784, "grad_norm": 0.6491485238075256, "learning_rate": 6.411090899666912e-06, "loss": 1.4593, "mean_token_accuracy": 0.6515095929304758, "num_tokens": 2076503891.0, "step": 12385 }, { "entropy": 1.6362064977486928, "epoch": 1.3606602400373513, "grad_norm": 0.669855535030365, "learning_rate": 6.4097136072177516e-06, "loss": 1.4198, "mean_token_accuracy": 0.6423494170109431, "num_tokens": 2076635813.0, "step": 12386 }, { "entropy": 1.7514414191246033, "epoch": 1.3607700969487242, "grad_norm": 0.683935284614563, "learning_rate": 6.408336460049091e-06, "loss": 1.3794, "mean_token_accuracy": 0.6483311802148819, "num_tokens": 2076766414.0, "step": 12387 }, { "entropy": 1.7777518530686696, "epoch": 1.3608799538600973, "grad_norm": 0.7260268926620483, "learning_rate": 6.406959458204509e-06, "loss": 1.4164, "mean_token_accuracy": 0.6536405185858408, "num_tokens": 2076906861.0, "step": 12388 }, { "entropy": 1.7372454206148784, "epoch": 1.3609898107714702, "grad_norm": 0.6218958497047424, "learning_rate": 6.4055826017275895e-06, "loss": 1.4287, "mean_token_accuracy": 0.6501857141653696, "num_tokens": 2077064213.0, "step": 12389 }, { "entropy": 1.6530345578988392, "epoch": 1.361099667682843, "grad_norm": 0.6436760425567627, "learning_rate": 6.404205890661914e-06, "loss": 1.4417, "mean_token_accuracy": 0.6399639348189036, "num_tokens": 2077282573.0, "step": 12390 }, { "entropy": 1.7672642767429352, "epoch": 1.361209524594216, "grad_norm": 0.6963381767272949, "learning_rate": 6.40282932505105e-06, "loss": 1.388, "mean_token_accuracy": 0.6588474710782369, "num_tokens": 2077494073.0, "step": 12391 }, { "entropy": 1.6798830231030781, "epoch": 1.3613193815055888, "grad_norm": 0.7534324526786804, "learning_rate": 6.4014529049385674e-06, "loss": 1.2571, "mean_token_accuracy": 0.6731600165367126, "num_tokens": 2077645291.0, "step": 12392 }, { "entropy": 1.6944253742694855, "epoch": 1.361429238416962, "grad_norm": 0.7895978093147278, "learning_rate": 6.400076630368024e-06, "loss": 1.3938, "mean_token_accuracy": 0.6565722674131393, "num_tokens": 2077779765.0, "step": 12393 }, { "entropy": 1.732394814491272, "epoch": 1.3615390953283348, "grad_norm": 0.7902349233627319, "learning_rate": 6.398700501382983e-06, "loss": 1.5484, "mean_token_accuracy": 0.6475964114069939, "num_tokens": 2077979044.0, "step": 12394 }, { "entropy": 1.6490447123845418, "epoch": 1.3616489522397077, "grad_norm": 0.6817672252655029, "learning_rate": 6.397324518027002e-06, "loss": 1.3966, "mean_token_accuracy": 0.6636393964290619, "num_tokens": 2078130750.0, "step": 12395 }, { "entropy": 1.7045618295669556, "epoch": 1.3617588091510808, "grad_norm": 0.6113632917404175, "learning_rate": 6.395948680343625e-06, "loss": 1.5267, "mean_token_accuracy": 0.6461210399866104, "num_tokens": 2078334424.0, "step": 12396 }, { "entropy": 1.677074631055196, "epoch": 1.3618686660624537, "grad_norm": 0.6405321955680847, "learning_rate": 6.394572988376393e-06, "loss": 1.3298, "mean_token_accuracy": 0.6814304739236832, "num_tokens": 2078483301.0, "step": 12397 }, { "entropy": 1.6795497337977092, "epoch": 1.3619785229738266, "grad_norm": 0.6742625832557678, "learning_rate": 6.393197442168856e-06, "loss": 1.4616, "mean_token_accuracy": 0.6637395819028219, "num_tokens": 2078651932.0, "step": 12398 }, { "entropy": 1.6983890235424042, "epoch": 1.3620883798851995, "grad_norm": 0.8591446280479431, "learning_rate": 6.391822041764542e-06, "loss": 1.3586, "mean_token_accuracy": 0.6608823935190836, "num_tokens": 2078783927.0, "step": 12399 }, { "entropy": 1.7535987198352814, "epoch": 1.3621982367965724, "grad_norm": 0.6175576448440552, "learning_rate": 6.390446787206983e-06, "loss": 1.5121, "mean_token_accuracy": 0.6477769613265991, "num_tokens": 2078957176.0, "step": 12400 }, { "entropy": 1.7856710652510326, "epoch": 1.3623080937079455, "grad_norm": 0.6982350945472717, "learning_rate": 6.389071678539708e-06, "loss": 1.4667, "mean_token_accuracy": 0.6387214660644531, "num_tokens": 2079129907.0, "step": 12401 }, { "entropy": 1.7471899092197418, "epoch": 1.3624179506193184, "grad_norm": 0.895235002040863, "learning_rate": 6.387696715806233e-06, "loss": 1.5033, "mean_token_accuracy": 0.6521243900060654, "num_tokens": 2079292376.0, "step": 12402 }, { "entropy": 1.702579249938329, "epoch": 1.3625278075306912, "grad_norm": 0.80966717004776, "learning_rate": 6.3863218990500835e-06, "loss": 1.424, "mean_token_accuracy": 0.6629576434691747, "num_tokens": 2079480359.0, "step": 12403 }, { "entropy": 1.6714877784252167, "epoch": 1.3626376644420641, "grad_norm": 0.7189324498176575, "learning_rate": 6.384947228314765e-06, "loss": 1.239, "mean_token_accuracy": 0.6766127347946167, "num_tokens": 2079633790.0, "step": 12404 }, { "entropy": 1.6770406166712444, "epoch": 1.362747521353437, "grad_norm": 0.5395998358726501, "learning_rate": 6.383572703643786e-06, "loss": 1.4526, "mean_token_accuracy": 0.6433726151784261, "num_tokens": 2079890149.0, "step": 12405 }, { "entropy": 1.6809902389844258, "epoch": 1.3628573782648101, "grad_norm": 0.6718010306358337, "learning_rate": 6.382198325080649e-06, "loss": 1.5027, "mean_token_accuracy": 0.636824240287145, "num_tokens": 2080094393.0, "step": 12406 }, { "entropy": 1.6974414388338726, "epoch": 1.362967235176183, "grad_norm": 0.6535385847091675, "learning_rate": 6.380824092668857e-06, "loss": 1.3319, "mean_token_accuracy": 0.6718876659870148, "num_tokens": 2080234231.0, "step": 12407 }, { "entropy": 1.701567719380061, "epoch": 1.363077092087556, "grad_norm": 0.5973226428031921, "learning_rate": 6.379450006451902e-06, "loss": 1.5466, "mean_token_accuracy": 0.6344873458147049, "num_tokens": 2080439315.0, "step": 12408 }, { "entropy": 1.6704054077466328, "epoch": 1.363186948998929, "grad_norm": 0.6961952447891235, "learning_rate": 6.378076066473269e-06, "loss": 1.4765, "mean_token_accuracy": 0.6445471247037252, "num_tokens": 2080636110.0, "step": 12409 }, { "entropy": 1.7136572698752086, "epoch": 1.363296805910302, "grad_norm": 0.8393598198890686, "learning_rate": 6.37670227277645e-06, "loss": 1.4695, "mean_token_accuracy": 0.6775861183802286, "num_tokens": 2080766358.0, "step": 12410 }, { "entropy": 1.6980823675791423, "epoch": 1.3634066628216748, "grad_norm": 0.7727437019348145, "learning_rate": 6.37532862540492e-06, "loss": 1.4852, "mean_token_accuracy": 0.6424992879231771, "num_tokens": 2080940544.0, "step": 12411 }, { "entropy": 1.7293777863184612, "epoch": 1.3635165197330477, "grad_norm": 0.7608976364135742, "learning_rate": 6.3739551244021515e-06, "loss": 1.4743, "mean_token_accuracy": 0.6436664660771688, "num_tokens": 2081098947.0, "step": 12412 }, { "entropy": 1.7463851571083069, "epoch": 1.3636263766444205, "grad_norm": 0.6330917477607727, "learning_rate": 6.372581769811621e-06, "loss": 1.3279, "mean_token_accuracy": 0.6707935730616251, "num_tokens": 2081249943.0, "step": 12413 }, { "entropy": 1.744843582312266, "epoch": 1.3637362335557937, "grad_norm": 0.8132315874099731, "learning_rate": 6.37120856167679e-06, "loss": 1.6554, "mean_token_accuracy": 0.635198379556338, "num_tokens": 2081409375.0, "step": 12414 }, { "entropy": 1.6739271680514018, "epoch": 1.3638460904671665, "grad_norm": 0.7317605018615723, "learning_rate": 6.369835500041126e-06, "loss": 1.3227, "mean_token_accuracy": 0.663427397608757, "num_tokens": 2081563302.0, "step": 12415 }, { "entropy": 1.6746556460857391, "epoch": 1.3639559473785394, "grad_norm": 0.7247527837753296, "learning_rate": 6.368462584948082e-06, "loss": 1.3398, "mean_token_accuracy": 0.6720200031995773, "num_tokens": 2081684223.0, "step": 12416 }, { "entropy": 1.6810146073500316, "epoch": 1.3640658042899123, "grad_norm": 0.7046148777008057, "learning_rate": 6.367089816441106e-06, "loss": 1.3198, "mean_token_accuracy": 0.6763695975144705, "num_tokens": 2081804676.0, "step": 12417 }, { "entropy": 1.7369506259759266, "epoch": 1.3641756612012852, "grad_norm": 0.679513156414032, "learning_rate": 6.36571719456365e-06, "loss": 1.4399, "mean_token_accuracy": 0.652540922164917, "num_tokens": 2081966586.0, "step": 12418 }, { "entropy": 1.7097415924072266, "epoch": 1.3642855181126583, "grad_norm": 0.6426488161087036, "learning_rate": 6.364344719359161e-06, "loss": 1.4498, "mean_token_accuracy": 0.6472660650809606, "num_tokens": 2082178544.0, "step": 12419 }, { "entropy": 1.7177700698375702, "epoch": 1.3643953750240312, "grad_norm": 0.6975800395011902, "learning_rate": 6.362972390871072e-06, "loss": 1.4556, "mean_token_accuracy": 0.6420897940794627, "num_tokens": 2082354325.0, "step": 12420 }, { "entropy": 1.6956178347269695, "epoch": 1.364505231935404, "grad_norm": 0.6548015475273132, "learning_rate": 6.361600209142813e-06, "loss": 1.2944, "mean_token_accuracy": 0.6646904796361923, "num_tokens": 2082470336.0, "step": 12421 }, { "entropy": 1.6302022536595662, "epoch": 1.3646150888467772, "grad_norm": 1.402173638343811, "learning_rate": 6.360228174217822e-06, "loss": 1.3346, "mean_token_accuracy": 0.6607320159673691, "num_tokens": 2082670245.0, "step": 12422 }, { "entropy": 1.7581724623839061, "epoch": 1.36472494575815, "grad_norm": 0.7247641682624817, "learning_rate": 6.358856286139517e-06, "loss": 1.3864, "mean_token_accuracy": 0.65654323498408, "num_tokens": 2082832020.0, "step": 12423 }, { "entropy": 1.7525269190470378, "epoch": 1.364834802669523, "grad_norm": 0.7007389664649963, "learning_rate": 6.3574845449513175e-06, "loss": 1.5072, "mean_token_accuracy": 0.6481544723113378, "num_tokens": 2082991463.0, "step": 12424 }, { "entropy": 1.693600445985794, "epoch": 1.3649446595808958, "grad_norm": 0.7632626891136169, "learning_rate": 6.356112950696642e-06, "loss": 1.3861, "mean_token_accuracy": 0.6590569615364075, "num_tokens": 2083136636.0, "step": 12425 }, { "entropy": 1.680637151002884, "epoch": 1.3650545164922687, "grad_norm": 0.6591139435768127, "learning_rate": 6.354741503418897e-06, "loss": 1.4859, "mean_token_accuracy": 0.6472095102071762, "num_tokens": 2083318799.0, "step": 12426 }, { "entropy": 1.7445678512255351, "epoch": 1.3651643734036418, "grad_norm": 0.8773960471153259, "learning_rate": 6.353370203161493e-06, "loss": 1.5806, "mean_token_accuracy": 0.6431012004613876, "num_tokens": 2083489179.0, "step": 12427 }, { "entropy": 1.6873707075913746, "epoch": 1.3652742303150147, "grad_norm": 0.6120126843452454, "learning_rate": 6.351999049967829e-06, "loss": 1.4179, "mean_token_accuracy": 0.6483838905890783, "num_tokens": 2083675182.0, "step": 12428 }, { "entropy": 1.7138080596923828, "epoch": 1.3653840872263876, "grad_norm": 0.7926364541053772, "learning_rate": 6.350628043881296e-06, "loss": 1.3874, "mean_token_accuracy": 0.6502122531334559, "num_tokens": 2083802000.0, "step": 12429 }, { "entropy": 1.6688072582085927, "epoch": 1.3654939441377605, "grad_norm": 0.6590139865875244, "learning_rate": 6.349257184945291e-06, "loss": 1.3461, "mean_token_accuracy": 0.6638127416372299, "num_tokens": 2083975602.0, "step": 12430 }, { "entropy": 1.6825914184252422, "epoch": 1.3656038010491334, "grad_norm": 0.6948336958885193, "learning_rate": 6.347886473203204e-06, "loss": 1.4358, "mean_token_accuracy": 0.6588354905446371, "num_tokens": 2084152778.0, "step": 12431 }, { "entropy": 1.6966508328914642, "epoch": 1.3657136579605065, "grad_norm": 0.6842400431632996, "learning_rate": 6.346515908698414e-06, "loss": 1.4629, "mean_token_accuracy": 0.6462537546952566, "num_tokens": 2084319697.0, "step": 12432 }, { "entropy": 1.7046682834625244, "epoch": 1.3658235148718794, "grad_norm": 0.6207724213600159, "learning_rate": 6.345145491474295e-06, "loss": 1.3735, "mean_token_accuracy": 0.6687126259009043, "num_tokens": 2084458870.0, "step": 12433 }, { "entropy": 1.6672801077365875, "epoch": 1.3659333717832522, "grad_norm": 0.7119844555854797, "learning_rate": 6.3437752215742264e-06, "loss": 1.4564, "mean_token_accuracy": 0.672510157028834, "num_tokens": 2084603570.0, "step": 12434 }, { "entropy": 1.6629460255304973, "epoch": 1.3660432286946254, "grad_norm": 0.7983689904212952, "learning_rate": 6.3424050990415745e-06, "loss": 1.4319, "mean_token_accuracy": 0.6575490633646647, "num_tokens": 2084752897.0, "step": 12435 }, { "entropy": 1.6838939388593037, "epoch": 1.3661530856059982, "grad_norm": 0.6658820509910583, "learning_rate": 6.341035123919699e-06, "loss": 1.6706, "mean_token_accuracy": 0.6166095087925593, "num_tokens": 2084981969.0, "step": 12436 }, { "entropy": 1.6812881429990132, "epoch": 1.3662629425173711, "grad_norm": 0.6590875387191772, "learning_rate": 6.339665296251966e-06, "loss": 1.4998, "mean_token_accuracy": 0.6573397864898046, "num_tokens": 2085171039.0, "step": 12437 }, { "entropy": 1.706734577814738, "epoch": 1.366372799428744, "grad_norm": 0.6647880673408508, "learning_rate": 6.338295616081722e-06, "loss": 1.4314, "mean_token_accuracy": 0.6501033653815588, "num_tokens": 2085335853.0, "step": 12438 }, { "entropy": 1.6170736749966939, "epoch": 1.366482656340117, "grad_norm": 0.7858694195747375, "learning_rate": 6.336926083452326e-06, "loss": 1.2863, "mean_token_accuracy": 0.6720435669024786, "num_tokens": 2085498251.0, "step": 12439 }, { "entropy": 1.6985772848129272, "epoch": 1.36659251325149, "grad_norm": 0.6605982184410095, "learning_rate": 6.335556698407117e-06, "loss": 1.3983, "mean_token_accuracy": 0.6527110983928045, "num_tokens": 2085671772.0, "step": 12440 }, { "entropy": 1.6974250773588817, "epoch": 1.366702370162863, "grad_norm": 0.7720880508422852, "learning_rate": 6.334187460989434e-06, "loss": 1.3258, "mean_token_accuracy": 0.6786187787850698, "num_tokens": 2085791125.0, "step": 12441 }, { "entropy": 1.7007473905881245, "epoch": 1.3668122270742358, "grad_norm": 0.6617766618728638, "learning_rate": 6.332818371242615e-06, "loss": 1.3439, "mean_token_accuracy": 0.6567811866601309, "num_tokens": 2085914564.0, "step": 12442 }, { "entropy": 1.6779978076616924, "epoch": 1.3669220839856089, "grad_norm": 0.6167494058609009, "learning_rate": 6.331449429209998e-06, "loss": 1.5439, "mean_token_accuracy": 0.6489314138889313, "num_tokens": 2086101783.0, "step": 12443 }, { "entropy": 1.6638370255629222, "epoch": 1.3670319408969815, "grad_norm": 0.6931138634681702, "learning_rate": 6.330080634934896e-06, "loss": 1.245, "mean_token_accuracy": 0.6787913938363394, "num_tokens": 2086270110.0, "step": 12444 }, { "entropy": 1.7775676747163136, "epoch": 1.3671417978083547, "grad_norm": 0.673394501209259, "learning_rate": 6.3287119884606385e-06, "loss": 1.3892, "mean_token_accuracy": 0.6486051231622696, "num_tokens": 2086442552.0, "step": 12445 }, { "entropy": 1.7234329481919606, "epoch": 1.3672516547197275, "grad_norm": 0.6274213194847107, "learning_rate": 6.327343489830544e-06, "loss": 1.409, "mean_token_accuracy": 0.6496559977531433, "num_tokens": 2086600998.0, "step": 12446 }, { "entropy": 1.6764400204022725, "epoch": 1.3673615116311004, "grad_norm": 0.5889531970024109, "learning_rate": 6.3259751390879235e-06, "loss": 1.4925, "mean_token_accuracy": 0.6441396176815033, "num_tokens": 2086809020.0, "step": 12447 }, { "entropy": 1.7784501016139984, "epoch": 1.3674713685424735, "grad_norm": 0.6351875066757202, "learning_rate": 6.324606936276081e-06, "loss": 1.3461, "mean_token_accuracy": 0.6528228173653284, "num_tokens": 2086958944.0, "step": 12448 }, { "entropy": 1.7382746239503224, "epoch": 1.3675812254538464, "grad_norm": 0.7826247811317444, "learning_rate": 6.323238881438322e-06, "loss": 1.2748, "mean_token_accuracy": 0.6688608477512995, "num_tokens": 2087071918.0, "step": 12449 }, { "entropy": 1.6849194665749867, "epoch": 1.3676910823652193, "grad_norm": 0.8119662404060364, "learning_rate": 6.321870974617945e-06, "loss": 1.4217, "mean_token_accuracy": 0.6616232146819433, "num_tokens": 2087246848.0, "step": 12450 }, { "entropy": 1.6627297898133595, "epoch": 1.3678009392765922, "grad_norm": 0.7588232755661011, "learning_rate": 6.320503215858247e-06, "loss": 1.3605, "mean_token_accuracy": 0.6651882280906042, "num_tokens": 2087390127.0, "step": 12451 }, { "entropy": 1.6607940097649891, "epoch": 1.367910796187965, "grad_norm": 0.6578659415245056, "learning_rate": 6.3191356052025125e-06, "loss": 1.3467, "mean_token_accuracy": 0.663345048824946, "num_tokens": 2087593548.0, "step": 12452 }, { "entropy": 1.7427105605602264, "epoch": 1.3680206530993382, "grad_norm": 0.6504858136177063, "learning_rate": 6.317768142694023e-06, "loss": 1.5592, "mean_token_accuracy": 0.6351048996051153, "num_tokens": 2087811467.0, "step": 12453 }, { "entropy": 1.7341377437114716, "epoch": 1.368130510010711, "grad_norm": 0.7811003923416138, "learning_rate": 6.316400828376067e-06, "loss": 1.3086, "mean_token_accuracy": 0.6740827312072118, "num_tokens": 2087971627.0, "step": 12454 }, { "entropy": 1.73456209897995, "epoch": 1.368240366922084, "grad_norm": 0.8186246156692505, "learning_rate": 6.315033662291913e-06, "loss": 1.3032, "mean_token_accuracy": 0.6766839226086935, "num_tokens": 2088104619.0, "step": 12455 }, { "entropy": 1.745054046312968, "epoch": 1.368350223833457, "grad_norm": 0.6494616270065308, "learning_rate": 6.31366664448483e-06, "loss": 1.4788, "mean_token_accuracy": 0.6535368661085764, "num_tokens": 2088264333.0, "step": 12456 }, { "entropy": 1.6955593327681224, "epoch": 1.3684600807448297, "grad_norm": 0.6850633025169373, "learning_rate": 6.312299774998088e-06, "loss": 1.3775, "mean_token_accuracy": 0.6563636163870493, "num_tokens": 2088428326.0, "step": 12457 }, { "entropy": 1.6658681730429332, "epoch": 1.3685699376562028, "grad_norm": 0.7219941020011902, "learning_rate": 6.310933053874944e-06, "loss": 1.3378, "mean_token_accuracy": 0.6682392458120981, "num_tokens": 2088585255.0, "step": 12458 }, { "entropy": 1.6764575441678364, "epoch": 1.3686797945675757, "grad_norm": 0.6834865212440491, "learning_rate": 6.309566481158657e-06, "loss": 1.4419, "mean_token_accuracy": 0.6662019590536753, "num_tokens": 2088742531.0, "step": 12459 }, { "entropy": 1.6608708600203197, "epoch": 1.3687896514789486, "grad_norm": 0.5805539488792419, "learning_rate": 6.30820005689248e-06, "loss": 1.3691, "mean_token_accuracy": 0.667238692442576, "num_tokens": 2088931283.0, "step": 12460 }, { "entropy": 1.6612081130345662, "epoch": 1.3688995083903217, "grad_norm": 0.6132814288139343, "learning_rate": 6.306833781119653e-06, "loss": 1.3991, "mean_token_accuracy": 0.6704057107369105, "num_tokens": 2089145116.0, "step": 12461 }, { "entropy": 1.656867245833079, "epoch": 1.3690093653016946, "grad_norm": 0.587921142578125, "learning_rate": 6.305467653883419e-06, "loss": 1.3241, "mean_token_accuracy": 0.6596115976572037, "num_tokens": 2089314770.0, "step": 12462 }, { "entropy": 1.6916143695513408, "epoch": 1.3691192222130675, "grad_norm": 0.6570534110069275, "learning_rate": 6.304101675227025e-06, "loss": 1.2452, "mean_token_accuracy": 0.6774038225412369, "num_tokens": 2089454204.0, "step": 12463 }, { "entropy": 1.655198593934377, "epoch": 1.3692290791244404, "grad_norm": 0.7079626321792603, "learning_rate": 6.3027358451936945e-06, "loss": 1.4054, "mean_token_accuracy": 0.6695781598488489, "num_tokens": 2089589977.0, "step": 12464 }, { "entropy": 1.7478280862172444, "epoch": 1.3693389360358132, "grad_norm": 0.8134309649467468, "learning_rate": 6.301370163826657e-06, "loss": 1.3172, "mean_token_accuracy": 0.6582539429267248, "num_tokens": 2089722453.0, "step": 12465 }, { "entropy": 1.7540696163972218, "epoch": 1.3694487929471864, "grad_norm": 0.8033037781715393, "learning_rate": 6.30000463116914e-06, "loss": 1.4302, "mean_token_accuracy": 0.6428679327170054, "num_tokens": 2089859787.0, "step": 12466 }, { "entropy": 1.7167806526025136, "epoch": 1.3695586498585592, "grad_norm": 0.9351958632469177, "learning_rate": 6.298639247264356e-06, "loss": 1.3128, "mean_token_accuracy": 0.6571259746948878, "num_tokens": 2090018785.0, "step": 12467 }, { "entropy": 1.6641875207424164, "epoch": 1.3696685067699321, "grad_norm": 0.6932764053344727, "learning_rate": 6.297274012155521e-06, "loss": 1.4692, "mean_token_accuracy": 0.6500126868486404, "num_tokens": 2090182955.0, "step": 12468 }, { "entropy": 1.6753845711549122, "epoch": 1.3697783636813052, "grad_norm": 0.6691131591796875, "learning_rate": 6.295908925885845e-06, "loss": 1.3916, "mean_token_accuracy": 0.6713538318872452, "num_tokens": 2090346994.0, "step": 12469 }, { "entropy": 1.715299944082896, "epoch": 1.3698882205926781, "grad_norm": 0.7135533690452576, "learning_rate": 6.294543988498529e-06, "loss": 1.2943, "mean_token_accuracy": 0.6677762617667516, "num_tokens": 2090522173.0, "step": 12470 }, { "entropy": 1.7170192102591197, "epoch": 1.369998077504051, "grad_norm": 0.60918790102005, "learning_rate": 6.293179200036781e-06, "loss": 1.4456, "mean_token_accuracy": 0.6431434949239095, "num_tokens": 2090703501.0, "step": 12471 }, { "entropy": 1.7095450460910797, "epoch": 1.370107934415424, "grad_norm": 0.8031734824180603, "learning_rate": 6.29181456054379e-06, "loss": 1.3702, "mean_token_accuracy": 0.653916930158933, "num_tokens": 2090852167.0, "step": 12472 }, { "entropy": 1.6358346939086914, "epoch": 1.3702177913267968, "grad_norm": 0.7461312413215637, "learning_rate": 6.290450070062741e-06, "loss": 1.3405, "mean_token_accuracy": 0.6676389326651891, "num_tokens": 2091034902.0, "step": 12473 }, { "entropy": 1.6393639147281647, "epoch": 1.3703276482381699, "grad_norm": 0.7326330542564392, "learning_rate": 6.289085728636827e-06, "loss": 1.4606, "mean_token_accuracy": 0.6608719974756241, "num_tokens": 2091197060.0, "step": 12474 }, { "entropy": 1.702725499868393, "epoch": 1.3704375051495428, "grad_norm": 0.7266789674758911, "learning_rate": 6.287721536309228e-06, "loss": 1.468, "mean_token_accuracy": 0.6516217837731043, "num_tokens": 2091354267.0, "step": 12475 }, { "entropy": 1.7397367060184479, "epoch": 1.3705473620609157, "grad_norm": 0.6959664821624756, "learning_rate": 6.286357493123121e-06, "loss": 1.3982, "mean_token_accuracy": 0.644005666176478, "num_tokens": 2091481641.0, "step": 12476 }, { "entropy": 1.6641556123892467, "epoch": 1.3706572189722885, "grad_norm": 0.5910245776176453, "learning_rate": 6.284993599121671e-06, "loss": 1.4441, "mean_token_accuracy": 0.6538594514131546, "num_tokens": 2091691502.0, "step": 12477 }, { "entropy": 1.6857453385988872, "epoch": 1.3707670758836614, "grad_norm": 0.7743722796440125, "learning_rate": 6.283629854348053e-06, "loss": 1.5228, "mean_token_accuracy": 0.6551050196091334, "num_tokens": 2091850943.0, "step": 12478 }, { "entropy": 1.7026999294757843, "epoch": 1.3708769327950345, "grad_norm": 0.8539242744445801, "learning_rate": 6.2822662588454255e-06, "loss": 1.2969, "mean_token_accuracy": 0.6824150482813517, "num_tokens": 2091973000.0, "step": 12479 }, { "entropy": 1.650414725144704, "epoch": 1.3709867897064074, "grad_norm": 0.6949166059494019, "learning_rate": 6.280902812656941e-06, "loss": 1.3121, "mean_token_accuracy": 0.668070966998736, "num_tokens": 2092112881.0, "step": 12480 }, { "entropy": 1.7248587508996327, "epoch": 1.3710966466177803, "grad_norm": 0.843644917011261, "learning_rate": 6.279539515825759e-06, "loss": 1.5992, "mean_token_accuracy": 0.6491927405198415, "num_tokens": 2092288051.0, "step": 12481 }, { "entropy": 1.6901381611824036, "epoch": 1.3712065035291534, "grad_norm": 0.6566561460494995, "learning_rate": 6.2781763683950216e-06, "loss": 1.4618, "mean_token_accuracy": 0.6334747324387232, "num_tokens": 2092497839.0, "step": 12482 }, { "entropy": 1.6996184488137562, "epoch": 1.3713163604405263, "grad_norm": 0.6987338066101074, "learning_rate": 6.276813370407876e-06, "loss": 1.476, "mean_token_accuracy": 0.6629880964756012, "num_tokens": 2092636759.0, "step": 12483 }, { "entropy": 1.750564714272817, "epoch": 1.3714262173518992, "grad_norm": 0.8554201722145081, "learning_rate": 6.27545052190746e-06, "loss": 1.4742, "mean_token_accuracy": 0.6432334631681442, "num_tokens": 2092770538.0, "step": 12484 }, { "entropy": 1.6747083564599354, "epoch": 1.371536074263272, "grad_norm": 0.6590608358383179, "learning_rate": 6.274087822936904e-06, "loss": 1.3891, "mean_token_accuracy": 0.6730438470840454, "num_tokens": 2092951129.0, "step": 12485 }, { "entropy": 1.6601560016473134, "epoch": 1.371645931174645, "grad_norm": 0.7627508640289307, "learning_rate": 6.272725273539337e-06, "loss": 1.3777, "mean_token_accuracy": 0.6717945039272308, "num_tokens": 2093112437.0, "step": 12486 }, { "entropy": 1.6760172843933105, "epoch": 1.371755788086018, "grad_norm": 0.6052493453025818, "learning_rate": 6.271362873757889e-06, "loss": 1.3659, "mean_token_accuracy": 0.6492635011672974, "num_tokens": 2093284948.0, "step": 12487 }, { "entropy": 1.7177870472272236, "epoch": 1.371865644997391, "grad_norm": 0.6940590739250183, "learning_rate": 6.270000623635675e-06, "loss": 1.3116, "mean_token_accuracy": 0.6777070065339407, "num_tokens": 2093413636.0, "step": 12488 }, { "entropy": 1.7334169745445251, "epoch": 1.3719755019087638, "grad_norm": 0.7038564682006836, "learning_rate": 6.268638523215807e-06, "loss": 1.5389, "mean_token_accuracy": 0.6339151461919149, "num_tokens": 2093610103.0, "step": 12489 }, { "entropy": 1.7048886219660442, "epoch": 1.3720853588201367, "grad_norm": 0.7182838916778564, "learning_rate": 6.267276572541401e-06, "loss": 1.4323, "mean_token_accuracy": 0.6515081773201624, "num_tokens": 2093782215.0, "step": 12490 }, { "entropy": 1.6638799210389454, "epoch": 1.3721952157315096, "grad_norm": 0.6247413754463196, "learning_rate": 6.265914771655559e-06, "loss": 1.4979, "mean_token_accuracy": 0.649769072731336, "num_tokens": 2094006255.0, "step": 12491 }, { "entropy": 1.684423953294754, "epoch": 1.3723050726428827, "grad_norm": 0.6764265894889832, "learning_rate": 6.264553120601378e-06, "loss": 1.44, "mean_token_accuracy": 0.6405173540115356, "num_tokens": 2094154650.0, "step": 12492 }, { "entropy": 1.707674354314804, "epoch": 1.3724149295542556, "grad_norm": 0.6965126395225525, "learning_rate": 6.26319161942196e-06, "loss": 1.321, "mean_token_accuracy": 0.6638032595316569, "num_tokens": 2094304169.0, "step": 12493 }, { "entropy": 1.7017245789368947, "epoch": 1.3725247864656285, "grad_norm": 0.687353253364563, "learning_rate": 6.261830268160388e-06, "loss": 1.2905, "mean_token_accuracy": 0.6623808195193609, "num_tokens": 2094463463.0, "step": 12494 }, { "entropy": 1.7572371661663055, "epoch": 1.3726346433770016, "grad_norm": 0.6422104239463806, "learning_rate": 6.260469066859758e-06, "loss": 1.5449, "mean_token_accuracy": 0.6423913687467575, "num_tokens": 2094645342.0, "step": 12495 }, { "entropy": 1.7151329219341278, "epoch": 1.3727445002883745, "grad_norm": 0.7359181046485901, "learning_rate": 6.259108015563146e-06, "loss": 1.4197, "mean_token_accuracy": 0.6493860632181168, "num_tokens": 2094828348.0, "step": 12496 }, { "entropy": 1.71349502603213, "epoch": 1.3728543571997474, "grad_norm": 0.6822761297225952, "learning_rate": 6.257747114313626e-06, "loss": 1.4804, "mean_token_accuracy": 0.6557674954334894, "num_tokens": 2095006520.0, "step": 12497 }, { "entropy": 1.688800722360611, "epoch": 1.3729642141111202, "grad_norm": 0.5929533839225769, "learning_rate": 6.256386363154272e-06, "loss": 1.3362, "mean_token_accuracy": 0.6658134708801905, "num_tokens": 2095165179.0, "step": 12498 }, { "entropy": 1.7582799593607585, "epoch": 1.3730740710224931, "grad_norm": 0.6980983018875122, "learning_rate": 6.255025762128156e-06, "loss": 1.5299, "mean_token_accuracy": 0.6501601040363312, "num_tokens": 2095338814.0, "step": 12499 }, { "entropy": 1.6549382110436757, "epoch": 1.3731839279338662, "grad_norm": 0.6574755907058716, "learning_rate": 6.253665311278337e-06, "loss": 1.4851, "mean_token_accuracy": 0.6519963542620341, "num_tokens": 2095554250.0, "step": 12500 }, { "entropy": 1.7362759113311768, "epoch": 1.3732937848452391, "grad_norm": 0.6475224494934082, "learning_rate": 6.252305010647868e-06, "loss": 1.3406, "mean_token_accuracy": 0.6704058945178986, "num_tokens": 2095687229.0, "step": 12501 }, { "entropy": 1.683371404806773, "epoch": 1.373403641756612, "grad_norm": 0.6420804858207703, "learning_rate": 6.250944860279809e-06, "loss": 1.344, "mean_token_accuracy": 0.6628765761852264, "num_tokens": 2095919210.0, "step": 12502 }, { "entropy": 1.7301292022069295, "epoch": 1.3735134986679849, "grad_norm": 0.8406617641448975, "learning_rate": 6.249584860217206e-06, "loss": 1.4015, "mean_token_accuracy": 0.6547163327534994, "num_tokens": 2096052146.0, "step": 12503 }, { "entropy": 1.6722088654836018, "epoch": 1.3736233555793578, "grad_norm": 0.7558131814002991, "learning_rate": 6.248225010503098e-06, "loss": 1.4738, "mean_token_accuracy": 0.6520429998636246, "num_tokens": 2096209673.0, "step": 12504 }, { "entropy": 1.684027413527171, "epoch": 1.3737332124907309, "grad_norm": 0.8138048648834229, "learning_rate": 6.246865311180532e-06, "loss": 1.5581, "mean_token_accuracy": 0.6619268457094828, "num_tokens": 2096376074.0, "step": 12505 }, { "entropy": 1.666286826133728, "epoch": 1.3738430694021038, "grad_norm": 0.6682919859886169, "learning_rate": 6.245505762292532e-06, "loss": 1.3127, "mean_token_accuracy": 0.6639792720476786, "num_tokens": 2096564264.0, "step": 12506 }, { "entropy": 1.760203758875529, "epoch": 1.3739529263134767, "grad_norm": 0.7202064990997314, "learning_rate": 6.2441463638821355e-06, "loss": 1.5849, "mean_token_accuracy": 0.6438749060034752, "num_tokens": 2096724407.0, "step": 12507 }, { "entropy": 1.758449226617813, "epoch": 1.3740627832248498, "grad_norm": 0.6769862771034241, "learning_rate": 6.242787115992364e-06, "loss": 1.5019, "mean_token_accuracy": 0.6401093502839407, "num_tokens": 2096904566.0, "step": 12508 }, { "entropy": 1.6738151510556538, "epoch": 1.3741726401362226, "grad_norm": 0.5517680644989014, "learning_rate": 6.241428018666234e-06, "loss": 1.3892, "mean_token_accuracy": 0.655499001344045, "num_tokens": 2097086758.0, "step": 12509 }, { "entropy": 1.714376340309779, "epoch": 1.3742824970475955, "grad_norm": 0.6948719620704651, "learning_rate": 6.240069071946762e-06, "loss": 1.4616, "mean_token_accuracy": 0.6460278133551279, "num_tokens": 2097254724.0, "step": 12510 }, { "entropy": 1.7263469596703847, "epoch": 1.3743923539589684, "grad_norm": 0.6415128707885742, "learning_rate": 6.238710275876962e-06, "loss": 1.3862, "mean_token_accuracy": 0.6575345396995544, "num_tokens": 2097417417.0, "step": 12511 }, { "entropy": 1.698015163342158, "epoch": 1.3745022108703413, "grad_norm": 0.6114696264266968, "learning_rate": 6.237351630499837e-06, "loss": 1.4891, "mean_token_accuracy": 0.6359892090161642, "num_tokens": 2097646264.0, "step": 12512 }, { "entropy": 1.764273762702942, "epoch": 1.3746120677817144, "grad_norm": 0.9009761810302734, "learning_rate": 6.235993135858387e-06, "loss": 1.4376, "mean_token_accuracy": 0.6558701246976852, "num_tokens": 2097847731.0, "step": 12513 }, { "entropy": 1.7166384359200795, "epoch": 1.3747219246930873, "grad_norm": 0.7256246209144592, "learning_rate": 6.234634791995603e-06, "loss": 1.2758, "mean_token_accuracy": 0.6786874433358511, "num_tokens": 2097976363.0, "step": 12514 }, { "entropy": 1.7404154340426128, "epoch": 1.3748317816044602, "grad_norm": 0.7519568800926208, "learning_rate": 6.233276598954485e-06, "loss": 1.4151, "mean_token_accuracy": 0.6605635484059652, "num_tokens": 2098120603.0, "step": 12515 }, { "entropy": 1.6740521490573883, "epoch": 1.374941638515833, "grad_norm": 0.7059155106544495, "learning_rate": 6.231918556778014e-06, "loss": 1.3228, "mean_token_accuracy": 0.6625839471817017, "num_tokens": 2098259613.0, "step": 12516 }, { "entropy": 1.7056198716163635, "epoch": 1.375051495427206, "grad_norm": 0.6181541085243225, "learning_rate": 6.2305606655091685e-06, "loss": 1.347, "mean_token_accuracy": 0.6637411365906397, "num_tokens": 2098407106.0, "step": 12517 }, { "entropy": 1.7412952582041423, "epoch": 1.375161352338579, "grad_norm": 0.645773708820343, "learning_rate": 6.229202925190931e-06, "loss": 1.5052, "mean_token_accuracy": 0.6412904262542725, "num_tokens": 2098599802.0, "step": 12518 }, { "entropy": 1.7033185958862305, "epoch": 1.375271209249952, "grad_norm": 0.7069703936576843, "learning_rate": 6.227845335866271e-06, "loss": 1.3859, "mean_token_accuracy": 0.6646387676397959, "num_tokens": 2098760048.0, "step": 12519 }, { "entropy": 1.7417400777339935, "epoch": 1.3753810661613248, "grad_norm": 0.8793759346008301, "learning_rate": 6.226487897578159e-06, "loss": 1.3239, "mean_token_accuracy": 0.665963664650917, "num_tokens": 2098930969.0, "step": 12520 }, { "entropy": 1.6477711200714111, "epoch": 1.375490923072698, "grad_norm": 0.8466112017631531, "learning_rate": 6.22513061036955e-06, "loss": 1.3336, "mean_token_accuracy": 0.6694683879613876, "num_tokens": 2099084185.0, "step": 12521 }, { "entropy": 1.7149950762589772, "epoch": 1.3756007799840708, "grad_norm": 0.8220770359039307, "learning_rate": 6.223773474283408e-06, "loss": 1.387, "mean_token_accuracy": 0.6811329424381256, "num_tokens": 2099253890.0, "step": 12522 }, { "entropy": 1.6823686361312866, "epoch": 1.3757106368954437, "grad_norm": 0.6571083664894104, "learning_rate": 6.222416489362683e-06, "loss": 1.2217, "mean_token_accuracy": 0.6786747376124064, "num_tokens": 2099388467.0, "step": 12523 }, { "entropy": 1.6723161041736603, "epoch": 1.3758204938068166, "grad_norm": 0.6704456806182861, "learning_rate": 6.221059655650321e-06, "loss": 1.369, "mean_token_accuracy": 0.6669376641511917, "num_tokens": 2099559332.0, "step": 12524 }, { "entropy": 1.7238895495732625, "epoch": 1.3759303507181895, "grad_norm": 0.846031904220581, "learning_rate": 6.21970297318927e-06, "loss": 1.5058, "mean_token_accuracy": 0.6613429884115855, "num_tokens": 2099730332.0, "step": 12525 }, { "entropy": 1.6958635946114857, "epoch": 1.3760402076295626, "grad_norm": 0.6999905705451965, "learning_rate": 6.218346442022462e-06, "loss": 1.2999, "mean_token_accuracy": 0.6632877240578333, "num_tokens": 2099900875.0, "step": 12526 }, { "entropy": 1.6386962433656056, "epoch": 1.3761500645409355, "grad_norm": 0.5973513126373291, "learning_rate": 6.2169900621928394e-06, "loss": 1.4151, "mean_token_accuracy": 0.6484298954407374, "num_tokens": 2100076851.0, "step": 12527 }, { "entropy": 1.6838893989721935, "epoch": 1.3762599214523084, "grad_norm": 0.7201468348503113, "learning_rate": 6.215633833743325e-06, "loss": 1.2795, "mean_token_accuracy": 0.6828643282254537, "num_tokens": 2100209931.0, "step": 12528 }, { "entropy": 1.653613954782486, "epoch": 1.3763697783636812, "grad_norm": 0.6516929268836975, "learning_rate": 6.214277756716841e-06, "loss": 1.3982, "mean_token_accuracy": 0.6462257554133733, "num_tokens": 2100421383.0, "step": 12529 }, { "entropy": 1.5921331147352855, "epoch": 1.3764796352750541, "grad_norm": 0.6113102436065674, "learning_rate": 6.212921831156309e-06, "loss": 1.3048, "mean_token_accuracy": 0.6834805657466253, "num_tokens": 2100566416.0, "step": 12530 }, { "entropy": 1.6861707468827565, "epoch": 1.3765894921864272, "grad_norm": 0.744138240814209, "learning_rate": 6.2115660571046475e-06, "loss": 1.4114, "mean_token_accuracy": 0.6627868016560873, "num_tokens": 2100707648.0, "step": 12531 }, { "entropy": 1.7084755897521973, "epoch": 1.3766993490978001, "grad_norm": 0.613015353679657, "learning_rate": 6.2102104346047635e-06, "loss": 1.4212, "mean_token_accuracy": 0.659169336160024, "num_tokens": 2100885511.0, "step": 12532 }, { "entropy": 1.7335072060426076, "epoch": 1.376809206009173, "grad_norm": 0.6884053945541382, "learning_rate": 6.208854963699555e-06, "loss": 1.3903, "mean_token_accuracy": 0.6577414770921072, "num_tokens": 2101034010.0, "step": 12533 }, { "entropy": 1.6870386103789012, "epoch": 1.376919062920546, "grad_norm": 0.6219229102134705, "learning_rate": 6.207499644431935e-06, "loss": 1.3897, "mean_token_accuracy": 0.6502025226751963, "num_tokens": 2101186648.0, "step": 12534 }, { "entropy": 1.7138726909955342, "epoch": 1.377028919831919, "grad_norm": 0.5587577819824219, "learning_rate": 6.206144476844789e-06, "loss": 1.4165, "mean_token_accuracy": 0.6410103340943655, "num_tokens": 2101425465.0, "step": 12535 }, { "entropy": 1.7444292902946472, "epoch": 1.3771387767432919, "grad_norm": 0.6542041301727295, "learning_rate": 6.204789460981008e-06, "loss": 1.5301, "mean_token_accuracy": 0.6419963190952936, "num_tokens": 2101626304.0, "step": 12536 }, { "entropy": 1.6845300594965618, "epoch": 1.3772486336546648, "grad_norm": 0.7498136758804321, "learning_rate": 6.203434596883482e-06, "loss": 1.3899, "mean_token_accuracy": 0.6670129199822744, "num_tokens": 2101765786.0, "step": 12537 }, { "entropy": 1.8169357279936473, "epoch": 1.3773584905660377, "grad_norm": 0.7087510824203491, "learning_rate": 6.202079884595088e-06, "loss": 1.3182, "mean_token_accuracy": 0.6673329919576645, "num_tokens": 2101886833.0, "step": 12538 }, { "entropy": 1.7348575592041016, "epoch": 1.3774683474774108, "grad_norm": 0.7547774910926819, "learning_rate": 6.200725324158705e-06, "loss": 1.3981, "mean_token_accuracy": 0.6532778888940811, "num_tokens": 2102006067.0, "step": 12539 }, { "entropy": 1.695908526579539, "epoch": 1.3775782043887836, "grad_norm": 0.7535329461097717, "learning_rate": 6.199370915617204e-06, "loss": 1.4789, "mean_token_accuracy": 0.665493423740069, "num_tokens": 2102171012.0, "step": 12540 }, { "entropy": 1.715345323085785, "epoch": 1.3776880613001565, "grad_norm": 0.6196267604827881, "learning_rate": 6.198016659013447e-06, "loss": 1.3696, "mean_token_accuracy": 0.6550077845652899, "num_tokens": 2102338771.0, "step": 12541 }, { "entropy": 1.689347783724467, "epoch": 1.3777979182115294, "grad_norm": 0.7949912548065186, "learning_rate": 6.196662554390298e-06, "loss": 1.2438, "mean_token_accuracy": 0.6799779733022054, "num_tokens": 2102449380.0, "step": 12542 }, { "entropy": 1.717601974805196, "epoch": 1.3779077751229023, "grad_norm": 0.682672381401062, "learning_rate": 6.19530860179062e-06, "loss": 1.303, "mean_token_accuracy": 0.6659560054540634, "num_tokens": 2102580923.0, "step": 12543 }, { "entropy": 1.6912067731221516, "epoch": 1.3780176320342754, "grad_norm": 0.6183836460113525, "learning_rate": 6.1939548012572585e-06, "loss": 1.5116, "mean_token_accuracy": 0.6377200831969579, "num_tokens": 2102835634.0, "step": 12544 }, { "entropy": 1.7292577226956685, "epoch": 1.3781274889456483, "grad_norm": 0.7108417749404907, "learning_rate": 6.1926011528330575e-06, "loss": 1.4302, "mean_token_accuracy": 0.6581776638825735, "num_tokens": 2102968731.0, "step": 12545 }, { "entropy": 1.663769433895747, "epoch": 1.3782373458570212, "grad_norm": 0.5661031603813171, "learning_rate": 6.191247656560868e-06, "loss": 1.3358, "mean_token_accuracy": 0.6701933294534683, "num_tokens": 2103135710.0, "step": 12546 }, { "entropy": 1.7361170947551727, "epoch": 1.3783472027683943, "grad_norm": 0.7165773510932922, "learning_rate": 6.189894312483524e-06, "loss": 1.4254, "mean_token_accuracy": 0.6522279679775238, "num_tokens": 2103314119.0, "step": 12547 }, { "entropy": 1.7245140473047893, "epoch": 1.3784570596797672, "grad_norm": 0.6944742202758789, "learning_rate": 6.188541120643854e-06, "loss": 1.2226, "mean_token_accuracy": 0.6783800820509592, "num_tokens": 2103440751.0, "step": 12548 }, { "entropy": 1.7549363176027934, "epoch": 1.37856691659114, "grad_norm": 0.7486315369606018, "learning_rate": 6.1871880810846915e-06, "loss": 1.3902, "mean_token_accuracy": 0.6532481958468755, "num_tokens": 2103581713.0, "step": 12549 }, { "entropy": 1.616852581501007, "epoch": 1.378676773502513, "grad_norm": 0.8074557781219482, "learning_rate": 6.185835193848856e-06, "loss": 1.2921, "mean_token_accuracy": 0.6893499394257864, "num_tokens": 2103724805.0, "step": 12550 }, { "entropy": 1.7497636179129283, "epoch": 1.3787866304138858, "grad_norm": 0.8509400486946106, "learning_rate": 6.184482458979169e-06, "loss": 1.4539, "mean_token_accuracy": 0.6395279069741567, "num_tokens": 2103893055.0, "step": 12551 }, { "entropy": 1.6811266740163167, "epoch": 1.378896487325259, "grad_norm": 0.7023653388023376, "learning_rate": 6.183129876518443e-06, "loss": 1.3276, "mean_token_accuracy": 0.666248674194018, "num_tokens": 2104081495.0, "step": 12552 }, { "entropy": 1.7035264372825623, "epoch": 1.3790063442366318, "grad_norm": 0.5721656084060669, "learning_rate": 6.181777446509482e-06, "loss": 1.4066, "mean_token_accuracy": 0.6570123036702474, "num_tokens": 2104292769.0, "step": 12553 }, { "entropy": 1.6963448226451874, "epoch": 1.3791162011480047, "grad_norm": 0.6713624596595764, "learning_rate": 6.180425168995094e-06, "loss": 1.4223, "mean_token_accuracy": 0.6561168730258942, "num_tokens": 2104442926.0, "step": 12554 }, { "entropy": 1.6694329380989075, "epoch": 1.3792260580593776, "grad_norm": 0.6325618624687195, "learning_rate": 6.179073044018082e-06, "loss": 1.4122, "mean_token_accuracy": 0.6522913922866186, "num_tokens": 2104676130.0, "step": 12555 }, { "entropy": 1.7309764126936595, "epoch": 1.3793359149707505, "grad_norm": 0.7151079773902893, "learning_rate": 6.177721071621234e-06, "loss": 1.3119, "mean_token_accuracy": 0.6660696119070053, "num_tokens": 2104807634.0, "step": 12556 }, { "entropy": 1.6221238374710083, "epoch": 1.3794457718821236, "grad_norm": 0.7269033193588257, "learning_rate": 6.176369251847341e-06, "loss": 1.343, "mean_token_accuracy": 0.6632204552491506, "num_tokens": 2104975691.0, "step": 12557 }, { "entropy": 1.7174046039581299, "epoch": 1.3795556287934965, "grad_norm": 0.7088026404380798, "learning_rate": 6.175017584739187e-06, "loss": 1.2995, "mean_token_accuracy": 0.6678259124358495, "num_tokens": 2105128915.0, "step": 12558 }, { "entropy": 1.7887861529986064, "epoch": 1.3796654857048694, "grad_norm": 0.696653425693512, "learning_rate": 6.173666070339554e-06, "loss": 1.4658, "mean_token_accuracy": 0.6444426278273264, "num_tokens": 2105294149.0, "step": 12559 }, { "entropy": 1.7095742324988048, "epoch": 1.3797753426162425, "grad_norm": 0.6750052571296692, "learning_rate": 6.172314708691212e-06, "loss": 1.3382, "mean_token_accuracy": 0.6658424387375513, "num_tokens": 2105438713.0, "step": 12560 }, { "entropy": 1.694052904844284, "epoch": 1.3798851995276153, "grad_norm": 0.5075100064277649, "learning_rate": 6.170963499836937e-06, "loss": 1.3667, "mean_token_accuracy": 0.6514973640441895, "num_tokens": 2105612254.0, "step": 12561 }, { "entropy": 1.6640310784180958, "epoch": 1.3799950564389882, "grad_norm": 0.5934505462646484, "learning_rate": 6.169612443819488e-06, "loss": 1.3747, "mean_token_accuracy": 0.6665078550577164, "num_tokens": 2105797489.0, "step": 12562 }, { "entropy": 1.6789606213569641, "epoch": 1.3801049133503611, "grad_norm": 0.6959724426269531, "learning_rate": 6.1682615406816325e-06, "loss": 1.2852, "mean_token_accuracy": 0.6706260542074839, "num_tokens": 2105957291.0, "step": 12563 }, { "entropy": 1.7314012149969737, "epoch": 1.380214770261734, "grad_norm": 0.7916790246963501, "learning_rate": 6.166910790466121e-06, "loss": 1.3361, "mean_token_accuracy": 0.6655499537785848, "num_tokens": 2106101795.0, "step": 12564 }, { "entropy": 1.6863299409548442, "epoch": 1.380324627173107, "grad_norm": 0.8303491473197937, "learning_rate": 6.165560193215702e-06, "loss": 1.2178, "mean_token_accuracy": 0.6846508930126826, "num_tokens": 2106228777.0, "step": 12565 }, { "entropy": 1.6824834048748016, "epoch": 1.38043448408448, "grad_norm": 0.6064619421958923, "learning_rate": 6.164209748973124e-06, "loss": 1.4575, "mean_token_accuracy": 0.6451242392261823, "num_tokens": 2106442223.0, "step": 12566 }, { "entropy": 1.6857933203379314, "epoch": 1.3805443409958529, "grad_norm": 0.6482805609703064, "learning_rate": 6.162859457781132e-06, "loss": 1.4251, "mean_token_accuracy": 0.6491700212160746, "num_tokens": 2106642621.0, "step": 12567 }, { "entropy": 1.746164898077647, "epoch": 1.3806541979072258, "grad_norm": 0.7709128260612488, "learning_rate": 6.161509319682459e-06, "loss": 1.3878, "mean_token_accuracy": 0.6586853563785553, "num_tokens": 2106813233.0, "step": 12568 }, { "entropy": 1.7627079784870148, "epoch": 1.3807640548185987, "grad_norm": 1.2081003189086914, "learning_rate": 6.160159334719833e-06, "loss": 1.4917, "mean_token_accuracy": 0.6570613433917364, "num_tokens": 2106996455.0, "step": 12569 }, { "entropy": 1.6405859887599945, "epoch": 1.3808739117299718, "grad_norm": 0.703881561756134, "learning_rate": 6.158809502935985e-06, "loss": 1.2482, "mean_token_accuracy": 0.6670927107334137, "num_tokens": 2107209001.0, "step": 12570 }, { "entropy": 1.6598562101523082, "epoch": 1.3809837686413446, "grad_norm": 0.7052212953567505, "learning_rate": 6.1574598243736346e-06, "loss": 1.3986, "mean_token_accuracy": 0.662861779332161, "num_tokens": 2107392456.0, "step": 12571 }, { "entropy": 1.7908701996008556, "epoch": 1.3810936255527175, "grad_norm": 0.6938340663909912, "learning_rate": 6.156110299075501e-06, "loss": 1.3683, "mean_token_accuracy": 0.6564254115025202, "num_tokens": 2107530671.0, "step": 12572 }, { "entropy": 1.7378122210502625, "epoch": 1.3812034824640906, "grad_norm": 0.8076921701431274, "learning_rate": 6.154760927084289e-06, "loss": 1.5943, "mean_token_accuracy": 0.6301184669137001, "num_tokens": 2107734369.0, "step": 12573 }, { "entropy": 1.6900312105814617, "epoch": 1.3813133393754635, "grad_norm": 0.7580331563949585, "learning_rate": 6.153411708442709e-06, "loss": 1.4061, "mean_token_accuracy": 0.6511543840169907, "num_tokens": 2107933013.0, "step": 12574 }, { "entropy": 1.7424448728561401, "epoch": 1.3814231962868364, "grad_norm": 0.8853403329849243, "learning_rate": 6.152062643193469e-06, "loss": 1.395, "mean_token_accuracy": 0.6606808751821518, "num_tokens": 2108052983.0, "step": 12575 }, { "entropy": 1.6669080555438995, "epoch": 1.3815330531982093, "grad_norm": 0.7201902866363525, "learning_rate": 6.150713731379262e-06, "loss": 1.3757, "mean_token_accuracy": 0.6702162722746531, "num_tokens": 2108200984.0, "step": 12576 }, { "entropy": 1.6895977854728699, "epoch": 1.3816429101095822, "grad_norm": 0.6631450653076172, "learning_rate": 6.1493649730427775e-06, "loss": 1.4566, "mean_token_accuracy": 0.6551151325305303, "num_tokens": 2108383996.0, "step": 12577 }, { "entropy": 1.8024785220623016, "epoch": 1.3817527670209553, "grad_norm": 0.758419394493103, "learning_rate": 6.148016368226708e-06, "loss": 1.4561, "mean_token_accuracy": 0.652512788772583, "num_tokens": 2108632948.0, "step": 12578 }, { "entropy": 1.6716083586215973, "epoch": 1.3818626239323282, "grad_norm": 0.6724756360054016, "learning_rate": 6.1466679169737305e-06, "loss": 1.2769, "mean_token_accuracy": 0.6760951578617096, "num_tokens": 2108779464.0, "step": 12579 }, { "entropy": 1.7235155701637268, "epoch": 1.381972480843701, "grad_norm": 0.6784364581108093, "learning_rate": 6.145319619326531e-06, "loss": 1.4671, "mean_token_accuracy": 0.6498638937870661, "num_tokens": 2108948254.0, "step": 12580 }, { "entropy": 1.763847251733144, "epoch": 1.382082337755074, "grad_norm": 0.8038673400878906, "learning_rate": 6.143971475327777e-06, "loss": 1.5473, "mean_token_accuracy": 0.6444573253393173, "num_tokens": 2109131964.0, "step": 12581 }, { "entropy": 1.7665770848592122, "epoch": 1.3821921946664468, "grad_norm": 0.6910974979400635, "learning_rate": 6.142623485020135e-06, "loss": 1.4179, "mean_token_accuracy": 0.649805506070455, "num_tokens": 2109315655.0, "step": 12582 }, { "entropy": 1.6560252110163372, "epoch": 1.38230205157782, "grad_norm": 0.6830186247825623, "learning_rate": 6.141275648446274e-06, "loss": 1.3571, "mean_token_accuracy": 0.6550218462944031, "num_tokens": 2109499004.0, "step": 12583 }, { "entropy": 1.7072090804576874, "epoch": 1.3824119084891928, "grad_norm": 0.7160013318061829, "learning_rate": 6.139927965648848e-06, "loss": 1.3447, "mean_token_accuracy": 0.6548336744308472, "num_tokens": 2109659932.0, "step": 12584 }, { "entropy": 1.686892330646515, "epoch": 1.3825217654005657, "grad_norm": 0.6150763034820557, "learning_rate": 6.138580436670512e-06, "loss": 1.4549, "mean_token_accuracy": 0.635261004169782, "num_tokens": 2109900953.0, "step": 12585 }, { "entropy": 1.75520854194959, "epoch": 1.3826316223119388, "grad_norm": 0.6944630146026611, "learning_rate": 6.137233061553914e-06, "loss": 1.5959, "mean_token_accuracy": 0.6286770751078924, "num_tokens": 2110108815.0, "step": 12586 }, { "entropy": 1.720687488714854, "epoch": 1.3827414792233117, "grad_norm": 0.6678749322891235, "learning_rate": 6.1358858403416985e-06, "loss": 1.3596, "mean_token_accuracy": 0.6810717135667801, "num_tokens": 2110296447.0, "step": 12587 }, { "entropy": 1.7100276947021484, "epoch": 1.3828513361346846, "grad_norm": 0.6869692206382751, "learning_rate": 6.134538773076506e-06, "loss": 1.3972, "mean_token_accuracy": 0.6752725392580032, "num_tokens": 2110501100.0, "step": 12588 }, { "entropy": 1.7093996107578278, "epoch": 1.3829611930460575, "grad_norm": 0.7765591144561768, "learning_rate": 6.1331918598009664e-06, "loss": 1.2499, "mean_token_accuracy": 0.6684116174777349, "num_tokens": 2110646493.0, "step": 12589 }, { "entropy": 1.7051993509133656, "epoch": 1.3830710499574304, "grad_norm": 0.7620055079460144, "learning_rate": 6.131845100557713e-06, "loss": 1.3419, "mean_token_accuracy": 0.6666328310966492, "num_tokens": 2110816819.0, "step": 12590 }, { "entropy": 1.7090232570966084, "epoch": 1.3831809068688035, "grad_norm": 0.6869613528251648, "learning_rate": 6.130498495389365e-06, "loss": 1.4943, "mean_token_accuracy": 0.6450514495372772, "num_tokens": 2110994559.0, "step": 12591 }, { "entropy": 1.6619928280512493, "epoch": 1.3832907637801763, "grad_norm": 0.6001034379005432, "learning_rate": 6.129152044338551e-06, "loss": 1.2886, "mean_token_accuracy": 0.6643996685743332, "num_tokens": 2111140891.0, "step": 12592 }, { "entropy": 1.7544625401496887, "epoch": 1.3834006206915492, "grad_norm": 0.7446973919868469, "learning_rate": 6.1278057474478795e-06, "loss": 1.2745, "mean_token_accuracy": 0.676040510336558, "num_tokens": 2111325676.0, "step": 12593 }, { "entropy": 1.6300282776355743, "epoch": 1.3835104776029221, "grad_norm": 0.7586323022842407, "learning_rate": 6.1264596047599555e-06, "loss": 1.4867, "mean_token_accuracy": 0.6626367469628652, "num_tokens": 2111503103.0, "step": 12594 }, { "entropy": 1.6632478535175323, "epoch": 1.383620334514295, "grad_norm": 0.6314704418182373, "learning_rate": 6.125113616317394e-06, "loss": 1.4226, "mean_token_accuracy": 0.6542702714602152, "num_tokens": 2111687155.0, "step": 12595 }, { "entropy": 1.7528144121170044, "epoch": 1.383730191425668, "grad_norm": 0.7653163075447083, "learning_rate": 6.123767782162789e-06, "loss": 1.2804, "mean_token_accuracy": 0.672723392645518, "num_tokens": 2111806165.0, "step": 12596 }, { "entropy": 1.7398656606674194, "epoch": 1.383840048337041, "grad_norm": 0.7522697448730469, "learning_rate": 6.1224221023387335e-06, "loss": 1.426, "mean_token_accuracy": 0.6618303308884302, "num_tokens": 2111943423.0, "step": 12597 }, { "entropy": 1.7423573831717174, "epoch": 1.3839499052484139, "grad_norm": 0.8257996439933777, "learning_rate": 6.121076576887821e-06, "loss": 1.4182, "mean_token_accuracy": 0.678856705625852, "num_tokens": 2112103993.0, "step": 12598 }, { "entropy": 1.7207870781421661, "epoch": 1.384059762159787, "grad_norm": 0.6698588728904724, "learning_rate": 6.119731205852638e-06, "loss": 1.5969, "mean_token_accuracy": 0.6185207416613897, "num_tokens": 2112323075.0, "step": 12599 }, { "entropy": 1.7429817418257396, "epoch": 1.3841696190711599, "grad_norm": 0.6996835470199585, "learning_rate": 6.118385989275766e-06, "loss": 1.4458, "mean_token_accuracy": 0.6526039093732834, "num_tokens": 2112446359.0, "step": 12600 }, { "entropy": 1.7398698528607686, "epoch": 1.3842794759825328, "grad_norm": 0.8282992839813232, "learning_rate": 6.117040927199771e-06, "loss": 1.5379, "mean_token_accuracy": 0.648671011130015, "num_tokens": 2112590838.0, "step": 12601 }, { "entropy": 1.6970640818277996, "epoch": 1.3843893328939056, "grad_norm": 0.7883489727973938, "learning_rate": 6.115696019667236e-06, "loss": 1.5544, "mean_token_accuracy": 0.6379801481962204, "num_tokens": 2112806234.0, "step": 12602 }, { "entropy": 1.7620785633722942, "epoch": 1.3844991898052785, "grad_norm": 0.8957377672195435, "learning_rate": 6.1143512667207195e-06, "loss": 1.4253, "mean_token_accuracy": 0.6558419863382975, "num_tokens": 2112951869.0, "step": 12603 }, { "entropy": 1.6944806178410847, "epoch": 1.3846090467166516, "grad_norm": 0.591116726398468, "learning_rate": 6.113006668402783e-06, "loss": 1.5106, "mean_token_accuracy": 0.6455500026543936, "num_tokens": 2113186421.0, "step": 12604 }, { "entropy": 1.6918245454629262, "epoch": 1.3847189036280245, "grad_norm": 0.9420191645622253, "learning_rate": 6.111662224755984e-06, "loss": 1.0811, "mean_token_accuracy": 0.6826836367448171, "num_tokens": 2113359446.0, "step": 12605 }, { "entropy": 1.7305250068505604, "epoch": 1.3848287605393974, "grad_norm": 0.7896611094474792, "learning_rate": 6.110317935822871e-06, "loss": 1.4241, "mean_token_accuracy": 0.657659446199735, "num_tokens": 2113523537.0, "step": 12606 }, { "entropy": 1.6499216953913372, "epoch": 1.3849386174507703, "grad_norm": 0.7420812845230103, "learning_rate": 6.108973801645994e-06, "loss": 1.3184, "mean_token_accuracy": 0.6785426884889603, "num_tokens": 2113662311.0, "step": 12607 }, { "entropy": 1.7228143910566966, "epoch": 1.3850484743621432, "grad_norm": 0.7056854367256165, "learning_rate": 6.107629822267894e-06, "loss": 1.3208, "mean_token_accuracy": 0.6636027296384176, "num_tokens": 2113793614.0, "step": 12608 }, { "entropy": 1.795301725467046, "epoch": 1.3851583312735163, "grad_norm": 0.9566717147827148, "learning_rate": 6.106285997731101e-06, "loss": 1.4437, "mean_token_accuracy": 0.6586398979028066, "num_tokens": 2113942725.0, "step": 12609 }, { "entropy": 1.679547091325124, "epoch": 1.3852681881848892, "grad_norm": 0.58782958984375, "learning_rate": 6.1049423280781515e-06, "loss": 1.3413, "mean_token_accuracy": 0.6591441829999288, "num_tokens": 2114102494.0, "step": 12610 }, { "entropy": 1.7034885783990223, "epoch": 1.385378045096262, "grad_norm": 0.7246780395507812, "learning_rate": 6.103598813351575e-06, "loss": 1.3381, "mean_token_accuracy": 0.6627988219261169, "num_tokens": 2114234854.0, "step": 12611 }, { "entropy": 1.7090636690457661, "epoch": 1.3854879020076352, "grad_norm": 0.6690557599067688, "learning_rate": 6.10225545359389e-06, "loss": 1.4635, "mean_token_accuracy": 0.6498597512642542, "num_tokens": 2114454147.0, "step": 12612 }, { "entropy": 1.7583944102128346, "epoch": 1.385597758919008, "grad_norm": 0.8286144137382507, "learning_rate": 6.100912248847608e-06, "loss": 1.3619, "mean_token_accuracy": 0.6575885117053986, "num_tokens": 2114634934.0, "step": 12613 }, { "entropy": 1.6706369022528331, "epoch": 1.385707615830381, "grad_norm": 0.6204984188079834, "learning_rate": 6.099569199155251e-06, "loss": 1.3126, "mean_token_accuracy": 0.6611980448166529, "num_tokens": 2114792957.0, "step": 12614 }, { "entropy": 1.6743928492069244, "epoch": 1.3858174727417538, "grad_norm": 0.6785169243812561, "learning_rate": 6.09822630455932e-06, "loss": 1.3939, "mean_token_accuracy": 0.658115471402804, "num_tokens": 2114962832.0, "step": 12615 }, { "entropy": 1.6953681409358978, "epoch": 1.3859273296531267, "grad_norm": 0.6881593465805054, "learning_rate": 6.0968835651023135e-06, "loss": 1.389, "mean_token_accuracy": 0.6494457125663757, "num_tokens": 2115136453.0, "step": 12616 }, { "entropy": 1.7425408363342285, "epoch": 1.3860371865644998, "grad_norm": 0.7663772702217102, "learning_rate": 6.0955409808267375e-06, "loss": 1.4875, "mean_token_accuracy": 0.6456399957338969, "num_tokens": 2115377261.0, "step": 12617 }, { "entropy": 1.6987970372041066, "epoch": 1.3861470434758727, "grad_norm": 0.6070899963378906, "learning_rate": 6.0941985517750745e-06, "loss": 1.4082, "mean_token_accuracy": 0.6523675471544266, "num_tokens": 2115581762.0, "step": 12618 }, { "entropy": 1.7355947196483612, "epoch": 1.3862569003872456, "grad_norm": 0.7134828567504883, "learning_rate": 6.092856277989822e-06, "loss": 1.1913, "mean_token_accuracy": 0.687599798043569, "num_tokens": 2115710252.0, "step": 12619 }, { "entropy": 1.6785525679588318, "epoch": 1.3863667572986185, "grad_norm": 0.6055605411529541, "learning_rate": 6.0915141595134555e-06, "loss": 1.3615, "mean_token_accuracy": 0.6554200698932012, "num_tokens": 2115893536.0, "step": 12620 }, { "entropy": 1.7625056405862172, "epoch": 1.3864766142099914, "grad_norm": 0.9016237854957581, "learning_rate": 6.090172196388451e-06, "loss": 1.5013, "mean_token_accuracy": 0.6534651468197504, "num_tokens": 2116052986.0, "step": 12621 }, { "entropy": 1.7133546868960063, "epoch": 1.3865864711213645, "grad_norm": 0.6940526366233826, "learning_rate": 6.088830388657284e-06, "loss": 1.3231, "mean_token_accuracy": 0.6634116520484289, "num_tokens": 2116218287.0, "step": 12622 }, { "entropy": 1.6911606689294179, "epoch": 1.3866963280327373, "grad_norm": 0.6579228639602661, "learning_rate": 6.0874887363624255e-06, "loss": 1.2912, "mean_token_accuracy": 0.6736189971367518, "num_tokens": 2116348102.0, "step": 12623 }, { "entropy": 1.6526914338270824, "epoch": 1.3868061849441102, "grad_norm": 0.6540764570236206, "learning_rate": 6.086147239546336e-06, "loss": 1.418, "mean_token_accuracy": 0.6521534671386083, "num_tokens": 2116528433.0, "step": 12624 }, { "entropy": 1.6569193700949352, "epoch": 1.3869160418554833, "grad_norm": 0.5886544585227966, "learning_rate": 6.084805898251468e-06, "loss": 1.4071, "mean_token_accuracy": 0.660218303402265, "num_tokens": 2116774255.0, "step": 12625 }, { "entropy": 1.7367678980032604, "epoch": 1.3870258987668562, "grad_norm": 0.7575657367706299, "learning_rate": 6.083464712520282e-06, "loss": 1.415, "mean_token_accuracy": 0.6493276755015055, "num_tokens": 2116913832.0, "step": 12626 }, { "entropy": 1.7625746925671895, "epoch": 1.387135755678229, "grad_norm": 0.723961353302002, "learning_rate": 6.082123682395222e-06, "loss": 1.5134, "mean_token_accuracy": 0.6588109185298284, "num_tokens": 2117102538.0, "step": 12627 }, { "entropy": 1.6678134202957153, "epoch": 1.387245612589602, "grad_norm": 0.7065550088882446, "learning_rate": 6.080782807918728e-06, "loss": 1.3057, "mean_token_accuracy": 0.676655059059461, "num_tokens": 2117230523.0, "step": 12628 }, { "entropy": 1.742838462193807, "epoch": 1.3873554695009749, "grad_norm": 0.7316716313362122, "learning_rate": 6.079442089133245e-06, "loss": 1.3569, "mean_token_accuracy": 0.6540696074565252, "num_tokens": 2117359500.0, "step": 12629 }, { "entropy": 1.7375612556934357, "epoch": 1.387465326412348, "grad_norm": 0.756300687789917, "learning_rate": 6.078101526081199e-06, "loss": 1.5052, "mean_token_accuracy": 0.6618654529253641, "num_tokens": 2117537412.0, "step": 12630 }, { "entropy": 1.6921890676021576, "epoch": 1.3875751833237209, "grad_norm": 0.6773094534873962, "learning_rate": 6.076761118805026e-06, "loss": 1.3607, "mean_token_accuracy": 0.661697601278623, "num_tokens": 2117723137.0, "step": 12631 }, { "entropy": 1.68595223625501, "epoch": 1.3876850402350938, "grad_norm": 0.6630276441574097, "learning_rate": 6.075420867347144e-06, "loss": 1.3329, "mean_token_accuracy": 0.6754196931918462, "num_tokens": 2117863313.0, "step": 12632 }, { "entropy": 1.7486995458602905, "epoch": 1.3877948971464666, "grad_norm": 0.8930343985557556, "learning_rate": 6.07408077174997e-06, "loss": 1.4725, "mean_token_accuracy": 0.660398542881012, "num_tokens": 2118000107.0, "step": 12633 }, { "entropy": 1.6790860096613567, "epoch": 1.3879047540578395, "grad_norm": 0.633882462978363, "learning_rate": 6.072740832055923e-06, "loss": 1.402, "mean_token_accuracy": 0.6450261523326238, "num_tokens": 2118213587.0, "step": 12634 }, { "entropy": 1.7514410018920898, "epoch": 1.3880146109692126, "grad_norm": 0.7302515506744385, "learning_rate": 6.071401048307406e-06, "loss": 1.3367, "mean_token_accuracy": 0.6589195132255554, "num_tokens": 2118373897.0, "step": 12635 }, { "entropy": 1.734345058600108, "epoch": 1.3881244678805855, "grad_norm": 0.6358147859573364, "learning_rate": 6.070061420546827e-06, "loss": 1.4476, "mean_token_accuracy": 0.6477800408999125, "num_tokens": 2118531537.0, "step": 12636 }, { "entropy": 1.7529782156149547, "epoch": 1.3882343247919584, "grad_norm": 0.6754707098007202, "learning_rate": 6.0687219488165826e-06, "loss": 1.3373, "mean_token_accuracy": 0.6639518241087595, "num_tokens": 2118681904.0, "step": 12637 }, { "entropy": 1.674676090478897, "epoch": 1.3883441817033315, "grad_norm": 0.8263653516769409, "learning_rate": 6.067382633159062e-06, "loss": 1.2034, "mean_token_accuracy": 0.6820022811492285, "num_tokens": 2118787859.0, "step": 12638 }, { "entropy": 1.7267674307028453, "epoch": 1.3884540386147044, "grad_norm": 0.6797496676445007, "learning_rate": 6.066043473616665e-06, "loss": 1.3251, "mean_token_accuracy": 0.6689668297767639, "num_tokens": 2118933401.0, "step": 12639 }, { "entropy": 1.6904515027999878, "epoch": 1.3885638955260773, "grad_norm": 0.7103528380393982, "learning_rate": 6.064704470231766e-06, "loss": 1.3393, "mean_token_accuracy": 0.6714362452427546, "num_tokens": 2119096794.0, "step": 12640 }, { "entropy": 1.729119469722112, "epoch": 1.3886737524374502, "grad_norm": 0.7350696921348572, "learning_rate": 6.063365623046744e-06, "loss": 1.4866, "mean_token_accuracy": 0.6382670154174169, "num_tokens": 2119289765.0, "step": 12641 }, { "entropy": 1.6505240897337596, "epoch": 1.388783609348823, "grad_norm": 0.6928836107254028, "learning_rate": 6.062026932103976e-06, "loss": 1.3396, "mean_token_accuracy": 0.6640495459238688, "num_tokens": 2119449525.0, "step": 12642 }, { "entropy": 1.6739195088545482, "epoch": 1.3888934662601962, "grad_norm": 0.6808801293373108, "learning_rate": 6.0606883974458345e-06, "loss": 1.4049, "mean_token_accuracy": 0.6502687732378641, "num_tokens": 2119607013.0, "step": 12643 }, { "entropy": 1.7042246758937836, "epoch": 1.389003323171569, "grad_norm": 0.9757330417633057, "learning_rate": 6.059350019114678e-06, "loss": 1.5106, "mean_token_accuracy": 0.6467631061871847, "num_tokens": 2119764218.0, "step": 12644 }, { "entropy": 1.7424436310927074, "epoch": 1.389113180082942, "grad_norm": 0.7235627770423889, "learning_rate": 6.0580117971528655e-06, "loss": 1.3913, "mean_token_accuracy": 0.6477436472972234, "num_tokens": 2119946466.0, "step": 12645 }, { "entropy": 1.7050765951474507, "epoch": 1.389223036994315, "grad_norm": 0.6872043609619141, "learning_rate": 6.056673731602753e-06, "loss": 1.2568, "mean_token_accuracy": 0.6747584690650305, "num_tokens": 2120094895.0, "step": 12646 }, { "entropy": 1.667133589585622, "epoch": 1.3893328939056877, "grad_norm": 0.7159779071807861, "learning_rate": 6.055335822506688e-06, "loss": 1.2612, "mean_token_accuracy": 0.6703705290953318, "num_tokens": 2120234938.0, "step": 12647 }, { "entropy": 1.7026324371496837, "epoch": 1.3894427508170608, "grad_norm": 0.6149495244026184, "learning_rate": 6.053998069907019e-06, "loss": 1.3952, "mean_token_accuracy": 0.6657251864671707, "num_tokens": 2120393921.0, "step": 12648 }, { "entropy": 1.7234665950139363, "epoch": 1.3895526077284337, "grad_norm": 0.6455737352371216, "learning_rate": 6.052660473846084e-06, "loss": 1.5308, "mean_token_accuracy": 0.626121923327446, "num_tokens": 2120594349.0, "step": 12649 }, { "entropy": 1.6969818969567616, "epoch": 1.3896624646398066, "grad_norm": 0.7119026184082031, "learning_rate": 6.05132303436621e-06, "loss": 1.4113, "mean_token_accuracy": 0.6501194735368093, "num_tokens": 2120773080.0, "step": 12650 }, { "entropy": 1.7361374100049336, "epoch": 1.3897723215511797, "grad_norm": 0.7004039287567139, "learning_rate": 6.049985751509737e-06, "loss": 1.3639, "mean_token_accuracy": 0.6526622970898946, "num_tokens": 2120923549.0, "step": 12651 }, { "entropy": 1.737576534350713, "epoch": 1.3898821784625526, "grad_norm": 0.7022482752799988, "learning_rate": 6.048648625318984e-06, "loss": 1.4327, "mean_token_accuracy": 0.6477037022511164, "num_tokens": 2121135737.0, "step": 12652 }, { "entropy": 1.6698547104994457, "epoch": 1.3899920353739255, "grad_norm": 0.6836487054824829, "learning_rate": 6.0473116558362664e-06, "loss": 1.2327, "mean_token_accuracy": 0.6811383267243704, "num_tokens": 2121273006.0, "step": 12653 }, { "entropy": 1.696809043486913, "epoch": 1.3901018922852983, "grad_norm": 0.820905327796936, "learning_rate": 6.045974843103905e-06, "loss": 1.3486, "mean_token_accuracy": 0.6534950186808904, "num_tokens": 2121413071.0, "step": 12654 }, { "entropy": 1.6631783346335094, "epoch": 1.3902117491966712, "grad_norm": 0.6575664281845093, "learning_rate": 6.0446381871642094e-06, "loss": 1.4987, "mean_token_accuracy": 0.6349124858776728, "num_tokens": 2121632157.0, "step": 12655 }, { "entropy": 1.6863780121008556, "epoch": 1.3903216061080443, "grad_norm": 0.6468070149421692, "learning_rate": 6.043301688059482e-06, "loss": 1.4576, "mean_token_accuracy": 0.6422467132409414, "num_tokens": 2121782067.0, "step": 12656 }, { "entropy": 1.7445188562075298, "epoch": 1.3904314630194172, "grad_norm": 0.6437369585037231, "learning_rate": 6.04196534583202e-06, "loss": 1.3062, "mean_token_accuracy": 0.6756186882654825, "num_tokens": 2121955217.0, "step": 12657 }, { "entropy": 1.7488488654295604, "epoch": 1.39054131993079, "grad_norm": 0.7552010416984558, "learning_rate": 6.0406291605241255e-06, "loss": 1.321, "mean_token_accuracy": 0.6689753333727518, "num_tokens": 2122074461.0, "step": 12658 }, { "entropy": 1.6862552265326183, "epoch": 1.3906511768421632, "grad_norm": 0.7533565759658813, "learning_rate": 6.039293132178078e-06, "loss": 1.4313, "mean_token_accuracy": 0.6706264317035675, "num_tokens": 2122270613.0, "step": 12659 }, { "entropy": 1.668715238571167, "epoch": 1.390761033753536, "grad_norm": 0.6359433531761169, "learning_rate": 6.0379572608361715e-06, "loss": 1.2886, "mean_token_accuracy": 0.6727031916379929, "num_tokens": 2122418030.0, "step": 12660 }, { "entropy": 1.7360176543394725, "epoch": 1.390870890664909, "grad_norm": 0.6393101215362549, "learning_rate": 6.036621546540682e-06, "loss": 1.4723, "mean_token_accuracy": 0.6463060726722082, "num_tokens": 2122658480.0, "step": 12661 }, { "entropy": 1.6931299567222595, "epoch": 1.3909807475762819, "grad_norm": 0.6674166321754456, "learning_rate": 6.035285989333879e-06, "loss": 1.2776, "mean_token_accuracy": 0.6711171269416809, "num_tokens": 2122775840.0, "step": 12662 }, { "entropy": 1.6795523365338643, "epoch": 1.3910906044876548, "grad_norm": 0.6074432134628296, "learning_rate": 6.033950589258042e-06, "loss": 1.3652, "mean_token_accuracy": 0.6604388256867727, "num_tokens": 2122943493.0, "step": 12663 }, { "entropy": 1.6648745040098827, "epoch": 1.3912004613990279, "grad_norm": 0.6981958150863647, "learning_rate": 6.032615346355431e-06, "loss": 1.5333, "mean_token_accuracy": 0.6418487280607224, "num_tokens": 2123127314.0, "step": 12664 }, { "entropy": 1.6763863563537598, "epoch": 1.3913103183104008, "grad_norm": 0.6888807415962219, "learning_rate": 6.031280260668304e-06, "loss": 1.3952, "mean_token_accuracy": 0.6572959423065186, "num_tokens": 2123323822.0, "step": 12665 }, { "entropy": 1.6850894292195637, "epoch": 1.3914201752217736, "grad_norm": 0.6015814542770386, "learning_rate": 6.029945332238916e-06, "loss": 1.5196, "mean_token_accuracy": 0.6445889174938202, "num_tokens": 2123520705.0, "step": 12666 }, { "entropy": 1.7734043498833973, "epoch": 1.3915300321331465, "grad_norm": 0.6743616461753845, "learning_rate": 6.028610561109522e-06, "loss": 1.5488, "mean_token_accuracy": 0.6452811906735102, "num_tokens": 2123672858.0, "step": 12667 }, { "entropy": 1.7352135578791301, "epoch": 1.3916398890445194, "grad_norm": 0.73172527551651, "learning_rate": 6.027275947322364e-06, "loss": 1.3727, "mean_token_accuracy": 0.6599378883838654, "num_tokens": 2123804318.0, "step": 12668 }, { "entropy": 1.7475886444250743, "epoch": 1.3917497459558925, "grad_norm": 0.6466162800788879, "learning_rate": 6.025941490919678e-06, "loss": 1.4018, "mean_token_accuracy": 0.6429226100444794, "num_tokens": 2123975562.0, "step": 12669 }, { "entropy": 1.7205195526281993, "epoch": 1.3918596028672654, "grad_norm": 0.6437305808067322, "learning_rate": 6.024607191943707e-06, "loss": 1.3518, "mean_token_accuracy": 0.6662203172842661, "num_tokens": 2124119989.0, "step": 12670 }, { "entropy": 1.6926952401796977, "epoch": 1.3919694597786383, "grad_norm": 0.6400312185287476, "learning_rate": 6.023273050436671e-06, "loss": 1.3766, "mean_token_accuracy": 0.6680413832267126, "num_tokens": 2124270078.0, "step": 12671 }, { "entropy": 1.7688163717587788, "epoch": 1.3920793166900114, "grad_norm": 0.7608019113540649, "learning_rate": 6.021939066440805e-06, "loss": 1.3084, "mean_token_accuracy": 0.6677152961492538, "num_tokens": 2124413626.0, "step": 12672 }, { "entropy": 1.6955342292785645, "epoch": 1.3921891736013843, "grad_norm": 0.669330358505249, "learning_rate": 6.020605239998325e-06, "loss": 1.4574, "mean_token_accuracy": 0.6318171223004659, "num_tokens": 2124604904.0, "step": 12673 }, { "entropy": 1.7541027069091797, "epoch": 1.3922990305127572, "grad_norm": 0.7224034667015076, "learning_rate": 6.0192715711514415e-06, "loss": 1.3589, "mean_token_accuracy": 0.6613028347492218, "num_tokens": 2124738170.0, "step": 12674 }, { "entropy": 1.623542954524358, "epoch": 1.39240888742413, "grad_norm": 0.7347180247306824, "learning_rate": 6.01793805994237e-06, "loss": 1.4068, "mean_token_accuracy": 0.6547928502162298, "num_tokens": 2124955515.0, "step": 12675 }, { "entropy": 1.7191152274608612, "epoch": 1.392518744335503, "grad_norm": 0.6667714715003967, "learning_rate": 6.016604706413316e-06, "loss": 1.3162, "mean_token_accuracy": 0.6564339945713679, "num_tokens": 2125103505.0, "step": 12676 }, { "entropy": 1.7070033649603527, "epoch": 1.392628601246876, "grad_norm": 0.868321418762207, "learning_rate": 6.015271510606473e-06, "loss": 1.4041, "mean_token_accuracy": 0.6572008927663168, "num_tokens": 2125269047.0, "step": 12677 }, { "entropy": 1.7142626245816548, "epoch": 1.392738458158249, "grad_norm": 0.6363254189491272, "learning_rate": 6.01393847256404e-06, "loss": 1.5588, "mean_token_accuracy": 0.6368576760093371, "num_tokens": 2125441325.0, "step": 12678 }, { "entropy": 1.7593932350476582, "epoch": 1.3928483150696218, "grad_norm": 0.5922143459320068, "learning_rate": 6.012605592328213e-06, "loss": 1.4497, "mean_token_accuracy": 0.6469251116116842, "num_tokens": 2125633983.0, "step": 12679 }, { "entropy": 1.699719746907552, "epoch": 1.3929581719809947, "grad_norm": 0.7195116877555847, "learning_rate": 6.0112728699411714e-06, "loss": 1.4665, "mean_token_accuracy": 0.6510594636201859, "num_tokens": 2125807954.0, "step": 12680 }, { "entropy": 1.76499076684316, "epoch": 1.3930680288923676, "grad_norm": 0.7974780797958374, "learning_rate": 6.009940305445091e-06, "loss": 1.3902, "mean_token_accuracy": 0.6549367159605026, "num_tokens": 2125928186.0, "step": 12681 }, { "entropy": 1.6431426803270976, "epoch": 1.3931778858037407, "grad_norm": 0.6491580605506897, "learning_rate": 6.008607898882155e-06, "loss": 1.3716, "mean_token_accuracy": 0.6630857636531194, "num_tokens": 2126146596.0, "step": 12682 }, { "entropy": 1.6817518671353657, "epoch": 1.3932877427151136, "grad_norm": 0.7032451629638672, "learning_rate": 6.00727565029453e-06, "loss": 1.4448, "mean_token_accuracy": 0.6501007825136185, "num_tokens": 2126341206.0, "step": 12683 }, { "entropy": 1.6864906052748363, "epoch": 1.3933975996264865, "grad_norm": 0.8053936958312988, "learning_rate": 6.005943559724376e-06, "loss": 1.5758, "mean_token_accuracy": 0.6425473292668661, "num_tokens": 2126538622.0, "step": 12684 }, { "entropy": 1.6943072477976482, "epoch": 1.3935074565378596, "grad_norm": 0.6725866198539734, "learning_rate": 6.004611627213863e-06, "loss": 1.3835, "mean_token_accuracy": 0.6750175058841705, "num_tokens": 2126712924.0, "step": 12685 }, { "entropy": 1.7044211824735005, "epoch": 1.3936173134492325, "grad_norm": 0.6494740843772888, "learning_rate": 6.003279852805137e-06, "loss": 1.4474, "mean_token_accuracy": 0.6646173646052679, "num_tokens": 2126862241.0, "step": 12686 }, { "entropy": 1.7204302748044331, "epoch": 1.3937271703606053, "grad_norm": 0.6521239876747131, "learning_rate": 6.001948236540357e-06, "loss": 1.4459, "mean_token_accuracy": 0.6400475154320399, "num_tokens": 2127025520.0, "step": 12687 }, { "entropy": 1.716679612795512, "epoch": 1.3938370272719782, "grad_norm": 0.7173079252243042, "learning_rate": 6.000616778461661e-06, "loss": 1.3788, "mean_token_accuracy": 0.6761051565408707, "num_tokens": 2127282244.0, "step": 12688 }, { "entropy": 1.7010992169380188, "epoch": 1.393946884183351, "grad_norm": 19.059829711914062, "learning_rate": 5.99928547861119e-06, "loss": 1.3966, "mean_token_accuracy": 0.6659414370854696, "num_tokens": 2127448006.0, "step": 12689 }, { "entropy": 1.657021979490916, "epoch": 1.3940567410947242, "grad_norm": 0.5839347839355469, "learning_rate": 5.9979543370310775e-06, "loss": 1.4324, "mean_token_accuracy": 0.636112704873085, "num_tokens": 2127644165.0, "step": 12690 }, { "entropy": 1.714819739262263, "epoch": 1.394166598006097, "grad_norm": 0.6099923849105835, "learning_rate": 5.996623353763462e-06, "loss": 1.4218, "mean_token_accuracy": 0.6461069136857986, "num_tokens": 2127789572.0, "step": 12691 }, { "entropy": 1.7752255698045094, "epoch": 1.39427645491747, "grad_norm": 0.722017228603363, "learning_rate": 5.995292528850462e-06, "loss": 1.5213, "mean_token_accuracy": 0.6427341798941294, "num_tokens": 2127974615.0, "step": 12692 }, { "entropy": 1.7523868183294933, "epoch": 1.3943863118288429, "grad_norm": 0.6684828996658325, "learning_rate": 5.993961862334197e-06, "loss": 1.4147, "mean_token_accuracy": 0.653435617685318, "num_tokens": 2128141442.0, "step": 12693 }, { "entropy": 1.7046211461226146, "epoch": 1.3944961687402158, "grad_norm": 0.7684405446052551, "learning_rate": 5.9926313542567815e-06, "loss": 1.2746, "mean_token_accuracy": 0.6661444703737894, "num_tokens": 2128311566.0, "step": 12694 }, { "entropy": 1.7271720071633656, "epoch": 1.3946060256515889, "grad_norm": 0.6236696839332581, "learning_rate": 5.99130100466033e-06, "loss": 1.3441, "mean_token_accuracy": 0.663600504398346, "num_tokens": 2128500803.0, "step": 12695 }, { "entropy": 1.7542118628819783, "epoch": 1.3947158825629618, "grad_norm": 0.8479838371276855, "learning_rate": 5.989970813586945e-06, "loss": 1.4227, "mean_token_accuracy": 0.6497650593519211, "num_tokens": 2128680857.0, "step": 12696 }, { "entropy": 1.66288094719251, "epoch": 1.3948257394743346, "grad_norm": 0.6915057897567749, "learning_rate": 5.988640781078724e-06, "loss": 1.3693, "mean_token_accuracy": 0.6525774498780569, "num_tokens": 2128866827.0, "step": 12697 }, { "entropy": 1.729894479115804, "epoch": 1.3949355963857077, "grad_norm": 0.6445484161376953, "learning_rate": 5.987310907177763e-06, "loss": 1.4009, "mean_token_accuracy": 0.6387731532255808, "num_tokens": 2129076102.0, "step": 12698 }, { "entropy": 1.6647725601991017, "epoch": 1.3950454532970806, "grad_norm": 0.7019267678260803, "learning_rate": 5.985981191926156e-06, "loss": 1.4318, "mean_token_accuracy": 0.6519081046183904, "num_tokens": 2129283963.0, "step": 12699 }, { "entropy": 1.6667829751968384, "epoch": 1.3951553102084535, "grad_norm": 0.6425153017044067, "learning_rate": 5.984651635365985e-06, "loss": 1.4025, "mean_token_accuracy": 0.6508398950099945, "num_tokens": 2129458498.0, "step": 12700 }, { "entropy": 1.6813570360342662, "epoch": 1.3952651671198264, "grad_norm": 0.7219659090042114, "learning_rate": 5.983322237539326e-06, "loss": 1.331, "mean_token_accuracy": 0.6687901417414347, "num_tokens": 2129633109.0, "step": 12701 }, { "entropy": 1.7440830767154694, "epoch": 1.3953750240311993, "grad_norm": 0.7922948598861694, "learning_rate": 5.981992998488262e-06, "loss": 1.3723, "mean_token_accuracy": 0.6505018224318823, "num_tokens": 2129786689.0, "step": 12702 }, { "entropy": 1.6904495855172474, "epoch": 1.3954848809425724, "grad_norm": 0.7381642460823059, "learning_rate": 5.980663918254854e-06, "loss": 1.449, "mean_token_accuracy": 0.6513757407665253, "num_tokens": 2129959663.0, "step": 12703 }, { "entropy": 1.594289908806483, "epoch": 1.3955947378539453, "grad_norm": 0.7294413447380066, "learning_rate": 5.979334996881177e-06, "loss": 1.2396, "mean_token_accuracy": 0.6839832961559296, "num_tokens": 2130119921.0, "step": 12704 }, { "entropy": 1.6758387287457783, "epoch": 1.3957045947653182, "grad_norm": 0.6613010168075562, "learning_rate": 5.978006234409282e-06, "loss": 1.4366, "mean_token_accuracy": 0.6469068974256516, "num_tokens": 2130320202.0, "step": 12705 }, { "entropy": 1.6659991939862568, "epoch": 1.395814451676691, "grad_norm": 0.6377636790275574, "learning_rate": 5.9766776308812245e-06, "loss": 1.3725, "mean_token_accuracy": 0.6629499892393748, "num_tokens": 2130524111.0, "step": 12706 }, { "entropy": 1.6998618841171265, "epoch": 1.395924308588064, "grad_norm": 0.6595562100410461, "learning_rate": 5.9753491863390585e-06, "loss": 1.5279, "mean_token_accuracy": 0.6366192599137624, "num_tokens": 2130767602.0, "step": 12707 }, { "entropy": 1.7020623286565144, "epoch": 1.396034165499437, "grad_norm": 0.8012373447418213, "learning_rate": 5.974020900824829e-06, "loss": 1.4119, "mean_token_accuracy": 0.675724262992541, "num_tokens": 2130889858.0, "step": 12708 }, { "entropy": 1.7753592034180958, "epoch": 1.39614402241081, "grad_norm": 0.7129668593406677, "learning_rate": 5.972692774380568e-06, "loss": 1.3992, "mean_token_accuracy": 0.669427881638209, "num_tokens": 2131043830.0, "step": 12709 }, { "entropy": 1.67575670282046, "epoch": 1.3962538793221828, "grad_norm": 0.7088383436203003, "learning_rate": 5.9713648070483165e-06, "loss": 1.5448, "mean_token_accuracy": 0.6411570161581039, "num_tokens": 2131241286.0, "step": 12710 }, { "entropy": 1.6525772909323375, "epoch": 1.396363736233556, "grad_norm": 0.7682273983955383, "learning_rate": 5.9700369988701055e-06, "loss": 1.3177, "mean_token_accuracy": 0.6682016005118688, "num_tokens": 2131431791.0, "step": 12711 }, { "entropy": 1.6430234909057617, "epoch": 1.3964735931449288, "grad_norm": 0.5847803950309753, "learning_rate": 5.968709349887957e-06, "loss": 1.3363, "mean_token_accuracy": 0.676411454876264, "num_tokens": 2131614057.0, "step": 12712 }, { "entropy": 1.679671843846639, "epoch": 1.3965834500563017, "grad_norm": 0.7472701072692871, "learning_rate": 5.9673818601438885e-06, "loss": 1.2549, "mean_token_accuracy": 0.6748977800210317, "num_tokens": 2131726410.0, "step": 12713 }, { "entropy": 1.7021221121152241, "epoch": 1.3966933069676746, "grad_norm": 0.8775181174278259, "learning_rate": 5.9660545296799185e-06, "loss": 1.3966, "mean_token_accuracy": 0.6520507534344991, "num_tokens": 2131916348.0, "step": 12714 }, { "entropy": 1.6914891302585602, "epoch": 1.3968031638790475, "grad_norm": 0.6840558648109436, "learning_rate": 5.964727358538049e-06, "loss": 1.2845, "mean_token_accuracy": 0.6717403084039688, "num_tokens": 2132060214.0, "step": 12715 }, { "entropy": 1.7304686605930328, "epoch": 1.3969130207904206, "grad_norm": 0.7312942147254944, "learning_rate": 5.963400346760297e-06, "loss": 1.5967, "mean_token_accuracy": 0.6488021487991015, "num_tokens": 2132217864.0, "step": 12716 }, { "entropy": 1.7533520062764485, "epoch": 1.3970228777017935, "grad_norm": 0.6745875477790833, "learning_rate": 5.962073494388652e-06, "loss": 1.4307, "mean_token_accuracy": 0.6536213358243307, "num_tokens": 2132385245.0, "step": 12717 }, { "entropy": 1.7508942981561024, "epoch": 1.3971327346131663, "grad_norm": 0.7504527568817139, "learning_rate": 5.9607468014651085e-06, "loss": 1.5074, "mean_token_accuracy": 0.6408179601033529, "num_tokens": 2132551692.0, "step": 12718 }, { "entropy": 1.7008541425069172, "epoch": 1.3972425915245392, "grad_norm": 0.6057212352752686, "learning_rate": 5.959420268031661e-06, "loss": 1.5229, "mean_token_accuracy": 0.6457130114237467, "num_tokens": 2132768679.0, "step": 12719 }, { "entropy": 1.6575111548105876, "epoch": 1.397352448435912, "grad_norm": 0.8810737133026123, "learning_rate": 5.9580938941302905e-06, "loss": 1.2997, "mean_token_accuracy": 0.6577588965495428, "num_tokens": 2132902521.0, "step": 12720 }, { "entropy": 1.7134600281715393, "epoch": 1.3974623053472852, "grad_norm": 0.6999838948249817, "learning_rate": 5.956767679802972e-06, "loss": 1.4848, "mean_token_accuracy": 0.6436127026875814, "num_tokens": 2133091286.0, "step": 12721 }, { "entropy": 1.6435298323631287, "epoch": 1.397572162258658, "grad_norm": 0.7796320915222168, "learning_rate": 5.955441625091685e-06, "loss": 1.5166, "mean_token_accuracy": 0.6519753734270731, "num_tokens": 2133271963.0, "step": 12722 }, { "entropy": 1.7259367903073628, "epoch": 1.397682019170031, "grad_norm": 0.7645683288574219, "learning_rate": 5.9541157300384015e-06, "loss": 1.501, "mean_token_accuracy": 0.6502551784118017, "num_tokens": 2133415532.0, "step": 12723 }, { "entropy": 1.693211168050766, "epoch": 1.397791876081404, "grad_norm": 0.7891166806221008, "learning_rate": 5.95278999468508e-06, "loss": 1.2687, "mean_token_accuracy": 0.6685374329487482, "num_tokens": 2133555494.0, "step": 12724 }, { "entropy": 1.6511625250180562, "epoch": 1.397901732992777, "grad_norm": 0.5752436518669128, "learning_rate": 5.951464419073677e-06, "loss": 1.441, "mean_token_accuracy": 0.6449030637741089, "num_tokens": 2133777556.0, "step": 12725 }, { "entropy": 1.6687390704949696, "epoch": 1.3980115899041499, "grad_norm": 0.6337757110595703, "learning_rate": 5.9501390032461555e-06, "loss": 1.3054, "mean_token_accuracy": 0.6698885361353556, "num_tokens": 2134009899.0, "step": 12726 }, { "entropy": 1.7128651042779286, "epoch": 1.3981214468155228, "grad_norm": 0.6634067296981812, "learning_rate": 5.9488137472444526e-06, "loss": 1.4062, "mean_token_accuracy": 0.6472688515981039, "num_tokens": 2134211243.0, "step": 12727 }, { "entropy": 1.7509056230386097, "epoch": 1.3982313037268956, "grad_norm": 0.691646933555603, "learning_rate": 5.947488651110525e-06, "loss": 1.4227, "mean_token_accuracy": 0.6611177225907644, "num_tokens": 2134348640.0, "step": 12728 }, { "entropy": 1.7512084345022838, "epoch": 1.3983411606382687, "grad_norm": 0.8137237429618835, "learning_rate": 5.946163714886304e-06, "loss": 1.3646, "mean_token_accuracy": 0.6650453756252924, "num_tokens": 2134516069.0, "step": 12729 }, { "entropy": 1.702260931332906, "epoch": 1.3984510175496416, "grad_norm": 0.6533256769180298, "learning_rate": 5.944838938613722e-06, "loss": 1.4827, "mean_token_accuracy": 0.6493832468986511, "num_tokens": 2134719967.0, "step": 12730 }, { "entropy": 1.720413823922475, "epoch": 1.3985608744610145, "grad_norm": 0.7293774485588074, "learning_rate": 5.94351432233471e-06, "loss": 1.3079, "mean_token_accuracy": 0.6654588927825292, "num_tokens": 2134840334.0, "step": 12731 }, { "entropy": 1.6680286626021068, "epoch": 1.3986707313723874, "grad_norm": 0.7952906489372253, "learning_rate": 5.942189866091192e-06, "loss": 1.4333, "mean_token_accuracy": 0.6534133901198705, "num_tokens": 2134991028.0, "step": 12732 }, { "entropy": 1.6684763828913372, "epoch": 1.3987805882837603, "grad_norm": 0.6839401721954346, "learning_rate": 5.940865569925084e-06, "loss": 1.5594, "mean_token_accuracy": 0.6263647129138311, "num_tokens": 2135260443.0, "step": 12733 }, { "entropy": 1.763797640800476, "epoch": 1.3988904451951334, "grad_norm": 0.6452272534370422, "learning_rate": 5.9395414338783e-06, "loss": 1.4462, "mean_token_accuracy": 0.6526133120059967, "num_tokens": 2135421632.0, "step": 12734 }, { "entropy": 1.7352626224358876, "epoch": 1.3990003021065063, "grad_norm": 0.6591407060623169, "learning_rate": 5.938217457992752e-06, "loss": 1.3205, "mean_token_accuracy": 0.6572297314802805, "num_tokens": 2135570456.0, "step": 12735 }, { "entropy": 1.6932378311951954, "epoch": 1.3991101590178792, "grad_norm": 1.0089200735092163, "learning_rate": 5.936893642310342e-06, "loss": 1.4389, "mean_token_accuracy": 0.6600636690855026, "num_tokens": 2135747412.0, "step": 12736 }, { "entropy": 1.6893216868241627, "epoch": 1.3992200159292523, "grad_norm": 0.5760171413421631, "learning_rate": 5.935569986872962e-06, "loss": 1.4425, "mean_token_accuracy": 0.6468855142593384, "num_tokens": 2135980426.0, "step": 12737 }, { "entropy": 1.7061325411001842, "epoch": 1.3993298728406252, "grad_norm": 0.6980313062667847, "learning_rate": 5.934246491722515e-06, "loss": 1.3273, "mean_token_accuracy": 0.6672591865062714, "num_tokens": 2136158512.0, "step": 12738 }, { "entropy": 1.734901487827301, "epoch": 1.399439729751998, "grad_norm": 0.8329048752784729, "learning_rate": 5.93292315690088e-06, "loss": 1.3188, "mean_token_accuracy": 0.6712016463279724, "num_tokens": 2136318374.0, "step": 12739 }, { "entropy": 1.7213096022605896, "epoch": 1.399549586663371, "grad_norm": 0.8984243273735046, "learning_rate": 5.931599982449945e-06, "loss": 1.5536, "mean_token_accuracy": 0.648496687412262, "num_tokens": 2136472657.0, "step": 12740 }, { "entropy": 1.6307465930779774, "epoch": 1.3996594435747438, "grad_norm": 0.6590262651443481, "learning_rate": 5.930276968411589e-06, "loss": 1.3478, "mean_token_accuracy": 0.6669967323541641, "num_tokens": 2136625484.0, "step": 12741 }, { "entropy": 1.744086354970932, "epoch": 1.399769300486117, "grad_norm": 0.794403612613678, "learning_rate": 5.928954114827679e-06, "loss": 1.2884, "mean_token_accuracy": 0.6704124808311462, "num_tokens": 2136783736.0, "step": 12742 }, { "entropy": 1.7087414264678955, "epoch": 1.3998791573974898, "grad_norm": 0.6517627239227295, "learning_rate": 5.927631421740088e-06, "loss": 1.4211, "mean_token_accuracy": 0.6420366764068604, "num_tokens": 2136965121.0, "step": 12743 }, { "entropy": 1.7388703723748524, "epoch": 1.3999890143088627, "grad_norm": 0.5937987565994263, "learning_rate": 5.926308889190677e-06, "loss": 1.3561, "mean_token_accuracy": 0.6579962919155756, "num_tokens": 2137139051.0, "step": 12744 }, { "entropy": 1.710933009783427, "epoch": 1.4000988712202356, "grad_norm": 0.653157651424408, "learning_rate": 5.9249865172213e-06, "loss": 1.4606, "mean_token_accuracy": 0.647930254538854, "num_tokens": 2137320154.0, "step": 12745 }, { "entropy": 1.6999001502990723, "epoch": 1.4002087281316085, "grad_norm": 0.7102558612823486, "learning_rate": 5.9236643058738154e-06, "loss": 1.4033, "mean_token_accuracy": 0.6542644649744034, "num_tokens": 2137500878.0, "step": 12746 }, { "entropy": 1.7328318357467651, "epoch": 1.4003185850429816, "grad_norm": 0.6837024092674255, "learning_rate": 5.922342255190069e-06, "loss": 1.346, "mean_token_accuracy": 0.66578309237957, "num_tokens": 2137655692.0, "step": 12747 }, { "entropy": 1.6835198104381561, "epoch": 1.4004284419543545, "grad_norm": 0.6370250582695007, "learning_rate": 5.921020365211904e-06, "loss": 1.5254, "mean_token_accuracy": 0.6239050130049387, "num_tokens": 2137823214.0, "step": 12748 }, { "entropy": 1.7602061529954274, "epoch": 1.4005382988657273, "grad_norm": 0.6429856419563293, "learning_rate": 5.91969863598115e-06, "loss": 1.4799, "mean_token_accuracy": 0.6393208205699921, "num_tokens": 2138018064.0, "step": 12749 }, { "entropy": 1.745541363954544, "epoch": 1.4006481557771004, "grad_norm": 0.8476991653442383, "learning_rate": 5.918377067539649e-06, "loss": 1.1879, "mean_token_accuracy": 0.6587680826584498, "num_tokens": 2138210800.0, "step": 12750 }, { "entropy": 1.7145410180091858, "epoch": 1.4007580126884733, "grad_norm": 0.7389444708824158, "learning_rate": 5.917055659929226e-06, "loss": 1.4971, "mean_token_accuracy": 0.6424557218949, "num_tokens": 2138401395.0, "step": 12751 }, { "entropy": 1.7683631479740143, "epoch": 1.4008678695998462, "grad_norm": 0.7572634816169739, "learning_rate": 5.9157344131916964e-06, "loss": 1.2822, "mean_token_accuracy": 0.6673834770917892, "num_tokens": 2138510935.0, "step": 12752 }, { "entropy": 1.7613183856010437, "epoch": 1.400977726511219, "grad_norm": 0.7066530585289001, "learning_rate": 5.914413327368884e-06, "loss": 1.4304, "mean_token_accuracy": 0.6482445945342382, "num_tokens": 2138700696.0, "step": 12753 }, { "entropy": 1.7241238355636597, "epoch": 1.401087583422592, "grad_norm": 0.6761777997016907, "learning_rate": 5.913092402502596e-06, "loss": 1.4649, "mean_token_accuracy": 0.632220983505249, "num_tokens": 2138879164.0, "step": 12754 }, { "entropy": 1.7183875143527985, "epoch": 1.401197440333965, "grad_norm": 0.6765937209129333, "learning_rate": 5.911771638634645e-06, "loss": 1.3318, "mean_token_accuracy": 0.6607321550448736, "num_tokens": 2138999181.0, "step": 12755 }, { "entropy": 1.7736754318078358, "epoch": 1.401307297245338, "grad_norm": 6.098966598510742, "learning_rate": 5.910451035806827e-06, "loss": 1.3586, "mean_token_accuracy": 0.6717801292737325, "num_tokens": 2139163745.0, "step": 12756 }, { "entropy": 1.617441564798355, "epoch": 1.4014171541567109, "grad_norm": 0.6018511652946472, "learning_rate": 5.909130594060937e-06, "loss": 1.497, "mean_token_accuracy": 0.6571058879295985, "num_tokens": 2139356692.0, "step": 12757 }, { "entropy": 1.7085582911968231, "epoch": 1.4015270110680838, "grad_norm": 0.7229043245315552, "learning_rate": 5.907810313438773e-06, "loss": 1.2965, "mean_token_accuracy": 0.6663492073615392, "num_tokens": 2139499979.0, "step": 12758 }, { "entropy": 1.7085819641749065, "epoch": 1.4016368679794566, "grad_norm": 0.6772550344467163, "learning_rate": 5.906490193982117e-06, "loss": 1.4769, "mean_token_accuracy": 0.6481290062268575, "num_tokens": 2139690644.0, "step": 12759 }, { "entropy": 1.7135390937328339, "epoch": 1.4017467248908297, "grad_norm": 0.7282260060310364, "learning_rate": 5.905170235732753e-06, "loss": 1.3773, "mean_token_accuracy": 0.6570235292116801, "num_tokens": 2139868571.0, "step": 12760 }, { "entropy": 1.7161648571491241, "epoch": 1.4018565818022026, "grad_norm": 0.820698082447052, "learning_rate": 5.903850438732454e-06, "loss": 1.6134, "mean_token_accuracy": 0.6403177628914515, "num_tokens": 2140033198.0, "step": 12761 }, { "entropy": 1.6414225101470947, "epoch": 1.4019664387135755, "grad_norm": 0.7905219197273254, "learning_rate": 5.9025308030229926e-06, "loss": 1.334, "mean_token_accuracy": 0.6737157901128134, "num_tokens": 2140175099.0, "step": 12762 }, { "entropy": 1.6699174046516418, "epoch": 1.4020762956249486, "grad_norm": 0.7612943053245544, "learning_rate": 5.901211328646134e-06, "loss": 1.3, "mean_token_accuracy": 0.6618338972330093, "num_tokens": 2140342013.0, "step": 12763 }, { "entropy": 1.721044272184372, "epoch": 1.4021861525363215, "grad_norm": 0.6190001964569092, "learning_rate": 5.899892015643641e-06, "loss": 1.3738, "mean_token_accuracy": 0.6535343378782272, "num_tokens": 2140523914.0, "step": 12764 }, { "entropy": 1.7023292283217113, "epoch": 1.4022960094476944, "grad_norm": 0.6998386979103088, "learning_rate": 5.898572864057264e-06, "loss": 1.2795, "mean_token_accuracy": 0.6628076682488123, "num_tokens": 2140648869.0, "step": 12765 }, { "entropy": 1.684864302476247, "epoch": 1.4024058663590673, "grad_norm": 0.7491025328636169, "learning_rate": 5.8972538739287565e-06, "loss": 1.3828, "mean_token_accuracy": 0.6604053676128387, "num_tokens": 2140791080.0, "step": 12766 }, { "entropy": 1.6907603442668915, "epoch": 1.4025157232704402, "grad_norm": 0.7660679221153259, "learning_rate": 5.895935045299868e-06, "loss": 1.4723, "mean_token_accuracy": 0.6463464796543121, "num_tokens": 2140950257.0, "step": 12767 }, { "entropy": 1.7172885537147522, "epoch": 1.4026255801818133, "grad_norm": 0.6535598635673523, "learning_rate": 5.894616378212335e-06, "loss": 1.6173, "mean_token_accuracy": 0.6273392041524252, "num_tokens": 2141170697.0, "step": 12768 }, { "entropy": 1.6735565066337585, "epoch": 1.4027354370931862, "grad_norm": 0.7289633750915527, "learning_rate": 5.8932978727078916e-06, "loss": 1.5432, "mean_token_accuracy": 0.6520901521046957, "num_tokens": 2141348090.0, "step": 12769 }, { "entropy": 1.7456530233224232, "epoch": 1.402845294004559, "grad_norm": 0.6960586905479431, "learning_rate": 5.891979528828271e-06, "loss": 1.3654, "mean_token_accuracy": 0.6549131870269775, "num_tokens": 2141509964.0, "step": 12770 }, { "entropy": 1.7111040155092876, "epoch": 1.402955150915932, "grad_norm": 0.660510241985321, "learning_rate": 5.8906613466151945e-06, "loss": 1.5111, "mean_token_accuracy": 0.6399530122677485, "num_tokens": 2141692058.0, "step": 12771 }, { "entropy": 1.64168119430542, "epoch": 1.4030650078273048, "grad_norm": 0.6687092185020447, "learning_rate": 5.889343326110386e-06, "loss": 1.3046, "mean_token_accuracy": 0.6702596594889959, "num_tokens": 2141843417.0, "step": 12772 }, { "entropy": 1.6783235470453899, "epoch": 1.403174864738678, "grad_norm": 0.6634986996650696, "learning_rate": 5.8880254673555585e-06, "loss": 1.3643, "mean_token_accuracy": 0.6527419487635294, "num_tokens": 2142029490.0, "step": 12773 }, { "entropy": 1.8106913566589355, "epoch": 1.4032847216500508, "grad_norm": 0.9083941578865051, "learning_rate": 5.886707770392419e-06, "loss": 1.3996, "mean_token_accuracy": 0.6511979649464289, "num_tokens": 2142270554.0, "step": 12774 }, { "entropy": 1.673154612382253, "epoch": 1.4033945785614237, "grad_norm": 0.8272859454154968, "learning_rate": 5.885390235262678e-06, "loss": 1.3946, "mean_token_accuracy": 0.6538991828759512, "num_tokens": 2142445460.0, "step": 12775 }, { "entropy": 1.7107236782709758, "epoch": 1.4035044354727968, "grad_norm": 0.7277297377586365, "learning_rate": 5.88407286200803e-06, "loss": 1.359, "mean_token_accuracy": 0.6583436330159506, "num_tokens": 2142591637.0, "step": 12776 }, { "entropy": 1.7707445522149403, "epoch": 1.4036142923841697, "grad_norm": 0.8745039701461792, "learning_rate": 5.882755650670168e-06, "loss": 1.3564, "mean_token_accuracy": 0.6674359192450842, "num_tokens": 2142733811.0, "step": 12777 }, { "entropy": 1.7235571146011353, "epoch": 1.4037241492955426, "grad_norm": 0.644283652305603, "learning_rate": 5.881438601290783e-06, "loss": 1.3158, "mean_token_accuracy": 0.6760291904211044, "num_tokens": 2142913022.0, "step": 12778 }, { "entropy": 1.7330328822135925, "epoch": 1.4038340062069155, "grad_norm": 0.8673796057701111, "learning_rate": 5.880121713911564e-06, "loss": 1.2657, "mean_token_accuracy": 0.6716126203536987, "num_tokens": 2143050557.0, "step": 12779 }, { "entropy": 1.6988015472888947, "epoch": 1.4039438631182883, "grad_norm": 0.6786276698112488, "learning_rate": 5.878804988574187e-06, "loss": 1.4185, "mean_token_accuracy": 0.6512501438458761, "num_tokens": 2143287497.0, "step": 12780 }, { "entropy": 1.691178212563197, "epoch": 1.4040537200296614, "grad_norm": 0.6749993562698364, "learning_rate": 5.877488425320319e-06, "loss": 1.555, "mean_token_accuracy": 0.6484788060188293, "num_tokens": 2143479455.0, "step": 12781 }, { "entropy": 1.7058672209580739, "epoch": 1.4041635769410343, "grad_norm": 0.7358183264732361, "learning_rate": 5.876172024191638e-06, "loss": 1.4368, "mean_token_accuracy": 0.6451542327801386, "num_tokens": 2143664368.0, "step": 12782 }, { "entropy": 1.7063271800676982, "epoch": 1.4042734338524072, "grad_norm": 0.5937331318855286, "learning_rate": 5.8748557852298e-06, "loss": 1.4229, "mean_token_accuracy": 0.6448936760425568, "num_tokens": 2143865254.0, "step": 12783 }, { "entropy": 1.6791508595148723, "epoch": 1.40438329076378, "grad_norm": 0.6096206903457642, "learning_rate": 5.8735397084764715e-06, "loss": 1.5288, "mean_token_accuracy": 0.6363308951258659, "num_tokens": 2144054655.0, "step": 12784 }, { "entropy": 1.6694469451904297, "epoch": 1.404493147675153, "grad_norm": 0.6919692754745483, "learning_rate": 5.8722237939733e-06, "loss": 1.3743, "mean_token_accuracy": 0.6567158748706182, "num_tokens": 2144227659.0, "step": 12785 }, { "entropy": 1.69756019115448, "epoch": 1.404603004586526, "grad_norm": 0.5972253680229187, "learning_rate": 5.870908041761931e-06, "loss": 1.4428, "mean_token_accuracy": 0.6423445741335551, "num_tokens": 2144422290.0, "step": 12786 }, { "entropy": 1.7097438871860504, "epoch": 1.404712861497899, "grad_norm": 0.7439383268356323, "learning_rate": 5.869592451884016e-06, "loss": 1.5138, "mean_token_accuracy": 0.6612397755185763, "num_tokens": 2144562143.0, "step": 12787 }, { "entropy": 1.6832230985164642, "epoch": 1.4048227184092719, "grad_norm": 0.6270433664321899, "learning_rate": 5.868277024381188e-06, "loss": 1.4087, "mean_token_accuracy": 0.6563821186621984, "num_tokens": 2144747093.0, "step": 12788 }, { "entropy": 1.6605586012204487, "epoch": 1.404932575320645, "grad_norm": 0.5849980711936951, "learning_rate": 5.8669617592950756e-06, "loss": 1.4643, "mean_token_accuracy": 0.6427861303091049, "num_tokens": 2144974369.0, "step": 12789 }, { "entropy": 1.754450609286626, "epoch": 1.4050424322320179, "grad_norm": 0.6792782545089722, "learning_rate": 5.8656466566673096e-06, "loss": 1.5626, "mean_token_accuracy": 0.6303362647692362, "num_tokens": 2145155907.0, "step": 12790 }, { "entropy": 1.7363029321034749, "epoch": 1.4051522891433907, "grad_norm": 0.7463129162788391, "learning_rate": 5.864331716539519e-06, "loss": 1.5308, "mean_token_accuracy": 0.6494456827640533, "num_tokens": 2145329791.0, "step": 12791 }, { "entropy": 1.665359725554784, "epoch": 1.4052621460547636, "grad_norm": 0.681336522102356, "learning_rate": 5.863016938953313e-06, "loss": 1.4401, "mean_token_accuracy": 0.6599595348040262, "num_tokens": 2145484550.0, "step": 12792 }, { "entropy": 1.6739614307880402, "epoch": 1.4053720029661365, "grad_norm": 0.6140089631080627, "learning_rate": 5.861702323950304e-06, "loss": 1.366, "mean_token_accuracy": 0.654315322637558, "num_tokens": 2145641851.0, "step": 12793 }, { "entropy": 1.7016875247160594, "epoch": 1.4054818598775096, "grad_norm": 0.6950314044952393, "learning_rate": 5.860387871572105e-06, "loss": 1.3112, "mean_token_accuracy": 0.6771847307682037, "num_tokens": 2145795212.0, "step": 12794 }, { "entropy": 1.7205670773983002, "epoch": 1.4055917167888825, "grad_norm": 0.6614289879798889, "learning_rate": 5.85907358186031e-06, "loss": 1.4641, "mean_token_accuracy": 0.646242747704188, "num_tokens": 2146010519.0, "step": 12795 }, { "entropy": 1.6690000593662262, "epoch": 1.4057015737002554, "grad_norm": 0.7087666988372803, "learning_rate": 5.857759454856522e-06, "loss": 1.2666, "mean_token_accuracy": 0.6690100828806559, "num_tokens": 2146162066.0, "step": 12796 }, { "entropy": 1.7213394542535145, "epoch": 1.4058114306116283, "grad_norm": 0.6396216750144958, "learning_rate": 5.856445490602332e-06, "loss": 1.4768, "mean_token_accuracy": 0.642402172088623, "num_tokens": 2146343869.0, "step": 12797 }, { "entropy": 1.7456571360429127, "epoch": 1.4059212875230012, "grad_norm": 0.775600016117096, "learning_rate": 5.855131689139319e-06, "loss": 1.5141, "mean_token_accuracy": 0.6424583395322164, "num_tokens": 2146511314.0, "step": 12798 }, { "entropy": 1.6875923077265422, "epoch": 1.4060311444343743, "grad_norm": 0.8226394653320312, "learning_rate": 5.853818050509075e-06, "loss": 1.3774, "mean_token_accuracy": 0.6524281054735184, "num_tokens": 2146732048.0, "step": 12799 }, { "entropy": 1.6746432185173035, "epoch": 1.4061410013457472, "grad_norm": 0.8111725449562073, "learning_rate": 5.852504574753171e-06, "loss": 1.3547, "mean_token_accuracy": 0.670900379618009, "num_tokens": 2146891070.0, "step": 12800 }, { "entropy": 1.7106841901938121, "epoch": 1.40625085825712, "grad_norm": 0.8684120178222656, "learning_rate": 5.851191261913173e-06, "loss": 1.4345, "mean_token_accuracy": 0.6579526364803314, "num_tokens": 2147074096.0, "step": 12801 }, { "entropy": 1.6633965174357097, "epoch": 1.4063607151684931, "grad_norm": 0.6919369101524353, "learning_rate": 5.8498781120306515e-06, "loss": 1.5774, "mean_token_accuracy": 0.6280744473139445, "num_tokens": 2147378563.0, "step": 12802 }, { "entropy": 1.7567805548508961, "epoch": 1.406470572079866, "grad_norm": 0.6097819805145264, "learning_rate": 5.84856512514717e-06, "loss": 1.5855, "mean_token_accuracy": 0.6411197036504745, "num_tokens": 2147542328.0, "step": 12803 }, { "entropy": 1.7062805791695912, "epoch": 1.406580428991239, "grad_norm": 0.7504722476005554, "learning_rate": 5.847252301304283e-06, "loss": 1.4771, "mean_token_accuracy": 0.6363706986109415, "num_tokens": 2147776239.0, "step": 12804 }, { "entropy": 1.6895011464754741, "epoch": 1.4066902859026118, "grad_norm": 0.7538740038871765, "learning_rate": 5.845939640543532e-06, "loss": 1.161, "mean_token_accuracy": 0.6999478687842687, "num_tokens": 2147905569.0, "step": 12805 }, { "entropy": 1.742385983467102, "epoch": 1.4068001428139847, "grad_norm": 0.7203928828239441, "learning_rate": 5.844627142906476e-06, "loss": 1.3725, "mean_token_accuracy": 0.65119768679142, "num_tokens": 2148091661.0, "step": 12806 }, { "entropy": 1.6920853157838185, "epoch": 1.4069099997253578, "grad_norm": 0.9913017153739929, "learning_rate": 5.843314808434642e-06, "loss": 1.2324, "mean_token_accuracy": 0.6863707005977631, "num_tokens": 2148224018.0, "step": 12807 }, { "entropy": 1.7018676499525707, "epoch": 1.4070198566367307, "grad_norm": 0.6461707949638367, "learning_rate": 5.842002637169575e-06, "loss": 1.4177, "mean_token_accuracy": 0.6471654524405798, "num_tokens": 2148433747.0, "step": 12808 }, { "entropy": 1.6787349085013072, "epoch": 1.4071297135481036, "grad_norm": 0.7102311253547668, "learning_rate": 5.840690629152801e-06, "loss": 1.6121, "mean_token_accuracy": 0.6307234813769659, "num_tokens": 2148624255.0, "step": 12809 }, { "entropy": 1.7247712711493175, "epoch": 1.4072395704594765, "grad_norm": 0.7935449481010437, "learning_rate": 5.8393787844258395e-06, "loss": 1.4452, "mean_token_accuracy": 0.6595299392938614, "num_tokens": 2148761952.0, "step": 12810 }, { "entropy": 1.7299401660760243, "epoch": 1.4073494273708493, "grad_norm": 0.7766920328140259, "learning_rate": 5.838067103030216e-06, "loss": 1.4923, "mean_token_accuracy": 0.6479160586992899, "num_tokens": 2148884392.0, "step": 12811 }, { "entropy": 1.658048282066981, "epoch": 1.4074592842822224, "grad_norm": 0.6866027116775513, "learning_rate": 5.836755585007445e-06, "loss": 1.3419, "mean_token_accuracy": 0.6578021794557571, "num_tokens": 2149045505.0, "step": 12812 }, { "entropy": 1.6724791725476582, "epoch": 1.4075691411935953, "grad_norm": 0.8036855459213257, "learning_rate": 5.8354442303990285e-06, "loss": 1.3986, "mean_token_accuracy": 0.6611756682395935, "num_tokens": 2149207385.0, "step": 12813 }, { "entropy": 1.7076423863569896, "epoch": 1.4076789981049682, "grad_norm": 0.7133153676986694, "learning_rate": 5.834133039246479e-06, "loss": 1.4558, "mean_token_accuracy": 0.6625415583451589, "num_tokens": 2149386316.0, "step": 12814 }, { "entropy": 1.6813267568747203, "epoch": 1.4077888550163413, "grad_norm": 0.697750985622406, "learning_rate": 5.832822011591287e-06, "loss": 1.2656, "mean_token_accuracy": 0.6713636467854182, "num_tokens": 2149506203.0, "step": 12815 }, { "entropy": 1.7380880614121754, "epoch": 1.4078987119277142, "grad_norm": 0.8035324215888977, "learning_rate": 5.831511147474953e-06, "loss": 1.3003, "mean_token_accuracy": 0.6692630002895991, "num_tokens": 2149633124.0, "step": 12816 }, { "entropy": 1.6894714534282684, "epoch": 1.408008568839087, "grad_norm": 0.918594241142273, "learning_rate": 5.830200446938963e-06, "loss": 1.3384, "mean_token_accuracy": 0.6638317654530207, "num_tokens": 2149829653.0, "step": 12817 }, { "entropy": 1.6925647656122844, "epoch": 1.40811842575046, "grad_norm": 0.7484959959983826, "learning_rate": 5.828889910024796e-06, "loss": 1.3594, "mean_token_accuracy": 0.670293723543485, "num_tokens": 2149990639.0, "step": 12818 }, { "entropy": 1.7760498821735382, "epoch": 1.4082282826618329, "grad_norm": 0.890577495098114, "learning_rate": 5.827579536773933e-06, "loss": 1.559, "mean_token_accuracy": 0.6545114864905676, "num_tokens": 2150162739.0, "step": 12819 }, { "entropy": 1.7027121881643932, "epoch": 1.408338139573206, "grad_norm": 0.625486433506012, "learning_rate": 5.826269327227853e-06, "loss": 1.5667, "mean_token_accuracy": 0.6334756761789322, "num_tokens": 2150347461.0, "step": 12820 }, { "entropy": 1.7147199014822643, "epoch": 1.4084479964845789, "grad_norm": 0.7005285024642944, "learning_rate": 5.824959281428012e-06, "loss": 1.281, "mean_token_accuracy": 0.6745847860972086, "num_tokens": 2150493583.0, "step": 12821 }, { "entropy": 1.6792974670728047, "epoch": 1.4085578533959517, "grad_norm": 0.6783992648124695, "learning_rate": 5.823649399415876e-06, "loss": 1.3088, "mean_token_accuracy": 0.671634684006373, "num_tokens": 2150622072.0, "step": 12822 }, { "entropy": 1.7010674675305684, "epoch": 1.4086677103073246, "grad_norm": 0.6391741037368774, "learning_rate": 5.822339681232909e-06, "loss": 1.3342, "mean_token_accuracy": 0.6632145543893179, "num_tokens": 2150816027.0, "step": 12823 }, { "entropy": 1.7505244612693787, "epoch": 1.4087775672186975, "grad_norm": 0.7569530606269836, "learning_rate": 5.821030126920558e-06, "loss": 1.5322, "mean_token_accuracy": 0.6407630940278372, "num_tokens": 2150992201.0, "step": 12824 }, { "entropy": 1.724602570136388, "epoch": 1.4088874241300706, "grad_norm": 0.7671657800674438, "learning_rate": 5.819720736520265e-06, "loss": 1.3978, "mean_token_accuracy": 0.6648318469524384, "num_tokens": 2151147887.0, "step": 12825 }, { "entropy": 1.6906703511873882, "epoch": 1.4089972810414435, "grad_norm": 0.688035249710083, "learning_rate": 5.818411510073481e-06, "loss": 1.3757, "mean_token_accuracy": 0.6557995080947876, "num_tokens": 2151322253.0, "step": 12826 }, { "entropy": 1.6956557631492615, "epoch": 1.4091071379528164, "grad_norm": 0.5983976125717163, "learning_rate": 5.817102447621634e-06, "loss": 1.5676, "mean_token_accuracy": 0.639850397904714, "num_tokens": 2151538705.0, "step": 12827 }, { "entropy": 1.7294293542702992, "epoch": 1.4092169948641895, "grad_norm": 0.7593173384666443, "learning_rate": 5.815793549206163e-06, "loss": 1.4723, "mean_token_accuracy": 0.6482956012090048, "num_tokens": 2151684485.0, "step": 12828 }, { "entropy": 1.6686496635278065, "epoch": 1.4093268517755624, "grad_norm": 0.7154465317726135, "learning_rate": 5.8144848148684885e-06, "loss": 1.2243, "mean_token_accuracy": 0.6809603323539098, "num_tokens": 2151805051.0, "step": 12829 }, { "entropy": 1.685718337694804, "epoch": 1.4094367086869353, "grad_norm": 0.6651716232299805, "learning_rate": 5.813176244650032e-06, "loss": 1.2744, "mean_token_accuracy": 0.6674046268065771, "num_tokens": 2151913172.0, "step": 12830 }, { "entropy": 1.738152305285136, "epoch": 1.4095465655983082, "grad_norm": 0.7076160907745361, "learning_rate": 5.811867838592211e-06, "loss": 1.477, "mean_token_accuracy": 0.6419303814570109, "num_tokens": 2152093205.0, "step": 12831 }, { "entropy": 1.7107795576254528, "epoch": 1.409656422509681, "grad_norm": 0.6455994844436646, "learning_rate": 5.810559596736437e-06, "loss": 1.344, "mean_token_accuracy": 0.6550297737121582, "num_tokens": 2152241280.0, "step": 12832 }, { "entropy": 1.717966268459956, "epoch": 1.4097662794210541, "grad_norm": 0.6599206924438477, "learning_rate": 5.809251519124109e-06, "loss": 1.4948, "mean_token_accuracy": 0.6381760487953821, "num_tokens": 2152459746.0, "step": 12833 }, { "entropy": 1.7183611194292705, "epoch": 1.409876136332427, "grad_norm": 0.5863080024719238, "learning_rate": 5.807943605796631e-06, "loss": 1.4689, "mean_token_accuracy": 0.6355762084325155, "num_tokens": 2152659000.0, "step": 12834 }, { "entropy": 1.6961732705434163, "epoch": 1.4099859932438, "grad_norm": 0.6659945249557495, "learning_rate": 5.806635856795404e-06, "loss": 1.3066, "mean_token_accuracy": 0.6640375355879465, "num_tokens": 2152798280.0, "step": 12835 }, { "entropy": 1.7171707153320312, "epoch": 1.410095850155173, "grad_norm": 0.5799298286437988, "learning_rate": 5.80532827216181e-06, "loss": 1.458, "mean_token_accuracy": 0.6387546559174856, "num_tokens": 2153031793.0, "step": 12836 }, { "entropy": 1.7386144399642944, "epoch": 1.4102057070665457, "grad_norm": 0.6916424632072449, "learning_rate": 5.804020851937231e-06, "loss": 1.4831, "mean_token_accuracy": 0.6459663063287735, "num_tokens": 2153245122.0, "step": 12837 }, { "entropy": 1.7278761863708496, "epoch": 1.4103155639779188, "grad_norm": 0.657574474811554, "learning_rate": 5.8027135961630565e-06, "loss": 1.4642, "mean_token_accuracy": 0.6525082488854727, "num_tokens": 2153416789.0, "step": 12838 }, { "entropy": 1.7336822350819905, "epoch": 1.4104254208892917, "grad_norm": 0.7622162103652954, "learning_rate": 5.801406504880649e-06, "loss": 1.3134, "mean_token_accuracy": 0.6629678755998611, "num_tokens": 2153535980.0, "step": 12839 }, { "entropy": 1.654565433661143, "epoch": 1.4105352778006646, "grad_norm": 0.6035882830619812, "learning_rate": 5.800099578131388e-06, "loss": 1.3029, "mean_token_accuracy": 0.680802529056867, "num_tokens": 2153683959.0, "step": 12840 }, { "entropy": 1.6937087972958882, "epoch": 1.4106451347120377, "grad_norm": 0.6252169609069824, "learning_rate": 5.798792815956632e-06, "loss": 1.3357, "mean_token_accuracy": 0.6599215567111969, "num_tokens": 2153878650.0, "step": 12841 }, { "entropy": 1.7652093668778737, "epoch": 1.4107549916234106, "grad_norm": 0.715120255947113, "learning_rate": 5.797486218397737e-06, "loss": 1.4924, "mean_token_accuracy": 0.6500881711641947, "num_tokens": 2154048004.0, "step": 12842 }, { "entropy": 1.6499028007189434, "epoch": 1.4108648485347834, "grad_norm": 0.8363161683082581, "learning_rate": 5.796179785496061e-06, "loss": 1.3537, "mean_token_accuracy": 0.6692218035459518, "num_tokens": 2154206593.0, "step": 12843 }, { "entropy": 1.6193082729975383, "epoch": 1.4109747054461563, "grad_norm": 0.6517688035964966, "learning_rate": 5.7948735172929495e-06, "loss": 1.2561, "mean_token_accuracy": 0.6783891270558039, "num_tokens": 2154358170.0, "step": 12844 }, { "entropy": 1.685720553000768, "epoch": 1.4110845623575292, "grad_norm": 0.6175381541252136, "learning_rate": 5.7935674138297435e-06, "loss": 1.3547, "mean_token_accuracy": 0.653323769569397, "num_tokens": 2154528515.0, "step": 12845 }, { "entropy": 1.6989250282446544, "epoch": 1.4111944192689023, "grad_norm": 0.8476423025131226, "learning_rate": 5.792261475147782e-06, "loss": 1.5093, "mean_token_accuracy": 0.6390935728947321, "num_tokens": 2154727006.0, "step": 12846 }, { "entropy": 1.6973857482274373, "epoch": 1.4113042761802752, "grad_norm": 0.7867861986160278, "learning_rate": 5.790955701288402e-06, "loss": 1.4885, "mean_token_accuracy": 0.6534365713596344, "num_tokens": 2154880048.0, "step": 12847 }, { "entropy": 1.7213109532992046, "epoch": 1.411414133091648, "grad_norm": 0.6492405533790588, "learning_rate": 5.7896500922929265e-06, "loss": 1.3551, "mean_token_accuracy": 0.6515356749296188, "num_tokens": 2155041717.0, "step": 12848 }, { "entropy": 1.7319613297780354, "epoch": 1.4115239900030212, "grad_norm": 0.6068700551986694, "learning_rate": 5.788344648202675e-06, "loss": 1.327, "mean_token_accuracy": 0.6601489931344986, "num_tokens": 2155192456.0, "step": 12849 }, { "entropy": 1.6868494153022766, "epoch": 1.4116338469143939, "grad_norm": 0.6022621393203735, "learning_rate": 5.78703936905897e-06, "loss": 1.4006, "mean_token_accuracy": 0.6591216822465261, "num_tokens": 2155389011.0, "step": 12850 }, { "entropy": 1.6397359669208527, "epoch": 1.411743703825767, "grad_norm": 0.7067409753799438, "learning_rate": 5.785734254903117e-06, "loss": 1.346, "mean_token_accuracy": 0.6666657626628876, "num_tokens": 2155528733.0, "step": 12851 }, { "entropy": 1.6686400373776753, "epoch": 1.4118535607371399, "grad_norm": 0.6087329387664795, "learning_rate": 5.784429305776427e-06, "loss": 1.3229, "mean_token_accuracy": 0.6637933800617853, "num_tokens": 2155688378.0, "step": 12852 }, { "entropy": 1.7409396668275197, "epoch": 1.4119634176485127, "grad_norm": 0.7018166184425354, "learning_rate": 5.7831245217202e-06, "loss": 1.5291, "mean_token_accuracy": 0.6422918488581976, "num_tokens": 2155840989.0, "step": 12853 }, { "entropy": 1.7613717218240101, "epoch": 1.4120732745598858, "grad_norm": 0.6511824727058411, "learning_rate": 5.7818199027757296e-06, "loss": 1.4611, "mean_token_accuracy": 0.6367582231760025, "num_tokens": 2156025021.0, "step": 12854 }, { "entropy": 1.6414049168427784, "epoch": 1.4121831314712587, "grad_norm": 0.82599276304245, "learning_rate": 5.78051544898431e-06, "loss": 1.2513, "mean_token_accuracy": 0.6847147146860758, "num_tokens": 2156184157.0, "step": 12855 }, { "entropy": 1.6162588596343994, "epoch": 1.4122929883826316, "grad_norm": 0.6030210256576538, "learning_rate": 5.779211160387224e-06, "loss": 1.2715, "mean_token_accuracy": 0.681743452946345, "num_tokens": 2156337483.0, "step": 12856 }, { "entropy": 1.7061065038045247, "epoch": 1.4124028452940045, "grad_norm": 0.7851901054382324, "learning_rate": 5.777907037025748e-06, "loss": 1.2282, "mean_token_accuracy": 0.6925330509742101, "num_tokens": 2156434763.0, "step": 12857 }, { "entropy": 1.6331138213475545, "epoch": 1.4125127022053774, "grad_norm": 0.6462249755859375, "learning_rate": 5.776603078941163e-06, "loss": 1.3081, "mean_token_accuracy": 0.6729962974786758, "num_tokens": 2156592213.0, "step": 12858 }, { "entropy": 1.7185394763946533, "epoch": 1.4126225591167505, "grad_norm": 0.7172744274139404, "learning_rate": 5.775299286174739e-06, "loss": 1.5015, "mean_token_accuracy": 0.6441001494725546, "num_tokens": 2156745527.0, "step": 12859 }, { "entropy": 1.743662456671397, "epoch": 1.4127324160281234, "grad_norm": 0.6882581114768982, "learning_rate": 5.773995658767739e-06, "loss": 1.4154, "mean_token_accuracy": 0.6491503864526749, "num_tokens": 2156898386.0, "step": 12860 }, { "entropy": 1.7289847433567047, "epoch": 1.4128422729394963, "grad_norm": 0.6334021687507629, "learning_rate": 5.772692196761418e-06, "loss": 1.3968, "mean_token_accuracy": 0.64505868156751, "num_tokens": 2157089076.0, "step": 12861 }, { "entropy": 1.6975704431533813, "epoch": 1.4129521298508694, "grad_norm": 0.6869586706161499, "learning_rate": 5.771388900197037e-06, "loss": 1.4119, "mean_token_accuracy": 0.6508818864822388, "num_tokens": 2157300620.0, "step": 12862 }, { "entropy": 1.6993489861488342, "epoch": 1.4130619867622423, "grad_norm": 0.5923440456390381, "learning_rate": 5.770085769115836e-06, "loss": 1.4502, "mean_token_accuracy": 0.6494368265072504, "num_tokens": 2157490825.0, "step": 12863 }, { "entropy": 1.6607881089051564, "epoch": 1.4131718436736151, "grad_norm": 0.6591402292251587, "learning_rate": 5.76878280355907e-06, "loss": 1.4262, "mean_token_accuracy": 0.6494799305995306, "num_tokens": 2157695141.0, "step": 12864 }, { "entropy": 1.6793291966120403, "epoch": 1.413281700584988, "grad_norm": 0.6479206085205078, "learning_rate": 5.76748000356797e-06, "loss": 1.2806, "mean_token_accuracy": 0.6759979277849197, "num_tokens": 2157865996.0, "step": 12865 }, { "entropy": 1.7249796688556671, "epoch": 1.413391557496361, "grad_norm": 0.7696998715400696, "learning_rate": 5.766177369183767e-06, "loss": 1.4357, "mean_token_accuracy": 0.642528717716535, "num_tokens": 2158053561.0, "step": 12866 }, { "entropy": 1.760029007991155, "epoch": 1.413501414407734, "grad_norm": 0.8501371145248413, "learning_rate": 5.764874900447693e-06, "loss": 1.6135, "mean_token_accuracy": 0.6389002650976181, "num_tokens": 2158203865.0, "step": 12867 }, { "entropy": 1.7189124127229054, "epoch": 1.413611271319107, "grad_norm": 0.723200261592865, "learning_rate": 5.763572597400972e-06, "loss": 1.3901, "mean_token_accuracy": 0.6513624439636866, "num_tokens": 2158354427.0, "step": 12868 }, { "entropy": 1.6675411363442738, "epoch": 1.4137211282304798, "grad_norm": 0.6228331923484802, "learning_rate": 5.762270460084813e-06, "loss": 1.3496, "mean_token_accuracy": 0.6633824755748113, "num_tokens": 2158523511.0, "step": 12869 }, { "entropy": 1.8182001411914825, "epoch": 1.4138309851418527, "grad_norm": 0.7652587890625, "learning_rate": 5.760968488540437e-06, "loss": 1.7234, "mean_token_accuracy": 0.6275846213102341, "num_tokens": 2158704933.0, "step": 12870 }, { "entropy": 1.7456133862336476, "epoch": 1.4139408420532256, "grad_norm": 0.9210031628608704, "learning_rate": 5.759666682809049e-06, "loss": 1.3865, "mean_token_accuracy": 0.662195548415184, "num_tokens": 2158826778.0, "step": 12871 }, { "entropy": 1.7603688438733418, "epoch": 1.4140506989645987, "grad_norm": 1.4208470582962036, "learning_rate": 5.758365042931848e-06, "loss": 1.4263, "mean_token_accuracy": 0.6728880008061727, "num_tokens": 2158989638.0, "step": 12872 }, { "entropy": 1.616513580083847, "epoch": 1.4141605558759716, "grad_norm": 0.6693636775016785, "learning_rate": 5.75706356895003e-06, "loss": 1.1544, "mean_token_accuracy": 0.6959843138853709, "num_tokens": 2159102023.0, "step": 12873 }, { "entropy": 1.7250353495279949, "epoch": 1.4142704127873444, "grad_norm": 0.7106985449790955, "learning_rate": 5.75576226090479e-06, "loss": 1.3421, "mean_token_accuracy": 0.6622584760189056, "num_tokens": 2159251112.0, "step": 12874 }, { "entropy": 1.6478977501392365, "epoch": 1.4143802696987176, "grad_norm": 0.7380810379981995, "learning_rate": 5.754461118837309e-06, "loss": 1.498, "mean_token_accuracy": 0.6418175796667734, "num_tokens": 2159440534.0, "step": 12875 }, { "entropy": 1.691566934188207, "epoch": 1.4144901266100904, "grad_norm": 0.6885169744491577, "learning_rate": 5.753160142788775e-06, "loss": 1.3672, "mean_token_accuracy": 0.6527943164110184, "num_tokens": 2159592449.0, "step": 12876 }, { "entropy": 1.718112548192342, "epoch": 1.4145999835214633, "grad_norm": 3.1608479022979736, "learning_rate": 5.7518593328003515e-06, "loss": 1.1454, "mean_token_accuracy": 0.6821771760781606, "num_tokens": 2159796095.0, "step": 12877 }, { "entropy": 1.6949720482031505, "epoch": 1.4147098404328362, "grad_norm": 0.6075740456581116, "learning_rate": 5.750558688913217e-06, "loss": 1.3461, "mean_token_accuracy": 0.6663214464982351, "num_tokens": 2159968669.0, "step": 12878 }, { "entropy": 1.654084712266922, "epoch": 1.414819697344209, "grad_norm": 0.5908815264701843, "learning_rate": 5.749258211168536e-06, "loss": 1.2713, "mean_token_accuracy": 0.6685434530178705, "num_tokens": 2160112122.0, "step": 12879 }, { "entropy": 1.7059245606263478, "epoch": 1.4149295542555822, "grad_norm": 0.6774670481681824, "learning_rate": 5.747957899607468e-06, "loss": 1.5032, "mean_token_accuracy": 0.6411937922239304, "num_tokens": 2160296569.0, "step": 12880 }, { "entropy": 1.6509164174397786, "epoch": 1.415039411166955, "grad_norm": 0.6976071000099182, "learning_rate": 5.7466577542711634e-06, "loss": 1.5301, "mean_token_accuracy": 0.6424646973609924, "num_tokens": 2160522783.0, "step": 12881 }, { "entropy": 1.5837687651316326, "epoch": 1.415149268078328, "grad_norm": 0.5916568040847778, "learning_rate": 5.745357775200775e-06, "loss": 1.3209, "mean_token_accuracy": 0.6775771975517273, "num_tokens": 2160705969.0, "step": 12882 }, { "entropy": 1.7270430326461792, "epoch": 1.4152591249897009, "grad_norm": 0.6653161644935608, "learning_rate": 5.744057962437441e-06, "loss": 1.4435, "mean_token_accuracy": 0.6445691585540771, "num_tokens": 2160854814.0, "step": 12883 }, { "entropy": 1.7269325355688732, "epoch": 1.4153689819010737, "grad_norm": 0.6765090227127075, "learning_rate": 5.74275831602231e-06, "loss": 1.317, "mean_token_accuracy": 0.6648717721303304, "num_tokens": 2160986318.0, "step": 12884 }, { "entropy": 1.7668460508187611, "epoch": 1.4154788388124468, "grad_norm": 0.8804817199707031, "learning_rate": 5.741458835996507e-06, "loss": 1.5703, "mean_token_accuracy": 0.6425085514783859, "num_tokens": 2161164540.0, "step": 12885 }, { "entropy": 1.719743698835373, "epoch": 1.4155886957238197, "grad_norm": 0.8075534105300903, "learning_rate": 5.740159522401161e-06, "loss": 1.3203, "mean_token_accuracy": 0.6848004907369614, "num_tokens": 2161294773.0, "step": 12886 }, { "entropy": 1.726130078236262, "epoch": 1.4156985526351926, "grad_norm": 0.6449326276779175, "learning_rate": 5.738860375277395e-06, "loss": 1.4198, "mean_token_accuracy": 0.630332425236702, "num_tokens": 2161463323.0, "step": 12887 }, { "entropy": 1.712664246559143, "epoch": 1.4158084095465657, "grad_norm": 0.6622018218040466, "learning_rate": 5.737561394666336e-06, "loss": 1.3093, "mean_token_accuracy": 0.6683304011821747, "num_tokens": 2161633350.0, "step": 12888 }, { "entropy": 1.6602988839149475, "epoch": 1.4159182664579386, "grad_norm": 0.6974785923957825, "learning_rate": 5.7362625806090775e-06, "loss": 1.3417, "mean_token_accuracy": 0.6663461575905482, "num_tokens": 2161813061.0, "step": 12889 }, { "entropy": 1.651836782693863, "epoch": 1.4160281233693115, "grad_norm": 0.6534989476203918, "learning_rate": 5.734963933146739e-06, "loss": 1.4203, "mean_token_accuracy": 0.6599469731251398, "num_tokens": 2161992435.0, "step": 12890 }, { "entropy": 1.709712266921997, "epoch": 1.4161379802806844, "grad_norm": 0.7406985759735107, "learning_rate": 5.733665452320422e-06, "loss": 1.4721, "mean_token_accuracy": 0.6705115636189779, "num_tokens": 2162170219.0, "step": 12891 }, { "entropy": 1.669614851474762, "epoch": 1.4162478371920573, "grad_norm": 0.6635571122169495, "learning_rate": 5.73236713817122e-06, "loss": 1.3549, "mean_token_accuracy": 0.6629445304473242, "num_tokens": 2162337356.0, "step": 12892 }, { "entropy": 1.6756293376286824, "epoch": 1.4163576941034304, "grad_norm": 0.6038379073143005, "learning_rate": 5.731068990740222e-06, "loss": 1.4527, "mean_token_accuracy": 0.633764331539472, "num_tokens": 2162534176.0, "step": 12893 }, { "entropy": 1.6719582378864288, "epoch": 1.4164675510148033, "grad_norm": 0.7196714282035828, "learning_rate": 5.729771010068518e-06, "loss": 1.2758, "mean_token_accuracy": 0.6750404040018717, "num_tokens": 2162683391.0, "step": 12894 }, { "entropy": 1.7006418605645497, "epoch": 1.4165774079261761, "grad_norm": 0.6588510274887085, "learning_rate": 5.728473196197184e-06, "loss": 1.3755, "mean_token_accuracy": 0.6491716603438059, "num_tokens": 2162858449.0, "step": 12895 }, { "entropy": 1.704519013563792, "epoch": 1.416687264837549, "grad_norm": 0.7052327990531921, "learning_rate": 5.7271755491673035e-06, "loss": 1.2329, "mean_token_accuracy": 0.6732802291711172, "num_tokens": 2162964910.0, "step": 12896 }, { "entropy": 1.695040076971054, "epoch": 1.416797121748922, "grad_norm": 0.6275352835655212, "learning_rate": 5.725878069019937e-06, "loss": 1.538, "mean_token_accuracy": 0.6412870685259501, "num_tokens": 2163208900.0, "step": 12897 }, { "entropy": 1.6921038031578064, "epoch": 1.416906978660295, "grad_norm": 0.7399893999099731, "learning_rate": 5.724580755796152e-06, "loss": 1.4541, "mean_token_accuracy": 0.6728978330890337, "num_tokens": 2163332942.0, "step": 12898 }, { "entropy": 1.708950052658717, "epoch": 1.417016835571668, "grad_norm": 0.7712686657905579, "learning_rate": 5.72328360953701e-06, "loss": 1.4268, "mean_token_accuracy": 0.6555162221193314, "num_tokens": 2163493726.0, "step": 12899 }, { "entropy": 1.714707463979721, "epoch": 1.4171266924830408, "grad_norm": 0.6765271425247192, "learning_rate": 5.7219866302835684e-06, "loss": 1.4633, "mean_token_accuracy": 0.6470478971799215, "num_tokens": 2163663872.0, "step": 12900 }, { "entropy": 1.6449009974797566, "epoch": 1.417236549394414, "grad_norm": 0.6789788603782654, "learning_rate": 5.720689818076864e-06, "loss": 1.3217, "mean_token_accuracy": 0.6745987633864085, "num_tokens": 2163845660.0, "step": 12901 }, { "entropy": 1.6422028144200642, "epoch": 1.4173464063057868, "grad_norm": 0.8197759389877319, "learning_rate": 5.719393172957951e-06, "loss": 1.3709, "mean_token_accuracy": 0.670257086555163, "num_tokens": 2163970180.0, "step": 12902 }, { "entropy": 1.6707020998001099, "epoch": 1.4174562632171597, "grad_norm": 0.8020114302635193, "learning_rate": 5.718096694967866e-06, "loss": 1.4755, "mean_token_accuracy": 0.6537665476401647, "num_tokens": 2164098025.0, "step": 12903 }, { "entropy": 1.723763604958852, "epoch": 1.4175661201285326, "grad_norm": 0.8109487295150757, "learning_rate": 5.716800384147642e-06, "loss": 1.5173, "mean_token_accuracy": 0.6525298108657202, "num_tokens": 2164325693.0, "step": 12904 }, { "entropy": 1.6953211824099224, "epoch": 1.4176759770399054, "grad_norm": 0.7380589842796326, "learning_rate": 5.715504240538301e-06, "loss": 1.2802, "mean_token_accuracy": 0.6700010697046915, "num_tokens": 2164459673.0, "step": 12905 }, { "entropy": 1.735133836666743, "epoch": 1.4177858339512786, "grad_norm": 0.8321533799171448, "learning_rate": 5.714208264180872e-06, "loss": 1.5847, "mean_token_accuracy": 0.6274262269337972, "num_tokens": 2164662503.0, "step": 12906 }, { "entropy": 1.6269804338614147, "epoch": 1.4178956908626514, "grad_norm": 0.7073882818222046, "learning_rate": 5.712912455116367e-06, "loss": 1.3359, "mean_token_accuracy": 0.6600817640622457, "num_tokens": 2164855340.0, "step": 12907 }, { "entropy": 1.7148225208123524, "epoch": 1.4180055477740243, "grad_norm": 0.700375497341156, "learning_rate": 5.7116168133858044e-06, "loss": 1.3533, "mean_token_accuracy": 0.6706081926822662, "num_tokens": 2165023645.0, "step": 12908 }, { "entropy": 1.7189152439435322, "epoch": 1.4181154046853972, "grad_norm": 0.7371551394462585, "learning_rate": 5.710321339030186e-06, "loss": 1.43, "mean_token_accuracy": 0.6535715262095133, "num_tokens": 2165227184.0, "step": 12909 }, { "entropy": 1.6818451484044392, "epoch": 1.41822526159677, "grad_norm": 0.660900354385376, "learning_rate": 5.70902603209051e-06, "loss": 1.184, "mean_token_accuracy": 0.6832146992286047, "num_tokens": 2165339873.0, "step": 12910 }, { "entropy": 1.6560562153657277, "epoch": 1.4183351185081432, "grad_norm": 0.6540271043777466, "learning_rate": 5.70773089260778e-06, "loss": 1.2773, "mean_token_accuracy": 0.6754108965396881, "num_tokens": 2165478503.0, "step": 12911 }, { "entropy": 1.644927054643631, "epoch": 1.418444975419516, "grad_norm": 0.6257344484329224, "learning_rate": 5.7064359206229825e-06, "loss": 1.3435, "mean_token_accuracy": 0.6578503449757894, "num_tokens": 2165658626.0, "step": 12912 }, { "entropy": 1.678837110598882, "epoch": 1.418554832330889, "grad_norm": 0.7022602558135986, "learning_rate": 5.7051411161771e-06, "loss": 1.5779, "mean_token_accuracy": 0.6389969835678736, "num_tokens": 2165860011.0, "step": 12913 }, { "entropy": 1.671900063753128, "epoch": 1.418664689242262, "grad_norm": 0.7610450983047485, "learning_rate": 5.703846479311113e-06, "loss": 1.3848, "mean_token_accuracy": 0.6613888889551163, "num_tokens": 2166057539.0, "step": 12914 }, { "entropy": 1.732055425643921, "epoch": 1.418774546153635, "grad_norm": 0.8880397081375122, "learning_rate": 5.702552010066004e-06, "loss": 1.2981, "mean_token_accuracy": 0.6812852670749029, "num_tokens": 2166212303.0, "step": 12915 }, { "entropy": 1.6862863500912983, "epoch": 1.4188844030650078, "grad_norm": 0.7152805924415588, "learning_rate": 5.701257708482736e-06, "loss": 1.3078, "mean_token_accuracy": 0.6692292392253876, "num_tokens": 2166363658.0, "step": 12916 }, { "entropy": 1.7482119103272755, "epoch": 1.4189942599763807, "grad_norm": 0.7135628461837769, "learning_rate": 5.69996357460227e-06, "loss": 1.6073, "mean_token_accuracy": 0.6363287791609764, "num_tokens": 2166531130.0, "step": 12917 }, { "entropy": 1.6944365203380585, "epoch": 1.4191041168877536, "grad_norm": 0.8478591442108154, "learning_rate": 5.6986696084655725e-06, "loss": 1.2922, "mean_token_accuracy": 0.6734979252020518, "num_tokens": 2166657623.0, "step": 12918 }, { "entropy": 1.6972975830237071, "epoch": 1.4192139737991267, "grad_norm": 0.5631718039512634, "learning_rate": 5.6973758101135905e-06, "loss": 1.3744, "mean_token_accuracy": 0.6703929851452509, "num_tokens": 2166856825.0, "step": 12919 }, { "entropy": 1.6998174389203389, "epoch": 1.4193238307104996, "grad_norm": 0.7097121477127075, "learning_rate": 5.696082179587275e-06, "loss": 1.4455, "mean_token_accuracy": 0.6481041411558787, "num_tokens": 2167059792.0, "step": 12920 }, { "entropy": 1.6798253854115803, "epoch": 1.4194336876218725, "grad_norm": 0.6257836818695068, "learning_rate": 5.694788716927571e-06, "loss": 1.4738, "mean_token_accuracy": 0.64958456158638, "num_tokens": 2167257982.0, "step": 12921 }, { "entropy": 1.673070341348648, "epoch": 1.4195435445332454, "grad_norm": 0.6729440093040466, "learning_rate": 5.69349542217541e-06, "loss": 1.3327, "mean_token_accuracy": 0.6657520681619644, "num_tokens": 2167412027.0, "step": 12922 }, { "entropy": 1.7135821183522542, "epoch": 1.4196534014446183, "grad_norm": 0.7069577574729919, "learning_rate": 5.692202295371731e-06, "loss": 1.4568, "mean_token_accuracy": 0.6652841120958328, "num_tokens": 2167572724.0, "step": 12923 }, { "entropy": 1.7391641736030579, "epoch": 1.4197632583559914, "grad_norm": 0.6086916923522949, "learning_rate": 5.690909336557458e-06, "loss": 1.4022, "mean_token_accuracy": 0.6388307412465414, "num_tokens": 2167724978.0, "step": 12924 }, { "entropy": 1.6861611207326253, "epoch": 1.4198731152673643, "grad_norm": 0.6505236625671387, "learning_rate": 5.689616545773508e-06, "loss": 1.4473, "mean_token_accuracy": 0.652971088886261, "num_tokens": 2167860519.0, "step": 12925 }, { "entropy": 1.7772870361804962, "epoch": 1.4199829721787371, "grad_norm": 0.7045353055000305, "learning_rate": 5.6883239230608024e-06, "loss": 1.3123, "mean_token_accuracy": 0.6595732072989146, "num_tokens": 2167959886.0, "step": 12926 }, { "entropy": 1.7017023464043934, "epoch": 1.4200928290901103, "grad_norm": 0.8636213541030884, "learning_rate": 5.687031468460253e-06, "loss": 1.3478, "mean_token_accuracy": 0.6783096243937811, "num_tokens": 2168139313.0, "step": 12927 }, { "entropy": 1.680685927470525, "epoch": 1.4202026860014831, "grad_norm": 0.7073819637298584, "learning_rate": 5.685739182012764e-06, "loss": 1.3014, "mean_token_accuracy": 0.6693469732999802, "num_tokens": 2168261083.0, "step": 12928 }, { "entropy": 1.7441634833812714, "epoch": 1.420312542912856, "grad_norm": 0.6499477624893188, "learning_rate": 5.684447063759233e-06, "loss": 1.4158, "mean_token_accuracy": 0.6491926809151968, "num_tokens": 2168426919.0, "step": 12929 }, { "entropy": 1.7324085434277852, "epoch": 1.420422399824229, "grad_norm": 0.7667383551597595, "learning_rate": 5.683155113740559e-06, "loss": 1.2891, "mean_token_accuracy": 0.6760394672552744, "num_tokens": 2168542110.0, "step": 12930 }, { "entropy": 1.7055922349294026, "epoch": 1.4205322567356018, "grad_norm": 0.674475908279419, "learning_rate": 5.681863331997628e-06, "loss": 1.4863, "mean_token_accuracy": 0.6470987647771835, "num_tokens": 2168739330.0, "step": 12931 }, { "entropy": 1.698354721069336, "epoch": 1.420642113646975, "grad_norm": 0.6338586807250977, "learning_rate": 5.680571718571328e-06, "loss": 1.3027, "mean_token_accuracy": 0.6713970800240835, "num_tokens": 2168880859.0, "step": 12932 }, { "entropy": 1.645394931236903, "epoch": 1.4207519705583478, "grad_norm": 0.67153000831604, "learning_rate": 5.679280273502537e-06, "loss": 1.33, "mean_token_accuracy": 0.6692242324352264, "num_tokens": 2169052718.0, "step": 12933 }, { "entropy": 1.7089401880900066, "epoch": 1.4208618274697207, "grad_norm": 0.6924530863761902, "learning_rate": 5.677988996832124e-06, "loss": 1.4324, "mean_token_accuracy": 0.6542358994483948, "num_tokens": 2169198381.0, "step": 12934 }, { "entropy": 1.7337498764197032, "epoch": 1.4209716843810936, "grad_norm": 0.7746621370315552, "learning_rate": 5.676697888600965e-06, "loss": 1.3295, "mean_token_accuracy": 0.6665536761283875, "num_tokens": 2169349542.0, "step": 12935 }, { "entropy": 1.6929566363493602, "epoch": 1.4210815412924664, "grad_norm": 0.6887615323066711, "learning_rate": 5.675406948849919e-06, "loss": 1.6089, "mean_token_accuracy": 0.6433848490317663, "num_tokens": 2169537168.0, "step": 12936 }, { "entropy": 1.7130916615327199, "epoch": 1.4211913982038396, "grad_norm": 1.0259429216384888, "learning_rate": 5.67411617761984e-06, "loss": 1.4418, "mean_token_accuracy": 0.650916631023089, "num_tokens": 2169701884.0, "step": 12937 }, { "entropy": 1.6535163124402363, "epoch": 1.4213012551152124, "grad_norm": 0.6167465448379517, "learning_rate": 5.672825574951588e-06, "loss": 1.3627, "mean_token_accuracy": 0.6604279528061548, "num_tokens": 2169914597.0, "step": 12938 }, { "entropy": 1.6423707405726116, "epoch": 1.4214111120265853, "grad_norm": 0.6382650136947632, "learning_rate": 5.671535140886002e-06, "loss": 1.3769, "mean_token_accuracy": 0.6672868579626083, "num_tokens": 2170071581.0, "step": 12939 }, { "entropy": 1.7335049013296764, "epoch": 1.4215209689379584, "grad_norm": 0.8444647192955017, "learning_rate": 5.670244875463931e-06, "loss": 1.4357, "mean_token_accuracy": 0.6631718277931213, "num_tokens": 2170211841.0, "step": 12940 }, { "entropy": 1.732320378224055, "epoch": 1.4216308258493313, "grad_norm": 0.7674136757850647, "learning_rate": 5.668954778726209e-06, "loss": 1.2571, "mean_token_accuracy": 0.681780661145846, "num_tokens": 2170337013.0, "step": 12941 }, { "entropy": 1.6558412313461304, "epoch": 1.4217406827607042, "grad_norm": 0.5625472068786621, "learning_rate": 5.667664850713662e-06, "loss": 1.3564, "mean_token_accuracy": 0.6685200929641724, "num_tokens": 2170505477.0, "step": 12942 }, { "entropy": 1.6879661480585735, "epoch": 1.421850539672077, "grad_norm": 0.6796611547470093, "learning_rate": 5.66637509146712e-06, "loss": 1.5171, "mean_token_accuracy": 0.646856447060903, "num_tokens": 2170703823.0, "step": 12943 }, { "entropy": 1.7009165585041046, "epoch": 1.42196039658345, "grad_norm": 0.7441216111183167, "learning_rate": 5.66508550102741e-06, "loss": 1.5151, "mean_token_accuracy": 0.6342372844616572, "num_tokens": 2170899789.0, "step": 12944 }, { "entropy": 1.7801036536693573, "epoch": 1.422070253494823, "grad_norm": 0.7327330112457275, "learning_rate": 5.663796079435331e-06, "loss": 1.3925, "mean_token_accuracy": 0.6534372419118881, "num_tokens": 2171035224.0, "step": 12945 }, { "entropy": 1.6900883217652638, "epoch": 1.422180110406196, "grad_norm": 0.7192649245262146, "learning_rate": 5.662506826731704e-06, "loss": 1.2754, "mean_token_accuracy": 0.6802859654029211, "num_tokens": 2171203629.0, "step": 12946 }, { "entropy": 1.7146925528844197, "epoch": 1.4222899673175688, "grad_norm": 0.8082349896430969, "learning_rate": 5.661217742957333e-06, "loss": 1.6062, "mean_token_accuracy": 0.6461126953363419, "num_tokens": 2171370122.0, "step": 12947 }, { "entropy": 1.7389146387577057, "epoch": 1.4223998242289417, "grad_norm": 0.7392411828041077, "learning_rate": 5.659928828153015e-06, "loss": 1.3126, "mean_token_accuracy": 0.667231614391009, "num_tokens": 2171482880.0, "step": 12948 }, { "entropy": 1.6697891255219777, "epoch": 1.4225096811403146, "grad_norm": 0.6810927987098694, "learning_rate": 5.658640082359541e-06, "loss": 1.2989, "mean_token_accuracy": 0.6649407347043356, "num_tokens": 2171621173.0, "step": 12949 }, { "entropy": 1.7280420064926147, "epoch": 1.4226195380516877, "grad_norm": 0.6208683252334595, "learning_rate": 5.657351505617703e-06, "loss": 1.458, "mean_token_accuracy": 0.6525483777125677, "num_tokens": 2171892873.0, "step": 12950 }, { "entropy": 1.7015692094961803, "epoch": 1.4227293949630606, "grad_norm": 0.7659103274345398, "learning_rate": 5.656063097968281e-06, "loss": 1.4574, "mean_token_accuracy": 0.6513445029656092, "num_tokens": 2172057108.0, "step": 12951 }, { "entropy": 1.7544087767601013, "epoch": 1.4228392518744335, "grad_norm": 0.7193136215209961, "learning_rate": 5.6547748594520556e-06, "loss": 1.3002, "mean_token_accuracy": 0.6638988107442856, "num_tokens": 2172162991.0, "step": 12952 }, { "entropy": 1.6446592311064403, "epoch": 1.4229491087858066, "grad_norm": 0.6459053754806519, "learning_rate": 5.653486790109798e-06, "loss": 1.319, "mean_token_accuracy": 0.6649145980676016, "num_tokens": 2172326151.0, "step": 12953 }, { "entropy": 1.6824187239011128, "epoch": 1.4230589656971795, "grad_norm": 0.6160148978233337, "learning_rate": 5.65219888998227e-06, "loss": 1.4223, "mean_token_accuracy": 0.6440077473719915, "num_tokens": 2172475596.0, "step": 12954 }, { "entropy": 1.6969729959964752, "epoch": 1.4231688226085524, "grad_norm": 0.748254120349884, "learning_rate": 5.650911159110239e-06, "loss": 1.2247, "mean_token_accuracy": 0.6879066576560339, "num_tokens": 2172577164.0, "step": 12955 }, { "entropy": 1.71368607878685, "epoch": 1.4232786795199253, "grad_norm": 0.7107913494110107, "learning_rate": 5.649623597534466e-06, "loss": 1.3242, "mean_token_accuracy": 0.6704561958710352, "num_tokens": 2172735492.0, "step": 12956 }, { "entropy": 1.6840336819489796, "epoch": 1.4233885364312981, "grad_norm": 0.6022214889526367, "learning_rate": 5.648336205295687e-06, "loss": 1.3555, "mean_token_accuracy": 0.6485221783320109, "num_tokens": 2172911336.0, "step": 12957 }, { "entropy": 1.7092094123363495, "epoch": 1.4234983933426713, "grad_norm": 0.8171796202659607, "learning_rate": 5.647048982434656e-06, "loss": 1.4057, "mean_token_accuracy": 0.65643543501695, "num_tokens": 2173068228.0, "step": 12958 }, { "entropy": 1.762761503458023, "epoch": 1.4236082502540441, "grad_norm": 0.7240894436836243, "learning_rate": 5.645761928992117e-06, "loss": 1.3219, "mean_token_accuracy": 0.6557150532801946, "num_tokens": 2173184380.0, "step": 12959 }, { "entropy": 1.721298485994339, "epoch": 1.423718107165417, "grad_norm": 0.7749879360198975, "learning_rate": 5.644475045008799e-06, "loss": 1.5254, "mean_token_accuracy": 0.6502954959869385, "num_tokens": 2173328706.0, "step": 12960 }, { "entropy": 1.7307699620723724, "epoch": 1.42382796407679, "grad_norm": 0.7057486772537231, "learning_rate": 5.643188330525431e-06, "loss": 1.2917, "mean_token_accuracy": 0.6716625094413757, "num_tokens": 2173441079.0, "step": 12961 }, { "entropy": 1.716073344151179, "epoch": 1.4239378209881628, "grad_norm": 0.6514110565185547, "learning_rate": 5.641901785582739e-06, "loss": 1.3905, "mean_token_accuracy": 0.6514262358347574, "num_tokens": 2173586081.0, "step": 12962 }, { "entropy": 1.655029982328415, "epoch": 1.424047677899536, "grad_norm": 0.6521716117858887, "learning_rate": 5.640615410221442e-06, "loss": 1.3778, "mean_token_accuracy": 0.6559510032335917, "num_tokens": 2173819782.0, "step": 12963 }, { "entropy": 1.6636370718479156, "epoch": 1.4241575348109088, "grad_norm": 0.6372007727622986, "learning_rate": 5.639329204482252e-06, "loss": 1.3485, "mean_token_accuracy": 0.6604503045479456, "num_tokens": 2173978689.0, "step": 12964 }, { "entropy": 1.6833548645178478, "epoch": 1.4242673917222817, "grad_norm": 0.7769525051116943, "learning_rate": 5.638043168405878e-06, "loss": 1.4396, "mean_token_accuracy": 0.6549781362215678, "num_tokens": 2174216659.0, "step": 12965 }, { "entropy": 1.7020526230335236, "epoch": 1.4243772486336548, "grad_norm": 0.6384181380271912, "learning_rate": 5.636757302033018e-06, "loss": 1.321, "mean_token_accuracy": 0.6678778429826101, "num_tokens": 2174383271.0, "step": 12966 }, { "entropy": 1.6687651177247365, "epoch": 1.4244871055450277, "grad_norm": 0.7364367246627808, "learning_rate": 5.6354716054043726e-06, "loss": 1.5467, "mean_token_accuracy": 0.6513969451189041, "num_tokens": 2174578775.0, "step": 12967 }, { "entropy": 1.6899695893128712, "epoch": 1.4245969624564006, "grad_norm": 0.6350962519645691, "learning_rate": 5.634186078560641e-06, "loss": 1.339, "mean_token_accuracy": 0.6744259546200434, "num_tokens": 2174745441.0, "step": 12968 }, { "entropy": 1.6761878331502278, "epoch": 1.4247068193677734, "grad_norm": 0.726622998714447, "learning_rate": 5.632900721542496e-06, "loss": 1.5962, "mean_token_accuracy": 0.6452071170012156, "num_tokens": 2174967033.0, "step": 12969 }, { "entropy": 1.6440961559613545, "epoch": 1.4248166762791463, "grad_norm": 0.7337450385093689, "learning_rate": 5.631615534390623e-06, "loss": 1.4722, "mean_token_accuracy": 0.6409422506888708, "num_tokens": 2175205554.0, "step": 12970 }, { "entropy": 1.720875859260559, "epoch": 1.4249265331905194, "grad_norm": 0.7657408118247986, "learning_rate": 5.630330517145704e-06, "loss": 1.5809, "mean_token_accuracy": 0.6256515284379324, "num_tokens": 2175426782.0, "step": 12971 }, { "entropy": 1.740049531062444, "epoch": 1.4250363901018923, "grad_norm": 0.6672664880752563, "learning_rate": 5.6290456698484045e-06, "loss": 1.5588, "mean_token_accuracy": 0.6511749972899755, "num_tokens": 2175633316.0, "step": 12972 }, { "entropy": 1.7396978239218395, "epoch": 1.4251462470132652, "grad_norm": 0.6931610107421875, "learning_rate": 5.627760992539384e-06, "loss": 1.3684, "mean_token_accuracy": 0.6563472002744675, "num_tokens": 2175780067.0, "step": 12973 }, { "entropy": 1.6736437479654949, "epoch": 1.425256103924638, "grad_norm": 0.574825644493103, "learning_rate": 5.626476485259314e-06, "loss": 1.6164, "mean_token_accuracy": 0.6455618888139725, "num_tokens": 2175994610.0, "step": 12974 }, { "entropy": 1.6988399227460225, "epoch": 1.425365960836011, "grad_norm": 0.828632652759552, "learning_rate": 5.6251921480488355e-06, "loss": 1.3082, "mean_token_accuracy": 0.6656528313954672, "num_tokens": 2176110701.0, "step": 12975 }, { "entropy": 1.7076418995857239, "epoch": 1.425475817747384, "grad_norm": 0.5862886309623718, "learning_rate": 5.623907980948608e-06, "loss": 1.3982, "mean_token_accuracy": 0.662509153286616, "num_tokens": 2176301670.0, "step": 12976 }, { "entropy": 1.7174928188323975, "epoch": 1.425585674658757, "grad_norm": 0.8851239681243896, "learning_rate": 5.6226239839992715e-06, "loss": 1.4016, "mean_token_accuracy": 0.6703563729921976, "num_tokens": 2176456456.0, "step": 12977 }, { "entropy": 1.7496284544467926, "epoch": 1.4256955315701298, "grad_norm": 0.6377162337303162, "learning_rate": 5.6213401572414575e-06, "loss": 1.3347, "mean_token_accuracy": 0.6575185209512711, "num_tokens": 2176602131.0, "step": 12978 }, { "entropy": 1.766124387582143, "epoch": 1.425805388481503, "grad_norm": 0.667473316192627, "learning_rate": 5.620056500715805e-06, "loss": 1.3978, "mean_token_accuracy": 0.654146542151769, "num_tokens": 2176788934.0, "step": 12979 }, { "entropy": 1.7000204424063365, "epoch": 1.4259152453928758, "grad_norm": 0.7213659286499023, "learning_rate": 5.618773014462946e-06, "loss": 1.1064, "mean_token_accuracy": 0.7038625578085581, "num_tokens": 2176882325.0, "step": 12980 }, { "entropy": 1.7640029390652974, "epoch": 1.4260251023042487, "grad_norm": 0.6563233733177185, "learning_rate": 5.617489698523491e-06, "loss": 1.4325, "mean_token_accuracy": 0.6590708047151566, "num_tokens": 2177014004.0, "step": 12981 }, { "entropy": 1.6775661706924438, "epoch": 1.4261349592156216, "grad_norm": 0.5699209570884705, "learning_rate": 5.616206552938059e-06, "loss": 1.3725, "mean_token_accuracy": 0.6618246585130692, "num_tokens": 2177185937.0, "step": 12982 }, { "entropy": 1.6460503935813904, "epoch": 1.4262448161269945, "grad_norm": 0.7296600937843323, "learning_rate": 5.614923577747269e-06, "loss": 1.3168, "mean_token_accuracy": 0.6661859899759293, "num_tokens": 2177345935.0, "step": 12983 }, { "entropy": 1.6979570190111797, "epoch": 1.4263546730383676, "grad_norm": 0.6132034063339233, "learning_rate": 5.613640772991721e-06, "loss": 1.4773, "mean_token_accuracy": 0.6433689743280411, "num_tokens": 2177602730.0, "step": 12984 }, { "entropy": 1.6999173959096272, "epoch": 1.4264645299497405, "grad_norm": 0.6948098540306091, "learning_rate": 5.612358138712011e-06, "loss": 1.4101, "mean_token_accuracy": 0.6698167969783148, "num_tokens": 2177759033.0, "step": 12985 }, { "entropy": 1.7339690029621124, "epoch": 1.4265743868611134, "grad_norm": 0.7029469013214111, "learning_rate": 5.611075674948743e-06, "loss": 1.2782, "mean_token_accuracy": 0.6747192790110906, "num_tokens": 2177875280.0, "step": 12986 }, { "entropy": 1.6518168846766155, "epoch": 1.4266842437724863, "grad_norm": 0.7279475331306458, "learning_rate": 5.609793381742497e-06, "loss": 1.1832, "mean_token_accuracy": 0.6870991041262945, "num_tokens": 2177996766.0, "step": 12987 }, { "entropy": 1.7683051228523254, "epoch": 1.4267941006838591, "grad_norm": 0.7115809917449951, "learning_rate": 5.608511259133867e-06, "loss": 1.4565, "mean_token_accuracy": 0.6595746825138727, "num_tokens": 2178140721.0, "step": 12988 }, { "entropy": 1.6959002912044525, "epoch": 1.4269039575952323, "grad_norm": 0.7604736089706421, "learning_rate": 5.607229307163423e-06, "loss": 1.2443, "mean_token_accuracy": 0.6789979139963785, "num_tokens": 2178285786.0, "step": 12989 }, { "entropy": 1.751099556684494, "epoch": 1.4270138145066051, "grad_norm": 0.6957893967628479, "learning_rate": 5.60594752587174e-06, "loss": 1.3528, "mean_token_accuracy": 0.6624589115381241, "num_tokens": 2178418812.0, "step": 12990 }, { "entropy": 1.7143315374851227, "epoch": 1.427123671417978, "grad_norm": 0.6933035850524902, "learning_rate": 5.60466591529939e-06, "loss": 1.3594, "mean_token_accuracy": 0.6720022509495417, "num_tokens": 2178586498.0, "step": 12991 }, { "entropy": 1.7343165179093678, "epoch": 1.4272335283293511, "grad_norm": 0.6081417798995972, "learning_rate": 5.603384475486932e-06, "loss": 1.4883, "mean_token_accuracy": 0.6346796850363413, "num_tokens": 2178832189.0, "step": 12992 }, { "entropy": 1.7000845571358998, "epoch": 1.427343385240724, "grad_norm": 0.6261142492294312, "learning_rate": 5.602103206474922e-06, "loss": 1.3748, "mean_token_accuracy": 0.6570387482643127, "num_tokens": 2179026605.0, "step": 12993 }, { "entropy": 1.6828905642032623, "epoch": 1.427453242152097, "grad_norm": 0.7044478058815002, "learning_rate": 5.600822108303916e-06, "loss": 1.296, "mean_token_accuracy": 0.6614427367846171, "num_tokens": 2179195579.0, "step": 12994 }, { "entropy": 1.6979553401470184, "epoch": 1.4275630990634698, "grad_norm": 0.8138183355331421, "learning_rate": 5.599541181014453e-06, "loss": 1.4325, "mean_token_accuracy": 0.6653849979241689, "num_tokens": 2179366662.0, "step": 12995 }, { "entropy": 1.758367915948232, "epoch": 1.4276729559748427, "grad_norm": 0.7187701463699341, "learning_rate": 5.598260424647081e-06, "loss": 1.3425, "mean_token_accuracy": 0.656711811820666, "num_tokens": 2179506900.0, "step": 12996 }, { "entropy": 1.6735180914402008, "epoch": 1.4277828128862158, "grad_norm": 0.7112841606140137, "learning_rate": 5.596979839242335e-06, "loss": 1.2976, "mean_token_accuracy": 0.6648980478445689, "num_tokens": 2179630864.0, "step": 12997 }, { "entropy": 1.7127126554648082, "epoch": 1.4278926697975887, "grad_norm": 0.6963240504264832, "learning_rate": 5.595699424840737e-06, "loss": 1.3437, "mean_token_accuracy": 0.6695072799921036, "num_tokens": 2179771664.0, "step": 12998 }, { "entropy": 1.6788500547409058, "epoch": 1.4280025267089616, "grad_norm": 0.8437992334365845, "learning_rate": 5.5944191814828174e-06, "loss": 1.346, "mean_token_accuracy": 0.6613343755404154, "num_tokens": 2179961091.0, "step": 12999 }, { "entropy": 1.7473607063293457, "epoch": 1.4281123836203344, "grad_norm": 0.7349570393562317, "learning_rate": 5.593139109209102e-06, "loss": 1.4735, "mean_token_accuracy": 0.6576576034228007, "num_tokens": 2180151428.0, "step": 13000 }, { "entropy": 1.6567772924900055, "epoch": 1.4282222405317073, "grad_norm": 0.7208495736122131, "learning_rate": 5.591859208060091e-06, "loss": 1.2619, "mean_token_accuracy": 0.6733733216921488, "num_tokens": 2180285719.0, "step": 13001 }, { "entropy": 1.7362763285636902, "epoch": 1.4283320974430804, "grad_norm": 0.725077748298645, "learning_rate": 5.590579478076298e-06, "loss": 1.2903, "mean_token_accuracy": 0.6674645096063614, "num_tokens": 2180390288.0, "step": 13002 }, { "entropy": 1.6696288685003917, "epoch": 1.4284419543544533, "grad_norm": 0.5948540568351746, "learning_rate": 5.58929991929823e-06, "loss": 1.4792, "mean_token_accuracy": 0.6409497807423273, "num_tokens": 2180565522.0, "step": 13003 }, { "entropy": 1.703221042950948, "epoch": 1.4285518112658262, "grad_norm": 0.6803807020187378, "learning_rate": 5.5880205317663824e-06, "loss": 1.3171, "mean_token_accuracy": 0.6711372335751852, "num_tokens": 2180680337.0, "step": 13004 }, { "entropy": 1.6649436155954997, "epoch": 1.4286616681771993, "grad_norm": 0.6614540219306946, "learning_rate": 5.586741315521245e-06, "loss": 1.5934, "mean_token_accuracy": 0.62440458436807, "num_tokens": 2180939669.0, "step": 13005 }, { "entropy": 1.6837556461493175, "epoch": 1.4287715250885722, "grad_norm": 0.6564772725105286, "learning_rate": 5.585462270603306e-06, "loss": 1.4291, "mean_token_accuracy": 0.662267878651619, "num_tokens": 2181122104.0, "step": 13006 }, { "entropy": 1.7078391114870708, "epoch": 1.428881381999945, "grad_norm": 0.6484875679016113, "learning_rate": 5.5841833970530425e-06, "loss": 1.3659, "mean_token_accuracy": 0.6654360741376877, "num_tokens": 2181268418.0, "step": 13007 }, { "entropy": 1.6462851862112682, "epoch": 1.428991238911318, "grad_norm": 0.5821180939674377, "learning_rate": 5.58290469491094e-06, "loss": 1.3693, "mean_token_accuracy": 0.6470950643221537, "num_tokens": 2181533426.0, "step": 13008 }, { "entropy": 1.7857622504234314, "epoch": 1.4291010958226908, "grad_norm": 0.7754672169685364, "learning_rate": 5.581626164217461e-06, "loss": 1.4693, "mean_token_accuracy": 0.6310462603966395, "num_tokens": 2181692955.0, "step": 13009 }, { "entropy": 1.6840496559937794, "epoch": 1.429210952734064, "grad_norm": 1.3394370079040527, "learning_rate": 5.58034780501307e-06, "loss": 1.4793, "mean_token_accuracy": 0.6427704244852066, "num_tokens": 2181887005.0, "step": 13010 }, { "entropy": 1.7042312423388164, "epoch": 1.4293208096454368, "grad_norm": 0.6307734847068787, "learning_rate": 5.579069617338229e-06, "loss": 1.4193, "mean_token_accuracy": 0.6560370475053787, "num_tokens": 2182086766.0, "step": 13011 }, { "entropy": 1.7062763075033824, "epoch": 1.4294306665568097, "grad_norm": 0.6449376344680786, "learning_rate": 5.577791601233398e-06, "loss": 1.5444, "mean_token_accuracy": 0.6326194703578949, "num_tokens": 2182269781.0, "step": 13012 }, { "entropy": 1.6698502898216248, "epoch": 1.4295405234681826, "grad_norm": 0.6093115210533142, "learning_rate": 5.576513756739012e-06, "loss": 1.2875, "mean_token_accuracy": 0.6728635678688685, "num_tokens": 2182442382.0, "step": 13013 }, { "entropy": 1.7181947231292725, "epoch": 1.4296503803795555, "grad_norm": 0.682712733745575, "learning_rate": 5.5752360838955215e-06, "loss": 1.3765, "mean_token_accuracy": 0.6576512654622396, "num_tokens": 2182589601.0, "step": 13014 }, { "entropy": 1.6917083462079365, "epoch": 1.4297602372909286, "grad_norm": 0.6454870700836182, "learning_rate": 5.573958582743368e-06, "loss": 1.4754, "mean_token_accuracy": 0.6636495043834051, "num_tokens": 2182779186.0, "step": 13015 }, { "entropy": 1.7364496489365895, "epoch": 1.4298700942023015, "grad_norm": 0.8121635317802429, "learning_rate": 5.572681253322983e-06, "loss": 1.4932, "mean_token_accuracy": 0.6382195055484772, "num_tokens": 2182965939.0, "step": 13016 }, { "entropy": 1.705742100874583, "epoch": 1.4299799511136744, "grad_norm": 0.6007391810417175, "learning_rate": 5.571404095674786e-06, "loss": 1.4925, "mean_token_accuracy": 0.6454195727904638, "num_tokens": 2183198308.0, "step": 13017 }, { "entropy": 1.7828343609968822, "epoch": 1.4300898080250475, "grad_norm": 0.857241690158844, "learning_rate": 5.570127109839205e-06, "loss": 1.5772, "mean_token_accuracy": 0.6316992690165838, "num_tokens": 2183435367.0, "step": 13018 }, { "entropy": 1.719041536251704, "epoch": 1.4301996649364204, "grad_norm": 0.6930078864097595, "learning_rate": 5.568850295856652e-06, "loss": 1.434, "mean_token_accuracy": 0.6500537196795145, "num_tokens": 2183640896.0, "step": 13019 }, { "entropy": 1.6936982572078705, "epoch": 1.4303095218477933, "grad_norm": 0.8050754070281982, "learning_rate": 5.567573653767544e-06, "loss": 1.3243, "mean_token_accuracy": 0.667048583428065, "num_tokens": 2183808495.0, "step": 13020 }, { "entropy": 1.7069650292396545, "epoch": 1.4304193787591661, "grad_norm": 0.7172401547431946, "learning_rate": 5.5662971836122795e-06, "loss": 1.2996, "mean_token_accuracy": 0.6751367499430975, "num_tokens": 2183998179.0, "step": 13021 }, { "entropy": 1.744606077671051, "epoch": 1.430529235670539, "grad_norm": 0.7243566513061523, "learning_rate": 5.56502088543126e-06, "loss": 1.3691, "mean_token_accuracy": 0.6522730439901352, "num_tokens": 2184155317.0, "step": 13022 }, { "entropy": 1.6920949220657349, "epoch": 1.4306390925819121, "grad_norm": 0.5887582898139954, "learning_rate": 5.56374475926488e-06, "loss": 1.4795, "mean_token_accuracy": 0.6456716706355413, "num_tokens": 2184358935.0, "step": 13023 }, { "entropy": 1.7107125322024028, "epoch": 1.430748949493285, "grad_norm": 0.6471151113510132, "learning_rate": 5.562468805153534e-06, "loss": 1.4389, "mean_token_accuracy": 0.6550725599129995, "num_tokens": 2184539771.0, "step": 13024 }, { "entropy": 1.7102607389291127, "epoch": 1.430858806404658, "grad_norm": 0.7383255958557129, "learning_rate": 5.561193023137595e-06, "loss": 1.3653, "mean_token_accuracy": 0.6568672160307566, "num_tokens": 2184693987.0, "step": 13025 }, { "entropy": 1.7418889204661052, "epoch": 1.4309686633160308, "grad_norm": 0.8502252697944641, "learning_rate": 5.559917413257444e-06, "loss": 1.3336, "mean_token_accuracy": 0.6681547611951828, "num_tokens": 2184835284.0, "step": 13026 }, { "entropy": 1.7333911557992299, "epoch": 1.4310785202274037, "grad_norm": 0.669732928276062, "learning_rate": 5.558641975553459e-06, "loss": 1.2936, "mean_token_accuracy": 0.664469505349795, "num_tokens": 2184953631.0, "step": 13027 }, { "entropy": 1.7422559758027394, "epoch": 1.4311883771387768, "grad_norm": 0.7372428178787231, "learning_rate": 5.557366710066006e-06, "loss": 1.4934, "mean_token_accuracy": 0.6541983361045519, "num_tokens": 2185094275.0, "step": 13028 }, { "entropy": 1.6865850885709126, "epoch": 1.4312982340501497, "grad_norm": 0.7553392648696899, "learning_rate": 5.556091616835438e-06, "loss": 1.4829, "mean_token_accuracy": 0.6615019887685776, "num_tokens": 2185230818.0, "step": 13029 }, { "entropy": 1.7182862261931102, "epoch": 1.4314080909615226, "grad_norm": 0.6798596382141113, "learning_rate": 5.554816695902122e-06, "loss": 1.4433, "mean_token_accuracy": 0.6591095378001531, "num_tokens": 2185375644.0, "step": 13030 }, { "entropy": 1.771297464768092, "epoch": 1.4315179478728957, "grad_norm": 0.7370211482048035, "learning_rate": 5.5535419473064015e-06, "loss": 1.4662, "mean_token_accuracy": 0.6467802077531815, "num_tokens": 2185563893.0, "step": 13031 }, { "entropy": 1.6960931917031605, "epoch": 1.4316278047842685, "grad_norm": 0.7024471163749695, "learning_rate": 5.552267371088626e-06, "loss": 1.5382, "mean_token_accuracy": 0.6351848443349203, "num_tokens": 2185756274.0, "step": 13032 }, { "entropy": 1.7461300293604534, "epoch": 1.4317376616956414, "grad_norm": 0.7345073223114014, "learning_rate": 5.550992967289134e-06, "loss": 1.3774, "mean_token_accuracy": 0.6503315269947052, "num_tokens": 2185906994.0, "step": 13033 }, { "entropy": 1.7313400208950043, "epoch": 1.4318475186070143, "grad_norm": 0.6339378952980042, "learning_rate": 5.549718735948255e-06, "loss": 1.4133, "mean_token_accuracy": 0.6485500335693359, "num_tokens": 2186094480.0, "step": 13034 }, { "entropy": 1.6995634138584137, "epoch": 1.4319573755183872, "grad_norm": 0.6970425248146057, "learning_rate": 5.548444677106324e-06, "loss": 1.3832, "mean_token_accuracy": 0.6507101853688558, "num_tokens": 2186266968.0, "step": 13035 }, { "entropy": 1.6785328388214111, "epoch": 1.4320672324297603, "grad_norm": 0.8331180214881897, "learning_rate": 5.547170790803667e-06, "loss": 1.394, "mean_token_accuracy": 0.6583549777666727, "num_tokens": 2186422477.0, "step": 13036 }, { "entropy": 1.7474220593770344, "epoch": 1.4321770893411332, "grad_norm": 0.7904669642448425, "learning_rate": 5.545897077080591e-06, "loss": 1.5484, "mean_token_accuracy": 0.6391499191522598, "num_tokens": 2186658720.0, "step": 13037 }, { "entropy": 1.745592087507248, "epoch": 1.432286946252506, "grad_norm": 0.688362181186676, "learning_rate": 5.544623535977416e-06, "loss": 1.4482, "mean_token_accuracy": 0.6570040633281072, "num_tokens": 2186849614.0, "step": 13038 }, { "entropy": 1.683472563823064, "epoch": 1.4323968031638792, "grad_norm": 0.6357275247573853, "learning_rate": 5.543350167534451e-06, "loss": 1.3715, "mean_token_accuracy": 0.6654830276966095, "num_tokens": 2187013867.0, "step": 13039 }, { "entropy": 1.6940331260363262, "epoch": 1.4325066600752518, "grad_norm": 0.621670126914978, "learning_rate": 5.542076971791994e-06, "loss": 1.3792, "mean_token_accuracy": 0.6595076471567154, "num_tokens": 2187182576.0, "step": 13040 }, { "entropy": 1.7123170693715413, "epoch": 1.432616516986625, "grad_norm": 0.9452431797981262, "learning_rate": 5.5408039487903375e-06, "loss": 1.4066, "mean_token_accuracy": 0.6636346479256948, "num_tokens": 2187310672.0, "step": 13041 }, { "entropy": 1.6607805689175923, "epoch": 1.4327263738979978, "grad_norm": 0.6522439122200012, "learning_rate": 5.5395310985697804e-06, "loss": 1.1924, "mean_token_accuracy": 0.6833398044109344, "num_tokens": 2187434823.0, "step": 13042 }, { "entropy": 1.709082802136739, "epoch": 1.4328362308093707, "grad_norm": 0.6486063003540039, "learning_rate": 5.538258421170599e-06, "loss": 1.4724, "mean_token_accuracy": 0.6308440069357554, "num_tokens": 2187628693.0, "step": 13043 }, { "entropy": 1.6720016201337178, "epoch": 1.4329460877207438, "grad_norm": 0.790734052658081, "learning_rate": 5.5369859166330816e-06, "loss": 1.456, "mean_token_accuracy": 0.6650431652863821, "num_tokens": 2187795256.0, "step": 13044 }, { "entropy": 1.7333543697992961, "epoch": 1.4330559446321167, "grad_norm": 0.6807409524917603, "learning_rate": 5.535713584997498e-06, "loss": 1.4672, "mean_token_accuracy": 0.6574084411064783, "num_tokens": 2187948169.0, "step": 13045 }, { "entropy": 1.7210048735141754, "epoch": 1.4331658015434896, "grad_norm": 0.7557775974273682, "learning_rate": 5.5344414263041145e-06, "loss": 1.3155, "mean_token_accuracy": 0.6705209712187449, "num_tokens": 2188074869.0, "step": 13046 }, { "entropy": 1.679331550995509, "epoch": 1.4332756584548625, "grad_norm": 0.6121296286582947, "learning_rate": 5.5331694405931966e-06, "loss": 1.553, "mean_token_accuracy": 0.629560798406601, "num_tokens": 2188272724.0, "step": 13047 }, { "entropy": 1.6509125630060832, "epoch": 1.4333855153662354, "grad_norm": 0.764921247959137, "learning_rate": 5.531897627905009e-06, "loss": 1.2867, "mean_token_accuracy": 0.6722366611162821, "num_tokens": 2188438097.0, "step": 13048 }, { "entropy": 1.668609122435252, "epoch": 1.4334953722776085, "grad_norm": 0.6842905282974243, "learning_rate": 5.530625988279791e-06, "loss": 1.373, "mean_token_accuracy": 0.6644268482923508, "num_tokens": 2188625138.0, "step": 13049 }, { "entropy": 1.7268520295619965, "epoch": 1.4336052291889814, "grad_norm": 0.7898194789886475, "learning_rate": 5.529354521757796e-06, "loss": 1.3678, "mean_token_accuracy": 0.6659293274084727, "num_tokens": 2188786405.0, "step": 13050 }, { "entropy": 1.6797465880711873, "epoch": 1.4337150861003543, "grad_norm": 0.6213613748550415, "learning_rate": 5.5280832283792685e-06, "loss": 1.2378, "mean_token_accuracy": 0.6838393161694208, "num_tokens": 2188901389.0, "step": 13051 }, { "entropy": 1.749913473924001, "epoch": 1.4338249430117274, "grad_norm": 0.8360404372215271, "learning_rate": 5.52681210818444e-06, "loss": 1.4877, "mean_token_accuracy": 0.6598167518774668, "num_tokens": 2189098778.0, "step": 13052 }, { "entropy": 1.6591697732607524, "epoch": 1.4339347999231002, "grad_norm": 0.736677885055542, "learning_rate": 5.52554116121354e-06, "loss": 1.292, "mean_token_accuracy": 0.6754182428121567, "num_tokens": 2189222964.0, "step": 13053 }, { "entropy": 1.6895016729831696, "epoch": 1.4340446568344731, "grad_norm": 0.6395838260650635, "learning_rate": 5.5242703875067985e-06, "loss": 1.3534, "mean_token_accuracy": 0.6683394263188044, "num_tokens": 2189399202.0, "step": 13054 }, { "entropy": 1.7012076675891876, "epoch": 1.434154513745846, "grad_norm": 0.5661507844924927, "learning_rate": 5.522999787104429e-06, "loss": 1.3654, "mean_token_accuracy": 0.672503188252449, "num_tokens": 2189573162.0, "step": 13055 }, { "entropy": 1.7428718010584514, "epoch": 1.434264370657219, "grad_norm": 0.5942409634590149, "learning_rate": 5.521729360046653e-06, "loss": 1.3816, "mean_token_accuracy": 0.6550378203392029, "num_tokens": 2189775760.0, "step": 13056 }, { "entropy": 1.7421286702156067, "epoch": 1.434374227568592, "grad_norm": 0.6235953569412231, "learning_rate": 5.52045910637367e-06, "loss": 1.5425, "mean_token_accuracy": 0.6408863415320715, "num_tokens": 2189967802.0, "step": 13057 }, { "entropy": 1.763216882944107, "epoch": 1.434484084479965, "grad_norm": 0.7079127430915833, "learning_rate": 5.519189026125684e-06, "loss": 1.3053, "mean_token_accuracy": 0.6678203245004019, "num_tokens": 2190080360.0, "step": 13058 }, { "entropy": 1.6415379345417023, "epoch": 1.4345939413913378, "grad_norm": 0.6366350650787354, "learning_rate": 5.5179191193429015e-06, "loss": 1.2732, "mean_token_accuracy": 0.6822443703810374, "num_tokens": 2190213423.0, "step": 13059 }, { "entropy": 1.66671418150266, "epoch": 1.4347037983027107, "grad_norm": 37.921844482421875, "learning_rate": 5.516649386065508e-06, "loss": 1.3524, "mean_token_accuracy": 0.6727404892444611, "num_tokens": 2190381575.0, "step": 13060 }, { "entropy": 1.6547318299611409, "epoch": 1.4348136552140835, "grad_norm": 0.6079090237617493, "learning_rate": 5.515379826333688e-06, "loss": 1.49, "mean_token_accuracy": 0.6499229669570923, "num_tokens": 2190556977.0, "step": 13061 }, { "entropy": 1.7024961809317272, "epoch": 1.4349235121254567, "grad_norm": 0.664475679397583, "learning_rate": 5.514110440187628e-06, "loss": 1.5275, "mean_token_accuracy": 0.6373623659213384, "num_tokens": 2190773850.0, "step": 13062 }, { "entropy": 1.7388477126757305, "epoch": 1.4350333690368295, "grad_norm": 0.7594343423843384, "learning_rate": 5.5128412276674955e-06, "loss": 1.448, "mean_token_accuracy": 0.6530890514453253, "num_tokens": 2190944059.0, "step": 13063 }, { "entropy": 1.761966496706009, "epoch": 1.4351432259482024, "grad_norm": 0.7008910775184631, "learning_rate": 5.5115721888134695e-06, "loss": 1.5423, "mean_token_accuracy": 0.6320922573407491, "num_tokens": 2191128206.0, "step": 13064 }, { "entropy": 1.7421748340129852, "epoch": 1.4352530828595755, "grad_norm": 0.7401275634765625, "learning_rate": 5.510303323665712e-06, "loss": 1.4024, "mean_token_accuracy": 0.6560095548629761, "num_tokens": 2191241600.0, "step": 13065 }, { "entropy": 1.7223056654135387, "epoch": 1.4353629397709484, "grad_norm": 0.6929873824119568, "learning_rate": 5.509034632264376e-06, "loss": 1.2268, "mean_token_accuracy": 0.6787229428688685, "num_tokens": 2191357856.0, "step": 13066 }, { "entropy": 1.7451776067415874, "epoch": 1.4354727966823213, "grad_norm": 0.7561651468276978, "learning_rate": 5.507766114649622e-06, "loss": 1.445, "mean_token_accuracy": 0.6532369504372278, "num_tokens": 2191496045.0, "step": 13067 }, { "entropy": 1.7426794469356537, "epoch": 1.4355826535936942, "grad_norm": 0.6168361306190491, "learning_rate": 5.506497770861598e-06, "loss": 1.4346, "mean_token_accuracy": 0.6491942703723907, "num_tokens": 2191669650.0, "step": 13068 }, { "entropy": 1.679451435804367, "epoch": 1.435692510505067, "grad_norm": 0.7383124232292175, "learning_rate": 5.50522960094044e-06, "loss": 1.3998, "mean_token_accuracy": 0.6661538481712341, "num_tokens": 2191842559.0, "step": 13069 }, { "entropy": 1.723546991745631, "epoch": 1.4358023674164402, "grad_norm": 0.8791068196296692, "learning_rate": 5.503961604926291e-06, "loss": 1.4383, "mean_token_accuracy": 0.6561855375766754, "num_tokens": 2192029229.0, "step": 13070 }, { "entropy": 1.7081499894460042, "epoch": 1.435912224327813, "grad_norm": 0.7823132872581482, "learning_rate": 5.502693782859282e-06, "loss": 1.4804, "mean_token_accuracy": 0.6432801336050034, "num_tokens": 2192197284.0, "step": 13071 }, { "entropy": 1.7025318245093028, "epoch": 1.436022081239186, "grad_norm": 0.655196487903595, "learning_rate": 5.501426134779538e-06, "loss": 1.3445, "mean_token_accuracy": 0.6672158092260361, "num_tokens": 2192338396.0, "step": 13072 }, { "entropy": 1.7241312563419342, "epoch": 1.4361319381505588, "grad_norm": 0.7512596845626831, "learning_rate": 5.500158660727175e-06, "loss": 1.2377, "mean_token_accuracy": 0.6815748860438665, "num_tokens": 2192441821.0, "step": 13073 }, { "entropy": 1.683451513449351, "epoch": 1.4362417950619317, "grad_norm": 0.6415113210678101, "learning_rate": 5.498891360742316e-06, "loss": 1.3612, "mean_token_accuracy": 0.6760003666083018, "num_tokens": 2192589743.0, "step": 13074 }, { "entropy": 1.6738179723421733, "epoch": 1.4363516519733048, "grad_norm": 0.6926242709159851, "learning_rate": 5.497624234865062e-06, "loss": 1.3332, "mean_token_accuracy": 0.6816667566696802, "num_tokens": 2192738312.0, "step": 13075 }, { "entropy": 1.71518008907636, "epoch": 1.4364615088846777, "grad_norm": 0.6560258865356445, "learning_rate": 5.496357283135526e-06, "loss": 1.5321, "mean_token_accuracy": 0.6293992896874746, "num_tokens": 2192940324.0, "step": 13076 }, { "entropy": 1.6770286560058594, "epoch": 1.4365713657960506, "grad_norm": 0.6862941980361938, "learning_rate": 5.495090505593802e-06, "loss": 1.3098, "mean_token_accuracy": 0.6722962707281113, "num_tokens": 2193087527.0, "step": 13077 }, { "entropy": 1.7614335318406422, "epoch": 1.4366812227074237, "grad_norm": 0.6682018637657166, "learning_rate": 5.49382390227998e-06, "loss": 1.3978, "mean_token_accuracy": 0.6473137189944586, "num_tokens": 2193276538.0, "step": 13078 }, { "entropy": 1.6820653875668843, "epoch": 1.4367910796187966, "grad_norm": 0.6352739334106445, "learning_rate": 5.49255747323415e-06, "loss": 1.3586, "mean_token_accuracy": 0.6693233251571655, "num_tokens": 2193450625.0, "step": 13079 }, { "entropy": 1.7216021815935771, "epoch": 1.4369009365301695, "grad_norm": 0.6901304721832275, "learning_rate": 5.4912912184964e-06, "loss": 1.2759, "mean_token_accuracy": 0.6686844925085703, "num_tokens": 2193553352.0, "step": 13080 }, { "entropy": 1.7992511590321858, "epoch": 1.4370107934415424, "grad_norm": 0.6699839234352112, "learning_rate": 5.490025138106795e-06, "loss": 1.445, "mean_token_accuracy": 0.6500095178683599, "num_tokens": 2193705121.0, "step": 13081 }, { "entropy": 1.7488137980302174, "epoch": 1.4371206503529153, "grad_norm": 0.7710155844688416, "learning_rate": 5.488759232105412e-06, "loss": 1.5234, "mean_token_accuracy": 0.6365531980991364, "num_tokens": 2193866132.0, "step": 13082 }, { "entropy": 1.7383404672145844, "epoch": 1.4372305072642884, "grad_norm": 0.7229591608047485, "learning_rate": 5.487493500532318e-06, "loss": 1.5024, "mean_token_accuracy": 0.6449161618947983, "num_tokens": 2194048183.0, "step": 13083 }, { "entropy": 1.74971208969752, "epoch": 1.4373403641756612, "grad_norm": 0.8270702362060547, "learning_rate": 5.4862279434275716e-06, "loss": 1.5017, "mean_token_accuracy": 0.6444364488124847, "num_tokens": 2194201146.0, "step": 13084 }, { "entropy": 1.710715075333913, "epoch": 1.4374502210870341, "grad_norm": 0.8305548429489136, "learning_rate": 5.484962560831223e-06, "loss": 1.5135, "mean_token_accuracy": 0.6555256595214208, "num_tokens": 2194373810.0, "step": 13085 }, { "entropy": 1.740293820699056, "epoch": 1.437560077998407, "grad_norm": 0.6739172339439392, "learning_rate": 5.483697352783326e-06, "loss": 1.48, "mean_token_accuracy": 0.6560692836840948, "num_tokens": 2194537495.0, "step": 13086 }, { "entropy": 1.687047153711319, "epoch": 1.43766993490978, "grad_norm": 0.6256750226020813, "learning_rate": 5.48243231932392e-06, "loss": 1.3393, "mean_token_accuracy": 0.6665924340486526, "num_tokens": 2194693871.0, "step": 13087 }, { "entropy": 1.6996191541353862, "epoch": 1.437779791821153, "grad_norm": 0.7012233734130859, "learning_rate": 5.481167460493049e-06, "loss": 1.3996, "mean_token_accuracy": 0.653436486919721, "num_tokens": 2194841359.0, "step": 13088 }, { "entropy": 1.683770517508189, "epoch": 1.437889648732526, "grad_norm": 0.7458353042602539, "learning_rate": 5.479902776330739e-06, "loss": 1.2305, "mean_token_accuracy": 0.674822653333346, "num_tokens": 2194979501.0, "step": 13089 }, { "entropy": 1.7308926284313202, "epoch": 1.4379995056438988, "grad_norm": 0.8598765134811401, "learning_rate": 5.478638266877016e-06, "loss": 1.5589, "mean_token_accuracy": 0.6545391033093134, "num_tokens": 2195155093.0, "step": 13090 }, { "entropy": 1.774180034796397, "epoch": 1.4381093625552719, "grad_norm": 0.6300092935562134, "learning_rate": 5.4773739321719055e-06, "loss": 1.4823, "mean_token_accuracy": 0.6337632189194361, "num_tokens": 2195363872.0, "step": 13091 }, { "entropy": 1.686318536599477, "epoch": 1.4382192194666448, "grad_norm": 0.591356098651886, "learning_rate": 5.4761097722554264e-06, "loss": 1.3622, "mean_token_accuracy": 0.6576072623332342, "num_tokens": 2195569789.0, "step": 13092 }, { "entropy": 1.7126056949297588, "epoch": 1.4383290763780177, "grad_norm": 0.6735844016075134, "learning_rate": 5.474845787167578e-06, "loss": 1.433, "mean_token_accuracy": 0.6552617400884628, "num_tokens": 2195731935.0, "step": 13093 }, { "entropy": 1.6593547960122426, "epoch": 1.4384389332893905, "grad_norm": 0.7271912097930908, "learning_rate": 5.47358197694837e-06, "loss": 1.3903, "mean_token_accuracy": 0.6405983914931616, "num_tokens": 2195915850.0, "step": 13094 }, { "entropy": 1.7243566314379375, "epoch": 1.4385487902007634, "grad_norm": 0.7322264909744263, "learning_rate": 5.472318341637805e-06, "loss": 1.3498, "mean_token_accuracy": 0.6661138186852137, "num_tokens": 2196067340.0, "step": 13095 }, { "entropy": 1.7625857293605804, "epoch": 1.4386586471121365, "grad_norm": 0.7507118582725525, "learning_rate": 5.471054881275875e-06, "loss": 1.3823, "mean_token_accuracy": 0.6484930912653605, "num_tokens": 2196190131.0, "step": 13096 }, { "entropy": 1.6906941831111908, "epoch": 1.4387685040235094, "grad_norm": 0.6131132245063782, "learning_rate": 5.4697915959025625e-06, "loss": 1.465, "mean_token_accuracy": 0.6531191219886144, "num_tokens": 2196349851.0, "step": 13097 }, { "entropy": 1.6890461246172588, "epoch": 1.4388783609348823, "grad_norm": 0.6666757464408875, "learning_rate": 5.468528485557858e-06, "loss": 1.312, "mean_token_accuracy": 0.6747940282026926, "num_tokens": 2196519592.0, "step": 13098 }, { "entropy": 1.645443985859553, "epoch": 1.4389882178462552, "grad_norm": 0.617806613445282, "learning_rate": 5.4672655502817315e-06, "loss": 1.3039, "mean_token_accuracy": 0.6676177283128103, "num_tokens": 2196708869.0, "step": 13099 }, { "entropy": 1.728934407234192, "epoch": 1.439098074757628, "grad_norm": 0.6053014993667603, "learning_rate": 5.46600279011416e-06, "loss": 1.4976, "mean_token_accuracy": 0.632400318980217, "num_tokens": 2196936983.0, "step": 13100 }, { "entropy": 1.6949416001637776, "epoch": 1.4392079316690012, "grad_norm": 0.719120442867279, "learning_rate": 5.464740205095106e-06, "loss": 1.4367, "mean_token_accuracy": 0.6461255997419357, "num_tokens": 2197102049.0, "step": 13101 }, { "entropy": 1.6866117616494496, "epoch": 1.439317788580374, "grad_norm": 0.6704388856887817, "learning_rate": 5.463477795264527e-06, "loss": 1.4006, "mean_token_accuracy": 0.6509098261594772, "num_tokens": 2197273218.0, "step": 13102 }, { "entropy": 1.6496765514214833, "epoch": 1.439427645491747, "grad_norm": 0.6062201261520386, "learning_rate": 5.462215560662383e-06, "loss": 1.3943, "mean_token_accuracy": 0.6622524907191595, "num_tokens": 2197423275.0, "step": 13103 }, { "entropy": 1.68293896317482, "epoch": 1.43953750240312, "grad_norm": 0.7043601870536804, "learning_rate": 5.460953501328626e-06, "loss": 1.3067, "mean_token_accuracy": 0.6639659106731415, "num_tokens": 2197540176.0, "step": 13104 }, { "entropy": 1.7241731981436412, "epoch": 1.439647359314493, "grad_norm": 0.810967743396759, "learning_rate": 5.459691617303187e-06, "loss": 1.6757, "mean_token_accuracy": 0.6301688055197397, "num_tokens": 2197770837.0, "step": 13105 }, { "entropy": 1.7493579188982646, "epoch": 1.4397572162258658, "grad_norm": 0.6431688070297241, "learning_rate": 5.458429908626013e-06, "loss": 1.4976, "mean_token_accuracy": 0.6429360012213389, "num_tokens": 2197961177.0, "step": 13106 }, { "entropy": 1.7181467115879059, "epoch": 1.4398670731372387, "grad_norm": 0.8091310262680054, "learning_rate": 5.457168375337039e-06, "loss": 1.4059, "mean_token_accuracy": 0.662377749880155, "num_tokens": 2198144542.0, "step": 13107 }, { "entropy": 1.7004645963509877, "epoch": 1.4399769300486116, "grad_norm": 0.6322354078292847, "learning_rate": 5.455907017476188e-06, "loss": 1.3193, "mean_token_accuracy": 0.6652690172195435, "num_tokens": 2198283451.0, "step": 13108 }, { "entropy": 1.7281455794970195, "epoch": 1.4400867869599847, "grad_norm": 0.7495424747467041, "learning_rate": 5.4546458350833775e-06, "loss": 1.4623, "mean_token_accuracy": 0.6501360982656479, "num_tokens": 2198417040.0, "step": 13109 }, { "entropy": 1.6602643132209778, "epoch": 1.4401966438713576, "grad_norm": 0.6429694294929504, "learning_rate": 5.453384828198532e-06, "loss": 1.5415, "mean_token_accuracy": 0.642531914015611, "num_tokens": 2198623547.0, "step": 13110 }, { "entropy": 1.6596784790356953, "epoch": 1.4403065007827305, "grad_norm": 0.597550630569458, "learning_rate": 5.452123996861554e-06, "loss": 1.5658, "mean_token_accuracy": 0.6343776235977808, "num_tokens": 2198853878.0, "step": 13111 }, { "entropy": 1.6523742377758026, "epoch": 1.4404163576941034, "grad_norm": 0.812057614326477, "learning_rate": 5.4508633411123535e-06, "loss": 1.259, "mean_token_accuracy": 0.6862892160813013, "num_tokens": 2198973379.0, "step": 13112 }, { "entropy": 1.7168854574362438, "epoch": 1.4405262146054763, "grad_norm": 0.6416419148445129, "learning_rate": 5.449602860990828e-06, "loss": 1.4105, "mean_token_accuracy": 0.6478788256645203, "num_tokens": 2199129783.0, "step": 13113 }, { "entropy": 1.6594553391138713, "epoch": 1.4406360715168494, "grad_norm": 0.681231677532196, "learning_rate": 5.448342556536869e-06, "loss": 1.4884, "mean_token_accuracy": 0.639866515994072, "num_tokens": 2199387460.0, "step": 13114 }, { "entropy": 1.6886393030484517, "epoch": 1.4407459284282222, "grad_norm": 0.6803062558174133, "learning_rate": 5.447082427790368e-06, "loss": 1.3325, "mean_token_accuracy": 0.677370235323906, "num_tokens": 2199547310.0, "step": 13115 }, { "entropy": 1.709013928969701, "epoch": 1.4408557853395951, "grad_norm": 0.5610901713371277, "learning_rate": 5.445822474791207e-06, "loss": 1.4096, "mean_token_accuracy": 0.6584896892309189, "num_tokens": 2199735361.0, "step": 13116 }, { "entropy": 1.7390046020348866, "epoch": 1.4409656422509682, "grad_norm": 0.6881232261657715, "learning_rate": 5.444562697579259e-06, "loss": 1.388, "mean_token_accuracy": 0.6504150678714117, "num_tokens": 2199928625.0, "step": 13117 }, { "entropy": 1.7665140330791473, "epoch": 1.4410754991623411, "grad_norm": 0.6938253045082092, "learning_rate": 5.443303096194401e-06, "loss": 1.3693, "mean_token_accuracy": 0.66404556731383, "num_tokens": 2200069167.0, "step": 13118 }, { "entropy": 1.7453898986180623, "epoch": 1.441185356073714, "grad_norm": 0.6774733662605286, "learning_rate": 5.442043670676494e-06, "loss": 1.5307, "mean_token_accuracy": 0.6523572206497192, "num_tokens": 2200247454.0, "step": 13119 }, { "entropy": 1.7234888970851898, "epoch": 1.441295212985087, "grad_norm": 0.6913623213768005, "learning_rate": 5.440784421065402e-06, "loss": 1.2941, "mean_token_accuracy": 0.6711312582095464, "num_tokens": 2200358346.0, "step": 13120 }, { "entropy": 1.6904946466286976, "epoch": 1.4414050698964598, "grad_norm": 0.6734454035758972, "learning_rate": 5.439525347400978e-06, "loss": 1.2959, "mean_token_accuracy": 0.6673662761847178, "num_tokens": 2200517442.0, "step": 13121 }, { "entropy": 1.6776606639226277, "epoch": 1.4415149268078329, "grad_norm": 0.5592838525772095, "learning_rate": 5.438266449723069e-06, "loss": 1.4938, "mean_token_accuracy": 0.634076843659083, "num_tokens": 2200776827.0, "step": 13122 }, { "entropy": 1.661406288544337, "epoch": 1.4416247837192058, "grad_norm": 0.7140949964523315, "learning_rate": 5.437007728071519e-06, "loss": 1.3046, "mean_token_accuracy": 0.6735624670982361, "num_tokens": 2200946844.0, "step": 13123 }, { "entropy": 1.6646969815095265, "epoch": 1.4417346406305787, "grad_norm": 0.6813852190971375, "learning_rate": 5.435749182486175e-06, "loss": 1.4353, "mean_token_accuracy": 0.6477916638056437, "num_tokens": 2201107821.0, "step": 13124 }, { "entropy": 1.6899653573830922, "epoch": 1.4418444975419515, "grad_norm": 0.7384040951728821, "learning_rate": 5.4344908130068566e-06, "loss": 1.3519, "mean_token_accuracy": 0.6741080085436503, "num_tokens": 2201275870.0, "step": 13125 }, { "entropy": 1.6645598411560059, "epoch": 1.4419543544533244, "grad_norm": 0.6502087712287903, "learning_rate": 5.433232619673396e-06, "loss": 1.4388, "mean_token_accuracy": 0.662499854962031, "num_tokens": 2201458482.0, "step": 13126 }, { "entropy": 1.6533268988132477, "epoch": 1.4420642113646975, "grad_norm": 0.728032112121582, "learning_rate": 5.431974602525617e-06, "loss": 1.2845, "mean_token_accuracy": 0.6791494935750961, "num_tokens": 2201632361.0, "step": 13127 }, { "entropy": 1.7500494917233784, "epoch": 1.4421740682760704, "grad_norm": 0.6480644941329956, "learning_rate": 5.430716761603332e-06, "loss": 1.3909, "mean_token_accuracy": 0.6503734489281973, "num_tokens": 2201814325.0, "step": 13128 }, { "entropy": 1.7360434929529827, "epoch": 1.4422839251874433, "grad_norm": 0.7137820720672607, "learning_rate": 5.42945909694635e-06, "loss": 1.4508, "mean_token_accuracy": 0.6532419472932816, "num_tokens": 2201964448.0, "step": 13129 }, { "entropy": 1.6693811416625977, "epoch": 1.4423937820988164, "grad_norm": 0.6493667960166931, "learning_rate": 5.42820160859448e-06, "loss": 1.3158, "mean_token_accuracy": 0.6628794223070145, "num_tokens": 2202094892.0, "step": 13130 }, { "entropy": 1.738204260667165, "epoch": 1.4425036390101893, "grad_norm": 0.8041599988937378, "learning_rate": 5.426944296587515e-06, "loss": 1.6111, "mean_token_accuracy": 0.6330114702383677, "num_tokens": 2202341847.0, "step": 13131 }, { "entropy": 1.6739278137683868, "epoch": 1.4426134959215622, "grad_norm": 0.7445047497749329, "learning_rate": 5.425687160965256e-06, "loss": 1.3705, "mean_token_accuracy": 0.6565392563740412, "num_tokens": 2202489083.0, "step": 13132 }, { "entropy": 1.7505607505639393, "epoch": 1.442723352832935, "grad_norm": 0.6818254590034485, "learning_rate": 5.424430201767486e-06, "loss": 1.3646, "mean_token_accuracy": 0.6577309419711431, "num_tokens": 2202634715.0, "step": 13133 }, { "entropy": 1.6894071300824482, "epoch": 1.442833209744308, "grad_norm": 0.6561980247497559, "learning_rate": 5.423173419033985e-06, "loss": 1.2953, "mean_token_accuracy": 0.6628958334525427, "num_tokens": 2202768025.0, "step": 13134 }, { "entropy": 1.6657811105251312, "epoch": 1.442943066655681, "grad_norm": 0.753799557685852, "learning_rate": 5.4219168128045315e-06, "loss": 1.275, "mean_token_accuracy": 0.6830256134271622, "num_tokens": 2202911105.0, "step": 13135 }, { "entropy": 1.7518598437309265, "epoch": 1.443052923567054, "grad_norm": 0.6313562393188477, "learning_rate": 5.420660383118903e-06, "loss": 1.3652, "mean_token_accuracy": 0.6742727309465408, "num_tokens": 2203082635.0, "step": 13136 }, { "entropy": 1.6960064272085826, "epoch": 1.4431627804784268, "grad_norm": 0.7373610138893127, "learning_rate": 5.419404130016854e-06, "loss": 1.3989, "mean_token_accuracy": 0.6607239097356796, "num_tokens": 2203239727.0, "step": 13137 }, { "entropy": 1.6716348230838776, "epoch": 1.4432726373897997, "grad_norm": 0.7793363928794861, "learning_rate": 5.41814805353815e-06, "loss": 1.484, "mean_token_accuracy": 0.6447274684906006, "num_tokens": 2203425558.0, "step": 13138 }, { "entropy": 1.6881347199281056, "epoch": 1.4433824943011726, "grad_norm": 0.6784332394599915, "learning_rate": 5.416892153722548e-06, "loss": 1.3327, "mean_token_accuracy": 0.6610169510046641, "num_tokens": 2203588235.0, "step": 13139 }, { "entropy": 1.752338171005249, "epoch": 1.4434923512125457, "grad_norm": 0.8655160665512085, "learning_rate": 5.415636430609792e-06, "loss": 1.2634, "mean_token_accuracy": 0.6677990953127543, "num_tokens": 2203720757.0, "step": 13140 }, { "entropy": 1.705585926771164, "epoch": 1.4436022081239186, "grad_norm": 0.6548242568969727, "learning_rate": 5.414380884239625e-06, "loss": 1.5122, "mean_token_accuracy": 0.64339513083299, "num_tokens": 2203891963.0, "step": 13141 }, { "entropy": 1.7506338755289714, "epoch": 1.4437120650352915, "grad_norm": 0.6873526573181152, "learning_rate": 5.413125514651789e-06, "loss": 1.3258, "mean_token_accuracy": 0.6546006848414739, "num_tokens": 2204046733.0, "step": 13142 }, { "entropy": 1.6650571823120117, "epoch": 1.4438219219466646, "grad_norm": 0.8502957820892334, "learning_rate": 5.411870321886009e-06, "loss": 1.4231, "mean_token_accuracy": 0.6577673802773157, "num_tokens": 2204230937.0, "step": 13143 }, { "entropy": 1.7141657968362172, "epoch": 1.4439317788580375, "grad_norm": 0.6910656690597534, "learning_rate": 5.410615305982019e-06, "loss": 1.4016, "mean_token_accuracy": 0.6469135781129202, "num_tokens": 2204437250.0, "step": 13144 }, { "entropy": 1.6625679234663646, "epoch": 1.4440416357694104, "grad_norm": 0.6473484039306641, "learning_rate": 5.409360466979537e-06, "loss": 1.3574, "mean_token_accuracy": 0.6670989692211151, "num_tokens": 2204612238.0, "step": 13145 }, { "entropy": 1.713254948457082, "epoch": 1.4441514926807832, "grad_norm": 0.6853258013725281, "learning_rate": 5.408105804918271e-06, "loss": 1.3635, "mean_token_accuracy": 0.6598356068134308, "num_tokens": 2204763539.0, "step": 13146 }, { "entropy": 1.7004669805367787, "epoch": 1.4442613495921561, "grad_norm": 0.7649045586585999, "learning_rate": 5.406851319837938e-06, "loss": 1.4385, "mean_token_accuracy": 0.6565716167291006, "num_tokens": 2204909107.0, "step": 13147 }, { "entropy": 1.658108522494634, "epoch": 1.4443712065035292, "grad_norm": 0.6900636553764343, "learning_rate": 5.405597011778248e-06, "loss": 1.3316, "mean_token_accuracy": 0.6711122145255407, "num_tokens": 2205065286.0, "step": 13148 }, { "entropy": 1.7093073030312855, "epoch": 1.4444810634149021, "grad_norm": 0.7754839658737183, "learning_rate": 5.404342880778883e-06, "loss": 1.2241, "mean_token_accuracy": 0.6803254435459772, "num_tokens": 2205185259.0, "step": 13149 }, { "entropy": 1.743496169646581, "epoch": 1.444590920326275, "grad_norm": 0.7815446853637695, "learning_rate": 5.403088926879546e-06, "loss": 1.4544, "mean_token_accuracy": 0.6503811130921046, "num_tokens": 2205326720.0, "step": 13150 }, { "entropy": 1.7162681818008423, "epoch": 1.444700777237648, "grad_norm": 0.6339192986488342, "learning_rate": 5.401835150119925e-06, "loss": 1.4343, "mean_token_accuracy": 0.6413728495438894, "num_tokens": 2205487610.0, "step": 13151 }, { "entropy": 1.7141146957874298, "epoch": 1.4448106341490208, "grad_norm": 0.6438986659049988, "learning_rate": 5.400581550539699e-06, "loss": 1.4737, "mean_token_accuracy": 0.655816008647283, "num_tokens": 2205642144.0, "step": 13152 }, { "entropy": 1.7310173114140828, "epoch": 1.4449204910603939, "grad_norm": 0.7602280974388123, "learning_rate": 5.3993281281785415e-06, "loss": 1.4238, "mean_token_accuracy": 0.6691502779722214, "num_tokens": 2205789686.0, "step": 13153 }, { "entropy": 1.720057229200999, "epoch": 1.4450303479717668, "grad_norm": 0.6413891911506653, "learning_rate": 5.398074883076127e-06, "loss": 1.3896, "mean_token_accuracy": 0.6561499089002609, "num_tokens": 2205968385.0, "step": 13154 }, { "entropy": 1.6746338705221813, "epoch": 1.4451402048831397, "grad_norm": 0.6567736864089966, "learning_rate": 5.396821815272115e-06, "loss": 1.3772, "mean_token_accuracy": 0.6559559206167856, "num_tokens": 2206140553.0, "step": 13155 }, { "entropy": 1.7500001788139343, "epoch": 1.4452500617945128, "grad_norm": 0.7306400537490845, "learning_rate": 5.395568924806171e-06, "loss": 1.3747, "mean_token_accuracy": 0.6527500202258428, "num_tokens": 2206310921.0, "step": 13156 }, { "entropy": 1.7621792455514271, "epoch": 1.4453599187058856, "grad_norm": 0.6839621067047119, "learning_rate": 5.394316211717945e-06, "loss": 1.3053, "mean_token_accuracy": 0.6592706839243571, "num_tokens": 2206415388.0, "step": 13157 }, { "entropy": 1.6396544377009075, "epoch": 1.4454697756172585, "grad_norm": 0.64600670337677, "learning_rate": 5.393063676047083e-06, "loss": 1.2404, "mean_token_accuracy": 0.6779094239075979, "num_tokens": 2206534812.0, "step": 13158 }, { "entropy": 1.6962849795818329, "epoch": 1.4455796325286314, "grad_norm": 0.7013195157051086, "learning_rate": 5.391811317833229e-06, "loss": 1.3592, "mean_token_accuracy": 0.6624196718136469, "num_tokens": 2206682279.0, "step": 13159 }, { "entropy": 1.7327560285727184, "epoch": 1.4456894894400043, "grad_norm": 0.5949588418006897, "learning_rate": 5.390559137116025e-06, "loss": 1.4016, "mean_token_accuracy": 0.6511549949645996, "num_tokens": 2206866260.0, "step": 13160 }, { "entropy": 1.7158630589644115, "epoch": 1.4457993463513774, "grad_norm": 0.6915740370750427, "learning_rate": 5.38930713393509e-06, "loss": 1.463, "mean_token_accuracy": 0.6487186849117279, "num_tokens": 2207049927.0, "step": 13161 }, { "entropy": 1.675972153743108, "epoch": 1.4459092032627503, "grad_norm": 0.622731626033783, "learning_rate": 5.388055308330057e-06, "loss": 1.4674, "mean_token_accuracy": 0.6402994245290756, "num_tokens": 2207239023.0, "step": 13162 }, { "entropy": 1.749548574288686, "epoch": 1.4460190601741232, "grad_norm": 0.776073694229126, "learning_rate": 5.386803660340547e-06, "loss": 1.4152, "mean_token_accuracy": 0.6454198757807413, "num_tokens": 2207381870.0, "step": 13163 }, { "entropy": 1.618944267431895, "epoch": 1.446128917085496, "grad_norm": 0.592343807220459, "learning_rate": 5.3855521900061725e-06, "loss": 1.4439, "mean_token_accuracy": 0.6512120515108109, "num_tokens": 2207569716.0, "step": 13164 }, { "entropy": 1.6773555179437, "epoch": 1.446238773996869, "grad_norm": 0.7094241380691528, "learning_rate": 5.384300897366537e-06, "loss": 1.3302, "mean_token_accuracy": 0.6653115550676981, "num_tokens": 2207697714.0, "step": 13165 }, { "entropy": 1.7085080246130626, "epoch": 1.446348630908242, "grad_norm": 0.7474088072776794, "learning_rate": 5.383049782461251e-06, "loss": 1.5104, "mean_token_accuracy": 0.6549822489420573, "num_tokens": 2207875635.0, "step": 13166 }, { "entropy": 1.6417442560195923, "epoch": 1.446458487819615, "grad_norm": 0.7819391489028931, "learning_rate": 5.3817988453299064e-06, "loss": 1.2799, "mean_token_accuracy": 0.6651287525892258, "num_tokens": 2208032382.0, "step": 13167 }, { "entropy": 1.7218000292778015, "epoch": 1.4465683447309878, "grad_norm": 0.7218011021614075, "learning_rate": 5.380548086012099e-06, "loss": 1.4495, "mean_token_accuracy": 0.6531488001346588, "num_tokens": 2208182762.0, "step": 13168 }, { "entropy": 1.710701008637746, "epoch": 1.446678201642361, "grad_norm": 1.3408112525939941, "learning_rate": 5.379297504547412e-06, "loss": 1.1826, "mean_token_accuracy": 0.6656645238399506, "num_tokens": 2208344898.0, "step": 13169 }, { "entropy": 1.6560823222001393, "epoch": 1.4467880585537338, "grad_norm": 0.6145971417427063, "learning_rate": 5.378047100975424e-06, "loss": 1.3074, "mean_token_accuracy": 0.6633361180623373, "num_tokens": 2208515722.0, "step": 13170 }, { "entropy": 1.6993583242098491, "epoch": 1.4468979154651067, "grad_norm": 0.8017585277557373, "learning_rate": 5.376796875335713e-06, "loss": 1.5519, "mean_token_accuracy": 0.6469237754742304, "num_tokens": 2208708203.0, "step": 13171 }, { "entropy": 1.7376844882965088, "epoch": 1.4470077723764796, "grad_norm": 0.6851217150688171, "learning_rate": 5.375546827667851e-06, "loss": 1.5577, "mean_token_accuracy": 0.6304005980491638, "num_tokens": 2208910762.0, "step": 13172 }, { "entropy": 1.7630626857280731, "epoch": 1.4471176292878525, "grad_norm": 0.6860126256942749, "learning_rate": 5.3742969580113915e-06, "loss": 1.4536, "mean_token_accuracy": 0.6431644906600317, "num_tokens": 2209114237.0, "step": 13173 }, { "entropy": 1.668500433365504, "epoch": 1.4472274861992256, "grad_norm": 0.7579459547996521, "learning_rate": 5.3730472664059e-06, "loss": 1.37, "mean_token_accuracy": 0.6733713150024414, "num_tokens": 2209269635.0, "step": 13174 }, { "entropy": 1.6985759337743123, "epoch": 1.4473373431105985, "grad_norm": 0.7717159986495972, "learning_rate": 5.371797752890928e-06, "loss": 1.4412, "mean_token_accuracy": 0.6420165300369263, "num_tokens": 2209444154.0, "step": 13175 }, { "entropy": 1.6884556114673615, "epoch": 1.4474472000219714, "grad_norm": 0.7163543701171875, "learning_rate": 5.370548417506023e-06, "loss": 1.3367, "mean_token_accuracy": 0.6587778131167094, "num_tokens": 2209553018.0, "step": 13176 }, { "entropy": 1.6677973469098408, "epoch": 1.4475570569333442, "grad_norm": 0.5317003130912781, "learning_rate": 5.369299260290723e-06, "loss": 1.5134, "mean_token_accuracy": 0.636811763048172, "num_tokens": 2209829266.0, "step": 13177 }, { "entropy": 1.7124519248803456, "epoch": 1.4476669138447171, "grad_norm": 0.704308271408081, "learning_rate": 5.3680502812845606e-06, "loss": 1.4137, "mean_token_accuracy": 0.6537395964066187, "num_tokens": 2209975703.0, "step": 13178 }, { "entropy": 1.6899917125701904, "epoch": 1.4477767707560902, "grad_norm": 0.7196908593177795, "learning_rate": 5.366801480527068e-06, "loss": 1.3503, "mean_token_accuracy": 0.6669531762599945, "num_tokens": 2210131799.0, "step": 13179 }, { "entropy": 1.726241260766983, "epoch": 1.4478866276674631, "grad_norm": 0.6660773158073425, "learning_rate": 5.3655528580577785e-06, "loss": 1.4985, "mean_token_accuracy": 0.6414483537276586, "num_tokens": 2210311687.0, "step": 13180 }, { "entropy": 1.7482622861862183, "epoch": 1.447996484578836, "grad_norm": 0.6919118762016296, "learning_rate": 5.364304413916195e-06, "loss": 1.3277, "mean_token_accuracy": 0.6661824136972427, "num_tokens": 2210497399.0, "step": 13181 }, { "entropy": 1.6985487540562947, "epoch": 1.4481063414902091, "grad_norm": 0.7943996787071228, "learning_rate": 5.363056148141838e-06, "loss": 1.2813, "mean_token_accuracy": 0.6696479817231497, "num_tokens": 2210622604.0, "step": 13182 }, { "entropy": 1.6598475178082783, "epoch": 1.448216198401582, "grad_norm": 0.6746430397033691, "learning_rate": 5.361808060774216e-06, "loss": 1.3158, "mean_token_accuracy": 0.6594479928414027, "num_tokens": 2210777670.0, "step": 13183 }, { "entropy": 1.7671760121981304, "epoch": 1.4483260553129549, "grad_norm": 0.7635937333106995, "learning_rate": 5.360560151852828e-06, "loss": 1.4199, "mean_token_accuracy": 0.6667883445819219, "num_tokens": 2210926645.0, "step": 13184 }, { "entropy": 1.690992146730423, "epoch": 1.4484359122243278, "grad_norm": 0.7546091675758362, "learning_rate": 5.359312421417168e-06, "loss": 1.5281, "mean_token_accuracy": 0.6310638238986334, "num_tokens": 2211176157.0, "step": 13185 }, { "entropy": 1.6956369777520497, "epoch": 1.4485457691357007, "grad_norm": 0.6036320328712463, "learning_rate": 5.358064869506731e-06, "loss": 1.469, "mean_token_accuracy": 0.6401687761147817, "num_tokens": 2211393488.0, "step": 13186 }, { "entropy": 1.6649706959724426, "epoch": 1.4486556260470738, "grad_norm": 0.6010493040084839, "learning_rate": 5.356817496160994e-06, "loss": 1.3602, "mean_token_accuracy": 0.6572969208161036, "num_tokens": 2211585446.0, "step": 13187 }, { "entropy": 1.7027298510074615, "epoch": 1.4487654829584466, "grad_norm": 0.7195990681648254, "learning_rate": 5.355570301419446e-06, "loss": 1.3763, "mean_token_accuracy": 0.6585088024536768, "num_tokens": 2211740010.0, "step": 13188 }, { "entropy": 1.6455882887045543, "epoch": 1.4488753398698195, "grad_norm": 0.7932417392730713, "learning_rate": 5.354323285321552e-06, "loss": 1.3226, "mean_token_accuracy": 0.6679457773764929, "num_tokens": 2211907834.0, "step": 13189 }, { "entropy": 1.6916816929976146, "epoch": 1.4489851967811924, "grad_norm": 2.6084368228912354, "learning_rate": 5.3530764479067795e-06, "loss": 1.1454, "mean_token_accuracy": 0.6937383910020193, "num_tokens": 2212070495.0, "step": 13190 }, { "entropy": 1.6980760792891185, "epoch": 1.4490950536925653, "grad_norm": 0.7769445776939392, "learning_rate": 5.3518297892145955e-06, "loss": 1.463, "mean_token_accuracy": 0.6506317506233851, "num_tokens": 2212241884.0, "step": 13191 }, { "entropy": 1.7203065752983093, "epoch": 1.4492049106039384, "grad_norm": 0.671053409576416, "learning_rate": 5.350583309284456e-06, "loss": 1.4886, "mean_token_accuracy": 0.6554620762666067, "num_tokens": 2212416939.0, "step": 13192 }, { "entropy": 1.6941389739513397, "epoch": 1.4493147675153113, "grad_norm": 0.6860373616218567, "learning_rate": 5.349337008155805e-06, "loss": 1.2588, "mean_token_accuracy": 0.6677055060863495, "num_tokens": 2212572265.0, "step": 13193 }, { "entropy": 1.7378354767958324, "epoch": 1.4494246244266842, "grad_norm": 0.7001467943191528, "learning_rate": 5.348090885868091e-06, "loss": 1.4168, "mean_token_accuracy": 0.6458163360754648, "num_tokens": 2212720904.0, "step": 13194 }, { "entropy": 1.7072244087855022, "epoch": 1.4495344813380573, "grad_norm": 0.6630334854125977, "learning_rate": 5.346844942460756e-06, "loss": 1.2521, "mean_token_accuracy": 0.6737230718135834, "num_tokens": 2212833669.0, "step": 13195 }, { "entropy": 1.6764145195484161, "epoch": 1.4496443382494302, "grad_norm": 0.6521428823471069, "learning_rate": 5.345599177973233e-06, "loss": 1.3123, "mean_token_accuracy": 0.6774703562259674, "num_tokens": 2213000587.0, "step": 13196 }, { "entropy": 1.767681509256363, "epoch": 1.449754195160803, "grad_norm": 0.7817487120628357, "learning_rate": 5.344353592444943e-06, "loss": 1.2971, "mean_token_accuracy": 0.6568180421988169, "num_tokens": 2213145438.0, "step": 13197 }, { "entropy": 1.663481096426646, "epoch": 1.449864052072176, "grad_norm": 0.5956518650054932, "learning_rate": 5.3431081859153174e-06, "loss": 1.3152, "mean_token_accuracy": 0.6751365313927332, "num_tokens": 2213305808.0, "step": 13198 }, { "entropy": 1.6766654352347057, "epoch": 1.4499739089835488, "grad_norm": 0.6852260231971741, "learning_rate": 5.341862958423765e-06, "loss": 1.3912, "mean_token_accuracy": 0.6689743250608444, "num_tokens": 2213454974.0, "step": 13199 }, { "entropy": 1.7198993066946666, "epoch": 1.450083765894922, "grad_norm": 0.7866743206977844, "learning_rate": 5.340617910009705e-06, "loss": 1.4372, "mean_token_accuracy": 0.6474678864081701, "num_tokens": 2213626936.0, "step": 13200 }, { "entropy": 1.6825834314028423, "epoch": 1.4501936228062948, "grad_norm": 0.8059370517730713, "learning_rate": 5.3393730407125365e-06, "loss": 1.2714, "mean_token_accuracy": 0.6706172774235407, "num_tokens": 2213775434.0, "step": 13201 }, { "entropy": 1.6637190183003743, "epoch": 1.4503034797176677, "grad_norm": 14.398298263549805, "learning_rate": 5.338128350571659e-06, "loss": 1.3967, "mean_token_accuracy": 0.6558897644281387, "num_tokens": 2214009223.0, "step": 13202 }, { "entropy": 1.7083501815795898, "epoch": 1.4504133366290406, "grad_norm": 0.7195196747779846, "learning_rate": 5.336883839626466e-06, "loss": 1.4648, "mean_token_accuracy": 0.6481581528981527, "num_tokens": 2214193907.0, "step": 13203 }, { "entropy": 1.708481788635254, "epoch": 1.4505231935404135, "grad_norm": 0.815433919429779, "learning_rate": 5.335639507916354e-06, "loss": 1.4208, "mean_token_accuracy": 0.6755526115496954, "num_tokens": 2214358384.0, "step": 13204 }, { "entropy": 1.7456343571345012, "epoch": 1.4506330504517866, "grad_norm": 0.653662919998169, "learning_rate": 5.334395355480692e-06, "loss": 1.4671, "mean_token_accuracy": 0.6519175618886948, "num_tokens": 2214500150.0, "step": 13205 }, { "entropy": 1.683765749136607, "epoch": 1.4507429073631595, "grad_norm": 1.1602147817611694, "learning_rate": 5.333151382358867e-06, "loss": 1.3494, "mean_token_accuracy": 0.6610773354768753, "num_tokens": 2214625247.0, "step": 13206 }, { "entropy": 1.7299201289812725, "epoch": 1.4508527642745324, "grad_norm": 0.6895291805267334, "learning_rate": 5.331907588590248e-06, "loss": 1.4314, "mean_token_accuracy": 0.6494416346152624, "num_tokens": 2214811498.0, "step": 13207 }, { "entropy": 1.658159464597702, "epoch": 1.4509626211859055, "grad_norm": 0.6312925219535828, "learning_rate": 5.3306639742142015e-06, "loss": 1.36, "mean_token_accuracy": 0.6664116680622101, "num_tokens": 2214974047.0, "step": 13208 }, { "entropy": 1.7353888948758442, "epoch": 1.4510724780972784, "grad_norm": 0.6342319846153259, "learning_rate": 5.329420539270082e-06, "loss": 1.3164, "mean_token_accuracy": 0.6630458980798721, "num_tokens": 2215193942.0, "step": 13209 }, { "entropy": 1.7596177558104198, "epoch": 1.4511823350086512, "grad_norm": 0.6443141102790833, "learning_rate": 5.328177283797249e-06, "loss": 1.5036, "mean_token_accuracy": 0.6435278157393137, "num_tokens": 2215360444.0, "step": 13210 }, { "entropy": 1.6973899205525715, "epoch": 1.4512921919200241, "grad_norm": 0.6556523442268372, "learning_rate": 5.3269342078350465e-06, "loss": 1.4033, "mean_token_accuracy": 0.6569543530543646, "num_tokens": 2215502761.0, "step": 13211 }, { "entropy": 1.7224473754564922, "epoch": 1.451402048831397, "grad_norm": 0.6724802851676941, "learning_rate": 5.325691311422824e-06, "loss": 1.4342, "mean_token_accuracy": 0.6421651244163513, "num_tokens": 2215672804.0, "step": 13212 }, { "entropy": 1.6691329777240753, "epoch": 1.4515119057427701, "grad_norm": 0.6420386433601379, "learning_rate": 5.324448594599914e-06, "loss": 1.5087, "mean_token_accuracy": 0.6413849592208862, "num_tokens": 2215874049.0, "step": 13213 }, { "entropy": 1.624968518813451, "epoch": 1.451621762654143, "grad_norm": 0.7023099064826965, "learning_rate": 5.323206057405645e-06, "loss": 1.3814, "mean_token_accuracy": 0.6555198530356089, "num_tokens": 2216024759.0, "step": 13214 }, { "entropy": 1.6852657397588093, "epoch": 1.4517316195655159, "grad_norm": 0.7389397621154785, "learning_rate": 5.321963699879347e-06, "loss": 1.4723, "mean_token_accuracy": 0.646138941248258, "num_tokens": 2216188508.0, "step": 13215 }, { "entropy": 1.6744611859321594, "epoch": 1.4518414764768888, "grad_norm": 0.6439229846000671, "learning_rate": 5.320721522060346e-06, "loss": 1.4371, "mean_token_accuracy": 0.6564580400784811, "num_tokens": 2216352452.0, "step": 13216 }, { "entropy": 1.683128794034322, "epoch": 1.4519513333882617, "grad_norm": 0.7556068301200867, "learning_rate": 5.319479523987943e-06, "loss": 1.2867, "mean_token_accuracy": 0.6663307448228201, "num_tokens": 2216522725.0, "step": 13217 }, { "entropy": 1.7548390924930573, "epoch": 1.4520611902996348, "grad_norm": 0.6996464133262634, "learning_rate": 5.318237705701451e-06, "loss": 1.3232, "mean_token_accuracy": 0.6729239821434021, "num_tokens": 2216664562.0, "step": 13218 }, { "entropy": 1.7093205749988556, "epoch": 1.4521710472110076, "grad_norm": 0.7138844728469849, "learning_rate": 5.316996067240181e-06, "loss": 1.2359, "mean_token_accuracy": 0.6787795623143514, "num_tokens": 2216772838.0, "step": 13219 }, { "entropy": 1.6784123480319977, "epoch": 1.4522809041223805, "grad_norm": 0.6939015984535217, "learning_rate": 5.3157546086434245e-06, "loss": 1.2327, "mean_token_accuracy": 0.6807336856921514, "num_tokens": 2216919138.0, "step": 13220 }, { "entropy": 1.7018550237019856, "epoch": 1.4523907610337536, "grad_norm": 0.8771721124649048, "learning_rate": 5.314513329950469e-06, "loss": 1.3378, "mean_token_accuracy": 0.6704970449209213, "num_tokens": 2217068295.0, "step": 13221 }, { "entropy": 1.7389337023099263, "epoch": 1.4525006179451265, "grad_norm": 0.6254299879074097, "learning_rate": 5.313272231200609e-06, "loss": 1.4183, "mean_token_accuracy": 0.6511034518480301, "num_tokens": 2217280813.0, "step": 13222 }, { "entropy": 1.783752590417862, "epoch": 1.4526104748564994, "grad_norm": 0.6950295567512512, "learning_rate": 5.312031312433117e-06, "loss": 1.3607, "mean_token_accuracy": 0.6620252877473831, "num_tokens": 2217419842.0, "step": 13223 }, { "entropy": 1.7052730023860931, "epoch": 1.4527203317678723, "grad_norm": 0.6519191861152649, "learning_rate": 5.3107905736872745e-06, "loss": 1.4891, "mean_token_accuracy": 0.6575401375691096, "num_tokens": 2217599361.0, "step": 13224 }, { "entropy": 1.7535746296246846, "epoch": 1.4528301886792452, "grad_norm": 0.8356174826622009, "learning_rate": 5.309550015002346e-06, "loss": 1.2754, "mean_token_accuracy": 0.6758607228597006, "num_tokens": 2217738156.0, "step": 13225 }, { "entropy": 1.692326823870341, "epoch": 1.4529400455906183, "grad_norm": 0.6344167590141296, "learning_rate": 5.308309636417593e-06, "loss": 1.4251, "mean_token_accuracy": 0.6521053711573283, "num_tokens": 2217913111.0, "step": 13226 }, { "entropy": 1.68477068344752, "epoch": 1.4530499025019912, "grad_norm": 0.722823977470398, "learning_rate": 5.307069437972274e-06, "loss": 1.3475, "mean_token_accuracy": 0.6687405457099279, "num_tokens": 2218072811.0, "step": 13227 }, { "entropy": 1.6299297511577606, "epoch": 1.453159759413364, "grad_norm": 0.7247095704078674, "learning_rate": 5.305829419705648e-06, "loss": 1.2793, "mean_token_accuracy": 0.6687569071849188, "num_tokens": 2218206884.0, "step": 13228 }, { "entropy": 1.726058046023051, "epoch": 1.4532696163247372, "grad_norm": 0.6222012639045715, "learning_rate": 5.30458958165695e-06, "loss": 1.4566, "mean_token_accuracy": 0.6559246480464935, "num_tokens": 2218388967.0, "step": 13229 }, { "entropy": 1.7035260498523712, "epoch": 1.4533794732361098, "grad_norm": 0.6351275444030762, "learning_rate": 5.303349923865425e-06, "loss": 1.4242, "mean_token_accuracy": 0.6489528665939966, "num_tokens": 2218568637.0, "step": 13230 }, { "entropy": 1.6851453681786854, "epoch": 1.453489330147483, "grad_norm": 0.7717143297195435, "learning_rate": 5.30211044637031e-06, "loss": 1.2123, "mean_token_accuracy": 0.6865204274654388, "num_tokens": 2218672479.0, "step": 13231 }, { "entropy": 1.6864939232667286, "epoch": 1.4535991870588558, "grad_norm": 0.7055935859680176, "learning_rate": 5.300871149210833e-06, "loss": 1.3668, "mean_token_accuracy": 0.6618408660093943, "num_tokens": 2218894075.0, "step": 13232 }, { "entropy": 1.651742806037267, "epoch": 1.4537090439702287, "grad_norm": 0.7507491707801819, "learning_rate": 5.299632032426213e-06, "loss": 1.3153, "mean_token_accuracy": 0.6831634243329366, "num_tokens": 2219006787.0, "step": 13233 }, { "entropy": 1.772430956363678, "epoch": 1.4538189008816018, "grad_norm": 0.7279871702194214, "learning_rate": 5.298393096055674e-06, "loss": 1.4669, "mean_token_accuracy": 0.6384557783603668, "num_tokens": 2219193713.0, "step": 13234 }, { "entropy": 1.6615471144517262, "epoch": 1.4539287577929747, "grad_norm": 0.7124606966972351, "learning_rate": 5.297154340138419e-06, "loss": 1.5806, "mean_token_accuracy": 0.6216800361871719, "num_tokens": 2219441667.0, "step": 13235 }, { "entropy": 1.6834536989529927, "epoch": 1.4540386147043476, "grad_norm": 0.6642992496490479, "learning_rate": 5.295915764713666e-06, "loss": 1.22, "mean_token_accuracy": 0.673203244805336, "num_tokens": 2219605960.0, "step": 13236 }, { "entropy": 1.7114764948685963, "epoch": 1.4541484716157205, "grad_norm": 0.6740455627441406, "learning_rate": 5.294677369820605e-06, "loss": 1.433, "mean_token_accuracy": 0.6435778339703878, "num_tokens": 2219763478.0, "step": 13237 }, { "entropy": 1.66608660419782, "epoch": 1.4542583285270934, "grad_norm": 0.6613836288452148, "learning_rate": 5.293439155498435e-06, "loss": 1.507, "mean_token_accuracy": 0.6401470750570297, "num_tokens": 2219939231.0, "step": 13238 }, { "entropy": 1.7152255574862163, "epoch": 1.4543681854384665, "grad_norm": 0.799233615398407, "learning_rate": 5.292201121786345e-06, "loss": 1.3541, "mean_token_accuracy": 0.6578367203474045, "num_tokens": 2220093449.0, "step": 13239 }, { "entropy": 1.6978593568007152, "epoch": 1.4544780423498394, "grad_norm": 0.6676912903785706, "learning_rate": 5.290963268723517e-06, "loss": 1.367, "mean_token_accuracy": 0.6562477846940359, "num_tokens": 2220241070.0, "step": 13240 }, { "entropy": 1.726622184117635, "epoch": 1.4545878992612122, "grad_norm": 0.804278552532196, "learning_rate": 5.289725596349128e-06, "loss": 1.3472, "mean_token_accuracy": 0.6571770707766215, "num_tokens": 2220390433.0, "step": 13241 }, { "entropy": 1.6938590904076893, "epoch": 1.4546977561725853, "grad_norm": 0.6081349849700928, "learning_rate": 5.2884881047023516e-06, "loss": 1.4959, "mean_token_accuracy": 0.64292544623216, "num_tokens": 2220584496.0, "step": 13242 }, { "entropy": 1.6884879171848297, "epoch": 1.454807613083958, "grad_norm": 0.7846350073814392, "learning_rate": 5.287250793822352e-06, "loss": 1.4016, "mean_token_accuracy": 0.6731372624635696, "num_tokens": 2220768356.0, "step": 13243 }, { "entropy": 1.6507751047611237, "epoch": 1.4549174699953311, "grad_norm": 0.5410248637199402, "learning_rate": 5.286013663748292e-06, "loss": 1.4474, "mean_token_accuracy": 0.6416066288948059, "num_tokens": 2220995309.0, "step": 13244 }, { "entropy": 1.7409183184305828, "epoch": 1.455027326906704, "grad_norm": 0.7013614773750305, "learning_rate": 5.284776714519326e-06, "loss": 1.4582, "mean_token_accuracy": 0.6478712111711502, "num_tokens": 2221198221.0, "step": 13245 }, { "entropy": 1.7032929261525471, "epoch": 1.4551371838180769, "grad_norm": 0.6887391209602356, "learning_rate": 5.2835399461745965e-06, "loss": 1.4032, "mean_token_accuracy": 0.6849873264630636, "num_tokens": 2221400687.0, "step": 13246 }, { "entropy": 1.6999091704686482, "epoch": 1.45524704072945, "grad_norm": 0.7940466403961182, "learning_rate": 5.2823033587532545e-06, "loss": 1.2728, "mean_token_accuracy": 0.675998126467069, "num_tokens": 2221549840.0, "step": 13247 }, { "entropy": 1.657990833123525, "epoch": 1.4553568976408229, "grad_norm": 0.6163055896759033, "learning_rate": 5.281066952294436e-06, "loss": 1.4401, "mean_token_accuracy": 0.6552244772513708, "num_tokens": 2221785884.0, "step": 13248 }, { "entropy": 1.7675037880738576, "epoch": 1.4554667545521958, "grad_norm": 0.6566433310508728, "learning_rate": 5.2798307268372714e-06, "loss": 1.453, "mean_token_accuracy": 0.6563322295745214, "num_tokens": 2221930165.0, "step": 13249 }, { "entropy": 1.7534189720948536, "epoch": 1.4555766114635686, "grad_norm": 0.659052848815918, "learning_rate": 5.2785946824208845e-06, "loss": 1.4248, "mean_token_accuracy": 0.6439740558465322, "num_tokens": 2222126390.0, "step": 13250 }, { "entropy": 1.693113644917806, "epoch": 1.4556864683749415, "grad_norm": 0.7174281477928162, "learning_rate": 5.277358819084401e-06, "loss": 1.4873, "mean_token_accuracy": 0.6578061381975809, "num_tokens": 2222313824.0, "step": 13251 }, { "entropy": 1.6609934270381927, "epoch": 1.4557963252863146, "grad_norm": 0.5869874954223633, "learning_rate": 5.276123136866931e-06, "loss": 1.3664, "mean_token_accuracy": 0.6591756095488867, "num_tokens": 2222498021.0, "step": 13252 }, { "entropy": 1.6955777903397877, "epoch": 1.4559061821976875, "grad_norm": 0.736538290977478, "learning_rate": 5.274887635807584e-06, "loss": 1.3698, "mean_token_accuracy": 0.6608079870541891, "num_tokens": 2222666538.0, "step": 13253 }, { "entropy": 1.7050454417864482, "epoch": 1.4560160391090604, "grad_norm": 0.7196376323699951, "learning_rate": 5.273652315945464e-06, "loss": 1.3256, "mean_token_accuracy": 0.6665113717317581, "num_tokens": 2222791127.0, "step": 13254 }, { "entropy": 1.7047736942768097, "epoch": 1.4561258960204335, "grad_norm": 0.787895917892456, "learning_rate": 5.2724171773196665e-06, "loss": 1.4962, "mean_token_accuracy": 0.6398163984219233, "num_tokens": 2222997665.0, "step": 13255 }, { "entropy": 1.7137849926948547, "epoch": 1.4562357529318064, "grad_norm": 0.8692581057548523, "learning_rate": 5.271182219969286e-06, "loss": 1.3989, "mean_token_accuracy": 0.6480956127246221, "num_tokens": 2223183484.0, "step": 13256 }, { "entropy": 1.7420273820559184, "epoch": 1.4563456098431793, "grad_norm": 0.7221806049346924, "learning_rate": 5.269947443933408e-06, "loss": 1.3607, "mean_token_accuracy": 0.6581288725137711, "num_tokens": 2223324598.0, "step": 13257 }, { "entropy": 1.624452531337738, "epoch": 1.4564554667545522, "grad_norm": 0.6961126327514648, "learning_rate": 5.2687128492511075e-06, "loss": 1.4168, "mean_token_accuracy": 0.6610220770041147, "num_tokens": 2223496057.0, "step": 13258 }, { "entropy": 1.6884084045886993, "epoch": 1.456565323665925, "grad_norm": 1.0178797245025635, "learning_rate": 5.267478435961462e-06, "loss": 1.3582, "mean_token_accuracy": 0.665848026672999, "num_tokens": 2223654632.0, "step": 13259 }, { "entropy": 1.6696566045284271, "epoch": 1.4566751805772982, "grad_norm": 0.6774824261665344, "learning_rate": 5.266244204103548e-06, "loss": 1.343, "mean_token_accuracy": 0.6636465241511663, "num_tokens": 2223806523.0, "step": 13260 }, { "entropy": 1.7208555539449055, "epoch": 1.456785037488671, "grad_norm": 0.6381753087043762, "learning_rate": 5.265010153716415e-06, "loss": 1.3636, "mean_token_accuracy": 0.6624864041805267, "num_tokens": 2223993713.0, "step": 13261 }, { "entropy": 1.697748472293218, "epoch": 1.456894894400044, "grad_norm": 0.7066287994384766, "learning_rate": 5.263776284839126e-06, "loss": 1.2882, "mean_token_accuracy": 0.6692610581715902, "num_tokens": 2224121187.0, "step": 13262 }, { "entropy": 1.6702364484469097, "epoch": 1.4570047513114168, "grad_norm": 0.6550009250640869, "learning_rate": 5.2625425975107366e-06, "loss": 1.5535, "mean_token_accuracy": 0.6461095362901688, "num_tokens": 2224294928.0, "step": 13263 }, { "entropy": 1.671968440214793, "epoch": 1.4571146082227897, "grad_norm": 0.7919005751609802, "learning_rate": 5.261309091770288e-06, "loss": 1.3144, "mean_token_accuracy": 0.6730043093363444, "num_tokens": 2224442529.0, "step": 13264 }, { "entropy": 1.6805502672990162, "epoch": 1.4572244651341628, "grad_norm": 0.6840505599975586, "learning_rate": 5.260075767656818e-06, "loss": 1.3058, "mean_token_accuracy": 0.6673836757739385, "num_tokens": 2224580676.0, "step": 13265 }, { "entropy": 1.723142812649409, "epoch": 1.4573343220455357, "grad_norm": 0.7580272555351257, "learning_rate": 5.258842625209367e-06, "loss": 1.4996, "mean_token_accuracy": 0.641799122095108, "num_tokens": 2224774246.0, "step": 13266 }, { "entropy": 1.702703317006429, "epoch": 1.4574441789569086, "grad_norm": 1.6131107807159424, "learning_rate": 5.257609664466956e-06, "loss": 0.9713, "mean_token_accuracy": 0.6902973006169001, "num_tokens": 2224908967.0, "step": 13267 }, { "entropy": 1.7422963480154674, "epoch": 1.4575540358682817, "grad_norm": 0.830781102180481, "learning_rate": 5.256376885468615e-06, "loss": 1.5733, "mean_token_accuracy": 0.6498822967211405, "num_tokens": 2225102321.0, "step": 13268 }, { "entropy": 1.7461797297000885, "epoch": 1.4576638927796546, "grad_norm": 0.7430237531661987, "learning_rate": 5.255144288253357e-06, "loss": 1.408, "mean_token_accuracy": 0.6493665178616842, "num_tokens": 2225238306.0, "step": 13269 }, { "entropy": 1.6967601478099823, "epoch": 1.4577737496910275, "grad_norm": 0.7158797979354858, "learning_rate": 5.253911872860191e-06, "loss": 1.2596, "mean_token_accuracy": 0.6785516838232676, "num_tokens": 2225363858.0, "step": 13270 }, { "entropy": 1.7182322641213734, "epoch": 1.4578836066024004, "grad_norm": 0.5696946978569031, "learning_rate": 5.252679639328125e-06, "loss": 1.5107, "mean_token_accuracy": 0.6287727604309717, "num_tokens": 2225608182.0, "step": 13271 }, { "entropy": 1.7183633248011272, "epoch": 1.4579934635137732, "grad_norm": 0.8003261685371399, "learning_rate": 5.2514475876961655e-06, "loss": 1.3841, "mean_token_accuracy": 0.6599841763575872, "num_tokens": 2225734061.0, "step": 13272 }, { "entropy": 1.6641955971717834, "epoch": 1.4581033204251463, "grad_norm": 0.6576728820800781, "learning_rate": 5.250215718003293e-06, "loss": 1.2564, "mean_token_accuracy": 0.6735943456490835, "num_tokens": 2225892115.0, "step": 13273 }, { "entropy": 1.6959330240885417, "epoch": 1.4582131773365192, "grad_norm": 0.7016428112983704, "learning_rate": 5.2489840302885e-06, "loss": 1.2863, "mean_token_accuracy": 0.665631502866745, "num_tokens": 2226025946.0, "step": 13274 }, { "entropy": 1.6741726001103718, "epoch": 1.4583230342478921, "grad_norm": 0.7208593487739563, "learning_rate": 5.247752524590776e-06, "loss": 1.4776, "mean_token_accuracy": 0.6564379036426544, "num_tokens": 2226179358.0, "step": 13275 }, { "entropy": 1.682859222094218, "epoch": 1.458432891159265, "grad_norm": 0.7038945555686951, "learning_rate": 5.246521200949093e-06, "loss": 1.3761, "mean_token_accuracy": 0.6612346222003301, "num_tokens": 2226343715.0, "step": 13276 }, { "entropy": 1.7394512792428334, "epoch": 1.4585427480706379, "grad_norm": 0.778741180896759, "learning_rate": 5.245290059402417e-06, "loss": 1.3302, "mean_token_accuracy": 0.6651216298341751, "num_tokens": 2226515575.0, "step": 13277 }, { "entropy": 1.7190166016419728, "epoch": 1.458652604982011, "grad_norm": 0.6520856022834778, "learning_rate": 5.24405909998972e-06, "loss": 1.2736, "mean_token_accuracy": 0.6683216094970703, "num_tokens": 2226636398.0, "step": 13278 }, { "entropy": 1.736037790775299, "epoch": 1.4587624618933839, "grad_norm": 0.7920129299163818, "learning_rate": 5.242828322749958e-06, "loss": 1.4525, "mean_token_accuracy": 0.6551641374826431, "num_tokens": 2226803388.0, "step": 13279 }, { "entropy": 1.693130115667979, "epoch": 1.4588723188047568, "grad_norm": 0.6134016513824463, "learning_rate": 5.241597727722088e-06, "loss": 1.3696, "mean_token_accuracy": 0.6638036072254181, "num_tokens": 2226951444.0, "step": 13280 }, { "entropy": 1.7208605806032817, "epoch": 1.4589821757161299, "grad_norm": 0.7502411603927612, "learning_rate": 5.240367314945054e-06, "loss": 1.4216, "mean_token_accuracy": 0.6602864662806193, "num_tokens": 2227074694.0, "step": 13281 }, { "entropy": 1.6609876056512196, "epoch": 1.4590920326275028, "grad_norm": 0.6450250744819641, "learning_rate": 5.239137084457795e-06, "loss": 1.3909, "mean_token_accuracy": 0.656602198878924, "num_tokens": 2227238549.0, "step": 13282 }, { "entropy": 1.6838933726151784, "epoch": 1.4592018895388756, "grad_norm": 0.6704497337341309, "learning_rate": 5.2379070362992525e-06, "loss": 1.2862, "mean_token_accuracy": 0.6715071648359299, "num_tokens": 2227393583.0, "step": 13283 }, { "entropy": 1.7411844432353973, "epoch": 1.4593117464502485, "grad_norm": 0.7020705342292786, "learning_rate": 5.236677170508363e-06, "loss": 1.7397, "mean_token_accuracy": 0.6055120974779129, "num_tokens": 2227595673.0, "step": 13284 }, { "entropy": 1.6689063012599945, "epoch": 1.4594216033616214, "grad_norm": 0.7527252435684204, "learning_rate": 5.235447487124037e-06, "loss": 1.296, "mean_token_accuracy": 0.6730232934157053, "num_tokens": 2227767400.0, "step": 13285 }, { "entropy": 1.6576413909594219, "epoch": 1.4595314602729945, "grad_norm": 0.7640280723571777, "learning_rate": 5.234217986185201e-06, "loss": 1.4514, "mean_token_accuracy": 0.6499234984318415, "num_tokens": 2227956387.0, "step": 13286 }, { "entropy": 1.7037740747133892, "epoch": 1.4596413171843674, "grad_norm": 0.6326615214347839, "learning_rate": 5.23298866773077e-06, "loss": 1.4418, "mean_token_accuracy": 0.6500868995984396, "num_tokens": 2228170702.0, "step": 13287 }, { "entropy": 1.7462695737679799, "epoch": 1.4597511740957403, "grad_norm": 0.8364010453224182, "learning_rate": 5.231759531799649e-06, "loss": 1.4275, "mean_token_accuracy": 0.6656624972820282, "num_tokens": 2228377715.0, "step": 13288 }, { "entropy": 1.707568754752477, "epoch": 1.4598610310071132, "grad_norm": 0.820871889591217, "learning_rate": 5.230530578430737e-06, "loss": 1.284, "mean_token_accuracy": 0.6757774303356806, "num_tokens": 2228529705.0, "step": 13289 }, { "entropy": 1.7350817521413167, "epoch": 1.459970887918486, "grad_norm": 0.7010106444358826, "learning_rate": 5.229301807662937e-06, "loss": 1.3151, "mean_token_accuracy": 0.6694407761096954, "num_tokens": 2228686318.0, "step": 13290 }, { "entropy": 1.7383721967538197, "epoch": 1.4600807448298592, "grad_norm": 0.5950272679328918, "learning_rate": 5.228073219535128e-06, "loss": 1.6986, "mean_token_accuracy": 0.6260428552826246, "num_tokens": 2228943488.0, "step": 13291 }, { "entropy": 1.7025316456953685, "epoch": 1.460190601741232, "grad_norm": 0.7957791090011597, "learning_rate": 5.226844814086206e-06, "loss": 1.3464, "mean_token_accuracy": 0.6687312970558802, "num_tokens": 2229072202.0, "step": 13292 }, { "entropy": 1.671481430530548, "epoch": 1.460300458652605, "grad_norm": 0.7400625348091125, "learning_rate": 5.2256165913550425e-06, "loss": 1.3572, "mean_token_accuracy": 0.6515611658493677, "num_tokens": 2229265397.0, "step": 13293 }, { "entropy": 1.6869538923104603, "epoch": 1.460410315563978, "grad_norm": 0.7097235321998596, "learning_rate": 5.22438855138051e-06, "loss": 1.3306, "mean_token_accuracy": 0.6697370956341425, "num_tokens": 2229437081.0, "step": 13294 }, { "entropy": 1.7623671690622966, "epoch": 1.460520172475351, "grad_norm": 0.7999326586723328, "learning_rate": 5.223160694201477e-06, "loss": 1.4252, "mean_token_accuracy": 0.6673903316259384, "num_tokens": 2229599506.0, "step": 13295 }, { "entropy": 1.699026753505071, "epoch": 1.4606300293867238, "grad_norm": 0.6532884240150452, "learning_rate": 5.221933019856813e-06, "loss": 1.2955, "mean_token_accuracy": 0.6673917869726816, "num_tokens": 2229733052.0, "step": 13296 }, { "entropy": 1.6547558307647705, "epoch": 1.4607398862980967, "grad_norm": 0.6670539975166321, "learning_rate": 5.220705528385357e-06, "loss": 1.2823, "mean_token_accuracy": 0.669133797287941, "num_tokens": 2229887116.0, "step": 13297 }, { "entropy": 1.6800335148970287, "epoch": 1.4608497432094696, "grad_norm": 0.7013092041015625, "learning_rate": 5.219478219825969e-06, "loss": 1.2742, "mean_token_accuracy": 0.6806422223647436, "num_tokens": 2230068473.0, "step": 13298 }, { "entropy": 1.7160185774167378, "epoch": 1.4609596001208427, "grad_norm": 0.706506073474884, "learning_rate": 5.2182510942174904e-06, "loss": 1.3858, "mean_token_accuracy": 0.6637303580840429, "num_tokens": 2230208267.0, "step": 13299 }, { "entropy": 1.7366038858890533, "epoch": 1.4610694570322156, "grad_norm": 0.7497095465660095, "learning_rate": 5.217024151598759e-06, "loss": 1.656, "mean_token_accuracy": 0.6435926059881846, "num_tokens": 2230359536.0, "step": 13300 }, { "entropy": 1.6858433783054352, "epoch": 1.4611793139435885, "grad_norm": 0.6425523161888123, "learning_rate": 5.21579739200861e-06, "loss": 1.3171, "mean_token_accuracy": 0.6678670247395834, "num_tokens": 2230496567.0, "step": 13301 }, { "entropy": 1.712745487689972, "epoch": 1.4612891708549614, "grad_norm": 0.746553897857666, "learning_rate": 5.214570815485865e-06, "loss": 1.3764, "mean_token_accuracy": 0.6616001923878988, "num_tokens": 2230655773.0, "step": 13302 }, { "entropy": 1.681189884742101, "epoch": 1.4613990277663342, "grad_norm": 0.6803275346755981, "learning_rate": 5.213344422069344e-06, "loss": 1.4087, "mean_token_accuracy": 0.6556661377350489, "num_tokens": 2230883918.0, "step": 13303 }, { "entropy": 1.7151753803094227, "epoch": 1.4615088846777073, "grad_norm": 0.6623923778533936, "learning_rate": 5.212118211797868e-06, "loss": 1.5692, "mean_token_accuracy": 0.6383712540070215, "num_tokens": 2231057143.0, "step": 13304 }, { "entropy": 1.7497306366761525, "epoch": 1.4616187415890802, "grad_norm": 0.682961106300354, "learning_rate": 5.210892184710243e-06, "loss": 1.2886, "mean_token_accuracy": 0.6717896262804667, "num_tokens": 2231220320.0, "step": 13305 }, { "entropy": 1.6684472461541493, "epoch": 1.4617285985004531, "grad_norm": 0.8259005546569824, "learning_rate": 5.209666340845268e-06, "loss": 1.5261, "mean_token_accuracy": 0.6499257162213326, "num_tokens": 2231385621.0, "step": 13306 }, { "entropy": 1.644069214661916, "epoch": 1.4618384554118262, "grad_norm": 0.6260018944740295, "learning_rate": 5.2084406802417484e-06, "loss": 1.4294, "mean_token_accuracy": 0.6403475701808929, "num_tokens": 2231582756.0, "step": 13307 }, { "entropy": 1.7100238502025604, "epoch": 1.461948312323199, "grad_norm": 0.7612260580062866, "learning_rate": 5.207215202938471e-06, "loss": 1.4929, "mean_token_accuracy": 0.6612754563490549, "num_tokens": 2231709892.0, "step": 13308 }, { "entropy": 1.6849770645300548, "epoch": 1.462058169234572, "grad_norm": 0.7276026606559753, "learning_rate": 5.205989908974218e-06, "loss": 1.4184, "mean_token_accuracy": 0.6592111438512802, "num_tokens": 2231854359.0, "step": 13309 }, { "entropy": 1.698674072821935, "epoch": 1.4621680261459449, "grad_norm": 0.6991817951202393, "learning_rate": 5.204764798387778e-06, "loss": 1.4016, "mean_token_accuracy": 0.6530411044756571, "num_tokens": 2232053954.0, "step": 13310 }, { "entropy": 1.6900160908699036, "epoch": 1.4622778830573178, "grad_norm": 0.6570863127708435, "learning_rate": 5.203539871217918e-06, "loss": 1.4676, "mean_token_accuracy": 0.6459223727385203, "num_tokens": 2232234666.0, "step": 13311 }, { "entropy": 1.69076007604599, "epoch": 1.4623877399686909, "grad_norm": 0.8549068570137024, "learning_rate": 5.202315127503411e-06, "loss": 1.1945, "mean_token_accuracy": 0.6830791085958481, "num_tokens": 2232351276.0, "step": 13312 }, { "entropy": 1.7095185021559398, "epoch": 1.4624975968800638, "grad_norm": 0.7216442823410034, "learning_rate": 5.201090567283019e-06, "loss": 1.3842, "mean_token_accuracy": 0.6556618362665176, "num_tokens": 2232487699.0, "step": 13313 }, { "entropy": 1.7185616195201874, "epoch": 1.4626074537914366, "grad_norm": 0.6242141723632812, "learning_rate": 5.1998661905954925e-06, "loss": 1.2984, "mean_token_accuracy": 0.6604965478181839, "num_tokens": 2232640944.0, "step": 13314 }, { "entropy": 1.7047783136367798, "epoch": 1.4627173107028095, "grad_norm": 0.7990993857383728, "learning_rate": 5.1986419974795895e-06, "loss": 1.3772, "mean_token_accuracy": 0.6700956672430038, "num_tokens": 2232790937.0, "step": 13315 }, { "entropy": 1.6940909028053284, "epoch": 1.4628271676141824, "grad_norm": 0.6506087779998779, "learning_rate": 5.197417987974056e-06, "loss": 1.3113, "mean_token_accuracy": 0.6701582570870718, "num_tokens": 2232959603.0, "step": 13316 }, { "entropy": 1.7363331615924835, "epoch": 1.4629370245255555, "grad_norm": 0.668000340461731, "learning_rate": 5.196194162117627e-06, "loss": 1.4191, "mean_token_accuracy": 0.6679625312487284, "num_tokens": 2233101967.0, "step": 13317 }, { "entropy": 1.64047638575236, "epoch": 1.4630468814369284, "grad_norm": 0.7590168714523315, "learning_rate": 5.194970519949035e-06, "loss": 1.3215, "mean_token_accuracy": 0.6645925690730413, "num_tokens": 2233240156.0, "step": 13318 }, { "entropy": 1.64529550075531, "epoch": 1.4631567383483013, "grad_norm": 0.6108586192131042, "learning_rate": 5.193747061507015e-06, "loss": 1.4469, "mean_token_accuracy": 0.65325299402078, "num_tokens": 2233428355.0, "step": 13319 }, { "entropy": 1.7181050678094227, "epoch": 1.4632665952596744, "grad_norm": 0.6974697113037109, "learning_rate": 5.1925237868302815e-06, "loss": 1.4742, "mean_token_accuracy": 0.6386492003997167, "num_tokens": 2233622238.0, "step": 13320 }, { "entropy": 1.6982758343219757, "epoch": 1.4633764521710473, "grad_norm": 0.6342235207557678, "learning_rate": 5.1913006959575515e-06, "loss": 1.4225, "mean_token_accuracy": 0.6377789328495661, "num_tokens": 2233805645.0, "step": 13321 }, { "entropy": 1.6800893247127533, "epoch": 1.4634863090824202, "grad_norm": 0.7242743372917175, "learning_rate": 5.19007778892754e-06, "loss": 1.2808, "mean_token_accuracy": 0.6725705116987228, "num_tokens": 2233968192.0, "step": 13322 }, { "entropy": 1.7239407698313396, "epoch": 1.463596165993793, "grad_norm": 0.6715902090072632, "learning_rate": 5.188855065778946e-06, "loss": 1.4202, "mean_token_accuracy": 0.6526324351628622, "num_tokens": 2234137698.0, "step": 13323 }, { "entropy": 1.7070810496807098, "epoch": 1.463706022905166, "grad_norm": 0.6120285987854004, "learning_rate": 5.187632526550472e-06, "loss": 1.3874, "mean_token_accuracy": 0.6460235466559728, "num_tokens": 2234325035.0, "step": 13324 }, { "entropy": 1.738279104232788, "epoch": 1.463815879816539, "grad_norm": 0.776631236076355, "learning_rate": 5.1864101712808115e-06, "loss": 1.4277, "mean_token_accuracy": 0.6572244515021642, "num_tokens": 2234503600.0, "step": 13325 }, { "entropy": 1.6941909690697987, "epoch": 1.463925736727912, "grad_norm": 0.6817474961280823, "learning_rate": 5.185188000008645e-06, "loss": 1.251, "mean_token_accuracy": 0.6787453691164652, "num_tokens": 2234640034.0, "step": 13326 }, { "entropy": 1.7105824053287506, "epoch": 1.4640355936392848, "grad_norm": 0.6769583821296692, "learning_rate": 5.183966012772657e-06, "loss": 1.3502, "mean_token_accuracy": 0.6620890498161316, "num_tokens": 2234800322.0, "step": 13327 }, { "entropy": 1.7065203487873077, "epoch": 1.4641454505506577, "grad_norm": 0.7230082154273987, "learning_rate": 5.18274420961153e-06, "loss": 1.2583, "mean_token_accuracy": 0.6677148640155792, "num_tokens": 2234902867.0, "step": 13328 }, { "entropy": 1.6912482976913452, "epoch": 1.4642553074620306, "grad_norm": 0.6191965937614441, "learning_rate": 5.181522590563925e-06, "loss": 1.3609, "mean_token_accuracy": 0.6615066925684611, "num_tokens": 2235083263.0, "step": 13329 }, { "entropy": 1.7631979684034984, "epoch": 1.4643651643734037, "grad_norm": 0.6107144355773926, "learning_rate": 5.180301155668506e-06, "loss": 1.5027, "mean_token_accuracy": 0.634604016939799, "num_tokens": 2235264330.0, "step": 13330 }, { "entropy": 1.641640196243922, "epoch": 1.4644750212847766, "grad_norm": 0.5961340665817261, "learning_rate": 5.179079904963936e-06, "loss": 1.2884, "mean_token_accuracy": 0.6740356385707855, "num_tokens": 2235492587.0, "step": 13331 }, { "entropy": 1.68045578400294, "epoch": 1.4645848781961495, "grad_norm": 0.774403989315033, "learning_rate": 5.177858838488864e-06, "loss": 1.3224, "mean_token_accuracy": 0.6614306718111038, "num_tokens": 2235653770.0, "step": 13332 }, { "entropy": 1.6718662977218628, "epoch": 1.4646947351075226, "grad_norm": 0.730610728263855, "learning_rate": 5.176637956281934e-06, "loss": 1.4181, "mean_token_accuracy": 0.6523456772168478, "num_tokens": 2235821839.0, "step": 13333 }, { "entropy": 1.7122483650843303, "epoch": 1.4648045920188955, "grad_norm": 0.7947407960891724, "learning_rate": 5.175417258381789e-06, "loss": 1.2752, "mean_token_accuracy": 0.6747554838657379, "num_tokens": 2235967588.0, "step": 13334 }, { "entropy": 1.692185898621877, "epoch": 1.4649144489302683, "grad_norm": 0.7685208320617676, "learning_rate": 5.174196744827063e-06, "loss": 1.5189, "mean_token_accuracy": 0.6478336552778879, "num_tokens": 2236139430.0, "step": 13335 }, { "entropy": 1.7424322664737701, "epoch": 1.4650243058416412, "grad_norm": 0.8064534068107605, "learning_rate": 5.172976415656385e-06, "loss": 1.3833, "mean_token_accuracy": 0.6672340482473373, "num_tokens": 2236303607.0, "step": 13336 }, { "entropy": 1.7538822293281555, "epoch": 1.4651341627530141, "grad_norm": 0.6886154413223267, "learning_rate": 5.171756270908381e-06, "loss": 1.4997, "mean_token_accuracy": 0.6409474760293961, "num_tokens": 2236475795.0, "step": 13337 }, { "entropy": 1.6770086487134297, "epoch": 1.4652440196643872, "grad_norm": 0.7982631325721741, "learning_rate": 5.170536310621661e-06, "loss": 1.2743, "mean_token_accuracy": 0.6710592210292816, "num_tokens": 2236606141.0, "step": 13338 }, { "entropy": 1.7379637956619263, "epoch": 1.46535387657576, "grad_norm": 0.7204148173332214, "learning_rate": 5.169316534834838e-06, "loss": 1.39, "mean_token_accuracy": 0.6521616876125336, "num_tokens": 2236743945.0, "step": 13339 }, { "entropy": 1.764588902393977, "epoch": 1.465463733487133, "grad_norm": 0.7058348655700684, "learning_rate": 5.168096943586527e-06, "loss": 1.3477, "mean_token_accuracy": 0.6545776476462682, "num_tokens": 2236858423.0, "step": 13340 }, { "entropy": 1.7272200087706249, "epoch": 1.4655735903985059, "grad_norm": 0.7121959924697876, "learning_rate": 5.166877536915313e-06, "loss": 1.2527, "mean_token_accuracy": 0.6780747969945272, "num_tokens": 2236992537.0, "step": 13341 }, { "entropy": 1.7803178131580353, "epoch": 1.4656834473098788, "grad_norm": 0.7428368926048279, "learning_rate": 5.165658314859798e-06, "loss": 1.3522, "mean_token_accuracy": 0.6523331006368002, "num_tokens": 2237127144.0, "step": 13342 }, { "entropy": 1.7277946869532268, "epoch": 1.4657933042212519, "grad_norm": 0.7097252607345581, "learning_rate": 5.164439277458569e-06, "loss": 1.3009, "mean_token_accuracy": 0.6694452812274297, "num_tokens": 2237262457.0, "step": 13343 }, { "entropy": 1.6825427611668904, "epoch": 1.4659031611326248, "grad_norm": 0.723095178604126, "learning_rate": 5.163220424750209e-06, "loss": 1.5455, "mean_token_accuracy": 0.6467806448539098, "num_tokens": 2237409509.0, "step": 13344 }, { "entropy": 1.6955235799153645, "epoch": 1.4660130180439976, "grad_norm": 0.6675035357475281, "learning_rate": 5.162001756773289e-06, "loss": 1.5552, "mean_token_accuracy": 0.6430306434631348, "num_tokens": 2237551260.0, "step": 13345 }, { "entropy": 1.7177290419737499, "epoch": 1.4661228749553707, "grad_norm": 0.76436448097229, "learning_rate": 5.160783273566385e-06, "loss": 1.3634, "mean_token_accuracy": 0.6647897511720657, "num_tokens": 2237675393.0, "step": 13346 }, { "entropy": 1.6530766189098358, "epoch": 1.4662327318667436, "grad_norm": 0.613264799118042, "learning_rate": 5.1595649751680575e-06, "loss": 1.414, "mean_token_accuracy": 0.6538712580998739, "num_tokens": 2237881038.0, "step": 13347 }, { "entropy": 1.7521977821985881, "epoch": 1.4663425887781165, "grad_norm": 0.5653010010719299, "learning_rate": 5.1583468616168685e-06, "loss": 1.5067, "mean_token_accuracy": 0.6414316246906916, "num_tokens": 2238070216.0, "step": 13348 }, { "entropy": 1.7145592470963795, "epoch": 1.4664524456894894, "grad_norm": 0.6285973787307739, "learning_rate": 5.157128932951369e-06, "loss": 1.344, "mean_token_accuracy": 0.668559322754542, "num_tokens": 2238222293.0, "step": 13349 }, { "entropy": 1.7896797955036163, "epoch": 1.4665623026008623, "grad_norm": 0.6855942606925964, "learning_rate": 5.155911189210105e-06, "loss": 1.3821, "mean_token_accuracy": 0.6510206758975983, "num_tokens": 2238356636.0, "step": 13350 }, { "entropy": 1.6923208236694336, "epoch": 1.4666721595122354, "grad_norm": 0.6504180431365967, "learning_rate": 5.154693630431617e-06, "loss": 1.4542, "mean_token_accuracy": 0.6632367918888727, "num_tokens": 2238572585.0, "step": 13351 }, { "entropy": 1.7046404878298442, "epoch": 1.4667820164236083, "grad_norm": 0.6475574970245361, "learning_rate": 5.153476256654448e-06, "loss": 1.4873, "mean_token_accuracy": 0.6398185839255651, "num_tokens": 2238780003.0, "step": 13352 }, { "entropy": 1.6447254419326782, "epoch": 1.4668918733349812, "grad_norm": 0.6468961834907532, "learning_rate": 5.1522590679171135e-06, "loss": 1.4823, "mean_token_accuracy": 0.6522268503904343, "num_tokens": 2238954299.0, "step": 13353 }, { "entropy": 1.7484534879525502, "epoch": 1.467001730246354, "grad_norm": 0.8634352684020996, "learning_rate": 5.151042064258145e-06, "loss": 1.4664, "mean_token_accuracy": 0.6500384410222372, "num_tokens": 2239151290.0, "step": 13354 }, { "entropy": 1.7502192457516987, "epoch": 1.467111587157727, "grad_norm": 0.6729628443717957, "learning_rate": 5.149825245716063e-06, "loss": 1.421, "mean_token_accuracy": 0.6504283597071966, "num_tokens": 2239313609.0, "step": 13355 }, { "entropy": 1.658990353345871, "epoch": 1.4672214440691, "grad_norm": 0.7221643328666687, "learning_rate": 5.148608612329378e-06, "loss": 1.3597, "mean_token_accuracy": 0.658984954158465, "num_tokens": 2239519569.0, "step": 13356 }, { "entropy": 1.6480421324570973, "epoch": 1.467331300980473, "grad_norm": 0.578301727771759, "learning_rate": 5.147392164136591e-06, "loss": 1.3677, "mean_token_accuracy": 0.6631327817837397, "num_tokens": 2239722966.0, "step": 13357 }, { "entropy": 1.6934454341729481, "epoch": 1.4674411578918458, "grad_norm": 0.65192711353302, "learning_rate": 5.146175901176203e-06, "loss": 1.3089, "mean_token_accuracy": 0.6639690001805624, "num_tokens": 2239902756.0, "step": 13358 }, { "entropy": 1.7277617851893108, "epoch": 1.467551014803219, "grad_norm": 0.617236316204071, "learning_rate": 5.144959823486708e-06, "loss": 1.5418, "mean_token_accuracy": 0.6341453293959299, "num_tokens": 2240145800.0, "step": 13359 }, { "entropy": 1.6705620487531025, "epoch": 1.4676608717145918, "grad_norm": 0.6375599503517151, "learning_rate": 5.1437439311066006e-06, "loss": 1.3709, "mean_token_accuracy": 0.6495741556088129, "num_tokens": 2240318927.0, "step": 13360 }, { "entropy": 1.658727725346883, "epoch": 1.4677707286259647, "grad_norm": 0.6619511842727661, "learning_rate": 5.142528224074359e-06, "loss": 1.4811, "mean_token_accuracy": 0.6575326571861903, "num_tokens": 2240495823.0, "step": 13361 }, { "entropy": 1.669166515270869, "epoch": 1.4678805855373376, "grad_norm": 0.7321708798408508, "learning_rate": 5.141312702428456e-06, "loss": 1.3142, "mean_token_accuracy": 0.6671041746934255, "num_tokens": 2240668352.0, "step": 13362 }, { "entropy": 1.7291592756907146, "epoch": 1.4679904424487105, "grad_norm": 0.7490743398666382, "learning_rate": 5.140097366207371e-06, "loss": 1.3883, "mean_token_accuracy": 0.6642330040534338, "num_tokens": 2240837521.0, "step": 13363 }, { "entropy": 1.6821561257044475, "epoch": 1.4681002993600836, "grad_norm": 0.6379314661026001, "learning_rate": 5.138882215449561e-06, "loss": 1.2651, "mean_token_accuracy": 0.6746839582920074, "num_tokens": 2240975191.0, "step": 13364 }, { "entropy": 1.699595848719279, "epoch": 1.4682101562714565, "grad_norm": 0.7495248913764954, "learning_rate": 5.137667250193487e-06, "loss": 1.3034, "mean_token_accuracy": 0.6714814802010854, "num_tokens": 2241136736.0, "step": 13365 }, { "entropy": 1.6805048982302349, "epoch": 1.4683200131828293, "grad_norm": 0.7307020425796509, "learning_rate": 5.136452470477605e-06, "loss": 1.5607, "mean_token_accuracy": 0.6429369499286016, "num_tokens": 2241302570.0, "step": 13366 }, { "entropy": 1.6858853499094646, "epoch": 1.4684298700942022, "grad_norm": 0.7530251741409302, "learning_rate": 5.135237876340357e-06, "loss": 1.4323, "mean_token_accuracy": 0.6499339739481608, "num_tokens": 2241459406.0, "step": 13367 }, { "entropy": 1.7072277665138245, "epoch": 1.4685397270055751, "grad_norm": 0.7187586426734924, "learning_rate": 5.1340234678201905e-06, "loss": 1.4593, "mean_token_accuracy": 0.6429513593514761, "num_tokens": 2241601911.0, "step": 13368 }, { "entropy": 1.7173350950082142, "epoch": 1.4686495839169482, "grad_norm": 0.6327357888221741, "learning_rate": 5.132809244955538e-06, "loss": 1.3817, "mean_token_accuracy": 0.6499977658192316, "num_tokens": 2241787986.0, "step": 13369 }, { "entropy": 1.7320161958535512, "epoch": 1.468759440828321, "grad_norm": 0.5910692811012268, "learning_rate": 5.131595207784826e-06, "loss": 1.5099, "mean_token_accuracy": 0.6356032888094584, "num_tokens": 2241965938.0, "step": 13370 }, { "entropy": 1.6744110186894734, "epoch": 1.468869297739694, "grad_norm": 0.6589808464050293, "learning_rate": 5.130381356346482e-06, "loss": 1.4489, "mean_token_accuracy": 0.6553682386875153, "num_tokens": 2242170006.0, "step": 13371 }, { "entropy": 1.6842441360155742, "epoch": 1.468979154651067, "grad_norm": 0.6548722386360168, "learning_rate": 5.129167690678926e-06, "loss": 1.4434, "mean_token_accuracy": 0.6569918642441431, "num_tokens": 2242349906.0, "step": 13372 }, { "entropy": 1.6717151006062825, "epoch": 1.46908901156244, "grad_norm": 0.7034960985183716, "learning_rate": 5.127954210820566e-06, "loss": 1.2131, "mean_token_accuracy": 0.6854620377222697, "num_tokens": 2242479249.0, "step": 13373 }, { "entropy": 1.7261487344900768, "epoch": 1.4691988684738129, "grad_norm": 0.6977003812789917, "learning_rate": 5.126740916809807e-06, "loss": 1.4922, "mean_token_accuracy": 0.6372009714444479, "num_tokens": 2242681993.0, "step": 13374 }, { "entropy": 1.6832520266373951, "epoch": 1.4693087253851858, "grad_norm": 0.6861147284507751, "learning_rate": 5.125527808685054e-06, "loss": 1.4987, "mean_token_accuracy": 0.6465383569399515, "num_tokens": 2242854318.0, "step": 13375 }, { "entropy": 1.6995068192481995, "epoch": 1.4694185822965586, "grad_norm": 0.6299582123756409, "learning_rate": 5.1243148864847e-06, "loss": 1.3544, "mean_token_accuracy": 0.6593434164921442, "num_tokens": 2243028642.0, "step": 13376 }, { "entropy": 1.6980251967906952, "epoch": 1.4695284392079317, "grad_norm": 0.7787138223648071, "learning_rate": 5.1231021502471275e-06, "loss": 1.4421, "mean_token_accuracy": 0.6528284599383672, "num_tokens": 2243237493.0, "step": 13377 }, { "entropy": 1.717189719279607, "epoch": 1.4696382961193046, "grad_norm": 0.6779033541679382, "learning_rate": 5.121889600010727e-06, "loss": 1.5139, "mean_token_accuracy": 0.6445932437976202, "num_tokens": 2243438772.0, "step": 13378 }, { "entropy": 1.7422145505746205, "epoch": 1.4697481530306775, "grad_norm": 0.6294360756874084, "learning_rate": 5.120677235813871e-06, "loss": 1.3356, "mean_token_accuracy": 0.6672088205814362, "num_tokens": 2243605382.0, "step": 13379 }, { "entropy": 1.72358504931132, "epoch": 1.4698580099420504, "grad_norm": 0.7390589118003845, "learning_rate": 5.1194650576949326e-06, "loss": 1.3306, "mean_token_accuracy": 0.6668838312228521, "num_tokens": 2243725776.0, "step": 13380 }, { "entropy": 1.7053417166074116, "epoch": 1.4699678668534233, "grad_norm": 0.7105720043182373, "learning_rate": 5.118253065692276e-06, "loss": 1.4912, "mean_token_accuracy": 0.6499655246734619, "num_tokens": 2243901685.0, "step": 13381 }, { "entropy": 1.7427148222923279, "epoch": 1.4700777237647964, "grad_norm": 0.640266478061676, "learning_rate": 5.117041259844256e-06, "loss": 1.4751, "mean_token_accuracy": 0.6555547416210175, "num_tokens": 2244055138.0, "step": 13382 }, { "entropy": 1.7193391521771748, "epoch": 1.4701875806761693, "grad_norm": 0.7918505668640137, "learning_rate": 5.115829640189229e-06, "loss": 1.3312, "mean_token_accuracy": 0.6624412635962168, "num_tokens": 2244239813.0, "step": 13383 }, { "entropy": 1.6076705555121105, "epoch": 1.4702974375875422, "grad_norm": 0.700372040271759, "learning_rate": 5.1146182067655445e-06, "loss": 1.2775, "mean_token_accuracy": 0.6678995142380396, "num_tokens": 2244452998.0, "step": 13384 }, { "entropy": 1.7358313500881195, "epoch": 1.4704072944989153, "grad_norm": 0.6558439135551453, "learning_rate": 5.113406959611545e-06, "loss": 1.4388, "mean_token_accuracy": 0.6488740295171738, "num_tokens": 2244601173.0, "step": 13385 }, { "entropy": 1.7473020454247792, "epoch": 1.4705171514102882, "grad_norm": 0.704651415348053, "learning_rate": 5.112195898765557e-06, "loss": 1.5605, "mean_token_accuracy": 0.6439760675032934, "num_tokens": 2244776293.0, "step": 13386 }, { "entropy": 1.702320804198583, "epoch": 1.470627008321661, "grad_norm": 0.7008829712867737, "learning_rate": 5.110985024265917e-06, "loss": 1.4391, "mean_token_accuracy": 0.6583433995644251, "num_tokens": 2244939458.0, "step": 13387 }, { "entropy": 1.700803816318512, "epoch": 1.470736865233034, "grad_norm": 0.7770166993141174, "learning_rate": 5.109774336150951e-06, "loss": 1.4417, "mean_token_accuracy": 0.6354402701059977, "num_tokens": 2245145754.0, "step": 13388 }, { "entropy": 1.7102882862091064, "epoch": 1.4708467221444068, "grad_norm": 0.7285779118537903, "learning_rate": 5.108563834458969e-06, "loss": 1.4532, "mean_token_accuracy": 0.6507706940174103, "num_tokens": 2245356512.0, "step": 13389 }, { "entropy": 1.7527295649051666, "epoch": 1.47095657905578, "grad_norm": 0.793194055557251, "learning_rate": 5.107353519228289e-06, "loss": 1.2389, "mean_token_accuracy": 0.6625605672597885, "num_tokens": 2245496613.0, "step": 13390 }, { "entropy": 1.723008652528127, "epoch": 1.4710664359671528, "grad_norm": 0.6528907418251038, "learning_rate": 5.106143390497211e-06, "loss": 1.2856, "mean_token_accuracy": 0.6656887034575144, "num_tokens": 2245622811.0, "step": 13391 }, { "entropy": 1.6672236522038777, "epoch": 1.4711762928785257, "grad_norm": 0.5492684245109558, "learning_rate": 5.1049334483040436e-06, "loss": 1.3844, "mean_token_accuracy": 0.6572863310575485, "num_tokens": 2245825018.0, "step": 13392 }, { "entropy": 1.695264220237732, "epoch": 1.4712861497898986, "grad_norm": 0.6142525672912598, "learning_rate": 5.103723692687076e-06, "loss": 1.3716, "mean_token_accuracy": 0.6567343175411224, "num_tokens": 2246014688.0, "step": 13393 }, { "entropy": 1.7013497749964397, "epoch": 1.4713960067012715, "grad_norm": 0.6703829765319824, "learning_rate": 5.102514123684594e-06, "loss": 1.3899, "mean_token_accuracy": 0.6593191623687744, "num_tokens": 2246182122.0, "step": 13394 }, { "entropy": 1.7167363564173381, "epoch": 1.4715058636126446, "grad_norm": 0.7111037373542786, "learning_rate": 5.101304741334883e-06, "loss": 1.4901, "mean_token_accuracy": 0.6491454988718033, "num_tokens": 2246416059.0, "step": 13395 }, { "entropy": 1.6614757577578227, "epoch": 1.4716157205240175, "grad_norm": 0.6759634017944336, "learning_rate": 5.10009554567622e-06, "loss": 1.2934, "mean_token_accuracy": 0.6705714662869772, "num_tokens": 2246560395.0, "step": 13396 }, { "entropy": 1.6646903554598491, "epoch": 1.4717255774353903, "grad_norm": 0.6592537760734558, "learning_rate": 5.0988865367468746e-06, "loss": 1.2862, "mean_token_accuracy": 0.6767180810372034, "num_tokens": 2246720000.0, "step": 13397 }, { "entropy": 1.7495815654595692, "epoch": 1.4718354343467634, "grad_norm": 0.8151704668998718, "learning_rate": 5.09767771458511e-06, "loss": 1.5063, "mean_token_accuracy": 0.6574457635482153, "num_tokens": 2246873756.0, "step": 13398 }, { "entropy": 1.6735199590524037, "epoch": 1.4719452912581363, "grad_norm": 0.6688457131385803, "learning_rate": 5.096469079229187e-06, "loss": 1.3796, "mean_token_accuracy": 0.6598214159409205, "num_tokens": 2247046154.0, "step": 13399 }, { "entropy": 1.6267732282479603, "epoch": 1.4720551481695092, "grad_norm": 0.6082208156585693, "learning_rate": 5.095260630717358e-06, "loss": 1.3601, "mean_token_accuracy": 0.6803909589846929, "num_tokens": 2247214410.0, "step": 13400 }, { "entropy": 1.7172918021678925, "epoch": 1.472165005080882, "grad_norm": 0.7851716876029968, "learning_rate": 5.0940523690878665e-06, "loss": 1.2889, "mean_token_accuracy": 0.6563605020443598, "num_tokens": 2247347179.0, "step": 13401 }, { "entropy": 1.674240271250407, "epoch": 1.472274861992255, "grad_norm": 0.7133161425590515, "learning_rate": 5.092844294378959e-06, "loss": 1.6189, "mean_token_accuracy": 0.65032958984375, "num_tokens": 2247510287.0, "step": 13402 }, { "entropy": 1.7008503377437592, "epoch": 1.472384718903628, "grad_norm": 0.7033988237380981, "learning_rate": 5.091636406628866e-06, "loss": 1.4201, "mean_token_accuracy": 0.6558716595172882, "num_tokens": 2247651329.0, "step": 13403 }, { "entropy": 1.6965941190719604, "epoch": 1.472494575815001, "grad_norm": 0.7866081595420837, "learning_rate": 5.090428705875821e-06, "loss": 1.465, "mean_token_accuracy": 0.6466370224952698, "num_tokens": 2247825351.0, "step": 13404 }, { "entropy": 1.656824787457784, "epoch": 1.4726044327263739, "grad_norm": 0.7088666558265686, "learning_rate": 5.089221192158043e-06, "loss": 1.3121, "mean_token_accuracy": 0.6727252850929896, "num_tokens": 2248030695.0, "step": 13405 }, { "entropy": 1.7407319247722626, "epoch": 1.4727142896377468, "grad_norm": 0.7755722999572754, "learning_rate": 5.088013865513749e-06, "loss": 1.5342, "mean_token_accuracy": 0.6439951807260513, "num_tokens": 2248239120.0, "step": 13406 }, { "entropy": 1.6667874654134114, "epoch": 1.4728241465491196, "grad_norm": 0.6196835041046143, "learning_rate": 5.086806725981153e-06, "loss": 1.388, "mean_token_accuracy": 0.667399138212204, "num_tokens": 2248406198.0, "step": 13407 }, { "entropy": 1.7022119263807933, "epoch": 1.4729340034604927, "grad_norm": 0.6414451599121094, "learning_rate": 5.08559977359846e-06, "loss": 1.3475, "mean_token_accuracy": 0.6679307371377945, "num_tokens": 2248546706.0, "step": 13408 }, { "entropy": 1.67433958252271, "epoch": 1.4730438603718656, "grad_norm": 0.7130979895591736, "learning_rate": 5.0843930084038696e-06, "loss": 1.2636, "mean_token_accuracy": 0.6698357065518697, "num_tokens": 2248687310.0, "step": 13409 }, { "entropy": 1.658446768919627, "epoch": 1.4731537172832385, "grad_norm": 0.7370253801345825, "learning_rate": 5.083186430435574e-06, "loss": 1.4569, "mean_token_accuracy": 0.6521903574466705, "num_tokens": 2248871430.0, "step": 13410 }, { "entropy": 1.725239743789037, "epoch": 1.4732635741946116, "grad_norm": 0.6639283299446106, "learning_rate": 5.0819800397317635e-06, "loss": 1.3176, "mean_token_accuracy": 0.6676472028096517, "num_tokens": 2249035961.0, "step": 13411 }, { "entropy": 1.7703583141167958, "epoch": 1.4733734311059845, "grad_norm": 0.6325090527534485, "learning_rate": 5.0807738363306165e-06, "loss": 1.4219, "mean_token_accuracy": 0.6441073268651962, "num_tokens": 2249163917.0, "step": 13412 }, { "entropy": 1.7586402297019958, "epoch": 1.4734832880173574, "grad_norm": 0.6557300686836243, "learning_rate": 5.0795678202703104e-06, "loss": 1.4752, "mean_token_accuracy": 0.6431727459033331, "num_tokens": 2249359791.0, "step": 13413 }, { "entropy": 1.7578627566496532, "epoch": 1.4735931449287303, "grad_norm": 0.7130923271179199, "learning_rate": 5.078361991589016e-06, "loss": 1.3846, "mean_token_accuracy": 0.6526891241470972, "num_tokens": 2249514293.0, "step": 13414 }, { "entropy": 1.7255384922027588, "epoch": 1.4737030018401032, "grad_norm": 0.6839941740036011, "learning_rate": 5.0771563503248944e-06, "loss": 1.3951, "mean_token_accuracy": 0.6575382997592291, "num_tokens": 2249650382.0, "step": 13415 }, { "entropy": 1.6533535917599995, "epoch": 1.4738128587514763, "grad_norm": 0.6555220484733582, "learning_rate": 5.075950896516107e-06, "loss": 1.2708, "mean_token_accuracy": 0.6787864863872528, "num_tokens": 2249800802.0, "step": 13416 }, { "entropy": 1.7047683497269948, "epoch": 1.4739227156628492, "grad_norm": 0.7065283060073853, "learning_rate": 5.074745630200806e-06, "loss": 1.3563, "mean_token_accuracy": 0.673611119389534, "num_tokens": 2249944260.0, "step": 13417 }, { "entropy": 1.7077392141024272, "epoch": 1.474032572574222, "grad_norm": 0.7069240212440491, "learning_rate": 5.073540551417131e-06, "loss": 1.2927, "mean_token_accuracy": 0.664387916525205, "num_tokens": 2250053815.0, "step": 13418 }, { "entropy": 1.675834854443868, "epoch": 1.474142429485595, "grad_norm": 0.6132168173789978, "learning_rate": 5.072335660203231e-06, "loss": 1.5704, "mean_token_accuracy": 0.6342288305362066, "num_tokens": 2250337503.0, "step": 13419 }, { "entropy": 1.7231556475162506, "epoch": 1.4742522863969678, "grad_norm": 0.8251075744628906, "learning_rate": 5.071130956597236e-06, "loss": 1.3726, "mean_token_accuracy": 0.6538362056016922, "num_tokens": 2250483346.0, "step": 13420 }, { "entropy": 1.7046967844168346, "epoch": 1.474362143308341, "grad_norm": 0.7439585328102112, "learning_rate": 5.069926440637272e-06, "loss": 1.4672, "mean_token_accuracy": 0.6513397047917048, "num_tokens": 2250658255.0, "step": 13421 }, { "entropy": 1.756682167450587, "epoch": 1.4744720002197138, "grad_norm": 0.7163110375404358, "learning_rate": 5.068722112361466e-06, "loss": 1.5754, "mean_token_accuracy": 0.6236685266097387, "num_tokens": 2250872770.0, "step": 13422 }, { "entropy": 1.7206588784853618, "epoch": 1.4745818571310867, "grad_norm": 0.6744549870491028, "learning_rate": 5.067517971807931e-06, "loss": 1.4174, "mean_token_accuracy": 0.6684413055578867, "num_tokens": 2251059371.0, "step": 13423 }, { "entropy": 1.6995096405347188, "epoch": 1.4746917140424598, "grad_norm": 0.8165592551231384, "learning_rate": 5.066314019014781e-06, "loss": 1.1724, "mean_token_accuracy": 0.6960208316644033, "num_tokens": 2251185914.0, "step": 13424 }, { "entropy": 1.6548355321089427, "epoch": 1.4748015709538327, "grad_norm": 0.6455921530723572, "learning_rate": 5.065110254020118e-06, "loss": 1.5029, "mean_token_accuracy": 0.6502973834673563, "num_tokens": 2251393117.0, "step": 13425 }, { "entropy": 1.6629098852475483, "epoch": 1.4749114278652056, "grad_norm": 0.8761639595031738, "learning_rate": 5.063906676862039e-06, "loss": 1.3805, "mean_token_accuracy": 0.6616864850123724, "num_tokens": 2251558769.0, "step": 13426 }, { "entropy": 1.7066173553466797, "epoch": 1.4750212847765785, "grad_norm": 0.7723012566566467, "learning_rate": 5.062703287578638e-06, "loss": 1.3899, "mean_token_accuracy": 0.65970512231191, "num_tokens": 2251741724.0, "step": 13427 }, { "entropy": 1.6936110059420268, "epoch": 1.4751311416879513, "grad_norm": 0.5787246823310852, "learning_rate": 5.061500086208007e-06, "loss": 1.3812, "mean_token_accuracy": 0.662451446056366, "num_tokens": 2251977302.0, "step": 13428 }, { "entropy": 1.721141795317332, "epoch": 1.4752409985993244, "grad_norm": 0.5804960131645203, "learning_rate": 5.060297072788221e-06, "loss": 1.5953, "mean_token_accuracy": 0.6185376693805059, "num_tokens": 2252212559.0, "step": 13429 }, { "entropy": 1.7097849945227306, "epoch": 1.4753508555106973, "grad_norm": 0.7100227475166321, "learning_rate": 5.059094247357354e-06, "loss": 1.4159, "mean_token_accuracy": 0.6612443824609121, "num_tokens": 2252360518.0, "step": 13430 }, { "entropy": 1.6556785504023235, "epoch": 1.4754607124220702, "grad_norm": 0.6368470788002014, "learning_rate": 5.05789160995348e-06, "loss": 1.5139, "mean_token_accuracy": 0.65046127140522, "num_tokens": 2252576094.0, "step": 13431 }, { "entropy": 1.7037680546442668, "epoch": 1.4755705693334433, "grad_norm": 0.7826297879219055, "learning_rate": 5.056689160614659e-06, "loss": 1.3208, "mean_token_accuracy": 0.6799842069546381, "num_tokens": 2252703056.0, "step": 13432 }, { "entropy": 1.7210968534151714, "epoch": 1.475680426244816, "grad_norm": 0.8060481548309326, "learning_rate": 5.055486899378944e-06, "loss": 1.419, "mean_token_accuracy": 0.6568311204512914, "num_tokens": 2252857460.0, "step": 13433 }, { "entropy": 1.677445928255717, "epoch": 1.475790283156189, "grad_norm": 0.5863375067710876, "learning_rate": 5.054284826284393e-06, "loss": 1.4257, "mean_token_accuracy": 0.6570474654436111, "num_tokens": 2253067045.0, "step": 13434 }, { "entropy": 1.7341270844141643, "epoch": 1.475900140067562, "grad_norm": 0.594789445400238, "learning_rate": 5.053082941369045e-06, "loss": 1.3536, "mean_token_accuracy": 0.6686640679836273, "num_tokens": 2253265121.0, "step": 13435 }, { "entropy": 1.7422856092453003, "epoch": 1.4760099969789349, "grad_norm": 0.7513555884361267, "learning_rate": 5.051881244670947e-06, "loss": 1.4024, "mean_token_accuracy": 0.6600150018930435, "num_tokens": 2253440176.0, "step": 13436 }, { "entropy": 1.6602600614229839, "epoch": 1.476119853890308, "grad_norm": 0.6317045092582703, "learning_rate": 5.050679736228125e-06, "loss": 1.5157, "mean_token_accuracy": 0.6511089901129404, "num_tokens": 2253643777.0, "step": 13437 }, { "entropy": 1.7466975152492523, "epoch": 1.4762297108016809, "grad_norm": 0.647018313407898, "learning_rate": 5.049478416078608e-06, "loss": 1.3602, "mean_token_accuracy": 0.6553884297609329, "num_tokens": 2253771315.0, "step": 13438 }, { "entropy": 1.682116021712621, "epoch": 1.4763395677130537, "grad_norm": 0.8301473259925842, "learning_rate": 5.048277284260416e-06, "loss": 1.3466, "mean_token_accuracy": 0.6662062009175619, "num_tokens": 2253950173.0, "step": 13439 }, { "entropy": 1.7308276693026226, "epoch": 1.4764494246244266, "grad_norm": 0.8368297219276428, "learning_rate": 5.047076340811569e-06, "loss": 1.3698, "mean_token_accuracy": 0.6646293699741364, "num_tokens": 2254076894.0, "step": 13440 }, { "entropy": 1.7010807593663533, "epoch": 1.4765592815357995, "grad_norm": 0.6222649216651917, "learning_rate": 5.0458755857700725e-06, "loss": 1.3895, "mean_token_accuracy": 0.6515608131885529, "num_tokens": 2254301385.0, "step": 13441 }, { "entropy": 1.6313113868236542, "epoch": 1.4766691384471726, "grad_norm": 0.6975681185722351, "learning_rate": 5.04467501917393e-06, "loss": 1.1905, "mean_token_accuracy": 0.6868863999843597, "num_tokens": 2254455186.0, "step": 13442 }, { "entropy": 1.6619862020015717, "epoch": 1.4767789953585455, "grad_norm": 0.5563586354255676, "learning_rate": 5.043474641061141e-06, "loss": 1.4175, "mean_token_accuracy": 0.6464668810367584, "num_tokens": 2254700519.0, "step": 13443 }, { "entropy": 1.6394928991794586, "epoch": 1.4768888522699184, "grad_norm": 0.6185762882232666, "learning_rate": 5.042274451469696e-06, "loss": 1.3622, "mean_token_accuracy": 0.6612973709901174, "num_tokens": 2254900856.0, "step": 13444 }, { "entropy": 1.7395354708035786, "epoch": 1.4769987091812915, "grad_norm": 0.7197256684303284, "learning_rate": 5.041074450437577e-06, "loss": 1.3647, "mean_token_accuracy": 0.6561250587304434, "num_tokens": 2255066819.0, "step": 13445 }, { "entropy": 1.7001774807771046, "epoch": 1.4771085660926644, "grad_norm": 0.753657341003418, "learning_rate": 5.039874638002771e-06, "loss": 1.3652, "mean_token_accuracy": 0.6524695505698522, "num_tokens": 2255201358.0, "step": 13446 }, { "entropy": 1.6709985435009003, "epoch": 1.4772184230040373, "grad_norm": 0.6185214519500732, "learning_rate": 5.038675014203243e-06, "loss": 1.1932, "mean_token_accuracy": 0.6734795669714609, "num_tokens": 2255383642.0, "step": 13447 }, { "entropy": 1.7464906374613445, "epoch": 1.4773282799154102, "grad_norm": 0.6656374931335449, "learning_rate": 5.037475579076966e-06, "loss": 1.3962, "mean_token_accuracy": 0.6644060959418615, "num_tokens": 2255534586.0, "step": 13448 }, { "entropy": 1.685450941324234, "epoch": 1.477438136826783, "grad_norm": 0.808623731136322, "learning_rate": 5.0362763326619e-06, "loss": 1.4986, "mean_token_accuracy": 0.6441106796264648, "num_tokens": 2255683821.0, "step": 13449 }, { "entropy": 1.7671352128187816, "epoch": 1.4775479937381562, "grad_norm": 0.7077687382698059, "learning_rate": 5.0350772749960004e-06, "loss": 1.1849, "mean_token_accuracy": 0.6855556517839432, "num_tokens": 2255778360.0, "step": 13450 }, { "entropy": 1.633953034877777, "epoch": 1.477657850649529, "grad_norm": 0.6457007527351379, "learning_rate": 5.033878406117215e-06, "loss": 1.6397, "mean_token_accuracy": 0.6252560267845789, "num_tokens": 2256008403.0, "step": 13451 }, { "entropy": 1.6935044626394908, "epoch": 1.477767707560902, "grad_norm": 0.7718963623046875, "learning_rate": 5.032679726063494e-06, "loss": 1.2658, "mean_token_accuracy": 0.6831946323315302, "num_tokens": 2256131286.0, "step": 13452 }, { "entropy": 1.6343218088150024, "epoch": 1.4778775644722748, "grad_norm": 0.6215181350708008, "learning_rate": 5.03148123487277e-06, "loss": 1.3287, "mean_token_accuracy": 0.6735940128564835, "num_tokens": 2256326690.0, "step": 13453 }, { "entropy": 1.7170305450757344, "epoch": 1.4779874213836477, "grad_norm": 0.6218499541282654, "learning_rate": 5.030282932582972e-06, "loss": 1.4629, "mean_token_accuracy": 0.6319693426291147, "num_tokens": 2256500661.0, "step": 13454 }, { "entropy": 1.6966708103815715, "epoch": 1.4780972782950208, "grad_norm": 0.7736793756484985, "learning_rate": 5.0290848192320344e-06, "loss": 1.465, "mean_token_accuracy": 0.6512386153141657, "num_tokens": 2256690681.0, "step": 13455 }, { "entropy": 1.6986188689867656, "epoch": 1.4782071352063937, "grad_norm": 0.8579681515693665, "learning_rate": 5.02788689485787e-06, "loss": 1.4793, "mean_token_accuracy": 0.6540632620453835, "num_tokens": 2256859278.0, "step": 13456 }, { "entropy": 1.7905776103337605, "epoch": 1.4783169921177666, "grad_norm": 0.6369954347610474, "learning_rate": 5.02668915949839e-06, "loss": 1.5357, "mean_token_accuracy": 0.651649167140325, "num_tokens": 2257015967.0, "step": 13457 }, { "entropy": 1.7184994022051494, "epoch": 1.4784268490291397, "grad_norm": 0.5851972103118896, "learning_rate": 5.025491613191511e-06, "loss": 1.4093, "mean_token_accuracy": 0.6455397953589758, "num_tokens": 2257178746.0, "step": 13458 }, { "entropy": 1.694368600845337, "epoch": 1.4785367059405126, "grad_norm": 0.7007179856300354, "learning_rate": 5.0242942559751275e-06, "loss": 1.3101, "mean_token_accuracy": 0.6777728994687399, "num_tokens": 2257309719.0, "step": 13459 }, { "entropy": 1.741744190454483, "epoch": 1.4786465628518854, "grad_norm": 0.6834388971328735, "learning_rate": 5.023097087887141e-06, "loss": 1.3221, "mean_token_accuracy": 0.6692759493986765, "num_tokens": 2257454575.0, "step": 13460 }, { "entropy": 1.7196588615576427, "epoch": 1.4787564197632583, "grad_norm": 0.7268269658088684, "learning_rate": 5.021900108965438e-06, "loss": 1.2486, "mean_token_accuracy": 0.6762056102355322, "num_tokens": 2257600219.0, "step": 13461 }, { "entropy": 1.722573568423589, "epoch": 1.4788662766746312, "grad_norm": 0.7268519997596741, "learning_rate": 5.0207033192479e-06, "loss": 1.6339, "mean_token_accuracy": 0.6259458661079407, "num_tokens": 2257841931.0, "step": 13462 }, { "entropy": 1.6968311369419098, "epoch": 1.4789761335860043, "grad_norm": 0.6865484714508057, "learning_rate": 5.019506718772407e-06, "loss": 1.3771, "mean_token_accuracy": 0.6622037986914316, "num_tokens": 2258003647.0, "step": 13463 }, { "entropy": 1.6797509094079335, "epoch": 1.4790859904973772, "grad_norm": 0.6517234444618225, "learning_rate": 5.018310307576835e-06, "loss": 1.2848, "mean_token_accuracy": 0.6743231564760208, "num_tokens": 2258187580.0, "step": 13464 }, { "entropy": 1.7106235921382904, "epoch": 1.47919584740875, "grad_norm": 0.6443414092063904, "learning_rate": 5.017114085699046e-06, "loss": 1.4221, "mean_token_accuracy": 0.6623661716779073, "num_tokens": 2258378553.0, "step": 13465 }, { "entropy": 1.715536544720332, "epoch": 1.479305704320123, "grad_norm": 0.7782425284385681, "learning_rate": 5.0159180531768985e-06, "loss": 1.3477, "mean_token_accuracy": 0.6619761238495508, "num_tokens": 2258508761.0, "step": 13466 }, { "entropy": 1.7270130614439647, "epoch": 1.4794155612314959, "grad_norm": 0.6732892990112305, "learning_rate": 5.014722210048251e-06, "loss": 1.3856, "mean_token_accuracy": 0.6509933620691299, "num_tokens": 2258685957.0, "step": 13467 }, { "entropy": 1.73613902926445, "epoch": 1.479525418142869, "grad_norm": 0.6732542514801025, "learning_rate": 5.0135265563509475e-06, "loss": 1.3947, "mean_token_accuracy": 0.6510529269774755, "num_tokens": 2258863473.0, "step": 13468 }, { "entropy": 1.7343490421772003, "epoch": 1.4796352750542419, "grad_norm": 0.7667641639709473, "learning_rate": 5.0123310921228265e-06, "loss": 1.2903, "mean_token_accuracy": 0.6681475838025411, "num_tokens": 2259013861.0, "step": 13469 }, { "entropy": 1.7270642916361492, "epoch": 1.4797451319656147, "grad_norm": 0.7105488777160645, "learning_rate": 5.011135817401733e-06, "loss": 1.2928, "mean_token_accuracy": 0.6620072424411774, "num_tokens": 2259178477.0, "step": 13470 }, { "entropy": 1.7137524485588074, "epoch": 1.4798549888769879, "grad_norm": 0.7805687785148621, "learning_rate": 5.009940732225489e-06, "loss": 1.3888, "mean_token_accuracy": 0.6767902622620264, "num_tokens": 2259334869.0, "step": 13471 }, { "entropy": 1.7165914575258892, "epoch": 1.4799648457883607, "grad_norm": 0.7019093632698059, "learning_rate": 5.008745836631925e-06, "loss": 1.4593, "mean_token_accuracy": 0.6598990907271703, "num_tokens": 2259474030.0, "step": 13472 }, { "entropy": 1.6991265912850697, "epoch": 1.4800747026997336, "grad_norm": 0.6628867387771606, "learning_rate": 5.007551130658857e-06, "loss": 1.5422, "mean_token_accuracy": 0.6464388569196066, "num_tokens": 2259650297.0, "step": 13473 }, { "entropy": 1.6515828371047974, "epoch": 1.4801845596111065, "grad_norm": 0.622098982334137, "learning_rate": 5.00635661434409e-06, "loss": 1.3175, "mean_token_accuracy": 0.6653083264827728, "num_tokens": 2259822214.0, "step": 13474 }, { "entropy": 1.6867812772591908, "epoch": 1.4802944165224794, "grad_norm": 0.6378466486930847, "learning_rate": 5.0051622877254355e-06, "loss": 1.4009, "mean_token_accuracy": 0.6632012327512106, "num_tokens": 2259989102.0, "step": 13475 }, { "entropy": 1.683827131986618, "epoch": 1.4804042734338525, "grad_norm": 0.6706616282463074, "learning_rate": 5.003968150840697e-06, "loss": 1.4421, "mean_token_accuracy": 0.6411708742380142, "num_tokens": 2260172218.0, "step": 13476 }, { "entropy": 1.7033619185288746, "epoch": 1.4805141303452254, "grad_norm": 0.6120789051055908, "learning_rate": 5.002774203727665e-06, "loss": 1.3766, "mean_token_accuracy": 0.6580955187479655, "num_tokens": 2260328696.0, "step": 13477 }, { "entropy": 1.6510994335015614, "epoch": 1.4806239872565983, "grad_norm": 0.6583324670791626, "learning_rate": 5.001580446424126e-06, "loss": 1.2989, "mean_token_accuracy": 0.6703228702147802, "num_tokens": 2260527309.0, "step": 13478 }, { "entropy": 1.7263270119826, "epoch": 1.4807338441679712, "grad_norm": 0.8199454545974731, "learning_rate": 5.00038687896786e-06, "loss": 1.5701, "mean_token_accuracy": 0.6611626545588175, "num_tokens": 2260654170.0, "step": 13479 }, { "entropy": 1.6092917223771412, "epoch": 1.480843701079344, "grad_norm": 0.649434506893158, "learning_rate": 4.999193501396651e-06, "loss": 1.3777, "mean_token_accuracy": 0.6647194971640905, "num_tokens": 2260850430.0, "step": 13480 }, { "entropy": 1.7251697679360707, "epoch": 1.4809535579907172, "grad_norm": 0.6333536505699158, "learning_rate": 4.998000313748261e-06, "loss": 1.36, "mean_token_accuracy": 0.6603184541066488, "num_tokens": 2260977793.0, "step": 13481 }, { "entropy": 1.757962852716446, "epoch": 1.48106341490209, "grad_norm": 0.8047628402709961, "learning_rate": 4.9968073160604545e-06, "loss": 1.3915, "mean_token_accuracy": 0.6611624906460444, "num_tokens": 2261094020.0, "step": 13482 }, { "entropy": 1.6559306979179382, "epoch": 1.481173271813463, "grad_norm": 0.6404219269752502, "learning_rate": 4.995614508370992e-06, "loss": 1.3355, "mean_token_accuracy": 0.6575894902149836, "num_tokens": 2261227135.0, "step": 13483 }, { "entropy": 1.7070954938729603, "epoch": 1.481283128724836, "grad_norm": 0.749505341053009, "learning_rate": 4.994421890717627e-06, "loss": 1.2544, "mean_token_accuracy": 0.6670717298984528, "num_tokens": 2261352833.0, "step": 13484 }, { "entropy": 1.694403092066447, "epoch": 1.481392985636209, "grad_norm": 0.7458823919296265, "learning_rate": 4.9932294631381025e-06, "loss": 1.273, "mean_token_accuracy": 0.6751887102921804, "num_tokens": 2261469789.0, "step": 13485 }, { "entropy": 1.767996261517207, "epoch": 1.4815028425475818, "grad_norm": 0.7070764899253845, "learning_rate": 4.992037225670156e-06, "loss": 1.2761, "mean_token_accuracy": 0.6802639961242676, "num_tokens": 2261593206.0, "step": 13486 }, { "entropy": 1.6416256129741669, "epoch": 1.4816126994589547, "grad_norm": 0.6489446759223938, "learning_rate": 4.990845178351528e-06, "loss": 1.3192, "mean_token_accuracy": 0.6602905988693237, "num_tokens": 2261754373.0, "step": 13487 }, { "entropy": 1.7270208696524303, "epoch": 1.4817225563703276, "grad_norm": 0.5677620768547058, "learning_rate": 4.989653321219938e-06, "loss": 1.4977, "mean_token_accuracy": 0.65739672879378, "num_tokens": 2261959159.0, "step": 13488 }, { "entropy": 1.6949149171511333, "epoch": 1.4818324132817007, "grad_norm": 0.6971380114555359, "learning_rate": 4.988461654313116e-06, "loss": 1.3033, "mean_token_accuracy": 0.6612110733985901, "num_tokens": 2262083678.0, "step": 13489 }, { "entropy": 1.7387464841206868, "epoch": 1.4819422701930736, "grad_norm": 0.7100080251693726, "learning_rate": 4.987270177668773e-06, "loss": 1.2981, "mean_token_accuracy": 0.6643347293138504, "num_tokens": 2262239288.0, "step": 13490 }, { "entropy": 1.6716001530488331, "epoch": 1.4820521271044464, "grad_norm": 0.5574256181716919, "learning_rate": 4.986078891324617e-06, "loss": 1.4879, "mean_token_accuracy": 0.6372141987085342, "num_tokens": 2262514560.0, "step": 13491 }, { "entropy": 1.6284152368704479, "epoch": 1.4821619840158193, "grad_norm": 0.598142683506012, "learning_rate": 4.9848877953183575e-06, "loss": 1.3574, "mean_token_accuracy": 0.6729972014824549, "num_tokens": 2262727556.0, "step": 13492 }, { "entropy": 1.7167203028996785, "epoch": 1.4822718409271922, "grad_norm": 0.6333439946174622, "learning_rate": 4.9836968896876885e-06, "loss": 1.4435, "mean_token_accuracy": 0.6489839653174082, "num_tokens": 2262893662.0, "step": 13493 }, { "entropy": 1.72333358724912, "epoch": 1.4823816978385653, "grad_norm": 0.7966588139533997, "learning_rate": 4.982506174470299e-06, "loss": 1.6081, "mean_token_accuracy": 0.6349592606226603, "num_tokens": 2263116005.0, "step": 13494 }, { "entropy": 1.673676609992981, "epoch": 1.4824915547499382, "grad_norm": 0.6574962139129639, "learning_rate": 4.981315649703877e-06, "loss": 1.4094, "mean_token_accuracy": 0.6473542600870132, "num_tokens": 2263312879.0, "step": 13495 }, { "entropy": 1.6873856385548909, "epoch": 1.482601411661311, "grad_norm": 0.7907306551933289, "learning_rate": 4.980125315426106e-06, "loss": 1.4349, "mean_token_accuracy": 0.6571491559346517, "num_tokens": 2263522149.0, "step": 13496 }, { "entropy": 1.6957029402256012, "epoch": 1.4827112685726842, "grad_norm": 0.6600527763366699, "learning_rate": 4.9789351716746555e-06, "loss": 1.3383, "mean_token_accuracy": 0.6599696377913157, "num_tokens": 2263680544.0, "step": 13497 }, { "entropy": 1.716781238714854, "epoch": 1.482821125484057, "grad_norm": 0.7186543345451355, "learning_rate": 4.9777452184871915e-06, "loss": 1.299, "mean_token_accuracy": 0.6676995903253555, "num_tokens": 2263800785.0, "step": 13498 }, { "entropy": 1.7211474776268005, "epoch": 1.48293098239543, "grad_norm": 0.719422459602356, "learning_rate": 4.97655545590138e-06, "loss": 1.4209, "mean_token_accuracy": 0.6576556066672007, "num_tokens": 2263984167.0, "step": 13499 }, { "entropy": 1.7104551792144775, "epoch": 1.4830408393068029, "grad_norm": 0.7389053702354431, "learning_rate": 4.9753658839548745e-06, "loss": 1.4894, "mean_token_accuracy": 0.6423831830422083, "num_tokens": 2264212746.0, "step": 13500 }, { "entropy": 1.7473791042963664, "epoch": 1.4831506962181757, "grad_norm": 0.8776116967201233, "learning_rate": 4.97417650268532e-06, "loss": 1.3598, "mean_token_accuracy": 0.6644879480202993, "num_tokens": 2264366316.0, "step": 13501 }, { "entropy": 1.7517230312029521, "epoch": 1.4832605531295489, "grad_norm": 0.6201359033584595, "learning_rate": 4.972987312130369e-06, "loss": 1.4633, "mean_token_accuracy": 0.6426295389731725, "num_tokens": 2264580750.0, "step": 13502 }, { "entropy": 1.7220464249451954, "epoch": 1.4833704100409217, "grad_norm": 0.6405351758003235, "learning_rate": 4.97179831232765e-06, "loss": 1.4245, "mean_token_accuracy": 0.6534823377927145, "num_tokens": 2264733000.0, "step": 13503 }, { "entropy": 1.69864288965861, "epoch": 1.4834802669522946, "grad_norm": 0.624673068523407, "learning_rate": 4.9706095033148e-06, "loss": 1.3807, "mean_token_accuracy": 0.6559500147898992, "num_tokens": 2264898277.0, "step": 13504 }, { "entropy": 1.7481873134771984, "epoch": 1.4835901238636675, "grad_norm": 0.7108025550842285, "learning_rate": 4.969420885129443e-06, "loss": 1.4226, "mean_token_accuracy": 0.6583486298720042, "num_tokens": 2265061391.0, "step": 13505 }, { "entropy": 1.65755029519399, "epoch": 1.4836999807750404, "grad_norm": 0.6649439930915833, "learning_rate": 4.968232457809195e-06, "loss": 1.3684, "mean_token_accuracy": 0.6610806783040365, "num_tokens": 2265255559.0, "step": 13506 }, { "entropy": 1.7579138378302257, "epoch": 1.4838098376864135, "grad_norm": 0.686124324798584, "learning_rate": 4.967044221391671e-06, "loss": 1.438, "mean_token_accuracy": 0.6544395188490549, "num_tokens": 2265466450.0, "step": 13507 }, { "entropy": 1.743057797352473, "epoch": 1.4839196945977864, "grad_norm": 0.8767577409744263, "learning_rate": 4.9658561759144815e-06, "loss": 1.3654, "mean_token_accuracy": 0.65755066772302, "num_tokens": 2265596110.0, "step": 13508 }, { "entropy": 1.6500622431437175, "epoch": 1.4840295515091593, "grad_norm": 0.6735820174217224, "learning_rate": 4.964668321415226e-06, "loss": 1.2918, "mean_token_accuracy": 0.6772434115409851, "num_tokens": 2265768382.0, "step": 13509 }, { "entropy": 1.631582687298457, "epoch": 1.4841394084205324, "grad_norm": 0.6203559637069702, "learning_rate": 4.963480657931496e-06, "loss": 1.4507, "mean_token_accuracy": 0.6736873388290405, "num_tokens": 2265997734.0, "step": 13510 }, { "entropy": 1.7016975184281666, "epoch": 1.4842492653319053, "grad_norm": 0.8607683777809143, "learning_rate": 4.9622931855008845e-06, "loss": 1.2962, "mean_token_accuracy": 0.6669759303331375, "num_tokens": 2266140837.0, "step": 13511 }, { "entropy": 1.7488416135311127, "epoch": 1.4843591222432782, "grad_norm": 0.70814049243927, "learning_rate": 4.961105904160974e-06, "loss": 1.4504, "mean_token_accuracy": 0.6467891732851664, "num_tokens": 2266334509.0, "step": 13512 }, { "entropy": 1.6617756883303325, "epoch": 1.484468979154651, "grad_norm": 0.6905792355537415, "learning_rate": 4.959918813949338e-06, "loss": 1.4075, "mean_token_accuracy": 0.6694660286108652, "num_tokens": 2266497729.0, "step": 13513 }, { "entropy": 1.7315144042174022, "epoch": 1.484578836066024, "grad_norm": 0.6920166015625, "learning_rate": 4.958731914903551e-06, "loss": 1.302, "mean_token_accuracy": 0.6789048910140991, "num_tokens": 2266671543.0, "step": 13514 }, { "entropy": 1.6354870001475017, "epoch": 1.484688692977397, "grad_norm": 0.6212617754936218, "learning_rate": 4.957545207061175e-06, "loss": 1.3765, "mean_token_accuracy": 0.6648024767637253, "num_tokens": 2266861233.0, "step": 13515 }, { "entropy": 1.6495544612407684, "epoch": 1.48479854988877, "grad_norm": 0.7335532307624817, "learning_rate": 4.956358690459772e-06, "loss": 1.3946, "mean_token_accuracy": 0.6647347460190455, "num_tokens": 2267046550.0, "step": 13516 }, { "entropy": 1.704295853773753, "epoch": 1.4849084068001428, "grad_norm": 0.6387439966201782, "learning_rate": 4.955172365136894e-06, "loss": 1.4945, "mean_token_accuracy": 0.6511774758497874, "num_tokens": 2267211866.0, "step": 13517 }, { "entropy": 1.6961783468723297, "epoch": 1.4850182637115157, "grad_norm": 0.6863455176353455, "learning_rate": 4.953986231130084e-06, "loss": 1.2193, "mean_token_accuracy": 0.6830638696750005, "num_tokens": 2267318169.0, "step": 13518 }, { "entropy": 1.7797902425130208, "epoch": 1.4851281206228886, "grad_norm": 0.7058371305465698, "learning_rate": 4.952800288476886e-06, "loss": 1.5065, "mean_token_accuracy": 0.6423899084329605, "num_tokens": 2267468527.0, "step": 13519 }, { "entropy": 1.7239426573117573, "epoch": 1.4852379775342617, "grad_norm": 0.6466138958930969, "learning_rate": 4.951614537214837e-06, "loss": 1.4439, "mean_token_accuracy": 0.6584398398796717, "num_tokens": 2267672535.0, "step": 13520 }, { "entropy": 1.761413335800171, "epoch": 1.4853478344456346, "grad_norm": 0.7396242022514343, "learning_rate": 4.950428977381461e-06, "loss": 1.5382, "mean_token_accuracy": 0.645782599846522, "num_tokens": 2267851281.0, "step": 13521 }, { "entropy": 1.7479993800322216, "epoch": 1.4854576913570074, "grad_norm": 0.6907133460044861, "learning_rate": 4.94924360901428e-06, "loss": 1.5213, "mean_token_accuracy": 0.6497216572364172, "num_tokens": 2268048660.0, "step": 13522 }, { "entropy": 1.6639278034369152, "epoch": 1.4855675482683806, "grad_norm": 0.6215951442718506, "learning_rate": 4.948058432150814e-06, "loss": 1.4487, "mean_token_accuracy": 0.6463829030593237, "num_tokens": 2268254892.0, "step": 13523 }, { "entropy": 1.6794349352518718, "epoch": 1.4856774051797534, "grad_norm": 0.7702376842498779, "learning_rate": 4.946873446828572e-06, "loss": 1.4576, "mean_token_accuracy": 0.6624968846638998, "num_tokens": 2268433753.0, "step": 13524 }, { "entropy": 1.6901362140973408, "epoch": 1.4857872620911263, "grad_norm": 0.6038414239883423, "learning_rate": 4.945688653085055e-06, "loss": 1.4401, "mean_token_accuracy": 0.657305101553599, "num_tokens": 2268603211.0, "step": 13525 }, { "entropy": 1.7209720313549042, "epoch": 1.4858971190024992, "grad_norm": 0.6884058117866516, "learning_rate": 4.944504050957767e-06, "loss": 1.3502, "mean_token_accuracy": 0.6485557009776434, "num_tokens": 2268803883.0, "step": 13526 }, { "entropy": 1.6918534139792125, "epoch": 1.486006975913872, "grad_norm": 0.6835710406303406, "learning_rate": 4.943319640484195e-06, "loss": 1.4551, "mean_token_accuracy": 0.6516717125972112, "num_tokens": 2268996894.0, "step": 13527 }, { "entropy": 1.7772463758786519, "epoch": 1.4861168328252452, "grad_norm": 0.7287479639053345, "learning_rate": 4.942135421701829e-06, "loss": 1.5589, "mean_token_accuracy": 0.6285470475753149, "num_tokens": 2269206030.0, "step": 13528 }, { "entropy": 1.7186577022075653, "epoch": 1.486226689736618, "grad_norm": 0.6499691009521484, "learning_rate": 4.940951394648148e-06, "loss": 1.4773, "mean_token_accuracy": 0.6425957729419073, "num_tokens": 2269406402.0, "step": 13529 }, { "entropy": 1.7541224757830303, "epoch": 1.486336546647991, "grad_norm": 0.6786013841629028, "learning_rate": 4.939767559360621e-06, "loss": 1.4135, "mean_token_accuracy": 0.6587019910415014, "num_tokens": 2269547356.0, "step": 13530 }, { "entropy": 1.650489757458369, "epoch": 1.4864464035593639, "grad_norm": 0.7876176238059998, "learning_rate": 4.938583915876721e-06, "loss": 1.3363, "mean_token_accuracy": 0.663826659321785, "num_tokens": 2269723665.0, "step": 13531 }, { "entropy": 1.7275867958863576, "epoch": 1.4865562604707367, "grad_norm": 0.809974193572998, "learning_rate": 4.937400464233911e-06, "loss": 1.4697, "mean_token_accuracy": 0.6594187666972479, "num_tokens": 2269860339.0, "step": 13532 }, { "entropy": 1.6629600922266643, "epoch": 1.4866661173821099, "grad_norm": 0.7391853332519531, "learning_rate": 4.936217204469645e-06, "loss": 1.3979, "mean_token_accuracy": 0.6491153140862783, "num_tokens": 2270029166.0, "step": 13533 }, { "entropy": 1.6912591656049092, "epoch": 1.4867759742934827, "grad_norm": 0.6969987750053406, "learning_rate": 4.9350341366213685e-06, "loss": 1.3193, "mean_token_accuracy": 0.6674131552378336, "num_tokens": 2270166752.0, "step": 13534 }, { "entropy": 1.6451220214366913, "epoch": 1.4868858312048556, "grad_norm": 0.6137616634368896, "learning_rate": 4.9338512607265325e-06, "loss": 1.235, "mean_token_accuracy": 0.6780912727117538, "num_tokens": 2270320590.0, "step": 13535 }, { "entropy": 1.7086876134077709, "epoch": 1.4869956881162287, "grad_norm": 0.6514938473701477, "learning_rate": 4.9326685768225695e-06, "loss": 1.5391, "mean_token_accuracy": 0.6244403074185053, "num_tokens": 2270503385.0, "step": 13536 }, { "entropy": 1.6874873240788777, "epoch": 1.4871055450276016, "grad_norm": 0.6039011478424072, "learning_rate": 4.9314860849469134e-06, "loss": 1.4688, "mean_token_accuracy": 0.6564833472172419, "num_tokens": 2270710407.0, "step": 13537 }, { "entropy": 1.6665807565053303, "epoch": 1.4872154019389745, "grad_norm": 0.7873986959457397, "learning_rate": 4.9303037851369836e-06, "loss": 1.3184, "mean_token_accuracy": 0.6774963239828745, "num_tokens": 2270843801.0, "step": 13538 }, { "entropy": 1.700388679901759, "epoch": 1.4873252588503474, "grad_norm": 0.627056896686554, "learning_rate": 4.929121677430204e-06, "loss": 1.5074, "mean_token_accuracy": 0.6504200547933578, "num_tokens": 2271024231.0, "step": 13539 }, { "entropy": 1.6944973468780518, "epoch": 1.4874351157617203, "grad_norm": 0.6724776029586792, "learning_rate": 4.927939761863993e-06, "loss": 1.347, "mean_token_accuracy": 0.6622181783119837, "num_tokens": 2271156544.0, "step": 13540 }, { "entropy": 1.6311760048071544, "epoch": 1.4875449726730934, "grad_norm": 0.6557187438011169, "learning_rate": 4.926758038475751e-06, "loss": 1.4786, "mean_token_accuracy": 0.6390694926182429, "num_tokens": 2271409977.0, "step": 13541 }, { "entropy": 1.7377532819906871, "epoch": 1.4876548295844663, "grad_norm": 0.7433091998100281, "learning_rate": 4.9255765073028764e-06, "loss": 1.4261, "mean_token_accuracy": 0.6504714637994766, "num_tokens": 2271551261.0, "step": 13542 }, { "entropy": 1.6590198477109273, "epoch": 1.4877646864958392, "grad_norm": 0.6879268288612366, "learning_rate": 4.924395168382772e-06, "loss": 1.3696, "mean_token_accuracy": 0.6614403426647186, "num_tokens": 2271705504.0, "step": 13543 }, { "entropy": 1.716480682293574, "epoch": 1.487874543407212, "grad_norm": 0.7290632128715515, "learning_rate": 4.9232140217528205e-06, "loss": 1.4055, "mean_token_accuracy": 0.6647356102863947, "num_tokens": 2271842583.0, "step": 13544 }, { "entropy": 1.7306012709935505, "epoch": 1.487984400318585, "grad_norm": 0.7125338912010193, "learning_rate": 4.922033067450408e-06, "loss": 1.3186, "mean_token_accuracy": 0.6647218614816666, "num_tokens": 2271986650.0, "step": 13545 }, { "entropy": 1.6511406401793163, "epoch": 1.488094257229958, "grad_norm": 0.6755971908569336, "learning_rate": 4.920852305512911e-06, "loss": 1.2941, "mean_token_accuracy": 0.677680104970932, "num_tokens": 2272131530.0, "step": 13546 }, { "entropy": 1.6403106550375621, "epoch": 1.488204114141331, "grad_norm": 0.6104917526245117, "learning_rate": 4.919671735977698e-06, "loss": 1.2273, "mean_token_accuracy": 0.677567387620608, "num_tokens": 2272258498.0, "step": 13547 }, { "entropy": 1.6783630152543385, "epoch": 1.4883139710527038, "grad_norm": 0.7859485745429993, "learning_rate": 4.9184913588821355e-06, "loss": 1.3253, "mean_token_accuracy": 0.6634241938591003, "num_tokens": 2272400247.0, "step": 13548 }, { "entropy": 1.6718779901663463, "epoch": 1.488423827964077, "grad_norm": 0.6356991529464722, "learning_rate": 4.917311174263582e-06, "loss": 1.3806, "mean_token_accuracy": 0.6621431310971578, "num_tokens": 2272578243.0, "step": 13549 }, { "entropy": 1.6918590764204662, "epoch": 1.4885336848754498, "grad_norm": 0.6709624528884888, "learning_rate": 4.916131182159385e-06, "loss": 1.4217, "mean_token_accuracy": 0.6508203744888306, "num_tokens": 2272739851.0, "step": 13550 }, { "entropy": 1.6766121685504913, "epoch": 1.4886435417868227, "grad_norm": 0.6813299655914307, "learning_rate": 4.914951382606896e-06, "loss": 1.4075, "mean_token_accuracy": 0.6498910933732986, "num_tokens": 2272979858.0, "step": 13551 }, { "entropy": 1.6789940396944683, "epoch": 1.4887533986981956, "grad_norm": 0.7328557968139648, "learning_rate": 4.913771775643456e-06, "loss": 1.3464, "mean_token_accuracy": 0.6630066633224487, "num_tokens": 2273127557.0, "step": 13552 }, { "entropy": 1.7166900932788849, "epoch": 1.4888632556095684, "grad_norm": 0.7445768117904663, "learning_rate": 4.912592361306397e-06, "loss": 1.4081, "mean_token_accuracy": 0.6634646505117416, "num_tokens": 2273281855.0, "step": 13553 }, { "entropy": 1.7078879574934642, "epoch": 1.4889731125209416, "grad_norm": 0.7372060418128967, "learning_rate": 4.911413139633044e-06, "loss": 1.5168, "mean_token_accuracy": 0.6509969532489777, "num_tokens": 2273451264.0, "step": 13554 }, { "entropy": 1.650009383757909, "epoch": 1.4890829694323144, "grad_norm": 0.5854784250259399, "learning_rate": 4.910234110660724e-06, "loss": 1.3294, "mean_token_accuracy": 0.6673146585623423, "num_tokens": 2273598602.0, "step": 13555 }, { "entropy": 1.6960578461488087, "epoch": 1.4891928263436873, "grad_norm": 0.7097931504249573, "learning_rate": 4.909055274426747e-06, "loss": 1.3523, "mean_token_accuracy": 0.6700321088234583, "num_tokens": 2273761448.0, "step": 13556 }, { "entropy": 1.739314426978429, "epoch": 1.4893026832550602, "grad_norm": 0.7183867692947388, "learning_rate": 4.907876630968429e-06, "loss": 1.4509, "mean_token_accuracy": 0.651702399055163, "num_tokens": 2273921418.0, "step": 13557 }, { "entropy": 1.7305655578772228, "epoch": 1.489412540166433, "grad_norm": 0.6984145045280457, "learning_rate": 4.906698180323072e-06, "loss": 1.3913, "mean_token_accuracy": 0.668786495923996, "num_tokens": 2274158793.0, "step": 13558 }, { "entropy": 1.7573081453641255, "epoch": 1.4895223970778062, "grad_norm": 0.7101246118545532, "learning_rate": 4.9055199225279674e-06, "loss": 1.4446, "mean_token_accuracy": 0.6512223184108734, "num_tokens": 2274343919.0, "step": 13559 }, { "entropy": 1.7201645970344543, "epoch": 1.489632253989179, "grad_norm": 0.7223523855209351, "learning_rate": 4.904341857620415e-06, "loss": 1.3581, "mean_token_accuracy": 0.6549399097760519, "num_tokens": 2274495539.0, "step": 13560 }, { "entropy": 1.7697237531344097, "epoch": 1.489742110900552, "grad_norm": 0.7736477255821228, "learning_rate": 4.903163985637695e-06, "loss": 1.3867, "mean_token_accuracy": 0.6666910151640574, "num_tokens": 2274674058.0, "step": 13561 }, { "entropy": 1.7430163423220317, "epoch": 1.489851967811925, "grad_norm": 0.6027993559837341, "learning_rate": 4.901986306617085e-06, "loss": 1.4971, "mean_token_accuracy": 0.6381118098894755, "num_tokens": 2274879632.0, "step": 13562 }, { "entropy": 1.677459180355072, "epoch": 1.489961824723298, "grad_norm": 0.8948626518249512, "learning_rate": 4.9008088205958605e-06, "loss": 1.2984, "mean_token_accuracy": 0.6767031649748484, "num_tokens": 2275027702.0, "step": 13563 }, { "entropy": 1.7206537226835887, "epoch": 1.4900716816346709, "grad_norm": 0.6878028512001038, "learning_rate": 4.89963152761129e-06, "loss": 1.4045, "mean_token_accuracy": 0.6523149311542511, "num_tokens": 2275187790.0, "step": 13564 }, { "entropy": 1.6853440205256145, "epoch": 1.4901815385460437, "grad_norm": 0.6887045502662659, "learning_rate": 4.898454427700635e-06, "loss": 1.3973, "mean_token_accuracy": 0.6593828996022543, "num_tokens": 2275344735.0, "step": 13565 }, { "entropy": 1.708954284588496, "epoch": 1.4902913954574166, "grad_norm": 0.7596457600593567, "learning_rate": 4.897277520901144e-06, "loss": 1.4333, "mean_token_accuracy": 0.6570628980795542, "num_tokens": 2275476549.0, "step": 13566 }, { "entropy": 1.6877670685450237, "epoch": 1.4904012523687897, "grad_norm": 0.7278691530227661, "learning_rate": 4.896100807250073e-06, "loss": 1.3566, "mean_token_accuracy": 0.663535346587499, "num_tokens": 2275637937.0, "step": 13567 }, { "entropy": 1.6145448486010234, "epoch": 1.4905111092801626, "grad_norm": 0.6225070953369141, "learning_rate": 4.894924286784657e-06, "loss": 1.1552, "mean_token_accuracy": 0.6904433170954386, "num_tokens": 2275760835.0, "step": 13568 }, { "entropy": 1.717700292666753, "epoch": 1.4906209661915355, "grad_norm": 0.6022528409957886, "learning_rate": 4.89374795954214e-06, "loss": 1.3925, "mean_token_accuracy": 0.664552241563797, "num_tokens": 2275936821.0, "step": 13569 }, { "entropy": 1.7442820469538372, "epoch": 1.4907308231029084, "grad_norm": 0.708743691444397, "learning_rate": 4.892571825559749e-06, "loss": 1.3119, "mean_token_accuracy": 0.6691465228796005, "num_tokens": 2276075486.0, "step": 13570 }, { "entropy": 1.7310235400994618, "epoch": 1.4908406800142813, "grad_norm": 0.6880050301551819, "learning_rate": 4.891395884874705e-06, "loss": 1.4393, "mean_token_accuracy": 0.6460549732049307, "num_tokens": 2276292408.0, "step": 13571 }, { "entropy": 1.6751873592535655, "epoch": 1.4909505369256544, "grad_norm": 0.6142544746398926, "learning_rate": 4.890220137524229e-06, "loss": 1.3366, "mean_token_accuracy": 0.6657705307006836, "num_tokens": 2276454867.0, "step": 13572 }, { "entropy": 1.6691831449667613, "epoch": 1.4910603938370273, "grad_norm": 0.7860159873962402, "learning_rate": 4.889044583545535e-06, "loss": 1.4698, "mean_token_accuracy": 0.6658653269211451, "num_tokens": 2276631139.0, "step": 13573 }, { "entropy": 1.6530809303124745, "epoch": 1.4911702507484002, "grad_norm": 0.6658622622489929, "learning_rate": 4.887869222975823e-06, "loss": 1.3893, "mean_token_accuracy": 0.6663869768381119, "num_tokens": 2276811087.0, "step": 13574 }, { "entropy": 1.6668502887090046, "epoch": 1.4912801076597733, "grad_norm": 0.7318044900894165, "learning_rate": 4.886694055852295e-06, "loss": 1.2348, "mean_token_accuracy": 0.6809373696645101, "num_tokens": 2276936016.0, "step": 13575 }, { "entropy": 1.7107643485069275, "epoch": 1.4913899645711461, "grad_norm": 0.618091881275177, "learning_rate": 4.885519082212148e-06, "loss": 1.4674, "mean_token_accuracy": 0.6436606744925181, "num_tokens": 2277114355.0, "step": 13576 }, { "entropy": 1.716734250386556, "epoch": 1.491499821482519, "grad_norm": 0.6230522990226746, "learning_rate": 4.884344302092569e-06, "loss": 1.4011, "mean_token_accuracy": 0.6655239959557852, "num_tokens": 2277275543.0, "step": 13577 }, { "entropy": 1.710130383570989, "epoch": 1.491609678393892, "grad_norm": 0.6273171305656433, "learning_rate": 4.883169715530732e-06, "loss": 1.4362, "mean_token_accuracy": 0.638149564464887, "num_tokens": 2277452793.0, "step": 13578 }, { "entropy": 1.7487995425860088, "epoch": 1.4917195353052648, "grad_norm": 0.7999676465988159, "learning_rate": 4.881995322563821e-06, "loss": 1.2994, "mean_token_accuracy": 0.671253576874733, "num_tokens": 2277600260.0, "step": 13579 }, { "entropy": 1.6912379463513691, "epoch": 1.491829392216638, "grad_norm": 0.6340394616127014, "learning_rate": 4.880821123229002e-06, "loss": 1.295, "mean_token_accuracy": 0.6654437184333801, "num_tokens": 2277753911.0, "step": 13580 }, { "entropy": 1.6957992414633434, "epoch": 1.4919392491280108, "grad_norm": 0.778068482875824, "learning_rate": 4.879647117563432e-06, "loss": 1.4732, "mean_token_accuracy": 0.6568873922030131, "num_tokens": 2277934943.0, "step": 13581 }, { "entropy": 1.6554166972637177, "epoch": 1.4920491060393837, "grad_norm": 0.6689861416816711, "learning_rate": 4.8784733056042775e-06, "loss": 1.2573, "mean_token_accuracy": 0.67509492735068, "num_tokens": 2278061278.0, "step": 13582 }, { "entropy": 1.754564344882965, "epoch": 1.4921589629507566, "grad_norm": 0.7395191788673401, "learning_rate": 4.877299687388681e-06, "loss": 1.3777, "mean_token_accuracy": 0.6584471513827642, "num_tokens": 2278186060.0, "step": 13583 }, { "entropy": 1.7302316029866536, "epoch": 1.4922688198621294, "grad_norm": 0.6741985082626343, "learning_rate": 4.876126262953793e-06, "loss": 1.3478, "mean_token_accuracy": 0.6610042850176493, "num_tokens": 2278357055.0, "step": 13584 }, { "entropy": 1.6924872994422913, "epoch": 1.4923786767735026, "grad_norm": 1.0467135906219482, "learning_rate": 4.87495303233675e-06, "loss": 1.2743, "mean_token_accuracy": 0.6736592451731364, "num_tokens": 2278554005.0, "step": 13585 }, { "entropy": 1.6124165058135986, "epoch": 1.4924885336848754, "grad_norm": 0.6575326919555664, "learning_rate": 4.87377999557468e-06, "loss": 1.3924, "mean_token_accuracy": 0.6724445472160975, "num_tokens": 2278713474.0, "step": 13586 }, { "entropy": 1.7254461348056793, "epoch": 1.4925983905962483, "grad_norm": 0.6653977632522583, "learning_rate": 4.872607152704713e-06, "loss": 1.3827, "mean_token_accuracy": 0.6479282975196838, "num_tokens": 2278875327.0, "step": 13587 }, { "entropy": 1.7107085188229878, "epoch": 1.4927082475076214, "grad_norm": 0.7601565718650818, "learning_rate": 4.871434503763971e-06, "loss": 1.2622, "mean_token_accuracy": 0.6768785417079926, "num_tokens": 2278986177.0, "step": 13588 }, { "entropy": 1.7034134566783905, "epoch": 1.4928181044189943, "grad_norm": 0.8460795879364014, "learning_rate": 4.870262048789566e-06, "loss": 1.3049, "mean_token_accuracy": 0.6756768574317297, "num_tokens": 2279104088.0, "step": 13589 }, { "entropy": 1.7182818551858265, "epoch": 1.4929279613303672, "grad_norm": 0.6793023347854614, "learning_rate": 4.869089787818602e-06, "loss": 1.2894, "mean_token_accuracy": 0.666498064994812, "num_tokens": 2279231469.0, "step": 13590 }, { "entropy": 1.6521180470784504, "epoch": 1.49303781824174, "grad_norm": 0.7240891456604004, "learning_rate": 4.8679177208881855e-06, "loss": 1.4182, "mean_token_accuracy": 0.6724280714988708, "num_tokens": 2279385523.0, "step": 13591 }, { "entropy": 1.7422040899594624, "epoch": 1.493147675153113, "grad_norm": 0.623020589351654, "learning_rate": 4.866745848035412e-06, "loss": 1.3963, "mean_token_accuracy": 0.6499710033337275, "num_tokens": 2279561897.0, "step": 13592 }, { "entropy": 1.6989782949288685, "epoch": 1.493257532064486, "grad_norm": 0.6899892091751099, "learning_rate": 4.865574169297364e-06, "loss": 1.5596, "mean_token_accuracy": 0.6443688968817393, "num_tokens": 2279737279.0, "step": 13593 }, { "entropy": 1.6587830980618794, "epoch": 1.493367388975859, "grad_norm": 0.7197852730751038, "learning_rate": 4.864402684711133e-06, "loss": 1.5172, "mean_token_accuracy": 0.6575819750626882, "num_tokens": 2279901224.0, "step": 13594 }, { "entropy": 1.7458328505357106, "epoch": 1.4934772458872319, "grad_norm": 0.7012457847595215, "learning_rate": 4.863231394313789e-06, "loss": 1.4828, "mean_token_accuracy": 0.6473122785488764, "num_tokens": 2280096468.0, "step": 13595 }, { "entropy": 1.700047234694163, "epoch": 1.4935871027986047, "grad_norm": 0.705938994884491, "learning_rate": 4.8620602981424085e-06, "loss": 1.364, "mean_token_accuracy": 0.6618212511142095, "num_tokens": 2280279827.0, "step": 13596 }, { "entropy": 1.7099564174811046, "epoch": 1.4936969597099776, "grad_norm": 0.8156763315200806, "learning_rate": 4.860889396234055e-06, "loss": 1.3052, "mean_token_accuracy": 0.6710335661967596, "num_tokens": 2280416224.0, "step": 13597 }, { "entropy": 1.7591630319754283, "epoch": 1.4938068166213507, "grad_norm": 0.7641903162002563, "learning_rate": 4.859718688625782e-06, "loss": 1.3536, "mean_token_accuracy": 0.6579357037941614, "num_tokens": 2280551404.0, "step": 13598 }, { "entropy": 1.684128353993098, "epoch": 1.4939166735327236, "grad_norm": 0.6860581040382385, "learning_rate": 4.8585481753546486e-06, "loss": 1.3339, "mean_token_accuracy": 0.6609357595443726, "num_tokens": 2280670594.0, "step": 13599 }, { "entropy": 1.7321399648984273, "epoch": 1.4940265304440965, "grad_norm": 0.7905336618423462, "learning_rate": 4.8573778564576955e-06, "loss": 1.362, "mean_token_accuracy": 0.6644478042920431, "num_tokens": 2280802125.0, "step": 13600 }, { "entropy": 1.7287112375100453, "epoch": 1.4941363873554696, "grad_norm": 0.842991054058075, "learning_rate": 4.856207731971968e-06, "loss": 1.2462, "mean_token_accuracy": 0.6818788150946299, "num_tokens": 2280911360.0, "step": 13601 }, { "entropy": 1.7669661144415538, "epoch": 1.4942462442668425, "grad_norm": 0.6029599905014038, "learning_rate": 4.855037801934497e-06, "loss": 1.5295, "mean_token_accuracy": 0.6283855885267258, "num_tokens": 2281119023.0, "step": 13602 }, { "entropy": 1.6890939672787983, "epoch": 1.4943561011782154, "grad_norm": 0.6715998649597168, "learning_rate": 4.853868066382308e-06, "loss": 1.3511, "mean_token_accuracy": 0.6587308794260025, "num_tokens": 2281311484.0, "step": 13603 }, { "entropy": 1.7232101559638977, "epoch": 1.4944659580895883, "grad_norm": 0.6396613717079163, "learning_rate": 4.852698525352427e-06, "loss": 1.4827, "mean_token_accuracy": 0.649900938073794, "num_tokens": 2281526052.0, "step": 13604 }, { "entropy": 1.7208843032519023, "epoch": 1.4945758150009611, "grad_norm": 0.6806747913360596, "learning_rate": 4.8515291788818695e-06, "loss": 1.3837, "mean_token_accuracy": 0.6432156016429266, "num_tokens": 2281723549.0, "step": 13605 }, { "entropy": 1.6507877906163533, "epoch": 1.4946856719123343, "grad_norm": 0.829356849193573, "learning_rate": 4.850360027007639e-06, "loss": 1.257, "mean_token_accuracy": 0.677089735865593, "num_tokens": 2281892258.0, "step": 13606 }, { "entropy": 1.7317637304464977, "epoch": 1.4947955288237071, "grad_norm": 0.6524384021759033, "learning_rate": 4.8491910697667425e-06, "loss": 1.5544, "mean_token_accuracy": 0.6447519858678182, "num_tokens": 2282084102.0, "step": 13607 }, { "entropy": 1.7073566317558289, "epoch": 1.49490538573508, "grad_norm": 0.5912280678749084, "learning_rate": 4.848022307196181e-06, "loss": 1.368, "mean_token_accuracy": 0.6703123350938162, "num_tokens": 2282240654.0, "step": 13608 }, { "entropy": 1.7419903775056202, "epoch": 1.495015242646453, "grad_norm": 0.6560501456260681, "learning_rate": 4.84685373933294e-06, "loss": 1.2839, "mean_token_accuracy": 0.6740489850441614, "num_tokens": 2282376898.0, "step": 13609 }, { "entropy": 1.6600103080272675, "epoch": 1.4951250995578258, "grad_norm": 0.6871337890625, "learning_rate": 4.845685366214003e-06, "loss": 1.51, "mean_token_accuracy": 0.6429749627908071, "num_tokens": 2282552047.0, "step": 13610 }, { "entropy": 1.7339093486467998, "epoch": 1.495234956469199, "grad_norm": 0.7348884344100952, "learning_rate": 4.8445171878763536e-06, "loss": 1.2774, "mean_token_accuracy": 0.6790765027205149, "num_tokens": 2282690136.0, "step": 13611 }, { "entropy": 1.7065096199512482, "epoch": 1.4953448133805718, "grad_norm": 0.548129141330719, "learning_rate": 4.84334920435696e-06, "loss": 1.3133, "mean_token_accuracy": 0.6773122251033783, "num_tokens": 2282868665.0, "step": 13612 }, { "entropy": 1.7016609410444896, "epoch": 1.4954546702919447, "grad_norm": 0.6310706734657288, "learning_rate": 4.842181415692791e-06, "loss": 1.4762, "mean_token_accuracy": 0.6426133811473846, "num_tokens": 2283027370.0, "step": 13613 }, { "entropy": 1.7180238564809163, "epoch": 1.4955645272033178, "grad_norm": 0.6619656682014465, "learning_rate": 4.841013821920805e-06, "loss": 1.289, "mean_token_accuracy": 0.6721247384945551, "num_tokens": 2283197730.0, "step": 13614 }, { "entropy": 1.6656270027160645, "epoch": 1.4956743841146907, "grad_norm": 0.6908462047576904, "learning_rate": 4.839846423077955e-06, "loss": 1.2454, "mean_token_accuracy": 0.6713667859633764, "num_tokens": 2283312940.0, "step": 13615 }, { "entropy": 1.7119423349698384, "epoch": 1.4957842410260636, "grad_norm": 0.7182555198669434, "learning_rate": 4.838679219201192e-06, "loss": 1.5939, "mean_token_accuracy": 0.6521507352590561, "num_tokens": 2283454962.0, "step": 13616 }, { "entropy": 1.7009160617987316, "epoch": 1.4958940979374364, "grad_norm": 0.5092278718948364, "learning_rate": 4.837512210327456e-06, "loss": 1.493, "mean_token_accuracy": 0.624845340847969, "num_tokens": 2283755025.0, "step": 13617 }, { "entropy": 1.6453999876976013, "epoch": 1.4960039548488093, "grad_norm": 0.6449650526046753, "learning_rate": 4.836345396493678e-06, "loss": 1.3157, "mean_token_accuracy": 0.6672974874575933, "num_tokens": 2283966223.0, "step": 13618 }, { "entropy": 1.7281176149845123, "epoch": 1.4961138117601824, "grad_norm": 0.6479874849319458, "learning_rate": 4.835178777736791e-06, "loss": 1.4652, "mean_token_accuracy": 0.6569635172684988, "num_tokens": 2284138007.0, "step": 13619 }, { "entropy": 1.7001449863115947, "epoch": 1.4962236686715553, "grad_norm": 0.7589164972305298, "learning_rate": 4.83401235409372e-06, "loss": 1.4462, "mean_token_accuracy": 0.6550010740756989, "num_tokens": 2284287063.0, "step": 13620 }, { "entropy": 1.7176421483357747, "epoch": 1.4963335255829282, "grad_norm": 0.6610986590385437, "learning_rate": 4.832846125601381e-06, "loss": 1.4037, "mean_token_accuracy": 0.6466071307659149, "num_tokens": 2284436041.0, "step": 13621 }, { "entropy": 1.682494064172109, "epoch": 1.4964433824943013, "grad_norm": 0.7629041075706482, "learning_rate": 4.831680092296679e-06, "loss": 1.3422, "mean_token_accuracy": 0.6677883863449097, "num_tokens": 2284602710.0, "step": 13622 }, { "entropy": 1.6760949591795604, "epoch": 1.496553239405674, "grad_norm": 0.73377525806427, "learning_rate": 4.830514254216527e-06, "loss": 1.3821, "mean_token_accuracy": 0.6497271855672201, "num_tokens": 2284767266.0, "step": 13623 }, { "entropy": 1.6992427905400593, "epoch": 1.496663096317047, "grad_norm": 0.6510926485061646, "learning_rate": 4.829348611397815e-06, "loss": 1.3355, "mean_token_accuracy": 0.653320108850797, "num_tokens": 2284923786.0, "step": 13624 }, { "entropy": 1.6941948135693867, "epoch": 1.49677295322842, "grad_norm": 0.7802711725234985, "learning_rate": 4.828183163877441e-06, "loss": 1.2977, "mean_token_accuracy": 0.6674275199572245, "num_tokens": 2285069673.0, "step": 13625 }, { "entropy": 1.7570783694585164, "epoch": 1.4968828101397929, "grad_norm": 0.7150521278381348, "learning_rate": 4.82701791169229e-06, "loss": 1.4907, "mean_token_accuracy": 0.6550498505433401, "num_tokens": 2285211769.0, "step": 13626 }, { "entropy": 1.711845616499583, "epoch": 1.496992667051166, "grad_norm": 0.8639612793922424, "learning_rate": 4.825852854879236e-06, "loss": 1.3726, "mean_token_accuracy": 0.6677491863568624, "num_tokens": 2285352305.0, "step": 13627 }, { "entropy": 1.703562508026759, "epoch": 1.4971025239625388, "grad_norm": 0.6228059530258179, "learning_rate": 4.8246879934751615e-06, "loss": 1.2863, "mean_token_accuracy": 0.662003293633461, "num_tokens": 2285514680.0, "step": 13628 }, { "entropy": 1.661176194747289, "epoch": 1.4972123808739117, "grad_norm": 0.651232123374939, "learning_rate": 4.823523327516929e-06, "loss": 1.4186, "mean_token_accuracy": 0.6596865157286326, "num_tokens": 2285640894.0, "step": 13629 }, { "entropy": 1.7480360170205433, "epoch": 1.4973222377852846, "grad_norm": 0.6672519445419312, "learning_rate": 4.822358857041396e-06, "loss": 1.338, "mean_token_accuracy": 0.6738019635279974, "num_tokens": 2285786016.0, "step": 13630 }, { "entropy": 1.728183130423228, "epoch": 1.4974320946966575, "grad_norm": 0.6858686208724976, "learning_rate": 4.821194582085423e-06, "loss": 1.5963, "mean_token_accuracy": 0.6362834026416143, "num_tokens": 2285993402.0, "step": 13631 }, { "entropy": 1.7002196311950684, "epoch": 1.4975419516080306, "grad_norm": 0.7614853382110596, "learning_rate": 4.82003050268586e-06, "loss": 1.2961, "mean_token_accuracy": 0.6599853783845901, "num_tokens": 2286130357.0, "step": 13632 }, { "entropy": 1.6387390891710918, "epoch": 1.4976518085194035, "grad_norm": 0.6776584982872009, "learning_rate": 4.818866618879546e-06, "loss": 1.3764, "mean_token_accuracy": 0.6690385490655899, "num_tokens": 2286305495.0, "step": 13633 }, { "entropy": 1.7022567987442017, "epoch": 1.4977616654307764, "grad_norm": 0.6763585209846497, "learning_rate": 4.817702930703316e-06, "loss": 1.3917, "mean_token_accuracy": 0.6675192614396414, "num_tokens": 2286436486.0, "step": 13634 }, { "entropy": 1.7336170276006062, "epoch": 1.4978715223421495, "grad_norm": 0.5927034020423889, "learning_rate": 4.816539438194004e-06, "loss": 1.3455, "mean_token_accuracy": 0.6625049064556757, "num_tokens": 2286591633.0, "step": 13635 }, { "entropy": 1.7186884780724843, "epoch": 1.4979813792535221, "grad_norm": 0.6435331106185913, "learning_rate": 4.815376141388432e-06, "loss": 1.2864, "mean_token_accuracy": 0.6589618921279907, "num_tokens": 2286735105.0, "step": 13636 }, { "entropy": 1.7794620990753174, "epoch": 1.4980912361648953, "grad_norm": 0.6708908677101135, "learning_rate": 4.814213040323419e-06, "loss": 1.2498, "mean_token_accuracy": 0.6653878291447958, "num_tokens": 2286838400.0, "step": 13637 }, { "entropy": 1.6840010782082875, "epoch": 1.4982010930762681, "grad_norm": 0.6124277114868164, "learning_rate": 4.813050135035776e-06, "loss": 1.4343, "mean_token_accuracy": 0.6520940760771433, "num_tokens": 2287052287.0, "step": 13638 }, { "entropy": 1.6653833985328674, "epoch": 1.498310949987641, "grad_norm": 0.582930862903595, "learning_rate": 4.811887425562305e-06, "loss": 1.4394, "mean_token_accuracy": 0.6390776584545771, "num_tokens": 2287256232.0, "step": 13639 }, { "entropy": 1.7373347878456116, "epoch": 1.4984208068990141, "grad_norm": 0.7825767397880554, "learning_rate": 4.810724911939813e-06, "loss": 1.3372, "mean_token_accuracy": 0.6598907858133316, "num_tokens": 2287415673.0, "step": 13640 }, { "entropy": 1.6686055858929951, "epoch": 1.498530663810387, "grad_norm": 0.5538728833198547, "learning_rate": 4.809562594205088e-06, "loss": 1.4187, "mean_token_accuracy": 0.6480937798817953, "num_tokens": 2287619130.0, "step": 13641 }, { "entropy": 1.7331142822901409, "epoch": 1.49864052072176, "grad_norm": 0.7984141111373901, "learning_rate": 4.808400472394915e-06, "loss": 1.5261, "mean_token_accuracy": 0.6605927993853887, "num_tokens": 2287777134.0, "step": 13642 }, { "entropy": 1.7319643298784893, "epoch": 1.4987503776331328, "grad_norm": 0.7856102585792542, "learning_rate": 4.807238546546077e-06, "loss": 1.4331, "mean_token_accuracy": 0.6482569624980291, "num_tokens": 2287929596.0, "step": 13643 }, { "entropy": 1.7171655396620433, "epoch": 1.4988602345445057, "grad_norm": 0.7845726609230042, "learning_rate": 4.806076816695351e-06, "loss": 1.515, "mean_token_accuracy": 0.6387662142515182, "num_tokens": 2288091381.0, "step": 13644 }, { "entropy": 1.6830125947793324, "epoch": 1.4989700914558788, "grad_norm": 0.6584486365318298, "learning_rate": 4.804915282879503e-06, "loss": 1.2917, "mean_token_accuracy": 0.6716272433598837, "num_tokens": 2288232915.0, "step": 13645 }, { "entropy": 1.7228589157263439, "epoch": 1.4990799483672517, "grad_norm": 0.7010536193847656, "learning_rate": 4.80375394513529e-06, "loss": 1.4135, "mean_token_accuracy": 0.6520048628250757, "num_tokens": 2288398947.0, "step": 13646 }, { "entropy": 1.7012008130550385, "epoch": 1.4991898052786246, "grad_norm": 0.7033513784408569, "learning_rate": 4.802592803499477e-06, "loss": 1.2313, "mean_token_accuracy": 0.6781880507866541, "num_tokens": 2288506315.0, "step": 13647 }, { "entropy": 1.6403611302375793, "epoch": 1.4992996621899977, "grad_norm": 0.6568671464920044, "learning_rate": 4.80143185800881e-06, "loss": 1.2665, "mean_token_accuracy": 0.6699175884326299, "num_tokens": 2288658086.0, "step": 13648 }, { "entropy": 1.7293101052443187, "epoch": 1.4994095191013705, "grad_norm": 0.7070812582969666, "learning_rate": 4.800271108700027e-06, "loss": 1.623, "mean_token_accuracy": 0.626306434472402, "num_tokens": 2288892470.0, "step": 13649 }, { "entropy": 1.687088151772817, "epoch": 1.4995193760127434, "grad_norm": 0.5417489409446716, "learning_rate": 4.799110555609874e-06, "loss": 1.1432, "mean_token_accuracy": 0.6710360199213028, "num_tokens": 2289111747.0, "step": 13650 }, { "entropy": 1.7193756500879924, "epoch": 1.4996292329241163, "grad_norm": 0.590350329875946, "learning_rate": 4.797950198775074e-06, "loss": 1.3533, "mean_token_accuracy": 0.6762384523948034, "num_tokens": 2289293601.0, "step": 13651 }, { "entropy": 1.6460750301678975, "epoch": 1.4997390898354892, "grad_norm": 0.617831826210022, "learning_rate": 4.796790038232359e-06, "loss": 1.4126, "mean_token_accuracy": 0.6668714483579, "num_tokens": 2289481612.0, "step": 13652 }, { "entropy": 1.6675611039002736, "epoch": 1.4998489467468623, "grad_norm": 0.6598964333534241, "learning_rate": 4.795630074018443e-06, "loss": 1.541, "mean_token_accuracy": 0.6393274962902069, "num_tokens": 2289662266.0, "step": 13653 }, { "entropy": 1.6732101341088612, "epoch": 1.4999588036582352, "grad_norm": 0.837954580783844, "learning_rate": 4.794470306170038e-06, "loss": 1.5671, "mean_token_accuracy": 0.6427912364403406, "num_tokens": 2289836806.0, "step": 13654 }, { "entropy": 1.7142541805903118, "epoch": 1.500068660569608, "grad_norm": 0.7065649032592773, "learning_rate": 4.79331073472385e-06, "loss": 1.6684, "mean_token_accuracy": 0.6320697516202927, "num_tokens": 2290122996.0, "step": 13655 }, { "entropy": 1.7274170815944672, "epoch": 1.5001785174809812, "grad_norm": 0.6198680996894836, "learning_rate": 4.792151359716585e-06, "loss": 1.383, "mean_token_accuracy": 0.6606019486983618, "num_tokens": 2290319640.0, "step": 13656 }, { "entropy": 1.7504333357016246, "epoch": 1.5002883743923539, "grad_norm": 0.6796494126319885, "learning_rate": 4.79099218118493e-06, "loss": 1.3203, "mean_token_accuracy": 0.6651289115349451, "num_tokens": 2290446566.0, "step": 13657 }, { "entropy": 1.6876095831394196, "epoch": 1.500398231303727, "grad_norm": 0.6724913716316223, "learning_rate": 4.7898331991655764e-06, "loss": 1.6636, "mean_token_accuracy": 0.6144607166449229, "num_tokens": 2290691085.0, "step": 13658 }, { "entropy": 1.7187560300032299, "epoch": 1.5005080882150998, "grad_norm": 0.6489282250404358, "learning_rate": 4.7886744136951996e-06, "loss": 1.5508, "mean_token_accuracy": 0.6420976668596268, "num_tokens": 2290903404.0, "step": 13659 }, { "entropy": 1.6801285644372304, "epoch": 1.5006179451264727, "grad_norm": 0.6258361339569092, "learning_rate": 4.787515824810483e-06, "loss": 1.3144, "mean_token_accuracy": 0.6689251512289047, "num_tokens": 2291106588.0, "step": 13660 }, { "entropy": 1.7926131387551625, "epoch": 1.5007278020378458, "grad_norm": 0.7377539277076721, "learning_rate": 4.78635743254809e-06, "loss": 1.3222, "mean_token_accuracy": 0.6706551959117254, "num_tokens": 2291214514.0, "step": 13661 }, { "entropy": 1.708193560441335, "epoch": 1.5008376589492185, "grad_norm": 0.6403104066848755, "learning_rate": 4.785199236944681e-06, "loss": 1.4077, "mean_token_accuracy": 0.644023617108663, "num_tokens": 2291407970.0, "step": 13662 }, { "entropy": 1.6744596858819325, "epoch": 1.5009475158605916, "grad_norm": 0.751727819442749, "learning_rate": 4.784041238036917e-06, "loss": 1.3342, "mean_token_accuracy": 0.6770372043053309, "num_tokens": 2291571695.0, "step": 13663 }, { "entropy": 1.7439305186271667, "epoch": 1.5010573727719645, "grad_norm": 0.824898362159729, "learning_rate": 4.782883435861449e-06, "loss": 1.4083, "mean_token_accuracy": 0.6511137386163076, "num_tokens": 2291720500.0, "step": 13664 }, { "entropy": 1.7442241807778676, "epoch": 1.5011672296833374, "grad_norm": 0.8911228179931641, "learning_rate": 4.781725830454919e-06, "loss": 1.4769, "mean_token_accuracy": 0.6623394538958868, "num_tokens": 2291875701.0, "step": 13665 }, { "entropy": 1.6710403362909954, "epoch": 1.5012770865947105, "grad_norm": 0.6459485292434692, "learning_rate": 4.780568421853962e-06, "loss": 1.3773, "mean_token_accuracy": 0.6579457273085912, "num_tokens": 2292060235.0, "step": 13666 }, { "entropy": 1.7651425302028656, "epoch": 1.5013869435060834, "grad_norm": 0.8423007130622864, "learning_rate": 4.779411210095214e-06, "loss": 1.4055, "mean_token_accuracy": 0.6594301611185074, "num_tokens": 2292192597.0, "step": 13667 }, { "entropy": 1.6896374821662903, "epoch": 1.5014968004174563, "grad_norm": 0.7431081533432007, "learning_rate": 4.778254195215295e-06, "loss": 1.3766, "mean_token_accuracy": 0.6599178363879522, "num_tokens": 2292354541.0, "step": 13668 }, { "entropy": 1.6598813434441884, "epoch": 1.5016066573288294, "grad_norm": 0.8162408471107483, "learning_rate": 4.777097377250831e-06, "loss": 1.3112, "mean_token_accuracy": 0.677242711186409, "num_tokens": 2292495597.0, "step": 13669 }, { "entropy": 1.7197925249735515, "epoch": 1.501716514240202, "grad_norm": 0.649800181388855, "learning_rate": 4.775940756238431e-06, "loss": 1.5137, "mean_token_accuracy": 0.6383554091056188, "num_tokens": 2292653072.0, "step": 13670 }, { "entropy": 1.6852848728497822, "epoch": 1.5018263711515751, "grad_norm": 0.7410378456115723, "learning_rate": 4.774784332214697e-06, "loss": 1.4563, "mean_token_accuracy": 0.6579979757467905, "num_tokens": 2292806988.0, "step": 13671 }, { "entropy": 1.746435950199763, "epoch": 1.501936228062948, "grad_norm": 0.6971200108528137, "learning_rate": 4.773628105216238e-06, "loss": 1.3093, "mean_token_accuracy": 0.6615460316340128, "num_tokens": 2292939408.0, "step": 13672 }, { "entropy": 1.6766025920708973, "epoch": 1.502046084974321, "grad_norm": 0.6413285732269287, "learning_rate": 4.772472075279643e-06, "loss": 1.3751, "mean_token_accuracy": 0.6748560518026352, "num_tokens": 2293083000.0, "step": 13673 }, { "entropy": 1.6996668179829915, "epoch": 1.502155941885694, "grad_norm": 0.7790882587432861, "learning_rate": 4.771316242441498e-06, "loss": 1.327, "mean_token_accuracy": 0.661717543999354, "num_tokens": 2293233616.0, "step": 13674 }, { "entropy": 1.6642019947369893, "epoch": 1.5022657987970667, "grad_norm": 0.7140267491340637, "learning_rate": 4.7701606067383875e-06, "loss": 1.2386, "mean_token_accuracy": 0.6947644750277201, "num_tokens": 2293363011.0, "step": 13675 }, { "entropy": 1.7092399597167969, "epoch": 1.5023756557084398, "grad_norm": 0.6863964796066284, "learning_rate": 4.76900516820689e-06, "loss": 1.3732, "mean_token_accuracy": 0.6574582954247793, "num_tokens": 2293515091.0, "step": 13676 }, { "entropy": 1.6853445172309875, "epoch": 1.5024855126198127, "grad_norm": 0.7969051599502563, "learning_rate": 4.76784992688357e-06, "loss": 1.4371, "mean_token_accuracy": 0.6546561618645986, "num_tokens": 2293667579.0, "step": 13677 }, { "entropy": 1.6716053783893585, "epoch": 1.5025953695311856, "grad_norm": 8.106036186218262, "learning_rate": 4.76669488280499e-06, "loss": 1.2827, "mean_token_accuracy": 0.6833441058794657, "num_tokens": 2293876244.0, "step": 13678 }, { "entropy": 1.6749347150325775, "epoch": 1.5027052264425587, "grad_norm": 0.7246283292770386, "learning_rate": 4.76554003600771e-06, "loss": 1.4164, "mean_token_accuracy": 0.6572469621896744, "num_tokens": 2294045369.0, "step": 13679 }, { "entropy": 1.6581110556920369, "epoch": 1.5028150833539315, "grad_norm": 0.684249997138977, "learning_rate": 4.764385386528276e-06, "loss": 1.3176, "mean_token_accuracy": 0.6688032547632853, "num_tokens": 2294188555.0, "step": 13680 }, { "entropy": 1.8024831314881642, "epoch": 1.5029249402653044, "grad_norm": 0.7943997979164124, "learning_rate": 4.763230934403237e-06, "loss": 1.44, "mean_token_accuracy": 0.6461151192585627, "num_tokens": 2294350821.0, "step": 13681 }, { "entropy": 1.6654022733370464, "epoch": 1.5030347971766775, "grad_norm": 0.6539124250411987, "learning_rate": 4.762076679669128e-06, "loss": 1.4501, "mean_token_accuracy": 0.6525823424259821, "num_tokens": 2294535256.0, "step": 13682 }, { "entropy": 1.6741309265295665, "epoch": 1.5031446540880502, "grad_norm": 0.7052878737449646, "learning_rate": 4.760922622362481e-06, "loss": 1.3265, "mean_token_accuracy": 0.6755642145872116, "num_tokens": 2294715316.0, "step": 13683 }, { "entropy": 1.7677266299724579, "epoch": 1.5032545109994233, "grad_norm": 0.6491983532905579, "learning_rate": 4.759768762519822e-06, "loss": 1.4475, "mean_token_accuracy": 0.6450400104125341, "num_tokens": 2294970194.0, "step": 13684 }, { "entropy": 1.706259439388911, "epoch": 1.5033643679107962, "grad_norm": 0.6732988953590393, "learning_rate": 4.75861510017767e-06, "loss": 1.291, "mean_token_accuracy": 0.6787703533967336, "num_tokens": 2295122089.0, "step": 13685 }, { "entropy": 1.7781917651494343, "epoch": 1.503474224822169, "grad_norm": 0.7497197985649109, "learning_rate": 4.757461635372536e-06, "loss": 1.3894, "mean_token_accuracy": 0.6516469866037369, "num_tokens": 2295249519.0, "step": 13686 }, { "entropy": 1.6908331016699474, "epoch": 1.5035840817335422, "grad_norm": 0.6211578845977783, "learning_rate": 4.756308368140927e-06, "loss": 1.4081, "mean_token_accuracy": 0.6597599039475123, "num_tokens": 2295417437.0, "step": 13687 }, { "entropy": 1.738340864578883, "epoch": 1.5036939386449149, "grad_norm": 0.7150505781173706, "learning_rate": 4.755155298519349e-06, "loss": 1.4526, "mean_token_accuracy": 0.6501429776350657, "num_tokens": 2295603865.0, "step": 13688 }, { "entropy": 1.7665735979874928, "epoch": 1.503803795556288, "grad_norm": 0.7368733286857605, "learning_rate": 4.7540024265442905e-06, "loss": 1.5544, "mean_token_accuracy": 0.6462236990531286, "num_tokens": 2295751755.0, "step": 13689 }, { "entropy": 1.7254281441370647, "epoch": 1.5039136524676608, "grad_norm": 0.7418520450592041, "learning_rate": 4.7528497522522385e-06, "loss": 1.4659, "mean_token_accuracy": 0.6408105889956156, "num_tokens": 2295946046.0, "step": 13690 }, { "entropy": 1.7010563015937805, "epoch": 1.5040235093790337, "grad_norm": 0.6515077948570251, "learning_rate": 4.75169727567968e-06, "loss": 1.448, "mean_token_accuracy": 0.6640833069880804, "num_tokens": 2296143617.0, "step": 13691 }, { "entropy": 1.693510760863622, "epoch": 1.5041333662904068, "grad_norm": 0.650558590888977, "learning_rate": 4.750544996863083e-06, "loss": 1.4015, "mean_token_accuracy": 0.6564928144216537, "num_tokens": 2296290922.0, "step": 13692 }, { "entropy": 1.7150411407152812, "epoch": 1.5042432232017797, "grad_norm": 0.6943185329437256, "learning_rate": 4.749392915838925e-06, "loss": 1.3855, "mean_token_accuracy": 0.6770055890083313, "num_tokens": 2296481790.0, "step": 13693 }, { "entropy": 1.7828082740306854, "epoch": 1.5043530801131526, "grad_norm": 0.7607249617576599, "learning_rate": 4.748241032643664e-06, "loss": 1.4255, "mean_token_accuracy": 0.6515330821275711, "num_tokens": 2296600354.0, "step": 13694 }, { "entropy": 1.6512650549411774, "epoch": 1.5044629370245257, "grad_norm": 0.6560864448547363, "learning_rate": 4.747089347313755e-06, "loss": 1.2883, "mean_token_accuracy": 0.6671764502922694, "num_tokens": 2296774491.0, "step": 13695 }, { "entropy": 1.7189783950646718, "epoch": 1.5045727939358984, "grad_norm": 0.7325725555419922, "learning_rate": 4.7459378598856525e-06, "loss": 1.3908, "mean_token_accuracy": 0.6567257990439733, "num_tokens": 2296914415.0, "step": 13696 }, { "entropy": 1.729427436987559, "epoch": 1.5046826508472715, "grad_norm": 0.751825749874115, "learning_rate": 4.744786570395798e-06, "loss": 1.3657, "mean_token_accuracy": 0.6634038190046946, "num_tokens": 2297093221.0, "step": 13697 }, { "entropy": 1.7347900966803234, "epoch": 1.5047925077586444, "grad_norm": 0.6090309619903564, "learning_rate": 4.743635478880628e-06, "loss": 1.462, "mean_token_accuracy": 0.6348727444807688, "num_tokens": 2297310033.0, "step": 13698 }, { "entropy": 1.7234489421049755, "epoch": 1.5049023646700173, "grad_norm": 0.6301156878471375, "learning_rate": 4.742484585376576e-06, "loss": 1.4262, "mean_token_accuracy": 0.6584373613198599, "num_tokens": 2297493384.0, "step": 13699 }, { "entropy": 1.6984918216864269, "epoch": 1.5050122215813904, "grad_norm": 0.6758148074150085, "learning_rate": 4.74133388992007e-06, "loss": 1.4503, "mean_token_accuracy": 0.6438490003347397, "num_tokens": 2297684605.0, "step": 13700 }, { "entropy": 1.7280223667621613, "epoch": 1.505122078492763, "grad_norm": 0.5646396279335022, "learning_rate": 4.740183392547526e-06, "loss": 1.4605, "mean_token_accuracy": 0.6355783194303513, "num_tokens": 2297901393.0, "step": 13701 }, { "entropy": 1.6897727847099304, "epoch": 1.5052319354041361, "grad_norm": 0.7985787987709045, "learning_rate": 4.739033093295354e-06, "loss": 1.3079, "mean_token_accuracy": 0.6786791036526362, "num_tokens": 2298041421.0, "step": 13702 }, { "entropy": 1.6934953530629475, "epoch": 1.505341792315509, "grad_norm": 0.6441080570220947, "learning_rate": 4.737882992199966e-06, "loss": 1.4262, "mean_token_accuracy": 0.6507407377163569, "num_tokens": 2298251682.0, "step": 13703 }, { "entropy": 1.7239458660284679, "epoch": 1.505451649226882, "grad_norm": 0.6519790291786194, "learning_rate": 4.7367330892977575e-06, "loss": 1.5049, "mean_token_accuracy": 0.6607875376939774, "num_tokens": 2298459653.0, "step": 13704 }, { "entropy": 1.669982651869456, "epoch": 1.505561506138255, "grad_norm": 0.7108668684959412, "learning_rate": 4.735583384625126e-06, "loss": 1.4123, "mean_token_accuracy": 0.6692193200190862, "num_tokens": 2298597296.0, "step": 13705 }, { "entropy": 1.6570941507816315, "epoch": 1.505671363049628, "grad_norm": 0.7075870633125305, "learning_rate": 4.734433878218458e-06, "loss": 1.4478, "mean_token_accuracy": 0.6687259177366892, "num_tokens": 2298807746.0, "step": 13706 }, { "entropy": 1.733141968647639, "epoch": 1.5057812199610008, "grad_norm": 0.7741393446922302, "learning_rate": 4.733284570114132e-06, "loss": 1.4514, "mean_token_accuracy": 0.6441124876340231, "num_tokens": 2298973558.0, "step": 13707 }, { "entropy": 1.7128113210201263, "epoch": 1.505891076872374, "grad_norm": 0.7485846877098083, "learning_rate": 4.732135460348528e-06, "loss": 1.362, "mean_token_accuracy": 0.6596867889165878, "num_tokens": 2299133295.0, "step": 13708 }, { "entropy": 1.695679912964503, "epoch": 1.5060009337837466, "grad_norm": 0.7252331376075745, "learning_rate": 4.730986548958013e-06, "loss": 1.5427, "mean_token_accuracy": 0.6482022255659103, "num_tokens": 2299358470.0, "step": 13709 }, { "entropy": 1.6781230370203655, "epoch": 1.5061107906951197, "grad_norm": 0.6633203029632568, "learning_rate": 4.729837835978946e-06, "loss": 1.2652, "mean_token_accuracy": 0.6692434151967367, "num_tokens": 2299493276.0, "step": 13710 }, { "entropy": 1.7279355724652607, "epoch": 1.5062206476064925, "grad_norm": 0.9011074900627136, "learning_rate": 4.728689321447685e-06, "loss": 1.4516, "mean_token_accuracy": 0.660559723774592, "num_tokens": 2299656386.0, "step": 13711 }, { "entropy": 1.6402093569437664, "epoch": 1.5063305045178654, "grad_norm": 0.7499086260795593, "learning_rate": 4.727541005400584e-06, "loss": 1.4408, "mean_token_accuracy": 0.6577077358961105, "num_tokens": 2299799422.0, "step": 13712 }, { "entropy": 1.70639768242836, "epoch": 1.5064403614292385, "grad_norm": 0.7071443200111389, "learning_rate": 4.726392887873984e-06, "loss": 1.4686, "mean_token_accuracy": 0.6468622287114462, "num_tokens": 2299983882.0, "step": 13713 }, { "entropy": 1.6494310796260834, "epoch": 1.5065502183406112, "grad_norm": 0.7178316116333008, "learning_rate": 4.725244968904219e-06, "loss": 1.3299, "mean_token_accuracy": 0.6607310126225153, "num_tokens": 2300124046.0, "step": 13714 }, { "entropy": 1.6953360736370087, "epoch": 1.5066600752519843, "grad_norm": 0.861855685710907, "learning_rate": 4.724097248527627e-06, "loss": 1.4663, "mean_token_accuracy": 0.6720197945833206, "num_tokens": 2300287184.0, "step": 13715 }, { "entropy": 1.7422000865141551, "epoch": 1.5067699321633572, "grad_norm": 0.7510853409767151, "learning_rate": 4.722949726780526e-06, "loss": 1.318, "mean_token_accuracy": 0.6652670900026957, "num_tokens": 2300394163.0, "step": 13716 }, { "entropy": 1.7209021250406902, "epoch": 1.50687978907473, "grad_norm": 0.8294375538825989, "learning_rate": 4.721802403699244e-06, "loss": 1.4164, "mean_token_accuracy": 0.6530443280935287, "num_tokens": 2300530554.0, "step": 13717 }, { "entropy": 1.7540078063805897, "epoch": 1.5069896459861032, "grad_norm": 0.6351104378700256, "learning_rate": 4.720655279320079e-06, "loss": 1.4425, "mean_token_accuracy": 0.6411314457654953, "num_tokens": 2300742561.0, "step": 13718 }, { "entropy": 1.665945549805959, "epoch": 1.507099502897476, "grad_norm": 0.6579576134681702, "learning_rate": 4.719508353679347e-06, "loss": 1.4437, "mean_token_accuracy": 0.6524225821097692, "num_tokens": 2300910840.0, "step": 13719 }, { "entropy": 1.6633223791917164, "epoch": 1.507209359808849, "grad_norm": 0.6341217756271362, "learning_rate": 4.718361626813347e-06, "loss": 1.3326, "mean_token_accuracy": 0.6552438537279764, "num_tokens": 2301079382.0, "step": 13720 }, { "entropy": 1.722684770822525, "epoch": 1.507319216720222, "grad_norm": 0.6889148354530334, "learning_rate": 4.717215098758373e-06, "loss": 1.4923, "mean_token_accuracy": 0.6483738025029501, "num_tokens": 2301291490.0, "step": 13721 }, { "entropy": 1.728617916504542, "epoch": 1.5074290736315947, "grad_norm": 0.811132550239563, "learning_rate": 4.716068769550705e-06, "loss": 1.4803, "mean_token_accuracy": 0.6466376930475235, "num_tokens": 2301465815.0, "step": 13722 }, { "entropy": 1.717352608839671, "epoch": 1.5075389305429678, "grad_norm": 0.7719509601593018, "learning_rate": 4.714922639226632e-06, "loss": 1.4298, "mean_token_accuracy": 0.65669085085392, "num_tokens": 2301627400.0, "step": 13723 }, { "entropy": 1.6580866078535716, "epoch": 1.5076487874543407, "grad_norm": 0.6603320240974426, "learning_rate": 4.713776707822424e-06, "loss": 1.4617, "mean_token_accuracy": 0.6624788045883179, "num_tokens": 2301832465.0, "step": 13724 }, { "entropy": 1.7385274867216747, "epoch": 1.5077586443657136, "grad_norm": 0.7888380885124207, "learning_rate": 4.712630975374352e-06, "loss": 1.3048, "mean_token_accuracy": 0.6638933221499125, "num_tokens": 2301979094.0, "step": 13725 }, { "entropy": 1.7239436507225037, "epoch": 1.5078685012770867, "grad_norm": 0.8936779499053955, "learning_rate": 4.711485441918676e-06, "loss": 1.2566, "mean_token_accuracy": 0.678350642323494, "num_tokens": 2302110862.0, "step": 13726 }, { "entropy": 1.61548513174057, "epoch": 1.5079783581884594, "grad_norm": 0.5887323021888733, "learning_rate": 4.7103401074916505e-06, "loss": 1.3824, "mean_token_accuracy": 0.6588394343852997, "num_tokens": 2302319582.0, "step": 13727 }, { "entropy": 1.7819407383600872, "epoch": 1.5080882150998325, "grad_norm": 0.7063882946968079, "learning_rate": 4.70919497212953e-06, "loss": 1.699, "mean_token_accuracy": 0.610893577337265, "num_tokens": 2302538990.0, "step": 13728 }, { "entropy": 1.7448661824067433, "epoch": 1.5081980720112054, "grad_norm": 0.6677849888801575, "learning_rate": 4.708050035868552e-06, "loss": 1.5426, "mean_token_accuracy": 0.6525959322849909, "num_tokens": 2302738791.0, "step": 13729 }, { "entropy": 1.6619132260481517, "epoch": 1.5083079289225783, "grad_norm": 0.7120320200920105, "learning_rate": 4.706905298744953e-06, "loss": 1.3273, "mean_token_accuracy": 0.660373717546463, "num_tokens": 2302885338.0, "step": 13730 }, { "entropy": 1.7256252070267994, "epoch": 1.5084177858339514, "grad_norm": 0.7116526961326599, "learning_rate": 4.705760760794966e-06, "loss": 1.4432, "mean_token_accuracy": 0.6434483329455057, "num_tokens": 2303047678.0, "step": 13731 }, { "entropy": 1.727946698665619, "epoch": 1.5085276427453242, "grad_norm": 0.8816052079200745, "learning_rate": 4.704616422054816e-06, "loss": 1.2795, "mean_token_accuracy": 0.6723741243282954, "num_tokens": 2303171804.0, "step": 13732 }, { "entropy": 1.7127221127351124, "epoch": 1.5086374996566971, "grad_norm": 0.6119446754455566, "learning_rate": 4.7034722825607205e-06, "loss": 1.3865, "mean_token_accuracy": 0.652864803870519, "num_tokens": 2303339544.0, "step": 13733 }, { "entropy": 1.6904581189155579, "epoch": 1.5087473565680702, "grad_norm": 0.7307989001274109, "learning_rate": 4.702328342348888e-06, "loss": 1.4435, "mean_token_accuracy": 0.6472090234359106, "num_tokens": 2303523201.0, "step": 13734 }, { "entropy": 1.7181105117003124, "epoch": 1.508857213479443, "grad_norm": 0.6447550654411316, "learning_rate": 4.701184601455527e-06, "loss": 1.4236, "mean_token_accuracy": 0.6380515098571777, "num_tokens": 2303765509.0, "step": 13735 }, { "entropy": 1.727871795495351, "epoch": 1.508967070390816, "grad_norm": 0.7264792919158936, "learning_rate": 4.700041059916833e-06, "loss": 1.4861, "mean_token_accuracy": 0.6438801288604736, "num_tokens": 2303938570.0, "step": 13736 }, { "entropy": 1.6387112736701965, "epoch": 1.509076927302189, "grad_norm": 0.6865224838256836, "learning_rate": 4.6988977177690035e-06, "loss": 1.4266, "mean_token_accuracy": 0.6634560376405716, "num_tokens": 2304111132.0, "step": 13737 }, { "entropy": 1.6926419138908386, "epoch": 1.5091867842135618, "grad_norm": 0.6934045553207397, "learning_rate": 4.697754575048223e-06, "loss": 1.4896, "mean_token_accuracy": 0.6502173244953156, "num_tokens": 2304281050.0, "step": 13738 }, { "entropy": 1.7066146433353424, "epoch": 1.509296641124935, "grad_norm": 0.7495198249816895, "learning_rate": 4.696611631790665e-06, "loss": 1.3706, "mean_token_accuracy": 0.6793716996908188, "num_tokens": 2304436350.0, "step": 13739 }, { "entropy": 1.7890824973583221, "epoch": 1.5094064980363076, "grad_norm": 0.7411239743232727, "learning_rate": 4.695468888032513e-06, "loss": 1.4913, "mean_token_accuracy": 0.6336054851611456, "num_tokens": 2304575159.0, "step": 13740 }, { "entropy": 1.7109392881393433, "epoch": 1.5095163549476807, "grad_norm": 0.6763020157814026, "learning_rate": 4.694326343809929e-06, "loss": 1.4573, "mean_token_accuracy": 0.6470163067181905, "num_tokens": 2304755316.0, "step": 13741 }, { "entropy": 1.6757989923159282, "epoch": 1.5096262118590535, "grad_norm": 0.6875401139259338, "learning_rate": 4.693183999159073e-06, "loss": 1.4078, "mean_token_accuracy": 0.6494525969028473, "num_tokens": 2304937028.0, "step": 13742 }, { "entropy": 1.7914471526940663, "epoch": 1.5097360687704264, "grad_norm": 0.8657746911048889, "learning_rate": 4.692041854116101e-06, "loss": 1.4989, "mean_token_accuracy": 0.6594147632519404, "num_tokens": 2305111526.0, "step": 13743 }, { "entropy": 1.7403202851613362, "epoch": 1.5098459256817995, "grad_norm": 0.6036834716796875, "learning_rate": 4.6908999087171645e-06, "loss": 1.3853, "mean_token_accuracy": 0.6570370892683665, "num_tokens": 2305273638.0, "step": 13744 }, { "entropy": 1.699168711900711, "epoch": 1.5099557825931724, "grad_norm": 0.6418320536613464, "learning_rate": 4.689758162998403e-06, "loss": 1.4483, "mean_token_accuracy": 0.6439376026391983, "num_tokens": 2305466484.0, "step": 13745 }, { "entropy": 1.7933777173360188, "epoch": 1.5100656395045453, "grad_norm": 0.7398175001144409, "learning_rate": 4.688616616995949e-06, "loss": 1.3478, "mean_token_accuracy": 0.6565447101990382, "num_tokens": 2305625923.0, "step": 13746 }, { "entropy": 1.712761531273524, "epoch": 1.5101754964159184, "grad_norm": 0.5988736152648926, "learning_rate": 4.687475270745939e-06, "loss": 1.4272, "mean_token_accuracy": 0.6635372291008631, "num_tokens": 2305795495.0, "step": 13747 }, { "entropy": 1.6717861990133922, "epoch": 1.510285353327291, "grad_norm": 0.6338712573051453, "learning_rate": 4.686334124284489e-06, "loss": 1.4542, "mean_token_accuracy": 0.6616056164105734, "num_tokens": 2306022578.0, "step": 13748 }, { "entropy": 1.6783255338668823, "epoch": 1.5103952102386642, "grad_norm": 0.7062821984291077, "learning_rate": 4.685193177647721e-06, "loss": 1.4289, "mean_token_accuracy": 0.6619517654180527, "num_tokens": 2306208025.0, "step": 13749 }, { "entropy": 1.7133217950661976, "epoch": 1.510505067150037, "grad_norm": 0.8360413312911987, "learning_rate": 4.684052430871744e-06, "loss": 1.3356, "mean_token_accuracy": 0.6822609504063925, "num_tokens": 2306394670.0, "step": 13750 }, { "entropy": 1.7764535943667095, "epoch": 1.51061492406141, "grad_norm": 0.7038290500640869, "learning_rate": 4.682911883992659e-06, "loss": 1.3857, "mean_token_accuracy": 0.6598734309275945, "num_tokens": 2306545028.0, "step": 13751 }, { "entropy": 1.7245705723762512, "epoch": 1.510724780972783, "grad_norm": 0.6591954231262207, "learning_rate": 4.681771537046568e-06, "loss": 1.3675, "mean_token_accuracy": 0.6651880691448847, "num_tokens": 2306708834.0, "step": 13752 }, { "entropy": 1.7273605664571126, "epoch": 1.5108346378841557, "grad_norm": 0.754412055015564, "learning_rate": 4.680631390069561e-06, "loss": 1.3327, "mean_token_accuracy": 0.665368507305781, "num_tokens": 2306860973.0, "step": 13753 }, { "entropy": 1.6641030311584473, "epoch": 1.5109444947955288, "grad_norm": 0.8478244543075562, "learning_rate": 4.679491443097721e-06, "loss": 1.3195, "mean_token_accuracy": 0.6591992974281311, "num_tokens": 2307024826.0, "step": 13754 }, { "entropy": 1.7281495829423268, "epoch": 1.5110543517069017, "grad_norm": 0.8327801823616028, "learning_rate": 4.678351696167129e-06, "loss": 1.2827, "mean_token_accuracy": 0.66518135368824, "num_tokens": 2307155483.0, "step": 13755 }, { "entropy": 1.676687588294347, "epoch": 1.5111642086182746, "grad_norm": 0.6652231812477112, "learning_rate": 4.677212149313859e-06, "loss": 1.3258, "mean_token_accuracy": 0.6645462463299433, "num_tokens": 2307328046.0, "step": 13756 }, { "entropy": 1.7564114332199097, "epoch": 1.5112740655296477, "grad_norm": 0.6835631132125854, "learning_rate": 4.676072802573976e-06, "loss": 1.3074, "mean_token_accuracy": 0.6630533536275228, "num_tokens": 2307471442.0, "step": 13757 }, { "entropy": 1.7643627524375916, "epoch": 1.5113839224410206, "grad_norm": 0.7151616215705872, "learning_rate": 4.674933655983535e-06, "loss": 1.4011, "mean_token_accuracy": 0.6479361802339554, "num_tokens": 2307601581.0, "step": 13758 }, { "entropy": 1.7416835725307465, "epoch": 1.5114937793523935, "grad_norm": 0.6428912878036499, "learning_rate": 4.673794709578598e-06, "loss": 1.5211, "mean_token_accuracy": 0.6297041177749634, "num_tokens": 2307869204.0, "step": 13759 }, { "entropy": 1.7405222256978352, "epoch": 1.5116036362637666, "grad_norm": 0.7016375660896301, "learning_rate": 4.672655963395205e-06, "loss": 1.3975, "mean_token_accuracy": 0.6664147426684698, "num_tokens": 2308073195.0, "step": 13760 }, { "entropy": 1.6804000735282898, "epoch": 1.5117134931751393, "grad_norm": 0.6481202840805054, "learning_rate": 4.671517417469402e-06, "loss": 1.496, "mean_token_accuracy": 0.6482650935649872, "num_tokens": 2308239309.0, "step": 13761 }, { "entropy": 1.7526653309663136, "epoch": 1.5118233500865124, "grad_norm": 0.7293574213981628, "learning_rate": 4.670379071837221e-06, "loss": 1.4853, "mean_token_accuracy": 0.6492472440004349, "num_tokens": 2308401820.0, "step": 13762 }, { "entropy": 1.7497336467107136, "epoch": 1.5119332069978852, "grad_norm": 0.6734301447868347, "learning_rate": 4.6692409265346876e-06, "loss": 1.3734, "mean_token_accuracy": 0.6600970327854156, "num_tokens": 2308542755.0, "step": 13763 }, { "entropy": 1.701109786828359, "epoch": 1.5120430639092581, "grad_norm": 0.8500663042068481, "learning_rate": 4.668102981597828e-06, "loss": 1.5474, "mean_token_accuracy": 0.6489445865154266, "num_tokens": 2308734003.0, "step": 13764 }, { "entropy": 1.723372757434845, "epoch": 1.5121529208206312, "grad_norm": 0.6357081532478333, "learning_rate": 4.666965237062657e-06, "loss": 1.3514, "mean_token_accuracy": 0.6554248780012131, "num_tokens": 2308878086.0, "step": 13765 }, { "entropy": 1.702651709318161, "epoch": 1.512262777732004, "grad_norm": 0.7335965633392334, "learning_rate": 4.66582769296518e-06, "loss": 1.3079, "mean_token_accuracy": 0.6714355101188024, "num_tokens": 2308988685.0, "step": 13766 }, { "entropy": 1.710147311290105, "epoch": 1.512372634643377, "grad_norm": 0.7795404195785522, "learning_rate": 4.664690349341402e-06, "loss": 1.4638, "mean_token_accuracy": 0.659597784280777, "num_tokens": 2309151111.0, "step": 13767 }, { "entropy": 1.7290584842363994, "epoch": 1.51248249155475, "grad_norm": 0.6968294382095337, "learning_rate": 4.663553206227321e-06, "loss": 1.3245, "mean_token_accuracy": 0.6631123870611191, "num_tokens": 2309280184.0, "step": 13768 }, { "entropy": 1.7158755660057068, "epoch": 1.5125923484661228, "grad_norm": 0.6981979608535767, "learning_rate": 4.662416263658927e-06, "loss": 1.3123, "mean_token_accuracy": 0.6700327694416046, "num_tokens": 2309436766.0, "step": 13769 }, { "entropy": 1.7104793687661488, "epoch": 1.512702205377496, "grad_norm": 0.7425520420074463, "learning_rate": 4.661279521672199e-06, "loss": 1.4128, "mean_token_accuracy": 0.6677025308211645, "num_tokens": 2309588572.0, "step": 13770 }, { "entropy": 1.610454837481181, "epoch": 1.5128120622888688, "grad_norm": 0.6415528655052185, "learning_rate": 4.660142980303121e-06, "loss": 1.2953, "mean_token_accuracy": 0.6645541985829672, "num_tokens": 2309754449.0, "step": 13771 }, { "entropy": 1.6928213934103649, "epoch": 1.5129219192002417, "grad_norm": 0.6795879602432251, "learning_rate": 4.659006639587659e-06, "loss": 1.4469, "mean_token_accuracy": 0.6411414295434952, "num_tokens": 2309947529.0, "step": 13772 }, { "entropy": 1.7953710655371349, "epoch": 1.5130317761116148, "grad_norm": 0.6941425800323486, "learning_rate": 4.657870499561781e-06, "loss": 1.5126, "mean_token_accuracy": 0.629363218943278, "num_tokens": 2310124867.0, "step": 13773 }, { "entropy": 1.6879153450330098, "epoch": 1.5131416330229874, "grad_norm": 0.6987261772155762, "learning_rate": 4.656734560261445e-06, "loss": 1.2105, "mean_token_accuracy": 0.6808223128318787, "num_tokens": 2310246484.0, "step": 13774 }, { "entropy": 1.6733311613400776, "epoch": 1.5132514899343605, "grad_norm": 0.6962072253227234, "learning_rate": 4.655598821722597e-06, "loss": 1.3406, "mean_token_accuracy": 0.6601444731156031, "num_tokens": 2310381379.0, "step": 13775 }, { "entropy": 1.6864116390546162, "epoch": 1.5133613468457334, "grad_norm": 0.6691348552703857, "learning_rate": 4.654463283981193e-06, "loss": 1.3422, "mean_token_accuracy": 0.6588364889224371, "num_tokens": 2310624298.0, "step": 13776 }, { "entropy": 1.6842545072237651, "epoch": 1.5134712037571063, "grad_norm": 0.7775861024856567, "learning_rate": 4.653327947073165e-06, "loss": 1.3162, "mean_token_accuracy": 0.665345624089241, "num_tokens": 2310810645.0, "step": 13777 }, { "entropy": 1.6352218687534332, "epoch": 1.5135810606684794, "grad_norm": 0.626237690448761, "learning_rate": 4.652192811034445e-06, "loss": 1.3978, "mean_token_accuracy": 0.6582045257091522, "num_tokens": 2311014180.0, "step": 13778 }, { "entropy": 1.642144391934077, "epoch": 1.513690917579852, "grad_norm": 0.7304378747940063, "learning_rate": 4.651057875900964e-06, "loss": 1.4529, "mean_token_accuracy": 0.6429022600253423, "num_tokens": 2311188278.0, "step": 13779 }, { "entropy": 1.7175097266832988, "epoch": 1.5138007744912252, "grad_norm": 0.6320850253105164, "learning_rate": 4.649923141708639e-06, "loss": 1.4223, "mean_token_accuracy": 0.6590802123149236, "num_tokens": 2311368476.0, "step": 13780 }, { "entropy": 1.6938395003477733, "epoch": 1.513910631402598, "grad_norm": 0.6381179690361023, "learning_rate": 4.648788608493388e-06, "loss": 1.3746, "mean_token_accuracy": 0.6710842897494634, "num_tokens": 2311553528.0, "step": 13781 }, { "entropy": 1.7335260311762493, "epoch": 1.514020488313971, "grad_norm": 0.7260196805000305, "learning_rate": 4.647654276291114e-06, "loss": 1.3556, "mean_token_accuracy": 0.654750128587087, "num_tokens": 2311740346.0, "step": 13782 }, { "entropy": 1.793048232793808, "epoch": 1.514130345225344, "grad_norm": 0.7554465532302856, "learning_rate": 4.646520145137719e-06, "loss": 1.5026, "mean_token_accuracy": 0.6503528704245886, "num_tokens": 2311938873.0, "step": 13783 }, { "entropy": 1.7190141479174297, "epoch": 1.514240202136717, "grad_norm": 0.7326686382293701, "learning_rate": 4.645386215069097e-06, "loss": 1.423, "mean_token_accuracy": 0.6645475178956985, "num_tokens": 2312120733.0, "step": 13784 }, { "entropy": 1.676575392484665, "epoch": 1.5143500590480898, "grad_norm": 0.6733846068382263, "learning_rate": 4.644252486121145e-06, "loss": 1.3577, "mean_token_accuracy": 0.6720403631528219, "num_tokens": 2312307679.0, "step": 13785 }, { "entropy": 1.7140410840511322, "epoch": 1.514459915959463, "grad_norm": 0.6526421904563904, "learning_rate": 4.643118958329731e-06, "loss": 1.3907, "mean_token_accuracy": 0.6629425088564554, "num_tokens": 2312492173.0, "step": 13786 }, { "entropy": 1.6425736447175343, "epoch": 1.5145697728708356, "grad_norm": 0.7509266138076782, "learning_rate": 4.641985631730737e-06, "loss": 1.4446, "mean_token_accuracy": 0.6570387085278829, "num_tokens": 2312697867.0, "step": 13787 }, { "entropy": 1.730595697959264, "epoch": 1.5146796297822087, "grad_norm": 10.333463668823242, "learning_rate": 4.640852506360037e-06, "loss": 1.1871, "mean_token_accuracy": 0.6863798399766287, "num_tokens": 2312869978.0, "step": 13788 }, { "entropy": 1.7071086366971333, "epoch": 1.5147894866935816, "grad_norm": 0.7106054425239563, "learning_rate": 4.639719582253489e-06, "loss": 1.3772, "mean_token_accuracy": 0.6516473690668741, "num_tokens": 2313025921.0, "step": 13789 }, { "entropy": 1.6998887260754902, "epoch": 1.5148993436049545, "grad_norm": 0.536662757396698, "learning_rate": 4.638586859446947e-06, "loss": 1.4427, "mean_token_accuracy": 0.6548623442649841, "num_tokens": 2313202721.0, "step": 13790 }, { "entropy": 1.7144613564014435, "epoch": 1.5150092005163276, "grad_norm": 0.8173153400421143, "learning_rate": 4.637454337976267e-06, "loss": 1.3728, "mean_token_accuracy": 0.6525384138027827, "num_tokens": 2313375827.0, "step": 13791 }, { "entropy": 1.668924331665039, "epoch": 1.5151190574277003, "grad_norm": 0.6728224754333496, "learning_rate": 4.636322017877289e-06, "loss": 1.3425, "mean_token_accuracy": 0.6699813405672709, "num_tokens": 2313559355.0, "step": 13792 }, { "entropy": 1.6745288372039795, "epoch": 1.5152289143390734, "grad_norm": 0.6349695920944214, "learning_rate": 4.6351898991858526e-06, "loss": 1.274, "mean_token_accuracy": 0.6697569986184438, "num_tokens": 2313698496.0, "step": 13793 }, { "entropy": 1.7029302318890889, "epoch": 1.5153387712504462, "grad_norm": 0.6972919702529907, "learning_rate": 4.6340579819377885e-06, "loss": 1.4831, "mean_token_accuracy": 0.6308980584144592, "num_tokens": 2313906683.0, "step": 13794 }, { "entropy": 1.701465239127477, "epoch": 1.5154486281618191, "grad_norm": 0.6877496242523193, "learning_rate": 4.632926266168918e-06, "loss": 1.2802, "mean_token_accuracy": 0.6722718824942907, "num_tokens": 2314039947.0, "step": 13795 }, { "entropy": 1.755535235007604, "epoch": 1.5155584850731922, "grad_norm": 0.6645422577857971, "learning_rate": 4.631794751915063e-06, "loss": 1.5432, "mean_token_accuracy": 0.6427832990884781, "num_tokens": 2314236598.0, "step": 13796 }, { "entropy": 1.8075012763341267, "epoch": 1.5156683419845651, "grad_norm": 0.7589188814163208, "learning_rate": 4.630663439212039e-06, "loss": 1.6916, "mean_token_accuracy": 0.6184907828768095, "num_tokens": 2314460621.0, "step": 13797 }, { "entropy": 1.6989454329013824, "epoch": 1.515778198895938, "grad_norm": 0.604321300983429, "learning_rate": 4.629532328095641e-06, "loss": 1.3933, "mean_token_accuracy": 0.6470302095015844, "num_tokens": 2314660461.0, "step": 13798 }, { "entropy": 1.6902850965658824, "epoch": 1.5158880558073111, "grad_norm": 0.7460362911224365, "learning_rate": 4.628401418601675e-06, "loss": 1.4371, "mean_token_accuracy": 0.671076680223147, "num_tokens": 2314804887.0, "step": 13799 }, { "entropy": 1.7061065534750621, "epoch": 1.5159979127186838, "grad_norm": 0.6932515501976013, "learning_rate": 4.627270710765935e-06, "loss": 1.2638, "mean_token_accuracy": 0.6705667823553085, "num_tokens": 2314930379.0, "step": 13800 }, { "entropy": 1.6936753690242767, "epoch": 1.516107769630057, "grad_norm": 0.6244261860847473, "learning_rate": 4.626140204624207e-06, "loss": 1.4434, "mean_token_accuracy": 0.6529469887415568, "num_tokens": 2315130329.0, "step": 13801 }, { "entropy": 1.7133234739303589, "epoch": 1.5162176265414298, "grad_norm": 0.7487544417381287, "learning_rate": 4.625009900212265e-06, "loss": 1.3369, "mean_token_accuracy": 0.6595756113529205, "num_tokens": 2315276874.0, "step": 13802 }, { "entropy": 1.6450997491677601, "epoch": 1.5163274834528027, "grad_norm": 0.7113902568817139, "learning_rate": 4.62387979756589e-06, "loss": 1.3556, "mean_token_accuracy": 0.662777175505956, "num_tokens": 2315477438.0, "step": 13803 }, { "entropy": 1.7127596934636433, "epoch": 1.5164373403641758, "grad_norm": 0.7134985327720642, "learning_rate": 4.622749896720845e-06, "loss": 1.4482, "mean_token_accuracy": 0.6417205582062403, "num_tokens": 2315669497.0, "step": 13804 }, { "entropy": 1.659109354019165, "epoch": 1.5165471972755487, "grad_norm": 0.7004081010818481, "learning_rate": 4.621620197712894e-06, "loss": 1.4047, "mean_token_accuracy": 0.6536833544572195, "num_tokens": 2315841211.0, "step": 13805 }, { "entropy": 1.7210937837759654, "epoch": 1.5166570541869215, "grad_norm": 0.6302258372306824, "learning_rate": 4.620490700577788e-06, "loss": 1.6054, "mean_token_accuracy": 0.6396430979172388, "num_tokens": 2316082178.0, "step": 13806 }, { "entropy": 1.7338979343573253, "epoch": 1.5167669110982944, "grad_norm": 0.6911170482635498, "learning_rate": 4.619361405351276e-06, "loss": 1.3438, "mean_token_accuracy": 0.657525877157847, "num_tokens": 2316281274.0, "step": 13807 }, { "entropy": 1.6707657376925151, "epoch": 1.5168767680096673, "grad_norm": 0.5436812043190002, "learning_rate": 4.618232312069102e-06, "loss": 1.3353, "mean_token_accuracy": 0.6588641007741293, "num_tokens": 2316456603.0, "step": 13808 }, { "entropy": 1.7034710347652435, "epoch": 1.5169866249210404, "grad_norm": 0.8173933625221252, "learning_rate": 4.6171034207670005e-06, "loss": 1.2925, "mean_token_accuracy": 0.6720810929934183, "num_tokens": 2316592576.0, "step": 13809 }, { "entropy": 1.6645126938819885, "epoch": 1.5170964818324133, "grad_norm": 0.6110033392906189, "learning_rate": 4.615974731480695e-06, "loss": 1.3464, "mean_token_accuracy": 0.6670532127221426, "num_tokens": 2316743979.0, "step": 13810 }, { "entropy": 1.7283632159233093, "epoch": 1.5172063387437862, "grad_norm": 0.5693283081054688, "learning_rate": 4.614846244245914e-06, "loss": 1.3587, "mean_token_accuracy": 0.661205435792605, "num_tokens": 2316928848.0, "step": 13811 }, { "entropy": 1.7009834845860798, "epoch": 1.5173161956551593, "grad_norm": 0.6659391522407532, "learning_rate": 4.613717959098374e-06, "loss": 1.4906, "mean_token_accuracy": 0.6506659984588623, "num_tokens": 2317098556.0, "step": 13812 }, { "entropy": 1.7381359835465748, "epoch": 1.517426052566532, "grad_norm": 0.8102354407310486, "learning_rate": 4.612589876073785e-06, "loss": 1.3489, "mean_token_accuracy": 0.6604036937157313, "num_tokens": 2317270868.0, "step": 13813 }, { "entropy": 1.7617174784342449, "epoch": 1.517535909477905, "grad_norm": 0.816353440284729, "learning_rate": 4.611461995207843e-06, "loss": 1.4868, "mean_token_accuracy": 0.6646532714366913, "num_tokens": 2317428278.0, "step": 13814 }, { "entropy": 1.7236855427424114, "epoch": 1.517645766389278, "grad_norm": 0.6324769258499146, "learning_rate": 4.610334316536255e-06, "loss": 1.4888, "mean_token_accuracy": 0.6348064343134562, "num_tokens": 2317659506.0, "step": 13815 }, { "entropy": 1.6722245911757152, "epoch": 1.5177556233006508, "grad_norm": 0.737623393535614, "learning_rate": 4.609206840094702e-06, "loss": 1.3835, "mean_token_accuracy": 0.6555936386187872, "num_tokens": 2317836071.0, "step": 13816 }, { "entropy": 1.700161616007487, "epoch": 1.517865480212024, "grad_norm": 0.7109667658805847, "learning_rate": 4.608079565918877e-06, "loss": 1.3775, "mean_token_accuracy": 0.6652699112892151, "num_tokens": 2317969020.0, "step": 13817 }, { "entropy": 1.7235205272833507, "epoch": 1.5179753371233968, "grad_norm": 0.6419909000396729, "learning_rate": 4.606952494044452e-06, "loss": 1.4529, "mean_token_accuracy": 0.6587186654408773, "num_tokens": 2318125925.0, "step": 13818 }, { "entropy": 1.7614581386248271, "epoch": 1.5180851940347697, "grad_norm": 0.7397036552429199, "learning_rate": 4.605825624507097e-06, "loss": 1.2682, "mean_token_accuracy": 0.6707401523987452, "num_tokens": 2318235404.0, "step": 13819 }, { "entropy": 1.6948228081067402, "epoch": 1.5181950509461426, "grad_norm": 0.6969819068908691, "learning_rate": 4.604698957342484e-06, "loss": 1.3792, "mean_token_accuracy": 0.6615195969740549, "num_tokens": 2318404543.0, "step": 13820 }, { "entropy": 1.6588495473066966, "epoch": 1.5183049078575155, "grad_norm": 0.7565116286277771, "learning_rate": 4.603572492586266e-06, "loss": 1.4351, "mean_token_accuracy": 0.6644074221452078, "num_tokens": 2318598937.0, "step": 13821 }, { "entropy": 1.6587398151556652, "epoch": 1.5184147647688886, "grad_norm": 0.7786286473274231, "learning_rate": 4.602446230274094e-06, "loss": 1.3448, "mean_token_accuracy": 0.6546828200419744, "num_tokens": 2318769448.0, "step": 13822 }, { "entropy": 1.6828916768232982, "epoch": 1.5185246216802615, "grad_norm": 0.6554774045944214, "learning_rate": 4.601320170441616e-06, "loss": 1.3457, "mean_token_accuracy": 0.6564191430807114, "num_tokens": 2318939196.0, "step": 13823 }, { "entropy": 1.7403921981652577, "epoch": 1.5186344785916344, "grad_norm": 0.6817828416824341, "learning_rate": 4.6001943131244745e-06, "loss": 1.4085, "mean_token_accuracy": 0.650087426106135, "num_tokens": 2319099808.0, "step": 13824 }, { "entropy": 1.7434031864007313, "epoch": 1.5187443355030075, "grad_norm": 0.753669798374176, "learning_rate": 4.5990686583582985e-06, "loss": 1.3568, "mean_token_accuracy": 0.6551444629828135, "num_tokens": 2319261658.0, "step": 13825 }, { "entropy": 1.7295333445072174, "epoch": 1.5188541924143801, "grad_norm": 0.6525160670280457, "learning_rate": 4.597943206178712e-06, "loss": 1.3787, "mean_token_accuracy": 0.6602396667003632, "num_tokens": 2319392048.0, "step": 13826 }, { "entropy": 1.6922452350457509, "epoch": 1.5189640493257532, "grad_norm": 0.6596596837043762, "learning_rate": 4.596817956621342e-06, "loss": 1.4606, "mean_token_accuracy": 0.6499723295370737, "num_tokens": 2319591828.0, "step": 13827 }, { "entropy": 1.7007356186707814, "epoch": 1.5190739062371261, "grad_norm": 0.675512433052063, "learning_rate": 4.595692909721794e-06, "loss": 1.4131, "mean_token_accuracy": 0.6725195000569025, "num_tokens": 2319750099.0, "step": 13828 }, { "entropy": 1.7956644495328267, "epoch": 1.519183763148499, "grad_norm": 0.7226914763450623, "learning_rate": 4.5945680655156835e-06, "loss": 1.5228, "mean_token_accuracy": 0.6371510376532873, "num_tokens": 2319933057.0, "step": 13829 }, { "entropy": 1.7390548785527546, "epoch": 1.5192936200598721, "grad_norm": 0.7265375852584839, "learning_rate": 4.593443424038608e-06, "loss": 1.1547, "mean_token_accuracy": 0.6949647714694341, "num_tokens": 2320025210.0, "step": 13830 }, { "entropy": 1.6455240448315938, "epoch": 1.519403476971245, "grad_norm": 0.6959369778633118, "learning_rate": 4.592318985326158e-06, "loss": 1.2745, "mean_token_accuracy": 0.6683288365602493, "num_tokens": 2320188621.0, "step": 13831 }, { "entropy": 1.7331445614496868, "epoch": 1.519513333882618, "grad_norm": 0.7264030575752258, "learning_rate": 4.591194749413927e-06, "loss": 1.4339, "mean_token_accuracy": 0.6577616731325785, "num_tokens": 2320334001.0, "step": 13832 }, { "entropy": 1.6974613467852275, "epoch": 1.5196231907939908, "grad_norm": 0.709572970867157, "learning_rate": 4.590070716337495e-06, "loss": 1.3339, "mean_token_accuracy": 0.6570964654286703, "num_tokens": 2320480424.0, "step": 13833 }, { "entropy": 1.6866892476876576, "epoch": 1.5197330477053637, "grad_norm": 0.6560536623001099, "learning_rate": 4.588946886132433e-06, "loss": 1.3631, "mean_token_accuracy": 0.6670355498790741, "num_tokens": 2320674360.0, "step": 13834 }, { "entropy": 1.7293624182542164, "epoch": 1.5198429046167368, "grad_norm": 0.758017897605896, "learning_rate": 4.587823258834313e-06, "loss": 1.3618, "mean_token_accuracy": 0.6667436609665552, "num_tokens": 2320819919.0, "step": 13835 }, { "entropy": 1.6839136183261871, "epoch": 1.5199527615281097, "grad_norm": 0.6840667724609375, "learning_rate": 4.5866998344787e-06, "loss": 1.2936, "mean_token_accuracy": 0.6722035010655721, "num_tokens": 2321002330.0, "step": 13836 }, { "entropy": 1.6908271114031475, "epoch": 1.5200626184394825, "grad_norm": 0.7176492810249329, "learning_rate": 4.585576613101149e-06, "loss": 1.3865, "mean_token_accuracy": 0.6679946233828863, "num_tokens": 2321179746.0, "step": 13837 }, { "entropy": 1.6899594763914745, "epoch": 1.5201724753508556, "grad_norm": 0.6602792739868164, "learning_rate": 4.5844535947372066e-06, "loss": 1.3103, "mean_token_accuracy": 0.6629238277673721, "num_tokens": 2321320167.0, "step": 13838 }, { "entropy": 1.6667678654193878, "epoch": 1.5202823322622283, "grad_norm": 0.6572140455245972, "learning_rate": 4.583330779422415e-06, "loss": 1.2763, "mean_token_accuracy": 0.678403819600741, "num_tokens": 2321461364.0, "step": 13839 }, { "entropy": 1.6754031876722972, "epoch": 1.5203921891736014, "grad_norm": 0.7893751859664917, "learning_rate": 4.582208167192312e-06, "loss": 1.4581, "mean_token_accuracy": 0.6384941240151724, "num_tokens": 2321661021.0, "step": 13840 }, { "entropy": 1.7845016022523243, "epoch": 1.5205020460849743, "grad_norm": 0.7893801331520081, "learning_rate": 4.581085758082434e-06, "loss": 1.4384, "mean_token_accuracy": 0.6567031691471735, "num_tokens": 2321784876.0, "step": 13841 }, { "entropy": 1.6785170336564381, "epoch": 1.5206119029963472, "grad_norm": 0.6259093284606934, "learning_rate": 4.579963552128294e-06, "loss": 1.3312, "mean_token_accuracy": 0.6632338911294937, "num_tokens": 2321935360.0, "step": 13842 }, { "entropy": 1.696050186951955, "epoch": 1.5207217599077203, "grad_norm": 0.6764148473739624, "learning_rate": 4.578841549365415e-06, "loss": 1.3908, "mean_token_accuracy": 0.6596755584081014, "num_tokens": 2322083641.0, "step": 13843 }, { "entropy": 1.6868244409561157, "epoch": 1.5208316168190932, "grad_norm": 0.7002372741699219, "learning_rate": 4.57771974982931e-06, "loss": 1.3793, "mean_token_accuracy": 0.6510612765947977, "num_tokens": 2322237455.0, "step": 13844 }, { "entropy": 1.7343276540438335, "epoch": 1.520941473730466, "grad_norm": 0.8684859275817871, "learning_rate": 4.576598153555481e-06, "loss": 1.436, "mean_token_accuracy": 0.6581882784763972, "num_tokens": 2322398245.0, "step": 13845 }, { "entropy": 1.658086081345876, "epoch": 1.521051330641839, "grad_norm": 0.7486425042152405, "learning_rate": 4.575476760579422e-06, "loss": 1.4714, "mean_token_accuracy": 0.6543067147334417, "num_tokens": 2322602433.0, "step": 13846 }, { "entropy": 1.7451417048772175, "epoch": 1.5211611875532118, "grad_norm": 0.6304759979248047, "learning_rate": 4.574355570936633e-06, "loss": 1.4442, "mean_token_accuracy": 0.6402058055003484, "num_tokens": 2322787006.0, "step": 13847 }, { "entropy": 1.7013601462046306, "epoch": 1.521271044464585, "grad_norm": 0.8568814396858215, "learning_rate": 4.573234584662592e-06, "loss": 1.3864, "mean_token_accuracy": 0.6570970316727957, "num_tokens": 2322948041.0, "step": 13848 }, { "entropy": 1.699530432621638, "epoch": 1.5213809013759578, "grad_norm": 0.8710819482803345, "learning_rate": 4.572113801792783e-06, "loss": 1.4918, "mean_token_accuracy": 0.6583302120367686, "num_tokens": 2323150595.0, "step": 13849 }, { "entropy": 1.6952558259169261, "epoch": 1.5214907582873307, "grad_norm": 0.6915019154548645, "learning_rate": 4.570993222362674e-06, "loss": 1.3737, "mean_token_accuracy": 0.6718220909436544, "num_tokens": 2323287484.0, "step": 13850 }, { "entropy": 1.7063826123873393, "epoch": 1.5216006151987038, "grad_norm": 0.740079939365387, "learning_rate": 4.569872846407732e-06, "loss": 1.4068, "mean_token_accuracy": 0.6677322387695312, "num_tokens": 2323439617.0, "step": 13851 }, { "entropy": 1.7466611762841542, "epoch": 1.5217104721100765, "grad_norm": 0.6274285912513733, "learning_rate": 4.568752673963416e-06, "loss": 1.4659, "mean_token_accuracy": 0.6516986141602198, "num_tokens": 2323617361.0, "step": 13852 }, { "entropy": 1.7027616401513417, "epoch": 1.5218203290214496, "grad_norm": 0.57676762342453, "learning_rate": 4.567632705065186e-06, "loss": 1.5237, "mean_token_accuracy": 0.6254571576913198, "num_tokens": 2323812936.0, "step": 13853 }, { "entropy": 1.7192297577857971, "epoch": 1.5219301859328225, "grad_norm": 0.7230188846588135, "learning_rate": 4.566512939748476e-06, "loss": 1.4566, "mean_token_accuracy": 0.6502372076114019, "num_tokens": 2323949563.0, "step": 13854 }, { "entropy": 1.6509900987148285, "epoch": 1.5220400428441954, "grad_norm": 0.5848521590232849, "learning_rate": 4.565393378048733e-06, "loss": 1.5132, "mean_token_accuracy": 0.6319101750850677, "num_tokens": 2324180972.0, "step": 13855 }, { "entropy": 1.7650962670644124, "epoch": 1.5221498997555685, "grad_norm": 0.720257580280304, "learning_rate": 4.564274020001393e-06, "loss": 1.3704, "mean_token_accuracy": 0.6599539568026861, "num_tokens": 2324303927.0, "step": 13856 }, { "entropy": 1.7729399303595226, "epoch": 1.5222597566669414, "grad_norm": 0.6432046294212341, "learning_rate": 4.56315486564188e-06, "loss": 1.4875, "mean_token_accuracy": 0.6416818896929423, "num_tokens": 2324492636.0, "step": 13857 }, { "entropy": 1.752627670764923, "epoch": 1.5223696135783142, "grad_norm": 0.6898931264877319, "learning_rate": 4.562035915005611e-06, "loss": 1.3701, "mean_token_accuracy": 0.659583792090416, "num_tokens": 2324610878.0, "step": 13858 }, { "entropy": 1.730319658915202, "epoch": 1.5224794704896873, "grad_norm": 0.754682183265686, "learning_rate": 4.560917168128009e-06, "loss": 1.4008, "mean_token_accuracy": 0.6601613610982895, "num_tokens": 2324771008.0, "step": 13859 }, { "entropy": 1.6441446642080944, "epoch": 1.52258932740106, "grad_norm": 0.6233265399932861, "learning_rate": 4.559798625044473e-06, "loss": 1.3951, "mean_token_accuracy": 0.6597211956977844, "num_tokens": 2324942810.0, "step": 13860 }, { "entropy": 1.7084046204884846, "epoch": 1.5226991843124331, "grad_norm": 0.6157132983207703, "learning_rate": 4.558680285790413e-06, "loss": 1.2911, "mean_token_accuracy": 0.6648881336053213, "num_tokens": 2325106728.0, "step": 13861 }, { "entropy": 1.6908452014128368, "epoch": 1.522809041223806, "grad_norm": 0.6794646382331848, "learning_rate": 4.557562150401218e-06, "loss": 1.438, "mean_token_accuracy": 0.6520710190137228, "num_tokens": 2325273843.0, "step": 13862 }, { "entropy": 1.7190004388491313, "epoch": 1.5229188981351789, "grad_norm": 0.7677698731422424, "learning_rate": 4.556444218912275e-06, "loss": 1.4297, "mean_token_accuracy": 0.6583776374657949, "num_tokens": 2325414563.0, "step": 13863 }, { "entropy": 1.7089968224366505, "epoch": 1.523028755046552, "grad_norm": 0.6367853283882141, "learning_rate": 4.55532649135897e-06, "loss": 1.4434, "mean_token_accuracy": 0.6484352995951971, "num_tokens": 2325584116.0, "step": 13864 }, { "entropy": 1.6946298082669575, "epoch": 1.5231386119579247, "grad_norm": 0.7599850296974182, "learning_rate": 4.554208967776681e-06, "loss": 1.2423, "mean_token_accuracy": 0.6795276800791422, "num_tokens": 2325727142.0, "step": 13865 }, { "entropy": 1.6953876912593842, "epoch": 1.5232484688692978, "grad_norm": 0.7022786736488342, "learning_rate": 4.553091648200771e-06, "loss": 1.443, "mean_token_accuracy": 0.6698449452718099, "num_tokens": 2325866024.0, "step": 13866 }, { "entropy": 1.786557177702586, "epoch": 1.5233583257806707, "grad_norm": 0.8300307393074036, "learning_rate": 4.551974532666602e-06, "loss": 1.5473, "mean_token_accuracy": 0.62337193886439, "num_tokens": 2326045071.0, "step": 13867 }, { "entropy": 1.7563962737719219, "epoch": 1.5234681826920435, "grad_norm": 0.8392931222915649, "learning_rate": 4.550857621209538e-06, "loss": 1.4791, "mean_token_accuracy": 0.6514745354652405, "num_tokens": 2326224405.0, "step": 13868 }, { "entropy": 1.6940257251262665, "epoch": 1.5235780396034166, "grad_norm": 0.6167490482330322, "learning_rate": 4.549740913864926e-06, "loss": 1.3881, "mean_token_accuracy": 0.659416675567627, "num_tokens": 2326372794.0, "step": 13869 }, { "entropy": 1.7137708365917206, "epoch": 1.5236878965147895, "grad_norm": 0.7310816645622253, "learning_rate": 4.5486244106681025e-06, "loss": 1.4028, "mean_token_accuracy": 0.6513861964146296, "num_tokens": 2326531861.0, "step": 13870 }, { "entropy": 1.6824666062990825, "epoch": 1.5237977534261624, "grad_norm": 0.7571474313735962, "learning_rate": 4.547508111654412e-06, "loss": 1.4133, "mean_token_accuracy": 0.6456713875134786, "num_tokens": 2326753188.0, "step": 13871 }, { "entropy": 1.6938877006371815, "epoch": 1.5239076103375355, "grad_norm": 0.6758849620819092, "learning_rate": 4.546392016859181e-06, "loss": 1.3251, "mean_token_accuracy": 0.6616143981615702, "num_tokens": 2326905195.0, "step": 13872 }, { "entropy": 1.6427725851535797, "epoch": 1.5240174672489082, "grad_norm": 0.7181432843208313, "learning_rate": 4.545276126317736e-06, "loss": 1.3627, "mean_token_accuracy": 0.6573081215222677, "num_tokens": 2327094426.0, "step": 13873 }, { "entropy": 1.7088764309883118, "epoch": 1.5241273241602813, "grad_norm": 0.7042227387428284, "learning_rate": 4.544160440065394e-06, "loss": 1.5437, "mean_token_accuracy": 0.6491079305609068, "num_tokens": 2327227303.0, "step": 13874 }, { "entropy": 1.7235973974068959, "epoch": 1.5242371810716542, "grad_norm": 0.6834602952003479, "learning_rate": 4.54304495813746e-06, "loss": 1.2941, "mean_token_accuracy": 0.6685907791058222, "num_tokens": 2327358886.0, "step": 13875 }, { "entropy": 1.6720999280611675, "epoch": 1.524347037983027, "grad_norm": 0.5712348818778992, "learning_rate": 4.541929680569246e-06, "loss": 1.4281, "mean_token_accuracy": 0.6634454180796941, "num_tokens": 2327567108.0, "step": 13876 }, { "entropy": 1.6583287914594014, "epoch": 1.5244568948944002, "grad_norm": 0.7057250738143921, "learning_rate": 4.540814607396052e-06, "loss": 1.4087, "mean_token_accuracy": 0.6564200818538666, "num_tokens": 2327755776.0, "step": 13877 }, { "entropy": 1.6734488407770793, "epoch": 1.5245667518057728, "grad_norm": 0.667782187461853, "learning_rate": 4.53969973865316e-06, "loss": 1.328, "mean_token_accuracy": 0.6641414314508438, "num_tokens": 2327934192.0, "step": 13878 }, { "entropy": 1.704680899779002, "epoch": 1.524676608717146, "grad_norm": 0.662611722946167, "learning_rate": 4.538585074375861e-06, "loss": 1.4161, "mean_token_accuracy": 0.6625035852193832, "num_tokens": 2328111156.0, "step": 13879 }, { "entropy": 1.7344763179620106, "epoch": 1.5247864656285188, "grad_norm": 0.6415066123008728, "learning_rate": 4.537470614599434e-06, "loss": 1.3515, "mean_token_accuracy": 0.6630072891712189, "num_tokens": 2328262464.0, "step": 13880 }, { "entropy": 1.7009705603122711, "epoch": 1.5248963225398917, "grad_norm": 0.7482420802116394, "learning_rate": 4.5363563593591505e-06, "loss": 1.4322, "mean_token_accuracy": 0.6513066440820694, "num_tokens": 2328427937.0, "step": 13881 }, { "entropy": 1.69928045074145, "epoch": 1.5250061794512648, "grad_norm": 0.6484189629554749, "learning_rate": 4.5352423086902725e-06, "loss": 1.4215, "mean_token_accuracy": 0.6485116630792618, "num_tokens": 2328606277.0, "step": 13882 }, { "entropy": 1.6935794452826183, "epoch": 1.5251160363626377, "grad_norm": 0.6256346106529236, "learning_rate": 4.534128462628066e-06, "loss": 1.288, "mean_token_accuracy": 0.6697153101364771, "num_tokens": 2328753230.0, "step": 13883 }, { "entropy": 1.684336672226588, "epoch": 1.5252258932740106, "grad_norm": 0.6663857102394104, "learning_rate": 4.533014821207776e-06, "loss": 1.5602, "mean_token_accuracy": 0.6558093825976054, "num_tokens": 2329026991.0, "step": 13884 }, { "entropy": 1.772289623816808, "epoch": 1.5253357501853837, "grad_norm": 0.722428023815155, "learning_rate": 4.531901384464657e-06, "loss": 1.4207, "mean_token_accuracy": 0.6482864121596018, "num_tokens": 2329184506.0, "step": 13885 }, { "entropy": 1.6704554855823517, "epoch": 1.5254456070967564, "grad_norm": 0.7864761352539062, "learning_rate": 4.5307881524339436e-06, "loss": 1.5056, "mean_token_accuracy": 0.6587265928586324, "num_tokens": 2329322566.0, "step": 13886 }, { "entropy": 1.7044276495774586, "epoch": 1.5255554640081295, "grad_norm": 1.0024720430374146, "learning_rate": 4.529675125150868e-06, "loss": 1.2254, "mean_token_accuracy": 0.686733677983284, "num_tokens": 2329467035.0, "step": 13887 }, { "entropy": 1.744888146718343, "epoch": 1.5256653209195024, "grad_norm": 0.9798945784568787, "learning_rate": 4.528562302650661e-06, "loss": 1.4146, "mean_token_accuracy": 0.6556217769781748, "num_tokens": 2329619365.0, "step": 13888 }, { "entropy": 1.5887063244978588, "epoch": 1.5257751778308752, "grad_norm": 0.6329157948493958, "learning_rate": 4.527449684968542e-06, "loss": 1.3162, "mean_token_accuracy": 0.6746849020322164, "num_tokens": 2329831176.0, "step": 13889 }, { "entropy": 1.7707992394765217, "epoch": 1.5258850347422483, "grad_norm": 0.7070748805999756, "learning_rate": 4.5263372721397205e-06, "loss": 1.4715, "mean_token_accuracy": 0.6502692202727, "num_tokens": 2330045897.0, "step": 13890 }, { "entropy": 1.6835823158423107, "epoch": 1.525994891653621, "grad_norm": 0.6027015447616577, "learning_rate": 4.5252250641994066e-06, "loss": 1.4211, "mean_token_accuracy": 0.6563113729159037, "num_tokens": 2330194857.0, "step": 13891 }, { "entropy": 1.6718124349912007, "epoch": 1.5261047485649941, "grad_norm": 0.7228647470474243, "learning_rate": 4.524113061182806e-06, "loss": 1.3283, "mean_token_accuracy": 0.6579829454421997, "num_tokens": 2330352101.0, "step": 13892 }, { "entropy": 1.703097979227702, "epoch": 1.526214605476367, "grad_norm": 0.7070342302322388, "learning_rate": 4.523001263125108e-06, "loss": 1.3875, "mean_token_accuracy": 0.6609033346176147, "num_tokens": 2330512728.0, "step": 13893 }, { "entropy": 1.6350172857443492, "epoch": 1.5263244623877399, "grad_norm": 0.6093044877052307, "learning_rate": 4.5218896700614995e-06, "loss": 1.4077, "mean_token_accuracy": 0.6465565909941992, "num_tokens": 2330771542.0, "step": 13894 }, { "entropy": 1.7364859382311504, "epoch": 1.526434319299113, "grad_norm": 0.7398340106010437, "learning_rate": 4.520778282027166e-06, "loss": 1.4541, "mean_token_accuracy": 0.6436052819093069, "num_tokens": 2330944232.0, "step": 13895 }, { "entropy": 1.6555716196695964, "epoch": 1.5265441762104859, "grad_norm": 0.7023627758026123, "learning_rate": 4.5196670990572775e-06, "loss": 1.3531, "mean_token_accuracy": 0.6626766125361124, "num_tokens": 2331077782.0, "step": 13896 }, { "entropy": 1.6921831766764324, "epoch": 1.5266540331218588, "grad_norm": 0.7476381063461304, "learning_rate": 4.518556121187008e-06, "loss": 1.2434, "mean_token_accuracy": 0.6709140290816625, "num_tokens": 2331204207.0, "step": 13897 }, { "entropy": 1.656636933485667, "epoch": 1.5267638900332319, "grad_norm": 0.7082569003105164, "learning_rate": 4.517445348451517e-06, "loss": 1.3313, "mean_token_accuracy": 0.6678289026021957, "num_tokens": 2331349418.0, "step": 13898 }, { "entropy": 1.6638062099615734, "epoch": 1.5268737469446045, "grad_norm": 0.929499626159668, "learning_rate": 4.516334780885956e-06, "loss": 1.4912, "mean_token_accuracy": 0.6647545297940572, "num_tokens": 2331514470.0, "step": 13899 }, { "entropy": 1.7199692924817402, "epoch": 1.5269836038559776, "grad_norm": 0.7336742877960205, "learning_rate": 4.515224418525481e-06, "loss": 1.4878, "mean_token_accuracy": 0.6436636795600256, "num_tokens": 2331681866.0, "step": 13900 }, { "entropy": 1.706304907798767, "epoch": 1.5270934607673505, "grad_norm": 0.6889383792877197, "learning_rate": 4.51411426140523e-06, "loss": 1.3244, "mean_token_accuracy": 0.6611945678790411, "num_tokens": 2331824203.0, "step": 13901 }, { "entropy": 1.6809849242369335, "epoch": 1.5272033176787234, "grad_norm": 0.7422223687171936, "learning_rate": 4.513004309560339e-06, "loss": 1.2971, "mean_token_accuracy": 0.6715661436319351, "num_tokens": 2332002531.0, "step": 13902 }, { "entropy": 1.7066124081611633, "epoch": 1.5273131745900965, "grad_norm": 0.7259396910667419, "learning_rate": 4.511894563025941e-06, "loss": 1.4524, "mean_token_accuracy": 0.656074732542038, "num_tokens": 2332142038.0, "step": 13903 }, { "entropy": 1.668474902709325, "epoch": 1.5274230315014692, "grad_norm": 0.6683173179626465, "learning_rate": 4.510785021837152e-06, "loss": 1.3409, "mean_token_accuracy": 0.6598901102940241, "num_tokens": 2332282595.0, "step": 13904 }, { "entropy": 1.6978270014127095, "epoch": 1.5275328884128423, "grad_norm": 0.7189866900444031, "learning_rate": 4.509675686029098e-06, "loss": 1.2363, "mean_token_accuracy": 0.6774458686510721, "num_tokens": 2332402578.0, "step": 13905 }, { "entropy": 1.7065897683302562, "epoch": 1.5276427453242152, "grad_norm": 0.6405203342437744, "learning_rate": 4.508566555636883e-06, "loss": 1.4826, "mean_token_accuracy": 0.643202950557073, "num_tokens": 2332591335.0, "step": 13906 }, { "entropy": 1.6972811818122864, "epoch": 1.527752602235588, "grad_norm": 0.9096667170524597, "learning_rate": 4.507457630695608e-06, "loss": 1.2485, "mean_token_accuracy": 0.6712992439667383, "num_tokens": 2332720592.0, "step": 13907 }, { "entropy": 1.747529496749242, "epoch": 1.5278624591469612, "grad_norm": 0.6828884482383728, "learning_rate": 4.506348911240373e-06, "loss": 1.3577, "mean_token_accuracy": 0.6520382066567739, "num_tokens": 2332905147.0, "step": 13908 }, { "entropy": 1.7126144965489705, "epoch": 1.527972316058334, "grad_norm": 0.7815736532211304, "learning_rate": 4.505240397306276e-06, "loss": 1.3709, "mean_token_accuracy": 0.6534913231929144, "num_tokens": 2333089144.0, "step": 13909 }, { "entropy": 1.6652606030305226, "epoch": 1.528082172969707, "grad_norm": 0.619626522064209, "learning_rate": 4.504132088928387e-06, "loss": 1.3865, "mean_token_accuracy": 0.6694531738758087, "num_tokens": 2333339666.0, "step": 13910 }, { "entropy": 1.6987803876399994, "epoch": 1.52819202988108, "grad_norm": 0.670711874961853, "learning_rate": 4.50302398614179e-06, "loss": 1.3458, "mean_token_accuracy": 0.6516731629769007, "num_tokens": 2333501206.0, "step": 13911 }, { "entropy": 1.7137329777081807, "epoch": 1.5283018867924527, "grad_norm": 0.6769323945045471, "learning_rate": 4.50191608898156e-06, "loss": 1.3155, "mean_token_accuracy": 0.6620151499907175, "num_tokens": 2333669854.0, "step": 13912 }, { "entropy": 1.7096583346525829, "epoch": 1.5284117437038258, "grad_norm": 1.8156622648239136, "learning_rate": 4.500808397482758e-06, "loss": 1.4023, "mean_token_accuracy": 0.6582985719045004, "num_tokens": 2333802912.0, "step": 13913 }, { "entropy": 1.7230294446150463, "epoch": 1.5285216006151987, "grad_norm": 0.8509101867675781, "learning_rate": 4.499700911680438e-06, "loss": 1.3916, "mean_token_accuracy": 0.6567439685265223, "num_tokens": 2333964505.0, "step": 13914 }, { "entropy": 1.6359948416550953, "epoch": 1.5286314575265716, "grad_norm": 0.5980956554412842, "learning_rate": 4.498593631609659e-06, "loss": 1.3394, "mean_token_accuracy": 0.655903235077858, "num_tokens": 2334153055.0, "step": 13915 }, { "entropy": 1.6873148282368977, "epoch": 1.5287413144379447, "grad_norm": 0.7259606719017029, "learning_rate": 4.497486557305457e-06, "loss": 1.4662, "mean_token_accuracy": 0.649679829676946, "num_tokens": 2334300460.0, "step": 13916 }, { "entropy": 1.717822104692459, "epoch": 1.5288511713493174, "grad_norm": 0.6588619947433472, "learning_rate": 4.4963796888028795e-06, "loss": 1.3461, "mean_token_accuracy": 0.6689090430736542, "num_tokens": 2334450903.0, "step": 13917 }, { "entropy": 1.7424447536468506, "epoch": 1.5289610282606905, "grad_norm": 0.6434611082077026, "learning_rate": 4.495273026136955e-06, "loss": 1.4264, "mean_token_accuracy": 0.6540406395991644, "num_tokens": 2334682613.0, "step": 13918 }, { "entropy": 1.701530744632085, "epoch": 1.5290708851720634, "grad_norm": 0.6814815402030945, "learning_rate": 4.494166569342703e-06, "loss": 1.3523, "mean_token_accuracy": 0.6607520679632822, "num_tokens": 2334831018.0, "step": 13919 }, { "entropy": 1.7284032305081685, "epoch": 1.5291807420834362, "grad_norm": 0.6312406063079834, "learning_rate": 4.493060318455149e-06, "loss": 1.3887, "mean_token_accuracy": 0.6543530275424322, "num_tokens": 2334984799.0, "step": 13920 }, { "entropy": 1.6821238696575165, "epoch": 1.5292905989948093, "grad_norm": 0.8389406204223633, "learning_rate": 4.49195427350931e-06, "loss": 1.5546, "mean_token_accuracy": 0.6384274909893671, "num_tokens": 2335183329.0, "step": 13921 }, { "entropy": 1.6805053154627483, "epoch": 1.5294004559061822, "grad_norm": 0.6420016884803772, "learning_rate": 4.49084843454018e-06, "loss": 1.4169, "mean_token_accuracy": 0.6547876248757044, "num_tokens": 2335394113.0, "step": 13922 }, { "entropy": 1.7125700910886128, "epoch": 1.5295103128175551, "grad_norm": 0.6581193804740906, "learning_rate": 4.489742801582763e-06, "loss": 1.4741, "mean_token_accuracy": 0.6349633236726125, "num_tokens": 2335586212.0, "step": 13923 }, { "entropy": 1.6752095818519592, "epoch": 1.5296201697289282, "grad_norm": 0.6447793841362, "learning_rate": 4.488637374672055e-06, "loss": 1.3054, "mean_token_accuracy": 0.6653489669164022, "num_tokens": 2335744802.0, "step": 13924 }, { "entropy": 1.7021776934464772, "epoch": 1.5297300266403009, "grad_norm": 0.7589120864868164, "learning_rate": 4.487532153843042e-06, "loss": 1.3662, "mean_token_accuracy": 0.6518423855304718, "num_tokens": 2335891904.0, "step": 13925 }, { "entropy": 1.7558875183264415, "epoch": 1.529839883551674, "grad_norm": 0.7773360013961792, "learning_rate": 4.4864271391306966e-06, "loss": 1.587, "mean_token_accuracy": 0.6268220792214075, "num_tokens": 2336077877.0, "step": 13926 }, { "entropy": 1.7166087726751964, "epoch": 1.5299497404630469, "grad_norm": 0.7485700845718384, "learning_rate": 4.485322330570001e-06, "loss": 1.5295, "mean_token_accuracy": 0.6402496894200643, "num_tokens": 2336277354.0, "step": 13927 }, { "entropy": 1.6612951358159382, "epoch": 1.5300595973744198, "grad_norm": 0.6849234104156494, "learning_rate": 4.484217728195916e-06, "loss": 1.4499, "mean_token_accuracy": 0.6464580297470093, "num_tokens": 2336483683.0, "step": 13928 }, { "entropy": 1.7225702504316966, "epoch": 1.5301694542857929, "grad_norm": 0.7260335087776184, "learning_rate": 4.483113332043406e-06, "loss": 1.5129, "mean_token_accuracy": 0.6519743303457896, "num_tokens": 2336649224.0, "step": 13929 }, { "entropy": 1.7422150870164235, "epoch": 1.5302793111971655, "grad_norm": 0.7213874459266663, "learning_rate": 4.482009142147423e-06, "loss": 1.4632, "mean_token_accuracy": 0.6494071384270986, "num_tokens": 2336842981.0, "step": 13930 }, { "entropy": 1.7239739795525868, "epoch": 1.5303891681085386, "grad_norm": 1.510428786277771, "learning_rate": 4.48090515854291e-06, "loss": 1.2778, "mean_token_accuracy": 0.6638440688451132, "num_tokens": 2337021650.0, "step": 13931 }, { "entropy": 1.6931169827779133, "epoch": 1.5304990250199115, "grad_norm": 0.6036139130592346, "learning_rate": 4.479801381264812e-06, "loss": 1.3952, "mean_token_accuracy": 0.6582480867703756, "num_tokens": 2337184063.0, "step": 13932 }, { "entropy": 1.7547406653563182, "epoch": 1.5306088819312844, "grad_norm": 0.7975739240646362, "learning_rate": 4.478697810348067e-06, "loss": 1.4196, "mean_token_accuracy": 0.6527342349290848, "num_tokens": 2337334775.0, "step": 13933 }, { "entropy": 1.7274678846200306, "epoch": 1.5307187388426575, "grad_norm": 0.6878901124000549, "learning_rate": 4.477594445827593e-06, "loss": 1.2836, "mean_token_accuracy": 0.6727159321308136, "num_tokens": 2337481459.0, "step": 13934 }, { "entropy": 1.7022731602191925, "epoch": 1.5308285957540304, "grad_norm": 0.5858879685401917, "learning_rate": 4.476491287738315e-06, "loss": 1.4707, "mean_token_accuracy": 0.6447849820057551, "num_tokens": 2337670469.0, "step": 13935 }, { "entropy": 1.7229323883851368, "epoch": 1.5309384526654033, "grad_norm": 0.7864375710487366, "learning_rate": 4.47538833611515e-06, "loss": 1.4633, "mean_token_accuracy": 0.660116657614708, "num_tokens": 2337834873.0, "step": 13936 }, { "entropy": 1.6673548420270283, "epoch": 1.5310483095767764, "grad_norm": 0.6428960561752319, "learning_rate": 4.474285590993006e-06, "loss": 1.2677, "mean_token_accuracy": 0.6740302940209707, "num_tokens": 2337956224.0, "step": 13937 }, { "entropy": 1.710586170355479, "epoch": 1.531158166488149, "grad_norm": 0.7057774066925049, "learning_rate": 4.473183052406779e-06, "loss": 1.5108, "mean_token_accuracy": 0.6455043405294418, "num_tokens": 2338114195.0, "step": 13938 }, { "entropy": 1.8051166435082753, "epoch": 1.5312680233995222, "grad_norm": 0.7137125134468079, "learning_rate": 4.47208072039137e-06, "loss": 1.3484, "mean_token_accuracy": 0.6553170531988144, "num_tokens": 2338255652.0, "step": 13939 }, { "entropy": 1.7347020109494526, "epoch": 1.531377880310895, "grad_norm": 0.6780955195426941, "learning_rate": 4.470978594981662e-06, "loss": 1.2638, "mean_token_accuracy": 0.6686906566222509, "num_tokens": 2338407717.0, "step": 13940 }, { "entropy": 1.7669414679209392, "epoch": 1.531487737222268, "grad_norm": 0.6672480702400208, "learning_rate": 4.4698766762125424e-06, "loss": 1.5253, "mean_token_accuracy": 0.6340082536141077, "num_tokens": 2338614879.0, "step": 13941 }, { "entropy": 1.7260774771372478, "epoch": 1.531597594133641, "grad_norm": 0.7122741937637329, "learning_rate": 4.4687749641188825e-06, "loss": 1.1811, "mean_token_accuracy": 0.6872084339459738, "num_tokens": 2338713200.0, "step": 13942 }, { "entropy": 1.7119547426700592, "epoch": 1.5317074510450137, "grad_norm": 0.6557315587997437, "learning_rate": 4.4676734587355495e-06, "loss": 1.4215, "mean_token_accuracy": 0.6668926427761713, "num_tokens": 2338895649.0, "step": 13943 }, { "entropy": 1.7337224682172139, "epoch": 1.5318173079563868, "grad_norm": 0.688232421875, "learning_rate": 4.466572160097409e-06, "loss": 1.312, "mean_token_accuracy": 0.6711380928754807, "num_tokens": 2339057198.0, "step": 13944 }, { "entropy": 1.735473394393921, "epoch": 1.5319271648677597, "grad_norm": 0.7628278732299805, "learning_rate": 4.46547106823932e-06, "loss": 1.372, "mean_token_accuracy": 0.6484810014565786, "num_tokens": 2339218108.0, "step": 13945 }, { "entropy": 1.6890499293804169, "epoch": 1.5320370217791326, "grad_norm": 0.9208077788352966, "learning_rate": 4.464370183196122e-06, "loss": 1.479, "mean_token_accuracy": 0.6497959345579147, "num_tokens": 2339408501.0, "step": 13946 }, { "entropy": 1.7463324666023254, "epoch": 1.5321468786905057, "grad_norm": 0.8276370167732239, "learning_rate": 4.463269505002663e-06, "loss": 1.4574, "mean_token_accuracy": 0.6638544549544653, "num_tokens": 2339542985.0, "step": 13947 }, { "entropy": 1.671470006306966, "epoch": 1.5322567356018786, "grad_norm": 0.6009271740913391, "learning_rate": 4.462169033693782e-06, "loss": 1.3789, "mean_token_accuracy": 0.6519037485122681, "num_tokens": 2339734500.0, "step": 13948 }, { "entropy": 1.7509864171346028, "epoch": 1.5323665925132515, "grad_norm": 0.7692368626594543, "learning_rate": 4.461068769304303e-06, "loss": 1.3132, "mean_token_accuracy": 0.6650248964627584, "num_tokens": 2339859100.0, "step": 13949 }, { "entropy": 1.7392914295196533, "epoch": 1.5324764494246246, "grad_norm": 0.683042049407959, "learning_rate": 4.45996871186905e-06, "loss": 1.373, "mean_token_accuracy": 0.65072533984979, "num_tokens": 2339994657.0, "step": 13950 }, { "entropy": 1.709182192881902, "epoch": 1.5325863063359972, "grad_norm": 0.6510934829711914, "learning_rate": 4.4588688614228425e-06, "loss": 1.3961, "mean_token_accuracy": 0.654055600365003, "num_tokens": 2340139163.0, "step": 13951 }, { "entropy": 1.6690978010495503, "epoch": 1.5326961632473703, "grad_norm": 0.6553038358688354, "learning_rate": 4.457769218000485e-06, "loss": 1.3819, "mean_token_accuracy": 0.6699864417314529, "num_tokens": 2340285613.0, "step": 13952 }, { "entropy": 1.7530159155527751, "epoch": 1.5328060201587432, "grad_norm": 0.7141207456588745, "learning_rate": 4.456669781636787e-06, "loss": 1.4391, "mean_token_accuracy": 0.6459661523501078, "num_tokens": 2340410082.0, "step": 13953 }, { "entropy": 1.7613732715447743, "epoch": 1.5329158770701161, "grad_norm": 0.6994073987007141, "learning_rate": 4.455570552366541e-06, "loss": 1.4204, "mean_token_accuracy": 0.6491125027338663, "num_tokens": 2340593994.0, "step": 13954 }, { "entropy": 1.642663260300954, "epoch": 1.5330257339814892, "grad_norm": 0.6598994731903076, "learning_rate": 4.454471530224536e-06, "loss": 1.3548, "mean_token_accuracy": 0.6538633108139038, "num_tokens": 2340770191.0, "step": 13955 }, { "entropy": 1.6616478463013966, "epoch": 1.5331355908928619, "grad_norm": 0.6697360277175903, "learning_rate": 4.453372715245557e-06, "loss": 1.3995, "mean_token_accuracy": 0.6621593882640203, "num_tokens": 2340949740.0, "step": 13956 }, { "entropy": 1.723496437072754, "epoch": 1.533245447804235, "grad_norm": 0.7380031943321228, "learning_rate": 4.452274107464388e-06, "loss": 1.4068, "mean_token_accuracy": 0.6533598005771637, "num_tokens": 2341134749.0, "step": 13957 }, { "entropy": 1.7315025826295216, "epoch": 1.5333553047156079, "grad_norm": 0.6608657240867615, "learning_rate": 4.451175706915787e-06, "loss": 1.5382, "mean_token_accuracy": 0.6362046400705973, "num_tokens": 2341397004.0, "step": 13958 }, { "entropy": 1.7039255797863007, "epoch": 1.5334651616269808, "grad_norm": 0.6795163154602051, "learning_rate": 4.450077513634527e-06, "loss": 1.3976, "mean_token_accuracy": 0.6478537817796072, "num_tokens": 2341604517.0, "step": 13959 }, { "entropy": 1.6687067747116089, "epoch": 1.5335750185383539, "grad_norm": 0.7309846878051758, "learning_rate": 4.44897952765536e-06, "loss": 1.5048, "mean_token_accuracy": 0.655499721566836, "num_tokens": 2341781573.0, "step": 13960 }, { "entropy": 1.723157713810603, "epoch": 1.5336848754497268, "grad_norm": 0.7755676507949829, "learning_rate": 4.44788174901304e-06, "loss": 1.411, "mean_token_accuracy": 0.6578606764475504, "num_tokens": 2341911128.0, "step": 13961 }, { "entropy": 1.6915369133154552, "epoch": 1.5337947323610996, "grad_norm": 0.7471572160720825, "learning_rate": 4.446784177742312e-06, "loss": 1.2909, "mean_token_accuracy": 0.6779088576634725, "num_tokens": 2342089929.0, "step": 13962 }, { "entropy": 1.679351806640625, "epoch": 1.5339045892724728, "grad_norm": 0.5526012778282166, "learning_rate": 4.445686813877907e-06, "loss": 1.485, "mean_token_accuracy": 0.6317119797070821, "num_tokens": 2342319068.0, "step": 13963 }, { "entropy": 1.6245819826920826, "epoch": 1.5340144461838454, "grad_norm": 0.6974883079528809, "learning_rate": 4.444589657454562e-06, "loss": 1.4006, "mean_token_accuracy": 0.6653065234422684, "num_tokens": 2342464403.0, "step": 13964 }, { "entropy": 1.6968108018239338, "epoch": 1.5341243030952185, "grad_norm": 0.5962358713150024, "learning_rate": 4.443492708507007e-06, "loss": 1.4658, "mean_token_accuracy": 0.6390899419784546, "num_tokens": 2342734419.0, "step": 13965 }, { "entropy": 1.7429889142513275, "epoch": 1.5342341600065914, "grad_norm": 0.6705568432807922, "learning_rate": 4.442395967069947e-06, "loss": 1.4232, "mean_token_accuracy": 0.643167644739151, "num_tokens": 2342906016.0, "step": 13966 }, { "entropy": 1.7325883607069652, "epoch": 1.5343440169179643, "grad_norm": 0.7288616895675659, "learning_rate": 4.441299433178099e-06, "loss": 1.4707, "mean_token_accuracy": 0.6572986940542856, "num_tokens": 2343042540.0, "step": 13967 }, { "entropy": 1.6961339712142944, "epoch": 1.5344538738293374, "grad_norm": 0.7040743827819824, "learning_rate": 4.440203106866172e-06, "loss": 1.4501, "mean_token_accuracy": 0.6512836913267771, "num_tokens": 2343232269.0, "step": 13968 }, { "entropy": 1.6818045775095622, "epoch": 1.53456373074071, "grad_norm": 0.6538490653038025, "learning_rate": 4.439106988168861e-06, "loss": 1.3832, "mean_token_accuracy": 0.6568224181731542, "num_tokens": 2343424419.0, "step": 13969 }, { "entropy": 1.7116661369800568, "epoch": 1.5346735876520832, "grad_norm": 0.6646193265914917, "learning_rate": 4.438011077120854e-06, "loss": 1.4384, "mean_token_accuracy": 0.633455440402031, "num_tokens": 2343599348.0, "step": 13970 }, { "entropy": 1.6582121352354686, "epoch": 1.534783444563456, "grad_norm": 0.5454766154289246, "learning_rate": 4.436915373756843e-06, "loss": 1.3379, "mean_token_accuracy": 0.6576869090398153, "num_tokens": 2343772491.0, "step": 13971 }, { "entropy": 1.6670493185520172, "epoch": 1.534893301474829, "grad_norm": 0.6243618130683899, "learning_rate": 4.4358198781114995e-06, "loss": 1.3609, "mean_token_accuracy": 0.6600524286429087, "num_tokens": 2343939801.0, "step": 13972 }, { "entropy": 1.6370833118756611, "epoch": 1.535003158386202, "grad_norm": 0.6674981713294983, "learning_rate": 4.434724590219502e-06, "loss": 1.3437, "mean_token_accuracy": 0.6636191656192144, "num_tokens": 2344139798.0, "step": 13973 }, { "entropy": 1.7490037282307942, "epoch": 1.535113015297575, "grad_norm": 0.7179242372512817, "learning_rate": 4.433629510115512e-06, "loss": 1.3214, "mean_token_accuracy": 0.6645645598570505, "num_tokens": 2344267913.0, "step": 13974 }, { "entropy": 1.7178294559319813, "epoch": 1.5352228722089478, "grad_norm": 0.6131008267402649, "learning_rate": 4.432534637834188e-06, "loss": 1.5438, "mean_token_accuracy": 0.6416826993227005, "num_tokens": 2344466156.0, "step": 13975 }, { "entropy": 1.6926537454128265, "epoch": 1.535332729120321, "grad_norm": 0.673001766204834, "learning_rate": 4.431439973410183e-06, "loss": 1.5398, "mean_token_accuracy": 0.6400438646475474, "num_tokens": 2344663909.0, "step": 13976 }, { "entropy": 1.6792364219824474, "epoch": 1.5354425860316936, "grad_norm": 0.6953542828559875, "learning_rate": 4.430345516878147e-06, "loss": 1.4084, "mean_token_accuracy": 0.6717520505189896, "num_tokens": 2344803361.0, "step": 13977 }, { "entropy": 1.6765713791052501, "epoch": 1.5355524429430667, "grad_norm": 0.6225120425224304, "learning_rate": 4.4292512682727115e-06, "loss": 1.3553, "mean_token_accuracy": 0.6637383997440338, "num_tokens": 2345013367.0, "step": 13978 }, { "entropy": 1.7007201512654622, "epoch": 1.5356622998544396, "grad_norm": 0.7577117085456848, "learning_rate": 4.428157227628511e-06, "loss": 1.4322, "mean_token_accuracy": 0.6638199587663015, "num_tokens": 2345180653.0, "step": 13979 }, { "entropy": 1.6954215864340465, "epoch": 1.5357721567658125, "grad_norm": 0.6490945816040039, "learning_rate": 4.427063394980177e-06, "loss": 1.3916, "mean_token_accuracy": 0.6532481213410696, "num_tokens": 2345343665.0, "step": 13980 }, { "entropy": 1.6678927838802338, "epoch": 1.5358820136771856, "grad_norm": 0.6423428058624268, "learning_rate": 4.425969770362323e-06, "loss": 1.2533, "mean_token_accuracy": 0.6784861932198206, "num_tokens": 2345473755.0, "step": 13981 }, { "entropy": 1.6874541540940602, "epoch": 1.5359918705885582, "grad_norm": 0.6954723000526428, "learning_rate": 4.424876353809563e-06, "loss": 1.3609, "mean_token_accuracy": 0.6523398011922836, "num_tokens": 2345647433.0, "step": 13982 }, { "entropy": 1.7374683419863384, "epoch": 1.5361017274999313, "grad_norm": 0.6964279413223267, "learning_rate": 4.4237831453565035e-06, "loss": 1.3207, "mean_token_accuracy": 0.6694158862034479, "num_tokens": 2345753042.0, "step": 13983 }, { "entropy": 1.7551298042138417, "epoch": 1.5362115844113042, "grad_norm": 0.6724309921264648, "learning_rate": 4.422690145037743e-06, "loss": 1.3518, "mean_token_accuracy": 0.6594842871030172, "num_tokens": 2345889659.0, "step": 13984 }, { "entropy": 1.7073955833911896, "epoch": 1.5363214413226771, "grad_norm": 0.7073147296905518, "learning_rate": 4.421597352887879e-06, "loss": 1.4573, "mean_token_accuracy": 0.6550180613994598, "num_tokens": 2346072026.0, "step": 13985 }, { "entropy": 1.6307064195473988, "epoch": 1.5364312982340502, "grad_norm": 0.6468693017959595, "learning_rate": 4.420504768941493e-06, "loss": 1.4167, "mean_token_accuracy": 0.6737861136595408, "num_tokens": 2346229976.0, "step": 13986 }, { "entropy": 1.7437759339809418, "epoch": 1.536541155145423, "grad_norm": 0.7528170943260193, "learning_rate": 4.419412393233164e-06, "loss": 1.4906, "mean_token_accuracy": 0.6429897795120875, "num_tokens": 2346443997.0, "step": 13987 }, { "entropy": 1.6810493369897206, "epoch": 1.536651012056796, "grad_norm": 0.6314464807510376, "learning_rate": 4.4183202257974685e-06, "loss": 1.4204, "mean_token_accuracy": 0.6624763359626135, "num_tokens": 2346619458.0, "step": 13988 }, { "entropy": 1.7033787270387013, "epoch": 1.536760868968169, "grad_norm": 0.742909848690033, "learning_rate": 4.417228266668976e-06, "loss": 1.3033, "mean_token_accuracy": 0.6721209386984507, "num_tokens": 2346763776.0, "step": 13989 }, { "entropy": 1.7384433150291443, "epoch": 1.5368707258795418, "grad_norm": 0.688539445400238, "learning_rate": 4.4161365158822386e-06, "loss": 1.3789, "mean_token_accuracy": 0.6550500591595968, "num_tokens": 2346925583.0, "step": 13990 }, { "entropy": 1.7031288743019104, "epoch": 1.5369805827909149, "grad_norm": 0.7073594331741333, "learning_rate": 4.415044973471812e-06, "loss": 1.3824, "mean_token_accuracy": 0.6627199401458105, "num_tokens": 2347097785.0, "step": 13991 }, { "entropy": 1.674454540014267, "epoch": 1.5370904397022878, "grad_norm": 0.889224648475647, "learning_rate": 4.413953639472249e-06, "loss": 1.2342, "mean_token_accuracy": 0.6825215369462967, "num_tokens": 2347220132.0, "step": 13992 }, { "entropy": 1.6853329439957936, "epoch": 1.5372002966136606, "grad_norm": 0.6673226952552795, "learning_rate": 4.412862513918085e-06, "loss": 1.301, "mean_token_accuracy": 0.6678221076726913, "num_tokens": 2347354586.0, "step": 13993 }, { "entropy": 1.6549971401691437, "epoch": 1.5373101535250338, "grad_norm": 0.6486871242523193, "learning_rate": 4.411771596843852e-06, "loss": 1.4182, "mean_token_accuracy": 0.6577600389719009, "num_tokens": 2347518232.0, "step": 13994 }, { "entropy": 1.6686547100543976, "epoch": 1.5374200104364064, "grad_norm": 0.7365626096725464, "learning_rate": 4.410680888284081e-06, "loss": 1.428, "mean_token_accuracy": 0.6594243546326956, "num_tokens": 2347704419.0, "step": 13995 }, { "entropy": 1.697216699520747, "epoch": 1.5375298673477795, "grad_norm": 0.6379204392433167, "learning_rate": 4.409590388273288e-06, "loss": 1.4551, "mean_token_accuracy": 0.6572363177935282, "num_tokens": 2347903688.0, "step": 13996 }, { "entropy": 1.7163971066474915, "epoch": 1.5376397242591524, "grad_norm": 0.7252550721168518, "learning_rate": 4.4085000968459925e-06, "loss": 1.4578, "mean_token_accuracy": 0.6468437065680822, "num_tokens": 2348059580.0, "step": 13997 }, { "entropy": 1.6949025789896648, "epoch": 1.5377495811705253, "grad_norm": 0.7134789824485779, "learning_rate": 4.407410014036699e-06, "loss": 1.4531, "mean_token_accuracy": 0.6462388386329015, "num_tokens": 2348228716.0, "step": 13998 }, { "entropy": 1.705321490764618, "epoch": 1.5378594380818984, "grad_norm": 0.8609808087348938, "learning_rate": 4.406320139879906e-06, "loss": 1.4904, "mean_token_accuracy": 0.6510612418254217, "num_tokens": 2348400900.0, "step": 13999 }, { "entropy": 1.7144020994504292, "epoch": 1.5379692949932713, "grad_norm": 0.7137477993965149, "learning_rate": 4.405230474410108e-06, "loss": 1.4114, "mean_token_accuracy": 0.6484651267528534, "num_tokens": 2348613516.0, "step": 14000 }, { "entropy": 1.7028910517692566, "epoch": 1.5380791519046442, "grad_norm": 0.562455415725708, "learning_rate": 4.4041410176618e-06, "loss": 1.417, "mean_token_accuracy": 0.6450492938359579, "num_tokens": 2348799990.0, "step": 14001 }, { "entropy": 1.688763548930486, "epoch": 1.5381890088160173, "grad_norm": 0.7308263182640076, "learning_rate": 4.403051769669451e-06, "loss": 1.2927, "mean_token_accuracy": 0.6648479749759039, "num_tokens": 2348918497.0, "step": 14002 }, { "entropy": 1.738109012444814, "epoch": 1.53829886572739, "grad_norm": 0.6443284153938293, "learning_rate": 4.40196273046754e-06, "loss": 1.3536, "mean_token_accuracy": 0.6667558401823044, "num_tokens": 2349071410.0, "step": 14003 }, { "entropy": 1.702557345231374, "epoch": 1.538408722638763, "grad_norm": 0.8023272156715393, "learning_rate": 4.40087390009054e-06, "loss": 1.3405, "mean_token_accuracy": 0.676914319396019, "num_tokens": 2349234865.0, "step": 14004 }, { "entropy": 1.711880385875702, "epoch": 1.538518579550136, "grad_norm": 0.80071622133255, "learning_rate": 4.399785278572906e-06, "loss": 1.336, "mean_token_accuracy": 0.6736197620630264, "num_tokens": 2349415753.0, "step": 14005 }, { "entropy": 1.687764436006546, "epoch": 1.5386284364615088, "grad_norm": 0.7393195033073425, "learning_rate": 4.39869686594909e-06, "loss": 1.3066, "mean_token_accuracy": 0.6780485212802887, "num_tokens": 2349536443.0, "step": 14006 }, { "entropy": 1.6960475146770477, "epoch": 1.538738293372882, "grad_norm": 0.7331027984619141, "learning_rate": 4.397608662253548e-06, "loss": 1.2219, "mean_token_accuracy": 0.6794477055470148, "num_tokens": 2349691007.0, "step": 14007 }, { "entropy": 1.753263344367345, "epoch": 1.5388481502842548, "grad_norm": 0.6345871686935425, "learning_rate": 4.396520667520714e-06, "loss": 1.4942, "mean_token_accuracy": 0.639187882343928, "num_tokens": 2349883106.0, "step": 14008 }, { "entropy": 1.7145345509052277, "epoch": 1.5389580071956277, "grad_norm": 0.6685234308242798, "learning_rate": 4.395432881785028e-06, "loss": 1.438, "mean_token_accuracy": 0.6546867787837982, "num_tokens": 2350060890.0, "step": 14009 }, { "entropy": 1.7457486589749653, "epoch": 1.5390678641070006, "grad_norm": 0.6667968034744263, "learning_rate": 4.3943453050809144e-06, "loss": 1.4756, "mean_token_accuracy": 0.6380013773838679, "num_tokens": 2350245230.0, "step": 14010 }, { "entropy": 1.6892044444878895, "epoch": 1.5391777210183735, "grad_norm": 0.6611399054527283, "learning_rate": 4.393257937442793e-06, "loss": 1.2343, "mean_token_accuracy": 0.6810240397850672, "num_tokens": 2350353525.0, "step": 14011 }, { "entropy": 1.7096824645996094, "epoch": 1.5392875779297466, "grad_norm": 0.6070430278778076, "learning_rate": 4.392170778905081e-06, "loss": 1.3684, "mean_token_accuracy": 0.6506505062182745, "num_tokens": 2350592197.0, "step": 14012 }, { "entropy": 1.6973026096820831, "epoch": 1.5393974348411195, "grad_norm": 0.7121232748031616, "learning_rate": 4.3910838295021905e-06, "loss": 1.25, "mean_token_accuracy": 0.6867650945981344, "num_tokens": 2350738599.0, "step": 14013 }, { "entropy": 1.794579843680064, "epoch": 1.5395072917524923, "grad_norm": 0.7066651582717896, "learning_rate": 4.389997089268516e-06, "loss": 1.3076, "mean_token_accuracy": 0.6670825928449631, "num_tokens": 2350881742.0, "step": 14014 }, { "entropy": 1.6792183915774028, "epoch": 1.5396171486638655, "grad_norm": 0.6690120697021484, "learning_rate": 4.3889105582384525e-06, "loss": 1.3286, "mean_token_accuracy": 0.6693990727265676, "num_tokens": 2351073624.0, "step": 14015 }, { "entropy": 1.7471397519111633, "epoch": 1.5397270055752381, "grad_norm": 0.6697660684585571, "learning_rate": 4.387824236446395e-06, "loss": 1.5972, "mean_token_accuracy": 0.616848016778628, "num_tokens": 2351288596.0, "step": 14016 }, { "entropy": 1.7265910804271698, "epoch": 1.5398368624866112, "grad_norm": 0.6318051218986511, "learning_rate": 4.38673812392672e-06, "loss": 1.3871, "mean_token_accuracy": 0.6530221005280813, "num_tokens": 2351452744.0, "step": 14017 }, { "entropy": 1.6270829439163208, "epoch": 1.539946719397984, "grad_norm": 0.7931689023971558, "learning_rate": 4.385652220713801e-06, "loss": 1.4203, "mean_token_accuracy": 0.6615447551012039, "num_tokens": 2351609983.0, "step": 14018 }, { "entropy": 1.6672570705413818, "epoch": 1.540056576309357, "grad_norm": 0.7277114391326904, "learning_rate": 4.384566526842011e-06, "loss": 1.3248, "mean_token_accuracy": 0.6881892184416453, "num_tokens": 2351740970.0, "step": 14019 }, { "entropy": 1.6719843447208405, "epoch": 1.54016643322073, "grad_norm": 0.6636250615119934, "learning_rate": 4.383481042345707e-06, "loss": 1.334, "mean_token_accuracy": 0.669186050693194, "num_tokens": 2351887213.0, "step": 14020 }, { "entropy": 1.7347903450330098, "epoch": 1.540276290132103, "grad_norm": 0.7051345109939575, "learning_rate": 4.382395767259252e-06, "loss": 1.3205, "mean_token_accuracy": 0.6803038567304611, "num_tokens": 2352004799.0, "step": 14021 }, { "entropy": 1.658490777015686, "epoch": 1.5403861470434759, "grad_norm": 0.7130881547927856, "learning_rate": 4.381310701616985e-06, "loss": 1.368, "mean_token_accuracy": 0.6594650596380234, "num_tokens": 2352206971.0, "step": 14022 }, { "entropy": 1.7292596499125164, "epoch": 1.5404960039548488, "grad_norm": 0.6630394458770752, "learning_rate": 4.3802258454532495e-06, "loss": 1.3735, "mean_token_accuracy": 0.6567981640497843, "num_tokens": 2352400744.0, "step": 14023 }, { "entropy": 1.7222495377063751, "epoch": 1.5406058608662216, "grad_norm": 0.6828321814537048, "learning_rate": 4.379141198802388e-06, "loss": 1.4488, "mean_token_accuracy": 0.6561195055643717, "num_tokens": 2352562667.0, "step": 14024 }, { "entropy": 1.646317849556605, "epoch": 1.5407157177775948, "grad_norm": 0.7620025277137756, "learning_rate": 4.378056761698722e-06, "loss": 1.3757, "mean_token_accuracy": 0.6582992623249689, "num_tokens": 2352756006.0, "step": 14025 }, { "entropy": 1.6566996177037556, "epoch": 1.5408255746889676, "grad_norm": 0.6729485988616943, "learning_rate": 4.3769725341765745e-06, "loss": 1.4421, "mean_token_accuracy": 0.6499632894992828, "num_tokens": 2352952033.0, "step": 14026 }, { "entropy": 1.7033120195070903, "epoch": 1.5409354316003405, "grad_norm": 0.6999285817146301, "learning_rate": 4.375888516270264e-06, "loss": 1.3622, "mean_token_accuracy": 0.6672971496979395, "num_tokens": 2353094627.0, "step": 14027 }, { "entropy": 1.7603072027365367, "epoch": 1.5410452885117136, "grad_norm": 0.7112701535224915, "learning_rate": 4.3748047080140935e-06, "loss": 1.4744, "mean_token_accuracy": 0.6439538995424906, "num_tokens": 2353238406.0, "step": 14028 }, { "entropy": 1.691114326318105, "epoch": 1.5411551454230863, "grad_norm": 0.6865422129631042, "learning_rate": 4.373721109442373e-06, "loss": 1.3223, "mean_token_accuracy": 0.6618143618106842, "num_tokens": 2353364805.0, "step": 14029 }, { "entropy": 1.7279209593931835, "epoch": 1.5412650023344594, "grad_norm": 0.6755762100219727, "learning_rate": 4.3726377205893925e-06, "loss": 1.6106, "mean_token_accuracy": 0.6444238399465879, "num_tokens": 2353566096.0, "step": 14030 }, { "entropy": 1.7265671888987224, "epoch": 1.5413748592458323, "grad_norm": 0.7081911563873291, "learning_rate": 4.371554541489439e-06, "loss": 1.4627, "mean_token_accuracy": 0.658619354168574, "num_tokens": 2353736452.0, "step": 14031 }, { "entropy": 1.679870496193568, "epoch": 1.5414847161572052, "grad_norm": 0.6351853013038635, "learning_rate": 4.370471572176797e-06, "loss": 1.4357, "mean_token_accuracy": 0.651072566707929, "num_tokens": 2353911720.0, "step": 14032 }, { "entropy": 1.72749329606692, "epoch": 1.5415945730685783, "grad_norm": 0.7487029433250427, "learning_rate": 4.369388812685748e-06, "loss": 1.3973, "mean_token_accuracy": 0.6484367648760477, "num_tokens": 2354086543.0, "step": 14033 }, { "entropy": 1.662652164697647, "epoch": 1.5417044299799512, "grad_norm": 0.7096850872039795, "learning_rate": 4.3683062630505515e-06, "loss": 1.4054, "mean_token_accuracy": 0.6659991989533106, "num_tokens": 2354250773.0, "step": 14034 }, { "entropy": 1.680237591266632, "epoch": 1.541814286891324, "grad_norm": 0.8764726519584656, "learning_rate": 4.367223923305471e-06, "loss": 1.4226, "mean_token_accuracy": 0.6559638977050781, "num_tokens": 2354385839.0, "step": 14035 }, { "entropy": 1.6535289386908214, "epoch": 1.541924143802697, "grad_norm": 0.651672899723053, "learning_rate": 4.366141793484769e-06, "loss": 1.2837, "mean_token_accuracy": 0.6699432631333669, "num_tokens": 2354550541.0, "step": 14036 }, { "entropy": 1.706730951865514, "epoch": 1.5420340007140698, "grad_norm": 0.7192301154136658, "learning_rate": 4.365059873622689e-06, "loss": 1.2958, "mean_token_accuracy": 0.6683982561031977, "num_tokens": 2354690354.0, "step": 14037 }, { "entropy": 1.7195076942443848, "epoch": 1.542143857625443, "grad_norm": 0.6625783443450928, "learning_rate": 4.363978163753472e-06, "loss": 1.5496, "mean_token_accuracy": 0.6505264093478521, "num_tokens": 2354936587.0, "step": 14038 }, { "entropy": 1.7424062093098958, "epoch": 1.5422537145368158, "grad_norm": 0.7096309661865234, "learning_rate": 4.362896663911359e-06, "loss": 1.4185, "mean_token_accuracy": 0.6444617807865143, "num_tokens": 2355169144.0, "step": 14039 }, { "entropy": 1.6861979564030964, "epoch": 1.5423635714481887, "grad_norm": 0.6280454397201538, "learning_rate": 4.361815374130572e-06, "loss": 1.3581, "mean_token_accuracy": 0.6629171371459961, "num_tokens": 2355322642.0, "step": 14040 }, { "entropy": 1.7170844674110413, "epoch": 1.5424734283595618, "grad_norm": 0.6770321726799011, "learning_rate": 4.360734294445341e-06, "loss": 1.4312, "mean_token_accuracy": 0.650084396203359, "num_tokens": 2355507572.0, "step": 14041 }, { "entropy": 1.688788741827011, "epoch": 1.5425832852709345, "grad_norm": 0.8258582949638367, "learning_rate": 4.359653424889877e-06, "loss": 1.3963, "mean_token_accuracy": 0.6617969572544098, "num_tokens": 2355672193.0, "step": 14042 }, { "entropy": 1.6985827187697093, "epoch": 1.5426931421823076, "grad_norm": 0.6929754614830017, "learning_rate": 4.358572765498388e-06, "loss": 1.3762, "mean_token_accuracy": 0.6613185753424963, "num_tokens": 2355849560.0, "step": 14043 }, { "entropy": 1.7190652589003246, "epoch": 1.5428029990936805, "grad_norm": 0.6878488659858704, "learning_rate": 4.357492316305078e-06, "loss": 1.3982, "mean_token_accuracy": 0.659120962023735, "num_tokens": 2356025745.0, "step": 14044 }, { "entropy": 1.638331522544225, "epoch": 1.5429128560050533, "grad_norm": 0.5437892079353333, "learning_rate": 4.356412077344148e-06, "loss": 1.4459, "mean_token_accuracy": 0.6453542610009512, "num_tokens": 2356249597.0, "step": 14045 }, { "entropy": 1.6433692872524261, "epoch": 1.5430227129164265, "grad_norm": 0.7393973469734192, "learning_rate": 4.355332048649777e-06, "loss": 1.3323, "mean_token_accuracy": 0.6734596192836761, "num_tokens": 2356435811.0, "step": 14046 }, { "entropy": 1.6932270030180614, "epoch": 1.5431325698277993, "grad_norm": 0.7015334367752075, "learning_rate": 4.354252230256152e-06, "loss": 1.5119, "mean_token_accuracy": 0.6505384395519892, "num_tokens": 2356616344.0, "step": 14047 }, { "entropy": 1.722126583258311, "epoch": 1.5432424267391722, "grad_norm": 0.8697376251220703, "learning_rate": 4.353172622197453e-06, "loss": 1.593, "mean_token_accuracy": 0.6482644279797872, "num_tokens": 2356757894.0, "step": 14048 }, { "entropy": 1.7142964998881023, "epoch": 1.5433522836505453, "grad_norm": 0.8031371235847473, "learning_rate": 4.352093224507844e-06, "loss": 1.4184, "mean_token_accuracy": 0.6482335776090622, "num_tokens": 2356905299.0, "step": 14049 }, { "entropy": 1.7244684199492137, "epoch": 1.543462140561918, "grad_norm": 0.8741907477378845, "learning_rate": 4.351014037221487e-06, "loss": 1.163, "mean_token_accuracy": 0.687325323621432, "num_tokens": 2357051562.0, "step": 14050 }, { "entropy": 1.733912189801534, "epoch": 1.543571997473291, "grad_norm": 0.7576857209205627, "learning_rate": 4.349935060372542e-06, "loss": 1.4504, "mean_token_accuracy": 0.6613880942265192, "num_tokens": 2357268692.0, "step": 14051 }, { "entropy": 1.711068868637085, "epoch": 1.543681854384664, "grad_norm": 0.8415870666503906, "learning_rate": 4.348856293995154e-06, "loss": 1.3542, "mean_token_accuracy": 0.6686215748389562, "num_tokens": 2357397384.0, "step": 14052 }, { "entropy": 1.7130617996056874, "epoch": 1.5437917112960369, "grad_norm": 0.6683608889579773, "learning_rate": 4.347777738123469e-06, "loss": 1.4191, "mean_token_accuracy": 0.6496846874554952, "num_tokens": 2357561125.0, "step": 14053 }, { "entropy": 1.6761254767576854, "epoch": 1.54390156820741, "grad_norm": 0.7907574772834778, "learning_rate": 4.3466993927916215e-06, "loss": 1.5052, "mean_token_accuracy": 0.6459088623523712, "num_tokens": 2357745122.0, "step": 14054 }, { "entropy": 1.6257309913635254, "epoch": 1.5440114251187826, "grad_norm": 0.6445624828338623, "learning_rate": 4.345621258033737e-06, "loss": 1.421, "mean_token_accuracy": 0.6715798825025558, "num_tokens": 2357927515.0, "step": 14055 }, { "entropy": 1.6763292849063873, "epoch": 1.5441212820301558, "grad_norm": 0.6625300645828247, "learning_rate": 4.344543333883941e-06, "loss": 1.3599, "mean_token_accuracy": 0.6491398314634959, "num_tokens": 2358124048.0, "step": 14056 }, { "entropy": 1.7738927900791168, "epoch": 1.5442311389415286, "grad_norm": 0.7023770213127136, "learning_rate": 4.343465620376355e-06, "loss": 1.3112, "mean_token_accuracy": 0.6682771146297455, "num_tokens": 2358256450.0, "step": 14057 }, { "entropy": 1.685390333334605, "epoch": 1.5443409958529015, "grad_norm": 0.7113889455795288, "learning_rate": 4.342388117545078e-06, "loss": 1.3734, "mean_token_accuracy": 0.6664382467667261, "num_tokens": 2358391277.0, "step": 14058 }, { "entropy": 1.6166270176569622, "epoch": 1.5444508527642746, "grad_norm": 0.6758726835250854, "learning_rate": 4.341310825424215e-06, "loss": 1.3477, "mean_token_accuracy": 0.6600983242193857, "num_tokens": 2358561825.0, "step": 14059 }, { "entropy": 1.6522872944672902, "epoch": 1.5445607096756475, "grad_norm": 0.596356213092804, "learning_rate": 4.340233744047868e-06, "loss": 1.4209, "mean_token_accuracy": 0.6476166248321533, "num_tokens": 2358778206.0, "step": 14060 }, { "entropy": 1.724749763806661, "epoch": 1.5446705665870204, "grad_norm": 0.6655259728431702, "learning_rate": 4.339156873450122e-06, "loss": 1.3939, "mean_token_accuracy": 0.6588339308897654, "num_tokens": 2358936328.0, "step": 14061 }, { "entropy": 1.7044113278388977, "epoch": 1.5447804234983935, "grad_norm": 0.7894936203956604, "learning_rate": 4.338080213665058e-06, "loss": 1.362, "mean_token_accuracy": 0.6574911077817281, "num_tokens": 2359127677.0, "step": 14062 }, { "entropy": 1.6766592065493267, "epoch": 1.5448902804097662, "grad_norm": 0.7808144688606262, "learning_rate": 4.337003764726754e-06, "loss": 1.4714, "mean_token_accuracy": 0.6505367159843445, "num_tokens": 2359318644.0, "step": 14063 }, { "entropy": 1.6718900700410206, "epoch": 1.5450001373211393, "grad_norm": 0.7390754222869873, "learning_rate": 4.335927526669277e-06, "loss": 1.2496, "mean_token_accuracy": 0.6705302894115448, "num_tokens": 2359475453.0, "step": 14064 }, { "entropy": 1.6547542810440063, "epoch": 1.5451099942325122, "grad_norm": 0.716488242149353, "learning_rate": 4.334851499526693e-06, "loss": 1.3649, "mean_token_accuracy": 0.65887650847435, "num_tokens": 2359637946.0, "step": 14065 }, { "entropy": 1.6610281368096669, "epoch": 1.545219851143885, "grad_norm": 0.636939525604248, "learning_rate": 4.333775683333056e-06, "loss": 1.2319, "mean_token_accuracy": 0.6830330838759741, "num_tokens": 2359770202.0, "step": 14066 }, { "entropy": 1.6054012378056843, "epoch": 1.5453297080552582, "grad_norm": 0.6318295001983643, "learning_rate": 4.332700078122411e-06, "loss": 1.3044, "mean_token_accuracy": 0.6684317042430242, "num_tokens": 2359979968.0, "step": 14067 }, { "entropy": 1.734905183315277, "epoch": 1.5454395649666308, "grad_norm": 0.5726717114448547, "learning_rate": 4.3316246839288055e-06, "loss": 1.5647, "mean_token_accuracy": 0.6327670514583588, "num_tokens": 2360193178.0, "step": 14068 }, { "entropy": 1.7301185925801594, "epoch": 1.545549421878004, "grad_norm": 0.7241100668907166, "learning_rate": 4.330549500786279e-06, "loss": 1.5143, "mean_token_accuracy": 0.6339599887530009, "num_tokens": 2360414280.0, "step": 14069 }, { "entropy": 1.6495436231295268, "epoch": 1.5456592787893768, "grad_norm": 0.6548447012901306, "learning_rate": 4.329474528728851e-06, "loss": 1.3183, "mean_token_accuracy": 0.6611727774143219, "num_tokens": 2360550978.0, "step": 14070 }, { "entropy": 1.733685662349065, "epoch": 1.5457691357007497, "grad_norm": 0.7285884618759155, "learning_rate": 4.328399767790546e-06, "loss": 1.448, "mean_token_accuracy": 0.6412131836016973, "num_tokens": 2360730661.0, "step": 14071 }, { "entropy": 1.7295173903306325, "epoch": 1.5458789926121228, "grad_norm": 0.7899370789527893, "learning_rate": 4.327325218005386e-06, "loss": 1.3316, "mean_token_accuracy": 0.6687415341536204, "num_tokens": 2360851373.0, "step": 14072 }, { "entropy": 1.6831912994384766, "epoch": 1.5459888495234957, "grad_norm": 0.658294677734375, "learning_rate": 4.326250879407377e-06, "loss": 1.249, "mean_token_accuracy": 0.6833125005165736, "num_tokens": 2360979800.0, "step": 14073 }, { "entropy": 1.654470185438792, "epoch": 1.5460987064348686, "grad_norm": 0.6260450482368469, "learning_rate": 4.325176752030516e-06, "loss": 1.3658, "mean_token_accuracy": 0.6646214425563812, "num_tokens": 2361129203.0, "step": 14074 }, { "entropy": 1.6546796262264252, "epoch": 1.5462085633462417, "grad_norm": 0.6665990948677063, "learning_rate": 4.324102835908807e-06, "loss": 1.3189, "mean_token_accuracy": 0.6715873231490453, "num_tokens": 2361304020.0, "step": 14075 }, { "entropy": 1.684925526380539, "epoch": 1.5463184202576143, "grad_norm": 0.7178765535354614, "learning_rate": 4.323029131076232e-06, "loss": 1.3917, "mean_token_accuracy": 0.654693936308225, "num_tokens": 2361468876.0, "step": 14076 }, { "entropy": 1.7206127643585205, "epoch": 1.5464282771689875, "grad_norm": 0.7334764003753662, "learning_rate": 4.321955637566779e-06, "loss": 1.4133, "mean_token_accuracy": 0.6604679723580679, "num_tokens": 2361644981.0, "step": 14077 }, { "entropy": 1.7566125492254894, "epoch": 1.5465381340803603, "grad_norm": 0.9064491391181946, "learning_rate": 4.320882355414421e-06, "loss": 1.3328, "mean_token_accuracy": 0.6624845316012701, "num_tokens": 2361768215.0, "step": 14078 }, { "entropy": 1.6920438210169475, "epoch": 1.5466479909917332, "grad_norm": 0.8833717107772827, "learning_rate": 4.319809284653123e-06, "loss": 1.3987, "mean_token_accuracy": 0.6700094143549601, "num_tokens": 2361915240.0, "step": 14079 }, { "entropy": 1.6645349264144897, "epoch": 1.5467578479031063, "grad_norm": 0.5727179646492004, "learning_rate": 4.318736425316855e-06, "loss": 1.528, "mean_token_accuracy": 0.6394601066907247, "num_tokens": 2362137844.0, "step": 14080 }, { "entropy": 1.7052935063838959, "epoch": 1.546867704814479, "grad_norm": 0.7055914402008057, "learning_rate": 4.317663777439567e-06, "loss": 1.3783, "mean_token_accuracy": 0.6631141553322474, "num_tokens": 2362348283.0, "step": 14081 }, { "entropy": 1.6607483228047688, "epoch": 1.546977561725852, "grad_norm": 0.6008187532424927, "learning_rate": 4.316591341055208e-06, "loss": 1.3967, "mean_token_accuracy": 0.6505735764900843, "num_tokens": 2362519677.0, "step": 14082 }, { "entropy": 1.7044240633646648, "epoch": 1.547087418637225, "grad_norm": 0.6458705067634583, "learning_rate": 4.315519116197724e-06, "loss": 1.4219, "mean_token_accuracy": 0.6473012765248617, "num_tokens": 2362691629.0, "step": 14083 }, { "entropy": 1.7407875955104828, "epoch": 1.5471972755485979, "grad_norm": 0.7819077372550964, "learning_rate": 4.314447102901045e-06, "loss": 1.2604, "mean_token_accuracy": 0.687695175409317, "num_tokens": 2362816475.0, "step": 14084 }, { "entropy": 1.6973053614298503, "epoch": 1.547307132459971, "grad_norm": 0.6520508527755737, "learning_rate": 4.3133753011991046e-06, "loss": 1.3195, "mean_token_accuracy": 0.6731201509634653, "num_tokens": 2362972835.0, "step": 14085 }, { "entropy": 1.732239951690038, "epoch": 1.5474169893713439, "grad_norm": 0.8456202149391174, "learning_rate": 4.312303711125824e-06, "loss": 1.278, "mean_token_accuracy": 0.666911373535792, "num_tokens": 2363081271.0, "step": 14086 }, { "entropy": 1.644601583480835, "epoch": 1.5475268462827168, "grad_norm": 0.8071454763412476, "learning_rate": 4.311232332715114e-06, "loss": 1.4276, "mean_token_accuracy": 0.659256507953008, "num_tokens": 2363264153.0, "step": 14087 }, { "entropy": 1.6631284455458324, "epoch": 1.5476367031940899, "grad_norm": 0.6700156927108765, "learning_rate": 4.310161166000887e-06, "loss": 1.2801, "mean_token_accuracy": 0.6774795204401016, "num_tokens": 2363392436.0, "step": 14088 }, { "entropy": 1.7066160937150319, "epoch": 1.5477465601054625, "grad_norm": 0.6427406072616577, "learning_rate": 4.309090211017049e-06, "loss": 1.3209, "mean_token_accuracy": 0.6586870650450388, "num_tokens": 2363528126.0, "step": 14089 }, { "entropy": 1.799843966960907, "epoch": 1.5478564170168356, "grad_norm": 0.7809091210365295, "learning_rate": 4.308019467797487e-06, "loss": 1.4235, "mean_token_accuracy": 0.6456949164470037, "num_tokens": 2363671865.0, "step": 14090 }, { "entropy": 1.6500937740008037, "epoch": 1.5479662739282085, "grad_norm": 0.783412516117096, "learning_rate": 4.306948936376093e-06, "loss": 1.4475, "mean_token_accuracy": 0.6537269403537115, "num_tokens": 2363846696.0, "step": 14091 }, { "entropy": 1.7201881210009258, "epoch": 1.5480761308395814, "grad_norm": 0.6507890224456787, "learning_rate": 4.3058786167867505e-06, "loss": 1.349, "mean_token_accuracy": 0.6581819206476212, "num_tokens": 2364020827.0, "step": 14092 }, { "entropy": 1.702020267645518, "epoch": 1.5481859877509545, "grad_norm": 0.6586436033248901, "learning_rate": 4.304808509063335e-06, "loss": 1.4886, "mean_token_accuracy": 0.6433099905649821, "num_tokens": 2364256888.0, "step": 14093 }, { "entropy": 1.666669249534607, "epoch": 1.5482958446623272, "grad_norm": 0.7112491130828857, "learning_rate": 4.30373861323971e-06, "loss": 1.3181, "mean_token_accuracy": 0.6703788836797079, "num_tokens": 2364409964.0, "step": 14094 }, { "entropy": 1.702845573425293, "epoch": 1.5484057015737003, "grad_norm": 0.5408870577812195, "learning_rate": 4.302668929349742e-06, "loss": 1.4346, "mean_token_accuracy": 0.6451524297396342, "num_tokens": 2364604014.0, "step": 14095 }, { "entropy": 1.7024723092714946, "epoch": 1.5485155584850732, "grad_norm": 0.709581196308136, "learning_rate": 4.301599457427284e-06, "loss": 1.2413, "mean_token_accuracy": 0.6726260830958685, "num_tokens": 2364759857.0, "step": 14096 }, { "entropy": 1.6831977367401123, "epoch": 1.548625415396446, "grad_norm": 0.9029605388641357, "learning_rate": 4.300530197506187e-06, "loss": 1.4204, "mean_token_accuracy": 0.6639485061168671, "num_tokens": 2364941448.0, "step": 14097 }, { "entropy": 1.6596945226192474, "epoch": 1.5487352723078192, "grad_norm": 0.7375509142875671, "learning_rate": 4.299461149620289e-06, "loss": 1.2836, "mean_token_accuracy": 0.6749891887108485, "num_tokens": 2365057955.0, "step": 14098 }, { "entropy": 1.732172687848409, "epoch": 1.548845129219192, "grad_norm": 0.8708524107933044, "learning_rate": 4.298392313803423e-06, "loss": 1.6704, "mean_token_accuracy": 0.6292888720830282, "num_tokens": 2365272724.0, "step": 14099 }, { "entropy": 1.7026624778906505, "epoch": 1.548954986130565, "grad_norm": 0.6516265273094177, "learning_rate": 4.297323690089423e-06, "loss": 1.3864, "mean_token_accuracy": 0.6629829307397207, "num_tokens": 2365445475.0, "step": 14100 }, { "entropy": 1.757458617289861, "epoch": 1.549064843041938, "grad_norm": 0.6837176084518433, "learning_rate": 4.296255278512112e-06, "loss": 1.4276, "mean_token_accuracy": 0.6513576706250509, "num_tokens": 2365594575.0, "step": 14101 }, { "entropy": 1.6596699754397075, "epoch": 1.5491746999533107, "grad_norm": 0.7006385922431946, "learning_rate": 4.295187079105296e-06, "loss": 1.4615, "mean_token_accuracy": 0.6513244410355886, "num_tokens": 2365749946.0, "step": 14102 }, { "entropy": 1.7063644925753276, "epoch": 1.5492845568646838, "grad_norm": 0.712912917137146, "learning_rate": 4.294119091902786e-06, "loss": 1.4283, "mean_token_accuracy": 0.6631234188874563, "num_tokens": 2365907668.0, "step": 14103 }, { "entropy": 1.6990328629811604, "epoch": 1.5493944137760567, "grad_norm": 0.7050018310546875, "learning_rate": 4.293051316938389e-06, "loss": 1.2285, "mean_token_accuracy": 0.6825551042954127, "num_tokens": 2366041547.0, "step": 14104 }, { "entropy": 1.6146796643733978, "epoch": 1.5495042706874296, "grad_norm": 0.7505760192871094, "learning_rate": 4.291983754245895e-06, "loss": 1.3202, "mean_token_accuracy": 0.6715732961893082, "num_tokens": 2366209639.0, "step": 14105 }, { "entropy": 1.793668379386266, "epoch": 1.5496141275988027, "grad_norm": 0.7830557823181152, "learning_rate": 4.2909164038590915e-06, "loss": 1.3973, "mean_token_accuracy": 0.6473236183325449, "num_tokens": 2366333296.0, "step": 14106 }, { "entropy": 1.6847633024056752, "epoch": 1.5497239845101753, "grad_norm": 0.6471010446548462, "learning_rate": 4.289849265811761e-06, "loss": 1.3483, "mean_token_accuracy": 0.6691886434952418, "num_tokens": 2366478156.0, "step": 14107 }, { "entropy": 1.6979444523652394, "epoch": 1.5498338414215485, "grad_norm": 0.7506217360496521, "learning_rate": 4.288782340137675e-06, "loss": 1.3874, "mean_token_accuracy": 0.6577440400918325, "num_tokens": 2366636399.0, "step": 14108 }, { "entropy": 1.6835704545180004, "epoch": 1.5499436983329213, "grad_norm": 0.6772280931472778, "learning_rate": 4.287715626870609e-06, "loss": 1.318, "mean_token_accuracy": 0.6703698684771856, "num_tokens": 2366777293.0, "step": 14109 }, { "entropy": 1.716042975584666, "epoch": 1.5500535552442942, "grad_norm": 0.6948290467262268, "learning_rate": 4.286649126044316e-06, "loss": 1.5699, "mean_token_accuracy": 0.6452557643254598, "num_tokens": 2366963739.0, "step": 14110 }, { "entropy": 1.6994237899780273, "epoch": 1.5501634121556673, "grad_norm": 0.7720609307289124, "learning_rate": 4.2855828376925515e-06, "loss": 1.3042, "mean_token_accuracy": 0.6619250476360321, "num_tokens": 2367094397.0, "step": 14111 }, { "entropy": 1.7140393952528636, "epoch": 1.5502732690670402, "grad_norm": 0.6399521827697754, "learning_rate": 4.2845167618490645e-06, "loss": 1.4772, "mean_token_accuracy": 0.6374075512091318, "num_tokens": 2367332174.0, "step": 14112 }, { "entropy": 1.6940363347530365, "epoch": 1.550383125978413, "grad_norm": 0.723393976688385, "learning_rate": 4.283450898547601e-06, "loss": 1.3998, "mean_token_accuracy": 0.6478741665681204, "num_tokens": 2367479323.0, "step": 14113 }, { "entropy": 1.676634858051936, "epoch": 1.5504929828897862, "grad_norm": 0.724162220954895, "learning_rate": 4.282385247821886e-06, "loss": 1.234, "mean_token_accuracy": 0.6788554986317953, "num_tokens": 2367643037.0, "step": 14114 }, { "entropy": 1.6137764851252239, "epoch": 1.5506028398011589, "grad_norm": 0.7134261727333069, "learning_rate": 4.28131980970565e-06, "loss": 1.2265, "mean_token_accuracy": 0.682574192682902, "num_tokens": 2367828930.0, "step": 14115 }, { "entropy": 1.7493961155414581, "epoch": 1.550712696712532, "grad_norm": 0.6667389273643494, "learning_rate": 4.280254584232616e-06, "loss": 1.4779, "mean_token_accuracy": 0.6394469936688741, "num_tokens": 2368035180.0, "step": 14116 }, { "entropy": 1.6516866981983185, "epoch": 1.5508225536239049, "grad_norm": 0.7217621803283691, "learning_rate": 4.279189571436497e-06, "loss": 1.3961, "mean_token_accuracy": 0.6504537761211395, "num_tokens": 2368265315.0, "step": 14117 }, { "entropy": 1.720879077911377, "epoch": 1.5509324105352778, "grad_norm": 0.7750219702720642, "learning_rate": 4.2781247713509985e-06, "loss": 1.5135, "mean_token_accuracy": 0.6509824097156525, "num_tokens": 2368447962.0, "step": 14118 }, { "entropy": 1.7540039718151093, "epoch": 1.5510422674466509, "grad_norm": 0.768004834651947, "learning_rate": 4.2770601840098235e-06, "loss": 1.4708, "mean_token_accuracy": 0.664596493045489, "num_tokens": 2368632837.0, "step": 14119 }, { "entropy": 1.6874784628550212, "epoch": 1.5511521243580235, "grad_norm": 0.6328350305557251, "learning_rate": 4.275995809446661e-06, "loss": 1.3143, "mean_token_accuracy": 0.661526824037234, "num_tokens": 2368796121.0, "step": 14120 }, { "entropy": 1.7491315305233002, "epoch": 1.5512619812693966, "grad_norm": 0.6634995341300964, "learning_rate": 4.274931647695205e-06, "loss": 1.4015, "mean_token_accuracy": 0.6602429201205572, "num_tokens": 2368954333.0, "step": 14121 }, { "entropy": 1.7477340896924336, "epoch": 1.5513718381807695, "grad_norm": 0.8590699434280396, "learning_rate": 4.273867698789132e-06, "loss": 1.4338, "mean_token_accuracy": 0.6396941244602203, "num_tokens": 2369138689.0, "step": 14122 }, { "entropy": 1.671852171421051, "epoch": 1.5514816950921424, "grad_norm": 0.63103187084198, "learning_rate": 4.272803962762112e-06, "loss": 1.3111, "mean_token_accuracy": 0.6657995829979578, "num_tokens": 2369305079.0, "step": 14123 }, { "entropy": 1.6969023446242015, "epoch": 1.5515915520035155, "grad_norm": 0.8352360725402832, "learning_rate": 4.271740439647815e-06, "loss": 1.5118, "mean_token_accuracy": 0.6522821436325709, "num_tokens": 2369463395.0, "step": 14124 }, { "entropy": 1.7147069871425629, "epoch": 1.5517014089148884, "grad_norm": 0.714336097240448, "learning_rate": 4.270677129479908e-06, "loss": 1.3111, "mean_token_accuracy": 0.6640656888484955, "num_tokens": 2369580422.0, "step": 14125 }, { "entropy": 1.6699798206488292, "epoch": 1.5518112658262613, "grad_norm": 0.6139026284217834, "learning_rate": 4.2696140322920305e-06, "loss": 1.3299, "mean_token_accuracy": 0.6675732731819153, "num_tokens": 2369739830.0, "step": 14126 }, { "entropy": 1.642260581254959, "epoch": 1.5519211227376344, "grad_norm": 0.6656533479690552, "learning_rate": 4.268551148117836e-06, "loss": 1.4251, "mean_token_accuracy": 0.651205783089002, "num_tokens": 2369902180.0, "step": 14127 }, { "entropy": 1.6300160487492878, "epoch": 1.552030979649007, "grad_norm": 0.7406736612319946, "learning_rate": 4.26748847699097e-06, "loss": 1.4728, "mean_token_accuracy": 0.6558305223782858, "num_tokens": 2370101604.0, "step": 14128 }, { "entropy": 1.705411930878957, "epoch": 1.5521408365603802, "grad_norm": 0.6777219772338867, "learning_rate": 4.266426018945058e-06, "loss": 1.3698, "mean_token_accuracy": 0.6538793096939722, "num_tokens": 2370260513.0, "step": 14129 }, { "entropy": 1.6665898859500885, "epoch": 1.552250693471753, "grad_norm": 0.7032326459884644, "learning_rate": 4.265363774013724e-06, "loss": 1.4292, "mean_token_accuracy": 0.6513901352882385, "num_tokens": 2370418589.0, "step": 14130 }, { "entropy": 1.7191846172014873, "epoch": 1.552360550383126, "grad_norm": 0.6671421527862549, "learning_rate": 4.264301742230597e-06, "loss": 1.4887, "mean_token_accuracy": 0.6549615909655889, "num_tokens": 2370593404.0, "step": 14131 }, { "entropy": 1.7173360486825306, "epoch": 1.552470407294499, "grad_norm": 0.7962349057197571, "learning_rate": 4.263239923629281e-06, "loss": 1.4229, "mean_token_accuracy": 0.6445601582527161, "num_tokens": 2370764543.0, "step": 14132 }, { "entropy": 1.7082890371481578, "epoch": 1.5525802642058717, "grad_norm": 0.8383502960205078, "learning_rate": 4.262178318243388e-06, "loss": 1.1737, "mean_token_accuracy": 0.6861835420131683, "num_tokens": 2370883229.0, "step": 14133 }, { "entropy": 1.7188211580117543, "epoch": 1.5526901211172448, "grad_norm": 0.7299179434776306, "learning_rate": 4.261116926106516e-06, "loss": 1.3521, "mean_token_accuracy": 0.6593069980541865, "num_tokens": 2371046757.0, "step": 14134 }, { "entropy": 1.6964614987373352, "epoch": 1.5527999780286177, "grad_norm": 0.7393060326576233, "learning_rate": 4.260055747252254e-06, "loss": 1.5476, "mean_token_accuracy": 0.6422794361909231, "num_tokens": 2371236555.0, "step": 14135 }, { "entropy": 1.7541022598743439, "epoch": 1.5529098349399906, "grad_norm": 0.753736674785614, "learning_rate": 4.25899478171419e-06, "loss": 1.4018, "mean_token_accuracy": 0.6527466426293055, "num_tokens": 2371417078.0, "step": 14136 }, { "entropy": 1.7315999070803325, "epoch": 1.5530196918513637, "grad_norm": 2.197566270828247, "learning_rate": 4.25793402952591e-06, "loss": 1.1632, "mean_token_accuracy": 0.6761045108238856, "num_tokens": 2371602729.0, "step": 14137 }, { "entropy": 1.7214235365390778, "epoch": 1.5531295487627366, "grad_norm": 0.764700710773468, "learning_rate": 4.256873490720973e-06, "loss": 1.6514, "mean_token_accuracy": 0.631921668847402, "num_tokens": 2371808926.0, "step": 14138 }, { "entropy": 1.7141134142875671, "epoch": 1.5532394056741095, "grad_norm": 0.5947176814079285, "learning_rate": 4.2558131653329544e-06, "loss": 1.3755, "mean_token_accuracy": 0.6477245340744654, "num_tokens": 2372023718.0, "step": 14139 }, { "entropy": 1.692691445350647, "epoch": 1.5533492625854826, "grad_norm": 0.6217459440231323, "learning_rate": 4.254753053395409e-06, "loss": 1.3618, "mean_token_accuracy": 0.6693163911501566, "num_tokens": 2372178895.0, "step": 14140 }, { "entropy": 1.7047333717346191, "epoch": 1.5534591194968552, "grad_norm": 0.6178786158561707, "learning_rate": 4.2536931549418904e-06, "loss": 1.4113, "mean_token_accuracy": 0.6456644187370936, "num_tokens": 2372352839.0, "step": 14141 }, { "entropy": 1.6846307615439098, "epoch": 1.5535689764082283, "grad_norm": 0.6864225268363953, "learning_rate": 4.252633470005945e-06, "loss": 1.3063, "mean_token_accuracy": 0.6656875361998876, "num_tokens": 2372488611.0, "step": 14142 }, { "entropy": 1.7061450779438019, "epoch": 1.5536788333196012, "grad_norm": 0.6568386554718018, "learning_rate": 4.2515739986211055e-06, "loss": 1.6754, "mean_token_accuracy": 0.6310825794935226, "num_tokens": 2372678594.0, "step": 14143 }, { "entropy": 1.6643461883068085, "epoch": 1.553788690230974, "grad_norm": 0.5872832536697388, "learning_rate": 4.25051474082091e-06, "loss": 1.4162, "mean_token_accuracy": 0.6413596421480179, "num_tokens": 2372892692.0, "step": 14144 }, { "entropy": 1.7513802250226338, "epoch": 1.5538985471423472, "grad_norm": 0.6232224702835083, "learning_rate": 4.249455696638883e-06, "loss": 1.2908, "mean_token_accuracy": 0.6734890739123026, "num_tokens": 2373012728.0, "step": 14145 }, { "entropy": 1.7303274869918823, "epoch": 1.5540084040537199, "grad_norm": 0.7580487728118896, "learning_rate": 4.248396866108543e-06, "loss": 1.3061, "mean_token_accuracy": 0.6751085370779037, "num_tokens": 2373133773.0, "step": 14146 }, { "entropy": 1.7423675457636516, "epoch": 1.554118260965093, "grad_norm": 0.7742456197738647, "learning_rate": 4.247338249263395e-06, "loss": 1.3927, "mean_token_accuracy": 0.6640025774637858, "num_tokens": 2373288121.0, "step": 14147 }, { "entropy": 1.7087977528572083, "epoch": 1.5542281178764659, "grad_norm": 0.6881621479988098, "learning_rate": 4.246279846136953e-06, "loss": 1.3576, "mean_token_accuracy": 0.6579535851875941, "num_tokens": 2373465928.0, "step": 14148 }, { "entropy": 1.7393077313899994, "epoch": 1.5543379747878387, "grad_norm": 0.7589281797409058, "learning_rate": 4.24522165676271e-06, "loss": 1.6051, "mean_token_accuracy": 0.6269615292549133, "num_tokens": 2373690820.0, "step": 14149 }, { "entropy": 1.690729945898056, "epoch": 1.5544478316992119, "grad_norm": 0.6652538776397705, "learning_rate": 4.244163681174155e-06, "loss": 1.266, "mean_token_accuracy": 0.6762543171644211, "num_tokens": 2373802485.0, "step": 14150 }, { "entropy": 1.6694500545660655, "epoch": 1.5545576886105847, "grad_norm": 0.6579715013504028, "learning_rate": 4.243105919404778e-06, "loss": 1.298, "mean_token_accuracy": 0.6703143616517385, "num_tokens": 2373952556.0, "step": 14151 }, { "entropy": 1.678593675295512, "epoch": 1.5546675455219576, "grad_norm": 0.6812438368797302, "learning_rate": 4.2420483714880515e-06, "loss": 1.3788, "mean_token_accuracy": 0.6640019963184992, "num_tokens": 2374097793.0, "step": 14152 }, { "entropy": 1.6968460778395336, "epoch": 1.5547774024333307, "grad_norm": 0.6862781047821045, "learning_rate": 4.2409910374574504e-06, "loss": 1.4078, "mean_token_accuracy": 0.6522165536880493, "num_tokens": 2374255586.0, "step": 14153 }, { "entropy": 1.7099703550338745, "epoch": 1.5548872593447034, "grad_norm": 0.6841778755187988, "learning_rate": 4.239933917346437e-06, "loss": 1.4141, "mean_token_accuracy": 0.6437595734993616, "num_tokens": 2374450111.0, "step": 14154 }, { "entropy": 1.8270711302757263, "epoch": 1.5549971162560765, "grad_norm": 1.8875739574432373, "learning_rate": 4.238877011188468e-06, "loss": 1.5367, "mean_token_accuracy": 0.6540461281935374, "num_tokens": 2374596550.0, "step": 14155 }, { "entropy": 1.6991891662279766, "epoch": 1.5551069731674494, "grad_norm": 1.2882972955703735, "learning_rate": 4.237820319016994e-06, "loss": 1.2703, "mean_token_accuracy": 0.6688820570707321, "num_tokens": 2374811196.0, "step": 14156 }, { "entropy": 1.7167048652966816, "epoch": 1.5552168300788223, "grad_norm": 0.5811730027198792, "learning_rate": 4.236763840865467e-06, "loss": 1.508, "mean_token_accuracy": 0.6333837409814199, "num_tokens": 2375025686.0, "step": 14157 }, { "entropy": 1.677632709344228, "epoch": 1.5553266869901954, "grad_norm": 0.5716765522956848, "learning_rate": 4.23570757676731e-06, "loss": 1.3812, "mean_token_accuracy": 0.6522747029860815, "num_tokens": 2375216440.0, "step": 14158 }, { "entropy": 1.708607812722524, "epoch": 1.555436543901568, "grad_norm": 0.7346358299255371, "learning_rate": 4.23465152675596e-06, "loss": 1.4194, "mean_token_accuracy": 0.6497345666090647, "num_tokens": 2375379987.0, "step": 14159 }, { "entropy": 1.6388816436131795, "epoch": 1.5555464008129412, "grad_norm": 0.7345917820930481, "learning_rate": 4.2335956908648425e-06, "loss": 1.2366, "mean_token_accuracy": 0.6748053133487701, "num_tokens": 2375590298.0, "step": 14160 }, { "entropy": 1.7384839057922363, "epoch": 1.555656257724314, "grad_norm": 0.6710056066513062, "learning_rate": 4.2325400691273735e-06, "loss": 1.5245, "mean_token_accuracy": 0.6526643683513006, "num_tokens": 2375744576.0, "step": 14161 }, { "entropy": 1.6977645556132, "epoch": 1.555766114635687, "grad_norm": 0.635971188545227, "learning_rate": 4.231484661576959e-06, "loss": 1.3574, "mean_token_accuracy": 0.6610794266064962, "num_tokens": 2375881526.0, "step": 14162 }, { "entropy": 1.6987218856811523, "epoch": 1.55587597154706, "grad_norm": 0.6726696491241455, "learning_rate": 4.2304294682470074e-06, "loss": 1.2988, "mean_token_accuracy": 0.673176700870196, "num_tokens": 2376011240.0, "step": 14163 }, { "entropy": 1.6941667199134827, "epoch": 1.555985828458433, "grad_norm": 0.7962403297424316, "learning_rate": 4.22937448917091e-06, "loss": 1.2457, "mean_token_accuracy": 0.6745900958776474, "num_tokens": 2376165761.0, "step": 14164 }, { "entropy": 1.6965778370698292, "epoch": 1.5560956853698058, "grad_norm": 0.6440910696983337, "learning_rate": 4.228319724382062e-06, "loss": 1.5532, "mean_token_accuracy": 0.644252801934878, "num_tokens": 2376349498.0, "step": 14165 }, { "entropy": 1.7470646401246388, "epoch": 1.556205542281179, "grad_norm": 0.789556086063385, "learning_rate": 4.227265173913843e-06, "loss": 1.4675, "mean_token_accuracy": 0.6519816418488821, "num_tokens": 2376516296.0, "step": 14166 }, { "entropy": 1.7214811444282532, "epoch": 1.5563153991925516, "grad_norm": 0.62434321641922, "learning_rate": 4.226210837799627e-06, "loss": 1.4814, "mean_token_accuracy": 0.6433521310488383, "num_tokens": 2376722817.0, "step": 14167 }, { "entropy": 1.684100478887558, "epoch": 1.5564252561039247, "grad_norm": 0.7984063029289246, "learning_rate": 4.2251567160727855e-06, "loss": 1.3731, "mean_token_accuracy": 0.6490538815657297, "num_tokens": 2376895776.0, "step": 14168 }, { "entropy": 1.7043040891488392, "epoch": 1.5565351130152976, "grad_norm": 0.6661989092826843, "learning_rate": 4.224102808766687e-06, "loss": 1.381, "mean_token_accuracy": 0.6549601207176844, "num_tokens": 2377035427.0, "step": 14169 }, { "entropy": 1.725882242123286, "epoch": 1.5566449699266705, "grad_norm": 0.6910036206245422, "learning_rate": 4.223049115914676e-06, "loss": 1.567, "mean_token_accuracy": 0.6483024209737778, "num_tokens": 2377217821.0, "step": 14170 }, { "entropy": 1.665495256582896, "epoch": 1.5567548268380436, "grad_norm": 0.5861942172050476, "learning_rate": 4.221995637550106e-06, "loss": 1.4559, "mean_token_accuracy": 0.6493095854918162, "num_tokens": 2377446515.0, "step": 14171 }, { "entropy": 1.7102225919564564, "epoch": 1.5568646837494162, "grad_norm": 0.8284782767295837, "learning_rate": 4.220942373706323e-06, "loss": 1.524, "mean_token_accuracy": 0.6480698237816492, "num_tokens": 2377592535.0, "step": 14172 }, { "entropy": 1.6960971355438232, "epoch": 1.5569745406607893, "grad_norm": 0.5749043822288513, "learning_rate": 4.219889324416659e-06, "loss": 1.4179, "mean_token_accuracy": 0.6462828616301218, "num_tokens": 2377780665.0, "step": 14173 }, { "entropy": 1.7206454773743947, "epoch": 1.5570843975721622, "grad_norm": 0.6680061221122742, "learning_rate": 4.218836489714439e-06, "loss": 1.3162, "mean_token_accuracy": 0.6785482615232468, "num_tokens": 2377910201.0, "step": 14174 }, { "entropy": 1.6479010879993439, "epoch": 1.557194254483535, "grad_norm": 0.6720148921012878, "learning_rate": 4.217783869632992e-06, "loss": 1.3067, "mean_token_accuracy": 0.6648319562276205, "num_tokens": 2378062830.0, "step": 14175 }, { "entropy": 1.6538492143154144, "epoch": 1.5573041113949082, "grad_norm": 0.6240495443344116, "learning_rate": 4.216731464205627e-06, "loss": 1.5057, "mean_token_accuracy": 0.6449542989333471, "num_tokens": 2378278509.0, "step": 14176 }, { "entropy": 1.6563106775283813, "epoch": 1.557413968306281, "grad_norm": 0.6286166906356812, "learning_rate": 4.215679273465657e-06, "loss": 1.374, "mean_token_accuracy": 0.6685599933067957, "num_tokens": 2378484457.0, "step": 14177 }, { "entropy": 1.7658939957618713, "epoch": 1.557523825217654, "grad_norm": 0.7517053484916687, "learning_rate": 4.214627297446381e-06, "loss": 1.3914, "mean_token_accuracy": 0.6562267889579138, "num_tokens": 2378641243.0, "step": 14178 }, { "entropy": 1.7408847510814667, "epoch": 1.557633682129027, "grad_norm": 0.7769100666046143, "learning_rate": 4.2135755361810905e-06, "loss": 1.4162, "mean_token_accuracy": 0.6557272672653198, "num_tokens": 2378783586.0, "step": 14179 }, { "entropy": 1.634146640698115, "epoch": 1.5577435390403997, "grad_norm": 0.695136547088623, "learning_rate": 4.212523989703077e-06, "loss": 1.3489, "mean_token_accuracy": 0.6566696465015411, "num_tokens": 2378935922.0, "step": 14180 }, { "entropy": 1.7598876059055328, "epoch": 1.5578533959517729, "grad_norm": 0.7562305331230164, "learning_rate": 4.211472658045625e-06, "loss": 1.368, "mean_token_accuracy": 0.6584480106830597, "num_tokens": 2379071944.0, "step": 14181 }, { "entropy": 1.6786730587482452, "epoch": 1.5579632528631457, "grad_norm": 0.6836439371109009, "learning_rate": 4.210421541242e-06, "loss": 1.4001, "mean_token_accuracy": 0.6513353139162064, "num_tokens": 2379284248.0, "step": 14182 }, { "entropy": 1.6920330325762432, "epoch": 1.5580731097745186, "grad_norm": 0.9302690029144287, "learning_rate": 4.209370639325473e-06, "loss": 1.6559, "mean_token_accuracy": 0.6281924843788147, "num_tokens": 2379450051.0, "step": 14183 }, { "entropy": 1.6634094417095184, "epoch": 1.5581829666858917, "grad_norm": 0.6819601655006409, "learning_rate": 4.208319952329308e-06, "loss": 1.4073, "mean_token_accuracy": 0.6675108969211578, "num_tokens": 2379644676.0, "step": 14184 }, { "entropy": 1.8039693931738536, "epoch": 1.5582928235972644, "grad_norm": 0.7473592758178711, "learning_rate": 4.207269480286757e-06, "loss": 1.52, "mean_token_accuracy": 0.6487270295619965, "num_tokens": 2379777755.0, "step": 14185 }, { "entropy": 1.8343103528022766, "epoch": 1.5584026805086375, "grad_norm": 0.7583062648773193, "learning_rate": 4.2062192232310626e-06, "loss": 1.5376, "mean_token_accuracy": 0.6354842483997345, "num_tokens": 2379973389.0, "step": 14186 }, { "entropy": 1.6272041896979015, "epoch": 1.5585125374200104, "grad_norm": 0.6743770837783813, "learning_rate": 4.205169181195471e-06, "loss": 1.2484, "mean_token_accuracy": 0.672362208366394, "num_tokens": 2380116473.0, "step": 14187 }, { "entropy": 1.7308520078659058, "epoch": 1.5586223943313833, "grad_norm": 0.7099290490150452, "learning_rate": 4.204119354213211e-06, "loss": 1.4756, "mean_token_accuracy": 0.6549335817495981, "num_tokens": 2380265493.0, "step": 14188 }, { "entropy": 1.6808405816555023, "epoch": 1.5587322512427564, "grad_norm": 0.6774865984916687, "learning_rate": 4.203069742317514e-06, "loss": 1.4098, "mean_token_accuracy": 0.6618853112061819, "num_tokens": 2380433799.0, "step": 14189 }, { "entropy": 1.792554259300232, "epoch": 1.5588421081541293, "grad_norm": 0.7282685041427612, "learning_rate": 4.202020345541596e-06, "loss": 1.3166, "mean_token_accuracy": 0.6641228546698889, "num_tokens": 2380605424.0, "step": 14190 }, { "entropy": 1.718471388022105, "epoch": 1.5589519650655022, "grad_norm": 0.6723158359527588, "learning_rate": 4.200971163918669e-06, "loss": 1.3729, "mean_token_accuracy": 0.6559204707543055, "num_tokens": 2380791222.0, "step": 14191 }, { "entropy": 1.7398603161176045, "epoch": 1.5590618219768753, "grad_norm": 0.7290942668914795, "learning_rate": 4.199922197481939e-06, "loss": 1.3562, "mean_token_accuracy": 0.6515438904364904, "num_tokens": 2380945915.0, "step": 14192 }, { "entropy": 1.7121857802073162, "epoch": 1.559171678888248, "grad_norm": 0.6564915180206299, "learning_rate": 4.198873446264615e-06, "loss": 1.4534, "mean_token_accuracy": 0.6428688168525696, "num_tokens": 2381107747.0, "step": 14193 }, { "entropy": 1.7519804040590923, "epoch": 1.559281535799621, "grad_norm": 0.764492392539978, "learning_rate": 4.197824910299875e-06, "loss": 1.3467, "mean_token_accuracy": 0.6621157228946686, "num_tokens": 2381225043.0, "step": 14194 }, { "entropy": 1.6727059384187062, "epoch": 1.559391392710994, "grad_norm": 0.6586747169494629, "learning_rate": 4.1967765896209115e-06, "loss": 1.3698, "mean_token_accuracy": 0.6521917631228765, "num_tokens": 2381386769.0, "step": 14195 }, { "entropy": 1.7378324270248413, "epoch": 1.5595012496223668, "grad_norm": 0.7085768580436707, "learning_rate": 4.195728484260906e-06, "loss": 1.2976, "mean_token_accuracy": 0.6643084188302358, "num_tokens": 2381524930.0, "step": 14196 }, { "entropy": 1.6326094667116802, "epoch": 1.55961110653374, "grad_norm": 1.9627199172973633, "learning_rate": 4.19468059425303e-06, "loss": 1.4151, "mean_token_accuracy": 0.6560747673114141, "num_tokens": 2381764868.0, "step": 14197 }, { "entropy": 1.6878510216871898, "epoch": 1.5597209634451128, "grad_norm": 0.5996699333190918, "learning_rate": 4.193632919630441e-06, "loss": 1.3121, "mean_token_accuracy": 0.6628714253505071, "num_tokens": 2381931844.0, "step": 14198 }, { "entropy": 1.6812229951222737, "epoch": 1.5598308203564857, "grad_norm": 0.7253499627113342, "learning_rate": 4.192585460426307e-06, "loss": 1.3634, "mean_token_accuracy": 0.6608059406280518, "num_tokens": 2382152497.0, "step": 14199 }, { "entropy": 1.7064985831578572, "epoch": 1.5599406772678586, "grad_norm": 0.7470372915267944, "learning_rate": 4.191538216673774e-06, "loss": 1.3977, "mean_token_accuracy": 0.6608186711867651, "num_tokens": 2382323536.0, "step": 14200 }, { "entropy": 1.704755167166392, "epoch": 1.5600505341792315, "grad_norm": 0.665684700012207, "learning_rate": 4.190491188405989e-06, "loss": 1.5564, "mean_token_accuracy": 0.6435166969895363, "num_tokens": 2382522953.0, "step": 14201 }, { "entropy": 1.6741365194320679, "epoch": 1.5601603910906046, "grad_norm": 0.6628307104110718, "learning_rate": 4.189444375656091e-06, "loss": 1.4985, "mean_token_accuracy": 0.6512368569771448, "num_tokens": 2382704610.0, "step": 14202 }, { "entropy": 1.6700753569602966, "epoch": 1.5602702480019774, "grad_norm": 0.678337812423706, "learning_rate": 4.188397778457207e-06, "loss": 1.4405, "mean_token_accuracy": 0.6458247303962708, "num_tokens": 2382894458.0, "step": 14203 }, { "entropy": 1.6928566992282867, "epoch": 1.5603801049133503, "grad_norm": 0.7208871841430664, "learning_rate": 4.187351396842466e-06, "loss": 1.2387, "mean_token_accuracy": 0.6764888813098272, "num_tokens": 2383057750.0, "step": 14204 }, { "entropy": 1.670570929845174, "epoch": 1.5604899618247234, "grad_norm": 0.6567087173461914, "learning_rate": 4.186305230844984e-06, "loss": 1.3304, "mean_token_accuracy": 0.6640313764413198, "num_tokens": 2383237390.0, "step": 14205 }, { "entropy": 1.7132259011268616, "epoch": 1.560599818736096, "grad_norm": 0.7733318209648132, "learning_rate": 4.185259280497867e-06, "loss": 1.4444, "mean_token_accuracy": 0.6487619827191035, "num_tokens": 2383433523.0, "step": 14206 }, { "entropy": 1.7129162947336833, "epoch": 1.5607096756474692, "grad_norm": 0.8005481362342834, "learning_rate": 4.184213545834227e-06, "loss": 1.2981, "mean_token_accuracy": 0.6748090038696924, "num_tokens": 2383559455.0, "step": 14207 }, { "entropy": 1.720715989669164, "epoch": 1.560819532558842, "grad_norm": 0.7148941159248352, "learning_rate": 4.183168026887154e-06, "loss": 1.5122, "mean_token_accuracy": 0.6497367918491364, "num_tokens": 2383724385.0, "step": 14208 }, { "entropy": 1.647435188293457, "epoch": 1.560929389470215, "grad_norm": 0.5823018550872803, "learning_rate": 4.1821227236897445e-06, "loss": 1.3786, "mean_token_accuracy": 0.6537514179944992, "num_tokens": 2383964981.0, "step": 14209 }, { "entropy": 1.6917157073815663, "epoch": 1.561039246381588, "grad_norm": 0.6810054779052734, "learning_rate": 4.1810776362750785e-06, "loss": 1.4568, "mean_token_accuracy": 0.6532232761383057, "num_tokens": 2384142302.0, "step": 14210 }, { "entropy": 1.7271502912044525, "epoch": 1.561149103292961, "grad_norm": 0.7245599627494812, "learning_rate": 4.180032764676228e-06, "loss": 1.3084, "mean_token_accuracy": 0.6805372933546702, "num_tokens": 2384268777.0, "step": 14211 }, { "entropy": 1.7125717997550964, "epoch": 1.5612589602043339, "grad_norm": 0.6059805750846863, "learning_rate": 4.178988108926269e-06, "loss": 1.4136, "mean_token_accuracy": 0.6488851606845856, "num_tokens": 2384458391.0, "step": 14212 }, { "entropy": 1.7332377235094707, "epoch": 1.5613688171157067, "grad_norm": 0.633551299571991, "learning_rate": 4.177943669058267e-06, "loss": 1.4808, "mean_token_accuracy": 0.6372072199980418, "num_tokens": 2384688739.0, "step": 14213 }, { "entropy": 1.7179057399431865, "epoch": 1.5614786740270796, "grad_norm": 0.6609451174736023, "learning_rate": 4.176899445105271e-06, "loss": 1.4831, "mean_token_accuracy": 0.6471300423145294, "num_tokens": 2384873167.0, "step": 14214 }, { "entropy": 1.672248860200246, "epoch": 1.5615885309384527, "grad_norm": 0.6449457406997681, "learning_rate": 4.175855437100331e-06, "loss": 1.2892, "mean_token_accuracy": 0.6665244797865549, "num_tokens": 2385012942.0, "step": 14215 }, { "entropy": 1.6491312483946483, "epoch": 1.5616983878498256, "grad_norm": 0.7436006665229797, "learning_rate": 4.174811645076494e-06, "loss": 1.5262, "mean_token_accuracy": 0.6568896919488907, "num_tokens": 2385161630.0, "step": 14216 }, { "entropy": 1.7142470479011536, "epoch": 1.5618082447611985, "grad_norm": 0.732032060623169, "learning_rate": 4.1737680690667935e-06, "loss": 1.3078, "mean_token_accuracy": 0.6583902637163798, "num_tokens": 2385282234.0, "step": 14217 }, { "entropy": 1.6656326552232106, "epoch": 1.5619181016725716, "grad_norm": 0.6994863152503967, "learning_rate": 4.172724709104256e-06, "loss": 1.4121, "mean_token_accuracy": 0.6581338991721472, "num_tokens": 2385490081.0, "step": 14218 }, { "entropy": 1.7145332098007202, "epoch": 1.5620279585839443, "grad_norm": 0.7812539339065552, "learning_rate": 4.171681565221905e-06, "loss": 1.4355, "mean_token_accuracy": 0.6510881582895914, "num_tokens": 2385658398.0, "step": 14219 }, { "entropy": 1.6682457625865936, "epoch": 1.5621378154953174, "grad_norm": 0.8907629251480103, "learning_rate": 4.170638637452755e-06, "loss": 1.455, "mean_token_accuracy": 0.6465255270401636, "num_tokens": 2385858820.0, "step": 14220 }, { "entropy": 1.6630571881930034, "epoch": 1.5622476724066903, "grad_norm": 0.7714592814445496, "learning_rate": 4.1695959258298155e-06, "loss": 1.2204, "mean_token_accuracy": 0.6844823310772578, "num_tokens": 2386000558.0, "step": 14221 }, { "entropy": 1.7163423299789429, "epoch": 1.5623575293180632, "grad_norm": 0.7476485371589661, "learning_rate": 4.1685534303860895e-06, "loss": 1.4154, "mean_token_accuracy": 0.6500615924596786, "num_tokens": 2386147024.0, "step": 14222 }, { "entropy": 1.7059412399927776, "epoch": 1.5624673862294363, "grad_norm": 0.7124969959259033, "learning_rate": 4.1675111511545655e-06, "loss": 1.2981, "mean_token_accuracy": 0.6605760852495829, "num_tokens": 2386304325.0, "step": 14223 }, { "entropy": 1.7372475763161976, "epoch": 1.5625772431408091, "grad_norm": 0.6242848038673401, "learning_rate": 4.166469088168235e-06, "loss": 1.3766, "mean_token_accuracy": 0.6519166280825933, "num_tokens": 2386470095.0, "step": 14224 }, { "entropy": 1.7890447576840718, "epoch": 1.562687100052182, "grad_norm": 0.9228449463844299, "learning_rate": 4.16542724146008e-06, "loss": 1.5208, "mean_token_accuracy": 0.6443865299224854, "num_tokens": 2386662952.0, "step": 14225 }, { "entropy": 1.7128015756607056, "epoch": 1.562796956963555, "grad_norm": 0.585515022277832, "learning_rate": 4.164385611063074e-06, "loss": 1.3964, "mean_token_accuracy": 0.645780528585116, "num_tokens": 2386869100.0, "step": 14226 }, { "entropy": 1.6932969292004902, "epoch": 1.5629068138749278, "grad_norm": 0.6653163433074951, "learning_rate": 4.163344197010181e-06, "loss": 1.3276, "mean_token_accuracy": 0.674016997218132, "num_tokens": 2387014196.0, "step": 14227 }, { "entropy": 1.7202288210391998, "epoch": 1.563016670786301, "grad_norm": 0.7792028784751892, "learning_rate": 4.162302999334366e-06, "loss": 1.3553, "mean_token_accuracy": 0.6684650580088297, "num_tokens": 2387133483.0, "step": 14228 }, { "entropy": 1.6807933350404103, "epoch": 1.5631265276976738, "grad_norm": 0.6629685759544373, "learning_rate": 4.1612620180685795e-06, "loss": 1.5153, "mean_token_accuracy": 0.6433508296807607, "num_tokens": 2387302924.0, "step": 14229 }, { "entropy": 1.690634439388911, "epoch": 1.5632363846090467, "grad_norm": 0.6891497373580933, "learning_rate": 4.160221253245765e-06, "loss": 1.2502, "mean_token_accuracy": 0.6735278566678365, "num_tokens": 2387484689.0, "step": 14230 }, { "entropy": 1.7032279173533122, "epoch": 1.5633462415204198, "grad_norm": 0.6984847784042358, "learning_rate": 4.15918070489887e-06, "loss": 1.3396, "mean_token_accuracy": 0.6606222689151764, "num_tokens": 2387626318.0, "step": 14231 }, { "entropy": 1.678362290064494, "epoch": 1.5634560984317925, "grad_norm": 0.6913422346115112, "learning_rate": 4.1581403730608185e-06, "loss": 1.3096, "mean_token_accuracy": 0.6657731880744299, "num_tokens": 2387753790.0, "step": 14232 }, { "entropy": 1.6654574970404308, "epoch": 1.5635659553431656, "grad_norm": 0.8690144419670105, "learning_rate": 4.157100257764545e-06, "loss": 1.5989, "mean_token_accuracy": 0.6529507786035538, "num_tokens": 2387921051.0, "step": 14233 }, { "entropy": 1.715543528397878, "epoch": 1.5636758122545384, "grad_norm": 0.6697078347206116, "learning_rate": 4.156060359042966e-06, "loss": 1.4025, "mean_token_accuracy": 0.6578785429398218, "num_tokens": 2388071690.0, "step": 14234 }, { "entropy": 1.66527725259463, "epoch": 1.5637856691659113, "grad_norm": 0.7394384145736694, "learning_rate": 4.1550206769289885e-06, "loss": 1.3616, "mean_token_accuracy": 0.6681285699208578, "num_tokens": 2388294674.0, "step": 14235 }, { "entropy": 1.7145603199799855, "epoch": 1.5638955260772844, "grad_norm": 0.7570204734802246, "learning_rate": 4.1539812114555225e-06, "loss": 1.4025, "mean_token_accuracy": 0.6636460820833842, "num_tokens": 2388439276.0, "step": 14236 }, { "entropy": 1.624147782723109, "epoch": 1.5640053829886573, "grad_norm": 0.6261785626411438, "learning_rate": 4.152941962655472e-06, "loss": 1.3659, "mean_token_accuracy": 0.6665515998999277, "num_tokens": 2388609168.0, "step": 14237 }, { "entropy": 1.7175993124643962, "epoch": 1.5641152399000302, "grad_norm": 0.7182031869888306, "learning_rate": 4.151902930561718e-06, "loss": 1.3104, "mean_token_accuracy": 0.6693373173475266, "num_tokens": 2388741757.0, "step": 14238 }, { "entropy": 1.638315846522649, "epoch": 1.564225096811403, "grad_norm": 0.6741671562194824, "learning_rate": 4.150864115207149e-06, "loss": 1.3031, "mean_token_accuracy": 0.6736795554558436, "num_tokens": 2388882713.0, "step": 14239 }, { "entropy": 1.6951660414536793, "epoch": 1.564334953722776, "grad_norm": 0.8368586301803589, "learning_rate": 4.149825516624648e-06, "loss": 1.4751, "mean_token_accuracy": 0.6537247101465861, "num_tokens": 2389032474.0, "step": 14240 }, { "entropy": 1.7529374957084656, "epoch": 1.564444810634149, "grad_norm": 0.781149685382843, "learning_rate": 4.148787134847083e-06, "loss": 1.4741, "mean_token_accuracy": 0.6479151596625646, "num_tokens": 2389195676.0, "step": 14241 }, { "entropy": 1.6890328228473663, "epoch": 1.564554667545522, "grad_norm": 0.7580424547195435, "learning_rate": 4.147748969907315e-06, "loss": 1.5287, "mean_token_accuracy": 0.6358017772436142, "num_tokens": 2389402290.0, "step": 14242 }, { "entropy": 1.6835271914800007, "epoch": 1.5646645244568949, "grad_norm": 0.691645085811615, "learning_rate": 4.1467110218382065e-06, "loss": 1.3766, "mean_token_accuracy": 0.6494946281115214, "num_tokens": 2389597971.0, "step": 14243 }, { "entropy": 1.7536411086718242, "epoch": 1.564774381368268, "grad_norm": 0.7124913334846497, "learning_rate": 4.145673290672604e-06, "loss": 1.4361, "mean_token_accuracy": 0.6526350329319636, "num_tokens": 2389750552.0, "step": 14244 }, { "entropy": 1.761474738518397, "epoch": 1.5648842382796406, "grad_norm": 0.752509593963623, "learning_rate": 4.144635776443355e-06, "loss": 1.6245, "mean_token_accuracy": 0.6290792971849442, "num_tokens": 2390004070.0, "step": 14245 }, { "entropy": 1.6417441566785176, "epoch": 1.5649940951910137, "grad_norm": 0.6076568961143494, "learning_rate": 4.143598479183296e-06, "loss": 1.2921, "mean_token_accuracy": 0.6611118962367376, "num_tokens": 2390155985.0, "step": 14246 }, { "entropy": 1.7019707262516022, "epoch": 1.5651039521023866, "grad_norm": 0.7136411666870117, "learning_rate": 4.142561398925251e-06, "loss": 1.3393, "mean_token_accuracy": 0.6593159635861715, "num_tokens": 2390306312.0, "step": 14247 }, { "entropy": 1.676996996005376, "epoch": 1.5652138090137595, "grad_norm": 0.6909713745117188, "learning_rate": 4.14152453570205e-06, "loss": 1.2317, "mean_token_accuracy": 0.6901508718729019, "num_tokens": 2390448153.0, "step": 14248 }, { "entropy": 1.7591987252235413, "epoch": 1.5653236659251326, "grad_norm": 0.6709699630737305, "learning_rate": 4.140487889546511e-06, "loss": 1.5019, "mean_token_accuracy": 0.6351676136255264, "num_tokens": 2390635144.0, "step": 14249 }, { "entropy": 1.662870168685913, "epoch": 1.5654335228365055, "grad_norm": 0.7932735085487366, "learning_rate": 4.1394514604914346e-06, "loss": 1.273, "mean_token_accuracy": 0.6632749090592066, "num_tokens": 2390760110.0, "step": 14250 }, { "entropy": 1.716610203186671, "epoch": 1.5655433797478784, "grad_norm": 0.6800168752670288, "learning_rate": 4.138415248569627e-06, "loss": 1.5949, "mean_token_accuracy": 0.6412303000688553, "num_tokens": 2390971824.0, "step": 14251 }, { "entropy": 1.7617238660653431, "epoch": 1.5656532366592515, "grad_norm": 2.7349250316619873, "learning_rate": 4.137379253813888e-06, "loss": 1.3383, "mean_token_accuracy": 0.6525413393974304, "num_tokens": 2391201582.0, "step": 14252 }, { "entropy": 1.734719494978587, "epoch": 1.5657630935706242, "grad_norm": 0.6799651384353638, "learning_rate": 4.136343476257003e-06, "loss": 1.382, "mean_token_accuracy": 0.6536955088376999, "num_tokens": 2391339481.0, "step": 14253 }, { "entropy": 1.683516263961792, "epoch": 1.5658729504819973, "grad_norm": 0.6154446601867676, "learning_rate": 4.135307915931752e-06, "loss": 1.2975, "mean_token_accuracy": 0.6653460214535395, "num_tokens": 2391495107.0, "step": 14254 }, { "entropy": 1.6860613723595936, "epoch": 1.5659828073933701, "grad_norm": 0.6810904145240784, "learning_rate": 4.1342725728709155e-06, "loss": 1.3292, "mean_token_accuracy": 0.6534279535214106, "num_tokens": 2391656633.0, "step": 14255 }, { "entropy": 1.7145483096440632, "epoch": 1.566092664304743, "grad_norm": 0.6879013180732727, "learning_rate": 4.133237447107254e-06, "loss": 1.4758, "mean_token_accuracy": 0.6510107268889745, "num_tokens": 2391830562.0, "step": 14256 }, { "entropy": 1.716945578654607, "epoch": 1.5662025212161161, "grad_norm": 0.7154614329338074, "learning_rate": 4.1322025386735366e-06, "loss": 1.2937, "mean_token_accuracy": 0.6757306108872095, "num_tokens": 2391949707.0, "step": 14257 }, { "entropy": 1.684073011080424, "epoch": 1.5663123781274888, "grad_norm": 0.6808174252510071, "learning_rate": 4.131167847602514e-06, "loss": 1.3949, "mean_token_accuracy": 0.653533269961675, "num_tokens": 2392133416.0, "step": 14258 }, { "entropy": 1.6810634036858876, "epoch": 1.566422235038862, "grad_norm": 0.5711365342140198, "learning_rate": 4.130133373926931e-06, "loss": 1.2044, "mean_token_accuracy": 0.672467311223348, "num_tokens": 2392345107.0, "step": 14259 }, { "entropy": 1.6876142223676045, "epoch": 1.5665320919502348, "grad_norm": 0.6411077976226807, "learning_rate": 4.129099117679534e-06, "loss": 1.3813, "mean_token_accuracy": 0.6480342298746109, "num_tokens": 2392499285.0, "step": 14260 }, { "entropy": 1.74393226703008, "epoch": 1.5666419488616077, "grad_norm": 0.634601891040802, "learning_rate": 4.128065078893054e-06, "loss": 1.4692, "mean_token_accuracy": 0.6403647114833196, "num_tokens": 2392663103.0, "step": 14261 }, { "entropy": 1.7082592248916626, "epoch": 1.5667518057729808, "grad_norm": 0.5808596014976501, "learning_rate": 4.127031257600215e-06, "loss": 1.4834, "mean_token_accuracy": 0.6419193595647812, "num_tokens": 2392871214.0, "step": 14262 }, { "entropy": 1.7498057583967845, "epoch": 1.5668616626843537, "grad_norm": 0.7248362302780151, "learning_rate": 4.125997653833742e-06, "loss": 1.4285, "mean_token_accuracy": 0.6467612981796265, "num_tokens": 2393024951.0, "step": 14263 }, { "entropy": 1.6748530566692352, "epoch": 1.5669715195957266, "grad_norm": 0.6871564388275146, "learning_rate": 4.124964267626344e-06, "loss": 1.3883, "mean_token_accuracy": 0.657158151268959, "num_tokens": 2393157081.0, "step": 14264 }, { "entropy": 1.7383250097433727, "epoch": 1.5670813765070997, "grad_norm": 0.7244443893432617, "learning_rate": 4.123931099010731e-06, "loss": 1.475, "mean_token_accuracy": 0.648836076259613, "num_tokens": 2393358853.0, "step": 14265 }, { "entropy": 1.7299003303050995, "epoch": 1.5671912334184723, "grad_norm": 0.608918309211731, "learning_rate": 4.1228981480196e-06, "loss": 1.4105, "mean_token_accuracy": 0.6505444248517355, "num_tokens": 2393537058.0, "step": 14266 }, { "entropy": 1.768057684103648, "epoch": 1.5673010903298454, "grad_norm": 0.6040147542953491, "learning_rate": 4.121865414685641e-06, "loss": 1.35, "mean_token_accuracy": 0.6549276908238729, "num_tokens": 2393697269.0, "step": 14267 }, { "entropy": 1.7645529806613922, "epoch": 1.5674109472412183, "grad_norm": 0.7420253753662109, "learning_rate": 4.120832899041542e-06, "loss": 1.3501, "mean_token_accuracy": 0.6557344893614451, "num_tokens": 2393879833.0, "step": 14268 }, { "entropy": 1.685241311788559, "epoch": 1.5675208041525912, "grad_norm": 0.6206194758415222, "learning_rate": 4.1198006011199855e-06, "loss": 1.3303, "mean_token_accuracy": 0.6640505194664001, "num_tokens": 2394034031.0, "step": 14269 }, { "entropy": 1.664696882168452, "epoch": 1.5676306610639643, "grad_norm": 0.6417528986930847, "learning_rate": 4.118768520953638e-06, "loss": 1.2617, "mean_token_accuracy": 0.6767093986272812, "num_tokens": 2394158033.0, "step": 14270 }, { "entropy": 1.6648876368999481, "epoch": 1.567740517975337, "grad_norm": 0.6877491474151611, "learning_rate": 4.117736658575165e-06, "loss": 1.2169, "mean_token_accuracy": 0.6888188421726227, "num_tokens": 2394306056.0, "step": 14271 }, { "entropy": 1.7249459822972615, "epoch": 1.56785037488671, "grad_norm": 0.6589949131011963, "learning_rate": 4.116705014017229e-06, "loss": 1.3756, "mean_token_accuracy": 0.6467818568150202, "num_tokens": 2394459499.0, "step": 14272 }, { "entropy": 1.6618477304776509, "epoch": 1.567960231798083, "grad_norm": 0.7302327156066895, "learning_rate": 4.115673587312476e-06, "loss": 1.3993, "mean_token_accuracy": 0.6579805115858713, "num_tokens": 2394645406.0, "step": 14273 }, { "entropy": 1.7453274031480153, "epoch": 1.5680700887094559, "grad_norm": 0.6717466115951538, "learning_rate": 4.114642378493549e-06, "loss": 1.2956, "mean_token_accuracy": 0.6660959323247274, "num_tokens": 2394770158.0, "step": 14274 }, { "entropy": 1.7191137572129567, "epoch": 1.568179945620829, "grad_norm": 0.6410926580429077, "learning_rate": 4.113611387593091e-06, "loss": 1.3987, "mean_token_accuracy": 0.6500120759010315, "num_tokens": 2394936523.0, "step": 14275 }, { "entropy": 1.7772869765758514, "epoch": 1.5682898025322018, "grad_norm": 0.72120600938797, "learning_rate": 4.1125806146437285e-06, "loss": 1.4901, "mean_token_accuracy": 0.6559230337540308, "num_tokens": 2395116288.0, "step": 14276 }, { "entropy": 1.7197218139966328, "epoch": 1.5683996594435747, "grad_norm": 0.5482208132743835, "learning_rate": 4.111550059678087e-06, "loss": 1.5672, "mean_token_accuracy": 0.6186061501502991, "num_tokens": 2395375622.0, "step": 14277 }, { "entropy": 1.717966765165329, "epoch": 1.5685095163549478, "grad_norm": 0.787023663520813, "learning_rate": 4.110519722728782e-06, "loss": 1.2755, "mean_token_accuracy": 0.6763796657323837, "num_tokens": 2395509047.0, "step": 14278 }, { "entropy": 1.7471245626608531, "epoch": 1.5686193732663205, "grad_norm": 1.0187658071517944, "learning_rate": 4.109489603828422e-06, "loss": 1.2793, "mean_token_accuracy": 0.6738806962966919, "num_tokens": 2395635521.0, "step": 14279 }, { "entropy": 1.6846247414747875, "epoch": 1.5687292301776936, "grad_norm": 0.8501124382019043, "learning_rate": 4.10845970300961e-06, "loss": 1.3052, "mean_token_accuracy": 0.6631153573592504, "num_tokens": 2395770509.0, "step": 14280 }, { "entropy": 1.7312610546747844, "epoch": 1.5688390870890665, "grad_norm": 0.7959895133972168, "learning_rate": 4.107430020304945e-06, "loss": 1.5674, "mean_token_accuracy": 0.6618924016753832, "num_tokens": 2395923852.0, "step": 14281 }, { "entropy": 1.7540445923805237, "epoch": 1.5689489440004394, "grad_norm": 0.7842367887496948, "learning_rate": 4.106400555747015e-06, "loss": 1.2894, "mean_token_accuracy": 0.6724207550287247, "num_tokens": 2396076155.0, "step": 14282 }, { "entropy": 1.7668162484963734, "epoch": 1.5690588009118125, "grad_norm": 0.6241871118545532, "learning_rate": 4.105371309368399e-06, "loss": 1.3579, "mean_token_accuracy": 0.6569240589936575, "num_tokens": 2396220925.0, "step": 14283 }, { "entropy": 1.6470149258772533, "epoch": 1.5691686578231852, "grad_norm": 0.6123558878898621, "learning_rate": 4.104342281201676e-06, "loss": 1.3126, "mean_token_accuracy": 0.6688317805528641, "num_tokens": 2396398381.0, "step": 14284 }, { "entropy": 1.7287775576114655, "epoch": 1.5692785147345583, "grad_norm": 0.6877973675727844, "learning_rate": 4.103313471279413e-06, "loss": 1.3715, "mean_token_accuracy": 0.6662652442852656, "num_tokens": 2396575584.0, "step": 14285 }, { "entropy": 1.6712758839130402, "epoch": 1.5693883716459311, "grad_norm": 0.6240174174308777, "learning_rate": 4.102284879634167e-06, "loss": 1.3257, "mean_token_accuracy": 0.6696380823850632, "num_tokens": 2396771081.0, "step": 14286 }, { "entropy": 1.6926849484443665, "epoch": 1.569498228557304, "grad_norm": 0.7340817451477051, "learning_rate": 4.1012565062985e-06, "loss": 1.5994, "mean_token_accuracy": 0.620560958981514, "num_tokens": 2397011942.0, "step": 14287 }, { "entropy": 1.6864181657632191, "epoch": 1.5696080854686771, "grad_norm": 0.6771414875984192, "learning_rate": 4.100228351304954e-06, "loss": 1.3305, "mean_token_accuracy": 0.6688184440135956, "num_tokens": 2397167841.0, "step": 14288 }, { "entropy": 1.691802740097046, "epoch": 1.56971794238005, "grad_norm": 0.9737116098403931, "learning_rate": 4.0992004146860735e-06, "loss": 1.4348, "mean_token_accuracy": 0.6667732695738474, "num_tokens": 2397303533.0, "step": 14289 }, { "entropy": 1.6837505499521892, "epoch": 1.569827799291423, "grad_norm": 0.6902137994766235, "learning_rate": 4.098172696474389e-06, "loss": 1.3924, "mean_token_accuracy": 0.6759957373142242, "num_tokens": 2397479982.0, "step": 14290 }, { "entropy": 1.713492174943288, "epoch": 1.569937656202796, "grad_norm": 0.6719781160354614, "learning_rate": 4.097145196702429e-06, "loss": 1.3084, "mean_token_accuracy": 0.664943128824234, "num_tokens": 2397660597.0, "step": 14291 }, { "entropy": 1.714007943868637, "epoch": 1.5700475131141687, "grad_norm": 0.7276617884635925, "learning_rate": 4.096117915402711e-06, "loss": 1.4587, "mean_token_accuracy": 0.642642746369044, "num_tokens": 2397840863.0, "step": 14292 }, { "entropy": 1.6744861801465352, "epoch": 1.5701573700255418, "grad_norm": 0.7124812006950378, "learning_rate": 4.095090852607753e-06, "loss": 1.6228, "mean_token_accuracy": 0.6388173699378967, "num_tokens": 2398021773.0, "step": 14293 }, { "entropy": 1.6718362669150035, "epoch": 1.5702672269369147, "grad_norm": 0.6724779009819031, "learning_rate": 4.094064008350059e-06, "loss": 1.321, "mean_token_accuracy": 0.6795742710431417, "num_tokens": 2398160853.0, "step": 14294 }, { "entropy": 1.7398958404858906, "epoch": 1.5703770838482876, "grad_norm": 0.611190140247345, "learning_rate": 4.093037382662123e-06, "loss": 1.3787, "mean_token_accuracy": 0.6572253008683523, "num_tokens": 2398302307.0, "step": 14295 }, { "entropy": 1.6891403396924336, "epoch": 1.5704869407596607, "grad_norm": 0.6904042363166809, "learning_rate": 4.0920109755764445e-06, "loss": 1.2991, "mean_token_accuracy": 0.6698387066523234, "num_tokens": 2398439925.0, "step": 14296 }, { "entropy": 1.6572466492652893, "epoch": 1.5705967976710333, "grad_norm": 0.7035029530525208, "learning_rate": 4.090984787125506e-06, "loss": 1.3059, "mean_token_accuracy": 0.6694705088933309, "num_tokens": 2398606382.0, "step": 14297 }, { "entropy": 1.7408252656459808, "epoch": 1.5707066545824064, "grad_norm": 0.7190991640090942, "learning_rate": 4.089958817341783e-06, "loss": 1.4693, "mean_token_accuracy": 0.6496324787537257, "num_tokens": 2398775722.0, "step": 14298 }, { "entropy": 1.703038364648819, "epoch": 1.5708165114937793, "grad_norm": 0.7542386651039124, "learning_rate": 4.088933066257753e-06, "loss": 1.2668, "mean_token_accuracy": 0.6750974357128143, "num_tokens": 2398914013.0, "step": 14299 }, { "entropy": 1.6845628917217255, "epoch": 1.5709263684051522, "grad_norm": 0.6732815504074097, "learning_rate": 4.087907533905874e-06, "loss": 1.3025, "mean_token_accuracy": 0.6741761565208435, "num_tokens": 2399094931.0, "step": 14300 }, { "entropy": 1.7796473304430644, "epoch": 1.5710362253165253, "grad_norm": 0.6704933047294617, "learning_rate": 4.08688222031861e-06, "loss": 1.4761, "mean_token_accuracy": 0.6373851150274277, "num_tokens": 2399264259.0, "step": 14301 }, { "entropy": 1.7238643169403076, "epoch": 1.5711460822278982, "grad_norm": 0.8131667971611023, "learning_rate": 4.0858571255284075e-06, "loss": 1.5472, "mean_token_accuracy": 0.6483301371335983, "num_tokens": 2399417678.0, "step": 14302 }, { "entropy": 1.670019308725993, "epoch": 1.571255939139271, "grad_norm": 0.6241254210472107, "learning_rate": 4.084832249567709e-06, "loss": 1.497, "mean_token_accuracy": 0.641557107369105, "num_tokens": 2399615093.0, "step": 14303 }, { "entropy": 1.7172284523646038, "epoch": 1.5713657960506442, "grad_norm": 0.6693923473358154, "learning_rate": 4.083807592468956e-06, "loss": 1.2975, "mean_token_accuracy": 0.6784742772579193, "num_tokens": 2399759030.0, "step": 14304 }, { "entropy": 1.7683460513750713, "epoch": 1.5714756529620169, "grad_norm": 0.8016403317451477, "learning_rate": 4.0827831542645764e-06, "loss": 1.584, "mean_token_accuracy": 0.6237875620524088, "num_tokens": 2400010908.0, "step": 14305 }, { "entropy": 1.669872482617696, "epoch": 1.57158550987339, "grad_norm": 0.6497310996055603, "learning_rate": 4.081758934986993e-06, "loss": 1.3014, "mean_token_accuracy": 0.673602357506752, "num_tokens": 2400130678.0, "step": 14306 }, { "entropy": 1.7502439816792805, "epoch": 1.5716953667847628, "grad_norm": 0.8661501407623291, "learning_rate": 4.08073493466862e-06, "loss": 1.4551, "mean_token_accuracy": 0.6611292113860449, "num_tokens": 2400270119.0, "step": 14307 }, { "entropy": 1.7579331596692402, "epoch": 1.5718052236961357, "grad_norm": 0.7373813390731812, "learning_rate": 4.079711153341871e-06, "loss": 1.2837, "mean_token_accuracy": 0.6710260063409805, "num_tokens": 2400381408.0, "step": 14308 }, { "entropy": 1.7175275286038716, "epoch": 1.5719150806075088, "grad_norm": 0.7899195551872253, "learning_rate": 4.078687591039146e-06, "loss": 1.4791, "mean_token_accuracy": 0.6414479861656824, "num_tokens": 2400555300.0, "step": 14309 }, { "entropy": 1.7731333871682484, "epoch": 1.5720249375188815, "grad_norm": 0.7760249972343445, "learning_rate": 4.077664247792838e-06, "loss": 1.5491, "mean_token_accuracy": 0.6391358077526093, "num_tokens": 2400692168.0, "step": 14310 }, { "entropy": 1.658635934193929, "epoch": 1.5721347944302546, "grad_norm": 0.7568308115005493, "learning_rate": 4.076641123635338e-06, "loss": 1.5812, "mean_token_accuracy": 0.6311882634957632, "num_tokens": 2400892665.0, "step": 14311 }, { "entropy": 1.7353158593177795, "epoch": 1.5722446513416275, "grad_norm": 0.7283448576927185, "learning_rate": 4.0756182185990245e-06, "loss": 1.3225, "mean_token_accuracy": 0.6635488321383795, "num_tokens": 2401050516.0, "step": 14312 }, { "entropy": 1.605364441871643, "epoch": 1.5723545082530004, "grad_norm": 0.7137781977653503, "learning_rate": 4.0745955327162775e-06, "loss": 1.4384, "mean_token_accuracy": 0.6605170965194702, "num_tokens": 2401223303.0, "step": 14313 }, { "entropy": 1.7696949640909831, "epoch": 1.5724643651643735, "grad_norm": 0.7161397933959961, "learning_rate": 4.073573066019461e-06, "loss": 1.5051, "mean_token_accuracy": 0.6313910136620203, "num_tokens": 2401398847.0, "step": 14314 }, { "entropy": 1.7221232950687408, "epoch": 1.5725742220757464, "grad_norm": 0.6790109276771545, "learning_rate": 4.072550818540934e-06, "loss": 1.3352, "mean_token_accuracy": 0.6639846116304398, "num_tokens": 2401545939.0, "step": 14315 }, { "entropy": 1.7193631132443745, "epoch": 1.5726840789871193, "grad_norm": 0.6720796227455139, "learning_rate": 4.071528790313049e-06, "loss": 1.4618, "mean_token_accuracy": 0.6459654122591019, "num_tokens": 2401728338.0, "step": 14316 }, { "entropy": 1.6730712354183197, "epoch": 1.5727939358984924, "grad_norm": 0.7082514762878418, "learning_rate": 4.070506981368164e-06, "loss": 1.4599, "mean_token_accuracy": 0.6565167158842087, "num_tokens": 2401904602.0, "step": 14317 }, { "entropy": 1.7497392197450001, "epoch": 1.572903792809865, "grad_norm": 0.7780535817146301, "learning_rate": 4.069485391738605e-06, "loss": 1.4053, "mean_token_accuracy": 0.6494750926891962, "num_tokens": 2402107235.0, "step": 14318 }, { "entropy": 1.6548576653003693, "epoch": 1.5730136497212381, "grad_norm": 0.6952147483825684, "learning_rate": 4.068464021456709e-06, "loss": 1.426, "mean_token_accuracy": 0.6591857820749283, "num_tokens": 2402300956.0, "step": 14319 }, { "entropy": 1.6779307921727498, "epoch": 1.573123506632611, "grad_norm": 0.7041146159172058, "learning_rate": 4.0674428705548075e-06, "loss": 1.3681, "mean_token_accuracy": 0.6691206991672516, "num_tokens": 2402446715.0, "step": 14320 }, { "entropy": 1.6910231411457062, "epoch": 1.573233363543984, "grad_norm": 0.5578975081443787, "learning_rate": 4.0664219390652146e-06, "loss": 1.407, "mean_token_accuracy": 0.6494897405306498, "num_tokens": 2402638395.0, "step": 14321 }, { "entropy": 1.6795838276545207, "epoch": 1.573343220455357, "grad_norm": 0.6371523141860962, "learning_rate": 4.065401227020243e-06, "loss": 1.3769, "mean_token_accuracy": 0.6553529649972916, "num_tokens": 2402777742.0, "step": 14322 }, { "entropy": 1.6951970160007477, "epoch": 1.5734530773667297, "grad_norm": 0.7004981637001038, "learning_rate": 4.064380734452195e-06, "loss": 1.1673, "mean_token_accuracy": 0.687474250793457, "num_tokens": 2402925563.0, "step": 14323 }, { "entropy": 1.6712155938148499, "epoch": 1.5735629342781028, "grad_norm": 0.7162665128707886, "learning_rate": 4.06336046139337e-06, "loss": 1.3009, "mean_token_accuracy": 0.6811398416757584, "num_tokens": 2403103844.0, "step": 14324 }, { "entropy": 1.5985010763009389, "epoch": 1.5736727911894757, "grad_norm": 0.6248446702957153, "learning_rate": 4.062340407876066e-06, "loss": 1.1624, "mean_token_accuracy": 0.6961935559908549, "num_tokens": 2403284801.0, "step": 14325 }, { "entropy": 1.699522962172826, "epoch": 1.5737826481008486, "grad_norm": 0.7275028228759766, "learning_rate": 4.06132057393256e-06, "loss": 1.5205, "mean_token_accuracy": 0.6402701983849207, "num_tokens": 2403451049.0, "step": 14326 }, { "entropy": 1.717777858177821, "epoch": 1.5738925050122217, "grad_norm": 0.6977814435958862, "learning_rate": 4.060300959595129e-06, "loss": 1.3162, "mean_token_accuracy": 0.6749143203099569, "num_tokens": 2403611909.0, "step": 14327 }, { "entropy": 1.7565401196479797, "epoch": 1.5740023619235946, "grad_norm": 0.7143315076828003, "learning_rate": 4.059281564896049e-06, "loss": 1.4808, "mean_token_accuracy": 0.6375414083401362, "num_tokens": 2403777557.0, "step": 14328 }, { "entropy": 1.7590028643608093, "epoch": 1.5741122188349674, "grad_norm": 0.7009501457214355, "learning_rate": 4.058262389867579e-06, "loss": 1.4635, "mean_token_accuracy": 0.6421099056800207, "num_tokens": 2403952220.0, "step": 14329 }, { "entropy": 1.7278287311395009, "epoch": 1.5742220757463405, "grad_norm": 0.6733378767967224, "learning_rate": 4.0572434345419746e-06, "loss": 1.5208, "mean_token_accuracy": 0.6346190224091212, "num_tokens": 2404211887.0, "step": 14330 }, { "entropy": 1.7005733052889507, "epoch": 1.5743319326577132, "grad_norm": 0.6368502974510193, "learning_rate": 4.056224698951489e-06, "loss": 1.2437, "mean_token_accuracy": 0.6910210798184077, "num_tokens": 2404370898.0, "step": 14331 }, { "entropy": 1.7627998689810436, "epoch": 1.5744417895690863, "grad_norm": 0.6787126660346985, "learning_rate": 4.055206183128359e-06, "loss": 1.4583, "mean_token_accuracy": 0.645544116695722, "num_tokens": 2404538240.0, "step": 14332 }, { "entropy": 1.729688048362732, "epoch": 1.5745516464804592, "grad_norm": 0.7091361284255981, "learning_rate": 4.054187887104829e-06, "loss": 1.4387, "mean_token_accuracy": 0.6646422247091929, "num_tokens": 2404674194.0, "step": 14333 }, { "entropy": 1.753710041443507, "epoch": 1.574661503391832, "grad_norm": 0.9322096705436707, "learning_rate": 4.053169810913121e-06, "loss": 1.5747, "mean_token_accuracy": 0.6433406124512354, "num_tokens": 2404827461.0, "step": 14334 }, { "entropy": 1.7040641208489735, "epoch": 1.5747713603032052, "grad_norm": 0.6148183941841125, "learning_rate": 4.0521519545854555e-06, "loss": 1.4987, "mean_token_accuracy": 0.6388400246699651, "num_tokens": 2405061087.0, "step": 14335 }, { "entropy": 1.7134417394797008, "epoch": 1.5748812172145779, "grad_norm": 0.779222846031189, "learning_rate": 4.051134318154049e-06, "loss": 1.5683, "mean_token_accuracy": 0.6611418028672537, "num_tokens": 2405251828.0, "step": 14336 }, { "entropy": 1.7143574953079224, "epoch": 1.574991074125951, "grad_norm": 0.6745566129684448, "learning_rate": 4.050116901651116e-06, "loss": 1.4113, "mean_token_accuracy": 0.6613113085428873, "num_tokens": 2405407958.0, "step": 14337 }, { "entropy": 1.7491061985492706, "epoch": 1.5751009310373238, "grad_norm": 0.8248865604400635, "learning_rate": 4.049099705108849e-06, "loss": 1.2324, "mean_token_accuracy": 0.6748235374689102, "num_tokens": 2405559259.0, "step": 14338 }, { "entropy": 1.752279927333196, "epoch": 1.5752107879486967, "grad_norm": 0.6685039401054382, "learning_rate": 4.048082728559441e-06, "loss": 1.4519, "mean_token_accuracy": 0.6412715241312981, "num_tokens": 2405734315.0, "step": 14339 }, { "entropy": 1.680857280890147, "epoch": 1.5753206448600698, "grad_norm": 0.8184103965759277, "learning_rate": 4.047065972035085e-06, "loss": 1.3274, "mean_token_accuracy": 0.6744781285524368, "num_tokens": 2405872010.0, "step": 14340 }, { "entropy": 1.691278914610545, "epoch": 1.5754305017714427, "grad_norm": 0.6790178418159485, "learning_rate": 4.046049435567959e-06, "loss": 1.3451, "mean_token_accuracy": 0.6628639151652654, "num_tokens": 2406022115.0, "step": 14341 }, { "entropy": 1.6819725433985393, "epoch": 1.5755403586828156, "grad_norm": 0.6255328059196472, "learning_rate": 4.0450331191902315e-06, "loss": 1.2874, "mean_token_accuracy": 0.6705329616864523, "num_tokens": 2406169222.0, "step": 14342 }, { "entropy": 1.6964583198229473, "epoch": 1.5756502155941887, "grad_norm": 0.7723869681358337, "learning_rate": 4.044017022934074e-06, "loss": 1.336, "mean_token_accuracy": 0.6657444735368093, "num_tokens": 2406304501.0, "step": 14343 }, { "entropy": 1.6860394378503163, "epoch": 1.5757600725055614, "grad_norm": 0.6104413270950317, "learning_rate": 4.043001146831642e-06, "loss": 1.4501, "mean_token_accuracy": 0.6295960744222006, "num_tokens": 2406502733.0, "step": 14344 }, { "entropy": 1.718522051970164, "epoch": 1.5758699294169345, "grad_norm": 0.7444132566452026, "learning_rate": 4.0419854909150905e-06, "loss": 1.4821, "mean_token_accuracy": 0.6521776219209036, "num_tokens": 2406665496.0, "step": 14345 }, { "entropy": 1.7012490928173065, "epoch": 1.5759797863283074, "grad_norm": 0.6309324502944946, "learning_rate": 4.040970055216562e-06, "loss": 1.3502, "mean_token_accuracy": 0.6527341256539027, "num_tokens": 2406863488.0, "step": 14346 }, { "entropy": 1.6834677159786224, "epoch": 1.5760896432396803, "grad_norm": 0.6865068078041077, "learning_rate": 4.039954839768194e-06, "loss": 1.4619, "mean_token_accuracy": 0.6443218390146891, "num_tokens": 2407018418.0, "step": 14347 }, { "entropy": 1.6774198611577351, "epoch": 1.5761995001510534, "grad_norm": 0.6005121469497681, "learning_rate": 4.038939844602119e-06, "loss": 1.3968, "mean_token_accuracy": 0.6480477452278137, "num_tokens": 2407171807.0, "step": 14348 }, { "entropy": 1.6578874389330547, "epoch": 1.576309357062426, "grad_norm": 0.6394762992858887, "learning_rate": 4.0379250697504645e-06, "loss": 1.2588, "mean_token_accuracy": 0.6768279870351156, "num_tokens": 2407288051.0, "step": 14349 }, { "entropy": 1.6758925318717957, "epoch": 1.5764192139737991, "grad_norm": 0.6489967107772827, "learning_rate": 4.036910515245343e-06, "loss": 1.3984, "mean_token_accuracy": 0.6536735345919927, "num_tokens": 2407473963.0, "step": 14350 }, { "entropy": 1.7418983777364094, "epoch": 1.576529070885172, "grad_norm": 0.6643052697181702, "learning_rate": 4.0358961811188635e-06, "loss": 1.5226, "mean_token_accuracy": 0.6297204593817393, "num_tokens": 2407657529.0, "step": 14351 }, { "entropy": 1.6687126159667969, "epoch": 1.576638927796545, "grad_norm": 0.6805570721626282, "learning_rate": 4.034882067403135e-06, "loss": 1.4111, "mean_token_accuracy": 0.6512214044729868, "num_tokens": 2407830944.0, "step": 14352 }, { "entropy": 1.6523280044396718, "epoch": 1.576748784707918, "grad_norm": 0.6789618134498596, "learning_rate": 4.0338681741302495e-06, "loss": 1.4588, "mean_token_accuracy": 0.6637892872095108, "num_tokens": 2407974966.0, "step": 14353 }, { "entropy": 1.6389523049195607, "epoch": 1.576858641619291, "grad_norm": 0.6752464771270752, "learning_rate": 4.032854501332297e-06, "loss": 1.408, "mean_token_accuracy": 0.6546342919270197, "num_tokens": 2408137262.0, "step": 14354 }, { "entropy": 1.7064806123574574, "epoch": 1.5769684985306638, "grad_norm": 0.5836341381072998, "learning_rate": 4.031841049041361e-06, "loss": 1.3355, "mean_token_accuracy": 0.6571672906478246, "num_tokens": 2408339160.0, "step": 14355 }, { "entropy": 1.7815323770046234, "epoch": 1.577078355442037, "grad_norm": 0.7685762643814087, "learning_rate": 4.030827817289513e-06, "loss": 1.372, "mean_token_accuracy": 0.6627372950315475, "num_tokens": 2408471295.0, "step": 14356 }, { "entropy": 1.7619872987270355, "epoch": 1.5771882123534096, "grad_norm": 0.7153518795967102, "learning_rate": 4.029814806108827e-06, "loss": 1.4392, "mean_token_accuracy": 0.636270801226298, "num_tokens": 2408634566.0, "step": 14357 }, { "entropy": 1.6729972461859386, "epoch": 1.5772980692647827, "grad_norm": 0.6680687069892883, "learning_rate": 4.028802015531362e-06, "loss": 1.3128, "mean_token_accuracy": 0.6736362675825754, "num_tokens": 2408765313.0, "step": 14358 }, { "entropy": 1.7162837485472362, "epoch": 1.5774079261761556, "grad_norm": 0.7068792581558228, "learning_rate": 4.027789445589169e-06, "loss": 1.3913, "mean_token_accuracy": 0.6578982969125112, "num_tokens": 2408929681.0, "step": 14359 }, { "entropy": 1.6619195342063904, "epoch": 1.5775177830875284, "grad_norm": 0.6548637747764587, "learning_rate": 4.026777096314298e-06, "loss": 1.3688, "mean_token_accuracy": 0.6490531514088312, "num_tokens": 2409119354.0, "step": 14360 }, { "entropy": 1.6902973055839539, "epoch": 1.5776276399989015, "grad_norm": 0.7334201335906982, "learning_rate": 4.0257649677387924e-06, "loss": 1.6172, "mean_token_accuracy": 0.6320002973079681, "num_tokens": 2409327791.0, "step": 14361 }, { "entropy": 1.7276590665181477, "epoch": 1.5777374969102742, "grad_norm": 0.8310177326202393, "learning_rate": 4.024753059894683e-06, "loss": 1.5126, "mean_token_accuracy": 0.6533468067646027, "num_tokens": 2409464553.0, "step": 14362 }, { "entropy": 1.68667929371198, "epoch": 1.5778473538216473, "grad_norm": 0.5872757434844971, "learning_rate": 4.023741372813994e-06, "loss": 1.4306, "mean_token_accuracy": 0.6479167540868124, "num_tokens": 2409673452.0, "step": 14363 }, { "entropy": 1.6317294637362163, "epoch": 1.5779572107330202, "grad_norm": 0.705324113368988, "learning_rate": 4.02272990652875e-06, "loss": 1.4657, "mean_token_accuracy": 0.669963558514913, "num_tokens": 2409848784.0, "step": 14364 }, { "entropy": 1.6726927657922108, "epoch": 1.578067067644393, "grad_norm": 0.6846646070480347, "learning_rate": 4.021718661070959e-06, "loss": 1.3991, "mean_token_accuracy": 0.657963847120603, "num_tokens": 2410027810.0, "step": 14365 }, { "entropy": 1.6807039578755696, "epoch": 1.5781769245557662, "grad_norm": 0.634922981262207, "learning_rate": 4.020707636472626e-06, "loss": 1.411, "mean_token_accuracy": 0.6643590877453486, "num_tokens": 2410191106.0, "step": 14366 }, { "entropy": 1.690766880909602, "epoch": 1.578286781467139, "grad_norm": 0.6886934638023376, "learning_rate": 4.019696832765755e-06, "loss": 1.3997, "mean_token_accuracy": 0.6652351021766663, "num_tokens": 2410352396.0, "step": 14367 }, { "entropy": 1.696602702140808, "epoch": 1.578396638378512, "grad_norm": 0.7700260877609253, "learning_rate": 4.01868624998233e-06, "loss": 1.3986, "mean_token_accuracy": 0.654128318031629, "num_tokens": 2410524627.0, "step": 14368 }, { "entropy": 1.6783056855201721, "epoch": 1.578506495289885, "grad_norm": 0.7106050252914429, "learning_rate": 4.017675888154341e-06, "loss": 1.4447, "mean_token_accuracy": 0.6530628601710001, "num_tokens": 2410671042.0, "step": 14369 }, { "entropy": 1.6856454213460286, "epoch": 1.5786163522012577, "grad_norm": 0.5910614132881165, "learning_rate": 4.016665747313765e-06, "loss": 1.4043, "mean_token_accuracy": 0.6535949011643728, "num_tokens": 2410840404.0, "step": 14370 }, { "entropy": 1.681627740462621, "epoch": 1.5787262091126308, "grad_norm": 0.5946781635284424, "learning_rate": 4.0156558274925695e-06, "loss": 1.3597, "mean_token_accuracy": 0.6630496780077616, "num_tokens": 2410999756.0, "step": 14371 }, { "entropy": 1.6094493865966797, "epoch": 1.5788360660240037, "grad_norm": 0.8007654547691345, "learning_rate": 4.014646128722719e-06, "loss": 1.2379, "mean_token_accuracy": 0.6704281121492386, "num_tokens": 2411138576.0, "step": 14372 }, { "entropy": 1.7133605281511943, "epoch": 1.5789459229353766, "grad_norm": 0.6362724900245667, "learning_rate": 4.0136366510361735e-06, "loss": 1.4868, "mean_token_accuracy": 0.6381375938653946, "num_tokens": 2411382890.0, "step": 14373 }, { "entropy": 1.8057750562826793, "epoch": 1.5790557798467497, "grad_norm": 0.7414250373840332, "learning_rate": 4.01262739446488e-06, "loss": 1.3424, "mean_token_accuracy": 0.6564290225505829, "num_tokens": 2411520175.0, "step": 14374 }, { "entropy": 1.688672701517741, "epoch": 1.5791656367581224, "grad_norm": 0.5574719905853271, "learning_rate": 4.011618359040778e-06, "loss": 1.3622, "mean_token_accuracy": 0.6569213171799978, "num_tokens": 2411699169.0, "step": 14375 }, { "entropy": 1.753764549891154, "epoch": 1.5792754936694955, "grad_norm": 0.6876077651977539, "learning_rate": 4.010609544795808e-06, "loss": 1.472, "mean_token_accuracy": 0.6483410596847534, "num_tokens": 2411855045.0, "step": 14376 }, { "entropy": 1.6760170062383015, "epoch": 1.5793853505808684, "grad_norm": 0.670947790145874, "learning_rate": 4.009600951761896e-06, "loss": 1.2907, "mean_token_accuracy": 0.6729481816291809, "num_tokens": 2412065147.0, "step": 14377 }, { "entropy": 1.756626029809316, "epoch": 1.5794952074922413, "grad_norm": 0.7405815124511719, "learning_rate": 4.0085925799709635e-06, "loss": 1.4296, "mean_token_accuracy": 0.6655129392941793, "num_tokens": 2412191329.0, "step": 14378 }, { "entropy": 1.735265185435613, "epoch": 1.5796050644036144, "grad_norm": 0.7957320213317871, "learning_rate": 4.007584429454927e-06, "loss": 1.2667, "mean_token_accuracy": 0.6745762477318445, "num_tokens": 2412318330.0, "step": 14379 }, { "entropy": 1.7310957809289296, "epoch": 1.5797149213149873, "grad_norm": 0.639903724193573, "learning_rate": 4.006576500245689e-06, "loss": 1.452, "mean_token_accuracy": 0.6442805677652359, "num_tokens": 2412496380.0, "step": 14380 }, { "entropy": 1.7322147190570831, "epoch": 1.5798247782263601, "grad_norm": 0.6460077166557312, "learning_rate": 4.005568792375157e-06, "loss": 1.4793, "mean_token_accuracy": 0.6409311791261038, "num_tokens": 2412670508.0, "step": 14381 }, { "entropy": 1.6922193666299183, "epoch": 1.5799346351377332, "grad_norm": 0.6333412528038025, "learning_rate": 4.004561305875221e-06, "loss": 1.2359, "mean_token_accuracy": 0.6793088068564733, "num_tokens": 2412810294.0, "step": 14382 }, { "entropy": 1.7221355736255646, "epoch": 1.580044492049106, "grad_norm": 0.6996757388114929, "learning_rate": 4.003554040777765e-06, "loss": 1.5011, "mean_token_accuracy": 0.6347163567940394, "num_tokens": 2413041747.0, "step": 14383 }, { "entropy": 1.6786732574303944, "epoch": 1.580154348960479, "grad_norm": 0.6720352172851562, "learning_rate": 4.0025469971146725e-06, "loss": 1.4039, "mean_token_accuracy": 0.6694711993137995, "num_tokens": 2413203259.0, "step": 14384 }, { "entropy": 1.7239131430784862, "epoch": 1.580264205871852, "grad_norm": 0.6879767775535583, "learning_rate": 4.001540174917813e-06, "loss": 1.4192, "mean_token_accuracy": 0.6383152256409327, "num_tokens": 2413404931.0, "step": 14385 }, { "entropy": 1.679287075996399, "epoch": 1.5803740627832248, "grad_norm": 0.8257265686988831, "learning_rate": 4.0005335742190555e-06, "loss": 1.2133, "mean_token_accuracy": 0.6854179451862971, "num_tokens": 2413542889.0, "step": 14386 }, { "entropy": 1.6591391563415527, "epoch": 1.580483919694598, "grad_norm": 0.8294792175292969, "learning_rate": 3.999527195050255e-06, "loss": 1.2861, "mean_token_accuracy": 0.6734772324562073, "num_tokens": 2413685078.0, "step": 14387 }, { "entropy": 1.7173769970734913, "epoch": 1.5805937766059706, "grad_norm": 0.6364870667457581, "learning_rate": 3.998521037443264e-06, "loss": 1.4887, "mean_token_accuracy": 0.643697996934255, "num_tokens": 2413867535.0, "step": 14388 }, { "entropy": 1.7607737878958385, "epoch": 1.5807036335173437, "grad_norm": 0.6072537899017334, "learning_rate": 3.997515101429928e-06, "loss": 1.5469, "mean_token_accuracy": 0.6417495807011923, "num_tokens": 2414078328.0, "step": 14389 }, { "entropy": 1.7044040362040203, "epoch": 1.5808134904287166, "grad_norm": 0.6410809755325317, "learning_rate": 3.996509387042085e-06, "loss": 1.5127, "mean_token_accuracy": 0.6362091799577078, "num_tokens": 2414267611.0, "step": 14390 }, { "entropy": 1.6906994581222534, "epoch": 1.5809233473400894, "grad_norm": 0.6977923512458801, "learning_rate": 3.995503894311561e-06, "loss": 1.4303, "mean_token_accuracy": 0.6597084701061249, "num_tokens": 2414429725.0, "step": 14391 }, { "entropy": 1.7433028519153595, "epoch": 1.5810332042514625, "grad_norm": 0.7054868936538696, "learning_rate": 3.994498623270182e-06, "loss": 1.4431, "mean_token_accuracy": 0.6574334055185318, "num_tokens": 2414610681.0, "step": 14392 }, { "entropy": 1.739876647790273, "epoch": 1.5811430611628354, "grad_norm": 0.8737093806266785, "learning_rate": 3.993493573949768e-06, "loss": 1.2597, "mean_token_accuracy": 0.6745233436425527, "num_tokens": 2414709082.0, "step": 14393 }, { "entropy": 1.7829668621222179, "epoch": 1.5812529180742083, "grad_norm": 0.8045809864997864, "learning_rate": 3.992488746382125e-06, "loss": 1.431, "mean_token_accuracy": 0.6510123064120611, "num_tokens": 2414904822.0, "step": 14394 }, { "entropy": 1.7516865233580272, "epoch": 1.5813627749855814, "grad_norm": 0.8814604878425598, "learning_rate": 3.991484140599053e-06, "loss": 1.3402, "mean_token_accuracy": 0.6588575591643652, "num_tokens": 2415024541.0, "step": 14395 }, { "entropy": 1.7599789202213287, "epoch": 1.581472631896954, "grad_norm": 0.6524580121040344, "learning_rate": 3.990479756632352e-06, "loss": 1.458, "mean_token_accuracy": 0.6501910090446472, "num_tokens": 2415189620.0, "step": 14396 }, { "entropy": 1.7022678454717, "epoch": 1.5815824888083272, "grad_norm": 0.7698002457618713, "learning_rate": 3.989475594513808e-06, "loss": 1.3612, "mean_token_accuracy": 0.6615101943413416, "num_tokens": 2415364902.0, "step": 14397 }, { "entropy": 1.7096090018749237, "epoch": 1.5816923457197, "grad_norm": 0.8302357792854309, "learning_rate": 3.988471654275201e-06, "loss": 1.2287, "mean_token_accuracy": 0.6795289516448975, "num_tokens": 2415465709.0, "step": 14398 }, { "entropy": 1.727865646282832, "epoch": 1.581802202631073, "grad_norm": 0.7796132564544678, "learning_rate": 3.987467935948307e-06, "loss": 1.4827, "mean_token_accuracy": 0.6607057054837545, "num_tokens": 2415621621.0, "step": 14399 }, { "entropy": 1.7068704466025035, "epoch": 1.581912059542446, "grad_norm": 0.7254052758216858, "learning_rate": 3.986464439564893e-06, "loss": 1.5308, "mean_token_accuracy": 0.6460797290007273, "num_tokens": 2415798172.0, "step": 14400 }, { "entropy": 1.7472402950127919, "epoch": 1.582021916453819, "grad_norm": 1.3230574131011963, "learning_rate": 3.9854611651567196e-06, "loss": 1.4057, "mean_token_accuracy": 0.6481355031331381, "num_tokens": 2415960359.0, "step": 14401 }, { "entropy": 1.7266095876693726, "epoch": 1.5821317733651918, "grad_norm": 0.7041482329368591, "learning_rate": 3.98445811275554e-06, "loss": 1.3587, "mean_token_accuracy": 0.6671364406744639, "num_tokens": 2416113815.0, "step": 14402 }, { "entropy": 1.7281249165534973, "epoch": 1.5822416302765647, "grad_norm": 0.6447805166244507, "learning_rate": 3.983455282393099e-06, "loss": 1.412, "mean_token_accuracy": 0.6491978416840235, "num_tokens": 2416313418.0, "step": 14403 }, { "entropy": 1.744963804880778, "epoch": 1.5823514871879376, "grad_norm": 0.6686639189720154, "learning_rate": 3.9824526741011345e-06, "loss": 1.4133, "mean_token_accuracy": 0.6531851341327032, "num_tokens": 2416494715.0, "step": 14404 }, { "entropy": 1.734292556842168, "epoch": 1.5824613440993107, "grad_norm": 0.8086754679679871, "learning_rate": 3.981450287911385e-06, "loss": 1.3749, "mean_token_accuracy": 0.6653833836317062, "num_tokens": 2416629130.0, "step": 14405 }, { "entropy": 1.7502967417240143, "epoch": 1.5825712010106836, "grad_norm": 0.8233333826065063, "learning_rate": 3.9804481238555696e-06, "loss": 1.4493, "mean_token_accuracy": 0.649358481168747, "num_tokens": 2416784379.0, "step": 14406 }, { "entropy": 1.6617790857950847, "epoch": 1.5826810579220565, "grad_norm": 0.544765055179596, "learning_rate": 3.979446181965406e-06, "loss": 1.4374, "mean_token_accuracy": 0.6548088242610296, "num_tokens": 2416981866.0, "step": 14407 }, { "entropy": 1.6831092139085133, "epoch": 1.5827909148334296, "grad_norm": 0.8361105918884277, "learning_rate": 3.97844446227261e-06, "loss": 1.621, "mean_token_accuracy": 0.6430748477578163, "num_tokens": 2417131866.0, "step": 14408 }, { "entropy": 1.7041123708089192, "epoch": 1.5829007717448023, "grad_norm": 0.8004279732704163, "learning_rate": 3.977442964808883e-06, "loss": 1.4952, "mean_token_accuracy": 0.6577915449937185, "num_tokens": 2417297788.0, "step": 14409 }, { "entropy": 1.6733851035435994, "epoch": 1.5830106286561754, "grad_norm": 0.6946509480476379, "learning_rate": 3.976441689605919e-06, "loss": 1.3204, "mean_token_accuracy": 0.667379895846049, "num_tokens": 2417418708.0, "step": 14410 }, { "entropy": 1.7224521934986115, "epoch": 1.5831204855675483, "grad_norm": 0.6349174380302429, "learning_rate": 3.975440636695412e-06, "loss": 1.3984, "mean_token_accuracy": 0.6536268393198649, "num_tokens": 2417584213.0, "step": 14411 }, { "entropy": 1.6723372340202332, "epoch": 1.5832303424789211, "grad_norm": 0.7625806331634521, "learning_rate": 3.974439806109043e-06, "loss": 1.2409, "mean_token_accuracy": 0.6775754888852438, "num_tokens": 2417759522.0, "step": 14412 }, { "entropy": 1.642015775044759, "epoch": 1.5833401993902942, "grad_norm": 0.7911476492881775, "learning_rate": 3.973439197878489e-06, "loss": 1.2949, "mean_token_accuracy": 0.6780034005641937, "num_tokens": 2417912492.0, "step": 14413 }, { "entropy": 1.7025137146313984, "epoch": 1.5834500563016671, "grad_norm": 0.7081897854804993, "learning_rate": 3.972438812035419e-06, "loss": 1.5179, "mean_token_accuracy": 0.6537428398927053, "num_tokens": 2418109884.0, "step": 14414 }, { "entropy": 1.768744687239329, "epoch": 1.58355991321304, "grad_norm": 0.804851233959198, "learning_rate": 3.971438648611492e-06, "loss": 1.482, "mean_token_accuracy": 0.6415604799985886, "num_tokens": 2418302598.0, "step": 14415 }, { "entropy": 1.700428346792857, "epoch": 1.583669770124413, "grad_norm": 0.6298617720603943, "learning_rate": 3.970438707638364e-06, "loss": 1.4271, "mean_token_accuracy": 0.6591332703828812, "num_tokens": 2418483413.0, "step": 14416 }, { "entropy": 1.7190834681193035, "epoch": 1.5837796270357858, "grad_norm": 0.7330955266952515, "learning_rate": 3.969438989147685e-06, "loss": 1.5435, "mean_token_accuracy": 0.642086406548818, "num_tokens": 2418645090.0, "step": 14417 }, { "entropy": 1.701053947210312, "epoch": 1.583889483947159, "grad_norm": 0.648068904876709, "learning_rate": 3.9684394931710956e-06, "loss": 1.282, "mean_token_accuracy": 0.6661601016918818, "num_tokens": 2418771132.0, "step": 14418 }, { "entropy": 1.7536637683709462, "epoch": 1.5839993408585318, "grad_norm": 0.7328144907951355, "learning_rate": 3.967440219740224e-06, "loss": 1.4416, "mean_token_accuracy": 0.6516183565060297, "num_tokens": 2418935915.0, "step": 14419 }, { "entropy": 1.7374096810817719, "epoch": 1.5841091977699047, "grad_norm": 0.6931876540184021, "learning_rate": 3.966441168886704e-06, "loss": 1.3514, "mean_token_accuracy": 0.6611058761676153, "num_tokens": 2419100654.0, "step": 14420 }, { "entropy": 1.7090267737706502, "epoch": 1.5842190546812778, "grad_norm": 0.7308075428009033, "learning_rate": 3.96544234064215e-06, "loss": 1.2949, "mean_token_accuracy": 0.6706414471069971, "num_tokens": 2419225860.0, "step": 14421 }, { "entropy": 1.779475748538971, "epoch": 1.5843289115926504, "grad_norm": 0.6749278903007507, "learning_rate": 3.9644437350381745e-06, "loss": 1.4604, "mean_token_accuracy": 0.6398962736129761, "num_tokens": 2419420436.0, "step": 14422 }, { "entropy": 1.6845888098080952, "epoch": 1.5844387685040235, "grad_norm": 0.6891003251075745, "learning_rate": 3.9634453521063876e-06, "loss": 1.3074, "mean_token_accuracy": 0.6675096352895101, "num_tokens": 2419590074.0, "step": 14423 }, { "entropy": 1.6917446851730347, "epoch": 1.5845486254153964, "grad_norm": 0.7120577096939087, "learning_rate": 3.962447191878381e-06, "loss": 1.4101, "mean_token_accuracy": 0.6531930317481359, "num_tokens": 2419762614.0, "step": 14424 }, { "entropy": 1.6405010223388672, "epoch": 1.5846584823267693, "grad_norm": 0.6640504002571106, "learning_rate": 3.961449254385753e-06, "loss": 1.3397, "mean_token_accuracy": 0.67174232006073, "num_tokens": 2419921978.0, "step": 14425 }, { "entropy": 1.6967049439748128, "epoch": 1.5847683392381424, "grad_norm": 0.6310282349586487, "learning_rate": 3.960451539660084e-06, "loss": 1.3417, "mean_token_accuracy": 0.6760230660438538, "num_tokens": 2420092987.0, "step": 14426 }, { "entropy": 1.7303445041179657, "epoch": 1.5848781961495153, "grad_norm": 0.6532360911369324, "learning_rate": 3.959454047732949e-06, "loss": 1.3444, "mean_token_accuracy": 0.6584073007106781, "num_tokens": 2420258161.0, "step": 14427 }, { "entropy": 1.673458496729533, "epoch": 1.5849880530608882, "grad_norm": 0.6821812391281128, "learning_rate": 3.958456778635922e-06, "loss": 1.2654, "mean_token_accuracy": 0.6753448198239008, "num_tokens": 2420433591.0, "step": 14428 }, { "entropy": 1.679155856370926, "epoch": 1.585097909972261, "grad_norm": 0.5954638123512268, "learning_rate": 3.957459732400566e-06, "loss": 1.3344, "mean_token_accuracy": 0.6624444822470347, "num_tokens": 2420624387.0, "step": 14429 }, { "entropy": 1.627536416053772, "epoch": 1.585207766883634, "grad_norm": 0.729608952999115, "learning_rate": 3.956462909058436e-06, "loss": 1.3367, "mean_token_accuracy": 0.6631862074136734, "num_tokens": 2420779608.0, "step": 14430 }, { "entropy": 1.7547686696052551, "epoch": 1.585317623795007, "grad_norm": 0.7445028424263, "learning_rate": 3.95546630864108e-06, "loss": 1.3996, "mean_token_accuracy": 0.6553547183672587, "num_tokens": 2420925812.0, "step": 14431 }, { "entropy": 1.7932293613751729, "epoch": 1.58542748070638, "grad_norm": 0.7431174516677856, "learning_rate": 3.954469931180042e-06, "loss": 1.3638, "mean_token_accuracy": 0.6611845990022024, "num_tokens": 2421059861.0, "step": 14432 }, { "entropy": 1.6480081578095753, "epoch": 1.5855373376177528, "grad_norm": 0.6314308643341064, "learning_rate": 3.953473776706857e-06, "loss": 1.3646, "mean_token_accuracy": 0.6595342606306076, "num_tokens": 2421221397.0, "step": 14433 }, { "entropy": 1.6911343236764271, "epoch": 1.585647194529126, "grad_norm": 0.653496265411377, "learning_rate": 3.9524778452530476e-06, "loss": 1.429, "mean_token_accuracy": 0.6722802569468816, "num_tokens": 2421386795.0, "step": 14434 }, { "entropy": 1.6934001346429188, "epoch": 1.5857570514404986, "grad_norm": 0.6944672465324402, "learning_rate": 3.951482136850143e-06, "loss": 1.35, "mean_token_accuracy": 0.6598734756310781, "num_tokens": 2421530857.0, "step": 14435 }, { "entropy": 1.7472728689511616, "epoch": 1.5858669083518717, "grad_norm": 0.6578483581542969, "learning_rate": 3.950486651529649e-06, "loss": 1.3643, "mean_token_accuracy": 0.658632829785347, "num_tokens": 2421743676.0, "step": 14436 }, { "entropy": 1.7039150198300679, "epoch": 1.5859767652632446, "grad_norm": 0.7169721722602844, "learning_rate": 3.949491389323079e-06, "loss": 1.3317, "mean_token_accuracy": 0.671579380830129, "num_tokens": 2421913835.0, "step": 14437 }, { "entropy": 1.7325179874897003, "epoch": 1.5860866221746175, "grad_norm": 0.7134926319122314, "learning_rate": 3.948496350261929e-06, "loss": 1.5328, "mean_token_accuracy": 0.6371408551931381, "num_tokens": 2422078510.0, "step": 14438 }, { "entropy": 1.6378303567568462, "epoch": 1.5861964790859906, "grad_norm": 0.63148432970047, "learning_rate": 3.94750153437769e-06, "loss": 1.4693, "mean_token_accuracy": 0.6584180593490601, "num_tokens": 2422262293.0, "step": 14439 }, { "entropy": 1.7055408656597137, "epoch": 1.5863063359973635, "grad_norm": 0.6454849243164062, "learning_rate": 3.94650694170185e-06, "loss": 1.4238, "mean_token_accuracy": 0.6490067690610886, "num_tokens": 2422423182.0, "step": 14440 }, { "entropy": 1.6568194329738617, "epoch": 1.5864161929087364, "grad_norm": 0.61830735206604, "learning_rate": 3.945512572265888e-06, "loss": 1.4239, "mean_token_accuracy": 0.6476400097211202, "num_tokens": 2422619592.0, "step": 14441 }, { "entropy": 1.652705987294515, "epoch": 1.5865260498201095, "grad_norm": 0.6976563930511475, "learning_rate": 3.944518426101275e-06, "loss": 1.3246, "mean_token_accuracy": 0.6634400536616644, "num_tokens": 2422744514.0, "step": 14442 }, { "entropy": 1.7328997552394867, "epoch": 1.5866359067314821, "grad_norm": 0.699158251285553, "learning_rate": 3.943524503239474e-06, "loss": 1.4902, "mean_token_accuracy": 0.6478810012340546, "num_tokens": 2422929691.0, "step": 14443 }, { "entropy": 1.6880051692326863, "epoch": 1.5867457636428552, "grad_norm": 0.6858848333358765, "learning_rate": 3.942530803711941e-06, "loss": 1.2967, "mean_token_accuracy": 0.6767656803131104, "num_tokens": 2423066587.0, "step": 14444 }, { "entropy": 1.6842141250769298, "epoch": 1.5868556205542281, "grad_norm": 0.6382424831390381, "learning_rate": 3.941537327550131e-06, "loss": 1.3361, "mean_token_accuracy": 0.6656038562456766, "num_tokens": 2423196266.0, "step": 14445 }, { "entropy": 1.6592636009057362, "epoch": 1.586965477465601, "grad_norm": 0.6557448506355286, "learning_rate": 3.940544074785483e-06, "loss": 1.3152, "mean_token_accuracy": 0.6723695049683253, "num_tokens": 2423356556.0, "step": 14446 }, { "entropy": 1.7335790693759918, "epoch": 1.5870753343769741, "grad_norm": 0.7295007109642029, "learning_rate": 3.939551045449432e-06, "loss": 1.5265, "mean_token_accuracy": 0.6430895005663236, "num_tokens": 2423517951.0, "step": 14447 }, { "entropy": 1.7022380630175273, "epoch": 1.5871851912883468, "grad_norm": 0.6990877389907837, "learning_rate": 3.938558239573408e-06, "loss": 1.4924, "mean_token_accuracy": 0.6644400457541147, "num_tokens": 2423670051.0, "step": 14448 }, { "entropy": 1.7559547921021779, "epoch": 1.58729504819972, "grad_norm": 0.6705808639526367, "learning_rate": 3.937565657188838e-06, "loss": 1.5399, "mean_token_accuracy": 0.6576424241065979, "num_tokens": 2423823200.0, "step": 14449 }, { "entropy": 1.6872046788533528, "epoch": 1.5874049051110928, "grad_norm": 0.7224948406219482, "learning_rate": 3.93657329832713e-06, "loss": 1.261, "mean_token_accuracy": 0.6700985580682755, "num_tokens": 2423942075.0, "step": 14450 }, { "entropy": 1.707588940858841, "epoch": 1.5875147620224657, "grad_norm": 0.7525602579116821, "learning_rate": 3.935581163019694e-06, "loss": 1.4265, "mean_token_accuracy": 0.6589339872201284, "num_tokens": 2424120103.0, "step": 14451 }, { "entropy": 1.7470983068148296, "epoch": 1.5876246189338388, "grad_norm": 0.8434122800827026, "learning_rate": 3.9345892512979325e-06, "loss": 1.4399, "mean_token_accuracy": 0.6524594177802404, "num_tokens": 2424267471.0, "step": 14452 }, { "entropy": 1.7276219228903453, "epoch": 1.5877344758452117, "grad_norm": 0.6105454564094543, "learning_rate": 3.933597563193234e-06, "loss": 1.2936, "mean_token_accuracy": 0.6729495972394943, "num_tokens": 2424428446.0, "step": 14453 }, { "entropy": 1.6018980145454407, "epoch": 1.5878443327565845, "grad_norm": 0.6616988182067871, "learning_rate": 3.932606098736992e-06, "loss": 1.3709, "mean_token_accuracy": 0.6708630422751108, "num_tokens": 2424558299.0, "step": 14454 }, { "entropy": 1.6875403026739757, "epoch": 1.5879541896679576, "grad_norm": 0.7595418691635132, "learning_rate": 3.931614857960582e-06, "loss": 1.5471, "mean_token_accuracy": 0.6460252776741982, "num_tokens": 2424730586.0, "step": 14455 }, { "entropy": 1.7008586128552754, "epoch": 1.5880640465793303, "grad_norm": 0.7267166972160339, "learning_rate": 3.930623840895374e-06, "loss": 1.2598, "mean_token_accuracy": 0.6835194180409113, "num_tokens": 2424863946.0, "step": 14456 }, { "entropy": 1.7498765190442402, "epoch": 1.5881739034907034, "grad_norm": 0.7018238306045532, "learning_rate": 3.92963304757274e-06, "loss": 1.3719, "mean_token_accuracy": 0.6424062748750051, "num_tokens": 2425008200.0, "step": 14457 }, { "entropy": 1.6699590682983398, "epoch": 1.5882837604020763, "grad_norm": 0.660306453704834, "learning_rate": 3.928642478024032e-06, "loss": 1.3016, "mean_token_accuracy": 0.6737691263357798, "num_tokens": 2425187685.0, "step": 14458 }, { "entropy": 1.7022275626659393, "epoch": 1.5883936173134492, "grad_norm": 0.6317709684371948, "learning_rate": 3.927652132280601e-06, "loss": 1.3297, "mean_token_accuracy": 0.6666052093108495, "num_tokens": 2425320325.0, "step": 14459 }, { "entropy": 1.7465975681940715, "epoch": 1.5885034742248223, "grad_norm": 0.6987557411193848, "learning_rate": 3.926662010373794e-06, "loss": 1.3083, "mean_token_accuracy": 0.6656803041696548, "num_tokens": 2425430057.0, "step": 14460 }, { "entropy": 1.7197660605112712, "epoch": 1.588613331136195, "grad_norm": 0.6313250064849854, "learning_rate": 3.925672112334949e-06, "loss": 1.3808, "mean_token_accuracy": 0.6632373382647833, "num_tokens": 2425565518.0, "step": 14461 }, { "entropy": 1.6192362904548645, "epoch": 1.588723188047568, "grad_norm": 0.6355708241462708, "learning_rate": 3.924682438195394e-06, "loss": 1.2976, "mean_token_accuracy": 0.6806172430515289, "num_tokens": 2425722341.0, "step": 14462 }, { "entropy": 1.7012809813022614, "epoch": 1.588833044958941, "grad_norm": 0.6677663326263428, "learning_rate": 3.92369298798645e-06, "loss": 1.3461, "mean_token_accuracy": 0.6603120565414429, "num_tokens": 2425871674.0, "step": 14463 }, { "entropy": 1.6768560310204823, "epoch": 1.5889429018703138, "grad_norm": 0.6279048919677734, "learning_rate": 3.9227037617394345e-06, "loss": 1.2773, "mean_token_accuracy": 0.6672329306602478, "num_tokens": 2426021027.0, "step": 14464 }, { "entropy": 1.687048117319743, "epoch": 1.589052758781687, "grad_norm": 0.6013140678405762, "learning_rate": 3.921714759485657e-06, "loss": 1.4521, "mean_token_accuracy": 0.6478681514660517, "num_tokens": 2426196122.0, "step": 14465 }, { "entropy": 1.6956083178520203, "epoch": 1.5891626156930598, "grad_norm": 0.6516452431678772, "learning_rate": 3.920725981256416e-06, "loss": 1.4494, "mean_token_accuracy": 0.6545127183198929, "num_tokens": 2426380157.0, "step": 14466 }, { "entropy": 1.6591267784436543, "epoch": 1.5892724726044327, "grad_norm": 0.7242151498794556, "learning_rate": 3.9197374270830095e-06, "loss": 1.3716, "mean_token_accuracy": 0.671110580364863, "num_tokens": 2426507459.0, "step": 14467 }, { "entropy": 1.6566206415494282, "epoch": 1.5893823295158058, "grad_norm": 0.6785814166069031, "learning_rate": 3.918749096996721e-06, "loss": 1.4111, "mean_token_accuracy": 0.6570883542299271, "num_tokens": 2426673369.0, "step": 14468 }, { "entropy": 1.7064830362796783, "epoch": 1.5894921864271785, "grad_norm": 0.5841078758239746, "learning_rate": 3.917760991028835e-06, "loss": 1.5365, "mean_token_accuracy": 0.6401430120070776, "num_tokens": 2426884502.0, "step": 14469 }, { "entropy": 1.7302399973074596, "epoch": 1.5896020433385516, "grad_norm": 0.670065701007843, "learning_rate": 3.9167731092106225e-06, "loss": 1.2846, "mean_token_accuracy": 0.66989433268706, "num_tokens": 2427038641.0, "step": 14470 }, { "entropy": 1.693581352631251, "epoch": 1.5897119002499245, "grad_norm": 0.6922983527183533, "learning_rate": 3.915785451573346e-06, "loss": 1.3616, "mean_token_accuracy": 0.6663850297530493, "num_tokens": 2427197712.0, "step": 14471 }, { "entropy": 1.7170843879381816, "epoch": 1.5898217571612974, "grad_norm": 0.665672779083252, "learning_rate": 3.9147980181482685e-06, "loss": 1.5425, "mean_token_accuracy": 0.6393137524525324, "num_tokens": 2427387540.0, "step": 14472 }, { "entropy": 1.7163531581560771, "epoch": 1.5899316140726705, "grad_norm": 0.719980776309967, "learning_rate": 3.913810808966642e-06, "loss": 1.3993, "mean_token_accuracy": 0.6594651788473129, "num_tokens": 2427546679.0, "step": 14473 }, { "entropy": 1.739640901486079, "epoch": 1.5900414709840431, "grad_norm": 0.7251821756362915, "learning_rate": 3.9128238240597125e-06, "loss": 1.5654, "mean_token_accuracy": 0.6393600652615229, "num_tokens": 2427761624.0, "step": 14474 }, { "entropy": 1.6863900522391002, "epoch": 1.5901513278954162, "grad_norm": 0.5635016560554504, "learning_rate": 3.911837063458712e-06, "loss": 1.3514, "mean_token_accuracy": 0.663020983338356, "num_tokens": 2427936133.0, "step": 14475 }, { "entropy": 1.7012007733186085, "epoch": 1.5902611848067891, "grad_norm": 0.617427408695221, "learning_rate": 3.910850527194878e-06, "loss": 1.3294, "mean_token_accuracy": 0.6652300308148066, "num_tokens": 2428129442.0, "step": 14476 }, { "entropy": 1.6678180694580078, "epoch": 1.590371041718162, "grad_norm": 0.5885007381439209, "learning_rate": 3.9098642152994295e-06, "loss": 1.2837, "mean_token_accuracy": 0.6684055576721827, "num_tokens": 2428271086.0, "step": 14477 }, { "entropy": 1.7171485026677449, "epoch": 1.5904808986295351, "grad_norm": 0.9225579500198364, "learning_rate": 3.90887812780358e-06, "loss": 1.3262, "mean_token_accuracy": 0.6671409706274668, "num_tokens": 2428414176.0, "step": 14478 }, { "entropy": 1.645038495461146, "epoch": 1.590590755540908, "grad_norm": 0.6282086372375488, "learning_rate": 3.907892264738546e-06, "loss": 1.3399, "mean_token_accuracy": 0.6648639589548111, "num_tokens": 2428564196.0, "step": 14479 }, { "entropy": 1.69185275832812, "epoch": 1.590700612452281, "grad_norm": 0.6884506344795227, "learning_rate": 3.9069066261355235e-06, "loss": 1.4349, "mean_token_accuracy": 0.6530876805384954, "num_tokens": 2428744290.0, "step": 14480 }, { "entropy": 1.6777120033899944, "epoch": 1.590810469363654, "grad_norm": 0.6747894883155823, "learning_rate": 3.905921212025712e-06, "loss": 1.2716, "mean_token_accuracy": 0.6761378745237986, "num_tokens": 2428886772.0, "step": 14481 }, { "entropy": 1.7333306272824605, "epoch": 1.5909203262750267, "grad_norm": 0.7000220417976379, "learning_rate": 3.904936022440299e-06, "loss": 1.4266, "mean_token_accuracy": 0.6612260987361273, "num_tokens": 2429022815.0, "step": 14482 }, { "entropy": 1.7049691180388133, "epoch": 1.5910301831863998, "grad_norm": 0.678167462348938, "learning_rate": 3.90395105741046e-06, "loss": 1.5999, "mean_token_accuracy": 0.6353818227847418, "num_tokens": 2429194580.0, "step": 14483 }, { "entropy": 1.7223326464494069, "epoch": 1.5911400400977727, "grad_norm": 0.6117254495620728, "learning_rate": 3.9029663169673726e-06, "loss": 1.4239, "mean_token_accuracy": 0.6493904888629913, "num_tokens": 2429410711.0, "step": 14484 }, { "entropy": 1.721522440512975, "epoch": 1.5912498970091455, "grad_norm": 1.1732066869735718, "learning_rate": 3.901981801142206e-06, "loss": 1.3452, "mean_token_accuracy": 0.6608117173115412, "num_tokens": 2429595212.0, "step": 14485 }, { "entropy": 1.7386885285377502, "epoch": 1.5913597539205186, "grad_norm": 0.7604197263717651, "learning_rate": 3.900997509966116e-06, "loss": 1.2881, "mean_token_accuracy": 0.680141399304072, "num_tokens": 2429725832.0, "step": 14486 }, { "entropy": 1.673358937104543, "epoch": 1.5914696108318913, "grad_norm": 0.676415741443634, "learning_rate": 3.9000134434702546e-06, "loss": 1.3912, "mean_token_accuracy": 0.6618489970763525, "num_tokens": 2429905266.0, "step": 14487 }, { "entropy": 1.7429005404313405, "epoch": 1.5915794677432644, "grad_norm": 0.6558493971824646, "learning_rate": 3.899029601685771e-06, "loss": 1.4027, "mean_token_accuracy": 0.658114567399025, "num_tokens": 2430082783.0, "step": 14488 }, { "entropy": 1.7437707682450612, "epoch": 1.5916893246546373, "grad_norm": 0.7400485277175903, "learning_rate": 3.8980459846438e-06, "loss": 1.3969, "mean_token_accuracy": 0.6684872756401697, "num_tokens": 2430232038.0, "step": 14489 }, { "entropy": 1.6967511971791585, "epoch": 1.5917991815660102, "grad_norm": 0.9593654870986938, "learning_rate": 3.89706259237547e-06, "loss": 1.4267, "mean_token_accuracy": 0.6613242427508036, "num_tokens": 2430407393.0, "step": 14490 }, { "entropy": 1.726237694422404, "epoch": 1.5919090384773833, "grad_norm": 0.6871761083602905, "learning_rate": 3.896079424911913e-06, "loss": 1.3982, "mean_token_accuracy": 0.6530379752318064, "num_tokens": 2430597598.0, "step": 14491 }, { "entropy": 1.660180926322937, "epoch": 1.5920188953887562, "grad_norm": 0.6090943813323975, "learning_rate": 3.895096482284238e-06, "loss": 1.3659, "mean_token_accuracy": 0.6511796166499456, "num_tokens": 2430787465.0, "step": 14492 }, { "entropy": 1.7284020980199177, "epoch": 1.592128752300129, "grad_norm": 0.7778229713439941, "learning_rate": 3.89411376452356e-06, "loss": 1.4237, "mean_token_accuracy": 0.6680044829845428, "num_tokens": 2430917537.0, "step": 14493 }, { "entropy": 1.6784189939498901, "epoch": 1.5922386092115022, "grad_norm": 0.5987834334373474, "learning_rate": 3.8931312716609784e-06, "loss": 1.494, "mean_token_accuracy": 0.627329871058464, "num_tokens": 2431184294.0, "step": 14494 }, { "entropy": 1.7303737103939056, "epoch": 1.5923484661228748, "grad_norm": 0.7138601541519165, "learning_rate": 3.892149003727589e-06, "loss": 1.5162, "mean_token_accuracy": 0.6275907506545385, "num_tokens": 2431349362.0, "step": 14495 }, { "entropy": 1.6458158493041992, "epoch": 1.592458323034248, "grad_norm": 0.6402990818023682, "learning_rate": 3.891166960754479e-06, "loss": 1.2598, "mean_token_accuracy": 0.6761557509501775, "num_tokens": 2431473143.0, "step": 14496 }, { "entropy": 1.6565284033616383, "epoch": 1.5925681799456208, "grad_norm": 0.631554126739502, "learning_rate": 3.890185142772735e-06, "loss": 1.4001, "mean_token_accuracy": 0.6604942381381989, "num_tokens": 2431643673.0, "step": 14497 }, { "entropy": 1.709353893995285, "epoch": 1.5926780368569937, "grad_norm": 0.6308357119560242, "learning_rate": 3.889203549813426e-06, "loss": 1.4271, "mean_token_accuracy": 0.6546412259340286, "num_tokens": 2431809228.0, "step": 14498 }, { "entropy": 1.7170674403508503, "epoch": 1.5927878937683668, "grad_norm": 0.6483573913574219, "learning_rate": 3.88822218190762e-06, "loss": 1.3596, "mean_token_accuracy": 0.6566579739252726, "num_tokens": 2431991019.0, "step": 14499 }, { "entropy": 1.7452252904574077, "epoch": 1.5928977506797395, "grad_norm": 0.6205641627311707, "learning_rate": 3.887241039086378e-06, "loss": 1.458, "mean_token_accuracy": 0.655206615726153, "num_tokens": 2432162864.0, "step": 14500 }, { "entropy": 1.738767405351003, "epoch": 1.5930076075911126, "grad_norm": 0.7384020686149597, "learning_rate": 3.886260121380752e-06, "loss": 1.4563, "mean_token_accuracy": 0.6525509854157766, "num_tokens": 2432321317.0, "step": 14501 }, { "entropy": 1.7347382108370464, "epoch": 1.5931174645024855, "grad_norm": 0.6322787404060364, "learning_rate": 3.88527942882179e-06, "loss": 1.3526, "mean_token_accuracy": 0.6609780540068945, "num_tokens": 2432460919.0, "step": 14502 }, { "entropy": 1.6986276010672252, "epoch": 1.5932273214138584, "grad_norm": 0.7408791780471802, "learning_rate": 3.884298961440523e-06, "loss": 1.3952, "mean_token_accuracy": 0.6573623418807983, "num_tokens": 2432607846.0, "step": 14503 }, { "entropy": 1.69478377699852, "epoch": 1.5933371783252315, "grad_norm": 0.6527783274650574, "learning_rate": 3.883318719267989e-06, "loss": 1.3336, "mean_token_accuracy": 0.6538981248935064, "num_tokens": 2432812759.0, "step": 14504 }, { "entropy": 1.6673340400060017, "epoch": 1.5934470352366044, "grad_norm": 0.7707885503768921, "learning_rate": 3.8823387023352125e-06, "loss": 1.4294, "mean_token_accuracy": 0.645869846145312, "num_tokens": 2432981544.0, "step": 14505 }, { "entropy": 1.634259045124054, "epoch": 1.5935568921479772, "grad_norm": 0.7244494557380676, "learning_rate": 3.881358910673208e-06, "loss": 1.3827, "mean_token_accuracy": 0.6569319466749827, "num_tokens": 2433143702.0, "step": 14506 }, { "entropy": 1.6752298176288605, "epoch": 1.5936667490593504, "grad_norm": 0.6288512349128723, "learning_rate": 3.880379344312985e-06, "loss": 1.305, "mean_token_accuracy": 0.6659407715002695, "num_tokens": 2433308563.0, "step": 14507 }, { "entropy": 1.6835540930430095, "epoch": 1.593776605970723, "grad_norm": 0.7133122682571411, "learning_rate": 3.879400003285551e-06, "loss": 1.4305, "mean_token_accuracy": 0.663516491651535, "num_tokens": 2433474529.0, "step": 14508 }, { "entropy": 1.713885635137558, "epoch": 1.5938864628820961, "grad_norm": 0.6772231459617615, "learning_rate": 3.878420887621894e-06, "loss": 1.2612, "mean_token_accuracy": 0.6778315901756287, "num_tokens": 2433580636.0, "step": 14509 }, { "entropy": 1.64861661195755, "epoch": 1.593996319793469, "grad_norm": 0.6854017972946167, "learning_rate": 3.8774419973530096e-06, "loss": 1.2963, "mean_token_accuracy": 0.6728779971599579, "num_tokens": 2433711203.0, "step": 14510 }, { "entropy": 1.6567076245943706, "epoch": 1.594106176704842, "grad_norm": 0.6105472445487976, "learning_rate": 3.876463332509878e-06, "loss": 1.3891, "mean_token_accuracy": 0.6490083237489065, "num_tokens": 2433910114.0, "step": 14511 }, { "entropy": 1.728460282087326, "epoch": 1.594216033616215, "grad_norm": 0.6174923777580261, "learning_rate": 3.8754848931234675e-06, "loss": 1.3485, "mean_token_accuracy": 0.6621581812699636, "num_tokens": 2434064870.0, "step": 14512 }, { "entropy": 1.686330407857895, "epoch": 1.5943258905275877, "grad_norm": 0.7254580855369568, "learning_rate": 3.8745066792247535e-06, "loss": 1.4355, "mean_token_accuracy": 0.6480583598216375, "num_tokens": 2434250206.0, "step": 14513 }, { "entropy": 1.712296078602473, "epoch": 1.5944357474389608, "grad_norm": 0.604725182056427, "learning_rate": 3.873528690844691e-06, "loss": 1.6318, "mean_token_accuracy": 0.6211173211534818, "num_tokens": 2434550589.0, "step": 14514 }, { "entropy": 1.736443022886912, "epoch": 1.5945456043503337, "grad_norm": 0.7887392640113831, "learning_rate": 3.872550928014233e-06, "loss": 1.4507, "mean_token_accuracy": 0.6484967370827993, "num_tokens": 2434689712.0, "step": 14515 }, { "entropy": 1.6901069581508636, "epoch": 1.5946554612617065, "grad_norm": 0.6353496313095093, "learning_rate": 3.871573390764326e-06, "loss": 1.3398, "mean_token_accuracy": 0.664913609623909, "num_tokens": 2434821889.0, "step": 14516 }, { "entropy": 1.737305094798406, "epoch": 1.5947653181730796, "grad_norm": 0.7598763704299927, "learning_rate": 3.870596079125911e-06, "loss": 1.6029, "mean_token_accuracy": 0.6363438367843628, "num_tokens": 2434981702.0, "step": 14517 }, { "entropy": 1.713841090599696, "epoch": 1.5948751750844525, "grad_norm": 0.7490180134773254, "learning_rate": 3.869618993129919e-06, "loss": 1.4271, "mean_token_accuracy": 0.6571142375469208, "num_tokens": 2435130128.0, "step": 14518 }, { "entropy": 1.7111783623695374, "epoch": 1.5949850319958254, "grad_norm": 0.6395829916000366, "learning_rate": 3.868642132807268e-06, "loss": 1.5478, "mean_token_accuracy": 0.6382935494184494, "num_tokens": 2435341990.0, "step": 14519 }, { "entropy": 1.7345607578754425, "epoch": 1.5950948889071985, "grad_norm": 0.6980369687080383, "learning_rate": 3.8676654981888835e-06, "loss": 1.453, "mean_token_accuracy": 0.6387426902850469, "num_tokens": 2435519614.0, "step": 14520 }, { "entropy": 1.657178282737732, "epoch": 1.5952047458185712, "grad_norm": 0.694017231464386, "learning_rate": 3.866689089305671e-06, "loss": 1.4342, "mean_token_accuracy": 0.6631875882546107, "num_tokens": 2435709231.0, "step": 14521 }, { "entropy": 1.6879088878631592, "epoch": 1.5953146027299443, "grad_norm": 0.7356616854667664, "learning_rate": 3.865712906188535e-06, "loss": 1.3698, "mean_token_accuracy": 0.6680235962073008, "num_tokens": 2435913341.0, "step": 14522 }, { "entropy": 1.6702347894509633, "epoch": 1.5954244596413172, "grad_norm": 0.6752198338508606, "learning_rate": 3.8647369488683725e-06, "loss": 1.3957, "mean_token_accuracy": 0.6607193052768707, "num_tokens": 2436123537.0, "step": 14523 }, { "entropy": 1.7805339296658833, "epoch": 1.59553431655269, "grad_norm": 0.6627401113510132, "learning_rate": 3.863761217376066e-06, "loss": 1.4888, "mean_token_accuracy": 0.6396598418553671, "num_tokens": 2436306893.0, "step": 14524 }, { "entropy": 1.6623384753863018, "epoch": 1.5956441734640632, "grad_norm": 0.702000617980957, "learning_rate": 3.862785711742505e-06, "loss": 1.4121, "mean_token_accuracy": 0.669417624672254, "num_tokens": 2436502954.0, "step": 14525 }, { "entropy": 1.6859069367249806, "epoch": 1.5957540303754358, "grad_norm": 0.6549301743507385, "learning_rate": 3.861810431998561e-06, "loss": 1.3679, "mean_token_accuracy": 0.6617111215988795, "num_tokens": 2436663539.0, "step": 14526 }, { "entropy": 1.7175857424736023, "epoch": 1.595863887286809, "grad_norm": 0.6823814511299133, "learning_rate": 3.860835378175095e-06, "loss": 1.3735, "mean_token_accuracy": 0.6544593870639801, "num_tokens": 2436807795.0, "step": 14527 }, { "entropy": 1.638688455025355, "epoch": 1.5959737441981818, "grad_norm": 0.7116491794586182, "learning_rate": 3.859860550302975e-06, "loss": 1.4848, "mean_token_accuracy": 0.6531452437241873, "num_tokens": 2436952648.0, "step": 14528 }, { "entropy": 1.8078931868076324, "epoch": 1.5960836011095547, "grad_norm": 0.7018418312072754, "learning_rate": 3.858885948413053e-06, "loss": 1.4072, "mean_token_accuracy": 0.6488349239031473, "num_tokens": 2437119533.0, "step": 14529 }, { "entropy": 1.682287057240804, "epoch": 1.5961934580209278, "grad_norm": 0.6741816401481628, "learning_rate": 3.857911572536171e-06, "loss": 1.4283, "mean_token_accuracy": 0.6353452255328497, "num_tokens": 2437396152.0, "step": 14530 }, { "entropy": 1.7795856694380443, "epoch": 1.5963033149323007, "grad_norm": 0.6997506022453308, "learning_rate": 3.8569374227031685e-06, "loss": 1.5904, "mean_token_accuracy": 0.6392437120278677, "num_tokens": 2437559139.0, "step": 14531 }, { "entropy": 1.6715736190478008, "epoch": 1.5964131718436736, "grad_norm": 0.7329205870628357, "learning_rate": 3.855963498944881e-06, "loss": 1.4854, "mean_token_accuracy": 0.6508821298678716, "num_tokens": 2437753550.0, "step": 14532 }, { "entropy": 1.7069322069485982, "epoch": 1.5965230287550467, "grad_norm": 0.6997405290603638, "learning_rate": 3.854989801292126e-06, "loss": 1.2177, "mean_token_accuracy": 0.6853279570738474, "num_tokens": 2437861866.0, "step": 14533 }, { "entropy": 1.7327150007088978, "epoch": 1.5966328856664194, "grad_norm": 0.8832059502601624, "learning_rate": 3.854016329775727e-06, "loss": 1.4684, "mean_token_accuracy": 0.6543268064657847, "num_tokens": 2438062320.0, "step": 14534 }, { "entropy": 1.7514649629592896, "epoch": 1.5967427425777925, "grad_norm": 0.5511615872383118, "learning_rate": 3.853043084426491e-06, "loss": 1.404, "mean_token_accuracy": 0.6429890592892965, "num_tokens": 2438275800.0, "step": 14535 }, { "entropy": 1.7066907584667206, "epoch": 1.5968525994891654, "grad_norm": 0.8445066213607788, "learning_rate": 3.852070065275219e-06, "loss": 1.1934, "mean_token_accuracy": 0.6857452293237051, "num_tokens": 2438398233.0, "step": 14536 }, { "entropy": 1.7238865693410237, "epoch": 1.5969624564005382, "grad_norm": 0.8096691966056824, "learning_rate": 3.85109727235271e-06, "loss": 1.3924, "mean_token_accuracy": 0.6637987395127615, "num_tokens": 2438542858.0, "step": 14537 }, { "entropy": 1.6195646623770397, "epoch": 1.5970723133119114, "grad_norm": 0.6838819980621338, "learning_rate": 3.8501247056897516e-06, "loss": 1.4494, "mean_token_accuracy": 0.6555087268352509, "num_tokens": 2438704318.0, "step": 14538 }, { "entropy": 1.7541901965936024, "epoch": 1.597182170223284, "grad_norm": 0.7576407790184021, "learning_rate": 3.849152365317122e-06, "loss": 1.4994, "mean_token_accuracy": 0.6444768408934275, "num_tokens": 2438875912.0, "step": 14539 }, { "entropy": 1.7162339687347412, "epoch": 1.5972920271346571, "grad_norm": 0.7209724187850952, "learning_rate": 3.848180251265598e-06, "loss": 1.509, "mean_token_accuracy": 0.6356743176778158, "num_tokens": 2439050609.0, "step": 14540 }, { "entropy": 1.69766765832901, "epoch": 1.59740188404603, "grad_norm": 0.7323725819587708, "learning_rate": 3.847208363565948e-06, "loss": 1.2303, "mean_token_accuracy": 0.681826040148735, "num_tokens": 2439160195.0, "step": 14541 }, { "entropy": 1.7064706285794575, "epoch": 1.597511740957403, "grad_norm": 0.7907741665840149, "learning_rate": 3.84623670224893e-06, "loss": 1.2365, "mean_token_accuracy": 0.673534115155538, "num_tokens": 2439296809.0, "step": 14542 }, { "entropy": 1.7028611103693645, "epoch": 1.597621597868776, "grad_norm": 0.6134085059165955, "learning_rate": 3.845265267345295e-06, "loss": 1.3897, "mean_token_accuracy": 0.6668266952037811, "num_tokens": 2439444265.0, "step": 14543 }, { "entropy": 1.7380881508191426, "epoch": 1.5977314547801489, "grad_norm": 0.668287456035614, "learning_rate": 3.844294058885793e-06, "loss": 1.3247, "mean_token_accuracy": 0.6577390929063162, "num_tokens": 2439575203.0, "step": 14544 }, { "entropy": 1.7180090347925823, "epoch": 1.5978413116915218, "grad_norm": 0.6699938774108887, "learning_rate": 3.843323076901159e-06, "loss": 1.3547, "mean_token_accuracy": 0.6593746840953827, "num_tokens": 2439717026.0, "step": 14545 }, { "entropy": 1.7316296100616455, "epoch": 1.5979511686028949, "grad_norm": 0.7291305661201477, "learning_rate": 3.842352321422122e-06, "loss": 1.5341, "mean_token_accuracy": 0.6648634423812231, "num_tokens": 2439859920.0, "step": 14546 }, { "entropy": 1.7074800829092662, "epoch": 1.5980610255142675, "grad_norm": 0.7385088205337524, "learning_rate": 3.841381792479412e-06, "loss": 1.4818, "mean_token_accuracy": 0.641268327832222, "num_tokens": 2440079130.0, "step": 14547 }, { "entropy": 1.7116615772247314, "epoch": 1.5981708824256406, "grad_norm": 0.6778597831726074, "learning_rate": 3.840411490103739e-06, "loss": 1.4609, "mean_token_accuracy": 0.6559257407983144, "num_tokens": 2440223981.0, "step": 14548 }, { "entropy": 1.7189983030160267, "epoch": 1.5982807393370135, "grad_norm": 0.5723182559013367, "learning_rate": 3.83944141432582e-06, "loss": 1.437, "mean_token_accuracy": 0.6461120347181956, "num_tokens": 2440395440.0, "step": 14549 }, { "entropy": 1.7937857309977214, "epoch": 1.5983905962483864, "grad_norm": 1.1871225833892822, "learning_rate": 3.838471565176353e-06, "loss": 1.5759, "mean_token_accuracy": 0.6486638983090719, "num_tokens": 2440517170.0, "step": 14550 }, { "entropy": 1.674616406361262, "epoch": 1.5985004531597595, "grad_norm": 0.5636810064315796, "learning_rate": 3.837501942686031e-06, "loss": 1.2935, "mean_token_accuracy": 0.6660540401935577, "num_tokens": 2440713057.0, "step": 14551 }, { "entropy": 1.5875491201877594, "epoch": 1.5986103100711322, "grad_norm": 0.7079348564147949, "learning_rate": 3.836532546885546e-06, "loss": 1.3004, "mean_token_accuracy": 0.6757344851891199, "num_tokens": 2440867258.0, "step": 14552 }, { "entropy": 1.7332369486490886, "epoch": 1.5987201669825053, "grad_norm": 0.6799494624137878, "learning_rate": 3.83556337780558e-06, "loss": 1.4675, "mean_token_accuracy": 0.6664691468079885, "num_tokens": 2441018581.0, "step": 14553 }, { "entropy": 1.7250041862328847, "epoch": 1.5988300238938782, "grad_norm": 0.6802073121070862, "learning_rate": 3.834594435476805e-06, "loss": 1.4498, "mean_token_accuracy": 0.6497951696316401, "num_tokens": 2441215526.0, "step": 14554 }, { "entropy": 1.6998497645060222, "epoch": 1.598939880805251, "grad_norm": 0.8127340078353882, "learning_rate": 3.8336257199298845e-06, "loss": 1.4345, "mean_token_accuracy": 0.6507293184598287, "num_tokens": 2441379883.0, "step": 14555 }, { "entropy": 1.7047206560770671, "epoch": 1.5990497377166242, "grad_norm": 0.6116032004356384, "learning_rate": 3.832657231195483e-06, "loss": 1.4392, "mean_token_accuracy": 0.6522220075130463, "num_tokens": 2441556694.0, "step": 14556 }, { "entropy": 1.6925352116425831, "epoch": 1.599159594627997, "grad_norm": 0.6775454878807068, "learning_rate": 3.83168896930425e-06, "loss": 1.3527, "mean_token_accuracy": 0.6680527776479721, "num_tokens": 2441706254.0, "step": 14557 }, { "entropy": 1.695090264081955, "epoch": 1.59926945153937, "grad_norm": 0.633358359336853, "learning_rate": 3.8307209342868294e-06, "loss": 1.3081, "mean_token_accuracy": 0.664101724823316, "num_tokens": 2441841173.0, "step": 14558 }, { "entropy": 1.70580060283343, "epoch": 1.599379308450743, "grad_norm": 0.7494609951972961, "learning_rate": 3.8297531261738626e-06, "loss": 1.37, "mean_token_accuracy": 0.6554321199655533, "num_tokens": 2441999550.0, "step": 14559 }, { "entropy": 1.682591011126836, "epoch": 1.5994891653621157, "grad_norm": 0.6495206952095032, "learning_rate": 3.828785544995977e-06, "loss": 1.4213, "mean_token_accuracy": 0.6555017977952957, "num_tokens": 2442176523.0, "step": 14560 }, { "entropy": 1.7068423926830292, "epoch": 1.5995990222734888, "grad_norm": 0.7015541195869446, "learning_rate": 3.827818190783799e-06, "loss": 1.3814, "mean_token_accuracy": 0.6704583317041397, "num_tokens": 2442314778.0, "step": 14561 }, { "entropy": 1.6887734134991963, "epoch": 1.5997088791848617, "grad_norm": 0.5791016817092896, "learning_rate": 3.826851063567943e-06, "loss": 1.4871, "mean_token_accuracy": 0.6437655538320541, "num_tokens": 2442514913.0, "step": 14562 }, { "entropy": 1.7405428489049275, "epoch": 1.5998187360962346, "grad_norm": 0.7147680521011353, "learning_rate": 3.825884163379017e-06, "loss": 1.3649, "mean_token_accuracy": 0.6673022856314977, "num_tokens": 2442648604.0, "step": 14563 }, { "entropy": 1.7529491583506267, "epoch": 1.5999285930076077, "grad_norm": 0.5654240250587463, "learning_rate": 3.824917490247625e-06, "loss": 1.4984, "mean_token_accuracy": 0.6382785141468048, "num_tokens": 2442869846.0, "step": 14564 }, { "entropy": 1.6878890097141266, "epoch": 1.6000384499189804, "grad_norm": 0.7083843350410461, "learning_rate": 3.823951044204361e-06, "loss": 1.3488, "mean_token_accuracy": 0.6723784406979879, "num_tokens": 2443030206.0, "step": 14565 }, { "entropy": 1.7418665091196697, "epoch": 1.6001483068303535, "grad_norm": 0.6479423642158508, "learning_rate": 3.822984825279814e-06, "loss": 1.3768, "mean_token_accuracy": 0.6521689047416052, "num_tokens": 2443196472.0, "step": 14566 }, { "entropy": 1.7459342181682587, "epoch": 1.6002581637417264, "grad_norm": 0.758888304233551, "learning_rate": 3.822018833504564e-06, "loss": 1.2715, "mean_token_accuracy": 0.6687377045551935, "num_tokens": 2443326770.0, "step": 14567 }, { "entropy": 1.6872256497542064, "epoch": 1.6003680206530992, "grad_norm": 0.5410403609275818, "learning_rate": 3.821053068909182e-06, "loss": 1.4807, "mean_token_accuracy": 0.6485924671093622, "num_tokens": 2443565031.0, "step": 14568 }, { "entropy": 1.6879334946473439, "epoch": 1.6004778775644724, "grad_norm": 0.68756103515625, "learning_rate": 3.820087531524236e-06, "loss": 1.3248, "mean_token_accuracy": 0.6751703520615896, "num_tokens": 2443702467.0, "step": 14569 }, { "entropy": 1.7088148792584736, "epoch": 1.6005877344758452, "grad_norm": 0.6238970756530762, "learning_rate": 3.819122221380284e-06, "loss": 1.3777, "mean_token_accuracy": 0.6487255543470383, "num_tokens": 2443858226.0, "step": 14570 }, { "entropy": 1.7447780867417653, "epoch": 1.6006975913872181, "grad_norm": 0.7313827872276306, "learning_rate": 3.818157138507878e-06, "loss": 1.4983, "mean_token_accuracy": 0.6436322331428528, "num_tokens": 2444051433.0, "step": 14571 }, { "entropy": 1.6420711676279705, "epoch": 1.6008074482985912, "grad_norm": 0.7198586463928223, "learning_rate": 3.817192282937561e-06, "loss": 1.4051, "mean_token_accuracy": 0.6673461546500524, "num_tokens": 2444215820.0, "step": 14572 }, { "entropy": 1.7294553816318512, "epoch": 1.600917305209964, "grad_norm": 0.7166528105735779, "learning_rate": 3.816227654699873e-06, "loss": 1.5608, "mean_token_accuracy": 0.6288095712661743, "num_tokens": 2444425376.0, "step": 14573 }, { "entropy": 1.750697026650111, "epoch": 1.601027162121337, "grad_norm": 0.6765901446342468, "learning_rate": 3.815263253825344e-06, "loss": 1.3493, "mean_token_accuracy": 0.6524553100268046, "num_tokens": 2444541889.0, "step": 14574 }, { "entropy": 1.749423881371816, "epoch": 1.6011370190327099, "grad_norm": 0.7044322490692139, "learning_rate": 3.8142990803444935e-06, "loss": 1.4577, "mean_token_accuracy": 0.6440982123215994, "num_tokens": 2444719105.0, "step": 14575 }, { "entropy": 1.7084167798360188, "epoch": 1.6012468759440828, "grad_norm": 0.7361584305763245, "learning_rate": 3.8133351342878393e-06, "loss": 1.5912, "mean_token_accuracy": 0.6365940769513448, "num_tokens": 2444993446.0, "step": 14576 }, { "entropy": 1.7147212425867717, "epoch": 1.6013567328554559, "grad_norm": 0.6779691576957703, "learning_rate": 3.8123714156858886e-06, "loss": 1.2403, "mean_token_accuracy": 0.6808453897635142, "num_tokens": 2445124199.0, "step": 14577 }, { "entropy": 1.671504944562912, "epoch": 1.6014665897668285, "grad_norm": 0.6467366814613342, "learning_rate": 3.8114079245691473e-06, "loss": 1.3685, "mean_token_accuracy": 0.6617433130741119, "num_tokens": 2445279228.0, "step": 14578 }, { "entropy": 1.7411832610766094, "epoch": 1.6015764466782016, "grad_norm": 0.6643005609512329, "learning_rate": 3.810444660968104e-06, "loss": 1.4469, "mean_token_accuracy": 0.6542666604121526, "num_tokens": 2445408287.0, "step": 14579 }, { "entropy": 1.605327715476354, "epoch": 1.6016863035895745, "grad_norm": 0.6111404299736023, "learning_rate": 3.809481624913246e-06, "loss": 1.3451, "mean_token_accuracy": 0.6663858542839686, "num_tokens": 2445582266.0, "step": 14580 }, { "entropy": 1.7256748775641124, "epoch": 1.6017961605009474, "grad_norm": 0.6152947545051575, "learning_rate": 3.8085188164350574e-06, "loss": 1.3795, "mean_token_accuracy": 0.651614765326182, "num_tokens": 2445723827.0, "step": 14581 }, { "entropy": 1.7151028116544087, "epoch": 1.6019060174123205, "grad_norm": 0.6862490773200989, "learning_rate": 3.8075562355640066e-06, "loss": 1.3588, "mean_token_accuracy": 0.6671392222245535, "num_tokens": 2445884515.0, "step": 14582 }, { "entropy": 1.7010668416817982, "epoch": 1.6020158743236934, "grad_norm": 0.6362306475639343, "learning_rate": 3.806593882330558e-06, "loss": 1.5877, "mean_token_accuracy": 0.6236594518025717, "num_tokens": 2446102916.0, "step": 14583 }, { "entropy": 1.7054578860600789, "epoch": 1.6021257312350663, "grad_norm": 0.686418890953064, "learning_rate": 3.8056317567651723e-06, "loss": 1.4425, "mean_token_accuracy": 0.6407395700613657, "num_tokens": 2446306735.0, "step": 14584 }, { "entropy": 1.720127671957016, "epoch": 1.6022355881464394, "grad_norm": 0.706402599811554, "learning_rate": 3.804669858898301e-06, "loss": 1.4191, "mean_token_accuracy": 0.6532514144976934, "num_tokens": 2446492829.0, "step": 14585 }, { "entropy": 1.673651397228241, "epoch": 1.602345445057812, "grad_norm": 0.6456170678138733, "learning_rate": 3.803708188760387e-06, "loss": 1.3936, "mean_token_accuracy": 0.6565722078084946, "num_tokens": 2446673307.0, "step": 14586 }, { "entropy": 1.7640669147173564, "epoch": 1.6024553019691852, "grad_norm": 0.7029922604560852, "learning_rate": 3.8027467463818636e-06, "loss": 1.4321, "mean_token_accuracy": 0.6552887161572775, "num_tokens": 2446854256.0, "step": 14587 }, { "entropy": 1.7160078982512157, "epoch": 1.602565158880558, "grad_norm": 0.6183205842971802, "learning_rate": 3.801785531793164e-06, "loss": 1.5031, "mean_token_accuracy": 0.6445142378409704, "num_tokens": 2447091822.0, "step": 14588 }, { "entropy": 1.7458133002122243, "epoch": 1.602675015791931, "grad_norm": 0.6344084739685059, "learning_rate": 3.8008245450247085e-06, "loss": 1.4217, "mean_token_accuracy": 0.6480998347202936, "num_tokens": 2447275539.0, "step": 14589 }, { "entropy": 1.7621017297108967, "epoch": 1.602784872703304, "grad_norm": 0.6531854867935181, "learning_rate": 3.799863786106912e-06, "loss": 1.4966, "mean_token_accuracy": 0.6346394668022791, "num_tokens": 2447496350.0, "step": 14590 }, { "entropy": 1.6988888482252757, "epoch": 1.602894729614677, "grad_norm": 0.6750460267066956, "learning_rate": 3.798903255070184e-06, "loss": 1.4873, "mean_token_accuracy": 0.6548330287138621, "num_tokens": 2447678997.0, "step": 14591 }, { "entropy": 1.6446965634822845, "epoch": 1.6030045865260498, "grad_norm": 0.7351519465446472, "learning_rate": 3.79794295194492e-06, "loss": 1.3521, "mean_token_accuracy": 0.6712757696708044, "num_tokens": 2447824026.0, "step": 14592 }, { "entropy": 1.701996664206187, "epoch": 1.6031144434374227, "grad_norm": 0.6086641550064087, "learning_rate": 3.796982876761518e-06, "loss": 1.3741, "mean_token_accuracy": 0.6470549603303274, "num_tokens": 2448040359.0, "step": 14593 }, { "entropy": 1.7051210800806682, "epoch": 1.6032243003487956, "grad_norm": 0.6576172113418579, "learning_rate": 3.7960230295503636e-06, "loss": 1.4443, "mean_token_accuracy": 0.6446033616860708, "num_tokens": 2448249961.0, "step": 14594 }, { "entropy": 1.6408964693546295, "epoch": 1.6033341572601687, "grad_norm": 0.6409726738929749, "learning_rate": 3.7950634103418307e-06, "loss": 1.3604, "mean_token_accuracy": 0.6675408234198889, "num_tokens": 2448420205.0, "step": 14595 }, { "entropy": 1.7251879175504048, "epoch": 1.6034440141715416, "grad_norm": 0.5918386578559875, "learning_rate": 3.7941040191662943e-06, "loss": 1.5234, "mean_token_accuracy": 0.6395444025595983, "num_tokens": 2448625696.0, "step": 14596 }, { "entropy": 1.6319251755873363, "epoch": 1.6035538710829145, "grad_norm": 0.9376781582832336, "learning_rate": 3.793144856054122e-06, "loss": 1.3813, "mean_token_accuracy": 0.6574216683705648, "num_tokens": 2448787918.0, "step": 14597 }, { "entropy": 1.6945286591847737, "epoch": 1.6036637279942876, "grad_norm": 0.7240238785743713, "learning_rate": 3.7921859210356664e-06, "loss": 1.4856, "mean_token_accuracy": 0.6500385651985804, "num_tokens": 2448973971.0, "step": 14598 }, { "entropy": 1.6881266335646312, "epoch": 1.6037735849056602, "grad_norm": 0.6214616298675537, "learning_rate": 3.7912272141412767e-06, "loss": 1.3828, "mean_token_accuracy": 0.6629806409279505, "num_tokens": 2449114059.0, "step": 14599 }, { "entropy": 1.6800266206264496, "epoch": 1.6038834418170334, "grad_norm": 0.8281370401382446, "learning_rate": 3.7902687354012998e-06, "loss": 1.4063, "mean_token_accuracy": 0.6595364113648733, "num_tokens": 2449278084.0, "step": 14600 }, { "entropy": 1.739363302787145, "epoch": 1.6039932987284062, "grad_norm": 0.6671850085258484, "learning_rate": 3.789310484846065e-06, "loss": 1.4597, "mean_token_accuracy": 0.6446760495503744, "num_tokens": 2449536151.0, "step": 14601 }, { "entropy": 1.6971477965513866, "epoch": 1.6041031556397791, "grad_norm": 0.7448716163635254, "learning_rate": 3.7883524625059075e-06, "loss": 1.2988, "mean_token_accuracy": 0.6720754504203796, "num_tokens": 2449693777.0, "step": 14602 }, { "entropy": 1.7345844606558483, "epoch": 1.6042130125511522, "grad_norm": 0.7584978342056274, "learning_rate": 3.7873946684111452e-06, "loss": 1.2274, "mean_token_accuracy": 0.6714150657256445, "num_tokens": 2449794928.0, "step": 14603 }, { "entropy": 1.7422133386135101, "epoch": 1.6043228694625251, "grad_norm": 0.7101638317108154, "learning_rate": 3.78643710259209e-06, "loss": 1.3554, "mean_token_accuracy": 0.6665351639191309, "num_tokens": 2449911300.0, "step": 14604 }, { "entropy": 1.7503991921742756, "epoch": 1.604432726373898, "grad_norm": 0.716066837310791, "learning_rate": 3.78547976507905e-06, "loss": 1.2696, "mean_token_accuracy": 0.6812546650568644, "num_tokens": 2450077891.0, "step": 14605 }, { "entropy": 1.7393498420715332, "epoch": 1.6045425832852709, "grad_norm": 0.7485668659210205, "learning_rate": 3.7845226559023256e-06, "loss": 1.3663, "mean_token_accuracy": 0.6589196572701136, "num_tokens": 2450221382.0, "step": 14606 }, { "entropy": 1.73830442627271, "epoch": 1.6046524401966438, "grad_norm": 0.6657488346099854, "learning_rate": 3.783565775092206e-06, "loss": 1.4914, "mean_token_accuracy": 0.6288889646530151, "num_tokens": 2450418829.0, "step": 14607 }, { "entropy": 1.659916838010152, "epoch": 1.6047622971080169, "grad_norm": 0.7344122529029846, "learning_rate": 3.7826091226789772e-06, "loss": 1.4672, "mean_token_accuracy": 0.6499627828598022, "num_tokens": 2450595410.0, "step": 14608 }, { "entropy": 1.7361893852551777, "epoch": 1.6048721540193898, "grad_norm": 0.7069867253303528, "learning_rate": 3.7816526986929203e-06, "loss": 1.3449, "mean_token_accuracy": 0.657584935426712, "num_tokens": 2450744883.0, "step": 14609 }, { "entropy": 1.7976744870344799, "epoch": 1.6049820109307626, "grad_norm": 0.6963937878608704, "learning_rate": 3.780696503164303e-06, "loss": 1.5181, "mean_token_accuracy": 0.6381178746620814, "num_tokens": 2450900651.0, "step": 14610 }, { "entropy": 1.6914178828398387, "epoch": 1.6050918678421358, "grad_norm": 0.6201428771018982, "learning_rate": 3.7797405361233853e-06, "loss": 1.5151, "mean_token_accuracy": 0.6491784354050955, "num_tokens": 2451111106.0, "step": 14611 }, { "entropy": 1.6507742206255596, "epoch": 1.6052017247535084, "grad_norm": 0.737235963344574, "learning_rate": 3.7787847976004277e-06, "loss": 1.2467, "mean_token_accuracy": 0.6868196477492651, "num_tokens": 2451234221.0, "step": 14612 }, { "entropy": 1.661937306324641, "epoch": 1.6053115816648815, "grad_norm": 0.6396856904029846, "learning_rate": 3.7778292876256762e-06, "loss": 1.4216, "mean_token_accuracy": 0.6528457701206207, "num_tokens": 2451452229.0, "step": 14613 }, { "entropy": 1.6698509057362874, "epoch": 1.6054214385762544, "grad_norm": 0.7439182996749878, "learning_rate": 3.776874006229376e-06, "loss": 1.3751, "mean_token_accuracy": 0.6656199296315511, "num_tokens": 2451611210.0, "step": 14614 }, { "entropy": 1.7334049840768178, "epoch": 1.6055312954876273, "grad_norm": 0.7342074513435364, "learning_rate": 3.7759189534417575e-06, "loss": 1.3407, "mean_token_accuracy": 0.6541020025809606, "num_tokens": 2451748585.0, "step": 14615 }, { "entropy": 1.6244231363137562, "epoch": 1.6056411523990004, "grad_norm": 0.6952174305915833, "learning_rate": 3.774964129293046e-06, "loss": 1.3148, "mean_token_accuracy": 0.6725502957900366, "num_tokens": 2451912029.0, "step": 14616 }, { "entropy": 1.6656245787938435, "epoch": 1.6057510093103733, "grad_norm": 0.6574463248252869, "learning_rate": 3.7740095338134684e-06, "loss": 1.3002, "mean_token_accuracy": 0.67449023326238, "num_tokens": 2452048004.0, "step": 14617 }, { "entropy": 1.643865704536438, "epoch": 1.6058608662217462, "grad_norm": 0.814515233039856, "learning_rate": 3.7730551670332317e-06, "loss": 1.4194, "mean_token_accuracy": 0.6575757165749868, "num_tokens": 2452173933.0, "step": 14618 }, { "entropy": 1.7123675048351288, "epoch": 1.605970723133119, "grad_norm": 0.6425331830978394, "learning_rate": 3.7721010289825398e-06, "loss": 1.3976, "mean_token_accuracy": 0.6674291491508484, "num_tokens": 2452314391.0, "step": 14619 }, { "entropy": 1.688428372144699, "epoch": 1.606080580044492, "grad_norm": 0.6733593344688416, "learning_rate": 3.771147119691595e-06, "loss": 1.3977, "mean_token_accuracy": 0.6623414903879166, "num_tokens": 2452466205.0, "step": 14620 }, { "entropy": 1.7384653389453888, "epoch": 1.606190436955865, "grad_norm": 0.7183213829994202, "learning_rate": 3.7701934391905883e-06, "loss": 1.5537, "mean_token_accuracy": 0.6303740590810776, "num_tokens": 2452659090.0, "step": 14621 }, { "entropy": 1.698512186606725, "epoch": 1.606300293867238, "grad_norm": 0.6138864755630493, "learning_rate": 3.769239987509701e-06, "loss": 1.4726, "mean_token_accuracy": 0.6334889431794485, "num_tokens": 2452852526.0, "step": 14622 }, { "entropy": 1.7259888648986816, "epoch": 1.6064101507786108, "grad_norm": 0.6284215450286865, "learning_rate": 3.768286764679109e-06, "loss": 1.3779, "mean_token_accuracy": 0.652561808625857, "num_tokens": 2453015973.0, "step": 14623 }, { "entropy": 1.7250304917494457, "epoch": 1.606520007689984, "grad_norm": 0.6159952282905579, "learning_rate": 3.767333770728981e-06, "loss": 1.3785, "mean_token_accuracy": 0.6558371136585871, "num_tokens": 2453166510.0, "step": 14624 }, { "entropy": 1.7731029192606609, "epoch": 1.6066298646013566, "grad_norm": 0.7708766460418701, "learning_rate": 3.766381005689481e-06, "loss": 1.5243, "mean_token_accuracy": 0.63959468404452, "num_tokens": 2453377002.0, "step": 14625 }, { "entropy": 1.7134467959403992, "epoch": 1.6067397215127297, "grad_norm": 0.6083643436431885, "learning_rate": 3.7654284695907638e-06, "loss": 1.3713, "mean_token_accuracy": 0.666517436504364, "num_tokens": 2453534002.0, "step": 14626 }, { "entropy": 1.695216139157613, "epoch": 1.6068495784241026, "grad_norm": 0.8802637457847595, "learning_rate": 3.7644761624629745e-06, "loss": 1.2867, "mean_token_accuracy": 0.6799248705307642, "num_tokens": 2453662704.0, "step": 14627 }, { "entropy": 1.7392724752426147, "epoch": 1.6069594353354755, "grad_norm": 0.8451277613639832, "learning_rate": 3.763524084336252e-06, "loss": 1.4013, "mean_token_accuracy": 0.6632727136214575, "num_tokens": 2453811717.0, "step": 14628 }, { "entropy": 1.727136602004369, "epoch": 1.6070692922468486, "grad_norm": 0.7121945023536682, "learning_rate": 3.7625722352407348e-06, "loss": 1.3258, "mean_token_accuracy": 0.6634857207536697, "num_tokens": 2453989012.0, "step": 14629 }, { "entropy": 1.629335989554723, "epoch": 1.6071791491582215, "grad_norm": 1.1426151990890503, "learning_rate": 3.761620615206544e-06, "loss": 1.4052, "mean_token_accuracy": 0.660635307431221, "num_tokens": 2454190517.0, "step": 14630 }, { "entropy": 1.6815782884756725, "epoch": 1.6072890060695944, "grad_norm": 0.7410414814949036, "learning_rate": 3.760669224263798e-06, "loss": 1.387, "mean_token_accuracy": 0.6496629069248835, "num_tokens": 2454362336.0, "step": 14631 }, { "entropy": 1.7174339493115742, "epoch": 1.6073988629809672, "grad_norm": 0.7126834988594055, "learning_rate": 3.7597180624426106e-06, "loss": 1.4129, "mean_token_accuracy": 0.6453147878249487, "num_tokens": 2454534467.0, "step": 14632 }, { "entropy": 1.7280255556106567, "epoch": 1.6075087198923401, "grad_norm": 0.9190917015075684, "learning_rate": 3.7587671297730815e-06, "loss": 1.4702, "mean_token_accuracy": 0.6770395090182623, "num_tokens": 2454693122.0, "step": 14633 }, { "entropy": 1.656565527121226, "epoch": 1.6076185768037132, "grad_norm": 0.7317885160446167, "learning_rate": 3.7578164262853132e-06, "loss": 1.5353, "mean_token_accuracy": 0.6430183400710424, "num_tokens": 2454923455.0, "step": 14634 }, { "entropy": 1.6855885187784831, "epoch": 1.6077284337150861, "grad_norm": 0.7330460548400879, "learning_rate": 3.7568659520093908e-06, "loss": 1.487, "mean_token_accuracy": 0.6602890988190969, "num_tokens": 2455134200.0, "step": 14635 }, { "entropy": 1.6330851515134175, "epoch": 1.607838290626459, "grad_norm": 0.7137647867202759, "learning_rate": 3.7559157069753944e-06, "loss": 1.4943, "mean_token_accuracy": 0.6433676034212112, "num_tokens": 2455322690.0, "step": 14636 }, { "entropy": 1.7134460806846619, "epoch": 1.607948147537832, "grad_norm": 0.740168571472168, "learning_rate": 3.7549656912134047e-06, "loss": 1.2794, "mean_token_accuracy": 0.6720482061306635, "num_tokens": 2455461450.0, "step": 14637 }, { "entropy": 1.7948347826798756, "epoch": 1.6080580044492048, "grad_norm": 0.7866724729537964, "learning_rate": 3.754015904753486e-06, "loss": 1.5648, "mean_token_accuracy": 0.6387749413649241, "num_tokens": 2455632231.0, "step": 14638 }, { "entropy": 1.7541786233584087, "epoch": 1.6081678613605779, "grad_norm": 0.679871678352356, "learning_rate": 3.7530663476256966e-06, "loss": 1.3942, "mean_token_accuracy": 0.6533337185780207, "num_tokens": 2455793295.0, "step": 14639 }, { "entropy": 1.7643111447493236, "epoch": 1.6082777182719508, "grad_norm": 1.0173559188842773, "learning_rate": 3.752117019860091e-06, "loss": 1.4631, "mean_token_accuracy": 0.6418692767620087, "num_tokens": 2456022775.0, "step": 14640 }, { "entropy": 1.7089822093645732, "epoch": 1.6083875751833236, "grad_norm": 0.7529508471488953, "learning_rate": 3.7511679214867193e-06, "loss": 1.4893, "mean_token_accuracy": 0.6491026779015859, "num_tokens": 2456210046.0, "step": 14641 }, { "entropy": 1.7521416048208873, "epoch": 1.6084974320946968, "grad_norm": 0.7861169576644897, "learning_rate": 3.750219052535616e-06, "loss": 1.4302, "mean_token_accuracy": 0.6570564558108648, "num_tokens": 2456370939.0, "step": 14642 }, { "entropy": 1.7036944031715393, "epoch": 1.6086072890060696, "grad_norm": 0.6966022253036499, "learning_rate": 3.7492704130368103e-06, "loss": 1.5472, "mean_token_accuracy": 0.658886194229126, "num_tokens": 2456574331.0, "step": 14643 }, { "entropy": 1.6719833314418793, "epoch": 1.6087171459174425, "grad_norm": 0.6288134455680847, "learning_rate": 3.7483220030203305e-06, "loss": 1.4149, "mean_token_accuracy": 0.6646634787321091, "num_tokens": 2456763691.0, "step": 14644 }, { "entropy": 1.659516602754593, "epoch": 1.6088270028288156, "grad_norm": 0.7653041481971741, "learning_rate": 3.747373822516189e-06, "loss": 1.2218, "mean_token_accuracy": 0.6792045831680298, "num_tokens": 2456919385.0, "step": 14645 }, { "entropy": 1.7163499097029369, "epoch": 1.6089368597401883, "grad_norm": 0.8180403709411621, "learning_rate": 3.7464258715544023e-06, "loss": 1.579, "mean_token_accuracy": 0.6337461198369662, "num_tokens": 2457093423.0, "step": 14646 }, { "entropy": 1.6461522082487743, "epoch": 1.6090467166515614, "grad_norm": 0.6194190979003906, "learning_rate": 3.7454781501649674e-06, "loss": 1.3822, "mean_token_accuracy": 0.6567869633436203, "num_tokens": 2457285536.0, "step": 14647 }, { "entropy": 1.752669632434845, "epoch": 1.6091565735629343, "grad_norm": 0.6721103191375732, "learning_rate": 3.744530658377876e-06, "loss": 1.4253, "mean_token_accuracy": 0.6482215970754623, "num_tokens": 2457409826.0, "step": 14648 }, { "entropy": 1.7362704177697499, "epoch": 1.6092664304743072, "grad_norm": 0.6712827682495117, "learning_rate": 3.743583396223125e-06, "loss": 1.57, "mean_token_accuracy": 0.6405636916557947, "num_tokens": 2457605817.0, "step": 14649 }, { "entropy": 1.6012630959351857, "epoch": 1.6093762873856803, "grad_norm": 0.6160146594047546, "learning_rate": 3.7426363637306886e-06, "loss": 1.2842, "mean_token_accuracy": 0.6701871405045191, "num_tokens": 2457774714.0, "step": 14650 }, { "entropy": 1.6824021935462952, "epoch": 1.609486144297053, "grad_norm": 0.5910770297050476, "learning_rate": 3.741689560930538e-06, "loss": 1.401, "mean_token_accuracy": 0.6555624802907308, "num_tokens": 2457985015.0, "step": 14651 }, { "entropy": 1.6749296089013417, "epoch": 1.609596001208426, "grad_norm": 0.696537435054779, "learning_rate": 3.740742987852642e-06, "loss": 1.3365, "mean_token_accuracy": 0.6835020283857981, "num_tokens": 2458114521.0, "step": 14652 }, { "entropy": 1.7262722849845886, "epoch": 1.609705858119799, "grad_norm": 0.6201340556144714, "learning_rate": 3.7397966445269628e-06, "loss": 1.4564, "mean_token_accuracy": 0.6472860972086588, "num_tokens": 2458323341.0, "step": 14653 }, { "entropy": 1.7126949429512024, "epoch": 1.6098157150311718, "grad_norm": 0.6349091529846191, "learning_rate": 3.738850530983448e-06, "loss": 1.4529, "mean_token_accuracy": 0.6366288512945175, "num_tokens": 2458517592.0, "step": 14654 }, { "entropy": 1.7143605947494507, "epoch": 1.609925571942545, "grad_norm": 0.7637413144111633, "learning_rate": 3.737904647252039e-06, "loss": 1.2987, "mean_token_accuracy": 0.6668682942787806, "num_tokens": 2458644660.0, "step": 14655 }, { "entropy": 1.6725689272085826, "epoch": 1.6100354288539178, "grad_norm": 0.754520058631897, "learning_rate": 3.736958993362678e-06, "loss": 1.2872, "mean_token_accuracy": 0.6723710298538208, "num_tokens": 2458794632.0, "step": 14656 }, { "entropy": 1.7350502808888753, "epoch": 1.6101452857652907, "grad_norm": 0.801001250743866, "learning_rate": 3.73601356934529e-06, "loss": 1.3272, "mean_token_accuracy": 0.657276377081871, "num_tokens": 2458944318.0, "step": 14657 }, { "entropy": 1.724273145198822, "epoch": 1.6102551426766638, "grad_norm": 0.788803219795227, "learning_rate": 3.735068375229801e-06, "loss": 1.3117, "mean_token_accuracy": 0.6685859362284342, "num_tokens": 2459069936.0, "step": 14658 }, { "entropy": 1.643745203812917, "epoch": 1.6103649995880365, "grad_norm": 0.7852417826652527, "learning_rate": 3.7341234110461246e-06, "loss": 1.3608, "mean_token_accuracy": 0.6609650353590647, "num_tokens": 2459214227.0, "step": 14659 }, { "entropy": 1.726538171370824, "epoch": 1.6104748564994096, "grad_norm": 0.6105849146842957, "learning_rate": 3.7331786768241663e-06, "loss": 1.4536, "mean_token_accuracy": 0.6502714107433955, "num_tokens": 2459388462.0, "step": 14660 }, { "entropy": 1.7064904570579529, "epoch": 1.6105847134107825, "grad_norm": 0.6803503632545471, "learning_rate": 3.7322341725938314e-06, "loss": 1.396, "mean_token_accuracy": 0.658082311352094, "num_tokens": 2459570588.0, "step": 14661 }, { "entropy": 1.7269649803638458, "epoch": 1.6106945703221554, "grad_norm": 0.6450273394584656, "learning_rate": 3.7312898983850084e-06, "loss": 1.6308, "mean_token_accuracy": 0.628805602590243, "num_tokens": 2459740581.0, "step": 14662 }, { "entropy": 1.7392071982224782, "epoch": 1.6108044272335285, "grad_norm": 0.6870610117912292, "learning_rate": 3.7303458542275827e-06, "loss": 1.4163, "mean_token_accuracy": 0.6702167640129725, "num_tokens": 2459875754.0, "step": 14663 }, { "entropy": 1.7333300908406575, "epoch": 1.6109142841449011, "grad_norm": 0.8031678199768066, "learning_rate": 3.7294020401514364e-06, "loss": 1.3774, "mean_token_accuracy": 0.6614676515261332, "num_tokens": 2460032544.0, "step": 14664 }, { "entropy": 1.7381873826185863, "epoch": 1.6110241410562742, "grad_norm": 0.733604907989502, "learning_rate": 3.72845845618644e-06, "loss": 1.3342, "mean_token_accuracy": 0.6601580232381821, "num_tokens": 2460176158.0, "step": 14665 }, { "entropy": 1.741199215253194, "epoch": 1.6111339979676471, "grad_norm": 0.7065275311470032, "learning_rate": 3.727515102362457e-06, "loss": 1.4099, "mean_token_accuracy": 0.6452137182156245, "num_tokens": 2460289880.0, "step": 14666 }, { "entropy": 1.6943379541238148, "epoch": 1.61124385487902, "grad_norm": 0.6393603682518005, "learning_rate": 3.7265719787093425e-06, "loss": 1.3706, "mean_token_accuracy": 0.6622431923945745, "num_tokens": 2460455949.0, "step": 14667 }, { "entropy": 1.7258077561855316, "epoch": 1.611353711790393, "grad_norm": 0.7554365396499634, "learning_rate": 3.7256290852569486e-06, "loss": 1.3566, "mean_token_accuracy": 0.6608146925767263, "num_tokens": 2460609656.0, "step": 14668 }, { "entropy": 1.689467837413152, "epoch": 1.611463568701766, "grad_norm": 0.6613262295722961, "learning_rate": 3.724686422035115e-06, "loss": 1.5024, "mean_token_accuracy": 0.6482188751300176, "num_tokens": 2460844330.0, "step": 14669 }, { "entropy": 1.6908029715220134, "epoch": 1.6115734256131389, "grad_norm": 0.6851189136505127, "learning_rate": 3.7237439890736794e-06, "loss": 1.3898, "mean_token_accuracy": 0.6735121210416158, "num_tokens": 2461005246.0, "step": 14670 }, { "entropy": 1.6708916127681732, "epoch": 1.611683282524512, "grad_norm": 0.5962818264961243, "learning_rate": 3.7228017864024678e-06, "loss": 1.3767, "mean_token_accuracy": 0.6496349523464838, "num_tokens": 2461164991.0, "step": 14671 }, { "entropy": 1.7484399875005086, "epoch": 1.6117931394358846, "grad_norm": 0.670310378074646, "learning_rate": 3.7218598140512984e-06, "loss": 1.3154, "mean_token_accuracy": 0.6742733071247736, "num_tokens": 2461315273.0, "step": 14672 }, { "entropy": 1.7393219470977783, "epoch": 1.6119029963472578, "grad_norm": 0.6643054485321045, "learning_rate": 3.7209180720499895e-06, "loss": 1.3544, "mean_token_accuracy": 0.6552670349677404, "num_tokens": 2461488510.0, "step": 14673 }, { "entropy": 1.8004189630349476, "epoch": 1.6120128532586306, "grad_norm": 0.79213947057724, "learning_rate": 3.719976560428342e-06, "loss": 1.3652, "mean_token_accuracy": 0.6540059546629587, "num_tokens": 2461587275.0, "step": 14674 }, { "entropy": 1.7245031495889027, "epoch": 1.6121227101700035, "grad_norm": 0.6856813430786133, "learning_rate": 3.7190352792161544e-06, "loss": 1.454, "mean_token_accuracy": 0.6419583807388941, "num_tokens": 2461884625.0, "step": 14675 }, { "entropy": 1.7429804404576619, "epoch": 1.6122325670813766, "grad_norm": 0.9906445145606995, "learning_rate": 3.7180942284432187e-06, "loss": 1.4102, "mean_token_accuracy": 0.6640532414118449, "num_tokens": 2462069882.0, "step": 14676 }, { "entropy": 1.7205411791801453, "epoch": 1.6123424239927493, "grad_norm": 0.7368789911270142, "learning_rate": 3.7171534081393222e-06, "loss": 1.2647, "mean_token_accuracy": 0.6686208844184875, "num_tokens": 2462226030.0, "step": 14677 }, { "entropy": 1.6727672219276428, "epoch": 1.6124522809041224, "grad_norm": 0.6884908676147461, "learning_rate": 3.716212818334238e-06, "loss": 1.5422, "mean_token_accuracy": 0.66104227801164, "num_tokens": 2462413094.0, "step": 14678 }, { "entropy": 1.6247599720954895, "epoch": 1.6125621378154953, "grad_norm": 0.590446949005127, "learning_rate": 3.715272459057735e-06, "loss": 1.4282, "mean_token_accuracy": 0.644096295038859, "num_tokens": 2462641811.0, "step": 14679 }, { "entropy": 1.6443546215693157, "epoch": 1.6126719947268682, "grad_norm": 0.7115086317062378, "learning_rate": 3.714332330339577e-06, "loss": 1.5669, "mean_token_accuracy": 0.6451859523852667, "num_tokens": 2462806338.0, "step": 14680 }, { "entropy": 1.7336850663026173, "epoch": 1.6127818516382413, "grad_norm": 0.7021939158439636, "learning_rate": 3.7133924322095174e-06, "loss": 1.4519, "mean_token_accuracy": 0.6492075125376383, "num_tokens": 2462930880.0, "step": 14681 }, { "entropy": 1.6663711071014404, "epoch": 1.6128917085496142, "grad_norm": 0.7189558148384094, "learning_rate": 3.712452764697306e-06, "loss": 1.2616, "mean_token_accuracy": 0.667123039563497, "num_tokens": 2463057469.0, "step": 14682 }, { "entropy": 1.7106225689252217, "epoch": 1.613001565460987, "grad_norm": 0.6529760956764221, "learning_rate": 3.7115133278326776e-06, "loss": 1.4855, "mean_token_accuracy": 0.6439164827267329, "num_tokens": 2463193771.0, "step": 14683 }, { "entropy": 1.7312338948249817, "epoch": 1.6131114223723602, "grad_norm": 0.6860626339912415, "learning_rate": 3.7105741216453677e-06, "loss": 1.3038, "mean_token_accuracy": 0.6747928162415823, "num_tokens": 2463375144.0, "step": 14684 }, { "entropy": 1.6784348785877228, "epoch": 1.6132212792837328, "grad_norm": 0.636982798576355, "learning_rate": 3.7096351461651048e-06, "loss": 1.3763, "mean_token_accuracy": 0.647995188832283, "num_tokens": 2463568102.0, "step": 14685 }, { "entropy": 1.6646142303943634, "epoch": 1.613331136195106, "grad_norm": 0.7830153703689575, "learning_rate": 3.7086964014216044e-06, "loss": 1.2812, "mean_token_accuracy": 0.6732824593782425, "num_tokens": 2463698353.0, "step": 14686 }, { "entropy": 1.727742314338684, "epoch": 1.6134409931064788, "grad_norm": 0.6397407650947571, "learning_rate": 3.7077578874445747e-06, "loss": 1.6028, "mean_token_accuracy": 0.6402206718921661, "num_tokens": 2463895915.0, "step": 14687 }, { "entropy": 1.708655208349228, "epoch": 1.6135508500178517, "grad_norm": 0.7308383584022522, "learning_rate": 3.7068196042637243e-06, "loss": 1.3993, "mean_token_accuracy": 0.6531407485405604, "num_tokens": 2464095797.0, "step": 14688 }, { "entropy": 1.704372376203537, "epoch": 1.6136607069292248, "grad_norm": 0.6979178786277771, "learning_rate": 3.7058815519087444e-06, "loss": 1.2332, "mean_token_accuracy": 0.6791991045077642, "num_tokens": 2464232587.0, "step": 14689 }, { "entropy": 1.6719367702802022, "epoch": 1.6137705638405975, "grad_norm": 0.8596528172492981, "learning_rate": 3.7049437304093294e-06, "loss": 1.3867, "mean_token_accuracy": 0.6568711996078491, "num_tokens": 2464385186.0, "step": 14690 }, { "entropy": 1.6047246555487316, "epoch": 1.6138804207519706, "grad_norm": 0.6490098834037781, "learning_rate": 3.7040061397951576e-06, "loss": 1.3677, "mean_token_accuracy": 0.660823663075765, "num_tokens": 2464553229.0, "step": 14691 }, { "entropy": 1.7173048158486683, "epoch": 1.6139902776633435, "grad_norm": 0.7776662111282349, "learning_rate": 3.703068780095902e-06, "loss": 1.2761, "mean_token_accuracy": 0.6762077808380127, "num_tokens": 2464701504.0, "step": 14692 }, { "entropy": 1.7215432325998943, "epoch": 1.6141001345747163, "grad_norm": 0.9005657434463501, "learning_rate": 3.702131651341231e-06, "loss": 1.3798, "mean_token_accuracy": 0.6737534006436666, "num_tokens": 2464866324.0, "step": 14693 }, { "entropy": 1.7409155865510304, "epoch": 1.6142099914860895, "grad_norm": 0.7071303129196167, "learning_rate": 3.7011947535608105e-06, "loss": 1.5843, "mean_token_accuracy": 0.647487630446752, "num_tokens": 2465076973.0, "step": 14694 }, { "entropy": 1.7262167433897655, "epoch": 1.6143198483974623, "grad_norm": 0.7587045431137085, "learning_rate": 3.7002580867842815e-06, "loss": 1.2918, "mean_token_accuracy": 0.6742985248565674, "num_tokens": 2465200868.0, "step": 14695 }, { "entropy": 1.6662676731745403, "epoch": 1.6144297053088352, "grad_norm": 0.6712886095046997, "learning_rate": 3.6993216510412943e-06, "loss": 1.375, "mean_token_accuracy": 0.6569582025210062, "num_tokens": 2465388988.0, "step": 14696 }, { "entropy": 1.730035165945689, "epoch": 1.6145395622202083, "grad_norm": 0.7809839844703674, "learning_rate": 3.698385446361491e-06, "loss": 1.2638, "mean_token_accuracy": 0.6717608024676641, "num_tokens": 2465527801.0, "step": 14697 }, { "entropy": 1.7651971677939098, "epoch": 1.614649419131581, "grad_norm": 0.6956320405006409, "learning_rate": 3.6974494727744963e-06, "loss": 1.2942, "mean_token_accuracy": 0.6690488557020823, "num_tokens": 2465636303.0, "step": 14698 }, { "entropy": 1.6958413124084473, "epoch": 1.614759276042954, "grad_norm": 0.6723313331604004, "learning_rate": 3.6965137303099337e-06, "loss": 1.4207, "mean_token_accuracy": 0.6619421541690826, "num_tokens": 2465851994.0, "step": 14699 }, { "entropy": 1.6834101875623066, "epoch": 1.614869132954327, "grad_norm": 0.7277176380157471, "learning_rate": 3.695578218997423e-06, "loss": 1.2127, "mean_token_accuracy": 0.6854077279567719, "num_tokens": 2465977857.0, "step": 14700 }, { "entropy": 1.709593951702118, "epoch": 1.6149789898656999, "grad_norm": 0.6318420171737671, "learning_rate": 3.694642938866567e-06, "loss": 1.4732, "mean_token_accuracy": 0.6425252010424932, "num_tokens": 2466148882.0, "step": 14701 }, { "entropy": 1.7026881178220112, "epoch": 1.615088846777073, "grad_norm": 0.5743200778961182, "learning_rate": 3.6937078899469735e-06, "loss": 1.4259, "mean_token_accuracy": 0.6450713922580084, "num_tokens": 2466367763.0, "step": 14702 }, { "entropy": 1.7224018573760986, "epoch": 1.6151987036884456, "grad_norm": 0.7370775938034058, "learning_rate": 3.692773072268233e-06, "loss": 1.5327, "mean_token_accuracy": 0.6518655767043432, "num_tokens": 2466513299.0, "step": 14703 }, { "entropy": 1.6792699694633484, "epoch": 1.6153085605998188, "grad_norm": 0.7573254108428955, "learning_rate": 3.69183848585993e-06, "loss": 1.3101, "mean_token_accuracy": 0.6656599442164103, "num_tokens": 2466674052.0, "step": 14704 }, { "entropy": 1.7601742148399353, "epoch": 1.6154184175111916, "grad_norm": 0.6558720469474792, "learning_rate": 3.690904130751647e-06, "loss": 1.3575, "mean_token_accuracy": 0.6632993370294571, "num_tokens": 2466835324.0, "step": 14705 }, { "entropy": 1.670517235994339, "epoch": 1.6155282744225645, "grad_norm": 0.7299153208732605, "learning_rate": 3.689970006972955e-06, "loss": 1.3617, "mean_token_accuracy": 0.6664615025122961, "num_tokens": 2466978382.0, "step": 14706 }, { "entropy": 1.7113747795422871, "epoch": 1.6156381313339376, "grad_norm": 0.6921692490577698, "learning_rate": 3.689036114553416e-06, "loss": 1.4798, "mean_token_accuracy": 0.6365568687518438, "num_tokens": 2467205232.0, "step": 14707 }, { "entropy": 1.6883227229118347, "epoch": 1.6157479882453105, "grad_norm": 0.648524820804596, "learning_rate": 3.6881024535225895e-06, "loss": 1.5209, "mean_token_accuracy": 0.6537104596694311, "num_tokens": 2467377154.0, "step": 14708 }, { "entropy": 1.6793282429377239, "epoch": 1.6158578451566834, "grad_norm": 0.6334054470062256, "learning_rate": 3.687169023910029e-06, "loss": 1.3909, "mean_token_accuracy": 0.6522092173496882, "num_tokens": 2467532555.0, "step": 14709 }, { "entropy": 1.6903114418188732, "epoch": 1.6159677020680565, "grad_norm": 0.7473300695419312, "learning_rate": 3.6862358257452715e-06, "loss": 1.2707, "mean_token_accuracy": 0.6770381530125936, "num_tokens": 2467642784.0, "step": 14710 }, { "entropy": 1.6912944614887238, "epoch": 1.6160775589794292, "grad_norm": 0.6412237286567688, "learning_rate": 3.685302859057853e-06, "loss": 1.4237, "mean_token_accuracy": 0.648856391509374, "num_tokens": 2467791714.0, "step": 14711 }, { "entropy": 1.7014042536417644, "epoch": 1.6161874158908023, "grad_norm": 0.9805091619491577, "learning_rate": 3.6843701238773067e-06, "loss": 1.3664, "mean_token_accuracy": 0.6512833336989085, "num_tokens": 2467936735.0, "step": 14712 }, { "entropy": 1.7085695664087932, "epoch": 1.6162972728021752, "grad_norm": 0.6498310565948486, "learning_rate": 3.6834376202331457e-06, "loss": 1.4279, "mean_token_accuracy": 0.6503070195515951, "num_tokens": 2468137097.0, "step": 14713 }, { "entropy": 1.754157284895579, "epoch": 1.616407129713548, "grad_norm": 0.6614096164703369, "learning_rate": 3.68250534815489e-06, "loss": 1.4499, "mean_token_accuracy": 0.6387760390837988, "num_tokens": 2468335831.0, "step": 14714 }, { "entropy": 1.748912364244461, "epoch": 1.6165169866249212, "grad_norm": 0.6944563388824463, "learning_rate": 3.6815733076720417e-06, "loss": 1.4188, "mean_token_accuracy": 0.6436112423737844, "num_tokens": 2468487713.0, "step": 14715 }, { "entropy": 1.6973415712515514, "epoch": 1.6166268435362938, "grad_norm": 0.7646819949150085, "learning_rate": 3.6806414988140994e-06, "loss": 1.4375, "mean_token_accuracy": 0.6578048566977183, "num_tokens": 2468669987.0, "step": 14716 }, { "entropy": 1.732655018568039, "epoch": 1.616736700447667, "grad_norm": 0.8368391990661621, "learning_rate": 3.6797099216105574e-06, "loss": 1.343, "mean_token_accuracy": 0.666421135266622, "num_tokens": 2468838710.0, "step": 14717 }, { "entropy": 1.7877886792023976, "epoch": 1.6168465573590398, "grad_norm": 0.7718168497085571, "learning_rate": 3.6787785760908977e-06, "loss": 1.4756, "mean_token_accuracy": 0.6524422268072764, "num_tokens": 2468982036.0, "step": 14718 }, { "entropy": 1.6961112916469574, "epoch": 1.6169564142704127, "grad_norm": 0.6764015555381775, "learning_rate": 3.6778474622845944e-06, "loss": 1.3609, "mean_token_accuracy": 0.6668888131777445, "num_tokens": 2469168789.0, "step": 14719 }, { "entropy": 1.6913793583710988, "epoch": 1.6170662711817858, "grad_norm": 0.6833027005195618, "learning_rate": 3.6769165802211204e-06, "loss": 1.5313, "mean_token_accuracy": 0.630229189991951, "num_tokens": 2469398606.0, "step": 14720 }, { "entropy": 1.6282474398612976, "epoch": 1.6171761280931587, "grad_norm": 0.7693440318107605, "learning_rate": 3.675985929929938e-06, "loss": 1.4429, "mean_token_accuracy": 0.6655650039513906, "num_tokens": 2469592922.0, "step": 14721 }, { "entropy": 1.711096356312434, "epoch": 1.6172859850045316, "grad_norm": 0.8111943006515503, "learning_rate": 3.6750555114405006e-06, "loss": 1.5209, "mean_token_accuracy": 0.6461358418067297, "num_tokens": 2469796479.0, "step": 14722 }, { "entropy": 1.6384065548578899, "epoch": 1.6173958419159047, "grad_norm": 0.8203321099281311, "learning_rate": 3.674125324782254e-06, "loss": 1.4875, "mean_token_accuracy": 0.6577804535627365, "num_tokens": 2469953785.0, "step": 14723 }, { "entropy": 1.7690180937449138, "epoch": 1.6175056988272773, "grad_norm": 0.6374251842498779, "learning_rate": 3.6731953699846414e-06, "loss": 1.3627, "mean_token_accuracy": 0.6556740949551264, "num_tokens": 2470124089.0, "step": 14724 }, { "entropy": 1.7309903005758922, "epoch": 1.6176155557386505, "grad_norm": 0.7207738757133484, "learning_rate": 3.6722656470770923e-06, "loss": 1.4916, "mean_token_accuracy": 0.6519534190495809, "num_tokens": 2470255712.0, "step": 14725 }, { "entropy": 1.7002464632193248, "epoch": 1.6177254126500233, "grad_norm": 0.7734901905059814, "learning_rate": 3.6713361560890348e-06, "loss": 1.5482, "mean_token_accuracy": 0.6576615820328394, "num_tokens": 2470420871.0, "step": 14726 }, { "entropy": 1.6754888991514842, "epoch": 1.6178352695613962, "grad_norm": 0.8016461133956909, "learning_rate": 3.6704068970498864e-06, "loss": 1.2781, "mean_token_accuracy": 0.6687599966923395, "num_tokens": 2470573999.0, "step": 14727 }, { "entropy": 1.7265417277812958, "epoch": 1.6179451264727693, "grad_norm": 0.8027196526527405, "learning_rate": 3.6694778699890544e-06, "loss": 1.2972, "mean_token_accuracy": 0.672625203927358, "num_tokens": 2470696124.0, "step": 14728 }, { "entropy": 1.678039421637853, "epoch": 1.618054983384142, "grad_norm": 0.9656848311424255, "learning_rate": 3.6685490749359465e-06, "loss": 1.4742, "mean_token_accuracy": 0.6456383168697357, "num_tokens": 2470903763.0, "step": 14729 }, { "entropy": 1.6626673638820648, "epoch": 1.618164840295515, "grad_norm": 0.6540632247924805, "learning_rate": 3.6676205119199576e-06, "loss": 1.3202, "mean_token_accuracy": 0.6622842649618784, "num_tokens": 2471090045.0, "step": 14730 }, { "entropy": 1.6490332384904225, "epoch": 1.618274697206888, "grad_norm": 0.8260558843612671, "learning_rate": 3.6666921809704736e-06, "loss": 1.161, "mean_token_accuracy": 0.6894190460443497, "num_tokens": 2471227403.0, "step": 14731 }, { "entropy": 1.6390781899293263, "epoch": 1.6183845541182609, "grad_norm": 0.5702754259109497, "learning_rate": 3.665764082116876e-06, "loss": 1.4722, "mean_token_accuracy": 0.649658222993215, "num_tokens": 2471437545.0, "step": 14732 }, { "entropy": 1.629872699578603, "epoch": 1.618494411029634, "grad_norm": 0.6536400318145752, "learning_rate": 3.6648362153885436e-06, "loss": 1.3237, "mean_token_accuracy": 0.659297987818718, "num_tokens": 2471629535.0, "step": 14733 }, { "entropy": 1.7587083180745442, "epoch": 1.6186042679410069, "grad_norm": 0.6709061861038208, "learning_rate": 3.6639085808148393e-06, "loss": 1.3405, "mean_token_accuracy": 0.6536008963982264, "num_tokens": 2471737875.0, "step": 14734 }, { "entropy": 1.6815871397654216, "epoch": 1.6187141248523798, "grad_norm": 0.7016004920005798, "learning_rate": 3.66298117842512e-06, "loss": 1.3005, "mean_token_accuracy": 0.6671187877655029, "num_tokens": 2471871399.0, "step": 14735 }, { "entropy": 1.6025499800841014, "epoch": 1.6188239817637529, "grad_norm": 0.7438717484474182, "learning_rate": 3.662054008248743e-06, "loss": 1.3128, "mean_token_accuracy": 0.6726290682951609, "num_tokens": 2472023286.0, "step": 14736 }, { "entropy": 1.6263412833213806, "epoch": 1.6189338386751255, "grad_norm": 0.7102988362312317, "learning_rate": 3.661127070315048e-06, "loss": 1.4156, "mean_token_accuracy": 0.6527203271786371, "num_tokens": 2472222430.0, "step": 14737 }, { "entropy": 1.703275889158249, "epoch": 1.6190436955864986, "grad_norm": 0.714640736579895, "learning_rate": 3.660200364653377e-06, "loss": 1.3723, "mean_token_accuracy": 0.6539589911699295, "num_tokens": 2472389845.0, "step": 14738 }, { "entropy": 1.6962252755959828, "epoch": 1.6191535524978715, "grad_norm": 0.6212570071220398, "learning_rate": 3.6592738912930557e-06, "loss": 1.4415, "mean_token_accuracy": 0.644294947385788, "num_tokens": 2472558761.0, "step": 14739 }, { "entropy": 1.7015782197316487, "epoch": 1.6192634094092444, "grad_norm": 0.7756522297859192, "learning_rate": 3.6583476502634074e-06, "loss": 1.3007, "mean_token_accuracy": 0.670628140370051, "num_tokens": 2472732516.0, "step": 14740 }, { "entropy": 1.5966882010300953, "epoch": 1.6193732663206175, "grad_norm": 0.7755955457687378, "learning_rate": 3.657421641593748e-06, "loss": 1.2803, "mean_token_accuracy": 0.6795324633518854, "num_tokens": 2472883412.0, "step": 14741 }, { "entropy": 1.7600494424502056, "epoch": 1.6194831232319902, "grad_norm": 0.7740442752838135, "learning_rate": 3.6564958653133863e-06, "loss": 1.4893, "mean_token_accuracy": 0.6595326215028763, "num_tokens": 2473018952.0, "step": 14742 }, { "entropy": 1.7393341660499573, "epoch": 1.6195929801433633, "grad_norm": 0.6907954812049866, "learning_rate": 3.6555703214516193e-06, "loss": 1.5718, "mean_token_accuracy": 0.6427063147226969, "num_tokens": 2473202617.0, "step": 14743 }, { "entropy": 1.6578579048315685, "epoch": 1.6197028370547362, "grad_norm": 0.6247424483299255, "learning_rate": 3.654645010037744e-06, "loss": 1.5738, "mean_token_accuracy": 0.6394970516363779, "num_tokens": 2473402435.0, "step": 14744 }, { "entropy": 1.6981489062309265, "epoch": 1.619812693966109, "grad_norm": 0.5729104280471802, "learning_rate": 3.653719931101042e-06, "loss": 1.3324, "mean_token_accuracy": 0.6592134733994802, "num_tokens": 2473562428.0, "step": 14745 }, { "entropy": 1.647686739762624, "epoch": 1.6199225508774822, "grad_norm": 0.6854268312454224, "learning_rate": 3.652795084670795e-06, "loss": 1.3264, "mean_token_accuracy": 0.6706267396608988, "num_tokens": 2473690175.0, "step": 14746 }, { "entropy": 1.7167788644631703, "epoch": 1.620032407788855, "grad_norm": 0.5726441144943237, "learning_rate": 3.6518704707762747e-06, "loss": 1.4047, "mean_token_accuracy": 0.6441677361726761, "num_tokens": 2473897473.0, "step": 14747 }, { "entropy": 1.6790929238001506, "epoch": 1.620142264700228, "grad_norm": 0.7980174422264099, "learning_rate": 3.65094608944674e-06, "loss": 1.286, "mean_token_accuracy": 0.6767543057600657, "num_tokens": 2474027480.0, "step": 14748 }, { "entropy": 1.7248100241025288, "epoch": 1.620252121611601, "grad_norm": 0.664738655090332, "learning_rate": 3.650021940711449e-06, "loss": 1.447, "mean_token_accuracy": 0.6460002660751343, "num_tokens": 2474233223.0, "step": 14749 }, { "entropy": 1.679869105418523, "epoch": 1.6203619785229737, "grad_norm": 0.5752595663070679, "learning_rate": 3.6490980245996578e-06, "loss": 1.4, "mean_token_accuracy": 0.6550338019927343, "num_tokens": 2474391564.0, "step": 14750 }, { "entropy": 1.6999848584334056, "epoch": 1.6204718354343468, "grad_norm": 0.8822055459022522, "learning_rate": 3.6481743411405957e-06, "loss": 1.4801, "mean_token_accuracy": 0.6662048846483231, "num_tokens": 2474520827.0, "step": 14751 }, { "entropy": 1.6986857652664185, "epoch": 1.6205816923457197, "grad_norm": 0.5632603168487549, "learning_rate": 3.6472508903635035e-06, "loss": 1.4075, "mean_token_accuracy": 0.6511333485444387, "num_tokens": 2474718631.0, "step": 14752 }, { "entropy": 1.690843830506007, "epoch": 1.6206915492570926, "grad_norm": 0.6789196729660034, "learning_rate": 3.6463276722976094e-06, "loss": 1.421, "mean_token_accuracy": 0.6547816569606463, "num_tokens": 2474877824.0, "step": 14753 }, { "entropy": 1.7533318003018696, "epoch": 1.6208014061684657, "grad_norm": 0.6676966547966003, "learning_rate": 3.6454046869721314e-06, "loss": 1.311, "mean_token_accuracy": 0.6615221301714579, "num_tokens": 2475020061.0, "step": 14754 }, { "entropy": 1.7552814086278279, "epoch": 1.6209112630798383, "grad_norm": 0.6395809054374695, "learning_rate": 3.6444819344162785e-06, "loss": 1.3817, "mean_token_accuracy": 0.6580479294061661, "num_tokens": 2475208467.0, "step": 14755 }, { "entropy": 1.692303051551183, "epoch": 1.6210211199912115, "grad_norm": 0.6294587254524231, "learning_rate": 3.6435594146592602e-06, "loss": 1.427, "mean_token_accuracy": 0.6477192491292953, "num_tokens": 2475440566.0, "step": 14756 }, { "entropy": 1.799072911341985, "epoch": 1.6211309769025843, "grad_norm": 0.8360262513160706, "learning_rate": 3.6426371277302696e-06, "loss": 1.5254, "mean_token_accuracy": 0.6458015888929367, "num_tokens": 2475559755.0, "step": 14757 }, { "entropy": 1.6935710906982422, "epoch": 1.6212408338139572, "grad_norm": 0.6417449116706848, "learning_rate": 3.6417150736585005e-06, "loss": 1.2667, "mean_token_accuracy": 0.6743132919073105, "num_tokens": 2475671516.0, "step": 14758 }, { "entropy": 1.7454969485600789, "epoch": 1.6213506907253303, "grad_norm": 0.7443606853485107, "learning_rate": 3.6407932524731327e-06, "loss": 1.2905, "mean_token_accuracy": 0.6683808912833532, "num_tokens": 2475789097.0, "step": 14759 }, { "entropy": 1.683669090270996, "epoch": 1.6214605476367032, "grad_norm": 0.6186245083808899, "learning_rate": 3.6398716642033415e-06, "loss": 1.3217, "mean_token_accuracy": 0.6645027448733648, "num_tokens": 2475957658.0, "step": 14760 }, { "entropy": 1.7226401766141255, "epoch": 1.621570404548076, "grad_norm": 0.7316539883613586, "learning_rate": 3.638950308878295e-06, "loss": 1.3299, "mean_token_accuracy": 0.6640412161747614, "num_tokens": 2476076424.0, "step": 14761 }, { "entropy": 1.7300419211387634, "epoch": 1.6216802614594492, "grad_norm": 0.7443107962608337, "learning_rate": 3.638029186527159e-06, "loss": 1.4486, "mean_token_accuracy": 0.6463419745365778, "num_tokens": 2476257373.0, "step": 14762 }, { "entropy": 1.653321127096812, "epoch": 1.6217901183708219, "grad_norm": 0.6316555142402649, "learning_rate": 3.6371082971790774e-06, "loss": 1.5808, "mean_token_accuracy": 0.6333072433869044, "num_tokens": 2476519674.0, "step": 14763 }, { "entropy": 1.7144930958747864, "epoch": 1.621899975282195, "grad_norm": 0.6967435479164124, "learning_rate": 3.636187640863199e-06, "loss": 1.3244, "mean_token_accuracy": 0.6650471885999044, "num_tokens": 2476646778.0, "step": 14764 }, { "entropy": 1.6741214394569397, "epoch": 1.6220098321935679, "grad_norm": 0.7144061923027039, "learning_rate": 3.635267217608668e-06, "loss": 1.4367, "mean_token_accuracy": 0.6536417255798975, "num_tokens": 2476791495.0, "step": 14765 }, { "entropy": 1.7125622133413951, "epoch": 1.6221196891049408, "grad_norm": 0.8451400399208069, "learning_rate": 3.634347027444609e-06, "loss": 1.5601, "mean_token_accuracy": 0.6516855011383692, "num_tokens": 2476981517.0, "step": 14766 }, { "entropy": 1.679990828037262, "epoch": 1.6222295460163139, "grad_norm": 0.8648212552070618, "learning_rate": 3.6334270704001464e-06, "loss": 1.3945, "mean_token_accuracy": 0.6758679350217184, "num_tokens": 2477118327.0, "step": 14767 }, { "entropy": 1.6886627574761708, "epoch": 1.6223394029276865, "grad_norm": 0.5979213714599609, "learning_rate": 3.6325073465043998e-06, "loss": 1.451, "mean_token_accuracy": 0.6546831776698431, "num_tokens": 2477323299.0, "step": 14768 }, { "entropy": 1.7278961042563121, "epoch": 1.6224492598390596, "grad_norm": 0.7632152438163757, "learning_rate": 3.6315878557864732e-06, "loss": 1.3506, "mean_token_accuracy": 0.6647726694742838, "num_tokens": 2477455404.0, "step": 14769 }, { "entropy": 1.661962906519572, "epoch": 1.6225591167504325, "grad_norm": 0.5683802962303162, "learning_rate": 3.6306685982754725e-06, "loss": 1.4222, "mean_token_accuracy": 0.6534817218780518, "num_tokens": 2477637665.0, "step": 14770 }, { "entropy": 1.7604417105515797, "epoch": 1.6226689736618054, "grad_norm": 0.6922222971916199, "learning_rate": 3.629749574000491e-06, "loss": 1.3952, "mean_token_accuracy": 0.6722667813301086, "num_tokens": 2477792042.0, "step": 14771 }, { "entropy": 1.7918286224206288, "epoch": 1.6227788305731785, "grad_norm": 0.7370676398277283, "learning_rate": 3.628830782990611e-06, "loss": 1.4686, "mean_token_accuracy": 0.6477069209019343, "num_tokens": 2477953240.0, "step": 14772 }, { "entropy": 1.6899705628554027, "epoch": 1.6228886874845514, "grad_norm": 0.7105181813240051, "learning_rate": 3.627912225274916e-06, "loss": 1.4461, "mean_token_accuracy": 0.656821588675181, "num_tokens": 2478178028.0, "step": 14773 }, { "entropy": 1.6704809963703156, "epoch": 1.6229985443959243, "grad_norm": 0.6295377016067505, "learning_rate": 3.6269939008824818e-06, "loss": 1.3896, "mean_token_accuracy": 0.6608775307734808, "num_tokens": 2478369209.0, "step": 14774 }, { "entropy": 1.7202841540177662, "epoch": 1.6231084013072974, "grad_norm": 0.6942985653877258, "learning_rate": 3.6260758098423634e-06, "loss": 1.3808, "mean_token_accuracy": 0.664001539349556, "num_tokens": 2478504947.0, "step": 14775 }, { "entropy": 1.678806871175766, "epoch": 1.62321825821867, "grad_norm": 0.6336562037467957, "learning_rate": 3.6251579521836223e-06, "loss": 1.3756, "mean_token_accuracy": 0.6675303081671397, "num_tokens": 2478694950.0, "step": 14776 }, { "entropy": 1.764718770980835, "epoch": 1.6233281151300432, "grad_norm": 0.6643718481063843, "learning_rate": 3.624240327935312e-06, "loss": 1.5157, "mean_token_accuracy": 0.6346383889516195, "num_tokens": 2478927778.0, "step": 14777 }, { "entropy": 1.6955519517262776, "epoch": 1.623437972041416, "grad_norm": 0.678653359413147, "learning_rate": 3.6233229371264715e-06, "loss": 1.3345, "mean_token_accuracy": 0.6630487193663915, "num_tokens": 2479106950.0, "step": 14778 }, { "entropy": 1.7352122167746227, "epoch": 1.623547828952789, "grad_norm": 0.7108410000801086, "learning_rate": 3.6224057797861335e-06, "loss": 1.4498, "mean_token_accuracy": 0.6533684730529785, "num_tokens": 2479280565.0, "step": 14779 }, { "entropy": 1.765261471271515, "epoch": 1.623657685864162, "grad_norm": 0.8185455799102783, "learning_rate": 3.6214888559433303e-06, "loss": 1.6564, "mean_token_accuracy": 0.6357477903366089, "num_tokens": 2479455073.0, "step": 14780 }, { "entropy": 1.7805627187093098, "epoch": 1.6237675427755347, "grad_norm": 0.7521522641181946, "learning_rate": 3.6205721656270787e-06, "loss": 1.3739, "mean_token_accuracy": 0.6541548172632853, "num_tokens": 2479612490.0, "step": 14781 }, { "entropy": 1.7144214709599812, "epoch": 1.6238773996869078, "grad_norm": 0.7456989288330078, "learning_rate": 3.6196557088663933e-06, "loss": 1.5344, "mean_token_accuracy": 0.6387195686499277, "num_tokens": 2479774711.0, "step": 14782 }, { "entropy": 1.6867867310841878, "epoch": 1.6239872565982807, "grad_norm": 0.6226841807365417, "learning_rate": 3.6187394856902808e-06, "loss": 1.3739, "mean_token_accuracy": 0.6644567201534907, "num_tokens": 2479941427.0, "step": 14783 }, { "entropy": 1.721395303805669, "epoch": 1.6240971135096536, "grad_norm": 0.6708065271377563, "learning_rate": 3.617823496127734e-06, "loss": 1.3871, "mean_token_accuracy": 0.6624792019526163, "num_tokens": 2480131568.0, "step": 14784 }, { "entropy": 1.7298158307870228, "epoch": 1.6242069704210267, "grad_norm": 0.6362797021865845, "learning_rate": 3.6169077402077502e-06, "loss": 1.5095, "mean_token_accuracy": 0.6370205332835516, "num_tokens": 2480402026.0, "step": 14785 }, { "entropy": 1.7130170961221058, "epoch": 1.6243168273323996, "grad_norm": 0.6537313461303711, "learning_rate": 3.6159922179593087e-06, "loss": 1.5106, "mean_token_accuracy": 0.6379600862661997, "num_tokens": 2480597966.0, "step": 14786 }, { "entropy": 1.7668890555699666, "epoch": 1.6244266842437725, "grad_norm": 0.7130351066589355, "learning_rate": 3.615076929411384e-06, "loss": 1.4481, "mean_token_accuracy": 0.6494297136863073, "num_tokens": 2480795687.0, "step": 14787 }, { "entropy": 1.7258010109265645, "epoch": 1.6245365411551456, "grad_norm": 0.747342050075531, "learning_rate": 3.6141618745929472e-06, "loss": 1.5332, "mean_token_accuracy": 0.631104106704394, "num_tokens": 2481012853.0, "step": 14788 }, { "entropy": 1.6782464186350505, "epoch": 1.6246463980665182, "grad_norm": 0.6709155440330505, "learning_rate": 3.613247053532961e-06, "loss": 1.2226, "mean_token_accuracy": 0.690729891260465, "num_tokens": 2481149441.0, "step": 14789 }, { "entropy": 1.695702721675237, "epoch": 1.6247562549778913, "grad_norm": 0.614595353603363, "learning_rate": 3.6123324662603775e-06, "loss": 1.4185, "mean_token_accuracy": 0.6465272555748621, "num_tokens": 2481355357.0, "step": 14790 }, { "entropy": 1.6653617123762767, "epoch": 1.6248661118892642, "grad_norm": 0.6412925124168396, "learning_rate": 3.6114181128041404e-06, "loss": 1.2842, "mean_token_accuracy": 0.6678632348775864, "num_tokens": 2481495478.0, "step": 14791 }, { "entropy": 1.7512604494889576, "epoch": 1.624975968800637, "grad_norm": 0.7046493887901306, "learning_rate": 3.6105039931931917e-06, "loss": 1.5298, "mean_token_accuracy": 0.6425130367279053, "num_tokens": 2481665561.0, "step": 14792 }, { "entropy": 1.7655756374200184, "epoch": 1.6250858257120102, "grad_norm": 0.6847085356712341, "learning_rate": 3.6095901074564605e-06, "loss": 1.5536, "mean_token_accuracy": 0.6508872310320536, "num_tokens": 2481826917.0, "step": 14793 }, { "entropy": 1.6053343216578166, "epoch": 1.625195682623383, "grad_norm": 0.7845448851585388, "learning_rate": 3.608676455622874e-06, "loss": 1.4418, "mean_token_accuracy": 0.6726017246643702, "num_tokens": 2481975809.0, "step": 14794 }, { "entropy": 1.7125616768995922, "epoch": 1.625305539534756, "grad_norm": 0.6936936378479004, "learning_rate": 3.607763037721348e-06, "loss": 1.5531, "mean_token_accuracy": 0.6478741864363352, "num_tokens": 2482194797.0, "step": 14795 }, { "entropy": 1.736180692911148, "epoch": 1.6254153964461289, "grad_norm": 0.6816456317901611, "learning_rate": 3.6068498537807884e-06, "loss": 1.339, "mean_token_accuracy": 0.6603361467520396, "num_tokens": 2482331620.0, "step": 14796 }, { "entropy": 1.7087311546007793, "epoch": 1.6255252533575018, "grad_norm": 0.8054825067520142, "learning_rate": 3.6059369038301005e-06, "loss": 1.4759, "mean_token_accuracy": 0.6657463312149048, "num_tokens": 2482466267.0, "step": 14797 }, { "entropy": 1.6767024497191112, "epoch": 1.6256351102688749, "grad_norm": 0.723479688167572, "learning_rate": 3.605024187898178e-06, "loss": 1.1712, "mean_token_accuracy": 0.6891407519578934, "num_tokens": 2482611295.0, "step": 14798 }, { "entropy": 1.6842226882775624, "epoch": 1.6257449671802477, "grad_norm": 0.5745367407798767, "learning_rate": 3.604111706013906e-06, "loss": 1.3578, "mean_token_accuracy": 0.6609561542669932, "num_tokens": 2482777747.0, "step": 14799 }, { "entropy": 1.719879557689031, "epoch": 1.6258548240916206, "grad_norm": 0.7957612872123718, "learning_rate": 3.6031994582061657e-06, "loss": 1.3992, "mean_token_accuracy": 0.6573441376288732, "num_tokens": 2482902723.0, "step": 14800 }, { "entropy": 1.7037660876909893, "epoch": 1.6259646810029937, "grad_norm": 0.5532712340354919, "learning_rate": 3.6022874445038326e-06, "loss": 1.4247, "mean_token_accuracy": 0.6606289645036062, "num_tokens": 2483116670.0, "step": 14801 }, { "entropy": 1.7359294990698497, "epoch": 1.6260745379143664, "grad_norm": 0.6897640228271484, "learning_rate": 3.6013756649357675e-06, "loss": 1.3636, "mean_token_accuracy": 0.6593460738658905, "num_tokens": 2483265825.0, "step": 14802 }, { "entropy": 1.7429955800374348, "epoch": 1.6261843948257395, "grad_norm": 0.7224751710891724, "learning_rate": 3.6004641195308284e-06, "loss": 1.4376, "mean_token_accuracy": 0.6494200577338537, "num_tokens": 2483418684.0, "step": 14803 }, { "entropy": 1.7316950261592865, "epoch": 1.6262942517371124, "grad_norm": 0.8159376382827759, "learning_rate": 3.5995528083178632e-06, "loss": 1.5238, "mean_token_accuracy": 0.6503862986962, "num_tokens": 2483577085.0, "step": 14804 }, { "entropy": 1.687993158896764, "epoch": 1.6264041086484853, "grad_norm": 0.7989717721939087, "learning_rate": 3.5986417313257176e-06, "loss": 1.3912, "mean_token_accuracy": 0.659797266125679, "num_tokens": 2483734490.0, "step": 14805 }, { "entropy": 1.6549718777338664, "epoch": 1.6265139655598584, "grad_norm": 0.579478919506073, "learning_rate": 3.5977308885832297e-06, "loss": 1.4292, "mean_token_accuracy": 0.6463843236366907, "num_tokens": 2483947255.0, "step": 14806 }, { "entropy": 1.6915274957815807, "epoch": 1.6266238224712313, "grad_norm": 0.7029345035552979, "learning_rate": 3.596820280119221e-06, "loss": 1.5627, "mean_token_accuracy": 0.6520664145549139, "num_tokens": 2484087439.0, "step": 14807 }, { "entropy": 1.6860439380009968, "epoch": 1.6267336793826042, "grad_norm": 0.6037241816520691, "learning_rate": 3.5959099059625136e-06, "loss": 1.3567, "mean_token_accuracy": 0.6552727371454239, "num_tokens": 2484248258.0, "step": 14808 }, { "entropy": 1.6769340336322784, "epoch": 1.626843536293977, "grad_norm": 0.6906334757804871, "learning_rate": 3.594999766141922e-06, "loss": 1.4036, "mean_token_accuracy": 0.6545880983273188, "num_tokens": 2484416040.0, "step": 14809 }, { "entropy": 1.7643942634264629, "epoch": 1.62695339320535, "grad_norm": 0.7934665083885193, "learning_rate": 3.594089860686253e-06, "loss": 1.4969, "mean_token_accuracy": 0.6431198517481486, "num_tokens": 2484557942.0, "step": 14810 }, { "entropy": 1.679926613966624, "epoch": 1.627063250116723, "grad_norm": 0.5863533020019531, "learning_rate": 3.593180189624299e-06, "loss": 1.4085, "mean_token_accuracy": 0.6471660186847051, "num_tokens": 2484819829.0, "step": 14811 }, { "entropy": 1.7257270713647206, "epoch": 1.627173107028096, "grad_norm": 0.6526218056678772, "learning_rate": 3.5922707529848576e-06, "loss": 1.4658, "mean_token_accuracy": 0.6473792394002279, "num_tokens": 2485029927.0, "step": 14812 }, { "entropy": 1.7281257609526317, "epoch": 1.6272829639394688, "grad_norm": 0.7226946353912354, "learning_rate": 3.5913615507967057e-06, "loss": 1.2854, "mean_token_accuracy": 0.6790865163008372, "num_tokens": 2485168825.0, "step": 14813 }, { "entropy": 1.6933965583642323, "epoch": 1.627392820850842, "grad_norm": 0.6387641429901123, "learning_rate": 3.590452583088626e-06, "loss": 1.4405, "mean_token_accuracy": 0.6517203003168106, "num_tokens": 2485352424.0, "step": 14814 }, { "entropy": 1.745787501335144, "epoch": 1.6275026777622146, "grad_norm": 0.655708909034729, "learning_rate": 3.5895438498893827e-06, "loss": 1.4132, "mean_token_accuracy": 0.643621101975441, "num_tokens": 2485521153.0, "step": 14815 }, { "entropy": 1.7322950462500255, "epoch": 1.6276125346735877, "grad_norm": 0.7635506391525269, "learning_rate": 3.588635351227735e-06, "loss": 1.4825, "mean_token_accuracy": 0.6420500675837199, "num_tokens": 2485727288.0, "step": 14816 }, { "entropy": 1.663044144709905, "epoch": 1.6277223915849606, "grad_norm": 0.6754635572433472, "learning_rate": 3.5877270871324383e-06, "loss": 1.5849, "mean_token_accuracy": 0.6396665796637535, "num_tokens": 2485909269.0, "step": 14817 }, { "entropy": 1.661100705464681, "epoch": 1.6278322484963335, "grad_norm": 0.5776710510253906, "learning_rate": 3.586819057632245e-06, "loss": 1.3384, "mean_token_accuracy": 0.66503178079923, "num_tokens": 2486085108.0, "step": 14818 }, { "entropy": 1.7141460180282593, "epoch": 1.6279421054077066, "grad_norm": 0.6838655471801758, "learning_rate": 3.5859112627558823e-06, "loss": 1.3575, "mean_token_accuracy": 0.662881389260292, "num_tokens": 2486228224.0, "step": 14819 }, { "entropy": 1.698822170495987, "epoch": 1.6280519623190794, "grad_norm": 0.8585372567176819, "learning_rate": 3.585003702532087e-06, "loss": 1.3737, "mean_token_accuracy": 0.6647797971963882, "num_tokens": 2486380485.0, "step": 14820 }, { "entropy": 1.7368709444999695, "epoch": 1.6281618192304523, "grad_norm": 0.7684159874916077, "learning_rate": 3.5840963769895866e-06, "loss": 1.2543, "mean_token_accuracy": 0.6787616461515427, "num_tokens": 2486507958.0, "step": 14821 }, { "entropy": 1.6780678729216258, "epoch": 1.6282716761418252, "grad_norm": 0.6047248840332031, "learning_rate": 3.583189286157094e-06, "loss": 1.3225, "mean_token_accuracy": 0.6666500320037206, "num_tokens": 2486689124.0, "step": 14822 }, { "entropy": 1.723960777123769, "epoch": 1.628381533053198, "grad_norm": 0.6120347380638123, "learning_rate": 3.5822824300633153e-06, "loss": 1.3838, "mean_token_accuracy": 0.6574974805116653, "num_tokens": 2486823706.0, "step": 14823 }, { "entropy": 1.731412132581075, "epoch": 1.6284913899645712, "grad_norm": 0.5602653622627258, "learning_rate": 3.5813758087369577e-06, "loss": 1.3932, "mean_token_accuracy": 0.645255446434021, "num_tokens": 2487003576.0, "step": 14824 }, { "entropy": 1.738511284192403, "epoch": 1.628601246875944, "grad_norm": 0.7291231751441956, "learning_rate": 3.5804694222067117e-06, "loss": 1.3607, "mean_token_accuracy": 0.6644929597775141, "num_tokens": 2487179613.0, "step": 14825 }, { "entropy": 1.7071664134661357, "epoch": 1.628711103787317, "grad_norm": 0.596044659614563, "learning_rate": 3.579563270501266e-06, "loss": 1.4555, "mean_token_accuracy": 0.6338590929905573, "num_tokens": 2487378214.0, "step": 14826 }, { "entropy": 1.7025366922219594, "epoch": 1.62882096069869, "grad_norm": 0.862602710723877, "learning_rate": 3.5786573536493002e-06, "loss": 1.4171, "mean_token_accuracy": 0.6637825717528661, "num_tokens": 2487541639.0, "step": 14827 }, { "entropy": 1.7075651188691456, "epoch": 1.6289308176100628, "grad_norm": 0.731027364730835, "learning_rate": 3.5777516716794814e-06, "loss": 1.4465, "mean_token_accuracy": 0.6556923538446426, "num_tokens": 2487713071.0, "step": 14828 }, { "entropy": 1.6427714824676514, "epoch": 1.6290406745214359, "grad_norm": 0.5780958533287048, "learning_rate": 3.5768462246204793e-06, "loss": 1.3354, "mean_token_accuracy": 0.6602772623300552, "num_tokens": 2487904465.0, "step": 14829 }, { "entropy": 1.7856932580471039, "epoch": 1.6291505314328087, "grad_norm": 0.6738545894622803, "learning_rate": 3.575941012500952e-06, "loss": 1.4451, "mean_token_accuracy": 0.6467948655287424, "num_tokens": 2488101377.0, "step": 14830 }, { "entropy": 1.6973777413368225, "epoch": 1.6292603883441816, "grad_norm": 0.646508514881134, "learning_rate": 3.575036035349543e-06, "loss": 1.4545, "mean_token_accuracy": 0.6591275582710902, "num_tokens": 2488282493.0, "step": 14831 }, { "entropy": 1.6603109538555145, "epoch": 1.6293702452555547, "grad_norm": 0.6032806038856506, "learning_rate": 3.5741312931948973e-06, "loss": 1.4004, "mean_token_accuracy": 0.6521612008412679, "num_tokens": 2488501507.0, "step": 14832 }, { "entropy": 1.6811530391375225, "epoch": 1.6294801021669276, "grad_norm": 0.5795667767524719, "learning_rate": 3.573226786065652e-06, "loss": 1.426, "mean_token_accuracy": 0.6499424229065577, "num_tokens": 2488732884.0, "step": 14833 }, { "entropy": 1.758106917142868, "epoch": 1.6295899590783005, "grad_norm": 0.6564192771911621, "learning_rate": 3.5723225139904326e-06, "loss": 1.5641, "mean_token_accuracy": 0.6320732136567434, "num_tokens": 2488941510.0, "step": 14834 }, { "entropy": 1.6827127536137898, "epoch": 1.6296998159896734, "grad_norm": 0.8532228469848633, "learning_rate": 3.5714184769978564e-06, "loss": 1.4796, "mean_token_accuracy": 0.653532346089681, "num_tokens": 2489149280.0, "step": 14835 }, { "entropy": 1.6751657327016194, "epoch": 1.6298096729010463, "grad_norm": 0.6216030120849609, "learning_rate": 3.570514675116541e-06, "loss": 1.5362, "mean_token_accuracy": 0.6317654103040695, "num_tokens": 2489351067.0, "step": 14836 }, { "entropy": 1.7361950079600017, "epoch": 1.6299195298124194, "grad_norm": 0.670462965965271, "learning_rate": 3.569611108375085e-06, "loss": 1.3815, "mean_token_accuracy": 0.6552157799402872, "num_tokens": 2489508353.0, "step": 14837 }, { "entropy": 1.6183799505233765, "epoch": 1.6300293867237923, "grad_norm": 0.7788172960281372, "learning_rate": 3.568707776802093e-06, "loss": 1.2651, "mean_token_accuracy": 0.6860854128996531, "num_tokens": 2489659360.0, "step": 14838 }, { "entropy": 1.6797572473684947, "epoch": 1.6301392436351652, "grad_norm": 0.6599597930908203, "learning_rate": 3.567804680426149e-06, "loss": 1.6206, "mean_token_accuracy": 0.6344324350357056, "num_tokens": 2489862270.0, "step": 14839 }, { "entropy": 1.7235714693864186, "epoch": 1.6302491005465383, "grad_norm": 0.6941717267036438, "learning_rate": 3.5669018192758376e-06, "loss": 1.4494, "mean_token_accuracy": 0.644492988785108, "num_tokens": 2490076035.0, "step": 14840 }, { "entropy": 1.7475056151549022, "epoch": 1.630358957457911, "grad_norm": 0.8079373836517334, "learning_rate": 3.5659991933797335e-06, "loss": 1.2693, "mean_token_accuracy": 0.6746415694554647, "num_tokens": 2490202356.0, "step": 14841 }, { "entropy": 1.7727145949999492, "epoch": 1.630468814369284, "grad_norm": 0.8447114825248718, "learning_rate": 3.565096802766409e-06, "loss": 1.4037, "mean_token_accuracy": 0.6543415536483129, "num_tokens": 2490342399.0, "step": 14842 }, { "entropy": 1.6871120929718018, "epoch": 1.630578671280657, "grad_norm": 0.7047051191329956, "learning_rate": 3.564194647464416e-06, "loss": 1.335, "mean_token_accuracy": 0.6622759302457174, "num_tokens": 2490474991.0, "step": 14843 }, { "entropy": 1.7004227538903554, "epoch": 1.6306885281920298, "grad_norm": 0.7728781700134277, "learning_rate": 3.563292727502312e-06, "loss": 1.2688, "mean_token_accuracy": 0.679591124256452, "num_tokens": 2490595258.0, "step": 14844 }, { "entropy": 1.7138066987196605, "epoch": 1.630798385103403, "grad_norm": 0.6996949315071106, "learning_rate": 3.562391042908645e-06, "loss": 1.3455, "mean_token_accuracy": 0.6582550307114919, "num_tokens": 2490731251.0, "step": 14845 }, { "entropy": 1.667192538579305, "epoch": 1.6309082420147758, "grad_norm": 0.5813220143318176, "learning_rate": 3.5614895937119485e-06, "loss": 1.4651, "mean_token_accuracy": 0.6454577694336573, "num_tokens": 2490944539.0, "step": 14846 }, { "entropy": 1.624341497818629, "epoch": 1.6310180989261487, "grad_norm": 0.7384394407272339, "learning_rate": 3.5605883799407535e-06, "loss": 1.1494, "mean_token_accuracy": 0.6913396020730337, "num_tokens": 2491095006.0, "step": 14847 }, { "entropy": 1.6980106929938, "epoch": 1.6311279558375218, "grad_norm": 0.5377804040908813, "learning_rate": 3.559687401623586e-06, "loss": 1.3711, "mean_token_accuracy": 0.6495694518089294, "num_tokens": 2491351384.0, "step": 14848 }, { "entropy": 1.779253711303075, "epoch": 1.6312378127488945, "grad_norm": 0.7473645806312561, "learning_rate": 3.5587866587889576e-06, "loss": 1.3689, "mean_token_accuracy": 0.6602377941211065, "num_tokens": 2491513439.0, "step": 14849 }, { "entropy": 1.6981875896453857, "epoch": 1.6313476696602676, "grad_norm": 0.8267091512680054, "learning_rate": 3.5578861514653808e-06, "loss": 1.2869, "mean_token_accuracy": 0.6729562679926554, "num_tokens": 2491659420.0, "step": 14850 }, { "entropy": 1.738324244817098, "epoch": 1.6314575265716404, "grad_norm": 0.6099868416786194, "learning_rate": 3.5569858796813526e-06, "loss": 1.5184, "mean_token_accuracy": 0.6377961039543152, "num_tokens": 2491859689.0, "step": 14851 }, { "entropy": 1.6928378343582153, "epoch": 1.6315673834830133, "grad_norm": 0.8429316878318787, "learning_rate": 3.556085843465367e-06, "loss": 1.4247, "mean_token_accuracy": 0.6417907128731409, "num_tokens": 2492026241.0, "step": 14852 }, { "entropy": 1.7215720117092133, "epoch": 1.6316772403943864, "grad_norm": 0.5659279227256775, "learning_rate": 3.5551860428459083e-06, "loss": 1.3285, "mean_token_accuracy": 0.664972111582756, "num_tokens": 2492199044.0, "step": 14853 }, { "entropy": 1.67046320438385, "epoch": 1.631787097305759, "grad_norm": 0.669076681137085, "learning_rate": 3.554286477851461e-06, "loss": 1.3329, "mean_token_accuracy": 0.6556582550207773, "num_tokens": 2492357468.0, "step": 14854 }, { "entropy": 1.6809894442558289, "epoch": 1.6318969542171322, "grad_norm": 1.6321697235107422, "learning_rate": 3.5533871485104887e-06, "loss": 1.1329, "mean_token_accuracy": 0.6873187224070231, "num_tokens": 2492564644.0, "step": 14855 }, { "entropy": 1.7081284324328105, "epoch": 1.632006811128505, "grad_norm": 0.6694812774658203, "learning_rate": 3.5524880548514574e-06, "loss": 1.3172, "mean_token_accuracy": 0.6649055629968643, "num_tokens": 2492727367.0, "step": 14856 }, { "entropy": 1.7426902850468953, "epoch": 1.632116668039878, "grad_norm": 0.9611921310424805, "learning_rate": 3.551589196902824e-06, "loss": 1.5354, "mean_token_accuracy": 0.6367527097463608, "num_tokens": 2492905137.0, "step": 14857 }, { "entropy": 1.7176842490832012, "epoch": 1.632226524951251, "grad_norm": 0.7354887127876282, "learning_rate": 3.5506905746930365e-06, "loss": 1.3753, "mean_token_accuracy": 0.6750811090071996, "num_tokens": 2493042841.0, "step": 14858 }, { "entropy": 1.7373330096403758, "epoch": 1.632336381862624, "grad_norm": 0.6578879356384277, "learning_rate": 3.5497921882505345e-06, "loss": 1.4284, "mean_token_accuracy": 0.6463020741939545, "num_tokens": 2493188295.0, "step": 14859 }, { "entropy": 1.7101022799809773, "epoch": 1.6324462387739969, "grad_norm": 0.7568862438201904, "learning_rate": 3.548894037603754e-06, "loss": 1.3947, "mean_token_accuracy": 0.6567882696787516, "num_tokens": 2493300490.0, "step": 14860 }, { "entropy": 1.6923839151859283, "epoch": 1.63255609568537, "grad_norm": 0.7015122175216675, "learning_rate": 3.5479961227811176e-06, "loss": 1.2996, "mean_token_accuracy": 0.6799081216255823, "num_tokens": 2493421728.0, "step": 14861 }, { "entropy": 1.6776911318302155, "epoch": 1.6326659525967426, "grad_norm": 0.7158594727516174, "learning_rate": 3.547098443811048e-06, "loss": 1.4477, "mean_token_accuracy": 0.6565545201301575, "num_tokens": 2493557145.0, "step": 14862 }, { "entropy": 1.6888268689314525, "epoch": 1.6327758095081157, "grad_norm": 0.6423219442367554, "learning_rate": 3.546201000721955e-06, "loss": 1.3946, "mean_token_accuracy": 0.6625121484200159, "num_tokens": 2493720563.0, "step": 14863 }, { "entropy": 1.7296898762385051, "epoch": 1.6328856664194886, "grad_norm": 0.6093067526817322, "learning_rate": 3.5453037935422386e-06, "loss": 1.2773, "mean_token_accuracy": 0.6675838033358256, "num_tokens": 2493874867.0, "step": 14864 }, { "entropy": 1.720589945713679, "epoch": 1.6329955233308615, "grad_norm": 0.6853680610656738, "learning_rate": 3.544406822300301e-06, "loss": 1.4858, "mean_token_accuracy": 0.6388275722662607, "num_tokens": 2494059360.0, "step": 14865 }, { "entropy": 1.7451708912849426, "epoch": 1.6331053802422346, "grad_norm": 0.6423693895339966, "learning_rate": 3.543510087024527e-06, "loss": 1.3918, "mean_token_accuracy": 0.6528707345326742, "num_tokens": 2494217568.0, "step": 14866 }, { "entropy": 1.7308546602725983, "epoch": 1.6332152371536073, "grad_norm": 0.711121678352356, "learning_rate": 3.5426135877432964e-06, "loss": 1.3704, "mean_token_accuracy": 0.6558242936929067, "num_tokens": 2494360987.0, "step": 14867 }, { "entropy": 1.7218280136585236, "epoch": 1.6333250940649804, "grad_norm": 0.6833885908126831, "learning_rate": 3.541717324484989e-06, "loss": 1.3251, "mean_token_accuracy": 0.6657578895489374, "num_tokens": 2494512404.0, "step": 14868 }, { "entropy": 1.6832963228225708, "epoch": 1.6334349509763533, "grad_norm": 0.680909276008606, "learning_rate": 3.5408212972779637e-06, "loss": 1.5483, "mean_token_accuracy": 0.6394904057184855, "num_tokens": 2494693858.0, "step": 14869 }, { "entropy": 1.7123718361059825, "epoch": 1.6335448078877262, "grad_norm": 0.6566091179847717, "learning_rate": 3.5399255061505865e-06, "loss": 1.543, "mean_token_accuracy": 0.6367166439692179, "num_tokens": 2494928751.0, "step": 14870 }, { "entropy": 1.7312154173851013, "epoch": 1.6336546647990993, "grad_norm": 0.6933454871177673, "learning_rate": 3.5390299511312052e-06, "loss": 1.3882, "mean_token_accuracy": 0.6590891778469086, "num_tokens": 2495100100.0, "step": 14871 }, { "entropy": 1.6795762479305267, "epoch": 1.6337645217104722, "grad_norm": 0.6371995806694031, "learning_rate": 3.5381346322481615e-06, "loss": 1.4628, "mean_token_accuracy": 0.6527168452739716, "num_tokens": 2495293299.0, "step": 14872 }, { "entropy": 1.7581091423829396, "epoch": 1.633874378621845, "grad_norm": 0.7635604739189148, "learning_rate": 3.537239549529794e-06, "loss": 1.3626, "mean_token_accuracy": 0.6564153929551443, "num_tokens": 2495404583.0, "step": 14873 }, { "entropy": 1.6930834452311199, "epoch": 1.6339842355332181, "grad_norm": 0.668928861618042, "learning_rate": 3.536344703004437e-06, "loss": 1.4902, "mean_token_accuracy": 0.6442474573850632, "num_tokens": 2495642014.0, "step": 14874 }, { "entropy": 1.7224301397800446, "epoch": 1.6340940924445908, "grad_norm": 0.6915125846862793, "learning_rate": 3.535450092700402e-06, "loss": 1.5863, "mean_token_accuracy": 0.6436110337575277, "num_tokens": 2495828498.0, "step": 14875 }, { "entropy": 1.675351361433665, "epoch": 1.634203949355964, "grad_norm": 0.6921147704124451, "learning_rate": 3.5345557186460084e-06, "loss": 1.3715, "mean_token_accuracy": 0.6487573534250259, "num_tokens": 2495990677.0, "step": 14876 }, { "entropy": 1.6654540499051411, "epoch": 1.6343138062673368, "grad_norm": 0.6828057169914246, "learning_rate": 3.533661580869564e-06, "loss": 1.3833, "mean_token_accuracy": 0.6538653870423635, "num_tokens": 2496196017.0, "step": 14877 }, { "entropy": 1.654507319132487, "epoch": 1.6344236631787097, "grad_norm": 0.620732307434082, "learning_rate": 3.532767679399366e-06, "loss": 1.3135, "mean_token_accuracy": 0.671380952000618, "num_tokens": 2496363867.0, "step": 14878 }, { "entropy": 1.6607101559638977, "epoch": 1.6345335200900828, "grad_norm": 0.7271916270256042, "learning_rate": 3.5318740142637055e-06, "loss": 1.2748, "mean_token_accuracy": 0.6694884747266769, "num_tokens": 2496485499.0, "step": 14879 }, { "entropy": 1.680219570795695, "epoch": 1.6346433770014555, "grad_norm": 0.6058405041694641, "learning_rate": 3.530980585490868e-06, "loss": 1.2907, "mean_token_accuracy": 0.6739451040824255, "num_tokens": 2496627375.0, "step": 14880 }, { "entropy": 1.7118292550245922, "epoch": 1.6347532339128286, "grad_norm": 0.7250180244445801, "learning_rate": 3.5300873931091273e-06, "loss": 1.3839, "mean_token_accuracy": 0.6615385562181473, "num_tokens": 2496789160.0, "step": 14881 }, { "entropy": 1.6516647239526112, "epoch": 1.6348630908242014, "grad_norm": 0.6745825409889221, "learning_rate": 3.529194437146758e-06, "loss": 1.3025, "mean_token_accuracy": 0.6585622032483419, "num_tokens": 2496933961.0, "step": 14882 }, { "entropy": 1.7175203661123912, "epoch": 1.6349729477355743, "grad_norm": 0.7522205114364624, "learning_rate": 3.5283017176320165e-06, "loss": 1.4299, "mean_token_accuracy": 0.6582658936580023, "num_tokens": 2497087188.0, "step": 14883 }, { "entropy": 1.7287144362926483, "epoch": 1.6350828046469474, "grad_norm": 0.7147111892700195, "learning_rate": 3.5274092345931566e-06, "loss": 1.4834, "mean_token_accuracy": 0.6414237320423126, "num_tokens": 2497221997.0, "step": 14884 }, { "entropy": 1.7016437649726868, "epoch": 1.6351926615583203, "grad_norm": 0.6433010101318359, "learning_rate": 3.526516988058429e-06, "loss": 1.5277, "mean_token_accuracy": 0.6294675916433334, "num_tokens": 2497440321.0, "step": 14885 }, { "entropy": 1.667111227909724, "epoch": 1.6353025184696932, "grad_norm": 0.6358702778816223, "learning_rate": 3.525624978056075e-06, "loss": 1.3189, "mean_token_accuracy": 0.6666462322076162, "num_tokens": 2497632696.0, "step": 14886 }, { "entropy": 1.7168804009755452, "epoch": 1.6354123753810663, "grad_norm": 0.8079360723495483, "learning_rate": 3.5247332046143162e-06, "loss": 1.4001, "mean_token_accuracy": 0.6580467720826467, "num_tokens": 2497763143.0, "step": 14887 }, { "entropy": 1.7009617785612743, "epoch": 1.635522232292439, "grad_norm": 0.6826213598251343, "learning_rate": 3.523841667761384e-06, "loss": 1.4124, "mean_token_accuracy": 0.6428949236869812, "num_tokens": 2497924766.0, "step": 14888 }, { "entropy": 1.6994600693384807, "epoch": 1.635632089203812, "grad_norm": 0.7632419466972351, "learning_rate": 3.522950367525497e-06, "loss": 1.3133, "mean_token_accuracy": 0.6738540679216385, "num_tokens": 2498058132.0, "step": 14889 }, { "entropy": 1.7564916412035625, "epoch": 1.635741946115185, "grad_norm": 0.6684771776199341, "learning_rate": 3.522059303934862e-06, "loss": 1.4357, "mean_token_accuracy": 0.6621495882670084, "num_tokens": 2498215496.0, "step": 14890 }, { "entropy": 1.7205718557039897, "epoch": 1.6358518030265579, "grad_norm": 0.7321786880493164, "learning_rate": 3.5211684770176777e-06, "loss": 1.4398, "mean_token_accuracy": 0.6541054844856262, "num_tokens": 2498439505.0, "step": 14891 }, { "entropy": 1.67322771747907, "epoch": 1.635961659937931, "grad_norm": 0.7148532867431641, "learning_rate": 3.5202778868021423e-06, "loss": 1.3382, "mean_token_accuracy": 0.667180672287941, "num_tokens": 2498635826.0, "step": 14892 }, { "entropy": 1.7368038892745972, "epoch": 1.6360715168493036, "grad_norm": 0.780015230178833, "learning_rate": 3.5193875333164398e-06, "loss": 1.4777, "mean_token_accuracy": 0.6418419082959493, "num_tokens": 2498823186.0, "step": 14893 }, { "entropy": 1.6647245784600575, "epoch": 1.6361813737606767, "grad_norm": 0.6493139863014221, "learning_rate": 3.518497416588753e-06, "loss": 1.4324, "mean_token_accuracy": 0.6691676676273346, "num_tokens": 2499000974.0, "step": 14894 }, { "entropy": 1.7569353878498077, "epoch": 1.6362912306720496, "grad_norm": 0.6052994728088379, "learning_rate": 3.517607536647253e-06, "loss": 1.4793, "mean_token_accuracy": 0.6535971015691757, "num_tokens": 2499187968.0, "step": 14895 }, { "entropy": 1.6488630374272664, "epoch": 1.6364010875834225, "grad_norm": 0.6986522078514099, "learning_rate": 3.5167178935200996e-06, "loss": 1.389, "mean_token_accuracy": 0.6525800079107285, "num_tokens": 2499352714.0, "step": 14896 }, { "entropy": 1.7244251767794292, "epoch": 1.6365109444947956, "grad_norm": 0.720429003238678, "learning_rate": 3.515828487235453e-06, "loss": 1.5057, "mean_token_accuracy": 0.661791185537974, "num_tokens": 2499494824.0, "step": 14897 }, { "entropy": 1.6545683940251668, "epoch": 1.6366208014061685, "grad_norm": 0.7149010300636292, "learning_rate": 3.5149393178214663e-06, "loss": 1.3648, "mean_token_accuracy": 0.6682617863019308, "num_tokens": 2499651766.0, "step": 14898 }, { "entropy": 1.7152326206366222, "epoch": 1.6367306583175414, "grad_norm": 0.6399795413017273, "learning_rate": 3.5140503853062734e-06, "loss": 1.3521, "mean_token_accuracy": 0.6598746081193289, "num_tokens": 2499796383.0, "step": 14899 }, { "entropy": 1.7084354956944783, "epoch": 1.6368405152289145, "grad_norm": 0.693622887134552, "learning_rate": 3.5131616897180132e-06, "loss": 1.5177, "mean_token_accuracy": 0.641907716790835, "num_tokens": 2499967826.0, "step": 14900 }, { "entropy": 1.763914128144582, "epoch": 1.6369503721402872, "grad_norm": 0.7751642465591431, "learning_rate": 3.5122732310848124e-06, "loss": 1.3232, "mean_token_accuracy": 0.658448706070582, "num_tokens": 2500085030.0, "step": 14901 }, { "entropy": 1.6992384692033131, "epoch": 1.6370602290516603, "grad_norm": 0.6677964925765991, "learning_rate": 3.5113850094347906e-06, "loss": 1.4129, "mean_token_accuracy": 0.6515720884005228, "num_tokens": 2500323347.0, "step": 14902 }, { "entropy": 1.65836563706398, "epoch": 1.6371700859630332, "grad_norm": 0.6140561103820801, "learning_rate": 3.5104970247960567e-06, "loss": 1.3939, "mean_token_accuracy": 0.6596829444169998, "num_tokens": 2500533957.0, "step": 14903 }, { "entropy": 1.764195293188095, "epoch": 1.637279942874406, "grad_norm": 0.7463130354881287, "learning_rate": 3.50960927719672e-06, "loss": 1.5801, "mean_token_accuracy": 0.636942724386851, "num_tokens": 2500723855.0, "step": 14904 }, { "entropy": 1.6985965470472972, "epoch": 1.6373897997857791, "grad_norm": 0.6363186240196228, "learning_rate": 3.508721766664872e-06, "loss": 1.3983, "mean_token_accuracy": 0.6462677617867788, "num_tokens": 2500898305.0, "step": 14905 }, { "entropy": 1.7432740926742554, "epoch": 1.6374996566971518, "grad_norm": 0.6543776988983154, "learning_rate": 3.5078344932286055e-06, "loss": 1.3426, "mean_token_accuracy": 0.6671454260746638, "num_tokens": 2501057739.0, "step": 14906 }, { "entropy": 1.640707751115163, "epoch": 1.637609513608525, "grad_norm": 0.6563522219657898, "learning_rate": 3.506947456916002e-06, "loss": 1.477, "mean_token_accuracy": 0.6595013240973154, "num_tokens": 2501271194.0, "step": 14907 }, { "entropy": 1.7347827851772308, "epoch": 1.6377193705198978, "grad_norm": 0.5993261337280273, "learning_rate": 3.5060606577551325e-06, "loss": 1.4884, "mean_token_accuracy": 0.6420771131912867, "num_tokens": 2501480540.0, "step": 14908 }, { "entropy": 1.7362710138161976, "epoch": 1.6378292274312707, "grad_norm": 0.6462193727493286, "learning_rate": 3.5051740957740666e-06, "loss": 1.5208, "mean_token_accuracy": 0.6494198342164358, "num_tokens": 2501713196.0, "step": 14909 }, { "entropy": 1.7323419352372487, "epoch": 1.6379390843426438, "grad_norm": 0.6800756454467773, "learning_rate": 3.504287771000868e-06, "loss": 1.3642, "mean_token_accuracy": 0.6693668713172277, "num_tokens": 2501875419.0, "step": 14910 }, { "entropy": 1.7118912140528362, "epoch": 1.6380489412540167, "grad_norm": 0.6431507468223572, "learning_rate": 3.5034016834635787e-06, "loss": 1.3699, "mean_token_accuracy": 0.6649849017461141, "num_tokens": 2502026670.0, "step": 14911 }, { "entropy": 1.6773951450983684, "epoch": 1.6381587981653896, "grad_norm": 0.6200417876243591, "learning_rate": 3.5025158331902488e-06, "loss": 1.3204, "mean_token_accuracy": 0.6617454985777537, "num_tokens": 2502200495.0, "step": 14912 }, { "entropy": 1.6460547248522441, "epoch": 1.6382686550767627, "grad_norm": 0.7539404630661011, "learning_rate": 3.501630220208916e-06, "loss": 1.2843, "mean_token_accuracy": 0.6683847606182098, "num_tokens": 2502345107.0, "step": 14913 }, { "entropy": 1.7317273020744324, "epoch": 1.6383785119881353, "grad_norm": 0.7395991086959839, "learning_rate": 3.500744844547608e-06, "loss": 1.2899, "mean_token_accuracy": 0.6693265736103058, "num_tokens": 2502503992.0, "step": 14914 }, { "entropy": 1.6876648664474487, "epoch": 1.6384883688995084, "grad_norm": 0.778742790222168, "learning_rate": 3.4998597062343443e-06, "loss": 1.2905, "mean_token_accuracy": 0.6624922255674998, "num_tokens": 2502666223.0, "step": 14915 }, { "entropy": 1.6516147553920746, "epoch": 1.6385982258108813, "grad_norm": 0.722156822681427, "learning_rate": 3.498974805297144e-06, "loss": 1.3988, "mean_token_accuracy": 0.657736748456955, "num_tokens": 2502847645.0, "step": 14916 }, { "entropy": 1.7666858335336049, "epoch": 1.6387080827222542, "grad_norm": 0.773971438407898, "learning_rate": 3.4980901417640078e-06, "loss": 1.4984, "mean_token_accuracy": 0.6684010674556097, "num_tokens": 2502969638.0, "step": 14917 }, { "entropy": 1.7049271663029988, "epoch": 1.6388179396336273, "grad_norm": 0.7004063725471497, "learning_rate": 3.4972057156629407e-06, "loss": 1.5013, "mean_token_accuracy": 0.6599620431661606, "num_tokens": 2503096630.0, "step": 14918 }, { "entropy": 1.7267379264036815, "epoch": 1.638927796545, "grad_norm": 0.7648240327835083, "learning_rate": 3.4963215270219332e-06, "loss": 1.4041, "mean_token_accuracy": 0.6580546349287033, "num_tokens": 2503215160.0, "step": 14919 }, { "entropy": 1.7685744762420654, "epoch": 1.639037653456373, "grad_norm": 0.6759129762649536, "learning_rate": 3.495437575868964e-06, "loss": 1.2746, "mean_token_accuracy": 0.6785066773494085, "num_tokens": 2503327731.0, "step": 14920 }, { "entropy": 1.6827283104260762, "epoch": 1.639147510367746, "grad_norm": 0.7047680020332336, "learning_rate": 3.4945538622320147e-06, "loss": 1.3335, "mean_token_accuracy": 0.6698191513617834, "num_tokens": 2503496301.0, "step": 14921 }, { "entropy": 1.6511322756608326, "epoch": 1.6392573672791189, "grad_norm": 0.6691418290138245, "learning_rate": 3.4936703861390587e-06, "loss": 1.2096, "mean_token_accuracy": 0.6906089385350546, "num_tokens": 2503618169.0, "step": 14922 }, { "entropy": 1.6849328478177388, "epoch": 1.639367224190492, "grad_norm": 0.5966286659240723, "learning_rate": 3.4927871476180477e-06, "loss": 1.3157, "mean_token_accuracy": 0.6642686674992243, "num_tokens": 2503762147.0, "step": 14923 }, { "entropy": 1.695220708847046, "epoch": 1.6394770811018649, "grad_norm": 0.5794353485107422, "learning_rate": 3.4919041466969417e-06, "loss": 1.4488, "mean_token_accuracy": 0.6457569946845373, "num_tokens": 2503993523.0, "step": 14924 }, { "entropy": 1.7141393721103668, "epoch": 1.6395869380132377, "grad_norm": 0.6625608801841736, "learning_rate": 3.4910213834036848e-06, "loss": 1.3348, "mean_token_accuracy": 0.6721012790997823, "num_tokens": 2504177429.0, "step": 14925 }, { "entropy": 1.6831740339597066, "epoch": 1.6396967949246108, "grad_norm": 0.6153568029403687, "learning_rate": 3.4901388577662197e-06, "loss": 1.3432, "mean_token_accuracy": 0.6737964401642481, "num_tokens": 2504376626.0, "step": 14926 }, { "entropy": 1.6954569518566132, "epoch": 1.6398066518359835, "grad_norm": 0.6646896600723267, "learning_rate": 3.489256569812477e-06, "loss": 1.2977, "mean_token_accuracy": 0.6855068306128184, "num_tokens": 2504515974.0, "step": 14927 }, { "entropy": 1.6713591814041138, "epoch": 1.6399165087473566, "grad_norm": 0.7084295153617859, "learning_rate": 3.4883745195703754e-06, "loss": 1.3215, "mean_token_accuracy": 0.672945981224378, "num_tokens": 2504673537.0, "step": 14928 }, { "entropy": 1.7667873203754425, "epoch": 1.6400263656587295, "grad_norm": 0.7727949619293213, "learning_rate": 3.487492707067836e-06, "loss": 1.5748, "mean_token_accuracy": 0.6411859591801962, "num_tokens": 2504883359.0, "step": 14929 }, { "entropy": 1.6953539848327637, "epoch": 1.6401362225701024, "grad_norm": 0.6839701533317566, "learning_rate": 3.486611132332772e-06, "loss": 1.3055, "mean_token_accuracy": 0.6642041752735773, "num_tokens": 2505002882.0, "step": 14930 }, { "entropy": 1.7143741349379222, "epoch": 1.6402460794814755, "grad_norm": 0.6058863997459412, "learning_rate": 3.485729795393075e-06, "loss": 1.4816, "mean_token_accuracy": 0.6417601952950159, "num_tokens": 2505217245.0, "step": 14931 }, { "entropy": 1.7433338264624278, "epoch": 1.6403559363928482, "grad_norm": 0.6095753312110901, "learning_rate": 3.484848696276645e-06, "loss": 1.4746, "mean_token_accuracy": 0.6288290123144785, "num_tokens": 2505482140.0, "step": 14932 }, { "entropy": 1.715920130411784, "epoch": 1.6404657933042213, "grad_norm": 0.7610313296318054, "learning_rate": 3.4839678350113688e-06, "loss": 1.3884, "mean_token_accuracy": 0.6634253362814585, "num_tokens": 2505640431.0, "step": 14933 }, { "entropy": 1.6781785488128662, "epoch": 1.6405756502155942, "grad_norm": 0.8470014333724976, "learning_rate": 3.4830872116251235e-06, "loss": 1.5552, "mean_token_accuracy": 0.6412830402453741, "num_tokens": 2505826234.0, "step": 14934 }, { "entropy": 1.7671978374322255, "epoch": 1.640685507126967, "grad_norm": 0.8706827163696289, "learning_rate": 3.4822068261457785e-06, "loss": 1.504, "mean_token_accuracy": 0.6506080453594526, "num_tokens": 2505955616.0, "step": 14935 }, { "entropy": 1.7093442579110463, "epoch": 1.6407953640383401, "grad_norm": 0.6982713341712952, "learning_rate": 3.4813266786012024e-06, "loss": 1.2674, "mean_token_accuracy": 0.6796736617883047, "num_tokens": 2506098680.0, "step": 14936 }, { "entropy": 1.676591416200002, "epoch": 1.640905220949713, "grad_norm": 0.641269326210022, "learning_rate": 3.480446769019248e-06, "loss": 1.3135, "mean_token_accuracy": 0.674098422129949, "num_tokens": 2506228004.0, "step": 14937 }, { "entropy": 1.7529121339321136, "epoch": 1.641015077861086, "grad_norm": 0.7853704690933228, "learning_rate": 3.4795670974277657e-06, "loss": 1.5918, "mean_token_accuracy": 0.6261871109406153, "num_tokens": 2506412039.0, "step": 14938 }, { "entropy": 1.7120693922042847, "epoch": 1.641124934772459, "grad_norm": 0.6365554332733154, "learning_rate": 3.478687663854596e-06, "loss": 1.4076, "mean_token_accuracy": 0.6571402897437414, "num_tokens": 2506587078.0, "step": 14939 }, { "entropy": 1.7732374270757039, "epoch": 1.6412347916838317, "grad_norm": 0.7040333151817322, "learning_rate": 3.4778084683275703e-06, "loss": 1.4869, "mean_token_accuracy": 0.6406953384478887, "num_tokens": 2506742349.0, "step": 14940 }, { "entropy": 1.7779802978038788, "epoch": 1.6413446485952048, "grad_norm": 0.8952456712722778, "learning_rate": 3.4769295108745177e-06, "loss": 1.5624, "mean_token_accuracy": 0.6528476029634476, "num_tokens": 2506869120.0, "step": 14941 }, { "entropy": 1.7203821142514546, "epoch": 1.6414545055065777, "grad_norm": 0.6606357097625732, "learning_rate": 3.47605079152326e-06, "loss": 1.2624, "mean_token_accuracy": 0.670018677910169, "num_tokens": 2506989406.0, "step": 14942 }, { "entropy": 1.708219975233078, "epoch": 1.6415643624179506, "grad_norm": 0.7178025841712952, "learning_rate": 3.4751723103016e-06, "loss": 1.3562, "mean_token_accuracy": 0.6561354100704193, "num_tokens": 2507122848.0, "step": 14943 }, { "entropy": 1.7271239757537842, "epoch": 1.6416742193293237, "grad_norm": 0.7366745471954346, "learning_rate": 3.4742940672373464e-06, "loss": 1.3486, "mean_token_accuracy": 0.6560288916031519, "num_tokens": 2507321375.0, "step": 14944 }, { "entropy": 1.7078354159990947, "epoch": 1.6417840762406963, "grad_norm": 0.6426877975463867, "learning_rate": 3.473416062358296e-06, "loss": 1.3707, "mean_token_accuracy": 0.665172666311264, "num_tokens": 2507486772.0, "step": 14945 }, { "entropy": 1.7115601897239685, "epoch": 1.6418939331520694, "grad_norm": 0.7070155143737793, "learning_rate": 3.472538295692235e-06, "loss": 1.3945, "mean_token_accuracy": 0.6624555140733719, "num_tokens": 2507685516.0, "step": 14946 }, { "entropy": 1.7395752469698589, "epoch": 1.6420037900634423, "grad_norm": 0.8616496920585632, "learning_rate": 3.4716607672669435e-06, "loss": 1.4793, "mean_token_accuracy": 0.6566527982552847, "num_tokens": 2507832761.0, "step": 14947 }, { "entropy": 1.670799434185028, "epoch": 1.6421136469748152, "grad_norm": 0.7337598204612732, "learning_rate": 3.4707834771101985e-06, "loss": 1.4037, "mean_token_accuracy": 0.6581169764200846, "num_tokens": 2508037225.0, "step": 14948 }, { "entropy": 1.654303212960561, "epoch": 1.6422235038861883, "grad_norm": 0.8170070052146912, "learning_rate": 3.4699064252497616e-06, "loss": 1.4118, "mean_token_accuracy": 0.6585876593987147, "num_tokens": 2508185283.0, "step": 14949 }, { "entropy": 1.6956948439280193, "epoch": 1.6423333607975612, "grad_norm": 0.6304458975791931, "learning_rate": 3.469029611713395e-06, "loss": 1.349, "mean_token_accuracy": 0.6663823227087656, "num_tokens": 2508349025.0, "step": 14950 }, { "entropy": 1.6847296555836995, "epoch": 1.642443217708934, "grad_norm": 0.7351884245872498, "learning_rate": 3.4681530365288484e-06, "loss": 1.3056, "mean_token_accuracy": 0.674777110417684, "num_tokens": 2508469488.0, "step": 14951 }, { "entropy": 1.7506476143995922, "epoch": 1.6425530746203072, "grad_norm": 0.7597024440765381, "learning_rate": 3.4672766997238618e-06, "loss": 1.4348, "mean_token_accuracy": 0.6497123142083486, "num_tokens": 2508616481.0, "step": 14952 }, { "entropy": 1.715817630290985, "epoch": 1.6426629315316799, "grad_norm": 0.6161386370658875, "learning_rate": 3.4664006013261733e-06, "loss": 1.4003, "mean_token_accuracy": 0.6562450776497523, "num_tokens": 2508774763.0, "step": 14953 }, { "entropy": 1.7526540557543437, "epoch": 1.642772788443053, "grad_norm": 0.7678797841072083, "learning_rate": 3.465524741363515e-06, "loss": 1.4103, "mean_token_accuracy": 0.6473128894964854, "num_tokens": 2508964757.0, "step": 14954 }, { "entropy": 1.6675111552079518, "epoch": 1.6428826453544259, "grad_norm": 0.6056217551231384, "learning_rate": 3.464649119863599e-06, "loss": 1.3892, "mean_token_accuracy": 0.6614933907985687, "num_tokens": 2509142762.0, "step": 14955 }, { "entropy": 1.7322762807210286, "epoch": 1.6429925022657987, "grad_norm": 0.711068868637085, "learning_rate": 3.4637737368541436e-06, "loss": 1.4018, "mean_token_accuracy": 0.6589230199654897, "num_tokens": 2509286834.0, "step": 14956 }, { "entropy": 1.6484521726767223, "epoch": 1.6431023591771718, "grad_norm": 0.6235215663909912, "learning_rate": 3.462898592362855e-06, "loss": 1.494, "mean_token_accuracy": 0.6627842883268992, "num_tokens": 2509473391.0, "step": 14957 }, { "entropy": 1.7106922964255016, "epoch": 1.6432122160885445, "grad_norm": 0.6999409794807434, "learning_rate": 3.4620236864174308e-06, "loss": 1.3157, "mean_token_accuracy": 0.6589783877134323, "num_tokens": 2509587763.0, "step": 14958 }, { "entropy": 1.67958868543307, "epoch": 1.6433220729999176, "grad_norm": 0.6608509421348572, "learning_rate": 3.4611490190455566e-06, "loss": 1.2957, "mean_token_accuracy": 0.6818757752577463, "num_tokens": 2509728836.0, "step": 14959 }, { "entropy": 1.6971628268559773, "epoch": 1.6434319299112905, "grad_norm": 0.7482900023460388, "learning_rate": 3.460274590274922e-06, "loss": 1.3908, "mean_token_accuracy": 0.6718897273143133, "num_tokens": 2509872917.0, "step": 14960 }, { "entropy": 1.650085061788559, "epoch": 1.6435417868226634, "grad_norm": 0.7086712718009949, "learning_rate": 3.4594004001331964e-06, "loss": 1.3207, "mean_token_accuracy": 0.6673696339130402, "num_tokens": 2510005891.0, "step": 14961 }, { "entropy": 1.7292489111423492, "epoch": 1.6436516437340365, "grad_norm": 0.7552675008773804, "learning_rate": 3.458526448648053e-06, "loss": 1.3219, "mean_token_accuracy": 0.6736279179652532, "num_tokens": 2510168802.0, "step": 14962 }, { "entropy": 1.6483072837193806, "epoch": 1.6437615006454094, "grad_norm": 0.74481600522995, "learning_rate": 3.457652735847148e-06, "loss": 1.3123, "mean_token_accuracy": 0.6694979121287664, "num_tokens": 2510308210.0, "step": 14963 }, { "entropy": 1.6588951746622722, "epoch": 1.6438713575567823, "grad_norm": 0.6103244423866272, "learning_rate": 3.456779261758134e-06, "loss": 1.3824, "mean_token_accuracy": 0.6662068615357081, "num_tokens": 2510492255.0, "step": 14964 }, { "entropy": 1.732719083627065, "epoch": 1.6439812144681554, "grad_norm": 0.7661134004592896, "learning_rate": 3.455906026408658e-06, "loss": 1.518, "mean_token_accuracy": 0.6458713908990225, "num_tokens": 2510699730.0, "step": 14965 }, { "entropy": 1.680096020301183, "epoch": 1.644091071379528, "grad_norm": 0.6338300704956055, "learning_rate": 3.45503302982636e-06, "loss": 1.4193, "mean_token_accuracy": 0.6609679808219274, "num_tokens": 2510884193.0, "step": 14966 }, { "entropy": 1.7061445514361064, "epoch": 1.6442009282909011, "grad_norm": 0.7027104496955872, "learning_rate": 3.4541602720388633e-06, "loss": 1.3129, "mean_token_accuracy": 0.6661281039317449, "num_tokens": 2511064612.0, "step": 14967 }, { "entropy": 1.7287393510341644, "epoch": 1.644310785202274, "grad_norm": 0.6110780239105225, "learning_rate": 3.453287753073793e-06, "loss": 1.3438, "mean_token_accuracy": 0.673071970542272, "num_tokens": 2511225799.0, "step": 14968 }, { "entropy": 1.7669932544231415, "epoch": 1.644420642113647, "grad_norm": 0.7104088068008423, "learning_rate": 3.452415472958767e-06, "loss": 1.5157, "mean_token_accuracy": 0.6437924156586329, "num_tokens": 2511418847.0, "step": 14969 }, { "entropy": 1.720796098311742, "epoch": 1.64453049902502, "grad_norm": 0.6525430679321289, "learning_rate": 3.4515434317213904e-06, "loss": 1.2248, "mean_token_accuracy": 0.6795699944098791, "num_tokens": 2511526638.0, "step": 14970 }, { "entropy": 1.7100069324175518, "epoch": 1.6446403559363927, "grad_norm": 0.7813405990600586, "learning_rate": 3.4506716293892614e-06, "loss": 1.2652, "mean_token_accuracy": 0.6727269490559896, "num_tokens": 2511659416.0, "step": 14971 }, { "entropy": 1.7985884646574657, "epoch": 1.6447502128477658, "grad_norm": 0.8207912445068359, "learning_rate": 3.4498000659899745e-06, "loss": 1.4898, "mean_token_accuracy": 0.6614548414945602, "num_tokens": 2511868418.0, "step": 14972 }, { "entropy": 1.6758454938729603, "epoch": 1.6448600697591387, "grad_norm": 0.6920216679573059, "learning_rate": 3.4489287415511107e-06, "loss": 1.2647, "mean_token_accuracy": 0.676102747519811, "num_tokens": 2511999213.0, "step": 14973 }, { "entropy": 1.7158870200316112, "epoch": 1.6449699266705116, "grad_norm": 2.2388501167297363, "learning_rate": 3.4480576561002533e-06, "loss": 1.3928, "mean_token_accuracy": 0.6611330558856329, "num_tokens": 2512146843.0, "step": 14974 }, { "entropy": 1.7090627551078796, "epoch": 1.6450797835818847, "grad_norm": 0.8154202699661255, "learning_rate": 3.4471868096649676e-06, "loss": 1.3153, "mean_token_accuracy": 0.6673569430907568, "num_tokens": 2512297770.0, "step": 14975 }, { "entropy": 1.6612287263075511, "epoch": 1.6451896404932576, "grad_norm": 0.7027810215950012, "learning_rate": 3.4463162022728137e-06, "loss": 1.3933, "mean_token_accuracy": 0.6641417344411215, "num_tokens": 2512435108.0, "step": 14976 }, { "entropy": 1.6808200577894847, "epoch": 1.6452994974046304, "grad_norm": 0.7793719172477722, "learning_rate": 3.4454458339513487e-06, "loss": 1.2633, "mean_token_accuracy": 0.6780805687109629, "num_tokens": 2512623707.0, "step": 14977 }, { "entropy": 1.7211743195851643, "epoch": 1.6454093543160035, "grad_norm": 1.0265836715698242, "learning_rate": 3.4445757047281226e-06, "loss": 1.3302, "mean_token_accuracy": 0.6751032521327337, "num_tokens": 2512792244.0, "step": 14978 }, { "entropy": 1.7436003684997559, "epoch": 1.6455192112273762, "grad_norm": 0.688589870929718, "learning_rate": 3.443705814630666e-06, "loss": 1.3625, "mean_token_accuracy": 0.6572001427412033, "num_tokens": 2512925361.0, "step": 14979 }, { "entropy": 1.7435889144738514, "epoch": 1.6456290681387493, "grad_norm": 3.754509210586548, "learning_rate": 3.4428361636865167e-06, "loss": 1.1535, "mean_token_accuracy": 0.6706924239794413, "num_tokens": 2513119666.0, "step": 14980 }, { "entropy": 1.7060795525709789, "epoch": 1.6457389250501222, "grad_norm": 0.5898798108100891, "learning_rate": 3.441966751923199e-06, "loss": 1.489, "mean_token_accuracy": 0.6470472464958826, "num_tokens": 2513302051.0, "step": 14981 }, { "entropy": 1.7448813021183014, "epoch": 1.645848781961495, "grad_norm": 0.7016264796257019, "learning_rate": 3.441097579368228e-06, "loss": 1.4278, "mean_token_accuracy": 0.6593179255723953, "num_tokens": 2513439223.0, "step": 14982 }, { "entropy": 1.7329972485701244, "epoch": 1.6459586388728682, "grad_norm": 0.72443687915802, "learning_rate": 3.440228646049112e-06, "loss": 1.4211, "mean_token_accuracy": 0.6588300516208013, "num_tokens": 2513578549.0, "step": 14983 }, { "entropy": 1.7184851070245106, "epoch": 1.646068495784241, "grad_norm": 0.7031991481781006, "learning_rate": 3.439359951993351e-06, "loss": 1.4099, "mean_token_accuracy": 0.6423845837513605, "num_tokens": 2513802978.0, "step": 14984 }, { "entropy": 1.7713424662748973, "epoch": 1.646178352695614, "grad_norm": 0.7661514282226562, "learning_rate": 3.438491497228441e-06, "loss": 1.3603, "mean_token_accuracy": 0.6532770196596781, "num_tokens": 2513909884.0, "step": 14985 }, { "entropy": 1.708994189898173, "epoch": 1.6462882096069869, "grad_norm": 0.7131803035736084, "learning_rate": 3.4376232817818724e-06, "loss": 1.4916, "mean_token_accuracy": 0.6488902270793915, "num_tokens": 2514101465.0, "step": 14986 }, { "entropy": 1.7118847767512004, "epoch": 1.6463980665183597, "grad_norm": 0.5871043801307678, "learning_rate": 3.4367553056811143e-06, "loss": 1.3621, "mean_token_accuracy": 0.6566170553366343, "num_tokens": 2514282837.0, "step": 14987 }, { "entropy": 1.6855728328227997, "epoch": 1.6465079234297328, "grad_norm": 0.6960379481315613, "learning_rate": 3.4358875689536424e-06, "loss": 1.2995, "mean_token_accuracy": 0.6776407758394877, "num_tokens": 2514466067.0, "step": 14988 }, { "entropy": 1.7425021131833394, "epoch": 1.6466177803411057, "grad_norm": 0.6158261299133301, "learning_rate": 3.435020071626923e-06, "loss": 1.5397, "mean_token_accuracy": 0.6305239746967951, "num_tokens": 2514682417.0, "step": 14989 }, { "entropy": 1.7520319521427155, "epoch": 1.6467276372524786, "grad_norm": 0.8240329623222351, "learning_rate": 3.4341528137284097e-06, "loss": 1.3623, "mean_token_accuracy": 0.6614614178737005, "num_tokens": 2514865345.0, "step": 14990 }, { "entropy": 1.660654256741206, "epoch": 1.6468374941638517, "grad_norm": 0.7162386775016785, "learning_rate": 3.433285795285548e-06, "loss": 1.2122, "mean_token_accuracy": 0.6758624712626139, "num_tokens": 2514996123.0, "step": 14991 }, { "entropy": 1.7671376864115398, "epoch": 1.6469473510752244, "grad_norm": 0.73747718334198, "learning_rate": 3.432419016325784e-06, "loss": 1.5858, "mean_token_accuracy": 0.6482378343741099, "num_tokens": 2515213008.0, "step": 14992 }, { "entropy": 1.7469678024450939, "epoch": 1.6470572079865975, "grad_norm": 0.7026829719543457, "learning_rate": 3.431552476876545e-06, "loss": 1.5166, "mean_token_accuracy": 0.6324778149525324, "num_tokens": 2515397718.0, "step": 14993 }, { "entropy": 1.6981361210346222, "epoch": 1.6471670648979704, "grad_norm": 0.6470394134521484, "learning_rate": 3.4306861769652634e-06, "loss": 1.4181, "mean_token_accuracy": 0.6427850276231766, "num_tokens": 2515598794.0, "step": 14994 }, { "entropy": 1.6396251320838928, "epoch": 1.6472769218093433, "grad_norm": 0.6603129506111145, "learning_rate": 3.4298201166193512e-06, "loss": 1.2424, "mean_token_accuracy": 0.6694211512804031, "num_tokens": 2515725829.0, "step": 14995 }, { "entropy": 1.6787129143873851, "epoch": 1.6473867787207164, "grad_norm": 0.7073856592178345, "learning_rate": 3.4289542958662212e-06, "loss": 1.3735, "mean_token_accuracy": 0.6567325393358866, "num_tokens": 2515892679.0, "step": 14996 }, { "entropy": 1.6887877583503723, "epoch": 1.6474966356320893, "grad_norm": 0.7140524983406067, "learning_rate": 3.428088714733274e-06, "loss": 1.3827, "mean_token_accuracy": 0.6687111059824625, "num_tokens": 2516054075.0, "step": 14997 }, { "entropy": 1.6639872093995411, "epoch": 1.6476064925434621, "grad_norm": 0.6772821545600891, "learning_rate": 3.4272233732479134e-06, "loss": 1.344, "mean_token_accuracy": 0.6666690111160278, "num_tokens": 2516209836.0, "step": 14998 }, { "entropy": 1.6863376994927723, "epoch": 1.647716349454835, "grad_norm": 0.6367020606994629, "learning_rate": 3.4263582714375152e-06, "loss": 1.3958, "mean_token_accuracy": 0.6636939545472463, "num_tokens": 2516398662.0, "step": 14999 }, { "entropy": 1.622712602217992, "epoch": 1.647826206366208, "grad_norm": 0.7956864237785339, "learning_rate": 3.4254934093294655e-06, "loss": 1.4934, "mean_token_accuracy": 0.6606844613949457, "num_tokens": 2516590676.0, "step": 15000 }, { "entropy": 1.7188996473948162, "epoch": 1.647936063277581, "grad_norm": 0.6622449159622192, "learning_rate": 3.4246287869511373e-06, "loss": 1.4132, "mean_token_accuracy": 0.652355432510376, "num_tokens": 2516765487.0, "step": 15001 }, { "entropy": 1.6729025741418202, "epoch": 1.648045920188954, "grad_norm": 0.7184236645698547, "learning_rate": 3.423764404329895e-06, "loss": 1.4057, "mean_token_accuracy": 0.6480544259150823, "num_tokens": 2516931514.0, "step": 15002 }, { "entropy": 1.721810112396876, "epoch": 1.6481557771003268, "grad_norm": 0.6725652813911438, "learning_rate": 3.422900261493094e-06, "loss": 1.4415, "mean_token_accuracy": 0.6622193058331808, "num_tokens": 2517100166.0, "step": 15003 }, { "entropy": 1.7754334608713787, "epoch": 1.6482656340117, "grad_norm": 0.7001306414604187, "learning_rate": 3.4220363584680873e-06, "loss": 1.4849, "mean_token_accuracy": 0.6442708969116211, "num_tokens": 2517323102.0, "step": 15004 }, { "entropy": 1.761822521686554, "epoch": 1.6483754909230726, "grad_norm": 0.6547316908836365, "learning_rate": 3.421172695282213e-06, "loss": 1.5457, "mean_token_accuracy": 0.6352403461933136, "num_tokens": 2517531301.0, "step": 15005 }, { "entropy": 1.711624006430308, "epoch": 1.6484853478344457, "grad_norm": 0.6645762324333191, "learning_rate": 3.4203092719628096e-06, "loss": 1.2913, "mean_token_accuracy": 0.6747534225384394, "num_tokens": 2517647850.0, "step": 15006 }, { "entropy": 1.6479672491550446, "epoch": 1.6485952047458186, "grad_norm": 0.7587347626686096, "learning_rate": 3.4194460885372016e-06, "loss": 1.2875, "mean_token_accuracy": 0.665214791893959, "num_tokens": 2517772787.0, "step": 15007 }, { "entropy": 1.6956773499647777, "epoch": 1.6487050616571914, "grad_norm": 0.7115501165390015, "learning_rate": 3.4185831450327077e-06, "loss": 1.3814, "mean_token_accuracy": 0.6578154365221659, "num_tokens": 2517915878.0, "step": 15008 }, { "entropy": 1.6962638994057972, "epoch": 1.6488149185685645, "grad_norm": 0.6763247847557068, "learning_rate": 3.4177204414766405e-06, "loss": 1.3814, "mean_token_accuracy": 0.6662989805142084, "num_tokens": 2518072919.0, "step": 15009 }, { "entropy": 1.6879964172840118, "epoch": 1.6489247754799374, "grad_norm": 0.5787865519523621, "learning_rate": 3.4168579778963097e-06, "loss": 1.4668, "mean_token_accuracy": 0.6475926488637924, "num_tokens": 2518230729.0, "step": 15010 }, { "entropy": 1.7373622755209606, "epoch": 1.6490346323913103, "grad_norm": 0.7441072463989258, "learning_rate": 3.4159957543190015e-06, "loss": 1.3008, "mean_token_accuracy": 0.658590778708458, "num_tokens": 2518364564.0, "step": 15011 }, { "entropy": 1.644492268562317, "epoch": 1.6491444893026832, "grad_norm": 0.6323592662811279, "learning_rate": 3.4151337707720113e-06, "loss": 1.3825, "mean_token_accuracy": 0.6613515466451645, "num_tokens": 2518576546.0, "step": 15012 }, { "entropy": 1.7661062677701314, "epoch": 1.649254346214056, "grad_norm": 0.7031536102294922, "learning_rate": 3.414272027282621e-06, "loss": 1.3482, "mean_token_accuracy": 0.6614481111367544, "num_tokens": 2518741668.0, "step": 15013 }, { "entropy": 1.725075602531433, "epoch": 1.6493642031254292, "grad_norm": 0.6364622712135315, "learning_rate": 3.4134105238781033e-06, "loss": 1.5204, "mean_token_accuracy": 0.6369550327459971, "num_tokens": 2518948581.0, "step": 15014 }, { "entropy": 1.716450273990631, "epoch": 1.649474060036802, "grad_norm": 0.897272527217865, "learning_rate": 3.4125492605857215e-06, "loss": 1.389, "mean_token_accuracy": 0.6591392507155737, "num_tokens": 2519111501.0, "step": 15015 }, { "entropy": 1.7680715421835582, "epoch": 1.649583916948175, "grad_norm": 0.6573340892791748, "learning_rate": 3.411688237432739e-06, "loss": 1.3537, "mean_token_accuracy": 0.6606834232807159, "num_tokens": 2519237910.0, "step": 15016 }, { "entropy": 1.744692752758662, "epoch": 1.649693773859548, "grad_norm": 0.6951401233673096, "learning_rate": 3.4108274544464015e-06, "loss": 1.5013, "mean_token_accuracy": 0.6513955841461817, "num_tokens": 2519403504.0, "step": 15017 }, { "entropy": 1.705136001110077, "epoch": 1.6498036307709207, "grad_norm": 0.6180141568183899, "learning_rate": 3.409966911653958e-06, "loss": 1.3271, "mean_token_accuracy": 0.6569055368502935, "num_tokens": 2519558250.0, "step": 15018 }, { "entropy": 1.733963628609975, "epoch": 1.6499134876822938, "grad_norm": 0.8402661085128784, "learning_rate": 3.4091066090826415e-06, "loss": 1.2062, "mean_token_accuracy": 0.6875236531098684, "num_tokens": 2519675800.0, "step": 15019 }, { "entropy": 1.7446452577908833, "epoch": 1.6500233445936667, "grad_norm": 0.6582059860229492, "learning_rate": 3.4082465467596783e-06, "loss": 1.5078, "mean_token_accuracy": 0.6486608684062958, "num_tokens": 2519833913.0, "step": 15020 }, { "entropy": 1.6334237158298492, "epoch": 1.6501332015050396, "grad_norm": 0.7269055247306824, "learning_rate": 3.4073867247122906e-06, "loss": 1.2758, "mean_token_accuracy": 0.6774081140756607, "num_tokens": 2519999588.0, "step": 15021 }, { "entropy": 1.709044208129247, "epoch": 1.6502430584164127, "grad_norm": 0.6881639957427979, "learning_rate": 3.4065271429676965e-06, "loss": 1.6085, "mean_token_accuracy": 0.6218457967042923, "num_tokens": 2520245755.0, "step": 15022 }, { "entropy": 1.6926098664601643, "epoch": 1.6503529153277856, "grad_norm": 0.6597875356674194, "learning_rate": 3.405667801553092e-06, "loss": 1.3715, "mean_token_accuracy": 0.6609023263057073, "num_tokens": 2520443165.0, "step": 15023 }, { "entropy": 1.7041344543298085, "epoch": 1.6504627722391585, "grad_norm": 0.6892362236976624, "learning_rate": 3.4048087004956797e-06, "loss": 1.2991, "mean_token_accuracy": 0.6705830295880636, "num_tokens": 2520581997.0, "step": 15024 }, { "entropy": 1.6341348787148793, "epoch": 1.6505726291505314, "grad_norm": 0.7210821509361267, "learning_rate": 3.403949839822652e-06, "loss": 1.3469, "mean_token_accuracy": 0.6602633595466614, "num_tokens": 2520821919.0, "step": 15025 }, { "entropy": 1.6658415794372559, "epoch": 1.6506824860619043, "grad_norm": 0.7110158205032349, "learning_rate": 3.403091219561188e-06, "loss": 1.327, "mean_token_accuracy": 0.6694478690624237, "num_tokens": 2520982309.0, "step": 15026 }, { "entropy": 1.7470557987689972, "epoch": 1.6507923429732774, "grad_norm": 0.6562235355377197, "learning_rate": 3.4022328397384624e-06, "loss": 1.1178, "mean_token_accuracy": 0.6828839977582296, "num_tokens": 2521145714.0, "step": 15027 }, { "entropy": 1.7470394472281139, "epoch": 1.6509021998846503, "grad_norm": 0.6661121845245361, "learning_rate": 3.4013747003816454e-06, "loss": 1.3084, "mean_token_accuracy": 0.662788599729538, "num_tokens": 2521295381.0, "step": 15028 }, { "entropy": 1.7361040512720745, "epoch": 1.6510120567960231, "grad_norm": 0.9415419697761536, "learning_rate": 3.4005168015178935e-06, "loss": 1.4151, "mean_token_accuracy": 0.648168628414472, "num_tokens": 2521442369.0, "step": 15029 }, { "entropy": 1.7062184512615204, "epoch": 1.6511219137073962, "grad_norm": 0.6462988257408142, "learning_rate": 3.399659143174362e-06, "loss": 1.3354, "mean_token_accuracy": 0.6528641134500504, "num_tokens": 2521654103.0, "step": 15030 }, { "entropy": 1.6909612814585369, "epoch": 1.651231770618769, "grad_norm": 0.7832234501838684, "learning_rate": 3.3988017253781936e-06, "loss": 1.2744, "mean_token_accuracy": 0.6736619373162588, "num_tokens": 2521818430.0, "step": 15031 }, { "entropy": 1.7374659776687622, "epoch": 1.651341627530142, "grad_norm": 0.6640949249267578, "learning_rate": 3.3979445481565244e-06, "loss": 1.4493, "mean_token_accuracy": 0.6429694543282191, "num_tokens": 2521991665.0, "step": 15032 }, { "entropy": 1.781148185332616, "epoch": 1.651451484441515, "grad_norm": 0.732386589050293, "learning_rate": 3.397087611536485e-06, "loss": 1.5056, "mean_token_accuracy": 0.6434811403354009, "num_tokens": 2522181259.0, "step": 15033 }, { "entropy": 1.7025253772735596, "epoch": 1.6515613413528878, "grad_norm": 0.7074692845344543, "learning_rate": 3.3962309155451993e-06, "loss": 1.3074, "mean_token_accuracy": 0.681415448586146, "num_tokens": 2522343910.0, "step": 15034 }, { "entropy": 1.6830189228057861, "epoch": 1.651671198264261, "grad_norm": 0.7864646911621094, "learning_rate": 3.395374460209776e-06, "loss": 1.5032, "mean_token_accuracy": 0.6472100963195165, "num_tokens": 2522528624.0, "step": 15035 }, { "entropy": 1.6839284698168437, "epoch": 1.6517810551756338, "grad_norm": 0.7211703062057495, "learning_rate": 3.3945182455573234e-06, "loss": 1.3273, "mean_token_accuracy": 0.6669267763694128, "num_tokens": 2522664262.0, "step": 15036 }, { "entropy": 1.6885885000228882, "epoch": 1.6518909120870067, "grad_norm": 0.5619406700134277, "learning_rate": 3.3936622716149432e-06, "loss": 1.3137, "mean_token_accuracy": 0.6660237014293671, "num_tokens": 2522855978.0, "step": 15037 }, { "entropy": 1.695332556962967, "epoch": 1.6520007689983798, "grad_norm": 0.7055963277816772, "learning_rate": 3.3928065384097252e-06, "loss": 1.3109, "mean_token_accuracy": 0.6599967380364736, "num_tokens": 2523011118.0, "step": 15038 }, { "entropy": 1.74105371038119, "epoch": 1.6521106259097524, "grad_norm": 0.8354093432426453, "learning_rate": 3.3919510459687495e-06, "loss": 1.4608, "mean_token_accuracy": 0.6649217158555984, "num_tokens": 2523151102.0, "step": 15039 }, { "entropy": 1.69448517759641, "epoch": 1.6522204828211255, "grad_norm": 1.118112325668335, "learning_rate": 3.3910957943190974e-06, "loss": 1.142, "mean_token_accuracy": 0.6787567436695099, "num_tokens": 2523391240.0, "step": 15040 }, { "entropy": 1.6811459958553314, "epoch": 1.6523303397324984, "grad_norm": 0.5884284377098083, "learning_rate": 3.390240783487833e-06, "loss": 1.4136, "mean_token_accuracy": 0.6765343199173609, "num_tokens": 2523565283.0, "step": 15041 }, { "entropy": 1.7217009564240773, "epoch": 1.6524401966438713, "grad_norm": 0.7727037072181702, "learning_rate": 3.3893860135020213e-06, "loss": 1.2789, "mean_token_accuracy": 0.6699222077926, "num_tokens": 2523681428.0, "step": 15042 }, { "entropy": 1.693113128344218, "epoch": 1.6525500535552444, "grad_norm": 0.6383533477783203, "learning_rate": 3.388531484388711e-06, "loss": 1.3722, "mean_token_accuracy": 0.6580367684364319, "num_tokens": 2523890610.0, "step": 15043 }, { "entropy": 1.7015477518240611, "epoch": 1.652659910466617, "grad_norm": 0.8015193343162537, "learning_rate": 3.38767719617495e-06, "loss": 1.401, "mean_token_accuracy": 0.6783540596564611, "num_tokens": 2524009740.0, "step": 15044 }, { "entropy": 1.7582744856675465, "epoch": 1.6527697673779902, "grad_norm": 0.9063988327980042, "learning_rate": 3.3868231488877757e-06, "loss": 1.3525, "mean_token_accuracy": 0.6595317522684733, "num_tokens": 2524141825.0, "step": 15045 }, { "entropy": 1.732056309779485, "epoch": 1.652879624289363, "grad_norm": 0.7740351557731628, "learning_rate": 3.3859693425542186e-06, "loss": 1.4082, "mean_token_accuracy": 0.6550064533948898, "num_tokens": 2524302789.0, "step": 15046 }, { "entropy": 1.6483930746714275, "epoch": 1.652989481200736, "grad_norm": 0.608165979385376, "learning_rate": 3.385115777201298e-06, "loss": 1.3458, "mean_token_accuracy": 0.6636339922746023, "num_tokens": 2524444935.0, "step": 15047 }, { "entropy": 1.7196594377358754, "epoch": 1.653099338112109, "grad_norm": 0.572071373462677, "learning_rate": 3.3842624528560353e-06, "loss": 1.5982, "mean_token_accuracy": 0.6327051321665446, "num_tokens": 2524643745.0, "step": 15048 }, { "entropy": 1.6865974863370259, "epoch": 1.653209195023482, "grad_norm": 0.5919678211212158, "learning_rate": 3.3834093695454313e-06, "loss": 1.3454, "mean_token_accuracy": 0.6547940770785013, "num_tokens": 2524802624.0, "step": 15049 }, { "entropy": 1.6919112801551819, "epoch": 1.6533190519348548, "grad_norm": 0.6698882579803467, "learning_rate": 3.38255652729649e-06, "loss": 1.3982, "mean_token_accuracy": 0.6629907687505087, "num_tokens": 2524980761.0, "step": 15050 }, { "entropy": 1.6763626833756764, "epoch": 1.653428908846228, "grad_norm": 0.6936999559402466, "learning_rate": 3.381703926136204e-06, "loss": 1.314, "mean_token_accuracy": 0.6573221186796824, "num_tokens": 2525126619.0, "step": 15051 }, { "entropy": 1.766062339146932, "epoch": 1.6535387657576006, "grad_norm": 0.8053560256958008, "learning_rate": 3.380851566091552e-06, "loss": 1.3774, "mean_token_accuracy": 0.6592030425866445, "num_tokens": 2525244165.0, "step": 15052 }, { "entropy": 1.7038917541503906, "epoch": 1.6536486226689737, "grad_norm": 0.6169702410697937, "learning_rate": 3.379999447189516e-06, "loss": 1.3704, "mean_token_accuracy": 0.6640800684690475, "num_tokens": 2525404922.0, "step": 15053 }, { "entropy": 1.6612831552823384, "epoch": 1.6537584795803466, "grad_norm": 0.63535076379776, "learning_rate": 3.379147569457067e-06, "loss": 1.5327, "mean_token_accuracy": 0.6397574096918106, "num_tokens": 2525625009.0, "step": 15054 }, { "entropy": 1.7347392141819, "epoch": 1.6538683364917195, "grad_norm": 0.6799290776252747, "learning_rate": 3.3782959329211597e-06, "loss": 1.3879, "mean_token_accuracy": 0.6492925484975179, "num_tokens": 2525797102.0, "step": 15055 }, { "entropy": 1.750627835591634, "epoch": 1.6539781934030926, "grad_norm": 0.6583788394927979, "learning_rate": 3.3774445376087517e-06, "loss": 1.6107, "mean_token_accuracy": 0.6384320706129074, "num_tokens": 2526036673.0, "step": 15056 }, { "entropy": 1.7362577716509502, "epoch": 1.6540880503144653, "grad_norm": 0.751219630241394, "learning_rate": 3.3765933835467918e-06, "loss": 1.3345, "mean_token_accuracy": 0.6589648723602295, "num_tokens": 2526248957.0, "step": 15057 }, { "entropy": 1.7241934339205425, "epoch": 1.6541979072258384, "grad_norm": 0.7931070923805237, "learning_rate": 3.3757424707622156e-06, "loss": 1.502, "mean_token_accuracy": 0.6433264712492625, "num_tokens": 2526468669.0, "step": 15058 }, { "entropy": 1.652980665365855, "epoch": 1.6543077641372113, "grad_norm": 0.7637212872505188, "learning_rate": 3.374891799281952e-06, "loss": 1.3658, "mean_token_accuracy": 0.6645220816135406, "num_tokens": 2526655073.0, "step": 15059 }, { "entropy": 1.7018746038277943, "epoch": 1.6544176210485841, "grad_norm": 0.673249363899231, "learning_rate": 3.3740413691329294e-06, "loss": 1.3373, "mean_token_accuracy": 0.6641093840201696, "num_tokens": 2526829589.0, "step": 15060 }, { "entropy": 1.6794364154338837, "epoch": 1.6545274779599572, "grad_norm": 0.7552919387817383, "learning_rate": 3.3731911803420598e-06, "loss": 1.4182, "mean_token_accuracy": 0.6503365089495977, "num_tokens": 2526986226.0, "step": 15061 }, { "entropy": 1.7320887843767803, "epoch": 1.6546373348713301, "grad_norm": 0.664562463760376, "learning_rate": 3.3723412329362543e-06, "loss": 1.3605, "mean_token_accuracy": 0.6568738867839178, "num_tokens": 2527153507.0, "step": 15062 }, { "entropy": 1.7606121798356373, "epoch": 1.654747191782703, "grad_norm": 1.1021904945373535, "learning_rate": 3.3714915269424108e-06, "loss": 1.6345, "mean_token_accuracy": 0.6460135305921236, "num_tokens": 2527341854.0, "step": 15063 }, { "entropy": 1.6257544159889221, "epoch": 1.6548570486940761, "grad_norm": 0.5650546550750732, "learning_rate": 3.3706420623874213e-06, "loss": 1.3168, "mean_token_accuracy": 0.6696870078643163, "num_tokens": 2527504304.0, "step": 15064 }, { "entropy": 1.7246950368086498, "epoch": 1.6549669056054488, "grad_norm": 0.7503829598426819, "learning_rate": 3.3697928392981737e-06, "loss": 1.3115, "mean_token_accuracy": 0.6665904074907303, "num_tokens": 2527623793.0, "step": 15065 }, { "entropy": 1.7343401908874512, "epoch": 1.655076762516822, "grad_norm": 0.6358250379562378, "learning_rate": 3.3689438577015476e-06, "loss": 1.3758, "mean_token_accuracy": 0.6481455117464066, "num_tokens": 2527756953.0, "step": 15066 }, { "entropy": 1.7057737906773884, "epoch": 1.6551866194281948, "grad_norm": 0.5834030508995056, "learning_rate": 3.3680951176244064e-06, "loss": 1.5161, "mean_token_accuracy": 0.6389025648434957, "num_tokens": 2528005318.0, "step": 15067 }, { "entropy": 1.6946783165136974, "epoch": 1.6552964763395677, "grad_norm": 0.7368245124816895, "learning_rate": 3.367246619093615e-06, "loss": 1.502, "mean_token_accuracy": 0.6575753738482794, "num_tokens": 2528171311.0, "step": 15068 }, { "entropy": 1.732050359249115, "epoch": 1.6554063332509408, "grad_norm": 0.6881213188171387, "learning_rate": 3.366398362136031e-06, "loss": 1.3458, "mean_token_accuracy": 0.6613701532284418, "num_tokens": 2528352242.0, "step": 15069 }, { "entropy": 1.7644882798194885, "epoch": 1.6555161901623134, "grad_norm": 0.6945415735244751, "learning_rate": 3.3655503467784996e-06, "loss": 1.3223, "mean_token_accuracy": 0.6620460500319799, "num_tokens": 2528508423.0, "step": 15070 }, { "entropy": 1.6908580263455708, "epoch": 1.6556260470736865, "grad_norm": 0.704821765422821, "learning_rate": 3.3647025730478566e-06, "loss": 1.459, "mean_token_accuracy": 0.6593573639790217, "num_tokens": 2528690165.0, "step": 15071 }, { "entropy": 1.6922107140223186, "epoch": 1.6557359039850594, "grad_norm": 0.6756055355072021, "learning_rate": 3.363855040970939e-06, "loss": 1.5345, "mean_token_accuracy": 0.6271846890449524, "num_tokens": 2528963549.0, "step": 15072 }, { "entropy": 1.712009459733963, "epoch": 1.6558457608964323, "grad_norm": 0.6728986501693726, "learning_rate": 3.3630077505745664e-06, "loss": 1.3403, "mean_token_accuracy": 0.6565148731072744, "num_tokens": 2529077053.0, "step": 15073 }, { "entropy": 1.6842322250207264, "epoch": 1.6559556178078054, "grad_norm": 0.6321828365325928, "learning_rate": 3.362160701885559e-06, "loss": 1.3954, "mean_token_accuracy": 0.6590474247932434, "num_tokens": 2529232793.0, "step": 15074 }, { "entropy": 1.7570477823416393, "epoch": 1.6560654747191783, "grad_norm": 0.6922457814216614, "learning_rate": 3.3613138949307246e-06, "loss": 1.485, "mean_token_accuracy": 0.663332611322403, "num_tokens": 2529415470.0, "step": 15075 }, { "entropy": 1.7421770294507344, "epoch": 1.6561753316305512, "grad_norm": 0.825492262840271, "learning_rate": 3.3604673297368605e-06, "loss": 1.296, "mean_token_accuracy": 0.6683741807937622, "num_tokens": 2529558572.0, "step": 15076 }, { "entropy": 1.6646507183710735, "epoch": 1.6562851885419243, "grad_norm": 0.6699401140213013, "learning_rate": 3.3596210063307623e-06, "loss": 1.3958, "mean_token_accuracy": 0.6553126474221548, "num_tokens": 2529704252.0, "step": 15077 }, { "entropy": 1.7033556004365284, "epoch": 1.656395045453297, "grad_norm": 0.6611201763153076, "learning_rate": 3.3587749247392213e-06, "loss": 1.3577, "mean_token_accuracy": 0.6602742572625478, "num_tokens": 2529840132.0, "step": 15078 }, { "entropy": 1.6565563877423604, "epoch": 1.65650490236467, "grad_norm": 0.6807793974876404, "learning_rate": 3.3579290849890076e-06, "loss": 1.4446, "mean_token_accuracy": 0.6356561382611593, "num_tokens": 2530055346.0, "step": 15079 }, { "entropy": 1.6784409979979198, "epoch": 1.656614759276043, "grad_norm": 0.7249003648757935, "learning_rate": 3.3570834871068934e-06, "loss": 1.263, "mean_token_accuracy": 0.6803757299979528, "num_tokens": 2530190673.0, "step": 15080 }, { "entropy": 1.6624310910701752, "epoch": 1.6567246161874158, "grad_norm": 0.5706498622894287, "learning_rate": 3.356238131119645e-06, "loss": 1.4071, "mean_token_accuracy": 0.6508588592211405, "num_tokens": 2530411293.0, "step": 15081 }, { "entropy": 1.7260961433251698, "epoch": 1.656834473098789, "grad_norm": 0.7238296866416931, "learning_rate": 3.3553930170540166e-06, "loss": 1.4912, "mean_token_accuracy": 0.6538258691628774, "num_tokens": 2530586549.0, "step": 15082 }, { "entropy": 1.7342715958754222, "epoch": 1.6569443300101616, "grad_norm": 0.6654472351074219, "learning_rate": 3.354548144936751e-06, "loss": 1.4534, "mean_token_accuracy": 0.6400155772765478, "num_tokens": 2530753690.0, "step": 15083 }, { "entropy": 1.7684525350729625, "epoch": 1.6570541869215347, "grad_norm": 1.3236089944839478, "learning_rate": 3.353703514794594e-06, "loss": 1.35, "mean_token_accuracy": 0.669903039932251, "num_tokens": 2530865045.0, "step": 15084 }, { "entropy": 1.6798753043015797, "epoch": 1.6571640438329076, "grad_norm": 0.6657638549804688, "learning_rate": 3.3528591266542735e-06, "loss": 1.3229, "mean_token_accuracy": 0.6585825930039088, "num_tokens": 2531001639.0, "step": 15085 }, { "entropy": 1.7244026064872742, "epoch": 1.6572739007442805, "grad_norm": 0.6760687828063965, "learning_rate": 3.3520149805425174e-06, "loss": 1.3559, "mean_token_accuracy": 0.6678232202927271, "num_tokens": 2531132537.0, "step": 15086 }, { "entropy": 1.6744465331236522, "epoch": 1.6573837576556536, "grad_norm": 0.6795402765274048, "learning_rate": 3.3511710764860405e-06, "loss": 1.3079, "mean_token_accuracy": 0.6721722632646561, "num_tokens": 2531305857.0, "step": 15087 }, { "entropy": 1.7191696266333263, "epoch": 1.6574936145670265, "grad_norm": 0.7159080505371094, "learning_rate": 3.3503274145115516e-06, "loss": 1.3132, "mean_token_accuracy": 0.6738952944676081, "num_tokens": 2531423758.0, "step": 15088 }, { "entropy": 1.6929751634597778, "epoch": 1.6576034714783994, "grad_norm": 0.6210808157920837, "learning_rate": 3.3494839946457525e-06, "loss": 1.3419, "mean_token_accuracy": 0.6703117787837982, "num_tokens": 2531575439.0, "step": 15089 }, { "entropy": 1.656501869360606, "epoch": 1.6577133283897725, "grad_norm": 0.7103717923164368, "learning_rate": 3.3486408169153413e-06, "loss": 1.3132, "mean_token_accuracy": 0.6686868766943613, "num_tokens": 2531724263.0, "step": 15090 }, { "entropy": 1.6718024512132008, "epoch": 1.6578231853011451, "grad_norm": 0.6390276551246643, "learning_rate": 3.3477978813469957e-06, "loss": 1.4407, "mean_token_accuracy": 0.6513689408699671, "num_tokens": 2531902650.0, "step": 15091 }, { "entropy": 1.6487050652503967, "epoch": 1.6579330422125182, "grad_norm": 0.6771929860115051, "learning_rate": 3.3469551879674e-06, "loss": 1.3452, "mean_token_accuracy": 0.6610651115576426, "num_tokens": 2532059077.0, "step": 15092 }, { "entropy": 1.6755038897196453, "epoch": 1.6580428991238911, "grad_norm": 0.697956919670105, "learning_rate": 3.3461127368032266e-06, "loss": 1.4732, "mean_token_accuracy": 0.6597596059242884, "num_tokens": 2532248252.0, "step": 15093 }, { "entropy": 1.6560774842898052, "epoch": 1.658152756035264, "grad_norm": 0.7152982354164124, "learning_rate": 3.3452705278811352e-06, "loss": 1.4238, "mean_token_accuracy": 0.6569562057654063, "num_tokens": 2532425762.0, "step": 15094 }, { "entropy": 1.7077242334683735, "epoch": 1.6582626129466371, "grad_norm": 0.5992992520332336, "learning_rate": 3.3444285612277806e-06, "loss": 1.3899, "mean_token_accuracy": 0.6513581027587255, "num_tokens": 2532616814.0, "step": 15095 }, { "entropy": 1.7806836167971294, "epoch": 1.6583724698580098, "grad_norm": 0.6730424761772156, "learning_rate": 3.343586836869815e-06, "loss": 1.4785, "mean_token_accuracy": 0.6473760406176249, "num_tokens": 2532754041.0, "step": 15096 }, { "entropy": 1.694075107574463, "epoch": 1.658482326769383, "grad_norm": 0.7324855327606201, "learning_rate": 3.3427453548338724e-06, "loss": 1.5491, "mean_token_accuracy": 0.6529227097829183, "num_tokens": 2532969301.0, "step": 15097 }, { "entropy": 1.712545742591222, "epoch": 1.6585921836807558, "grad_norm": 0.6637945175170898, "learning_rate": 3.341904115146592e-06, "loss": 1.3429, "mean_token_accuracy": 0.6546710977951685, "num_tokens": 2533146356.0, "step": 15098 }, { "entropy": 1.7674176394939423, "epoch": 1.6587020405921287, "grad_norm": 0.7276931405067444, "learning_rate": 3.3410631178345956e-06, "loss": 1.4362, "mean_token_accuracy": 0.6518150369326273, "num_tokens": 2533290788.0, "step": 15099 }, { "entropy": 1.7356145282586415, "epoch": 1.6588118975035018, "grad_norm": 0.6265212893486023, "learning_rate": 3.3402223629244977e-06, "loss": 1.3753, "mean_token_accuracy": 0.6475427796443304, "num_tokens": 2533436985.0, "step": 15100 }, { "entropy": 1.663461983203888, "epoch": 1.6589217544148747, "grad_norm": 0.724477231502533, "learning_rate": 3.339381850442911e-06, "loss": 1.2809, "mean_token_accuracy": 0.667538528641065, "num_tokens": 2533568360.0, "step": 15101 }, { "entropy": 1.705003599325816, "epoch": 1.6590316113262475, "grad_norm": 0.620273768901825, "learning_rate": 3.33854158041644e-06, "loss": 1.4139, "mean_token_accuracy": 0.6586426943540573, "num_tokens": 2533751596.0, "step": 15102 }, { "entropy": 1.6570688684781392, "epoch": 1.6591414682376207, "grad_norm": 0.6880425810813904, "learning_rate": 3.3377015528716722e-06, "loss": 1.3196, "mean_token_accuracy": 0.6651638994614283, "num_tokens": 2533914399.0, "step": 15103 }, { "entropy": 1.7261102298895519, "epoch": 1.6592513251489933, "grad_norm": 0.9097874164581299, "learning_rate": 3.3368617678352e-06, "loss": 1.373, "mean_token_accuracy": 0.6556687106688818, "num_tokens": 2534127252.0, "step": 15104 }, { "entropy": 1.741780122121175, "epoch": 1.6593611820603664, "grad_norm": 0.701435923576355, "learning_rate": 3.3360222253335963e-06, "loss": 1.3993, "mean_token_accuracy": 0.6510543972253799, "num_tokens": 2534282275.0, "step": 15105 }, { "entropy": 1.707371195157369, "epoch": 1.6594710389717393, "grad_norm": 0.7605389356613159, "learning_rate": 3.335182925393439e-06, "loss": 1.5429, "mean_token_accuracy": 0.6444597393274307, "num_tokens": 2534425544.0, "step": 15106 }, { "entropy": 1.6632187863190968, "epoch": 1.6595808958831122, "grad_norm": 0.6267088651657104, "learning_rate": 3.334343868041288e-06, "loss": 1.3156, "mean_token_accuracy": 0.6677224983771642, "num_tokens": 2534595314.0, "step": 15107 }, { "entropy": 1.7033604681491852, "epoch": 1.6596907527944853, "grad_norm": 0.6936233639717102, "learning_rate": 3.3335050533036973e-06, "loss": 1.3935, "mean_token_accuracy": 0.6590020259221395, "num_tokens": 2534749411.0, "step": 15108 }, { "entropy": 1.6776057581106822, "epoch": 1.659800609705858, "grad_norm": 0.6327299475669861, "learning_rate": 3.332666481207217e-06, "loss": 1.3723, "mean_token_accuracy": 0.6604341218868891, "num_tokens": 2534924233.0, "step": 15109 }, { "entropy": 1.7498537997404735, "epoch": 1.659910466617231, "grad_norm": 0.7666849493980408, "learning_rate": 3.33182815177839e-06, "loss": 1.2836, "mean_token_accuracy": 0.6681917756795883, "num_tokens": 2535064163.0, "step": 15110 }, { "entropy": 1.6705930332342784, "epoch": 1.660020323528604, "grad_norm": 0.9941990971565247, "learning_rate": 3.3309900650437453e-06, "loss": 1.332, "mean_token_accuracy": 0.6723613291978836, "num_tokens": 2535185568.0, "step": 15111 }, { "entropy": 1.6964355210463207, "epoch": 1.6601301804399768, "grad_norm": 0.5545011758804321, "learning_rate": 3.330152221029809e-06, "loss": 1.5134, "mean_token_accuracy": 0.630453368028005, "num_tokens": 2535418640.0, "step": 15112 }, { "entropy": 1.7623733182748158, "epoch": 1.66024003735135, "grad_norm": 0.7459884285926819, "learning_rate": 3.3293146197631e-06, "loss": 1.5485, "mean_token_accuracy": 0.6495217035214106, "num_tokens": 2535565378.0, "step": 15113 }, { "entropy": 1.7098113199075062, "epoch": 1.6603498942627228, "grad_norm": 0.6535688042640686, "learning_rate": 3.3284772612701264e-06, "loss": 1.3106, "mean_token_accuracy": 0.6760827650626501, "num_tokens": 2535695343.0, "step": 15114 }, { "entropy": 1.6740643779436748, "epoch": 1.6604597511740957, "grad_norm": 0.7787010073661804, "learning_rate": 3.327640145577389e-06, "loss": 1.276, "mean_token_accuracy": 0.6757322053114573, "num_tokens": 2535832356.0, "step": 15115 }, { "entropy": 1.7608892818291981, "epoch": 1.6605696080854688, "grad_norm": 0.6061299443244934, "learning_rate": 3.3268032727113854e-06, "loss": 1.4559, "mean_token_accuracy": 0.6371789226929346, "num_tokens": 2536059624.0, "step": 15116 }, { "entropy": 1.6933262546857197, "epoch": 1.6606794649968415, "grad_norm": 0.6764704585075378, "learning_rate": 3.3259666426985992e-06, "loss": 1.3633, "mean_token_accuracy": 0.6675354987382889, "num_tokens": 2536235498.0, "step": 15117 }, { "entropy": 1.721170296271642, "epoch": 1.6607893219082146, "grad_norm": 0.7042806148529053, "learning_rate": 3.3251302555655125e-06, "loss": 1.4992, "mean_token_accuracy": 0.6421754111846288, "num_tokens": 2536422548.0, "step": 15118 }, { "entropy": 1.7569746871789296, "epoch": 1.6608991788195875, "grad_norm": 0.8258860111236572, "learning_rate": 3.3242941113385955e-06, "loss": 1.3906, "mean_token_accuracy": 0.6512050032615662, "num_tokens": 2536588540.0, "step": 15119 }, { "entropy": 1.6643067598342896, "epoch": 1.6610090357309604, "grad_norm": 0.6636916399002075, "learning_rate": 3.323458210044308e-06, "loss": 1.3003, "mean_token_accuracy": 0.6672434459129969, "num_tokens": 2536722836.0, "step": 15120 }, { "entropy": 1.6818746825059254, "epoch": 1.6611188926423335, "grad_norm": 0.8212363123893738, "learning_rate": 3.3226225517091092e-06, "loss": 1.3068, "mean_token_accuracy": 0.6724486698706945, "num_tokens": 2536847383.0, "step": 15121 }, { "entropy": 1.6717171669006348, "epoch": 1.6612287495537061, "grad_norm": 0.6446511745452881, "learning_rate": 3.32178713635945e-06, "loss": 1.4976, "mean_token_accuracy": 0.6454497029383978, "num_tokens": 2537070978.0, "step": 15122 }, { "entropy": 1.7301104565461476, "epoch": 1.6613386064650792, "grad_norm": 0.6526685953140259, "learning_rate": 3.3209519640217673e-06, "loss": 1.4171, "mean_token_accuracy": 0.6540913035472234, "num_tokens": 2537248801.0, "step": 15123 }, { "entropy": 1.7408444384733837, "epoch": 1.6614484633764521, "grad_norm": 0.726782500743866, "learning_rate": 3.320117034722493e-06, "loss": 1.2939, "mean_token_accuracy": 0.6629084100325903, "num_tokens": 2537391472.0, "step": 15124 }, { "entropy": 1.704400509595871, "epoch": 1.661558320287825, "grad_norm": 0.928636372089386, "learning_rate": 3.3192823484880554e-06, "loss": 1.2594, "mean_token_accuracy": 0.6687459697326025, "num_tokens": 2537535837.0, "step": 15125 }, { "entropy": 1.6730639934539795, "epoch": 1.6616681771991981, "grad_norm": 0.5636922121047974, "learning_rate": 3.3184479053448715e-06, "loss": 1.349, "mean_token_accuracy": 0.6587913980086645, "num_tokens": 2537730701.0, "step": 15126 }, { "entropy": 1.7522354920705159, "epoch": 1.661778034110571, "grad_norm": 0.686957597732544, "learning_rate": 3.317613705319347e-06, "loss": 1.5405, "mean_token_accuracy": 0.6359638373057047, "num_tokens": 2537914771.0, "step": 15127 }, { "entropy": 1.6851915816466014, "epoch": 1.661887891021944, "grad_norm": 0.7741264700889587, "learning_rate": 3.3167797484378885e-06, "loss": 1.3679, "mean_token_accuracy": 0.6594855835040411, "num_tokens": 2538075327.0, "step": 15128 }, { "entropy": 1.7024510304133098, "epoch": 1.661997747933317, "grad_norm": 0.734610915184021, "learning_rate": 3.3159460347268883e-06, "loss": 1.4623, "mean_token_accuracy": 0.6586558967828751, "num_tokens": 2538238125.0, "step": 15129 }, { "entropy": 1.6769766708215077, "epoch": 1.6621076048446897, "grad_norm": 0.63493812084198, "learning_rate": 3.3151125642127345e-06, "loss": 1.3767, "mean_token_accuracy": 0.6689134786526362, "num_tokens": 2538419817.0, "step": 15130 }, { "entropy": 1.6704062322775524, "epoch": 1.6622174617560628, "grad_norm": 0.6507300734519958, "learning_rate": 3.3142793369218062e-06, "loss": 1.2955, "mean_token_accuracy": 0.6776465276877085, "num_tokens": 2538580537.0, "step": 15131 }, { "entropy": 1.6934023002783458, "epoch": 1.6623273186674357, "grad_norm": 0.6520810723304749, "learning_rate": 3.3134463528804708e-06, "loss": 1.3876, "mean_token_accuracy": 0.6512720038493475, "num_tokens": 2538783784.0, "step": 15132 }, { "entropy": 1.6996967792510986, "epoch": 1.6624371755788085, "grad_norm": 0.6800544261932373, "learning_rate": 3.312613612115094e-06, "loss": 1.5371, "mean_token_accuracy": 0.6272955139478048, "num_tokens": 2538987568.0, "step": 15133 }, { "entropy": 1.6887696584065754, "epoch": 1.6625470324901817, "grad_norm": 0.672918975353241, "learning_rate": 3.311781114652037e-06, "loss": 1.4571, "mean_token_accuracy": 0.6463527331749598, "num_tokens": 2539126976.0, "step": 15134 }, { "entropy": 1.6642581224441528, "epoch": 1.6626568894015543, "grad_norm": 0.7320646643638611, "learning_rate": 3.3109488605176398e-06, "loss": 1.3934, "mean_token_accuracy": 0.6711856325467428, "num_tokens": 2539263021.0, "step": 15135 }, { "entropy": 1.7170037130514781, "epoch": 1.6627667463129274, "grad_norm": 0.7974650263786316, "learning_rate": 3.3101168497382463e-06, "loss": 1.411, "mean_token_accuracy": 0.6470285852750143, "num_tokens": 2539450814.0, "step": 15136 }, { "entropy": 1.6689094603061676, "epoch": 1.6628766032243003, "grad_norm": 0.6423022150993347, "learning_rate": 3.309285082340191e-06, "loss": 1.3791, "mean_token_accuracy": 0.649917870759964, "num_tokens": 2539611432.0, "step": 15137 }, { "entropy": 1.733245462179184, "epoch": 1.6629864601356732, "grad_norm": 0.6242569088935852, "learning_rate": 3.308453558349798e-06, "loss": 1.5296, "mean_token_accuracy": 0.6238453984260559, "num_tokens": 2539837249.0, "step": 15138 }, { "entropy": 1.635475645462672, "epoch": 1.6630963170470463, "grad_norm": 0.7449822425842285, "learning_rate": 3.307622277793382e-06, "loss": 1.381, "mean_token_accuracy": 0.6656597952047983, "num_tokens": 2540008011.0, "step": 15139 }, { "entropy": 1.7413840492566426, "epoch": 1.6632061739584192, "grad_norm": 0.7251917719841003, "learning_rate": 3.3067912406972553e-06, "loss": 1.4038, "mean_token_accuracy": 0.6577907751003901, "num_tokens": 2540197866.0, "step": 15140 }, { "entropy": 1.6912154257297516, "epoch": 1.663316030869792, "grad_norm": 0.6151164174079895, "learning_rate": 3.305960447087718e-06, "loss": 1.5038, "mean_token_accuracy": 0.6435969273249308, "num_tokens": 2540388308.0, "step": 15141 }, { "entropy": 1.6632501184940338, "epoch": 1.6634258877811652, "grad_norm": 0.786320686340332, "learning_rate": 3.3051298969910683e-06, "loss": 1.2916, "mean_token_accuracy": 0.6799655159314474, "num_tokens": 2540553028.0, "step": 15142 }, { "entropy": 1.6826303203900654, "epoch": 1.6635357446925378, "grad_norm": 0.7635297179222107, "learning_rate": 3.3042995904335884e-06, "loss": 1.31, "mean_token_accuracy": 0.6798640837272009, "num_tokens": 2540699747.0, "step": 15143 }, { "entropy": 1.7032555242379506, "epoch": 1.663645601603911, "grad_norm": 0.7347438931465149, "learning_rate": 3.3034695274415586e-06, "loss": 1.3382, "mean_token_accuracy": 0.6689636707305908, "num_tokens": 2540856169.0, "step": 15144 }, { "entropy": 1.7159066100915272, "epoch": 1.6637554585152838, "grad_norm": 0.728591799736023, "learning_rate": 3.3026397080412475e-06, "loss": 1.2757, "mean_token_accuracy": 0.6693220684925715, "num_tokens": 2540986299.0, "step": 15145 }, { "entropy": 1.7285268604755402, "epoch": 1.6638653154266567, "grad_norm": 0.7702023386955261, "learning_rate": 3.3018101322589276e-06, "loss": 1.282, "mean_token_accuracy": 0.6752820163965225, "num_tokens": 2541134966.0, "step": 15146 }, { "entropy": 1.7311889429887135, "epoch": 1.6639751723380298, "grad_norm": 0.8523202538490295, "learning_rate": 3.3009808001208433e-06, "loss": 1.4396, "mean_token_accuracy": 0.6446023831764857, "num_tokens": 2541288982.0, "step": 15147 }, { "entropy": 1.743065595626831, "epoch": 1.6640850292494025, "grad_norm": 0.7855637073516846, "learning_rate": 3.3001517116532467e-06, "loss": 1.4077, "mean_token_accuracy": 0.665867954492569, "num_tokens": 2541427699.0, "step": 15148 }, { "entropy": 1.701552430788676, "epoch": 1.6641948861607756, "grad_norm": 0.6906160712242126, "learning_rate": 3.299322866882382e-06, "loss": 1.3014, "mean_token_accuracy": 0.6663641184568405, "num_tokens": 2541559947.0, "step": 15149 }, { "entropy": 1.696532428264618, "epoch": 1.6643047430721485, "grad_norm": 0.8275318741798401, "learning_rate": 3.2984942658344775e-06, "loss": 1.4308, "mean_token_accuracy": 0.6603184888760248, "num_tokens": 2541719607.0, "step": 15150 }, { "entropy": 1.7219399809837341, "epoch": 1.6644145999835214, "grad_norm": 0.7034138441085815, "learning_rate": 3.297665908535757e-06, "loss": 1.43, "mean_token_accuracy": 0.6470849066972733, "num_tokens": 2541892403.0, "step": 15151 }, { "entropy": 1.638861060142517, "epoch": 1.6645244568948945, "grad_norm": 0.8403314352035522, "learning_rate": 3.2968377950124424e-06, "loss": 1.3045, "mean_token_accuracy": 0.66932080189387, "num_tokens": 2542029755.0, "step": 15152 }, { "entropy": 1.675868570804596, "epoch": 1.6646343138062674, "grad_norm": 0.9272775650024414, "learning_rate": 3.2960099252907383e-06, "loss": 1.4506, "mean_token_accuracy": 0.6282220433155695, "num_tokens": 2542277314.0, "step": 15153 }, { "entropy": 1.6995552678902943, "epoch": 1.6647441707176402, "grad_norm": 0.6546932458877563, "learning_rate": 3.2951822993968507e-06, "loss": 1.4917, "mean_token_accuracy": 0.6413043240706126, "num_tokens": 2542450597.0, "step": 15154 }, { "entropy": 1.6610127687454224, "epoch": 1.6648540276290134, "grad_norm": 0.7705767154693604, "learning_rate": 3.294354917356971e-06, "loss": 1.3614, "mean_token_accuracy": 0.6677778412898382, "num_tokens": 2542605563.0, "step": 15155 }, { "entropy": 1.6522420446077983, "epoch": 1.664963884540386, "grad_norm": 0.9083729982376099, "learning_rate": 3.2935277791972845e-06, "loss": 1.3583, "mean_token_accuracy": 0.6643216063578924, "num_tokens": 2542750030.0, "step": 15156 }, { "entropy": 1.6629354059696198, "epoch": 1.6650737414517591, "grad_norm": 0.7083542346954346, "learning_rate": 3.2927008849439713e-06, "loss": 1.5038, "mean_token_accuracy": 0.6516106476386389, "num_tokens": 2542920378.0, "step": 15157 }, { "entropy": 1.6975993414719899, "epoch": 1.665183598363132, "grad_norm": 0.6380283832550049, "learning_rate": 3.291874234623206e-06, "loss": 1.3954, "mean_token_accuracy": 0.6629827618598938, "num_tokens": 2543100162.0, "step": 15158 }, { "entropy": 1.6631451447804768, "epoch": 1.665293455274505, "grad_norm": 0.6705272793769836, "learning_rate": 3.2910478282611434e-06, "loss": 1.4026, "mean_token_accuracy": 0.6492062012354533, "num_tokens": 2543321718.0, "step": 15159 }, { "entropy": 1.662650595108668, "epoch": 1.665403312185878, "grad_norm": 0.6082910299301147, "learning_rate": 3.2902216658839437e-06, "loss": 1.3955, "mean_token_accuracy": 0.6513722836971283, "num_tokens": 2543502527.0, "step": 15160 }, { "entropy": 1.7114491661389668, "epoch": 1.6655131690972507, "grad_norm": 0.6370794177055359, "learning_rate": 3.2893957475177562e-06, "loss": 1.4805, "mean_token_accuracy": 0.6547950555880865, "num_tokens": 2543686162.0, "step": 15161 }, { "entropy": 1.7338766554991405, "epoch": 1.6656230260086238, "grad_norm": 0.6354936957359314, "learning_rate": 3.2885700731887184e-06, "loss": 1.4639, "mean_token_accuracy": 0.6365046302477518, "num_tokens": 2543866432.0, "step": 15162 }, { "entropy": 1.7078477044900258, "epoch": 1.6657328829199967, "grad_norm": 0.8597061038017273, "learning_rate": 3.287744642922961e-06, "loss": 1.2784, "mean_token_accuracy": 0.6691752125819524, "num_tokens": 2544031768.0, "step": 15163 }, { "entropy": 1.725355605284373, "epoch": 1.6658427398313695, "grad_norm": 0.8394426107406616, "learning_rate": 3.2869194567466126e-06, "loss": 1.4591, "mean_token_accuracy": 0.6521119624376297, "num_tokens": 2544304098.0, "step": 15164 }, { "entropy": 1.711538831392924, "epoch": 1.6659525967427427, "grad_norm": 0.6709228754043579, "learning_rate": 3.286094514685786e-06, "loss": 1.5216, "mean_token_accuracy": 0.648155947526296, "num_tokens": 2544482608.0, "step": 15165 }, { "entropy": 1.7310082018375397, "epoch": 1.6660624536541155, "grad_norm": 0.7595032453536987, "learning_rate": 3.285269816766593e-06, "loss": 1.3219, "mean_token_accuracy": 0.6648527532815933, "num_tokens": 2544614022.0, "step": 15166 }, { "entropy": 1.6925914386908214, "epoch": 1.6661723105654884, "grad_norm": 0.6604565382003784, "learning_rate": 3.284445363015135e-06, "loss": 1.4728, "mean_token_accuracy": 0.6658626943826675, "num_tokens": 2544783172.0, "step": 15167 }, { "entropy": 1.6832049489021301, "epoch": 1.6662821674768615, "grad_norm": 0.7163446545600891, "learning_rate": 3.2836211534575017e-06, "loss": 1.5589, "mean_token_accuracy": 0.6431887249151865, "num_tokens": 2544991921.0, "step": 15168 }, { "entropy": 1.710367888212204, "epoch": 1.6663920243882342, "grad_norm": 0.6329286694526672, "learning_rate": 3.282797188119784e-06, "loss": 1.3939, "mean_token_accuracy": 0.6490947405497233, "num_tokens": 2545169008.0, "step": 15169 }, { "entropy": 1.6956720153490703, "epoch": 1.6665018812996073, "grad_norm": 0.7948725819587708, "learning_rate": 3.281973467028059e-06, "loss": 1.3948, "mean_token_accuracy": 0.6627111285924911, "num_tokens": 2545343998.0, "step": 15170 }, { "entropy": 1.6870111227035522, "epoch": 1.6666117382109802, "grad_norm": 0.7442490458488464, "learning_rate": 3.2811499902083926e-06, "loss": 1.3838, "mean_token_accuracy": 0.6725998371839523, "num_tokens": 2545494318.0, "step": 15171 }, { "entropy": 1.707838664452235, "epoch": 1.666721595122353, "grad_norm": 0.7813781499862671, "learning_rate": 3.2803267576868537e-06, "loss": 1.4931, "mean_token_accuracy": 0.6501006484031677, "num_tokens": 2545634917.0, "step": 15172 }, { "entropy": 1.7154215077559154, "epoch": 1.6668314520337262, "grad_norm": 0.7479304671287537, "learning_rate": 3.2795037694894916e-06, "loss": 1.2564, "mean_token_accuracy": 0.6734669556220373, "num_tokens": 2545793905.0, "step": 15173 }, { "entropy": 1.7019230524698894, "epoch": 1.6669413089450988, "grad_norm": 0.7185121774673462, "learning_rate": 3.278681025642359e-06, "loss": 1.2722, "mean_token_accuracy": 0.689252441128095, "num_tokens": 2545972358.0, "step": 15174 }, { "entropy": 1.6919790307680767, "epoch": 1.667051165856472, "grad_norm": 0.7885094285011292, "learning_rate": 3.2778585261714925e-06, "loss": 1.6047, "mean_token_accuracy": 0.6392913907766342, "num_tokens": 2546235675.0, "step": 15175 }, { "entropy": 1.660687933365504, "epoch": 1.6671610227678448, "grad_norm": 0.7216572761535645, "learning_rate": 3.2770362711029226e-06, "loss": 1.4412, "mean_token_accuracy": 0.6594575295845667, "num_tokens": 2546375840.0, "step": 15176 }, { "entropy": 1.6957957843939464, "epoch": 1.6672708796792177, "grad_norm": 0.788975715637207, "learning_rate": 3.2762142604626724e-06, "loss": 1.4064, "mean_token_accuracy": 0.6570547719796499, "num_tokens": 2546509636.0, "step": 15177 }, { "entropy": 1.67244353890419, "epoch": 1.6673807365905908, "grad_norm": 0.731098473072052, "learning_rate": 3.2753924942767647e-06, "loss": 1.3241, "mean_token_accuracy": 0.6744396587212881, "num_tokens": 2546682886.0, "step": 15178 }, { "entropy": 1.6986753741900127, "epoch": 1.6674905935019637, "grad_norm": 0.8727912902832031, "learning_rate": 3.2745709725712027e-06, "loss": 1.2156, "mean_token_accuracy": 0.6811005771160126, "num_tokens": 2546823461.0, "step": 15179 }, { "entropy": 1.697281688451767, "epoch": 1.6676004504133366, "grad_norm": 0.6751629710197449, "learning_rate": 3.273749695371986e-06, "loss": 1.3449, "mean_token_accuracy": 0.6630785216887792, "num_tokens": 2547036887.0, "step": 15180 }, { "entropy": 1.752416580915451, "epoch": 1.6677103073247097, "grad_norm": 0.6552797555923462, "learning_rate": 3.2729286627051126e-06, "loss": 1.3898, "mean_token_accuracy": 0.6616188089052836, "num_tokens": 2547192022.0, "step": 15181 }, { "entropy": 1.7320577601591747, "epoch": 1.6678201642360824, "grad_norm": 0.7161309719085693, "learning_rate": 3.2721078745965653e-06, "loss": 1.5004, "mean_token_accuracy": 0.6625331242879232, "num_tokens": 2547364976.0, "step": 15182 }, { "entropy": 1.7012326021989186, "epoch": 1.6679300211474555, "grad_norm": 1.3153455257415771, "learning_rate": 3.2712873310723186e-06, "loss": 1.0569, "mean_token_accuracy": 0.6787421902020773, "num_tokens": 2547535164.0, "step": 15183 }, { "entropy": 1.6177492539087932, "epoch": 1.6680398780588284, "grad_norm": 1.2736519575119019, "learning_rate": 3.2704670321583474e-06, "loss": 1.2213, "mean_token_accuracy": 0.6746558050314585, "num_tokens": 2547762852.0, "step": 15184 }, { "entropy": 1.7297306557496388, "epoch": 1.6681497349702012, "grad_norm": 0.8100583553314209, "learning_rate": 3.2696469778806102e-06, "loss": 1.369, "mean_token_accuracy": 0.660000408689181, "num_tokens": 2547894035.0, "step": 15185 }, { "entropy": 1.631914883852005, "epoch": 1.6682595918815744, "grad_norm": 0.7101684212684631, "learning_rate": 3.2688271682650652e-06, "loss": 1.2938, "mean_token_accuracy": 0.6753945598999659, "num_tokens": 2548003771.0, "step": 15186 }, { "entropy": 1.6929580171902974, "epoch": 1.6683694487929472, "grad_norm": 0.6653352379798889, "learning_rate": 3.268007603337655e-06, "loss": 1.5302, "mean_token_accuracy": 0.6348066478967667, "num_tokens": 2548213469.0, "step": 15187 }, { "entropy": 1.6800095836321514, "epoch": 1.6684793057043201, "grad_norm": 0.6623151302337646, "learning_rate": 3.2671882831243192e-06, "loss": 1.3365, "mean_token_accuracy": 0.6838184396425883, "num_tokens": 2548375660.0, "step": 15188 }, { "entropy": 1.735003262758255, "epoch": 1.668589162615693, "grad_norm": 0.5957344770431519, "learning_rate": 3.26636920765099e-06, "loss": 1.4901, "mean_token_accuracy": 0.633953258395195, "num_tokens": 2548619091.0, "step": 15189 }, { "entropy": 1.7986479699611664, "epoch": 1.668699019527066, "grad_norm": 0.8299116492271423, "learning_rate": 3.2655503769435914e-06, "loss": 1.7051, "mean_token_accuracy": 0.6299788852532705, "num_tokens": 2548760586.0, "step": 15190 }, { "entropy": 1.7730123003323872, "epoch": 1.668808876438439, "grad_norm": 0.7321978211402893, "learning_rate": 3.2647317910280394e-06, "loss": 1.5606, "mean_token_accuracy": 0.647643451889356, "num_tokens": 2548927359.0, "step": 15191 }, { "entropy": 1.625301976998647, "epoch": 1.668918733349812, "grad_norm": 0.6785169243812561, "learning_rate": 3.2639134499302376e-06, "loss": 1.382, "mean_token_accuracy": 0.6625783642133077, "num_tokens": 2549121709.0, "step": 15192 }, { "entropy": 1.7081403533617656, "epoch": 1.6690285902611848, "grad_norm": 0.808611273765564, "learning_rate": 3.2630953536760912e-06, "loss": 1.5018, "mean_token_accuracy": 0.657961055636406, "num_tokens": 2549298184.0, "step": 15193 }, { "entropy": 1.723660518725713, "epoch": 1.6691384471725579, "grad_norm": 0.7684034705162048, "learning_rate": 3.2622775022914916e-06, "loss": 1.5894, "mean_token_accuracy": 0.6327784558137258, "num_tokens": 2549481832.0, "step": 15194 }, { "entropy": 1.7609472672144573, "epoch": 1.6692483040839305, "grad_norm": 0.7250325083732605, "learning_rate": 3.2614598958023197e-06, "loss": 1.4629, "mean_token_accuracy": 0.6485139379898707, "num_tokens": 2549642631.0, "step": 15195 }, { "entropy": 1.678977221250534, "epoch": 1.6693581609953037, "grad_norm": 0.6951817870140076, "learning_rate": 3.2606425342344563e-06, "loss": 1.3847, "mean_token_accuracy": 0.6604787260293961, "num_tokens": 2549822114.0, "step": 15196 }, { "entropy": 1.6499019463857014, "epoch": 1.6694680179066765, "grad_norm": 0.7327429056167603, "learning_rate": 3.259825417613768e-06, "loss": 1.4187, "mean_token_accuracy": 0.6621517539024353, "num_tokens": 2550005790.0, "step": 15197 }, { "entropy": 1.6978709896405537, "epoch": 1.6695778748180494, "grad_norm": 0.637737512588501, "learning_rate": 3.259008545966119e-06, "loss": 1.4359, "mean_token_accuracy": 0.6585992376009623, "num_tokens": 2550179351.0, "step": 15198 }, { "entropy": 1.7029849688212078, "epoch": 1.6696877317294225, "grad_norm": 0.6902602314949036, "learning_rate": 3.2581919193173617e-06, "loss": 1.4087, "mean_token_accuracy": 0.6876400311787924, "num_tokens": 2550325763.0, "step": 15199 }, { "entropy": 1.7197512686252594, "epoch": 1.6697975886407954, "grad_norm": 0.7032921314239502, "learning_rate": 3.25737553769334e-06, "loss": 1.3647, "mean_token_accuracy": 0.6540055871009827, "num_tokens": 2550453496.0, "step": 15200 }, { "entropy": 1.6929913659890492, "epoch": 1.6699074455521683, "grad_norm": 0.7520516514778137, "learning_rate": 3.2565594011198927e-06, "loss": 1.3464, "mean_token_accuracy": 0.6615286866823832, "num_tokens": 2550611853.0, "step": 15201 }, { "entropy": 1.782798061768214, "epoch": 1.6700173024635412, "grad_norm": 0.7003825306892395, "learning_rate": 3.255743509622854e-06, "loss": 1.4342, "mean_token_accuracy": 0.6429455975691477, "num_tokens": 2550777852.0, "step": 15202 }, { "entropy": 1.7199705839157104, "epoch": 1.670127159374914, "grad_norm": 0.6969501376152039, "learning_rate": 3.2549278632280428e-06, "loss": 1.3403, "mean_token_accuracy": 0.650229757030805, "num_tokens": 2550953345.0, "step": 15203 }, { "entropy": 1.7010493278503418, "epoch": 1.6702370162862872, "grad_norm": 0.8231012225151062, "learning_rate": 3.254112461961273e-06, "loss": 1.5104, "mean_token_accuracy": 0.6525203734636307, "num_tokens": 2551164319.0, "step": 15204 }, { "entropy": 1.7223340173562367, "epoch": 1.67034687319766, "grad_norm": 0.5782705545425415, "learning_rate": 3.2532973058483557e-06, "loss": 1.4939, "mean_token_accuracy": 0.6309877087672552, "num_tokens": 2551397512.0, "step": 15205 }, { "entropy": 1.6429968476295471, "epoch": 1.670456730109033, "grad_norm": 0.7078797817230225, "learning_rate": 3.2524823949150875e-06, "loss": 1.2195, "mean_token_accuracy": 0.6770479083061218, "num_tokens": 2551523040.0, "step": 15206 }, { "entropy": 1.646560360987981, "epoch": 1.670566587020406, "grad_norm": 0.6111953854560852, "learning_rate": 3.2516677291872577e-06, "loss": 1.4736, "mean_token_accuracy": 0.6481401324272156, "num_tokens": 2551756130.0, "step": 15207 }, { "entropy": 1.685501754283905, "epoch": 1.6706764439317787, "grad_norm": 0.6546112298965454, "learning_rate": 3.250853308690657e-06, "loss": 1.4007, "mean_token_accuracy": 0.6448431412378947, "num_tokens": 2551921706.0, "step": 15208 }, { "entropy": 1.6584607859452565, "epoch": 1.6707863008431518, "grad_norm": 0.6745330095291138, "learning_rate": 3.250039133451054e-06, "loss": 1.3448, "mean_token_accuracy": 0.6620303889115652, "num_tokens": 2552094248.0, "step": 15209 }, { "entropy": 1.7307999233404796, "epoch": 1.6708961577545247, "grad_norm": 0.7921266555786133, "learning_rate": 3.249225203494221e-06, "loss": 1.3666, "mean_token_accuracy": 0.6684642732143402, "num_tokens": 2552225219.0, "step": 15210 }, { "entropy": 1.610274225473404, "epoch": 1.6710060146658976, "grad_norm": 0.6567273736000061, "learning_rate": 3.2484115188459197e-06, "loss": 1.3422, "mean_token_accuracy": 0.6564729809761047, "num_tokens": 2552450742.0, "step": 15211 }, { "entropy": 1.6868870158990223, "epoch": 1.6711158715772707, "grad_norm": 0.665790319442749, "learning_rate": 3.2475980795318977e-06, "loss": 1.2995, "mean_token_accuracy": 0.6727713098128637, "num_tokens": 2552570748.0, "step": 15212 }, { "entropy": 1.724908987681071, "epoch": 1.6712257284886436, "grad_norm": 0.8795796632766724, "learning_rate": 3.246784885577903e-06, "loss": 1.3513, "mean_token_accuracy": 0.6731296479701996, "num_tokens": 2552718100.0, "step": 15213 }, { "entropy": 1.6999436517556508, "epoch": 1.6713355854000165, "grad_norm": 0.6800674200057983, "learning_rate": 3.2459719370096783e-06, "loss": 1.4395, "mean_token_accuracy": 0.6620732347170512, "num_tokens": 2552898022.0, "step": 15214 }, { "entropy": 1.709017237027486, "epoch": 1.6714454423113894, "grad_norm": 0.76900714635849, "learning_rate": 3.2451592338529424e-06, "loss": 1.3666, "mean_token_accuracy": 0.6669487059116364, "num_tokens": 2553055703.0, "step": 15215 }, { "entropy": 1.704631100098292, "epoch": 1.6715552992227622, "grad_norm": 0.6997295618057251, "learning_rate": 3.2443467761334236e-06, "loss": 1.2892, "mean_token_accuracy": 0.6640374610821406, "num_tokens": 2553189498.0, "step": 15216 }, { "entropy": 1.7368600467840831, "epoch": 1.6716651561341354, "grad_norm": 0.6645811200141907, "learning_rate": 3.243534563876835e-06, "loss": 1.5858, "mean_token_accuracy": 0.633993665377299, "num_tokens": 2553385158.0, "step": 15217 }, { "entropy": 1.721531867980957, "epoch": 1.6717750130455082, "grad_norm": 0.6490238904953003, "learning_rate": 3.242722597108883e-06, "loss": 1.4305, "mean_token_accuracy": 0.6537267516056696, "num_tokens": 2553552812.0, "step": 15218 }, { "entropy": 1.7109013696511586, "epoch": 1.6718848699568811, "grad_norm": 0.717147171497345, "learning_rate": 3.241910875855263e-06, "loss": 1.508, "mean_token_accuracy": 0.6526562124490738, "num_tokens": 2553756616.0, "step": 15219 }, { "entropy": 1.7662996451059978, "epoch": 1.6719947268682542, "grad_norm": 0.7439054250717163, "learning_rate": 3.2410994001416706e-06, "loss": 1.5202, "mean_token_accuracy": 0.6419854611158371, "num_tokens": 2553904412.0, "step": 15220 }, { "entropy": 1.7582280735174816, "epoch": 1.672104583779627, "grad_norm": 0.6127282381057739, "learning_rate": 3.240288169993784e-06, "loss": 1.3346, "mean_token_accuracy": 0.6562148282925288, "num_tokens": 2554077430.0, "step": 15221 }, { "entropy": 1.7364347378412883, "epoch": 1.672214440691, "grad_norm": 0.6855073571205139, "learning_rate": 3.239477185437281e-06, "loss": 1.3535, "mean_token_accuracy": 0.6602631757656733, "num_tokens": 2554224197.0, "step": 15222 }, { "entropy": 1.7275860210259755, "epoch": 1.6723242976023729, "grad_norm": 0.643002986907959, "learning_rate": 3.238666446497829e-06, "loss": 1.4968, "mean_token_accuracy": 0.6405756970246633, "num_tokens": 2554382135.0, "step": 15223 }, { "entropy": 1.766138106584549, "epoch": 1.6724341545137458, "grad_norm": 0.7326632738113403, "learning_rate": 3.2378559532010858e-06, "loss": 1.3671, "mean_token_accuracy": 0.666873628894488, "num_tokens": 2554520361.0, "step": 15224 }, { "entropy": 1.687092532714208, "epoch": 1.6725440114251189, "grad_norm": 0.6920173168182373, "learning_rate": 3.2370457055727046e-06, "loss": 1.4491, "mean_token_accuracy": 0.6508265684048334, "num_tokens": 2554726838.0, "step": 15225 }, { "entropy": 1.7336925466855366, "epoch": 1.6726538683364918, "grad_norm": 0.6075726747512817, "learning_rate": 3.2362357036383283e-06, "loss": 1.4406, "mean_token_accuracy": 0.64825872083505, "num_tokens": 2554943464.0, "step": 15226 }, { "entropy": 1.7321422894795735, "epoch": 1.6727637252478647, "grad_norm": 0.658340334892273, "learning_rate": 3.235425947423592e-06, "loss": 1.3172, "mean_token_accuracy": 0.6706226418415705, "num_tokens": 2555094800.0, "step": 15227 }, { "entropy": 1.6634202202161152, "epoch": 1.6728735821592375, "grad_norm": 0.6695008277893066, "learning_rate": 3.234616436954128e-06, "loss": 1.509, "mean_token_accuracy": 0.6520940413077673, "num_tokens": 2555259787.0, "step": 15228 }, { "entropy": 1.6890574097633362, "epoch": 1.6729834390706104, "grad_norm": 0.5829101204872131, "learning_rate": 3.233807172255552e-06, "loss": 1.3955, "mean_token_accuracy": 0.6553893884023031, "num_tokens": 2555477181.0, "step": 15229 }, { "entropy": 1.6599874198436737, "epoch": 1.6730932959819835, "grad_norm": 0.6483899354934692, "learning_rate": 3.2329981533534814e-06, "loss": 1.4103, "mean_token_accuracy": 0.6571273605028788, "num_tokens": 2555666070.0, "step": 15230 }, { "entropy": 1.7212112446626027, "epoch": 1.6732031528933564, "grad_norm": 0.9130486249923706, "learning_rate": 3.23218938027352e-06, "loss": 1.3511, "mean_token_accuracy": 0.6648656080166498, "num_tokens": 2555817850.0, "step": 15231 }, { "entropy": 1.703871637582779, "epoch": 1.6733130098047293, "grad_norm": 0.6791821122169495, "learning_rate": 3.2313808530412628e-06, "loss": 1.2272, "mean_token_accuracy": 0.6739940742651621, "num_tokens": 2555973386.0, "step": 15232 }, { "entropy": 1.6598846117655437, "epoch": 1.6734228667161024, "grad_norm": 0.6614598035812378, "learning_rate": 3.2305725716823005e-06, "loss": 1.385, "mean_token_accuracy": 0.6631904939810435, "num_tokens": 2556157643.0, "step": 15233 }, { "entropy": 1.6710762580235798, "epoch": 1.673532723627475, "grad_norm": 0.6756139993667603, "learning_rate": 3.2297645362222175e-06, "loss": 1.3222, "mean_token_accuracy": 0.6683625727891922, "num_tokens": 2556365206.0, "step": 15234 }, { "entropy": 1.6408987541993458, "epoch": 1.6736425805388482, "grad_norm": 0.5655611753463745, "learning_rate": 3.2289567466865858e-06, "loss": 1.3738, "mean_token_accuracy": 0.6523148367802302, "num_tokens": 2556553810.0, "step": 15235 }, { "entropy": 1.6711041033267975, "epoch": 1.673752437450221, "grad_norm": 0.6098411679267883, "learning_rate": 3.228149203100968e-06, "loss": 1.3861, "mean_token_accuracy": 0.653891901175181, "num_tokens": 2556744619.0, "step": 15236 }, { "entropy": 1.6401523053646088, "epoch": 1.673862294361594, "grad_norm": 0.7218595743179321, "learning_rate": 3.2273419054909283e-06, "loss": 1.3142, "mean_token_accuracy": 0.6689208696285883, "num_tokens": 2556943085.0, "step": 15237 }, { "entropy": 1.7597449918588002, "epoch": 1.673972151272967, "grad_norm": 0.8478575944900513, "learning_rate": 3.226534853882015e-06, "loss": 1.5462, "mean_token_accuracy": 0.6484808673461279, "num_tokens": 2557088817.0, "step": 15238 }, { "entropy": 1.6971666316191356, "epoch": 1.67408200818434, "grad_norm": 0.6962342262268066, "learning_rate": 3.225728048299769e-06, "loss": 1.4707, "mean_token_accuracy": 0.6557254840930303, "num_tokens": 2557283065.0, "step": 15239 }, { "entropy": 1.6427591145038605, "epoch": 1.6741918650957128, "grad_norm": 0.6922114491462708, "learning_rate": 3.22492148876973e-06, "loss": 1.2838, "mean_token_accuracy": 0.680359830458959, "num_tokens": 2557411768.0, "step": 15240 }, { "entropy": 1.712716003259023, "epoch": 1.674301722007086, "grad_norm": 0.6969720125198364, "learning_rate": 3.22411517531742e-06, "loss": 1.2368, "mean_token_accuracy": 0.6725062231222788, "num_tokens": 2557545725.0, "step": 15241 }, { "entropy": 1.71072651942571, "epoch": 1.6744115789184586, "grad_norm": 0.5817194581031799, "learning_rate": 3.2233091079683613e-06, "loss": 1.3838, "mean_token_accuracy": 0.6509876201550165, "num_tokens": 2557714503.0, "step": 15242 }, { "entropy": 1.7713170647621155, "epoch": 1.6745214358298317, "grad_norm": 0.6657090783119202, "learning_rate": 3.2225032867480664e-06, "loss": 1.3812, "mean_token_accuracy": 0.6571053812901179, "num_tokens": 2557874292.0, "step": 15243 }, { "entropy": 1.6696379979451497, "epoch": 1.6746312927412046, "grad_norm": 0.6969082355499268, "learning_rate": 3.2216977116820354e-06, "loss": 1.2049, "mean_token_accuracy": 0.6869342774152756, "num_tokens": 2558040611.0, "step": 15244 }, { "entropy": 1.718798081080119, "epoch": 1.6747411496525775, "grad_norm": 0.7062221765518188, "learning_rate": 3.2208923827957668e-06, "loss": 1.3801, "mean_token_accuracy": 0.6560978144407272, "num_tokens": 2558170281.0, "step": 15245 }, { "entropy": 1.709840973218282, "epoch": 1.6748510065639506, "grad_norm": 0.836525022983551, "learning_rate": 3.2200873001147513e-06, "loss": 1.6282, "mean_token_accuracy": 0.6493220552802086, "num_tokens": 2558354120.0, "step": 15246 }, { "entropy": 1.7364205320676167, "epoch": 1.6749608634753232, "grad_norm": 0.6105183362960815, "learning_rate": 3.219282463664467e-06, "loss": 1.5489, "mean_token_accuracy": 0.6435059358676275, "num_tokens": 2558563661.0, "step": 15247 }, { "entropy": 1.7640726168950398, "epoch": 1.6750707203866964, "grad_norm": 0.736595869064331, "learning_rate": 3.2184778734703848e-06, "loss": 1.2694, "mean_token_accuracy": 0.667763814330101, "num_tokens": 2558661391.0, "step": 15248 }, { "entropy": 1.6720350682735443, "epoch": 1.6751805772980692, "grad_norm": 0.6239567399024963, "learning_rate": 3.217673529557973e-06, "loss": 1.3413, "mean_token_accuracy": 0.6550045510133108, "num_tokens": 2558840998.0, "step": 15249 }, { "entropy": 1.6599902311960857, "epoch": 1.6752904342094421, "grad_norm": 0.6722832918167114, "learning_rate": 3.216869431952688e-06, "loss": 1.2589, "mean_token_accuracy": 0.6672548999389013, "num_tokens": 2558963126.0, "step": 15250 }, { "entropy": 1.6761441230773926, "epoch": 1.6754002911208152, "grad_norm": 0.8985497355461121, "learning_rate": 3.2160655806799744e-06, "loss": 1.259, "mean_token_accuracy": 0.675809289018313, "num_tokens": 2559119390.0, "step": 15251 }, { "entropy": 1.717965970436732, "epoch": 1.6755101480321881, "grad_norm": 0.6145942211151123, "learning_rate": 3.2152619757652813e-06, "loss": 1.4449, "mean_token_accuracy": 0.6425779561201731, "num_tokens": 2559316872.0, "step": 15252 }, { "entropy": 1.6946504712104797, "epoch": 1.675620004943561, "grad_norm": 0.6808292865753174, "learning_rate": 3.2144586172340365e-06, "loss": 1.3685, "mean_token_accuracy": 0.6554179340600967, "num_tokens": 2559459695.0, "step": 15253 }, { "entropy": 1.6927851835886638, "epoch": 1.675729861854934, "grad_norm": 0.7276740074157715, "learning_rate": 3.2136555051116704e-06, "loss": 1.418, "mean_token_accuracy": 0.679698646068573, "num_tokens": 2559642979.0, "step": 15254 }, { "entropy": 1.6810623904069264, "epoch": 1.6758397187663068, "grad_norm": 0.6984881162643433, "learning_rate": 3.2128526394235982e-06, "loss": 1.2343, "mean_token_accuracy": 0.6779455641905466, "num_tokens": 2559759977.0, "step": 15255 }, { "entropy": 1.6839697062969208, "epoch": 1.6759495756776799, "grad_norm": 0.7221273183822632, "learning_rate": 3.2120500201952298e-06, "loss": 1.339, "mean_token_accuracy": 0.6567999372879664, "num_tokens": 2559902932.0, "step": 15256 }, { "entropy": 1.6946297883987427, "epoch": 1.6760594325890528, "grad_norm": 0.778820812702179, "learning_rate": 3.2112476474519683e-06, "loss": 1.3138, "mean_token_accuracy": 0.6709027737379074, "num_tokens": 2560025897.0, "step": 15257 }, { "entropy": 1.750508725643158, "epoch": 1.6761692895004257, "grad_norm": 0.7278999090194702, "learning_rate": 3.2104455212192113e-06, "loss": 1.3121, "mean_token_accuracy": 0.6603905359903971, "num_tokens": 2560135481.0, "step": 15258 }, { "entropy": 1.7683274547259014, "epoch": 1.6762791464117988, "grad_norm": 0.8146536350250244, "learning_rate": 3.209643641522343e-06, "loss": 1.4074, "mean_token_accuracy": 0.6663754433393478, "num_tokens": 2560275025.0, "step": 15259 }, { "entropy": 1.74715722600619, "epoch": 1.6763890033231714, "grad_norm": 0.7501146197319031, "learning_rate": 3.208842008386742e-06, "loss": 1.4665, "mean_token_accuracy": 0.6487719466288885, "num_tokens": 2560474451.0, "step": 15260 }, { "entropy": 1.6942894756793976, "epoch": 1.6764988602345445, "grad_norm": 0.5945084691047668, "learning_rate": 3.2080406218377824e-06, "loss": 1.3198, "mean_token_accuracy": 0.6598286330699921, "num_tokens": 2560668942.0, "step": 15261 }, { "entropy": 1.6762113670508068, "epoch": 1.6766087171459174, "grad_norm": 0.7053788304328918, "learning_rate": 3.2072394819008263e-06, "loss": 1.2167, "mean_token_accuracy": 0.6785010149081548, "num_tokens": 2560840956.0, "step": 15262 }, { "entropy": 1.7425096035003662, "epoch": 1.6767185740572903, "grad_norm": 0.7079997062683105, "learning_rate": 3.2064385886012254e-06, "loss": 1.4733, "mean_token_accuracy": 0.6649157653252283, "num_tokens": 2560988485.0, "step": 15263 }, { "entropy": 1.6938765247662861, "epoch": 1.6768284309686634, "grad_norm": 0.6834542751312256, "learning_rate": 3.2056379419643353e-06, "loss": 1.4556, "mean_token_accuracy": 0.6481647590796152, "num_tokens": 2561196564.0, "step": 15264 }, { "entropy": 1.642237663269043, "epoch": 1.6769382878800363, "grad_norm": 0.6120626330375671, "learning_rate": 3.2048375420154887e-06, "loss": 1.2315, "mean_token_accuracy": 0.6777461071809133, "num_tokens": 2561331437.0, "step": 15265 }, { "entropy": 1.7010113994280498, "epoch": 1.6770481447914092, "grad_norm": 0.7019853591918945, "learning_rate": 3.204037388780025e-06, "loss": 1.3454, "mean_token_accuracy": 0.6715318908294042, "num_tokens": 2561528048.0, "step": 15266 }, { "entropy": 1.6710281074047089, "epoch": 1.6771580017027823, "grad_norm": 0.819465696811676, "learning_rate": 3.2032374822832634e-06, "loss": 1.3401, "mean_token_accuracy": 0.6738651841878891, "num_tokens": 2561685491.0, "step": 15267 }, { "entropy": 1.800257682800293, "epoch": 1.677267858614155, "grad_norm": 0.5872465372085571, "learning_rate": 3.2024378225505204e-06, "loss": 1.4982, "mean_token_accuracy": 0.6357658604780833, "num_tokens": 2561911809.0, "step": 15268 }, { "entropy": 1.712759256362915, "epoch": 1.677377715525528, "grad_norm": 0.7056664824485779, "learning_rate": 3.201638409607106e-06, "loss": 1.4008, "mean_token_accuracy": 0.6417889843384424, "num_tokens": 2562124310.0, "step": 15269 }, { "entropy": 1.6932222247123718, "epoch": 1.677487572436901, "grad_norm": 0.6303336024284363, "learning_rate": 3.2008392434783264e-06, "loss": 1.4301, "mean_token_accuracy": 0.6475923210382462, "num_tokens": 2562279988.0, "step": 15270 }, { "entropy": 1.6386590401331584, "epoch": 1.6775974293482738, "grad_norm": 0.657673716545105, "learning_rate": 3.2000403241894686e-06, "loss": 1.3441, "mean_token_accuracy": 0.6659832795461019, "num_tokens": 2562444547.0, "step": 15271 }, { "entropy": 1.6764297584692638, "epoch": 1.677707286259647, "grad_norm": 0.7012315988540649, "learning_rate": 3.1992416517658175e-06, "loss": 1.3601, "mean_token_accuracy": 0.6649115979671478, "num_tokens": 2562607824.0, "step": 15272 }, { "entropy": 1.7825362384319305, "epoch": 1.6778171431710196, "grad_norm": 0.7959426641464233, "learning_rate": 3.198443226232656e-06, "loss": 1.3882, "mean_token_accuracy": 0.6545184900363287, "num_tokens": 2562779514.0, "step": 15273 }, { "entropy": 1.7837129334608715, "epoch": 1.6779270000823927, "grad_norm": 0.8002263307571411, "learning_rate": 3.1976450476152506e-06, "loss": 1.4926, "mean_token_accuracy": 0.6270763973395029, "num_tokens": 2562968133.0, "step": 15274 }, { "entropy": 1.6454266607761383, "epoch": 1.6780368569937656, "grad_norm": 0.6544117331504822, "learning_rate": 3.19684711593886e-06, "loss": 1.2687, "mean_token_accuracy": 0.6670517573753992, "num_tokens": 2563129301.0, "step": 15275 }, { "entropy": 1.7555846671263378, "epoch": 1.6781467139051385, "grad_norm": 0.6647533774375916, "learning_rate": 3.196049431228746e-06, "loss": 1.5171, "mean_token_accuracy": 0.6388434370358785, "num_tokens": 2563328620.0, "step": 15276 }, { "entropy": 1.6958951950073242, "epoch": 1.6782565708165116, "grad_norm": 0.7346828579902649, "learning_rate": 3.195251993510149e-06, "loss": 1.286, "mean_token_accuracy": 0.6770187467336655, "num_tokens": 2563453949.0, "step": 15277 }, { "entropy": 1.7482584714889526, "epoch": 1.6783664277278845, "grad_norm": 0.7439913153648376, "learning_rate": 3.194454802808311e-06, "loss": 1.4923, "mean_token_accuracy": 0.6445967058340708, "num_tokens": 2563620845.0, "step": 15278 }, { "entropy": 1.7300353248914082, "epoch": 1.6784762846392574, "grad_norm": 0.6056285500526428, "learning_rate": 3.193657859148461e-06, "loss": 1.5211, "mean_token_accuracy": 0.6319058835506439, "num_tokens": 2563832970.0, "step": 15279 }, { "entropy": 1.7123978634675343, "epoch": 1.6785861415506305, "grad_norm": 0.6761077642440796, "learning_rate": 3.19286116255582e-06, "loss": 1.4335, "mean_token_accuracy": 0.6501255333423615, "num_tokens": 2563991103.0, "step": 15280 }, { "entropy": 1.7233157257239025, "epoch": 1.6786959984620031, "grad_norm": 0.6616033911705017, "learning_rate": 3.192064713055606e-06, "loss": 1.3993, "mean_token_accuracy": 0.6481630504131317, "num_tokens": 2564169608.0, "step": 15281 }, { "entropy": 1.7163086732228596, "epoch": 1.6788058553733762, "grad_norm": 0.7168435454368591, "learning_rate": 3.191268510673027e-06, "loss": 1.3552, "mean_token_accuracy": 0.6583843231201172, "num_tokens": 2564312562.0, "step": 15282 }, { "entropy": 1.7421314418315887, "epoch": 1.6789157122847491, "grad_norm": 1.4026530981063843, "learning_rate": 3.1904725554332805e-06, "loss": 1.2168, "mean_token_accuracy": 0.674383873740832, "num_tokens": 2564501041.0, "step": 15283 }, { "entropy": 1.7259081999460857, "epoch": 1.679025569196122, "grad_norm": 0.7258220314979553, "learning_rate": 3.189676847361559e-06, "loss": 1.3833, "mean_token_accuracy": 0.6572358012199402, "num_tokens": 2564696992.0, "step": 15284 }, { "entropy": 1.7746508121490479, "epoch": 1.679135426107495, "grad_norm": 0.7986940145492554, "learning_rate": 3.1888813864830435e-06, "loss": 1.3888, "mean_token_accuracy": 0.6498429874579111, "num_tokens": 2564836244.0, "step": 15285 }, { "entropy": 1.699170559644699, "epoch": 1.6792452830188678, "grad_norm": 0.7032765746116638, "learning_rate": 3.1880861728229152e-06, "loss": 1.2493, "mean_token_accuracy": 0.6778079668680826, "num_tokens": 2564971115.0, "step": 15286 }, { "entropy": 1.6739700535933177, "epoch": 1.6793551399302409, "grad_norm": 0.7897632122039795, "learning_rate": 3.1872912064063387e-06, "loss": 1.461, "mean_token_accuracy": 0.6509335339069366, "num_tokens": 2565134895.0, "step": 15287 }, { "entropy": 1.6778443853060405, "epoch": 1.6794649968416138, "grad_norm": 0.6658824682235718, "learning_rate": 3.186496487258474e-06, "loss": 1.3738, "mean_token_accuracy": 0.6780295670032501, "num_tokens": 2565302480.0, "step": 15288 }, { "entropy": 1.7276048461596172, "epoch": 1.6795748537529867, "grad_norm": 0.6688079237937927, "learning_rate": 3.185702015404474e-06, "loss": 1.3869, "mean_token_accuracy": 0.6666077673435211, "num_tokens": 2565448467.0, "step": 15289 }, { "entropy": 1.7064704895019531, "epoch": 1.6796847106643598, "grad_norm": 0.674453854560852, "learning_rate": 3.184907790869486e-06, "loss": 1.2915, "mean_token_accuracy": 0.6831353803475698, "num_tokens": 2565628460.0, "step": 15290 }, { "entropy": 1.7536171277364094, "epoch": 1.6797945675757326, "grad_norm": 0.7042247653007507, "learning_rate": 3.184113813678644e-06, "loss": 1.5146, "mean_token_accuracy": 0.662187417348226, "num_tokens": 2565782360.0, "step": 15291 }, { "entropy": 1.7017661929130554, "epoch": 1.6799044244871055, "grad_norm": 0.6648768186569214, "learning_rate": 3.183320083857076e-06, "loss": 1.3611, "mean_token_accuracy": 0.6698134889205297, "num_tokens": 2565974485.0, "step": 15292 }, { "entropy": 1.6317310432593028, "epoch": 1.6800142813984786, "grad_norm": 0.6019257307052612, "learning_rate": 3.1825266014299085e-06, "loss": 1.3964, "mean_token_accuracy": 0.6603737771511078, "num_tokens": 2566150672.0, "step": 15293 }, { "entropy": 1.6843983232975006, "epoch": 1.6801241383098513, "grad_norm": 0.751380205154419, "learning_rate": 3.1817333664222507e-06, "loss": 1.42, "mean_token_accuracy": 0.651827389995257, "num_tokens": 2566345461.0, "step": 15294 }, { "entropy": 1.7201267182826996, "epoch": 1.6802339952212244, "grad_norm": 0.7158882021903992, "learning_rate": 3.1809403788592066e-06, "loss": 1.3936, "mean_token_accuracy": 0.6722188790639242, "num_tokens": 2566536727.0, "step": 15295 }, { "entropy": 1.7150601148605347, "epoch": 1.6803438521325973, "grad_norm": 0.6860437989234924, "learning_rate": 3.180147638765878e-06, "loss": 1.5585, "mean_token_accuracy": 0.6306335628032684, "num_tokens": 2566726509.0, "step": 15296 }, { "entropy": 1.7363272806008656, "epoch": 1.6804537090439702, "grad_norm": 0.7342987656593323, "learning_rate": 3.179355146167351e-06, "loss": 1.1886, "mean_token_accuracy": 0.6802386889855067, "num_tokens": 2566841914.0, "step": 15297 }, { "entropy": 1.703192909558614, "epoch": 1.6805635659553433, "grad_norm": 0.6351720690727234, "learning_rate": 3.178562901088712e-06, "loss": 1.2872, "mean_token_accuracy": 0.674397294720014, "num_tokens": 2566981790.0, "step": 15298 }, { "entropy": 1.6544977327187855, "epoch": 1.680673422866716, "grad_norm": 0.7234415411949158, "learning_rate": 3.1777709035550318e-06, "loss": 1.3261, "mean_token_accuracy": 0.6573912451664606, "num_tokens": 2567177661.0, "step": 15299 }, { "entropy": 1.6995947659015656, "epoch": 1.680783279778089, "grad_norm": 0.7154074907302856, "learning_rate": 3.1769791535913767e-06, "loss": 1.4392, "mean_token_accuracy": 0.6578214665253957, "num_tokens": 2567339837.0, "step": 15300 }, { "entropy": 1.6951833069324493, "epoch": 1.680893136689462, "grad_norm": 0.6429846286773682, "learning_rate": 3.176187651222806e-06, "loss": 1.5831, "mean_token_accuracy": 0.6406177133321762, "num_tokens": 2567544062.0, "step": 15301 }, { "entropy": 1.7316114902496338, "epoch": 1.6810029936008348, "grad_norm": 0.7316505312919617, "learning_rate": 3.175396396474373e-06, "loss": 1.4443, "mean_token_accuracy": 0.6586751093467077, "num_tokens": 2567720570.0, "step": 15302 }, { "entropy": 1.724622756242752, "epoch": 1.681112850512208, "grad_norm": 0.6855919361114502, "learning_rate": 3.174605389371118e-06, "loss": 1.3854, "mean_token_accuracy": 0.6606116443872452, "num_tokens": 2567892992.0, "step": 15303 }, { "entropy": 1.7502660353978474, "epoch": 1.6812227074235808, "grad_norm": 0.6851439476013184, "learning_rate": 3.1738146299380746e-06, "loss": 1.4903, "mean_token_accuracy": 0.6492257912953695, "num_tokens": 2568070857.0, "step": 15304 }, { "entropy": 1.74208668867747, "epoch": 1.6813325643349537, "grad_norm": 0.7176704406738281, "learning_rate": 3.173024118200273e-06, "loss": 1.5042, "mean_token_accuracy": 0.6514915178219477, "num_tokens": 2568241367.0, "step": 15305 }, { "entropy": 1.765565186738968, "epoch": 1.6814424212463268, "grad_norm": 0.6486221551895142, "learning_rate": 3.1722338541827313e-06, "loss": 1.4233, "mean_token_accuracy": 0.6390677789847056, "num_tokens": 2568434347.0, "step": 15306 }, { "entropy": 1.74800306558609, "epoch": 1.6815522781576995, "grad_norm": 0.6911203861236572, "learning_rate": 3.1714438379104583e-06, "loss": 1.5126, "mean_token_accuracy": 0.6483513911565145, "num_tokens": 2568568047.0, "step": 15307 }, { "entropy": 1.6643619934717815, "epoch": 1.6816621350690726, "grad_norm": 0.7120999693870544, "learning_rate": 3.170654069408463e-06, "loss": 1.2547, "mean_token_accuracy": 0.6775770286719004, "num_tokens": 2568718128.0, "step": 15308 }, { "entropy": 1.7326288719971974, "epoch": 1.6817719919804455, "grad_norm": 0.789517343044281, "learning_rate": 3.169864548701736e-06, "loss": 1.4496, "mean_token_accuracy": 0.6548430124918619, "num_tokens": 2568891251.0, "step": 15309 }, { "entropy": 1.7174657980600994, "epoch": 1.6818818488918184, "grad_norm": 0.6772891283035278, "learning_rate": 3.1690752758152697e-06, "loss": 1.4284, "mean_token_accuracy": 0.6426176180442175, "num_tokens": 2569037403.0, "step": 15310 }, { "entropy": 1.6879849930604298, "epoch": 1.6819917058031915, "grad_norm": 0.6050668358802795, "learning_rate": 3.1682862507740425e-06, "loss": 1.4879, "mean_token_accuracy": 0.6514971653620402, "num_tokens": 2569232119.0, "step": 15311 }, { "entropy": 1.7036270002524059, "epoch": 1.6821015627145641, "grad_norm": 0.6641897559165955, "learning_rate": 3.1674974736030233e-06, "loss": 1.3175, "mean_token_accuracy": 0.6694677621126175, "num_tokens": 2569363697.0, "step": 15312 }, { "entropy": 1.6906124949455261, "epoch": 1.6822114196259372, "grad_norm": 0.7731119990348816, "learning_rate": 3.166708944327181e-06, "loss": 1.3484, "mean_token_accuracy": 0.667941133181254, "num_tokens": 2569490915.0, "step": 15313 }, { "entropy": 1.7285043100516002, "epoch": 1.6823212765373101, "grad_norm": 0.747154176235199, "learning_rate": 3.165920662971472e-06, "loss": 1.3582, "mean_token_accuracy": 0.6714018086592356, "num_tokens": 2569599168.0, "step": 15314 }, { "entropy": 1.7480494777361553, "epoch": 1.682431133448683, "grad_norm": 0.7452878952026367, "learning_rate": 3.1651326295608447e-06, "loss": 1.2041, "mean_token_accuracy": 0.6810566087563833, "num_tokens": 2569700113.0, "step": 15315 }, { "entropy": 1.7515860497951508, "epoch": 1.682540990360056, "grad_norm": 0.7225151658058167, "learning_rate": 3.164344844120237e-06, "loss": 1.314, "mean_token_accuracy": 0.6686030477285385, "num_tokens": 2569822465.0, "step": 15316 }, { "entropy": 1.7134381830692291, "epoch": 1.682650847271429, "grad_norm": 0.6826877593994141, "learning_rate": 3.1635573066745855e-06, "loss": 1.4157, "mean_token_accuracy": 0.64534163971742, "num_tokens": 2570027829.0, "step": 15317 }, { "entropy": 1.705380419890086, "epoch": 1.6827607041828019, "grad_norm": 0.6510130167007446, "learning_rate": 3.1627700172488147e-06, "loss": 1.2904, "mean_token_accuracy": 0.6670472820599874, "num_tokens": 2570147721.0, "step": 15318 }, { "entropy": 1.683544745047887, "epoch": 1.682870561094175, "grad_norm": 0.5858747363090515, "learning_rate": 3.1619829758678388e-06, "loss": 1.493, "mean_token_accuracy": 0.6476639409859976, "num_tokens": 2570342162.0, "step": 15319 }, { "entropy": 1.6911889413992565, "epoch": 1.6829804180055477, "grad_norm": 0.8070988059043884, "learning_rate": 3.1611961825565725e-06, "loss": 1.2663, "mean_token_accuracy": 0.6720109234253565, "num_tokens": 2570525734.0, "step": 15320 }, { "entropy": 1.7007411917050679, "epoch": 1.6830902749169208, "grad_norm": 0.6247788071632385, "learning_rate": 3.160409637339913e-06, "loss": 1.417, "mean_token_accuracy": 0.6442168205976486, "num_tokens": 2570720758.0, "step": 15321 }, { "entropy": 1.7167495091756184, "epoch": 1.6832001318282936, "grad_norm": 0.5952068567276001, "learning_rate": 3.159623340242757e-06, "loss": 1.313, "mean_token_accuracy": 0.6723757932583491, "num_tokens": 2570871146.0, "step": 15322 }, { "entropy": 1.747285137573878, "epoch": 1.6833099887396665, "grad_norm": 0.7220256328582764, "learning_rate": 3.158837291289989e-06, "loss": 1.3158, "mean_token_accuracy": 0.6664845049381256, "num_tokens": 2571002515.0, "step": 15323 }, { "entropy": 1.7323359350363414, "epoch": 1.6834198456510396, "grad_norm": 0.841284453868866, "learning_rate": 3.158051490506486e-06, "loss": 1.4707, "mean_token_accuracy": 0.660729338725408, "num_tokens": 2571162161.0, "step": 15324 }, { "entropy": 1.725009063879649, "epoch": 1.6835297025624123, "grad_norm": 0.7810693383216858, "learning_rate": 3.15726593791712e-06, "loss": 1.4043, "mean_token_accuracy": 0.6503799458344778, "num_tokens": 2571297383.0, "step": 15325 }, { "entropy": 1.7619624336560566, "epoch": 1.6836395594737854, "grad_norm": 0.7505675554275513, "learning_rate": 3.1564806335467544e-06, "loss": 1.4133, "mean_token_accuracy": 0.6519313355286916, "num_tokens": 2571464771.0, "step": 15326 }, { "entropy": 1.7304276923338573, "epoch": 1.6837494163851583, "grad_norm": 0.6991888880729675, "learning_rate": 3.1556955774202436e-06, "loss": 1.2079, "mean_token_accuracy": 0.6864756196737289, "num_tokens": 2571579779.0, "step": 15327 }, { "entropy": 1.6945532461007435, "epoch": 1.6838592732965312, "grad_norm": 0.7731361985206604, "learning_rate": 3.154910769562429e-06, "loss": 1.3362, "mean_token_accuracy": 0.6732407162586848, "num_tokens": 2571731395.0, "step": 15328 }, { "entropy": 1.6676548918088276, "epoch": 1.6839691302079043, "grad_norm": 0.6803898215293884, "learning_rate": 3.1541262099981573e-06, "loss": 1.458, "mean_token_accuracy": 0.6517745653788248, "num_tokens": 2571903109.0, "step": 15329 }, { "entropy": 1.7044414083162944, "epoch": 1.6840789871192772, "grad_norm": 0.6813333630561829, "learning_rate": 3.1533418987522547e-06, "loss": 1.4173, "mean_token_accuracy": 0.6529068152109782, "num_tokens": 2572070768.0, "step": 15330 }, { "entropy": 1.690029243628184, "epoch": 1.68418884403065, "grad_norm": 0.6491711735725403, "learning_rate": 3.1525578358495433e-06, "loss": 1.2993, "mean_token_accuracy": 0.6696517119805018, "num_tokens": 2572198958.0, "step": 15331 }, { "entropy": 1.6722242434819539, "epoch": 1.6842987009420232, "grad_norm": 0.7356240749359131, "learning_rate": 3.151774021314842e-06, "loss": 1.319, "mean_token_accuracy": 0.6778273731470108, "num_tokens": 2572348869.0, "step": 15332 }, { "entropy": 1.7420857747395833, "epoch": 1.6844085578533958, "grad_norm": 0.7489916086196899, "learning_rate": 3.1509904551729554e-06, "loss": 1.4856, "mean_token_accuracy": 0.6390324880679449, "num_tokens": 2572512314.0, "step": 15333 }, { "entropy": 1.751690109570821, "epoch": 1.684518414764769, "grad_norm": 0.6961585879325867, "learning_rate": 3.150207137448686e-06, "loss": 1.2745, "mean_token_accuracy": 0.6709187477827072, "num_tokens": 2572666138.0, "step": 15334 }, { "entropy": 1.6826651493708293, "epoch": 1.6846282716761418, "grad_norm": 0.6414405703544617, "learning_rate": 3.149424068166822e-06, "loss": 1.2945, "mean_token_accuracy": 0.6786713004112244, "num_tokens": 2572843000.0, "step": 15335 }, { "entropy": 1.6688839693864186, "epoch": 1.6847381285875147, "grad_norm": 0.8924053907394409, "learning_rate": 3.1486412473521476e-06, "loss": 1.387, "mean_token_accuracy": 0.6693562765916189, "num_tokens": 2572979120.0, "step": 15336 }, { "entropy": 1.7230990827083588, "epoch": 1.6848479854988878, "grad_norm": 0.601993978023529, "learning_rate": 3.14785867502944e-06, "loss": 1.3809, "mean_token_accuracy": 0.6529526164134344, "num_tokens": 2573168637.0, "step": 15337 }, { "entropy": 1.7520112891991932, "epoch": 1.6849578424102605, "grad_norm": 0.5566615462303162, "learning_rate": 3.147076351223469e-06, "loss": 1.4751, "mean_token_accuracy": 0.631900375088056, "num_tokens": 2573388244.0, "step": 15338 }, { "entropy": 1.731001118818919, "epoch": 1.6850676993216336, "grad_norm": 0.7146487236022949, "learning_rate": 3.1462942759589933e-06, "loss": 1.2527, "mean_token_accuracy": 0.6749810228745142, "num_tokens": 2573519678.0, "step": 15339 }, { "entropy": 1.694351961215337, "epoch": 1.6851775562330065, "grad_norm": 0.6235674023628235, "learning_rate": 3.145512449260762e-06, "loss": 1.4673, "mean_token_accuracy": 0.6534475237131119, "num_tokens": 2573695861.0, "step": 15340 }, { "entropy": 1.7240705291430156, "epoch": 1.6852874131443794, "grad_norm": 1.222989797592163, "learning_rate": 3.144730871153525e-06, "loss": 1.5403, "mean_token_accuracy": 0.643691211938858, "num_tokens": 2573907238.0, "step": 15341 }, { "entropy": 1.7270687023798625, "epoch": 1.6853972700557525, "grad_norm": 0.6817310452461243, "learning_rate": 3.1439495416620157e-06, "loss": 1.4433, "mean_token_accuracy": 0.662896732489268, "num_tokens": 2574080197.0, "step": 15342 }, { "entropy": 1.6700053215026855, "epoch": 1.6855071269671253, "grad_norm": 0.6429228186607361, "learning_rate": 3.1431684608109614e-06, "loss": 1.5984, "mean_token_accuracy": 0.6422629406054815, "num_tokens": 2574260989.0, "step": 15343 }, { "entropy": 1.6401211122671764, "epoch": 1.6856169838784982, "grad_norm": 0.5946700572967529, "learning_rate": 3.1423876286250872e-06, "loss": 1.3121, "mean_token_accuracy": 0.6619760394096375, "num_tokens": 2574505321.0, "step": 15344 }, { "entropy": 1.6671649018923442, "epoch": 1.6857268407898713, "grad_norm": 0.8995504975318909, "learning_rate": 3.1416070451291024e-06, "loss": 1.3446, "mean_token_accuracy": 0.6812218924363455, "num_tokens": 2574649743.0, "step": 15345 }, { "entropy": 1.702040175596873, "epoch": 1.685836697701244, "grad_norm": 0.7074987292289734, "learning_rate": 3.140826710347715e-06, "loss": 1.3002, "mean_token_accuracy": 0.6756115506092707, "num_tokens": 2574848047.0, "step": 15346 }, { "entropy": 1.7741004427274067, "epoch": 1.685946554612617, "grad_norm": 0.6643980145454407, "learning_rate": 3.14004662430562e-06, "loss": 1.356, "mean_token_accuracy": 0.6614086826642355, "num_tokens": 2575008827.0, "step": 15347 }, { "entropy": 1.6915812889734905, "epoch": 1.68605641152399, "grad_norm": 0.6701132655143738, "learning_rate": 3.1392667870275066e-06, "loss": 1.4227, "mean_token_accuracy": 0.6473148117462794, "num_tokens": 2575176906.0, "step": 15348 }, { "entropy": 1.7173262635866802, "epoch": 1.6861662684353629, "grad_norm": 0.6805701851844788, "learning_rate": 3.1384871985380582e-06, "loss": 1.4934, "mean_token_accuracy": 0.6477487633625666, "num_tokens": 2575349117.0, "step": 15349 }, { "entropy": 1.7245887120564778, "epoch": 1.686276125346736, "grad_norm": 0.6441610455513, "learning_rate": 3.137707858861947e-06, "loss": 1.2899, "mean_token_accuracy": 0.6831858903169632, "num_tokens": 2575498227.0, "step": 15350 }, { "entropy": 1.722615083058675, "epoch": 1.6863859822581087, "grad_norm": 0.6894484758377075, "learning_rate": 3.1369287680238403e-06, "loss": 1.3521, "mean_token_accuracy": 0.6721992939710617, "num_tokens": 2575690922.0, "step": 15351 }, { "entropy": 1.7229991952578227, "epoch": 1.6864958391694818, "grad_norm": 0.6383141279220581, "learning_rate": 3.1361499260483948e-06, "loss": 1.4355, "mean_token_accuracy": 0.6438505450884501, "num_tokens": 2575897646.0, "step": 15352 }, { "entropy": 1.7160128851731618, "epoch": 1.6866056960808546, "grad_norm": 0.7071347236633301, "learning_rate": 3.13537133296026e-06, "loss": 1.3538, "mean_token_accuracy": 0.6632434278726578, "num_tokens": 2576026434.0, "step": 15353 }, { "entropy": 1.7085239390532176, "epoch": 1.6867155529922275, "grad_norm": 0.7150105237960815, "learning_rate": 3.1345929887840785e-06, "loss": 1.3223, "mean_token_accuracy": 0.6567959388097128, "num_tokens": 2576136070.0, "step": 15354 }, { "entropy": 1.7165654997030895, "epoch": 1.6868254099036006, "grad_norm": 0.7486876845359802, "learning_rate": 3.1338148935444856e-06, "loss": 1.3181, "mean_token_accuracy": 0.6667283674081167, "num_tokens": 2576242543.0, "step": 15355 }, { "entropy": 1.76658500234286, "epoch": 1.6869352668149735, "grad_norm": 0.7848101854324341, "learning_rate": 3.133037047266105e-06, "loss": 1.4248, "mean_token_accuracy": 0.6585915784041086, "num_tokens": 2576381772.0, "step": 15356 }, { "entropy": 1.7358842889467876, "epoch": 1.6870451237263464, "grad_norm": 0.6816839575767517, "learning_rate": 3.1322594499735566e-06, "loss": 1.3862, "mean_token_accuracy": 0.6490218391021093, "num_tokens": 2576531068.0, "step": 15357 }, { "entropy": 1.6990918318430583, "epoch": 1.6871549806377195, "grad_norm": 1.0314568281173706, "learning_rate": 3.1314821016914535e-06, "loss": 1.3518, "mean_token_accuracy": 0.6715274453163147, "num_tokens": 2576653983.0, "step": 15358 }, { "entropy": 1.6669905682404835, "epoch": 1.6872648375490922, "grad_norm": 0.590815007686615, "learning_rate": 3.1307050024443963e-06, "loss": 1.4015, "mean_token_accuracy": 0.6566647191842397, "num_tokens": 2576831940.0, "step": 15359 }, { "entropy": 1.673220157623291, "epoch": 1.6873746944604653, "grad_norm": 0.643791913986206, "learning_rate": 3.129928152256978e-06, "loss": 1.4797, "mean_token_accuracy": 0.6434496690829595, "num_tokens": 2577049426.0, "step": 15360 }, { "entropy": 1.6904392540454865, "epoch": 1.6874845513718382, "grad_norm": 0.5934916138648987, "learning_rate": 3.129151551153789e-06, "loss": 1.5356, "mean_token_accuracy": 0.632567952076594, "num_tokens": 2577254922.0, "step": 15361 }, { "entropy": 1.664735992749532, "epoch": 1.687594408283211, "grad_norm": 0.6659498810768127, "learning_rate": 3.1283751991594064e-06, "loss": 1.3438, "mean_token_accuracy": 0.6631951779127121, "num_tokens": 2577415164.0, "step": 15362 }, { "entropy": 1.719626933336258, "epoch": 1.6877042651945842, "grad_norm": 0.6992260813713074, "learning_rate": 3.1275990962984e-06, "loss": 1.4677, "mean_token_accuracy": 0.6572467486063639, "num_tokens": 2577570986.0, "step": 15363 }, { "entropy": 1.7156515419483185, "epoch": 1.6878141221059568, "grad_norm": 0.6852288842201233, "learning_rate": 3.1268232425953364e-06, "loss": 1.3626, "mean_token_accuracy": 0.6607407828172048, "num_tokens": 2577754997.0, "step": 15364 }, { "entropy": 1.7793689171473186, "epoch": 1.68792397901733, "grad_norm": 0.6801753044128418, "learning_rate": 3.126047638074768e-06, "loss": 1.4492, "mean_token_accuracy": 0.639577383796374, "num_tokens": 2577919506.0, "step": 15365 }, { "entropy": 1.7608330448468525, "epoch": 1.6880338359287028, "grad_norm": 0.7202026844024658, "learning_rate": 3.1252722827612463e-06, "loss": 1.4545, "mean_token_accuracy": 0.6331879695256551, "num_tokens": 2578100044.0, "step": 15366 }, { "entropy": 1.724254459142685, "epoch": 1.6881436928400757, "grad_norm": 0.6835639476776123, "learning_rate": 3.124497176679308e-06, "loss": 1.3549, "mean_token_accuracy": 0.6532203555107117, "num_tokens": 2578267048.0, "step": 15367 }, { "entropy": 1.6863794922828674, "epoch": 1.6882535497514488, "grad_norm": 0.7550612092018127, "learning_rate": 3.1237223198534823e-06, "loss": 1.1698, "mean_token_accuracy": 0.6900685677925745, "num_tokens": 2578366956.0, "step": 15368 }, { "entropy": 1.7102207442124684, "epoch": 1.6883634066628217, "grad_norm": 0.7050641179084778, "learning_rate": 3.1229477123082968e-06, "loss": 1.4534, "mean_token_accuracy": 0.6498723477125168, "num_tokens": 2578560893.0, "step": 15369 }, { "entropy": 1.6907469928264618, "epoch": 1.6884732635741946, "grad_norm": 0.5717144012451172, "learning_rate": 3.1221733540682692e-06, "loss": 1.4725, "mean_token_accuracy": 0.647341325879097, "num_tokens": 2578836855.0, "step": 15370 }, { "entropy": 1.676329771677653, "epoch": 1.6885831204855677, "grad_norm": 0.8052626252174377, "learning_rate": 3.121399245157904e-06, "loss": 1.558, "mean_token_accuracy": 0.6526962419350942, "num_tokens": 2579026404.0, "step": 15371 }, { "entropy": 1.671715994675954, "epoch": 1.6886929773969404, "grad_norm": 0.7153114080429077, "learning_rate": 3.120625385601701e-06, "loss": 1.2896, "mean_token_accuracy": 0.6790550202131271, "num_tokens": 2579188647.0, "step": 15372 }, { "entropy": 1.701625217994054, "epoch": 1.6888028343083135, "grad_norm": 0.7289735078811646, "learning_rate": 3.1198517754241565e-06, "loss": 1.3561, "mean_token_accuracy": 0.674707810084025, "num_tokens": 2579331926.0, "step": 15373 }, { "entropy": 1.7479176918665569, "epoch": 1.6889126912196863, "grad_norm": 0.7183084487915039, "learning_rate": 3.119078414649753e-06, "loss": 1.2997, "mean_token_accuracy": 0.6685334344704946, "num_tokens": 2579527532.0, "step": 15374 }, { "entropy": 1.7347841362158458, "epoch": 1.6890225481310592, "grad_norm": 0.7196807265281677, "learning_rate": 3.118305303302962e-06, "loss": 1.3305, "mean_token_accuracy": 0.6832280606031418, "num_tokens": 2579691633.0, "step": 15375 }, { "entropy": 1.736104021469752, "epoch": 1.6891324050424323, "grad_norm": 0.7943740487098694, "learning_rate": 3.117532441408261e-06, "loss": 1.5753, "mean_token_accuracy": 0.6446651866038641, "num_tokens": 2579897232.0, "step": 15376 }, { "entropy": 1.793796718120575, "epoch": 1.6892422619538052, "grad_norm": 0.7013726830482483, "learning_rate": 3.116759828990103e-06, "loss": 1.2496, "mean_token_accuracy": 0.6713108470042547, "num_tokens": 2580015103.0, "step": 15377 }, { "entropy": 1.6883254448572795, "epoch": 1.689352118865178, "grad_norm": 0.7592623829841614, "learning_rate": 3.115987466072946e-06, "loss": 1.456, "mean_token_accuracy": 0.6466412742932638, "num_tokens": 2580213602.0, "step": 15378 }, { "entropy": 1.6982381443182628, "epoch": 1.689461975776551, "grad_norm": 0.7187153697013855, "learning_rate": 3.1152153526812343e-06, "loss": 1.3754, "mean_token_accuracy": 0.6655093431472778, "num_tokens": 2580340316.0, "step": 15379 }, { "entropy": 1.719240536292394, "epoch": 1.6895718326879239, "grad_norm": 0.6955122351646423, "learning_rate": 3.1144434888394003e-06, "loss": 1.335, "mean_token_accuracy": 0.6731832573811213, "num_tokens": 2580460689.0, "step": 15380 }, { "entropy": 1.6896177033583324, "epoch": 1.689681689599297, "grad_norm": 0.689373791217804, "learning_rate": 3.113671874571878e-06, "loss": 1.38, "mean_token_accuracy": 0.658390611410141, "num_tokens": 2580613434.0, "step": 15381 }, { "entropy": 1.6444495916366577, "epoch": 1.6897915465106699, "grad_norm": 0.7218437194824219, "learning_rate": 3.112900509903088e-06, "loss": 1.1382, "mean_token_accuracy": 0.699143057068189, "num_tokens": 2580707620.0, "step": 15382 }, { "entropy": 1.7752784192562103, "epoch": 1.6899014034220428, "grad_norm": 0.7364796996116638, "learning_rate": 3.1121293948574438e-06, "loss": 1.4396, "mean_token_accuracy": 0.6354698687791824, "num_tokens": 2580891653.0, "step": 15383 }, { "entropy": 1.6694513857364655, "epoch": 1.6900112603334159, "grad_norm": 0.764617383480072, "learning_rate": 3.111358529459348e-06, "loss": 1.2351, "mean_token_accuracy": 0.6757246901591619, "num_tokens": 2581032836.0, "step": 15384 }, { "entropy": 1.7019239862759907, "epoch": 1.6901211172447885, "grad_norm": 0.7817053198814392, "learning_rate": 3.1105879137332006e-06, "loss": 1.4947, "mean_token_accuracy": 0.6470496108134588, "num_tokens": 2581191748.0, "step": 15385 }, { "entropy": 1.6950092216332753, "epoch": 1.6902309741561616, "grad_norm": 0.7115320563316345, "learning_rate": 3.109817547703392e-06, "loss": 1.3195, "mean_token_accuracy": 0.665327916542689, "num_tokens": 2581334830.0, "step": 15386 }, { "entropy": 1.7190478245417278, "epoch": 1.6903408310675345, "grad_norm": 0.6659532785415649, "learning_rate": 3.1090474313942998e-06, "loss": 1.3647, "mean_token_accuracy": 0.6521613150835037, "num_tokens": 2581483982.0, "step": 15387 }, { "entropy": 1.7014067073663075, "epoch": 1.6904506879789074, "grad_norm": 0.8550192713737488, "learning_rate": 3.108277564830303e-06, "loss": 1.5101, "mean_token_accuracy": 0.6476392249266306, "num_tokens": 2581639613.0, "step": 15388 }, { "entropy": 1.6655798256397247, "epoch": 1.6905605448902805, "grad_norm": 0.7115856409072876, "learning_rate": 3.1075079480357634e-06, "loss": 1.4428, "mean_token_accuracy": 0.6531449556350708, "num_tokens": 2581810059.0, "step": 15389 }, { "entropy": 1.7903032004833221, "epoch": 1.6906704018016534, "grad_norm": 0.7611822485923767, "learning_rate": 3.106738581035042e-06, "loss": 1.4924, "mean_token_accuracy": 0.635255828499794, "num_tokens": 2581989898.0, "step": 15390 }, { "entropy": 1.6987177928288777, "epoch": 1.6907802587130263, "grad_norm": 0.6165050268173218, "learning_rate": 3.1059694638524886e-06, "loss": 1.3535, "mean_token_accuracy": 0.6634356826543808, "num_tokens": 2582145058.0, "step": 15391 }, { "entropy": 1.6762576599915822, "epoch": 1.6908901156243992, "grad_norm": 0.6371328234672546, "learning_rate": 3.105200596512442e-06, "loss": 1.4345, "mean_token_accuracy": 0.6503476947546005, "num_tokens": 2582368067.0, "step": 15392 }, { "entropy": 1.702197919289271, "epoch": 1.690999972535772, "grad_norm": 0.75450199842453, "learning_rate": 3.10443197903924e-06, "loss": 1.4777, "mean_token_accuracy": 0.6465541025002798, "num_tokens": 2582567443.0, "step": 15393 }, { "entropy": 1.6959167917569478, "epoch": 1.6911098294471452, "grad_norm": 0.7660825252532959, "learning_rate": 3.1036636114572088e-06, "loss": 1.1762, "mean_token_accuracy": 0.6866554866234461, "num_tokens": 2582690966.0, "step": 15394 }, { "entropy": 1.7242404520511627, "epoch": 1.691219686358518, "grad_norm": 0.8516025543212891, "learning_rate": 3.1028954937906668e-06, "loss": 1.4467, "mean_token_accuracy": 0.6590066701173782, "num_tokens": 2582850808.0, "step": 15395 }, { "entropy": 1.7131598989168804, "epoch": 1.691329543269891, "grad_norm": 0.7381974458694458, "learning_rate": 3.1021276260639217e-06, "loss": 1.4181, "mean_token_accuracy": 0.6618935763835907, "num_tokens": 2583009712.0, "step": 15396 }, { "entropy": 1.6620845595995586, "epoch": 1.691439400181264, "grad_norm": 0.6693155169487, "learning_rate": 3.10136000830128e-06, "loss": 1.5235, "mean_token_accuracy": 0.6472597966591517, "num_tokens": 2583188095.0, "step": 15397 }, { "entropy": 1.672204573949178, "epoch": 1.6915492570926367, "grad_norm": 0.936718225479126, "learning_rate": 3.1005926405270353e-06, "loss": 1.2397, "mean_token_accuracy": 0.6774502595265707, "num_tokens": 2583334819.0, "step": 15398 }, { "entropy": 1.7408578594525654, "epoch": 1.6916591140040098, "grad_norm": 0.6551694869995117, "learning_rate": 3.099825522765472e-06, "loss": 1.3283, "mean_token_accuracy": 0.6612852861483892, "num_tokens": 2583476321.0, "step": 15399 }, { "entropy": 1.6576103170712788, "epoch": 1.6917689709153827, "grad_norm": 0.7109887003898621, "learning_rate": 3.099058655040873e-06, "loss": 1.4108, "mean_token_accuracy": 0.6661920497814814, "num_tokens": 2583634776.0, "step": 15400 }, { "entropy": 1.795667548974355, "epoch": 1.6918788278267556, "grad_norm": 0.8126919865608215, "learning_rate": 3.098292037377505e-06, "loss": 1.4247, "mean_token_accuracy": 0.666491856177648, "num_tokens": 2583784360.0, "step": 15401 }, { "entropy": 1.6871559222539265, "epoch": 1.6919886847381287, "grad_norm": 0.6559981107711792, "learning_rate": 3.0975256697996358e-06, "loss": 1.2416, "mean_token_accuracy": 0.6782428324222565, "num_tokens": 2583926742.0, "step": 15402 }, { "entropy": 1.7118937869866688, "epoch": 1.6920985416495016, "grad_norm": 0.7892350554466248, "learning_rate": 3.096759552331518e-06, "loss": 1.4847, "mean_token_accuracy": 0.6499680678049723, "num_tokens": 2584097203.0, "step": 15403 }, { "entropy": 1.7176280121008556, "epoch": 1.6922083985608745, "grad_norm": 0.6054561734199524, "learning_rate": 3.0959936849973974e-06, "loss": 1.2682, "mean_token_accuracy": 0.6729069898525873, "num_tokens": 2584226875.0, "step": 15404 }, { "entropy": 1.7033534049987793, "epoch": 1.6923182554722473, "grad_norm": 0.6824467778205872, "learning_rate": 3.095228067821517e-06, "loss": 1.376, "mean_token_accuracy": 0.6606364697217941, "num_tokens": 2584430184.0, "step": 15405 }, { "entropy": 1.6788690189520519, "epoch": 1.6924281123836202, "grad_norm": 0.6166786551475525, "learning_rate": 3.0944627008281034e-06, "loss": 1.3412, "mean_token_accuracy": 0.6616918991009394, "num_tokens": 2584586860.0, "step": 15406 }, { "entropy": 1.6675065159797668, "epoch": 1.6925379692949933, "grad_norm": 0.6525241732597351, "learning_rate": 3.0936975840413863e-06, "loss": 1.5037, "mean_token_accuracy": 0.6569078887502352, "num_tokens": 2584788098.0, "step": 15407 }, { "entropy": 1.719109723965327, "epoch": 1.6926478262063662, "grad_norm": 0.629504919052124, "learning_rate": 3.0929327174855765e-06, "loss": 1.4084, "mean_token_accuracy": 0.6696446587642034, "num_tokens": 2584978784.0, "step": 15408 }, { "entropy": 1.6656635701656342, "epoch": 1.692757683117739, "grad_norm": 0.5897053480148315, "learning_rate": 3.092168101184883e-06, "loss": 1.4735, "mean_token_accuracy": 0.6469605465730032, "num_tokens": 2585167598.0, "step": 15409 }, { "entropy": 1.7369298934936523, "epoch": 1.6928675400291122, "grad_norm": 0.7727683186531067, "learning_rate": 3.091403735163507e-06, "loss": 1.4347, "mean_token_accuracy": 0.6634906083345413, "num_tokens": 2585317585.0, "step": 15410 }, { "entropy": 1.6967070400714874, "epoch": 1.6929773969404849, "grad_norm": 0.6468930840492249, "learning_rate": 3.090639619445638e-06, "loss": 1.4222, "mean_token_accuracy": 0.6505736857652664, "num_tokens": 2585533635.0, "step": 15411 }, { "entropy": 1.6828734079996746, "epoch": 1.693087253851858, "grad_norm": 0.7482141256332397, "learning_rate": 3.08987575405546e-06, "loss": 1.2519, "mean_token_accuracy": 0.6751055518786112, "num_tokens": 2585666750.0, "step": 15412 }, { "entropy": 1.6802496711413066, "epoch": 1.6931971107632309, "grad_norm": 0.5791299939155579, "learning_rate": 3.0891121390171498e-06, "loss": 1.4935, "mean_token_accuracy": 0.6477037717898687, "num_tokens": 2585855103.0, "step": 15413 }, { "entropy": 1.7238198220729828, "epoch": 1.6933069676746038, "grad_norm": 0.6337864995002747, "learning_rate": 3.088348774354878e-06, "loss": 1.4572, "mean_token_accuracy": 0.6338127752145132, "num_tokens": 2586147894.0, "step": 15414 }, { "entropy": 1.7157885332902272, "epoch": 1.6934168245859769, "grad_norm": 0.647091269493103, "learning_rate": 3.0875856600928017e-06, "loss": 1.5886, "mean_token_accuracy": 0.6402155508597692, "num_tokens": 2586359826.0, "step": 15415 }, { "entropy": 1.7303306659062703, "epoch": 1.6935266814973498, "grad_norm": 0.6226432919502258, "learning_rate": 3.0868227962550725e-06, "loss": 1.3488, "mean_token_accuracy": 0.6693485826253891, "num_tokens": 2586542824.0, "step": 15416 }, { "entropy": 1.6407539049784343, "epoch": 1.6936365384087226, "grad_norm": 0.6539502739906311, "learning_rate": 3.0860601828658377e-06, "loss": 1.4628, "mean_token_accuracy": 0.653552715977033, "num_tokens": 2586738889.0, "step": 15417 }, { "entropy": 1.7392374575138092, "epoch": 1.6937463953200955, "grad_norm": 0.5961517691612244, "learning_rate": 3.08529781994923e-06, "loss": 1.4295, "mean_token_accuracy": 0.654540628194809, "num_tokens": 2586899359.0, "step": 15418 }, { "entropy": 1.6805303692817688, "epoch": 1.6938562522314684, "grad_norm": 0.6699274182319641, "learning_rate": 3.0845357075293824e-06, "loss": 1.3482, "mean_token_accuracy": 0.6532176484664282, "num_tokens": 2587053914.0, "step": 15419 }, { "entropy": 1.7027775545914967, "epoch": 1.6939661091428415, "grad_norm": 0.755435585975647, "learning_rate": 3.0837738456304122e-06, "loss": 1.3533, "mean_token_accuracy": 0.6679652184247971, "num_tokens": 2587186667.0, "step": 15420 }, { "entropy": 1.7010966738065083, "epoch": 1.6940759660542144, "grad_norm": 0.7067722082138062, "learning_rate": 3.0830122342764314e-06, "loss": 1.3864, "mean_token_accuracy": 0.6638060361146927, "num_tokens": 2587384376.0, "step": 15421 }, { "entropy": 1.6957202355066936, "epoch": 1.6941858229655873, "grad_norm": 0.6873775124549866, "learning_rate": 3.0822508734915473e-06, "loss": 1.2841, "mean_token_accuracy": 0.6708781023820242, "num_tokens": 2587502711.0, "step": 15422 }, { "entropy": 1.7451180815696716, "epoch": 1.6942956798769604, "grad_norm": 0.6628127098083496, "learning_rate": 3.0814897632998546e-06, "loss": 1.5383, "mean_token_accuracy": 0.6356658140818278, "num_tokens": 2587749796.0, "step": 15423 }, { "entropy": 1.7371935844421387, "epoch": 1.694405536788333, "grad_norm": 0.7622631788253784, "learning_rate": 3.0807289037254417e-06, "loss": 1.3687, "mean_token_accuracy": 0.663616955280304, "num_tokens": 2587884693.0, "step": 15424 }, { "entropy": 1.7322389682133992, "epoch": 1.6945153936997062, "grad_norm": 0.648070752620697, "learning_rate": 3.0799682947923906e-06, "loss": 1.3667, "mean_token_accuracy": 0.6544249455134074, "num_tokens": 2588016729.0, "step": 15425 }, { "entropy": 1.7039423783620198, "epoch": 1.694625250611079, "grad_norm": 0.6290963888168335, "learning_rate": 3.0792079365247755e-06, "loss": 1.3423, "mean_token_accuracy": 0.6653772393862406, "num_tokens": 2588197565.0, "step": 15426 }, { "entropy": 1.687830110390981, "epoch": 1.694735107522452, "grad_norm": 0.6381257176399231, "learning_rate": 3.07844782894666e-06, "loss": 1.3763, "mean_token_accuracy": 0.6565463542938232, "num_tokens": 2588365992.0, "step": 15427 }, { "entropy": 1.6616821885108948, "epoch": 1.694844964433825, "grad_norm": 0.8576768040657043, "learning_rate": 3.0776879720820997e-06, "loss": 1.4612, "mean_token_accuracy": 0.6518943955500921, "num_tokens": 2588519705.0, "step": 15428 }, { "entropy": 1.7395183543364208, "epoch": 1.694954821345198, "grad_norm": 0.71395343542099, "learning_rate": 3.076928365955147e-06, "loss": 1.4336, "mean_token_accuracy": 0.6496130575736364, "num_tokens": 2588695021.0, "step": 15429 }, { "entropy": 1.7011998693148296, "epoch": 1.6950646782565708, "grad_norm": 0.6785951256752014, "learning_rate": 3.0761690105898393e-06, "loss": 1.298, "mean_token_accuracy": 0.6675257285435995, "num_tokens": 2588869513.0, "step": 15430 }, { "entropy": 1.737764298915863, "epoch": 1.695174535167944, "grad_norm": 0.9960548877716064, "learning_rate": 3.0754099060102135e-06, "loss": 1.3802, "mean_token_accuracy": 0.6851067890723547, "num_tokens": 2589015214.0, "step": 15431 }, { "entropy": 1.6571769615014393, "epoch": 1.6952843920793166, "grad_norm": 0.6647917628288269, "learning_rate": 3.074651052240294e-06, "loss": 1.3572, "mean_token_accuracy": 0.6534274220466614, "num_tokens": 2589200929.0, "step": 15432 }, { "entropy": 1.6438042024771373, "epoch": 1.6953942489906897, "grad_norm": 0.7289125919342041, "learning_rate": 3.073892449304095e-06, "loss": 1.4241, "mean_token_accuracy": 0.659342810511589, "num_tokens": 2589353999.0, "step": 15433 }, { "entropy": 1.7090658744176228, "epoch": 1.6955041059020626, "grad_norm": 0.7296878695487976, "learning_rate": 3.0731340972256303e-06, "loss": 1.3166, "mean_token_accuracy": 0.6685633112986883, "num_tokens": 2589524135.0, "step": 15434 }, { "entropy": 1.6926236947377522, "epoch": 1.6956139628134355, "grad_norm": 0.5468199849128723, "learning_rate": 3.0723759960288997e-06, "loss": 1.4544, "mean_token_accuracy": 0.6467462033033371, "num_tokens": 2589746970.0, "step": 15435 }, { "entropy": 1.677124152580897, "epoch": 1.6957238197248086, "grad_norm": 0.6528844833374023, "learning_rate": 3.0716181457378945e-06, "loss": 1.412, "mean_token_accuracy": 0.6589123407999674, "num_tokens": 2589913367.0, "step": 15436 }, { "entropy": 1.735766738653183, "epoch": 1.6958336766361812, "grad_norm": 0.6354487538337708, "learning_rate": 3.070860546376602e-06, "loss": 1.4608, "mean_token_accuracy": 0.6477210422356924, "num_tokens": 2590137460.0, "step": 15437 }, { "entropy": 1.705346167087555, "epoch": 1.6959435335475543, "grad_norm": 0.7688722610473633, "learning_rate": 3.0701031979690033e-06, "loss": 1.3772, "mean_token_accuracy": 0.658728207151095, "num_tokens": 2590326917.0, "step": 15438 }, { "entropy": 1.728643884261449, "epoch": 1.6960533904589272, "grad_norm": 0.7233805060386658, "learning_rate": 3.0693461005390636e-06, "loss": 1.4744, "mean_token_accuracy": 0.649912640452385, "num_tokens": 2590497105.0, "step": 15439 }, { "entropy": 1.7629015843073528, "epoch": 1.6961632473703, "grad_norm": 0.6526691317558289, "learning_rate": 3.0685892541107452e-06, "loss": 1.4067, "mean_token_accuracy": 0.6612003346284231, "num_tokens": 2590642085.0, "step": 15440 }, { "entropy": 1.6741429766019185, "epoch": 1.6962731042816732, "grad_norm": 0.697309672832489, "learning_rate": 3.067832658708004e-06, "loss": 1.5036, "mean_token_accuracy": 0.656085841357708, "num_tokens": 2590814853.0, "step": 15441 }, { "entropy": 1.6753457883993785, "epoch": 1.696382961193046, "grad_norm": 0.6827280521392822, "learning_rate": 3.0670763143547853e-06, "loss": 1.4097, "mean_token_accuracy": 0.6564631958802541, "num_tokens": 2591007239.0, "step": 15442 }, { "entropy": 1.6726371546586354, "epoch": 1.696492818104419, "grad_norm": 0.7021117806434631, "learning_rate": 3.066320221075025e-06, "loss": 1.5803, "mean_token_accuracy": 0.6457217087348303, "num_tokens": 2591224463.0, "step": 15443 }, { "entropy": 1.6059078176816304, "epoch": 1.696602675015792, "grad_norm": 0.6383489370346069, "learning_rate": 3.065564378892657e-06, "loss": 1.2984, "mean_token_accuracy": 0.6645799279212952, "num_tokens": 2591396199.0, "step": 15444 }, { "entropy": 1.6366430819034576, "epoch": 1.6967125319271648, "grad_norm": 0.5697821378707886, "learning_rate": 3.064808787831598e-06, "loss": 1.3218, "mean_token_accuracy": 0.6684574782848358, "num_tokens": 2591589772.0, "step": 15445 }, { "entropy": 1.7955954174200695, "epoch": 1.6968223888385379, "grad_norm": 0.6045777201652527, "learning_rate": 3.0640534479157686e-06, "loss": 1.6147, "mean_token_accuracy": 0.6359433382749557, "num_tokens": 2591765239.0, "step": 15446 }, { "entropy": 1.7104682524998982, "epoch": 1.6969322457499108, "grad_norm": 0.6094769239425659, "learning_rate": 3.0632983591690695e-06, "loss": 1.4571, "mean_token_accuracy": 0.645427738626798, "num_tokens": 2591963924.0, "step": 15447 }, { "entropy": 1.6970649858315785, "epoch": 1.6970421026612836, "grad_norm": 0.7453381419181824, "learning_rate": 3.062543521615401e-06, "loss": 1.3063, "mean_token_accuracy": 0.6602601408958435, "num_tokens": 2592117840.0, "step": 15448 }, { "entropy": 1.6575465599695842, "epoch": 1.6971519595726567, "grad_norm": 0.5633898973464966, "learning_rate": 3.061788935278653e-06, "loss": 1.3509, "mean_token_accuracy": 0.650563841064771, "num_tokens": 2592299746.0, "step": 15449 }, { "entropy": 1.7399055063724518, "epoch": 1.6972618164840294, "grad_norm": 0.6714982390403748, "learning_rate": 3.0610346001827085e-06, "loss": 1.3131, "mean_token_accuracy": 0.6603095183769861, "num_tokens": 2592412511.0, "step": 15450 }, { "entropy": 1.6033929189046223, "epoch": 1.6973716733954025, "grad_norm": 0.5816755890846252, "learning_rate": 3.060280516351444e-06, "loss": 1.32, "mean_token_accuracy": 0.6678592562675476, "num_tokens": 2592565463.0, "step": 15451 }, { "entropy": 1.7209522624810536, "epoch": 1.6974815303067754, "grad_norm": 0.6687294840812683, "learning_rate": 3.0595266838087195e-06, "loss": 1.5754, "mean_token_accuracy": 0.6195499996344248, "num_tokens": 2592769048.0, "step": 15452 }, { "entropy": 1.695073793331782, "epoch": 1.6975913872181483, "grad_norm": 0.7344485521316528, "learning_rate": 3.0587731025784006e-06, "loss": 1.3381, "mean_token_accuracy": 0.6686349560817083, "num_tokens": 2592917112.0, "step": 15453 }, { "entropy": 1.748264600833257, "epoch": 1.6977012441295214, "grad_norm": 0.7268601655960083, "learning_rate": 3.058019772684333e-06, "loss": 1.3523, "mean_token_accuracy": 0.660114531715711, "num_tokens": 2593065546.0, "step": 15454 }, { "entropy": 1.7753359874089558, "epoch": 1.6978111010408943, "grad_norm": 0.6813443303108215, "learning_rate": 3.0572666941503602e-06, "loss": 1.3395, "mean_token_accuracy": 0.6615369518597921, "num_tokens": 2593207742.0, "step": 15455 }, { "entropy": 1.686709036429723, "epoch": 1.6979209579522672, "grad_norm": 0.7412280440330505, "learning_rate": 3.0565138670003192e-06, "loss": 1.1122, "mean_token_accuracy": 0.6998019615809122, "num_tokens": 2593328448.0, "step": 15456 }, { "entropy": 1.7757901052633922, "epoch": 1.6980308148636403, "grad_norm": 0.8149046897888184, "learning_rate": 3.0557612912580332e-06, "loss": 1.6069, "mean_token_accuracy": 0.6343324283758799, "num_tokens": 2593476516.0, "step": 15457 }, { "entropy": 1.6797574857870738, "epoch": 1.698140671775013, "grad_norm": 0.6735820770263672, "learning_rate": 3.055008966947323e-06, "loss": 1.4156, "mean_token_accuracy": 0.6628180791934332, "num_tokens": 2593628490.0, "step": 15458 }, { "entropy": 1.729365775982539, "epoch": 1.698250528686386, "grad_norm": 0.7286481857299805, "learning_rate": 3.0542568940920007e-06, "loss": 1.3168, "mean_token_accuracy": 0.6649684309959412, "num_tokens": 2593751547.0, "step": 15459 }, { "entropy": 1.7199760377407074, "epoch": 1.698360385597759, "grad_norm": 0.8374939560890198, "learning_rate": 3.053505072715865e-06, "loss": 1.4923, "mean_token_accuracy": 0.6528761138518652, "num_tokens": 2593899249.0, "step": 15460 }, { "entropy": 1.7205273906389873, "epoch": 1.6984702425091318, "grad_norm": 0.7138111591339111, "learning_rate": 3.0527535028427126e-06, "loss": 1.4606, "mean_token_accuracy": 0.644857699672381, "num_tokens": 2594036578.0, "step": 15461 }, { "entropy": 1.7191713253657024, "epoch": 1.698580099420505, "grad_norm": 0.6800480484962463, "learning_rate": 3.0520021844963326e-06, "loss": 1.4163, "mean_token_accuracy": 0.6557717521985372, "num_tokens": 2594203729.0, "step": 15462 }, { "entropy": 1.6896109481652577, "epoch": 1.6986899563318776, "grad_norm": 0.7030267715454102, "learning_rate": 3.051251117700502e-06, "loss": 1.2521, "mean_token_accuracy": 0.6746014902989069, "num_tokens": 2594347487.0, "step": 15463 }, { "entropy": 1.7087683777014415, "epoch": 1.6987998132432507, "grad_norm": 0.6061800122261047, "learning_rate": 3.05050030247899e-06, "loss": 1.3607, "mean_token_accuracy": 0.653743584950765, "num_tokens": 2594543405.0, "step": 15464 }, { "entropy": 1.7524566849072774, "epoch": 1.6989096701546236, "grad_norm": 0.6522343754768372, "learning_rate": 3.049749738855563e-06, "loss": 1.4177, "mean_token_accuracy": 0.6434753388166428, "num_tokens": 2594716593.0, "step": 15465 }, { "entropy": 1.6446398794651031, "epoch": 1.6990195270659965, "grad_norm": 0.6932255625724792, "learning_rate": 3.0489994268539746e-06, "loss": 1.3285, "mean_token_accuracy": 0.6653372198343277, "num_tokens": 2594867263.0, "step": 15466 }, { "entropy": 1.7672787706057231, "epoch": 1.6991293839773696, "grad_norm": 0.8536300659179688, "learning_rate": 3.048249366497971e-06, "loss": 1.435, "mean_token_accuracy": 0.645067016283671, "num_tokens": 2595026533.0, "step": 15467 }, { "entropy": 1.7172228395938873, "epoch": 1.6992392408887425, "grad_norm": 0.5975283980369568, "learning_rate": 3.0474995578112907e-06, "loss": 1.4422, "mean_token_accuracy": 0.6420785139004389, "num_tokens": 2595192916.0, "step": 15468 }, { "entropy": 1.7480799158414204, "epoch": 1.6993490978001153, "grad_norm": 0.6868378520011902, "learning_rate": 3.0467500008176656e-06, "loss": 1.3149, "mean_token_accuracy": 0.6674151619275411, "num_tokens": 2595340344.0, "step": 15469 }, { "entropy": 1.7092045744260151, "epoch": 1.6994589547114884, "grad_norm": 0.6722932457923889, "learning_rate": 3.0460006955408206e-06, "loss": 1.5016, "mean_token_accuracy": 0.638856107989947, "num_tokens": 2595532316.0, "step": 15470 }, { "entropy": 1.698676884174347, "epoch": 1.699568811622861, "grad_norm": 0.739374041557312, "learning_rate": 3.0452516420044685e-06, "loss": 1.4562, "mean_token_accuracy": 0.6575873990853628, "num_tokens": 2595705984.0, "step": 15471 }, { "entropy": 1.7235010464986165, "epoch": 1.6996786685342342, "grad_norm": 0.6385025382041931, "learning_rate": 3.044502840232318e-06, "loss": 1.4149, "mean_token_accuracy": 0.6463307837645212, "num_tokens": 2595905802.0, "step": 15472 }, { "entropy": 1.6757484376430511, "epoch": 1.699788525445607, "grad_norm": 0.6640949845314026, "learning_rate": 3.043754290248069e-06, "loss": 1.4525, "mean_token_accuracy": 0.6534897486368815, "num_tokens": 2596076575.0, "step": 15473 }, { "entropy": 1.6671640475591023, "epoch": 1.69989838235698, "grad_norm": 0.6480453014373779, "learning_rate": 3.0430059920754084e-06, "loss": 1.3501, "mean_token_accuracy": 0.6624239881833395, "num_tokens": 2596216668.0, "step": 15474 }, { "entropy": 1.6163178483645122, "epoch": 1.700008239268353, "grad_norm": 0.6514328718185425, "learning_rate": 3.042257945738025e-06, "loss": 1.4128, "mean_token_accuracy": 0.6655259480079015, "num_tokens": 2596389965.0, "step": 15475 }, { "entropy": 1.69280410806338, "epoch": 1.7001180961797258, "grad_norm": 0.8175613284111023, "learning_rate": 3.041510151259592e-06, "loss": 1.2518, "mean_token_accuracy": 0.6777097036441168, "num_tokens": 2596514410.0, "step": 15476 }, { "entropy": 1.8059017360210419, "epoch": 1.7002279530910989, "grad_norm": 0.7695567607879639, "learning_rate": 3.0407626086637753e-06, "loss": 1.5005, "mean_token_accuracy": 0.6499375601609548, "num_tokens": 2596651139.0, "step": 15477 }, { "entropy": 1.6848878860473633, "epoch": 1.7003378100024718, "grad_norm": 0.7237509489059448, "learning_rate": 3.0400153179742366e-06, "loss": 1.3545, "mean_token_accuracy": 0.6637776046991348, "num_tokens": 2596813442.0, "step": 15478 }, { "entropy": 1.6869426270325978, "epoch": 1.7004476669138446, "grad_norm": 0.7502117156982422, "learning_rate": 3.039268279214626e-06, "loss": 1.4079, "mean_token_accuracy": 0.6469310919443766, "num_tokens": 2596981179.0, "step": 15479 }, { "entropy": 1.7178981204827626, "epoch": 1.7005575238252177, "grad_norm": 0.6466111540794373, "learning_rate": 3.038521492408586e-06, "loss": 1.5021, "mean_token_accuracy": 0.6554898222287496, "num_tokens": 2597147861.0, "step": 15480 }, { "entropy": 1.710584968328476, "epoch": 1.7006673807365906, "grad_norm": 0.720398485660553, "learning_rate": 3.037774957579752e-06, "loss": 1.5024, "mean_token_accuracy": 0.635799452662468, "num_tokens": 2597371591.0, "step": 15481 }, { "entropy": 1.6549212435881298, "epoch": 1.7007772376479635, "grad_norm": 0.6625518798828125, "learning_rate": 3.0370286747517565e-06, "loss": 1.3681, "mean_token_accuracy": 0.6604462365309397, "num_tokens": 2597518168.0, "step": 15482 }, { "entropy": 1.6826780637105305, "epoch": 1.7008870945593366, "grad_norm": 0.762800395488739, "learning_rate": 3.036282643948214e-06, "loss": 1.282, "mean_token_accuracy": 0.6670280794302622, "num_tokens": 2597659060.0, "step": 15483 }, { "entropy": 1.7338972091674805, "epoch": 1.7009969514707093, "grad_norm": 0.7537745833396912, "learning_rate": 3.0355368651927354e-06, "loss": 1.4775, "mean_token_accuracy": 0.6528653750816981, "num_tokens": 2597822951.0, "step": 15484 }, { "entropy": 1.7104704082012177, "epoch": 1.7011068083820824, "grad_norm": 0.6628887057304382, "learning_rate": 3.034791338508929e-06, "loss": 1.4674, "mean_token_accuracy": 0.6388405313094457, "num_tokens": 2597981401.0, "step": 15485 }, { "entropy": 1.6909295320510864, "epoch": 1.7012166652934553, "grad_norm": 0.8931862711906433, "learning_rate": 3.034046063920385e-06, "loss": 1.2176, "mean_token_accuracy": 0.6916706810394923, "num_tokens": 2598102875.0, "step": 15486 }, { "entropy": 1.7621839741865795, "epoch": 1.7013265222048282, "grad_norm": 0.719618022441864, "learning_rate": 3.033301041450695e-06, "loss": 1.3934, "mean_token_accuracy": 0.6588169485330582, "num_tokens": 2598256543.0, "step": 15487 }, { "entropy": 1.7612866361935933, "epoch": 1.7014363791162013, "grad_norm": 0.6616114377975464, "learning_rate": 3.0325562711234367e-06, "loss": 1.5589, "mean_token_accuracy": 0.6347967982292175, "num_tokens": 2598469802.0, "step": 15488 }, { "entropy": 1.6207097272078197, "epoch": 1.701546236027574, "grad_norm": 0.595119059085846, "learning_rate": 3.0318117529621813e-06, "loss": 1.2714, "mean_token_accuracy": 0.6714903662602106, "num_tokens": 2598639989.0, "step": 15489 }, { "entropy": 1.7276874681313832, "epoch": 1.701656092938947, "grad_norm": 0.6539283990859985, "learning_rate": 3.031067486990495e-06, "loss": 1.5024, "mean_token_accuracy": 0.6353075504302979, "num_tokens": 2598835997.0, "step": 15490 }, { "entropy": 1.7242399354775746, "epoch": 1.70176594985032, "grad_norm": 0.7293862104415894, "learning_rate": 3.0303234732319324e-06, "loss": 1.4393, "mean_token_accuracy": 0.6572525550921758, "num_tokens": 2598956041.0, "step": 15491 }, { "entropy": 1.696977545817693, "epoch": 1.7018758067616928, "grad_norm": 0.6066075563430786, "learning_rate": 3.029579711710038e-06, "loss": 1.3658, "mean_token_accuracy": 0.666529655456543, "num_tokens": 2599160795.0, "step": 15492 }, { "entropy": 1.6414269904295604, "epoch": 1.701985663673066, "grad_norm": 0.7428905367851257, "learning_rate": 3.028836202448355e-06, "loss": 1.0967, "mean_token_accuracy": 0.6895180543263754, "num_tokens": 2599345751.0, "step": 15493 }, { "entropy": 1.7395719190438588, "epoch": 1.7020955205844388, "grad_norm": 0.8325342535972595, "learning_rate": 3.0280929454704154e-06, "loss": 1.2534, "mean_token_accuracy": 0.6692363371451696, "num_tokens": 2599447366.0, "step": 15494 }, { "entropy": 1.7216653128465016, "epoch": 1.7022053774958117, "grad_norm": 0.9464581608772278, "learning_rate": 3.0273499407997424e-06, "loss": 1.5236, "mean_token_accuracy": 0.6325125495592753, "num_tokens": 2599645835.0, "step": 15495 }, { "entropy": 1.643006682395935, "epoch": 1.7023152344071848, "grad_norm": 0.6483979225158691, "learning_rate": 3.0266071884598485e-06, "loss": 1.2448, "mean_token_accuracy": 0.6851489593585333, "num_tokens": 2599820998.0, "step": 15496 }, { "entropy": 1.7206957936286926, "epoch": 1.7024250913185575, "grad_norm": 0.8952434062957764, "learning_rate": 3.025864688474247e-06, "loss": 1.3475, "mean_token_accuracy": 0.6655914137760798, "num_tokens": 2600007097.0, "step": 15497 }, { "entropy": 1.666352113087972, "epoch": 1.7025349482299306, "grad_norm": 0.7107973694801331, "learning_rate": 3.0251224408664327e-06, "loss": 1.4151, "mean_token_accuracy": 0.6601972033580145, "num_tokens": 2600256180.0, "step": 15498 }, { "entropy": 1.6548288067181904, "epoch": 1.7026448051413035, "grad_norm": 0.6338147521018982, "learning_rate": 3.024380445659901e-06, "loss": 1.358, "mean_token_accuracy": 0.6660072356462479, "num_tokens": 2600424528.0, "step": 15499 }, { "entropy": 1.7549506922562916, "epoch": 1.7027546620526763, "grad_norm": 0.6739881038665771, "learning_rate": 3.023638702878135e-06, "loss": 1.5014, "mean_token_accuracy": 0.651511957248052, "num_tokens": 2600600015.0, "step": 15500 }, { "entropy": 1.6789835790793102, "epoch": 1.7028645189640494, "grad_norm": 0.7053590416908264, "learning_rate": 3.022897212544608e-06, "loss": 1.4764, "mean_token_accuracy": 0.6552553325891495, "num_tokens": 2600761099.0, "step": 15501 }, { "entropy": 1.746220628420512, "epoch": 1.702974375875422, "grad_norm": 0.6821596026420593, "learning_rate": 3.0221559746827905e-06, "loss": 1.2714, "mean_token_accuracy": 0.6693116724491119, "num_tokens": 2600880919.0, "step": 15502 }, { "entropy": 1.6446532607078552, "epoch": 1.7030842327867952, "grad_norm": 0.7012965083122253, "learning_rate": 3.021414989316143e-06, "loss": 1.5149, "mean_token_accuracy": 0.6544087131818136, "num_tokens": 2601075802.0, "step": 15503 }, { "entropy": 1.6924720704555511, "epoch": 1.703194089698168, "grad_norm": 0.7682718634605408, "learning_rate": 3.0206742564681123e-06, "loss": 1.4444, "mean_token_accuracy": 0.6571909934282303, "num_tokens": 2601229320.0, "step": 15504 }, { "entropy": 1.759785145521164, "epoch": 1.703303946609541, "grad_norm": 0.7660031914710999, "learning_rate": 3.0199337761621465e-06, "loss": 1.502, "mean_token_accuracy": 0.6575988878806432, "num_tokens": 2601411092.0, "step": 15505 }, { "entropy": 1.6723263065020244, "epoch": 1.703413803520914, "grad_norm": 0.6515182256698608, "learning_rate": 3.019193548421683e-06, "loss": 1.3288, "mean_token_accuracy": 0.6691017051537832, "num_tokens": 2601564465.0, "step": 15506 }, { "entropy": 1.7310113807519276, "epoch": 1.703523660432287, "grad_norm": 0.6577731370925903, "learning_rate": 3.0184535732701464e-06, "loss": 1.3704, "mean_token_accuracy": 0.6577199498812357, "num_tokens": 2601736489.0, "step": 15507 }, { "entropy": 1.6878896454970043, "epoch": 1.7036335173436599, "grad_norm": 0.8926342129707336, "learning_rate": 3.0177138507309572e-06, "loss": 1.2761, "mean_token_accuracy": 0.6754782150189081, "num_tokens": 2601847021.0, "step": 15508 }, { "entropy": 1.669161597887675, "epoch": 1.703743374255033, "grad_norm": 0.7536756992340088, "learning_rate": 3.0169743808275286e-06, "loss": 1.5964, "mean_token_accuracy": 0.6591108938058218, "num_tokens": 2602004441.0, "step": 15509 }, { "entropy": 1.7129058440526326, "epoch": 1.7038532311664056, "grad_norm": 0.6124458909034729, "learning_rate": 3.016235163583262e-06, "loss": 1.3916, "mean_token_accuracy": 0.6562560399373373, "num_tokens": 2602159772.0, "step": 15510 }, { "entropy": 1.724676748116811, "epoch": 1.7039630880777787, "grad_norm": 0.598620593547821, "learning_rate": 3.0154961990215575e-06, "loss": 1.4405, "mean_token_accuracy": 0.6402031729618708, "num_tokens": 2602341730.0, "step": 15511 }, { "entropy": 1.70653834939003, "epoch": 1.7040729449891516, "grad_norm": 0.6428411602973938, "learning_rate": 3.0147574871658e-06, "loss": 1.381, "mean_token_accuracy": 0.6533275147279104, "num_tokens": 2602508258.0, "step": 15512 }, { "entropy": 1.739314079284668, "epoch": 1.7041828019005245, "grad_norm": 0.6420696973800659, "learning_rate": 3.0140190280393666e-06, "loss": 1.4793, "mean_token_accuracy": 0.640799934665362, "num_tokens": 2602689022.0, "step": 15513 }, { "entropy": 1.6944958964983623, "epoch": 1.7042926588118976, "grad_norm": 0.5888864398002625, "learning_rate": 3.013280821665636e-06, "loss": 1.5175, "mean_token_accuracy": 0.6373367408911387, "num_tokens": 2602903410.0, "step": 15514 }, { "entropy": 1.7060741186141968, "epoch": 1.7044025157232703, "grad_norm": 0.6964573264122009, "learning_rate": 3.012542868067968e-06, "loss": 1.4072, "mean_token_accuracy": 0.6693576574325562, "num_tokens": 2603052533.0, "step": 15515 }, { "entropy": 1.716521163781484, "epoch": 1.7045123726346434, "grad_norm": 0.6152582764625549, "learning_rate": 3.0118051672697164e-06, "loss": 1.3661, "mean_token_accuracy": 0.6634813745816549, "num_tokens": 2603221790.0, "step": 15516 }, { "entropy": 1.748351812362671, "epoch": 1.7046222295460163, "grad_norm": 0.6902201771736145, "learning_rate": 3.011067719294233e-06, "loss": 1.3842, "mean_token_accuracy": 0.6483493248621622, "num_tokens": 2603365662.0, "step": 15517 }, { "entropy": 1.676300545533498, "epoch": 1.7047320864573892, "grad_norm": 0.7123953104019165, "learning_rate": 3.010330524164857e-06, "loss": 1.4256, "mean_token_accuracy": 0.6634288628896078, "num_tokens": 2603506810.0, "step": 15518 }, { "entropy": 1.7149119873841603, "epoch": 1.7048419433687623, "grad_norm": 0.6167012453079224, "learning_rate": 3.0095935819049203e-06, "loss": 1.3524, "mean_token_accuracy": 0.6638331562280655, "num_tokens": 2603657268.0, "step": 15519 }, { "entropy": 1.7451521356900532, "epoch": 1.7049518002801352, "grad_norm": 0.6651485562324524, "learning_rate": 3.0088568925377444e-06, "loss": 1.3648, "mean_token_accuracy": 0.6588761260112127, "num_tokens": 2603778482.0, "step": 15520 }, { "entropy": 1.7001748283704121, "epoch": 1.705061657191508, "grad_norm": 0.6478435397148132, "learning_rate": 3.0081204560866482e-06, "loss": 1.3306, "mean_token_accuracy": 0.6700827330350876, "num_tokens": 2603933217.0, "step": 15521 }, { "entropy": 1.6979700823624928, "epoch": 1.7051715141028811, "grad_norm": 0.5511773824691772, "learning_rate": 3.007384272574939e-06, "loss": 1.3932, "mean_token_accuracy": 0.663392369945844, "num_tokens": 2604155419.0, "step": 15522 }, { "entropy": 1.7112592458724976, "epoch": 1.7052813710142538, "grad_norm": 0.8108544945716858, "learning_rate": 3.0066483420259145e-06, "loss": 1.4776, "mean_token_accuracy": 0.6554440756638845, "num_tokens": 2604343465.0, "step": 15523 }, { "entropy": 1.7090557316939037, "epoch": 1.705391227925627, "grad_norm": 0.650626540184021, "learning_rate": 3.005912664462869e-06, "loss": 1.2697, "mean_token_accuracy": 0.6703123350938162, "num_tokens": 2604482670.0, "step": 15524 }, { "entropy": 1.7024895350138347, "epoch": 1.7055010848369998, "grad_norm": 0.6583566665649414, "learning_rate": 3.0051772399090838e-06, "loss": 1.4045, "mean_token_accuracy": 0.6468039005994797, "num_tokens": 2604665042.0, "step": 15525 }, { "entropy": 1.709368646144867, "epoch": 1.7056109417483727, "grad_norm": 0.6365029215812683, "learning_rate": 3.0044420683878387e-06, "loss": 1.5439, "mean_token_accuracy": 0.6336111923058828, "num_tokens": 2604893566.0, "step": 15526 }, { "entropy": 1.731093277533849, "epoch": 1.7057207986597458, "grad_norm": 0.6655638813972473, "learning_rate": 3.003707149922398e-06, "loss": 1.3796, "mean_token_accuracy": 0.6612533827622732, "num_tokens": 2605048388.0, "step": 15527 }, { "entropy": 1.6967615683873494, "epoch": 1.7058306555711185, "grad_norm": 0.8625466823577881, "learning_rate": 3.002972484536022e-06, "loss": 1.4847, "mean_token_accuracy": 0.6741051077842712, "num_tokens": 2605202061.0, "step": 15528 }, { "entropy": 1.6434633831183116, "epoch": 1.7059405124824916, "grad_norm": 0.6478009223937988, "learning_rate": 3.002238072251965e-06, "loss": 1.4601, "mean_token_accuracy": 0.6438992669185003, "num_tokens": 2605404372.0, "step": 15529 }, { "entropy": 1.7079529066880543, "epoch": 1.7060503693938645, "grad_norm": 0.7610868811607361, "learning_rate": 3.001503913093468e-06, "loss": 1.3129, "mean_token_accuracy": 0.6602237820625305, "num_tokens": 2605566223.0, "step": 15530 }, { "entropy": 1.7487321893374126, "epoch": 1.7061602263052373, "grad_norm": 0.6044963002204895, "learning_rate": 3.0007700070837697e-06, "loss": 1.3794, "mean_token_accuracy": 0.6575321207443873, "num_tokens": 2605735404.0, "step": 15531 }, { "entropy": 1.6425415972868602, "epoch": 1.7062700832166104, "grad_norm": 0.6396393775939941, "learning_rate": 3.0000363542460953e-06, "loss": 1.4136, "mean_token_accuracy": 0.6657944619655609, "num_tokens": 2605976420.0, "step": 15532 }, { "entropy": 1.7656619250774384, "epoch": 1.7063799401279833, "grad_norm": 0.7868967652320862, "learning_rate": 2.999302954603664e-06, "loss": 1.5443, "mean_token_accuracy": 0.6374485790729523, "num_tokens": 2606105556.0, "step": 15533 }, { "entropy": 1.7477157612641652, "epoch": 1.7064897970393562, "grad_norm": 0.7586115002632141, "learning_rate": 2.9985698081796897e-06, "loss": 1.4364, "mean_token_accuracy": 0.6489766389131546, "num_tokens": 2606253316.0, "step": 15534 }, { "entropy": 1.7553973694642384, "epoch": 1.7065996539507293, "grad_norm": 0.7467787861824036, "learning_rate": 2.9978369149973773e-06, "loss": 1.334, "mean_token_accuracy": 0.6667229930559794, "num_tokens": 2606371755.0, "step": 15535 }, { "entropy": 1.7056459089120228, "epoch": 1.706709510862102, "grad_norm": 0.6380571126937866, "learning_rate": 2.997104275079918e-06, "loss": 1.4379, "mean_token_accuracy": 0.6464936286211014, "num_tokens": 2606548341.0, "step": 15536 }, { "entropy": 1.7088016072909038, "epoch": 1.706819367773475, "grad_norm": 0.8023338913917542, "learning_rate": 2.996371888450502e-06, "loss": 1.421, "mean_token_accuracy": 0.6561804662148157, "num_tokens": 2606692621.0, "step": 15537 }, { "entropy": 1.7052049537499745, "epoch": 1.706929224684848, "grad_norm": 0.6686699390411377, "learning_rate": 2.9956397551323113e-06, "loss": 1.4714, "mean_token_accuracy": 0.6560079008340836, "num_tokens": 2606846788.0, "step": 15538 }, { "entropy": 1.7651503086090088, "epoch": 1.7070390815962209, "grad_norm": 0.6589861512184143, "learning_rate": 2.9949078751485156e-06, "loss": 1.5128, "mean_token_accuracy": 0.640378495057424, "num_tokens": 2607021409.0, "step": 15539 }, { "entropy": 1.69097700715065, "epoch": 1.707148938507594, "grad_norm": 0.5823447704315186, "learning_rate": 2.9941762485222766e-06, "loss": 1.3412, "mean_token_accuracy": 0.662921796242396, "num_tokens": 2607189347.0, "step": 15540 }, { "entropy": 1.7773426473140717, "epoch": 1.7072587954189666, "grad_norm": 0.6733897924423218, "learning_rate": 2.993444875276753e-06, "loss": 1.4578, "mean_token_accuracy": 0.642360677321752, "num_tokens": 2607339073.0, "step": 15541 }, { "entropy": 1.7012966771920521, "epoch": 1.7073686523303397, "grad_norm": 0.6776845455169678, "learning_rate": 2.99271375543509e-06, "loss": 1.3251, "mean_token_accuracy": 0.6664983431498209, "num_tokens": 2607490579.0, "step": 15542 }, { "entropy": 1.7195370694001515, "epoch": 1.7074785092417126, "grad_norm": 0.6434686779975891, "learning_rate": 2.99198288902043e-06, "loss": 1.3383, "mean_token_accuracy": 0.6653375178575516, "num_tokens": 2607646549.0, "step": 15543 }, { "entropy": 1.7077328364054363, "epoch": 1.7075883661530855, "grad_norm": 0.6161625385284424, "learning_rate": 2.991252276055903e-06, "loss": 1.3273, "mean_token_accuracy": 0.6651216745376587, "num_tokens": 2607792694.0, "step": 15544 }, { "entropy": 1.6930580735206604, "epoch": 1.7076982230644586, "grad_norm": 0.8237130045890808, "learning_rate": 2.9905219165646316e-06, "loss": 1.2765, "mean_token_accuracy": 0.6694959203402201, "num_tokens": 2607974833.0, "step": 15545 }, { "entropy": 1.7149873475233715, "epoch": 1.7078080799758315, "grad_norm": 0.7254765629768372, "learning_rate": 2.989791810569734e-06, "loss": 1.453, "mean_token_accuracy": 0.6517289926608404, "num_tokens": 2608130283.0, "step": 15546 }, { "entropy": 1.6859596868356068, "epoch": 1.7079179368872044, "grad_norm": 0.731508195400238, "learning_rate": 2.989061958094316e-06, "loss": 1.4882, "mean_token_accuracy": 0.6498477756977081, "num_tokens": 2608311879.0, "step": 15547 }, { "entropy": 1.7175701260566711, "epoch": 1.7080277937985775, "grad_norm": 0.7343022227287292, "learning_rate": 2.9883323591614746e-06, "loss": 1.5166, "mean_token_accuracy": 0.6450098951657613, "num_tokens": 2608528935.0, "step": 15548 }, { "entropy": 1.6760614514350891, "epoch": 1.7081376507099502, "grad_norm": 0.7836624383926392, "learning_rate": 2.9876030137943045e-06, "loss": 1.2276, "mean_token_accuracy": 0.6732871532440186, "num_tokens": 2608638073.0, "step": 15549 }, { "entropy": 1.733736475308736, "epoch": 1.7082475076213233, "grad_norm": 0.6803503036499023, "learning_rate": 2.986873922015891e-06, "loss": 1.3603, "mean_token_accuracy": 0.6762413680553436, "num_tokens": 2608784234.0, "step": 15550 }, { "entropy": 1.7393794854482014, "epoch": 1.7083573645326962, "grad_norm": 0.824210524559021, "learning_rate": 2.9861450838493054e-06, "loss": 1.3731, "mean_token_accuracy": 0.6584658722082773, "num_tokens": 2608916469.0, "step": 15551 }, { "entropy": 1.6854054033756256, "epoch": 1.708467221444069, "grad_norm": 0.728629469871521, "learning_rate": 2.985416499317616e-06, "loss": 1.2944, "mean_token_accuracy": 0.6629828413327535, "num_tokens": 2609072203.0, "step": 15552 }, { "entropy": 1.6955331861972809, "epoch": 1.7085770783554421, "grad_norm": 0.6643010377883911, "learning_rate": 2.9846881684438853e-06, "loss": 1.3922, "mean_token_accuracy": 0.65622046093146, "num_tokens": 2609271094.0, "step": 15553 }, { "entropy": 1.6978692213694255, "epoch": 1.7086869352668148, "grad_norm": 0.7663952708244324, "learning_rate": 2.983960091251159e-06, "loss": 1.3997, "mean_token_accuracy": 0.6570919106403986, "num_tokens": 2609403906.0, "step": 15554 }, { "entropy": 1.7347622215747833, "epoch": 1.708796792178188, "grad_norm": 0.6088001728057861, "learning_rate": 2.9832322677624875e-06, "loss": 1.5643, "mean_token_accuracy": 0.6266407817602158, "num_tokens": 2609617624.0, "step": 15555 }, { "entropy": 1.7591754694779713, "epoch": 1.7089066490895608, "grad_norm": 0.6738680601119995, "learning_rate": 2.9825046980009005e-06, "loss": 1.5579, "mean_token_accuracy": 0.6592029680808386, "num_tokens": 2609781843.0, "step": 15556 }, { "entropy": 1.7426554759343464, "epoch": 1.7090165060009337, "grad_norm": 0.7468079924583435, "learning_rate": 2.981777381989426e-06, "loss": 1.4327, "mean_token_accuracy": 0.6514635235071182, "num_tokens": 2609965574.0, "step": 15557 }, { "entropy": 1.7027061482270558, "epoch": 1.7091263629123068, "grad_norm": 0.6666781306266785, "learning_rate": 2.9810503197510866e-06, "loss": 1.4226, "mean_token_accuracy": 0.6546925703684489, "num_tokens": 2610133359.0, "step": 15558 }, { "entropy": 1.7047854562600453, "epoch": 1.7092362198236797, "grad_norm": 0.7272503972053528, "learning_rate": 2.9803235113088904e-06, "loss": 1.3809, "mean_token_accuracy": 0.6511211693286896, "num_tokens": 2610271326.0, "step": 15559 }, { "entropy": 1.664241353670756, "epoch": 1.7093460767350526, "grad_norm": 0.7079517245292664, "learning_rate": 2.9795969566858394e-06, "loss": 1.4665, "mean_token_accuracy": 0.6483618170022964, "num_tokens": 2610493434.0, "step": 15560 }, { "entropy": 1.7043645282586415, "epoch": 1.7094559336464257, "grad_norm": 0.7159388661384583, "learning_rate": 2.9788706559049305e-06, "loss": 1.1727, "mean_token_accuracy": 0.683139776190122, "num_tokens": 2610632343.0, "step": 15561 }, { "entropy": 1.7370853920777638, "epoch": 1.7095657905577983, "grad_norm": 0.5975568890571594, "learning_rate": 2.978144608989154e-06, "loss": 1.285, "mean_token_accuracy": 0.6826331615447998, "num_tokens": 2610789458.0, "step": 15562 }, { "entropy": 1.7373074094454448, "epoch": 1.7096756474691714, "grad_norm": 0.6020911931991577, "learning_rate": 2.9774188159614847e-06, "loss": 1.4642, "mean_token_accuracy": 0.6399503002564112, "num_tokens": 2610971836.0, "step": 15563 }, { "entropy": 1.7091583808263142, "epoch": 1.7097855043805443, "grad_norm": 0.8372467160224915, "learning_rate": 2.9766932768448937e-06, "loss": 1.404, "mean_token_accuracy": 0.6612410992383957, "num_tokens": 2611119136.0, "step": 15564 }, { "entropy": 1.7217505673567455, "epoch": 1.7098953612919172, "grad_norm": 0.8266700506210327, "learning_rate": 2.9759679916623463e-06, "loss": 1.2311, "mean_token_accuracy": 0.6795663088560104, "num_tokens": 2611227488.0, "step": 15565 }, { "entropy": 1.6986558934052784, "epoch": 1.7100052182032903, "grad_norm": 0.6963996887207031, "learning_rate": 2.9752429604367945e-06, "loss": 1.5835, "mean_token_accuracy": 0.6428253799676895, "num_tokens": 2611427414.0, "step": 15566 }, { "entropy": 1.6627737681070964, "epoch": 1.710115075114663, "grad_norm": 0.621704638004303, "learning_rate": 2.9745181831911894e-06, "loss": 1.4027, "mean_token_accuracy": 0.6542961647113165, "num_tokens": 2611589095.0, "step": 15567 }, { "entropy": 1.6755984326203663, "epoch": 1.710224932026036, "grad_norm": 0.7194933295249939, "learning_rate": 2.973793659948466e-06, "loss": 1.5311, "mean_token_accuracy": 0.6462205847104391, "num_tokens": 2611798545.0, "step": 15568 }, { "entropy": 1.7322870294253032, "epoch": 1.710334788937409, "grad_norm": 0.6190232038497925, "learning_rate": 2.9730693907315566e-06, "loss": 1.6186, "mean_token_accuracy": 0.6203610102335612, "num_tokens": 2612029911.0, "step": 15569 }, { "entropy": 1.7027688721815746, "epoch": 1.7104446458487819, "grad_norm": 0.6538522839546204, "learning_rate": 2.9723453755633848e-06, "loss": 1.4993, "mean_token_accuracy": 0.6435906638701757, "num_tokens": 2612189497.0, "step": 15570 }, { "entropy": 1.7072203656037648, "epoch": 1.710554502760155, "grad_norm": 0.6151244044303894, "learning_rate": 2.9716216144668654e-06, "loss": 1.4434, "mean_token_accuracy": 0.6375455409288406, "num_tokens": 2612351230.0, "step": 15571 }, { "entropy": 1.6858009199301403, "epoch": 1.7106643596715279, "grad_norm": 0.634488582611084, "learning_rate": 2.9708981074649e-06, "loss": 1.3779, "mean_token_accuracy": 0.6542573670546213, "num_tokens": 2612501804.0, "step": 15572 }, { "entropy": 1.6959696511427562, "epoch": 1.7107742165829007, "grad_norm": 0.7555203437805176, "learning_rate": 2.9701748545803938e-06, "loss": 1.5933, "mean_token_accuracy": 0.6376071472962698, "num_tokens": 2612709277.0, "step": 15573 }, { "entropy": 1.6876679261525471, "epoch": 1.7108840734942738, "grad_norm": 0.8136647939682007, "learning_rate": 2.9694518558362363e-06, "loss": 1.505, "mean_token_accuracy": 0.6470917736490568, "num_tokens": 2612885448.0, "step": 15574 }, { "entropy": 1.7294893463452656, "epoch": 1.7109939304056465, "grad_norm": 0.6215494275093079, "learning_rate": 2.968729111255309e-06, "loss": 1.4703, "mean_token_accuracy": 0.6409442375103632, "num_tokens": 2613096534.0, "step": 15575 }, { "entropy": 1.7100212673346202, "epoch": 1.7111037873170196, "grad_norm": 0.7254142761230469, "learning_rate": 2.968006620860485e-06, "loss": 1.4299, "mean_token_accuracy": 0.6658960854013761, "num_tokens": 2613248348.0, "step": 15576 }, { "entropy": 1.7584032714366913, "epoch": 1.7112136442283925, "grad_norm": 0.7717592120170593, "learning_rate": 2.9672843846746326e-06, "loss": 1.4585, "mean_token_accuracy": 0.6663567970196406, "num_tokens": 2613406761.0, "step": 15577 }, { "entropy": 1.719919741153717, "epoch": 1.7113235011397654, "grad_norm": 0.6627902388572693, "learning_rate": 2.966562402720609e-06, "loss": 1.4896, "mean_token_accuracy": 0.6469202389319738, "num_tokens": 2613583879.0, "step": 15578 }, { "entropy": 1.6661270360151927, "epoch": 1.7114333580511385, "grad_norm": 0.7028049230575562, "learning_rate": 2.9658406750212664e-06, "loss": 1.4709, "mean_token_accuracy": 0.6556438406308492, "num_tokens": 2613730637.0, "step": 15579 }, { "entropy": 1.7086673080921173, "epoch": 1.7115432149625114, "grad_norm": 0.648152232170105, "learning_rate": 2.965119201599447e-06, "loss": 1.3956, "mean_token_accuracy": 0.6587710777918497, "num_tokens": 2613881692.0, "step": 15580 }, { "entropy": 1.664686808983485, "epoch": 1.7116530718738843, "grad_norm": 0.63326096534729, "learning_rate": 2.964397982477983e-06, "loss": 1.2859, "mean_token_accuracy": 0.6708424985408783, "num_tokens": 2614039965.0, "step": 15581 }, { "entropy": 1.705962876478831, "epoch": 1.7117629287852572, "grad_norm": 0.6361053586006165, "learning_rate": 2.963677017679702e-06, "loss": 1.3926, "mean_token_accuracy": 0.6527653137842814, "num_tokens": 2614228171.0, "step": 15582 }, { "entropy": 1.6903795500596364, "epoch": 1.71187278569663, "grad_norm": 0.8894221186637878, "learning_rate": 2.962956307227423e-06, "loss": 1.2964, "mean_token_accuracy": 0.661086842417717, "num_tokens": 2614404823.0, "step": 15583 }, { "entropy": 1.7170774539311726, "epoch": 1.7119826426080031, "grad_norm": 0.5786034464836121, "learning_rate": 2.962235851143955e-06, "loss": 1.4699, "mean_token_accuracy": 0.6363462110360464, "num_tokens": 2614608986.0, "step": 15584 }, { "entropy": 1.700467934211095, "epoch": 1.712092499519376, "grad_norm": 0.7319923639297485, "learning_rate": 2.9615156494520973e-06, "loss": 1.507, "mean_token_accuracy": 0.6545891861120859, "num_tokens": 2614767392.0, "step": 15585 }, { "entropy": 1.6899384955565135, "epoch": 1.712202356430749, "grad_norm": 0.800560474395752, "learning_rate": 2.9607957021746514e-06, "loss": 1.2687, "mean_token_accuracy": 0.6748671482006708, "num_tokens": 2614895120.0, "step": 15586 }, { "entropy": 1.6918248236179352, "epoch": 1.712312213342122, "grad_norm": 0.7248471975326538, "learning_rate": 2.9600760093343984e-06, "loss": 1.3761, "mean_token_accuracy": 0.6653676678737005, "num_tokens": 2615067944.0, "step": 15587 }, { "entropy": 1.684990406036377, "epoch": 1.7124220702534947, "grad_norm": 0.6657690405845642, "learning_rate": 2.959356570954116e-06, "loss": 1.3267, "mean_token_accuracy": 0.6671634962161382, "num_tokens": 2615221322.0, "step": 15588 }, { "entropy": 1.726973295211792, "epoch": 1.7125319271648678, "grad_norm": 0.6965436935424805, "learning_rate": 2.9586373870565743e-06, "loss": 1.2313, "mean_token_accuracy": 0.6775472164154053, "num_tokens": 2615371929.0, "step": 15589 }, { "entropy": 1.7516135772069295, "epoch": 1.7126417840762407, "grad_norm": 0.6991093754768372, "learning_rate": 2.9579184576645346e-06, "loss": 1.3075, "mean_token_accuracy": 0.6707476228475571, "num_tokens": 2615543141.0, "step": 15590 }, { "entropy": 1.7394656638304393, "epoch": 1.7127516409876136, "grad_norm": 0.6370988488197327, "learning_rate": 2.9571997828007567e-06, "loss": 1.5721, "mean_token_accuracy": 0.6540063172578812, "num_tokens": 2615728924.0, "step": 15591 }, { "entropy": 1.6763994693756104, "epoch": 1.7128614978989867, "grad_norm": 0.6898596882820129, "learning_rate": 2.956481362487977e-06, "loss": 1.3456, "mean_token_accuracy": 0.6678059051434199, "num_tokens": 2615892674.0, "step": 15592 }, { "entropy": 1.7547740538914998, "epoch": 1.7129713548103596, "grad_norm": 0.7486645579338074, "learning_rate": 2.9557631967489377e-06, "loss": 1.3792, "mean_token_accuracy": 0.6506419479846954, "num_tokens": 2616041776.0, "step": 15593 }, { "entropy": 1.6951400637626648, "epoch": 1.7130812117217324, "grad_norm": 0.6499601602554321, "learning_rate": 2.9550452856063705e-06, "loss": 1.4844, "mean_token_accuracy": 0.6406375219424566, "num_tokens": 2616214747.0, "step": 15594 }, { "entropy": 1.7358328998088837, "epoch": 1.7131910686331053, "grad_norm": 0.7066434621810913, "learning_rate": 2.954327629082995e-06, "loss": 1.425, "mean_token_accuracy": 0.6705079823732376, "num_tokens": 2616327828.0, "step": 15595 }, { "entropy": 1.727910081545512, "epoch": 1.7133009255444782, "grad_norm": 0.6480644941329956, "learning_rate": 2.953610227201522e-06, "loss": 1.5293, "mean_token_accuracy": 0.6270461082458496, "num_tokens": 2616497708.0, "step": 15596 }, { "entropy": 1.6446092625459034, "epoch": 1.7134107824558513, "grad_norm": 0.9010108709335327, "learning_rate": 2.9528930799846624e-06, "loss": 1.3272, "mean_token_accuracy": 0.6766605178515116, "num_tokens": 2616622349.0, "step": 15597 }, { "entropy": 1.6904981931050618, "epoch": 1.7135206393672242, "grad_norm": 0.6076183915138245, "learning_rate": 2.9521761874551074e-06, "loss": 1.4624, "mean_token_accuracy": 0.6577340712149938, "num_tokens": 2616786261.0, "step": 15598 }, { "entropy": 1.6667085389296215, "epoch": 1.713630496278597, "grad_norm": 0.7304791808128357, "learning_rate": 2.951459549635553e-06, "loss": 1.2399, "mean_token_accuracy": 0.684608002503713, "num_tokens": 2616919152.0, "step": 15599 }, { "entropy": 1.7170383930206299, "epoch": 1.7137403531899702, "grad_norm": 0.758103609085083, "learning_rate": 2.9507431665486762e-06, "loss": 1.3148, "mean_token_accuracy": 0.6641736576954523, "num_tokens": 2617016602.0, "step": 15600 }, { "entropy": 1.7205977539221446, "epoch": 1.7138502101013429, "grad_norm": 0.7272992730140686, "learning_rate": 2.95002703821715e-06, "loss": 1.4594, "mean_token_accuracy": 0.6407827585935593, "num_tokens": 2617200134.0, "step": 15601 }, { "entropy": 1.7511567175388336, "epoch": 1.713960067012716, "grad_norm": 0.6522664427757263, "learning_rate": 2.949311164663642e-06, "loss": 1.4678, "mean_token_accuracy": 0.6563850492238998, "num_tokens": 2617347308.0, "step": 15602 }, { "entropy": 1.7190465529759724, "epoch": 1.7140699239240889, "grad_norm": 0.7146872878074646, "learning_rate": 2.948595545910807e-06, "loss": 1.5173, "mean_token_accuracy": 0.6501129815975825, "num_tokens": 2617520468.0, "step": 15603 }, { "entropy": 1.7591931919256847, "epoch": 1.7141797808354617, "grad_norm": 0.7043587565422058, "learning_rate": 2.947880181981295e-06, "loss": 1.4204, "mean_token_accuracy": 0.6521059771378835, "num_tokens": 2617656762.0, "step": 15604 }, { "entropy": 1.713607559601466, "epoch": 1.7142896377468348, "grad_norm": 0.7444068193435669, "learning_rate": 2.947165072897745e-06, "loss": 1.4725, "mean_token_accuracy": 0.6571368873119354, "num_tokens": 2617811535.0, "step": 15605 }, { "entropy": 1.7211280067761738, "epoch": 1.7143994946582077, "grad_norm": 0.6797099709510803, "learning_rate": 2.946450218682796e-06, "loss": 1.4251, "mean_token_accuracy": 0.6561200817426046, "num_tokens": 2617969068.0, "step": 15606 }, { "entropy": 1.7104970415433247, "epoch": 1.7145093515695806, "grad_norm": 0.7477222084999084, "learning_rate": 2.945735619359066e-06, "loss": 1.4978, "mean_token_accuracy": 0.6518939783175787, "num_tokens": 2618122469.0, "step": 15607 }, { "entropy": 1.7389554679393768, "epoch": 1.7146192084809535, "grad_norm": 0.7334529757499695, "learning_rate": 2.9450212749491737e-06, "loss": 1.3035, "mean_token_accuracy": 0.6685802390178045, "num_tokens": 2618295553.0, "step": 15608 }, { "entropy": 1.6965291400750477, "epoch": 1.7147290653923264, "grad_norm": 0.725472092628479, "learning_rate": 2.9443071854757297e-06, "loss": 1.3944, "mean_token_accuracy": 0.6754897187153498, "num_tokens": 2618487620.0, "step": 15609 }, { "entropy": 1.7117928862571716, "epoch": 1.7148389223036995, "grad_norm": 1.0294393301010132, "learning_rate": 2.9435933509613323e-06, "loss": 1.2737, "mean_token_accuracy": 0.6840305080016454, "num_tokens": 2618628386.0, "step": 15610 }, { "entropy": 1.7260564068953197, "epoch": 1.7149487792150724, "grad_norm": 0.6792541742324829, "learning_rate": 2.942879771428577e-06, "loss": 1.3423, "mean_token_accuracy": 0.6633341958125433, "num_tokens": 2618816606.0, "step": 15611 }, { "entropy": 1.6906549831231434, "epoch": 1.7150586361264453, "grad_norm": 0.7536963224411011, "learning_rate": 2.9421664469000454e-06, "loss": 1.3189, "mean_token_accuracy": 0.6605163216590881, "num_tokens": 2618969345.0, "step": 15612 }, { "entropy": 1.705591360727946, "epoch": 1.7151684930378184, "grad_norm": 0.7592849731445312, "learning_rate": 2.941453377398313e-06, "loss": 1.3855, "mean_token_accuracy": 0.6538231472174326, "num_tokens": 2619169192.0, "step": 15613 }, { "entropy": 1.7342075407505035, "epoch": 1.715278349949191, "grad_norm": 0.7230466604232788, "learning_rate": 2.9407405629459525e-06, "loss": 1.2666, "mean_token_accuracy": 0.6689763913551966, "num_tokens": 2619273955.0, "step": 15614 }, { "entropy": 1.6978387037913005, "epoch": 1.7153882068605641, "grad_norm": 0.7770639657974243, "learning_rate": 2.940028003565521e-06, "loss": 1.3855, "mean_token_accuracy": 0.6582835217316946, "num_tokens": 2619478319.0, "step": 15615 }, { "entropy": 1.6785812576611836, "epoch": 1.715498063771937, "grad_norm": 0.6288565993309021, "learning_rate": 2.939315699279569e-06, "loss": 1.3499, "mean_token_accuracy": 0.6569116910298666, "num_tokens": 2619593713.0, "step": 15616 }, { "entropy": 1.7079376081625621, "epoch": 1.71560792068331, "grad_norm": 0.6645893454551697, "learning_rate": 2.938603650110644e-06, "loss": 1.3913, "mean_token_accuracy": 0.6528747181097666, "num_tokens": 2619768328.0, "step": 15617 }, { "entropy": 1.6401109794775646, "epoch": 1.715717777594683, "grad_norm": 0.5823392868041992, "learning_rate": 2.9378918560812825e-06, "loss": 1.386, "mean_token_accuracy": 0.6663381606340408, "num_tokens": 2619922330.0, "step": 15618 }, { "entropy": 1.6434422830740611, "epoch": 1.715827634506056, "grad_norm": 0.8188596963882446, "learning_rate": 2.93718031721401e-06, "loss": 1.2486, "mean_token_accuracy": 0.6767540127038956, "num_tokens": 2620087889.0, "step": 15619 }, { "entropy": 1.706367423137029, "epoch": 1.7159374914174288, "grad_norm": 0.766272246837616, "learning_rate": 2.9364690335313463e-06, "loss": 1.4292, "mean_token_accuracy": 0.6471919765075048, "num_tokens": 2620250444.0, "step": 15620 }, { "entropy": 1.6769792238871257, "epoch": 1.7160473483288017, "grad_norm": 0.6159754991531372, "learning_rate": 2.935758005055806e-06, "loss": 1.4807, "mean_token_accuracy": 0.638146718343099, "num_tokens": 2620434298.0, "step": 15621 }, { "entropy": 1.6449984113375347, "epoch": 1.7161572052401746, "grad_norm": 0.7580591440200806, "learning_rate": 2.9350472318098886e-06, "loss": 1.2516, "mean_token_accuracy": 0.672856385509173, "num_tokens": 2620564546.0, "step": 15622 }, { "entropy": 1.6625539064407349, "epoch": 1.7162670621515477, "grad_norm": 0.713958203792572, "learning_rate": 2.9343367138160943e-06, "loss": 1.3285, "mean_token_accuracy": 0.6687121589978536, "num_tokens": 2620712844.0, "step": 15623 }, { "entropy": 1.6688397228717804, "epoch": 1.7163769190629206, "grad_norm": 0.6362452507019043, "learning_rate": 2.9336264510969083e-06, "loss": 1.4256, "mean_token_accuracy": 0.6454948534568151, "num_tokens": 2620910175.0, "step": 15624 }, { "entropy": 1.734671155611674, "epoch": 1.7164867759742934, "grad_norm": 0.6450325846672058, "learning_rate": 2.9329164436748086e-06, "loss": 1.4168, "mean_token_accuracy": 0.6560704112052917, "num_tokens": 2621043013.0, "step": 15625 }, { "entropy": 1.7354978024959564, "epoch": 1.7165966328856666, "grad_norm": 0.7361391186714172, "learning_rate": 2.9322066915722706e-06, "loss": 1.4561, "mean_token_accuracy": 0.6466700434684753, "num_tokens": 2621186220.0, "step": 15626 }, { "entropy": 1.6472897231578827, "epoch": 1.7167064897970392, "grad_norm": 0.7093019485473633, "learning_rate": 2.931497194811755e-06, "loss": 1.2352, "mean_token_accuracy": 0.6806353082259496, "num_tokens": 2621304502.0, "step": 15627 }, { "entropy": 1.6753909885883331, "epoch": 1.7168163467084123, "grad_norm": 0.7245997786521912, "learning_rate": 2.930787953415716e-06, "loss": 1.4154, "mean_token_accuracy": 0.6562784959872564, "num_tokens": 2621490780.0, "step": 15628 }, { "entropy": 1.6912651062011719, "epoch": 1.7169262036197852, "grad_norm": 0.752405047416687, "learning_rate": 2.9300789674066014e-06, "loss": 1.4143, "mean_token_accuracy": 0.6599976718425751, "num_tokens": 2621649870.0, "step": 15629 }, { "entropy": 1.6908113261063893, "epoch": 1.717036060531158, "grad_norm": 0.6509510278701782, "learning_rate": 2.929370236806854e-06, "loss": 1.3438, "mean_token_accuracy": 0.663862998286883, "num_tokens": 2621790608.0, "step": 15630 }, { "entropy": 1.6996460954348247, "epoch": 1.7171459174425312, "grad_norm": 0.6494753956794739, "learning_rate": 2.9286617616389005e-06, "loss": 1.3424, "mean_token_accuracy": 0.6629728774229685, "num_tokens": 2621924824.0, "step": 15631 }, { "entropy": 1.6819026172161102, "epoch": 1.717255774353904, "grad_norm": 0.8028758764266968, "learning_rate": 2.9279535419251646e-06, "loss": 1.5812, "mean_token_accuracy": 0.636813203493754, "num_tokens": 2622102501.0, "step": 15632 }, { "entropy": 1.7385485967000325, "epoch": 1.717365631265277, "grad_norm": 0.7110795378684998, "learning_rate": 2.9272455776880632e-06, "loss": 1.3385, "mean_token_accuracy": 0.6732540826002756, "num_tokens": 2622244211.0, "step": 15633 }, { "entropy": 1.6775075495243073, "epoch": 1.71747548817665, "grad_norm": 0.6379189491271973, "learning_rate": 2.9265378689499995e-06, "loss": 1.5725, "mean_token_accuracy": 0.6385338008403778, "num_tokens": 2622442665.0, "step": 15634 }, { "entropy": 1.654201736052831, "epoch": 1.7175853450880227, "grad_norm": 0.5896367430686951, "learning_rate": 2.9258304157333763e-06, "loss": 1.3321, "mean_token_accuracy": 0.6641835123300552, "num_tokens": 2622625441.0, "step": 15635 }, { "entropy": 1.7210518419742584, "epoch": 1.7176952019993958, "grad_norm": 0.657882809638977, "learning_rate": 2.9251232180605822e-06, "loss": 1.4463, "mean_token_accuracy": 0.6423710286617279, "num_tokens": 2622849920.0, "step": 15636 }, { "entropy": 1.6777076125144958, "epoch": 1.7178050589107687, "grad_norm": 0.6649149060249329, "learning_rate": 2.9244162759539977e-06, "loss": 1.4115, "mean_token_accuracy": 0.6619361639022827, "num_tokens": 2623045328.0, "step": 15637 }, { "entropy": 1.7354417145252228, "epoch": 1.7179149158221416, "grad_norm": 0.6765681505203247, "learning_rate": 2.923709589436001e-06, "loss": 1.3755, "mean_token_accuracy": 0.664703369140625, "num_tokens": 2623167633.0, "step": 15638 }, { "entropy": 1.7414036691188812, "epoch": 1.7180247727335147, "grad_norm": 0.7814067006111145, "learning_rate": 2.9230031585289564e-06, "loss": 1.4758, "mean_token_accuracy": 0.6637212236722311, "num_tokens": 2623339230.0, "step": 15639 }, { "entropy": 1.7026597261428833, "epoch": 1.7181346296448874, "grad_norm": 0.7521904110908508, "learning_rate": 2.9222969832552205e-06, "loss": 1.6311, "mean_token_accuracy": 0.6423285851875941, "num_tokens": 2623520275.0, "step": 15640 }, { "entropy": 1.7413414518038433, "epoch": 1.7182444865562605, "grad_norm": 0.6938111186027527, "learning_rate": 2.9215910636371454e-06, "loss": 1.4661, "mean_token_accuracy": 0.6471091061830521, "num_tokens": 2623733621.0, "step": 15641 }, { "entropy": 1.7483003437519073, "epoch": 1.7183543434676334, "grad_norm": 0.617083728313446, "learning_rate": 2.920885399697074e-06, "loss": 1.3296, "mean_token_accuracy": 0.6537606020768484, "num_tokens": 2623881830.0, "step": 15642 }, { "entropy": 1.65190593401591, "epoch": 1.7184642003790063, "grad_norm": 0.6645247936248779, "learning_rate": 2.9201799914573397e-06, "loss": 1.3003, "mean_token_accuracy": 0.6721631934245428, "num_tokens": 2624073705.0, "step": 15643 }, { "entropy": 1.7330858608086903, "epoch": 1.7185740572903794, "grad_norm": 0.6733470559120178, "learning_rate": 2.919474838940266e-06, "loss": 1.446, "mean_token_accuracy": 0.6582320332527161, "num_tokens": 2624229129.0, "step": 15644 }, { "entropy": 1.857384592294693, "epoch": 1.7186839142017523, "grad_norm": 0.659773588180542, "learning_rate": 2.918769942168175e-06, "loss": 1.4821, "mean_token_accuracy": 0.6398710956176122, "num_tokens": 2624382591.0, "step": 15645 }, { "entropy": 1.7804729243119557, "epoch": 1.7187937711131251, "grad_norm": 0.7186923027038574, "learning_rate": 2.9180653011633718e-06, "loss": 1.4907, "mean_token_accuracy": 0.6509936352570852, "num_tokens": 2624575510.0, "step": 15646 }, { "entropy": 1.740348070859909, "epoch": 1.7189036280244983, "grad_norm": 0.7147418856620789, "learning_rate": 2.9173609159481623e-06, "loss": 1.3815, "mean_token_accuracy": 0.6621488879124323, "num_tokens": 2624712724.0, "step": 15647 }, { "entropy": 1.6902793844540913, "epoch": 1.719013484935871, "grad_norm": 0.7417254447937012, "learning_rate": 2.9166567865448354e-06, "loss": 1.4179, "mean_token_accuracy": 0.6671228508154551, "num_tokens": 2624891490.0, "step": 15648 }, { "entropy": 1.7188851237297058, "epoch": 1.719123341847244, "grad_norm": 0.7302298545837402, "learning_rate": 2.9159529129756786e-06, "loss": 1.3433, "mean_token_accuracy": 0.6725572695334753, "num_tokens": 2625061436.0, "step": 15649 }, { "entropy": 1.7102177143096924, "epoch": 1.719233198758617, "grad_norm": 0.7004075050354004, "learning_rate": 2.9152492952629705e-06, "loss": 1.348, "mean_token_accuracy": 0.6611120849847794, "num_tokens": 2625204652.0, "step": 15650 }, { "entropy": 1.7715183695157368, "epoch": 1.7193430556699898, "grad_norm": 0.6727532148361206, "learning_rate": 2.9145459334289793e-06, "loss": 1.3649, "mean_token_accuracy": 0.655080164472262, "num_tokens": 2625372329.0, "step": 15651 }, { "entropy": 1.705493172009786, "epoch": 1.719452912581363, "grad_norm": 0.632786750793457, "learning_rate": 2.913842827495964e-06, "loss": 1.5373, "mean_token_accuracy": 0.6413880536953608, "num_tokens": 2625606863.0, "step": 15652 }, { "entropy": 1.6163564026355743, "epoch": 1.7195627694927356, "grad_norm": 0.6396132111549377, "learning_rate": 2.9131399774861823e-06, "loss": 1.3683, "mean_token_accuracy": 0.6635381281375885, "num_tokens": 2625790091.0, "step": 15653 }, { "entropy": 1.7325818141301472, "epoch": 1.7196726264041087, "grad_norm": 0.6826179623603821, "learning_rate": 2.9124373834218733e-06, "loss": 1.4846, "mean_token_accuracy": 0.637732004125913, "num_tokens": 2625965636.0, "step": 15654 }, { "entropy": 1.727001855770747, "epoch": 1.7197824833154816, "grad_norm": 0.7186253070831299, "learning_rate": 2.9117350453252797e-06, "loss": 1.3532, "mean_token_accuracy": 0.6762463947137197, "num_tokens": 2626105857.0, "step": 15655 }, { "entropy": 1.673914760351181, "epoch": 1.7198923402268544, "grad_norm": 0.7053311467170715, "learning_rate": 2.9110329632186264e-06, "loss": 1.4876, "mean_token_accuracy": 0.6486860315004984, "num_tokens": 2626261625.0, "step": 15656 }, { "entropy": 1.701512336730957, "epoch": 1.7200021971382276, "grad_norm": 0.5304204821586609, "learning_rate": 2.9103311371241328e-06, "loss": 1.4538, "mean_token_accuracy": 0.6435932020346323, "num_tokens": 2626484562.0, "step": 15657 }, { "entropy": 1.7574267089366913, "epoch": 1.7201120540496004, "grad_norm": 0.6676803231239319, "learning_rate": 2.909629567064014e-06, "loss": 1.3626, "mean_token_accuracy": 0.6545713643232981, "num_tokens": 2626646846.0, "step": 15658 }, { "entropy": 1.7169890503088634, "epoch": 1.7202219109609733, "grad_norm": 0.6293471455574036, "learning_rate": 2.908928253060478e-06, "loss": 1.4239, "mean_token_accuracy": 0.6564174294471741, "num_tokens": 2626829982.0, "step": 15659 }, { "entropy": 1.6367035309473674, "epoch": 1.7203317678723464, "grad_norm": 0.6502057909965515, "learning_rate": 2.908227195135712e-06, "loss": 1.4779, "mean_token_accuracy": 0.6452774107456207, "num_tokens": 2627025170.0, "step": 15660 }, { "entropy": 1.6846541166305542, "epoch": 1.720441624783719, "grad_norm": 0.842652440071106, "learning_rate": 2.907526393311909e-06, "loss": 1.4808, "mean_token_accuracy": 0.6536863893270493, "num_tokens": 2627194478.0, "step": 15661 }, { "entropy": 1.697983334461848, "epoch": 1.7205514816950922, "grad_norm": 0.6564600467681885, "learning_rate": 2.906825847611252e-06, "loss": 1.4753, "mean_token_accuracy": 0.6554812788963318, "num_tokens": 2627373566.0, "step": 15662 }, { "entropy": 1.7039678891499836, "epoch": 1.720661338606465, "grad_norm": 0.7069868445396423, "learning_rate": 2.90612555805591e-06, "loss": 1.3481, "mean_token_accuracy": 0.6676451563835144, "num_tokens": 2627564580.0, "step": 15663 }, { "entropy": 1.6884814302126567, "epoch": 1.720771195517838, "grad_norm": 0.6857156157493591, "learning_rate": 2.905425524668044e-06, "loss": 1.3325, "mean_token_accuracy": 0.6730131804943085, "num_tokens": 2627688092.0, "step": 15664 }, { "entropy": 1.7210382620493572, "epoch": 1.720881052429211, "grad_norm": 0.6751901507377625, "learning_rate": 2.9047257474698155e-06, "loss": 1.4386, "mean_token_accuracy": 0.6408843944470087, "num_tokens": 2627853159.0, "step": 15665 }, { "entropy": 1.6964699625968933, "epoch": 1.7209909093405837, "grad_norm": 0.7327737212181091, "learning_rate": 2.9040262264833662e-06, "loss": 1.3029, "mean_token_accuracy": 0.6690873155991236, "num_tokens": 2627964728.0, "step": 15666 }, { "entropy": 1.7300258974234264, "epoch": 1.7211007662519568, "grad_norm": 0.7446189522743225, "learning_rate": 2.9033269617308417e-06, "loss": 1.4907, "mean_token_accuracy": 0.628525917728742, "num_tokens": 2628197249.0, "step": 15667 }, { "entropy": 1.73529119292895, "epoch": 1.7212106231633297, "grad_norm": 0.618928074836731, "learning_rate": 2.9026279532343702e-06, "loss": 1.3959, "mean_token_accuracy": 0.6569162358840307, "num_tokens": 2628372683.0, "step": 15668 }, { "entropy": 1.7280128796895344, "epoch": 1.7213204800747026, "grad_norm": 0.6142230033874512, "learning_rate": 2.9019292010160738e-06, "loss": 1.2844, "mean_token_accuracy": 0.6628156552712122, "num_tokens": 2628508232.0, "step": 15669 }, { "entropy": 1.7332999805609386, "epoch": 1.7214303369860757, "grad_norm": 0.6877867579460144, "learning_rate": 2.901230705098068e-06, "loss": 1.3278, "mean_token_accuracy": 0.6685926765203476, "num_tokens": 2628666542.0, "step": 15670 }, { "entropy": 1.650905857483546, "epoch": 1.7215401938974486, "grad_norm": 0.7264050841331482, "learning_rate": 2.9005324655024645e-06, "loss": 1.1828, "mean_token_accuracy": 0.6877222855885824, "num_tokens": 2628793638.0, "step": 15671 }, { "entropy": 1.7235294878482819, "epoch": 1.7216500508088215, "grad_norm": 0.8157387375831604, "learning_rate": 2.8998344822513563e-06, "loss": 1.3123, "mean_token_accuracy": 0.6598003009955088, "num_tokens": 2628914826.0, "step": 15672 }, { "entropy": 1.7341767648855846, "epoch": 1.7217599077201946, "grad_norm": 0.645763635635376, "learning_rate": 2.8991367553668364e-06, "loss": 1.4893, "mean_token_accuracy": 0.6368632217248281, "num_tokens": 2629125796.0, "step": 15673 }, { "entropy": 1.7225966950257618, "epoch": 1.7218697646315673, "grad_norm": 0.6662589311599731, "learning_rate": 2.89843928487099e-06, "loss": 1.365, "mean_token_accuracy": 0.6565856287876765, "num_tokens": 2629318301.0, "step": 15674 }, { "entropy": 1.7104746202627819, "epoch": 1.7219796215429404, "grad_norm": 0.6441190242767334, "learning_rate": 2.8977420707858896e-06, "loss": 1.5115, "mean_token_accuracy": 0.6391011476516724, "num_tokens": 2629516989.0, "step": 15675 }, { "entropy": 1.580856482187907, "epoch": 1.7220894784543133, "grad_norm": 0.5921868681907654, "learning_rate": 2.8970451131335987e-06, "loss": 1.2911, "mean_token_accuracy": 0.6746866156657537, "num_tokens": 2629683168.0, "step": 15676 }, { "entropy": 1.6643680731455486, "epoch": 1.7221993353656861, "grad_norm": 0.5791040062904358, "learning_rate": 2.8963484119361807e-06, "loss": 1.36, "mean_token_accuracy": 0.6568904668092728, "num_tokens": 2629835992.0, "step": 15677 }, { "entropy": 1.7079329987366993, "epoch": 1.7223091922770593, "grad_norm": 0.6563646793365479, "learning_rate": 2.895651967215683e-06, "loss": 1.4994, "mean_token_accuracy": 0.6590022444725037, "num_tokens": 2630030474.0, "step": 15678 }, { "entropy": 1.6830817659695942, "epoch": 1.722419049188432, "grad_norm": 0.653887152671814, "learning_rate": 2.8949557789941496e-06, "loss": 1.4428, "mean_token_accuracy": 0.6400194962819418, "num_tokens": 2630253676.0, "step": 15679 }, { "entropy": 1.6895558039347331, "epoch": 1.722528906099805, "grad_norm": 0.6651843190193176, "learning_rate": 2.894259847293614e-06, "loss": 1.4028, "mean_token_accuracy": 0.6565206199884415, "num_tokens": 2630450982.0, "step": 15680 }, { "entropy": 1.671963373819987, "epoch": 1.722638763011178, "grad_norm": 0.667715847492218, "learning_rate": 2.8935641721360997e-06, "loss": 1.408, "mean_token_accuracy": 0.6582218011220297, "num_tokens": 2630606203.0, "step": 15681 }, { "entropy": 1.7464380860328674, "epoch": 1.7227486199225508, "grad_norm": 0.7251789569854736, "learning_rate": 2.892868753543628e-06, "loss": 1.4277, "mean_token_accuracy": 0.6359966893990835, "num_tokens": 2630771994.0, "step": 15682 }, { "entropy": 1.7634708881378174, "epoch": 1.722858476833924, "grad_norm": 0.84815913438797, "learning_rate": 2.8921735915382077e-06, "loss": 1.5548, "mean_token_accuracy": 0.6365585227807363, "num_tokens": 2630956170.0, "step": 15683 }, { "entropy": 1.7360106408596039, "epoch": 1.7229683337452968, "grad_norm": 0.76357102394104, "learning_rate": 2.891478686141838e-06, "loss": 1.3809, "mean_token_accuracy": 0.6442870199680328, "num_tokens": 2631129034.0, "step": 15684 }, { "entropy": 1.7115418414274852, "epoch": 1.7230781906566697, "grad_norm": 1.291322112083435, "learning_rate": 2.890784037376514e-06, "loss": 1.1999, "mean_token_accuracy": 0.6683139950037003, "num_tokens": 2631354553.0, "step": 15685 }, { "entropy": 1.7102199296156566, "epoch": 1.7231880475680428, "grad_norm": 0.6701323390007019, "learning_rate": 2.8900896452642236e-06, "loss": 1.2201, "mean_token_accuracy": 0.6836532801389694, "num_tokens": 2631473586.0, "step": 15686 }, { "entropy": 1.658743570248286, "epoch": 1.7232979044794154, "grad_norm": 0.7017245292663574, "learning_rate": 2.8893955098269404e-06, "loss": 1.1978, "mean_token_accuracy": 0.6843846688667933, "num_tokens": 2631581852.0, "step": 15687 }, { "entropy": 1.636157254378001, "epoch": 1.7234077613907886, "grad_norm": 0.6195902824401855, "learning_rate": 2.888701631086633e-06, "loss": 1.4816, "mean_token_accuracy": 0.6684853285551071, "num_tokens": 2631732814.0, "step": 15688 }, { "entropy": 1.7463614245255787, "epoch": 1.7235176183021614, "grad_norm": 0.6704382300376892, "learning_rate": 2.888008009065266e-06, "loss": 1.3736, "mean_token_accuracy": 0.6524703949689865, "num_tokens": 2631917876.0, "step": 15689 }, { "entropy": 1.7010905543963115, "epoch": 1.7236274752135343, "grad_norm": 0.5545864701271057, "learning_rate": 2.8873146437847876e-06, "loss": 1.4765, "mean_token_accuracy": 0.638947606086731, "num_tokens": 2632151477.0, "step": 15690 }, { "entropy": 1.7439953585465748, "epoch": 1.7237373321249074, "grad_norm": 0.8762944340705872, "learning_rate": 2.8866215352671477e-06, "loss": 1.3559, "mean_token_accuracy": 0.6531753689050674, "num_tokens": 2632332013.0, "step": 15691 }, { "entropy": 1.716113030910492, "epoch": 1.72384718903628, "grad_norm": 0.6564568281173706, "learning_rate": 2.8859286835342793e-06, "loss": 1.4302, "mean_token_accuracy": 0.6436122357845306, "num_tokens": 2632523258.0, "step": 15692 }, { "entropy": 1.7107328176498413, "epoch": 1.7239570459476532, "grad_norm": 0.6151549220085144, "learning_rate": 2.885236088608111e-06, "loss": 1.3062, "mean_token_accuracy": 0.673832893371582, "num_tokens": 2632670998.0, "step": 15693 }, { "entropy": 1.6899991532166798, "epoch": 1.724066902859026, "grad_norm": 0.6508071422576904, "learning_rate": 2.8845437505105662e-06, "loss": 1.3572, "mean_token_accuracy": 0.6597887774308523, "num_tokens": 2632825597.0, "step": 15694 }, { "entropy": 1.7111450632413228, "epoch": 1.724176759770399, "grad_norm": 0.6507290005683899, "learning_rate": 2.883851669263554e-06, "loss": 1.4109, "mean_token_accuracy": 0.6530921260515848, "num_tokens": 2633013026.0, "step": 15695 }, { "entropy": 1.7176082531611125, "epoch": 1.724286616681772, "grad_norm": 0.7525252103805542, "learning_rate": 2.883159844888977e-06, "loss": 1.3411, "mean_token_accuracy": 0.6653605997562408, "num_tokens": 2633157545.0, "step": 15696 }, { "entropy": 1.6218600273132324, "epoch": 1.724396473593145, "grad_norm": 0.6842386722564697, "learning_rate": 2.8824682774087336e-06, "loss": 1.2215, "mean_token_accuracy": 0.6854247947533926, "num_tokens": 2633332852.0, "step": 15697 }, { "entropy": 1.7413849532604218, "epoch": 1.7245063305045178, "grad_norm": 0.7053453326225281, "learning_rate": 2.881776966844714e-06, "loss": 1.3955, "mean_token_accuracy": 0.6568620105584463, "num_tokens": 2633497069.0, "step": 15698 }, { "entropy": 1.7630057732264202, "epoch": 1.724616187415891, "grad_norm": 0.7357109785079956, "learning_rate": 2.881085913218794e-06, "loss": 1.3177, "mean_token_accuracy": 0.663982942700386, "num_tokens": 2633664211.0, "step": 15699 }, { "entropy": 1.7018859187761943, "epoch": 1.7247260443272636, "grad_norm": 0.7314789295196533, "learning_rate": 2.880395116552845e-06, "loss": 1.3952, "mean_token_accuracy": 0.6586244255304337, "num_tokens": 2633847816.0, "step": 15700 }, { "entropy": 1.6985140244166057, "epoch": 1.7248359012386367, "grad_norm": 0.6651721596717834, "learning_rate": 2.879704576868734e-06, "loss": 1.3008, "mean_token_accuracy": 0.672162319223086, "num_tokens": 2634007895.0, "step": 15701 }, { "entropy": 1.6979452073574066, "epoch": 1.7249457581500096, "grad_norm": 0.6808459758758545, "learning_rate": 2.8790142941883114e-06, "loss": 1.4499, "mean_token_accuracy": 0.6408154418071111, "num_tokens": 2634259125.0, "step": 15702 }, { "entropy": 1.647172898054123, "epoch": 1.7250556150613825, "grad_norm": 0.6427389979362488, "learning_rate": 2.87832426853343e-06, "loss": 1.396, "mean_token_accuracy": 0.6532814701398214, "num_tokens": 2634408832.0, "step": 15703 }, { "entropy": 1.701138327519099, "epoch": 1.7251654719727556, "grad_norm": 0.7189123034477234, "learning_rate": 2.8776344999259253e-06, "loss": 1.5086, "mean_token_accuracy": 0.637222687403361, "num_tokens": 2634574740.0, "step": 15704 }, { "entropy": 1.7070972124735515, "epoch": 1.7252753288841283, "grad_norm": 0.6271539926528931, "learning_rate": 2.876944988387626e-06, "loss": 1.3537, "mean_token_accuracy": 0.6601554602384567, "num_tokens": 2634731896.0, "step": 15705 }, { "entropy": 1.653431475162506, "epoch": 1.7253851857955014, "grad_norm": 0.8477639555931091, "learning_rate": 2.87625573394036e-06, "loss": 1.3336, "mean_token_accuracy": 0.6693581690390905, "num_tokens": 2634853832.0, "step": 15706 }, { "entropy": 1.729806274175644, "epoch": 1.7254950427068743, "grad_norm": 0.6469348073005676, "learning_rate": 2.8755667366059403e-06, "loss": 1.5134, "mean_token_accuracy": 0.650507057706515, "num_tokens": 2635036350.0, "step": 15707 }, { "entropy": 1.692490776379903, "epoch": 1.7256048996182471, "grad_norm": 0.6189622282981873, "learning_rate": 2.8748779964061697e-06, "loss": 1.3822, "mean_token_accuracy": 0.6697244842847189, "num_tokens": 2635210409.0, "step": 15708 }, { "entropy": 1.7030345400174458, "epoch": 1.7257147565296203, "grad_norm": 0.6761047840118408, "learning_rate": 2.8741895133628506e-06, "loss": 1.2378, "mean_token_accuracy": 0.6791710207859675, "num_tokens": 2635326490.0, "step": 15709 }, { "entropy": 1.7439703047275543, "epoch": 1.7258246134409931, "grad_norm": 0.80422043800354, "learning_rate": 2.873501287497771e-06, "loss": 1.399, "mean_token_accuracy": 0.6671061217784882, "num_tokens": 2635455970.0, "step": 15710 }, { "entropy": 1.7496139506498973, "epoch": 1.725934470352366, "grad_norm": 0.6345416903495789, "learning_rate": 2.8728133188327144e-06, "loss": 1.4652, "mean_token_accuracy": 0.6486029028892517, "num_tokens": 2635665581.0, "step": 15711 }, { "entropy": 1.6556781927744548, "epoch": 1.7260443272637391, "grad_norm": 0.6797258257865906, "learning_rate": 2.8721256073894554e-06, "loss": 1.3187, "mean_token_accuracy": 0.6713179250558218, "num_tokens": 2635846577.0, "step": 15712 }, { "entropy": 1.7008158266544342, "epoch": 1.7261541841751118, "grad_norm": 0.8460964560508728, "learning_rate": 2.8714381531897552e-06, "loss": 1.4194, "mean_token_accuracy": 0.662114754319191, "num_tokens": 2636002590.0, "step": 15713 }, { "entropy": 1.6474326451619465, "epoch": 1.726264041086485, "grad_norm": 0.6482619047164917, "learning_rate": 2.8707509562553754e-06, "loss": 1.4091, "mean_token_accuracy": 0.6451590359210968, "num_tokens": 2636207089.0, "step": 15714 }, { "entropy": 1.7873999178409576, "epoch": 1.7263738979978578, "grad_norm": 0.7166942358016968, "learning_rate": 2.8700640166080678e-06, "loss": 1.4096, "mean_token_accuracy": 0.6470801830291748, "num_tokens": 2636356920.0, "step": 15715 }, { "entropy": 1.7664975921312969, "epoch": 1.7264837549092307, "grad_norm": 0.8807752728462219, "learning_rate": 2.869377334269568e-06, "loss": 1.3901, "mean_token_accuracy": 0.6581128338972727, "num_tokens": 2636495774.0, "step": 15716 }, { "entropy": 1.716484268506368, "epoch": 1.7265936118206038, "grad_norm": 0.6690646409988403, "learning_rate": 2.868690909261611e-06, "loss": 1.2976, "mean_token_accuracy": 0.672653466463089, "num_tokens": 2636615668.0, "step": 15717 }, { "entropy": 1.7186988592147827, "epoch": 1.7267034687319764, "grad_norm": 0.6803736090660095, "learning_rate": 2.8680047416059255e-06, "loss": 1.464, "mean_token_accuracy": 0.6469851434230804, "num_tokens": 2636766089.0, "step": 15718 }, { "entropy": 1.7901420791943867, "epoch": 1.7268133256433496, "grad_norm": 0.9188567399978638, "learning_rate": 2.867318831324225e-06, "loss": 1.7136, "mean_token_accuracy": 0.6263786852359772, "num_tokens": 2636910508.0, "step": 15719 }, { "entropy": 1.710059146086375, "epoch": 1.7269231825547224, "grad_norm": 0.7188782095909119, "learning_rate": 2.8666331784382164e-06, "loss": 1.4827, "mean_token_accuracy": 0.6406730314095815, "num_tokens": 2637071721.0, "step": 15720 }, { "entropy": 1.6877124905586243, "epoch": 1.7270330394660953, "grad_norm": 0.6510874629020691, "learning_rate": 2.865947782969605e-06, "loss": 1.3875, "mean_token_accuracy": 0.6546644171079, "num_tokens": 2637242150.0, "step": 15721 }, { "entropy": 1.7223760485649109, "epoch": 1.7271428963774684, "grad_norm": 0.713353157043457, "learning_rate": 2.8652626449400794e-06, "loss": 1.4072, "mean_token_accuracy": 0.6563191761573156, "num_tokens": 2637397453.0, "step": 15722 }, { "entropy": 1.687508871157964, "epoch": 1.7272527532888413, "grad_norm": 0.6655722856521606, "learning_rate": 2.864577764371327e-06, "loss": 1.5558, "mean_token_accuracy": 0.6428915162881216, "num_tokens": 2637576207.0, "step": 15723 }, { "entropy": 1.7700623273849487, "epoch": 1.7273626102002142, "grad_norm": 0.8303236365318298, "learning_rate": 2.8638931412850226e-06, "loss": 1.4077, "mean_token_accuracy": 0.6632307171821594, "num_tokens": 2637726614.0, "step": 15724 }, { "entropy": 1.7799305220444996, "epoch": 1.7274724671115873, "grad_norm": 0.8366572856903076, "learning_rate": 2.8632087757028317e-06, "loss": 1.5173, "mean_token_accuracy": 0.6468792210022608, "num_tokens": 2637888327.0, "step": 15725 }, { "entropy": 1.6914372245470684, "epoch": 1.72758232402296, "grad_norm": 0.5621321201324463, "learning_rate": 2.862524667646417e-06, "loss": 1.5679, "mean_token_accuracy": 0.6358216305573782, "num_tokens": 2638134217.0, "step": 15726 }, { "entropy": 1.700711299975713, "epoch": 1.727692180934333, "grad_norm": 0.7504645586013794, "learning_rate": 2.861840817137433e-06, "loss": 1.4857, "mean_token_accuracy": 0.6607631792624792, "num_tokens": 2638295434.0, "step": 15727 }, { "entropy": 1.7090523938337963, "epoch": 1.727802037845706, "grad_norm": 0.7692071795463562, "learning_rate": 2.8611572241975167e-06, "loss": 1.3441, "mean_token_accuracy": 0.6551339974006017, "num_tokens": 2638424528.0, "step": 15728 }, { "entropy": 1.7331831753253937, "epoch": 1.7279118947570788, "grad_norm": 0.5608404874801636, "learning_rate": 2.8604738888483074e-06, "loss": 1.4806, "mean_token_accuracy": 0.6301528811454773, "num_tokens": 2638711868.0, "step": 15729 }, { "entropy": 1.6656751334667206, "epoch": 1.728021751668452, "grad_norm": 0.6592541933059692, "learning_rate": 2.8597908111114326e-06, "loss": 1.402, "mean_token_accuracy": 0.6527828822533289, "num_tokens": 2638931324.0, "step": 15730 }, { "entropy": 1.6092142363389332, "epoch": 1.7281316085798246, "grad_norm": 0.6773507595062256, "learning_rate": 2.8591079910085107e-06, "loss": 1.2708, "mean_token_accuracy": 0.678041805823644, "num_tokens": 2639120616.0, "step": 15731 }, { "entropy": 1.6422138214111328, "epoch": 1.7282414654911977, "grad_norm": 0.594689130783081, "learning_rate": 2.8584254285611512e-06, "loss": 1.3481, "mean_token_accuracy": 0.6701185554265976, "num_tokens": 2639262161.0, "step": 15732 }, { "entropy": 1.7203916311264038, "epoch": 1.7283513224025706, "grad_norm": 0.7151592969894409, "learning_rate": 2.8577431237909602e-06, "loss": 1.349, "mean_token_accuracy": 0.6614575286706289, "num_tokens": 2639424805.0, "step": 15733 }, { "entropy": 1.806416392326355, "epoch": 1.7284611793139435, "grad_norm": 0.6949290037155151, "learning_rate": 2.8570610767195274e-06, "loss": 1.4947, "mean_token_accuracy": 0.6405768990516663, "num_tokens": 2639580707.0, "step": 15734 }, { "entropy": 1.6962719062964122, "epoch": 1.7285710362253166, "grad_norm": 0.6608180999755859, "learning_rate": 2.8563792873684456e-06, "loss": 1.2491, "mean_token_accuracy": 0.6842715740203857, "num_tokens": 2639699425.0, "step": 15735 }, { "entropy": 1.6750577787558238, "epoch": 1.7286808931366895, "grad_norm": 0.7081702947616577, "learning_rate": 2.8556977557592884e-06, "loss": 1.3426, "mean_token_accuracy": 0.6623915582895279, "num_tokens": 2639841659.0, "step": 15736 }, { "entropy": 1.6969606379667919, "epoch": 1.7287907500480624, "grad_norm": 0.620355486869812, "learning_rate": 2.855016481913626e-06, "loss": 1.5321, "mean_token_accuracy": 0.6244812359412512, "num_tokens": 2640063434.0, "step": 15737 }, { "entropy": 1.646816263596217, "epoch": 1.7289006069594355, "grad_norm": 0.5661360025405884, "learning_rate": 2.854335465853022e-06, "loss": 1.5014, "mean_token_accuracy": 0.6421874364217123, "num_tokens": 2640282850.0, "step": 15738 }, { "entropy": 1.7638940612475078, "epoch": 1.7290104638708081, "grad_norm": 0.7863647937774658, "learning_rate": 2.8536547075990327e-06, "loss": 1.4823, "mean_token_accuracy": 0.6424362609783808, "num_tokens": 2640444765.0, "step": 15739 }, { "entropy": 1.7247726917266846, "epoch": 1.7291203207821813, "grad_norm": 0.7812128067016602, "learning_rate": 2.8529742071731985e-06, "loss": 1.4218, "mean_token_accuracy": 0.6599978854258856, "num_tokens": 2640610371.0, "step": 15740 }, { "entropy": 1.7008132835229237, "epoch": 1.7292301776935541, "grad_norm": 0.6622722744941711, "learning_rate": 2.8522939645970595e-06, "loss": 1.3794, "mean_token_accuracy": 0.6508124470710754, "num_tokens": 2640802543.0, "step": 15741 }, { "entropy": 1.6437017023563385, "epoch": 1.729340034604927, "grad_norm": 0.7969115972518921, "learning_rate": 2.851613979892146e-06, "loss": 1.3394, "mean_token_accuracy": 0.6736712157726288, "num_tokens": 2640968961.0, "step": 15742 }, { "entropy": 1.6500834325949352, "epoch": 1.7294498915163001, "grad_norm": 0.7005840539932251, "learning_rate": 2.8509342530799787e-06, "loss": 1.3966, "mean_token_accuracy": 0.6679046203692754, "num_tokens": 2641142162.0, "step": 15743 }, { "entropy": 1.7105072836081188, "epoch": 1.7295597484276728, "grad_norm": 0.6108032464981079, "learning_rate": 2.8502547841820684e-06, "loss": 1.5941, "mean_token_accuracy": 0.6181052277485529, "num_tokens": 2641405364.0, "step": 15744 }, { "entropy": 1.777452568213145, "epoch": 1.729669605339046, "grad_norm": 0.6558803915977478, "learning_rate": 2.8495755732199232e-06, "loss": 1.6254, "mean_token_accuracy": 0.6251836170752844, "num_tokens": 2641627804.0, "step": 15745 }, { "entropy": 1.735464612642924, "epoch": 1.7297794622504188, "grad_norm": 0.7284940481185913, "learning_rate": 2.848896620215037e-06, "loss": 1.4115, "mean_token_accuracy": 0.6465435773134232, "num_tokens": 2641763821.0, "step": 15746 }, { "entropy": 1.6562157074610393, "epoch": 1.7298893191617917, "grad_norm": 0.6121866703033447, "learning_rate": 2.848217925188902e-06, "loss": 1.3641, "mean_token_accuracy": 0.6681751608848572, "num_tokens": 2641955876.0, "step": 15747 }, { "entropy": 1.6489443282286327, "epoch": 1.7299991760731648, "grad_norm": 0.7306373119354248, "learning_rate": 2.8475394881629966e-06, "loss": 1.2843, "mean_token_accuracy": 0.6912119189898173, "num_tokens": 2642077818.0, "step": 15748 }, { "entropy": 1.6806483666102092, "epoch": 1.7301090329845377, "grad_norm": 0.8181135058403015, "learning_rate": 2.8468613091587902e-06, "loss": 1.4591, "mean_token_accuracy": 0.6678927342096964, "num_tokens": 2642228543.0, "step": 15749 }, { "entropy": 1.7011501491069794, "epoch": 1.7302188898959106, "grad_norm": 0.5812907218933105, "learning_rate": 2.84618338819775e-06, "loss": 1.4, "mean_token_accuracy": 0.6601615299781164, "num_tokens": 2642381141.0, "step": 15750 }, { "entropy": 1.6654735505580902, "epoch": 1.7303287468072837, "grad_norm": 0.6332482099533081, "learning_rate": 2.8455057253013354e-06, "loss": 1.4628, "mean_token_accuracy": 0.6520533412694931, "num_tokens": 2642558612.0, "step": 15751 }, { "entropy": 1.6841372152169545, "epoch": 1.7304386037186563, "grad_norm": 0.6896584033966064, "learning_rate": 2.8448283204909844e-06, "loss": 1.4898, "mean_token_accuracy": 0.6429760406414667, "num_tokens": 2642767553.0, "step": 15752 }, { "entropy": 1.6826592286427815, "epoch": 1.7305484606300294, "grad_norm": 0.6982526779174805, "learning_rate": 2.8441511737881443e-06, "loss": 1.2862, "mean_token_accuracy": 0.6770686457554499, "num_tokens": 2642910286.0, "step": 15753 }, { "entropy": 1.6773277123769124, "epoch": 1.7306583175414023, "grad_norm": 0.5871797800064087, "learning_rate": 2.843474285214246e-06, "loss": 1.5036, "mean_token_accuracy": 0.6401631236076355, "num_tokens": 2643130615.0, "step": 15754 }, { "entropy": 1.6971320907274883, "epoch": 1.7307681744527752, "grad_norm": 0.6325240135192871, "learning_rate": 2.8427976547907106e-06, "loss": 1.4979, "mean_token_accuracy": 0.6486349354187647, "num_tokens": 2643295952.0, "step": 15755 }, { "entropy": 1.6904420753320057, "epoch": 1.7308780313641483, "grad_norm": 0.8593859672546387, "learning_rate": 2.8421212825389516e-06, "loss": 1.3975, "mean_token_accuracy": 0.6446862071752548, "num_tokens": 2643482901.0, "step": 15756 }, { "entropy": 1.7852684557437897, "epoch": 1.730987888275521, "grad_norm": 0.6767821907997131, "learning_rate": 2.841445168480381e-06, "loss": 1.5704, "mean_token_accuracy": 0.6423915525277456, "num_tokens": 2643676176.0, "step": 15757 }, { "entropy": 1.7145917117595673, "epoch": 1.731097745186894, "grad_norm": 0.6300607323646545, "learning_rate": 2.8407693126363916e-06, "loss": 1.3585, "mean_token_accuracy": 0.6577344536781311, "num_tokens": 2643885388.0, "step": 15758 }, { "entropy": 1.727539946635564, "epoch": 1.731207602098267, "grad_norm": 0.7048355937004089, "learning_rate": 2.8400937150283793e-06, "loss": 1.5526, "mean_token_accuracy": 0.6489974558353424, "num_tokens": 2644056977.0, "step": 15759 }, { "entropy": 1.7069771488507588, "epoch": 1.7313174590096398, "grad_norm": 0.5949591398239136, "learning_rate": 2.8394183756777235e-06, "loss": 1.4094, "mean_token_accuracy": 0.6460785369078318, "num_tokens": 2644273336.0, "step": 15760 }, { "entropy": 1.7462304333845775, "epoch": 1.731427315921013, "grad_norm": 0.7763286232948303, "learning_rate": 2.838743294605797e-06, "loss": 1.4839, "mean_token_accuracy": 0.6531115273634592, "num_tokens": 2644460738.0, "step": 15761 }, { "entropy": 1.6935663322607677, "epoch": 1.7315371728323858, "grad_norm": 0.68621426820755, "learning_rate": 2.8380684718339696e-06, "loss": 1.4743, "mean_token_accuracy": 0.6545071303844452, "num_tokens": 2644623609.0, "step": 15762 }, { "entropy": 1.6895051697889965, "epoch": 1.7316470297437587, "grad_norm": 0.7107670903205872, "learning_rate": 2.8373939073835977e-06, "loss": 1.4164, "mean_token_accuracy": 0.665955513715744, "num_tokens": 2644765811.0, "step": 15763 }, { "entropy": 1.7151564260323842, "epoch": 1.7317568866551318, "grad_norm": 0.6578272581100464, "learning_rate": 2.8367196012760283e-06, "loss": 1.4266, "mean_token_accuracy": 0.6549387921889623, "num_tokens": 2644908445.0, "step": 15764 }, { "entropy": 1.702756404876709, "epoch": 1.7318667435665045, "grad_norm": 0.7014343738555908, "learning_rate": 2.836045553532605e-06, "loss": 1.2898, "mean_token_accuracy": 0.66953477760156, "num_tokens": 2645016657.0, "step": 15765 }, { "entropy": 1.6990113755067189, "epoch": 1.7319766004778776, "grad_norm": 0.6619819402694702, "learning_rate": 2.8353717641746625e-06, "loss": 1.3552, "mean_token_accuracy": 0.6663278589646021, "num_tokens": 2645151759.0, "step": 15766 }, { "entropy": 1.6938722531000774, "epoch": 1.7320864573892505, "grad_norm": 0.729397714138031, "learning_rate": 2.834698233223525e-06, "loss": 1.3684, "mean_token_accuracy": 0.6664966940879822, "num_tokens": 2645321883.0, "step": 15767 }, { "entropy": 1.6677038371562958, "epoch": 1.7321963143006234, "grad_norm": 0.6559589505195618, "learning_rate": 2.8340249607005087e-06, "loss": 1.4055, "mean_token_accuracy": 0.6565075367689133, "num_tokens": 2645474895.0, "step": 15768 }, { "entropy": 1.688380589087804, "epoch": 1.7323061712119965, "grad_norm": 0.6308188438415527, "learning_rate": 2.8333519466269223e-06, "loss": 1.3668, "mean_token_accuracy": 0.6556487778822581, "num_tokens": 2645681696.0, "step": 15769 }, { "entropy": 1.6896365185578663, "epoch": 1.7324160281233691, "grad_norm": 0.7085091471672058, "learning_rate": 2.832679191024066e-06, "loss": 1.3655, "mean_token_accuracy": 0.6658649444580078, "num_tokens": 2645863103.0, "step": 15770 }, { "entropy": 1.6898165345191956, "epoch": 1.7325258850347423, "grad_norm": 0.6663379669189453, "learning_rate": 2.8320066939132364e-06, "loss": 1.3565, "mean_token_accuracy": 0.6607397049665451, "num_tokens": 2646008101.0, "step": 15771 }, { "entropy": 1.759082555770874, "epoch": 1.7326357419461151, "grad_norm": 0.7728214859962463, "learning_rate": 2.83133445531571e-06, "loss": 1.4906, "mean_token_accuracy": 0.6619271288315455, "num_tokens": 2646196373.0, "step": 15772 }, { "entropy": 1.705134669939677, "epoch": 1.732745598857488, "grad_norm": 0.7739623785018921, "learning_rate": 2.8306624752527684e-06, "loss": 1.324, "mean_token_accuracy": 0.6735307027896246, "num_tokens": 2646379370.0, "step": 15773 }, { "entropy": 1.6545397241910298, "epoch": 1.7328554557688611, "grad_norm": 0.7858942151069641, "learning_rate": 2.82999075374568e-06, "loss": 1.4899, "mean_token_accuracy": 0.6507531503836314, "num_tokens": 2646557248.0, "step": 15774 }, { "entropy": 1.7393496334552765, "epoch": 1.732965312680234, "grad_norm": 0.8852123022079468, "learning_rate": 2.8293192908157025e-06, "loss": 1.5377, "mean_token_accuracy": 0.6492738674084345, "num_tokens": 2646718540.0, "step": 15775 }, { "entropy": 1.738167365392049, "epoch": 1.733075169591607, "grad_norm": 0.7457848191261292, "learning_rate": 2.828648086484086e-06, "loss": 1.4497, "mean_token_accuracy": 0.6467219044764837, "num_tokens": 2646866109.0, "step": 15776 }, { "entropy": 1.6820921699206035, "epoch": 1.73318502650298, "grad_norm": 0.6062507033348083, "learning_rate": 2.827977140772077e-06, "loss": 1.3582, "mean_token_accuracy": 0.6673463334639868, "num_tokens": 2647037691.0, "step": 15777 }, { "entropy": 1.700913409392039, "epoch": 1.7332948834143527, "grad_norm": 0.5943799614906311, "learning_rate": 2.827306453700907e-06, "loss": 1.4493, "mean_token_accuracy": 0.6339322676261266, "num_tokens": 2647261416.0, "step": 15778 }, { "entropy": 1.7037220895290375, "epoch": 1.7334047403257258, "grad_norm": 0.7893034219741821, "learning_rate": 2.826636025291808e-06, "loss": 1.4788, "mean_token_accuracy": 0.631934697429339, "num_tokens": 2647428426.0, "step": 15779 }, { "entropy": 1.662857900063197, "epoch": 1.7335145972370987, "grad_norm": 0.705226719379425, "learning_rate": 2.8259658555659947e-06, "loss": 1.3723, "mean_token_accuracy": 0.6748548299074173, "num_tokens": 2647576867.0, "step": 15780 }, { "entropy": 1.6877172191937764, "epoch": 1.7336244541484715, "grad_norm": 0.642670750617981, "learning_rate": 2.825295944544677e-06, "loss": 1.4399, "mean_token_accuracy": 0.6476767361164093, "num_tokens": 2647788737.0, "step": 15781 }, { "entropy": 1.6754780113697052, "epoch": 1.7337343110598447, "grad_norm": 0.8167265057563782, "learning_rate": 2.8246262922490596e-06, "loss": 1.4949, "mean_token_accuracy": 0.6553296744823456, "num_tokens": 2647943883.0, "step": 15782 }, { "entropy": 1.7225460310777028, "epoch": 1.7338441679712175, "grad_norm": 0.6751701831817627, "learning_rate": 2.8239568987003384e-06, "loss": 1.4517, "mean_token_accuracy": 0.6534865995248159, "num_tokens": 2648137335.0, "step": 15783 }, { "entropy": 1.690890371799469, "epoch": 1.7339540248825904, "grad_norm": 0.7012602686882019, "learning_rate": 2.8232877639196956e-06, "loss": 1.3007, "mean_token_accuracy": 0.6690143694480261, "num_tokens": 2648251721.0, "step": 15784 }, { "entropy": 1.7479467689990997, "epoch": 1.7340638817939633, "grad_norm": 0.7268240451812744, "learning_rate": 2.822618887928309e-06, "loss": 1.4206, "mean_token_accuracy": 0.6618320842583975, "num_tokens": 2648409793.0, "step": 15785 }, { "entropy": 1.722074290116628, "epoch": 1.7341737387053362, "grad_norm": 1.3608239889144897, "learning_rate": 2.8219502707473525e-06, "loss": 1.5261, "mean_token_accuracy": 0.6445515751838684, "num_tokens": 2648531384.0, "step": 15786 }, { "entropy": 1.6843404074509938, "epoch": 1.7342835956167093, "grad_norm": 0.6605405211448669, "learning_rate": 2.821281912397984e-06, "loss": 1.3001, "mean_token_accuracy": 0.6718050042788187, "num_tokens": 2648640336.0, "step": 15787 }, { "entropy": 1.662507524092992, "epoch": 1.7343934525280822, "grad_norm": 0.6708582043647766, "learning_rate": 2.820613812901356e-06, "loss": 1.3266, "mean_token_accuracy": 0.6668059825897217, "num_tokens": 2648772568.0, "step": 15788 }, { "entropy": 1.6774542232354481, "epoch": 1.734503309439455, "grad_norm": 0.6292397975921631, "learning_rate": 2.819945972278618e-06, "loss": 1.5471, "mean_token_accuracy": 0.6366194983323415, "num_tokens": 2649041455.0, "step": 15789 }, { "entropy": 1.7329609791437786, "epoch": 1.7346131663508282, "grad_norm": 0.7120325565338135, "learning_rate": 2.819278390550901e-06, "loss": 1.4066, "mean_token_accuracy": 0.6510707437992096, "num_tokens": 2649272586.0, "step": 15790 }, { "entropy": 1.6792218287785847, "epoch": 1.7347230232622008, "grad_norm": 0.6149081587791443, "learning_rate": 2.8186110677393387e-06, "loss": 1.3502, "mean_token_accuracy": 0.6708264698584875, "num_tokens": 2649437345.0, "step": 15791 }, { "entropy": 1.7267203132311504, "epoch": 1.734832880173574, "grad_norm": 0.6599135994911194, "learning_rate": 2.8179440038650496e-06, "loss": 1.3767, "mean_token_accuracy": 0.6644027580817541, "num_tokens": 2649620738.0, "step": 15792 }, { "entropy": 1.7191211581230164, "epoch": 1.7349427370849468, "grad_norm": 0.7179271578788757, "learning_rate": 2.817277198949144e-06, "loss": 1.4368, "mean_token_accuracy": 0.6469183464845022, "num_tokens": 2649837132.0, "step": 15793 }, { "entropy": 1.746620883544286, "epoch": 1.7350525939963197, "grad_norm": 0.643526017665863, "learning_rate": 2.8166106530127274e-06, "loss": 1.613, "mean_token_accuracy": 0.6203589936097463, "num_tokens": 2650073284.0, "step": 15794 }, { "entropy": 1.714139034350713, "epoch": 1.7351624509076928, "grad_norm": 0.6725690960884094, "learning_rate": 2.8159443660769002e-06, "loss": 1.381, "mean_token_accuracy": 0.6693058560291926, "num_tokens": 2650216304.0, "step": 15795 }, { "entropy": 1.7154027422269185, "epoch": 1.7352723078190657, "grad_norm": 0.6718941330909729, "learning_rate": 2.815278338162742e-06, "loss": 1.3734, "mean_token_accuracy": 0.6560245205958685, "num_tokens": 2650389116.0, "step": 15796 }, { "entropy": 1.6776060263315837, "epoch": 1.7353821647304386, "grad_norm": 0.6263152360916138, "learning_rate": 2.8146125692913373e-06, "loss": 1.3378, "mean_token_accuracy": 0.6764421413342158, "num_tokens": 2650553193.0, "step": 15797 }, { "entropy": 1.7232110400994618, "epoch": 1.7354920216418115, "grad_norm": 0.5665870904922485, "learning_rate": 2.8139470594837566e-06, "loss": 1.3868, "mean_token_accuracy": 0.6588666985432307, "num_tokens": 2650715674.0, "step": 15798 }, { "entropy": 1.6804384191830952, "epoch": 1.7356018785531844, "grad_norm": 0.5942453742027283, "learning_rate": 2.8132818087610637e-06, "loss": 1.3806, "mean_token_accuracy": 0.6652588794628779, "num_tokens": 2650863032.0, "step": 15799 }, { "entropy": 1.7259988685448964, "epoch": 1.7357117354645575, "grad_norm": 0.6818102598190308, "learning_rate": 2.81261681714431e-06, "loss": 1.3696, "mean_token_accuracy": 0.6753989507754644, "num_tokens": 2650997983.0, "step": 15800 }, { "entropy": 1.7083225051561992, "epoch": 1.7358215923759304, "grad_norm": 0.6384536623954773, "learning_rate": 2.811952084654548e-06, "loss": 1.4066, "mean_token_accuracy": 0.6457608987887701, "num_tokens": 2651168003.0, "step": 15801 }, { "entropy": 1.6841741700967152, "epoch": 1.7359314492873033, "grad_norm": 0.7538333535194397, "learning_rate": 2.8112876113128094e-06, "loss": 1.2787, "mean_token_accuracy": 0.665538469950358, "num_tokens": 2651282390.0, "step": 15802 }, { "entropy": 1.6514563858509064, "epoch": 1.7360413061986764, "grad_norm": 0.604898989200592, "learning_rate": 2.8106233971401305e-06, "loss": 1.384, "mean_token_accuracy": 0.6566628019014994, "num_tokens": 2651468047.0, "step": 15803 }, { "entropy": 1.6742986639340718, "epoch": 1.736151163110049, "grad_norm": 0.7198217511177063, "learning_rate": 2.8099594421575306e-06, "loss": 1.3387, "mean_token_accuracy": 0.6742167373498281, "num_tokens": 2651638952.0, "step": 15804 }, { "entropy": 1.6387183368206024, "epoch": 1.7362610200214221, "grad_norm": 0.5498782396316528, "learning_rate": 2.8092957463860225e-06, "loss": 1.4036, "mean_token_accuracy": 0.6512420624494553, "num_tokens": 2651855946.0, "step": 15805 }, { "entropy": 1.713132123152415, "epoch": 1.736370876932795, "grad_norm": 0.6297646164894104, "learning_rate": 2.8086323098466127e-06, "loss": 1.2696, "mean_token_accuracy": 0.6640227288007736, "num_tokens": 2652083899.0, "step": 15806 }, { "entropy": 1.7311813334623973, "epoch": 1.736480733844168, "grad_norm": 0.7326252460479736, "learning_rate": 2.8079691325603037e-06, "loss": 1.4061, "mean_token_accuracy": 0.6535738656918207, "num_tokens": 2652252305.0, "step": 15807 }, { "entropy": 1.7351558605829875, "epoch": 1.736590590755541, "grad_norm": 0.809312105178833, "learning_rate": 2.8073062145480766e-06, "loss": 1.6552, "mean_token_accuracy": 0.6350140472253164, "num_tokens": 2652441076.0, "step": 15808 }, { "entropy": 1.7146221995353699, "epoch": 1.736700447666914, "grad_norm": 0.6093737483024597, "learning_rate": 2.806643555830915e-06, "loss": 1.3532, "mean_token_accuracy": 0.6540501813093821, "num_tokens": 2652599894.0, "step": 15809 }, { "entropy": 1.699708640575409, "epoch": 1.7368103045782868, "grad_norm": 0.6803867816925049, "learning_rate": 2.8059811564297957e-06, "loss": 1.3324, "mean_token_accuracy": 0.6549798647562662, "num_tokens": 2652768541.0, "step": 15810 }, { "entropy": 1.7326335211594899, "epoch": 1.7369201614896597, "grad_norm": 0.7687568664550781, "learning_rate": 2.80531901636568e-06, "loss": 1.2987, "mean_token_accuracy": 0.6661215225855509, "num_tokens": 2652899364.0, "step": 15811 }, { "entropy": 1.7323335111141205, "epoch": 1.7370300184010325, "grad_norm": 0.7186509370803833, "learning_rate": 2.804657135659522e-06, "loss": 1.5174, "mean_token_accuracy": 0.6523576378822327, "num_tokens": 2653039239.0, "step": 15812 }, { "entropy": 1.6271824638048809, "epoch": 1.7371398753124057, "grad_norm": 0.7200955152511597, "learning_rate": 2.803995514332277e-06, "loss": 1.2861, "mean_token_accuracy": 0.6787689824899038, "num_tokens": 2653192220.0, "step": 15813 }, { "entropy": 1.6728685200214386, "epoch": 1.7372497322237785, "grad_norm": 0.6453947424888611, "learning_rate": 2.8033341524048764e-06, "loss": 1.443, "mean_token_accuracy": 0.6549021850029627, "num_tokens": 2653384321.0, "step": 15814 }, { "entropy": 1.7225570380687714, "epoch": 1.7373595891351514, "grad_norm": 0.8567750453948975, "learning_rate": 2.802673049898259e-06, "loss": 1.4889, "mean_token_accuracy": 0.6441005816062292, "num_tokens": 2653559173.0, "step": 15815 }, { "entropy": 1.7080976366996765, "epoch": 1.7374694460465245, "grad_norm": 0.6325947046279907, "learning_rate": 2.8020122068333466e-06, "loss": 1.4394, "mean_token_accuracy": 0.6394100387891134, "num_tokens": 2653758046.0, "step": 15816 }, { "entropy": 1.7126984298229218, "epoch": 1.7375793029578972, "grad_norm": 0.7144211530685425, "learning_rate": 2.801351623231051e-06, "loss": 1.4652, "mean_token_accuracy": 0.6530701269706091, "num_tokens": 2653930419.0, "step": 15817 }, { "entropy": 1.7186160882314045, "epoch": 1.7376891598692703, "grad_norm": 0.589459240436554, "learning_rate": 2.8006912991122827e-06, "loss": 1.356, "mean_token_accuracy": 0.6640836447477341, "num_tokens": 2654147898.0, "step": 15818 }, { "entropy": 1.675024002790451, "epoch": 1.7377990167806432, "grad_norm": 0.6387170553207397, "learning_rate": 2.8000312344979434e-06, "loss": 1.5746, "mean_token_accuracy": 0.646162673830986, "num_tokens": 2654364535.0, "step": 15819 }, { "entropy": 1.6666264633337657, "epoch": 1.737908873692016, "grad_norm": 0.7120461463928223, "learning_rate": 2.7993714294089173e-06, "loss": 1.2804, "mean_token_accuracy": 0.6800036976734797, "num_tokens": 2654482190.0, "step": 15820 }, { "entropy": 1.7276875178019206, "epoch": 1.7380187306033892, "grad_norm": 0.818804144859314, "learning_rate": 2.7987118838660903e-06, "loss": 1.2782, "mean_token_accuracy": 0.6826535513003668, "num_tokens": 2654642615.0, "step": 15821 }, { "entropy": 1.7416711151599884, "epoch": 1.738128587514762, "grad_norm": 0.6067622900009155, "learning_rate": 2.7980525978903378e-06, "loss": 1.6374, "mean_token_accuracy": 0.6284281214078268, "num_tokens": 2654834393.0, "step": 15822 }, { "entropy": 1.6803725957870483, "epoch": 1.738238444426135, "grad_norm": 0.5896869897842407, "learning_rate": 2.797393571502524e-06, "loss": 1.455, "mean_token_accuracy": 0.6401728590329488, "num_tokens": 2655091358.0, "step": 15823 }, { "entropy": 1.6684520145257313, "epoch": 1.738348301337508, "grad_norm": 0.6544545292854309, "learning_rate": 2.796734804723507e-06, "loss": 1.2353, "mean_token_accuracy": 0.6802895118792852, "num_tokens": 2655219666.0, "step": 15824 }, { "entropy": 1.7058659692605336, "epoch": 1.7384581582488807, "grad_norm": 0.6241233348846436, "learning_rate": 2.796076297574138e-06, "loss": 1.367, "mean_token_accuracy": 0.6578785528739294, "num_tokens": 2655395368.0, "step": 15825 }, { "entropy": 1.77889946103096, "epoch": 1.7385680151602538, "grad_norm": 0.6710366010665894, "learning_rate": 2.795418050075257e-06, "loss": 1.2793, "mean_token_accuracy": 0.6623863478501638, "num_tokens": 2655516501.0, "step": 15826 }, { "entropy": 1.703927884499232, "epoch": 1.7386778720716267, "grad_norm": 0.6267694234848022, "learning_rate": 2.7947600622476988e-06, "loss": 1.1627, "mean_token_accuracy": 0.6800014326969782, "num_tokens": 2655677633.0, "step": 15827 }, { "entropy": 1.7275777161121368, "epoch": 1.7387877289829996, "grad_norm": 0.6682912111282349, "learning_rate": 2.794102334112285e-06, "loss": 1.3755, "mean_token_accuracy": 0.6576682031154633, "num_tokens": 2655842114.0, "step": 15828 }, { "entropy": 1.6405729452768962, "epoch": 1.7388975858943727, "grad_norm": 0.7250331044197083, "learning_rate": 2.7934448656898357e-06, "loss": 1.3655, "mean_token_accuracy": 0.6576348741849264, "num_tokens": 2656065244.0, "step": 15829 }, { "entropy": 1.694657524426778, "epoch": 1.7390074428057454, "grad_norm": 0.7095337510108948, "learning_rate": 2.7927876570011594e-06, "loss": 1.3874, "mean_token_accuracy": 0.670227994521459, "num_tokens": 2656181627.0, "step": 15830 }, { "entropy": 1.61565363407135, "epoch": 1.7391172997171185, "grad_norm": 3.940749406814575, "learning_rate": 2.7921307080670553e-06, "loss": 1.269, "mean_token_accuracy": 0.6815206309159597, "num_tokens": 2656368268.0, "step": 15831 }, { "entropy": 1.7019450465838115, "epoch": 1.7392271566284914, "grad_norm": 0.9675191640853882, "learning_rate": 2.791474018908314e-06, "loss": 1.5986, "mean_token_accuracy": 0.6489445567131042, "num_tokens": 2656518851.0, "step": 15832 }, { "entropy": 1.6337886949380238, "epoch": 1.7393370135398643, "grad_norm": 0.5778870582580566, "learning_rate": 2.7908175895457224e-06, "loss": 1.3799, "mean_token_accuracy": 0.6472407778104147, "num_tokens": 2656710252.0, "step": 15833 }, { "entropy": 1.7616515358289082, "epoch": 1.7394468704512374, "grad_norm": 0.6655634641647339, "learning_rate": 2.7901614200000536e-06, "loss": 1.531, "mean_token_accuracy": 0.6265707910060883, "num_tokens": 2656985584.0, "step": 15834 }, { "entropy": 1.6581469575564067, "epoch": 1.7395567273626102, "grad_norm": 0.6637037992477417, "learning_rate": 2.789505510292078e-06, "loss": 1.2826, "mean_token_accuracy": 0.6793718685706457, "num_tokens": 2657128235.0, "step": 15835 }, { "entropy": 1.6418430705865223, "epoch": 1.7396665842739831, "grad_norm": 0.6092338562011719, "learning_rate": 2.788849860442554e-06, "loss": 1.2874, "mean_token_accuracy": 0.6675700594981512, "num_tokens": 2657267098.0, "step": 15836 }, { "entropy": 1.710461030403773, "epoch": 1.7397764411853562, "grad_norm": 0.619476854801178, "learning_rate": 2.7881944704722297e-06, "loss": 1.4077, "mean_token_accuracy": 0.6550353765487671, "num_tokens": 2657447186.0, "step": 15837 }, { "entropy": 1.75293172399203, "epoch": 1.739886298096729, "grad_norm": 0.7521857619285583, "learning_rate": 2.7875393404018498e-06, "loss": 1.5018, "mean_token_accuracy": 0.63949865847826, "num_tokens": 2657625193.0, "step": 15838 }, { "entropy": 1.7228674193223317, "epoch": 1.739996155008102, "grad_norm": 0.6770578622817993, "learning_rate": 2.786884470252153e-06, "loss": 1.5646, "mean_token_accuracy": 0.6267157097657522, "num_tokens": 2657814471.0, "step": 15839 }, { "entropy": 1.6907674670219421, "epoch": 1.740106011919475, "grad_norm": 0.5758486986160278, "learning_rate": 2.7862298600438577e-06, "loss": 1.3123, "mean_token_accuracy": 0.6664116332928339, "num_tokens": 2657971780.0, "step": 15840 }, { "entropy": 1.6239832937717438, "epoch": 1.7402158688308478, "grad_norm": 0.6411721110343933, "learning_rate": 2.7855755097976874e-06, "loss": 1.4493, "mean_token_accuracy": 0.6541901677846909, "num_tokens": 2658181782.0, "step": 15841 }, { "entropy": 1.7198786338170369, "epoch": 1.7403257257422209, "grad_norm": 0.695188045501709, "learning_rate": 2.784921419534351e-06, "loss": 1.654, "mean_token_accuracy": 0.6311604132254919, "num_tokens": 2658402574.0, "step": 15842 }, { "entropy": 1.6882243553797405, "epoch": 1.7404355826535935, "grad_norm": 0.6721879839897156, "learning_rate": 2.7842675892745503e-06, "loss": 1.2438, "mean_token_accuracy": 0.671802838643392, "num_tokens": 2658526785.0, "step": 15843 }, { "entropy": 1.742331971724828, "epoch": 1.7405454395649667, "grad_norm": 0.6647438406944275, "learning_rate": 2.7836140190389767e-06, "loss": 1.2945, "mean_token_accuracy": 0.6804736703634262, "num_tokens": 2658650417.0, "step": 15844 }, { "entropy": 1.6729466617107391, "epoch": 1.7406552964763395, "grad_norm": 0.6848008036613464, "learning_rate": 2.7829607088483192e-06, "loss": 1.3458, "mean_token_accuracy": 0.67551389336586, "num_tokens": 2658819700.0, "step": 15845 }, { "entropy": 1.6987472077210743, "epoch": 1.7407651533877124, "grad_norm": 0.7082852125167847, "learning_rate": 2.78230765872325e-06, "loss": 1.4251, "mean_token_accuracy": 0.6490759005149206, "num_tokens": 2659000912.0, "step": 15846 }, { "entropy": 1.7281360030174255, "epoch": 1.7408750102990855, "grad_norm": 0.5942803025245667, "learning_rate": 2.781654868684443e-06, "loss": 1.4627, "mean_token_accuracy": 0.6435799946387609, "num_tokens": 2659232723.0, "step": 15847 }, { "entropy": 1.6801528135935466, "epoch": 1.7409848672104584, "grad_norm": 0.6154281497001648, "learning_rate": 2.7810023387525553e-06, "loss": 1.3461, "mean_token_accuracy": 0.6573351869980494, "num_tokens": 2659406577.0, "step": 15848 }, { "entropy": 1.7473509311676025, "epoch": 1.7410947241218313, "grad_norm": 0.6029602289199829, "learning_rate": 2.780350068948239e-06, "loss": 1.5064, "mean_token_accuracy": 0.6355864902337393, "num_tokens": 2659628818.0, "step": 15849 }, { "entropy": 1.7131327490011852, "epoch": 1.7412045810332044, "grad_norm": 0.7908769845962524, "learning_rate": 2.7796980592921392e-06, "loss": 1.3336, "mean_token_accuracy": 0.6735485146443049, "num_tokens": 2659779586.0, "step": 15850 }, { "entropy": 1.7140113910039265, "epoch": 1.741314437944577, "grad_norm": 0.7202388644218445, "learning_rate": 2.779046309804895e-06, "loss": 1.5897, "mean_token_accuracy": 0.6321464478969574, "num_tokens": 2659934329.0, "step": 15851 }, { "entropy": 1.6853972772757213, "epoch": 1.7414242948559502, "grad_norm": 0.7633290886878967, "learning_rate": 2.7783948205071265e-06, "loss": 1.39, "mean_token_accuracy": 0.649382695555687, "num_tokens": 2660091123.0, "step": 15852 }, { "entropy": 1.6331301033496857, "epoch": 1.741534151767323, "grad_norm": 0.7545872926712036, "learning_rate": 2.7777435914194574e-06, "loss": 1.3173, "mean_token_accuracy": 0.6768523355325063, "num_tokens": 2660240337.0, "step": 15853 }, { "entropy": 1.7424573004245758, "epoch": 1.741644008678696, "grad_norm": 0.7436056137084961, "learning_rate": 2.7770926225625016e-06, "loss": 1.4274, "mean_token_accuracy": 0.6455424477656683, "num_tokens": 2660404045.0, "step": 15854 }, { "entropy": 1.7621293663978577, "epoch": 1.741753865590069, "grad_norm": 0.8202974200248718, "learning_rate": 2.7764419139568572e-06, "loss": 1.4083, "mean_token_accuracy": 0.6570224414269129, "num_tokens": 2660549219.0, "step": 15855 }, { "entropy": 1.7344311475753784, "epoch": 1.7418637225014417, "grad_norm": 0.9669505953788757, "learning_rate": 2.77579146562312e-06, "loss": 1.5783, "mean_token_accuracy": 0.643063947558403, "num_tokens": 2660719518.0, "step": 15856 }, { "entropy": 1.7664933999379475, "epoch": 1.7419735794128148, "grad_norm": 0.7337760925292969, "learning_rate": 2.7751412775818774e-06, "loss": 1.3591, "mean_token_accuracy": 0.6609189411004385, "num_tokens": 2660852111.0, "step": 15857 }, { "entropy": 1.7087633113066356, "epoch": 1.7420834363241877, "grad_norm": 0.6853848099708557, "learning_rate": 2.7744913498537073e-06, "loss": 1.3429, "mean_token_accuracy": 0.6679713129997253, "num_tokens": 2661027069.0, "step": 15858 }, { "entropy": 1.7308754622936249, "epoch": 1.7421932932355606, "grad_norm": 0.6821447610855103, "learning_rate": 2.77384168245918e-06, "loss": 1.2652, "mean_token_accuracy": 0.6845847517251968, "num_tokens": 2661181830.0, "step": 15859 }, { "entropy": 1.6414716045061748, "epoch": 1.7423031501469337, "grad_norm": 1.1366279125213623, "learning_rate": 2.7731922754188574e-06, "loss": 1.192, "mean_token_accuracy": 0.6885305742422739, "num_tokens": 2661407962.0, "step": 15860 }, { "entropy": 1.7026232481002808, "epoch": 1.7424130070583066, "grad_norm": 0.7026152014732361, "learning_rate": 2.77254312875329e-06, "loss": 1.3201, "mean_token_accuracy": 0.6643240998188654, "num_tokens": 2661539074.0, "step": 15861 }, { "entropy": 1.6785250306129456, "epoch": 1.7425228639696795, "grad_norm": 0.7026225924491882, "learning_rate": 2.7718942424830254e-06, "loss": 1.3103, "mean_token_accuracy": 0.6704440861940384, "num_tokens": 2661712726.0, "step": 15862 }, { "entropy": 1.731045385201772, "epoch": 1.7426327208810526, "grad_norm": 0.7580122947692871, "learning_rate": 2.771245616628603e-06, "loss": 1.2901, "mean_token_accuracy": 0.6796207278966904, "num_tokens": 2661827026.0, "step": 15863 }, { "entropy": 1.724742700656255, "epoch": 1.7427425777924253, "grad_norm": 0.8208819031715393, "learning_rate": 2.7705972512105454e-06, "loss": 1.35, "mean_token_accuracy": 0.6589020987351736, "num_tokens": 2661955123.0, "step": 15864 }, { "entropy": 1.7283775707085927, "epoch": 1.7428524347037984, "grad_norm": 0.8843021392822266, "learning_rate": 2.769949146249378e-06, "loss": 1.3974, "mean_token_accuracy": 0.6639308879772822, "num_tokens": 2662121725.0, "step": 15865 }, { "entropy": 1.7786122262477875, "epoch": 1.7429622916151712, "grad_norm": 0.762104332447052, "learning_rate": 2.769301301765612e-06, "loss": 1.3663, "mean_token_accuracy": 0.6581309884786606, "num_tokens": 2662248768.0, "step": 15866 }, { "entropy": 1.7138410607973735, "epoch": 1.7430721485265441, "grad_norm": 0.6937832236289978, "learning_rate": 2.7686537177797523e-06, "loss": 1.3812, "mean_token_accuracy": 0.655176599820455, "num_tokens": 2662403894.0, "step": 15867 }, { "entropy": 1.761600911617279, "epoch": 1.7431820054379172, "grad_norm": 0.6983460783958435, "learning_rate": 2.76800639431229e-06, "loss": 1.5221, "mean_token_accuracy": 0.6477015241980553, "num_tokens": 2662548926.0, "step": 15868 }, { "entropy": 1.7244667708873749, "epoch": 1.74329186234929, "grad_norm": 0.9077073335647583, "learning_rate": 2.767359331383718e-06, "loss": 1.7027, "mean_token_accuracy": 0.6437298407157263, "num_tokens": 2662710873.0, "step": 15869 }, { "entropy": 1.724602371454239, "epoch": 1.743401719260663, "grad_norm": 0.7138944268226624, "learning_rate": 2.766712529014512e-06, "loss": 1.5175, "mean_token_accuracy": 0.6394424885511398, "num_tokens": 2662849006.0, "step": 15870 }, { "entropy": 1.7281849185625713, "epoch": 1.743511576172036, "grad_norm": 0.6491277813911438, "learning_rate": 2.7660659872251465e-06, "loss": 1.4241, "mean_token_accuracy": 0.6524779796600342, "num_tokens": 2663030042.0, "step": 15871 }, { "entropy": 1.7124824225902557, "epoch": 1.7436214330834088, "grad_norm": 1.1152174472808838, "learning_rate": 2.7654197060360814e-06, "loss": 1.3749, "mean_token_accuracy": 0.6579590986172358, "num_tokens": 2663167002.0, "step": 15872 }, { "entropy": 1.7078356345494587, "epoch": 1.7437312899947819, "grad_norm": 0.7276211380958557, "learning_rate": 2.7647736854677713e-06, "loss": 1.3144, "mean_token_accuracy": 0.6709758639335632, "num_tokens": 2663323208.0, "step": 15873 }, { "entropy": 1.692564715941747, "epoch": 1.7438411469061548, "grad_norm": 0.8237895369529724, "learning_rate": 2.7641279255406627e-06, "loss": 1.2983, "mean_token_accuracy": 0.6772258182366689, "num_tokens": 2663464049.0, "step": 15874 }, { "entropy": 1.6918930908044179, "epoch": 1.7439510038175277, "grad_norm": 0.6199659705162048, "learning_rate": 2.763482426275198e-06, "loss": 1.5252, "mean_token_accuracy": 0.6460767934719721, "num_tokens": 2663626836.0, "step": 15875 }, { "entropy": 1.6668222049872081, "epoch": 1.7440608607289008, "grad_norm": 0.6627802848815918, "learning_rate": 2.762837187691799e-06, "loss": 1.442, "mean_token_accuracy": 0.6594364990790685, "num_tokens": 2663781920.0, "step": 15876 }, { "entropy": 1.7106184164683025, "epoch": 1.7441707176402734, "grad_norm": 0.6994909048080444, "learning_rate": 2.762192209810891e-06, "loss": 1.4027, "mean_token_accuracy": 0.6450665394465128, "num_tokens": 2663934972.0, "step": 15877 }, { "entropy": 1.664261003335317, "epoch": 1.7442805745516465, "grad_norm": 0.7002906203269958, "learning_rate": 2.7615474926528897e-06, "loss": 1.4519, "mean_token_accuracy": 0.6614581495523453, "num_tokens": 2664127135.0, "step": 15878 }, { "entropy": 1.7660688559214275, "epoch": 1.7443904314630194, "grad_norm": 0.6302309036254883, "learning_rate": 2.7609030362381985e-06, "loss": 1.3609, "mean_token_accuracy": 0.6554263929526011, "num_tokens": 2664317047.0, "step": 15879 }, { "entropy": 1.723916381597519, "epoch": 1.7445002883743923, "grad_norm": 0.6405919194221497, "learning_rate": 2.76025884058721e-06, "loss": 1.4031, "mean_token_accuracy": 0.656665583451589, "num_tokens": 2664491829.0, "step": 15880 }, { "entropy": 1.698687841494878, "epoch": 1.7446101452857654, "grad_norm": 0.6089337468147278, "learning_rate": 2.7596149057203198e-06, "loss": 1.2856, "mean_token_accuracy": 0.6691889415184656, "num_tokens": 2664635435.0, "step": 15881 }, { "entropy": 1.7116204798221588, "epoch": 1.744720002197138, "grad_norm": 0.698288083076477, "learning_rate": 2.758971231657902e-06, "loss": 1.3977, "mean_token_accuracy": 0.6574417501688004, "num_tokens": 2664771325.0, "step": 15882 }, { "entropy": 1.7519585887591045, "epoch": 1.7448298591085112, "grad_norm": 0.6853853464126587, "learning_rate": 2.758327818420333e-06, "loss": 1.5416, "mean_token_accuracy": 0.643589456876119, "num_tokens": 2665007886.0, "step": 15883 }, { "entropy": 1.688419868548711, "epoch": 1.744939716019884, "grad_norm": 0.6974164247512817, "learning_rate": 2.757684666027975e-06, "loss": 1.5013, "mean_token_accuracy": 0.6468348503112793, "num_tokens": 2665173280.0, "step": 15884 }, { "entropy": 1.7338370283444722, "epoch": 1.745049572931257, "grad_norm": 0.8303990960121155, "learning_rate": 2.757041774501182e-06, "loss": 1.3642, "mean_token_accuracy": 0.6649026970068613, "num_tokens": 2665324438.0, "step": 15885 }, { "entropy": 1.6935912072658539, "epoch": 1.74515942984263, "grad_norm": 0.7695938944816589, "learning_rate": 2.7563991438603017e-06, "loss": 1.4399, "mean_token_accuracy": 0.644097218910853, "num_tokens": 2665475264.0, "step": 15886 }, { "entropy": 1.6517487665017445, "epoch": 1.745269286754003, "grad_norm": 0.6604319214820862, "learning_rate": 2.755756774125678e-06, "loss": 1.2338, "mean_token_accuracy": 0.6787222623825073, "num_tokens": 2665614784.0, "step": 15887 }, { "entropy": 1.7014476756254833, "epoch": 1.7453791436653758, "grad_norm": 0.7577602863311768, "learning_rate": 2.755114665317634e-06, "loss": 1.2983, "mean_token_accuracy": 0.667173316081365, "num_tokens": 2665727625.0, "step": 15888 }, { "entropy": 1.6906098127365112, "epoch": 1.745489000576749, "grad_norm": 0.639340341091156, "learning_rate": 2.754472817456496e-06, "loss": 1.4082, "mean_token_accuracy": 0.6597124834855398, "num_tokens": 2665930344.0, "step": 15889 }, { "entropy": 1.6897248029708862, "epoch": 1.7455988574881216, "grad_norm": 0.665076732635498, "learning_rate": 2.7538312305625775e-06, "loss": 1.3761, "mean_token_accuracy": 0.6529867599407831, "num_tokens": 2666079301.0, "step": 15890 }, { "entropy": 1.7365977764129639, "epoch": 1.7457087143994947, "grad_norm": 0.7009277939796448, "learning_rate": 2.7531899046561862e-06, "loss": 1.3898, "mean_token_accuracy": 0.6456332057714462, "num_tokens": 2666266419.0, "step": 15891 }, { "entropy": 1.7120999991893768, "epoch": 1.7458185713108676, "grad_norm": 0.6818946003913879, "learning_rate": 2.7525488397576173e-06, "loss": 1.379, "mean_token_accuracy": 0.6504452576239904, "num_tokens": 2666457767.0, "step": 15892 }, { "entropy": 1.7892510890960693, "epoch": 1.7459284282222405, "grad_norm": 0.8706643581390381, "learning_rate": 2.751908035887161e-06, "loss": 1.6292, "mean_token_accuracy": 0.6330756644407908, "num_tokens": 2666652930.0, "step": 15893 }, { "entropy": 1.697370360294978, "epoch": 1.7460382851336136, "grad_norm": 0.7226040363311768, "learning_rate": 2.7512674930650974e-06, "loss": 1.4393, "mean_token_accuracy": 0.6513659656047821, "num_tokens": 2666817818.0, "step": 15894 }, { "entropy": 1.7232015530268352, "epoch": 1.7461481420449863, "grad_norm": 0.6826181411743164, "learning_rate": 2.7506272113117044e-06, "loss": 1.5085, "mean_token_accuracy": 0.638033077120781, "num_tokens": 2667047183.0, "step": 15895 }, { "entropy": 1.7857304712136586, "epoch": 1.7462579989563594, "grad_norm": 0.7245029807090759, "learning_rate": 2.74998719064724e-06, "loss": 1.3768, "mean_token_accuracy": 0.6544994562864304, "num_tokens": 2667225814.0, "step": 15896 }, { "entropy": 1.6997434000174205, "epoch": 1.7463678558677322, "grad_norm": 0.5723074078559875, "learning_rate": 2.749347431091963e-06, "loss": 1.4982, "mean_token_accuracy": 0.6429259975751241, "num_tokens": 2667426464.0, "step": 15897 }, { "entropy": 1.696702629327774, "epoch": 1.7464777127791051, "grad_norm": 0.7994809150695801, "learning_rate": 2.748707932666124e-06, "loss": 1.4103, "mean_token_accuracy": 0.6626110722621282, "num_tokens": 2667592380.0, "step": 15898 }, { "entropy": 1.7208527425924938, "epoch": 1.7465875696904782, "grad_norm": 0.7930123209953308, "learning_rate": 2.748068695389961e-06, "loss": 1.2325, "mean_token_accuracy": 0.6823674192031225, "num_tokens": 2667677926.0, "step": 15899 }, { "entropy": 1.7191575070222218, "epoch": 1.7466974266018511, "grad_norm": 0.896288275718689, "learning_rate": 2.7474297192837036e-06, "loss": 1.5166, "mean_token_accuracy": 0.6732039203246435, "num_tokens": 2667810576.0, "step": 15900 }, { "entropy": 1.6295614341894786, "epoch": 1.746807283513224, "grad_norm": 0.7545291185379028, "learning_rate": 2.7467910043675777e-06, "loss": 1.3663, "mean_token_accuracy": 0.6583205610513687, "num_tokens": 2667990884.0, "step": 15901 }, { "entropy": 1.7029616435368855, "epoch": 1.7469171404245971, "grad_norm": 0.6451340913772583, "learning_rate": 2.746152550661797e-06, "loss": 1.4962, "mean_token_accuracy": 0.637171596288681, "num_tokens": 2668221796.0, "step": 15902 }, { "entropy": 1.7344611088434856, "epoch": 1.7470269973359698, "grad_norm": 0.7526887655258179, "learning_rate": 2.74551435818657e-06, "loss": 1.2977, "mean_token_accuracy": 0.6728590279817581, "num_tokens": 2668370216.0, "step": 15903 }, { "entropy": 1.7479794124762218, "epoch": 1.7471368542473429, "grad_norm": 0.6179333925247192, "learning_rate": 2.7448764269620935e-06, "loss": 1.3405, "mean_token_accuracy": 0.6603627453247706, "num_tokens": 2668497399.0, "step": 15904 }, { "entropy": 1.749539703130722, "epoch": 1.7472467111587158, "grad_norm": 0.7413578629493713, "learning_rate": 2.744238757008557e-06, "loss": 1.4768, "mean_token_accuracy": 0.6490618834892908, "num_tokens": 2668630277.0, "step": 15905 }, { "entropy": 1.6794603963692982, "epoch": 1.7473565680700887, "grad_norm": 0.6921920776367188, "learning_rate": 2.7436013483461444e-06, "loss": 1.2721, "mean_token_accuracy": 0.6702764679988226, "num_tokens": 2668781076.0, "step": 15906 }, { "entropy": 1.697002778450648, "epoch": 1.7474664249814618, "grad_norm": 0.6494120955467224, "learning_rate": 2.742964200995031e-06, "loss": 1.4141, "mean_token_accuracy": 0.6404003153244654, "num_tokens": 2668988627.0, "step": 15907 }, { "entropy": 1.6904459396998088, "epoch": 1.7475762818928344, "grad_norm": 1.0672904253005981, "learning_rate": 2.7423273149753772e-06, "loss": 1.3819, "mean_token_accuracy": 0.6539155195156733, "num_tokens": 2669160412.0, "step": 15908 }, { "entropy": 1.7080066402753193, "epoch": 1.7476861388042075, "grad_norm": 0.6226124167442322, "learning_rate": 2.7416906903073428e-06, "loss": 1.4143, "mean_token_accuracy": 0.6591801842053732, "num_tokens": 2669354488.0, "step": 15909 }, { "entropy": 1.7091161111990611, "epoch": 1.7477959957155804, "grad_norm": 0.7031316757202148, "learning_rate": 2.7410543270110783e-06, "loss": 1.4967, "mean_token_accuracy": 0.6567925910154978, "num_tokens": 2669527028.0, "step": 15910 }, { "entropy": 1.66484734416008, "epoch": 1.7479058526269533, "grad_norm": 0.5733075737953186, "learning_rate": 2.7404182251067223e-06, "loss": 1.3354, "mean_token_accuracy": 0.6688386301199595, "num_tokens": 2669743539.0, "step": 15911 }, { "entropy": 1.682132512331009, "epoch": 1.7480157095383264, "grad_norm": 0.7437959313392639, "learning_rate": 2.739782384614407e-06, "loss": 1.4501, "mean_token_accuracy": 0.6545865833759308, "num_tokens": 2669897155.0, "step": 15912 }, { "entropy": 1.715892086426417, "epoch": 1.7481255664496993, "grad_norm": 0.6199830174446106, "learning_rate": 2.7391468055542573e-06, "loss": 1.3871, "mean_token_accuracy": 0.6614372233549753, "num_tokens": 2670031369.0, "step": 15913 }, { "entropy": 1.7419675091902416, "epoch": 1.7482354233610722, "grad_norm": 0.6958132386207581, "learning_rate": 2.7385114879463886e-06, "loss": 1.466, "mean_token_accuracy": 0.6350632160902023, "num_tokens": 2670226720.0, "step": 15914 }, { "entropy": 1.7340802152951558, "epoch": 1.7483452802724453, "grad_norm": 0.6458380818367004, "learning_rate": 2.73787643181091e-06, "loss": 1.3632, "mean_token_accuracy": 0.651315172513326, "num_tokens": 2670367360.0, "step": 15915 }, { "entropy": 1.7228031158447266, "epoch": 1.748455137183818, "grad_norm": 0.8244587779045105, "learning_rate": 2.7372416371679196e-06, "loss": 1.3318, "mean_token_accuracy": 0.6637958685557047, "num_tokens": 2670503435.0, "step": 15916 }, { "entropy": 1.730597714583079, "epoch": 1.748564994095191, "grad_norm": 0.6726572513580322, "learning_rate": 2.7366071040375055e-06, "loss": 1.4834, "mean_token_accuracy": 0.6449510852495829, "num_tokens": 2670754908.0, "step": 15917 }, { "entropy": 1.697900931040446, "epoch": 1.748674851006564, "grad_norm": 0.6586857438087463, "learning_rate": 2.7359728324397527e-06, "loss": 1.3441, "mean_token_accuracy": 0.6652401685714722, "num_tokens": 2670943327.0, "step": 15918 }, { "entropy": 1.742193837960561, "epoch": 1.7487847079179368, "grad_norm": 0.7539701461791992, "learning_rate": 2.73533882239474e-06, "loss": 1.4356, "mean_token_accuracy": 0.6551166623830795, "num_tokens": 2671090212.0, "step": 15919 }, { "entropy": 1.7344173789024353, "epoch": 1.74889456482931, "grad_norm": 0.6886321902275085, "learning_rate": 2.7347050739225255e-06, "loss": 1.4913, "mean_token_accuracy": 0.6443561265865961, "num_tokens": 2671261264.0, "step": 15920 }, { "entropy": 1.654103030761083, "epoch": 1.7490044217406826, "grad_norm": 0.6048797965049744, "learning_rate": 2.734071587043172e-06, "loss": 1.3701, "mean_token_accuracy": 0.6593893716732661, "num_tokens": 2671423561.0, "step": 15921 }, { "entropy": 1.673624058564504, "epoch": 1.7491142786520557, "grad_norm": 0.6645485758781433, "learning_rate": 2.733438361776729e-06, "loss": 1.4122, "mean_token_accuracy": 0.6572486211856207, "num_tokens": 2671579500.0, "step": 15922 }, { "entropy": 1.7188350359598796, "epoch": 1.7492241355634286, "grad_norm": 0.7232167720794678, "learning_rate": 2.7328053981432373e-06, "loss": 1.3182, "mean_token_accuracy": 0.6738363355398178, "num_tokens": 2671713076.0, "step": 15923 }, { "entropy": 1.6765947341918945, "epoch": 1.7493339924748015, "grad_norm": 0.7521688342094421, "learning_rate": 2.7321726961627272e-06, "loss": 1.3101, "mean_token_accuracy": 0.6763416528701782, "num_tokens": 2671916077.0, "step": 15924 }, { "entropy": 1.6944889426231384, "epoch": 1.7494438493861746, "grad_norm": 0.6953690648078918, "learning_rate": 2.731540255855228e-06, "loss": 1.3638, "mean_token_accuracy": 0.6754336108764013, "num_tokens": 2672113899.0, "step": 15925 }, { "entropy": 1.6916892528533936, "epoch": 1.7495537062975475, "grad_norm": 0.6988839507102966, "learning_rate": 2.7309080772407513e-06, "loss": 1.3953, "mean_token_accuracy": 0.6575096398591995, "num_tokens": 2672290280.0, "step": 15926 }, { "entropy": 1.7188960711161296, "epoch": 1.7496635632089204, "grad_norm": 0.7189807295799255, "learning_rate": 2.7302761603393102e-06, "loss": 1.475, "mean_token_accuracy": 0.645105391740799, "num_tokens": 2672469646.0, "step": 15927 }, { "entropy": 1.6774901350339253, "epoch": 1.7497734201202935, "grad_norm": 0.761957585811615, "learning_rate": 2.7296445051709012e-06, "loss": 1.3203, "mean_token_accuracy": 0.6618892600138983, "num_tokens": 2672626896.0, "step": 15928 }, { "entropy": 1.6909812192122142, "epoch": 1.7498832770316661, "grad_norm": 0.5992552638053894, "learning_rate": 2.7290131117555164e-06, "loss": 1.3377, "mean_token_accuracy": 0.6602785636981329, "num_tokens": 2672830089.0, "step": 15929 }, { "entropy": 1.7229216794172924, "epoch": 1.7499931339430392, "grad_norm": 0.6808377504348755, "learning_rate": 2.7283819801131393e-06, "loss": 1.3918, "mean_token_accuracy": 0.6523537784814835, "num_tokens": 2673004119.0, "step": 15930 }, { "entropy": 1.8220161596934001, "epoch": 1.7501029908544121, "grad_norm": 0.7409783601760864, "learning_rate": 2.727751110263749e-06, "loss": 1.3421, "mean_token_accuracy": 0.6639614452918371, "num_tokens": 2673106484.0, "step": 15931 }, { "entropy": 1.6994484464327495, "epoch": 1.750212847765785, "grad_norm": 0.6716399192810059, "learning_rate": 2.7271205022273044e-06, "loss": 1.4798, "mean_token_accuracy": 0.6541763444741567, "num_tokens": 2673266017.0, "step": 15932 }, { "entropy": 1.7303711573282878, "epoch": 1.7503227046771581, "grad_norm": 0.7343233823776245, "learning_rate": 2.7264901560237685e-06, "loss": 1.4671, "mean_token_accuracy": 0.6574054459730784, "num_tokens": 2673426360.0, "step": 15933 }, { "entropy": 1.6783941288789113, "epoch": 1.7504325615885308, "grad_norm": 0.6652559638023376, "learning_rate": 2.725860071673093e-06, "loss": 1.2442, "mean_token_accuracy": 0.6868862261374792, "num_tokens": 2673564234.0, "step": 15934 }, { "entropy": 1.7397794624169667, "epoch": 1.7505424184999039, "grad_norm": 0.7660438418388367, "learning_rate": 2.7252302491952166e-06, "loss": 1.5224, "mean_token_accuracy": 0.6459332555532455, "num_tokens": 2673745150.0, "step": 15935 }, { "entropy": 1.6459727088610332, "epoch": 1.7506522754112768, "grad_norm": 1.2467116117477417, "learning_rate": 2.724600688610073e-06, "loss": 1.157, "mean_token_accuracy": 0.6878319978713989, "num_tokens": 2673974532.0, "step": 15936 }, { "entropy": 1.6995637615521748, "epoch": 1.7507621323226497, "grad_norm": 0.6141746640205383, "learning_rate": 2.723971389937591e-06, "loss": 1.4254, "mean_token_accuracy": 0.6578077226877213, "num_tokens": 2674144964.0, "step": 15937 }, { "entropy": 1.785159985224406, "epoch": 1.7508719892340228, "grad_norm": 0.7061741352081299, "learning_rate": 2.7233423531976827e-06, "loss": 1.3695, "mean_token_accuracy": 0.6479012419780096, "num_tokens": 2674288702.0, "step": 15938 }, { "entropy": 1.7027852634588878, "epoch": 1.7509818461453956, "grad_norm": 0.7895951271057129, "learning_rate": 2.7227135784102622e-06, "loss": 1.4436, "mean_token_accuracy": 0.6551746229330698, "num_tokens": 2674455082.0, "step": 15939 }, { "entropy": 1.7255665163199108, "epoch": 1.7510917030567685, "grad_norm": 0.6441416144371033, "learning_rate": 2.722085065595226e-06, "loss": 1.6238, "mean_token_accuracy": 0.6162713964780172, "num_tokens": 2674717678.0, "step": 15940 }, { "entropy": 1.6721422374248505, "epoch": 1.7512015599681416, "grad_norm": 0.6975085735321045, "learning_rate": 2.7214568147724656e-06, "loss": 1.3447, "mean_token_accuracy": 0.6642735848824183, "num_tokens": 2674888510.0, "step": 15941 }, { "entropy": 1.70290403564771, "epoch": 1.7513114168795143, "grad_norm": 0.7391501665115356, "learning_rate": 2.7208288259618674e-06, "loss": 1.2404, "mean_token_accuracy": 0.66946313281854, "num_tokens": 2675027189.0, "step": 15942 }, { "entropy": 1.662659337123235, "epoch": 1.7514212737908874, "grad_norm": 0.7047905921936035, "learning_rate": 2.720201099183309e-06, "loss": 1.4786, "mean_token_accuracy": 0.6650376369555792, "num_tokens": 2675168738.0, "step": 15943 }, { "entropy": 1.767273743947347, "epoch": 1.7515311307022603, "grad_norm": 0.7692185640335083, "learning_rate": 2.719573634456652e-06, "loss": 1.2778, "mean_token_accuracy": 0.6670770943164825, "num_tokens": 2675281899.0, "step": 15944 }, { "entropy": 1.7268462379773457, "epoch": 1.7516409876136332, "grad_norm": 0.6072038412094116, "learning_rate": 2.7189464318017572e-06, "loss": 1.5135, "mean_token_accuracy": 0.6426151494185129, "num_tokens": 2675495851.0, "step": 15945 }, { "entropy": 1.7973586320877075, "epoch": 1.7517508445250063, "grad_norm": 0.7870144248008728, "learning_rate": 2.718319491238479e-06, "loss": 1.4825, "mean_token_accuracy": 0.6487486610809962, "num_tokens": 2675698102.0, "step": 15946 }, { "entropy": 1.6900157729784648, "epoch": 1.751860701436379, "grad_norm": 0.6972094178199768, "learning_rate": 2.7176928127866565e-06, "loss": 1.4772, "mean_token_accuracy": 0.6373498241106669, "num_tokens": 2675859370.0, "step": 15947 }, { "entropy": 1.7243396242459614, "epoch": 1.751970558347752, "grad_norm": 0.7015838623046875, "learning_rate": 2.7170663964661246e-06, "loss": 1.5138, "mean_token_accuracy": 0.6468863636255264, "num_tokens": 2676047567.0, "step": 15948 }, { "entropy": 1.714859535296758, "epoch": 1.752080415259125, "grad_norm": 0.6738941669464111, "learning_rate": 2.716440242296707e-06, "loss": 1.3198, "mean_token_accuracy": 0.6629375318686167, "num_tokens": 2676207126.0, "step": 15949 }, { "entropy": 1.64927805463473, "epoch": 1.7521902721704978, "grad_norm": 0.6364519000053406, "learning_rate": 2.715814350298223e-06, "loss": 1.2985, "mean_token_accuracy": 0.6785600632429123, "num_tokens": 2676349632.0, "step": 15950 }, { "entropy": 1.7328944404919941, "epoch": 1.752300129081871, "grad_norm": 0.7407512068748474, "learning_rate": 2.715188720490486e-06, "loss": 1.3626, "mean_token_accuracy": 0.6627685775359472, "num_tokens": 2676480646.0, "step": 15951 }, { "entropy": 1.6649740636348724, "epoch": 1.7524099859932438, "grad_norm": 0.6273338794708252, "learning_rate": 2.7145633528932884e-06, "loss": 1.2986, "mean_token_accuracy": 0.6614208469788233, "num_tokens": 2676625775.0, "step": 15952 }, { "entropy": 1.7441307703653972, "epoch": 1.7525198429046167, "grad_norm": 0.6636318564414978, "learning_rate": 2.713938247526428e-06, "loss": 1.3612, "mean_token_accuracy": 0.659666990240415, "num_tokens": 2676765131.0, "step": 15953 }, { "entropy": 1.691186914841334, "epoch": 1.7526296998159898, "grad_norm": 0.6902927160263062, "learning_rate": 2.7133134044096894e-06, "loss": 1.2746, "mean_token_accuracy": 0.6825152337551117, "num_tokens": 2676943845.0, "step": 15954 }, { "entropy": 1.6996070841948192, "epoch": 1.7527395567273625, "grad_norm": 0.6229896545410156, "learning_rate": 2.7126888235628484e-06, "loss": 1.4192, "mean_token_accuracy": 0.6589094599088033, "num_tokens": 2677124904.0, "step": 15955 }, { "entropy": 1.7306743164857228, "epoch": 1.7528494136387356, "grad_norm": 0.8987564444541931, "learning_rate": 2.7120645050056693e-06, "loss": 1.646, "mean_token_accuracy": 0.6419266114632288, "num_tokens": 2677305092.0, "step": 15956 }, { "entropy": 1.703240692615509, "epoch": 1.7529592705501085, "grad_norm": 0.6768248081207275, "learning_rate": 2.711440448757916e-06, "loss": 1.3706, "mean_token_accuracy": 0.6598798781633377, "num_tokens": 2677451996.0, "step": 15957 }, { "entropy": 1.68047430117925, "epoch": 1.7530691274614814, "grad_norm": 0.7604206204414368, "learning_rate": 2.7108166548393355e-06, "loss": 1.3357, "mean_token_accuracy": 0.6692562450965246, "num_tokens": 2677595564.0, "step": 15958 }, { "entropy": 1.6513379216194153, "epoch": 1.7531789843728545, "grad_norm": 0.7215495705604553, "learning_rate": 2.710193123269674e-06, "loss": 1.4028, "mean_token_accuracy": 0.668435071905454, "num_tokens": 2677747003.0, "step": 15959 }, { "entropy": 1.7083741823832195, "epoch": 1.7532888412842271, "grad_norm": 0.792725145816803, "learning_rate": 2.7095698540686656e-06, "loss": 1.4943, "mean_token_accuracy": 0.6499527543783188, "num_tokens": 2677977238.0, "step": 15960 }, { "entropy": 1.6812816560268402, "epoch": 1.7533986981956002, "grad_norm": 0.7482911944389343, "learning_rate": 2.7089468472560337e-06, "loss": 1.4457, "mean_token_accuracy": 0.6625443349281946, "num_tokens": 2678174371.0, "step": 15961 }, { "entropy": 1.7282609542210896, "epoch": 1.7535085551069731, "grad_norm": 0.6054615378379822, "learning_rate": 2.708324102851498e-06, "loss": 1.4013, "mean_token_accuracy": 0.6478755126396815, "num_tokens": 2678327959.0, "step": 15962 }, { "entropy": 1.6690288086732228, "epoch": 1.753618412018346, "grad_norm": 0.6034241318702698, "learning_rate": 2.707701620874771e-06, "loss": 1.447, "mean_token_accuracy": 0.6598279525836309, "num_tokens": 2678507673.0, "step": 15963 }, { "entropy": 1.7199995517730713, "epoch": 1.7537282689297191, "grad_norm": 0.5845739841461182, "learning_rate": 2.707079401345548e-06, "loss": 1.4097, "mean_token_accuracy": 0.6425377229849497, "num_tokens": 2678686394.0, "step": 15964 }, { "entropy": 1.7174680133660634, "epoch": 1.753838125841092, "grad_norm": 0.7504869103431702, "learning_rate": 2.7064574442835244e-06, "loss": 1.2004, "mean_token_accuracy": 0.6825359563032786, "num_tokens": 2678832333.0, "step": 15965 }, { "entropy": 1.7301206588745117, "epoch": 1.7539479827524649, "grad_norm": 0.7699616551399231, "learning_rate": 2.705835749708389e-06, "loss": 1.3751, "mean_token_accuracy": 0.6503161787986755, "num_tokens": 2678975215.0, "step": 15966 }, { "entropy": 1.6766011317571003, "epoch": 1.754057839663838, "grad_norm": 0.6368605494499207, "learning_rate": 2.705214317639813e-06, "loss": 1.3711, "mean_token_accuracy": 0.6545276641845703, "num_tokens": 2679134834.0, "step": 15967 }, { "entropy": 1.6891018450260162, "epoch": 1.7541676965752107, "grad_norm": 0.6660163402557373, "learning_rate": 2.7045931480974647e-06, "loss": 1.481, "mean_token_accuracy": 0.6436668932437897, "num_tokens": 2679370650.0, "step": 15968 }, { "entropy": 1.6860856016476948, "epoch": 1.7542775534865838, "grad_norm": 0.6828413009643555, "learning_rate": 2.7039722411010077e-06, "loss": 1.3477, "mean_token_accuracy": 0.6638344178597132, "num_tokens": 2679515052.0, "step": 15969 }, { "entropy": 1.707839588324229, "epoch": 1.7543874103979566, "grad_norm": 0.5549116134643555, "learning_rate": 2.703351596670089e-06, "loss": 1.3906, "mean_token_accuracy": 0.6582557906707128, "num_tokens": 2679697943.0, "step": 15970 }, { "entropy": 1.6483833988507588, "epoch": 1.7544972673093295, "grad_norm": 0.6756875514984131, "learning_rate": 2.7027312148243552e-06, "loss": 1.2794, "mean_token_accuracy": 0.6740471869707108, "num_tokens": 2679854587.0, "step": 15971 }, { "entropy": 1.7412736018498738, "epoch": 1.7546071242207026, "grad_norm": 0.6762500405311584, "learning_rate": 2.7021110955834397e-06, "loss": 1.3442, "mean_token_accuracy": 0.6655664046605428, "num_tokens": 2679989455.0, "step": 15972 }, { "entropy": 1.7418133318424225, "epoch": 1.7547169811320755, "grad_norm": 0.702103316783905, "learning_rate": 2.701491238966968e-06, "loss": 1.4733, "mean_token_accuracy": 0.6563413143157959, "num_tokens": 2680144527.0, "step": 15973 }, { "entropy": 1.7465166052182515, "epoch": 1.7548268380434484, "grad_norm": 0.659724771976471, "learning_rate": 2.700871644994558e-06, "loss": 1.6231, "mean_token_accuracy": 0.6223872403303782, "num_tokens": 2680367260.0, "step": 15974 }, { "entropy": 1.7185083429018657, "epoch": 1.7549366949548213, "grad_norm": 0.6778896450996399, "learning_rate": 2.7002523136858243e-06, "loss": 1.297, "mean_token_accuracy": 0.659514586130778, "num_tokens": 2680520101.0, "step": 15975 }, { "entropy": 1.6928850710391998, "epoch": 1.7550465518661942, "grad_norm": 0.6215304136276245, "learning_rate": 2.699633245060362e-06, "loss": 1.3566, "mean_token_accuracy": 0.6550732006629308, "num_tokens": 2680714804.0, "step": 15976 }, { "entropy": 1.636595219373703, "epoch": 1.7551564087775673, "grad_norm": 0.640494704246521, "learning_rate": 2.6990144391377672e-06, "loss": 1.4246, "mean_token_accuracy": 0.6581357816855112, "num_tokens": 2680900096.0, "step": 15977 }, { "entropy": 1.6510383188724518, "epoch": 1.7552662656889402, "grad_norm": 0.5748199820518494, "learning_rate": 2.698395895937627e-06, "loss": 1.3701, "mean_token_accuracy": 0.6559861749410629, "num_tokens": 2681112878.0, "step": 15978 }, { "entropy": 1.7111006379127502, "epoch": 1.755376122600313, "grad_norm": 0.6022339463233948, "learning_rate": 2.6977776154795143e-06, "loss": 1.43, "mean_token_accuracy": 0.6573653519153595, "num_tokens": 2681330192.0, "step": 15979 }, { "entropy": 1.7234566509723663, "epoch": 1.7554859795116862, "grad_norm": 0.7855963706970215, "learning_rate": 2.6971595977829986e-06, "loss": 1.4615, "mean_token_accuracy": 0.6453143805265427, "num_tokens": 2681475961.0, "step": 15980 }, { "entropy": 1.7315457065900166, "epoch": 1.7555958364230588, "grad_norm": 0.6155387759208679, "learning_rate": 2.6965418428676416e-06, "loss": 1.5345, "mean_token_accuracy": 0.6340660750865936, "num_tokens": 2681718898.0, "step": 15981 }, { "entropy": 1.6927696069081624, "epoch": 1.755705693334432, "grad_norm": 0.6210948824882507, "learning_rate": 2.695924350752992e-06, "loss": 1.4177, "mean_token_accuracy": 0.6520951439936956, "num_tokens": 2681871307.0, "step": 15982 }, { "entropy": 1.703049937884013, "epoch": 1.7558155502458048, "grad_norm": 0.6101760268211365, "learning_rate": 2.695307121458597e-06, "loss": 1.5486, "mean_token_accuracy": 0.6360116451978683, "num_tokens": 2682062747.0, "step": 15983 }, { "entropy": 1.7253990471363068, "epoch": 1.7559254071571777, "grad_norm": 0.8363544940948486, "learning_rate": 2.694690155003989e-06, "loss": 1.5341, "mean_token_accuracy": 0.6490476578474045, "num_tokens": 2682217967.0, "step": 15984 }, { "entropy": 1.7305609087149303, "epoch": 1.7560352640685508, "grad_norm": 0.6613196730613708, "learning_rate": 2.694073451408693e-06, "loss": 1.4157, "mean_token_accuracy": 0.6571665753920873, "num_tokens": 2682358141.0, "step": 15985 }, { "entropy": 1.683866063753764, "epoch": 1.7561451209799237, "grad_norm": 0.6239186525344849, "learning_rate": 2.69345701069223e-06, "loss": 1.412, "mean_token_accuracy": 0.6451130757729212, "num_tokens": 2682550962.0, "step": 15986 }, { "entropy": 1.7141178448994954, "epoch": 1.7562549778912966, "grad_norm": 0.6861118674278259, "learning_rate": 2.6928408328741128e-06, "loss": 1.5234, "mean_token_accuracy": 0.6307132889827093, "num_tokens": 2682768682.0, "step": 15987 }, { "entropy": 1.6778662502765656, "epoch": 1.7563648348026695, "grad_norm": 0.7148086428642273, "learning_rate": 2.692224917973837e-06, "loss": 1.279, "mean_token_accuracy": 0.6807506283124288, "num_tokens": 2682890200.0, "step": 15988 }, { "entropy": 1.680614044268926, "epoch": 1.7564746917140424, "grad_norm": 0.7254914045333862, "learning_rate": 2.6916092660108985e-06, "loss": 1.524, "mean_token_accuracy": 0.6463294724623362, "num_tokens": 2683094770.0, "step": 15989 }, { "entropy": 1.6979198157787323, "epoch": 1.7565845486254155, "grad_norm": 0.6991070508956909, "learning_rate": 2.690993877004785e-06, "loss": 1.5171, "mean_token_accuracy": 0.6333752622207006, "num_tokens": 2683283703.0, "step": 15990 }, { "entropy": 1.728099246819814, "epoch": 1.7566944055367884, "grad_norm": 0.6582128405570984, "learning_rate": 2.69037875097497e-06, "loss": 1.2978, "mean_token_accuracy": 0.6661944588025411, "num_tokens": 2683408690.0, "step": 15991 }, { "entropy": 1.6826795637607574, "epoch": 1.7568042624481612, "grad_norm": 0.7262701392173767, "learning_rate": 2.6897638879409228e-06, "loss": 1.4638, "mean_token_accuracy": 0.6540475736061732, "num_tokens": 2683550680.0, "step": 15992 }, { "entropy": 1.6950383583704631, "epoch": 1.7569141193595343, "grad_norm": 0.7318074703216553, "learning_rate": 2.689149287922105e-06, "loss": 1.5337, "mean_token_accuracy": 0.6470025032758713, "num_tokens": 2683732547.0, "step": 15993 }, { "entropy": 1.6813922425111134, "epoch": 1.757023976270907, "grad_norm": 0.6637877225875854, "learning_rate": 2.6885349509379667e-06, "loss": 1.4379, "mean_token_accuracy": 0.6517617652813593, "num_tokens": 2683904804.0, "step": 15994 }, { "entropy": 1.6451501250267029, "epoch": 1.7571338331822801, "grad_norm": 0.5915562510490417, "learning_rate": 2.687920877007952e-06, "loss": 1.3513, "mean_token_accuracy": 0.6604169209798177, "num_tokens": 2684077734.0, "step": 15995 }, { "entropy": 1.7052603960037231, "epoch": 1.757243690093653, "grad_norm": 0.6788076162338257, "learning_rate": 2.6873070661514966e-06, "loss": 1.3391, "mean_token_accuracy": 0.6597078988949457, "num_tokens": 2684211351.0, "step": 15996 }, { "entropy": 1.8529831767082214, "epoch": 1.7573535470050259, "grad_norm": 0.8095588088035583, "learning_rate": 2.6866935183880246e-06, "loss": 1.372, "mean_token_accuracy": 0.6530237297217051, "num_tokens": 2684339090.0, "step": 15997 }, { "entropy": 1.6758296092351277, "epoch": 1.757463403916399, "grad_norm": 0.8423399329185486, "learning_rate": 2.6860802337369574e-06, "loss": 1.2942, "mean_token_accuracy": 0.6831070631742477, "num_tokens": 2684480747.0, "step": 15998 }, { "entropy": 1.7313259641329448, "epoch": 1.7575732608277719, "grad_norm": 0.6407238841056824, "learning_rate": 2.685467212217708e-06, "loss": 1.3715, "mean_token_accuracy": 0.6576495319604874, "num_tokens": 2684654860.0, "step": 15999 }, { "entropy": 1.68729371825854, "epoch": 1.7576831177391448, "grad_norm": 0.6778021454811096, "learning_rate": 2.6848544538496708e-06, "loss": 1.3707, "mean_token_accuracy": 0.6608982980251312, "num_tokens": 2684843261.0, "step": 16000 }, { "entropy": 1.71370596686999, "epoch": 1.7577929746505176, "grad_norm": 0.6387611031532288, "learning_rate": 2.6842419586522438e-06, "loss": 1.4439, "mean_token_accuracy": 0.6582885235548019, "num_tokens": 2684991799.0, "step": 16001 }, { "entropy": 1.7273413042227428, "epoch": 1.7579028315618905, "grad_norm": 0.6538326144218445, "learning_rate": 2.6836297266448132e-06, "loss": 1.4684, "mean_token_accuracy": 0.6439621796210607, "num_tokens": 2685157892.0, "step": 16002 }, { "entropy": 1.6686415771643321, "epoch": 1.7580126884732636, "grad_norm": 0.7002345323562622, "learning_rate": 2.6830177578467538e-06, "loss": 1.2251, "mean_token_accuracy": 0.6772776246070862, "num_tokens": 2685281540.0, "step": 16003 }, { "entropy": 1.7180415491263072, "epoch": 1.7581225453846365, "grad_norm": 0.6955657601356506, "learning_rate": 2.6824060522774324e-06, "loss": 1.3035, "mean_token_accuracy": 0.6689807226260504, "num_tokens": 2685410847.0, "step": 16004 }, { "entropy": 1.7041480839252472, "epoch": 1.7582324022960094, "grad_norm": 0.7212827205657959, "learning_rate": 2.6817946099562144e-06, "loss": 1.4209, "mean_token_accuracy": 0.6546263992786407, "num_tokens": 2685550734.0, "step": 16005 }, { "entropy": 1.7181178629398346, "epoch": 1.7583422592073825, "grad_norm": 0.927869439125061, "learning_rate": 2.6811834309024464e-06, "loss": 1.5715, "mean_token_accuracy": 0.6469027449687322, "num_tokens": 2685780924.0, "step": 16006 }, { "entropy": 1.6185656785964966, "epoch": 1.7584521161187552, "grad_norm": 0.8028410077095032, "learning_rate": 2.6805725151354767e-06, "loss": 1.3453, "mean_token_accuracy": 0.6656514505545298, "num_tokens": 2685995927.0, "step": 16007 }, { "entropy": 1.7140028874079387, "epoch": 1.7585619730301283, "grad_norm": 0.5886359214782715, "learning_rate": 2.6799618626746373e-06, "loss": 1.3963, "mean_token_accuracy": 0.6488246818383535, "num_tokens": 2686183571.0, "step": 16008 }, { "entropy": 1.7340856492519379, "epoch": 1.7586718299415012, "grad_norm": 0.7170657515525818, "learning_rate": 2.679351473539254e-06, "loss": 1.3313, "mean_token_accuracy": 0.6784360110759735, "num_tokens": 2686339779.0, "step": 16009 }, { "entropy": 1.7010501722494762, "epoch": 1.758781686852874, "grad_norm": 0.708954930305481, "learning_rate": 2.678741347748649e-06, "loss": 1.3835, "mean_token_accuracy": 0.6587957243124644, "num_tokens": 2686483449.0, "step": 16010 }, { "entropy": 1.7321734031041462, "epoch": 1.7588915437642472, "grad_norm": 0.850824773311615, "learning_rate": 2.6781314853221317e-06, "loss": 1.2767, "mean_token_accuracy": 0.6718885898590088, "num_tokens": 2686619532.0, "step": 16011 }, { "entropy": 1.7261533737182617, "epoch": 1.75900140067562, "grad_norm": 0.6615371704101562, "learning_rate": 2.677521886279e-06, "loss": 1.3841, "mean_token_accuracy": 0.657954066991806, "num_tokens": 2686772792.0, "step": 16012 }, { "entropy": 1.6760503153006236, "epoch": 1.759111257586993, "grad_norm": 0.6881850957870483, "learning_rate": 2.676912550638553e-06, "loss": 1.317, "mean_token_accuracy": 0.6734907428423563, "num_tokens": 2686959332.0, "step": 16013 }, { "entropy": 1.7255980670452118, "epoch": 1.7592211144983658, "grad_norm": 0.8105875849723816, "learning_rate": 2.6763034784200714e-06, "loss": 1.3294, "mean_token_accuracy": 0.6656341602404913, "num_tokens": 2687089986.0, "step": 16014 }, { "entropy": 1.7051123281319935, "epoch": 1.7593309714097387, "grad_norm": 0.7158269882202148, "learning_rate": 2.675694669642835e-06, "loss": 1.3558, "mean_token_accuracy": 0.6500856876373291, "num_tokens": 2687263913.0, "step": 16015 }, { "entropy": 1.632367382446925, "epoch": 1.7594408283211118, "grad_norm": 0.6158329844474792, "learning_rate": 2.6750861243261116e-06, "loss": 1.4107, "mean_token_accuracy": 0.6531671682993571, "num_tokens": 2687432094.0, "step": 16016 }, { "entropy": 1.64411657055219, "epoch": 1.7595506852324847, "grad_norm": 0.7069551348686218, "learning_rate": 2.6744778424891593e-06, "loss": 1.2452, "mean_token_accuracy": 0.681958943605423, "num_tokens": 2687545345.0, "step": 16017 }, { "entropy": 1.7302074233690898, "epoch": 1.7596605421438576, "grad_norm": 0.922947108745575, "learning_rate": 2.673869824151233e-06, "loss": 1.411, "mean_token_accuracy": 0.6597683926423391, "num_tokens": 2687702086.0, "step": 16018 }, { "entropy": 1.6862739821275075, "epoch": 1.7597703990552307, "grad_norm": 0.611757218837738, "learning_rate": 2.6732620693315747e-06, "loss": 1.3634, "mean_token_accuracy": 0.6643515825271606, "num_tokens": 2687873391.0, "step": 16019 }, { "entropy": 1.6990788380304973, "epoch": 1.7598802559666034, "grad_norm": 0.6882119178771973, "learning_rate": 2.672654578049421e-06, "loss": 1.6048, "mean_token_accuracy": 0.6338710337877274, "num_tokens": 2688115401.0, "step": 16020 }, { "entropy": 1.762443095445633, "epoch": 1.7599901128779765, "grad_norm": 0.6838318109512329, "learning_rate": 2.6720473503239965e-06, "loss": 1.4519, "mean_token_accuracy": 0.6481083780527115, "num_tokens": 2688307828.0, "step": 16021 }, { "entropy": 1.6223317682743073, "epoch": 1.7600999697893494, "grad_norm": 0.7780981063842773, "learning_rate": 2.67144038617452e-06, "loss": 1.2702, "mean_token_accuracy": 0.6707681715488434, "num_tokens": 2688478830.0, "step": 16022 }, { "entropy": 1.7924179633458455, "epoch": 1.7602098267007222, "grad_norm": 0.693706214427948, "learning_rate": 2.670833685620204e-06, "loss": 1.4936, "mean_token_accuracy": 0.6311075091362, "num_tokens": 2688628557.0, "step": 16023 }, { "entropy": 1.7252692580223083, "epoch": 1.7603196836120953, "grad_norm": 0.8025743365287781, "learning_rate": 2.6702272486802467e-06, "loss": 1.6267, "mean_token_accuracy": 0.6387341618537903, "num_tokens": 2688872676.0, "step": 16024 }, { "entropy": 1.6703723271687825, "epoch": 1.7604295405234682, "grad_norm": 0.7961321473121643, "learning_rate": 2.669621075373845e-06, "loss": 1.344, "mean_token_accuracy": 0.6630989263455073, "num_tokens": 2689041194.0, "step": 16025 }, { "entropy": 1.72029647231102, "epoch": 1.7605393974348411, "grad_norm": 0.6698374152183533, "learning_rate": 2.6690151657201813e-06, "loss": 1.3747, "mean_token_accuracy": 0.6576948761940002, "num_tokens": 2689249040.0, "step": 16026 }, { "entropy": 1.7233352561791737, "epoch": 1.7606492543462142, "grad_norm": 0.7270263433456421, "learning_rate": 2.668409519738434e-06, "loss": 1.5737, "mean_token_accuracy": 0.6292213350534439, "num_tokens": 2689469853.0, "step": 16027 }, { "entropy": 1.716443419456482, "epoch": 1.7607591112575869, "grad_norm": 0.7445971965789795, "learning_rate": 2.667804137447772e-06, "loss": 1.3767, "mean_token_accuracy": 0.6535246272881826, "num_tokens": 2689621005.0, "step": 16028 }, { "entropy": 1.7172259191672008, "epoch": 1.76086896816896, "grad_norm": 0.6567142009735107, "learning_rate": 2.6671990188673534e-06, "loss": 1.4442, "mean_token_accuracy": 0.6565740207831064, "num_tokens": 2689811122.0, "step": 16029 }, { "entropy": 1.7025707860787709, "epoch": 1.7609788250803329, "grad_norm": 0.6175838112831116, "learning_rate": 2.666594164016331e-06, "loss": 1.4574, "mean_token_accuracy": 0.6463165481885275, "num_tokens": 2689987343.0, "step": 16030 }, { "entropy": 1.7212122082710266, "epoch": 1.7610886819917058, "grad_norm": 0.7046975493431091, "learning_rate": 2.6659895729138506e-06, "loss": 1.4979, "mean_token_accuracy": 0.6492815067370733, "num_tokens": 2690161731.0, "step": 16031 }, { "entropy": 1.7542727986971538, "epoch": 1.7611985389030789, "grad_norm": 0.8798859119415283, "learning_rate": 2.665385245579042e-06, "loss": 1.427, "mean_token_accuracy": 0.666147435704867, "num_tokens": 2690346545.0, "step": 16032 }, { "entropy": 1.7387540936470032, "epoch": 1.7613083958144515, "grad_norm": 0.7441887855529785, "learning_rate": 2.6647811820310345e-06, "loss": 1.3141, "mean_token_accuracy": 0.6612844069798788, "num_tokens": 2690447761.0, "step": 16033 }, { "entropy": 1.6950391431649525, "epoch": 1.7614182527258246, "grad_norm": 0.5990413427352905, "learning_rate": 2.664177382288948e-06, "loss": 1.4014, "mean_token_accuracy": 0.6514792641003927, "num_tokens": 2690622453.0, "step": 16034 }, { "entropy": 1.6765986780325572, "epoch": 1.7615281096371975, "grad_norm": 0.7618458271026611, "learning_rate": 2.6635738463718907e-06, "loss": 1.2839, "mean_token_accuracy": 0.6652130633592606, "num_tokens": 2690754415.0, "step": 16035 }, { "entropy": 1.6814166605472565, "epoch": 1.7616379665485704, "grad_norm": 0.745574951171875, "learning_rate": 2.662970574298964e-06, "loss": 1.2076, "mean_token_accuracy": 0.6819567829370499, "num_tokens": 2690921404.0, "step": 16036 }, { "entropy": 1.702602465947469, "epoch": 1.7617478234599435, "grad_norm": 0.6456969380378723, "learning_rate": 2.6623675660892646e-06, "loss": 1.3397, "mean_token_accuracy": 0.6715992788473765, "num_tokens": 2691076141.0, "step": 16037 }, { "entropy": 1.697231650352478, "epoch": 1.7618576803713164, "grad_norm": 0.6837537884712219, "learning_rate": 2.661764821761871e-06, "loss": 1.3611, "mean_token_accuracy": 0.662015880147616, "num_tokens": 2691223815.0, "step": 16038 }, { "entropy": 1.759251356124878, "epoch": 1.7619675372826893, "grad_norm": 0.714501142501831, "learning_rate": 2.6611623413358656e-06, "loss": 1.4554, "mean_token_accuracy": 0.6524814665317535, "num_tokens": 2691356249.0, "step": 16039 }, { "entropy": 1.7324928243954976, "epoch": 1.7620773941940624, "grad_norm": 0.6178767681121826, "learning_rate": 2.6605601248303152e-06, "loss": 1.3647, "mean_token_accuracy": 0.6701581329107285, "num_tokens": 2691553264.0, "step": 16040 }, { "entropy": 1.6730826298395793, "epoch": 1.762187251105435, "grad_norm": 0.7592617869377136, "learning_rate": 2.6599581722642762e-06, "loss": 1.3098, "mean_token_accuracy": 0.6677819540103277, "num_tokens": 2691739385.0, "step": 16041 }, { "entropy": 1.6866790254910786, "epoch": 1.7622971080168082, "grad_norm": 0.6936041712760925, "learning_rate": 2.6593564836568047e-06, "loss": 1.3359, "mean_token_accuracy": 0.6542757352193197, "num_tokens": 2691857897.0, "step": 16042 }, { "entropy": 1.6914058128992717, "epoch": 1.762406964928181, "grad_norm": 0.5658572316169739, "learning_rate": 2.658755059026944e-06, "loss": 1.384, "mean_token_accuracy": 0.644287516673406, "num_tokens": 2692062336.0, "step": 16043 }, { "entropy": 1.7410341103871663, "epoch": 1.762516821839554, "grad_norm": 0.6581618785858154, "learning_rate": 2.6581538983937243e-06, "loss": 1.3614, "mean_token_accuracy": 0.6670055588086446, "num_tokens": 2692198745.0, "step": 16044 }, { "entropy": 1.6533363958199818, "epoch": 1.762626678750927, "grad_norm": 0.6615976095199585, "learning_rate": 2.657553001776175e-06, "loss": 1.394, "mean_token_accuracy": 0.6650797625382742, "num_tokens": 2692378904.0, "step": 16045 }, { "entropy": 1.6993359824021657, "epoch": 1.7627365356622997, "grad_norm": 0.6522985100746155, "learning_rate": 2.6569523691933154e-06, "loss": 1.4219, "mean_token_accuracy": 0.6460706889629364, "num_tokens": 2692560029.0, "step": 16046 }, { "entropy": 1.7435101469357808, "epoch": 1.7628463925736728, "grad_norm": 0.678107500076294, "learning_rate": 2.656352000664153e-06, "loss": 1.4076, "mean_token_accuracy": 0.6516735653082529, "num_tokens": 2692749415.0, "step": 16047 }, { "entropy": 1.7272218664487202, "epoch": 1.7629562494850457, "grad_norm": 0.6166483759880066, "learning_rate": 2.6557518962076896e-06, "loss": 1.3556, "mean_token_accuracy": 0.6620925267537435, "num_tokens": 2692890906.0, "step": 16048 }, { "entropy": 1.7328097025553386, "epoch": 1.7630661063964186, "grad_norm": 1.0833711624145508, "learning_rate": 2.65515205584292e-06, "loss": 1.4331, "mean_token_accuracy": 0.6583366692066193, "num_tokens": 2693060492.0, "step": 16049 }, { "entropy": 1.6878051559130351, "epoch": 1.7631759633077917, "grad_norm": 0.694952666759491, "learning_rate": 2.654552479588826e-06, "loss": 1.3545, "mean_token_accuracy": 0.6597214490175247, "num_tokens": 2693240071.0, "step": 16050 }, { "entropy": 1.7286501228809357, "epoch": 1.7632858202191646, "grad_norm": 0.7728515267372131, "learning_rate": 2.653953167464387e-06, "loss": 1.3724, "mean_token_accuracy": 0.6499157945315043, "num_tokens": 2693361966.0, "step": 16051 }, { "entropy": 1.713899165391922, "epoch": 1.7633956771305375, "grad_norm": 0.7397400736808777, "learning_rate": 2.653354119488568e-06, "loss": 1.5974, "mean_token_accuracy": 0.6266492505868276, "num_tokens": 2693532391.0, "step": 16052 }, { "entropy": 1.7083103656768799, "epoch": 1.7635055340419106, "grad_norm": 0.7127790451049805, "learning_rate": 2.65275533568033e-06, "loss": 1.3755, "mean_token_accuracy": 0.665579229593277, "num_tokens": 2693713801.0, "step": 16053 }, { "entropy": 1.6606411933898926, "epoch": 1.7636153909532832, "grad_norm": 0.6641468405723572, "learning_rate": 2.6521568160586247e-06, "loss": 1.3632, "mean_token_accuracy": 0.6683401316404343, "num_tokens": 2693884118.0, "step": 16054 }, { "entropy": 1.714243769645691, "epoch": 1.7637252478646563, "grad_norm": 0.7927217483520508, "learning_rate": 2.651558560642397e-06, "loss": 1.5081, "mean_token_accuracy": 0.6555715998013815, "num_tokens": 2694061884.0, "step": 16055 }, { "entropy": 1.6633921265602112, "epoch": 1.7638351047760292, "grad_norm": 0.7348926663398743, "learning_rate": 2.650960569450576e-06, "loss": 1.4212, "mean_token_accuracy": 0.6668934176365534, "num_tokens": 2694230992.0, "step": 16056 }, { "entropy": 1.5766185522079468, "epoch": 1.7639449616874021, "grad_norm": 0.6212765574455261, "learning_rate": 2.65036284250209e-06, "loss": 1.5426, "mean_token_accuracy": 0.6506764938433965, "num_tokens": 2694462977.0, "step": 16057 }, { "entropy": 1.6853082577387493, "epoch": 1.7640548185987752, "grad_norm": 0.6269177794456482, "learning_rate": 2.64976537981586e-06, "loss": 1.4271, "mean_token_accuracy": 0.6474867115418116, "num_tokens": 2694663179.0, "step": 16058 }, { "entropy": 1.714323361714681, "epoch": 1.7641646755101479, "grad_norm": 0.6898490190505981, "learning_rate": 2.6491681814107933e-06, "loss": 1.2853, "mean_token_accuracy": 0.6686572035153707, "num_tokens": 2694814522.0, "step": 16059 }, { "entropy": 1.7176215052604675, "epoch": 1.764274532421521, "grad_norm": 0.6587092280387878, "learning_rate": 2.6485712473057886e-06, "loss": 1.3003, "mean_token_accuracy": 0.6659404089053472, "num_tokens": 2694951281.0, "step": 16060 }, { "entropy": 1.6764805714289348, "epoch": 1.7643843893328939, "grad_norm": 0.6627944707870483, "learning_rate": 2.647974577519742e-06, "loss": 1.3495, "mean_token_accuracy": 0.6571897814671198, "num_tokens": 2695107412.0, "step": 16061 }, { "entropy": 1.6623700261116028, "epoch": 1.7644942462442668, "grad_norm": 0.7114543318748474, "learning_rate": 2.647378172071535e-06, "loss": 1.3969, "mean_token_accuracy": 0.651480957865715, "num_tokens": 2695289663.0, "step": 16062 }, { "entropy": 1.7313786645730336, "epoch": 1.7646041031556399, "grad_norm": 0.6407042741775513, "learning_rate": 2.6467820309800472e-06, "loss": 1.4069, "mean_token_accuracy": 0.6487823029359182, "num_tokens": 2695500927.0, "step": 16063 }, { "entropy": 1.728273371855418, "epoch": 1.7647139600670128, "grad_norm": 0.677616536617279, "learning_rate": 2.646186154264143e-06, "loss": 1.2814, "mean_token_accuracy": 0.6799236685037613, "num_tokens": 2695643975.0, "step": 16064 }, { "entropy": 1.7222307622432709, "epoch": 1.7648238169783856, "grad_norm": 0.6702946424484253, "learning_rate": 2.645590541942683e-06, "loss": 1.4948, "mean_token_accuracy": 0.6496520837148031, "num_tokens": 2695802990.0, "step": 16065 }, { "entropy": 1.7068528135617573, "epoch": 1.7649336738897587, "grad_norm": 0.8094030022621155, "learning_rate": 2.6449951940345164e-06, "loss": 1.6127, "mean_token_accuracy": 0.6310140788555145, "num_tokens": 2696010181.0, "step": 16066 }, { "entropy": 1.6725980242093403, "epoch": 1.7650435308011314, "grad_norm": 0.8047062158584595, "learning_rate": 2.6444001105584897e-06, "loss": 1.3926, "mean_token_accuracy": 0.6618801603714625, "num_tokens": 2696173600.0, "step": 16067 }, { "entropy": 1.6676512161890666, "epoch": 1.7651533877125045, "grad_norm": 0.6256596446037292, "learning_rate": 2.643805291533433e-06, "loss": 1.311, "mean_token_accuracy": 0.6845296223958334, "num_tokens": 2696321443.0, "step": 16068 }, { "entropy": 1.7186016937096913, "epoch": 1.7652632446238774, "grad_norm": 0.6340106725692749, "learning_rate": 2.643210736978173e-06, "loss": 1.3346, "mean_token_accuracy": 0.6681850502888361, "num_tokens": 2696498399.0, "step": 16069 }, { "entropy": 1.684872140487035, "epoch": 1.7653731015352503, "grad_norm": 0.7013046145439148, "learning_rate": 2.6426164469115274e-06, "loss": 1.4155, "mean_token_accuracy": 0.6384324083725611, "num_tokens": 2696669061.0, "step": 16070 }, { "entropy": 1.6660610735416412, "epoch": 1.7654829584466234, "grad_norm": 0.7147235870361328, "learning_rate": 2.6420224213523066e-06, "loss": 1.2383, "mean_token_accuracy": 0.6764589746793112, "num_tokens": 2696830878.0, "step": 16071 }, { "entropy": 1.6860030889511108, "epoch": 1.765592815357996, "grad_norm": 0.6619150042533875, "learning_rate": 2.6414286603193094e-06, "loss": 1.5488, "mean_token_accuracy": 0.640265941619873, "num_tokens": 2697002297.0, "step": 16072 }, { "entropy": 1.7264870901902516, "epoch": 1.7657026722693692, "grad_norm": 0.7539317011833191, "learning_rate": 2.6408351638313272e-06, "loss": 1.3083, "mean_token_accuracy": 0.6663891822099686, "num_tokens": 2697134172.0, "step": 16073 }, { "entropy": 1.7615606784820557, "epoch": 1.765812529180742, "grad_norm": 0.6778137683868408, "learning_rate": 2.6402419319071463e-06, "loss": 1.5129, "mean_token_accuracy": 0.6431242475907007, "num_tokens": 2697306424.0, "step": 16074 }, { "entropy": 1.6955093443393707, "epoch": 1.765922386092115, "grad_norm": 0.7733752131462097, "learning_rate": 2.639648964565542e-06, "loss": 1.3358, "mean_token_accuracy": 0.6611419717470804, "num_tokens": 2697441605.0, "step": 16075 }, { "entropy": 1.6361872255802155, "epoch": 1.766032243003488, "grad_norm": 0.6613511443138123, "learning_rate": 2.6390562618252806e-06, "loss": 1.1672, "mean_token_accuracy": 0.6900085906187693, "num_tokens": 2697553650.0, "step": 16076 }, { "entropy": 1.71124001344045, "epoch": 1.766142099914861, "grad_norm": 0.7892715334892273, "learning_rate": 2.6384638237051198e-06, "loss": 1.3934, "mean_token_accuracy": 0.6527740309635798, "num_tokens": 2697688469.0, "step": 16077 }, { "entropy": 1.6695888042449951, "epoch": 1.7662519568262338, "grad_norm": 0.6769923567771912, "learning_rate": 2.637871650223812e-06, "loss": 1.3749, "mean_token_accuracy": 0.6554907162984213, "num_tokens": 2697888006.0, "step": 16078 }, { "entropy": 1.6960602402687073, "epoch": 1.766361813737607, "grad_norm": 0.6325769424438477, "learning_rate": 2.6372797414000996e-06, "loss": 1.483, "mean_token_accuracy": 0.6564978261788686, "num_tokens": 2698060704.0, "step": 16079 }, { "entropy": 1.715209275484085, "epoch": 1.7664716706489796, "grad_norm": 0.6917846202850342, "learning_rate": 2.636688097252713e-06, "loss": 1.2256, "mean_token_accuracy": 0.6857384641965231, "num_tokens": 2698175996.0, "step": 16080 }, { "entropy": 1.6636294424533844, "epoch": 1.7665815275603527, "grad_norm": 0.6673606634140015, "learning_rate": 2.636096717800381e-06, "loss": 1.486, "mean_token_accuracy": 0.6630779206752777, "num_tokens": 2698337011.0, "step": 16081 }, { "entropy": 1.6239332656065624, "epoch": 1.7666913844717256, "grad_norm": 0.8185363411903381, "learning_rate": 2.6355056030618166e-06, "loss": 1.2305, "mean_token_accuracy": 0.6870084901650747, "num_tokens": 2698479373.0, "step": 16082 }, { "entropy": 1.7032920519510906, "epoch": 1.7668012413830985, "grad_norm": 0.6755743622779846, "learning_rate": 2.6349147530557327e-06, "loss": 1.4925, "mean_token_accuracy": 0.6550940821568171, "num_tokens": 2698614324.0, "step": 16083 }, { "entropy": 1.6856786410013835, "epoch": 1.7669110982944716, "grad_norm": 0.7225176095962524, "learning_rate": 2.6343241678008286e-06, "loss": 1.2452, "mean_token_accuracy": 0.6743053098519644, "num_tokens": 2698770765.0, "step": 16084 }, { "entropy": 1.716284801562627, "epoch": 1.7670209552058442, "grad_norm": 0.6630404591560364, "learning_rate": 2.6337338473157925e-06, "loss": 1.4324, "mean_token_accuracy": 0.648807222644488, "num_tokens": 2698934174.0, "step": 16085 }, { "entropy": 1.705119530359904, "epoch": 1.7671308121172173, "grad_norm": 0.681877613067627, "learning_rate": 2.633143791619311e-06, "loss": 1.2931, "mean_token_accuracy": 0.6711487770080566, "num_tokens": 2699088905.0, "step": 16086 }, { "entropy": 1.6811328033606212, "epoch": 1.7672406690285902, "grad_norm": 0.6792789697647095, "learning_rate": 2.6325540007300585e-06, "loss": 1.4651, "mean_token_accuracy": 0.6474873671929041, "num_tokens": 2699230587.0, "step": 16087 }, { "entropy": 1.6984275380770366, "epoch": 1.7673505259399631, "grad_norm": 0.7023369669914246, "learning_rate": 2.631964474666702e-06, "loss": 1.412, "mean_token_accuracy": 0.6622547606627146, "num_tokens": 2699415668.0, "step": 16088 }, { "entropy": 1.7122309307257335, "epoch": 1.7674603828513362, "grad_norm": 0.614011287689209, "learning_rate": 2.631375213447898e-06, "loss": 1.328, "mean_token_accuracy": 0.6657908707857132, "num_tokens": 2699572735.0, "step": 16089 }, { "entropy": 1.6805100739002228, "epoch": 1.767570239762709, "grad_norm": 0.7636834979057312, "learning_rate": 2.6307862170922992e-06, "loss": 1.3451, "mean_token_accuracy": 0.6637326081593832, "num_tokens": 2699790891.0, "step": 16090 }, { "entropy": 1.7203999757766724, "epoch": 1.767680096674082, "grad_norm": 0.8896521329879761, "learning_rate": 2.630197485618544e-06, "loss": 1.6256, "mean_token_accuracy": 0.6291612386703491, "num_tokens": 2699997296.0, "step": 16091 }, { "entropy": 1.715426633755366, "epoch": 1.767789953585455, "grad_norm": 0.6982161402702332, "learning_rate": 2.629609019045267e-06, "loss": 1.6772, "mean_token_accuracy": 0.6193340122699738, "num_tokens": 2700178860.0, "step": 16092 }, { "entropy": 1.696618139743805, "epoch": 1.7678998104968278, "grad_norm": 0.6929605603218079, "learning_rate": 2.6290208173910935e-06, "loss": 1.3606, "mean_token_accuracy": 0.6675488402446111, "num_tokens": 2700334161.0, "step": 16093 }, { "entropy": 1.7300164600213368, "epoch": 1.7680096674082009, "grad_norm": 0.6754699945449829, "learning_rate": 2.628432880674637e-06, "loss": 1.4199, "mean_token_accuracy": 0.6655618896087011, "num_tokens": 2700482064.0, "step": 16094 }, { "entropy": 1.7005921800931294, "epoch": 1.7681195243195738, "grad_norm": 0.7460622191429138, "learning_rate": 2.6278452089145107e-06, "loss": 1.3045, "mean_token_accuracy": 0.6671140988667806, "num_tokens": 2700620075.0, "step": 16095 }, { "entropy": 1.6499681274096172, "epoch": 1.7682293812309466, "grad_norm": 0.6865026354789734, "learning_rate": 2.627257802129309e-06, "loss": 1.3658, "mean_token_accuracy": 0.6599505941073099, "num_tokens": 2700819470.0, "step": 16096 }, { "entropy": 1.6812036136786144, "epoch": 1.7683392381423197, "grad_norm": 0.6698898673057556, "learning_rate": 2.6266706603376244e-06, "loss": 1.4634, "mean_token_accuracy": 0.6481799880663554, "num_tokens": 2701000116.0, "step": 16097 }, { "entropy": 1.742706725994746, "epoch": 1.7684490950536924, "grad_norm": 0.7475796937942505, "learning_rate": 2.62608378355804e-06, "loss": 1.4385, "mean_token_accuracy": 0.6343745936950048, "num_tokens": 2701199955.0, "step": 16098 }, { "entropy": 1.7251374125480652, "epoch": 1.7685589519650655, "grad_norm": 0.6364467144012451, "learning_rate": 2.6254971718091326e-06, "loss": 1.2861, "mean_token_accuracy": 0.6751857052246729, "num_tokens": 2701376853.0, "step": 16099 }, { "entropy": 1.7397385934988658, "epoch": 1.7686688088764384, "grad_norm": 0.6278882026672363, "learning_rate": 2.624910825109466e-06, "loss": 1.4487, "mean_token_accuracy": 0.6475135733683904, "num_tokens": 2701541159.0, "step": 16100 }, { "entropy": 1.5949292679627736, "epoch": 1.7687786657878113, "grad_norm": 0.5723445415496826, "learning_rate": 2.6243247434775967e-06, "loss": 1.3228, "mean_token_accuracy": 0.6649216016133627, "num_tokens": 2701731184.0, "step": 16101 }, { "entropy": 1.6799314518769581, "epoch": 1.7688885226991844, "grad_norm": 0.6339821219444275, "learning_rate": 2.623738926932075e-06, "loss": 1.255, "mean_token_accuracy": 0.6732438405354818, "num_tokens": 2701853054.0, "step": 16102 }, { "entropy": 1.7200807829697926, "epoch": 1.7689983796105573, "grad_norm": 0.8305505514144897, "learning_rate": 2.6231533754914435e-06, "loss": 1.3544, "mean_token_accuracy": 0.6595542430877686, "num_tokens": 2701983290.0, "step": 16103 }, { "entropy": 1.6731150647004445, "epoch": 1.7691082365219302, "grad_norm": 0.7634748220443726, "learning_rate": 2.6225680891742307e-06, "loss": 1.5671, "mean_token_accuracy": 0.642717699209849, "num_tokens": 2702184992.0, "step": 16104 }, { "entropy": 1.7635932167371113, "epoch": 1.7692180934333033, "grad_norm": 0.6697694659233093, "learning_rate": 2.6219830679989645e-06, "loss": 1.4884, "mean_token_accuracy": 0.6513105779886246, "num_tokens": 2702325974.0, "step": 16105 }, { "entropy": 1.7982784907023113, "epoch": 1.769327950344676, "grad_norm": 0.8669276237487793, "learning_rate": 2.6213983119841573e-06, "loss": 1.5692, "mean_token_accuracy": 0.6412549217542013, "num_tokens": 2702471856.0, "step": 16106 }, { "entropy": 1.7105421125888824, "epoch": 1.769437807256049, "grad_norm": 0.6940631866455078, "learning_rate": 2.6208138211483193e-06, "loss": 1.4021, "mean_token_accuracy": 0.6522999107837677, "num_tokens": 2702631817.0, "step": 16107 }, { "entropy": 1.667997380097707, "epoch": 1.769547664167422, "grad_norm": 0.6804345846176147, "learning_rate": 2.6202295955099484e-06, "loss": 1.3276, "mean_token_accuracy": 0.6740523924430212, "num_tokens": 2702779392.0, "step": 16108 }, { "entropy": 1.764988124370575, "epoch": 1.7696575210787948, "grad_norm": 0.5990316271781921, "learning_rate": 2.6196456350875336e-06, "loss": 1.5235, "mean_token_accuracy": 0.6251028428475062, "num_tokens": 2703003157.0, "step": 16109 }, { "entropy": 1.712211827437083, "epoch": 1.769767377990168, "grad_norm": 0.8300401568412781, "learning_rate": 2.619061939899558e-06, "loss": 1.2377, "mean_token_accuracy": 0.6737738301356634, "num_tokens": 2703156071.0, "step": 16110 }, { "entropy": 1.732935518026352, "epoch": 1.7698772349015406, "grad_norm": 0.7982600331306458, "learning_rate": 2.618478509964498e-06, "loss": 1.3812, "mean_token_accuracy": 0.6614757974942526, "num_tokens": 2703317575.0, "step": 16111 }, { "entropy": 1.6523981094360352, "epoch": 1.7699870918129137, "grad_norm": 0.6973866820335388, "learning_rate": 2.6178953453008143e-06, "loss": 1.4013, "mean_token_accuracy": 0.6528991907835007, "num_tokens": 2703469385.0, "step": 16112 }, { "entropy": 1.6980741024017334, "epoch": 1.7700969487242866, "grad_norm": 0.9420611262321472, "learning_rate": 2.6173124459269654e-06, "loss": 1.5222, "mean_token_accuracy": 0.6804485072692236, "num_tokens": 2703634520.0, "step": 16113 }, { "entropy": 1.67299422621727, "epoch": 1.7702068056356595, "grad_norm": 0.6318880319595337, "learning_rate": 2.616729811861402e-06, "loss": 1.3287, "mean_token_accuracy": 0.6642769277095795, "num_tokens": 2703787272.0, "step": 16114 }, { "entropy": 1.7164417207241058, "epoch": 1.7703166625470326, "grad_norm": 0.788582444190979, "learning_rate": 2.6161474431225624e-06, "loss": 1.3569, "mean_token_accuracy": 0.669378658135732, "num_tokens": 2703907662.0, "step": 16115 }, { "entropy": 1.7314981818199158, "epoch": 1.7704265194584055, "grad_norm": 0.6980270743370056, "learning_rate": 2.6155653397288762e-06, "loss": 1.3667, "mean_token_accuracy": 0.6507422377665838, "num_tokens": 2704058988.0, "step": 16116 }, { "entropy": 1.7171373665332794, "epoch": 1.7705363763697783, "grad_norm": 0.7046215534210205, "learning_rate": 2.61498350169877e-06, "loss": 1.3032, "mean_token_accuracy": 0.6671566814184189, "num_tokens": 2704223794.0, "step": 16117 }, { "entropy": 1.6645724177360535, "epoch": 1.7706462332811514, "grad_norm": 0.8890359997749329, "learning_rate": 2.6144019290506577e-06, "loss": 1.2717, "mean_token_accuracy": 0.6672799090544382, "num_tokens": 2704401039.0, "step": 16118 }, { "entropy": 1.7233166893323262, "epoch": 1.7707560901925241, "grad_norm": 0.5646775364875793, "learning_rate": 2.613820621802947e-06, "loss": 1.4646, "mean_token_accuracy": 0.6415334989627203, "num_tokens": 2704591999.0, "step": 16119 }, { "entropy": 1.6967370808124542, "epoch": 1.7708659471038972, "grad_norm": 0.9199461340904236, "learning_rate": 2.613239579974034e-06, "loss": 1.3411, "mean_token_accuracy": 0.6616208553314209, "num_tokens": 2704736795.0, "step": 16120 }, { "entropy": 1.6985019147396088, "epoch": 1.77097580401527, "grad_norm": 0.7342497110366821, "learning_rate": 2.6126588035823074e-06, "loss": 1.4852, "mean_token_accuracy": 0.663700466354688, "num_tokens": 2704877585.0, "step": 16121 }, { "entropy": 1.686661461989085, "epoch": 1.771085660926643, "grad_norm": 0.6621032357215881, "learning_rate": 2.6120782926461514e-06, "loss": 1.2625, "mean_token_accuracy": 0.6748026907444, "num_tokens": 2705010312.0, "step": 16122 }, { "entropy": 1.6965848008791606, "epoch": 1.771195517838016, "grad_norm": 0.7610173225402832, "learning_rate": 2.6114980471839384e-06, "loss": 1.3852, "mean_token_accuracy": 0.6536916842063268, "num_tokens": 2705237228.0, "step": 16123 }, { "entropy": 1.687073806921641, "epoch": 1.7713053747493888, "grad_norm": 0.7324576377868652, "learning_rate": 2.6109180672140315e-06, "loss": 1.3022, "mean_token_accuracy": 0.6851969212293625, "num_tokens": 2705407389.0, "step": 16124 }, { "entropy": 1.705436368783315, "epoch": 1.7714152316607619, "grad_norm": 0.6689654588699341, "learning_rate": 2.6103383527547864e-06, "loss": 1.2624, "mean_token_accuracy": 0.675425186753273, "num_tokens": 2705543178.0, "step": 16125 }, { "entropy": 1.6826893587907155, "epoch": 1.7715250885721348, "grad_norm": 0.6657409071922302, "learning_rate": 2.6097589038245545e-06, "loss": 1.3492, "mean_token_accuracy": 0.655790776014328, "num_tokens": 2705697833.0, "step": 16126 }, { "entropy": 1.7663909792900085, "epoch": 1.7716349454835076, "grad_norm": 0.7559472322463989, "learning_rate": 2.609179720441672e-06, "loss": 1.533, "mean_token_accuracy": 0.6423116599520048, "num_tokens": 2705889425.0, "step": 16127 }, { "entropy": 1.6723959843317668, "epoch": 1.7717448023948807, "grad_norm": 0.6863106489181519, "learning_rate": 2.6086008026244704e-06, "loss": 1.32, "mean_token_accuracy": 0.675317257642746, "num_tokens": 2706068340.0, "step": 16128 }, { "entropy": 1.735195557276408, "epoch": 1.7718546593062536, "grad_norm": 0.8114670515060425, "learning_rate": 2.6080221503912707e-06, "loss": 1.5294, "mean_token_accuracy": 0.6455264935890833, "num_tokens": 2706229929.0, "step": 16129 }, { "entropy": 1.7443922758102417, "epoch": 1.7719645162176265, "grad_norm": 0.7101374864578247, "learning_rate": 2.6074437637603885e-06, "loss": 1.2177, "mean_token_accuracy": 0.6852958450714747, "num_tokens": 2706361616.0, "step": 16130 }, { "entropy": 1.6811170478661854, "epoch": 1.7720743731289996, "grad_norm": 0.7076679468154907, "learning_rate": 2.6068656427501303e-06, "loss": 1.4677, "mean_token_accuracy": 0.6564472218354543, "num_tokens": 2706535078.0, "step": 16131 }, { "entropy": 1.746174544095993, "epoch": 1.7721842300403723, "grad_norm": 0.7166526913642883, "learning_rate": 2.6062877873787933e-06, "loss": 1.5574, "mean_token_accuracy": 0.6369020914038023, "num_tokens": 2706692845.0, "step": 16132 }, { "entropy": 1.7436832785606384, "epoch": 1.7722940869517454, "grad_norm": 0.920242428779602, "learning_rate": 2.6057101976646633e-06, "loss": 1.586, "mean_token_accuracy": 0.6347026800115904, "num_tokens": 2706888652.0, "step": 16133 }, { "entropy": 1.7304763694604237, "epoch": 1.7724039438631183, "grad_norm": 0.6323895454406738, "learning_rate": 2.605132873626025e-06, "loss": 1.5587, "mean_token_accuracy": 0.6354440351327261, "num_tokens": 2707071567.0, "step": 16134 }, { "entropy": 1.6805862685044606, "epoch": 1.7725138007744912, "grad_norm": 0.6497008204460144, "learning_rate": 2.604555815281148e-06, "loss": 1.2406, "mean_token_accuracy": 0.6805828412373861, "num_tokens": 2707219245.0, "step": 16135 }, { "entropy": 1.7129256625970204, "epoch": 1.7726236576858643, "grad_norm": 0.6977981925010681, "learning_rate": 2.6039790226482956e-06, "loss": 1.514, "mean_token_accuracy": 0.6338231811920801, "num_tokens": 2707400096.0, "step": 16136 }, { "entropy": 1.641619215408961, "epoch": 1.772733514597237, "grad_norm": 0.6210656762123108, "learning_rate": 2.603402495745724e-06, "loss": 1.3067, "mean_token_accuracy": 0.6655979255835215, "num_tokens": 2707577271.0, "step": 16137 }, { "entropy": 1.7035197516282399, "epoch": 1.77284337150861, "grad_norm": 0.7586587071418762, "learning_rate": 2.6028262345916796e-06, "loss": 1.2978, "mean_token_accuracy": 0.6796625256538391, "num_tokens": 2707744658.0, "step": 16138 }, { "entropy": 1.752359499533971, "epoch": 1.772953228419983, "grad_norm": 0.6705688834190369, "learning_rate": 2.6022502392044023e-06, "loss": 1.5081, "mean_token_accuracy": 0.6395560602347056, "num_tokens": 2707934539.0, "step": 16139 }, { "entropy": 1.7236380577087402, "epoch": 1.7730630853313558, "grad_norm": 0.709601104259491, "learning_rate": 2.60167450960212e-06, "loss": 1.5704, "mean_token_accuracy": 0.6389553348223368, "num_tokens": 2708079185.0, "step": 16140 }, { "entropy": 1.7358200351397197, "epoch": 1.773172942242729, "grad_norm": 0.7101724743843079, "learning_rate": 2.6010990458030548e-06, "loss": 1.3491, "mean_token_accuracy": 0.6665903975566229, "num_tokens": 2708191566.0, "step": 16141 }, { "entropy": 1.7284641365210216, "epoch": 1.7732827991541018, "grad_norm": 0.6471381187438965, "learning_rate": 2.600523847825419e-06, "loss": 1.5326, "mean_token_accuracy": 0.6393000731865565, "num_tokens": 2708369199.0, "step": 16142 }, { "entropy": 1.719407816727956, "epoch": 1.7733926560654747, "grad_norm": 0.7297753095626831, "learning_rate": 2.5999489156874214e-06, "loss": 1.292, "mean_token_accuracy": 0.6675407042105993, "num_tokens": 2708487034.0, "step": 16143 }, { "entropy": 1.705468972524007, "epoch": 1.7735025129768478, "grad_norm": 0.6920966506004333, "learning_rate": 2.5993742494072544e-06, "loss": 1.4197, "mean_token_accuracy": 0.6390438576539358, "num_tokens": 2708660319.0, "step": 16144 }, { "entropy": 1.7181805968284607, "epoch": 1.7736123698882205, "grad_norm": 0.6733984351158142, "learning_rate": 2.5987998490031054e-06, "loss": 1.4356, "mean_token_accuracy": 0.6629656205574671, "num_tokens": 2708828951.0, "step": 16145 }, { "entropy": 1.7383651733398438, "epoch": 1.7737222267995936, "grad_norm": 0.7001859545707703, "learning_rate": 2.5982257144931573e-06, "loss": 1.4767, "mean_token_accuracy": 0.651274119814237, "num_tokens": 2708963681.0, "step": 16146 }, { "entropy": 1.68324081103007, "epoch": 1.7738320837109665, "grad_norm": 0.772042453289032, "learning_rate": 2.597651845895579e-06, "loss": 1.4884, "mean_token_accuracy": 0.6505793780088425, "num_tokens": 2709118225.0, "step": 16147 }, { "entropy": 1.7421075602372487, "epoch": 1.7739419406223393, "grad_norm": 0.7277549505233765, "learning_rate": 2.597078243228533e-06, "loss": 1.3914, "mean_token_accuracy": 0.6713375896215439, "num_tokens": 2709278861.0, "step": 16148 }, { "entropy": 1.6302008628845215, "epoch": 1.7740517975337124, "grad_norm": 0.6995809674263, "learning_rate": 2.5965049065101746e-06, "loss": 1.247, "mean_token_accuracy": 0.686556855837504, "num_tokens": 2709442407.0, "step": 16149 }, { "entropy": 1.6407863795757294, "epoch": 1.7741616544450851, "grad_norm": 0.7002416849136353, "learning_rate": 2.595931835758649e-06, "loss": 1.3002, "mean_token_accuracy": 0.6775663743416468, "num_tokens": 2709603952.0, "step": 16150 }, { "entropy": 1.6509563426176708, "epoch": 1.7742715113564582, "grad_norm": 0.6484111547470093, "learning_rate": 2.595359030992094e-06, "loss": 1.2547, "mean_token_accuracy": 0.6776574452718099, "num_tokens": 2709787365.0, "step": 16151 }, { "entropy": 1.7624330123265584, "epoch": 1.774381368267831, "grad_norm": 0.753284215927124, "learning_rate": 2.5947864922286386e-06, "loss": 1.4298, "mean_token_accuracy": 0.6394060303767523, "num_tokens": 2709952090.0, "step": 16152 }, { "entropy": 1.689292460680008, "epoch": 1.774491225179204, "grad_norm": 0.7348275780677795, "learning_rate": 2.5942142194864024e-06, "loss": 1.3677, "mean_token_accuracy": 0.6548637946446737, "num_tokens": 2710159115.0, "step": 16153 }, { "entropy": 1.7220345834891002, "epoch": 1.774601082090577, "grad_norm": 0.6995728611946106, "learning_rate": 2.5936422127834985e-06, "loss": 1.2697, "mean_token_accuracy": 0.6734829644362131, "num_tokens": 2710276327.0, "step": 16154 }, { "entropy": 1.7671296894550323, "epoch": 1.77471093900195, "grad_norm": 0.7245908379554749, "learning_rate": 2.593070472138031e-06, "loss": 1.4621, "mean_token_accuracy": 0.6349116514126459, "num_tokens": 2710506591.0, "step": 16155 }, { "entropy": 1.709269384543101, "epoch": 1.7748207959133229, "grad_norm": 0.6322346329689026, "learning_rate": 2.5924989975680963e-06, "loss": 1.3325, "mean_token_accuracy": 0.6667979657649994, "num_tokens": 2710662997.0, "step": 16156 }, { "entropy": 1.723963479200999, "epoch": 1.774930652824696, "grad_norm": 0.7471461892127991, "learning_rate": 2.5919277890917777e-06, "loss": 1.2944, "mean_token_accuracy": 0.6734066704909006, "num_tokens": 2710795173.0, "step": 16157 }, { "entropy": 1.7161237994829814, "epoch": 1.7750405097360686, "grad_norm": 0.6630175709724426, "learning_rate": 2.5913568467271564e-06, "loss": 1.5, "mean_token_accuracy": 0.633497933546702, "num_tokens": 2710956314.0, "step": 16158 }, { "entropy": 1.7181127270062764, "epoch": 1.7751503666474417, "grad_norm": 0.6697092652320862, "learning_rate": 2.590786170492304e-06, "loss": 1.4132, "mean_token_accuracy": 0.6475900014241537, "num_tokens": 2711129596.0, "step": 16159 }, { "entropy": 1.6878857612609863, "epoch": 1.7752602235588146, "grad_norm": 0.6678206324577332, "learning_rate": 2.590215760405277e-06, "loss": 1.3574, "mean_token_accuracy": 0.6573008944590887, "num_tokens": 2711318223.0, "step": 16160 }, { "entropy": 1.7164893845717113, "epoch": 1.7753700804701875, "grad_norm": 0.6961454749107361, "learning_rate": 2.589645616484133e-06, "loss": 1.4638, "mean_token_accuracy": 0.6609781930843989, "num_tokens": 2711458038.0, "step": 16161 }, { "entropy": 1.6448584695657094, "epoch": 1.7754799373815606, "grad_norm": 0.6715121269226074, "learning_rate": 2.589075738746914e-06, "loss": 1.4383, "mean_token_accuracy": 0.6469363421201706, "num_tokens": 2711618842.0, "step": 16162 }, { "entropy": 1.6738781730333965, "epoch": 1.7755897942929333, "grad_norm": 0.7207627892494202, "learning_rate": 2.5885061272116597e-06, "loss": 1.3785, "mean_token_accuracy": 0.6592583358287811, "num_tokens": 2711743448.0, "step": 16163 }, { "entropy": 1.764163116614024, "epoch": 1.7756996512043064, "grad_norm": 0.6622251868247986, "learning_rate": 2.5879367818963965e-06, "loss": 1.354, "mean_token_accuracy": 0.6568918774525324, "num_tokens": 2711879685.0, "step": 16164 }, { "entropy": 1.6938027838865917, "epoch": 1.7758095081156793, "grad_norm": 0.6962149143218994, "learning_rate": 2.5873677028191418e-06, "loss": 1.2467, "mean_token_accuracy": 0.6714517027139664, "num_tokens": 2712009648.0, "step": 16165 }, { "entropy": 1.706722229719162, "epoch": 1.7759193650270522, "grad_norm": 0.5590953230857849, "learning_rate": 2.5867988899979086e-06, "loss": 1.4431, "mean_token_accuracy": 0.6479671547810236, "num_tokens": 2712190182.0, "step": 16166 }, { "entropy": 1.7148079474767048, "epoch": 1.7760292219384253, "grad_norm": 0.6328277587890625, "learning_rate": 2.5862303434507e-06, "loss": 1.4287, "mean_token_accuracy": 0.6558680633703867, "num_tokens": 2712438765.0, "step": 16167 }, { "entropy": 1.7054628531138103, "epoch": 1.7761390788497982, "grad_norm": 0.6650689244270325, "learning_rate": 2.5856620631955102e-06, "loss": 1.3792, "mean_token_accuracy": 0.6593814243872961, "num_tokens": 2712569597.0, "step": 16168 }, { "entropy": 1.7111007869243622, "epoch": 1.776248935761171, "grad_norm": 0.6413836479187012, "learning_rate": 2.5850940492503236e-06, "loss": 1.3747, "mean_token_accuracy": 0.6556829412778219, "num_tokens": 2712700190.0, "step": 16169 }, { "entropy": 1.7287939886252086, "epoch": 1.7763587926725442, "grad_norm": 0.6228131055831909, "learning_rate": 2.584526301633119e-06, "loss": 1.4028, "mean_token_accuracy": 0.6681485623121262, "num_tokens": 2712890812.0, "step": 16170 }, { "entropy": 1.6465917030970256, "epoch": 1.7764686495839168, "grad_norm": 0.7483593225479126, "learning_rate": 2.583958820361866e-06, "loss": 1.3934, "mean_token_accuracy": 0.6619542588790258, "num_tokens": 2713032571.0, "step": 16171 }, { "entropy": 1.6852433780829112, "epoch": 1.77657850649529, "grad_norm": 0.6569193601608276, "learning_rate": 2.5833916054545217e-06, "loss": 1.2729, "mean_token_accuracy": 0.6696591476599375, "num_tokens": 2713172975.0, "step": 16172 }, { "entropy": 1.7001554270585377, "epoch": 1.7766883634066628, "grad_norm": 0.7120020389556885, "learning_rate": 2.582824656929042e-06, "loss": 1.43, "mean_token_accuracy": 0.6575095355510712, "num_tokens": 2713345608.0, "step": 16173 }, { "entropy": 1.667518824338913, "epoch": 1.7767982203180357, "grad_norm": 0.7396501898765564, "learning_rate": 2.5822579748033676e-06, "loss": 1.3359, "mean_token_accuracy": 0.6745062321424484, "num_tokens": 2713516606.0, "step": 16174 }, { "entropy": 1.6942040920257568, "epoch": 1.7769080772294088, "grad_norm": 0.8534673452377319, "learning_rate": 2.5816915590954367e-06, "loss": 1.2135, "mean_token_accuracy": 0.6788101047277451, "num_tokens": 2713681222.0, "step": 16175 }, { "entropy": 1.7359768450260162, "epoch": 1.7770179341407817, "grad_norm": 0.6801086664199829, "learning_rate": 2.581125409823175e-06, "loss": 1.4861, "mean_token_accuracy": 0.6353782365719477, "num_tokens": 2713917752.0, "step": 16176 }, { "entropy": 1.6656650304794312, "epoch": 1.7771277910521546, "grad_norm": 0.6138239502906799, "learning_rate": 2.580559527004499e-06, "loss": 1.2827, "mean_token_accuracy": 0.6767630279064178, "num_tokens": 2714055978.0, "step": 16177 }, { "entropy": 1.7242048780123393, "epoch": 1.7772376479635275, "grad_norm": 0.7279729247093201, "learning_rate": 2.579993910657319e-06, "loss": 1.33, "mean_token_accuracy": 0.6593465854724249, "num_tokens": 2714173544.0, "step": 16178 }, { "entropy": 1.7006490429242451, "epoch": 1.7773475048749003, "grad_norm": 0.7351089119911194, "learning_rate": 2.5794285607995407e-06, "loss": 1.4855, "mean_token_accuracy": 0.6623196552197138, "num_tokens": 2714346691.0, "step": 16179 }, { "entropy": 1.7114817400773366, "epoch": 1.7774573617862734, "grad_norm": 0.7498958110809326, "learning_rate": 2.5788634774490524e-06, "loss": 1.588, "mean_token_accuracy": 0.6416305353244146, "num_tokens": 2714550240.0, "step": 16180 }, { "entropy": 1.7464225788911183, "epoch": 1.7775672186976463, "grad_norm": 0.7129120826721191, "learning_rate": 2.57829866062374e-06, "loss": 1.5597, "mean_token_accuracy": 0.626517136891683, "num_tokens": 2714761712.0, "step": 16181 }, { "entropy": 1.7081879675388336, "epoch": 1.7776770756090192, "grad_norm": 0.6990813612937927, "learning_rate": 2.5777341103414807e-06, "loss": 1.3879, "mean_token_accuracy": 0.6567564556996027, "num_tokens": 2714898735.0, "step": 16182 }, { "entropy": 1.7253175874551137, "epoch": 1.7777869325203923, "grad_norm": 0.7992512583732605, "learning_rate": 2.577169826620142e-06, "loss": 1.3492, "mean_token_accuracy": 0.6692212472359339, "num_tokens": 2715058698.0, "step": 16183 }, { "entropy": 1.7420212825139363, "epoch": 1.777896789431765, "grad_norm": 0.6389954090118408, "learning_rate": 2.576605809477582e-06, "loss": 1.4296, "mean_token_accuracy": 0.6562297642230988, "num_tokens": 2715237761.0, "step": 16184 }, { "entropy": 1.7322679460048676, "epoch": 1.778006646343138, "grad_norm": 0.6425938010215759, "learning_rate": 2.576042058931653e-06, "loss": 1.2624, "mean_token_accuracy": 0.6835384468237559, "num_tokens": 2715439975.0, "step": 16185 }, { "entropy": 1.7014261583487194, "epoch": 1.778116503254511, "grad_norm": 0.6009911894798279, "learning_rate": 2.5754785750001966e-06, "loss": 1.3455, "mean_token_accuracy": 0.6725515276193619, "num_tokens": 2715575981.0, "step": 16186 }, { "entropy": 1.7201250692208607, "epoch": 1.7782263601658839, "grad_norm": 0.7367297410964966, "learning_rate": 2.574915357701048e-06, "loss": 1.3928, "mean_token_accuracy": 0.660605326294899, "num_tokens": 2715726384.0, "step": 16187 }, { "entropy": 1.6956477065881093, "epoch": 1.778336217077257, "grad_norm": 0.6419438719749451, "learning_rate": 2.574352407052031e-06, "loss": 1.3171, "mean_token_accuracy": 0.6647944003343582, "num_tokens": 2715836741.0, "step": 16188 }, { "entropy": 1.6989044447739918, "epoch": 1.7784460739886299, "grad_norm": 0.9841082692146301, "learning_rate": 2.5737897230709622e-06, "loss": 1.4961, "mean_token_accuracy": 0.6657343481977781, "num_tokens": 2716004267.0, "step": 16189 }, { "entropy": 1.7684976359208424, "epoch": 1.7785559309000027, "grad_norm": 0.8549887537956238, "learning_rate": 2.5732273057756552e-06, "loss": 1.446, "mean_token_accuracy": 0.6611962815125784, "num_tokens": 2716181603.0, "step": 16190 }, { "entropy": 1.7020417054494221, "epoch": 1.7786657878113756, "grad_norm": 0.5607102513313293, "learning_rate": 2.572665155183905e-06, "loss": 1.4124, "mean_token_accuracy": 0.6579537143309911, "num_tokens": 2716361068.0, "step": 16191 }, { "entropy": 1.6742089788119, "epoch": 1.7787756447227485, "grad_norm": 0.7444910407066345, "learning_rate": 2.5721032713135043e-06, "loss": 1.3866, "mean_token_accuracy": 0.6673271010319392, "num_tokens": 2716526072.0, "step": 16192 }, { "entropy": 1.759224534034729, "epoch": 1.7788855016341216, "grad_norm": 0.6558489799499512, "learning_rate": 2.5715416541822387e-06, "loss": 1.384, "mean_token_accuracy": 0.6560174822807312, "num_tokens": 2716699065.0, "step": 16193 }, { "entropy": 1.7571994364261627, "epoch": 1.7789953585454945, "grad_norm": 0.6524195075035095, "learning_rate": 2.570980303807881e-06, "loss": 1.3668, "mean_token_accuracy": 0.6584974030653635, "num_tokens": 2716857465.0, "step": 16194 }, { "entropy": 1.8164484004179637, "epoch": 1.7791052154568674, "grad_norm": 0.7619872689247131, "learning_rate": 2.570419220208199e-06, "loss": 1.3642, "mean_token_accuracy": 0.654331718881925, "num_tokens": 2716995467.0, "step": 16195 }, { "entropy": 1.7526369988918304, "epoch": 1.7792150723682405, "grad_norm": 0.6816757321357727, "learning_rate": 2.5698584034009504e-06, "loss": 1.3161, "mean_token_accuracy": 0.6580136120319366, "num_tokens": 2717136339.0, "step": 16196 }, { "entropy": 1.7177092730998993, "epoch": 1.7793249292796132, "grad_norm": 0.6499624848365784, "learning_rate": 2.5692978534038834e-06, "loss": 1.3675, "mean_token_accuracy": 0.6472986241181692, "num_tokens": 2717281863.0, "step": 16197 }, { "entropy": 1.7083185315132141, "epoch": 1.7794347861909863, "grad_norm": 0.6570821404457092, "learning_rate": 2.56873757023474e-06, "loss": 1.3565, "mean_token_accuracy": 0.6527627358833948, "num_tokens": 2717444596.0, "step": 16198 }, { "entropy": 1.7401759326457977, "epoch": 1.7795446431023592, "grad_norm": 0.7995166182518005, "learning_rate": 2.5681775539112554e-06, "loss": 1.4527, "mean_token_accuracy": 0.6470424781243006, "num_tokens": 2717671539.0, "step": 16199 }, { "entropy": 1.721009184916814, "epoch": 1.779654500013732, "grad_norm": 0.7379947900772095, "learning_rate": 2.5676178044511513e-06, "loss": 1.5224, "mean_token_accuracy": 0.6547667557994524, "num_tokens": 2717840082.0, "step": 16200 }, { "entropy": 1.7124258081118267, "epoch": 1.7797643569251052, "grad_norm": 0.5799604058265686, "learning_rate": 2.5670583218721422e-06, "loss": 1.4312, "mean_token_accuracy": 0.6489716867605845, "num_tokens": 2718026773.0, "step": 16201 }, { "entropy": 1.690716157356898, "epoch": 1.779874213836478, "grad_norm": 0.6450859904289246, "learning_rate": 2.566499106191939e-06, "loss": 1.5138, "mean_token_accuracy": 0.6407775630553564, "num_tokens": 2718200554.0, "step": 16202 }, { "entropy": 1.6798825959364574, "epoch": 1.779984070747851, "grad_norm": 0.68744957447052, "learning_rate": 2.5659401574282393e-06, "loss": 1.4299, "mean_token_accuracy": 0.6480642408132553, "num_tokens": 2718385213.0, "step": 16203 }, { "entropy": 1.7331880331039429, "epoch": 1.7800939276592238, "grad_norm": 0.6031718850135803, "learning_rate": 2.5653814755987314e-06, "loss": 1.5247, "mean_token_accuracy": 0.631999467809995, "num_tokens": 2718620759.0, "step": 16204 }, { "entropy": 1.6975124776363373, "epoch": 1.7802037845705967, "grad_norm": 0.6780478954315186, "learning_rate": 2.5648230607211e-06, "loss": 1.2644, "mean_token_accuracy": 0.6704892267783483, "num_tokens": 2718766277.0, "step": 16205 }, { "entropy": 1.678945968548457, "epoch": 1.7803136414819698, "grad_norm": 0.6999272704124451, "learning_rate": 2.564264912813017e-06, "loss": 1.359, "mean_token_accuracy": 0.6699830194314321, "num_tokens": 2718934488.0, "step": 16206 }, { "entropy": 1.7268775800863903, "epoch": 1.7804234983933427, "grad_norm": 0.7014032602310181, "learning_rate": 2.5637070318921488e-06, "loss": 1.3642, "mean_token_accuracy": 0.6547218362490336, "num_tokens": 2719076843.0, "step": 16207 }, { "entropy": 1.718455046415329, "epoch": 1.7805333553047156, "grad_norm": 0.6352714896202087, "learning_rate": 2.563149417976152e-06, "loss": 1.4188, "mean_token_accuracy": 0.6555627485116323, "num_tokens": 2719232421.0, "step": 16208 }, { "entropy": 1.7530864675839741, "epoch": 1.7806432122160887, "grad_norm": 0.6508417725563049, "learning_rate": 2.562592071082674e-06, "loss": 1.5481, "mean_token_accuracy": 0.6468537002801895, "num_tokens": 2719404790.0, "step": 16209 }, { "entropy": 1.6875406205654144, "epoch": 1.7807530691274613, "grad_norm": 0.7828112244606018, "learning_rate": 2.5620349912293543e-06, "loss": 1.4161, "mean_token_accuracy": 0.6764611254135767, "num_tokens": 2719529889.0, "step": 16210 }, { "entropy": 1.6766289969285328, "epoch": 1.7808629260388344, "grad_norm": 0.8107297420501709, "learning_rate": 2.5614781784338255e-06, "loss": 1.3208, "mean_token_accuracy": 0.6553743382294973, "num_tokens": 2719700245.0, "step": 16211 }, { "entropy": 1.672994703054428, "epoch": 1.7809727829502073, "grad_norm": 0.7123458981513977, "learning_rate": 2.560921632713711e-06, "loss": 1.3213, "mean_token_accuracy": 0.6707338194052378, "num_tokens": 2719870654.0, "step": 16212 }, { "entropy": 1.6604451934496562, "epoch": 1.7810826398615802, "grad_norm": 0.7824660539627075, "learning_rate": 2.5603653540866226e-06, "loss": 1.5431, "mean_token_accuracy": 0.6393003712097803, "num_tokens": 2720059793.0, "step": 16213 }, { "entropy": 1.69284787774086, "epoch": 1.7811924967729533, "grad_norm": 0.6443684101104736, "learning_rate": 2.559809342570168e-06, "loss": 1.2632, "mean_token_accuracy": 0.6757178753614426, "num_tokens": 2720222052.0, "step": 16214 }, { "entropy": 1.727107326189677, "epoch": 1.7813023536843262, "grad_norm": 0.6985810995101929, "learning_rate": 2.5592535981819455e-06, "loss": 1.4631, "mean_token_accuracy": 0.6416812141736349, "num_tokens": 2720394912.0, "step": 16215 }, { "entropy": 1.7403022348880768, "epoch": 1.781412210595699, "grad_norm": 0.6760064959526062, "learning_rate": 2.5586981209395414e-06, "loss": 1.4851, "mean_token_accuracy": 0.6429832726716995, "num_tokens": 2720559240.0, "step": 16216 }, { "entropy": 1.732146809498469, "epoch": 1.7815220675070722, "grad_norm": 0.6863375902175903, "learning_rate": 2.5581429108605394e-06, "loss": 1.4375, "mean_token_accuracy": 0.6447567095359167, "num_tokens": 2720745800.0, "step": 16217 }, { "entropy": 1.6957339843114216, "epoch": 1.7816319244184449, "grad_norm": 0.6518421769142151, "learning_rate": 2.557587967962509e-06, "loss": 1.361, "mean_token_accuracy": 0.6534071415662766, "num_tokens": 2720895109.0, "step": 16218 }, { "entropy": 1.7050747672716777, "epoch": 1.781741781329818, "grad_norm": 0.6569792032241821, "learning_rate": 2.5570332922630163e-06, "loss": 1.2822, "mean_token_accuracy": 0.6715095390876135, "num_tokens": 2721024272.0, "step": 16219 }, { "entropy": 1.679296483596166, "epoch": 1.7818516382411909, "grad_norm": 0.7886082530021667, "learning_rate": 2.5564788837796156e-06, "loss": 1.4813, "mean_token_accuracy": 0.6585791359345118, "num_tokens": 2721227239.0, "step": 16220 }, { "entropy": 1.73756409684817, "epoch": 1.7819614951525637, "grad_norm": 0.7555798888206482, "learning_rate": 2.5559247425298523e-06, "loss": 1.3367, "mean_token_accuracy": 0.6579089959462484, "num_tokens": 2721379504.0, "step": 16221 }, { "entropy": 1.7351977328459423, "epoch": 1.7820713520639369, "grad_norm": 0.6866292953491211, "learning_rate": 2.5553708685312658e-06, "loss": 1.3398, "mean_token_accuracy": 0.6643187751372656, "num_tokens": 2721521980.0, "step": 16222 }, { "entropy": 1.670342117547989, "epoch": 1.7821812089753095, "grad_norm": 0.5937049984931946, "learning_rate": 2.554817261801387e-06, "loss": 1.3746, "mean_token_accuracy": 0.6563832610845566, "num_tokens": 2721676655.0, "step": 16223 }, { "entropy": 1.674795150756836, "epoch": 1.7822910658866826, "grad_norm": 0.6507994532585144, "learning_rate": 2.554263922357737e-06, "loss": 1.4518, "mean_token_accuracy": 0.6518680403629938, "num_tokens": 2721878324.0, "step": 16224 }, { "entropy": 1.7025028467178345, "epoch": 1.7824009227980555, "grad_norm": 0.7818902134895325, "learning_rate": 2.553710850217826e-06, "loss": 1.5391, "mean_token_accuracy": 0.636594370007515, "num_tokens": 2722086360.0, "step": 16225 }, { "entropy": 1.7275918225447338, "epoch": 1.7825107797094284, "grad_norm": 0.6679732203483582, "learning_rate": 2.5531580453991627e-06, "loss": 1.3366, "mean_token_accuracy": 0.6550202568372091, "num_tokens": 2722217001.0, "step": 16226 }, { "entropy": 1.694819023211797, "epoch": 1.7826206366208015, "grad_norm": 0.7464036345481873, "learning_rate": 2.5526055079192413e-06, "loss": 1.5109, "mean_token_accuracy": 0.6589773992697397, "num_tokens": 2722371256.0, "step": 16227 }, { "entropy": 1.681807627280553, "epoch": 1.7827304935321744, "grad_norm": 0.6635357737541199, "learning_rate": 2.5520532377955467e-06, "loss": 1.3098, "mean_token_accuracy": 0.6588182846705118, "num_tokens": 2722509673.0, "step": 16228 }, { "entropy": 1.680689126253128, "epoch": 1.7828403504435473, "grad_norm": 0.6713885068893433, "learning_rate": 2.551501235045562e-06, "loss": 1.3095, "mean_token_accuracy": 0.6868884414434433, "num_tokens": 2722686390.0, "step": 16229 }, { "entropy": 1.712468832731247, "epoch": 1.7829502073549204, "grad_norm": 0.7904059886932373, "learning_rate": 2.5509494996867558e-06, "loss": 1.5056, "mean_token_accuracy": 0.6609023263057073, "num_tokens": 2722817375.0, "step": 16230 }, { "entropy": 1.7211446662743886, "epoch": 1.783060064266293, "grad_norm": 0.6894172430038452, "learning_rate": 2.5503980317365908e-06, "loss": 1.3432, "mean_token_accuracy": 0.6689777423938116, "num_tokens": 2722981904.0, "step": 16231 }, { "entropy": 1.6683667202790577, "epoch": 1.7831699211776662, "grad_norm": 0.8465138673782349, "learning_rate": 2.549846831212521e-06, "loss": 1.3657, "mean_token_accuracy": 0.6582571069399515, "num_tokens": 2723174066.0, "step": 16232 }, { "entropy": 1.726008802652359, "epoch": 1.783279778089039, "grad_norm": 0.7869644165039062, "learning_rate": 2.5492958981319902e-06, "loss": 1.2813, "mean_token_accuracy": 0.6665952205657959, "num_tokens": 2723281291.0, "step": 16233 }, { "entropy": 1.7815176844596863, "epoch": 1.783389635000412, "grad_norm": 0.656838059425354, "learning_rate": 2.5487452325124363e-06, "loss": 1.4156, "mean_token_accuracy": 0.6533069312572479, "num_tokens": 2723471325.0, "step": 16234 }, { "entropy": 1.7248845597108204, "epoch": 1.783499491911785, "grad_norm": 0.9060506820678711, "learning_rate": 2.5481948343712885e-06, "loss": 1.4979, "mean_token_accuracy": 0.6565845509370168, "num_tokens": 2723632795.0, "step": 16235 }, { "entropy": 1.730120857556661, "epoch": 1.7836093488231577, "grad_norm": 0.7274516820907593, "learning_rate": 2.5476447037259666e-06, "loss": 1.3954, "mean_token_accuracy": 0.6534441063801447, "num_tokens": 2723764226.0, "step": 16236 }, { "entropy": 1.6996258199214935, "epoch": 1.7837192057345308, "grad_norm": 0.7300492525100708, "learning_rate": 2.547094840593879e-06, "loss": 1.3445, "mean_token_accuracy": 0.6685766031344732, "num_tokens": 2723901277.0, "step": 16237 }, { "entropy": 1.7311415870984395, "epoch": 1.7838290626459037, "grad_norm": 0.8246431946754456, "learning_rate": 2.546545244992432e-06, "loss": 1.2342, "mean_token_accuracy": 0.6768847008546194, "num_tokens": 2724036004.0, "step": 16238 }, { "entropy": 1.6984553039073944, "epoch": 1.7839389195572766, "grad_norm": 0.6190625429153442, "learning_rate": 2.5459959169390185e-06, "loss": 1.5376, "mean_token_accuracy": 0.6259044905503591, "num_tokens": 2724340416.0, "step": 16239 }, { "entropy": 1.7265506088733673, "epoch": 1.7840487764686497, "grad_norm": 0.6775219440460205, "learning_rate": 2.5454468564510242e-06, "loss": 1.4671, "mean_token_accuracy": 0.6378841251134872, "num_tokens": 2724522692.0, "step": 16240 }, { "entropy": 1.7654529015223186, "epoch": 1.7841586333800226, "grad_norm": 0.655631959438324, "learning_rate": 2.5448980635458287e-06, "loss": 1.416, "mean_token_accuracy": 0.6465630332628886, "num_tokens": 2724738322.0, "step": 16241 }, { "entropy": 1.6371107796827953, "epoch": 1.7842684902913954, "grad_norm": 0.6871931552886963, "learning_rate": 2.5443495382407973e-06, "loss": 1.4574, "mean_token_accuracy": 0.647613137960434, "num_tokens": 2724894550.0, "step": 16242 }, { "entropy": 1.6568027933438618, "epoch": 1.7843783472027686, "grad_norm": 0.6367573142051697, "learning_rate": 2.543801280553295e-06, "loss": 1.4055, "mean_token_accuracy": 0.653554563721021, "num_tokens": 2725067815.0, "step": 16243 }, { "entropy": 1.753188967704773, "epoch": 1.7844882041141412, "grad_norm": 0.705480694770813, "learning_rate": 2.5432532905006715e-06, "loss": 1.5104, "mean_token_accuracy": 0.631978377699852, "num_tokens": 2725269398.0, "step": 16244 }, { "entropy": 1.6898446877797444, "epoch": 1.7845980610255143, "grad_norm": 0.7416958212852478, "learning_rate": 2.542705568100268e-06, "loss": 1.3553, "mean_token_accuracy": 0.6741011242071787, "num_tokens": 2725414400.0, "step": 16245 }, { "entropy": 1.7214942475159962, "epoch": 1.7847079179368872, "grad_norm": 0.7140223979949951, "learning_rate": 2.542158113369424e-06, "loss": 1.3623, "mean_token_accuracy": 0.6528001030286154, "num_tokens": 2725550421.0, "step": 16246 }, { "entropy": 1.6600320835908253, "epoch": 1.78481777484826, "grad_norm": 0.8466951251029968, "learning_rate": 2.5416109263254656e-06, "loss": 1.3405, "mean_token_accuracy": 0.658960203329722, "num_tokens": 2725749641.0, "step": 16247 }, { "entropy": 1.7030630608399708, "epoch": 1.7849276317596332, "grad_norm": 0.8019019365310669, "learning_rate": 2.541064006985709e-06, "loss": 1.5304, "mean_token_accuracy": 0.6433060467243195, "num_tokens": 2725922079.0, "step": 16248 }, { "entropy": 1.7040532032648723, "epoch": 1.7850374886710059, "grad_norm": 0.7823516726493835, "learning_rate": 2.5405173553674662e-06, "loss": 1.2843, "mean_token_accuracy": 0.6735737522443136, "num_tokens": 2726058883.0, "step": 16249 }, { "entropy": 1.7228721876939137, "epoch": 1.785147345582379, "grad_norm": 0.7137507200241089, "learning_rate": 2.539970971488034e-06, "loss": 1.3681, "mean_token_accuracy": 0.6637583325306574, "num_tokens": 2726214542.0, "step": 16250 }, { "entropy": 1.6770942211151123, "epoch": 1.7852572024937519, "grad_norm": 0.6972078680992126, "learning_rate": 2.539424855364711e-06, "loss": 1.326, "mean_token_accuracy": 0.6649446338415146, "num_tokens": 2726375099.0, "step": 16251 }, { "entropy": 1.7118816177050273, "epoch": 1.7853670594051247, "grad_norm": 0.7081136107444763, "learning_rate": 2.5388790070147796e-06, "loss": 1.3891, "mean_token_accuracy": 0.6423606922229131, "num_tokens": 2726521694.0, "step": 16252 }, { "entropy": 1.6510530412197113, "epoch": 1.7854769163164979, "grad_norm": 0.7234501242637634, "learning_rate": 2.538333426455512e-06, "loss": 1.4314, "mean_token_accuracy": 0.6571709563334783, "num_tokens": 2726696075.0, "step": 16253 }, { "entropy": 1.7041932344436646, "epoch": 1.7855867732278707, "grad_norm": 0.7055428624153137, "learning_rate": 2.53778811370418e-06, "loss": 1.454, "mean_token_accuracy": 0.6542116304238638, "num_tokens": 2726861735.0, "step": 16254 }, { "entropy": 1.6798604428768158, "epoch": 1.7856966301392436, "grad_norm": 0.6403173208236694, "learning_rate": 2.5372430687780413e-06, "loss": 1.4092, "mean_token_accuracy": 0.6626434773206711, "num_tokens": 2727050120.0, "step": 16255 }, { "entropy": 1.658277968565623, "epoch": 1.7858064870506167, "grad_norm": 0.7245867848396301, "learning_rate": 2.536698291694346e-06, "loss": 1.456, "mean_token_accuracy": 0.6435498197873434, "num_tokens": 2727250402.0, "step": 16256 }, { "entropy": 1.7269805371761322, "epoch": 1.7859163439619894, "grad_norm": 0.7680160999298096, "learning_rate": 2.536153782470335e-06, "loss": 1.5174, "mean_token_accuracy": 0.6507440209388733, "num_tokens": 2727435782.0, "step": 16257 }, { "entropy": 1.6973803043365479, "epoch": 1.7860262008733625, "grad_norm": 0.6898791790008545, "learning_rate": 2.5356095411232455e-06, "loss": 1.3865, "mean_token_accuracy": 0.6631582975387573, "num_tokens": 2727603708.0, "step": 16258 }, { "entropy": 1.764186054468155, "epoch": 1.7861360577847354, "grad_norm": 0.8053025007247925, "learning_rate": 2.5350655676702985e-06, "loss": 1.4573, "mean_token_accuracy": 0.6421335885922114, "num_tokens": 2727784144.0, "step": 16259 }, { "entropy": 1.7275482614835103, "epoch": 1.7862459146961083, "grad_norm": 0.6696358323097229, "learning_rate": 2.534521862128711e-06, "loss": 1.2937, "mean_token_accuracy": 0.6718499114116033, "num_tokens": 2727904805.0, "step": 16260 }, { "entropy": 1.7152071297168732, "epoch": 1.7863557716074814, "grad_norm": 0.8419023156166077, "learning_rate": 2.5339784245156934e-06, "loss": 1.3275, "mean_token_accuracy": 0.664416715502739, "num_tokens": 2728057786.0, "step": 16261 }, { "entropy": 1.7445741693178813, "epoch": 1.786465628518854, "grad_norm": 0.6461774110794067, "learning_rate": 2.533435254848442e-06, "loss": 1.3029, "mean_token_accuracy": 0.6617392847935358, "num_tokens": 2728206231.0, "step": 16262 }, { "entropy": 1.7072357336680095, "epoch": 1.7865754854302272, "grad_norm": 0.7268346548080444, "learning_rate": 2.5328923531441506e-06, "loss": 1.4484, "mean_token_accuracy": 0.6492378860712051, "num_tokens": 2728381345.0, "step": 16263 }, { "entropy": 1.6935893793900807, "epoch": 1.7866853423416, "grad_norm": 0.6541410088539124, "learning_rate": 2.5323497194200025e-06, "loss": 1.3363, "mean_token_accuracy": 0.6590806543827057, "num_tokens": 2728559317.0, "step": 16264 }, { "entropy": 1.727291206518809, "epoch": 1.786795199252973, "grad_norm": 0.7337368726730347, "learning_rate": 2.5318073536931677e-06, "loss": 1.5537, "mean_token_accuracy": 0.6399403661489487, "num_tokens": 2728774789.0, "step": 16265 }, { "entropy": 1.6569550434748332, "epoch": 1.786905056164346, "grad_norm": 0.8272339701652527, "learning_rate": 2.5312652559808143e-06, "loss": 1.4112, "mean_token_accuracy": 0.646061177055041, "num_tokens": 2728980119.0, "step": 16266 }, { "entropy": 1.7381823460261028, "epoch": 1.787014913075719, "grad_norm": 0.7224423885345459, "learning_rate": 2.5307234263001006e-06, "loss": 1.2268, "mean_token_accuracy": 0.6785031110048294, "num_tokens": 2729121787.0, "step": 16267 }, { "entropy": 1.6357511182626088, "epoch": 1.7871247699870918, "grad_norm": 0.6711469888687134, "learning_rate": 2.530181864668174e-06, "loss": 1.4506, "mean_token_accuracy": 0.6360716919104258, "num_tokens": 2729314285.0, "step": 16268 }, { "entropy": 1.66348002354304, "epoch": 1.787234626898465, "grad_norm": 0.5813800692558289, "learning_rate": 2.5296405711021744e-06, "loss": 1.4608, "mean_token_accuracy": 0.6357814073562622, "num_tokens": 2729556544.0, "step": 16269 }, { "entropy": 1.6779274741808574, "epoch": 1.7873444838098376, "grad_norm": 0.7128428816795349, "learning_rate": 2.529099545619234e-06, "loss": 1.4014, "mean_token_accuracy": 0.6523097256819407, "num_tokens": 2729746524.0, "step": 16270 }, { "entropy": 1.6374227901299794, "epoch": 1.7874543407212107, "grad_norm": 0.6598563194274902, "learning_rate": 2.5285587882364766e-06, "loss": 1.3394, "mean_token_accuracy": 0.662226935227712, "num_tokens": 2729917624.0, "step": 16271 }, { "entropy": 1.6421111126740773, "epoch": 1.7875641976325836, "grad_norm": 0.6324965953826904, "learning_rate": 2.5280182989710143e-06, "loss": 1.4136, "mean_token_accuracy": 0.656077653169632, "num_tokens": 2730146475.0, "step": 16272 }, { "entropy": 1.6922315955162048, "epoch": 1.7876740545439564, "grad_norm": 0.5751784443855286, "learning_rate": 2.5274780778399576e-06, "loss": 1.4533, "mean_token_accuracy": 0.6490372568368912, "num_tokens": 2730347616.0, "step": 16273 }, { "entropy": 1.699767659107844, "epoch": 1.7877839114553296, "grad_norm": 0.6968252062797546, "learning_rate": 2.526938124860401e-06, "loss": 1.4884, "mean_token_accuracy": 0.6427743136882782, "num_tokens": 2730587146.0, "step": 16274 }, { "entropy": 1.72932164867719, "epoch": 1.7878937683667022, "grad_norm": 0.700259804725647, "learning_rate": 2.5263984400494353e-06, "loss": 1.1976, "mean_token_accuracy": 0.6866904695828756, "num_tokens": 2730734334.0, "step": 16275 }, { "entropy": 1.7005844314893086, "epoch": 1.7880036252780753, "grad_norm": 0.6235203742980957, "learning_rate": 2.52585902342414e-06, "loss": 1.3332, "mean_token_accuracy": 0.6622271637121836, "num_tokens": 2730889099.0, "step": 16276 }, { "entropy": 1.7240139146645863, "epoch": 1.7881134821894482, "grad_norm": 0.7013264894485474, "learning_rate": 2.525319875001587e-06, "loss": 1.3928, "mean_token_accuracy": 0.6499841312567393, "num_tokens": 2731097821.0, "step": 16277 }, { "entropy": 1.730222374200821, "epoch": 1.788223339100821, "grad_norm": 0.6136840581893921, "learning_rate": 2.5247809947988413e-06, "loss": 1.4699, "mean_token_accuracy": 0.6457250515619913, "num_tokens": 2731308470.0, "step": 16278 }, { "entropy": 1.7151026626427968, "epoch": 1.7883331960121942, "grad_norm": 0.6567912101745605, "learning_rate": 2.524242382832959e-06, "loss": 1.3497, "mean_token_accuracy": 0.6674692332744598, "num_tokens": 2731473920.0, "step": 16279 }, { "entropy": 1.7287200788656871, "epoch": 1.788443052923567, "grad_norm": 0.8798257112503052, "learning_rate": 2.5237040391209877e-06, "loss": 1.4002, "mean_token_accuracy": 0.6503605445226034, "num_tokens": 2731618031.0, "step": 16280 }, { "entropy": 1.7416976193586986, "epoch": 1.78855290983494, "grad_norm": 0.7769317030906677, "learning_rate": 2.523165963679961e-06, "loss": 1.6973, "mean_token_accuracy": 0.6211136281490326, "num_tokens": 2731779387.0, "step": 16281 }, { "entropy": 1.7118895947933197, "epoch": 1.788662766746313, "grad_norm": 0.6092858910560608, "learning_rate": 2.522628156526914e-06, "loss": 1.4178, "mean_token_accuracy": 0.6442497919003168, "num_tokens": 2731984238.0, "step": 16282 }, { "entropy": 1.712389588356018, "epoch": 1.7887726236576857, "grad_norm": 0.8901104927062988, "learning_rate": 2.5220906176788657e-06, "loss": 1.3029, "mean_token_accuracy": 0.6697202920913696, "num_tokens": 2732213159.0, "step": 16283 }, { "entropy": 1.6833104491233826, "epoch": 1.7888824805690589, "grad_norm": 0.6661498546600342, "learning_rate": 2.5215533471528276e-06, "loss": 1.2588, "mean_token_accuracy": 0.6872056176265081, "num_tokens": 2732337606.0, "step": 16284 }, { "entropy": 1.722004105647405, "epoch": 1.7889923374804317, "grad_norm": 0.6921575665473938, "learning_rate": 2.521016344965807e-06, "loss": 1.5717, "mean_token_accuracy": 0.6401575257380804, "num_tokens": 2732528822.0, "step": 16285 }, { "entropy": 1.6872912446657817, "epoch": 1.7891021943918046, "grad_norm": 0.6085355281829834, "learning_rate": 2.520479611134797e-06, "loss": 1.3896, "mean_token_accuracy": 0.6487388958533605, "num_tokens": 2732708924.0, "step": 16286 }, { "entropy": 1.7655263344446819, "epoch": 1.7892120513031777, "grad_norm": 0.7121375799179077, "learning_rate": 2.5199431456767877e-06, "loss": 1.3726, "mean_token_accuracy": 0.6477866073449453, "num_tokens": 2732847700.0, "step": 16287 }, { "entropy": 1.7201635142167409, "epoch": 1.7893219082145504, "grad_norm": 0.6995194554328918, "learning_rate": 2.5194069486087564e-06, "loss": 1.3661, "mean_token_accuracy": 0.6587748775879542, "num_tokens": 2732996658.0, "step": 16288 }, { "entropy": 1.7433823545773823, "epoch": 1.7894317651259235, "grad_norm": 0.6454122066497803, "learning_rate": 2.5188710199476725e-06, "loss": 1.497, "mean_token_accuracy": 0.6462214092413584, "num_tokens": 2733190258.0, "step": 16289 }, { "entropy": 1.7409153878688812, "epoch": 1.7895416220372964, "grad_norm": 0.7849988341331482, "learning_rate": 2.5183353597104994e-06, "loss": 1.433, "mean_token_accuracy": 0.6468554139137268, "num_tokens": 2733359931.0, "step": 16290 }, { "entropy": 1.6835778454939525, "epoch": 1.7896514789486693, "grad_norm": 0.6935192346572876, "learning_rate": 2.517799967914191e-06, "loss": 1.5234, "mean_token_accuracy": 0.6396876275539398, "num_tokens": 2733583935.0, "step": 16291 }, { "entropy": 1.6984424690405528, "epoch": 1.7897613358600424, "grad_norm": 0.6901172995567322, "learning_rate": 2.5172648445756927e-06, "loss": 1.2957, "mean_token_accuracy": 0.6744356652100881, "num_tokens": 2733713104.0, "step": 16292 }, { "entropy": 1.6622243821620941, "epoch": 1.7898711927714153, "grad_norm": 0.6113606095314026, "learning_rate": 2.516729989711937e-06, "loss": 1.2825, "mean_token_accuracy": 0.6713751504818598, "num_tokens": 2733886622.0, "step": 16293 }, { "entropy": 1.68569149573644, "epoch": 1.7899810496827882, "grad_norm": 0.6700574159622192, "learning_rate": 2.516195403339856e-06, "loss": 1.4043, "mean_token_accuracy": 0.6612590849399567, "num_tokens": 2734036231.0, "step": 16294 }, { "entropy": 1.7632472316424053, "epoch": 1.7900909065941613, "grad_norm": 0.8103067278862, "learning_rate": 2.515661085476368e-06, "loss": 1.4073, "mean_token_accuracy": 0.6506477644046148, "num_tokens": 2734170169.0, "step": 16295 }, { "entropy": 1.6857722500960033, "epoch": 1.790200763505534, "grad_norm": 0.6791203618049622, "learning_rate": 2.5151270361383816e-06, "loss": 1.4516, "mean_token_accuracy": 0.6414875040451685, "num_tokens": 2734366953.0, "step": 16296 }, { "entropy": 1.6843474805355072, "epoch": 1.790310620416907, "grad_norm": 0.733788013458252, "learning_rate": 2.5145932553428038e-06, "loss": 1.4301, "mean_token_accuracy": 0.653965026140213, "num_tokens": 2734526424.0, "step": 16297 }, { "entropy": 1.6595512131849925, "epoch": 1.79042047732828, "grad_norm": 0.7882452607154846, "learning_rate": 2.5140597431065233e-06, "loss": 1.4809, "mean_token_accuracy": 0.651551162203153, "num_tokens": 2734683617.0, "step": 16298 }, { "entropy": 1.6853315234184265, "epoch": 1.7905303342396528, "grad_norm": 0.6644918322563171, "learning_rate": 2.5135264994464294e-06, "loss": 1.3111, "mean_token_accuracy": 0.6654441605011622, "num_tokens": 2734825857.0, "step": 16299 }, { "entropy": 1.6577715078989665, "epoch": 1.790640191151026, "grad_norm": 0.5730201601982117, "learning_rate": 2.512993524379398e-06, "loss": 1.3078, "mean_token_accuracy": 0.6807037740945816, "num_tokens": 2734961414.0, "step": 16300 }, { "entropy": 1.7456890443960826, "epoch": 1.7907500480623986, "grad_norm": 0.8542404770851135, "learning_rate": 2.5124608179222958e-06, "loss": 1.44, "mean_token_accuracy": 0.6580932984749476, "num_tokens": 2735127869.0, "step": 16301 }, { "entropy": 1.6978826026121776, "epoch": 1.7908599049737717, "grad_norm": 0.6640202403068542, "learning_rate": 2.5119283800919853e-06, "loss": 1.4269, "mean_token_accuracy": 0.655546839038531, "num_tokens": 2735315439.0, "step": 16302 }, { "entropy": 1.7440852721532185, "epoch": 1.7909697618851446, "grad_norm": 0.7849541902542114, "learning_rate": 2.5113962109053162e-06, "loss": 1.2534, "mean_token_accuracy": 0.6781354149182638, "num_tokens": 2735430005.0, "step": 16303 }, { "entropy": 1.7063166797161102, "epoch": 1.7910796187965174, "grad_norm": 1.1036492586135864, "learning_rate": 2.5108643103791335e-06, "loss": 1.3421, "mean_token_accuracy": 0.6793977270523707, "num_tokens": 2735552067.0, "step": 16304 }, { "entropy": 1.6659158567587535, "epoch": 1.7911894757078906, "grad_norm": 0.6122180819511414, "learning_rate": 2.5103326785302677e-06, "loss": 1.3725, "mean_token_accuracy": 0.6636357257763544, "num_tokens": 2735731985.0, "step": 16305 }, { "entropy": 1.6837575336297352, "epoch": 1.7912993326192634, "grad_norm": 0.5877187848091125, "learning_rate": 2.5098013153755485e-06, "loss": 1.486, "mean_token_accuracy": 0.6384171495834986, "num_tokens": 2735901634.0, "step": 16306 }, { "entropy": 1.6770992676417034, "epoch": 1.7914091895306363, "grad_norm": 0.7175406217575073, "learning_rate": 2.509270220931792e-06, "loss": 1.2945, "mean_token_accuracy": 0.6702460249265035, "num_tokens": 2736073169.0, "step": 16307 }, { "entropy": 1.7309307356675465, "epoch": 1.7915190464420094, "grad_norm": 0.7163530588150024, "learning_rate": 2.5087393952158063e-06, "loss": 1.3491, "mean_token_accuracy": 0.6687060197194418, "num_tokens": 2736199549.0, "step": 16308 }, { "entropy": 1.7686155239741008, "epoch": 1.791628903353382, "grad_norm": 0.7394537925720215, "learning_rate": 2.5082088382443936e-06, "loss": 1.4027, "mean_token_accuracy": 0.6433225274085999, "num_tokens": 2736375818.0, "step": 16309 }, { "entropy": 1.6461839079856873, "epoch": 1.7917387602647552, "grad_norm": 0.6997315287590027, "learning_rate": 2.5076785500343426e-06, "loss": 1.3915, "mean_token_accuracy": 0.6605212738116583, "num_tokens": 2736580721.0, "step": 16310 }, { "entropy": 1.7693572044372559, "epoch": 1.791848617176128, "grad_norm": 0.7545243501663208, "learning_rate": 2.5071485306024405e-06, "loss": 1.5297, "mean_token_accuracy": 0.6341488460699717, "num_tokens": 2736747341.0, "step": 16311 }, { "entropy": 1.6597660581270854, "epoch": 1.791958474087501, "grad_norm": 0.6617560386657715, "learning_rate": 2.5066187799654608e-06, "loss": 1.2636, "mean_token_accuracy": 0.6663326869408289, "num_tokens": 2736877606.0, "step": 16312 }, { "entropy": 1.5883537034193675, "epoch": 1.792068330998874, "grad_norm": 0.6924055218696594, "learning_rate": 2.506089298140168e-06, "loss": 1.4102, "mean_token_accuracy": 0.6675305167833964, "num_tokens": 2737039683.0, "step": 16313 }, { "entropy": 1.7212806940078735, "epoch": 1.7921781879102467, "grad_norm": 0.7379174828529358, "learning_rate": 2.5055600851433228e-06, "loss": 1.4748, "mean_token_accuracy": 0.6568605154752731, "num_tokens": 2737179397.0, "step": 16314 }, { "entropy": 1.7149664461612701, "epoch": 1.7922880448216199, "grad_norm": 0.6748194694519043, "learning_rate": 2.5050311409916715e-06, "loss": 1.3384, "mean_token_accuracy": 0.6551110148429871, "num_tokens": 2737319398.0, "step": 16315 }, { "entropy": 1.7069288392861683, "epoch": 1.7923979017329927, "grad_norm": 0.6862152814865112, "learning_rate": 2.5045024657019585e-06, "loss": 1.3491, "mean_token_accuracy": 0.6714658091465632, "num_tokens": 2737454204.0, "step": 16316 }, { "entropy": 1.7472139199574788, "epoch": 1.7925077586443656, "grad_norm": 0.7499648928642273, "learning_rate": 2.503974059290914e-06, "loss": 1.5385, "mean_token_accuracy": 0.6405575921138128, "num_tokens": 2737595817.0, "step": 16317 }, { "entropy": 1.6611140767733257, "epoch": 1.7926176155557387, "grad_norm": 0.9254728555679321, "learning_rate": 2.503445921775261e-06, "loss": 1.4631, "mean_token_accuracy": 0.6541319787502289, "num_tokens": 2737734944.0, "step": 16318 }, { "entropy": 1.7104494671026866, "epoch": 1.7927274724671116, "grad_norm": 0.6272209882736206, "learning_rate": 2.5029180531717172e-06, "loss": 1.36, "mean_token_accuracy": 0.6594759970903397, "num_tokens": 2737888914.0, "step": 16319 }, { "entropy": 1.6646449665228527, "epoch": 1.7928373293784845, "grad_norm": 0.7363027930259705, "learning_rate": 2.5023904534969885e-06, "loss": 1.4083, "mean_token_accuracy": 0.6451329837242762, "num_tokens": 2738084677.0, "step": 16320 }, { "entropy": 1.693971465031306, "epoch": 1.7929471862898576, "grad_norm": 0.8376074433326721, "learning_rate": 2.50186312276777e-06, "loss": 1.4147, "mean_token_accuracy": 0.6637802918752035, "num_tokens": 2738224943.0, "step": 16321 }, { "entropy": 1.6223762234052022, "epoch": 1.7930570432012303, "grad_norm": 0.7268716096878052, "learning_rate": 2.5013360610007555e-06, "loss": 1.3308, "mean_token_accuracy": 0.6813297122716904, "num_tokens": 2738430126.0, "step": 16322 }, { "entropy": 1.7354827622572582, "epoch": 1.7931669001126034, "grad_norm": 0.7839182615280151, "learning_rate": 2.500809268212626e-06, "loss": 1.3441, "mean_token_accuracy": 0.6678336064020792, "num_tokens": 2738574057.0, "step": 16323 }, { "entropy": 1.7117115159829457, "epoch": 1.7932767570239763, "grad_norm": 0.7276841998100281, "learning_rate": 2.5002827444200543e-06, "loss": 1.4605, "mean_token_accuracy": 0.6557506322860718, "num_tokens": 2738725295.0, "step": 16324 }, { "entropy": 1.6508471469084423, "epoch": 1.7933866139353491, "grad_norm": 0.7069340944290161, "learning_rate": 2.4997564896397015e-06, "loss": 1.3199, "mean_token_accuracy": 0.6781076391537985, "num_tokens": 2738874111.0, "step": 16325 }, { "entropy": 1.6659562587738037, "epoch": 1.7934964708467223, "grad_norm": 0.7840026617050171, "learning_rate": 2.4992305038882266e-06, "loss": 1.3742, "mean_token_accuracy": 0.6697394450505575, "num_tokens": 2739010468.0, "step": 16326 }, { "entropy": 1.7727423111597698, "epoch": 1.793606327758095, "grad_norm": 1.009763240814209, "learning_rate": 2.4987047871822756e-06, "loss": 1.4287, "mean_token_accuracy": 0.6506382723649343, "num_tokens": 2739200522.0, "step": 16327 }, { "entropy": 1.6970455447832744, "epoch": 1.793716184669468, "grad_norm": 0.6676150560379028, "learning_rate": 2.498179339538487e-06, "loss": 1.4482, "mean_token_accuracy": 0.6354714632034302, "num_tokens": 2739401660.0, "step": 16328 }, { "entropy": 1.6591151058673859, "epoch": 1.793826041580841, "grad_norm": 0.8759251832962036, "learning_rate": 2.497654160973493e-06, "loss": 1.4139, "mean_token_accuracy": 0.6550566603740057, "num_tokens": 2739573508.0, "step": 16329 }, { "entropy": 1.7104254464308422, "epoch": 1.7939358984922138, "grad_norm": 0.6900741457939148, "learning_rate": 2.4971292515039106e-06, "loss": 1.4752, "mean_token_accuracy": 0.6559490313132604, "num_tokens": 2739757627.0, "step": 16330 }, { "entropy": 1.6923480729262035, "epoch": 1.794045755403587, "grad_norm": 0.7489110231399536, "learning_rate": 2.496604611146358e-06, "loss": 1.3643, "mean_token_accuracy": 0.6735963573058447, "num_tokens": 2739932452.0, "step": 16331 }, { "entropy": 1.6965441604455311, "epoch": 1.7941556123149598, "grad_norm": 0.7722020149230957, "learning_rate": 2.4960802399174376e-06, "loss": 1.1919, "mean_token_accuracy": 0.6849365482727686, "num_tokens": 2740045892.0, "step": 16332 }, { "entropy": 1.7439928154150646, "epoch": 1.7942654692263327, "grad_norm": 0.6960392594337463, "learning_rate": 2.4955561378337446e-06, "loss": 1.4115, "mean_token_accuracy": 0.6489661236604055, "num_tokens": 2740166797.0, "step": 16333 }, { "entropy": 1.6369553208351135, "epoch": 1.7943753261377058, "grad_norm": 0.5748756527900696, "learning_rate": 2.4950323049118684e-06, "loss": 1.3669, "mean_token_accuracy": 0.6603866517543793, "num_tokens": 2740395498.0, "step": 16334 }, { "entropy": 1.6582307914892833, "epoch": 1.7944851830490784, "grad_norm": 0.8305114507675171, "learning_rate": 2.494508741168388e-06, "loss": 1.414, "mean_token_accuracy": 0.6532878627379736, "num_tokens": 2740586934.0, "step": 16335 }, { "entropy": 1.7147388954957326, "epoch": 1.7945950399604516, "grad_norm": 0.7109110355377197, "learning_rate": 2.493985446619872e-06, "loss": 1.5826, "mean_token_accuracy": 0.6403073569138845, "num_tokens": 2740834396.0, "step": 16336 }, { "entropy": 1.6089663604895275, "epoch": 1.7947048968718244, "grad_norm": 0.6663435101509094, "learning_rate": 2.493462421282884e-06, "loss": 1.3889, "mean_token_accuracy": 0.6556515793005625, "num_tokens": 2741006779.0, "step": 16337 }, { "entropy": 1.676442285378774, "epoch": 1.7948147537831973, "grad_norm": 0.6819809675216675, "learning_rate": 2.4929396651739773e-06, "loss": 1.3259, "mean_token_accuracy": 0.6694385011990865, "num_tokens": 2741114771.0, "step": 16338 }, { "entropy": 1.6962731381257374, "epoch": 1.7949246106945704, "grad_norm": 0.7554741501808167, "learning_rate": 2.492417178309697e-06, "loss": 1.3638, "mean_token_accuracy": 0.659926618138949, "num_tokens": 2741261871.0, "step": 16339 }, { "entropy": 1.6720819075902302, "epoch": 1.795034467605943, "grad_norm": 0.6029784083366394, "learning_rate": 2.491894960706579e-06, "loss": 1.3433, "mean_token_accuracy": 0.665259430805842, "num_tokens": 2741445707.0, "step": 16340 }, { "entropy": 1.7332893908023834, "epoch": 1.7951443245173162, "grad_norm": 0.6843107342720032, "learning_rate": 2.4913730123811525e-06, "loss": 1.5745, "mean_token_accuracy": 0.6281691541274389, "num_tokens": 2741632351.0, "step": 16341 }, { "entropy": 1.6098364094893138, "epoch": 1.795254181428689, "grad_norm": 0.6576522588729858, "learning_rate": 2.4908513333499353e-06, "loss": 1.1807, "mean_token_accuracy": 0.6885288804769516, "num_tokens": 2741787954.0, "step": 16342 }, { "entropy": 1.7071658372879028, "epoch": 1.795364038340062, "grad_norm": 0.6134920716285706, "learning_rate": 2.4903299236294394e-06, "loss": 1.4462, "mean_token_accuracy": 0.6478994737068812, "num_tokens": 2741949788.0, "step": 16343 }, { "entropy": 1.7075544893741608, "epoch": 1.795473895251435, "grad_norm": 0.713398814201355, "learning_rate": 2.489808783236168e-06, "loss": 1.3601, "mean_token_accuracy": 0.6593527148167292, "num_tokens": 2742099407.0, "step": 16344 }, { "entropy": 1.6858652830123901, "epoch": 1.795583752162808, "grad_norm": 0.8232229351997375, "learning_rate": 2.4892879121866113e-06, "loss": 1.2947, "mean_token_accuracy": 0.667354146639506, "num_tokens": 2742230304.0, "step": 16345 }, { "entropy": 1.6845860878626506, "epoch": 1.7956936090741809, "grad_norm": 0.7478837966918945, "learning_rate": 2.4887673104972583e-06, "loss": 1.2776, "mean_token_accuracy": 0.6781817525625229, "num_tokens": 2742369127.0, "step": 16346 }, { "entropy": 1.7888353765010834, "epoch": 1.795803465985554, "grad_norm": 0.6277621984481812, "learning_rate": 2.4882469781845847e-06, "loss": 1.4691, "mean_token_accuracy": 0.6437779317299525, "num_tokens": 2742578400.0, "step": 16347 }, { "entropy": 1.755648523569107, "epoch": 1.7959133228969266, "grad_norm": 0.6520666480064392, "learning_rate": 2.4877269152650597e-06, "loss": 1.3963, "mean_token_accuracy": 0.6472931802272797, "num_tokens": 2742753222.0, "step": 16348 }, { "entropy": 1.733013888200124, "epoch": 1.7960231798082997, "grad_norm": 0.7249704599380493, "learning_rate": 2.4872071217551404e-06, "loss": 1.4501, "mean_token_accuracy": 0.6556122601032257, "num_tokens": 2742928454.0, "step": 16349 }, { "entropy": 1.7438992460568745, "epoch": 1.7961330367196726, "grad_norm": 0.6250995397567749, "learning_rate": 2.4866875976712813e-06, "loss": 1.4395, "mean_token_accuracy": 0.6565362215042114, "num_tokens": 2743122316.0, "step": 16350 }, { "entropy": 1.620346486568451, "epoch": 1.7962428936310455, "grad_norm": 0.673563539981842, "learning_rate": 2.4861683430299236e-06, "loss": 1.4165, "mean_token_accuracy": 0.6502549201250076, "num_tokens": 2743314494.0, "step": 16351 }, { "entropy": 1.7250482241312664, "epoch": 1.7963527505424186, "grad_norm": 0.7625929117202759, "learning_rate": 2.4856493578475003e-06, "loss": 1.4833, "mean_token_accuracy": 0.6498374988635381, "num_tokens": 2743438742.0, "step": 16352 }, { "entropy": 1.7725351254145305, "epoch": 1.7964626074537913, "grad_norm": 0.7012255191802979, "learning_rate": 2.485130642140439e-06, "loss": 1.3361, "mean_token_accuracy": 0.6614055832227071, "num_tokens": 2743573991.0, "step": 16353 }, { "entropy": 1.677201271057129, "epoch": 1.7965724643651644, "grad_norm": 0.7226030230522156, "learning_rate": 2.484612195925154e-06, "loss": 1.3256, "mean_token_accuracy": 0.665013333161672, "num_tokens": 2743742342.0, "step": 16354 }, { "entropy": 1.7017574906349182, "epoch": 1.7966823212765373, "grad_norm": 0.6619887948036194, "learning_rate": 2.4840940192180585e-06, "loss": 1.4644, "mean_token_accuracy": 0.6368465920289358, "num_tokens": 2743926810.0, "step": 16355 }, { "entropy": 1.762619137763977, "epoch": 1.7967921781879101, "grad_norm": 1.3564014434814453, "learning_rate": 2.4835761120355495e-06, "loss": 1.2873, "mean_token_accuracy": 0.669828325510025, "num_tokens": 2744051036.0, "step": 16356 }, { "entropy": 1.7027521828810375, "epoch": 1.7969020350992833, "grad_norm": 0.597287654876709, "learning_rate": 2.4830584743940176e-06, "loss": 1.4155, "mean_token_accuracy": 0.6454381992419561, "num_tokens": 2744217006.0, "step": 16357 }, { "entropy": 1.7219856878121693, "epoch": 1.7970118920106561, "grad_norm": 0.6755548119544983, "learning_rate": 2.4825411063098465e-06, "loss": 1.5516, "mean_token_accuracy": 0.6386887629826864, "num_tokens": 2744493689.0, "step": 16358 }, { "entropy": 1.7195583780606587, "epoch": 1.797121748922029, "grad_norm": 0.7201851010322571, "learning_rate": 2.482024007799414e-06, "loss": 1.3217, "mean_token_accuracy": 0.661146675546964, "num_tokens": 2744632436.0, "step": 16359 }, { "entropy": 1.6850773394107819, "epoch": 1.7972316058334021, "grad_norm": 0.5701948404312134, "learning_rate": 2.4815071788790824e-06, "loss": 1.3307, "mean_token_accuracy": 0.6748195836941401, "num_tokens": 2744877316.0, "step": 16360 }, { "entropy": 1.7464614311854045, "epoch": 1.7973414627447748, "grad_norm": 0.8613492250442505, "learning_rate": 2.480990619565209e-06, "loss": 1.4267, "mean_token_accuracy": 0.6546533902486166, "num_tokens": 2745013143.0, "step": 16361 }, { "entropy": 1.738052507241567, "epoch": 1.797451319656148, "grad_norm": 0.6792759895324707, "learning_rate": 2.480474329874146e-06, "loss": 1.4118, "mean_token_accuracy": 0.657256638010343, "num_tokens": 2745174814.0, "step": 16362 }, { "entropy": 1.6887112458546956, "epoch": 1.7975611765675208, "grad_norm": 0.6691803932189941, "learning_rate": 2.4799583098222295e-06, "loss": 1.4631, "mean_token_accuracy": 0.6501191159089407, "num_tokens": 2745325641.0, "step": 16363 }, { "entropy": 1.7012372314929962, "epoch": 1.7976710334788937, "grad_norm": 0.6485432386398315, "learning_rate": 2.479442559425793e-06, "loss": 1.2735, "mean_token_accuracy": 0.6670918663342794, "num_tokens": 2745439047.0, "step": 16364 }, { "entropy": 1.7309893469015758, "epoch": 1.7977808903902668, "grad_norm": 0.8326260447502136, "learning_rate": 2.4789270787011615e-06, "loss": 1.3052, "mean_token_accuracy": 0.6672724187374115, "num_tokens": 2745546360.0, "step": 16365 }, { "entropy": 1.7189862628777821, "epoch": 1.7978907473016397, "grad_norm": 0.7580441236495972, "learning_rate": 2.4784118676646467e-06, "loss": 1.3881, "mean_token_accuracy": 0.6678448468446732, "num_tokens": 2745732348.0, "step": 16366 }, { "entropy": 1.6766027708848317, "epoch": 1.7980006042130126, "grad_norm": 0.6480311155319214, "learning_rate": 2.477896926332558e-06, "loss": 1.4681, "mean_token_accuracy": 0.6425887246926626, "num_tokens": 2745921764.0, "step": 16367 }, { "entropy": 1.6788609822591145, "epoch": 1.7981104611243854, "grad_norm": 0.6684360504150391, "learning_rate": 2.477382254721191e-06, "loss": 1.4321, "mean_token_accuracy": 0.6493734816710154, "num_tokens": 2746126329.0, "step": 16368 }, { "entropy": 1.766765018304189, "epoch": 1.7982203180357583, "grad_norm": 0.784034788608551, "learning_rate": 2.4768678528468345e-06, "loss": 1.4098, "mean_token_accuracy": 0.6438094178835551, "num_tokens": 2746279905.0, "step": 16369 }, { "entropy": 1.696258048216502, "epoch": 1.7983301749471314, "grad_norm": 0.6454617977142334, "learning_rate": 2.476353720725771e-06, "loss": 1.3864, "mean_token_accuracy": 0.6533452222744623, "num_tokens": 2746468173.0, "step": 16370 }, { "entropy": 1.684233695268631, "epoch": 1.7984400318585043, "grad_norm": 0.8708049654960632, "learning_rate": 2.475839858374269e-06, "loss": 1.3214, "mean_token_accuracy": 0.6717989295721054, "num_tokens": 2746606416.0, "step": 16371 }, { "entropy": 1.6811311642328899, "epoch": 1.7985498887698772, "grad_norm": 0.5873830914497375, "learning_rate": 2.475326265808597e-06, "loss": 1.3903, "mean_token_accuracy": 0.6625532309214274, "num_tokens": 2746825476.0, "step": 16372 }, { "entropy": 1.6943085193634033, "epoch": 1.7986597456812503, "grad_norm": 0.6107808351516724, "learning_rate": 2.474812943045007e-06, "loss": 1.3958, "mean_token_accuracy": 0.6486289997895559, "num_tokens": 2747042577.0, "step": 16373 }, { "entropy": 1.6802096863587697, "epoch": 1.798769602592623, "grad_norm": 0.6949267387390137, "learning_rate": 2.474299890099744e-06, "loss": 1.3366, "mean_token_accuracy": 0.6724252700805664, "num_tokens": 2747184615.0, "step": 16374 }, { "entropy": 1.7157519956429799, "epoch": 1.798879459503996, "grad_norm": 0.7292264103889465, "learning_rate": 2.47378710698905e-06, "loss": 1.2698, "mean_token_accuracy": 0.671657994389534, "num_tokens": 2747306579.0, "step": 16375 }, { "entropy": 1.7131900389989216, "epoch": 1.798989316415369, "grad_norm": 0.750167727470398, "learning_rate": 2.4732745937291515e-06, "loss": 1.3823, "mean_token_accuracy": 0.6537191818157831, "num_tokens": 2747456528.0, "step": 16376 }, { "entropy": 1.6936882932980855, "epoch": 1.7990991733267419, "grad_norm": 0.709400475025177, "learning_rate": 2.4727623503362686e-06, "loss": 1.3456, "mean_token_accuracy": 0.6595764954884847, "num_tokens": 2747582462.0, "step": 16377 }, { "entropy": 1.7834466397762299, "epoch": 1.799209030238115, "grad_norm": 0.7460691928863525, "learning_rate": 2.4722503768266144e-06, "loss": 1.4517, "mean_token_accuracy": 0.6345730274915695, "num_tokens": 2747760658.0, "step": 16378 }, { "entropy": 1.6861862341562908, "epoch": 1.7993188871494878, "grad_norm": 0.6694313287734985, "learning_rate": 2.4717386732163953e-06, "loss": 1.3049, "mean_token_accuracy": 0.66120112935702, "num_tokens": 2747884938.0, "step": 16379 }, { "entropy": 1.715090274810791, "epoch": 1.7994287440608607, "grad_norm": 0.6291208267211914, "learning_rate": 2.471227239521804e-06, "loss": 1.3891, "mean_token_accuracy": 0.6506547033786774, "num_tokens": 2748086682.0, "step": 16380 }, { "entropy": 1.6372637848059337, "epoch": 1.7995386009722336, "grad_norm": 0.6571980714797974, "learning_rate": 2.4707160757590253e-06, "loss": 1.2591, "mean_token_accuracy": 0.6797957370678583, "num_tokens": 2748239953.0, "step": 16381 }, { "entropy": 1.7329098383585613, "epoch": 1.7996484578836065, "grad_norm": 0.67576664686203, "learning_rate": 2.470205181944242e-06, "loss": 1.6271, "mean_token_accuracy": 0.6063709209362665, "num_tokens": 2748496615.0, "step": 16382 }, { "entropy": 1.7593752145767212, "epoch": 1.7997583147949796, "grad_norm": 0.6144885420799255, "learning_rate": 2.469694558093618e-06, "loss": 1.4584, "mean_token_accuracy": 0.6472984254360199, "num_tokens": 2748757893.0, "step": 16383 }, { "entropy": 1.760807067155838, "epoch": 1.7998681717063525, "grad_norm": 109.00904083251953, "learning_rate": 2.469184204223321e-06, "loss": 1.6694, "mean_token_accuracy": 0.6406611104806265, "num_tokens": 2748996769.0, "step": 16384 }, { "entropy": 1.6700663566589355, "epoch": 1.7999780286177254, "grad_norm": 0.6122145652770996, "learning_rate": 2.4686741203494976e-06, "loss": 1.339, "mean_token_accuracy": 0.6703683187564214, "num_tokens": 2749191720.0, "step": 16385 }, { "entropy": 1.6805997391541798, "epoch": 1.8000878855290985, "grad_norm": 0.6632294058799744, "learning_rate": 2.468164306488295e-06, "loss": 1.3269, "mean_token_accuracy": 0.6549940158923467, "num_tokens": 2749342736.0, "step": 16386 }, { "entropy": 1.7235978146394093, "epoch": 1.8001977424404711, "grad_norm": 0.6988422870635986, "learning_rate": 2.467654762655847e-06, "loss": 1.3662, "mean_token_accuracy": 0.6608254263798395, "num_tokens": 2749463576.0, "step": 16387 }, { "entropy": 1.73094642162323, "epoch": 1.8003075993518443, "grad_norm": 0.7575457096099854, "learning_rate": 2.467145488868281e-06, "loss": 1.4601, "mean_token_accuracy": 0.6553111871083578, "num_tokens": 2749630135.0, "step": 16388 }, { "entropy": 1.700294444958369, "epoch": 1.8004174562632171, "grad_norm": 0.6070172190666199, "learning_rate": 2.4666364851417153e-06, "loss": 1.5017, "mean_token_accuracy": 0.6433312793572744, "num_tokens": 2749851486.0, "step": 16389 }, { "entropy": 1.7130950689315796, "epoch": 1.80052731317459, "grad_norm": 0.7260795831680298, "learning_rate": 2.4661277514922587e-06, "loss": 1.3681, "mean_token_accuracy": 0.650189533829689, "num_tokens": 2750035261.0, "step": 16390 }, { "entropy": 1.700755516688029, "epoch": 1.8006371700859631, "grad_norm": 0.7316020131111145, "learning_rate": 2.4656192879360145e-06, "loss": 1.4561, "mean_token_accuracy": 0.6599542399247488, "num_tokens": 2750188972.0, "step": 16391 }, { "entropy": 1.6893901228904724, "epoch": 1.800747026997336, "grad_norm": 0.7152737975120544, "learning_rate": 2.465111094489074e-06, "loss": 1.2815, "mean_token_accuracy": 0.6717756688594818, "num_tokens": 2750332850.0, "step": 16392 }, { "entropy": 1.7034862637519836, "epoch": 1.800856883908709, "grad_norm": 0.6364946365356445, "learning_rate": 2.464603171167521e-06, "loss": 1.4426, "mean_token_accuracy": 0.6520007997751236, "num_tokens": 2750560589.0, "step": 16393 }, { "entropy": 1.6827989121278126, "epoch": 1.8009667408200818, "grad_norm": 0.6871801614761353, "learning_rate": 2.4640955179874333e-06, "loss": 1.2716, "mean_token_accuracy": 0.6799880017836889, "num_tokens": 2750726349.0, "step": 16394 }, { "entropy": 1.729516049226125, "epoch": 1.8010765977314547, "grad_norm": 0.7461774349212646, "learning_rate": 2.4635881349648734e-06, "loss": 1.4294, "mean_token_accuracy": 0.6584520041942596, "num_tokens": 2750898613.0, "step": 16395 }, { "entropy": 1.6981934209664662, "epoch": 1.8011864546428278, "grad_norm": 0.6527087688446045, "learning_rate": 2.4630810221159043e-06, "loss": 1.3578, "mean_token_accuracy": 0.6532334089279175, "num_tokens": 2751050794.0, "step": 16396 }, { "entropy": 1.7621082564194996, "epoch": 1.8012963115542007, "grad_norm": 0.6884635090827942, "learning_rate": 2.462574179456574e-06, "loss": 1.4785, "mean_token_accuracy": 0.6381318867206573, "num_tokens": 2751240455.0, "step": 16397 }, { "entropy": 1.6867110133171082, "epoch": 1.8014061684655736, "grad_norm": 0.5808276534080505, "learning_rate": 2.4620676070029223e-06, "loss": 1.4725, "mean_token_accuracy": 0.651040847102801, "num_tokens": 2751419542.0, "step": 16398 }, { "entropy": 1.6382981638113658, "epoch": 1.8015160253769467, "grad_norm": 0.5899358987808228, "learning_rate": 2.4615613047709847e-06, "loss": 1.3374, "mean_token_accuracy": 0.660874272386233, "num_tokens": 2751603980.0, "step": 16399 }, { "entropy": 1.7465067307154338, "epoch": 1.8016258822883193, "grad_norm": 0.700994610786438, "learning_rate": 2.4610552727767843e-06, "loss": 1.5425, "mean_token_accuracy": 0.6477147589127222, "num_tokens": 2751812703.0, "step": 16400 }, { "entropy": 1.6795567174752553, "epoch": 1.8017357391996924, "grad_norm": 0.6919041872024536, "learning_rate": 2.4605495110363366e-06, "loss": 1.4238, "mean_token_accuracy": 0.6519719262917837, "num_tokens": 2751984688.0, "step": 16401 }, { "entropy": 1.8034850259621937, "epoch": 1.8018455961110653, "grad_norm": 0.8304495215415955, "learning_rate": 2.4600440195656476e-06, "loss": 1.3008, "mean_token_accuracy": 0.6683735996484756, "num_tokens": 2752123752.0, "step": 16402 }, { "entropy": 1.7134062051773071, "epoch": 1.8019554530224382, "grad_norm": 0.668536901473999, "learning_rate": 2.459538798380719e-06, "loss": 1.3065, "mean_token_accuracy": 0.6793159395456314, "num_tokens": 2752275099.0, "step": 16403 }, { "entropy": 1.6386187970638275, "epoch": 1.8020653099338113, "grad_norm": 0.69599449634552, "learning_rate": 2.4590338474975397e-06, "loss": 1.3571, "mean_token_accuracy": 0.6669880499442419, "num_tokens": 2752404359.0, "step": 16404 }, { "entropy": 1.7862418989340465, "epoch": 1.8021751668451842, "grad_norm": 0.7669715881347656, "learning_rate": 2.4585291669320877e-06, "loss": 1.3221, "mean_token_accuracy": 0.6571997304757436, "num_tokens": 2752532453.0, "step": 16405 }, { "entropy": 1.6611520648002625, "epoch": 1.802285023756557, "grad_norm": 0.8591197729110718, "learning_rate": 2.458024756700341e-06, "loss": 1.2213, "mean_token_accuracy": 0.6755464772383372, "num_tokens": 2752670377.0, "step": 16406 }, { "entropy": 1.7248308161894481, "epoch": 1.80239488066793, "grad_norm": 0.7357346415519714, "learning_rate": 2.4575206168182605e-06, "loss": 1.4571, "mean_token_accuracy": 0.6435425728559494, "num_tokens": 2752869097.0, "step": 16407 }, { "entropy": 1.7167830963929493, "epoch": 1.8025047375793029, "grad_norm": 0.690274178981781, "learning_rate": 2.457016747301804e-06, "loss": 1.3364, "mean_token_accuracy": 0.662845383087794, "num_tokens": 2753007646.0, "step": 16408 }, { "entropy": 1.8022632400194805, "epoch": 1.802614594490676, "grad_norm": 0.8202261328697205, "learning_rate": 2.4565131481669175e-06, "loss": 1.3481, "mean_token_accuracy": 0.6701598316431046, "num_tokens": 2753123586.0, "step": 16409 }, { "entropy": 1.6932842234770458, "epoch": 1.8027244514020488, "grad_norm": 0.733900249004364, "learning_rate": 2.4560098194295397e-06, "loss": 1.4256, "mean_token_accuracy": 0.6689083476861318, "num_tokens": 2753287054.0, "step": 16410 }, { "entropy": 1.750009814898173, "epoch": 1.8028343083134217, "grad_norm": 0.7286418676376343, "learning_rate": 2.455506761105601e-06, "loss": 1.5902, "mean_token_accuracy": 0.6424010594685873, "num_tokens": 2753491319.0, "step": 16411 }, { "entropy": 1.6446313957373302, "epoch": 1.8029441652247948, "grad_norm": 0.7561642527580261, "learning_rate": 2.455003973211025e-06, "loss": 1.4449, "mean_token_accuracy": 0.6605862602591515, "num_tokens": 2753684199.0, "step": 16412 }, { "entropy": 1.7904584010442097, "epoch": 1.8030540221361675, "grad_norm": 0.7615606188774109, "learning_rate": 2.4545014557617205e-06, "loss": 1.4292, "mean_token_accuracy": 0.6448209335406622, "num_tokens": 2753848636.0, "step": 16413 }, { "entropy": 1.7027158041795094, "epoch": 1.8031638790475406, "grad_norm": 0.7243815660476685, "learning_rate": 2.4539992087735937e-06, "loss": 1.2719, "mean_token_accuracy": 0.6721286574999491, "num_tokens": 2753989493.0, "step": 16414 }, { "entropy": 1.6345816453297932, "epoch": 1.8032737359589135, "grad_norm": 0.6394364833831787, "learning_rate": 2.4534972322625434e-06, "loss": 1.3012, "mean_token_accuracy": 0.670843780040741, "num_tokens": 2754145938.0, "step": 16415 }, { "entropy": 1.710991491874059, "epoch": 1.8033835928702864, "grad_norm": 0.8464189171791077, "learning_rate": 2.4529955262444534e-06, "loss": 1.427, "mean_token_accuracy": 0.6431051045656204, "num_tokens": 2754279254.0, "step": 16416 }, { "entropy": 1.6918781101703644, "epoch": 1.8034934497816595, "grad_norm": 0.7020459771156311, "learning_rate": 2.4524940907352028e-06, "loss": 1.3814, "mean_token_accuracy": 0.6550219456354777, "num_tokens": 2754413658.0, "step": 16417 }, { "entropy": 1.653142919143041, "epoch": 1.8036033066930324, "grad_norm": 0.6922260522842407, "learning_rate": 2.4519929257506644e-06, "loss": 1.2536, "mean_token_accuracy": 0.681772361199061, "num_tokens": 2754595362.0, "step": 16418 }, { "entropy": 1.7180135349432628, "epoch": 1.8037131636044053, "grad_norm": 0.6518612504005432, "learning_rate": 2.4514920313066972e-06, "loss": 1.4934, "mean_token_accuracy": 0.6491179863611857, "num_tokens": 2754770348.0, "step": 16419 }, { "entropy": 1.646272877852122, "epoch": 1.8038230205157784, "grad_norm": 0.9592717885971069, "learning_rate": 2.4509914074191544e-06, "loss": 1.2443, "mean_token_accuracy": 0.6764777451753616, "num_tokens": 2754908323.0, "step": 16420 }, { "entropy": 1.7484492460886638, "epoch": 1.803932877427151, "grad_norm": 0.6856718063354492, "learning_rate": 2.450491054103883e-06, "loss": 1.338, "mean_token_accuracy": 0.6600681195656458, "num_tokens": 2755054083.0, "step": 16421 }, { "entropy": 1.6522502601146698, "epoch": 1.8040427343385241, "grad_norm": 0.5855985283851624, "learning_rate": 2.4499909713767156e-06, "loss": 1.3849, "mean_token_accuracy": 0.6528994540373484, "num_tokens": 2755261170.0, "step": 16422 }, { "entropy": 1.742342193921407, "epoch": 1.804152591249897, "grad_norm": 0.6841393113136292, "learning_rate": 2.4494911592534825e-06, "loss": 1.3531, "mean_token_accuracy": 0.6608762443065643, "num_tokens": 2755430970.0, "step": 16423 }, { "entropy": 1.7070422967274983, "epoch": 1.80426244816127, "grad_norm": 0.7020707726478577, "learning_rate": 2.4489916177500013e-06, "loss": 1.402, "mean_token_accuracy": 0.6581531713406245, "num_tokens": 2755580117.0, "step": 16424 }, { "entropy": 1.6086894969145458, "epoch": 1.804372305072643, "grad_norm": 0.7450229525566101, "learning_rate": 2.4484923468820805e-06, "loss": 1.4186, "mean_token_accuracy": 0.6774997810522715, "num_tokens": 2755722550.0, "step": 16425 }, { "entropy": 1.629365513722102, "epoch": 1.8044821619840157, "grad_norm": 0.5438582897186279, "learning_rate": 2.447993346665523e-06, "loss": 1.3446, "mean_token_accuracy": 0.6634021550416946, "num_tokens": 2755920382.0, "step": 16426 }, { "entropy": 1.6984173556168873, "epoch": 1.8045920188953888, "grad_norm": 0.6832945346832275, "learning_rate": 2.447494617116126e-06, "loss": 1.2979, "mean_token_accuracy": 0.66312904159228, "num_tokens": 2756073656.0, "step": 16427 }, { "entropy": 1.6762990454832714, "epoch": 1.8047018758067617, "grad_norm": 0.5510643124580383, "learning_rate": 2.4469961582496683e-06, "loss": 1.3274, "mean_token_accuracy": 0.6577611863613129, "num_tokens": 2756260540.0, "step": 16428 }, { "entropy": 1.7335613071918488, "epoch": 1.8048117327181346, "grad_norm": 0.6928039193153381, "learning_rate": 2.446497970081928e-06, "loss": 1.4443, "mean_token_accuracy": 0.6604134688774744, "num_tokens": 2756393501.0, "step": 16429 }, { "entropy": 1.7227633396784465, "epoch": 1.8049215896295077, "grad_norm": 0.6674528121948242, "learning_rate": 2.4460000526286727e-06, "loss": 1.3792, "mean_token_accuracy": 0.6571878095467886, "num_tokens": 2756561165.0, "step": 16430 }, { "entropy": 1.6204917430877686, "epoch": 1.8050314465408805, "grad_norm": 0.6645229458808899, "learning_rate": 2.4455024059056627e-06, "loss": 1.4276, "mean_token_accuracy": 0.6697969138622284, "num_tokens": 2756764755.0, "step": 16431 }, { "entropy": 1.7642103830973308, "epoch": 1.8051413034522534, "grad_norm": 0.6755779385566711, "learning_rate": 2.4450050299286452e-06, "loss": 1.3912, "mean_token_accuracy": 0.6555102616548538, "num_tokens": 2756901089.0, "step": 16432 }, { "entropy": 1.7875695725282033, "epoch": 1.8052511603636265, "grad_norm": 0.6372548341751099, "learning_rate": 2.444507924713364e-06, "loss": 1.3773, "mean_token_accuracy": 0.6585378497838974, "num_tokens": 2757036878.0, "step": 16433 }, { "entropy": 1.726793756087621, "epoch": 1.8053610172749992, "grad_norm": 0.6793131828308105, "learning_rate": 2.4440110902755513e-06, "loss": 1.3715, "mean_token_accuracy": 0.6532783309618632, "num_tokens": 2757170150.0, "step": 16434 }, { "entropy": 1.7632285555203755, "epoch": 1.8054708741863723, "grad_norm": 0.6790938973426819, "learning_rate": 2.443514526630933e-06, "loss": 1.3588, "mean_token_accuracy": 0.6496838182210922, "num_tokens": 2757309818.0, "step": 16435 }, { "entropy": 1.6782483259836833, "epoch": 1.8055807310977452, "grad_norm": 0.6445368528366089, "learning_rate": 2.4430182337952247e-06, "loss": 1.4392, "mean_token_accuracy": 0.6515261183182398, "num_tokens": 2757486240.0, "step": 16436 }, { "entropy": 1.6433900197347004, "epoch": 1.805690588009118, "grad_norm": 0.6568174958229065, "learning_rate": 2.4425222117841315e-06, "loss": 1.2252, "mean_token_accuracy": 0.6834449718395869, "num_tokens": 2757602672.0, "step": 16437 }, { "entropy": 1.7270792822043102, "epoch": 1.8058004449204912, "grad_norm": 0.7385875582695007, "learning_rate": 2.4420264606133555e-06, "loss": 1.3364, "mean_token_accuracy": 0.6688449184099833, "num_tokens": 2757751042.0, "step": 16438 }, { "entropy": 1.6770341396331787, "epoch": 1.8059103018318639, "grad_norm": 0.7027744650840759, "learning_rate": 2.4415309802985854e-06, "loss": 1.2349, "mean_token_accuracy": 0.6747290591398875, "num_tokens": 2757876536.0, "step": 16439 }, { "entropy": 1.7311459481716156, "epoch": 1.806020158743237, "grad_norm": 0.8075997233390808, "learning_rate": 2.4410357708555032e-06, "loss": 1.2985, "mean_token_accuracy": 0.6764100193977356, "num_tokens": 2758041919.0, "step": 16440 }, { "entropy": 1.6318459411462147, "epoch": 1.8061300156546098, "grad_norm": 0.5902323126792908, "learning_rate": 2.440540832299783e-06, "loss": 1.3022, "mean_token_accuracy": 0.6714819123347601, "num_tokens": 2758217442.0, "step": 16441 }, { "entropy": 1.7181476652622223, "epoch": 1.8062398725659827, "grad_norm": 0.8379008769989014, "learning_rate": 2.440046164647087e-06, "loss": 1.4068, "mean_token_accuracy": 0.6589639882246653, "num_tokens": 2758376926.0, "step": 16442 }, { "entropy": 1.672847221295039, "epoch": 1.8063497294773558, "grad_norm": 0.7189886569976807, "learning_rate": 2.4395517679130744e-06, "loss": 1.3829, "mean_token_accuracy": 0.663548931479454, "num_tokens": 2758551062.0, "step": 16443 }, { "entropy": 1.7527056137720745, "epoch": 1.8064595863887287, "grad_norm": 0.8830350041389465, "learning_rate": 2.4390576421133897e-06, "loss": 1.4128, "mean_token_accuracy": 0.6445074528455734, "num_tokens": 2758717383.0, "step": 16444 }, { "entropy": 1.6904211342334747, "epoch": 1.8065694433001016, "grad_norm": 0.7606146931648254, "learning_rate": 2.438563787263673e-06, "loss": 1.4546, "mean_token_accuracy": 0.6546589334805807, "num_tokens": 2758926548.0, "step": 16445 }, { "entropy": 1.684679885705312, "epoch": 1.8066793002114747, "grad_norm": 0.7838829159736633, "learning_rate": 2.4380702033795538e-06, "loss": 1.487, "mean_token_accuracy": 0.6486780146757761, "num_tokens": 2759100149.0, "step": 16446 }, { "entropy": 1.677791029214859, "epoch": 1.8067891571228474, "grad_norm": 0.6367784738540649, "learning_rate": 2.4375768904766563e-06, "loss": 1.4016, "mean_token_accuracy": 0.6575369586547216, "num_tokens": 2759261585.0, "step": 16447 }, { "entropy": 1.7279250423113506, "epoch": 1.8068990140342205, "grad_norm": 0.6288533210754395, "learning_rate": 2.4370838485705912e-06, "loss": 1.2892, "mean_token_accuracy": 0.6716119796037674, "num_tokens": 2759451394.0, "step": 16448 }, { "entropy": 1.6070989569028218, "epoch": 1.8070088709455934, "grad_norm": 0.5549145936965942, "learning_rate": 2.4365910776769634e-06, "loss": 1.3516, "mean_token_accuracy": 0.664691095550855, "num_tokens": 2759635049.0, "step": 16449 }, { "entropy": 1.6843027174472809, "epoch": 1.8071187278569663, "grad_norm": 0.7729708552360535, "learning_rate": 2.4360985778113696e-06, "loss": 1.2723, "mean_token_accuracy": 0.6750624477863312, "num_tokens": 2759847889.0, "step": 16450 }, { "entropy": 1.6489692231019337, "epoch": 1.8072285847683394, "grad_norm": 0.6890325546264648, "learning_rate": 2.4356063489893965e-06, "loss": 1.237, "mean_token_accuracy": 0.6862647583087286, "num_tokens": 2759961504.0, "step": 16451 }, { "entropy": 1.7302868167559307, "epoch": 1.807338441679712, "grad_norm": 0.6730805039405823, "learning_rate": 2.4351143912266232e-06, "loss": 1.4087, "mean_token_accuracy": 0.6450496266285578, "num_tokens": 2760109714.0, "step": 16452 }, { "entropy": 1.669048676888148, "epoch": 1.8074482985910851, "grad_norm": 0.6048988699913025, "learning_rate": 2.4346227045386208e-06, "loss": 1.4418, "mean_token_accuracy": 0.6532367666562399, "num_tokens": 2760296549.0, "step": 16453 }, { "entropy": 1.7388107577959697, "epoch": 1.807558155502458, "grad_norm": 0.6559601426124573, "learning_rate": 2.4341312889409495e-06, "loss": 1.4325, "mean_token_accuracy": 0.6547484199206034, "num_tokens": 2760490979.0, "step": 16454 }, { "entropy": 1.6647444764773052, "epoch": 1.807668012413831, "grad_norm": 0.6734881401062012, "learning_rate": 2.433640144449164e-06, "loss": 1.2971, "mean_token_accuracy": 0.6663618286450704, "num_tokens": 2760650612.0, "step": 16455 }, { "entropy": 1.7663162350654602, "epoch": 1.807777869325204, "grad_norm": 0.7578223943710327, "learning_rate": 2.433149271078807e-06, "loss": 1.3936, "mean_token_accuracy": 0.6566072255373001, "num_tokens": 2760761496.0, "step": 16456 }, { "entropy": 1.717143217722575, "epoch": 1.807887726236577, "grad_norm": 0.7225522994995117, "learning_rate": 2.4326586688454147e-06, "loss": 1.3449, "mean_token_accuracy": 0.6655629724264145, "num_tokens": 2760894645.0, "step": 16457 }, { "entropy": 1.6999610662460327, "epoch": 1.8079975831479498, "grad_norm": 0.6742311716079712, "learning_rate": 2.4321683377645146e-06, "loss": 1.4921, "mean_token_accuracy": 0.6431157986323038, "num_tokens": 2761080211.0, "step": 16458 }, { "entropy": 1.771820992231369, "epoch": 1.808107440059323, "grad_norm": 0.6953256726264954, "learning_rate": 2.4316782778516275e-06, "loss": 1.3855, "mean_token_accuracy": 0.6551636606454849, "num_tokens": 2761202587.0, "step": 16459 }, { "entropy": 1.745868742465973, "epoch": 1.8082172969706956, "grad_norm": 0.6078836917877197, "learning_rate": 2.4311884891222613e-06, "loss": 1.4532, "mean_token_accuracy": 0.6424828767776489, "num_tokens": 2761398670.0, "step": 16460 }, { "entropy": 1.6702168186505635, "epoch": 1.8083271538820687, "grad_norm": 0.6870954632759094, "learning_rate": 2.4306989715919173e-06, "loss": 1.4224, "mean_token_accuracy": 0.6447147478659948, "num_tokens": 2761597590.0, "step": 16461 }, { "entropy": 1.6488149464130402, "epoch": 1.8084370107934415, "grad_norm": 0.610784649848938, "learning_rate": 2.4302097252760913e-06, "loss": 1.4936, "mean_token_accuracy": 0.6545774986346563, "num_tokens": 2761794099.0, "step": 16462 }, { "entropy": 1.7186005214850109, "epoch": 1.8085468677048144, "grad_norm": 0.6306957602500916, "learning_rate": 2.429720750190264e-06, "loss": 1.3377, "mean_token_accuracy": 0.6656250059604645, "num_tokens": 2761973157.0, "step": 16463 }, { "entropy": 1.6818233629067738, "epoch": 1.8086567246161875, "grad_norm": 0.7410263419151306, "learning_rate": 2.4292320463499144e-06, "loss": 1.3116, "mean_token_accuracy": 0.6546371678511301, "num_tokens": 2762136640.0, "step": 16464 }, { "entropy": 1.6417667865753174, "epoch": 1.8087665815275602, "grad_norm": 0.6089370846748352, "learning_rate": 2.428743613770508e-06, "loss": 1.2356, "mean_token_accuracy": 0.6799864719311396, "num_tokens": 2762270207.0, "step": 16465 }, { "entropy": 1.746724675099055, "epoch": 1.8088764384389333, "grad_norm": 0.9241576194763184, "learning_rate": 2.4282554524675036e-06, "loss": 1.4229, "mean_token_accuracy": 0.6547742585341135, "num_tokens": 2762409921.0, "step": 16466 }, { "entropy": 1.6926732659339905, "epoch": 1.8089862953503062, "grad_norm": 0.7210370302200317, "learning_rate": 2.4277675624563523e-06, "loss": 1.3849, "mean_token_accuracy": 0.6654962301254272, "num_tokens": 2762570334.0, "step": 16467 }, { "entropy": 1.7325705389181774, "epoch": 1.809096152261679, "grad_norm": 0.6760542988777161, "learning_rate": 2.4272799437524954e-06, "loss": 1.4512, "mean_token_accuracy": 0.6488116631905237, "num_tokens": 2762709776.0, "step": 16468 }, { "entropy": 1.7742801705996196, "epoch": 1.8092060091730522, "grad_norm": 0.6283159852027893, "learning_rate": 2.4267925963713634e-06, "loss": 1.3957, "mean_token_accuracy": 0.6601527482271194, "num_tokens": 2762847503.0, "step": 16469 }, { "entropy": 1.6537544429302216, "epoch": 1.809315866084425, "grad_norm": 0.6634315252304077, "learning_rate": 2.426305520328383e-06, "loss": 1.3299, "mean_token_accuracy": 0.6634285499652227, "num_tokens": 2763000038.0, "step": 16470 }, { "entropy": 1.6798964043458302, "epoch": 1.809425722995798, "grad_norm": 0.7522450685501099, "learning_rate": 2.4258187156389707e-06, "loss": 1.4379, "mean_token_accuracy": 0.675532266497612, "num_tokens": 2763165723.0, "step": 16471 }, { "entropy": 1.6432731648286183, "epoch": 1.809535579907171, "grad_norm": 0.7260177135467529, "learning_rate": 2.4253321823185318e-06, "loss": 1.4119, "mean_token_accuracy": 0.6538095225890478, "num_tokens": 2763340508.0, "step": 16472 }, { "entropy": 1.6526922384897869, "epoch": 1.8096454368185437, "grad_norm": 0.6410662531852722, "learning_rate": 2.4248459203824652e-06, "loss": 1.4164, "mean_token_accuracy": 0.653022438287735, "num_tokens": 2763531382.0, "step": 16473 }, { "entropy": 1.6942103902498882, "epoch": 1.8097552937299168, "grad_norm": 0.8561227321624756, "learning_rate": 2.4243599298461616e-06, "loss": 1.4381, "mean_token_accuracy": 0.6655525416135788, "num_tokens": 2763661803.0, "step": 16474 }, { "entropy": 1.6899367968241374, "epoch": 1.8098651506412897, "grad_norm": 0.7844464778900146, "learning_rate": 2.423874210725001e-06, "loss": 1.3592, "mean_token_accuracy": 0.6753224035104116, "num_tokens": 2763804753.0, "step": 16475 }, { "entropy": 1.6260625620683034, "epoch": 1.8099750075526626, "grad_norm": 0.5812033414840698, "learning_rate": 2.423388763034358e-06, "loss": 1.4316, "mean_token_accuracy": 0.6537586599588394, "num_tokens": 2764008124.0, "step": 16476 }, { "entropy": 1.687359909216563, "epoch": 1.8100848644640357, "grad_norm": 0.7226285934448242, "learning_rate": 2.422903586789597e-06, "loss": 1.4195, "mean_token_accuracy": 0.6663658916950226, "num_tokens": 2764173536.0, "step": 16477 }, { "entropy": 1.7126532693703969, "epoch": 1.8101947213754084, "grad_norm": 0.6269643306732178, "learning_rate": 2.4224186820060708e-06, "loss": 1.4023, "mean_token_accuracy": 0.6467209408680598, "num_tokens": 2764346766.0, "step": 16478 }, { "entropy": 1.7550960679848988, "epoch": 1.8103045782867815, "grad_norm": 0.7029903531074524, "learning_rate": 2.42193404869913e-06, "loss": 1.2635, "mean_token_accuracy": 0.6703293671210607, "num_tokens": 2764462288.0, "step": 16479 }, { "entropy": 1.7869562208652496, "epoch": 1.8104144351981544, "grad_norm": 0.6959302425384521, "learning_rate": 2.421449686884109e-06, "loss": 1.549, "mean_token_accuracy": 0.6411256889502207, "num_tokens": 2764638641.0, "step": 16480 }, { "entropy": 1.7564504742622375, "epoch": 1.8105242921095273, "grad_norm": 0.6463617086410522, "learning_rate": 2.4209655965763406e-06, "loss": 1.4976, "mean_token_accuracy": 0.6407757749160131, "num_tokens": 2764811182.0, "step": 16481 }, { "entropy": 1.73487122853597, "epoch": 1.8106341490209004, "grad_norm": 0.7762302160263062, "learning_rate": 2.4204817777911455e-06, "loss": 1.3926, "mean_token_accuracy": 0.6518355309963226, "num_tokens": 2764952016.0, "step": 16482 }, { "entropy": 1.6598585546016693, "epoch": 1.8107440059322732, "grad_norm": 0.6841420531272888, "learning_rate": 2.4199982305438365e-06, "loss": 1.223, "mean_token_accuracy": 0.6864841481049856, "num_tokens": 2765057348.0, "step": 16483 }, { "entropy": 1.6540294587612152, "epoch": 1.8108538628436461, "grad_norm": 0.9833670854568481, "learning_rate": 2.4195149548497173e-06, "loss": 1.3703, "mean_token_accuracy": 0.67726102968057, "num_tokens": 2765199987.0, "step": 16484 }, { "entropy": 1.7150746981302898, "epoch": 1.8109637197550192, "grad_norm": 0.5984099507331848, "learning_rate": 2.419031950724082e-06, "loss": 1.4509, "mean_token_accuracy": 0.6461076935132345, "num_tokens": 2765417441.0, "step": 16485 }, { "entropy": 1.708159824212392, "epoch": 1.811073576666392, "grad_norm": 0.6068819165229797, "learning_rate": 2.41854921818222e-06, "loss": 1.4214, "mean_token_accuracy": 0.6494432340065638, "num_tokens": 2765635138.0, "step": 16486 }, { "entropy": 1.6690807143847148, "epoch": 1.811183433577765, "grad_norm": 0.7443904876708984, "learning_rate": 2.4180667572394073e-06, "loss": 1.2496, "mean_token_accuracy": 0.6697708616654078, "num_tokens": 2765765146.0, "step": 16487 }, { "entropy": 1.738635003566742, "epoch": 1.811293290489138, "grad_norm": 0.7414568662643433, "learning_rate": 2.4175845679109157e-06, "loss": 1.4483, "mean_token_accuracy": 0.6477248768011729, "num_tokens": 2765914159.0, "step": 16488 }, { "entropy": 1.73112353682518, "epoch": 1.8114031474005108, "grad_norm": 1.007875680923462, "learning_rate": 2.417102650212005e-06, "loss": 1.3697, "mean_token_accuracy": 0.6697366237640381, "num_tokens": 2766056322.0, "step": 16489 }, { "entropy": 1.6718713839848836, "epoch": 1.811513004311884, "grad_norm": 0.5636931657791138, "learning_rate": 2.4166210041579266e-06, "loss": 1.477, "mean_token_accuracy": 0.6347486774126688, "num_tokens": 2766306740.0, "step": 16490 }, { "entropy": 1.6812805632750194, "epoch": 1.8116228612232566, "grad_norm": 0.6897765398025513, "learning_rate": 2.4161396297639277e-06, "loss": 1.3959, "mean_token_accuracy": 0.6611317793528239, "num_tokens": 2766469990.0, "step": 16491 }, { "entropy": 1.7409476439158122, "epoch": 1.8117327181346297, "grad_norm": 0.9774511456489563, "learning_rate": 2.4156585270452413e-06, "loss": 1.7468, "mean_token_accuracy": 0.643994982043902, "num_tokens": 2766603370.0, "step": 16492 }, { "entropy": 1.6942278146743774, "epoch": 1.8118425750460025, "grad_norm": 0.6507070064544678, "learning_rate": 2.415177696017093e-06, "loss": 1.3617, "mean_token_accuracy": 0.6736532896757126, "num_tokens": 2766748463.0, "step": 16493 }, { "entropy": 1.6879957815011342, "epoch": 1.8119524319573754, "grad_norm": 0.6665419936180115, "learning_rate": 2.4146971366947035e-06, "loss": 1.3464, "mean_token_accuracy": 0.6612015018860499, "num_tokens": 2766923463.0, "step": 16494 }, { "entropy": 1.6867989003658295, "epoch": 1.8120622888687485, "grad_norm": 0.6739834547042847, "learning_rate": 2.4142168490932784e-06, "loss": 1.2979, "mean_token_accuracy": 0.6711998730897903, "num_tokens": 2767057148.0, "step": 16495 }, { "entropy": 1.7199506064256032, "epoch": 1.8121721457801214, "grad_norm": 0.6423784494400024, "learning_rate": 2.413736833228024e-06, "loss": 1.3756, "mean_token_accuracy": 0.6621057589848837, "num_tokens": 2767237139.0, "step": 16496 }, { "entropy": 1.6715861360232036, "epoch": 1.8122820026914943, "grad_norm": 0.6793438792228699, "learning_rate": 2.4132570891141296e-06, "loss": 1.2994, "mean_token_accuracy": 0.6636922707160314, "num_tokens": 2767411416.0, "step": 16497 }, { "entropy": 1.70285361011823, "epoch": 1.8123918596028674, "grad_norm": 0.6627052426338196, "learning_rate": 2.412777616766778e-06, "loss": 1.5275, "mean_token_accuracy": 0.6365848928689957, "num_tokens": 2767616868.0, "step": 16498 }, { "entropy": 1.748480220635732, "epoch": 1.81250171651424, "grad_norm": 0.6843937635421753, "learning_rate": 2.4122984162011453e-06, "loss": 1.4195, "mean_token_accuracy": 0.6697787046432495, "num_tokens": 2767808831.0, "step": 16499 }, { "entropy": 1.7178409099578857, "epoch": 1.8126115734256132, "grad_norm": 0.7172280550003052, "learning_rate": 2.4118194874323993e-06, "loss": 1.3364, "mean_token_accuracy": 0.6643195003271103, "num_tokens": 2767935675.0, "step": 16500 }, { "entropy": 1.7438208361466725, "epoch": 1.812721430336986, "grad_norm": 0.6629429459571838, "learning_rate": 2.4113408304756943e-06, "loss": 1.3889, "mean_token_accuracy": 0.6561461488405863, "num_tokens": 2768083615.0, "step": 16501 }, { "entropy": 1.7097637752691905, "epoch": 1.812831287248359, "grad_norm": 0.627873420715332, "learning_rate": 2.4108624453461825e-06, "loss": 1.3931, "mean_token_accuracy": 0.6541771193345388, "num_tokens": 2768263773.0, "step": 16502 }, { "entropy": 1.7309077978134155, "epoch": 1.812941144159732, "grad_norm": 0.6251326203346252, "learning_rate": 2.4103843320590053e-06, "loss": 1.548, "mean_token_accuracy": 0.6347083449363708, "num_tokens": 2768483831.0, "step": 16503 }, { "entropy": 1.645240803559621, "epoch": 1.8130510010711047, "grad_norm": 0.6707781553268433, "learning_rate": 2.409906490629294e-06, "loss": 1.4934, "mean_token_accuracy": 0.6460568408171335, "num_tokens": 2768679029.0, "step": 16504 }, { "entropy": 1.6946294804414113, "epoch": 1.8131608579824778, "grad_norm": 0.6006249785423279, "learning_rate": 2.4094289210721684e-06, "loss": 1.3718, "mean_token_accuracy": 0.6605852544307709, "num_tokens": 2768851311.0, "step": 16505 }, { "entropy": 1.711153248945872, "epoch": 1.8132707148938507, "grad_norm": 0.7911529541015625, "learning_rate": 2.40895162340275e-06, "loss": 1.5019, "mean_token_accuracy": 0.653807650009791, "num_tokens": 2768982496.0, "step": 16506 }, { "entropy": 1.6501058836778004, "epoch": 1.8133805718052236, "grad_norm": 0.6616920232772827, "learning_rate": 2.4084745976361382e-06, "loss": 1.3616, "mean_token_accuracy": 0.6733062863349915, "num_tokens": 2769152918.0, "step": 16507 }, { "entropy": 1.6238398055235546, "epoch": 1.8134904287165967, "grad_norm": 0.7626936435699463, "learning_rate": 2.4079978437874357e-06, "loss": 1.2714, "mean_token_accuracy": 0.6772501319646835, "num_tokens": 2769344820.0, "step": 16508 }, { "entropy": 1.7226660251617432, "epoch": 1.8136002856279696, "grad_norm": 0.6929018497467041, "learning_rate": 2.4075213618717304e-06, "loss": 1.4003, "mean_token_accuracy": 0.6568613747755686, "num_tokens": 2769517942.0, "step": 16509 }, { "entropy": 1.8020154933134716, "epoch": 1.8137101425393425, "grad_norm": 0.7082515358924866, "learning_rate": 2.4070451519041014e-06, "loss": 1.3086, "mean_token_accuracy": 0.669136126836141, "num_tokens": 2769667335.0, "step": 16510 }, { "entropy": 1.7351914743582408, "epoch": 1.8138199994507156, "grad_norm": 0.8061874508857727, "learning_rate": 2.406569213899621e-06, "loss": 1.4222, "mean_token_accuracy": 0.6561322311560313, "num_tokens": 2769819275.0, "step": 16511 }, { "entropy": 1.673819233973821, "epoch": 1.8139298563620883, "grad_norm": 0.7893275618553162, "learning_rate": 2.4060935478733538e-06, "loss": 1.2336, "mean_token_accuracy": 0.6748340229193369, "num_tokens": 2769964391.0, "step": 16512 }, { "entropy": 1.708072344462077, "epoch": 1.8140397132734614, "grad_norm": 0.7377780079841614, "learning_rate": 2.4056181538403515e-06, "loss": 1.4061, "mean_token_accuracy": 0.6597805072863897, "num_tokens": 2770118160.0, "step": 16513 }, { "entropy": 1.714382102092107, "epoch": 1.8141495701848342, "grad_norm": 0.5593966245651245, "learning_rate": 2.4051430318156622e-06, "loss": 1.415, "mean_token_accuracy": 0.6464897443850836, "num_tokens": 2770391818.0, "step": 16514 }, { "entropy": 1.7466392715771992, "epoch": 1.8142594270962071, "grad_norm": 0.7103216052055359, "learning_rate": 2.4046681818143245e-06, "loss": 1.3028, "mean_token_accuracy": 0.6765001912911733, "num_tokens": 2770508407.0, "step": 16515 }, { "entropy": 1.765973150730133, "epoch": 1.8143692840075802, "grad_norm": 0.7408754229545593, "learning_rate": 2.4041936038513647e-06, "loss": 1.3235, "mean_token_accuracy": 0.6654133200645447, "num_tokens": 2770656487.0, "step": 16516 }, { "entropy": 1.6894567012786865, "epoch": 1.814479140918953, "grad_norm": 0.6805311441421509, "learning_rate": 2.4037192979418036e-06, "loss": 1.4779, "mean_token_accuracy": 0.6684766709804535, "num_tokens": 2770806625.0, "step": 16517 }, { "entropy": 1.7023487786451976, "epoch": 1.814588997830326, "grad_norm": 0.6457291841506958, "learning_rate": 2.4032452641006546e-06, "loss": 1.4227, "mean_token_accuracy": 0.6392653485139211, "num_tokens": 2770997593.0, "step": 16518 }, { "entropy": 1.7675903141498566, "epoch": 1.814698854741699, "grad_norm": 0.7356979250907898, "learning_rate": 2.4027715023429173e-06, "loss": 1.3966, "mean_token_accuracy": 0.6494138091802597, "num_tokens": 2771136387.0, "step": 16519 }, { "entropy": 1.6030404170354207, "epoch": 1.8148087116530718, "grad_norm": 0.5834929347038269, "learning_rate": 2.4022980126835897e-06, "loss": 1.4752, "mean_token_accuracy": 0.6513334512710571, "num_tokens": 2771362647.0, "step": 16520 }, { "entropy": 1.722896158695221, "epoch": 1.814918568564445, "grad_norm": 0.7146098613739014, "learning_rate": 2.4018247951376546e-06, "loss": 1.595, "mean_token_accuracy": 0.6407341261704763, "num_tokens": 2771551945.0, "step": 16521 }, { "entropy": 1.7077131768067677, "epoch": 1.8150284254758178, "grad_norm": 0.6865191459655762, "learning_rate": 2.401351849720091e-06, "loss": 1.4025, "mean_token_accuracy": 0.656955232222875, "num_tokens": 2771751606.0, "step": 16522 }, { "entropy": 1.6773952742417653, "epoch": 1.8151382823871907, "grad_norm": 0.5679813027381897, "learning_rate": 2.4008791764458667e-06, "loss": 1.4841, "mean_token_accuracy": 0.6524718155463537, "num_tokens": 2771984622.0, "step": 16523 }, { "entropy": 1.7627310752868652, "epoch": 1.8152481392985638, "grad_norm": 0.621216893196106, "learning_rate": 2.4004067753299414e-06, "loss": 1.5703, "mean_token_accuracy": 0.6433713287115097, "num_tokens": 2772250906.0, "step": 16524 }, { "entropy": 1.6635705729325612, "epoch": 1.8153579962099364, "grad_norm": 0.661649763584137, "learning_rate": 2.399934646387266e-06, "loss": 1.3721, "mean_token_accuracy": 0.6734176874160767, "num_tokens": 2772389665.0, "step": 16525 }, { "entropy": 1.7503486176331837, "epoch": 1.8154678531213095, "grad_norm": 0.772406816482544, "learning_rate": 2.3994627896327832e-06, "loss": 1.4636, "mean_token_accuracy": 0.6583824306726456, "num_tokens": 2772543581.0, "step": 16526 }, { "entropy": 1.6406415303548176, "epoch": 1.8155777100326824, "grad_norm": 0.716643214225769, "learning_rate": 2.39899120508143e-06, "loss": 1.3905, "mean_token_accuracy": 0.6612937748432159, "num_tokens": 2772717311.0, "step": 16527 }, { "entropy": 1.6522994637489319, "epoch": 1.8156875669440553, "grad_norm": 0.7278351187705994, "learning_rate": 2.398519892748128e-06, "loss": 1.3473, "mean_token_accuracy": 0.6766088207562765, "num_tokens": 2772938768.0, "step": 16528 }, { "entropy": 1.779366006453832, "epoch": 1.8157974238554284, "grad_norm": 0.7547992467880249, "learning_rate": 2.398048852647795e-06, "loss": 1.2914, "mean_token_accuracy": 0.6645817359288534, "num_tokens": 2773034087.0, "step": 16529 }, { "entropy": 1.6814574499924977, "epoch": 1.815907280766801, "grad_norm": 0.7637436389923096, "learning_rate": 2.3975780847953413e-06, "loss": 1.4152, "mean_token_accuracy": 0.6605032732089361, "num_tokens": 2773188999.0, "step": 16530 }, { "entropy": 1.690110484759013, "epoch": 1.8160171376781742, "grad_norm": 0.5965714454650879, "learning_rate": 2.3971075892056628e-06, "loss": 1.3626, "mean_token_accuracy": 0.6655519704023997, "num_tokens": 2773368162.0, "step": 16531 }, { "entropy": 1.6539724171161652, "epoch": 1.816126994589547, "grad_norm": 0.733353853225708, "learning_rate": 2.3966373658936536e-06, "loss": 1.4261, "mean_token_accuracy": 0.6458848516146342, "num_tokens": 2773558099.0, "step": 16532 }, { "entropy": 1.716547002394994, "epoch": 1.81623685150092, "grad_norm": 0.5876471400260925, "learning_rate": 2.3961674148741954e-06, "loss": 1.4986, "mean_token_accuracy": 0.6417978306611379, "num_tokens": 2773771033.0, "step": 16533 }, { "entropy": 1.6878297924995422, "epoch": 1.816346708412293, "grad_norm": 0.6864316463470459, "learning_rate": 2.3956977361621607e-06, "loss": 1.4623, "mean_token_accuracy": 0.6522022038698196, "num_tokens": 2773946616.0, "step": 16534 }, { "entropy": 1.714217483997345, "epoch": 1.816456565323666, "grad_norm": 0.7374356389045715, "learning_rate": 2.3952283297724162e-06, "loss": 1.4191, "mean_token_accuracy": 0.6531722098588943, "num_tokens": 2774101537.0, "step": 16535 }, { "entropy": 1.6750989357630413, "epoch": 1.8165664222350388, "grad_norm": 0.8244749307632446, "learning_rate": 2.394759195719818e-06, "loss": 1.3881, "mean_token_accuracy": 0.6675261706113815, "num_tokens": 2774258044.0, "step": 16536 }, { "entropy": 1.6771197319030762, "epoch": 1.816676279146412, "grad_norm": 0.755206823348999, "learning_rate": 2.394290334019213e-06, "loss": 1.3127, "mean_token_accuracy": 0.6679815848668417, "num_tokens": 2774407830.0, "step": 16537 }, { "entropy": 1.6540471911430359, "epoch": 1.8167861360577846, "grad_norm": 0.6771402359008789, "learning_rate": 2.3938217446854393e-06, "loss": 1.1949, "mean_token_accuracy": 0.6918987234433492, "num_tokens": 2774503263.0, "step": 16538 }, { "entropy": 1.7108920514583588, "epoch": 1.8168959929691577, "grad_norm": 0.7171925902366638, "learning_rate": 2.3933534277333327e-06, "loss": 1.3017, "mean_token_accuracy": 0.6671940038601557, "num_tokens": 2774658086.0, "step": 16539 }, { "entropy": 1.7624292373657227, "epoch": 1.8170058498805306, "grad_norm": 0.753374457359314, "learning_rate": 2.392885383177711e-06, "loss": 1.4478, "mean_token_accuracy": 0.6500358184178671, "num_tokens": 2774808233.0, "step": 16540 }, { "entropy": 1.7303147614002228, "epoch": 1.8171157067919035, "grad_norm": 0.6383606791496277, "learning_rate": 2.3924176110333864e-06, "loss": 1.353, "mean_token_accuracy": 0.6555335720380148, "num_tokens": 2775015569.0, "step": 16541 }, { "entropy": 1.7392794887224834, "epoch": 1.8172255637032766, "grad_norm": 0.7534666061401367, "learning_rate": 2.391950111315167e-06, "loss": 1.3783, "mean_token_accuracy": 0.6698889136314392, "num_tokens": 2775181058.0, "step": 16542 }, { "entropy": 1.630462755759557, "epoch": 1.8173354206146493, "grad_norm": 0.7775093913078308, "learning_rate": 2.3914828840378476e-06, "loss": 1.196, "mean_token_accuracy": 0.6869658430417379, "num_tokens": 2775292306.0, "step": 16543 }, { "entropy": 1.7384036084016163, "epoch": 1.8174452775260224, "grad_norm": 0.788651704788208, "learning_rate": 2.3910159292162167e-06, "loss": 1.3534, "mean_token_accuracy": 0.6725411961476008, "num_tokens": 2775447501.0, "step": 16544 }, { "entropy": 1.7322270274162292, "epoch": 1.8175551344373952, "grad_norm": 0.6624115109443665, "learning_rate": 2.3905492468650527e-06, "loss": 1.4113, "mean_token_accuracy": 0.6496214121580124, "num_tokens": 2775618693.0, "step": 16545 }, { "entropy": 1.750143031279246, "epoch": 1.8176649913487681, "grad_norm": 0.6496718525886536, "learning_rate": 2.3900828369991234e-06, "loss": 1.466, "mean_token_accuracy": 0.6551011850436529, "num_tokens": 2775786820.0, "step": 16546 }, { "entropy": 1.6444389820098877, "epoch": 1.8177748482601412, "grad_norm": 0.7461166977882385, "learning_rate": 2.389616699633194e-06, "loss": 1.4606, "mean_token_accuracy": 0.6540708293517431, "num_tokens": 2775955052.0, "step": 16547 }, { "entropy": 1.7287197808424632, "epoch": 1.8178847051715141, "grad_norm": 0.6682546138763428, "learning_rate": 2.3891508347820165e-06, "loss": 1.0821, "mean_token_accuracy": 0.6904325783252716, "num_tokens": 2776186575.0, "step": 16548 }, { "entropy": 1.7020801107088726, "epoch": 1.817994562082887, "grad_norm": 0.8539118766784668, "learning_rate": 2.3886852424603333e-06, "loss": 1.4569, "mean_token_accuracy": 0.6489265362421671, "num_tokens": 2776385187.0, "step": 16549 }, { "entropy": 1.7080492277940114, "epoch": 1.8181044189942601, "grad_norm": 0.7399299740791321, "learning_rate": 2.388219922682883e-06, "loss": 1.4506, "mean_token_accuracy": 0.6532412966092428, "num_tokens": 2776571303.0, "step": 16550 }, { "entropy": 1.7559833427270253, "epoch": 1.8182142759056328, "grad_norm": 0.727372407913208, "learning_rate": 2.387754875464391e-06, "loss": 1.4189, "mean_token_accuracy": 0.6559430956840515, "num_tokens": 2776713216.0, "step": 16551 }, { "entropy": 1.6203450560569763, "epoch": 1.818324132817006, "grad_norm": 0.6398701071739197, "learning_rate": 2.3872901008195773e-06, "loss": 1.2424, "mean_token_accuracy": 0.6794936507940292, "num_tokens": 2776872230.0, "step": 16552 }, { "entropy": 1.7338751256465912, "epoch": 1.8184339897283788, "grad_norm": 0.7848241925239563, "learning_rate": 2.3868255987631505e-06, "loss": 1.2348, "mean_token_accuracy": 0.6750635951757431, "num_tokens": 2776992799.0, "step": 16553 }, { "entropy": 1.7074306507905324, "epoch": 1.8185438466397517, "grad_norm": 0.7216284871101379, "learning_rate": 2.386361369309812e-06, "loss": 1.3222, "mean_token_accuracy": 0.6633963038523992, "num_tokens": 2777138226.0, "step": 16554 }, { "entropy": 1.6739212572574615, "epoch": 1.8186537035511248, "grad_norm": 0.7185259461402893, "learning_rate": 2.385897412474255e-06, "loss": 1.5609, "mean_token_accuracy": 0.6289549271265665, "num_tokens": 2777323423.0, "step": 16555 }, { "entropy": 1.7207949956258137, "epoch": 1.8187635604624974, "grad_norm": 0.7477259039878845, "learning_rate": 2.385433728271164e-06, "loss": 1.3418, "mean_token_accuracy": 0.6546726375818253, "num_tokens": 2777504447.0, "step": 16556 }, { "entropy": 1.6997572779655457, "epoch": 1.8188734173738705, "grad_norm": 0.7746348977088928, "learning_rate": 2.3849703167152125e-06, "loss": 1.3969, "mean_token_accuracy": 0.6501687119404475, "num_tokens": 2777669377.0, "step": 16557 }, { "entropy": 1.715737024943034, "epoch": 1.8189832742852434, "grad_norm": 0.8140842318534851, "learning_rate": 2.3845071778210687e-06, "loss": 1.5268, "mean_token_accuracy": 0.6537976066271464, "num_tokens": 2777818039.0, "step": 16558 }, { "entropy": 1.714678963025411, "epoch": 1.8190931311966163, "grad_norm": 0.6977283954620361, "learning_rate": 2.3840443116033906e-06, "loss": 1.5821, "mean_token_accuracy": 0.6431048860152563, "num_tokens": 2778031195.0, "step": 16559 }, { "entropy": 1.7471038500467937, "epoch": 1.8192029881079894, "grad_norm": 0.6290971040725708, "learning_rate": 2.383581718076828e-06, "loss": 1.4625, "mean_token_accuracy": 0.6498029927412668, "num_tokens": 2778184112.0, "step": 16560 }, { "entropy": 1.7384718358516693, "epoch": 1.8193128450193623, "grad_norm": 0.8048774600028992, "learning_rate": 2.3831193972560204e-06, "loss": 1.5574, "mean_token_accuracy": 0.65819351375103, "num_tokens": 2778325520.0, "step": 16561 }, { "entropy": 1.709036111831665, "epoch": 1.8194227019307352, "grad_norm": 0.6308175325393677, "learning_rate": 2.382657349155602e-06, "loss": 1.4355, "mean_token_accuracy": 0.6599417279163996, "num_tokens": 2778495891.0, "step": 16562 }, { "entropy": 1.725346823533376, "epoch": 1.8195325588421083, "grad_norm": 0.6439611315727234, "learning_rate": 2.3821955737901942e-06, "loss": 1.3866, "mean_token_accuracy": 0.6503596703211466, "num_tokens": 2778659002.0, "step": 16563 }, { "entropy": 1.6709474126497905, "epoch": 1.819642415753481, "grad_norm": 0.6519814729690552, "learning_rate": 2.381734071174416e-06, "loss": 1.4571, "mean_token_accuracy": 0.642798125743866, "num_tokens": 2778843826.0, "step": 16564 }, { "entropy": 1.6215501725673676, "epoch": 1.819752272664854, "grad_norm": 0.655518114566803, "learning_rate": 2.381272841322869e-06, "loss": 1.2602, "mean_token_accuracy": 0.675087700287501, "num_tokens": 2778996584.0, "step": 16565 }, { "entropy": 1.7112641334533691, "epoch": 1.819862129576227, "grad_norm": 0.653723418712616, "learning_rate": 2.380811884250152e-06, "loss": 1.3754, "mean_token_accuracy": 0.6574215839306513, "num_tokens": 2779187702.0, "step": 16566 }, { "entropy": 1.6549660762151082, "epoch": 1.8199719864875998, "grad_norm": 0.7489318251609802, "learning_rate": 2.3803511999708554e-06, "loss": 1.3494, "mean_token_accuracy": 0.6677233328421911, "num_tokens": 2779388320.0, "step": 16567 }, { "entropy": 1.6793318192164104, "epoch": 1.820081843398973, "grad_norm": 0.6962845921516418, "learning_rate": 2.3798907884995617e-06, "loss": 1.3043, "mean_token_accuracy": 0.6692550530036291, "num_tokens": 2779514019.0, "step": 16568 }, { "entropy": 1.6745788753032684, "epoch": 1.8201917003103458, "grad_norm": 0.6880436539649963, "learning_rate": 2.379430649850837e-06, "loss": 1.3641, "mean_token_accuracy": 0.6582282483577728, "num_tokens": 2779675915.0, "step": 16569 }, { "entropy": 1.7116785844167073, "epoch": 1.8203015572217187, "grad_norm": 0.8645946383476257, "learning_rate": 2.3789707840392484e-06, "loss": 1.3819, "mean_token_accuracy": 0.6639973024527231, "num_tokens": 2779823266.0, "step": 16570 }, { "entropy": 1.6840636630853016, "epoch": 1.8204114141330916, "grad_norm": 0.6108362078666687, "learning_rate": 2.378511191079351e-06, "loss": 1.4026, "mean_token_accuracy": 0.6641562829415003, "num_tokens": 2779985266.0, "step": 16571 }, { "entropy": 1.7049194872379303, "epoch": 1.8205212710444645, "grad_norm": 0.7385087013244629, "learning_rate": 2.378051870985689e-06, "loss": 1.4367, "mean_token_accuracy": 0.650545577208201, "num_tokens": 2780144229.0, "step": 16572 }, { "entropy": 1.614651362101237, "epoch": 1.8206311279558376, "grad_norm": 0.6099897027015686, "learning_rate": 2.3775928237727996e-06, "loss": 1.3483, "mean_token_accuracy": 0.6629302948713303, "num_tokens": 2780302810.0, "step": 16573 }, { "entropy": 1.6483580370744069, "epoch": 1.8207409848672105, "grad_norm": 0.7105104327201843, "learning_rate": 2.377134049455213e-06, "loss": 1.3006, "mean_token_accuracy": 0.664797286192576, "num_tokens": 2780474250.0, "step": 16574 }, { "entropy": 1.7492181360721588, "epoch": 1.8208508417785834, "grad_norm": 0.6788060665130615, "learning_rate": 2.3766755480474464e-06, "loss": 1.4855, "mean_token_accuracy": 0.6429063032070795, "num_tokens": 2780669549.0, "step": 16575 }, { "entropy": 1.6822342773278554, "epoch": 1.8209606986899565, "grad_norm": 0.636565089225769, "learning_rate": 2.3762173195640147e-06, "loss": 1.4087, "mean_token_accuracy": 0.6564022650321325, "num_tokens": 2780853162.0, "step": 16576 }, { "entropy": 1.772486279408137, "epoch": 1.8210705556013291, "grad_norm": 0.6843612194061279, "learning_rate": 2.375759364019419e-06, "loss": 1.4175, "mean_token_accuracy": 0.6469480246305466, "num_tokens": 2781056685.0, "step": 16577 }, { "entropy": 1.7506540218989055, "epoch": 1.8211804125127022, "grad_norm": 0.727794885635376, "learning_rate": 2.3753016814281514e-06, "loss": 1.435, "mean_token_accuracy": 0.6630039562781652, "num_tokens": 2781207737.0, "step": 16578 }, { "entropy": 1.7064573367436726, "epoch": 1.8212902694240751, "grad_norm": 0.7168003916740417, "learning_rate": 2.374844271804701e-06, "loss": 1.2831, "mean_token_accuracy": 0.6836849649747213, "num_tokens": 2781321392.0, "step": 16579 }, { "entropy": 1.7291381855805714, "epoch": 1.821400126335448, "grad_norm": 0.6939396858215332, "learning_rate": 2.3743871351635427e-06, "loss": 1.4408, "mean_token_accuracy": 0.6493770778179169, "num_tokens": 2781476898.0, "step": 16580 }, { "entropy": 1.7322145501772563, "epoch": 1.8215099832468211, "grad_norm": 0.6548502445220947, "learning_rate": 2.373930271519143e-06, "loss": 1.4634, "mean_token_accuracy": 0.6514165798823038, "num_tokens": 2781644750.0, "step": 16581 }, { "entropy": 1.7112524112065632, "epoch": 1.821619840158194, "grad_norm": 0.5996736884117126, "learning_rate": 2.373473680885964e-06, "loss": 1.364, "mean_token_accuracy": 0.6540916860103607, "num_tokens": 2781826533.0, "step": 16582 }, { "entropy": 1.7321696877479553, "epoch": 1.8217296970695669, "grad_norm": 0.7605938911437988, "learning_rate": 2.373017363278457e-06, "loss": 1.2532, "mean_token_accuracy": 0.676396057009697, "num_tokens": 2781947706.0, "step": 16583 }, { "entropy": 1.683882822593053, "epoch": 1.8218395539809398, "grad_norm": 0.8438287377357483, "learning_rate": 2.3725613187110626e-06, "loss": 1.3207, "mean_token_accuracy": 0.676286518573761, "num_tokens": 2782070912.0, "step": 16584 }, { "entropy": 1.6728091140588124, "epoch": 1.8219494108923127, "grad_norm": 0.7430797219276428, "learning_rate": 2.3721055471982138e-06, "loss": 1.2725, "mean_token_accuracy": 0.6717578570048014, "num_tokens": 2782206255.0, "step": 16585 }, { "entropy": 1.7225382626056671, "epoch": 1.8220592678036858, "grad_norm": 0.6596887707710266, "learning_rate": 2.3716500487543376e-06, "loss": 1.4749, "mean_token_accuracy": 0.6395098119974136, "num_tokens": 2782423591.0, "step": 16586 }, { "entropy": 1.7209839125474293, "epoch": 1.8221691247150587, "grad_norm": 0.7370496988296509, "learning_rate": 2.3711948233938485e-06, "loss": 1.451, "mean_token_accuracy": 0.6665770759185156, "num_tokens": 2782558304.0, "step": 16587 }, { "entropy": 1.6876719395319622, "epoch": 1.8222789816264315, "grad_norm": 0.7385010719299316, "learning_rate": 2.3707398711311553e-06, "loss": 1.2099, "mean_token_accuracy": 0.678598885734876, "num_tokens": 2782664534.0, "step": 16588 }, { "entropy": 1.7227604786554973, "epoch": 1.8223888385378046, "grad_norm": 0.7744255065917969, "learning_rate": 2.3702851919806576e-06, "loss": 1.3421, "mean_token_accuracy": 0.6641747852166494, "num_tokens": 2782797638.0, "step": 16589 }, { "entropy": 1.675682693719864, "epoch": 1.8224986954491773, "grad_norm": 0.6251360177993774, "learning_rate": 2.369830785956744e-06, "loss": 1.2506, "mean_token_accuracy": 0.6763549745082855, "num_tokens": 2782911180.0, "step": 16590 }, { "entropy": 1.769344409306844, "epoch": 1.8226085523605504, "grad_norm": 0.7733559012413025, "learning_rate": 2.3693766530737978e-06, "loss": 1.5038, "mean_token_accuracy": 0.648768370350202, "num_tokens": 2783094728.0, "step": 16591 }, { "entropy": 1.7493232587973278, "epoch": 1.8227184092719233, "grad_norm": 0.711170494556427, "learning_rate": 2.3689227933461916e-06, "loss": 1.3211, "mean_token_accuracy": 0.6690777093172073, "num_tokens": 2783258611.0, "step": 16592 }, { "entropy": 1.681398739417394, "epoch": 1.8228282661832962, "grad_norm": 0.7355263829231262, "learning_rate": 2.368469206788289e-06, "loss": 1.2677, "mean_token_accuracy": 0.6758220344781876, "num_tokens": 2783430258.0, "step": 16593 }, { "entropy": 1.6628807882467906, "epoch": 1.8229381230946693, "grad_norm": 0.7200619578361511, "learning_rate": 2.3680158934144456e-06, "loss": 1.5192, "mean_token_accuracy": 0.6433456887801489, "num_tokens": 2783632093.0, "step": 16594 }, { "entropy": 1.701430231332779, "epoch": 1.8230479800060422, "grad_norm": 0.677447497844696, "learning_rate": 2.3675628532390113e-06, "loss": 1.4706, "mean_token_accuracy": 0.6618293623129526, "num_tokens": 2783774737.0, "step": 16595 }, { "entropy": 1.6659322182337444, "epoch": 1.823157836917415, "grad_norm": 0.7491664886474609, "learning_rate": 2.3671100862763226e-06, "loss": 1.3961, "mean_token_accuracy": 0.6457837373018265, "num_tokens": 2783955761.0, "step": 16596 }, { "entropy": 1.6904160976409912, "epoch": 1.823267693828788, "grad_norm": 0.6810483932495117, "learning_rate": 2.3666575925407086e-06, "loss": 1.4507, "mean_token_accuracy": 0.6556557367245356, "num_tokens": 2784112844.0, "step": 16597 }, { "entropy": 1.7386436462402344, "epoch": 1.8233775507401608, "grad_norm": 0.8389174342155457, "learning_rate": 2.3662053720464927e-06, "loss": 1.4558, "mean_token_accuracy": 0.6538802261153857, "num_tokens": 2784240847.0, "step": 16598 }, { "entropy": 1.6878819664319356, "epoch": 1.823487407651534, "grad_norm": 0.7181552648544312, "learning_rate": 2.3657534248079855e-06, "loss": 1.3477, "mean_token_accuracy": 0.6808636685212454, "num_tokens": 2784351960.0, "step": 16599 }, { "entropy": 1.6631807684898376, "epoch": 1.8235972645629068, "grad_norm": 0.6783377528190613, "learning_rate": 2.3653017508394916e-06, "loss": 1.3382, "mean_token_accuracy": 0.6660791685183843, "num_tokens": 2784499447.0, "step": 16600 }, { "entropy": 1.6464990079402924, "epoch": 1.8237071214742797, "grad_norm": 0.7220808267593384, "learning_rate": 2.3648503501553083e-06, "loss": 1.4082, "mean_token_accuracy": 0.6685073425372442, "num_tokens": 2784662470.0, "step": 16601 }, { "entropy": 1.718644032875697, "epoch": 1.8238169783856528, "grad_norm": 0.6363226175308228, "learning_rate": 2.3643992227697176e-06, "loss": 1.4001, "mean_token_accuracy": 0.6684810817241669, "num_tokens": 2784824331.0, "step": 16602 }, { "entropy": 1.6482765078544617, "epoch": 1.8239268352970255, "grad_norm": 0.5895575284957886, "learning_rate": 2.363948368697002e-06, "loss": 1.4568, "mean_token_accuracy": 0.6472050746281942, "num_tokens": 2785067484.0, "step": 16603 }, { "entropy": 1.6042577922344208, "epoch": 1.8240366922083986, "grad_norm": 0.662174642086029, "learning_rate": 2.363497787951428e-06, "loss": 1.3404, "mean_token_accuracy": 0.6540089795986811, "num_tokens": 2785336972.0, "step": 16604 }, { "entropy": 1.659020572900772, "epoch": 1.8241465491197715, "grad_norm": 0.6183151006698608, "learning_rate": 2.363047480547258e-06, "loss": 1.3836, "mean_token_accuracy": 0.666933129231135, "num_tokens": 2785508777.0, "step": 16605 }, { "entropy": 1.6878956854343414, "epoch": 1.8242564060311444, "grad_norm": 0.7308575510978699, "learning_rate": 2.362597446498742e-06, "loss": 1.1711, "mean_token_accuracy": 0.690845270951589, "num_tokens": 2785628333.0, "step": 16606 }, { "entropy": 1.7506561974684398, "epoch": 1.8243662629425175, "grad_norm": 0.7305101752281189, "learning_rate": 2.362147685820126e-06, "loss": 1.413, "mean_token_accuracy": 0.6451665014028549, "num_tokens": 2785756364.0, "step": 16607 }, { "entropy": 1.7476352254549663, "epoch": 1.8244761198538904, "grad_norm": 0.6824808716773987, "learning_rate": 2.361698198525644e-06, "loss": 1.4284, "mean_token_accuracy": 0.6470771382252375, "num_tokens": 2785922197.0, "step": 16608 }, { "entropy": 1.734663297732671, "epoch": 1.8245859767652632, "grad_norm": 0.7013296484947205, "learning_rate": 2.36124898462952e-06, "loss": 1.3782, "mean_token_accuracy": 0.6541523436705271, "num_tokens": 2786105100.0, "step": 16609 }, { "entropy": 1.7278130650520325, "epoch": 1.8246958336766363, "grad_norm": 0.8316255211830139, "learning_rate": 2.3608000441459748e-06, "loss": 1.382, "mean_token_accuracy": 0.6508718381325403, "num_tokens": 2786257010.0, "step": 16610 }, { "entropy": 1.699917882680893, "epoch": 1.824805690588009, "grad_norm": 0.8237127661705017, "learning_rate": 2.3603513770892125e-06, "loss": 1.3808, "mean_token_accuracy": 0.6678240597248077, "num_tokens": 2786411966.0, "step": 16611 }, { "entropy": 1.7327560484409332, "epoch": 1.8249155474993821, "grad_norm": 0.8676771521568298, "learning_rate": 2.3599029834734393e-06, "loss": 1.4715, "mean_token_accuracy": 0.6531338741381963, "num_tokens": 2786547040.0, "step": 16612 }, { "entropy": 1.7098338504632313, "epoch": 1.825025404410755, "grad_norm": 0.714078962802887, "learning_rate": 2.3594548633128413e-06, "loss": 1.3284, "mean_token_accuracy": 0.6696013609568278, "num_tokens": 2786756645.0, "step": 16613 }, { "entropy": 1.6878060698509216, "epoch": 1.8251352613221279, "grad_norm": 0.6905450820922852, "learning_rate": 2.359007016621603e-06, "loss": 1.4931, "mean_token_accuracy": 0.6502345601717631, "num_tokens": 2786951298.0, "step": 16614 }, { "entropy": 1.702516903479894, "epoch": 1.825245118233501, "grad_norm": 0.7101051807403564, "learning_rate": 2.3585594434139002e-06, "loss": 1.4063, "mean_token_accuracy": 0.6508783797423044, "num_tokens": 2787110816.0, "step": 16615 }, { "entropy": 1.7455834746360779, "epoch": 1.8253549751448737, "grad_norm": 0.6841264963150024, "learning_rate": 2.3581121437038975e-06, "loss": 1.2673, "mean_token_accuracy": 0.6685113509496053, "num_tokens": 2787231743.0, "step": 16616 }, { "entropy": 1.7308682600657146, "epoch": 1.8254648320562468, "grad_norm": 0.6765478253364563, "learning_rate": 2.3576651175057493e-06, "loss": 1.3843, "mean_token_accuracy": 0.6586334705352783, "num_tokens": 2787401301.0, "step": 16617 }, { "entropy": 1.6286826531092327, "epoch": 1.8255746889676197, "grad_norm": 0.7048670053482056, "learning_rate": 2.3572183648336072e-06, "loss": 1.2665, "mean_token_accuracy": 0.6805399060249329, "num_tokens": 2787572843.0, "step": 16618 }, { "entropy": 1.6946585575739543, "epoch": 1.8256845458789925, "grad_norm": 0.6286748647689819, "learning_rate": 2.3567718857016084e-06, "loss": 1.372, "mean_token_accuracy": 0.6510303070147833, "num_tokens": 2787763549.0, "step": 16619 }, { "entropy": 1.7653050124645233, "epoch": 1.8257944027903656, "grad_norm": 0.7640430331230164, "learning_rate": 2.3563256801238855e-06, "loss": 1.471, "mean_token_accuracy": 0.6584224353233973, "num_tokens": 2787893159.0, "step": 16620 }, { "entropy": 1.626900275548299, "epoch": 1.8259042597017385, "grad_norm": 0.7232459187507629, "learning_rate": 2.35587974811456e-06, "loss": 1.4189, "mean_token_accuracy": 0.6637706806262335, "num_tokens": 2788076267.0, "step": 16621 }, { "entropy": 1.6736698547999065, "epoch": 1.8260141166131114, "grad_norm": 0.7517544627189636, "learning_rate": 2.3554340896877453e-06, "loss": 1.3742, "mean_token_accuracy": 0.6552935838699341, "num_tokens": 2788232255.0, "step": 16622 }, { "entropy": 1.698614478111267, "epoch": 1.8261239735244845, "grad_norm": 0.7546419501304626, "learning_rate": 2.3549887048575446e-06, "loss": 1.4902, "mean_token_accuracy": 0.6500131438175837, "num_tokens": 2788431091.0, "step": 16623 }, { "entropy": 1.6913301448027294, "epoch": 1.8262338304358572, "grad_norm": 0.7140382528305054, "learning_rate": 2.354543593638059e-06, "loss": 1.3081, "mean_token_accuracy": 0.668897733092308, "num_tokens": 2788587712.0, "step": 16624 }, { "entropy": 1.728107343117396, "epoch": 1.8263436873472303, "grad_norm": 0.6673551797866821, "learning_rate": 2.3540987560433704e-06, "loss": 1.4086, "mean_token_accuracy": 0.6558303982019424, "num_tokens": 2788760375.0, "step": 16625 }, { "entropy": 1.6470238665739696, "epoch": 1.8264535442586032, "grad_norm": 0.6046478152275085, "learning_rate": 2.353654192087561e-06, "loss": 1.3309, "mean_token_accuracy": 0.6752079874277115, "num_tokens": 2788954047.0, "step": 16626 }, { "entropy": 1.6849171618620555, "epoch": 1.826563401169976, "grad_norm": 0.762234628200531, "learning_rate": 2.3532099017847002e-06, "loss": 1.4178, "mean_token_accuracy": 0.6630821377038956, "num_tokens": 2789138869.0, "step": 16627 }, { "entropy": 1.7250126202901204, "epoch": 1.8266732580813492, "grad_norm": 0.6825308799743652, "learning_rate": 2.3527658851488503e-06, "loss": 1.3463, "mean_token_accuracy": 0.6651128977537155, "num_tokens": 2789289194.0, "step": 16628 }, { "entropy": 1.6536945700645447, "epoch": 1.8267831149927218, "grad_norm": 0.6264491081237793, "learning_rate": 2.3523221421940624e-06, "loss": 1.3165, "mean_token_accuracy": 0.6712521612644196, "num_tokens": 2789433752.0, "step": 16629 }, { "entropy": 1.6571077704429626, "epoch": 1.826892971904095, "grad_norm": 0.6818052530288696, "learning_rate": 2.351878672934383e-06, "loss": 1.5239, "mean_token_accuracy": 0.6425358305374781, "num_tokens": 2789598739.0, "step": 16630 }, { "entropy": 1.7497955461343129, "epoch": 1.8270028288154678, "grad_norm": 0.7354583144187927, "learning_rate": 2.351435477383846e-06, "loss": 1.5477, "mean_token_accuracy": 0.6368038604656855, "num_tokens": 2789772847.0, "step": 16631 }, { "entropy": 1.736647496620814, "epoch": 1.8271126857268407, "grad_norm": 0.8048020601272583, "learning_rate": 2.35099255555648e-06, "loss": 1.5508, "mean_token_accuracy": 0.6268570274114609, "num_tokens": 2789991318.0, "step": 16632 }, { "entropy": 1.6801136036713917, "epoch": 1.8272225426382138, "grad_norm": 0.6898782253265381, "learning_rate": 2.350549907466302e-06, "loss": 1.3481, "mean_token_accuracy": 0.66416896879673, "num_tokens": 2790159923.0, "step": 16633 }, { "entropy": 1.7110735873381298, "epoch": 1.8273323995495867, "grad_norm": 0.8061655163764954, "learning_rate": 2.3501075331273208e-06, "loss": 1.4615, "mean_token_accuracy": 0.6463738034168879, "num_tokens": 2790314777.0, "step": 16634 }, { "entropy": 1.7187353670597076, "epoch": 1.8274422564609596, "grad_norm": 0.625403642654419, "learning_rate": 2.349665432553538e-06, "loss": 1.2924, "mean_token_accuracy": 0.6705781618754069, "num_tokens": 2790451689.0, "step": 16635 }, { "entropy": 1.7233947416146596, "epoch": 1.8275521133723327, "grad_norm": 0.5538727045059204, "learning_rate": 2.3492236057589494e-06, "loss": 1.5311, "mean_token_accuracy": 0.623880739013354, "num_tokens": 2790690085.0, "step": 16636 }, { "entropy": 1.734166105588277, "epoch": 1.8276619702837054, "grad_norm": 0.7274359464645386, "learning_rate": 2.348782052757533e-06, "loss": 1.5441, "mean_token_accuracy": 0.6415307223796844, "num_tokens": 2790855735.0, "step": 16637 }, { "entropy": 1.7256428599357605, "epoch": 1.8277718271950785, "grad_norm": 0.6457618474960327, "learning_rate": 2.3483407735632668e-06, "loss": 1.4386, "mean_token_accuracy": 0.6566118150949478, "num_tokens": 2791039866.0, "step": 16638 }, { "entropy": 1.7583041091759999, "epoch": 1.8278816841064514, "grad_norm": 0.7741835713386536, "learning_rate": 2.347899768190117e-06, "loss": 1.4442, "mean_token_accuracy": 0.660509412487348, "num_tokens": 2791221988.0, "step": 16639 }, { "entropy": 1.7275878588358562, "epoch": 1.8279915410178242, "grad_norm": 0.7615863084793091, "learning_rate": 2.34745903665204e-06, "loss": 1.3726, "mean_token_accuracy": 0.6682560543219248, "num_tokens": 2791351713.0, "step": 16640 }, { "entropy": 1.6993821263313293, "epoch": 1.8281013979291973, "grad_norm": 0.6623696684837341, "learning_rate": 2.3470185789629854e-06, "loss": 1.4305, "mean_token_accuracy": 0.6371948470671972, "num_tokens": 2791551404.0, "step": 16641 }, { "entropy": 1.7266852855682373, "epoch": 1.82821125484057, "grad_norm": 0.6855489015579224, "learning_rate": 2.3465783951368955e-06, "loss": 1.2734, "mean_token_accuracy": 0.6709648966789246, "num_tokens": 2791694899.0, "step": 16642 }, { "entropy": 1.7204951246579487, "epoch": 1.8283211117519431, "grad_norm": 0.6789599657058716, "learning_rate": 2.3461384851876983e-06, "loss": 1.6562, "mean_token_accuracy": 0.6272246465086937, "num_tokens": 2791866300.0, "step": 16643 }, { "entropy": 1.7294853528340657, "epoch": 1.828430968663316, "grad_norm": 0.6738252639770508, "learning_rate": 2.3456988491293193e-06, "loss": 1.5462, "mean_token_accuracy": 0.6291048725446066, "num_tokens": 2792090459.0, "step": 16644 }, { "entropy": 1.7207268675168355, "epoch": 1.8285408255746889, "grad_norm": 0.7021991610527039, "learning_rate": 2.345259486975672e-06, "loss": 1.2478, "mean_token_accuracy": 0.6744669079780579, "num_tokens": 2792232045.0, "step": 16645 }, { "entropy": 1.7015381852785747, "epoch": 1.828650682486062, "grad_norm": 2.8002796173095703, "learning_rate": 2.3448203987406613e-06, "loss": 1.0468, "mean_token_accuracy": 0.6926949769258499, "num_tokens": 2792370336.0, "step": 16646 }, { "entropy": 1.718252569437027, "epoch": 1.8287605393974349, "grad_norm": 0.7113930583000183, "learning_rate": 2.3443815844381846e-06, "loss": 1.3618, "mean_token_accuracy": 0.6545801758766174, "num_tokens": 2792522904.0, "step": 16647 }, { "entropy": 1.6835759778817494, "epoch": 1.8288703963088078, "grad_norm": 0.7733089327812195, "learning_rate": 2.3439430440821325e-06, "loss": 1.5894, "mean_token_accuracy": 0.6576187337438265, "num_tokens": 2792680483.0, "step": 16648 }, { "entropy": 1.7124978800614674, "epoch": 1.8289802532201809, "grad_norm": 0.7034731507301331, "learning_rate": 2.343504777686381e-06, "loss": 1.3342, "mean_token_accuracy": 0.6707392732302347, "num_tokens": 2792825740.0, "step": 16649 }, { "entropy": 1.7099045515060425, "epoch": 1.8290901101315535, "grad_norm": 0.7132044434547424, "learning_rate": 2.3430667852648026e-06, "loss": 1.3485, "mean_token_accuracy": 0.6722139616807302, "num_tokens": 2792955789.0, "step": 16650 }, { "entropy": 1.679776022831599, "epoch": 1.8291999670429266, "grad_norm": 0.6770622730255127, "learning_rate": 2.3426290668312595e-06, "loss": 1.3703, "mean_token_accuracy": 0.6551995724439621, "num_tokens": 2793117672.0, "step": 16651 }, { "entropy": 1.6446966528892517, "epoch": 1.8293098239542995, "grad_norm": 0.6462422609329224, "learning_rate": 2.3421916223996065e-06, "loss": 1.3236, "mean_token_accuracy": 0.6686488538980484, "num_tokens": 2793272672.0, "step": 16652 }, { "entropy": 1.6652612388134003, "epoch": 1.8294196808656724, "grad_norm": 0.721100926399231, "learning_rate": 2.341754451983686e-06, "loss": 1.4143, "mean_token_accuracy": 0.6578306208054224, "num_tokens": 2793437356.0, "step": 16653 }, { "entropy": 1.728610356648763, "epoch": 1.8295295377770455, "grad_norm": 0.755320131778717, "learning_rate": 2.341317555597336e-06, "loss": 1.3919, "mean_token_accuracy": 0.6722359557946523, "num_tokens": 2793560530.0, "step": 16654 }, { "entropy": 1.7180915176868439, "epoch": 1.8296393946884182, "grad_norm": 0.8217064142227173, "learning_rate": 2.340880933254383e-06, "loss": 1.4459, "mean_token_accuracy": 0.6533014078934988, "num_tokens": 2793736230.0, "step": 16655 }, { "entropy": 1.7117689450581868, "epoch": 1.8297492515997913, "grad_norm": 0.6694772839546204, "learning_rate": 2.340444584968648e-06, "loss": 1.4054, "mean_token_accuracy": 0.6524570882320404, "num_tokens": 2793904553.0, "step": 16656 }, { "entropy": 1.6685727834701538, "epoch": 1.8298591085111642, "grad_norm": 0.6724652051925659, "learning_rate": 2.34000851075394e-06, "loss": 1.4027, "mean_token_accuracy": 0.6603127866983414, "num_tokens": 2794065707.0, "step": 16657 }, { "entropy": 1.6848424673080444, "epoch": 1.829968965422537, "grad_norm": 0.6383946537971497, "learning_rate": 2.339572710624059e-06, "loss": 1.3201, "mean_token_accuracy": 0.6705543498198191, "num_tokens": 2794199367.0, "step": 16658 }, { "entropy": 1.7302991648515065, "epoch": 1.8300788223339102, "grad_norm": 0.823840320110321, "learning_rate": 2.3391371845928e-06, "loss": 1.4138, "mean_token_accuracy": 0.6613588233788809, "num_tokens": 2794363556.0, "step": 16659 }, { "entropy": 1.740784724553426, "epoch": 1.830188679245283, "grad_norm": 0.7151713967323303, "learning_rate": 2.3387019326739455e-06, "loss": 1.3664, "mean_token_accuracy": 0.6722310036420822, "num_tokens": 2794476140.0, "step": 16660 }, { "entropy": 1.7195179959138234, "epoch": 1.830298536156656, "grad_norm": 0.7539914846420288, "learning_rate": 2.338266954881273e-06, "loss": 1.5308, "mean_token_accuracy": 0.6465074469645818, "num_tokens": 2794668990.0, "step": 16661 }, { "entropy": 1.7197861671447754, "epoch": 1.830408393068029, "grad_norm": 0.8375680446624756, "learning_rate": 2.337832251228547e-06, "loss": 1.5809, "mean_token_accuracy": 0.6555479913949966, "num_tokens": 2794844610.0, "step": 16662 }, { "entropy": 1.7217328945795696, "epoch": 1.8305182499794017, "grad_norm": 0.7076695561408997, "learning_rate": 2.3373978217295286e-06, "loss": 1.3478, "mean_token_accuracy": 0.659163624048233, "num_tokens": 2795012428.0, "step": 16663 }, { "entropy": 1.7476484874884288, "epoch": 1.8306281068907748, "grad_norm": 0.6806117296218872, "learning_rate": 2.336963666397965e-06, "loss": 1.6084, "mean_token_accuracy": 0.6421725749969482, "num_tokens": 2795174407.0, "step": 16664 }, { "entropy": 1.6913608014583588, "epoch": 1.8307379638021477, "grad_norm": 0.7132964134216309, "learning_rate": 2.336529785247597e-06, "loss": 1.4221, "mean_token_accuracy": 0.6525693833827972, "num_tokens": 2795305317.0, "step": 16665 }, { "entropy": 1.703669399023056, "epoch": 1.8308478207135206, "grad_norm": 0.7673993706703186, "learning_rate": 2.336096178292159e-06, "loss": 1.3788, "mean_token_accuracy": 0.6568796038627625, "num_tokens": 2795427951.0, "step": 16666 }, { "entropy": 1.6177269021670024, "epoch": 1.8309576776248937, "grad_norm": 0.6640709638595581, "learning_rate": 2.3356628455453704e-06, "loss": 1.2852, "mean_token_accuracy": 0.6807574729124705, "num_tokens": 2795558559.0, "step": 16667 }, { "entropy": 1.719151347875595, "epoch": 1.8310675345362664, "grad_norm": 0.7354775667190552, "learning_rate": 2.3352297870209508e-06, "loss": 1.3344, "mean_token_accuracy": 0.6731430192788442, "num_tokens": 2795681017.0, "step": 16668 }, { "entropy": 1.6955258548259735, "epoch": 1.8311773914476395, "grad_norm": 0.8023842573165894, "learning_rate": 2.3347970027326043e-06, "loss": 1.497, "mean_token_accuracy": 0.6416831761598587, "num_tokens": 2795871098.0, "step": 16669 }, { "entropy": 1.7174125413099925, "epoch": 1.8312872483590124, "grad_norm": 0.6527412533760071, "learning_rate": 2.3343644926940253e-06, "loss": 1.2843, "mean_token_accuracy": 0.6673098454872767, "num_tokens": 2796022135.0, "step": 16670 }, { "entropy": 1.6860974431037903, "epoch": 1.8313971052703852, "grad_norm": 0.6887062788009644, "learning_rate": 2.3339322569189074e-06, "loss": 1.4243, "mean_token_accuracy": 0.6650121112664541, "num_tokens": 2796181833.0, "step": 16671 }, { "entropy": 1.6747096180915833, "epoch": 1.8315069621817583, "grad_norm": 0.7698276042938232, "learning_rate": 2.3335002954209285e-06, "loss": 1.4173, "mean_token_accuracy": 0.6679667383432388, "num_tokens": 2796341629.0, "step": 16672 }, { "entropy": 1.705536663532257, "epoch": 1.8316168190931312, "grad_norm": 0.7042364478111267, "learning_rate": 2.33306860821376e-06, "loss": 1.5278, "mean_token_accuracy": 0.6453391214211782, "num_tokens": 2796497957.0, "step": 16673 }, { "entropy": 1.678510695695877, "epoch": 1.8317266760045041, "grad_norm": 0.6615474820137024, "learning_rate": 2.3326371953110642e-06, "loss": 1.3508, "mean_token_accuracy": 0.6713146766026815, "num_tokens": 2796665935.0, "step": 16674 }, { "entropy": 1.6524465282758076, "epoch": 1.8318365329158772, "grad_norm": 0.6595404744148254, "learning_rate": 2.332206056726495e-06, "loss": 1.3634, "mean_token_accuracy": 0.6558839529752731, "num_tokens": 2796839734.0, "step": 16675 }, { "entropy": 1.683104048172633, "epoch": 1.8319463898272499, "grad_norm": 0.59060138463974, "learning_rate": 2.3317751924736994e-06, "loss": 1.4722, "mean_token_accuracy": 0.6600749840339025, "num_tokens": 2797058425.0, "step": 16676 }, { "entropy": 1.690855731566747, "epoch": 1.832056246738623, "grad_norm": 0.7545903921127319, "learning_rate": 2.331344602566313e-06, "loss": 1.1887, "mean_token_accuracy": 0.6840762843688329, "num_tokens": 2797168514.0, "step": 16677 }, { "entropy": 1.7053393423557281, "epoch": 1.8321661036499959, "grad_norm": 0.6647628545761108, "learning_rate": 2.3309142870179624e-06, "loss": 1.2683, "mean_token_accuracy": 0.6724948883056641, "num_tokens": 2797306928.0, "step": 16678 }, { "entropy": 1.7144875427087147, "epoch": 1.8322759605613688, "grad_norm": 0.7609655857086182, "learning_rate": 2.3304842458422687e-06, "loss": 1.4754, "mean_token_accuracy": 0.6519733120997747, "num_tokens": 2797455985.0, "step": 16679 }, { "entropy": 1.695515791575114, "epoch": 1.8323858174727419, "grad_norm": 0.7155903577804565, "learning_rate": 2.330054479052844e-06, "loss": 1.3468, "mean_token_accuracy": 0.6665776371955872, "num_tokens": 2797601314.0, "step": 16680 }, { "entropy": 1.730222334464391, "epoch": 1.8324956743841145, "grad_norm": 0.6431559920310974, "learning_rate": 2.329624986663286e-06, "loss": 1.4597, "mean_token_accuracy": 0.652314489086469, "num_tokens": 2797755866.0, "step": 16681 }, { "entropy": 1.7097695469856262, "epoch": 1.8326055312954876, "grad_norm": 0.7711726427078247, "learning_rate": 2.3291957686871906e-06, "loss": 1.3307, "mean_token_accuracy": 0.6789915611346563, "num_tokens": 2797949501.0, "step": 16682 }, { "entropy": 1.7180574436982472, "epoch": 1.8327153882068605, "grad_norm": 0.629719614982605, "learning_rate": 2.3287668251381425e-06, "loss": 1.341, "mean_token_accuracy": 0.661995048324267, "num_tokens": 2798137574.0, "step": 16683 }, { "entropy": 1.663592944542567, "epoch": 1.8328252451182334, "grad_norm": 0.5827559232711792, "learning_rate": 2.3283381560297174e-06, "loss": 1.3726, "mean_token_accuracy": 0.6665849586327871, "num_tokens": 2798318692.0, "step": 16684 }, { "entropy": 1.7471620738506317, "epoch": 1.8329351020296065, "grad_norm": 0.6353728175163269, "learning_rate": 2.327909761375481e-06, "loss": 1.5038, "mean_token_accuracy": 0.6181689401467642, "num_tokens": 2798535717.0, "step": 16685 }, { "entropy": 1.7170325716336567, "epoch": 1.8330449589409794, "grad_norm": 0.5999660491943359, "learning_rate": 2.327481641188994e-06, "loss": 1.4572, "mean_token_accuracy": 0.6410307437181473, "num_tokens": 2798743059.0, "step": 16686 }, { "entropy": 1.6927921573321025, "epoch": 1.8331548158523523, "grad_norm": 0.767248272895813, "learning_rate": 2.327053795483804e-06, "loss": 1.3298, "mean_token_accuracy": 0.6670361459255219, "num_tokens": 2798877340.0, "step": 16687 }, { "entropy": 1.7161982754866283, "epoch": 1.8332646727637254, "grad_norm": 0.6494265198707581, "learning_rate": 2.3266262242734533e-06, "loss": 1.4735, "mean_token_accuracy": 0.6449993848800659, "num_tokens": 2799071422.0, "step": 16688 }, { "entropy": 1.6752700805664062, "epoch": 1.833374529675098, "grad_norm": 0.6607106924057007, "learning_rate": 2.326198927571476e-06, "loss": 1.4465, "mean_token_accuracy": 0.6546447724103928, "num_tokens": 2799256633.0, "step": 16689 }, { "entropy": 1.6198724607626598, "epoch": 1.8334843865864712, "grad_norm": 0.6523711085319519, "learning_rate": 2.3257719053913918e-06, "loss": 1.3877, "mean_token_accuracy": 0.6629767715930939, "num_tokens": 2799420133.0, "step": 16690 }, { "entropy": 1.65846848487854, "epoch": 1.833594243497844, "grad_norm": 0.7563357353210449, "learning_rate": 2.325345157746719e-06, "loss": 1.3276, "mean_token_accuracy": 0.6739385028680166, "num_tokens": 2799565448.0, "step": 16691 }, { "entropy": 1.715136726697286, "epoch": 1.833704100409217, "grad_norm": 0.9137521386146545, "learning_rate": 2.324918684650965e-06, "loss": 1.2287, "mean_token_accuracy": 0.6744515299797058, "num_tokens": 2799666308.0, "step": 16692 }, { "entropy": 1.7323183019955952, "epoch": 1.83381395732059, "grad_norm": 0.6691234111785889, "learning_rate": 2.324492486117623e-06, "loss": 1.4001, "mean_token_accuracy": 0.6475772460301717, "num_tokens": 2799802850.0, "step": 16693 }, { "entropy": 1.7045779128869374, "epoch": 1.8339238142319627, "grad_norm": 0.6672487854957581, "learning_rate": 2.3240665621601845e-06, "loss": 1.394, "mean_token_accuracy": 0.6490417867898941, "num_tokens": 2799931349.0, "step": 16694 }, { "entropy": 1.7110650738080342, "epoch": 1.8340336711433358, "grad_norm": 0.9822511672973633, "learning_rate": 2.323640912792131e-06, "loss": 1.4708, "mean_token_accuracy": 0.6601487100124359, "num_tokens": 2800082985.0, "step": 16695 }, { "entropy": 1.7707766592502594, "epoch": 1.8341435280547087, "grad_norm": 0.6212161779403687, "learning_rate": 2.3232155380269334e-06, "loss": 1.4198, "mean_token_accuracy": 0.6484910945097605, "num_tokens": 2800227500.0, "step": 16696 }, { "entropy": 1.7343395352363586, "epoch": 1.8342533849660816, "grad_norm": 0.6109079718589783, "learning_rate": 2.3227904378780525e-06, "loss": 1.4134, "mean_token_accuracy": 0.6515641411145529, "num_tokens": 2800407929.0, "step": 16697 }, { "entropy": 1.7123263776302338, "epoch": 1.8343632418774547, "grad_norm": 0.7430224418640137, "learning_rate": 2.3223656123589465e-06, "loss": 1.4596, "mean_token_accuracy": 0.6462646871805191, "num_tokens": 2800592020.0, "step": 16698 }, { "entropy": 1.760039468606313, "epoch": 1.8344730987888276, "grad_norm": 0.7846954464912415, "learning_rate": 2.3219410614830565e-06, "loss": 1.3439, "mean_token_accuracy": 0.6587338050206503, "num_tokens": 2800717471.0, "step": 16699 }, { "entropy": 1.71830815076828, "epoch": 1.8345829557002005, "grad_norm": 0.833976686000824, "learning_rate": 2.321516785263822e-06, "loss": 1.4485, "mean_token_accuracy": 0.6538856824239095, "num_tokens": 2800878599.0, "step": 16700 }, { "entropy": 1.734289248784383, "epoch": 1.8346928126115736, "grad_norm": 0.6943913698196411, "learning_rate": 2.321092783714671e-06, "loss": 1.3736, "mean_token_accuracy": 0.6654922415812811, "num_tokens": 2801021962.0, "step": 16701 }, { "entropy": 1.7423664331436157, "epoch": 1.8348026695229462, "grad_norm": 0.665240466594696, "learning_rate": 2.3206690568490227e-06, "loss": 1.3895, "mean_token_accuracy": 0.6504726807276408, "num_tokens": 2801170792.0, "step": 16702 }, { "entropy": 1.6692781150341034, "epoch": 1.8349125264343193, "grad_norm": 0.6680687069892883, "learning_rate": 2.320245604680287e-06, "loss": 1.4679, "mean_token_accuracy": 0.6471812377373377, "num_tokens": 2801333096.0, "step": 16703 }, { "entropy": 1.73446982105573, "epoch": 1.8350223833456922, "grad_norm": 0.6099308133125305, "learning_rate": 2.3198224272218688e-06, "loss": 1.4017, "mean_token_accuracy": 0.6566864202419916, "num_tokens": 2801528350.0, "step": 16704 }, { "entropy": 1.7558285593986511, "epoch": 1.8351322402570651, "grad_norm": 0.6862417459487915, "learning_rate": 2.3193995244871563e-06, "loss": 1.4117, "mean_token_accuracy": 0.6532981991767883, "num_tokens": 2801649689.0, "step": 16705 }, { "entropy": 1.668075293302536, "epoch": 1.8352420971684382, "grad_norm": 0.6663626432418823, "learning_rate": 2.318976896489539e-06, "loss": 1.2259, "mean_token_accuracy": 0.6728375951449076, "num_tokens": 2801778768.0, "step": 16706 }, { "entropy": 1.7122756640116374, "epoch": 1.8353519540798109, "grad_norm": 0.6838109493255615, "learning_rate": 2.3185545432423913e-06, "loss": 1.3666, "mean_token_accuracy": 0.6625057260195414, "num_tokens": 2801920311.0, "step": 16707 }, { "entropy": 1.6512116491794586, "epoch": 1.835461810991184, "grad_norm": 0.6728096008300781, "learning_rate": 2.31813246475908e-06, "loss": 1.4699, "mean_token_accuracy": 0.6534949143727621, "num_tokens": 2802092232.0, "step": 16708 }, { "entropy": 1.7649835646152496, "epoch": 1.8355716679025569, "grad_norm": 0.6703423261642456, "learning_rate": 2.3177106610529636e-06, "loss": 1.3734, "mean_token_accuracy": 0.670386994878451, "num_tokens": 2802257287.0, "step": 16709 }, { "entropy": 1.709249993165334, "epoch": 1.8356815248139298, "grad_norm": 0.6634789109230042, "learning_rate": 2.317289132137394e-06, "loss": 1.4009, "mean_token_accuracy": 0.6566235572099686, "num_tokens": 2802415941.0, "step": 16710 }, { "entropy": 1.6666079958279927, "epoch": 1.8357913817253029, "grad_norm": 0.7016635537147522, "learning_rate": 2.3168678780257087e-06, "loss": 1.2665, "mean_token_accuracy": 0.6808893928925196, "num_tokens": 2802520782.0, "step": 16711 }, { "entropy": 1.7787012954552968, "epoch": 1.8359012386366758, "grad_norm": 0.803626298904419, "learning_rate": 2.316446898731243e-06, "loss": 1.3076, "mean_token_accuracy": 0.669058566292127, "num_tokens": 2802665165.0, "step": 16712 }, { "entropy": 1.7142982184886932, "epoch": 1.8360110955480486, "grad_norm": 0.6213630437850952, "learning_rate": 2.3160261942673214e-06, "loss": 1.4598, "mean_token_accuracy": 0.6640812555948893, "num_tokens": 2802817063.0, "step": 16713 }, { "entropy": 1.6978369255860646, "epoch": 1.8361209524594218, "grad_norm": 0.7595458030700684, "learning_rate": 2.315605764647256e-06, "loss": 1.2793, "mean_token_accuracy": 0.6742851883172989, "num_tokens": 2802976665.0, "step": 16714 }, { "entropy": 1.7584485709667206, "epoch": 1.8362308093707944, "grad_norm": 0.6625379323959351, "learning_rate": 2.3151856098843546e-06, "loss": 1.3989, "mean_token_accuracy": 0.6467997978130976, "num_tokens": 2803145950.0, "step": 16715 }, { "entropy": 1.6833869119485219, "epoch": 1.8363406662821675, "grad_norm": 0.7335963249206543, "learning_rate": 2.314765729991918e-06, "loss": 1.3019, "mean_token_accuracy": 0.67206671833992, "num_tokens": 2803299408.0, "step": 16716 }, { "entropy": 1.693650444348653, "epoch": 1.8364505231935404, "grad_norm": 1.7734737396240234, "learning_rate": 2.31434612498323e-06, "loss": 1.2178, "mean_token_accuracy": 0.6751609444618225, "num_tokens": 2803465084.0, "step": 16717 }, { "entropy": 1.657290409008662, "epoch": 1.8365603801049133, "grad_norm": 0.6704737544059753, "learning_rate": 2.3139267948715727e-06, "loss": 1.2829, "mean_token_accuracy": 0.6748186101516088, "num_tokens": 2803606663.0, "step": 16718 }, { "entropy": 1.6640637814998627, "epoch": 1.8366702370162864, "grad_norm": 0.6008581519126892, "learning_rate": 2.3135077396702205e-06, "loss": 1.4499, "mean_token_accuracy": 0.6364815980195999, "num_tokens": 2803903340.0, "step": 16719 }, { "entropy": 1.7623928785324097, "epoch": 1.836780093927659, "grad_norm": 0.6992172598838806, "learning_rate": 2.313088959392434e-06, "loss": 1.4895, "mean_token_accuracy": 0.6527946243683497, "num_tokens": 2804062195.0, "step": 16720 }, { "entropy": 1.6591077148914337, "epoch": 1.8368899508390322, "grad_norm": 0.6158271431922913, "learning_rate": 2.312670454051466e-06, "loss": 1.5485, "mean_token_accuracy": 0.636052280664444, "num_tokens": 2804285283.0, "step": 16721 }, { "entropy": 1.7128118971983592, "epoch": 1.836999807750405, "grad_norm": 0.5718826055526733, "learning_rate": 2.3122522236605645e-06, "loss": 1.4816, "mean_token_accuracy": 0.639900396267573, "num_tokens": 2804515546.0, "step": 16722 }, { "entropy": 1.6636869013309479, "epoch": 1.837109664661778, "grad_norm": 0.6339669227600098, "learning_rate": 2.311834268232964e-06, "loss": 1.3845, "mean_token_accuracy": 0.661793996890386, "num_tokens": 2804720983.0, "step": 16723 }, { "entropy": 1.6897972722848256, "epoch": 1.837219521573151, "grad_norm": 0.6934084296226501, "learning_rate": 2.311416587781895e-06, "loss": 1.215, "mean_token_accuracy": 0.6849165956179301, "num_tokens": 2804878895.0, "step": 16724 }, { "entropy": 1.757619212071101, "epoch": 1.837329378484524, "grad_norm": 0.6026404500007629, "learning_rate": 2.3109991823205763e-06, "loss": 1.3753, "mean_token_accuracy": 0.6520447432994843, "num_tokens": 2805043646.0, "step": 16725 }, { "entropy": 1.7655748923619587, "epoch": 1.8374392353958968, "grad_norm": 0.6365966200828552, "learning_rate": 2.310582051862217e-06, "loss": 1.3717, "mean_token_accuracy": 0.6470306913057963, "num_tokens": 2805192125.0, "step": 16726 }, { "entropy": 1.7034188906351726, "epoch": 1.83754909230727, "grad_norm": 0.8369081020355225, "learning_rate": 2.310165196420021e-06, "loss": 1.2939, "mean_token_accuracy": 0.668337215979894, "num_tokens": 2805332008.0, "step": 16727 }, { "entropy": 1.7169418434302013, "epoch": 1.8376589492186426, "grad_norm": 0.707994818687439, "learning_rate": 2.309748616007181e-06, "loss": 1.6019, "mean_token_accuracy": 0.6377008507649103, "num_tokens": 2805520557.0, "step": 16728 }, { "entropy": 1.739993025859197, "epoch": 1.8377688061300157, "grad_norm": 0.7567148208618164, "learning_rate": 2.3093323106368804e-06, "loss": 1.219, "mean_token_accuracy": 0.6780005594094595, "num_tokens": 2805656356.0, "step": 16729 }, { "entropy": 1.7764423092206318, "epoch": 1.8378786630413886, "grad_norm": 0.7887062430381775, "learning_rate": 2.308916280322296e-06, "loss": 1.5156, "mean_token_accuracy": 0.6396622359752655, "num_tokens": 2805861144.0, "step": 16730 }, { "entropy": 1.6603560149669647, "epoch": 1.8379885199527615, "grad_norm": 0.6049760580062866, "learning_rate": 2.3085005250765965e-06, "loss": 1.3529, "mean_token_accuracy": 0.6593878070513407, "num_tokens": 2806012980.0, "step": 16731 }, { "entropy": 1.745975524187088, "epoch": 1.8380983768641346, "grad_norm": 0.694965124130249, "learning_rate": 2.3080850449129375e-06, "loss": 1.5094, "mean_token_accuracy": 0.6494153340657552, "num_tokens": 2806192613.0, "step": 16732 }, { "entropy": 1.638623684644699, "epoch": 1.8382082337755072, "grad_norm": 0.5887953639030457, "learning_rate": 2.3076698398444714e-06, "loss": 1.318, "mean_token_accuracy": 0.6599133412043253, "num_tokens": 2806347525.0, "step": 16733 }, { "entropy": 1.7663246889909108, "epoch": 1.8383180906868803, "grad_norm": 0.7165161967277527, "learning_rate": 2.307254909884337e-06, "loss": 1.4706, "mean_token_accuracy": 0.6425551424423853, "num_tokens": 2806567225.0, "step": 16734 }, { "entropy": 1.6672922571500142, "epoch": 1.8384279475982532, "grad_norm": 0.7588421106338501, "learning_rate": 2.3068402550456666e-06, "loss": 1.3066, "mean_token_accuracy": 0.6717980951070786, "num_tokens": 2806721250.0, "step": 16735 }, { "entropy": 1.6799332797527313, "epoch": 1.8385378045096261, "grad_norm": 0.6996718645095825, "learning_rate": 2.3064258753415876e-06, "loss": 1.427, "mean_token_accuracy": 0.6578944275776545, "num_tokens": 2806893990.0, "step": 16736 }, { "entropy": 1.7593200008074443, "epoch": 1.8386476614209992, "grad_norm": 0.7356519103050232, "learning_rate": 2.30601177078521e-06, "loss": 1.5172, "mean_token_accuracy": 0.6432800640662512, "num_tokens": 2807056845.0, "step": 16737 }, { "entropy": 1.770335892836253, "epoch": 1.838757518332372, "grad_norm": 0.7394158840179443, "learning_rate": 2.305597941389643e-06, "loss": 1.4034, "mean_token_accuracy": 0.650958850979805, "num_tokens": 2807199870.0, "step": 16738 }, { "entropy": 1.683393657207489, "epoch": 1.838867375243745, "grad_norm": 0.7186655402183533, "learning_rate": 2.305184387167984e-06, "loss": 1.4326, "mean_token_accuracy": 0.6676509827375412, "num_tokens": 2807428387.0, "step": 16739 }, { "entropy": 1.7485286990801494, "epoch": 1.838977232155118, "grad_norm": 0.6906415224075317, "learning_rate": 2.3047711081333206e-06, "loss": 1.2982, "mean_token_accuracy": 0.6647703647613525, "num_tokens": 2807578751.0, "step": 16740 }, { "entropy": 1.7130445539951324, "epoch": 1.8390870890664908, "grad_norm": 0.6014775037765503, "learning_rate": 2.304358104298733e-06, "loss": 1.5085, "mean_token_accuracy": 0.6496585061152776, "num_tokens": 2807789299.0, "step": 16741 }, { "entropy": 1.7314409911632538, "epoch": 1.8391969459778639, "grad_norm": 0.8532242178916931, "learning_rate": 2.3039453756772944e-06, "loss": 1.5082, "mean_token_accuracy": 0.6510532250006994, "num_tokens": 2807931287.0, "step": 16742 }, { "entropy": 1.6471184194087982, "epoch": 1.8393068028892368, "grad_norm": 0.7204332947731018, "learning_rate": 2.3035329222820648e-06, "loss": 1.3199, "mean_token_accuracy": 0.6603179921706518, "num_tokens": 2808087757.0, "step": 16743 }, { "entropy": 1.6706886788209279, "epoch": 1.8394166598006096, "grad_norm": 0.6390844583511353, "learning_rate": 2.3031207441261006e-06, "loss": 1.3195, "mean_token_accuracy": 0.6594639817873637, "num_tokens": 2808250689.0, "step": 16744 }, { "entropy": 1.7221011817455292, "epoch": 1.8395265167119828, "grad_norm": 0.7180662751197815, "learning_rate": 2.302708841222445e-06, "loss": 1.2913, "mean_token_accuracy": 0.6685677369435629, "num_tokens": 2808358557.0, "step": 16745 }, { "entropy": 1.6975955367088318, "epoch": 1.8396363736233554, "grad_norm": 0.8012198209762573, "learning_rate": 2.3022972135841354e-06, "loss": 1.5236, "mean_token_accuracy": 0.636689285437266, "num_tokens": 2808536243.0, "step": 16746 }, { "entropy": 1.70916286110878, "epoch": 1.8397462305347285, "grad_norm": 0.6637392044067383, "learning_rate": 2.3018858612241997e-06, "loss": 1.3551, "mean_token_accuracy": 0.6624687761068344, "num_tokens": 2808674739.0, "step": 16747 }, { "entropy": 1.7597693900267284, "epoch": 1.8398560874461014, "grad_norm": 0.9263545274734497, "learning_rate": 2.3014747841556583e-06, "loss": 1.4849, "mean_token_accuracy": 0.6398697346448898, "num_tokens": 2808878557.0, "step": 16748 }, { "entropy": 1.7256910403569539, "epoch": 1.8399659443574743, "grad_norm": 0.695277988910675, "learning_rate": 2.301063982391519e-06, "loss": 1.3442, "mean_token_accuracy": 0.673882856965065, "num_tokens": 2809033754.0, "step": 16749 }, { "entropy": 1.6844683488210042, "epoch": 1.8400758012688474, "grad_norm": 0.775905430316925, "learning_rate": 2.300653455944785e-06, "loss": 1.2839, "mean_token_accuracy": 0.6783890922864279, "num_tokens": 2809173907.0, "step": 16750 }, { "entropy": 1.660104662179947, "epoch": 1.8401856581802203, "grad_norm": 0.9453991055488586, "learning_rate": 2.3002432048284495e-06, "loss": 1.569, "mean_token_accuracy": 0.6625748674074808, "num_tokens": 2809371151.0, "step": 16751 }, { "entropy": 1.6786755224068959, "epoch": 1.8402955150915932, "grad_norm": 0.9610604643821716, "learning_rate": 2.299833229055497e-06, "loss": 1.0901, "mean_token_accuracy": 0.6992639452219009, "num_tokens": 2809557765.0, "step": 16752 }, { "entropy": 1.6895591119925182, "epoch": 1.8404053720029663, "grad_norm": 0.6605542898178101, "learning_rate": 2.2994235286389006e-06, "loss": 1.4095, "mean_token_accuracy": 0.6520673781633377, "num_tokens": 2809742936.0, "step": 16753 }, { "entropy": 1.63118776679039, "epoch": 1.840515228914339, "grad_norm": 0.5481594800949097, "learning_rate": 2.2990141035916304e-06, "loss": 1.4048, "mean_token_accuracy": 0.6418586075305939, "num_tokens": 2809960815.0, "step": 16754 }, { "entropy": 1.64922496676445, "epoch": 1.840625085825712, "grad_norm": 0.6355406641960144, "learning_rate": 2.298604953926642e-06, "loss": 1.4373, "mean_token_accuracy": 0.6513316084941229, "num_tokens": 2810165370.0, "step": 16755 }, { "entropy": 1.6678697963555653, "epoch": 1.840734942737085, "grad_norm": 0.7785813212394714, "learning_rate": 2.2981960796568873e-06, "loss": 1.5217, "mean_token_accuracy": 0.65113993982474, "num_tokens": 2810349269.0, "step": 16756 }, { "entropy": 1.7382381856441498, "epoch": 1.8408447996484578, "grad_norm": 0.8284785747528076, "learning_rate": 2.297787480795305e-06, "loss": 1.442, "mean_token_accuracy": 0.6547079781691233, "num_tokens": 2810533928.0, "step": 16757 }, { "entropy": 1.6844376226266224, "epoch": 1.840954656559831, "grad_norm": 0.7436577677726746, "learning_rate": 2.2973791573548267e-06, "loss": 1.3838, "mean_token_accuracy": 0.6586803744236628, "num_tokens": 2810689879.0, "step": 16758 }, { "entropy": 1.7238931755224864, "epoch": 1.8410645134712038, "grad_norm": 0.6770063042640686, "learning_rate": 2.2969711093483765e-06, "loss": 1.4644, "mean_token_accuracy": 0.6473502864440283, "num_tokens": 2810846252.0, "step": 16759 }, { "entropy": 1.776161293188731, "epoch": 1.8411743703825767, "grad_norm": 0.601456344127655, "learning_rate": 2.2965633367888716e-06, "loss": 1.3195, "mean_token_accuracy": 0.6596719473600388, "num_tokens": 2810987101.0, "step": 16760 }, { "entropy": 1.6843051811059315, "epoch": 1.8412842272939496, "grad_norm": 0.746356189250946, "learning_rate": 2.296155839689213e-06, "loss": 1.3483, "mean_token_accuracy": 0.6751666714747747, "num_tokens": 2811153094.0, "step": 16761 }, { "entropy": 1.7052331566810608, "epoch": 1.8413940842053225, "grad_norm": 0.6584969162940979, "learning_rate": 2.295748618062299e-06, "loss": 1.4244, "mean_token_accuracy": 0.6420090397198995, "num_tokens": 2811356100.0, "step": 16762 }, { "entropy": 1.6847927769025166, "epoch": 1.8415039411166956, "grad_norm": 0.6887544393539429, "learning_rate": 2.2953416719210216e-06, "loss": 1.3471, "mean_token_accuracy": 0.6726632316907247, "num_tokens": 2811473596.0, "step": 16763 }, { "entropy": 1.6632155577341716, "epoch": 1.8416137980280685, "grad_norm": 0.7095504999160767, "learning_rate": 2.2949350012782563e-06, "loss": 1.4566, "mean_token_accuracy": 0.6513134290774664, "num_tokens": 2811660773.0, "step": 16764 }, { "entropy": 1.7216549217700958, "epoch": 1.8417236549394413, "grad_norm": 0.7090489268302917, "learning_rate": 2.2945286061468764e-06, "loss": 1.4007, "mean_token_accuracy": 0.6596039036909739, "num_tokens": 2811832001.0, "step": 16765 }, { "entropy": 1.7071708242098491, "epoch": 1.8418335118508145, "grad_norm": 0.6916561126708984, "learning_rate": 2.2941224865397428e-06, "loss": 1.5654, "mean_token_accuracy": 0.630169411500295, "num_tokens": 2812111813.0, "step": 16766 }, { "entropy": 1.7071344057718914, "epoch": 1.8419433687621871, "grad_norm": 0.6458945274353027, "learning_rate": 2.293716642469709e-06, "loss": 1.3842, "mean_token_accuracy": 0.6630050440629324, "num_tokens": 2812271734.0, "step": 16767 }, { "entropy": 1.727453351020813, "epoch": 1.8420532256735602, "grad_norm": 0.7190876603126526, "learning_rate": 2.2933110739496217e-06, "loss": 1.4531, "mean_token_accuracy": 0.6521121064821879, "num_tokens": 2812433572.0, "step": 16768 }, { "entropy": 1.6626160542170207, "epoch": 1.842163082584933, "grad_norm": 0.8066210746765137, "learning_rate": 2.2929057809923155e-06, "loss": 1.4449, "mean_token_accuracy": 0.6541995108127594, "num_tokens": 2812644959.0, "step": 16769 }, { "entropy": 1.7399661739667256, "epoch": 1.842272939496306, "grad_norm": 0.6394560933113098, "learning_rate": 2.2925007636106167e-06, "loss": 1.3539, "mean_token_accuracy": 0.6547619154055914, "num_tokens": 2812803221.0, "step": 16770 }, { "entropy": 1.7124665677547455, "epoch": 1.842382796407679, "grad_norm": 0.8350101709365845, "learning_rate": 2.292096021817345e-06, "loss": 1.3946, "mean_token_accuracy": 0.6505677302678426, "num_tokens": 2812976208.0, "step": 16771 }, { "entropy": 1.7322071393330891, "epoch": 1.842492653319052, "grad_norm": 0.7939152121543884, "learning_rate": 2.2916915556253123e-06, "loss": 1.4203, "mean_token_accuracy": 0.661156415939331, "num_tokens": 2813157911.0, "step": 16772 }, { "entropy": 1.7652178903420765, "epoch": 1.8426025102304249, "grad_norm": 0.7062113285064697, "learning_rate": 2.291287365047316e-06, "loss": 1.5109, "mean_token_accuracy": 0.6431985199451447, "num_tokens": 2813344031.0, "step": 16773 }, { "entropy": 1.6703122456868489, "epoch": 1.8427123671417978, "grad_norm": 0.713137686252594, "learning_rate": 2.2908834500961504e-06, "loss": 1.2947, "mean_token_accuracy": 0.6722335070371628, "num_tokens": 2813509598.0, "step": 16774 }, { "entropy": 1.73709570368131, "epoch": 1.8428222240531706, "grad_norm": 0.9680473804473877, "learning_rate": 2.290479810784599e-06, "loss": 1.387, "mean_token_accuracy": 0.6628308445215225, "num_tokens": 2813646975.0, "step": 16775 }, { "entropy": 1.7047974566618602, "epoch": 1.8429320809645438, "grad_norm": 0.6554457545280457, "learning_rate": 2.2900764471254385e-06, "loss": 1.4557, "mean_token_accuracy": 0.6604510943094889, "num_tokens": 2813795279.0, "step": 16776 }, { "entropy": 1.6860364377498627, "epoch": 1.8430419378759166, "grad_norm": 0.6723480820655823, "learning_rate": 2.2896733591314315e-06, "loss": 1.244, "mean_token_accuracy": 0.6721046268939972, "num_tokens": 2813934809.0, "step": 16777 }, { "entropy": 1.6676070193449657, "epoch": 1.8431517947872895, "grad_norm": 0.7274590730667114, "learning_rate": 2.28927054681534e-06, "loss": 1.2252, "mean_token_accuracy": 0.6727160960435867, "num_tokens": 2814065654.0, "step": 16778 }, { "entropy": 1.6777145564556122, "epoch": 1.8432616516986626, "grad_norm": 0.7008301019668579, "learning_rate": 2.2888680101899086e-06, "loss": 1.2634, "mean_token_accuracy": 0.6827175964911779, "num_tokens": 2814237054.0, "step": 16779 }, { "entropy": 1.6733269294102986, "epoch": 1.8433715086100353, "grad_norm": 0.7075291872024536, "learning_rate": 2.28846574926788e-06, "loss": 1.2871, "mean_token_accuracy": 0.6724706093470255, "num_tokens": 2814372528.0, "step": 16780 }, { "entropy": 1.7397844890753429, "epoch": 1.8434813655214084, "grad_norm": 0.6934898495674133, "learning_rate": 2.288063764061986e-06, "loss": 1.491, "mean_token_accuracy": 0.6440728902816772, "num_tokens": 2814564424.0, "step": 16781 }, { "entropy": 1.6866810023784637, "epoch": 1.8435912224327813, "grad_norm": 0.6557011008262634, "learning_rate": 2.2876620545849465e-06, "loss": 1.3145, "mean_token_accuracy": 0.66270412504673, "num_tokens": 2814761649.0, "step": 16782 }, { "entropy": 1.6241275866826375, "epoch": 1.8437010793441542, "grad_norm": 0.6553396582603455, "learning_rate": 2.2872606208494775e-06, "loss": 1.4424, "mean_token_accuracy": 0.6536834836006165, "num_tokens": 2814932983.0, "step": 16783 }, { "entropy": 1.6947355270385742, "epoch": 1.8438109362555273, "grad_norm": 0.8572350740432739, "learning_rate": 2.286859462868286e-06, "loss": 1.3464, "mean_token_accuracy": 0.6532238374153773, "num_tokens": 2815067834.0, "step": 16784 }, { "entropy": 1.7147201299667358, "epoch": 1.8439207931669002, "grad_norm": 0.6992621421813965, "learning_rate": 2.2864585806540637e-06, "loss": 1.3477, "mean_token_accuracy": 0.6593698014815649, "num_tokens": 2815250511.0, "step": 16785 }, { "entropy": 1.7203228970368702, "epoch": 1.844030650078273, "grad_norm": 0.7004925012588501, "learning_rate": 2.2860579742195016e-06, "loss": 1.3027, "mean_token_accuracy": 0.6743641148010889, "num_tokens": 2815388655.0, "step": 16786 }, { "entropy": 1.6600177884101868, "epoch": 1.844140506989646, "grad_norm": 0.6689477562904358, "learning_rate": 2.285657643577278e-06, "loss": 1.1647, "mean_token_accuracy": 0.6987222582101822, "num_tokens": 2815523979.0, "step": 16787 }, { "entropy": 1.6286835670471191, "epoch": 1.8442503639010188, "grad_norm": 0.632027804851532, "learning_rate": 2.285257588740064e-06, "loss": 1.3428, "mean_token_accuracy": 0.6710825363794962, "num_tokens": 2815756939.0, "step": 16788 }, { "entropy": 1.6791391670703888, "epoch": 1.844360220812392, "grad_norm": 0.6494190692901611, "learning_rate": 2.2848578097205193e-06, "loss": 1.4686, "mean_token_accuracy": 0.6351282844940821, "num_tokens": 2815934346.0, "step": 16789 }, { "entropy": 1.6839772363503773, "epoch": 1.8444700777237648, "grad_norm": 0.8661864995956421, "learning_rate": 2.284458306531298e-06, "loss": 1.4426, "mean_token_accuracy": 0.6467615962028503, "num_tokens": 2816134609.0, "step": 16790 }, { "entropy": 1.7479057808717091, "epoch": 1.8445799346351377, "grad_norm": 0.9440947771072388, "learning_rate": 2.2840590791850434e-06, "loss": 1.4306, "mean_token_accuracy": 0.6492450833320618, "num_tokens": 2816298540.0, "step": 16791 }, { "entropy": 1.6491265694300334, "epoch": 1.8446897915465108, "grad_norm": 0.6750578284263611, "learning_rate": 2.2836601276943944e-06, "loss": 1.4913, "mean_token_accuracy": 0.6493054578701655, "num_tokens": 2816496291.0, "step": 16792 }, { "entropy": 1.7014791468779247, "epoch": 1.8447996484578835, "grad_norm": 0.7774354815483093, "learning_rate": 2.2832614520719713e-06, "loss": 1.2901, "mean_token_accuracy": 0.6726734042167664, "num_tokens": 2816634154.0, "step": 16793 }, { "entropy": 1.6888912518819172, "epoch": 1.8449095053692566, "grad_norm": 0.6581716537475586, "learning_rate": 2.2828630523303962e-06, "loss": 1.2948, "mean_token_accuracy": 0.668033296863238, "num_tokens": 2816767661.0, "step": 16794 }, { "entropy": 1.7331350843111675, "epoch": 1.8450193622806295, "grad_norm": 0.8059678673744202, "learning_rate": 2.2824649284822777e-06, "loss": 1.2899, "mean_token_accuracy": 0.6695135881503423, "num_tokens": 2816904457.0, "step": 16795 }, { "entropy": 1.7052109042803447, "epoch": 1.8451292191920023, "grad_norm": 0.7135511040687561, "learning_rate": 2.2820670805402166e-06, "loss": 1.3201, "mean_token_accuracy": 0.6706758240858713, "num_tokens": 2817068724.0, "step": 16796 }, { "entropy": 1.7488359014193218, "epoch": 1.8452390761033755, "grad_norm": 0.7513749599456787, "learning_rate": 2.281669508516803e-06, "loss": 1.4146, "mean_token_accuracy": 0.6454348017772039, "num_tokens": 2817186893.0, "step": 16797 }, { "entropy": 1.7428977489471436, "epoch": 1.8453489330147483, "grad_norm": 0.6585659980773926, "learning_rate": 2.281272212424622e-06, "loss": 1.5118, "mean_token_accuracy": 0.6480231831471125, "num_tokens": 2817380337.0, "step": 16798 }, { "entropy": 1.6482553680737813, "epoch": 1.8454587899261212, "grad_norm": 0.6863150000572205, "learning_rate": 2.280875192276245e-06, "loss": 1.2707, "mean_token_accuracy": 0.6809622297684351, "num_tokens": 2817523945.0, "step": 16799 }, { "entropy": 1.7246152857939403, "epoch": 1.845568646837494, "grad_norm": 0.6100006103515625, "learning_rate": 2.2804784480842414e-06, "loss": 1.4405, "mean_token_accuracy": 0.648542195558548, "num_tokens": 2817701592.0, "step": 16800 }, { "entropy": 1.7031661570072174, "epoch": 1.845678503748867, "grad_norm": 0.6806704998016357, "learning_rate": 2.2800819798611644e-06, "loss": 1.3778, "mean_token_accuracy": 0.6427052021026611, "num_tokens": 2817867695.0, "step": 16801 }, { "entropy": 1.7161981364091237, "epoch": 1.84578836066024, "grad_norm": 0.6332004070281982, "learning_rate": 2.2796857876195637e-06, "loss": 1.4339, "mean_token_accuracy": 0.6533434242010117, "num_tokens": 2818056193.0, "step": 16802 }, { "entropy": 1.7151016394297283, "epoch": 1.845898217571613, "grad_norm": 0.7988026142120361, "learning_rate": 2.279289871371977e-06, "loss": 1.3272, "mean_token_accuracy": 0.6582034826278687, "num_tokens": 2818196429.0, "step": 16803 }, { "entropy": 1.7060537834962208, "epoch": 1.8460080744829859, "grad_norm": 0.7432763576507568, "learning_rate": 2.2788942311309397e-06, "loss": 1.3024, "mean_token_accuracy": 0.6767023553450903, "num_tokens": 2818331053.0, "step": 16804 }, { "entropy": 1.683958222468694, "epoch": 1.846117931394359, "grad_norm": 0.6856158375740051, "learning_rate": 2.2784988669089674e-06, "loss": 1.5868, "mean_token_accuracy": 0.6441004474957784, "num_tokens": 2818554982.0, "step": 16805 }, { "entropy": 1.689920614163081, "epoch": 1.8462277883057316, "grad_norm": 0.6839845180511475, "learning_rate": 2.278103778718577e-06, "loss": 1.5445, "mean_token_accuracy": 0.6441525717576345, "num_tokens": 2818721341.0, "step": 16806 }, { "entropy": 1.6903660396734874, "epoch": 1.8463376452171048, "grad_norm": 0.6059070825576782, "learning_rate": 2.2777089665722706e-06, "loss": 1.3686, "mean_token_accuracy": 0.6590339243412018, "num_tokens": 2818914745.0, "step": 16807 }, { "entropy": 1.6847312947114308, "epoch": 1.8464475021284776, "grad_norm": 0.6773668527603149, "learning_rate": 2.2773144304825473e-06, "loss": 1.3906, "mean_token_accuracy": 0.6678819706042608, "num_tokens": 2819045859.0, "step": 16808 }, { "entropy": 1.7114491661389668, "epoch": 1.8465573590398505, "grad_norm": 0.6937119960784912, "learning_rate": 2.2769201704618895e-06, "loss": 1.3054, "mean_token_accuracy": 0.6675901015599569, "num_tokens": 2819189812.0, "step": 16809 }, { "entropy": 1.7245989938577015, "epoch": 1.8466672159512236, "grad_norm": 0.8096246719360352, "learning_rate": 2.2765261865227795e-06, "loss": 1.3121, "mean_token_accuracy": 0.661870464682579, "num_tokens": 2819297022.0, "step": 16810 }, { "entropy": 1.7037302354971569, "epoch": 1.8467770728625965, "grad_norm": 0.7414513230323792, "learning_rate": 2.2761324786776827e-06, "loss": 1.2294, "mean_token_accuracy": 0.6829250454902649, "num_tokens": 2819415838.0, "step": 16811 }, { "entropy": 1.7018751204013824, "epoch": 1.8468869297739694, "grad_norm": 0.6822280287742615, "learning_rate": 2.275739046939063e-06, "loss": 1.4365, "mean_token_accuracy": 0.6489651799201965, "num_tokens": 2819582184.0, "step": 16812 }, { "entropy": 1.6578473349412282, "epoch": 1.8469967866853425, "grad_norm": 0.7063673734664917, "learning_rate": 2.275345891319372e-06, "loss": 1.2741, "mean_token_accuracy": 0.6733155796925226, "num_tokens": 2819737015.0, "step": 16813 }, { "entropy": 1.7193239827950795, "epoch": 1.8471066435967152, "grad_norm": 0.6380773782730103, "learning_rate": 2.2749530118310504e-06, "loss": 1.4591, "mean_token_accuracy": 0.6509887427091599, "num_tokens": 2819935004.0, "step": 16814 }, { "entropy": 1.7366726497809093, "epoch": 1.8472165005080883, "grad_norm": 0.6270143985748291, "learning_rate": 2.274560408486535e-06, "loss": 1.5331, "mean_token_accuracy": 0.6440207809209824, "num_tokens": 2820141194.0, "step": 16815 }, { "entropy": 1.6627205908298492, "epoch": 1.8473263574194612, "grad_norm": 0.6391332149505615, "learning_rate": 2.2741680812982525e-06, "loss": 1.3179, "mean_token_accuracy": 0.6662083069483439, "num_tokens": 2820333411.0, "step": 16816 }, { "entropy": 1.6978925466537476, "epoch": 1.847436214330834, "grad_norm": 0.6449623703956604, "learning_rate": 2.2737760302786165e-06, "loss": 1.383, "mean_token_accuracy": 0.6534488449494044, "num_tokens": 2820497197.0, "step": 16817 }, { "entropy": 1.6958427727222443, "epoch": 1.8475460712422072, "grad_norm": 0.7582001686096191, "learning_rate": 2.273384255440037e-06, "loss": 1.2255, "mean_token_accuracy": 0.6779115696748098, "num_tokens": 2820615677.0, "step": 16818 }, { "entropy": 1.668468713760376, "epoch": 1.8476559281535798, "grad_norm": 0.7352595925331116, "learning_rate": 2.2729927567949147e-06, "loss": 1.2167, "mean_token_accuracy": 0.6819255699714025, "num_tokens": 2820735125.0, "step": 16819 }, { "entropy": 1.7504223088423412, "epoch": 1.847765785064953, "grad_norm": 0.653083086013794, "learning_rate": 2.272601534355638e-06, "loss": 1.478, "mean_token_accuracy": 0.640269880493482, "num_tokens": 2820924373.0, "step": 16820 }, { "entropy": 1.744905153910319, "epoch": 1.8478756419763258, "grad_norm": 0.808557391166687, "learning_rate": 2.27221058813459e-06, "loss": 1.3103, "mean_token_accuracy": 0.6747282495101293, "num_tokens": 2821111140.0, "step": 16821 }, { "entropy": 1.6883414487044017, "epoch": 1.8479854988876987, "grad_norm": 0.8405027985572815, "learning_rate": 2.271819918144145e-06, "loss": 1.3422, "mean_token_accuracy": 0.6721090773741404, "num_tokens": 2821233106.0, "step": 16822 }, { "entropy": 1.6892358760039012, "epoch": 1.8480953557990718, "grad_norm": 0.5664523243904114, "learning_rate": 2.2714295243966663e-06, "loss": 1.4374, "mean_token_accuracy": 0.6468595862388611, "num_tokens": 2821409664.0, "step": 16823 }, { "entropy": 1.7041309575239818, "epoch": 1.8482052127104447, "grad_norm": 0.7229970097541809, "learning_rate": 2.2710394069045096e-06, "loss": 1.4368, "mean_token_accuracy": 0.6511443008979162, "num_tokens": 2821617059.0, "step": 16824 }, { "entropy": 1.7157021065553029, "epoch": 1.8483150696218176, "grad_norm": 0.6895220279693604, "learning_rate": 2.270649565680023e-06, "loss": 1.5049, "mean_token_accuracy": 0.6378757754961649, "num_tokens": 2821809999.0, "step": 16825 }, { "entropy": 1.6815617382526398, "epoch": 1.8484249265331907, "grad_norm": 0.6916029453277588, "learning_rate": 2.270260000735543e-06, "loss": 1.4192, "mean_token_accuracy": 0.6669404208660126, "num_tokens": 2821991768.0, "step": 16826 }, { "entropy": 1.7499388257662456, "epoch": 1.8485347834445633, "grad_norm": 0.6223210096359253, "learning_rate": 2.2698707120834e-06, "loss": 1.374, "mean_token_accuracy": 0.6528652707735697, "num_tokens": 2822148133.0, "step": 16827 }, { "entropy": 1.6805897454420726, "epoch": 1.8486446403559365, "grad_norm": 2.376575231552124, "learning_rate": 2.269481699735918e-06, "loss": 1.1966, "mean_token_accuracy": 0.6810894310474396, "num_tokens": 2822351860.0, "step": 16828 }, { "entropy": 1.6760378777980804, "epoch": 1.8487544972673093, "grad_norm": 0.6722053289413452, "learning_rate": 2.269092963705404e-06, "loss": 1.3251, "mean_token_accuracy": 0.6669818659623464, "num_tokens": 2822522055.0, "step": 16829 }, { "entropy": 1.6548854509989421, "epoch": 1.8488643541786822, "grad_norm": 0.6542387008666992, "learning_rate": 2.2687045040041625e-06, "loss": 1.2904, "mean_token_accuracy": 0.6669500768184662, "num_tokens": 2822682578.0, "step": 16830 }, { "entropy": 1.6649717092514038, "epoch": 1.8489742110900553, "grad_norm": 0.69137042760849, "learning_rate": 2.2683163206444903e-06, "loss": 1.3382, "mean_token_accuracy": 0.6678586552540461, "num_tokens": 2822822417.0, "step": 16831 }, { "entropy": 1.6161488095919292, "epoch": 1.849084068001428, "grad_norm": 0.7891423106193542, "learning_rate": 2.2679284136386717e-06, "loss": 1.4021, "mean_token_accuracy": 0.6629961331685384, "num_tokens": 2823043372.0, "step": 16832 }, { "entropy": 1.6671480735143025, "epoch": 1.849193924912801, "grad_norm": 0.5742250680923462, "learning_rate": 2.267540782998984e-06, "loss": 1.4551, "mean_token_accuracy": 0.6415112614631653, "num_tokens": 2823234593.0, "step": 16833 }, { "entropy": 1.7150403559207916, "epoch": 1.849303781824174, "grad_norm": 0.6328002214431763, "learning_rate": 2.2671534287376955e-06, "loss": 1.3687, "mean_token_accuracy": 0.6589196075995764, "num_tokens": 2823412259.0, "step": 16834 }, { "entropy": 1.6628845036029816, "epoch": 1.8494136387355469, "grad_norm": 0.6902245879173279, "learning_rate": 2.2667663508670654e-06, "loss": 1.3141, "mean_token_accuracy": 0.6782469848791758, "num_tokens": 2823563936.0, "step": 16835 }, { "entropy": 1.778301070133845, "epoch": 1.84952349564692, "grad_norm": 0.6464490294456482, "learning_rate": 2.266379549399346e-06, "loss": 1.4777, "mean_token_accuracy": 0.6514505942662557, "num_tokens": 2823785146.0, "step": 16836 }, { "entropy": 1.6853400766849518, "epoch": 1.8496333525582929, "grad_norm": 0.7404756546020508, "learning_rate": 2.265993024346779e-06, "loss": 1.3938, "mean_token_accuracy": 0.6646452844142914, "num_tokens": 2823948844.0, "step": 16837 }, { "entropy": 1.7575759092966716, "epoch": 1.8497432094696658, "grad_norm": 0.9101560115814209, "learning_rate": 2.2656067757215955e-06, "loss": 1.6004, "mean_token_accuracy": 0.6494082659482956, "num_tokens": 2824102594.0, "step": 16838 }, { "entropy": 1.705398013194402, "epoch": 1.8498530663810389, "grad_norm": 0.691576361656189, "learning_rate": 2.2652208035360216e-06, "loss": 1.5896, "mean_token_accuracy": 0.6335019121567408, "num_tokens": 2824300458.0, "step": 16839 }, { "entropy": 1.7217795650164287, "epoch": 1.8499629232924115, "grad_norm": 0.6099857091903687, "learning_rate": 2.2648351078022756e-06, "loss": 1.3593, "mean_token_accuracy": 0.6655921290318171, "num_tokens": 2824478345.0, "step": 16840 }, { "entropy": 1.6977481245994568, "epoch": 1.8500727802037846, "grad_norm": 0.7489005327224731, "learning_rate": 2.2644496885325602e-06, "loss": 1.3109, "mean_token_accuracy": 0.6732942511638006, "num_tokens": 2824614208.0, "step": 16841 }, { "entropy": 1.6951703131198883, "epoch": 1.8501826371151575, "grad_norm": 0.6727724671363831, "learning_rate": 2.2640645457390757e-06, "loss": 1.3008, "mean_token_accuracy": 0.6589942077795664, "num_tokens": 2824761514.0, "step": 16842 }, { "entropy": 1.6436572670936584, "epoch": 1.8502924940265304, "grad_norm": 0.6955944895744324, "learning_rate": 2.2636796794340134e-06, "loss": 1.2896, "mean_token_accuracy": 0.673475960890452, "num_tokens": 2824926126.0, "step": 16843 }, { "entropy": 1.7151092290878296, "epoch": 1.8504023509379035, "grad_norm": 0.6162389516830444, "learning_rate": 2.2632950896295524e-06, "loss": 1.3972, "mean_token_accuracy": 0.6557242920001348, "num_tokens": 2825105477.0, "step": 16844 }, { "entropy": 1.7381452520688374, "epoch": 1.8505122078492762, "grad_norm": 0.5753760933876038, "learning_rate": 2.262910776337863e-06, "loss": 1.4604, "mean_token_accuracy": 0.6351420283317566, "num_tokens": 2825303396.0, "step": 16845 }, { "entropy": 1.693215678135554, "epoch": 1.8506220647606493, "grad_norm": 0.8320888876914978, "learning_rate": 2.2625267395711124e-06, "loss": 1.3931, "mean_token_accuracy": 0.6597619901100794, "num_tokens": 2825511715.0, "step": 16846 }, { "entropy": 1.772448907295863, "epoch": 1.8507319216720222, "grad_norm": 0.6900238990783691, "learning_rate": 2.2621429793414513e-06, "loss": 1.2598, "mean_token_accuracy": 0.6730435639619827, "num_tokens": 2825660163.0, "step": 16847 }, { "entropy": 1.7407717903455098, "epoch": 1.850841778583395, "grad_norm": 0.6229955554008484, "learning_rate": 2.26175949566103e-06, "loss": 1.3992, "mean_token_accuracy": 0.6651196181774139, "num_tokens": 2825812055.0, "step": 16848 }, { "entropy": 1.6535049378871918, "epoch": 1.8509516354947682, "grad_norm": 0.8824671506881714, "learning_rate": 2.261376288541982e-06, "loss": 1.471, "mean_token_accuracy": 0.6661679844061533, "num_tokens": 2825974645.0, "step": 16849 }, { "entropy": 1.7141542633374531, "epoch": 1.851061492406141, "grad_norm": 0.7158608436584473, "learning_rate": 2.2609933579964364e-06, "loss": 1.3092, "mean_token_accuracy": 0.6757365266482035, "num_tokens": 2826109915.0, "step": 16850 }, { "entropy": 1.684940109650294, "epoch": 1.851171349317514, "grad_norm": 0.7266584634780884, "learning_rate": 2.260610704036514e-06, "loss": 1.4043, "mean_token_accuracy": 0.6563446720441183, "num_tokens": 2826276894.0, "step": 16851 }, { "entropy": 1.6518448789914448, "epoch": 1.851281206228887, "grad_norm": 0.8699021935462952, "learning_rate": 2.2602283266743242e-06, "loss": 1.235, "mean_token_accuracy": 0.6798707942167918, "num_tokens": 2826413789.0, "step": 16852 }, { "entropy": 1.7219206094741821, "epoch": 1.8513910631402597, "grad_norm": 0.6562429070472717, "learning_rate": 2.25984622592197e-06, "loss": 1.3031, "mean_token_accuracy": 0.675565222899119, "num_tokens": 2826543479.0, "step": 16853 }, { "entropy": 1.6323307752609253, "epoch": 1.8515009200516328, "grad_norm": 0.6604292988777161, "learning_rate": 2.259464401791544e-06, "loss": 1.3841, "mean_token_accuracy": 0.6637335220972697, "num_tokens": 2826681051.0, "step": 16854 }, { "entropy": 1.7547399997711182, "epoch": 1.8516107769630057, "grad_norm": 0.6812036633491516, "learning_rate": 2.25908285429513e-06, "loss": 1.5095, "mean_token_accuracy": 0.6496999114751816, "num_tokens": 2826877453.0, "step": 16855 }, { "entropy": 1.756416380405426, "epoch": 1.8517206338743786, "grad_norm": 0.5992780327796936, "learning_rate": 2.2587015834448066e-06, "loss": 1.3727, "mean_token_accuracy": 0.6700999438762665, "num_tokens": 2827036356.0, "step": 16856 }, { "entropy": 1.7250816226005554, "epoch": 1.8518304907857517, "grad_norm": 0.6736693978309631, "learning_rate": 2.2583205892526395e-06, "loss": 1.3946, "mean_token_accuracy": 0.6623944640159607, "num_tokens": 2827218750.0, "step": 16857 }, { "entropy": 1.746427297592163, "epoch": 1.8519403476971243, "grad_norm": 0.5987364053726196, "learning_rate": 2.2579398717306853e-06, "loss": 1.4286, "mean_token_accuracy": 0.6408430685599645, "num_tokens": 2827429105.0, "step": 16858 }, { "entropy": 1.7429214417934418, "epoch": 1.8520502046084975, "grad_norm": 0.6848793625831604, "learning_rate": 2.257559430890994e-06, "loss": 1.4197, "mean_token_accuracy": 0.6512637386719385, "num_tokens": 2827607457.0, "step": 16859 }, { "entropy": 1.6617528994878132, "epoch": 1.8521600615198703, "grad_norm": 0.6407644748687744, "learning_rate": 2.25717926674561e-06, "loss": 1.2838, "mean_token_accuracy": 0.6738065630197525, "num_tokens": 2827740068.0, "step": 16860 }, { "entropy": 1.6476930975914001, "epoch": 1.8522699184312432, "grad_norm": 0.587350070476532, "learning_rate": 2.2567993793065612e-06, "loss": 1.5079, "mean_token_accuracy": 0.6454335004091263, "num_tokens": 2827970706.0, "step": 16861 }, { "entropy": 1.6667810281117756, "epoch": 1.8523797753426163, "grad_norm": 0.6816261410713196, "learning_rate": 2.2564197685858718e-06, "loss": 1.3844, "mean_token_accuracy": 0.6544611503680547, "num_tokens": 2828133482.0, "step": 16862 }, { "entropy": 1.7026315033435822, "epoch": 1.8524896322539892, "grad_norm": 0.7092069387435913, "learning_rate": 2.2560404345955573e-06, "loss": 1.421, "mean_token_accuracy": 0.6365664452314377, "num_tokens": 2828374569.0, "step": 16863 }, { "entropy": 1.7290400266647339, "epoch": 1.852599489165362, "grad_norm": 0.6336418986320496, "learning_rate": 2.2556613773476234e-06, "loss": 1.3831, "mean_token_accuracy": 0.6517727623383204, "num_tokens": 2828523922.0, "step": 16864 }, { "entropy": 1.7043097118536632, "epoch": 1.8527093460767352, "grad_norm": 0.7059993147850037, "learning_rate": 2.255282596854065e-06, "loss": 1.3474, "mean_token_accuracy": 0.6678043107191721, "num_tokens": 2828669505.0, "step": 16865 }, { "entropy": 1.7145764529705048, "epoch": 1.8528192029881079, "grad_norm": 0.7578869462013245, "learning_rate": 2.254904093126874e-06, "loss": 1.4085, "mean_token_accuracy": 0.6547698179880778, "num_tokens": 2828791241.0, "step": 16866 }, { "entropy": 1.762321561574936, "epoch": 1.852929059899481, "grad_norm": 0.7735615372657776, "learning_rate": 2.2545258661780266e-06, "loss": 1.5863, "mean_token_accuracy": 0.6380794048309326, "num_tokens": 2828975921.0, "step": 16867 }, { "entropy": 1.69214462240537, "epoch": 1.8530389168108539, "grad_norm": 0.7045040130615234, "learning_rate": 2.2541479160194944e-06, "loss": 1.4513, "mean_token_accuracy": 0.6603608429431915, "num_tokens": 2829140958.0, "step": 16868 }, { "entropy": 1.662009169658025, "epoch": 1.8531487737222267, "grad_norm": 0.8396393060684204, "learning_rate": 2.2537702426632405e-06, "loss": 1.229, "mean_token_accuracy": 0.6779507348934809, "num_tokens": 2829246179.0, "step": 16869 }, { "entropy": 1.7280895511309307, "epoch": 1.8532586306335999, "grad_norm": 0.7660303115844727, "learning_rate": 2.2533928461212163e-06, "loss": 1.3047, "mean_token_accuracy": 0.6635698924462, "num_tokens": 2829424054.0, "step": 16870 }, { "entropy": 1.6450142761071522, "epoch": 1.8533684875449725, "grad_norm": 0.6026352047920227, "learning_rate": 2.2530157264053683e-06, "loss": 1.4684, "mean_token_accuracy": 0.6385354151328405, "num_tokens": 2829608694.0, "step": 16871 }, { "entropy": 1.7429430484771729, "epoch": 1.8534783444563456, "grad_norm": 0.7299525141716003, "learning_rate": 2.252638883527631e-06, "loss": 1.4044, "mean_token_accuracy": 0.6550277421871821, "num_tokens": 2829770875.0, "step": 16872 }, { "entropy": 1.7349806527296703, "epoch": 1.8535882013677185, "grad_norm": 0.7500406503677368, "learning_rate": 2.252262317499931e-06, "loss": 1.3102, "mean_token_accuracy": 0.6578699747721354, "num_tokens": 2829947693.0, "step": 16873 }, { "entropy": 1.7390219668547313, "epoch": 1.8536980582790914, "grad_norm": 0.7751812934875488, "learning_rate": 2.2518860283341864e-06, "loss": 1.5918, "mean_token_accuracy": 0.6272272417942683, "num_tokens": 2830153181.0, "step": 16874 }, { "entropy": 1.7201407651106517, "epoch": 1.8538079151904645, "grad_norm": 0.7094139456748962, "learning_rate": 2.251510016042308e-06, "loss": 1.5539, "mean_token_accuracy": 0.6450007905562719, "num_tokens": 2830334049.0, "step": 16875 }, { "entropy": 1.736316164334615, "epoch": 1.8539177721018374, "grad_norm": 0.6322393417358398, "learning_rate": 2.251134280636195e-06, "loss": 1.2949, "mean_token_accuracy": 0.675666610399882, "num_tokens": 2830485303.0, "step": 16876 }, { "entropy": 1.729094882806142, "epoch": 1.8540276290132103, "grad_norm": 0.7388508319854736, "learning_rate": 2.25075882212774e-06, "loss": 1.383, "mean_token_accuracy": 0.6609533528486887, "num_tokens": 2830635382.0, "step": 16877 }, { "entropy": 1.682877242565155, "epoch": 1.8541374859245834, "grad_norm": 0.6518582105636597, "learning_rate": 2.2503836405288256e-06, "loss": 1.4053, "mean_token_accuracy": 0.6476947963237762, "num_tokens": 2830843164.0, "step": 16878 }, { "entropy": 1.6826065182685852, "epoch": 1.854247342835956, "grad_norm": 0.6691297888755798, "learning_rate": 2.250008735851325e-06, "loss": 1.3046, "mean_token_accuracy": 0.6775392790635427, "num_tokens": 2830999778.0, "step": 16879 }, { "entropy": 1.661271055539449, "epoch": 1.8543571997473292, "grad_norm": 0.7119371294975281, "learning_rate": 2.2496341081071066e-06, "loss": 1.3536, "mean_token_accuracy": 0.657180925210317, "num_tokens": 2831151781.0, "step": 16880 }, { "entropy": 1.6969236334164937, "epoch": 1.854467056658702, "grad_norm": 0.6304360628128052, "learning_rate": 2.249259757308026e-06, "loss": 1.4231, "mean_token_accuracy": 0.6411556551853815, "num_tokens": 2831342014.0, "step": 16881 }, { "entropy": 1.7519350747267406, "epoch": 1.854576913570075, "grad_norm": 0.7681441307067871, "learning_rate": 2.248885683465929e-06, "loss": 1.2912, "mean_token_accuracy": 0.6645476470390955, "num_tokens": 2831444174.0, "step": 16882 }, { "entropy": 1.7026408016681671, "epoch": 1.854686770481448, "grad_norm": 0.6976202726364136, "learning_rate": 2.248511886592658e-06, "loss": 1.3042, "mean_token_accuracy": 0.6687952727079391, "num_tokens": 2831566636.0, "step": 16883 }, { "entropy": 1.6760593354701996, "epoch": 1.8547966273928207, "grad_norm": 0.6563632488250732, "learning_rate": 2.248138366700043e-06, "loss": 1.4318, "mean_token_accuracy": 0.6755139579375585, "num_tokens": 2831726930.0, "step": 16884 }, { "entropy": 1.724344511826833, "epoch": 1.8549064843041938, "grad_norm": 0.7031118869781494, "learning_rate": 2.247765123799904e-06, "loss": 1.3251, "mean_token_accuracy": 0.6661837746699651, "num_tokens": 2831896095.0, "step": 16885 }, { "entropy": 1.727848341067632, "epoch": 1.8550163412155667, "grad_norm": 0.5700660943984985, "learning_rate": 2.247392157904055e-06, "loss": 1.4622, "mean_token_accuracy": 0.642038439710935, "num_tokens": 2832131149.0, "step": 16886 }, { "entropy": 1.6446122825145721, "epoch": 1.8551261981269396, "grad_norm": 0.6341161727905273, "learning_rate": 2.2470194690243006e-06, "loss": 1.3984, "mean_token_accuracy": 0.6593250582615534, "num_tokens": 2832307378.0, "step": 16887 }, { "entropy": 1.6699632306893666, "epoch": 1.8552360550383127, "grad_norm": 0.5614050626754761, "learning_rate": 2.2466470571724357e-06, "loss": 1.3048, "mean_token_accuracy": 0.6609620600938797, "num_tokens": 2832519981.0, "step": 16888 }, { "entropy": 1.6194656590620677, "epoch": 1.8553459119496856, "grad_norm": 0.5817786455154419, "learning_rate": 2.2462749223602464e-06, "loss": 1.4775, "mean_token_accuracy": 0.6533914605776469, "num_tokens": 2832712716.0, "step": 16889 }, { "entropy": 1.7114621301492055, "epoch": 1.8554557688610585, "grad_norm": 0.7644500136375427, "learning_rate": 2.2459030645995118e-06, "loss": 1.3141, "mean_token_accuracy": 0.661896139383316, "num_tokens": 2832835841.0, "step": 16890 }, { "entropy": 1.661112666130066, "epoch": 1.8555656257724316, "grad_norm": 0.8860819935798645, "learning_rate": 2.245531483902e-06, "loss": 1.1732, "mean_token_accuracy": 0.688821072379748, "num_tokens": 2832959778.0, "step": 16891 }, { "entropy": 1.6903029382228851, "epoch": 1.8556754826838042, "grad_norm": 0.6567397713661194, "learning_rate": 2.245160180279473e-06, "loss": 1.4008, "mean_token_accuracy": 0.6515764991442362, "num_tokens": 2833141986.0, "step": 16892 }, { "entropy": 1.7396831810474396, "epoch": 1.8557853395951773, "grad_norm": 0.6399162411689758, "learning_rate": 2.244789153743681e-06, "loss": 1.5221, "mean_token_accuracy": 0.6352566480636597, "num_tokens": 2833383901.0, "step": 16893 }, { "entropy": 1.6990015904108684, "epoch": 1.8558951965065502, "grad_norm": 0.6782881617546082, "learning_rate": 2.2444184043063666e-06, "loss": 1.3644, "mean_token_accuracy": 0.6583975255489349, "num_tokens": 2833543551.0, "step": 16894 }, { "entropy": 1.798978457848231, "epoch": 1.856005053417923, "grad_norm": 0.7053307294845581, "learning_rate": 2.2440479319792636e-06, "loss": 1.4234, "mean_token_accuracy": 0.6484651267528534, "num_tokens": 2833660767.0, "step": 16895 }, { "entropy": 1.6739828785260518, "epoch": 1.8561149103292962, "grad_norm": 0.741791844367981, "learning_rate": 2.2436777367741004e-06, "loss": 1.3366, "mean_token_accuracy": 0.6574702759583791, "num_tokens": 2833816561.0, "step": 16896 }, { "entropy": 1.7440234621365864, "epoch": 1.8562247672406689, "grad_norm": 0.7840536832809448, "learning_rate": 2.2433078187025897e-06, "loss": 1.3036, "mean_token_accuracy": 0.6595564881960551, "num_tokens": 2833970261.0, "step": 16897 }, { "entropy": 1.7112967669963837, "epoch": 1.856334624152042, "grad_norm": 0.6875016093254089, "learning_rate": 2.24293817777644e-06, "loss": 1.5245, "mean_token_accuracy": 0.655097077290217, "num_tokens": 2834143895.0, "step": 16898 }, { "entropy": 1.7350221673647563, "epoch": 1.8564444810634149, "grad_norm": 0.6510404944419861, "learning_rate": 2.2425688140073515e-06, "loss": 1.3634, "mean_token_accuracy": 0.6634075343608856, "num_tokens": 2834282750.0, "step": 16899 }, { "entropy": 1.7747245331605275, "epoch": 1.8565543379747877, "grad_norm": 0.7304637432098389, "learning_rate": 2.2421997274070153e-06, "loss": 1.3861, "mean_token_accuracy": 0.6545447160800298, "num_tokens": 2834400058.0, "step": 16900 }, { "entropy": 1.6580698291460674, "epoch": 1.8566641948861609, "grad_norm": 0.6355282664299011, "learning_rate": 2.2418309179871094e-06, "loss": 1.3236, "mean_token_accuracy": 0.6701503843069077, "num_tokens": 2834567340.0, "step": 16901 }, { "entropy": 1.7195066312948863, "epoch": 1.8567740517975337, "grad_norm": 0.6729101538658142, "learning_rate": 2.2414623857593086e-06, "loss": 1.329, "mean_token_accuracy": 0.6680616289377213, "num_tokens": 2834701276.0, "step": 16902 }, { "entropy": 1.7093308369318645, "epoch": 1.8568839087089066, "grad_norm": 0.735205352306366, "learning_rate": 2.241094130735277e-06, "loss": 1.5407, "mean_token_accuracy": 0.6620696832736334, "num_tokens": 2834860563.0, "step": 16903 }, { "entropy": 1.6724826991558075, "epoch": 1.8569937656202797, "grad_norm": 0.6093858480453491, "learning_rate": 2.2407261529266697e-06, "loss": 1.4459, "mean_token_accuracy": 0.6603935311237971, "num_tokens": 2835024875.0, "step": 16904 }, { "entropy": 1.7351752022902172, "epoch": 1.8571036225316524, "grad_norm": 0.716964840888977, "learning_rate": 2.240358452345133e-06, "loss": 1.4218, "mean_token_accuracy": 0.6472053527832031, "num_tokens": 2835160577.0, "step": 16905 }, { "entropy": 1.6534366210301716, "epoch": 1.8572134794430255, "grad_norm": 0.6837955713272095, "learning_rate": 2.2399910290023024e-06, "loss": 1.3871, "mean_token_accuracy": 0.6537323395411173, "num_tokens": 2835300716.0, "step": 16906 }, { "entropy": 1.6657463312149048, "epoch": 1.8573233363543984, "grad_norm": 0.6619056463241577, "learning_rate": 2.2396238829098092e-06, "loss": 1.3247, "mean_token_accuracy": 0.6633240481217703, "num_tokens": 2835477286.0, "step": 16907 }, { "entropy": 1.7382064660390217, "epoch": 1.8574331932657713, "grad_norm": 0.9045251607894897, "learning_rate": 2.2392570140792743e-06, "loss": 1.4923, "mean_token_accuracy": 0.6527342349290848, "num_tokens": 2835621855.0, "step": 16908 }, { "entropy": 1.746301809946696, "epoch": 1.8575430501771444, "grad_norm": 0.8234473466873169, "learning_rate": 2.2388904225223047e-06, "loss": 1.4924, "mean_token_accuracy": 0.6496127992868423, "num_tokens": 2835796980.0, "step": 16909 }, { "entropy": 1.6744904418786366, "epoch": 1.857652907088517, "grad_norm": 0.6349613070487976, "learning_rate": 2.2385241082505062e-06, "loss": 1.3036, "mean_token_accuracy": 0.6642439812421799, "num_tokens": 2835991748.0, "step": 16910 }, { "entropy": 1.7444684902826946, "epoch": 1.8577627639998902, "grad_norm": 0.6656630635261536, "learning_rate": 2.2381580712754717e-06, "loss": 1.4149, "mean_token_accuracy": 0.6564318190018336, "num_tokens": 2836177838.0, "step": 16911 }, { "entropy": 1.7117125988006592, "epoch": 1.857872620911263, "grad_norm": 0.6614863276481628, "learning_rate": 2.237792311608787e-06, "loss": 1.5543, "mean_token_accuracy": 0.650190144777298, "num_tokens": 2836380864.0, "step": 16912 }, { "entropy": 1.70883509516716, "epoch": 1.857982477822636, "grad_norm": 0.6626452207565308, "learning_rate": 2.237426829262027e-06, "loss": 1.3597, "mean_token_accuracy": 0.6688442379236221, "num_tokens": 2836523172.0, "step": 16913 }, { "entropy": 1.7223861614863079, "epoch": 1.858092334734009, "grad_norm": 1.0268768072128296, "learning_rate": 2.237061624246758e-06, "loss": 1.3974, "mean_token_accuracy": 0.6556852708260218, "num_tokens": 2836659552.0, "step": 16914 }, { "entropy": 1.6183397471904755, "epoch": 1.858202191645382, "grad_norm": 0.6055456399917603, "learning_rate": 2.2366966965745403e-06, "loss": 1.3816, "mean_token_accuracy": 0.6664853493372599, "num_tokens": 2836851444.0, "step": 16915 }, { "entropy": 1.723154256741206, "epoch": 1.8583120485567548, "grad_norm": 0.6002776622772217, "learning_rate": 2.236332046256924e-06, "loss": 1.3431, "mean_token_accuracy": 0.6692459831635157, "num_tokens": 2837003681.0, "step": 16916 }, { "entropy": 1.7682878176371257, "epoch": 1.858421905468128, "grad_norm": 0.6165063977241516, "learning_rate": 2.2359676733054496e-06, "loss": 1.4432, "mean_token_accuracy": 0.6513163695732752, "num_tokens": 2837206049.0, "step": 16917 }, { "entropy": 1.745583325624466, "epoch": 1.8585317623795006, "grad_norm": 0.5849365592002869, "learning_rate": 2.235603577731648e-06, "loss": 1.5011, "mean_token_accuracy": 0.6388779282569885, "num_tokens": 2837413970.0, "step": 16918 }, { "entropy": 1.7166366577148438, "epoch": 1.8586416192908737, "grad_norm": 0.6849484443664551, "learning_rate": 2.2352397595470453e-06, "loss": 1.4588, "mean_token_accuracy": 0.6550758282343546, "num_tokens": 2837575965.0, "step": 16919 }, { "entropy": 1.6985012590885162, "epoch": 1.8587514762022466, "grad_norm": 0.6950430274009705, "learning_rate": 2.2348762187631537e-06, "loss": 1.3575, "mean_token_accuracy": 0.6614127407471339, "num_tokens": 2837715810.0, "step": 16920 }, { "entropy": 1.6506640315055847, "epoch": 1.8588613331136195, "grad_norm": 0.7237517237663269, "learning_rate": 2.2345129553914805e-06, "loss": 1.3152, "mean_token_accuracy": 0.66397192577521, "num_tokens": 2837839931.0, "step": 16921 }, { "entropy": 1.7041799624760945, "epoch": 1.8589711900249926, "grad_norm": 0.7405112981796265, "learning_rate": 2.234149969443522e-06, "loss": 1.5072, "mean_token_accuracy": 0.6494031300147375, "num_tokens": 2838000703.0, "step": 16922 }, { "entropy": 1.6308053533236186, "epoch": 1.8590810469363652, "grad_norm": 0.6319633722305298, "learning_rate": 2.2337872609307677e-06, "loss": 1.3862, "mean_token_accuracy": 0.6686498373746872, "num_tokens": 2838140080.0, "step": 16923 }, { "entropy": 1.7466503183046977, "epoch": 1.8591909038477383, "grad_norm": 0.6635008454322815, "learning_rate": 2.233424829864696e-06, "loss": 1.3945, "mean_token_accuracy": 0.649022842446963, "num_tokens": 2838274430.0, "step": 16924 }, { "entropy": 1.7209342022736867, "epoch": 1.8593007607591112, "grad_norm": 0.7904077172279358, "learning_rate": 2.2330626762567784e-06, "loss": 1.553, "mean_token_accuracy": 0.6467588543891907, "num_tokens": 2838492658.0, "step": 16925 }, { "entropy": 1.6345500747362773, "epoch": 1.859410617670484, "grad_norm": 0.7646093964576721, "learning_rate": 2.2327008001184764e-06, "loss": 1.5033, "mean_token_accuracy": 0.643667072057724, "num_tokens": 2838691595.0, "step": 16926 }, { "entropy": 1.692623883485794, "epoch": 1.8595204745818572, "grad_norm": 0.7697778940200806, "learning_rate": 2.2323392014612425e-06, "loss": 1.3278, "mean_token_accuracy": 0.6717701901992162, "num_tokens": 2838804890.0, "step": 16927 }, { "entropy": 1.676644762357076, "epoch": 1.85963033149323, "grad_norm": 0.6009611487388611, "learning_rate": 2.2319778802965244e-06, "loss": 1.463, "mean_token_accuracy": 0.6408818513154984, "num_tokens": 2839016654.0, "step": 16928 }, { "entropy": 1.719579428434372, "epoch": 1.859740188404603, "grad_norm": 0.6772399544715881, "learning_rate": 2.2316168366357533e-06, "loss": 1.4172, "mean_token_accuracy": 0.6630838066339493, "num_tokens": 2839199453.0, "step": 16929 }, { "entropy": 1.680406113465627, "epoch": 1.859850045315976, "grad_norm": 0.7438538074493408, "learning_rate": 2.2312560704903586e-06, "loss": 1.4936, "mean_token_accuracy": 0.6705125272274017, "num_tokens": 2839374638.0, "step": 16930 }, { "entropy": 1.6635006268819172, "epoch": 1.8599599022273487, "grad_norm": 0.6747929453849792, "learning_rate": 2.230895581871759e-06, "loss": 1.302, "mean_token_accuracy": 0.6731831183036169, "num_tokens": 2839525698.0, "step": 16931 }, { "entropy": 1.7203009327252705, "epoch": 1.8600697591387219, "grad_norm": 0.6208789348602295, "learning_rate": 2.2305353707913624e-06, "loss": 1.5573, "mean_token_accuracy": 0.6403620640436808, "num_tokens": 2839743378.0, "step": 16932 }, { "entropy": 1.7072515587011974, "epoch": 1.8601796160500947, "grad_norm": 0.6848557591438293, "learning_rate": 2.230175437260569e-06, "loss": 1.289, "mean_token_accuracy": 0.6748117307821909, "num_tokens": 2839860106.0, "step": 16933 }, { "entropy": 1.6722529927889507, "epoch": 1.8602894729614676, "grad_norm": 0.7470364570617676, "learning_rate": 2.229815781290772e-06, "loss": 1.3631, "mean_token_accuracy": 0.6619627922773361, "num_tokens": 2840008850.0, "step": 16934 }, { "entropy": 1.7064866026242573, "epoch": 1.8603993298728407, "grad_norm": 0.692008376121521, "learning_rate": 2.229456402893352e-06, "loss": 1.3543, "mean_token_accuracy": 0.6580140540997187, "num_tokens": 2840145150.0, "step": 16935 }, { "entropy": 1.6983463366826375, "epoch": 1.8605091867842134, "grad_norm": 0.6857158541679382, "learning_rate": 2.2290973020796873e-06, "loss": 1.3891, "mean_token_accuracy": 0.6641733994086584, "num_tokens": 2840296513.0, "step": 16936 }, { "entropy": 1.7579893469810486, "epoch": 1.8606190436955865, "grad_norm": 0.5688817501068115, "learning_rate": 2.228738478861139e-06, "loss": 1.4527, "mean_token_accuracy": 0.631959984699885, "num_tokens": 2840489377.0, "step": 16937 }, { "entropy": 1.7483516136805217, "epoch": 1.8607289006069594, "grad_norm": 0.6688814163208008, "learning_rate": 2.228379933249066e-06, "loss": 1.4025, "mean_token_accuracy": 0.651861771941185, "num_tokens": 2840638757.0, "step": 16938 }, { "entropy": 1.745933045943578, "epoch": 1.8608387575183323, "grad_norm": 0.6227981448173523, "learning_rate": 2.2280216652548144e-06, "loss": 1.3729, "mean_token_accuracy": 0.6608076989650726, "num_tokens": 2840812693.0, "step": 16939 }, { "entropy": 1.6749466558297474, "epoch": 1.8609486144297054, "grad_norm": 0.6650595664978027, "learning_rate": 2.2276636748897264e-06, "loss": 1.4298, "mean_token_accuracy": 0.6458970904350281, "num_tokens": 2840956606.0, "step": 16940 }, { "entropy": 1.7333874702453613, "epoch": 1.8610584713410783, "grad_norm": 0.6113640069961548, "learning_rate": 2.227305962165129e-06, "loss": 1.4095, "mean_token_accuracy": 0.6591263363758723, "num_tokens": 2841175897.0, "step": 16941 }, { "entropy": 1.7647278308868408, "epoch": 1.8611683282524512, "grad_norm": 0.9023246765136719, "learning_rate": 2.2269485270923446e-06, "loss": 1.3244, "mean_token_accuracy": 0.6695207307736079, "num_tokens": 2841295330.0, "step": 16942 }, { "entropy": 1.7428401311238606, "epoch": 1.8612781851638243, "grad_norm": 0.6869433522224426, "learning_rate": 2.2265913696826865e-06, "loss": 1.3092, "mean_token_accuracy": 0.6689073791106542, "num_tokens": 2841438192.0, "step": 16943 }, { "entropy": 1.7334860563278198, "epoch": 1.861388042075197, "grad_norm": 0.6899425983428955, "learning_rate": 2.2262344899474585e-06, "loss": 1.3344, "mean_token_accuracy": 0.6681728015343348, "num_tokens": 2841567347.0, "step": 16944 }, { "entropy": 1.7800839046637218, "epoch": 1.86149789898657, "grad_norm": 0.744773805141449, "learning_rate": 2.225877887897954e-06, "loss": 1.4977, "mean_token_accuracy": 0.6642592052618662, "num_tokens": 2841725960.0, "step": 16945 }, { "entropy": 1.6366903285185497, "epoch": 1.861607755897943, "grad_norm": 0.6069456934928894, "learning_rate": 2.2255215635454618e-06, "loss": 1.401, "mean_token_accuracy": 0.6623266190290451, "num_tokens": 2841915773.0, "step": 16946 }, { "entropy": 1.6906556288401287, "epoch": 1.8617176128093158, "grad_norm": 0.5900170803070068, "learning_rate": 2.225165516901257e-06, "loss": 1.5347, "mean_token_accuracy": 0.636528434852759, "num_tokens": 2842126533.0, "step": 16947 }, { "entropy": 1.7210322221120198, "epoch": 1.861827469720689, "grad_norm": 0.6937727928161621, "learning_rate": 2.2248097479766114e-06, "loss": 1.4849, "mean_token_accuracy": 0.651384433110555, "num_tokens": 2842268373.0, "step": 16948 }, { "entropy": 1.6970987915992737, "epoch": 1.8619373266320616, "grad_norm": 0.8207261562347412, "learning_rate": 2.224454256782783e-06, "loss": 1.4079, "mean_token_accuracy": 0.6684905638297399, "num_tokens": 2842422883.0, "step": 16949 }, { "entropy": 1.7192309498786926, "epoch": 1.8620471835434347, "grad_norm": 0.7105966806411743, "learning_rate": 2.2240990433310218e-06, "loss": 1.3975, "mean_token_accuracy": 0.6547664652268091, "num_tokens": 2842586220.0, "step": 16950 }, { "entropy": 1.714976857105891, "epoch": 1.8621570404548076, "grad_norm": 0.6246412396430969, "learning_rate": 2.2237441076325714e-06, "loss": 1.4338, "mean_token_accuracy": 0.652628536025683, "num_tokens": 2842777503.0, "step": 16951 }, { "entropy": 1.6915887892246246, "epoch": 1.8622668973661805, "grad_norm": 0.6842101216316223, "learning_rate": 2.223389449698666e-06, "loss": 1.3436, "mean_token_accuracy": 0.6701055020093918, "num_tokens": 2842904821.0, "step": 16952 }, { "entropy": 1.6738732159137726, "epoch": 1.8623767542775536, "grad_norm": 0.7973695397377014, "learning_rate": 2.2230350695405288e-06, "loss": 1.3581, "mean_token_accuracy": 0.6813636471827825, "num_tokens": 2843019612.0, "step": 16953 }, { "entropy": 1.682248741388321, "epoch": 1.8624866111889264, "grad_norm": 0.6853655576705933, "learning_rate": 2.222680967169377e-06, "loss": 1.3193, "mean_token_accuracy": 0.6603673497835795, "num_tokens": 2843174426.0, "step": 16954 }, { "entropy": 1.7681555946667988, "epoch": 1.8625964681002993, "grad_norm": 0.7105289101600647, "learning_rate": 2.2223271425964182e-06, "loss": 1.3293, "mean_token_accuracy": 0.67288438975811, "num_tokens": 2843305957.0, "step": 16955 }, { "entropy": 1.7183941106001537, "epoch": 1.8627063250116724, "grad_norm": 0.7996242642402649, "learning_rate": 2.22197359583285e-06, "loss": 1.53, "mean_token_accuracy": 0.6441441575686137, "num_tokens": 2843524423.0, "step": 16956 }, { "entropy": 1.7308327456315358, "epoch": 1.862816181923045, "grad_norm": 0.7495532035827637, "learning_rate": 2.2216203268898605e-06, "loss": 1.3608, "mean_token_accuracy": 0.6570025732119879, "num_tokens": 2843657802.0, "step": 16957 }, { "entropy": 1.6802193820476532, "epoch": 1.8629260388344182, "grad_norm": 0.7395232915878296, "learning_rate": 2.2212673357786333e-06, "loss": 1.3952, "mean_token_accuracy": 0.6501044581333796, "num_tokens": 2843822977.0, "step": 16958 }, { "entropy": 1.706279416879018, "epoch": 1.863035895745791, "grad_norm": 0.7118804454803467, "learning_rate": 2.220914622510339e-06, "loss": 1.4068, "mean_token_accuracy": 0.6517497350772222, "num_tokens": 2843988396.0, "step": 16959 }, { "entropy": 1.6896512309710185, "epoch": 1.863145752657164, "grad_norm": 0.7015063166618347, "learning_rate": 2.2205621870961405e-06, "loss": 1.5505, "mean_token_accuracy": 0.6469273467858633, "num_tokens": 2844136156.0, "step": 16960 }, { "entropy": 1.6428396503130596, "epoch": 1.863255609568537, "grad_norm": 0.7962595820426941, "learning_rate": 2.2202100295471937e-06, "loss": 1.1938, "mean_token_accuracy": 0.6869229475657145, "num_tokens": 2844254113.0, "step": 16961 }, { "entropy": 1.707498123248418, "epoch": 1.86336546647991, "grad_norm": 0.6344748139381409, "learning_rate": 2.219858149874642e-06, "loss": 1.4643, "mean_token_accuracy": 0.6413133492072424, "num_tokens": 2844456777.0, "step": 16962 }, { "entropy": 1.6804148157437642, "epoch": 1.8634753233912829, "grad_norm": 0.6727264523506165, "learning_rate": 2.219506548089623e-06, "loss": 1.1781, "mean_token_accuracy": 0.6860218544801077, "num_tokens": 2844572149.0, "step": 16963 }, { "entropy": 1.7345021267731984, "epoch": 1.8635851803026557, "grad_norm": 0.7716752290725708, "learning_rate": 2.219155224203268e-06, "loss": 1.4514, "mean_token_accuracy": 0.6558243483304977, "num_tokens": 2844746134.0, "step": 16964 }, { "entropy": 1.717079867919286, "epoch": 1.8636950372140286, "grad_norm": 0.7007601857185364, "learning_rate": 2.2188041782266905e-06, "loss": 1.5099, "mean_token_accuracy": 0.6400510122378668, "num_tokens": 2844934759.0, "step": 16965 }, { "entropy": 1.6786939601103466, "epoch": 1.8638048941254017, "grad_norm": 0.6247925758361816, "learning_rate": 2.2184534101710043e-06, "loss": 1.3304, "mean_token_accuracy": 0.6726222485303879, "num_tokens": 2845083838.0, "step": 16966 }, { "entropy": 1.7275762955347698, "epoch": 1.8639147510367746, "grad_norm": 0.8030937910079956, "learning_rate": 2.2181029200473123e-06, "loss": 1.2345, "mean_token_accuracy": 0.6929545601209005, "num_tokens": 2845188998.0, "step": 16967 }, { "entropy": 1.7728230853875477, "epoch": 1.8640246079481475, "grad_norm": 0.7184717059135437, "learning_rate": 2.217752707866704e-06, "loss": 1.4425, "mean_token_accuracy": 0.6444051365057627, "num_tokens": 2845371280.0, "step": 16968 }, { "entropy": 1.75874129931132, "epoch": 1.8641344648595206, "grad_norm": 0.7662733793258667, "learning_rate": 2.217402773640265e-06, "loss": 1.4214, "mean_token_accuracy": 0.6515029867490133, "num_tokens": 2845525478.0, "step": 16969 }, { "entropy": 1.7064630488554637, "epoch": 1.8642443217708933, "grad_norm": 0.7449208498001099, "learning_rate": 2.2170531173790722e-06, "loss": 1.3713, "mean_token_accuracy": 0.6680237799882889, "num_tokens": 2845670438.0, "step": 16970 }, { "entropy": 1.6645330289999645, "epoch": 1.8643541786822664, "grad_norm": 0.713966429233551, "learning_rate": 2.2167037390941892e-06, "loss": 1.4005, "mean_token_accuracy": 0.6721183756987253, "num_tokens": 2845825623.0, "step": 16971 }, { "entropy": 1.7196373244126637, "epoch": 1.8644640355936393, "grad_norm": 0.7147789597511292, "learning_rate": 2.2163546387966756e-06, "loss": 1.5454, "mean_token_accuracy": 0.6515597999095917, "num_tokens": 2845991287.0, "step": 16972 }, { "entropy": 1.704675664504369, "epoch": 1.8645738925050122, "grad_norm": 0.6308305263519287, "learning_rate": 2.21600581649758e-06, "loss": 1.3732, "mean_token_accuracy": 0.6472256034612656, "num_tokens": 2846154390.0, "step": 16973 }, { "entropy": 1.6952514847119649, "epoch": 1.8646837494163853, "grad_norm": 0.8253968358039856, "learning_rate": 2.2156572722079413e-06, "loss": 1.2201, "mean_token_accuracy": 0.6835501392682394, "num_tokens": 2846297166.0, "step": 16974 }, { "entropy": 1.7401870787143707, "epoch": 1.8647936063277581, "grad_norm": 0.9213194847106934, "learning_rate": 2.2153090059387926e-06, "loss": 1.4246, "mean_token_accuracy": 0.6445530652999878, "num_tokens": 2846470925.0, "step": 16975 }, { "entropy": 1.7035265266895294, "epoch": 1.864903463239131, "grad_norm": 0.6574537754058838, "learning_rate": 2.2149610177011547e-06, "loss": 1.4512, "mean_token_accuracy": 0.6518820325533549, "num_tokens": 2846627363.0, "step": 16976 }, { "entropy": 1.6686862508455913, "epoch": 1.865013320150504, "grad_norm": 0.7016027569770813, "learning_rate": 2.2146133075060412e-06, "loss": 1.5151, "mean_token_accuracy": 0.651968797047933, "num_tokens": 2846810432.0, "step": 16977 }, { "entropy": 1.673084298769633, "epoch": 1.8651231770618768, "grad_norm": 0.5893659591674805, "learning_rate": 2.2142658753644593e-06, "loss": 1.449, "mean_token_accuracy": 0.6452168524265289, "num_tokens": 2847041747.0, "step": 16978 }, { "entropy": 1.6141287585099537, "epoch": 1.86523303397325, "grad_norm": 0.7347483038902283, "learning_rate": 2.213918721287402e-06, "loss": 1.2958, "mean_token_accuracy": 0.6759419937928518, "num_tokens": 2847193557.0, "step": 16979 }, { "entropy": 1.71195982893308, "epoch": 1.8653428908846228, "grad_norm": 0.7597905993461609, "learning_rate": 2.2135718452858598e-06, "loss": 1.4343, "mean_token_accuracy": 0.6534530371427536, "num_tokens": 2847395961.0, "step": 16980 }, { "entropy": 1.726847916841507, "epoch": 1.8654527477959957, "grad_norm": 0.751272976398468, "learning_rate": 2.213225247370808e-06, "loss": 1.4013, "mean_token_accuracy": 0.6617669512828191, "num_tokens": 2847577102.0, "step": 16981 }, { "entropy": 1.7482527395089467, "epoch": 1.8655626047073688, "grad_norm": 0.7296915054321289, "learning_rate": 2.2128789275532175e-06, "loss": 1.5129, "mean_token_accuracy": 0.6335967232783636, "num_tokens": 2847772021.0, "step": 16982 }, { "entropy": 1.6838180720806122, "epoch": 1.8656724616187415, "grad_norm": 0.5979762077331543, "learning_rate": 2.2125328858440503e-06, "loss": 1.5231, "mean_token_accuracy": 0.6346350063880285, "num_tokens": 2847965351.0, "step": 16983 }, { "entropy": 1.7451152900854747, "epoch": 1.8657823185301146, "grad_norm": 0.6949437856674194, "learning_rate": 2.212187122254258e-06, "loss": 1.3898, "mean_token_accuracy": 0.6568313439687093, "num_tokens": 2848141395.0, "step": 16984 }, { "entropy": 1.7283788720766704, "epoch": 1.8658921754414874, "grad_norm": 0.6480981111526489, "learning_rate": 2.211841636794783e-06, "loss": 1.4786, "mean_token_accuracy": 0.6345923642317454, "num_tokens": 2848375079.0, "step": 16985 }, { "entropy": 1.7344995041688283, "epoch": 1.8660020323528603, "grad_norm": 0.7044827342033386, "learning_rate": 2.211496429476559e-06, "loss": 1.4727, "mean_token_accuracy": 0.648894136150678, "num_tokens": 2848592441.0, "step": 16986 }, { "entropy": 1.7405148645242055, "epoch": 1.8661118892642334, "grad_norm": 0.6550964117050171, "learning_rate": 2.2111515003105137e-06, "loss": 1.3739, "mean_token_accuracy": 0.656710093220075, "num_tokens": 2848736248.0, "step": 16987 }, { "entropy": 1.6667698224385579, "epoch": 1.8662217461756063, "grad_norm": 0.7342043519020081, "learning_rate": 2.2108068493075634e-06, "loss": 1.2817, "mean_token_accuracy": 0.676526720325152, "num_tokens": 2848898107.0, "step": 16988 }, { "entropy": 1.6982735991477966, "epoch": 1.8663316030869792, "grad_norm": 0.7887325286865234, "learning_rate": 2.2104624764786152e-06, "loss": 1.2851, "mean_token_accuracy": 0.673782487710317, "num_tokens": 2849023372.0, "step": 16989 }, { "entropy": 1.806358168522517, "epoch": 1.866441459998352, "grad_norm": 0.7065950036048889, "learning_rate": 2.210118381834569e-06, "loss": 1.5251, "mean_token_accuracy": 0.640480175614357, "num_tokens": 2849217998.0, "step": 16990 }, { "entropy": 1.6777258316675823, "epoch": 1.866551316909725, "grad_norm": 0.6541003584861755, "learning_rate": 2.2097745653863156e-06, "loss": 1.3798, "mean_token_accuracy": 0.6534243921438853, "num_tokens": 2849428641.0, "step": 16991 }, { "entropy": 1.741273860136668, "epoch": 1.866661173821098, "grad_norm": 0.7950140833854675, "learning_rate": 2.2094310271447355e-06, "loss": 1.3057, "mean_token_accuracy": 0.6594074964523315, "num_tokens": 2849527531.0, "step": 16992 }, { "entropy": 1.6880672574043274, "epoch": 1.866771030732471, "grad_norm": 0.6249304413795471, "learning_rate": 2.209087767120704e-06, "loss": 1.4085, "mean_token_accuracy": 0.6540153622627258, "num_tokens": 2849684171.0, "step": 16993 }, { "entropy": 1.7445914248625438, "epoch": 1.8668808876438439, "grad_norm": 0.6921653747558594, "learning_rate": 2.208744785325081e-06, "loss": 1.389, "mean_token_accuracy": 0.6449531565109888, "num_tokens": 2849836896.0, "step": 16994 }, { "entropy": 1.7295080125331879, "epoch": 1.866990744555217, "grad_norm": 0.5937165021896362, "learning_rate": 2.2084020817687253e-06, "loss": 1.4598, "mean_token_accuracy": 0.6409016450246176, "num_tokens": 2850047785.0, "step": 16995 }, { "entropy": 1.7254140277703602, "epoch": 1.8671006014665896, "grad_norm": 1.068790078163147, "learning_rate": 2.208059656462482e-06, "loss": 1.3398, "mean_token_accuracy": 0.6658286303281784, "num_tokens": 2850181291.0, "step": 16996 }, { "entropy": 1.7456210553646088, "epoch": 1.8672104583779627, "grad_norm": 0.6479652523994446, "learning_rate": 2.2077175094171903e-06, "loss": 1.3958, "mean_token_accuracy": 0.6518608878056208, "num_tokens": 2850309540.0, "step": 16997 }, { "entropy": 1.6959585348765056, "epoch": 1.8673203152893356, "grad_norm": 0.5444127321243286, "learning_rate": 2.207375640643675e-06, "loss": 1.3446, "mean_token_accuracy": 0.6572243670622507, "num_tokens": 2850495775.0, "step": 16998 }, { "entropy": 1.7165914873282115, "epoch": 1.8674301722007085, "grad_norm": 0.7069491744041443, "learning_rate": 2.2070340501527597e-06, "loss": 1.3456, "mean_token_accuracy": 0.6599544485410055, "num_tokens": 2850651848.0, "step": 16999 }, { "entropy": 1.6840533812840779, "epoch": 1.8675400291120816, "grad_norm": 0.6812978982925415, "learning_rate": 2.206692737955256e-06, "loss": 1.2676, "mean_token_accuracy": 0.6691629191239675, "num_tokens": 2850765863.0, "step": 17000 }, { "entropy": 1.6662333011627197, "epoch": 1.8676498860234545, "grad_norm": 0.7810607552528381, "learning_rate": 2.206351704061963e-06, "loss": 1.5147, "mean_token_accuracy": 0.6441531578699747, "num_tokens": 2850957710.0, "step": 17001 }, { "entropy": 1.778091549873352, "epoch": 1.8677597429348274, "grad_norm": 0.6844190955162048, "learning_rate": 2.2060109484836766e-06, "loss": 1.4703, "mean_token_accuracy": 0.6522639393806458, "num_tokens": 2851124151.0, "step": 17002 }, { "entropy": 1.772169252236684, "epoch": 1.8678695998462005, "grad_norm": 0.7324439883232117, "learning_rate": 2.20567047123118e-06, "loss": 1.401, "mean_token_accuracy": 0.6545427242914835, "num_tokens": 2851257624.0, "step": 17003 }, { "entropy": 1.6680610577265422, "epoch": 1.8679794567575732, "grad_norm": 0.6523362994194031, "learning_rate": 2.205330272315251e-06, "loss": 1.3719, "mean_token_accuracy": 0.6796788175900778, "num_tokens": 2851407097.0, "step": 17004 }, { "entropy": 1.7212667365868886, "epoch": 1.8680893136689463, "grad_norm": 0.7622836232185364, "learning_rate": 2.204990351746657e-06, "loss": 1.4981, "mean_token_accuracy": 0.6425471156835556, "num_tokens": 2851594805.0, "step": 17005 }, { "entropy": 1.7824140787124634, "epoch": 1.8681991705803191, "grad_norm": 0.8553072214126587, "learning_rate": 2.204650709536153e-06, "loss": 1.6837, "mean_token_accuracy": 0.6378919730583826, "num_tokens": 2851761200.0, "step": 17006 }, { "entropy": 1.7299172381560008, "epoch": 1.868309027491692, "grad_norm": 0.8043472766876221, "learning_rate": 2.204311345694492e-06, "loss": 1.4065, "mean_token_accuracy": 0.6654603232940038, "num_tokens": 2851896719.0, "step": 17007 }, { "entropy": 1.7903384566307068, "epoch": 1.8684188844030651, "grad_norm": 0.8522409200668335, "learning_rate": 2.203972260232415e-06, "loss": 1.3412, "mean_token_accuracy": 0.6667458862066269, "num_tokens": 2852084088.0, "step": 17008 }, { "entropy": 1.649288256963094, "epoch": 1.8685287413144378, "grad_norm": 0.6457241177558899, "learning_rate": 2.20363345316065e-06, "loss": 1.2725, "mean_token_accuracy": 0.6749263107776642, "num_tokens": 2852201978.0, "step": 17009 }, { "entropy": 1.6585692763328552, "epoch": 1.868638598225811, "grad_norm": 0.6605546474456787, "learning_rate": 2.203294924489922e-06, "loss": 1.2864, "mean_token_accuracy": 0.6589010854562124, "num_tokens": 2852379096.0, "step": 17010 }, { "entropy": 1.688430259625117, "epoch": 1.8687484551371838, "grad_norm": 0.6104924082756042, "learning_rate": 2.202956674230948e-06, "loss": 1.3572, "mean_token_accuracy": 0.67152139544487, "num_tokens": 2852565308.0, "step": 17011 }, { "entropy": 1.6815722684065502, "epoch": 1.8688583120485567, "grad_norm": 0.6199679970741272, "learning_rate": 2.202618702394431e-06, "loss": 1.3527, "mean_token_accuracy": 0.6769666820764542, "num_tokens": 2852721982.0, "step": 17012 }, { "entropy": 1.699970543384552, "epoch": 1.8689681689599298, "grad_norm": 0.6916362643241882, "learning_rate": 2.202281008991066e-06, "loss": 1.3273, "mean_token_accuracy": 0.6627818942070007, "num_tokens": 2852855715.0, "step": 17013 }, { "entropy": 1.7449293434619904, "epoch": 1.8690780258713027, "grad_norm": 0.6241941452026367, "learning_rate": 2.2019435940315435e-06, "loss": 1.5613, "mean_token_accuracy": 0.6198464930057526, "num_tokens": 2853091879.0, "step": 17014 }, { "entropy": 1.6857063074906666, "epoch": 1.8691878827826756, "grad_norm": 0.7108656167984009, "learning_rate": 2.2016064575265426e-06, "loss": 1.3398, "mean_token_accuracy": 0.6637291759252548, "num_tokens": 2853218575.0, "step": 17015 }, { "entropy": 1.6702306667963664, "epoch": 1.8692977396940487, "grad_norm": 0.688471794128418, "learning_rate": 2.201269599486732e-06, "loss": 1.3163, "mean_token_accuracy": 0.6772701740264893, "num_tokens": 2853381585.0, "step": 17016 }, { "entropy": 1.7522780398527782, "epoch": 1.8694075966054213, "grad_norm": 0.6958498358726501, "learning_rate": 2.2009330199227746e-06, "loss": 1.3844, "mean_token_accuracy": 0.6645175168911616, "num_tokens": 2853526534.0, "step": 17017 }, { "entropy": 1.6620089908440907, "epoch": 1.8695174535167944, "grad_norm": 0.7110161185264587, "learning_rate": 2.2005967188453206e-06, "loss": 1.2126, "mean_token_accuracy": 0.6797206650177637, "num_tokens": 2853685590.0, "step": 17018 }, { "entropy": 1.6503388981024425, "epoch": 1.8696273104281673, "grad_norm": 0.6182279586791992, "learning_rate": 2.200260696265016e-06, "loss": 1.6693, "mean_token_accuracy": 0.6140792071819305, "num_tokens": 2853949541.0, "step": 17019 }, { "entropy": 1.735267659028371, "epoch": 1.8697371673395402, "grad_norm": 0.7907410860061646, "learning_rate": 2.199924952192496e-06, "loss": 1.4237, "mean_token_accuracy": 0.6509945740302404, "num_tokens": 2854077562.0, "step": 17020 }, { "entropy": 1.7652918795744579, "epoch": 1.8698470242509133, "grad_norm": 0.9519900679588318, "learning_rate": 2.1995894866383844e-06, "loss": 1.3656, "mean_token_accuracy": 0.6748112390438715, "num_tokens": 2854228561.0, "step": 17021 }, { "entropy": 1.775589833656947, "epoch": 1.869956881162286, "grad_norm": 0.7908014059066772, "learning_rate": 2.1992542996133008e-06, "loss": 1.4917, "mean_token_accuracy": 0.6449446976184845, "num_tokens": 2854352461.0, "step": 17022 }, { "entropy": 1.6987537741661072, "epoch": 1.870066738073659, "grad_norm": 0.7079589366912842, "learning_rate": 2.198919391127854e-06, "loss": 1.452, "mean_token_accuracy": 0.6492635756731033, "num_tokens": 2854509134.0, "step": 17023 }, { "entropy": 1.7013746201992035, "epoch": 1.870176594985032, "grad_norm": 0.5822315216064453, "learning_rate": 2.1985847611926412e-06, "loss": 1.3831, "mean_token_accuracy": 0.6505730946858724, "num_tokens": 2854693579.0, "step": 17024 }, { "entropy": 1.6798737148443859, "epoch": 1.8702864518964049, "grad_norm": 0.6504672169685364, "learning_rate": 2.1982504098182543e-06, "loss": 1.4304, "mean_token_accuracy": 0.6504451334476471, "num_tokens": 2854874531.0, "step": 17025 }, { "entropy": 1.6749244034290314, "epoch": 1.870396308807778, "grad_norm": 0.8020433187484741, "learning_rate": 2.197916337015277e-06, "loss": 1.2575, "mean_token_accuracy": 0.6731888701518377, "num_tokens": 2855033432.0, "step": 17026 }, { "entropy": 1.7233870228131611, "epoch": 1.8705061657191508, "grad_norm": 0.6521610617637634, "learning_rate": 2.1975825427942797e-06, "loss": 1.2734, "mean_token_accuracy": 0.6759884258111318, "num_tokens": 2855157619.0, "step": 17027 }, { "entropy": 1.7447825769583385, "epoch": 1.8706160226305237, "grad_norm": 0.672861635684967, "learning_rate": 2.1972490271658304e-06, "loss": 1.3972, "mean_token_accuracy": 0.6574916392564774, "num_tokens": 2855387405.0, "step": 17028 }, { "entropy": 1.7002271513144176, "epoch": 1.8707258795418968, "grad_norm": 0.6032365560531616, "learning_rate": 2.1969157901404825e-06, "loss": 1.4316, "mean_token_accuracy": 0.649682859579722, "num_tokens": 2855554119.0, "step": 17029 }, { "entropy": 1.6851763526598613, "epoch": 1.8708357364532695, "grad_norm": 0.759894609451294, "learning_rate": 2.1965828317287816e-06, "loss": 1.3552, "mean_token_accuracy": 0.6525135089953741, "num_tokens": 2855676220.0, "step": 17030 }, { "entropy": 1.7493245204289753, "epoch": 1.8709455933646426, "grad_norm": 0.7808289527893066, "learning_rate": 2.1962501519412676e-06, "loss": 1.2661, "mean_token_accuracy": 0.674684152007103, "num_tokens": 2855798387.0, "step": 17031 }, { "entropy": 1.7108966807524364, "epoch": 1.8710554502760155, "grad_norm": 0.6273822784423828, "learning_rate": 2.1959177507884706e-06, "loss": 1.3955, "mean_token_accuracy": 0.6569380015134811, "num_tokens": 2855962363.0, "step": 17032 }, { "entropy": 1.658450762430827, "epoch": 1.8711653071873884, "grad_norm": 0.6467920541763306, "learning_rate": 2.195585628280909e-06, "loss": 1.5203, "mean_token_accuracy": 0.6477319151163101, "num_tokens": 2856134799.0, "step": 17033 }, { "entropy": 1.7665147085984547, "epoch": 1.8712751640987615, "grad_norm": 0.7447314262390137, "learning_rate": 2.1952537844290942e-06, "loss": 1.4415, "mean_token_accuracy": 0.6656771103541056, "num_tokens": 2856282873.0, "step": 17034 }, { "entropy": 1.715973476568858, "epoch": 1.8713850210101342, "grad_norm": 0.687869131565094, "learning_rate": 2.1949222192435293e-06, "loss": 1.4573, "mean_token_accuracy": 0.6531703372796377, "num_tokens": 2856413198.0, "step": 17035 }, { "entropy": 1.7442041039466858, "epoch": 1.8714948779215073, "grad_norm": 0.6900414824485779, "learning_rate": 2.1945909327347094e-06, "loss": 1.3789, "mean_token_accuracy": 0.6618852317333221, "num_tokens": 2856543909.0, "step": 17036 }, { "entropy": 1.6572751700878143, "epoch": 1.8716047348328801, "grad_norm": 0.6331953406333923, "learning_rate": 2.194259924913119e-06, "loss": 1.2094, "mean_token_accuracy": 0.6854116519292196, "num_tokens": 2856704996.0, "step": 17037 }, { "entropy": 1.769335389137268, "epoch": 1.871714591744253, "grad_norm": 0.7674529552459717, "learning_rate": 2.1939291957892327e-06, "loss": 1.2783, "mean_token_accuracy": 0.6658161183198293, "num_tokens": 2856829001.0, "step": 17038 }, { "entropy": 1.7292616963386536, "epoch": 1.8718244486556261, "grad_norm": 0.6778686046600342, "learning_rate": 2.19359874537352e-06, "loss": 1.5333, "mean_token_accuracy": 0.6352483679850897, "num_tokens": 2857081755.0, "step": 17039 }, { "entropy": 1.6831102867921193, "epoch": 1.871934305566999, "grad_norm": 4.391300678253174, "learning_rate": 2.1932685736764393e-06, "loss": 1.4329, "mean_token_accuracy": 0.6666145275036494, "num_tokens": 2857303953.0, "step": 17040 }, { "entropy": 1.7301143109798431, "epoch": 1.872044162478372, "grad_norm": 0.6847091913223267, "learning_rate": 2.1929386807084392e-06, "loss": 1.3037, "mean_token_accuracy": 0.6623615125815073, "num_tokens": 2857451437.0, "step": 17041 }, { "entropy": 1.7140269080797832, "epoch": 1.872154019389745, "grad_norm": 0.6751673221588135, "learning_rate": 2.192609066479961e-06, "loss": 1.2567, "mean_token_accuracy": 0.6729971269766489, "num_tokens": 2857569788.0, "step": 17042 }, { "entropy": 1.6233059763908386, "epoch": 1.8722638763011177, "grad_norm": 0.697934627532959, "learning_rate": 2.192279731001438e-06, "loss": 1.2847, "mean_token_accuracy": 0.6785244792699814, "num_tokens": 2857745514.0, "step": 17043 }, { "entropy": 1.7246392567952473, "epoch": 1.8723737332124908, "grad_norm": 0.714394211769104, "learning_rate": 2.191950674283292e-06, "loss": 1.4599, "mean_token_accuracy": 0.6533997456232706, "num_tokens": 2857908966.0, "step": 17044 }, { "entropy": 1.6922082702318828, "epoch": 1.8724835901238637, "grad_norm": 0.671735405921936, "learning_rate": 2.191621896335938e-06, "loss": 1.3781, "mean_token_accuracy": 0.6661517322063446, "num_tokens": 2858048527.0, "step": 17045 }, { "entropy": 1.710803061723709, "epoch": 1.8725934470352366, "grad_norm": 0.5997881293296814, "learning_rate": 2.1912933971697833e-06, "loss": 1.3553, "mean_token_accuracy": 0.6474985132614771, "num_tokens": 2858183882.0, "step": 17046 }, { "entropy": 1.7078399062156677, "epoch": 1.8727033039466097, "grad_norm": 0.6537618041038513, "learning_rate": 2.190965176795221e-06, "loss": 1.4829, "mean_token_accuracy": 0.6355342864990234, "num_tokens": 2858436938.0, "step": 17047 }, { "entropy": 1.717695951461792, "epoch": 1.8728131608579823, "grad_norm": 0.7487478256225586, "learning_rate": 2.1906372352226434e-06, "loss": 1.5507, "mean_token_accuracy": 0.6489723970492681, "num_tokens": 2858632964.0, "step": 17048 }, { "entropy": 1.7423529624938965, "epoch": 1.8729230177693554, "grad_norm": 0.59836745262146, "learning_rate": 2.1903095724624266e-06, "loss": 1.416, "mean_token_accuracy": 0.639144832889239, "num_tokens": 2858829021.0, "step": 17049 }, { "entropy": 1.7561921576658885, "epoch": 1.8730328746807283, "grad_norm": 0.7711092829704285, "learning_rate": 2.1899821885249423e-06, "loss": 1.4651, "mean_token_accuracy": 0.6511860340833664, "num_tokens": 2858947061.0, "step": 17050 }, { "entropy": 1.6973484357198079, "epoch": 1.8731427315921012, "grad_norm": 0.6623829007148743, "learning_rate": 2.189655083420551e-06, "loss": 1.3509, "mean_token_accuracy": 0.666906327009201, "num_tokens": 2859138268.0, "step": 17051 }, { "entropy": 1.7268371681372325, "epoch": 1.8732525885034743, "grad_norm": 0.7016375064849854, "learning_rate": 2.1893282571596075e-06, "loss": 1.5942, "mean_token_accuracy": 0.6331303964058558, "num_tokens": 2859322249.0, "step": 17052 }, { "entropy": 1.6590932706991832, "epoch": 1.8733624454148472, "grad_norm": 0.6531033515930176, "learning_rate": 2.189001709752454e-06, "loss": 1.4715, "mean_token_accuracy": 0.6680138657490412, "num_tokens": 2859528608.0, "step": 17053 }, { "entropy": 1.6124554375807445, "epoch": 1.87347230232622, "grad_norm": 0.7286319136619568, "learning_rate": 2.1886754412094264e-06, "loss": 1.2273, "mean_token_accuracy": 0.6924339234828949, "num_tokens": 2859687765.0, "step": 17054 }, { "entropy": 1.676022340854009, "epoch": 1.8735821592375932, "grad_norm": 0.6995905637741089, "learning_rate": 2.1883494515408502e-06, "loss": 1.3037, "mean_token_accuracy": 0.677752767999967, "num_tokens": 2859809649.0, "step": 17055 }, { "entropy": 1.6995362242062886, "epoch": 1.8736920161489659, "grad_norm": 0.8846271634101868, "learning_rate": 2.1880237407570444e-06, "loss": 1.4299, "mean_token_accuracy": 0.6495392918586731, "num_tokens": 2859990844.0, "step": 17056 }, { "entropy": 1.7364083031813304, "epoch": 1.873801873060339, "grad_norm": 0.6322318315505981, "learning_rate": 2.1876983088683143e-06, "loss": 1.4336, "mean_token_accuracy": 0.6391441822052002, "num_tokens": 2860194791.0, "step": 17057 }, { "entropy": 1.7000387012958527, "epoch": 1.8739117299717118, "grad_norm": 0.7319401502609253, "learning_rate": 2.187373155884964e-06, "loss": 1.4675, "mean_token_accuracy": 0.6507869611183802, "num_tokens": 2860341823.0, "step": 17058 }, { "entropy": 1.7169471581776936, "epoch": 1.8740215868830847, "grad_norm": 0.7342702746391296, "learning_rate": 2.1870482818172806e-06, "loss": 1.3525, "mean_token_accuracy": 0.6542551517486572, "num_tokens": 2860451598.0, "step": 17059 }, { "entropy": 1.690687209367752, "epoch": 1.8741314437944578, "grad_norm": 0.8333070874214172, "learning_rate": 2.1867236866755485e-06, "loss": 1.4478, "mean_token_accuracy": 0.6612561593453089, "num_tokens": 2860613234.0, "step": 17060 }, { "entropy": 1.6780410210291545, "epoch": 1.8742413007058305, "grad_norm": 0.6751420497894287, "learning_rate": 2.186399370470041e-06, "loss": 1.3559, "mean_token_accuracy": 0.6728497544924418, "num_tokens": 2860791756.0, "step": 17061 }, { "entropy": 1.6945658723513286, "epoch": 1.8743511576172036, "grad_norm": 0.6549538373947144, "learning_rate": 2.186075333211021e-06, "loss": 1.3931, "mean_token_accuracy": 0.6677144765853882, "num_tokens": 2861011997.0, "step": 17062 }, { "entropy": 1.7178350687026978, "epoch": 1.8744610145285765, "grad_norm": 0.8077467679977417, "learning_rate": 2.1857515749087446e-06, "loss": 1.4099, "mean_token_accuracy": 0.6525556395451227, "num_tokens": 2861207983.0, "step": 17063 }, { "entropy": 1.7497240900993347, "epoch": 1.8745708714399494, "grad_norm": 0.9003075957298279, "learning_rate": 2.1854280955734598e-06, "loss": 1.4174, "mean_token_accuracy": 0.6599795470635096, "num_tokens": 2861327750.0, "step": 17064 }, { "entropy": 1.6928727825482686, "epoch": 1.8746807283513225, "grad_norm": 0.6773639917373657, "learning_rate": 2.185104895215404e-06, "loss": 1.3828, "mean_token_accuracy": 0.6638530343770981, "num_tokens": 2861505270.0, "step": 17065 }, { "entropy": 1.7255871494611104, "epoch": 1.8747905852626954, "grad_norm": 0.6331843733787537, "learning_rate": 2.1847819738448052e-06, "loss": 1.4456, "mean_token_accuracy": 0.6406483799219131, "num_tokens": 2861670381.0, "step": 17066 }, { "entropy": 1.7596171100934346, "epoch": 1.8749004421740683, "grad_norm": 0.8675169348716736, "learning_rate": 2.1844593314718867e-06, "loss": 1.4466, "mean_token_accuracy": 0.6425968805948893, "num_tokens": 2861824862.0, "step": 17067 }, { "entropy": 1.6844032406806946, "epoch": 1.8750102990854414, "grad_norm": 0.7364755868911743, "learning_rate": 2.184136968106857e-06, "loss": 1.3846, "mean_token_accuracy": 0.6768645147482554, "num_tokens": 2861966016.0, "step": 17068 }, { "entropy": 1.6982781986395519, "epoch": 1.875120155996814, "grad_norm": 0.7753962278366089, "learning_rate": 2.1838148837599186e-06, "loss": 1.2406, "mean_token_accuracy": 0.6812696407238642, "num_tokens": 2862084902.0, "step": 17069 }, { "entropy": 1.715488960345586, "epoch": 1.8752300129081871, "grad_norm": 0.6685140132904053, "learning_rate": 2.183493078441268e-06, "loss": 1.3345, "mean_token_accuracy": 0.6754670590162277, "num_tokens": 2862300683.0, "step": 17070 }, { "entropy": 1.7158759633700054, "epoch": 1.87533986981956, "grad_norm": 0.6193472743034363, "learning_rate": 2.183171552161088e-06, "loss": 1.3894, "mean_token_accuracy": 0.6529115388790766, "num_tokens": 2862469396.0, "step": 17071 }, { "entropy": 1.7098310788472493, "epoch": 1.875449726730933, "grad_norm": 0.6856310963630676, "learning_rate": 2.1828503049295556e-06, "loss": 1.3109, "mean_token_accuracy": 0.6672416975100836, "num_tokens": 2862651736.0, "step": 17072 }, { "entropy": 1.686122328042984, "epoch": 1.875559583642306, "grad_norm": 0.7014265656471252, "learning_rate": 2.1825293367568375e-06, "loss": 1.446, "mean_token_accuracy": 0.6568909734487534, "num_tokens": 2862825931.0, "step": 17073 }, { "entropy": 1.6640195647875469, "epoch": 1.8756694405536787, "grad_norm": 0.7757869958877563, "learning_rate": 2.1822086476530922e-06, "loss": 1.2991, "mean_token_accuracy": 0.6700432101885477, "num_tokens": 2862961042.0, "step": 17074 }, { "entropy": 1.695358783006668, "epoch": 1.8757792974650518, "grad_norm": 0.5820761919021606, "learning_rate": 2.181888237628471e-06, "loss": 1.4312, "mean_token_accuracy": 0.6432149757941564, "num_tokens": 2863141499.0, "step": 17075 }, { "entropy": 1.7364993294080098, "epoch": 1.8758891543764247, "grad_norm": 0.6635037660598755, "learning_rate": 2.1815681066931136e-06, "loss": 1.4227, "mean_token_accuracy": 0.644807959596316, "num_tokens": 2863308785.0, "step": 17076 }, { "entropy": 1.6751290559768677, "epoch": 1.8759990112877976, "grad_norm": 0.6418736577033997, "learning_rate": 2.1812482548571515e-06, "loss": 1.3018, "mean_token_accuracy": 0.6599528888861338, "num_tokens": 2863449125.0, "step": 17077 }, { "entropy": 1.7395286758740742, "epoch": 1.8761088681991707, "grad_norm": 0.7806156873703003, "learning_rate": 2.180928682130708e-06, "loss": 1.4441, "mean_token_accuracy": 0.6352565785249075, "num_tokens": 2863620259.0, "step": 17078 }, { "entropy": 1.6934346357981365, "epoch": 1.8762187251105436, "grad_norm": 0.7733772993087769, "learning_rate": 2.1806093885238976e-06, "loss": 1.4599, "mean_token_accuracy": 0.6639293332894644, "num_tokens": 2863779694.0, "step": 17079 }, { "entropy": 1.73578542470932, "epoch": 1.8763285820219164, "grad_norm": 0.5854712724685669, "learning_rate": 2.1802903740468267e-06, "loss": 1.5494, "mean_token_accuracy": 0.6304313540458679, "num_tokens": 2863971818.0, "step": 17080 }, { "entropy": 1.7329691052436829, "epoch": 1.8764384389332895, "grad_norm": 0.7195613980293274, "learning_rate": 2.1799716387095905e-06, "loss": 1.3164, "mean_token_accuracy": 0.6736815422773361, "num_tokens": 2864109592.0, "step": 17081 }, { "entropy": 1.7070931295553844, "epoch": 1.8765482958446622, "grad_norm": 0.7272635102272034, "learning_rate": 2.179653182522278e-06, "loss": 1.2566, "mean_token_accuracy": 0.6699541062116623, "num_tokens": 2864217249.0, "step": 17082 }, { "entropy": 1.694780518611272, "epoch": 1.8766581527560353, "grad_norm": 0.6670692563056946, "learning_rate": 2.1793350054949674e-06, "loss": 1.3224, "mean_token_accuracy": 0.6682775169610977, "num_tokens": 2864379968.0, "step": 17083 }, { "entropy": 1.7478112777074177, "epoch": 1.8767680096674082, "grad_norm": 0.770989716053009, "learning_rate": 2.179017107637729e-06, "loss": 1.3859, "mean_token_accuracy": 0.6664139181375504, "num_tokens": 2864557477.0, "step": 17084 }, { "entropy": 1.7456609308719635, "epoch": 1.876877866578781, "grad_norm": 0.9178656339645386, "learning_rate": 2.1786994889606262e-06, "loss": 1.5924, "mean_token_accuracy": 0.6394909123579661, "num_tokens": 2864782031.0, "step": 17085 }, { "entropy": 1.6717379689216614, "epoch": 1.8769877234901542, "grad_norm": 0.668597400188446, "learning_rate": 2.1783821494737067e-06, "loss": 1.3992, "mean_token_accuracy": 0.6508362789948782, "num_tokens": 2864958961.0, "step": 17086 }, { "entropy": 1.6922581891218822, "epoch": 1.8770975804015269, "grad_norm": 0.6903640031814575, "learning_rate": 2.1780650891870188e-06, "loss": 1.3327, "mean_token_accuracy": 0.6786336451768875, "num_tokens": 2865077172.0, "step": 17087 }, { "entropy": 1.7544843256473541, "epoch": 1.8772074373129, "grad_norm": 0.7095819115638733, "learning_rate": 2.177748308110596e-06, "loss": 1.3514, "mean_token_accuracy": 0.6596596439679464, "num_tokens": 2865201021.0, "step": 17088 }, { "entropy": 1.6850469013055165, "epoch": 1.8773172942242728, "grad_norm": 0.7165399789810181, "learning_rate": 2.1774318062544623e-06, "loss": 1.4429, "mean_token_accuracy": 0.6499310483535131, "num_tokens": 2865443298.0, "step": 17089 }, { "entropy": 1.7009844084580739, "epoch": 1.8774271511356457, "grad_norm": 0.6865831613540649, "learning_rate": 2.177115583628637e-06, "loss": 1.4149, "mean_token_accuracy": 0.6542165130376816, "num_tokens": 2865685108.0, "step": 17090 }, { "entropy": 1.6992128491401672, "epoch": 1.8775370080470188, "grad_norm": 0.7311968803405762, "learning_rate": 2.176799640243128e-06, "loss": 1.2469, "mean_token_accuracy": 0.6794113417466482, "num_tokens": 2865784923.0, "step": 17091 }, { "entropy": 1.7033413747946422, "epoch": 1.8776468649583917, "grad_norm": 0.7062245607376099, "learning_rate": 2.1764839761079354e-06, "loss": 1.3618, "mean_token_accuracy": 0.6627474625905355, "num_tokens": 2865941631.0, "step": 17092 }, { "entropy": 1.6912708282470703, "epoch": 1.8777567218697646, "grad_norm": 0.6303133368492126, "learning_rate": 2.176168591233048e-06, "loss": 1.3618, "mean_token_accuracy": 0.6606016606092453, "num_tokens": 2866114212.0, "step": 17093 }, { "entropy": 1.6680894295374553, "epoch": 1.8778665787811377, "grad_norm": 0.6817684173583984, "learning_rate": 2.175853485628448e-06, "loss": 1.4784, "mean_token_accuracy": 0.6489969938993454, "num_tokens": 2866343322.0, "step": 17094 }, { "entropy": 1.7241562108198802, "epoch": 1.8779764356925104, "grad_norm": 0.653832197189331, "learning_rate": 2.175538659304109e-06, "loss": 1.3772, "mean_token_accuracy": 0.6535724550485611, "num_tokens": 2866532714.0, "step": 17095 }, { "entropy": 1.7202540437380474, "epoch": 1.8780862926038835, "grad_norm": 0.8709003329277039, "learning_rate": 2.1752241122699966e-06, "loss": 1.2235, "mean_token_accuracy": 0.6837896257638931, "num_tokens": 2866663231.0, "step": 17096 }, { "entropy": 1.7354286313056946, "epoch": 1.8781961495152564, "grad_norm": 0.6793311238288879, "learning_rate": 2.1749098445360633e-06, "loss": 1.4171, "mean_token_accuracy": 0.6585271706183752, "num_tokens": 2866817700.0, "step": 17097 }, { "entropy": 1.7306594252586365, "epoch": 1.8783060064266293, "grad_norm": 0.7136476039886475, "learning_rate": 2.174595856112257e-06, "loss": 1.3441, "mean_token_accuracy": 0.6554663379987081, "num_tokens": 2866969575.0, "step": 17098 }, { "entropy": 1.7610450983047485, "epoch": 1.8784158633380024, "grad_norm": 0.6379169821739197, "learning_rate": 2.174282147008515e-06, "loss": 1.3371, "mean_token_accuracy": 0.6573585122823715, "num_tokens": 2867123276.0, "step": 17099 }, { "entropy": 1.691591699918111, "epoch": 1.878525720249375, "grad_norm": 0.7431465983390808, "learning_rate": 2.173968717234767e-06, "loss": 1.4903, "mean_token_accuracy": 0.6451913366715113, "num_tokens": 2867292293.0, "step": 17100 }, { "entropy": 1.6427749395370483, "epoch": 1.8786355771607481, "grad_norm": 0.6136252880096436, "learning_rate": 2.1736555668009302e-06, "loss": 1.4948, "mean_token_accuracy": 0.6392818937699, "num_tokens": 2867500607.0, "step": 17101 }, { "entropy": 1.7291006743907928, "epoch": 1.878745434072121, "grad_norm": 0.7383739352226257, "learning_rate": 2.1733426957169185e-06, "loss": 1.4956, "mean_token_accuracy": 0.6548979928096136, "num_tokens": 2867695033.0, "step": 17102 }, { "entropy": 1.7051356335481007, "epoch": 1.878855290983494, "grad_norm": 0.7386910915374756, "learning_rate": 2.1730301039926322e-06, "loss": 1.4685, "mean_token_accuracy": 0.6496059795220693, "num_tokens": 2867916001.0, "step": 17103 }, { "entropy": 1.644729922215144, "epoch": 1.878965147894867, "grad_norm": 0.6021768450737, "learning_rate": 2.1727177916379664e-06, "loss": 1.3486, "mean_token_accuracy": 0.6531336605548859, "num_tokens": 2868091178.0, "step": 17104 }, { "entropy": 1.747380663951238, "epoch": 1.87907500480624, "grad_norm": 0.7101630568504333, "learning_rate": 2.1724057586628055e-06, "loss": 1.4363, "mean_token_accuracy": 0.6600662469863892, "num_tokens": 2868228095.0, "step": 17105 }, { "entropy": 1.7580998639265697, "epoch": 1.8791848617176128, "grad_norm": 0.6921120882034302, "learning_rate": 2.1720940050770238e-06, "loss": 1.3382, "mean_token_accuracy": 0.6595444331566492, "num_tokens": 2868384732.0, "step": 17106 }, { "entropy": 1.692195286353429, "epoch": 1.879294718628986, "grad_norm": 0.7340524792671204, "learning_rate": 2.171782530890488e-06, "loss": 1.5391, "mean_token_accuracy": 0.6522092272837957, "num_tokens": 2868557649.0, "step": 17107 }, { "entropy": 1.6761192977428436, "epoch": 1.8794045755403586, "grad_norm": 0.6261641383171082, "learning_rate": 2.171471336113058e-06, "loss": 1.2654, "mean_token_accuracy": 0.6697875261306763, "num_tokens": 2868694048.0, "step": 17108 }, { "entropy": 1.7369263966878254, "epoch": 1.8795144324517317, "grad_norm": 0.7136772274971008, "learning_rate": 2.1711604207545828e-06, "loss": 1.427, "mean_token_accuracy": 0.6500804722309113, "num_tokens": 2868867959.0, "step": 17109 }, { "entropy": 1.6655204892158508, "epoch": 1.8796242893631046, "grad_norm": 0.7923451066017151, "learning_rate": 2.1708497848248998e-06, "loss": 1.4741, "mean_token_accuracy": 0.647340714931488, "num_tokens": 2869051956.0, "step": 17110 }, { "entropy": 1.6829159657160442, "epoch": 1.8797341462744774, "grad_norm": 0.5876792669296265, "learning_rate": 2.170539428333844e-06, "loss": 1.4499, "mean_token_accuracy": 0.6519188384215037, "num_tokens": 2869234691.0, "step": 17111 }, { "entropy": 1.717431555191676, "epoch": 1.8798440031858505, "grad_norm": 0.8365014791488647, "learning_rate": 2.170229351291237e-06, "loss": 1.2309, "mean_token_accuracy": 0.6746444006760915, "num_tokens": 2869356432.0, "step": 17112 }, { "entropy": 1.6984197696050007, "epoch": 1.8799538600972232, "grad_norm": 0.6819642782211304, "learning_rate": 2.1699195537068908e-06, "loss": 1.3904, "mean_token_accuracy": 0.6632606933514277, "num_tokens": 2869499052.0, "step": 17113 }, { "entropy": 1.7775751153628032, "epoch": 1.8800637170085963, "grad_norm": 0.7044260501861572, "learning_rate": 2.1696100355906137e-06, "loss": 1.4486, "mean_token_accuracy": 0.6558221131563187, "num_tokens": 2869637826.0, "step": 17114 }, { "entropy": 1.6655368208885193, "epoch": 1.8801735739199692, "grad_norm": 0.6480795741081238, "learning_rate": 2.1693007969521985e-06, "loss": 1.2762, "mean_token_accuracy": 0.664667988816897, "num_tokens": 2869770682.0, "step": 17115 }, { "entropy": 1.7761302689711254, "epoch": 1.880283430831342, "grad_norm": 0.6664544343948364, "learning_rate": 2.1689918378014345e-06, "loss": 1.5244, "mean_token_accuracy": 0.6382568577925364, "num_tokens": 2869953883.0, "step": 17116 }, { "entropy": 1.7439662218093872, "epoch": 1.8803932877427152, "grad_norm": 0.8166419267654419, "learning_rate": 2.1686831581480992e-06, "loss": 1.2875, "mean_token_accuracy": 0.6673971563577652, "num_tokens": 2870115126.0, "step": 17117 }, { "entropy": 1.697892556587855, "epoch": 1.880503144654088, "grad_norm": 0.6083175539970398, "learning_rate": 2.1683747580019617e-06, "loss": 1.4151, "mean_token_accuracy": 0.6482170025507609, "num_tokens": 2870306843.0, "step": 17118 }, { "entropy": 1.6897941430409749, "epoch": 1.880613001565461, "grad_norm": 0.6954461932182312, "learning_rate": 2.1680666373727835e-06, "loss": 1.3079, "mean_token_accuracy": 0.6700096229712168, "num_tokens": 2870488476.0, "step": 17119 }, { "entropy": 1.6832915445168812, "epoch": 1.880722858476834, "grad_norm": 0.6204468607902527, "learning_rate": 2.1677587962703186e-06, "loss": 1.389, "mean_token_accuracy": 0.6591685314973196, "num_tokens": 2870669134.0, "step": 17120 }, { "entropy": 1.6706489821275075, "epoch": 1.8808327153882067, "grad_norm": 0.845382809638977, "learning_rate": 2.1674512347043057e-06, "loss": 1.2121, "mean_token_accuracy": 0.6803888330856959, "num_tokens": 2870794482.0, "step": 17121 }, { "entropy": 1.7348777552445729, "epoch": 1.8809425722995798, "grad_norm": 0.5870766043663025, "learning_rate": 2.1671439526844816e-06, "loss": 1.4127, "mean_token_accuracy": 0.6562628646691641, "num_tokens": 2870984182.0, "step": 17122 }, { "entropy": 1.6719197829564412, "epoch": 1.8810524292109527, "grad_norm": 0.685198962688446, "learning_rate": 2.166836950220572e-06, "loss": 1.3735, "mean_token_accuracy": 0.6528567423423132, "num_tokens": 2871163157.0, "step": 17123 }, { "entropy": 1.707009196281433, "epoch": 1.8811622861223256, "grad_norm": 0.9159536957740784, "learning_rate": 2.166530227322293e-06, "loss": 1.5308, "mean_token_accuracy": 0.660614863038063, "num_tokens": 2871303925.0, "step": 17124 }, { "entropy": 1.7264705697695415, "epoch": 1.8812721430336987, "grad_norm": 0.6962438821792603, "learning_rate": 2.166223783999351e-06, "loss": 1.3317, "mean_token_accuracy": 0.6664454191923141, "num_tokens": 2871441073.0, "step": 17125 }, { "entropy": 1.7702072660128276, "epoch": 1.8813819999450714, "grad_norm": 0.6648865938186646, "learning_rate": 2.165917620261446e-06, "loss": 1.5234, "mean_token_accuracy": 0.645287091533343, "num_tokens": 2871637167.0, "step": 17126 }, { "entropy": 1.6745448410511017, "epoch": 1.8814918568564445, "grad_norm": 0.7408782243728638, "learning_rate": 2.1656117361182664e-06, "loss": 1.2249, "mean_token_accuracy": 0.6850744038820267, "num_tokens": 2871766634.0, "step": 17127 }, { "entropy": 1.6802305380503337, "epoch": 1.8816017137678174, "grad_norm": 0.6817216277122498, "learning_rate": 2.165306131579495e-06, "loss": 1.3904, "mean_token_accuracy": 0.6435802976290385, "num_tokens": 2871954489.0, "step": 17128 }, { "entropy": 1.7907779812812805, "epoch": 1.8817115706791903, "grad_norm": 0.7694052457809448, "learning_rate": 2.165000806654805e-06, "loss": 1.5186, "mean_token_accuracy": 0.645979126294454, "num_tokens": 2872141193.0, "step": 17129 }, { "entropy": 1.7915573219458263, "epoch": 1.8818214275905634, "grad_norm": 0.7176956534385681, "learning_rate": 2.1646957613538573e-06, "loss": 1.4003, "mean_token_accuracy": 0.6597653726736704, "num_tokens": 2872323839.0, "step": 17130 }, { "entropy": 1.6581577956676483, "epoch": 1.8819312845019363, "grad_norm": 0.8513138890266418, "learning_rate": 2.1643909956863064e-06, "loss": 1.4395, "mean_token_accuracy": 0.6642699937025706, "num_tokens": 2872474722.0, "step": 17131 }, { "entropy": 1.731380472580592, "epoch": 1.8820411414133091, "grad_norm": 0.6215800642967224, "learning_rate": 2.1640865096618006e-06, "loss": 1.432, "mean_token_accuracy": 0.6468487431605657, "num_tokens": 2872681874.0, "step": 17132 }, { "entropy": 1.674561321735382, "epoch": 1.8821509983246822, "grad_norm": 0.6435210108757019, "learning_rate": 2.1637823032899747e-06, "loss": 1.3136, "mean_token_accuracy": 0.6661032090584437, "num_tokens": 2872841550.0, "step": 17133 }, { "entropy": 1.7193843921025593, "epoch": 1.882260855236055, "grad_norm": 0.7598627209663391, "learning_rate": 2.163478376580456e-06, "loss": 1.4469, "mean_token_accuracy": 0.6524012287457784, "num_tokens": 2873024920.0, "step": 17134 }, { "entropy": 1.693623701731364, "epoch": 1.882370712147428, "grad_norm": 2.42760968208313, "learning_rate": 2.1631747295428672e-06, "loss": 1.3129, "mean_token_accuracy": 0.6606552849213282, "num_tokens": 2873190325.0, "step": 17135 }, { "entropy": 1.7247453530629475, "epoch": 1.882480569058801, "grad_norm": 0.5800105333328247, "learning_rate": 2.1628713621868154e-06, "loss": 1.538, "mean_token_accuracy": 0.6374923388163248, "num_tokens": 2873403679.0, "step": 17136 }, { "entropy": 1.7465098798274994, "epoch": 1.8825904259701738, "grad_norm": 0.9322325587272644, "learning_rate": 2.1625682745219016e-06, "loss": 1.3702, "mean_token_accuracy": 0.6589129120111465, "num_tokens": 2873575685.0, "step": 17137 }, { "entropy": 1.7134642004966736, "epoch": 1.882700282881547, "grad_norm": 0.6871570348739624, "learning_rate": 2.1622654665577216e-06, "loss": 1.4218, "mean_token_accuracy": 0.6442284633715948, "num_tokens": 2873756855.0, "step": 17138 }, { "entropy": 1.7436382969220479, "epoch": 1.8828101397929196, "grad_norm": 0.773567795753479, "learning_rate": 2.1619629383038555e-06, "loss": 1.4462, "mean_token_accuracy": 0.6507983406384786, "num_tokens": 2873937261.0, "step": 17139 }, { "entropy": 1.691699226697286, "epoch": 1.8829199967042927, "grad_norm": 0.6026458740234375, "learning_rate": 2.1616606897698805e-06, "loss": 1.5601, "mean_token_accuracy": 0.6354630514979362, "num_tokens": 2874124211.0, "step": 17140 }, { "entropy": 1.7005370358626049, "epoch": 1.8830298536156655, "grad_norm": 0.6932726502418518, "learning_rate": 2.161358720965363e-06, "loss": 1.5492, "mean_token_accuracy": 0.6447855283816656, "num_tokens": 2874316120.0, "step": 17141 }, { "entropy": 1.67661514878273, "epoch": 1.8831397105270384, "grad_norm": 0.7656899690628052, "learning_rate": 2.1610570318998573e-06, "loss": 1.4386, "mean_token_accuracy": 0.6698861916859945, "num_tokens": 2874435775.0, "step": 17142 }, { "entropy": 1.6867518723011017, "epoch": 1.8832495674384115, "grad_norm": 0.6553547382354736, "learning_rate": 2.1607556225829144e-06, "loss": 1.3886, "mean_token_accuracy": 0.6542109300692877, "num_tokens": 2874597623.0, "step": 17143 }, { "entropy": 1.752112736304601, "epoch": 1.8833594243497844, "grad_norm": 0.6014690399169922, "learning_rate": 2.160454493024073e-06, "loss": 1.4618, "mean_token_accuracy": 0.6372010310490926, "num_tokens": 2874826976.0, "step": 17144 }, { "entropy": 1.7432648241519928, "epoch": 1.8834692812611573, "grad_norm": 0.649844229221344, "learning_rate": 2.1601536432328648e-06, "loss": 1.3441, "mean_token_accuracy": 0.6546978851159414, "num_tokens": 2875003958.0, "step": 17145 }, { "entropy": 1.7034888068834941, "epoch": 1.8835791381725304, "grad_norm": 0.7077612280845642, "learning_rate": 2.1598530732188087e-06, "loss": 1.4713, "mean_token_accuracy": 0.666733592748642, "num_tokens": 2875139891.0, "step": 17146 }, { "entropy": 1.7785635590553284, "epoch": 1.883688995083903, "grad_norm": 0.686241626739502, "learning_rate": 2.159552782991421e-06, "loss": 1.5261, "mean_token_accuracy": 0.6443983117739359, "num_tokens": 2875341135.0, "step": 17147 }, { "entropy": 1.6909163693586986, "epoch": 1.8837988519952762, "grad_norm": 0.7089021801948547, "learning_rate": 2.159252772560204e-06, "loss": 1.4343, "mean_token_accuracy": 0.6512851615746816, "num_tokens": 2875523311.0, "step": 17148 }, { "entropy": 1.6890226205190022, "epoch": 1.883908708906649, "grad_norm": 0.7156793475151062, "learning_rate": 2.1589530419346515e-06, "loss": 1.3621, "mean_token_accuracy": 0.6604212572177252, "num_tokens": 2875662146.0, "step": 17149 }, { "entropy": 1.6136377950509389, "epoch": 1.884018565818022, "grad_norm": 0.6490492224693298, "learning_rate": 2.158653591124252e-06, "loss": 1.3841, "mean_token_accuracy": 0.6620889157056808, "num_tokens": 2875843407.0, "step": 17150 }, { "entropy": 1.791116327047348, "epoch": 1.884128422729395, "grad_norm": 0.7462132573127747, "learning_rate": 2.1583544201384825e-06, "loss": 1.4494, "mean_token_accuracy": 0.6622537871201833, "num_tokens": 2875972410.0, "step": 17151 }, { "entropy": 1.6781320869922638, "epoch": 1.884238279640768, "grad_norm": 0.7043426632881165, "learning_rate": 2.1580555289868118e-06, "loss": 1.3202, "mean_token_accuracy": 0.6793678253889084, "num_tokens": 2876130756.0, "step": 17152 }, { "entropy": 1.6705817480882008, "epoch": 1.8843481365521408, "grad_norm": 0.6849955916404724, "learning_rate": 2.1577569176786993e-06, "loss": 1.2153, "mean_token_accuracy": 0.6768456598122915, "num_tokens": 2876253913.0, "step": 17153 }, { "entropy": 1.7637967069943745, "epoch": 1.8844579934635137, "grad_norm": 1.0273441076278687, "learning_rate": 2.157458586223596e-06, "loss": 1.3025, "mean_token_accuracy": 0.6631099134683609, "num_tokens": 2876388583.0, "step": 17154 }, { "entropy": 1.6225744386514027, "epoch": 1.8845678503748866, "grad_norm": 0.6709175705909729, "learning_rate": 2.157160534630943e-06, "loss": 1.2164, "mean_token_accuracy": 0.6821977148453394, "num_tokens": 2876505830.0, "step": 17155 }, { "entropy": 1.7032880385716755, "epoch": 1.8846777072862597, "grad_norm": 0.7127795219421387, "learning_rate": 2.1568627629101753e-06, "loss": 1.4102, "mean_token_accuracy": 0.6570267875989279, "num_tokens": 2876686265.0, "step": 17156 }, { "entropy": 1.6788496275742848, "epoch": 1.8847875641976326, "grad_norm": 0.7283757328987122, "learning_rate": 2.156565271070716e-06, "loss": 1.3016, "mean_token_accuracy": 0.6735827922821045, "num_tokens": 2876818542.0, "step": 17157 }, { "entropy": 1.6719705959161122, "epoch": 1.8848974211090055, "grad_norm": 0.8454893231391907, "learning_rate": 2.1562680591219815e-06, "loss": 1.1964, "mean_token_accuracy": 0.6822755336761475, "num_tokens": 2876933270.0, "step": 17158 }, { "entropy": 1.7461024026076, "epoch": 1.8850072780203786, "grad_norm": 0.7794149518013, "learning_rate": 2.1559711270733765e-06, "loss": 1.4554, "mean_token_accuracy": 0.6527780294418335, "num_tokens": 2877087587.0, "step": 17159 }, { "entropy": 1.7108294367790222, "epoch": 1.8851171349317513, "grad_norm": 0.6386652588844299, "learning_rate": 2.155674474934301e-06, "loss": 1.3632, "mean_token_accuracy": 0.6620263059933981, "num_tokens": 2877241197.0, "step": 17160 }, { "entropy": 1.716377208630244, "epoch": 1.8852269918431244, "grad_norm": 0.6413389444351196, "learning_rate": 2.1553781027141433e-06, "loss": 1.4775, "mean_token_accuracy": 0.6538679301738739, "num_tokens": 2877466362.0, "step": 17161 }, { "entropy": 1.6621573368708293, "epoch": 1.8853368487544973, "grad_norm": 0.7208569645881653, "learning_rate": 2.155082010422283e-06, "loss": 1.2998, "mean_token_accuracy": 0.6709053417046865, "num_tokens": 2877575652.0, "step": 17162 }, { "entropy": 1.6736730337142944, "epoch": 1.8854467056658701, "grad_norm": 0.8033668398857117, "learning_rate": 2.154786198068091e-06, "loss": 1.3498, "mean_token_accuracy": 0.6585894276698431, "num_tokens": 2877692948.0, "step": 17163 }, { "entropy": 1.7026234964529674, "epoch": 1.8855565625772432, "grad_norm": 0.7073157429695129, "learning_rate": 2.1544906656609303e-06, "loss": 1.4006, "mean_token_accuracy": 0.6467277258634567, "num_tokens": 2877850196.0, "step": 17164 }, { "entropy": 1.719588041305542, "epoch": 1.8856664194886161, "grad_norm": 0.6670511364936829, "learning_rate": 2.1541954132101546e-06, "loss": 1.4866, "mean_token_accuracy": 0.6505639304717382, "num_tokens": 2878003561.0, "step": 17165 }, { "entropy": 1.7602489292621613, "epoch": 1.885776276399989, "grad_norm": 0.9589049220085144, "learning_rate": 2.153900440725107e-06, "loss": 1.3435, "mean_token_accuracy": 0.6540907273689905, "num_tokens": 2878142884.0, "step": 17166 }, { "entropy": 1.7023918430010478, "epoch": 1.885886133311362, "grad_norm": 0.6268293261528015, "learning_rate": 2.1536057482151253e-06, "loss": 1.3064, "mean_token_accuracy": 0.6649014155069987, "num_tokens": 2878286722.0, "step": 17167 }, { "entropy": 1.7479477028052013, "epoch": 1.8859959902227348, "grad_norm": 0.7205145955085754, "learning_rate": 2.1533113356895356e-06, "loss": 1.3166, "mean_token_accuracy": 0.6679264704386393, "num_tokens": 2878488440.0, "step": 17168 }, { "entropy": 1.6543095012505848, "epoch": 1.886105847134108, "grad_norm": 0.6612537503242493, "learning_rate": 2.153017203157655e-06, "loss": 1.4688, "mean_token_accuracy": 0.6611438890298208, "num_tokens": 2878638347.0, "step": 17169 }, { "entropy": 1.6952688296635945, "epoch": 1.8862157040454808, "grad_norm": 0.6854779720306396, "learning_rate": 2.152723350628793e-06, "loss": 1.3837, "mean_token_accuracy": 0.6586264471213022, "num_tokens": 2878823408.0, "step": 17170 }, { "entropy": 1.6490706702073414, "epoch": 1.8863255609568537, "grad_norm": 0.7439612746238708, "learning_rate": 2.1524297781122507e-06, "loss": 1.2378, "mean_token_accuracy": 0.6832160651683807, "num_tokens": 2878940613.0, "step": 17171 }, { "entropy": 1.6276902059714, "epoch": 1.8864354178682268, "grad_norm": 0.6118603348731995, "learning_rate": 2.15213648561732e-06, "loss": 1.4464, "mean_token_accuracy": 0.6511234442392985, "num_tokens": 2879148471.0, "step": 17172 }, { "entropy": 1.7158278822898865, "epoch": 1.8865452747795994, "grad_norm": 0.603689432144165, "learning_rate": 2.1518434731532815e-06, "loss": 1.4145, "mean_token_accuracy": 0.6546343117952347, "num_tokens": 2879312410.0, "step": 17173 }, { "entropy": 1.75503213206927, "epoch": 1.8866551316909725, "grad_norm": 0.6921542882919312, "learning_rate": 2.1515507407294096e-06, "loss": 1.3781, "mean_token_accuracy": 0.6559339066346487, "num_tokens": 2879489120.0, "step": 17174 }, { "entropy": 1.6965388059616089, "epoch": 1.8867649886023454, "grad_norm": 0.5904287695884705, "learning_rate": 2.1512582883549703e-06, "loss": 1.507, "mean_token_accuracy": 0.6430360277493795, "num_tokens": 2879694192.0, "step": 17175 }, { "entropy": 1.7962828079859416, "epoch": 1.8868748455137183, "grad_norm": 0.8660070300102234, "learning_rate": 2.150966116039219e-06, "loss": 1.4214, "mean_token_accuracy": 0.6482437898715337, "num_tokens": 2879808765.0, "step": 17176 }, { "entropy": 1.7078647017478943, "epoch": 1.8869847024250914, "grad_norm": 0.6933693289756775, "learning_rate": 2.1506742237914026e-06, "loss": 1.4655, "mean_token_accuracy": 0.6365055541197459, "num_tokens": 2879968191.0, "step": 17177 }, { "entropy": 1.6872906982898712, "epoch": 1.8870945593364643, "grad_norm": 0.6161040663719177, "learning_rate": 2.1503826116207586e-06, "loss": 1.4166, "mean_token_accuracy": 0.6558385094006857, "num_tokens": 2880131622.0, "step": 17178 }, { "entropy": 1.75909224152565, "epoch": 1.8872044162478372, "grad_norm": 0.658726692199707, "learning_rate": 2.1500912795365193e-06, "loss": 1.3921, "mean_token_accuracy": 0.6491363197565079, "num_tokens": 2880300886.0, "step": 17179 }, { "entropy": 1.6820505162080128, "epoch": 1.88731427315921, "grad_norm": 0.8096556067466736, "learning_rate": 2.149800227547902e-06, "loss": 1.276, "mean_token_accuracy": 0.6641524781783422, "num_tokens": 2880485214.0, "step": 17180 }, { "entropy": 1.7462229331334431, "epoch": 1.887424130070583, "grad_norm": 0.7542430758476257, "learning_rate": 2.1495094556641183e-06, "loss": 1.5453, "mean_token_accuracy": 0.6436825742324194, "num_tokens": 2880688455.0, "step": 17181 }, { "entropy": 1.6861853897571564, "epoch": 1.887533986981956, "grad_norm": 0.7085747122764587, "learning_rate": 2.149218963894373e-06, "loss": 1.4594, "mean_token_accuracy": 0.657812312245369, "num_tokens": 2880835604.0, "step": 17182 }, { "entropy": 1.734100381533305, "epoch": 1.887643843893329, "grad_norm": 0.6035891175270081, "learning_rate": 2.148928752247859e-06, "loss": 1.5958, "mean_token_accuracy": 0.6254423459370931, "num_tokens": 2881031539.0, "step": 17183 }, { "entropy": 1.7015228271484375, "epoch": 1.8877537008047018, "grad_norm": 0.6321704983711243, "learning_rate": 2.148638820733762e-06, "loss": 1.4446, "mean_token_accuracy": 0.6493401179711024, "num_tokens": 2881176601.0, "step": 17184 }, { "entropy": 1.6736248135566711, "epoch": 1.887863557716075, "grad_norm": 0.7702553868293762, "learning_rate": 2.148349169361259e-06, "loss": 1.3464, "mean_token_accuracy": 0.675323560833931, "num_tokens": 2881349503.0, "step": 17185 }, { "entropy": 1.7201215823491414, "epoch": 1.8879734146274476, "grad_norm": 0.7781912684440613, "learning_rate": 2.148059798139514e-06, "loss": 1.4305, "mean_token_accuracy": 0.6516983310381571, "num_tokens": 2881490232.0, "step": 17186 }, { "entropy": 1.7094205915927887, "epoch": 1.8880832715388207, "grad_norm": 0.6526473760604858, "learning_rate": 2.1477707070776883e-06, "loss": 1.5196, "mean_token_accuracy": 0.6446429987748464, "num_tokens": 2881698286.0, "step": 17187 }, { "entropy": 1.688776175181071, "epoch": 1.8881931284501936, "grad_norm": 0.7511041760444641, "learning_rate": 2.1474818961849316e-06, "loss": 1.3161, "mean_token_accuracy": 0.6700662126143774, "num_tokens": 2881880985.0, "step": 17188 }, { "entropy": 1.6932948231697083, "epoch": 1.8883029853615665, "grad_norm": 0.6438283324241638, "learning_rate": 2.1471933654703836e-06, "loss": 1.4265, "mean_token_accuracy": 0.6442149132490158, "num_tokens": 2882058048.0, "step": 17189 }, { "entropy": 1.705348789691925, "epoch": 1.8884128422729396, "grad_norm": 0.7522128820419312, "learning_rate": 2.1469051149431757e-06, "loss": 1.271, "mean_token_accuracy": 0.6708547174930573, "num_tokens": 2882169248.0, "step": 17190 }, { "entropy": 1.8012695014476776, "epoch": 1.8885226991843125, "grad_norm": 0.7312252521514893, "learning_rate": 2.146617144612432e-06, "loss": 1.5895, "mean_token_accuracy": 0.6462727536757787, "num_tokens": 2882375194.0, "step": 17191 }, { "entropy": 1.737836887439092, "epoch": 1.8886325560956854, "grad_norm": 1.2015639543533325, "learning_rate": 2.1463294544872667e-06, "loss": 1.4586, "mean_token_accuracy": 0.6477744380633036, "num_tokens": 2882532455.0, "step": 17192 }, { "entropy": 1.737512121597926, "epoch": 1.8887424130070583, "grad_norm": 0.6532018184661865, "learning_rate": 2.1460420445767836e-06, "loss": 1.3933, "mean_token_accuracy": 0.6575685640176138, "num_tokens": 2882708598.0, "step": 17193 }, { "entropy": 1.6850264469782512, "epoch": 1.8888522699184311, "grad_norm": 0.6517339944839478, "learning_rate": 2.145754914890081e-06, "loss": 1.3152, "mean_token_accuracy": 0.6752770642439524, "num_tokens": 2882844056.0, "step": 17194 }, { "entropy": 1.6757459739844005, "epoch": 1.8889621268298042, "grad_norm": 0.6307591199874878, "learning_rate": 2.1454680654362445e-06, "loss": 1.3338, "mean_token_accuracy": 0.6622594942649206, "num_tokens": 2883031026.0, "step": 17195 }, { "entropy": 1.7201253175735474, "epoch": 1.8890719837411771, "grad_norm": 0.740987241268158, "learning_rate": 2.1451814962243545e-06, "loss": 1.3998, "mean_token_accuracy": 0.6528403460979462, "num_tokens": 2883216632.0, "step": 17196 }, { "entropy": 1.700130045413971, "epoch": 1.88918184065255, "grad_norm": 0.6037099361419678, "learning_rate": 2.1448952072634807e-06, "loss": 1.3888, "mean_token_accuracy": 0.6569363375504812, "num_tokens": 2883388061.0, "step": 17197 }, { "entropy": 1.7035789688428242, "epoch": 1.8892916975639231, "grad_norm": 0.629531741142273, "learning_rate": 2.1446091985626818e-06, "loss": 1.3131, "mean_token_accuracy": 0.6592882921298345, "num_tokens": 2883555748.0, "step": 17198 }, { "entropy": 1.7170901894569397, "epoch": 1.8894015544752958, "grad_norm": 0.8567357659339905, "learning_rate": 2.144323470131012e-06, "loss": 1.347, "mean_token_accuracy": 0.6601819346348444, "num_tokens": 2883707533.0, "step": 17199 }, { "entropy": 1.6625105639298756, "epoch": 1.889511411386669, "grad_norm": 0.7391311526298523, "learning_rate": 2.144038021977515e-06, "loss": 1.3399, "mean_token_accuracy": 0.6689294477303823, "num_tokens": 2883851556.0, "step": 17200 }, { "entropy": 1.6819651424884796, "epoch": 1.8896212682980418, "grad_norm": 0.7686917185783386, "learning_rate": 2.143752854111223e-06, "loss": 1.3399, "mean_token_accuracy": 0.6758074214061102, "num_tokens": 2883995505.0, "step": 17201 }, { "entropy": 1.8181818425655365, "epoch": 1.8897311252094147, "grad_norm": 0.7404806613922119, "learning_rate": 2.1434679665411625e-06, "loss": 1.487, "mean_token_accuracy": 0.6463923106590906, "num_tokens": 2884131386.0, "step": 17202 }, { "entropy": 1.736276884873708, "epoch": 1.8898409821207878, "grad_norm": 0.7244618535041809, "learning_rate": 2.1431833592763512e-06, "loss": 1.3495, "mean_token_accuracy": 0.6554465840260187, "num_tokens": 2884280001.0, "step": 17203 }, { "entropy": 1.7070696453253429, "epoch": 1.8899508390321607, "grad_norm": 0.6054216623306274, "learning_rate": 2.1428990323257944e-06, "loss": 1.4691, "mean_token_accuracy": 0.6478527784347534, "num_tokens": 2884483799.0, "step": 17204 }, { "entropy": 1.673392613728841, "epoch": 1.8900606959435335, "grad_norm": 0.6630545258522034, "learning_rate": 2.1426149856984922e-06, "loss": 1.2966, "mean_token_accuracy": 0.681118776400884, "num_tokens": 2884654832.0, "step": 17205 }, { "entropy": 1.7095024585723877, "epoch": 1.8901705528549066, "grad_norm": 0.633782684803009, "learning_rate": 2.1423312194034347e-06, "loss": 1.4286, "mean_token_accuracy": 0.6514197190602621, "num_tokens": 2884826182.0, "step": 17206 }, { "entropy": 1.6941520472367604, "epoch": 1.8902804097662793, "grad_norm": 0.772243320941925, "learning_rate": 2.1420477334496024e-06, "loss": 1.2764, "mean_token_accuracy": 0.6747996111710867, "num_tokens": 2884956841.0, "step": 17207 }, { "entropy": 1.7718258996804555, "epoch": 1.8903902666776524, "grad_norm": 0.8815136551856995, "learning_rate": 2.141764527845968e-06, "loss": 1.3896, "mean_token_accuracy": 0.6521994322538376, "num_tokens": 2885070369.0, "step": 17208 }, { "entropy": 1.7424478928248088, "epoch": 1.8905001235890253, "grad_norm": 0.6464412212371826, "learning_rate": 2.141481602601495e-06, "loss": 1.4169, "mean_token_accuracy": 0.6564925710360209, "num_tokens": 2885247084.0, "step": 17209 }, { "entropy": 1.7533444662888844, "epoch": 1.8906099805003982, "grad_norm": 0.6359097361564636, "learning_rate": 2.1411989577251376e-06, "loss": 1.4394, "mean_token_accuracy": 0.6459400206804276, "num_tokens": 2885433750.0, "step": 17210 }, { "entropy": 1.804566999276479, "epoch": 1.8907198374117713, "grad_norm": 0.7828404903411865, "learning_rate": 2.1409165932258406e-06, "loss": 1.523, "mean_token_accuracy": 0.6601679027080536, "num_tokens": 2885640946.0, "step": 17211 }, { "entropy": 1.722624033689499, "epoch": 1.890829694323144, "grad_norm": 0.775169849395752, "learning_rate": 2.1406345091125415e-06, "loss": 1.549, "mean_token_accuracy": 0.6515462944904963, "num_tokens": 2885818456.0, "step": 17212 }, { "entropy": 1.6952950755755107, "epoch": 1.890939551234517, "grad_norm": 0.7390128970146179, "learning_rate": 2.140352705394169e-06, "loss": 1.4906, "mean_token_accuracy": 0.6618280857801437, "num_tokens": 2885977593.0, "step": 17213 }, { "entropy": 1.7304560939470928, "epoch": 1.89104940814589, "grad_norm": 0.7561904191970825, "learning_rate": 2.140071182079641e-06, "loss": 1.2999, "mean_token_accuracy": 0.6680457144975662, "num_tokens": 2886109952.0, "step": 17214 }, { "entropy": 1.7267817457516987, "epoch": 1.8911592650572628, "grad_norm": 0.7434021234512329, "learning_rate": 2.1397899391778666e-06, "loss": 1.5385, "mean_token_accuracy": 0.6487158189217249, "num_tokens": 2886248004.0, "step": 17215 }, { "entropy": 1.64817480246226, "epoch": 1.891269121968636, "grad_norm": 0.5317151546478271, "learning_rate": 2.139508976697749e-06, "loss": 1.4407, "mean_token_accuracy": 0.6526560485363007, "num_tokens": 2886450875.0, "step": 17216 }, { "entropy": 1.653537929058075, "epoch": 1.8913789788800088, "grad_norm": 0.665301501750946, "learning_rate": 2.1392282946481794e-06, "loss": 1.2987, "mean_token_accuracy": 0.6666727811098099, "num_tokens": 2886590812.0, "step": 17217 }, { "entropy": 1.6849933167298634, "epoch": 1.8914888357913817, "grad_norm": 0.6797506213188171, "learning_rate": 2.1389478930380415e-06, "loss": 1.333, "mean_token_accuracy": 0.6623549064000448, "num_tokens": 2886736662.0, "step": 17218 }, { "entropy": 1.6301970183849335, "epoch": 1.8915986927027548, "grad_norm": 0.6078571081161499, "learning_rate": 2.13866777187621e-06, "loss": 1.3713, "mean_token_accuracy": 0.6544771194458008, "num_tokens": 2886961307.0, "step": 17219 }, { "entropy": 1.6913349032402039, "epoch": 1.8917085496141275, "grad_norm": 0.6399415135383606, "learning_rate": 2.13838793117155e-06, "loss": 1.4364, "mean_token_accuracy": 0.6472468177477518, "num_tokens": 2887153437.0, "step": 17220 }, { "entropy": 1.679469347000122, "epoch": 1.8918184065255006, "grad_norm": 0.6966269612312317, "learning_rate": 2.1381083709329195e-06, "loss": 1.3719, "mean_token_accuracy": 0.6473198433717092, "num_tokens": 2887320267.0, "step": 17221 }, { "entropy": 1.7523421943187714, "epoch": 1.8919282634368735, "grad_norm": 0.8340956568717957, "learning_rate": 2.1378290911691655e-06, "loss": 1.464, "mean_token_accuracy": 0.6585601170857748, "num_tokens": 2887483418.0, "step": 17222 }, { "entropy": 1.7297047674655914, "epoch": 1.8920381203482464, "grad_norm": 0.6574650406837463, "learning_rate": 2.1375500918891275e-06, "loss": 1.3611, "mean_token_accuracy": 0.6614359567562739, "num_tokens": 2887629911.0, "step": 17223 }, { "entropy": 1.6866742571194966, "epoch": 1.8921479772596195, "grad_norm": 0.6516870856285095, "learning_rate": 2.1372713731016356e-06, "loss": 1.3913, "mean_token_accuracy": 0.6656065583229065, "num_tokens": 2887773364.0, "step": 17224 }, { "entropy": 1.7239994208017986, "epoch": 1.8922578341709921, "grad_norm": 0.8059686422348022, "learning_rate": 2.136992934815511e-06, "loss": 1.3294, "mean_token_accuracy": 0.6618863890568415, "num_tokens": 2887901335.0, "step": 17225 }, { "entropy": 1.7334805130958557, "epoch": 1.8923676910823652, "grad_norm": 0.7674765586853027, "learning_rate": 2.1367147770395665e-06, "loss": 1.3486, "mean_token_accuracy": 0.6562761962413788, "num_tokens": 2888048170.0, "step": 17226 }, { "entropy": 1.8443331122398376, "epoch": 1.8924775479937381, "grad_norm": 0.6038434505462646, "learning_rate": 2.136436899782605e-06, "loss": 1.4085, "mean_token_accuracy": 0.6459904710451762, "num_tokens": 2888205505.0, "step": 17227 }, { "entropy": 1.7091451783974965, "epoch": 1.892587404905111, "grad_norm": 0.6854003071784973, "learning_rate": 2.1361593030534218e-06, "loss": 1.3721, "mean_token_accuracy": 0.6586553553740183, "num_tokens": 2888360293.0, "step": 17228 }, { "entropy": 1.7349936366081238, "epoch": 1.8926972618164841, "grad_norm": 0.672595739364624, "learning_rate": 2.135881986860803e-06, "loss": 1.3288, "mean_token_accuracy": 0.6604596922794977, "num_tokens": 2888475380.0, "step": 17229 }, { "entropy": 1.7175809343655903, "epoch": 1.892807118727857, "grad_norm": 0.7268809080123901, "learning_rate": 2.1356049512135245e-06, "loss": 1.2871, "mean_token_accuracy": 0.6609225074450175, "num_tokens": 2888622020.0, "step": 17230 }, { "entropy": 1.721697747707367, "epoch": 1.89291697563923, "grad_norm": 1.0464180707931519, "learning_rate": 2.135328196120354e-06, "loss": 1.3146, "mean_token_accuracy": 0.6679658045371374, "num_tokens": 2888767213.0, "step": 17231 }, { "entropy": 1.651025613149007, "epoch": 1.893026832550603, "grad_norm": 0.5859130620956421, "learning_rate": 2.135051721590053e-06, "loss": 1.3748, "mean_token_accuracy": 0.6555741727352142, "num_tokens": 2888927693.0, "step": 17232 }, { "entropy": 1.6868204673131306, "epoch": 1.8931366894619757, "grad_norm": 0.7139847874641418, "learning_rate": 2.1347755276313705e-06, "loss": 1.3789, "mean_token_accuracy": 0.6621604611476263, "num_tokens": 2889100377.0, "step": 17233 }, { "entropy": 1.7080059349536896, "epoch": 1.8932465463733488, "grad_norm": 0.8161289691925049, "learning_rate": 2.1344996142530466e-06, "loss": 1.3759, "mean_token_accuracy": 0.6496799687544504, "num_tokens": 2889249897.0, "step": 17234 }, { "entropy": 1.7380808293819427, "epoch": 1.8933564032847217, "grad_norm": 0.721257746219635, "learning_rate": 2.134223981463816e-06, "loss": 1.588, "mean_token_accuracy": 0.6347461392482122, "num_tokens": 2889409243.0, "step": 17235 }, { "entropy": 1.7111575802167256, "epoch": 1.8934662601960945, "grad_norm": 0.6843127608299255, "learning_rate": 2.133948629272401e-06, "loss": 1.3835, "mean_token_accuracy": 0.6749522536993027, "num_tokens": 2889559550.0, "step": 17236 }, { "entropy": 1.7137587368488312, "epoch": 1.8935761171074676, "grad_norm": 0.7074581384658813, "learning_rate": 2.133673557687516e-06, "loss": 1.5246, "mean_token_accuracy": 0.6346543331940969, "num_tokens": 2889754498.0, "step": 17237 }, { "entropy": 1.6512508889039357, "epoch": 1.8936859740188403, "grad_norm": 0.7215355634689331, "learning_rate": 2.1333987667178695e-06, "loss": 1.2638, "mean_token_accuracy": 0.6745673269033432, "num_tokens": 2889878312.0, "step": 17238 }, { "entropy": 1.7148396968841553, "epoch": 1.8937958309302134, "grad_norm": 0.7948116660118103, "learning_rate": 2.133124256372155e-06, "loss": 1.3975, "mean_token_accuracy": 0.6557581474383672, "num_tokens": 2890036666.0, "step": 17239 }, { "entropy": 1.7264957229296367, "epoch": 1.8939056878415863, "grad_norm": 0.7121086120605469, "learning_rate": 2.1328500266590625e-06, "loss": 1.3031, "mean_token_accuracy": 0.6640142252047857, "num_tokens": 2890161291.0, "step": 17240 }, { "entropy": 1.6922083993752797, "epoch": 1.8940155447529592, "grad_norm": 0.7959898114204407, "learning_rate": 2.132576077587272e-06, "loss": 1.2614, "mean_token_accuracy": 0.6731408536434174, "num_tokens": 2890333804.0, "step": 17241 }, { "entropy": 1.6807706554730732, "epoch": 1.8941254016643323, "grad_norm": 0.6071732044219971, "learning_rate": 2.132302409165452e-06, "loss": 1.3122, "mean_token_accuracy": 0.6773058325052261, "num_tokens": 2890502137.0, "step": 17242 }, { "entropy": 1.720189521710078, "epoch": 1.8942352585757052, "grad_norm": 0.6647835969924927, "learning_rate": 2.1320290214022642e-06, "loss": 1.4414, "mean_token_accuracy": 0.6582275728384653, "num_tokens": 2890709961.0, "step": 17243 }, { "entropy": 1.7124016781648, "epoch": 1.894345115487078, "grad_norm": 0.6113753318786621, "learning_rate": 2.1317559143063625e-06, "loss": 1.4965, "mean_token_accuracy": 0.6508686641852061, "num_tokens": 2890904159.0, "step": 17244 }, { "entropy": 1.7102111279964447, "epoch": 1.8944549723984512, "grad_norm": 0.856601893901825, "learning_rate": 2.1314830878863908e-06, "loss": 1.1759, "mean_token_accuracy": 0.6895684152841568, "num_tokens": 2891022698.0, "step": 17245 }, { "entropy": 1.7018269002437592, "epoch": 1.8945648293098238, "grad_norm": 0.6518383622169495, "learning_rate": 2.1312105421509827e-06, "loss": 1.3364, "mean_token_accuracy": 0.6601632038752238, "num_tokens": 2891193896.0, "step": 17246 }, { "entropy": 1.6979057788848877, "epoch": 1.894674686221197, "grad_norm": 0.678108274936676, "learning_rate": 2.130938277108764e-06, "loss": 1.3862, "mean_token_accuracy": 0.6634653856356939, "num_tokens": 2891354151.0, "step": 17247 }, { "entropy": 1.7425933976968129, "epoch": 1.8947845431325698, "grad_norm": 0.8439452052116394, "learning_rate": 2.1306662927683532e-06, "loss": 1.5762, "mean_token_accuracy": 0.6411895950635275, "num_tokens": 2891509855.0, "step": 17248 }, { "entropy": 1.6670502225557964, "epoch": 1.8948944000439427, "grad_norm": 0.8232260942459106, "learning_rate": 2.1303945891383575e-06, "loss": 1.3695, "mean_token_accuracy": 0.6671279718478521, "num_tokens": 2891678701.0, "step": 17249 }, { "entropy": 1.700257698694865, "epoch": 1.8950042569553158, "grad_norm": 0.8447266817092896, "learning_rate": 2.130123166227376e-06, "loss": 1.3507, "mean_token_accuracy": 0.6675139367580414, "num_tokens": 2891855717.0, "step": 17250 }, { "entropy": 1.717061976591746, "epoch": 1.8951141138666885, "grad_norm": 0.6110275983810425, "learning_rate": 2.129852024043999e-06, "loss": 1.4106, "mean_token_accuracy": 0.6613439122835795, "num_tokens": 2891988808.0, "step": 17251 }, { "entropy": 1.6792820394039154, "epoch": 1.8952239707780616, "grad_norm": 0.709892988204956, "learning_rate": 2.129581162596809e-06, "loss": 1.3578, "mean_token_accuracy": 0.6553207437197367, "num_tokens": 2892155247.0, "step": 17252 }, { "entropy": 1.6732084353764851, "epoch": 1.8953338276894345, "grad_norm": 0.7051602602005005, "learning_rate": 2.1293105818943777e-06, "loss": 1.3955, "mean_token_accuracy": 0.6673270811637243, "num_tokens": 2892306122.0, "step": 17253 }, { "entropy": 1.6753906508286793, "epoch": 1.8954436846008074, "grad_norm": 0.6376731991767883, "learning_rate": 2.1290402819452695e-06, "loss": 1.394, "mean_token_accuracy": 0.6547005921602249, "num_tokens": 2892455330.0, "step": 17254 }, { "entropy": 1.6851638952891033, "epoch": 1.8955535415121805, "grad_norm": 0.7481420636177063, "learning_rate": 2.1287702627580388e-06, "loss": 1.4232, "mean_token_accuracy": 0.6521519323190054, "num_tokens": 2892625583.0, "step": 17255 }, { "entropy": 1.7001373370488484, "epoch": 1.8956633984235534, "grad_norm": 0.7096436619758606, "learning_rate": 2.128500524341232e-06, "loss": 1.4475, "mean_token_accuracy": 0.648720865448316, "num_tokens": 2892835053.0, "step": 17256 }, { "entropy": 1.747383952140808, "epoch": 1.8957732553349262, "grad_norm": 0.8015025854110718, "learning_rate": 2.128231066703387e-06, "loss": 1.3288, "mean_token_accuracy": 0.6623203357060751, "num_tokens": 2892954963.0, "step": 17257 }, { "entropy": 1.6256678303082783, "epoch": 1.8958831122462994, "grad_norm": 0.6793299913406372, "learning_rate": 2.1279618898530294e-06, "loss": 1.3693, "mean_token_accuracy": 0.6596795121828715, "num_tokens": 2893138868.0, "step": 17258 }, { "entropy": 1.696552187204361, "epoch": 1.895992969157672, "grad_norm": 0.626272976398468, "learning_rate": 2.1276929937986816e-06, "loss": 1.4428, "mean_token_accuracy": 0.6529415895541509, "num_tokens": 2893319526.0, "step": 17259 }, { "entropy": 1.7114895482858021, "epoch": 1.8961028260690451, "grad_norm": 0.7434846758842468, "learning_rate": 2.1274243785488514e-06, "loss": 1.4121, "mean_token_accuracy": 0.6516354481379191, "num_tokens": 2893468438.0, "step": 17260 }, { "entropy": 1.6837340195973713, "epoch": 1.896212682980418, "grad_norm": 0.769420325756073, "learning_rate": 2.1271560441120416e-06, "loss": 1.2678, "mean_token_accuracy": 0.6741555829842886, "num_tokens": 2893605178.0, "step": 17261 }, { "entropy": 1.686502754688263, "epoch": 1.896322539891791, "grad_norm": 0.6285480260848999, "learning_rate": 2.1268879904967456e-06, "loss": 1.385, "mean_token_accuracy": 0.6608720819155375, "num_tokens": 2893824967.0, "step": 17262 }, { "entropy": 1.7563516199588776, "epoch": 1.896432396803164, "grad_norm": 0.856654942035675, "learning_rate": 2.1266202177114455e-06, "loss": 1.36, "mean_token_accuracy": 0.6687353501717249, "num_tokens": 2893958076.0, "step": 17263 }, { "entropy": 1.7702421148618062, "epoch": 1.8965422537145367, "grad_norm": 0.6715371608734131, "learning_rate": 2.1263527257646175e-06, "loss": 1.4385, "mean_token_accuracy": 0.6432004123926163, "num_tokens": 2894165718.0, "step": 17264 }, { "entropy": 1.7267632186412811, "epoch": 1.8966521106259098, "grad_norm": 0.6621989011764526, "learning_rate": 2.1260855146647278e-06, "loss": 1.4865, "mean_token_accuracy": 0.6259207328160604, "num_tokens": 2894390591.0, "step": 17265 }, { "entropy": 1.7133949995040894, "epoch": 1.8967619675372827, "grad_norm": 0.5840614438056946, "learning_rate": 2.125818584420232e-06, "loss": 1.4147, "mean_token_accuracy": 0.6490117361148199, "num_tokens": 2894561880.0, "step": 17266 }, { "entropy": 1.715239018201828, "epoch": 1.8968718244486555, "grad_norm": 0.6776023507118225, "learning_rate": 2.125551935039579e-06, "loss": 1.4306, "mean_token_accuracy": 0.6536256372928619, "num_tokens": 2894752290.0, "step": 17267 }, { "entropy": 1.6557459632555644, "epoch": 1.8969816813600286, "grad_norm": 0.6310899257659912, "learning_rate": 2.1252855665312084e-06, "loss": 1.3285, "mean_token_accuracy": 0.6622317085663477, "num_tokens": 2894940916.0, "step": 17268 }, { "entropy": 1.7279532651106517, "epoch": 1.8970915382714015, "grad_norm": 0.6145720481872559, "learning_rate": 2.1250194789035518e-06, "loss": 1.4792, "mean_token_accuracy": 0.6527701765298843, "num_tokens": 2895121962.0, "step": 17269 }, { "entropy": 1.7092354695002239, "epoch": 1.8972013951827744, "grad_norm": 0.6521205902099609, "learning_rate": 2.1247536721650283e-06, "loss": 1.4655, "mean_token_accuracy": 0.6464730401833853, "num_tokens": 2895276814.0, "step": 17270 }, { "entropy": 1.7156803806622822, "epoch": 1.8973112520941475, "grad_norm": 0.6255699992179871, "learning_rate": 2.1244881463240525e-06, "loss": 1.4412, "mean_token_accuracy": 0.6423149853944778, "num_tokens": 2895474372.0, "step": 17271 }, { "entropy": 1.7619168758392334, "epoch": 1.8974211090055202, "grad_norm": 0.6236703991889954, "learning_rate": 2.1242229013890277e-06, "loss": 1.4911, "mean_token_accuracy": 0.6392246186733246, "num_tokens": 2895664516.0, "step": 17272 }, { "entropy": 1.6980822086334229, "epoch": 1.8975309659168933, "grad_norm": 0.8708395957946777, "learning_rate": 2.1239579373683485e-06, "loss": 1.2633, "mean_token_accuracy": 0.6819048374891281, "num_tokens": 2895813180.0, "step": 17273 }, { "entropy": 1.6919432481129963, "epoch": 1.8976408228282662, "grad_norm": 0.6261935830116272, "learning_rate": 2.1236932542703996e-06, "loss": 1.4297, "mean_token_accuracy": 0.6447741687297821, "num_tokens": 2895979054.0, "step": 17274 }, { "entropy": 1.67582102616628, "epoch": 1.897750679739639, "grad_norm": 0.6235146522521973, "learning_rate": 2.1234288521035594e-06, "loss": 1.5338, "mean_token_accuracy": 0.660733645160993, "num_tokens": 2896154771.0, "step": 17275 }, { "entropy": 1.7051470975081127, "epoch": 1.8978605366510122, "grad_norm": 0.7629257440567017, "learning_rate": 2.1231647308761976e-06, "loss": 1.3684, "mean_token_accuracy": 0.6604376882314682, "num_tokens": 2896305092.0, "step": 17276 }, { "entropy": 1.7228100895881653, "epoch": 1.8979703935623848, "grad_norm": 0.5867047309875488, "learning_rate": 2.1229008905966725e-06, "loss": 1.4371, "mean_token_accuracy": 0.6493526001771291, "num_tokens": 2896471668.0, "step": 17277 }, { "entropy": 1.6839666068553925, "epoch": 1.898080250473758, "grad_norm": 0.5775178670883179, "learning_rate": 2.1226373312733327e-06, "loss": 1.4035, "mean_token_accuracy": 0.6348609228928884, "num_tokens": 2896683909.0, "step": 17278 }, { "entropy": 1.7211569050947826, "epoch": 1.8981901073851308, "grad_norm": 0.6018611192703247, "learning_rate": 2.1223740529145217e-06, "loss": 1.3831, "mean_token_accuracy": 0.6495659500360489, "num_tokens": 2896856820.0, "step": 17279 }, { "entropy": 1.7634968161582947, "epoch": 1.8982999642965037, "grad_norm": 0.6888352632522583, "learning_rate": 2.1221110555285705e-06, "loss": 1.3899, "mean_token_accuracy": 0.6544539431730906, "num_tokens": 2897043615.0, "step": 17280 }, { "entropy": 1.7279618481794994, "epoch": 1.8984098212078768, "grad_norm": 0.6864139437675476, "learning_rate": 2.1218483391238056e-06, "loss": 1.4239, "mean_token_accuracy": 0.6641011635462443, "num_tokens": 2897196622.0, "step": 17281 }, { "entropy": 1.7093985974788666, "epoch": 1.8985196781192497, "grad_norm": 0.7060381770133972, "learning_rate": 2.1215859037085396e-06, "loss": 1.3693, "mean_token_accuracy": 0.6542820632457733, "num_tokens": 2897350359.0, "step": 17282 }, { "entropy": 1.7037651638189952, "epoch": 1.8986295350306226, "grad_norm": 0.5937888026237488, "learning_rate": 2.121323749291078e-06, "loss": 1.3839, "mean_token_accuracy": 0.6539882570505142, "num_tokens": 2897504289.0, "step": 17283 }, { "entropy": 1.69916437069575, "epoch": 1.8987393919419957, "grad_norm": 0.7481595873832703, "learning_rate": 2.1210618758797206e-06, "loss": 1.3909, "mean_token_accuracy": 0.6524476011594137, "num_tokens": 2897655290.0, "step": 17284 }, { "entropy": 1.7476255595684052, "epoch": 1.8988492488533684, "grad_norm": 0.8031712174415588, "learning_rate": 2.1208002834827533e-06, "loss": 1.4292, "mean_token_accuracy": 0.6464576125144958, "num_tokens": 2897853586.0, "step": 17285 }, { "entropy": 1.6887069741884868, "epoch": 1.8989591057647415, "grad_norm": 0.6965251564979553, "learning_rate": 2.1205389721084556e-06, "loss": 1.4859, "mean_token_accuracy": 0.6459216872851054, "num_tokens": 2898098470.0, "step": 17286 }, { "entropy": 1.7607675989468892, "epoch": 1.8990689626761144, "grad_norm": 0.7362844347953796, "learning_rate": 2.1202779417650975e-06, "loss": 1.3171, "mean_token_accuracy": 0.6696411470572153, "num_tokens": 2898208883.0, "step": 17287 }, { "entropy": 1.6586427589257557, "epoch": 1.8991788195874872, "grad_norm": 0.5607314705848694, "learning_rate": 2.120017192460943e-06, "loss": 1.2891, "mean_token_accuracy": 0.6717559099197388, "num_tokens": 2898361274.0, "step": 17288 }, { "entropy": 1.741514007250468, "epoch": 1.8992886764988604, "grad_norm": 0.7974133491516113, "learning_rate": 2.119756724204242e-06, "loss": 1.2662, "mean_token_accuracy": 0.6750031113624573, "num_tokens": 2898462903.0, "step": 17289 }, { "entropy": 1.6692986885706584, "epoch": 1.899398533410233, "grad_norm": 0.6954519748687744, "learning_rate": 2.1194965370032384e-06, "loss": 1.2814, "mean_token_accuracy": 0.6717743625243505, "num_tokens": 2898658678.0, "step": 17290 }, { "entropy": 1.6970256865024567, "epoch": 1.8995083903216061, "grad_norm": 0.9171543121337891, "learning_rate": 2.119236630866169e-06, "loss": 1.344, "mean_token_accuracy": 0.6659214198589325, "num_tokens": 2898834450.0, "step": 17291 }, { "entropy": 1.7186993459860485, "epoch": 1.899618247232979, "grad_norm": 0.694511353969574, "learning_rate": 2.1189770058012575e-06, "loss": 1.5021, "mean_token_accuracy": 0.6403380235036215, "num_tokens": 2899016730.0, "step": 17292 }, { "entropy": 1.7705208857854207, "epoch": 1.899728104144352, "grad_norm": 0.65798020362854, "learning_rate": 2.118717661816723e-06, "loss": 1.3265, "mean_token_accuracy": 0.6581917802492777, "num_tokens": 2899142112.0, "step": 17293 }, { "entropy": 1.6541693210601807, "epoch": 1.899837961055725, "grad_norm": 0.5420558452606201, "learning_rate": 2.1184585989207723e-06, "loss": 1.0458, "mean_token_accuracy": 0.6923287163178126, "num_tokens": 2899325969.0, "step": 17294 }, { "entropy": 1.7057847082614899, "epoch": 1.8999478179670979, "grad_norm": 0.6451363563537598, "learning_rate": 2.118199817121604e-06, "loss": 1.3285, "mean_token_accuracy": 0.6623325844605764, "num_tokens": 2899500885.0, "step": 17295 }, { "entropy": 1.6700185736020405, "epoch": 1.9000576748784708, "grad_norm": 0.7210705280303955, "learning_rate": 2.1179413164274095e-06, "loss": 1.4972, "mean_token_accuracy": 0.6420283913612366, "num_tokens": 2899716939.0, "step": 17296 }, { "entropy": 1.685696393251419, "epoch": 1.9001675317898439, "grad_norm": 0.6625770926475525, "learning_rate": 2.117683096846371e-06, "loss": 1.5663, "mean_token_accuracy": 0.6434602290391922, "num_tokens": 2899951366.0, "step": 17297 }, { "entropy": 1.6919982035954793, "epoch": 1.9002773887012165, "grad_norm": 0.977072536945343, "learning_rate": 2.117425158386659e-06, "loss": 1.395, "mean_token_accuracy": 0.6762153804302216, "num_tokens": 2900112592.0, "step": 17298 }, { "entropy": 1.6969668567180634, "epoch": 1.9003872456125896, "grad_norm": 0.7278903722763062, "learning_rate": 2.1171675010564374e-06, "loss": 1.5714, "mean_token_accuracy": 0.6418847143650055, "num_tokens": 2900304765.0, "step": 17299 }, { "entropy": 1.6995855470498402, "epoch": 1.9004971025239625, "grad_norm": 0.7209764122962952, "learning_rate": 2.116910124863863e-06, "loss": 1.269, "mean_token_accuracy": 0.6693304081757864, "num_tokens": 2900414824.0, "step": 17300 }, { "entropy": 1.6842413942019145, "epoch": 1.9006069594353354, "grad_norm": 0.6563640236854553, "learning_rate": 2.1166530298170803e-06, "loss": 1.5027, "mean_token_accuracy": 0.6496880004803339, "num_tokens": 2900593430.0, "step": 17301 }, { "entropy": 1.692998468875885, "epoch": 1.9007168163467085, "grad_norm": 0.6235043406486511, "learning_rate": 2.1163962159242257e-06, "loss": 1.3154, "mean_token_accuracy": 0.6697773188352585, "num_tokens": 2900747780.0, "step": 17302 }, { "entropy": 1.6582288543383281, "epoch": 1.9008266732580812, "grad_norm": 0.6133668422698975, "learning_rate": 2.1161396831934276e-06, "loss": 1.3683, "mean_token_accuracy": 0.6610623051722845, "num_tokens": 2900929126.0, "step": 17303 }, { "entropy": 1.696847399075826, "epoch": 1.9009365301694543, "grad_norm": 0.7132181525230408, "learning_rate": 2.1158834316328057e-06, "loss": 1.3959, "mean_token_accuracy": 0.6614312728246053, "num_tokens": 2901113026.0, "step": 17304 }, { "entropy": 1.7614192068576813, "epoch": 1.9010463870808272, "grad_norm": 0.666845440864563, "learning_rate": 2.1156274612504707e-06, "loss": 1.6487, "mean_token_accuracy": 0.613468810915947, "num_tokens": 2901324692.0, "step": 17305 }, { "entropy": 1.6704783340295155, "epoch": 1.9011562439922, "grad_norm": 0.5894295573234558, "learning_rate": 2.115371772054523e-06, "loss": 1.458, "mean_token_accuracy": 0.6370960672696432, "num_tokens": 2901536912.0, "step": 17306 }, { "entropy": 1.739585965871811, "epoch": 1.9012661009035732, "grad_norm": 0.8946515321731567, "learning_rate": 2.115116364053054e-06, "loss": 1.4145, "mean_token_accuracy": 0.6544796675443649, "num_tokens": 2901672711.0, "step": 17307 }, { "entropy": 1.7094165285428364, "epoch": 1.901375957814946, "grad_norm": 0.753603994846344, "learning_rate": 2.1148612372541494e-06, "loss": 1.4601, "mean_token_accuracy": 0.6673130393028259, "num_tokens": 2901797776.0, "step": 17308 }, { "entropy": 1.7277058760325115, "epoch": 1.901485814726319, "grad_norm": 0.6706650257110596, "learning_rate": 2.114606391665883e-06, "loss": 1.4088, "mean_token_accuracy": 0.6417536189158758, "num_tokens": 2901976272.0, "step": 17309 }, { "entropy": 1.7154739300409954, "epoch": 1.901595671637692, "grad_norm": 0.7305334210395813, "learning_rate": 2.114351827296319e-06, "loss": 1.3598, "mean_token_accuracy": 0.6586563885211945, "num_tokens": 2902107800.0, "step": 17310 }, { "entropy": 1.6918523510297139, "epoch": 1.9017055285490647, "grad_norm": 0.6615371108055115, "learning_rate": 2.1140975441535173e-06, "loss": 1.4371, "mean_token_accuracy": 0.6612310359875361, "num_tokens": 2902296754.0, "step": 17311 }, { "entropy": 1.7673652370770772, "epoch": 1.9018153854604378, "grad_norm": 0.7220476865768433, "learning_rate": 2.1138435422455237e-06, "loss": 1.4816, "mean_token_accuracy": 0.6466121921936671, "num_tokens": 2902468097.0, "step": 17312 }, { "entropy": 1.7256468534469604, "epoch": 1.9019252423718107, "grad_norm": 0.7970626354217529, "learning_rate": 2.113589821580378e-06, "loss": 1.2991, "mean_token_accuracy": 0.6718382885058721, "num_tokens": 2902588100.0, "step": 17313 }, { "entropy": 1.682048757870992, "epoch": 1.9020350992831836, "grad_norm": 0.578278660774231, "learning_rate": 2.1133363821661097e-06, "loss": 1.4743, "mean_token_accuracy": 0.6461637963851293, "num_tokens": 2902759596.0, "step": 17314 }, { "entropy": 1.7191002070903778, "epoch": 1.9021449561945567, "grad_norm": 0.6966040134429932, "learning_rate": 2.113083224010741e-06, "loss": 1.3934, "mean_token_accuracy": 0.6677719354629517, "num_tokens": 2902921113.0, "step": 17315 }, { "entropy": 1.7440748512744904, "epoch": 1.9022548131059294, "grad_norm": 0.9760518074035645, "learning_rate": 2.112830347122284e-06, "loss": 1.4498, "mean_token_accuracy": 0.6512205849091212, "num_tokens": 2903037716.0, "step": 17316 }, { "entropy": 1.709779401620229, "epoch": 1.9023646700173025, "grad_norm": 0.6506473422050476, "learning_rate": 2.1125777515087405e-06, "loss": 1.4735, "mean_token_accuracy": 0.6514460444450378, "num_tokens": 2903208041.0, "step": 17317 }, { "entropy": 1.7440835038820903, "epoch": 1.9024745269286754, "grad_norm": 0.6512756943702698, "learning_rate": 2.1123254371781072e-06, "loss": 1.408, "mean_token_accuracy": 0.6547217865784963, "num_tokens": 2903370726.0, "step": 17318 }, { "entropy": 1.6938308576742809, "epoch": 1.9025843838400482, "grad_norm": 0.7320882081985474, "learning_rate": 2.1120734041383693e-06, "loss": 1.3902, "mean_token_accuracy": 0.6558147321144739, "num_tokens": 2903533604.0, "step": 17319 }, { "entropy": 1.7045827706654866, "epoch": 1.9026942407514214, "grad_norm": 0.6136589050292969, "learning_rate": 2.1118216523975033e-06, "loss": 1.3388, "mean_token_accuracy": 0.667515754699707, "num_tokens": 2903687713.0, "step": 17320 }, { "entropy": 1.6572574277718861, "epoch": 1.9028040976627942, "grad_norm": 0.642920196056366, "learning_rate": 2.111570181963476e-06, "loss": 1.2703, "mean_token_accuracy": 0.6777136772871017, "num_tokens": 2903848962.0, "step": 17321 }, { "entropy": 1.6885874370733898, "epoch": 1.9029139545741671, "grad_norm": 0.6778194904327393, "learning_rate": 2.1113189928442474e-06, "loss": 1.4339, "mean_token_accuracy": 0.6557305157184601, "num_tokens": 2904026770.0, "step": 17322 }, { "entropy": 1.7227852642536163, "epoch": 1.9030238114855402, "grad_norm": 0.7432954907417297, "learning_rate": 2.1110680850477677e-06, "loss": 1.2885, "mean_token_accuracy": 0.6714919358491898, "num_tokens": 2904155217.0, "step": 17323 }, { "entropy": 1.632401277621587, "epoch": 1.903133668396913, "grad_norm": 0.6605057716369629, "learning_rate": 2.1108174585819766e-06, "loss": 1.4722, "mean_token_accuracy": 0.6337202688058218, "num_tokens": 2904366981.0, "step": 17324 }, { "entropy": 1.7007041573524475, "epoch": 1.903243525308286, "grad_norm": 0.7226251363754272, "learning_rate": 2.1105671134548095e-06, "loss": 1.4875, "mean_token_accuracy": 0.652341494957606, "num_tokens": 2904540754.0, "step": 17325 }, { "entropy": 1.6867063740889232, "epoch": 1.9033533822196589, "grad_norm": 1.4981513023376465, "learning_rate": 2.110317049674186e-06, "loss": 1.2325, "mean_token_accuracy": 0.6909955491622289, "num_tokens": 2904719674.0, "step": 17326 }, { "entropy": 1.6712439060211182, "epoch": 1.9034632391310318, "grad_norm": 0.717866837978363, "learning_rate": 2.110067267248022e-06, "loss": 1.3748, "mean_token_accuracy": 0.6473964601755142, "num_tokens": 2904849892.0, "step": 17327 }, { "entropy": 1.626634528239568, "epoch": 1.9035730960424049, "grad_norm": 0.8814289569854736, "learning_rate": 2.109817766184224e-06, "loss": 1.3151, "mean_token_accuracy": 0.6663583666086197, "num_tokens": 2905045049.0, "step": 17328 }, { "entropy": 1.7585350374380748, "epoch": 1.9036829529537775, "grad_norm": 0.6172336339950562, "learning_rate": 2.1095685464906867e-06, "loss": 1.3216, "mean_token_accuracy": 0.6797197361787161, "num_tokens": 2905187276.0, "step": 17329 }, { "entropy": 1.7158283491929371, "epoch": 1.9037928098651506, "grad_norm": 0.6529079079627991, "learning_rate": 2.1093196081753005e-06, "loss": 1.4517, "mean_token_accuracy": 0.646214579542478, "num_tokens": 2905407433.0, "step": 17330 }, { "entropy": 1.6329115728537242, "epoch": 1.9039026667765235, "grad_norm": 0.6197842359542847, "learning_rate": 2.1090709512459403e-06, "loss": 1.4375, "mean_token_accuracy": 0.6615285128355026, "num_tokens": 2905645926.0, "step": 17331 }, { "entropy": 1.7400815387566884, "epoch": 1.9040125236878964, "grad_norm": 0.7766255736351013, "learning_rate": 2.1088225757104797e-06, "loss": 1.3882, "mean_token_accuracy": 0.6602704524993896, "num_tokens": 2905831337.0, "step": 17332 }, { "entropy": 1.6611623167991638, "epoch": 1.9041223805992695, "grad_norm": 0.8019027709960938, "learning_rate": 2.108574481576778e-06, "loss": 1.372, "mean_token_accuracy": 0.6643891384204229, "num_tokens": 2905988768.0, "step": 17333 }, { "entropy": 1.6944166123867035, "epoch": 1.9042322375106424, "grad_norm": 0.6995406150817871, "learning_rate": 2.1083266688526864e-06, "loss": 1.3526, "mean_token_accuracy": 0.6618950814008713, "num_tokens": 2906191608.0, "step": 17334 }, { "entropy": 1.7486229836940765, "epoch": 1.9043420944220153, "grad_norm": 0.7345391511917114, "learning_rate": 2.1080791375460497e-06, "loss": 1.3983, "mean_token_accuracy": 0.661010871330897, "num_tokens": 2906317489.0, "step": 17335 }, { "entropy": 1.6621031761169434, "epoch": 1.9044519513333884, "grad_norm": 0.6560261845588684, "learning_rate": 2.1078318876647008e-06, "loss": 1.2522, "mean_token_accuracy": 0.6751103301843008, "num_tokens": 2906467343.0, "step": 17336 }, { "entropy": 1.7779687742392223, "epoch": 1.904561808244761, "grad_norm": 0.6154184937477112, "learning_rate": 2.107584919216467e-06, "loss": 1.5128, "mean_token_accuracy": 0.6438574666778246, "num_tokens": 2906658202.0, "step": 17337 }, { "entropy": 1.8077290554841359, "epoch": 1.9046716651561342, "grad_norm": 0.7357187867164612, "learning_rate": 2.1073382322091633e-06, "loss": 1.5264, "mean_token_accuracy": 0.6432707210381826, "num_tokens": 2906818347.0, "step": 17338 }, { "entropy": 1.7658953468004863, "epoch": 1.904781522067507, "grad_norm": 0.615322470664978, "learning_rate": 2.107091826650596e-06, "loss": 1.4196, "mean_token_accuracy": 0.6580002655585607, "num_tokens": 2906975280.0, "step": 17339 }, { "entropy": 1.7291893462340038, "epoch": 1.90489137897888, "grad_norm": 0.6034359931945801, "learning_rate": 2.106845702548567e-06, "loss": 1.6408, "mean_token_accuracy": 0.6236142565806707, "num_tokens": 2907236930.0, "step": 17340 }, { "entropy": 1.6925783356030781, "epoch": 1.905001235890253, "grad_norm": 0.6417155861854553, "learning_rate": 2.1065998599108627e-06, "loss": 1.5223, "mean_token_accuracy": 0.631607269247373, "num_tokens": 2907449207.0, "step": 17341 }, { "entropy": 1.6567042768001556, "epoch": 1.9051110928016257, "grad_norm": 0.6812300682067871, "learning_rate": 2.106354298745266e-06, "loss": 1.3955, "mean_token_accuracy": 0.6692457795143127, "num_tokens": 2907609264.0, "step": 17342 }, { "entropy": 1.707110603650411, "epoch": 1.9052209497129988, "grad_norm": 0.59761643409729, "learning_rate": 2.1061090190595484e-06, "loss": 1.5329, "mean_token_accuracy": 0.6419897625843684, "num_tokens": 2907816900.0, "step": 17343 }, { "entropy": 1.7284224132696788, "epoch": 1.9053308066243717, "grad_norm": 0.7119576930999756, "learning_rate": 2.1058640208614723e-06, "loss": 1.3361, "mean_token_accuracy": 0.6653526822725931, "num_tokens": 2907986068.0, "step": 17344 }, { "entropy": 1.69676540295283, "epoch": 1.9054406635357446, "grad_norm": 0.7282069325447083, "learning_rate": 2.1056193041587924e-06, "loss": 1.2982, "mean_token_accuracy": 0.6773037711779276, "num_tokens": 2908134262.0, "step": 17345 }, { "entropy": 1.678769161303838, "epoch": 1.9055505204471177, "grad_norm": 0.6445490717887878, "learning_rate": 2.105374868959253e-06, "loss": 1.3609, "mean_token_accuracy": 0.6605921387672424, "num_tokens": 2908292260.0, "step": 17346 }, { "entropy": 1.7496456503868103, "epoch": 1.9056603773584906, "grad_norm": 0.8550807237625122, "learning_rate": 2.105130715270591e-06, "loss": 1.3914, "mean_token_accuracy": 0.6650789976119995, "num_tokens": 2908453413.0, "step": 17347 }, { "entropy": 1.743078887462616, "epoch": 1.9057702342698635, "grad_norm": 0.747218906879425, "learning_rate": 2.104886843100534e-06, "loss": 1.4484, "mean_token_accuracy": 0.6520692358414332, "num_tokens": 2908624898.0, "step": 17348 }, { "entropy": 1.767656107743581, "epoch": 1.9058800911812366, "grad_norm": 0.6700419783592224, "learning_rate": 2.104643252456801e-06, "loss": 1.498, "mean_token_accuracy": 0.6458103656768799, "num_tokens": 2908776179.0, "step": 17349 }, { "entropy": 1.7707056005795796, "epoch": 1.9059899480926092, "grad_norm": 0.7064527869224548, "learning_rate": 2.1043999433471006e-06, "loss": 1.3796, "mean_token_accuracy": 0.6523715605338415, "num_tokens": 2908908378.0, "step": 17350 }, { "entropy": 1.7640493313471477, "epoch": 1.9060998050039824, "grad_norm": 0.69893479347229, "learning_rate": 2.1041569157791325e-06, "loss": 1.356, "mean_token_accuracy": 0.6554395059744517, "num_tokens": 2909045131.0, "step": 17351 }, { "entropy": 1.7937356928984325, "epoch": 1.9062096619153552, "grad_norm": 0.7464211583137512, "learning_rate": 2.10391416976059e-06, "loss": 1.4799, "mean_token_accuracy": 0.6518764893213908, "num_tokens": 2909200452.0, "step": 17352 }, { "entropy": 1.7353100379308064, "epoch": 1.9063195188267281, "grad_norm": 0.6664573550224304, "learning_rate": 2.103671705299156e-06, "loss": 1.4749, "mean_token_accuracy": 0.6581431378920873, "num_tokens": 2909359220.0, "step": 17353 }, { "entropy": 1.7203071018060048, "epoch": 1.9064293757381012, "grad_norm": 0.7875437140464783, "learning_rate": 2.103429522402502e-06, "loss": 1.5624, "mean_token_accuracy": 0.6487771024306616, "num_tokens": 2909556852.0, "step": 17354 }, { "entropy": 1.6935315628846486, "epoch": 1.9065392326494741, "grad_norm": 0.6662817001342773, "learning_rate": 2.1031876210782954e-06, "loss": 1.2577, "mean_token_accuracy": 0.6786133150259653, "num_tokens": 2909700221.0, "step": 17355 }, { "entropy": 1.670400321483612, "epoch": 1.906649089560847, "grad_norm": 0.6516803503036499, "learning_rate": 2.1029460013341927e-06, "loss": 1.4057, "mean_token_accuracy": 0.6678726325432459, "num_tokens": 2909895492.0, "step": 17356 }, { "entropy": 1.6352357765038807, "epoch": 1.9067589464722199, "grad_norm": 0.6111500263214111, "learning_rate": 2.1027046631778395e-06, "loss": 1.3753, "mean_token_accuracy": 0.6627530604600906, "num_tokens": 2910096151.0, "step": 17357 }, { "entropy": 1.6735480030377705, "epoch": 1.9068688033835928, "grad_norm": 0.6414183974266052, "learning_rate": 2.1024636066168734e-06, "loss": 1.4689, "mean_token_accuracy": 0.6363677581151327, "num_tokens": 2910260269.0, "step": 17358 }, { "entropy": 1.7215021948019664, "epoch": 1.9069786602949659, "grad_norm": 0.6086724996566772, "learning_rate": 2.102222831658926e-06, "loss": 1.4963, "mean_token_accuracy": 0.6482528100411097, "num_tokens": 2910447473.0, "step": 17359 }, { "entropy": 1.642045497894287, "epoch": 1.9070885172063388, "grad_norm": 0.7172538042068481, "learning_rate": 2.1019823383116163e-06, "loss": 1.4312, "mean_token_accuracy": 0.6497194568316141, "num_tokens": 2910609340.0, "step": 17360 }, { "entropy": 1.710667649904887, "epoch": 1.9071983741177116, "grad_norm": 0.8276099562644958, "learning_rate": 2.1017421265825557e-06, "loss": 1.3401, "mean_token_accuracy": 0.6725934545199076, "num_tokens": 2910731278.0, "step": 17361 }, { "entropy": 1.733361969391505, "epoch": 1.9073082310290848, "grad_norm": 0.7834944128990173, "learning_rate": 2.101502196479348e-06, "loss": 1.3967, "mean_token_accuracy": 0.658065527677536, "num_tokens": 2910897871.0, "step": 17362 }, { "entropy": 1.7039579351743062, "epoch": 1.9074180879404574, "grad_norm": 0.7555103302001953, "learning_rate": 2.1012625480095844e-06, "loss": 1.3922, "mean_token_accuracy": 0.6700689966479937, "num_tokens": 2911030541.0, "step": 17363 }, { "entropy": 1.7056597967942555, "epoch": 1.9075279448518305, "grad_norm": 0.6581346988677979, "learning_rate": 2.1010231811808534e-06, "loss": 1.4952, "mean_token_accuracy": 0.6401078750689825, "num_tokens": 2911278929.0, "step": 17364 }, { "entropy": 1.6974429786205292, "epoch": 1.9076378017632034, "grad_norm": 0.573140561580658, "learning_rate": 2.1007840960007274e-06, "loss": 1.5212, "mean_token_accuracy": 0.6344274332125982, "num_tokens": 2911502264.0, "step": 17365 }, { "entropy": 1.679697851339976, "epoch": 1.9077476586745763, "grad_norm": 0.7497307658195496, "learning_rate": 2.1005452924767745e-06, "loss": 1.3362, "mean_token_accuracy": 0.659656897187233, "num_tokens": 2911637934.0, "step": 17366 }, { "entropy": 1.6486320694287617, "epoch": 1.9078575155859494, "grad_norm": 0.6654718518257141, "learning_rate": 2.1003067706165534e-06, "loss": 1.4975, "mean_token_accuracy": 0.6403248061736425, "num_tokens": 2911856512.0, "step": 17367 }, { "entropy": 1.712807983160019, "epoch": 1.9079673724973223, "grad_norm": 0.7178590297698975, "learning_rate": 2.1000685304276123e-06, "loss": 1.258, "mean_token_accuracy": 0.6742016822099686, "num_tokens": 2911978469.0, "step": 17368 }, { "entropy": 1.699036826690038, "epoch": 1.9080772294086952, "grad_norm": 0.7930957674980164, "learning_rate": 2.0998305719174924e-06, "loss": 1.3113, "mean_token_accuracy": 0.6729649156332016, "num_tokens": 2912136257.0, "step": 17369 }, { "entropy": 1.764205386241277, "epoch": 1.908187086320068, "grad_norm": 0.7400210499763489, "learning_rate": 2.0995928950937237e-06, "loss": 1.6815, "mean_token_accuracy": 0.6208955893913904, "num_tokens": 2912335968.0, "step": 17370 }, { "entropy": 1.6935129761695862, "epoch": 1.908296943231441, "grad_norm": 0.696378767490387, "learning_rate": 2.09935549996383e-06, "loss": 1.3272, "mean_token_accuracy": 0.6687455127636591, "num_tokens": 2912508242.0, "step": 17371 }, { "entropy": 1.725334147612254, "epoch": 1.908406800142814, "grad_norm": 0.7115861773490906, "learning_rate": 2.099118386535323e-06, "loss": 1.4895, "mean_token_accuracy": 0.6452667613824209, "num_tokens": 2912694485.0, "step": 17372 }, { "entropy": 1.714306155840556, "epoch": 1.908516657054187, "grad_norm": 0.7995036840438843, "learning_rate": 2.09888155481571e-06, "loss": 1.2769, "mean_token_accuracy": 0.6667713671922684, "num_tokens": 2912828341.0, "step": 17373 }, { "entropy": 1.6857503950595856, "epoch": 1.9086265139655598, "grad_norm": 0.7703235745429993, "learning_rate": 2.0986450048124836e-06, "loss": 1.4397, "mean_token_accuracy": 0.654585580031077, "num_tokens": 2912996677.0, "step": 17374 }, { "entropy": 1.6876949568589528, "epoch": 1.908736370876933, "grad_norm": 0.778724193572998, "learning_rate": 2.0984087365331315e-06, "loss": 1.501, "mean_token_accuracy": 0.6460902194182078, "num_tokens": 2913167483.0, "step": 17375 }, { "entropy": 1.6780872146288555, "epoch": 1.9088462277883056, "grad_norm": 0.6744540929794312, "learning_rate": 2.0981727499851326e-06, "loss": 1.5221, "mean_token_accuracy": 0.6462369163831075, "num_tokens": 2913400005.0, "step": 17376 }, { "entropy": 1.6881005962689717, "epoch": 1.9089560846996787, "grad_norm": 0.6781927347183228, "learning_rate": 2.097937045175954e-06, "loss": 1.4824, "mean_token_accuracy": 0.6568097323179245, "num_tokens": 2913569420.0, "step": 17377 }, { "entropy": 1.7392517030239105, "epoch": 1.9090659416110516, "grad_norm": 0.6244411468505859, "learning_rate": 2.0977016221130565e-06, "loss": 1.5132, "mean_token_accuracy": 0.6272874772548676, "num_tokens": 2913830881.0, "step": 17378 }, { "entropy": 1.6990625858306885, "epoch": 1.9091757985224245, "grad_norm": 0.7350092530250549, "learning_rate": 2.097466480803892e-06, "loss": 1.3652, "mean_token_accuracy": 0.6684706459442774, "num_tokens": 2914056190.0, "step": 17379 }, { "entropy": 1.7045234441757202, "epoch": 1.9092856554337976, "grad_norm": 0.7276471257209778, "learning_rate": 2.097231621255901e-06, "loss": 1.3865, "mean_token_accuracy": 0.6614778786897659, "num_tokens": 2914209787.0, "step": 17380 }, { "entropy": 1.6718364854653676, "epoch": 1.9093955123451705, "grad_norm": 0.7682698369026184, "learning_rate": 2.096997043476519e-06, "loss": 1.3389, "mean_token_accuracy": 0.6800911873579025, "num_tokens": 2914413262.0, "step": 17381 }, { "entropy": 1.7149316171805065, "epoch": 1.9095053692565434, "grad_norm": 0.7124384045600891, "learning_rate": 2.096762747473168e-06, "loss": 1.4091, "mean_token_accuracy": 0.6653865824143091, "num_tokens": 2914541576.0, "step": 17382 }, { "entropy": 1.6883401771386464, "epoch": 1.9096152261679162, "grad_norm": 0.6308038234710693, "learning_rate": 2.0965287332532634e-06, "loss": 1.3913, "mean_token_accuracy": 0.6438700606425604, "num_tokens": 2914680841.0, "step": 17383 }, { "entropy": 1.7368865112463634, "epoch": 1.9097250830792891, "grad_norm": 0.6777101755142212, "learning_rate": 2.0962950008242124e-06, "loss": 1.3046, "mean_token_accuracy": 0.6656106561422348, "num_tokens": 2914857873.0, "step": 17384 }, { "entropy": 1.673662155866623, "epoch": 1.9098349399906622, "grad_norm": 0.8075534105300903, "learning_rate": 2.096061550193414e-06, "loss": 1.3713, "mean_token_accuracy": 0.6667961577574412, "num_tokens": 2915027062.0, "step": 17385 }, { "entropy": 1.6893903613090515, "epoch": 1.9099447969020351, "grad_norm": 0.7893344759941101, "learning_rate": 2.0958283813682538e-06, "loss": 1.3099, "mean_token_accuracy": 0.668051486214002, "num_tokens": 2915178403.0, "step": 17386 }, { "entropy": 1.6581451892852783, "epoch": 1.910054653813408, "grad_norm": 0.6381314396858215, "learning_rate": 2.095595494356113e-06, "loss": 1.3248, "mean_token_accuracy": 0.6689875473578771, "num_tokens": 2915377301.0, "step": 17387 }, { "entropy": 1.7033253610134125, "epoch": 1.910164510724781, "grad_norm": 0.7965177893638611, "learning_rate": 2.0953628891643645e-06, "loss": 1.3608, "mean_token_accuracy": 0.6760942687590917, "num_tokens": 2915496356.0, "step": 17388 }, { "entropy": 1.715836187203725, "epoch": 1.9102743676361538, "grad_norm": 0.7303998470306396, "learning_rate": 2.0951305658003655e-06, "loss": 1.4864, "mean_token_accuracy": 0.6432332595189413, "num_tokens": 2915680397.0, "step": 17389 }, { "entropy": 1.6246002614498138, "epoch": 1.9103842245475269, "grad_norm": 0.5866871476173401, "learning_rate": 2.094898524271473e-06, "loss": 1.3568, "mean_token_accuracy": 0.6729725748300552, "num_tokens": 2915856875.0, "step": 17390 }, { "entropy": 1.6950217187404633, "epoch": 1.9104940814588998, "grad_norm": 0.604824423789978, "learning_rate": 2.094666764585028e-06, "loss": 1.5893, "mean_token_accuracy": 0.6251017103592554, "num_tokens": 2916071725.0, "step": 17391 }, { "entropy": 1.6907562216122944, "epoch": 1.9106039383702726, "grad_norm": 0.6250636577606201, "learning_rate": 2.0944352867483685e-06, "loss": 1.3843, "mean_token_accuracy": 0.6660982569058737, "num_tokens": 2916229565.0, "step": 17392 }, { "entropy": 1.6932495137055714, "epoch": 1.9107137952816458, "grad_norm": 0.7064518928527832, "learning_rate": 2.0942040907688184e-06, "loss": 1.3489, "mean_token_accuracy": 0.6507859379053116, "num_tokens": 2916382240.0, "step": 17393 }, { "entropy": 1.7231053411960602, "epoch": 1.9108236521930186, "grad_norm": 0.6741702556610107, "learning_rate": 2.0939731766536963e-06, "loss": 1.5839, "mean_token_accuracy": 0.6615618417660395, "num_tokens": 2916544920.0, "step": 17394 }, { "entropy": 1.7370639046033223, "epoch": 1.9109335091043915, "grad_norm": 0.6713312268257141, "learning_rate": 2.0937425444103105e-06, "loss": 1.3087, "mean_token_accuracy": 0.6731932461261749, "num_tokens": 2916669826.0, "step": 17395 }, { "entropy": 1.7642224729061127, "epoch": 1.9110433660157646, "grad_norm": 0.7128596305847168, "learning_rate": 2.0935121940459595e-06, "loss": 1.4046, "mean_token_accuracy": 0.6427824894587199, "num_tokens": 2916817048.0, "step": 17396 }, { "entropy": 1.6907521684964497, "epoch": 1.9111532229271373, "grad_norm": 0.7318049073219299, "learning_rate": 2.0932821255679337e-06, "loss": 1.3763, "mean_token_accuracy": 0.6755510369936625, "num_tokens": 2916967274.0, "step": 17397 }, { "entropy": 1.6487935086091359, "epoch": 1.9112630798385104, "grad_norm": 0.7536101341247559, "learning_rate": 2.0930523389835154e-06, "loss": 1.584, "mean_token_accuracy": 0.6465408056974411, "num_tokens": 2917128574.0, "step": 17398 }, { "entropy": 1.740364799896876, "epoch": 1.9113729367498833, "grad_norm": 0.6966463923454285, "learning_rate": 2.0928228342999764e-06, "loss": 1.4856, "mean_token_accuracy": 0.6480376496911049, "num_tokens": 2917283340.0, "step": 17399 }, { "entropy": 1.6265077789624531, "epoch": 1.9114827936612562, "grad_norm": 0.6639081835746765, "learning_rate": 2.092593611524582e-06, "loss": 1.4039, "mean_token_accuracy": 0.6535753359397253, "num_tokens": 2917514395.0, "step": 17400 }, { "entropy": 1.768510530392329, "epoch": 1.9115926505726293, "grad_norm": 0.6785690188407898, "learning_rate": 2.092364670664586e-06, "loss": 1.4136, "mean_token_accuracy": 0.6424920608599981, "num_tokens": 2917680252.0, "step": 17401 }, { "entropy": 1.6863116323947906, "epoch": 1.911702507484002, "grad_norm": 0.6685127019882202, "learning_rate": 2.0921360117272334e-06, "loss": 1.4231, "mean_token_accuracy": 0.6529277910788854, "num_tokens": 2917812834.0, "step": 17402 }, { "entropy": 1.6903614699840546, "epoch": 1.911812364395375, "grad_norm": 0.8102334141731262, "learning_rate": 2.0919076347197622e-06, "loss": 1.3383, "mean_token_accuracy": 0.6616157094637553, "num_tokens": 2917957581.0, "step": 17403 }, { "entropy": 1.7386558850606282, "epoch": 1.911922221306748, "grad_norm": 0.7050533294677734, "learning_rate": 2.091679539649401e-06, "loss": 1.4781, "mean_token_accuracy": 0.6581172744433085, "num_tokens": 2918097568.0, "step": 17404 }, { "entropy": 1.692396640777588, "epoch": 1.9120320782181208, "grad_norm": 0.8380081057548523, "learning_rate": 2.091451726523368e-06, "loss": 1.4178, "mean_token_accuracy": 0.6531778971354166, "num_tokens": 2918232997.0, "step": 17405 }, { "entropy": 1.774149735768636, "epoch": 1.912141935129494, "grad_norm": 0.7412464618682861, "learning_rate": 2.0912241953488736e-06, "loss": 1.4631, "mean_token_accuracy": 0.6348550717035929, "num_tokens": 2918390827.0, "step": 17406 }, { "entropy": 1.7355511287848155, "epoch": 1.9122517920408668, "grad_norm": 0.6477778553962708, "learning_rate": 2.0909969461331185e-06, "loss": 1.4172, "mean_token_accuracy": 0.6531795511643091, "num_tokens": 2918560623.0, "step": 17407 }, { "entropy": 1.6789377927780151, "epoch": 1.9123616489522397, "grad_norm": 0.6781573295593262, "learning_rate": 2.0907699788832962e-06, "loss": 1.4929, "mean_token_accuracy": 0.6445153504610062, "num_tokens": 2918738819.0, "step": 17408 }, { "entropy": 1.6440903345743816, "epoch": 1.9124715058636128, "grad_norm": 0.6116029620170593, "learning_rate": 2.0905432936065895e-06, "loss": 1.3995, "mean_token_accuracy": 0.6445668091376623, "num_tokens": 2918932198.0, "step": 17409 }, { "entropy": 1.72769961754481, "epoch": 1.9125813627749855, "grad_norm": 0.749138355255127, "learning_rate": 2.090316890310172e-06, "loss": 1.3863, "mean_token_accuracy": 0.6565118928750356, "num_tokens": 2919077877.0, "step": 17410 }, { "entropy": 1.7584010362625122, "epoch": 1.9126912196863586, "grad_norm": 0.6772981286048889, "learning_rate": 2.0900907690012095e-06, "loss": 1.2869, "mean_token_accuracy": 0.6611250092585882, "num_tokens": 2919201070.0, "step": 17411 }, { "entropy": 1.669664631287257, "epoch": 1.9128010765977315, "grad_norm": 0.7344382405281067, "learning_rate": 2.089864929686861e-06, "loss": 1.4473, "mean_token_accuracy": 0.6602032780647278, "num_tokens": 2919334440.0, "step": 17412 }, { "entropy": 1.6799374123414357, "epoch": 1.9129109335091043, "grad_norm": 0.5712904334068298, "learning_rate": 2.0896393723742725e-06, "loss": 1.3804, "mean_token_accuracy": 0.6711891492207845, "num_tokens": 2919517894.0, "step": 17413 }, { "entropy": 1.6531173884868622, "epoch": 1.9130207904204775, "grad_norm": 0.6494702696800232, "learning_rate": 2.089414097070581e-06, "loss": 1.2276, "mean_token_accuracy": 0.6815090030431747, "num_tokens": 2919684705.0, "step": 17414 }, { "entropy": 1.6996674636999767, "epoch": 1.9131306473318501, "grad_norm": 0.6836813688278198, "learning_rate": 2.0891891037829204e-06, "loss": 1.4035, "mean_token_accuracy": 0.6582774519920349, "num_tokens": 2919839562.0, "step": 17415 }, { "entropy": 1.6734414498011272, "epoch": 1.9132405042432232, "grad_norm": 0.804311215877533, "learning_rate": 2.0889643925184073e-06, "loss": 1.3143, "mean_token_accuracy": 0.6627353529135386, "num_tokens": 2919960685.0, "step": 17416 }, { "entropy": 1.669149398803711, "epoch": 1.9133503611545961, "grad_norm": 0.6251264214515686, "learning_rate": 2.0887399632841578e-06, "loss": 1.4457, "mean_token_accuracy": 0.6435550649960836, "num_tokens": 2920181248.0, "step": 17417 }, { "entropy": 1.7074171503384907, "epoch": 1.913460218065969, "grad_norm": 0.6743486523628235, "learning_rate": 2.0885158160872717e-06, "loss": 1.4268, "mean_token_accuracy": 0.654527614514033, "num_tokens": 2920389161.0, "step": 17418 }, { "entropy": 1.7007141311963399, "epoch": 1.913570074977342, "grad_norm": 0.6802394986152649, "learning_rate": 2.088291950934844e-06, "loss": 1.4994, "mean_token_accuracy": 0.634449248512586, "num_tokens": 2920611442.0, "step": 17419 }, { "entropy": 1.7454398373762767, "epoch": 1.913679931888715, "grad_norm": 0.7798505425453186, "learning_rate": 2.088068367833961e-06, "loss": 1.3725, "mean_token_accuracy": 0.6632242302099863, "num_tokens": 2920737237.0, "step": 17420 }, { "entropy": 1.7217259307702382, "epoch": 1.9137897888000879, "grad_norm": 0.6991998553276062, "learning_rate": 2.0878450667916983e-06, "loss": 1.4191, "mean_token_accuracy": 0.642817402879397, "num_tokens": 2920907848.0, "step": 17421 }, { "entropy": 1.6675984263420105, "epoch": 1.913899645711461, "grad_norm": 0.7746340036392212, "learning_rate": 2.0876220478151233e-06, "loss": 1.337, "mean_token_accuracy": 0.6696319133043289, "num_tokens": 2921027405.0, "step": 17422 }, { "entropy": 1.6807717482248943, "epoch": 1.9140095026228336, "grad_norm": 0.6704512238502502, "learning_rate": 2.0873993109112943e-06, "loss": 1.2705, "mean_token_accuracy": 0.6693969368934631, "num_tokens": 2921204488.0, "step": 17423 }, { "entropy": 1.6527533928553264, "epoch": 1.9141193595342068, "grad_norm": 0.7245538830757141, "learning_rate": 2.087176856087261e-06, "loss": 1.3807, "mean_token_accuracy": 0.671676109234492, "num_tokens": 2921333040.0, "step": 17424 }, { "entropy": 1.6895228326320648, "epoch": 1.9142292164455796, "grad_norm": 0.5984308123588562, "learning_rate": 2.086954683350064e-06, "loss": 1.3878, "mean_token_accuracy": 0.6493009428183237, "num_tokens": 2921582051.0, "step": 17425 }, { "entropy": 1.7049407164255779, "epoch": 1.9143390733569525, "grad_norm": 0.7265485525131226, "learning_rate": 2.086732792706735e-06, "loss": 1.326, "mean_token_accuracy": 0.6612226913372675, "num_tokens": 2921738662.0, "step": 17426 }, { "entropy": 1.7597126563390095, "epoch": 1.9144489302683256, "grad_norm": 0.8590186238288879, "learning_rate": 2.086511184164297e-06, "loss": 1.5987, "mean_token_accuracy": 0.6419850587844849, "num_tokens": 2921942676.0, "step": 17427 }, { "entropy": 1.7027497589588165, "epoch": 1.9145587871796983, "grad_norm": 0.6106962561607361, "learning_rate": 2.0862898577297636e-06, "loss": 1.3344, "mean_token_accuracy": 0.6656178931395212, "num_tokens": 2922083937.0, "step": 17428 }, { "entropy": 1.7116191983222961, "epoch": 1.9146686440910714, "grad_norm": 0.7605423927307129, "learning_rate": 2.0860688134101394e-06, "loss": 1.2908, "mean_token_accuracy": 0.6651194790999094, "num_tokens": 2922221769.0, "step": 17429 }, { "entropy": 1.7398067712783813, "epoch": 1.9147785010024443, "grad_norm": 0.6844810247421265, "learning_rate": 2.0858480512124205e-06, "loss": 1.3323, "mean_token_accuracy": 0.6669246157010397, "num_tokens": 2922352941.0, "step": 17430 }, { "entropy": 1.6831025381882985, "epoch": 1.9148883579138172, "grad_norm": 0.6590069532394409, "learning_rate": 2.0856275711435934e-06, "loss": 1.4787, "mean_token_accuracy": 0.6421498209238052, "num_tokens": 2922541900.0, "step": 17431 }, { "entropy": 1.7204078237215679, "epoch": 1.9149982148251903, "grad_norm": 0.7026862502098083, "learning_rate": 2.085407373210637e-06, "loss": 1.3757, "mean_token_accuracy": 0.645499716202418, "num_tokens": 2922689747.0, "step": 17432 }, { "entropy": 1.704828808705012, "epoch": 1.9151080717365632, "grad_norm": 0.7054374814033508, "learning_rate": 2.0851874574205206e-06, "loss": 1.4807, "mean_token_accuracy": 0.6405527790387472, "num_tokens": 2922909076.0, "step": 17433 }, { "entropy": 1.7403566241264343, "epoch": 1.915217928647936, "grad_norm": 0.6240404844284058, "learning_rate": 2.084967823780204e-06, "loss": 1.3858, "mean_token_accuracy": 0.6522353092829386, "num_tokens": 2923086459.0, "step": 17434 }, { "entropy": 1.6699285606543224, "epoch": 1.9153277855593092, "grad_norm": 0.6833006739616394, "learning_rate": 2.0847484722966383e-06, "loss": 1.3314, "mean_token_accuracy": 0.6654851237932841, "num_tokens": 2923264919.0, "step": 17435 }, { "entropy": 1.7221463322639465, "epoch": 1.9154376424706818, "grad_norm": 0.6218920946121216, "learning_rate": 2.0845294029767665e-06, "loss": 1.4466, "mean_token_accuracy": 0.6374649703502655, "num_tokens": 2923481855.0, "step": 17436 }, { "entropy": 1.7145523428916931, "epoch": 1.915547499382055, "grad_norm": 0.8189231157302856, "learning_rate": 2.084310615827522e-06, "loss": 1.7038, "mean_token_accuracy": 0.6330908884604772, "num_tokens": 2923683139.0, "step": 17437 }, { "entropy": 1.7109653453032176, "epoch": 1.9156573562934278, "grad_norm": 0.6383715271949768, "learning_rate": 2.0840921108558277e-06, "loss": 1.2914, "mean_token_accuracy": 0.664964367945989, "num_tokens": 2923815046.0, "step": 17438 }, { "entropy": 1.6905694603919983, "epoch": 1.9157672132048007, "grad_norm": 0.8235413432121277, "learning_rate": 2.0838738880686023e-06, "loss": 1.3632, "mean_token_accuracy": 0.6752482801675797, "num_tokens": 2923945485.0, "step": 17439 }, { "entropy": 1.6911144355932872, "epoch": 1.9158770701161738, "grad_norm": 0.794511616230011, "learning_rate": 2.083655947472749e-06, "loss": 1.3981, "mean_token_accuracy": 0.6572174479564031, "num_tokens": 2924111825.0, "step": 17440 }, { "entropy": 1.6802450319131215, "epoch": 1.9159869270275465, "grad_norm": 0.6762140989303589, "learning_rate": 2.0834382890751675e-06, "loss": 1.395, "mean_token_accuracy": 0.6544107298056284, "num_tokens": 2924227529.0, "step": 17441 }, { "entropy": 1.7163665493329365, "epoch": 1.9160967839389196, "grad_norm": 0.7602096199989319, "learning_rate": 2.0832209128827475e-06, "loss": 1.4227, "mean_token_accuracy": 0.6661782662073771, "num_tokens": 2924395248.0, "step": 17442 }, { "entropy": 1.6846363445123036, "epoch": 1.9162066408502925, "grad_norm": 0.6908214092254639, "learning_rate": 2.0830038189023657e-06, "loss": 1.6045, "mean_token_accuracy": 0.637446328997612, "num_tokens": 2924571785.0, "step": 17443 }, { "entropy": 1.6963723401228588, "epoch": 1.9163164977616653, "grad_norm": 0.7088649868965149, "learning_rate": 2.0827870071408965e-06, "loss": 1.2535, "mean_token_accuracy": 0.678161750237147, "num_tokens": 2924703162.0, "step": 17444 }, { "entropy": 1.6324211259682972, "epoch": 1.9164263546730385, "grad_norm": 0.6246728301048279, "learning_rate": 2.0825704776052e-06, "loss": 1.3837, "mean_token_accuracy": 0.66278408964475, "num_tokens": 2924980915.0, "step": 17445 }, { "entropy": 1.7229852279027302, "epoch": 1.9165362115844113, "grad_norm": 0.754826009273529, "learning_rate": 2.082354230302129e-06, "loss": 1.3334, "mean_token_accuracy": 0.6602563957373301, "num_tokens": 2925088795.0, "step": 17446 }, { "entropy": 1.6306418975194295, "epoch": 1.9166460684957842, "grad_norm": 0.6160972714424133, "learning_rate": 2.0821382652385284e-06, "loss": 1.4224, "mean_token_accuracy": 0.6546281178792318, "num_tokens": 2925322433.0, "step": 17447 }, { "entropy": 1.6297888457775116, "epoch": 1.9167559254071573, "grad_norm": 0.6668198704719543, "learning_rate": 2.081922582421233e-06, "loss": 1.4147, "mean_token_accuracy": 0.6672234535217285, "num_tokens": 2925496843.0, "step": 17448 }, { "entropy": 1.7703557411829631, "epoch": 1.91686578231853, "grad_norm": 0.7170999050140381, "learning_rate": 2.081707181857071e-06, "loss": 1.3329, "mean_token_accuracy": 0.6564952532450358, "num_tokens": 2925647583.0, "step": 17449 }, { "entropy": 1.7169082860151927, "epoch": 1.916975639229903, "grad_norm": 0.6741853356361389, "learning_rate": 2.0814920635528563e-06, "loss": 1.2811, "mean_token_accuracy": 0.6681034664312998, "num_tokens": 2925770751.0, "step": 17450 }, { "entropy": 1.6565737128257751, "epoch": 1.917085496141276, "grad_norm": 0.6688545346260071, "learning_rate": 2.081277227515399e-06, "loss": 1.2302, "mean_token_accuracy": 0.6802611798048019, "num_tokens": 2925890003.0, "step": 17451 }, { "entropy": 1.687977929910024, "epoch": 1.9171953530526489, "grad_norm": 0.7067691683769226, "learning_rate": 2.081062673751499e-06, "loss": 1.2822, "mean_token_accuracy": 0.6621923645337423, "num_tokens": 2926063066.0, "step": 17452 }, { "entropy": 1.6485174397627513, "epoch": 1.917305209964022, "grad_norm": 0.6077693104743958, "learning_rate": 2.0808484022679467e-06, "loss": 1.459, "mean_token_accuracy": 0.6410925338665644, "num_tokens": 2926254114.0, "step": 17453 }, { "entropy": 1.6806750198205311, "epoch": 1.9174150668753946, "grad_norm": 0.6232016682624817, "learning_rate": 2.0806344130715233e-06, "loss": 1.5307, "mean_token_accuracy": 0.6372226725021998, "num_tokens": 2926443709.0, "step": 17454 }, { "entropy": 1.6603333155314128, "epoch": 1.9175249237867678, "grad_norm": 0.7298293709754944, "learning_rate": 2.080420706169001e-06, "loss": 1.3589, "mean_token_accuracy": 0.6681396961212158, "num_tokens": 2926597936.0, "step": 17455 }, { "entropy": 1.82164399822553, "epoch": 1.9176347806981406, "grad_norm": 0.6539393663406372, "learning_rate": 2.080207281567144e-06, "loss": 1.6118, "mean_token_accuracy": 0.6109184970458349, "num_tokens": 2926871518.0, "step": 17456 }, { "entropy": 1.6787754893302917, "epoch": 1.9177446376095135, "grad_norm": 0.6502023339271545, "learning_rate": 2.079994139272708e-06, "loss": 1.3738, "mean_token_accuracy": 0.6680939892927805, "num_tokens": 2927064929.0, "step": 17457 }, { "entropy": 1.6514959534009297, "epoch": 1.9178544945208866, "grad_norm": 0.7282174825668335, "learning_rate": 2.0797812792924372e-06, "loss": 1.3208, "mean_token_accuracy": 0.6738225072622299, "num_tokens": 2927212785.0, "step": 17458 }, { "entropy": 1.6412135660648346, "epoch": 1.9179643514322595, "grad_norm": 0.6706090569496155, "learning_rate": 2.079568701633071e-06, "loss": 1.2853, "mean_token_accuracy": 0.6833283007144928, "num_tokens": 2927400983.0, "step": 17459 }, { "entropy": 1.6410308082898457, "epoch": 1.9180742083436324, "grad_norm": 0.7416619658470154, "learning_rate": 2.0793564063013337e-06, "loss": 1.41, "mean_token_accuracy": 0.6614979207515717, "num_tokens": 2927587509.0, "step": 17460 }, { "entropy": 1.6796442766984303, "epoch": 1.9181840652550055, "grad_norm": 0.613105058670044, "learning_rate": 2.0791443933039477e-06, "loss": 1.3794, "mean_token_accuracy": 0.6581423729658127, "num_tokens": 2927770429.0, "step": 17461 }, { "entropy": 1.6736660699049632, "epoch": 1.9182939221663782, "grad_norm": 0.6586642861366272, "learning_rate": 2.0789326626476213e-06, "loss": 1.3009, "mean_token_accuracy": 0.6772599170605341, "num_tokens": 2927929359.0, "step": 17462 }, { "entropy": 1.7245589395364125, "epoch": 1.9184037790777513, "grad_norm": 0.7125158905982971, "learning_rate": 2.078721214339057e-06, "loss": 1.2919, "mean_token_accuracy": 0.6627042591571808, "num_tokens": 2928041715.0, "step": 17463 }, { "entropy": 1.6420990029970806, "epoch": 1.9185136359891242, "grad_norm": 0.6651259660720825, "learning_rate": 2.078510048384944e-06, "loss": 1.3234, "mean_token_accuracy": 0.6715792467196783, "num_tokens": 2928211841.0, "step": 17464 }, { "entropy": 1.680494636297226, "epoch": 1.918623492900497, "grad_norm": 0.5542195439338684, "learning_rate": 2.0782991647919707e-06, "loss": 1.1542, "mean_token_accuracy": 0.6779807110627493, "num_tokens": 2928375456.0, "step": 17465 }, { "entropy": 1.6585955023765564, "epoch": 1.9187333498118702, "grad_norm": 0.6564247012138367, "learning_rate": 2.0780885635668067e-06, "loss": 1.3989, "mean_token_accuracy": 0.6611930181582769, "num_tokens": 2928528830.0, "step": 17466 }, { "entropy": 1.6445672412713368, "epoch": 1.9188432067232428, "grad_norm": 0.6655113101005554, "learning_rate": 2.0778782447161197e-06, "loss": 1.4603, "mean_token_accuracy": 0.6606913854678472, "num_tokens": 2928741569.0, "step": 17467 }, { "entropy": 1.7159535090128581, "epoch": 1.918953063634616, "grad_norm": 0.7392234206199646, "learning_rate": 2.077668208246567e-06, "loss": 1.3667, "mean_token_accuracy": 0.6548206061124802, "num_tokens": 2928905957.0, "step": 17468 }, { "entropy": 1.7086364229520161, "epoch": 1.9190629205459888, "grad_norm": 0.7043489813804626, "learning_rate": 2.0774584541647944e-06, "loss": 1.229, "mean_token_accuracy": 0.6727901895840963, "num_tokens": 2928997788.0, "step": 17469 }, { "entropy": 1.6513133843739827, "epoch": 1.9191727774573617, "grad_norm": 0.676826536655426, "learning_rate": 2.0772489824774392e-06, "loss": 1.437, "mean_token_accuracy": 0.6636106073856354, "num_tokens": 2929237876.0, "step": 17470 }, { "entropy": 1.714188575744629, "epoch": 1.9192826343687348, "grad_norm": 0.7203949689865112, "learning_rate": 2.0770397931911355e-06, "loss": 1.3942, "mean_token_accuracy": 0.662879596153895, "num_tokens": 2929369246.0, "step": 17471 }, { "entropy": 1.7575792769591014, "epoch": 1.9193924912801077, "grad_norm": 0.6798261404037476, "learning_rate": 2.0768308863125003e-06, "loss": 1.3794, "mean_token_accuracy": 0.6597266445557276, "num_tokens": 2929534671.0, "step": 17472 }, { "entropy": 1.6687467495600383, "epoch": 1.9195023481914806, "grad_norm": 0.619688868522644, "learning_rate": 2.0766222618481476e-06, "loss": 1.4953, "mean_token_accuracy": 0.6502044051885605, "num_tokens": 2929715709.0, "step": 17473 }, { "entropy": 1.714370201031367, "epoch": 1.9196122051028537, "grad_norm": 0.7117909789085388, "learning_rate": 2.076413919804679e-06, "loss": 1.4366, "mean_token_accuracy": 0.6580507506926855, "num_tokens": 2929870289.0, "step": 17474 }, { "entropy": 1.696961522102356, "epoch": 1.9197220620142263, "grad_norm": 0.7254346013069153, "learning_rate": 2.0762058601886882e-06, "loss": 1.3134, "mean_token_accuracy": 0.6692439218362173, "num_tokens": 2930053304.0, "step": 17475 }, { "entropy": 1.7915849188963573, "epoch": 1.9198319189255995, "grad_norm": 0.7773795127868652, "learning_rate": 2.0759980830067615e-06, "loss": 1.5147, "mean_token_accuracy": 0.6480228255192438, "num_tokens": 2930244054.0, "step": 17476 }, { "entropy": 1.7063364485899608, "epoch": 1.9199417758369723, "grad_norm": 0.6918734312057495, "learning_rate": 2.0757905882654744e-06, "loss": 1.355, "mean_token_accuracy": 0.6601203779379526, "num_tokens": 2930396665.0, "step": 17477 }, { "entropy": 1.7182010610898335, "epoch": 1.9200516327483452, "grad_norm": 0.7387691140174866, "learning_rate": 2.0755833759713935e-06, "loss": 1.4699, "mean_token_accuracy": 0.6380101641019186, "num_tokens": 2930590121.0, "step": 17478 }, { "entropy": 1.7257728974024455, "epoch": 1.9201614896597183, "grad_norm": 0.6251614689826965, "learning_rate": 2.075376446131076e-06, "loss": 1.3573, "mean_token_accuracy": 0.6537232995033264, "num_tokens": 2930743975.0, "step": 17479 }, { "entropy": 1.75346240401268, "epoch": 1.920271346571091, "grad_norm": 0.8177134990692139, "learning_rate": 2.0751697987510747e-06, "loss": 1.3432, "mean_token_accuracy": 0.6525897781054179, "num_tokens": 2930879478.0, "step": 17480 }, { "entropy": 1.6926167905330658, "epoch": 1.920381203482464, "grad_norm": 0.6901217699050903, "learning_rate": 2.0749634338379268e-06, "loss": 1.3196, "mean_token_accuracy": 0.6637932906548182, "num_tokens": 2931011772.0, "step": 17481 }, { "entropy": 1.725586086511612, "epoch": 1.920491060393837, "grad_norm": 0.7865347266197205, "learning_rate": 2.0747573513981635e-06, "loss": 1.4176, "mean_token_accuracy": 0.65843033293883, "num_tokens": 2931162551.0, "step": 17482 }, { "entropy": 1.685064325730006, "epoch": 1.9206009173052099, "grad_norm": 0.6249054074287415, "learning_rate": 2.0745515514383088e-06, "loss": 1.5084, "mean_token_accuracy": 0.6323985556761423, "num_tokens": 2931392882.0, "step": 17483 }, { "entropy": 1.6556347211201985, "epoch": 1.920710774216583, "grad_norm": 0.6788010001182556, "learning_rate": 2.0743460339648753e-06, "loss": 1.3561, "mean_token_accuracy": 0.6696719378232956, "num_tokens": 2931555075.0, "step": 17484 }, { "entropy": 1.6232224702835083, "epoch": 1.9208206311279559, "grad_norm": 0.651375412940979, "learning_rate": 2.074140798984369e-06, "loss": 1.3004, "mean_token_accuracy": 0.6754371821880341, "num_tokens": 2931698300.0, "step": 17485 }, { "entropy": 1.796891490618388, "epoch": 1.9209304880393288, "grad_norm": 0.6899563074111938, "learning_rate": 2.0739358465032837e-06, "loss": 1.4438, "mean_token_accuracy": 0.6399559328953425, "num_tokens": 2931835023.0, "step": 17486 }, { "entropy": 1.6822420060634613, "epoch": 1.9210403449507019, "grad_norm": 1.8057386875152588, "learning_rate": 2.0737311765281066e-06, "loss": 1.1202, "mean_token_accuracy": 0.6863191624482473, "num_tokens": 2931948990.0, "step": 17487 }, { "entropy": 1.6927362382411957, "epoch": 1.9211502018620745, "grad_norm": 0.7195414304733276, "learning_rate": 2.0735267890653154e-06, "loss": 1.3814, "mean_token_accuracy": 0.658441017071406, "num_tokens": 2932114658.0, "step": 17488 }, { "entropy": 1.57031911611557, "epoch": 1.9212600587734476, "grad_norm": 0.8102070093154907, "learning_rate": 2.0733226841213792e-06, "loss": 1.3443, "mean_token_accuracy": 0.6727907160917918, "num_tokens": 2932274915.0, "step": 17489 }, { "entropy": 1.6582297484079997, "epoch": 1.9213699156848205, "grad_norm": 0.7032040953636169, "learning_rate": 2.0731188617027572e-06, "loss": 1.399, "mean_token_accuracy": 0.66445920864741, "num_tokens": 2932448501.0, "step": 17490 }, { "entropy": 1.6773069600264232, "epoch": 1.9214797725961934, "grad_norm": 0.6263803839683533, "learning_rate": 2.072915321815901e-06, "loss": 1.5152, "mean_token_accuracy": 0.6362102230389913, "num_tokens": 2932722036.0, "step": 17491 }, { "entropy": 1.6817654371261597, "epoch": 1.9215896295075665, "grad_norm": 0.6429359912872314, "learning_rate": 2.072712064467252e-06, "loss": 1.5055, "mean_token_accuracy": 0.6553362160921097, "num_tokens": 2932910588.0, "step": 17492 }, { "entropy": 1.7349775632222493, "epoch": 1.9216994864189392, "grad_norm": 0.6507477164268494, "learning_rate": 2.0725090896632436e-06, "loss": 1.3682, "mean_token_accuracy": 0.6527557075023651, "num_tokens": 2933061736.0, "step": 17493 }, { "entropy": 1.6733063260714214, "epoch": 1.9218093433303123, "grad_norm": 0.7544586658477783, "learning_rate": 2.0723063974102996e-06, "loss": 1.4147, "mean_token_accuracy": 0.6568095733722051, "num_tokens": 2933234118.0, "step": 17494 }, { "entropy": 1.7533384064833324, "epoch": 1.9219192002416852, "grad_norm": 0.7645695209503174, "learning_rate": 2.072103987714835e-06, "loss": 1.3646, "mean_token_accuracy": 0.6804790943861008, "num_tokens": 2933344246.0, "step": 17495 }, { "entropy": 1.7520137230555217, "epoch": 1.922029057153058, "grad_norm": 0.8002681732177734, "learning_rate": 2.071901860583257e-06, "loss": 1.5351, "mean_token_accuracy": 0.6434406936168671, "num_tokens": 2933540169.0, "step": 17496 }, { "entropy": 1.7390229205290477, "epoch": 1.9221389140644312, "grad_norm": 0.7108039855957031, "learning_rate": 2.071700016021961e-06, "loss": 1.4703, "mean_token_accuracy": 0.6398574312527975, "num_tokens": 2933720785.0, "step": 17497 }, { "entropy": 1.6974186301231384, "epoch": 1.922248770975804, "grad_norm": 0.7148029804229736, "learning_rate": 2.0714984540373373e-06, "loss": 1.3062, "mean_token_accuracy": 0.6633280366659164, "num_tokens": 2933842641.0, "step": 17498 }, { "entropy": 1.6941316624482472, "epoch": 1.922358627887177, "grad_norm": 0.5549025535583496, "learning_rate": 2.071297174635763e-06, "loss": 1.4171, "mean_token_accuracy": 0.6544789026180903, "num_tokens": 2934021044.0, "step": 17499 }, { "entropy": 1.7065569758415222, "epoch": 1.92246848479855, "grad_norm": 0.6217483878135681, "learning_rate": 2.071096177823611e-06, "loss": 1.4027, "mean_token_accuracy": 0.6564749876658121, "num_tokens": 2934255736.0, "step": 17500 }, { "entropy": 1.6603321035703023, "epoch": 1.9225783417099227, "grad_norm": 0.734339714050293, "learning_rate": 2.070895463607242e-06, "loss": 1.3017, "mean_token_accuracy": 0.6692576507727305, "num_tokens": 2934400859.0, "step": 17501 }, { "entropy": 1.704683502515157, "epoch": 1.9226881986212958, "grad_norm": 0.8344931602478027, "learning_rate": 2.070695031993006e-06, "loss": 1.5092, "mean_token_accuracy": 0.6664090702931086, "num_tokens": 2934582181.0, "step": 17502 }, { "entropy": 1.7220360140005748, "epoch": 1.9227980555326687, "grad_norm": 0.7285640835762024, "learning_rate": 2.070494882987249e-06, "loss": 1.4179, "mean_token_accuracy": 0.6631739139556885, "num_tokens": 2934740982.0, "step": 17503 }, { "entropy": 1.6735987563927968, "epoch": 1.9229079124440416, "grad_norm": 0.6303743124008179, "learning_rate": 2.0702950165963066e-06, "loss": 1.3832, "mean_token_accuracy": 0.6726168394088745, "num_tokens": 2934932553.0, "step": 17504 }, { "entropy": 1.6929684579372406, "epoch": 1.9230177693554147, "grad_norm": 0.7466548681259155, "learning_rate": 2.0700954328265024e-06, "loss": 1.266, "mean_token_accuracy": 0.6844168156385422, "num_tokens": 2935095166.0, "step": 17505 }, { "entropy": 1.736617624759674, "epoch": 1.9231276262667873, "grad_norm": 0.7594635486602783, "learning_rate": 2.069896131684154e-06, "loss": 1.3552, "mean_token_accuracy": 0.6446435898542404, "num_tokens": 2935284625.0, "step": 17506 }, { "entropy": 1.6730614403883617, "epoch": 1.9232374831781605, "grad_norm": 0.6581339836120605, "learning_rate": 2.069697113175569e-06, "loss": 1.3928, "mean_token_accuracy": 0.663309171795845, "num_tokens": 2935444299.0, "step": 17507 }, { "entropy": 1.7582378685474396, "epoch": 1.9233473400895333, "grad_norm": 0.8001452088356018, "learning_rate": 2.069498377307045e-06, "loss": 1.4882, "mean_token_accuracy": 0.6397890994946162, "num_tokens": 2935579811.0, "step": 17508 }, { "entropy": 1.739981472492218, "epoch": 1.9234571970009062, "grad_norm": 0.721105694770813, "learning_rate": 2.0692999240848744e-06, "loss": 1.3548, "mean_token_accuracy": 0.6565016210079193, "num_tokens": 2935794799.0, "step": 17509 }, { "entropy": 1.7093225916226704, "epoch": 1.9235670539122793, "grad_norm": 0.6602572798728943, "learning_rate": 2.0691017535153375e-06, "loss": 1.4042, "mean_token_accuracy": 0.648267442981402, "num_tokens": 2935960630.0, "step": 17510 }, { "entropy": 1.6835937400658925, "epoch": 1.9236769108236522, "grad_norm": 0.6169915199279785, "learning_rate": 2.0689038656047046e-06, "loss": 1.4336, "mean_token_accuracy": 0.652891164024671, "num_tokens": 2936153886.0, "step": 17511 }, { "entropy": 1.691537966330846, "epoch": 1.923786767735025, "grad_norm": 0.6025387644767761, "learning_rate": 2.0687062603592407e-06, "loss": 1.3336, "mean_token_accuracy": 0.662538543343544, "num_tokens": 2936319156.0, "step": 17512 }, { "entropy": 1.68320166071256, "epoch": 1.9238966246463982, "grad_norm": 0.6816161274909973, "learning_rate": 2.068508937785198e-06, "loss": 1.3839, "mean_token_accuracy": 0.6539181371529897, "num_tokens": 2936475278.0, "step": 17513 }, { "entropy": 1.7054178714752197, "epoch": 1.9240064815577709, "grad_norm": 0.6919237375259399, "learning_rate": 2.0683118978888243e-06, "loss": 1.313, "mean_token_accuracy": 0.662996177872022, "num_tokens": 2936649177.0, "step": 17514 }, { "entropy": 1.6796445548534393, "epoch": 1.924116338469144, "grad_norm": 0.9301538467407227, "learning_rate": 2.0681151406763533e-06, "loss": 1.3744, "mean_token_accuracy": 0.6774726808071136, "num_tokens": 2936795606.0, "step": 17515 }, { "entropy": 1.746860404809316, "epoch": 1.9242261953805169, "grad_norm": 0.7453663349151611, "learning_rate": 2.067918666154014e-06, "loss": 1.337, "mean_token_accuracy": 0.6566027700901031, "num_tokens": 2936929561.0, "step": 17516 }, { "entropy": 1.6719463368256886, "epoch": 1.9243360522918898, "grad_norm": 0.695083498954773, "learning_rate": 2.067722474328024e-06, "loss": 1.3896, "mean_token_accuracy": 0.668315589427948, "num_tokens": 2937090524.0, "step": 17517 }, { "entropy": 1.7075058122475941, "epoch": 1.9244459092032629, "grad_norm": 0.6535371541976929, "learning_rate": 2.067526565204592e-06, "loss": 1.3489, "mean_token_accuracy": 0.6604385624329249, "num_tokens": 2937238065.0, "step": 17518 }, { "entropy": 1.8017792999744415, "epoch": 1.9245557661146355, "grad_norm": 0.723039448261261, "learning_rate": 2.0673309387899187e-06, "loss": 1.4437, "mean_token_accuracy": 0.6451696256796519, "num_tokens": 2937417366.0, "step": 17519 }, { "entropy": 1.67741854985555, "epoch": 1.9246656230260086, "grad_norm": 0.7515210509300232, "learning_rate": 2.067135595090197e-06, "loss": 1.3528, "mean_token_accuracy": 0.6660284996032715, "num_tokens": 2937572016.0, "step": 17520 }, { "entropy": 1.6874766151110332, "epoch": 1.9247754799373815, "grad_norm": 0.6784387230873108, "learning_rate": 2.0669405341116092e-06, "loss": 1.3964, "mean_token_accuracy": 0.6592771311601003, "num_tokens": 2937720468.0, "step": 17521 }, { "entropy": 1.6902848482131958, "epoch": 1.9248853368487544, "grad_norm": 0.5754929184913635, "learning_rate": 2.0667457558603264e-06, "loss": 1.4538, "mean_token_accuracy": 0.6473627537488937, "num_tokens": 2937920848.0, "step": 17522 }, { "entropy": 1.6405058304468791, "epoch": 1.9249951937601275, "grad_norm": 0.565492570400238, "learning_rate": 2.066551260342516e-06, "loss": 1.4986, "mean_token_accuracy": 0.6333438704411188, "num_tokens": 2938144092.0, "step": 17523 }, { "entropy": 1.7087134818236034, "epoch": 1.9251050506715004, "grad_norm": 0.624536395072937, "learning_rate": 2.0663570475643323e-06, "loss": 1.3191, "mean_token_accuracy": 0.6491807202498118, "num_tokens": 2938330146.0, "step": 17524 }, { "entropy": 1.6821011404196422, "epoch": 1.9252149075828733, "grad_norm": 0.6043448448181152, "learning_rate": 2.066163117531923e-06, "loss": 1.4004, "mean_token_accuracy": 0.6572328259547552, "num_tokens": 2938490177.0, "step": 17525 }, { "entropy": 1.697978417078654, "epoch": 1.9253247644942464, "grad_norm": 0.7227047085762024, "learning_rate": 2.065969470251425e-06, "loss": 1.3753, "mean_token_accuracy": 0.659215713540713, "num_tokens": 2938668497.0, "step": 17526 }, { "entropy": 1.6828550398349762, "epoch": 1.925434621405619, "grad_norm": 0.6420347690582275, "learning_rate": 2.065776105728967e-06, "loss": 1.5599, "mean_token_accuracy": 0.6333623677492142, "num_tokens": 2938924531.0, "step": 17527 }, { "entropy": 1.7999260822931926, "epoch": 1.9255444783169922, "grad_norm": 0.6861507892608643, "learning_rate": 2.0655830239706702e-06, "loss": 1.4962, "mean_token_accuracy": 0.6513161609570185, "num_tokens": 2939093357.0, "step": 17528 }, { "entropy": 1.7336215178171794, "epoch": 1.925654335228365, "grad_norm": 0.6525002717971802, "learning_rate": 2.0653902249826445e-06, "loss": 1.4093, "mean_token_accuracy": 0.6464171608289083, "num_tokens": 2939226820.0, "step": 17529 }, { "entropy": 1.7006452282269795, "epoch": 1.925764192139738, "grad_norm": 0.6565628051757812, "learning_rate": 2.065197708770992e-06, "loss": 1.5153, "mean_token_accuracy": 0.6620665639638901, "num_tokens": 2939408770.0, "step": 17530 }, { "entropy": 1.7232566873232524, "epoch": 1.925874049051111, "grad_norm": 0.6713189482688904, "learning_rate": 2.065005475341805e-06, "loss": 1.5064, "mean_token_accuracy": 0.6384440114100774, "num_tokens": 2939574469.0, "step": 17531 }, { "entropy": 1.6672783493995667, "epoch": 1.9259839059624837, "grad_norm": 0.63712477684021, "learning_rate": 2.06481352470117e-06, "loss": 1.4419, "mean_token_accuracy": 0.6587218890587488, "num_tokens": 2939782494.0, "step": 17532 }, { "entropy": 1.7159414490063984, "epoch": 1.9260937628738568, "grad_norm": 0.7770838141441345, "learning_rate": 2.064621856855161e-06, "loss": 1.4086, "mean_token_accuracy": 0.6606285522381464, "num_tokens": 2939922605.0, "step": 17533 }, { "entropy": 1.6527531743049622, "epoch": 1.9262036197852297, "grad_norm": 0.6143233776092529, "learning_rate": 2.064430471809843e-06, "loss": 1.2611, "mean_token_accuracy": 0.6733989963928858, "num_tokens": 2940107503.0, "step": 17534 }, { "entropy": 1.717937578757604, "epoch": 1.9263134766966026, "grad_norm": 0.7945669293403625, "learning_rate": 2.064239369571273e-06, "loss": 1.302, "mean_token_accuracy": 0.6729937593142191, "num_tokens": 2940246262.0, "step": 17535 }, { "entropy": 1.7034448285897572, "epoch": 1.9264233336079757, "grad_norm": 0.6210589408874512, "learning_rate": 2.064048550145502e-06, "loss": 1.3828, "mean_token_accuracy": 0.6582538237174352, "num_tokens": 2940382994.0, "step": 17536 }, { "entropy": 1.7313305934270222, "epoch": 1.9265331905193486, "grad_norm": 0.6636930108070374, "learning_rate": 2.0638580135385676e-06, "loss": 1.4957, "mean_token_accuracy": 0.641208882133166, "num_tokens": 2940615045.0, "step": 17537 }, { "entropy": 1.716033011674881, "epoch": 1.9266430474307215, "grad_norm": 0.6111452579498291, "learning_rate": 2.0636677597565e-06, "loss": 1.3465, "mean_token_accuracy": 0.6583975950876871, "num_tokens": 2940785009.0, "step": 17538 }, { "entropy": 1.7833648025989532, "epoch": 1.9267529043420946, "grad_norm": 0.6030783653259277, "learning_rate": 2.0634777888053214e-06, "loss": 1.4208, "mean_token_accuracy": 0.6543482542037964, "num_tokens": 2940945824.0, "step": 17539 }, { "entropy": 1.739495297273, "epoch": 1.9268627612534672, "grad_norm": 0.7100062966346741, "learning_rate": 2.063288100691043e-06, "loss": 1.4085, "mean_token_accuracy": 0.6565575549999872, "num_tokens": 2941091153.0, "step": 17540 }, { "entropy": 1.708542416493098, "epoch": 1.9269726181648403, "grad_norm": 0.6001754403114319, "learning_rate": 2.063098695419669e-06, "loss": 1.408, "mean_token_accuracy": 0.6558292259772619, "num_tokens": 2941308528.0, "step": 17541 }, { "entropy": 1.7160709202289581, "epoch": 1.9270824750762132, "grad_norm": 0.7005263566970825, "learning_rate": 2.0629095729971956e-06, "loss": 1.3614, "mean_token_accuracy": 0.6518668631712595, "num_tokens": 2941457810.0, "step": 17542 }, { "entropy": 1.7111981709798176, "epoch": 1.927192331987586, "grad_norm": 0.6936342120170593, "learning_rate": 2.0627207334296065e-06, "loss": 1.2869, "mean_token_accuracy": 0.6690366715192795, "num_tokens": 2941608696.0, "step": 17543 }, { "entropy": 1.7328161001205444, "epoch": 1.9273021888989592, "grad_norm": 0.6630276441574097, "learning_rate": 2.0625321767228782e-06, "loss": 1.3247, "mean_token_accuracy": 0.6659030715624491, "num_tokens": 2941744217.0, "step": 17544 }, { "entropy": 1.676056981086731, "epoch": 1.927412045810332, "grad_norm": 0.6332334280014038, "learning_rate": 2.062343902882981e-06, "loss": 1.3053, "mean_token_accuracy": 0.6878760854403178, "num_tokens": 2941870064.0, "step": 17545 }, { "entropy": 1.7297570407390594, "epoch": 1.927521902721705, "grad_norm": 0.8926072120666504, "learning_rate": 2.0621559119158707e-06, "loss": 1.3512, "mean_token_accuracy": 0.6621742248535156, "num_tokens": 2942009195.0, "step": 17546 }, { "entropy": 1.6616042951742809, "epoch": 1.9276317596330779, "grad_norm": 0.7044560313224792, "learning_rate": 2.061968203827498e-06, "loss": 1.1998, "mean_token_accuracy": 0.6850334058205286, "num_tokens": 2942128712.0, "step": 17547 }, { "entropy": 1.6350234846274059, "epoch": 1.9277416165444508, "grad_norm": 0.7069318294525146, "learning_rate": 2.0617807786238036e-06, "loss": 1.3889, "mean_token_accuracy": 0.662718782822291, "num_tokens": 2942299164.0, "step": 17548 }, { "entropy": 1.671046882867813, "epoch": 1.9278514734558239, "grad_norm": 0.5789463520050049, "learning_rate": 2.061593636310722e-06, "loss": 1.345, "mean_token_accuracy": 0.6666370083888372, "num_tokens": 2942465119.0, "step": 17549 }, { "entropy": 1.6353905896345775, "epoch": 1.9279613303671967, "grad_norm": 0.7177479267120361, "learning_rate": 2.061406776894172e-06, "loss": 1.3278, "mean_token_accuracy": 0.6639865090449651, "num_tokens": 2942607010.0, "step": 17550 }, { "entropy": 1.700663298368454, "epoch": 1.9280711872785696, "grad_norm": 0.6942954659461975, "learning_rate": 2.061220200380071e-06, "loss": 1.2797, "mean_token_accuracy": 0.6732474565505981, "num_tokens": 2942712681.0, "step": 17551 }, { "entropy": 1.7159783045450847, "epoch": 1.9281810441899427, "grad_norm": 0.6486497521400452, "learning_rate": 2.0610339067743213e-06, "loss": 1.4483, "mean_token_accuracy": 0.6449883927901586, "num_tokens": 2942933437.0, "step": 17552 }, { "entropy": 1.6851761241753895, "epoch": 1.9282909011013154, "grad_norm": 0.7932791113853455, "learning_rate": 2.060847896082822e-06, "loss": 1.2722, "mean_token_accuracy": 0.6713943233092626, "num_tokens": 2943051659.0, "step": 17553 }, { "entropy": 1.7198482652505238, "epoch": 1.9284007580126885, "grad_norm": 0.5689072012901306, "learning_rate": 2.0606621683114583e-06, "loss": 1.4662, "mean_token_accuracy": 0.639824832479159, "num_tokens": 2943252511.0, "step": 17554 }, { "entropy": 1.6842391788959503, "epoch": 1.9285106149240614, "grad_norm": 0.8211684823036194, "learning_rate": 2.0604767234661086e-06, "loss": 1.428, "mean_token_accuracy": 0.642557273308436, "num_tokens": 2943430385.0, "step": 17555 }, { "entropy": 1.7061157524585724, "epoch": 1.9286204718354343, "grad_norm": 0.7843329310417175, "learning_rate": 2.0602915615526418e-06, "loss": 1.4143, "mean_token_accuracy": 0.6773807754119238, "num_tokens": 2943557103.0, "step": 17556 }, { "entropy": 1.6724831461906433, "epoch": 1.9287303287468074, "grad_norm": 0.6326301693916321, "learning_rate": 2.06010668257692e-06, "loss": 1.2568, "mean_token_accuracy": 0.6788825293382009, "num_tokens": 2943675826.0, "step": 17557 }, { "entropy": 1.7057609955469768, "epoch": 1.9288401856581803, "grad_norm": 0.6408417820930481, "learning_rate": 2.0599220865447924e-06, "loss": 1.4451, "mean_token_accuracy": 0.6486535221338272, "num_tokens": 2943822102.0, "step": 17558 }, { "entropy": 1.7137981454531352, "epoch": 1.9289500425695532, "grad_norm": 0.6233062148094177, "learning_rate": 2.059737773462102e-06, "loss": 1.3929, "mean_token_accuracy": 0.6523498793443044, "num_tokens": 2943978771.0, "step": 17559 }, { "entropy": 1.7529114683469136, "epoch": 1.929059899480926, "grad_norm": 0.6746161580085754, "learning_rate": 2.059553743334683e-06, "loss": 1.4265, "mean_token_accuracy": 0.6609595467646917, "num_tokens": 2944172280.0, "step": 17560 }, { "entropy": 1.7099245886007945, "epoch": 1.929169756392299, "grad_norm": 0.6567638516426086, "learning_rate": 2.0593699961683594e-06, "loss": 1.4972, "mean_token_accuracy": 0.6532185673713684, "num_tokens": 2944307513.0, "step": 17561 }, { "entropy": 1.715930829445521, "epoch": 1.929279613303672, "grad_norm": 0.6228388547897339, "learning_rate": 2.059186531968946e-06, "loss": 1.3633, "mean_token_accuracy": 0.6599201112985611, "num_tokens": 2944488590.0, "step": 17562 }, { "entropy": 1.7236754894256592, "epoch": 1.929389470215045, "grad_norm": 0.6078210473060608, "learning_rate": 2.059003350742251e-06, "loss": 1.3334, "mean_token_accuracy": 0.6531506727139155, "num_tokens": 2944658053.0, "step": 17563 }, { "entropy": 1.7208663125832875, "epoch": 1.9294993271264178, "grad_norm": 0.6693648099899292, "learning_rate": 2.0588204524940702e-06, "loss": 1.4905, "mean_token_accuracy": 0.6456576486428579, "num_tokens": 2944876779.0, "step": 17564 }, { "entropy": 1.6756204068660736, "epoch": 1.929609184037791, "grad_norm": 0.6165689826011658, "learning_rate": 2.0586378372301948e-06, "loss": 1.4901, "mean_token_accuracy": 0.6373255352179209, "num_tokens": 2945070191.0, "step": 17565 }, { "entropy": 1.767015238602956, "epoch": 1.9297190409491636, "grad_norm": 0.8223422765731812, "learning_rate": 2.0584555049564012e-06, "loss": 1.3183, "mean_token_accuracy": 0.6613618781169256, "num_tokens": 2945195660.0, "step": 17566 }, { "entropy": 1.6678463419278462, "epoch": 1.9298288978605367, "grad_norm": 0.7065117955207825, "learning_rate": 2.0582734556784618e-06, "loss": 1.4749, "mean_token_accuracy": 0.6527349551518759, "num_tokens": 2945350628.0, "step": 17567 }, { "entropy": 1.59239661693573, "epoch": 1.9299387547719096, "grad_norm": 0.7592599987983704, "learning_rate": 2.0580916894021383e-06, "loss": 1.2039, "mean_token_accuracy": 0.6815276394287745, "num_tokens": 2945494933.0, "step": 17568 }, { "entropy": 1.770630935827891, "epoch": 1.9300486116832825, "grad_norm": 0.6339170932769775, "learning_rate": 2.0579102061331847e-06, "loss": 1.4645, "mean_token_accuracy": 0.6460703512032827, "num_tokens": 2945689644.0, "step": 17569 }, { "entropy": 1.6582429707050323, "epoch": 1.9301584685946556, "grad_norm": 0.6860533952713013, "learning_rate": 2.0577290058773418e-06, "loss": 1.3485, "mean_token_accuracy": 0.6733825008074442, "num_tokens": 2945818254.0, "step": 17570 }, { "entropy": 1.6789084871610005, "epoch": 1.9302683255060284, "grad_norm": 0.6942259073257446, "learning_rate": 2.057548088640347e-06, "loss": 1.2834, "mean_token_accuracy": 0.6708969473838806, "num_tokens": 2945953707.0, "step": 17571 }, { "entropy": 1.6964404781659443, "epoch": 1.9303781824174013, "grad_norm": 0.6454761624336243, "learning_rate": 2.0573674544279264e-06, "loss": 1.3878, "mean_token_accuracy": 0.6570356140534083, "num_tokens": 2946100936.0, "step": 17572 }, { "entropy": 1.6695491174856822, "epoch": 1.9304880393287742, "grad_norm": 0.6851876378059387, "learning_rate": 2.0571871032457957e-06, "loss": 1.6392, "mean_token_accuracy": 0.6317041863997778, "num_tokens": 2946309464.0, "step": 17573 }, { "entropy": 1.7130326131979625, "epoch": 1.930597896240147, "grad_norm": 0.8214697241783142, "learning_rate": 2.057007035099663e-06, "loss": 1.3649, "mean_token_accuracy": 0.673834909995397, "num_tokens": 2946427385.0, "step": 17574 }, { "entropy": 1.729885309934616, "epoch": 1.9307077531515202, "grad_norm": 0.6711888909339905, "learning_rate": 2.056827249995229e-06, "loss": 1.3746, "mean_token_accuracy": 0.661648154258728, "num_tokens": 2946587424.0, "step": 17575 }, { "entropy": 1.6910810470581055, "epoch": 1.930817610062893, "grad_norm": 0.5932799577713013, "learning_rate": 2.0566477479381818e-06, "loss": 1.3763, "mean_token_accuracy": 0.6474411884943644, "num_tokens": 2946719017.0, "step": 17576 }, { "entropy": 1.62163445353508, "epoch": 1.930927466974266, "grad_norm": 0.5636860728263855, "learning_rate": 2.0564685289342035e-06, "loss": 1.4351, "mean_token_accuracy": 0.6378550479809443, "num_tokens": 2946923068.0, "step": 17577 }, { "entropy": 1.7062998513380687, "epoch": 1.931037323885639, "grad_norm": 0.8094304800033569, "learning_rate": 2.0562895929889665e-06, "loss": 1.32, "mean_token_accuracy": 0.6633241772651672, "num_tokens": 2947031556.0, "step": 17578 }, { "entropy": 1.6850249568621318, "epoch": 1.9311471807970118, "grad_norm": 0.7753397226333618, "learning_rate": 2.0561109401081326e-06, "loss": 1.4007, "mean_token_accuracy": 0.6599368900060654, "num_tokens": 2947165163.0, "step": 17579 }, { "entropy": 1.7386715511480968, "epoch": 1.9312570377083849, "grad_norm": 0.610440731048584, "learning_rate": 2.055932570297359e-06, "loss": 1.3973, "mean_token_accuracy": 0.6602314561605453, "num_tokens": 2947361719.0, "step": 17580 }, { "entropy": 1.7325500547885895, "epoch": 1.9313668946197577, "grad_norm": 0.8086016178131104, "learning_rate": 2.0557544835622885e-06, "loss": 1.6061, "mean_token_accuracy": 0.6291801979144415, "num_tokens": 2947510879.0, "step": 17581 }, { "entropy": 1.701207419236501, "epoch": 1.9314767515311306, "grad_norm": 0.7074292898178101, "learning_rate": 2.055576679908558e-06, "loss": 1.3229, "mean_token_accuracy": 0.6661451806624731, "num_tokens": 2947660460.0, "step": 17582 }, { "entropy": 1.710488885641098, "epoch": 1.9315866084425037, "grad_norm": 0.6005741953849792, "learning_rate": 2.0553991593417954e-06, "loss": 1.4012, "mean_token_accuracy": 0.6515051871538162, "num_tokens": 2947880490.0, "step": 17583 }, { "entropy": 1.7140753865242004, "epoch": 1.9316964653538766, "grad_norm": 0.75965416431427, "learning_rate": 2.0552219218676184e-06, "loss": 1.416, "mean_token_accuracy": 0.655417412519455, "num_tokens": 2948057336.0, "step": 17584 }, { "entropy": 1.6573958098888397, "epoch": 1.9318063222652495, "grad_norm": 0.5710785388946533, "learning_rate": 2.0550449674916374e-06, "loss": 1.3928, "mean_token_accuracy": 0.6540137430032095, "num_tokens": 2948288883.0, "step": 17585 }, { "entropy": 1.7147573431332905, "epoch": 1.9319161791766224, "grad_norm": 0.709555447101593, "learning_rate": 2.0548682962194525e-06, "loss": 1.4287, "mean_token_accuracy": 0.6520878771940867, "num_tokens": 2948492498.0, "step": 17586 }, { "entropy": 1.7575667103131611, "epoch": 1.9320260360879953, "grad_norm": 0.5812540650367737, "learning_rate": 2.0546919080566545e-06, "loss": 1.4646, "mean_token_accuracy": 0.6403382619222006, "num_tokens": 2948692105.0, "step": 17587 }, { "entropy": 1.7437999844551086, "epoch": 1.9321358929993684, "grad_norm": 0.7731236219406128, "learning_rate": 2.054515803008827e-06, "loss": 1.3402, "mean_token_accuracy": 0.66108538210392, "num_tokens": 2948809266.0, "step": 17588 }, { "entropy": 1.6447148124376934, "epoch": 1.9322457499107413, "grad_norm": 0.6328097581863403, "learning_rate": 2.0543399810815448e-06, "loss": 1.3588, "mean_token_accuracy": 0.6679736226797104, "num_tokens": 2948943265.0, "step": 17589 }, { "entropy": 1.6997552712758381, "epoch": 1.9323556068221142, "grad_norm": 0.7128564119338989, "learning_rate": 2.05416444228037e-06, "loss": 1.2944, "mean_token_accuracy": 0.6734185715516409, "num_tokens": 2949056782.0, "step": 17590 }, { "entropy": 1.7652290364106495, "epoch": 1.9324654637334873, "grad_norm": 0.6556487679481506, "learning_rate": 2.053989186610859e-06, "loss": 1.4006, "mean_token_accuracy": 0.6583968649307886, "num_tokens": 2949200473.0, "step": 17591 }, { "entropy": 1.7773662507534027, "epoch": 1.93257532064486, "grad_norm": 0.7734547853469849, "learning_rate": 2.0538142140785604e-06, "loss": 1.567, "mean_token_accuracy": 0.6377990494171778, "num_tokens": 2949430212.0, "step": 17592 }, { "entropy": 1.6884727974732716, "epoch": 1.932685177556233, "grad_norm": 0.6802954077720642, "learning_rate": 2.0536395246890104e-06, "loss": 1.2043, "mean_token_accuracy": 0.6867648412783941, "num_tokens": 2949532312.0, "step": 17593 }, { "entropy": 1.707016219695409, "epoch": 1.932795034467606, "grad_norm": 0.9521844983100891, "learning_rate": 2.0534651184477376e-06, "loss": 1.3584, "mean_token_accuracy": 0.6677818149328232, "num_tokens": 2949680088.0, "step": 17594 }, { "entropy": 1.7301967144012451, "epoch": 1.9329048913789788, "grad_norm": 0.7132523059844971, "learning_rate": 2.053290995360262e-06, "loss": 1.3355, "mean_token_accuracy": 0.6592816412448883, "num_tokens": 2949816290.0, "step": 17595 }, { "entropy": 1.6510724425315857, "epoch": 1.933014748290352, "grad_norm": 0.7266356348991394, "learning_rate": 2.053117155432095e-06, "loss": 1.2963, "mean_token_accuracy": 0.6727642168601354, "num_tokens": 2949962938.0, "step": 17596 }, { "entropy": 1.6770485142866771, "epoch": 1.9331246052017248, "grad_norm": 0.6671058535575867, "learning_rate": 2.052943598668739e-06, "loss": 1.2558, "mean_token_accuracy": 0.6702116926511129, "num_tokens": 2950094455.0, "step": 17597 }, { "entropy": 1.6965230802694957, "epoch": 1.9332344621130977, "grad_norm": 0.7873666286468506, "learning_rate": 2.0527703250756874e-06, "loss": 1.3483, "mean_token_accuracy": 0.6622929026683172, "num_tokens": 2950236947.0, "step": 17598 }, { "entropy": 1.7883230050404866, "epoch": 1.9333443190244708, "grad_norm": 0.6915072798728943, "learning_rate": 2.0525973346584215e-06, "loss": 1.5227, "mean_token_accuracy": 0.6453968932231268, "num_tokens": 2950483692.0, "step": 17599 }, { "entropy": 1.6910318632920582, "epoch": 1.9334541759358435, "grad_norm": 0.6855702996253967, "learning_rate": 2.0524246274224193e-06, "loss": 1.4729, "mean_token_accuracy": 0.6422146856784821, "num_tokens": 2950643969.0, "step": 17600 }, { "entropy": 1.7280892829100292, "epoch": 1.9335640328472166, "grad_norm": 0.6929906606674194, "learning_rate": 2.0522522033731457e-06, "loss": 1.4143, "mean_token_accuracy": 0.6582867602507273, "num_tokens": 2950797242.0, "step": 17601 }, { "entropy": 1.6679367621739705, "epoch": 1.9336738897585894, "grad_norm": 0.5708891153335571, "learning_rate": 2.052080062516057e-06, "loss": 1.4377, "mean_token_accuracy": 0.6401932090520859, "num_tokens": 2951025260.0, "step": 17602 }, { "entropy": 1.7221278150876362, "epoch": 1.9337837466699623, "grad_norm": 0.7946709990501404, "learning_rate": 2.0519082048566026e-06, "loss": 1.2156, "mean_token_accuracy": 0.6832669277985891, "num_tokens": 2951122533.0, "step": 17603 }, { "entropy": 1.721358944972356, "epoch": 1.9338936035813354, "grad_norm": 0.7960524559020996, "learning_rate": 2.0517366304002225e-06, "loss": 1.3881, "mean_token_accuracy": 0.6513563940922419, "num_tokens": 2951259461.0, "step": 17604 }, { "entropy": 1.623354434967041, "epoch": 1.934003460492708, "grad_norm": 0.6415337920188904, "learning_rate": 2.0515653391523454e-06, "loss": 1.2289, "mean_token_accuracy": 0.678653672337532, "num_tokens": 2951398780.0, "step": 17605 }, { "entropy": 1.6779876053333282, "epoch": 1.9341133174040812, "grad_norm": 0.7085760235786438, "learning_rate": 2.051394331118392e-06, "loss": 1.3341, "mean_token_accuracy": 0.6740488906701406, "num_tokens": 2951548624.0, "step": 17606 }, { "entropy": 1.7369104822476704, "epoch": 1.934223174315454, "grad_norm": 0.6106285452842712, "learning_rate": 2.0512236063037767e-06, "loss": 1.4817, "mean_token_accuracy": 0.6403156071901321, "num_tokens": 2951760517.0, "step": 17607 }, { "entropy": 1.744108925263087, "epoch": 1.934333031226827, "grad_norm": 0.6894066333770752, "learning_rate": 2.051053164713902e-06, "loss": 1.4239, "mean_token_accuracy": 0.6464538921912512, "num_tokens": 2951967261.0, "step": 17608 }, { "entropy": 1.7185207108656566, "epoch": 1.9344428881382, "grad_norm": 0.7298069596290588, "learning_rate": 2.0508830063541615e-06, "loss": 1.398, "mean_token_accuracy": 0.6780803749958674, "num_tokens": 2952102298.0, "step": 17609 }, { "entropy": 1.7548142075538635, "epoch": 1.934552745049573, "grad_norm": 0.7205964922904968, "learning_rate": 2.0507131312299423e-06, "loss": 1.4849, "mean_token_accuracy": 0.6329950988292694, "num_tokens": 2952322155.0, "step": 17610 }, { "entropy": 1.6941269437472026, "epoch": 1.9346626019609459, "grad_norm": 0.6685055494308472, "learning_rate": 2.0505435393466183e-06, "loss": 1.3536, "mean_token_accuracy": 0.6650911470254263, "num_tokens": 2952502975.0, "step": 17611 }, { "entropy": 1.7216095228989918, "epoch": 1.934772458872319, "grad_norm": 0.687439501285553, "learning_rate": 2.0503742307095593e-06, "loss": 1.3819, "mean_token_accuracy": 0.6688835869232813, "num_tokens": 2952660501.0, "step": 17612 }, { "entropy": 1.7403018077214558, "epoch": 1.9348823157836916, "grad_norm": 0.7653110027313232, "learning_rate": 2.050205205324123e-06, "loss": 1.3546, "mean_token_accuracy": 0.6541765530904134, "num_tokens": 2952802470.0, "step": 17613 }, { "entropy": 1.7342408001422882, "epoch": 1.9349921726950647, "grad_norm": 0.8154670596122742, "learning_rate": 2.050036463195659e-06, "loss": 1.4267, "mean_token_accuracy": 0.6695893357197443, "num_tokens": 2952917329.0, "step": 17614 }, { "entropy": 1.6820714871088664, "epoch": 1.9351020296064376, "grad_norm": 0.7906708717346191, "learning_rate": 2.049868004329508e-06, "loss": 1.3398, "mean_token_accuracy": 0.6582440783580145, "num_tokens": 2953082322.0, "step": 17615 }, { "entropy": 1.7514929175376892, "epoch": 1.9352118865178105, "grad_norm": 0.7457329630851746, "learning_rate": 2.049699828731002e-06, "loss": 1.5964, "mean_token_accuracy": 0.6357724542419115, "num_tokens": 2953248248.0, "step": 17616 }, { "entropy": 1.665393054485321, "epoch": 1.9353217434291836, "grad_norm": 0.6951583027839661, "learning_rate": 2.049531936405464e-06, "loss": 1.5821, "mean_token_accuracy": 0.6351954390605291, "num_tokens": 2953467876.0, "step": 17617 }, { "entropy": 1.7500004569689434, "epoch": 1.9354316003405563, "grad_norm": 0.6640505790710449, "learning_rate": 2.0493643273582057e-06, "loss": 1.4366, "mean_token_accuracy": 0.6397050768136978, "num_tokens": 2953701201.0, "step": 17618 }, { "entropy": 1.7141669193903606, "epoch": 1.9355414572519294, "grad_norm": 0.654679536819458, "learning_rate": 2.049197001594534e-06, "loss": 1.5257, "mean_token_accuracy": 0.6353826026121775, "num_tokens": 2953900373.0, "step": 17619 }, { "entropy": 1.7072954674561818, "epoch": 1.9356513141633023, "grad_norm": 0.6300878524780273, "learning_rate": 2.0490299591197426e-06, "loss": 1.4607, "mean_token_accuracy": 0.6487697213888168, "num_tokens": 2954088615.0, "step": 17620 }, { "entropy": 1.7204040785630543, "epoch": 1.9357611710746752, "grad_norm": 0.6421894431114197, "learning_rate": 2.048863199939121e-06, "loss": 1.3859, "mean_token_accuracy": 0.6516237407922745, "num_tokens": 2954246097.0, "step": 17621 }, { "entropy": 1.7502519289652507, "epoch": 1.9358710279860483, "grad_norm": 0.6403509378433228, "learning_rate": 2.048696724057945e-06, "loss": 1.4453, "mean_token_accuracy": 0.6476548612117767, "num_tokens": 2954416854.0, "step": 17622 }, { "entropy": 1.7479767203330994, "epoch": 1.9359808848974212, "grad_norm": 0.6511620283126831, "learning_rate": 2.0485305314814843e-06, "loss": 1.5819, "mean_token_accuracy": 0.6273590077956518, "num_tokens": 2954650460.0, "step": 17623 }, { "entropy": 1.746782938639323, "epoch": 1.936090741808794, "grad_norm": 0.7770575284957886, "learning_rate": 2.0483646222149993e-06, "loss": 1.3677, "mean_token_accuracy": 0.6582979708909988, "num_tokens": 2954788939.0, "step": 17624 }, { "entropy": 1.7274068494637806, "epoch": 1.9362005987201671, "grad_norm": 0.6287463307380676, "learning_rate": 2.0481989962637393e-06, "loss": 1.3752, "mean_token_accuracy": 0.6612710257371267, "num_tokens": 2954960018.0, "step": 17625 }, { "entropy": 1.732554425795873, "epoch": 1.9363104556315398, "grad_norm": 0.6919560432434082, "learning_rate": 2.048033653632947e-06, "loss": 1.4181, "mean_token_accuracy": 0.651200125614802, "num_tokens": 2955097173.0, "step": 17626 }, { "entropy": 1.663443386554718, "epoch": 1.936420312542913, "grad_norm": 0.6868497729301453, "learning_rate": 2.0478685943278565e-06, "loss": 1.4677, "mean_token_accuracy": 0.6552320023377737, "num_tokens": 2955304001.0, "step": 17627 }, { "entropy": 1.7057395776112874, "epoch": 1.9365301694542858, "grad_norm": 0.6456490159034729, "learning_rate": 2.0477038183536913e-06, "loss": 1.369, "mean_token_accuracy": 0.6559461901585261, "num_tokens": 2955487842.0, "step": 17628 }, { "entropy": 1.7742679218451183, "epoch": 1.9366400263656587, "grad_norm": 0.6736863851547241, "learning_rate": 2.0475393257156655e-06, "loss": 1.4416, "mean_token_accuracy": 0.6455845187107722, "num_tokens": 2955688361.0, "step": 17629 }, { "entropy": 1.7152503629525502, "epoch": 1.9367498832770318, "grad_norm": 0.7249388098716736, "learning_rate": 2.0473751164189866e-06, "loss": 1.2432, "mean_token_accuracy": 0.6813737452030182, "num_tokens": 2955803129.0, "step": 17630 }, { "entropy": 1.6919386585553486, "epoch": 1.9368597401884045, "grad_norm": 0.6506887674331665, "learning_rate": 2.047211190468851e-06, "loss": 1.3881, "mean_token_accuracy": 0.6640054186185201, "num_tokens": 2955999765.0, "step": 17631 }, { "entropy": 1.7553547322750092, "epoch": 1.9369695970997776, "grad_norm": 0.7061113715171814, "learning_rate": 2.0470475478704465e-06, "loss": 1.3973, "mean_token_accuracy": 0.6518440246582031, "num_tokens": 2956153926.0, "step": 17632 }, { "entropy": 1.690263569355011, "epoch": 1.9370794540111504, "grad_norm": 0.6797465682029724, "learning_rate": 2.0468841886289534e-06, "loss": 1.3509, "mean_token_accuracy": 0.6615539789199829, "num_tokens": 2956278409.0, "step": 17633 }, { "entropy": 1.7189227143923442, "epoch": 1.9371893109225233, "grad_norm": 0.7588323950767517, "learning_rate": 2.0467211127495413e-06, "loss": 1.3427, "mean_token_accuracy": 0.680439700682958, "num_tokens": 2956426012.0, "step": 17634 }, { "entropy": 1.6739676396052043, "epoch": 1.9372991678338964, "grad_norm": 0.6452463269233704, "learning_rate": 2.0465583202373717e-06, "loss": 1.2856, "mean_token_accuracy": 0.6756146202484766, "num_tokens": 2956554851.0, "step": 17635 }, { "entropy": 1.6939974129199982, "epoch": 1.9374090247452693, "grad_norm": 0.7644104361534119, "learning_rate": 2.0463958110975957e-06, "loss": 1.2794, "mean_token_accuracy": 0.6747647374868393, "num_tokens": 2956682401.0, "step": 17636 }, { "entropy": 1.7220764855543773, "epoch": 1.9375188816566422, "grad_norm": 0.6523967385292053, "learning_rate": 2.046233585335359e-06, "loss": 1.4107, "mean_token_accuracy": 0.6561575432618459, "num_tokens": 2956840065.0, "step": 17637 }, { "entropy": 1.72719939549764, "epoch": 1.9376287385680153, "grad_norm": 0.5423910021781921, "learning_rate": 2.0460716429557937e-06, "loss": 1.5145, "mean_token_accuracy": 0.6321031053860983, "num_tokens": 2957092643.0, "step": 17638 }, { "entropy": 1.6563969254493713, "epoch": 1.937738595479388, "grad_norm": 0.634699821472168, "learning_rate": 2.045909983964027e-06, "loss": 1.262, "mean_token_accuracy": 0.6715675294399261, "num_tokens": 2957223272.0, "step": 17639 }, { "entropy": 1.7614007492860158, "epoch": 1.937848452390761, "grad_norm": 0.8633323311805725, "learning_rate": 2.045748608365174e-06, "loss": 1.2673, "mean_token_accuracy": 0.6737097253402075, "num_tokens": 2957385470.0, "step": 17640 }, { "entropy": 1.6651588181654613, "epoch": 1.937958309302134, "grad_norm": 0.716803252696991, "learning_rate": 2.045587516164342e-06, "loss": 1.3767, "mean_token_accuracy": 0.6795123418172201, "num_tokens": 2957501108.0, "step": 17641 }, { "entropy": 1.7195215821266174, "epoch": 1.9380681662135069, "grad_norm": 0.639150857925415, "learning_rate": 2.0454267073666314e-06, "loss": 1.4592, "mean_token_accuracy": 0.6458094666401545, "num_tokens": 2957718885.0, "step": 17642 }, { "entropy": 1.6857140560944874, "epoch": 1.93817802312488, "grad_norm": 0.6419286727905273, "learning_rate": 2.045266181977129e-06, "loss": 1.3571, "mean_token_accuracy": 0.6528473595778147, "num_tokens": 2957910681.0, "step": 17643 }, { "entropy": 1.7539623578389485, "epoch": 1.9382878800362526, "grad_norm": 0.8385192155838013, "learning_rate": 2.0451059400009183e-06, "loss": 1.4932, "mean_token_accuracy": 0.6529499888420105, "num_tokens": 2958064304.0, "step": 17644 }, { "entropy": 1.7804806133111317, "epoch": 1.9383977369476257, "grad_norm": 0.731391429901123, "learning_rate": 2.044945981443069e-06, "loss": 1.5301, "mean_token_accuracy": 0.6367037196954092, "num_tokens": 2958232801.0, "step": 17645 }, { "entropy": 1.6474103232224782, "epoch": 1.9385075938589986, "grad_norm": 0.7180584073066711, "learning_rate": 2.0447863063086444e-06, "loss": 1.3229, "mean_token_accuracy": 0.6761461794376373, "num_tokens": 2958390527.0, "step": 17646 }, { "entropy": 1.6555694937705994, "epoch": 1.9386174507703715, "grad_norm": 0.7365303635597229, "learning_rate": 2.0446269146026973e-06, "loss": 1.4973, "mean_token_accuracy": 0.6524901290734609, "num_tokens": 2958559788.0, "step": 17647 }, { "entropy": 1.6348630885283153, "epoch": 1.9387273076817446, "grad_norm": 0.7571955323219299, "learning_rate": 2.044467806330273e-06, "loss": 1.3723, "mean_token_accuracy": 0.6655997534592947, "num_tokens": 2958782862.0, "step": 17648 }, { "entropy": 1.7069834967454274, "epoch": 1.9388371645931175, "grad_norm": 0.6963647603988647, "learning_rate": 2.0443089814964074e-06, "loss": 1.3579, "mean_token_accuracy": 0.6717394888401031, "num_tokens": 2958926571.0, "step": 17649 }, { "entropy": 1.739360918601354, "epoch": 1.9389470215044904, "grad_norm": 0.7900950908660889, "learning_rate": 2.044150440106126e-06, "loss": 1.547, "mean_token_accuracy": 0.6345634957154592, "num_tokens": 2959092134.0, "step": 17650 }, { "entropy": 1.732648919026057, "epoch": 1.9390568784158635, "grad_norm": 0.7327719330787659, "learning_rate": 2.04399218216445e-06, "loss": 1.4618, "mean_token_accuracy": 0.6432109524806341, "num_tokens": 2959228429.0, "step": 17651 }, { "entropy": 1.7227603793144226, "epoch": 1.9391667353272362, "grad_norm": 0.6913332343101501, "learning_rate": 2.043834207676384e-06, "loss": 1.3656, "mean_token_accuracy": 0.6713483432928721, "num_tokens": 2959344802.0, "step": 17652 }, { "entropy": 1.7791978816191356, "epoch": 1.9392765922386093, "grad_norm": 0.8063234686851501, "learning_rate": 2.04367651664693e-06, "loss": 1.5213, "mean_token_accuracy": 0.6414127200841904, "num_tokens": 2959482823.0, "step": 17653 }, { "entropy": 1.7187303205331166, "epoch": 1.9393864491499822, "grad_norm": 0.6150225400924683, "learning_rate": 2.043519109081078e-06, "loss": 1.4355, "mean_token_accuracy": 0.6597120662530264, "num_tokens": 2959665866.0, "step": 17654 }, { "entropy": 1.6703492403030396, "epoch": 1.939496306061355, "grad_norm": 0.7310093641281128, "learning_rate": 2.04336198498381e-06, "loss": 1.3601, "mean_token_accuracy": 0.6774055063724518, "num_tokens": 2959809976.0, "step": 17655 }, { "entropy": 1.6999173561731975, "epoch": 1.9396061629727281, "grad_norm": 0.6536582112312317, "learning_rate": 2.0432051443601e-06, "loss": 1.4258, "mean_token_accuracy": 0.6664847979942957, "num_tokens": 2959974099.0, "step": 17656 }, { "entropy": 1.706270545721054, "epoch": 1.9397160198841008, "grad_norm": 0.5601027607917786, "learning_rate": 2.0430485872149117e-06, "loss": 1.2432, "mean_token_accuracy": 0.6715459475914637, "num_tokens": 2960154880.0, "step": 17657 }, { "entropy": 1.6991495788097382, "epoch": 1.939825876795474, "grad_norm": 0.6836209297180176, "learning_rate": 2.0428923135531984e-06, "loss": 1.2991, "mean_token_accuracy": 0.6632804969946543, "num_tokens": 2960313055.0, "step": 17658 }, { "entropy": 1.7432080507278442, "epoch": 1.9399357337068468, "grad_norm": 0.91303950548172, "learning_rate": 2.042736323379907e-06, "loss": 1.4199, "mean_token_accuracy": 0.6468232373396555, "num_tokens": 2960456172.0, "step": 17659 }, { "entropy": 1.727357546488444, "epoch": 1.9400455906182197, "grad_norm": 0.752682626247406, "learning_rate": 2.0425806166999755e-06, "loss": 1.3344, "mean_token_accuracy": 0.6625064412752787, "num_tokens": 2960584571.0, "step": 17660 }, { "entropy": 1.7152182559172313, "epoch": 1.9401554475295928, "grad_norm": 0.7578296661376953, "learning_rate": 2.0424251935183316e-06, "loss": 1.2797, "mean_token_accuracy": 0.6746835658947626, "num_tokens": 2960717321.0, "step": 17661 }, { "entropy": 1.7040897111097972, "epoch": 1.9402653044409657, "grad_norm": 0.6826738119125366, "learning_rate": 2.042270053839893e-06, "loss": 1.5189, "mean_token_accuracy": 0.6396296223004659, "num_tokens": 2960883517.0, "step": 17662 }, { "entropy": 1.7391219735145569, "epoch": 1.9403751613523386, "grad_norm": 0.7768815755844116, "learning_rate": 2.042115197669571e-06, "loss": 1.4045, "mean_token_accuracy": 0.6670640160640081, "num_tokens": 2961004315.0, "step": 17663 }, { "entropy": 1.7473591566085815, "epoch": 1.9404850182637117, "grad_norm": 0.6485081315040588, "learning_rate": 2.0419606250122666e-06, "loss": 1.5027, "mean_token_accuracy": 0.6461313168207804, "num_tokens": 2961219255.0, "step": 17664 }, { "entropy": 1.686641921599706, "epoch": 1.9405948751750843, "grad_norm": 0.7350051403045654, "learning_rate": 2.041806335872872e-06, "loss": 1.3163, "mean_token_accuracy": 0.6799498746792475, "num_tokens": 2961376511.0, "step": 17665 }, { "entropy": 1.7102410693963368, "epoch": 1.9407047320864574, "grad_norm": 0.7319943308830261, "learning_rate": 2.04165233025627e-06, "loss": 1.4536, "mean_token_accuracy": 0.647271086772283, "num_tokens": 2961578557.0, "step": 17666 }, { "entropy": 1.7241827249526978, "epoch": 1.9408145889978303, "grad_norm": 0.7803419232368469, "learning_rate": 2.041498608167335e-06, "loss": 1.4855, "mean_token_accuracy": 0.6466056903203329, "num_tokens": 2961771057.0, "step": 17667 }, { "entropy": 1.653838684161504, "epoch": 1.9409244459092032, "grad_norm": 0.7193467617034912, "learning_rate": 2.0413451696109315e-06, "loss": 1.2023, "mean_token_accuracy": 0.6801579395929972, "num_tokens": 2961886643.0, "step": 17668 }, { "entropy": 1.6649916072686513, "epoch": 1.9410343028205763, "grad_norm": 0.6423388123512268, "learning_rate": 2.0411920145919186e-06, "loss": 1.3242, "mean_token_accuracy": 0.6613106826941172, "num_tokens": 2962044517.0, "step": 17669 }, { "entropy": 1.7756304542223613, "epoch": 1.941144159731949, "grad_norm": 0.765924334526062, "learning_rate": 2.0410391431151396e-06, "loss": 1.2838, "mean_token_accuracy": 0.6603503326574961, "num_tokens": 2962163822.0, "step": 17670 }, { "entropy": 1.7474627792835236, "epoch": 1.941254016643322, "grad_norm": 0.7282141447067261, "learning_rate": 2.040886555185435e-06, "loss": 1.3865, "mean_token_accuracy": 0.6600453555583954, "num_tokens": 2962301011.0, "step": 17671 }, { "entropy": 1.6974960267543793, "epoch": 1.941363873554695, "grad_norm": 0.7170888781547546, "learning_rate": 2.040734250807634e-06, "loss": 1.4518, "mean_token_accuracy": 0.6686425358057022, "num_tokens": 2962491509.0, "step": 17672 }, { "entropy": 1.673610270023346, "epoch": 1.9414737304660679, "grad_norm": 0.597626805305481, "learning_rate": 2.0405822299865577e-06, "loss": 1.2537, "mean_token_accuracy": 0.6842442254225413, "num_tokens": 2962673019.0, "step": 17673 }, { "entropy": 1.7404329578081768, "epoch": 1.941583587377441, "grad_norm": 0.8017789721488953, "learning_rate": 2.0404304927270156e-06, "loss": 1.3374, "mean_token_accuracy": 0.6645545810461044, "num_tokens": 2962792301.0, "step": 17674 }, { "entropy": 1.664712945620219, "epoch": 1.9416934442888139, "grad_norm": 0.6364064812660217, "learning_rate": 2.040279039033812e-06, "loss": 1.5154, "mean_token_accuracy": 0.6475819051265717, "num_tokens": 2962974839.0, "step": 17675 }, { "entropy": 1.690699468056361, "epoch": 1.9418033012001867, "grad_norm": 0.681605339050293, "learning_rate": 2.0401278689117387e-06, "loss": 1.3548, "mean_token_accuracy": 0.6689165979623795, "num_tokens": 2963119502.0, "step": 17676 }, { "entropy": 1.714266578356425, "epoch": 1.9419131581115598, "grad_norm": 0.7001098394393921, "learning_rate": 2.039976982365581e-06, "loss": 1.3567, "mean_token_accuracy": 0.659160926938057, "num_tokens": 2963294588.0, "step": 17677 }, { "entropy": 1.7354275782903035, "epoch": 1.9420230150229325, "grad_norm": 0.7794528007507324, "learning_rate": 2.0398263794001142e-06, "loss": 1.315, "mean_token_accuracy": 0.6574402799208959, "num_tokens": 2963406921.0, "step": 17678 }, { "entropy": 1.693873792886734, "epoch": 1.9421328719343056, "grad_norm": 0.7015413045883179, "learning_rate": 2.0396760600201053e-06, "loss": 1.4379, "mean_token_accuracy": 0.6484410464763641, "num_tokens": 2963573114.0, "step": 17679 }, { "entropy": 1.7558051943778992, "epoch": 1.9422427288456785, "grad_norm": 0.7721998691558838, "learning_rate": 2.0395260242303113e-06, "loss": 1.6748, "mean_token_accuracy": 0.6339837138851484, "num_tokens": 2963763093.0, "step": 17680 }, { "entropy": 1.6947738925615947, "epoch": 1.9423525857570514, "grad_norm": 0.6746589541435242, "learning_rate": 2.0393762720354816e-06, "loss": 1.3065, "mean_token_accuracy": 0.6745160073041916, "num_tokens": 2963898566.0, "step": 17681 }, { "entropy": 1.6936275362968445, "epoch": 1.9424624426684245, "grad_norm": 0.6480662226676941, "learning_rate": 2.0392268034403545e-06, "loss": 1.3829, "mean_token_accuracy": 0.6479221681753794, "num_tokens": 2964098765.0, "step": 17682 }, { "entropy": 1.7492527961730957, "epoch": 1.9425722995797972, "grad_norm": 0.6564264893531799, "learning_rate": 2.0390776184496606e-06, "loss": 1.3571, "mean_token_accuracy": 0.6510317424933115, "num_tokens": 2964246008.0, "step": 17683 }, { "entropy": 1.6724448998769124, "epoch": 1.9426821564911703, "grad_norm": 0.6479511857032776, "learning_rate": 2.0389287170681226e-06, "loss": 1.3041, "mean_token_accuracy": 0.6661087870597839, "num_tokens": 2964399518.0, "step": 17684 }, { "entropy": 1.731959581375122, "epoch": 1.9427920134025431, "grad_norm": 0.7445155382156372, "learning_rate": 2.0387800993004534e-06, "loss": 1.329, "mean_token_accuracy": 0.659634068608284, "num_tokens": 2964555472.0, "step": 17685 }, { "entropy": 1.699200451374054, "epoch": 1.942901870313916, "grad_norm": 0.7213234305381775, "learning_rate": 2.0386317651513557e-06, "loss": 1.2045, "mean_token_accuracy": 0.6754323144753774, "num_tokens": 2964684843.0, "step": 17686 }, { "entropy": 1.7045519252618153, "epoch": 1.9430117272252891, "grad_norm": 0.6406714916229248, "learning_rate": 2.038483714625525e-06, "loss": 1.339, "mean_token_accuracy": 0.6636529515186945, "num_tokens": 2964860646.0, "step": 17687 }, { "entropy": 1.713558445374171, "epoch": 1.943121584136662, "grad_norm": 0.7353739142417908, "learning_rate": 2.038335947727646e-06, "loss": 1.4388, "mean_token_accuracy": 0.6637803067763647, "num_tokens": 2965024257.0, "step": 17688 }, { "entropy": 1.6618661483128865, "epoch": 1.943231441048035, "grad_norm": 0.6706850528717041, "learning_rate": 2.0381884644623956e-06, "loss": 1.3865, "mean_token_accuracy": 0.6615625272194544, "num_tokens": 2965227115.0, "step": 17689 }, { "entropy": 1.705021212498347, "epoch": 1.943341297959408, "grad_norm": 0.714533269405365, "learning_rate": 2.0380412648344426e-06, "loss": 1.418, "mean_token_accuracy": 0.6533076216777166, "num_tokens": 2965392439.0, "step": 17690 }, { "entropy": 1.6992263595263164, "epoch": 1.9434511548707807, "grad_norm": 0.6608665585517883, "learning_rate": 2.037894348848445e-06, "loss": 1.3918, "mean_token_accuracy": 0.6552961965401968, "num_tokens": 2965554340.0, "step": 17691 }, { "entropy": 1.6828101476033528, "epoch": 1.9435610117821538, "grad_norm": 0.663170576095581, "learning_rate": 2.0377477165090535e-06, "loss": 1.4066, "mean_token_accuracy": 0.6510103195905685, "num_tokens": 2965750001.0, "step": 17692 }, { "entropy": 1.6746436854203541, "epoch": 1.9436708686935267, "grad_norm": 0.6782816648483276, "learning_rate": 2.0376013678209085e-06, "loss": 1.3206, "mean_token_accuracy": 0.6747565368811289, "num_tokens": 2965884732.0, "step": 17693 }, { "entropy": 1.6658462782700856, "epoch": 1.9437807256048996, "grad_norm": 0.641159176826477, "learning_rate": 2.03745530278864e-06, "loss": 1.3442, "mean_token_accuracy": 0.6628729601701101, "num_tokens": 2966090536.0, "step": 17694 }, { "entropy": 1.7376565237840016, "epoch": 1.9438905825162727, "grad_norm": 0.7169440388679504, "learning_rate": 2.0373095214168737e-06, "loss": 1.4649, "mean_token_accuracy": 0.6538771440585455, "num_tokens": 2966257301.0, "step": 17695 }, { "entropy": 1.73634934425354, "epoch": 1.9440004394276453, "grad_norm": 0.7864576578140259, "learning_rate": 2.037164023710222e-06, "loss": 1.4079, "mean_token_accuracy": 0.6538835118214289, "num_tokens": 2966401908.0, "step": 17696 }, { "entropy": 1.7507271766662598, "epoch": 1.9441102963390184, "grad_norm": 0.819874107837677, "learning_rate": 2.0370188096732905e-06, "loss": 1.2977, "mean_token_accuracy": 0.660680502653122, "num_tokens": 2966499432.0, "step": 17697 }, { "entropy": 1.655652830998103, "epoch": 1.9442201532503913, "grad_norm": 0.6456592082977295, "learning_rate": 2.0368738793106745e-06, "loss": 1.3016, "mean_token_accuracy": 0.678799549738566, "num_tokens": 2966687978.0, "step": 17698 }, { "entropy": 1.6859602630138397, "epoch": 1.9443300101617642, "grad_norm": 0.7051413059234619, "learning_rate": 2.036729232626962e-06, "loss": 1.4748, "mean_token_accuracy": 0.6505583177010218, "num_tokens": 2966839033.0, "step": 17699 }, { "entropy": 1.6403660873572032, "epoch": 1.9444398670731373, "grad_norm": 0.6287828683853149, "learning_rate": 2.03658486962673e-06, "loss": 1.3748, "mean_token_accuracy": 0.6574486494064331, "num_tokens": 2967024711.0, "step": 17700 }, { "entropy": 1.719410906235377, "epoch": 1.9445497239845102, "grad_norm": 0.6784718036651611, "learning_rate": 2.036440790314548e-06, "loss": 1.3411, "mean_token_accuracy": 0.6652787824471792, "num_tokens": 2967151451.0, "step": 17701 }, { "entropy": 1.7372966408729553, "epoch": 1.944659580895883, "grad_norm": 0.8029721975326538, "learning_rate": 2.0362969946949755e-06, "loss": 1.4529, "mean_token_accuracy": 0.6361254553000132, "num_tokens": 2967375721.0, "step": 17702 }, { "entropy": 1.7620983918507893, "epoch": 1.9447694378072562, "grad_norm": 0.575481116771698, "learning_rate": 2.0361534827725636e-06, "loss": 1.5031, "mean_token_accuracy": 0.6388354301452637, "num_tokens": 2967592752.0, "step": 17703 }, { "entropy": 1.7115374505519867, "epoch": 1.9448792947186289, "grad_norm": 0.6605756878852844, "learning_rate": 2.0360102545518557e-06, "loss": 1.39, "mean_token_accuracy": 0.653022920091947, "num_tokens": 2967736489.0, "step": 17704 }, { "entropy": 1.6519458691279094, "epoch": 1.944989151630002, "grad_norm": 0.6345553398132324, "learning_rate": 2.035867310037384e-06, "loss": 1.4154, "mean_token_accuracy": 0.6511341631412506, "num_tokens": 2967887788.0, "step": 17705 }, { "entropy": 1.737346072991689, "epoch": 1.9450990085413749, "grad_norm": 0.7456694841384888, "learning_rate": 2.0357246492336716e-06, "loss": 1.313, "mean_token_accuracy": 0.6586676637331644, "num_tokens": 2968015984.0, "step": 17706 }, { "entropy": 1.714151293039322, "epoch": 1.9452088654527477, "grad_norm": 0.747020959854126, "learning_rate": 2.0355822721452358e-06, "loss": 1.6276, "mean_token_accuracy": 0.6316531747579575, "num_tokens": 2968206188.0, "step": 17707 }, { "entropy": 1.7121087312698364, "epoch": 1.9453187223641208, "grad_norm": 0.6129441857337952, "learning_rate": 2.03544017877658e-06, "loss": 1.339, "mean_token_accuracy": 0.6570817331473032, "num_tokens": 2968360647.0, "step": 17708 }, { "entropy": 1.6801585257053375, "epoch": 1.9454285792754935, "grad_norm": 0.7115065455436707, "learning_rate": 2.0352983691322046e-06, "loss": 1.3085, "mean_token_accuracy": 0.6725891331831614, "num_tokens": 2968553740.0, "step": 17709 }, { "entropy": 1.6956307987372081, "epoch": 1.9455384361868666, "grad_norm": 0.6248717904090881, "learning_rate": 2.035156843216596e-06, "loss": 1.4046, "mean_token_accuracy": 0.649314617117246, "num_tokens": 2968758358.0, "step": 17710 }, { "entropy": 1.7073156734307606, "epoch": 1.9456482930982395, "grad_norm": 0.7282310724258423, "learning_rate": 2.035015601034233e-06, "loss": 1.4184, "mean_token_accuracy": 0.6510714391867319, "num_tokens": 2968907627.0, "step": 17711 }, { "entropy": 1.767699142297109, "epoch": 1.9457581500096124, "grad_norm": 0.74590665102005, "learning_rate": 2.0348746425895865e-06, "loss": 1.3263, "mean_token_accuracy": 0.6632108837366104, "num_tokens": 2969049681.0, "step": 17712 }, { "entropy": 1.73310982187589, "epoch": 1.9458680069209855, "grad_norm": 0.9105587005615234, "learning_rate": 2.034733967887119e-06, "loss": 1.3848, "mean_token_accuracy": 0.6630599250396093, "num_tokens": 2969205626.0, "step": 17713 }, { "entropy": 1.7039326230684917, "epoch": 1.9459778638323584, "grad_norm": 0.6730808019638062, "learning_rate": 2.03459357693128e-06, "loss": 1.2393, "mean_token_accuracy": 0.6768955588340759, "num_tokens": 2969315930.0, "step": 17714 }, { "entropy": 1.695899059375127, "epoch": 1.9460877207437313, "grad_norm": 1.4940348863601685, "learning_rate": 2.0344534697265152e-06, "loss": 1.3153, "mean_token_accuracy": 0.6758741736412048, "num_tokens": 2969509322.0, "step": 17715 }, { "entropy": 1.7427550554275513, "epoch": 1.9461975776551044, "grad_norm": 0.6874081492424011, "learning_rate": 2.0343136462772583e-06, "loss": 1.4501, "mean_token_accuracy": 0.6438677261273066, "num_tokens": 2969660188.0, "step": 17716 }, { "entropy": 1.7365627984205882, "epoch": 1.946307434566477, "grad_norm": 0.6163883805274963, "learning_rate": 2.034174106587934e-06, "loss": 1.5149, "mean_token_accuracy": 0.6376241246859232, "num_tokens": 2969893508.0, "step": 17717 }, { "entropy": 1.6431179742018382, "epoch": 1.9464172914778501, "grad_norm": 0.606504499912262, "learning_rate": 2.0340348506629586e-06, "loss": 1.5092, "mean_token_accuracy": 0.6399561762809753, "num_tokens": 2970104649.0, "step": 17718 }, { "entropy": 1.7281967997550964, "epoch": 1.946527148389223, "grad_norm": 0.6497771739959717, "learning_rate": 2.033895878506742e-06, "loss": 1.5915, "mean_token_accuracy": 0.6247910012801489, "num_tokens": 2970337768.0, "step": 17719 }, { "entropy": 1.7278837660948436, "epoch": 1.946637005300596, "grad_norm": 0.7515074610710144, "learning_rate": 2.033757190123679e-06, "loss": 1.3733, "mean_token_accuracy": 0.6671270777781805, "num_tokens": 2970488423.0, "step": 17720 }, { "entropy": 1.755313863356908, "epoch": 1.946746862211969, "grad_norm": 0.7204878926277161, "learning_rate": 2.0336187855181603e-06, "loss": 1.4981, "mean_token_accuracy": 0.6442046463489532, "num_tokens": 2970643331.0, "step": 17721 }, { "entropy": 1.6647318204243977, "epoch": 1.9468567191233417, "grad_norm": 0.6584035158157349, "learning_rate": 2.033480664694568e-06, "loss": 1.4157, "mean_token_accuracy": 0.6540108720461527, "num_tokens": 2970795323.0, "step": 17722 }, { "entropy": 1.765694946050644, "epoch": 1.9469665760347148, "grad_norm": 0.8152051568031311, "learning_rate": 2.033342827657271e-06, "loss": 1.4476, "mean_token_accuracy": 0.6522760838270187, "num_tokens": 2970956293.0, "step": 17723 }, { "entropy": 1.776330480972926, "epoch": 1.9470764329460877, "grad_norm": 0.817134439945221, "learning_rate": 2.033205274410634e-06, "loss": 1.4082, "mean_token_accuracy": 0.6540219734112421, "num_tokens": 2971111548.0, "step": 17724 }, { "entropy": 1.7007002929846446, "epoch": 1.9471862898574606, "grad_norm": 0.6617542505264282, "learning_rate": 2.0330680049590095e-06, "loss": 1.4518, "mean_token_accuracy": 0.661642129222552, "num_tokens": 2971264294.0, "step": 17725 }, { "entropy": 1.6825316647688549, "epoch": 1.9472961467688337, "grad_norm": 0.6800007820129395, "learning_rate": 2.032931019306741e-06, "loss": 1.4463, "mean_token_accuracy": 0.6541166653235754, "num_tokens": 2971450963.0, "step": 17726 }, { "entropy": 1.7201205591360729, "epoch": 1.9474060036802066, "grad_norm": 0.6191043853759766, "learning_rate": 2.0327943174581663e-06, "loss": 1.4436, "mean_token_accuracy": 0.6575228720903397, "num_tokens": 2971650421.0, "step": 17727 }, { "entropy": 1.7219129304091136, "epoch": 1.9475158605915794, "grad_norm": 0.7525007128715515, "learning_rate": 2.0326578994176104e-06, "loss": 1.3096, "mean_token_accuracy": 0.6700589607159296, "num_tokens": 2971787284.0, "step": 17728 }, { "entropy": 1.7868116199970245, "epoch": 1.9476257175029525, "grad_norm": 0.6978966593742371, "learning_rate": 2.032521765189391e-06, "loss": 1.4937, "mean_token_accuracy": 0.6415872623523077, "num_tokens": 2971987111.0, "step": 17729 }, { "entropy": 1.7060894866784413, "epoch": 1.9477355744143252, "grad_norm": 1.1793681383132935, "learning_rate": 2.032385914777817e-06, "loss": 1.5346, "mean_token_accuracy": 0.642043317357699, "num_tokens": 2972138435.0, "step": 17730 }, { "entropy": 1.7055272956689198, "epoch": 1.9478454313256983, "grad_norm": 0.7151913642883301, "learning_rate": 2.032250348187188e-06, "loss": 1.3737, "mean_token_accuracy": 0.6631810615460078, "num_tokens": 2972288505.0, "step": 17731 }, { "entropy": 1.7625197768211365, "epoch": 1.9479552882370712, "grad_norm": 0.9355411529541016, "learning_rate": 2.032115065421794e-06, "loss": 1.4241, "mean_token_accuracy": 0.6463347425063452, "num_tokens": 2972489591.0, "step": 17732 }, { "entropy": 1.7217676838239033, "epoch": 1.948065145148444, "grad_norm": 0.7413977980613708, "learning_rate": 2.0319800664859175e-06, "loss": 1.4565, "mean_token_accuracy": 0.6653625269730886, "num_tokens": 2972644196.0, "step": 17733 }, { "entropy": 1.7668897410233815, "epoch": 1.9481750020598172, "grad_norm": 0.6924734711647034, "learning_rate": 2.031845351383831e-06, "loss": 1.3263, "mean_token_accuracy": 0.6588727583487829, "num_tokens": 2972777302.0, "step": 17734 }, { "entropy": 1.7585100928942363, "epoch": 1.9482848589711899, "grad_norm": 0.7000848650932312, "learning_rate": 2.0317109201197978e-06, "loss": 1.4577, "mean_token_accuracy": 0.6405526846647263, "num_tokens": 2972939201.0, "step": 17735 }, { "entropy": 1.6759247382481892, "epoch": 1.948394715882563, "grad_norm": 0.7132837176322937, "learning_rate": 2.0315767726980726e-06, "loss": 1.5039, "mean_token_accuracy": 0.6526048630475998, "num_tokens": 2973129903.0, "step": 17736 }, { "entropy": 1.668695737918218, "epoch": 1.9485045727939359, "grad_norm": 0.6133973598480225, "learning_rate": 2.031442909122902e-06, "loss": 1.3299, "mean_token_accuracy": 0.669144387046496, "num_tokens": 2973289265.0, "step": 17737 }, { "entropy": 1.6713014245033264, "epoch": 1.9486144297053087, "grad_norm": 0.6235535144805908, "learning_rate": 2.031309329398521e-06, "loss": 1.399, "mean_token_accuracy": 0.6442220707734426, "num_tokens": 2973478673.0, "step": 17738 }, { "entropy": 1.6983527541160583, "epoch": 1.9487242866166818, "grad_norm": 0.6092813014984131, "learning_rate": 2.031176033529158e-06, "loss": 1.4028, "mean_token_accuracy": 0.636608416835467, "num_tokens": 2973675888.0, "step": 17739 }, { "entropy": 1.6300262808799744, "epoch": 1.9488341435280547, "grad_norm": 0.7199864983558655, "learning_rate": 2.0310430215190336e-06, "loss": 1.3337, "mean_token_accuracy": 0.6639452030261358, "num_tokens": 2973853010.0, "step": 17740 }, { "entropy": 1.7593192954858143, "epoch": 1.9489440004394276, "grad_norm": 0.7048966884613037, "learning_rate": 2.0309102933723555e-06, "loss": 1.3223, "mean_token_accuracy": 0.6694928755362829, "num_tokens": 2973982382.0, "step": 17741 }, { "entropy": 1.6991233627001445, "epoch": 1.9490538573508007, "grad_norm": 0.6124697923660278, "learning_rate": 2.0307778490933245e-06, "loss": 1.3468, "mean_token_accuracy": 0.6536738177140554, "num_tokens": 2974186796.0, "step": 17742 }, { "entropy": 1.6812581419944763, "epoch": 1.9491637142621734, "grad_norm": 0.6754594445228577, "learning_rate": 2.0306456886861333e-06, "loss": 1.3569, "mean_token_accuracy": 0.6634483486413956, "num_tokens": 2974362784.0, "step": 17743 }, { "entropy": 1.7027353048324585, "epoch": 1.9492735711735465, "grad_norm": 0.7747242450714111, "learning_rate": 2.030513812154964e-06, "loss": 1.2265, "mean_token_accuracy": 0.682536577184995, "num_tokens": 2974489565.0, "step": 17744 }, { "entropy": 1.631873478492101, "epoch": 1.9493834280849194, "grad_norm": 0.6376305222511292, "learning_rate": 2.030382219503991e-06, "loss": 1.3407, "mean_token_accuracy": 0.6745425860087076, "num_tokens": 2974644841.0, "step": 17745 }, { "entropy": 1.6536372005939484, "epoch": 1.9494932849962923, "grad_norm": 0.812867283821106, "learning_rate": 2.03025091073738e-06, "loss": 1.4305, "mean_token_accuracy": 0.6665123303731283, "num_tokens": 2974819612.0, "step": 17746 }, { "entropy": 1.741923948129018, "epoch": 1.9496031419076654, "grad_norm": 0.6799856424331665, "learning_rate": 2.0301198858592847e-06, "loss": 1.4306, "mean_token_accuracy": 0.6507015228271484, "num_tokens": 2974975514.0, "step": 17747 }, { "entropy": 1.7347366710503895, "epoch": 1.9497129988190383, "grad_norm": 0.8397426605224609, "learning_rate": 2.029989144873853e-06, "loss": 1.2693, "mean_token_accuracy": 0.6738801846901575, "num_tokens": 2975106527.0, "step": 17748 }, { "entropy": 1.7347449858983357, "epoch": 1.9498228557304111, "grad_norm": 0.6037880182266235, "learning_rate": 2.0298586877852233e-06, "loss": 1.463, "mean_token_accuracy": 0.6357159316539764, "num_tokens": 2975283443.0, "step": 17749 }, { "entropy": 1.7268792192141216, "epoch": 1.949932712641784, "grad_norm": 0.698653519153595, "learning_rate": 2.0297285145975243e-06, "loss": 1.2884, "mean_token_accuracy": 0.6732809692621231, "num_tokens": 2975397640.0, "step": 17750 }, { "entropy": 1.7239871521790822, "epoch": 1.950042569553157, "grad_norm": 0.7377451658248901, "learning_rate": 2.0295986253148748e-06, "loss": 1.5272, "mean_token_accuracy": 0.6336576888958613, "num_tokens": 2975554478.0, "step": 17751 }, { "entropy": 1.7457629640897114, "epoch": 1.95015242646453, "grad_norm": 0.8000925779342651, "learning_rate": 2.029469019941387e-06, "loss": 1.4273, "mean_token_accuracy": 0.6509995808204015, "num_tokens": 2975691139.0, "step": 17752 }, { "entropy": 1.7045822242895763, "epoch": 1.950262283375903, "grad_norm": 0.6363489627838135, "learning_rate": 2.029339698481164e-06, "loss": 1.3028, "mean_token_accuracy": 0.6681992560625076, "num_tokens": 2975879366.0, "step": 17753 }, { "entropy": 1.723255564769109, "epoch": 1.9503721402872758, "grad_norm": 0.7007434368133545, "learning_rate": 2.029210660938295e-06, "loss": 1.5731, "mean_token_accuracy": 0.6275994380315145, "num_tokens": 2976062063.0, "step": 17754 }, { "entropy": 1.7033185164133708, "epoch": 1.950481997198649, "grad_norm": 0.6987881064414978, "learning_rate": 2.0290819073168673e-06, "loss": 1.2802, "mean_token_accuracy": 0.6664392650127411, "num_tokens": 2976182351.0, "step": 17755 }, { "entropy": 1.6668557822704315, "epoch": 1.9505918541100216, "grad_norm": 0.6711578965187073, "learning_rate": 2.028953437620955e-06, "loss": 1.2469, "mean_token_accuracy": 0.6852605938911438, "num_tokens": 2976305690.0, "step": 17756 }, { "entropy": 1.7156360646088917, "epoch": 1.9507017110213947, "grad_norm": 0.7528457045555115, "learning_rate": 2.0288252518546247e-06, "loss": 1.5161, "mean_token_accuracy": 0.64786363641421, "num_tokens": 2976493784.0, "step": 17757 }, { "entropy": 1.669191300868988, "epoch": 1.9508115679327676, "grad_norm": 0.6685511469841003, "learning_rate": 2.0286973500219315e-06, "loss": 1.4391, "mean_token_accuracy": 0.6614227841297785, "num_tokens": 2976692124.0, "step": 17758 }, { "entropy": 1.6809686024983723, "epoch": 1.9509214248441404, "grad_norm": 0.6204544901847839, "learning_rate": 2.028569732126924e-06, "loss": 1.5254, "mean_token_accuracy": 0.6390020251274109, "num_tokens": 2976904174.0, "step": 17759 }, { "entropy": 1.7139968574047089, "epoch": 1.9510312817555135, "grad_norm": 0.5688869953155518, "learning_rate": 2.0284423981736432e-06, "loss": 1.3492, "mean_token_accuracy": 0.663325771689415, "num_tokens": 2977049188.0, "step": 17760 }, { "entropy": 1.7387417654196422, "epoch": 1.9511411386668864, "grad_norm": 0.6545831561088562, "learning_rate": 2.028315348166117e-06, "loss": 1.387, "mean_token_accuracy": 0.6539622743924459, "num_tokens": 2977201898.0, "step": 17761 }, { "entropy": 1.6949077546596527, "epoch": 1.9512509955782593, "grad_norm": 0.7409424781799316, "learning_rate": 2.028188582108368e-06, "loss": 1.3519, "mean_token_accuracy": 0.6653612554073334, "num_tokens": 2977333297.0, "step": 17762 }, { "entropy": 1.735666275024414, "epoch": 1.9513608524896322, "grad_norm": 0.604129433631897, "learning_rate": 2.0280621000044065e-06, "loss": 1.4387, "mean_token_accuracy": 0.652505616346995, "num_tokens": 2977536291.0, "step": 17763 }, { "entropy": 1.6692261199156444, "epoch": 1.951470709401005, "grad_norm": 0.6267274022102356, "learning_rate": 2.0279359018582377e-06, "loss": 1.4824, "mean_token_accuracy": 0.6410986383756002, "num_tokens": 2977726406.0, "step": 17764 }, { "entropy": 1.7290876011053722, "epoch": 1.9515805663123782, "grad_norm": 0.7559896111488342, "learning_rate": 2.0278099876738543e-06, "loss": 1.3264, "mean_token_accuracy": 0.6726896514495214, "num_tokens": 2977841878.0, "step": 17765 }, { "entropy": 1.716409037510554, "epoch": 1.951690423223751, "grad_norm": 0.6588131785392761, "learning_rate": 2.0276843574552425e-06, "loss": 1.2942, "mean_token_accuracy": 0.6628698209921519, "num_tokens": 2977963873.0, "step": 17766 }, { "entropy": 1.717410941918691, "epoch": 1.951800280135124, "grad_norm": 0.7476875185966492, "learning_rate": 2.027559011206377e-06, "loss": 1.319, "mean_token_accuracy": 0.6718118588129679, "num_tokens": 2978082887.0, "step": 17767 }, { "entropy": 1.6688204904397328, "epoch": 1.951910137046497, "grad_norm": 0.6238333582878113, "learning_rate": 2.0274339489312252e-06, "loss": 1.4476, "mean_token_accuracy": 0.6366531451543173, "num_tokens": 2978319052.0, "step": 17768 }, { "entropy": 1.6899834473927815, "epoch": 1.9520199939578697, "grad_norm": 0.7009279131889343, "learning_rate": 2.0273091706337467e-06, "loss": 1.2801, "mean_token_accuracy": 0.6724486152331034, "num_tokens": 2978430156.0, "step": 17769 }, { "entropy": 1.7222119470437367, "epoch": 1.9521298508692428, "grad_norm": 0.6706877946853638, "learning_rate": 2.0271846763178895e-06, "loss": 1.3487, "mean_token_accuracy": 0.6513159523407618, "num_tokens": 2978603591.0, "step": 17770 }, { "entropy": 1.711435745159785, "epoch": 1.9522397077806157, "grad_norm": 0.6107102632522583, "learning_rate": 2.0270604659875943e-06, "loss": 1.374, "mean_token_accuracy": 0.6598343700170517, "num_tokens": 2978796689.0, "step": 17771 }, { "entropy": 1.771813799937566, "epoch": 1.9523495646919886, "grad_norm": 0.6782006621360779, "learning_rate": 2.026936539646792e-06, "loss": 1.5024, "mean_token_accuracy": 0.6387060980002085, "num_tokens": 2978999550.0, "step": 17772 }, { "entropy": 1.713943600654602, "epoch": 1.9524594216033617, "grad_norm": 0.8408421277999878, "learning_rate": 2.0268128972994044e-06, "loss": 1.401, "mean_token_accuracy": 0.6681250631809235, "num_tokens": 2979123400.0, "step": 17773 }, { "entropy": 1.7296073734760284, "epoch": 1.9525692785147346, "grad_norm": 0.8583366274833679, "learning_rate": 2.0266895389493456e-06, "loss": 1.4684, "mean_token_accuracy": 0.6323349376519521, "num_tokens": 2979299769.0, "step": 17774 }, { "entropy": 1.7292738854885101, "epoch": 1.9526791354261075, "grad_norm": 0.6199796199798584, "learning_rate": 2.0265664646005194e-06, "loss": 1.4296, "mean_token_accuracy": 0.6407396892706553, "num_tokens": 2979543584.0, "step": 17775 }, { "entropy": 1.6723608275254567, "epoch": 1.9527889923374804, "grad_norm": 0.6568523049354553, "learning_rate": 2.0264436742568204e-06, "loss": 1.4326, "mean_token_accuracy": 0.6430952151616415, "num_tokens": 2979713882.0, "step": 17776 }, { "entropy": 1.6405765612920125, "epoch": 1.9528988492488533, "grad_norm": 0.7078863382339478, "learning_rate": 2.0263211679221358e-06, "loss": 1.4032, "mean_token_accuracy": 0.6723550657431284, "num_tokens": 2979909442.0, "step": 17777 }, { "entropy": 1.6554110149542491, "epoch": 1.9530087061602264, "grad_norm": 0.6638414859771729, "learning_rate": 2.0261989456003436e-06, "loss": 1.2885, "mean_token_accuracy": 0.6878614326318105, "num_tokens": 2980088903.0, "step": 17778 }, { "entropy": 1.7123227616151173, "epoch": 1.9531185630715993, "grad_norm": 0.5971532464027405, "learning_rate": 2.02607700729531e-06, "loss": 1.4228, "mean_token_accuracy": 0.6444319188594818, "num_tokens": 2980281354.0, "step": 17779 }, { "entropy": 1.7631987730662029, "epoch": 1.9532284199829721, "grad_norm": 0.5625388622283936, "learning_rate": 2.025955353010896e-06, "loss": 1.4602, "mean_token_accuracy": 0.6468930691480637, "num_tokens": 2980500165.0, "step": 17780 }, { "entropy": 1.7447159190972645, "epoch": 1.9533382768943452, "grad_norm": 0.6851442456245422, "learning_rate": 2.0258339827509506e-06, "loss": 1.4913, "mean_token_accuracy": 0.6319667845964432, "num_tokens": 2980713289.0, "step": 17781 }, { "entropy": 1.7208144168059032, "epoch": 1.953448133805718, "grad_norm": 0.629928708076477, "learning_rate": 2.0257128965193165e-06, "loss": 1.4998, "mean_token_accuracy": 0.6411555955807368, "num_tokens": 2980902416.0, "step": 17782 }, { "entropy": 1.7709160546461742, "epoch": 1.953557990717091, "grad_norm": 0.7798748016357422, "learning_rate": 2.0255920943198244e-06, "loss": 1.3797, "mean_token_accuracy": 0.6637091686328253, "num_tokens": 2981033075.0, "step": 17783 }, { "entropy": 1.6994303961594899, "epoch": 1.953667847628464, "grad_norm": 0.7806686758995056, "learning_rate": 2.0254715761562998e-06, "loss": 1.5518, "mean_token_accuracy": 0.6446986546119055, "num_tokens": 2981192101.0, "step": 17784 }, { "entropy": 1.7072394092877705, "epoch": 1.9537777045398368, "grad_norm": 0.6744371056556702, "learning_rate": 2.0253513420325545e-06, "loss": 1.4875, "mean_token_accuracy": 0.6371971815824509, "num_tokens": 2981426393.0, "step": 17785 }, { "entropy": 1.6127947370211284, "epoch": 1.95388756145121, "grad_norm": 0.7404419779777527, "learning_rate": 2.025231391952396e-06, "loss": 1.346, "mean_token_accuracy": 0.6637389014164606, "num_tokens": 2981579481.0, "step": 17786 }, { "entropy": 1.7959074278672535, "epoch": 1.9539974183625828, "grad_norm": 0.7481608986854553, "learning_rate": 2.0251117259196202e-06, "loss": 1.3442, "mean_token_accuracy": 0.6640120794375738, "num_tokens": 2981726089.0, "step": 17787 }, { "entropy": 1.713577965895335, "epoch": 1.9541072752739557, "grad_norm": 0.835660994052887, "learning_rate": 2.0249923439380127e-06, "loss": 1.3407, "mean_token_accuracy": 0.6656025052070618, "num_tokens": 2981849837.0, "step": 17788 }, { "entropy": 1.696219692627589, "epoch": 1.9542171321853288, "grad_norm": 0.7188278436660767, "learning_rate": 2.024873246011354e-06, "loss": 1.2623, "mean_token_accuracy": 0.6720642745494843, "num_tokens": 2982006826.0, "step": 17789 }, { "entropy": 1.7328607738018036, "epoch": 1.9543269890967014, "grad_norm": 0.7431653738021851, "learning_rate": 2.0247544321434136e-06, "loss": 1.4974, "mean_token_accuracy": 0.6494582444429398, "num_tokens": 2982194388.0, "step": 17790 }, { "entropy": 1.713299572467804, "epoch": 1.9544368460080745, "grad_norm": 0.6786489486694336, "learning_rate": 2.02463590233795e-06, "loss": 1.3613, "mean_token_accuracy": 0.6588152448336283, "num_tokens": 2982336529.0, "step": 17791 }, { "entropy": 1.721155156691869, "epoch": 1.9545467029194474, "grad_norm": 0.6759476661682129, "learning_rate": 2.024517656598716e-06, "loss": 1.5028, "mean_token_accuracy": 0.6534711370865504, "num_tokens": 2982487665.0, "step": 17792 }, { "entropy": 1.7625728845596313, "epoch": 1.9546565598308203, "grad_norm": 0.683269739151001, "learning_rate": 2.0243996949294543e-06, "loss": 1.4255, "mean_token_accuracy": 0.6441936790943146, "num_tokens": 2982725011.0, "step": 17793 }, { "entropy": 1.7418459355831146, "epoch": 1.9547664167421934, "grad_norm": 0.7607301473617554, "learning_rate": 2.0242820173338963e-06, "loss": 1.3563, "mean_token_accuracy": 0.6651049753030142, "num_tokens": 2982847742.0, "step": 17794 }, { "entropy": 1.7185759842395782, "epoch": 1.954876273653566, "grad_norm": 0.6495237350463867, "learning_rate": 2.024164623815769e-06, "loss": 1.3563, "mean_token_accuracy": 0.6601082533597946, "num_tokens": 2982984398.0, "step": 17795 }, { "entropy": 1.6946265896161397, "epoch": 1.9549861305649392, "grad_norm": 0.8139024972915649, "learning_rate": 2.024047514378787e-06, "loss": 1.5517, "mean_token_accuracy": 0.6357475270827612, "num_tokens": 2983145945.0, "step": 17796 }, { "entropy": 1.6650099456310272, "epoch": 1.955095987476312, "grad_norm": 0.5920116305351257, "learning_rate": 2.0239306890266558e-06, "loss": 1.3403, "mean_token_accuracy": 0.6662998845179876, "num_tokens": 2983303994.0, "step": 17797 }, { "entropy": 1.6227064232031505, "epoch": 1.955205844387685, "grad_norm": 0.6600332260131836, "learning_rate": 2.0238141477630744e-06, "loss": 1.4115, "mean_token_accuracy": 0.6637008314331373, "num_tokens": 2983460489.0, "step": 17798 }, { "entropy": 1.6318263411521912, "epoch": 1.955315701299058, "grad_norm": 0.7498336434364319, "learning_rate": 2.0236978905917296e-06, "loss": 1.3756, "mean_token_accuracy": 0.6501528173685074, "num_tokens": 2983644135.0, "step": 17799 }, { "entropy": 1.7046960294246674, "epoch": 1.955425558210431, "grad_norm": 0.7306966781616211, "learning_rate": 2.0235819175163017e-06, "loss": 1.3088, "mean_token_accuracy": 0.6869508425394694, "num_tokens": 2983803611.0, "step": 17800 }, { "entropy": 1.7258172035217285, "epoch": 1.9555354151218038, "grad_norm": 0.7059761881828308, "learning_rate": 2.0234662285404617e-06, "loss": 1.3583, "mean_token_accuracy": 0.6637235432863235, "num_tokens": 2983979414.0, "step": 17801 }, { "entropy": 1.769530753294627, "epoch": 1.955645272033177, "grad_norm": 0.6218457818031311, "learning_rate": 2.0233508236678702e-06, "loss": 1.4673, "mean_token_accuracy": 0.6336728036403656, "num_tokens": 2984189537.0, "step": 17802 }, { "entropy": 1.6664655307928722, "epoch": 1.9557551289445496, "grad_norm": 0.7295881509780884, "learning_rate": 2.023235702902181e-06, "loss": 1.5348, "mean_token_accuracy": 0.6525272379318873, "num_tokens": 2984350464.0, "step": 17803 }, { "entropy": 1.7370224396387737, "epoch": 1.9558649858559227, "grad_norm": 0.5997971892356873, "learning_rate": 2.0231208662470357e-06, "loss": 1.4465, "mean_token_accuracy": 0.6436833242575327, "num_tokens": 2984529796.0, "step": 17804 }, { "entropy": 1.6230102678140004, "epoch": 1.9559748427672956, "grad_norm": 0.6606541275978088, "learning_rate": 2.023006313706071e-06, "loss": 1.315, "mean_token_accuracy": 0.6677302569150925, "num_tokens": 2984680008.0, "step": 17805 }, { "entropy": 1.7209392488002777, "epoch": 1.9560846996786685, "grad_norm": 0.6199936866760254, "learning_rate": 2.0228920452829103e-06, "loss": 1.352, "mean_token_accuracy": 0.6574979374806086, "num_tokens": 2984848335.0, "step": 17806 }, { "entropy": 1.692920833826065, "epoch": 1.9561945565900416, "grad_norm": 0.653578519821167, "learning_rate": 2.022778060981172e-06, "loss": 1.424, "mean_token_accuracy": 0.6595415671666464, "num_tokens": 2985045996.0, "step": 17807 }, { "entropy": 1.6915589074293773, "epoch": 1.9563044135014143, "grad_norm": 0.6038907766342163, "learning_rate": 2.0226643608044624e-06, "loss": 1.3544, "mean_token_accuracy": 0.6625373015801111, "num_tokens": 2985204883.0, "step": 17808 }, { "entropy": 1.691193660100301, "epoch": 1.9564142704127874, "grad_norm": 0.6831693053245544, "learning_rate": 2.022550944756381e-06, "loss": 1.5571, "mean_token_accuracy": 0.6408517857392629, "num_tokens": 2985381113.0, "step": 17809 }, { "entropy": 1.7213842968146007, "epoch": 1.9565241273241603, "grad_norm": 0.7153857946395874, "learning_rate": 2.0224378128405157e-06, "loss": 1.4151, "mean_token_accuracy": 0.6612313389778137, "num_tokens": 2985523825.0, "step": 17810 }, { "entropy": 1.7010469138622284, "epoch": 1.9566339842355331, "grad_norm": 0.6271878480911255, "learning_rate": 2.0223249650604493e-06, "loss": 1.4303, "mean_token_accuracy": 0.645938828587532, "num_tokens": 2985672895.0, "step": 17811 }, { "entropy": 1.6880601942539215, "epoch": 1.9567438411469062, "grad_norm": 0.8414680361747742, "learning_rate": 2.022212401419752e-06, "loss": 1.5198, "mean_token_accuracy": 0.6524382879336675, "num_tokens": 2985824112.0, "step": 17812 }, { "entropy": 1.697400947411855, "epoch": 1.9568536980582791, "grad_norm": 0.7417037487030029, "learning_rate": 2.0221001219219877e-06, "loss": 1.427, "mean_token_accuracy": 0.6512279262145361, "num_tokens": 2985971235.0, "step": 17813 }, { "entropy": 1.6493543187777202, "epoch": 1.956963554969652, "grad_norm": 0.6581324934959412, "learning_rate": 2.0219881265707077e-06, "loss": 1.3354, "mean_token_accuracy": 0.6625111848115921, "num_tokens": 2986149318.0, "step": 17814 }, { "entropy": 1.7380223472913106, "epoch": 1.9570734118810251, "grad_norm": 0.60667884349823, "learning_rate": 2.0218764153694586e-06, "loss": 1.4921, "mean_token_accuracy": 0.6427192886670431, "num_tokens": 2986347470.0, "step": 17815 }, { "entropy": 1.7578493853410084, "epoch": 1.9571832687923978, "grad_norm": 0.6818580031394958, "learning_rate": 2.0217649883217746e-06, "loss": 1.4195, "mean_token_accuracy": 0.64846271276474, "num_tokens": 2986555340.0, "step": 17816 }, { "entropy": 1.6980695327123005, "epoch": 1.957293125703771, "grad_norm": 0.6845924258232117, "learning_rate": 2.0216538454311836e-06, "loss": 1.3064, "mean_token_accuracy": 0.6753988613684972, "num_tokens": 2986682716.0, "step": 17817 }, { "entropy": 1.711136023203532, "epoch": 1.9574029826151438, "grad_norm": 0.6631032824516296, "learning_rate": 2.0215429867012017e-06, "loss": 1.3439, "mean_token_accuracy": 0.6630576600631078, "num_tokens": 2986821237.0, "step": 17818 }, { "entropy": 1.7483221391836803, "epoch": 1.9575128395265167, "grad_norm": 0.6929804086685181, "learning_rate": 2.0214324121353403e-06, "loss": 1.5989, "mean_token_accuracy": 0.6324756195147833, "num_tokens": 2986991133.0, "step": 17819 }, { "entropy": 1.7035737733046215, "epoch": 1.9576226964378898, "grad_norm": 0.6949917078018188, "learning_rate": 2.021322121737095e-06, "loss": 1.3302, "mean_token_accuracy": 0.6612950712442398, "num_tokens": 2987114817.0, "step": 17820 }, { "entropy": 1.7653352518876393, "epoch": 1.9577325533492624, "grad_norm": 0.7462692260742188, "learning_rate": 2.0212121155099607e-06, "loss": 1.337, "mean_token_accuracy": 0.6608709494272867, "num_tokens": 2987231442.0, "step": 17821 }, { "entropy": 1.7688710192839305, "epoch": 1.9578424102606355, "grad_norm": 0.651204526424408, "learning_rate": 2.0211023934574157e-06, "loss": 1.5705, "mean_token_accuracy": 0.6285947610934576, "num_tokens": 2987437378.0, "step": 17822 }, { "entropy": 1.7017524043718975, "epoch": 1.9579522671720084, "grad_norm": 0.9739505052566528, "learning_rate": 2.0209929555829346e-06, "loss": 1.3126, "mean_token_accuracy": 0.6632164816061655, "num_tokens": 2987588804.0, "step": 17823 }, { "entropy": 1.6945142149925232, "epoch": 1.9580621240833813, "grad_norm": 0.6685092449188232, "learning_rate": 2.02088380188998e-06, "loss": 1.259, "mean_token_accuracy": 0.6727628062168757, "num_tokens": 2987716913.0, "step": 17824 }, { "entropy": 1.674852301677068, "epoch": 1.9581719809947544, "grad_norm": 0.6597932577133179, "learning_rate": 2.020774932382007e-06, "loss": 1.4541, "mean_token_accuracy": 0.6472754130760828, "num_tokens": 2987941322.0, "step": 17825 }, { "entropy": 1.6793397963047028, "epoch": 1.9582818379061273, "grad_norm": 0.5924980640411377, "learning_rate": 2.0206663470624615e-06, "loss": 1.2984, "mean_token_accuracy": 0.678433025876681, "num_tokens": 2988134589.0, "step": 17826 }, { "entropy": 1.7250191171964009, "epoch": 1.9583916948175002, "grad_norm": 0.723518967628479, "learning_rate": 2.0205580459347796e-06, "loss": 1.4791, "mean_token_accuracy": 0.6549117714166641, "num_tokens": 2988285766.0, "step": 17827 }, { "entropy": 1.6623546183109283, "epoch": 1.9585015517288733, "grad_norm": 1.5038546323776245, "learning_rate": 2.0204500290023898e-06, "loss": 1.2749, "mean_token_accuracy": 0.6662939141194025, "num_tokens": 2988537104.0, "step": 17828 }, { "entropy": 1.6771051188309987, "epoch": 1.958611408640246, "grad_norm": 0.6059991717338562, "learning_rate": 2.0203422962687107e-06, "loss": 1.474, "mean_token_accuracy": 0.650149792432785, "num_tokens": 2988725414.0, "step": 17829 }, { "entropy": 1.757999986410141, "epoch": 1.958721265551619, "grad_norm": 0.7955412268638611, "learning_rate": 2.0202348477371504e-06, "loss": 1.4515, "mean_token_accuracy": 0.659949521223704, "num_tokens": 2988875269.0, "step": 17830 }, { "entropy": 1.7157710095246632, "epoch": 1.958831122462992, "grad_norm": 0.6813088059425354, "learning_rate": 2.0201276834111118e-06, "loss": 1.3589, "mean_token_accuracy": 0.6719192713499069, "num_tokens": 2989020852.0, "step": 17831 }, { "entropy": 1.7082193493843079, "epoch": 1.9589409793743648, "grad_norm": 0.7664187550544739, "learning_rate": 2.020020803293985e-06, "loss": 1.2593, "mean_token_accuracy": 0.6711249748865763, "num_tokens": 2989151390.0, "step": 17832 }, { "entropy": 1.770639955997467, "epoch": 1.959050836285738, "grad_norm": 0.6521178483963013, "learning_rate": 2.0199142073891527e-06, "loss": 1.3776, "mean_token_accuracy": 0.6535575886567434, "num_tokens": 2989329801.0, "step": 17833 }, { "entropy": 1.6498074233531952, "epoch": 1.9591606931971106, "grad_norm": 0.7866834998130798, "learning_rate": 2.019807895699991e-06, "loss": 1.4535, "mean_token_accuracy": 0.6591331660747528, "num_tokens": 2989475469.0, "step": 17834 }, { "entropy": 1.6990808149178822, "epoch": 1.9592705501084837, "grad_norm": 0.656196653842926, "learning_rate": 2.0197018682298614e-06, "loss": 1.551, "mean_token_accuracy": 0.6299006740252177, "num_tokens": 2989681045.0, "step": 17835 }, { "entropy": 1.6446273624897003, "epoch": 1.9593804070198566, "grad_norm": 0.6230295300483704, "learning_rate": 2.019596124982121e-06, "loss": 1.4115, "mean_token_accuracy": 0.6549782206614813, "num_tokens": 2989876665.0, "step": 17836 }, { "entropy": 1.6909184356530507, "epoch": 1.9594902639312295, "grad_norm": 0.674351692199707, "learning_rate": 2.0194906659601184e-06, "loss": 1.4357, "mean_token_accuracy": 0.6496634483337402, "num_tokens": 2990066264.0, "step": 17837 }, { "entropy": 1.762929618358612, "epoch": 1.9596001208426026, "grad_norm": 0.8096222281455994, "learning_rate": 2.0193854911671875e-06, "loss": 1.432, "mean_token_accuracy": 0.64181949198246, "num_tokens": 2990181928.0, "step": 17838 }, { "entropy": 1.728385289510091, "epoch": 1.9597099777539755, "grad_norm": 0.81138676404953, "learning_rate": 2.0192806006066588e-06, "loss": 1.3549, "mean_token_accuracy": 0.6485754102468491, "num_tokens": 2990336143.0, "step": 17839 }, { "entropy": 1.7208243906497955, "epoch": 1.9598198346653484, "grad_norm": 0.6620015501976013, "learning_rate": 2.019175994281854e-06, "loss": 1.4155, "mean_token_accuracy": 0.662138968706131, "num_tokens": 2990511656.0, "step": 17840 }, { "entropy": 1.6964669227600098, "epoch": 1.9599296915767215, "grad_norm": 0.614718496799469, "learning_rate": 2.019071672196081e-06, "loss": 1.4276, "mean_token_accuracy": 0.6447423497835795, "num_tokens": 2990694853.0, "step": 17841 }, { "entropy": 1.708295355240504, "epoch": 1.9600395484880941, "grad_norm": 0.6118777990341187, "learning_rate": 2.0189676343526424e-06, "loss": 1.4314, "mean_token_accuracy": 0.6555359264214834, "num_tokens": 2990875634.0, "step": 17842 }, { "entropy": 1.7391878565152485, "epoch": 1.9601494053994672, "grad_norm": 0.6601645946502686, "learning_rate": 2.0188638807548327e-06, "loss": 1.4738, "mean_token_accuracy": 0.6468467364708582, "num_tokens": 2991041250.0, "step": 17843 }, { "entropy": 1.734427313009898, "epoch": 1.9602592623108401, "grad_norm": 0.7099672555923462, "learning_rate": 2.0187604114059326e-06, "loss": 1.4544, "mean_token_accuracy": 0.648030087351799, "num_tokens": 2991231154.0, "step": 17844 }, { "entropy": 1.6761779586474101, "epoch": 1.960369119222213, "grad_norm": 0.8135700225830078, "learning_rate": 2.01865722630922e-06, "loss": 1.493, "mean_token_accuracy": 0.6484993646542231, "num_tokens": 2991397922.0, "step": 17845 }, { "entropy": 1.6813652515411377, "epoch": 1.9604789761335861, "grad_norm": 0.616743266582489, "learning_rate": 2.0185543254679576e-06, "loss": 1.4241, "mean_token_accuracy": 0.649142454067866, "num_tokens": 2991592641.0, "step": 17846 }, { "entropy": 1.7229444285233815, "epoch": 1.9605888330449588, "grad_norm": 0.7352148294448853, "learning_rate": 2.0184517088854044e-06, "loss": 1.4267, "mean_token_accuracy": 0.6816410024960836, "num_tokens": 2991728524.0, "step": 17847 }, { "entropy": 1.737259527047475, "epoch": 1.960698689956332, "grad_norm": 0.7628870606422424, "learning_rate": 2.0183493765648073e-06, "loss": 1.3125, "mean_token_accuracy": 0.6667766869068146, "num_tokens": 2991859087.0, "step": 17848 }, { "entropy": 1.7120140492916107, "epoch": 1.9608085468677048, "grad_norm": 0.8102467060089111, "learning_rate": 2.018247328509405e-06, "loss": 1.4574, "mean_token_accuracy": 0.6617083897193273, "num_tokens": 2992026244.0, "step": 17849 }, { "entropy": 1.7056652307510376, "epoch": 1.9609184037790777, "grad_norm": 0.6154736280441284, "learning_rate": 2.018145564722428e-06, "loss": 1.3339, "mean_token_accuracy": 0.6581693887710571, "num_tokens": 2992195285.0, "step": 17850 }, { "entropy": 1.7515573799610138, "epoch": 1.9610282606904508, "grad_norm": 0.662531852722168, "learning_rate": 2.0180440852070963e-06, "loss": 1.4603, "mean_token_accuracy": 0.6410819639762243, "num_tokens": 2992437084.0, "step": 17851 }, { "entropy": 1.7056761781374614, "epoch": 1.9611381176018237, "grad_norm": 0.6547462344169617, "learning_rate": 2.017942889966621e-06, "loss": 1.2562, "mean_token_accuracy": 0.676432599623998, "num_tokens": 2992561421.0, "step": 17852 }, { "entropy": 1.7316773037115734, "epoch": 1.9612479745131965, "grad_norm": 0.6276424527168274, "learning_rate": 2.0178419790042067e-06, "loss": 1.3315, "mean_token_accuracy": 0.6649932016928991, "num_tokens": 2992734585.0, "step": 17853 }, { "entropy": 1.6133818924427032, "epoch": 1.9613578314245697, "grad_norm": 0.6731429100036621, "learning_rate": 2.017741352323046e-06, "loss": 1.1471, "mean_token_accuracy": 0.7009394268194834, "num_tokens": 2992844477.0, "step": 17854 }, { "entropy": 1.6940800249576569, "epoch": 1.9614676883359423, "grad_norm": 0.7534624934196472, "learning_rate": 2.0176410099263245e-06, "loss": 1.1898, "mean_token_accuracy": 0.6858446151018143, "num_tokens": 2992951432.0, "step": 17855 }, { "entropy": 1.7091482083002727, "epoch": 1.9615775452473154, "grad_norm": 0.6614341735839844, "learning_rate": 2.017540951817217e-06, "loss": 1.4209, "mean_token_accuracy": 0.6604591459035873, "num_tokens": 2993157963.0, "step": 17856 }, { "entropy": 1.743918001651764, "epoch": 1.9616874021586883, "grad_norm": 0.8729313015937805, "learning_rate": 2.017441177998892e-06, "loss": 1.5282, "mean_token_accuracy": 0.6517240107059479, "num_tokens": 2993348516.0, "step": 17857 }, { "entropy": 1.6870131293932598, "epoch": 1.9617972590700612, "grad_norm": 1.8149455785751343, "learning_rate": 2.017341688474505e-06, "loss": 1.0478, "mean_token_accuracy": 0.6817310601472855, "num_tokens": 2993514126.0, "step": 17858 }, { "entropy": 1.7509052058060963, "epoch": 1.9619071159814343, "grad_norm": 0.7601586580276489, "learning_rate": 2.017242483247206e-06, "loss": 1.4875, "mean_token_accuracy": 0.6382629126310349, "num_tokens": 2993744695.0, "step": 17859 }, { "entropy": 1.7065601646900177, "epoch": 1.962016972892807, "grad_norm": 0.5603963732719421, "learning_rate": 2.017143562320135e-06, "loss": 1.4044, "mean_token_accuracy": 0.6478400429089864, "num_tokens": 2993936539.0, "step": 17860 }, { "entropy": 1.6877718269824982, "epoch": 1.96212682980418, "grad_norm": 0.7344236969947815, "learning_rate": 2.0170449256964217e-06, "loss": 1.3698, "mean_token_accuracy": 0.6719866941372553, "num_tokens": 2994087478.0, "step": 17861 }, { "entropy": 1.6488263805707295, "epoch": 1.962236686715553, "grad_norm": 0.6478110551834106, "learning_rate": 2.0169465733791895e-06, "loss": 1.403, "mean_token_accuracy": 0.6502639551957449, "num_tokens": 2994304002.0, "step": 17862 }, { "entropy": 1.7499266862869263, "epoch": 1.9623465436269258, "grad_norm": 0.8414444327354431, "learning_rate": 2.0168485053715497e-06, "loss": 1.4985, "mean_token_accuracy": 0.6633632381757101, "num_tokens": 2994442860.0, "step": 17863 }, { "entropy": 1.6683284640312195, "epoch": 1.962456400538299, "grad_norm": 0.8371381759643555, "learning_rate": 2.0167507216766076e-06, "loss": 1.2807, "mean_token_accuracy": 0.6784952729940414, "num_tokens": 2994570920.0, "step": 17864 }, { "entropy": 1.7751424014568329, "epoch": 1.9625662574496718, "grad_norm": 0.6342650651931763, "learning_rate": 2.0166532222974564e-06, "loss": 1.464, "mean_token_accuracy": 0.6453549315532049, "num_tokens": 2994786840.0, "step": 17865 }, { "entropy": 1.7355596522490184, "epoch": 1.9626761143610447, "grad_norm": 0.776604175567627, "learning_rate": 2.0165560072371824e-06, "loss": 1.4411, "mean_token_accuracy": 0.6572767297426859, "num_tokens": 2994965451.0, "step": 17866 }, { "entropy": 1.6895995835463207, "epoch": 1.9627859712724178, "grad_norm": 0.7442336678504944, "learning_rate": 2.0164590764988637e-06, "loss": 1.4932, "mean_token_accuracy": 0.6494418730338415, "num_tokens": 2995177256.0, "step": 17867 }, { "entropy": 1.7676254113515217, "epoch": 1.9628958281837905, "grad_norm": 0.6468122005462646, "learning_rate": 2.016362430085567e-06, "loss": 1.5374, "mean_token_accuracy": 0.6308100124200186, "num_tokens": 2995357688.0, "step": 17868 }, { "entropy": 1.7291751305262248, "epoch": 1.9630056850951636, "grad_norm": 0.7243047952651978, "learning_rate": 2.016266068000351e-06, "loss": 1.4871, "mean_token_accuracy": 0.6339818388223648, "num_tokens": 2995565257.0, "step": 17869 }, { "entropy": 1.7408881783485413, "epoch": 1.9631155420065365, "grad_norm": 0.7614642977714539, "learning_rate": 2.0161699902462664e-06, "loss": 1.5102, "mean_token_accuracy": 0.6478269298871359, "num_tokens": 2995720018.0, "step": 17870 }, { "entropy": 1.6972165405750275, "epoch": 1.9632253989179094, "grad_norm": 0.7591689229011536, "learning_rate": 2.016074196826353e-06, "loss": 1.3704, "mean_token_accuracy": 0.6604073345661163, "num_tokens": 2995871780.0, "step": 17871 }, { "entropy": 1.6704501807689667, "epoch": 1.9633352558292825, "grad_norm": 0.665471076965332, "learning_rate": 2.0159786877436425e-06, "loss": 1.3698, "mean_token_accuracy": 0.6625976065794627, "num_tokens": 2996074778.0, "step": 17872 }, { "entropy": 1.6610515713691711, "epoch": 1.9634451127406551, "grad_norm": 0.6701338887214661, "learning_rate": 2.015883463001159e-06, "loss": 1.3937, "mean_token_accuracy": 0.6479357580343882, "num_tokens": 2996245890.0, "step": 17873 }, { "entropy": 1.6841512620449066, "epoch": 1.9635549696520282, "grad_norm": 0.7010718584060669, "learning_rate": 2.015788522601915e-06, "loss": 1.368, "mean_token_accuracy": 0.6470450113217036, "num_tokens": 2996414751.0, "step": 17874 }, { "entropy": 1.678158462047577, "epoch": 1.9636648265634011, "grad_norm": 0.8353567123413086, "learning_rate": 2.0156938665489163e-06, "loss": 1.3558, "mean_token_accuracy": 0.6673894474903742, "num_tokens": 2996608436.0, "step": 17875 }, { "entropy": 1.6845565140247345, "epoch": 1.963774683474774, "grad_norm": 0.5820711851119995, "learning_rate": 2.0155994948451575e-06, "loss": 1.3492, "mean_token_accuracy": 0.6662445664405823, "num_tokens": 2996777414.0, "step": 17876 }, { "entropy": 1.692328284184138, "epoch": 1.9638845403861471, "grad_norm": 0.7429178357124329, "learning_rate": 2.015505407493627e-06, "loss": 1.3524, "mean_token_accuracy": 0.6588386446237564, "num_tokens": 2996917973.0, "step": 17877 }, { "entropy": 1.7147633930047352, "epoch": 1.96399439729752, "grad_norm": 0.6130802035331726, "learning_rate": 2.0154116044973023e-06, "loss": 1.3864, "mean_token_accuracy": 0.6510560760895411, "num_tokens": 2997075428.0, "step": 17878 }, { "entropy": 1.674227237701416, "epoch": 1.964104254208893, "grad_norm": 0.6673222184181213, "learning_rate": 2.015318085859151e-06, "loss": 1.2579, "mean_token_accuracy": 0.6707002917925516, "num_tokens": 2997213096.0, "step": 17879 }, { "entropy": 1.669933428366979, "epoch": 1.964214111120266, "grad_norm": 0.781856894493103, "learning_rate": 2.0152248515821334e-06, "loss": 1.3723, "mean_token_accuracy": 0.6608141760031382, "num_tokens": 2997395284.0, "step": 17880 }, { "entropy": 1.7202060023943584, "epoch": 1.9643239680316387, "grad_norm": 1.0455927848815918, "learning_rate": 2.0151319016692005e-06, "loss": 1.5199, "mean_token_accuracy": 0.6652625252803167, "num_tokens": 2997573918.0, "step": 17881 }, { "entropy": 1.700150231520335, "epoch": 1.9644338249430118, "grad_norm": 0.718952476978302, "learning_rate": 2.015039236123295e-06, "loss": 1.3763, "mean_token_accuracy": 0.649369607369105, "num_tokens": 2997731551.0, "step": 17882 }, { "entropy": 1.7306404809157054, "epoch": 1.9645436818543847, "grad_norm": 0.6521731615066528, "learning_rate": 2.014946854947349e-06, "loss": 1.3775, "mean_token_accuracy": 0.6443797498941422, "num_tokens": 2997883899.0, "step": 17883 }, { "entropy": 1.767806778351466, "epoch": 1.9646535387657575, "grad_norm": 0.9187655448913574, "learning_rate": 2.014854758144286e-06, "loss": 1.6006, "mean_token_accuracy": 0.6336929003397623, "num_tokens": 2998040914.0, "step": 17884 }, { "entropy": 1.73748313387235, "epoch": 1.9647633956771307, "grad_norm": 0.7256967425346375, "learning_rate": 2.0147629457170213e-06, "loss": 1.2618, "mean_token_accuracy": 0.6701177606980006, "num_tokens": 2998217430.0, "step": 17885 }, { "entropy": 1.6992026666800182, "epoch": 1.9648732525885033, "grad_norm": 0.6321195363998413, "learning_rate": 2.01467141766846e-06, "loss": 1.3484, "mean_token_accuracy": 0.6581118901570638, "num_tokens": 2998385433.0, "step": 17886 }, { "entropy": 1.7310790121555328, "epoch": 1.9649831094998764, "grad_norm": 0.7075302600860596, "learning_rate": 2.0145801740015e-06, "loss": 1.5002, "mean_token_accuracy": 0.6584192862113317, "num_tokens": 2998560209.0, "step": 17887 }, { "entropy": 1.6907731493314107, "epoch": 1.9650929664112493, "grad_norm": 0.7162296772003174, "learning_rate": 2.014489214719028e-06, "loss": 1.2948, "mean_token_accuracy": 0.6692901601394018, "num_tokens": 2998710324.0, "step": 17888 }, { "entropy": 1.7298618853092194, "epoch": 1.9652028233226222, "grad_norm": 0.6216132044792175, "learning_rate": 2.0143985398239234e-06, "loss": 1.4547, "mean_token_accuracy": 0.6533665706713995, "num_tokens": 2998882685.0, "step": 17889 }, { "entropy": 1.6897225081920624, "epoch": 1.9653126802339953, "grad_norm": 0.6112697720527649, "learning_rate": 2.0143081493190567e-06, "loss": 1.4371, "mean_token_accuracy": 0.6600435972213745, "num_tokens": 2999088643.0, "step": 17890 }, { "entropy": 1.688838044802348, "epoch": 1.9654225371453682, "grad_norm": 0.7275003790855408, "learning_rate": 2.0142180432072876e-06, "loss": 1.4302, "mean_token_accuracy": 0.6603851070006689, "num_tokens": 2999265814.0, "step": 17891 }, { "entropy": 1.6855897307395935, "epoch": 1.965532394056741, "grad_norm": 0.6274251937866211, "learning_rate": 2.0141282214914685e-06, "loss": 1.2999, "mean_token_accuracy": 0.6625367701053619, "num_tokens": 2999421528.0, "step": 17892 }, { "entropy": 1.6572815577189128, "epoch": 1.9656422509681142, "grad_norm": 0.766179084777832, "learning_rate": 2.014038684174442e-06, "loss": 1.3068, "mean_token_accuracy": 0.6744897613922755, "num_tokens": 2999571936.0, "step": 17893 }, { "entropy": 1.7280305624008179, "epoch": 1.9657521078794868, "grad_norm": 0.7325267195701599, "learning_rate": 2.0139494312590415e-06, "loss": 1.4753, "mean_token_accuracy": 0.6507488141457239, "num_tokens": 2999749565.0, "step": 17894 }, { "entropy": 1.6773878633975983, "epoch": 1.96586196479086, "grad_norm": 0.6621171236038208, "learning_rate": 2.013860462748093e-06, "loss": 1.4302, "mean_token_accuracy": 0.6595263083775839, "num_tokens": 2999909764.0, "step": 17895 }, { "entropy": 1.6768188774585724, "epoch": 1.9659718217022328, "grad_norm": 0.6506795883178711, "learning_rate": 2.0137717786444112e-06, "loss": 1.4475, "mean_token_accuracy": 0.6549390902121862, "num_tokens": 3000064739.0, "step": 17896 }, { "entropy": 1.7129511932531993, "epoch": 1.9660816786136057, "grad_norm": 0.643744707107544, "learning_rate": 2.0136833789508033e-06, "loss": 1.48, "mean_token_accuracy": 0.6619319965442022, "num_tokens": 3000241947.0, "step": 17897 }, { "entropy": 1.700930525859197, "epoch": 1.9661915355249788, "grad_norm": 0.625928521156311, "learning_rate": 2.0135952636700674e-06, "loss": 1.2884, "mean_token_accuracy": 0.6703586975733439, "num_tokens": 3000384757.0, "step": 17898 }, { "entropy": 1.697757363319397, "epoch": 1.9663013924363515, "grad_norm": 0.6434378623962402, "learning_rate": 2.0135074328049923e-06, "loss": 1.3812, "mean_token_accuracy": 0.6578892767429352, "num_tokens": 3000562786.0, "step": 17899 }, { "entropy": 1.6748135387897491, "epoch": 1.9664112493477246, "grad_norm": 0.6851823329925537, "learning_rate": 2.0134198863583563e-06, "loss": 1.4003, "mean_token_accuracy": 0.6644940574963888, "num_tokens": 3000707563.0, "step": 17900 }, { "entropy": 1.7217712998390198, "epoch": 1.9665211062590975, "grad_norm": 0.6717512011528015, "learning_rate": 2.0133326243329327e-06, "loss": 1.2687, "mean_token_accuracy": 0.6760751704374949, "num_tokens": 3000863358.0, "step": 17901 }, { "entropy": 1.6838585337003071, "epoch": 1.9666309631704704, "grad_norm": 0.7156099677085876, "learning_rate": 2.0132456467314814e-06, "loss": 1.4277, "mean_token_accuracy": 0.649495929479599, "num_tokens": 3001021623.0, "step": 17902 }, { "entropy": 1.6566158632437389, "epoch": 1.9667408200818435, "grad_norm": 0.6351657509803772, "learning_rate": 2.0131589535567566e-06, "loss": 1.4425, "mean_token_accuracy": 0.6564101775487264, "num_tokens": 3001189910.0, "step": 17903 }, { "entropy": 1.6897284885247548, "epoch": 1.9668506769932164, "grad_norm": 0.6042178273200989, "learning_rate": 2.0130725448115005e-06, "loss": 1.3028, "mean_token_accuracy": 0.6670292864243189, "num_tokens": 3001334695.0, "step": 17904 }, { "entropy": 1.6312000652154286, "epoch": 1.9669605339045892, "grad_norm": 0.59149169921875, "learning_rate": 2.012986420498449e-06, "loss": 1.3407, "mean_token_accuracy": 0.6634353597958883, "num_tokens": 3001513880.0, "step": 17905 }, { "entropy": 1.697037806113561, "epoch": 1.9670703908159624, "grad_norm": 0.6991804242134094, "learning_rate": 2.0129005806203278e-06, "loss": 1.3814, "mean_token_accuracy": 0.6621117989222208, "num_tokens": 3001658835.0, "step": 17906 }, { "entropy": 1.747629165649414, "epoch": 1.967180247727335, "grad_norm": 0.7504332065582275, "learning_rate": 2.0128150251798533e-06, "loss": 1.3286, "mean_token_accuracy": 0.6650111377239227, "num_tokens": 3001809013.0, "step": 17907 }, { "entropy": 1.7131927410761516, "epoch": 1.9672901046387081, "grad_norm": 0.7261272668838501, "learning_rate": 2.0127297541797336e-06, "loss": 1.3738, "mean_token_accuracy": 0.6725722004969915, "num_tokens": 3001948343.0, "step": 17908 }, { "entropy": 1.6718792418638866, "epoch": 1.967399961550081, "grad_norm": 0.6443445086479187, "learning_rate": 2.0126447676226678e-06, "loss": 1.3358, "mean_token_accuracy": 0.6656797925631205, "num_tokens": 3002112297.0, "step": 17909 }, { "entropy": 1.6951535542805989, "epoch": 1.967509818461454, "grad_norm": 0.7346131205558777, "learning_rate": 2.012560065511345e-06, "loss": 1.318, "mean_token_accuracy": 0.6694110383590063, "num_tokens": 3002236731.0, "step": 17910 }, { "entropy": 1.7302409609158833, "epoch": 1.967619675372827, "grad_norm": 0.625469982624054, "learning_rate": 2.012475647848446e-06, "loss": 1.3161, "mean_token_accuracy": 0.6690036505460739, "num_tokens": 3002373310.0, "step": 17911 }, { "entropy": 1.6581771274407704, "epoch": 1.9677295322841997, "grad_norm": 0.5975332260131836, "learning_rate": 2.0123915146366434e-06, "loss": 1.3223, "mean_token_accuracy": 0.6745662887891134, "num_tokens": 3002542926.0, "step": 17912 }, { "entropy": 1.6342376867930095, "epoch": 1.9678393891955728, "grad_norm": 0.6779927611351013, "learning_rate": 2.012307665878599e-06, "loss": 1.3618, "mean_token_accuracy": 0.6652511854966482, "num_tokens": 3002702125.0, "step": 17913 }, { "entropy": 1.714419464270274, "epoch": 1.9679492461069457, "grad_norm": 0.6036478877067566, "learning_rate": 2.0122241015769676e-06, "loss": 1.378, "mean_token_accuracy": 0.6464525610208511, "num_tokens": 3002865175.0, "step": 17914 }, { "entropy": 1.7365634739398956, "epoch": 1.9680591030183185, "grad_norm": 0.9775516390800476, "learning_rate": 2.0121408217343923e-06, "loss": 1.3371, "mean_token_accuracy": 0.6688741395870844, "num_tokens": 3002997739.0, "step": 17915 }, { "entropy": 1.7164349257946014, "epoch": 1.9681689599296917, "grad_norm": 0.6772252321243286, "learning_rate": 2.0120578263535116e-06, "loss": 1.4787, "mean_token_accuracy": 0.6568474372227987, "num_tokens": 3003173190.0, "step": 17916 }, { "entropy": 1.6874643166859944, "epoch": 1.9682788168410645, "grad_norm": 0.6911616325378418, "learning_rate": 2.01197511543695e-06, "loss": 1.4895, "mean_token_accuracy": 0.6461264938116074, "num_tokens": 3003386472.0, "step": 17917 }, { "entropy": 1.7422433296839397, "epoch": 1.9683886737524374, "grad_norm": 0.7325913906097412, "learning_rate": 2.011892688987325e-06, "loss": 1.5443, "mean_token_accuracy": 0.6517143944899241, "num_tokens": 3003544907.0, "step": 17918 }, { "entropy": 1.7027767598628998, "epoch": 1.9684985306638105, "grad_norm": 0.6650202870368958, "learning_rate": 2.011810547007247e-06, "loss": 1.3508, "mean_token_accuracy": 0.6672835250695547, "num_tokens": 3003683798.0, "step": 17919 }, { "entropy": 1.6662018199761708, "epoch": 1.9686083875751832, "grad_norm": 0.8393872976303101, "learning_rate": 2.0117286894993153e-06, "loss": 1.2116, "mean_token_accuracy": 0.684177945057551, "num_tokens": 3003813303.0, "step": 17920 }, { "entropy": 1.6738957564036052, "epoch": 1.9687182444865563, "grad_norm": 0.6645524501800537, "learning_rate": 2.01164711646612e-06, "loss": 1.4325, "mean_token_accuracy": 0.6557613164186478, "num_tokens": 3003993607.0, "step": 17921 }, { "entropy": 1.6993304590384166, "epoch": 1.9688281013979292, "grad_norm": 0.8092979788780212, "learning_rate": 2.0115658279102425e-06, "loss": 1.5485, "mean_token_accuracy": 0.632763127485911, "num_tokens": 3004208309.0, "step": 17922 }, { "entropy": 1.7931698858737946, "epoch": 1.968937958309302, "grad_norm": 0.6377851963043213, "learning_rate": 2.011484823834258e-06, "loss": 1.3844, "mean_token_accuracy": 0.6499971399704615, "num_tokens": 3004364405.0, "step": 17923 }, { "entropy": 1.7573831876118977, "epoch": 1.9690478152206752, "grad_norm": 0.729793131351471, "learning_rate": 2.0114041042407263e-06, "loss": 1.381, "mean_token_accuracy": 0.6636466036240259, "num_tokens": 3004561554.0, "step": 17924 }, { "entropy": 1.6667506992816925, "epoch": 1.9691576721320478, "grad_norm": 0.685034990310669, "learning_rate": 2.0113236691322057e-06, "loss": 1.5663, "mean_token_accuracy": 0.6583135426044464, "num_tokens": 3004756452.0, "step": 17925 }, { "entropy": 1.6636810302734375, "epoch": 1.969267529043421, "grad_norm": 0.660313606262207, "learning_rate": 2.0112435185112403e-06, "loss": 1.423, "mean_token_accuracy": 0.6492910335461298, "num_tokens": 3004946559.0, "step": 17926 }, { "entropy": 1.6946007212003071, "epoch": 1.9693773859547938, "grad_norm": 0.6375925540924072, "learning_rate": 2.0111636523803675e-06, "loss": 1.3594, "mean_token_accuracy": 0.6636908402045568, "num_tokens": 3005102722.0, "step": 17927 }, { "entropy": 1.6661100486914318, "epoch": 1.9694872428661667, "grad_norm": 0.6003373265266418, "learning_rate": 2.011084070742114e-06, "loss": 1.5099, "mean_token_accuracy": 0.657312293847402, "num_tokens": 3005269206.0, "step": 17928 }, { "entropy": 1.7043770054976146, "epoch": 1.9695970997775398, "grad_norm": 0.6125054359436035, "learning_rate": 2.0110047735989994e-06, "loss": 1.474, "mean_token_accuracy": 0.6454812387625376, "num_tokens": 3005453743.0, "step": 17929 }, { "entropy": 1.71139990290006, "epoch": 1.9697069566889127, "grad_norm": 0.6473939418792725, "learning_rate": 2.0109257609535333e-06, "loss": 1.4996, "mean_token_accuracy": 0.6278078705072403, "num_tokens": 3005689388.0, "step": 17930 }, { "entropy": 1.654092291990916, "epoch": 1.9698168136002856, "grad_norm": 0.6603448390960693, "learning_rate": 2.010847032808216e-06, "loss": 1.1989, "mean_token_accuracy": 0.6887961675723394, "num_tokens": 3005819944.0, "step": 17931 }, { "entropy": 1.6703099111715953, "epoch": 1.9699266705116587, "grad_norm": 0.7166707515716553, "learning_rate": 2.0107685891655396e-06, "loss": 1.3184, "mean_token_accuracy": 0.6670674930016199, "num_tokens": 3005962932.0, "step": 17932 }, { "entropy": 1.6925211747487385, "epoch": 1.9700365274230314, "grad_norm": 0.8018051981925964, "learning_rate": 2.0106904300279875e-06, "loss": 1.1981, "mean_token_accuracy": 0.6955769310394923, "num_tokens": 3006075267.0, "step": 17933 }, { "entropy": 1.6854363183180492, "epoch": 1.9701463843344045, "grad_norm": 0.6480568647384644, "learning_rate": 2.010612555398032e-06, "loss": 1.3139, "mean_token_accuracy": 0.6723464528719584, "num_tokens": 3006253222.0, "step": 17934 }, { "entropy": 1.6880821188290913, "epoch": 1.9702562412457774, "grad_norm": 0.7775602340698242, "learning_rate": 2.0105349652781383e-06, "loss": 1.2487, "mean_token_accuracy": 0.6716853429873785, "num_tokens": 3006374921.0, "step": 17935 }, { "entropy": 1.725311279296875, "epoch": 1.9703660981571502, "grad_norm": 0.8793063163757324, "learning_rate": 2.0104576596707627e-06, "loss": 1.451, "mean_token_accuracy": 0.6542079498370489, "num_tokens": 3006567762.0, "step": 17936 }, { "entropy": 1.6732623775800068, "epoch": 1.9704759550685234, "grad_norm": 0.6772112846374512, "learning_rate": 2.0103806385783504e-06, "loss": 1.2661, "mean_token_accuracy": 0.6756617873907089, "num_tokens": 3006709794.0, "step": 17937 }, { "entropy": 1.7369797627131145, "epoch": 1.9705858119798962, "grad_norm": 0.7336527109146118, "learning_rate": 2.0103039020033403e-06, "loss": 1.4932, "mean_token_accuracy": 0.6467774361371994, "num_tokens": 3006873792.0, "step": 17938 }, { "entropy": 1.671160767475764, "epoch": 1.9706956688912691, "grad_norm": 0.684969961643219, "learning_rate": 2.0102274499481617e-06, "loss": 1.3979, "mean_token_accuracy": 0.6610272874434789, "num_tokens": 3007044737.0, "step": 17939 }, { "entropy": 1.6652365227540333, "epoch": 1.970805525802642, "grad_norm": 0.6617401242256165, "learning_rate": 2.010151282415233e-06, "loss": 1.3237, "mean_token_accuracy": 0.6622043897708257, "num_tokens": 3007183324.0, "step": 17940 }, { "entropy": 1.6508016188939412, "epoch": 1.970915382714015, "grad_norm": 0.7206847667694092, "learning_rate": 2.010075399406965e-06, "loss": 1.4783, "mean_token_accuracy": 0.6530425846576691, "num_tokens": 3007373061.0, "step": 17941 }, { "entropy": 1.7173553705215454, "epoch": 1.971025239625388, "grad_norm": 0.741521954536438, "learning_rate": 2.00999980092576e-06, "loss": 1.4746, "mean_token_accuracy": 0.6368623872598013, "num_tokens": 3007552632.0, "step": 17942 }, { "entropy": 1.7114310661951702, "epoch": 1.9711350965367609, "grad_norm": 0.6318982243537903, "learning_rate": 2.0099244869740097e-06, "loss": 1.3902, "mean_token_accuracy": 0.6469387610753378, "num_tokens": 3007779921.0, "step": 17943 }, { "entropy": 1.7579089105129242, "epoch": 1.9712449534481338, "grad_norm": 0.7655471563339233, "learning_rate": 2.0098494575540984e-06, "loss": 1.3765, "mean_token_accuracy": 0.6498502790927887, "num_tokens": 3007926071.0, "step": 17944 }, { "entropy": 1.6568682293097179, "epoch": 1.9713548103595069, "grad_norm": 0.8164061903953552, "learning_rate": 2.009774712668402e-06, "loss": 1.373, "mean_token_accuracy": 0.6605587005615234, "num_tokens": 3008099242.0, "step": 17945 }, { "entropy": 1.680535574754079, "epoch": 1.9714646672708795, "grad_norm": 0.6372302174568176, "learning_rate": 2.009700252319283e-06, "loss": 1.3498, "mean_token_accuracy": 0.6511732886234919, "num_tokens": 3008321670.0, "step": 17946 }, { "entropy": 1.7110346754391987, "epoch": 1.9715745241822527, "grad_norm": 0.7086535096168518, "learning_rate": 2.0096260765091015e-06, "loss": 1.3739, "mean_token_accuracy": 0.6551353732744852, "num_tokens": 3008445047.0, "step": 17947 }, { "entropy": 1.7076995074748993, "epoch": 1.9716843810936255, "grad_norm": 0.7128680348396301, "learning_rate": 2.0095521852402027e-06, "loss": 1.3066, "mean_token_accuracy": 0.6718765745560328, "num_tokens": 3008565086.0, "step": 17948 }, { "entropy": 1.6902350385983784, "epoch": 1.9717942380049984, "grad_norm": 0.7834108471870422, "learning_rate": 2.0094785785149257e-06, "loss": 1.4292, "mean_token_accuracy": 0.6641406814257304, "num_tokens": 3008720383.0, "step": 17949 }, { "entropy": 1.642638107140859, "epoch": 1.9719040949163715, "grad_norm": 0.6965975761413574, "learning_rate": 2.009405256335602e-06, "loss": 1.2824, "mean_token_accuracy": 0.6902331511179606, "num_tokens": 3008836709.0, "step": 17950 }, { "entropy": 1.769042044878006, "epoch": 1.9720139518277444, "grad_norm": 0.9151806235313416, "learning_rate": 2.0093322187045495e-06, "loss": 1.5366, "mean_token_accuracy": 0.6416715830564499, "num_tokens": 3009021041.0, "step": 17951 }, { "entropy": 1.705027828613917, "epoch": 1.9721238087391173, "grad_norm": 0.6604277491569519, "learning_rate": 2.0092594656240805e-06, "loss": 1.2852, "mean_token_accuracy": 0.6765786459048589, "num_tokens": 3009177630.0, "step": 17952 }, { "entropy": 1.740889310836792, "epoch": 1.9722336656504902, "grad_norm": 0.7900790572166443, "learning_rate": 2.0091869970965e-06, "loss": 1.2679, "mean_token_accuracy": 0.6788128217061361, "num_tokens": 3009308901.0, "step": 17953 }, { "entropy": 1.6999844014644623, "epoch": 1.972343522561863, "grad_norm": 0.6160433888435364, "learning_rate": 2.0091148131240973e-06, "loss": 1.4133, "mean_token_accuracy": 0.6537296175956726, "num_tokens": 3009523642.0, "step": 17954 }, { "entropy": 1.7226817508538563, "epoch": 1.9724533794732362, "grad_norm": 0.7243252992630005, "learning_rate": 2.0090429137091604e-06, "loss": 1.3226, "mean_token_accuracy": 0.6633811742067337, "num_tokens": 3009666666.0, "step": 17955 }, { "entropy": 1.7036002576351166, "epoch": 1.972563236384609, "grad_norm": 0.6371243596076965, "learning_rate": 2.0089712988539647e-06, "loss": 1.3227, "mean_token_accuracy": 0.6615711351235708, "num_tokens": 3009817269.0, "step": 17956 }, { "entropy": 1.6567615171273549, "epoch": 1.972673093295982, "grad_norm": 0.7171679735183716, "learning_rate": 2.008899968560774e-06, "loss": 1.4099, "mean_token_accuracy": 0.649737944205602, "num_tokens": 3009963221.0, "step": 17957 }, { "entropy": 1.7603488365809123, "epoch": 1.972782950207355, "grad_norm": 0.7773517966270447, "learning_rate": 2.0088289228318493e-06, "loss": 1.2555, "mean_token_accuracy": 0.6756314287583033, "num_tokens": 3010058260.0, "step": 17958 }, { "entropy": 1.7130048672358196, "epoch": 1.9728928071187277, "grad_norm": 0.7091876864433289, "learning_rate": 2.008758161669438e-06, "loss": 1.4677, "mean_token_accuracy": 0.6474677075942358, "num_tokens": 3010235324.0, "step": 17959 }, { "entropy": 1.7637153267860413, "epoch": 1.9730026640301008, "grad_norm": 0.8148097395896912, "learning_rate": 2.008687685075778e-06, "loss": 1.3503, "mean_token_accuracy": 0.6695531010627747, "num_tokens": 3010358374.0, "step": 17960 }, { "entropy": 1.6533841292063396, "epoch": 1.9731125209414737, "grad_norm": 0.6595221161842346, "learning_rate": 2.0086174930531026e-06, "loss": 1.2779, "mean_token_accuracy": 0.6797658701737722, "num_tokens": 3010515100.0, "step": 17961 }, { "entropy": 1.6787741879622142, "epoch": 1.9732223778528466, "grad_norm": 0.6817485690116882, "learning_rate": 2.0085475856036317e-06, "loss": 1.6186, "mean_token_accuracy": 0.6448809107144674, "num_tokens": 3010711557.0, "step": 17962 }, { "entropy": 1.7178953389326732, "epoch": 1.9733322347642197, "grad_norm": 0.710960865020752, "learning_rate": 2.0084779627295764e-06, "loss": 1.4679, "mean_token_accuracy": 0.64095505575339, "num_tokens": 3010862378.0, "step": 17963 }, { "entropy": 1.7367713054021199, "epoch": 1.9734420916755926, "grad_norm": 0.7530184388160706, "learning_rate": 2.008408624433144e-06, "loss": 1.4309, "mean_token_accuracy": 0.6414470473925272, "num_tokens": 3011047947.0, "step": 17964 }, { "entropy": 1.6682301461696625, "epoch": 1.9735519485869655, "grad_norm": 0.747166633605957, "learning_rate": 2.008339570716525e-06, "loss": 1.3236, "mean_token_accuracy": 0.6601849645376205, "num_tokens": 3011180016.0, "step": 17965 }, { "entropy": 1.7371576726436615, "epoch": 1.9736618054983384, "grad_norm": 0.7257885932922363, "learning_rate": 2.0082708015819084e-06, "loss": 1.2824, "mean_token_accuracy": 0.6802943547566732, "num_tokens": 3011352111.0, "step": 17966 }, { "entropy": 1.7696191271146138, "epoch": 1.9737716624097112, "grad_norm": 0.6771504878997803, "learning_rate": 2.008202317031469e-06, "loss": 1.4747, "mean_token_accuracy": 0.6517404715220133, "num_tokens": 3011523618.0, "step": 17967 }, { "entropy": 1.7489943603674571, "epoch": 1.9738815193210844, "grad_norm": 0.6868027448654175, "learning_rate": 2.0081341170673733e-06, "loss": 1.4084, "mean_token_accuracy": 0.6525389303763708, "num_tokens": 3011696831.0, "step": 17968 }, { "entropy": 1.7214332520961761, "epoch": 1.9739913762324572, "grad_norm": 0.6243865489959717, "learning_rate": 2.0080662016917824e-06, "loss": 1.4266, "mean_token_accuracy": 0.6496342072884241, "num_tokens": 3011899761.0, "step": 17969 }, { "entropy": 1.6936425268650055, "epoch": 1.9741012331438301, "grad_norm": 0.6805530786514282, "learning_rate": 2.007998570906844e-06, "loss": 1.4454, "mean_token_accuracy": 0.6591909031073252, "num_tokens": 3012066461.0, "step": 17970 }, { "entropy": 1.6858153243859608, "epoch": 1.9742110900552032, "grad_norm": 0.8352508544921875, "learning_rate": 2.007931224714698e-06, "loss": 1.2945, "mean_token_accuracy": 0.6747691531976064, "num_tokens": 3012255327.0, "step": 17971 }, { "entropy": 1.6527644395828247, "epoch": 1.974320946966576, "grad_norm": 0.6151508092880249, "learning_rate": 2.0078641631174775e-06, "loss": 1.2585, "mean_token_accuracy": 0.6740232904752096, "num_tokens": 3012413394.0, "step": 17972 }, { "entropy": 1.7410860856374104, "epoch": 1.974430803877949, "grad_norm": 0.7390371561050415, "learning_rate": 2.007797386117304e-06, "loss": 1.38, "mean_token_accuracy": 0.6596208562453588, "num_tokens": 3012562797.0, "step": 17973 }, { "entropy": 1.6805489460627239, "epoch": 1.9745406607893219, "grad_norm": 0.5240894556045532, "learning_rate": 2.007730893716292e-06, "loss": 1.4129, "mean_token_accuracy": 0.6490062524875005, "num_tokens": 3012788257.0, "step": 17974 }, { "entropy": 1.6937835117181141, "epoch": 1.9746505177006948, "grad_norm": 0.7052786350250244, "learning_rate": 2.0076646859165442e-06, "loss": 1.4543, "mean_token_accuracy": 0.6538620889186859, "num_tokens": 3012948228.0, "step": 17975 }, { "entropy": 1.68635560075442, "epoch": 1.9747603746120679, "grad_norm": 0.6069852709770203, "learning_rate": 2.0075987627201576e-06, "loss": 1.4386, "mean_token_accuracy": 0.6424238681793213, "num_tokens": 3013179738.0, "step": 17976 }, { "entropy": 1.720237821340561, "epoch": 1.9748702315234408, "grad_norm": 0.6068885922431946, "learning_rate": 2.007533124129218e-06, "loss": 1.4261, "mean_token_accuracy": 0.6477613896131516, "num_tokens": 3013348074.0, "step": 17977 }, { "entropy": 1.7363394598166149, "epoch": 1.9749800884348137, "grad_norm": 0.7410934567451477, "learning_rate": 2.0074677701458028e-06, "loss": 1.4191, "mean_token_accuracy": 0.6523391604423523, "num_tokens": 3013487387.0, "step": 17978 }, { "entropy": 1.6087459822495778, "epoch": 1.9750899453461865, "grad_norm": 0.7028157711029053, "learning_rate": 2.007402700771981e-06, "loss": 1.3504, "mean_token_accuracy": 0.6695465197165807, "num_tokens": 3013654113.0, "step": 17979 }, { "entropy": 1.7600714067618053, "epoch": 1.9751998022575594, "grad_norm": 0.8189085721969604, "learning_rate": 2.007337916009811e-06, "loss": 1.4994, "mean_token_accuracy": 0.6365965008735657, "num_tokens": 3013843992.0, "step": 17980 }, { "entropy": 1.75289652744929, "epoch": 1.9753096591689325, "grad_norm": 0.7521695494651794, "learning_rate": 2.0072734158613445e-06, "loss": 1.3926, "mean_token_accuracy": 0.6658550798892975, "num_tokens": 3014006944.0, "step": 17981 }, { "entropy": 1.6855365534623463, "epoch": 1.9754195160803054, "grad_norm": 0.6774551272392273, "learning_rate": 2.0072092003286216e-06, "loss": 1.2929, "mean_token_accuracy": 0.680366670091947, "num_tokens": 3014157643.0, "step": 17982 }, { "entropy": 1.7333262066046398, "epoch": 1.9755293729916783, "grad_norm": 1.2397245168685913, "learning_rate": 2.0071452694136757e-06, "loss": 1.1822, "mean_token_accuracy": 0.6727334012587866, "num_tokens": 3014341117.0, "step": 17983 }, { "entropy": 1.7674992481867473, "epoch": 1.9756392299030514, "grad_norm": 0.7200416922569275, "learning_rate": 2.0070816231185293e-06, "loss": 1.4271, "mean_token_accuracy": 0.6562386403481165, "num_tokens": 3014516310.0, "step": 17984 }, { "entropy": 1.6975335478782654, "epoch": 1.975749086814424, "grad_norm": 0.6550208926200867, "learning_rate": 2.007018261445197e-06, "loss": 1.2928, "mean_token_accuracy": 0.6839944074551264, "num_tokens": 3014672547.0, "step": 17985 }, { "entropy": 1.7610229949156444, "epoch": 1.9758589437257972, "grad_norm": 0.6302772164344788, "learning_rate": 2.0069551843956847e-06, "loss": 1.4284, "mean_token_accuracy": 0.6451925585667292, "num_tokens": 3014823710.0, "step": 17986 }, { "entropy": 1.7303914825121562, "epoch": 1.97596880063717, "grad_norm": 0.6791033744812012, "learning_rate": 2.006892391971989e-06, "loss": 1.3366, "mean_token_accuracy": 0.6641490111748377, "num_tokens": 3014969094.0, "step": 17987 }, { "entropy": 1.7095533609390259, "epoch": 1.976078657548543, "grad_norm": 0.6710312962532043, "learning_rate": 2.0068298841760956e-06, "loss": 1.4577, "mean_token_accuracy": 0.631788025299708, "num_tokens": 3015156304.0, "step": 17988 }, { "entropy": 1.7411635220050812, "epoch": 1.976188514459916, "grad_norm": 0.6896677017211914, "learning_rate": 2.006767661009985e-06, "loss": 1.4357, "mean_token_accuracy": 0.6591566403706869, "num_tokens": 3015356842.0, "step": 17989 }, { "entropy": 1.7042207817236583, "epoch": 1.976298371371289, "grad_norm": 0.6808891892433167, "learning_rate": 2.0067057224756247e-06, "loss": 1.5334, "mean_token_accuracy": 0.6491953035195669, "num_tokens": 3015582556.0, "step": 17990 }, { "entropy": 1.6519914468129475, "epoch": 1.9764082282826618, "grad_norm": 0.6816950440406799, "learning_rate": 2.006644068574976e-06, "loss": 1.2486, "mean_token_accuracy": 0.6812132398287455, "num_tokens": 3015709616.0, "step": 17991 }, { "entropy": 1.6635911564032237, "epoch": 1.976518085194035, "grad_norm": 0.8687704801559448, "learning_rate": 2.00658269930999e-06, "loss": 1.3443, "mean_token_accuracy": 0.6642593095699946, "num_tokens": 3015864905.0, "step": 17992 }, { "entropy": 1.7611885865529378, "epoch": 1.9766279421054076, "grad_norm": 0.6940531134605408, "learning_rate": 2.00652161468261e-06, "loss": 1.3961, "mean_token_accuracy": 0.6467735171318054, "num_tokens": 3016015646.0, "step": 17993 }, { "entropy": 1.6912155350049336, "epoch": 1.9767377990167807, "grad_norm": 0.6218036413192749, "learning_rate": 2.0064608146947675e-06, "loss": 1.3728, "mean_token_accuracy": 0.6538327733675638, "num_tokens": 3016178510.0, "step": 17994 }, { "entropy": 1.788610190153122, "epoch": 1.9768476559281536, "grad_norm": 0.7761731147766113, "learning_rate": 2.006400299348387e-06, "loss": 1.2656, "mean_token_accuracy": 0.6742220024267832, "num_tokens": 3016306071.0, "step": 17995 }, { "entropy": 1.7301820814609528, "epoch": 1.9769575128395265, "grad_norm": 0.6517966389656067, "learning_rate": 2.006340068645385e-06, "loss": 1.3778, "mean_token_accuracy": 0.6603502233823141, "num_tokens": 3016492205.0, "step": 17996 }, { "entropy": 1.7159747183322906, "epoch": 1.9770673697508996, "grad_norm": 0.7238568067550659, "learning_rate": 2.0062801225876675e-06, "loss": 1.3778, "mean_token_accuracy": 0.6613292147715887, "num_tokens": 3016667084.0, "step": 17997 }, { "entropy": 1.7366797228654225, "epoch": 1.9771772266622722, "grad_norm": 0.6958896517753601, "learning_rate": 2.0062204611771306e-06, "loss": 1.4078, "mean_token_accuracy": 0.6479227592547735, "num_tokens": 3016828560.0, "step": 17998 }, { "entropy": 1.7239971260229747, "epoch": 1.9772870835736454, "grad_norm": 0.6808164119720459, "learning_rate": 2.006161084415664e-06, "loss": 1.5164, "mean_token_accuracy": 0.6513014584779739, "num_tokens": 3017060646.0, "step": 17999 }, { "entropy": 1.7256098488966625, "epoch": 1.9773969404850182, "grad_norm": 0.7273125052452087, "learning_rate": 2.006101992305146e-06, "loss": 1.3934, "mean_token_accuracy": 0.6515825539827347, "num_tokens": 3017199524.0, "step": 18000 }, { "entropy": 1.7242956161499023, "epoch": 1.9775067973963911, "grad_norm": 0.6841662526130676, "learning_rate": 2.0060431848474487e-06, "loss": 1.6029, "mean_token_accuracy": 0.6310764849185944, "num_tokens": 3017376477.0, "step": 18001 }, { "entropy": 1.6526127556959789, "epoch": 1.9776166543077642, "grad_norm": 0.7398411631584167, "learning_rate": 2.0059846620444303e-06, "loss": 1.3041, "mean_token_accuracy": 0.6696018973986307, "num_tokens": 3017536030.0, "step": 18002 }, { "entropy": 1.6591049631436665, "epoch": 1.9777265112191371, "grad_norm": 0.7715206146240234, "learning_rate": 2.0059264238979447e-06, "loss": 1.3687, "mean_token_accuracy": 0.6526884287595749, "num_tokens": 3017710213.0, "step": 18003 }, { "entropy": 1.698662171761195, "epoch": 1.97783636813051, "grad_norm": 0.7030515074729919, "learning_rate": 2.005868470409835e-06, "loss": 1.4247, "mean_token_accuracy": 0.6606688896814982, "num_tokens": 3017879975.0, "step": 18004 }, { "entropy": 1.6552705466747284, "epoch": 1.977946225041883, "grad_norm": 0.6308731436729431, "learning_rate": 2.0058108015819362e-06, "loss": 1.2674, "mean_token_accuracy": 0.6792994836966196, "num_tokens": 3018008327.0, "step": 18005 }, { "entropy": 1.7466795146465302, "epoch": 1.9780560819532558, "grad_norm": 0.7210440635681152, "learning_rate": 2.0057534174160713e-06, "loss": 1.3281, "mean_token_accuracy": 0.673799475034078, "num_tokens": 3018133227.0, "step": 18006 }, { "entropy": 1.7657952308654785, "epoch": 1.9781659388646289, "grad_norm": 0.9165833592414856, "learning_rate": 2.0056963179140585e-06, "loss": 1.4521, "mean_token_accuracy": 0.6580042143662771, "num_tokens": 3018281843.0, "step": 18007 }, { "entropy": 1.697232147057851, "epoch": 1.9782757957760018, "grad_norm": 0.6184810996055603, "learning_rate": 2.005639503077705e-06, "loss": 1.4727, "mean_token_accuracy": 0.6428412993748983, "num_tokens": 3018460132.0, "step": 18008 }, { "entropy": 1.6890581647555034, "epoch": 1.9783856526873747, "grad_norm": 0.6864956617355347, "learning_rate": 2.005582972908807e-06, "loss": 1.4036, "mean_token_accuracy": 0.6568064391613007, "num_tokens": 3018656885.0, "step": 18009 }, { "entropy": 1.715958833694458, "epoch": 1.9784955095987478, "grad_norm": 0.6373485326766968, "learning_rate": 2.0055267274091552e-06, "loss": 1.3331, "mean_token_accuracy": 0.6668579330046972, "num_tokens": 3018801487.0, "step": 18010 }, { "entropy": 1.6817961037158966, "epoch": 1.9786053665101204, "grad_norm": 0.7401055097579956, "learning_rate": 2.0054707665805303e-06, "loss": 1.3845, "mean_token_accuracy": 0.661645824710528, "num_tokens": 3018946548.0, "step": 18011 }, { "entropy": 1.663213074207306, "epoch": 1.9787152234214935, "grad_norm": 0.670312762260437, "learning_rate": 2.0054150904247017e-06, "loss": 1.4159, "mean_token_accuracy": 0.6704086015621821, "num_tokens": 3019135682.0, "step": 18012 }, { "entropy": 1.6933028101921082, "epoch": 1.9788250803328664, "grad_norm": 0.6959567666053772, "learning_rate": 2.0053596989434325e-06, "loss": 1.4025, "mean_token_accuracy": 0.6613306552171707, "num_tokens": 3019288636.0, "step": 18013 }, { "entropy": 1.65764586130778, "epoch": 1.9789349372442393, "grad_norm": 0.68830406665802, "learning_rate": 2.0053045921384766e-06, "loss": 1.4326, "mean_token_accuracy": 0.6502135346333185, "num_tokens": 3019479477.0, "step": 18014 }, { "entropy": 1.6973117391268413, "epoch": 1.9790447941556124, "grad_norm": 0.6733773946762085, "learning_rate": 2.005249770011576e-06, "loss": 1.5697, "mean_token_accuracy": 0.6302972286939621, "num_tokens": 3019680116.0, "step": 18015 }, { "entropy": 1.654345730940501, "epoch": 1.9791546510669853, "grad_norm": 0.6176797151565552, "learning_rate": 2.005195232564469e-06, "loss": 1.3239, "mean_token_accuracy": 0.6690139671166738, "num_tokens": 3019863791.0, "step": 18016 }, { "entropy": 1.7217604120572407, "epoch": 1.9792645079783582, "grad_norm": 0.8068521022796631, "learning_rate": 2.005140979798878e-06, "loss": 1.4333, "mean_token_accuracy": 0.6493235329786936, "num_tokens": 3020037805.0, "step": 18017 }, { "entropy": 1.7371099591255188, "epoch": 1.9793743648897313, "grad_norm": 0.7657269239425659, "learning_rate": 2.005087011716523e-06, "loss": 1.4369, "mean_token_accuracy": 0.6623478010296822, "num_tokens": 3020184002.0, "step": 18018 }, { "entropy": 1.7969795564810436, "epoch": 1.979484221801104, "grad_norm": 0.7154170274734497, "learning_rate": 2.0050333283191096e-06, "loss": 1.5782, "mean_token_accuracy": 0.6230311791102091, "num_tokens": 3020363052.0, "step": 18019 }, { "entropy": 1.7187202374140422, "epoch": 1.979594078712477, "grad_norm": 0.576965868473053, "learning_rate": 2.0049799296083384e-06, "loss": 1.1483, "mean_token_accuracy": 0.676199659705162, "num_tokens": 3020557205.0, "step": 18020 }, { "entropy": 1.707470417022705, "epoch": 1.97970393562385, "grad_norm": 0.759087324142456, "learning_rate": 2.0049268155859003e-06, "loss": 1.4009, "mean_token_accuracy": 0.6503723512093226, "num_tokens": 3020700036.0, "step": 18021 }, { "entropy": 1.6869849860668182, "epoch": 1.9798137925352228, "grad_norm": 0.7738426923751831, "learning_rate": 2.0048739862534737e-06, "loss": 1.3752, "mean_token_accuracy": 0.6719378630320231, "num_tokens": 3020861112.0, "step": 18022 }, { "entropy": 1.7128291428089142, "epoch": 1.979923649446596, "grad_norm": 0.6039331555366516, "learning_rate": 2.004821441612733e-06, "loss": 1.4135, "mean_token_accuracy": 0.6554146458705267, "num_tokens": 3021038362.0, "step": 18023 }, { "entropy": 1.7349358598391216, "epoch": 1.9800335063579686, "grad_norm": 0.6787173748016357, "learning_rate": 2.0047691816653407e-06, "loss": 1.4621, "mean_token_accuracy": 0.6508260667324066, "num_tokens": 3021217797.0, "step": 18024 }, { "entropy": 1.6685168147087097, "epoch": 1.9801433632693417, "grad_norm": 0.705312967300415, "learning_rate": 2.0047172064129493e-06, "loss": 1.4366, "mean_token_accuracy": 0.660369485616684, "num_tokens": 3021410023.0, "step": 18025 }, { "entropy": 1.7754539052645366, "epoch": 1.9802532201807146, "grad_norm": 0.5943109393119812, "learning_rate": 2.004665515857206e-06, "loss": 1.3934, "mean_token_accuracy": 0.6560729245344797, "num_tokens": 3021580962.0, "step": 18026 }, { "entropy": 1.680273950099945, "epoch": 1.9803630770920875, "grad_norm": 0.6903275847434998, "learning_rate": 2.004614109999745e-06, "loss": 1.3749, "mean_token_accuracy": 0.6597232023874918, "num_tokens": 3021788227.0, "step": 18027 }, { "entropy": 1.670421948035558, "epoch": 1.9804729340034606, "grad_norm": 0.6176491379737854, "learning_rate": 2.0045629888421937e-06, "loss": 1.3391, "mean_token_accuracy": 0.6674363017082214, "num_tokens": 3021959993.0, "step": 18028 }, { "entropy": 1.7221202949682872, "epoch": 1.9805827909148335, "grad_norm": 0.8627282977104187, "learning_rate": 2.004512152386172e-06, "loss": 1.4449, "mean_token_accuracy": 0.6531087706486384, "num_tokens": 3022159424.0, "step": 18029 }, { "entropy": 1.7343119382858276, "epoch": 1.9806926478262064, "grad_norm": 0.6736763119697571, "learning_rate": 2.0044616006332864e-06, "loss": 1.3932, "mean_token_accuracy": 0.6408475587765375, "num_tokens": 3022327707.0, "step": 18030 }, { "entropy": 1.710096687078476, "epoch": 1.9808025047375795, "grad_norm": 0.6935316324234009, "learning_rate": 2.0044113335851365e-06, "loss": 1.4266, "mean_token_accuracy": 0.6543708691994349, "num_tokens": 3022506535.0, "step": 18031 }, { "entropy": 1.7249768376350403, "epoch": 1.9809123616489521, "grad_norm": 0.7204832434654236, "learning_rate": 2.004361351243316e-06, "loss": 1.368, "mean_token_accuracy": 0.6743978013594946, "num_tokens": 3022623060.0, "step": 18032 }, { "entropy": 1.7260689040025075, "epoch": 1.9810222185603252, "grad_norm": 0.5968881845474243, "learning_rate": 2.004311653609404e-06, "loss": 1.531, "mean_token_accuracy": 0.6207184543212255, "num_tokens": 3022847515.0, "step": 18033 }, { "entropy": 1.7605952223141987, "epoch": 1.9811320754716981, "grad_norm": 0.677291989326477, "learning_rate": 2.004262240684976e-06, "loss": 1.3908, "mean_token_accuracy": 0.6491524130105972, "num_tokens": 3023004730.0, "step": 18034 }, { "entropy": 1.7027316590150197, "epoch": 1.981241932383071, "grad_norm": 0.590280294418335, "learning_rate": 2.004213112471593e-06, "loss": 1.4213, "mean_token_accuracy": 0.6485264748334885, "num_tokens": 3023213682.0, "step": 18035 }, { "entropy": 1.7386829058329265, "epoch": 1.981351789294444, "grad_norm": 0.7665061950683594, "learning_rate": 2.004164268970812e-06, "loss": 1.379, "mean_token_accuracy": 0.6652245422204336, "num_tokens": 3023388373.0, "step": 18036 }, { "entropy": 1.7038521766662598, "epoch": 1.9814616462058168, "grad_norm": 0.7388865947723389, "learning_rate": 2.004115710184179e-06, "loss": 1.3043, "mean_token_accuracy": 0.6681536138057709, "num_tokens": 3023530065.0, "step": 18037 }, { "entropy": 1.7088390787442524, "epoch": 1.9815715031171899, "grad_norm": 0.8848956227302551, "learning_rate": 2.004067436113229e-06, "loss": 1.3557, "mean_token_accuracy": 0.6594817042350769, "num_tokens": 3023709403.0, "step": 18038 }, { "entropy": 1.6985073586304982, "epoch": 1.9816813600285628, "grad_norm": 0.5689802169799805, "learning_rate": 2.004019446759491e-06, "loss": 1.3975, "mean_token_accuracy": 0.663287435968717, "num_tokens": 3023913464.0, "step": 18039 }, { "entropy": 1.6951737900575001, "epoch": 1.9817912169399357, "grad_norm": 0.7058504819869995, "learning_rate": 2.0039717421244838e-06, "loss": 1.3111, "mean_token_accuracy": 0.6672380814949671, "num_tokens": 3024026724.0, "step": 18040 }, { "entropy": 1.733831246693929, "epoch": 1.9819010738513088, "grad_norm": 0.7383568286895752, "learning_rate": 2.003924322209718e-06, "loss": 1.2293, "mean_token_accuracy": 0.6893104861179987, "num_tokens": 3024117143.0, "step": 18041 }, { "entropy": 1.6926162540912628, "epoch": 1.9820109307626816, "grad_norm": 0.7105924487113953, "learning_rate": 2.0038771870166933e-06, "loss": 1.278, "mean_token_accuracy": 0.6817958305279413, "num_tokens": 3024256149.0, "step": 18042 }, { "entropy": 1.722558597723643, "epoch": 1.9821207876740545, "grad_norm": 0.6348571181297302, "learning_rate": 2.0038303365469026e-06, "loss": 1.398, "mean_token_accuracy": 0.6517468144496282, "num_tokens": 3024410077.0, "step": 18043 }, { "entropy": 1.6495929559071858, "epoch": 1.9822306445854276, "grad_norm": 0.6759068965911865, "learning_rate": 2.0037837708018268e-06, "loss": 1.438, "mean_token_accuracy": 0.6489085604747137, "num_tokens": 3024594527.0, "step": 18044 }, { "entropy": 1.7154331902662914, "epoch": 1.9823405014968003, "grad_norm": 0.7698425054550171, "learning_rate": 2.0037374897829413e-06, "loss": 1.4302, "mean_token_accuracy": 0.6621577441692352, "num_tokens": 3024748417.0, "step": 18045 }, { "entropy": 1.694651484489441, "epoch": 1.9824503584081734, "grad_norm": 0.7667627930641174, "learning_rate": 2.0036914934917106e-06, "loss": 1.4109, "mean_token_accuracy": 0.6532551348209381, "num_tokens": 3024893648.0, "step": 18046 }, { "entropy": 1.6467284560203552, "epoch": 1.9825602153195463, "grad_norm": 0.6645065546035767, "learning_rate": 2.00364578192959e-06, "loss": 1.4022, "mean_token_accuracy": 0.6770640710989634, "num_tokens": 3025055067.0, "step": 18047 }, { "entropy": 1.7726899286111195, "epoch": 1.9826700722309192, "grad_norm": 0.6508256793022156, "learning_rate": 2.003600355098027e-06, "loss": 1.4051, "mean_token_accuracy": 0.6635939379533132, "num_tokens": 3025246791.0, "step": 18048 }, { "entropy": 1.7325632472832997, "epoch": 1.9827799291422923, "grad_norm": 0.7848646640777588, "learning_rate": 2.0035552129984595e-06, "loss": 1.4272, "mean_token_accuracy": 0.6580934077501297, "num_tokens": 3025385806.0, "step": 18049 }, { "entropy": 1.7530264457066853, "epoch": 1.982889786053665, "grad_norm": 0.6446986794471741, "learning_rate": 2.003510355632314e-06, "loss": 1.3292, "mean_token_accuracy": 0.6607964038848877, "num_tokens": 3025565951.0, "step": 18050 }, { "entropy": 1.6674051980177562, "epoch": 1.982999642965038, "grad_norm": 0.7394304275512695, "learning_rate": 2.003465783001013e-06, "loss": 1.3453, "mean_token_accuracy": 0.6573042770226797, "num_tokens": 3025704826.0, "step": 18051 }, { "entropy": 1.6773662368456523, "epoch": 1.983109499876411, "grad_norm": 0.558399498462677, "learning_rate": 2.003421495105966e-06, "loss": 1.4961, "mean_token_accuracy": 0.6545028338829676, "num_tokens": 3025925349.0, "step": 18052 }, { "entropy": 1.7445982893308003, "epoch": 1.9832193567877838, "grad_norm": 0.7014199495315552, "learning_rate": 2.003377491948574e-06, "loss": 1.2774, "mean_token_accuracy": 0.6735482960939407, "num_tokens": 3026050899.0, "step": 18053 }, { "entropy": 1.7237263023853302, "epoch": 1.983329213699157, "grad_norm": 0.7317463755607605, "learning_rate": 2.0033337735302303e-06, "loss": 1.285, "mean_token_accuracy": 0.6770526617765427, "num_tokens": 3026212822.0, "step": 18054 }, { "entropy": 1.6821326514085133, "epoch": 1.9834390706105298, "grad_norm": 0.793610692024231, "learning_rate": 2.003290339852319e-06, "loss": 1.242, "mean_token_accuracy": 0.6760970403750738, "num_tokens": 3026380803.0, "step": 18055 }, { "entropy": 1.6886110802491505, "epoch": 1.9835489275219027, "grad_norm": 0.621466338634491, "learning_rate": 2.003247190916215e-06, "loss": 1.2854, "mean_token_accuracy": 0.6679765482743582, "num_tokens": 3026512384.0, "step": 18056 }, { "entropy": 1.7125110626220703, "epoch": 1.9836587844332758, "grad_norm": 0.7082045674324036, "learning_rate": 2.0032043267232827e-06, "loss": 1.2844, "mean_token_accuracy": 0.6690214524666468, "num_tokens": 3026634106.0, "step": 18057 }, { "entropy": 1.6765822370847066, "epoch": 1.9837686413446485, "grad_norm": 0.7026678919792175, "learning_rate": 2.00316174727488e-06, "loss": 1.3097, "mean_token_accuracy": 0.6639251758654913, "num_tokens": 3026780481.0, "step": 18058 }, { "entropy": 1.7151767710844676, "epoch": 1.9838784982560216, "grad_norm": 0.7121601104736328, "learning_rate": 2.0031194525723535e-06, "loss": 1.3883, "mean_token_accuracy": 0.6660854717095693, "num_tokens": 3026923173.0, "step": 18059 }, { "entropy": 1.6836934685707092, "epoch": 1.9839883551673945, "grad_norm": 0.7249175906181335, "learning_rate": 2.003077442617042e-06, "loss": 1.394, "mean_token_accuracy": 0.6518472333749136, "num_tokens": 3027118633.0, "step": 18060 }, { "entropy": 1.6882561047871907, "epoch": 1.9840982120787674, "grad_norm": 0.797842800617218, "learning_rate": 2.0030357174102765e-06, "loss": 1.311, "mean_token_accuracy": 0.6653191695610682, "num_tokens": 3027259226.0, "step": 18061 }, { "entropy": 1.622244934240977, "epoch": 1.9842080689901405, "grad_norm": 0.6981305480003357, "learning_rate": 2.002994276953375e-06, "loss": 1.2685, "mean_token_accuracy": 0.6686497926712036, "num_tokens": 3027400721.0, "step": 18062 }, { "entropy": 1.6579966247081757, "epoch": 1.9843179259015131, "grad_norm": 0.7618954181671143, "learning_rate": 2.002953121247651e-06, "loss": 1.2649, "mean_token_accuracy": 0.6726798812548319, "num_tokens": 3027564629.0, "step": 18063 }, { "entropy": 1.66702335079511, "epoch": 1.9844277828128862, "grad_norm": 0.7515163421630859, "learning_rate": 2.0029122502944063e-06, "loss": 1.2735, "mean_token_accuracy": 0.6686499267816544, "num_tokens": 3027759663.0, "step": 18064 }, { "entropy": 1.6984266340732574, "epoch": 1.9845376397242591, "grad_norm": 0.6969068050384521, "learning_rate": 2.002871664094935e-06, "loss": 1.2867, "mean_token_accuracy": 0.6645700335502625, "num_tokens": 3027883746.0, "step": 18065 }, { "entropy": 1.7177335818608601, "epoch": 1.984647496635632, "grad_norm": 0.5522650480270386, "learning_rate": 2.0028313626505215e-06, "loss": 1.0616, "mean_token_accuracy": 0.6834283471107483, "num_tokens": 3028051127.0, "step": 18066 }, { "entropy": 1.6780750850836437, "epoch": 1.984757353547005, "grad_norm": 0.6429264545440674, "learning_rate": 2.002791345962441e-06, "loss": 1.3301, "mean_token_accuracy": 0.6632246325413386, "num_tokens": 3028212961.0, "step": 18067 }, { "entropy": 1.684230109055837, "epoch": 1.984867210458378, "grad_norm": 0.6792988777160645, "learning_rate": 2.0027516140319604e-06, "loss": 1.212, "mean_token_accuracy": 0.6855142414569855, "num_tokens": 3028319131.0, "step": 18068 }, { "entropy": 1.6362544397513072, "epoch": 1.9849770673697509, "grad_norm": 0.5678415298461914, "learning_rate": 2.0027121668603362e-06, "loss": 1.3782, "mean_token_accuracy": 0.6610343605279922, "num_tokens": 3028505641.0, "step": 18069 }, { "entropy": 1.668990820646286, "epoch": 1.985086924281124, "grad_norm": 0.6381085515022278, "learning_rate": 2.0026730044488184e-06, "loss": 1.3541, "mean_token_accuracy": 0.6664966394503912, "num_tokens": 3028655036.0, "step": 18070 }, { "entropy": 1.6200170914332073, "epoch": 1.9851967811924967, "grad_norm": 0.6306815147399902, "learning_rate": 2.0026341267986454e-06, "loss": 1.2853, "mean_token_accuracy": 0.6690742274125417, "num_tokens": 3028820136.0, "step": 18071 }, { "entropy": 1.7141570250193279, "epoch": 1.9853066381038698, "grad_norm": 0.6135541200637817, "learning_rate": 2.0025955339110474e-06, "loss": 1.4239, "mean_token_accuracy": 0.6421345720688502, "num_tokens": 3029017262.0, "step": 18072 }, { "entropy": 1.713169127702713, "epoch": 1.9854164950152426, "grad_norm": 0.7709214687347412, "learning_rate": 2.0025572257872475e-06, "loss": 1.3818, "mean_token_accuracy": 0.6472178449233373, "num_tokens": 3029195490.0, "step": 18073 }, { "entropy": 1.7533264259497325, "epoch": 1.9855263519266155, "grad_norm": 0.6595629453659058, "learning_rate": 2.002519202428457e-06, "loss": 1.4027, "mean_token_accuracy": 0.6588217069705328, "num_tokens": 3029344302.0, "step": 18074 }, { "entropy": 1.702713559071223, "epoch": 1.9856362088379886, "grad_norm": 0.6595112085342407, "learning_rate": 2.0024814638358793e-06, "loss": 1.4347, "mean_token_accuracy": 0.6574101795752844, "num_tokens": 3029555592.0, "step": 18075 }, { "entropy": 1.6746285160382588, "epoch": 1.9857460657493613, "grad_norm": 0.7601661086082458, "learning_rate": 2.002444010010708e-06, "loss": 1.3132, "mean_token_accuracy": 0.6680941929419836, "num_tokens": 3029696305.0, "step": 18076 }, { "entropy": 1.704234351714452, "epoch": 1.9858559226607344, "grad_norm": 0.6155273914337158, "learning_rate": 2.0024068409541304e-06, "loss": 1.3917, "mean_token_accuracy": 0.662379855910937, "num_tokens": 3029881772.0, "step": 18077 }, { "entropy": 1.6934775014718373, "epoch": 1.9859657795721073, "grad_norm": 0.6397415399551392, "learning_rate": 2.0023699566673213e-06, "loss": 1.2694, "mean_token_accuracy": 0.6719277749458948, "num_tokens": 3030039562.0, "step": 18078 }, { "entropy": 1.7212519546349843, "epoch": 1.9860756364834802, "grad_norm": 0.6646420955657959, "learning_rate": 2.0023333571514483e-06, "loss": 1.4844, "mean_token_accuracy": 0.6389352331558863, "num_tokens": 3030268176.0, "step": 18079 }, { "entropy": 1.6160194476445515, "epoch": 1.9861854933948533, "grad_norm": 0.7115574479103088, "learning_rate": 2.0022970424076705e-06, "loss": 1.3326, "mean_token_accuracy": 0.667738159497579, "num_tokens": 3030445462.0, "step": 18080 }, { "entropy": 1.729399710893631, "epoch": 1.9862953503062262, "grad_norm": 0.5949446558952332, "learning_rate": 2.002261012437137e-06, "loss": 1.4196, "mean_token_accuracy": 0.6431524356206259, "num_tokens": 3030648761.0, "step": 18081 }, { "entropy": 1.71317191918691, "epoch": 1.986405207217599, "grad_norm": 3.0127835273742676, "learning_rate": 2.002225267240988e-06, "loss": 1.3224, "mean_token_accuracy": 0.6609684824943542, "num_tokens": 3030837875.0, "step": 18082 }, { "entropy": 1.7283783555030823, "epoch": 1.9865150641289722, "grad_norm": 0.8065574169158936, "learning_rate": 2.0021898068203545e-06, "loss": 1.422, "mean_token_accuracy": 0.6655691017707189, "num_tokens": 3030997256.0, "step": 18083 }, { "entropy": 1.6900490025679271, "epoch": 1.9866249210403448, "grad_norm": 0.6390381455421448, "learning_rate": 2.00215463117636e-06, "loss": 1.2966, "mean_token_accuracy": 0.6703857729832331, "num_tokens": 3031132200.0, "step": 18084 }, { "entropy": 1.660315861304601, "epoch": 1.986734777951718, "grad_norm": 0.6758151054382324, "learning_rate": 2.0021197403101156e-06, "loss": 1.319, "mean_token_accuracy": 0.6526903261741003, "num_tokens": 3031266341.0, "step": 18085 }, { "entropy": 1.724764307339986, "epoch": 1.9868446348630908, "grad_norm": 0.7246851325035095, "learning_rate": 2.002085134222728e-06, "loss": 1.358, "mean_token_accuracy": 0.6704638799031576, "num_tokens": 3031400811.0, "step": 18086 }, { "entropy": 1.7192512452602386, "epoch": 1.9869544917744637, "grad_norm": 0.6231260895729065, "learning_rate": 2.002050812915291e-06, "loss": 1.5262, "mean_token_accuracy": 0.6408623903989792, "num_tokens": 3031597704.0, "step": 18087 }, { "entropy": 1.7471778094768524, "epoch": 1.9870643486858368, "grad_norm": 0.7257117033004761, "learning_rate": 2.0020167763888905e-06, "loss": 1.6226, "mean_token_accuracy": 0.6368986219167709, "num_tokens": 3031759715.0, "step": 18088 }, { "entropy": 1.6563644409179688, "epoch": 1.9871742055972095, "grad_norm": 0.7969918251037598, "learning_rate": 2.001983024644605e-06, "loss": 1.1957, "mean_token_accuracy": 0.68258864680926, "num_tokens": 3031879721.0, "step": 18089 }, { "entropy": 1.7332975169022877, "epoch": 1.9872840625085826, "grad_norm": 0.7111884951591492, "learning_rate": 2.0019495576835017e-06, "loss": 1.2027, "mean_token_accuracy": 0.678931881984075, "num_tokens": 3031980175.0, "step": 18090 }, { "entropy": 1.677456219991048, "epoch": 1.9873939194199555, "grad_norm": 0.7319856882095337, "learning_rate": 2.0019163755066414e-06, "loss": 1.4709, "mean_token_accuracy": 0.6506858567396799, "num_tokens": 3032174655.0, "step": 18091 }, { "entropy": 1.7617920140425365, "epoch": 1.9875037763313284, "grad_norm": 0.7048560976982117, "learning_rate": 2.0018834781150714e-06, "loss": 1.3913, "mean_token_accuracy": 0.6561016142368317, "num_tokens": 3032322569.0, "step": 18092 }, { "entropy": 1.6850563287734985, "epoch": 1.9876136332427015, "grad_norm": 0.7205668091773987, "learning_rate": 2.001850865509836e-06, "loss": 1.2585, "mean_token_accuracy": 0.6678894609212875, "num_tokens": 3032456059.0, "step": 18093 }, { "entropy": 1.6681140661239624, "epoch": 1.9877234901540743, "grad_norm": 0.7361934185028076, "learning_rate": 2.0018185376919665e-06, "loss": 1.4023, "mean_token_accuracy": 0.6583593388398489, "num_tokens": 3032622180.0, "step": 18094 }, { "entropy": 1.8122372031211853, "epoch": 1.9878333470654472, "grad_norm": 0.7276337146759033, "learning_rate": 2.0017864946624848e-06, "loss": 1.6099, "mean_token_accuracy": 0.6246108015378317, "num_tokens": 3032802456.0, "step": 18095 }, { "entropy": 1.6701407432556152, "epoch": 1.9879432039768203, "grad_norm": 0.6742632985115051, "learning_rate": 2.001754736422406e-06, "loss": 1.2446, "mean_token_accuracy": 0.6852086037397385, "num_tokens": 3032910500.0, "step": 18096 }, { "entropy": 1.7621448735396068, "epoch": 1.988053060888193, "grad_norm": 0.849463164806366, "learning_rate": 2.0017232629727345e-06, "loss": 1.5072, "mean_token_accuracy": 0.6401193489631017, "num_tokens": 3033111853.0, "step": 18097 }, { "entropy": 1.6534665822982788, "epoch": 1.988162917799566, "grad_norm": 0.6665313839912415, "learning_rate": 2.0016920743144674e-06, "loss": 1.272, "mean_token_accuracy": 0.6797701468070348, "num_tokens": 3033255604.0, "step": 18098 }, { "entropy": 1.7228951156139374, "epoch": 1.988272774710939, "grad_norm": 0.6837633848190308, "learning_rate": 2.0016611704485922e-06, "loss": 1.2836, "mean_token_accuracy": 0.671315535902977, "num_tokens": 3033393490.0, "step": 18099 }, { "entropy": 1.8056779702504475, "epoch": 1.9883826316223119, "grad_norm": 0.7126280069351196, "learning_rate": 2.001630551376086e-06, "loss": 1.3957, "mean_token_accuracy": 0.6486673851807913, "num_tokens": 3033543350.0, "step": 18100 }, { "entropy": 1.6797227362791698, "epoch": 1.988492488533685, "grad_norm": 0.6651612520217896, "learning_rate": 2.0016002170979173e-06, "loss": 1.4704, "mean_token_accuracy": 0.6411794424057007, "num_tokens": 3033771985.0, "step": 18101 }, { "entropy": 1.7329282363255818, "epoch": 1.9886023454450577, "grad_norm": 0.7120084762573242, "learning_rate": 2.0015701676150475e-06, "loss": 1.4927, "mean_token_accuracy": 0.6449198176463445, "num_tokens": 3033916379.0, "step": 18102 }, { "entropy": 1.7090756595134735, "epoch": 1.9887122023564308, "grad_norm": 0.866844117641449, "learning_rate": 2.001540402928426e-06, "loss": 1.3771, "mean_token_accuracy": 0.6641785850127538, "num_tokens": 3034065801.0, "step": 18103 }, { "entropy": 1.7364407777786255, "epoch": 1.9888220592678036, "grad_norm": 0.7089415788650513, "learning_rate": 2.001510923038997e-06, "loss": 1.4758, "mean_token_accuracy": 0.6599543740351995, "num_tokens": 3034236864.0, "step": 18104 }, { "entropy": 1.71084330479304, "epoch": 1.9889319161791765, "grad_norm": 0.6918189525604248, "learning_rate": 2.0014817279476928e-06, "loss": 1.377, "mean_token_accuracy": 0.645952895283699, "num_tokens": 3034406382.0, "step": 18105 }, { "entropy": 1.7627890010674794, "epoch": 1.9890417730905496, "grad_norm": 0.8844940662384033, "learning_rate": 2.0014528176554367e-06, "loss": 1.4876, "mean_token_accuracy": 0.6449083288510641, "num_tokens": 3034578259.0, "step": 18106 }, { "entropy": 1.74430717031161, "epoch": 1.9891516300019225, "grad_norm": 0.5609480738639832, "learning_rate": 2.0014241921631433e-06, "loss": 1.4494, "mean_token_accuracy": 0.6505746444066366, "num_tokens": 3034771034.0, "step": 18107 }, { "entropy": 1.7141135434309642, "epoch": 1.9892614869132954, "grad_norm": 0.7115533947944641, "learning_rate": 2.0013958514717206e-06, "loss": 1.3258, "mean_token_accuracy": 0.6693545977274576, "num_tokens": 3034960817.0, "step": 18108 }, { "entropy": 1.7084451814492543, "epoch": 1.9893713438246685, "grad_norm": 0.6754060983657837, "learning_rate": 2.001367795582063e-06, "loss": 1.5179, "mean_token_accuracy": 0.6482407848040262, "num_tokens": 3035162145.0, "step": 18109 }, { "entropy": 1.7450725734233856, "epoch": 1.9894812007360412, "grad_norm": 0.7590783834457397, "learning_rate": 2.001340024495061e-06, "loss": 1.4425, "mean_token_accuracy": 0.6480189065138499, "num_tokens": 3035344376.0, "step": 18110 }, { "entropy": 1.7174135446548462, "epoch": 1.9895910576474143, "grad_norm": 0.6129758358001709, "learning_rate": 2.0013125382115915e-06, "loss": 1.2797, "mean_token_accuracy": 0.669946551322937, "num_tokens": 3035521562.0, "step": 18111 }, { "entropy": 1.7383268078168232, "epoch": 1.9897009145587872, "grad_norm": 0.7468062043190002, "learning_rate": 2.0012853367325268e-06, "loss": 1.3131, "mean_token_accuracy": 0.6609462102254232, "num_tokens": 3035705803.0, "step": 18112 }, { "entropy": 1.7733658452828724, "epoch": 1.98981077147016, "grad_norm": 0.665949821472168, "learning_rate": 2.001258420058725e-06, "loss": 1.4654, "mean_token_accuracy": 0.6438464025656382, "num_tokens": 3035834345.0, "step": 18113 }, { "entropy": 1.7397722403208415, "epoch": 1.9899206283815332, "grad_norm": 0.6292708516120911, "learning_rate": 2.0012317881910387e-06, "loss": 1.4591, "mean_token_accuracy": 0.6419312010208765, "num_tokens": 3036056500.0, "step": 18114 }, { "entropy": 1.7050531804561615, "epoch": 1.9900304852929058, "grad_norm": 0.6399442553520203, "learning_rate": 2.0012054411303124e-06, "loss": 1.4583, "mean_token_accuracy": 0.6630610624949137, "num_tokens": 3036230464.0, "step": 18115 }, { "entropy": 1.6535969475905101, "epoch": 1.990140342204279, "grad_norm": 0.8072215914726257, "learning_rate": 2.0011793788773787e-06, "loss": 1.1218, "mean_token_accuracy": 0.6984556714693705, "num_tokens": 3036333949.0, "step": 18116 }, { "entropy": 1.7108362515767415, "epoch": 1.9902501991156518, "grad_norm": 0.6812421083450317, "learning_rate": 2.0011536014330627e-06, "loss": 1.2873, "mean_token_accuracy": 0.6654827296733856, "num_tokens": 3036471958.0, "step": 18117 }, { "entropy": 1.708745191494624, "epoch": 1.9903600560270247, "grad_norm": 0.6712941527366638, "learning_rate": 2.0011281087981796e-06, "loss": 1.3927, "mean_token_accuracy": 0.6592844178279241, "num_tokens": 3036634049.0, "step": 18118 }, { "entropy": 1.673752874135971, "epoch": 1.9904699129383978, "grad_norm": 0.7544751763343811, "learning_rate": 2.001102900973538e-06, "loss": 1.3683, "mean_token_accuracy": 0.667888343334198, "num_tokens": 3036777513.0, "step": 18119 }, { "entropy": 1.6410714586575825, "epoch": 1.9905797698497707, "grad_norm": 0.6688563227653503, "learning_rate": 2.0010779779599342e-06, "loss": 1.4143, "mean_token_accuracy": 0.6570560932159424, "num_tokens": 3036987258.0, "step": 18120 }, { "entropy": 1.7408175667126973, "epoch": 1.9906896267611436, "grad_norm": 0.7752634882926941, "learning_rate": 2.001053339758156e-06, "loss": 1.346, "mean_token_accuracy": 0.6612565120061239, "num_tokens": 3037148408.0, "step": 18121 }, { "entropy": 1.662128746509552, "epoch": 1.9907994836725167, "grad_norm": 0.6829171180725098, "learning_rate": 2.0010289863689857e-06, "loss": 1.3705, "mean_token_accuracy": 0.6696845690409342, "num_tokens": 3037348031.0, "step": 18122 }, { "entropy": 1.728913923104604, "epoch": 1.9909093405838894, "grad_norm": 0.7454518675804138, "learning_rate": 2.0010049177931933e-06, "loss": 1.4135, "mean_token_accuracy": 0.6618677377700806, "num_tokens": 3037516275.0, "step": 18123 }, { "entropy": 1.7775543729464214, "epoch": 1.9910191974952625, "grad_norm": 0.8814812898635864, "learning_rate": 2.0009811340315405e-06, "loss": 1.3856, "mean_token_accuracy": 0.6594171871741613, "num_tokens": 3037647651.0, "step": 18124 }, { "entropy": 1.7491299907366435, "epoch": 1.9911290544066353, "grad_norm": 0.5548111796379089, "learning_rate": 2.000957635084779e-06, "loss": 1.4133, "mean_token_accuracy": 0.6463166773319244, "num_tokens": 3037834677.0, "step": 18125 }, { "entropy": 1.695239543914795, "epoch": 1.9912389113180082, "grad_norm": 0.7862319946289062, "learning_rate": 2.0009344209536533e-06, "loss": 1.5992, "mean_token_accuracy": 0.6537997101744016, "num_tokens": 3037978289.0, "step": 18126 }, { "entropy": 1.7685978809992473, "epoch": 1.9913487682293813, "grad_norm": 0.632785439491272, "learning_rate": 2.000911491638899e-06, "loss": 1.5919, "mean_token_accuracy": 0.6281165331602097, "num_tokens": 3038171790.0, "step": 18127 }, { "entropy": 1.6535864472389221, "epoch": 1.991458625140754, "grad_norm": 0.7036636471748352, "learning_rate": 2.00088884714124e-06, "loss": 1.3068, "mean_token_accuracy": 0.6650771498680115, "num_tokens": 3038379611.0, "step": 18128 }, { "entropy": 1.6982588171958923, "epoch": 1.991568482052127, "grad_norm": 0.6188250184059143, "learning_rate": 2.000866487461393e-06, "loss": 1.5187, "mean_token_accuracy": 0.622700423002243, "num_tokens": 3038630107.0, "step": 18129 }, { "entropy": 1.7043708562850952, "epoch": 1.9916783389635, "grad_norm": 0.7103232145309448, "learning_rate": 2.000844412600068e-06, "loss": 1.3381, "mean_token_accuracy": 0.6587745447953542, "num_tokens": 3038829736.0, "step": 18130 }, { "entropy": 1.6450840930143993, "epoch": 1.9917881958748729, "grad_norm": 0.8464440107345581, "learning_rate": 2.0008226225579614e-06, "loss": 1.2032, "mean_token_accuracy": 0.6772677054007848, "num_tokens": 3039029703.0, "step": 18131 }, { "entropy": 1.687016874551773, "epoch": 1.991898052786246, "grad_norm": 0.5829222202301025, "learning_rate": 2.0008011173357644e-06, "loss": 1.2675, "mean_token_accuracy": 0.6710990617672602, "num_tokens": 3039166171.0, "step": 18132 }, { "entropy": 1.750480592250824, "epoch": 1.9920079096976189, "grad_norm": 0.6543905138969421, "learning_rate": 2.0007798969341565e-06, "loss": 1.4109, "mean_token_accuracy": 0.6614688485860825, "num_tokens": 3039302322.0, "step": 18133 }, { "entropy": 1.7635838687419891, "epoch": 1.9921177666089918, "grad_norm": 0.582114577293396, "learning_rate": 2.0007589613538104e-06, "loss": 1.4239, "mean_token_accuracy": 0.6485844204823176, "num_tokens": 3039512616.0, "step": 18134 }, { "entropy": 1.6652332345644634, "epoch": 1.9922276235203649, "grad_norm": 0.7193676233291626, "learning_rate": 2.000738310595387e-06, "loss": 1.3869, "mean_token_accuracy": 0.6623914440472921, "num_tokens": 3039667577.0, "step": 18135 }, { "entropy": 1.6600812375545502, "epoch": 1.9923374804317375, "grad_norm": 0.6071199774742126, "learning_rate": 2.0007179446595414e-06, "loss": 1.3844, "mean_token_accuracy": 0.6484141399463018, "num_tokens": 3039865053.0, "step": 18136 }, { "entropy": 1.7320877810319264, "epoch": 1.9924473373431106, "grad_norm": 0.7393842339515686, "learning_rate": 2.0006978635469175e-06, "loss": 1.3568, "mean_token_accuracy": 0.6677570939064026, "num_tokens": 3040001768.0, "step": 18137 }, { "entropy": 1.694648305575053, "epoch": 1.9925571942544835, "grad_norm": 0.7653499245643616, "learning_rate": 2.000678067258151e-06, "loss": 1.4478, "mean_token_accuracy": 0.644600714246432, "num_tokens": 3040212474.0, "step": 18138 }, { "entropy": 1.7461239794890087, "epoch": 1.9926670511658564, "grad_norm": 0.6602531671524048, "learning_rate": 2.000658555793869e-06, "loss": 1.3958, "mean_token_accuracy": 0.6600701163212458, "num_tokens": 3040389094.0, "step": 18139 }, { "entropy": 1.7250058154265087, "epoch": 1.9927769080772295, "grad_norm": 0.7628946900367737, "learning_rate": 2.0006393291546883e-06, "loss": 1.3985, "mean_token_accuracy": 0.6515516539414724, "num_tokens": 3040533558.0, "step": 18140 }, { "entropy": 1.6692057152589161, "epoch": 1.9928867649886024, "grad_norm": 0.6552335619926453, "learning_rate": 2.0006203873412174e-06, "loss": 1.5968, "mean_token_accuracy": 0.6354374637206396, "num_tokens": 3040772004.0, "step": 18141 }, { "entropy": 1.7071496148904164, "epoch": 1.9929966218999753, "grad_norm": 0.5911110639572144, "learning_rate": 2.000601730354056e-06, "loss": 1.3592, "mean_token_accuracy": 0.6535786141951879, "num_tokens": 3040998379.0, "step": 18142 }, { "entropy": 1.7665246824423473, "epoch": 1.9931064788113482, "grad_norm": 0.699324369430542, "learning_rate": 2.000583358193795e-06, "loss": 1.4768, "mean_token_accuracy": 0.6513043691714605, "num_tokens": 3041134684.0, "step": 18143 }, { "entropy": 1.7260617713133495, "epoch": 1.993216335722721, "grad_norm": 0.7378421425819397, "learning_rate": 2.0005652708610145e-06, "loss": 1.3723, "mean_token_accuracy": 0.6644292175769806, "num_tokens": 3041295927.0, "step": 18144 }, { "entropy": 1.696333905061086, "epoch": 1.9933261926340942, "grad_norm": 0.6919421553611755, "learning_rate": 2.000547468356289e-06, "loss": 1.4383, "mean_token_accuracy": 0.6629768361647924, "num_tokens": 3041453675.0, "step": 18145 }, { "entropy": 1.7212949494520824, "epoch": 1.993436049545467, "grad_norm": 0.770926296710968, "learning_rate": 2.0005299506801808e-06, "loss": 1.4902, "mean_token_accuracy": 0.6531381358702978, "num_tokens": 3041633288.0, "step": 18146 }, { "entropy": 1.6915223002433777, "epoch": 1.99354590645684, "grad_norm": 0.7864291071891785, "learning_rate": 2.000512717833244e-06, "loss": 1.4262, "mean_token_accuracy": 0.6496833662192026, "num_tokens": 3041787947.0, "step": 18147 }, { "entropy": 1.6634085575739543, "epoch": 1.993655763368213, "grad_norm": 0.7216148376464844, "learning_rate": 2.0004957698160243e-06, "loss": 1.351, "mean_token_accuracy": 0.6673944791158041, "num_tokens": 3041922058.0, "step": 18148 }, { "entropy": 1.7852267622947693, "epoch": 1.9937656202795857, "grad_norm": 0.6994895339012146, "learning_rate": 2.0004791066290583e-06, "loss": 1.5275, "mean_token_accuracy": 0.648076981306076, "num_tokens": 3042114828.0, "step": 18149 }, { "entropy": 1.7273716727892559, "epoch": 1.9938754771909588, "grad_norm": 0.7003465890884399, "learning_rate": 2.000462728272874e-06, "loss": 1.3692, "mean_token_accuracy": 0.6554417014122009, "num_tokens": 3042260808.0, "step": 18150 }, { "entropy": 1.640130211909612, "epoch": 1.9939853341023317, "grad_norm": 0.7557557821273804, "learning_rate": 2.000446634747988e-06, "loss": 1.4792, "mean_token_accuracy": 0.6581311722596487, "num_tokens": 3042433542.0, "step": 18151 }, { "entropy": 1.6444950600465138, "epoch": 1.9940951910137046, "grad_norm": 0.6619048714637756, "learning_rate": 2.0004308260549116e-06, "loss": 1.4316, "mean_token_accuracy": 0.6499272088209788, "num_tokens": 3042630077.0, "step": 18152 }, { "entropy": 1.7205499112606049, "epoch": 1.9942050479250777, "grad_norm": 0.7403351664543152, "learning_rate": 2.0004153021941435e-06, "loss": 1.2772, "mean_token_accuracy": 0.6660661300023397, "num_tokens": 3042738444.0, "step": 18153 }, { "entropy": 1.642647961775462, "epoch": 1.9943149048364506, "grad_norm": 0.6520124673843384, "learning_rate": 2.0004000631661763e-06, "loss": 1.3813, "mean_token_accuracy": 0.6548943569262823, "num_tokens": 3042898607.0, "step": 18154 }, { "entropy": 1.762593497832616, "epoch": 1.9944247617478235, "grad_norm": 0.6629482507705688, "learning_rate": 2.0003851089714914e-06, "loss": 1.3936, "mean_token_accuracy": 0.6553806563218435, "num_tokens": 3043072996.0, "step": 18155 }, { "entropy": 1.6303976476192474, "epoch": 1.9945346186591963, "grad_norm": 0.6785465478897095, "learning_rate": 2.000370439610563e-06, "loss": 1.2432, "mean_token_accuracy": 0.6743980348110199, "num_tokens": 3043205963.0, "step": 18156 }, { "entropy": 1.694928377866745, "epoch": 1.9946444755705692, "grad_norm": 0.622810959815979, "learning_rate": 2.000356055083854e-06, "loss": 1.4332, "mean_token_accuracy": 0.6510835389296213, "num_tokens": 3043413561.0, "step": 18157 }, { "entropy": 1.6337503294150035, "epoch": 1.9947543324819423, "grad_norm": 0.5835052132606506, "learning_rate": 2.000341955391821e-06, "loss": 1.4437, "mean_token_accuracy": 0.646688754359881, "num_tokens": 3043625964.0, "step": 18158 }, { "entropy": 1.7825499673684437, "epoch": 1.9948641893933152, "grad_norm": 0.652887761592865, "learning_rate": 2.0003281405349095e-06, "loss": 1.6008, "mean_token_accuracy": 0.6128579080104828, "num_tokens": 3043865035.0, "step": 18159 }, { "entropy": 1.7153000434239705, "epoch": 1.994974046304688, "grad_norm": 0.6086410880088806, "learning_rate": 2.0003146105135573e-06, "loss": 1.1742, "mean_token_accuracy": 0.6773505012194315, "num_tokens": 3044061042.0, "step": 18160 }, { "entropy": 1.6450629631678264, "epoch": 1.9950839032160612, "grad_norm": 0.6745823621749878, "learning_rate": 2.0003013653281926e-06, "loss": 1.3636, "mean_token_accuracy": 0.6562095880508423, "num_tokens": 3044210757.0, "step": 18161 }, { "entropy": 1.7687378525733948, "epoch": 1.9951937601274339, "grad_norm": 0.7521758675575256, "learning_rate": 2.000288404979235e-06, "loss": 1.5204, "mean_token_accuracy": 0.6322930653889974, "num_tokens": 3044409106.0, "step": 18162 }, { "entropy": 1.7188294629255931, "epoch": 1.995303617038807, "grad_norm": 0.6091630458831787, "learning_rate": 2.0002757294670926e-06, "loss": 1.337, "mean_token_accuracy": 0.6657722691694895, "num_tokens": 3044596447.0, "step": 18163 }, { "entropy": 1.7012285093466442, "epoch": 1.9954134739501799, "grad_norm": 0.6440872550010681, "learning_rate": 2.0002633387921676e-06, "loss": 1.3507, "mean_token_accuracy": 0.6604795008897781, "num_tokens": 3044790314.0, "step": 18164 }, { "entropy": 1.7051705221335094, "epoch": 1.9955233308615528, "grad_norm": 0.7219937443733215, "learning_rate": 2.000251232954854e-06, "loss": 1.291, "mean_token_accuracy": 0.6619381904602051, "num_tokens": 3044914619.0, "step": 18165 }, { "entropy": 1.6888903081417084, "epoch": 1.9956331877729259, "grad_norm": 0.637630820274353, "learning_rate": 2.0002394119555326e-06, "loss": 1.389, "mean_token_accuracy": 0.6619627823432287, "num_tokens": 3045098000.0, "step": 18166 }, { "entropy": 1.7443795800209045, "epoch": 1.9957430446842988, "grad_norm": 0.6435023546218872, "learning_rate": 2.000227875794579e-06, "loss": 1.5764, "mean_token_accuracy": 0.620304211974144, "num_tokens": 3045323953.0, "step": 18167 }, { "entropy": 1.6719048420588176, "epoch": 1.9958529015956716, "grad_norm": 0.6005743741989136, "learning_rate": 2.0002166244723573e-06, "loss": 1.4482, "mean_token_accuracy": 0.653143455584844, "num_tokens": 3045542928.0, "step": 18168 }, { "entropy": 1.713512271642685, "epoch": 1.9959627585070445, "grad_norm": 0.7789661884307861, "learning_rate": 2.000205657989225e-06, "loss": 1.3143, "mean_token_accuracy": 0.6610560963551203, "num_tokens": 3045703434.0, "step": 18169 }, { "entropy": 1.7187098960081737, "epoch": 1.9960726154184174, "grad_norm": 0.6709672808647156, "learning_rate": 2.000194976345527e-06, "loss": 1.3616, "mean_token_accuracy": 0.6639542629321417, "num_tokens": 3045898367.0, "step": 18170 }, { "entropy": 1.7733473777770996, "epoch": 1.9961824723297905, "grad_norm": 0.7715442776679993, "learning_rate": 2.0001845795416034e-06, "loss": 1.4116, "mean_token_accuracy": 0.6565881470839182, "num_tokens": 3046044074.0, "step": 18171 }, { "entropy": 1.7301356891791027, "epoch": 1.9962923292411634, "grad_norm": 0.8577042818069458, "learning_rate": 2.0001744675777812e-06, "loss": 1.4882, "mean_token_accuracy": 0.6440813392400742, "num_tokens": 3046206087.0, "step": 18172 }, { "entropy": 1.6903728346029918, "epoch": 1.9964021861525363, "grad_norm": 0.5791963934898376, "learning_rate": 2.000164640454383e-06, "loss": 1.3326, "mean_token_accuracy": 0.661042665441831, "num_tokens": 3046378472.0, "step": 18173 }, { "entropy": 1.687019368012746, "epoch": 1.9965120430639094, "grad_norm": 0.6625068187713623, "learning_rate": 2.000155098171718e-06, "loss": 1.4011, "mean_token_accuracy": 0.6618767331043879, "num_tokens": 3046510726.0, "step": 18174 }, { "entropy": 1.759124368429184, "epoch": 1.996621899975282, "grad_norm": 0.7094929218292236, "learning_rate": 2.000145840730089e-06, "loss": 1.3934, "mean_token_accuracy": 0.6440586149692535, "num_tokens": 3046658513.0, "step": 18175 }, { "entropy": 1.691829909880956, "epoch": 1.9967317568866552, "grad_norm": 0.690881073474884, "learning_rate": 2.000136868129788e-06, "loss": 1.428, "mean_token_accuracy": 0.6513862907886505, "num_tokens": 3046862461.0, "step": 18176 }, { "entropy": 1.7729640205701191, "epoch": 1.996841613798028, "grad_norm": 0.7034747004508972, "learning_rate": 2.0001281803711007e-06, "loss": 1.358, "mean_token_accuracy": 0.6604643066724142, "num_tokens": 3047004414.0, "step": 18177 }, { "entropy": 1.694592813650767, "epoch": 1.996951470709401, "grad_norm": 0.6533588767051697, "learning_rate": 2.0001197774543004e-06, "loss": 1.3068, "mean_token_accuracy": 0.6626057177782059, "num_tokens": 3047148671.0, "step": 18178 }, { "entropy": 1.7134621640046437, "epoch": 1.997061327620774, "grad_norm": 0.5928846597671509, "learning_rate": 2.000111659379654e-06, "loss": 1.4752, "mean_token_accuracy": 0.6483317414919535, "num_tokens": 3047315839.0, "step": 18179 }, { "entropy": 1.7521715660889943, "epoch": 1.997171184532147, "grad_norm": 0.7548753619194031, "learning_rate": 2.000103826147418e-06, "loss": 1.3642, "mean_token_accuracy": 0.6631999164819717, "num_tokens": 3047443079.0, "step": 18180 }, { "entropy": 1.747659554084142, "epoch": 1.9972810414435198, "grad_norm": 0.7508565187454224, "learning_rate": 2.0000962777578404e-06, "loss": 1.3458, "mean_token_accuracy": 0.659741202990214, "num_tokens": 3047566548.0, "step": 18181 }, { "entropy": 1.700925201177597, "epoch": 1.997390898354893, "grad_norm": 0.7700899839401245, "learning_rate": 2.0000890142111605e-06, "loss": 1.1955, "mean_token_accuracy": 0.6900986135005951, "num_tokens": 3047664506.0, "step": 18182 }, { "entropy": 1.6882247428099315, "epoch": 1.9975007552662656, "grad_norm": 0.7430019378662109, "learning_rate": 2.0000820355076072e-06, "loss": 1.4729, "mean_token_accuracy": 0.6475649029016495, "num_tokens": 3047869269.0, "step": 18183 }, { "entropy": 1.6784042815367382, "epoch": 1.9976106121776387, "grad_norm": 0.618037760257721, "learning_rate": 2.000075341647402e-06, "loss": 1.3216, "mean_token_accuracy": 0.6691079139709473, "num_tokens": 3048027510.0, "step": 18184 }, { "entropy": 1.718860884507497, "epoch": 1.9977204690890116, "grad_norm": 0.7390254139900208, "learning_rate": 2.0000689326307567e-06, "loss": 1.2494, "mean_token_accuracy": 0.6714848627646764, "num_tokens": 3048152184.0, "step": 18185 }, { "entropy": 1.6419156392415364, "epoch": 1.9978303260003845, "grad_norm": 0.7709030508995056, "learning_rate": 2.000062808457875e-06, "loss": 1.3404, "mean_token_accuracy": 0.6696422000726064, "num_tokens": 3048287917.0, "step": 18186 }, { "entropy": 1.68554683526357, "epoch": 1.9979401829117576, "grad_norm": 0.591776967048645, "learning_rate": 2.0000569691289495e-06, "loss": 1.4674, "mean_token_accuracy": 0.6471003343661627, "num_tokens": 3048539631.0, "step": 18187 }, { "entropy": 1.698345571756363, "epoch": 1.9980500398231302, "grad_norm": 0.7942776083946228, "learning_rate": 2.0000514146441654e-06, "loss": 1.5863, "mean_token_accuracy": 0.6430133432149887, "num_tokens": 3048720202.0, "step": 18188 }, { "entropy": 1.6566093067328136, "epoch": 1.9981598967345033, "grad_norm": 0.6725602746009827, "learning_rate": 2.0000461450036985e-06, "loss": 1.2891, "mean_token_accuracy": 0.6772060046593348, "num_tokens": 3048914194.0, "step": 18189 }, { "entropy": 1.7242831885814667, "epoch": 1.9982697536458762, "grad_norm": 0.6108221411705017, "learning_rate": 2.0000411602077163e-06, "loss": 1.461, "mean_token_accuracy": 0.6506116489569346, "num_tokens": 3049099713.0, "step": 18190 }, { "entropy": 1.7347515324751537, "epoch": 1.998379610557249, "grad_norm": 0.7071492075920105, "learning_rate": 2.0000364602563753e-06, "loss": 1.3625, "mean_token_accuracy": 0.6629375716050466, "num_tokens": 3049259887.0, "step": 18191 }, { "entropy": 1.7372618913650513, "epoch": 1.9984894674686222, "grad_norm": 0.6914885640144348, "learning_rate": 2.000032045149825e-06, "loss": 1.4312, "mean_token_accuracy": 0.650190552075704, "num_tokens": 3049424470.0, "step": 18192 }, { "entropy": 1.7188272774219513, "epoch": 1.998599324379995, "grad_norm": 0.7079371213912964, "learning_rate": 2.0000279148882053e-06, "loss": 1.6254, "mean_token_accuracy": 0.6389360229174296, "num_tokens": 3049646174.0, "step": 18193 }, { "entropy": 1.6750788291295369, "epoch": 1.998709181291368, "grad_norm": 0.7202898859977722, "learning_rate": 2.000024069471646e-06, "loss": 1.2239, "mean_token_accuracy": 0.6829964170853297, "num_tokens": 3049800566.0, "step": 18194 }, { "entropy": 1.686879813671112, "epoch": 1.998819038202741, "grad_norm": 0.7090538740158081, "learning_rate": 2.0000205089002696e-06, "loss": 1.3509, "mean_token_accuracy": 0.67240938047568, "num_tokens": 3049949036.0, "step": 18195 }, { "entropy": 1.6966327925523121, "epoch": 1.9989288951141138, "grad_norm": 0.6627102494239807, "learning_rate": 2.000017233174189e-06, "loss": 1.3099, "mean_token_accuracy": 0.6580508897701899, "num_tokens": 3050113118.0, "step": 18196 }, { "entropy": 1.7072912355264027, "epoch": 1.9990387520254869, "grad_norm": 0.689150869846344, "learning_rate": 2.0000142422935068e-06, "loss": 1.4376, "mean_token_accuracy": 0.6440875480572382, "num_tokens": 3050310659.0, "step": 18197 }, { "entropy": 1.6617674827575684, "epoch": 1.9991486089368598, "grad_norm": 0.6578044295310974, "learning_rate": 2.000011536258319e-06, "loss": 1.3804, "mean_token_accuracy": 0.6541756838560104, "num_tokens": 3050446831.0, "step": 18198 }, { "entropy": 1.661964366833369, "epoch": 1.9992584658482326, "grad_norm": 0.7953215837478638, "learning_rate": 2.00000911506871e-06, "loss": 1.3325, "mean_token_accuracy": 0.6677337139844894, "num_tokens": 3050595454.0, "step": 18199 }, { "entropy": 1.7314130862553914, "epoch": 1.9993683227596057, "grad_norm": 0.6476884484291077, "learning_rate": 2.0000069787247574e-06, "loss": 1.282, "mean_token_accuracy": 0.6776565164327621, "num_tokens": 3050722494.0, "step": 18200 }, { "entropy": 1.7134939829508464, "epoch": 1.9994781796709784, "grad_norm": 0.7247772812843323, "learning_rate": 2.0000051272265275e-06, "loss": 1.4396, "mean_token_accuracy": 0.6534018168846766, "num_tokens": 3050881167.0, "step": 18201 }, { "entropy": 1.6716128786404927, "epoch": 1.9995880365823515, "grad_norm": 0.6074260473251343, "learning_rate": 2.000003560574081e-06, "loss": 1.3665, "mean_token_accuracy": 0.6494092990954717, "num_tokens": 3051103681.0, "step": 18202 }, { "entropy": 1.7386046648025513, "epoch": 1.9996978934937244, "grad_norm": 0.7610368728637695, "learning_rate": 2.000002278767466e-06, "loss": 1.4719, "mean_token_accuracy": 0.6267879009246826, "num_tokens": 3051336600.0, "step": 18203 }, { "entropy": 1.6595459183057149, "epoch": 1.9998077504050973, "grad_norm": 1.1902509927749634, "learning_rate": 2.000001281806723e-06, "loss": 1.319, "mean_token_accuracy": 0.6607537617286047, "num_tokens": 3051506942.0, "step": 18204 }, { "entropy": 1.6532653272151947, "epoch": 1.9999176073164704, "grad_norm": 0.6938499212265015, "learning_rate": 2.000000569691885e-06, "loss": 1.3618, "mean_token_accuracy": 0.6603017499049505, "num_tokens": 3051631616.0, "step": 18205 }, { "entropy": 1.7311393817265828, "epoch": 2.0, "grad_norm": 0.7615280747413635, "learning_rate": 2.0000001424229725e-06, "loss": 1.4697, "mean_token_accuracy": 0.6398886442184448, "num_tokens": 3051740039.0, "step": 18206 }, { "epoch": 2.0, "step": 18206, "total_flos": 3.1404917922249834e+19, "train_loss": 1.4016287254459527, "train_runtime": 304386.5826, "train_samples_per_second": 7.177, "train_steps_per_second": 0.06 } ], "logging_steps": 1, "max_steps": 18206, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.1404917922249834e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }