{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 4.963671128107075, "eval_steps": 500, "global_step": 650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0076481835564053535, "grad_norm": 0.9770538524861805, "learning_rate": 1e-05, "loss": 0.5557, "mean_token_accuracy": 0.8261559456586838, "num_tokens": 576000.0, "step": 1 }, { "epoch": 0.015296367112810707, "grad_norm": 1.0016201329962942, "learning_rate": 9.999996405511328e-06, "loss": 0.5724, "mean_token_accuracy": 0.8204647451639175, "num_tokens": 1152000.0, "step": 2 }, { "epoch": 0.022944550669216062, "grad_norm": 0.8796352845621198, "learning_rate": 9.999985622050476e-06, "loss": 0.5616, "mean_token_accuracy": 0.8209040015935898, "num_tokens": 1728000.0, "step": 3 }, { "epoch": 0.030592734225621414, "grad_norm": 0.7858551577048387, "learning_rate": 9.999967649632953e-06, "loss": 0.583, "mean_token_accuracy": 0.8155530691146851, "num_tokens": 2304000.0, "step": 4 }, { "epoch": 0.03824091778202677, "grad_norm": 0.3692234525413837, "learning_rate": 9.999942488284598e-06, "loss": 0.5371, "mean_token_accuracy": 0.8288730829954147, "num_tokens": 2880000.0, "step": 5 }, { "epoch": 0.045889101338432124, "grad_norm": 0.337938001083141, "learning_rate": 9.999910138041584e-06, "loss": 0.5252, "mean_token_accuracy": 0.8318870887160301, "num_tokens": 3456000.0, "step": 6 }, { "epoch": 0.05353728489483748, "grad_norm": 0.3249029142805452, "learning_rate": 9.99987059895043e-06, "loss": 0.5661, "mean_token_accuracy": 0.8197997808456421, "num_tokens": 4032000.0, "step": 7 }, { "epoch": 0.06118546845124283, "grad_norm": 0.6141658703983041, "learning_rate": 9.999823871067981e-06, "loss": 0.5406, "mean_token_accuracy": 0.8242708295583725, "num_tokens": 4585945.0, "step": 8 }, { "epoch": 0.06883365200764818, "grad_norm": 0.7987265960698617, "learning_rate": 9.999769954461425e-06, "loss": 0.5426, "mean_token_accuracy": 0.8265986815094948, "num_tokens": 5161945.0, "step": 9 }, { "epoch": 0.07648183556405354, "grad_norm": 0.7395483390721038, "learning_rate": 9.999708849208279e-06, "loss": 0.5517, "mean_token_accuracy": 0.8210133761167526, "num_tokens": 5737945.0, "step": 10 }, { "epoch": 0.0841300191204589, "grad_norm": 0.6041875583696124, "learning_rate": 9.999640555396404e-06, "loss": 0.5331, "mean_token_accuracy": 0.8267983421683311, "num_tokens": 6313945.0, "step": 11 }, { "epoch": 0.09177820267686425, "grad_norm": 0.45467723125722653, "learning_rate": 9.999565073123991e-06, "loss": 0.5164, "mean_token_accuracy": 0.8316023647785187, "num_tokens": 6889945.0, "step": 12 }, { "epoch": 0.0994263862332696, "grad_norm": 0.32791299547939345, "learning_rate": 9.999482402499569e-06, "loss": 0.5475, "mean_token_accuracy": 0.8233416005969048, "num_tokens": 7465945.0, "step": 13 }, { "epoch": 0.10707456978967496, "grad_norm": 0.23684367528291977, "learning_rate": 9.999392543642e-06, "loss": 0.5361, "mean_token_accuracy": 0.8290345445275307, "num_tokens": 8041945.0, "step": 14 }, { "epoch": 0.1147227533460803, "grad_norm": 0.2528394745096081, "learning_rate": 9.999295496680482e-06, "loss": 0.5142, "mean_token_accuracy": 0.8332222104072571, "num_tokens": 8617945.0, "step": 15 }, { "epoch": 0.12237093690248566, "grad_norm": 0.3264470002400492, "learning_rate": 9.99919126175455e-06, "loss": 0.5682, "mean_token_accuracy": 0.8158846944570541, "num_tokens": 9193945.0, "step": 16 }, { "epoch": 0.13001912045889102, "grad_norm": 0.37843802091227036, "learning_rate": 9.999079839014074e-06, "loss": 0.4766, "mean_token_accuracy": 0.8428528308868408, "num_tokens": 9769945.0, "step": 17 }, { "epoch": 0.13766730401529637, "grad_norm": 0.3930284951759443, "learning_rate": 9.998961228619255e-06, "loss": 0.5433, "mean_token_accuracy": 0.8239961341023445, "num_tokens": 10345945.0, "step": 18 }, { "epoch": 0.14531548757170173, "grad_norm": 0.38145764149019024, "learning_rate": 9.99883543074063e-06, "loss": 0.5147, "mean_token_accuracy": 0.8341441303491592, "num_tokens": 10921945.0, "step": 19 }, { "epoch": 0.15296367112810708, "grad_norm": 0.37363877853538846, "learning_rate": 9.998702445559071e-06, "loss": 0.5283, "mean_token_accuracy": 0.8283886983990669, "num_tokens": 11497945.0, "step": 20 }, { "epoch": 0.16061185468451242, "grad_norm": 0.34011521406149564, "learning_rate": 9.998562273265786e-06, "loss": 0.518, "mean_token_accuracy": 0.831451490521431, "num_tokens": 12060592.0, "step": 21 }, { "epoch": 0.1682600382409178, "grad_norm": 0.2676333142200147, "learning_rate": 9.99841491406231e-06, "loss": 0.5285, "mean_token_accuracy": 0.8288713470101357, "num_tokens": 12636592.0, "step": 22 }, { "epoch": 0.17590822179732313, "grad_norm": 0.24262999668848875, "learning_rate": 9.99826036816052e-06, "loss": 0.5199, "mean_token_accuracy": 0.8309321850538254, "num_tokens": 13212592.0, "step": 23 }, { "epoch": 0.1835564053537285, "grad_norm": 0.21796704803826206, "learning_rate": 9.998098635782616e-06, "loss": 0.5281, "mean_token_accuracy": 0.8288609310984612, "num_tokens": 13788592.0, "step": 24 }, { "epoch": 0.19120458891013384, "grad_norm": 0.20760778914940356, "learning_rate": 9.997929717161142e-06, "loss": 0.5368, "mean_token_accuracy": 0.8261212259531021, "num_tokens": 14364592.0, "step": 25 }, { "epoch": 0.1988527724665392, "grad_norm": 0.25167231093180664, "learning_rate": 9.997753612538963e-06, "loss": 0.4973, "mean_token_accuracy": 0.8385418727993965, "num_tokens": 14940592.0, "step": 26 }, { "epoch": 0.20650095602294455, "grad_norm": 0.2755218609160029, "learning_rate": 9.997570322169285e-06, "loss": 0.5221, "mean_token_accuracy": 0.8294894322752953, "num_tokens": 15516592.0, "step": 27 }, { "epoch": 0.21414913957934992, "grad_norm": 0.2808823032638946, "learning_rate": 9.99737984631564e-06, "loss": 0.5264, "mean_token_accuracy": 0.8296040147542953, "num_tokens": 16092592.0, "step": 28 }, { "epoch": 0.22179732313575526, "grad_norm": 0.2583300789913383, "learning_rate": 9.997182185251896e-06, "loss": 0.5098, "mean_token_accuracy": 0.8332170099020004, "num_tokens": 16668592.0, "step": 29 }, { "epoch": 0.2294455066921606, "grad_norm": 0.25870516586563114, "learning_rate": 9.996977339262247e-06, "loss": 0.4964, "mean_token_accuracy": 0.8371130004525185, "num_tokens": 17244592.0, "step": 30 }, { "epoch": 0.23709369024856597, "grad_norm": 0.2296798543288584, "learning_rate": 9.996765308641218e-06, "loss": 0.5203, "mean_token_accuracy": 0.8292376697063446, "num_tokens": 17820592.0, "step": 31 }, { "epoch": 0.2447418738049713, "grad_norm": 0.20940808224809382, "learning_rate": 9.996546093693671e-06, "loss": 0.5297, "mean_token_accuracy": 0.828244574368, "num_tokens": 18396592.0, "step": 32 }, { "epoch": 0.25239005736137665, "grad_norm": 0.15777243777113475, "learning_rate": 9.996319694734787e-06, "loss": 0.4841, "mean_token_accuracy": 0.8402329161763191, "num_tokens": 18972592.0, "step": 33 }, { "epoch": 0.26003824091778205, "grad_norm": 0.17599866369775718, "learning_rate": 9.996086112090085e-06, "loss": 0.5302, "mean_token_accuracy": 0.8264007493853569, "num_tokens": 19548592.0, "step": 34 }, { "epoch": 0.2676864244741874, "grad_norm": 0.197575035953607, "learning_rate": 9.995845346095406e-06, "loss": 0.5239, "mean_token_accuracy": 0.8283487483859062, "num_tokens": 20124592.0, "step": 35 }, { "epoch": 0.27533460803059273, "grad_norm": 0.18551509540101133, "learning_rate": 9.995597397096923e-06, "loss": 0.5166, "mean_token_accuracy": 0.8316527083516121, "num_tokens": 20700592.0, "step": 36 }, { "epoch": 0.2829827915869981, "grad_norm": 0.18027752934372468, "learning_rate": 9.995342265451138e-06, "loss": 0.4946, "mean_token_accuracy": 0.8379411548376083, "num_tokens": 21276592.0, "step": 37 }, { "epoch": 0.29063097514340347, "grad_norm": 0.1783882139249774, "learning_rate": 9.995079951524876e-06, "loss": 0.5298, "mean_token_accuracy": 0.8277133032679558, "num_tokens": 21852592.0, "step": 38 }, { "epoch": 0.2982791586998088, "grad_norm": 0.1617380846519598, "learning_rate": 9.994810455695291e-06, "loss": 0.481, "mean_token_accuracy": 0.8421861305832863, "num_tokens": 22428592.0, "step": 39 }, { "epoch": 0.30592734225621415, "grad_norm": 0.17083879189298595, "learning_rate": 9.994533778349867e-06, "loss": 0.5269, "mean_token_accuracy": 0.828789733350277, "num_tokens": 23004592.0, "step": 40 }, { "epoch": 0.3135755258126195, "grad_norm": 0.1509185021498891, "learning_rate": 9.994249919886402e-06, "loss": 0.5004, "mean_token_accuracy": 0.8369046598672867, "num_tokens": 23580592.0, "step": 41 }, { "epoch": 0.32122370936902483, "grad_norm": 0.1511275570122673, "learning_rate": 9.993958880713033e-06, "loss": 0.5094, "mean_token_accuracy": 0.8330798596143723, "num_tokens": 24156592.0, "step": 42 }, { "epoch": 0.32887189292543023, "grad_norm": 0.16474397925550863, "learning_rate": 9.99366066124821e-06, "loss": 0.5083, "mean_token_accuracy": 0.8331527784466743, "num_tokens": 24732592.0, "step": 43 }, { "epoch": 0.3365200764818356, "grad_norm": 0.1510059303541963, "learning_rate": 9.993355261920714e-06, "loss": 0.4811, "mean_token_accuracy": 0.8414968326687813, "num_tokens": 25306492.0, "step": 44 }, { "epoch": 0.3441682600382409, "grad_norm": 0.2206211577243794, "learning_rate": 9.993042683169647e-06, "loss": 0.5145, "mean_token_accuracy": 0.8324548229575157, "num_tokens": 25882492.0, "step": 45 }, { "epoch": 0.35181644359464626, "grad_norm": 0.19826261020124922, "learning_rate": 9.992722925444434e-06, "loss": 0.525, "mean_token_accuracy": 0.8288452923297882, "num_tokens": 26458492.0, "step": 46 }, { "epoch": 0.35946462715105165, "grad_norm": 0.1539436477198697, "learning_rate": 9.992395989204818e-06, "loss": 0.531, "mean_token_accuracy": 0.8279685229063034, "num_tokens": 27034492.0, "step": 47 }, { "epoch": 0.367112810707457, "grad_norm": 0.14749502264744746, "learning_rate": 9.992061874920869e-06, "loss": 0.5213, "mean_token_accuracy": 0.8304547518491745, "num_tokens": 27610492.0, "step": 48 }, { "epoch": 0.37476099426386233, "grad_norm": 0.15532363519268472, "learning_rate": 9.991720583072975e-06, "loss": 0.4906, "mean_token_accuracy": 0.8390665575861931, "num_tokens": 28178987.0, "step": 49 }, { "epoch": 0.3824091778202677, "grad_norm": 0.23556356163528056, "learning_rate": 9.991372114151843e-06, "loss": 0.5353, "mean_token_accuracy": 0.826218456029892, "num_tokens": 28754987.0, "step": 50 }, { "epoch": 0.390057361376673, "grad_norm": 0.15385965560459058, "learning_rate": 9.9910164686585e-06, "loss": 0.5187, "mean_token_accuracy": 0.8308228179812431, "num_tokens": 29330987.0, "step": 51 }, { "epoch": 0.3977055449330784, "grad_norm": 0.1436263981933282, "learning_rate": 9.990653647104292e-06, "loss": 0.5153, "mean_token_accuracy": 0.8325798138976097, "num_tokens": 29906987.0, "step": 52 }, { "epoch": 0.40535372848948376, "grad_norm": 0.14302536490724446, "learning_rate": 9.990283650010883e-06, "loss": 0.4872, "mean_token_accuracy": 0.8397606834769249, "num_tokens": 30482987.0, "step": 53 }, { "epoch": 0.4130019120458891, "grad_norm": 0.17140284653127127, "learning_rate": 9.98990647791025e-06, "loss": 0.5318, "mean_token_accuracy": 0.8275379464030266, "num_tokens": 31058987.0, "step": 54 }, { "epoch": 0.42065009560229444, "grad_norm": 0.15486894690765854, "learning_rate": 9.989522131344693e-06, "loss": 0.462, "mean_token_accuracy": 0.8481464609503746, "num_tokens": 31634987.0, "step": 55 }, { "epoch": 0.42829827915869984, "grad_norm": 0.14715377366317914, "learning_rate": 9.989130610866822e-06, "loss": 0.4978, "mean_token_accuracy": 0.8358698934316635, "num_tokens": 32210987.0, "step": 56 }, { "epoch": 0.4359464627151052, "grad_norm": 0.133253236290824, "learning_rate": 9.988731917039564e-06, "loss": 0.5115, "mean_token_accuracy": 0.8322514817118645, "num_tokens": 32784624.0, "step": 57 }, { "epoch": 0.4435946462715105, "grad_norm": 0.1424854004952171, "learning_rate": 9.988326050436158e-06, "loss": 0.5227, "mean_token_accuracy": 0.8293939307332039, "num_tokens": 33360624.0, "step": 58 }, { "epoch": 0.45124282982791586, "grad_norm": 0.14122226382716493, "learning_rate": 9.987913011640157e-06, "loss": 0.5248, "mean_token_accuracy": 0.8290501683950424, "num_tokens": 33936624.0, "step": 59 }, { "epoch": 0.4588910133843212, "grad_norm": 0.13993101979440462, "learning_rate": 9.987492801245431e-06, "loss": 0.4793, "mean_token_accuracy": 0.8423371836543083, "num_tokens": 34512624.0, "step": 60 }, { "epoch": 0.4665391969407266, "grad_norm": 0.14632054172475426, "learning_rate": 9.98706541985615e-06, "loss": 0.5469, "mean_token_accuracy": 0.821442224085331, "num_tokens": 35074731.0, "step": 61 }, { "epoch": 0.47418738049713194, "grad_norm": 0.1400105720290886, "learning_rate": 9.986630868086804e-06, "loss": 0.5063, "mean_token_accuracy": 0.8351511061191559, "num_tokens": 35650731.0, "step": 62 }, { "epoch": 0.4818355640535373, "grad_norm": 0.14863573475989134, "learning_rate": 9.986189146562191e-06, "loss": 0.5222, "mean_token_accuracy": 0.8316596522927284, "num_tokens": 36226731.0, "step": 63 }, { "epoch": 0.4894837476099426, "grad_norm": 0.14605144207294682, "learning_rate": 9.985740255917413e-06, "loss": 0.4925, "mean_token_accuracy": 0.8410836607217789, "num_tokens": 36802731.0, "step": 64 }, { "epoch": 0.497131931166348, "grad_norm": 0.1357927018597777, "learning_rate": 9.985284196797884e-06, "loss": 0.5007, "mean_token_accuracy": 0.835041731595993, "num_tokens": 37378731.0, "step": 65 }, { "epoch": 0.5047801147227533, "grad_norm": 0.16344701399255454, "learning_rate": 9.984820969859326e-06, "loss": 0.5519, "mean_token_accuracy": 0.8199577778577805, "num_tokens": 37954731.0, "step": 66 }, { "epoch": 0.5124282982791587, "grad_norm": 0.13295009461309124, "learning_rate": 9.984350575767763e-06, "loss": 0.4968, "mean_token_accuracy": 0.8373057246208191, "num_tokens": 38530731.0, "step": 67 }, { "epoch": 0.5200764818355641, "grad_norm": 0.1316301231700835, "learning_rate": 9.983873015199524e-06, "loss": 0.4541, "mean_token_accuracy": 0.8496673479676247, "num_tokens": 39106731.0, "step": 68 }, { "epoch": 0.5277246653919694, "grad_norm": 0.1381202418270699, "learning_rate": 9.983388288841246e-06, "loss": 0.5195, "mean_token_accuracy": 0.8305971175432205, "num_tokens": 39682731.0, "step": 69 }, { "epoch": 0.5353728489483748, "grad_norm": 0.13415757088597044, "learning_rate": 9.982896397389866e-06, "loss": 0.4641, "mean_token_accuracy": 0.8476342782378197, "num_tokens": 40258731.0, "step": 70 }, { "epoch": 0.5430210325047801, "grad_norm": 0.13977158090215638, "learning_rate": 9.98239734155262e-06, "loss": 0.5204, "mean_token_accuracy": 0.8300554230809212, "num_tokens": 40834731.0, "step": 71 }, { "epoch": 0.5506692160611855, "grad_norm": 0.1414811559776876, "learning_rate": 9.981891122047052e-06, "loss": 0.5039, "mean_token_accuracy": 0.8352587595582008, "num_tokens": 41410731.0, "step": 72 }, { "epoch": 0.5583173996175909, "grad_norm": 0.14675842034623368, "learning_rate": 9.981377739601002e-06, "loss": 0.5249, "mean_token_accuracy": 0.8289755135774612, "num_tokens": 41986731.0, "step": 73 }, { "epoch": 0.5659655831739961, "grad_norm": 0.1422646453200256, "learning_rate": 9.980857194952605e-06, "loss": 0.528, "mean_token_accuracy": 0.8280744329094887, "num_tokens": 42562731.0, "step": 74 }, { "epoch": 0.5736137667304015, "grad_norm": 0.1428993249729862, "learning_rate": 9.980329488850303e-06, "loss": 0.5147, "mean_token_accuracy": 0.8318679928779602, "num_tokens": 43138731.0, "step": 75 }, { "epoch": 0.5812619502868069, "grad_norm": 0.13904745870216947, "learning_rate": 9.979794622052825e-06, "loss": 0.5101, "mean_token_accuracy": 0.8323385044932365, "num_tokens": 43714731.0, "step": 76 }, { "epoch": 0.5889101338432122, "grad_norm": 0.1408937048587796, "learning_rate": 9.979252595329204e-06, "loss": 0.4874, "mean_token_accuracy": 0.8402296751737595, "num_tokens": 44267402.0, "step": 77 }, { "epoch": 0.5965583173996176, "grad_norm": 0.14639439502500298, "learning_rate": 9.97870340945876e-06, "loss": 0.5191, "mean_token_accuracy": 0.8298158273100853, "num_tokens": 44843402.0, "step": 78 }, { "epoch": 0.6042065009560229, "grad_norm": 0.14225383180098447, "learning_rate": 9.978147065231114e-06, "loss": 0.501, "mean_token_accuracy": 0.8345382362604141, "num_tokens": 45419402.0, "step": 79 }, { "epoch": 0.6118546845124283, "grad_norm": 0.15165811102237464, "learning_rate": 9.97758356344617e-06, "loss": 0.5053, "mean_token_accuracy": 0.8332482650876045, "num_tokens": 45995402.0, "step": 80 }, { "epoch": 0.6195028680688337, "grad_norm": 0.13839520774785488, "learning_rate": 9.977012904914133e-06, "loss": 0.5085, "mean_token_accuracy": 0.8349983245134354, "num_tokens": 46571402.0, "step": 81 }, { "epoch": 0.627151051625239, "grad_norm": 0.14712644887933124, "learning_rate": 9.97643509045549e-06, "loss": 0.5471, "mean_token_accuracy": 0.8224283754825592, "num_tokens": 47147402.0, "step": 82 }, { "epoch": 0.6347992351816444, "grad_norm": 0.13934749804071309, "learning_rate": 9.975850120901023e-06, "loss": 0.5107, "mean_token_accuracy": 0.832944430410862, "num_tokens": 47723402.0, "step": 83 }, { "epoch": 0.6424474187380497, "grad_norm": 0.14184064832659418, "learning_rate": 9.975257997091795e-06, "loss": 0.5019, "mean_token_accuracy": 0.8352049514651299, "num_tokens": 48299402.0, "step": 84 }, { "epoch": 0.6500956022944551, "grad_norm": 0.1592732332566689, "learning_rate": 9.974658719879163e-06, "loss": 0.5118, "mean_token_accuracy": 0.8321266919374466, "num_tokens": 48875402.0, "step": 85 }, { "epoch": 0.6577437858508605, "grad_norm": 0.13974194258972175, "learning_rate": 9.97405229012476e-06, "loss": 0.5183, "mean_token_accuracy": 0.8313193619251251, "num_tokens": 49451402.0, "step": 86 }, { "epoch": 0.6653919694072657, "grad_norm": 0.13246562428620298, "learning_rate": 9.973438708700513e-06, "loss": 0.4767, "mean_token_accuracy": 0.843241736292839, "num_tokens": 50027402.0, "step": 87 }, { "epoch": 0.6730401529636711, "grad_norm": 0.16684991357812562, "learning_rate": 9.972817976488623e-06, "loss": 0.4971, "mean_token_accuracy": 0.8372588530182838, "num_tokens": 50603402.0, "step": 88 }, { "epoch": 0.6806883365200764, "grad_norm": 0.13340574719306686, "learning_rate": 9.972190094381578e-06, "loss": 0.4949, "mean_token_accuracy": 0.8363786041736603, "num_tokens": 51179402.0, "step": 89 }, { "epoch": 0.6883365200764818, "grad_norm": 0.14359639069142585, "learning_rate": 9.971555063282145e-06, "loss": 0.4855, "mean_token_accuracy": 0.8407451063394547, "num_tokens": 51755402.0, "step": 90 }, { "epoch": 0.6959847036328872, "grad_norm": 0.1465843545803842, "learning_rate": 9.970912884103365e-06, "loss": 0.5498, "mean_token_accuracy": 0.8212963715195656, "num_tokens": 52331402.0, "step": 91 }, { "epoch": 0.7036328871892925, "grad_norm": 0.13373715318944093, "learning_rate": 9.970263557768565e-06, "loss": 0.5128, "mean_token_accuracy": 0.8318002820014954, "num_tokens": 52907402.0, "step": 92 }, { "epoch": 0.7112810707456979, "grad_norm": 0.1425977955340504, "learning_rate": 9.96960708521134e-06, "loss": 0.4922, "mean_token_accuracy": 0.8385525420308113, "num_tokens": 53473097.0, "step": 93 }, { "epoch": 0.7189292543021033, "grad_norm": 0.1414493480461768, "learning_rate": 9.968943467375563e-06, "loss": 0.5015, "mean_token_accuracy": 0.8355400264263153, "num_tokens": 54049097.0, "step": 94 }, { "epoch": 0.7265774378585086, "grad_norm": 0.13127380227428548, "learning_rate": 9.968272705215382e-06, "loss": 0.4915, "mean_token_accuracy": 0.8390123769640923, "num_tokens": 54609292.0, "step": 95 }, { "epoch": 0.734225621414914, "grad_norm": 0.13467129663035832, "learning_rate": 9.967594799695218e-06, "loss": 0.4794, "mean_token_accuracy": 0.8427561372518539, "num_tokens": 55170798.0, "step": 96 }, { "epoch": 0.7418738049713193, "grad_norm": 0.13621387326515388, "learning_rate": 9.966909751789758e-06, "loss": 0.487, "mean_token_accuracy": 0.8411270678043365, "num_tokens": 55746798.0, "step": 97 }, { "epoch": 0.7495219885277247, "grad_norm": 0.14264410268094876, "learning_rate": 9.96621756248396e-06, "loss": 0.4951, "mean_token_accuracy": 0.8365452662110329, "num_tokens": 56322798.0, "step": 98 }, { "epoch": 0.7571701720841301, "grad_norm": 0.13879533507650615, "learning_rate": 9.965518232773052e-06, "loss": 0.5045, "mean_token_accuracy": 0.8338455036282539, "num_tokens": 56898798.0, "step": 99 }, { "epoch": 0.7648183556405354, "grad_norm": 0.13281867848059528, "learning_rate": 9.964811763662528e-06, "loss": 0.4848, "mean_token_accuracy": 0.8396026864647865, "num_tokens": 57474798.0, "step": 100 }, { "epoch": 0.7724665391969407, "grad_norm": 0.14634204976216025, "learning_rate": 9.964098156168143e-06, "loss": 0.5432, "mean_token_accuracy": 0.822074182331562, "num_tokens": 58050798.0, "step": 101 }, { "epoch": 0.780114722753346, "grad_norm": 0.14047489451950268, "learning_rate": 9.963377411315922e-06, "loss": 0.5234, "mean_token_accuracy": 0.829129159450531, "num_tokens": 58625739.0, "step": 102 }, { "epoch": 0.7877629063097514, "grad_norm": 0.14274210766707315, "learning_rate": 9.962649530142147e-06, "loss": 0.5335, "mean_token_accuracy": 0.8258555829524994, "num_tokens": 59201739.0, "step": 103 }, { "epoch": 0.7954110898661568, "grad_norm": 0.14094212771766002, "learning_rate": 9.961914513693362e-06, "loss": 0.5211, "mean_token_accuracy": 0.8285171613097191, "num_tokens": 59777739.0, "step": 104 }, { "epoch": 0.8030592734225621, "grad_norm": 0.13762839208797772, "learning_rate": 9.96117236302637e-06, "loss": 0.5069, "mean_token_accuracy": 0.8346649780869484, "num_tokens": 60353739.0, "step": 105 }, { "epoch": 0.8107074569789675, "grad_norm": 0.1465369564636583, "learning_rate": 9.960423079208235e-06, "loss": 0.4612, "mean_token_accuracy": 0.8477652445435524, "num_tokens": 60907009.0, "step": 106 }, { "epoch": 0.8183556405353728, "grad_norm": 0.14637852098727955, "learning_rate": 9.959666663316269e-06, "loss": 0.5497, "mean_token_accuracy": 0.8230603337287903, "num_tokens": 61483009.0, "step": 107 }, { "epoch": 0.8260038240917782, "grad_norm": 0.1347219196728508, "learning_rate": 9.958903116438049e-06, "loss": 0.4858, "mean_token_accuracy": 0.841705210506916, "num_tokens": 62059009.0, "step": 108 }, { "epoch": 0.8336520076481836, "grad_norm": 0.13871053974279857, "learning_rate": 9.958132439671392e-06, "loss": 0.5269, "mean_token_accuracy": 0.8272046074271202, "num_tokens": 62635009.0, "step": 109 }, { "epoch": 0.8413001912045889, "grad_norm": 0.13866989193780266, "learning_rate": 9.95735463412438e-06, "loss": 0.4786, "mean_token_accuracy": 0.8430299237370491, "num_tokens": 63211009.0, "step": 110 }, { "epoch": 0.8489483747609943, "grad_norm": 0.1423988117487116, "learning_rate": 9.956569700915338e-06, "loss": 0.5059, "mean_token_accuracy": 0.8349323570728302, "num_tokens": 63787009.0, "step": 111 }, { "epoch": 0.8565965583173997, "grad_norm": 0.16163953173723308, "learning_rate": 9.955777641172836e-06, "loss": 0.4767, "mean_token_accuracy": 0.8433250710368156, "num_tokens": 64363009.0, "step": 112 }, { "epoch": 0.864244741873805, "grad_norm": 0.13530156968054668, "learning_rate": 9.954978456035695e-06, "loss": 0.4691, "mean_token_accuracy": 0.8455977290868759, "num_tokens": 64939009.0, "step": 113 }, { "epoch": 0.8718929254302104, "grad_norm": 0.13247546863564885, "learning_rate": 9.95417214665298e-06, "loss": 0.5163, "mean_token_accuracy": 0.8312828913331032, "num_tokens": 65515009.0, "step": 114 }, { "epoch": 0.8795411089866156, "grad_norm": 0.16495562650776383, "learning_rate": 9.953358714183999e-06, "loss": 0.5142, "mean_token_accuracy": 0.8312735706567764, "num_tokens": 66073393.0, "step": 115 }, { "epoch": 0.887189292543021, "grad_norm": 0.136972715382717, "learning_rate": 9.9525381597983e-06, "loss": 0.5155, "mean_token_accuracy": 0.8300295248627663, "num_tokens": 66643616.0, "step": 116 }, { "epoch": 0.8948374760994264, "grad_norm": 0.24477498311580997, "learning_rate": 9.951710484675677e-06, "loss": 0.4987, "mean_token_accuracy": 0.8363108783960342, "num_tokens": 67219616.0, "step": 117 }, { "epoch": 0.9024856596558317, "grad_norm": 0.15547014864530215, "learning_rate": 9.950875690006152e-06, "loss": 0.525, "mean_token_accuracy": 0.8279963135719299, "num_tokens": 67795616.0, "step": 118 }, { "epoch": 0.9101338432122371, "grad_norm": 0.13565975154579993, "learning_rate": 9.950033776989994e-06, "loss": 0.4875, "mean_token_accuracy": 0.8386981412768364, "num_tokens": 68371616.0, "step": 119 }, { "epoch": 0.9177820267686424, "grad_norm": 0.14096319378387628, "learning_rate": 9.949184746837697e-06, "loss": 0.5275, "mean_token_accuracy": 0.8280240818858147, "num_tokens": 68947616.0, "step": 120 }, { "epoch": 0.9254302103250478, "grad_norm": 0.14179059237775263, "learning_rate": 9.948328600769996e-06, "loss": 0.5041, "mean_token_accuracy": 0.834081619977951, "num_tokens": 69523616.0, "step": 121 }, { "epoch": 0.9330783938814532, "grad_norm": 0.13988551396496146, "learning_rate": 9.947465340017853e-06, "loss": 0.5219, "mean_token_accuracy": 0.8292012140154839, "num_tokens": 70099616.0, "step": 122 }, { "epoch": 0.9407265774378585, "grad_norm": 0.13758598960469923, "learning_rate": 9.94659496582246e-06, "loss": 0.4947, "mean_token_accuracy": 0.8360296338796616, "num_tokens": 70675616.0, "step": 123 }, { "epoch": 0.9483747609942639, "grad_norm": 0.1332527555285676, "learning_rate": 9.945717479435236e-06, "loss": 0.4987, "mean_token_accuracy": 0.8360331058502197, "num_tokens": 71251616.0, "step": 124 }, { "epoch": 0.9560229445506692, "grad_norm": 0.1346620611912445, "learning_rate": 9.94483288211783e-06, "loss": 0.4766, "mean_token_accuracy": 0.8422486484050751, "num_tokens": 71827616.0, "step": 125 }, { "epoch": 0.9636711281070746, "grad_norm": 0.1412101809738773, "learning_rate": 9.943941175142109e-06, "loss": 0.5009, "mean_token_accuracy": 0.8348843902349472, "num_tokens": 72389881.0, "step": 126 }, { "epoch": 0.97131931166348, "grad_norm": 0.1468591145222982, "learning_rate": 9.943042359790168e-06, "loss": 0.5318, "mean_token_accuracy": 0.8273720592260361, "num_tokens": 72963213.0, "step": 127 }, { "epoch": 0.9789674952198852, "grad_norm": 0.1920244752165593, "learning_rate": 9.942136437354316e-06, "loss": 0.5495, "mean_token_accuracy": 0.8214943110942841, "num_tokens": 73539213.0, "step": 128 }, { "epoch": 0.9866156787762906, "grad_norm": 0.15090295706963755, "learning_rate": 9.941223409137088e-06, "loss": 0.5079, "mean_token_accuracy": 0.8330902680754662, "num_tokens": 74115213.0, "step": 129 }, { "epoch": 0.994263862332696, "grad_norm": 0.13592806085375692, "learning_rate": 9.94030327645123e-06, "loss": 0.5012, "mean_token_accuracy": 0.8337656334042549, "num_tokens": 74691213.0, "step": 130 }, { "epoch": 1.0, "grad_norm": 0.15427033521640424, "learning_rate": 9.939376040619707e-06, "loss": 0.4586, "mean_token_accuracy": 0.8492923180262247, "num_tokens": 75123213.0, "step": 131 }, { "epoch": 1.0076481835564053, "grad_norm": 0.1365403277523185, "learning_rate": 9.938441702975689e-06, "loss": 0.4728, "mean_token_accuracy": 0.844464011490345, "num_tokens": 75699213.0, "step": 132 }, { "epoch": 1.0152963671128108, "grad_norm": 0.13190000895931886, "learning_rate": 9.937500264862567e-06, "loss": 0.4705, "mean_token_accuracy": 0.8449570834636688, "num_tokens": 76275213.0, "step": 133 }, { "epoch": 1.022944550669216, "grad_norm": 0.1389937992497058, "learning_rate": 9.936551727633934e-06, "loss": 0.4889, "mean_token_accuracy": 0.8385332003235817, "num_tokens": 76851213.0, "step": 134 }, { "epoch": 1.0305927342256214, "grad_norm": 0.12744266634531476, "learning_rate": 9.935596092653596e-06, "loss": 0.4817, "mean_token_accuracy": 0.8396877720952034, "num_tokens": 77427213.0, "step": 135 }, { "epoch": 1.0382409177820269, "grad_norm": 0.13524852499452464, "learning_rate": 9.934633361295558e-06, "loss": 0.4857, "mean_token_accuracy": 0.8397051244974136, "num_tokens": 78003213.0, "step": 136 }, { "epoch": 1.0458891013384322, "grad_norm": 0.1399382135972632, "learning_rate": 9.933663534944029e-06, "loss": 0.4856, "mean_token_accuracy": 0.8398839607834816, "num_tokens": 78579213.0, "step": 137 }, { "epoch": 1.0535372848948374, "grad_norm": 0.13322210319690403, "learning_rate": 9.932686614993425e-06, "loss": 0.4648, "mean_token_accuracy": 0.845810703933239, "num_tokens": 79141860.0, "step": 138 }, { "epoch": 1.0611854684512427, "grad_norm": 0.13213772381798283, "learning_rate": 9.931702602848354e-06, "loss": 0.4571, "mean_token_accuracy": 0.8474953845143318, "num_tokens": 79717860.0, "step": 139 }, { "epoch": 1.0688336520076482, "grad_norm": 0.1331479705432894, "learning_rate": 9.930711499923626e-06, "loss": 0.4984, "mean_token_accuracy": 0.835312582552433, "num_tokens": 80293860.0, "step": 140 }, { "epoch": 1.0764818355640535, "grad_norm": 0.14215781774983868, "learning_rate": 9.929713307644245e-06, "loss": 0.4788, "mean_token_accuracy": 0.8420683667063713, "num_tokens": 80846531.0, "step": 141 }, { "epoch": 1.0841300191204588, "grad_norm": 0.12936783165938912, "learning_rate": 9.928708027445403e-06, "loss": 0.5017, "mean_token_accuracy": 0.8367559015750885, "num_tokens": 81416754.0, "step": 142 }, { "epoch": 1.0917782026768643, "grad_norm": 0.13361158262185466, "learning_rate": 9.927695660772492e-06, "loss": 0.488, "mean_token_accuracy": 0.8376425430178642, "num_tokens": 81992754.0, "step": 143 }, { "epoch": 1.0994263862332696, "grad_norm": 0.16213706049923526, "learning_rate": 9.926676209081085e-06, "loss": 0.5066, "mean_token_accuracy": 0.8328819274902344, "num_tokens": 82568754.0, "step": 144 }, { "epoch": 1.107074569789675, "grad_norm": 0.1359440567719058, "learning_rate": 9.925649673836949e-06, "loss": 0.5083, "mean_token_accuracy": 0.8328124806284904, "num_tokens": 83144754.0, "step": 145 }, { "epoch": 1.1147227533460804, "grad_norm": 0.1295608761839531, "learning_rate": 9.924616056516027e-06, "loss": 0.4879, "mean_token_accuracy": 0.8381772711873055, "num_tokens": 83720754.0, "step": 146 }, { "epoch": 1.1223709369024857, "grad_norm": 0.13635991726116314, "learning_rate": 9.923575358604454e-06, "loss": 0.5083, "mean_token_accuracy": 0.8336632177233696, "num_tokens": 84296754.0, "step": 147 }, { "epoch": 1.130019120458891, "grad_norm": 0.1371786254385925, "learning_rate": 9.922527581598535e-06, "loss": 0.5097, "mean_token_accuracy": 0.8322065472602844, "num_tokens": 84872754.0, "step": 148 }, { "epoch": 1.1376673040152965, "grad_norm": 0.1280042137264246, "learning_rate": 9.921472727004765e-06, "loss": 0.4447, "mean_token_accuracy": 0.8517681285738945, "num_tokens": 85448754.0, "step": 149 }, { "epoch": 1.1453154875717018, "grad_norm": 0.14709174504186834, "learning_rate": 9.920410796339806e-06, "loss": 0.4984, "mean_token_accuracy": 0.8356476724147797, "num_tokens": 86024754.0, "step": 150 }, { "epoch": 1.152963671128107, "grad_norm": 0.13247230064106771, "learning_rate": 9.919341791130496e-06, "loss": 0.4493, "mean_token_accuracy": 0.8508201763033867, "num_tokens": 86600754.0, "step": 151 }, { "epoch": 1.1606118546845123, "grad_norm": 0.1396533122936014, "learning_rate": 9.91826571291385e-06, "loss": 0.5009, "mean_token_accuracy": 0.8353525176644325, "num_tokens": 87176754.0, "step": 152 }, { "epoch": 1.1682600382409178, "grad_norm": 0.12824246274478124, "learning_rate": 9.917182563237045e-06, "loss": 0.4778, "mean_token_accuracy": 0.8412972092628479, "num_tokens": 87752754.0, "step": 153 }, { "epoch": 1.1759082217973231, "grad_norm": 0.13963117320032115, "learning_rate": 9.91609234365743e-06, "loss": 0.494, "mean_token_accuracy": 0.8358664214611053, "num_tokens": 88328754.0, "step": 154 }, { "epoch": 1.1835564053537284, "grad_norm": 0.12893289909169098, "learning_rate": 9.914995055742515e-06, "loss": 0.4764, "mean_token_accuracy": 0.841441310942173, "num_tokens": 88904754.0, "step": 155 }, { "epoch": 1.191204588910134, "grad_norm": 0.13091574452160962, "learning_rate": 9.91389070106998e-06, "loss": 0.4487, "mean_token_accuracy": 0.8508792147040367, "num_tokens": 89480754.0, "step": 156 }, { "epoch": 1.1988527724665392, "grad_norm": 0.13244029890958137, "learning_rate": 9.912779281227656e-06, "loss": 0.473, "mean_token_accuracy": 0.8444153964519501, "num_tokens": 90056754.0, "step": 157 }, { "epoch": 1.2065009560229445, "grad_norm": 0.13918199257945654, "learning_rate": 9.911660797813542e-06, "loss": 0.5175, "mean_token_accuracy": 0.8303498476743698, "num_tokens": 90607633.0, "step": 158 }, { "epoch": 1.21414913957935, "grad_norm": 0.13614928489897143, "learning_rate": 9.91053525243579e-06, "loss": 0.4874, "mean_token_accuracy": 0.8389325365424156, "num_tokens": 91183633.0, "step": 159 }, { "epoch": 1.2217973231357553, "grad_norm": 0.13814896905701568, "learning_rate": 9.909402646712697e-06, "loss": 0.4886, "mean_token_accuracy": 0.8386929333209991, "num_tokens": 91759633.0, "step": 160 }, { "epoch": 1.2294455066921606, "grad_norm": 0.13390235159092828, "learning_rate": 9.908262982272724e-06, "loss": 0.4981, "mean_token_accuracy": 0.8359844833612442, "num_tokens": 92335633.0, "step": 161 }, { "epoch": 1.237093690248566, "grad_norm": 0.1424290492085377, "learning_rate": 9.907116260754472e-06, "loss": 0.4998, "mean_token_accuracy": 0.8363195657730103, "num_tokens": 92911633.0, "step": 162 }, { "epoch": 1.2447418738049714, "grad_norm": 0.13101785380987388, "learning_rate": 9.905962483806696e-06, "loss": 0.4826, "mean_token_accuracy": 0.8393439948558807, "num_tokens": 93487633.0, "step": 163 }, { "epoch": 1.2523900573613767, "grad_norm": 0.13349754239199546, "learning_rate": 9.904801653088287e-06, "loss": 0.4868, "mean_token_accuracy": 0.8397346436977386, "num_tokens": 94063633.0, "step": 164 }, { "epoch": 1.260038240917782, "grad_norm": 0.13506310588052065, "learning_rate": 9.903633770268286e-06, "loss": 0.4935, "mean_token_accuracy": 0.8379099145531654, "num_tokens": 94639633.0, "step": 165 }, { "epoch": 1.2676864244741874, "grad_norm": 0.1460942025739368, "learning_rate": 9.902458837025865e-06, "loss": 0.533, "mean_token_accuracy": 0.825570859014988, "num_tokens": 95215633.0, "step": 166 }, { "epoch": 1.2753346080305927, "grad_norm": 0.14230237493097705, "learning_rate": 9.901276855050342e-06, "loss": 0.509, "mean_token_accuracy": 0.8315832614898682, "num_tokens": 95791633.0, "step": 167 }, { "epoch": 1.282982791586998, "grad_norm": 0.13383575612568485, "learning_rate": 9.900087826041163e-06, "loss": 0.4828, "mean_token_accuracy": 0.8402016833424568, "num_tokens": 96367633.0, "step": 168 }, { "epoch": 1.2906309751434035, "grad_norm": 0.13987895499055866, "learning_rate": 9.89889175170791e-06, "loss": 0.5255, "mean_token_accuracy": 0.8278417810797691, "num_tokens": 96943633.0, "step": 169 }, { "epoch": 1.2982791586998088, "grad_norm": 0.1313503038847592, "learning_rate": 9.89768863377029e-06, "loss": 0.4956, "mean_token_accuracy": 0.8354028537869453, "num_tokens": 97519633.0, "step": 170 }, { "epoch": 1.305927342256214, "grad_norm": 0.13457635948414468, "learning_rate": 9.896478473958147e-06, "loss": 0.4697, "mean_token_accuracy": 0.8448807075619698, "num_tokens": 98095633.0, "step": 171 }, { "epoch": 1.3135755258126194, "grad_norm": 0.13241587496517163, "learning_rate": 9.895261274011436e-06, "loss": 0.4954, "mean_token_accuracy": 0.8361199200153351, "num_tokens": 98671633.0, "step": 172 }, { "epoch": 1.321223709369025, "grad_norm": 0.133287438364359, "learning_rate": 9.894037035680246e-06, "loss": 0.4827, "mean_token_accuracy": 0.842342384159565, "num_tokens": 99247633.0, "step": 173 }, { "epoch": 1.3288718929254302, "grad_norm": 0.13647661096592764, "learning_rate": 9.892805760724782e-06, "loss": 0.4902, "mean_token_accuracy": 0.8376477435231209, "num_tokens": 99823633.0, "step": 174 }, { "epoch": 1.3365200764818357, "grad_norm": 0.13426483562570884, "learning_rate": 9.89156745091536e-06, "loss": 0.4851, "mean_token_accuracy": 0.838831827044487, "num_tokens": 100399633.0, "step": 175 }, { "epoch": 1.344168260038241, "grad_norm": 0.21654547351312775, "learning_rate": 9.890322108032423e-06, "loss": 0.4898, "mean_token_accuracy": 0.8377328217029572, "num_tokens": 100975633.0, "step": 176 }, { "epoch": 1.3518164435946463, "grad_norm": 0.14468682987371165, "learning_rate": 9.889069733866515e-06, "loss": 0.4822, "mean_token_accuracy": 0.8400419503450394, "num_tokens": 101551633.0, "step": 177 }, { "epoch": 1.3594646271510515, "grad_norm": 0.1410032902289748, "learning_rate": 9.887810330218294e-06, "loss": 0.5063, "mean_token_accuracy": 0.8329392224550247, "num_tokens": 102127633.0, "step": 178 }, { "epoch": 1.367112810707457, "grad_norm": 0.1300256891404132, "learning_rate": 9.886543898898528e-06, "loss": 0.4759, "mean_token_accuracy": 0.8423354402184486, "num_tokens": 102703633.0, "step": 179 }, { "epoch": 1.3747609942638623, "grad_norm": 0.14166005366415435, "learning_rate": 9.885270441728085e-06, "loss": 0.5011, "mean_token_accuracy": 0.8352118879556656, "num_tokens": 103279633.0, "step": 180 }, { "epoch": 1.3824091778202676, "grad_norm": 0.1386111808825074, "learning_rate": 9.883989960537934e-06, "loss": 0.5052, "mean_token_accuracy": 0.8329878374934196, "num_tokens": 103855633.0, "step": 181 }, { "epoch": 1.3900573613766731, "grad_norm": 0.13817887508453544, "learning_rate": 9.882702457169147e-06, "loss": 0.5012, "mean_token_accuracy": 0.8339826613664627, "num_tokens": 104431633.0, "step": 182 }, { "epoch": 1.3977055449330784, "grad_norm": 0.14012765345513187, "learning_rate": 9.881407933472889e-06, "loss": 0.4994, "mean_token_accuracy": 0.834894172847271, "num_tokens": 105007633.0, "step": 183 }, { "epoch": 1.4053537284894837, "grad_norm": 0.12930750196514224, "learning_rate": 9.88010639131042e-06, "loss": 0.4611, "mean_token_accuracy": 0.845082089304924, "num_tokens": 105583633.0, "step": 184 }, { "epoch": 1.413001912045889, "grad_norm": 0.12331109134382856, "learning_rate": 9.878797832553093e-06, "loss": 0.4714, "mean_token_accuracy": 0.8437591195106506, "num_tokens": 106159633.0, "step": 185 }, { "epoch": 1.4206500956022945, "grad_norm": 0.1293813455298218, "learning_rate": 9.87748225908235e-06, "loss": 0.4691, "mean_token_accuracy": 0.8440056517720222, "num_tokens": 106735633.0, "step": 186 }, { "epoch": 1.4282982791586998, "grad_norm": 0.14507012516088896, "learning_rate": 9.876159672789711e-06, "loss": 0.5212, "mean_token_accuracy": 0.8286769017577171, "num_tokens": 107311633.0, "step": 187 }, { "epoch": 1.4359464627151053, "grad_norm": 0.15111476207924585, "learning_rate": 9.874830075576789e-06, "loss": 0.5101, "mean_token_accuracy": 0.8307446986436844, "num_tokens": 107887633.0, "step": 188 }, { "epoch": 1.4435946462715106, "grad_norm": 0.14709035714161833, "learning_rate": 9.873493469355271e-06, "loss": 0.4971, "mean_token_accuracy": 0.8350886180996895, "num_tokens": 108463633.0, "step": 189 }, { "epoch": 1.4512428298279159, "grad_norm": 0.1344401231018188, "learning_rate": 9.872149856046922e-06, "loss": 0.4757, "mean_token_accuracy": 0.8424031659960747, "num_tokens": 109039633.0, "step": 190 }, { "epoch": 1.4588910133843211, "grad_norm": 0.13403966584878735, "learning_rate": 9.870799237583586e-06, "loss": 0.4622, "mean_token_accuracy": 0.8463167697191238, "num_tokens": 109613270.0, "step": 191 }, { "epoch": 1.4665391969407267, "grad_norm": 0.13413337229810696, "learning_rate": 9.869441615907176e-06, "loss": 0.4905, "mean_token_accuracy": 0.8371755108237267, "num_tokens": 110189270.0, "step": 192 }, { "epoch": 1.474187380497132, "grad_norm": 0.13213885141206153, "learning_rate": 9.868076992969672e-06, "loss": 0.4629, "mean_token_accuracy": 0.8472193330526352, "num_tokens": 110765270.0, "step": 193 }, { "epoch": 1.4818355640535372, "grad_norm": 0.14284631334355893, "learning_rate": 9.866705370733126e-06, "loss": 0.4759, "mean_token_accuracy": 0.8413978591561317, "num_tokens": 111330965.0, "step": 194 }, { "epoch": 1.4894837476099427, "grad_norm": 0.14434856966382234, "learning_rate": 9.865326751169648e-06, "loss": 0.4682, "mean_token_accuracy": 0.8441445603966713, "num_tokens": 111906965.0, "step": 195 }, { "epoch": 1.497131931166348, "grad_norm": 0.13331488674498373, "learning_rate": 9.863941136261409e-06, "loss": 0.4743, "mean_token_accuracy": 0.842158354818821, "num_tokens": 112482965.0, "step": 196 }, { "epoch": 1.5047801147227533, "grad_norm": 0.1333685996060613, "learning_rate": 9.862548528000644e-06, "loss": 0.4833, "mean_token_accuracy": 0.8400801345705986, "num_tokens": 113058965.0, "step": 197 }, { "epoch": 1.5124282982791586, "grad_norm": 0.13278247248038624, "learning_rate": 9.861148928389634e-06, "loss": 0.4886, "mean_token_accuracy": 0.8382901325821877, "num_tokens": 113634965.0, "step": 198 }, { "epoch": 1.520076481835564, "grad_norm": 0.13693399891139466, "learning_rate": 9.859742339440723e-06, "loss": 0.4822, "mean_token_accuracy": 0.8395071998238564, "num_tokens": 114210965.0, "step": 199 }, { "epoch": 1.5277246653919694, "grad_norm": 0.14014126230020535, "learning_rate": 9.858328763176294e-06, "loss": 0.509, "mean_token_accuracy": 0.8321307003498077, "num_tokens": 114785906.0, "step": 200 }, { "epoch": 1.535372848948375, "grad_norm": 0.13812738362519864, "learning_rate": 9.85690820162878e-06, "loss": 0.495, "mean_token_accuracy": 0.8362865820527077, "num_tokens": 115361906.0, "step": 201 }, { "epoch": 1.5430210325047802, "grad_norm": 0.12908599718928895, "learning_rate": 9.855480656840662e-06, "loss": 0.471, "mean_token_accuracy": 0.843786895275116, "num_tokens": 115937906.0, "step": 202 }, { "epoch": 1.5506692160611855, "grad_norm": 0.13168108610671495, "learning_rate": 9.854046130864454e-06, "loss": 0.4765, "mean_token_accuracy": 0.8418128415942192, "num_tokens": 116513906.0, "step": 203 }, { "epoch": 1.5583173996175907, "grad_norm": 0.1374975523241788, "learning_rate": 9.852604625762712e-06, "loss": 0.4502, "mean_token_accuracy": 0.8503288328647614, "num_tokens": 117089906.0, "step": 204 }, { "epoch": 1.565965583173996, "grad_norm": 0.13716664205655626, "learning_rate": 9.851156143608025e-06, "loss": 0.4848, "mean_token_accuracy": 0.8381772711873055, "num_tokens": 117665906.0, "step": 205 }, { "epoch": 1.5736137667304015, "grad_norm": 0.19260127106490352, "learning_rate": 9.849700686483016e-06, "loss": 0.4606, "mean_token_accuracy": 0.8471116870641708, "num_tokens": 118241906.0, "step": 206 }, { "epoch": 1.581261950286807, "grad_norm": 0.1285761532416256, "learning_rate": 9.848238256480329e-06, "loss": 0.4689, "mean_token_accuracy": 0.8435264676809311, "num_tokens": 118817906.0, "step": 207 }, { "epoch": 1.5889101338432123, "grad_norm": 0.13245065174094972, "learning_rate": 9.846768855702646e-06, "loss": 0.4931, "mean_token_accuracy": 0.8375678807497025, "num_tokens": 119393906.0, "step": 208 }, { "epoch": 1.5965583173996176, "grad_norm": 0.13171290320659565, "learning_rate": 9.845292486262664e-06, "loss": 0.4801, "mean_token_accuracy": 0.8411461487412453, "num_tokens": 119969906.0, "step": 209 }, { "epoch": 1.604206500956023, "grad_norm": 0.1382945206321001, "learning_rate": 9.843809150283096e-06, "loss": 0.5201, "mean_token_accuracy": 0.8290223926305771, "num_tokens": 120545906.0, "step": 210 }, { "epoch": 1.6118546845124282, "grad_norm": 0.1308311967900458, "learning_rate": 9.842318849896679e-06, "loss": 0.4811, "mean_token_accuracy": 0.8402815386652946, "num_tokens": 121121906.0, "step": 211 }, { "epoch": 1.6195028680688337, "grad_norm": 0.13179762296615236, "learning_rate": 9.840821587246158e-06, "loss": 0.4841, "mean_token_accuracy": 0.8397172838449478, "num_tokens": 121697906.0, "step": 212 }, { "epoch": 1.627151051625239, "grad_norm": 0.13092322919750846, "learning_rate": 9.839317364484295e-06, "loss": 0.4857, "mean_token_accuracy": 0.8396096378564835, "num_tokens": 122273906.0, "step": 213 }, { "epoch": 1.6347992351816445, "grad_norm": 0.1406807481437863, "learning_rate": 9.837806183773851e-06, "loss": 0.5009, "mean_token_accuracy": 0.8343264311552048, "num_tokens": 122849906.0, "step": 214 }, { "epoch": 1.6424474187380498, "grad_norm": 0.15127230928084542, "learning_rate": 9.836288047287593e-06, "loss": 0.5012, "mean_token_accuracy": 0.8336441144347191, "num_tokens": 123425906.0, "step": 215 }, { "epoch": 1.650095602294455, "grad_norm": 0.13764770828391903, "learning_rate": 9.834762957208293e-06, "loss": 0.4933, "mean_token_accuracy": 0.8366129845380783, "num_tokens": 124001906.0, "step": 216 }, { "epoch": 1.6577437858508604, "grad_norm": 0.1309674988371225, "learning_rate": 9.83323091572872e-06, "loss": 0.5042, "mean_token_accuracy": 0.8334027752280235, "num_tokens": 124577906.0, "step": 217 }, { "epoch": 1.6653919694072656, "grad_norm": 0.1416672912642957, "learning_rate": 9.831691925051634e-06, "loss": 0.469, "mean_token_accuracy": 0.8436514809727669, "num_tokens": 125153906.0, "step": 218 }, { "epoch": 1.6730401529636711, "grad_norm": 0.13213277792263045, "learning_rate": 9.83014598738979e-06, "loss": 0.4744, "mean_token_accuracy": 0.8429621979594231, "num_tokens": 125729906.0, "step": 219 }, { "epoch": 1.6806883365200764, "grad_norm": 0.14828248446823036, "learning_rate": 9.82859310496593e-06, "loss": 0.476, "mean_token_accuracy": 0.841729499399662, "num_tokens": 126305906.0, "step": 220 }, { "epoch": 1.688336520076482, "grad_norm": 0.13273638622183012, "learning_rate": 9.827033280012783e-06, "loss": 0.4601, "mean_token_accuracy": 0.8464536741375923, "num_tokens": 126881906.0, "step": 221 }, { "epoch": 1.6959847036328872, "grad_norm": 0.1371281964569988, "learning_rate": 9.825466514773054e-06, "loss": 0.5065, "mean_token_accuracy": 0.8331058844923973, "num_tokens": 127457906.0, "step": 222 }, { "epoch": 1.7036328871892925, "grad_norm": 0.13113344781507827, "learning_rate": 9.823892811499435e-06, "loss": 0.4751, "mean_token_accuracy": 0.8415020853281021, "num_tokens": 128033906.0, "step": 223 }, { "epoch": 1.7112810707456978, "grad_norm": 0.1377436100503237, "learning_rate": 9.822312172454587e-06, "loss": 0.52, "mean_token_accuracy": 0.8276021778583527, "num_tokens": 128609906.0, "step": 224 }, { "epoch": 1.7189292543021033, "grad_norm": 0.12699707417451003, "learning_rate": 9.820724599911147e-06, "loss": 0.446, "mean_token_accuracy": 0.8517074510455132, "num_tokens": 129169503.0, "step": 225 }, { "epoch": 1.7265774378585086, "grad_norm": 0.12634559250621336, "learning_rate": 9.819130096151718e-06, "loss": 0.4544, "mean_token_accuracy": 0.8480943590402603, "num_tokens": 129745503.0, "step": 226 }, { "epoch": 1.734225621414914, "grad_norm": 0.12958989836341028, "learning_rate": 9.817528663468873e-06, "loss": 0.4783, "mean_token_accuracy": 0.8412247076630592, "num_tokens": 130319403.0, "step": 227 }, { "epoch": 1.7418738049713194, "grad_norm": 0.1372257707417811, "learning_rate": 9.815920304165144e-06, "loss": 0.4881, "mean_token_accuracy": 0.8391176462173462, "num_tokens": 130881510.0, "step": 228 }, { "epoch": 1.7495219885277247, "grad_norm": 0.13053898036659867, "learning_rate": 9.81430502055302e-06, "loss": 0.4723, "mean_token_accuracy": 0.8423580154776573, "num_tokens": 131457510.0, "step": 229 }, { "epoch": 1.75717017208413, "grad_norm": 0.13772367894783863, "learning_rate": 9.81268281495495e-06, "loss": 0.4873, "mean_token_accuracy": 0.8386755734682083, "num_tokens": 132033510.0, "step": 230 }, { "epoch": 1.7648183556405352, "grad_norm": 0.1357109696346542, "learning_rate": 9.811053689703333e-06, "loss": 0.5173, "mean_token_accuracy": 0.8325728848576546, "num_tokens": 132609510.0, "step": 231 }, { "epoch": 1.7724665391969407, "grad_norm": 0.1279722497480012, "learning_rate": 9.809417647140522e-06, "loss": 0.4733, "mean_token_accuracy": 0.8432070016860962, "num_tokens": 133185510.0, "step": 232 }, { "epoch": 1.780114722753346, "grad_norm": 0.14056689318218055, "learning_rate": 9.807774689618806e-06, "loss": 0.5136, "mean_token_accuracy": 0.8318975046277046, "num_tokens": 133761510.0, "step": 233 }, { "epoch": 1.7877629063097515, "grad_norm": 0.13180288049266284, "learning_rate": 9.806124819500427e-06, "loss": 0.4863, "mean_token_accuracy": 0.8381616622209549, "num_tokens": 134337510.0, "step": 234 }, { "epoch": 1.7954110898661568, "grad_norm": 0.14261327766170212, "learning_rate": 9.804468039157557e-06, "loss": 0.4865, "mean_token_accuracy": 0.8391356617212296, "num_tokens": 134913510.0, "step": 235 }, { "epoch": 1.8030592734225621, "grad_norm": 0.1415890220842668, "learning_rate": 9.802804350972308e-06, "loss": 0.5261, "mean_token_accuracy": 0.8269285559654236, "num_tokens": 135489510.0, "step": 236 }, { "epoch": 1.8107074569789674, "grad_norm": 0.13577919369547242, "learning_rate": 9.801133757336726e-06, "loss": 0.475, "mean_token_accuracy": 0.8420576602220535, "num_tokens": 136065510.0, "step": 237 }, { "epoch": 1.8183556405353727, "grad_norm": 0.13887863894469682, "learning_rate": 9.799456260652778e-06, "loss": 0.4676, "mean_token_accuracy": 0.8456170037388802, "num_tokens": 136625705.0, "step": 238 }, { "epoch": 1.8260038240917782, "grad_norm": 0.1329581032736448, "learning_rate": 9.797771863332365e-06, "loss": 0.468, "mean_token_accuracy": 0.8451133370399475, "num_tokens": 137201705.0, "step": 239 }, { "epoch": 1.8336520076481837, "grad_norm": 0.14266764923168374, "learning_rate": 9.796080567797304e-06, "loss": 0.4875, "mean_token_accuracy": 0.8391078859567642, "num_tokens": 137777705.0, "step": 240 }, { "epoch": 1.841300191204589, "grad_norm": 0.13123578796297256, "learning_rate": 9.794382376479334e-06, "loss": 0.4801, "mean_token_accuracy": 0.8414048627018929, "num_tokens": 138353705.0, "step": 241 }, { "epoch": 1.8489483747609943, "grad_norm": 0.13763073917570376, "learning_rate": 9.792677291820107e-06, "loss": 0.4785, "mean_token_accuracy": 0.8403075709939003, "num_tokens": 138929705.0, "step": 242 }, { "epoch": 1.8565965583173996, "grad_norm": 0.1339684449373738, "learning_rate": 9.790965316271183e-06, "loss": 0.4728, "mean_token_accuracy": 0.8434257805347443, "num_tokens": 139505705.0, "step": 243 }, { "epoch": 1.8642447418738048, "grad_norm": 0.13563074491467572, "learning_rate": 9.789246452294034e-06, "loss": 0.4885, "mean_token_accuracy": 0.8397867307066917, "num_tokens": 140081705.0, "step": 244 }, { "epoch": 1.8718929254302104, "grad_norm": 0.14368095826727606, "learning_rate": 9.787520702360035e-06, "loss": 0.4808, "mean_token_accuracy": 0.840397872030735, "num_tokens": 140657705.0, "step": 245 }, { "epoch": 1.8795411089866156, "grad_norm": 0.14397182347295984, "learning_rate": 9.785788068950463e-06, "loss": 0.5054, "mean_token_accuracy": 0.8325815722346306, "num_tokens": 141233705.0, "step": 246 }, { "epoch": 1.8871892925430211, "grad_norm": 0.1406587307230205, "learning_rate": 9.784048554556488e-06, "loss": 0.5024, "mean_token_accuracy": 0.8344358131289482, "num_tokens": 141809705.0, "step": 247 }, { "epoch": 1.8948374760994264, "grad_norm": 0.1346925448003243, "learning_rate": 9.782302161679177e-06, "loss": 0.4846, "mean_token_accuracy": 0.8397086039185524, "num_tokens": 142385705.0, "step": 248 }, { "epoch": 1.9024856596558317, "grad_norm": 0.13981403945386878, "learning_rate": 9.780548892829486e-06, "loss": 0.4886, "mean_token_accuracy": 0.8380331769585609, "num_tokens": 142961705.0, "step": 249 }, { "epoch": 1.910133843212237, "grad_norm": 0.13251833042206107, "learning_rate": 9.778788750528257e-06, "loss": 0.4667, "mean_token_accuracy": 0.8444865867495537, "num_tokens": 143537705.0, "step": 250 }, { "epoch": 1.9177820267686423, "grad_norm": 0.14376049074556502, "learning_rate": 9.777021737306214e-06, "loss": 0.4857, "mean_token_accuracy": 0.8387880772352219, "num_tokens": 144099211.0, "step": 251 }, { "epoch": 1.9254302103250478, "grad_norm": 0.14486384551210302, "learning_rate": 9.775247855703962e-06, "loss": 0.4826, "mean_token_accuracy": 0.8404586315155029, "num_tokens": 144675211.0, "step": 252 }, { "epoch": 1.9330783938814533, "grad_norm": 0.1388194492997388, "learning_rate": 9.773467108271978e-06, "loss": 0.51, "mean_token_accuracy": 0.8311353400349617, "num_tokens": 145251211.0, "step": 253 }, { "epoch": 1.9407265774378586, "grad_norm": 0.13026081138638712, "learning_rate": 9.771679497570614e-06, "loss": 0.4799, "mean_token_accuracy": 0.8409708067774773, "num_tokens": 145827211.0, "step": 254 }, { "epoch": 1.9483747609942639, "grad_norm": 0.1430745806772645, "learning_rate": 9.769885026170088e-06, "loss": 0.4629, "mean_token_accuracy": 0.8461342081427574, "num_tokens": 146403211.0, "step": 255 }, { "epoch": 1.9560229445506692, "grad_norm": 0.1423170947817084, "learning_rate": 9.768083696650481e-06, "loss": 0.4964, "mean_token_accuracy": 0.8357796147465706, "num_tokens": 146979211.0, "step": 256 }, { "epoch": 1.9636711281070744, "grad_norm": 0.1412781732428823, "learning_rate": 9.766275511601742e-06, "loss": 0.4685, "mean_token_accuracy": 0.84524355083704, "num_tokens": 147555211.0, "step": 257 }, { "epoch": 1.97131931166348, "grad_norm": 0.1347390798520367, "learning_rate": 9.764460473623665e-06, "loss": 0.4964, "mean_token_accuracy": 0.8352379277348518, "num_tokens": 148131211.0, "step": 258 }, { "epoch": 1.9789674952198852, "grad_norm": 0.140319211357454, "learning_rate": 9.762638585325907e-06, "loss": 0.4651, "mean_token_accuracy": 0.8467453494668007, "num_tokens": 148707211.0, "step": 259 }, { "epoch": 1.9866156787762907, "grad_norm": 0.14587479148422647, "learning_rate": 9.760809849327967e-06, "loss": 0.4859, "mean_token_accuracy": 0.8388071581721306, "num_tokens": 149261156.0, "step": 260 }, { "epoch": 1.994263862332696, "grad_norm": 0.15571274133757082, "learning_rate": 9.7589742682592e-06, "loss": 0.4865, "mean_token_accuracy": 0.8390511944890022, "num_tokens": 149814426.0, "step": 261 }, { "epoch": 2.0, "grad_norm": 0.1656455865676849, "learning_rate": 9.75713184475879e-06, "loss": 0.5189, "mean_token_accuracy": 0.8292289972305298, "num_tokens": 150246426.0, "step": 262 }, { "epoch": 2.0076481835564053, "grad_norm": 0.13050271447099013, "learning_rate": 9.755282581475769e-06, "loss": 0.4564, "mean_token_accuracy": 0.8467505499720573, "num_tokens": 150822426.0, "step": 263 }, { "epoch": 2.0152963671128106, "grad_norm": 0.1336370185440778, "learning_rate": 9.753426481068998e-06, "loss": 0.4913, "mean_token_accuracy": 0.8365452736616135, "num_tokens": 151398426.0, "step": 264 }, { "epoch": 2.022944550669216, "grad_norm": 0.1382072756245929, "learning_rate": 9.751563546207167e-06, "loss": 0.4878, "mean_token_accuracy": 0.8377542123198509, "num_tokens": 151952371.0, "step": 265 }, { "epoch": 2.0305927342256216, "grad_norm": 0.14387760662210322, "learning_rate": 9.749693779568799e-06, "loss": 0.5203, "mean_token_accuracy": 0.827904649078846, "num_tokens": 152520866.0, "step": 266 }, { "epoch": 2.038240917782027, "grad_norm": 0.14163598725064547, "learning_rate": 9.747817183842235e-06, "loss": 0.486, "mean_token_accuracy": 0.8394655287265778, "num_tokens": 153096866.0, "step": 267 }, { "epoch": 2.045889101338432, "grad_norm": 0.1357429538351474, "learning_rate": 9.745933761725635e-06, "loss": 0.4392, "mean_token_accuracy": 0.8543984368443489, "num_tokens": 153672866.0, "step": 268 }, { "epoch": 2.0535372848948374, "grad_norm": 0.14159049917519523, "learning_rate": 9.744043515926975e-06, "loss": 0.4704, "mean_token_accuracy": 0.8423840552568436, "num_tokens": 154248866.0, "step": 269 }, { "epoch": 2.0611854684512427, "grad_norm": 0.140767759584201, "learning_rate": 9.742146449164045e-06, "loss": 0.4657, "mean_token_accuracy": 0.8441150411963463, "num_tokens": 154824866.0, "step": 270 }, { "epoch": 2.068833652007648, "grad_norm": 0.1376003238674713, "learning_rate": 9.740242564164433e-06, "loss": 0.4829, "mean_token_accuracy": 0.8394047617912292, "num_tokens": 155400866.0, "step": 271 }, { "epoch": 2.0764818355640537, "grad_norm": 0.14234028861993708, "learning_rate": 9.738331863665541e-06, "loss": 0.4541, "mean_token_accuracy": 0.8469480872154236, "num_tokens": 155954136.0, "step": 272 }, { "epoch": 2.084130019120459, "grad_norm": 0.12785940871582666, "learning_rate": 9.736414350414564e-06, "loss": 0.4328, "mean_token_accuracy": 0.8540546894073486, "num_tokens": 156530136.0, "step": 273 }, { "epoch": 2.0917782026768643, "grad_norm": 0.13077439345298142, "learning_rate": 9.734490027168494e-06, "loss": 0.4536, "mean_token_accuracy": 0.8471047431230545, "num_tokens": 157106136.0, "step": 274 }, { "epoch": 2.0994263862332696, "grad_norm": 0.13230573836928355, "learning_rate": 9.732558896694114e-06, "loss": 0.4722, "mean_token_accuracy": 0.842342384159565, "num_tokens": 157682136.0, "step": 275 }, { "epoch": 2.107074569789675, "grad_norm": 0.13384258031186463, "learning_rate": 9.730620961767996e-06, "loss": 0.4693, "mean_token_accuracy": 0.8435490280389786, "num_tokens": 158258136.0, "step": 276 }, { "epoch": 2.11472275334608, "grad_norm": 0.13295124171892045, "learning_rate": 9.72867622517649e-06, "loss": 0.4848, "mean_token_accuracy": 0.8387293890118599, "num_tokens": 158834136.0, "step": 277 }, { "epoch": 2.1223709369024855, "grad_norm": 0.13698639518555758, "learning_rate": 9.726724689715734e-06, "loss": 0.4709, "mean_token_accuracy": 0.8430733233690262, "num_tokens": 159410136.0, "step": 278 }, { "epoch": 2.130019120458891, "grad_norm": 0.131887831277675, "learning_rate": 9.724766358191635e-06, "loss": 0.482, "mean_token_accuracy": 0.8394516482949257, "num_tokens": 159986136.0, "step": 279 }, { "epoch": 2.1376673040152965, "grad_norm": 0.14891285174122362, "learning_rate": 9.722801233419873e-06, "loss": 0.4966, "mean_token_accuracy": 0.8344601169228554, "num_tokens": 160562136.0, "step": 280 }, { "epoch": 2.1453154875717018, "grad_norm": 0.13274530265535694, "learning_rate": 9.720829318225897e-06, "loss": 0.4412, "mean_token_accuracy": 0.8516188189387321, "num_tokens": 161138136.0, "step": 281 }, { "epoch": 2.152963671128107, "grad_norm": 0.12558989675749826, "learning_rate": 9.718850615444915e-06, "loss": 0.4617, "mean_token_accuracy": 0.8458894118666649, "num_tokens": 161714136.0, "step": 282 }, { "epoch": 2.1606118546845123, "grad_norm": 0.13805526111149538, "learning_rate": 9.7168651279219e-06, "loss": 0.476, "mean_token_accuracy": 0.84109927713871, "num_tokens": 162290136.0, "step": 283 }, { "epoch": 2.1682600382409176, "grad_norm": 0.1365628904112165, "learning_rate": 9.714872858511574e-06, "loss": 0.4813, "mean_token_accuracy": 0.8389794006943703, "num_tokens": 162866136.0, "step": 284 }, { "epoch": 2.1759082217973233, "grad_norm": 0.14202588056377932, "learning_rate": 9.712873810078415e-06, "loss": 0.4731, "mean_token_accuracy": 0.8429031893610954, "num_tokens": 163442136.0, "step": 285 }, { "epoch": 2.1835564053537286, "grad_norm": 0.14026758376596035, "learning_rate": 9.710867985496644e-06, "loss": 0.4828, "mean_token_accuracy": 0.8403249308466911, "num_tokens": 164018136.0, "step": 286 }, { "epoch": 2.191204588910134, "grad_norm": 0.1268634171589506, "learning_rate": 9.708855387650229e-06, "loss": 0.4566, "mean_token_accuracy": 0.8471759259700775, "num_tokens": 164594136.0, "step": 287 }, { "epoch": 2.198852772466539, "grad_norm": 0.1361940615902871, "learning_rate": 9.706836019432872e-06, "loss": 0.4843, "mean_token_accuracy": 0.8384498655796051, "num_tokens": 165170136.0, "step": 288 }, { "epoch": 2.2065009560229445, "grad_norm": 0.13071252562608368, "learning_rate": 9.704809883748012e-06, "loss": 0.4619, "mean_token_accuracy": 0.8458164930343628, "num_tokens": 165746136.0, "step": 289 }, { "epoch": 2.21414913957935, "grad_norm": 0.13749244273488634, "learning_rate": 9.70277698350882e-06, "loss": 0.4763, "mean_token_accuracy": 0.8413892164826393, "num_tokens": 166322136.0, "step": 290 }, { "epoch": 2.221797323135755, "grad_norm": 0.13653291312402388, "learning_rate": 9.700737321638185e-06, "loss": 0.494, "mean_token_accuracy": 0.8379845768213272, "num_tokens": 166898136.0, "step": 291 }, { "epoch": 2.229445506692161, "grad_norm": 0.1314091235725935, "learning_rate": 9.69869090106873e-06, "loss": 0.4553, "mean_token_accuracy": 0.8476238548755646, "num_tokens": 167474136.0, "step": 292 }, { "epoch": 2.237093690248566, "grad_norm": 0.1306318774358581, "learning_rate": 9.696637724742785e-06, "loss": 0.4588, "mean_token_accuracy": 0.8466134071350098, "num_tokens": 168050136.0, "step": 293 }, { "epoch": 2.2447418738049714, "grad_norm": 0.1320429519519253, "learning_rate": 9.6945777956124e-06, "loss": 0.4893, "mean_token_accuracy": 0.8379810974001884, "num_tokens": 168626136.0, "step": 294 }, { "epoch": 2.2523900573613767, "grad_norm": 0.12581567893697732, "learning_rate": 9.69251111663933e-06, "loss": 0.4421, "mean_token_accuracy": 0.8516535460948944, "num_tokens": 169202136.0, "step": 295 }, { "epoch": 2.260038240917782, "grad_norm": 0.15120633186562035, "learning_rate": 9.690437690795038e-06, "loss": 0.4828, "mean_token_accuracy": 0.8403878286480904, "num_tokens": 169764783.0, "step": 296 }, { "epoch": 2.2676864244741872, "grad_norm": 0.14083593416082985, "learning_rate": 9.688357521060685e-06, "loss": 0.4997, "mean_token_accuracy": 0.8348125591874123, "num_tokens": 170340783.0, "step": 297 }, { "epoch": 2.275334608030593, "grad_norm": 0.13552190258745817, "learning_rate": 9.686270610427131e-06, "loss": 0.4831, "mean_token_accuracy": 0.8392866998910904, "num_tokens": 170916783.0, "step": 298 }, { "epoch": 2.2829827915869982, "grad_norm": 0.1301480903850601, "learning_rate": 9.684176961894927e-06, "loss": 0.4668, "mean_token_accuracy": 0.8437822833657265, "num_tokens": 171490115.0, "step": 299 }, { "epoch": 2.2906309751434035, "grad_norm": 0.14821266280449158, "learning_rate": 9.682076578474308e-06, "loss": 0.4986, "mean_token_accuracy": 0.8342083841562271, "num_tokens": 172066115.0, "step": 300 }, { "epoch": 2.298279158699809, "grad_norm": 0.13381251110623754, "learning_rate": 9.6799694631852e-06, "loss": 0.4603, "mean_token_accuracy": 0.8468859866261482, "num_tokens": 172642115.0, "step": 301 }, { "epoch": 2.305927342256214, "grad_norm": 0.13169362581883381, "learning_rate": 9.677855619057202e-06, "loss": 0.4604, "mean_token_accuracy": 0.8466967418789864, "num_tokens": 173218115.0, "step": 302 }, { "epoch": 2.3135755258126194, "grad_norm": 0.13195788424828656, "learning_rate": 9.675735049129588e-06, "loss": 0.4621, "mean_token_accuracy": 0.8446150496602058, "num_tokens": 173794115.0, "step": 303 }, { "epoch": 2.3212237093690247, "grad_norm": 0.14122025118479573, "learning_rate": 9.673607756451306e-06, "loss": 0.4349, "mean_token_accuracy": 0.853457435965538, "num_tokens": 174370115.0, "step": 304 }, { "epoch": 2.3288718929254304, "grad_norm": 0.12878410078634828, "learning_rate": 9.67147374408097e-06, "loss": 0.4447, "mean_token_accuracy": 0.8515077084302902, "num_tokens": 174946115.0, "step": 305 }, { "epoch": 2.3365200764818357, "grad_norm": 0.125714053577621, "learning_rate": 9.669333015086847e-06, "loss": 0.4384, "mean_token_accuracy": 0.8537577912211418, "num_tokens": 175522115.0, "step": 306 }, { "epoch": 2.344168260038241, "grad_norm": 0.1364732476611976, "learning_rate": 9.667185572546871e-06, "loss": 0.4656, "mean_token_accuracy": 0.8443147018551826, "num_tokens": 176098115.0, "step": 307 }, { "epoch": 2.3518164435946463, "grad_norm": 0.13118221069966887, "learning_rate": 9.665031419548625e-06, "loss": 0.4533, "mean_token_accuracy": 0.8475735187530518, "num_tokens": 176674115.0, "step": 308 }, { "epoch": 2.3594646271510515, "grad_norm": 0.13021585818887338, "learning_rate": 9.662870559189344e-06, "loss": 0.4772, "mean_token_accuracy": 0.8415628522634506, "num_tokens": 177250115.0, "step": 309 }, { "epoch": 2.367112810707457, "grad_norm": 0.1296491093025139, "learning_rate": 9.660702994575896e-06, "loss": 0.4504, "mean_token_accuracy": 0.8494465947151184, "num_tokens": 177815810.0, "step": 310 }, { "epoch": 2.3747609942638626, "grad_norm": 0.1269070770369225, "learning_rate": 9.658528728824799e-06, "loss": 0.4454, "mean_token_accuracy": 0.851382702589035, "num_tokens": 178391810.0, "step": 311 }, { "epoch": 2.382409177820268, "grad_norm": 0.13236942393674378, "learning_rate": 9.656347765062206e-06, "loss": 0.4959, "mean_token_accuracy": 0.8356025218963623, "num_tokens": 178967810.0, "step": 312 }, { "epoch": 2.390057361376673, "grad_norm": 0.13431040882625667, "learning_rate": 9.654160106423891e-06, "loss": 0.4647, "mean_token_accuracy": 0.8447903171181679, "num_tokens": 179541447.0, "step": 313 }, { "epoch": 2.3977055449330784, "grad_norm": 0.1304990964909841, "learning_rate": 9.651965756055262e-06, "loss": 0.4733, "mean_token_accuracy": 0.8432052731513977, "num_tokens": 180117447.0, "step": 314 }, { "epoch": 2.4053537284894837, "grad_norm": 0.13562049534152407, "learning_rate": 9.649764717111348e-06, "loss": 0.4827, "mean_token_accuracy": 0.8400332629680634, "num_tokens": 180693447.0, "step": 315 }, { "epoch": 2.413001912045889, "grad_norm": 0.1322708589540597, "learning_rate": 9.647556992756789e-06, "loss": 0.4685, "mean_token_accuracy": 0.8437608480453491, "num_tokens": 181269447.0, "step": 316 }, { "epoch": 2.4206500956022943, "grad_norm": 0.13106792460307698, "learning_rate": 9.645342586165845e-06, "loss": 0.4676, "mean_token_accuracy": 0.8447365760803223, "num_tokens": 181845447.0, "step": 317 }, { "epoch": 2.4282982791587, "grad_norm": 0.12774223293558123, "learning_rate": 9.643121500522377e-06, "loss": 0.4783, "mean_token_accuracy": 0.841241642832756, "num_tokens": 182421447.0, "step": 318 }, { "epoch": 2.4359464627151053, "grad_norm": 0.13329940881644012, "learning_rate": 9.640893739019852e-06, "loss": 0.4558, "mean_token_accuracy": 0.8470873907208443, "num_tokens": 182997447.0, "step": 319 }, { "epoch": 2.4435946462715106, "grad_norm": 0.1389284366392599, "learning_rate": 9.638659304861336e-06, "loss": 0.4873, "mean_token_accuracy": 0.8378456756472588, "num_tokens": 183573447.0, "step": 320 }, { "epoch": 2.451242829827916, "grad_norm": 0.13022523574202438, "learning_rate": 9.63641820125949e-06, "loss": 0.4532, "mean_token_accuracy": 0.8482332676649094, "num_tokens": 184149447.0, "step": 321 }, { "epoch": 2.458891013384321, "grad_norm": 0.12869518334269098, "learning_rate": 9.63417043143656e-06, "loss": 0.4364, "mean_token_accuracy": 0.8536501526832581, "num_tokens": 184725447.0, "step": 322 }, { "epoch": 2.4665391969407264, "grad_norm": 0.1685448514608509, "learning_rate": 9.631915998624382e-06, "loss": 0.4801, "mean_token_accuracy": 0.8401600047945976, "num_tokens": 185301447.0, "step": 323 }, { "epoch": 2.474187380497132, "grad_norm": 0.13160582073089025, "learning_rate": 9.629654906064365e-06, "loss": 0.4649, "mean_token_accuracy": 0.8448563814163208, "num_tokens": 185877447.0, "step": 324 }, { "epoch": 2.4818355640535374, "grad_norm": 0.1367794173463771, "learning_rate": 9.627387157007502e-06, "loss": 0.4883, "mean_token_accuracy": 0.8361355364322662, "num_tokens": 186453447.0, "step": 325 }, { "epoch": 2.4894837476099427, "grad_norm": 0.13222850798423158, "learning_rate": 9.62511275471435e-06, "loss": 0.4677, "mean_token_accuracy": 0.8434003219008446, "num_tokens": 187010772.0, "step": 326 }, { "epoch": 2.497131931166348, "grad_norm": 0.15953227190496977, "learning_rate": 9.622831702455035e-06, "loss": 0.4831, "mean_token_accuracy": 0.8401443809270859, "num_tokens": 187586772.0, "step": 327 }, { "epoch": 2.5047801147227533, "grad_norm": 0.1298624067386934, "learning_rate": 9.620544003509243e-06, "loss": 0.4741, "mean_token_accuracy": 0.8434900045394897, "num_tokens": 188162772.0, "step": 328 }, { "epoch": 2.5124282982791586, "grad_norm": 0.13840666580879918, "learning_rate": 9.618249661166218e-06, "loss": 0.4923, "mean_token_accuracy": 0.8363108858466148, "num_tokens": 188738772.0, "step": 329 }, { "epoch": 2.520076481835564, "grad_norm": 0.12560912611469238, "learning_rate": 9.615948678724756e-06, "loss": 0.4396, "mean_token_accuracy": 0.8522473201155663, "num_tokens": 189314772.0, "step": 330 }, { "epoch": 2.527724665391969, "grad_norm": 0.1296554841366996, "learning_rate": 9.613641059493197e-06, "loss": 0.4615, "mean_token_accuracy": 0.8463234454393387, "num_tokens": 189890772.0, "step": 331 }, { "epoch": 2.535372848948375, "grad_norm": 0.14613127575466625, "learning_rate": 9.611326806789424e-06, "loss": 0.4799, "mean_token_accuracy": 0.8401964753866196, "num_tokens": 190466772.0, "step": 332 }, { "epoch": 2.54302103250478, "grad_norm": 0.13394861492299956, "learning_rate": 9.609005923940865e-06, "loss": 0.455, "mean_token_accuracy": 0.8472384214401245, "num_tokens": 191042772.0, "step": 333 }, { "epoch": 2.5506692160611855, "grad_norm": 0.12785020969707311, "learning_rate": 9.606678414284469e-06, "loss": 0.45, "mean_token_accuracy": 0.8496569246053696, "num_tokens": 191618772.0, "step": 334 }, { "epoch": 2.5583173996175907, "grad_norm": 0.13667091866209735, "learning_rate": 9.604344281166721e-06, "loss": 0.4569, "mean_token_accuracy": 0.8474294021725655, "num_tokens": 192194772.0, "step": 335 }, { "epoch": 2.565965583173996, "grad_norm": 0.14170531764608627, "learning_rate": 9.602003527943629e-06, "loss": 0.4841, "mean_token_accuracy": 0.838857851922512, "num_tokens": 192770772.0, "step": 336 }, { "epoch": 2.5736137667304018, "grad_norm": 0.14203506631993854, "learning_rate": 9.599656157980715e-06, "loss": 0.4711, "mean_token_accuracy": 0.8428667038679123, "num_tokens": 193346772.0, "step": 337 }, { "epoch": 2.581261950286807, "grad_norm": 0.13109007712840628, "learning_rate": 9.597302174653016e-06, "loss": 0.4452, "mean_token_accuracy": 0.8508444800972939, "num_tokens": 193922772.0, "step": 338 }, { "epoch": 2.5889101338432123, "grad_norm": 0.1376764806733569, "learning_rate": 9.594941581345079e-06, "loss": 0.4743, "mean_token_accuracy": 0.8415871560573578, "num_tokens": 194498772.0, "step": 339 }, { "epoch": 2.5965583173996176, "grad_norm": 0.13134460713857604, "learning_rate": 9.592574381450957e-06, "loss": 0.4536, "mean_token_accuracy": 0.8493340015411377, "num_tokens": 195074772.0, "step": 340 }, { "epoch": 2.604206500956023, "grad_norm": 0.12998973657236298, "learning_rate": 9.590200578374198e-06, "loss": 0.4735, "mean_token_accuracy": 0.8423146158456802, "num_tokens": 195650772.0, "step": 341 }, { "epoch": 2.611854684512428, "grad_norm": 0.13974086743961503, "learning_rate": 9.587820175527844e-06, "loss": 0.4633, "mean_token_accuracy": 0.8453876450657845, "num_tokens": 196226772.0, "step": 342 }, { "epoch": 2.6195028680688335, "grad_norm": 0.13737097736695414, "learning_rate": 9.585433176334426e-06, "loss": 0.4881, "mean_token_accuracy": 0.8374150991439819, "num_tokens": 196802772.0, "step": 343 }, { "epoch": 2.6271510516252388, "grad_norm": 0.1328384415039295, "learning_rate": 9.583039584225966e-06, "loss": 0.452, "mean_token_accuracy": 0.8489537835121155, "num_tokens": 197378772.0, "step": 344 }, { "epoch": 2.6347992351816445, "grad_norm": 0.13333625376712616, "learning_rate": 9.580639402643957e-06, "loss": 0.4749, "mean_token_accuracy": 0.8421444669365883, "num_tokens": 197954772.0, "step": 345 }, { "epoch": 2.64244741873805, "grad_norm": 0.13099239341333282, "learning_rate": 9.578232635039368e-06, "loss": 0.468, "mean_token_accuracy": 0.8446983769536018, "num_tokens": 198530772.0, "step": 346 }, { "epoch": 2.650095602294455, "grad_norm": 0.1319772619241733, "learning_rate": 9.575819284872638e-06, "loss": 0.4753, "mean_token_accuracy": 0.8412885293364525, "num_tokens": 199106772.0, "step": 347 }, { "epoch": 2.6577437858508604, "grad_norm": 0.1315801229711581, "learning_rate": 9.573399355613675e-06, "loss": 0.4538, "mean_token_accuracy": 0.8480388075113297, "num_tokens": 199682772.0, "step": 348 }, { "epoch": 2.6653919694072656, "grad_norm": 0.14029244455419185, "learning_rate": 9.570972850741839e-06, "loss": 0.4642, "mean_token_accuracy": 0.8463078290224075, "num_tokens": 200258772.0, "step": 349 }, { "epoch": 2.6730401529636714, "grad_norm": 0.1382382609127072, "learning_rate": 9.568539773745947e-06, "loss": 0.4792, "mean_token_accuracy": 0.8414309024810791, "num_tokens": 200834772.0, "step": 350 }, { "epoch": 2.6806883365200767, "grad_norm": 0.13641202302875485, "learning_rate": 9.56610012812427e-06, "loss": 0.4948, "mean_token_accuracy": 0.8358525335788727, "num_tokens": 201410772.0, "step": 351 }, { "epoch": 2.688336520076482, "grad_norm": 0.13648644317956463, "learning_rate": 9.563653917384515e-06, "loss": 0.4791, "mean_token_accuracy": 0.8403978571295738, "num_tokens": 201986772.0, "step": 352 }, { "epoch": 2.6959847036328872, "grad_norm": 0.1265267382875485, "learning_rate": 9.561201145043835e-06, "loss": 0.4565, "mean_token_accuracy": 0.8470665439963341, "num_tokens": 202562772.0, "step": 353 }, { "epoch": 2.7036328871892925, "grad_norm": 0.14834388827490924, "learning_rate": 9.558741814628815e-06, "loss": 0.4927, "mean_token_accuracy": 0.8357188552618027, "num_tokens": 203138772.0, "step": 354 }, { "epoch": 2.711281070745698, "grad_norm": 0.1339156205744843, "learning_rate": 9.55627592967547e-06, "loss": 0.4552, "mean_token_accuracy": 0.8483408987522125, "num_tokens": 203714772.0, "step": 355 }, { "epoch": 2.718929254302103, "grad_norm": 0.13745433509215765, "learning_rate": 9.553803493729237e-06, "loss": 0.4797, "mean_token_accuracy": 0.8393444642424583, "num_tokens": 204277037.0, "step": 356 }, { "epoch": 2.7265774378585084, "grad_norm": 0.12921999079532936, "learning_rate": 9.551324510344972e-06, "loss": 0.4649, "mean_token_accuracy": 0.8451011776924133, "num_tokens": 204853037.0, "step": 357 }, { "epoch": 2.734225621414914, "grad_norm": 0.13846536123122738, "learning_rate": 9.548838983086948e-06, "loss": 0.4771, "mean_token_accuracy": 0.8409933820366859, "num_tokens": 205429037.0, "step": 358 }, { "epoch": 2.7418738049713194, "grad_norm": 0.13803335699731223, "learning_rate": 9.546346915528844e-06, "loss": 0.4704, "mean_token_accuracy": 0.8430611714720726, "num_tokens": 206005037.0, "step": 359 }, { "epoch": 2.7495219885277247, "grad_norm": 0.1425603356301425, "learning_rate": 9.543848311253744e-06, "loss": 0.4506, "mean_token_accuracy": 0.8472823351621628, "num_tokens": 206551931.0, "step": 360 }, { "epoch": 2.75717017208413, "grad_norm": 0.12337957343310935, "learning_rate": 9.541343173854128e-06, "loss": 0.4024, "mean_token_accuracy": 0.8640047535300255, "num_tokens": 207127931.0, "step": 361 }, { "epoch": 2.7648183556405352, "grad_norm": 0.1440988584019457, "learning_rate": 9.538831506931874e-06, "loss": 0.483, "mean_token_accuracy": 0.8385800793766975, "num_tokens": 207703931.0, "step": 362 }, { "epoch": 2.772466539196941, "grad_norm": 0.1336436468088995, "learning_rate": 9.536313314098243e-06, "loss": 0.4869, "mean_token_accuracy": 0.8362970650196075, "num_tokens": 208277831.0, "step": 363 }, { "epoch": 2.7801147227533463, "grad_norm": 0.13491196154454094, "learning_rate": 9.533788598973883e-06, "loss": 0.465, "mean_token_accuracy": 0.8448338061571121, "num_tokens": 208853831.0, "step": 364 }, { "epoch": 2.7877629063097515, "grad_norm": 0.13638411467630132, "learning_rate": 9.531257365188818e-06, "loss": 0.4909, "mean_token_accuracy": 0.8366459757089615, "num_tokens": 209429831.0, "step": 365 }, { "epoch": 2.795411089866157, "grad_norm": 0.1368094560097927, "learning_rate": 9.528719616382443e-06, "loss": 0.4842, "mean_token_accuracy": 0.8384652063250542, "num_tokens": 209990026.0, "step": 366 }, { "epoch": 2.803059273422562, "grad_norm": 0.13780641532143703, "learning_rate": 9.526175356203521e-06, "loss": 0.4659, "mean_token_accuracy": 0.8452418148517609, "num_tokens": 210566026.0, "step": 367 }, { "epoch": 2.8107074569789674, "grad_norm": 0.14787034743484256, "learning_rate": 9.523624588310181e-06, "loss": 0.4681, "mean_token_accuracy": 0.8442660793662071, "num_tokens": 211142026.0, "step": 368 }, { "epoch": 2.8183556405353727, "grad_norm": 0.13862557544263718, "learning_rate": 9.521067316369903e-06, "loss": 0.4571, "mean_token_accuracy": 0.8479363769292831, "num_tokens": 211718026.0, "step": 369 }, { "epoch": 2.826003824091778, "grad_norm": 0.13772334123321825, "learning_rate": 9.518503544059523e-06, "loss": 0.4799, "mean_token_accuracy": 0.8397033959627151, "num_tokens": 212294026.0, "step": 370 }, { "epoch": 2.8336520076481837, "grad_norm": 0.13948890447531806, "learning_rate": 9.515933275065218e-06, "loss": 0.4896, "mean_token_accuracy": 0.837375171482563, "num_tokens": 212870026.0, "step": 371 }, { "epoch": 2.841300191204589, "grad_norm": 0.13858419410017597, "learning_rate": 9.513356513082512e-06, "loss": 0.4671, "mean_token_accuracy": 0.8438424468040466, "num_tokens": 213446026.0, "step": 372 }, { "epoch": 2.8489483747609943, "grad_norm": 0.13242895194664173, "learning_rate": 9.510773261816261e-06, "loss": 0.4517, "mean_token_accuracy": 0.8478269800543785, "num_tokens": 214022026.0, "step": 373 }, { "epoch": 2.8565965583173996, "grad_norm": 0.1361208802973601, "learning_rate": 9.508183524980651e-06, "loss": 0.4773, "mean_token_accuracy": 0.8403232023119926, "num_tokens": 214598026.0, "step": 374 }, { "epoch": 2.864244741873805, "grad_norm": 0.14229532646455947, "learning_rate": 9.505587306299196e-06, "loss": 0.4601, "mean_token_accuracy": 0.8460283130407333, "num_tokens": 215174026.0, "step": 375 }, { "epoch": 2.8718929254302106, "grad_norm": 0.14481534193858964, "learning_rate": 9.502984609504724e-06, "loss": 0.4816, "mean_token_accuracy": 0.8387467563152313, "num_tokens": 215750026.0, "step": 376 }, { "epoch": 2.879541108986616, "grad_norm": 0.12944860389639234, "learning_rate": 9.500375438339384e-06, "loss": 0.4539, "mean_token_accuracy": 0.8480362817645073, "num_tokens": 216311532.0, "step": 377 }, { "epoch": 2.887189292543021, "grad_norm": 0.14010965456936167, "learning_rate": 9.497759796554629e-06, "loss": 0.4715, "mean_token_accuracy": 0.8425246849656105, "num_tokens": 216887532.0, "step": 378 }, { "epoch": 2.8948374760994264, "grad_norm": 0.14339976848438407, "learning_rate": 9.495137687911218e-06, "loss": 0.4645, "mean_token_accuracy": 0.8451827839016914, "num_tokens": 217463532.0, "step": 379 }, { "epoch": 2.9024856596558317, "grad_norm": 0.1425721008440435, "learning_rate": 9.492509116179206e-06, "loss": 0.4758, "mean_token_accuracy": 0.8416374921798706, "num_tokens": 218039532.0, "step": 380 }, { "epoch": 2.910133843212237, "grad_norm": 0.12904281233195608, "learning_rate": 9.48987408513794e-06, "loss": 0.4513, "mean_token_accuracy": 0.848038800060749, "num_tokens": 218615532.0, "step": 381 }, { "epoch": 2.9177820267686423, "grad_norm": 0.13124547175151383, "learning_rate": 9.487232598576056e-06, "loss": 0.4435, "mean_token_accuracy": 0.8508514240384102, "num_tokens": 219191532.0, "step": 382 }, { "epoch": 2.9254302103250476, "grad_norm": 0.14160726517557595, "learning_rate": 9.484584660291476e-06, "loss": 0.4815, "mean_token_accuracy": 0.8395801186561584, "num_tokens": 219767532.0, "step": 383 }, { "epoch": 2.9330783938814533, "grad_norm": 0.13800419935826905, "learning_rate": 9.481930274091388e-06, "loss": 0.4562, "mean_token_accuracy": 0.8468825072050095, "num_tokens": 220343532.0, "step": 384 }, { "epoch": 2.9407265774378586, "grad_norm": 0.13561246479535266, "learning_rate": 9.47926944379226e-06, "loss": 0.4505, "mean_token_accuracy": 0.8491343483328819, "num_tokens": 220919532.0, "step": 385 }, { "epoch": 2.948374760994264, "grad_norm": 0.13430277105466684, "learning_rate": 9.476602173219822e-06, "loss": 0.4531, "mean_token_accuracy": 0.8477696999907494, "num_tokens": 221495532.0, "step": 386 }, { "epoch": 2.956022944550669, "grad_norm": 0.1471890474737779, "learning_rate": 9.47392846620906e-06, "loss": 0.4869, "mean_token_accuracy": 0.8385036885738373, "num_tokens": 222071532.0, "step": 387 }, { "epoch": 2.9636711281070744, "grad_norm": 0.15113073329267498, "learning_rate": 9.47124832660422e-06, "loss": 0.4813, "mean_token_accuracy": 0.8399360328912735, "num_tokens": 222647532.0, "step": 388 }, { "epoch": 2.97131931166348, "grad_norm": 0.13475027561090738, "learning_rate": 9.468561758258795e-06, "loss": 0.5047, "mean_token_accuracy": 0.8322638496756554, "num_tokens": 223223532.0, "step": 389 }, { "epoch": 2.9789674952198855, "grad_norm": 0.13848998512836547, "learning_rate": 9.465868765035519e-06, "loss": 0.4982, "mean_token_accuracy": 0.8339010700583458, "num_tokens": 223799532.0, "step": 390 }, { "epoch": 2.9866156787762907, "grad_norm": 0.1319671191248173, "learning_rate": 9.463169350806369e-06, "loss": 0.4761, "mean_token_accuracy": 0.8407210931181908, "num_tokens": 224361639.0, "step": 391 }, { "epoch": 2.994263862332696, "grad_norm": 0.14429086224338838, "learning_rate": 9.460463519452547e-06, "loss": 0.4894, "mean_token_accuracy": 0.8367779329419136, "num_tokens": 224937639.0, "step": 392 }, { "epoch": 3.0, "grad_norm": 0.14450290671664134, "learning_rate": 9.457751274864486e-06, "loss": 0.4328, "mean_token_accuracy": 0.8539082705974579, "num_tokens": 225369639.0, "step": 393 }, { "epoch": 3.0076481835564053, "grad_norm": 0.14273013240376312, "learning_rate": 9.45503262094184e-06, "loss": 0.4488, "mean_token_accuracy": 0.8485075756907463, "num_tokens": 225945639.0, "step": 394 }, { "epoch": 3.0152963671128106, "grad_norm": 0.1318792500917861, "learning_rate": 9.452307561593476e-06, "loss": 0.4355, "mean_token_accuracy": 0.8525528833270073, "num_tokens": 226521639.0, "step": 395 }, { "epoch": 3.022944550669216, "grad_norm": 0.13529933934932678, "learning_rate": 9.449576100737474e-06, "loss": 0.4405, "mean_token_accuracy": 0.8521223217248917, "num_tokens": 227097639.0, "step": 396 }, { "epoch": 3.0305927342256216, "grad_norm": 0.1239800938473107, "learning_rate": 9.446838242301113e-06, "loss": 0.4272, "mean_token_accuracy": 0.8561867102980614, "num_tokens": 227673639.0, "step": 397 }, { "epoch": 3.038240917782027, "grad_norm": 0.1346937584568678, "learning_rate": 9.444093990220876e-06, "loss": 0.4578, "mean_token_accuracy": 0.8471168950200081, "num_tokens": 228249639.0, "step": 398 }, { "epoch": 3.045889101338432, "grad_norm": 0.1342389352088606, "learning_rate": 9.441343348442436e-06, "loss": 0.44, "mean_token_accuracy": 0.8513809442520142, "num_tokens": 228825639.0, "step": 399 }, { "epoch": 3.0535372848948374, "grad_norm": 0.12739390345432006, "learning_rate": 9.438586320920651e-06, "loss": 0.421, "mean_token_accuracy": 0.8576086536049843, "num_tokens": 229401639.0, "step": 400 }, { "epoch": 3.0611854684512427, "grad_norm": 0.13440046577139128, "learning_rate": 9.435822911619564e-06, "loss": 0.4785, "mean_token_accuracy": 0.8404169529676437, "num_tokens": 229977639.0, "step": 401 }, { "epoch": 3.068833652007648, "grad_norm": 0.14325897840167365, "learning_rate": 9.433053124512394e-06, "loss": 0.4928, "mean_token_accuracy": 0.8352709040045738, "num_tokens": 230553639.0, "step": 402 }, { "epoch": 3.0764818355640537, "grad_norm": 0.1420958733294688, "learning_rate": 9.430276963581526e-06, "loss": 0.4624, "mean_token_accuracy": 0.8442712873220444, "num_tokens": 231129639.0, "step": 403 }, { "epoch": 3.084130019120459, "grad_norm": 0.13205149824341564, "learning_rate": 9.427494432818514e-06, "loss": 0.4558, "mean_token_accuracy": 0.8468581959605217, "num_tokens": 231705639.0, "step": 404 }, { "epoch": 3.0917782026768643, "grad_norm": 0.1316126104720129, "learning_rate": 9.424705536224065e-06, "loss": 0.4634, "mean_token_accuracy": 0.8433320075273514, "num_tokens": 232281639.0, "step": 405 }, { "epoch": 3.0994263862332696, "grad_norm": 0.1407535847573216, "learning_rate": 9.421910277808044e-06, "loss": 0.4896, "mean_token_accuracy": 0.8361390084028244, "num_tokens": 232857639.0, "step": 406 }, { "epoch": 3.107074569789675, "grad_norm": 0.13472622990137864, "learning_rate": 9.419108661589462e-06, "loss": 0.4536, "mean_token_accuracy": 0.8465508967638016, "num_tokens": 233433639.0, "step": 407 }, { "epoch": 3.11472275334608, "grad_norm": 0.1451607665647199, "learning_rate": 9.416300691596469e-06, "loss": 0.4722, "mean_token_accuracy": 0.8422694802284241, "num_tokens": 234009639.0, "step": 408 }, { "epoch": 3.1223709369024855, "grad_norm": 0.1467639357286111, "learning_rate": 9.41348637186635e-06, "loss": 0.4591, "mean_token_accuracy": 0.8459449708461761, "num_tokens": 234585639.0, "step": 409 }, { "epoch": 3.130019120458891, "grad_norm": 0.14178633805551513, "learning_rate": 9.410665706445521e-06, "loss": 0.4388, "mean_token_accuracy": 0.8521133288741112, "num_tokens": 235138310.0, "step": 410 }, { "epoch": 3.1376673040152965, "grad_norm": 0.13734459053843015, "learning_rate": 9.407838699389525e-06, "loss": 0.4542, "mean_token_accuracy": 0.846922442317009, "num_tokens": 235714310.0, "step": 411 }, { "epoch": 3.1453154875717018, "grad_norm": 0.1332800709874699, "learning_rate": 9.405005354763017e-06, "loss": 0.4542, "mean_token_accuracy": 0.8467904925346375, "num_tokens": 236290310.0, "step": 412 }, { "epoch": 3.152963671128107, "grad_norm": 0.13291737890683744, "learning_rate": 9.40216567663977e-06, "loss": 0.4474, "mean_token_accuracy": 0.8494607359170914, "num_tokens": 236866310.0, "step": 413 }, { "epoch": 3.1606118546845123, "grad_norm": 0.13823265816280292, "learning_rate": 9.399319669102655e-06, "loss": 0.4419, "mean_token_accuracy": 0.8513462394475937, "num_tokens": 237442310.0, "step": 414 }, { "epoch": 3.1682600382409176, "grad_norm": 0.13709902619662914, "learning_rate": 9.396467336243656e-06, "loss": 0.4385, "mean_token_accuracy": 0.8522455841302872, "num_tokens": 238018310.0, "step": 415 }, { "epoch": 3.1759082217973233, "grad_norm": 0.13499552935521622, "learning_rate": 9.39360868216384e-06, "loss": 0.4576, "mean_token_accuracy": 0.8459102585911751, "num_tokens": 238594310.0, "step": 416 }, { "epoch": 3.1835564053537286, "grad_norm": 0.1433007756568991, "learning_rate": 9.390743710973366e-06, "loss": 0.4664, "mean_token_accuracy": 0.8439188376069069, "num_tokens": 239170310.0, "step": 417 }, { "epoch": 3.191204588910134, "grad_norm": 0.129781541311147, "learning_rate": 9.38787242679148e-06, "loss": 0.4508, "mean_token_accuracy": 0.8471464067697525, "num_tokens": 239746310.0, "step": 418 }, { "epoch": 3.198852772466539, "grad_norm": 0.13681530680608084, "learning_rate": 9.384994833746496e-06, "loss": 0.4529, "mean_token_accuracy": 0.8484097048640251, "num_tokens": 240314805.0, "step": 419 }, { "epoch": 3.2065009560229445, "grad_norm": 0.1352192841534592, "learning_rate": 9.382110935975811e-06, "loss": 0.4646, "mean_token_accuracy": 0.8462948203086853, "num_tokens": 240868750.0, "step": 420 }, { "epoch": 3.21414913957935, "grad_norm": 0.14157928396699657, "learning_rate": 9.379220737625877e-06, "loss": 0.4686, "mean_token_accuracy": 0.8430594280362129, "num_tokens": 241444750.0, "step": 421 }, { "epoch": 3.221797323135755, "grad_norm": 0.13680279148546654, "learning_rate": 9.376324242852206e-06, "loss": 0.4461, "mean_token_accuracy": 0.8499329835176468, "num_tokens": 242020750.0, "step": 422 }, { "epoch": 3.229445506692161, "grad_norm": 0.13441364291759264, "learning_rate": 9.37342145581937e-06, "loss": 0.4534, "mean_token_accuracy": 0.8477957472205162, "num_tokens": 242596750.0, "step": 423 }, { "epoch": 3.237093690248566, "grad_norm": 0.14996464113428698, "learning_rate": 9.370512380700976e-06, "loss": 0.4538, "mean_token_accuracy": 0.8473148196935654, "num_tokens": 243172750.0, "step": 424 }, { "epoch": 3.2447418738049714, "grad_norm": 0.1291772530250717, "learning_rate": 9.367597021679686e-06, "loss": 0.4576, "mean_token_accuracy": 0.8455560505390167, "num_tokens": 243748750.0, "step": 425 }, { "epoch": 3.2523900573613767, "grad_norm": 0.14578020775893444, "learning_rate": 9.364675382947185e-06, "loss": 0.4318, "mean_token_accuracy": 0.8554418906569481, "num_tokens": 244324750.0, "step": 426 }, { "epoch": 3.260038240917782, "grad_norm": 0.13750460715362675, "learning_rate": 9.361747468704196e-06, "loss": 0.4418, "mean_token_accuracy": 0.8520320132374763, "num_tokens": 244900750.0, "step": 427 }, { "epoch": 3.2676864244741872, "grad_norm": 0.1372424458911705, "learning_rate": 9.35881328316046e-06, "loss": 0.4765, "mean_token_accuracy": 0.8400749266147614, "num_tokens": 245476750.0, "step": 428 }, { "epoch": 3.275334608030593, "grad_norm": 0.1395311312663304, "learning_rate": 9.35587283053473e-06, "loss": 0.5033, "mean_token_accuracy": 0.8329617977142334, "num_tokens": 246052750.0, "step": 429 }, { "epoch": 3.2829827915869982, "grad_norm": 0.13615900761434377, "learning_rate": 9.352926115054783e-06, "loss": 0.4659, "mean_token_accuracy": 0.8443350717425346, "num_tokens": 246622973.0, "step": 430 }, { "epoch": 3.2906309751434035, "grad_norm": 0.13476399243406678, "learning_rate": 9.349973140957392e-06, "loss": 0.4588, "mean_token_accuracy": 0.8465144336223602, "num_tokens": 247198973.0, "step": 431 }, { "epoch": 3.298279158699809, "grad_norm": 0.138319841165643, "learning_rate": 9.347013912488324e-06, "loss": 0.4456, "mean_token_accuracy": 0.8498670160770416, "num_tokens": 247774973.0, "step": 432 }, { "epoch": 3.305927342256214, "grad_norm": 0.1295935255491883, "learning_rate": 9.344048433902351e-06, "loss": 0.444, "mean_token_accuracy": 0.8501743152737617, "num_tokens": 248350973.0, "step": 433 }, { "epoch": 3.3135755258126194, "grad_norm": 0.1352566895751494, "learning_rate": 9.34107670946322e-06, "loss": 0.4381, "mean_token_accuracy": 0.8526205867528915, "num_tokens": 248926973.0, "step": 434 }, { "epoch": 3.3212237093690247, "grad_norm": 0.13526914802404547, "learning_rate": 9.338098743443666e-06, "loss": 0.4552, "mean_token_accuracy": 0.8469623699784279, "num_tokens": 249502973.0, "step": 435 }, { "epoch": 3.3288718929254304, "grad_norm": 0.1378327942247355, "learning_rate": 9.335114540125393e-06, "loss": 0.4983, "mean_token_accuracy": 0.8344531655311584, "num_tokens": 250078973.0, "step": 436 }, { "epoch": 3.3365200764818357, "grad_norm": 0.13678024458347515, "learning_rate": 9.332124103799075e-06, "loss": 0.4648, "mean_token_accuracy": 0.8443667814135551, "num_tokens": 250654973.0, "step": 437 }, { "epoch": 3.344168260038241, "grad_norm": 0.13664942328914748, "learning_rate": 9.329127438764351e-06, "loss": 0.4892, "mean_token_accuracy": 0.8364584743976593, "num_tokens": 251230973.0, "step": 438 }, { "epoch": 3.3518164435946463, "grad_norm": 0.13783383905141766, "learning_rate": 9.32612454932981e-06, "loss": 0.4633, "mean_token_accuracy": 0.8456706553697586, "num_tokens": 251806973.0, "step": 439 }, { "epoch": 3.3594646271510515, "grad_norm": 0.13339399076355143, "learning_rate": 9.323115439812993e-06, "loss": 0.4437, "mean_token_accuracy": 0.8508689105510712, "num_tokens": 252367168.0, "step": 440 }, { "epoch": 3.367112810707457, "grad_norm": 0.1338168045570386, "learning_rate": 9.320100114540382e-06, "loss": 0.4634, "mean_token_accuracy": 0.8454900979995728, "num_tokens": 252943168.0, "step": 441 }, { "epoch": 3.3747609942638626, "grad_norm": 0.13234254602598014, "learning_rate": 9.317078577847402e-06, "loss": 0.4669, "mean_token_accuracy": 0.843821607530117, "num_tokens": 253519168.0, "step": 442 }, { "epoch": 3.382409177820268, "grad_norm": 0.13860690572904696, "learning_rate": 9.314050834078401e-06, "loss": 0.453, "mean_token_accuracy": 0.8475092798471451, "num_tokens": 254095168.0, "step": 443 }, { "epoch": 3.390057361376673, "grad_norm": 0.14010574280265636, "learning_rate": 9.311016887586659e-06, "loss": 0.4529, "mean_token_accuracy": 0.8471189886331558, "num_tokens": 254669068.0, "step": 444 }, { "epoch": 3.3977055449330784, "grad_norm": 0.13286366341534026, "learning_rate": 9.307976742734366e-06, "loss": 0.4579, "mean_token_accuracy": 0.8456550240516663, "num_tokens": 255245068.0, "step": 445 }, { "epoch": 3.4053537284894837, "grad_norm": 0.12287227122879738, "learning_rate": 9.304930403892633e-06, "loss": 0.4284, "mean_token_accuracy": 0.855134591460228, "num_tokens": 255821068.0, "step": 446 }, { "epoch": 3.413001912045889, "grad_norm": 0.13496131187274832, "learning_rate": 9.30187787544147e-06, "loss": 0.4519, "mean_token_accuracy": 0.8480301275849342, "num_tokens": 256397068.0, "step": 447 }, { "epoch": 3.4206500956022943, "grad_norm": 0.12304815197408708, "learning_rate": 9.298819161769788e-06, "loss": 0.4055, "mean_token_accuracy": 0.863836444914341, "num_tokens": 256970705.0, "step": 448 }, { "epoch": 3.4282982791587, "grad_norm": 0.1312084281080521, "learning_rate": 9.295754267275393e-06, "loss": 0.4304, "mean_token_accuracy": 0.8547022864222527, "num_tokens": 257546705.0, "step": 449 }, { "epoch": 3.4359464627151053, "grad_norm": 0.1424625968795541, "learning_rate": 9.292683196364975e-06, "loss": 0.5003, "mean_token_accuracy": 0.8330485969781876, "num_tokens": 258122705.0, "step": 450 }, { "epoch": 3.4435946462715106, "grad_norm": 0.13782377842414753, "learning_rate": 9.289605953454108e-06, "loss": 0.4567, "mean_token_accuracy": 0.8465578481554985, "num_tokens": 258698705.0, "step": 451 }, { "epoch": 3.451242829827916, "grad_norm": 0.13609928849701888, "learning_rate": 9.286522542967235e-06, "loss": 0.4584, "mean_token_accuracy": 0.8467818200588226, "num_tokens": 259274705.0, "step": 452 }, { "epoch": 3.458891013384321, "grad_norm": 0.1261203105793479, "learning_rate": 9.283432969337672e-06, "loss": 0.4616, "mean_token_accuracy": 0.8468790426850319, "num_tokens": 259850705.0, "step": 453 }, { "epoch": 3.4665391969407264, "grad_norm": 0.13614975492427833, "learning_rate": 9.280337237007592e-06, "loss": 0.464, "mean_token_accuracy": 0.8447556719183922, "num_tokens": 260426705.0, "step": 454 }, { "epoch": 3.474187380497132, "grad_norm": 0.1327458325746583, "learning_rate": 9.277235350428029e-06, "loss": 0.4454, "mean_token_accuracy": 0.8494572639465332, "num_tokens": 261002705.0, "step": 455 }, { "epoch": 3.4818355640535374, "grad_norm": 0.13085159750760433, "learning_rate": 9.274127314058857e-06, "loss": 0.4371, "mean_token_accuracy": 0.852618858218193, "num_tokens": 261578705.0, "step": 456 }, { "epoch": 3.4894837476099427, "grad_norm": 0.13578463225475795, "learning_rate": 9.271013132368799e-06, "loss": 0.4642, "mean_token_accuracy": 0.8443883061408997, "num_tokens": 262140970.0, "step": 457 }, { "epoch": 3.497131931166348, "grad_norm": 0.14006598972599096, "learning_rate": 9.267892809835409e-06, "loss": 0.4637, "mean_token_accuracy": 0.8449813947081566, "num_tokens": 262716970.0, "step": 458 }, { "epoch": 3.5047801147227533, "grad_norm": 0.13185777866523177, "learning_rate": 9.264766350945076e-06, "loss": 0.4302, "mean_token_accuracy": 0.8541102334856987, "num_tokens": 263292970.0, "step": 459 }, { "epoch": 3.5124282982791586, "grad_norm": 0.13342931833994348, "learning_rate": 9.261633760193005e-06, "loss": 0.4686, "mean_token_accuracy": 0.8435091152787209, "num_tokens": 263868970.0, "step": 460 }, { "epoch": 3.520076481835564, "grad_norm": 0.13168965271918778, "learning_rate": 9.258495042083222e-06, "loss": 0.4434, "mean_token_accuracy": 0.8501882031559944, "num_tokens": 264444970.0, "step": 461 }, { "epoch": 3.527724665391969, "grad_norm": 0.13243605685606727, "learning_rate": 9.255350201128564e-06, "loss": 0.4465, "mean_token_accuracy": 0.8496812433004379, "num_tokens": 265020970.0, "step": 462 }, { "epoch": 3.535372848948375, "grad_norm": 0.1552286657686917, "learning_rate": 9.252199241850666e-06, "loss": 0.446, "mean_token_accuracy": 0.8502941057085991, "num_tokens": 265596970.0, "step": 463 }, { "epoch": 3.54302103250478, "grad_norm": 0.13185696300414065, "learning_rate": 9.249042168779962e-06, "loss": 0.4391, "mean_token_accuracy": 0.8525671735405922, "num_tokens": 266155354.0, "step": 464 }, { "epoch": 3.5506692160611855, "grad_norm": 0.12925331557566425, "learning_rate": 9.245878986455684e-06, "loss": 0.4556, "mean_token_accuracy": 0.8468790277838707, "num_tokens": 266731354.0, "step": 465 }, { "epoch": 3.5583173996175907, "grad_norm": 0.14112072128802675, "learning_rate": 9.242709699425833e-06, "loss": 0.4499, "mean_token_accuracy": 0.8501986265182495, "num_tokens": 267307354.0, "step": 466 }, { "epoch": 3.565965583173996, "grad_norm": 0.12961482591836798, "learning_rate": 9.2395343122472e-06, "loss": 0.4794, "mean_token_accuracy": 0.8403509855270386, "num_tokens": 267883354.0, "step": 467 }, { "epoch": 3.5736137667304018, "grad_norm": 0.15553386932308658, "learning_rate": 9.236352829485342e-06, "loss": 0.447, "mean_token_accuracy": 0.8502125144004822, "num_tokens": 268459354.0, "step": 468 }, { "epoch": 3.581261950286807, "grad_norm": 0.13773631625374297, "learning_rate": 9.23316525571458e-06, "loss": 0.472, "mean_token_accuracy": 0.8414899259805679, "num_tokens": 269035354.0, "step": 469 }, { "epoch": 3.5889101338432123, "grad_norm": 0.12994566689301978, "learning_rate": 9.229971595517993e-06, "loss": 0.4278, "mean_token_accuracy": 0.8565200641751289, "num_tokens": 269611354.0, "step": 470 }, { "epoch": 3.5965583173996176, "grad_norm": 0.147867087508267, "learning_rate": 9.226771853487411e-06, "loss": 0.4862, "mean_token_accuracy": 0.8371338397264481, "num_tokens": 270187354.0, "step": 471 }, { "epoch": 3.604206500956023, "grad_norm": 0.1333395328504752, "learning_rate": 9.223566034223409e-06, "loss": 0.4849, "mean_token_accuracy": 0.838104359805584, "num_tokens": 270763354.0, "step": 472 }, { "epoch": 3.611854684512428, "grad_norm": 0.14126948407840498, "learning_rate": 9.2203541423353e-06, "loss": 0.4591, "mean_token_accuracy": 0.8453320935368538, "num_tokens": 271339354.0, "step": 473 }, { "epoch": 3.6195028680688335, "grad_norm": 0.1364081032107475, "learning_rate": 9.217136182441124e-06, "loss": 0.4311, "mean_token_accuracy": 0.8542838543653488, "num_tokens": 271915354.0, "step": 474 }, { "epoch": 3.6271510516252388, "grad_norm": 0.13312098293506802, "learning_rate": 9.213912159167655e-06, "loss": 0.4652, "mean_token_accuracy": 0.8445072919130325, "num_tokens": 272488686.0, "step": 475 }, { "epoch": 3.6347992351816445, "grad_norm": 0.17152486390579674, "learning_rate": 9.210682077150375e-06, "loss": 0.4693, "mean_token_accuracy": 0.8427104577422142, "num_tokens": 273064686.0, "step": 476 }, { "epoch": 3.64244741873805, "grad_norm": 0.1463912816972637, "learning_rate": 9.207445941033483e-06, "loss": 0.4397, "mean_token_accuracy": 0.8523378744721413, "num_tokens": 273617956.0, "step": 477 }, { "epoch": 3.650095602294455, "grad_norm": 0.1389469013265535, "learning_rate": 9.204203755469879e-06, "loss": 0.4722, "mean_token_accuracy": 0.8406079337000847, "num_tokens": 274193956.0, "step": 478 }, { "epoch": 3.6577437858508604, "grad_norm": 0.1304788404105764, "learning_rate": 9.200955525121165e-06, "loss": 0.4286, "mean_token_accuracy": 0.8560905903577805, "num_tokens": 274756063.0, "step": 479 }, { "epoch": 3.6653919694072656, "grad_norm": 0.13302202550891926, "learning_rate": 9.197701254657631e-06, "loss": 0.4328, "mean_token_accuracy": 0.8527768403291702, "num_tokens": 275332063.0, "step": 480 }, { "epoch": 3.6730401529636714, "grad_norm": 0.13640185735673466, "learning_rate": 9.19444094875825e-06, "loss": 0.4494, "mean_token_accuracy": 0.848458968102932, "num_tokens": 275908063.0, "step": 481 }, { "epoch": 3.6806883365200767, "grad_norm": 0.1356639995335594, "learning_rate": 9.19117461211068e-06, "loss": 0.4407, "mean_token_accuracy": 0.8518905341625214, "num_tokens": 276469569.0, "step": 482 }, { "epoch": 3.688336520076482, "grad_norm": 0.13619130952960193, "learning_rate": 9.187902249411241e-06, "loss": 0.4502, "mean_token_accuracy": 0.8488947451114655, "num_tokens": 277045569.0, "step": 483 }, { "epoch": 3.6959847036328872, "grad_norm": 0.13708965399379136, "learning_rate": 9.184623865364924e-06, "loss": 0.4769, "mean_token_accuracy": 0.8410697728395462, "num_tokens": 277621569.0, "step": 484 }, { "epoch": 3.7036328871892925, "grad_norm": 0.13720203429880218, "learning_rate": 9.18133946468537e-06, "loss": 0.4651, "mean_token_accuracy": 0.8440004512667656, "num_tokens": 278197569.0, "step": 485 }, { "epoch": 3.711281070745698, "grad_norm": 0.1299350454538957, "learning_rate": 9.178049052094881e-06, "loss": 0.437, "mean_token_accuracy": 0.8537300229072571, "num_tokens": 278773569.0, "step": 486 }, { "epoch": 3.718929254302103, "grad_norm": 0.13110362377063814, "learning_rate": 9.174752632324394e-06, "loss": 0.4408, "mean_token_accuracy": 0.8510458767414093, "num_tokens": 279349569.0, "step": 487 }, { "epoch": 3.7265774378585084, "grad_norm": 0.13551732054175086, "learning_rate": 9.171450210113487e-06, "loss": 0.4622, "mean_token_accuracy": 0.844747006893158, "num_tokens": 279925569.0, "step": 488 }, { "epoch": 3.734225621414914, "grad_norm": 0.13730675054002467, "learning_rate": 9.16814179021037e-06, "loss": 0.4284, "mean_token_accuracy": 0.8542994931340218, "num_tokens": 280501569.0, "step": 489 }, { "epoch": 3.7418738049713194, "grad_norm": 0.1292955427034706, "learning_rate": 9.16482737737187e-06, "loss": 0.463, "mean_token_accuracy": 0.8444327488541603, "num_tokens": 281077569.0, "step": 490 }, { "epoch": 3.7495219885277247, "grad_norm": 0.13912430248307478, "learning_rate": 9.161506976363438e-06, "loss": 0.4417, "mean_token_accuracy": 0.8520372435450554, "num_tokens": 281653569.0, "step": 491 }, { "epoch": 3.75717017208413, "grad_norm": 0.1366214099806215, "learning_rate": 9.158180591959131e-06, "loss": 0.4589, "mean_token_accuracy": 0.8459536507725716, "num_tokens": 282229569.0, "step": 492 }, { "epoch": 3.7648183556405352, "grad_norm": 0.1369364998873903, "learning_rate": 9.154848228941607e-06, "loss": 0.4385, "mean_token_accuracy": 0.8520493879914284, "num_tokens": 282805569.0, "step": 493 }, { "epoch": 3.772466539196941, "grad_norm": 0.12498764879815455, "learning_rate": 9.151509892102125e-06, "loss": 0.4247, "mean_token_accuracy": 0.8574089854955673, "num_tokens": 283381569.0, "step": 494 }, { "epoch": 3.7801147227533463, "grad_norm": 0.1322000135860979, "learning_rate": 9.148165586240531e-06, "loss": 0.4491, "mean_token_accuracy": 0.8488652408123016, "num_tokens": 283957569.0, "step": 495 }, { "epoch": 3.7877629063097515, "grad_norm": 0.13844783624470497, "learning_rate": 9.144815316165251e-06, "loss": 0.4533, "mean_token_accuracy": 0.8480179756879807, "num_tokens": 284533569.0, "step": 496 }, { "epoch": 3.795411089866157, "grad_norm": 0.14356041185467444, "learning_rate": 9.14145908669329e-06, "loss": 0.4415, "mean_token_accuracy": 0.8507316261529922, "num_tokens": 285109569.0, "step": 497 }, { "epoch": 3.803059273422562, "grad_norm": 0.13253514930903268, "learning_rate": 9.138096902650217e-06, "loss": 0.4577, "mean_token_accuracy": 0.845842532813549, "num_tokens": 285685569.0, "step": 498 }, { "epoch": 3.8107074569789674, "grad_norm": 0.1317692376942017, "learning_rate": 9.134728768870167e-06, "loss": 0.4439, "mean_token_accuracy": 0.8503479510545731, "num_tokens": 286261569.0, "step": 499 }, { "epoch": 3.8183556405353727, "grad_norm": 0.12686756349523995, "learning_rate": 9.131354690195827e-06, "loss": 0.4351, "mean_token_accuracy": 0.8522073924541473, "num_tokens": 286837569.0, "step": 500 }, { "epoch": 3.826003824091778, "grad_norm": 0.12900551278598996, "learning_rate": 9.127974671478432e-06, "loss": 0.4314, "mean_token_accuracy": 0.8539366275072098, "num_tokens": 287413569.0, "step": 501 }, { "epoch": 3.8336520076481837, "grad_norm": 0.13292231226530699, "learning_rate": 9.124588717577759e-06, "loss": 0.4686, "mean_token_accuracy": 0.8422486335039139, "num_tokens": 287989569.0, "step": 502 }, { "epoch": 3.841300191204589, "grad_norm": 0.136683585551859, "learning_rate": 9.121196833362112e-06, "loss": 0.4483, "mean_token_accuracy": 0.8491551652550697, "num_tokens": 288565569.0, "step": 503 }, { "epoch": 3.8489483747609943, "grad_norm": 0.1444111504728688, "learning_rate": 9.117799023708334e-06, "loss": 0.4773, "mean_token_accuracy": 0.8403370901942253, "num_tokens": 289141569.0, "step": 504 }, { "epoch": 3.8565965583173996, "grad_norm": 0.12171464057354885, "learning_rate": 9.114395293501775e-06, "loss": 0.4109, "mean_token_accuracy": 0.8606747463345528, "num_tokens": 289717569.0, "step": 505 }, { "epoch": 3.864244741873805, "grad_norm": 0.13308127354938107, "learning_rate": 9.110985647636303e-06, "loss": 0.4475, "mean_token_accuracy": 0.8503652960062027, "num_tokens": 290293569.0, "step": 506 }, { "epoch": 3.8718929254302106, "grad_norm": 0.16593234316109462, "learning_rate": 9.107570091014295e-06, "loss": 0.479, "mean_token_accuracy": 0.8400173112750053, "num_tokens": 290868510.0, "step": 507 }, { "epoch": 3.879541108986616, "grad_norm": 0.13689658405775249, "learning_rate": 9.10414862854662e-06, "loss": 0.4492, "mean_token_accuracy": 0.8498774170875549, "num_tokens": 291444510.0, "step": 508 }, { "epoch": 3.887189292543021, "grad_norm": 0.12795882142422862, "learning_rate": 9.100721265152644e-06, "loss": 0.4376, "mean_token_accuracy": 0.8533775582909584, "num_tokens": 292020510.0, "step": 509 }, { "epoch": 3.8948374760994264, "grad_norm": 0.13169792505551053, "learning_rate": 9.097288005760213e-06, "loss": 0.4498, "mean_token_accuracy": 0.8481290861964226, "num_tokens": 292596510.0, "step": 510 }, { "epoch": 3.9024856596558317, "grad_norm": 0.1297287757258664, "learning_rate": 9.09384885530565e-06, "loss": 0.4605, "mean_token_accuracy": 0.8447956144809723, "num_tokens": 293172510.0, "step": 511 }, { "epoch": 3.910133843212237, "grad_norm": 0.1376874016028007, "learning_rate": 9.09040381873375e-06, "loss": 0.476, "mean_token_accuracy": 0.8403527215123177, "num_tokens": 293748510.0, "step": 512 }, { "epoch": 3.9177820267686423, "grad_norm": 0.1297958223852381, "learning_rate": 9.086952900997774e-06, "loss": 0.4518, "mean_token_accuracy": 0.8482714593410492, "num_tokens": 294324510.0, "step": 513 }, { "epoch": 3.9254302103250476, "grad_norm": 0.13708161020019655, "learning_rate": 9.083496107059433e-06, "loss": 0.4636, "mean_token_accuracy": 0.8451602086424828, "num_tokens": 294900510.0, "step": 514 }, { "epoch": 3.9330783938814533, "grad_norm": 0.12977023510450897, "learning_rate": 9.08003344188889e-06, "loss": 0.4627, "mean_token_accuracy": 0.8444761633872986, "num_tokens": 295476510.0, "step": 515 }, { "epoch": 3.9407265774378586, "grad_norm": 0.13266699207916796, "learning_rate": 9.076564910464753e-06, "loss": 0.4432, "mean_token_accuracy": 0.8500024378299713, "num_tokens": 296052510.0, "step": 516 }, { "epoch": 3.948374760994264, "grad_norm": 0.13852665867070613, "learning_rate": 9.073090517774057e-06, "loss": 0.4505, "mean_token_accuracy": 0.8480318710207939, "num_tokens": 296628510.0, "step": 517 }, { "epoch": 3.956022944550669, "grad_norm": 0.13867926058656033, "learning_rate": 9.06961026881227e-06, "loss": 0.4736, "mean_token_accuracy": 0.8420107811689377, "num_tokens": 297204510.0, "step": 518 }, { "epoch": 3.9636711281070744, "grad_norm": 0.1472014718774958, "learning_rate": 9.066124168583277e-06, "loss": 0.4339, "mean_token_accuracy": 0.8534956350922585, "num_tokens": 297780510.0, "step": 519 }, { "epoch": 3.97131931166348, "grad_norm": 0.14861258236560726, "learning_rate": 9.062632222099375e-06, "loss": 0.4604, "mean_token_accuracy": 0.8463720604777336, "num_tokens": 298356510.0, "step": 520 }, { "epoch": 3.9789674952198855, "grad_norm": 0.13635801589712673, "learning_rate": 9.059134434381274e-06, "loss": 0.4673, "mean_token_accuracy": 0.8437070325016975, "num_tokens": 298932510.0, "step": 521 }, { "epoch": 3.9866156787762907, "grad_norm": 0.13517971159266337, "learning_rate": 9.055630810458072e-06, "loss": 0.4703, "mean_token_accuracy": 0.8419812694191933, "num_tokens": 299508510.0, "step": 522 }, { "epoch": 3.994263862332696, "grad_norm": 0.13502762281684197, "learning_rate": 9.052121355367267e-06, "loss": 0.447, "mean_token_accuracy": 0.8502528890967369, "num_tokens": 300071157.0, "step": 523 }, { "epoch": 4.0, "grad_norm": 0.16299524977157676, "learning_rate": 9.048606074154738e-06, "loss": 0.4471, "mean_token_accuracy": 0.8504007657368978, "num_tokens": 300492852.0, "step": 524 }, { "epoch": 4.007648183556405, "grad_norm": 0.1367796055868044, "learning_rate": 9.045084971874738e-06, "loss": 0.4092, "mean_token_accuracy": 0.8609004467725754, "num_tokens": 301068852.0, "step": 525 }, { "epoch": 4.015296367112811, "grad_norm": 0.13716134351284365, "learning_rate": 9.041558053589894e-06, "loss": 0.43, "mean_token_accuracy": 0.853941835463047, "num_tokens": 301644852.0, "step": 526 }, { "epoch": 4.022944550669216, "grad_norm": 0.13676487285704125, "learning_rate": 9.038025324371192e-06, "loss": 0.4638, "mean_token_accuracy": 0.8445838019251823, "num_tokens": 302220852.0, "step": 527 }, { "epoch": 4.030592734225621, "grad_norm": 0.13821684713351107, "learning_rate": 9.034486789297973e-06, "loss": 0.4609, "mean_token_accuracy": 0.8445247709751129, "num_tokens": 302796852.0, "step": 528 }, { "epoch": 4.038240917782026, "grad_norm": 0.14599457165525512, "learning_rate": 9.030942453457928e-06, "loss": 0.4614, "mean_token_accuracy": 0.8445004597306252, "num_tokens": 303372852.0, "step": 529 }, { "epoch": 4.045889101338432, "grad_norm": 0.16625511185143685, "learning_rate": 9.027392321947088e-06, "loss": 0.4793, "mean_token_accuracy": 0.838548831641674, "num_tokens": 303948852.0, "step": 530 }, { "epoch": 4.053537284894838, "grad_norm": 0.138119624588249, "learning_rate": 9.023836399869814e-06, "loss": 0.4242, "mean_token_accuracy": 0.8558134436607361, "num_tokens": 304524852.0, "step": 531 }, { "epoch": 4.061185468451243, "grad_norm": 0.14231125279660453, "learning_rate": 9.020274692338796e-06, "loss": 0.4149, "mean_token_accuracy": 0.858605220913887, "num_tokens": 305100852.0, "step": 532 }, { "epoch": 4.0688336520076485, "grad_norm": 0.13579393688100058, "learning_rate": 9.01670720447504e-06, "loss": 0.4289, "mean_token_accuracy": 0.8546015843749046, "num_tokens": 305676852.0, "step": 533 }, { "epoch": 4.076481835564054, "grad_norm": 0.14167790695591406, "learning_rate": 9.013133941407866e-06, "loss": 0.4585, "mean_token_accuracy": 0.8454310745000839, "num_tokens": 306252852.0, "step": 534 }, { "epoch": 4.084130019120459, "grad_norm": 0.1352511945058088, "learning_rate": 9.009554908274893e-06, "loss": 0.444, "mean_token_accuracy": 0.8504850938916206, "num_tokens": 306828852.0, "step": 535 }, { "epoch": 4.091778202676864, "grad_norm": 0.1432264112617214, "learning_rate": 9.00597011022204e-06, "loss": 0.4587, "mean_token_accuracy": 0.8464554026722908, "num_tokens": 307404852.0, "step": 536 }, { "epoch": 4.09942638623327, "grad_norm": 0.13151056491832114, "learning_rate": 9.00237955240351e-06, "loss": 0.4168, "mean_token_accuracy": 0.8592042028903961, "num_tokens": 307980852.0, "step": 537 }, { "epoch": 4.107074569789675, "grad_norm": 0.133618272264501, "learning_rate": 8.998783239981796e-06, "loss": 0.4271, "mean_token_accuracy": 0.8552821651101112, "num_tokens": 308556852.0, "step": 538 }, { "epoch": 4.11472275334608, "grad_norm": 0.13915971751181871, "learning_rate": 8.995181178127659e-06, "loss": 0.4518, "mean_token_accuracy": 0.8473391234874725, "num_tokens": 309132852.0, "step": 539 }, { "epoch": 4.1223709369024855, "grad_norm": 0.13454371391547879, "learning_rate": 8.991573372020123e-06, "loss": 0.4324, "mean_token_accuracy": 0.8538081347942352, "num_tokens": 309708852.0, "step": 540 }, { "epoch": 4.130019120458891, "grad_norm": 0.13213992505186528, "learning_rate": 8.987959826846479e-06, "loss": 0.4461, "mean_token_accuracy": 0.8492263630032539, "num_tokens": 310284852.0, "step": 541 }, { "epoch": 4.137667304015296, "grad_norm": 0.13593594697201444, "learning_rate": 8.984340547802264e-06, "loss": 0.4461, "mean_token_accuracy": 0.8491100370883942, "num_tokens": 310860852.0, "step": 542 }, { "epoch": 4.145315487571701, "grad_norm": 0.12981333151349195, "learning_rate": 8.980715540091263e-06, "loss": 0.4266, "mean_token_accuracy": 0.8562596216797829, "num_tokens": 311436852.0, "step": 543 }, { "epoch": 4.1529636711281075, "grad_norm": 0.13614955901925288, "learning_rate": 8.977084808925494e-06, "loss": 0.4121, "mean_token_accuracy": 0.859827496111393, "num_tokens": 312012852.0, "step": 544 }, { "epoch": 4.160611854684513, "grad_norm": 0.13682770116117984, "learning_rate": 8.973448359525207e-06, "loss": 0.4318, "mean_token_accuracy": 0.8550026342272758, "num_tokens": 312588852.0, "step": 545 }, { "epoch": 4.168260038240918, "grad_norm": 0.137849063910186, "learning_rate": 8.96980619711887e-06, "loss": 0.4395, "mean_token_accuracy": 0.8518254309892654, "num_tokens": 313164852.0, "step": 546 }, { "epoch": 4.175908221797323, "grad_norm": 0.13710001561248336, "learning_rate": 8.96615832694317e-06, "loss": 0.4189, "mean_token_accuracy": 0.8579558879137039, "num_tokens": 313740852.0, "step": 547 }, { "epoch": 4.183556405353729, "grad_norm": 0.13484817825542728, "learning_rate": 8.962504754242997e-06, "loss": 0.4178, "mean_token_accuracy": 0.8575565740466118, "num_tokens": 314316852.0, "step": 548 }, { "epoch": 4.191204588910134, "grad_norm": 0.1355879929584855, "learning_rate": 8.958845484271443e-06, "loss": 0.4326, "mean_token_accuracy": 0.8534713238477707, "num_tokens": 314892852.0, "step": 549 }, { "epoch": 4.198852772466539, "grad_norm": 0.13093335486708962, "learning_rate": 8.955180522289787e-06, "loss": 0.3991, "mean_token_accuracy": 0.8641314953565598, "num_tokens": 315468852.0, "step": 550 }, { "epoch": 4.2065009560229445, "grad_norm": 0.1356895962011802, "learning_rate": 8.951509873567498e-06, "loss": 0.4595, "mean_token_accuracy": 0.845748782157898, "num_tokens": 316044852.0, "step": 551 }, { "epoch": 4.21414913957935, "grad_norm": 0.1359263330797516, "learning_rate": 8.947833543382216e-06, "loss": 0.4512, "mean_token_accuracy": 0.8480579107999802, "num_tokens": 316620852.0, "step": 552 }, { "epoch": 4.221797323135755, "grad_norm": 0.13568308044384295, "learning_rate": 8.944151537019752e-06, "loss": 0.4331, "mean_token_accuracy": 0.8542803898453712, "num_tokens": 317196852.0, "step": 553 }, { "epoch": 4.22944550669216, "grad_norm": 0.14107612547899365, "learning_rate": 8.940463859774078e-06, "loss": 0.4391, "mean_token_accuracy": 0.8508462160825729, "num_tokens": 317772852.0, "step": 554 }, { "epoch": 4.237093690248566, "grad_norm": 0.14768495035601778, "learning_rate": 8.93677051694732e-06, "loss": 0.4169, "mean_token_accuracy": 0.8585600778460503, "num_tokens": 318348852.0, "step": 555 }, { "epoch": 4.244741873804971, "grad_norm": 0.1340278519676253, "learning_rate": 8.93307151384975e-06, "loss": 0.4234, "mean_token_accuracy": 0.8561016395688057, "num_tokens": 318924852.0, "step": 556 }, { "epoch": 4.252390057361376, "grad_norm": 0.13680193270577393, "learning_rate": 8.929366855799777e-06, "loss": 0.4493, "mean_token_accuracy": 0.8488930016756058, "num_tokens": 319500852.0, "step": 557 }, { "epoch": 4.260038240917782, "grad_norm": 0.13852513522634227, "learning_rate": 8.925656548123942e-06, "loss": 0.4648, "mean_token_accuracy": 0.8442719057202339, "num_tokens": 320063117.0, "step": 558 }, { "epoch": 4.267686424474188, "grad_norm": 0.13624189114266874, "learning_rate": 8.92194059615691e-06, "loss": 0.437, "mean_token_accuracy": 0.8514139503240585, "num_tokens": 320639117.0, "step": 559 }, { "epoch": 4.275334608030593, "grad_norm": 0.13350408206652886, "learning_rate": 8.918219005241458e-06, "loss": 0.4589, "mean_token_accuracy": 0.844880685210228, "num_tokens": 321215117.0, "step": 560 }, { "epoch": 4.282982791586998, "grad_norm": 0.13649099774497725, "learning_rate": 8.914491780728471e-06, "loss": 0.4671, "mean_token_accuracy": 0.8434830605983734, "num_tokens": 321791117.0, "step": 561 }, { "epoch": 4.2906309751434035, "grad_norm": 0.14170690277634168, "learning_rate": 8.91075892797694e-06, "loss": 0.4458, "mean_token_accuracy": 0.8494138568639755, "num_tokens": 322367117.0, "step": 562 }, { "epoch": 4.298279158699809, "grad_norm": 0.1428239008677417, "learning_rate": 8.90702045235394e-06, "loss": 0.4537, "mean_token_accuracy": 0.8470439687371254, "num_tokens": 322943117.0, "step": 563 }, { "epoch": 4.305927342256214, "grad_norm": 0.13750099119258852, "learning_rate": 8.903276359234638e-06, "loss": 0.434, "mean_token_accuracy": 0.8533461019396782, "num_tokens": 323518058.0, "step": 564 }, { "epoch": 4.313575525812619, "grad_norm": 0.13798858771811107, "learning_rate": 8.899526654002268e-06, "loss": 0.4482, "mean_token_accuracy": 0.8492783904075623, "num_tokens": 324071328.0, "step": 565 }, { "epoch": 4.321223709369025, "grad_norm": 0.13569499666557838, "learning_rate": 8.895771342048145e-06, "loss": 0.4322, "mean_token_accuracy": 0.8544071167707443, "num_tokens": 324647328.0, "step": 566 }, { "epoch": 4.32887189292543, "grad_norm": 0.14183511720361952, "learning_rate": 8.892010428771638e-06, "loss": 0.4325, "mean_token_accuracy": 0.8530407473444939, "num_tokens": 325223328.0, "step": 567 }, { "epoch": 4.336520076481835, "grad_norm": 0.14107156072568003, "learning_rate": 8.88824391958017e-06, "loss": 0.4307, "mean_token_accuracy": 0.8547335267066956, "num_tokens": 325799328.0, "step": 568 }, { "epoch": 4.3441682600382405, "grad_norm": 0.13599509659002554, "learning_rate": 8.88447181988921e-06, "loss": 0.4258, "mean_token_accuracy": 0.8559349700808525, "num_tokens": 326375328.0, "step": 569 }, { "epoch": 4.351816443594647, "grad_norm": 0.14168570065592437, "learning_rate": 8.88069413512227e-06, "loss": 0.447, "mean_token_accuracy": 0.8499486148357391, "num_tokens": 326951328.0, "step": 570 }, { "epoch": 4.359464627151052, "grad_norm": 0.14287905535251402, "learning_rate": 8.876910870710885e-06, "loss": 0.4468, "mean_token_accuracy": 0.8484693765640259, "num_tokens": 327527328.0, "step": 571 }, { "epoch": 4.367112810707457, "grad_norm": 0.13966911539057147, "learning_rate": 8.873122032094614e-06, "loss": 0.4578, "mean_token_accuracy": 0.8461237996816635, "num_tokens": 328103328.0, "step": 572 }, { "epoch": 4.374760994263863, "grad_norm": 0.14261415507458267, "learning_rate": 8.869327624721033e-06, "loss": 0.4484, "mean_token_accuracy": 0.8485492318868637, "num_tokens": 328679328.0, "step": 573 }, { "epoch": 4.382409177820268, "grad_norm": 0.133369532678928, "learning_rate": 8.865527654045727e-06, "loss": 0.4298, "mean_token_accuracy": 0.8543098941445351, "num_tokens": 329255328.0, "step": 574 }, { "epoch": 4.390057361376673, "grad_norm": 0.14255996347846578, "learning_rate": 8.861722125532272e-06, "loss": 0.4581, "mean_token_accuracy": 0.84616519510746, "num_tokens": 329817975.0, "step": 575 }, { "epoch": 4.397705544933078, "grad_norm": 0.14265360616209308, "learning_rate": 8.857911044652244e-06, "loss": 0.4457, "mean_token_accuracy": 0.8503062576055527, "num_tokens": 330393975.0, "step": 576 }, { "epoch": 4.405353728489484, "grad_norm": 0.14108492866232827, "learning_rate": 8.854094416885192e-06, "loss": 0.4797, "mean_token_accuracy": 0.8391808122396469, "num_tokens": 330969975.0, "step": 577 }, { "epoch": 4.413001912045889, "grad_norm": 0.1397903736370164, "learning_rate": 8.850272247718654e-06, "loss": 0.4497, "mean_token_accuracy": 0.8479728251695633, "num_tokens": 331545975.0, "step": 578 }, { "epoch": 4.420650095602294, "grad_norm": 0.1421238151544833, "learning_rate": 8.84644454264812e-06, "loss": 0.4181, "mean_token_accuracy": 0.8584993183612823, "num_tokens": 332121975.0, "step": 579 }, { "epoch": 4.4282982791587, "grad_norm": 0.13112204100458139, "learning_rate": 8.842611307177051e-06, "loss": 0.4182, "mean_token_accuracy": 0.857766643166542, "num_tokens": 332697975.0, "step": 580 }, { "epoch": 4.435946462715105, "grad_norm": 0.1421745075127969, "learning_rate": 8.838772546816857e-06, "loss": 0.43, "mean_token_accuracy": 0.8541948571801186, "num_tokens": 333251920.0, "step": 581 }, { "epoch": 4.44359464627151, "grad_norm": 0.1444747863216718, "learning_rate": 8.834928267086884e-06, "loss": 0.4605, "mean_token_accuracy": 0.8450855612754822, "num_tokens": 333827920.0, "step": 582 }, { "epoch": 4.451242829827915, "grad_norm": 0.13443943817675832, "learning_rate": 8.831078473514427e-06, "loss": 0.4485, "mean_token_accuracy": 0.8491065725684166, "num_tokens": 334403920.0, "step": 583 }, { "epoch": 4.458891013384322, "grad_norm": 0.12806454601850675, "learning_rate": 8.827223171634698e-06, "loss": 0.4518, "mean_token_accuracy": 0.8473460748791695, "num_tokens": 334979920.0, "step": 584 }, { "epoch": 4.466539196940727, "grad_norm": 0.14571665310132872, "learning_rate": 8.823362366990833e-06, "loss": 0.4519, "mean_token_accuracy": 0.847073495388031, "num_tokens": 335555920.0, "step": 585 }, { "epoch": 4.474187380497132, "grad_norm": 0.12883941943627572, "learning_rate": 8.819496065133879e-06, "loss": 0.4226, "mean_token_accuracy": 0.8574985191226006, "num_tokens": 336114304.0, "step": 586 }, { "epoch": 4.4818355640535374, "grad_norm": 0.1335387356959434, "learning_rate": 8.81562427162279e-06, "loss": 0.4597, "mean_token_accuracy": 0.8447174802422523, "num_tokens": 336690304.0, "step": 587 }, { "epoch": 4.489483747609943, "grad_norm": 0.12889357337389204, "learning_rate": 8.81174699202441e-06, "loss": 0.4405, "mean_token_accuracy": 0.8510493487119675, "num_tokens": 337266304.0, "step": 588 }, { "epoch": 4.497131931166348, "grad_norm": 0.1330406876609962, "learning_rate": 8.807864231913475e-06, "loss": 0.4332, "mean_token_accuracy": 0.8526778817176819, "num_tokens": 337842304.0, "step": 589 }, { "epoch": 4.504780114722753, "grad_norm": 0.12979232613135025, "learning_rate": 8.8039759968726e-06, "loss": 0.4236, "mean_token_accuracy": 0.856186717748642, "num_tokens": 338418304.0, "step": 590 }, { "epoch": 4.512428298279159, "grad_norm": 0.13730293405800015, "learning_rate": 8.800082292492274e-06, "loss": 0.4486, "mean_token_accuracy": 0.8482888266444206, "num_tokens": 338994304.0, "step": 591 }, { "epoch": 4.520076481835564, "grad_norm": 0.13489614197624644, "learning_rate": 8.796183124370843e-06, "loss": 0.4306, "mean_token_accuracy": 0.8548116609454155, "num_tokens": 339570304.0, "step": 592 }, { "epoch": 4.527724665391969, "grad_norm": 0.1347926609469706, "learning_rate": 8.792278498114517e-06, "loss": 0.4544, "mean_token_accuracy": 0.8463078364729881, "num_tokens": 340146304.0, "step": 593 }, { "epoch": 4.5353728489483744, "grad_norm": 0.1349897764307115, "learning_rate": 8.788368419337348e-06, "loss": 0.4396, "mean_token_accuracy": 0.8521014824509621, "num_tokens": 340722304.0, "step": 594 }, { "epoch": 4.54302103250478, "grad_norm": 0.1334117681436598, "learning_rate": 8.784452893661229e-06, "loss": 0.4388, "mean_token_accuracy": 0.8514469414949417, "num_tokens": 341298304.0, "step": 595 }, { "epoch": 4.550669216061186, "grad_norm": 0.13453993757890548, "learning_rate": 8.780531926715888e-06, "loss": 0.4408, "mean_token_accuracy": 0.8514851331710815, "num_tokens": 341874304.0, "step": 596 }, { "epoch": 4.558317399617591, "grad_norm": 0.14039109869111963, "learning_rate": 8.77660552413887e-06, "loss": 0.4373, "mean_token_accuracy": 0.8525997698307037, "num_tokens": 342450304.0, "step": 597 }, { "epoch": 4.5659655831739965, "grad_norm": 0.13204418688173547, "learning_rate": 8.772673691575541e-06, "loss": 0.4369, "mean_token_accuracy": 0.8529435247182846, "num_tokens": 343026304.0, "step": 598 }, { "epoch": 4.573613766730402, "grad_norm": 0.13217896451409836, "learning_rate": 8.768736434679073e-06, "loss": 0.4442, "mean_token_accuracy": 0.850242018699646, "num_tokens": 343602304.0, "step": 599 }, { "epoch": 4.581261950286807, "grad_norm": 0.13127212529235552, "learning_rate": 8.764793759110435e-06, "loss": 0.4293, "mean_token_accuracy": 0.8552266061306, "num_tokens": 344178304.0, "step": 600 }, { "epoch": 4.588910133843212, "grad_norm": 0.1343418903278343, "learning_rate": 8.760845670538387e-06, "loss": 0.4211, "mean_token_accuracy": 0.8569020330905914, "num_tokens": 344754304.0, "step": 601 }, { "epoch": 4.596558317399618, "grad_norm": 0.1271661715666118, "learning_rate": 8.756892174639473e-06, "loss": 0.408, "mean_token_accuracy": 0.8616817370057106, "num_tokens": 345330304.0, "step": 602 }, { "epoch": 4.604206500956023, "grad_norm": 0.13563629821014458, "learning_rate": 8.752933277098012e-06, "loss": 0.437, "mean_token_accuracy": 0.8516730964183807, "num_tokens": 345892411.0, "step": 603 }, { "epoch": 4.611854684512428, "grad_norm": 0.13068071235260656, "learning_rate": 8.74896898360609e-06, "loss": 0.4299, "mean_token_accuracy": 0.8544713631272316, "num_tokens": 346468411.0, "step": 604 }, { "epoch": 4.6195028680688335, "grad_norm": 0.13846369217848975, "learning_rate": 8.744999299863549e-06, "loss": 0.4211, "mean_token_accuracy": 0.857238844037056, "num_tokens": 347044411.0, "step": 605 }, { "epoch": 4.627151051625239, "grad_norm": 0.1367695822273923, "learning_rate": 8.741024231577983e-06, "loss": 0.4491, "mean_token_accuracy": 0.8486238941550255, "num_tokens": 347620411.0, "step": 606 }, { "epoch": 4.634799235181644, "grad_norm": 0.13305300129278447, "learning_rate": 8.737043784464726e-06, "loss": 0.3945, "mean_token_accuracy": 0.8662357404828072, "num_tokens": 348196411.0, "step": 607 }, { "epoch": 4.642447418738049, "grad_norm": 0.14038270415564713, "learning_rate": 8.733057964246849e-06, "loss": 0.4551, "mean_token_accuracy": 0.8459553942084312, "num_tokens": 348772411.0, "step": 608 }, { "epoch": 4.650095602294455, "grad_norm": 0.14422126563292137, "learning_rate": 8.729066776655144e-06, "loss": 0.4335, "mean_token_accuracy": 0.8531865999102592, "num_tokens": 349348411.0, "step": 609 }, { "epoch": 4.657743785850861, "grad_norm": 0.1413112708302926, "learning_rate": 8.725070227428123e-06, "loss": 0.452, "mean_token_accuracy": 0.8475092723965645, "num_tokens": 349924411.0, "step": 610 }, { "epoch": 4.665391969407266, "grad_norm": 0.13342911934210575, "learning_rate": 8.721068322312007e-06, "loss": 0.4195, "mean_token_accuracy": 0.8577718585729599, "num_tokens": 350500411.0, "step": 611 }, { "epoch": 4.673040152963671, "grad_norm": 0.1430879510207355, "learning_rate": 8.717061067060716e-06, "loss": 0.4465, "mean_token_accuracy": 0.8497645780444145, "num_tokens": 351076411.0, "step": 612 }, { "epoch": 4.680688336520077, "grad_norm": 0.14166248034965864, "learning_rate": 8.713048467435865e-06, "loss": 0.4604, "mean_token_accuracy": 0.8444344848394394, "num_tokens": 351652411.0, "step": 613 }, { "epoch": 4.688336520076482, "grad_norm": 0.1430056087650855, "learning_rate": 8.70903052920675e-06, "loss": 0.4295, "mean_token_accuracy": 0.8543862923979759, "num_tokens": 352228411.0, "step": 614 }, { "epoch": 4.695984703632887, "grad_norm": 0.13993804535154394, "learning_rate": 8.705007258150346e-06, "loss": 0.4736, "mean_token_accuracy": 0.8403875529766083, "num_tokens": 352796906.0, "step": 615 }, { "epoch": 4.7036328871892925, "grad_norm": 0.13853295092272744, "learning_rate": 8.700978660051293e-06, "loss": 0.4343, "mean_token_accuracy": 0.8529174774885178, "num_tokens": 353372906.0, "step": 616 }, { "epoch": 4.711281070745698, "grad_norm": 0.13648098405467515, "learning_rate": 8.696944740701891e-06, "loss": 0.4624, "mean_token_accuracy": 0.8432382643222809, "num_tokens": 353948906.0, "step": 617 }, { "epoch": 4.718929254302103, "grad_norm": 0.13063447639184614, "learning_rate": 8.692905505902091e-06, "loss": 0.4432, "mean_token_accuracy": 0.8508288562297821, "num_tokens": 354524906.0, "step": 618 }, { "epoch": 4.726577437858508, "grad_norm": 0.13137608605071113, "learning_rate": 8.688860961459487e-06, "loss": 0.412, "mean_token_accuracy": 0.8600583970546722, "num_tokens": 355100906.0, "step": 619 }, { "epoch": 4.734225621414914, "grad_norm": 0.15267208050688227, "learning_rate": 8.684811113189306e-06, "loss": 0.4541, "mean_token_accuracy": 0.8480162993073463, "num_tokens": 355674543.0, "step": 620 }, { "epoch": 4.741873804971319, "grad_norm": 0.13165612975561047, "learning_rate": 8.6807559669144e-06, "loss": 0.4107, "mean_token_accuracy": 0.8604946732521057, "num_tokens": 356247875.0, "step": 621 }, { "epoch": 4.749521988527725, "grad_norm": 0.12772216877759487, "learning_rate": 8.676695528465244e-06, "loss": 0.4011, "mean_token_accuracy": 0.8628982827067375, "num_tokens": 356809381.0, "step": 622 }, { "epoch": 4.75717017208413, "grad_norm": 0.13488174230220404, "learning_rate": 8.672629803679914e-06, "loss": 0.4498, "mean_token_accuracy": 0.8484294563531876, "num_tokens": 357385381.0, "step": 623 }, { "epoch": 4.764818355640536, "grad_norm": 0.1401189577724157, "learning_rate": 8.668558798404093e-06, "loss": 0.429, "mean_token_accuracy": 0.8550686091184616, "num_tokens": 357961381.0, "step": 624 }, { "epoch": 4.772466539196941, "grad_norm": 0.1364797558773597, "learning_rate": 8.664482518491053e-06, "loss": 0.4539, "mean_token_accuracy": 0.8475179374217987, "num_tokens": 358537381.0, "step": 625 }, { "epoch": 4.780114722753346, "grad_norm": 0.13289176813716883, "learning_rate": 8.660400969801653e-06, "loss": 0.4213, "mean_token_accuracy": 0.8573412746191025, "num_tokens": 359113381.0, "step": 626 }, { "epoch": 4.7877629063097515, "grad_norm": 0.13614408467258662, "learning_rate": 8.65631415820432e-06, "loss": 0.4437, "mean_token_accuracy": 0.8494902551174164, "num_tokens": 359689381.0, "step": 627 }, { "epoch": 4.795411089866157, "grad_norm": 0.1361828076841561, "learning_rate": 8.652222089575059e-06, "loss": 0.4528, "mean_token_accuracy": 0.8468199968338013, "num_tokens": 360265381.0, "step": 628 }, { "epoch": 4.803059273422562, "grad_norm": 0.13328710422461576, "learning_rate": 8.648124769797424e-06, "loss": 0.4365, "mean_token_accuracy": 0.8522612005472183, "num_tokens": 360841381.0, "step": 629 }, { "epoch": 4.810707456978967, "grad_norm": 0.14533854629935952, "learning_rate": 8.644022204762525e-06, "loss": 0.462, "mean_token_accuracy": 0.8440126106142998, "num_tokens": 361401576.0, "step": 630 }, { "epoch": 4.818355640535373, "grad_norm": 0.1308097892624671, "learning_rate": 8.63991440036901e-06, "loss": 0.4136, "mean_token_accuracy": 0.8578916490077972, "num_tokens": 361977576.0, "step": 631 }, { "epoch": 4.826003824091778, "grad_norm": 0.13348566755409633, "learning_rate": 8.63580136252306e-06, "loss": 0.4469, "mean_token_accuracy": 0.8494173437356949, "num_tokens": 362553576.0, "step": 632 }, { "epoch": 4.833652007648183, "grad_norm": 0.13082548673790548, "learning_rate": 8.631683097138386e-06, "loss": 0.4306, "mean_token_accuracy": 0.8541657999157906, "num_tokens": 363129576.0, "step": 633 }, { "epoch": 4.8413001912045885, "grad_norm": 0.13534537707509742, "learning_rate": 8.627559610136209e-06, "loss": 0.4378, "mean_token_accuracy": 0.8518879190087318, "num_tokens": 363705576.0, "step": 634 }, { "epoch": 4.848948374760994, "grad_norm": 0.1322877886744737, "learning_rate": 8.623430907445263e-06, "loss": 0.4501, "mean_token_accuracy": 0.848146453499794, "num_tokens": 364281576.0, "step": 635 }, { "epoch": 4.8565965583174, "grad_norm": 0.1305531222811248, "learning_rate": 8.619296995001773e-06, "loss": 0.4583, "mean_token_accuracy": 0.8479276895523071, "num_tokens": 364857576.0, "step": 636 }, { "epoch": 4.864244741873805, "grad_norm": 0.13613414953238218, "learning_rate": 8.615157878749462e-06, "loss": 0.4291, "mean_token_accuracy": 0.8538307175040245, "num_tokens": 365433576.0, "step": 637 }, { "epoch": 4.871892925430211, "grad_norm": 0.13535260994204976, "learning_rate": 8.611013564639532e-06, "loss": 0.452, "mean_token_accuracy": 0.8471116721630096, "num_tokens": 366009576.0, "step": 638 }, { "epoch": 4.879541108986616, "grad_norm": 0.13913988946297418, "learning_rate": 8.60686405863066e-06, "loss": 0.481, "mean_token_accuracy": 0.8386234864592552, "num_tokens": 366585576.0, "step": 639 }, { "epoch": 4.887189292543021, "grad_norm": 0.13926823889358061, "learning_rate": 8.602709366688989e-06, "loss": 0.4611, "mean_token_accuracy": 0.8453963324427605, "num_tokens": 367161576.0, "step": 640 }, { "epoch": 4.894837476099426, "grad_norm": 0.13451549889430478, "learning_rate": 8.598549494788111e-06, "loss": 0.4458, "mean_token_accuracy": 0.8480683267116547, "num_tokens": 367737576.0, "step": 641 }, { "epoch": 4.902485659655832, "grad_norm": 0.13290646145187515, "learning_rate": 8.594384448909074e-06, "loss": 0.4381, "mean_token_accuracy": 0.8519139811396599, "num_tokens": 368313576.0, "step": 642 }, { "epoch": 4.910133843212237, "grad_norm": 0.14817863417364635, "learning_rate": 8.590214235040363e-06, "loss": 0.4328, "mean_token_accuracy": 0.854470893740654, "num_tokens": 368887476.0, "step": 643 }, { "epoch": 4.917782026768642, "grad_norm": 0.12880758333637257, "learning_rate": 8.586038859177891e-06, "loss": 0.4261, "mean_token_accuracy": 0.8556710705161095, "num_tokens": 369463476.0, "step": 644 }, { "epoch": 4.925430210325048, "grad_norm": 0.13524444500578836, "learning_rate": 8.581858327324996e-06, "loss": 0.4523, "mean_token_accuracy": 0.8463720753788948, "num_tokens": 370039476.0, "step": 645 }, { "epoch": 4.933078393881453, "grad_norm": 0.13170708374690382, "learning_rate": 8.577672645492426e-06, "loss": 0.4198, "mean_token_accuracy": 0.8571259826421738, "num_tokens": 370615476.0, "step": 646 }, { "epoch": 4.940726577437858, "grad_norm": 0.13590194960038368, "learning_rate": 8.573481819698337e-06, "loss": 0.4288, "mean_token_accuracy": 0.8549418747425079, "num_tokens": 371191476.0, "step": 647 }, { "epoch": 4.948374760994264, "grad_norm": 0.13608499203187072, "learning_rate": 8.569285855968278e-06, "loss": 0.4287, "mean_token_accuracy": 0.8553623184561729, "num_tokens": 371761699.0, "step": 648 }, { "epoch": 4.95602294455067, "grad_norm": 0.1581401895374939, "learning_rate": 8.565084760335188e-06, "loss": 0.4357, "mean_token_accuracy": 0.8529161512851715, "num_tokens": 372304065.0, "step": 649 }, { "epoch": 4.963671128107075, "grad_norm": 0.13384003030736613, "learning_rate": 8.560878538839379e-06, "loss": 0.4169, "mean_token_accuracy": 0.858254499733448, "num_tokens": 372880065.0, "step": 650 } ], "logging_steps": 1, "max_steps": 2620, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.564795702483878e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }