ML-project-epoch5 / trainer_state.json
BlackZipper's picture
upload json
ac863e2 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.963671128107075,
"eval_steps": 500,
"global_step": 650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0076481835564053535,
"grad_norm": 0.9770538524861805,
"learning_rate": 1e-05,
"loss": 0.5557,
"mean_token_accuracy": 0.8261559456586838,
"num_tokens": 576000.0,
"step": 1
},
{
"epoch": 0.015296367112810707,
"grad_norm": 1.0016201329962942,
"learning_rate": 9.999996405511328e-06,
"loss": 0.5724,
"mean_token_accuracy": 0.8204647451639175,
"num_tokens": 1152000.0,
"step": 2
},
{
"epoch": 0.022944550669216062,
"grad_norm": 0.8796352845621198,
"learning_rate": 9.999985622050476e-06,
"loss": 0.5616,
"mean_token_accuracy": 0.8209040015935898,
"num_tokens": 1728000.0,
"step": 3
},
{
"epoch": 0.030592734225621414,
"grad_norm": 0.7858551577048387,
"learning_rate": 9.999967649632953e-06,
"loss": 0.583,
"mean_token_accuracy": 0.8155530691146851,
"num_tokens": 2304000.0,
"step": 4
},
{
"epoch": 0.03824091778202677,
"grad_norm": 0.3692234525413837,
"learning_rate": 9.999942488284598e-06,
"loss": 0.5371,
"mean_token_accuracy": 0.8288730829954147,
"num_tokens": 2880000.0,
"step": 5
},
{
"epoch": 0.045889101338432124,
"grad_norm": 0.337938001083141,
"learning_rate": 9.999910138041584e-06,
"loss": 0.5252,
"mean_token_accuracy": 0.8318870887160301,
"num_tokens": 3456000.0,
"step": 6
},
{
"epoch": 0.05353728489483748,
"grad_norm": 0.3249029142805452,
"learning_rate": 9.99987059895043e-06,
"loss": 0.5661,
"mean_token_accuracy": 0.8197997808456421,
"num_tokens": 4032000.0,
"step": 7
},
{
"epoch": 0.06118546845124283,
"grad_norm": 0.6141658703983041,
"learning_rate": 9.999823871067981e-06,
"loss": 0.5406,
"mean_token_accuracy": 0.8242708295583725,
"num_tokens": 4585945.0,
"step": 8
},
{
"epoch": 0.06883365200764818,
"grad_norm": 0.7987265960698617,
"learning_rate": 9.999769954461425e-06,
"loss": 0.5426,
"mean_token_accuracy": 0.8265986815094948,
"num_tokens": 5161945.0,
"step": 9
},
{
"epoch": 0.07648183556405354,
"grad_norm": 0.7395483390721038,
"learning_rate": 9.999708849208279e-06,
"loss": 0.5517,
"mean_token_accuracy": 0.8210133761167526,
"num_tokens": 5737945.0,
"step": 10
},
{
"epoch": 0.0841300191204589,
"grad_norm": 0.6041875583696124,
"learning_rate": 9.999640555396404e-06,
"loss": 0.5331,
"mean_token_accuracy": 0.8267983421683311,
"num_tokens": 6313945.0,
"step": 11
},
{
"epoch": 0.09177820267686425,
"grad_norm": 0.45467723125722653,
"learning_rate": 9.999565073123991e-06,
"loss": 0.5164,
"mean_token_accuracy": 0.8316023647785187,
"num_tokens": 6889945.0,
"step": 12
},
{
"epoch": 0.0994263862332696,
"grad_norm": 0.32791299547939345,
"learning_rate": 9.999482402499569e-06,
"loss": 0.5475,
"mean_token_accuracy": 0.8233416005969048,
"num_tokens": 7465945.0,
"step": 13
},
{
"epoch": 0.10707456978967496,
"grad_norm": 0.23684367528291977,
"learning_rate": 9.999392543642e-06,
"loss": 0.5361,
"mean_token_accuracy": 0.8290345445275307,
"num_tokens": 8041945.0,
"step": 14
},
{
"epoch": 0.1147227533460803,
"grad_norm": 0.2528394745096081,
"learning_rate": 9.999295496680482e-06,
"loss": 0.5142,
"mean_token_accuracy": 0.8332222104072571,
"num_tokens": 8617945.0,
"step": 15
},
{
"epoch": 0.12237093690248566,
"grad_norm": 0.3264470002400492,
"learning_rate": 9.99919126175455e-06,
"loss": 0.5682,
"mean_token_accuracy": 0.8158846944570541,
"num_tokens": 9193945.0,
"step": 16
},
{
"epoch": 0.13001912045889102,
"grad_norm": 0.37843802091227036,
"learning_rate": 9.999079839014074e-06,
"loss": 0.4766,
"mean_token_accuracy": 0.8428528308868408,
"num_tokens": 9769945.0,
"step": 17
},
{
"epoch": 0.13766730401529637,
"grad_norm": 0.3930284951759443,
"learning_rate": 9.998961228619255e-06,
"loss": 0.5433,
"mean_token_accuracy": 0.8239961341023445,
"num_tokens": 10345945.0,
"step": 18
},
{
"epoch": 0.14531548757170173,
"grad_norm": 0.38145764149019024,
"learning_rate": 9.99883543074063e-06,
"loss": 0.5147,
"mean_token_accuracy": 0.8341441303491592,
"num_tokens": 10921945.0,
"step": 19
},
{
"epoch": 0.15296367112810708,
"grad_norm": 0.37363877853538846,
"learning_rate": 9.998702445559071e-06,
"loss": 0.5283,
"mean_token_accuracy": 0.8283886983990669,
"num_tokens": 11497945.0,
"step": 20
},
{
"epoch": 0.16061185468451242,
"grad_norm": 0.34011521406149564,
"learning_rate": 9.998562273265786e-06,
"loss": 0.518,
"mean_token_accuracy": 0.831451490521431,
"num_tokens": 12060592.0,
"step": 21
},
{
"epoch": 0.1682600382409178,
"grad_norm": 0.2676333142200147,
"learning_rate": 9.99841491406231e-06,
"loss": 0.5285,
"mean_token_accuracy": 0.8288713470101357,
"num_tokens": 12636592.0,
"step": 22
},
{
"epoch": 0.17590822179732313,
"grad_norm": 0.24262999668848875,
"learning_rate": 9.99826036816052e-06,
"loss": 0.5199,
"mean_token_accuracy": 0.8309321850538254,
"num_tokens": 13212592.0,
"step": 23
},
{
"epoch": 0.1835564053537285,
"grad_norm": 0.21796704803826206,
"learning_rate": 9.998098635782616e-06,
"loss": 0.5281,
"mean_token_accuracy": 0.8288609310984612,
"num_tokens": 13788592.0,
"step": 24
},
{
"epoch": 0.19120458891013384,
"grad_norm": 0.20760778914940356,
"learning_rate": 9.997929717161142e-06,
"loss": 0.5368,
"mean_token_accuracy": 0.8261212259531021,
"num_tokens": 14364592.0,
"step": 25
},
{
"epoch": 0.1988527724665392,
"grad_norm": 0.25167231093180664,
"learning_rate": 9.997753612538963e-06,
"loss": 0.4973,
"mean_token_accuracy": 0.8385418727993965,
"num_tokens": 14940592.0,
"step": 26
},
{
"epoch": 0.20650095602294455,
"grad_norm": 0.2755218609160029,
"learning_rate": 9.997570322169285e-06,
"loss": 0.5221,
"mean_token_accuracy": 0.8294894322752953,
"num_tokens": 15516592.0,
"step": 27
},
{
"epoch": 0.21414913957934992,
"grad_norm": 0.2808823032638946,
"learning_rate": 9.99737984631564e-06,
"loss": 0.5264,
"mean_token_accuracy": 0.8296040147542953,
"num_tokens": 16092592.0,
"step": 28
},
{
"epoch": 0.22179732313575526,
"grad_norm": 0.2583300789913383,
"learning_rate": 9.997182185251896e-06,
"loss": 0.5098,
"mean_token_accuracy": 0.8332170099020004,
"num_tokens": 16668592.0,
"step": 29
},
{
"epoch": 0.2294455066921606,
"grad_norm": 0.25870516586563114,
"learning_rate": 9.996977339262247e-06,
"loss": 0.4964,
"mean_token_accuracy": 0.8371130004525185,
"num_tokens": 17244592.0,
"step": 30
},
{
"epoch": 0.23709369024856597,
"grad_norm": 0.2296798543288584,
"learning_rate": 9.996765308641218e-06,
"loss": 0.5203,
"mean_token_accuracy": 0.8292376697063446,
"num_tokens": 17820592.0,
"step": 31
},
{
"epoch": 0.2447418738049713,
"grad_norm": 0.20940808224809382,
"learning_rate": 9.996546093693671e-06,
"loss": 0.5297,
"mean_token_accuracy": 0.828244574368,
"num_tokens": 18396592.0,
"step": 32
},
{
"epoch": 0.25239005736137665,
"grad_norm": 0.15777243777113475,
"learning_rate": 9.996319694734787e-06,
"loss": 0.4841,
"mean_token_accuracy": 0.8402329161763191,
"num_tokens": 18972592.0,
"step": 33
},
{
"epoch": 0.26003824091778205,
"grad_norm": 0.17599866369775718,
"learning_rate": 9.996086112090085e-06,
"loss": 0.5302,
"mean_token_accuracy": 0.8264007493853569,
"num_tokens": 19548592.0,
"step": 34
},
{
"epoch": 0.2676864244741874,
"grad_norm": 0.197575035953607,
"learning_rate": 9.995845346095406e-06,
"loss": 0.5239,
"mean_token_accuracy": 0.8283487483859062,
"num_tokens": 20124592.0,
"step": 35
},
{
"epoch": 0.27533460803059273,
"grad_norm": 0.18551509540101133,
"learning_rate": 9.995597397096923e-06,
"loss": 0.5166,
"mean_token_accuracy": 0.8316527083516121,
"num_tokens": 20700592.0,
"step": 36
},
{
"epoch": 0.2829827915869981,
"grad_norm": 0.18027752934372468,
"learning_rate": 9.995342265451138e-06,
"loss": 0.4946,
"mean_token_accuracy": 0.8379411548376083,
"num_tokens": 21276592.0,
"step": 37
},
{
"epoch": 0.29063097514340347,
"grad_norm": 0.1783882139249774,
"learning_rate": 9.995079951524876e-06,
"loss": 0.5298,
"mean_token_accuracy": 0.8277133032679558,
"num_tokens": 21852592.0,
"step": 38
},
{
"epoch": 0.2982791586998088,
"grad_norm": 0.1617380846519598,
"learning_rate": 9.994810455695291e-06,
"loss": 0.481,
"mean_token_accuracy": 0.8421861305832863,
"num_tokens": 22428592.0,
"step": 39
},
{
"epoch": 0.30592734225621415,
"grad_norm": 0.17083879189298595,
"learning_rate": 9.994533778349867e-06,
"loss": 0.5269,
"mean_token_accuracy": 0.828789733350277,
"num_tokens": 23004592.0,
"step": 40
},
{
"epoch": 0.3135755258126195,
"grad_norm": 0.1509185021498891,
"learning_rate": 9.994249919886402e-06,
"loss": 0.5004,
"mean_token_accuracy": 0.8369046598672867,
"num_tokens": 23580592.0,
"step": 41
},
{
"epoch": 0.32122370936902483,
"grad_norm": 0.1511275570122673,
"learning_rate": 9.993958880713033e-06,
"loss": 0.5094,
"mean_token_accuracy": 0.8330798596143723,
"num_tokens": 24156592.0,
"step": 42
},
{
"epoch": 0.32887189292543023,
"grad_norm": 0.16474397925550863,
"learning_rate": 9.99366066124821e-06,
"loss": 0.5083,
"mean_token_accuracy": 0.8331527784466743,
"num_tokens": 24732592.0,
"step": 43
},
{
"epoch": 0.3365200764818356,
"grad_norm": 0.1510059303541963,
"learning_rate": 9.993355261920714e-06,
"loss": 0.4811,
"mean_token_accuracy": 0.8414968326687813,
"num_tokens": 25306492.0,
"step": 44
},
{
"epoch": 0.3441682600382409,
"grad_norm": 0.2206211577243794,
"learning_rate": 9.993042683169647e-06,
"loss": 0.5145,
"mean_token_accuracy": 0.8324548229575157,
"num_tokens": 25882492.0,
"step": 45
},
{
"epoch": 0.35181644359464626,
"grad_norm": 0.19826261020124922,
"learning_rate": 9.992722925444434e-06,
"loss": 0.525,
"mean_token_accuracy": 0.8288452923297882,
"num_tokens": 26458492.0,
"step": 46
},
{
"epoch": 0.35946462715105165,
"grad_norm": 0.1539436477198697,
"learning_rate": 9.992395989204818e-06,
"loss": 0.531,
"mean_token_accuracy": 0.8279685229063034,
"num_tokens": 27034492.0,
"step": 47
},
{
"epoch": 0.367112810707457,
"grad_norm": 0.14749502264744746,
"learning_rate": 9.992061874920869e-06,
"loss": 0.5213,
"mean_token_accuracy": 0.8304547518491745,
"num_tokens": 27610492.0,
"step": 48
},
{
"epoch": 0.37476099426386233,
"grad_norm": 0.15532363519268472,
"learning_rate": 9.991720583072975e-06,
"loss": 0.4906,
"mean_token_accuracy": 0.8390665575861931,
"num_tokens": 28178987.0,
"step": 49
},
{
"epoch": 0.3824091778202677,
"grad_norm": 0.23556356163528056,
"learning_rate": 9.991372114151843e-06,
"loss": 0.5353,
"mean_token_accuracy": 0.826218456029892,
"num_tokens": 28754987.0,
"step": 50
},
{
"epoch": 0.390057361376673,
"grad_norm": 0.15385965560459058,
"learning_rate": 9.9910164686585e-06,
"loss": 0.5187,
"mean_token_accuracy": 0.8308228179812431,
"num_tokens": 29330987.0,
"step": 51
},
{
"epoch": 0.3977055449330784,
"grad_norm": 0.1436263981933282,
"learning_rate": 9.990653647104292e-06,
"loss": 0.5153,
"mean_token_accuracy": 0.8325798138976097,
"num_tokens": 29906987.0,
"step": 52
},
{
"epoch": 0.40535372848948376,
"grad_norm": 0.14302536490724446,
"learning_rate": 9.990283650010883e-06,
"loss": 0.4872,
"mean_token_accuracy": 0.8397606834769249,
"num_tokens": 30482987.0,
"step": 53
},
{
"epoch": 0.4130019120458891,
"grad_norm": 0.17140284653127127,
"learning_rate": 9.98990647791025e-06,
"loss": 0.5318,
"mean_token_accuracy": 0.8275379464030266,
"num_tokens": 31058987.0,
"step": 54
},
{
"epoch": 0.42065009560229444,
"grad_norm": 0.15486894690765854,
"learning_rate": 9.989522131344693e-06,
"loss": 0.462,
"mean_token_accuracy": 0.8481464609503746,
"num_tokens": 31634987.0,
"step": 55
},
{
"epoch": 0.42829827915869984,
"grad_norm": 0.14715377366317914,
"learning_rate": 9.989130610866822e-06,
"loss": 0.4978,
"mean_token_accuracy": 0.8358698934316635,
"num_tokens": 32210987.0,
"step": 56
},
{
"epoch": 0.4359464627151052,
"grad_norm": 0.133253236290824,
"learning_rate": 9.988731917039564e-06,
"loss": 0.5115,
"mean_token_accuracy": 0.8322514817118645,
"num_tokens": 32784624.0,
"step": 57
},
{
"epoch": 0.4435946462715105,
"grad_norm": 0.1424854004952171,
"learning_rate": 9.988326050436158e-06,
"loss": 0.5227,
"mean_token_accuracy": 0.8293939307332039,
"num_tokens": 33360624.0,
"step": 58
},
{
"epoch": 0.45124282982791586,
"grad_norm": 0.14122226382716493,
"learning_rate": 9.987913011640157e-06,
"loss": 0.5248,
"mean_token_accuracy": 0.8290501683950424,
"num_tokens": 33936624.0,
"step": 59
},
{
"epoch": 0.4588910133843212,
"grad_norm": 0.13993101979440462,
"learning_rate": 9.987492801245431e-06,
"loss": 0.4793,
"mean_token_accuracy": 0.8423371836543083,
"num_tokens": 34512624.0,
"step": 60
},
{
"epoch": 0.4665391969407266,
"grad_norm": 0.14632054172475426,
"learning_rate": 9.98706541985615e-06,
"loss": 0.5469,
"mean_token_accuracy": 0.821442224085331,
"num_tokens": 35074731.0,
"step": 61
},
{
"epoch": 0.47418738049713194,
"grad_norm": 0.1400105720290886,
"learning_rate": 9.986630868086804e-06,
"loss": 0.5063,
"mean_token_accuracy": 0.8351511061191559,
"num_tokens": 35650731.0,
"step": 62
},
{
"epoch": 0.4818355640535373,
"grad_norm": 0.14863573475989134,
"learning_rate": 9.986189146562191e-06,
"loss": 0.5222,
"mean_token_accuracy": 0.8316596522927284,
"num_tokens": 36226731.0,
"step": 63
},
{
"epoch": 0.4894837476099426,
"grad_norm": 0.14605144207294682,
"learning_rate": 9.985740255917413e-06,
"loss": 0.4925,
"mean_token_accuracy": 0.8410836607217789,
"num_tokens": 36802731.0,
"step": 64
},
{
"epoch": 0.497131931166348,
"grad_norm": 0.1357927018597777,
"learning_rate": 9.985284196797884e-06,
"loss": 0.5007,
"mean_token_accuracy": 0.835041731595993,
"num_tokens": 37378731.0,
"step": 65
},
{
"epoch": 0.5047801147227533,
"grad_norm": 0.16344701399255454,
"learning_rate": 9.984820969859326e-06,
"loss": 0.5519,
"mean_token_accuracy": 0.8199577778577805,
"num_tokens": 37954731.0,
"step": 66
},
{
"epoch": 0.5124282982791587,
"grad_norm": 0.13295009461309124,
"learning_rate": 9.984350575767763e-06,
"loss": 0.4968,
"mean_token_accuracy": 0.8373057246208191,
"num_tokens": 38530731.0,
"step": 67
},
{
"epoch": 0.5200764818355641,
"grad_norm": 0.1316301231700835,
"learning_rate": 9.983873015199524e-06,
"loss": 0.4541,
"mean_token_accuracy": 0.8496673479676247,
"num_tokens": 39106731.0,
"step": 68
},
{
"epoch": 0.5277246653919694,
"grad_norm": 0.1381202418270699,
"learning_rate": 9.983388288841246e-06,
"loss": 0.5195,
"mean_token_accuracy": 0.8305971175432205,
"num_tokens": 39682731.0,
"step": 69
},
{
"epoch": 0.5353728489483748,
"grad_norm": 0.13415757088597044,
"learning_rate": 9.982896397389866e-06,
"loss": 0.4641,
"mean_token_accuracy": 0.8476342782378197,
"num_tokens": 40258731.0,
"step": 70
},
{
"epoch": 0.5430210325047801,
"grad_norm": 0.13977158090215638,
"learning_rate": 9.98239734155262e-06,
"loss": 0.5204,
"mean_token_accuracy": 0.8300554230809212,
"num_tokens": 40834731.0,
"step": 71
},
{
"epoch": 0.5506692160611855,
"grad_norm": 0.1414811559776876,
"learning_rate": 9.981891122047052e-06,
"loss": 0.5039,
"mean_token_accuracy": 0.8352587595582008,
"num_tokens": 41410731.0,
"step": 72
},
{
"epoch": 0.5583173996175909,
"grad_norm": 0.14675842034623368,
"learning_rate": 9.981377739601002e-06,
"loss": 0.5249,
"mean_token_accuracy": 0.8289755135774612,
"num_tokens": 41986731.0,
"step": 73
},
{
"epoch": 0.5659655831739961,
"grad_norm": 0.1422646453200256,
"learning_rate": 9.980857194952605e-06,
"loss": 0.528,
"mean_token_accuracy": 0.8280744329094887,
"num_tokens": 42562731.0,
"step": 74
},
{
"epoch": 0.5736137667304015,
"grad_norm": 0.1428993249729862,
"learning_rate": 9.980329488850303e-06,
"loss": 0.5147,
"mean_token_accuracy": 0.8318679928779602,
"num_tokens": 43138731.0,
"step": 75
},
{
"epoch": 0.5812619502868069,
"grad_norm": 0.13904745870216947,
"learning_rate": 9.979794622052825e-06,
"loss": 0.5101,
"mean_token_accuracy": 0.8323385044932365,
"num_tokens": 43714731.0,
"step": 76
},
{
"epoch": 0.5889101338432122,
"grad_norm": 0.1408937048587796,
"learning_rate": 9.979252595329204e-06,
"loss": 0.4874,
"mean_token_accuracy": 0.8402296751737595,
"num_tokens": 44267402.0,
"step": 77
},
{
"epoch": 0.5965583173996176,
"grad_norm": 0.14639439502500298,
"learning_rate": 9.97870340945876e-06,
"loss": 0.5191,
"mean_token_accuracy": 0.8298158273100853,
"num_tokens": 44843402.0,
"step": 78
},
{
"epoch": 0.6042065009560229,
"grad_norm": 0.14225383180098447,
"learning_rate": 9.978147065231114e-06,
"loss": 0.501,
"mean_token_accuracy": 0.8345382362604141,
"num_tokens": 45419402.0,
"step": 79
},
{
"epoch": 0.6118546845124283,
"grad_norm": 0.15165811102237464,
"learning_rate": 9.97758356344617e-06,
"loss": 0.5053,
"mean_token_accuracy": 0.8332482650876045,
"num_tokens": 45995402.0,
"step": 80
},
{
"epoch": 0.6195028680688337,
"grad_norm": 0.13839520774785488,
"learning_rate": 9.977012904914133e-06,
"loss": 0.5085,
"mean_token_accuracy": 0.8349983245134354,
"num_tokens": 46571402.0,
"step": 81
},
{
"epoch": 0.627151051625239,
"grad_norm": 0.14712644887933124,
"learning_rate": 9.97643509045549e-06,
"loss": 0.5471,
"mean_token_accuracy": 0.8224283754825592,
"num_tokens": 47147402.0,
"step": 82
},
{
"epoch": 0.6347992351816444,
"grad_norm": 0.13934749804071309,
"learning_rate": 9.975850120901023e-06,
"loss": 0.5107,
"mean_token_accuracy": 0.832944430410862,
"num_tokens": 47723402.0,
"step": 83
},
{
"epoch": 0.6424474187380497,
"grad_norm": 0.14184064832659418,
"learning_rate": 9.975257997091795e-06,
"loss": 0.5019,
"mean_token_accuracy": 0.8352049514651299,
"num_tokens": 48299402.0,
"step": 84
},
{
"epoch": 0.6500956022944551,
"grad_norm": 0.1592732332566689,
"learning_rate": 9.974658719879163e-06,
"loss": 0.5118,
"mean_token_accuracy": 0.8321266919374466,
"num_tokens": 48875402.0,
"step": 85
},
{
"epoch": 0.6577437858508605,
"grad_norm": 0.13974194258972175,
"learning_rate": 9.97405229012476e-06,
"loss": 0.5183,
"mean_token_accuracy": 0.8313193619251251,
"num_tokens": 49451402.0,
"step": 86
},
{
"epoch": 0.6653919694072657,
"grad_norm": 0.13246562428620298,
"learning_rate": 9.973438708700513e-06,
"loss": 0.4767,
"mean_token_accuracy": 0.843241736292839,
"num_tokens": 50027402.0,
"step": 87
},
{
"epoch": 0.6730401529636711,
"grad_norm": 0.16684991357812562,
"learning_rate": 9.972817976488623e-06,
"loss": 0.4971,
"mean_token_accuracy": 0.8372588530182838,
"num_tokens": 50603402.0,
"step": 88
},
{
"epoch": 0.6806883365200764,
"grad_norm": 0.13340574719306686,
"learning_rate": 9.972190094381578e-06,
"loss": 0.4949,
"mean_token_accuracy": 0.8363786041736603,
"num_tokens": 51179402.0,
"step": 89
},
{
"epoch": 0.6883365200764818,
"grad_norm": 0.14359639069142585,
"learning_rate": 9.971555063282145e-06,
"loss": 0.4855,
"mean_token_accuracy": 0.8407451063394547,
"num_tokens": 51755402.0,
"step": 90
},
{
"epoch": 0.6959847036328872,
"grad_norm": 0.1465843545803842,
"learning_rate": 9.970912884103365e-06,
"loss": 0.5498,
"mean_token_accuracy": 0.8212963715195656,
"num_tokens": 52331402.0,
"step": 91
},
{
"epoch": 0.7036328871892925,
"grad_norm": 0.13373715318944093,
"learning_rate": 9.970263557768565e-06,
"loss": 0.5128,
"mean_token_accuracy": 0.8318002820014954,
"num_tokens": 52907402.0,
"step": 92
},
{
"epoch": 0.7112810707456979,
"grad_norm": 0.1425977955340504,
"learning_rate": 9.96960708521134e-06,
"loss": 0.4922,
"mean_token_accuracy": 0.8385525420308113,
"num_tokens": 53473097.0,
"step": 93
},
{
"epoch": 0.7189292543021033,
"grad_norm": 0.1414493480461768,
"learning_rate": 9.968943467375563e-06,
"loss": 0.5015,
"mean_token_accuracy": 0.8355400264263153,
"num_tokens": 54049097.0,
"step": 94
},
{
"epoch": 0.7265774378585086,
"grad_norm": 0.13127380227428548,
"learning_rate": 9.968272705215382e-06,
"loss": 0.4915,
"mean_token_accuracy": 0.8390123769640923,
"num_tokens": 54609292.0,
"step": 95
},
{
"epoch": 0.734225621414914,
"grad_norm": 0.13467129663035832,
"learning_rate": 9.967594799695218e-06,
"loss": 0.4794,
"mean_token_accuracy": 0.8427561372518539,
"num_tokens": 55170798.0,
"step": 96
},
{
"epoch": 0.7418738049713193,
"grad_norm": 0.13621387326515388,
"learning_rate": 9.966909751789758e-06,
"loss": 0.487,
"mean_token_accuracy": 0.8411270678043365,
"num_tokens": 55746798.0,
"step": 97
},
{
"epoch": 0.7495219885277247,
"grad_norm": 0.14264410268094876,
"learning_rate": 9.96621756248396e-06,
"loss": 0.4951,
"mean_token_accuracy": 0.8365452662110329,
"num_tokens": 56322798.0,
"step": 98
},
{
"epoch": 0.7571701720841301,
"grad_norm": 0.13879533507650615,
"learning_rate": 9.965518232773052e-06,
"loss": 0.5045,
"mean_token_accuracy": 0.8338455036282539,
"num_tokens": 56898798.0,
"step": 99
},
{
"epoch": 0.7648183556405354,
"grad_norm": 0.13281867848059528,
"learning_rate": 9.964811763662528e-06,
"loss": 0.4848,
"mean_token_accuracy": 0.8396026864647865,
"num_tokens": 57474798.0,
"step": 100
},
{
"epoch": 0.7724665391969407,
"grad_norm": 0.14634204976216025,
"learning_rate": 9.964098156168143e-06,
"loss": 0.5432,
"mean_token_accuracy": 0.822074182331562,
"num_tokens": 58050798.0,
"step": 101
},
{
"epoch": 0.780114722753346,
"grad_norm": 0.14047489451950268,
"learning_rate": 9.963377411315922e-06,
"loss": 0.5234,
"mean_token_accuracy": 0.829129159450531,
"num_tokens": 58625739.0,
"step": 102
},
{
"epoch": 0.7877629063097514,
"grad_norm": 0.14274210766707315,
"learning_rate": 9.962649530142147e-06,
"loss": 0.5335,
"mean_token_accuracy": 0.8258555829524994,
"num_tokens": 59201739.0,
"step": 103
},
{
"epoch": 0.7954110898661568,
"grad_norm": 0.14094212771766002,
"learning_rate": 9.961914513693362e-06,
"loss": 0.5211,
"mean_token_accuracy": 0.8285171613097191,
"num_tokens": 59777739.0,
"step": 104
},
{
"epoch": 0.8030592734225621,
"grad_norm": 0.13762839208797772,
"learning_rate": 9.96117236302637e-06,
"loss": 0.5069,
"mean_token_accuracy": 0.8346649780869484,
"num_tokens": 60353739.0,
"step": 105
},
{
"epoch": 0.8107074569789675,
"grad_norm": 0.1465369564636583,
"learning_rate": 9.960423079208235e-06,
"loss": 0.4612,
"mean_token_accuracy": 0.8477652445435524,
"num_tokens": 60907009.0,
"step": 106
},
{
"epoch": 0.8183556405353728,
"grad_norm": 0.14637852098727955,
"learning_rate": 9.959666663316269e-06,
"loss": 0.5497,
"mean_token_accuracy": 0.8230603337287903,
"num_tokens": 61483009.0,
"step": 107
},
{
"epoch": 0.8260038240917782,
"grad_norm": 0.1347219196728508,
"learning_rate": 9.958903116438049e-06,
"loss": 0.4858,
"mean_token_accuracy": 0.841705210506916,
"num_tokens": 62059009.0,
"step": 108
},
{
"epoch": 0.8336520076481836,
"grad_norm": 0.13871053974279857,
"learning_rate": 9.958132439671392e-06,
"loss": 0.5269,
"mean_token_accuracy": 0.8272046074271202,
"num_tokens": 62635009.0,
"step": 109
},
{
"epoch": 0.8413001912045889,
"grad_norm": 0.13866989193780266,
"learning_rate": 9.95735463412438e-06,
"loss": 0.4786,
"mean_token_accuracy": 0.8430299237370491,
"num_tokens": 63211009.0,
"step": 110
},
{
"epoch": 0.8489483747609943,
"grad_norm": 0.1423988117487116,
"learning_rate": 9.956569700915338e-06,
"loss": 0.5059,
"mean_token_accuracy": 0.8349323570728302,
"num_tokens": 63787009.0,
"step": 111
},
{
"epoch": 0.8565965583173997,
"grad_norm": 0.16163953173723308,
"learning_rate": 9.955777641172836e-06,
"loss": 0.4767,
"mean_token_accuracy": 0.8433250710368156,
"num_tokens": 64363009.0,
"step": 112
},
{
"epoch": 0.864244741873805,
"grad_norm": 0.13530156968054668,
"learning_rate": 9.954978456035695e-06,
"loss": 0.4691,
"mean_token_accuracy": 0.8455977290868759,
"num_tokens": 64939009.0,
"step": 113
},
{
"epoch": 0.8718929254302104,
"grad_norm": 0.13247546863564885,
"learning_rate": 9.95417214665298e-06,
"loss": 0.5163,
"mean_token_accuracy": 0.8312828913331032,
"num_tokens": 65515009.0,
"step": 114
},
{
"epoch": 0.8795411089866156,
"grad_norm": 0.16495562650776383,
"learning_rate": 9.953358714183999e-06,
"loss": 0.5142,
"mean_token_accuracy": 0.8312735706567764,
"num_tokens": 66073393.0,
"step": 115
},
{
"epoch": 0.887189292543021,
"grad_norm": 0.136972715382717,
"learning_rate": 9.9525381597983e-06,
"loss": 0.5155,
"mean_token_accuracy": 0.8300295248627663,
"num_tokens": 66643616.0,
"step": 116
},
{
"epoch": 0.8948374760994264,
"grad_norm": 0.24477498311580997,
"learning_rate": 9.951710484675677e-06,
"loss": 0.4987,
"mean_token_accuracy": 0.8363108783960342,
"num_tokens": 67219616.0,
"step": 117
},
{
"epoch": 0.9024856596558317,
"grad_norm": 0.15547014864530215,
"learning_rate": 9.950875690006152e-06,
"loss": 0.525,
"mean_token_accuracy": 0.8279963135719299,
"num_tokens": 67795616.0,
"step": 118
},
{
"epoch": 0.9101338432122371,
"grad_norm": 0.13565975154579993,
"learning_rate": 9.950033776989994e-06,
"loss": 0.4875,
"mean_token_accuracy": 0.8386981412768364,
"num_tokens": 68371616.0,
"step": 119
},
{
"epoch": 0.9177820267686424,
"grad_norm": 0.14096319378387628,
"learning_rate": 9.949184746837697e-06,
"loss": 0.5275,
"mean_token_accuracy": 0.8280240818858147,
"num_tokens": 68947616.0,
"step": 120
},
{
"epoch": 0.9254302103250478,
"grad_norm": 0.14179059237775263,
"learning_rate": 9.948328600769996e-06,
"loss": 0.5041,
"mean_token_accuracy": 0.834081619977951,
"num_tokens": 69523616.0,
"step": 121
},
{
"epoch": 0.9330783938814532,
"grad_norm": 0.13988551396496146,
"learning_rate": 9.947465340017853e-06,
"loss": 0.5219,
"mean_token_accuracy": 0.8292012140154839,
"num_tokens": 70099616.0,
"step": 122
},
{
"epoch": 0.9407265774378585,
"grad_norm": 0.13758598960469923,
"learning_rate": 9.94659496582246e-06,
"loss": 0.4947,
"mean_token_accuracy": 0.8360296338796616,
"num_tokens": 70675616.0,
"step": 123
},
{
"epoch": 0.9483747609942639,
"grad_norm": 0.1332527555285676,
"learning_rate": 9.945717479435236e-06,
"loss": 0.4987,
"mean_token_accuracy": 0.8360331058502197,
"num_tokens": 71251616.0,
"step": 124
},
{
"epoch": 0.9560229445506692,
"grad_norm": 0.1346620611912445,
"learning_rate": 9.94483288211783e-06,
"loss": 0.4766,
"mean_token_accuracy": 0.8422486484050751,
"num_tokens": 71827616.0,
"step": 125
},
{
"epoch": 0.9636711281070746,
"grad_norm": 0.1412101809738773,
"learning_rate": 9.943941175142109e-06,
"loss": 0.5009,
"mean_token_accuracy": 0.8348843902349472,
"num_tokens": 72389881.0,
"step": 126
},
{
"epoch": 0.97131931166348,
"grad_norm": 0.1468591145222982,
"learning_rate": 9.943042359790168e-06,
"loss": 0.5318,
"mean_token_accuracy": 0.8273720592260361,
"num_tokens": 72963213.0,
"step": 127
},
{
"epoch": 0.9789674952198852,
"grad_norm": 0.1920244752165593,
"learning_rate": 9.942136437354316e-06,
"loss": 0.5495,
"mean_token_accuracy": 0.8214943110942841,
"num_tokens": 73539213.0,
"step": 128
},
{
"epoch": 0.9866156787762906,
"grad_norm": 0.15090295706963755,
"learning_rate": 9.941223409137088e-06,
"loss": 0.5079,
"mean_token_accuracy": 0.8330902680754662,
"num_tokens": 74115213.0,
"step": 129
},
{
"epoch": 0.994263862332696,
"grad_norm": 0.13592806085375692,
"learning_rate": 9.94030327645123e-06,
"loss": 0.5012,
"mean_token_accuracy": 0.8337656334042549,
"num_tokens": 74691213.0,
"step": 130
},
{
"epoch": 1.0,
"grad_norm": 0.15427033521640424,
"learning_rate": 9.939376040619707e-06,
"loss": 0.4586,
"mean_token_accuracy": 0.8492923180262247,
"num_tokens": 75123213.0,
"step": 131
},
{
"epoch": 1.0076481835564053,
"grad_norm": 0.1365403277523185,
"learning_rate": 9.938441702975689e-06,
"loss": 0.4728,
"mean_token_accuracy": 0.844464011490345,
"num_tokens": 75699213.0,
"step": 132
},
{
"epoch": 1.0152963671128108,
"grad_norm": 0.13190000895931886,
"learning_rate": 9.937500264862567e-06,
"loss": 0.4705,
"mean_token_accuracy": 0.8449570834636688,
"num_tokens": 76275213.0,
"step": 133
},
{
"epoch": 1.022944550669216,
"grad_norm": 0.1389937992497058,
"learning_rate": 9.936551727633934e-06,
"loss": 0.4889,
"mean_token_accuracy": 0.8385332003235817,
"num_tokens": 76851213.0,
"step": 134
},
{
"epoch": 1.0305927342256214,
"grad_norm": 0.12744266634531476,
"learning_rate": 9.935596092653596e-06,
"loss": 0.4817,
"mean_token_accuracy": 0.8396877720952034,
"num_tokens": 77427213.0,
"step": 135
},
{
"epoch": 1.0382409177820269,
"grad_norm": 0.13524852499452464,
"learning_rate": 9.934633361295558e-06,
"loss": 0.4857,
"mean_token_accuracy": 0.8397051244974136,
"num_tokens": 78003213.0,
"step": 136
},
{
"epoch": 1.0458891013384322,
"grad_norm": 0.1399382135972632,
"learning_rate": 9.933663534944029e-06,
"loss": 0.4856,
"mean_token_accuracy": 0.8398839607834816,
"num_tokens": 78579213.0,
"step": 137
},
{
"epoch": 1.0535372848948374,
"grad_norm": 0.13322210319690403,
"learning_rate": 9.932686614993425e-06,
"loss": 0.4648,
"mean_token_accuracy": 0.845810703933239,
"num_tokens": 79141860.0,
"step": 138
},
{
"epoch": 1.0611854684512427,
"grad_norm": 0.13213772381798283,
"learning_rate": 9.931702602848354e-06,
"loss": 0.4571,
"mean_token_accuracy": 0.8474953845143318,
"num_tokens": 79717860.0,
"step": 139
},
{
"epoch": 1.0688336520076482,
"grad_norm": 0.1331479705432894,
"learning_rate": 9.930711499923626e-06,
"loss": 0.4984,
"mean_token_accuracy": 0.835312582552433,
"num_tokens": 80293860.0,
"step": 140
},
{
"epoch": 1.0764818355640535,
"grad_norm": 0.14215781774983868,
"learning_rate": 9.929713307644245e-06,
"loss": 0.4788,
"mean_token_accuracy": 0.8420683667063713,
"num_tokens": 80846531.0,
"step": 141
},
{
"epoch": 1.0841300191204588,
"grad_norm": 0.12936783165938912,
"learning_rate": 9.928708027445403e-06,
"loss": 0.5017,
"mean_token_accuracy": 0.8367559015750885,
"num_tokens": 81416754.0,
"step": 142
},
{
"epoch": 1.0917782026768643,
"grad_norm": 0.13361158262185466,
"learning_rate": 9.927695660772492e-06,
"loss": 0.488,
"mean_token_accuracy": 0.8376425430178642,
"num_tokens": 81992754.0,
"step": 143
},
{
"epoch": 1.0994263862332696,
"grad_norm": 0.16213706049923526,
"learning_rate": 9.926676209081085e-06,
"loss": 0.5066,
"mean_token_accuracy": 0.8328819274902344,
"num_tokens": 82568754.0,
"step": 144
},
{
"epoch": 1.107074569789675,
"grad_norm": 0.1359440567719058,
"learning_rate": 9.925649673836949e-06,
"loss": 0.5083,
"mean_token_accuracy": 0.8328124806284904,
"num_tokens": 83144754.0,
"step": 145
},
{
"epoch": 1.1147227533460804,
"grad_norm": 0.1295608761839531,
"learning_rate": 9.924616056516027e-06,
"loss": 0.4879,
"mean_token_accuracy": 0.8381772711873055,
"num_tokens": 83720754.0,
"step": 146
},
{
"epoch": 1.1223709369024857,
"grad_norm": 0.13635991726116314,
"learning_rate": 9.923575358604454e-06,
"loss": 0.5083,
"mean_token_accuracy": 0.8336632177233696,
"num_tokens": 84296754.0,
"step": 147
},
{
"epoch": 1.130019120458891,
"grad_norm": 0.1371786254385925,
"learning_rate": 9.922527581598535e-06,
"loss": 0.5097,
"mean_token_accuracy": 0.8322065472602844,
"num_tokens": 84872754.0,
"step": 148
},
{
"epoch": 1.1376673040152965,
"grad_norm": 0.1280042137264246,
"learning_rate": 9.921472727004765e-06,
"loss": 0.4447,
"mean_token_accuracy": 0.8517681285738945,
"num_tokens": 85448754.0,
"step": 149
},
{
"epoch": 1.1453154875717018,
"grad_norm": 0.14709174504186834,
"learning_rate": 9.920410796339806e-06,
"loss": 0.4984,
"mean_token_accuracy": 0.8356476724147797,
"num_tokens": 86024754.0,
"step": 150
},
{
"epoch": 1.152963671128107,
"grad_norm": 0.13247230064106771,
"learning_rate": 9.919341791130496e-06,
"loss": 0.4493,
"mean_token_accuracy": 0.8508201763033867,
"num_tokens": 86600754.0,
"step": 151
},
{
"epoch": 1.1606118546845123,
"grad_norm": 0.1396533122936014,
"learning_rate": 9.91826571291385e-06,
"loss": 0.5009,
"mean_token_accuracy": 0.8353525176644325,
"num_tokens": 87176754.0,
"step": 152
},
{
"epoch": 1.1682600382409178,
"grad_norm": 0.12824246274478124,
"learning_rate": 9.917182563237045e-06,
"loss": 0.4778,
"mean_token_accuracy": 0.8412972092628479,
"num_tokens": 87752754.0,
"step": 153
},
{
"epoch": 1.1759082217973231,
"grad_norm": 0.13963117320032115,
"learning_rate": 9.91609234365743e-06,
"loss": 0.494,
"mean_token_accuracy": 0.8358664214611053,
"num_tokens": 88328754.0,
"step": 154
},
{
"epoch": 1.1835564053537284,
"grad_norm": 0.12893289909169098,
"learning_rate": 9.914995055742515e-06,
"loss": 0.4764,
"mean_token_accuracy": 0.841441310942173,
"num_tokens": 88904754.0,
"step": 155
},
{
"epoch": 1.191204588910134,
"grad_norm": 0.13091574452160962,
"learning_rate": 9.91389070106998e-06,
"loss": 0.4487,
"mean_token_accuracy": 0.8508792147040367,
"num_tokens": 89480754.0,
"step": 156
},
{
"epoch": 1.1988527724665392,
"grad_norm": 0.13244029890958137,
"learning_rate": 9.912779281227656e-06,
"loss": 0.473,
"mean_token_accuracy": 0.8444153964519501,
"num_tokens": 90056754.0,
"step": 157
},
{
"epoch": 1.2065009560229445,
"grad_norm": 0.13918199257945654,
"learning_rate": 9.911660797813542e-06,
"loss": 0.5175,
"mean_token_accuracy": 0.8303498476743698,
"num_tokens": 90607633.0,
"step": 158
},
{
"epoch": 1.21414913957935,
"grad_norm": 0.13614928489897143,
"learning_rate": 9.91053525243579e-06,
"loss": 0.4874,
"mean_token_accuracy": 0.8389325365424156,
"num_tokens": 91183633.0,
"step": 159
},
{
"epoch": 1.2217973231357553,
"grad_norm": 0.13814896905701568,
"learning_rate": 9.909402646712697e-06,
"loss": 0.4886,
"mean_token_accuracy": 0.8386929333209991,
"num_tokens": 91759633.0,
"step": 160
},
{
"epoch": 1.2294455066921606,
"grad_norm": 0.13390235159092828,
"learning_rate": 9.908262982272724e-06,
"loss": 0.4981,
"mean_token_accuracy": 0.8359844833612442,
"num_tokens": 92335633.0,
"step": 161
},
{
"epoch": 1.237093690248566,
"grad_norm": 0.1424290492085377,
"learning_rate": 9.907116260754472e-06,
"loss": 0.4998,
"mean_token_accuracy": 0.8363195657730103,
"num_tokens": 92911633.0,
"step": 162
},
{
"epoch": 1.2447418738049714,
"grad_norm": 0.13101785380987388,
"learning_rate": 9.905962483806696e-06,
"loss": 0.4826,
"mean_token_accuracy": 0.8393439948558807,
"num_tokens": 93487633.0,
"step": 163
},
{
"epoch": 1.2523900573613767,
"grad_norm": 0.13349754239199546,
"learning_rate": 9.904801653088287e-06,
"loss": 0.4868,
"mean_token_accuracy": 0.8397346436977386,
"num_tokens": 94063633.0,
"step": 164
},
{
"epoch": 1.260038240917782,
"grad_norm": 0.13506310588052065,
"learning_rate": 9.903633770268286e-06,
"loss": 0.4935,
"mean_token_accuracy": 0.8379099145531654,
"num_tokens": 94639633.0,
"step": 165
},
{
"epoch": 1.2676864244741874,
"grad_norm": 0.1460942025739368,
"learning_rate": 9.902458837025865e-06,
"loss": 0.533,
"mean_token_accuracy": 0.825570859014988,
"num_tokens": 95215633.0,
"step": 166
},
{
"epoch": 1.2753346080305927,
"grad_norm": 0.14230237493097705,
"learning_rate": 9.901276855050342e-06,
"loss": 0.509,
"mean_token_accuracy": 0.8315832614898682,
"num_tokens": 95791633.0,
"step": 167
},
{
"epoch": 1.282982791586998,
"grad_norm": 0.13383575612568485,
"learning_rate": 9.900087826041163e-06,
"loss": 0.4828,
"mean_token_accuracy": 0.8402016833424568,
"num_tokens": 96367633.0,
"step": 168
},
{
"epoch": 1.2906309751434035,
"grad_norm": 0.13987895499055866,
"learning_rate": 9.89889175170791e-06,
"loss": 0.5255,
"mean_token_accuracy": 0.8278417810797691,
"num_tokens": 96943633.0,
"step": 169
},
{
"epoch": 1.2982791586998088,
"grad_norm": 0.1313503038847592,
"learning_rate": 9.89768863377029e-06,
"loss": 0.4956,
"mean_token_accuracy": 0.8354028537869453,
"num_tokens": 97519633.0,
"step": 170
},
{
"epoch": 1.305927342256214,
"grad_norm": 0.13457635948414468,
"learning_rate": 9.896478473958147e-06,
"loss": 0.4697,
"mean_token_accuracy": 0.8448807075619698,
"num_tokens": 98095633.0,
"step": 171
},
{
"epoch": 1.3135755258126194,
"grad_norm": 0.13241587496517163,
"learning_rate": 9.895261274011436e-06,
"loss": 0.4954,
"mean_token_accuracy": 0.8361199200153351,
"num_tokens": 98671633.0,
"step": 172
},
{
"epoch": 1.321223709369025,
"grad_norm": 0.133287438364359,
"learning_rate": 9.894037035680246e-06,
"loss": 0.4827,
"mean_token_accuracy": 0.842342384159565,
"num_tokens": 99247633.0,
"step": 173
},
{
"epoch": 1.3288718929254302,
"grad_norm": 0.13647661096592764,
"learning_rate": 9.892805760724782e-06,
"loss": 0.4902,
"mean_token_accuracy": 0.8376477435231209,
"num_tokens": 99823633.0,
"step": 174
},
{
"epoch": 1.3365200764818357,
"grad_norm": 0.13426483562570884,
"learning_rate": 9.89156745091536e-06,
"loss": 0.4851,
"mean_token_accuracy": 0.838831827044487,
"num_tokens": 100399633.0,
"step": 175
},
{
"epoch": 1.344168260038241,
"grad_norm": 0.21654547351312775,
"learning_rate": 9.890322108032423e-06,
"loss": 0.4898,
"mean_token_accuracy": 0.8377328217029572,
"num_tokens": 100975633.0,
"step": 176
},
{
"epoch": 1.3518164435946463,
"grad_norm": 0.14468682987371165,
"learning_rate": 9.889069733866515e-06,
"loss": 0.4822,
"mean_token_accuracy": 0.8400419503450394,
"num_tokens": 101551633.0,
"step": 177
},
{
"epoch": 1.3594646271510515,
"grad_norm": 0.1410032902289748,
"learning_rate": 9.887810330218294e-06,
"loss": 0.5063,
"mean_token_accuracy": 0.8329392224550247,
"num_tokens": 102127633.0,
"step": 178
},
{
"epoch": 1.367112810707457,
"grad_norm": 0.1300256891404132,
"learning_rate": 9.886543898898528e-06,
"loss": 0.4759,
"mean_token_accuracy": 0.8423354402184486,
"num_tokens": 102703633.0,
"step": 179
},
{
"epoch": 1.3747609942638623,
"grad_norm": 0.14166005366415435,
"learning_rate": 9.885270441728085e-06,
"loss": 0.5011,
"mean_token_accuracy": 0.8352118879556656,
"num_tokens": 103279633.0,
"step": 180
},
{
"epoch": 1.3824091778202676,
"grad_norm": 0.1386111808825074,
"learning_rate": 9.883989960537934e-06,
"loss": 0.5052,
"mean_token_accuracy": 0.8329878374934196,
"num_tokens": 103855633.0,
"step": 181
},
{
"epoch": 1.3900573613766731,
"grad_norm": 0.13817887508453544,
"learning_rate": 9.882702457169147e-06,
"loss": 0.5012,
"mean_token_accuracy": 0.8339826613664627,
"num_tokens": 104431633.0,
"step": 182
},
{
"epoch": 1.3977055449330784,
"grad_norm": 0.14012765345513187,
"learning_rate": 9.881407933472889e-06,
"loss": 0.4994,
"mean_token_accuracy": 0.834894172847271,
"num_tokens": 105007633.0,
"step": 183
},
{
"epoch": 1.4053537284894837,
"grad_norm": 0.12930750196514224,
"learning_rate": 9.88010639131042e-06,
"loss": 0.4611,
"mean_token_accuracy": 0.845082089304924,
"num_tokens": 105583633.0,
"step": 184
},
{
"epoch": 1.413001912045889,
"grad_norm": 0.12331109134382856,
"learning_rate": 9.878797832553093e-06,
"loss": 0.4714,
"mean_token_accuracy": 0.8437591195106506,
"num_tokens": 106159633.0,
"step": 185
},
{
"epoch": 1.4206500956022945,
"grad_norm": 0.1293813455298218,
"learning_rate": 9.87748225908235e-06,
"loss": 0.4691,
"mean_token_accuracy": 0.8440056517720222,
"num_tokens": 106735633.0,
"step": 186
},
{
"epoch": 1.4282982791586998,
"grad_norm": 0.14507012516088896,
"learning_rate": 9.876159672789711e-06,
"loss": 0.5212,
"mean_token_accuracy": 0.8286769017577171,
"num_tokens": 107311633.0,
"step": 187
},
{
"epoch": 1.4359464627151053,
"grad_norm": 0.15111476207924585,
"learning_rate": 9.874830075576789e-06,
"loss": 0.5101,
"mean_token_accuracy": 0.8307446986436844,
"num_tokens": 107887633.0,
"step": 188
},
{
"epoch": 1.4435946462715106,
"grad_norm": 0.14709035714161833,
"learning_rate": 9.873493469355271e-06,
"loss": 0.4971,
"mean_token_accuracy": 0.8350886180996895,
"num_tokens": 108463633.0,
"step": 189
},
{
"epoch": 1.4512428298279159,
"grad_norm": 0.1344401231018188,
"learning_rate": 9.872149856046922e-06,
"loss": 0.4757,
"mean_token_accuracy": 0.8424031659960747,
"num_tokens": 109039633.0,
"step": 190
},
{
"epoch": 1.4588910133843211,
"grad_norm": 0.13403966584878735,
"learning_rate": 9.870799237583586e-06,
"loss": 0.4622,
"mean_token_accuracy": 0.8463167697191238,
"num_tokens": 109613270.0,
"step": 191
},
{
"epoch": 1.4665391969407267,
"grad_norm": 0.13413337229810696,
"learning_rate": 9.869441615907176e-06,
"loss": 0.4905,
"mean_token_accuracy": 0.8371755108237267,
"num_tokens": 110189270.0,
"step": 192
},
{
"epoch": 1.474187380497132,
"grad_norm": 0.13213885141206153,
"learning_rate": 9.868076992969672e-06,
"loss": 0.4629,
"mean_token_accuracy": 0.8472193330526352,
"num_tokens": 110765270.0,
"step": 193
},
{
"epoch": 1.4818355640535372,
"grad_norm": 0.14284631334355893,
"learning_rate": 9.866705370733126e-06,
"loss": 0.4759,
"mean_token_accuracy": 0.8413978591561317,
"num_tokens": 111330965.0,
"step": 194
},
{
"epoch": 1.4894837476099427,
"grad_norm": 0.14434856966382234,
"learning_rate": 9.865326751169648e-06,
"loss": 0.4682,
"mean_token_accuracy": 0.8441445603966713,
"num_tokens": 111906965.0,
"step": 195
},
{
"epoch": 1.497131931166348,
"grad_norm": 0.13331488674498373,
"learning_rate": 9.863941136261409e-06,
"loss": 0.4743,
"mean_token_accuracy": 0.842158354818821,
"num_tokens": 112482965.0,
"step": 196
},
{
"epoch": 1.5047801147227533,
"grad_norm": 0.1333685996060613,
"learning_rate": 9.862548528000644e-06,
"loss": 0.4833,
"mean_token_accuracy": 0.8400801345705986,
"num_tokens": 113058965.0,
"step": 197
},
{
"epoch": 1.5124282982791586,
"grad_norm": 0.13278247248038624,
"learning_rate": 9.861148928389634e-06,
"loss": 0.4886,
"mean_token_accuracy": 0.8382901325821877,
"num_tokens": 113634965.0,
"step": 198
},
{
"epoch": 1.520076481835564,
"grad_norm": 0.13693399891139466,
"learning_rate": 9.859742339440723e-06,
"loss": 0.4822,
"mean_token_accuracy": 0.8395071998238564,
"num_tokens": 114210965.0,
"step": 199
},
{
"epoch": 1.5277246653919694,
"grad_norm": 0.14014126230020535,
"learning_rate": 9.858328763176294e-06,
"loss": 0.509,
"mean_token_accuracy": 0.8321307003498077,
"num_tokens": 114785906.0,
"step": 200
},
{
"epoch": 1.535372848948375,
"grad_norm": 0.13812738362519864,
"learning_rate": 9.85690820162878e-06,
"loss": 0.495,
"mean_token_accuracy": 0.8362865820527077,
"num_tokens": 115361906.0,
"step": 201
},
{
"epoch": 1.5430210325047802,
"grad_norm": 0.12908599718928895,
"learning_rate": 9.855480656840662e-06,
"loss": 0.471,
"mean_token_accuracy": 0.843786895275116,
"num_tokens": 115937906.0,
"step": 202
},
{
"epoch": 1.5506692160611855,
"grad_norm": 0.13168108610671495,
"learning_rate": 9.854046130864454e-06,
"loss": 0.4765,
"mean_token_accuracy": 0.8418128415942192,
"num_tokens": 116513906.0,
"step": 203
},
{
"epoch": 1.5583173996175907,
"grad_norm": 0.1374975523241788,
"learning_rate": 9.852604625762712e-06,
"loss": 0.4502,
"mean_token_accuracy": 0.8503288328647614,
"num_tokens": 117089906.0,
"step": 204
},
{
"epoch": 1.565965583173996,
"grad_norm": 0.13716664205655626,
"learning_rate": 9.851156143608025e-06,
"loss": 0.4848,
"mean_token_accuracy": 0.8381772711873055,
"num_tokens": 117665906.0,
"step": 205
},
{
"epoch": 1.5736137667304015,
"grad_norm": 0.19260127106490352,
"learning_rate": 9.849700686483016e-06,
"loss": 0.4606,
"mean_token_accuracy": 0.8471116870641708,
"num_tokens": 118241906.0,
"step": 206
},
{
"epoch": 1.581261950286807,
"grad_norm": 0.1285761532416256,
"learning_rate": 9.848238256480329e-06,
"loss": 0.4689,
"mean_token_accuracy": 0.8435264676809311,
"num_tokens": 118817906.0,
"step": 207
},
{
"epoch": 1.5889101338432123,
"grad_norm": 0.13245065174094972,
"learning_rate": 9.846768855702646e-06,
"loss": 0.4931,
"mean_token_accuracy": 0.8375678807497025,
"num_tokens": 119393906.0,
"step": 208
},
{
"epoch": 1.5965583173996176,
"grad_norm": 0.13171290320659565,
"learning_rate": 9.845292486262664e-06,
"loss": 0.4801,
"mean_token_accuracy": 0.8411461487412453,
"num_tokens": 119969906.0,
"step": 209
},
{
"epoch": 1.604206500956023,
"grad_norm": 0.1382945206321001,
"learning_rate": 9.843809150283096e-06,
"loss": 0.5201,
"mean_token_accuracy": 0.8290223926305771,
"num_tokens": 120545906.0,
"step": 210
},
{
"epoch": 1.6118546845124282,
"grad_norm": 0.1308311967900458,
"learning_rate": 9.842318849896679e-06,
"loss": 0.4811,
"mean_token_accuracy": 0.8402815386652946,
"num_tokens": 121121906.0,
"step": 211
},
{
"epoch": 1.6195028680688337,
"grad_norm": 0.13179762296615236,
"learning_rate": 9.840821587246158e-06,
"loss": 0.4841,
"mean_token_accuracy": 0.8397172838449478,
"num_tokens": 121697906.0,
"step": 212
},
{
"epoch": 1.627151051625239,
"grad_norm": 0.13092322919750846,
"learning_rate": 9.839317364484295e-06,
"loss": 0.4857,
"mean_token_accuracy": 0.8396096378564835,
"num_tokens": 122273906.0,
"step": 213
},
{
"epoch": 1.6347992351816445,
"grad_norm": 0.1406807481437863,
"learning_rate": 9.837806183773851e-06,
"loss": 0.5009,
"mean_token_accuracy": 0.8343264311552048,
"num_tokens": 122849906.0,
"step": 214
},
{
"epoch": 1.6424474187380498,
"grad_norm": 0.15127230928084542,
"learning_rate": 9.836288047287593e-06,
"loss": 0.5012,
"mean_token_accuracy": 0.8336441144347191,
"num_tokens": 123425906.0,
"step": 215
},
{
"epoch": 1.650095602294455,
"grad_norm": 0.13764770828391903,
"learning_rate": 9.834762957208293e-06,
"loss": 0.4933,
"mean_token_accuracy": 0.8366129845380783,
"num_tokens": 124001906.0,
"step": 216
},
{
"epoch": 1.6577437858508604,
"grad_norm": 0.1309674988371225,
"learning_rate": 9.83323091572872e-06,
"loss": 0.5042,
"mean_token_accuracy": 0.8334027752280235,
"num_tokens": 124577906.0,
"step": 217
},
{
"epoch": 1.6653919694072656,
"grad_norm": 0.1416672912642957,
"learning_rate": 9.831691925051634e-06,
"loss": 0.469,
"mean_token_accuracy": 0.8436514809727669,
"num_tokens": 125153906.0,
"step": 218
},
{
"epoch": 1.6730401529636711,
"grad_norm": 0.13213277792263045,
"learning_rate": 9.83014598738979e-06,
"loss": 0.4744,
"mean_token_accuracy": 0.8429621979594231,
"num_tokens": 125729906.0,
"step": 219
},
{
"epoch": 1.6806883365200764,
"grad_norm": 0.14828248446823036,
"learning_rate": 9.82859310496593e-06,
"loss": 0.476,
"mean_token_accuracy": 0.841729499399662,
"num_tokens": 126305906.0,
"step": 220
},
{
"epoch": 1.688336520076482,
"grad_norm": 0.13273638622183012,
"learning_rate": 9.827033280012783e-06,
"loss": 0.4601,
"mean_token_accuracy": 0.8464536741375923,
"num_tokens": 126881906.0,
"step": 221
},
{
"epoch": 1.6959847036328872,
"grad_norm": 0.1371281964569988,
"learning_rate": 9.825466514773054e-06,
"loss": 0.5065,
"mean_token_accuracy": 0.8331058844923973,
"num_tokens": 127457906.0,
"step": 222
},
{
"epoch": 1.7036328871892925,
"grad_norm": 0.13113344781507827,
"learning_rate": 9.823892811499435e-06,
"loss": 0.4751,
"mean_token_accuracy": 0.8415020853281021,
"num_tokens": 128033906.0,
"step": 223
},
{
"epoch": 1.7112810707456978,
"grad_norm": 0.1377436100503237,
"learning_rate": 9.822312172454587e-06,
"loss": 0.52,
"mean_token_accuracy": 0.8276021778583527,
"num_tokens": 128609906.0,
"step": 224
},
{
"epoch": 1.7189292543021033,
"grad_norm": 0.12699707417451003,
"learning_rate": 9.820724599911147e-06,
"loss": 0.446,
"mean_token_accuracy": 0.8517074510455132,
"num_tokens": 129169503.0,
"step": 225
},
{
"epoch": 1.7265774378585086,
"grad_norm": 0.12634559250621336,
"learning_rate": 9.819130096151718e-06,
"loss": 0.4544,
"mean_token_accuracy": 0.8480943590402603,
"num_tokens": 129745503.0,
"step": 226
},
{
"epoch": 1.734225621414914,
"grad_norm": 0.12958989836341028,
"learning_rate": 9.817528663468873e-06,
"loss": 0.4783,
"mean_token_accuracy": 0.8412247076630592,
"num_tokens": 130319403.0,
"step": 227
},
{
"epoch": 1.7418738049713194,
"grad_norm": 0.1372257707417811,
"learning_rate": 9.815920304165144e-06,
"loss": 0.4881,
"mean_token_accuracy": 0.8391176462173462,
"num_tokens": 130881510.0,
"step": 228
},
{
"epoch": 1.7495219885277247,
"grad_norm": 0.13053898036659867,
"learning_rate": 9.81430502055302e-06,
"loss": 0.4723,
"mean_token_accuracy": 0.8423580154776573,
"num_tokens": 131457510.0,
"step": 229
},
{
"epoch": 1.75717017208413,
"grad_norm": 0.13772367894783863,
"learning_rate": 9.81268281495495e-06,
"loss": 0.4873,
"mean_token_accuracy": 0.8386755734682083,
"num_tokens": 132033510.0,
"step": 230
},
{
"epoch": 1.7648183556405352,
"grad_norm": 0.1357109696346542,
"learning_rate": 9.811053689703333e-06,
"loss": 0.5173,
"mean_token_accuracy": 0.8325728848576546,
"num_tokens": 132609510.0,
"step": 231
},
{
"epoch": 1.7724665391969407,
"grad_norm": 0.1279722497480012,
"learning_rate": 9.809417647140522e-06,
"loss": 0.4733,
"mean_token_accuracy": 0.8432070016860962,
"num_tokens": 133185510.0,
"step": 232
},
{
"epoch": 1.780114722753346,
"grad_norm": 0.14056689318218055,
"learning_rate": 9.807774689618806e-06,
"loss": 0.5136,
"mean_token_accuracy": 0.8318975046277046,
"num_tokens": 133761510.0,
"step": 233
},
{
"epoch": 1.7877629063097515,
"grad_norm": 0.13180288049266284,
"learning_rate": 9.806124819500427e-06,
"loss": 0.4863,
"mean_token_accuracy": 0.8381616622209549,
"num_tokens": 134337510.0,
"step": 234
},
{
"epoch": 1.7954110898661568,
"grad_norm": 0.14261327766170212,
"learning_rate": 9.804468039157557e-06,
"loss": 0.4865,
"mean_token_accuracy": 0.8391356617212296,
"num_tokens": 134913510.0,
"step": 235
},
{
"epoch": 1.8030592734225621,
"grad_norm": 0.1415890220842668,
"learning_rate": 9.802804350972308e-06,
"loss": 0.5261,
"mean_token_accuracy": 0.8269285559654236,
"num_tokens": 135489510.0,
"step": 236
},
{
"epoch": 1.8107074569789674,
"grad_norm": 0.13577919369547242,
"learning_rate": 9.801133757336726e-06,
"loss": 0.475,
"mean_token_accuracy": 0.8420576602220535,
"num_tokens": 136065510.0,
"step": 237
},
{
"epoch": 1.8183556405353727,
"grad_norm": 0.13887863894469682,
"learning_rate": 9.799456260652778e-06,
"loss": 0.4676,
"mean_token_accuracy": 0.8456170037388802,
"num_tokens": 136625705.0,
"step": 238
},
{
"epoch": 1.8260038240917782,
"grad_norm": 0.1329581032736448,
"learning_rate": 9.797771863332365e-06,
"loss": 0.468,
"mean_token_accuracy": 0.8451133370399475,
"num_tokens": 137201705.0,
"step": 239
},
{
"epoch": 1.8336520076481837,
"grad_norm": 0.14266764923168374,
"learning_rate": 9.796080567797304e-06,
"loss": 0.4875,
"mean_token_accuracy": 0.8391078859567642,
"num_tokens": 137777705.0,
"step": 240
},
{
"epoch": 1.841300191204589,
"grad_norm": 0.13123578796297256,
"learning_rate": 9.794382376479334e-06,
"loss": 0.4801,
"mean_token_accuracy": 0.8414048627018929,
"num_tokens": 138353705.0,
"step": 241
},
{
"epoch": 1.8489483747609943,
"grad_norm": 0.13763073917570376,
"learning_rate": 9.792677291820107e-06,
"loss": 0.4785,
"mean_token_accuracy": 0.8403075709939003,
"num_tokens": 138929705.0,
"step": 242
},
{
"epoch": 1.8565965583173996,
"grad_norm": 0.1339684449373738,
"learning_rate": 9.790965316271183e-06,
"loss": 0.4728,
"mean_token_accuracy": 0.8434257805347443,
"num_tokens": 139505705.0,
"step": 243
},
{
"epoch": 1.8642447418738048,
"grad_norm": 0.13563074491467572,
"learning_rate": 9.789246452294034e-06,
"loss": 0.4885,
"mean_token_accuracy": 0.8397867307066917,
"num_tokens": 140081705.0,
"step": 244
},
{
"epoch": 1.8718929254302104,
"grad_norm": 0.14368095826727606,
"learning_rate": 9.787520702360035e-06,
"loss": 0.4808,
"mean_token_accuracy": 0.840397872030735,
"num_tokens": 140657705.0,
"step": 245
},
{
"epoch": 1.8795411089866156,
"grad_norm": 0.14397182347295984,
"learning_rate": 9.785788068950463e-06,
"loss": 0.5054,
"mean_token_accuracy": 0.8325815722346306,
"num_tokens": 141233705.0,
"step": 246
},
{
"epoch": 1.8871892925430211,
"grad_norm": 0.1406587307230205,
"learning_rate": 9.784048554556488e-06,
"loss": 0.5024,
"mean_token_accuracy": 0.8344358131289482,
"num_tokens": 141809705.0,
"step": 247
},
{
"epoch": 1.8948374760994264,
"grad_norm": 0.1346925448003243,
"learning_rate": 9.782302161679177e-06,
"loss": 0.4846,
"mean_token_accuracy": 0.8397086039185524,
"num_tokens": 142385705.0,
"step": 248
},
{
"epoch": 1.9024856596558317,
"grad_norm": 0.13981403945386878,
"learning_rate": 9.780548892829486e-06,
"loss": 0.4886,
"mean_token_accuracy": 0.8380331769585609,
"num_tokens": 142961705.0,
"step": 249
},
{
"epoch": 1.910133843212237,
"grad_norm": 0.13251833042206107,
"learning_rate": 9.778788750528257e-06,
"loss": 0.4667,
"mean_token_accuracy": 0.8444865867495537,
"num_tokens": 143537705.0,
"step": 250
},
{
"epoch": 1.9177820267686423,
"grad_norm": 0.14376049074556502,
"learning_rate": 9.777021737306214e-06,
"loss": 0.4857,
"mean_token_accuracy": 0.8387880772352219,
"num_tokens": 144099211.0,
"step": 251
},
{
"epoch": 1.9254302103250478,
"grad_norm": 0.14486384551210302,
"learning_rate": 9.775247855703962e-06,
"loss": 0.4826,
"mean_token_accuracy": 0.8404586315155029,
"num_tokens": 144675211.0,
"step": 252
},
{
"epoch": 1.9330783938814533,
"grad_norm": 0.1388194492997388,
"learning_rate": 9.773467108271978e-06,
"loss": 0.51,
"mean_token_accuracy": 0.8311353400349617,
"num_tokens": 145251211.0,
"step": 253
},
{
"epoch": 1.9407265774378586,
"grad_norm": 0.13026081138638712,
"learning_rate": 9.771679497570614e-06,
"loss": 0.4799,
"mean_token_accuracy": 0.8409708067774773,
"num_tokens": 145827211.0,
"step": 254
},
{
"epoch": 1.9483747609942639,
"grad_norm": 0.1430745806772645,
"learning_rate": 9.769885026170088e-06,
"loss": 0.4629,
"mean_token_accuracy": 0.8461342081427574,
"num_tokens": 146403211.0,
"step": 255
},
{
"epoch": 1.9560229445506692,
"grad_norm": 0.1423170947817084,
"learning_rate": 9.768083696650481e-06,
"loss": 0.4964,
"mean_token_accuracy": 0.8357796147465706,
"num_tokens": 146979211.0,
"step": 256
},
{
"epoch": 1.9636711281070744,
"grad_norm": 0.1412781732428823,
"learning_rate": 9.766275511601742e-06,
"loss": 0.4685,
"mean_token_accuracy": 0.84524355083704,
"num_tokens": 147555211.0,
"step": 257
},
{
"epoch": 1.97131931166348,
"grad_norm": 0.1347390798520367,
"learning_rate": 9.764460473623665e-06,
"loss": 0.4964,
"mean_token_accuracy": 0.8352379277348518,
"num_tokens": 148131211.0,
"step": 258
},
{
"epoch": 1.9789674952198852,
"grad_norm": 0.140319211357454,
"learning_rate": 9.762638585325907e-06,
"loss": 0.4651,
"mean_token_accuracy": 0.8467453494668007,
"num_tokens": 148707211.0,
"step": 259
},
{
"epoch": 1.9866156787762907,
"grad_norm": 0.14587479148422647,
"learning_rate": 9.760809849327967e-06,
"loss": 0.4859,
"mean_token_accuracy": 0.8388071581721306,
"num_tokens": 149261156.0,
"step": 260
},
{
"epoch": 1.994263862332696,
"grad_norm": 0.15571274133757082,
"learning_rate": 9.7589742682592e-06,
"loss": 0.4865,
"mean_token_accuracy": 0.8390511944890022,
"num_tokens": 149814426.0,
"step": 261
},
{
"epoch": 2.0,
"grad_norm": 0.1656455865676849,
"learning_rate": 9.75713184475879e-06,
"loss": 0.5189,
"mean_token_accuracy": 0.8292289972305298,
"num_tokens": 150246426.0,
"step": 262
},
{
"epoch": 2.0076481835564053,
"grad_norm": 0.13050271447099013,
"learning_rate": 9.755282581475769e-06,
"loss": 0.4564,
"mean_token_accuracy": 0.8467505499720573,
"num_tokens": 150822426.0,
"step": 263
},
{
"epoch": 2.0152963671128106,
"grad_norm": 0.1336370185440778,
"learning_rate": 9.753426481068998e-06,
"loss": 0.4913,
"mean_token_accuracy": 0.8365452736616135,
"num_tokens": 151398426.0,
"step": 264
},
{
"epoch": 2.022944550669216,
"grad_norm": 0.1382072756245929,
"learning_rate": 9.751563546207167e-06,
"loss": 0.4878,
"mean_token_accuracy": 0.8377542123198509,
"num_tokens": 151952371.0,
"step": 265
},
{
"epoch": 2.0305927342256216,
"grad_norm": 0.14387760662210322,
"learning_rate": 9.749693779568799e-06,
"loss": 0.5203,
"mean_token_accuracy": 0.827904649078846,
"num_tokens": 152520866.0,
"step": 266
},
{
"epoch": 2.038240917782027,
"grad_norm": 0.14163598725064547,
"learning_rate": 9.747817183842235e-06,
"loss": 0.486,
"mean_token_accuracy": 0.8394655287265778,
"num_tokens": 153096866.0,
"step": 267
},
{
"epoch": 2.045889101338432,
"grad_norm": 0.1357429538351474,
"learning_rate": 9.745933761725635e-06,
"loss": 0.4392,
"mean_token_accuracy": 0.8543984368443489,
"num_tokens": 153672866.0,
"step": 268
},
{
"epoch": 2.0535372848948374,
"grad_norm": 0.14159049917519523,
"learning_rate": 9.744043515926975e-06,
"loss": 0.4704,
"mean_token_accuracy": 0.8423840552568436,
"num_tokens": 154248866.0,
"step": 269
},
{
"epoch": 2.0611854684512427,
"grad_norm": 0.140767759584201,
"learning_rate": 9.742146449164045e-06,
"loss": 0.4657,
"mean_token_accuracy": 0.8441150411963463,
"num_tokens": 154824866.0,
"step": 270
},
{
"epoch": 2.068833652007648,
"grad_norm": 0.1376003238674713,
"learning_rate": 9.740242564164433e-06,
"loss": 0.4829,
"mean_token_accuracy": 0.8394047617912292,
"num_tokens": 155400866.0,
"step": 271
},
{
"epoch": 2.0764818355640537,
"grad_norm": 0.14234028861993708,
"learning_rate": 9.738331863665541e-06,
"loss": 0.4541,
"mean_token_accuracy": 0.8469480872154236,
"num_tokens": 155954136.0,
"step": 272
},
{
"epoch": 2.084130019120459,
"grad_norm": 0.12785940871582666,
"learning_rate": 9.736414350414564e-06,
"loss": 0.4328,
"mean_token_accuracy": 0.8540546894073486,
"num_tokens": 156530136.0,
"step": 273
},
{
"epoch": 2.0917782026768643,
"grad_norm": 0.13077439345298142,
"learning_rate": 9.734490027168494e-06,
"loss": 0.4536,
"mean_token_accuracy": 0.8471047431230545,
"num_tokens": 157106136.0,
"step": 274
},
{
"epoch": 2.0994263862332696,
"grad_norm": 0.13230573836928355,
"learning_rate": 9.732558896694114e-06,
"loss": 0.4722,
"mean_token_accuracy": 0.842342384159565,
"num_tokens": 157682136.0,
"step": 275
},
{
"epoch": 2.107074569789675,
"grad_norm": 0.13384258031186463,
"learning_rate": 9.730620961767996e-06,
"loss": 0.4693,
"mean_token_accuracy": 0.8435490280389786,
"num_tokens": 158258136.0,
"step": 276
},
{
"epoch": 2.11472275334608,
"grad_norm": 0.13295124171892045,
"learning_rate": 9.72867622517649e-06,
"loss": 0.4848,
"mean_token_accuracy": 0.8387293890118599,
"num_tokens": 158834136.0,
"step": 277
},
{
"epoch": 2.1223709369024855,
"grad_norm": 0.13698639518555758,
"learning_rate": 9.726724689715734e-06,
"loss": 0.4709,
"mean_token_accuracy": 0.8430733233690262,
"num_tokens": 159410136.0,
"step": 278
},
{
"epoch": 2.130019120458891,
"grad_norm": 0.131887831277675,
"learning_rate": 9.724766358191635e-06,
"loss": 0.482,
"mean_token_accuracy": 0.8394516482949257,
"num_tokens": 159986136.0,
"step": 279
},
{
"epoch": 2.1376673040152965,
"grad_norm": 0.14891285174122362,
"learning_rate": 9.722801233419873e-06,
"loss": 0.4966,
"mean_token_accuracy": 0.8344601169228554,
"num_tokens": 160562136.0,
"step": 280
},
{
"epoch": 2.1453154875717018,
"grad_norm": 0.13274530265535694,
"learning_rate": 9.720829318225897e-06,
"loss": 0.4412,
"mean_token_accuracy": 0.8516188189387321,
"num_tokens": 161138136.0,
"step": 281
},
{
"epoch": 2.152963671128107,
"grad_norm": 0.12558989675749826,
"learning_rate": 9.718850615444915e-06,
"loss": 0.4617,
"mean_token_accuracy": 0.8458894118666649,
"num_tokens": 161714136.0,
"step": 282
},
{
"epoch": 2.1606118546845123,
"grad_norm": 0.13805526111149538,
"learning_rate": 9.7168651279219e-06,
"loss": 0.476,
"mean_token_accuracy": 0.84109927713871,
"num_tokens": 162290136.0,
"step": 283
},
{
"epoch": 2.1682600382409176,
"grad_norm": 0.1365628904112165,
"learning_rate": 9.714872858511574e-06,
"loss": 0.4813,
"mean_token_accuracy": 0.8389794006943703,
"num_tokens": 162866136.0,
"step": 284
},
{
"epoch": 2.1759082217973233,
"grad_norm": 0.14202588056377932,
"learning_rate": 9.712873810078415e-06,
"loss": 0.4731,
"mean_token_accuracy": 0.8429031893610954,
"num_tokens": 163442136.0,
"step": 285
},
{
"epoch": 2.1835564053537286,
"grad_norm": 0.14026758376596035,
"learning_rate": 9.710867985496644e-06,
"loss": 0.4828,
"mean_token_accuracy": 0.8403249308466911,
"num_tokens": 164018136.0,
"step": 286
},
{
"epoch": 2.191204588910134,
"grad_norm": 0.1268634171589506,
"learning_rate": 9.708855387650229e-06,
"loss": 0.4566,
"mean_token_accuracy": 0.8471759259700775,
"num_tokens": 164594136.0,
"step": 287
},
{
"epoch": 2.198852772466539,
"grad_norm": 0.1361940615902871,
"learning_rate": 9.706836019432872e-06,
"loss": 0.4843,
"mean_token_accuracy": 0.8384498655796051,
"num_tokens": 165170136.0,
"step": 288
},
{
"epoch": 2.2065009560229445,
"grad_norm": 0.13071252562608368,
"learning_rate": 9.704809883748012e-06,
"loss": 0.4619,
"mean_token_accuracy": 0.8458164930343628,
"num_tokens": 165746136.0,
"step": 289
},
{
"epoch": 2.21414913957935,
"grad_norm": 0.13749244273488634,
"learning_rate": 9.70277698350882e-06,
"loss": 0.4763,
"mean_token_accuracy": 0.8413892164826393,
"num_tokens": 166322136.0,
"step": 290
},
{
"epoch": 2.221797323135755,
"grad_norm": 0.13653291312402388,
"learning_rate": 9.700737321638185e-06,
"loss": 0.494,
"mean_token_accuracy": 0.8379845768213272,
"num_tokens": 166898136.0,
"step": 291
},
{
"epoch": 2.229445506692161,
"grad_norm": 0.1314091235725935,
"learning_rate": 9.69869090106873e-06,
"loss": 0.4553,
"mean_token_accuracy": 0.8476238548755646,
"num_tokens": 167474136.0,
"step": 292
},
{
"epoch": 2.237093690248566,
"grad_norm": 0.1306318774358581,
"learning_rate": 9.696637724742785e-06,
"loss": 0.4588,
"mean_token_accuracy": 0.8466134071350098,
"num_tokens": 168050136.0,
"step": 293
},
{
"epoch": 2.2447418738049714,
"grad_norm": 0.1320429519519253,
"learning_rate": 9.6945777956124e-06,
"loss": 0.4893,
"mean_token_accuracy": 0.8379810974001884,
"num_tokens": 168626136.0,
"step": 294
},
{
"epoch": 2.2523900573613767,
"grad_norm": 0.12581567893697732,
"learning_rate": 9.69251111663933e-06,
"loss": 0.4421,
"mean_token_accuracy": 0.8516535460948944,
"num_tokens": 169202136.0,
"step": 295
},
{
"epoch": 2.260038240917782,
"grad_norm": 0.15120633186562035,
"learning_rate": 9.690437690795038e-06,
"loss": 0.4828,
"mean_token_accuracy": 0.8403878286480904,
"num_tokens": 169764783.0,
"step": 296
},
{
"epoch": 2.2676864244741872,
"grad_norm": 0.14083593416082985,
"learning_rate": 9.688357521060685e-06,
"loss": 0.4997,
"mean_token_accuracy": 0.8348125591874123,
"num_tokens": 170340783.0,
"step": 297
},
{
"epoch": 2.275334608030593,
"grad_norm": 0.13552190258745817,
"learning_rate": 9.686270610427131e-06,
"loss": 0.4831,
"mean_token_accuracy": 0.8392866998910904,
"num_tokens": 170916783.0,
"step": 298
},
{
"epoch": 2.2829827915869982,
"grad_norm": 0.1301480903850601,
"learning_rate": 9.684176961894927e-06,
"loss": 0.4668,
"mean_token_accuracy": 0.8437822833657265,
"num_tokens": 171490115.0,
"step": 299
},
{
"epoch": 2.2906309751434035,
"grad_norm": 0.14821266280449158,
"learning_rate": 9.682076578474308e-06,
"loss": 0.4986,
"mean_token_accuracy": 0.8342083841562271,
"num_tokens": 172066115.0,
"step": 300
},
{
"epoch": 2.298279158699809,
"grad_norm": 0.13381251110623754,
"learning_rate": 9.6799694631852e-06,
"loss": 0.4603,
"mean_token_accuracy": 0.8468859866261482,
"num_tokens": 172642115.0,
"step": 301
},
{
"epoch": 2.305927342256214,
"grad_norm": 0.13169362581883381,
"learning_rate": 9.677855619057202e-06,
"loss": 0.4604,
"mean_token_accuracy": 0.8466967418789864,
"num_tokens": 173218115.0,
"step": 302
},
{
"epoch": 2.3135755258126194,
"grad_norm": 0.13195788424828656,
"learning_rate": 9.675735049129588e-06,
"loss": 0.4621,
"mean_token_accuracy": 0.8446150496602058,
"num_tokens": 173794115.0,
"step": 303
},
{
"epoch": 2.3212237093690247,
"grad_norm": 0.14122025118479573,
"learning_rate": 9.673607756451306e-06,
"loss": 0.4349,
"mean_token_accuracy": 0.853457435965538,
"num_tokens": 174370115.0,
"step": 304
},
{
"epoch": 2.3288718929254304,
"grad_norm": 0.12878410078634828,
"learning_rate": 9.67147374408097e-06,
"loss": 0.4447,
"mean_token_accuracy": 0.8515077084302902,
"num_tokens": 174946115.0,
"step": 305
},
{
"epoch": 2.3365200764818357,
"grad_norm": 0.125714053577621,
"learning_rate": 9.669333015086847e-06,
"loss": 0.4384,
"mean_token_accuracy": 0.8537577912211418,
"num_tokens": 175522115.0,
"step": 306
},
{
"epoch": 2.344168260038241,
"grad_norm": 0.1364732476611976,
"learning_rate": 9.667185572546871e-06,
"loss": 0.4656,
"mean_token_accuracy": 0.8443147018551826,
"num_tokens": 176098115.0,
"step": 307
},
{
"epoch": 2.3518164435946463,
"grad_norm": 0.13118221069966887,
"learning_rate": 9.665031419548625e-06,
"loss": 0.4533,
"mean_token_accuracy": 0.8475735187530518,
"num_tokens": 176674115.0,
"step": 308
},
{
"epoch": 2.3594646271510515,
"grad_norm": 0.13021585818887338,
"learning_rate": 9.662870559189344e-06,
"loss": 0.4772,
"mean_token_accuracy": 0.8415628522634506,
"num_tokens": 177250115.0,
"step": 309
},
{
"epoch": 2.367112810707457,
"grad_norm": 0.1296491093025139,
"learning_rate": 9.660702994575896e-06,
"loss": 0.4504,
"mean_token_accuracy": 0.8494465947151184,
"num_tokens": 177815810.0,
"step": 310
},
{
"epoch": 2.3747609942638626,
"grad_norm": 0.1269070770369225,
"learning_rate": 9.658528728824799e-06,
"loss": 0.4454,
"mean_token_accuracy": 0.851382702589035,
"num_tokens": 178391810.0,
"step": 311
},
{
"epoch": 2.382409177820268,
"grad_norm": 0.13236942393674378,
"learning_rate": 9.656347765062206e-06,
"loss": 0.4959,
"mean_token_accuracy": 0.8356025218963623,
"num_tokens": 178967810.0,
"step": 312
},
{
"epoch": 2.390057361376673,
"grad_norm": 0.13431040882625667,
"learning_rate": 9.654160106423891e-06,
"loss": 0.4647,
"mean_token_accuracy": 0.8447903171181679,
"num_tokens": 179541447.0,
"step": 313
},
{
"epoch": 2.3977055449330784,
"grad_norm": 0.1304990964909841,
"learning_rate": 9.651965756055262e-06,
"loss": 0.4733,
"mean_token_accuracy": 0.8432052731513977,
"num_tokens": 180117447.0,
"step": 314
},
{
"epoch": 2.4053537284894837,
"grad_norm": 0.13562049534152407,
"learning_rate": 9.649764717111348e-06,
"loss": 0.4827,
"mean_token_accuracy": 0.8400332629680634,
"num_tokens": 180693447.0,
"step": 315
},
{
"epoch": 2.413001912045889,
"grad_norm": 0.1322708589540597,
"learning_rate": 9.647556992756789e-06,
"loss": 0.4685,
"mean_token_accuracy": 0.8437608480453491,
"num_tokens": 181269447.0,
"step": 316
},
{
"epoch": 2.4206500956022943,
"grad_norm": 0.13106792460307698,
"learning_rate": 9.645342586165845e-06,
"loss": 0.4676,
"mean_token_accuracy": 0.8447365760803223,
"num_tokens": 181845447.0,
"step": 317
},
{
"epoch": 2.4282982791587,
"grad_norm": 0.12774223293558123,
"learning_rate": 9.643121500522377e-06,
"loss": 0.4783,
"mean_token_accuracy": 0.841241642832756,
"num_tokens": 182421447.0,
"step": 318
},
{
"epoch": 2.4359464627151053,
"grad_norm": 0.13329940881644012,
"learning_rate": 9.640893739019852e-06,
"loss": 0.4558,
"mean_token_accuracy": 0.8470873907208443,
"num_tokens": 182997447.0,
"step": 319
},
{
"epoch": 2.4435946462715106,
"grad_norm": 0.1389284366392599,
"learning_rate": 9.638659304861336e-06,
"loss": 0.4873,
"mean_token_accuracy": 0.8378456756472588,
"num_tokens": 183573447.0,
"step": 320
},
{
"epoch": 2.451242829827916,
"grad_norm": 0.13022523574202438,
"learning_rate": 9.63641820125949e-06,
"loss": 0.4532,
"mean_token_accuracy": 0.8482332676649094,
"num_tokens": 184149447.0,
"step": 321
},
{
"epoch": 2.458891013384321,
"grad_norm": 0.12869518334269098,
"learning_rate": 9.63417043143656e-06,
"loss": 0.4364,
"mean_token_accuracy": 0.8536501526832581,
"num_tokens": 184725447.0,
"step": 322
},
{
"epoch": 2.4665391969407264,
"grad_norm": 0.1685448514608509,
"learning_rate": 9.631915998624382e-06,
"loss": 0.4801,
"mean_token_accuracy": 0.8401600047945976,
"num_tokens": 185301447.0,
"step": 323
},
{
"epoch": 2.474187380497132,
"grad_norm": 0.13160582073089025,
"learning_rate": 9.629654906064365e-06,
"loss": 0.4649,
"mean_token_accuracy": 0.8448563814163208,
"num_tokens": 185877447.0,
"step": 324
},
{
"epoch": 2.4818355640535374,
"grad_norm": 0.1367794173463771,
"learning_rate": 9.627387157007502e-06,
"loss": 0.4883,
"mean_token_accuracy": 0.8361355364322662,
"num_tokens": 186453447.0,
"step": 325
},
{
"epoch": 2.4894837476099427,
"grad_norm": 0.13222850798423158,
"learning_rate": 9.62511275471435e-06,
"loss": 0.4677,
"mean_token_accuracy": 0.8434003219008446,
"num_tokens": 187010772.0,
"step": 326
},
{
"epoch": 2.497131931166348,
"grad_norm": 0.15953227190496977,
"learning_rate": 9.622831702455035e-06,
"loss": 0.4831,
"mean_token_accuracy": 0.8401443809270859,
"num_tokens": 187586772.0,
"step": 327
},
{
"epoch": 2.5047801147227533,
"grad_norm": 0.1298624067386934,
"learning_rate": 9.620544003509243e-06,
"loss": 0.4741,
"mean_token_accuracy": 0.8434900045394897,
"num_tokens": 188162772.0,
"step": 328
},
{
"epoch": 2.5124282982791586,
"grad_norm": 0.13840666580879918,
"learning_rate": 9.618249661166218e-06,
"loss": 0.4923,
"mean_token_accuracy": 0.8363108858466148,
"num_tokens": 188738772.0,
"step": 329
},
{
"epoch": 2.520076481835564,
"grad_norm": 0.12560912611469238,
"learning_rate": 9.615948678724756e-06,
"loss": 0.4396,
"mean_token_accuracy": 0.8522473201155663,
"num_tokens": 189314772.0,
"step": 330
},
{
"epoch": 2.527724665391969,
"grad_norm": 0.1296554841366996,
"learning_rate": 9.613641059493197e-06,
"loss": 0.4615,
"mean_token_accuracy": 0.8463234454393387,
"num_tokens": 189890772.0,
"step": 331
},
{
"epoch": 2.535372848948375,
"grad_norm": 0.14613127575466625,
"learning_rate": 9.611326806789424e-06,
"loss": 0.4799,
"mean_token_accuracy": 0.8401964753866196,
"num_tokens": 190466772.0,
"step": 332
},
{
"epoch": 2.54302103250478,
"grad_norm": 0.13394861492299956,
"learning_rate": 9.609005923940865e-06,
"loss": 0.455,
"mean_token_accuracy": 0.8472384214401245,
"num_tokens": 191042772.0,
"step": 333
},
{
"epoch": 2.5506692160611855,
"grad_norm": 0.12785020969707311,
"learning_rate": 9.606678414284469e-06,
"loss": 0.45,
"mean_token_accuracy": 0.8496569246053696,
"num_tokens": 191618772.0,
"step": 334
},
{
"epoch": 2.5583173996175907,
"grad_norm": 0.13667091866209735,
"learning_rate": 9.604344281166721e-06,
"loss": 0.4569,
"mean_token_accuracy": 0.8474294021725655,
"num_tokens": 192194772.0,
"step": 335
},
{
"epoch": 2.565965583173996,
"grad_norm": 0.14170531764608627,
"learning_rate": 9.602003527943629e-06,
"loss": 0.4841,
"mean_token_accuracy": 0.838857851922512,
"num_tokens": 192770772.0,
"step": 336
},
{
"epoch": 2.5736137667304018,
"grad_norm": 0.14203506631993854,
"learning_rate": 9.599656157980715e-06,
"loss": 0.4711,
"mean_token_accuracy": 0.8428667038679123,
"num_tokens": 193346772.0,
"step": 337
},
{
"epoch": 2.581261950286807,
"grad_norm": 0.13109007712840628,
"learning_rate": 9.597302174653016e-06,
"loss": 0.4452,
"mean_token_accuracy": 0.8508444800972939,
"num_tokens": 193922772.0,
"step": 338
},
{
"epoch": 2.5889101338432123,
"grad_norm": 0.1376764806733569,
"learning_rate": 9.594941581345079e-06,
"loss": 0.4743,
"mean_token_accuracy": 0.8415871560573578,
"num_tokens": 194498772.0,
"step": 339
},
{
"epoch": 2.5965583173996176,
"grad_norm": 0.13134460713857604,
"learning_rate": 9.592574381450957e-06,
"loss": 0.4536,
"mean_token_accuracy": 0.8493340015411377,
"num_tokens": 195074772.0,
"step": 340
},
{
"epoch": 2.604206500956023,
"grad_norm": 0.12998973657236298,
"learning_rate": 9.590200578374198e-06,
"loss": 0.4735,
"mean_token_accuracy": 0.8423146158456802,
"num_tokens": 195650772.0,
"step": 341
},
{
"epoch": 2.611854684512428,
"grad_norm": 0.13974086743961503,
"learning_rate": 9.587820175527844e-06,
"loss": 0.4633,
"mean_token_accuracy": 0.8453876450657845,
"num_tokens": 196226772.0,
"step": 342
},
{
"epoch": 2.6195028680688335,
"grad_norm": 0.13737097736695414,
"learning_rate": 9.585433176334426e-06,
"loss": 0.4881,
"mean_token_accuracy": 0.8374150991439819,
"num_tokens": 196802772.0,
"step": 343
},
{
"epoch": 2.6271510516252388,
"grad_norm": 0.1328384415039295,
"learning_rate": 9.583039584225966e-06,
"loss": 0.452,
"mean_token_accuracy": 0.8489537835121155,
"num_tokens": 197378772.0,
"step": 344
},
{
"epoch": 2.6347992351816445,
"grad_norm": 0.13333625376712616,
"learning_rate": 9.580639402643957e-06,
"loss": 0.4749,
"mean_token_accuracy": 0.8421444669365883,
"num_tokens": 197954772.0,
"step": 345
},
{
"epoch": 2.64244741873805,
"grad_norm": 0.13099239341333282,
"learning_rate": 9.578232635039368e-06,
"loss": 0.468,
"mean_token_accuracy": 0.8446983769536018,
"num_tokens": 198530772.0,
"step": 346
},
{
"epoch": 2.650095602294455,
"grad_norm": 0.1319772619241733,
"learning_rate": 9.575819284872638e-06,
"loss": 0.4753,
"mean_token_accuracy": 0.8412885293364525,
"num_tokens": 199106772.0,
"step": 347
},
{
"epoch": 2.6577437858508604,
"grad_norm": 0.1315801229711581,
"learning_rate": 9.573399355613675e-06,
"loss": 0.4538,
"mean_token_accuracy": 0.8480388075113297,
"num_tokens": 199682772.0,
"step": 348
},
{
"epoch": 2.6653919694072656,
"grad_norm": 0.14029244455419185,
"learning_rate": 9.570972850741839e-06,
"loss": 0.4642,
"mean_token_accuracy": 0.8463078290224075,
"num_tokens": 200258772.0,
"step": 349
},
{
"epoch": 2.6730401529636714,
"grad_norm": 0.1382382609127072,
"learning_rate": 9.568539773745947e-06,
"loss": 0.4792,
"mean_token_accuracy": 0.8414309024810791,
"num_tokens": 200834772.0,
"step": 350
},
{
"epoch": 2.6806883365200767,
"grad_norm": 0.13641202302875485,
"learning_rate": 9.56610012812427e-06,
"loss": 0.4948,
"mean_token_accuracy": 0.8358525335788727,
"num_tokens": 201410772.0,
"step": 351
},
{
"epoch": 2.688336520076482,
"grad_norm": 0.13648644317956463,
"learning_rate": 9.563653917384515e-06,
"loss": 0.4791,
"mean_token_accuracy": 0.8403978571295738,
"num_tokens": 201986772.0,
"step": 352
},
{
"epoch": 2.6959847036328872,
"grad_norm": 0.1265267382875485,
"learning_rate": 9.561201145043835e-06,
"loss": 0.4565,
"mean_token_accuracy": 0.8470665439963341,
"num_tokens": 202562772.0,
"step": 353
},
{
"epoch": 2.7036328871892925,
"grad_norm": 0.14834388827490924,
"learning_rate": 9.558741814628815e-06,
"loss": 0.4927,
"mean_token_accuracy": 0.8357188552618027,
"num_tokens": 203138772.0,
"step": 354
},
{
"epoch": 2.711281070745698,
"grad_norm": 0.1339156205744843,
"learning_rate": 9.55627592967547e-06,
"loss": 0.4552,
"mean_token_accuracy": 0.8483408987522125,
"num_tokens": 203714772.0,
"step": 355
},
{
"epoch": 2.718929254302103,
"grad_norm": 0.13745433509215765,
"learning_rate": 9.553803493729237e-06,
"loss": 0.4797,
"mean_token_accuracy": 0.8393444642424583,
"num_tokens": 204277037.0,
"step": 356
},
{
"epoch": 2.7265774378585084,
"grad_norm": 0.12921999079532936,
"learning_rate": 9.551324510344972e-06,
"loss": 0.4649,
"mean_token_accuracy": 0.8451011776924133,
"num_tokens": 204853037.0,
"step": 357
},
{
"epoch": 2.734225621414914,
"grad_norm": 0.13846536123122738,
"learning_rate": 9.548838983086948e-06,
"loss": 0.4771,
"mean_token_accuracy": 0.8409933820366859,
"num_tokens": 205429037.0,
"step": 358
},
{
"epoch": 2.7418738049713194,
"grad_norm": 0.13803335699731223,
"learning_rate": 9.546346915528844e-06,
"loss": 0.4704,
"mean_token_accuracy": 0.8430611714720726,
"num_tokens": 206005037.0,
"step": 359
},
{
"epoch": 2.7495219885277247,
"grad_norm": 0.1425603356301425,
"learning_rate": 9.543848311253744e-06,
"loss": 0.4506,
"mean_token_accuracy": 0.8472823351621628,
"num_tokens": 206551931.0,
"step": 360
},
{
"epoch": 2.75717017208413,
"grad_norm": 0.12337957343310935,
"learning_rate": 9.541343173854128e-06,
"loss": 0.4024,
"mean_token_accuracy": 0.8640047535300255,
"num_tokens": 207127931.0,
"step": 361
},
{
"epoch": 2.7648183556405352,
"grad_norm": 0.1440988584019457,
"learning_rate": 9.538831506931874e-06,
"loss": 0.483,
"mean_token_accuracy": 0.8385800793766975,
"num_tokens": 207703931.0,
"step": 362
},
{
"epoch": 2.772466539196941,
"grad_norm": 0.1336436468088995,
"learning_rate": 9.536313314098243e-06,
"loss": 0.4869,
"mean_token_accuracy": 0.8362970650196075,
"num_tokens": 208277831.0,
"step": 363
},
{
"epoch": 2.7801147227533463,
"grad_norm": 0.13491196154454094,
"learning_rate": 9.533788598973883e-06,
"loss": 0.465,
"mean_token_accuracy": 0.8448338061571121,
"num_tokens": 208853831.0,
"step": 364
},
{
"epoch": 2.7877629063097515,
"grad_norm": 0.13638411467630132,
"learning_rate": 9.531257365188818e-06,
"loss": 0.4909,
"mean_token_accuracy": 0.8366459757089615,
"num_tokens": 209429831.0,
"step": 365
},
{
"epoch": 2.795411089866157,
"grad_norm": 0.1368094560097927,
"learning_rate": 9.528719616382443e-06,
"loss": 0.4842,
"mean_token_accuracy": 0.8384652063250542,
"num_tokens": 209990026.0,
"step": 366
},
{
"epoch": 2.803059273422562,
"grad_norm": 0.13780641532143703,
"learning_rate": 9.526175356203521e-06,
"loss": 0.4659,
"mean_token_accuracy": 0.8452418148517609,
"num_tokens": 210566026.0,
"step": 367
},
{
"epoch": 2.8107074569789674,
"grad_norm": 0.14787034743484256,
"learning_rate": 9.523624588310181e-06,
"loss": 0.4681,
"mean_token_accuracy": 0.8442660793662071,
"num_tokens": 211142026.0,
"step": 368
},
{
"epoch": 2.8183556405353727,
"grad_norm": 0.13862557544263718,
"learning_rate": 9.521067316369903e-06,
"loss": 0.4571,
"mean_token_accuracy": 0.8479363769292831,
"num_tokens": 211718026.0,
"step": 369
},
{
"epoch": 2.826003824091778,
"grad_norm": 0.13772334123321825,
"learning_rate": 9.518503544059523e-06,
"loss": 0.4799,
"mean_token_accuracy": 0.8397033959627151,
"num_tokens": 212294026.0,
"step": 370
},
{
"epoch": 2.8336520076481837,
"grad_norm": 0.13948890447531806,
"learning_rate": 9.515933275065218e-06,
"loss": 0.4896,
"mean_token_accuracy": 0.837375171482563,
"num_tokens": 212870026.0,
"step": 371
},
{
"epoch": 2.841300191204589,
"grad_norm": 0.13858419410017597,
"learning_rate": 9.513356513082512e-06,
"loss": 0.4671,
"mean_token_accuracy": 0.8438424468040466,
"num_tokens": 213446026.0,
"step": 372
},
{
"epoch": 2.8489483747609943,
"grad_norm": 0.13242895194664173,
"learning_rate": 9.510773261816261e-06,
"loss": 0.4517,
"mean_token_accuracy": 0.8478269800543785,
"num_tokens": 214022026.0,
"step": 373
},
{
"epoch": 2.8565965583173996,
"grad_norm": 0.1361208802973601,
"learning_rate": 9.508183524980651e-06,
"loss": 0.4773,
"mean_token_accuracy": 0.8403232023119926,
"num_tokens": 214598026.0,
"step": 374
},
{
"epoch": 2.864244741873805,
"grad_norm": 0.14229532646455947,
"learning_rate": 9.505587306299196e-06,
"loss": 0.4601,
"mean_token_accuracy": 0.8460283130407333,
"num_tokens": 215174026.0,
"step": 375
},
{
"epoch": 2.8718929254302106,
"grad_norm": 0.14481534193858964,
"learning_rate": 9.502984609504724e-06,
"loss": 0.4816,
"mean_token_accuracy": 0.8387467563152313,
"num_tokens": 215750026.0,
"step": 376
},
{
"epoch": 2.879541108986616,
"grad_norm": 0.12944860389639234,
"learning_rate": 9.500375438339384e-06,
"loss": 0.4539,
"mean_token_accuracy": 0.8480362817645073,
"num_tokens": 216311532.0,
"step": 377
},
{
"epoch": 2.887189292543021,
"grad_norm": 0.14010965456936167,
"learning_rate": 9.497759796554629e-06,
"loss": 0.4715,
"mean_token_accuracy": 0.8425246849656105,
"num_tokens": 216887532.0,
"step": 378
},
{
"epoch": 2.8948374760994264,
"grad_norm": 0.14339976848438407,
"learning_rate": 9.495137687911218e-06,
"loss": 0.4645,
"mean_token_accuracy": 0.8451827839016914,
"num_tokens": 217463532.0,
"step": 379
},
{
"epoch": 2.9024856596558317,
"grad_norm": 0.1425721008440435,
"learning_rate": 9.492509116179206e-06,
"loss": 0.4758,
"mean_token_accuracy": 0.8416374921798706,
"num_tokens": 218039532.0,
"step": 380
},
{
"epoch": 2.910133843212237,
"grad_norm": 0.12904281233195608,
"learning_rate": 9.48987408513794e-06,
"loss": 0.4513,
"mean_token_accuracy": 0.848038800060749,
"num_tokens": 218615532.0,
"step": 381
},
{
"epoch": 2.9177820267686423,
"grad_norm": 0.13124547175151383,
"learning_rate": 9.487232598576056e-06,
"loss": 0.4435,
"mean_token_accuracy": 0.8508514240384102,
"num_tokens": 219191532.0,
"step": 382
},
{
"epoch": 2.9254302103250476,
"grad_norm": 0.14160726517557595,
"learning_rate": 9.484584660291476e-06,
"loss": 0.4815,
"mean_token_accuracy": 0.8395801186561584,
"num_tokens": 219767532.0,
"step": 383
},
{
"epoch": 2.9330783938814533,
"grad_norm": 0.13800419935826905,
"learning_rate": 9.481930274091388e-06,
"loss": 0.4562,
"mean_token_accuracy": 0.8468825072050095,
"num_tokens": 220343532.0,
"step": 384
},
{
"epoch": 2.9407265774378586,
"grad_norm": 0.13561246479535266,
"learning_rate": 9.47926944379226e-06,
"loss": 0.4505,
"mean_token_accuracy": 0.8491343483328819,
"num_tokens": 220919532.0,
"step": 385
},
{
"epoch": 2.948374760994264,
"grad_norm": 0.13430277105466684,
"learning_rate": 9.476602173219822e-06,
"loss": 0.4531,
"mean_token_accuracy": 0.8477696999907494,
"num_tokens": 221495532.0,
"step": 386
},
{
"epoch": 2.956022944550669,
"grad_norm": 0.1471890474737779,
"learning_rate": 9.47392846620906e-06,
"loss": 0.4869,
"mean_token_accuracy": 0.8385036885738373,
"num_tokens": 222071532.0,
"step": 387
},
{
"epoch": 2.9636711281070744,
"grad_norm": 0.15113073329267498,
"learning_rate": 9.47124832660422e-06,
"loss": 0.4813,
"mean_token_accuracy": 0.8399360328912735,
"num_tokens": 222647532.0,
"step": 388
},
{
"epoch": 2.97131931166348,
"grad_norm": 0.13475027561090738,
"learning_rate": 9.468561758258795e-06,
"loss": 0.5047,
"mean_token_accuracy": 0.8322638496756554,
"num_tokens": 223223532.0,
"step": 389
},
{
"epoch": 2.9789674952198855,
"grad_norm": 0.13848998512836547,
"learning_rate": 9.465868765035519e-06,
"loss": 0.4982,
"mean_token_accuracy": 0.8339010700583458,
"num_tokens": 223799532.0,
"step": 390
},
{
"epoch": 2.9866156787762907,
"grad_norm": 0.1319671191248173,
"learning_rate": 9.463169350806369e-06,
"loss": 0.4761,
"mean_token_accuracy": 0.8407210931181908,
"num_tokens": 224361639.0,
"step": 391
},
{
"epoch": 2.994263862332696,
"grad_norm": 0.14429086224338838,
"learning_rate": 9.460463519452547e-06,
"loss": 0.4894,
"mean_token_accuracy": 0.8367779329419136,
"num_tokens": 224937639.0,
"step": 392
},
{
"epoch": 3.0,
"grad_norm": 0.14450290671664134,
"learning_rate": 9.457751274864486e-06,
"loss": 0.4328,
"mean_token_accuracy": 0.8539082705974579,
"num_tokens": 225369639.0,
"step": 393
},
{
"epoch": 3.0076481835564053,
"grad_norm": 0.14273013240376312,
"learning_rate": 9.45503262094184e-06,
"loss": 0.4488,
"mean_token_accuracy": 0.8485075756907463,
"num_tokens": 225945639.0,
"step": 394
},
{
"epoch": 3.0152963671128106,
"grad_norm": 0.1318792500917861,
"learning_rate": 9.452307561593476e-06,
"loss": 0.4355,
"mean_token_accuracy": 0.8525528833270073,
"num_tokens": 226521639.0,
"step": 395
},
{
"epoch": 3.022944550669216,
"grad_norm": 0.13529933934932678,
"learning_rate": 9.449576100737474e-06,
"loss": 0.4405,
"mean_token_accuracy": 0.8521223217248917,
"num_tokens": 227097639.0,
"step": 396
},
{
"epoch": 3.0305927342256216,
"grad_norm": 0.1239800938473107,
"learning_rate": 9.446838242301113e-06,
"loss": 0.4272,
"mean_token_accuracy": 0.8561867102980614,
"num_tokens": 227673639.0,
"step": 397
},
{
"epoch": 3.038240917782027,
"grad_norm": 0.1346937584568678,
"learning_rate": 9.444093990220876e-06,
"loss": 0.4578,
"mean_token_accuracy": 0.8471168950200081,
"num_tokens": 228249639.0,
"step": 398
},
{
"epoch": 3.045889101338432,
"grad_norm": 0.1342389352088606,
"learning_rate": 9.441343348442436e-06,
"loss": 0.44,
"mean_token_accuracy": 0.8513809442520142,
"num_tokens": 228825639.0,
"step": 399
},
{
"epoch": 3.0535372848948374,
"grad_norm": 0.12739390345432006,
"learning_rate": 9.438586320920651e-06,
"loss": 0.421,
"mean_token_accuracy": 0.8576086536049843,
"num_tokens": 229401639.0,
"step": 400
},
{
"epoch": 3.0611854684512427,
"grad_norm": 0.13440046577139128,
"learning_rate": 9.435822911619564e-06,
"loss": 0.4785,
"mean_token_accuracy": 0.8404169529676437,
"num_tokens": 229977639.0,
"step": 401
},
{
"epoch": 3.068833652007648,
"grad_norm": 0.14325897840167365,
"learning_rate": 9.433053124512394e-06,
"loss": 0.4928,
"mean_token_accuracy": 0.8352709040045738,
"num_tokens": 230553639.0,
"step": 402
},
{
"epoch": 3.0764818355640537,
"grad_norm": 0.1420958733294688,
"learning_rate": 9.430276963581526e-06,
"loss": 0.4624,
"mean_token_accuracy": 0.8442712873220444,
"num_tokens": 231129639.0,
"step": 403
},
{
"epoch": 3.084130019120459,
"grad_norm": 0.13205149824341564,
"learning_rate": 9.427494432818514e-06,
"loss": 0.4558,
"mean_token_accuracy": 0.8468581959605217,
"num_tokens": 231705639.0,
"step": 404
},
{
"epoch": 3.0917782026768643,
"grad_norm": 0.1316126104720129,
"learning_rate": 9.424705536224065e-06,
"loss": 0.4634,
"mean_token_accuracy": 0.8433320075273514,
"num_tokens": 232281639.0,
"step": 405
},
{
"epoch": 3.0994263862332696,
"grad_norm": 0.1407535847573216,
"learning_rate": 9.421910277808044e-06,
"loss": 0.4896,
"mean_token_accuracy": 0.8361390084028244,
"num_tokens": 232857639.0,
"step": 406
},
{
"epoch": 3.107074569789675,
"grad_norm": 0.13472622990137864,
"learning_rate": 9.419108661589462e-06,
"loss": 0.4536,
"mean_token_accuracy": 0.8465508967638016,
"num_tokens": 233433639.0,
"step": 407
},
{
"epoch": 3.11472275334608,
"grad_norm": 0.1451607665647199,
"learning_rate": 9.416300691596469e-06,
"loss": 0.4722,
"mean_token_accuracy": 0.8422694802284241,
"num_tokens": 234009639.0,
"step": 408
},
{
"epoch": 3.1223709369024855,
"grad_norm": 0.1467639357286111,
"learning_rate": 9.41348637186635e-06,
"loss": 0.4591,
"mean_token_accuracy": 0.8459449708461761,
"num_tokens": 234585639.0,
"step": 409
},
{
"epoch": 3.130019120458891,
"grad_norm": 0.14178633805551513,
"learning_rate": 9.410665706445521e-06,
"loss": 0.4388,
"mean_token_accuracy": 0.8521133288741112,
"num_tokens": 235138310.0,
"step": 410
},
{
"epoch": 3.1376673040152965,
"grad_norm": 0.13734459053843015,
"learning_rate": 9.407838699389525e-06,
"loss": 0.4542,
"mean_token_accuracy": 0.846922442317009,
"num_tokens": 235714310.0,
"step": 411
},
{
"epoch": 3.1453154875717018,
"grad_norm": 0.1332800709874699,
"learning_rate": 9.405005354763017e-06,
"loss": 0.4542,
"mean_token_accuracy": 0.8467904925346375,
"num_tokens": 236290310.0,
"step": 412
},
{
"epoch": 3.152963671128107,
"grad_norm": 0.13291737890683744,
"learning_rate": 9.40216567663977e-06,
"loss": 0.4474,
"mean_token_accuracy": 0.8494607359170914,
"num_tokens": 236866310.0,
"step": 413
},
{
"epoch": 3.1606118546845123,
"grad_norm": 0.13823265816280292,
"learning_rate": 9.399319669102655e-06,
"loss": 0.4419,
"mean_token_accuracy": 0.8513462394475937,
"num_tokens": 237442310.0,
"step": 414
},
{
"epoch": 3.1682600382409176,
"grad_norm": 0.13709902619662914,
"learning_rate": 9.396467336243656e-06,
"loss": 0.4385,
"mean_token_accuracy": 0.8522455841302872,
"num_tokens": 238018310.0,
"step": 415
},
{
"epoch": 3.1759082217973233,
"grad_norm": 0.13499552935521622,
"learning_rate": 9.39360868216384e-06,
"loss": 0.4576,
"mean_token_accuracy": 0.8459102585911751,
"num_tokens": 238594310.0,
"step": 416
},
{
"epoch": 3.1835564053537286,
"grad_norm": 0.1433007756568991,
"learning_rate": 9.390743710973366e-06,
"loss": 0.4664,
"mean_token_accuracy": 0.8439188376069069,
"num_tokens": 239170310.0,
"step": 417
},
{
"epoch": 3.191204588910134,
"grad_norm": 0.129781541311147,
"learning_rate": 9.38787242679148e-06,
"loss": 0.4508,
"mean_token_accuracy": 0.8471464067697525,
"num_tokens": 239746310.0,
"step": 418
},
{
"epoch": 3.198852772466539,
"grad_norm": 0.13681530680608084,
"learning_rate": 9.384994833746496e-06,
"loss": 0.4529,
"mean_token_accuracy": 0.8484097048640251,
"num_tokens": 240314805.0,
"step": 419
},
{
"epoch": 3.2065009560229445,
"grad_norm": 0.1352192841534592,
"learning_rate": 9.382110935975811e-06,
"loss": 0.4646,
"mean_token_accuracy": 0.8462948203086853,
"num_tokens": 240868750.0,
"step": 420
},
{
"epoch": 3.21414913957935,
"grad_norm": 0.14157928396699657,
"learning_rate": 9.379220737625877e-06,
"loss": 0.4686,
"mean_token_accuracy": 0.8430594280362129,
"num_tokens": 241444750.0,
"step": 421
},
{
"epoch": 3.221797323135755,
"grad_norm": 0.13680279148546654,
"learning_rate": 9.376324242852206e-06,
"loss": 0.4461,
"mean_token_accuracy": 0.8499329835176468,
"num_tokens": 242020750.0,
"step": 422
},
{
"epoch": 3.229445506692161,
"grad_norm": 0.13441364291759264,
"learning_rate": 9.37342145581937e-06,
"loss": 0.4534,
"mean_token_accuracy": 0.8477957472205162,
"num_tokens": 242596750.0,
"step": 423
},
{
"epoch": 3.237093690248566,
"grad_norm": 0.14996464113428698,
"learning_rate": 9.370512380700976e-06,
"loss": 0.4538,
"mean_token_accuracy": 0.8473148196935654,
"num_tokens": 243172750.0,
"step": 424
},
{
"epoch": 3.2447418738049714,
"grad_norm": 0.1291772530250717,
"learning_rate": 9.367597021679686e-06,
"loss": 0.4576,
"mean_token_accuracy": 0.8455560505390167,
"num_tokens": 243748750.0,
"step": 425
},
{
"epoch": 3.2523900573613767,
"grad_norm": 0.14578020775893444,
"learning_rate": 9.364675382947185e-06,
"loss": 0.4318,
"mean_token_accuracy": 0.8554418906569481,
"num_tokens": 244324750.0,
"step": 426
},
{
"epoch": 3.260038240917782,
"grad_norm": 0.13750460715362675,
"learning_rate": 9.361747468704196e-06,
"loss": 0.4418,
"mean_token_accuracy": 0.8520320132374763,
"num_tokens": 244900750.0,
"step": 427
},
{
"epoch": 3.2676864244741872,
"grad_norm": 0.1372424458911705,
"learning_rate": 9.35881328316046e-06,
"loss": 0.4765,
"mean_token_accuracy": 0.8400749266147614,
"num_tokens": 245476750.0,
"step": 428
},
{
"epoch": 3.275334608030593,
"grad_norm": 0.1395311312663304,
"learning_rate": 9.35587283053473e-06,
"loss": 0.5033,
"mean_token_accuracy": 0.8329617977142334,
"num_tokens": 246052750.0,
"step": 429
},
{
"epoch": 3.2829827915869982,
"grad_norm": 0.13615900761434377,
"learning_rate": 9.352926115054783e-06,
"loss": 0.4659,
"mean_token_accuracy": 0.8443350717425346,
"num_tokens": 246622973.0,
"step": 430
},
{
"epoch": 3.2906309751434035,
"grad_norm": 0.13476399243406678,
"learning_rate": 9.349973140957392e-06,
"loss": 0.4588,
"mean_token_accuracy": 0.8465144336223602,
"num_tokens": 247198973.0,
"step": 431
},
{
"epoch": 3.298279158699809,
"grad_norm": 0.138319841165643,
"learning_rate": 9.347013912488324e-06,
"loss": 0.4456,
"mean_token_accuracy": 0.8498670160770416,
"num_tokens": 247774973.0,
"step": 432
},
{
"epoch": 3.305927342256214,
"grad_norm": 0.1295935255491883,
"learning_rate": 9.344048433902351e-06,
"loss": 0.444,
"mean_token_accuracy": 0.8501743152737617,
"num_tokens": 248350973.0,
"step": 433
},
{
"epoch": 3.3135755258126194,
"grad_norm": 0.1352566895751494,
"learning_rate": 9.34107670946322e-06,
"loss": 0.4381,
"mean_token_accuracy": 0.8526205867528915,
"num_tokens": 248926973.0,
"step": 434
},
{
"epoch": 3.3212237093690247,
"grad_norm": 0.13526914802404547,
"learning_rate": 9.338098743443666e-06,
"loss": 0.4552,
"mean_token_accuracy": 0.8469623699784279,
"num_tokens": 249502973.0,
"step": 435
},
{
"epoch": 3.3288718929254304,
"grad_norm": 0.1378327942247355,
"learning_rate": 9.335114540125393e-06,
"loss": 0.4983,
"mean_token_accuracy": 0.8344531655311584,
"num_tokens": 250078973.0,
"step": 436
},
{
"epoch": 3.3365200764818357,
"grad_norm": 0.13678024458347515,
"learning_rate": 9.332124103799075e-06,
"loss": 0.4648,
"mean_token_accuracy": 0.8443667814135551,
"num_tokens": 250654973.0,
"step": 437
},
{
"epoch": 3.344168260038241,
"grad_norm": 0.13664942328914748,
"learning_rate": 9.329127438764351e-06,
"loss": 0.4892,
"mean_token_accuracy": 0.8364584743976593,
"num_tokens": 251230973.0,
"step": 438
},
{
"epoch": 3.3518164435946463,
"grad_norm": 0.13783383905141766,
"learning_rate": 9.32612454932981e-06,
"loss": 0.4633,
"mean_token_accuracy": 0.8456706553697586,
"num_tokens": 251806973.0,
"step": 439
},
{
"epoch": 3.3594646271510515,
"grad_norm": 0.13339399076355143,
"learning_rate": 9.323115439812993e-06,
"loss": 0.4437,
"mean_token_accuracy": 0.8508689105510712,
"num_tokens": 252367168.0,
"step": 440
},
{
"epoch": 3.367112810707457,
"grad_norm": 0.1338168045570386,
"learning_rate": 9.320100114540382e-06,
"loss": 0.4634,
"mean_token_accuracy": 0.8454900979995728,
"num_tokens": 252943168.0,
"step": 441
},
{
"epoch": 3.3747609942638626,
"grad_norm": 0.13234254602598014,
"learning_rate": 9.317078577847402e-06,
"loss": 0.4669,
"mean_token_accuracy": 0.843821607530117,
"num_tokens": 253519168.0,
"step": 442
},
{
"epoch": 3.382409177820268,
"grad_norm": 0.13860690572904696,
"learning_rate": 9.314050834078401e-06,
"loss": 0.453,
"mean_token_accuracy": 0.8475092798471451,
"num_tokens": 254095168.0,
"step": 443
},
{
"epoch": 3.390057361376673,
"grad_norm": 0.14010574280265636,
"learning_rate": 9.311016887586659e-06,
"loss": 0.4529,
"mean_token_accuracy": 0.8471189886331558,
"num_tokens": 254669068.0,
"step": 444
},
{
"epoch": 3.3977055449330784,
"grad_norm": 0.13286366341534026,
"learning_rate": 9.307976742734366e-06,
"loss": 0.4579,
"mean_token_accuracy": 0.8456550240516663,
"num_tokens": 255245068.0,
"step": 445
},
{
"epoch": 3.4053537284894837,
"grad_norm": 0.12287227122879738,
"learning_rate": 9.304930403892633e-06,
"loss": 0.4284,
"mean_token_accuracy": 0.855134591460228,
"num_tokens": 255821068.0,
"step": 446
},
{
"epoch": 3.413001912045889,
"grad_norm": 0.13496131187274832,
"learning_rate": 9.30187787544147e-06,
"loss": 0.4519,
"mean_token_accuracy": 0.8480301275849342,
"num_tokens": 256397068.0,
"step": 447
},
{
"epoch": 3.4206500956022943,
"grad_norm": 0.12304815197408708,
"learning_rate": 9.298819161769788e-06,
"loss": 0.4055,
"mean_token_accuracy": 0.863836444914341,
"num_tokens": 256970705.0,
"step": 448
},
{
"epoch": 3.4282982791587,
"grad_norm": 0.1312084281080521,
"learning_rate": 9.295754267275393e-06,
"loss": 0.4304,
"mean_token_accuracy": 0.8547022864222527,
"num_tokens": 257546705.0,
"step": 449
},
{
"epoch": 3.4359464627151053,
"grad_norm": 0.1424625968795541,
"learning_rate": 9.292683196364975e-06,
"loss": 0.5003,
"mean_token_accuracy": 0.8330485969781876,
"num_tokens": 258122705.0,
"step": 450
},
{
"epoch": 3.4435946462715106,
"grad_norm": 0.13782377842414753,
"learning_rate": 9.289605953454108e-06,
"loss": 0.4567,
"mean_token_accuracy": 0.8465578481554985,
"num_tokens": 258698705.0,
"step": 451
},
{
"epoch": 3.451242829827916,
"grad_norm": 0.13609928849701888,
"learning_rate": 9.286522542967235e-06,
"loss": 0.4584,
"mean_token_accuracy": 0.8467818200588226,
"num_tokens": 259274705.0,
"step": 452
},
{
"epoch": 3.458891013384321,
"grad_norm": 0.1261203105793479,
"learning_rate": 9.283432969337672e-06,
"loss": 0.4616,
"mean_token_accuracy": 0.8468790426850319,
"num_tokens": 259850705.0,
"step": 453
},
{
"epoch": 3.4665391969407264,
"grad_norm": 0.13614975492427833,
"learning_rate": 9.280337237007592e-06,
"loss": 0.464,
"mean_token_accuracy": 0.8447556719183922,
"num_tokens": 260426705.0,
"step": 454
},
{
"epoch": 3.474187380497132,
"grad_norm": 0.1327458325746583,
"learning_rate": 9.277235350428029e-06,
"loss": 0.4454,
"mean_token_accuracy": 0.8494572639465332,
"num_tokens": 261002705.0,
"step": 455
},
{
"epoch": 3.4818355640535374,
"grad_norm": 0.13085159750760433,
"learning_rate": 9.274127314058857e-06,
"loss": 0.4371,
"mean_token_accuracy": 0.852618858218193,
"num_tokens": 261578705.0,
"step": 456
},
{
"epoch": 3.4894837476099427,
"grad_norm": 0.13578463225475795,
"learning_rate": 9.271013132368799e-06,
"loss": 0.4642,
"mean_token_accuracy": 0.8443883061408997,
"num_tokens": 262140970.0,
"step": 457
},
{
"epoch": 3.497131931166348,
"grad_norm": 0.14006598972599096,
"learning_rate": 9.267892809835409e-06,
"loss": 0.4637,
"mean_token_accuracy": 0.8449813947081566,
"num_tokens": 262716970.0,
"step": 458
},
{
"epoch": 3.5047801147227533,
"grad_norm": 0.13185777866523177,
"learning_rate": 9.264766350945076e-06,
"loss": 0.4302,
"mean_token_accuracy": 0.8541102334856987,
"num_tokens": 263292970.0,
"step": 459
},
{
"epoch": 3.5124282982791586,
"grad_norm": 0.13342931833994348,
"learning_rate": 9.261633760193005e-06,
"loss": 0.4686,
"mean_token_accuracy": 0.8435091152787209,
"num_tokens": 263868970.0,
"step": 460
},
{
"epoch": 3.520076481835564,
"grad_norm": 0.13168965271918778,
"learning_rate": 9.258495042083222e-06,
"loss": 0.4434,
"mean_token_accuracy": 0.8501882031559944,
"num_tokens": 264444970.0,
"step": 461
},
{
"epoch": 3.527724665391969,
"grad_norm": 0.13243605685606727,
"learning_rate": 9.255350201128564e-06,
"loss": 0.4465,
"mean_token_accuracy": 0.8496812433004379,
"num_tokens": 265020970.0,
"step": 462
},
{
"epoch": 3.535372848948375,
"grad_norm": 0.1552286657686917,
"learning_rate": 9.252199241850666e-06,
"loss": 0.446,
"mean_token_accuracy": 0.8502941057085991,
"num_tokens": 265596970.0,
"step": 463
},
{
"epoch": 3.54302103250478,
"grad_norm": 0.13185696300414065,
"learning_rate": 9.249042168779962e-06,
"loss": 0.4391,
"mean_token_accuracy": 0.8525671735405922,
"num_tokens": 266155354.0,
"step": 464
},
{
"epoch": 3.5506692160611855,
"grad_norm": 0.12925331557566425,
"learning_rate": 9.245878986455684e-06,
"loss": 0.4556,
"mean_token_accuracy": 0.8468790277838707,
"num_tokens": 266731354.0,
"step": 465
},
{
"epoch": 3.5583173996175907,
"grad_norm": 0.14112072128802675,
"learning_rate": 9.242709699425833e-06,
"loss": 0.4499,
"mean_token_accuracy": 0.8501986265182495,
"num_tokens": 267307354.0,
"step": 466
},
{
"epoch": 3.565965583173996,
"grad_norm": 0.12961482591836798,
"learning_rate": 9.2395343122472e-06,
"loss": 0.4794,
"mean_token_accuracy": 0.8403509855270386,
"num_tokens": 267883354.0,
"step": 467
},
{
"epoch": 3.5736137667304018,
"grad_norm": 0.15553386932308658,
"learning_rate": 9.236352829485342e-06,
"loss": 0.447,
"mean_token_accuracy": 0.8502125144004822,
"num_tokens": 268459354.0,
"step": 468
},
{
"epoch": 3.581261950286807,
"grad_norm": 0.13773631625374297,
"learning_rate": 9.23316525571458e-06,
"loss": 0.472,
"mean_token_accuracy": 0.8414899259805679,
"num_tokens": 269035354.0,
"step": 469
},
{
"epoch": 3.5889101338432123,
"grad_norm": 0.12994566689301978,
"learning_rate": 9.229971595517993e-06,
"loss": 0.4278,
"mean_token_accuracy": 0.8565200641751289,
"num_tokens": 269611354.0,
"step": 470
},
{
"epoch": 3.5965583173996176,
"grad_norm": 0.147867087508267,
"learning_rate": 9.226771853487411e-06,
"loss": 0.4862,
"mean_token_accuracy": 0.8371338397264481,
"num_tokens": 270187354.0,
"step": 471
},
{
"epoch": 3.604206500956023,
"grad_norm": 0.1333395328504752,
"learning_rate": 9.223566034223409e-06,
"loss": 0.4849,
"mean_token_accuracy": 0.838104359805584,
"num_tokens": 270763354.0,
"step": 472
},
{
"epoch": 3.611854684512428,
"grad_norm": 0.14126948407840498,
"learning_rate": 9.2203541423353e-06,
"loss": 0.4591,
"mean_token_accuracy": 0.8453320935368538,
"num_tokens": 271339354.0,
"step": 473
},
{
"epoch": 3.6195028680688335,
"grad_norm": 0.1364081032107475,
"learning_rate": 9.217136182441124e-06,
"loss": 0.4311,
"mean_token_accuracy": 0.8542838543653488,
"num_tokens": 271915354.0,
"step": 474
},
{
"epoch": 3.6271510516252388,
"grad_norm": 0.13312098293506802,
"learning_rate": 9.213912159167655e-06,
"loss": 0.4652,
"mean_token_accuracy": 0.8445072919130325,
"num_tokens": 272488686.0,
"step": 475
},
{
"epoch": 3.6347992351816445,
"grad_norm": 0.17152486390579674,
"learning_rate": 9.210682077150375e-06,
"loss": 0.4693,
"mean_token_accuracy": 0.8427104577422142,
"num_tokens": 273064686.0,
"step": 476
},
{
"epoch": 3.64244741873805,
"grad_norm": 0.1463912816972637,
"learning_rate": 9.207445941033483e-06,
"loss": 0.4397,
"mean_token_accuracy": 0.8523378744721413,
"num_tokens": 273617956.0,
"step": 477
},
{
"epoch": 3.650095602294455,
"grad_norm": 0.1389469013265535,
"learning_rate": 9.204203755469879e-06,
"loss": 0.4722,
"mean_token_accuracy": 0.8406079337000847,
"num_tokens": 274193956.0,
"step": 478
},
{
"epoch": 3.6577437858508604,
"grad_norm": 0.1304788404105764,
"learning_rate": 9.200955525121165e-06,
"loss": 0.4286,
"mean_token_accuracy": 0.8560905903577805,
"num_tokens": 274756063.0,
"step": 479
},
{
"epoch": 3.6653919694072656,
"grad_norm": 0.13302202550891926,
"learning_rate": 9.197701254657631e-06,
"loss": 0.4328,
"mean_token_accuracy": 0.8527768403291702,
"num_tokens": 275332063.0,
"step": 480
},
{
"epoch": 3.6730401529636714,
"grad_norm": 0.13640185735673466,
"learning_rate": 9.19444094875825e-06,
"loss": 0.4494,
"mean_token_accuracy": 0.848458968102932,
"num_tokens": 275908063.0,
"step": 481
},
{
"epoch": 3.6806883365200767,
"grad_norm": 0.1356639995335594,
"learning_rate": 9.19117461211068e-06,
"loss": 0.4407,
"mean_token_accuracy": 0.8518905341625214,
"num_tokens": 276469569.0,
"step": 482
},
{
"epoch": 3.688336520076482,
"grad_norm": 0.13619130952960193,
"learning_rate": 9.187902249411241e-06,
"loss": 0.4502,
"mean_token_accuracy": 0.8488947451114655,
"num_tokens": 277045569.0,
"step": 483
},
{
"epoch": 3.6959847036328872,
"grad_norm": 0.13708965399379136,
"learning_rate": 9.184623865364924e-06,
"loss": 0.4769,
"mean_token_accuracy": 0.8410697728395462,
"num_tokens": 277621569.0,
"step": 484
},
{
"epoch": 3.7036328871892925,
"grad_norm": 0.13720203429880218,
"learning_rate": 9.18133946468537e-06,
"loss": 0.4651,
"mean_token_accuracy": 0.8440004512667656,
"num_tokens": 278197569.0,
"step": 485
},
{
"epoch": 3.711281070745698,
"grad_norm": 0.1299350454538957,
"learning_rate": 9.178049052094881e-06,
"loss": 0.437,
"mean_token_accuracy": 0.8537300229072571,
"num_tokens": 278773569.0,
"step": 486
},
{
"epoch": 3.718929254302103,
"grad_norm": 0.13110362377063814,
"learning_rate": 9.174752632324394e-06,
"loss": 0.4408,
"mean_token_accuracy": 0.8510458767414093,
"num_tokens": 279349569.0,
"step": 487
},
{
"epoch": 3.7265774378585084,
"grad_norm": 0.13551732054175086,
"learning_rate": 9.171450210113487e-06,
"loss": 0.4622,
"mean_token_accuracy": 0.844747006893158,
"num_tokens": 279925569.0,
"step": 488
},
{
"epoch": 3.734225621414914,
"grad_norm": 0.13730675054002467,
"learning_rate": 9.16814179021037e-06,
"loss": 0.4284,
"mean_token_accuracy": 0.8542994931340218,
"num_tokens": 280501569.0,
"step": 489
},
{
"epoch": 3.7418738049713194,
"grad_norm": 0.1292955427034706,
"learning_rate": 9.16482737737187e-06,
"loss": 0.463,
"mean_token_accuracy": 0.8444327488541603,
"num_tokens": 281077569.0,
"step": 490
},
{
"epoch": 3.7495219885277247,
"grad_norm": 0.13912430248307478,
"learning_rate": 9.161506976363438e-06,
"loss": 0.4417,
"mean_token_accuracy": 0.8520372435450554,
"num_tokens": 281653569.0,
"step": 491
},
{
"epoch": 3.75717017208413,
"grad_norm": 0.1366214099806215,
"learning_rate": 9.158180591959131e-06,
"loss": 0.4589,
"mean_token_accuracy": 0.8459536507725716,
"num_tokens": 282229569.0,
"step": 492
},
{
"epoch": 3.7648183556405352,
"grad_norm": 0.1369364998873903,
"learning_rate": 9.154848228941607e-06,
"loss": 0.4385,
"mean_token_accuracy": 0.8520493879914284,
"num_tokens": 282805569.0,
"step": 493
},
{
"epoch": 3.772466539196941,
"grad_norm": 0.12498764879815455,
"learning_rate": 9.151509892102125e-06,
"loss": 0.4247,
"mean_token_accuracy": 0.8574089854955673,
"num_tokens": 283381569.0,
"step": 494
},
{
"epoch": 3.7801147227533463,
"grad_norm": 0.1322000135860979,
"learning_rate": 9.148165586240531e-06,
"loss": 0.4491,
"mean_token_accuracy": 0.8488652408123016,
"num_tokens": 283957569.0,
"step": 495
},
{
"epoch": 3.7877629063097515,
"grad_norm": 0.13844783624470497,
"learning_rate": 9.144815316165251e-06,
"loss": 0.4533,
"mean_token_accuracy": 0.8480179756879807,
"num_tokens": 284533569.0,
"step": 496
},
{
"epoch": 3.795411089866157,
"grad_norm": 0.14356041185467444,
"learning_rate": 9.14145908669329e-06,
"loss": 0.4415,
"mean_token_accuracy": 0.8507316261529922,
"num_tokens": 285109569.0,
"step": 497
},
{
"epoch": 3.803059273422562,
"grad_norm": 0.13253514930903268,
"learning_rate": 9.138096902650217e-06,
"loss": 0.4577,
"mean_token_accuracy": 0.845842532813549,
"num_tokens": 285685569.0,
"step": 498
},
{
"epoch": 3.8107074569789674,
"grad_norm": 0.1317692376942017,
"learning_rate": 9.134728768870167e-06,
"loss": 0.4439,
"mean_token_accuracy": 0.8503479510545731,
"num_tokens": 286261569.0,
"step": 499
},
{
"epoch": 3.8183556405353727,
"grad_norm": 0.12686756349523995,
"learning_rate": 9.131354690195827e-06,
"loss": 0.4351,
"mean_token_accuracy": 0.8522073924541473,
"num_tokens": 286837569.0,
"step": 500
},
{
"epoch": 3.826003824091778,
"grad_norm": 0.12900551278598996,
"learning_rate": 9.127974671478432e-06,
"loss": 0.4314,
"mean_token_accuracy": 0.8539366275072098,
"num_tokens": 287413569.0,
"step": 501
},
{
"epoch": 3.8336520076481837,
"grad_norm": 0.13292231226530699,
"learning_rate": 9.124588717577759e-06,
"loss": 0.4686,
"mean_token_accuracy": 0.8422486335039139,
"num_tokens": 287989569.0,
"step": 502
},
{
"epoch": 3.841300191204589,
"grad_norm": 0.136683585551859,
"learning_rate": 9.121196833362112e-06,
"loss": 0.4483,
"mean_token_accuracy": 0.8491551652550697,
"num_tokens": 288565569.0,
"step": 503
},
{
"epoch": 3.8489483747609943,
"grad_norm": 0.1444111504728688,
"learning_rate": 9.117799023708334e-06,
"loss": 0.4773,
"mean_token_accuracy": 0.8403370901942253,
"num_tokens": 289141569.0,
"step": 504
},
{
"epoch": 3.8565965583173996,
"grad_norm": 0.12171464057354885,
"learning_rate": 9.114395293501775e-06,
"loss": 0.4109,
"mean_token_accuracy": 0.8606747463345528,
"num_tokens": 289717569.0,
"step": 505
},
{
"epoch": 3.864244741873805,
"grad_norm": 0.13308127354938107,
"learning_rate": 9.110985647636303e-06,
"loss": 0.4475,
"mean_token_accuracy": 0.8503652960062027,
"num_tokens": 290293569.0,
"step": 506
},
{
"epoch": 3.8718929254302106,
"grad_norm": 0.16593234316109462,
"learning_rate": 9.107570091014295e-06,
"loss": 0.479,
"mean_token_accuracy": 0.8400173112750053,
"num_tokens": 290868510.0,
"step": 507
},
{
"epoch": 3.879541108986616,
"grad_norm": 0.13689658405775249,
"learning_rate": 9.10414862854662e-06,
"loss": 0.4492,
"mean_token_accuracy": 0.8498774170875549,
"num_tokens": 291444510.0,
"step": 508
},
{
"epoch": 3.887189292543021,
"grad_norm": 0.12795882142422862,
"learning_rate": 9.100721265152644e-06,
"loss": 0.4376,
"mean_token_accuracy": 0.8533775582909584,
"num_tokens": 292020510.0,
"step": 509
},
{
"epoch": 3.8948374760994264,
"grad_norm": 0.13169792505551053,
"learning_rate": 9.097288005760213e-06,
"loss": 0.4498,
"mean_token_accuracy": 0.8481290861964226,
"num_tokens": 292596510.0,
"step": 510
},
{
"epoch": 3.9024856596558317,
"grad_norm": 0.1297287757258664,
"learning_rate": 9.09384885530565e-06,
"loss": 0.4605,
"mean_token_accuracy": 0.8447956144809723,
"num_tokens": 293172510.0,
"step": 511
},
{
"epoch": 3.910133843212237,
"grad_norm": 0.1376874016028007,
"learning_rate": 9.09040381873375e-06,
"loss": 0.476,
"mean_token_accuracy": 0.8403527215123177,
"num_tokens": 293748510.0,
"step": 512
},
{
"epoch": 3.9177820267686423,
"grad_norm": 0.1297958223852381,
"learning_rate": 9.086952900997774e-06,
"loss": 0.4518,
"mean_token_accuracy": 0.8482714593410492,
"num_tokens": 294324510.0,
"step": 513
},
{
"epoch": 3.9254302103250476,
"grad_norm": 0.13708161020019655,
"learning_rate": 9.083496107059433e-06,
"loss": 0.4636,
"mean_token_accuracy": 0.8451602086424828,
"num_tokens": 294900510.0,
"step": 514
},
{
"epoch": 3.9330783938814533,
"grad_norm": 0.12977023510450897,
"learning_rate": 9.08003344188889e-06,
"loss": 0.4627,
"mean_token_accuracy": 0.8444761633872986,
"num_tokens": 295476510.0,
"step": 515
},
{
"epoch": 3.9407265774378586,
"grad_norm": 0.13266699207916796,
"learning_rate": 9.076564910464753e-06,
"loss": 0.4432,
"mean_token_accuracy": 0.8500024378299713,
"num_tokens": 296052510.0,
"step": 516
},
{
"epoch": 3.948374760994264,
"grad_norm": 0.13852665867070613,
"learning_rate": 9.073090517774057e-06,
"loss": 0.4505,
"mean_token_accuracy": 0.8480318710207939,
"num_tokens": 296628510.0,
"step": 517
},
{
"epoch": 3.956022944550669,
"grad_norm": 0.13867926058656033,
"learning_rate": 9.06961026881227e-06,
"loss": 0.4736,
"mean_token_accuracy": 0.8420107811689377,
"num_tokens": 297204510.0,
"step": 518
},
{
"epoch": 3.9636711281070744,
"grad_norm": 0.1472014718774958,
"learning_rate": 9.066124168583277e-06,
"loss": 0.4339,
"mean_token_accuracy": 0.8534956350922585,
"num_tokens": 297780510.0,
"step": 519
},
{
"epoch": 3.97131931166348,
"grad_norm": 0.14861258236560726,
"learning_rate": 9.062632222099375e-06,
"loss": 0.4604,
"mean_token_accuracy": 0.8463720604777336,
"num_tokens": 298356510.0,
"step": 520
},
{
"epoch": 3.9789674952198855,
"grad_norm": 0.13635801589712673,
"learning_rate": 9.059134434381274e-06,
"loss": 0.4673,
"mean_token_accuracy": 0.8437070325016975,
"num_tokens": 298932510.0,
"step": 521
},
{
"epoch": 3.9866156787762907,
"grad_norm": 0.13517971159266337,
"learning_rate": 9.055630810458072e-06,
"loss": 0.4703,
"mean_token_accuracy": 0.8419812694191933,
"num_tokens": 299508510.0,
"step": 522
},
{
"epoch": 3.994263862332696,
"grad_norm": 0.13502762281684197,
"learning_rate": 9.052121355367267e-06,
"loss": 0.447,
"mean_token_accuracy": 0.8502528890967369,
"num_tokens": 300071157.0,
"step": 523
},
{
"epoch": 4.0,
"grad_norm": 0.16299524977157676,
"learning_rate": 9.048606074154738e-06,
"loss": 0.4471,
"mean_token_accuracy": 0.8504007657368978,
"num_tokens": 300492852.0,
"step": 524
},
{
"epoch": 4.007648183556405,
"grad_norm": 0.1367796055868044,
"learning_rate": 9.045084971874738e-06,
"loss": 0.4092,
"mean_token_accuracy": 0.8609004467725754,
"num_tokens": 301068852.0,
"step": 525
},
{
"epoch": 4.015296367112811,
"grad_norm": 0.13716134351284365,
"learning_rate": 9.041558053589894e-06,
"loss": 0.43,
"mean_token_accuracy": 0.853941835463047,
"num_tokens": 301644852.0,
"step": 526
},
{
"epoch": 4.022944550669216,
"grad_norm": 0.13676487285704125,
"learning_rate": 9.038025324371192e-06,
"loss": 0.4638,
"mean_token_accuracy": 0.8445838019251823,
"num_tokens": 302220852.0,
"step": 527
},
{
"epoch": 4.030592734225621,
"grad_norm": 0.13821684713351107,
"learning_rate": 9.034486789297973e-06,
"loss": 0.4609,
"mean_token_accuracy": 0.8445247709751129,
"num_tokens": 302796852.0,
"step": 528
},
{
"epoch": 4.038240917782026,
"grad_norm": 0.14599457165525512,
"learning_rate": 9.030942453457928e-06,
"loss": 0.4614,
"mean_token_accuracy": 0.8445004597306252,
"num_tokens": 303372852.0,
"step": 529
},
{
"epoch": 4.045889101338432,
"grad_norm": 0.16625511185143685,
"learning_rate": 9.027392321947088e-06,
"loss": 0.4793,
"mean_token_accuracy": 0.838548831641674,
"num_tokens": 303948852.0,
"step": 530
},
{
"epoch": 4.053537284894838,
"grad_norm": 0.138119624588249,
"learning_rate": 9.023836399869814e-06,
"loss": 0.4242,
"mean_token_accuracy": 0.8558134436607361,
"num_tokens": 304524852.0,
"step": 531
},
{
"epoch": 4.061185468451243,
"grad_norm": 0.14231125279660453,
"learning_rate": 9.020274692338796e-06,
"loss": 0.4149,
"mean_token_accuracy": 0.858605220913887,
"num_tokens": 305100852.0,
"step": 532
},
{
"epoch": 4.0688336520076485,
"grad_norm": 0.13579393688100058,
"learning_rate": 9.01670720447504e-06,
"loss": 0.4289,
"mean_token_accuracy": 0.8546015843749046,
"num_tokens": 305676852.0,
"step": 533
},
{
"epoch": 4.076481835564054,
"grad_norm": 0.14167790695591406,
"learning_rate": 9.013133941407866e-06,
"loss": 0.4585,
"mean_token_accuracy": 0.8454310745000839,
"num_tokens": 306252852.0,
"step": 534
},
{
"epoch": 4.084130019120459,
"grad_norm": 0.1352511945058088,
"learning_rate": 9.009554908274893e-06,
"loss": 0.444,
"mean_token_accuracy": 0.8504850938916206,
"num_tokens": 306828852.0,
"step": 535
},
{
"epoch": 4.091778202676864,
"grad_norm": 0.1432264112617214,
"learning_rate": 9.00597011022204e-06,
"loss": 0.4587,
"mean_token_accuracy": 0.8464554026722908,
"num_tokens": 307404852.0,
"step": 536
},
{
"epoch": 4.09942638623327,
"grad_norm": 0.13151056491832114,
"learning_rate": 9.00237955240351e-06,
"loss": 0.4168,
"mean_token_accuracy": 0.8592042028903961,
"num_tokens": 307980852.0,
"step": 537
},
{
"epoch": 4.107074569789675,
"grad_norm": 0.133618272264501,
"learning_rate": 8.998783239981796e-06,
"loss": 0.4271,
"mean_token_accuracy": 0.8552821651101112,
"num_tokens": 308556852.0,
"step": 538
},
{
"epoch": 4.11472275334608,
"grad_norm": 0.13915971751181871,
"learning_rate": 8.995181178127659e-06,
"loss": 0.4518,
"mean_token_accuracy": 0.8473391234874725,
"num_tokens": 309132852.0,
"step": 539
},
{
"epoch": 4.1223709369024855,
"grad_norm": 0.13454371391547879,
"learning_rate": 8.991573372020123e-06,
"loss": 0.4324,
"mean_token_accuracy": 0.8538081347942352,
"num_tokens": 309708852.0,
"step": 540
},
{
"epoch": 4.130019120458891,
"grad_norm": 0.13213992505186528,
"learning_rate": 8.987959826846479e-06,
"loss": 0.4461,
"mean_token_accuracy": 0.8492263630032539,
"num_tokens": 310284852.0,
"step": 541
},
{
"epoch": 4.137667304015296,
"grad_norm": 0.13593594697201444,
"learning_rate": 8.984340547802264e-06,
"loss": 0.4461,
"mean_token_accuracy": 0.8491100370883942,
"num_tokens": 310860852.0,
"step": 542
},
{
"epoch": 4.145315487571701,
"grad_norm": 0.12981333151349195,
"learning_rate": 8.980715540091263e-06,
"loss": 0.4266,
"mean_token_accuracy": 0.8562596216797829,
"num_tokens": 311436852.0,
"step": 543
},
{
"epoch": 4.1529636711281075,
"grad_norm": 0.13614955901925288,
"learning_rate": 8.977084808925494e-06,
"loss": 0.4121,
"mean_token_accuracy": 0.859827496111393,
"num_tokens": 312012852.0,
"step": 544
},
{
"epoch": 4.160611854684513,
"grad_norm": 0.13682770116117984,
"learning_rate": 8.973448359525207e-06,
"loss": 0.4318,
"mean_token_accuracy": 0.8550026342272758,
"num_tokens": 312588852.0,
"step": 545
},
{
"epoch": 4.168260038240918,
"grad_norm": 0.137849063910186,
"learning_rate": 8.96980619711887e-06,
"loss": 0.4395,
"mean_token_accuracy": 0.8518254309892654,
"num_tokens": 313164852.0,
"step": 546
},
{
"epoch": 4.175908221797323,
"grad_norm": 0.13710001561248336,
"learning_rate": 8.96615832694317e-06,
"loss": 0.4189,
"mean_token_accuracy": 0.8579558879137039,
"num_tokens": 313740852.0,
"step": 547
},
{
"epoch": 4.183556405353729,
"grad_norm": 0.13484817825542728,
"learning_rate": 8.962504754242997e-06,
"loss": 0.4178,
"mean_token_accuracy": 0.8575565740466118,
"num_tokens": 314316852.0,
"step": 548
},
{
"epoch": 4.191204588910134,
"grad_norm": 0.1355879929584855,
"learning_rate": 8.958845484271443e-06,
"loss": 0.4326,
"mean_token_accuracy": 0.8534713238477707,
"num_tokens": 314892852.0,
"step": 549
},
{
"epoch": 4.198852772466539,
"grad_norm": 0.13093335486708962,
"learning_rate": 8.955180522289787e-06,
"loss": 0.3991,
"mean_token_accuracy": 0.8641314953565598,
"num_tokens": 315468852.0,
"step": 550
},
{
"epoch": 4.2065009560229445,
"grad_norm": 0.1356895962011802,
"learning_rate": 8.951509873567498e-06,
"loss": 0.4595,
"mean_token_accuracy": 0.845748782157898,
"num_tokens": 316044852.0,
"step": 551
},
{
"epoch": 4.21414913957935,
"grad_norm": 0.1359263330797516,
"learning_rate": 8.947833543382216e-06,
"loss": 0.4512,
"mean_token_accuracy": 0.8480579107999802,
"num_tokens": 316620852.0,
"step": 552
},
{
"epoch": 4.221797323135755,
"grad_norm": 0.13568308044384295,
"learning_rate": 8.944151537019752e-06,
"loss": 0.4331,
"mean_token_accuracy": 0.8542803898453712,
"num_tokens": 317196852.0,
"step": 553
},
{
"epoch": 4.22944550669216,
"grad_norm": 0.14107612547899365,
"learning_rate": 8.940463859774078e-06,
"loss": 0.4391,
"mean_token_accuracy": 0.8508462160825729,
"num_tokens": 317772852.0,
"step": 554
},
{
"epoch": 4.237093690248566,
"grad_norm": 0.14768495035601778,
"learning_rate": 8.93677051694732e-06,
"loss": 0.4169,
"mean_token_accuracy": 0.8585600778460503,
"num_tokens": 318348852.0,
"step": 555
},
{
"epoch": 4.244741873804971,
"grad_norm": 0.1340278519676253,
"learning_rate": 8.93307151384975e-06,
"loss": 0.4234,
"mean_token_accuracy": 0.8561016395688057,
"num_tokens": 318924852.0,
"step": 556
},
{
"epoch": 4.252390057361376,
"grad_norm": 0.13680193270577393,
"learning_rate": 8.929366855799777e-06,
"loss": 0.4493,
"mean_token_accuracy": 0.8488930016756058,
"num_tokens": 319500852.0,
"step": 557
},
{
"epoch": 4.260038240917782,
"grad_norm": 0.13852513522634227,
"learning_rate": 8.925656548123942e-06,
"loss": 0.4648,
"mean_token_accuracy": 0.8442719057202339,
"num_tokens": 320063117.0,
"step": 558
},
{
"epoch": 4.267686424474188,
"grad_norm": 0.13624189114266874,
"learning_rate": 8.92194059615691e-06,
"loss": 0.437,
"mean_token_accuracy": 0.8514139503240585,
"num_tokens": 320639117.0,
"step": 559
},
{
"epoch": 4.275334608030593,
"grad_norm": 0.13350408206652886,
"learning_rate": 8.918219005241458e-06,
"loss": 0.4589,
"mean_token_accuracy": 0.844880685210228,
"num_tokens": 321215117.0,
"step": 560
},
{
"epoch": 4.282982791586998,
"grad_norm": 0.13649099774497725,
"learning_rate": 8.914491780728471e-06,
"loss": 0.4671,
"mean_token_accuracy": 0.8434830605983734,
"num_tokens": 321791117.0,
"step": 561
},
{
"epoch": 4.2906309751434035,
"grad_norm": 0.14170690277634168,
"learning_rate": 8.91075892797694e-06,
"loss": 0.4458,
"mean_token_accuracy": 0.8494138568639755,
"num_tokens": 322367117.0,
"step": 562
},
{
"epoch": 4.298279158699809,
"grad_norm": 0.1428239008677417,
"learning_rate": 8.90702045235394e-06,
"loss": 0.4537,
"mean_token_accuracy": 0.8470439687371254,
"num_tokens": 322943117.0,
"step": 563
},
{
"epoch": 4.305927342256214,
"grad_norm": 0.13750099119258852,
"learning_rate": 8.903276359234638e-06,
"loss": 0.434,
"mean_token_accuracy": 0.8533461019396782,
"num_tokens": 323518058.0,
"step": 564
},
{
"epoch": 4.313575525812619,
"grad_norm": 0.13798858771811107,
"learning_rate": 8.899526654002268e-06,
"loss": 0.4482,
"mean_token_accuracy": 0.8492783904075623,
"num_tokens": 324071328.0,
"step": 565
},
{
"epoch": 4.321223709369025,
"grad_norm": 0.13569499666557838,
"learning_rate": 8.895771342048145e-06,
"loss": 0.4322,
"mean_token_accuracy": 0.8544071167707443,
"num_tokens": 324647328.0,
"step": 566
},
{
"epoch": 4.32887189292543,
"grad_norm": 0.14183511720361952,
"learning_rate": 8.892010428771638e-06,
"loss": 0.4325,
"mean_token_accuracy": 0.8530407473444939,
"num_tokens": 325223328.0,
"step": 567
},
{
"epoch": 4.336520076481835,
"grad_norm": 0.14107156072568003,
"learning_rate": 8.88824391958017e-06,
"loss": 0.4307,
"mean_token_accuracy": 0.8547335267066956,
"num_tokens": 325799328.0,
"step": 568
},
{
"epoch": 4.3441682600382405,
"grad_norm": 0.13599509659002554,
"learning_rate": 8.88447181988921e-06,
"loss": 0.4258,
"mean_token_accuracy": 0.8559349700808525,
"num_tokens": 326375328.0,
"step": 569
},
{
"epoch": 4.351816443594647,
"grad_norm": 0.14168570065592437,
"learning_rate": 8.88069413512227e-06,
"loss": 0.447,
"mean_token_accuracy": 0.8499486148357391,
"num_tokens": 326951328.0,
"step": 570
},
{
"epoch": 4.359464627151052,
"grad_norm": 0.14287905535251402,
"learning_rate": 8.876910870710885e-06,
"loss": 0.4468,
"mean_token_accuracy": 0.8484693765640259,
"num_tokens": 327527328.0,
"step": 571
},
{
"epoch": 4.367112810707457,
"grad_norm": 0.13966911539057147,
"learning_rate": 8.873122032094614e-06,
"loss": 0.4578,
"mean_token_accuracy": 0.8461237996816635,
"num_tokens": 328103328.0,
"step": 572
},
{
"epoch": 4.374760994263863,
"grad_norm": 0.14261415507458267,
"learning_rate": 8.869327624721033e-06,
"loss": 0.4484,
"mean_token_accuracy": 0.8485492318868637,
"num_tokens": 328679328.0,
"step": 573
},
{
"epoch": 4.382409177820268,
"grad_norm": 0.133369532678928,
"learning_rate": 8.865527654045727e-06,
"loss": 0.4298,
"mean_token_accuracy": 0.8543098941445351,
"num_tokens": 329255328.0,
"step": 574
},
{
"epoch": 4.390057361376673,
"grad_norm": 0.14255996347846578,
"learning_rate": 8.861722125532272e-06,
"loss": 0.4581,
"mean_token_accuracy": 0.84616519510746,
"num_tokens": 329817975.0,
"step": 575
},
{
"epoch": 4.397705544933078,
"grad_norm": 0.14265360616209308,
"learning_rate": 8.857911044652244e-06,
"loss": 0.4457,
"mean_token_accuracy": 0.8503062576055527,
"num_tokens": 330393975.0,
"step": 576
},
{
"epoch": 4.405353728489484,
"grad_norm": 0.14108492866232827,
"learning_rate": 8.854094416885192e-06,
"loss": 0.4797,
"mean_token_accuracy": 0.8391808122396469,
"num_tokens": 330969975.0,
"step": 577
},
{
"epoch": 4.413001912045889,
"grad_norm": 0.1397903736370164,
"learning_rate": 8.850272247718654e-06,
"loss": 0.4497,
"mean_token_accuracy": 0.8479728251695633,
"num_tokens": 331545975.0,
"step": 578
},
{
"epoch": 4.420650095602294,
"grad_norm": 0.1421238151544833,
"learning_rate": 8.84644454264812e-06,
"loss": 0.4181,
"mean_token_accuracy": 0.8584993183612823,
"num_tokens": 332121975.0,
"step": 579
},
{
"epoch": 4.4282982791587,
"grad_norm": 0.13112204100458139,
"learning_rate": 8.842611307177051e-06,
"loss": 0.4182,
"mean_token_accuracy": 0.857766643166542,
"num_tokens": 332697975.0,
"step": 580
},
{
"epoch": 4.435946462715105,
"grad_norm": 0.1421745075127969,
"learning_rate": 8.838772546816857e-06,
"loss": 0.43,
"mean_token_accuracy": 0.8541948571801186,
"num_tokens": 333251920.0,
"step": 581
},
{
"epoch": 4.44359464627151,
"grad_norm": 0.1444747863216718,
"learning_rate": 8.834928267086884e-06,
"loss": 0.4605,
"mean_token_accuracy": 0.8450855612754822,
"num_tokens": 333827920.0,
"step": 582
},
{
"epoch": 4.451242829827915,
"grad_norm": 0.13443943817675832,
"learning_rate": 8.831078473514427e-06,
"loss": 0.4485,
"mean_token_accuracy": 0.8491065725684166,
"num_tokens": 334403920.0,
"step": 583
},
{
"epoch": 4.458891013384322,
"grad_norm": 0.12806454601850675,
"learning_rate": 8.827223171634698e-06,
"loss": 0.4518,
"mean_token_accuracy": 0.8473460748791695,
"num_tokens": 334979920.0,
"step": 584
},
{
"epoch": 4.466539196940727,
"grad_norm": 0.14571665310132872,
"learning_rate": 8.823362366990833e-06,
"loss": 0.4519,
"mean_token_accuracy": 0.847073495388031,
"num_tokens": 335555920.0,
"step": 585
},
{
"epoch": 4.474187380497132,
"grad_norm": 0.12883941943627572,
"learning_rate": 8.819496065133879e-06,
"loss": 0.4226,
"mean_token_accuracy": 0.8574985191226006,
"num_tokens": 336114304.0,
"step": 586
},
{
"epoch": 4.4818355640535374,
"grad_norm": 0.1335387356959434,
"learning_rate": 8.81562427162279e-06,
"loss": 0.4597,
"mean_token_accuracy": 0.8447174802422523,
"num_tokens": 336690304.0,
"step": 587
},
{
"epoch": 4.489483747609943,
"grad_norm": 0.12889357337389204,
"learning_rate": 8.81174699202441e-06,
"loss": 0.4405,
"mean_token_accuracy": 0.8510493487119675,
"num_tokens": 337266304.0,
"step": 588
},
{
"epoch": 4.497131931166348,
"grad_norm": 0.1330406876609962,
"learning_rate": 8.807864231913475e-06,
"loss": 0.4332,
"mean_token_accuracy": 0.8526778817176819,
"num_tokens": 337842304.0,
"step": 589
},
{
"epoch": 4.504780114722753,
"grad_norm": 0.12979232613135025,
"learning_rate": 8.8039759968726e-06,
"loss": 0.4236,
"mean_token_accuracy": 0.856186717748642,
"num_tokens": 338418304.0,
"step": 590
},
{
"epoch": 4.512428298279159,
"grad_norm": 0.13730293405800015,
"learning_rate": 8.800082292492274e-06,
"loss": 0.4486,
"mean_token_accuracy": 0.8482888266444206,
"num_tokens": 338994304.0,
"step": 591
},
{
"epoch": 4.520076481835564,
"grad_norm": 0.13489614197624644,
"learning_rate": 8.796183124370843e-06,
"loss": 0.4306,
"mean_token_accuracy": 0.8548116609454155,
"num_tokens": 339570304.0,
"step": 592
},
{
"epoch": 4.527724665391969,
"grad_norm": 0.1347926609469706,
"learning_rate": 8.792278498114517e-06,
"loss": 0.4544,
"mean_token_accuracy": 0.8463078364729881,
"num_tokens": 340146304.0,
"step": 593
},
{
"epoch": 4.5353728489483744,
"grad_norm": 0.1349897764307115,
"learning_rate": 8.788368419337348e-06,
"loss": 0.4396,
"mean_token_accuracy": 0.8521014824509621,
"num_tokens": 340722304.0,
"step": 594
},
{
"epoch": 4.54302103250478,
"grad_norm": 0.1334117681436598,
"learning_rate": 8.784452893661229e-06,
"loss": 0.4388,
"mean_token_accuracy": 0.8514469414949417,
"num_tokens": 341298304.0,
"step": 595
},
{
"epoch": 4.550669216061186,
"grad_norm": 0.13453993757890548,
"learning_rate": 8.780531926715888e-06,
"loss": 0.4408,
"mean_token_accuracy": 0.8514851331710815,
"num_tokens": 341874304.0,
"step": 596
},
{
"epoch": 4.558317399617591,
"grad_norm": 0.14039109869111963,
"learning_rate": 8.77660552413887e-06,
"loss": 0.4373,
"mean_token_accuracy": 0.8525997698307037,
"num_tokens": 342450304.0,
"step": 597
},
{
"epoch": 4.5659655831739965,
"grad_norm": 0.13204418688173547,
"learning_rate": 8.772673691575541e-06,
"loss": 0.4369,
"mean_token_accuracy": 0.8529435247182846,
"num_tokens": 343026304.0,
"step": 598
},
{
"epoch": 4.573613766730402,
"grad_norm": 0.13217896451409836,
"learning_rate": 8.768736434679073e-06,
"loss": 0.4442,
"mean_token_accuracy": 0.850242018699646,
"num_tokens": 343602304.0,
"step": 599
},
{
"epoch": 4.581261950286807,
"grad_norm": 0.13127212529235552,
"learning_rate": 8.764793759110435e-06,
"loss": 0.4293,
"mean_token_accuracy": 0.8552266061306,
"num_tokens": 344178304.0,
"step": 600
},
{
"epoch": 4.588910133843212,
"grad_norm": 0.1343418903278343,
"learning_rate": 8.760845670538387e-06,
"loss": 0.4211,
"mean_token_accuracy": 0.8569020330905914,
"num_tokens": 344754304.0,
"step": 601
},
{
"epoch": 4.596558317399618,
"grad_norm": 0.1271661715666118,
"learning_rate": 8.756892174639473e-06,
"loss": 0.408,
"mean_token_accuracy": 0.8616817370057106,
"num_tokens": 345330304.0,
"step": 602
},
{
"epoch": 4.604206500956023,
"grad_norm": 0.13563629821014458,
"learning_rate": 8.752933277098012e-06,
"loss": 0.437,
"mean_token_accuracy": 0.8516730964183807,
"num_tokens": 345892411.0,
"step": 603
},
{
"epoch": 4.611854684512428,
"grad_norm": 0.13068071235260656,
"learning_rate": 8.74896898360609e-06,
"loss": 0.4299,
"mean_token_accuracy": 0.8544713631272316,
"num_tokens": 346468411.0,
"step": 604
},
{
"epoch": 4.6195028680688335,
"grad_norm": 0.13846369217848975,
"learning_rate": 8.744999299863549e-06,
"loss": 0.4211,
"mean_token_accuracy": 0.857238844037056,
"num_tokens": 347044411.0,
"step": 605
},
{
"epoch": 4.627151051625239,
"grad_norm": 0.1367695822273923,
"learning_rate": 8.741024231577983e-06,
"loss": 0.4491,
"mean_token_accuracy": 0.8486238941550255,
"num_tokens": 347620411.0,
"step": 606
},
{
"epoch": 4.634799235181644,
"grad_norm": 0.13305300129278447,
"learning_rate": 8.737043784464726e-06,
"loss": 0.3945,
"mean_token_accuracy": 0.8662357404828072,
"num_tokens": 348196411.0,
"step": 607
},
{
"epoch": 4.642447418738049,
"grad_norm": 0.14038270415564713,
"learning_rate": 8.733057964246849e-06,
"loss": 0.4551,
"mean_token_accuracy": 0.8459553942084312,
"num_tokens": 348772411.0,
"step": 608
},
{
"epoch": 4.650095602294455,
"grad_norm": 0.14422126563292137,
"learning_rate": 8.729066776655144e-06,
"loss": 0.4335,
"mean_token_accuracy": 0.8531865999102592,
"num_tokens": 349348411.0,
"step": 609
},
{
"epoch": 4.657743785850861,
"grad_norm": 0.1413112708302926,
"learning_rate": 8.725070227428123e-06,
"loss": 0.452,
"mean_token_accuracy": 0.8475092723965645,
"num_tokens": 349924411.0,
"step": 610
},
{
"epoch": 4.665391969407266,
"grad_norm": 0.13342911934210575,
"learning_rate": 8.721068322312007e-06,
"loss": 0.4195,
"mean_token_accuracy": 0.8577718585729599,
"num_tokens": 350500411.0,
"step": 611
},
{
"epoch": 4.673040152963671,
"grad_norm": 0.1430879510207355,
"learning_rate": 8.717061067060716e-06,
"loss": 0.4465,
"mean_token_accuracy": 0.8497645780444145,
"num_tokens": 351076411.0,
"step": 612
},
{
"epoch": 4.680688336520077,
"grad_norm": 0.14166248034965864,
"learning_rate": 8.713048467435865e-06,
"loss": 0.4604,
"mean_token_accuracy": 0.8444344848394394,
"num_tokens": 351652411.0,
"step": 613
},
{
"epoch": 4.688336520076482,
"grad_norm": 0.1430056087650855,
"learning_rate": 8.70903052920675e-06,
"loss": 0.4295,
"mean_token_accuracy": 0.8543862923979759,
"num_tokens": 352228411.0,
"step": 614
},
{
"epoch": 4.695984703632887,
"grad_norm": 0.13993804535154394,
"learning_rate": 8.705007258150346e-06,
"loss": 0.4736,
"mean_token_accuracy": 0.8403875529766083,
"num_tokens": 352796906.0,
"step": 615
},
{
"epoch": 4.7036328871892925,
"grad_norm": 0.13853295092272744,
"learning_rate": 8.700978660051293e-06,
"loss": 0.4343,
"mean_token_accuracy": 0.8529174774885178,
"num_tokens": 353372906.0,
"step": 616
},
{
"epoch": 4.711281070745698,
"grad_norm": 0.13648098405467515,
"learning_rate": 8.696944740701891e-06,
"loss": 0.4624,
"mean_token_accuracy": 0.8432382643222809,
"num_tokens": 353948906.0,
"step": 617
},
{
"epoch": 4.718929254302103,
"grad_norm": 0.13063447639184614,
"learning_rate": 8.692905505902091e-06,
"loss": 0.4432,
"mean_token_accuracy": 0.8508288562297821,
"num_tokens": 354524906.0,
"step": 618
},
{
"epoch": 4.726577437858508,
"grad_norm": 0.13137608605071113,
"learning_rate": 8.688860961459487e-06,
"loss": 0.412,
"mean_token_accuracy": 0.8600583970546722,
"num_tokens": 355100906.0,
"step": 619
},
{
"epoch": 4.734225621414914,
"grad_norm": 0.15267208050688227,
"learning_rate": 8.684811113189306e-06,
"loss": 0.4541,
"mean_token_accuracy": 0.8480162993073463,
"num_tokens": 355674543.0,
"step": 620
},
{
"epoch": 4.741873804971319,
"grad_norm": 0.13165612975561047,
"learning_rate": 8.6807559669144e-06,
"loss": 0.4107,
"mean_token_accuracy": 0.8604946732521057,
"num_tokens": 356247875.0,
"step": 621
},
{
"epoch": 4.749521988527725,
"grad_norm": 0.12772216877759487,
"learning_rate": 8.676695528465244e-06,
"loss": 0.4011,
"mean_token_accuracy": 0.8628982827067375,
"num_tokens": 356809381.0,
"step": 622
},
{
"epoch": 4.75717017208413,
"grad_norm": 0.13488174230220404,
"learning_rate": 8.672629803679914e-06,
"loss": 0.4498,
"mean_token_accuracy": 0.8484294563531876,
"num_tokens": 357385381.0,
"step": 623
},
{
"epoch": 4.764818355640536,
"grad_norm": 0.1401189577724157,
"learning_rate": 8.668558798404093e-06,
"loss": 0.429,
"mean_token_accuracy": 0.8550686091184616,
"num_tokens": 357961381.0,
"step": 624
},
{
"epoch": 4.772466539196941,
"grad_norm": 0.1364797558773597,
"learning_rate": 8.664482518491053e-06,
"loss": 0.4539,
"mean_token_accuracy": 0.8475179374217987,
"num_tokens": 358537381.0,
"step": 625
},
{
"epoch": 4.780114722753346,
"grad_norm": 0.13289176813716883,
"learning_rate": 8.660400969801653e-06,
"loss": 0.4213,
"mean_token_accuracy": 0.8573412746191025,
"num_tokens": 359113381.0,
"step": 626
},
{
"epoch": 4.7877629063097515,
"grad_norm": 0.13614408467258662,
"learning_rate": 8.65631415820432e-06,
"loss": 0.4437,
"mean_token_accuracy": 0.8494902551174164,
"num_tokens": 359689381.0,
"step": 627
},
{
"epoch": 4.795411089866157,
"grad_norm": 0.1361828076841561,
"learning_rate": 8.652222089575059e-06,
"loss": 0.4528,
"mean_token_accuracy": 0.8468199968338013,
"num_tokens": 360265381.0,
"step": 628
},
{
"epoch": 4.803059273422562,
"grad_norm": 0.13328710422461576,
"learning_rate": 8.648124769797424e-06,
"loss": 0.4365,
"mean_token_accuracy": 0.8522612005472183,
"num_tokens": 360841381.0,
"step": 629
},
{
"epoch": 4.810707456978967,
"grad_norm": 0.14533854629935952,
"learning_rate": 8.644022204762525e-06,
"loss": 0.462,
"mean_token_accuracy": 0.8440126106142998,
"num_tokens": 361401576.0,
"step": 630
},
{
"epoch": 4.818355640535373,
"grad_norm": 0.1308097892624671,
"learning_rate": 8.63991440036901e-06,
"loss": 0.4136,
"mean_token_accuracy": 0.8578916490077972,
"num_tokens": 361977576.0,
"step": 631
},
{
"epoch": 4.826003824091778,
"grad_norm": 0.13348566755409633,
"learning_rate": 8.63580136252306e-06,
"loss": 0.4469,
"mean_token_accuracy": 0.8494173437356949,
"num_tokens": 362553576.0,
"step": 632
},
{
"epoch": 4.833652007648183,
"grad_norm": 0.13082548673790548,
"learning_rate": 8.631683097138386e-06,
"loss": 0.4306,
"mean_token_accuracy": 0.8541657999157906,
"num_tokens": 363129576.0,
"step": 633
},
{
"epoch": 4.8413001912045885,
"grad_norm": 0.13534537707509742,
"learning_rate": 8.627559610136209e-06,
"loss": 0.4378,
"mean_token_accuracy": 0.8518879190087318,
"num_tokens": 363705576.0,
"step": 634
},
{
"epoch": 4.848948374760994,
"grad_norm": 0.1322877886744737,
"learning_rate": 8.623430907445263e-06,
"loss": 0.4501,
"mean_token_accuracy": 0.848146453499794,
"num_tokens": 364281576.0,
"step": 635
},
{
"epoch": 4.8565965583174,
"grad_norm": 0.1305531222811248,
"learning_rate": 8.619296995001773e-06,
"loss": 0.4583,
"mean_token_accuracy": 0.8479276895523071,
"num_tokens": 364857576.0,
"step": 636
},
{
"epoch": 4.864244741873805,
"grad_norm": 0.13613414953238218,
"learning_rate": 8.615157878749462e-06,
"loss": 0.4291,
"mean_token_accuracy": 0.8538307175040245,
"num_tokens": 365433576.0,
"step": 637
},
{
"epoch": 4.871892925430211,
"grad_norm": 0.13535260994204976,
"learning_rate": 8.611013564639532e-06,
"loss": 0.452,
"mean_token_accuracy": 0.8471116721630096,
"num_tokens": 366009576.0,
"step": 638
},
{
"epoch": 4.879541108986616,
"grad_norm": 0.13913988946297418,
"learning_rate": 8.60686405863066e-06,
"loss": 0.481,
"mean_token_accuracy": 0.8386234864592552,
"num_tokens": 366585576.0,
"step": 639
},
{
"epoch": 4.887189292543021,
"grad_norm": 0.13926823889358061,
"learning_rate": 8.602709366688989e-06,
"loss": 0.4611,
"mean_token_accuracy": 0.8453963324427605,
"num_tokens": 367161576.0,
"step": 640
},
{
"epoch": 4.894837476099426,
"grad_norm": 0.13451549889430478,
"learning_rate": 8.598549494788111e-06,
"loss": 0.4458,
"mean_token_accuracy": 0.8480683267116547,
"num_tokens": 367737576.0,
"step": 641
},
{
"epoch": 4.902485659655832,
"grad_norm": 0.13290646145187515,
"learning_rate": 8.594384448909074e-06,
"loss": 0.4381,
"mean_token_accuracy": 0.8519139811396599,
"num_tokens": 368313576.0,
"step": 642
},
{
"epoch": 4.910133843212237,
"grad_norm": 0.14817863417364635,
"learning_rate": 8.590214235040363e-06,
"loss": 0.4328,
"mean_token_accuracy": 0.854470893740654,
"num_tokens": 368887476.0,
"step": 643
},
{
"epoch": 4.917782026768642,
"grad_norm": 0.12880758333637257,
"learning_rate": 8.586038859177891e-06,
"loss": 0.4261,
"mean_token_accuracy": 0.8556710705161095,
"num_tokens": 369463476.0,
"step": 644
},
{
"epoch": 4.925430210325048,
"grad_norm": 0.13524444500578836,
"learning_rate": 8.581858327324996e-06,
"loss": 0.4523,
"mean_token_accuracy": 0.8463720753788948,
"num_tokens": 370039476.0,
"step": 645
},
{
"epoch": 4.933078393881453,
"grad_norm": 0.13170708374690382,
"learning_rate": 8.577672645492426e-06,
"loss": 0.4198,
"mean_token_accuracy": 0.8571259826421738,
"num_tokens": 370615476.0,
"step": 646
},
{
"epoch": 4.940726577437858,
"grad_norm": 0.13590194960038368,
"learning_rate": 8.573481819698337e-06,
"loss": 0.4288,
"mean_token_accuracy": 0.8549418747425079,
"num_tokens": 371191476.0,
"step": 647
},
{
"epoch": 4.948374760994264,
"grad_norm": 0.13608499203187072,
"learning_rate": 8.569285855968278e-06,
"loss": 0.4287,
"mean_token_accuracy": 0.8553623184561729,
"num_tokens": 371761699.0,
"step": 648
},
{
"epoch": 4.95602294455067,
"grad_norm": 0.1581401895374939,
"learning_rate": 8.565084760335188e-06,
"loss": 0.4357,
"mean_token_accuracy": 0.8529161512851715,
"num_tokens": 372304065.0,
"step": 649
},
{
"epoch": 4.963671128107075,
"grad_norm": 0.13384003030736613,
"learning_rate": 8.560878538839379e-06,
"loss": 0.4169,
"mean_token_accuracy": 0.858254499733448,
"num_tokens": 372880065.0,
"step": 650
}
],
"logging_steps": 1,
"max_steps": 2620,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.564795702483878e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}