PEFT
Safetensors
wmt25-cuni-sft / trainer_state.json
cepin's picture
Upload folder using huggingface_hub
6935ef1 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.5024967808799975,
"eval_steps": 200,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006281209760999969,
"grad_norm": 0.6444190740585327,
"learning_rate": 0.00019996231392500473,
"loss": 2.0338,
"mean_token_accuracy": 0.6655033957213163,
"num_tokens": 25949.0,
"step": 10
},
{
"epoch": 0.0012562419521999937,
"grad_norm": 0.6188492178916931,
"learning_rate": 0.0001999204405083433,
"loss": 1.2937,
"mean_token_accuracy": 0.7404974050819874,
"num_tokens": 52745.0,
"step": 20
},
{
"epoch": 0.0018843629282999906,
"grad_norm": 0.38000112771987915,
"learning_rate": 0.00019987856709168184,
"loss": 1.1577,
"mean_token_accuracy": 0.7421227186918259,
"num_tokens": 79263.0,
"step": 30
},
{
"epoch": 0.0025124839043999874,
"grad_norm": 0.2685104310512543,
"learning_rate": 0.00019983669367502042,
"loss": 1.1229,
"mean_token_accuracy": 0.7457756631076335,
"num_tokens": 105823.0,
"step": 40
},
{
"epoch": 0.003140604880499984,
"grad_norm": 0.272366464138031,
"learning_rate": 0.00019979482025835898,
"loss": 1.131,
"mean_token_accuracy": 0.744264229759574,
"num_tokens": 132844.0,
"step": 50
},
{
"epoch": 0.003768725856599981,
"grad_norm": 0.29806405305862427,
"learning_rate": 0.00019975294684169756,
"loss": 1.1209,
"mean_token_accuracy": 0.74511832408607,
"num_tokens": 160730.0,
"step": 60
},
{
"epoch": 0.004396846832699978,
"grad_norm": 0.21983323991298676,
"learning_rate": 0.0001997110734250361,
"loss": 1.1173,
"mean_token_accuracy": 0.7465705782175064,
"num_tokens": 186800.0,
"step": 70
},
{
"epoch": 0.005024967808799975,
"grad_norm": 0.3172134757041931,
"learning_rate": 0.0001996692000083747,
"loss": 1.065,
"mean_token_accuracy": 0.751339340209961,
"num_tokens": 214113.0,
"step": 80
},
{
"epoch": 0.005653088784899972,
"grad_norm": 0.2641281485557556,
"learning_rate": 0.00019962732659171327,
"loss": 1.0829,
"mean_token_accuracy": 0.7596591092646122,
"num_tokens": 240924.0,
"step": 90
},
{
"epoch": 0.006281209760999968,
"grad_norm": 0.2266390323638916,
"learning_rate": 0.00019958545317505183,
"loss": 1.0631,
"mean_token_accuracy": 0.7615599595010281,
"num_tokens": 268684.0,
"step": 100
},
{
"epoch": 0.006909330737099965,
"grad_norm": 0.2795678377151489,
"learning_rate": 0.0001995435797583904,
"loss": 1.0991,
"mean_token_accuracy": 0.7610994651913643,
"num_tokens": 294547.0,
"step": 110
},
{
"epoch": 0.007537451713199962,
"grad_norm": 0.3321146070957184,
"learning_rate": 0.00019950170634172896,
"loss": 1.0841,
"mean_token_accuracy": 0.7635640185326338,
"num_tokens": 321449.0,
"step": 120
},
{
"epoch": 0.008165572689299959,
"grad_norm": 0.27341774106025696,
"learning_rate": 0.00019945983292506754,
"loss": 1.0804,
"mean_token_accuracy": 0.7688221096992492,
"num_tokens": 347875.0,
"step": 130
},
{
"epoch": 0.008793693665399956,
"grad_norm": 0.2848992347717285,
"learning_rate": 0.0001994179595084061,
"loss": 1.1046,
"mean_token_accuracy": 0.7561811745166779,
"num_tokens": 376044.0,
"step": 140
},
{
"epoch": 0.009421814641499953,
"grad_norm": 0.23145896196365356,
"learning_rate": 0.00019937608609174468,
"loss": 1.0284,
"mean_token_accuracy": 0.7750971898436546,
"num_tokens": 402655.0,
"step": 150
},
{
"epoch": 0.01004993561759995,
"grad_norm": 0.2593105137348175,
"learning_rate": 0.00019933421267508323,
"loss": 1.0749,
"mean_token_accuracy": 0.7687978703528643,
"num_tokens": 428832.0,
"step": 160
},
{
"epoch": 0.010678056593699947,
"grad_norm": 0.23464348912239075,
"learning_rate": 0.00019929233925842179,
"loss": 1.0931,
"mean_token_accuracy": 0.7663742013275623,
"num_tokens": 454748.0,
"step": 170
},
{
"epoch": 0.011306177569799944,
"grad_norm": 0.26923108100891113,
"learning_rate": 0.00019925046584176037,
"loss": 1.0557,
"mean_token_accuracy": 0.768491517007351,
"num_tokens": 482138.0,
"step": 180
},
{
"epoch": 0.011934298545899941,
"grad_norm": 0.2493171989917755,
"learning_rate": 0.00019920859242509892,
"loss": 1.0172,
"mean_token_accuracy": 0.7774307206273079,
"num_tokens": 508277.0,
"step": 190
},
{
"epoch": 0.012562419521999936,
"grad_norm": 0.2916482090950012,
"learning_rate": 0.0001991667190084375,
"loss": 1.0439,
"mean_token_accuracy": 0.7681754004210234,
"num_tokens": 535580.0,
"step": 200
},
{
"epoch": 0.013190540498099933,
"grad_norm": 0.3120996654033661,
"learning_rate": 0.00019912484559177606,
"loss": 1.0623,
"mean_token_accuracy": 0.770273020491004,
"num_tokens": 562472.0,
"step": 210
},
{
"epoch": 0.01381866147419993,
"grad_norm": 0.259887158870697,
"learning_rate": 0.00019908297217511464,
"loss": 1.0382,
"mean_token_accuracy": 0.7745798997581005,
"num_tokens": 588526.0,
"step": 220
},
{
"epoch": 0.014446782450299928,
"grad_norm": 0.26071885228157043,
"learning_rate": 0.0001990410987584532,
"loss": 1.0921,
"mean_token_accuracy": 0.7673533238470555,
"num_tokens": 615067.0,
"step": 230
},
{
"epoch": 0.015074903426399925,
"grad_norm": 0.2627691328525543,
"learning_rate": 0.00019899922534179177,
"loss": 1.0575,
"mean_token_accuracy": 0.7697854313999415,
"num_tokens": 642418.0,
"step": 240
},
{
"epoch": 0.015703024402499922,
"grad_norm": 0.25009799003601074,
"learning_rate": 0.00019895735192513035,
"loss": 1.062,
"mean_token_accuracy": 0.7729831222444773,
"num_tokens": 669932.0,
"step": 250
},
{
"epoch": 0.016331145378599917,
"grad_norm": 0.25904473662376404,
"learning_rate": 0.0001989154785084689,
"loss": 1.0908,
"mean_token_accuracy": 0.7664148453623056,
"num_tokens": 697838.0,
"step": 260
},
{
"epoch": 0.016959266354699916,
"grad_norm": 0.2615382671356201,
"learning_rate": 0.0001988736050918075,
"loss": 1.0743,
"mean_token_accuracy": 0.7620799005031585,
"num_tokens": 726211.0,
"step": 270
},
{
"epoch": 0.01758738733079991,
"grad_norm": 0.2364761233329773,
"learning_rate": 0.00019883173167514604,
"loss": 1.0134,
"mean_token_accuracy": 0.7783870816230773,
"num_tokens": 753627.0,
"step": 280
},
{
"epoch": 0.01821550830689991,
"grad_norm": 0.3415350615978241,
"learning_rate": 0.00019878985825848462,
"loss": 1.037,
"mean_token_accuracy": 0.7740533579140901,
"num_tokens": 779489.0,
"step": 290
},
{
"epoch": 0.018843629282999905,
"grad_norm": 0.31381550431251526,
"learning_rate": 0.00019874798484182318,
"loss": 1.0293,
"mean_token_accuracy": 0.7746200568974018,
"num_tokens": 807064.0,
"step": 300
},
{
"epoch": 0.019471750259099904,
"grad_norm": 0.3045092523097992,
"learning_rate": 0.00019870611142516176,
"loss": 0.9941,
"mean_token_accuracy": 0.7810021504759789,
"num_tokens": 834484.0,
"step": 310
},
{
"epoch": 0.0200998712351999,
"grad_norm": 0.29344442486763,
"learning_rate": 0.0001986642380085003,
"loss": 1.0297,
"mean_token_accuracy": 0.7791498117148876,
"num_tokens": 860075.0,
"step": 320
},
{
"epoch": 0.020727992211299895,
"grad_norm": 0.377948522567749,
"learning_rate": 0.00019862236459183887,
"loss": 1.0225,
"mean_token_accuracy": 0.7731472756713629,
"num_tokens": 887521.0,
"step": 330
},
{
"epoch": 0.021356113187399894,
"grad_norm": 0.2640627324581146,
"learning_rate": 0.00019858049117517745,
"loss": 1.0703,
"mean_token_accuracy": 0.7751353096216917,
"num_tokens": 913836.0,
"step": 340
},
{
"epoch": 0.02198423416349989,
"grad_norm": 0.2751738727092743,
"learning_rate": 0.000198538617758516,
"loss": 1.0321,
"mean_token_accuracy": 0.7782084301114083,
"num_tokens": 941504.0,
"step": 350
},
{
"epoch": 0.022612355139599888,
"grad_norm": 0.28567424416542053,
"learning_rate": 0.00019849674434185458,
"loss": 1.0765,
"mean_token_accuracy": 0.7735152095556259,
"num_tokens": 966993.0,
"step": 360
},
{
"epoch": 0.023240476115699883,
"grad_norm": 0.24118006229400635,
"learning_rate": 0.00019845487092519314,
"loss": 1.0261,
"mean_token_accuracy": 0.776251096650958,
"num_tokens": 993389.0,
"step": 370
},
{
"epoch": 0.023868597091799882,
"grad_norm": 0.25114327669143677,
"learning_rate": 0.00019841299750853172,
"loss": 1.0475,
"mean_token_accuracy": 0.7716617304831743,
"num_tokens": 1020541.0,
"step": 380
},
{
"epoch": 0.024496718067899877,
"grad_norm": 0.29382482171058655,
"learning_rate": 0.0001983711240918703,
"loss": 0.9987,
"mean_token_accuracy": 0.7752502433955669,
"num_tokens": 1046501.0,
"step": 390
},
{
"epoch": 0.025124839043999873,
"grad_norm": 0.23243650794029236,
"learning_rate": 0.00019832925067520885,
"loss": 1.0581,
"mean_token_accuracy": 0.7718209594488143,
"num_tokens": 1071892.0,
"step": 400
},
{
"epoch": 0.02575296002009987,
"grad_norm": 0.2621685564517975,
"learning_rate": 0.00019828737725854743,
"loss": 1.0397,
"mean_token_accuracy": 0.7732171807438135,
"num_tokens": 1097346.0,
"step": 410
},
{
"epoch": 0.026381080996199867,
"grad_norm": 0.24452239274978638,
"learning_rate": 0.000198245503841886,
"loss": 1.0338,
"mean_token_accuracy": 0.775847963243723,
"num_tokens": 1124990.0,
"step": 420
},
{
"epoch": 0.027009201972299866,
"grad_norm": 0.2270130068063736,
"learning_rate": 0.00019820363042522457,
"loss": 1.0344,
"mean_token_accuracy": 0.7744502332061529,
"num_tokens": 1152942.0,
"step": 430
},
{
"epoch": 0.02763732294839986,
"grad_norm": 0.22909750044345856,
"learning_rate": 0.00019816175700856312,
"loss": 1.0799,
"mean_token_accuracy": 0.7713461548089982,
"num_tokens": 1178395.0,
"step": 440
},
{
"epoch": 0.02826544392449986,
"grad_norm": 0.25798511505126953,
"learning_rate": 0.0001981198835919017,
"loss": 1.0087,
"mean_token_accuracy": 0.7833342991769314,
"num_tokens": 1205452.0,
"step": 450
},
{
"epoch": 0.028893564900599855,
"grad_norm": 0.2909473478794098,
"learning_rate": 0.00019807801017524026,
"loss": 1.0093,
"mean_token_accuracy": 0.7790829930454493,
"num_tokens": 1231125.0,
"step": 460
},
{
"epoch": 0.029521685876699854,
"grad_norm": 0.2601008117198944,
"learning_rate": 0.0001980361367585788,
"loss": 1.003,
"mean_token_accuracy": 0.7839790925383567,
"num_tokens": 1258342.0,
"step": 470
},
{
"epoch": 0.03014980685279985,
"grad_norm": 0.23418252170085907,
"learning_rate": 0.0001979942633419174,
"loss": 1.012,
"mean_token_accuracy": 0.7800652399659157,
"num_tokens": 1285135.0,
"step": 480
},
{
"epoch": 0.030777927828899845,
"grad_norm": 0.23981328308582306,
"learning_rate": 0.00019795238992525595,
"loss": 1.0519,
"mean_token_accuracy": 0.7733560837805271,
"num_tokens": 1310716.0,
"step": 490
},
{
"epoch": 0.031406048804999843,
"grad_norm": 0.25937291979789734,
"learning_rate": 0.00019791051650859453,
"loss": 0.9777,
"mean_token_accuracy": 0.7844050772488117,
"num_tokens": 1338308.0,
"step": 500
},
{
"epoch": 0.03203416978109984,
"grad_norm": 0.2411338984966278,
"learning_rate": 0.00019786864309193308,
"loss": 1.0141,
"mean_token_accuracy": 0.78155472651124,
"num_tokens": 1365889.0,
"step": 510
},
{
"epoch": 0.032662290757199834,
"grad_norm": 0.24309079349040985,
"learning_rate": 0.00019782676967527166,
"loss": 1.0542,
"mean_token_accuracy": 0.7737418331205845,
"num_tokens": 1392368.0,
"step": 520
},
{
"epoch": 0.033290411733299836,
"grad_norm": 0.26009315252304077,
"learning_rate": 0.00019778489625861022,
"loss": 1.002,
"mean_token_accuracy": 0.7816768281161786,
"num_tokens": 1418557.0,
"step": 530
},
{
"epoch": 0.03391853270939983,
"grad_norm": 0.25517457723617554,
"learning_rate": 0.0001977430228419488,
"loss": 1.0178,
"mean_token_accuracy": 0.7780437018722296,
"num_tokens": 1444536.0,
"step": 540
},
{
"epoch": 0.03454665368549983,
"grad_norm": 0.2931221127510071,
"learning_rate": 0.00019770114942528738,
"loss": 1.0251,
"mean_token_accuracy": 0.7784773204475641,
"num_tokens": 1472148.0,
"step": 550
},
{
"epoch": 0.03517477466159982,
"grad_norm": 0.2510989308357239,
"learning_rate": 0.00019765927600862593,
"loss": 1.0322,
"mean_token_accuracy": 0.7775144059211015,
"num_tokens": 1497252.0,
"step": 560
},
{
"epoch": 0.03580289563769982,
"grad_norm": 0.24499671161174774,
"learning_rate": 0.00019761740259196451,
"loss": 0.9935,
"mean_token_accuracy": 0.7875256646424532,
"num_tokens": 1523531.0,
"step": 570
},
{
"epoch": 0.03643101661379982,
"grad_norm": 0.24137680232524872,
"learning_rate": 0.00019757552917530307,
"loss": 1.0253,
"mean_token_accuracy": 0.7757651243358851,
"num_tokens": 1550114.0,
"step": 580
},
{
"epoch": 0.037059137589899815,
"grad_norm": 0.2509494125843048,
"learning_rate": 0.00019753365575864165,
"loss": 1.0644,
"mean_token_accuracy": 0.7731641355901957,
"num_tokens": 1577217.0,
"step": 590
},
{
"epoch": 0.03768725856599981,
"grad_norm": 0.22997072339057922,
"learning_rate": 0.0001974917823419802,
"loss": 1.0306,
"mean_token_accuracy": 0.7778335962444544,
"num_tokens": 1605094.0,
"step": 600
},
{
"epoch": 0.038315379542099806,
"grad_norm": 0.2381758838891983,
"learning_rate": 0.00019744990892531876,
"loss": 1.0126,
"mean_token_accuracy": 0.7801825743168592,
"num_tokens": 1631692.0,
"step": 610
},
{
"epoch": 0.03894350051819981,
"grad_norm": 0.20709013938903809,
"learning_rate": 0.00019740803550865734,
"loss": 1.0068,
"mean_token_accuracy": 0.7762394435703754,
"num_tokens": 1660071.0,
"step": 620
},
{
"epoch": 0.039571621494299804,
"grad_norm": 0.2484230399131775,
"learning_rate": 0.0001973661620919959,
"loss": 1.0002,
"mean_token_accuracy": 0.7832283467054367,
"num_tokens": 1688142.0,
"step": 630
},
{
"epoch": 0.0401997424703998,
"grad_norm": 0.29590943455696106,
"learning_rate": 0.00019732428867533447,
"loss": 1.0198,
"mean_token_accuracy": 0.780064957216382,
"num_tokens": 1714501.0,
"step": 640
},
{
"epoch": 0.040827863446499794,
"grad_norm": 0.2250148206949234,
"learning_rate": 0.00019728241525867303,
"loss": 1.0231,
"mean_token_accuracy": 0.7774009238928556,
"num_tokens": 1740189.0,
"step": 650
},
{
"epoch": 0.04145598442259979,
"grad_norm": 0.2383430153131485,
"learning_rate": 0.0001972405418420116,
"loss": 1.0058,
"mean_token_accuracy": 0.7813835583627224,
"num_tokens": 1767202.0,
"step": 660
},
{
"epoch": 0.04208410539869979,
"grad_norm": 0.2511632442474365,
"learning_rate": 0.00019719866842535016,
"loss": 1.0022,
"mean_token_accuracy": 0.779718442261219,
"num_tokens": 1792213.0,
"step": 670
},
{
"epoch": 0.04271222637479979,
"grad_norm": 0.24368815124034882,
"learning_rate": 0.00019715679500868874,
"loss": 1.0201,
"mean_token_accuracy": 0.776063310727477,
"num_tokens": 1818181.0,
"step": 680
},
{
"epoch": 0.04334034735089978,
"grad_norm": 0.2473301738500595,
"learning_rate": 0.00019711492159202733,
"loss": 1.0522,
"mean_token_accuracy": 0.7746416825801135,
"num_tokens": 1844507.0,
"step": 690
},
{
"epoch": 0.04396846832699978,
"grad_norm": 0.24195240437984467,
"learning_rate": 0.00019707304817536588,
"loss": 1.0105,
"mean_token_accuracy": 0.7786858607083559,
"num_tokens": 1871297.0,
"step": 700
},
{
"epoch": 0.04459658930309978,
"grad_norm": 0.20953992009162903,
"learning_rate": 0.00019703117475870446,
"loss": 0.9984,
"mean_token_accuracy": 0.7896967530250549,
"num_tokens": 1897039.0,
"step": 710
},
{
"epoch": 0.045224710279199776,
"grad_norm": 0.24665352702140808,
"learning_rate": 0.00019698930134204301,
"loss": 1.0206,
"mean_token_accuracy": 0.7788492277264595,
"num_tokens": 1925344.0,
"step": 720
},
{
"epoch": 0.04585283125529977,
"grad_norm": 0.25865861773490906,
"learning_rate": 0.0001969474279253816,
"loss": 0.9794,
"mean_token_accuracy": 0.7853762939572334,
"num_tokens": 1952529.0,
"step": 730
},
{
"epoch": 0.046480952231399766,
"grad_norm": 0.26470091938972473,
"learning_rate": 0.00019690555450872015,
"loss": 1.0315,
"mean_token_accuracy": 0.776170663908124,
"num_tokens": 1979215.0,
"step": 740
},
{
"epoch": 0.04710907320749976,
"grad_norm": 0.24201270937919617,
"learning_rate": 0.0001968636810920587,
"loss": 1.0579,
"mean_token_accuracy": 0.7728814825415611,
"num_tokens": 2005105.0,
"step": 750
},
{
"epoch": 0.047737194183599764,
"grad_norm": 0.2657768428325653,
"learning_rate": 0.00019682180767539728,
"loss": 1.0635,
"mean_token_accuracy": 0.7685952417552471,
"num_tokens": 2031227.0,
"step": 760
},
{
"epoch": 0.04836531515969976,
"grad_norm": 0.24351350963115692,
"learning_rate": 0.00019677993425873584,
"loss": 1.0285,
"mean_token_accuracy": 0.7760228164494037,
"num_tokens": 2058110.0,
"step": 770
},
{
"epoch": 0.048993436135799755,
"grad_norm": 0.29012376070022583,
"learning_rate": 0.00019673806084207442,
"loss": 1.0441,
"mean_token_accuracy": 0.7735719617456198,
"num_tokens": 2085226.0,
"step": 780
},
{
"epoch": 0.04962155711189975,
"grad_norm": 0.3338630795478821,
"learning_rate": 0.00019669618742541297,
"loss": 1.0605,
"mean_token_accuracy": 0.7712657749652863,
"num_tokens": 2113154.0,
"step": 790
},
{
"epoch": 0.050249678087999745,
"grad_norm": 0.2387358844280243,
"learning_rate": 0.00019665431400875155,
"loss": 0.9938,
"mean_token_accuracy": 0.7843167375773191,
"num_tokens": 2139881.0,
"step": 800
},
{
"epoch": 0.05087779906409975,
"grad_norm": 0.2398860901594162,
"learning_rate": 0.0001966124405920901,
"loss": 1.0617,
"mean_token_accuracy": 0.7687791418284178,
"num_tokens": 2167003.0,
"step": 810
},
{
"epoch": 0.05150592004019974,
"grad_norm": 0.2620822489261627,
"learning_rate": 0.0001965705671754287,
"loss": 1.0088,
"mean_token_accuracy": 0.7815865609794855,
"num_tokens": 2193645.0,
"step": 820
},
{
"epoch": 0.05213404101629974,
"grad_norm": 0.26973757147789,
"learning_rate": 0.00019652869375876724,
"loss": 0.9954,
"mean_token_accuracy": 0.78002959638834,
"num_tokens": 2220126.0,
"step": 830
},
{
"epoch": 0.052762161992399734,
"grad_norm": 0.2633202075958252,
"learning_rate": 0.00019648682034210582,
"loss": 1.0282,
"mean_token_accuracy": 0.7779554452747106,
"num_tokens": 2245761.0,
"step": 840
},
{
"epoch": 0.053390282968499736,
"grad_norm": 0.22578182816505432,
"learning_rate": 0.0001964449469254444,
"loss": 1.0081,
"mean_token_accuracy": 0.7812429942190647,
"num_tokens": 2273109.0,
"step": 850
},
{
"epoch": 0.05401840394459973,
"grad_norm": 0.23409296572208405,
"learning_rate": 0.00019640307350878296,
"loss": 1.0229,
"mean_token_accuracy": 0.7737102590501308,
"num_tokens": 2301498.0,
"step": 860
},
{
"epoch": 0.05464652492069973,
"grad_norm": 0.28782615065574646,
"learning_rate": 0.00019636120009212154,
"loss": 0.9995,
"mean_token_accuracy": 0.7862726211547851,
"num_tokens": 2327875.0,
"step": 870
},
{
"epoch": 0.05527464589679972,
"grad_norm": 0.250499427318573,
"learning_rate": 0.0001963193266754601,
"loss": 1.0047,
"mean_token_accuracy": 0.7766373138874769,
"num_tokens": 2354761.0,
"step": 880
},
{
"epoch": 0.05590276687289972,
"grad_norm": 0.266989141702652,
"learning_rate": 0.00019627745325879868,
"loss": 1.0043,
"mean_token_accuracy": 0.7816190734505654,
"num_tokens": 2381600.0,
"step": 890
},
{
"epoch": 0.05653088784899972,
"grad_norm": 0.23852886259555817,
"learning_rate": 0.00019623557984213723,
"loss": 1.001,
"mean_token_accuracy": 0.7850340217351913,
"num_tokens": 2409324.0,
"step": 900
},
{
"epoch": 0.057159008825099715,
"grad_norm": 0.2646239697933197,
"learning_rate": 0.00019619370642547578,
"loss": 1.0083,
"mean_token_accuracy": 0.7854118514806032,
"num_tokens": 2434136.0,
"step": 910
},
{
"epoch": 0.05778712980119971,
"grad_norm": 0.32965826988220215,
"learning_rate": 0.00019615183300881437,
"loss": 1.0327,
"mean_token_accuracy": 0.778333380818367,
"num_tokens": 2460120.0,
"step": 920
},
{
"epoch": 0.058415250777299706,
"grad_norm": 0.2623177468776703,
"learning_rate": 0.00019610995959215292,
"loss": 0.9889,
"mean_token_accuracy": 0.7845939747989178,
"num_tokens": 2487177.0,
"step": 930
},
{
"epoch": 0.05904337175339971,
"grad_norm": 0.24970988929271698,
"learning_rate": 0.0001960680861754915,
"loss": 1.0301,
"mean_token_accuracy": 0.7751765877008439,
"num_tokens": 2513428.0,
"step": 940
},
{
"epoch": 0.0596714927294997,
"grad_norm": 0.21225541830062866,
"learning_rate": 0.00019602621275883005,
"loss": 1.0269,
"mean_token_accuracy": 0.7762201461941004,
"num_tokens": 2539126.0,
"step": 950
},
{
"epoch": 0.0602996137055997,
"grad_norm": 0.22666792571544647,
"learning_rate": 0.00019598433934216864,
"loss": 1.0081,
"mean_token_accuracy": 0.7794944658875466,
"num_tokens": 2565063.0,
"step": 960
},
{
"epoch": 0.060927734681699694,
"grad_norm": 0.263004332780838,
"learning_rate": 0.0001959424659255072,
"loss": 1.0402,
"mean_token_accuracy": 0.7806816603988409,
"num_tokens": 2591221.0,
"step": 970
},
{
"epoch": 0.06155585565779969,
"grad_norm": 0.2698504626750946,
"learning_rate": 0.00019590059250884577,
"loss": 1.008,
"mean_token_accuracy": 0.7808872204273939,
"num_tokens": 2617287.0,
"step": 980
},
{
"epoch": 0.06218397663389969,
"grad_norm": 0.23650215566158295,
"learning_rate": 0.00019585871909218435,
"loss": 0.9776,
"mean_token_accuracy": 0.7856047466397286,
"num_tokens": 2644243.0,
"step": 990
},
{
"epoch": 0.06281209760999969,
"grad_norm": 0.2340182512998581,
"learning_rate": 0.0001958168456755229,
"loss": 1.0056,
"mean_token_accuracy": 0.7824073404073715,
"num_tokens": 2670435.0,
"step": 1000
},
{
"epoch": 0.06344021858609969,
"grad_norm": 0.29255470633506775,
"learning_rate": 0.0001957749722588615,
"loss": 1.0055,
"mean_token_accuracy": 0.7834579069167376,
"num_tokens": 2697341.0,
"step": 1010
},
{
"epoch": 0.06406833956219968,
"grad_norm": 0.27291226387023926,
"learning_rate": 0.00019573309884220004,
"loss": 1.0239,
"mean_token_accuracy": 0.7748380672186613,
"num_tokens": 2724939.0,
"step": 1020
},
{
"epoch": 0.06469646053829968,
"grad_norm": 0.26702409982681274,
"learning_rate": 0.00019569122542553862,
"loss": 0.9629,
"mean_token_accuracy": 0.7890582896769047,
"num_tokens": 2751944.0,
"step": 1030
},
{
"epoch": 0.06532458151439967,
"grad_norm": 0.34549906849861145,
"learning_rate": 0.00019564935200887718,
"loss": 0.9939,
"mean_token_accuracy": 0.7854917496442795,
"num_tokens": 2778069.0,
"step": 1040
},
{
"epoch": 0.06595270249049967,
"grad_norm": 0.29321590065956116,
"learning_rate": 0.00019560747859221573,
"loss": 0.9785,
"mean_token_accuracy": 0.781370873004198,
"num_tokens": 2805448.0,
"step": 1050
},
{
"epoch": 0.06658082346659967,
"grad_norm": 0.27075111865997314,
"learning_rate": 0.0001955656051755543,
"loss": 1.0026,
"mean_token_accuracy": 0.7849268738180399,
"num_tokens": 2832499.0,
"step": 1060
},
{
"epoch": 0.06720894444269966,
"grad_norm": 0.2806377410888672,
"learning_rate": 0.00019552373175889286,
"loss": 1.0041,
"mean_token_accuracy": 0.7824723150581121,
"num_tokens": 2859168.0,
"step": 1070
},
{
"epoch": 0.06783706541879966,
"grad_norm": 0.2728383541107178,
"learning_rate": 0.00019548185834223145,
"loss": 1.0094,
"mean_token_accuracy": 0.7800588298588991,
"num_tokens": 2884736.0,
"step": 1080
},
{
"epoch": 0.06846518639489965,
"grad_norm": 0.27712950110435486,
"learning_rate": 0.00019543998492557,
"loss": 0.9936,
"mean_token_accuracy": 0.7834884870797396,
"num_tokens": 2912858.0,
"step": 1090
},
{
"epoch": 0.06909330737099965,
"grad_norm": 0.228750541806221,
"learning_rate": 0.00019539811150890858,
"loss": 1.0505,
"mean_token_accuracy": 0.7716195099055767,
"num_tokens": 2938752.0,
"step": 1100
},
{
"epoch": 0.06972142834709966,
"grad_norm": 0.29332438111305237,
"learning_rate": 0.00019535623809224714,
"loss": 1.0268,
"mean_token_accuracy": 0.7755599562078714,
"num_tokens": 2965362.0,
"step": 1110
},
{
"epoch": 0.07034954932319964,
"grad_norm": 0.2691513001918793,
"learning_rate": 0.00019531436467558572,
"loss": 0.9991,
"mean_token_accuracy": 0.7823032017797231,
"num_tokens": 2993187.0,
"step": 1120
},
{
"epoch": 0.07097767029929965,
"grad_norm": 0.2795611321926117,
"learning_rate": 0.0001952724912589243,
"loss": 0.9933,
"mean_token_accuracy": 0.7823376722633839,
"num_tokens": 3019494.0,
"step": 1130
},
{
"epoch": 0.07160579127539964,
"grad_norm": 0.2678844928741455,
"learning_rate": 0.00019523061784226285,
"loss": 0.996,
"mean_token_accuracy": 0.7836552064865827,
"num_tokens": 3047562.0,
"step": 1140
},
{
"epoch": 0.07223391225149964,
"grad_norm": 0.24954286217689514,
"learning_rate": 0.00019518874442560143,
"loss": 1.0048,
"mean_token_accuracy": 0.7796858191490174,
"num_tokens": 3074447.0,
"step": 1150
},
{
"epoch": 0.07286203322759964,
"grad_norm": 0.2411104440689087,
"learning_rate": 0.00019514687100893999,
"loss": 0.9947,
"mean_token_accuracy": 0.7852891199290752,
"num_tokens": 3101800.0,
"step": 1160
},
{
"epoch": 0.07349015420369963,
"grad_norm": 0.26690879464149475,
"learning_rate": 0.00019510499759227857,
"loss": 0.9629,
"mean_token_accuracy": 0.7877023875713348,
"num_tokens": 3129214.0,
"step": 1170
},
{
"epoch": 0.07411827517979963,
"grad_norm": 0.2763614058494568,
"learning_rate": 0.00019506312417561712,
"loss": 1.0005,
"mean_token_accuracy": 0.7793677289038896,
"num_tokens": 3156811.0,
"step": 1180
},
{
"epoch": 0.07474639615589963,
"grad_norm": 0.28668391704559326,
"learning_rate": 0.00019502125075895568,
"loss": 1.0135,
"mean_token_accuracy": 0.7761840496212244,
"num_tokens": 3183787.0,
"step": 1190
},
{
"epoch": 0.07537451713199962,
"grad_norm": 0.32997289299964905,
"learning_rate": 0.00019497937734229426,
"loss": 1.0379,
"mean_token_accuracy": 0.7786177668720484,
"num_tokens": 3209022.0,
"step": 1200
},
{
"epoch": 0.07600263810809962,
"grad_norm": 0.30645307898521423,
"learning_rate": 0.0001949375039256328,
"loss": 1.0069,
"mean_token_accuracy": 0.7809740912169219,
"num_tokens": 3237283.0,
"step": 1210
},
{
"epoch": 0.07663075908419961,
"grad_norm": 0.25995928049087524,
"learning_rate": 0.0001948956305089714,
"loss": 1.0102,
"mean_token_accuracy": 0.7820904731750489,
"num_tokens": 3262891.0,
"step": 1220
},
{
"epoch": 0.07725888006029961,
"grad_norm": 0.2744593024253845,
"learning_rate": 0.00019485375709230995,
"loss": 0.9766,
"mean_token_accuracy": 0.7883562445640564,
"num_tokens": 3290445.0,
"step": 1230
},
{
"epoch": 0.07788700103639962,
"grad_norm": 0.28681257367134094,
"learning_rate": 0.00019481188367564853,
"loss": 1.0141,
"mean_token_accuracy": 0.7812411531805992,
"num_tokens": 3316283.0,
"step": 1240
},
{
"epoch": 0.0785151220124996,
"grad_norm": 0.2726808190345764,
"learning_rate": 0.00019477001025898708,
"loss": 1.0456,
"mean_token_accuracy": 0.7727174177765846,
"num_tokens": 3344457.0,
"step": 1250
},
{
"epoch": 0.07914324298859961,
"grad_norm": 0.22378629446029663,
"learning_rate": 0.00019472813684232563,
"loss": 1.057,
"mean_token_accuracy": 0.77296442091465,
"num_tokens": 3371868.0,
"step": 1260
},
{
"epoch": 0.0797713639646996,
"grad_norm": 0.2829609513282776,
"learning_rate": 0.00019468626342566422,
"loss": 0.9634,
"mean_token_accuracy": 0.7868273138999939,
"num_tokens": 3399400.0,
"step": 1270
},
{
"epoch": 0.0803994849407996,
"grad_norm": 0.2706160247325897,
"learning_rate": 0.0001946443900090028,
"loss": 0.9711,
"mean_token_accuracy": 0.7930981118232012,
"num_tokens": 3424763.0,
"step": 1280
},
{
"epoch": 0.0810276059168996,
"grad_norm": 0.27218466997146606,
"learning_rate": 0.00019460251659234138,
"loss": 0.9635,
"mean_token_accuracy": 0.7916820932179689,
"num_tokens": 3451380.0,
"step": 1290
},
{
"epoch": 0.08165572689299959,
"grad_norm": 0.2326808124780655,
"learning_rate": 0.00019456064317567993,
"loss": 0.9921,
"mean_token_accuracy": 0.7916632521897554,
"num_tokens": 3477301.0,
"step": 1300
},
{
"epoch": 0.08228384786909959,
"grad_norm": 0.3434126079082489,
"learning_rate": 0.0001945187697590185,
"loss": 0.9911,
"mean_token_accuracy": 0.788581146299839,
"num_tokens": 3504062.0,
"step": 1310
},
{
"epoch": 0.08291196884519958,
"grad_norm": 0.24641671776771545,
"learning_rate": 0.00019447689634235707,
"loss": 1.0139,
"mean_token_accuracy": 0.7832327298820019,
"num_tokens": 3530414.0,
"step": 1320
},
{
"epoch": 0.08354008982129958,
"grad_norm": 0.28022703528404236,
"learning_rate": 0.00019443502292569565,
"loss": 1.0172,
"mean_token_accuracy": 0.7799417782574892,
"num_tokens": 3557839.0,
"step": 1330
},
{
"epoch": 0.08416821079739958,
"grad_norm": 0.2621849477291107,
"learning_rate": 0.0001943931495090342,
"loss": 0.9954,
"mean_token_accuracy": 0.7841709833592176,
"num_tokens": 3584652.0,
"step": 1340
},
{
"epoch": 0.08479633177349957,
"grad_norm": 0.2990066111087799,
"learning_rate": 0.00019435127609237276,
"loss": 0.9938,
"mean_token_accuracy": 0.784092029184103,
"num_tokens": 3611525.0,
"step": 1350
},
{
"epoch": 0.08542445274959957,
"grad_norm": 0.28870198130607605,
"learning_rate": 0.00019430940267571134,
"loss": 0.9715,
"mean_token_accuracy": 0.788496358320117,
"num_tokens": 3638315.0,
"step": 1360
},
{
"epoch": 0.08605257372569956,
"grad_norm": 0.2864430546760559,
"learning_rate": 0.0001942675292590499,
"loss": 0.9974,
"mean_token_accuracy": 0.7812655068933964,
"num_tokens": 3665550.0,
"step": 1370
},
{
"epoch": 0.08668069470179957,
"grad_norm": 0.2753150165081024,
"learning_rate": 0.00019422565584238847,
"loss": 1.0013,
"mean_token_accuracy": 0.7857158094644546,
"num_tokens": 3692171.0,
"step": 1380
},
{
"epoch": 0.08730881567789957,
"grad_norm": 0.29843252897262573,
"learning_rate": 0.00019418378242572703,
"loss": 0.9665,
"mean_token_accuracy": 0.7897020474076271,
"num_tokens": 3720109.0,
"step": 1390
},
{
"epoch": 0.08793693665399956,
"grad_norm": 0.2527588903903961,
"learning_rate": 0.0001941419090090656,
"loss": 0.9981,
"mean_token_accuracy": 0.7841325510293246,
"num_tokens": 3746495.0,
"step": 1400
},
{
"epoch": 0.08856505763009956,
"grad_norm": 0.23994463682174683,
"learning_rate": 0.00019410003559240416,
"loss": 1.0203,
"mean_token_accuracy": 0.7768059551715851,
"num_tokens": 3773355.0,
"step": 1410
},
{
"epoch": 0.08919317860619956,
"grad_norm": 0.2996773421764374,
"learning_rate": 0.00019405816217574274,
"loss": 0.9721,
"mean_token_accuracy": 0.7872179444879294,
"num_tokens": 3800935.0,
"step": 1420
},
{
"epoch": 0.08982129958229955,
"grad_norm": 0.2415090799331665,
"learning_rate": 0.00019401628875908132,
"loss": 1.0031,
"mean_token_accuracy": 0.7829441606998444,
"num_tokens": 3828006.0,
"step": 1430
},
{
"epoch": 0.09044942055839955,
"grad_norm": 0.23195892572402954,
"learning_rate": 0.00019397441534241988,
"loss": 0.9699,
"mean_token_accuracy": 0.7887255474925041,
"num_tokens": 3855488.0,
"step": 1440
},
{
"epoch": 0.09107754153449954,
"grad_norm": 0.2979389429092407,
"learning_rate": 0.00019393254192575846,
"loss": 1.0531,
"mean_token_accuracy": 0.77476004101336,
"num_tokens": 3881914.0,
"step": 1450
},
{
"epoch": 0.09170566251059954,
"grad_norm": 0.23635777831077576,
"learning_rate": 0.000193890668509097,
"loss": 1.0105,
"mean_token_accuracy": 0.7829858396202326,
"num_tokens": 3908974.0,
"step": 1460
},
{
"epoch": 0.09233378348669954,
"grad_norm": 0.2289458066225052,
"learning_rate": 0.0001938487950924356,
"loss": 1.023,
"mean_token_accuracy": 0.7806610990315676,
"num_tokens": 3936336.0,
"step": 1470
},
{
"epoch": 0.09296190446279953,
"grad_norm": 0.24525542557239532,
"learning_rate": 0.00019380692167577415,
"loss": 1.0107,
"mean_token_accuracy": 0.7814721431583166,
"num_tokens": 3962693.0,
"step": 1480
},
{
"epoch": 0.09359002543889954,
"grad_norm": 0.2598733603954315,
"learning_rate": 0.0001937650482591127,
"loss": 0.9717,
"mean_token_accuracy": 0.7862234275788069,
"num_tokens": 3988713.0,
"step": 1490
},
{
"epoch": 0.09421814641499952,
"grad_norm": 0.23323708772659302,
"learning_rate": 0.00019372317484245128,
"loss": 1.0059,
"mean_token_accuracy": 0.782355098053813,
"num_tokens": 4016895.0,
"step": 1500
},
{
"epoch": 0.09484626739109953,
"grad_norm": 0.24204787611961365,
"learning_rate": 0.00019368130142578984,
"loss": 0.9812,
"mean_token_accuracy": 0.787716443836689,
"num_tokens": 4043591.0,
"step": 1510
},
{
"epoch": 0.09547438836719953,
"grad_norm": 0.26067280769348145,
"learning_rate": 0.00019363942800912842,
"loss": 0.996,
"mean_token_accuracy": 0.7856833711266518,
"num_tokens": 4071338.0,
"step": 1520
},
{
"epoch": 0.09610250934329952,
"grad_norm": 0.3182675540447235,
"learning_rate": 0.00019359755459246697,
"loss": 0.9715,
"mean_token_accuracy": 0.7866728454828262,
"num_tokens": 4098744.0,
"step": 1530
},
{
"epoch": 0.09673063031939952,
"grad_norm": 0.3301153779029846,
"learning_rate": 0.00019355568117580555,
"loss": 1.0048,
"mean_token_accuracy": 0.7794850755482912,
"num_tokens": 4127367.0,
"step": 1540
},
{
"epoch": 0.09735875129549951,
"grad_norm": 0.26787373423576355,
"learning_rate": 0.0001935138077591441,
"loss": 1.0076,
"mean_token_accuracy": 0.7816127564758062,
"num_tokens": 4154162.0,
"step": 1550
},
{
"epoch": 0.09798687227159951,
"grad_norm": 0.30029037594795227,
"learning_rate": 0.0001934719343424827,
"loss": 0.9324,
"mean_token_accuracy": 0.7917449362576008,
"num_tokens": 4182109.0,
"step": 1560
},
{
"epoch": 0.09861499324769951,
"grad_norm": 0.24414442479610443,
"learning_rate": 0.00019343006092582124,
"loss": 1.0249,
"mean_token_accuracy": 0.7818967200815677,
"num_tokens": 4209142.0,
"step": 1570
},
{
"epoch": 0.0992431142237995,
"grad_norm": 0.26703324913978577,
"learning_rate": 0.00019338818750915982,
"loss": 1.0002,
"mean_token_accuracy": 0.7795033905655145,
"num_tokens": 4235912.0,
"step": 1580
},
{
"epoch": 0.0998712351998995,
"grad_norm": 0.28080078959465027,
"learning_rate": 0.0001933463140924984,
"loss": 0.9852,
"mean_token_accuracy": 0.786427366361022,
"num_tokens": 4262060.0,
"step": 1590
},
{
"epoch": 0.10049935617599949,
"grad_norm": 0.26151251792907715,
"learning_rate": 0.00019330444067583696,
"loss": 0.9857,
"mean_token_accuracy": 0.7810143373906613,
"num_tokens": 4289261.0,
"step": 1600
},
{
"epoch": 0.1011274771520995,
"grad_norm": 0.2997615933418274,
"learning_rate": 0.00019326256725917554,
"loss": 0.9968,
"mean_token_accuracy": 0.7858185056596995,
"num_tokens": 4316403.0,
"step": 1610
},
{
"epoch": 0.1017555981281995,
"grad_norm": 0.2625775635242462,
"learning_rate": 0.0001932206938425141,
"loss": 0.97,
"mean_token_accuracy": 0.7898166347295046,
"num_tokens": 4342577.0,
"step": 1620
},
{
"epoch": 0.10238371910429948,
"grad_norm": 0.3279540538787842,
"learning_rate": 0.00019317882042585265,
"loss": 0.9943,
"mean_token_accuracy": 0.7791620220988988,
"num_tokens": 4369956.0,
"step": 1630
},
{
"epoch": 0.10301184008039949,
"grad_norm": 0.25715646147727966,
"learning_rate": 0.00019313694700919123,
"loss": 1.0133,
"mean_token_accuracy": 0.7784452803432942,
"num_tokens": 4396863.0,
"step": 1640
},
{
"epoch": 0.10363996105649949,
"grad_norm": 0.28638651967048645,
"learning_rate": 0.00019309507359252978,
"loss": 0.9625,
"mean_token_accuracy": 0.7899250488728284,
"num_tokens": 4423603.0,
"step": 1650
},
{
"epoch": 0.10426808203259948,
"grad_norm": 0.29546940326690674,
"learning_rate": 0.00019305320017586836,
"loss": 0.979,
"mean_token_accuracy": 0.7873238857835532,
"num_tokens": 4450002.0,
"step": 1660
},
{
"epoch": 0.10489620300869948,
"grad_norm": 0.3040964901447296,
"learning_rate": 0.00019301132675920692,
"loss": 0.9833,
"mean_token_accuracy": 0.7876588020473718,
"num_tokens": 4477401.0,
"step": 1670
},
{
"epoch": 0.10552432398479947,
"grad_norm": 0.32785487174987793,
"learning_rate": 0.0001929694533425455,
"loss": 0.996,
"mean_token_accuracy": 0.7845009371638298,
"num_tokens": 4503456.0,
"step": 1680
},
{
"epoch": 0.10615244496089947,
"grad_norm": 0.28163284063339233,
"learning_rate": 0.00019292757992588405,
"loss": 1.0135,
"mean_token_accuracy": 0.7795811247080564,
"num_tokens": 4529752.0,
"step": 1690
},
{
"epoch": 0.10678056593699947,
"grad_norm": 0.28460705280303955,
"learning_rate": 0.0001928857065092226,
"loss": 0.9923,
"mean_token_accuracy": 0.7835657082498073,
"num_tokens": 4557856.0,
"step": 1700
},
{
"epoch": 0.10740868691309946,
"grad_norm": 0.2662385106086731,
"learning_rate": 0.0001928438330925612,
"loss": 0.9934,
"mean_token_accuracy": 0.7865429297089577,
"num_tokens": 4583859.0,
"step": 1710
},
{
"epoch": 0.10803680788919946,
"grad_norm": 0.2522580325603485,
"learning_rate": 0.00019280195967589977,
"loss": 1.0404,
"mean_token_accuracy": 0.7729208268225193,
"num_tokens": 4611276.0,
"step": 1720
},
{
"epoch": 0.10866492886529945,
"grad_norm": 0.2595483958721161,
"learning_rate": 0.00019276008625923835,
"loss": 1.0,
"mean_token_accuracy": 0.7807778958231211,
"num_tokens": 4637120.0,
"step": 1730
},
{
"epoch": 0.10929304984139945,
"grad_norm": 0.26809316873550415,
"learning_rate": 0.0001927182128425769,
"loss": 1.0054,
"mean_token_accuracy": 0.785046449303627,
"num_tokens": 4663842.0,
"step": 1740
},
{
"epoch": 0.10992117081749946,
"grad_norm": 0.2990230917930603,
"learning_rate": 0.00019267633942591548,
"loss": 0.9824,
"mean_token_accuracy": 0.7885061156004667,
"num_tokens": 4689968.0,
"step": 1750
},
{
"epoch": 0.11054929179359944,
"grad_norm": 0.3112734854221344,
"learning_rate": 0.00019263446600925404,
"loss": 0.9684,
"mean_token_accuracy": 0.7874101549386978,
"num_tokens": 4716585.0,
"step": 1760
},
{
"epoch": 0.11117741276969945,
"grad_norm": 0.30101364850997925,
"learning_rate": 0.0001925925925925926,
"loss": 1.0086,
"mean_token_accuracy": 0.7845278918743134,
"num_tokens": 4742455.0,
"step": 1770
},
{
"epoch": 0.11180553374579943,
"grad_norm": 0.25282517075538635,
"learning_rate": 0.00019255071917593117,
"loss": 1.0152,
"mean_token_accuracy": 0.779124328121543,
"num_tokens": 4768991.0,
"step": 1780
},
{
"epoch": 0.11243365472189944,
"grad_norm": 0.29368168115615845,
"learning_rate": 0.00019250884575926973,
"loss": 1.0208,
"mean_token_accuracy": 0.7812142610549927,
"num_tokens": 4794582.0,
"step": 1790
},
{
"epoch": 0.11306177569799944,
"grad_norm": 0.278226763010025,
"learning_rate": 0.0001924669723426083,
"loss": 0.9807,
"mean_token_accuracy": 0.7866754315793514,
"num_tokens": 4821482.0,
"step": 1800
},
{
"epoch": 0.11368989667409943,
"grad_norm": 0.26571571826934814,
"learning_rate": 0.00019242509892594686,
"loss": 0.9815,
"mean_token_accuracy": 0.7797151349484921,
"num_tokens": 4849134.0,
"step": 1810
},
{
"epoch": 0.11431801765019943,
"grad_norm": 0.31266430020332336,
"learning_rate": 0.00019238322550928544,
"loss": 0.9634,
"mean_token_accuracy": 0.7930579505860805,
"num_tokens": 4875237.0,
"step": 1820
},
{
"epoch": 0.11494613862629942,
"grad_norm": 0.28882619738578796,
"learning_rate": 0.000192341352092624,
"loss": 1.0578,
"mean_token_accuracy": 0.7782481156289578,
"num_tokens": 4901212.0,
"step": 1830
},
{
"epoch": 0.11557425960239942,
"grad_norm": 0.29485785961151123,
"learning_rate": 0.00019229947867596255,
"loss": 1.014,
"mean_token_accuracy": 0.7824111267924309,
"num_tokens": 4928104.0,
"step": 1840
},
{
"epoch": 0.11620238057849942,
"grad_norm": 0.2997286021709442,
"learning_rate": 0.00019225760525930113,
"loss": 0.9971,
"mean_token_accuracy": 0.7847440119832754,
"num_tokens": 4954454.0,
"step": 1850
},
{
"epoch": 0.11683050155459941,
"grad_norm": 0.3043171167373657,
"learning_rate": 0.00019221573184263971,
"loss": 1.0072,
"mean_token_accuracy": 0.7804120637476444,
"num_tokens": 4981254.0,
"step": 1860
},
{
"epoch": 0.11745862253069941,
"grad_norm": 0.29763031005859375,
"learning_rate": 0.00019217385842597827,
"loss": 0.953,
"mean_token_accuracy": 0.7923042386770248,
"num_tokens": 5007868.0,
"step": 1870
},
{
"epoch": 0.11808674350679942,
"grad_norm": 0.259555459022522,
"learning_rate": 0.00019213198500931685,
"loss": 0.9678,
"mean_token_accuracy": 0.7879516039043665,
"num_tokens": 5034353.0,
"step": 1880
},
{
"epoch": 0.1187148644828994,
"grad_norm": 0.24496302008628845,
"learning_rate": 0.00019209011159265543,
"loss": 1.0191,
"mean_token_accuracy": 0.7830112751573324,
"num_tokens": 5060668.0,
"step": 1890
},
{
"epoch": 0.1193429854589994,
"grad_norm": 0.25683844089508057,
"learning_rate": 0.00019204823817599398,
"loss": 1.0245,
"mean_token_accuracy": 0.7791640534996986,
"num_tokens": 5087233.0,
"step": 1900
},
{
"epoch": 0.1199711064350994,
"grad_norm": 0.26764971017837524,
"learning_rate": 0.00019200636475933257,
"loss": 0.9839,
"mean_token_accuracy": 0.7851655505597591,
"num_tokens": 5112981.0,
"step": 1910
},
{
"epoch": 0.1205992274111994,
"grad_norm": 0.2569602429866791,
"learning_rate": 0.00019196449134267112,
"loss": 0.982,
"mean_token_accuracy": 0.7832812406122684,
"num_tokens": 5139753.0,
"step": 1920
},
{
"epoch": 0.1212273483872994,
"grad_norm": 0.3184845745563507,
"learning_rate": 0.00019192261792600967,
"loss": 0.9749,
"mean_token_accuracy": 0.795413101837039,
"num_tokens": 5164964.0,
"step": 1930
},
{
"epoch": 0.12185546936339939,
"grad_norm": 0.27534252405166626,
"learning_rate": 0.00019188074450934825,
"loss": 0.9781,
"mean_token_accuracy": 0.7883633185178042,
"num_tokens": 5191637.0,
"step": 1940
},
{
"epoch": 0.12248359033949939,
"grad_norm": 0.32787275314331055,
"learning_rate": 0.0001918388710926868,
"loss": 0.9992,
"mean_token_accuracy": 0.782159774377942,
"num_tokens": 5218067.0,
"step": 1950
},
{
"epoch": 0.12311171131559938,
"grad_norm": 0.3248906433582306,
"learning_rate": 0.0001917969976760254,
"loss": 1.0157,
"mean_token_accuracy": 0.7802824929356575,
"num_tokens": 5243981.0,
"step": 1960
},
{
"epoch": 0.12373983229169938,
"grad_norm": 0.2404022514820099,
"learning_rate": 0.00019175512425936394,
"loss": 0.9981,
"mean_token_accuracy": 0.7823897533118724,
"num_tokens": 5271897.0,
"step": 1970
},
{
"epoch": 0.12436795326779938,
"grad_norm": 0.26277250051498413,
"learning_rate": 0.00019171325084270252,
"loss": 0.9313,
"mean_token_accuracy": 0.796189296618104,
"num_tokens": 5298642.0,
"step": 1980
},
{
"epoch": 0.12499607424389937,
"grad_norm": 0.24106673896312714,
"learning_rate": 0.00019167137742604108,
"loss": 1.0172,
"mean_token_accuracy": 0.7824347522109747,
"num_tokens": 5324815.0,
"step": 1990
},
{
"epoch": 0.12562419521999937,
"grad_norm": 0.25554001331329346,
"learning_rate": 0.00019162950400937963,
"loss": 0.958,
"mean_token_accuracy": 0.7892453044652938,
"num_tokens": 5352563.0,
"step": 2000
},
{
"epoch": 0.12625231619609936,
"grad_norm": 0.24342681467533112,
"learning_rate": 0.00019158763059271821,
"loss": 0.9531,
"mean_token_accuracy": 0.7900742635130882,
"num_tokens": 5380459.0,
"step": 2010
},
{
"epoch": 0.12688043717219938,
"grad_norm": 0.2770666182041168,
"learning_rate": 0.0001915457571760568,
"loss": 0.9917,
"mean_token_accuracy": 0.7898676563054323,
"num_tokens": 5406719.0,
"step": 2020
},
{
"epoch": 0.12750855814829937,
"grad_norm": 0.2893310785293579,
"learning_rate": 0.00019150388375939538,
"loss": 0.9453,
"mean_token_accuracy": 0.795016722008586,
"num_tokens": 5433643.0,
"step": 2030
},
{
"epoch": 0.12813667912439936,
"grad_norm": 0.30994004011154175,
"learning_rate": 0.00019146201034273393,
"loss": 0.9609,
"mean_token_accuracy": 0.7939148671925068,
"num_tokens": 5460355.0,
"step": 2040
},
{
"epoch": 0.12876480010049934,
"grad_norm": 0.27722305059432983,
"learning_rate": 0.0001914201369260725,
"loss": 0.9714,
"mean_token_accuracy": 0.7912579335272312,
"num_tokens": 5487760.0,
"step": 2050
},
{
"epoch": 0.12939292107659936,
"grad_norm": 0.28644341230392456,
"learning_rate": 0.00019137826350941107,
"loss": 0.9774,
"mean_token_accuracy": 0.7874169372022152,
"num_tokens": 5514206.0,
"step": 2060
},
{
"epoch": 0.13002104205269935,
"grad_norm": 0.2941623032093048,
"learning_rate": 0.00019133639009274962,
"loss": 1.0533,
"mean_token_accuracy": 0.7767478346824646,
"num_tokens": 5540555.0,
"step": 2070
},
{
"epoch": 0.13064916302879934,
"grad_norm": 0.31064921617507935,
"learning_rate": 0.0001912945166760882,
"loss": 1.0319,
"mean_token_accuracy": 0.7783826030790806,
"num_tokens": 5566402.0,
"step": 2080
},
{
"epoch": 0.13127728400489935,
"grad_norm": 0.28749072551727295,
"learning_rate": 0.00019125264325942675,
"loss": 1.0165,
"mean_token_accuracy": 0.7805188350379467,
"num_tokens": 5593168.0,
"step": 2090
},
{
"epoch": 0.13190540498099934,
"grad_norm": 0.28191903233528137,
"learning_rate": 0.00019121076984276534,
"loss": 0.9736,
"mean_token_accuracy": 0.7900103904306889,
"num_tokens": 5619001.0,
"step": 2100
},
{
"epoch": 0.13253352595709933,
"grad_norm": 0.2756195664405823,
"learning_rate": 0.0001911688964261039,
"loss": 1.0194,
"mean_token_accuracy": 0.7771706860512495,
"num_tokens": 5645679.0,
"step": 2110
},
{
"epoch": 0.13316164693319935,
"grad_norm": 0.25393643975257874,
"learning_rate": 0.00019112702300944247,
"loss": 0.9981,
"mean_token_accuracy": 0.7815123125910759,
"num_tokens": 5672282.0,
"step": 2120
},
{
"epoch": 0.13378976790929933,
"grad_norm": 0.2670615315437317,
"learning_rate": 0.00019108514959278102,
"loss": 0.993,
"mean_token_accuracy": 0.7842022236436605,
"num_tokens": 5699662.0,
"step": 2130
},
{
"epoch": 0.13441788888539932,
"grad_norm": 0.2826876640319824,
"learning_rate": 0.00019104327617611958,
"loss": 0.9995,
"mean_token_accuracy": 0.7823238395154476,
"num_tokens": 5726105.0,
"step": 2140
},
{
"epoch": 0.1350460098614993,
"grad_norm": 0.2938212454319,
"learning_rate": 0.00019100140275945816,
"loss": 1.004,
"mean_token_accuracy": 0.7877405568957329,
"num_tokens": 5752171.0,
"step": 2150
},
{
"epoch": 0.13567413083759933,
"grad_norm": 0.3114703297615051,
"learning_rate": 0.00019095952934279674,
"loss": 0.9896,
"mean_token_accuracy": 0.7846675843000412,
"num_tokens": 5779452.0,
"step": 2160
},
{
"epoch": 0.13630225181369932,
"grad_norm": 0.3218187391757965,
"learning_rate": 0.00019091765592613532,
"loss": 0.9587,
"mean_token_accuracy": 0.7931745149195194,
"num_tokens": 5805682.0,
"step": 2170
},
{
"epoch": 0.1369303727897993,
"grad_norm": 0.2846287190914154,
"learning_rate": 0.00019087578250947388,
"loss": 0.9928,
"mean_token_accuracy": 0.7823387812823057,
"num_tokens": 5832859.0,
"step": 2180
},
{
"epoch": 0.13755849376589932,
"grad_norm": 0.3179105520248413,
"learning_rate": 0.00019083390909281246,
"loss": 0.9854,
"mean_token_accuracy": 0.7867173902690411,
"num_tokens": 5859708.0,
"step": 2190
},
{
"epoch": 0.1381866147419993,
"grad_norm": 0.25780409574508667,
"learning_rate": 0.000190792035676151,
"loss": 0.9903,
"mean_token_accuracy": 0.7886831004172563,
"num_tokens": 5885166.0,
"step": 2200
},
{
"epoch": 0.1388147357180993,
"grad_norm": 0.27167221903800964,
"learning_rate": 0.00019075016225948956,
"loss": 0.9992,
"mean_token_accuracy": 0.7835381802171468,
"num_tokens": 5912676.0,
"step": 2210
},
{
"epoch": 0.1394428566941993,
"grad_norm": 0.28806015849113464,
"learning_rate": 0.00019070828884282815,
"loss": 0.9932,
"mean_token_accuracy": 0.7843023527413606,
"num_tokens": 5939655.0,
"step": 2220
},
{
"epoch": 0.1400709776702993,
"grad_norm": 0.26339516043663025,
"learning_rate": 0.0001906664154261667,
"loss": 0.9745,
"mean_token_accuracy": 0.7840303633362055,
"num_tokens": 5966542.0,
"step": 2230
},
{
"epoch": 0.1406990986463993,
"grad_norm": 0.289928138256073,
"learning_rate": 0.00019062454200950528,
"loss": 0.975,
"mean_token_accuracy": 0.7867904342710972,
"num_tokens": 5992575.0,
"step": 2240
},
{
"epoch": 0.1413272196224993,
"grad_norm": 0.24072448909282684,
"learning_rate": 0.00019058266859284383,
"loss": 0.9355,
"mean_token_accuracy": 0.7977154236286879,
"num_tokens": 6018768.0,
"step": 2250
},
{
"epoch": 0.1419553405985993,
"grad_norm": 0.2968997359275818,
"learning_rate": 0.00019054079517618242,
"loss": 1.011,
"mean_token_accuracy": 0.7791931878775358,
"num_tokens": 6046493.0,
"step": 2260
},
{
"epoch": 0.14258346157469928,
"grad_norm": 0.307750403881073,
"learning_rate": 0.00019049892175952097,
"loss": 0.9871,
"mean_token_accuracy": 0.7865296632051468,
"num_tokens": 6073460.0,
"step": 2270
},
{
"epoch": 0.14321158255079927,
"grad_norm": 0.24764509499073029,
"learning_rate": 0.00019045704834285952,
"loss": 0.9345,
"mean_token_accuracy": 0.7938060730695724,
"num_tokens": 6099802.0,
"step": 2280
},
{
"epoch": 0.1438397035268993,
"grad_norm": 0.26876288652420044,
"learning_rate": 0.0001904151749261981,
"loss": 0.9601,
"mean_token_accuracy": 0.7912701655179262,
"num_tokens": 6126401.0,
"step": 2290
},
{
"epoch": 0.14446782450299928,
"grad_norm": 0.25304463505744934,
"learning_rate": 0.00019037330150953666,
"loss": 0.9968,
"mean_token_accuracy": 0.7839574735611677,
"num_tokens": 6154217.0,
"step": 2300
},
{
"epoch": 0.14509594547909926,
"grad_norm": 0.3394694924354553,
"learning_rate": 0.00019033142809287524,
"loss": 0.9873,
"mean_token_accuracy": 0.7853979174047708,
"num_tokens": 6181392.0,
"step": 2310
},
{
"epoch": 0.14572406645519928,
"grad_norm": 0.244957834482193,
"learning_rate": 0.00019028955467621382,
"loss": 1.0261,
"mean_token_accuracy": 0.777070652320981,
"num_tokens": 6209710.0,
"step": 2320
},
{
"epoch": 0.14635218743129927,
"grad_norm": 0.2903886139392853,
"learning_rate": 0.0001902476812595524,
"loss": 0.9856,
"mean_token_accuracy": 0.7820085968822241,
"num_tokens": 6236756.0,
"step": 2330
},
{
"epoch": 0.14698030840739926,
"grad_norm": 0.2940092384815216,
"learning_rate": 0.00019020580784289096,
"loss": 1.0119,
"mean_token_accuracy": 0.782718800008297,
"num_tokens": 6263676.0,
"step": 2340
},
{
"epoch": 0.14760842938349927,
"grad_norm": 0.3023865222930908,
"learning_rate": 0.0001901639344262295,
"loss": 0.9761,
"mean_token_accuracy": 0.7865527033805847,
"num_tokens": 6290468.0,
"step": 2350
},
{
"epoch": 0.14823655035959926,
"grad_norm": 0.3264501392841339,
"learning_rate": 0.0001901220610095681,
"loss": 0.9889,
"mean_token_accuracy": 0.7882603086531162,
"num_tokens": 6317021.0,
"step": 2360
},
{
"epoch": 0.14886467133569925,
"grad_norm": 0.29618483781814575,
"learning_rate": 0.00019008018759290665,
"loss": 1.0414,
"mean_token_accuracy": 0.7765590559691191,
"num_tokens": 6342448.0,
"step": 2370
},
{
"epoch": 0.14949279231179927,
"grad_norm": 0.275785893201828,
"learning_rate": 0.00019003831417624523,
"loss": 0.9479,
"mean_token_accuracy": 0.7965521182864904,
"num_tokens": 6367783.0,
"step": 2380
},
{
"epoch": 0.15012091328789925,
"grad_norm": 0.32756808400154114,
"learning_rate": 0.00018999644075958378,
"loss": 0.9494,
"mean_token_accuracy": 0.7935776200145483,
"num_tokens": 6395320.0,
"step": 2390
},
{
"epoch": 0.15074903426399924,
"grad_norm": 0.3014850318431854,
"learning_rate": 0.00018995456734292236,
"loss": 0.961,
"mean_token_accuracy": 0.7955108307301998,
"num_tokens": 6422482.0,
"step": 2400
},
{
"epoch": 0.15137715524009923,
"grad_norm": 0.31270134449005127,
"learning_rate": 0.00018991269392626092,
"loss": 0.98,
"mean_token_accuracy": 0.7862629968672991,
"num_tokens": 6450301.0,
"step": 2410
},
{
"epoch": 0.15200527621619925,
"grad_norm": 0.27296221256256104,
"learning_rate": 0.0001898708205095995,
"loss": 0.9404,
"mean_token_accuracy": 0.7951943475753069,
"num_tokens": 6477122.0,
"step": 2420
},
{
"epoch": 0.15263339719229924,
"grad_norm": 0.2963928282260895,
"learning_rate": 0.00018982894709293805,
"loss": 0.9901,
"mean_token_accuracy": 0.7869273141026497,
"num_tokens": 6503999.0,
"step": 2430
},
{
"epoch": 0.15326151816839922,
"grad_norm": 0.25688695907592773,
"learning_rate": 0.0001897870736762766,
"loss": 0.9848,
"mean_token_accuracy": 0.7863536704331636,
"num_tokens": 6529907.0,
"step": 2440
},
{
"epoch": 0.15388963914449924,
"grad_norm": 0.2709560990333557,
"learning_rate": 0.00018974520025961519,
"loss": 0.9772,
"mean_token_accuracy": 0.7843648813664913,
"num_tokens": 6558499.0,
"step": 2450
},
{
"epoch": 0.15451776012059923,
"grad_norm": 0.268532395362854,
"learning_rate": 0.00018970332684295377,
"loss": 0.9949,
"mean_token_accuracy": 0.7847375877201557,
"num_tokens": 6585742.0,
"step": 2460
},
{
"epoch": 0.15514588109669922,
"grad_norm": 0.2693954408168793,
"learning_rate": 0.00018966145342629235,
"loss": 0.9563,
"mean_token_accuracy": 0.7913835499435663,
"num_tokens": 6612218.0,
"step": 2470
},
{
"epoch": 0.15577400207279923,
"grad_norm": 0.26215437054634094,
"learning_rate": 0.0001896195800096309,
"loss": 0.9858,
"mean_token_accuracy": 0.7850921977311373,
"num_tokens": 6639343.0,
"step": 2480
},
{
"epoch": 0.15640212304889922,
"grad_norm": 0.2571866512298584,
"learning_rate": 0.00018957770659296948,
"loss": 1.0043,
"mean_token_accuracy": 0.7850870199501514,
"num_tokens": 6667103.0,
"step": 2490
},
{
"epoch": 0.1570302440249992,
"grad_norm": 0.23914095759391785,
"learning_rate": 0.00018953583317630804,
"loss": 0.9701,
"mean_token_accuracy": 0.7945085145533085,
"num_tokens": 6694365.0,
"step": 2500
},
{
"epoch": 0.1576583650010992,
"grad_norm": 0.3401123285293579,
"learning_rate": 0.0001894939597596466,
"loss": 0.9711,
"mean_token_accuracy": 0.789525717869401,
"num_tokens": 6720672.0,
"step": 2510
},
{
"epoch": 0.15828648597719921,
"grad_norm": 0.33853694796562195,
"learning_rate": 0.00018945208634298517,
"loss": 0.9956,
"mean_token_accuracy": 0.7857484348118305,
"num_tokens": 6746709.0,
"step": 2520
},
{
"epoch": 0.1589146069532992,
"grad_norm": 0.26513391733169556,
"learning_rate": 0.00018941021292632373,
"loss": 0.9682,
"mean_token_accuracy": 0.7871077805757523,
"num_tokens": 6774512.0,
"step": 2530
},
{
"epoch": 0.1595427279293992,
"grad_norm": 0.29272544384002686,
"learning_rate": 0.0001893683395096623,
"loss": 0.9868,
"mean_token_accuracy": 0.7914240621030331,
"num_tokens": 6800463.0,
"step": 2540
},
{
"epoch": 0.1601708489054992,
"grad_norm": 0.2921249270439148,
"learning_rate": 0.00018932646609300086,
"loss": 0.9646,
"mean_token_accuracy": 0.7920668996870518,
"num_tokens": 6827179.0,
"step": 2550
},
{
"epoch": 0.1607989698815992,
"grad_norm": 0.33000192046165466,
"learning_rate": 0.00018928459267633944,
"loss": 1.0272,
"mean_token_accuracy": 0.7887022830545902,
"num_tokens": 6852306.0,
"step": 2560
},
{
"epoch": 0.16142709085769918,
"grad_norm": 0.29650112986564636,
"learning_rate": 0.000189242719259678,
"loss": 0.9629,
"mean_token_accuracy": 0.7959261048585177,
"num_tokens": 6878427.0,
"step": 2570
},
{
"epoch": 0.1620552118337992,
"grad_norm": 0.26963382959365845,
"learning_rate": 0.00018920084584301655,
"loss": 0.9915,
"mean_token_accuracy": 0.7845980357378721,
"num_tokens": 6903163.0,
"step": 2580
},
{
"epoch": 0.1626833328098992,
"grad_norm": 0.3108598589897156,
"learning_rate": 0.00018915897242635513,
"loss": 0.9931,
"mean_token_accuracy": 0.7855133522301913,
"num_tokens": 6930211.0,
"step": 2590
},
{
"epoch": 0.16331145378599918,
"grad_norm": 0.306082159280777,
"learning_rate": 0.0001891170990096937,
"loss": 0.9638,
"mean_token_accuracy": 0.7936804510653019,
"num_tokens": 6956489.0,
"step": 2600
},
{
"epoch": 0.1639395747620992,
"grad_norm": 0.286647230386734,
"learning_rate": 0.00018907522559303227,
"loss": 0.9918,
"mean_token_accuracy": 0.7832652296870947,
"num_tokens": 6982824.0,
"step": 2610
},
{
"epoch": 0.16456769573819918,
"grad_norm": 0.30177125334739685,
"learning_rate": 0.00018903335217637085,
"loss": 0.9848,
"mean_token_accuracy": 0.7813052102923393,
"num_tokens": 7010550.0,
"step": 2620
},
{
"epoch": 0.16519581671429917,
"grad_norm": 0.30501213669776917,
"learning_rate": 0.00018899147875970943,
"loss": 0.9836,
"mean_token_accuracy": 0.789811997488141,
"num_tokens": 7035917.0,
"step": 2630
},
{
"epoch": 0.16582393769039916,
"grad_norm": 0.2610650062561035,
"learning_rate": 0.00018894960534304798,
"loss": 0.9851,
"mean_token_accuracy": 0.7899245552718639,
"num_tokens": 7061388.0,
"step": 2640
},
{
"epoch": 0.16645205866649918,
"grad_norm": 0.2944414019584656,
"learning_rate": 0.00018890773192638654,
"loss": 1.0011,
"mean_token_accuracy": 0.7827542286366225,
"num_tokens": 7088492.0,
"step": 2650
},
{
"epoch": 0.16708017964259916,
"grad_norm": 0.2874250113964081,
"learning_rate": 0.00018886585850972512,
"loss": 0.9858,
"mean_token_accuracy": 0.7829206600785256,
"num_tokens": 7115146.0,
"step": 2660
},
{
"epoch": 0.16770830061869915,
"grad_norm": 0.27393653988838196,
"learning_rate": 0.00018882398509306367,
"loss": 0.973,
"mean_token_accuracy": 0.7905403438955545,
"num_tokens": 7140131.0,
"step": 2670
},
{
"epoch": 0.16833642159479917,
"grad_norm": 0.29718559980392456,
"learning_rate": 0.00018878211167640225,
"loss": 0.9891,
"mean_token_accuracy": 0.783644600585103,
"num_tokens": 7166821.0,
"step": 2680
},
{
"epoch": 0.16896454257089916,
"grad_norm": 0.27317872643470764,
"learning_rate": 0.0001887402382597408,
"loss": 1.017,
"mean_token_accuracy": 0.784059465304017,
"num_tokens": 7193362.0,
"step": 2690
},
{
"epoch": 0.16959266354699915,
"grad_norm": 0.25636228919029236,
"learning_rate": 0.0001886983648430794,
"loss": 0.9531,
"mean_token_accuracy": 0.7920433443039656,
"num_tokens": 7220619.0,
"step": 2700
},
{
"epoch": 0.17022078452309916,
"grad_norm": 0.2882969379425049,
"learning_rate": 0.00018865649142641794,
"loss": 0.934,
"mean_token_accuracy": 0.7929070591926575,
"num_tokens": 7247688.0,
"step": 2710
},
{
"epoch": 0.17084890549919915,
"grad_norm": 0.3216884434223175,
"learning_rate": 0.0001886146180097565,
"loss": 0.9496,
"mean_token_accuracy": 0.7929627750068903,
"num_tokens": 7274712.0,
"step": 2720
},
{
"epoch": 0.17147702647529914,
"grad_norm": 0.27841243147850037,
"learning_rate": 0.00018857274459309508,
"loss": 0.9323,
"mean_token_accuracy": 0.7978887390345335,
"num_tokens": 7300487.0,
"step": 2730
},
{
"epoch": 0.17210514745139913,
"grad_norm": 0.2577762007713318,
"learning_rate": 0.00018853087117643363,
"loss": 1.0107,
"mean_token_accuracy": 0.7844824850559234,
"num_tokens": 7327049.0,
"step": 2740
},
{
"epoch": 0.17273326842749914,
"grad_norm": 0.29990464448928833,
"learning_rate": 0.0001884889977597722,
"loss": 0.9467,
"mean_token_accuracy": 0.7949109837412834,
"num_tokens": 7352797.0,
"step": 2750
},
{
"epoch": 0.17336138940359913,
"grad_norm": 0.24397854506969452,
"learning_rate": 0.0001884471243431108,
"loss": 0.9869,
"mean_token_accuracy": 0.7791798021644354,
"num_tokens": 7381508.0,
"step": 2760
},
{
"epoch": 0.17398951037969912,
"grad_norm": 0.27623310685157776,
"learning_rate": 0.00018840525092644937,
"loss": 0.9483,
"mean_token_accuracy": 0.7909042112529278,
"num_tokens": 7409592.0,
"step": 2770
},
{
"epoch": 0.17461763135579914,
"grad_norm": 0.30223146080970764,
"learning_rate": 0.00018836337750978793,
"loss": 0.9961,
"mean_token_accuracy": 0.7863899141550064,
"num_tokens": 7436032.0,
"step": 2780
},
{
"epoch": 0.17524575233189912,
"grad_norm": 0.2969076633453369,
"learning_rate": 0.00018832150409312648,
"loss": 0.947,
"mean_token_accuracy": 0.7935981251299381,
"num_tokens": 7464224.0,
"step": 2790
},
{
"epoch": 0.1758738733079991,
"grad_norm": 0.2720794379711151,
"learning_rate": 0.00018827963067646506,
"loss": 0.967,
"mean_token_accuracy": 0.7897748045623303,
"num_tokens": 7491312.0,
"step": 2800
},
{
"epoch": 0.17650199428409913,
"grad_norm": 0.2531968355178833,
"learning_rate": 0.00018823775725980362,
"loss": 0.9677,
"mean_token_accuracy": 0.7889728490263224,
"num_tokens": 7520696.0,
"step": 2810
},
{
"epoch": 0.17713011526019912,
"grad_norm": 0.24469265341758728,
"learning_rate": 0.0001881958838431422,
"loss": 0.9286,
"mean_token_accuracy": 0.7995743758976459,
"num_tokens": 7546911.0,
"step": 2820
},
{
"epoch": 0.1777582362362991,
"grad_norm": 0.2589986026287079,
"learning_rate": 0.00018815401042648075,
"loss": 1.0061,
"mean_token_accuracy": 0.7818490665405988,
"num_tokens": 7573321.0,
"step": 2830
},
{
"epoch": 0.17838635721239912,
"grad_norm": 0.30957522988319397,
"learning_rate": 0.00018811213700981933,
"loss": 0.9243,
"mean_token_accuracy": 0.7941294971853494,
"num_tokens": 7600879.0,
"step": 2840
},
{
"epoch": 0.1790144781884991,
"grad_norm": 0.2634665071964264,
"learning_rate": 0.0001880702635931579,
"loss": 0.9441,
"mean_token_accuracy": 0.794154980033636,
"num_tokens": 7627977.0,
"step": 2850
},
{
"epoch": 0.1796425991645991,
"grad_norm": 0.2403445690870285,
"learning_rate": 0.00018802839017649644,
"loss": 0.9614,
"mean_token_accuracy": 0.7935540229082108,
"num_tokens": 7654559.0,
"step": 2860
},
{
"epoch": 0.1802707201406991,
"grad_norm": 0.2723034918308258,
"learning_rate": 0.00018798651675983502,
"loss": 0.9359,
"mean_token_accuracy": 0.7911634873598814,
"num_tokens": 7681465.0,
"step": 2870
},
{
"epoch": 0.1808988411167991,
"grad_norm": 0.24261696636676788,
"learning_rate": 0.00018794464334317358,
"loss": 0.9781,
"mean_token_accuracy": 0.788073031976819,
"num_tokens": 7708526.0,
"step": 2880
},
{
"epoch": 0.1815269620928991,
"grad_norm": 0.29172760248184204,
"learning_rate": 0.00018790276992651216,
"loss": 0.9764,
"mean_token_accuracy": 0.7852031115442515,
"num_tokens": 7735060.0,
"step": 2890
},
{
"epoch": 0.18215508306899908,
"grad_norm": 0.29553157091140747,
"learning_rate": 0.00018786089650985074,
"loss": 0.9929,
"mean_token_accuracy": 0.789004210010171,
"num_tokens": 7761517.0,
"step": 2900
},
{
"epoch": 0.1827832040450991,
"grad_norm": 0.26275527477264404,
"learning_rate": 0.0001878190230931893,
"loss": 0.98,
"mean_token_accuracy": 0.7868116334080696,
"num_tokens": 7788884.0,
"step": 2910
},
{
"epoch": 0.18341132502119908,
"grad_norm": 0.26689252257347107,
"learning_rate": 0.00018777714967652787,
"loss": 0.9934,
"mean_token_accuracy": 0.7822608612477779,
"num_tokens": 7816982.0,
"step": 2920
},
{
"epoch": 0.18403944599729907,
"grad_norm": 0.30625566840171814,
"learning_rate": 0.00018773527625986645,
"loss": 0.9735,
"mean_token_accuracy": 0.7886403530836106,
"num_tokens": 7844921.0,
"step": 2930
},
{
"epoch": 0.1846675669733991,
"grad_norm": 0.30838075280189514,
"learning_rate": 0.000187693402843205,
"loss": 0.9722,
"mean_token_accuracy": 0.7919574566185474,
"num_tokens": 7871646.0,
"step": 2940
},
{
"epoch": 0.18529568794949908,
"grad_norm": 0.286663293838501,
"learning_rate": 0.00018765152942654356,
"loss": 0.9823,
"mean_token_accuracy": 0.7880583092570305,
"num_tokens": 7899070.0,
"step": 2950
},
{
"epoch": 0.18592380892559907,
"grad_norm": 0.2832724452018738,
"learning_rate": 0.00018760965600988214,
"loss": 0.963,
"mean_token_accuracy": 0.7926479645073414,
"num_tokens": 7925945.0,
"step": 2960
},
{
"epoch": 0.18655192990169905,
"grad_norm": 0.3050813376903534,
"learning_rate": 0.0001875677825932207,
"loss": 0.9525,
"mean_token_accuracy": 0.7921919580549002,
"num_tokens": 7952472.0,
"step": 2970
},
{
"epoch": 0.18718005087779907,
"grad_norm": 0.29279011487960815,
"learning_rate": 0.00018752590917655928,
"loss": 1.0318,
"mean_token_accuracy": 0.7809717856347561,
"num_tokens": 7978482.0,
"step": 2980
},
{
"epoch": 0.18780817185389906,
"grad_norm": 0.32541248202323914,
"learning_rate": 0.00018748403575989783,
"loss": 0.972,
"mean_token_accuracy": 0.7893663041293622,
"num_tokens": 8005205.0,
"step": 2990
},
{
"epoch": 0.18843629282999905,
"grad_norm": 0.32494834065437317,
"learning_rate": 0.00018744216234323641,
"loss": 0.9281,
"mean_token_accuracy": 0.7966420441865921,
"num_tokens": 8032535.0,
"step": 3000
},
{
"epoch": 0.18906441380609906,
"grad_norm": 0.24331510066986084,
"learning_rate": 0.00018740028892657497,
"loss": 0.9983,
"mean_token_accuracy": 0.7830683149397373,
"num_tokens": 8059911.0,
"step": 3010
},
{
"epoch": 0.18969253478219905,
"grad_norm": 0.28518521785736084,
"learning_rate": 0.00018735841550991352,
"loss": 0.9488,
"mean_token_accuracy": 0.7934920992702246,
"num_tokens": 8086670.0,
"step": 3020
},
{
"epoch": 0.19032065575829904,
"grad_norm": 0.31971925497055054,
"learning_rate": 0.0001873165420932521,
"loss": 0.9806,
"mean_token_accuracy": 0.7857418902218342,
"num_tokens": 8112851.0,
"step": 3030
},
{
"epoch": 0.19094877673439906,
"grad_norm": 0.3072707951068878,
"learning_rate": 0.00018727466867659066,
"loss": 0.9864,
"mean_token_accuracy": 0.7890610966831446,
"num_tokens": 8139623.0,
"step": 3040
},
{
"epoch": 0.19157689771049904,
"grad_norm": 0.2833654284477234,
"learning_rate": 0.00018723279525992924,
"loss": 0.9304,
"mean_token_accuracy": 0.7993248742073774,
"num_tokens": 8165400.0,
"step": 3050
},
{
"epoch": 0.19220501868659903,
"grad_norm": 0.2709767818450928,
"learning_rate": 0.00018719092184326782,
"loss": 0.9766,
"mean_token_accuracy": 0.7900667380541563,
"num_tokens": 8192046.0,
"step": 3060
},
{
"epoch": 0.19283313966269905,
"grad_norm": 0.3056877851486206,
"learning_rate": 0.0001871490484266064,
"loss": 0.9859,
"mean_token_accuracy": 0.7832565013319254,
"num_tokens": 8218756.0,
"step": 3070
},
{
"epoch": 0.19346126063879904,
"grad_norm": 0.25996148586273193,
"learning_rate": 0.00018710717500994495,
"loss": 0.9694,
"mean_token_accuracy": 0.7857969712466002,
"num_tokens": 8247498.0,
"step": 3080
},
{
"epoch": 0.19408938161489903,
"grad_norm": 0.2731459140777588,
"learning_rate": 0.0001870653015932835,
"loss": 0.9327,
"mean_token_accuracy": 0.7962075632065535,
"num_tokens": 8273330.0,
"step": 3090
},
{
"epoch": 0.19471750259099901,
"grad_norm": 0.26292166113853455,
"learning_rate": 0.0001870234281766221,
"loss": 0.9521,
"mean_token_accuracy": 0.7912837877869606,
"num_tokens": 8300718.0,
"step": 3100
},
{
"epoch": 0.19534562356709903,
"grad_norm": 0.26134082674980164,
"learning_rate": 0.00018698155475996064,
"loss": 0.9764,
"mean_token_accuracy": 0.7882513340562582,
"num_tokens": 8328366.0,
"step": 3110
},
{
"epoch": 0.19597374454319902,
"grad_norm": 0.23814305663108826,
"learning_rate": 0.00018693968134329922,
"loss": 0.9752,
"mean_token_accuracy": 0.7884778048843145,
"num_tokens": 8355315.0,
"step": 3120
},
{
"epoch": 0.196601865519299,
"grad_norm": 0.3375711143016815,
"learning_rate": 0.00018689780792663778,
"loss": 0.9621,
"mean_token_accuracy": 0.7901697169989348,
"num_tokens": 8381642.0,
"step": 3130
},
{
"epoch": 0.19722998649539902,
"grad_norm": 0.34628236293792725,
"learning_rate": 0.00018685593450997636,
"loss": 0.9462,
"mean_token_accuracy": 0.795050111413002,
"num_tokens": 8407747.0,
"step": 3140
},
{
"epoch": 0.197858107471499,
"grad_norm": 0.28467345237731934,
"learning_rate": 0.0001868140610933149,
"loss": 0.964,
"mean_token_accuracy": 0.7922971405088901,
"num_tokens": 8433827.0,
"step": 3150
},
{
"epoch": 0.198486228447599,
"grad_norm": 0.28550681471824646,
"learning_rate": 0.00018677218767665347,
"loss": 0.9383,
"mean_token_accuracy": 0.7921677011996507,
"num_tokens": 8461185.0,
"step": 3160
},
{
"epoch": 0.19911434942369902,
"grad_norm": 0.2956470251083374,
"learning_rate": 0.00018673031425999205,
"loss": 0.9715,
"mean_token_accuracy": 0.7869658004492521,
"num_tokens": 8488228.0,
"step": 3170
},
{
"epoch": 0.199742470399799,
"grad_norm": 0.29445043206214905,
"learning_rate": 0.0001866884408433306,
"loss": 0.9375,
"mean_token_accuracy": 0.7965064492076636,
"num_tokens": 8515664.0,
"step": 3180
},
{
"epoch": 0.200370591375899,
"grad_norm": 0.2424841821193695,
"learning_rate": 0.00018664656742666918,
"loss": 0.9514,
"mean_token_accuracy": 0.7920619916170836,
"num_tokens": 8542727.0,
"step": 3190
},
{
"epoch": 0.20099871235199898,
"grad_norm": 0.29190316796302795,
"learning_rate": 0.00018660469401000776,
"loss": 0.9599,
"mean_token_accuracy": 0.7881575852632523,
"num_tokens": 8570325.0,
"step": 3200
},
{
"epoch": 0.201626833328099,
"grad_norm": 0.25599437952041626,
"learning_rate": 0.00018656282059334635,
"loss": 0.9754,
"mean_token_accuracy": 0.7894639134407043,
"num_tokens": 8598238.0,
"step": 3210
},
{
"epoch": 0.202254954304199,
"grad_norm": 0.28486067056655884,
"learning_rate": 0.0001865209471766849,
"loss": 0.963,
"mean_token_accuracy": 0.792768269777298,
"num_tokens": 8624523.0,
"step": 3220
},
{
"epoch": 0.20288307528029897,
"grad_norm": 0.3167647421360016,
"learning_rate": 0.00018647907376002345,
"loss": 0.9492,
"mean_token_accuracy": 0.791275979205966,
"num_tokens": 8651585.0,
"step": 3230
},
{
"epoch": 0.203511196256399,
"grad_norm": 0.2570751905441284,
"learning_rate": 0.00018643720034336203,
"loss": 0.963,
"mean_token_accuracy": 0.7880451161414385,
"num_tokens": 8678088.0,
"step": 3240
},
{
"epoch": 0.20413931723249898,
"grad_norm": 0.31692641973495483,
"learning_rate": 0.0001863953269267006,
"loss": 0.9428,
"mean_token_accuracy": 0.7953941386193037,
"num_tokens": 8706528.0,
"step": 3250
},
{
"epoch": 0.20476743820859897,
"grad_norm": 0.30903160572052,
"learning_rate": 0.00018635345351003917,
"loss": 0.956,
"mean_token_accuracy": 0.7911488272249698,
"num_tokens": 8734751.0,
"step": 3260
},
{
"epoch": 0.20539555918469898,
"grad_norm": 0.2711246907711029,
"learning_rate": 0.00018631158009337772,
"loss": 1.0267,
"mean_token_accuracy": 0.7815113704651594,
"num_tokens": 8761596.0,
"step": 3270
},
{
"epoch": 0.20602368016079897,
"grad_norm": 0.3340023458003998,
"learning_rate": 0.0001862697066767163,
"loss": 0.9381,
"mean_token_accuracy": 0.7947205103933811,
"num_tokens": 8788662.0,
"step": 3280
},
{
"epoch": 0.20665180113689896,
"grad_norm": 0.27200961112976074,
"learning_rate": 0.00018622783326005486,
"loss": 1.0081,
"mean_token_accuracy": 0.7830899234861135,
"num_tokens": 8814161.0,
"step": 3290
},
{
"epoch": 0.20727992211299898,
"grad_norm": 0.2782123386859894,
"learning_rate": 0.0001861859598433934,
"loss": 0.9468,
"mean_token_accuracy": 0.7955603264272213,
"num_tokens": 8839996.0,
"step": 3300
},
{
"epoch": 0.20790804308909897,
"grad_norm": 0.2713397443294525,
"learning_rate": 0.000186144086426732,
"loss": 0.9512,
"mean_token_accuracy": 0.793865691125393,
"num_tokens": 8866539.0,
"step": 3310
},
{
"epoch": 0.20853616406519895,
"grad_norm": 0.2926190495491028,
"learning_rate": 0.00018610221301007055,
"loss": 0.9966,
"mean_token_accuracy": 0.7881284438073635,
"num_tokens": 8892942.0,
"step": 3320
},
{
"epoch": 0.20916428504129894,
"grad_norm": 0.2809631824493408,
"learning_rate": 0.00018606033959340913,
"loss": 0.9387,
"mean_token_accuracy": 0.7946780778467655,
"num_tokens": 8920034.0,
"step": 3330
},
{
"epoch": 0.20979240601739896,
"grad_norm": 0.2645687758922577,
"learning_rate": 0.00018601846617674768,
"loss": 0.974,
"mean_token_accuracy": 0.7887616034597158,
"num_tokens": 8946885.0,
"step": 3340
},
{
"epoch": 0.21042052699349895,
"grad_norm": 0.32959234714508057,
"learning_rate": 0.00018597659276008626,
"loss": 0.97,
"mean_token_accuracy": 0.789527265354991,
"num_tokens": 8973303.0,
"step": 3350
},
{
"epoch": 0.21104864796959893,
"grad_norm": 0.2766159474849701,
"learning_rate": 0.00018593471934342485,
"loss": 1.0029,
"mean_token_accuracy": 0.78571757376194,
"num_tokens": 8999782.0,
"step": 3360
},
{
"epoch": 0.21167676894569895,
"grad_norm": 0.3022785484790802,
"learning_rate": 0.0001858928459267634,
"loss": 0.9799,
"mean_token_accuracy": 0.7888091869652272,
"num_tokens": 9025718.0,
"step": 3370
},
{
"epoch": 0.21230488992179894,
"grad_norm": 0.27634453773498535,
"learning_rate": 0.00018585097251010198,
"loss": 0.9694,
"mean_token_accuracy": 0.791129108890891,
"num_tokens": 9052146.0,
"step": 3380
},
{
"epoch": 0.21293301089789893,
"grad_norm": 0.29203152656555176,
"learning_rate": 0.00018580909909344053,
"loss": 0.9716,
"mean_token_accuracy": 0.7845335718244314,
"num_tokens": 9079458.0,
"step": 3390
},
{
"epoch": 0.21356113187399894,
"grad_norm": 0.260200172662735,
"learning_rate": 0.00018576722567677912,
"loss": 0.9915,
"mean_token_accuracy": 0.7869384720921516,
"num_tokens": 9108765.0,
"step": 3400
},
{
"epoch": 0.21418925285009893,
"grad_norm": 0.3571971654891968,
"learning_rate": 0.00018572535226011767,
"loss": 0.9498,
"mean_token_accuracy": 0.793991993367672,
"num_tokens": 9136630.0,
"step": 3410
},
{
"epoch": 0.21481737382619892,
"grad_norm": 0.33275195956230164,
"learning_rate": 0.00018568347884345625,
"loss": 0.9556,
"mean_token_accuracy": 0.7915182035416365,
"num_tokens": 9163266.0,
"step": 3420
},
{
"epoch": 0.2154454948022989,
"grad_norm": 0.2770121097564697,
"learning_rate": 0.0001856416054267948,
"loss": 0.9607,
"mean_token_accuracy": 0.7871123567223549,
"num_tokens": 9190732.0,
"step": 3430
},
{
"epoch": 0.21607361577839893,
"grad_norm": 0.30268242955207825,
"learning_rate": 0.00018559973201013336,
"loss": 0.9498,
"mean_token_accuracy": 0.7864726580679416,
"num_tokens": 9218316.0,
"step": 3440
},
{
"epoch": 0.2167017367544989,
"grad_norm": 0.3154946565628052,
"learning_rate": 0.00018555785859347194,
"loss": 0.9905,
"mean_token_accuracy": 0.7879769437015056,
"num_tokens": 9244245.0,
"step": 3450
},
{
"epoch": 0.2173298577305989,
"grad_norm": 0.23621073365211487,
"learning_rate": 0.0001855159851768105,
"loss": 0.9842,
"mean_token_accuracy": 0.7871025986969471,
"num_tokens": 9271353.0,
"step": 3460
},
{
"epoch": 0.21795797870669892,
"grad_norm": 0.2876494228839874,
"learning_rate": 0.00018547411176014907,
"loss": 0.9341,
"mean_token_accuracy": 0.8000877648591995,
"num_tokens": 9298879.0,
"step": 3470
},
{
"epoch": 0.2185860996827989,
"grad_norm": 0.30209431052207947,
"learning_rate": 0.00018543223834348763,
"loss": 0.9611,
"mean_token_accuracy": 0.790924321860075,
"num_tokens": 9326024.0,
"step": 3480
},
{
"epoch": 0.2192142206588989,
"grad_norm": 0.3473189175128937,
"learning_rate": 0.0001853903649268262,
"loss": 0.995,
"mean_token_accuracy": 0.7826048351824284,
"num_tokens": 9352117.0,
"step": 3490
},
{
"epoch": 0.2198423416349989,
"grad_norm": 0.27148741483688354,
"learning_rate": 0.0001853484915101648,
"loss": 0.9608,
"mean_token_accuracy": 0.7897682044655084,
"num_tokens": 9379999.0,
"step": 3500
},
{
"epoch": 0.2204704626110989,
"grad_norm": 0.26357343792915344,
"learning_rate": 0.00018530661809350337,
"loss": 0.9745,
"mean_token_accuracy": 0.792728316038847,
"num_tokens": 9406083.0,
"step": 3510
},
{
"epoch": 0.2210985835871989,
"grad_norm": 0.30297690629959106,
"learning_rate": 0.00018526474467684193,
"loss": 0.9431,
"mean_token_accuracy": 0.7893101371824741,
"num_tokens": 9434685.0,
"step": 3520
},
{
"epoch": 0.2217267045632989,
"grad_norm": 0.3760235905647278,
"learning_rate": 0.00018522287126018048,
"loss": 0.9832,
"mean_token_accuracy": 0.7870549734681844,
"num_tokens": 9462037.0,
"step": 3530
},
{
"epoch": 0.2223548255393989,
"grad_norm": 0.2574126422405243,
"learning_rate": 0.00018518099784351906,
"loss": 0.9653,
"mean_token_accuracy": 0.7888480603694916,
"num_tokens": 9488610.0,
"step": 3540
},
{
"epoch": 0.22298294651549888,
"grad_norm": 0.2631290555000305,
"learning_rate": 0.00018513912442685762,
"loss": 1.008,
"mean_token_accuracy": 0.7876730926334858,
"num_tokens": 9513663.0,
"step": 3550
},
{
"epoch": 0.22361106749159887,
"grad_norm": 0.27038082480430603,
"learning_rate": 0.0001850972510101962,
"loss": 0.9805,
"mean_token_accuracy": 0.7889320895075798,
"num_tokens": 9540071.0,
"step": 3560
},
{
"epoch": 0.22423918846769889,
"grad_norm": 0.28113994002342224,
"learning_rate": 0.00018505537759353475,
"loss": 0.9725,
"mean_token_accuracy": 0.7862703930586576,
"num_tokens": 9569174.0,
"step": 3570
},
{
"epoch": 0.22486730944379887,
"grad_norm": 0.3517085611820221,
"learning_rate": 0.00018501350417687333,
"loss": 0.9755,
"mean_token_accuracy": 0.78679881952703,
"num_tokens": 9595119.0,
"step": 3580
},
{
"epoch": 0.22549543041989886,
"grad_norm": 0.29562246799468994,
"learning_rate": 0.00018497163076021189,
"loss": 1.0037,
"mean_token_accuracy": 0.7866592183709145,
"num_tokens": 9621399.0,
"step": 3590
},
{
"epoch": 0.22612355139599888,
"grad_norm": 0.27493491768836975,
"learning_rate": 0.00018492975734355044,
"loss": 0.8894,
"mean_token_accuracy": 0.8016994591802359,
"num_tokens": 9650206.0,
"step": 3600
},
{
"epoch": 0.22675167237209887,
"grad_norm": 0.3233809471130371,
"learning_rate": 0.00018488788392688902,
"loss": 0.9396,
"mean_token_accuracy": 0.7987181950360537,
"num_tokens": 9677143.0,
"step": 3610
},
{
"epoch": 0.22737979334819886,
"grad_norm": 0.27679574489593506,
"learning_rate": 0.00018484601051022757,
"loss": 0.9585,
"mean_token_accuracy": 0.7922527860850096,
"num_tokens": 9702822.0,
"step": 3620
},
{
"epoch": 0.22800791432429887,
"grad_norm": 0.25461262464523315,
"learning_rate": 0.00018480413709356616,
"loss": 0.9332,
"mean_token_accuracy": 0.7918692424893379,
"num_tokens": 9730239.0,
"step": 3630
},
{
"epoch": 0.22863603530039886,
"grad_norm": 0.29292234778404236,
"learning_rate": 0.00018476226367690474,
"loss": 0.9731,
"mean_token_accuracy": 0.7911592714488507,
"num_tokens": 9755244.0,
"step": 3640
},
{
"epoch": 0.22926415627649885,
"grad_norm": 0.2941250801086426,
"learning_rate": 0.0001847203902602433,
"loss": 1.0108,
"mean_token_accuracy": 0.7864028055220842,
"num_tokens": 9781765.0,
"step": 3650
},
{
"epoch": 0.22989227725259884,
"grad_norm": 0.32997268438339233,
"learning_rate": 0.00018467851684358187,
"loss": 0.9819,
"mean_token_accuracy": 0.7870282482355833,
"num_tokens": 9808695.0,
"step": 3660
},
{
"epoch": 0.23052039822869885,
"grad_norm": 0.3786728084087372,
"learning_rate": 0.00018463664342692043,
"loss": 0.9318,
"mean_token_accuracy": 0.7967084005475045,
"num_tokens": 9835188.0,
"step": 3670
},
{
"epoch": 0.23114851920479884,
"grad_norm": 0.2662932574748993,
"learning_rate": 0.000184594770010259,
"loss": 0.9729,
"mean_token_accuracy": 0.787113618478179,
"num_tokens": 9861403.0,
"step": 3680
},
{
"epoch": 0.23177664018089883,
"grad_norm": 0.3011711537837982,
"learning_rate": 0.00018455289659359756,
"loss": 0.9363,
"mean_token_accuracy": 0.7950150787830352,
"num_tokens": 9887631.0,
"step": 3690
},
{
"epoch": 0.23240476115699885,
"grad_norm": 0.3154990077018738,
"learning_rate": 0.00018451102317693614,
"loss": 0.9622,
"mean_token_accuracy": 0.7922232504934072,
"num_tokens": 9913153.0,
"step": 3700
},
{
"epoch": 0.23303288213309883,
"grad_norm": 0.4112165868282318,
"learning_rate": 0.0001844691497602747,
"loss": 0.9703,
"mean_token_accuracy": 0.7831194877624512,
"num_tokens": 9940930.0,
"step": 3710
},
{
"epoch": 0.23366100310919882,
"grad_norm": 0.26975691318511963,
"learning_rate": 0.00018442727634361328,
"loss": 0.9212,
"mean_token_accuracy": 0.7968139354139566,
"num_tokens": 9968634.0,
"step": 3720
},
{
"epoch": 0.23428912408529884,
"grad_norm": 0.29109784960746765,
"learning_rate": 0.00018438540292695183,
"loss": 0.9648,
"mean_token_accuracy": 0.7936428785324097,
"num_tokens": 9995621.0,
"step": 3730
},
{
"epoch": 0.23491724506139883,
"grad_norm": 0.31260260939598083,
"learning_rate": 0.00018434352951029038,
"loss": 1.0045,
"mean_token_accuracy": 0.7816799312829972,
"num_tokens": 10022343.0,
"step": 3740
},
{
"epoch": 0.23554536603749882,
"grad_norm": 0.29457929730415344,
"learning_rate": 0.00018430165609362897,
"loss": 0.9825,
"mean_token_accuracy": 0.7891633450984955,
"num_tokens": 10049185.0,
"step": 3750
},
{
"epoch": 0.23617348701359883,
"grad_norm": 0.2756049335002899,
"learning_rate": 0.00018425978267696752,
"loss": 1.0145,
"mean_token_accuracy": 0.7817019656300545,
"num_tokens": 10075580.0,
"step": 3760
},
{
"epoch": 0.23680160798969882,
"grad_norm": 0.29868707060813904,
"learning_rate": 0.0001842179092603061,
"loss": 0.9691,
"mean_token_accuracy": 0.7917348992079496,
"num_tokens": 10101223.0,
"step": 3770
},
{
"epoch": 0.2374297289657988,
"grad_norm": 0.3161119222640991,
"learning_rate": 0.00018417603584364466,
"loss": 0.95,
"mean_token_accuracy": 0.7942048270255327,
"num_tokens": 10127396.0,
"step": 3780
},
{
"epoch": 0.2380578499418988,
"grad_norm": 0.29149937629699707,
"learning_rate": 0.00018413416242698324,
"loss": 0.9904,
"mean_token_accuracy": 0.7899536907672882,
"num_tokens": 10153465.0,
"step": 3790
},
{
"epoch": 0.2386859709179988,
"grad_norm": 0.27939069271087646,
"learning_rate": 0.00018409228901032182,
"loss": 0.9415,
"mean_token_accuracy": 0.7966868814080954,
"num_tokens": 10179724.0,
"step": 3800
},
{
"epoch": 0.2393140918940988,
"grad_norm": 0.3205685317516327,
"learning_rate": 0.00018405041559366037,
"loss": 0.9857,
"mean_token_accuracy": 0.7864225681871175,
"num_tokens": 10208771.0,
"step": 3810
},
{
"epoch": 0.2399422128701988,
"grad_norm": 0.26006463170051575,
"learning_rate": 0.00018400854217699895,
"loss": 0.9316,
"mean_token_accuracy": 0.7858447533100843,
"num_tokens": 10236536.0,
"step": 3820
},
{
"epoch": 0.2405703338462988,
"grad_norm": 0.3319949507713318,
"learning_rate": 0.0001839666687603375,
"loss": 0.9994,
"mean_token_accuracy": 0.7825042635202408,
"num_tokens": 10264224.0,
"step": 3830
},
{
"epoch": 0.2411984548223988,
"grad_norm": 0.3030165433883667,
"learning_rate": 0.0001839247953436761,
"loss": 0.8837,
"mean_token_accuracy": 0.7969729781150818,
"num_tokens": 10292171.0,
"step": 3840
},
{
"epoch": 0.24182657579849878,
"grad_norm": 0.29574254155158997,
"learning_rate": 0.00018388292192701464,
"loss": 0.9227,
"mean_token_accuracy": 0.7981263287365437,
"num_tokens": 10319685.0,
"step": 3850
},
{
"epoch": 0.2424546967745988,
"grad_norm": 0.333996444940567,
"learning_rate": 0.00018384104851035322,
"loss": 0.9511,
"mean_token_accuracy": 0.7944566797465086,
"num_tokens": 10346651.0,
"step": 3860
},
{
"epoch": 0.2430828177506988,
"grad_norm": 0.39183929562568665,
"learning_rate": 0.00018379917509369178,
"loss": 0.968,
"mean_token_accuracy": 0.793184470012784,
"num_tokens": 10371962.0,
"step": 3870
},
{
"epoch": 0.24371093872679878,
"grad_norm": 0.3321262300014496,
"learning_rate": 0.00018375730167703033,
"loss": 0.9696,
"mean_token_accuracy": 0.7914064366370439,
"num_tokens": 10397904.0,
"step": 3880
},
{
"epoch": 0.2443390597028988,
"grad_norm": 0.3826558589935303,
"learning_rate": 0.0001837154282603689,
"loss": 0.955,
"mean_token_accuracy": 0.7891239549964666,
"num_tokens": 10425656.0,
"step": 3890
},
{
"epoch": 0.24496718067899878,
"grad_norm": 0.35757163166999817,
"learning_rate": 0.00018367355484370747,
"loss": 0.9515,
"mean_token_accuracy": 0.7942003328353167,
"num_tokens": 10454621.0,
"step": 3900
},
{
"epoch": 0.24559530165509877,
"grad_norm": 0.28849247097969055,
"learning_rate": 0.00018363168142704605,
"loss": 0.9445,
"mean_token_accuracy": 0.7946181803941726,
"num_tokens": 10481140.0,
"step": 3910
},
{
"epoch": 0.24622342263119876,
"grad_norm": 0.3173221945762634,
"learning_rate": 0.0001835898080103846,
"loss": 0.9679,
"mean_token_accuracy": 0.789798391610384,
"num_tokens": 10507760.0,
"step": 3920
},
{
"epoch": 0.24685154360729877,
"grad_norm": 0.31581512093544006,
"learning_rate": 0.00018354793459372318,
"loss": 0.9838,
"mean_token_accuracy": 0.7864162161946296,
"num_tokens": 10535757.0,
"step": 3930
},
{
"epoch": 0.24747966458339876,
"grad_norm": 0.31570613384246826,
"learning_rate": 0.00018350606117706176,
"loss": 0.9984,
"mean_token_accuracy": 0.789697939157486,
"num_tokens": 10561484.0,
"step": 3940
},
{
"epoch": 0.24810778555949875,
"grad_norm": 0.3009042739868164,
"learning_rate": 0.00018346418776040032,
"loss": 0.9298,
"mean_token_accuracy": 0.7986709404736757,
"num_tokens": 10588115.0,
"step": 3950
},
{
"epoch": 0.24873590653559877,
"grad_norm": 0.3040034770965576,
"learning_rate": 0.0001834223143437389,
"loss": 1.0171,
"mean_token_accuracy": 0.7786661650985479,
"num_tokens": 10613945.0,
"step": 3960
},
{
"epoch": 0.24936402751169875,
"grad_norm": 0.26565641164779663,
"learning_rate": 0.00018338044092707745,
"loss": 0.9167,
"mean_token_accuracy": 0.8001658879220486,
"num_tokens": 10640631.0,
"step": 3970
},
{
"epoch": 0.24999214848779874,
"grad_norm": 0.32167062163352966,
"learning_rate": 0.00018333856751041603,
"loss": 0.9863,
"mean_token_accuracy": 0.7891666326671839,
"num_tokens": 10667054.0,
"step": 3980
},
{
"epoch": 0.25062026946389876,
"grad_norm": 0.307171106338501,
"learning_rate": 0.0001832966940937546,
"loss": 0.9554,
"mean_token_accuracy": 0.7922811262309551,
"num_tokens": 10694229.0,
"step": 3990
},
{
"epoch": 0.25124839043999875,
"grad_norm": 0.26245325803756714,
"learning_rate": 0.00018325482067709317,
"loss": 0.9843,
"mean_token_accuracy": 0.7896617949008942,
"num_tokens": 10719537.0,
"step": 4000
},
{
"epoch": 0.25187651141609874,
"grad_norm": 0.3621242046356201,
"learning_rate": 0.00018321294726043172,
"loss": 0.9465,
"mean_token_accuracy": 0.7954347494989633,
"num_tokens": 10745811.0,
"step": 4010
},
{
"epoch": 0.2525046323921987,
"grad_norm": 0.2879714071750641,
"learning_rate": 0.0001831710738437703,
"loss": 0.9682,
"mean_token_accuracy": 0.7943593975156545,
"num_tokens": 10772934.0,
"step": 4020
},
{
"epoch": 0.2531327533682987,
"grad_norm": 0.40183258056640625,
"learning_rate": 0.00018312920042710886,
"loss": 0.9464,
"mean_token_accuracy": 0.791998778283596,
"num_tokens": 10799932.0,
"step": 4030
},
{
"epoch": 0.25376087434439876,
"grad_norm": 0.3670477867126465,
"learning_rate": 0.0001830873270104474,
"loss": 0.9616,
"mean_token_accuracy": 0.7946044556796551,
"num_tokens": 10825734.0,
"step": 4040
},
{
"epoch": 0.25438899532049875,
"grad_norm": 0.3046157658100128,
"learning_rate": 0.000183045453593786,
"loss": 0.992,
"mean_token_accuracy": 0.7855597577989102,
"num_tokens": 10853694.0,
"step": 4050
},
{
"epoch": 0.25501711629659873,
"grad_norm": 0.30533158779144287,
"learning_rate": 0.00018300358017712455,
"loss": 0.9873,
"mean_token_accuracy": 0.7868763618171215,
"num_tokens": 10881365.0,
"step": 4060
},
{
"epoch": 0.2556452372726987,
"grad_norm": 0.33252570033073425,
"learning_rate": 0.00018296170676046313,
"loss": 0.9603,
"mean_token_accuracy": 0.789739453420043,
"num_tokens": 10907688.0,
"step": 4070
},
{
"epoch": 0.2562733582487987,
"grad_norm": 0.3089440166950226,
"learning_rate": 0.00018291983334380168,
"loss": 0.9137,
"mean_token_accuracy": 0.7987202100455761,
"num_tokens": 10935619.0,
"step": 4080
},
{
"epoch": 0.2569014792248987,
"grad_norm": 0.2957897186279297,
"learning_rate": 0.00018287795992714026,
"loss": 0.9725,
"mean_token_accuracy": 0.7916297178715468,
"num_tokens": 10962307.0,
"step": 4090
},
{
"epoch": 0.2575296002009987,
"grad_norm": 0.313556969165802,
"learning_rate": 0.00018283608651047884,
"loss": 1.0029,
"mean_token_accuracy": 0.7881143033504486,
"num_tokens": 10988375.0,
"step": 4100
},
{
"epoch": 0.25815772117709873,
"grad_norm": 0.3082195520401001,
"learning_rate": 0.0001827942130938174,
"loss": 0.9996,
"mean_token_accuracy": 0.786595806479454,
"num_tokens": 11015381.0,
"step": 4110
},
{
"epoch": 0.2587858421531987,
"grad_norm": 0.27436473965644836,
"learning_rate": 0.00018275233967715598,
"loss": 0.9067,
"mean_token_accuracy": 0.7987894963473081,
"num_tokens": 11042227.0,
"step": 4120
},
{
"epoch": 0.2594139631292987,
"grad_norm": 0.3056981861591339,
"learning_rate": 0.00018271046626049453,
"loss": 0.9891,
"mean_token_accuracy": 0.7872753620147706,
"num_tokens": 11068458.0,
"step": 4130
},
{
"epoch": 0.2600420841053987,
"grad_norm": 0.3361382782459259,
"learning_rate": 0.00018266859284383311,
"loss": 0.9871,
"mean_token_accuracy": 0.7879853140562773,
"num_tokens": 11094173.0,
"step": 4140
},
{
"epoch": 0.2606702050814987,
"grad_norm": 0.29034534096717834,
"learning_rate": 0.00018262671942717167,
"loss": 0.9292,
"mean_token_accuracy": 0.7946262218058109,
"num_tokens": 11121589.0,
"step": 4150
},
{
"epoch": 0.2612983260575987,
"grad_norm": 0.3440930247306824,
"learning_rate": 0.00018258484601051025,
"loss": 0.9392,
"mean_token_accuracy": 0.793309535458684,
"num_tokens": 11148217.0,
"step": 4160
},
{
"epoch": 0.2619264470336987,
"grad_norm": 0.38578805327415466,
"learning_rate": 0.0001825429725938488,
"loss": 0.9227,
"mean_token_accuracy": 0.7935150127857924,
"num_tokens": 11174747.0,
"step": 4170
},
{
"epoch": 0.2625545680097987,
"grad_norm": 0.28303763270378113,
"learning_rate": 0.00018250109917718736,
"loss": 0.9352,
"mean_token_accuracy": 0.7939535096287728,
"num_tokens": 11202085.0,
"step": 4180
},
{
"epoch": 0.2631826889858987,
"grad_norm": 0.275611013174057,
"learning_rate": 0.00018245922576052594,
"loss": 0.928,
"mean_token_accuracy": 0.7997066121548414,
"num_tokens": 11229432.0,
"step": 4190
},
{
"epoch": 0.2638108099619987,
"grad_norm": 0.34543925523757935,
"learning_rate": 0.0001824173523438645,
"loss": 0.9865,
"mean_token_accuracy": 0.7830164518207312,
"num_tokens": 11257570.0,
"step": 4200
},
{
"epoch": 0.26443893093809867,
"grad_norm": 0.2919905483722687,
"learning_rate": 0.00018237547892720307,
"loss": 0.925,
"mean_token_accuracy": 0.7997510485351086,
"num_tokens": 11285747.0,
"step": 4210
},
{
"epoch": 0.26506705191419866,
"grad_norm": 0.30447816848754883,
"learning_rate": 0.00018233360551054163,
"loss": 1.0015,
"mean_token_accuracy": 0.7857894655317068,
"num_tokens": 11312841.0,
"step": 4220
},
{
"epoch": 0.26569517289029865,
"grad_norm": 0.3498225212097168,
"learning_rate": 0.0001822917320938802,
"loss": 0.9477,
"mean_token_accuracy": 0.7929210104048252,
"num_tokens": 11341177.0,
"step": 4230
},
{
"epoch": 0.2663232938663987,
"grad_norm": 0.3432565927505493,
"learning_rate": 0.0001822498586772188,
"loss": 0.9762,
"mean_token_accuracy": 0.7873878616839647,
"num_tokens": 11367145.0,
"step": 4240
},
{
"epoch": 0.2669514148424987,
"grad_norm": 0.30563971400260925,
"learning_rate": 0.00018220798526055734,
"loss": 0.975,
"mean_token_accuracy": 0.788095697760582,
"num_tokens": 11394015.0,
"step": 4250
},
{
"epoch": 0.26757953581859867,
"grad_norm": 0.34202903509140015,
"learning_rate": 0.00018216611184389592,
"loss": 0.942,
"mean_token_accuracy": 0.7937498617917299,
"num_tokens": 11421244.0,
"step": 4260
},
{
"epoch": 0.26820765679469866,
"grad_norm": 0.306781142950058,
"learning_rate": 0.00018212423842723448,
"loss": 0.9205,
"mean_token_accuracy": 0.8030612777918578,
"num_tokens": 11448161.0,
"step": 4270
},
{
"epoch": 0.26883577777079865,
"grad_norm": 0.3466769754886627,
"learning_rate": 0.00018208236501057306,
"loss": 0.9918,
"mean_token_accuracy": 0.7877435315400362,
"num_tokens": 11474678.0,
"step": 4280
},
{
"epoch": 0.26946389874689863,
"grad_norm": 0.28220993280410767,
"learning_rate": 0.0001820404915939116,
"loss": 0.985,
"mean_token_accuracy": 0.7845060952007771,
"num_tokens": 11501747.0,
"step": 4290
},
{
"epoch": 0.2700920197229986,
"grad_norm": 0.2891874313354492,
"learning_rate": 0.0001819986181772502,
"loss": 0.9827,
"mean_token_accuracy": 0.786826417595148,
"num_tokens": 11527619.0,
"step": 4300
},
{
"epoch": 0.27072014069909867,
"grad_norm": 0.2904052734375,
"learning_rate": 0.00018195674476058875,
"loss": 0.9177,
"mean_token_accuracy": 0.7959654163569212,
"num_tokens": 11555059.0,
"step": 4310
},
{
"epoch": 0.27134826167519865,
"grad_norm": 0.35182616114616394,
"learning_rate": 0.0001819148713439273,
"loss": 0.9559,
"mean_token_accuracy": 0.7925355311483144,
"num_tokens": 11580504.0,
"step": 4320
},
{
"epoch": 0.27197638265129864,
"grad_norm": 0.2936030328273773,
"learning_rate": 0.00018187299792726588,
"loss": 0.9764,
"mean_token_accuracy": 0.7912591960281133,
"num_tokens": 11606802.0,
"step": 4330
},
{
"epoch": 0.27260450362739863,
"grad_norm": 0.3106949031352997,
"learning_rate": 0.00018183112451060444,
"loss": 0.966,
"mean_token_accuracy": 0.7912954032421112,
"num_tokens": 11633448.0,
"step": 4340
},
{
"epoch": 0.2732326246034986,
"grad_norm": 0.30941087007522583,
"learning_rate": 0.00018178925109394302,
"loss": 0.9231,
"mean_token_accuracy": 0.7992488227784633,
"num_tokens": 11660242.0,
"step": 4350
},
{
"epoch": 0.2738607455795986,
"grad_norm": 0.3347420394420624,
"learning_rate": 0.00018174737767728157,
"loss": 0.9753,
"mean_token_accuracy": 0.7909771021455526,
"num_tokens": 11687032.0,
"step": 4360
},
{
"epoch": 0.27448886655569865,
"grad_norm": 0.3798997104167938,
"learning_rate": 0.00018170550426062015,
"loss": 0.9779,
"mean_token_accuracy": 0.7896815791726113,
"num_tokens": 11712797.0,
"step": 4370
},
{
"epoch": 0.27511698753179864,
"grad_norm": 0.27888602018356323,
"learning_rate": 0.0001816636308439587,
"loss": 0.9728,
"mean_token_accuracy": 0.7927568309009075,
"num_tokens": 11738457.0,
"step": 4380
},
{
"epoch": 0.27574510850789863,
"grad_norm": 0.3379577398300171,
"learning_rate": 0.0001816217574272973,
"loss": 0.9381,
"mean_token_accuracy": 0.7947981592267752,
"num_tokens": 11766434.0,
"step": 4390
},
{
"epoch": 0.2763732294839986,
"grad_norm": 0.37430405616760254,
"learning_rate": 0.00018157988401063587,
"loss": 0.9852,
"mean_token_accuracy": 0.7833209618926048,
"num_tokens": 11793518.0,
"step": 4400
},
{
"epoch": 0.2770013504600986,
"grad_norm": 0.3272213041782379,
"learning_rate": 0.00018153801059397442,
"loss": 0.9891,
"mean_token_accuracy": 0.7863869782537222,
"num_tokens": 11819847.0,
"step": 4410
},
{
"epoch": 0.2776294714361986,
"grad_norm": 0.3244299292564392,
"learning_rate": 0.000181496137177313,
"loss": 0.9815,
"mean_token_accuracy": 0.7872505661100149,
"num_tokens": 11846697.0,
"step": 4420
},
{
"epoch": 0.2782575924122986,
"grad_norm": 0.43501153588294983,
"learning_rate": 0.00018145426376065156,
"loss": 0.9698,
"mean_token_accuracy": 0.789780105650425,
"num_tokens": 11872705.0,
"step": 4430
},
{
"epoch": 0.2788857133883986,
"grad_norm": 0.2940182089805603,
"learning_rate": 0.00018141239034399014,
"loss": 0.9041,
"mean_token_accuracy": 0.7976524058729411,
"num_tokens": 11901223.0,
"step": 4440
},
{
"epoch": 0.2795138343644986,
"grad_norm": 0.36934155225753784,
"learning_rate": 0.0001813705169273287,
"loss": 0.993,
"mean_token_accuracy": 0.7860413756221533,
"num_tokens": 11928250.0,
"step": 4450
},
{
"epoch": 0.2801419553405986,
"grad_norm": 0.33647122979164124,
"learning_rate": 0.00018132864351066725,
"loss": 0.9744,
"mean_token_accuracy": 0.7887967016547919,
"num_tokens": 11955060.0,
"step": 4460
},
{
"epoch": 0.2807700763166986,
"grad_norm": 0.3306765556335449,
"learning_rate": 0.00018128677009400583,
"loss": 0.9735,
"mean_token_accuracy": 0.7871614292263984,
"num_tokens": 11982955.0,
"step": 4470
},
{
"epoch": 0.2813981972927986,
"grad_norm": 0.2563401162624359,
"learning_rate": 0.00018124489667734438,
"loss": 0.9764,
"mean_token_accuracy": 0.7863705430179835,
"num_tokens": 12010704.0,
"step": 4480
},
{
"epoch": 0.28202631826889857,
"grad_norm": 0.4547005295753479,
"learning_rate": 0.00018120302326068296,
"loss": 0.952,
"mean_token_accuracy": 0.7937023017555476,
"num_tokens": 12037126.0,
"step": 4490
},
{
"epoch": 0.2826544392449986,
"grad_norm": 0.26261693239212036,
"learning_rate": 0.00018116114984402152,
"loss": 0.9602,
"mean_token_accuracy": 0.7892621707171201,
"num_tokens": 12063860.0,
"step": 4500
},
{
"epoch": 0.2832825602210986,
"grad_norm": 0.25773391127586365,
"learning_rate": 0.0001811192764273601,
"loss": 0.9724,
"mean_token_accuracy": 0.793383052945137,
"num_tokens": 12090682.0,
"step": 4510
},
{
"epoch": 0.2839106811971986,
"grad_norm": 0.26285916566848755,
"learning_rate": 0.00018107740301069865,
"loss": 0.9255,
"mean_token_accuracy": 0.7996356416493654,
"num_tokens": 12115946.0,
"step": 4520
},
{
"epoch": 0.2845388021732986,
"grad_norm": 0.32129448652267456,
"learning_rate": 0.00018103552959403723,
"loss": 1.0348,
"mean_token_accuracy": 0.7764022376388311,
"num_tokens": 12143516.0,
"step": 4530
},
{
"epoch": 0.28516692314939857,
"grad_norm": 0.27924448251724243,
"learning_rate": 0.00018099365617737582,
"loss": 0.9575,
"mean_token_accuracy": 0.7873435180634261,
"num_tokens": 12170735.0,
"step": 4540
},
{
"epoch": 0.28579504412549855,
"grad_norm": 0.2748197317123413,
"learning_rate": 0.00018095178276071437,
"loss": 1.0145,
"mean_token_accuracy": 0.7861239977180958,
"num_tokens": 12197615.0,
"step": 4550
},
{
"epoch": 0.28642316510159854,
"grad_norm": 0.38550078868865967,
"learning_rate": 0.00018090990934405295,
"loss": 0.9512,
"mean_token_accuracy": 0.7938284669071436,
"num_tokens": 12224675.0,
"step": 4560
},
{
"epoch": 0.2870512860776986,
"grad_norm": 0.3162452280521393,
"learning_rate": 0.0001808680359273915,
"loss": 0.9402,
"mean_token_accuracy": 0.7968017168343067,
"num_tokens": 12249844.0,
"step": 4570
},
{
"epoch": 0.2876794070537986,
"grad_norm": 0.37126636505126953,
"learning_rate": 0.00018082616251073009,
"loss": 0.9227,
"mean_token_accuracy": 0.7992094796150923,
"num_tokens": 12276906.0,
"step": 4580
},
{
"epoch": 0.28830752802989856,
"grad_norm": 0.32928577065467834,
"learning_rate": 0.00018078428909406864,
"loss": 0.9641,
"mean_token_accuracy": 0.790322245657444,
"num_tokens": 12303762.0,
"step": 4590
},
{
"epoch": 0.28893564900599855,
"grad_norm": 0.28634729981422424,
"learning_rate": 0.00018074241567740722,
"loss": 0.9171,
"mean_token_accuracy": 0.8008216977119446,
"num_tokens": 12331564.0,
"step": 4600
},
{
"epoch": 0.28956376998209854,
"grad_norm": 0.3661513924598694,
"learning_rate": 0.00018070054226074577,
"loss": 0.9081,
"mean_token_accuracy": 0.8026146795600653,
"num_tokens": 12358416.0,
"step": 4610
},
{
"epoch": 0.29019189095819853,
"grad_norm": 0.2952193021774292,
"learning_rate": 0.00018065866884408433,
"loss": 0.9801,
"mean_token_accuracy": 0.7893622420728207,
"num_tokens": 12384614.0,
"step": 4620
},
{
"epoch": 0.2908200119342986,
"grad_norm": 0.29245612025260925,
"learning_rate": 0.0001806167954274229,
"loss": 0.9465,
"mean_token_accuracy": 0.7965949896723032,
"num_tokens": 12411249.0,
"step": 4630
},
{
"epoch": 0.29144813291039856,
"grad_norm": 0.3053031265735626,
"learning_rate": 0.00018057492201076146,
"loss": 0.95,
"mean_token_accuracy": 0.79575967900455,
"num_tokens": 12436991.0,
"step": 4640
},
{
"epoch": 0.29207625388649855,
"grad_norm": 0.37885236740112305,
"learning_rate": 0.00018053304859410004,
"loss": 0.9614,
"mean_token_accuracy": 0.7889950573444366,
"num_tokens": 12465386.0,
"step": 4650
},
{
"epoch": 0.29270437486259854,
"grad_norm": 0.3537423610687256,
"learning_rate": 0.0001804911751774386,
"loss": 0.945,
"mean_token_accuracy": 0.793824827671051,
"num_tokens": 12491227.0,
"step": 4660
},
{
"epoch": 0.2933324958386985,
"grad_norm": 0.2990322411060333,
"learning_rate": 0.00018044930176077718,
"loss": 0.9232,
"mean_token_accuracy": 0.796284407377243,
"num_tokens": 12519230.0,
"step": 4670
},
{
"epoch": 0.2939606168147985,
"grad_norm": 0.32372578978538513,
"learning_rate": 0.00018040742834411576,
"loss": 0.9503,
"mean_token_accuracy": 0.7953814085572958,
"num_tokens": 12546047.0,
"step": 4680
},
{
"epoch": 0.2945887377908985,
"grad_norm": 0.2930040657520294,
"learning_rate": 0.00018036555492745431,
"loss": 0.8811,
"mean_token_accuracy": 0.8051992613822222,
"num_tokens": 12573184.0,
"step": 4690
},
{
"epoch": 0.29521685876699855,
"grad_norm": 0.3390980362892151,
"learning_rate": 0.0001803236815107929,
"loss": 0.9672,
"mean_token_accuracy": 0.7889087818562984,
"num_tokens": 12599385.0,
"step": 4700
},
{
"epoch": 0.29584497974309854,
"grad_norm": 0.28104329109191895,
"learning_rate": 0.00018028180809413145,
"loss": 0.9879,
"mean_token_accuracy": 0.7799696780741214,
"num_tokens": 12627295.0,
"step": 4710
},
{
"epoch": 0.2964731007191985,
"grad_norm": 0.2756447494029999,
"learning_rate": 0.00018023993467747003,
"loss": 0.9536,
"mean_token_accuracy": 0.7931569367647171,
"num_tokens": 12653585.0,
"step": 4720
},
{
"epoch": 0.2971012216952985,
"grad_norm": 0.34889882802963257,
"learning_rate": 0.00018019806126080859,
"loss": 0.9317,
"mean_token_accuracy": 0.7953014809638261,
"num_tokens": 12680922.0,
"step": 4730
},
{
"epoch": 0.2977293426713985,
"grad_norm": 0.3413899838924408,
"learning_rate": 0.00018015618784414717,
"loss": 0.9961,
"mean_token_accuracy": 0.783359244838357,
"num_tokens": 12708393.0,
"step": 4740
},
{
"epoch": 0.2983574636474985,
"grad_norm": 0.3137054145336151,
"learning_rate": 0.00018011431442748572,
"loss": 0.9451,
"mean_token_accuracy": 0.7911336876451969,
"num_tokens": 12735758.0,
"step": 4750
},
{
"epoch": 0.29898558462359853,
"grad_norm": 0.29756060242652893,
"learning_rate": 0.00018007244101082427,
"loss": 0.9465,
"mean_token_accuracy": 0.7898903641849756,
"num_tokens": 12763112.0,
"step": 4760
},
{
"epoch": 0.2996137055996985,
"grad_norm": 0.3134726881980896,
"learning_rate": 0.00018003056759416286,
"loss": 0.9612,
"mean_token_accuracy": 0.7912685304880143,
"num_tokens": 12789686.0,
"step": 4770
},
{
"epoch": 0.3002418265757985,
"grad_norm": 0.32834240794181824,
"learning_rate": 0.0001799886941775014,
"loss": 0.9307,
"mean_token_accuracy": 0.8001178815960884,
"num_tokens": 12813787.0,
"step": 4780
},
{
"epoch": 0.3008699475518985,
"grad_norm": 0.30090004205703735,
"learning_rate": 0.00017994682076084,
"loss": 0.9261,
"mean_token_accuracy": 0.7985043011605739,
"num_tokens": 12840195.0,
"step": 4790
},
{
"epoch": 0.3014980685279985,
"grad_norm": 0.2991957366466522,
"learning_rate": 0.00017990494734417854,
"loss": 0.9238,
"mean_token_accuracy": 0.799000171199441,
"num_tokens": 12868504.0,
"step": 4800
},
{
"epoch": 0.3021261895040985,
"grad_norm": 0.37887468934059143,
"learning_rate": 0.00017986307392751713,
"loss": 0.9457,
"mean_token_accuracy": 0.792426348477602,
"num_tokens": 12895941.0,
"step": 4810
},
{
"epoch": 0.30275431048019846,
"grad_norm": 0.3643001914024353,
"learning_rate": 0.00017982120051085568,
"loss": 0.9328,
"mean_token_accuracy": 0.7979659728705883,
"num_tokens": 12922039.0,
"step": 4820
},
{
"epoch": 0.3033824314562985,
"grad_norm": 0.32898885011672974,
"learning_rate": 0.00017977932709419426,
"loss": 0.997,
"mean_token_accuracy": 0.7901430610567332,
"num_tokens": 12946947.0,
"step": 4830
},
{
"epoch": 0.3040105524323985,
"grad_norm": 0.30726224184036255,
"learning_rate": 0.00017973745367753284,
"loss": 0.9484,
"mean_token_accuracy": 0.792856489494443,
"num_tokens": 12974095.0,
"step": 4840
},
{
"epoch": 0.3046386734084985,
"grad_norm": 0.35284626483917236,
"learning_rate": 0.0001796955802608714,
"loss": 0.9449,
"mean_token_accuracy": 0.7954190034419298,
"num_tokens": 13000158.0,
"step": 4850
},
{
"epoch": 0.30526679438459847,
"grad_norm": 0.35595861077308655,
"learning_rate": 0.00017965370684420998,
"loss": 0.9713,
"mean_token_accuracy": 0.7911023162305355,
"num_tokens": 13027170.0,
"step": 4860
},
{
"epoch": 0.30589491536069846,
"grad_norm": 0.34379082918167114,
"learning_rate": 0.00017961183342754853,
"loss": 0.9699,
"mean_token_accuracy": 0.7862888902425766,
"num_tokens": 13053584.0,
"step": 4870
},
{
"epoch": 0.30652303633679845,
"grad_norm": 0.37722787261009216,
"learning_rate": 0.0001795699600108871,
"loss": 0.9684,
"mean_token_accuracy": 0.7894018895924091,
"num_tokens": 13080176.0,
"step": 4880
},
{
"epoch": 0.30715115731289844,
"grad_norm": 0.3420683741569519,
"learning_rate": 0.00017952808659422567,
"loss": 0.9754,
"mean_token_accuracy": 0.7926111649721861,
"num_tokens": 13106832.0,
"step": 4890
},
{
"epoch": 0.3077792782889985,
"grad_norm": 0.30542516708374023,
"learning_rate": 0.00017948621317756422,
"loss": 0.9606,
"mean_token_accuracy": 0.7916467692703009,
"num_tokens": 13134773.0,
"step": 4900
},
{
"epoch": 0.30840739926509847,
"grad_norm": 0.4134035110473633,
"learning_rate": 0.0001794443397609028,
"loss": 0.9761,
"mean_token_accuracy": 0.7897003520280123,
"num_tokens": 13161464.0,
"step": 4910
},
{
"epoch": 0.30903552024119846,
"grad_norm": 0.35040563344955444,
"learning_rate": 0.00017940246634424135,
"loss": 0.9451,
"mean_token_accuracy": 0.7977105394005776,
"num_tokens": 13186861.0,
"step": 4920
},
{
"epoch": 0.30966364121729845,
"grad_norm": 0.2841908931732178,
"learning_rate": 0.00017936059292757994,
"loss": 0.9407,
"mean_token_accuracy": 0.7971719756722451,
"num_tokens": 13212420.0,
"step": 4930
},
{
"epoch": 0.31029176219339843,
"grad_norm": 0.30812788009643555,
"learning_rate": 0.0001793187195109185,
"loss": 0.9574,
"mean_token_accuracy": 0.7923252787441015,
"num_tokens": 13238267.0,
"step": 4940
},
{
"epoch": 0.3109198831694984,
"grad_norm": 0.2617832124233246,
"learning_rate": 0.00017927684609425707,
"loss": 0.9699,
"mean_token_accuracy": 0.7890806578099727,
"num_tokens": 13265852.0,
"step": 4950
},
{
"epoch": 0.31154800414559847,
"grad_norm": 0.32903388142585754,
"learning_rate": 0.00017923497267759563,
"loss": 0.9375,
"mean_token_accuracy": 0.7949575208127498,
"num_tokens": 13292850.0,
"step": 4960
},
{
"epoch": 0.31217612512169846,
"grad_norm": 0.31031638383865356,
"learning_rate": 0.0001791930992609342,
"loss": 0.9412,
"mean_token_accuracy": 0.7894805524498224,
"num_tokens": 13320007.0,
"step": 4970
},
{
"epoch": 0.31280424609779844,
"grad_norm": 0.3920259475708008,
"learning_rate": 0.0001791512258442728,
"loss": 0.9246,
"mean_token_accuracy": 0.7959085434675217,
"num_tokens": 13346689.0,
"step": 4980
},
{
"epoch": 0.31343236707389843,
"grad_norm": 0.30121907591819763,
"learning_rate": 0.00017910935242761134,
"loss": 0.9555,
"mean_token_accuracy": 0.7920045137405396,
"num_tokens": 13374367.0,
"step": 4990
},
{
"epoch": 0.3140604880499984,
"grad_norm": 0.3035444915294647,
"learning_rate": 0.00017906747901094992,
"loss": 0.9759,
"mean_token_accuracy": 0.7897166911512613,
"num_tokens": 13400170.0,
"step": 5000
},
{
"epoch": 0.3146886090260984,
"grad_norm": 0.27995365858078003,
"learning_rate": 0.00017902560559428848,
"loss": 0.9508,
"mean_token_accuracy": 0.796136661618948,
"num_tokens": 13428461.0,
"step": 5010
},
{
"epoch": 0.3153167300021984,
"grad_norm": 0.3135192096233368,
"learning_rate": 0.00017898373217762706,
"loss": 0.8978,
"mean_token_accuracy": 0.800453482940793,
"num_tokens": 13455697.0,
"step": 5020
},
{
"epoch": 0.31594485097829844,
"grad_norm": 0.3059029281139374,
"learning_rate": 0.0001789418587609656,
"loss": 0.9329,
"mean_token_accuracy": 0.793091481178999,
"num_tokens": 13483193.0,
"step": 5030
},
{
"epoch": 0.31657297195439843,
"grad_norm": 0.3233183026313782,
"learning_rate": 0.00017889998534430417,
"loss": 0.9333,
"mean_token_accuracy": 0.7970968656241894,
"num_tokens": 13508603.0,
"step": 5040
},
{
"epoch": 0.3172010929304984,
"grad_norm": 0.3030914068222046,
"learning_rate": 0.00017885811192764275,
"loss": 0.9514,
"mean_token_accuracy": 0.7929343525320292,
"num_tokens": 13536186.0,
"step": 5050
},
{
"epoch": 0.3178292139065984,
"grad_norm": 0.3712503910064697,
"learning_rate": 0.0001788162385109813,
"loss": 0.9723,
"mean_token_accuracy": 0.7916972611099482,
"num_tokens": 13562236.0,
"step": 5060
},
{
"epoch": 0.3184573348826984,
"grad_norm": 0.339174747467041,
"learning_rate": 0.00017877436509431988,
"loss": 0.9515,
"mean_token_accuracy": 0.7921494416892528,
"num_tokens": 13588482.0,
"step": 5070
},
{
"epoch": 0.3190854558587984,
"grad_norm": 0.3653663694858551,
"learning_rate": 0.00017873249167765844,
"loss": 0.9686,
"mean_token_accuracy": 0.7870390675961971,
"num_tokens": 13615395.0,
"step": 5080
},
{
"epoch": 0.3197135768348984,
"grad_norm": 0.27835947275161743,
"learning_rate": 0.00017869061826099702,
"loss": 0.989,
"mean_token_accuracy": 0.7866588454693556,
"num_tokens": 13643065.0,
"step": 5090
},
{
"epoch": 0.3203416978109984,
"grad_norm": 0.33390867710113525,
"learning_rate": 0.00017864874484433557,
"loss": 0.9766,
"mean_token_accuracy": 0.7885018114000559,
"num_tokens": 13670119.0,
"step": 5100
},
{
"epoch": 0.3209698187870984,
"grad_norm": 0.2845928966999054,
"learning_rate": 0.00017860687142767415,
"loss": 0.9291,
"mean_token_accuracy": 0.7989787317812442,
"num_tokens": 13696601.0,
"step": 5110
},
{
"epoch": 0.3215979397631984,
"grad_norm": 0.35259291529655457,
"learning_rate": 0.0001785649980110127,
"loss": 0.9458,
"mean_token_accuracy": 0.7948228023946285,
"num_tokens": 13723415.0,
"step": 5120
},
{
"epoch": 0.3222260607392984,
"grad_norm": 0.3824492394924164,
"learning_rate": 0.0001785231245943513,
"loss": 0.9885,
"mean_token_accuracy": 0.7887037217617034,
"num_tokens": 13748699.0,
"step": 5130
},
{
"epoch": 0.32285418171539837,
"grad_norm": 0.2946266829967499,
"learning_rate": 0.00017848125117768987,
"loss": 0.9656,
"mean_token_accuracy": 0.7888725634664298,
"num_tokens": 13775606.0,
"step": 5140
},
{
"epoch": 0.32348230269149836,
"grad_norm": 0.2542276382446289,
"learning_rate": 0.00017843937776102842,
"loss": 0.9737,
"mean_token_accuracy": 0.7885618463158608,
"num_tokens": 13802423.0,
"step": 5150
},
{
"epoch": 0.3241104236675984,
"grad_norm": 0.30069777369499207,
"learning_rate": 0.000178397504344367,
"loss": 0.9454,
"mean_token_accuracy": 0.7927018702030182,
"num_tokens": 13828657.0,
"step": 5160
},
{
"epoch": 0.3247385446436984,
"grad_norm": 0.3156718611717224,
"learning_rate": 0.00017835563092770556,
"loss": 0.9359,
"mean_token_accuracy": 0.795510170981288,
"num_tokens": 13855305.0,
"step": 5170
},
{
"epoch": 0.3253666656197984,
"grad_norm": 0.3123616874217987,
"learning_rate": 0.00017831375751104414,
"loss": 0.9678,
"mean_token_accuracy": 0.7933816347271204,
"num_tokens": 13881684.0,
"step": 5180
},
{
"epoch": 0.32599478659589837,
"grad_norm": 0.29278364777565,
"learning_rate": 0.0001782718840943827,
"loss": 0.9691,
"mean_token_accuracy": 0.7854080755263567,
"num_tokens": 13908078.0,
"step": 5190
},
{
"epoch": 0.32662290757199836,
"grad_norm": 0.3442671000957489,
"learning_rate": 0.00017823001067772125,
"loss": 0.9622,
"mean_token_accuracy": 0.7895106051117182,
"num_tokens": 13935018.0,
"step": 5200
},
{
"epoch": 0.32725102854809834,
"grad_norm": 0.325995534658432,
"learning_rate": 0.00017818813726105983,
"loss": 0.9734,
"mean_token_accuracy": 0.7888151530176402,
"num_tokens": 13960855.0,
"step": 5210
},
{
"epoch": 0.3278791495241984,
"grad_norm": 0.26817047595977783,
"learning_rate": 0.00017814626384439838,
"loss": 1.0164,
"mean_token_accuracy": 0.7806048065423965,
"num_tokens": 13987319.0,
"step": 5220
},
{
"epoch": 0.3285072705002984,
"grad_norm": 0.3482087552547455,
"learning_rate": 0.00017810439042773696,
"loss": 0.9569,
"mean_token_accuracy": 0.7944883365184069,
"num_tokens": 14012706.0,
"step": 5230
},
{
"epoch": 0.32913539147639836,
"grad_norm": 0.32223814725875854,
"learning_rate": 0.00017806251701107552,
"loss": 0.9498,
"mean_token_accuracy": 0.7911127615720034,
"num_tokens": 14038936.0,
"step": 5240
},
{
"epoch": 0.32976351245249835,
"grad_norm": 0.33856573700904846,
"learning_rate": 0.0001780206435944141,
"loss": 0.9662,
"mean_token_accuracy": 0.7920481752604246,
"num_tokens": 14064469.0,
"step": 5250
},
{
"epoch": 0.33039163342859834,
"grad_norm": 0.3517283499240875,
"learning_rate": 0.00017797877017775265,
"loss": 0.9737,
"mean_token_accuracy": 0.7935274243354797,
"num_tokens": 14091321.0,
"step": 5260
},
{
"epoch": 0.33101975440469833,
"grad_norm": 0.3329240083694458,
"learning_rate": 0.00017793689676109123,
"loss": 0.9772,
"mean_token_accuracy": 0.7892213884741068,
"num_tokens": 14116727.0,
"step": 5270
},
{
"epoch": 0.3316478753807983,
"grad_norm": 0.3505692780017853,
"learning_rate": 0.0001778950233444298,
"loss": 0.9517,
"mean_token_accuracy": 0.7921177882701158,
"num_tokens": 14144556.0,
"step": 5280
},
{
"epoch": 0.33227599635689836,
"grad_norm": 0.36645811796188354,
"learning_rate": 0.00017785314992776837,
"loss": 0.9694,
"mean_token_accuracy": 0.7911825001239776,
"num_tokens": 14171522.0,
"step": 5290
},
{
"epoch": 0.33290411733299835,
"grad_norm": 0.29255688190460205,
"learning_rate": 0.00017781127651110695,
"loss": 0.968,
"mean_token_accuracy": 0.789820882678032,
"num_tokens": 14198901.0,
"step": 5300
},
{
"epoch": 0.33353223830909834,
"grad_norm": 0.34405186772346497,
"learning_rate": 0.0001777694030944455,
"loss": 0.9414,
"mean_token_accuracy": 0.797522522136569,
"num_tokens": 14224801.0,
"step": 5310
},
{
"epoch": 0.3341603592851983,
"grad_norm": 0.2979678809642792,
"learning_rate": 0.00017772752967778408,
"loss": 0.9479,
"mean_token_accuracy": 0.7887560345232487,
"num_tokens": 14252677.0,
"step": 5320
},
{
"epoch": 0.3347884802612983,
"grad_norm": 0.2529391646385193,
"learning_rate": 0.00017768565626112264,
"loss": 0.9535,
"mean_token_accuracy": 0.7968549765646458,
"num_tokens": 14280416.0,
"step": 5330
},
{
"epoch": 0.3354166012373983,
"grad_norm": 0.31212252378463745,
"learning_rate": 0.0001776437828444612,
"loss": 0.9753,
"mean_token_accuracy": 0.7860465437173844,
"num_tokens": 14307299.0,
"step": 5340
},
{
"epoch": 0.3360447222134983,
"grad_norm": 0.3829305171966553,
"learning_rate": 0.00017760190942779977,
"loss": 0.9157,
"mean_token_accuracy": 0.7976312339305878,
"num_tokens": 14334235.0,
"step": 5350
},
{
"epoch": 0.33667284318959834,
"grad_norm": 0.2862161099910736,
"learning_rate": 0.00017756003601113833,
"loss": 0.8948,
"mean_token_accuracy": 0.8058628041297198,
"num_tokens": 14360748.0,
"step": 5360
},
{
"epoch": 0.3373009641656983,
"grad_norm": 0.3067629039287567,
"learning_rate": 0.0001775181625944769,
"loss": 0.93,
"mean_token_accuracy": 0.8005598716437816,
"num_tokens": 14387372.0,
"step": 5370
},
{
"epoch": 0.3379290851417983,
"grad_norm": 0.33197638392448425,
"learning_rate": 0.00017747628917781546,
"loss": 0.9684,
"mean_token_accuracy": 0.7905099768191576,
"num_tokens": 14414745.0,
"step": 5380
},
{
"epoch": 0.3385572061178983,
"grad_norm": 0.3821481168270111,
"learning_rate": 0.00017743441576115404,
"loss": 0.933,
"mean_token_accuracy": 0.7985155992209911,
"num_tokens": 14442045.0,
"step": 5390
},
{
"epoch": 0.3391853270939983,
"grad_norm": 0.32107749581336975,
"learning_rate": 0.0001773925423444926,
"loss": 0.9383,
"mean_token_accuracy": 0.7931062672287226,
"num_tokens": 14469523.0,
"step": 5400
},
{
"epoch": 0.3398134480700983,
"grad_norm": 0.3166064918041229,
"learning_rate": 0.00017735066892783118,
"loss": 0.9129,
"mean_token_accuracy": 0.7990059111267328,
"num_tokens": 14496732.0,
"step": 5410
},
{
"epoch": 0.3404415690461983,
"grad_norm": 0.3135284185409546,
"learning_rate": 0.00017730879551116973,
"loss": 0.932,
"mean_token_accuracy": 0.7971233692020178,
"num_tokens": 14523238.0,
"step": 5420
},
{
"epoch": 0.3410696900222983,
"grad_norm": 0.30107152462005615,
"learning_rate": 0.0001772669220945083,
"loss": 0.9483,
"mean_token_accuracy": 0.7947558045387269,
"num_tokens": 14550996.0,
"step": 5430
},
{
"epoch": 0.3416978109983983,
"grad_norm": 0.3249971568584442,
"learning_rate": 0.0001772250486778469,
"loss": 0.967,
"mean_token_accuracy": 0.7944178026169538,
"num_tokens": 14577389.0,
"step": 5440
},
{
"epoch": 0.3423259319744983,
"grad_norm": 0.29780882596969604,
"learning_rate": 0.00017718317526118545,
"loss": 0.9498,
"mean_token_accuracy": 0.7930607028305531,
"num_tokens": 14603545.0,
"step": 5450
},
{
"epoch": 0.3429540529505983,
"grad_norm": 0.3544989824295044,
"learning_rate": 0.00017714130184452403,
"loss": 0.944,
"mean_token_accuracy": 0.7989666901528836,
"num_tokens": 14631497.0,
"step": 5460
},
{
"epoch": 0.34358217392669826,
"grad_norm": 0.3154118061065674,
"learning_rate": 0.00017709942842786258,
"loss": 0.9414,
"mean_token_accuracy": 0.8006433036178351,
"num_tokens": 14657118.0,
"step": 5470
},
{
"epoch": 0.34421029490279825,
"grad_norm": 0.36263030767440796,
"learning_rate": 0.00017705755501120114,
"loss": 0.9528,
"mean_token_accuracy": 0.7943309776484966,
"num_tokens": 14683766.0,
"step": 5480
},
{
"epoch": 0.3448384158788983,
"grad_norm": 0.37745627760887146,
"learning_rate": 0.00017701568159453972,
"loss": 0.9394,
"mean_token_accuracy": 0.7972447019070387,
"num_tokens": 14712282.0,
"step": 5490
},
{
"epoch": 0.3454665368549983,
"grad_norm": 0.3214879333972931,
"learning_rate": 0.00017697380817787827,
"loss": 0.9196,
"mean_token_accuracy": 0.8019496221095324,
"num_tokens": 14738530.0,
"step": 5500
},
{
"epoch": 0.3460946578310983,
"grad_norm": 0.3479548692703247,
"learning_rate": 0.00017693193476121685,
"loss": 0.9791,
"mean_token_accuracy": 0.7863402977585793,
"num_tokens": 14765716.0,
"step": 5510
},
{
"epoch": 0.34672277880719826,
"grad_norm": 0.36872756481170654,
"learning_rate": 0.0001768900613445554,
"loss": 0.923,
"mean_token_accuracy": 0.7984681211411953,
"num_tokens": 14794865.0,
"step": 5520
},
{
"epoch": 0.34735089978329825,
"grad_norm": 0.2868911921977997,
"learning_rate": 0.000176848187927894,
"loss": 1.004,
"mean_token_accuracy": 0.7837614696472883,
"num_tokens": 14821928.0,
"step": 5530
},
{
"epoch": 0.34797902075939824,
"grad_norm": 0.2693776488304138,
"learning_rate": 0.00017680631451123254,
"loss": 0.9435,
"mean_token_accuracy": 0.7989333860576153,
"num_tokens": 14848566.0,
"step": 5540
},
{
"epoch": 0.3486071417354983,
"grad_norm": 0.32554733753204346,
"learning_rate": 0.0001767644410945711,
"loss": 0.9346,
"mean_token_accuracy": 0.7996318481862545,
"num_tokens": 14875559.0,
"step": 5550
},
{
"epoch": 0.34923526271159827,
"grad_norm": 0.3206787705421448,
"learning_rate": 0.00017672256767790968,
"loss": 0.947,
"mean_token_accuracy": 0.7963632360100746,
"num_tokens": 14901870.0,
"step": 5560
},
{
"epoch": 0.34986338368769826,
"grad_norm": 0.3555513322353363,
"learning_rate": 0.00017668069426124826,
"loss": 0.9494,
"mean_token_accuracy": 0.7952044978737831,
"num_tokens": 14928011.0,
"step": 5570
},
{
"epoch": 0.35049150466379825,
"grad_norm": 0.28491222858428955,
"learning_rate": 0.00017663882084458684,
"loss": 0.9522,
"mean_token_accuracy": 0.7924054119735956,
"num_tokens": 14956522.0,
"step": 5580
},
{
"epoch": 0.35111962563989824,
"grad_norm": 0.3041023910045624,
"learning_rate": 0.0001765969474279254,
"loss": 0.9318,
"mean_token_accuracy": 0.7982840724289417,
"num_tokens": 14982785.0,
"step": 5590
},
{
"epoch": 0.3517477466159982,
"grad_norm": 0.35401952266693115,
"learning_rate": 0.00017655507401126397,
"loss": 0.9956,
"mean_token_accuracy": 0.7896864812821149,
"num_tokens": 15008716.0,
"step": 5600
},
{
"epoch": 0.3523758675920982,
"grad_norm": 0.34883061051368713,
"learning_rate": 0.00017651320059460253,
"loss": 0.9171,
"mean_token_accuracy": 0.797677880898118,
"num_tokens": 15034928.0,
"step": 5610
},
{
"epoch": 0.35300398856819826,
"grad_norm": 0.32336753606796265,
"learning_rate": 0.00017647132717794108,
"loss": 0.9584,
"mean_token_accuracy": 0.7909554496407509,
"num_tokens": 15062010.0,
"step": 5620
},
{
"epoch": 0.35363210954429825,
"grad_norm": 0.3301476836204529,
"learning_rate": 0.00017642945376127966,
"loss": 0.9494,
"mean_token_accuracy": 0.7945166520774365,
"num_tokens": 15089039.0,
"step": 5630
},
{
"epoch": 0.35426023052039823,
"grad_norm": 0.37666943669319153,
"learning_rate": 0.00017638758034461822,
"loss": 0.9253,
"mean_token_accuracy": 0.7964357610791921,
"num_tokens": 15117169.0,
"step": 5640
},
{
"epoch": 0.3548883514964982,
"grad_norm": 0.3678019046783447,
"learning_rate": 0.0001763457069279568,
"loss": 0.9604,
"mean_token_accuracy": 0.7923226218670607,
"num_tokens": 15143120.0,
"step": 5650
},
{
"epoch": 0.3555164724725982,
"grad_norm": 0.30747926235198975,
"learning_rate": 0.00017630383351129535,
"loss": 0.9976,
"mean_token_accuracy": 0.7888089545071125,
"num_tokens": 15169183.0,
"step": 5660
},
{
"epoch": 0.3561445934486982,
"grad_norm": 0.28103527426719666,
"learning_rate": 0.00017626196009463393,
"loss": 0.9283,
"mean_token_accuracy": 0.7940921634435654,
"num_tokens": 15196750.0,
"step": 5670
},
{
"epoch": 0.35677271442479824,
"grad_norm": 0.30865031480789185,
"learning_rate": 0.0001762200866779725,
"loss": 0.9423,
"mean_token_accuracy": 0.7928956486284733,
"num_tokens": 15223518.0,
"step": 5680
},
{
"epoch": 0.35740083540089823,
"grad_norm": 0.30006086826324463,
"learning_rate": 0.00017617821326131107,
"loss": 0.9619,
"mean_token_accuracy": 0.791780112311244,
"num_tokens": 15249472.0,
"step": 5690
},
{
"epoch": 0.3580289563769982,
"grad_norm": 0.3119317293167114,
"learning_rate": 0.00017613633984464962,
"loss": 0.9378,
"mean_token_accuracy": 0.7980754714459181,
"num_tokens": 15275693.0,
"step": 5700
},
{
"epoch": 0.3586570773530982,
"grad_norm": 0.3226664066314697,
"learning_rate": 0.0001760944664279882,
"loss": 0.9678,
"mean_token_accuracy": 0.7938013020902872,
"num_tokens": 15302267.0,
"step": 5710
},
{
"epoch": 0.3592851983291982,
"grad_norm": 0.3189242482185364,
"learning_rate": 0.00017605259301132679,
"loss": 0.9533,
"mean_token_accuracy": 0.7953298572450876,
"num_tokens": 15328385.0,
"step": 5720
},
{
"epoch": 0.3599133193052982,
"grad_norm": 0.4274740517139435,
"learning_rate": 0.00017601071959466534,
"loss": 0.9676,
"mean_token_accuracy": 0.7888112541288137,
"num_tokens": 15356049.0,
"step": 5730
},
{
"epoch": 0.3605414402813982,
"grad_norm": 0.2636527121067047,
"learning_rate": 0.00017596884617800392,
"loss": 0.95,
"mean_token_accuracy": 0.794311236217618,
"num_tokens": 15383261.0,
"step": 5740
},
{
"epoch": 0.3611695612574982,
"grad_norm": 0.39001110196113586,
"learning_rate": 0.00017592697276134247,
"loss": 0.907,
"mean_token_accuracy": 0.8008437678217888,
"num_tokens": 15410782.0,
"step": 5750
},
{
"epoch": 0.3617976822335982,
"grad_norm": 0.3736308515071869,
"learning_rate": 0.00017588509934468106,
"loss": 0.9399,
"mean_token_accuracy": 0.7988893665373326,
"num_tokens": 15435944.0,
"step": 5760
},
{
"epoch": 0.3624258032096982,
"grad_norm": 0.41546639800071716,
"learning_rate": 0.0001758432259280196,
"loss": 0.9396,
"mean_token_accuracy": 0.7928215757012367,
"num_tokens": 15463019.0,
"step": 5770
},
{
"epoch": 0.3630539241857982,
"grad_norm": 0.3147844970226288,
"learning_rate": 0.00017580135251135816,
"loss": 0.9341,
"mean_token_accuracy": 0.8001567754894495,
"num_tokens": 15490689.0,
"step": 5780
},
{
"epoch": 0.36368204516189817,
"grad_norm": 0.3456019461154938,
"learning_rate": 0.00017575947909469674,
"loss": 0.9551,
"mean_token_accuracy": 0.7912769354879856,
"num_tokens": 15516630.0,
"step": 5790
},
{
"epoch": 0.36431016613799816,
"grad_norm": 0.3122086822986603,
"learning_rate": 0.0001757176056780353,
"loss": 0.9609,
"mean_token_accuracy": 0.7908839665353298,
"num_tokens": 15543530.0,
"step": 5800
},
{
"epoch": 0.36493828711409815,
"grad_norm": 0.29509490728378296,
"learning_rate": 0.00017567573226137388,
"loss": 0.9038,
"mean_token_accuracy": 0.8028480164706707,
"num_tokens": 15570842.0,
"step": 5810
},
{
"epoch": 0.3655664080901982,
"grad_norm": 0.32276204228401184,
"learning_rate": 0.00017563385884471243,
"loss": 0.9863,
"mean_token_accuracy": 0.7871858242899179,
"num_tokens": 15597394.0,
"step": 5820
},
{
"epoch": 0.3661945290662982,
"grad_norm": 0.29563000798225403,
"learning_rate": 0.00017559198542805101,
"loss": 0.9456,
"mean_token_accuracy": 0.796840837597847,
"num_tokens": 15624689.0,
"step": 5830
},
{
"epoch": 0.36682265004239817,
"grad_norm": 0.2788376808166504,
"learning_rate": 0.00017555011201138957,
"loss": 0.9471,
"mean_token_accuracy": 0.7943929139524698,
"num_tokens": 15651542.0,
"step": 5840
},
{
"epoch": 0.36745077101849816,
"grad_norm": 0.33232542872428894,
"learning_rate": 0.00017550823859472812,
"loss": 0.9591,
"mean_token_accuracy": 0.7914271518588066,
"num_tokens": 15677826.0,
"step": 5850
},
{
"epoch": 0.36807889199459815,
"grad_norm": 0.26375389099121094,
"learning_rate": 0.0001754663651780667,
"loss": 0.9518,
"mean_token_accuracy": 0.7952167768031358,
"num_tokens": 15704198.0,
"step": 5860
},
{
"epoch": 0.36870701297069813,
"grad_norm": 0.3428965210914612,
"learning_rate": 0.00017542449176140528,
"loss": 0.9521,
"mean_token_accuracy": 0.7944683827459812,
"num_tokens": 15730973.0,
"step": 5870
},
{
"epoch": 0.3693351339467982,
"grad_norm": 0.3386590778827667,
"learning_rate": 0.00017538261834474387,
"loss": 0.9106,
"mean_token_accuracy": 0.8001317955553532,
"num_tokens": 15758768.0,
"step": 5880
},
{
"epoch": 0.36996325492289817,
"grad_norm": 0.3398821949958801,
"learning_rate": 0.00017534074492808242,
"loss": 0.9813,
"mean_token_accuracy": 0.7854520630091428,
"num_tokens": 15786299.0,
"step": 5890
},
{
"epoch": 0.37059137589899815,
"grad_norm": 0.32635533809661865,
"learning_rate": 0.000175298871511421,
"loss": 0.9326,
"mean_token_accuracy": 0.79741803817451,
"num_tokens": 15813458.0,
"step": 5900
},
{
"epoch": 0.37121949687509814,
"grad_norm": 0.3272740840911865,
"learning_rate": 0.00017525699809475956,
"loss": 0.9625,
"mean_token_accuracy": 0.7884005717933178,
"num_tokens": 15840582.0,
"step": 5910
},
{
"epoch": 0.37184761785119813,
"grad_norm": 0.33792024850845337,
"learning_rate": 0.0001752151246780981,
"loss": 0.9932,
"mean_token_accuracy": 0.7824124969542027,
"num_tokens": 15867012.0,
"step": 5920
},
{
"epoch": 0.3724757388272981,
"grad_norm": 0.2628950774669647,
"learning_rate": 0.0001751732512614367,
"loss": 0.9155,
"mean_token_accuracy": 0.7996829584240913,
"num_tokens": 15894376.0,
"step": 5930
},
{
"epoch": 0.3731038598033981,
"grad_norm": 0.4289126694202423,
"learning_rate": 0.00017513137784477524,
"loss": 0.9667,
"mean_token_accuracy": 0.7891981620341539,
"num_tokens": 15923048.0,
"step": 5940
},
{
"epoch": 0.37373198077949815,
"grad_norm": 0.28455209732055664,
"learning_rate": 0.00017508950442811383,
"loss": 0.9627,
"mean_token_accuracy": 0.7922193612903357,
"num_tokens": 15951494.0,
"step": 5950
},
{
"epoch": 0.37436010175559814,
"grad_norm": 0.3707982003688812,
"learning_rate": 0.00017504763101145238,
"loss": 0.9345,
"mean_token_accuracy": 0.7945095077157021,
"num_tokens": 15978771.0,
"step": 5960
},
{
"epoch": 0.37498822273169813,
"grad_norm": 0.3321593105792999,
"learning_rate": 0.00017500575759479096,
"loss": 0.9217,
"mean_token_accuracy": 0.7928752236068248,
"num_tokens": 16006003.0,
"step": 5970
},
{
"epoch": 0.3756163437077981,
"grad_norm": 0.320150226354599,
"learning_rate": 0.00017496388417812951,
"loss": 0.952,
"mean_token_accuracy": 0.7876376051455736,
"num_tokens": 16033395.0,
"step": 5980
},
{
"epoch": 0.3762444646838981,
"grad_norm": 0.3004560172557831,
"learning_rate": 0.00017492201076146807,
"loss": 1.0095,
"mean_token_accuracy": 0.7821074955165386,
"num_tokens": 16060855.0,
"step": 5990
},
{
"epoch": 0.3768725856599981,
"grad_norm": 0.29771584272384644,
"learning_rate": 0.00017488013734480665,
"loss": 0.9307,
"mean_token_accuracy": 0.796100390329957,
"num_tokens": 16088000.0,
"step": 6000
},
{
"epoch": 0.37750070663609814,
"grad_norm": 0.27265238761901855,
"learning_rate": 0.00017483826392814523,
"loss": 0.9272,
"mean_token_accuracy": 0.797407491132617,
"num_tokens": 16115243.0,
"step": 6010
},
{
"epoch": 0.3781288276121981,
"grad_norm": 0.38913822174072266,
"learning_rate": 0.0001747963905114838,
"loss": 0.91,
"mean_token_accuracy": 0.8007095254957676,
"num_tokens": 16142434.0,
"step": 6020
},
{
"epoch": 0.3787569485882981,
"grad_norm": 0.2852244973182678,
"learning_rate": 0.00017475451709482237,
"loss": 0.9507,
"mean_token_accuracy": 0.7935369953513145,
"num_tokens": 16169907.0,
"step": 6030
},
{
"epoch": 0.3793850695643981,
"grad_norm": 0.3308016061782837,
"learning_rate": 0.00017471264367816095,
"loss": 0.9864,
"mean_token_accuracy": 0.7877254385501147,
"num_tokens": 16195064.0,
"step": 6040
},
{
"epoch": 0.3800131905404981,
"grad_norm": 0.3460164964199066,
"learning_rate": 0.0001746707702614995,
"loss": 0.9531,
"mean_token_accuracy": 0.7923174686729908,
"num_tokens": 16221200.0,
"step": 6050
},
{
"epoch": 0.3806413115165981,
"grad_norm": 0.30869802832603455,
"learning_rate": 0.00017462889684483805,
"loss": 0.9619,
"mean_token_accuracy": 0.7955431789159775,
"num_tokens": 16248508.0,
"step": 6060
},
{
"epoch": 0.38126943249269807,
"grad_norm": 0.28829026222229004,
"learning_rate": 0.00017458702342817664,
"loss": 0.9753,
"mean_token_accuracy": 0.7878083620220423,
"num_tokens": 16275106.0,
"step": 6070
},
{
"epoch": 0.3818975534687981,
"grad_norm": 0.3400105834007263,
"learning_rate": 0.0001745451500115152,
"loss": 0.9432,
"mean_token_accuracy": 0.7964709993451834,
"num_tokens": 16300621.0,
"step": 6080
},
{
"epoch": 0.3825256744448981,
"grad_norm": 0.30248478055000305,
"learning_rate": 0.00017450327659485377,
"loss": 0.9876,
"mean_token_accuracy": 0.7857365075498819,
"num_tokens": 16327441.0,
"step": 6090
},
{
"epoch": 0.3831537954209981,
"grad_norm": 0.3251391053199768,
"learning_rate": 0.00017446140317819232,
"loss": 0.9896,
"mean_token_accuracy": 0.7837534084916115,
"num_tokens": 16355426.0,
"step": 6100
},
{
"epoch": 0.3837819163970981,
"grad_norm": 0.2840956449508667,
"learning_rate": 0.0001744195297615309,
"loss": 0.9609,
"mean_token_accuracy": 0.795376755297184,
"num_tokens": 16381372.0,
"step": 6110
},
{
"epoch": 0.38441003737319807,
"grad_norm": 0.30968624353408813,
"learning_rate": 0.00017437765634486946,
"loss": 0.9473,
"mean_token_accuracy": 0.7971586957573891,
"num_tokens": 16407482.0,
"step": 6120
},
{
"epoch": 0.38503815834929805,
"grad_norm": 0.29990601539611816,
"learning_rate": 0.00017433578292820801,
"loss": 0.9728,
"mean_token_accuracy": 0.7887383218854666,
"num_tokens": 16435286.0,
"step": 6130
},
{
"epoch": 0.3856662793253981,
"grad_norm": 0.3170183300971985,
"learning_rate": 0.0001742939095115466,
"loss": 0.9532,
"mean_token_accuracy": 0.7952543575316667,
"num_tokens": 16461263.0,
"step": 6140
},
{
"epoch": 0.3862944003014981,
"grad_norm": 0.2933950126171112,
"learning_rate": 0.00017425203609488518,
"loss": 0.933,
"mean_token_accuracy": 0.7964635614305735,
"num_tokens": 16487429.0,
"step": 6150
},
{
"epoch": 0.3869225212775981,
"grad_norm": 0.386870801448822,
"learning_rate": 0.00017421016267822373,
"loss": 0.9126,
"mean_token_accuracy": 0.8014869604259729,
"num_tokens": 16514921.0,
"step": 6160
},
{
"epoch": 0.38755064225369806,
"grad_norm": 0.2772822976112366,
"learning_rate": 0.0001741682892615623,
"loss": 0.9646,
"mean_token_accuracy": 0.7937680229544639,
"num_tokens": 16541215.0,
"step": 6170
},
{
"epoch": 0.38817876322979805,
"grad_norm": 0.3255600035190582,
"learning_rate": 0.0001741264158449009,
"loss": 0.9233,
"mean_token_accuracy": 0.7992643032222986,
"num_tokens": 16568598.0,
"step": 6180
},
{
"epoch": 0.38880688420589804,
"grad_norm": 0.30316439270973206,
"learning_rate": 0.00017408454242823945,
"loss": 0.9584,
"mean_token_accuracy": 0.7939885523170233,
"num_tokens": 16596087.0,
"step": 6190
},
{
"epoch": 0.38943500518199803,
"grad_norm": 0.339186429977417,
"learning_rate": 0.00017404266901157803,
"loss": 0.9746,
"mean_token_accuracy": 0.7881553754210472,
"num_tokens": 16624223.0,
"step": 6200
},
{
"epoch": 0.3900631261580981,
"grad_norm": 0.3115599751472473,
"learning_rate": 0.00017400079559491658,
"loss": 0.9473,
"mean_token_accuracy": 0.7972762394696474,
"num_tokens": 16651744.0,
"step": 6210
},
{
"epoch": 0.39069124713419806,
"grad_norm": 0.36609265208244324,
"learning_rate": 0.00017395892217825514,
"loss": 0.9055,
"mean_token_accuracy": 0.8033052369952202,
"num_tokens": 16678227.0,
"step": 6220
},
{
"epoch": 0.39131936811029805,
"grad_norm": 0.38528645038604736,
"learning_rate": 0.00017391704876159372,
"loss": 0.9419,
"mean_token_accuracy": 0.8045696560293436,
"num_tokens": 16704268.0,
"step": 6230
},
{
"epoch": 0.39194748908639804,
"grad_norm": 0.29467713832855225,
"learning_rate": 0.00017387517534493227,
"loss": 0.9403,
"mean_token_accuracy": 0.7961732547730207,
"num_tokens": 16730577.0,
"step": 6240
},
{
"epoch": 0.392575610062498,
"grad_norm": 0.3198733329772949,
"learning_rate": 0.00017383330192827085,
"loss": 0.9486,
"mean_token_accuracy": 0.7977675545960665,
"num_tokens": 16755742.0,
"step": 6250
},
{
"epoch": 0.393203731038598,
"grad_norm": 0.2925213873386383,
"learning_rate": 0.0001737914285116094,
"loss": 0.949,
"mean_token_accuracy": 0.7897519588470459,
"num_tokens": 16783547.0,
"step": 6260
},
{
"epoch": 0.39383185201469806,
"grad_norm": 0.3132512867450714,
"learning_rate": 0.000173749555094948,
"loss": 0.9292,
"mean_token_accuracy": 0.7962834902107716,
"num_tokens": 16810651.0,
"step": 6270
},
{
"epoch": 0.39445997299079805,
"grad_norm": 0.3624895215034485,
"learning_rate": 0.00017370768167828654,
"loss": 0.9513,
"mean_token_accuracy": 0.7966616488993168,
"num_tokens": 16837067.0,
"step": 6280
},
{
"epoch": 0.39508809396689804,
"grad_norm": 0.389517605304718,
"learning_rate": 0.0001736658082616251,
"loss": 0.9852,
"mean_token_accuracy": 0.7895838055759669,
"num_tokens": 16865469.0,
"step": 6290
},
{
"epoch": 0.395716214942998,
"grad_norm": 0.27660834789276123,
"learning_rate": 0.00017362393484496368,
"loss": 0.9668,
"mean_token_accuracy": 0.789311607927084,
"num_tokens": 16893654.0,
"step": 6300
},
{
"epoch": 0.396344335919098,
"grad_norm": 0.30523520708084106,
"learning_rate": 0.00017358206142830226,
"loss": 0.9067,
"mean_token_accuracy": 0.801710982620716,
"num_tokens": 16921161.0,
"step": 6310
},
{
"epoch": 0.396972456895198,
"grad_norm": 0.37683388590812683,
"learning_rate": 0.00017354018801164084,
"loss": 0.9228,
"mean_token_accuracy": 0.7973937816917896,
"num_tokens": 16947895.0,
"step": 6320
},
{
"epoch": 0.397600577871298,
"grad_norm": 0.31565046310424805,
"learning_rate": 0.0001734983145949794,
"loss": 0.9568,
"mean_token_accuracy": 0.7894732590764761,
"num_tokens": 16975421.0,
"step": 6330
},
{
"epoch": 0.39822869884739803,
"grad_norm": 0.29164016246795654,
"learning_rate": 0.00017345644117831797,
"loss": 0.9618,
"mean_token_accuracy": 0.791145333275199,
"num_tokens": 17001397.0,
"step": 6340
},
{
"epoch": 0.398856819823498,
"grad_norm": 0.27090737223625183,
"learning_rate": 0.00017341456776165653,
"loss": 0.9091,
"mean_token_accuracy": 0.799335828050971,
"num_tokens": 17028394.0,
"step": 6350
},
{
"epoch": 0.399484940799598,
"grad_norm": 0.32882294058799744,
"learning_rate": 0.00017337269434499508,
"loss": 0.9809,
"mean_token_accuracy": 0.786191276833415,
"num_tokens": 17054934.0,
"step": 6360
},
{
"epoch": 0.400113061775698,
"grad_norm": 0.2733074128627777,
"learning_rate": 0.00017333082092833366,
"loss": 0.9421,
"mean_token_accuracy": 0.7956891294568778,
"num_tokens": 17081735.0,
"step": 6370
},
{
"epoch": 0.400741182751798,
"grad_norm": 0.2858097553253174,
"learning_rate": 0.00017328894751167222,
"loss": 0.922,
"mean_token_accuracy": 0.7970656007528305,
"num_tokens": 17108919.0,
"step": 6380
},
{
"epoch": 0.401369303727898,
"grad_norm": 0.2953729033470154,
"learning_rate": 0.0001732470740950108,
"loss": 0.9441,
"mean_token_accuracy": 0.7959697268903255,
"num_tokens": 17135174.0,
"step": 6390
},
{
"epoch": 0.40199742470399796,
"grad_norm": 0.3655385971069336,
"learning_rate": 0.00017320520067834935,
"loss": 1.0054,
"mean_token_accuracy": 0.7828585598617792,
"num_tokens": 17161216.0,
"step": 6400
},
{
"epoch": 0.402625545680098,
"grad_norm": 0.28704798221588135,
"learning_rate": 0.00017316332726168793,
"loss": 0.9546,
"mean_token_accuracy": 0.795269351825118,
"num_tokens": 17186800.0,
"step": 6410
},
{
"epoch": 0.403253666656198,
"grad_norm": 0.36064931750297546,
"learning_rate": 0.00017312145384502649,
"loss": 0.9412,
"mean_token_accuracy": 0.7885518711060285,
"num_tokens": 17214001.0,
"step": 6420
},
{
"epoch": 0.403881787632298,
"grad_norm": 0.31733861565589905,
"learning_rate": 0.00017307958042836504,
"loss": 0.9343,
"mean_token_accuracy": 0.7966097947210073,
"num_tokens": 17240349.0,
"step": 6430
},
{
"epoch": 0.404509908608398,
"grad_norm": 0.2990235388278961,
"learning_rate": 0.00017303770701170362,
"loss": 0.9641,
"mean_token_accuracy": 0.7916103590279817,
"num_tokens": 17267668.0,
"step": 6440
},
{
"epoch": 0.40513802958449796,
"grad_norm": 0.3221684992313385,
"learning_rate": 0.0001729958335950422,
"loss": 0.9661,
"mean_token_accuracy": 0.7940225251019001,
"num_tokens": 17293563.0,
"step": 6450
},
{
"epoch": 0.40576615056059795,
"grad_norm": 0.324481338262558,
"learning_rate": 0.00017295396017838076,
"loss": 0.9481,
"mean_token_accuracy": 0.7895933233201504,
"num_tokens": 17320263.0,
"step": 6460
},
{
"epoch": 0.406394271536698,
"grad_norm": 0.3949001729488373,
"learning_rate": 0.00017291208676171934,
"loss": 0.8954,
"mean_token_accuracy": 0.8022237163037061,
"num_tokens": 17347756.0,
"step": 6470
},
{
"epoch": 0.407022392512798,
"grad_norm": 0.3821220397949219,
"learning_rate": 0.00017287021334505792,
"loss": 0.9416,
"mean_token_accuracy": 0.8014149498194456,
"num_tokens": 17373740.0,
"step": 6480
},
{
"epoch": 0.40765051348889797,
"grad_norm": 0.3420533537864685,
"learning_rate": 0.00017282833992839647,
"loss": 0.9606,
"mean_token_accuracy": 0.7895737990736962,
"num_tokens": 17399868.0,
"step": 6490
},
{
"epoch": 0.40827863446499796,
"grad_norm": 0.32357707619667053,
"learning_rate": 0.00017278646651173503,
"loss": 0.9051,
"mean_token_accuracy": 0.8026961565017701,
"num_tokens": 17427575.0,
"step": 6500
},
{
"epoch": 0.40890675544109795,
"grad_norm": 0.34716370701789856,
"learning_rate": 0.0001727445930950736,
"loss": 0.94,
"mean_token_accuracy": 0.7938724718987942,
"num_tokens": 17455384.0,
"step": 6510
},
{
"epoch": 0.40953487641719793,
"grad_norm": 0.3508943021297455,
"learning_rate": 0.00017270271967841216,
"loss": 0.9854,
"mean_token_accuracy": 0.7838574007153511,
"num_tokens": 17482972.0,
"step": 6520
},
{
"epoch": 0.4101629973932979,
"grad_norm": 0.30769476294517517,
"learning_rate": 0.00017266084626175074,
"loss": 0.9398,
"mean_token_accuracy": 0.7959376715123654,
"num_tokens": 17510173.0,
"step": 6530
},
{
"epoch": 0.41079111836939797,
"grad_norm": 0.3114470839500427,
"learning_rate": 0.0001726189728450893,
"loss": 0.9722,
"mean_token_accuracy": 0.7882124871015549,
"num_tokens": 17537102.0,
"step": 6540
},
{
"epoch": 0.41141923934549796,
"grad_norm": 0.4177934527397156,
"learning_rate": 0.00017257709942842788,
"loss": 0.9756,
"mean_token_accuracy": 0.7910198099911213,
"num_tokens": 17563061.0,
"step": 6550
},
{
"epoch": 0.41204736032159794,
"grad_norm": 0.35182762145996094,
"learning_rate": 0.00017253522601176643,
"loss": 0.9422,
"mean_token_accuracy": 0.7991392657160759,
"num_tokens": 17589058.0,
"step": 6560
},
{
"epoch": 0.41267548129769793,
"grad_norm": 0.3126845061779022,
"learning_rate": 0.00017249335259510499,
"loss": 0.9305,
"mean_token_accuracy": 0.7946061249822378,
"num_tokens": 17615789.0,
"step": 6570
},
{
"epoch": 0.4133036022737979,
"grad_norm": 0.2823250889778137,
"learning_rate": 0.00017245147917844357,
"loss": 0.9141,
"mean_token_accuracy": 0.7987863086163998,
"num_tokens": 17643862.0,
"step": 6580
},
{
"epoch": 0.4139317232498979,
"grad_norm": 0.3772083520889282,
"learning_rate": 0.00017240960576178212,
"loss": 0.914,
"mean_token_accuracy": 0.799197156727314,
"num_tokens": 17671357.0,
"step": 6590
},
{
"epoch": 0.41455984422599795,
"grad_norm": 0.34354081749916077,
"learning_rate": 0.0001723677323451207,
"loss": 0.9411,
"mean_token_accuracy": 0.798019240796566,
"num_tokens": 17699385.0,
"step": 6600
},
{
"epoch": 0.41518796520209794,
"grad_norm": 0.39620301127433777,
"learning_rate": 0.00017232585892845928,
"loss": 0.8923,
"mean_token_accuracy": 0.8041703008115292,
"num_tokens": 17724468.0,
"step": 6610
},
{
"epoch": 0.41581608617819793,
"grad_norm": 0.33372464776039124,
"learning_rate": 0.00017228398551179786,
"loss": 0.9286,
"mean_token_accuracy": 0.7959783185273409,
"num_tokens": 17749763.0,
"step": 6620
},
{
"epoch": 0.4164442071542979,
"grad_norm": 0.3709011971950531,
"learning_rate": 0.00017224211209513642,
"loss": 0.9554,
"mean_token_accuracy": 0.7890860054641962,
"num_tokens": 17777337.0,
"step": 6630
},
{
"epoch": 0.4170723281303979,
"grad_norm": 0.32917919754981995,
"learning_rate": 0.00017220023867847497,
"loss": 0.9017,
"mean_token_accuracy": 0.8022000085562467,
"num_tokens": 17805938.0,
"step": 6640
},
{
"epoch": 0.4177004491064979,
"grad_norm": 0.29168930649757385,
"learning_rate": 0.00017215836526181355,
"loss": 0.9337,
"mean_token_accuracy": 0.7962145168334246,
"num_tokens": 17833769.0,
"step": 6650
},
{
"epoch": 0.4183285700825979,
"grad_norm": 0.3907414376735687,
"learning_rate": 0.0001721164918451521,
"loss": 0.9565,
"mean_token_accuracy": 0.7868779297918082,
"num_tokens": 17861061.0,
"step": 6660
},
{
"epoch": 0.4189566910586979,
"grad_norm": 0.3282051980495453,
"learning_rate": 0.0001720746184284907,
"loss": 0.965,
"mean_token_accuracy": 0.7910456649959088,
"num_tokens": 17887366.0,
"step": 6670
},
{
"epoch": 0.4195848120347979,
"grad_norm": 0.29146093130111694,
"learning_rate": 0.00017203274501182924,
"loss": 0.8854,
"mean_token_accuracy": 0.8069033030420542,
"num_tokens": 17913922.0,
"step": 6680
},
{
"epoch": 0.4202129330108979,
"grad_norm": 0.3952869772911072,
"learning_rate": 0.00017199087159516782,
"loss": 0.9253,
"mean_token_accuracy": 0.7987654652446509,
"num_tokens": 17939912.0,
"step": 6690
},
{
"epoch": 0.4208410539869979,
"grad_norm": 0.3404087424278259,
"learning_rate": 0.00017194899817850638,
"loss": 0.9574,
"mean_token_accuracy": 0.7907421611249447,
"num_tokens": 17967318.0,
"step": 6700
},
{
"epoch": 0.4214691749630979,
"grad_norm": 0.3051299750804901,
"learning_rate": 0.00017190712476184496,
"loss": 0.9063,
"mean_token_accuracy": 0.803283429145813,
"num_tokens": 17994300.0,
"step": 6710
},
{
"epoch": 0.42209729593919787,
"grad_norm": 0.3612078130245209,
"learning_rate": 0.0001718652513451835,
"loss": 0.9431,
"mean_token_accuracy": 0.7964126400649547,
"num_tokens": 18020402.0,
"step": 6720
},
{
"epoch": 0.4227254169152979,
"grad_norm": 0.31546175479888916,
"learning_rate": 0.00017182337792852207,
"loss": 0.9437,
"mean_token_accuracy": 0.7950374394655227,
"num_tokens": 18047710.0,
"step": 6730
},
{
"epoch": 0.4233535378913979,
"grad_norm": 0.35042259097099304,
"learning_rate": 0.00017178150451186065,
"loss": 0.9186,
"mean_token_accuracy": 0.8037286669015884,
"num_tokens": 18074038.0,
"step": 6740
},
{
"epoch": 0.4239816588674979,
"grad_norm": 0.2913114130496979,
"learning_rate": 0.00017173963109519923,
"loss": 0.9446,
"mean_token_accuracy": 0.7999244224280119,
"num_tokens": 18100392.0,
"step": 6750
},
{
"epoch": 0.4246097798435979,
"grad_norm": 0.5584277510643005,
"learning_rate": 0.0001716977576785378,
"loss": 0.9768,
"mean_token_accuracy": 0.791194049268961,
"num_tokens": 18127168.0,
"step": 6760
},
{
"epoch": 0.42523790081969787,
"grad_norm": 0.31865277886390686,
"learning_rate": 0.00017165588426187636,
"loss": 0.9285,
"mean_token_accuracy": 0.7973918996751308,
"num_tokens": 18153582.0,
"step": 6770
},
{
"epoch": 0.42586602179579786,
"grad_norm": 0.3605208992958069,
"learning_rate": 0.00017161401084521494,
"loss": 0.9184,
"mean_token_accuracy": 0.8000706914812327,
"num_tokens": 18179784.0,
"step": 6780
},
{
"epoch": 0.42649414277189784,
"grad_norm": 0.3389859199523926,
"learning_rate": 0.0001715721374285535,
"loss": 0.9829,
"mean_token_accuracy": 0.7899733152240515,
"num_tokens": 18206151.0,
"step": 6790
},
{
"epoch": 0.4271222637479979,
"grad_norm": 0.32781344652175903,
"learning_rate": 0.00017153026401189205,
"loss": 0.9596,
"mean_token_accuracy": 0.7900414571166039,
"num_tokens": 18234354.0,
"step": 6800
},
{
"epoch": 0.4277503847240979,
"grad_norm": 0.2912145256996155,
"learning_rate": 0.00017148839059523063,
"loss": 0.9257,
"mean_token_accuracy": 0.8005597397685051,
"num_tokens": 18261836.0,
"step": 6810
},
{
"epoch": 0.42837850570019786,
"grad_norm": 0.34917712211608887,
"learning_rate": 0.0001714465171785692,
"loss": 0.9402,
"mean_token_accuracy": 0.7959890987724065,
"num_tokens": 18288129.0,
"step": 6820
},
{
"epoch": 0.42900662667629785,
"grad_norm": 0.33733370900154114,
"learning_rate": 0.00017140464376190777,
"loss": 1.0073,
"mean_token_accuracy": 0.787006713822484,
"num_tokens": 18315159.0,
"step": 6830
},
{
"epoch": 0.42963474765239784,
"grad_norm": 0.3581954538822174,
"learning_rate": 0.00017136277034524632,
"loss": 0.9486,
"mean_token_accuracy": 0.794409342855215,
"num_tokens": 18342955.0,
"step": 6840
},
{
"epoch": 0.43026286862849783,
"grad_norm": 0.3676760792732239,
"learning_rate": 0.0001713208969285849,
"loss": 0.9483,
"mean_token_accuracy": 0.8001930240541697,
"num_tokens": 18368669.0,
"step": 6850
},
{
"epoch": 0.4308909896045978,
"grad_norm": 0.3197173774242401,
"learning_rate": 0.00017127902351192346,
"loss": 0.9116,
"mean_token_accuracy": 0.7953953389078379,
"num_tokens": 18397666.0,
"step": 6860
},
{
"epoch": 0.43151911058069786,
"grad_norm": 0.33263349533081055,
"learning_rate": 0.000171237150095262,
"loss": 0.9441,
"mean_token_accuracy": 0.7979573253542185,
"num_tokens": 18421950.0,
"step": 6870
},
{
"epoch": 0.43214723155679785,
"grad_norm": 0.38979992270469666,
"learning_rate": 0.0001711952766786006,
"loss": 0.9049,
"mean_token_accuracy": 0.8038561142981052,
"num_tokens": 18448253.0,
"step": 6880
},
{
"epoch": 0.43277535253289784,
"grad_norm": 0.3203209340572357,
"learning_rate": 0.00017115340326193915,
"loss": 0.9158,
"mean_token_accuracy": 0.8013805273920298,
"num_tokens": 18475417.0,
"step": 6890
},
{
"epoch": 0.4334034735089978,
"grad_norm": 0.3154747486114502,
"learning_rate": 0.00017111152984527773,
"loss": 0.9082,
"mean_token_accuracy": 0.8050479885190726,
"num_tokens": 18501073.0,
"step": 6900
},
{
"epoch": 0.4340315944850978,
"grad_norm": 0.30107998847961426,
"learning_rate": 0.0001710696564286163,
"loss": 0.9676,
"mean_token_accuracy": 0.791720773279667,
"num_tokens": 18527256.0,
"step": 6910
},
{
"epoch": 0.4346597154611978,
"grad_norm": 0.297690749168396,
"learning_rate": 0.0001710277830119549,
"loss": 0.9109,
"mean_token_accuracy": 0.8020948182791472,
"num_tokens": 18554278.0,
"step": 6920
},
{
"epoch": 0.43528783643729785,
"grad_norm": 0.33920198678970337,
"learning_rate": 0.00017098590959529344,
"loss": 0.9209,
"mean_token_accuracy": 0.8000293109565974,
"num_tokens": 18581647.0,
"step": 6930
},
{
"epoch": 0.43591595741339784,
"grad_norm": 0.3154403865337372,
"learning_rate": 0.000170944036178632,
"loss": 0.9408,
"mean_token_accuracy": 0.7958235062658787,
"num_tokens": 18608804.0,
"step": 6940
},
{
"epoch": 0.4365440783894978,
"grad_norm": 0.3859824538230896,
"learning_rate": 0.00017090216276197058,
"loss": 0.9133,
"mean_token_accuracy": 0.80115054436028,
"num_tokens": 18636323.0,
"step": 6950
},
{
"epoch": 0.4371721993655978,
"grad_norm": 0.3137420415878296,
"learning_rate": 0.00017086028934530913,
"loss": 0.8943,
"mean_token_accuracy": 0.8037898227572441,
"num_tokens": 18662828.0,
"step": 6960
},
{
"epoch": 0.4378003203416978,
"grad_norm": 0.37590697407722473,
"learning_rate": 0.00017081841592864771,
"loss": 0.9348,
"mean_token_accuracy": 0.7956229455769062,
"num_tokens": 18688409.0,
"step": 6970
},
{
"epoch": 0.4384284413177978,
"grad_norm": 0.38642698526382446,
"learning_rate": 0.00017077654251198627,
"loss": 0.9773,
"mean_token_accuracy": 0.7884405389428139,
"num_tokens": 18715579.0,
"step": 6980
},
{
"epoch": 0.4390565622938978,
"grad_norm": 0.38973766565322876,
"learning_rate": 0.00017073466909532485,
"loss": 0.9588,
"mean_token_accuracy": 0.7889534655958415,
"num_tokens": 18743302.0,
"step": 6990
},
{
"epoch": 0.4396846832699978,
"grad_norm": 0.3270561695098877,
"learning_rate": 0.0001706927956786634,
"loss": 0.9433,
"mean_token_accuracy": 0.7975011304020881,
"num_tokens": 18770735.0,
"step": 7000
},
{
"epoch": 0.4403128042460978,
"grad_norm": 0.332520455121994,
"learning_rate": 0.00017065092226200196,
"loss": 0.9271,
"mean_token_accuracy": 0.8032657954841852,
"num_tokens": 18795923.0,
"step": 7010
},
{
"epoch": 0.4409409252221978,
"grad_norm": 0.3008480668067932,
"learning_rate": 0.00017060904884534054,
"loss": 0.9778,
"mean_token_accuracy": 0.789083057269454,
"num_tokens": 18823499.0,
"step": 7020
},
{
"epoch": 0.4415690461982978,
"grad_norm": 0.34079182147979736,
"learning_rate": 0.0001705671754286791,
"loss": 0.9217,
"mean_token_accuracy": 0.80135333314538,
"num_tokens": 18851042.0,
"step": 7030
},
{
"epoch": 0.4421971671743978,
"grad_norm": 0.375567764043808,
"learning_rate": 0.00017052530201201767,
"loss": 0.951,
"mean_token_accuracy": 0.7928535658866167,
"num_tokens": 18877749.0,
"step": 7040
},
{
"epoch": 0.44282528815049776,
"grad_norm": 0.33165213465690613,
"learning_rate": 0.00017048342859535625,
"loss": 0.9332,
"mean_token_accuracy": 0.7962594710290432,
"num_tokens": 18903877.0,
"step": 7050
},
{
"epoch": 0.4434534091265978,
"grad_norm": 0.3591445982456207,
"learning_rate": 0.00017044155517869484,
"loss": 0.9415,
"mean_token_accuracy": 0.7983079668134451,
"num_tokens": 18928905.0,
"step": 7060
},
{
"epoch": 0.4440815301026978,
"grad_norm": 0.2903635501861572,
"learning_rate": 0.0001703996817620334,
"loss": 0.9026,
"mean_token_accuracy": 0.8050003577023744,
"num_tokens": 18955696.0,
"step": 7070
},
{
"epoch": 0.4447096510787978,
"grad_norm": 0.3327687382698059,
"learning_rate": 0.00017035780834537194,
"loss": 0.9184,
"mean_token_accuracy": 0.7981751836836338,
"num_tokens": 18983224.0,
"step": 7080
},
{
"epoch": 0.4453377720548978,
"grad_norm": 0.32097476720809937,
"learning_rate": 0.00017031593492871052,
"loss": 0.8946,
"mean_token_accuracy": 0.8068317249417305,
"num_tokens": 19010405.0,
"step": 7090
},
{
"epoch": 0.44596589303099776,
"grad_norm": 0.3469720780849457,
"learning_rate": 0.00017027406151204908,
"loss": 0.9037,
"mean_token_accuracy": 0.8039239943027496,
"num_tokens": 19037152.0,
"step": 7100
},
{
"epoch": 0.44659401400709775,
"grad_norm": 0.3317658007144928,
"learning_rate": 0.00017023218809538766,
"loss": 0.9582,
"mean_token_accuracy": 0.7961928177624941,
"num_tokens": 19063239.0,
"step": 7110
},
{
"epoch": 0.44722213498319774,
"grad_norm": 0.42860665917396545,
"learning_rate": 0.00017019031467872621,
"loss": 0.9076,
"mean_token_accuracy": 0.8047629177570343,
"num_tokens": 19088858.0,
"step": 7120
},
{
"epoch": 0.4478502559592978,
"grad_norm": 0.3589876592159271,
"learning_rate": 0.0001701484412620648,
"loss": 0.9166,
"mean_token_accuracy": 0.79860061109066,
"num_tokens": 19116820.0,
"step": 7130
},
{
"epoch": 0.44847837693539777,
"grad_norm": 0.3044925630092621,
"learning_rate": 0.00017010656784540335,
"loss": 0.9565,
"mean_token_accuracy": 0.7914801269769669,
"num_tokens": 19143530.0,
"step": 7140
},
{
"epoch": 0.44910649791149776,
"grad_norm": 0.3823811709880829,
"learning_rate": 0.0001700646944287419,
"loss": 0.966,
"mean_token_accuracy": 0.793233947083354,
"num_tokens": 19169692.0,
"step": 7150
},
{
"epoch": 0.44973461888759775,
"grad_norm": 0.33698752522468567,
"learning_rate": 0.00017002282101208048,
"loss": 0.9343,
"mean_token_accuracy": 0.7968378983438015,
"num_tokens": 19195206.0,
"step": 7160
},
{
"epoch": 0.45036273986369774,
"grad_norm": 0.28442952036857605,
"learning_rate": 0.00016998094759541904,
"loss": 0.9625,
"mean_token_accuracy": 0.7924941457808018,
"num_tokens": 19221574.0,
"step": 7170
},
{
"epoch": 0.4509908608397977,
"grad_norm": 0.29551246762275696,
"learning_rate": 0.00016993907417875762,
"loss": 0.9383,
"mean_token_accuracy": 0.7974245421588421,
"num_tokens": 19249889.0,
"step": 7180
},
{
"epoch": 0.45161898181589777,
"grad_norm": 0.38739240169525146,
"learning_rate": 0.0001698972007620962,
"loss": 0.8745,
"mean_token_accuracy": 0.8079649094492197,
"num_tokens": 19277540.0,
"step": 7190
},
{
"epoch": 0.45224710279199776,
"grad_norm": 0.3009992241859436,
"learning_rate": 0.00016985532734543475,
"loss": 0.9334,
"mean_token_accuracy": 0.7996255524456501,
"num_tokens": 19304330.0,
"step": 7200
},
{
"epoch": 0.45287522376809775,
"grad_norm": 0.3752877116203308,
"learning_rate": 0.00016981345392877334,
"loss": 0.9596,
"mean_token_accuracy": 0.7906508490443229,
"num_tokens": 19331369.0,
"step": 7210
},
{
"epoch": 0.45350334474419773,
"grad_norm": 0.3402617275714874,
"learning_rate": 0.0001697715805121119,
"loss": 0.9406,
"mean_token_accuracy": 0.7922864690423012,
"num_tokens": 19358691.0,
"step": 7220
},
{
"epoch": 0.4541314657202977,
"grad_norm": 0.3796190023422241,
"learning_rate": 0.00016972970709545047,
"loss": 0.9729,
"mean_token_accuracy": 0.7868019372224808,
"num_tokens": 19384598.0,
"step": 7230
},
{
"epoch": 0.4547595866963977,
"grad_norm": 0.2936389148235321,
"learning_rate": 0.00016968783367878902,
"loss": 0.9299,
"mean_token_accuracy": 0.7991322789341211,
"num_tokens": 19411913.0,
"step": 7240
},
{
"epoch": 0.4553877076724977,
"grad_norm": 0.3057098984718323,
"learning_rate": 0.0001696459602621276,
"loss": 0.9309,
"mean_token_accuracy": 0.793816527351737,
"num_tokens": 19440434.0,
"step": 7250
},
{
"epoch": 0.45601582864859774,
"grad_norm": 0.33800917863845825,
"learning_rate": 0.00016960408684546616,
"loss": 0.9347,
"mean_token_accuracy": 0.7935182463377715,
"num_tokens": 19468211.0,
"step": 7260
},
{
"epoch": 0.45664394962469773,
"grad_norm": 0.3345368802547455,
"learning_rate": 0.00016956221342880474,
"loss": 0.9564,
"mean_token_accuracy": 0.7953835293650627,
"num_tokens": 19494223.0,
"step": 7270
},
{
"epoch": 0.4572720706007977,
"grad_norm": 0.34066635370254517,
"learning_rate": 0.0001695203400121433,
"loss": 0.9516,
"mean_token_accuracy": 0.7983359940350055,
"num_tokens": 19520687.0,
"step": 7280
},
{
"epoch": 0.4579001915768977,
"grad_norm": 0.3939811885356903,
"learning_rate": 0.00016947846659548188,
"loss": 0.9402,
"mean_token_accuracy": 0.7942854754626751,
"num_tokens": 19547875.0,
"step": 7290
},
{
"epoch": 0.4585283125529977,
"grad_norm": 0.30416762828826904,
"learning_rate": 0.00016943659317882043,
"loss": 0.9776,
"mean_token_accuracy": 0.788750433549285,
"num_tokens": 19575021.0,
"step": 7300
},
{
"epoch": 0.4591564335290977,
"grad_norm": 0.2712084650993347,
"learning_rate": 0.00016939471976215898,
"loss": 0.9429,
"mean_token_accuracy": 0.7919867537915707,
"num_tokens": 19604046.0,
"step": 7310
},
{
"epoch": 0.4597845545051977,
"grad_norm": 0.31856003403663635,
"learning_rate": 0.00016935284634549756,
"loss": 0.9429,
"mean_token_accuracy": 0.7997392650693655,
"num_tokens": 19630841.0,
"step": 7320
},
{
"epoch": 0.4604126754812977,
"grad_norm": 0.2791038155555725,
"learning_rate": 0.00016931097292883612,
"loss": 0.9348,
"mean_token_accuracy": 0.7959219090640545,
"num_tokens": 19657308.0,
"step": 7330
},
{
"epoch": 0.4610407964573977,
"grad_norm": 0.3258204460144043,
"learning_rate": 0.0001692690995121747,
"loss": 0.9696,
"mean_token_accuracy": 0.7872245352715254,
"num_tokens": 19685937.0,
"step": 7340
},
{
"epoch": 0.4616689174334977,
"grad_norm": 0.32432809472084045,
"learning_rate": 0.00016922722609551328,
"loss": 0.9432,
"mean_token_accuracy": 0.7930789031088352,
"num_tokens": 19711611.0,
"step": 7350
},
{
"epoch": 0.4622970384095977,
"grad_norm": 0.31211572885513306,
"learning_rate": 0.00016918535267885186,
"loss": 0.9426,
"mean_token_accuracy": 0.7946818351745606,
"num_tokens": 19738162.0,
"step": 7360
},
{
"epoch": 0.46292515938569767,
"grad_norm": 0.35430920124053955,
"learning_rate": 0.00016914347926219042,
"loss": 0.9485,
"mean_token_accuracy": 0.7976432036608457,
"num_tokens": 19763931.0,
"step": 7370
},
{
"epoch": 0.46355328036179766,
"grad_norm": 0.2996009588241577,
"learning_rate": 0.00016910160584552897,
"loss": 0.9209,
"mean_token_accuracy": 0.7984352611005306,
"num_tokens": 19790663.0,
"step": 7380
},
{
"epoch": 0.4641814013378977,
"grad_norm": 0.3618237376213074,
"learning_rate": 0.00016905973242886755,
"loss": 0.9762,
"mean_token_accuracy": 0.7867707304656506,
"num_tokens": 19818336.0,
"step": 7390
},
{
"epoch": 0.4648095223139977,
"grad_norm": 0.3506768047809601,
"learning_rate": 0.0001690178590122061,
"loss": 0.9157,
"mean_token_accuracy": 0.8001652296632529,
"num_tokens": 19844561.0,
"step": 7400
},
{
"epoch": 0.4654376432900977,
"grad_norm": 0.31607791781425476,
"learning_rate": 0.00016897598559554469,
"loss": 0.9795,
"mean_token_accuracy": 0.7913108296692372,
"num_tokens": 19871050.0,
"step": 7410
},
{
"epoch": 0.46606576426619767,
"grad_norm": 0.36683353781700134,
"learning_rate": 0.00016893411217888324,
"loss": 0.9393,
"mean_token_accuracy": 0.7971708361059427,
"num_tokens": 19897758.0,
"step": 7420
},
{
"epoch": 0.46669388524229766,
"grad_norm": 0.3240242898464203,
"learning_rate": 0.00016889223876222182,
"loss": 0.9444,
"mean_token_accuracy": 0.7941307682543993,
"num_tokens": 19926668.0,
"step": 7430
},
{
"epoch": 0.46732200621839765,
"grad_norm": 0.2812100648880005,
"learning_rate": 0.00016885036534556038,
"loss": 1.0075,
"mean_token_accuracy": 0.783855975791812,
"num_tokens": 19955199.0,
"step": 7440
},
{
"epoch": 0.46795012719449763,
"grad_norm": 0.32872602343559265,
"learning_rate": 0.00016880849192889893,
"loss": 0.9386,
"mean_token_accuracy": 0.794111205264926,
"num_tokens": 19984176.0,
"step": 7450
},
{
"epoch": 0.4685782481705977,
"grad_norm": 0.35838210582733154,
"learning_rate": 0.0001687666185122375,
"loss": 0.9102,
"mean_token_accuracy": 0.8033748425543308,
"num_tokens": 20010815.0,
"step": 7460
},
{
"epoch": 0.46920636914669767,
"grad_norm": 0.30107223987579346,
"learning_rate": 0.00016872474509557606,
"loss": 0.9703,
"mean_token_accuracy": 0.7895241472870111,
"num_tokens": 20036032.0,
"step": 7470
},
{
"epoch": 0.46983449012279765,
"grad_norm": 0.3146842420101166,
"learning_rate": 0.00016868287167891465,
"loss": 0.9406,
"mean_token_accuracy": 0.792415551096201,
"num_tokens": 20064166.0,
"step": 7480
},
{
"epoch": 0.47046261109889764,
"grad_norm": 0.4442558288574219,
"learning_rate": 0.00016864099826225323,
"loss": 0.9504,
"mean_token_accuracy": 0.7952435094863176,
"num_tokens": 20090019.0,
"step": 7490
},
{
"epoch": 0.47109073207499763,
"grad_norm": 0.34173983335494995,
"learning_rate": 0.00016859912484559178,
"loss": 0.9739,
"mean_token_accuracy": 0.7935862522572279,
"num_tokens": 20116096.0,
"step": 7500
},
{
"epoch": 0.4717188530510976,
"grad_norm": 0.3807821273803711,
"learning_rate": 0.00016855725142893036,
"loss": 0.9247,
"mean_token_accuracy": 0.7981947991997004,
"num_tokens": 20141974.0,
"step": 7510
},
{
"epoch": 0.47234697402719766,
"grad_norm": 0.3637494146823883,
"learning_rate": 0.00016851537801226892,
"loss": 0.9383,
"mean_token_accuracy": 0.7955837201327085,
"num_tokens": 20168582.0,
"step": 7520
},
{
"epoch": 0.47297509500329765,
"grad_norm": 0.35919925570487976,
"learning_rate": 0.0001684735045956075,
"loss": 0.946,
"mean_token_accuracy": 0.7967754255980253,
"num_tokens": 20195729.0,
"step": 7530
},
{
"epoch": 0.47360321597939764,
"grad_norm": 0.4207704961299896,
"learning_rate": 0.00016843163117894605,
"loss": 0.9348,
"mean_token_accuracy": 0.8010726600885392,
"num_tokens": 20223094.0,
"step": 7540
},
{
"epoch": 0.47423133695549763,
"grad_norm": 0.3021661937236786,
"learning_rate": 0.00016838975776228463,
"loss": 0.9393,
"mean_token_accuracy": 0.7987467546015978,
"num_tokens": 20249412.0,
"step": 7550
},
{
"epoch": 0.4748594579315976,
"grad_norm": 0.29512059688568115,
"learning_rate": 0.00016834788434562319,
"loss": 0.9187,
"mean_token_accuracy": 0.8019442018121481,
"num_tokens": 20275957.0,
"step": 7560
},
{
"epoch": 0.4754875789076976,
"grad_norm": 0.40679019689559937,
"learning_rate": 0.00016830601092896177,
"loss": 0.9228,
"mean_token_accuracy": 0.7991116803139449,
"num_tokens": 20302470.0,
"step": 7570
},
{
"epoch": 0.4761156998837976,
"grad_norm": 0.3159092664718628,
"learning_rate": 0.00016826413751230032,
"loss": 0.9214,
"mean_token_accuracy": 0.7994810316711665,
"num_tokens": 20328996.0,
"step": 7580
},
{
"epoch": 0.47674382085989764,
"grad_norm": 0.41657283902168274,
"learning_rate": 0.00016822226409563887,
"loss": 0.8581,
"mean_token_accuracy": 0.8116535749286413,
"num_tokens": 20357442.0,
"step": 7590
},
{
"epoch": 0.4773719418359976,
"grad_norm": 0.420977383852005,
"learning_rate": 0.00016818039067897746,
"loss": 0.9449,
"mean_token_accuracy": 0.7980388529598713,
"num_tokens": 20384687.0,
"step": 7600
},
{
"epoch": 0.4780000628120976,
"grad_norm": 0.3933321237564087,
"learning_rate": 0.000168138517262316,
"loss": 0.9567,
"mean_token_accuracy": 0.7897981021553278,
"num_tokens": 20410938.0,
"step": 7610
},
{
"epoch": 0.4786281837881976,
"grad_norm": 0.3149840831756592,
"learning_rate": 0.0001680966438456546,
"loss": 0.9434,
"mean_token_accuracy": 0.7927991054952145,
"num_tokens": 20438513.0,
"step": 7620
},
{
"epoch": 0.4792563047642976,
"grad_norm": 0.334378182888031,
"learning_rate": 0.00016805477042899315,
"loss": 0.8846,
"mean_token_accuracy": 0.8075113136321306,
"num_tokens": 20464813.0,
"step": 7630
},
{
"epoch": 0.4798844257403976,
"grad_norm": 0.3132246434688568,
"learning_rate": 0.00016801289701233173,
"loss": 0.9215,
"mean_token_accuracy": 0.7987807631492615,
"num_tokens": 20490998.0,
"step": 7640
},
{
"epoch": 0.4805125467164976,
"grad_norm": 0.32393956184387207,
"learning_rate": 0.0001679710235956703,
"loss": 0.9562,
"mean_token_accuracy": 0.7920228894799948,
"num_tokens": 20518297.0,
"step": 7650
},
{
"epoch": 0.4811406676925976,
"grad_norm": 0.3257106840610504,
"learning_rate": 0.00016792915017900886,
"loss": 0.9873,
"mean_token_accuracy": 0.7902991570532322,
"num_tokens": 20545200.0,
"step": 7660
},
{
"epoch": 0.4817687886686976,
"grad_norm": 0.3050191402435303,
"learning_rate": 0.00016788727676234744,
"loss": 0.9315,
"mean_token_accuracy": 0.7982578534632921,
"num_tokens": 20570896.0,
"step": 7670
},
{
"epoch": 0.4823969096447976,
"grad_norm": 0.3876621723175049,
"learning_rate": 0.000167845403345686,
"loss": 0.9546,
"mean_token_accuracy": 0.8005735255777836,
"num_tokens": 20598172.0,
"step": 7680
},
{
"epoch": 0.4830250306208976,
"grad_norm": 0.3658469319343567,
"learning_rate": 0.00016780352992902458,
"loss": 0.9461,
"mean_token_accuracy": 0.7934409212321043,
"num_tokens": 20625326.0,
"step": 7690
},
{
"epoch": 0.48365315159699757,
"grad_norm": 0.29799938201904297,
"learning_rate": 0.00016776165651236313,
"loss": 0.915,
"mean_token_accuracy": 0.8017003744840622,
"num_tokens": 20652620.0,
"step": 7700
},
{
"epoch": 0.48428127257309755,
"grad_norm": 0.34464171528816223,
"learning_rate": 0.0001677197830957017,
"loss": 0.9907,
"mean_token_accuracy": 0.7844714995473623,
"num_tokens": 20679476.0,
"step": 7710
},
{
"epoch": 0.4849093935491976,
"grad_norm": 0.2975150942802429,
"learning_rate": 0.00016767790967904027,
"loss": 0.8977,
"mean_token_accuracy": 0.799476170167327,
"num_tokens": 20707100.0,
"step": 7720
},
{
"epoch": 0.4855375145252976,
"grad_norm": 0.4127698838710785,
"learning_rate": 0.00016763603626237882,
"loss": 0.8805,
"mean_token_accuracy": 0.8022565051913262,
"num_tokens": 20734538.0,
"step": 7730
},
{
"epoch": 0.4861656355013976,
"grad_norm": 0.2913120687007904,
"learning_rate": 0.0001675941628457174,
"loss": 0.9268,
"mean_token_accuracy": 0.7975892823189497,
"num_tokens": 20760647.0,
"step": 7740
},
{
"epoch": 0.48679375647749756,
"grad_norm": 0.37485092878341675,
"learning_rate": 0.00016755228942905596,
"loss": 0.914,
"mean_token_accuracy": 0.7988491103053093,
"num_tokens": 20787101.0,
"step": 7750
},
{
"epoch": 0.48742187745359755,
"grad_norm": 0.3177768886089325,
"learning_rate": 0.00016751041601239454,
"loss": 0.9609,
"mean_token_accuracy": 0.7920984081923962,
"num_tokens": 20814011.0,
"step": 7760
},
{
"epoch": 0.48804999842969754,
"grad_norm": 0.3514329195022583,
"learning_rate": 0.0001674685425957331,
"loss": 0.9249,
"mean_token_accuracy": 0.7984451025724411,
"num_tokens": 20839532.0,
"step": 7770
},
{
"epoch": 0.4886781194057976,
"grad_norm": 0.42157721519470215,
"learning_rate": 0.00016742666917907167,
"loss": 0.9322,
"mean_token_accuracy": 0.7990887399762869,
"num_tokens": 20865911.0,
"step": 7780
},
{
"epoch": 0.4893062403818976,
"grad_norm": 0.33001431822776794,
"learning_rate": 0.00016738479576241025,
"loss": 0.9204,
"mean_token_accuracy": 0.7994894739240408,
"num_tokens": 20892717.0,
"step": 7790
},
{
"epoch": 0.48993436135799756,
"grad_norm": 0.385657399892807,
"learning_rate": 0.00016734292234574883,
"loss": 0.9414,
"mean_token_accuracy": 0.7973318379372358,
"num_tokens": 20919688.0,
"step": 7800
},
{
"epoch": 0.49056248233409755,
"grad_norm": 0.3426561653614044,
"learning_rate": 0.0001673010489290874,
"loss": 0.9601,
"mean_token_accuracy": 0.7915532372891902,
"num_tokens": 20947274.0,
"step": 7810
},
{
"epoch": 0.49119060331019754,
"grad_norm": 0.34158486127853394,
"learning_rate": 0.00016725917551242594,
"loss": 0.9393,
"mean_token_accuracy": 0.7953637775033713,
"num_tokens": 20973896.0,
"step": 7820
},
{
"epoch": 0.4918187242862975,
"grad_norm": 0.36255061626434326,
"learning_rate": 0.00016721730209576452,
"loss": 0.9446,
"mean_token_accuracy": 0.7943896591663361,
"num_tokens": 20999609.0,
"step": 7830
},
{
"epoch": 0.4924468452623975,
"grad_norm": 0.38851070404052734,
"learning_rate": 0.00016717542867910308,
"loss": 0.9107,
"mean_token_accuracy": 0.8007842686027289,
"num_tokens": 21025356.0,
"step": 7840
},
{
"epoch": 0.49307496623849756,
"grad_norm": 0.35424819588661194,
"learning_rate": 0.00016713355526244166,
"loss": 0.916,
"mean_token_accuracy": 0.7971796747297049,
"num_tokens": 21051222.0,
"step": 7850
},
{
"epoch": 0.49370308721459755,
"grad_norm": 0.36249005794525146,
"learning_rate": 0.0001670916818457802,
"loss": 0.9501,
"mean_token_accuracy": 0.7942442841827869,
"num_tokens": 21077182.0,
"step": 7860
},
{
"epoch": 0.49433120819069754,
"grad_norm": 0.2965359091758728,
"learning_rate": 0.0001670498084291188,
"loss": 0.9108,
"mean_token_accuracy": 0.804225553944707,
"num_tokens": 21104365.0,
"step": 7870
},
{
"epoch": 0.4949593291667975,
"grad_norm": 0.379820317029953,
"learning_rate": 0.00016700793501245735,
"loss": 0.9214,
"mean_token_accuracy": 0.8005719102919102,
"num_tokens": 21131404.0,
"step": 7880
},
{
"epoch": 0.4955874501428975,
"grad_norm": 0.39546650648117065,
"learning_rate": 0.0001669660615957959,
"loss": 0.9775,
"mean_token_accuracy": 0.7893277246505022,
"num_tokens": 21157005.0,
"step": 7890
},
{
"epoch": 0.4962155711189975,
"grad_norm": 0.3420298993587494,
"learning_rate": 0.00016692418817913448,
"loss": 0.9174,
"mean_token_accuracy": 0.7994081798940897,
"num_tokens": 21184073.0,
"step": 7900
},
{
"epoch": 0.4968436920950975,
"grad_norm": 0.30282649397850037,
"learning_rate": 0.00016688231476247304,
"loss": 0.9289,
"mean_token_accuracy": 0.7969534825533628,
"num_tokens": 21210150.0,
"step": 7910
},
{
"epoch": 0.49747181307119753,
"grad_norm": 0.39200103282928467,
"learning_rate": 0.00016684044134581162,
"loss": 0.9481,
"mean_token_accuracy": 0.7922174122184515,
"num_tokens": 21237922.0,
"step": 7920
},
{
"epoch": 0.4980999340472975,
"grad_norm": 0.3704611659049988,
"learning_rate": 0.00016679856792915017,
"loss": 0.9554,
"mean_token_accuracy": 0.7943468518555165,
"num_tokens": 21265332.0,
"step": 7930
},
{
"epoch": 0.4987280550233975,
"grad_norm": 0.3158395290374756,
"learning_rate": 0.00016675669451248875,
"loss": 0.9366,
"mean_token_accuracy": 0.7983962200582028,
"num_tokens": 21292870.0,
"step": 7940
},
{
"epoch": 0.4993561759994975,
"grad_norm": 0.32152894139289856,
"learning_rate": 0.00016671482109582733,
"loss": 0.9699,
"mean_token_accuracy": 0.796767120435834,
"num_tokens": 21318347.0,
"step": 7950
},
{
"epoch": 0.4999842969755975,
"grad_norm": 0.3472147583961487,
"learning_rate": 0.0001666729476791659,
"loss": 0.9795,
"mean_token_accuracy": 0.7905366614460945,
"num_tokens": 21344579.0,
"step": 7960
},
{
"epoch": 0.5006124179516975,
"grad_norm": 0.35350507497787476,
"learning_rate": 0.00016663107426250447,
"loss": 0.9741,
"mean_token_accuracy": 0.7871046803891659,
"num_tokens": 21370562.0,
"step": 7970
},
{
"epoch": 0.5012405389277975,
"grad_norm": 0.37780460715293884,
"learning_rate": 0.00016658920084584302,
"loss": 0.9832,
"mean_token_accuracy": 0.7835981391370297,
"num_tokens": 21397739.0,
"step": 7980
},
{
"epoch": 0.5018686599038975,
"grad_norm": 0.29296019673347473,
"learning_rate": 0.0001665473274291816,
"loss": 0.9525,
"mean_token_accuracy": 0.7943347483873368,
"num_tokens": 21424598.0,
"step": 7990
},
{
"epoch": 0.5024967808799975,
"grad_norm": 0.32463762164115906,
"learning_rate": 0.00016650545401252016,
"loss": 0.9417,
"mean_token_accuracy": 0.7945131246000529,
"num_tokens": 21451365.0,
"step": 8000
}
],
"logging_steps": 10,
"max_steps": 47763,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.4324562160534979e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}