| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.1588377723970944, |
| "eval_steps": 2000, |
| "global_step": 300, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00387409200968523, |
| "grad_norm": 1.598986029624939, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 0.40103477239608765, |
| "step": 1, |
| "token_acc": 0.8705013179702646 |
| }, |
| { |
| "epoch": 0.00774818401937046, |
| "grad_norm": 1.988427758216858, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 0.4314175248146057, |
| "step": 2, |
| "token_acc": 0.8610088406262493 |
| }, |
| { |
| "epoch": 0.01162227602905569, |
| "grad_norm": 1.6525965929031372, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 0.41751521825790405, |
| "step": 3, |
| "token_acc": 0.8659394954574845 |
| }, |
| { |
| "epoch": 0.01549636803874092, |
| "grad_norm": 1.3594496250152588, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 0.39516761898994446, |
| "step": 4, |
| "token_acc": 0.8712739341656057 |
| }, |
| { |
| "epoch": 0.01937046004842615, |
| "grad_norm": 1.4459697008132935, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.41443824768066406, |
| "step": 5, |
| "token_acc": 0.8673064711013153 |
| }, |
| { |
| "epoch": 0.02324455205811138, |
| "grad_norm": 1.165871024131775, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 0.3951181471347809, |
| "step": 6, |
| "token_acc": 0.8717731277799119 |
| }, |
| { |
| "epoch": 0.02711864406779661, |
| "grad_norm": 1.150416374206543, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 0.40562719106674194, |
| "step": 7, |
| "token_acc": 0.8683618627898853 |
| }, |
| { |
| "epoch": 0.03099273607748184, |
| "grad_norm": 0.7621377110481262, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 0.4054454565048218, |
| "step": 8, |
| "token_acc": 0.8672108063124587 |
| }, |
| { |
| "epoch": 0.03486682808716707, |
| "grad_norm": 0.588590681552887, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.383542001247406, |
| "step": 9, |
| "token_acc": 0.8732824386699718 |
| }, |
| { |
| "epoch": 0.0387409200968523, |
| "grad_norm": 0.5067570805549622, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.3769374489784241, |
| "step": 10, |
| "token_acc": 0.8749500487669789 |
| }, |
| { |
| "epoch": 0.04261501210653753, |
| "grad_norm": 0.6109248995780945, |
| "learning_rate": 2.2e-06, |
| "loss": 0.3687226176261902, |
| "step": 11, |
| "token_acc": 0.8778429629931872 |
| }, |
| { |
| "epoch": 0.04648910411622276, |
| "grad_norm": 0.6168301701545715, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.3631238639354706, |
| "step": 12, |
| "token_acc": 0.8792909317747671 |
| }, |
| { |
| "epoch": 0.05036319612590799, |
| "grad_norm": 0.5205990076065063, |
| "learning_rate": 2.6e-06, |
| "loss": 0.37530872225761414, |
| "step": 13, |
| "token_acc": 0.8747995859550826 |
| }, |
| { |
| "epoch": 0.05423728813559322, |
| "grad_norm": 0.4970836639404297, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.33857205510139465, |
| "step": 14, |
| "token_acc": 0.8863650931395268 |
| }, |
| { |
| "epoch": 0.05811138014527845, |
| "grad_norm": 0.4103075861930847, |
| "learning_rate": 3e-06, |
| "loss": 0.38399845361709595, |
| "step": 15, |
| "token_acc": 0.8722473100295478 |
| }, |
| { |
| "epoch": 0.06198547215496368, |
| "grad_norm": 0.505113959312439, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.37927311658859253, |
| "step": 16, |
| "token_acc": 0.8732506907722828 |
| }, |
| { |
| "epoch": 0.06585956416464891, |
| "grad_norm": 0.4578634202480316, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.388744592666626, |
| "step": 17, |
| "token_acc": 0.8707925977418891 |
| }, |
| { |
| "epoch": 0.06973365617433414, |
| "grad_norm": 0.40881460905075073, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.37862884998321533, |
| "step": 18, |
| "token_acc": 0.8738148420049672 |
| }, |
| { |
| "epoch": 0.07360774818401937, |
| "grad_norm": 0.3267415165901184, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.3523765206336975, |
| "step": 19, |
| "token_acc": 0.8821479488850912 |
| }, |
| { |
| "epoch": 0.0774818401937046, |
| "grad_norm": 0.3520510196685791, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.37575048208236694, |
| "step": 20, |
| "token_acc": 0.8746806805808569 |
| }, |
| { |
| "epoch": 0.08135593220338982, |
| "grad_norm": 0.3177695870399475, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.3877210021018982, |
| "step": 21, |
| "token_acc": 0.8709381583839385 |
| }, |
| { |
| "epoch": 0.08523002421307506, |
| "grad_norm": 0.3101595640182495, |
| "learning_rate": 4.4e-06, |
| "loss": 0.35647860169410706, |
| "step": 22, |
| "token_acc": 0.8802609194999448 |
| }, |
| { |
| "epoch": 0.0891041162227603, |
| "grad_norm": 0.42295873165130615, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.34535130858421326, |
| "step": 23, |
| "token_acc": 0.8842312960154491 |
| }, |
| { |
| "epoch": 0.09297820823244551, |
| "grad_norm": 0.38459983468055725, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.3480440676212311, |
| "step": 24, |
| "token_acc": 0.8830844934941354 |
| }, |
| { |
| "epoch": 0.09685230024213075, |
| "grad_norm": 0.3167020082473755, |
| "learning_rate": 5e-06, |
| "loss": 0.3617573082447052, |
| "step": 25, |
| "token_acc": 0.8794729562611736 |
| }, |
| { |
| "epoch": 0.10072639225181598, |
| "grad_norm": 0.3235217332839966, |
| "learning_rate": 5.2e-06, |
| "loss": 0.34485846757888794, |
| "step": 26, |
| "token_acc": 0.8849654381719892 |
| }, |
| { |
| "epoch": 0.10460048426150122, |
| "grad_norm": 0.33688801527023315, |
| "learning_rate": 5.400000000000001e-06, |
| "loss": 0.325369268655777, |
| "step": 27, |
| "token_acc": 0.8904570911619978 |
| }, |
| { |
| "epoch": 0.10847457627118644, |
| "grad_norm": 0.28384602069854736, |
| "learning_rate": 5.600000000000001e-06, |
| "loss": 0.3820268213748932, |
| "step": 28, |
| "token_acc": 0.8722670041260794 |
| }, |
| { |
| "epoch": 0.11234866828087167, |
| "grad_norm": 0.2726050019264221, |
| "learning_rate": 5.8e-06, |
| "loss": 0.34821516275405884, |
| "step": 29, |
| "token_acc": 0.8829695430808375 |
| }, |
| { |
| "epoch": 0.1162227602905569, |
| "grad_norm": 0.2613418698310852, |
| "learning_rate": 6e-06, |
| "loss": 0.3505156636238098, |
| "step": 30, |
| "token_acc": 0.8820213661332177 |
| }, |
| { |
| "epoch": 0.12009685230024213, |
| "grad_norm": 0.27066054940223694, |
| "learning_rate": 6.200000000000001e-06, |
| "loss": 0.3500295877456665, |
| "step": 31, |
| "token_acc": 0.8819775128328553 |
| }, |
| { |
| "epoch": 0.12397094430992736, |
| "grad_norm": 0.2605418562889099, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 0.32833147048950195, |
| "step": 32, |
| "token_acc": 0.8892601629599358 |
| }, |
| { |
| "epoch": 0.12784503631961258, |
| "grad_norm": 0.2576088607311249, |
| "learning_rate": 6.600000000000001e-06, |
| "loss": 0.3447936475276947, |
| "step": 33, |
| "token_acc": 0.8835445537223737 |
| }, |
| { |
| "epoch": 0.13171912832929783, |
| "grad_norm": 0.2707255482673645, |
| "learning_rate": 6.800000000000001e-06, |
| "loss": 0.352622389793396, |
| "step": 34, |
| "token_acc": 0.8808578896779464 |
| }, |
| { |
| "epoch": 0.13559322033898305, |
| "grad_norm": 0.23704984784126282, |
| "learning_rate": 7e-06, |
| "loss": 0.34251606464385986, |
| "step": 35, |
| "token_acc": 0.8839590527934595 |
| }, |
| { |
| "epoch": 0.13946731234866827, |
| "grad_norm": 0.2552218735218048, |
| "learning_rate": 7.2000000000000005e-06, |
| "loss": 0.36937713623046875, |
| "step": 36, |
| "token_acc": 0.8746555562093041 |
| }, |
| { |
| "epoch": 0.14334140435835352, |
| "grad_norm": 0.25926339626312256, |
| "learning_rate": 7.4e-06, |
| "loss": 0.37243181467056274, |
| "step": 37, |
| "token_acc": 0.8742657147624016 |
| }, |
| { |
| "epoch": 0.14721549636803874, |
| "grad_norm": 0.25272250175476074, |
| "learning_rate": 7.600000000000001e-06, |
| "loss": 0.3371140956878662, |
| "step": 38, |
| "token_acc": 0.8851879286597788 |
| }, |
| { |
| "epoch": 0.15108958837772396, |
| "grad_norm": 0.2262120097875595, |
| "learning_rate": 7.800000000000002e-06, |
| "loss": 0.32758837938308716, |
| "step": 39, |
| "token_acc": 0.8883092864316684 |
| }, |
| { |
| "epoch": 0.1549636803874092, |
| "grad_norm": 0.26067835092544556, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.32051679491996765, |
| "step": 40, |
| "token_acc": 0.8908219532219895 |
| }, |
| { |
| "epoch": 0.15883777239709443, |
| "grad_norm": 0.22696885466575623, |
| "learning_rate": 8.2e-06, |
| "loss": 0.34018558263778687, |
| "step": 41, |
| "token_acc": 0.8843387459744694 |
| }, |
| { |
| "epoch": 0.16271186440677965, |
| "grad_norm": 0.2458319216966629, |
| "learning_rate": 8.400000000000001e-06, |
| "loss": 0.3157382607460022, |
| "step": 42, |
| "token_acc": 0.8923707458363505 |
| }, |
| { |
| "epoch": 0.1665859564164649, |
| "grad_norm": 0.23234310746192932, |
| "learning_rate": 8.6e-06, |
| "loss": 0.32486584782600403, |
| "step": 43, |
| "token_acc": 0.8898832391328527 |
| }, |
| { |
| "epoch": 0.17046004842615012, |
| "grad_norm": 0.24149972200393677, |
| "learning_rate": 8.8e-06, |
| "loss": 0.3565906286239624, |
| "step": 44, |
| "token_acc": 0.8786636478836652 |
| }, |
| { |
| "epoch": 0.17433414043583534, |
| "grad_norm": 0.23454472422599792, |
| "learning_rate": 9e-06, |
| "loss": 0.34243613481521606, |
| "step": 45, |
| "token_acc": 0.8836981353220466 |
| }, |
| { |
| "epoch": 0.1782082324455206, |
| "grad_norm": 0.22611235082149506, |
| "learning_rate": 9.200000000000002e-06, |
| "loss": 0.3169807493686676, |
| "step": 46, |
| "token_acc": 0.8913642111117898 |
| }, |
| { |
| "epoch": 0.1820823244552058, |
| "grad_norm": 0.2332201898097992, |
| "learning_rate": 9.4e-06, |
| "loss": 0.3335682153701782, |
| "step": 47, |
| "token_acc": 0.8853106607331619 |
| }, |
| { |
| "epoch": 0.18595641646489103, |
| "grad_norm": 0.26498886942863464, |
| "learning_rate": 9.600000000000001e-06, |
| "loss": 0.3396722674369812, |
| "step": 48, |
| "token_acc": 0.884526213547764 |
| }, |
| { |
| "epoch": 0.18983050847457628, |
| "grad_norm": 0.29751622676849365, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 0.3544740676879883, |
| "step": 49, |
| "token_acc": 0.8790830507178009 |
| }, |
| { |
| "epoch": 0.1937046004842615, |
| "grad_norm": 0.24125243723392487, |
| "learning_rate": 1e-05, |
| "loss": 0.3444434702396393, |
| "step": 50, |
| "token_acc": 0.8833375152943368 |
| }, |
| { |
| "epoch": 0.19757869249394674, |
| "grad_norm": 0.23450158536434174, |
| "learning_rate": 9.999953315763929e-06, |
| "loss": 0.34759777784347534, |
| "step": 51, |
| "token_acc": 0.8809645656414854 |
| }, |
| { |
| "epoch": 0.20145278450363197, |
| "grad_norm": 0.24415536224842072, |
| "learning_rate": 9.999813263927483e-06, |
| "loss": 0.3302762508392334, |
| "step": 52, |
| "token_acc": 0.8872595593874244 |
| }, |
| { |
| "epoch": 0.20532687651331719, |
| "grad_norm": 0.23792694509029388, |
| "learning_rate": 9.999579847105947e-06, |
| "loss": 0.3057291805744171, |
| "step": 53, |
| "token_acc": 0.8958096559669801 |
| }, |
| { |
| "epoch": 0.20920096852300243, |
| "grad_norm": 0.24918483197689056, |
| "learning_rate": 9.999253069658074e-06, |
| "loss": 0.3550814390182495, |
| "step": 54, |
| "token_acc": 0.8789014457104403 |
| }, |
| { |
| "epoch": 0.21307506053268765, |
| "grad_norm": 0.24681781232357025, |
| "learning_rate": 9.99883293768601e-06, |
| "loss": 0.329832524061203, |
| "step": 55, |
| "token_acc": 0.8862846605616155 |
| }, |
| { |
| "epoch": 0.21694915254237288, |
| "grad_norm": 0.25197944045066833, |
| "learning_rate": 9.998319459035168e-06, |
| "loss": 0.3133784532546997, |
| "step": 56, |
| "token_acc": 0.8929686000759445 |
| }, |
| { |
| "epoch": 0.22082324455205812, |
| "grad_norm": 0.29595333337783813, |
| "learning_rate": 9.997712643294093e-06, |
| "loss": 0.3238765597343445, |
| "step": 57, |
| "token_acc": 0.8900121095092376 |
| }, |
| { |
| "epoch": 0.22469733656174334, |
| "grad_norm": 0.2436024248600006, |
| "learning_rate": 9.997012501794273e-06, |
| "loss": 0.3283236622810364, |
| "step": 58, |
| "token_acc": 0.887762605178964 |
| }, |
| { |
| "epoch": 0.22857142857142856, |
| "grad_norm": 0.23041026294231415, |
| "learning_rate": 9.996219047609943e-06, |
| "loss": 0.3104722797870636, |
| "step": 59, |
| "token_acc": 0.8931121325749851 |
| }, |
| { |
| "epoch": 0.2324455205811138, |
| "grad_norm": 0.237432062625885, |
| "learning_rate": 9.995332295557818e-06, |
| "loss": 0.30940210819244385, |
| "step": 60, |
| "token_acc": 0.8942070394423697 |
| }, |
| { |
| "epoch": 0.23631961259079903, |
| "grad_norm": 0.23901380598545074, |
| "learning_rate": 9.994352262196839e-06, |
| "loss": 0.32523292303085327, |
| "step": 61, |
| "token_acc": 0.8885503611348168 |
| }, |
| { |
| "epoch": 0.24019370460048425, |
| "grad_norm": 0.27438339591026306, |
| "learning_rate": 9.993278965827844e-06, |
| "loss": 0.3501031994819641, |
| "step": 62, |
| "token_acc": 0.8796217252529412 |
| }, |
| { |
| "epoch": 0.2440677966101695, |
| "grad_norm": 0.23662753403186798, |
| "learning_rate": 9.992112426493247e-06, |
| "loss": 0.32605987787246704, |
| "step": 63, |
| "token_acc": 0.8890396653634925 |
| }, |
| { |
| "epoch": 0.24794188861985472, |
| "grad_norm": 0.2232031375169754, |
| "learning_rate": 9.990852665976648e-06, |
| "loss": 0.3196948170661926, |
| "step": 64, |
| "token_acc": 0.8907072739748918 |
| }, |
| { |
| "epoch": 0.25181598062953997, |
| "grad_norm": 0.2665523886680603, |
| "learning_rate": 9.989499707802424e-06, |
| "loss": 0.33278700709342957, |
| "step": 65, |
| "token_acc": 0.8863953116150797 |
| }, |
| { |
| "epoch": 0.25569007263922516, |
| "grad_norm": 0.23870785534381866, |
| "learning_rate": 9.988053577235306e-06, |
| "loss": 0.351688951253891, |
| "step": 66, |
| "token_acc": 0.879823584223047 |
| }, |
| { |
| "epoch": 0.2595641646489104, |
| "grad_norm": 0.24755656719207764, |
| "learning_rate": 9.986514301279894e-06, |
| "loss": 0.31553030014038086, |
| "step": 67, |
| "token_acc": 0.8921622627267041 |
| }, |
| { |
| "epoch": 0.26343825665859566, |
| "grad_norm": 0.23198164999485016, |
| "learning_rate": 9.984881908680157e-06, |
| "loss": 0.3355843424797058, |
| "step": 68, |
| "token_acc": 0.8848336232927391 |
| }, |
| { |
| "epoch": 0.26731234866828085, |
| "grad_norm": 0.2461438924074173, |
| "learning_rate": 9.983156429918895e-06, |
| "loss": 0.3341342508792877, |
| "step": 69, |
| "token_acc": 0.8856444439357114 |
| }, |
| { |
| "epoch": 0.2711864406779661, |
| "grad_norm": 0.22579748928546906, |
| "learning_rate": 9.981337897217171e-06, |
| "loss": 0.3188900947570801, |
| "step": 70, |
| "token_acc": 0.8906781387812132 |
| }, |
| { |
| "epoch": 0.27506053268765135, |
| "grad_norm": 0.24103567004203796, |
| "learning_rate": 9.979426344533712e-06, |
| "loss": 0.3240354061126709, |
| "step": 71, |
| "token_acc": 0.8889305949367731 |
| }, |
| { |
| "epoch": 0.27893462469733654, |
| "grad_norm": 0.23146985471248627, |
| "learning_rate": 9.977421807564264e-06, |
| "loss": 0.3256258964538574, |
| "step": 72, |
| "token_acc": 0.8886470476040884 |
| }, |
| { |
| "epoch": 0.2828087167070218, |
| "grad_norm": 0.2992205023765564, |
| "learning_rate": 9.97532432374094e-06, |
| "loss": 0.3162704110145569, |
| "step": 73, |
| "token_acc": 0.8912486582241803 |
| }, |
| { |
| "epoch": 0.28668280871670704, |
| "grad_norm": 0.2314625382423401, |
| "learning_rate": 9.973133932231514e-06, |
| "loss": 0.33748123049736023, |
| "step": 74, |
| "token_acc": 0.8834313251000246 |
| }, |
| { |
| "epoch": 0.29055690072639223, |
| "grad_norm": 0.23197512328624725, |
| "learning_rate": 9.970850673938684e-06, |
| "loss": 0.3105667233467102, |
| "step": 75, |
| "token_acc": 0.8935043208256486 |
| }, |
| { |
| "epoch": 0.2944309927360775, |
| "grad_norm": 0.2275022268295288, |
| "learning_rate": 9.96847459149932e-06, |
| "loss": 0.3327932357788086, |
| "step": 76, |
| "token_acc": 0.8861917159302386 |
| }, |
| { |
| "epoch": 0.2983050847457627, |
| "grad_norm": 0.2508430778980255, |
| "learning_rate": 9.966005729283658e-06, |
| "loss": 0.32548677921295166, |
| "step": 77, |
| "token_acc": 0.8882381273480396 |
| }, |
| { |
| "epoch": 0.3021791767554479, |
| "grad_norm": 0.5134550333023071, |
| "learning_rate": 9.963444133394478e-06, |
| "loss": 0.3120523691177368, |
| "step": 78, |
| "token_acc": 0.8919503736696569 |
| }, |
| { |
| "epoch": 0.30605326876513317, |
| "grad_norm": 0.21315379440784454, |
| "learning_rate": 9.960789851666237e-06, |
| "loss": 0.3215460181236267, |
| "step": 79, |
| "token_acc": 0.8896002985397907 |
| }, |
| { |
| "epoch": 0.3099273607748184, |
| "grad_norm": 0.23902781307697296, |
| "learning_rate": 9.958042933664186e-06, |
| "loss": 0.33162713050842285, |
| "step": 80, |
| "token_acc": 0.8866171518838251 |
| }, |
| { |
| "epoch": 0.3138014527845036, |
| "grad_norm": 0.24128590524196625, |
| "learning_rate": 9.955203430683425e-06, |
| "loss": 0.3268725574016571, |
| "step": 81, |
| "token_acc": 0.8882163748841388 |
| }, |
| { |
| "epoch": 0.31767554479418886, |
| "grad_norm": 0.24751782417297363, |
| "learning_rate": 9.952271395747969e-06, |
| "loss": 0.3100839853286743, |
| "step": 82, |
| "token_acc": 0.893085253361785 |
| }, |
| { |
| "epoch": 0.3215496368038741, |
| "grad_norm": 0.23644764721393585, |
| "learning_rate": 9.949246883609743e-06, |
| "loss": 0.32995104789733887, |
| "step": 83, |
| "token_acc": 0.8866222032237766 |
| }, |
| { |
| "epoch": 0.3254237288135593, |
| "grad_norm": 0.232451930642128, |
| "learning_rate": 9.94612995074756e-06, |
| "loss": 0.31272488832473755, |
| "step": 84, |
| "token_acc": 0.8926265473810503 |
| }, |
| { |
| "epoch": 0.32929782082324455, |
| "grad_norm": 0.21610639989376068, |
| "learning_rate": 9.942920655366075e-06, |
| "loss": 0.302722692489624, |
| "step": 85, |
| "token_acc": 0.8952372082627079 |
| }, |
| { |
| "epoch": 0.3331719128329298, |
| "grad_norm": 0.24474947154521942, |
| "learning_rate": 9.939619057394687e-06, |
| "loss": 0.31238657236099243, |
| "step": 86, |
| "token_acc": 0.8932181956136864 |
| }, |
| { |
| "epoch": 0.337046004842615, |
| "grad_norm": 0.22313052415847778, |
| "learning_rate": 9.936225218486428e-06, |
| "loss": 0.30595749616622925, |
| "step": 87, |
| "token_acc": 0.8942476419229949 |
| }, |
| { |
| "epoch": 0.34092009685230024, |
| "grad_norm": 0.25018593668937683, |
| "learning_rate": 9.93273920201681e-06, |
| "loss": 0.34218600392341614, |
| "step": 88, |
| "token_acc": 0.8826220754003523 |
| }, |
| { |
| "epoch": 0.3447941888619855, |
| "grad_norm": 0.21603761613368988, |
| "learning_rate": 9.929161073082636e-06, |
| "loss": 0.26845768094062805, |
| "step": 89, |
| "token_acc": 0.9068716054841073 |
| }, |
| { |
| "epoch": 0.3486682808716707, |
| "grad_norm": 0.22996748983860016, |
| "learning_rate": 9.925490898500796e-06, |
| "loss": 0.32508569955825806, |
| "step": 90, |
| "token_acc": 0.8884358725254423 |
| }, |
| { |
| "epoch": 0.3525423728813559, |
| "grad_norm": 0.3635949194431305, |
| "learning_rate": 9.921728746807008e-06, |
| "loss": 0.34217730164527893, |
| "step": 91, |
| "token_acc": 0.8833008019688547 |
| }, |
| { |
| "epoch": 0.3564164648910412, |
| "grad_norm": 0.22128325700759888, |
| "learning_rate": 9.917874688254542e-06, |
| "loss": 0.32345396280288696, |
| "step": 92, |
| "token_acc": 0.8889643834760571 |
| }, |
| { |
| "epoch": 0.36029055690072637, |
| "grad_norm": 0.24601417779922485, |
| "learning_rate": 9.913928794812909e-06, |
| "loss": 0.3252776265144348, |
| "step": 93, |
| "token_acc": 0.8881070006006884 |
| }, |
| { |
| "epoch": 0.3641646489104116, |
| "grad_norm": 0.23473182320594788, |
| "learning_rate": 9.90989114016652e-06, |
| "loss": 0.33626118302345276, |
| "step": 94, |
| "token_acc": 0.8841867411739727 |
| }, |
| { |
| "epoch": 0.36803874092009686, |
| "grad_norm": 0.22333025932312012, |
| "learning_rate": 9.905761799713302e-06, |
| "loss": 0.34545931220054626, |
| "step": 95, |
| "token_acc": 0.8803537032594166 |
| }, |
| { |
| "epoch": 0.37191283292978206, |
| "grad_norm": 0.21172457933425903, |
| "learning_rate": 9.901540850563295e-06, |
| "loss": 0.3074107766151428, |
| "step": 96, |
| "token_acc": 0.8944196156632918 |
| }, |
| { |
| "epoch": 0.3757869249394673, |
| "grad_norm": 0.2134028971195221, |
| "learning_rate": 9.89722837153722e-06, |
| "loss": 0.2957490086555481, |
| "step": 97, |
| "token_acc": 0.8978778618134635 |
| }, |
| { |
| "epoch": 0.37966101694915255, |
| "grad_norm": 0.2610202729701996, |
| "learning_rate": 9.892824443164987e-06, |
| "loss": 0.3412560224533081, |
| "step": 98, |
| "token_acc": 0.8829380073969748 |
| }, |
| { |
| "epoch": 0.38353510895883774, |
| "grad_norm": 0.25488367676734924, |
| "learning_rate": 9.88832914768421e-06, |
| "loss": 0.3430347442626953, |
| "step": 99, |
| "token_acc": 0.8815459290145207 |
| }, |
| { |
| "epoch": 0.387409200968523, |
| "grad_norm": 0.22882606089115143, |
| "learning_rate": 9.883742569038663e-06, |
| "loss": 0.33350762724876404, |
| "step": 100, |
| "token_acc": 0.8861422500817198 |
| }, |
| { |
| "epoch": 0.39128329297820824, |
| "grad_norm": 0.304647833108902, |
| "learning_rate": 9.879064792876717e-06, |
| "loss": 0.31420135498046875, |
| "step": 101, |
| "token_acc": 0.8915588172822687 |
| }, |
| { |
| "epoch": 0.3951573849878935, |
| "grad_norm": 0.22871072590351105, |
| "learning_rate": 9.874295906549728e-06, |
| "loss": 0.3116581439971924, |
| "step": 102, |
| "token_acc": 0.8917020548921253 |
| }, |
| { |
| "epoch": 0.3990314769975787, |
| "grad_norm": 0.2979466915130615, |
| "learning_rate": 9.869435999110428e-06, |
| "loss": 0.3145788013935089, |
| "step": 103, |
| "token_acc": 0.8916011830301528 |
| }, |
| { |
| "epoch": 0.40290556900726393, |
| "grad_norm": 0.20779502391815186, |
| "learning_rate": 9.864485161311242e-06, |
| "loss": 0.3070036768913269, |
| "step": 104, |
| "token_acc": 0.8938107647266995 |
| }, |
| { |
| "epoch": 0.4067796610169492, |
| "grad_norm": 0.2354535311460495, |
| "learning_rate": 9.859443485602603e-06, |
| "loss": 0.32298558950424194, |
| "step": 105, |
| "token_acc": 0.8882189451059107 |
| }, |
| { |
| "epoch": 0.41065375302663437, |
| "grad_norm": 0.22240500152111053, |
| "learning_rate": 9.85431106613122e-06, |
| "loss": 0.3104989528656006, |
| "step": 106, |
| "token_acc": 0.8923007628162216 |
| }, |
| { |
| "epoch": 0.4145278450363196, |
| "grad_norm": 0.21981710195541382, |
| "learning_rate": 9.849087998738328e-06, |
| "loss": 0.3237101435661316, |
| "step": 107, |
| "token_acc": 0.8879955719309623 |
| }, |
| { |
| "epoch": 0.41840193704600487, |
| "grad_norm": 0.2649724781513214, |
| "learning_rate": 9.84377438095789e-06, |
| "loss": 0.323306679725647, |
| "step": 108, |
| "token_acc": 0.8889382382835521 |
| }, |
| { |
| "epoch": 0.42227602905569006, |
| "grad_norm": 0.2193301022052765, |
| "learning_rate": 9.838370312014783e-06, |
| "loss": 0.31488102674484253, |
| "step": 109, |
| "token_acc": 0.8910646836196473 |
| }, |
| { |
| "epoch": 0.4261501210653753, |
| "grad_norm": 0.21842491626739502, |
| "learning_rate": 9.832875892822937e-06, |
| "loss": 0.3206183910369873, |
| "step": 110, |
| "token_acc": 0.8890832728771944 |
| }, |
| { |
| "epoch": 0.43002421307506056, |
| "grad_norm": 0.2456243336200714, |
| "learning_rate": 9.827291225983458e-06, |
| "loss": 0.3201240301132202, |
| "step": 111, |
| "token_acc": 0.8904148288428204 |
| }, |
| { |
| "epoch": 0.43389830508474575, |
| "grad_norm": 0.21340763568878174, |
| "learning_rate": 9.821616415782708e-06, |
| "loss": 0.29961007833480835, |
| "step": 112, |
| "token_acc": 0.8965660205577574 |
| }, |
| { |
| "epoch": 0.437772397094431, |
| "grad_norm": 0.2308902144432068, |
| "learning_rate": 9.815851568190358e-06, |
| "loss": 0.3107410669326782, |
| "step": 113, |
| "token_acc": 0.8927536025516888 |
| }, |
| { |
| "epoch": 0.44164648910411625, |
| "grad_norm": 0.2292374223470688, |
| "learning_rate": 9.80999679085741e-06, |
| "loss": 0.3277205228805542, |
| "step": 114, |
| "token_acc": 0.886787084498464 |
| }, |
| { |
| "epoch": 0.44552058111380144, |
| "grad_norm": 0.21509671211242676, |
| "learning_rate": 9.80405219311419e-06, |
| "loss": 0.3161908984184265, |
| "step": 115, |
| "token_acc": 0.8916077261448497 |
| }, |
| { |
| "epoch": 0.4493946731234867, |
| "grad_norm": 0.20529279112815857, |
| "learning_rate": 9.798017885968295e-06, |
| "loss": 0.29131007194519043, |
| "step": 116, |
| "token_acc": 0.8990066361086406 |
| }, |
| { |
| "epoch": 0.45326876513317194, |
| "grad_norm": 0.24888373911380768, |
| "learning_rate": 9.791893982102537e-06, |
| "loss": 0.31967025995254517, |
| "step": 117, |
| "token_acc": 0.8899925908756566 |
| }, |
| { |
| "epoch": 0.45714285714285713, |
| "grad_norm": 0.22014780342578888, |
| "learning_rate": 9.785680595872824e-06, |
| "loss": 0.31103435158729553, |
| "step": 118, |
| "token_acc": 0.8928936680571538 |
| }, |
| { |
| "epoch": 0.4610169491525424, |
| "grad_norm": 0.21783359348773956, |
| "learning_rate": 9.77937784330603e-06, |
| "loss": 0.307749480009079, |
| "step": 119, |
| "token_acc": 0.8931600584652736 |
| }, |
| { |
| "epoch": 0.4648910411622276, |
| "grad_norm": 0.2104286551475525, |
| "learning_rate": 9.772985842097832e-06, |
| "loss": 0.31199365854263306, |
| "step": 120, |
| "token_acc": 0.8926850259294361 |
| }, |
| { |
| "epoch": 0.4687651331719128, |
| "grad_norm": 0.21124128997325897, |
| "learning_rate": 9.766504711610507e-06, |
| "loss": 0.3170176148414612, |
| "step": 121, |
| "token_acc": 0.8906264477918435 |
| }, |
| { |
| "epoch": 0.47263922518159807, |
| "grad_norm": 0.23777632415294647, |
| "learning_rate": 9.759934572870706e-06, |
| "loss": 0.3052697777748108, |
| "step": 122, |
| "token_acc": 0.894442848003123 |
| }, |
| { |
| "epoch": 0.4765133171912833, |
| "grad_norm": 0.2527632713317871, |
| "learning_rate": 9.753275548567192e-06, |
| "loss": 0.3045836091041565, |
| "step": 123, |
| "token_acc": 0.8951105518605069 |
| }, |
| { |
| "epoch": 0.4803874092009685, |
| "grad_norm": 0.20530211925506592, |
| "learning_rate": 9.74652776304855e-06, |
| "loss": 0.3366113305091858, |
| "step": 124, |
| "token_acc": 0.8836434912892324 |
| }, |
| { |
| "epoch": 0.48426150121065376, |
| "grad_norm": 0.26673150062561035, |
| "learning_rate": 9.739691342320866e-06, |
| "loss": 0.311764121055603, |
| "step": 125, |
| "token_acc": 0.8910826454277961 |
| }, |
| { |
| "epoch": 0.488135593220339, |
| "grad_norm": 0.2245185822248459, |
| "learning_rate": 9.732766414045368e-06, |
| "loss": 0.31055164337158203, |
| "step": 126, |
| "token_acc": 0.8926098098046538 |
| }, |
| { |
| "epoch": 0.4920096852300242, |
| "grad_norm": 0.2143883854150772, |
| "learning_rate": 9.725753107536053e-06, |
| "loss": 0.33499595522880554, |
| "step": 127, |
| "token_acc": 0.8840534260641282 |
| }, |
| { |
| "epoch": 0.49588377723970944, |
| "grad_norm": 0.22163285315036774, |
| "learning_rate": 9.718651553757266e-06, |
| "loss": 0.31920328736305237, |
| "step": 128, |
| "token_acc": 0.8901271163419964 |
| }, |
| { |
| "epoch": 0.4997578692493947, |
| "grad_norm": 0.2143898904323578, |
| "learning_rate": 9.711461885321247e-06, |
| "loss": 0.3301286995410919, |
| "step": 129, |
| "token_acc": 0.8853363916795757 |
| }, |
| { |
| "epoch": 0.5036319612590799, |
| "grad_norm": 0.24990734457969666, |
| "learning_rate": 9.704184236485672e-06, |
| "loss": 0.3278159201145172, |
| "step": 130, |
| "token_acc": 0.8874620923082561 |
| }, |
| { |
| "epoch": 0.5075060532687651, |
| "grad_norm": 0.22136539220809937, |
| "learning_rate": 9.696818743151128e-06, |
| "loss": 0.3319326937198639, |
| "step": 131, |
| "token_acc": 0.885009570455441 |
| }, |
| { |
| "epoch": 0.5113801452784503, |
| "grad_norm": 0.2669275999069214, |
| "learning_rate": 9.68936554285859e-06, |
| "loss": 0.3023684620857239, |
| "step": 132, |
| "token_acc": 0.8951259709956582 |
| }, |
| { |
| "epoch": 0.5152542372881356, |
| "grad_norm": 0.21833708882331848, |
| "learning_rate": 9.68182477478684e-06, |
| "loss": 0.3089104890823364, |
| "step": 133, |
| "token_acc": 0.8930920187299416 |
| }, |
| { |
| "epoch": 0.5191283292978208, |
| "grad_norm": 0.21197167038917542, |
| "learning_rate": 9.67419657974988e-06, |
| "loss": 0.3144392967224121, |
| "step": 134, |
| "token_acc": 0.8910884224709107 |
| }, |
| { |
| "epoch": 0.5230024213075061, |
| "grad_norm": 0.21434499323368073, |
| "learning_rate": 9.66648110019429e-06, |
| "loss": 0.3246540427207947, |
| "step": 135, |
| "token_acc": 0.8876412650671648 |
| }, |
| { |
| "epoch": 0.5268765133171913, |
| "grad_norm": 0.20343148708343506, |
| "learning_rate": 9.658678480196579e-06, |
| "loss": 0.315585196018219, |
| "step": 136, |
| "token_acc": 0.8905443269970013 |
| }, |
| { |
| "epoch": 0.5307506053268766, |
| "grad_norm": 0.23613257706165314, |
| "learning_rate": 9.650788865460487e-06, |
| "loss": 0.3131225109100342, |
| "step": 137, |
| "token_acc": 0.8912192170846405 |
| }, |
| { |
| "epoch": 0.5346246973365617, |
| "grad_norm": 0.4212075471878052, |
| "learning_rate": 9.642812403314272e-06, |
| "loss": 0.29884475469589233, |
| "step": 138, |
| "token_acc": 0.8966553773404051 |
| }, |
| { |
| "epoch": 0.538498789346247, |
| "grad_norm": 0.20193685591220856, |
| "learning_rate": 9.634749242707948e-06, |
| "loss": 0.26036083698272705, |
| "step": 139, |
| "token_acc": 0.9091038865111504 |
| }, |
| { |
| "epoch": 0.5423728813559322, |
| "grad_norm": 0.2208104431629181, |
| "learning_rate": 9.626599534210514e-06, |
| "loss": 0.33184394240379333, |
| "step": 140, |
| "token_acc": 0.8853617134142299 |
| }, |
| { |
| "epoch": 0.5462469733656174, |
| "grad_norm": 0.22493727505207062, |
| "learning_rate": 9.618363430007134e-06, |
| "loss": 0.31208667159080505, |
| "step": 141, |
| "token_acc": 0.8917024215686027 |
| }, |
| { |
| "epoch": 0.5501210653753027, |
| "grad_norm": 0.23963193595409393, |
| "learning_rate": 9.610041083896304e-06, |
| "loss": 0.33588868379592896, |
| "step": 142, |
| "token_acc": 0.883973627021253 |
| }, |
| { |
| "epoch": 0.553995157384988, |
| "grad_norm": 0.21784453094005585, |
| "learning_rate": 9.60163265128697e-06, |
| "loss": 0.3231375813484192, |
| "step": 143, |
| "token_acc": 0.8887875239014834 |
| }, |
| { |
| "epoch": 0.5578692493946731, |
| "grad_norm": 0.22835847735404968, |
| "learning_rate": 9.593138289195634e-06, |
| "loss": 0.3210199773311615, |
| "step": 144, |
| "token_acc": 0.8890582816354493 |
| }, |
| { |
| "epoch": 0.5617433414043583, |
| "grad_norm": 0.2136555314064026, |
| "learning_rate": 9.584558156243418e-06, |
| "loss": 0.3372665047645569, |
| "step": 145, |
| "token_acc": 0.8839793357706921 |
| }, |
| { |
| "epoch": 0.5656174334140436, |
| "grad_norm": 0.20598500967025757, |
| "learning_rate": 9.575892412653102e-06, |
| "loss": 0.30844664573669434, |
| "step": 146, |
| "token_acc": 0.8926156654585412 |
| }, |
| { |
| "epoch": 0.5694915254237288, |
| "grad_norm": 0.2522714138031006, |
| "learning_rate": 9.567141220246136e-06, |
| "loss": 0.36702272295951843, |
| "step": 147, |
| "token_acc": 0.8734296301671142 |
| }, |
| { |
| "epoch": 0.5733656174334141, |
| "grad_norm": 0.21975038945674896, |
| "learning_rate": 9.55830474243961e-06, |
| "loss": 0.32784411311149597, |
| "step": 148, |
| "token_acc": 0.8871756189192851 |
| }, |
| { |
| "epoch": 0.5772397094430993, |
| "grad_norm": 0.21233901381492615, |
| "learning_rate": 9.549383144243213e-06, |
| "loss": 0.2944122850894928, |
| "step": 149, |
| "token_acc": 0.8987453672884691 |
| }, |
| { |
| "epoch": 0.5811138014527845, |
| "grad_norm": 0.2199799120426178, |
| "learning_rate": 9.540376592256142e-06, |
| "loss": 0.3299463987350464, |
| "step": 150, |
| "token_acc": 0.8859144839374592 |
| }, |
| { |
| "epoch": 0.5849878934624697, |
| "grad_norm": 0.19698019325733185, |
| "learning_rate": 9.531285254663997e-06, |
| "loss": 0.3030051589012146, |
| "step": 151, |
| "token_acc": 0.8951707294894029 |
| }, |
| { |
| "epoch": 0.588861985472155, |
| "grad_norm": 0.22306668758392334, |
| "learning_rate": 9.522109301235637e-06, |
| "loss": 0.29752516746520996, |
| "step": 152, |
| "token_acc": 0.8966012679857996 |
| }, |
| { |
| "epoch": 0.5927360774818402, |
| "grad_norm": 0.21317337453365326, |
| "learning_rate": 9.512848903320017e-06, |
| "loss": 0.3052118122577667, |
| "step": 153, |
| "token_acc": 0.8944324633814714 |
| }, |
| { |
| "epoch": 0.5966101694915255, |
| "grad_norm": 0.2120915800333023, |
| "learning_rate": 9.503504233842973e-06, |
| "loss": 0.29761528968811035, |
| "step": 154, |
| "token_acc": 0.8966406260468731 |
| }, |
| { |
| "epoch": 0.6004842615012107, |
| "grad_norm": 0.23525090515613556, |
| "learning_rate": 9.494075467304007e-06, |
| "loss": 0.3034532070159912, |
| "step": 155, |
| "token_acc": 0.8944926637860167 |
| }, |
| { |
| "epoch": 0.6043583535108958, |
| "grad_norm": 0.2095353752374649, |
| "learning_rate": 9.484562779773027e-06, |
| "loss": 0.2903788089752197, |
| "step": 156, |
| "token_acc": 0.8990560027078014 |
| }, |
| { |
| "epoch": 0.6082324455205811, |
| "grad_norm": 0.23741677403450012, |
| "learning_rate": 9.474966348887055e-06, |
| "loss": 0.31467512249946594, |
| "step": 157, |
| "token_acc": 0.8904583329757747 |
| }, |
| { |
| "epoch": 0.6121065375302663, |
| "grad_norm": 0.2259555608034134, |
| "learning_rate": 9.465286353846905e-06, |
| "loss": 0.3404577374458313, |
| "step": 158, |
| "token_acc": 0.8826165622063978 |
| }, |
| { |
| "epoch": 0.6159806295399516, |
| "grad_norm": 0.2183879017829895, |
| "learning_rate": 9.455522975413846e-06, |
| "loss": 0.2766571640968323, |
| "step": 159, |
| "token_acc": 0.9038809421418853 |
| }, |
| { |
| "epoch": 0.6198547215496368, |
| "grad_norm": 0.22651784121990204, |
| "learning_rate": 9.445676395906226e-06, |
| "loss": 0.29638129472732544, |
| "step": 160, |
| "token_acc": 0.8970113168662065 |
| }, |
| { |
| "epoch": 0.6237288135593221, |
| "grad_norm": 0.22088395059108734, |
| "learning_rate": 9.435746799196061e-06, |
| "loss": 0.3023075759410858, |
| "step": 161, |
| "token_acc": 0.8946665593674712 |
| }, |
| { |
| "epoch": 0.6276029055690072, |
| "grad_norm": 0.21526560187339783, |
| "learning_rate": 9.425734370705606e-06, |
| "loss": 0.28661438822746277, |
| "step": 162, |
| "token_acc": 0.9002787847728345 |
| }, |
| { |
| "epoch": 0.6314769975786925, |
| "grad_norm": 0.23334769904613495, |
| "learning_rate": 9.415639297403891e-06, |
| "loss": 0.31685301661491394, |
| "step": 163, |
| "token_acc": 0.890886748080584 |
| }, |
| { |
| "epoch": 0.6353510895883777, |
| "grad_norm": 0.200165793299675, |
| "learning_rate": 9.40546176780323e-06, |
| "loss": 0.30981898307800293, |
| "step": 164, |
| "token_acc": 0.8924871164982372 |
| }, |
| { |
| "epoch": 0.639225181598063, |
| "grad_norm": 0.20800836384296417, |
| "learning_rate": 9.395201971955701e-06, |
| "loss": 0.3162352740764618, |
| "step": 165, |
| "token_acc": 0.8910434805285766 |
| }, |
| { |
| "epoch": 0.6430992736077482, |
| "grad_norm": 0.20923736691474915, |
| "learning_rate": 9.384860101449598e-06, |
| "loss": 0.32208406925201416, |
| "step": 166, |
| "token_acc": 0.8880633815629819 |
| }, |
| { |
| "epoch": 0.6469733656174335, |
| "grad_norm": 0.1986808031797409, |
| "learning_rate": 9.374436349405847e-06, |
| "loss": 0.28397923707962036, |
| "step": 167, |
| "token_acc": 0.9012052212352475 |
| }, |
| { |
| "epoch": 0.6508474576271186, |
| "grad_norm": 0.21215273439884186, |
| "learning_rate": 9.36393091047441e-06, |
| "loss": 0.3066609799861908, |
| "step": 168, |
| "token_acc": 0.894593303584187 |
| }, |
| { |
| "epoch": 0.6547215496368038, |
| "grad_norm": 0.20804037153720856, |
| "learning_rate": 9.353343980830644e-06, |
| "loss": 0.3097017705440521, |
| "step": 169, |
| "token_acc": 0.8926308156125992 |
| }, |
| { |
| "epoch": 0.6585956416464891, |
| "grad_norm": 0.20328834652900696, |
| "learning_rate": 9.342675758171638e-06, |
| "loss": 0.3010105490684509, |
| "step": 170, |
| "token_acc": 0.8950560660129195 |
| }, |
| { |
| "epoch": 0.6624697336561743, |
| "grad_norm": 0.2051060050725937, |
| "learning_rate": 9.331926441712522e-06, |
| "loss": 0.3019353151321411, |
| "step": 171, |
| "token_acc": 0.8949745506999682 |
| }, |
| { |
| "epoch": 0.6663438256658596, |
| "grad_norm": 0.24043123424053192, |
| "learning_rate": 9.32109623218275e-06, |
| "loss": 0.3116442859172821, |
| "step": 172, |
| "token_acc": 0.8915558784861239 |
| }, |
| { |
| "epoch": 0.6702179176755448, |
| "grad_norm": 0.21520181000232697, |
| "learning_rate": 9.310185331822338e-06, |
| "loss": 0.31186142563819885, |
| "step": 173, |
| "token_acc": 0.8917585320277845 |
| }, |
| { |
| "epoch": 0.67409200968523, |
| "grad_norm": 0.21344298124313354, |
| "learning_rate": 9.299193944378112e-06, |
| "loss": 0.3273160755634308, |
| "step": 174, |
| "token_acc": 0.886418268420563 |
| }, |
| { |
| "epoch": 0.6779661016949152, |
| "grad_norm": 0.20224156975746155, |
| "learning_rate": 9.28812227509988e-06, |
| "loss": 0.31608837842941284, |
| "step": 175, |
| "token_acc": 0.8894536504933755 |
| }, |
| { |
| "epoch": 0.6818401937046005, |
| "grad_norm": 0.2154257595539093, |
| "learning_rate": 9.27697053073661e-06, |
| "loss": 0.34367692470550537, |
| "step": 176, |
| "token_acc": 0.8811017511710314 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 0.22003678977489471, |
| "learning_rate": 9.26573891953257e-06, |
| "loss": 0.3205263018608093, |
| "step": 177, |
| "token_acc": 0.8893210947921869 |
| }, |
| { |
| "epoch": 0.689588377723971, |
| "grad_norm": 0.21449677646160126, |
| "learning_rate": 9.254427651223434e-06, |
| "loss": 0.28666430711746216, |
| "step": 178, |
| "token_acc": 0.9003720788020833 |
| }, |
| { |
| "epoch": 0.6934624697336562, |
| "grad_norm": 0.22110596299171448, |
| "learning_rate": 9.243036937032373e-06, |
| "loss": 0.3156067728996277, |
| "step": 179, |
| "token_acc": 0.8902597783694092 |
| }, |
| { |
| "epoch": 0.6973365617433414, |
| "grad_norm": 0.19700580835342407, |
| "learning_rate": 9.2315669896661e-06, |
| "loss": 0.28897273540496826, |
| "step": 180, |
| "token_acc": 0.8994499889336349 |
| }, |
| { |
| "epoch": 0.7012106537530266, |
| "grad_norm": 0.21460606157779694, |
| "learning_rate": 9.220018023310908e-06, |
| "loss": 0.31268295645713806, |
| "step": 181, |
| "token_acc": 0.8918378520876847 |
| }, |
| { |
| "epoch": 0.7050847457627119, |
| "grad_norm": 0.21692436933517456, |
| "learning_rate": 9.208390253628667e-06, |
| "loss": 0.28844964504241943, |
| "step": 182, |
| "token_acc": 0.8997311485616448 |
| }, |
| { |
| "epoch": 0.7089588377723971, |
| "grad_norm": 0.201703280210495, |
| "learning_rate": 9.196683897752794e-06, |
| "loss": 0.32861441373825073, |
| "step": 183, |
| "token_acc": 0.8854774295445417 |
| }, |
| { |
| "epoch": 0.7128329297820823, |
| "grad_norm": 3.976747751235962, |
| "learning_rate": 9.184899174284201e-06, |
| "loss": 0.33475255966186523, |
| "step": 184, |
| "token_acc": 0.8836819705392365 |
| }, |
| { |
| "epoch": 0.7167070217917676, |
| "grad_norm": 0.24247053265571594, |
| "learning_rate": 9.173036303287215e-06, |
| "loss": 0.3366454243659973, |
| "step": 185, |
| "token_acc": 0.8833432089980459 |
| }, |
| { |
| "epoch": 0.7205811138014527, |
| "grad_norm": 0.2282845675945282, |
| "learning_rate": 9.16109550628546e-06, |
| "loss": 0.2812536656856537, |
| "step": 186, |
| "token_acc": 0.9027706860502607 |
| }, |
| { |
| "epoch": 0.724455205811138, |
| "grad_norm": 0.2282128632068634, |
| "learning_rate": 9.149077006257734e-06, |
| "loss": 0.3136906027793884, |
| "step": 187, |
| "token_acc": 0.8912536222754189 |
| }, |
| { |
| "epoch": 0.7283292978208232, |
| "grad_norm": 0.20751290023326874, |
| "learning_rate": 9.136981027633834e-06, |
| "loss": 0.29636135697364807, |
| "step": 188, |
| "token_acc": 0.8974463288547996 |
| }, |
| { |
| "epoch": 0.7322033898305085, |
| "grad_norm": 0.23192144930362701, |
| "learning_rate": 9.124807796290366e-06, |
| "loss": 0.3046882152557373, |
| "step": 189, |
| "token_acc": 0.8943812414560115 |
| }, |
| { |
| "epoch": 0.7360774818401937, |
| "grad_norm": 0.221333310008049, |
| "learning_rate": 9.112557539546535e-06, |
| "loss": 0.32960376143455505, |
| "step": 190, |
| "token_acc": 0.8860915000599271 |
| }, |
| { |
| "epoch": 0.739951573849879, |
| "grad_norm": 0.1981872171163559, |
| "learning_rate": 9.100230486159893e-06, |
| "loss": 0.32151421904563904, |
| "step": 191, |
| "token_acc": 0.888598638535205 |
| }, |
| { |
| "epoch": 0.7438256658595641, |
| "grad_norm": 0.2172573357820511, |
| "learning_rate": 9.087826866322065e-06, |
| "loss": 0.3255336880683899, |
| "step": 192, |
| "token_acc": 0.8864367509340579 |
| }, |
| { |
| "epoch": 0.7476997578692494, |
| "grad_norm": 0.21215571463108063, |
| "learning_rate": 9.075346911654456e-06, |
| "loss": 0.30505236983299255, |
| "step": 193, |
| "token_acc": 0.8936060377931436 |
| }, |
| { |
| "epoch": 0.7515738498789346, |
| "grad_norm": 0.21355277299880981, |
| "learning_rate": 9.062790855203932e-06, |
| "loss": 0.3349328637123108, |
| "step": 194, |
| "token_acc": 0.8847527625851099 |
| }, |
| { |
| "epoch": 0.7554479418886199, |
| "grad_norm": 0.20415301620960236, |
| "learning_rate": 9.050158931438451e-06, |
| "loss": 0.3010273873806, |
| "step": 195, |
| "token_acc": 0.8946901896914337 |
| }, |
| { |
| "epoch": 0.7593220338983051, |
| "grad_norm": 0.2100018560886383, |
| "learning_rate": 9.037451376242696e-06, |
| "loss": 0.3295148015022278, |
| "step": 196, |
| "token_acc": 0.8861214255925314 |
| }, |
| { |
| "epoch": 0.7631961259079904, |
| "grad_norm": 0.21248096227645874, |
| "learning_rate": 9.024668426913671e-06, |
| "loss": 0.2901475727558136, |
| "step": 197, |
| "token_acc": 0.8984891018269412 |
| }, |
| { |
| "epoch": 0.7670702179176755, |
| "grad_norm": 0.20735451579093933, |
| "learning_rate": 9.011810322156269e-06, |
| "loss": 0.3123668134212494, |
| "step": 198, |
| "token_acc": 0.8911118341790296 |
| }, |
| { |
| "epoch": 0.7709443099273607, |
| "grad_norm": 0.2119433879852295, |
| "learning_rate": 8.998877302078803e-06, |
| "loss": 0.30766892433166504, |
| "step": 199, |
| "token_acc": 0.8930650097673094 |
| }, |
| { |
| "epoch": 0.774818401937046, |
| "grad_norm": 0.20151817798614502, |
| "learning_rate": 8.985869608188545e-06, |
| "loss": 0.294528067111969, |
| "step": 200, |
| "token_acc": 0.8973507748438794 |
| }, |
| { |
| "epoch": 0.7786924939467312, |
| "grad_norm": 0.20979715883731842, |
| "learning_rate": 8.97278748338719e-06, |
| "loss": 0.3116077184677124, |
| "step": 201, |
| "token_acc": 0.8916578293780434 |
| }, |
| { |
| "epoch": 0.7825665859564165, |
| "grad_norm": 0.21114560961723328, |
| "learning_rate": 8.95963117196634e-06, |
| "loss": 0.31117022037506104, |
| "step": 202, |
| "token_acc": 0.8922739117136779 |
| }, |
| { |
| "epoch": 0.7864406779661017, |
| "grad_norm": 0.2028111070394516, |
| "learning_rate": 8.946400919602933e-06, |
| "loss": 0.2925041913986206, |
| "step": 203, |
| "token_acc": 0.8979599612123477 |
| }, |
| { |
| "epoch": 0.790314769975787, |
| "grad_norm": 0.19873376190662384, |
| "learning_rate": 8.933096973354665e-06, |
| "loss": 0.3335387706756592, |
| "step": 204, |
| "token_acc": 0.8845781124549695 |
| }, |
| { |
| "epoch": 0.7941888619854721, |
| "grad_norm": 0.20865830779075623, |
| "learning_rate": 8.919719581655357e-06, |
| "loss": 0.3048374652862549, |
| "step": 205, |
| "token_acc": 0.8941424666394205 |
| }, |
| { |
| "epoch": 0.7980629539951574, |
| "grad_norm": 0.21847450733184814, |
| "learning_rate": 8.906268994310339e-06, |
| "loss": 0.30148929357528687, |
| "step": 206, |
| "token_acc": 0.8948231645494126 |
| }, |
| { |
| "epoch": 0.8019370460048426, |
| "grad_norm": 0.23447921872138977, |
| "learning_rate": 8.892745462491763e-06, |
| "loss": 0.3076891005039215, |
| "step": 207, |
| "token_acc": 0.8940680143003497 |
| }, |
| { |
| "epoch": 0.8058111380145279, |
| "grad_norm": 0.2047218531370163, |
| "learning_rate": 8.879149238733932e-06, |
| "loss": 0.2903471291065216, |
| "step": 208, |
| "token_acc": 0.8996930000967329 |
| }, |
| { |
| "epoch": 0.8096852300242131, |
| "grad_norm": 0.3560882806777954, |
| "learning_rate": 8.865480576928578e-06, |
| "loss": 0.2734353840351105, |
| "step": 209, |
| "token_acc": 0.9038816908230364 |
| }, |
| { |
| "epoch": 0.8135593220338984, |
| "grad_norm": 0.22588837146759033, |
| "learning_rate": 8.851739732320109e-06, |
| "loss": 0.30820316076278687, |
| "step": 210, |
| "token_acc": 0.8928903081404425 |
| }, |
| { |
| "epoch": 0.8174334140435835, |
| "grad_norm": 0.19928814470767975, |
| "learning_rate": 8.83792696150086e-06, |
| "loss": 0.30705487728118896, |
| "step": 211, |
| "token_acc": 0.8931717351449738 |
| }, |
| { |
| "epoch": 0.8213075060532687, |
| "grad_norm": 0.23134565353393555, |
| "learning_rate": 8.824042522406295e-06, |
| "loss": 0.3144133687019348, |
| "step": 212, |
| "token_acc": 0.8904542748607169 |
| }, |
| { |
| "epoch": 0.825181598062954, |
| "grad_norm": 0.20952780544757843, |
| "learning_rate": 8.810086674310184e-06, |
| "loss": 0.3166520595550537, |
| "step": 213, |
| "token_acc": 0.8902617260259249 |
| }, |
| { |
| "epoch": 0.8290556900726392, |
| "grad_norm": 0.21133121848106384, |
| "learning_rate": 8.796059677819773e-06, |
| "loss": 0.31384018063545227, |
| "step": 214, |
| "token_acc": 0.8909493414116798 |
| }, |
| { |
| "epoch": 0.8329297820823245, |
| "grad_norm": 0.3206462264060974, |
| "learning_rate": 8.781961794870903e-06, |
| "loss": 0.30939990282058716, |
| "step": 215, |
| "token_acc": 0.8926290243396312 |
| }, |
| { |
| "epoch": 0.8368038740920097, |
| "grad_norm": 0.21380406618118286, |
| "learning_rate": 8.767793288723137e-06, |
| "loss": 0.3126541078090668, |
| "step": 216, |
| "token_acc": 0.8918149018414423 |
| }, |
| { |
| "epoch": 0.8406779661016949, |
| "grad_norm": 0.2241922914981842, |
| "learning_rate": 8.753554423954828e-06, |
| "loss": 0.32906076312065125, |
| "step": 217, |
| "token_acc": 0.8866828065863777 |
| }, |
| { |
| "epoch": 0.8445520581113801, |
| "grad_norm": 0.19776619970798492, |
| "learning_rate": 8.739245466458187e-06, |
| "loss": 0.28062158823013306, |
| "step": 218, |
| "token_acc": 0.9022684784065322 |
| }, |
| { |
| "epoch": 0.8484261501210654, |
| "grad_norm": 0.2141999900341034, |
| "learning_rate": 8.72486668343431e-06, |
| "loss": 0.3276277184486389, |
| "step": 219, |
| "token_acc": 0.8861141792995992 |
| }, |
| { |
| "epoch": 0.8523002421307506, |
| "grad_norm": 0.2332129180431366, |
| "learning_rate": 8.7104183433882e-06, |
| "loss": 0.3168509304523468, |
| "step": 220, |
| "token_acc": 0.8899989570826125 |
| }, |
| { |
| "epoch": 0.8561743341404359, |
| "grad_norm": 0.2141677886247635, |
| "learning_rate": 8.695900716123744e-06, |
| "loss": 0.3259914219379425, |
| "step": 221, |
| "token_acc": 0.8866733094194235 |
| }, |
| { |
| "epoch": 0.8600484261501211, |
| "grad_norm": 0.20929858088493347, |
| "learning_rate": 8.681314072738678e-06, |
| "loss": 0.2776751220226288, |
| "step": 222, |
| "token_acc": 0.9029569916163804 |
| }, |
| { |
| "epoch": 0.8639225181598063, |
| "grad_norm": 0.26802197098731995, |
| "learning_rate": 8.666658685619523e-06, |
| "loss": 0.3192378282546997, |
| "step": 223, |
| "token_acc": 0.8888524656782731 |
| }, |
| { |
| "epoch": 0.8677966101694915, |
| "grad_norm": 0.19303195178508759, |
| "learning_rate": 8.651934828436497e-06, |
| "loss": 0.2820873260498047, |
| "step": 224, |
| "token_acc": 0.9010663601046539 |
| }, |
| { |
| "epoch": 0.8716707021791767, |
| "grad_norm": 0.20784462988376617, |
| "learning_rate": 8.637142776138415e-06, |
| "loss": 0.2850268483161926, |
| "step": 225, |
| "token_acc": 0.9003609394726915 |
| }, |
| { |
| "epoch": 0.875544794188862, |
| "grad_norm": 0.2194257229566574, |
| "learning_rate": 8.622282804947537e-06, |
| "loss": 0.31484997272491455, |
| "step": 226, |
| "token_acc": 0.8909253202507496 |
| }, |
| { |
| "epoch": 0.8794188861985472, |
| "grad_norm": 0.21197804808616638, |
| "learning_rate": 8.607355192354425e-06, |
| "loss": 0.3072202801704407, |
| "step": 227, |
| "token_acc": 0.8929364556285221 |
| }, |
| { |
| "epoch": 0.8832929782082325, |
| "grad_norm": 0.19514977931976318, |
| "learning_rate": 8.592360217112759e-06, |
| "loss": 0.31343895196914673, |
| "step": 228, |
| "token_acc": 0.8909144611151198 |
| }, |
| { |
| "epoch": 0.8871670702179176, |
| "grad_norm": 0.2198445200920105, |
| "learning_rate": 8.57729815923412e-06, |
| "loss": 0.31176120042800903, |
| "step": 229, |
| "token_acc": 0.8916788161998124 |
| }, |
| { |
| "epoch": 0.8910411622276029, |
| "grad_norm": 0.20297633111476898, |
| "learning_rate": 8.562169299982776e-06, |
| "loss": 0.30840498208999634, |
| "step": 230, |
| "token_acc": 0.8921534903182912 |
| }, |
| { |
| "epoch": 0.8949152542372881, |
| "grad_norm": 0.21356205642223358, |
| "learning_rate": 8.546973921870421e-06, |
| "loss": 0.3210839629173279, |
| "step": 231, |
| "token_acc": 0.8882864775840541 |
| }, |
| { |
| "epoch": 0.8987893462469734, |
| "grad_norm": 0.21405935287475586, |
| "learning_rate": 8.531712308650904e-06, |
| "loss": 0.3006952702999115, |
| "step": 232, |
| "token_acc": 0.8953128142705267 |
| }, |
| { |
| "epoch": 0.9026634382566586, |
| "grad_norm": 0.21220295131206512, |
| "learning_rate": 8.516384745314926e-06, |
| "loss": 0.33272668719291687, |
| "step": 233, |
| "token_acc": 0.8845533899027282 |
| }, |
| { |
| "epoch": 0.9065375302663439, |
| "grad_norm": 0.19546008110046387, |
| "learning_rate": 8.50099151808472e-06, |
| "loss": 0.26581257581710815, |
| "step": 234, |
| "token_acc": 0.9067262813046539 |
| }, |
| { |
| "epoch": 0.910411622276029, |
| "grad_norm": 0.2057773917913437, |
| "learning_rate": 8.485532914408712e-06, |
| "loss": 0.2936754524707794, |
| "step": 235, |
| "token_acc": 0.8980145512690381 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 0.21968601644039154, |
| "learning_rate": 8.470009222956138e-06, |
| "loss": 0.2990136742591858, |
| "step": 236, |
| "token_acc": 0.8944779048351311 |
| }, |
| { |
| "epoch": 0.9181598062953995, |
| "grad_norm": 0.22149494290351868, |
| "learning_rate": 8.45442073361167e-06, |
| "loss": 0.29907599091529846, |
| "step": 237, |
| "token_acc": 0.8953804266415489 |
| }, |
| { |
| "epoch": 0.9220338983050848, |
| "grad_norm": 0.18807418644428253, |
| "learning_rate": 8.438767737469995e-06, |
| "loss": 0.2596169412136078, |
| "step": 238, |
| "token_acc": 0.9094668271985952 |
| }, |
| { |
| "epoch": 0.92590799031477, |
| "grad_norm": 0.2053857445716858, |
| "learning_rate": 8.42305052683038e-06, |
| "loss": 0.320443719625473, |
| "step": 239, |
| "token_acc": 0.8882472950063495 |
| }, |
| { |
| "epoch": 0.9297820823244553, |
| "grad_norm": 0.19474725425243378, |
| "learning_rate": 8.407269395191216e-06, |
| "loss": 0.29054853320121765, |
| "step": 240, |
| "token_acc": 0.8986681898213217 |
| }, |
| { |
| "epoch": 0.9336561743341404, |
| "grad_norm": 0.22415153682231903, |
| "learning_rate": 8.391424637244528e-06, |
| "loss": 0.29720863699913025, |
| "step": 241, |
| "token_acc": 0.8967865758573351 |
| }, |
| { |
| "epoch": 0.9375302663438256, |
| "grad_norm": 0.20295462012290955, |
| "learning_rate": 8.375516548870489e-06, |
| "loss": 0.3213497996330261, |
| "step": 242, |
| "token_acc": 0.8888211973402874 |
| }, |
| { |
| "epoch": 0.9414043583535109, |
| "grad_norm": 0.235239177942276, |
| "learning_rate": 8.359545427131876e-06, |
| "loss": 0.31140708923339844, |
| "step": 243, |
| "token_acc": 0.8917541696945803 |
| }, |
| { |
| "epoch": 0.9452784503631961, |
| "grad_norm": 0.21419954299926758, |
| "learning_rate": 8.343511570268541e-06, |
| "loss": 0.3142154812812805, |
| "step": 244, |
| "token_acc": 0.890589961402836 |
| }, |
| { |
| "epoch": 0.9491525423728814, |
| "grad_norm": 0.20498663187026978, |
| "learning_rate": 8.327415277691824e-06, |
| "loss": 0.3464815020561218, |
| "step": 245, |
| "token_acc": 0.8797665540392294 |
| }, |
| { |
| "epoch": 0.9530266343825666, |
| "grad_norm": 0.20611073076725006, |
| "learning_rate": 8.311256849978974e-06, |
| "loss": 0.31497207283973694, |
| "step": 246, |
| "token_acc": 0.889790752866034 |
| }, |
| { |
| "epoch": 0.9569007263922518, |
| "grad_norm": 0.21447882056236267, |
| "learning_rate": 8.295036588867533e-06, |
| "loss": 0.28588759899139404, |
| "step": 247, |
| "token_acc": 0.8993494375908707 |
| }, |
| { |
| "epoch": 0.960774818401937, |
| "grad_norm": 0.21430622041225433, |
| "learning_rate": 8.278754797249702e-06, |
| "loss": 0.3209206461906433, |
| "step": 248, |
| "token_acc": 0.8878057052632179 |
| }, |
| { |
| "epoch": 0.9646489104116223, |
| "grad_norm": 0.1971716433763504, |
| "learning_rate": 8.262411779166681e-06, |
| "loss": 0.29577910900115967, |
| "step": 249, |
| "token_acc": 0.8970768255184925 |
| }, |
| { |
| "epoch": 0.9685230024213075, |
| "grad_norm": 0.20728042721748352, |
| "learning_rate": 8.246007839802997e-06, |
| "loss": 0.3149109482765198, |
| "step": 250, |
| "token_acc": 0.8904120076852685 |
| }, |
| { |
| "epoch": 0.9723970944309928, |
| "grad_norm": 0.23157289624214172, |
| "learning_rate": 8.229543285480797e-06, |
| "loss": 0.3057391047477722, |
| "step": 251, |
| "token_acc": 0.8943966929583815 |
| }, |
| { |
| "epoch": 0.976271186440678, |
| "grad_norm": 0.21818409860134125, |
| "learning_rate": 8.213018423654144e-06, |
| "loss": 0.3090881109237671, |
| "step": 252, |
| "token_acc": 0.8931029437419457 |
| }, |
| { |
| "epoch": 0.9801452784503631, |
| "grad_norm": 0.20345434546470642, |
| "learning_rate": 8.196433562903252e-06, |
| "loss": 0.2966330051422119, |
| "step": 253, |
| "token_acc": 0.8959465166900704 |
| }, |
| { |
| "epoch": 0.9840193704600484, |
| "grad_norm": 0.203868567943573, |
| "learning_rate": 8.179789012928747e-06, |
| "loss": 0.2893424928188324, |
| "step": 254, |
| "token_acc": 0.8989887993032385 |
| }, |
| { |
| "epoch": 0.9878934624697336, |
| "grad_norm": 0.20835842192173004, |
| "learning_rate": 8.163085084545867e-06, |
| "loss": 0.29561957716941833, |
| "step": 255, |
| "token_acc": 0.897130295078995 |
| }, |
| { |
| "epoch": 0.9917675544794189, |
| "grad_norm": 0.2602974772453308, |
| "learning_rate": 8.146322089678668e-06, |
| "loss": 0.33309951424598694, |
| "step": 256, |
| "token_acc": 0.8842519179704944 |
| }, |
| { |
| "epoch": 0.9956416464891041, |
| "grad_norm": 0.1993730664253235, |
| "learning_rate": 8.129500341354192e-06, |
| "loss": 0.32513946294784546, |
| "step": 257, |
| "token_acc": 0.8869922494628838 |
| }, |
| { |
| "epoch": 0.9995157384987894, |
| "grad_norm": 0.2033330649137497, |
| "learning_rate": 8.11262015369663e-06, |
| "loss": 0.29512181878089905, |
| "step": 258, |
| "token_acc": 0.8968425014801387 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.6673643589019775, |
| "learning_rate": 8.095681841921441e-06, |
| "loss": 0.28728920221328735, |
| "step": 259, |
| "token_acc": 0.9003083713758805 |
| }, |
| { |
| "epoch": 1.0038740920096851, |
| "grad_norm": 0.32744893431663513, |
| "learning_rate": 8.07868572232949e-06, |
| "loss": 0.269972562789917, |
| "step": 260, |
| "token_acc": 0.9038492097273063 |
| }, |
| { |
| "epoch": 1.0077481840193705, |
| "grad_norm": 0.2596898376941681, |
| "learning_rate": 8.061632112301122e-06, |
| "loss": 0.2655790150165558, |
| "step": 261, |
| "token_acc": 0.9053338855906853 |
| }, |
| { |
| "epoch": 1.0116222760290556, |
| "grad_norm": 0.2612839639186859, |
| "learning_rate": 8.044521330290235e-06, |
| "loss": 0.2887282967567444, |
| "step": 262, |
| "token_acc": 0.8971828029711167 |
| }, |
| { |
| "epoch": 1.015496368038741, |
| "grad_norm": 0.2769652009010315, |
| "learning_rate": 8.027353695818345e-06, |
| "loss": 0.26126527786254883, |
| "step": 263, |
| "token_acc": 0.9065780969019781 |
| }, |
| { |
| "epoch": 1.0193704600484261, |
| "grad_norm": 0.27929142117500305, |
| "learning_rate": 8.010129529468614e-06, |
| "loss": 0.27868735790252686, |
| "step": 264, |
| "token_acc": 0.9001419249114798 |
| }, |
| { |
| "epoch": 1.0232445520581113, |
| "grad_norm": 0.23997750878334045, |
| "learning_rate": 7.992849152879857e-06, |
| "loss": 0.2831759750843048, |
| "step": 265, |
| "token_acc": 0.899304001670737 |
| }, |
| { |
| "epoch": 1.0271186440677966, |
| "grad_norm": 0.25313815474510193, |
| "learning_rate": 7.97551288874055e-06, |
| "loss": 0.27934202551841736, |
| "step": 266, |
| "token_acc": 0.9004498805562496 |
| }, |
| { |
| "epoch": 1.0309927360774818, |
| "grad_norm": 0.23287494480609894, |
| "learning_rate": 7.95812106078279e-06, |
| "loss": 0.26112881302833557, |
| "step": 267, |
| "token_acc": 0.9065508038300509 |
| }, |
| { |
| "epoch": 1.0348668280871671, |
| "grad_norm": 0.22660091519355774, |
| "learning_rate": 7.940673993776258e-06, |
| "loss": 0.2504875063896179, |
| "step": 268, |
| "token_acc": 0.9097140867981872 |
| }, |
| { |
| "epoch": 1.0387409200968523, |
| "grad_norm": 0.2266615480184555, |
| "learning_rate": 7.923172013522153e-06, |
| "loss": 0.25760790705680847, |
| "step": 269, |
| "token_acc": 0.9073963735109954 |
| }, |
| { |
| "epoch": 1.0426150121065376, |
| "grad_norm": 0.22593924403190613, |
| "learning_rate": 7.905615446847107e-06, |
| "loss": 0.28686419129371643, |
| "step": 270, |
| "token_acc": 0.8976161305002275 |
| }, |
| { |
| "epoch": 1.0464891041162228, |
| "grad_norm": 0.2425071895122528, |
| "learning_rate": 7.888004621597079e-06, |
| "loss": 0.2573948800563812, |
| "step": 271, |
| "token_acc": 0.907380557815819 |
| }, |
| { |
| "epoch": 1.050363196125908, |
| "grad_norm": 0.23996935784816742, |
| "learning_rate": 7.87033986663124e-06, |
| "loss": 0.2808932065963745, |
| "step": 272, |
| "token_acc": 0.8994914728045711 |
| }, |
| { |
| "epoch": 1.0542372881355933, |
| "grad_norm": 0.25931164622306824, |
| "learning_rate": 7.852621511815825e-06, |
| "loss": 0.26375657320022583, |
| "step": 273, |
| "token_acc": 0.9051297163863579 |
| }, |
| { |
| "epoch": 1.0581113801452784, |
| "grad_norm": 0.20594951510429382, |
| "learning_rate": 7.834849888017979e-06, |
| "loss": 0.23789554834365845, |
| "step": 274, |
| "token_acc": 0.9142479611743739 |
| }, |
| { |
| "epoch": 1.0619854721549637, |
| "grad_norm": 0.23315519094467163, |
| "learning_rate": 7.817025327099574e-06, |
| "loss": 0.24684631824493408, |
| "step": 275, |
| "token_acc": 0.9110874200426439 |
| }, |
| { |
| "epoch": 1.0658595641646489, |
| "grad_norm": 0.2189839482307434, |
| "learning_rate": 7.799148161911013e-06, |
| "loss": 0.2684437334537506, |
| "step": 276, |
| "token_acc": 0.9041172254519392 |
| }, |
| { |
| "epoch": 1.0697336561743342, |
| "grad_norm": 0.21298226714134216, |
| "learning_rate": 7.781218726285014e-06, |
| "loss": 0.2720562815666199, |
| "step": 277, |
| "token_acc": 0.9027445373018297 |
| }, |
| { |
| "epoch": 1.0736077481840194, |
| "grad_norm": 0.21282611787319183, |
| "learning_rate": 7.763237355030384e-06, |
| "loss": 0.2579670548439026, |
| "step": 278, |
| "token_acc": 0.9080073119376767 |
| }, |
| { |
| "epoch": 1.0774818401937045, |
| "grad_norm": 0.21488887071609497, |
| "learning_rate": 7.745204383925753e-06, |
| "loss": 0.2742394804954529, |
| "step": 279, |
| "token_acc": 0.9015262545209174 |
| }, |
| { |
| "epoch": 1.0813559322033899, |
| "grad_norm": 0.19826629757881165, |
| "learning_rate": 7.727120149713313e-06, |
| "loss": 0.23731666803359985, |
| "step": 280, |
| "token_acc": 0.9146603883445988 |
| }, |
| { |
| "epoch": 1.085230024213075, |
| "grad_norm": 0.20840346813201904, |
| "learning_rate": 7.708984990092528e-06, |
| "loss": 0.22673961520195007, |
| "step": 281, |
| "token_acc": 0.9184409845576723 |
| }, |
| { |
| "epoch": 1.0891041162227604, |
| "grad_norm": 0.21199366450309753, |
| "learning_rate": 7.690799243713825e-06, |
| "loss": 0.2788952887058258, |
| "step": 282, |
| "token_acc": 0.9002122640890617 |
| }, |
| { |
| "epoch": 1.0929782082324455, |
| "grad_norm": 0.23963455855846405, |
| "learning_rate": 7.672563250172278e-06, |
| "loss": 0.2703215479850769, |
| "step": 283, |
| "token_acc": 0.902904561306835 |
| }, |
| { |
| "epoch": 1.0968523002421307, |
| "grad_norm": 0.20739565789699554, |
| "learning_rate": 7.654277350001255e-06, |
| "loss": 0.2556743621826172, |
| "step": 284, |
| "token_acc": 0.9087778504769448 |
| }, |
| { |
| "epoch": 1.100726392251816, |
| "grad_norm": 0.3205340504646301, |
| "learning_rate": 7.635941884666072e-06, |
| "loss": 0.2660865783691406, |
| "step": 285, |
| "token_acc": 0.9052546447746934 |
| }, |
| { |
| "epoch": 1.1046004842615011, |
| "grad_norm": 0.20611628890037537, |
| "learning_rate": 7.617557196557601e-06, |
| "loss": 0.2590142488479614, |
| "step": 286, |
| "token_acc": 0.9070821077566713 |
| }, |
| { |
| "epoch": 1.1084745762711865, |
| "grad_norm": 0.1932753622531891, |
| "learning_rate": 7.599123628985894e-06, |
| "loss": 0.2396095246076584, |
| "step": 287, |
| "token_acc": 0.9135842317299648 |
| }, |
| { |
| "epoch": 1.1123486682808716, |
| "grad_norm": 0.21151748299598694, |
| "learning_rate": 7.580641526173758e-06, |
| "loss": 0.2544936537742615, |
| "step": 288, |
| "token_acc": 0.9088854539111634 |
| }, |
| { |
| "epoch": 1.116222760290557, |
| "grad_norm": 0.1992950737476349, |
| "learning_rate": 7.5621112332503325e-06, |
| "loss": 0.2544850707054138, |
| "step": 289, |
| "token_acc": 0.9090426161294457 |
| }, |
| { |
| "epoch": 1.1200968523002421, |
| "grad_norm": 0.20908565819263458, |
| "learning_rate": 7.543533096244644e-06, |
| "loss": 0.2762412428855896, |
| "step": 290, |
| "token_acc": 0.9013541447063986 |
| }, |
| { |
| "epoch": 1.1239709443099273, |
| "grad_norm": 0.2157965451478958, |
| "learning_rate": 7.524907462079149e-06, |
| "loss": 0.25533056259155273, |
| "step": 291, |
| "token_acc": 0.9080176353704462 |
| }, |
| { |
| "epoch": 1.1278450363196126, |
| "grad_norm": 0.19141145050525665, |
| "learning_rate": 7.506234678563248e-06, |
| "loss": 0.2362717241048813, |
| "step": 292, |
| "token_acc": 0.9155038610363999 |
| }, |
| { |
| "epoch": 1.1317191283292978, |
| "grad_norm": 0.21533732116222382, |
| "learning_rate": 7.487515094386792e-06, |
| "loss": 0.23099368810653687, |
| "step": 293, |
| "token_acc": 0.9173202498403009 |
| }, |
| { |
| "epoch": 1.1355932203389831, |
| "grad_norm": 0.20129309594631195, |
| "learning_rate": 7.468749059113578e-06, |
| "loss": 0.26144838333129883, |
| "step": 294, |
| "token_acc": 0.9057641431815713 |
| }, |
| { |
| "epoch": 1.1394673123486683, |
| "grad_norm": 0.3953739404678345, |
| "learning_rate": 7.449936923174813e-06, |
| "loss": 0.2557257413864136, |
| "step": 295, |
| "token_acc": 0.9087617787160037 |
| }, |
| { |
| "epoch": 1.1433414043583534, |
| "grad_norm": 0.21214410662651062, |
| "learning_rate": 7.431079037862575e-06, |
| "loss": 0.27983057498931885, |
| "step": 296, |
| "token_acc": 0.8996573827559394 |
| }, |
| { |
| "epoch": 1.1472154963680388, |
| "grad_norm": 0.20280665159225464, |
| "learning_rate": 7.412175755323254e-06, |
| "loss": 0.2772400677204132, |
| "step": 297, |
| "token_acc": 0.9010093723967251 |
| }, |
| { |
| "epoch": 1.151089588377724, |
| "grad_norm": 0.21776501834392548, |
| "learning_rate": 7.39322742855097e-06, |
| "loss": 0.24517808854579926, |
| "step": 298, |
| "token_acc": 0.9120538077359621 |
| }, |
| { |
| "epoch": 1.1549636803874093, |
| "grad_norm": 0.21630938351154327, |
| "learning_rate": 7.374234411380987e-06, |
| "loss": 0.2736694812774658, |
| "step": 299, |
| "token_acc": 0.9020631116999458 |
| }, |
| { |
| "epoch": 1.1588377723970944, |
| "grad_norm": 0.19338402152061462, |
| "learning_rate": 7.355197058483103e-06, |
| "loss": 0.24092288315296173, |
| "step": 300, |
| "token_acc": 0.9133508019967492 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 777, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1233780174946304e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|