{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 50.0, "global_step": 717, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0013953082759222116, "grad_norm": 2.108574390411377, "learning_rate": 2.7777777777777774e-08, "loss": 0.5175914764404297, "step": 1, "token_acc": 0.9343839541547277 }, { "epoch": 0.006976541379611058, "grad_norm": 2.2113378047943115, "learning_rate": 1.3888888888888888e-07, "loss": 0.5278360247612, "step": 5, "token_acc": 0.9315846730327572 }, { "epoch": 0.013953082759222116, "grad_norm": 2.659011125564575, "learning_rate": 2.7777777777777776e-07, "loss": 0.5554334640502929, "step": 10, "token_acc": 0.9270202547504698 }, { "epoch": 0.020929624138833175, "grad_norm": 2.251737117767334, "learning_rate": 4.1666666666666667e-07, "loss": 0.5544517517089844, "step": 15, "token_acc": 0.9273873055524015 }, { "epoch": 0.027906165518444232, "grad_norm": 1.9254176616668701, "learning_rate": 5.555555555555555e-07, "loss": 0.5853276729583741, "step": 20, "token_acc": 0.920275952157407 }, { "epoch": 0.03488270689805529, "grad_norm": 2.059199333190918, "learning_rate": 6.944444444444444e-07, "loss": 0.5623232841491699, "step": 25, "token_acc": 0.9284975165562914 }, { "epoch": 0.04185924827766635, "grad_norm": 2.425384998321533, "learning_rate": 8.333333333333333e-07, "loss": 0.5716644763946533, "step": 30, "token_acc": 0.9264636757716613 }, { "epoch": 0.048835789657277404, "grad_norm": 1.9226746559143066, "learning_rate": 9.722222222222222e-07, "loss": 0.5484982490539551, "step": 35, "token_acc": 0.9298897411313519 }, { "epoch": 0.055812331036888464, "grad_norm": 2.045583724975586, "learning_rate": 9.999148757713664e-07, "loss": 0.5991110324859619, "step": 40, "token_acc": 0.9234525698937859 }, { "epoch": 0.06278887241649952, "grad_norm": 1.9219040870666504, "learning_rate": 9.995691082675907e-07, "loss": 0.5559669494628906, "step": 45, "token_acc": 0.9266224757206721 }, { "epoch": 0.06976541379611058, "grad_norm": 1.9935494661331177, "learning_rate": 9.98957561037365e-07, "loss": 0.5474924564361572, "step": 50, "token_acc": 0.92885522721629 }, { "epoch": 0.07674195517572163, "grad_norm": 2.460942506790161, "learning_rate": 9.980805594347849e-07, "loss": 0.5159276008605957, "step": 55, "token_acc": 0.9340729405763836 }, { "epoch": 0.0837184965553327, "grad_norm": 2.300776481628418, "learning_rate": 9.969385700404345e-07, "loss": 0.5166152000427247, "step": 60, "token_acc": 0.9320153815815421 }, { "epoch": 0.09069503793494375, "grad_norm": 2.573843240737915, "learning_rate": 9.955322004131553e-07, "loss": 0.5368542194366455, "step": 65, "token_acc": 0.9301501135545339 }, { "epoch": 0.09767157931455481, "grad_norm": 2.0781192779541016, "learning_rate": 9.93862198766815e-07, "loss": 0.5094423294067383, "step": 70, "token_acc": 0.9307623850489469 }, { "epoch": 0.10464812069416586, "grad_norm": 2.0247654914855957, "learning_rate": 9.91929453572245e-07, "loss": 0.47563705444335935, "step": 75, "token_acc": 0.9330075229257042 }, { "epoch": 0.11162466207377693, "grad_norm": 2.036161422729492, "learning_rate": 9.897349930845566e-07, "loss": 0.5021390914916992, "step": 80, "token_acc": 0.9329839883551674 }, { "epoch": 0.11860120345338798, "grad_norm": 2.1073312759399414, "learning_rate": 9.872799847960918e-07, "loss": 0.501053762435913, "step": 85, "token_acc": 0.9329673985362608 }, { "epoch": 0.12557774483299905, "grad_norm": 1.834761619567871, "learning_rate": 9.845657348152955e-07, "loss": 0.4600623607635498, "step": 90, "token_acc": 0.936784676510704 }, { "epoch": 0.1325542862126101, "grad_norm": 1.7184985876083374, "learning_rate": 9.81593687171844e-07, "loss": 0.5222196578979492, "step": 95, "token_acc": 0.9302824071593162 }, { "epoch": 0.13953082759222116, "grad_norm": 1.7423793077468872, "learning_rate": 9.783654230483934e-07, "loss": 0.4808220863342285, "step": 100, "token_acc": 0.9323681279740812 }, { "epoch": 0.14650736897183222, "grad_norm": 1.5745614767074585, "learning_rate": 9.748826599393632e-07, "loss": 0.44776349067687987, "step": 105, "token_acc": 0.9381572216222361 }, { "epoch": 0.15348391035144326, "grad_norm": 1.491922378540039, "learning_rate": 9.711472507371982e-07, "loss": 0.44771714210510255, "step": 110, "token_acc": 0.9347117653668952 }, { "epoch": 0.16046045173105433, "grad_norm": 1.5303000211715698, "learning_rate": 9.671611827465971e-07, "loss": 0.42823081016540526, "step": 115, "token_acc": 0.9424545917501274 }, { "epoch": 0.1674369931106654, "grad_norm": 1.52151620388031, "learning_rate": 9.629265766272291e-07, "loss": 0.4314168930053711, "step": 120, "token_acc": 0.9379815310638763 }, { "epoch": 0.17441353449027644, "grad_norm": 1.496336579322815, "learning_rate": 9.58445685265507e-07, "loss": 0.44430341720581057, "step": 125, "token_acc": 0.938302298442336 }, { "epoch": 0.1813900758698875, "grad_norm": 1.4200767278671265, "learning_rate": 9.537208925760093e-07, "loss": 0.4397609710693359, "step": 130, "token_acc": 0.9418849948962232 }, { "epoch": 0.18836661724949857, "grad_norm": 1.419700264930725, "learning_rate": 9.487547122331964e-07, "loss": 0.5382704734802246, "step": 135, "token_acc": 0.9312300174941184 }, { "epoch": 0.19534315862910961, "grad_norm": 1.3994510173797607, "learning_rate": 9.435497863340896e-07, "loss": 0.41959681510925295, "step": 140, "token_acc": 0.9434397845325125 }, { "epoch": 0.20231970000872068, "grad_norm": 1.2424718141555786, "learning_rate": 9.381088839926292e-07, "loss": 0.39974849224090575, "step": 145, "token_acc": 0.9442570675170172 }, { "epoch": 0.20929624138833172, "grad_norm": 1.1396198272705078, "learning_rate": 9.324348998664548e-07, "loss": 0.4095014572143555, "step": 150, "token_acc": 0.9402837198829093 }, { "epoch": 0.2162727827679428, "grad_norm": 1.1727170944213867, "learning_rate": 9.265308526168971e-07, "loss": 0.4158812999725342, "step": 155, "token_acc": 0.945169557184576 }, { "epoch": 0.22324932414755386, "grad_norm": 1.2740483283996582, "learning_rate": 9.203998833029945e-07, "loss": 0.40761551856994627, "step": 160, "token_acc": 0.941811175337187 }, { "epoch": 0.2302258655271649, "grad_norm": 1.2118650674819946, "learning_rate": 9.140452537103941e-07, "loss": 0.40488572120666505, "step": 165, "token_acc": 0.939998011533108 }, { "epoch": 0.23720240690677596, "grad_norm": 1.2679206132888794, "learning_rate": 9.074703446160232e-07, "loss": 0.38835389614105226, "step": 170, "token_acc": 0.9452747758105312 }, { "epoch": 0.24417894828638703, "grad_norm": 1.4239633083343506, "learning_rate": 9.006786539894554e-07, "loss": 0.434948205947876, "step": 175, "token_acc": 0.9388818618770554 }, { "epoch": 0.2511554896659981, "grad_norm": 1.4500908851623535, "learning_rate": 8.936737951319275e-07, "loss": 0.47136545181274414, "step": 180, "token_acc": 0.9320933879257776 }, { "epoch": 0.2581320310456091, "grad_norm": 1.1485167741775513, "learning_rate": 8.864594947539992e-07, "loss": 0.4100066661834717, "step": 185, "token_acc": 0.9440283102329696 }, { "epoch": 0.2651085724252202, "grad_norm": 1.468805193901062, "learning_rate": 8.790395909928753e-07, "loss": 0.39954936504364014, "step": 190, "token_acc": 0.9425916365513681 }, { "epoch": 0.27208511380483125, "grad_norm": 1.2177455425262451, "learning_rate": 8.714180313704489e-07, "loss": 0.3338632583618164, "step": 195, "token_acc": 0.9483336172145574 }, { "epoch": 0.2790616551844423, "grad_norm": 1.2997610569000244, "learning_rate": 8.635988706931486e-07, "loss": 0.38302700519561766, "step": 200, "token_acc": 0.9413486825782762 }, { "epoch": 0.2860381965640534, "grad_norm": 1.135811686515808, "learning_rate": 8.555862688947075e-07, "loss": 0.33866784572601316, "step": 205, "token_acc": 0.9479243990178724 }, { "epoch": 0.29301473794366445, "grad_norm": 1.3217355012893677, "learning_rate": 8.473844888230064e-07, "loss": 0.35600202083587645, "step": 210, "token_acc": 0.9461965574680733 }, { "epoch": 0.29999127932327546, "grad_norm": 1.2507730722427368, "learning_rate": 8.389978939721598e-07, "loss": 0.352951717376709, "step": 215, "token_acc": 0.9438299509473886 }, { "epoch": 0.30696782070288653, "grad_norm": 1.3820980787277222, "learning_rate": 8.304309461610601e-07, "loss": 0.3622483253479004, "step": 220, "token_acc": 0.9437283872995913 }, { "epoch": 0.3139443620824976, "grad_norm": 1.394065499305725, "learning_rate": 8.216882031596096e-07, "loss": 0.3512030363082886, "step": 225, "token_acc": 0.9435979832677711 }, { "epoch": 0.32092090346210866, "grad_norm": 1.2014678716659546, "learning_rate": 8.127743162639051e-07, "loss": 0.3101860523223877, "step": 230, "token_acc": 0.9467404378157325 }, { "epoch": 0.32789744484171973, "grad_norm": 1.3220340013504028, "learning_rate": 8.036940278216646e-07, "loss": 0.28164148330688477, "step": 235, "token_acc": 0.9506907137375288 }, { "epoch": 0.3348739862213308, "grad_norm": 1.5838576555252075, "learning_rate": 7.944521687092142e-07, "loss": 0.2631302118301392, "step": 240, "token_acc": 0.9514257294429708 }, { "epoch": 0.3418505276009418, "grad_norm": 1.5820879936218262, "learning_rate": 7.850536557613748e-07, "loss": 0.3039613962173462, "step": 245, "token_acc": 0.9476980693484858 }, { "epoch": 0.3488270689805529, "grad_norm": 1.4841257333755493, "learning_rate": 7.755034891556167e-07, "loss": 0.28357877731323244, "step": 250, "token_acc": 0.9478200246688475 }, { "epoch": 0.35580361036016395, "grad_norm": 1.2519296407699585, "learning_rate": 7.658067497518772e-07, "loss": 0.3490274429321289, "step": 255, "token_acc": 0.9376414667270258 }, { "epoch": 0.362780151739775, "grad_norm": 1.340489149093628, "learning_rate": 7.559685963894513e-07, "loss": 0.32015056610107423, "step": 260, "token_acc": 0.9416806521217933 }, { "epoch": 0.3697566931193861, "grad_norm": 1.3753681182861328, "learning_rate": 7.459942631423962e-07, "loss": 0.27373878955841063, "step": 265, "token_acc": 0.9539526646272742 }, { "epoch": 0.37673323449899715, "grad_norm": 1.1863863468170166, "learning_rate": 7.358890565349105e-07, "loss": 0.29328436851501466, "step": 270, "token_acc": 0.9487552700260992 }, { "epoch": 0.38370977587860816, "grad_norm": 1.490551233291626, "learning_rate": 7.256583527181683e-07, "loss": 0.33202688694000243, "step": 275, "token_acc": 0.9404879571346824 }, { "epoch": 0.39068631725821923, "grad_norm": 1.3143407106399536, "learning_rate": 7.153075946101097e-07, "loss": 0.278816294670105, "step": 280, "token_acc": 0.9493239404613112 }, { "epoch": 0.3976628586378303, "grad_norm": 1.0962167978286743, "learning_rate": 7.048422889997115e-07, "loss": 0.23789422512054442, "step": 285, "token_acc": 0.9503463643850215 }, { "epoch": 0.40463940001744136, "grad_norm": 0.824631929397583, "learning_rate": 6.942680036172762e-07, "loss": 0.24912948608398439, "step": 290, "token_acc": 0.9536533677324243 }, { "epoch": 0.41161594139705243, "grad_norm": 1.3604934215545654, "learning_rate": 6.835903641722999e-07, "loss": 0.3469517946243286, "step": 295, "token_acc": 0.9420104361524072 }, { "epoch": 0.41859248277666344, "grad_norm": 0.8911880850791931, "learning_rate": 6.72815051360494e-07, "loss": 0.24576101303100586, "step": 300, "token_acc": 0.953768733064204 }, { "epoch": 0.4255690241562745, "grad_norm": 1.0799229145050049, "learning_rate": 6.619477978415531e-07, "loss": 0.2791733980178833, "step": 305, "token_acc": 0.9415368904774062 }, { "epoch": 0.4325455655358856, "grad_norm": 0.872166633605957, "learning_rate": 6.509943851892766e-07, "loss": 0.3617237567901611, "step": 310, "token_acc": 0.9406946604458268 }, { "epoch": 0.43952210691549665, "grad_norm": 0.9697985649108887, "learning_rate": 6.399606408156687e-07, "loss": 0.22297954559326172, "step": 315, "token_acc": 0.954813046937152 }, { "epoch": 0.4464986482951077, "grad_norm": 0.8073396682739258, "learning_rate": 6.288524348706502e-07, "loss": 0.20998930931091309, "step": 320, "token_acc": 0.9557532836995339 }, { "epoch": 0.4534751896747188, "grad_norm": 0.9216898679733276, "learning_rate": 6.176756771190337e-07, "loss": 0.2161928176879883, "step": 325, "token_acc": 0.9520381208887968 }, { "epoch": 0.4604517310543298, "grad_norm": 0.9290446639060974, "learning_rate": 6.064363137964225e-07, "loss": 0.24029843807220458, "step": 330, "token_acc": 0.9494813278008298 }, { "epoch": 0.46742827243394086, "grad_norm": 0.9209424257278442, "learning_rate": 5.95140324445706e-07, "loss": 0.21532030105590821, "step": 335, "token_acc": 0.9515148253780337 }, { "epoch": 0.47440481381355193, "grad_norm": 0.7281814217567444, "learning_rate": 5.83793718735837e-07, "loss": 0.24763097763061523, "step": 340, "token_acc": 0.9499685006299874 }, { "epoch": 0.481381355193163, "grad_norm": 0.6570573449134827, "learning_rate": 5.724025332645793e-07, "loss": 0.19987608194351197, "step": 345, "token_acc": 0.9564738292011019 }, { "epoch": 0.48835789657277406, "grad_norm": 0.7470307946205139, "learning_rate": 5.609728283469288e-07, "loss": 0.1938636064529419, "step": 350, "token_acc": 0.9593920408400046 }, { "epoch": 0.49533443795238513, "grad_norm": 0.5753098130226135, "learning_rate": 5.495106847909182e-07, "loss": 0.2106870651245117, "step": 355, "token_acc": 0.9575553464414839 }, { "epoch": 0.5023109793319962, "grad_norm": 0.5757151246070862, "learning_rate": 5.380222006625179e-07, "loss": 0.18208713531494142, "step": 360, "token_acc": 0.963362694802052 }, { "epoch": 0.5092875207116072, "grad_norm": 0.5811958312988281, "learning_rate": 5.265134880413548e-07, "loss": 0.1780398368835449, "step": 365, "token_acc": 0.9639311886076607 }, { "epoch": 0.5162640620912182, "grad_norm": 0.5238430500030518, "learning_rate": 5.149906697689767e-07, "loss": 0.2431933879852295, "step": 370, "token_acc": 0.9474901594773364 }, { "epoch": 0.5232406034708293, "grad_norm": 0.7463929057121277, "learning_rate": 5.034598761913916e-07, "loss": 0.2559064865112305, "step": 375, "token_acc": 0.9454068781164859 }, { "epoch": 0.5302171448504404, "grad_norm": 0.5051546692848206, "learning_rate": 4.919272418976123e-07, "loss": 0.20950682163238527, "step": 380, "token_acc": 0.9548458149779736 }, { "epoch": 0.5371936862300515, "grad_norm": 0.47918471693992615, "learning_rate": 4.803989024559459e-07, "loss": 0.18409876823425292, "step": 385, "token_acc": 0.9592592592592593 }, { "epoch": 0.5441702276096625, "grad_norm": 0.48506343364715576, "learning_rate": 4.688809911497609e-07, "loss": 0.19301035404205322, "step": 390, "token_acc": 0.9593621399176955 }, { "epoch": 0.5511467689892736, "grad_norm": 0.6255201697349548, "learning_rate": 4.57379635714471e-07, "loss": 0.1948167562484741, "step": 395, "token_acc": 0.9559854371569853 }, { "epoch": 0.5581233103688846, "grad_norm": 0.6040950417518616, "learning_rate": 4.459009550774692e-07, "loss": 0.15679298639297484, "step": 400, "token_acc": 0.9631829798991504 }, { "epoch": 0.5650998517484956, "grad_norm": 0.6469866037368774, "learning_rate": 4.344510561027498e-07, "loss": 0.2119133472442627, "step": 405, "token_acc": 0.9538207806487081 }, { "epoch": 0.5720763931281068, "grad_norm": 0.6210054755210876, "learning_rate": 4.230360303419453e-07, "loss": 0.17766163349151612, "step": 410, "token_acc": 0.9606851549755302 }, { "epoch": 0.5790529345077178, "grad_norm": 0.4716809391975403, "learning_rate": 4.116619507935144e-07, "loss": 0.18397997617721557, "step": 415, "token_acc": 0.9560470014410819 }, { "epoch": 0.5860294758873289, "grad_norm": 0.5452926754951477, "learning_rate": 4.003348686717949e-07, "loss": 0.2028341293334961, "step": 420, "token_acc": 0.9551058385671086 }, { "epoch": 0.5930060172669399, "grad_norm": 0.5029204487800598, "learning_rate": 3.890608101876517e-07, "loss": 0.16716669797897338, "step": 425, "token_acc": 0.9617021276595744 }, { "epoch": 0.5999825586465509, "grad_norm": 0.5014962553977966, "learning_rate": 3.7784577334242273e-07, "loss": 0.18506402969360353, "step": 430, "token_acc": 0.9586908319676082 }, { "epoch": 0.606959100026162, "grad_norm": 0.5529438257217407, "learning_rate": 3.666957247368757e-07, "loss": 0.1777629852294922, "step": 435, "token_acc": 0.9608512874408828 }, { "epoch": 0.6139356414057731, "grad_norm": 0.4262159466743469, "learning_rate": 3.556165963968691e-07, "loss": 0.14577605724334716, "step": 440, "token_acc": 0.962014556659406 }, { "epoch": 0.6209121827853842, "grad_norm": 0.5142691135406494, "learning_rate": 3.4461428261740754e-07, "loss": 0.20166921615600586, "step": 445, "token_acc": 0.9590035201733008 }, { "epoch": 0.6278887241649952, "grad_norm": 0.5572395324707031, "learning_rate": 3.3369463682677234e-07, "loss": 0.20577445030212402, "step": 450, "token_acc": 0.9554721339878718 }, { "epoch": 0.6348652655446063, "grad_norm": 0.496382474899292, "learning_rate": 3.2286346847239123e-07, "loss": 0.14863760471343995, "step": 455, "token_acc": 0.9620308092861792 }, { "epoch": 0.6418418069242173, "grad_norm": 0.4650530517101288, "learning_rate": 3.1212653993010954e-07, "loss": 0.17070106267929078, "step": 460, "token_acc": 0.9571316789626649 }, { "epoch": 0.6488183483038283, "grad_norm": 0.4173397421836853, "learning_rate": 3.014895634385014e-07, "loss": 0.1784367799758911, "step": 465, "token_acc": 0.9574702782203701 }, { "epoch": 0.6557948896834395, "grad_norm": 0.50703364610672, "learning_rate": 2.9095819805985795e-07, "loss": 0.18249971866607667, "step": 470, "token_acc": 0.9553955949304787 }, { "epoch": 0.6627714310630505, "grad_norm": 0.4510941505432129, "learning_rate": 2.8053804666946287e-07, "loss": 0.17186166048049928, "step": 475, "token_acc": 0.9583355252775001 }, { "epoch": 0.6697479724426616, "grad_norm": 0.498515248298645, "learning_rate": 2.7023465297476424e-07, "loss": 0.23503575325012208, "step": 480, "token_acc": 0.9480993056596233 }, { "epoch": 0.6767245138222726, "grad_norm": 0.5071078538894653, "learning_rate": 2.6005349856602123e-07, "loss": 0.23459105491638182, "step": 485, "token_acc": 0.9456293181135476 }, { "epoch": 0.6837010552018836, "grad_norm": 0.5977618098258972, "learning_rate": 2.500000000000001e-07, "loss": 0.1519307851791382, "step": 490, "token_acc": 0.9616792137181096 }, { "epoch": 0.6906775965814947, "grad_norm": 0.49618807435035706, "learning_rate": 2.4007950591826913e-07, "loss": 0.21449580192565917, "step": 495, "token_acc": 0.9518640628962719 }, { "epoch": 0.6976541379611058, "grad_norm": 0.5371702313423157, "learning_rate": 2.3029729420162587e-07, "loss": 0.15500261783599853, "step": 500, "token_acc": 0.9591128732499071 }, { "epoch": 0.7046306793407169, "grad_norm": 0.5795394778251648, "learning_rate": 2.2065856916216786e-07, "loss": 0.16497514247894288, "step": 505, "token_acc": 0.9617788774580024 }, { "epoch": 0.7116072207203279, "grad_norm": 0.5824469327926636, "learning_rate": 2.1116845877450805e-07, "loss": 0.16700024604797364, "step": 510, "token_acc": 0.9572996706915478 }, { "epoch": 0.7185837620999389, "grad_norm": 0.539864182472229, "learning_rate": 2.0183201194759825e-07, "loss": 0.2224641799926758, "step": 515, "token_acc": 0.9501959166838524 }, { "epoch": 0.72556030347955, "grad_norm": 0.781697690486908, "learning_rate": 1.9265419583861952e-07, "loss": 0.1476673364639282, "step": 520, "token_acc": 0.9633160506216201 }, { "epoch": 0.732536844859161, "grad_norm": 0.4989713728427887, "learning_rate": 1.8363989321036577e-07, "loss": 0.143803870677948, "step": 525, "token_acc": 0.9604544058949954 }, { "epoch": 0.7395133862387722, "grad_norm": 0.5253103971481323, "learning_rate": 1.7479389983352656e-07, "loss": 0.17980681657791137, "step": 530, "token_acc": 0.9542640495272832 }, { "epoch": 0.7464899276183832, "grad_norm": 0.5387361645698547, "learning_rate": 1.6612092193525017e-07, "loss": 0.217242431640625, "step": 535, "token_acc": 0.950739773716275 }, { "epoch": 0.7534664689979943, "grad_norm": 0.5405040979385376, "learning_rate": 1.5762557369534708e-07, "loss": 0.19491589069366455, "step": 540, "token_acc": 0.954151055018734 }, { "epoch": 0.7604430103776053, "grad_norm": 0.43293488025665283, "learning_rate": 1.4931237479146326e-07, "loss": 0.18127689361572266, "step": 545, "token_acc": 0.9546740778170794 }, { "epoch": 0.7674195517572163, "grad_norm": 0.5920426249504089, "learning_rate": 1.4118574799453115e-07, "loss": 0.17992936372756957, "step": 550, "token_acc": 0.9548200289551195 }, { "epoch": 0.7743960931368274, "grad_norm": 0.8887305855751038, "learning_rate": 1.332500168157748e-07, "loss": 0.1434216856956482, "step": 555, "token_acc": 0.9616718027734977 }, { "epoch": 0.7813726345164385, "grad_norm": 0.45279937982559204, "learning_rate": 1.2550940320652614e-07, "loss": 0.15285730361938477, "step": 560, "token_acc": 0.9589277780520314 }, { "epoch": 0.7883491758960496, "grad_norm": 0.5594329833984375, "learning_rate": 1.179680253120699e-07, "loss": 0.14827193021774293, "step": 565, "token_acc": 0.9611136415395126 }, { "epoch": 0.7953257172756606, "grad_norm": 0.5205839276313782, "learning_rate": 1.1062989528071681e-07, "loss": 0.14820796251296997, "step": 570, "token_acc": 0.9608738340697104 }, { "epoch": 0.8023022586552716, "grad_norm": 0.7842152118682861, "learning_rate": 1.0349891712926855e-07, "loss": 0.14528849124908447, "step": 575, "token_acc": 0.9591823819769649 }, { "epoch": 0.8092788000348827, "grad_norm": 0.5881332159042358, "learning_rate": 9.65788846660116e-08, "loss": 0.12228701114654542, "step": 580, "token_acc": 0.9648055356716774 }, { "epoch": 0.8162553414144937, "grad_norm": 0.5683630704879761, "learning_rate": 8.987347947234192e-08, "loss": 0.15679004192352294, "step": 585, "token_acc": 0.9599228461208744 }, { "epoch": 0.8232318827941049, "grad_norm": 0.6288495659828186, "learning_rate": 8.33862689440985e-08, "loss": 0.16296907663345336, "step": 590, "token_acc": 0.957492548981287 }, { "epoch": 0.8302084241737159, "grad_norm": 0.46802279353141785, "learning_rate": 7.712070439364438e-08, "loss": 0.13914816379547118, "step": 595, "token_acc": 0.9615843086259211 }, { "epoch": 0.8371849655533269, "grad_norm": 0.5954472422599792, "learning_rate": 7.108011921370727e-08, "loss": 0.15333893299102783, "step": 600, "token_acc": 0.9563102463405927 }, { "epoch": 0.844161506932938, "grad_norm": 0.611599862575531, "learning_rate": 6.526772710395323e-08, "loss": 0.11822519302368165, "step": 605, "token_acc": 0.967852975495916 }, { "epoch": 0.851138048312549, "grad_norm": 0.5725059509277344, "learning_rate": 5.968662036124295e-08, "loss": 0.15996166467666625, "step": 610, "token_acc": 0.959222581157655 }, { "epoch": 0.8581145896921601, "grad_norm": 0.4814670979976654, "learning_rate": 5.433976823447262e-08, "loss": 0.13899474143981932, "step": 615, "token_acc": 0.9624644833258561 }, { "epoch": 0.8650911310717712, "grad_norm": 0.525391697883606, "learning_rate": 4.923001534488097e-08, "loss": 0.1286926746368408, "step": 620, "token_acc": 0.9623963626638139 }, { "epoch": 0.8720676724513823, "grad_norm": 0.6519795656204224, "learning_rate": 4.43600801726598e-08, "loss": 0.17959569692611693, "step": 625, "token_acc": 0.9575730509123389 }, { "epoch": 0.8790442138309933, "grad_norm": 0.5954372882843018, "learning_rate": 3.973255361067346e-08, "loss": 0.14509177207946777, "step": 630, "token_acc": 0.9609306955331591 }, { "epoch": 0.8860207552106043, "grad_norm": 0.6551011800765991, "learning_rate": 3.534989758605772e-08, "loss": 0.13519610166549684, "step": 635, "token_acc": 0.961144806671721 }, { "epoch": 0.8929972965902154, "grad_norm": 0.6382178664207458, "learning_rate": 3.121444375042992e-08, "loss": 0.14140852689743041, "step": 640, "token_acc": 0.9617294770669004 }, { "epoch": 0.8999738379698264, "grad_norm": 0.5000672340393066, "learning_rate": 2.732839223940914e-08, "loss": 0.15130863189697266, "step": 645, "token_acc": 0.9578913532626165 }, { "epoch": 0.9069503793494376, "grad_norm": 0.4966048002243042, "learning_rate": 2.3693810502103783e-08, "loss": 0.16461522579193116, "step": 650, "token_acc": 0.956586014881979 }, { "epoch": 0.9139269207290486, "grad_norm": 0.7203890085220337, "learning_rate": 2.0312632201192338e-08, "loss": 0.15151506662368774, "step": 655, "token_acc": 0.9578531445505433 }, { "epoch": 0.9209034621086596, "grad_norm": 0.5585451722145081, "learning_rate": 1.7186656184179473e-08, "loss": 0.19614295959472655, "step": 660, "token_acc": 0.9517613299030279 }, { "epoch": 0.9278800034882707, "grad_norm": 0.5863579511642456, "learning_rate": 1.431754552637754e-08, "loss": 0.13972072601318358, "step": 665, "token_acc": 0.960591916834624 }, { "epoch": 0.9348565448678817, "grad_norm": 0.540397584438324, "learning_rate": 1.1706826646119994e-08, "loss": 0.20453217029571533, "step": 670, "token_acc": 0.9501171417415072 }, { "epoch": 0.9418330862474928, "grad_norm": 0.4735467731952667, "learning_rate": 9.355888492680153e-09, "loss": 0.2564453840255737, "step": 675, "token_acc": 0.9417061863910055 }, { "epoch": 0.9488096276271039, "grad_norm": 0.41385316848754883, "learning_rate": 7.265981807324795e-09, "loss": 0.1432310461997986, "step": 680, "token_acc": 0.9586380054620738 }, { "epoch": 0.9557861690067149, "grad_norm": 0.5068058967590332, "learning_rate": 5.438218457897492e-09, "loss": 0.12814297676086425, "step": 685, "token_acc": 0.9636561355311355 }, { "epoch": 0.962762710386326, "grad_norm": 0.6264599561691284, "learning_rate": 3.873570847285012e-09, "loss": 0.2733027935028076, "step": 690, "token_acc": 0.9410119633331607 }, { "epoch": 0.969739251765937, "grad_norm": 0.5079652667045593, "learning_rate": 2.5728713960815884e-09, "loss": 0.1333064079284668, "step": 695, "token_acc": 0.963391442155309 }, { "epoch": 0.9767157931455481, "grad_norm": 0.5711411237716675, "learning_rate": 1.5368120997261147e-09, "loss": 0.21541709899902345, "step": 700, "token_acc": 0.9518470869325492 }, { "epoch": 0.9836923345251591, "grad_norm": 0.5428957939147949, "learning_rate": 7.65944160348142e-10, "loss": 0.14647810459136962, "step": 705, "token_acc": 0.9616447996782788 }, { "epoch": 0.9906688759047703, "grad_norm": 0.5820784568786621, "learning_rate": 2.6067769351867384e-10, "loss": 0.144012713432312, "step": 710, "token_acc": 0.9590297709494062 }, { "epoch": 0.9976454172843813, "grad_norm": 0.6706213355064392, "learning_rate": 2.128151006108858e-11, "loss": 0.16256020069122315, "step": 715, "token_acc": 0.9583173343572678 } ], "logging_steps": 5, "max_steps": 717, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.451809127798866e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }