{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 200.0, "global_step": 1641, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006093845216331506, "grad_norm": 7.29836574807662, "learning_rate": 2.0000000000000002e-07, "loss": 0.6879574656486511, "step": 1, "token_acc": 0.8069400259219983 }, { "epoch": 0.006093845216331505, "grad_norm": 3.9069477397555814, "learning_rate": 2.0000000000000003e-06, "loss": 0.6863329675462511, "step": 10, "token_acc": 0.8062499341798103 }, { "epoch": 0.01218769043266301, "grad_norm": 1.6499482285217686, "learning_rate": 4.000000000000001e-06, "loss": 0.5632639408111573, "step": 20, "token_acc": 0.832315593221387 }, { "epoch": 0.018281535648994516, "grad_norm": 1.3391083762662725, "learning_rate": 6e-06, "loss": 0.4509421348571777, "step": 30, "token_acc": 0.859350495238436 }, { "epoch": 0.02437538086532602, "grad_norm": 1.0720631754188148, "learning_rate": 8.000000000000001e-06, "loss": 0.4215505599975586, "step": 40, "token_acc": 0.8660933700604836 }, { "epoch": 0.030469226081657527, "grad_norm": 1.300502159057601, "learning_rate": 1e-05, "loss": 0.3916645526885986, "step": 50, "token_acc": 0.8749446762027628 }, { "epoch": 0.03656307129798903, "grad_norm": 1.2222066565570464, "learning_rate": 9.999025267866269e-06, "loss": 0.37738680839538574, "step": 60, "token_acc": 0.8773714810281518 }, { "epoch": 0.042656916514320534, "grad_norm": 1.1003373829333203, "learning_rate": 9.996101451506166e-06, "loss": 0.36339468955993653, "step": 70, "token_acc": 0.8811438359423324 }, { "epoch": 0.04875076173065204, "grad_norm": 0.9139572833064542, "learning_rate": 9.991229690894796e-06, "loss": 0.35523133277893065, "step": 80, "token_acc": 0.8833139693331612 }, { "epoch": 0.054844606946983544, "grad_norm": 1.0649357265795398, "learning_rate": 9.984411885496807e-06, "loss": 0.36147160530090333, "step": 90, "token_acc": 0.8827829089555647 }, { "epoch": 0.06093845216331505, "grad_norm": 1.0474850371515747, "learning_rate": 9.975650693525798e-06, "loss": 0.35729637145996096, "step": 100, "token_acc": 0.8842431348706591 }, { "epoch": 0.06703229737964655, "grad_norm": 1.0275221386997506, "learning_rate": 9.964949530907907e-06, "loss": 0.3474123477935791, "step": 110, "token_acc": 0.8856792866706549 }, { "epoch": 0.07312614259597806, "grad_norm": 1.2902357608957626, "learning_rate": 9.952312569949963e-06, "loss": 0.3479644775390625, "step": 120, "token_acc": 0.8849104859335039 }, { "epoch": 0.07921998781230957, "grad_norm": 1.3451160019315398, "learning_rate": 9.937744737712734e-06, "loss": 0.3466474533081055, "step": 130, "token_acc": 0.8861058585962355 }, { "epoch": 0.08531383302864107, "grad_norm": 1.0790851469433436, "learning_rate": 9.921251714089898e-06, "loss": 0.34614810943603513, "step": 140, "token_acc": 0.8854811515034624 }, { "epoch": 0.09140767824497258, "grad_norm": 1.0038008030386316, "learning_rate": 9.9028399295935e-06, "loss": 0.3384540557861328, "step": 150, "token_acc": 0.8879619162858826 }, { "epoch": 0.09750152346130408, "grad_norm": 1.027349218243697, "learning_rate": 9.882516562846735e-06, "loss": 0.33826944828033445, "step": 160, "token_acc": 0.8878969612617404 }, { "epoch": 0.1035953686776356, "grad_norm": 1.0944757240532788, "learning_rate": 9.860289537785058e-06, "loss": 0.3368839740753174, "step": 170, "token_acc": 0.8883107398785887 }, { "epoch": 0.10968921389396709, "grad_norm": 0.9672890666466603, "learning_rate": 9.83616752056669e-06, "loss": 0.3455761194229126, "step": 180, "token_acc": 0.8851188684923262 }, { "epoch": 0.1157830591102986, "grad_norm": 0.8886431201198384, "learning_rate": 9.810159916193763e-06, "loss": 0.32952630519866943, "step": 190, "token_acc": 0.8905697047489018 }, { "epoch": 0.1218769043266301, "grad_norm": 0.9444272816074001, "learning_rate": 9.782276864845351e-06, "loss": 0.33125505447387693, "step": 200, "token_acc": 0.8897695109589824 }, { "epoch": 0.12797074954296161, "grad_norm": 1.0093535144294836, "learning_rate": 9.752529237923914e-06, "loss": 0.3311288833618164, "step": 210, "token_acc": 0.8905364268561583 }, { "epoch": 0.1340645947592931, "grad_norm": 1.1532189931201864, "learning_rate": 9.720928633816596e-06, "loss": 0.3244771003723145, "step": 220, "token_acc": 0.8915392526998382 }, { "epoch": 0.14015843997562463, "grad_norm": 0.9598378464215558, "learning_rate": 9.687487373373103e-06, "loss": 0.3279410362243652, "step": 230, "token_acc": 0.8906910502215741 }, { "epoch": 0.14625228519195613, "grad_norm": 0.9540187948014937, "learning_rate": 9.652218495101894e-06, "loss": 0.3265339136123657, "step": 240, "token_acc": 0.8910028614336833 }, { "epoch": 0.15234613040828762, "grad_norm": 1.0470189428654273, "learning_rate": 9.61513575008656e-06, "loss": 0.33319640159606934, "step": 250, "token_acc": 0.8888186484938951 }, { "epoch": 0.15843997562461914, "grad_norm": 0.9615038649371879, "learning_rate": 9.576253596624367e-06, "loss": 0.32928056716918946, "step": 260, "token_acc": 0.8897860391237342 }, { "epoch": 0.16453382084095064, "grad_norm": 1.09800599181465, "learning_rate": 9.53558719458908e-06, "loss": 0.32557024955749514, "step": 270, "token_acc": 0.8914715787293208 }, { "epoch": 0.17062766605728213, "grad_norm": 1.0743262974854428, "learning_rate": 9.49315239952023e-06, "loss": 0.32053494453430176, "step": 280, "token_acc": 0.8929576222604401 }, { "epoch": 0.17672151127361366, "grad_norm": 1.060412117175443, "learning_rate": 9.448965756441154e-06, "loss": 0.3243874073028564, "step": 290, "token_acc": 0.8921667614562232 }, { "epoch": 0.18281535648994515, "grad_norm": 0.9594753815838422, "learning_rate": 9.403044493408205e-06, "loss": 0.3233642578125, "step": 300, "token_acc": 0.891047436596846 }, { "epoch": 0.18890920170627665, "grad_norm": 0.9403281436285019, "learning_rate": 9.355406514793667e-06, "loss": 0.31829214096069336, "step": 310, "token_acc": 0.8938932609968795 }, { "epoch": 0.19500304692260817, "grad_norm": 0.9536634938537397, "learning_rate": 9.306070394304955e-06, "loss": 0.3202193260192871, "step": 320, "token_acc": 0.8931520198180799 }, { "epoch": 0.20109689213893966, "grad_norm": 1.119379822493263, "learning_rate": 9.255055367742868e-06, "loss": 0.3239091396331787, "step": 330, "token_acc": 0.8923521607278241 }, { "epoch": 0.2071907373552712, "grad_norm": 1.0373576096304553, "learning_rate": 9.202381325501683e-06, "loss": 0.31700589656829836, "step": 340, "token_acc": 0.8944783352337514 }, { "epoch": 0.21328458257160268, "grad_norm": 1.0632563437214946, "learning_rate": 9.148068804814032e-06, "loss": 0.31794281005859376, "step": 350, "token_acc": 0.8930956012903548 }, { "epoch": 0.21937842778793418, "grad_norm": 1.0242050960110551, "learning_rate": 9.092138981743588e-06, "loss": 0.3202871799468994, "step": 360, "token_acc": 0.8935469022061816 }, { "epoch": 0.2254722730042657, "grad_norm": 0.8239921572139911, "learning_rate": 9.034613662928665e-06, "loss": 0.3142183542251587, "step": 370, "token_acc": 0.8951745718050066 }, { "epoch": 0.2315661182205972, "grad_norm": 0.9147511550012487, "learning_rate": 8.975515277079961e-06, "loss": 0.3087962865829468, "step": 380, "token_acc": 0.8958298740422705 }, { "epoch": 0.2376599634369287, "grad_norm": 0.8794833827260621, "learning_rate": 8.91486686623577e-06, "loss": 0.3132402658462524, "step": 390, "token_acc": 0.8948639533970186 }, { "epoch": 0.2437538086532602, "grad_norm": 1.0069623307664877, "learning_rate": 8.85269207677806e-06, "loss": 0.31006736755371095, "step": 400, "token_acc": 0.8951928192311975 }, { "epoch": 0.2498476538695917, "grad_norm": 0.9808015041824597, "learning_rate": 8.789015150212907e-06, "loss": 0.30683579444885256, "step": 410, "token_acc": 0.8967586393232839 }, { "epoch": 0.25594149908592323, "grad_norm": 0.9081237770188716, "learning_rate": 8.72386091371891e-06, "loss": 0.3061988830566406, "step": 420, "token_acc": 0.8959391589507399 }, { "epoch": 0.2620353443022547, "grad_norm": 1.04219527083527, "learning_rate": 8.657254770467252e-06, "loss": 0.3091754674911499, "step": 430, "token_acc": 0.8954508616603208 }, { "epoch": 0.2681291895185862, "grad_norm": 1.0065133793639498, "learning_rate": 8.58922268971719e-06, "loss": 0.30993127822875977, "step": 440, "token_acc": 0.895664191270881 }, { "epoch": 0.2742230347349177, "grad_norm": 0.9080797671925362, "learning_rate": 8.51979119669081e-06, "loss": 0.31555490493774413, "step": 450, "token_acc": 0.8941405988077487 }, { "epoch": 0.28031687995124926, "grad_norm": 0.9841139463866474, "learning_rate": 8.448987362231054e-06, "loss": 0.30534186363220217, "step": 460, "token_acc": 0.8968707588256722 }, { "epoch": 0.28641072516758076, "grad_norm": 0.9677823622528902, "learning_rate": 8.376838792246978e-06, "loss": 0.3050978422164917, "step": 470, "token_acc": 0.8967596979985816 }, { "epoch": 0.29250457038391225, "grad_norm": 0.8117589456035273, "learning_rate": 8.303373616950408e-06, "loss": 0.3012993335723877, "step": 480, "token_acc": 0.898916481794861 }, { "epoch": 0.29859841560024375, "grad_norm": 0.8967761049487325, "learning_rate": 8.228620479888172e-06, "loss": 0.2984607219696045, "step": 490, "token_acc": 0.8986162002706045 }, { "epoch": 0.30469226081657524, "grad_norm": 0.7934114582439064, "learning_rate": 8.152608526774188e-06, "loss": 0.3049586057662964, "step": 500, "token_acc": 0.8968112886022876 }, { "epoch": 0.31078610603290674, "grad_norm": 0.825580955342704, "learning_rate": 8.075367394125755e-06, "loss": 0.30215206146240237, "step": 510, "token_acc": 0.8978885397098497 }, { "epoch": 0.3168799512492383, "grad_norm": 0.8296290441677941, "learning_rate": 7.996927197708486e-06, "loss": 0.3088541507720947, "step": 520, "token_acc": 0.8963321107035679 }, { "epoch": 0.3229737964655698, "grad_norm": 0.8755135202445912, "learning_rate": 7.917318520794395e-06, "loss": 0.30083427429199217, "step": 530, "token_acc": 0.899119480167394 }, { "epoch": 0.3290676416819013, "grad_norm": 0.9101072984644949, "learning_rate": 7.836572402237683e-06, "loss": 0.3058091878890991, "step": 540, "token_acc": 0.896643718272106 }, { "epoch": 0.3351614868982328, "grad_norm": 0.9771967807763615, "learning_rate": 7.754720324372924e-06, "loss": 0.30214991569519045, "step": 550, "token_acc": 0.8980588639486945 }, { "epoch": 0.34125533211456427, "grad_norm": 1.0026225580388461, "learning_rate": 7.67179420074032e-06, "loss": 0.3041478395462036, "step": 560, "token_acc": 0.8965942594865093 }, { "epoch": 0.3473491773308958, "grad_norm": 0.9388665918318329, "learning_rate": 7.587826363642845e-06, "loss": 0.30187268257141114, "step": 570, "token_acc": 0.8980740928392202 }, { "epoch": 0.3534430225472273, "grad_norm": 0.9610197211126468, "learning_rate": 7.502849551540106e-06, "loss": 0.2962314605712891, "step": 580, "token_acc": 0.8994921135841125 }, { "epoch": 0.3595368677635588, "grad_norm": 0.832216076371822, "learning_rate": 7.4168968962838524e-06, "loss": 0.2948365926742554, "step": 590, "token_acc": 0.8995369426034115 }, { "epoch": 0.3656307129798903, "grad_norm": 0.9377431212404606, "learning_rate": 7.330001910200111e-06, "loss": 0.29007649421691895, "step": 600, "token_acc": 0.9010131261293394 }, { "epoch": 0.3717245581962218, "grad_norm": 0.8726611852126548, "learning_rate": 7.242198473022958e-06, "loss": 0.2962885856628418, "step": 610, "token_acc": 0.9000062303355035 }, { "epoch": 0.3778184034125533, "grad_norm": 0.9153282793617801, "learning_rate": 7.15352081868506e-06, "loss": 0.30144367218017576, "step": 620, "token_acc": 0.8989331770222744 }, { "epoch": 0.38391224862888484, "grad_norm": 0.993391313101372, "learning_rate": 7.0640035219701085e-06, "loss": 0.301465106010437, "step": 630, "token_acc": 0.8974685325619576 }, { "epoch": 0.39000609384521634, "grad_norm": 1.0046408788594328, "learning_rate": 6.973681485032359e-06, "loss": 0.2955395460128784, "step": 640, "token_acc": 0.8996091046695718 }, { "epoch": 0.39609993906154783, "grad_norm": 0.822820271911727, "learning_rate": 6.8825899237885215e-06, "loss": 0.2931050300598145, "step": 650, "token_acc": 0.901203589259751 }, { "epoch": 0.40219378427787933, "grad_norm": 0.8482496681393756, "learning_rate": 6.7907643541873446e-06, "loss": 0.29596996307373047, "step": 660, "token_acc": 0.8996866207121305 }, { "epoch": 0.4082876294942108, "grad_norm": 0.8775663994372018, "learning_rate": 6.698240578362179e-06, "loss": 0.29141840934753416, "step": 670, "token_acc": 0.9003262426482238 }, { "epoch": 0.4143814747105424, "grad_norm": 0.984669646190565, "learning_rate": 6.6050546706719984e-06, "loss": 0.29290521144866943, "step": 680, "token_acc": 0.9014104043327218 }, { "epoch": 0.42047531992687387, "grad_norm": 0.8784418931211103, "learning_rate": 6.511242963636257e-06, "loss": 0.29056534767150877, "step": 690, "token_acc": 0.9016642094853267 }, { "epoch": 0.42656916514320536, "grad_norm": 1.0470361792821843, "learning_rate": 6.416842033769106e-06, "loss": 0.2978256940841675, "step": 700, "token_acc": 0.8997917186822428 }, { "epoch": 0.43266301035953686, "grad_norm": 0.9613791001197699, "learning_rate": 6.321888687318457e-06, "loss": 0.2870903253555298, "step": 710, "token_acc": 0.903113691147251 }, { "epoch": 0.43875685557586835, "grad_norm": 0.8405716630112535, "learning_rate": 6.2264199459155105e-06, "loss": 0.29581589698791505, "step": 720, "token_acc": 0.9003898532372131 }, { "epoch": 0.4448507007921999, "grad_norm": 0.9817927857442479, "learning_rate": 6.130473032140272e-06, "loss": 0.29129691123962403, "step": 730, "token_acc": 0.9009383225625913 }, { "epoch": 0.4509445460085314, "grad_norm": 0.9100915684781385, "learning_rate": 6.0340853550087345e-06, "loss": 0.29650187492370605, "step": 740, "token_acc": 0.9002656385758284 }, { "epoch": 0.4570383912248629, "grad_norm": 0.9238619342391209, "learning_rate": 5.937294495387377e-06, "loss": 0.2921621561050415, "step": 750, "token_acc": 0.9008455874319925 }, { "epoch": 0.4631322364411944, "grad_norm": 0.8289061064281614, "learning_rate": 5.840138191340651e-06, "loss": 0.28725643157958985, "step": 760, "token_acc": 0.9028466795835374 }, { "epoch": 0.4692260816575259, "grad_norm": 0.8901360785145829, "learning_rate": 5.7426543234171736e-06, "loss": 0.2865636348724365, "step": 770, "token_acc": 0.90197109501604 }, { "epoch": 0.4753199268738574, "grad_norm": 0.8709058451908881, "learning_rate": 5.644880899880382e-06, "loss": 0.2886040687561035, "step": 780, "token_acc": 0.9023270689287564 }, { "epoch": 0.48141377209018893, "grad_norm": 0.9306196525173549, "learning_rate": 5.546856041889374e-06, "loss": 0.28833470344543455, "step": 790, "token_acc": 0.9016039529639475 }, { "epoch": 0.4875076173065204, "grad_norm": 0.9401250944884257, "learning_rate": 5.448617968635741e-06, "loss": 0.28241567611694335, "step": 800, "token_acc": 0.9046351860634857 }, { "epoch": 0.4936014625228519, "grad_norm": 0.849983180158667, "learning_rate": 5.35020498244219e-06, "loss": 0.2863471508026123, "step": 810, "token_acc": 0.9020820443108771 }, { "epoch": 0.4996953077391834, "grad_norm": 0.7275676892245573, "learning_rate": 5.251655453828728e-06, "loss": 0.28403263092041015, "step": 820, "token_acc": 0.9032200331101135 }, { "epoch": 0.505789152955515, "grad_norm": 0.8630110541652776, "learning_rate": 5.153007806552275e-06, "loss": 0.28420357704162597, "step": 830, "token_acc": 0.9033704118180856 }, { "epoch": 0.5118829981718465, "grad_norm": 0.8835421688612489, "learning_rate": 5.054300502625517e-06, "loss": 0.2866727352142334, "step": 840, "token_acc": 0.9032091030720939 }, { "epoch": 0.517976843388178, "grad_norm": 0.8544875287993453, "learning_rate": 4.9555720273208475e-06, "loss": 0.289061975479126, "step": 850, "token_acc": 0.9017317721145331 }, { "epoch": 0.5240706886045094, "grad_norm": 0.8549205024097043, "learning_rate": 4.856860874165218e-06, "loss": 0.2889714241027832, "step": 860, "token_acc": 0.9025821278082484 }, { "epoch": 0.5301645338208409, "grad_norm": 0.9236105201664164, "learning_rate": 4.758205529931808e-06, "loss": 0.2887147903442383, "step": 870, "token_acc": 0.9019780647042623 }, { "epoch": 0.5362583790371724, "grad_norm": 0.8682794949168545, "learning_rate": 4.659644459634293e-06, "loss": 0.27901973724365237, "step": 880, "token_acc": 0.9043348147353298 }, { "epoch": 0.5423522242535039, "grad_norm": 0.8729641279912889, "learning_rate": 4.56121609152961e-06, "loss": 0.2851783275604248, "step": 890, "token_acc": 0.9031912203833561 }, { "epoch": 0.5484460694698354, "grad_norm": 0.8418875200344721, "learning_rate": 4.462958802135069e-06, "loss": 0.27748913764953614, "step": 900, "token_acc": 0.9059390881360567 }, { "epoch": 0.5545399146861669, "grad_norm": 0.8894129853584928, "learning_rate": 4.364910901265607e-06, "loss": 0.28034243583679197, "step": 910, "token_acc": 0.9040050510001095 }, { "epoch": 0.5606337599024985, "grad_norm": 0.8334588350840866, "learning_rate": 4.2671106170970734e-06, "loss": 0.2801810264587402, "step": 920, "token_acc": 0.9042555097117814 }, { "epoch": 0.56672760511883, "grad_norm": 0.8763484647820953, "learning_rate": 4.169596081261332e-06, "loss": 0.2837662696838379, "step": 930, "token_acc": 0.9037383810780553 }, { "epoch": 0.5728214503351615, "grad_norm": 0.8713237221620964, "learning_rate": 4.072405313979021e-06, "loss": 0.27712116241455076, "step": 940, "token_acc": 0.9053036654966837 }, { "epoch": 0.578915295551493, "grad_norm": 0.8844118885887313, "learning_rate": 3.975576209235726e-06, "loss": 0.2806640625, "step": 950, "token_acc": 0.9047340125759082 }, { "epoch": 0.5850091407678245, "grad_norm": 0.8719900072150049, "learning_rate": 3.879146520007399e-06, "loss": 0.27962145805358884, "step": 960, "token_acc": 0.9052189543003484 }, { "epoch": 0.591102985984156, "grad_norm": 0.8621214557871747, "learning_rate": 3.7831538435407344e-06, "loss": 0.281157398223877, "step": 970, "token_acc": 0.9040866660422715 }, { "epoch": 0.5971968312004875, "grad_norm": 0.85966956497571, "learning_rate": 3.687635606694271e-06, "loss": 0.2849492073059082, "step": 980, "token_acc": 0.9041384613065175 }, { "epoch": 0.603290676416819, "grad_norm": 0.8505152160082087, "learning_rate": 3.592629051345936e-06, "loss": 0.2792569637298584, "step": 990, "token_acc": 0.9054755884673447 }, { "epoch": 0.6093845216331505, "grad_norm": 0.9214402604733031, "learning_rate": 3.4981712198726956e-06, "loss": 0.2757925033569336, "step": 1000, "token_acc": 0.9061934946027913 }, { "epoch": 0.615478366849482, "grad_norm": 0.8580050185956459, "learning_rate": 3.4042989407079986e-06, "loss": 0.2790709972381592, "step": 1010, "token_acc": 0.9051715866568587 }, { "epoch": 0.6215722120658135, "grad_norm": 0.7762593811197912, "learning_rate": 3.311048813982627e-06, "loss": 0.2719182014465332, "step": 1020, "token_acc": 0.9072872717021148 }, { "epoch": 0.6276660572821451, "grad_norm": 0.8305900083620258, "learning_rate": 3.218457197254583e-06, "loss": 0.27586350440979, "step": 1030, "token_acc": 0.9060086339753238 }, { "epoch": 0.6337599024984766, "grad_norm": 0.8955059982745348, "learning_rate": 3.1265601913335196e-06, "loss": 0.2731196403503418, "step": 1040, "token_acc": 0.9076037121001682 }, { "epoch": 0.6398537477148081, "grad_norm": 0.8712242634564721, "learning_rate": 3.035393626205306e-06, "loss": 0.2795309066772461, "step": 1050, "token_acc": 0.9047484454494065 }, { "epoch": 0.6459475929311396, "grad_norm": 0.8162886626845998, "learning_rate": 2.944993047062161e-06, "loss": 0.26994550228118896, "step": 1060, "token_acc": 0.9082915598041501 }, { "epoch": 0.6520414381474711, "grad_norm": 0.8874044395879559, "learning_rate": 2.8553937004438425e-06, "loss": 0.2744093418121338, "step": 1070, "token_acc": 0.9072907727436752 }, { "epoch": 0.6581352833638026, "grad_norm": 0.8288310546310844, "learning_rate": 2.766630520495277e-06, "loss": 0.2674886226654053, "step": 1080, "token_acc": 0.9087633615660454 }, { "epoch": 0.664229128580134, "grad_norm": 0.8828846811452266, "learning_rate": 2.67873811534598e-06, "loss": 0.2735260486602783, "step": 1090, "token_acc": 0.9060899523658108 }, { "epoch": 0.6703229737964655, "grad_norm": 0.8055682508984224, "learning_rate": 2.591750753616596e-06, "loss": 0.2687216758728027, "step": 1100, "token_acc": 0.9077474362897096 }, { "epoch": 0.676416819012797, "grad_norm": 0.8527567804445506, "learning_rate": 2.505702351057804e-06, "loss": 0.27487955093383787, "step": 1110, "token_acc": 0.9064443638076686 }, { "epoch": 0.6825106642291285, "grad_norm": 0.8043496565707575, "learning_rate": 2.4206264573268174e-06, "loss": 0.2709942102432251, "step": 1120, "token_acc": 0.9082038753361505 }, { "epoch": 0.68860450944546, "grad_norm": 0.8177848047582682, "learning_rate": 2.336556242906608e-06, "loss": 0.26909465789794923, "step": 1130, "token_acc": 0.907756650686803 }, { "epoch": 0.6946983546617916, "grad_norm": 0.8281752422683824, "learning_rate": 2.2535244861729707e-06, "loss": 0.27281508445739744, "step": 1140, "token_acc": 0.9068872307019957 }, { "epoch": 0.7007921998781231, "grad_norm": 0.7368812719716331, "learning_rate": 2.1715635606144653e-06, "loss": 0.2704050064086914, "step": 1150, "token_acc": 0.9086829548350435 }, { "epoch": 0.7068860450944546, "grad_norm": 0.8983810091681733, "learning_rate": 2.0907054222102367e-06, "loss": 0.2690997362136841, "step": 1160, "token_acc": 0.9079458353782861 }, { "epoch": 0.7129798903107861, "grad_norm": 0.976946993038541, "learning_rate": 2.0109815969705922e-06, "loss": 0.2747433423995972, "step": 1170, "token_acc": 0.9060301301519122 }, { "epoch": 0.7190737355271176, "grad_norm": 0.8007237087596002, "learning_rate": 1.9324231686452478e-06, "loss": 0.2671233654022217, "step": 1180, "token_acc": 0.9086050565301521 }, { "epoch": 0.7251675807434491, "grad_norm": 0.8064570085543009, "learning_rate": 1.8550607666039877e-06, "loss": 0.27011594772338865, "step": 1190, "token_acc": 0.9079702457204528 }, { "epoch": 0.7312614259597806, "grad_norm": 0.8831329237202693, "learning_rate": 1.7789245538944971e-06, "loss": 0.2661958456039429, "step": 1200, "token_acc": 0.909048799129166 }, { "epoch": 0.7373552711761121, "grad_norm": 0.8430483750865159, "learning_rate": 1.7040442154820036e-06, "loss": 0.2669236183166504, "step": 1210, "token_acc": 0.9086229167124993 }, { "epoch": 0.7434491163924436, "grad_norm": 0.8347549917161227, "learning_rate": 1.6304489466753237e-06, "loss": 0.26542019844055176, "step": 1220, "token_acc": 0.9091426534148126 }, { "epoch": 0.7495429616087751, "grad_norm": 0.830454588444548, "learning_rate": 1.5581674417438143e-06, "loss": 0.2647353410720825, "step": 1230, "token_acc": 0.909506020348688 }, { "epoch": 0.7556368068251066, "grad_norm": 0.8676010280531331, "learning_rate": 1.4872278827296855e-06, "loss": 0.2685891628265381, "step": 1240, "token_acc": 0.9081622979570555 }, { "epoch": 0.7617306520414382, "grad_norm": 0.707455832514829, "learning_rate": 1.417657928460029e-06, "loss": 0.2678367614746094, "step": 1250, "token_acc": 0.9088005125349524 }, { "epoch": 0.7678244972577697, "grad_norm": 0.9332592296684585, "learning_rate": 1.349484703762834e-06, "loss": 0.2678724765777588, "step": 1260, "token_acc": 0.9090774872882107 }, { "epoch": 0.7739183424741012, "grad_norm": 0.9124536066814944, "learning_rate": 1.2827347888912057e-06, "loss": 0.2636892795562744, "step": 1270, "token_acc": 0.9094603622970171 }, { "epoch": 0.7800121876904327, "grad_norm": 0.8868523419233089, "learning_rate": 1.2174342091599277e-06, "loss": 0.2640355587005615, "step": 1280, "token_acc": 0.9101203136208611 }, { "epoch": 0.7861060329067642, "grad_norm": 0.8162281839833351, "learning_rate": 1.1536084247983626e-06, "loss": 0.2618927717208862, "step": 1290, "token_acc": 0.9093984578881031 }, { "epoch": 0.7921998781230957, "grad_norm": 0.8334510756887459, "learning_rate": 1.0912823210237033e-06, "loss": 0.2639930725097656, "step": 1300, "token_acc": 0.9095154304277207 }, { "epoch": 0.7982937233394272, "grad_norm": 0.9484830756554262, "learning_rate": 1.0304801983383989e-06, "loss": 0.2679661750793457, "step": 1310, "token_acc": 0.9085439305540266 }, { "epoch": 0.8043875685557587, "grad_norm": 0.7917038864004372, "learning_rate": 9.712257630555589e-07, "loss": 0.263914155960083, "step": 1320, "token_acc": 0.9098282765579997 }, { "epoch": 0.8104814137720902, "grad_norm": 0.8164310323072432, "learning_rate": 9.135421180560394e-07, "loss": 0.27391440868377687, "step": 1330, "token_acc": 0.9072812991094814 }, { "epoch": 0.8165752589884216, "grad_norm": 0.7878349824156636, "learning_rate": 8.574517537807897e-07, "loss": 0.2658750057220459, "step": 1340, "token_acc": 0.9089495350890863 }, { "epoch": 0.8226691042047533, "grad_norm": 0.7620095983862565, "learning_rate": 8.029765394619899e-07, "loss": 0.25719194412231444, "step": 1350, "token_acc": 0.911888654763225 }, { "epoch": 0.8287629494210847, "grad_norm": 0.8206579913283775, "learning_rate": 7.501377145963939e-07, "loss": 0.2592960834503174, "step": 1360, "token_acc": 0.9114338606023208 }, { "epoch": 0.8348567946374162, "grad_norm": 0.8789992765077687, "learning_rate": 6.98955880664205e-07, "loss": 0.26435413360595705, "step": 1370, "token_acc": 0.9108234231521902 }, { "epoch": 0.8409506398537477, "grad_norm": 0.9837537034286392, "learning_rate": 6.494509930967019e-07, "loss": 0.2641714572906494, "step": 1380, "token_acc": 0.9101989856105199 }, { "epoch": 0.8470444850700792, "grad_norm": 0.8346126227296959, "learning_rate": 6.016423534957616e-07, "loss": 0.26149678230285645, "step": 1390, "token_acc": 0.9105589320112891 }, { "epoch": 0.8531383302864107, "grad_norm": 0.789773058927434, "learning_rate": 5.555486021082979e-07, "loss": 0.25979223251342776, "step": 1400, "token_acc": 0.9105615762961907 }, { "epoch": 0.8592321755027422, "grad_norm": 0.7391262213112039, "learning_rate": 5.111877105585672e-07, "loss": 0.2619319915771484, "step": 1410, "token_acc": 0.9112515917773331 }, { "epoch": 0.8653260207190737, "grad_norm": 0.732756554862386, "learning_rate": 4.6857697484116006e-07, "loss": 0.26052017211914064, "step": 1420, "token_acc": 0.9111355670436785 }, { "epoch": 0.8714198659354052, "grad_norm": 0.9052605008388693, "learning_rate": 4.277330085774156e-07, "loss": 0.26050865650177, "step": 1430, "token_acc": 0.9113159185335296 }, { "epoch": 0.8775137111517367, "grad_norm": 0.8239425361941399, "learning_rate": 3.886717365378867e-07, "loss": 0.2652243137359619, "step": 1440, "token_acc": 0.9098248347337728 }, { "epoch": 0.8836075563680682, "grad_norm": 0.8321718064306127, "learning_rate": 3.5140838843339073e-07, "loss": 0.2614146709442139, "step": 1450, "token_acc": 0.9103242825028786 }, { "epoch": 0.8897014015843998, "grad_norm": 0.9427110487674982, "learning_rate": 3.159574929770515e-07, "loss": 0.26317219734191893, "step": 1460, "token_acc": 0.9102542106779491 }, { "epoch": 0.8957952468007313, "grad_norm": 0.8005907233947733, "learning_rate": 2.8233287221965555e-07, "loss": 0.2689415216445923, "step": 1470, "token_acc": 0.9084669140620019 }, { "epoch": 0.9018890920170628, "grad_norm": 0.8834142513691242, "learning_rate": 2.5054763616053967e-07, "loss": 0.26386346817016604, "step": 1480, "token_acc": 0.9098926633899981 }, { "epoch": 0.9079829372333943, "grad_norm": 0.8652226986660423, "learning_rate": 2.2061417763608818e-07, "loss": 0.2603492259979248, "step": 1490, "token_acc": 0.9111148919621807 }, { "epoch": 0.9140767824497258, "grad_norm": 0.7761477175475302, "learning_rate": 1.9254416748786086e-07, "loss": 0.2592171669006348, "step": 1500, "token_acc": 0.9112373322356396 }, { "epoch": 0.9201706276660573, "grad_norm": 0.7766751712855907, "learning_rate": 1.6634855001221195e-07, "loss": 0.258951997756958, "step": 1510, "token_acc": 0.9106356546794409 }, { "epoch": 0.9262644728823888, "grad_norm": 0.856909898768609, "learning_rate": 1.4203753869318882e-07, "loss": 0.2605564117431641, "step": 1520, "token_acc": 0.9109015609309732 }, { "epoch": 0.9323583180987203, "grad_norm": 0.8678261922910359, "learning_rate": 1.196206122203647e-07, "loss": 0.267201566696167, "step": 1530, "token_acc": 0.9091924387660025 }, { "epoch": 0.9384521633150518, "grad_norm": 0.8245437796092319, "learning_rate": 9.910651079316824e-08, "loss": 0.25865275859832765, "step": 1540, "token_acc": 0.9117370919567883 }, { "epoch": 0.9445460085313833, "grad_norm": 0.7648349491441419, "learning_rate": 8.050323271314331e-08, "loss": 0.2569366216659546, "step": 1550, "token_acc": 0.9122892575583048 }, { "epoch": 0.9506398537477148, "grad_norm": 0.844132664732268, "learning_rate": 6.381803126546405e-08, "loss": 0.26746933460235595, "step": 1560, "token_acc": 0.9087516916083089 }, { "epoch": 0.9567336989640464, "grad_norm": 0.8550282187735159, "learning_rate": 4.9057411890933714e-08, "loss": 0.2634291172027588, "step": 1570, "token_acc": 0.9101502847948816 }, { "epoch": 0.9628275441803779, "grad_norm": 0.8962920945122091, "learning_rate": 3.622712964956032e-08, "loss": 0.26028733253479003, "step": 1580, "token_acc": 0.9110691577022408 }, { "epoch": 0.9689213893967094, "grad_norm": 0.8191620838439264, "learning_rate": 2.5332186976697037e-08, "loss": 0.26295406818389894, "step": 1590, "token_acc": 0.9106372558253433 }, { "epoch": 0.9750152346130408, "grad_norm": 0.803005796954641, "learning_rate": 1.637683173263238e-08, "loss": 0.2601941585540771, "step": 1600, "token_acc": 0.9106438532047947 }, { "epoch": 0.9811090798293723, "grad_norm": 1.0200184560604955, "learning_rate": 9.364555546375054e-09, "loss": 0.265762186050415, "step": 1610, "token_acc": 0.9099375217270665 }, { "epoch": 0.9872029250457038, "grad_norm": 0.8217240197064228, "learning_rate": 4.2980924542984634e-09, "loss": 0.261862587928772, "step": 1620, "token_acc": 0.9104295425993519 }, { "epoch": 0.9932967702620353, "grad_norm": 0.8981159929317022, "learning_rate": 1.179417834153429e-09, "loss": 0.2626341342926025, "step": 1630, "token_acc": 0.9100063135380294 }, { "epoch": 0.9993906154783668, "grad_norm": 0.8766885423326849, "learning_rate": 9.74763488759134e-12, "loss": 0.2605599880218506, "step": 1640, "token_acc": 0.9109949846594887 } ], "logging_steps": 10, "max_steps": 1641, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1566399809454080.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }