{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 200.0, "global_step": 1641, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006093845216331506, "grad_norm": 5.139511599456721, "learning_rate": 2.0000000000000002e-07, "loss": 0.97658371925354, "step": 1, "token_acc": 0.752757254524039 }, { "epoch": 0.006093845216331505, "grad_norm": 4.02627916602829, "learning_rate": 2.0000000000000003e-06, "loss": 0.979477776421441, "step": 10, "token_acc": 0.7513394629500156 }, { "epoch": 0.01218769043266301, "grad_norm": 2.133243665962445, "learning_rate": 4.000000000000001e-06, "loss": 0.8085936546325684, "step": 20, "token_acc": 0.7841330046869243 }, { "epoch": 0.018281535648994516, "grad_norm": 1.6130698467539089, "learning_rate": 6e-06, "loss": 0.6815763473510742, "step": 30, "token_acc": 0.8088598674958689 }, { "epoch": 0.02437538086532602, "grad_norm": 1.5958993846193437, "learning_rate": 8.000000000000001e-06, "loss": 0.6228148937225342, "step": 40, "token_acc": 0.820709892041175 }, { "epoch": 0.030469226081657527, "grad_norm": 1.5835525047156958, "learning_rate": 1e-05, "loss": 0.5660243034362793, "step": 50, "token_acc": 0.8334044587872124 }, { "epoch": 0.03656307129798903, "grad_norm": 1.486912724454441, "learning_rate": 9.999025267866269e-06, "loss": 0.5425637722015381, "step": 60, "token_acc": 0.8377751665687463 }, { "epoch": 0.042656916514320534, "grad_norm": 1.5337888751937572, "learning_rate": 9.996101451506166e-06, "loss": 0.5277560710906982, "step": 70, "token_acc": 0.8436990187965481 }, { "epoch": 0.04875076173065204, "grad_norm": 1.3940886051095893, "learning_rate": 9.991229690894796e-06, "loss": 0.5164490699768066, "step": 80, "token_acc": 0.843745632858913 }, { "epoch": 0.054844606946983544, "grad_norm": 1.732808941800786, "learning_rate": 9.984411885496807e-06, "loss": 0.5111546516418457, "step": 90, "token_acc": 0.8453230842547292 }, { "epoch": 0.06093845216331505, "grad_norm": 1.502010177161109, "learning_rate": 9.975650693525798e-06, "loss": 0.5041120052337646, "step": 100, "token_acc": 0.8452364415692656 }, { "epoch": 0.06703229737964655, "grad_norm": 1.4601020474033113, "learning_rate": 9.964949530907907e-06, "loss": 0.5016684532165527, "step": 110, "token_acc": 0.8476517417815049 }, { "epoch": 0.07312614259597806, "grad_norm": 1.2952511202685608, "learning_rate": 9.952312569949963e-06, "loss": 0.481311559677124, "step": 120, "token_acc": 0.8533387907153323 }, { "epoch": 0.07921998781230957, "grad_norm": 1.4971652792627679, "learning_rate": 9.937744737712734e-06, "loss": 0.4774615287780762, "step": 130, "token_acc": 0.8534425009304056 }, { "epoch": 0.08531383302864107, "grad_norm": 1.3779208298780503, "learning_rate": 9.921251714089898e-06, "loss": 0.4781217575073242, "step": 140, "token_acc": 0.8540719832383131 }, { "epoch": 0.09140767824497258, "grad_norm": 1.2833076700573953, "learning_rate": 9.9028399295935e-06, "loss": 0.470335865020752, "step": 150, "token_acc": 0.855729364137813 }, { "epoch": 0.09750152346130408, "grad_norm": 1.275667403101274, "learning_rate": 9.882516562846735e-06, "loss": 0.4639917850494385, "step": 160, "token_acc": 0.856720295350119 }, { "epoch": 0.1035953686776356, "grad_norm": 1.3612747480512406, "learning_rate": 9.860289537785058e-06, "loss": 0.46750926971435547, "step": 170, "token_acc": 0.8558511446900368 }, { "epoch": 0.10968921389396709, "grad_norm": 1.415269189793894, "learning_rate": 9.83616752056669e-06, "loss": 0.4647522926330566, "step": 180, "token_acc": 0.8561545157582173 }, { "epoch": 0.1157830591102986, "grad_norm": 1.1892931159809241, "learning_rate": 9.810159916193763e-06, "loss": 0.45995321273803713, "step": 190, "token_acc": 0.8573448602405447 }, { "epoch": 0.1218769043266301, "grad_norm": 1.3064527572301536, "learning_rate": 9.782276864845351e-06, "loss": 0.4638189792633057, "step": 200, "token_acc": 0.8556243509610675 }, { "epoch": 0.12797074954296161, "grad_norm": 1.314500952457575, "learning_rate": 9.752529237923914e-06, "loss": 0.4490074634552002, "step": 210, "token_acc": 0.8597137978120026 }, { "epoch": 0.1340645947592931, "grad_norm": 1.1373343380715917, "learning_rate": 9.720928633816596e-06, "loss": 0.4501980781555176, "step": 220, "token_acc": 0.859648604947868 }, { "epoch": 0.14015843997562463, "grad_norm": 1.230125123804479, "learning_rate": 9.687487373373103e-06, "loss": 0.44935040473937987, "step": 230, "token_acc": 0.8588833095343918 }, { "epoch": 0.14625228519195613, "grad_norm": 1.6077603667743245, "learning_rate": 9.652218495101894e-06, "loss": 0.44729223251342776, "step": 240, "token_acc": 0.85995841942315 }, { "epoch": 0.15234613040828762, "grad_norm": 1.2688011877880512, "learning_rate": 9.61513575008656e-06, "loss": 0.43803844451904295, "step": 250, "token_acc": 0.8627407782309685 }, { "epoch": 0.15843997562461914, "grad_norm": 1.178112411608835, "learning_rate": 9.576253596624367e-06, "loss": 0.43675899505615234, "step": 260, "token_acc": 0.8637918234089942 }, { "epoch": 0.16453382084095064, "grad_norm": 1.294840110909924, "learning_rate": 9.53558719458908e-06, "loss": 0.4456604480743408, "step": 270, "token_acc": 0.8607995996575384 }, { "epoch": 0.17062766605728213, "grad_norm": 1.2275948762976965, "learning_rate": 9.49315239952023e-06, "loss": 0.44009056091308596, "step": 280, "token_acc": 0.8624380989923378 }, { "epoch": 0.17672151127361366, "grad_norm": 1.1835184130640346, "learning_rate": 9.448965756441154e-06, "loss": 0.43228535652160643, "step": 290, "token_acc": 0.8642425086011517 }, { "epoch": 0.18281535648994515, "grad_norm": 1.0506109771841785, "learning_rate": 9.403044493408205e-06, "loss": 0.4331789970397949, "step": 300, "token_acc": 0.8650773124725752 }, { "epoch": 0.18890920170627665, "grad_norm": 1.2538833747077607, "learning_rate": 9.355406514793667e-06, "loss": 0.44378862380981443, "step": 310, "token_acc": 0.8612388746191983 }, { "epoch": 0.19500304692260817, "grad_norm": 1.1649745576637627, "learning_rate": 9.306070394304955e-06, "loss": 0.4216612339019775, "step": 320, "token_acc": 0.8665872154728236 }, { "epoch": 0.20109689213893966, "grad_norm": 1.1640287810040342, "learning_rate": 9.255055367742868e-06, "loss": 0.43276224136352537, "step": 330, "token_acc": 0.864183550146075 }, { "epoch": 0.2071907373552712, "grad_norm": 1.1205258611684763, "learning_rate": 9.202381325501683e-06, "loss": 0.42910175323486327, "step": 340, "token_acc": 0.8651370039640893 }, { "epoch": 0.21328458257160268, "grad_norm": 1.1044569461174318, "learning_rate": 9.148068804814032e-06, "loss": 0.425107479095459, "step": 350, "token_acc": 0.8655852823220787 }, { "epoch": 0.21937842778793418, "grad_norm": 1.256804160100686, "learning_rate": 9.092138981743588e-06, "loss": 0.4197092533111572, "step": 360, "token_acc": 0.8678029564108461 }, { "epoch": 0.2254722730042657, "grad_norm": 1.219596805077906, "learning_rate": 9.034613662928665e-06, "loss": 0.4218160629272461, "step": 370, "token_acc": 0.8669598748703018 }, { "epoch": 0.2315661182205972, "grad_norm": 1.0651000148469036, "learning_rate": 8.975515277079961e-06, "loss": 0.4222999095916748, "step": 380, "token_acc": 0.8668562219942147 }, { "epoch": 0.2376599634369287, "grad_norm": 1.1478789808745513, "learning_rate": 8.91486686623577e-06, "loss": 0.41972966194152833, "step": 390, "token_acc": 0.8667315262188772 }, { "epoch": 0.2437538086532602, "grad_norm": 1.0060533858058822, "learning_rate": 8.85269207677806e-06, "loss": 0.4143358707427979, "step": 400, "token_acc": 0.8689943563130941 }, { "epoch": 0.2498476538695917, "grad_norm": 1.2219261737292129, "learning_rate": 8.789015150212907e-06, "loss": 0.41486186981201173, "step": 410, "token_acc": 0.867653374528066 }, { "epoch": 0.25594149908592323, "grad_norm": 1.2842286146778168, "learning_rate": 8.72386091371891e-06, "loss": 0.4264723777770996, "step": 420, "token_acc": 0.865345114787771 }, { "epoch": 0.2620353443022547, "grad_norm": 1.0550559155752623, "learning_rate": 8.657254770467252e-06, "loss": 0.40860881805419924, "step": 430, "token_acc": 0.8694270527928576 }, { "epoch": 0.2681291895185862, "grad_norm": 1.1246909396790437, "learning_rate": 8.58922268971719e-06, "loss": 0.4148720264434814, "step": 440, "token_acc": 0.86849521403236 }, { "epoch": 0.2742230347349177, "grad_norm": 1.1877909840033853, "learning_rate": 8.51979119669081e-06, "loss": 0.4155715465545654, "step": 450, "token_acc": 0.8686248236499153 }, { "epoch": 0.28031687995124926, "grad_norm": 1.03946599413896, "learning_rate": 8.448987362231054e-06, "loss": 0.4156056880950928, "step": 460, "token_acc": 0.8682606492506055 }, { "epoch": 0.28641072516758076, "grad_norm": 1.1045440790462375, "learning_rate": 8.376838792246978e-06, "loss": 0.41259098052978516, "step": 470, "token_acc": 0.868615067345492 }, { "epoch": 0.29250457038391225, "grad_norm": 1.1044055109636997, "learning_rate": 8.303373616950408e-06, "loss": 0.41626744270324706, "step": 480, "token_acc": 0.867445116993405 }, { "epoch": 0.29859841560024375, "grad_norm": 1.0612884186160958, "learning_rate": 8.228620479888172e-06, "loss": 0.4087618350982666, "step": 490, "token_acc": 0.869433255622514 }, { "epoch": 0.30469226081657524, "grad_norm": 1.079879116921211, "learning_rate": 8.152608526774188e-06, "loss": 0.40863656997680664, "step": 500, "token_acc": 0.8705444341829626 }, { "epoch": 0.31078610603290674, "grad_norm": 1.0470334273877924, "learning_rate": 8.075367394125755e-06, "loss": 0.41130657196044923, "step": 510, "token_acc": 0.8699947913802195 }, { "epoch": 0.3168799512492383, "grad_norm": 1.2778777056879977, "learning_rate": 7.996927197708486e-06, "loss": 0.4074504852294922, "step": 520, "token_acc": 0.8711178129454153 }, { "epoch": 0.3229737964655698, "grad_norm": 1.133795250933889, "learning_rate": 7.917318520794395e-06, "loss": 0.4040180206298828, "step": 530, "token_acc": 0.8719991647774729 }, { "epoch": 0.3290676416819013, "grad_norm": 1.1320221274981666, "learning_rate": 7.836572402237683e-06, "loss": 0.4074112892150879, "step": 540, "token_acc": 0.8696679374619692 }, { "epoch": 0.3351614868982328, "grad_norm": 1.0153565229717176, "learning_rate": 7.754720324372924e-06, "loss": 0.4030743598937988, "step": 550, "token_acc": 0.8720831783254012 }, { "epoch": 0.34125533211456427, "grad_norm": 1.0985579621580885, "learning_rate": 7.67179420074032e-06, "loss": 0.3988363742828369, "step": 560, "token_acc": 0.8726780258889484 }, { "epoch": 0.3473491773308958, "grad_norm": 1.0584699143582574, "learning_rate": 7.587826363642845e-06, "loss": 0.4028042793273926, "step": 570, "token_acc": 0.8709437860238254 }, { "epoch": 0.3534430225472273, "grad_norm": 1.1632651891282637, "learning_rate": 7.502849551540106e-06, "loss": 0.3974143028259277, "step": 580, "token_acc": 0.8732772418431721 }, { "epoch": 0.3595368677635588, "grad_norm": 0.9585380945132779, "learning_rate": 7.4168968962838524e-06, "loss": 0.40021185874938964, "step": 590, "token_acc": 0.8715715660830257 }, { "epoch": 0.3656307129798903, "grad_norm": 0.939779800665415, "learning_rate": 7.330001910200111e-06, "loss": 0.39843976497650146, "step": 600, "token_acc": 0.8733910783350537 }, { "epoch": 0.3717245581962218, "grad_norm": 0.9815164073943617, "learning_rate": 7.242198473022958e-06, "loss": 0.3972899913787842, "step": 610, "token_acc": 0.8731910420095998 }, { "epoch": 0.3778184034125533, "grad_norm": 1.0569386302509218, "learning_rate": 7.15352081868506e-06, "loss": 0.4026960372924805, "step": 620, "token_acc": 0.8716591305210795 }, { "epoch": 0.38391224862888484, "grad_norm": 1.0897077358900225, "learning_rate": 7.0640035219701085e-06, "loss": 0.39238433837890624, "step": 630, "token_acc": 0.8741110700683207 }, { "epoch": 0.39000609384521634, "grad_norm": 1.0094259905078886, "learning_rate": 6.973681485032359e-06, "loss": 0.3934662342071533, "step": 640, "token_acc": 0.874180305698641 }, { "epoch": 0.39609993906154783, "grad_norm": 0.9880095870102604, "learning_rate": 6.8825899237885215e-06, "loss": 0.3929059743881226, "step": 650, "token_acc": 0.873847849697677 }, { "epoch": 0.40219378427787933, "grad_norm": 0.9583618057687778, "learning_rate": 6.7907643541873446e-06, "loss": 0.38638834953308104, "step": 660, "token_acc": 0.8764517709444076 }, { "epoch": 0.4082876294942108, "grad_norm": 1.1091462631909463, "learning_rate": 6.698240578362179e-06, "loss": 0.3935162782669067, "step": 670, "token_acc": 0.8743182876186542 }, { "epoch": 0.4143814747105424, "grad_norm": 0.959273015275344, "learning_rate": 6.6050546706719984e-06, "loss": 0.38172011375427245, "step": 680, "token_acc": 0.8772576395099669 }, { "epoch": 0.42047531992687387, "grad_norm": 1.0010757728338364, "learning_rate": 6.511242963636257e-06, "loss": 0.3927836179733276, "step": 690, "token_acc": 0.8740263817041508 }, { "epoch": 0.42656916514320536, "grad_norm": 1.045230237684538, "learning_rate": 6.416842033769106e-06, "loss": 0.38949809074401853, "step": 700, "token_acc": 0.8748742675586352 }, { "epoch": 0.43266301035953686, "grad_norm": 0.9849032327305663, "learning_rate": 6.321888687318457e-06, "loss": 0.39299988746643066, "step": 710, "token_acc": 0.8744398373706392 }, { "epoch": 0.43875685557586835, "grad_norm": 0.9773426657855283, "learning_rate": 6.2264199459155105e-06, "loss": 0.38987624645233154, "step": 720, "token_acc": 0.8749521585172907 }, { "epoch": 0.4448507007921999, "grad_norm": 1.037517468712357, "learning_rate": 6.130473032140272e-06, "loss": 0.38550682067871095, "step": 730, "token_acc": 0.8752092114104209 }, { "epoch": 0.4509445460085314, "grad_norm": 1.0310013780608072, "learning_rate": 6.0340853550087345e-06, "loss": 0.378936243057251, "step": 740, "token_acc": 0.878043851367452 }, { "epoch": 0.4570383912248629, "grad_norm": 0.8055934899750623, "learning_rate": 5.937294495387377e-06, "loss": 0.38777313232421873, "step": 750, "token_acc": 0.8762303990063655 }, { "epoch": 0.4631322364411944, "grad_norm": 1.0076731680308868, "learning_rate": 5.840138191340651e-06, "loss": 0.3867051601409912, "step": 760, "token_acc": 0.875447200037364 }, { "epoch": 0.4692260816575259, "grad_norm": 0.9392775195574543, "learning_rate": 5.7426543234171736e-06, "loss": 0.3799318552017212, "step": 770, "token_acc": 0.8780739671196323 }, { "epoch": 0.4753199268738574, "grad_norm": 0.9059297874010275, "learning_rate": 5.644880899880382e-06, "loss": 0.38845138549804686, "step": 780, "token_acc": 0.8756513846485855 }, { "epoch": 0.48141377209018893, "grad_norm": 1.0364591251718924, "learning_rate": 5.546856041889374e-06, "loss": 0.384658670425415, "step": 790, "token_acc": 0.8760285406658391 }, { "epoch": 0.4875076173065204, "grad_norm": 0.9573686942596932, "learning_rate": 5.448617968635741e-06, "loss": 0.3791942596435547, "step": 800, "token_acc": 0.8779162415307187 }, { "epoch": 0.4936014625228519, "grad_norm": 0.9636242802763855, "learning_rate": 5.35020498244219e-06, "loss": 0.37176291942596434, "step": 810, "token_acc": 0.8793090876456928 }, { "epoch": 0.4996953077391834, "grad_norm": 1.037660587481492, "learning_rate": 5.251655453828728e-06, "loss": 0.37394251823425295, "step": 820, "token_acc": 0.8786210190654307 }, { "epoch": 0.505789152955515, "grad_norm": 1.0719330406024963, "learning_rate": 5.153007806552275e-06, "loss": 0.3745760679244995, "step": 830, "token_acc": 0.8784241641412887 }, { "epoch": 0.5118829981718465, "grad_norm": 0.8899515496236061, "learning_rate": 5.054300502625517e-06, "loss": 0.3706503868103027, "step": 840, "token_acc": 0.8798184912767585 }, { "epoch": 0.517976843388178, "grad_norm": 0.9136772226114551, "learning_rate": 4.9555720273208475e-06, "loss": 0.3767611742019653, "step": 850, "token_acc": 0.8780427238279765 }, { "epoch": 0.5240706886045094, "grad_norm": 0.9760538746168989, "learning_rate": 4.856860874165218e-06, "loss": 0.37979438304901125, "step": 860, "token_acc": 0.8784071947906439 }, { "epoch": 0.5301645338208409, "grad_norm": 0.9424993647974058, "learning_rate": 4.758205529931808e-06, "loss": 0.3839302062988281, "step": 870, "token_acc": 0.8770481761661205 }, { "epoch": 0.5362583790371724, "grad_norm": 1.0293112779306877, "learning_rate": 4.659644459634293e-06, "loss": 0.3767723321914673, "step": 880, "token_acc": 0.8782181679486365 }, { "epoch": 0.5423522242535039, "grad_norm": 1.0743397927299763, "learning_rate": 4.56121609152961e-06, "loss": 0.3791919946670532, "step": 890, "token_acc": 0.8769342677312787 }, { "epoch": 0.5484460694698354, "grad_norm": 0.8651643017417293, "learning_rate": 4.462958802135069e-06, "loss": 0.36331801414489745, "step": 900, "token_acc": 0.8819762679763837 }, { "epoch": 0.5545399146861669, "grad_norm": 0.9197439306994798, "learning_rate": 4.364910901265607e-06, "loss": 0.3720353603363037, "step": 910, "token_acc": 0.8795370329732339 }, { "epoch": 0.5606337599024985, "grad_norm": 0.9973864478854872, "learning_rate": 4.2671106170970734e-06, "loss": 0.37818198204040526, "step": 920, "token_acc": 0.8787091854009224 }, { "epoch": 0.56672760511883, "grad_norm": 0.9979320322546561, "learning_rate": 4.169596081261332e-06, "loss": 0.368232798576355, "step": 930, "token_acc": 0.8808049967885766 }, { "epoch": 0.5728214503351615, "grad_norm": 0.9817455772913783, "learning_rate": 4.072405313979021e-06, "loss": 0.37091827392578125, "step": 940, "token_acc": 0.8796466097957818 }, { "epoch": 0.578915295551493, "grad_norm": 1.0935297334377472, "learning_rate": 3.975576209235726e-06, "loss": 0.3674028396606445, "step": 950, "token_acc": 0.8807917695163083 }, { "epoch": 0.5850091407678245, "grad_norm": 0.9835469765967159, "learning_rate": 3.879146520007399e-06, "loss": 0.3728478908538818, "step": 960, "token_acc": 0.8795413152600885 }, { "epoch": 0.591102985984156, "grad_norm": 0.9625183356689964, "learning_rate": 3.7831538435407344e-06, "loss": 0.37494525909423826, "step": 970, "token_acc": 0.8792245580635571 }, { "epoch": 0.5971968312004875, "grad_norm": 0.9012795424730173, "learning_rate": 3.687635606694271e-06, "loss": 0.3702352046966553, "step": 980, "token_acc": 0.8801223453080008 }, { "epoch": 0.603290676416819, "grad_norm": 0.9782757486531443, "learning_rate": 3.592629051345936e-06, "loss": 0.3673159837722778, "step": 990, "token_acc": 0.8810825035648933 }, { "epoch": 0.6093845216331505, "grad_norm": 1.0059100640922563, "learning_rate": 3.4981712198726956e-06, "loss": 0.3642214059829712, "step": 1000, "token_acc": 0.8818312088488447 }, { "epoch": 0.615478366849482, "grad_norm": 0.9395189399708234, "learning_rate": 3.4042989407079986e-06, "loss": 0.3784639358520508, "step": 1010, "token_acc": 0.8780194366406157 }, { "epoch": 0.6215722120658135, "grad_norm": 1.0425592930772825, "learning_rate": 3.311048813982627e-06, "loss": 0.36695384979248047, "step": 1020, "token_acc": 0.8809777292779815 }, { "epoch": 0.6276660572821451, "grad_norm": 0.9146308056797927, "learning_rate": 3.218457197254583e-06, "loss": 0.36698212623596194, "step": 1030, "token_acc": 0.8810339710207495 }, { "epoch": 0.6337599024984766, "grad_norm": 0.976263078958663, "learning_rate": 3.1265601913335196e-06, "loss": 0.365465784072876, "step": 1040, "token_acc": 0.8814162812670944 }, { "epoch": 0.6398537477148081, "grad_norm": 1.0567379406046713, "learning_rate": 3.035393626205306e-06, "loss": 0.3610874891281128, "step": 1050, "token_acc": 0.8824792140002385 }, { "epoch": 0.6459475929311396, "grad_norm": 1.0205537815943757, "learning_rate": 2.944993047062161e-06, "loss": 0.35759830474853516, "step": 1060, "token_acc": 0.8834624031976018 }, { "epoch": 0.6520414381474711, "grad_norm": 1.0280714401242652, "learning_rate": 2.8553937004438425e-06, "loss": 0.3574142217636108, "step": 1070, "token_acc": 0.884169503378651 }, { "epoch": 0.6581352833638026, "grad_norm": 1.0187298702407688, "learning_rate": 2.766630520495277e-06, "loss": 0.36029987335205077, "step": 1080, "token_acc": 0.8823869756562952 }, { "epoch": 0.664229128580134, "grad_norm": 0.9191494153561297, "learning_rate": 2.67873811534598e-06, "loss": 0.35897092819213866, "step": 1090, "token_acc": 0.8827260508533868 }, { "epoch": 0.6703229737964655, "grad_norm": 0.9492740813391064, "learning_rate": 2.591750753616596e-06, "loss": 0.36168532371520995, "step": 1100, "token_acc": 0.8825941425209475 }, { "epoch": 0.676416819012797, "grad_norm": 0.9644543574186545, "learning_rate": 2.505702351057804e-06, "loss": 0.3665107488632202, "step": 1110, "token_acc": 0.8816928952036972 }, { "epoch": 0.6825106642291285, "grad_norm": 0.9521683470371731, "learning_rate": 2.4206264573268174e-06, "loss": 0.35790448188781737, "step": 1120, "token_acc": 0.8832886728694526 }, { "epoch": 0.68860450944546, "grad_norm": 1.0783164983743936, "learning_rate": 2.336556242906608e-06, "loss": 0.3561516284942627, "step": 1130, "token_acc": 0.8839432945670233 }, { "epoch": 0.6946983546617916, "grad_norm": 0.9994299291097577, "learning_rate": 2.2535244861729707e-06, "loss": 0.3557067632675171, "step": 1140, "token_acc": 0.8837923958883728 }, { "epoch": 0.7007921998781231, "grad_norm": 1.039214819811771, "learning_rate": 2.1715635606144653e-06, "loss": 0.3563429832458496, "step": 1150, "token_acc": 0.8836427544336156 }, { "epoch": 0.7068860450944546, "grad_norm": 0.8549094000634878, "learning_rate": 2.0907054222102367e-06, "loss": 0.35337374210357664, "step": 1160, "token_acc": 0.8852147256677358 }, { "epoch": 0.7129798903107861, "grad_norm": 0.894156191232295, "learning_rate": 2.0109815969705922e-06, "loss": 0.359290337562561, "step": 1170, "token_acc": 0.8828725266946272 }, { "epoch": 0.7190737355271176, "grad_norm": 0.8673526133846996, "learning_rate": 1.9324231686452478e-06, "loss": 0.35991313457489016, "step": 1180, "token_acc": 0.8837700799671174 }, { "epoch": 0.7251675807434491, "grad_norm": 0.9356232121590031, "learning_rate": 1.8550607666039877e-06, "loss": 0.3538203716278076, "step": 1190, "token_acc": 0.8850202284200351 }, { "epoch": 0.7312614259597806, "grad_norm": 1.0163312252270116, "learning_rate": 1.7789245538944971e-06, "loss": 0.3607466459274292, "step": 1200, "token_acc": 0.8824661130842316 }, { "epoch": 0.7373552711761121, "grad_norm": 0.8390316456040804, "learning_rate": 1.7040442154820036e-06, "loss": 0.35505869388580324, "step": 1210, "token_acc": 0.8845901901507859 }, { "epoch": 0.7434491163924436, "grad_norm": 0.921086850463397, "learning_rate": 1.6304489466753237e-06, "loss": 0.35682291984558107, "step": 1220, "token_acc": 0.884017590582417 }, { "epoch": 0.7495429616087751, "grad_norm": 0.8352814372993298, "learning_rate": 1.5581674417438143e-06, "loss": 0.3599454164505005, "step": 1230, "token_acc": 0.8830610223076613 }, { "epoch": 0.7556368068251066, "grad_norm": 0.9561368940432438, "learning_rate": 1.4872278827296855e-06, "loss": 0.3544511079788208, "step": 1240, "token_acc": 0.884971241183666 }, { "epoch": 0.7617306520414382, "grad_norm": 0.9963256225377098, "learning_rate": 1.417657928460029e-06, "loss": 0.35143122673034666, "step": 1250, "token_acc": 0.8854597977852672 }, { "epoch": 0.7678244972577697, "grad_norm": 1.0464860200353496, "learning_rate": 1.349484703762834e-06, "loss": 0.3545159101486206, "step": 1260, "token_acc": 0.8848001191868091 }, { "epoch": 0.7739183424741012, "grad_norm": 0.9553675018651967, "learning_rate": 1.2827347888912057e-06, "loss": 0.3540821552276611, "step": 1270, "token_acc": 0.8845431750704823 }, { "epoch": 0.7800121876904327, "grad_norm": 0.9171124221466627, "learning_rate": 1.2174342091599277e-06, "loss": 0.3459270477294922, "step": 1280, "token_acc": 0.8876378370255273 }, { "epoch": 0.7861060329067642, "grad_norm": 0.9897434740336704, "learning_rate": 1.1536084247983626e-06, "loss": 0.3577150821685791, "step": 1290, "token_acc": 0.8842498302783435 }, { "epoch": 0.7921998781230957, "grad_norm": 0.88979092902762, "learning_rate": 1.0912823210237033e-06, "loss": 0.350811505317688, "step": 1300, "token_acc": 0.8856008373344852 }, { "epoch": 0.7982937233394272, "grad_norm": 0.9287859784083828, "learning_rate": 1.0304801983383989e-06, "loss": 0.3551754951477051, "step": 1310, "token_acc": 0.8848410538592661 }, { "epoch": 0.8043875685557587, "grad_norm": 0.8802985747226686, "learning_rate": 9.712257630555589e-07, "loss": 0.35124433040618896, "step": 1320, "token_acc": 0.8857088187898194 }, { "epoch": 0.8104814137720902, "grad_norm": 0.9867993671885138, "learning_rate": 9.135421180560394e-07, "loss": 0.3533953666687012, "step": 1330, "token_acc": 0.8847630099080603 }, { "epoch": 0.8165752589884216, "grad_norm": 0.926446790364043, "learning_rate": 8.574517537807897e-07, "loss": 0.345960807800293, "step": 1340, "token_acc": 0.8876687663254338 }, { "epoch": 0.8226691042047533, "grad_norm": 0.9083144656784397, "learning_rate": 8.029765394619899e-07, "loss": 0.35233092308044434, "step": 1350, "token_acc": 0.8852270821778219 }, { "epoch": 0.8287629494210847, "grad_norm": 0.8865701179019319, "learning_rate": 7.501377145963939e-07, "loss": 0.35347394943237304, "step": 1360, "token_acc": 0.8848507491917527 }, { "epoch": 0.8348567946374162, "grad_norm": 0.8797844443235806, "learning_rate": 6.98955880664205e-07, "loss": 0.35233142375946047, "step": 1370, "token_acc": 0.8857494626572902 }, { "epoch": 0.8409506398537477, "grad_norm": 0.985930499180468, "learning_rate": 6.494509930967019e-07, "loss": 0.3484508991241455, "step": 1380, "token_acc": 0.8862226663569039 }, { "epoch": 0.8470444850700792, "grad_norm": 0.8385926015490823, "learning_rate": 6.016423534957616e-07, "loss": 0.34513344764709475, "step": 1390, "token_acc": 0.88766630420385 }, { "epoch": 0.8531383302864107, "grad_norm": 0.9469060182104153, "learning_rate": 5.555486021082979e-07, "loss": 0.3453853130340576, "step": 1400, "token_acc": 0.8872980190401473 }, { "epoch": 0.8592321755027422, "grad_norm": 1.0619116209691746, "learning_rate": 5.111877105585672e-07, "loss": 0.35715694427490235, "step": 1410, "token_acc": 0.8840584828365589 }, { "epoch": 0.8653260207190737, "grad_norm": 0.9990471419637711, "learning_rate": 4.6857697484116006e-07, "loss": 0.34844698905944826, "step": 1420, "token_acc": 0.8861595746957418 }, { "epoch": 0.8714198659354052, "grad_norm": 0.8653174829680845, "learning_rate": 4.277330085774156e-07, "loss": 0.34473817348480223, "step": 1430, "token_acc": 0.8869124712097335 }, { "epoch": 0.8775137111517367, "grad_norm": 0.978323586867761, "learning_rate": 3.886717365378867e-07, "loss": 0.3523882865905762, "step": 1440, "token_acc": 0.8849034480348013 }, { "epoch": 0.8836075563680682, "grad_norm": 1.0140389777919647, "learning_rate": 3.5140838843339073e-07, "loss": 0.3476292848587036, "step": 1450, "token_acc": 0.8866329934005767 }, { "epoch": 0.8897014015843998, "grad_norm": 1.0064657138214737, "learning_rate": 3.159574929770515e-07, "loss": 0.35365211963653564, "step": 1460, "token_acc": 0.8852465385385505 }, { "epoch": 0.8957952468007313, "grad_norm": 0.9324871915195588, "learning_rate": 2.8233287221965555e-07, "loss": 0.3441819190979004, "step": 1470, "token_acc": 0.8871095878318941 }, { "epoch": 0.9018890920170628, "grad_norm": 0.9055988245681192, "learning_rate": 2.5054763616053967e-07, "loss": 0.34738845825195314, "step": 1480, "token_acc": 0.8870410481583068 }, { "epoch": 0.9079829372333943, "grad_norm": 0.8845337059700371, "learning_rate": 2.2061417763608818e-07, "loss": 0.3496507167816162, "step": 1490, "token_acc": 0.8858089991712572 }, { "epoch": 0.9140767824497258, "grad_norm": 0.8884170981985747, "learning_rate": 1.9254416748786086e-07, "loss": 0.34417023658752444, "step": 1500, "token_acc": 0.8876897324425693 }, { "epoch": 0.9201706276660573, "grad_norm": 0.991921395955364, "learning_rate": 1.6634855001221195e-07, "loss": 0.3475677490234375, "step": 1510, "token_acc": 0.8866243585461391 }, { "epoch": 0.9262644728823888, "grad_norm": 0.8822494961130495, "learning_rate": 1.4203753869318882e-07, "loss": 0.35834810733795164, "step": 1520, "token_acc": 0.8836910930175179 }, { "epoch": 0.9323583180987203, "grad_norm": 1.0007870631310825, "learning_rate": 1.196206122203647e-07, "loss": 0.3498887777328491, "step": 1530, "token_acc": 0.8859136668935295 }, { "epoch": 0.9384521633150518, "grad_norm": 0.9115641715955437, "learning_rate": 9.910651079316824e-08, "loss": 0.3380606651306152, "step": 1540, "token_acc": 0.8888504997761748 }, { "epoch": 0.9445460085313833, "grad_norm": 0.9336474945041258, "learning_rate": 8.050323271314331e-08, "loss": 0.34683611392974856, "step": 1550, "token_acc": 0.8867626671565236 }, { "epoch": 0.9506398537477148, "grad_norm": 0.9153041920993996, "learning_rate": 6.381803126546405e-08, "loss": 0.3438985824584961, "step": 1560, "token_acc": 0.8876278171714178 }, { "epoch": 0.9567336989640464, "grad_norm": 0.8723491469657118, "learning_rate": 4.9057411890933714e-08, "loss": 0.35089046955108644, "step": 1570, "token_acc": 0.8854520115332541 }, { "epoch": 0.9628275441803779, "grad_norm": 0.8955372459045878, "learning_rate": 3.622712964956032e-08, "loss": 0.34657576084136965, "step": 1580, "token_acc": 0.8870879211520062 }, { "epoch": 0.9689213893967094, "grad_norm": 0.9416702515233623, "learning_rate": 2.5332186976697037e-08, "loss": 0.35133283138275145, "step": 1590, "token_acc": 0.8860156117328086 }, { "epoch": 0.9750152346130408, "grad_norm": 0.8711895076149625, "learning_rate": 1.637683173263238e-08, "loss": 0.35227146148681643, "step": 1600, "token_acc": 0.8855705009128142 }, { "epoch": 0.9811090798293723, "grad_norm": 0.983729311544991, "learning_rate": 9.364555546375054e-09, "loss": 0.34629082679748535, "step": 1610, "token_acc": 0.8869587094319709 }, { "epoch": 0.9872029250457038, "grad_norm": 0.94198406227018, "learning_rate": 4.2980924542984634e-09, "loss": 0.3403524875640869, "step": 1620, "token_acc": 0.8887918722020187 }, { "epoch": 0.9932967702620353, "grad_norm": 0.8500481071531769, "learning_rate": 1.179417834153429e-09, "loss": 0.3546321868896484, "step": 1630, "token_acc": 0.8848728077900511 }, { "epoch": 0.9993906154783668, "grad_norm": 0.9946340081463461, "learning_rate": 9.74763488759134e-12, "loss": 0.35070624351501467, "step": 1640, "token_acc": 0.8863686895606487 } ], "logging_steps": 10, "max_steps": 1641, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2214001985716224.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }