AnyAudio-Judge-7B / trainer_state.json
cucl2's picture
Add files using upload-large-folder tool
e9d6389 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 200.0,
"global_step": 1641,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006093845216331506,
"grad_norm": 5.139511599456721,
"learning_rate": 2.0000000000000002e-07,
"loss": 0.97658371925354,
"step": 1,
"token_acc": 0.752757254524039
},
{
"epoch": 0.006093845216331505,
"grad_norm": 4.02627916602829,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.979477776421441,
"step": 10,
"token_acc": 0.7513394629500156
},
{
"epoch": 0.01218769043266301,
"grad_norm": 2.133243665962445,
"learning_rate": 4.000000000000001e-06,
"loss": 0.8085936546325684,
"step": 20,
"token_acc": 0.7841330046869243
},
{
"epoch": 0.018281535648994516,
"grad_norm": 1.6130698467539089,
"learning_rate": 6e-06,
"loss": 0.6815763473510742,
"step": 30,
"token_acc": 0.8088598674958689
},
{
"epoch": 0.02437538086532602,
"grad_norm": 1.5958993846193437,
"learning_rate": 8.000000000000001e-06,
"loss": 0.6228148937225342,
"step": 40,
"token_acc": 0.820709892041175
},
{
"epoch": 0.030469226081657527,
"grad_norm": 1.5835525047156958,
"learning_rate": 1e-05,
"loss": 0.5660243034362793,
"step": 50,
"token_acc": 0.8334044587872124
},
{
"epoch": 0.03656307129798903,
"grad_norm": 1.486912724454441,
"learning_rate": 9.999025267866269e-06,
"loss": 0.5425637722015381,
"step": 60,
"token_acc": 0.8377751665687463
},
{
"epoch": 0.042656916514320534,
"grad_norm": 1.5337888751937572,
"learning_rate": 9.996101451506166e-06,
"loss": 0.5277560710906982,
"step": 70,
"token_acc": 0.8436990187965481
},
{
"epoch": 0.04875076173065204,
"grad_norm": 1.3940886051095893,
"learning_rate": 9.991229690894796e-06,
"loss": 0.5164490699768066,
"step": 80,
"token_acc": 0.843745632858913
},
{
"epoch": 0.054844606946983544,
"grad_norm": 1.732808941800786,
"learning_rate": 9.984411885496807e-06,
"loss": 0.5111546516418457,
"step": 90,
"token_acc": 0.8453230842547292
},
{
"epoch": 0.06093845216331505,
"grad_norm": 1.502010177161109,
"learning_rate": 9.975650693525798e-06,
"loss": 0.5041120052337646,
"step": 100,
"token_acc": 0.8452364415692656
},
{
"epoch": 0.06703229737964655,
"grad_norm": 1.4601020474033113,
"learning_rate": 9.964949530907907e-06,
"loss": 0.5016684532165527,
"step": 110,
"token_acc": 0.8476517417815049
},
{
"epoch": 0.07312614259597806,
"grad_norm": 1.2952511202685608,
"learning_rate": 9.952312569949963e-06,
"loss": 0.481311559677124,
"step": 120,
"token_acc": 0.8533387907153323
},
{
"epoch": 0.07921998781230957,
"grad_norm": 1.4971652792627679,
"learning_rate": 9.937744737712734e-06,
"loss": 0.4774615287780762,
"step": 130,
"token_acc": 0.8534425009304056
},
{
"epoch": 0.08531383302864107,
"grad_norm": 1.3779208298780503,
"learning_rate": 9.921251714089898e-06,
"loss": 0.4781217575073242,
"step": 140,
"token_acc": 0.8540719832383131
},
{
"epoch": 0.09140767824497258,
"grad_norm": 1.2833076700573953,
"learning_rate": 9.9028399295935e-06,
"loss": 0.470335865020752,
"step": 150,
"token_acc": 0.855729364137813
},
{
"epoch": 0.09750152346130408,
"grad_norm": 1.275667403101274,
"learning_rate": 9.882516562846735e-06,
"loss": 0.4639917850494385,
"step": 160,
"token_acc": 0.856720295350119
},
{
"epoch": 0.1035953686776356,
"grad_norm": 1.3612747480512406,
"learning_rate": 9.860289537785058e-06,
"loss": 0.46750926971435547,
"step": 170,
"token_acc": 0.8558511446900368
},
{
"epoch": 0.10968921389396709,
"grad_norm": 1.415269189793894,
"learning_rate": 9.83616752056669e-06,
"loss": 0.4647522926330566,
"step": 180,
"token_acc": 0.8561545157582173
},
{
"epoch": 0.1157830591102986,
"grad_norm": 1.1892931159809241,
"learning_rate": 9.810159916193763e-06,
"loss": 0.45995321273803713,
"step": 190,
"token_acc": 0.8573448602405447
},
{
"epoch": 0.1218769043266301,
"grad_norm": 1.3064527572301536,
"learning_rate": 9.782276864845351e-06,
"loss": 0.4638189792633057,
"step": 200,
"token_acc": 0.8556243509610675
},
{
"epoch": 0.12797074954296161,
"grad_norm": 1.314500952457575,
"learning_rate": 9.752529237923914e-06,
"loss": 0.4490074634552002,
"step": 210,
"token_acc": 0.8597137978120026
},
{
"epoch": 0.1340645947592931,
"grad_norm": 1.1373343380715917,
"learning_rate": 9.720928633816596e-06,
"loss": 0.4501980781555176,
"step": 220,
"token_acc": 0.859648604947868
},
{
"epoch": 0.14015843997562463,
"grad_norm": 1.230125123804479,
"learning_rate": 9.687487373373103e-06,
"loss": 0.44935040473937987,
"step": 230,
"token_acc": 0.8588833095343918
},
{
"epoch": 0.14625228519195613,
"grad_norm": 1.6077603667743245,
"learning_rate": 9.652218495101894e-06,
"loss": 0.44729223251342776,
"step": 240,
"token_acc": 0.85995841942315
},
{
"epoch": 0.15234613040828762,
"grad_norm": 1.2688011877880512,
"learning_rate": 9.61513575008656e-06,
"loss": 0.43803844451904295,
"step": 250,
"token_acc": 0.8627407782309685
},
{
"epoch": 0.15843997562461914,
"grad_norm": 1.178112411608835,
"learning_rate": 9.576253596624367e-06,
"loss": 0.43675899505615234,
"step": 260,
"token_acc": 0.8637918234089942
},
{
"epoch": 0.16453382084095064,
"grad_norm": 1.294840110909924,
"learning_rate": 9.53558719458908e-06,
"loss": 0.4456604480743408,
"step": 270,
"token_acc": 0.8607995996575384
},
{
"epoch": 0.17062766605728213,
"grad_norm": 1.2275948762976965,
"learning_rate": 9.49315239952023e-06,
"loss": 0.44009056091308596,
"step": 280,
"token_acc": 0.8624380989923378
},
{
"epoch": 0.17672151127361366,
"grad_norm": 1.1835184130640346,
"learning_rate": 9.448965756441154e-06,
"loss": 0.43228535652160643,
"step": 290,
"token_acc": 0.8642425086011517
},
{
"epoch": 0.18281535648994515,
"grad_norm": 1.0506109771841785,
"learning_rate": 9.403044493408205e-06,
"loss": 0.4331789970397949,
"step": 300,
"token_acc": 0.8650773124725752
},
{
"epoch": 0.18890920170627665,
"grad_norm": 1.2538833747077607,
"learning_rate": 9.355406514793667e-06,
"loss": 0.44378862380981443,
"step": 310,
"token_acc": 0.8612388746191983
},
{
"epoch": 0.19500304692260817,
"grad_norm": 1.1649745576637627,
"learning_rate": 9.306070394304955e-06,
"loss": 0.4216612339019775,
"step": 320,
"token_acc": 0.8665872154728236
},
{
"epoch": 0.20109689213893966,
"grad_norm": 1.1640287810040342,
"learning_rate": 9.255055367742868e-06,
"loss": 0.43276224136352537,
"step": 330,
"token_acc": 0.864183550146075
},
{
"epoch": 0.2071907373552712,
"grad_norm": 1.1205258611684763,
"learning_rate": 9.202381325501683e-06,
"loss": 0.42910175323486327,
"step": 340,
"token_acc": 0.8651370039640893
},
{
"epoch": 0.21328458257160268,
"grad_norm": 1.1044569461174318,
"learning_rate": 9.148068804814032e-06,
"loss": 0.425107479095459,
"step": 350,
"token_acc": 0.8655852823220787
},
{
"epoch": 0.21937842778793418,
"grad_norm": 1.256804160100686,
"learning_rate": 9.092138981743588e-06,
"loss": 0.4197092533111572,
"step": 360,
"token_acc": 0.8678029564108461
},
{
"epoch": 0.2254722730042657,
"grad_norm": 1.219596805077906,
"learning_rate": 9.034613662928665e-06,
"loss": 0.4218160629272461,
"step": 370,
"token_acc": 0.8669598748703018
},
{
"epoch": 0.2315661182205972,
"grad_norm": 1.0651000148469036,
"learning_rate": 8.975515277079961e-06,
"loss": 0.4222999095916748,
"step": 380,
"token_acc": 0.8668562219942147
},
{
"epoch": 0.2376599634369287,
"grad_norm": 1.1478789808745513,
"learning_rate": 8.91486686623577e-06,
"loss": 0.41972966194152833,
"step": 390,
"token_acc": 0.8667315262188772
},
{
"epoch": 0.2437538086532602,
"grad_norm": 1.0060533858058822,
"learning_rate": 8.85269207677806e-06,
"loss": 0.4143358707427979,
"step": 400,
"token_acc": 0.8689943563130941
},
{
"epoch": 0.2498476538695917,
"grad_norm": 1.2219261737292129,
"learning_rate": 8.789015150212907e-06,
"loss": 0.41486186981201173,
"step": 410,
"token_acc": 0.867653374528066
},
{
"epoch": 0.25594149908592323,
"grad_norm": 1.2842286146778168,
"learning_rate": 8.72386091371891e-06,
"loss": 0.4264723777770996,
"step": 420,
"token_acc": 0.865345114787771
},
{
"epoch": 0.2620353443022547,
"grad_norm": 1.0550559155752623,
"learning_rate": 8.657254770467252e-06,
"loss": 0.40860881805419924,
"step": 430,
"token_acc": 0.8694270527928576
},
{
"epoch": 0.2681291895185862,
"grad_norm": 1.1246909396790437,
"learning_rate": 8.58922268971719e-06,
"loss": 0.4148720264434814,
"step": 440,
"token_acc": 0.86849521403236
},
{
"epoch": 0.2742230347349177,
"grad_norm": 1.1877909840033853,
"learning_rate": 8.51979119669081e-06,
"loss": 0.4155715465545654,
"step": 450,
"token_acc": 0.8686248236499153
},
{
"epoch": 0.28031687995124926,
"grad_norm": 1.03946599413896,
"learning_rate": 8.448987362231054e-06,
"loss": 0.4156056880950928,
"step": 460,
"token_acc": 0.8682606492506055
},
{
"epoch": 0.28641072516758076,
"grad_norm": 1.1045440790462375,
"learning_rate": 8.376838792246978e-06,
"loss": 0.41259098052978516,
"step": 470,
"token_acc": 0.868615067345492
},
{
"epoch": 0.29250457038391225,
"grad_norm": 1.1044055109636997,
"learning_rate": 8.303373616950408e-06,
"loss": 0.41626744270324706,
"step": 480,
"token_acc": 0.867445116993405
},
{
"epoch": 0.29859841560024375,
"grad_norm": 1.0612884186160958,
"learning_rate": 8.228620479888172e-06,
"loss": 0.4087618350982666,
"step": 490,
"token_acc": 0.869433255622514
},
{
"epoch": 0.30469226081657524,
"grad_norm": 1.079879116921211,
"learning_rate": 8.152608526774188e-06,
"loss": 0.40863656997680664,
"step": 500,
"token_acc": 0.8705444341829626
},
{
"epoch": 0.31078610603290674,
"grad_norm": 1.0470334273877924,
"learning_rate": 8.075367394125755e-06,
"loss": 0.41130657196044923,
"step": 510,
"token_acc": 0.8699947913802195
},
{
"epoch": 0.3168799512492383,
"grad_norm": 1.2778777056879977,
"learning_rate": 7.996927197708486e-06,
"loss": 0.4074504852294922,
"step": 520,
"token_acc": 0.8711178129454153
},
{
"epoch": 0.3229737964655698,
"grad_norm": 1.133795250933889,
"learning_rate": 7.917318520794395e-06,
"loss": 0.4040180206298828,
"step": 530,
"token_acc": 0.8719991647774729
},
{
"epoch": 0.3290676416819013,
"grad_norm": 1.1320221274981666,
"learning_rate": 7.836572402237683e-06,
"loss": 0.4074112892150879,
"step": 540,
"token_acc": 0.8696679374619692
},
{
"epoch": 0.3351614868982328,
"grad_norm": 1.0153565229717176,
"learning_rate": 7.754720324372924e-06,
"loss": 0.4030743598937988,
"step": 550,
"token_acc": 0.8720831783254012
},
{
"epoch": 0.34125533211456427,
"grad_norm": 1.0985579621580885,
"learning_rate": 7.67179420074032e-06,
"loss": 0.3988363742828369,
"step": 560,
"token_acc": 0.8726780258889484
},
{
"epoch": 0.3473491773308958,
"grad_norm": 1.0584699143582574,
"learning_rate": 7.587826363642845e-06,
"loss": 0.4028042793273926,
"step": 570,
"token_acc": 0.8709437860238254
},
{
"epoch": 0.3534430225472273,
"grad_norm": 1.1632651891282637,
"learning_rate": 7.502849551540106e-06,
"loss": 0.3974143028259277,
"step": 580,
"token_acc": 0.8732772418431721
},
{
"epoch": 0.3595368677635588,
"grad_norm": 0.9585380945132779,
"learning_rate": 7.4168968962838524e-06,
"loss": 0.40021185874938964,
"step": 590,
"token_acc": 0.8715715660830257
},
{
"epoch": 0.3656307129798903,
"grad_norm": 0.939779800665415,
"learning_rate": 7.330001910200111e-06,
"loss": 0.39843976497650146,
"step": 600,
"token_acc": 0.8733910783350537
},
{
"epoch": 0.3717245581962218,
"grad_norm": 0.9815164073943617,
"learning_rate": 7.242198473022958e-06,
"loss": 0.3972899913787842,
"step": 610,
"token_acc": 0.8731910420095998
},
{
"epoch": 0.3778184034125533,
"grad_norm": 1.0569386302509218,
"learning_rate": 7.15352081868506e-06,
"loss": 0.4026960372924805,
"step": 620,
"token_acc": 0.8716591305210795
},
{
"epoch": 0.38391224862888484,
"grad_norm": 1.0897077358900225,
"learning_rate": 7.0640035219701085e-06,
"loss": 0.39238433837890624,
"step": 630,
"token_acc": 0.8741110700683207
},
{
"epoch": 0.39000609384521634,
"grad_norm": 1.0094259905078886,
"learning_rate": 6.973681485032359e-06,
"loss": 0.3934662342071533,
"step": 640,
"token_acc": 0.874180305698641
},
{
"epoch": 0.39609993906154783,
"grad_norm": 0.9880095870102604,
"learning_rate": 6.8825899237885215e-06,
"loss": 0.3929059743881226,
"step": 650,
"token_acc": 0.873847849697677
},
{
"epoch": 0.40219378427787933,
"grad_norm": 0.9583618057687778,
"learning_rate": 6.7907643541873446e-06,
"loss": 0.38638834953308104,
"step": 660,
"token_acc": 0.8764517709444076
},
{
"epoch": 0.4082876294942108,
"grad_norm": 1.1091462631909463,
"learning_rate": 6.698240578362179e-06,
"loss": 0.3935162782669067,
"step": 670,
"token_acc": 0.8743182876186542
},
{
"epoch": 0.4143814747105424,
"grad_norm": 0.959273015275344,
"learning_rate": 6.6050546706719984e-06,
"loss": 0.38172011375427245,
"step": 680,
"token_acc": 0.8772576395099669
},
{
"epoch": 0.42047531992687387,
"grad_norm": 1.0010757728338364,
"learning_rate": 6.511242963636257e-06,
"loss": 0.3927836179733276,
"step": 690,
"token_acc": 0.8740263817041508
},
{
"epoch": 0.42656916514320536,
"grad_norm": 1.045230237684538,
"learning_rate": 6.416842033769106e-06,
"loss": 0.38949809074401853,
"step": 700,
"token_acc": 0.8748742675586352
},
{
"epoch": 0.43266301035953686,
"grad_norm": 0.9849032327305663,
"learning_rate": 6.321888687318457e-06,
"loss": 0.39299988746643066,
"step": 710,
"token_acc": 0.8744398373706392
},
{
"epoch": 0.43875685557586835,
"grad_norm": 0.9773426657855283,
"learning_rate": 6.2264199459155105e-06,
"loss": 0.38987624645233154,
"step": 720,
"token_acc": 0.8749521585172907
},
{
"epoch": 0.4448507007921999,
"grad_norm": 1.037517468712357,
"learning_rate": 6.130473032140272e-06,
"loss": 0.38550682067871095,
"step": 730,
"token_acc": 0.8752092114104209
},
{
"epoch": 0.4509445460085314,
"grad_norm": 1.0310013780608072,
"learning_rate": 6.0340853550087345e-06,
"loss": 0.378936243057251,
"step": 740,
"token_acc": 0.878043851367452
},
{
"epoch": 0.4570383912248629,
"grad_norm": 0.8055934899750623,
"learning_rate": 5.937294495387377e-06,
"loss": 0.38777313232421873,
"step": 750,
"token_acc": 0.8762303990063655
},
{
"epoch": 0.4631322364411944,
"grad_norm": 1.0076731680308868,
"learning_rate": 5.840138191340651e-06,
"loss": 0.3867051601409912,
"step": 760,
"token_acc": 0.875447200037364
},
{
"epoch": 0.4692260816575259,
"grad_norm": 0.9392775195574543,
"learning_rate": 5.7426543234171736e-06,
"loss": 0.3799318552017212,
"step": 770,
"token_acc": 0.8780739671196323
},
{
"epoch": 0.4753199268738574,
"grad_norm": 0.9059297874010275,
"learning_rate": 5.644880899880382e-06,
"loss": 0.38845138549804686,
"step": 780,
"token_acc": 0.8756513846485855
},
{
"epoch": 0.48141377209018893,
"grad_norm": 1.0364591251718924,
"learning_rate": 5.546856041889374e-06,
"loss": 0.384658670425415,
"step": 790,
"token_acc": 0.8760285406658391
},
{
"epoch": 0.4875076173065204,
"grad_norm": 0.9573686942596932,
"learning_rate": 5.448617968635741e-06,
"loss": 0.3791942596435547,
"step": 800,
"token_acc": 0.8779162415307187
},
{
"epoch": 0.4936014625228519,
"grad_norm": 0.9636242802763855,
"learning_rate": 5.35020498244219e-06,
"loss": 0.37176291942596434,
"step": 810,
"token_acc": 0.8793090876456928
},
{
"epoch": 0.4996953077391834,
"grad_norm": 1.037660587481492,
"learning_rate": 5.251655453828728e-06,
"loss": 0.37394251823425295,
"step": 820,
"token_acc": 0.8786210190654307
},
{
"epoch": 0.505789152955515,
"grad_norm": 1.0719330406024963,
"learning_rate": 5.153007806552275e-06,
"loss": 0.3745760679244995,
"step": 830,
"token_acc": 0.8784241641412887
},
{
"epoch": 0.5118829981718465,
"grad_norm": 0.8899515496236061,
"learning_rate": 5.054300502625517e-06,
"loss": 0.3706503868103027,
"step": 840,
"token_acc": 0.8798184912767585
},
{
"epoch": 0.517976843388178,
"grad_norm": 0.9136772226114551,
"learning_rate": 4.9555720273208475e-06,
"loss": 0.3767611742019653,
"step": 850,
"token_acc": 0.8780427238279765
},
{
"epoch": 0.5240706886045094,
"grad_norm": 0.9760538746168989,
"learning_rate": 4.856860874165218e-06,
"loss": 0.37979438304901125,
"step": 860,
"token_acc": 0.8784071947906439
},
{
"epoch": 0.5301645338208409,
"grad_norm": 0.9424993647974058,
"learning_rate": 4.758205529931808e-06,
"loss": 0.3839302062988281,
"step": 870,
"token_acc": 0.8770481761661205
},
{
"epoch": 0.5362583790371724,
"grad_norm": 1.0293112779306877,
"learning_rate": 4.659644459634293e-06,
"loss": 0.3767723321914673,
"step": 880,
"token_acc": 0.8782181679486365
},
{
"epoch": 0.5423522242535039,
"grad_norm": 1.0743397927299763,
"learning_rate": 4.56121609152961e-06,
"loss": 0.3791919946670532,
"step": 890,
"token_acc": 0.8769342677312787
},
{
"epoch": 0.5484460694698354,
"grad_norm": 0.8651643017417293,
"learning_rate": 4.462958802135069e-06,
"loss": 0.36331801414489745,
"step": 900,
"token_acc": 0.8819762679763837
},
{
"epoch": 0.5545399146861669,
"grad_norm": 0.9197439306994798,
"learning_rate": 4.364910901265607e-06,
"loss": 0.3720353603363037,
"step": 910,
"token_acc": 0.8795370329732339
},
{
"epoch": 0.5606337599024985,
"grad_norm": 0.9973864478854872,
"learning_rate": 4.2671106170970734e-06,
"loss": 0.37818198204040526,
"step": 920,
"token_acc": 0.8787091854009224
},
{
"epoch": 0.56672760511883,
"grad_norm": 0.9979320322546561,
"learning_rate": 4.169596081261332e-06,
"loss": 0.368232798576355,
"step": 930,
"token_acc": 0.8808049967885766
},
{
"epoch": 0.5728214503351615,
"grad_norm": 0.9817455772913783,
"learning_rate": 4.072405313979021e-06,
"loss": 0.37091827392578125,
"step": 940,
"token_acc": 0.8796466097957818
},
{
"epoch": 0.578915295551493,
"grad_norm": 1.0935297334377472,
"learning_rate": 3.975576209235726e-06,
"loss": 0.3674028396606445,
"step": 950,
"token_acc": 0.8807917695163083
},
{
"epoch": 0.5850091407678245,
"grad_norm": 0.9835469765967159,
"learning_rate": 3.879146520007399e-06,
"loss": 0.3728478908538818,
"step": 960,
"token_acc": 0.8795413152600885
},
{
"epoch": 0.591102985984156,
"grad_norm": 0.9625183356689964,
"learning_rate": 3.7831538435407344e-06,
"loss": 0.37494525909423826,
"step": 970,
"token_acc": 0.8792245580635571
},
{
"epoch": 0.5971968312004875,
"grad_norm": 0.9012795424730173,
"learning_rate": 3.687635606694271e-06,
"loss": 0.3702352046966553,
"step": 980,
"token_acc": 0.8801223453080008
},
{
"epoch": 0.603290676416819,
"grad_norm": 0.9782757486531443,
"learning_rate": 3.592629051345936e-06,
"loss": 0.3673159837722778,
"step": 990,
"token_acc": 0.8810825035648933
},
{
"epoch": 0.6093845216331505,
"grad_norm": 1.0059100640922563,
"learning_rate": 3.4981712198726956e-06,
"loss": 0.3642214059829712,
"step": 1000,
"token_acc": 0.8818312088488447
},
{
"epoch": 0.615478366849482,
"grad_norm": 0.9395189399708234,
"learning_rate": 3.4042989407079986e-06,
"loss": 0.3784639358520508,
"step": 1010,
"token_acc": 0.8780194366406157
},
{
"epoch": 0.6215722120658135,
"grad_norm": 1.0425592930772825,
"learning_rate": 3.311048813982627e-06,
"loss": 0.36695384979248047,
"step": 1020,
"token_acc": 0.8809777292779815
},
{
"epoch": 0.6276660572821451,
"grad_norm": 0.9146308056797927,
"learning_rate": 3.218457197254583e-06,
"loss": 0.36698212623596194,
"step": 1030,
"token_acc": 0.8810339710207495
},
{
"epoch": 0.6337599024984766,
"grad_norm": 0.976263078958663,
"learning_rate": 3.1265601913335196e-06,
"loss": 0.365465784072876,
"step": 1040,
"token_acc": 0.8814162812670944
},
{
"epoch": 0.6398537477148081,
"grad_norm": 1.0567379406046713,
"learning_rate": 3.035393626205306e-06,
"loss": 0.3610874891281128,
"step": 1050,
"token_acc": 0.8824792140002385
},
{
"epoch": 0.6459475929311396,
"grad_norm": 1.0205537815943757,
"learning_rate": 2.944993047062161e-06,
"loss": 0.35759830474853516,
"step": 1060,
"token_acc": 0.8834624031976018
},
{
"epoch": 0.6520414381474711,
"grad_norm": 1.0280714401242652,
"learning_rate": 2.8553937004438425e-06,
"loss": 0.3574142217636108,
"step": 1070,
"token_acc": 0.884169503378651
},
{
"epoch": 0.6581352833638026,
"grad_norm": 1.0187298702407688,
"learning_rate": 2.766630520495277e-06,
"loss": 0.36029987335205077,
"step": 1080,
"token_acc": 0.8823869756562952
},
{
"epoch": 0.664229128580134,
"grad_norm": 0.9191494153561297,
"learning_rate": 2.67873811534598e-06,
"loss": 0.35897092819213866,
"step": 1090,
"token_acc": 0.8827260508533868
},
{
"epoch": 0.6703229737964655,
"grad_norm": 0.9492740813391064,
"learning_rate": 2.591750753616596e-06,
"loss": 0.36168532371520995,
"step": 1100,
"token_acc": 0.8825941425209475
},
{
"epoch": 0.676416819012797,
"grad_norm": 0.9644543574186545,
"learning_rate": 2.505702351057804e-06,
"loss": 0.3665107488632202,
"step": 1110,
"token_acc": 0.8816928952036972
},
{
"epoch": 0.6825106642291285,
"grad_norm": 0.9521683470371731,
"learning_rate": 2.4206264573268174e-06,
"loss": 0.35790448188781737,
"step": 1120,
"token_acc": 0.8832886728694526
},
{
"epoch": 0.68860450944546,
"grad_norm": 1.0783164983743936,
"learning_rate": 2.336556242906608e-06,
"loss": 0.3561516284942627,
"step": 1130,
"token_acc": 0.8839432945670233
},
{
"epoch": 0.6946983546617916,
"grad_norm": 0.9994299291097577,
"learning_rate": 2.2535244861729707e-06,
"loss": 0.3557067632675171,
"step": 1140,
"token_acc": 0.8837923958883728
},
{
"epoch": 0.7007921998781231,
"grad_norm": 1.039214819811771,
"learning_rate": 2.1715635606144653e-06,
"loss": 0.3563429832458496,
"step": 1150,
"token_acc": 0.8836427544336156
},
{
"epoch": 0.7068860450944546,
"grad_norm": 0.8549094000634878,
"learning_rate": 2.0907054222102367e-06,
"loss": 0.35337374210357664,
"step": 1160,
"token_acc": 0.8852147256677358
},
{
"epoch": 0.7129798903107861,
"grad_norm": 0.894156191232295,
"learning_rate": 2.0109815969705922e-06,
"loss": 0.359290337562561,
"step": 1170,
"token_acc": 0.8828725266946272
},
{
"epoch": 0.7190737355271176,
"grad_norm": 0.8673526133846996,
"learning_rate": 1.9324231686452478e-06,
"loss": 0.35991313457489016,
"step": 1180,
"token_acc": 0.8837700799671174
},
{
"epoch": 0.7251675807434491,
"grad_norm": 0.9356232121590031,
"learning_rate": 1.8550607666039877e-06,
"loss": 0.3538203716278076,
"step": 1190,
"token_acc": 0.8850202284200351
},
{
"epoch": 0.7312614259597806,
"grad_norm": 1.0163312252270116,
"learning_rate": 1.7789245538944971e-06,
"loss": 0.3607466459274292,
"step": 1200,
"token_acc": 0.8824661130842316
},
{
"epoch": 0.7373552711761121,
"grad_norm": 0.8390316456040804,
"learning_rate": 1.7040442154820036e-06,
"loss": 0.35505869388580324,
"step": 1210,
"token_acc": 0.8845901901507859
},
{
"epoch": 0.7434491163924436,
"grad_norm": 0.921086850463397,
"learning_rate": 1.6304489466753237e-06,
"loss": 0.35682291984558107,
"step": 1220,
"token_acc": 0.884017590582417
},
{
"epoch": 0.7495429616087751,
"grad_norm": 0.8352814372993298,
"learning_rate": 1.5581674417438143e-06,
"loss": 0.3599454164505005,
"step": 1230,
"token_acc": 0.8830610223076613
},
{
"epoch": 0.7556368068251066,
"grad_norm": 0.9561368940432438,
"learning_rate": 1.4872278827296855e-06,
"loss": 0.3544511079788208,
"step": 1240,
"token_acc": 0.884971241183666
},
{
"epoch": 0.7617306520414382,
"grad_norm": 0.9963256225377098,
"learning_rate": 1.417657928460029e-06,
"loss": 0.35143122673034666,
"step": 1250,
"token_acc": 0.8854597977852672
},
{
"epoch": 0.7678244972577697,
"grad_norm": 1.0464860200353496,
"learning_rate": 1.349484703762834e-06,
"loss": 0.3545159101486206,
"step": 1260,
"token_acc": 0.8848001191868091
},
{
"epoch": 0.7739183424741012,
"grad_norm": 0.9553675018651967,
"learning_rate": 1.2827347888912057e-06,
"loss": 0.3540821552276611,
"step": 1270,
"token_acc": 0.8845431750704823
},
{
"epoch": 0.7800121876904327,
"grad_norm": 0.9171124221466627,
"learning_rate": 1.2174342091599277e-06,
"loss": 0.3459270477294922,
"step": 1280,
"token_acc": 0.8876378370255273
},
{
"epoch": 0.7861060329067642,
"grad_norm": 0.9897434740336704,
"learning_rate": 1.1536084247983626e-06,
"loss": 0.3577150821685791,
"step": 1290,
"token_acc": 0.8842498302783435
},
{
"epoch": 0.7921998781230957,
"grad_norm": 0.88979092902762,
"learning_rate": 1.0912823210237033e-06,
"loss": 0.350811505317688,
"step": 1300,
"token_acc": 0.8856008373344852
},
{
"epoch": 0.7982937233394272,
"grad_norm": 0.9287859784083828,
"learning_rate": 1.0304801983383989e-06,
"loss": 0.3551754951477051,
"step": 1310,
"token_acc": 0.8848410538592661
},
{
"epoch": 0.8043875685557587,
"grad_norm": 0.8802985747226686,
"learning_rate": 9.712257630555589e-07,
"loss": 0.35124433040618896,
"step": 1320,
"token_acc": 0.8857088187898194
},
{
"epoch": 0.8104814137720902,
"grad_norm": 0.9867993671885138,
"learning_rate": 9.135421180560394e-07,
"loss": 0.3533953666687012,
"step": 1330,
"token_acc": 0.8847630099080603
},
{
"epoch": 0.8165752589884216,
"grad_norm": 0.926446790364043,
"learning_rate": 8.574517537807897e-07,
"loss": 0.345960807800293,
"step": 1340,
"token_acc": 0.8876687663254338
},
{
"epoch": 0.8226691042047533,
"grad_norm": 0.9083144656784397,
"learning_rate": 8.029765394619899e-07,
"loss": 0.35233092308044434,
"step": 1350,
"token_acc": 0.8852270821778219
},
{
"epoch": 0.8287629494210847,
"grad_norm": 0.8865701179019319,
"learning_rate": 7.501377145963939e-07,
"loss": 0.35347394943237304,
"step": 1360,
"token_acc": 0.8848507491917527
},
{
"epoch": 0.8348567946374162,
"grad_norm": 0.8797844443235806,
"learning_rate": 6.98955880664205e-07,
"loss": 0.35233142375946047,
"step": 1370,
"token_acc": 0.8857494626572902
},
{
"epoch": 0.8409506398537477,
"grad_norm": 0.985930499180468,
"learning_rate": 6.494509930967019e-07,
"loss": 0.3484508991241455,
"step": 1380,
"token_acc": 0.8862226663569039
},
{
"epoch": 0.8470444850700792,
"grad_norm": 0.8385926015490823,
"learning_rate": 6.016423534957616e-07,
"loss": 0.34513344764709475,
"step": 1390,
"token_acc": 0.88766630420385
},
{
"epoch": 0.8531383302864107,
"grad_norm": 0.9469060182104153,
"learning_rate": 5.555486021082979e-07,
"loss": 0.3453853130340576,
"step": 1400,
"token_acc": 0.8872980190401473
},
{
"epoch": 0.8592321755027422,
"grad_norm": 1.0619116209691746,
"learning_rate": 5.111877105585672e-07,
"loss": 0.35715694427490235,
"step": 1410,
"token_acc": 0.8840584828365589
},
{
"epoch": 0.8653260207190737,
"grad_norm": 0.9990471419637711,
"learning_rate": 4.6857697484116006e-07,
"loss": 0.34844698905944826,
"step": 1420,
"token_acc": 0.8861595746957418
},
{
"epoch": 0.8714198659354052,
"grad_norm": 0.8653174829680845,
"learning_rate": 4.277330085774156e-07,
"loss": 0.34473817348480223,
"step": 1430,
"token_acc": 0.8869124712097335
},
{
"epoch": 0.8775137111517367,
"grad_norm": 0.978323586867761,
"learning_rate": 3.886717365378867e-07,
"loss": 0.3523882865905762,
"step": 1440,
"token_acc": 0.8849034480348013
},
{
"epoch": 0.8836075563680682,
"grad_norm": 1.0140389777919647,
"learning_rate": 3.5140838843339073e-07,
"loss": 0.3476292848587036,
"step": 1450,
"token_acc": 0.8866329934005767
},
{
"epoch": 0.8897014015843998,
"grad_norm": 1.0064657138214737,
"learning_rate": 3.159574929770515e-07,
"loss": 0.35365211963653564,
"step": 1460,
"token_acc": 0.8852465385385505
},
{
"epoch": 0.8957952468007313,
"grad_norm": 0.9324871915195588,
"learning_rate": 2.8233287221965555e-07,
"loss": 0.3441819190979004,
"step": 1470,
"token_acc": 0.8871095878318941
},
{
"epoch": 0.9018890920170628,
"grad_norm": 0.9055988245681192,
"learning_rate": 2.5054763616053967e-07,
"loss": 0.34738845825195314,
"step": 1480,
"token_acc": 0.8870410481583068
},
{
"epoch": 0.9079829372333943,
"grad_norm": 0.8845337059700371,
"learning_rate": 2.2061417763608818e-07,
"loss": 0.3496507167816162,
"step": 1490,
"token_acc": 0.8858089991712572
},
{
"epoch": 0.9140767824497258,
"grad_norm": 0.8884170981985747,
"learning_rate": 1.9254416748786086e-07,
"loss": 0.34417023658752444,
"step": 1500,
"token_acc": 0.8876897324425693
},
{
"epoch": 0.9201706276660573,
"grad_norm": 0.991921395955364,
"learning_rate": 1.6634855001221195e-07,
"loss": 0.3475677490234375,
"step": 1510,
"token_acc": 0.8866243585461391
},
{
"epoch": 0.9262644728823888,
"grad_norm": 0.8822494961130495,
"learning_rate": 1.4203753869318882e-07,
"loss": 0.35834810733795164,
"step": 1520,
"token_acc": 0.8836910930175179
},
{
"epoch": 0.9323583180987203,
"grad_norm": 1.0007870631310825,
"learning_rate": 1.196206122203647e-07,
"loss": 0.3498887777328491,
"step": 1530,
"token_acc": 0.8859136668935295
},
{
"epoch": 0.9384521633150518,
"grad_norm": 0.9115641715955437,
"learning_rate": 9.910651079316824e-08,
"loss": 0.3380606651306152,
"step": 1540,
"token_acc": 0.8888504997761748
},
{
"epoch": 0.9445460085313833,
"grad_norm": 0.9336474945041258,
"learning_rate": 8.050323271314331e-08,
"loss": 0.34683611392974856,
"step": 1550,
"token_acc": 0.8867626671565236
},
{
"epoch": 0.9506398537477148,
"grad_norm": 0.9153041920993996,
"learning_rate": 6.381803126546405e-08,
"loss": 0.3438985824584961,
"step": 1560,
"token_acc": 0.8876278171714178
},
{
"epoch": 0.9567336989640464,
"grad_norm": 0.8723491469657118,
"learning_rate": 4.9057411890933714e-08,
"loss": 0.35089046955108644,
"step": 1570,
"token_acc": 0.8854520115332541
},
{
"epoch": 0.9628275441803779,
"grad_norm": 0.8955372459045878,
"learning_rate": 3.622712964956032e-08,
"loss": 0.34657576084136965,
"step": 1580,
"token_acc": 0.8870879211520062
},
{
"epoch": 0.9689213893967094,
"grad_norm": 0.9416702515233623,
"learning_rate": 2.5332186976697037e-08,
"loss": 0.35133283138275145,
"step": 1590,
"token_acc": 0.8860156117328086
},
{
"epoch": 0.9750152346130408,
"grad_norm": 0.8711895076149625,
"learning_rate": 1.637683173263238e-08,
"loss": 0.35227146148681643,
"step": 1600,
"token_acc": 0.8855705009128142
},
{
"epoch": 0.9811090798293723,
"grad_norm": 0.983729311544991,
"learning_rate": 9.364555546375054e-09,
"loss": 0.34629082679748535,
"step": 1610,
"token_acc": 0.8869587094319709
},
{
"epoch": 0.9872029250457038,
"grad_norm": 0.94198406227018,
"learning_rate": 4.2980924542984634e-09,
"loss": 0.3403524875640869,
"step": 1620,
"token_acc": 0.8887918722020187
},
{
"epoch": 0.9932967702620353,
"grad_norm": 0.8500481071531769,
"learning_rate": 1.179417834153429e-09,
"loss": 0.3546321868896484,
"step": 1630,
"token_acc": 0.8848728077900511
},
{
"epoch": 0.9993906154783668,
"grad_norm": 0.9946340081463461,
"learning_rate": 9.74763488759134e-12,
"loss": 0.35070624351501467,
"step": 1640,
"token_acc": 0.8863686895606487
}
],
"logging_steps": 10,
"max_steps": 1641,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2214001985716224.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}