AnyAudio-Judge-30B / trainer_state.json
cucl2's picture
Add files using upload-large-folder tool
85b84c2 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 200.0,
"global_step": 1641,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006093845216331506,
"grad_norm": 7.29836574807662,
"learning_rate": 2.0000000000000002e-07,
"loss": 0.6879574656486511,
"step": 1,
"token_acc": 0.8069400259219983
},
{
"epoch": 0.006093845216331505,
"grad_norm": 3.9069477397555814,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.6863329675462511,
"step": 10,
"token_acc": 0.8062499341798103
},
{
"epoch": 0.01218769043266301,
"grad_norm": 1.6499482285217686,
"learning_rate": 4.000000000000001e-06,
"loss": 0.5632639408111573,
"step": 20,
"token_acc": 0.832315593221387
},
{
"epoch": 0.018281535648994516,
"grad_norm": 1.3391083762662725,
"learning_rate": 6e-06,
"loss": 0.4509421348571777,
"step": 30,
"token_acc": 0.859350495238436
},
{
"epoch": 0.02437538086532602,
"grad_norm": 1.0720631754188148,
"learning_rate": 8.000000000000001e-06,
"loss": 0.4215505599975586,
"step": 40,
"token_acc": 0.8660933700604836
},
{
"epoch": 0.030469226081657527,
"grad_norm": 1.300502159057601,
"learning_rate": 1e-05,
"loss": 0.3916645526885986,
"step": 50,
"token_acc": 0.8749446762027628
},
{
"epoch": 0.03656307129798903,
"grad_norm": 1.2222066565570464,
"learning_rate": 9.999025267866269e-06,
"loss": 0.37738680839538574,
"step": 60,
"token_acc": 0.8773714810281518
},
{
"epoch": 0.042656916514320534,
"grad_norm": 1.1003373829333203,
"learning_rate": 9.996101451506166e-06,
"loss": 0.36339468955993653,
"step": 70,
"token_acc": 0.8811438359423324
},
{
"epoch": 0.04875076173065204,
"grad_norm": 0.9139572833064542,
"learning_rate": 9.991229690894796e-06,
"loss": 0.35523133277893065,
"step": 80,
"token_acc": 0.8833139693331612
},
{
"epoch": 0.054844606946983544,
"grad_norm": 1.0649357265795398,
"learning_rate": 9.984411885496807e-06,
"loss": 0.36147160530090333,
"step": 90,
"token_acc": 0.8827829089555647
},
{
"epoch": 0.06093845216331505,
"grad_norm": 1.0474850371515747,
"learning_rate": 9.975650693525798e-06,
"loss": 0.35729637145996096,
"step": 100,
"token_acc": 0.8842431348706591
},
{
"epoch": 0.06703229737964655,
"grad_norm": 1.0275221386997506,
"learning_rate": 9.964949530907907e-06,
"loss": 0.3474123477935791,
"step": 110,
"token_acc": 0.8856792866706549
},
{
"epoch": 0.07312614259597806,
"grad_norm": 1.2902357608957626,
"learning_rate": 9.952312569949963e-06,
"loss": 0.3479644775390625,
"step": 120,
"token_acc": 0.8849104859335039
},
{
"epoch": 0.07921998781230957,
"grad_norm": 1.3451160019315398,
"learning_rate": 9.937744737712734e-06,
"loss": 0.3466474533081055,
"step": 130,
"token_acc": 0.8861058585962355
},
{
"epoch": 0.08531383302864107,
"grad_norm": 1.0790851469433436,
"learning_rate": 9.921251714089898e-06,
"loss": 0.34614810943603513,
"step": 140,
"token_acc": 0.8854811515034624
},
{
"epoch": 0.09140767824497258,
"grad_norm": 1.0038008030386316,
"learning_rate": 9.9028399295935e-06,
"loss": 0.3384540557861328,
"step": 150,
"token_acc": 0.8879619162858826
},
{
"epoch": 0.09750152346130408,
"grad_norm": 1.027349218243697,
"learning_rate": 9.882516562846735e-06,
"loss": 0.33826944828033445,
"step": 160,
"token_acc": 0.8878969612617404
},
{
"epoch": 0.1035953686776356,
"grad_norm": 1.0944757240532788,
"learning_rate": 9.860289537785058e-06,
"loss": 0.3368839740753174,
"step": 170,
"token_acc": 0.8883107398785887
},
{
"epoch": 0.10968921389396709,
"grad_norm": 0.9672890666466603,
"learning_rate": 9.83616752056669e-06,
"loss": 0.3455761194229126,
"step": 180,
"token_acc": 0.8851188684923262
},
{
"epoch": 0.1157830591102986,
"grad_norm": 0.8886431201198384,
"learning_rate": 9.810159916193763e-06,
"loss": 0.32952630519866943,
"step": 190,
"token_acc": 0.8905697047489018
},
{
"epoch": 0.1218769043266301,
"grad_norm": 0.9444272816074001,
"learning_rate": 9.782276864845351e-06,
"loss": 0.33125505447387693,
"step": 200,
"token_acc": 0.8897695109589824
},
{
"epoch": 0.12797074954296161,
"grad_norm": 1.0093535144294836,
"learning_rate": 9.752529237923914e-06,
"loss": 0.3311288833618164,
"step": 210,
"token_acc": 0.8905364268561583
},
{
"epoch": 0.1340645947592931,
"grad_norm": 1.1532189931201864,
"learning_rate": 9.720928633816596e-06,
"loss": 0.3244771003723145,
"step": 220,
"token_acc": 0.8915392526998382
},
{
"epoch": 0.14015843997562463,
"grad_norm": 0.9598378464215558,
"learning_rate": 9.687487373373103e-06,
"loss": 0.3279410362243652,
"step": 230,
"token_acc": 0.8906910502215741
},
{
"epoch": 0.14625228519195613,
"grad_norm": 0.9540187948014937,
"learning_rate": 9.652218495101894e-06,
"loss": 0.3265339136123657,
"step": 240,
"token_acc": 0.8910028614336833
},
{
"epoch": 0.15234613040828762,
"grad_norm": 1.0470189428654273,
"learning_rate": 9.61513575008656e-06,
"loss": 0.33319640159606934,
"step": 250,
"token_acc": 0.8888186484938951
},
{
"epoch": 0.15843997562461914,
"grad_norm": 0.9615038649371879,
"learning_rate": 9.576253596624367e-06,
"loss": 0.32928056716918946,
"step": 260,
"token_acc": 0.8897860391237342
},
{
"epoch": 0.16453382084095064,
"grad_norm": 1.09800599181465,
"learning_rate": 9.53558719458908e-06,
"loss": 0.32557024955749514,
"step": 270,
"token_acc": 0.8914715787293208
},
{
"epoch": 0.17062766605728213,
"grad_norm": 1.0743262974854428,
"learning_rate": 9.49315239952023e-06,
"loss": 0.32053494453430176,
"step": 280,
"token_acc": 0.8929576222604401
},
{
"epoch": 0.17672151127361366,
"grad_norm": 1.060412117175443,
"learning_rate": 9.448965756441154e-06,
"loss": 0.3243874073028564,
"step": 290,
"token_acc": 0.8921667614562232
},
{
"epoch": 0.18281535648994515,
"grad_norm": 0.9594753815838422,
"learning_rate": 9.403044493408205e-06,
"loss": 0.3233642578125,
"step": 300,
"token_acc": 0.891047436596846
},
{
"epoch": 0.18890920170627665,
"grad_norm": 0.9403281436285019,
"learning_rate": 9.355406514793667e-06,
"loss": 0.31829214096069336,
"step": 310,
"token_acc": 0.8938932609968795
},
{
"epoch": 0.19500304692260817,
"grad_norm": 0.9536634938537397,
"learning_rate": 9.306070394304955e-06,
"loss": 0.3202193260192871,
"step": 320,
"token_acc": 0.8931520198180799
},
{
"epoch": 0.20109689213893966,
"grad_norm": 1.119379822493263,
"learning_rate": 9.255055367742868e-06,
"loss": 0.3239091396331787,
"step": 330,
"token_acc": 0.8923521607278241
},
{
"epoch": 0.2071907373552712,
"grad_norm": 1.0373576096304553,
"learning_rate": 9.202381325501683e-06,
"loss": 0.31700589656829836,
"step": 340,
"token_acc": 0.8944783352337514
},
{
"epoch": 0.21328458257160268,
"grad_norm": 1.0632563437214946,
"learning_rate": 9.148068804814032e-06,
"loss": 0.31794281005859376,
"step": 350,
"token_acc": 0.8930956012903548
},
{
"epoch": 0.21937842778793418,
"grad_norm": 1.0242050960110551,
"learning_rate": 9.092138981743588e-06,
"loss": 0.3202871799468994,
"step": 360,
"token_acc": 0.8935469022061816
},
{
"epoch": 0.2254722730042657,
"grad_norm": 0.8239921572139911,
"learning_rate": 9.034613662928665e-06,
"loss": 0.3142183542251587,
"step": 370,
"token_acc": 0.8951745718050066
},
{
"epoch": 0.2315661182205972,
"grad_norm": 0.9147511550012487,
"learning_rate": 8.975515277079961e-06,
"loss": 0.3087962865829468,
"step": 380,
"token_acc": 0.8958298740422705
},
{
"epoch": 0.2376599634369287,
"grad_norm": 0.8794833827260621,
"learning_rate": 8.91486686623577e-06,
"loss": 0.3132402658462524,
"step": 390,
"token_acc": 0.8948639533970186
},
{
"epoch": 0.2437538086532602,
"grad_norm": 1.0069623307664877,
"learning_rate": 8.85269207677806e-06,
"loss": 0.31006736755371095,
"step": 400,
"token_acc": 0.8951928192311975
},
{
"epoch": 0.2498476538695917,
"grad_norm": 0.9808015041824597,
"learning_rate": 8.789015150212907e-06,
"loss": 0.30683579444885256,
"step": 410,
"token_acc": 0.8967586393232839
},
{
"epoch": 0.25594149908592323,
"grad_norm": 0.9081237770188716,
"learning_rate": 8.72386091371891e-06,
"loss": 0.3061988830566406,
"step": 420,
"token_acc": 0.8959391589507399
},
{
"epoch": 0.2620353443022547,
"grad_norm": 1.04219527083527,
"learning_rate": 8.657254770467252e-06,
"loss": 0.3091754674911499,
"step": 430,
"token_acc": 0.8954508616603208
},
{
"epoch": 0.2681291895185862,
"grad_norm": 1.0065133793639498,
"learning_rate": 8.58922268971719e-06,
"loss": 0.30993127822875977,
"step": 440,
"token_acc": 0.895664191270881
},
{
"epoch": 0.2742230347349177,
"grad_norm": 0.9080797671925362,
"learning_rate": 8.51979119669081e-06,
"loss": 0.31555490493774413,
"step": 450,
"token_acc": 0.8941405988077487
},
{
"epoch": 0.28031687995124926,
"grad_norm": 0.9841139463866474,
"learning_rate": 8.448987362231054e-06,
"loss": 0.30534186363220217,
"step": 460,
"token_acc": 0.8968707588256722
},
{
"epoch": 0.28641072516758076,
"grad_norm": 0.9677823622528902,
"learning_rate": 8.376838792246978e-06,
"loss": 0.3050978422164917,
"step": 470,
"token_acc": 0.8967596979985816
},
{
"epoch": 0.29250457038391225,
"grad_norm": 0.8117589456035273,
"learning_rate": 8.303373616950408e-06,
"loss": 0.3012993335723877,
"step": 480,
"token_acc": 0.898916481794861
},
{
"epoch": 0.29859841560024375,
"grad_norm": 0.8967761049487325,
"learning_rate": 8.228620479888172e-06,
"loss": 0.2984607219696045,
"step": 490,
"token_acc": 0.8986162002706045
},
{
"epoch": 0.30469226081657524,
"grad_norm": 0.7934114582439064,
"learning_rate": 8.152608526774188e-06,
"loss": 0.3049586057662964,
"step": 500,
"token_acc": 0.8968112886022876
},
{
"epoch": 0.31078610603290674,
"grad_norm": 0.825580955342704,
"learning_rate": 8.075367394125755e-06,
"loss": 0.30215206146240237,
"step": 510,
"token_acc": 0.8978885397098497
},
{
"epoch": 0.3168799512492383,
"grad_norm": 0.8296290441677941,
"learning_rate": 7.996927197708486e-06,
"loss": 0.3088541507720947,
"step": 520,
"token_acc": 0.8963321107035679
},
{
"epoch": 0.3229737964655698,
"grad_norm": 0.8755135202445912,
"learning_rate": 7.917318520794395e-06,
"loss": 0.30083427429199217,
"step": 530,
"token_acc": 0.899119480167394
},
{
"epoch": 0.3290676416819013,
"grad_norm": 0.9101072984644949,
"learning_rate": 7.836572402237683e-06,
"loss": 0.3058091878890991,
"step": 540,
"token_acc": 0.896643718272106
},
{
"epoch": 0.3351614868982328,
"grad_norm": 0.9771967807763615,
"learning_rate": 7.754720324372924e-06,
"loss": 0.30214991569519045,
"step": 550,
"token_acc": 0.8980588639486945
},
{
"epoch": 0.34125533211456427,
"grad_norm": 1.0026225580388461,
"learning_rate": 7.67179420074032e-06,
"loss": 0.3041478395462036,
"step": 560,
"token_acc": 0.8965942594865093
},
{
"epoch": 0.3473491773308958,
"grad_norm": 0.9388665918318329,
"learning_rate": 7.587826363642845e-06,
"loss": 0.30187268257141114,
"step": 570,
"token_acc": 0.8980740928392202
},
{
"epoch": 0.3534430225472273,
"grad_norm": 0.9610197211126468,
"learning_rate": 7.502849551540106e-06,
"loss": 0.2962314605712891,
"step": 580,
"token_acc": 0.8994921135841125
},
{
"epoch": 0.3595368677635588,
"grad_norm": 0.832216076371822,
"learning_rate": 7.4168968962838524e-06,
"loss": 0.2948365926742554,
"step": 590,
"token_acc": 0.8995369426034115
},
{
"epoch": 0.3656307129798903,
"grad_norm": 0.9377431212404606,
"learning_rate": 7.330001910200111e-06,
"loss": 0.29007649421691895,
"step": 600,
"token_acc": 0.9010131261293394
},
{
"epoch": 0.3717245581962218,
"grad_norm": 0.8726611852126548,
"learning_rate": 7.242198473022958e-06,
"loss": 0.2962885856628418,
"step": 610,
"token_acc": 0.9000062303355035
},
{
"epoch": 0.3778184034125533,
"grad_norm": 0.9153282793617801,
"learning_rate": 7.15352081868506e-06,
"loss": 0.30144367218017576,
"step": 620,
"token_acc": 0.8989331770222744
},
{
"epoch": 0.38391224862888484,
"grad_norm": 0.993391313101372,
"learning_rate": 7.0640035219701085e-06,
"loss": 0.301465106010437,
"step": 630,
"token_acc": 0.8974685325619576
},
{
"epoch": 0.39000609384521634,
"grad_norm": 1.0046408788594328,
"learning_rate": 6.973681485032359e-06,
"loss": 0.2955395460128784,
"step": 640,
"token_acc": 0.8996091046695718
},
{
"epoch": 0.39609993906154783,
"grad_norm": 0.822820271911727,
"learning_rate": 6.8825899237885215e-06,
"loss": 0.2931050300598145,
"step": 650,
"token_acc": 0.901203589259751
},
{
"epoch": 0.40219378427787933,
"grad_norm": 0.8482496681393756,
"learning_rate": 6.7907643541873446e-06,
"loss": 0.29596996307373047,
"step": 660,
"token_acc": 0.8996866207121305
},
{
"epoch": 0.4082876294942108,
"grad_norm": 0.8775663994372018,
"learning_rate": 6.698240578362179e-06,
"loss": 0.29141840934753416,
"step": 670,
"token_acc": 0.9003262426482238
},
{
"epoch": 0.4143814747105424,
"grad_norm": 0.984669646190565,
"learning_rate": 6.6050546706719984e-06,
"loss": 0.29290521144866943,
"step": 680,
"token_acc": 0.9014104043327218
},
{
"epoch": 0.42047531992687387,
"grad_norm": 0.8784418931211103,
"learning_rate": 6.511242963636257e-06,
"loss": 0.29056534767150877,
"step": 690,
"token_acc": 0.9016642094853267
},
{
"epoch": 0.42656916514320536,
"grad_norm": 1.0470361792821843,
"learning_rate": 6.416842033769106e-06,
"loss": 0.2978256940841675,
"step": 700,
"token_acc": 0.8997917186822428
},
{
"epoch": 0.43266301035953686,
"grad_norm": 0.9613791001197699,
"learning_rate": 6.321888687318457e-06,
"loss": 0.2870903253555298,
"step": 710,
"token_acc": 0.903113691147251
},
{
"epoch": 0.43875685557586835,
"grad_norm": 0.8405716630112535,
"learning_rate": 6.2264199459155105e-06,
"loss": 0.29581589698791505,
"step": 720,
"token_acc": 0.9003898532372131
},
{
"epoch": 0.4448507007921999,
"grad_norm": 0.9817927857442479,
"learning_rate": 6.130473032140272e-06,
"loss": 0.29129691123962403,
"step": 730,
"token_acc": 0.9009383225625913
},
{
"epoch": 0.4509445460085314,
"grad_norm": 0.9100915684781385,
"learning_rate": 6.0340853550087345e-06,
"loss": 0.29650187492370605,
"step": 740,
"token_acc": 0.9002656385758284
},
{
"epoch": 0.4570383912248629,
"grad_norm": 0.9238619342391209,
"learning_rate": 5.937294495387377e-06,
"loss": 0.2921621561050415,
"step": 750,
"token_acc": 0.9008455874319925
},
{
"epoch": 0.4631322364411944,
"grad_norm": 0.8289061064281614,
"learning_rate": 5.840138191340651e-06,
"loss": 0.28725643157958985,
"step": 760,
"token_acc": 0.9028466795835374
},
{
"epoch": 0.4692260816575259,
"grad_norm": 0.8901360785145829,
"learning_rate": 5.7426543234171736e-06,
"loss": 0.2865636348724365,
"step": 770,
"token_acc": 0.90197109501604
},
{
"epoch": 0.4753199268738574,
"grad_norm": 0.8709058451908881,
"learning_rate": 5.644880899880382e-06,
"loss": 0.2886040687561035,
"step": 780,
"token_acc": 0.9023270689287564
},
{
"epoch": 0.48141377209018893,
"grad_norm": 0.9306196525173549,
"learning_rate": 5.546856041889374e-06,
"loss": 0.28833470344543455,
"step": 790,
"token_acc": 0.9016039529639475
},
{
"epoch": 0.4875076173065204,
"grad_norm": 0.9401250944884257,
"learning_rate": 5.448617968635741e-06,
"loss": 0.28241567611694335,
"step": 800,
"token_acc": 0.9046351860634857
},
{
"epoch": 0.4936014625228519,
"grad_norm": 0.849983180158667,
"learning_rate": 5.35020498244219e-06,
"loss": 0.2863471508026123,
"step": 810,
"token_acc": 0.9020820443108771
},
{
"epoch": 0.4996953077391834,
"grad_norm": 0.7275676892245573,
"learning_rate": 5.251655453828728e-06,
"loss": 0.28403263092041015,
"step": 820,
"token_acc": 0.9032200331101135
},
{
"epoch": 0.505789152955515,
"grad_norm": 0.8630110541652776,
"learning_rate": 5.153007806552275e-06,
"loss": 0.28420357704162597,
"step": 830,
"token_acc": 0.9033704118180856
},
{
"epoch": 0.5118829981718465,
"grad_norm": 0.8835421688612489,
"learning_rate": 5.054300502625517e-06,
"loss": 0.2866727352142334,
"step": 840,
"token_acc": 0.9032091030720939
},
{
"epoch": 0.517976843388178,
"grad_norm": 0.8544875287993453,
"learning_rate": 4.9555720273208475e-06,
"loss": 0.289061975479126,
"step": 850,
"token_acc": 0.9017317721145331
},
{
"epoch": 0.5240706886045094,
"grad_norm": 0.8549205024097043,
"learning_rate": 4.856860874165218e-06,
"loss": 0.2889714241027832,
"step": 860,
"token_acc": 0.9025821278082484
},
{
"epoch": 0.5301645338208409,
"grad_norm": 0.9236105201664164,
"learning_rate": 4.758205529931808e-06,
"loss": 0.2887147903442383,
"step": 870,
"token_acc": 0.9019780647042623
},
{
"epoch": 0.5362583790371724,
"grad_norm": 0.8682794949168545,
"learning_rate": 4.659644459634293e-06,
"loss": 0.27901973724365237,
"step": 880,
"token_acc": 0.9043348147353298
},
{
"epoch": 0.5423522242535039,
"grad_norm": 0.8729641279912889,
"learning_rate": 4.56121609152961e-06,
"loss": 0.2851783275604248,
"step": 890,
"token_acc": 0.9031912203833561
},
{
"epoch": 0.5484460694698354,
"grad_norm": 0.8418875200344721,
"learning_rate": 4.462958802135069e-06,
"loss": 0.27748913764953614,
"step": 900,
"token_acc": 0.9059390881360567
},
{
"epoch": 0.5545399146861669,
"grad_norm": 0.8894129853584928,
"learning_rate": 4.364910901265607e-06,
"loss": 0.28034243583679197,
"step": 910,
"token_acc": 0.9040050510001095
},
{
"epoch": 0.5606337599024985,
"grad_norm": 0.8334588350840866,
"learning_rate": 4.2671106170970734e-06,
"loss": 0.2801810264587402,
"step": 920,
"token_acc": 0.9042555097117814
},
{
"epoch": 0.56672760511883,
"grad_norm": 0.8763484647820953,
"learning_rate": 4.169596081261332e-06,
"loss": 0.2837662696838379,
"step": 930,
"token_acc": 0.9037383810780553
},
{
"epoch": 0.5728214503351615,
"grad_norm": 0.8713237221620964,
"learning_rate": 4.072405313979021e-06,
"loss": 0.27712116241455076,
"step": 940,
"token_acc": 0.9053036654966837
},
{
"epoch": 0.578915295551493,
"grad_norm": 0.8844118885887313,
"learning_rate": 3.975576209235726e-06,
"loss": 0.2806640625,
"step": 950,
"token_acc": 0.9047340125759082
},
{
"epoch": 0.5850091407678245,
"grad_norm": 0.8719900072150049,
"learning_rate": 3.879146520007399e-06,
"loss": 0.27962145805358884,
"step": 960,
"token_acc": 0.9052189543003484
},
{
"epoch": 0.591102985984156,
"grad_norm": 0.8621214557871747,
"learning_rate": 3.7831538435407344e-06,
"loss": 0.281157398223877,
"step": 970,
"token_acc": 0.9040866660422715
},
{
"epoch": 0.5971968312004875,
"grad_norm": 0.85966956497571,
"learning_rate": 3.687635606694271e-06,
"loss": 0.2849492073059082,
"step": 980,
"token_acc": 0.9041384613065175
},
{
"epoch": 0.603290676416819,
"grad_norm": 0.8505152160082087,
"learning_rate": 3.592629051345936e-06,
"loss": 0.2792569637298584,
"step": 990,
"token_acc": 0.9054755884673447
},
{
"epoch": 0.6093845216331505,
"grad_norm": 0.9214402604733031,
"learning_rate": 3.4981712198726956e-06,
"loss": 0.2757925033569336,
"step": 1000,
"token_acc": 0.9061934946027913
},
{
"epoch": 0.615478366849482,
"grad_norm": 0.8580050185956459,
"learning_rate": 3.4042989407079986e-06,
"loss": 0.2790709972381592,
"step": 1010,
"token_acc": 0.9051715866568587
},
{
"epoch": 0.6215722120658135,
"grad_norm": 0.7762593811197912,
"learning_rate": 3.311048813982627e-06,
"loss": 0.2719182014465332,
"step": 1020,
"token_acc": 0.9072872717021148
},
{
"epoch": 0.6276660572821451,
"grad_norm": 0.8305900083620258,
"learning_rate": 3.218457197254583e-06,
"loss": 0.27586350440979,
"step": 1030,
"token_acc": 0.9060086339753238
},
{
"epoch": 0.6337599024984766,
"grad_norm": 0.8955059982745348,
"learning_rate": 3.1265601913335196e-06,
"loss": 0.2731196403503418,
"step": 1040,
"token_acc": 0.9076037121001682
},
{
"epoch": 0.6398537477148081,
"grad_norm": 0.8712242634564721,
"learning_rate": 3.035393626205306e-06,
"loss": 0.2795309066772461,
"step": 1050,
"token_acc": 0.9047484454494065
},
{
"epoch": 0.6459475929311396,
"grad_norm": 0.8162886626845998,
"learning_rate": 2.944993047062161e-06,
"loss": 0.26994550228118896,
"step": 1060,
"token_acc": 0.9082915598041501
},
{
"epoch": 0.6520414381474711,
"grad_norm": 0.8874044395879559,
"learning_rate": 2.8553937004438425e-06,
"loss": 0.2744093418121338,
"step": 1070,
"token_acc": 0.9072907727436752
},
{
"epoch": 0.6581352833638026,
"grad_norm": 0.8288310546310844,
"learning_rate": 2.766630520495277e-06,
"loss": 0.2674886226654053,
"step": 1080,
"token_acc": 0.9087633615660454
},
{
"epoch": 0.664229128580134,
"grad_norm": 0.8828846811452266,
"learning_rate": 2.67873811534598e-06,
"loss": 0.2735260486602783,
"step": 1090,
"token_acc": 0.9060899523658108
},
{
"epoch": 0.6703229737964655,
"grad_norm": 0.8055682508984224,
"learning_rate": 2.591750753616596e-06,
"loss": 0.2687216758728027,
"step": 1100,
"token_acc": 0.9077474362897096
},
{
"epoch": 0.676416819012797,
"grad_norm": 0.8527567804445506,
"learning_rate": 2.505702351057804e-06,
"loss": 0.27487955093383787,
"step": 1110,
"token_acc": 0.9064443638076686
},
{
"epoch": 0.6825106642291285,
"grad_norm": 0.8043496565707575,
"learning_rate": 2.4206264573268174e-06,
"loss": 0.2709942102432251,
"step": 1120,
"token_acc": 0.9082038753361505
},
{
"epoch": 0.68860450944546,
"grad_norm": 0.8177848047582682,
"learning_rate": 2.336556242906608e-06,
"loss": 0.26909465789794923,
"step": 1130,
"token_acc": 0.907756650686803
},
{
"epoch": 0.6946983546617916,
"grad_norm": 0.8281752422683824,
"learning_rate": 2.2535244861729707e-06,
"loss": 0.27281508445739744,
"step": 1140,
"token_acc": 0.9068872307019957
},
{
"epoch": 0.7007921998781231,
"grad_norm": 0.7368812719716331,
"learning_rate": 2.1715635606144653e-06,
"loss": 0.2704050064086914,
"step": 1150,
"token_acc": 0.9086829548350435
},
{
"epoch": 0.7068860450944546,
"grad_norm": 0.8983810091681733,
"learning_rate": 2.0907054222102367e-06,
"loss": 0.2690997362136841,
"step": 1160,
"token_acc": 0.9079458353782861
},
{
"epoch": 0.7129798903107861,
"grad_norm": 0.976946993038541,
"learning_rate": 2.0109815969705922e-06,
"loss": 0.2747433423995972,
"step": 1170,
"token_acc": 0.9060301301519122
},
{
"epoch": 0.7190737355271176,
"grad_norm": 0.8007237087596002,
"learning_rate": 1.9324231686452478e-06,
"loss": 0.2671233654022217,
"step": 1180,
"token_acc": 0.9086050565301521
},
{
"epoch": 0.7251675807434491,
"grad_norm": 0.8064570085543009,
"learning_rate": 1.8550607666039877e-06,
"loss": 0.27011594772338865,
"step": 1190,
"token_acc": 0.9079702457204528
},
{
"epoch": 0.7312614259597806,
"grad_norm": 0.8831329237202693,
"learning_rate": 1.7789245538944971e-06,
"loss": 0.2661958456039429,
"step": 1200,
"token_acc": 0.909048799129166
},
{
"epoch": 0.7373552711761121,
"grad_norm": 0.8430483750865159,
"learning_rate": 1.7040442154820036e-06,
"loss": 0.2669236183166504,
"step": 1210,
"token_acc": 0.9086229167124993
},
{
"epoch": 0.7434491163924436,
"grad_norm": 0.8347549917161227,
"learning_rate": 1.6304489466753237e-06,
"loss": 0.26542019844055176,
"step": 1220,
"token_acc": 0.9091426534148126
},
{
"epoch": 0.7495429616087751,
"grad_norm": 0.830454588444548,
"learning_rate": 1.5581674417438143e-06,
"loss": 0.2647353410720825,
"step": 1230,
"token_acc": 0.909506020348688
},
{
"epoch": 0.7556368068251066,
"grad_norm": 0.8676010280531331,
"learning_rate": 1.4872278827296855e-06,
"loss": 0.2685891628265381,
"step": 1240,
"token_acc": 0.9081622979570555
},
{
"epoch": 0.7617306520414382,
"grad_norm": 0.707455832514829,
"learning_rate": 1.417657928460029e-06,
"loss": 0.2678367614746094,
"step": 1250,
"token_acc": 0.9088005125349524
},
{
"epoch": 0.7678244972577697,
"grad_norm": 0.9332592296684585,
"learning_rate": 1.349484703762834e-06,
"loss": 0.2678724765777588,
"step": 1260,
"token_acc": 0.9090774872882107
},
{
"epoch": 0.7739183424741012,
"grad_norm": 0.9124536066814944,
"learning_rate": 1.2827347888912057e-06,
"loss": 0.2636892795562744,
"step": 1270,
"token_acc": 0.9094603622970171
},
{
"epoch": 0.7800121876904327,
"grad_norm": 0.8868523419233089,
"learning_rate": 1.2174342091599277e-06,
"loss": 0.2640355587005615,
"step": 1280,
"token_acc": 0.9101203136208611
},
{
"epoch": 0.7861060329067642,
"grad_norm": 0.8162281839833351,
"learning_rate": 1.1536084247983626e-06,
"loss": 0.2618927717208862,
"step": 1290,
"token_acc": 0.9093984578881031
},
{
"epoch": 0.7921998781230957,
"grad_norm": 0.8334510756887459,
"learning_rate": 1.0912823210237033e-06,
"loss": 0.2639930725097656,
"step": 1300,
"token_acc": 0.9095154304277207
},
{
"epoch": 0.7982937233394272,
"grad_norm": 0.9484830756554262,
"learning_rate": 1.0304801983383989e-06,
"loss": 0.2679661750793457,
"step": 1310,
"token_acc": 0.9085439305540266
},
{
"epoch": 0.8043875685557587,
"grad_norm": 0.7917038864004372,
"learning_rate": 9.712257630555589e-07,
"loss": 0.263914155960083,
"step": 1320,
"token_acc": 0.9098282765579997
},
{
"epoch": 0.8104814137720902,
"grad_norm": 0.8164310323072432,
"learning_rate": 9.135421180560394e-07,
"loss": 0.27391440868377687,
"step": 1330,
"token_acc": 0.9072812991094814
},
{
"epoch": 0.8165752589884216,
"grad_norm": 0.7878349824156636,
"learning_rate": 8.574517537807897e-07,
"loss": 0.2658750057220459,
"step": 1340,
"token_acc": 0.9089495350890863
},
{
"epoch": 0.8226691042047533,
"grad_norm": 0.7620095983862565,
"learning_rate": 8.029765394619899e-07,
"loss": 0.25719194412231444,
"step": 1350,
"token_acc": 0.911888654763225
},
{
"epoch": 0.8287629494210847,
"grad_norm": 0.8206579913283775,
"learning_rate": 7.501377145963939e-07,
"loss": 0.2592960834503174,
"step": 1360,
"token_acc": 0.9114338606023208
},
{
"epoch": 0.8348567946374162,
"grad_norm": 0.8789992765077687,
"learning_rate": 6.98955880664205e-07,
"loss": 0.26435413360595705,
"step": 1370,
"token_acc": 0.9108234231521902
},
{
"epoch": 0.8409506398537477,
"grad_norm": 0.9837537034286392,
"learning_rate": 6.494509930967019e-07,
"loss": 0.2641714572906494,
"step": 1380,
"token_acc": 0.9101989856105199
},
{
"epoch": 0.8470444850700792,
"grad_norm": 0.8346126227296959,
"learning_rate": 6.016423534957616e-07,
"loss": 0.26149678230285645,
"step": 1390,
"token_acc": 0.9105589320112891
},
{
"epoch": 0.8531383302864107,
"grad_norm": 0.789773058927434,
"learning_rate": 5.555486021082979e-07,
"loss": 0.25979223251342776,
"step": 1400,
"token_acc": 0.9105615762961907
},
{
"epoch": 0.8592321755027422,
"grad_norm": 0.7391262213112039,
"learning_rate": 5.111877105585672e-07,
"loss": 0.2619319915771484,
"step": 1410,
"token_acc": 0.9112515917773331
},
{
"epoch": 0.8653260207190737,
"grad_norm": 0.732756554862386,
"learning_rate": 4.6857697484116006e-07,
"loss": 0.26052017211914064,
"step": 1420,
"token_acc": 0.9111355670436785
},
{
"epoch": 0.8714198659354052,
"grad_norm": 0.9052605008388693,
"learning_rate": 4.277330085774156e-07,
"loss": 0.26050865650177,
"step": 1430,
"token_acc": 0.9113159185335296
},
{
"epoch": 0.8775137111517367,
"grad_norm": 0.8239425361941399,
"learning_rate": 3.886717365378867e-07,
"loss": 0.2652243137359619,
"step": 1440,
"token_acc": 0.9098248347337728
},
{
"epoch": 0.8836075563680682,
"grad_norm": 0.8321718064306127,
"learning_rate": 3.5140838843339073e-07,
"loss": 0.2614146709442139,
"step": 1450,
"token_acc": 0.9103242825028786
},
{
"epoch": 0.8897014015843998,
"grad_norm": 0.9427110487674982,
"learning_rate": 3.159574929770515e-07,
"loss": 0.26317219734191893,
"step": 1460,
"token_acc": 0.9102542106779491
},
{
"epoch": 0.8957952468007313,
"grad_norm": 0.8005907233947733,
"learning_rate": 2.8233287221965555e-07,
"loss": 0.2689415216445923,
"step": 1470,
"token_acc": 0.9084669140620019
},
{
"epoch": 0.9018890920170628,
"grad_norm": 0.8834142513691242,
"learning_rate": 2.5054763616053967e-07,
"loss": 0.26386346817016604,
"step": 1480,
"token_acc": 0.9098926633899981
},
{
"epoch": 0.9079829372333943,
"grad_norm": 0.8652226986660423,
"learning_rate": 2.2061417763608818e-07,
"loss": 0.2603492259979248,
"step": 1490,
"token_acc": 0.9111148919621807
},
{
"epoch": 0.9140767824497258,
"grad_norm": 0.7761477175475302,
"learning_rate": 1.9254416748786086e-07,
"loss": 0.2592171669006348,
"step": 1500,
"token_acc": 0.9112373322356396
},
{
"epoch": 0.9201706276660573,
"grad_norm": 0.7766751712855907,
"learning_rate": 1.6634855001221195e-07,
"loss": 0.258951997756958,
"step": 1510,
"token_acc": 0.9106356546794409
},
{
"epoch": 0.9262644728823888,
"grad_norm": 0.856909898768609,
"learning_rate": 1.4203753869318882e-07,
"loss": 0.2605564117431641,
"step": 1520,
"token_acc": 0.9109015609309732
},
{
"epoch": 0.9323583180987203,
"grad_norm": 0.8678261922910359,
"learning_rate": 1.196206122203647e-07,
"loss": 0.267201566696167,
"step": 1530,
"token_acc": 0.9091924387660025
},
{
"epoch": 0.9384521633150518,
"grad_norm": 0.8245437796092319,
"learning_rate": 9.910651079316824e-08,
"loss": 0.25865275859832765,
"step": 1540,
"token_acc": 0.9117370919567883
},
{
"epoch": 0.9445460085313833,
"grad_norm": 0.7648349491441419,
"learning_rate": 8.050323271314331e-08,
"loss": 0.2569366216659546,
"step": 1550,
"token_acc": 0.9122892575583048
},
{
"epoch": 0.9506398537477148,
"grad_norm": 0.844132664732268,
"learning_rate": 6.381803126546405e-08,
"loss": 0.26746933460235595,
"step": 1560,
"token_acc": 0.9087516916083089
},
{
"epoch": 0.9567336989640464,
"grad_norm": 0.8550282187735159,
"learning_rate": 4.9057411890933714e-08,
"loss": 0.2634291172027588,
"step": 1570,
"token_acc": 0.9101502847948816
},
{
"epoch": 0.9628275441803779,
"grad_norm": 0.8962920945122091,
"learning_rate": 3.622712964956032e-08,
"loss": 0.26028733253479003,
"step": 1580,
"token_acc": 0.9110691577022408
},
{
"epoch": 0.9689213893967094,
"grad_norm": 0.8191620838439264,
"learning_rate": 2.5332186976697037e-08,
"loss": 0.26295406818389894,
"step": 1590,
"token_acc": 0.9106372558253433
},
{
"epoch": 0.9750152346130408,
"grad_norm": 0.803005796954641,
"learning_rate": 1.637683173263238e-08,
"loss": 0.2601941585540771,
"step": 1600,
"token_acc": 0.9106438532047947
},
{
"epoch": 0.9811090798293723,
"grad_norm": 1.0200184560604955,
"learning_rate": 9.364555546375054e-09,
"loss": 0.265762186050415,
"step": 1610,
"token_acc": 0.9099375217270665
},
{
"epoch": 0.9872029250457038,
"grad_norm": 0.8217240197064228,
"learning_rate": 4.2980924542984634e-09,
"loss": 0.261862587928772,
"step": 1620,
"token_acc": 0.9104295425993519
},
{
"epoch": 0.9932967702620353,
"grad_norm": 0.8981159929317022,
"learning_rate": 1.179417834153429e-09,
"loss": 0.2626341342926025,
"step": 1630,
"token_acc": 0.9100063135380294
},
{
"epoch": 0.9993906154783668,
"grad_norm": 0.8766885423326849,
"learning_rate": 9.74763488759134e-12,
"loss": 0.2605599880218506,
"step": 1640,
"token_acc": 0.9109949846594887
}
],
"logging_steps": 10,
"max_steps": 1641,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1566399809454080.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}