{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 50.0,
"global_step": 717,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0013953082759222116,
"grad_norm": 2.108574390411377,
"learning_rate": 2.7777777777777774e-08,
"loss": 0.5175914764404297,
"step": 1,
"token_acc": 0.9343839541547277
},
{
"epoch": 0.006976541379611058,
"grad_norm": 2.2113378047943115,
"learning_rate": 1.3888888888888888e-07,
"loss": 0.5278360247612,
"step": 5,
"token_acc": 0.9315846730327572
},
{
"epoch": 0.013953082759222116,
"grad_norm": 2.659011125564575,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.5554334640502929,
"step": 10,
"token_acc": 0.9270202547504698
},
{
"epoch": 0.020929624138833175,
"grad_norm": 2.251737117767334,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.5544517517089844,
"step": 15,
"token_acc": 0.9273873055524015
},
{
"epoch": 0.027906165518444232,
"grad_norm": 1.9254176616668701,
"learning_rate": 5.555555555555555e-07,
"loss": 0.5853276729583741,
"step": 20,
"token_acc": 0.920275952157407
},
{
"epoch": 0.03488270689805529,
"grad_norm": 2.059199333190918,
"learning_rate": 6.944444444444444e-07,
"loss": 0.5623232841491699,
"step": 25,
"token_acc": 0.9284975165562914
},
{
"epoch": 0.04185924827766635,
"grad_norm": 2.425384998321533,
"learning_rate": 8.333333333333333e-07,
"loss": 0.5716644763946533,
"step": 30,
"token_acc": 0.9264636757716613
},
{
"epoch": 0.048835789657277404,
"grad_norm": 1.9226746559143066,
"learning_rate": 9.722222222222222e-07,
"loss": 0.5484982490539551,
"step": 35,
"token_acc": 0.9298897411313519
},
{
"epoch": 0.055812331036888464,
"grad_norm": 2.045583724975586,
"learning_rate": 9.999148757713664e-07,
"loss": 0.5991110324859619,
"step": 40,
"token_acc": 0.9234525698937859
},
{
"epoch": 0.06278887241649952,
"grad_norm": 1.9219040870666504,
"learning_rate": 9.995691082675907e-07,
"loss": 0.5559669494628906,
"step": 45,
"token_acc": 0.9266224757206721
},
{
"epoch": 0.06976541379611058,
"grad_norm": 1.9935494661331177,
"learning_rate": 9.98957561037365e-07,
"loss": 0.5474924564361572,
"step": 50,
"token_acc": 0.92885522721629
},
{
"epoch": 0.07674195517572163,
"grad_norm": 2.460942506790161,
"learning_rate": 9.980805594347849e-07,
"loss": 0.5159276008605957,
"step": 55,
"token_acc": 0.9340729405763836
},
{
"epoch": 0.0837184965553327,
"grad_norm": 2.300776481628418,
"learning_rate": 9.969385700404345e-07,
"loss": 0.5166152000427247,
"step": 60,
"token_acc": 0.9320153815815421
},
{
"epoch": 0.09069503793494375,
"grad_norm": 2.573843240737915,
"learning_rate": 9.955322004131553e-07,
"loss": 0.5368542194366455,
"step": 65,
"token_acc": 0.9301501135545339
},
{
"epoch": 0.09767157931455481,
"grad_norm": 2.0781192779541016,
"learning_rate": 9.93862198766815e-07,
"loss": 0.5094423294067383,
"step": 70,
"token_acc": 0.9307623850489469
},
{
"epoch": 0.10464812069416586,
"grad_norm": 2.0247654914855957,
"learning_rate": 9.91929453572245e-07,
"loss": 0.47563705444335935,
"step": 75,
"token_acc": 0.9330075229257042
},
{
"epoch": 0.11162466207377693,
"grad_norm": 2.036161422729492,
"learning_rate": 9.897349930845566e-07,
"loss": 0.5021390914916992,
"step": 80,
"token_acc": 0.9329839883551674
},
{
"epoch": 0.11860120345338798,
"grad_norm": 2.1073312759399414,
"learning_rate": 9.872799847960918e-07,
"loss": 0.501053762435913,
"step": 85,
"token_acc": 0.9329673985362608
},
{
"epoch": 0.12557774483299905,
"grad_norm": 1.834761619567871,
"learning_rate": 9.845657348152955e-07,
"loss": 0.4600623607635498,
"step": 90,
"token_acc": 0.936784676510704
},
{
"epoch": 0.1325542862126101,
"grad_norm": 1.7184985876083374,
"learning_rate": 9.81593687171844e-07,
"loss": 0.5222196578979492,
"step": 95,
"token_acc": 0.9302824071593162
},
{
"epoch": 0.13953082759222116,
"grad_norm": 1.7423793077468872,
"learning_rate": 9.783654230483934e-07,
"loss": 0.4808220863342285,
"step": 100,
"token_acc": 0.9323681279740812
},
{
"epoch": 0.14650736897183222,
"grad_norm": 1.5745614767074585,
"learning_rate": 9.748826599393632e-07,
"loss": 0.44776349067687987,
"step": 105,
"token_acc": 0.9381572216222361
},
{
"epoch": 0.15348391035144326,
"grad_norm": 1.491922378540039,
"learning_rate": 9.711472507371982e-07,
"loss": 0.44771714210510255,
"step": 110,
"token_acc": 0.9347117653668952
},
{
"epoch": 0.16046045173105433,
"grad_norm": 1.5303000211715698,
"learning_rate": 9.671611827465971e-07,
"loss": 0.42823081016540526,
"step": 115,
"token_acc": 0.9424545917501274
},
{
"epoch": 0.1674369931106654,
"grad_norm": 1.52151620388031,
"learning_rate": 9.629265766272291e-07,
"loss": 0.4314168930053711,
"step": 120,
"token_acc": 0.9379815310638763
},
{
"epoch": 0.17441353449027644,
"grad_norm": 1.496336579322815,
"learning_rate": 9.58445685265507e-07,
"loss": 0.44430341720581057,
"step": 125,
"token_acc": 0.938302298442336
},
{
"epoch": 0.1813900758698875,
"grad_norm": 1.4200767278671265,
"learning_rate": 9.537208925760093e-07,
"loss": 0.4397609710693359,
"step": 130,
"token_acc": 0.9418849948962232
},
{
"epoch": 0.18836661724949857,
"grad_norm": 1.419700264930725,
"learning_rate": 9.487547122331964e-07,
"loss": 0.5382704734802246,
"step": 135,
"token_acc": 0.9312300174941184
},
{
"epoch": 0.19534315862910961,
"grad_norm": 1.3994510173797607,
"learning_rate": 9.435497863340896e-07,
"loss": 0.41959681510925295,
"step": 140,
"token_acc": 0.9434397845325125
},
{
"epoch": 0.20231970000872068,
"grad_norm": 1.2424718141555786,
"learning_rate": 9.381088839926292e-07,
"loss": 0.39974849224090575,
"step": 145,
"token_acc": 0.9442570675170172
},
{
"epoch": 0.20929624138833172,
"grad_norm": 1.1396198272705078,
"learning_rate": 9.324348998664548e-07,
"loss": 0.4095014572143555,
"step": 150,
"token_acc": 0.9402837198829093
},
{
"epoch": 0.2162727827679428,
"grad_norm": 1.1727170944213867,
"learning_rate": 9.265308526168971e-07,
"loss": 0.4158812999725342,
"step": 155,
"token_acc": 0.945169557184576
},
{
"epoch": 0.22324932414755386,
"grad_norm": 1.2740483283996582,
"learning_rate": 9.203998833029945e-07,
"loss": 0.40761551856994627,
"step": 160,
"token_acc": 0.941811175337187
},
{
"epoch": 0.2302258655271649,
"grad_norm": 1.2118650674819946,
"learning_rate": 9.140452537103941e-07,
"loss": 0.40488572120666505,
"step": 165,
"token_acc": 0.939998011533108
},
{
"epoch": 0.23720240690677596,
"grad_norm": 1.2679206132888794,
"learning_rate": 9.074703446160232e-07,
"loss": 0.38835389614105226,
"step": 170,
"token_acc": 0.9452747758105312
},
{
"epoch": 0.24417894828638703,
"grad_norm": 1.4239633083343506,
"learning_rate": 9.006786539894554e-07,
"loss": 0.434948205947876,
"step": 175,
"token_acc": 0.9388818618770554
},
{
"epoch": 0.2511554896659981,
"grad_norm": 1.4500908851623535,
"learning_rate": 8.936737951319275e-07,
"loss": 0.47136545181274414,
"step": 180,
"token_acc": 0.9320933879257776
},
{
"epoch": 0.2581320310456091,
"grad_norm": 1.1485167741775513,
"learning_rate": 8.864594947539992e-07,
"loss": 0.4100066661834717,
"step": 185,
"token_acc": 0.9440283102329696
},
{
"epoch": 0.2651085724252202,
"grad_norm": 1.468805193901062,
"learning_rate": 8.790395909928753e-07,
"loss": 0.39954936504364014,
"step": 190,
"token_acc": 0.9425916365513681
},
{
"epoch": 0.27208511380483125,
"grad_norm": 1.2177455425262451,
"learning_rate": 8.714180313704489e-07,
"loss": 0.3338632583618164,
"step": 195,
"token_acc": 0.9483336172145574
},
{
"epoch": 0.2790616551844423,
"grad_norm": 1.2997610569000244,
"learning_rate": 8.635988706931486e-07,
"loss": 0.38302700519561766,
"step": 200,
"token_acc": 0.9413486825782762
},
{
"epoch": 0.2860381965640534,
"grad_norm": 1.135811686515808,
"learning_rate": 8.555862688947075e-07,
"loss": 0.33866784572601316,
"step": 205,
"token_acc": 0.9479243990178724
},
{
"epoch": 0.29301473794366445,
"grad_norm": 1.3217355012893677,
"learning_rate": 8.473844888230064e-07,
"loss": 0.35600202083587645,
"step": 210,
"token_acc": 0.9461965574680733
},
{
"epoch": 0.29999127932327546,
"grad_norm": 1.2507730722427368,
"learning_rate": 8.389978939721598e-07,
"loss": 0.352951717376709,
"step": 215,
"token_acc": 0.9438299509473886
},
{
"epoch": 0.30696782070288653,
"grad_norm": 1.3820980787277222,
"learning_rate": 8.304309461610601e-07,
"loss": 0.3622483253479004,
"step": 220,
"token_acc": 0.9437283872995913
},
{
"epoch": 0.3139443620824976,
"grad_norm": 1.394065499305725,
"learning_rate": 8.216882031596096e-07,
"loss": 0.3512030363082886,
"step": 225,
"token_acc": 0.9435979832677711
},
{
"epoch": 0.32092090346210866,
"grad_norm": 1.2014678716659546,
"learning_rate": 8.127743162639051e-07,
"loss": 0.3101860523223877,
"step": 230,
"token_acc": 0.9467404378157325
},
{
"epoch": 0.32789744484171973,
"grad_norm": 1.3220340013504028,
"learning_rate": 8.036940278216646e-07,
"loss": 0.28164148330688477,
"step": 235,
"token_acc": 0.9506907137375288
},
{
"epoch": 0.3348739862213308,
"grad_norm": 1.5838576555252075,
"learning_rate": 7.944521687092142e-07,
"loss": 0.2631302118301392,
"step": 240,
"token_acc": 0.9514257294429708
},
{
"epoch": 0.3418505276009418,
"grad_norm": 1.5820879936218262,
"learning_rate": 7.850536557613748e-07,
"loss": 0.3039613962173462,
"step": 245,
"token_acc": 0.9476980693484858
},
{
"epoch": 0.3488270689805529,
"grad_norm": 1.4841257333755493,
"learning_rate": 7.755034891556167e-07,
"loss": 0.28357877731323244,
"step": 250,
"token_acc": 0.9478200246688475
},
{
"epoch": 0.35580361036016395,
"grad_norm": 1.2519296407699585,
"learning_rate": 7.658067497518772e-07,
"loss": 0.3490274429321289,
"step": 255,
"token_acc": 0.9376414667270258
},
{
"epoch": 0.362780151739775,
"grad_norm": 1.340489149093628,
"learning_rate": 7.559685963894513e-07,
"loss": 0.32015056610107423,
"step": 260,
"token_acc": 0.9416806521217933
},
{
"epoch": 0.3697566931193861,
"grad_norm": 1.3753681182861328,
"learning_rate": 7.459942631423962e-07,
"loss": 0.27373878955841063,
"step": 265,
"token_acc": 0.9539526646272742
},
{
"epoch": 0.37673323449899715,
"grad_norm": 1.1863863468170166,
"learning_rate": 7.358890565349105e-07,
"loss": 0.29328436851501466,
"step": 270,
"token_acc": 0.9487552700260992
},
{
"epoch": 0.38370977587860816,
"grad_norm": 1.490551233291626,
"learning_rate": 7.256583527181683e-07,
"loss": 0.33202688694000243,
"step": 275,
"token_acc": 0.9404879571346824
},
{
"epoch": 0.39068631725821923,
"grad_norm": 1.3143407106399536,
"learning_rate": 7.153075946101097e-07,
"loss": 0.278816294670105,
"step": 280,
"token_acc": 0.9493239404613112
},
{
"epoch": 0.3976628586378303,
"grad_norm": 1.0962167978286743,
"learning_rate": 7.048422889997115e-07,
"loss": 0.23789422512054442,
"step": 285,
"token_acc": 0.9503463643850215
},
{
"epoch": 0.40463940001744136,
"grad_norm": 0.824631929397583,
"learning_rate": 6.942680036172762e-07,
"loss": 0.24912948608398439,
"step": 290,
"token_acc": 0.9536533677324243
},
{
"epoch": 0.41161594139705243,
"grad_norm": 1.3604934215545654,
"learning_rate": 6.835903641722999e-07,
"loss": 0.3469517946243286,
"step": 295,
"token_acc": 0.9420104361524072
},
{
"epoch": 0.41859248277666344,
"grad_norm": 0.8911880850791931,
"learning_rate": 6.72815051360494e-07,
"loss": 0.24576101303100586,
"step": 300,
"token_acc": 0.953768733064204
},
{
"epoch": 0.4255690241562745,
"grad_norm": 1.0799229145050049,
"learning_rate": 6.619477978415531e-07,
"loss": 0.2791733980178833,
"step": 305,
"token_acc": 0.9415368904774062
},
{
"epoch": 0.4325455655358856,
"grad_norm": 0.872166633605957,
"learning_rate": 6.509943851892766e-07,
"loss": 0.3617237567901611,
"step": 310,
"token_acc": 0.9406946604458268
},
{
"epoch": 0.43952210691549665,
"grad_norm": 0.9697985649108887,
"learning_rate": 6.399606408156687e-07,
"loss": 0.22297954559326172,
"step": 315,
"token_acc": 0.954813046937152
},
{
"epoch": 0.4464986482951077,
"grad_norm": 0.8073396682739258,
"learning_rate": 6.288524348706502e-07,
"loss": 0.20998930931091309,
"step": 320,
"token_acc": 0.9557532836995339
},
{
"epoch": 0.4534751896747188,
"grad_norm": 0.9216898679733276,
"learning_rate": 6.176756771190337e-07,
"loss": 0.2161928176879883,
"step": 325,
"token_acc": 0.9520381208887968
},
{
"epoch": 0.4604517310543298,
"grad_norm": 0.9290446639060974,
"learning_rate": 6.064363137964225e-07,
"loss": 0.24029843807220458,
"step": 330,
"token_acc": 0.9494813278008298
},
{
"epoch": 0.46742827243394086,
"grad_norm": 0.9209424257278442,
"learning_rate": 5.95140324445706e-07,
"loss": 0.21532030105590821,
"step": 335,
"token_acc": 0.9515148253780337
},
{
"epoch": 0.47440481381355193,
"grad_norm": 0.7281814217567444,
"learning_rate": 5.83793718735837e-07,
"loss": 0.24763097763061523,
"step": 340,
"token_acc": 0.9499685006299874
},
{
"epoch": 0.481381355193163,
"grad_norm": 0.6570573449134827,
"learning_rate": 5.724025332645793e-07,
"loss": 0.19987608194351197,
"step": 345,
"token_acc": 0.9564738292011019
},
{
"epoch": 0.48835789657277406,
"grad_norm": 0.7470307946205139,
"learning_rate": 5.609728283469288e-07,
"loss": 0.1938636064529419,
"step": 350,
"token_acc": 0.9593920408400046
},
{
"epoch": 0.49533443795238513,
"grad_norm": 0.5753098130226135,
"learning_rate": 5.495106847909182e-07,
"loss": 0.2106870651245117,
"step": 355,
"token_acc": 0.9575553464414839
},
{
"epoch": 0.5023109793319962,
"grad_norm": 0.5757151246070862,
"learning_rate": 5.380222006625179e-07,
"loss": 0.18208713531494142,
"step": 360,
"token_acc": 0.963362694802052
},
{
"epoch": 0.5092875207116072,
"grad_norm": 0.5811958312988281,
"learning_rate": 5.265134880413548e-07,
"loss": 0.1780398368835449,
"step": 365,
"token_acc": 0.9639311886076607
},
{
"epoch": 0.5162640620912182,
"grad_norm": 0.5238430500030518,
"learning_rate": 5.149906697689767e-07,
"loss": 0.2431933879852295,
"step": 370,
"token_acc": 0.9474901594773364
},
{
"epoch": 0.5232406034708293,
"grad_norm": 0.7463929057121277,
"learning_rate": 5.034598761913916e-07,
"loss": 0.2559064865112305,
"step": 375,
"token_acc": 0.9454068781164859
},
{
"epoch": 0.5302171448504404,
"grad_norm": 0.5051546692848206,
"learning_rate": 4.919272418976123e-07,
"loss": 0.20950682163238527,
"step": 380,
"token_acc": 0.9548458149779736
},
{
"epoch": 0.5371936862300515,
"grad_norm": 0.47918471693992615,
"learning_rate": 4.803989024559459e-07,
"loss": 0.18409876823425292,
"step": 385,
"token_acc": 0.9592592592592593
},
{
"epoch": 0.5441702276096625,
"grad_norm": 0.48506343364715576,
"learning_rate": 4.688809911497609e-07,
"loss": 0.19301035404205322,
"step": 390,
"token_acc": 0.9593621399176955
},
{
"epoch": 0.5511467689892736,
"grad_norm": 0.6255201697349548,
"learning_rate": 4.57379635714471e-07,
"loss": 0.1948167562484741,
"step": 395,
"token_acc": 0.9559854371569853
},
{
"epoch": 0.5581233103688846,
"grad_norm": 0.6040950417518616,
"learning_rate": 4.459009550774692e-07,
"loss": 0.15679298639297484,
"step": 400,
"token_acc": 0.9631829798991504
},
{
"epoch": 0.5650998517484956,
"grad_norm": 0.6469866037368774,
"learning_rate": 4.344510561027498e-07,
"loss": 0.2119133472442627,
"step": 405,
"token_acc": 0.9538207806487081
},
{
"epoch": 0.5720763931281068,
"grad_norm": 0.6210054755210876,
"learning_rate": 4.230360303419453e-07,
"loss": 0.17766163349151612,
"step": 410,
"token_acc": 0.9606851549755302
},
{
"epoch": 0.5790529345077178,
"grad_norm": 0.4716809391975403,
"learning_rate": 4.116619507935144e-07,
"loss": 0.18397997617721557,
"step": 415,
"token_acc": 0.9560470014410819
},
{
"epoch": 0.5860294758873289,
"grad_norm": 0.5452926754951477,
"learning_rate": 4.003348686717949e-07,
"loss": 0.2028341293334961,
"step": 420,
"token_acc": 0.9551058385671086
},
{
"epoch": 0.5930060172669399,
"grad_norm": 0.5029204487800598,
"learning_rate": 3.890608101876517e-07,
"loss": 0.16716669797897338,
"step": 425,
"token_acc": 0.9617021276595744
},
{
"epoch": 0.5999825586465509,
"grad_norm": 0.5014962553977966,
"learning_rate": 3.7784577334242273e-07,
"loss": 0.18506402969360353,
"step": 430,
"token_acc": 0.9586908319676082
},
{
"epoch": 0.606959100026162,
"grad_norm": 0.5529438257217407,
"learning_rate": 3.666957247368757e-07,
"loss": 0.1777629852294922,
"step": 435,
"token_acc": 0.9608512874408828
},
{
"epoch": 0.6139356414057731,
"grad_norm": 0.4262159466743469,
"learning_rate": 3.556165963968691e-07,
"loss": 0.14577605724334716,
"step": 440,
"token_acc": 0.962014556659406
},
{
"epoch": 0.6209121827853842,
"grad_norm": 0.5142691135406494,
"learning_rate": 3.4461428261740754e-07,
"loss": 0.20166921615600586,
"step": 445,
"token_acc": 0.9590035201733008
},
{
"epoch": 0.6278887241649952,
"grad_norm": 0.5572395324707031,
"learning_rate": 3.3369463682677234e-07,
"loss": 0.20577445030212402,
"step": 450,
"token_acc": 0.9554721339878718
},
{
"epoch": 0.6348652655446063,
"grad_norm": 0.496382474899292,
"learning_rate": 3.2286346847239123e-07,
"loss": 0.14863760471343995,
"step": 455,
"token_acc": 0.9620308092861792
},
{
"epoch": 0.6418418069242173,
"grad_norm": 0.4650530517101288,
"learning_rate": 3.1212653993010954e-07,
"loss": 0.17070106267929078,
"step": 460,
"token_acc": 0.9571316789626649
},
{
"epoch": 0.6488183483038283,
"grad_norm": 0.4173397421836853,
"learning_rate": 3.014895634385014e-07,
"loss": 0.1784367799758911,
"step": 465,
"token_acc": 0.9574702782203701
},
{
"epoch": 0.6557948896834395,
"grad_norm": 0.50703364610672,
"learning_rate": 2.9095819805985795e-07,
"loss": 0.18249971866607667,
"step": 470,
"token_acc": 0.9553955949304787
},
{
"epoch": 0.6627714310630505,
"grad_norm": 0.4510941505432129,
"learning_rate": 2.8053804666946287e-07,
"loss": 0.17186166048049928,
"step": 475,
"token_acc": 0.9583355252775001
},
{
"epoch": 0.6697479724426616,
"grad_norm": 0.498515248298645,
"learning_rate": 2.7023465297476424e-07,
"loss": 0.23503575325012208,
"step": 480,
"token_acc": 0.9480993056596233
},
{
"epoch": 0.6767245138222726,
"grad_norm": 0.5071078538894653,
"learning_rate": 2.6005349856602123e-07,
"loss": 0.23459105491638182,
"step": 485,
"token_acc": 0.9456293181135476
},
{
"epoch": 0.6837010552018836,
"grad_norm": 0.5977618098258972,
"learning_rate": 2.500000000000001e-07,
"loss": 0.1519307851791382,
"step": 490,
"token_acc": 0.9616792137181096
},
{
"epoch": 0.6906775965814947,
"grad_norm": 0.49618807435035706,
"learning_rate": 2.4007950591826913e-07,
"loss": 0.21449580192565917,
"step": 495,
"token_acc": 0.9518640628962719
},
{
"epoch": 0.6976541379611058,
"grad_norm": 0.5371702313423157,
"learning_rate": 2.3029729420162587e-07,
"loss": 0.15500261783599853,
"step": 500,
"token_acc": 0.9591128732499071
},
{
"epoch": 0.7046306793407169,
"grad_norm": 0.5795394778251648,
"learning_rate": 2.2065856916216786e-07,
"loss": 0.16497514247894288,
"step": 505,
"token_acc": 0.9617788774580024
},
{
"epoch": 0.7116072207203279,
"grad_norm": 0.5824469327926636,
"learning_rate": 2.1116845877450805e-07,
"loss": 0.16700024604797364,
"step": 510,
"token_acc": 0.9572996706915478
},
{
"epoch": 0.7185837620999389,
"grad_norm": 0.539864182472229,
"learning_rate": 2.0183201194759825e-07,
"loss": 0.2224641799926758,
"step": 515,
"token_acc": 0.9501959166838524
},
{
"epoch": 0.72556030347955,
"grad_norm": 0.781697690486908,
"learning_rate": 1.9265419583861952e-07,
"loss": 0.1476673364639282,
"step": 520,
"token_acc": 0.9633160506216201
},
{
"epoch": 0.732536844859161,
"grad_norm": 0.4989713728427887,
"learning_rate": 1.8363989321036577e-07,
"loss": 0.143803870677948,
"step": 525,
"token_acc": 0.9604544058949954
},
{
"epoch": 0.7395133862387722,
"grad_norm": 0.5253103971481323,
"learning_rate": 1.7479389983352656e-07,
"loss": 0.17980681657791137,
"step": 530,
"token_acc": 0.9542640495272832
},
{
"epoch": 0.7464899276183832,
"grad_norm": 0.5387361645698547,
"learning_rate": 1.6612092193525017e-07,
"loss": 0.217242431640625,
"step": 535,
"token_acc": 0.950739773716275
},
{
"epoch": 0.7534664689979943,
"grad_norm": 0.5405040979385376,
"learning_rate": 1.5762557369534708e-07,
"loss": 0.19491589069366455,
"step": 540,
"token_acc": 0.954151055018734
},
{
"epoch": 0.7604430103776053,
"grad_norm": 0.43293488025665283,
"learning_rate": 1.4931237479146326e-07,
"loss": 0.18127689361572266,
"step": 545,
"token_acc": 0.9546740778170794
},
{
"epoch": 0.7674195517572163,
"grad_norm": 0.5920426249504089,
"learning_rate": 1.4118574799453115e-07,
"loss": 0.17992936372756957,
"step": 550,
"token_acc": 0.9548200289551195
},
{
"epoch": 0.7743960931368274,
"grad_norm": 0.8887305855751038,
"learning_rate": 1.332500168157748e-07,
"loss": 0.1434216856956482,
"step": 555,
"token_acc": 0.9616718027734977
},
{
"epoch": 0.7813726345164385,
"grad_norm": 0.45279937982559204,
"learning_rate": 1.2550940320652614e-07,
"loss": 0.15285730361938477,
"step": 560,
"token_acc": 0.9589277780520314
},
{
"epoch": 0.7883491758960496,
"grad_norm": 0.5594329833984375,
"learning_rate": 1.179680253120699e-07,
"loss": 0.14827193021774293,
"step": 565,
"token_acc": 0.9611136415395126
},
{
"epoch": 0.7953257172756606,
"grad_norm": 0.5205839276313782,
"learning_rate": 1.1062989528071681e-07,
"loss": 0.14820796251296997,
"step": 570,
"token_acc": 0.9608738340697104
},
{
"epoch": 0.8023022586552716,
"grad_norm": 0.7842152118682861,
"learning_rate": 1.0349891712926855e-07,
"loss": 0.14528849124908447,
"step": 575,
"token_acc": 0.9591823819769649
},
{
"epoch": 0.8092788000348827,
"grad_norm": 0.5881332159042358,
"learning_rate": 9.65788846660116e-08,
"loss": 0.12228701114654542,
"step": 580,
"token_acc": 0.9648055356716774
},
{
"epoch": 0.8162553414144937,
"grad_norm": 0.5683630704879761,
"learning_rate": 8.987347947234192e-08,
"loss": 0.15679004192352294,
"step": 585,
"token_acc": 0.9599228461208744
},
{
"epoch": 0.8232318827941049,
"grad_norm": 0.6288495659828186,
"learning_rate": 8.33862689440985e-08,
"loss": 0.16296907663345336,
"step": 590,
"token_acc": 0.957492548981287
},
{
"epoch": 0.8302084241737159,
"grad_norm": 0.46802279353141785,
"learning_rate": 7.712070439364438e-08,
"loss": 0.13914816379547118,
"step": 595,
"token_acc": 0.9615843086259211
},
{
"epoch": 0.8371849655533269,
"grad_norm": 0.5954472422599792,
"learning_rate": 7.108011921370727e-08,
"loss": 0.15333893299102783,
"step": 600,
"token_acc": 0.9563102463405927
},
{
"epoch": 0.844161506932938,
"grad_norm": 0.611599862575531,
"learning_rate": 6.526772710395323e-08,
"loss": 0.11822519302368165,
"step": 605,
"token_acc": 0.967852975495916
},
{
"epoch": 0.851138048312549,
"grad_norm": 0.5725059509277344,
"learning_rate": 5.968662036124295e-08,
"loss": 0.15996166467666625,
"step": 610,
"token_acc": 0.959222581157655
},
{
"epoch": 0.8581145896921601,
"grad_norm": 0.4814670979976654,
"learning_rate": 5.433976823447262e-08,
"loss": 0.13899474143981932,
"step": 615,
"token_acc": 0.9624644833258561
},
{
"epoch": 0.8650911310717712,
"grad_norm": 0.525391697883606,
"learning_rate": 4.923001534488097e-08,
"loss": 0.1286926746368408,
"step": 620,
"token_acc": 0.9623963626638139
},
{
"epoch": 0.8720676724513823,
"grad_norm": 0.6519795656204224,
"learning_rate": 4.43600801726598e-08,
"loss": 0.17959569692611693,
"step": 625,
"token_acc": 0.9575730509123389
},
{
"epoch": 0.8790442138309933,
"grad_norm": 0.5954372882843018,
"learning_rate": 3.973255361067346e-08,
"loss": 0.14509177207946777,
"step": 630,
"token_acc": 0.9609306955331591
},
{
"epoch": 0.8860207552106043,
"grad_norm": 0.6551011800765991,
"learning_rate": 3.534989758605772e-08,
"loss": 0.13519610166549684,
"step": 635,
"token_acc": 0.961144806671721
},
{
"epoch": 0.8929972965902154,
"grad_norm": 0.6382178664207458,
"learning_rate": 3.121444375042992e-08,
"loss": 0.14140852689743041,
"step": 640,
"token_acc": 0.9617294770669004
},
{
"epoch": 0.8999738379698264,
"grad_norm": 0.5000672340393066,
"learning_rate": 2.732839223940914e-08,
"loss": 0.15130863189697266,
"step": 645,
"token_acc": 0.9578913532626165
},
{
"epoch": 0.9069503793494376,
"grad_norm": 0.4966048002243042,
"learning_rate": 2.3693810502103783e-08,
"loss": 0.16461522579193116,
"step": 650,
"token_acc": 0.956586014881979
},
{
"epoch": 0.9139269207290486,
"grad_norm": 0.7203890085220337,
"learning_rate": 2.0312632201192338e-08,
"loss": 0.15151506662368774,
"step": 655,
"token_acc": 0.9578531445505433
},
{
"epoch": 0.9209034621086596,
"grad_norm": 0.5585451722145081,
"learning_rate": 1.7186656184179473e-08,
"loss": 0.19614295959472655,
"step": 660,
"token_acc": 0.9517613299030279
},
{
"epoch": 0.9278800034882707,
"grad_norm": 0.5863579511642456,
"learning_rate": 1.431754552637754e-08,
"loss": 0.13972072601318358,
"step": 665,
"token_acc": 0.960591916834624
},
{
"epoch": 0.9348565448678817,
"grad_norm": 0.540397584438324,
"learning_rate": 1.1706826646119994e-08,
"loss": 0.20453217029571533,
"step": 670,
"token_acc": 0.9501171417415072
},
{
"epoch": 0.9418330862474928,
"grad_norm": 0.4735467731952667,
"learning_rate": 9.355888492680153e-09,
"loss": 0.2564453840255737,
"step": 675,
"token_acc": 0.9417061863910055
},
{
"epoch": 0.9488096276271039,
"grad_norm": 0.41385316848754883,
"learning_rate": 7.265981807324795e-09,
"loss": 0.1432310461997986,
"step": 680,
"token_acc": 0.9586380054620738
},
{
"epoch": 0.9557861690067149,
"grad_norm": 0.5068058967590332,
"learning_rate": 5.438218457897492e-09,
"loss": 0.12814297676086425,
"step": 685,
"token_acc": 0.9636561355311355
},
{
"epoch": 0.962762710386326,
"grad_norm": 0.6264599561691284,
"learning_rate": 3.873570847285012e-09,
"loss": 0.2733027935028076,
"step": 690,
"token_acc": 0.9410119633331607
},
{
"epoch": 0.969739251765937,
"grad_norm": 0.5079652667045593,
"learning_rate": 2.5728713960815884e-09,
"loss": 0.1333064079284668,
"step": 695,
"token_acc": 0.963391442155309
},
{
"epoch": 0.9767157931455481,
"grad_norm": 0.5711411237716675,
"learning_rate": 1.5368120997261147e-09,
"loss": 0.21541709899902345,
"step": 700,
"token_acc": 0.9518470869325492
},
{
"epoch": 0.9836923345251591,
"grad_norm": 0.5428957939147949,
"learning_rate": 7.65944160348142e-10,
"loss": 0.14647810459136962,
"step": 705,
"token_acc": 0.9616447996782788
},
{
"epoch": 0.9906688759047703,
"grad_norm": 0.5820784568786621,
"learning_rate": 2.6067769351867384e-10,
"loss": 0.144012713432312,
"step": 710,
"token_acc": 0.9590297709494062
},
{
"epoch": 0.9976454172843813,
"grad_norm": 0.6706213355064392,
"learning_rate": 2.128151006108858e-11,
"loss": 0.16256020069122315,
"step": 715,
"token_acc": 0.9583173343572678
}
],
"logging_steps": 5,
"max_steps": 717,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.451809127798866e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}