LLaMA-EatFit-2-7b-chat / trainer_state.json
sanjaypn14's picture
Uploading fine-tuned LLaMA-EatFit-2-7b model
fc4a80f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 3092,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008085381630012937,
"grad_norm": 0.3009789288043976,
"learning_rate": 0.00013440860215053763,
"loss": 1.7048,
"mean_token_accuracy": 0.661682380437851,
"step": 25
},
{
"epoch": 0.016170763260025874,
"grad_norm": 0.4460005462169647,
"learning_rate": 0.00026881720430107527,
"loss": 1.2718,
"mean_token_accuracy": 0.713244981765747,
"step": 50
},
{
"epoch": 0.02425614489003881,
"grad_norm": 0.25601527094841003,
"learning_rate": 0.0004032258064516129,
"loss": 0.9995,
"mean_token_accuracy": 0.7553433787822723,
"step": 75
},
{
"epoch": 0.03234152652005175,
"grad_norm": 0.2627596855163574,
"learning_rate": 0.0004999932787358948,
"loss": 0.9036,
"mean_token_accuracy": 0.7764883422851563,
"step": 100
},
{
"epoch": 0.04042690815006468,
"grad_norm": 0.4143337905406952,
"learning_rate": 0.0004998595518201121,
"loss": 0.8985,
"mean_token_accuracy": 0.7727250349521637,
"step": 125
},
{
"epoch": 0.04851228978007762,
"grad_norm": 0.23922297358512878,
"learning_rate": 0.00049955446943686,
"loss": 0.7909,
"mean_token_accuracy": 0.7968144822120666,
"step": 150
},
{
"epoch": 0.056597671410090554,
"grad_norm": 0.2834464907646179,
"learning_rate": 0.0004990782408138185,
"loss": 0.822,
"mean_token_accuracy": 0.7941457486152649,
"step": 175
},
{
"epoch": 0.0646830530401035,
"grad_norm": 0.29430675506591797,
"learning_rate": 0.000498431192551983,
"loss": 0.8546,
"mean_token_accuracy": 0.7836022305488587,
"step": 200
},
{
"epoch": 0.07276843467011643,
"grad_norm": 0.23746679723262787,
"learning_rate": 0.0004976137684016788,
"loss": 0.7788,
"mean_token_accuracy": 0.8025749707221985,
"step": 225
},
{
"epoch": 0.08085381630012936,
"grad_norm": 0.24229322373867035,
"learning_rate": 0.0004966265289582338,
"loss": 0.8583,
"mean_token_accuracy": 0.7800547671318054,
"step": 250
},
{
"epoch": 0.08893919793014231,
"grad_norm": 0.24889522790908813,
"learning_rate": 0.0004954701512775184,
"loss": 0.685,
"mean_token_accuracy": 0.8196275687217712,
"step": 275
},
{
"epoch": 0.09702457956015524,
"grad_norm": 0.22737586498260498,
"learning_rate": 0.0004941454284116157,
"loss": 0.8068,
"mean_token_accuracy": 0.7950991153717041,
"step": 300
},
{
"epoch": 0.10510996119016817,
"grad_norm": 0.2224387228488922,
"learning_rate": 0.0004926532688649407,
"loss": 0.8054,
"mean_token_accuracy": 0.7932506108283996,
"step": 325
},
{
"epoch": 0.11319534282018111,
"grad_norm": 0.2293468415737152,
"learning_rate": 0.0004909946959711816,
"loss": 0.7816,
"mean_token_accuracy": 0.7965369141101837,
"step": 350
},
{
"epoch": 0.12128072445019406,
"grad_norm": 0.2561907470226288,
"learning_rate": 0.0004891708471914897,
"loss": 0.7487,
"mean_token_accuracy": 0.8107043826580047,
"step": 375
},
{
"epoch": 0.129366106080207,
"grad_norm": 0.2583022117614746,
"learning_rate": 0.0004871829733344012,
"loss": 0.7786,
"mean_token_accuracy": 0.7999367988109589,
"step": 400
},
{
"epoch": 0.13745148771021992,
"grad_norm": 0.2550680339336395,
"learning_rate": 0.00048503243769802327,
"loss": 0.7576,
"mean_token_accuracy": 0.8000009500980377,
"step": 425
},
{
"epoch": 0.14553686934023286,
"grad_norm": 0.24982689321041107,
"learning_rate": 0.0004827207151350745,
"loss": 0.7897,
"mean_token_accuracy": 0.7945795893669129,
"step": 450
},
{
"epoch": 0.1536222509702458,
"grad_norm": 0.2784912884235382,
"learning_rate": 0.0004802493910414205,
"loss": 0.8013,
"mean_token_accuracy": 0.792938197851181,
"step": 475
},
{
"epoch": 0.16170763260025872,
"grad_norm": 0.2220473736524582,
"learning_rate": 0.00047762016026879807,
"loss": 0.711,
"mean_token_accuracy": 0.8124627935886383,
"step": 500
},
{
"epoch": 0.16979301423027165,
"grad_norm": 0.2841341197490692,
"learning_rate": 0.00047483482596247353,
"loss": 0.7382,
"mean_token_accuracy": 0.8129073297977447,
"step": 525
},
{
"epoch": 0.17787839586028462,
"grad_norm": 0.23232664167881012,
"learning_rate": 0.00047189529832463296,
"loss": 0.7588,
"mean_token_accuracy": 0.8059421420097351,
"step": 550
},
{
"epoch": 0.18596377749029755,
"grad_norm": 0.22178468108177185,
"learning_rate": 0.00046880359330435216,
"loss": 0.8146,
"mean_token_accuracy": 0.7915572142601013,
"step": 575
},
{
"epoch": 0.19404915912031048,
"grad_norm": 0.23006677627563477,
"learning_rate": 0.0004655618312150437,
"loss": 0.7822,
"mean_token_accuracy": 0.7942324769496918,
"step": 600
},
{
"epoch": 0.20213454075032342,
"grad_norm": 0.32772719860076904,
"learning_rate": 0.00046217223528033146,
"loss": 0.7632,
"mean_token_accuracy": 0.8043809008598327,
"step": 625
},
{
"epoch": 0.21021992238033635,
"grad_norm": 0.18335266411304474,
"learning_rate": 0.0004586371301093476,
"loss": 0.7218,
"mean_token_accuracy": 0.816128705739975,
"step": 650
},
{
"epoch": 0.21830530401034928,
"grad_norm": 0.22064125537872314,
"learning_rate": 0.00045495894010249915,
"loss": 0.7364,
"mean_token_accuracy": 0.8087958478927613,
"step": 675
},
{
"epoch": 0.22639068564036222,
"grad_norm": 0.21018439531326294,
"learning_rate": 0.0004511401877887967,
"loss": 0.738,
"mean_token_accuracy": 0.8069515633583069,
"step": 700
},
{
"epoch": 0.23447606727037515,
"grad_norm": 0.21225683391094208,
"learning_rate": 0.0004471834920958864,
"loss": 0.7397,
"mean_token_accuracy": 0.8051351046562195,
"step": 725
},
{
"epoch": 0.2425614489003881,
"grad_norm": 0.22577480971813202,
"learning_rate": 0.00044309156655397003,
"loss": 0.7872,
"mean_token_accuracy": 0.7951467871665955,
"step": 750
},
{
"epoch": 0.25064683053040104,
"grad_norm": 0.19751884043216705,
"learning_rate": 0.000438867217434847,
"loss": 0.7147,
"mean_token_accuracy": 0.8134795188903808,
"step": 775
},
{
"epoch": 0.258732212160414,
"grad_norm": 0.2011658251285553,
"learning_rate": 0.0004345133418273529,
"loss": 0.7923,
"mean_token_accuracy": 0.7959077858924866,
"step": 800
},
{
"epoch": 0.2668175937904269,
"grad_norm": 0.2174764722585678,
"learning_rate": 0.00043003292565051544,
"loss": 0.7576,
"mean_token_accuracy": 0.8044932758808137,
"step": 825
},
{
"epoch": 0.27490297542043984,
"grad_norm": 0.20990514755249023,
"learning_rate": 0.0004254290416057898,
"loss": 0.739,
"mean_token_accuracy": 0.8073045027256012,
"step": 850
},
{
"epoch": 0.2829883570504528,
"grad_norm": 0.1976221799850464,
"learning_rate": 0.0004207048470697777,
"loss": 0.6717,
"mean_token_accuracy": 0.824974125623703,
"step": 875
},
{
"epoch": 0.2910737386804657,
"grad_norm": 0.2552309036254883,
"learning_rate": 0.0004158635819288762,
"loss": 0.7311,
"mean_token_accuracy": 0.8078971183300019,
"step": 900
},
{
"epoch": 0.29915912031047864,
"grad_norm": 0.26244303584098816,
"learning_rate": 0.00041090856635734067,
"loss": 0.7264,
"mean_token_accuracy": 0.8127052938938141,
"step": 925
},
{
"epoch": 0.3072445019404916,
"grad_norm": 0.24333028495311737,
"learning_rate": 0.000405843198540285,
"loss": 0.7184,
"mean_token_accuracy": 0.8114526355266571,
"step": 950
},
{
"epoch": 0.3153298835705045,
"grad_norm": 0.19133129715919495,
"learning_rate": 0.0004006709523431822,
"loss": 0.7538,
"mean_token_accuracy": 0.8016650295257568,
"step": 975
},
{
"epoch": 0.32341526520051744,
"grad_norm": 0.25047338008880615,
"learning_rate": 0.00039539537492946285,
"loss": 0.8019,
"mean_token_accuracy": 0.7935136258602142,
"step": 1000
},
{
"epoch": 0.3315006468305304,
"grad_norm": 0.19724124670028687,
"learning_rate": 0.0003900200843278449,
"loss": 0.6892,
"mean_token_accuracy": 0.8166925406455994,
"step": 1025
},
{
"epoch": 0.3395860284605433,
"grad_norm": 0.21111617982387543,
"learning_rate": 0.0003845487669510631,
"loss": 0.7281,
"mean_token_accuracy": 0.8110716784000397,
"step": 1050
},
{
"epoch": 0.3476714100905563,
"grad_norm": 0.2487187534570694,
"learning_rate": 0.00037898517506770196,
"loss": 0.7962,
"mean_token_accuracy": 0.7921491277217865,
"step": 1075
},
{
"epoch": 0.35575679172056923,
"grad_norm": 0.2757723927497864,
"learning_rate": 0.0003733331242288622,
"loss": 0.7533,
"mean_token_accuracy": 0.8056223785877228,
"step": 1100
},
{
"epoch": 0.36384217335058217,
"grad_norm": 0.27630847692489624,
"learning_rate": 0.0003675964906514289,
"loss": 0.7885,
"mean_token_accuracy": 0.7973137283325196,
"step": 1125
},
{
"epoch": 0.3719275549805951,
"grad_norm": 0.2365068644285202,
"learning_rate": 0.00036177920855973405,
"loss": 0.7275,
"mean_token_accuracy": 0.8077067303657531,
"step": 1150
},
{
"epoch": 0.38001293661060803,
"grad_norm": 0.2511255443096161,
"learning_rate": 0.00035588526748743754,
"loss": 0.8052,
"mean_token_accuracy": 0.7931141972541809,
"step": 1175
},
{
"epoch": 0.38809831824062097,
"grad_norm": 0.2489156723022461,
"learning_rate": 0.0003499187095414763,
"loss": 0.7369,
"mean_token_accuracy": 0.8076127851009369,
"step": 1200
},
{
"epoch": 0.3961836998706339,
"grad_norm": 0.23067978024482727,
"learning_rate": 0.00034388362662995855,
"loss": 0.7393,
"mean_token_accuracy": 0.8086310243606567,
"step": 1225
},
{
"epoch": 0.40426908150064683,
"grad_norm": 0.2202920764684677,
"learning_rate": 0.000337784157655904,
"loss": 0.7578,
"mean_token_accuracy": 0.8064273929595948,
"step": 1250
},
{
"epoch": 0.41235446313065977,
"grad_norm": 0.21269731223583221,
"learning_rate": 0.0003316244856787544,
"loss": 0.7937,
"mean_token_accuracy": 0.795601452589035,
"step": 1275
},
{
"epoch": 0.4204398447606727,
"grad_norm": 0.24461720883846283,
"learning_rate": 0.0003254088350456017,
"loss": 0.6656,
"mean_token_accuracy": 0.8226878666877746,
"step": 1300
},
{
"epoch": 0.42852522639068563,
"grad_norm": 0.24032443761825562,
"learning_rate": 0.0003191414684941003,
"loss": 0.7778,
"mean_token_accuracy": 0.7960509729385375,
"step": 1325
},
{
"epoch": 0.43661060802069857,
"grad_norm": 0.25362804532051086,
"learning_rate": 0.0003128266842290513,
"loss": 0.6967,
"mean_token_accuracy": 0.8132575881481171,
"step": 1350
},
{
"epoch": 0.4446959896507115,
"grad_norm": 0.2388894110918045,
"learning_rate": 0.0003064688129746629,
"loss": 0.716,
"mean_token_accuracy": 0.8112483811378479,
"step": 1375
},
{
"epoch": 0.45278137128072443,
"grad_norm": 0.2121650129556656,
"learning_rate": 0.0003000722150045085,
"loss": 0.6942,
"mean_token_accuracy": 0.8156200110912323,
"step": 1400
},
{
"epoch": 0.46086675291073737,
"grad_norm": 0.2317550778388977,
"learning_rate": 0.0002936412771512206,
"loss": 0.7493,
"mean_token_accuracy": 0.8051575112342835,
"step": 1425
},
{
"epoch": 0.4689521345407503,
"grad_norm": 0.23476050794124603,
"learning_rate": 0.0002871804097979687,
"loss": 0.7136,
"mean_token_accuracy": 0.8104170382022857,
"step": 1450
},
{
"epoch": 0.4770375161707633,
"grad_norm": 0.2009560465812683,
"learning_rate": 0.00028069404385378736,
"loss": 0.7117,
"mean_token_accuracy": 0.8178813803195953,
"step": 1475
},
{
"epoch": 0.4851228978007762,
"grad_norm": 0.21633079648017883,
"learning_rate": 0.0002741866277148276,
"loss": 0.7392,
"mean_token_accuracy": 0.8080459308624267,
"step": 1500
},
{
"epoch": 0.49320827943078915,
"grad_norm": 0.2863864004611969,
"learning_rate": 0.00026766262421361407,
"loss": 0.7429,
"mean_token_accuracy": 0.8051086151599884,
"step": 1525
},
{
"epoch": 0.5012936610608021,
"grad_norm": 0.2746196389198303,
"learning_rate": 0.0002611265075584034,
"loss": 0.7378,
"mean_token_accuracy": 0.8071331679821014,
"step": 1550
},
{
"epoch": 0.509379042690815,
"grad_norm": 0.1745605319738388,
"learning_rate": 0.0002545827602647397,
"loss": 0.7329,
"mean_token_accuracy": 0.8071370398998261,
"step": 1575
},
{
"epoch": 0.517464424320828,
"grad_norm": 0.2692930996417999,
"learning_rate": 0.0002480358700813135,
"loss": 0.6469,
"mean_token_accuracy": 0.8244569575786591,
"step": 1600
},
{
"epoch": 0.5255498059508409,
"grad_norm": 0.2551010549068451,
"learning_rate": 0.00024149032691223173,
"loss": 0.7093,
"mean_token_accuracy": 0.8107299065589905,
"step": 1625
},
{
"epoch": 0.5336351875808538,
"grad_norm": 0.19366180896759033,
"learning_rate": 0.0002349506197378092,
"loss": 0.7518,
"mean_token_accuracy": 0.8024619662761688,
"step": 1650
},
{
"epoch": 0.5417205692108668,
"grad_norm": 0.22391283512115479,
"learning_rate": 0.00022842123353599369,
"loss": 0.6933,
"mean_token_accuracy": 0.8174584257602692,
"step": 1675
},
{
"epoch": 0.5498059508408797,
"grad_norm": 0.24723011255264282,
"learning_rate": 0.0002219066462065364,
"loss": 0.669,
"mean_token_accuracy": 0.822363510131836,
"step": 1700
},
{
"epoch": 0.5578913324708926,
"grad_norm": 0.20229819416999817,
"learning_rate": 0.00021541132550001584,
"loss": 0.6508,
"mean_token_accuracy": 0.8255820453166962,
"step": 1725
},
{
"epoch": 0.5659767141009056,
"grad_norm": 0.2818906605243683,
"learning_rate": 0.00020893972595382274,
"loss": 0.6417,
"mean_token_accuracy": 0.830688863992691,
"step": 1750
},
{
"epoch": 0.5740620957309185,
"grad_norm": 0.24256223440170288,
"learning_rate": 0.00020249628583720672,
"loss": 0.7353,
"mean_token_accuracy": 0.8104202616214752,
"step": 1775
},
{
"epoch": 0.5821474773609314,
"grad_norm": 0.24385611712932587,
"learning_rate": 0.00019608542410747888,
"loss": 0.6876,
"mean_token_accuracy": 0.819042581319809,
"step": 1800
},
{
"epoch": 0.5902328589909444,
"grad_norm": 0.15913242101669312,
"learning_rate": 0.00018971153737945968,
"loss": 0.646,
"mean_token_accuracy": 0.8284247839450836,
"step": 1825
},
{
"epoch": 0.5983182406209573,
"grad_norm": 0.17542122304439545,
"learning_rate": 0.00018337899691024914,
"loss": 0.6216,
"mean_token_accuracy": 0.83616614818573,
"step": 1850
},
{
"epoch": 0.6064036222509702,
"grad_norm": 0.22214658558368683,
"learning_rate": 0.0001770921456013872,
"loss": 0.6947,
"mean_token_accuracy": 0.8170740747451782,
"step": 1875
},
{
"epoch": 0.6144890038809832,
"grad_norm": 0.22132755815982819,
"learning_rate": 0.00017085529502046073,
"loss": 0.6788,
"mean_token_accuracy": 0.820680763721466,
"step": 1900
},
{
"epoch": 0.6225743855109961,
"grad_norm": 0.26897531747817993,
"learning_rate": 0.00016467272244420029,
"loss": 0.6833,
"mean_token_accuracy": 0.822064242362976,
"step": 1925
},
{
"epoch": 0.630659767141009,
"grad_norm": 0.25688934326171875,
"learning_rate": 0.0001585486679250922,
"loss": 0.6945,
"mean_token_accuracy": 0.8143122482299805,
"step": 1950
},
{
"epoch": 0.638745148771022,
"grad_norm": 0.21207553148269653,
"learning_rate": 0.0001524873313835208,
"loss": 0.6596,
"mean_token_accuracy": 0.8273860597610474,
"step": 1975
},
{
"epoch": 0.6468305304010349,
"grad_norm": 0.281393438577652,
"learning_rate": 0.00014649286972743319,
"loss": 0.6767,
"mean_token_accuracy": 0.8178416419029236,
"step": 2000
},
{
"epoch": 0.6549159120310478,
"grad_norm": 0.27408191561698914,
"learning_rate": 0.00014056939400150143,
"loss": 0.6974,
"mean_token_accuracy": 0.8189209842681885,
"step": 2025
},
{
"epoch": 0.6630012936610608,
"grad_norm": 0.26886942982673645,
"learning_rate": 0.00013472096656773913,
"loss": 0.6497,
"mean_token_accuracy": 0.8288757252693176,
"step": 2050
},
{
"epoch": 0.6710866752910737,
"grad_norm": 0.21919454634189606,
"learning_rate": 0.00012895159831950462,
"loss": 0.693,
"mean_token_accuracy": 0.8163833570480347,
"step": 2075
},
{
"epoch": 0.6791720569210866,
"grad_norm": 0.21283280849456787,
"learning_rate": 0.0001232652459308012,
"loss": 0.7117,
"mean_token_accuracy": 0.808628898859024,
"step": 2100
},
{
"epoch": 0.6872574385510997,
"grad_norm": 0.229765385389328,
"learning_rate": 0.00011766580914276209,
"loss": 0.7317,
"mean_token_accuracy": 0.8035627353191376,
"step": 2125
},
{
"epoch": 0.6953428201811126,
"grad_norm": 0.21411098539829254,
"learning_rate": 0.00011215712808918003,
"loss": 0.6469,
"mean_token_accuracy": 0.8277445828914642,
"step": 2150
},
{
"epoch": 0.7034282018111255,
"grad_norm": 0.2254790961742401,
"learning_rate": 0.00010674298066291601,
"loss": 0.6976,
"mean_token_accuracy": 0.8171502375602722,
"step": 2175
},
{
"epoch": 0.7115135834411385,
"grad_norm": 0.27148380875587463,
"learning_rate": 0.0001014270799249933,
"loss": 0.717,
"mean_token_accuracy": 0.8086051964759826,
"step": 2200
},
{
"epoch": 0.7195989650711514,
"grad_norm": 0.2047407031059265,
"learning_rate": 9.621307155815398e-05,
"loss": 0.718,
"mean_token_accuracy": 0.8121638822555542,
"step": 2225
},
{
"epoch": 0.7276843467011643,
"grad_norm": 0.22144050896167755,
"learning_rate": 9.11045313666231e-05,
"loss": 0.6623,
"mean_token_accuracy": 0.8254709720611573,
"step": 2250
},
{
"epoch": 0.7357697283311773,
"grad_norm": 0.27873218059539795,
"learning_rate": 8.610496282379687e-05,
"loss": 0.7073,
"mean_token_accuracy": 0.8168034076690673,
"step": 2275
},
{
"epoch": 0.7438551099611902,
"grad_norm": 0.25058674812316895,
"learning_rate": 8.121779466953572e-05,
"loss": 0.7961,
"mean_token_accuracy": 0.7905523943901062,
"step": 2300
},
{
"epoch": 0.7519404915912031,
"grad_norm": 0.2540716826915741,
"learning_rate": 7.644637855870959e-05,
"loss": 0.7561,
"mean_token_accuracy": 0.8036962306499481,
"step": 2325
},
{
"epoch": 0.7600258732212161,
"grad_norm": 0.2069474756717682,
"learning_rate": 7.179398676260923e-05,
"loss": 0.7163,
"mean_token_accuracy": 0.8117474913597107,
"step": 2350
},
{
"epoch": 0.768111254851229,
"grad_norm": 0.23127734661102295,
"learning_rate": 6.726380992479941e-05,
"loss": 0.6983,
"mean_token_accuracy": 0.8151715826988221,
"step": 2375
},
{
"epoch": 0.7761966364812419,
"grad_norm": 0.19075877964496613,
"learning_rate": 6.285895487295229e-05,
"loss": 0.6644,
"mean_token_accuracy": 0.8226857626438141,
"step": 2400
},
{
"epoch": 0.7842820181112549,
"grad_norm": 0.26920101046562195,
"learning_rate": 5.858244248816302e-05,
"loss": 0.678,
"mean_token_accuracy": 0.8184169673919678,
"step": 2425
},
{
"epoch": 0.7923673997412678,
"grad_norm": 0.22957506775856018,
"learning_rate": 5.443720563320792e-05,
"loss": 0.7125,
"mean_token_accuracy": 0.8165527045726776,
"step": 2450
},
{
"epoch": 0.8004527813712807,
"grad_norm": 0.23255349695682526,
"learning_rate": 5.042608714116612e-05,
"loss": 0.6648,
"mean_token_accuracy": 0.8205063927173615,
"step": 2475
},
{
"epoch": 0.8085381630012937,
"grad_norm": 0.21010981500148773,
"learning_rate": 4.655183786578426e-05,
"loss": 0.6833,
"mean_token_accuracy": 0.8196286606788635,
"step": 2500
},
{
"epoch": 0.8166235446313066,
"grad_norm": 0.21943055093288422,
"learning_rate": 4.2817114794921677e-05,
"loss": 0.6897,
"mean_token_accuracy": 0.8164256310462952,
"step": 2525
},
{
"epoch": 0.8247089262613195,
"grad_norm": 0.21263104677200317,
"learning_rate": 3.92244792283685e-05,
"loss": 0.6553,
"mean_token_accuracy": 0.8244921159744263,
"step": 2550
},
{
"epoch": 0.8327943078913325,
"grad_norm": 0.30405542254447937,
"learning_rate": 3.577639502128843e-05,
"loss": 0.6771,
"mean_token_accuracy": 0.8220798122882843,
"step": 2575
},
{
"epoch": 0.8408796895213454,
"grad_norm": 0.2544702887535095,
"learning_rate": 3.247522689448923e-05,
"loss": 0.6825,
"mean_token_accuracy": 0.8175348448753357,
"step": 2600
},
{
"epoch": 0.8489650711513583,
"grad_norm": 0.2168809473514557,
"learning_rate": 2.9323238812679982e-05,
"loss": 0.7437,
"mean_token_accuracy": 0.8062794303894043,
"step": 2625
},
{
"epoch": 0.8570504527813713,
"grad_norm": 0.32856041193008423,
"learning_rate": 2.6322592431828136e-05,
"loss": 0.6968,
"mean_token_accuracy": 0.8184567129611969,
"step": 2650
},
{
"epoch": 0.8651358344113842,
"grad_norm": 0.2495715171098709,
"learning_rate": 2.3475345616680327e-05,
"loss": 0.6864,
"mean_token_accuracy": 0.817262338399887,
"step": 2675
},
{
"epoch": 0.8732212160413971,
"grad_norm": 0.21743454039096832,
"learning_rate": 2.0783451029463995e-05,
"loss": 0.6554,
"mean_token_accuracy": 0.8261248970031738,
"step": 2700
},
{
"epoch": 0.8813065976714101,
"grad_norm": 0.19413378834724426,
"learning_rate": 1.8248754790737733e-05,
"loss": 0.691,
"mean_token_accuracy": 0.8169219958782196,
"step": 2725
},
{
"epoch": 0.889391979301423,
"grad_norm": 0.2755376100540161,
"learning_rate": 1.5872995213308566e-05,
"loss": 0.6868,
"mean_token_accuracy": 0.8156666767597198,
"step": 2750
},
{
"epoch": 0.8974773609314359,
"grad_norm": 0.23579077422618866,
"learning_rate": 1.3657801610084563e-05,
"loss": 0.669,
"mean_token_accuracy": 0.8254081463813782,
"step": 2775
},
{
"epoch": 0.9055627425614489,
"grad_norm": 0.21246632933616638,
"learning_rate": 1.1604693176680392e-05,
"loss": 0.6519,
"mean_token_accuracy": 0.8244655966758728,
"step": 2800
},
{
"epoch": 0.9136481241914618,
"grad_norm": 0.29115888476371765,
"learning_rate": 9.715077949542184e-06,
"loss": 0.6673,
"mean_token_accuracy": 0.825350991487503,
"step": 2825
},
{
"epoch": 0.9217335058214747,
"grad_norm": 0.24020685255527496,
"learning_rate": 7.990251840305996e-06,
"loss": 0.6349,
"mean_token_accuracy": 0.8334643471240998,
"step": 2850
},
{
"epoch": 0.9298188874514877,
"grad_norm": 0.2604895532131195,
"learning_rate": 6.431397747052342e-06,
"loss": 0.6659,
"mean_token_accuracy": 0.8195065236091614,
"step": 2875
},
{
"epoch": 0.9379042690815006,
"grad_norm": 0.19995881617069244,
"learning_rate": 5.039584743066344e-06,
"loss": 0.706,
"mean_token_accuracy": 0.8151924252510071,
"step": 2900
},
{
"epoch": 0.9459896507115135,
"grad_norm": 0.22976571321487427,
"learning_rate": 3.815767343659377e-06,
"loss": 0.6477,
"mean_token_accuracy": 0.8256394731998443,
"step": 2925
},
{
"epoch": 0.9540750323415266,
"grad_norm": 0.22688381373882294,
"learning_rate": 2.760784851555953e-06,
"loss": 0.672,
"mean_token_accuracy": 0.8214276111125947,
"step": 2950
},
{
"epoch": 0.9621604139715395,
"grad_norm": 0.23771828413009644,
"learning_rate": 1.875360781293689e-06,
"loss": 0.6642,
"mean_token_accuracy": 0.8211515319347381,
"step": 2975
},
{
"epoch": 0.9702457956015524,
"grad_norm": 0.22044353187084198,
"learning_rate": 1.1601023630319064e-06,
"loss": 0.6148,
"mean_token_accuracy": 0.838649377822876,
"step": 3000
},
{
"epoch": 0.9783311772315654,
"grad_norm": 0.21916313469409943,
"learning_rate": 6.155001261089477e-07,
"loss": 0.7242,
"mean_token_accuracy": 0.8094534778594971,
"step": 3025
},
{
"epoch": 0.9864165588615783,
"grad_norm": 0.20908524096012115,
"learning_rate": 2.4192756263349826e-07,
"loss": 0.7293,
"mean_token_accuracy": 0.8055576062202454,
"step": 3050
},
{
"epoch": 0.9945019404915912,
"grad_norm": 0.2526475489139557,
"learning_rate": 3.9640871341173336e-08,
"loss": 0.6961,
"mean_token_accuracy": 0.8134376859664917,
"step": 3075
}
],
"logging_steps": 25,
"max_steps": 3092,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2584006622747034e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}