klora_2000_skill / 15 /trainer_state.json
RayDu0010's picture
Upload folder using huggingface_hub
3e18738 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 734,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013623978201634877,
"grad_norm": 1.2644609212875366,
"learning_rate": 1.3043478260869566e-06,
"loss": 1.3371,
"step": 5
},
{
"epoch": 0.027247956403269755,
"grad_norm": 0.9776316285133362,
"learning_rate": 2.9347826086956523e-06,
"loss": 1.3342,
"step": 10
},
{
"epoch": 0.04087193460490463,
"grad_norm": 0.8481394052505493,
"learning_rate": 4.565217391304348e-06,
"loss": 1.3057,
"step": 15
},
{
"epoch": 0.05449591280653951,
"grad_norm": 0.6515122652053833,
"learning_rate": 6.195652173913044e-06,
"loss": 1.2589,
"step": 20
},
{
"epoch": 0.0681198910081744,
"grad_norm": 0.5874186158180237,
"learning_rate": 7.826086956521738e-06,
"loss": 1.3107,
"step": 25
},
{
"epoch": 0.08174386920980926,
"grad_norm": 0.5392442345619202,
"learning_rate": 9.456521739130436e-06,
"loss": 1.2746,
"step": 30
},
{
"epoch": 0.09536784741144415,
"grad_norm": 0.47512176632881165,
"learning_rate": 1.108695652173913e-05,
"loss": 1.187,
"step": 35
},
{
"epoch": 0.10899182561307902,
"grad_norm": 0.4385203421115875,
"learning_rate": 1.2717391304347827e-05,
"loss": 1.2059,
"step": 40
},
{
"epoch": 0.1226158038147139,
"grad_norm": 0.6362432241439819,
"learning_rate": 1.4347826086956522e-05,
"loss": 1.1893,
"step": 45
},
{
"epoch": 0.1362397820163488,
"grad_norm": 0.46012377738952637,
"learning_rate": 1.597826086956522e-05,
"loss": 1.2839,
"step": 50
},
{
"epoch": 0.14986376021798364,
"grad_norm": 0.5726909637451172,
"learning_rate": 1.7608695652173915e-05,
"loss": 1.1741,
"step": 55
},
{
"epoch": 0.16348773841961853,
"grad_norm": 0.4440523684024811,
"learning_rate": 1.9239130434782607e-05,
"loss": 1.2477,
"step": 60
},
{
"epoch": 0.1771117166212534,
"grad_norm": 0.4722035229206085,
"learning_rate": 2.0869565217391306e-05,
"loss": 1.1941,
"step": 65
},
{
"epoch": 0.1907356948228883,
"grad_norm": 0.45267796516418457,
"learning_rate": 2.25e-05,
"loss": 1.2442,
"step": 70
},
{
"epoch": 0.20435967302452315,
"grad_norm": 0.5661253929138184,
"learning_rate": 2.4130434782608697e-05,
"loss": 1.2069,
"step": 75
},
{
"epoch": 0.21798365122615804,
"grad_norm": 0.46782588958740234,
"learning_rate": 2.5760869565217392e-05,
"loss": 1.1675,
"step": 80
},
{
"epoch": 0.23160762942779292,
"grad_norm": 0.48947134613990784,
"learning_rate": 2.7391304347826085e-05,
"loss": 1.1297,
"step": 85
},
{
"epoch": 0.2452316076294278,
"grad_norm": 0.5822666883468628,
"learning_rate": 2.9021739130434783e-05,
"loss": 1.1361,
"step": 90
},
{
"epoch": 0.25885558583106266,
"grad_norm": 0.48768264055252075,
"learning_rate": 2.9999902540146195e-05,
"loss": 1.1555,
"step": 95
},
{
"epoch": 0.2724795640326976,
"grad_norm": 0.5380802750587463,
"learning_rate": 2.999880613133526e-05,
"loss": 1.1335,
"step": 100
},
{
"epoch": 0.28610354223433243,
"grad_norm": 0.5643327832221985,
"learning_rate": 2.9996491578238983e-05,
"loss": 1.0905,
"step": 105
},
{
"epoch": 0.2997275204359673,
"grad_norm": 0.5208688974380493,
"learning_rate": 2.9992959068836304e-05,
"loss": 1.1015,
"step": 110
},
{
"epoch": 0.3133514986376022,
"grad_norm": 0.5542858839035034,
"learning_rate": 2.99882088900238e-05,
"loss": 1.0951,
"step": 115
},
{
"epoch": 0.32697547683923706,
"grad_norm": 0.5248655080795288,
"learning_rate": 2.9982241427592387e-05,
"loss": 1.0697,
"step": 120
},
{
"epoch": 0.3405994550408719,
"grad_norm": 0.6014358401298523,
"learning_rate": 2.997505716619599e-05,
"loss": 1.0873,
"step": 125
},
{
"epoch": 0.3542234332425068,
"grad_norm": 0.6086240410804749,
"learning_rate": 2.996665668931218e-05,
"loss": 1.0565,
"step": 130
},
{
"epoch": 0.3678474114441417,
"grad_norm": 0.6661133766174316,
"learning_rate": 2.9957040679194782e-05,
"loss": 1.0233,
"step": 135
},
{
"epoch": 0.3814713896457766,
"grad_norm": 0.707312285900116,
"learning_rate": 2.9946209916818477e-05,
"loss": 1.0613,
"step": 140
},
{
"epoch": 0.39509536784741145,
"grad_norm": 0.5628401041030884,
"learning_rate": 2.9934165281815363e-05,
"loss": 0.9882,
"step": 145
},
{
"epoch": 0.4087193460490463,
"grad_norm": 0.6965730786323547,
"learning_rate": 2.9920907752403513e-05,
"loss": 0.9984,
"step": 150
},
{
"epoch": 0.4223433242506812,
"grad_norm": 0.7086770534515381,
"learning_rate": 2.9906438405307548e-05,
"loss": 0.9605,
"step": 155
},
{
"epoch": 0.4359673024523161,
"grad_norm": 0.6676215529441833,
"learning_rate": 2.989075841567115e-05,
"loss": 1.0023,
"step": 160
},
{
"epoch": 0.44959128065395093,
"grad_norm": 0.8493645191192627,
"learning_rate": 2.987386905696167e-05,
"loss": 0.9039,
"step": 165
},
{
"epoch": 0.46321525885558584,
"grad_norm": 0.7191267609596252,
"learning_rate": 2.9855771700866665e-05,
"loss": 0.9652,
"step": 170
},
{
"epoch": 0.4768392370572207,
"grad_norm": 0.7579267621040344,
"learning_rate": 2.983646781718251e-05,
"loss": 0.9237,
"step": 175
},
{
"epoch": 0.4904632152588556,
"grad_norm": 0.8027481436729431,
"learning_rate": 2.9815958973695034e-05,
"loss": 0.9653,
"step": 180
},
{
"epoch": 0.5040871934604905,
"grad_norm": 0.8199446797370911,
"learning_rate": 2.9794246836052167e-05,
"loss": 0.9864,
"step": 185
},
{
"epoch": 0.5177111716621253,
"grad_norm": 0.7790918946266174,
"learning_rate": 2.977133316762869e-05,
"loss": 0.9403,
"step": 190
},
{
"epoch": 0.5313351498637602,
"grad_norm": 0.7630456686019897,
"learning_rate": 2.9747219829382997e-05,
"loss": 0.9161,
"step": 195
},
{
"epoch": 0.5449591280653951,
"grad_norm": 0.8171895742416382,
"learning_rate": 2.9721908779705974e-05,
"loss": 0.8556,
"step": 200
},
{
"epoch": 0.55858310626703,
"grad_norm": 0.8095611929893494,
"learning_rate": 2.969540207426193e-05,
"loss": 0.9051,
"step": 205
},
{
"epoch": 0.5722070844686649,
"grad_norm": 0.7118027806282043,
"learning_rate": 2.9667701865821666e-05,
"loss": 0.9304,
"step": 210
},
{
"epoch": 0.5858310626702997,
"grad_norm": 0.897177517414093,
"learning_rate": 2.9638810404087603e-05,
"loss": 0.87,
"step": 215
},
{
"epoch": 0.5994550408719346,
"grad_norm": 0.7748181819915771,
"learning_rate": 2.960873003551111e-05,
"loss": 0.8723,
"step": 220
},
{
"epoch": 0.6130790190735694,
"grad_norm": 0.8167896270751953,
"learning_rate": 2.9577463203101897e-05,
"loss": 0.8648,
"step": 225
},
{
"epoch": 0.6267029972752044,
"grad_norm": 0.8939515352249146,
"learning_rate": 2.9545012446229613e-05,
"loss": 0.7818,
"step": 230
},
{
"epoch": 0.6403269754768393,
"grad_norm": 0.7820625305175781,
"learning_rate": 2.951138040041764e-05,
"loss": 0.9032,
"step": 235
},
{
"epoch": 0.6539509536784741,
"grad_norm": 0.844601571559906,
"learning_rate": 2.9476569797129e-05,
"loss": 0.8736,
"step": 240
},
{
"epoch": 0.667574931880109,
"grad_norm": 0.7247692942619324,
"learning_rate": 2.944058346354454e-05,
"loss": 0.8779,
"step": 245
},
{
"epoch": 0.6811989100817438,
"grad_norm": 0.8070963621139526,
"learning_rate": 2.9403424322333326e-05,
"loss": 0.8503,
"step": 250
},
{
"epoch": 0.6948228882833788,
"grad_norm": 0.8530706167221069,
"learning_rate": 2.9365095391415254e-05,
"loss": 0.8546,
"step": 255
},
{
"epoch": 0.7084468664850136,
"grad_norm": 0.8497718572616577,
"learning_rate": 2.932559978371596e-05,
"loss": 0.78,
"step": 260
},
{
"epoch": 0.7220708446866485,
"grad_norm": 0.9066139459609985,
"learning_rate": 2.928494070691401e-05,
"loss": 0.829,
"step": 265
},
{
"epoch": 0.7356948228882834,
"grad_norm": 0.7985149621963501,
"learning_rate": 2.9243121463180362e-05,
"loss": 0.8262,
"step": 270
},
{
"epoch": 0.7493188010899182,
"grad_norm": 1.0057857036590576,
"learning_rate": 2.9200145448910184e-05,
"loss": 0.7681,
"step": 275
},
{
"epoch": 0.7629427792915532,
"grad_norm": 0.9126638770103455,
"learning_rate": 2.915601615444703e-05,
"loss": 0.8171,
"step": 280
},
{
"epoch": 0.776566757493188,
"grad_norm": 0.8743240237236023,
"learning_rate": 2.9110737163799347e-05,
"loss": 0.7672,
"step": 285
},
{
"epoch": 0.7901907356948229,
"grad_norm": 0.8928736448287964,
"learning_rate": 2.9064312154349395e-05,
"loss": 0.7824,
"step": 290
},
{
"epoch": 0.8038147138964578,
"grad_norm": 1.0869660377502441,
"learning_rate": 2.9016744896554606e-05,
"loss": 0.7687,
"step": 295
},
{
"epoch": 0.8174386920980926,
"grad_norm": 0.9520823955535889,
"learning_rate": 2.8968039253641347e-05,
"loss": 0.7603,
"step": 300
},
{
"epoch": 0.8310626702997275,
"grad_norm": 0.9313263297080994,
"learning_rate": 2.8918199181291154e-05,
"loss": 0.7344,
"step": 305
},
{
"epoch": 0.8446866485013624,
"grad_norm": 0.8860709071159363,
"learning_rate": 2.8867228727319484e-05,
"loss": 0.7221,
"step": 310
},
{
"epoch": 0.8583106267029973,
"grad_norm": 1.0076904296875,
"learning_rate": 2.8815132031346967e-05,
"loss": 0.7163,
"step": 315
},
{
"epoch": 0.8719346049046321,
"grad_norm": 1.0047346353530884,
"learning_rate": 2.8761913324463193e-05,
"loss": 0.731,
"step": 320
},
{
"epoch": 0.885558583106267,
"grad_norm": 0.9905893802642822,
"learning_rate": 2.8707576928883083e-05,
"loss": 0.7234,
"step": 325
},
{
"epoch": 0.8991825613079019,
"grad_norm": 0.8848779797554016,
"learning_rate": 2.8652127257595852e-05,
"loss": 0.7241,
"step": 330
},
{
"epoch": 0.9128065395095368,
"grad_norm": 1.086945652961731,
"learning_rate": 2.8595568814006618e-05,
"loss": 0.7474,
"step": 335
},
{
"epoch": 0.9264305177111717,
"grad_norm": 1.058239221572876,
"learning_rate": 2.853790619157063e-05,
"loss": 0.6512,
"step": 340
},
{
"epoch": 0.9400544959128065,
"grad_norm": 0.9978867173194885,
"learning_rate": 2.8479144073420237e-05,
"loss": 0.6968,
"step": 345
},
{
"epoch": 0.9536784741144414,
"grad_norm": 0.9755434989929199,
"learning_rate": 2.841928723198449e-05,
"loss": 0.6774,
"step": 350
},
{
"epoch": 0.9673024523160763,
"grad_norm": 0.9939659833908081,
"learning_rate": 2.835834052860162e-05,
"loss": 0.6851,
"step": 355
},
{
"epoch": 0.9809264305177112,
"grad_norm": 0.919965922832489,
"learning_rate": 2.8296308913124137e-05,
"loss": 0.6636,
"step": 360
},
{
"epoch": 0.9945504087193461,
"grad_norm": 1.1084834337234497,
"learning_rate": 2.8233197423516885e-05,
"loss": 0.696,
"step": 365
},
{
"epoch": 1.008174386920981,
"grad_norm": 1.0081297159194946,
"learning_rate": 2.816901118544785e-05,
"loss": 0.6079,
"step": 370
},
{
"epoch": 1.021798365122616,
"grad_norm": 0.9290661811828613,
"learning_rate": 2.810375541187188e-05,
"loss": 0.5794,
"step": 375
},
{
"epoch": 1.0354223433242506,
"grad_norm": 1.0772560834884644,
"learning_rate": 2.80374354026073e-05,
"loss": 0.5495,
"step": 380
},
{
"epoch": 1.0490463215258856,
"grad_norm": 1.0354938507080078,
"learning_rate": 2.79700565439055e-05,
"loss": 0.6109,
"step": 385
},
{
"epoch": 1.0626702997275204,
"grad_norm": 1.0381073951721191,
"learning_rate": 2.7901624308013465e-05,
"loss": 0.5849,
"step": 390
},
{
"epoch": 1.0762942779291553,
"grad_norm": 1.146996021270752,
"learning_rate": 2.7832144252729354e-05,
"loss": 0.5798,
"step": 395
},
{
"epoch": 1.0899182561307903,
"grad_norm": 1.0819785594940186,
"learning_rate": 2.776162202095111e-05,
"loss": 0.584,
"step": 400
},
{
"epoch": 1.103542234332425,
"grad_norm": 1.090579628944397,
"learning_rate": 2.7690063340218173e-05,
"loss": 0.556,
"step": 405
},
{
"epoch": 1.11716621253406,
"grad_norm": 1.034417748451233,
"learning_rate": 2.7617474022246297e-05,
"loss": 0.5788,
"step": 410
},
{
"epoch": 1.1307901907356948,
"grad_norm": 1.1490421295166016,
"learning_rate": 2.7543859962455576e-05,
"loss": 0.5576,
"step": 415
},
{
"epoch": 1.1444141689373297,
"grad_norm": 1.0201855897903442,
"learning_rate": 2.7469227139491603e-05,
"loss": 0.5886,
"step": 420
},
{
"epoch": 1.1580381471389645,
"grad_norm": 1.01875901222229,
"learning_rate": 2.7393581614739924e-05,
"loss": 0.6062,
"step": 425
},
{
"epoch": 1.1716621253405994,
"grad_norm": 1.0762981176376343,
"learning_rate": 2.7316929531833775e-05,
"loss": 0.5389,
"step": 430
},
{
"epoch": 1.1852861035422344,
"grad_norm": 1.0140329599380493,
"learning_rate": 2.7239277116155077e-05,
"loss": 0.5462,
"step": 435
},
{
"epoch": 1.1989100817438691,
"grad_norm": 1.1165515184402466,
"learning_rate": 2.7160630674328893e-05,
"loss": 0.5596,
"step": 440
},
{
"epoch": 1.2125340599455041,
"grad_norm": 1.0689040422439575,
"learning_rate": 2.7080996593711172e-05,
"loss": 0.5137,
"step": 445
},
{
"epoch": 1.226158038147139,
"grad_norm": 1.2397655248641968,
"learning_rate": 2.700038134187002e-05,
"loss": 0.5643,
"step": 450
},
{
"epoch": 1.2397820163487738,
"grad_norm": 0.9885849952697754,
"learning_rate": 2.691879146606043e-05,
"loss": 0.5921,
"step": 455
},
{
"epoch": 1.2534059945504088,
"grad_norm": 1.279818058013916,
"learning_rate": 2.6836233592692544e-05,
"loss": 0.5126,
"step": 460
},
{
"epoch": 1.2670299727520435,
"grad_norm": 1.1189254522323608,
"learning_rate": 2.675271442679346e-05,
"loss": 0.5198,
"step": 465
},
{
"epoch": 1.2806539509536785,
"grad_norm": 1.1345500946044922,
"learning_rate": 2.6668240751462707e-05,
"loss": 0.5117,
"step": 470
},
{
"epoch": 1.2942779291553133,
"grad_norm": 1.1066192388534546,
"learning_rate": 2.6582819427321313e-05,
"loss": 0.5314,
"step": 475
},
{
"epoch": 1.3079019073569482,
"grad_norm": 1.0839710235595703,
"learning_rate": 2.649645739195464e-05,
"loss": 0.5382,
"step": 480
},
{
"epoch": 1.3215258855585832,
"grad_norm": 1.052043080329895,
"learning_rate": 2.640916165934893e-05,
"loss": 0.5135,
"step": 485
},
{
"epoch": 1.335149863760218,
"grad_norm": 1.1333352327346802,
"learning_rate": 2.6320939319321657e-05,
"loss": 0.5359,
"step": 490
},
{
"epoch": 1.348773841961853,
"grad_norm": 1.21845543384552,
"learning_rate": 2.623179753694573e-05,
"loss": 0.4853,
"step": 495
},
{
"epoch": 1.3623978201634879,
"grad_norm": 1.142262578010559,
"learning_rate": 2.614174355196754e-05,
"loss": 0.4993,
"step": 500
},
{
"epoch": 1.3760217983651226,
"grad_norm": 1.037720799446106,
"learning_rate": 2.6050784678219024e-05,
"loss": 0.512,
"step": 505
},
{
"epoch": 1.3896457765667574,
"grad_norm": 1.1667251586914062,
"learning_rate": 2.5958928303023634e-05,
"loss": 0.4788,
"step": 510
},
{
"epoch": 1.4032697547683923,
"grad_norm": 1.046761393547058,
"learning_rate": 2.5866181886596367e-05,
"loss": 0.4867,
"step": 515
},
{
"epoch": 1.4168937329700273,
"grad_norm": 1.1029340028762817,
"learning_rate": 2.5772552961437893e-05,
"loss": 0.4799,
"step": 520
},
{
"epoch": 1.430517711171662,
"grad_norm": 1.1235623359680176,
"learning_rate": 2.5678049131722772e-05,
"loss": 0.4752,
"step": 525
},
{
"epoch": 1.444141689373297,
"grad_norm": 1.064278483390808,
"learning_rate": 2.5582678072681903e-05,
"loss": 0.5173,
"step": 530
},
{
"epoch": 1.457765667574932,
"grad_norm": 1.13005793094635,
"learning_rate": 2.5486447529979136e-05,
"loss": 0.4451,
"step": 535
},
{
"epoch": 1.4713896457765667,
"grad_norm": 1.0552195310592651,
"learning_rate": 2.5389365319082226e-05,
"loss": 0.4595,
"step": 540
},
{
"epoch": 1.4850136239782017,
"grad_norm": 1.1078029870986938,
"learning_rate": 2.5291439324628084e-05,
"loss": 0.4693,
"step": 545
},
{
"epoch": 1.4986376021798364,
"grad_norm": 1.090710997581482,
"learning_rate": 2.5192677499782413e-05,
"loss": 0.4537,
"step": 550
},
{
"epoch": 1.5122615803814714,
"grad_norm": 1.037945032119751,
"learning_rate": 2.5093087865593784e-05,
"loss": 0.4556,
"step": 555
},
{
"epoch": 1.5258855585831061,
"grad_norm": 1.1792460680007935,
"learning_rate": 2.499267851034221e-05,
"loss": 0.4734,
"step": 560
},
{
"epoch": 1.5395095367847411,
"grad_norm": 1.0996177196502686,
"learning_rate": 2.4891457588882238e-05,
"loss": 0.4444,
"step": 565
},
{
"epoch": 1.553133514986376,
"grad_norm": 1.13473641872406,
"learning_rate": 2.478943332198062e-05,
"loss": 0.4513,
"step": 570
},
{
"epoch": 1.5667574931880108,
"grad_norm": 1.212607979774475,
"learning_rate": 2.468661399564871e-05,
"loss": 0.4506,
"step": 575
},
{
"epoch": 1.5803814713896458,
"grad_norm": 1.0172079801559448,
"learning_rate": 2.458300796046946e-05,
"loss": 0.4238,
"step": 580
},
{
"epoch": 1.5940054495912808,
"grad_norm": 1.3280726671218872,
"learning_rate": 2.4478623630919236e-05,
"loss": 0.4419,
"step": 585
},
{
"epoch": 1.6076294277929155,
"grad_norm": 1.1108782291412354,
"learning_rate": 2.437346948468441e-05,
"loss": 0.3942,
"step": 590
},
{
"epoch": 1.6212534059945503,
"grad_norm": 1.157791256904602,
"learning_rate": 2.4267554061972873e-05,
"loss": 0.397,
"step": 595
},
{
"epoch": 1.6348773841961854,
"grad_norm": 1.1420488357543945,
"learning_rate": 2.416088596482039e-05,
"loss": 0.4849,
"step": 600
},
{
"epoch": 1.6485013623978202,
"grad_norm": 1.264007329940796,
"learning_rate": 2.405347385639202e-05,
"loss": 0.4365,
"step": 605
},
{
"epoch": 1.662125340599455,
"grad_norm": 1.0671672821044922,
"learning_rate": 2.394532646027848e-05,
"loss": 0.4259,
"step": 610
},
{
"epoch": 1.67574931880109,
"grad_norm": 1.245835781097412,
"learning_rate": 2.3836452559787673e-05,
"loss": 0.4078,
"step": 615
},
{
"epoch": 1.6893732970027249,
"grad_norm": 1.1798008680343628,
"learning_rate": 2.3726860997231356e-05,
"loss": 0.4319,
"step": 620
},
{
"epoch": 1.7029972752043596,
"grad_norm": 1.3666014671325684,
"learning_rate": 2.3616560673206984e-05,
"loss": 0.4645,
"step": 625
},
{
"epoch": 1.7166212534059946,
"grad_norm": 1.0500986576080322,
"learning_rate": 2.3505560545874843e-05,
"loss": 0.4543,
"step": 630
},
{
"epoch": 1.7302452316076296,
"grad_norm": 1.4002113342285156,
"learning_rate": 2.3393869630230495e-05,
"loss": 0.3978,
"step": 635
},
{
"epoch": 1.7438692098092643,
"grad_norm": 1.170268177986145,
"learning_rate": 2.3281496997372625e-05,
"loss": 0.4355,
"step": 640
},
{
"epoch": 1.757493188010899,
"grad_norm": 1.0691126585006714,
"learning_rate": 2.316845177376633e-05,
"loss": 0.4097,
"step": 645
},
{
"epoch": 1.771117166212534,
"grad_norm": 1.1894110441207886,
"learning_rate": 2.3054743140501877e-05,
"loss": 0.4025,
"step": 650
},
{
"epoch": 1.784741144414169,
"grad_norm": 1.0559704303741455,
"learning_rate": 2.2940380332549086e-05,
"loss": 0.4237,
"step": 655
},
{
"epoch": 1.7983651226158037,
"grad_norm": 1.1340513229370117,
"learning_rate": 2.282537263800727e-05,
"loss": 0.3523,
"step": 660
},
{
"epoch": 1.8119891008174387,
"grad_norm": 1.1120586395263672,
"learning_rate": 2.2709729397350904e-05,
"loss": 0.4037,
"step": 665
},
{
"epoch": 1.8256130790190737,
"grad_norm": 1.1562973260879517,
"learning_rate": 2.2593460002671024e-05,
"loss": 0.3839,
"step": 670
},
{
"epoch": 1.8392370572207084,
"grad_norm": 1.2246955633163452,
"learning_rate": 2.247657389691247e-05,
"loss": 0.4188,
"step": 675
},
{
"epoch": 1.8528610354223434,
"grad_norm": 0.9505524635314941,
"learning_rate": 2.2359080573106913e-05,
"loss": 0.3348,
"step": 680
},
{
"epoch": 1.8664850136239783,
"grad_norm": 1.161018967628479,
"learning_rate": 2.2240989573601902e-05,
"loss": 0.3978,
"step": 685
},
{
"epoch": 1.880108991825613,
"grad_norm": 1.076042890548706,
"learning_rate": 2.212231048928587e-05,
"loss": 0.3608,
"step": 690
},
{
"epoch": 1.8937329700272478,
"grad_norm": 1.248965859413147,
"learning_rate": 2.2003052958809185e-05,
"loss": 0.3621,
"step": 695
},
{
"epoch": 1.9073569482288828,
"grad_norm": 1.0826005935668945,
"learning_rate": 2.1883226667801374e-05,
"loss": 0.3733,
"step": 700
},
{
"epoch": 1.9209809264305178,
"grad_norm": 1.1231858730316162,
"learning_rate": 2.1762841348084425e-05,
"loss": 0.3916,
"step": 705
},
{
"epoch": 1.9346049046321525,
"grad_norm": 1.1095854043960571,
"learning_rate": 2.164190677688248e-05,
"loss": 0.3406,
"step": 710
},
{
"epoch": 1.9482288828337875,
"grad_norm": 1.0947611331939697,
"learning_rate": 2.1520432776027723e-05,
"loss": 0.3511,
"step": 715
},
{
"epoch": 1.9618528610354224,
"grad_norm": 1.035023808479309,
"learning_rate": 2.1398429211162706e-05,
"loss": 0.3894,
"step": 720
},
{
"epoch": 1.9754768392370572,
"grad_norm": 1.166717529296875,
"learning_rate": 2.127590599093909e-05,
"loss": 0.3692,
"step": 725
},
{
"epoch": 1.989100817438692,
"grad_norm": 1.1927279233932495,
"learning_rate": 2.1152873066212913e-05,
"loss": 0.3278,
"step": 730
}
],
"logging_steps": 5,
"max_steps": 1835,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0749654008583946e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}