{
"best_metric": 1.4403988122940063,
"best_model_checkpoint": "epoch_weights/checkpoint-39171",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 78342,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01276454519925455,
"grad_norm": 222.047119140625,
"learning_rate": 1.9948941819202982e-05,
"loss": 1.7112,
"step": 500
},
{
"epoch": 0.0255290903985091,
"grad_norm": 219.56724548339844,
"learning_rate": 1.9897883638405966e-05,
"loss": 1.6108,
"step": 1000
},
{
"epoch": 0.03829363559776365,
"grad_norm": 0.10834893584251404,
"learning_rate": 1.9846825457608946e-05,
"loss": 1.4721,
"step": 1500
},
{
"epoch": 0.0510581807970182,
"grad_norm": 4.082756996154785,
"learning_rate": 1.9795767276811927e-05,
"loss": 1.5408,
"step": 2000
},
{
"epoch": 0.06382272599627276,
"grad_norm": 216.5404052734375,
"learning_rate": 1.974470909601491e-05,
"loss": 1.5711,
"step": 2500
},
{
"epoch": 0.0765872711955273,
"grad_norm": 211.48715209960938,
"learning_rate": 1.969365091521789e-05,
"loss": 1.4534,
"step": 3000
},
{
"epoch": 0.08935181639478186,
"grad_norm": 0.7258212566375732,
"learning_rate": 1.9642592734420874e-05,
"loss": 1.4123,
"step": 3500
},
{
"epoch": 0.1021163615940364,
"grad_norm": 0.06425017863512039,
"learning_rate": 1.9591534553623858e-05,
"loss": 1.4799,
"step": 4000
},
{
"epoch": 0.11488090679329095,
"grad_norm": 0.02403501234948635,
"learning_rate": 1.954047637282684e-05,
"loss": 1.3951,
"step": 4500
},
{
"epoch": 0.1276454519925455,
"grad_norm": 29.32894515991211,
"learning_rate": 1.948941819202982e-05,
"loss": 1.4062,
"step": 5000
},
{
"epoch": 0.14040999719180006,
"grad_norm": 0.008796547539532185,
"learning_rate": 1.9438360011232803e-05,
"loss": 1.3253,
"step": 5500
},
{
"epoch": 0.1531745423910546,
"grad_norm": 0.0019760148134082556,
"learning_rate": 1.9387301830435783e-05,
"loss": 1.3675,
"step": 6000
},
{
"epoch": 0.16593908759030915,
"grad_norm": 189.2718963623047,
"learning_rate": 1.9336243649638764e-05,
"loss": 1.4932,
"step": 6500
},
{
"epoch": 0.17870363278956372,
"grad_norm": 204.2582550048828,
"learning_rate": 1.9285185468841747e-05,
"loss": 1.4275,
"step": 7000
},
{
"epoch": 0.19146817798881827,
"grad_norm": 0.02435586415231228,
"learning_rate": 1.9234127288044728e-05,
"loss": 1.3903,
"step": 7500
},
{
"epoch": 0.2042327231880728,
"grad_norm": 0.013938084244728088,
"learning_rate": 1.9183069107247708e-05,
"loss": 1.4386,
"step": 8000
},
{
"epoch": 0.21699726838732736,
"grad_norm": 3.0198631286621094,
"learning_rate": 1.9132010926450692e-05,
"loss": 1.3725,
"step": 8500
},
{
"epoch": 0.2297618135865819,
"grad_norm": 0.21919088065624237,
"learning_rate": 1.9080952745653672e-05,
"loss": 1.533,
"step": 9000
},
{
"epoch": 0.24252635878583645,
"grad_norm": 0.8041609525680542,
"learning_rate": 1.9029894564856656e-05,
"loss": 1.2956,
"step": 9500
},
{
"epoch": 0.255290903985091,
"grad_norm": 0.000984588055871427,
"learning_rate": 1.8978836384059636e-05,
"loss": 1.3231,
"step": 10000
},
{
"epoch": 0.26805544918434554,
"grad_norm": 0.005330982618033886,
"learning_rate": 1.892777820326262e-05,
"loss": 1.3237,
"step": 10500
},
{
"epoch": 0.2808199943836001,
"grad_norm": 0.009334022179245949,
"learning_rate": 1.8876720022465604e-05,
"loss": 1.5006,
"step": 11000
},
{
"epoch": 0.2935845395828547,
"grad_norm": 197.56333923339844,
"learning_rate": 1.8825661841668584e-05,
"loss": 1.4254,
"step": 11500
},
{
"epoch": 0.3063490847821092,
"grad_norm": 0.06512461602687836,
"learning_rate": 1.8774603660871565e-05,
"loss": 1.2744,
"step": 12000
},
{
"epoch": 0.3191136299813638,
"grad_norm": 0.009354041889309883,
"learning_rate": 1.872354548007455e-05,
"loss": 1.4442,
"step": 12500
},
{
"epoch": 0.3318781751806183,
"grad_norm": 0.26544374227523804,
"learning_rate": 1.867248729927753e-05,
"loss": 1.3993,
"step": 13000
},
{
"epoch": 0.34464272037987287,
"grad_norm": 175.60020446777344,
"learning_rate": 1.862142911848051e-05,
"loss": 1.4547,
"step": 13500
},
{
"epoch": 0.35740726557912744,
"grad_norm": 0.2694757878780365,
"learning_rate": 1.8570370937683493e-05,
"loss": 1.3493,
"step": 14000
},
{
"epoch": 0.37017181077838196,
"grad_norm": 0.12333753705024719,
"learning_rate": 1.8519312756886473e-05,
"loss": 1.4431,
"step": 14500
},
{
"epoch": 0.38293635597763653,
"grad_norm": 19.029407501220703,
"learning_rate": 1.8468254576089454e-05,
"loss": 1.3567,
"step": 15000
},
{
"epoch": 0.39570090117689105,
"grad_norm": 0.11109142750501633,
"learning_rate": 1.8417196395292437e-05,
"loss": 1.4003,
"step": 15500
},
{
"epoch": 0.4084654463761456,
"grad_norm": 0.2682472765445709,
"learning_rate": 1.8366138214495418e-05,
"loss": 1.3246,
"step": 16000
},
{
"epoch": 0.42122999157540014,
"grad_norm": 0.4953567683696747,
"learning_rate": 1.83150800336984e-05,
"loss": 1.4414,
"step": 16500
},
{
"epoch": 0.4339945367746547,
"grad_norm": 2.7462313175201416,
"learning_rate": 1.8264021852901382e-05,
"loss": 1.338,
"step": 17000
},
{
"epoch": 0.4467590819739093,
"grad_norm": 0.07703999429941177,
"learning_rate": 1.8212963672104366e-05,
"loss": 1.4822,
"step": 17500
},
{
"epoch": 0.4595236271731638,
"grad_norm": 198.0802764892578,
"learning_rate": 1.8161905491307346e-05,
"loss": 1.3102,
"step": 18000
},
{
"epoch": 0.4722881723724184,
"grad_norm": 0.03811747580766678,
"learning_rate": 1.811084731051033e-05,
"loss": 1.5193,
"step": 18500
},
{
"epoch": 0.4850527175716729,
"grad_norm": 0.2128518968820572,
"learning_rate": 1.805978912971331e-05,
"loss": 1.4392,
"step": 19000
},
{
"epoch": 0.49781726277092747,
"grad_norm": 0.06205818057060242,
"learning_rate": 1.800873094891629e-05,
"loss": 1.4332,
"step": 19500
},
{
"epoch": 0.510581807970182,
"grad_norm": 0.013560527004301548,
"learning_rate": 1.7957672768119274e-05,
"loss": 1.3889,
"step": 20000
},
{
"epoch": 0.5233463531694366,
"grad_norm": 12.423797607421875,
"learning_rate": 1.7906614587322255e-05,
"loss": 1.3728,
"step": 20500
},
{
"epoch": 0.5361108983686911,
"grad_norm": 196.36489868164062,
"learning_rate": 1.7855556406525235e-05,
"loss": 1.3271,
"step": 21000
},
{
"epoch": 0.5488754435679457,
"grad_norm": 195.81605529785156,
"learning_rate": 1.780449822572822e-05,
"loss": 1.4699,
"step": 21500
},
{
"epoch": 0.5616399887672002,
"grad_norm": 0.4498778283596039,
"learning_rate": 1.77534400449312e-05,
"loss": 1.3657,
"step": 22000
},
{
"epoch": 0.5744045339664547,
"grad_norm": 0.02744384855031967,
"learning_rate": 1.7702381864134183e-05,
"loss": 1.4181,
"step": 22500
},
{
"epoch": 0.5871690791657094,
"grad_norm": 0.8262328505516052,
"learning_rate": 1.7651323683337164e-05,
"loss": 1.4702,
"step": 23000
},
{
"epoch": 0.5999336243649639,
"grad_norm": 0.1493290364742279,
"learning_rate": 1.7600265502540147e-05,
"loss": 1.2346,
"step": 23500
},
{
"epoch": 0.6126981695642184,
"grad_norm": 0.16114306449890137,
"learning_rate": 1.7549207321743128e-05,
"loss": 1.2564,
"step": 24000
},
{
"epoch": 0.6254627147634729,
"grad_norm": 0.040154699236154556,
"learning_rate": 1.749814914094611e-05,
"loss": 1.401,
"step": 24500
},
{
"epoch": 0.6382272599627276,
"grad_norm": 0.10609349608421326,
"learning_rate": 1.7447090960149092e-05,
"loss": 1.4858,
"step": 25000
},
{
"epoch": 0.6509918051619821,
"grad_norm": 0.17226967215538025,
"learning_rate": 1.7396032779352072e-05,
"loss": 1.5393,
"step": 25500
},
{
"epoch": 0.6637563503612366,
"grad_norm": 124.09440612792969,
"learning_rate": 1.7344974598555056e-05,
"loss": 1.439,
"step": 26000
},
{
"epoch": 0.6765208955604912,
"grad_norm": 0.056608621031045914,
"learning_rate": 1.7293916417758036e-05,
"loss": 1.4665,
"step": 26500
},
{
"epoch": 0.6892854407597457,
"grad_norm": 171.162353515625,
"learning_rate": 1.7242858236961017e-05,
"loss": 1.4144,
"step": 27000
},
{
"epoch": 0.7020499859590003,
"grad_norm": 177.61441040039062,
"learning_rate": 1.7191800056164e-05,
"loss": 1.2879,
"step": 27500
},
{
"epoch": 0.7148145311582549,
"grad_norm": 1.4865118265151978,
"learning_rate": 1.714074187536698e-05,
"loss": 1.355,
"step": 28000
},
{
"epoch": 0.7275790763575094,
"grad_norm": 170.93099975585938,
"learning_rate": 1.708968369456996e-05,
"loss": 1.4421,
"step": 28500
},
{
"epoch": 0.7403436215567639,
"grad_norm": 0.5723881721496582,
"learning_rate": 1.7038625513772945e-05,
"loss": 1.3732,
"step": 29000
},
{
"epoch": 0.7531081667560184,
"grad_norm": 0.7495973110198975,
"learning_rate": 1.698756733297593e-05,
"loss": 1.5122,
"step": 29500
},
{
"epoch": 0.7658727119552731,
"grad_norm": 0.033791378140449524,
"learning_rate": 1.693650915217891e-05,
"loss": 1.3276,
"step": 30000
},
{
"epoch": 0.7786372571545276,
"grad_norm": 0.02948080748319626,
"learning_rate": 1.6885450971381893e-05,
"loss": 1.4913,
"step": 30500
},
{
"epoch": 0.7914018023537821,
"grad_norm": 0.064676433801651,
"learning_rate": 1.6834392790584873e-05,
"loss": 1.3377,
"step": 31000
},
{
"epoch": 0.8041663475530367,
"grad_norm": 0.05471309274435043,
"learning_rate": 1.6783334609787854e-05,
"loss": 1.464,
"step": 31500
},
{
"epoch": 0.8169308927522912,
"grad_norm": 2.461292028427124,
"learning_rate": 1.6732276428990837e-05,
"loss": 1.4163,
"step": 32000
},
{
"epoch": 0.8296954379515458,
"grad_norm": 0.15830573439598083,
"learning_rate": 1.6681218248193818e-05,
"loss": 1.3718,
"step": 32500
},
{
"epoch": 0.8424599831508003,
"grad_norm": 0.05488978698849678,
"learning_rate": 1.66301600673968e-05,
"loss": 1.1682,
"step": 33000
},
{
"epoch": 0.8552245283500549,
"grad_norm": 0.09544608741998672,
"learning_rate": 1.6579101886599782e-05,
"loss": 1.3872,
"step": 33500
},
{
"epoch": 0.8679890735493094,
"grad_norm": 0.15158647298812866,
"learning_rate": 1.6528043705802762e-05,
"loss": 1.3859,
"step": 34000
},
{
"epoch": 0.880753618748564,
"grad_norm": 0.0812167227268219,
"learning_rate": 1.6476985525005746e-05,
"loss": 1.4938,
"step": 34500
},
{
"epoch": 0.8935181639478186,
"grad_norm": 0.17645303905010223,
"learning_rate": 1.6425927344208727e-05,
"loss": 1.431,
"step": 35000
},
{
"epoch": 0.9062827091470731,
"grad_norm": 255.2915802001953,
"learning_rate": 1.6374869163411707e-05,
"loss": 1.3779,
"step": 35500
},
{
"epoch": 0.9190472543463276,
"grad_norm": 0.05036506801843643,
"learning_rate": 1.632381098261469e-05,
"loss": 1.4157,
"step": 36000
},
{
"epoch": 0.9318117995455822,
"grad_norm": 1.9150407314300537,
"learning_rate": 1.6272752801817674e-05,
"loss": 1.2288,
"step": 36500
},
{
"epoch": 0.9445763447448368,
"grad_norm": 0.12360112369060516,
"learning_rate": 1.6221694621020655e-05,
"loss": 1.6366,
"step": 37000
},
{
"epoch": 0.9573408899440913,
"grad_norm": 188.09226989746094,
"learning_rate": 1.617063644022364e-05,
"loss": 1.351,
"step": 37500
},
{
"epoch": 0.9701054351433458,
"grad_norm": 187.25831604003906,
"learning_rate": 1.611957825942662e-05,
"loss": 1.5859,
"step": 38000
},
{
"epoch": 0.9828699803426004,
"grad_norm": 1.1277300119400024,
"learning_rate": 1.60685200786296e-05,
"loss": 1.374,
"step": 38500
},
{
"epoch": 0.9956345255418549,
"grad_norm": 188.23500061035156,
"learning_rate": 1.6017461897832583e-05,
"loss": 1.4467,
"step": 39000
},
{
"epoch": 1.0,
"eval_accuracy": 0.7385220199372865,
"eval_f1": 0.7278749208513955,
"eval_loss": 1.4403988122940063,
"eval_precision": 0.7608186539048977,
"eval_recall": 0.6976657329598506,
"eval_runtime": 5324.8216,
"eval_samples_per_second": 4.013,
"eval_steps_per_second": 4.013,
"step": 39171
},
{
"epoch": 1.0083990707411095,
"grad_norm": 0.03257722407579422,
"learning_rate": 1.5966403717035563e-05,
"loss": 1.3554,
"step": 39500
},
{
"epoch": 1.021163615940364,
"grad_norm": 190.5370330810547,
"learning_rate": 1.5915345536238544e-05,
"loss": 1.4967,
"step": 40000
},
{
"epoch": 1.0339281611396185,
"grad_norm": 188.77444458007812,
"learning_rate": 1.5864287355441528e-05,
"loss": 1.3658,
"step": 40500
},
{
"epoch": 1.0466927063388731,
"grad_norm": 186.61973571777344,
"learning_rate": 1.5813229174644508e-05,
"loss": 1.4126,
"step": 41000
},
{
"epoch": 1.0594572515381278,
"grad_norm": 0.010557322762906551,
"learning_rate": 1.576217099384749e-05,
"loss": 1.3419,
"step": 41500
},
{
"epoch": 1.0722217967373822,
"grad_norm": 0.05373761057853699,
"learning_rate": 1.5711112813050472e-05,
"loss": 1.5751,
"step": 42000
},
{
"epoch": 1.0849863419366368,
"grad_norm": 187.22401428222656,
"learning_rate": 1.5660054632253453e-05,
"loss": 1.3604,
"step": 42500
},
{
"epoch": 1.0977508871358914,
"grad_norm": 0.10460960865020752,
"learning_rate": 1.5608996451456436e-05,
"loss": 1.3608,
"step": 43000
},
{
"epoch": 1.1105154323351458,
"grad_norm": 0.09090613573789597,
"learning_rate": 1.555793827065942e-05,
"loss": 1.2372,
"step": 43500
},
{
"epoch": 1.1232799775344005,
"grad_norm": 139.329833984375,
"learning_rate": 1.55068800898624e-05,
"loss": 1.6332,
"step": 44000
},
{
"epoch": 1.136044522733655,
"grad_norm": 0.05455148592591286,
"learning_rate": 1.545582190906538e-05,
"loss": 1.2923,
"step": 44500
},
{
"epoch": 1.1488090679329095,
"grad_norm": 0.43457621335983276,
"learning_rate": 1.5404763728268365e-05,
"loss": 1.4429,
"step": 45000
},
{
"epoch": 1.1615736131321641,
"grad_norm": 3.162357807159424,
"learning_rate": 1.5353705547471345e-05,
"loss": 1.4601,
"step": 45500
},
{
"epoch": 1.1743381583314187,
"grad_norm": 0.3721790015697479,
"learning_rate": 1.5302647366674325e-05,
"loss": 1.3188,
"step": 46000
},
{
"epoch": 1.1871027035306732,
"grad_norm": 0.47164618968963623,
"learning_rate": 1.5251589185877309e-05,
"loss": 1.3612,
"step": 46500
},
{
"epoch": 1.1998672487299278,
"grad_norm": 0.1116417869925499,
"learning_rate": 1.520053100508029e-05,
"loss": 1.3726,
"step": 47000
},
{
"epoch": 1.2126317939291824,
"grad_norm": 183.283203125,
"learning_rate": 1.5149472824283272e-05,
"loss": 1.4789,
"step": 47500
},
{
"epoch": 1.2253963391284368,
"grad_norm": 0.05037030950188637,
"learning_rate": 1.5098414643486254e-05,
"loss": 1.3543,
"step": 48000
},
{
"epoch": 1.2381608843276914,
"grad_norm": 0.074338898062706,
"learning_rate": 1.5047356462689236e-05,
"loss": 1.4093,
"step": 48500
},
{
"epoch": 1.250925429526946,
"grad_norm": 0.3007078468799591,
"learning_rate": 1.4996298281892216e-05,
"loss": 1.4532,
"step": 49000
},
{
"epoch": 1.2636899747262005,
"grad_norm": 0.05486530438065529,
"learning_rate": 1.49452401010952e-05,
"loss": 1.3855,
"step": 49500
},
{
"epoch": 1.2764545199254551,
"grad_norm": 0.04864773899316788,
"learning_rate": 1.489418192029818e-05,
"loss": 1.2134,
"step": 50000
},
{
"epoch": 1.2892190651247097,
"grad_norm": 4.833871841430664,
"learning_rate": 1.4843123739501162e-05,
"loss": 1.2763,
"step": 50500
},
{
"epoch": 1.3019836103239641,
"grad_norm": 0.05781451240181923,
"learning_rate": 1.4792065558704144e-05,
"loss": 1.5011,
"step": 51000
},
{
"epoch": 1.3147481555232188,
"grad_norm": 0.16321195662021637,
"learning_rate": 1.4741007377907126e-05,
"loss": 1.1684,
"step": 51500
},
{
"epoch": 1.3275127007224732,
"grad_norm": 0.02163376845419407,
"learning_rate": 1.4689949197110107e-05,
"loss": 1.2844,
"step": 52000
},
{
"epoch": 1.3402772459217278,
"grad_norm": 0.09530438482761383,
"learning_rate": 1.463889101631309e-05,
"loss": 1.2992,
"step": 52500
},
{
"epoch": 1.3530417911209824,
"grad_norm": 0.07205236703157425,
"learning_rate": 1.4587832835516071e-05,
"loss": 1.4271,
"step": 53000
},
{
"epoch": 1.3658063363202368,
"grad_norm": 0.19131618738174438,
"learning_rate": 1.4536774654719053e-05,
"loss": 1.3343,
"step": 53500
},
{
"epoch": 1.3785708815194915,
"grad_norm": 0.3695744276046753,
"learning_rate": 1.4485716473922035e-05,
"loss": 1.5796,
"step": 54000
},
{
"epoch": 1.3913354267187459,
"grad_norm": 0.03115621767938137,
"learning_rate": 1.4434658293125017e-05,
"loss": 1.3827,
"step": 54500
},
{
"epoch": 1.4040999719180005,
"grad_norm": 182.7641143798828,
"learning_rate": 1.4383600112328e-05,
"loss": 1.4294,
"step": 55000
},
{
"epoch": 1.4168645171172551,
"grad_norm": 0.031005267053842545,
"learning_rate": 1.4332541931530981e-05,
"loss": 1.2326,
"step": 55500
},
{
"epoch": 1.4296290623165095,
"grad_norm": 0.6513388752937317,
"learning_rate": 1.4281483750733962e-05,
"loss": 1.3676,
"step": 56000
},
{
"epoch": 1.4423936075157642,
"grad_norm": 0.037028513848781586,
"learning_rate": 1.4230425569936946e-05,
"loss": 1.2845,
"step": 56500
},
{
"epoch": 1.4551581527150188,
"grad_norm": 0.2638658285140991,
"learning_rate": 1.4179367389139926e-05,
"loss": 1.2656,
"step": 57000
},
{
"epoch": 1.4679226979142732,
"grad_norm": 0.08626891672611237,
"learning_rate": 1.4128309208342908e-05,
"loss": 1.3681,
"step": 57500
},
{
"epoch": 1.4806872431135278,
"grad_norm": 0.21934285759925842,
"learning_rate": 1.407725102754589e-05,
"loss": 1.5165,
"step": 58000
},
{
"epoch": 1.4934517883127825,
"grad_norm": 141.995849609375,
"learning_rate": 1.4026192846748872e-05,
"loss": 1.4522,
"step": 58500
},
{
"epoch": 1.5062163335120369,
"grad_norm": 0.1067727729678154,
"learning_rate": 1.3975134665951853e-05,
"loss": 1.3865,
"step": 59000
},
{
"epoch": 1.5189808787112915,
"grad_norm": 124.26591491699219,
"learning_rate": 1.3924076485154836e-05,
"loss": 1.4049,
"step": 59500
},
{
"epoch": 1.5317454239105461,
"grad_norm": 0.14310626685619354,
"learning_rate": 1.3873018304357817e-05,
"loss": 1.274,
"step": 60000
},
{
"epoch": 1.5445099691098005,
"grad_norm": 0.048091161996126175,
"learning_rate": 1.3821960123560799e-05,
"loss": 1.5721,
"step": 60500
},
{
"epoch": 1.5572745143090552,
"grad_norm": 11.530976295471191,
"learning_rate": 1.377090194276378e-05,
"loss": 1.4719,
"step": 61000
},
{
"epoch": 1.5700390595083098,
"grad_norm": 156.86859130859375,
"learning_rate": 1.3719843761966763e-05,
"loss": 1.309,
"step": 61500
},
{
"epoch": 1.5828036047075642,
"grad_norm": 0.02001790702342987,
"learning_rate": 1.3668785581169743e-05,
"loss": 1.4863,
"step": 62000
},
{
"epoch": 1.5955681499068188,
"grad_norm": 0.1691671907901764,
"learning_rate": 1.3617727400372727e-05,
"loss": 1.4029,
"step": 62500
},
{
"epoch": 1.6083326951060735,
"grad_norm": 9.837677955627441,
"learning_rate": 1.3566669219575707e-05,
"loss": 1.2785,
"step": 63000
},
{
"epoch": 1.6210972403053279,
"grad_norm": 0.523800790309906,
"learning_rate": 1.351561103877869e-05,
"loss": 1.3517,
"step": 63500
},
{
"epoch": 1.6338617855045825,
"grad_norm": 0.18429873883724213,
"learning_rate": 1.3464552857981672e-05,
"loss": 1.4001,
"step": 64000
},
{
"epoch": 1.6466263307038371,
"grad_norm": 0.12691473960876465,
"learning_rate": 1.3413494677184654e-05,
"loss": 1.3813,
"step": 64500
},
{
"epoch": 1.6593908759030915,
"grad_norm": 0.2408001571893692,
"learning_rate": 1.3362436496387634e-05,
"loss": 1.3847,
"step": 65000
},
{
"epoch": 1.6721554211023462,
"grad_norm": 0.29580166935920715,
"learning_rate": 1.3311378315590618e-05,
"loss": 1.2934,
"step": 65500
},
{
"epoch": 1.6849199663016008,
"grad_norm": 1.7998812198638916,
"learning_rate": 1.3260320134793598e-05,
"loss": 1.3437,
"step": 66000
},
{
"epoch": 1.6976845115008552,
"grad_norm": 0.16410896182060242,
"learning_rate": 1.3209261953996579e-05,
"loss": 1.3868,
"step": 66500
},
{
"epoch": 1.7104490567001098,
"grad_norm": 123.77171325683594,
"learning_rate": 1.3158203773199562e-05,
"loss": 1.3834,
"step": 67000
},
{
"epoch": 1.7232136018993645,
"grad_norm": 0.1973133236169815,
"learning_rate": 1.3107145592402544e-05,
"loss": 1.336,
"step": 67500
},
{
"epoch": 1.7359781470986189,
"grad_norm": 122.43399810791016,
"learning_rate": 1.3056087411605525e-05,
"loss": 1.3769,
"step": 68000
},
{
"epoch": 1.7487426922978733,
"grad_norm": 0.043416813015937805,
"learning_rate": 1.3005029230808509e-05,
"loss": 1.3546,
"step": 68500
},
{
"epoch": 1.7615072374971281,
"grad_norm": 176.3306121826172,
"learning_rate": 1.2953971050011489e-05,
"loss": 1.3697,
"step": 69000
},
{
"epoch": 1.7742717826963825,
"grad_norm": 0.12407238781452179,
"learning_rate": 1.290291286921447e-05,
"loss": 1.3679,
"step": 69500
},
{
"epoch": 1.787036327895637,
"grad_norm": 0.17243346571922302,
"learning_rate": 1.2851854688417453e-05,
"loss": 1.2599,
"step": 70000
},
{
"epoch": 1.7998008730948918,
"grad_norm": 0.9491329789161682,
"learning_rate": 1.2800796507620435e-05,
"loss": 1.4064,
"step": 70500
},
{
"epoch": 1.8125654182941462,
"grad_norm": 0.2661564350128174,
"learning_rate": 1.2749738326823416e-05,
"loss": 1.5705,
"step": 71000
},
{
"epoch": 1.8253299634934006,
"grad_norm": 0.3472389578819275,
"learning_rate": 1.26986801460264e-05,
"loss": 1.1724,
"step": 71500
},
{
"epoch": 1.8380945086926552,
"grad_norm": 181.07867431640625,
"learning_rate": 1.264762196522938e-05,
"loss": 1.3177,
"step": 72000
},
{
"epoch": 1.8508590538919099,
"grad_norm": 0.3067033290863037,
"learning_rate": 1.259656378443236e-05,
"loss": 1.2724,
"step": 72500
},
{
"epoch": 1.8636235990911643,
"grad_norm": 176.80873107910156,
"learning_rate": 1.2545505603635344e-05,
"loss": 1.6316,
"step": 73000
},
{
"epoch": 1.876388144290419,
"grad_norm": 0.07874713838100433,
"learning_rate": 1.2494447422838324e-05,
"loss": 1.4511,
"step": 73500
},
{
"epoch": 1.8891526894896735,
"grad_norm": 0.24819862842559814,
"learning_rate": 1.2443389242041306e-05,
"loss": 1.5432,
"step": 74000
},
{
"epoch": 1.901917234688928,
"grad_norm": 0.04649261757731438,
"learning_rate": 1.239233106124429e-05,
"loss": 1.438,
"step": 74500
},
{
"epoch": 1.9146817798881826,
"grad_norm": 9.840631484985352,
"learning_rate": 1.234127288044727e-05,
"loss": 1.449,
"step": 75000
},
{
"epoch": 1.9274463250874372,
"grad_norm": 0.10612073540687561,
"learning_rate": 1.229021469965025e-05,
"loss": 1.3352,
"step": 75500
},
{
"epoch": 1.9402108702866916,
"grad_norm": 0.09319282323122025,
"learning_rate": 1.2239156518853235e-05,
"loss": 1.3719,
"step": 76000
},
{
"epoch": 1.9529754154859462,
"grad_norm": 0.9062605500221252,
"learning_rate": 1.2188098338056215e-05,
"loss": 1.439,
"step": 76500
},
{
"epoch": 1.9657399606852008,
"grad_norm": 122.2042465209961,
"learning_rate": 1.2137040157259199e-05,
"loss": 1.4236,
"step": 77000
},
{
"epoch": 1.9785045058844553,
"grad_norm": 0.13181094825267792,
"learning_rate": 1.2085981976462179e-05,
"loss": 1.223,
"step": 77500
},
{
"epoch": 1.9912690510837099,
"grad_norm": 0.2945082485675812,
"learning_rate": 1.2034923795665161e-05,
"loss": 1.4358,
"step": 78000
},
{
"epoch": 2.0,
"eval_accuracy": 0.7466654186362147,
"eval_f1": 0.7778552960971806,
"eval_loss": 1.5664894580841064,
"eval_precision": 0.6939298528227282,
"eval_recall": 0.8848739495798319,
"eval_runtime": 5324.3531,
"eval_samples_per_second": 4.013,
"eval_steps_per_second": 4.013,
"step": 78342
}
],
"logging_steps": 500,
"max_steps": 195855,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 1,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.504949469617357e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}