| { |
| "best_metric": 1.4403988122940063, |
| "best_model_checkpoint": "epoch_weights/checkpoint-39171", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 78342, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01276454519925455, |
| "grad_norm": 222.047119140625, |
| "learning_rate": 1.9948941819202982e-05, |
| "loss": 1.7112, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0255290903985091, |
| "grad_norm": 219.56724548339844, |
| "learning_rate": 1.9897883638405966e-05, |
| "loss": 1.6108, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.03829363559776365, |
| "grad_norm": 0.10834893584251404, |
| "learning_rate": 1.9846825457608946e-05, |
| "loss": 1.4721, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.0510581807970182, |
| "grad_norm": 4.082756996154785, |
| "learning_rate": 1.9795767276811927e-05, |
| "loss": 1.5408, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.06382272599627276, |
| "grad_norm": 216.5404052734375, |
| "learning_rate": 1.974470909601491e-05, |
| "loss": 1.5711, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.0765872711955273, |
| "grad_norm": 211.48715209960938, |
| "learning_rate": 1.969365091521789e-05, |
| "loss": 1.4534, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.08935181639478186, |
| "grad_norm": 0.7258212566375732, |
| "learning_rate": 1.9642592734420874e-05, |
| "loss": 1.4123, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.1021163615940364, |
| "grad_norm": 0.06425017863512039, |
| "learning_rate": 1.9591534553623858e-05, |
| "loss": 1.4799, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.11488090679329095, |
| "grad_norm": 0.02403501234948635, |
| "learning_rate": 1.954047637282684e-05, |
| "loss": 1.3951, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.1276454519925455, |
| "grad_norm": 29.32894515991211, |
| "learning_rate": 1.948941819202982e-05, |
| "loss": 1.4062, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.14040999719180006, |
| "grad_norm": 0.008796547539532185, |
| "learning_rate": 1.9438360011232803e-05, |
| "loss": 1.3253, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.1531745423910546, |
| "grad_norm": 0.0019760148134082556, |
| "learning_rate": 1.9387301830435783e-05, |
| "loss": 1.3675, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.16593908759030915, |
| "grad_norm": 189.2718963623047, |
| "learning_rate": 1.9336243649638764e-05, |
| "loss": 1.4932, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.17870363278956372, |
| "grad_norm": 204.2582550048828, |
| "learning_rate": 1.9285185468841747e-05, |
| "loss": 1.4275, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.19146817798881827, |
| "grad_norm": 0.02435586415231228, |
| "learning_rate": 1.9234127288044728e-05, |
| "loss": 1.3903, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.2042327231880728, |
| "grad_norm": 0.013938084244728088, |
| "learning_rate": 1.9183069107247708e-05, |
| "loss": 1.4386, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.21699726838732736, |
| "grad_norm": 3.0198631286621094, |
| "learning_rate": 1.9132010926450692e-05, |
| "loss": 1.3725, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.2297618135865819, |
| "grad_norm": 0.21919088065624237, |
| "learning_rate": 1.9080952745653672e-05, |
| "loss": 1.533, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.24252635878583645, |
| "grad_norm": 0.8041609525680542, |
| "learning_rate": 1.9029894564856656e-05, |
| "loss": 1.2956, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.255290903985091, |
| "grad_norm": 0.000984588055871427, |
| "learning_rate": 1.8978836384059636e-05, |
| "loss": 1.3231, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.26805544918434554, |
| "grad_norm": 0.005330982618033886, |
| "learning_rate": 1.892777820326262e-05, |
| "loss": 1.3237, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.2808199943836001, |
| "grad_norm": 0.009334022179245949, |
| "learning_rate": 1.8876720022465604e-05, |
| "loss": 1.5006, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.2935845395828547, |
| "grad_norm": 197.56333923339844, |
| "learning_rate": 1.8825661841668584e-05, |
| "loss": 1.4254, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.3063490847821092, |
| "grad_norm": 0.06512461602687836, |
| "learning_rate": 1.8774603660871565e-05, |
| "loss": 1.2744, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.3191136299813638, |
| "grad_norm": 0.009354041889309883, |
| "learning_rate": 1.872354548007455e-05, |
| "loss": 1.4442, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.3318781751806183, |
| "grad_norm": 0.26544374227523804, |
| "learning_rate": 1.867248729927753e-05, |
| "loss": 1.3993, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.34464272037987287, |
| "grad_norm": 175.60020446777344, |
| "learning_rate": 1.862142911848051e-05, |
| "loss": 1.4547, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.35740726557912744, |
| "grad_norm": 0.2694757878780365, |
| "learning_rate": 1.8570370937683493e-05, |
| "loss": 1.3493, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.37017181077838196, |
| "grad_norm": 0.12333753705024719, |
| "learning_rate": 1.8519312756886473e-05, |
| "loss": 1.4431, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.38293635597763653, |
| "grad_norm": 19.029407501220703, |
| "learning_rate": 1.8468254576089454e-05, |
| "loss": 1.3567, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.39570090117689105, |
| "grad_norm": 0.11109142750501633, |
| "learning_rate": 1.8417196395292437e-05, |
| "loss": 1.4003, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.4084654463761456, |
| "grad_norm": 0.2682472765445709, |
| "learning_rate": 1.8366138214495418e-05, |
| "loss": 1.3246, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.42122999157540014, |
| "grad_norm": 0.4953567683696747, |
| "learning_rate": 1.83150800336984e-05, |
| "loss": 1.4414, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.4339945367746547, |
| "grad_norm": 2.7462313175201416, |
| "learning_rate": 1.8264021852901382e-05, |
| "loss": 1.338, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.4467590819739093, |
| "grad_norm": 0.07703999429941177, |
| "learning_rate": 1.8212963672104366e-05, |
| "loss": 1.4822, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.4595236271731638, |
| "grad_norm": 198.0802764892578, |
| "learning_rate": 1.8161905491307346e-05, |
| "loss": 1.3102, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.4722881723724184, |
| "grad_norm": 0.03811747580766678, |
| "learning_rate": 1.811084731051033e-05, |
| "loss": 1.5193, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.4850527175716729, |
| "grad_norm": 0.2128518968820572, |
| "learning_rate": 1.805978912971331e-05, |
| "loss": 1.4392, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.49781726277092747, |
| "grad_norm": 0.06205818057060242, |
| "learning_rate": 1.800873094891629e-05, |
| "loss": 1.4332, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.510581807970182, |
| "grad_norm": 0.013560527004301548, |
| "learning_rate": 1.7957672768119274e-05, |
| "loss": 1.3889, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.5233463531694366, |
| "grad_norm": 12.423797607421875, |
| "learning_rate": 1.7906614587322255e-05, |
| "loss": 1.3728, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.5361108983686911, |
| "grad_norm": 196.36489868164062, |
| "learning_rate": 1.7855556406525235e-05, |
| "loss": 1.3271, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.5488754435679457, |
| "grad_norm": 195.81605529785156, |
| "learning_rate": 1.780449822572822e-05, |
| "loss": 1.4699, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.5616399887672002, |
| "grad_norm": 0.4498778283596039, |
| "learning_rate": 1.77534400449312e-05, |
| "loss": 1.3657, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.5744045339664547, |
| "grad_norm": 0.02744384855031967, |
| "learning_rate": 1.7702381864134183e-05, |
| "loss": 1.4181, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.5871690791657094, |
| "grad_norm": 0.8262328505516052, |
| "learning_rate": 1.7651323683337164e-05, |
| "loss": 1.4702, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.5999336243649639, |
| "grad_norm": 0.1493290364742279, |
| "learning_rate": 1.7600265502540147e-05, |
| "loss": 1.2346, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.6126981695642184, |
| "grad_norm": 0.16114306449890137, |
| "learning_rate": 1.7549207321743128e-05, |
| "loss": 1.2564, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.6254627147634729, |
| "grad_norm": 0.040154699236154556, |
| "learning_rate": 1.749814914094611e-05, |
| "loss": 1.401, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.6382272599627276, |
| "grad_norm": 0.10609349608421326, |
| "learning_rate": 1.7447090960149092e-05, |
| "loss": 1.4858, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.6509918051619821, |
| "grad_norm": 0.17226967215538025, |
| "learning_rate": 1.7396032779352072e-05, |
| "loss": 1.5393, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.6637563503612366, |
| "grad_norm": 124.09440612792969, |
| "learning_rate": 1.7344974598555056e-05, |
| "loss": 1.439, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.6765208955604912, |
| "grad_norm": 0.056608621031045914, |
| "learning_rate": 1.7293916417758036e-05, |
| "loss": 1.4665, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.6892854407597457, |
| "grad_norm": 171.162353515625, |
| "learning_rate": 1.7242858236961017e-05, |
| "loss": 1.4144, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.7020499859590003, |
| "grad_norm": 177.61441040039062, |
| "learning_rate": 1.7191800056164e-05, |
| "loss": 1.2879, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.7148145311582549, |
| "grad_norm": 1.4865118265151978, |
| "learning_rate": 1.714074187536698e-05, |
| "loss": 1.355, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.7275790763575094, |
| "grad_norm": 170.93099975585938, |
| "learning_rate": 1.708968369456996e-05, |
| "loss": 1.4421, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.7403436215567639, |
| "grad_norm": 0.5723881721496582, |
| "learning_rate": 1.7038625513772945e-05, |
| "loss": 1.3732, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.7531081667560184, |
| "grad_norm": 0.7495973110198975, |
| "learning_rate": 1.698756733297593e-05, |
| "loss": 1.5122, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.7658727119552731, |
| "grad_norm": 0.033791378140449524, |
| "learning_rate": 1.693650915217891e-05, |
| "loss": 1.3276, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.7786372571545276, |
| "grad_norm": 0.02948080748319626, |
| "learning_rate": 1.6885450971381893e-05, |
| "loss": 1.4913, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.7914018023537821, |
| "grad_norm": 0.064676433801651, |
| "learning_rate": 1.6834392790584873e-05, |
| "loss": 1.3377, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.8041663475530367, |
| "grad_norm": 0.05471309274435043, |
| "learning_rate": 1.6783334609787854e-05, |
| "loss": 1.464, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.8169308927522912, |
| "grad_norm": 2.461292028427124, |
| "learning_rate": 1.6732276428990837e-05, |
| "loss": 1.4163, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.8296954379515458, |
| "grad_norm": 0.15830573439598083, |
| "learning_rate": 1.6681218248193818e-05, |
| "loss": 1.3718, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.8424599831508003, |
| "grad_norm": 0.05488978698849678, |
| "learning_rate": 1.66301600673968e-05, |
| "loss": 1.1682, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.8552245283500549, |
| "grad_norm": 0.09544608741998672, |
| "learning_rate": 1.6579101886599782e-05, |
| "loss": 1.3872, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.8679890735493094, |
| "grad_norm": 0.15158647298812866, |
| "learning_rate": 1.6528043705802762e-05, |
| "loss": 1.3859, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.880753618748564, |
| "grad_norm": 0.0812167227268219, |
| "learning_rate": 1.6476985525005746e-05, |
| "loss": 1.4938, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.8935181639478186, |
| "grad_norm": 0.17645303905010223, |
| "learning_rate": 1.6425927344208727e-05, |
| "loss": 1.431, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.9062827091470731, |
| "grad_norm": 255.2915802001953, |
| "learning_rate": 1.6374869163411707e-05, |
| "loss": 1.3779, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.9190472543463276, |
| "grad_norm": 0.05036506801843643, |
| "learning_rate": 1.632381098261469e-05, |
| "loss": 1.4157, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.9318117995455822, |
| "grad_norm": 1.9150407314300537, |
| "learning_rate": 1.6272752801817674e-05, |
| "loss": 1.2288, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.9445763447448368, |
| "grad_norm": 0.12360112369060516, |
| "learning_rate": 1.6221694621020655e-05, |
| "loss": 1.6366, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.9573408899440913, |
| "grad_norm": 188.09226989746094, |
| "learning_rate": 1.617063644022364e-05, |
| "loss": 1.351, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.9701054351433458, |
| "grad_norm": 187.25831604003906, |
| "learning_rate": 1.611957825942662e-05, |
| "loss": 1.5859, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.9828699803426004, |
| "grad_norm": 1.1277300119400024, |
| "learning_rate": 1.60685200786296e-05, |
| "loss": 1.374, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.9956345255418549, |
| "grad_norm": 188.23500061035156, |
| "learning_rate": 1.6017461897832583e-05, |
| "loss": 1.4467, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7385220199372865, |
| "eval_f1": 0.7278749208513955, |
| "eval_loss": 1.4403988122940063, |
| "eval_precision": 0.7608186539048977, |
| "eval_recall": 0.6976657329598506, |
| "eval_runtime": 5324.8216, |
| "eval_samples_per_second": 4.013, |
| "eval_steps_per_second": 4.013, |
| "step": 39171 |
| }, |
| { |
| "epoch": 1.0083990707411095, |
| "grad_norm": 0.03257722407579422, |
| "learning_rate": 1.5966403717035563e-05, |
| "loss": 1.3554, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1.021163615940364, |
| "grad_norm": 190.5370330810547, |
| "learning_rate": 1.5915345536238544e-05, |
| "loss": 1.4967, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.0339281611396185, |
| "grad_norm": 188.77444458007812, |
| "learning_rate": 1.5864287355441528e-05, |
| "loss": 1.3658, |
| "step": 40500 |
| }, |
| { |
| "epoch": 1.0466927063388731, |
| "grad_norm": 186.61973571777344, |
| "learning_rate": 1.5813229174644508e-05, |
| "loss": 1.4126, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1.0594572515381278, |
| "grad_norm": 0.010557322762906551, |
| "learning_rate": 1.576217099384749e-05, |
| "loss": 1.3419, |
| "step": 41500 |
| }, |
| { |
| "epoch": 1.0722217967373822, |
| "grad_norm": 0.05373761057853699, |
| "learning_rate": 1.5711112813050472e-05, |
| "loss": 1.5751, |
| "step": 42000 |
| }, |
| { |
| "epoch": 1.0849863419366368, |
| "grad_norm": 187.22401428222656, |
| "learning_rate": 1.5660054632253453e-05, |
| "loss": 1.3604, |
| "step": 42500 |
| }, |
| { |
| "epoch": 1.0977508871358914, |
| "grad_norm": 0.10460960865020752, |
| "learning_rate": 1.5608996451456436e-05, |
| "loss": 1.3608, |
| "step": 43000 |
| }, |
| { |
| "epoch": 1.1105154323351458, |
| "grad_norm": 0.09090613573789597, |
| "learning_rate": 1.555793827065942e-05, |
| "loss": 1.2372, |
| "step": 43500 |
| }, |
| { |
| "epoch": 1.1232799775344005, |
| "grad_norm": 139.329833984375, |
| "learning_rate": 1.55068800898624e-05, |
| "loss": 1.6332, |
| "step": 44000 |
| }, |
| { |
| "epoch": 1.136044522733655, |
| "grad_norm": 0.05455148592591286, |
| "learning_rate": 1.545582190906538e-05, |
| "loss": 1.2923, |
| "step": 44500 |
| }, |
| { |
| "epoch": 1.1488090679329095, |
| "grad_norm": 0.43457621335983276, |
| "learning_rate": 1.5404763728268365e-05, |
| "loss": 1.4429, |
| "step": 45000 |
| }, |
| { |
| "epoch": 1.1615736131321641, |
| "grad_norm": 3.162357807159424, |
| "learning_rate": 1.5353705547471345e-05, |
| "loss": 1.4601, |
| "step": 45500 |
| }, |
| { |
| "epoch": 1.1743381583314187, |
| "grad_norm": 0.3721790015697479, |
| "learning_rate": 1.5302647366674325e-05, |
| "loss": 1.3188, |
| "step": 46000 |
| }, |
| { |
| "epoch": 1.1871027035306732, |
| "grad_norm": 0.47164618968963623, |
| "learning_rate": 1.5251589185877309e-05, |
| "loss": 1.3612, |
| "step": 46500 |
| }, |
| { |
| "epoch": 1.1998672487299278, |
| "grad_norm": 0.1116417869925499, |
| "learning_rate": 1.520053100508029e-05, |
| "loss": 1.3726, |
| "step": 47000 |
| }, |
| { |
| "epoch": 1.2126317939291824, |
| "grad_norm": 183.283203125, |
| "learning_rate": 1.5149472824283272e-05, |
| "loss": 1.4789, |
| "step": 47500 |
| }, |
| { |
| "epoch": 1.2253963391284368, |
| "grad_norm": 0.05037030950188637, |
| "learning_rate": 1.5098414643486254e-05, |
| "loss": 1.3543, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.2381608843276914, |
| "grad_norm": 0.074338898062706, |
| "learning_rate": 1.5047356462689236e-05, |
| "loss": 1.4093, |
| "step": 48500 |
| }, |
| { |
| "epoch": 1.250925429526946, |
| "grad_norm": 0.3007078468799591, |
| "learning_rate": 1.4996298281892216e-05, |
| "loss": 1.4532, |
| "step": 49000 |
| }, |
| { |
| "epoch": 1.2636899747262005, |
| "grad_norm": 0.05486530438065529, |
| "learning_rate": 1.49452401010952e-05, |
| "loss": 1.3855, |
| "step": 49500 |
| }, |
| { |
| "epoch": 1.2764545199254551, |
| "grad_norm": 0.04864773899316788, |
| "learning_rate": 1.489418192029818e-05, |
| "loss": 1.2134, |
| "step": 50000 |
| }, |
| { |
| "epoch": 1.2892190651247097, |
| "grad_norm": 4.833871841430664, |
| "learning_rate": 1.4843123739501162e-05, |
| "loss": 1.2763, |
| "step": 50500 |
| }, |
| { |
| "epoch": 1.3019836103239641, |
| "grad_norm": 0.05781451240181923, |
| "learning_rate": 1.4792065558704144e-05, |
| "loss": 1.5011, |
| "step": 51000 |
| }, |
| { |
| "epoch": 1.3147481555232188, |
| "grad_norm": 0.16321195662021637, |
| "learning_rate": 1.4741007377907126e-05, |
| "loss": 1.1684, |
| "step": 51500 |
| }, |
| { |
| "epoch": 1.3275127007224732, |
| "grad_norm": 0.02163376845419407, |
| "learning_rate": 1.4689949197110107e-05, |
| "loss": 1.2844, |
| "step": 52000 |
| }, |
| { |
| "epoch": 1.3402772459217278, |
| "grad_norm": 0.09530438482761383, |
| "learning_rate": 1.463889101631309e-05, |
| "loss": 1.2992, |
| "step": 52500 |
| }, |
| { |
| "epoch": 1.3530417911209824, |
| "grad_norm": 0.07205236703157425, |
| "learning_rate": 1.4587832835516071e-05, |
| "loss": 1.4271, |
| "step": 53000 |
| }, |
| { |
| "epoch": 1.3658063363202368, |
| "grad_norm": 0.19131618738174438, |
| "learning_rate": 1.4536774654719053e-05, |
| "loss": 1.3343, |
| "step": 53500 |
| }, |
| { |
| "epoch": 1.3785708815194915, |
| "grad_norm": 0.3695744276046753, |
| "learning_rate": 1.4485716473922035e-05, |
| "loss": 1.5796, |
| "step": 54000 |
| }, |
| { |
| "epoch": 1.3913354267187459, |
| "grad_norm": 0.03115621767938137, |
| "learning_rate": 1.4434658293125017e-05, |
| "loss": 1.3827, |
| "step": 54500 |
| }, |
| { |
| "epoch": 1.4040999719180005, |
| "grad_norm": 182.7641143798828, |
| "learning_rate": 1.4383600112328e-05, |
| "loss": 1.4294, |
| "step": 55000 |
| }, |
| { |
| "epoch": 1.4168645171172551, |
| "grad_norm": 0.031005267053842545, |
| "learning_rate": 1.4332541931530981e-05, |
| "loss": 1.2326, |
| "step": 55500 |
| }, |
| { |
| "epoch": 1.4296290623165095, |
| "grad_norm": 0.6513388752937317, |
| "learning_rate": 1.4281483750733962e-05, |
| "loss": 1.3676, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.4423936075157642, |
| "grad_norm": 0.037028513848781586, |
| "learning_rate": 1.4230425569936946e-05, |
| "loss": 1.2845, |
| "step": 56500 |
| }, |
| { |
| "epoch": 1.4551581527150188, |
| "grad_norm": 0.2638658285140991, |
| "learning_rate": 1.4179367389139926e-05, |
| "loss": 1.2656, |
| "step": 57000 |
| }, |
| { |
| "epoch": 1.4679226979142732, |
| "grad_norm": 0.08626891672611237, |
| "learning_rate": 1.4128309208342908e-05, |
| "loss": 1.3681, |
| "step": 57500 |
| }, |
| { |
| "epoch": 1.4806872431135278, |
| "grad_norm": 0.21934285759925842, |
| "learning_rate": 1.407725102754589e-05, |
| "loss": 1.5165, |
| "step": 58000 |
| }, |
| { |
| "epoch": 1.4934517883127825, |
| "grad_norm": 141.995849609375, |
| "learning_rate": 1.4026192846748872e-05, |
| "loss": 1.4522, |
| "step": 58500 |
| }, |
| { |
| "epoch": 1.5062163335120369, |
| "grad_norm": 0.1067727729678154, |
| "learning_rate": 1.3975134665951853e-05, |
| "loss": 1.3865, |
| "step": 59000 |
| }, |
| { |
| "epoch": 1.5189808787112915, |
| "grad_norm": 124.26591491699219, |
| "learning_rate": 1.3924076485154836e-05, |
| "loss": 1.4049, |
| "step": 59500 |
| }, |
| { |
| "epoch": 1.5317454239105461, |
| "grad_norm": 0.14310626685619354, |
| "learning_rate": 1.3873018304357817e-05, |
| "loss": 1.274, |
| "step": 60000 |
| }, |
| { |
| "epoch": 1.5445099691098005, |
| "grad_norm": 0.048091161996126175, |
| "learning_rate": 1.3821960123560799e-05, |
| "loss": 1.5721, |
| "step": 60500 |
| }, |
| { |
| "epoch": 1.5572745143090552, |
| "grad_norm": 11.530976295471191, |
| "learning_rate": 1.377090194276378e-05, |
| "loss": 1.4719, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.5700390595083098, |
| "grad_norm": 156.86859130859375, |
| "learning_rate": 1.3719843761966763e-05, |
| "loss": 1.309, |
| "step": 61500 |
| }, |
| { |
| "epoch": 1.5828036047075642, |
| "grad_norm": 0.02001790702342987, |
| "learning_rate": 1.3668785581169743e-05, |
| "loss": 1.4863, |
| "step": 62000 |
| }, |
| { |
| "epoch": 1.5955681499068188, |
| "grad_norm": 0.1691671907901764, |
| "learning_rate": 1.3617727400372727e-05, |
| "loss": 1.4029, |
| "step": 62500 |
| }, |
| { |
| "epoch": 1.6083326951060735, |
| "grad_norm": 9.837677955627441, |
| "learning_rate": 1.3566669219575707e-05, |
| "loss": 1.2785, |
| "step": 63000 |
| }, |
| { |
| "epoch": 1.6210972403053279, |
| "grad_norm": 0.523800790309906, |
| "learning_rate": 1.351561103877869e-05, |
| "loss": 1.3517, |
| "step": 63500 |
| }, |
| { |
| "epoch": 1.6338617855045825, |
| "grad_norm": 0.18429873883724213, |
| "learning_rate": 1.3464552857981672e-05, |
| "loss": 1.4001, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.6466263307038371, |
| "grad_norm": 0.12691473960876465, |
| "learning_rate": 1.3413494677184654e-05, |
| "loss": 1.3813, |
| "step": 64500 |
| }, |
| { |
| "epoch": 1.6593908759030915, |
| "grad_norm": 0.2408001571893692, |
| "learning_rate": 1.3362436496387634e-05, |
| "loss": 1.3847, |
| "step": 65000 |
| }, |
| { |
| "epoch": 1.6721554211023462, |
| "grad_norm": 0.29580166935920715, |
| "learning_rate": 1.3311378315590618e-05, |
| "loss": 1.2934, |
| "step": 65500 |
| }, |
| { |
| "epoch": 1.6849199663016008, |
| "grad_norm": 1.7998812198638916, |
| "learning_rate": 1.3260320134793598e-05, |
| "loss": 1.3437, |
| "step": 66000 |
| }, |
| { |
| "epoch": 1.6976845115008552, |
| "grad_norm": 0.16410896182060242, |
| "learning_rate": 1.3209261953996579e-05, |
| "loss": 1.3868, |
| "step": 66500 |
| }, |
| { |
| "epoch": 1.7104490567001098, |
| "grad_norm": 123.77171325683594, |
| "learning_rate": 1.3158203773199562e-05, |
| "loss": 1.3834, |
| "step": 67000 |
| }, |
| { |
| "epoch": 1.7232136018993645, |
| "grad_norm": 0.1973133236169815, |
| "learning_rate": 1.3107145592402544e-05, |
| "loss": 1.336, |
| "step": 67500 |
| }, |
| { |
| "epoch": 1.7359781470986189, |
| "grad_norm": 122.43399810791016, |
| "learning_rate": 1.3056087411605525e-05, |
| "loss": 1.3769, |
| "step": 68000 |
| }, |
| { |
| "epoch": 1.7487426922978733, |
| "grad_norm": 0.043416813015937805, |
| "learning_rate": 1.3005029230808509e-05, |
| "loss": 1.3546, |
| "step": 68500 |
| }, |
| { |
| "epoch": 1.7615072374971281, |
| "grad_norm": 176.3306121826172, |
| "learning_rate": 1.2953971050011489e-05, |
| "loss": 1.3697, |
| "step": 69000 |
| }, |
| { |
| "epoch": 1.7742717826963825, |
| "grad_norm": 0.12407238781452179, |
| "learning_rate": 1.290291286921447e-05, |
| "loss": 1.3679, |
| "step": 69500 |
| }, |
| { |
| "epoch": 1.787036327895637, |
| "grad_norm": 0.17243346571922302, |
| "learning_rate": 1.2851854688417453e-05, |
| "loss": 1.2599, |
| "step": 70000 |
| }, |
| { |
| "epoch": 1.7998008730948918, |
| "grad_norm": 0.9491329789161682, |
| "learning_rate": 1.2800796507620435e-05, |
| "loss": 1.4064, |
| "step": 70500 |
| }, |
| { |
| "epoch": 1.8125654182941462, |
| "grad_norm": 0.2661564350128174, |
| "learning_rate": 1.2749738326823416e-05, |
| "loss": 1.5705, |
| "step": 71000 |
| }, |
| { |
| "epoch": 1.8253299634934006, |
| "grad_norm": 0.3472389578819275, |
| "learning_rate": 1.26986801460264e-05, |
| "loss": 1.1724, |
| "step": 71500 |
| }, |
| { |
| "epoch": 1.8380945086926552, |
| "grad_norm": 181.07867431640625, |
| "learning_rate": 1.264762196522938e-05, |
| "loss": 1.3177, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.8508590538919099, |
| "grad_norm": 0.3067033290863037, |
| "learning_rate": 1.259656378443236e-05, |
| "loss": 1.2724, |
| "step": 72500 |
| }, |
| { |
| "epoch": 1.8636235990911643, |
| "grad_norm": 176.80873107910156, |
| "learning_rate": 1.2545505603635344e-05, |
| "loss": 1.6316, |
| "step": 73000 |
| }, |
| { |
| "epoch": 1.876388144290419, |
| "grad_norm": 0.07874713838100433, |
| "learning_rate": 1.2494447422838324e-05, |
| "loss": 1.4511, |
| "step": 73500 |
| }, |
| { |
| "epoch": 1.8891526894896735, |
| "grad_norm": 0.24819862842559814, |
| "learning_rate": 1.2443389242041306e-05, |
| "loss": 1.5432, |
| "step": 74000 |
| }, |
| { |
| "epoch": 1.901917234688928, |
| "grad_norm": 0.04649261757731438, |
| "learning_rate": 1.239233106124429e-05, |
| "loss": 1.438, |
| "step": 74500 |
| }, |
| { |
| "epoch": 1.9146817798881826, |
| "grad_norm": 9.840631484985352, |
| "learning_rate": 1.234127288044727e-05, |
| "loss": 1.449, |
| "step": 75000 |
| }, |
| { |
| "epoch": 1.9274463250874372, |
| "grad_norm": 0.10612073540687561, |
| "learning_rate": 1.229021469965025e-05, |
| "loss": 1.3352, |
| "step": 75500 |
| }, |
| { |
| "epoch": 1.9402108702866916, |
| "grad_norm": 0.09319282323122025, |
| "learning_rate": 1.2239156518853235e-05, |
| "loss": 1.3719, |
| "step": 76000 |
| }, |
| { |
| "epoch": 1.9529754154859462, |
| "grad_norm": 0.9062605500221252, |
| "learning_rate": 1.2188098338056215e-05, |
| "loss": 1.439, |
| "step": 76500 |
| }, |
| { |
| "epoch": 1.9657399606852008, |
| "grad_norm": 122.2042465209961, |
| "learning_rate": 1.2137040157259199e-05, |
| "loss": 1.4236, |
| "step": 77000 |
| }, |
| { |
| "epoch": 1.9785045058844553, |
| "grad_norm": 0.13181094825267792, |
| "learning_rate": 1.2085981976462179e-05, |
| "loss": 1.223, |
| "step": 77500 |
| }, |
| { |
| "epoch": 1.9912690510837099, |
| "grad_norm": 0.2945082485675812, |
| "learning_rate": 1.2034923795665161e-05, |
| "loss": 1.4358, |
| "step": 78000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7466654186362147, |
| "eval_f1": 0.7778552960971806, |
| "eval_loss": 1.5664894580841064, |
| "eval_precision": 0.6939298528227282, |
| "eval_recall": 0.8848739495798319, |
| "eval_runtime": 5324.3531, |
| "eval_samples_per_second": 4.013, |
| "eval_steps_per_second": 4.013, |
| "step": 78342 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 195855, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 1, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.504949469617357e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|