| { |
| "best_global_step": 26250, |
| "best_metric": 0.9502699810655684, |
| "best_model_checkpoint": "D:\\Task_design\\Topic\\strategy_train\\outputs\\qwen7b-lora-topic_strategy\\checkpoint-26250", |
| "epoch": 0.7518231711901361, |
| "eval_steps": 1250, |
| "global_step": 26250, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0014320441356002593, |
| "grad_norm": 608.0, |
| "learning_rate": 9.351145038167939e-06, |
| "loss": 28.2021, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0028640882712005185, |
| "grad_norm": 326.0, |
| "learning_rate": 1.8893129770992367e-05, |
| "loss": 13.4193, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.004296132406800777, |
| "grad_norm": 600.0, |
| "learning_rate": 2.8435114503816796e-05, |
| "loss": 8.5573, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.005728176542401037, |
| "grad_norm": 146.0, |
| "learning_rate": 3.797709923664122e-05, |
| "loss": 4.1909, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.007160220678001296, |
| "grad_norm": 372.0, |
| "learning_rate": 4.751908396946565e-05, |
| "loss": 3.6133, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.008592264813601555, |
| "grad_norm": 36.75, |
| "learning_rate": 5.7061068702290074e-05, |
| "loss": 2.8409, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.010024308949201815, |
| "grad_norm": 210.0, |
| "learning_rate": 6.66030534351145e-05, |
| "loss": 2.9306, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.011456353084802074, |
| "grad_norm": 0.00037384033203125, |
| "learning_rate": 7.614503816793893e-05, |
| "loss": 4.0291, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.012888397220402333, |
| "grad_norm": 24.875, |
| "learning_rate": 8.568702290076335e-05, |
| "loss": 4.0621, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.014320441356002592, |
| "grad_norm": 378.0, |
| "learning_rate": 9.522900763358779e-05, |
| "loss": 5.0668, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.015752485491602852, |
| "grad_norm": 20.5, |
| "learning_rate": 0.00010477099236641222, |
| "loss": 3.8491, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.01718452962720311, |
| "grad_norm": 478.0, |
| "learning_rate": 0.00011431297709923666, |
| "loss": 2.9158, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.01861657376280337, |
| "grad_norm": 53.5, |
| "learning_rate": 0.00012385496183206106, |
| "loss": 4.3353, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.02004861789840363, |
| "grad_norm": 0.0010986328125, |
| "learning_rate": 0.0001333969465648855, |
| "loss": 4.2307, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.021480662034003888, |
| "grad_norm": 44.75, |
| "learning_rate": 0.0001429389312977099, |
| "loss": 3.1633, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.022912706169604148, |
| "grad_norm": 396.0, |
| "learning_rate": 0.00015248091603053436, |
| "loss": 4.8827, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.024344750305204405, |
| "grad_norm": 3.838539123535156e-05, |
| "learning_rate": 0.0001620229007633588, |
| "loss": 3.0475, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.025776794440804666, |
| "grad_norm": 12.625, |
| "learning_rate": 0.0001715648854961832, |
| "loss": 4.3648, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.027208838576404926, |
| "grad_norm": 0.009033203125, |
| "learning_rate": 0.00018110687022900764, |
| "loss": 3.8295, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.028640882712005183, |
| "grad_norm": 304.0, |
| "learning_rate": 0.00019064885496183207, |
| "loss": 5.2518, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.030072926847605444, |
| "grad_norm": 872.0, |
| "learning_rate": 0.0001999940947206803, |
| "loss": 17.7632, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.031504970983205705, |
| "grad_norm": 328.0, |
| "learning_rate": 0.00019969883075469472, |
| "loss": 16.0223, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.03293701511880596, |
| "grad_norm": 468.0, |
| "learning_rate": 0.0001994035667887091, |
| "loss": 10.9938, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.03436905925440622, |
| "grad_norm": 462.0, |
| "learning_rate": 0.00019910830282272353, |
| "loss": 9.1089, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.03580110339000648, |
| "grad_norm": 43.25, |
| "learning_rate": 0.00019881303885673795, |
| "loss": 8.8076, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.03580110339000648, |
| "eval_accuracy": 0.473, |
| "eval_loss": 1.0368720293045044, |
| "eval_macro_f1": 0.3829550887916939, |
| "eval_runtime": 172.7823, |
| "eval_samples_per_second": 11.575, |
| "eval_steps_per_second": 11.575, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.03723314752560674, |
| "grad_norm": 276.0, |
| "learning_rate": 0.00019851777489075234, |
| "loss": 8.0924, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.038665191661207, |
| "grad_norm": 150.0, |
| "learning_rate": 0.00019822251092476676, |
| "loss": 7.168, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.04009723579680726, |
| "grad_norm": 282.0, |
| "learning_rate": 0.00019792724695878115, |
| "loss": 6.6729, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.041529279932407515, |
| "grad_norm": 155.0, |
| "learning_rate": 0.00019763198299279557, |
| "loss": 6.2658, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.042961324068007775, |
| "grad_norm": 95.0, |
| "learning_rate": 0.00019733671902680996, |
| "loss": 4.2749, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.044393368203608036, |
| "grad_norm": 121.5, |
| "learning_rate": 0.00019704145506082438, |
| "loss": 6.0376, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.045825412339208296, |
| "grad_norm": 484.0, |
| "learning_rate": 0.0001967461910948388, |
| "loss": 5.4624, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.04725745647480856, |
| "grad_norm": 83.0, |
| "learning_rate": 0.00019645092712885321, |
| "loss": 4.8571, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.04868950061040881, |
| "grad_norm": 86.0, |
| "learning_rate": 0.00019615566316286763, |
| "loss": 5.2631, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.05012154474600907, |
| "grad_norm": 0.35546875, |
| "learning_rate": 0.00019586039919688202, |
| "loss": 4.2013, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.05155358888160933, |
| "grad_norm": 58.0, |
| "learning_rate": 0.00019556513523089644, |
| "loss": 5.4813, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.05298563301720959, |
| "grad_norm": 868.0, |
| "learning_rate": 0.00019526987126491083, |
| "loss": 4.6324, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.05441767715280985, |
| "grad_norm": 169.0, |
| "learning_rate": 0.00019497460729892525, |
| "loss": 3.9849, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.055849721288410106, |
| "grad_norm": 4.28125, |
| "learning_rate": 0.00019467934333293967, |
| "loss": 3.2505, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.05728176542401037, |
| "grad_norm": 11.0625, |
| "learning_rate": 0.00019438407936695406, |
| "loss": 3.7368, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.05871380955961063, |
| "grad_norm": 40.75, |
| "learning_rate": 0.00019408881540096848, |
| "loss": 4.2252, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.06014585369521089, |
| "grad_norm": 2240.0, |
| "learning_rate": 0.00019379355143498287, |
| "loss": 3.8708, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.06157789783081115, |
| "grad_norm": 318.0, |
| "learning_rate": 0.0001934982874689973, |
| "loss": 3.7427, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.06300994196641141, |
| "grad_norm": 79.0, |
| "learning_rate": 0.00019320302350301168, |
| "loss": 2.5798, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.06444198610201167, |
| "grad_norm": 188.0, |
| "learning_rate": 0.0001929077595370261, |
| "loss": 3.2888, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.06587403023761192, |
| "grad_norm": 255.0, |
| "learning_rate": 0.00019261249557104052, |
| "loss": 3.5956, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.06730607437321218, |
| "grad_norm": 111.5, |
| "learning_rate": 0.0001923172316050549, |
| "loss": 2.6906, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.06873811850881244, |
| "grad_norm": 8.8125, |
| "learning_rate": 0.00019202196763906933, |
| "loss": 2.9821, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.0701701626444127, |
| "grad_norm": 62.25, |
| "learning_rate": 0.00019172670367308375, |
| "loss": 2.9432, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.07160220678001296, |
| "grad_norm": 268.0, |
| "learning_rate": 0.00019143143970709817, |
| "loss": 5.8543, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.07160220678001296, |
| "eval_accuracy": 0.8855, |
| "eval_loss": 0.4594672918319702, |
| "eval_macro_f1": 0.8847948863660271, |
| "eval_runtime": 174.3198, |
| "eval_samples_per_second": 11.473, |
| "eval_steps_per_second": 11.473, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.07303425091561322, |
| "grad_norm": 1.703125, |
| "learning_rate": 0.00019113617574111256, |
| "loss": 4.3718, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.07446629505121348, |
| "grad_norm": 6.125, |
| "learning_rate": 0.00019084091177512698, |
| "loss": 5.6269, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.07589833918681374, |
| "grad_norm": 2.796875, |
| "learning_rate": 0.0001905456478091414, |
| "loss": 4.2341, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.077330383322414, |
| "grad_norm": 608.0, |
| "learning_rate": 0.0001902503838431558, |
| "loss": 3.3186, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.07876242745801426, |
| "grad_norm": 140.0, |
| "learning_rate": 0.0001899551198771702, |
| "loss": 5.9126, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.08019447159361452, |
| "grad_norm": 824.0, |
| "learning_rate": 0.0001896598559111846, |
| "loss": 5.0582, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.08162651572921477, |
| "grad_norm": 165.0, |
| "learning_rate": 0.00018936459194519902, |
| "loss": 3.5105, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.08305855986481503, |
| "grad_norm": 0.0284423828125, |
| "learning_rate": 0.0001890693279792134, |
| "loss": 4.6236, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.08449060400041529, |
| "grad_norm": 116.5, |
| "learning_rate": 0.00018877406401322783, |
| "loss": 3.9021, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.08592264813601555, |
| "grad_norm": 0.251953125, |
| "learning_rate": 0.00018847880004724225, |
| "loss": 3.883, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.08735469227161581, |
| "grad_norm": 0.0260009765625, |
| "learning_rate": 0.00018818353608125664, |
| "loss": 3.9736, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.08878673640721607, |
| "grad_norm": 290.0, |
| "learning_rate": 0.00018788827211527106, |
| "loss": 5.218, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.09021878054281633, |
| "grad_norm": 1720.0, |
| "learning_rate": 0.00018759300814928548, |
| "loss": 3.2961, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.09165082467841659, |
| "grad_norm": 0.26953125, |
| "learning_rate": 0.0001872977441832999, |
| "loss": 3.4482, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.09308286881401685, |
| "grad_norm": 13.375, |
| "learning_rate": 0.0001870024802173143, |
| "loss": 2.928, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.09451491294961711, |
| "grad_norm": 73.0, |
| "learning_rate": 0.0001867072162513287, |
| "loss": 3.4569, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.09594695708521736, |
| "grad_norm": 9.25, |
| "learning_rate": 0.00018641195228534313, |
| "loss": 3.8492, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.09737900122081762, |
| "grad_norm": 274.0, |
| "learning_rate": 0.00018611668831935752, |
| "loss": 3.4008, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.09881104535641788, |
| "grad_norm": 158.0, |
| "learning_rate": 0.00018582142435337194, |
| "loss": 3.6703, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.10024308949201814, |
| "grad_norm": 264.0, |
| "learning_rate": 0.00018552616038738633, |
| "loss": 3.4321, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.1016751336276184, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00018523089642140075, |
| "loss": 2.4367, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.10310717776321866, |
| "grad_norm": 270.0, |
| "learning_rate": 0.00018493563245541514, |
| "loss": 3.6473, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.10453922189881892, |
| "grad_norm": 0.0478515625, |
| "learning_rate": 0.00018464036848942956, |
| "loss": 2.3759, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.10597126603441918, |
| "grad_norm": 282.0, |
| "learning_rate": 0.00018434510452344395, |
| "loss": 2.5434, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.10740331017001944, |
| "grad_norm": 100.0, |
| "learning_rate": 0.00018404984055745837, |
| "loss": 2.4411, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.10740331017001944, |
| "eval_accuracy": 0.911, |
| "eval_loss": 0.6009318232536316, |
| "eval_macro_f1": 0.9109307309196768, |
| "eval_runtime": 173.1976, |
| "eval_samples_per_second": 11.548, |
| "eval_steps_per_second": 11.548, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.1088353543056197, |
| "grad_norm": 8.875, |
| "learning_rate": 0.00018375457659147279, |
| "loss": 3.7952, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.11026739844121995, |
| "grad_norm": 408.0, |
| "learning_rate": 0.00018345931262548718, |
| "loss": 2.7528, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.11169944257682021, |
| "grad_norm": 4.65625, |
| "learning_rate": 0.0001831640486595016, |
| "loss": 3.0934, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.11313148671242047, |
| "grad_norm": 0.0274658203125, |
| "learning_rate": 0.00018286878469351601, |
| "loss": 3.3618, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.11456353084802073, |
| "grad_norm": 93.0, |
| "learning_rate": 0.00018257352072753043, |
| "loss": 3.635, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.115995574983621, |
| "grad_norm": 159.0, |
| "learning_rate": 0.00018227825676154482, |
| "loss": 2.3589, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.11742761911922125, |
| "grad_norm": 290.0, |
| "learning_rate": 0.00018198299279555924, |
| "loss": 3.9717, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.11885966325482152, |
| "grad_norm": 8.375, |
| "learning_rate": 0.00018168772882957366, |
| "loss": 3.0616, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.12029170739042178, |
| "grad_norm": 264.0, |
| "learning_rate": 0.00018139246486358805, |
| "loss": 3.4315, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.12172375152602204, |
| "grad_norm": 206.0, |
| "learning_rate": 0.00018109720089760247, |
| "loss": 3.3353, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.1231557956616223, |
| "grad_norm": 0.37109375, |
| "learning_rate": 0.00018080193693161686, |
| "loss": 2.7568, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.12458783979722254, |
| "grad_norm": 314.0, |
| "learning_rate": 0.00018050667296563128, |
| "loss": 3.0107, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.12601988393282282, |
| "grad_norm": 992.0, |
| "learning_rate": 0.00018021140899964567, |
| "loss": 2.8247, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.12745192806842306, |
| "grad_norm": 225.0, |
| "learning_rate": 0.0001799161450336601, |
| "loss": 3.3408, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.12888397220402334, |
| "grad_norm": 0.703125, |
| "learning_rate": 0.0001796208810676745, |
| "loss": 2.8974, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.13031601633962359, |
| "grad_norm": 280.0, |
| "learning_rate": 0.0001793256171016889, |
| "loss": 2.8223, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.13174806047522383, |
| "grad_norm": 180.0, |
| "learning_rate": 0.00017903035313570332, |
| "loss": 3.7603, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.1331801046108241, |
| "grad_norm": 6.28125, |
| "learning_rate": 0.00017873508916971774, |
| "loss": 4.2271, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.13461214874642435, |
| "grad_norm": 338.0, |
| "learning_rate": 0.00017843982520373216, |
| "loss": 3.2114, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.13604419288202463, |
| "grad_norm": 0.6796875, |
| "learning_rate": 0.00017814456123774655, |
| "loss": 3.4457, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.13747623701762487, |
| "grad_norm": 2.34375, |
| "learning_rate": 0.00017784929727176097, |
| "loss": 2.2643, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.13890828115322515, |
| "grad_norm": 288.0, |
| "learning_rate": 0.0001775540333057754, |
| "loss": 3.0672, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.1403403252888254, |
| "grad_norm": 1896.0, |
| "learning_rate": 0.00017725876933978978, |
| "loss": 2.8551, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.14177236942442567, |
| "grad_norm": 88.0, |
| "learning_rate": 0.0001769635053738042, |
| "loss": 3.5021, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.14320441356002592, |
| "grad_norm": 94.0, |
| "learning_rate": 0.0001766682414078186, |
| "loss": 2.1413, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.14320441356002592, |
| "eval_accuracy": 0.917, |
| "eval_loss": 0.3995007872581482, |
| "eval_macro_f1": 0.9161602620439439, |
| "eval_runtime": 179.9592, |
| "eval_samples_per_second": 11.114, |
| "eval_steps_per_second": 11.114, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.1446364576956262, |
| "grad_norm": 0.97265625, |
| "learning_rate": 0.000176372977441833, |
| "loss": 2.3626, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.14606850183122644, |
| "grad_norm": 266.0, |
| "learning_rate": 0.0001760777134758474, |
| "loss": 3.3284, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.14750054596682668, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 0.00017578244950986182, |
| "loss": 2.2628, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.14893259010242696, |
| "grad_norm": 237.0, |
| "learning_rate": 0.00017548718554387624, |
| "loss": 2.5359, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.1503646342380272, |
| "grad_norm": 65.5, |
| "learning_rate": 0.00017519192157789063, |
| "loss": 2.5109, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.15179667837362748, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 0.00017489665761190505, |
| "loss": 3.4319, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.15322872250922773, |
| "grad_norm": 140.0, |
| "learning_rate": 0.00017460139364591944, |
| "loss": 2.149, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.154660766644828, |
| "grad_norm": 74.0, |
| "learning_rate": 0.00017430612967993386, |
| "loss": 3.3437, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.15609281078042825, |
| "grad_norm": 160.0, |
| "learning_rate": 0.00017401086571394828, |
| "loss": 3.2952, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.15752485491602852, |
| "grad_norm": 0.451171875, |
| "learning_rate": 0.0001737156017479627, |
| "loss": 2.6442, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.15895689905162877, |
| "grad_norm": 246.0, |
| "learning_rate": 0.00017342033778197712, |
| "loss": 2.1805, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.16038894318722904, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 0.0001731250738159915, |
| "loss": 2.957, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.1618209873228293, |
| "grad_norm": 11.4375, |
| "learning_rate": 0.00017282980985000593, |
| "loss": 3.791, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.16325303145842954, |
| "grad_norm": 241.0, |
| "learning_rate": 0.00017253454588402032, |
| "loss": 2.3945, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.1646850755940298, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 0.00017223928191803474, |
| "loss": 2.3927, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.16611711972963006, |
| "grad_norm": 0.06494140625, |
| "learning_rate": 0.00017194401795204913, |
| "loss": 2.4573, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.16754916386523033, |
| "grad_norm": 0.34375, |
| "learning_rate": 0.00017164875398606355, |
| "loss": 2.6829, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.16898120800083058, |
| "grad_norm": 7.46875, |
| "learning_rate": 0.00017135349002007797, |
| "loss": 3.0156, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.17041325213643085, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 0.00017105822605409236, |
| "loss": 2.5155, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.1718452962720311, |
| "grad_norm": 3.65625, |
| "learning_rate": 0.00017076296208810678, |
| "loss": 2.5886, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.17327734040763138, |
| "grad_norm": 420.0, |
| "learning_rate": 0.00017046769812212117, |
| "loss": 3.7327, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.17470938454323162, |
| "grad_norm": 88.0, |
| "learning_rate": 0.00017017243415613559, |
| "loss": 4.1712, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.17614142867883187, |
| "grad_norm": 1864.0, |
| "learning_rate": 0.00016987717019015, |
| "loss": 3.0617, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.17757347281443214, |
| "grad_norm": 56.25, |
| "learning_rate": 0.00016958190622416442, |
| "loss": 2.6603, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.1790055169500324, |
| "grad_norm": 25.25, |
| "learning_rate": 0.00016928664225817884, |
| "loss": 2.7308, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.1790055169500324, |
| "eval_accuracy": 0.9195, |
| "eval_loss": 0.47414371371269226, |
| "eval_macro_f1": 0.9193664539192946, |
| "eval_runtime": 182.0886, |
| "eval_samples_per_second": 10.984, |
| "eval_steps_per_second": 10.984, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.18043756108563266, |
| "grad_norm": 66.5, |
| "learning_rate": 0.00016899137829219323, |
| "loss": 2.9805, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.1818696052212329, |
| "grad_norm": 119.0, |
| "learning_rate": 0.00016869611432620765, |
| "loss": 2.343, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.18330164935683319, |
| "grad_norm": 0.14453125, |
| "learning_rate": 0.00016840085036022204, |
| "loss": 2.5346, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.18473369349243343, |
| "grad_norm": 67.5, |
| "learning_rate": 0.00016810558639423646, |
| "loss": 2.6565, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.1861657376280337, |
| "grad_norm": 14.0, |
| "learning_rate": 0.00016781032242825085, |
| "loss": 3.2329, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.18759778176363395, |
| "grad_norm": 1408.0, |
| "learning_rate": 0.00016751505846226527, |
| "loss": 2.7886, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.18902982589923423, |
| "grad_norm": 23.0, |
| "learning_rate": 0.0001672197944962797, |
| "loss": 2.2165, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.19046187003483447, |
| "grad_norm": 88.0, |
| "learning_rate": 0.00016692453053029408, |
| "loss": 2.826, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.19189391417043472, |
| "grad_norm": 7.28125, |
| "learning_rate": 0.0001666292665643085, |
| "loss": 2.6884, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.193325958306035, |
| "grad_norm": 4.3125, |
| "learning_rate": 0.0001663340025983229, |
| "loss": 2.3811, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.19475800244163524, |
| "grad_norm": 2.78125, |
| "learning_rate": 0.0001660387386323373, |
| "loss": 2.1648, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.19619004657723552, |
| "grad_norm": 2.65625, |
| "learning_rate": 0.0001657434746663517, |
| "loss": 2.0769, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.19762209071283576, |
| "grad_norm": 0.337890625, |
| "learning_rate": 0.00016544821070036612, |
| "loss": 3.2644, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.19905413484843604, |
| "grad_norm": 5.15625, |
| "learning_rate": 0.00016515294673438054, |
| "loss": 3.1548, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.20048617898403628, |
| "grad_norm": 52.75, |
| "learning_rate": 0.00016485768276839496, |
| "loss": 2.3094, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.20191822311963656, |
| "grad_norm": 0.15625, |
| "learning_rate": 0.00016456241880240938, |
| "loss": 2.2522, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.2033502672552368, |
| "grad_norm": 0.09521484375, |
| "learning_rate": 0.00016426715483642377, |
| "loss": 2.1453, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.20478231139083705, |
| "grad_norm": 274.0, |
| "learning_rate": 0.0001639718908704382, |
| "loss": 2.8386, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.20621435552643733, |
| "grad_norm": 274.0, |
| "learning_rate": 0.00016367662690445258, |
| "loss": 3.5395, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.20764639966203757, |
| "grad_norm": 81.0, |
| "learning_rate": 0.000163381362938467, |
| "loss": 2.668, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.20907844379763785, |
| "grad_norm": 0.1162109375, |
| "learning_rate": 0.00016308609897248142, |
| "loss": 2.2543, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.2105104879332381, |
| "grad_norm": 0.05517578125, |
| "learning_rate": 0.0001627908350064958, |
| "loss": 2.4399, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.21194253206883837, |
| "grad_norm": 0.283203125, |
| "learning_rate": 0.00016249557104051023, |
| "loss": 2.0814, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.21337457620443862, |
| "grad_norm": 79.0, |
| "learning_rate": 0.00016220030707452462, |
| "loss": 3.2041, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.2148066203400389, |
| "grad_norm": 144.0, |
| "learning_rate": 0.00016190504310853904, |
| "loss": 1.962, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.2148066203400389, |
| "eval_accuracy": 0.93, |
| "eval_loss": 0.3529609441757202, |
| "eval_macro_f1": 0.9295120271109343, |
| "eval_runtime": 175.7548, |
| "eval_samples_per_second": 11.379, |
| "eval_steps_per_second": 11.379, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.21623866447563914, |
| "grad_norm": 2592.0, |
| "learning_rate": 0.00016160977914255343, |
| "loss": 2.7684, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.2176707086112394, |
| "grad_norm": 0.03271484375, |
| "learning_rate": 0.00016131451517656785, |
| "loss": 2.5066, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.21910275274683966, |
| "grad_norm": 0.09423828125, |
| "learning_rate": 0.00016101925121058227, |
| "loss": 2.6791, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.2205347968824399, |
| "grad_norm": 536.0, |
| "learning_rate": 0.0001607239872445967, |
| "loss": 3.3268, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.22196684101804018, |
| "grad_norm": 0.01007080078125, |
| "learning_rate": 0.0001604287232786111, |
| "loss": 2.2916, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.22339888515364043, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 0.0001601334593126255, |
| "loss": 2.8402, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.2248309292892407, |
| "grad_norm": 93.5, |
| "learning_rate": 0.00015983819534663992, |
| "loss": 2.5527, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.22626297342484095, |
| "grad_norm": 0.318359375, |
| "learning_rate": 0.0001595429313806543, |
| "loss": 3.0559, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.22769501756044122, |
| "grad_norm": 276.0, |
| "learning_rate": 0.00015924766741466873, |
| "loss": 1.8897, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.22912706169604147, |
| "grad_norm": 1.7421875, |
| "learning_rate": 0.00015895240344868315, |
| "loss": 1.9342, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.23055910583164174, |
| "grad_norm": 0.036865234375, |
| "learning_rate": 0.00015865713948269754, |
| "loss": 2.0979, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.231991149967242, |
| "grad_norm": 164.0, |
| "learning_rate": 0.00015836187551671196, |
| "loss": 2.2929, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.23342319410284226, |
| "grad_norm": 88.5, |
| "learning_rate": 0.00015806661155072635, |
| "loss": 3.0427, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.2348552382384425, |
| "grad_norm": 1104.0, |
| "learning_rate": 0.00015777134758474077, |
| "loss": 2.8966, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.23628728237404276, |
| "grad_norm": 520.0, |
| "learning_rate": 0.00015747608361875516, |
| "loss": 2.0752, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.23771932650964303, |
| "grad_norm": 0.07568359375, |
| "learning_rate": 0.00015718081965276958, |
| "loss": 1.7808, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.23915137064524328, |
| "grad_norm": 0.06982421875, |
| "learning_rate": 0.000156885555686784, |
| "loss": 2.9426, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.24058341478084355, |
| "grad_norm": 242.0, |
| "learning_rate": 0.00015659029172079839, |
| "loss": 2.3159, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.2420154589164438, |
| "grad_norm": 7.5, |
| "learning_rate": 0.0001562950277548128, |
| "loss": 2.6197, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.24344750305204407, |
| "grad_norm": 57.25, |
| "learning_rate": 0.00015599976378882722, |
| "loss": 2.6834, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.24487954718764432, |
| "grad_norm": 4.0, |
| "learning_rate": 0.00015570449982284164, |
| "loss": 2.116, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.2463115913232446, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 0.00015540923585685603, |
| "loss": 3.5668, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.24774363545884484, |
| "grad_norm": 240.0, |
| "learning_rate": 0.00015511397189087045, |
| "loss": 3.1473, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.2491756795944451, |
| "grad_norm": 117.5, |
| "learning_rate": 0.00015481870792488487, |
| "loss": 2.4813, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.25060772373004536, |
| "grad_norm": 7.46875, |
| "learning_rate": 0.00015452344395889926, |
| "loss": 1.8936, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.25060772373004536, |
| "eval_accuracy": 0.9365, |
| "eval_loss": 0.328545480966568, |
| "eval_macro_f1": 0.9360277798015127, |
| "eval_runtime": 178.5517, |
| "eval_samples_per_second": 11.201, |
| "eval_steps_per_second": 11.201, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.25203976786564564, |
| "grad_norm": 126.0, |
| "learning_rate": 0.00015422817999291368, |
| "loss": 2.649, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.25347181200124586, |
| "grad_norm": 0.06884765625, |
| "learning_rate": 0.00015393291602692807, |
| "loss": 2.8102, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.25490385613684613, |
| "grad_norm": 0.6015625, |
| "learning_rate": 0.0001536376520609425, |
| "loss": 2.4762, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.2563359002724464, |
| "grad_norm": 370.0, |
| "learning_rate": 0.00015334238809495688, |
| "loss": 2.1245, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.2577679444080467, |
| "grad_norm": 238.0, |
| "learning_rate": 0.0001530471241289713, |
| "loss": 1.4588, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.2591999885436469, |
| "grad_norm": 8.5625, |
| "learning_rate": 0.00015275186016298572, |
| "loss": 2.7869, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.26063203267924717, |
| "grad_norm": 118.5, |
| "learning_rate": 0.0001524565961970001, |
| "loss": 2.1987, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.26206407681484745, |
| "grad_norm": 37.25, |
| "learning_rate": 0.00015216133223101453, |
| "loss": 2.8539, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.26349612095044767, |
| "grad_norm": 0.62109375, |
| "learning_rate": 0.00015186606826502895, |
| "loss": 2.6421, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.26492816508604794, |
| "grad_norm": 0.404296875, |
| "learning_rate": 0.00015157080429904337, |
| "loss": 3.3623, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.2663602092216482, |
| "grad_norm": 0.09130859375, |
| "learning_rate": 0.00015127554033305776, |
| "loss": 2.6995, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.2677922533572485, |
| "grad_norm": 82.5, |
| "learning_rate": 0.00015098027636707218, |
| "loss": 1.8874, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.2692242974928487, |
| "grad_norm": 4416.0, |
| "learning_rate": 0.0001506850124010866, |
| "loss": 2.2107, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.270656341628449, |
| "grad_norm": 18.125, |
| "learning_rate": 0.000150389748435101, |
| "loss": 3.2056, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.27208838576404926, |
| "grad_norm": 3.09375, |
| "learning_rate": 0.0001500944844691154, |
| "loss": 2.9934, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.27352042989964953, |
| "grad_norm": 280.0, |
| "learning_rate": 0.0001497992205031298, |
| "loss": 2.2205, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.27495247403524975, |
| "grad_norm": 94.5, |
| "learning_rate": 0.00014950395653714422, |
| "loss": 2.5102, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.27638451817085, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 0.0001492086925711586, |
| "loss": 2.0138, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.2778165623064503, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00014891342860517303, |
| "loss": 1.556, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.2792486064420505, |
| "grad_norm": 0.494140625, |
| "learning_rate": 0.00014861816463918745, |
| "loss": 2.7351, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.2806806505776508, |
| "grad_norm": 0.1953125, |
| "learning_rate": 0.00014832290067320184, |
| "loss": 2.0641, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.28211269471325107, |
| "grad_norm": 0.421875, |
| "learning_rate": 0.00014802763670721626, |
| "loss": 2.642, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.28354473884885134, |
| "grad_norm": 292.0, |
| "learning_rate": 0.00014773237274123065, |
| "loss": 2.5676, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.28497678298445156, |
| "grad_norm": 4.40625, |
| "learning_rate": 0.00014743710877524507, |
| "loss": 2.5438, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.28640882712005183, |
| "grad_norm": 129.0, |
| "learning_rate": 0.0001471418448092595, |
| "loss": 3.0776, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.28640882712005183, |
| "eval_accuracy": 0.9335, |
| "eval_loss": 0.34245818853378296, |
| "eval_macro_f1": 0.9327568911653952, |
| "eval_runtime": 181.524, |
| "eval_samples_per_second": 11.018, |
| "eval_steps_per_second": 11.018, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.2878408712556521, |
| "grad_norm": 9.25, |
| "learning_rate": 0.0001468465808432739, |
| "loss": 2.2061, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.2892729153912524, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 0.00014655131687728832, |
| "loss": 2.6087, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.2907049595268526, |
| "grad_norm": 0.10107421875, |
| "learning_rate": 0.00014625605291130272, |
| "loss": 2.4579, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.2921370036624529, |
| "grad_norm": 0.33203125, |
| "learning_rate": 0.00014596078894531714, |
| "loss": 2.2037, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.29356904779805315, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 0.00014566552497933153, |
| "loss": 2.3772, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.29500109193365337, |
| "grad_norm": 80.5, |
| "learning_rate": 0.00014537026101334595, |
| "loss": 2.2901, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.29643313606925364, |
| "grad_norm": 21.0, |
| "learning_rate": 0.00014507499704736034, |
| "loss": 2.1736, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.2978651802048539, |
| "grad_norm": 168.0, |
| "learning_rate": 0.00014477973308137476, |
| "loss": 2.3493, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.2992972243404542, |
| "grad_norm": 229.0, |
| "learning_rate": 0.00014448446911538915, |
| "loss": 2.4933, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.3007292684760544, |
| "grad_norm": 274.0, |
| "learning_rate": 0.00014418920514940357, |
| "loss": 2.6936, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.3021613126116547, |
| "grad_norm": 272.0, |
| "learning_rate": 0.00014389394118341799, |
| "loss": 3.3879, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.30359335674725496, |
| "grad_norm": 0.0103759765625, |
| "learning_rate": 0.00014359867721743238, |
| "loss": 1.6445, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.30502540088285524, |
| "grad_norm": 0.02880859375, |
| "learning_rate": 0.0001433034132514468, |
| "loss": 1.5567, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.30645744501845545, |
| "grad_norm": 292.0, |
| "learning_rate": 0.00014300814928546121, |
| "loss": 2.5947, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.30788948915405573, |
| "grad_norm": 97.5, |
| "learning_rate": 0.00014271288531947563, |
| "loss": 2.7865, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.309321533289656, |
| "grad_norm": 79.5, |
| "learning_rate": 0.00014241762135349002, |
| "loss": 2.1275, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.3107535774252562, |
| "grad_norm": 298.0, |
| "learning_rate": 0.00014212235738750444, |
| "loss": 2.0145, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.3121856215608565, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 0.00014182709342151886, |
| "loss": 1.8322, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.31361766569645677, |
| "grad_norm": 142.0, |
| "learning_rate": 0.00014153182945553325, |
| "loss": 1.3864, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.31504970983205705, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.00014123656548954767, |
| "loss": 2.7755, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.31648175396765726, |
| "grad_norm": 82.0, |
| "learning_rate": 0.00014094130152356206, |
| "loss": 2.5528, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.31791379810325754, |
| "grad_norm": 80.0, |
| "learning_rate": 0.00014064603755757648, |
| "loss": 2.5284, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.3193458422388578, |
| "grad_norm": 0.05224609375, |
| "learning_rate": 0.00014035077359159087, |
| "loss": 2.8708, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.3207778863744581, |
| "grad_norm": 0.140625, |
| "learning_rate": 0.0001400555096256053, |
| "loss": 3.5295, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.3222099305100583, |
| "grad_norm": 0.050048828125, |
| "learning_rate": 0.0001397602456596197, |
| "loss": 3.325, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.3222099305100583, |
| "eval_accuracy": 0.94, |
| "eval_loss": 0.2819044888019562, |
| "eval_macro_f1": 0.9395225640341313, |
| "eval_runtime": 173.501, |
| "eval_samples_per_second": 11.527, |
| "eval_steps_per_second": 11.527, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.3236419746456586, |
| "grad_norm": 1.078125, |
| "learning_rate": 0.0001394649816936341, |
| "loss": 3.0985, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.32507401878125886, |
| "grad_norm": 116.0, |
| "learning_rate": 0.00013916971772764852, |
| "loss": 2.5793, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.3265060629168591, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 0.00013887445376166291, |
| "loss": 2.5646, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.32793810705245935, |
| "grad_norm": 306.0, |
| "learning_rate": 0.00013857918979567733, |
| "loss": 1.9864, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.3293701511880596, |
| "grad_norm": 274.0, |
| "learning_rate": 0.00013828392582969175, |
| "loss": 1.8868, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.3308021953236599, |
| "grad_norm": 0.08984375, |
| "learning_rate": 0.00013798866186370617, |
| "loss": 2.5106, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.3322342394592601, |
| "grad_norm": 270.0, |
| "learning_rate": 0.0001376933978977206, |
| "loss": 1.8537, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.3336662835948604, |
| "grad_norm": 270.0, |
| "learning_rate": 0.00013739813393173498, |
| "loss": 2.3735, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.33509832773046067, |
| "grad_norm": 0.416015625, |
| "learning_rate": 0.0001371028699657494, |
| "loss": 2.0794, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.3365303718660609, |
| "grad_norm": 0.32421875, |
| "learning_rate": 0.0001368076059997638, |
| "loss": 2.5114, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.33796241600166116, |
| "grad_norm": 0.0595703125, |
| "learning_rate": 0.0001365123420337782, |
| "loss": 2.199, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.33939446013726143, |
| "grad_norm": 61.75, |
| "learning_rate": 0.0001362170780677926, |
| "loss": 2.64, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.3408265042728617, |
| "grad_norm": 9.0, |
| "learning_rate": 0.00013592181410180702, |
| "loss": 1.8553, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.3422585484084619, |
| "grad_norm": 177.0, |
| "learning_rate": 0.00013562655013582144, |
| "loss": 1.6963, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.3436905925440622, |
| "grad_norm": 326.0, |
| "learning_rate": 0.00013533128616983583, |
| "loss": 3.007, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.3451226366796625, |
| "grad_norm": 0.06787109375, |
| "learning_rate": 0.00013503602220385025, |
| "loss": 1.6731, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.34655468081526275, |
| "grad_norm": 5.28125, |
| "learning_rate": 0.00013474075823786464, |
| "loss": 2.5167, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.34798672495086297, |
| "grad_norm": 0.11865234375, |
| "learning_rate": 0.00013444549427187906, |
| "loss": 3.4208, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.34941876908646324, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 0.00013415023030589348, |
| "loss": 1.3073, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.3508508132220635, |
| "grad_norm": 82.5, |
| "learning_rate": 0.0001338549663399079, |
| "loss": 2.3618, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.35228285735766374, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.00013355970237392232, |
| "loss": 2.0756, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.353714901493264, |
| "grad_norm": 120.0, |
| "learning_rate": 0.0001332644384079367, |
| "loss": 2.2016, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.3551469456288643, |
| "grad_norm": 5.34375, |
| "learning_rate": 0.00013296917444195113, |
| "loss": 2.7446, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.35657898976446456, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 0.00013267391047596552, |
| "loss": 2.3892, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.3580110339000648, |
| "grad_norm": 1.6640625, |
| "learning_rate": 0.00013237864650997994, |
| "loss": 2.3038, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.3580110339000648, |
| "eval_accuracy": 0.943, |
| "eval_loss": 0.2820850610733032, |
| "eval_macro_f1": 0.9423954094372721, |
| "eval_runtime": 180.0229, |
| "eval_samples_per_second": 11.11, |
| "eval_steps_per_second": 11.11, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.35944307803566505, |
| "grad_norm": 0.15625, |
| "learning_rate": 0.00013208338254399433, |
| "loss": 1.6647, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.36087512217126533, |
| "grad_norm": 0.06640625, |
| "learning_rate": 0.00013178811857800875, |
| "loss": 2.2751, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.3623071663068656, |
| "grad_norm": 0.984375, |
| "learning_rate": 0.00013149285461202316, |
| "loss": 2.7458, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.3637392104424658, |
| "grad_norm": 420.0, |
| "learning_rate": 0.00013119759064603756, |
| "loss": 2.2766, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.3651712545780661, |
| "grad_norm": 0.1103515625, |
| "learning_rate": 0.00013090232668005198, |
| "loss": 1.4627, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.36660329871366637, |
| "grad_norm": 0.25390625, |
| "learning_rate": 0.00013060706271406637, |
| "loss": 1.9492, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.3680353428492666, |
| "grad_norm": 0.232421875, |
| "learning_rate": 0.00013031179874808079, |
| "loss": 1.8883, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.36946738698486686, |
| "grad_norm": 0.205078125, |
| "learning_rate": 0.00013001653478209518, |
| "loss": 2.058, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.37089943112046714, |
| "grad_norm": 0.53515625, |
| "learning_rate": 0.0001297212708161096, |
| "loss": 2.7257, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.3723314752560674, |
| "grad_norm": 764.0, |
| "learning_rate": 0.00012942600685012401, |
| "loss": 2.4923, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.37376351939166763, |
| "grad_norm": 298.0, |
| "learning_rate": 0.00012913074288413843, |
| "loss": 1.8667, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.3751955635272679, |
| "grad_norm": 282.0, |
| "learning_rate": 0.00012883547891815285, |
| "loss": 2.4477, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.3766276076628682, |
| "grad_norm": 0.02587890625, |
| "learning_rate": 0.00012854021495216724, |
| "loss": 1.0699, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.37805965179846845, |
| "grad_norm": 160.0, |
| "learning_rate": 0.00012824495098618166, |
| "loss": 2.8487, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.3794916959340687, |
| "grad_norm": 216.0, |
| "learning_rate": 0.00012794968702019605, |
| "loss": 1.8886, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.38092374006966895, |
| "grad_norm": 302.0, |
| "learning_rate": 0.00012765442305421047, |
| "loss": 2.4619, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.3823557842052692, |
| "grad_norm": 172.0, |
| "learning_rate": 0.0001273591590882249, |
| "loss": 2.8237, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.38378782834086944, |
| "grad_norm": 91.5, |
| "learning_rate": 0.00012706389512223928, |
| "loss": 2.7431, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.3852198724764697, |
| "grad_norm": 0.275390625, |
| "learning_rate": 0.0001267686311562537, |
| "loss": 1.9888, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.38665191661207, |
| "grad_norm": 194.0, |
| "learning_rate": 0.0001264733671902681, |
| "loss": 2.5123, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.38808396074767026, |
| "grad_norm": 396.0, |
| "learning_rate": 0.0001261781032242825, |
| "loss": 1.9384, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.3895160048832705, |
| "grad_norm": 54.75, |
| "learning_rate": 0.0001258828392582969, |
| "loss": 1.9031, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.39094804901887076, |
| "grad_norm": 0.828125, |
| "learning_rate": 0.00012558757529231132, |
| "loss": 2.65, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.39238009315447103, |
| "grad_norm": 36.25, |
| "learning_rate": 0.00012529231132632574, |
| "loss": 2.2794, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.3938121372900713, |
| "grad_norm": 0.11962890625, |
| "learning_rate": 0.00012499704736034016, |
| "loss": 2.1189, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.3938121372900713, |
| "eval_accuracy": 0.9465, |
| "eval_loss": 0.31273505091667175, |
| "eval_macro_f1": 0.9457595736365828, |
| "eval_runtime": 172.8312, |
| "eval_samples_per_second": 11.572, |
| "eval_steps_per_second": 11.572, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.3952441814256715, |
| "grad_norm": 398.0, |
| "learning_rate": 0.00012470178339435458, |
| "loss": 2.5979, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.3966762255612718, |
| "grad_norm": 266.0, |
| "learning_rate": 0.00012440651942836897, |
| "loss": 2.8401, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.3981082696968721, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 0.0001241112554623834, |
| "loss": 1.9365, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.3995403138324723, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 0.00012381599149639778, |
| "loss": 0.9845, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.40097235796807257, |
| "grad_norm": 130.0, |
| "learning_rate": 0.0001235207275304122, |
| "loss": 2.9002, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.40240440210367284, |
| "grad_norm": 0.0045166015625, |
| "learning_rate": 0.00012322546356442662, |
| "loss": 2.1528, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.4038364462392731, |
| "grad_norm": 0.39453125, |
| "learning_rate": 0.000122930199598441, |
| "loss": 2.4575, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.40526849037487334, |
| "grad_norm": 241.0, |
| "learning_rate": 0.00012263493563245543, |
| "loss": 2.475, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.4067005345104736, |
| "grad_norm": 37.5, |
| "learning_rate": 0.00012233967166646982, |
| "loss": 1.6529, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.4081325786460739, |
| "grad_norm": 272.0, |
| "learning_rate": 0.00012204440770048424, |
| "loss": 2.922, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.4095646227816741, |
| "grad_norm": 3.734375, |
| "learning_rate": 0.00012174914373449864, |
| "loss": 2.2361, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.4109966669172744, |
| "grad_norm": 338.0, |
| "learning_rate": 0.00012145387976851306, |
| "loss": 1.5299, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.41242871105287465, |
| "grad_norm": 0.043212890625, |
| "learning_rate": 0.00012115861580252748, |
| "loss": 2.5728, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.4138607551884749, |
| "grad_norm": 0.15625, |
| "learning_rate": 0.00012086335183654187, |
| "loss": 2.122, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.41529279932407515, |
| "grad_norm": 0.008056640625, |
| "learning_rate": 0.00012056808787055629, |
| "loss": 2.4863, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.4167248434596754, |
| "grad_norm": 32.5, |
| "learning_rate": 0.00012027282390457068, |
| "loss": 3.3401, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.4181568875952757, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 0.0001199775599385851, |
| "loss": 2.0398, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.41958893173087597, |
| "grad_norm": 0.326171875, |
| "learning_rate": 0.00011968229597259951, |
| "loss": 2.8364, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.4210209758664762, |
| "grad_norm": 0.15625, |
| "learning_rate": 0.00011938703200661391, |
| "loss": 1.4864, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.42245302000207646, |
| "grad_norm": 80.0, |
| "learning_rate": 0.00011909176804062833, |
| "loss": 2.3397, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.42388506413767674, |
| "grad_norm": 101.5, |
| "learning_rate": 0.00011879650407464274, |
| "loss": 3.3186, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.42531710827327696, |
| "grad_norm": 2.6875, |
| "learning_rate": 0.00011850124010865716, |
| "loss": 2.7605, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.42674915240887723, |
| "grad_norm": 95.0, |
| "learning_rate": 0.00011820597614267155, |
| "loss": 1.9215, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.4281811965444775, |
| "grad_norm": 0.65625, |
| "learning_rate": 0.00011791071217668597, |
| "loss": 2.0689, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.4296132406800778, |
| "grad_norm": 0.66015625, |
| "learning_rate": 0.00011761544821070036, |
| "loss": 3.5633, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.4296132406800778, |
| "eval_accuracy": 0.942, |
| "eval_loss": 0.27190178632736206, |
| "eval_macro_f1": 0.9413541591870516, |
| "eval_runtime": 181.3524, |
| "eval_samples_per_second": 11.028, |
| "eval_steps_per_second": 11.028, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.431045284815678, |
| "grad_norm": 42.25, |
| "learning_rate": 0.00011732018424471478, |
| "loss": 1.8073, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.4324773289512783, |
| "grad_norm": 0.22265625, |
| "learning_rate": 0.0001170249202787292, |
| "loss": 1.777, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.43390937308687855, |
| "grad_norm": 0.11328125, |
| "learning_rate": 0.0001167296563127436, |
| "loss": 1.9598, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.4353414172224788, |
| "grad_norm": 0.095703125, |
| "learning_rate": 0.00011643439234675802, |
| "loss": 2.7789, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.43677346135807904, |
| "grad_norm": 0.8125, |
| "learning_rate": 0.00011613912838077241, |
| "loss": 2.3985, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.4382055054936793, |
| "grad_norm": 0.22265625, |
| "learning_rate": 0.00011584386441478683, |
| "loss": 1.6076, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.4396375496292796, |
| "grad_norm": 0.578125, |
| "learning_rate": 0.00011554860044880122, |
| "loss": 2.9266, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.4410695937648798, |
| "grad_norm": 0.0130615234375, |
| "learning_rate": 0.00011525333648281564, |
| "loss": 1.388, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.4425016379004801, |
| "grad_norm": 88.5, |
| "learning_rate": 0.00011495807251683006, |
| "loss": 2.6264, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.44393368203608036, |
| "grad_norm": 0.123046875, |
| "learning_rate": 0.00011466280855084446, |
| "loss": 1.9447, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.44536572617168063, |
| "grad_norm": 318.0, |
| "learning_rate": 0.00011436754458485888, |
| "loss": 1.4494, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.44679777030728085, |
| "grad_norm": 0.123046875, |
| "learning_rate": 0.00011407228061887327, |
| "loss": 2.4483, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.4482298144428811, |
| "grad_norm": 260.0, |
| "learning_rate": 0.00011377701665288769, |
| "loss": 2.8899, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.4496618585784814, |
| "grad_norm": 0.07666015625, |
| "learning_rate": 0.00011348175268690208, |
| "loss": 2.0935, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.4510939027140817, |
| "grad_norm": 0.047119140625, |
| "learning_rate": 0.0001131864887209165, |
| "loss": 3.0298, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.4525259468496819, |
| "grad_norm": 18.25, |
| "learning_rate": 0.00011289122475493092, |
| "loss": 1.8288, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.45395799098528217, |
| "grad_norm": 11.75, |
| "learning_rate": 0.00011259596078894533, |
| "loss": 2.9182, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.45539003512088244, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 0.00011230069682295975, |
| "loss": 1.9045, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.45682207925648266, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 0.00011200543285697414, |
| "loss": 1.7933, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.45825412339208293, |
| "grad_norm": 201.0, |
| "learning_rate": 0.00011171016889098856, |
| "loss": 2.4752, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.4596861675276832, |
| "grad_norm": 284.0, |
| "learning_rate": 0.00011141490492500295, |
| "loss": 1.7394, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.4611182116632835, |
| "grad_norm": 0.1162109375, |
| "learning_rate": 0.00011111964095901737, |
| "loss": 1.3612, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.4625502557988837, |
| "grad_norm": 0.30859375, |
| "learning_rate": 0.00011082437699303178, |
| "loss": 2.6066, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.463982299934484, |
| "grad_norm": 23.75, |
| "learning_rate": 0.00011052911302704618, |
| "loss": 2.6868, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.46541434407008425, |
| "grad_norm": 1.5546875, |
| "learning_rate": 0.0001102338490610606, |
| "loss": 2.6526, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.46541434407008425, |
| "eval_accuracy": 0.9445, |
| "eval_loss": 0.2859993577003479, |
| "eval_macro_f1": 0.9439942443541156, |
| "eval_runtime": 175.6054, |
| "eval_samples_per_second": 11.389, |
| "eval_steps_per_second": 11.389, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.4668463882056845, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 0.000109938585095075, |
| "loss": 2.041, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.46827843234128474, |
| "grad_norm": 8.3125, |
| "learning_rate": 0.00010964332112908942, |
| "loss": 2.3374, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.469710476476885, |
| "grad_norm": 268.0, |
| "learning_rate": 0.00010934805716310381, |
| "loss": 3.461, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.4711425206124853, |
| "grad_norm": 270.0, |
| "learning_rate": 0.00010905279319711823, |
| "loss": 1.5168, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.4725745647480855, |
| "grad_norm": 1.6171875, |
| "learning_rate": 0.00010875752923113265, |
| "loss": 1.9156, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.4740066088836858, |
| "grad_norm": 3.25, |
| "learning_rate": 0.00010846226526514704, |
| "loss": 1.9988, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.47543865301928606, |
| "grad_norm": 266.0, |
| "learning_rate": 0.00010816700129916146, |
| "loss": 1.9911, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.47687069715488634, |
| "grad_norm": 0.45703125, |
| "learning_rate": 0.00010787173733317586, |
| "loss": 2.2805, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.47830274129048655, |
| "grad_norm": 0.08740234375, |
| "learning_rate": 0.00010757647336719028, |
| "loss": 2.3786, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.47973478542608683, |
| "grad_norm": 0.0263671875, |
| "learning_rate": 0.00010728120940120467, |
| "loss": 2.0964, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.4811668295616871, |
| "grad_norm": 288.0, |
| "learning_rate": 0.00010698594543521909, |
| "loss": 2.5816, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.4825988736972873, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 0.00010669068146923348, |
| "loss": 1.2136, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.4840309178328876, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.0001063954175032479, |
| "loss": 2.2321, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.48546296196848787, |
| "grad_norm": 0.259765625, |
| "learning_rate": 0.00010610015353726232, |
| "loss": 2.7485, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.48689500610408815, |
| "grad_norm": 75.5, |
| "learning_rate": 0.00010580488957127673, |
| "loss": 3.0909, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.48832705023968836, |
| "grad_norm": 86.0, |
| "learning_rate": 0.00010550962560529115, |
| "loss": 2.0178, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.48975909437528864, |
| "grad_norm": 0.98046875, |
| "learning_rate": 0.00010521436163930554, |
| "loss": 2.0193, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.4911911385108889, |
| "grad_norm": 68.0, |
| "learning_rate": 0.00010491909767331996, |
| "loss": 2.178, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.4926231826464892, |
| "grad_norm": 0.375, |
| "learning_rate": 0.00010462383370733435, |
| "loss": 2.4366, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.4940552267820894, |
| "grad_norm": 0.322265625, |
| "learning_rate": 0.00010432856974134877, |
| "loss": 2.7989, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.4954872709176897, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 0.00010403330577536318, |
| "loss": 1.8681, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.49691931505328996, |
| "grad_norm": 0.08935546875, |
| "learning_rate": 0.00010373804180937759, |
| "loss": 1.9697, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.4983513591888902, |
| "grad_norm": 0.1328125, |
| "learning_rate": 0.00010344277784339201, |
| "loss": 1.7415, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.49978340332449045, |
| "grad_norm": 0.65234375, |
| "learning_rate": 0.0001031475138774064, |
| "loss": 1.6849, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.5012154474600907, |
| "grad_norm": 86.5, |
| "learning_rate": 0.00010285224991142082, |
| "loss": 2.1474, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.5012154474600907, |
| "eval_accuracy": 0.947, |
| "eval_loss": 0.2907390892505646, |
| "eval_macro_f1": 0.9463529866080697, |
| "eval_runtime": 172.6878, |
| "eval_samples_per_second": 11.582, |
| "eval_steps_per_second": 11.582, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.502647491595691, |
| "grad_norm": 472.0, |
| "learning_rate": 0.00010255698594543521, |
| "loss": 1.9173, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.5040795357312913, |
| "grad_norm": 1.3828125, |
| "learning_rate": 0.00010226172197944963, |
| "loss": 3.1869, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.5055115798668915, |
| "grad_norm": 145.0, |
| "learning_rate": 0.00010196645801346405, |
| "loss": 2.5482, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.5069436240024917, |
| "grad_norm": 0.58203125, |
| "learning_rate": 0.00010167119404747844, |
| "loss": 2.8567, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.508375668138092, |
| "grad_norm": 0.0888671875, |
| "learning_rate": 0.00010137593008149286, |
| "loss": 1.7268, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.5098077122736923, |
| "grad_norm": 286.0, |
| "learning_rate": 0.00010108066611550726, |
| "loss": 2.2268, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.5112397564092925, |
| "grad_norm": 169.0, |
| "learning_rate": 0.00010078540214952168, |
| "loss": 1.8245, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.5126718005448928, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 0.00010049013818353607, |
| "loss": 2.3801, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.5141038446804931, |
| "grad_norm": 77.0, |
| "learning_rate": 0.00010019487421755049, |
| "loss": 2.3412, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.5155358888160934, |
| "grad_norm": 328.0, |
| "learning_rate": 9.98996102515649e-05, |
| "loss": 3.1564, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.5169679329516935, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.96043462855793e-05, |
| "loss": 2.0409, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.5183999770872938, |
| "grad_norm": 0.8828125, |
| "learning_rate": 9.930908231959372e-05, |
| "loss": 1.8093, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.5198320212228941, |
| "grad_norm": 0.263671875, |
| "learning_rate": 9.901381835360814e-05, |
| "loss": 2.1228, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.5212640653584943, |
| "grad_norm": 13.625, |
| "learning_rate": 9.871855438762255e-05, |
| "loss": 1.6072, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.5226961094940946, |
| "grad_norm": 176.0, |
| "learning_rate": 9.842329042163695e-05, |
| "loss": 2.1088, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.5241281536296949, |
| "grad_norm": 0.09228515625, |
| "learning_rate": 9.812802645565136e-05, |
| "loss": 2.2985, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.5255601977652952, |
| "grad_norm": 5.9375, |
| "learning_rate": 9.783276248966576e-05, |
| "loss": 2.8687, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.5269922419008953, |
| "grad_norm": 0.1142578125, |
| "learning_rate": 9.753749852368017e-05, |
| "loss": 1.9855, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.5284242860364956, |
| "grad_norm": 1.03125, |
| "learning_rate": 9.724223455769457e-05, |
| "loss": 2.5827, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.5298563301720959, |
| "grad_norm": 270.0, |
| "learning_rate": 9.694697059170899e-05, |
| "loss": 1.9905, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.5312883743076962, |
| "grad_norm": 0.2392578125, |
| "learning_rate": 9.665170662572341e-05, |
| "loss": 1.9516, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.5327204184432964, |
| "grad_norm": 116.5, |
| "learning_rate": 9.635644265973781e-05, |
| "loss": 1.7887, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.5341524625788967, |
| "grad_norm": 0.0306396484375, |
| "learning_rate": 9.606117869375222e-05, |
| "loss": 1.8686, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.535584506714497, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.576591472776662e-05, |
| "loss": 1.7828, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.5370165508500973, |
| "grad_norm": 0.92578125, |
| "learning_rate": 9.547065076178103e-05, |
| "loss": 1.8761, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.5370165508500973, |
| "eval_accuracy": 0.9495, |
| "eval_loss": 0.26102420687675476, |
| "eval_macro_f1": 0.9488861373782008, |
| "eval_runtime": 172.7705, |
| "eval_samples_per_second": 11.576, |
| "eval_steps_per_second": 11.576, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.5384485949856974, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 9.517538679579543e-05, |
| "loss": 2.3765, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.5398806391212977, |
| "grad_norm": 258.0, |
| "learning_rate": 9.488012282980985e-05, |
| "loss": 1.5944, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.541312683256898, |
| "grad_norm": 106.0, |
| "learning_rate": 9.458485886382427e-05, |
| "loss": 2.4606, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.5427447273924982, |
| "grad_norm": 92.5, |
| "learning_rate": 9.428959489783868e-05, |
| "loss": 2.6863, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.5441767715280985, |
| "grad_norm": 207.0, |
| "learning_rate": 9.399433093185308e-05, |
| "loss": 1.9462, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.5456088156636988, |
| "grad_norm": 82.5, |
| "learning_rate": 9.369906696586749e-05, |
| "loss": 2.8243, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.5470408597992991, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 9.340380299988189e-05, |
| "loss": 1.8158, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.5484729039348992, |
| "grad_norm": 0.4609375, |
| "learning_rate": 9.31085390338963e-05, |
| "loss": 2.7108, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.5499049480704995, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.281327506791072e-05, |
| "loss": 1.9835, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.5513369922060998, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 9.251801110192512e-05, |
| "loss": 2.0608, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.5527690363417, |
| "grad_norm": 1.265625, |
| "learning_rate": 9.222274713593954e-05, |
| "loss": 1.9135, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.5542010804773003, |
| "grad_norm": 0.030517578125, |
| "learning_rate": 9.192748316995395e-05, |
| "loss": 2.5224, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.5556331246129006, |
| "grad_norm": 332.0, |
| "learning_rate": 9.163221920396835e-05, |
| "loss": 3.2123, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.5570651687485009, |
| "grad_norm": 0.046142578125, |
| "learning_rate": 9.133695523798276e-05, |
| "loss": 1.9807, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.558497212884101, |
| "grad_norm": 0.036376953125, |
| "learning_rate": 9.104169127199716e-05, |
| "loss": 2.8211, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.5599292570197013, |
| "grad_norm": 0.2412109375, |
| "learning_rate": 9.074642730601158e-05, |
| "loss": 2.7913, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.5613613011553016, |
| "grad_norm": 0.087890625, |
| "learning_rate": 9.045116334002599e-05, |
| "loss": 1.5528, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.5627933452909019, |
| "grad_norm": 0.21484375, |
| "learning_rate": 9.01558993740404e-05, |
| "loss": 2.3024, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.5642253894265021, |
| "grad_norm": 0.271484375, |
| "learning_rate": 8.986063540805481e-05, |
| "loss": 1.305, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.5656574335621024, |
| "grad_norm": 0.373046875, |
| "learning_rate": 8.956537144206921e-05, |
| "loss": 2.1656, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.5670894776977027, |
| "grad_norm": 84.5, |
| "learning_rate": 8.927010747608362e-05, |
| "loss": 1.6671, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.568521521833303, |
| "grad_norm": 112.0, |
| "learning_rate": 8.897484351009802e-05, |
| "loss": 2.4715, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.5699535659689031, |
| "grad_norm": 0.130859375, |
| "learning_rate": 8.867957954411244e-05, |
| "loss": 1.7577, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.5713856101045034, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 8.838431557812685e-05, |
| "loss": 1.6778, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.5728176542401037, |
| "grad_norm": 0.07080078125, |
| "learning_rate": 8.808905161214125e-05, |
| "loss": 1.4789, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.5728176542401037, |
| "eval_accuracy": 0.949, |
| "eval_loss": 0.2858292758464813, |
| "eval_macro_f1": 0.9484543460104051, |
| "eval_runtime": 172.7421, |
| "eval_samples_per_second": 11.578, |
| "eval_steps_per_second": 11.578, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.5742496983757039, |
| "grad_norm": 0.140625, |
| "learning_rate": 8.779378764615567e-05, |
| "loss": 1.6797, |
| "step": 20050 |
| }, |
| { |
| "epoch": 0.5756817425113042, |
| "grad_norm": 0.115234375, |
| "learning_rate": 8.749852368017008e-05, |
| "loss": 1.5026, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.5771137866469045, |
| "grad_norm": 0.02880859375, |
| "learning_rate": 8.720325971418448e-05, |
| "loss": 2.1316, |
| "step": 20150 |
| }, |
| { |
| "epoch": 0.5785458307825048, |
| "grad_norm": 0.08544921875, |
| "learning_rate": 8.690799574819889e-05, |
| "loss": 1.7517, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.5799778749181049, |
| "grad_norm": 82.0, |
| "learning_rate": 8.661273178221331e-05, |
| "loss": 2.4167, |
| "step": 20250 |
| }, |
| { |
| "epoch": 0.5814099190537052, |
| "grad_norm": 0.138671875, |
| "learning_rate": 8.631746781622771e-05, |
| "loss": 1.8565, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.5828419631893055, |
| "grad_norm": 34.25, |
| "learning_rate": 8.602220385024212e-05, |
| "loss": 1.9747, |
| "step": 20350 |
| }, |
| { |
| "epoch": 0.5842740073249058, |
| "grad_norm": 91.0, |
| "learning_rate": 8.572693988425654e-05, |
| "loss": 2.3284, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.585706051460506, |
| "grad_norm": 0.80859375, |
| "learning_rate": 8.543167591827094e-05, |
| "loss": 2.1788, |
| "step": 20450 |
| }, |
| { |
| "epoch": 0.5871380955961063, |
| "grad_norm": 446.0, |
| "learning_rate": 8.513641195228535e-05, |
| "loss": 1.6187, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.5885701397317066, |
| "grad_norm": 43.75, |
| "learning_rate": 8.484114798629975e-05, |
| "loss": 1.6338, |
| "step": 20550 |
| }, |
| { |
| "epoch": 0.5900021838673067, |
| "grad_norm": 0.91015625, |
| "learning_rate": 8.454588402031417e-05, |
| "loss": 2.424, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.591434228002907, |
| "grad_norm": 0.44921875, |
| "learning_rate": 8.425062005432858e-05, |
| "loss": 2.3043, |
| "step": 20650 |
| }, |
| { |
| "epoch": 0.5928662721385073, |
| "grad_norm": 268.0, |
| "learning_rate": 8.395535608834298e-05, |
| "loss": 2.5707, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.5942983162741076, |
| "grad_norm": 4.125, |
| "learning_rate": 8.366009212235739e-05, |
| "loss": 1.9577, |
| "step": 20750 |
| }, |
| { |
| "epoch": 0.5957303604097078, |
| "grad_norm": 5.0, |
| "learning_rate": 8.33648281563718e-05, |
| "loss": 0.7482, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.5971624045453081, |
| "grad_norm": 0.04052734375, |
| "learning_rate": 8.306956419038621e-05, |
| "loss": 1.5055, |
| "step": 20850 |
| }, |
| { |
| "epoch": 0.5985944486809084, |
| "grad_norm": 1.109375, |
| "learning_rate": 8.277430022440061e-05, |
| "loss": 3.3671, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.6000264928165085, |
| "grad_norm": 0.162109375, |
| "learning_rate": 8.247903625841503e-05, |
| "loss": 2.0574, |
| "step": 20950 |
| }, |
| { |
| "epoch": 0.6014585369521088, |
| "grad_norm": 0.03173828125, |
| "learning_rate": 8.218377229242944e-05, |
| "loss": 2.1942, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.6028905810877091, |
| "grad_norm": 0.251953125, |
| "learning_rate": 8.188850832644384e-05, |
| "loss": 1.6319, |
| "step": 21050 |
| }, |
| { |
| "epoch": 0.6043226252233094, |
| "grad_norm": 86.5, |
| "learning_rate": 8.159324436045825e-05, |
| "loss": 2.1558, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.6057546693589096, |
| "grad_norm": 2.984375, |
| "learning_rate": 8.129798039447267e-05, |
| "loss": 2.2353, |
| "step": 21150 |
| }, |
| { |
| "epoch": 0.6071867134945099, |
| "grad_norm": 0.0908203125, |
| "learning_rate": 8.100271642848707e-05, |
| "loss": 1.4975, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.6086187576301102, |
| "grad_norm": 98.0, |
| "learning_rate": 8.070745246250148e-05, |
| "loss": 2.5975, |
| "step": 21250 |
| }, |
| { |
| "epoch": 0.6086187576301102, |
| "eval_accuracy": 0.948, |
| "eval_loss": 0.2741381525993347, |
| "eval_macro_f1": 0.9473973559594594, |
| "eval_runtime": 172.6111, |
| "eval_samples_per_second": 11.587, |
| "eval_steps_per_second": 11.587, |
| "step": 21250 |
| }, |
| { |
| "epoch": 0.6100508017657105, |
| "grad_norm": 328.0, |
| "learning_rate": 8.04121884965159e-05, |
| "loss": 2.4534, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.6114828459013106, |
| "grad_norm": 11.125, |
| "learning_rate": 8.01169245305303e-05, |
| "loss": 2.1319, |
| "step": 21350 |
| }, |
| { |
| "epoch": 0.6129148900369109, |
| "grad_norm": 0.08642578125, |
| "learning_rate": 7.982166056454471e-05, |
| "loss": 2.4199, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.6143469341725112, |
| "grad_norm": 0.18359375, |
| "learning_rate": 7.952639659855911e-05, |
| "loss": 1.7527, |
| "step": 21450 |
| }, |
| { |
| "epoch": 0.6157789783081115, |
| "grad_norm": 0.458984375, |
| "learning_rate": 7.923113263257352e-05, |
| "loss": 2.4992, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.6172110224437117, |
| "grad_norm": 0.671875, |
| "learning_rate": 7.893586866658794e-05, |
| "loss": 2.5082, |
| "step": 21550 |
| }, |
| { |
| "epoch": 0.618643066579312, |
| "grad_norm": 272.0, |
| "learning_rate": 7.864060470060234e-05, |
| "loss": 2.2187, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.6200751107149123, |
| "grad_norm": 310.0, |
| "learning_rate": 7.834534073461675e-05, |
| "loss": 3.006, |
| "step": 21650 |
| }, |
| { |
| "epoch": 0.6215071548505124, |
| "grad_norm": 536.0, |
| "learning_rate": 7.805007676863117e-05, |
| "loss": 2.4535, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.6229391989861127, |
| "grad_norm": 408.0, |
| "learning_rate": 7.775481280264557e-05, |
| "loss": 2.376, |
| "step": 21750 |
| }, |
| { |
| "epoch": 0.624371243121713, |
| "grad_norm": 0.123046875, |
| "learning_rate": 7.745954883665998e-05, |
| "loss": 1.3044, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.6258032872573133, |
| "grad_norm": 0.70703125, |
| "learning_rate": 7.716428487067438e-05, |
| "loss": 1.9046, |
| "step": 21850 |
| }, |
| { |
| "epoch": 0.6272353313929135, |
| "grad_norm": 0.189453125, |
| "learning_rate": 7.68690209046888e-05, |
| "loss": 1.8825, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.6286673755285138, |
| "grad_norm": 0.251953125, |
| "learning_rate": 7.65737569387032e-05, |
| "loss": 2.353, |
| "step": 21950 |
| }, |
| { |
| "epoch": 0.6300994196641141, |
| "grad_norm": 0.0301513671875, |
| "learning_rate": 7.627849297271761e-05, |
| "loss": 1.7222, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.6315314637997143, |
| "grad_norm": 0.126953125, |
| "learning_rate": 7.598322900673203e-05, |
| "loss": 2.4583, |
| "step": 22050 |
| }, |
| { |
| "epoch": 0.6329635079353145, |
| "grad_norm": 1.0234375, |
| "learning_rate": 7.568796504074643e-05, |
| "loss": 1.9643, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.6343955520709148, |
| "grad_norm": 70.5, |
| "learning_rate": 7.539270107476084e-05, |
| "loss": 1.6712, |
| "step": 22150 |
| }, |
| { |
| "epoch": 0.6358275962065151, |
| "grad_norm": 8.0, |
| "learning_rate": 7.509743710877524e-05, |
| "loss": 2.1964, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.6372596403421154, |
| "grad_norm": 11.625, |
| "learning_rate": 7.480217314278965e-05, |
| "loss": 2.0319, |
| "step": 22250 |
| }, |
| { |
| "epoch": 0.6386916844777156, |
| "grad_norm": 0.09033203125, |
| "learning_rate": 7.450690917680407e-05, |
| "loss": 3.1062, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.6401237286133159, |
| "grad_norm": 490.0, |
| "learning_rate": 7.421164521081847e-05, |
| "loss": 2.028, |
| "step": 22350 |
| }, |
| { |
| "epoch": 0.6415557727489162, |
| "grad_norm": 688.0, |
| "learning_rate": 7.391638124483289e-05, |
| "loss": 1.6743, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.6429878168845163, |
| "grad_norm": 0.2578125, |
| "learning_rate": 7.36211172788473e-05, |
| "loss": 1.3926, |
| "step": 22450 |
| }, |
| { |
| "epoch": 0.6444198610201166, |
| "grad_norm": 278.0, |
| "learning_rate": 7.33258533128617e-05, |
| "loss": 2.073, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.6444198610201166, |
| "eval_accuracy": 0.9495, |
| "eval_loss": 0.2617259919643402, |
| "eval_macro_f1": 0.9489699460568645, |
| "eval_runtime": 172.6662, |
| "eval_samples_per_second": 11.583, |
| "eval_steps_per_second": 11.583, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.6458519051557169, |
| "grad_norm": 0.07373046875, |
| "learning_rate": 7.303058934687611e-05, |
| "loss": 2.099, |
| "step": 22550 |
| }, |
| { |
| "epoch": 0.6472839492913172, |
| "grad_norm": 0.5859375, |
| "learning_rate": 7.273532538089051e-05, |
| "loss": 2.2826, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.6487159934269174, |
| "grad_norm": 79.5, |
| "learning_rate": 7.244006141490493e-05, |
| "loss": 1.377, |
| "step": 22650 |
| }, |
| { |
| "epoch": 0.6501480375625177, |
| "grad_norm": 2.265625, |
| "learning_rate": 7.214479744891934e-05, |
| "loss": 1.9826, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.651580081698118, |
| "grad_norm": 0.37109375, |
| "learning_rate": 7.184953348293376e-05, |
| "loss": 2.2446, |
| "step": 22750 |
| }, |
| { |
| "epoch": 0.6530121258337181, |
| "grad_norm": 4.25, |
| "learning_rate": 7.155426951694816e-05, |
| "loss": 2.0254, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.6544441699693184, |
| "grad_norm": 0.03564453125, |
| "learning_rate": 7.125900555096257e-05, |
| "loss": 2.0871, |
| "step": 22850 |
| }, |
| { |
| "epoch": 0.6558762141049187, |
| "grad_norm": 0.390625, |
| "learning_rate": 7.096374158497697e-05, |
| "loss": 2.9276, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.657308258240519, |
| "grad_norm": 0.384765625, |
| "learning_rate": 7.066847761899138e-05, |
| "loss": 1.0622, |
| "step": 22950 |
| }, |
| { |
| "epoch": 0.6587403023761192, |
| "grad_norm": 4576.0, |
| "learning_rate": 7.037321365300578e-05, |
| "loss": 3.0808, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.6601723465117195, |
| "grad_norm": 372.0, |
| "learning_rate": 7.00779496870202e-05, |
| "loss": 1.8306, |
| "step": 23050 |
| }, |
| { |
| "epoch": 0.6616043906473198, |
| "grad_norm": 0.208984375, |
| "learning_rate": 6.978268572103462e-05, |
| "loss": 2.1282, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.66303643478292, |
| "grad_norm": 5.65625, |
| "learning_rate": 6.948742175504902e-05, |
| "loss": 1.8392, |
| "step": 23150 |
| }, |
| { |
| "epoch": 0.6644684789185202, |
| "grad_norm": 0.24609375, |
| "learning_rate": 6.919215778906343e-05, |
| "loss": 2.594, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.6659005230541205, |
| "grad_norm": 0.123046875, |
| "learning_rate": 6.889689382307783e-05, |
| "loss": 2.4234, |
| "step": 23250 |
| }, |
| { |
| "epoch": 0.6673325671897208, |
| "grad_norm": 268.0, |
| "learning_rate": 6.860162985709224e-05, |
| "loss": 2.3424, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.6687646113253211, |
| "grad_norm": 0.427734375, |
| "learning_rate": 6.830636589110664e-05, |
| "loss": 2.3216, |
| "step": 23350 |
| }, |
| { |
| "epoch": 0.6701966554609213, |
| "grad_norm": 0.9296875, |
| "learning_rate": 6.801110192512106e-05, |
| "loss": 2.4566, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.6716286995965216, |
| "grad_norm": 88.5, |
| "learning_rate": 6.771583795913548e-05, |
| "loss": 1.3767, |
| "step": 23450 |
| }, |
| { |
| "epoch": 0.6730607437321218, |
| "grad_norm": 488.0, |
| "learning_rate": 6.742057399314989e-05, |
| "loss": 2.343, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.674492787867722, |
| "grad_norm": 296.0, |
| "learning_rate": 6.712531002716429e-05, |
| "loss": 1.4841, |
| "step": 23550 |
| }, |
| { |
| "epoch": 0.6759248320033223, |
| "grad_norm": 218.0, |
| "learning_rate": 6.68300460611787e-05, |
| "loss": 2.4037, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.6773568761389226, |
| "grad_norm": 0.466796875, |
| "learning_rate": 6.65347820951931e-05, |
| "loss": 1.4982, |
| "step": 23650 |
| }, |
| { |
| "epoch": 0.6787889202745229, |
| "grad_norm": 49.25, |
| "learning_rate": 6.623951812920751e-05, |
| "loss": 2.2085, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.6802209644101231, |
| "grad_norm": 0.30078125, |
| "learning_rate": 6.594425416322191e-05, |
| "loss": 1.7055, |
| "step": 23750 |
| }, |
| { |
| "epoch": 0.6802209644101231, |
| "eval_accuracy": 0.9505, |
| "eval_loss": 0.26270824670791626, |
| "eval_macro_f1": 0.9498080478089564, |
| "eval_runtime": 172.6664, |
| "eval_samples_per_second": 11.583, |
| "eval_steps_per_second": 11.583, |
| "step": 23750 |
| }, |
| { |
| "epoch": 0.6816530085457234, |
| "grad_norm": 95.5, |
| "learning_rate": 6.564899019723633e-05, |
| "loss": 2.5024, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.6830850526813237, |
| "grad_norm": 0.30078125, |
| "learning_rate": 6.535372623125075e-05, |
| "loss": 2.2518, |
| "step": 23850 |
| }, |
| { |
| "epoch": 0.6845170968169239, |
| "grad_norm": 116.5, |
| "learning_rate": 6.505846226526516e-05, |
| "loss": 2.0539, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.6859491409525241, |
| "grad_norm": 264.0, |
| "learning_rate": 6.476319829927956e-05, |
| "loss": 2.5857, |
| "step": 23950 |
| }, |
| { |
| "epoch": 0.6873811850881244, |
| "grad_norm": 302.0, |
| "learning_rate": 6.446793433329397e-05, |
| "loss": 2.1408, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.6888132292237247, |
| "grad_norm": 0.5390625, |
| "learning_rate": 6.417267036730837e-05, |
| "loss": 1.9618, |
| "step": 24050 |
| }, |
| { |
| "epoch": 0.690245273359325, |
| "grad_norm": 164.0, |
| "learning_rate": 6.387740640132278e-05, |
| "loss": 2.0112, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.6916773174949252, |
| "grad_norm": 14.0625, |
| "learning_rate": 6.35821424353372e-05, |
| "loss": 2.7256, |
| "step": 24150 |
| }, |
| { |
| "epoch": 0.6931093616305255, |
| "grad_norm": 164.0, |
| "learning_rate": 6.328687846935161e-05, |
| "loss": 0.8362, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.6945414057661257, |
| "grad_norm": 0.271484375, |
| "learning_rate": 6.299161450336602e-05, |
| "loss": 2.2874, |
| "step": 24250 |
| }, |
| { |
| "epoch": 0.6959734499017259, |
| "grad_norm": 110.5, |
| "learning_rate": 6.269635053738042e-05, |
| "loss": 1.5674, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.6974054940373262, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 6.240108657139483e-05, |
| "loss": 2.5817, |
| "step": 24350 |
| }, |
| { |
| "epoch": 0.6988375381729265, |
| "grad_norm": 0.99609375, |
| "learning_rate": 6.210582260540923e-05, |
| "loss": 1.2537, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.7002695823085268, |
| "grad_norm": 266.0, |
| "learning_rate": 6.181055863942364e-05, |
| "loss": 2.4499, |
| "step": 24450 |
| }, |
| { |
| "epoch": 0.701701626444127, |
| "grad_norm": 1.59375, |
| "learning_rate": 6.151529467343806e-05, |
| "loss": 2.8047, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.7031336705797273, |
| "grad_norm": 0.08447265625, |
| "learning_rate": 6.122003070745246e-05, |
| "loss": 1.6917, |
| "step": 24550 |
| }, |
| { |
| "epoch": 0.7045657147153275, |
| "grad_norm": 296.0, |
| "learning_rate": 6.0924766741466875e-05, |
| "loss": 2.2486, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.7059977588509277, |
| "grad_norm": 183.0, |
| "learning_rate": 6.062950277548128e-05, |
| "loss": 2.5183, |
| "step": 24650 |
| }, |
| { |
| "epoch": 0.707429802986528, |
| "grad_norm": 0.123046875, |
| "learning_rate": 6.033423880949569e-05, |
| "loss": 2.2984, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.7088618471221283, |
| "grad_norm": 0.37890625, |
| "learning_rate": 6.00389748435101e-05, |
| "loss": 2.4598, |
| "step": 24750 |
| }, |
| { |
| "epoch": 0.7102938912577286, |
| "grad_norm": 6.25, |
| "learning_rate": 5.97437108775245e-05, |
| "loss": 2.0554, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.7117259353933288, |
| "grad_norm": 1.5234375, |
| "learning_rate": 5.9448446911538915e-05, |
| "loss": 1.3688, |
| "step": 24850 |
| }, |
| { |
| "epoch": 0.7131579795289291, |
| "grad_norm": 83.5, |
| "learning_rate": 5.9153182945553334e-05, |
| "loss": 2.6434, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.7145900236645294, |
| "grad_norm": 0.8046875, |
| "learning_rate": 5.885791897956774e-05, |
| "loss": 1.1703, |
| "step": 24950 |
| }, |
| { |
| "epoch": 0.7160220678001296, |
| "grad_norm": 0.76953125, |
| "learning_rate": 5.8562655013582144e-05, |
| "loss": 1.7433, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.7160220678001296, |
| "eval_accuracy": 0.9475, |
| "eval_loss": 0.2805185317993164, |
| "eval_macro_f1": 0.9469725724830536, |
| "eval_runtime": 172.6365, |
| "eval_samples_per_second": 11.585, |
| "eval_steps_per_second": 11.585, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.7174541119357298, |
| "grad_norm": 268.0, |
| "learning_rate": 5.8267391047596556e-05, |
| "loss": 2.7963, |
| "step": 25050 |
| }, |
| { |
| "epoch": 0.7188861560713301, |
| "grad_norm": 294.0, |
| "learning_rate": 5.797212708161096e-05, |
| "loss": 2.3253, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.7203182002069304, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 5.7676863115625366e-05, |
| "loss": 1.0165, |
| "step": 25150 |
| }, |
| { |
| "epoch": 0.7217502443425307, |
| "grad_norm": 756.0, |
| "learning_rate": 5.738159914963978e-05, |
| "loss": 1.4844, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.7231822884781309, |
| "grad_norm": 0.10107421875, |
| "learning_rate": 5.70863351836542e-05, |
| "loss": 2.7171, |
| "step": 25250 |
| }, |
| { |
| "epoch": 0.7246143326137312, |
| "grad_norm": 336.0, |
| "learning_rate": 5.67910712176686e-05, |
| "loss": 3.1605, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.7260463767493314, |
| "grad_norm": 177.0, |
| "learning_rate": 5.649580725168301e-05, |
| "loss": 1.9816, |
| "step": 25350 |
| }, |
| { |
| "epoch": 0.7274784208849316, |
| "grad_norm": 1.78125, |
| "learning_rate": 5.620054328569741e-05, |
| "loss": 1.8129, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.7289104650205319, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 5.5905279319711824e-05, |
| "loss": 1.3484, |
| "step": 25450 |
| }, |
| { |
| "epoch": 0.7303425091561322, |
| "grad_norm": 8.375, |
| "learning_rate": 5.561001535372623e-05, |
| "loss": 2.1354, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.7317745532917325, |
| "grad_norm": 0.0308837890625, |
| "learning_rate": 5.5314751387740635e-05, |
| "loss": 1.747, |
| "step": 25550 |
| }, |
| { |
| "epoch": 0.7332065974273327, |
| "grad_norm": 808.0, |
| "learning_rate": 5.5019487421755053e-05, |
| "loss": 2.6803, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.734638641562933, |
| "grad_norm": 0.96484375, |
| "learning_rate": 5.4724223455769465e-05, |
| "loss": 2.2422, |
| "step": 25650 |
| }, |
| { |
| "epoch": 0.7360706856985332, |
| "grad_norm": 10.0, |
| "learning_rate": 5.442895948978387e-05, |
| "loss": 2.0731, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.7375027298341335, |
| "grad_norm": 0.27734375, |
| "learning_rate": 5.4133695523798276e-05, |
| "loss": 2.9622, |
| "step": 25750 |
| }, |
| { |
| "epoch": 0.7389347739697337, |
| "grad_norm": 200.0, |
| "learning_rate": 5.383843155781269e-05, |
| "loss": 2.179, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.740366818105334, |
| "grad_norm": 118.5, |
| "learning_rate": 5.354316759182709e-05, |
| "loss": 2.4152, |
| "step": 25850 |
| }, |
| { |
| "epoch": 0.7417988622409343, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 5.32479036258415e-05, |
| "loss": 1.4274, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.7432309063765346, |
| "grad_norm": 6.71875, |
| "learning_rate": 5.295263965985592e-05, |
| "loss": 2.0263, |
| "step": 25950 |
| }, |
| { |
| "epoch": 0.7446629505121348, |
| "grad_norm": 0.28125, |
| "learning_rate": 5.265737569387033e-05, |
| "loss": 1.8231, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.746094994647735, |
| "grad_norm": 0.8046875, |
| "learning_rate": 5.2362111727884734e-05, |
| "loss": 1.7974, |
| "step": 26050 |
| }, |
| { |
| "epoch": 0.7475270387833353, |
| "grad_norm": 102.5, |
| "learning_rate": 5.206684776189914e-05, |
| "loss": 2.5667, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.7489590829189355, |
| "grad_norm": 380.0, |
| "learning_rate": 5.1771583795913544e-05, |
| "loss": 1.8334, |
| "step": 26150 |
| }, |
| { |
| "epoch": 0.7503911270545358, |
| "grad_norm": 0.74609375, |
| "learning_rate": 5.1476319829927956e-05, |
| "loss": 1.2929, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.7518231711901361, |
| "grad_norm": 7.3125, |
| "learning_rate": 5.118105586394236e-05, |
| "loss": 2.2943, |
| "step": 26250 |
| }, |
| { |
| "epoch": 0.7518231711901361, |
| "eval_accuracy": 0.951, |
| "eval_loss": 0.2633407413959503, |
| "eval_macro_f1": 0.9502699810655684, |
| "eval_runtime": 172.7789, |
| "eval_samples_per_second": 11.575, |
| "eval_steps_per_second": 11.575, |
| "step": 26250 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 34916, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.1179332952064e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|