| { |
| "best_global_step": 2661, |
| "best_metric": 0.9992947813822285, |
| "best_model_checkpoint": "models/classifier_model_diverse/checkpoints/checkpoint-2661", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 2661, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011273957158962795, |
| "grad_norm": 3.8056137561798096, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 1.2736279487609863, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02254791431792559, |
| "grad_norm": 2.672968864440918, |
| "learning_rate": 7.600000000000001e-06, |
| "loss": 1.2905834197998047, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.033821871476888386, |
| "grad_norm": 3.9050815105438232, |
| "learning_rate": 1.16e-05, |
| "loss": 1.1200922966003417, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04509582863585118, |
| "grad_norm": 3.9839513301849365, |
| "learning_rate": 1.5600000000000003e-05, |
| "loss": 0.7939215183258057, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05636978579481398, |
| "grad_norm": 6.401978492736816, |
| "learning_rate": 1.9600000000000002e-05, |
| "loss": 0.47928524017333984, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.06764374295377677, |
| "grad_norm": 3.8583123683929443, |
| "learning_rate": 1.9931060896208353e-05, |
| "loss": 0.28248274326324463, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07891770011273957, |
| "grad_norm": 0.5728406310081482, |
| "learning_rate": 1.9854461891995405e-05, |
| "loss": 0.12984446287155152, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.09019165727170236, |
| "grad_norm": 0.7124487161636353, |
| "learning_rate": 1.977786288778246e-05, |
| "loss": 0.0438654363155365, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.10146561443066517, |
| "grad_norm": 0.25945696234703064, |
| "learning_rate": 1.9701263883569516e-05, |
| "loss": 0.03924116194248199, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.11273957158962795, |
| "grad_norm": 0.08549337089061737, |
| "learning_rate": 1.962466487935657e-05, |
| "loss": 0.05853708386421204, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.12401352874859076, |
| "grad_norm": 0.09827116131782532, |
| "learning_rate": 1.9548065875143624e-05, |
| "loss": 0.019747741520404816, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.13528748590755355, |
| "grad_norm": 0.2975553274154663, |
| "learning_rate": 1.947146687093068e-05, |
| "loss": 0.0481842428445816, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.14656144306651633, |
| "grad_norm": 0.356662392616272, |
| "learning_rate": 1.9394867866717735e-05, |
| "loss": 0.12549594640731812, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.15783540022547915, |
| "grad_norm": 0.6477656960487366, |
| "learning_rate": 1.931826886250479e-05, |
| "loss": 0.03554241955280304, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.16910935738444194, |
| "grad_norm": 0.03700140118598938, |
| "learning_rate": 1.9241669858291846e-05, |
| "loss": 0.003657575324177742, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.18038331454340473, |
| "grad_norm": 0.1101224422454834, |
| "learning_rate": 1.9165070854078898e-05, |
| "loss": 0.007288780063390732, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.19165727170236754, |
| "grad_norm": 0.02779577113687992, |
| "learning_rate": 1.9088471849865953e-05, |
| "loss": 0.01651781052350998, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.20293122886133033, |
| "grad_norm": 0.0255696102976799, |
| "learning_rate": 1.901187284565301e-05, |
| "loss": 0.002168308012187481, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.21420518602029312, |
| "grad_norm": 0.02410353161394596, |
| "learning_rate": 1.8935273841440064e-05, |
| "loss": 0.014644216001033782, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2254791431792559, |
| "grad_norm": 0.0849456861615181, |
| "learning_rate": 1.8858674837227116e-05, |
| "loss": 0.09340885281562805, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.23675310033821872, |
| "grad_norm": 0.059883181005716324, |
| "learning_rate": 1.8782075833014172e-05, |
| "loss": 0.026225173473358156, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2480270574971815, |
| "grad_norm": 0.024772750213742256, |
| "learning_rate": 1.8705476828801227e-05, |
| "loss": 0.02779638171195984, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2593010146561443, |
| "grad_norm": 0.02442971244454384, |
| "learning_rate": 1.8628877824588283e-05, |
| "loss": 0.0028540484607219698, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2705749718151071, |
| "grad_norm": 0.017573878169059753, |
| "learning_rate": 1.8552278820375335e-05, |
| "loss": 0.0022183064371347427, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2818489289740699, |
| "grad_norm": 1.5109485387802124, |
| "learning_rate": 1.847567981616239e-05, |
| "loss": 0.021869483590126037, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.29312288613303267, |
| "grad_norm": 0.021146811544895172, |
| "learning_rate": 1.8399080811949446e-05, |
| "loss": 0.0013299805112183094, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3043968432919955, |
| "grad_norm": 0.08148454874753952, |
| "learning_rate": 1.83224818077365e-05, |
| "loss": 0.07713921666145325, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.3156708004509583, |
| "grad_norm": 0.014423428103327751, |
| "learning_rate": 1.8245882803523557e-05, |
| "loss": 0.0011368718929588794, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3269447576099211, |
| "grad_norm": 28.73524284362793, |
| "learning_rate": 1.8169283799310612e-05, |
| "loss": 0.03270837366580963, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3382187147688839, |
| "grad_norm": 0.019931884482502937, |
| "learning_rate": 1.8092684795097664e-05, |
| "loss": 0.006438140571117401, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.34949267192784667, |
| "grad_norm": 0.2398950159549713, |
| "learning_rate": 1.801608579088472e-05, |
| "loss": 0.0019076481461524964, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.36076662908680945, |
| "grad_norm": 0.01809663511812687, |
| "learning_rate": 1.7939486786671775e-05, |
| "loss": 0.006735321879386902, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.37204058624577224, |
| "grad_norm": 0.01682177186012268, |
| "learning_rate": 1.7862887782458827e-05, |
| "loss": 0.0009955359622836114, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3833145434047351, |
| "grad_norm": 0.012714880518615246, |
| "learning_rate": 1.7786288778245883e-05, |
| "loss": 0.0011549751274287702, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3945885005636979, |
| "grad_norm": 0.011172255501151085, |
| "learning_rate": 1.7709689774032938e-05, |
| "loss": 0.04715018570423126, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.40586245772266066, |
| "grad_norm": 0.01148612704128027, |
| "learning_rate": 1.7633090769819994e-05, |
| "loss": 0.0011292855255305768, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.41713641488162345, |
| "grad_norm": 0.03625890612602234, |
| "learning_rate": 1.755649176560705e-05, |
| "loss": 0.0010867375880479813, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.42841037204058624, |
| "grad_norm": 0.008941585198044777, |
| "learning_rate": 1.7479892761394105e-05, |
| "loss": 0.0009030814282596112, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.43968432919954903, |
| "grad_norm": 0.022719021886587143, |
| "learning_rate": 1.740329375718116e-05, |
| "loss": 0.13739874362945556, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.4509582863585118, |
| "grad_norm": 0.3535501956939697, |
| "learning_rate": 1.7326694752968215e-05, |
| "loss": 0.06437238454818725, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.46223224351747466, |
| "grad_norm": 0.038008544594049454, |
| "learning_rate": 1.7250095748755268e-05, |
| "loss": 0.01665329039096832, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.47350620067643745, |
| "grad_norm": 0.013796325773000717, |
| "learning_rate": 1.7173496744542323e-05, |
| "loss": 0.0013358186930418014, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.48478015783540024, |
| "grad_norm": 0.029519330710172653, |
| "learning_rate": 1.7096897740329375e-05, |
| "loss": 0.027624106407165526, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.496054114994363, |
| "grad_norm": 0.06211524456739426, |
| "learning_rate": 1.702029873611643e-05, |
| "loss": 0.0008195113390684128, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5073280721533259, |
| "grad_norm": 0.00987821165472269, |
| "learning_rate": 1.6943699731903486e-05, |
| "loss": 0.0007175941951572895, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5186020293122886, |
| "grad_norm": 0.007800395600497723, |
| "learning_rate": 1.686710072769054e-05, |
| "loss": 0.0006243153009563684, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5298759864712514, |
| "grad_norm": 0.007644494995474815, |
| "learning_rate": 1.6790501723477597e-05, |
| "loss": 0.0011569895781576634, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5411499436302142, |
| "grad_norm": 0.013480519875884056, |
| "learning_rate": 1.6713902719264652e-05, |
| "loss": 0.036841681599617, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.552423900789177, |
| "grad_norm": 0.015233520418405533, |
| "learning_rate": 1.6637303715051705e-05, |
| "loss": 0.013653479516506195, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5636978579481398, |
| "grad_norm": 0.049566902220249176, |
| "learning_rate": 1.656070471083876e-05, |
| "loss": 0.0016534214839339257, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5749718151071026, |
| "grad_norm": 1.538453221321106, |
| "learning_rate": 1.6484105706625815e-05, |
| "loss": 0.002873715199530125, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5862457722660653, |
| "grad_norm": 0.009479410015046597, |
| "learning_rate": 1.640750670241287e-05, |
| "loss": 0.000595852080732584, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5975197294250282, |
| "grad_norm": 1.0129826068878174, |
| "learning_rate": 1.6330907698199926e-05, |
| "loss": 0.09629083275794983, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.608793686583991, |
| "grad_norm": 0.019592303782701492, |
| "learning_rate": 1.625430869398698e-05, |
| "loss": 0.06828267574310302, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6200676437429538, |
| "grad_norm": 0.043163444846868515, |
| "learning_rate": 1.6177709689774034e-05, |
| "loss": 0.001705889031291008, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6313416009019166, |
| "grad_norm": 0.11190011352300644, |
| "learning_rate": 1.610111068556109e-05, |
| "loss": 0.0010928096249699592, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6426155580608793, |
| "grad_norm": 0.11181443929672241, |
| "learning_rate": 1.602451168134814e-05, |
| "loss": 0.0012638685293495655, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6538895152198422, |
| "grad_norm": 0.007554885931313038, |
| "learning_rate": 1.5947912677135197e-05, |
| "loss": 0.001112994272261858, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6651634723788049, |
| "grad_norm": 0.009411131031811237, |
| "learning_rate": 1.5871313672922252e-05, |
| "loss": 0.03673492074012756, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6764374295377678, |
| "grad_norm": 0.009411299601197243, |
| "learning_rate": 1.5794714668709308e-05, |
| "loss": 0.0006293295882642269, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6877113866967306, |
| "grad_norm": 0.007238287944346666, |
| "learning_rate": 1.5718115664496363e-05, |
| "loss": 0.0008005211129784584, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6989853438556933, |
| "grad_norm": 0.024015795439481735, |
| "learning_rate": 1.564151666028342e-05, |
| "loss": 0.06360973715782166, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.7102593010146562, |
| "grad_norm": 0.022069167345762253, |
| "learning_rate": 1.5564917656070474e-05, |
| "loss": 0.09372783899307251, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7215332581736189, |
| "grad_norm": 0.017568539828062057, |
| "learning_rate": 1.5488318651857526e-05, |
| "loss": 0.0960399329662323, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7328072153325818, |
| "grad_norm": 0.023654062300920486, |
| "learning_rate": 1.5411719647644582e-05, |
| "loss": 0.001814677007496357, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7440811724915445, |
| "grad_norm": 3.3847196102142334, |
| "learning_rate": 1.5335120643431637e-05, |
| "loss": 0.012697191536426544, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7553551296505073, |
| "grad_norm": 0.03137872368097305, |
| "learning_rate": 1.5258521639218691e-05, |
| "loss": 0.017509229481220245, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7666290868094702, |
| "grad_norm": 0.008311662822961807, |
| "learning_rate": 1.5181922635005747e-05, |
| "loss": 0.003624839335680008, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7779030439684329, |
| "grad_norm": 1.8481999635696411, |
| "learning_rate": 1.51053236307928e-05, |
| "loss": 0.0024514470249414446, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7891770011273957, |
| "grad_norm": 0.869351863861084, |
| "learning_rate": 1.5028724626579856e-05, |
| "loss": 0.015725354850292205, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8004509582863585, |
| "grad_norm": 11.315881729125977, |
| "learning_rate": 1.4952125622366911e-05, |
| "loss": 0.03875987529754639, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.8117249154453213, |
| "grad_norm": 0.02918623574078083, |
| "learning_rate": 1.4875526618153965e-05, |
| "loss": 0.001158976461738348, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.8229988726042841, |
| "grad_norm": 0.007244312670081854, |
| "learning_rate": 1.479892761394102e-05, |
| "loss": 0.0007107657380402088, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8342728297632469, |
| "grad_norm": 0.0057315886951982975, |
| "learning_rate": 1.4722328609728074e-05, |
| "loss": 0.02150535732507706, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.8455467869222097, |
| "grad_norm": 0.00698477029800415, |
| "learning_rate": 1.4645729605515128e-05, |
| "loss": 0.000717653427273035, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8568207440811725, |
| "grad_norm": 0.006467051338404417, |
| "learning_rate": 1.4569130601302184e-05, |
| "loss": 0.0435500293970108, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8680947012401353, |
| "grad_norm": 0.005048634018748999, |
| "learning_rate": 1.4492531597089239e-05, |
| "loss": 0.0007005668710917234, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8793686583990981, |
| "grad_norm": 0.005153645761311054, |
| "learning_rate": 1.4415932592876295e-05, |
| "loss": 0.0018698316067457198, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8906426155580609, |
| "grad_norm": 36.11130142211914, |
| "learning_rate": 1.4339333588663348e-05, |
| "loss": 0.08988013863563538, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.9019165727170236, |
| "grad_norm": 0.004656560719013214, |
| "learning_rate": 1.4262734584450404e-05, |
| "loss": 0.04341347217559814, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9131905298759865, |
| "grad_norm": 4.095223426818848, |
| "learning_rate": 1.418613558023746e-05, |
| "loss": 0.010267064720392228, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.9244644870349493, |
| "grad_norm": 0.0213918499648571, |
| "learning_rate": 1.4109536576024511e-05, |
| "loss": 0.03770207762718201, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.9357384441939121, |
| "grad_norm": 0.06250061094760895, |
| "learning_rate": 1.4032937571811567e-05, |
| "loss": 0.0006878524087369442, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.9470124013528749, |
| "grad_norm": 0.005574820097535849, |
| "learning_rate": 1.3956338567598622e-05, |
| "loss": 0.0005925031378865242, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9582863585118376, |
| "grad_norm": 0.008809318765997887, |
| "learning_rate": 1.3879739563385676e-05, |
| "loss": 0.0017010247334837913, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9695603156708005, |
| "grad_norm": 0.03311969339847565, |
| "learning_rate": 1.3803140559172731e-05, |
| "loss": 0.0896953821182251, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9808342728297632, |
| "grad_norm": 0.012214568443596363, |
| "learning_rate": 1.3726541554959787e-05, |
| "loss": 0.0007818067446351051, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.992108229988726, |
| "grad_norm": 0.011341788806021214, |
| "learning_rate": 1.3649942550746842e-05, |
| "loss": 0.0006305632647126913, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_avg_accuracy": 0.9990126939351198, |
| "eval_explanation_accuracy": 0.9988716502115655, |
| "eval_explanation_f1": 0.9977851605758582, |
| "eval_loss": 0.011286958120763302, |
| "eval_refusal_accuracy": 0.9991537376586742, |
| "eval_refusal_f1": 0.9991546914623838, |
| "eval_runtime": 7.3755, |
| "eval_samples_per_second": 480.647, |
| "eval_steps_per_second": 30.1, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.003382187147689, |
| "grad_norm": 0.015435208566486835, |
| "learning_rate": 1.3573343546533896e-05, |
| "loss": 0.0006994560360908508, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.0146561443066517, |
| "grad_norm": 0.007135583553463221, |
| "learning_rate": 1.3496744542320952e-05, |
| "loss": 0.01103305146098137, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0259301014656144, |
| "grad_norm": 0.006527516525238752, |
| "learning_rate": 1.3420145538108005e-05, |
| "loss": 0.010759656876325607, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.0372040586245772, |
| "grad_norm": 0.012697388418018818, |
| "learning_rate": 1.334354653389506e-05, |
| "loss": 0.013554251194000244, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.04847801578354, |
| "grad_norm": 0.01060900092124939, |
| "learning_rate": 1.3266947529682115e-05, |
| "loss": 0.0013550018891692161, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.059751972942503, |
| "grad_norm": 0.006642420310527086, |
| "learning_rate": 1.319034852546917e-05, |
| "loss": 0.0008933816105127334, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.0710259301014655, |
| "grad_norm": 0.004995852708816528, |
| "learning_rate": 1.3113749521256226e-05, |
| "loss": 0.0015664011240005492, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0822998872604284, |
| "grad_norm": 0.0033098841086030006, |
| "learning_rate": 1.303715051704328e-05, |
| "loss": 0.06807108521461487, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.0935738444193912, |
| "grad_norm": 0.004117046482861042, |
| "learning_rate": 1.2960551512830335e-05, |
| "loss": 0.0003929378930479288, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.104847801578354, |
| "grad_norm": 0.004257792141288519, |
| "learning_rate": 1.288395250861739e-05, |
| "loss": 0.00031005307100713254, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.116121758737317, |
| "grad_norm": 0.004429277963936329, |
| "learning_rate": 1.2807353504404442e-05, |
| "loss": 0.0003260139375925064, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.1273957158962795, |
| "grad_norm": 0.004686293192207813, |
| "learning_rate": 1.2730754500191498e-05, |
| "loss": 0.0004351689480245113, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1386696730552424, |
| "grad_norm": 3.382127523422241, |
| "learning_rate": 1.2654155495978553e-05, |
| "loss": 0.05924311876296997, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.1499436302142052, |
| "grad_norm": 0.039729081094264984, |
| "learning_rate": 1.2577556491765607e-05, |
| "loss": 0.00036267689429223535, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.161217587373168, |
| "grad_norm": 0.0045091030187904835, |
| "learning_rate": 1.2500957487552663e-05, |
| "loss": 0.005849485099315643, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.172491544532131, |
| "grad_norm": 0.005293088499456644, |
| "learning_rate": 1.2424358483339718e-05, |
| "loss": 0.02991122901439667, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.1837655016910935, |
| "grad_norm": 0.004526405595242977, |
| "learning_rate": 1.2347759479126774e-05, |
| "loss": 0.000873743649572134, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.1950394588500564, |
| "grad_norm": 0.006649188231676817, |
| "learning_rate": 1.2271160474913827e-05, |
| "loss": 0.00711873322725296, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.2063134160090192, |
| "grad_norm": 0.008573385886847973, |
| "learning_rate": 1.2194561470700881e-05, |
| "loss": 0.0004316195845603943, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.217587373167982, |
| "grad_norm": 0.004641649313271046, |
| "learning_rate": 1.2117962466487937e-05, |
| "loss": 0.03292413949966431, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.2288613303269447, |
| "grad_norm": 0.006113206036388874, |
| "learning_rate": 1.204136346227499e-05, |
| "loss": 0.005398290976881981, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.2401352874859075, |
| "grad_norm": 0.0036639939062297344, |
| "learning_rate": 1.1964764458062046e-05, |
| "loss": 0.001103377342224121, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2514092446448704, |
| "grad_norm": 0.003979232627898455, |
| "learning_rate": 1.1888165453849101e-05, |
| "loss": 0.00032636863179504874, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.2626832018038332, |
| "grad_norm": 0.005784187000244856, |
| "learning_rate": 1.1811566449636157e-05, |
| "loss": 0.0003367730416357517, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.273957158962796, |
| "grad_norm": 0.0034572940785437822, |
| "learning_rate": 1.173496744542321e-05, |
| "loss": 0.00031754772644490006, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.2852311161217587, |
| "grad_norm": 0.0034566449467092752, |
| "learning_rate": 1.1658368441210266e-05, |
| "loss": 0.00032663308084011076, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.2965050732807215, |
| "grad_norm": 0.0035605451557785273, |
| "learning_rate": 1.1581769436997321e-05, |
| "loss": 0.033251908421516416, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.3077790304396844, |
| "grad_norm": 0.0048577445559203625, |
| "learning_rate": 1.1505170432784374e-05, |
| "loss": 0.006116595864295959, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.3190529875986472, |
| "grad_norm": 1.5070332288742065, |
| "learning_rate": 1.1428571428571429e-05, |
| "loss": 0.0013676982372999192, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.3303269447576098, |
| "grad_norm": 0.2583716809749603, |
| "learning_rate": 1.1351972424358484e-05, |
| "loss": 0.0008500318974256516, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.3416009019165727, |
| "grad_norm": 0.005735157057642937, |
| "learning_rate": 1.1275373420145538e-05, |
| "loss": 0.010501163452863694, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.3528748590755355, |
| "grad_norm": 0.00698908930644393, |
| "learning_rate": 1.1198774415932594e-05, |
| "loss": 0.00038872202858328817, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3641488162344984, |
| "grad_norm": 0.005838852841407061, |
| "learning_rate": 1.112217541171965e-05, |
| "loss": 0.0002969707129523158, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.3754227733934612, |
| "grad_norm": 0.004003811627626419, |
| "learning_rate": 1.1045576407506705e-05, |
| "loss": 0.022138766944408417, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.386696730552424, |
| "grad_norm": 0.004359162412583828, |
| "learning_rate": 1.0968977403293758e-05, |
| "loss": 0.0002911401214078069, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.3979706877113867, |
| "grad_norm": 0.006011870689690113, |
| "learning_rate": 1.0892378399080812e-05, |
| "loss": 0.000280232192017138, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.4092446448703495, |
| "grad_norm": 0.006036574020981789, |
| "learning_rate": 1.0815779394867868e-05, |
| "loss": 0.00034907390363514423, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.4205186020293123, |
| "grad_norm": 0.004344291985034943, |
| "learning_rate": 1.0739180390654921e-05, |
| "loss": 0.00030681025236845016, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.431792559188275, |
| "grad_norm": 0.03644406422972679, |
| "learning_rate": 1.0662581386441977e-05, |
| "loss": 0.0342051237821579, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.4430665163472378, |
| "grad_norm": 0.03530158847570419, |
| "learning_rate": 1.0585982382229032e-05, |
| "loss": 0.0008967567235231399, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.4543404735062007, |
| "grad_norm": 0.003094166051596403, |
| "learning_rate": 1.0509383378016086e-05, |
| "loss": 0.00035515332128852604, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.4656144306651635, |
| "grad_norm": 0.0031914988067001104, |
| "learning_rate": 1.0432784373803142e-05, |
| "loss": 0.10675209760665894, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4768883878241263, |
| "grad_norm": 0.005797514691948891, |
| "learning_rate": 1.0356185369590197e-05, |
| "loss": 0.03572855889797211, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.4881623449830892, |
| "grad_norm": 0.007767012342810631, |
| "learning_rate": 1.027958636537725e-05, |
| "loss": 0.0005267852451652288, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.4994363021420518, |
| "grad_norm": 0.006930226925760508, |
| "learning_rate": 1.0202987361164305e-05, |
| "loss": 0.0003944524563848972, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.5107102593010147, |
| "grad_norm": 0.02050807699561119, |
| "learning_rate": 1.012638835695136e-05, |
| "loss": 0.0003367907367646694, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.5219842164599775, |
| "grad_norm": 0.004242436494678259, |
| "learning_rate": 1.0049789352738416e-05, |
| "loss": 0.0003088915720582008, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.5332581736189401, |
| "grad_norm": 0.014584529213607311, |
| "learning_rate": 9.97319034852547e-06, |
| "loss": 0.00032701201271265743, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.544532130777903, |
| "grad_norm": 0.006828560493886471, |
| "learning_rate": 9.896591344312525e-06, |
| "loss": 0.00032078479416668414, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.5558060879368658, |
| "grad_norm": 0.007192549761384726, |
| "learning_rate": 9.81999234009958e-06, |
| "loss": 0.00030694154556840656, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.5670800450958287, |
| "grad_norm": 0.004133477341383696, |
| "learning_rate": 9.743393335886634e-06, |
| "loss": 0.0002738636452704668, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.5783540022547915, |
| "grad_norm": 0.004576428793370724, |
| "learning_rate": 9.66679433167369e-06, |
| "loss": 0.0003862300887703896, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5896279594137543, |
| "grad_norm": 0.004046239424496889, |
| "learning_rate": 9.590195327460743e-06, |
| "loss": 0.00035432146396487953, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.6009019165727172, |
| "grad_norm": 0.0037296744994819164, |
| "learning_rate": 9.513596323247799e-06, |
| "loss": 0.0002848912263289094, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.6121758737316798, |
| "grad_norm": 0.015460504218935966, |
| "learning_rate": 9.436997319034853e-06, |
| "loss": 0.01671164333820343, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.6234498308906427, |
| "grad_norm": 0.003927943762391806, |
| "learning_rate": 9.360398314821908e-06, |
| "loss": 0.00033472839277237654, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.6347237880496053, |
| "grad_norm": 0.003112305421382189, |
| "learning_rate": 9.283799310608964e-06, |
| "loss": 0.0003046044381335378, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.6459977452085681, |
| "grad_norm": 0.043022263795137405, |
| "learning_rate": 9.207200306396017e-06, |
| "loss": 0.0005796106066554785, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.657271702367531, |
| "grad_norm": 0.05339162051677704, |
| "learning_rate": 9.130601302183073e-06, |
| "loss": 0.00047549596056342127, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.6685456595264938, |
| "grad_norm": 0.00355844059959054, |
| "learning_rate": 9.054002297970127e-06, |
| "loss": 0.0002459665760397911, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.6798196166854567, |
| "grad_norm": 0.0048475307412445545, |
| "learning_rate": 8.977403293757182e-06, |
| "loss": 0.002888294868171215, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.6910935738444195, |
| "grad_norm": 0.012677814811468124, |
| "learning_rate": 8.900804289544237e-06, |
| "loss": 0.00027513087261468174, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.7023675310033823, |
| "grad_norm": 0.003317478811368346, |
| "learning_rate": 8.824205285331291e-06, |
| "loss": 0.0003019048599526286, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.713641488162345, |
| "grad_norm": 0.003984434064477682, |
| "learning_rate": 8.747606281118347e-06, |
| "loss": 0.048750275373458864, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.7249154453213078, |
| "grad_norm": 0.010433024726808071, |
| "learning_rate": 8.6710072769054e-06, |
| "loss": 0.00035001789219677446, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.7361894024802704, |
| "grad_norm": 0.009012402966618538, |
| "learning_rate": 8.594408272692456e-06, |
| "loss": 0.003188546746969223, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.7474633596392333, |
| "grad_norm": 0.003260941244661808, |
| "learning_rate": 8.51780926847951e-06, |
| "loss": 0.00041163493879139426, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.7587373167981961, |
| "grad_norm": 0.003769806120544672, |
| "learning_rate": 8.441210264266565e-06, |
| "loss": 0.0005010565742850304, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.770011273957159, |
| "grad_norm": 0.002419503405690193, |
| "learning_rate": 8.36461126005362e-06, |
| "loss": 0.00027262703515589236, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.7812852311161218, |
| "grad_norm": 0.5945590734481812, |
| "learning_rate": 8.288012255840674e-06, |
| "loss": 0.0005243656225502491, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.7925591882750846, |
| "grad_norm": 0.0026839941274374723, |
| "learning_rate": 8.21141325162773e-06, |
| "loss": 0.00037036684807389975, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.8038331454340475, |
| "grad_norm": 0.003954754676669836, |
| "learning_rate": 8.134814247414784e-06, |
| "loss": 0.0003723638830706477, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.8151071025930101, |
| "grad_norm": 0.0031616361811757088, |
| "learning_rate": 8.05821524320184e-06, |
| "loss": 0.019034263491630555, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.826381059751973, |
| "grad_norm": 0.002904184628278017, |
| "learning_rate": 7.981616238988895e-06, |
| "loss": 0.00026692179962992666, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.8376550169109356, |
| "grad_norm": 32.38505172729492, |
| "learning_rate": 7.905017234775948e-06, |
| "loss": 0.0391994833946228, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.8489289740698984, |
| "grad_norm": 0.0033882863353937864, |
| "learning_rate": 7.828418230563002e-06, |
| "loss": 0.0020088400691747666, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.8602029312288613, |
| "grad_norm": 0.002738871844485402, |
| "learning_rate": 7.751819226350058e-06, |
| "loss": 0.0017810318619012832, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.8714768883878241, |
| "grad_norm": 5.877318382263184, |
| "learning_rate": 7.675220222137113e-06, |
| "loss": 0.0020224183797836305, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.882750845546787, |
| "grad_norm": 0.009523645974695683, |
| "learning_rate": 7.598621217924168e-06, |
| "loss": 0.0007010831497609616, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.8940248027057498, |
| "grad_norm": 0.0037067264784127474, |
| "learning_rate": 7.522022213711222e-06, |
| "loss": 0.002430897019803524, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.9052987598647126, |
| "grad_norm": 0.0031290531624108553, |
| "learning_rate": 7.445423209498277e-06, |
| "loss": 0.012180248647928238, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.9165727170236753, |
| "grad_norm": 0.0031704511493444443, |
| "learning_rate": 7.368824205285332e-06, |
| "loss": 0.00022213710471987724, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.927846674182638, |
| "grad_norm": 0.005679958499968052, |
| "learning_rate": 7.292225201072387e-06, |
| "loss": 0.0002556309336796403, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.939120631341601, |
| "grad_norm": 0.0029895652551203966, |
| "learning_rate": 7.215626196859441e-06, |
| "loss": 0.00023862749803811313, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.9503945885005636, |
| "grad_norm": 0.003230735659599304, |
| "learning_rate": 7.139027192646496e-06, |
| "loss": 0.00020957824308425187, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.9616685456595264, |
| "grad_norm": 0.0025276802480220795, |
| "learning_rate": 7.062428188433551e-06, |
| "loss": 0.0014538018964231013, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.9729425028184893, |
| "grad_norm": 0.002372529823333025, |
| "learning_rate": 6.9858291842206064e-06, |
| "loss": 0.0001988589996472001, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.984216459977452, |
| "grad_norm": 0.0026245692279189825, |
| "learning_rate": 6.90923018000766e-06, |
| "loss": 0.00025464268401265144, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.995490417136415, |
| "grad_norm": 0.002548970514908433, |
| "learning_rate": 6.832631175794715e-06, |
| "loss": 0.0005531720817089081, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_avg_accuracy": 0.9988716502115655, |
| "eval_explanation_accuracy": 0.998589562764457, |
| "eval_explanation_f1": 0.997229916897507, |
| "eval_loss": 0.010701753199100494, |
| "eval_refusal_accuracy": 0.9991537376586742, |
| "eval_refusal_f1": 0.9991546914623838, |
| "eval_runtime": 13.2492, |
| "eval_samples_per_second": 267.563, |
| "eval_steps_per_second": 16.756, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.006764374295378, |
| "grad_norm": 0.0026448487769812346, |
| "learning_rate": 6.75603217158177e-06, |
| "loss": 0.001086287945508957, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.0180383314543406, |
| "grad_norm": 0.05540625751018524, |
| "learning_rate": 6.679433167368825e-06, |
| "loss": 0.0002975093899294734, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.0293122886133035, |
| "grad_norm": 0.0036341070663183928, |
| "learning_rate": 6.602834163155879e-06, |
| "loss": 0.00019313200609758497, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.040586245772266, |
| "grad_norm": 0.0022988717537373304, |
| "learning_rate": 6.526235158942934e-06, |
| "loss": 0.00020039482042193412, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.0518602029312287, |
| "grad_norm": 0.003095146268606186, |
| "learning_rate": 6.449636154729989e-06, |
| "loss": 0.004662838205695153, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.0631341600901916, |
| "grad_norm": 0.005171661730855703, |
| "learning_rate": 6.373037150517044e-06, |
| "loss": 0.00023003274109214544, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.0744081172491544, |
| "grad_norm": 0.0035116367507725954, |
| "learning_rate": 6.296438146304099e-06, |
| "loss": 0.0002057728124782443, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.0856820744081173, |
| "grad_norm": 0.0033011254854500294, |
| "learning_rate": 6.219839142091153e-06, |
| "loss": 0.00030097963754087685, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.09695603156708, |
| "grad_norm": 0.002900548744946718, |
| "learning_rate": 6.143240137878208e-06, |
| "loss": 0.00018712843302637339, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.108229988726043, |
| "grad_norm": 0.006290908437222242, |
| "learning_rate": 6.066641133665263e-06, |
| "loss": 0.000566501123830676, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.119503945885006, |
| "grad_norm": 0.0031396099366247654, |
| "learning_rate": 5.990042129452318e-06, |
| "loss": 0.00022002970799803734, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.1307779030439686, |
| "grad_norm": 0.002400788012892008, |
| "learning_rate": 5.913443125239372e-06, |
| "loss": 0.00019659055396914482, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.142051860202931, |
| "grad_norm": 0.0021314811892807484, |
| "learning_rate": 5.8368441210264275e-06, |
| "loss": 0.00017432052409276365, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.153325817361894, |
| "grad_norm": 0.002507928293198347, |
| "learning_rate": 5.760245116813482e-06, |
| "loss": 0.00019732387736439704, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.1645997745208567, |
| "grad_norm": 0.0028056411538273096, |
| "learning_rate": 5.683646112600537e-06, |
| "loss": 0.00019395540002733468, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.1758737316798196, |
| "grad_norm": 0.0024449010379612446, |
| "learning_rate": 5.607047108387591e-06, |
| "loss": 0.0001847828272730112, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.1871476888387824, |
| "grad_norm": 0.0024868773762136698, |
| "learning_rate": 5.530448104174646e-06, |
| "loss": 0.00018266570987179875, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.1984216459977453, |
| "grad_norm": 0.002107750391587615, |
| "learning_rate": 5.453849099961701e-06, |
| "loss": 0.025128710269927978, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.209695603156708, |
| "grad_norm": 0.0024662779178470373, |
| "learning_rate": 5.377250095748756e-06, |
| "loss": 0.00017324247164651752, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.220969560315671, |
| "grad_norm": 0.002248900244012475, |
| "learning_rate": 5.30065109153581e-06, |
| "loss": 0.0001846975414082408, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.232243517474634, |
| "grad_norm": 0.0025225630961358547, |
| "learning_rate": 5.224052087322865e-06, |
| "loss": 0.004743445664644241, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.243517474633596, |
| "grad_norm": 0.002253986429423094, |
| "learning_rate": 5.14745308310992e-06, |
| "loss": 0.0001700402470305562, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.254791431792559, |
| "grad_norm": 0.003215694334357977, |
| "learning_rate": 5.070854078896975e-06, |
| "loss": 0.0005138281732797623, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.266065388951522, |
| "grad_norm": 0.00215666345320642, |
| "learning_rate": 4.99425507468403e-06, |
| "loss": 0.001671653985977173, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.2773393461104847, |
| "grad_norm": 0.0021826960146427155, |
| "learning_rate": 4.917656070471084e-06, |
| "loss": 0.00017449007136747242, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.2886133032694476, |
| "grad_norm": 0.0023455084301531315, |
| "learning_rate": 4.841057066258139e-06, |
| "loss": 0.03777830898761749, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.2998872604284104, |
| "grad_norm": 0.002160080010071397, |
| "learning_rate": 4.764458062045194e-06, |
| "loss": 0.00018261241493746638, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.3111612175873733, |
| "grad_norm": 0.004147669766098261, |
| "learning_rate": 4.6878590578322485e-06, |
| "loss": 0.00036145471967756747, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.322435174746336, |
| "grad_norm": 0.0024037472903728485, |
| "learning_rate": 4.611260053619303e-06, |
| "loss": 0.00017412586603313685, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.333709131905299, |
| "grad_norm": 0.0037479486782103777, |
| "learning_rate": 4.534661049406358e-06, |
| "loss": 0.0001948391436599195, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.344983089064262, |
| "grad_norm": 0.003978535998612642, |
| "learning_rate": 4.458062045193413e-06, |
| "loss": 0.011761841922998428, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.356257046223224, |
| "grad_norm": 0.0021312201861292124, |
| "learning_rate": 4.381463040980468e-06, |
| "loss": 0.00018223656807094812, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.367531003382187, |
| "grad_norm": 0.7311570048332214, |
| "learning_rate": 4.3048640367675224e-06, |
| "loss": 0.0005515201948583127, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.37880496054115, |
| "grad_norm": 0.0028338211122900248, |
| "learning_rate": 4.228265032554577e-06, |
| "loss": 0.00019430217798799276, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.3900789177001127, |
| "grad_norm": 5.098042011260986, |
| "learning_rate": 4.1516660283416325e-06, |
| "loss": 0.10585907697677613, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.4013528748590756, |
| "grad_norm": 0.0023855555336922407, |
| "learning_rate": 4.075067024128686e-06, |
| "loss": 0.00016728861955925823, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.4126268320180384, |
| "grad_norm": 0.003139745444059372, |
| "learning_rate": 3.998468019915742e-06, |
| "loss": 0.0001855495385825634, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.4239007891770012, |
| "grad_norm": 0.0026146220043301582, |
| "learning_rate": 3.921869015702796e-06, |
| "loss": 0.0001866686507128179, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.435174746335964, |
| "grad_norm": 0.0025597689673304558, |
| "learning_rate": 3.845270011489851e-06, |
| "loss": 0.00018023275770246984, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.4464487034949265, |
| "grad_norm": 0.002527853474020958, |
| "learning_rate": 3.7686710072769056e-06, |
| "loss": 0.00019359666621312499, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.4577226606538893, |
| "grad_norm": 0.0018621304770931602, |
| "learning_rate": 3.6920720030639607e-06, |
| "loss": 0.0002490927465260029, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.468996617812852, |
| "grad_norm": 0.004213541280478239, |
| "learning_rate": 3.6154729988510153e-06, |
| "loss": 0.0001960037974640727, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.480270574971815, |
| "grad_norm": 0.00442644115537405, |
| "learning_rate": 3.53887399463807e-06, |
| "loss": 0.005311020836234093, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.491544532130778, |
| "grad_norm": 0.0024214547593146563, |
| "learning_rate": 3.4622749904251246e-06, |
| "loss": 0.00021092374809086322, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.5028184892897407, |
| "grad_norm": 0.04687405005097389, |
| "learning_rate": 3.3856759862121796e-06, |
| "loss": 0.00021563491318374873, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.5140924464487036, |
| "grad_norm": 0.0022504194639623165, |
| "learning_rate": 3.3090769819992342e-06, |
| "loss": 0.0001966242678463459, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.5253664036076664, |
| "grad_norm": 0.0021047424525022507, |
| "learning_rate": 3.2324779777862893e-06, |
| "loss": 0.044666323065757754, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.5366403607666292, |
| "grad_norm": 0.012675349600613117, |
| "learning_rate": 3.1558789735733435e-06, |
| "loss": 0.00024262876249849796, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.547914317925592, |
| "grad_norm": 0.0033559410367161036, |
| "learning_rate": 3.0792799693603985e-06, |
| "loss": 0.0001711784047074616, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.559188275084555, |
| "grad_norm": 0.003151422832161188, |
| "learning_rate": 3.002680965147453e-06, |
| "loss": 0.00019296318059787155, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.5704622322435173, |
| "grad_norm": 0.003021210664883256, |
| "learning_rate": 2.926081960934508e-06, |
| "loss": 0.00021011463832110167, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.58173618940248, |
| "grad_norm": 0.002194963628426194, |
| "learning_rate": 2.849482956721563e-06, |
| "loss": 0.0001936237793415785, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.593010146561443, |
| "grad_norm": 0.0019431166583672166, |
| "learning_rate": 2.772883952508618e-06, |
| "loss": 0.00018298645736649632, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.604284103720406, |
| "grad_norm": 0.0022242714185267687, |
| "learning_rate": 2.696284948295672e-06, |
| "loss": 0.00020617968402802944, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.6155580608793687, |
| "grad_norm": 0.0025375671684741974, |
| "learning_rate": 2.619685944082727e-06, |
| "loss": 0.000198179273866117, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.6268320180383316, |
| "grad_norm": 0.0024469804484397173, |
| "learning_rate": 2.543086939869782e-06, |
| "loss": 0.0001969418255612254, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.6381059751972944, |
| "grad_norm": 0.0028019780293107033, |
| "learning_rate": 2.4664879356568368e-06, |
| "loss": 0.00028534492012113335, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.649379932356257, |
| "grad_norm": 0.0021919722203165293, |
| "learning_rate": 2.3898889314438914e-06, |
| "loss": 0.0001910912338644266, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.6606538895152196, |
| "grad_norm": 0.006022196263074875, |
| "learning_rate": 2.313289927230946e-06, |
| "loss": 0.00019817908760160207, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.6719278466741825, |
| "grad_norm": 0.0033763679675757885, |
| "learning_rate": 2.236690923018001e-06, |
| "loss": 0.00017044605920091271, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.6832018038331453, |
| "grad_norm": 0.007944841869175434, |
| "learning_rate": 2.1600919188050557e-06, |
| "loss": 0.0001895905937999487, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.694475760992108, |
| "grad_norm": 0.0026104075368493795, |
| "learning_rate": 2.0834929145921103e-06, |
| "loss": 0.0002025722526013851, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.705749718151071, |
| "grad_norm": 0.0022773994132876396, |
| "learning_rate": 2.0068939103791653e-06, |
| "loss": 0.00020347356330603362, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.717023675310034, |
| "grad_norm": 0.002472513820976019, |
| "learning_rate": 1.93029490616622e-06, |
| "loss": 0.00018234442686662077, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.7282976324689967, |
| "grad_norm": 0.002856658538803458, |
| "learning_rate": 1.8536959019532746e-06, |
| "loss": 0.00017694416455924511, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.7395715896279595, |
| "grad_norm": 0.0023991933558136225, |
| "learning_rate": 1.7770968977403294e-06, |
| "loss": 0.00017012828029692174, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.7508455467869224, |
| "grad_norm": 0.0018577013397589326, |
| "learning_rate": 1.700497893527384e-06, |
| "loss": 0.00019613306503742933, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.7621195039458852, |
| "grad_norm": 0.0023332657292485237, |
| "learning_rate": 1.623898889314439e-06, |
| "loss": 0.00018154056742787362, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.773393461104848, |
| "grad_norm": 0.00224124058149755, |
| "learning_rate": 1.547299885101494e-06, |
| "loss": 0.00017112611094489695, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.7846674182638105, |
| "grad_norm": 0.0025407865177839994, |
| "learning_rate": 1.4707008808885488e-06, |
| "loss": 0.0033345352858304977, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.7959413754227733, |
| "grad_norm": 0.002220644848421216, |
| "learning_rate": 1.3941018766756034e-06, |
| "loss": 0.0001717488979920745, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.807215332581736, |
| "grad_norm": 0.01924099028110504, |
| "learning_rate": 1.3175028724626582e-06, |
| "loss": 0.00020746220834553242, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.818489289740699, |
| "grad_norm": 0.002599391620606184, |
| "learning_rate": 1.2409038682497128e-06, |
| "loss": 0.00016747122863307594, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.829763246899662, |
| "grad_norm": 0.6239835619926453, |
| "learning_rate": 1.1643048640367677e-06, |
| "loss": 0.00034458544105291365, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.8410372040586247, |
| "grad_norm": 0.002947124419733882, |
| "learning_rate": 1.0877058598238225e-06, |
| "loss": 0.00016304129967465998, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.852311161217587, |
| "grad_norm": 0.002417457289993763, |
| "learning_rate": 1.0111068556108771e-06, |
| "loss": 0.00016610570019111038, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.86358511837655, |
| "grad_norm": 0.0022089367266744375, |
| "learning_rate": 9.34507851397932e-07, |
| "loss": 0.00023457645438611506, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.874859075535513, |
| "grad_norm": 0.002480805618688464, |
| "learning_rate": 8.579088471849867e-07, |
| "loss": 0.00017723250202834607, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.8861330326944756, |
| "grad_norm": 0.0019805266056209803, |
| "learning_rate": 7.813098429720414e-07, |
| "loss": 0.00017158776754513382, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.8974069898534385, |
| "grad_norm": 0.0019271186320111156, |
| "learning_rate": 7.047108387590961e-07, |
| "loss": 0.0001926283701322973, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.9086809470124013, |
| "grad_norm": 0.002350042574107647, |
| "learning_rate": 6.28111834546151e-07, |
| "loss": 0.00016755717806518077, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.919954904171364, |
| "grad_norm": 0.0027232232969254255, |
| "learning_rate": 5.515128303332057e-07, |
| "loss": 0.00016589296283200383, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.931228861330327, |
| "grad_norm": 0.00370240886695683, |
| "learning_rate": 4.749138261202605e-07, |
| "loss": 0.0001985297305509448, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.94250281848929, |
| "grad_norm": 0.00637893145903945, |
| "learning_rate": 3.9831482190731527e-07, |
| "loss": 0.01036590412259102, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.9537767756482527, |
| "grad_norm": 0.0027874845545738935, |
| "learning_rate": 3.2171581769437e-07, |
| "loss": 0.000199174671433866, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.9650507328072155, |
| "grad_norm": 0.002098528202623129, |
| "learning_rate": 2.451168134814248e-07, |
| "loss": 0.00017001798842102288, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.9763246899661784, |
| "grad_norm": 0.003855147399008274, |
| "learning_rate": 1.685178092684795e-07, |
| "loss": 0.0009324193932116031, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.987598647125141, |
| "grad_norm": 0.003377134446054697, |
| "learning_rate": 9.19188050555343e-08, |
| "loss": 0.00022254332434386016, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.9988726042841036, |
| "grad_norm": 0.001944106537848711, |
| "learning_rate": 1.531980084258905e-08, |
| "loss": 0.00032243425957858565, |
| "step": 2660 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_avg_accuracy": 0.9992947813822285, |
| "eval_explanation_accuracy": 0.9991537376586742, |
| "eval_explanation_f1": 0.9983397897066962, |
| "eval_loss": 0.008407124318182468, |
| "eval_refusal_accuracy": 0.9994358251057828, |
| "eval_refusal_f1": 0.9994359842075579, |
| "eval_runtime": 8.1788, |
| "eval_samples_per_second": 433.439, |
| "eval_steps_per_second": 27.143, |
| "step": 2661 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2661, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |