{ "best_global_step": 23254, "best_metric": 0.8739344728917919, "best_model_checkpoint": "output/QA-DeBERTa-v3-base-binary/checkpoint-23254", "epoch": 3.750887154009936, "eval_steps": 2114, "global_step": 31710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011828720132481666, "grad_norm": 1.452476978302002, "learning_rate": 5.4e-08, "loss": 0.6892, "step": 10 }, { "epoch": 0.0023657440264963333, "grad_norm": 1.3107129335403442, "learning_rate": 1.14e-07, "loss": 0.6895, "step": 20 }, { "epoch": 0.0035486160397444995, "grad_norm": 1.5076279640197754, "learning_rate": 1.7400000000000002e-07, "loss": 0.6912, "step": 30 }, { "epoch": 0.0047314880529926665, "grad_norm": 0.7335036993026733, "learning_rate": 2.34e-07, "loss": 0.6838, "step": 40 }, { "epoch": 0.005914360066240833, "grad_norm": 1.0573631525039673, "learning_rate": 2.94e-07, "loss": 0.692, "step": 50 }, { "epoch": 0.007097232079488999, "grad_norm": 0.5848931670188904, "learning_rate": 3.5399999999999997e-07, "loss": 0.6922, "step": 60 }, { "epoch": 0.008280104092737165, "grad_norm": 0.5945519804954529, "learning_rate": 4.1400000000000003e-07, "loss": 0.691, "step": 70 }, { "epoch": 0.009462976105985333, "grad_norm": 0.7829030156135559, "learning_rate": 4.7400000000000004e-07, "loss": 0.6774, "step": 80 }, { "epoch": 0.0106458481192335, "grad_norm": 0.6574153304100037, "learning_rate": 5.34e-07, "loss": 0.6908, "step": 90 }, { "epoch": 0.011828720132481665, "grad_norm": 0.9069286584854126, "learning_rate": 5.94e-07, "loss": 0.6826, "step": 100 }, { "epoch": 0.013011592145729832, "grad_norm": 0.7912967801094055, "learning_rate": 6.54e-07, "loss": 0.6919, "step": 110 }, { "epoch": 0.014194464158977998, "grad_norm": 1.0637558698654175, "learning_rate": 7.14e-07, "loss": 0.6882, "step": 120 }, { "epoch": 0.015377336172226166, "grad_norm": 0.7037789821624756, "learning_rate": 7.74e-07, "loss": 0.6886, "step": 130 }, { "epoch": 0.01656020818547433, "grad_norm": 0.830253005027771, "learning_rate": 8.340000000000001e-07, "loss": 0.6949, "step": 140 }, { "epoch": 0.017743080198722498, "grad_norm": 1.0448371171951294, "learning_rate": 8.939999999999999e-07, "loss": 0.6897, "step": 150 }, { "epoch": 0.018925952211970666, "grad_norm": 0.7149848341941833, "learning_rate": 9.54e-07, "loss": 0.6944, "step": 160 }, { "epoch": 0.02010882422521883, "grad_norm": 0.8854143023490906, "learning_rate": 1.0140000000000002e-06, "loss": 0.6905, "step": 170 }, { "epoch": 0.021291696238467, "grad_norm": 0.5680384039878845, "learning_rate": 1.074e-06, "loss": 0.6829, "step": 180 }, { "epoch": 0.022474568251715163, "grad_norm": 0.9411191940307617, "learning_rate": 1.134e-06, "loss": 0.6861, "step": 190 }, { "epoch": 0.02365744026496333, "grad_norm": 0.7921398878097534, "learning_rate": 1.1940000000000001e-06, "loss": 0.6854, "step": 200 }, { "epoch": 0.0248403122782115, "grad_norm": 0.7178440093994141, "learning_rate": 1.254e-06, "loss": 0.6839, "step": 210 }, { "epoch": 0.026023184291459663, "grad_norm": 0.8898746371269226, "learning_rate": 1.314e-06, "loss": 0.6703, "step": 220 }, { "epoch": 0.02720605630470783, "grad_norm": 0.9438018202781677, "learning_rate": 1.374e-06, "loss": 0.6892, "step": 230 }, { "epoch": 0.028388928317955996, "grad_norm": 1.8930295705795288, "learning_rate": 1.434e-06, "loss": 0.6877, "step": 240 }, { "epoch": 0.029571800331204164, "grad_norm": 0.9319143891334534, "learning_rate": 1.494e-06, "loss": 0.69, "step": 250 }, { "epoch": 0.03075467234445233, "grad_norm": 0.824596107006073, "learning_rate": 1.554e-06, "loss": 0.6712, "step": 260 }, { "epoch": 0.0319375443577005, "grad_norm": 1.2915641069412231, "learning_rate": 1.6140000000000001e-06, "loss": 0.6642, "step": 270 }, { "epoch": 0.03312041637094866, "grad_norm": 1.0681672096252441, "learning_rate": 1.6740000000000002e-06, "loss": 0.6626, "step": 280 }, { "epoch": 0.03430328838419683, "grad_norm": 0.9478822946548462, "learning_rate": 1.7339999999999998e-06, "loss": 0.6621, "step": 290 }, { "epoch": 0.035486160397444996, "grad_norm": 1.407970905303955, "learning_rate": 1.7939999999999999e-06, "loss": 0.6333, "step": 300 }, { "epoch": 0.036669032410693164, "grad_norm": 1.8239948749542236, "learning_rate": 1.854e-06, "loss": 0.6293, "step": 310 }, { "epoch": 0.03785190442394133, "grad_norm": 1.1743783950805664, "learning_rate": 1.9140000000000002e-06, "loss": 0.5745, "step": 320 }, { "epoch": 0.03903477643718949, "grad_norm": 2.4400320053100586, "learning_rate": 1.974e-06, "loss": 0.6148, "step": 330 }, { "epoch": 0.04021764845043766, "grad_norm": 2.1994576454162598, "learning_rate": 2.0340000000000003e-06, "loss": 0.5938, "step": 340 }, { "epoch": 0.04140052046368583, "grad_norm": 6.897472858428955, "learning_rate": 2.0939999999999998e-06, "loss": 0.5736, "step": 350 }, { "epoch": 0.042583392476934, "grad_norm": 2.3344879150390625, "learning_rate": 2.154e-06, "loss": 0.5793, "step": 360 }, { "epoch": 0.043766264490182165, "grad_norm": 1.583277702331543, "learning_rate": 2.214e-06, "loss": 0.5565, "step": 370 }, { "epoch": 0.044949136503430326, "grad_norm": 1.6724634170532227, "learning_rate": 2.274e-06, "loss": 0.556, "step": 380 }, { "epoch": 0.046132008516678494, "grad_norm": 2.3909051418304443, "learning_rate": 2.334e-06, "loss": 0.5604, "step": 390 }, { "epoch": 0.04731488052992666, "grad_norm": 1.8554753065109253, "learning_rate": 2.3940000000000003e-06, "loss": 0.5495, "step": 400 }, { "epoch": 0.04849775254317483, "grad_norm": 3.570477247238159, "learning_rate": 2.4539999999999997e-06, "loss": 0.5379, "step": 410 }, { "epoch": 0.049680624556423, "grad_norm": 3.0133328437805176, "learning_rate": 2.514e-06, "loss": 0.5462, "step": 420 }, { "epoch": 0.05086349656967116, "grad_norm": 2.053877830505371, "learning_rate": 2.574e-06, "loss": 0.5224, "step": 430 }, { "epoch": 0.052046368582919326, "grad_norm": 3.332078218460083, "learning_rate": 2.634e-06, "loss": 0.5472, "step": 440 }, { "epoch": 0.053229240596167494, "grad_norm": 3.0517325401306152, "learning_rate": 2.694e-06, "loss": 0.5352, "step": 450 }, { "epoch": 0.05441211260941566, "grad_norm": 3.1004769802093506, "learning_rate": 2.7540000000000002e-06, "loss": 0.5301, "step": 460 }, { "epoch": 0.05559498462266383, "grad_norm": 2.9489076137542725, "learning_rate": 2.814e-06, "loss": 0.5352, "step": 470 }, { "epoch": 0.05677785663591199, "grad_norm": 2.8148906230926514, "learning_rate": 2.874e-06, "loss": 0.518, "step": 480 }, { "epoch": 0.05796072864916016, "grad_norm": 1.5071579217910767, "learning_rate": 2.934e-06, "loss": 0.5416, "step": 490 }, { "epoch": 0.05914360066240833, "grad_norm": 3.4842145442962646, "learning_rate": 2.994e-06, "loss": 0.5383, "step": 500 }, { "epoch": 0.060326472675656495, "grad_norm": 3.9251625537872314, "learning_rate": 3.0540000000000003e-06, "loss": 0.4893, "step": 510 }, { "epoch": 0.06150934468890466, "grad_norm": 3.9749224185943604, "learning_rate": 3.114e-06, "loss": 0.4967, "step": 520 }, { "epoch": 0.06269221670215283, "grad_norm": 2.711362361907959, "learning_rate": 3.1740000000000004e-06, "loss": 0.4955, "step": 530 }, { "epoch": 0.063875088715401, "grad_norm": 10.314888954162598, "learning_rate": 3.2340000000000003e-06, "loss": 0.493, "step": 540 }, { "epoch": 0.06505796072864917, "grad_norm": 4.265482425689697, "learning_rate": 3.294e-06, "loss": 0.4981, "step": 550 }, { "epoch": 0.06624083274189732, "grad_norm": 4.920806407928467, "learning_rate": 3.3540000000000004e-06, "loss": 0.496, "step": 560 }, { "epoch": 0.06742370475514549, "grad_norm": 4.876100063323975, "learning_rate": 3.414e-06, "loss": 0.512, "step": 570 }, { "epoch": 0.06860657676839366, "grad_norm": 2.3723716735839844, "learning_rate": 3.4739999999999997e-06, "loss": 0.5073, "step": 580 }, { "epoch": 0.06978944878164182, "grad_norm": 2.6958115100860596, "learning_rate": 3.534e-06, "loss": 0.5293, "step": 590 }, { "epoch": 0.07097232079488999, "grad_norm": 3.673887014389038, "learning_rate": 3.594e-06, "loss": 0.4858, "step": 600 }, { "epoch": 0.07215519280813816, "grad_norm": 3.069746255874634, "learning_rate": 3.654e-06, "loss": 0.4787, "step": 610 }, { "epoch": 0.07333806482138633, "grad_norm": 1.7422595024108887, "learning_rate": 3.714e-06, "loss": 0.4961, "step": 620 }, { "epoch": 0.0745209368346345, "grad_norm": 3.383603572845459, "learning_rate": 3.774e-06, "loss": 0.5233, "step": 630 }, { "epoch": 0.07570380884788266, "grad_norm": 6.0858869552612305, "learning_rate": 3.834e-06, "loss": 0.4821, "step": 640 }, { "epoch": 0.07688668086113083, "grad_norm": 2.8031091690063477, "learning_rate": 3.894e-06, "loss": 0.5011, "step": 650 }, { "epoch": 0.07806955287437899, "grad_norm": 2.313732147216797, "learning_rate": 3.954000000000001e-06, "loss": 0.4887, "step": 660 }, { "epoch": 0.07925242488762715, "grad_norm": 3.6443533897399902, "learning_rate": 4.014e-06, "loss": 0.459, "step": 670 }, { "epoch": 0.08043529690087532, "grad_norm": 4.758196830749512, "learning_rate": 4.074e-06, "loss": 0.4168, "step": 680 }, { "epoch": 0.08161816891412349, "grad_norm": 2.776780366897583, "learning_rate": 4.134e-06, "loss": 0.4426, "step": 690 }, { "epoch": 0.08280104092737166, "grad_norm": 2.4684460163116455, "learning_rate": 4.194e-06, "loss": 0.4241, "step": 700 }, { "epoch": 0.08398391294061983, "grad_norm": 2.3552892208099365, "learning_rate": 4.254e-06, "loss": 0.4329, "step": 710 }, { "epoch": 0.085166784953868, "grad_norm": 3.4647741317749023, "learning_rate": 4.314e-06, "loss": 0.4323, "step": 720 }, { "epoch": 0.08634965696711616, "grad_norm": 2.714560031890869, "learning_rate": 4.374e-06, "loss": 0.4453, "step": 730 }, { "epoch": 0.08753252898036433, "grad_norm": 3.3246028423309326, "learning_rate": 4.434e-06, "loss": 0.4311, "step": 740 }, { "epoch": 0.0887154009936125, "grad_norm": 2.8064894676208496, "learning_rate": 4.4940000000000005e-06, "loss": 0.3956, "step": 750 }, { "epoch": 0.08989827300686065, "grad_norm": 4.829779624938965, "learning_rate": 4.554e-06, "loss": 0.4217, "step": 760 }, { "epoch": 0.09108114502010882, "grad_norm": 4.982199668884277, "learning_rate": 4.614e-06, "loss": 0.4535, "step": 770 }, { "epoch": 0.09226401703335699, "grad_norm": 4.116279125213623, "learning_rate": 4.6740000000000005e-06, "loss": 0.4447, "step": 780 }, { "epoch": 0.09344688904660516, "grad_norm": 3.007605791091919, "learning_rate": 4.734e-06, "loss": 0.4308, "step": 790 }, { "epoch": 0.09462976105985332, "grad_norm": 6.277472972869873, "learning_rate": 4.794e-06, "loss": 0.4705, "step": 800 }, { "epoch": 0.09581263307310149, "grad_norm": 2.7225468158721924, "learning_rate": 4.8540000000000005e-06, "loss": 0.4687, "step": 810 }, { "epoch": 0.09699550508634966, "grad_norm": 3.167908191680908, "learning_rate": 4.914e-06, "loss": 0.4203, "step": 820 }, { "epoch": 0.09817837709959783, "grad_norm": 3.6750435829162598, "learning_rate": 4.974e-06, "loss": 0.424, "step": 830 }, { "epoch": 0.099361249112846, "grad_norm": 5.332252025604248, "learning_rate": 5.0339999999999996e-06, "loss": 0.4211, "step": 840 }, { "epoch": 0.10054412112609416, "grad_norm": 6.136302471160889, "learning_rate": 5.094e-06, "loss": 0.4367, "step": 850 }, { "epoch": 0.10172699313934232, "grad_norm": 4.148458003997803, "learning_rate": 5.154e-06, "loss": 0.4384, "step": 860 }, { "epoch": 0.10290986515259049, "grad_norm": 4.16589879989624, "learning_rate": 5.214e-06, "loss": 0.4513, "step": 870 }, { "epoch": 0.10409273716583865, "grad_norm": 3.297203302383423, "learning_rate": 5.274e-06, "loss": 0.4404, "step": 880 }, { "epoch": 0.10527560917908682, "grad_norm": 4.277662754058838, "learning_rate": 5.334e-06, "loss": 0.4283, "step": 890 }, { "epoch": 0.10645848119233499, "grad_norm": 2.4507994651794434, "learning_rate": 5.394e-06, "loss": 0.4375, "step": 900 }, { "epoch": 0.10764135320558316, "grad_norm": 2.6106863021850586, "learning_rate": 5.454000000000001e-06, "loss": 0.4293, "step": 910 }, { "epoch": 0.10882422521883132, "grad_norm": 2.76210618019104, "learning_rate": 5.514e-06, "loss": 0.4118, "step": 920 }, { "epoch": 0.11000709723207949, "grad_norm": 6.881728649139404, "learning_rate": 5.574e-06, "loss": 0.3949, "step": 930 }, { "epoch": 0.11118996924532766, "grad_norm": 4.583062171936035, "learning_rate": 5.634e-06, "loss": 0.4289, "step": 940 }, { "epoch": 0.11237284125857583, "grad_norm": 3.283390522003174, "learning_rate": 5.694e-06, "loss": 0.4056, "step": 950 }, { "epoch": 0.11355571327182398, "grad_norm": 5.579883575439453, "learning_rate": 5.754e-06, "loss": 0.4345, "step": 960 }, { "epoch": 0.11473858528507215, "grad_norm": 2.8114514350891113, "learning_rate": 5.814e-06, "loss": 0.4077, "step": 970 }, { "epoch": 0.11592145729832032, "grad_norm": 2.615882396697998, "learning_rate": 5.874e-06, "loss": 0.3869, "step": 980 }, { "epoch": 0.11710432931156849, "grad_norm": 2.7312803268432617, "learning_rate": 5.934e-06, "loss": 0.3984, "step": 990 }, { "epoch": 0.11828720132481665, "grad_norm": 2.6970901489257812, "learning_rate": 5.9940000000000005e-06, "loss": 0.4326, "step": 1000 }, { "epoch": 0.11947007333806482, "grad_norm": 3.9762089252471924, "learning_rate": 5.999353603064401e-06, "loss": 0.4402, "step": 1010 }, { "epoch": 0.12065294535131299, "grad_norm": 3.597429037094116, "learning_rate": 5.998635384247068e-06, "loss": 0.4223, "step": 1020 }, { "epoch": 0.12183581736456116, "grad_norm": 4.2126617431640625, "learning_rate": 5.997917165429735e-06, "loss": 0.3894, "step": 1030 }, { "epoch": 0.12301868937780933, "grad_norm": 3.482478618621826, "learning_rate": 5.9971989466124016e-06, "loss": 0.416, "step": 1040 }, { "epoch": 0.1242015613910575, "grad_norm": 4.943940162658691, "learning_rate": 5.9964807277950685e-06, "loss": 0.4042, "step": 1050 }, { "epoch": 0.12538443340430566, "grad_norm": 3.6237728595733643, "learning_rate": 5.995762508977735e-06, "loss": 0.4751, "step": 1060 }, { "epoch": 0.12656730541755382, "grad_norm": 3.268051862716675, "learning_rate": 5.995044290160402e-06, "loss": 0.4226, "step": 1070 }, { "epoch": 0.127750177430802, "grad_norm": 3.9961740970611572, "learning_rate": 5.994326071343069e-06, "loss": 0.4644, "step": 1080 }, { "epoch": 0.12893304944405015, "grad_norm": 4.443755149841309, "learning_rate": 5.993607852525736e-06, "loss": 0.4504, "step": 1090 }, { "epoch": 0.13011592145729833, "grad_norm": 5.0579657554626465, "learning_rate": 5.992889633708403e-06, "loss": 0.416, "step": 1100 }, { "epoch": 0.1312987934705465, "grad_norm": 2.979658365249634, "learning_rate": 5.99217141489107e-06, "loss": 0.4218, "step": 1110 }, { "epoch": 0.13248166548379464, "grad_norm": 3.2593445777893066, "learning_rate": 5.991453196073737e-06, "loss": 0.4365, "step": 1120 }, { "epoch": 0.13366453749704282, "grad_norm": 3.9376895427703857, "learning_rate": 5.990734977256405e-06, "loss": 0.426, "step": 1130 }, { "epoch": 0.13484740951029098, "grad_norm": 3.19195818901062, "learning_rate": 5.990016758439071e-06, "loss": 0.3864, "step": 1140 }, { "epoch": 0.13603028152353916, "grad_norm": 3.565159320831299, "learning_rate": 5.9892985396217386e-06, "loss": 0.3965, "step": 1150 }, { "epoch": 0.1372131535367873, "grad_norm": 3.654405355453491, "learning_rate": 5.988580320804405e-06, "loss": 0.4216, "step": 1160 }, { "epoch": 0.1383960255500355, "grad_norm": 2.9028587341308594, "learning_rate": 5.987862101987072e-06, "loss": 0.4401, "step": 1170 }, { "epoch": 0.13957889756328365, "grad_norm": 3.0010299682617188, "learning_rate": 5.987143883169739e-06, "loss": 0.4155, "step": 1180 }, { "epoch": 0.14076176957653183, "grad_norm": 3.2944576740264893, "learning_rate": 5.986425664352406e-06, "loss": 0.4355, "step": 1190 }, { "epoch": 0.14194464158977999, "grad_norm": 2.1551876068115234, "learning_rate": 5.985707445535073e-06, "loss": 0.3769, "step": 1200 }, { "epoch": 0.14312751360302814, "grad_norm": 2.915942668914795, "learning_rate": 5.98498922671774e-06, "loss": 0.4033, "step": 1210 }, { "epoch": 0.14431038561627632, "grad_norm": 7.247549533843994, "learning_rate": 5.984271007900407e-06, "loss": 0.4371, "step": 1220 }, { "epoch": 0.14549325762952448, "grad_norm": 6.543298721313477, "learning_rate": 5.983552789083074e-06, "loss": 0.4329, "step": 1230 }, { "epoch": 0.14667612964277266, "grad_norm": 2.8402793407440186, "learning_rate": 5.982834570265742e-06, "loss": 0.4176, "step": 1240 }, { "epoch": 0.1478590016560208, "grad_norm": 3.3635027408599854, "learning_rate": 5.982116351448408e-06, "loss": 0.3844, "step": 1250 }, { "epoch": 0.149041873669269, "grad_norm": 4.37659215927124, "learning_rate": 5.9813981326310756e-06, "loss": 0.4131, "step": 1260 }, { "epoch": 0.15022474568251715, "grad_norm": 2.964122772216797, "learning_rate": 5.980679913813742e-06, "loss": 0.3912, "step": 1270 }, { "epoch": 0.15140761769576533, "grad_norm": 3.0633158683776855, "learning_rate": 5.979961694996409e-06, "loss": 0.4181, "step": 1280 }, { "epoch": 0.15259048970901348, "grad_norm": 2.4021260738372803, "learning_rate": 5.979243476179076e-06, "loss": 0.3976, "step": 1290 }, { "epoch": 0.15377336172226166, "grad_norm": 5.8716301918029785, "learning_rate": 5.978525257361743e-06, "loss": 0.3847, "step": 1300 }, { "epoch": 0.15495623373550982, "grad_norm": 3.9792373180389404, "learning_rate": 5.97780703854441e-06, "loss": 0.4625, "step": 1310 }, { "epoch": 0.15613910574875797, "grad_norm": 2.3841233253479004, "learning_rate": 5.977088819727077e-06, "loss": 0.3979, "step": 1320 }, { "epoch": 0.15732197776200615, "grad_norm": 3.4258790016174316, "learning_rate": 5.976370600909744e-06, "loss": 0.4252, "step": 1330 }, { "epoch": 0.1585048497752543, "grad_norm": 3.380558729171753, "learning_rate": 5.975652382092411e-06, "loss": 0.3861, "step": 1340 }, { "epoch": 0.1596877217885025, "grad_norm": 3.1744539737701416, "learning_rate": 5.974934163275078e-06, "loss": 0.4341, "step": 1350 }, { "epoch": 0.16087059380175064, "grad_norm": 4.253352165222168, "learning_rate": 5.974215944457745e-06, "loss": 0.4012, "step": 1360 }, { "epoch": 0.16205346581499883, "grad_norm": 3.2548880577087402, "learning_rate": 5.973497725640412e-06, "loss": 0.4238, "step": 1370 }, { "epoch": 0.16323633782824698, "grad_norm": 3.095524549484253, "learning_rate": 5.972779506823079e-06, "loss": 0.4042, "step": 1380 }, { "epoch": 0.16441920984149516, "grad_norm": 3.0992841720581055, "learning_rate": 5.9720612880057456e-06, "loss": 0.3896, "step": 1390 }, { "epoch": 0.16560208185474332, "grad_norm": 2.9010965824127197, "learning_rate": 5.971343069188413e-06, "loss": 0.375, "step": 1400 }, { "epoch": 0.16678495386799147, "grad_norm": 3.7394485473632812, "learning_rate": 5.970624850371079e-06, "loss": 0.3699, "step": 1410 }, { "epoch": 0.16796782588123965, "grad_norm": 2.4592206478118896, "learning_rate": 5.969906631553747e-06, "loss": 0.4302, "step": 1420 }, { "epoch": 0.1691506978944878, "grad_norm": 3.285252332687378, "learning_rate": 5.969188412736413e-06, "loss": 0.4148, "step": 1430 }, { "epoch": 0.170333569907736, "grad_norm": 2.6802074909210205, "learning_rate": 5.968470193919081e-06, "loss": 0.3806, "step": 1440 }, { "epoch": 0.17151644192098414, "grad_norm": 4.508257865905762, "learning_rate": 5.967751975101747e-06, "loss": 0.4043, "step": 1450 }, { "epoch": 0.17269931393423232, "grad_norm": 2.8117973804473877, "learning_rate": 5.967033756284415e-06, "loss": 0.3803, "step": 1460 }, { "epoch": 0.17388218594748048, "grad_norm": 2.628289222717285, "learning_rate": 5.966315537467082e-06, "loss": 0.3646, "step": 1470 }, { "epoch": 0.17506505796072866, "grad_norm": 2.9634876251220703, "learning_rate": 5.965597318649749e-06, "loss": 0.3857, "step": 1480 }, { "epoch": 0.1762479299739768, "grad_norm": 6.016693115234375, "learning_rate": 5.9648790998324165e-06, "loss": 0.4257, "step": 1490 }, { "epoch": 0.177430801987225, "grad_norm": 3.9821629524230957, "learning_rate": 5.9641608810150826e-06, "loss": 0.408, "step": 1500 }, { "epoch": 0.17861367400047315, "grad_norm": 3.5072484016418457, "learning_rate": 5.96344266219775e-06, "loss": 0.372, "step": 1510 }, { "epoch": 0.1797965460137213, "grad_norm": 5.004875659942627, "learning_rate": 5.962724443380416e-06, "loss": 0.3937, "step": 1520 }, { "epoch": 0.18097941802696949, "grad_norm": 2.6555981636047363, "learning_rate": 5.962006224563084e-06, "loss": 0.3813, "step": 1530 }, { "epoch": 0.18216229004021764, "grad_norm": 2.96158504486084, "learning_rate": 5.96128800574575e-06, "loss": 0.3965, "step": 1540 }, { "epoch": 0.18334516205346582, "grad_norm": 5.002642631530762, "learning_rate": 5.960569786928418e-06, "loss": 0.4257, "step": 1550 }, { "epoch": 0.18452803406671398, "grad_norm": 3.1771128177642822, "learning_rate": 5.959851568111085e-06, "loss": 0.3539, "step": 1560 }, { "epoch": 0.18571090607996216, "grad_norm": 3.9393372535705566, "learning_rate": 5.959133349293752e-06, "loss": 0.4115, "step": 1570 }, { "epoch": 0.1868937780932103, "grad_norm": 3.128077268600464, "learning_rate": 5.958415130476419e-06, "loss": 0.4221, "step": 1580 }, { "epoch": 0.1880766501064585, "grad_norm": 2.337475061416626, "learning_rate": 5.957696911659086e-06, "loss": 0.3709, "step": 1590 }, { "epoch": 0.18925952211970665, "grad_norm": 3.5397274494171143, "learning_rate": 5.956978692841753e-06, "loss": 0.4058, "step": 1600 }, { "epoch": 0.1904423941329548, "grad_norm": 2.622213363647461, "learning_rate": 5.9562604740244196e-06, "loss": 0.4133, "step": 1610 }, { "epoch": 0.19162526614620298, "grad_norm": 3.6349985599517822, "learning_rate": 5.9555422552070865e-06, "loss": 0.4253, "step": 1620 }, { "epoch": 0.19280813815945114, "grad_norm": 4.266642093658447, "learning_rate": 5.954824036389753e-06, "loss": 0.388, "step": 1630 }, { "epoch": 0.19399101017269932, "grad_norm": 3.642580986022949, "learning_rate": 5.95410581757242e-06, "loss": 0.3962, "step": 1640 }, { "epoch": 0.19517388218594747, "grad_norm": 2.2537295818328857, "learning_rate": 5.953387598755087e-06, "loss": 0.3534, "step": 1650 }, { "epoch": 0.19635675419919565, "grad_norm": 2.525167942047119, "learning_rate": 5.952669379937754e-06, "loss": 0.3834, "step": 1660 }, { "epoch": 0.1975396262124438, "grad_norm": 2.454159736633301, "learning_rate": 5.951951161120422e-06, "loss": 0.3894, "step": 1670 }, { "epoch": 0.198722498225692, "grad_norm": 1.8713299036026, "learning_rate": 5.951232942303088e-06, "loss": 0.3597, "step": 1680 }, { "epoch": 0.19990537023894014, "grad_norm": 2.86362886428833, "learning_rate": 5.950514723485756e-06, "loss": 0.3814, "step": 1690 }, { "epoch": 0.20108824225218833, "grad_norm": 2.5308420658111572, "learning_rate": 5.949796504668422e-06, "loss": 0.3884, "step": 1700 }, { "epoch": 0.20227111426543648, "grad_norm": 3.108450412750244, "learning_rate": 5.94907828585109e-06, "loss": 0.3671, "step": 1710 }, { "epoch": 0.20345398627868463, "grad_norm": 3.3343942165374756, "learning_rate": 5.948360067033756e-06, "loss": 0.3887, "step": 1720 }, { "epoch": 0.20463685829193282, "grad_norm": 4.274463653564453, "learning_rate": 5.9476418482164235e-06, "loss": 0.4278, "step": 1730 }, { "epoch": 0.20581973030518097, "grad_norm": 3.6236939430236816, "learning_rate": 5.94692362939909e-06, "loss": 0.3704, "step": 1740 }, { "epoch": 0.20700260231842915, "grad_norm": 2.8490209579467773, "learning_rate": 5.946205410581757e-06, "loss": 0.3971, "step": 1750 }, { "epoch": 0.2081854743316773, "grad_norm": 4.853827953338623, "learning_rate": 5.945487191764425e-06, "loss": 0.3956, "step": 1760 }, { "epoch": 0.2093683463449255, "grad_norm": 6.677058219909668, "learning_rate": 5.944768972947091e-06, "loss": 0.3551, "step": 1770 }, { "epoch": 0.21055121835817364, "grad_norm": 4.976045608520508, "learning_rate": 5.944050754129759e-06, "loss": 0.3735, "step": 1780 }, { "epoch": 0.21173409037142182, "grad_norm": 2.5858116149902344, "learning_rate": 5.943332535312425e-06, "loss": 0.3669, "step": 1790 }, { "epoch": 0.21291696238466998, "grad_norm": 3.8134396076202393, "learning_rate": 5.942614316495093e-06, "loss": 0.3608, "step": 1800 }, { "epoch": 0.21409983439791813, "grad_norm": 2.15547251701355, "learning_rate": 5.941896097677759e-06, "loss": 0.3849, "step": 1810 }, { "epoch": 0.2152827064111663, "grad_norm": 4.892831325531006, "learning_rate": 5.941177878860427e-06, "loss": 0.3866, "step": 1820 }, { "epoch": 0.21646557842441447, "grad_norm": 2.8356385231018066, "learning_rate": 5.9404596600430936e-06, "loss": 0.3836, "step": 1830 }, { "epoch": 0.21764845043766265, "grad_norm": 2.7952866554260254, "learning_rate": 5.9397414412257605e-06, "loss": 0.4047, "step": 1840 }, { "epoch": 0.2188313224509108, "grad_norm": 2.3543381690979004, "learning_rate": 5.939023222408427e-06, "loss": 0.3844, "step": 1850 }, { "epoch": 0.22001419446415899, "grad_norm": 2.464482307434082, "learning_rate": 5.938305003591094e-06, "loss": 0.4003, "step": 1860 }, { "epoch": 0.22119706647740714, "grad_norm": 3.2464239597320557, "learning_rate": 5.937586784773761e-06, "loss": 0.3808, "step": 1870 }, { "epoch": 0.22237993849065532, "grad_norm": 1.8649559020996094, "learning_rate": 5.936868565956428e-06, "loss": 0.3898, "step": 1880 }, { "epoch": 0.22356281050390348, "grad_norm": 3.750849723815918, "learning_rate": 5.936150347139095e-06, "loss": 0.3423, "step": 1890 }, { "epoch": 0.22474568251715166, "grad_norm": 4.348412036895752, "learning_rate": 5.935432128321762e-06, "loss": 0.3948, "step": 1900 }, { "epoch": 0.2259285545303998, "grad_norm": 3.9699575901031494, "learning_rate": 5.934713909504429e-06, "loss": 0.3497, "step": 1910 }, { "epoch": 0.22711142654364797, "grad_norm": 3.1678178310394287, "learning_rate": 5.933995690687096e-06, "loss": 0.4167, "step": 1920 }, { "epoch": 0.22829429855689615, "grad_norm": 2.6658902168273926, "learning_rate": 5.933277471869763e-06, "loss": 0.3526, "step": 1930 }, { "epoch": 0.2294771705701443, "grad_norm": 4.206330299377441, "learning_rate": 5.9325592530524306e-06, "loss": 0.4197, "step": 1940 }, { "epoch": 0.23066004258339248, "grad_norm": 2.8765106201171875, "learning_rate": 5.931841034235097e-06, "loss": 0.3971, "step": 1950 }, { "epoch": 0.23184291459664064, "grad_norm": 4.623599052429199, "learning_rate": 5.931122815417764e-06, "loss": 0.3781, "step": 1960 }, { "epoch": 0.23302578660988882, "grad_norm": 2.4479308128356934, "learning_rate": 5.9304045966004305e-06, "loss": 0.3967, "step": 1970 }, { "epoch": 0.23420865862313697, "grad_norm": 3.326092481613159, "learning_rate": 5.929686377783098e-06, "loss": 0.382, "step": 1980 }, { "epoch": 0.23539153063638515, "grad_norm": 2.986705780029297, "learning_rate": 5.928968158965765e-06, "loss": 0.4075, "step": 1990 }, { "epoch": 0.2365744026496333, "grad_norm": 3.2481510639190674, "learning_rate": 5.928249940148432e-06, "loss": 0.3972, "step": 2000 }, { "epoch": 0.23775727466288146, "grad_norm": 2.377636194229126, "learning_rate": 5.927531721331099e-06, "loss": 0.3621, "step": 2010 }, { "epoch": 0.23894014667612964, "grad_norm": 2.9155020713806152, "learning_rate": 5.926813502513766e-06, "loss": 0.3469, "step": 2020 }, { "epoch": 0.2401230186893778, "grad_norm": 4.382843017578125, "learning_rate": 5.926095283696433e-06, "loss": 0.3988, "step": 2030 }, { "epoch": 0.24130589070262598, "grad_norm": 2.594512462615967, "learning_rate": 5.9253770648791e-06, "loss": 0.3548, "step": 2040 }, { "epoch": 0.24248876271587413, "grad_norm": 2.9058191776275635, "learning_rate": 5.9246588460617676e-06, "loss": 0.3762, "step": 2050 }, { "epoch": 0.24367163472912232, "grad_norm": 2.5409326553344727, "learning_rate": 5.923940627244434e-06, "loss": 0.3736, "step": 2060 }, { "epoch": 0.24485450674237047, "grad_norm": 3.0224828720092773, "learning_rate": 5.923222408427101e-06, "loss": 0.3646, "step": 2070 }, { "epoch": 0.24603737875561865, "grad_norm": 4.444828987121582, "learning_rate": 5.9225041896097675e-06, "loss": 0.4032, "step": 2080 }, { "epoch": 0.2472202507688668, "grad_norm": 2.7398457527160645, "learning_rate": 5.921785970792435e-06, "loss": 0.3379, "step": 2090 }, { "epoch": 0.248403122782115, "grad_norm": 3.4850106239318848, "learning_rate": 5.921067751975102e-06, "loss": 0.3529, "step": 2100 }, { "epoch": 0.24958599479536314, "grad_norm": 4.8330464363098145, "learning_rate": 5.920349533157769e-06, "loss": 0.3224, "step": 2110 }, { "epoch": 0.2500591436006624, "eval_accuracy": 0.8331669827328076, "eval_loss": 0.37664809823036194, "eval_runtime": 77.8219, "eval_safe_aucpr": 0.880340223163281, "eval_safe_f1": 0.8234735007832714, "eval_safe_fpr": 0.2020268452363152, "eval_safe_precision": 0.7758540630182421, "eval_safe_recall": 0.8773206315868431, "eval_samples_per_second": 772.456, "eval_steps_per_second": 12.079, "eval_unsafe_aucpr": 0.9350832296948681, "eval_unsafe_f1": 0.841851297011748, "eval_unsafe_fpr": 0.12267936841315635, "eval_unsafe_precision": 0.8908356694700307, "eval_unsafe_recall": 0.7979731547636842, "step": 2114 }, { "epoch": 0.2507688668086113, "grad_norm": 2.590071201324463, "learning_rate": 5.919631314340436e-06, "loss": 0.4043, "step": 2120 }, { "epoch": 0.25195173882185945, "grad_norm": 3.865354299545288, "learning_rate": 5.918913095523103e-06, "loss": 0.4099, "step": 2130 }, { "epoch": 0.25313461083510763, "grad_norm": 2.53672456741333, "learning_rate": 5.91819487670577e-06, "loss": 0.3427, "step": 2140 }, { "epoch": 0.2543174828483558, "grad_norm": 6.158299446105957, "learning_rate": 5.917476657888437e-06, "loss": 0.3877, "step": 2150 }, { "epoch": 0.255500354861604, "grad_norm": 4.042518138885498, "learning_rate": 5.916758439071104e-06, "loss": 0.3818, "step": 2160 }, { "epoch": 0.2566832268748521, "grad_norm": 3.8671555519104004, "learning_rate": 5.916040220253771e-06, "loss": 0.3413, "step": 2170 }, { "epoch": 0.2578660988881003, "grad_norm": 5.481204986572266, "learning_rate": 5.9153220014364375e-06, "loss": 0.3718, "step": 2180 }, { "epoch": 0.2590489709013485, "grad_norm": 2.517652750015259, "learning_rate": 5.9146037826191045e-06, "loss": 0.3354, "step": 2190 }, { "epoch": 0.26023184291459667, "grad_norm": 5.2531046867370605, "learning_rate": 5.913885563801771e-06, "loss": 0.4276, "step": 2200 }, { "epoch": 0.2614147149278448, "grad_norm": 2.3096024990081787, "learning_rate": 5.913167344984439e-06, "loss": 0.412, "step": 2210 }, { "epoch": 0.262597586941093, "grad_norm": 3.40287446975708, "learning_rate": 5.912449126167105e-06, "loss": 0.3723, "step": 2220 }, { "epoch": 0.26378045895434116, "grad_norm": 3.98356556892395, "learning_rate": 5.911730907349773e-06, "loss": 0.3836, "step": 2230 }, { "epoch": 0.2649633309675893, "grad_norm": 3.054837703704834, "learning_rate": 5.911012688532439e-06, "loss": 0.3816, "step": 2240 }, { "epoch": 0.26614620298083747, "grad_norm": 3.8618481159210205, "learning_rate": 5.910294469715107e-06, "loss": 0.3949, "step": 2250 }, { "epoch": 0.26732907499408565, "grad_norm": 5.447487831115723, "learning_rate": 5.909576250897774e-06, "loss": 0.3752, "step": 2260 }, { "epoch": 0.26851194700733383, "grad_norm": 2.9153223037719727, "learning_rate": 5.908858032080441e-06, "loss": 0.3893, "step": 2270 }, { "epoch": 0.26969481902058196, "grad_norm": 2.5316803455352783, "learning_rate": 5.908139813263108e-06, "loss": 0.3725, "step": 2280 }, { "epoch": 0.27087769103383014, "grad_norm": 4.825555324554443, "learning_rate": 5.9074215944457745e-06, "loss": 0.3761, "step": 2290 }, { "epoch": 0.2720605630470783, "grad_norm": 4.993202209472656, "learning_rate": 5.9067033756284415e-06, "loss": 0.3532, "step": 2300 }, { "epoch": 0.27324343506032645, "grad_norm": 4.285012245178223, "learning_rate": 5.905985156811108e-06, "loss": 0.3674, "step": 2310 }, { "epoch": 0.2744263070735746, "grad_norm": 2.8119378089904785, "learning_rate": 5.905266937993776e-06, "loss": 0.3785, "step": 2320 }, { "epoch": 0.2756091790868228, "grad_norm": 2.9245150089263916, "learning_rate": 5.904548719176442e-06, "loss": 0.3995, "step": 2330 }, { "epoch": 0.276792051100071, "grad_norm": 3.624386787414551, "learning_rate": 5.90383050035911e-06, "loss": 0.394, "step": 2340 }, { "epoch": 0.2779749231133191, "grad_norm": 2.3951256275177, "learning_rate": 5.903112281541776e-06, "loss": 0.3655, "step": 2350 }, { "epoch": 0.2791577951265673, "grad_norm": 4.64055061340332, "learning_rate": 5.902394062724444e-06, "loss": 0.3347, "step": 2360 }, { "epoch": 0.2803406671398155, "grad_norm": 7.313331604003906, "learning_rate": 5.90167584390711e-06, "loss": 0.4117, "step": 2370 }, { "epoch": 0.28152353915306366, "grad_norm": 3.114755868911743, "learning_rate": 5.900957625089778e-06, "loss": 0.3871, "step": 2380 }, { "epoch": 0.2827064111663118, "grad_norm": 2.2430553436279297, "learning_rate": 5.900239406272445e-06, "loss": 0.3798, "step": 2390 }, { "epoch": 0.28388928317955997, "grad_norm": 2.886373519897461, "learning_rate": 5.8995211874551115e-06, "loss": 0.3363, "step": 2400 }, { "epoch": 0.28507215519280815, "grad_norm": 2.8287220001220703, "learning_rate": 5.8988029686377785e-06, "loss": 0.3655, "step": 2410 }, { "epoch": 0.2862550272060563, "grad_norm": 2.4173007011413574, "learning_rate": 5.898084749820445e-06, "loss": 0.3791, "step": 2420 }, { "epoch": 0.28743789921930446, "grad_norm": 2.5527825355529785, "learning_rate": 5.897366531003112e-06, "loss": 0.3881, "step": 2430 }, { "epoch": 0.28862077123255264, "grad_norm": 2.515178918838501, "learning_rate": 5.896648312185779e-06, "loss": 0.3542, "step": 2440 }, { "epoch": 0.2898036432458008, "grad_norm": 4.246175765991211, "learning_rate": 5.895930093368446e-06, "loss": 0.3882, "step": 2450 }, { "epoch": 0.29098651525904895, "grad_norm": 4.691845893859863, "learning_rate": 5.895211874551113e-06, "loss": 0.391, "step": 2460 }, { "epoch": 0.29216938727229713, "grad_norm": 4.220634460449219, "learning_rate": 5.89449365573378e-06, "loss": 0.3567, "step": 2470 }, { "epoch": 0.2933522592855453, "grad_norm": 4.378153324127197, "learning_rate": 5.893775436916448e-06, "loss": 0.3652, "step": 2480 }, { "epoch": 0.2945351312987935, "grad_norm": 3.2689096927642822, "learning_rate": 5.893057218099114e-06, "loss": 0.326, "step": 2490 }, { "epoch": 0.2957180033120416, "grad_norm": 2.6781718730926514, "learning_rate": 5.892338999281782e-06, "loss": 0.4234, "step": 2500 }, { "epoch": 0.2969008753252898, "grad_norm": 2.7543728351593018, "learning_rate": 5.8916207804644485e-06, "loss": 0.3553, "step": 2510 }, { "epoch": 0.298083747338538, "grad_norm": 2.7876243591308594, "learning_rate": 5.8909025616471155e-06, "loss": 0.3719, "step": 2520 }, { "epoch": 0.2992666193517861, "grad_norm": 2.540703058242798, "learning_rate": 5.890184342829782e-06, "loss": 0.3428, "step": 2530 }, { "epoch": 0.3004494913650343, "grad_norm": 2.270244598388672, "learning_rate": 5.889466124012449e-06, "loss": 0.3873, "step": 2540 }, { "epoch": 0.3016323633782825, "grad_norm": 2.754493236541748, "learning_rate": 5.888747905195116e-06, "loss": 0.3708, "step": 2550 }, { "epoch": 0.30281523539153066, "grad_norm": 3.41325306892395, "learning_rate": 5.888029686377783e-06, "loss": 0.4017, "step": 2560 }, { "epoch": 0.3039981074047788, "grad_norm": 3.529622793197632, "learning_rate": 5.88731146756045e-06, "loss": 0.3569, "step": 2570 }, { "epoch": 0.30518097941802697, "grad_norm": 4.984379291534424, "learning_rate": 5.886593248743117e-06, "loss": 0.3821, "step": 2580 }, { "epoch": 0.30636385143127515, "grad_norm": 2.7920539379119873, "learning_rate": 5.885875029925785e-06, "loss": 0.3681, "step": 2590 }, { "epoch": 0.30754672344452333, "grad_norm": 2.944453716278076, "learning_rate": 5.885156811108451e-06, "loss": 0.3617, "step": 2600 }, { "epoch": 0.30872959545777146, "grad_norm": 3.739870309829712, "learning_rate": 5.884438592291119e-06, "loss": 0.3568, "step": 2610 }, { "epoch": 0.30991246747101964, "grad_norm": 3.3336400985717773, "learning_rate": 5.883720373473785e-06, "loss": 0.3694, "step": 2620 }, { "epoch": 0.3110953394842678, "grad_norm": 5.322179794311523, "learning_rate": 5.8830021546564525e-06, "loss": 0.3779, "step": 2630 }, { "epoch": 0.31227821149751595, "grad_norm": 2.317539930343628, "learning_rate": 5.8822839358391185e-06, "loss": 0.3868, "step": 2640 }, { "epoch": 0.3134610835107641, "grad_norm": 5.140389919281006, "learning_rate": 5.881565717021786e-06, "loss": 0.3613, "step": 2650 }, { "epoch": 0.3146439555240123, "grad_norm": 2.92518949508667, "learning_rate": 5.880847498204453e-06, "loss": 0.3851, "step": 2660 }, { "epoch": 0.3158268275372605, "grad_norm": 3.6105711460113525, "learning_rate": 5.88012927938712e-06, "loss": 0.3477, "step": 2670 }, { "epoch": 0.3170096995505086, "grad_norm": 5.269661903381348, "learning_rate": 5.879411060569787e-06, "loss": 0.3607, "step": 2680 }, { "epoch": 0.3181925715637568, "grad_norm": 4.06647253036499, "learning_rate": 5.878692841752454e-06, "loss": 0.4068, "step": 2690 }, { "epoch": 0.319375443577005, "grad_norm": 2.253429412841797, "learning_rate": 5.877974622935121e-06, "loss": 0.3937, "step": 2700 }, { "epoch": 0.32055831559025316, "grad_norm": 3.5654208660125732, "learning_rate": 5.877256404117788e-06, "loss": 0.3438, "step": 2710 }, { "epoch": 0.3217411876035013, "grad_norm": 3.4543228149414062, "learning_rate": 5.876538185300455e-06, "loss": 0.4375, "step": 2720 }, { "epoch": 0.32292405961674947, "grad_norm": 4.9412522315979, "learning_rate": 5.875819966483122e-06, "loss": 0.3522, "step": 2730 }, { "epoch": 0.32410693162999765, "grad_norm": 2.722914457321167, "learning_rate": 5.875101747665789e-06, "loss": 0.386, "step": 2740 }, { "epoch": 0.3252898036432458, "grad_norm": 4.104313850402832, "learning_rate": 5.874383528848456e-06, "loss": 0.3676, "step": 2750 }, { "epoch": 0.32647267565649396, "grad_norm": 2.5414774417877197, "learning_rate": 5.873665310031123e-06, "loss": 0.3586, "step": 2760 }, { "epoch": 0.32765554766974214, "grad_norm": 4.0231218338012695, "learning_rate": 5.87294709121379e-06, "loss": 0.3902, "step": 2770 }, { "epoch": 0.3288384196829903, "grad_norm": 2.965622901916504, "learning_rate": 5.872228872396457e-06, "loss": 0.4128, "step": 2780 }, { "epoch": 0.33002129169623845, "grad_norm": 3.556389331817627, "learning_rate": 5.871510653579124e-06, "loss": 0.3614, "step": 2790 }, { "epoch": 0.33120416370948663, "grad_norm": 3.5115513801574707, "learning_rate": 5.870792434761791e-06, "loss": 0.3352, "step": 2800 }, { "epoch": 0.3323870357227348, "grad_norm": 5.717298984527588, "learning_rate": 5.870074215944458e-06, "loss": 0.4149, "step": 2810 }, { "epoch": 0.33356990773598294, "grad_norm": 3.6197214126586914, "learning_rate": 5.869355997127125e-06, "loss": 0.3499, "step": 2820 }, { "epoch": 0.3347527797492311, "grad_norm": 2.577221155166626, "learning_rate": 5.868637778309792e-06, "loss": 0.401, "step": 2830 }, { "epoch": 0.3359356517624793, "grad_norm": 3.8798322677612305, "learning_rate": 5.867919559492459e-06, "loss": 0.4022, "step": 2840 }, { "epoch": 0.3371185237757275, "grad_norm": 2.9696784019470215, "learning_rate": 5.867201340675126e-06, "loss": 0.3903, "step": 2850 }, { "epoch": 0.3383013957889756, "grad_norm": 2.462855577468872, "learning_rate": 5.866483121857793e-06, "loss": 0.3487, "step": 2860 }, { "epoch": 0.3394842678022238, "grad_norm": 4.781073570251465, "learning_rate": 5.8657649030404595e-06, "loss": 0.3934, "step": 2870 }, { "epoch": 0.340667139815472, "grad_norm": 2.6815013885498047, "learning_rate": 5.865046684223127e-06, "loss": 0.3409, "step": 2880 }, { "epoch": 0.34185001182872016, "grad_norm": 2.956439971923828, "learning_rate": 5.864328465405793e-06, "loss": 0.3573, "step": 2890 }, { "epoch": 0.3430328838419683, "grad_norm": 2.7877068519592285, "learning_rate": 5.863610246588461e-06, "loss": 0.358, "step": 2900 }, { "epoch": 0.34421575585521647, "grad_norm": 3.0387110710144043, "learning_rate": 5.862892027771127e-06, "loss": 0.4111, "step": 2910 }, { "epoch": 0.34539862786846465, "grad_norm": 2.6195383071899414, "learning_rate": 5.862173808953795e-06, "loss": 0.37, "step": 2920 }, { "epoch": 0.3465814998817128, "grad_norm": 2.8747026920318604, "learning_rate": 5.861455590136462e-06, "loss": 0.3414, "step": 2930 }, { "epoch": 0.34776437189496096, "grad_norm": 2.943514585494995, "learning_rate": 5.860737371319129e-06, "loss": 0.3947, "step": 2940 }, { "epoch": 0.34894724390820914, "grad_norm": 3.779690980911255, "learning_rate": 5.860019152501796e-06, "loss": 0.3654, "step": 2950 }, { "epoch": 0.3501301159214573, "grad_norm": 5.39602518081665, "learning_rate": 5.859300933684463e-06, "loss": 0.3774, "step": 2960 }, { "epoch": 0.35131298793470545, "grad_norm": 2.6064445972442627, "learning_rate": 5.8585827148671295e-06, "loss": 0.3975, "step": 2970 }, { "epoch": 0.3524958599479536, "grad_norm": 2.7651381492614746, "learning_rate": 5.8578644960497965e-06, "loss": 0.3907, "step": 2980 }, { "epoch": 0.3536787319612018, "grad_norm": 5.168848037719727, "learning_rate": 5.857146277232463e-06, "loss": 0.4008, "step": 2990 }, { "epoch": 0.35486160397445, "grad_norm": 3.5402283668518066, "learning_rate": 5.85642805841513e-06, "loss": 0.3633, "step": 3000 }, { "epoch": 0.3560444759876981, "grad_norm": 4.439717769622803, "learning_rate": 5.855709839597798e-06, "loss": 0.3586, "step": 3010 }, { "epoch": 0.3572273480009463, "grad_norm": 4.7232160568237305, "learning_rate": 5.854991620780465e-06, "loss": 0.4107, "step": 3020 }, { "epoch": 0.3584102200141945, "grad_norm": 4.003586292266846, "learning_rate": 5.854273401963132e-06, "loss": 0.3556, "step": 3030 }, { "epoch": 0.3595930920274426, "grad_norm": 3.4900431632995605, "learning_rate": 5.853555183145799e-06, "loss": 0.4113, "step": 3040 }, { "epoch": 0.3607759640406908, "grad_norm": 4.203525066375732, "learning_rate": 5.852836964328466e-06, "loss": 0.3604, "step": 3050 }, { "epoch": 0.36195883605393897, "grad_norm": 4.817180156707764, "learning_rate": 5.852118745511133e-06, "loss": 0.3702, "step": 3060 }, { "epoch": 0.36314170806718715, "grad_norm": 3.0071041584014893, "learning_rate": 5.8514005266938e-06, "loss": 0.3406, "step": 3070 }, { "epoch": 0.3643245800804353, "grad_norm": 2.2780613899230957, "learning_rate": 5.8506823078764665e-06, "loss": 0.3781, "step": 3080 }, { "epoch": 0.36550745209368346, "grad_norm": 2.8988680839538574, "learning_rate": 5.8499640890591335e-06, "loss": 0.3819, "step": 3090 }, { "epoch": 0.36669032410693164, "grad_norm": 5.551490783691406, "learning_rate": 5.8492458702418e-06, "loss": 0.3656, "step": 3100 }, { "epoch": 0.3678731961201798, "grad_norm": 2.538804769515991, "learning_rate": 5.848527651424467e-06, "loss": 0.4029, "step": 3110 }, { "epoch": 0.36905606813342795, "grad_norm": 2.9220805168151855, "learning_rate": 5.847809432607134e-06, "loss": 0.3703, "step": 3120 }, { "epoch": 0.37023894014667613, "grad_norm": 4.952260971069336, "learning_rate": 5.847091213789802e-06, "loss": 0.401, "step": 3130 }, { "epoch": 0.3714218121599243, "grad_norm": 2.877650260925293, "learning_rate": 5.846372994972468e-06, "loss": 0.3581, "step": 3140 }, { "epoch": 0.37260468417317244, "grad_norm": 2.4547641277313232, "learning_rate": 5.845654776155136e-06, "loss": 0.3879, "step": 3150 }, { "epoch": 0.3737875561864206, "grad_norm": 2.919623613357544, "learning_rate": 5.844936557337802e-06, "loss": 0.4104, "step": 3160 }, { "epoch": 0.3749704281996688, "grad_norm": 3.500019073486328, "learning_rate": 5.84421833852047e-06, "loss": 0.4147, "step": 3170 }, { "epoch": 0.376153300212917, "grad_norm": 2.103543758392334, "learning_rate": 5.843500119703136e-06, "loss": 0.4031, "step": 3180 }, { "epoch": 0.3773361722261651, "grad_norm": 2.6760292053222656, "learning_rate": 5.8427819008858035e-06, "loss": 0.3965, "step": 3190 }, { "epoch": 0.3785190442394133, "grad_norm": 4.392319679260254, "learning_rate": 5.8420636820684704e-06, "loss": 0.344, "step": 3200 }, { "epoch": 0.3797019162526615, "grad_norm": 2.9038119316101074, "learning_rate": 5.841345463251137e-06, "loss": 0.3519, "step": 3210 }, { "epoch": 0.3808847882659096, "grad_norm": 2.3231678009033203, "learning_rate": 5.840627244433804e-06, "loss": 0.3642, "step": 3220 }, { "epoch": 0.3820676602791578, "grad_norm": 2.466710090637207, "learning_rate": 5.839909025616471e-06, "loss": 0.4116, "step": 3230 }, { "epoch": 0.38325053229240597, "grad_norm": 3.0128297805786133, "learning_rate": 5.839190806799138e-06, "loss": 0.4079, "step": 3240 }, { "epoch": 0.38443340430565415, "grad_norm": 2.3803048133850098, "learning_rate": 5.838472587981805e-06, "loss": 0.3639, "step": 3250 }, { "epoch": 0.3856162763189023, "grad_norm": 4.2432541847229, "learning_rate": 5.837754369164473e-06, "loss": 0.3898, "step": 3260 }, { "epoch": 0.38679914833215046, "grad_norm": 3.1298515796661377, "learning_rate": 5.837036150347139e-06, "loss": 0.3861, "step": 3270 }, { "epoch": 0.38798202034539864, "grad_norm": 3.26191782951355, "learning_rate": 5.836317931529807e-06, "loss": 0.4084, "step": 3280 }, { "epoch": 0.3891648923586468, "grad_norm": 2.265130043029785, "learning_rate": 5.835599712712473e-06, "loss": 0.3743, "step": 3290 }, { "epoch": 0.39034776437189495, "grad_norm": 5.0080060958862305, "learning_rate": 5.8348814938951405e-06, "loss": 0.3658, "step": 3300 }, { "epoch": 0.3915306363851431, "grad_norm": 2.9180374145507812, "learning_rate": 5.8341632750778074e-06, "loss": 0.3638, "step": 3310 }, { "epoch": 0.3927135083983913, "grad_norm": 4.007022380828857, "learning_rate": 5.833445056260474e-06, "loss": 0.4106, "step": 3320 }, { "epoch": 0.39389638041163944, "grad_norm": 2.20994234085083, "learning_rate": 5.832726837443141e-06, "loss": 0.3307, "step": 3330 }, { "epoch": 0.3950792524248876, "grad_norm": 3.977005958557129, "learning_rate": 5.832008618625808e-06, "loss": 0.3331, "step": 3340 }, { "epoch": 0.3962621244381358, "grad_norm": 5.412336826324463, "learning_rate": 5.831290399808475e-06, "loss": 0.3944, "step": 3350 }, { "epoch": 0.397444996451384, "grad_norm": 3.2714927196502686, "learning_rate": 5.830572180991142e-06, "loss": 0.3718, "step": 3360 }, { "epoch": 0.3986278684646321, "grad_norm": 3.78078031539917, "learning_rate": 5.829853962173809e-06, "loss": 0.398, "step": 3370 }, { "epoch": 0.3998107404778803, "grad_norm": 3.3845877647399902, "learning_rate": 5.829135743356476e-06, "loss": 0.3729, "step": 3380 }, { "epoch": 0.40099361249112847, "grad_norm": 2.473590850830078, "learning_rate": 5.828417524539143e-06, "loss": 0.3623, "step": 3390 }, { "epoch": 0.40217648450437665, "grad_norm": 4.318367958068848, "learning_rate": 5.827699305721811e-06, "loss": 0.378, "step": 3400 }, { "epoch": 0.4033593565176248, "grad_norm": 2.945312261581421, "learning_rate": 5.826981086904477e-06, "loss": 0.3766, "step": 3410 }, { "epoch": 0.40454222853087296, "grad_norm": 2.770726203918457, "learning_rate": 5.8262628680871444e-06, "loss": 0.3723, "step": 3420 }, { "epoch": 0.40572510054412114, "grad_norm": 3.8866899013519287, "learning_rate": 5.8255446492698105e-06, "loss": 0.3853, "step": 3430 }, { "epoch": 0.40690797255736927, "grad_norm": 2.1387736797332764, "learning_rate": 5.824826430452478e-06, "loss": 0.3305, "step": 3440 }, { "epoch": 0.40809084457061745, "grad_norm": 1.9608129262924194, "learning_rate": 5.824108211635144e-06, "loss": 0.3479, "step": 3450 }, { "epoch": 0.40927371658386563, "grad_norm": 2.608383893966675, "learning_rate": 5.823389992817812e-06, "loss": 0.3677, "step": 3460 }, { "epoch": 0.4104565885971138, "grad_norm": 2.4288978576660156, "learning_rate": 5.822671774000479e-06, "loss": 0.3472, "step": 3470 }, { "epoch": 0.41163946061036194, "grad_norm": 4.163777828216553, "learning_rate": 5.821953555183146e-06, "loss": 0.3938, "step": 3480 }, { "epoch": 0.4128223326236101, "grad_norm": 3.4677999019622803, "learning_rate": 5.821235336365813e-06, "loss": 0.333, "step": 3490 }, { "epoch": 0.4140052046368583, "grad_norm": 3.1297054290771484, "learning_rate": 5.82051711754848e-06, "loss": 0.317, "step": 3500 }, { "epoch": 0.4151880766501065, "grad_norm": 3.443112373352051, "learning_rate": 5.819798898731148e-06, "loss": 0.3727, "step": 3510 }, { "epoch": 0.4163709486633546, "grad_norm": 2.942333459854126, "learning_rate": 5.819080679913814e-06, "loss": 0.3459, "step": 3520 }, { "epoch": 0.4175538206766028, "grad_norm": 2.94356107711792, "learning_rate": 5.8183624610964814e-06, "loss": 0.3232, "step": 3530 }, { "epoch": 0.418736692689851, "grad_norm": 2.3919339179992676, "learning_rate": 5.8176442422791475e-06, "loss": 0.366, "step": 3540 }, { "epoch": 0.4199195647030991, "grad_norm": 5.207398414611816, "learning_rate": 5.816926023461815e-06, "loss": 0.3714, "step": 3550 }, { "epoch": 0.4211024367163473, "grad_norm": 3.081705331802368, "learning_rate": 5.816207804644481e-06, "loss": 0.373, "step": 3560 }, { "epoch": 0.42228530872959547, "grad_norm": 1.8942437171936035, "learning_rate": 5.815489585827149e-06, "loss": 0.3721, "step": 3570 }, { "epoch": 0.42346818074284365, "grad_norm": 2.5785560607910156, "learning_rate": 5.814771367009816e-06, "loss": 0.3455, "step": 3580 }, { "epoch": 0.4246510527560918, "grad_norm": 3.5257067680358887, "learning_rate": 5.814053148192483e-06, "loss": 0.3375, "step": 3590 }, { "epoch": 0.42583392476933996, "grad_norm": 3.3724899291992188, "learning_rate": 5.81333492937515e-06, "loss": 0.3863, "step": 3600 }, { "epoch": 0.42701679678258814, "grad_norm": 2.5944249629974365, "learning_rate": 5.812616710557817e-06, "loss": 0.358, "step": 3610 }, { "epoch": 0.42819966879583626, "grad_norm": 2.341181755065918, "learning_rate": 5.811898491740484e-06, "loss": 0.4097, "step": 3620 }, { "epoch": 0.42938254080908445, "grad_norm": 2.9497246742248535, "learning_rate": 5.811180272923151e-06, "loss": 0.3858, "step": 3630 }, { "epoch": 0.4305654128223326, "grad_norm": 2.6940529346466064, "learning_rate": 5.810462054105818e-06, "loss": 0.3517, "step": 3640 }, { "epoch": 0.4317482848355808, "grad_norm": 3.812220811843872, "learning_rate": 5.8097438352884845e-06, "loss": 0.3358, "step": 3650 }, { "epoch": 0.43293115684882894, "grad_norm": 3.533381700515747, "learning_rate": 5.8090256164711514e-06, "loss": 0.3807, "step": 3660 }, { "epoch": 0.4341140288620771, "grad_norm": 2.998255968093872, "learning_rate": 5.808307397653819e-06, "loss": 0.3836, "step": 3670 }, { "epoch": 0.4352969008753253, "grad_norm": 2.8560478687286377, "learning_rate": 5.807589178836485e-06, "loss": 0.3445, "step": 3680 }, { "epoch": 0.4364797728885735, "grad_norm": 2.7697792053222656, "learning_rate": 5.806870960019153e-06, "loss": 0.3202, "step": 3690 }, { "epoch": 0.4376626449018216, "grad_norm": 2.462221384048462, "learning_rate": 5.806152741201819e-06, "loss": 0.3565, "step": 3700 }, { "epoch": 0.4388455169150698, "grad_norm": 2.524683952331543, "learning_rate": 5.805434522384487e-06, "loss": 0.374, "step": 3710 }, { "epoch": 0.44002838892831797, "grad_norm": 2.5711159706115723, "learning_rate": 5.804716303567153e-06, "loss": 0.3755, "step": 3720 }, { "epoch": 0.4412112609415661, "grad_norm": 2.726163148880005, "learning_rate": 5.803998084749821e-06, "loss": 0.3834, "step": 3730 }, { "epoch": 0.4423941329548143, "grad_norm": 2.688717842102051, "learning_rate": 5.803279865932488e-06, "loss": 0.3675, "step": 3740 }, { "epoch": 0.44357700496806246, "grad_norm": 3.8020083904266357, "learning_rate": 5.802561647115155e-06, "loss": 0.3921, "step": 3750 }, { "epoch": 0.44475987698131064, "grad_norm": 2.9287753105163574, "learning_rate": 5.8018434282978215e-06, "loss": 0.3394, "step": 3760 }, { "epoch": 0.44594274899455877, "grad_norm": 2.516939878463745, "learning_rate": 5.8011252094804884e-06, "loss": 0.398, "step": 3770 }, { "epoch": 0.44712562100780695, "grad_norm": 2.728116273880005, "learning_rate": 5.800406990663156e-06, "loss": 0.3568, "step": 3780 }, { "epoch": 0.44830849302105513, "grad_norm": 4.527690887451172, "learning_rate": 5.799688771845822e-06, "loss": 0.3663, "step": 3790 }, { "epoch": 0.4494913650343033, "grad_norm": 3.5766899585723877, "learning_rate": 5.79897055302849e-06, "loss": 0.3889, "step": 3800 }, { "epoch": 0.45067423704755144, "grad_norm": 3.0356013774871826, "learning_rate": 5.798252334211156e-06, "loss": 0.3635, "step": 3810 }, { "epoch": 0.4518571090607996, "grad_norm": 4.886003017425537, "learning_rate": 5.797534115393824e-06, "loss": 0.3476, "step": 3820 }, { "epoch": 0.4530399810740478, "grad_norm": 2.641568899154663, "learning_rate": 5.79681589657649e-06, "loss": 0.3702, "step": 3830 }, { "epoch": 0.45422285308729593, "grad_norm": 3.4170846939086914, "learning_rate": 5.796097677759158e-06, "loss": 0.3623, "step": 3840 }, { "epoch": 0.4554057251005441, "grad_norm": 3.7190732955932617, "learning_rate": 5.795379458941825e-06, "loss": 0.4053, "step": 3850 }, { "epoch": 0.4565885971137923, "grad_norm": 3.826817035675049, "learning_rate": 5.794661240124492e-06, "loss": 0.3523, "step": 3860 }, { "epoch": 0.4577714691270405, "grad_norm": 2.791008949279785, "learning_rate": 5.7939430213071585e-06, "loss": 0.3953, "step": 3870 }, { "epoch": 0.4589543411402886, "grad_norm": 2.7187814712524414, "learning_rate": 5.7932248024898254e-06, "loss": 0.3532, "step": 3880 }, { "epoch": 0.4601372131535368, "grad_norm": 3.3384859561920166, "learning_rate": 5.792506583672492e-06, "loss": 0.3645, "step": 3890 }, { "epoch": 0.46132008516678497, "grad_norm": 3.3910651206970215, "learning_rate": 5.791788364855159e-06, "loss": 0.3393, "step": 3900 }, { "epoch": 0.46250295718003315, "grad_norm": 2.875401258468628, "learning_rate": 5.791070146037826e-06, "loss": 0.3337, "step": 3910 }, { "epoch": 0.4636858291932813, "grad_norm": 2.77256178855896, "learning_rate": 5.790351927220493e-06, "loss": 0.3857, "step": 3920 }, { "epoch": 0.46486870120652946, "grad_norm": 2.3083810806274414, "learning_rate": 5.78963370840316e-06, "loss": 0.3648, "step": 3930 }, { "epoch": 0.46605157321977764, "grad_norm": 2.5307555198669434, "learning_rate": 5.788915489585827e-06, "loss": 0.3605, "step": 3940 }, { "epoch": 0.46723444523302576, "grad_norm": 4.298022747039795, "learning_rate": 5.788197270768494e-06, "loss": 0.3479, "step": 3950 }, { "epoch": 0.46841731724627395, "grad_norm": 3.032050848007202, "learning_rate": 5.787479051951162e-06, "loss": 0.3622, "step": 3960 }, { "epoch": 0.4696001892595221, "grad_norm": 3.9290056228637695, "learning_rate": 5.786760833133828e-06, "loss": 0.3834, "step": 3970 }, { "epoch": 0.4707830612727703, "grad_norm": 2.1643033027648926, "learning_rate": 5.7860426143164955e-06, "loss": 0.3634, "step": 3980 }, { "epoch": 0.47196593328601844, "grad_norm": 4.665809154510498, "learning_rate": 5.785324395499162e-06, "loss": 0.3849, "step": 3990 }, { "epoch": 0.4731488052992666, "grad_norm": 2.8869147300720215, "learning_rate": 5.784606176681829e-06, "loss": 0.3884, "step": 4000 }, { "epoch": 0.4743316773125148, "grad_norm": 2.3516039848327637, "learning_rate": 5.783887957864496e-06, "loss": 0.3515, "step": 4010 }, { "epoch": 0.4755145493257629, "grad_norm": 2.363905191421509, "learning_rate": 5.783169739047163e-06, "loss": 0.3157, "step": 4020 }, { "epoch": 0.4766974213390111, "grad_norm": 3.9049179553985596, "learning_rate": 5.78245152022983e-06, "loss": 0.3558, "step": 4030 }, { "epoch": 0.4778802933522593, "grad_norm": 3.5855188369750977, "learning_rate": 5.781733301412497e-06, "loss": 0.3629, "step": 4040 }, { "epoch": 0.47906316536550747, "grad_norm": 2.6367034912109375, "learning_rate": 5.781015082595165e-06, "loss": 0.3628, "step": 4050 }, { "epoch": 0.4802460373787556, "grad_norm": 2.409397602081299, "learning_rate": 5.780296863777831e-06, "loss": 0.3406, "step": 4060 }, { "epoch": 0.4814289093920038, "grad_norm": 2.5876379013061523, "learning_rate": 5.779578644960499e-06, "loss": 0.3215, "step": 4070 }, { "epoch": 0.48261178140525196, "grad_norm": 2.785447597503662, "learning_rate": 5.778860426143165e-06, "loss": 0.3363, "step": 4080 }, { "epoch": 0.48379465341850014, "grad_norm": 2.5361294746398926, "learning_rate": 5.7781422073258325e-06, "loss": 0.3546, "step": 4090 }, { "epoch": 0.48497752543174827, "grad_norm": 2.828770637512207, "learning_rate": 5.777423988508499e-06, "loss": 0.368, "step": 4100 }, { "epoch": 0.48616039744499645, "grad_norm": 4.09275484085083, "learning_rate": 5.776705769691166e-06, "loss": 0.3641, "step": 4110 }, { "epoch": 0.48734326945824463, "grad_norm": 2.1944663524627686, "learning_rate": 5.775987550873833e-06, "loss": 0.4035, "step": 4120 }, { "epoch": 0.48852614147149276, "grad_norm": 2.6475987434387207, "learning_rate": 5.7752693320565e-06, "loss": 0.3765, "step": 4130 }, { "epoch": 0.48970901348474094, "grad_norm": 3.410083770751953, "learning_rate": 5.774551113239167e-06, "loss": 0.3852, "step": 4140 }, { "epoch": 0.4908918854979891, "grad_norm": 2.8198800086975098, "learning_rate": 5.773832894421834e-06, "loss": 0.4084, "step": 4150 }, { "epoch": 0.4920747575112373, "grad_norm": 2.296844482421875, "learning_rate": 5.773114675604501e-06, "loss": 0.3593, "step": 4160 }, { "epoch": 0.49325762952448543, "grad_norm": 2.5193898677825928, "learning_rate": 5.772396456787168e-06, "loss": 0.3598, "step": 4170 }, { "epoch": 0.4944405015377336, "grad_norm": 3.2671520709991455, "learning_rate": 5.771678237969835e-06, "loss": 0.3588, "step": 4180 }, { "epoch": 0.4956233735509818, "grad_norm": 2.365128517150879, "learning_rate": 5.770960019152502e-06, "loss": 0.3568, "step": 4190 }, { "epoch": 0.49680624556423, "grad_norm": 4.848801136016846, "learning_rate": 5.770241800335169e-06, "loss": 0.348, "step": 4200 }, { "epoch": 0.4979891175774781, "grad_norm": 5.710461139678955, "learning_rate": 5.769523581517836e-06, "loss": 0.3588, "step": 4210 }, { "epoch": 0.4991719895907263, "grad_norm": 3.037342071533203, "learning_rate": 5.7688053627005025e-06, "loss": 0.3601, "step": 4220 }, { "epoch": 0.5001182872013248, "eval_accuracy": 0.8440795821272915, "eval_loss": 0.34948381781578064, "eval_runtime": 77.7073, "eval_safe_aucpr": 0.8954198959966346, "eval_safe_f1": 0.823160953153595, "eval_safe_fpr": 0.13527248811694675, "eval_safe_precision": 0.8282080485952923, "eval_safe_recall": 0.8181749990623711, "eval_samples_per_second": 773.595, "eval_steps_per_second": 12.097, "eval_unsafe_aucpr": 0.9433171137892656, "eval_unsafe_f1": 0.860572703607289, "eval_unsafe_fpr": 0.18182500093762827, "eval_unsafe_precision": 0.8564576301296856, "eval_unsafe_recall": 0.8647275118830529, "step": 4228 }, { "epoch": 0.5003548616039745, "grad_norm": 2.9329936504364014, "learning_rate": 5.76808714388317e-06, "loss": 0.3792, "step": 4230 }, { "epoch": 0.5015377336172226, "grad_norm": 2.630964756011963, "learning_rate": 5.767368925065836e-06, "loss": 0.3469, "step": 4240 }, { "epoch": 0.5027206056304708, "grad_norm": 3.4824013710021973, "learning_rate": 5.766650706248504e-06, "loss": 0.3968, "step": 4250 }, { "epoch": 0.5039034776437189, "grad_norm": 2.520982503890991, "learning_rate": 5.76593248743117e-06, "loss": 0.3671, "step": 4260 }, { "epoch": 0.5050863496569671, "grad_norm": 2.3600871562957764, "learning_rate": 5.765214268613838e-06, "loss": 0.3573, "step": 4270 }, { "epoch": 0.5062692216702153, "grad_norm": 2.865267515182495, "learning_rate": 5.764496049796505e-06, "loss": 0.3421, "step": 4280 }, { "epoch": 0.5074520936834634, "grad_norm": 2.9639503955841064, "learning_rate": 5.763777830979172e-06, "loss": 0.361, "step": 4290 }, { "epoch": 0.5086349656967116, "grad_norm": 2.9893205165863037, "learning_rate": 5.763059612161839e-06, "loss": 0.3452, "step": 4300 }, { "epoch": 0.5098178377099598, "grad_norm": 4.492689609527588, "learning_rate": 5.762341393344506e-06, "loss": 0.3961, "step": 4310 }, { "epoch": 0.511000709723208, "grad_norm": 3.260425329208374, "learning_rate": 5.7616231745271734e-06, "loss": 0.4085, "step": 4320 }, { "epoch": 0.5121835817364561, "grad_norm": 2.229388475418091, "learning_rate": 5.7609049557098395e-06, "loss": 0.3584, "step": 4330 }, { "epoch": 0.5133664537497042, "grad_norm": 2.2329540252685547, "learning_rate": 5.760186736892507e-06, "loss": 0.3875, "step": 4340 }, { "epoch": 0.5145493257629524, "grad_norm": 2.5038681030273438, "learning_rate": 5.759468518075173e-06, "loss": 0.3654, "step": 4350 }, { "epoch": 0.5157321977762006, "grad_norm": 2.483992338180542, "learning_rate": 5.758750299257841e-06, "loss": 0.3613, "step": 4360 }, { "epoch": 0.5169150697894488, "grad_norm": 2.7301363945007324, "learning_rate": 5.758032080440507e-06, "loss": 0.3873, "step": 4370 }, { "epoch": 0.518097941802697, "grad_norm": 1.9997198581695557, "learning_rate": 5.757313861623175e-06, "loss": 0.3657, "step": 4380 }, { "epoch": 0.5192808138159452, "grad_norm": 3.300851345062256, "learning_rate": 5.756595642805842e-06, "loss": 0.378, "step": 4390 }, { "epoch": 0.5204636858291933, "grad_norm": 4.093571186065674, "learning_rate": 5.755877423988509e-06, "loss": 0.3517, "step": 4400 }, { "epoch": 0.5216465578424414, "grad_norm": 3.943171977996826, "learning_rate": 5.755159205171176e-06, "loss": 0.3609, "step": 4410 }, { "epoch": 0.5228294298556896, "grad_norm": 5.173525810241699, "learning_rate": 5.754440986353843e-06, "loss": 0.3513, "step": 4420 }, { "epoch": 0.5240123018689378, "grad_norm": 3.548471450805664, "learning_rate": 5.7537227675365096e-06, "loss": 0.3501, "step": 4430 }, { "epoch": 0.525195173882186, "grad_norm": 3.0566258430480957, "learning_rate": 5.7530045487191765e-06, "loss": 0.3577, "step": 4440 }, { "epoch": 0.5263780458954341, "grad_norm": 3.547243595123291, "learning_rate": 5.752286329901843e-06, "loss": 0.359, "step": 4450 }, { "epoch": 0.5275609179086823, "grad_norm": 3.283626079559326, "learning_rate": 5.75156811108451e-06, "loss": 0.376, "step": 4460 }, { "epoch": 0.5287437899219305, "grad_norm": 2.571317672729492, "learning_rate": 5.750849892267177e-06, "loss": 0.4127, "step": 4470 }, { "epoch": 0.5299266619351786, "grad_norm": 2.866814374923706, "learning_rate": 5.750131673449844e-06, "loss": 0.3591, "step": 4480 }, { "epoch": 0.5311095339484267, "grad_norm": 2.8994390964508057, "learning_rate": 5.749413454632511e-06, "loss": 0.4139, "step": 4490 }, { "epoch": 0.5322924059616749, "grad_norm": 2.784627676010132, "learning_rate": 5.748695235815179e-06, "loss": 0.352, "step": 4500 }, { "epoch": 0.5334752779749231, "grad_norm": 2.3118021488189697, "learning_rate": 5.747977016997845e-06, "loss": 0.3363, "step": 4510 }, { "epoch": 0.5346581499881713, "grad_norm": 2.584204912185669, "learning_rate": 5.747258798180513e-06, "loss": 0.3569, "step": 4520 }, { "epoch": 0.5358410220014195, "grad_norm": 2.7564291954040527, "learning_rate": 5.74654057936318e-06, "loss": 0.3062, "step": 4530 }, { "epoch": 0.5370238940146677, "grad_norm": 3.0223724842071533, "learning_rate": 5.7458223605458466e-06, "loss": 0.3547, "step": 4540 }, { "epoch": 0.5382067660279157, "grad_norm": 2.4846091270446777, "learning_rate": 5.7451041417285135e-06, "loss": 0.3431, "step": 4550 }, { "epoch": 0.5393896380411639, "grad_norm": 3.637694835662842, "learning_rate": 5.74438592291118e-06, "loss": 0.364, "step": 4560 }, { "epoch": 0.5405725100544121, "grad_norm": 2.5037472248077393, "learning_rate": 5.743667704093847e-06, "loss": 0.3858, "step": 4570 }, { "epoch": 0.5417553820676603, "grad_norm": 2.9102423191070557, "learning_rate": 5.742949485276514e-06, "loss": 0.3944, "step": 4580 }, { "epoch": 0.5429382540809085, "grad_norm": 3.7652969360351562, "learning_rate": 5.742231266459182e-06, "loss": 0.3244, "step": 4590 }, { "epoch": 0.5441211260941566, "grad_norm": 2.990288257598877, "learning_rate": 5.741513047641848e-06, "loss": 0.3314, "step": 4600 }, { "epoch": 0.5453039981074048, "grad_norm": 2.4400131702423096, "learning_rate": 5.740794828824516e-06, "loss": 0.3781, "step": 4610 }, { "epoch": 0.5464868701206529, "grad_norm": 2.8999412059783936, "learning_rate": 5.740076610007182e-06, "loss": 0.3951, "step": 4620 }, { "epoch": 0.5476697421339011, "grad_norm": 3.390578508377075, "learning_rate": 5.73935839118985e-06, "loss": 0.3489, "step": 4630 }, { "epoch": 0.5488526141471493, "grad_norm": 2.4869701862335205, "learning_rate": 5.738640172372516e-06, "loss": 0.3451, "step": 4640 }, { "epoch": 0.5500354861603974, "grad_norm": 2.432013988494873, "learning_rate": 5.7379219535551836e-06, "loss": 0.3418, "step": 4650 }, { "epoch": 0.5512183581736456, "grad_norm": 3.0324032306671143, "learning_rate": 5.7372037347378505e-06, "loss": 0.3946, "step": 4660 }, { "epoch": 0.5524012301868938, "grad_norm": 2.967174530029297, "learning_rate": 5.736485515920517e-06, "loss": 0.3793, "step": 4670 }, { "epoch": 0.553584102200142, "grad_norm": 2.171663999557495, "learning_rate": 5.735767297103184e-06, "loss": 0.3787, "step": 4680 }, { "epoch": 0.5547669742133902, "grad_norm": 2.6231937408447266, "learning_rate": 5.735049078285851e-06, "loss": 0.3866, "step": 4690 }, { "epoch": 0.5559498462266382, "grad_norm": 2.7934768199920654, "learning_rate": 5.734330859468518e-06, "loss": 0.3324, "step": 4700 }, { "epoch": 0.5571327182398864, "grad_norm": 3.470262289047241, "learning_rate": 5.733612640651185e-06, "loss": 0.3173, "step": 4710 }, { "epoch": 0.5583155902531346, "grad_norm": 3.697437047958374, "learning_rate": 5.732894421833852e-06, "loss": 0.391, "step": 4720 }, { "epoch": 0.5594984622663828, "grad_norm": 2.727518320083618, "learning_rate": 5.732176203016519e-06, "loss": 0.3807, "step": 4730 }, { "epoch": 0.560681334279631, "grad_norm": 2.2123401165008545, "learning_rate": 5.731457984199186e-06, "loss": 0.365, "step": 4740 }, { "epoch": 0.5618642062928791, "grad_norm": 2.3912441730499268, "learning_rate": 5.730739765381853e-06, "loss": 0.3316, "step": 4750 }, { "epoch": 0.5630470783061273, "grad_norm": 2.8068788051605225, "learning_rate": 5.73002154656452e-06, "loss": 0.3761, "step": 4760 }, { "epoch": 0.5642299503193754, "grad_norm": 3.748828887939453, "learning_rate": 5.7293033277471875e-06, "loss": 0.3867, "step": 4770 }, { "epoch": 0.5654128223326236, "grad_norm": 3.5966951847076416, "learning_rate": 5.728585108929854e-06, "loss": 0.3144, "step": 4780 }, { "epoch": 0.5665956943458718, "grad_norm": 2.2783448696136475, "learning_rate": 5.727866890112521e-06, "loss": 0.3135, "step": 4790 }, { "epoch": 0.5677785663591199, "grad_norm": 4.028083801269531, "learning_rate": 5.727148671295188e-06, "loss": 0.3491, "step": 4800 }, { "epoch": 0.5689614383723681, "grad_norm": 2.8650357723236084, "learning_rate": 5.726430452477855e-06, "loss": 0.413, "step": 4810 }, { "epoch": 0.5701443103856163, "grad_norm": 2.58620285987854, "learning_rate": 5.725712233660522e-06, "loss": 0.3532, "step": 4820 }, { "epoch": 0.5713271823988645, "grad_norm": 2.4366037845611572, "learning_rate": 5.724994014843189e-06, "loss": 0.3804, "step": 4830 }, { "epoch": 0.5725100544121126, "grad_norm": 2.0628104209899902, "learning_rate": 5.724275796025856e-06, "loss": 0.3129, "step": 4840 }, { "epoch": 0.5736929264253607, "grad_norm": 2.460841417312622, "learning_rate": 5.723557577208523e-06, "loss": 0.3626, "step": 4850 }, { "epoch": 0.5748757984386089, "grad_norm": 2.232182025909424, "learning_rate": 5.72283935839119e-06, "loss": 0.3221, "step": 4860 }, { "epoch": 0.5760586704518571, "grad_norm": 3.2975473403930664, "learning_rate": 5.722121139573857e-06, "loss": 0.3529, "step": 4870 }, { "epoch": 0.5772415424651053, "grad_norm": 3.717776298522949, "learning_rate": 5.7214029207565245e-06, "loss": 0.3185, "step": 4880 }, { "epoch": 0.5784244144783535, "grad_norm": 2.8947947025299072, "learning_rate": 5.7206847019391906e-06, "loss": 0.3667, "step": 4890 }, { "epoch": 0.5796072864916016, "grad_norm": 2.6210103034973145, "learning_rate": 5.719966483121858e-06, "loss": 0.345, "step": 4900 }, { "epoch": 0.5807901585048498, "grad_norm": 2.359241008758545, "learning_rate": 5.719248264304524e-06, "loss": 0.3065, "step": 4910 }, { "epoch": 0.5819730305180979, "grad_norm": 3.0238962173461914, "learning_rate": 5.718530045487192e-06, "loss": 0.3423, "step": 4920 }, { "epoch": 0.5831559025313461, "grad_norm": 2.6816389560699463, "learning_rate": 5.717811826669859e-06, "loss": 0.34, "step": 4930 }, { "epoch": 0.5843387745445943, "grad_norm": 2.968125820159912, "learning_rate": 5.717093607852526e-06, "loss": 0.3532, "step": 4940 }, { "epoch": 0.5855216465578424, "grad_norm": 2.4294793605804443, "learning_rate": 5.716375389035193e-06, "loss": 0.3613, "step": 4950 }, { "epoch": 0.5867045185710906, "grad_norm": 3.200100898742676, "learning_rate": 5.71565717021786e-06, "loss": 0.3516, "step": 4960 }, { "epoch": 0.5878873905843388, "grad_norm": 2.0361745357513428, "learning_rate": 5.714938951400527e-06, "loss": 0.3764, "step": 4970 }, { "epoch": 0.589070262597587, "grad_norm": 2.789825916290283, "learning_rate": 5.714220732583194e-06, "loss": 0.3775, "step": 4980 }, { "epoch": 0.5902531346108351, "grad_norm": 2.6486117839813232, "learning_rate": 5.713502513765861e-06, "loss": 0.3044, "step": 4990 }, { "epoch": 0.5914360066240832, "grad_norm": 3.0227205753326416, "learning_rate": 5.7127842949485276e-06, "loss": 0.3552, "step": 5000 }, { "epoch": 0.5926188786373314, "grad_norm": 3.0813512802124023, "learning_rate": 5.7120660761311945e-06, "loss": 0.349, "step": 5010 }, { "epoch": 0.5938017506505796, "grad_norm": 3.0543744564056396, "learning_rate": 5.711347857313861e-06, "loss": 0.3271, "step": 5020 }, { "epoch": 0.5949846226638278, "grad_norm": 2.3086960315704346, "learning_rate": 5.710629638496529e-06, "loss": 0.3666, "step": 5030 }, { "epoch": 0.596167494677076, "grad_norm": 3.788231134414673, "learning_rate": 5.709911419679196e-06, "loss": 0.3399, "step": 5040 }, { "epoch": 0.5973503666903242, "grad_norm": 2.0678153038024902, "learning_rate": 5.709193200861863e-06, "loss": 0.3751, "step": 5050 }, { "epoch": 0.5985332387035722, "grad_norm": 2.638007879257202, "learning_rate": 5.70847498204453e-06, "loss": 0.395, "step": 5060 }, { "epoch": 0.5997161107168204, "grad_norm": 3.0321481227874756, "learning_rate": 5.707756763227197e-06, "loss": 0.327, "step": 5070 }, { "epoch": 0.6008989827300686, "grad_norm": 2.9551167488098145, "learning_rate": 5.707038544409864e-06, "loss": 0.3802, "step": 5080 }, { "epoch": 0.6020818547433168, "grad_norm": 3.5318825244903564, "learning_rate": 5.706320325592531e-06, "loss": 0.3315, "step": 5090 }, { "epoch": 0.603264726756565, "grad_norm": 4.272298812866211, "learning_rate": 5.705602106775198e-06, "loss": 0.3295, "step": 5100 }, { "epoch": 0.6044475987698131, "grad_norm": 3.083944082260132, "learning_rate": 5.7048838879578646e-06, "loss": 0.3564, "step": 5110 }, { "epoch": 0.6056304707830613, "grad_norm": 3.0958378314971924, "learning_rate": 5.7041656691405315e-06, "loss": 0.3427, "step": 5120 }, { "epoch": 0.6068133427963094, "grad_norm": 2.100768804550171, "learning_rate": 5.703447450323198e-06, "loss": 0.3449, "step": 5130 }, { "epoch": 0.6079962148095576, "grad_norm": 2.492600679397583, "learning_rate": 5.702729231505865e-06, "loss": 0.3812, "step": 5140 }, { "epoch": 0.6091790868228057, "grad_norm": 2.495830774307251, "learning_rate": 5.702011012688533e-06, "loss": 0.3339, "step": 5150 }, { "epoch": 0.6103619588360539, "grad_norm": 3.3077280521392822, "learning_rate": 5.701292793871199e-06, "loss": 0.3747, "step": 5160 }, { "epoch": 0.6115448308493021, "grad_norm": 2.5646684169769287, "learning_rate": 5.700574575053867e-06, "loss": 0.347, "step": 5170 }, { "epoch": 0.6127277028625503, "grad_norm": 4.435232162475586, "learning_rate": 5.699856356236533e-06, "loss": 0.3447, "step": 5180 }, { "epoch": 0.6139105748757985, "grad_norm": 4.579883098602295, "learning_rate": 5.699138137419201e-06, "loss": 0.3769, "step": 5190 }, { "epoch": 0.6150934468890467, "grad_norm": 3.25585675239563, "learning_rate": 5.698419918601867e-06, "loss": 0.3722, "step": 5200 }, { "epoch": 0.6162763189022947, "grad_norm": 3.463557243347168, "learning_rate": 5.697701699784535e-06, "loss": 0.3351, "step": 5210 }, { "epoch": 0.6174591909155429, "grad_norm": 2.9261481761932373, "learning_rate": 5.6969834809672016e-06, "loss": 0.382, "step": 5220 }, { "epoch": 0.6186420629287911, "grad_norm": 4.577706813812256, "learning_rate": 5.6962652621498685e-06, "loss": 0.3918, "step": 5230 }, { "epoch": 0.6198249349420393, "grad_norm": 2.9830856323242188, "learning_rate": 5.695547043332535e-06, "loss": 0.3715, "step": 5240 }, { "epoch": 0.6210078069552875, "grad_norm": 4.0253071784973145, "learning_rate": 5.694828824515202e-06, "loss": 0.4072, "step": 5250 }, { "epoch": 0.6221906789685356, "grad_norm": 2.062934637069702, "learning_rate": 5.694110605697869e-06, "loss": 0.3296, "step": 5260 }, { "epoch": 0.6233735509817838, "grad_norm": 2.803471565246582, "learning_rate": 5.693392386880536e-06, "loss": 0.3698, "step": 5270 }, { "epoch": 0.6245564229950319, "grad_norm": 3.8620738983154297, "learning_rate": 5.692674168063204e-06, "loss": 0.3858, "step": 5280 }, { "epoch": 0.6257392950082801, "grad_norm": 2.846827983856201, "learning_rate": 5.69195594924587e-06, "loss": 0.3495, "step": 5290 }, { "epoch": 0.6269221670215283, "grad_norm": 2.0253992080688477, "learning_rate": 5.691237730428538e-06, "loss": 0.3363, "step": 5300 }, { "epoch": 0.6281050390347764, "grad_norm": 2.903935432434082, "learning_rate": 5.690519511611205e-06, "loss": 0.3236, "step": 5310 }, { "epoch": 0.6292879110480246, "grad_norm": 2.9494948387145996, "learning_rate": 5.689801292793872e-06, "loss": 0.3778, "step": 5320 }, { "epoch": 0.6304707830612728, "grad_norm": 2.0794708728790283, "learning_rate": 5.6890830739765386e-06, "loss": 0.33, "step": 5330 }, { "epoch": 0.631653655074521, "grad_norm": 2.6170406341552734, "learning_rate": 5.6883648551592055e-06, "loss": 0.3391, "step": 5340 }, { "epoch": 0.632836527087769, "grad_norm": 2.3067479133605957, "learning_rate": 5.687646636341872e-06, "loss": 0.3172, "step": 5350 }, { "epoch": 0.6340193991010172, "grad_norm": 3.9792582988739014, "learning_rate": 5.686928417524539e-06, "loss": 0.3137, "step": 5360 }, { "epoch": 0.6352022711142654, "grad_norm": 3.4487500190734863, "learning_rate": 5.686210198707206e-06, "loss": 0.342, "step": 5370 }, { "epoch": 0.6363851431275136, "grad_norm": 2.9459004402160645, "learning_rate": 5.685491979889873e-06, "loss": 0.3605, "step": 5380 }, { "epoch": 0.6375680151407618, "grad_norm": 3.2506394386291504, "learning_rate": 5.68477376107254e-06, "loss": 0.3207, "step": 5390 }, { "epoch": 0.63875088715401, "grad_norm": 2.6361186504364014, "learning_rate": 5.684055542255207e-06, "loss": 0.3793, "step": 5400 }, { "epoch": 0.6399337591672581, "grad_norm": 3.1658058166503906, "learning_rate": 5.683337323437874e-06, "loss": 0.3764, "step": 5410 }, { "epoch": 0.6411166311805063, "grad_norm": 2.703413486480713, "learning_rate": 5.682619104620542e-06, "loss": 0.3297, "step": 5420 }, { "epoch": 0.6422995031937544, "grad_norm": 3.6086349487304688, "learning_rate": 5.681900885803208e-06, "loss": 0.3371, "step": 5430 }, { "epoch": 0.6434823752070026, "grad_norm": 3.9716713428497314, "learning_rate": 5.6811826669858756e-06, "loss": 0.3965, "step": 5440 }, { "epoch": 0.6446652472202508, "grad_norm": 2.501960515975952, "learning_rate": 5.680464448168542e-06, "loss": 0.3049, "step": 5450 }, { "epoch": 0.6458481192334989, "grad_norm": 3.220345973968506, "learning_rate": 5.679746229351209e-06, "loss": 0.3382, "step": 5460 }, { "epoch": 0.6470309912467471, "grad_norm": 3.021064043045044, "learning_rate": 5.6790280105338755e-06, "loss": 0.3277, "step": 5470 }, { "epoch": 0.6482138632599953, "grad_norm": 2.709476947784424, "learning_rate": 5.678309791716543e-06, "loss": 0.3224, "step": 5480 }, { "epoch": 0.6493967352732435, "grad_norm": 2.4916460514068604, "learning_rate": 5.67759157289921e-06, "loss": 0.3742, "step": 5490 }, { "epoch": 0.6505796072864916, "grad_norm": 3.362166404724121, "learning_rate": 5.676873354081877e-06, "loss": 0.3745, "step": 5500 }, { "epoch": 0.6517624792997397, "grad_norm": 2.5697948932647705, "learning_rate": 5.676155135264544e-06, "loss": 0.4016, "step": 5510 }, { "epoch": 0.6529453513129879, "grad_norm": 2.7755134105682373, "learning_rate": 5.675436916447211e-06, "loss": 0.3634, "step": 5520 }, { "epoch": 0.6541282233262361, "grad_norm": 3.1397430896759033, "learning_rate": 5.674718697629878e-06, "loss": 0.3518, "step": 5530 }, { "epoch": 0.6553110953394843, "grad_norm": 3.169858694076538, "learning_rate": 5.674000478812545e-06, "loss": 0.3747, "step": 5540 }, { "epoch": 0.6564939673527325, "grad_norm": 3.622579574584961, "learning_rate": 5.6732822599952126e-06, "loss": 0.3737, "step": 5550 }, { "epoch": 0.6576768393659806, "grad_norm": 3.7488350868225098, "learning_rate": 5.672564041177879e-06, "loss": 0.347, "step": 5560 }, { "epoch": 0.6588597113792287, "grad_norm": 3.859649419784546, "learning_rate": 5.671845822360546e-06, "loss": 0.3324, "step": 5570 }, { "epoch": 0.6600425833924769, "grad_norm": 2.853318214416504, "learning_rate": 5.671127603543213e-06, "loss": 0.3608, "step": 5580 }, { "epoch": 0.6612254554057251, "grad_norm": 2.2855758666992188, "learning_rate": 5.67040938472588e-06, "loss": 0.3387, "step": 5590 }, { "epoch": 0.6624083274189733, "grad_norm": 2.5921871662139893, "learning_rate": 5.669691165908547e-06, "loss": 0.3591, "step": 5600 }, { "epoch": 0.6635911994322214, "grad_norm": 2.95080304145813, "learning_rate": 5.668972947091214e-06, "loss": 0.3631, "step": 5610 }, { "epoch": 0.6647740714454696, "grad_norm": 2.3368730545043945, "learning_rate": 5.668254728273881e-06, "loss": 0.3485, "step": 5620 }, { "epoch": 0.6659569434587178, "grad_norm": 3.513460397720337, "learning_rate": 5.667536509456548e-06, "loss": 0.367, "step": 5630 }, { "epoch": 0.6671398154719659, "grad_norm": 2.4163262844085693, "learning_rate": 5.666818290639215e-06, "loss": 0.3482, "step": 5640 }, { "epoch": 0.6683226874852141, "grad_norm": 4.090065956115723, "learning_rate": 5.666100071821882e-06, "loss": 0.3526, "step": 5650 }, { "epoch": 0.6695055594984622, "grad_norm": 3.317941427230835, "learning_rate": 5.665381853004549e-06, "loss": 0.3667, "step": 5660 }, { "epoch": 0.6706884315117104, "grad_norm": 2.652280569076538, "learning_rate": 5.664663634187216e-06, "loss": 0.3699, "step": 5670 }, { "epoch": 0.6718713035249586, "grad_norm": 2.5559115409851074, "learning_rate": 5.6639454153698825e-06, "loss": 0.3333, "step": 5680 }, { "epoch": 0.6730541755382068, "grad_norm": 2.700054883956909, "learning_rate": 5.66322719655255e-06, "loss": 0.3706, "step": 5690 }, { "epoch": 0.674237047551455, "grad_norm": 2.8341550827026367, "learning_rate": 5.662508977735216e-06, "loss": 0.3469, "step": 5700 }, { "epoch": 0.6754199195647032, "grad_norm": 3.4807088375091553, "learning_rate": 5.661790758917884e-06, "loss": 0.3222, "step": 5710 }, { "epoch": 0.6766027915779512, "grad_norm": 3.6275129318237305, "learning_rate": 5.66107254010055e-06, "loss": 0.3334, "step": 5720 }, { "epoch": 0.6777856635911994, "grad_norm": 2.0586740970611572, "learning_rate": 5.660354321283218e-06, "loss": 0.3323, "step": 5730 }, { "epoch": 0.6789685356044476, "grad_norm": 2.88397479057312, "learning_rate": 5.659636102465884e-06, "loss": 0.2893, "step": 5740 }, { "epoch": 0.6801514076176958, "grad_norm": 3.2640860080718994, "learning_rate": 5.658917883648552e-06, "loss": 0.3097, "step": 5750 }, { "epoch": 0.681334279630944, "grad_norm": 3.416465997695923, "learning_rate": 5.658199664831219e-06, "loss": 0.3649, "step": 5760 }, { "epoch": 0.6825171516441921, "grad_norm": 1.7977756261825562, "learning_rate": 5.657481446013886e-06, "loss": 0.3342, "step": 5770 }, { "epoch": 0.6837000236574403, "grad_norm": 2.6693594455718994, "learning_rate": 5.656763227196553e-06, "loss": 0.351, "step": 5780 }, { "epoch": 0.6848828956706884, "grad_norm": 2.2618138790130615, "learning_rate": 5.6560450083792195e-06, "loss": 0.3289, "step": 5790 }, { "epoch": 0.6860657676839366, "grad_norm": 2.292304754257202, "learning_rate": 5.655326789561887e-06, "loss": 0.3386, "step": 5800 }, { "epoch": 0.6872486396971847, "grad_norm": 2.810405731201172, "learning_rate": 5.654608570744553e-06, "loss": 0.3391, "step": 5810 }, { "epoch": 0.6884315117104329, "grad_norm": 2.73765230178833, "learning_rate": 5.653890351927221e-06, "loss": 0.3935, "step": 5820 }, { "epoch": 0.6896143837236811, "grad_norm": 2.1042165756225586, "learning_rate": 5.653172133109887e-06, "loss": 0.3523, "step": 5830 }, { "epoch": 0.6907972557369293, "grad_norm": 2.7655107975006104, "learning_rate": 5.652453914292555e-06, "loss": 0.3465, "step": 5840 }, { "epoch": 0.6919801277501775, "grad_norm": 2.2958226203918457, "learning_rate": 5.651735695475222e-06, "loss": 0.365, "step": 5850 }, { "epoch": 0.6931629997634255, "grad_norm": 2.7822420597076416, "learning_rate": 5.651017476657889e-06, "loss": 0.3341, "step": 5860 }, { "epoch": 0.6943458717766737, "grad_norm": 3.8763067722320557, "learning_rate": 5.650299257840556e-06, "loss": 0.3503, "step": 5870 }, { "epoch": 0.6955287437899219, "grad_norm": 4.107594966888428, "learning_rate": 5.649581039023223e-06, "loss": 0.3743, "step": 5880 }, { "epoch": 0.6967116158031701, "grad_norm": 2.4280354976654053, "learning_rate": 5.64886282020589e-06, "loss": 0.3681, "step": 5890 }, { "epoch": 0.6978944878164183, "grad_norm": 2.016907215118408, "learning_rate": 5.6481446013885565e-06, "loss": 0.3443, "step": 5900 }, { "epoch": 0.6990773598296665, "grad_norm": 2.1594183444976807, "learning_rate": 5.6474263825712235e-06, "loss": 0.352, "step": 5910 }, { "epoch": 0.7002602318429146, "grad_norm": 2.6787216663360596, "learning_rate": 5.64670816375389e-06, "loss": 0.3404, "step": 5920 }, { "epoch": 0.7014431038561627, "grad_norm": 4.282622814178467, "learning_rate": 5.645989944936557e-06, "loss": 0.3229, "step": 5930 }, { "epoch": 0.7026259758694109, "grad_norm": 3.5148532390594482, "learning_rate": 5.645271726119224e-06, "loss": 0.3727, "step": 5940 }, { "epoch": 0.7038088478826591, "grad_norm": 2.5899875164031982, "learning_rate": 5.644553507301891e-06, "loss": 0.3066, "step": 5950 }, { "epoch": 0.7049917198959073, "grad_norm": 1.9410797357559204, "learning_rate": 5.643835288484559e-06, "loss": 0.3376, "step": 5960 }, { "epoch": 0.7061745919091554, "grad_norm": 2.4812545776367188, "learning_rate": 5.643117069667225e-06, "loss": 0.3315, "step": 5970 }, { "epoch": 0.7073574639224036, "grad_norm": 2.838182210922241, "learning_rate": 5.642398850849893e-06, "loss": 0.3412, "step": 5980 }, { "epoch": 0.7085403359356518, "grad_norm": 3.1450905799865723, "learning_rate": 5.641680632032559e-06, "loss": 0.3058, "step": 5990 }, { "epoch": 0.7097232079489, "grad_norm": 3.805575132369995, "learning_rate": 5.640962413215227e-06, "loss": 0.3116, "step": 6000 }, { "epoch": 0.710906079962148, "grad_norm": 2.4876396656036377, "learning_rate": 5.640244194397893e-06, "loss": 0.326, "step": 6010 }, { "epoch": 0.7120889519753962, "grad_norm": 2.369574546813965, "learning_rate": 5.6395259755805605e-06, "loss": 0.335, "step": 6020 }, { "epoch": 0.7132718239886444, "grad_norm": 2.075327157974243, "learning_rate": 5.638807756763227e-06, "loss": 0.3385, "step": 6030 }, { "epoch": 0.7144546960018926, "grad_norm": 1.7152106761932373, "learning_rate": 5.638089537945894e-06, "loss": 0.312, "step": 6040 }, { "epoch": 0.7156375680151408, "grad_norm": 2.547874689102173, "learning_rate": 5.637371319128561e-06, "loss": 0.3252, "step": 6050 }, { "epoch": 0.716820440028389, "grad_norm": 3.207555055618286, "learning_rate": 5.636653100311228e-06, "loss": 0.3626, "step": 6060 }, { "epoch": 0.7180033120416371, "grad_norm": 3.471651315689087, "learning_rate": 5.635934881493896e-06, "loss": 0.4018, "step": 6070 }, { "epoch": 0.7191861840548852, "grad_norm": 2.3718714714050293, "learning_rate": 5.635216662676562e-06, "loss": 0.3157, "step": 6080 }, { "epoch": 0.7203690560681334, "grad_norm": 2.1830906867980957, "learning_rate": 5.63449844385923e-06, "loss": 0.3814, "step": 6090 }, { "epoch": 0.7215519280813816, "grad_norm": 2.37980318069458, "learning_rate": 5.633780225041896e-06, "loss": 0.2789, "step": 6100 }, { "epoch": 0.7227348000946298, "grad_norm": 3.602928638458252, "learning_rate": 5.633062006224564e-06, "loss": 0.3802, "step": 6110 }, { "epoch": 0.7239176721078779, "grad_norm": 2.9294888973236084, "learning_rate": 5.63234378740723e-06, "loss": 0.3705, "step": 6120 }, { "epoch": 0.7251005441211261, "grad_norm": 2.5093209743499756, "learning_rate": 5.6316255685898975e-06, "loss": 0.3333, "step": 6130 }, { "epoch": 0.7262834161343743, "grad_norm": 1.8491617441177368, "learning_rate": 5.630907349772564e-06, "loss": 0.3774, "step": 6140 }, { "epoch": 0.7274662881476224, "grad_norm": 3.2130048274993896, "learning_rate": 5.630189130955231e-06, "loss": 0.3286, "step": 6150 }, { "epoch": 0.7286491601608706, "grad_norm": 2.143611192703247, "learning_rate": 5.629470912137898e-06, "loss": 0.3252, "step": 6160 }, { "epoch": 0.7298320321741187, "grad_norm": 3.1197423934936523, "learning_rate": 5.628752693320565e-06, "loss": 0.3415, "step": 6170 }, { "epoch": 0.7310149041873669, "grad_norm": 4.243905067443848, "learning_rate": 5.628034474503232e-06, "loss": 0.3856, "step": 6180 }, { "epoch": 0.7321977762006151, "grad_norm": 2.5275042057037354, "learning_rate": 5.627316255685899e-06, "loss": 0.3272, "step": 6190 }, { "epoch": 0.7333806482138633, "grad_norm": 3.6818149089813232, "learning_rate": 5.626598036868566e-06, "loss": 0.3799, "step": 6200 }, { "epoch": 0.7345635202271115, "grad_norm": 2.4534871578216553, "learning_rate": 5.625879818051233e-06, "loss": 0.3836, "step": 6210 }, { "epoch": 0.7357463922403596, "grad_norm": 4.3578200340271, "learning_rate": 5.6251615992339e-06, "loss": 0.3487, "step": 6220 }, { "epoch": 0.7369292642536077, "grad_norm": 3.0013763904571533, "learning_rate": 5.6244433804165675e-06, "loss": 0.3369, "step": 6230 }, { "epoch": 0.7381121362668559, "grad_norm": 3.0948095321655273, "learning_rate": 5.623725161599234e-06, "loss": 0.3457, "step": 6240 }, { "epoch": 0.7392950082801041, "grad_norm": 2.9563987255096436, "learning_rate": 5.623006942781901e-06, "loss": 0.3261, "step": 6250 }, { "epoch": 0.7404778802933523, "grad_norm": 2.799682855606079, "learning_rate": 5.6222887239645675e-06, "loss": 0.3734, "step": 6260 }, { "epoch": 0.7416607523066004, "grad_norm": 2.5684351921081543, "learning_rate": 5.621570505147235e-06, "loss": 0.3747, "step": 6270 }, { "epoch": 0.7428436243198486, "grad_norm": 2.4596593379974365, "learning_rate": 5.620852286329901e-06, "loss": 0.3313, "step": 6280 }, { "epoch": 0.7440264963330968, "grad_norm": 2.4302868843078613, "learning_rate": 5.620134067512569e-06, "loss": 0.34, "step": 6290 }, { "epoch": 0.7452093683463449, "grad_norm": 4.000497341156006, "learning_rate": 5.619415848695236e-06, "loss": 0.3304, "step": 6300 }, { "epoch": 0.7463922403595931, "grad_norm": 3.440857172012329, "learning_rate": 5.618697629877903e-06, "loss": 0.3036, "step": 6310 }, { "epoch": 0.7475751123728412, "grad_norm": 2.37349796295166, "learning_rate": 5.61797941106057e-06, "loss": 0.3056, "step": 6320 }, { "epoch": 0.7487579843860894, "grad_norm": 3.1543045043945312, "learning_rate": 5.617261192243237e-06, "loss": 0.3822, "step": 6330 }, { "epoch": 0.7499408563993376, "grad_norm": 2.3295421600341797, "learning_rate": 5.6165429734259045e-06, "loss": 0.3229, "step": 6340 }, { "epoch": 0.7501774308019872, "eval_accuracy": 0.8502678244668463, "eval_loss": 0.3409457206726074, "eval_runtime": 77.6735, "eval_safe_aucpr": 0.9011741121226409, "eval_safe_f1": 0.8374008707119244, "eval_safe_fpr": 0.16489791037637092, "eval_safe_precision": 0.8077646894821217, "eval_safe_recall": 0.8692945279975997, "eval_samples_per_second": 773.932, "eval_steps_per_second": 12.102, "eval_unsafe_aucpr": 0.9463641974187076, "eval_unsafe_f1": 0.861247706987714, "eval_unsafe_fpr": 0.13070547200239985, "eval_unsafe_precision": 0.8890833863781031, "eval_unsafe_recall": 0.8351020896236286, "step": 6342 }, { "epoch": 0.7511237284125858, "grad_norm": 2.7695460319519043, "learning_rate": 5.615824754608571e-06, "loss": 0.3108, "step": 6350 }, { "epoch": 0.752306600425834, "grad_norm": 3.2920620441436768, "learning_rate": 5.615106535791238e-06, "loss": 0.3713, "step": 6360 }, { "epoch": 0.753489472439082, "grad_norm": 2.614239454269409, "learning_rate": 5.6143883169739045e-06, "loss": 0.3636, "step": 6370 }, { "epoch": 0.7546723444523302, "grad_norm": 2.2651712894439697, "learning_rate": 5.613670098156572e-06, "loss": 0.3549, "step": 6380 }, { "epoch": 0.7558552164655784, "grad_norm": 2.1967551708221436, "learning_rate": 5.612951879339238e-06, "loss": 0.3479, "step": 6390 }, { "epoch": 0.7570380884788266, "grad_norm": 2.7348268032073975, "learning_rate": 5.612233660521906e-06, "loss": 0.3288, "step": 6400 }, { "epoch": 0.7582209604920748, "grad_norm": 2.1394784450531006, "learning_rate": 5.611515441704573e-06, "loss": 0.3259, "step": 6410 }, { "epoch": 0.759403832505323, "grad_norm": 2.4483065605163574, "learning_rate": 5.61079722288724e-06, "loss": 0.3101, "step": 6420 }, { "epoch": 0.7605867045185711, "grad_norm": 2.4361226558685303, "learning_rate": 5.610079004069907e-06, "loss": 0.3997, "step": 6430 }, { "epoch": 0.7617695765318192, "grad_norm": 2.7097060680389404, "learning_rate": 5.609360785252574e-06, "loss": 0.3409, "step": 6440 }, { "epoch": 0.7629524485450674, "grad_norm": 2.4710869789123535, "learning_rate": 5.608642566435241e-06, "loss": 0.3373, "step": 6450 }, { "epoch": 0.7641353205583156, "grad_norm": 2.2179853916168213, "learning_rate": 5.607924347617908e-06, "loss": 0.317, "step": 6460 }, { "epoch": 0.7653181925715637, "grad_norm": 2.3045737743377686, "learning_rate": 5.6072061288005745e-06, "loss": 0.367, "step": 6470 }, { "epoch": 0.7665010645848119, "grad_norm": 2.5970547199249268, "learning_rate": 5.6064879099832415e-06, "loss": 0.3172, "step": 6480 }, { "epoch": 0.7676839365980601, "grad_norm": 2.2353339195251465, "learning_rate": 5.605769691165908e-06, "loss": 0.371, "step": 6490 }, { "epoch": 0.7688668086113083, "grad_norm": 3.267253875732422, "learning_rate": 5.605051472348576e-06, "loss": 0.3598, "step": 6500 }, { "epoch": 0.7700496806245565, "grad_norm": 3.300684928894043, "learning_rate": 5.604333253531242e-06, "loss": 0.3908, "step": 6510 }, { "epoch": 0.7712325526378045, "grad_norm": 2.7900242805480957, "learning_rate": 5.60361503471391e-06, "loss": 0.317, "step": 6520 }, { "epoch": 0.7724154246510527, "grad_norm": 3.9156737327575684, "learning_rate": 5.602896815896576e-06, "loss": 0.3443, "step": 6530 }, { "epoch": 0.7735982966643009, "grad_norm": 2.0143091678619385, "learning_rate": 5.602178597079244e-06, "loss": 0.3617, "step": 6540 }, { "epoch": 0.7747811686775491, "grad_norm": 3.1424200534820557, "learning_rate": 5.601460378261911e-06, "loss": 0.3308, "step": 6550 }, { "epoch": 0.7759640406907973, "grad_norm": 2.1877145767211914, "learning_rate": 5.600742159444578e-06, "loss": 0.346, "step": 6560 }, { "epoch": 0.7771469127040455, "grad_norm": 3.5627498626708984, "learning_rate": 5.600023940627245e-06, "loss": 0.3614, "step": 6570 }, { "epoch": 0.7783297847172936, "grad_norm": 2.8966684341430664, "learning_rate": 5.5993057218099115e-06, "loss": 0.3911, "step": 6580 }, { "epoch": 0.7795126567305417, "grad_norm": 2.6046037673950195, "learning_rate": 5.5985875029925785e-06, "loss": 0.3354, "step": 6590 }, { "epoch": 0.7806955287437899, "grad_norm": 4.150275230407715, "learning_rate": 5.597869284175245e-06, "loss": 0.3374, "step": 6600 }, { "epoch": 0.7818784007570381, "grad_norm": 3.0789692401885986, "learning_rate": 5.597151065357913e-06, "loss": 0.3208, "step": 6610 }, { "epoch": 0.7830612727702863, "grad_norm": 2.3965251445770264, "learning_rate": 5.596432846540579e-06, "loss": 0.3108, "step": 6620 }, { "epoch": 0.7842441447835344, "grad_norm": 2.5840611457824707, "learning_rate": 5.595714627723247e-06, "loss": 0.3603, "step": 6630 }, { "epoch": 0.7854270167967826, "grad_norm": 3.554948329925537, "learning_rate": 5.594996408905913e-06, "loss": 0.3952, "step": 6640 }, { "epoch": 0.7866098888100308, "grad_norm": 2.754347324371338, "learning_rate": 5.594278190088581e-06, "loss": 0.3722, "step": 6650 }, { "epoch": 0.7877927608232789, "grad_norm": 2.548170566558838, "learning_rate": 5.593559971271247e-06, "loss": 0.3622, "step": 6660 }, { "epoch": 0.788975632836527, "grad_norm": 2.657484769821167, "learning_rate": 5.592841752453915e-06, "loss": 0.3553, "step": 6670 }, { "epoch": 0.7901585048497752, "grad_norm": 3.3273067474365234, "learning_rate": 5.592123533636582e-06, "loss": 0.3271, "step": 6680 }, { "epoch": 0.7913413768630234, "grad_norm": 3.056093692779541, "learning_rate": 5.5914053148192485e-06, "loss": 0.3669, "step": 6690 }, { "epoch": 0.7925242488762716, "grad_norm": 2.1021077632904053, "learning_rate": 5.5906870960019154e-06, "loss": 0.3486, "step": 6700 }, { "epoch": 0.7937071208895198, "grad_norm": 2.397303819656372, "learning_rate": 5.589968877184582e-06, "loss": 0.3442, "step": 6710 }, { "epoch": 0.794889992902768, "grad_norm": 3.0344865322113037, "learning_rate": 5.589250658367249e-06, "loss": 0.3759, "step": 6720 }, { "epoch": 0.796072864916016, "grad_norm": 2.3328771591186523, "learning_rate": 5.588532439549916e-06, "loss": 0.3658, "step": 6730 }, { "epoch": 0.7972557369292642, "grad_norm": 2.2357239723205566, "learning_rate": 5.587814220732583e-06, "loss": 0.328, "step": 6740 }, { "epoch": 0.7984386089425124, "grad_norm": 2.4008595943450928, "learning_rate": 5.58709600191525e-06, "loss": 0.3679, "step": 6750 }, { "epoch": 0.7996214809557606, "grad_norm": 2.082193613052368, "learning_rate": 5.586377783097917e-06, "loss": 0.3653, "step": 6760 }, { "epoch": 0.8008043529690088, "grad_norm": 2.2875049114227295, "learning_rate": 5.585659564280584e-06, "loss": 0.3993, "step": 6770 }, { "epoch": 0.8019872249822569, "grad_norm": 4.390810012817383, "learning_rate": 5.584941345463251e-06, "loss": 0.3251, "step": 6780 }, { "epoch": 0.8031700969955051, "grad_norm": 2.708571672439575, "learning_rate": 5.584223126645919e-06, "loss": 0.3497, "step": 6790 }, { "epoch": 0.8043529690087533, "grad_norm": 3.668095827102661, "learning_rate": 5.5835049078285855e-06, "loss": 0.4234, "step": 6800 }, { "epoch": 0.8055358410220014, "grad_norm": 2.2173752784729004, "learning_rate": 5.5827866890112524e-06, "loss": 0.3288, "step": 6810 }, { "epoch": 0.8067187130352496, "grad_norm": 2.5242574214935303, "learning_rate": 5.582068470193919e-06, "loss": 0.3371, "step": 6820 }, { "epoch": 0.8079015850484977, "grad_norm": 2.4785118103027344, "learning_rate": 5.581350251376586e-06, "loss": 0.3406, "step": 6830 }, { "epoch": 0.8090844570617459, "grad_norm": 2.005140781402588, "learning_rate": 5.580632032559253e-06, "loss": 0.3422, "step": 6840 }, { "epoch": 0.8102673290749941, "grad_norm": 4.3739728927612305, "learning_rate": 5.57991381374192e-06, "loss": 0.3839, "step": 6850 }, { "epoch": 0.8114502010882423, "grad_norm": 2.7078897953033447, "learning_rate": 5.579195594924587e-06, "loss": 0.3326, "step": 6860 }, { "epoch": 0.8126330731014905, "grad_norm": 2.3173391819000244, "learning_rate": 5.578477376107254e-06, "loss": 0.3626, "step": 6870 }, { "epoch": 0.8138159451147385, "grad_norm": 2.316404342651367, "learning_rate": 5.577759157289922e-06, "loss": 0.3743, "step": 6880 }, { "epoch": 0.8149988171279867, "grad_norm": 2.4765071868896484, "learning_rate": 5.577040938472588e-06, "loss": 0.3304, "step": 6890 }, { "epoch": 0.8161816891412349, "grad_norm": 2.8449864387512207, "learning_rate": 5.576322719655256e-06, "loss": 0.3603, "step": 6900 }, { "epoch": 0.8173645611544831, "grad_norm": 2.9987564086914062, "learning_rate": 5.575604500837922e-06, "loss": 0.348, "step": 6910 }, { "epoch": 0.8185474331677313, "grad_norm": 2.6204235553741455, "learning_rate": 5.5748862820205894e-06, "loss": 0.3072, "step": 6920 }, { "epoch": 0.8197303051809794, "grad_norm": 2.3653435707092285, "learning_rate": 5.5741680632032555e-06, "loss": 0.3398, "step": 6930 }, { "epoch": 0.8209131771942276, "grad_norm": 3.261428117752075, "learning_rate": 5.573449844385923e-06, "loss": 0.3434, "step": 6940 }, { "epoch": 0.8220960492074757, "grad_norm": 4.365106582641602, "learning_rate": 5.57273162556859e-06, "loss": 0.3566, "step": 6950 }, { "epoch": 0.8232789212207239, "grad_norm": 3.3341214656829834, "learning_rate": 5.572013406751257e-06, "loss": 0.3665, "step": 6960 }, { "epoch": 0.8244617932339721, "grad_norm": 2.0983595848083496, "learning_rate": 5.571295187933924e-06, "loss": 0.3646, "step": 6970 }, { "epoch": 0.8256446652472202, "grad_norm": 3.0781376361846924, "learning_rate": 5.570576969116591e-06, "loss": 0.3294, "step": 6980 }, { "epoch": 0.8268275372604684, "grad_norm": 2.7174408435821533, "learning_rate": 5.569858750299258e-06, "loss": 0.3606, "step": 6990 }, { "epoch": 0.8280104092737166, "grad_norm": 2.8317363262176514, "learning_rate": 5.569140531481925e-06, "loss": 0.3627, "step": 7000 }, { "epoch": 0.8291932812869648, "grad_norm": 2.5212972164154053, "learning_rate": 5.568422312664592e-06, "loss": 0.3358, "step": 7010 }, { "epoch": 0.830376153300213, "grad_norm": 2.977804660797119, "learning_rate": 5.567704093847259e-06, "loss": 0.3542, "step": 7020 }, { "epoch": 0.831559025313461, "grad_norm": 2.880445957183838, "learning_rate": 5.566985875029926e-06, "loss": 0.3508, "step": 7030 }, { "epoch": 0.8327418973267092, "grad_norm": 3.8674228191375732, "learning_rate": 5.5662676562125925e-06, "loss": 0.3337, "step": 7040 }, { "epoch": 0.8339247693399574, "grad_norm": 3.2273359298706055, "learning_rate": 5.5655494373952594e-06, "loss": 0.3638, "step": 7050 }, { "epoch": 0.8351076413532056, "grad_norm": 2.6208560466766357, "learning_rate": 5.564831218577927e-06, "loss": 0.3764, "step": 7060 }, { "epoch": 0.8362905133664538, "grad_norm": 4.46844482421875, "learning_rate": 5.564112999760594e-06, "loss": 0.3223, "step": 7070 }, { "epoch": 0.837473385379702, "grad_norm": 2.8332033157348633, "learning_rate": 5.563394780943261e-06, "loss": 0.3439, "step": 7080 }, { "epoch": 0.8386562573929501, "grad_norm": 1.84604811668396, "learning_rate": 5.562676562125928e-06, "loss": 0.3599, "step": 7090 }, { "epoch": 0.8398391294061982, "grad_norm": 3.16796875, "learning_rate": 5.561958343308595e-06, "loss": 0.3226, "step": 7100 }, { "epoch": 0.8410220014194464, "grad_norm": 2.880558729171753, "learning_rate": 5.561240124491262e-06, "loss": 0.3564, "step": 7110 }, { "epoch": 0.8422048734326946, "grad_norm": 3.119356632232666, "learning_rate": 5.560521905673929e-06, "loss": 0.3507, "step": 7120 }, { "epoch": 0.8433877454459427, "grad_norm": 2.4311368465423584, "learning_rate": 5.559803686856596e-06, "loss": 0.3507, "step": 7130 }, { "epoch": 0.8445706174591909, "grad_norm": 2.9251863956451416, "learning_rate": 5.559085468039263e-06, "loss": 0.3467, "step": 7140 }, { "epoch": 0.8457534894724391, "grad_norm": 3.1497440338134766, "learning_rate": 5.55836724922193e-06, "loss": 0.3782, "step": 7150 }, { "epoch": 0.8469363614856873, "grad_norm": 2.303025960922241, "learning_rate": 5.5576490304045964e-06, "loss": 0.3522, "step": 7160 }, { "epoch": 0.8481192334989354, "grad_norm": 2.60343074798584, "learning_rate": 5.556930811587264e-06, "loss": 0.3398, "step": 7170 }, { "epoch": 0.8493021055121835, "grad_norm": 2.61942982673645, "learning_rate": 5.55621259276993e-06, "loss": 0.3504, "step": 7180 }, { "epoch": 0.8504849775254317, "grad_norm": 3.136608600616455, "learning_rate": 5.555494373952598e-06, "loss": 0.3493, "step": 7190 }, { "epoch": 0.8516678495386799, "grad_norm": 2.6096303462982178, "learning_rate": 5.554776155135264e-06, "loss": 0.3217, "step": 7200 }, { "epoch": 0.8528507215519281, "grad_norm": 3.7467339038848877, "learning_rate": 5.554057936317932e-06, "loss": 0.3334, "step": 7210 }, { "epoch": 0.8540335935651763, "grad_norm": 2.4341225624084473, "learning_rate": 5.553339717500599e-06, "loss": 0.3638, "step": 7220 }, { "epoch": 0.8552164655784245, "grad_norm": 2.3212246894836426, "learning_rate": 5.552621498683266e-06, "loss": 0.3515, "step": 7230 }, { "epoch": 0.8563993375916725, "grad_norm": 2.513843059539795, "learning_rate": 5.551903279865933e-06, "loss": 0.3225, "step": 7240 }, { "epoch": 0.8575822096049207, "grad_norm": 2.3628575801849365, "learning_rate": 5.5511850610486e-06, "loss": 0.3138, "step": 7250 }, { "epoch": 0.8587650816181689, "grad_norm": 3.140106678009033, "learning_rate": 5.5504668422312665e-06, "loss": 0.3321, "step": 7260 }, { "epoch": 0.8599479536314171, "grad_norm": 2.6459336280822754, "learning_rate": 5.5497486234139334e-06, "loss": 0.3633, "step": 7270 }, { "epoch": 0.8611308256446653, "grad_norm": 2.208141326904297, "learning_rate": 5.5490304045966e-06, "loss": 0.3335, "step": 7280 }, { "epoch": 0.8623136976579134, "grad_norm": 2.6104226112365723, "learning_rate": 5.548312185779267e-06, "loss": 0.352, "step": 7290 }, { "epoch": 0.8634965696711616, "grad_norm": 2.234896183013916, "learning_rate": 5.547593966961934e-06, "loss": 0.3889, "step": 7300 }, { "epoch": 0.8646794416844098, "grad_norm": 2.363894462585449, "learning_rate": 5.546875748144601e-06, "loss": 0.3238, "step": 7310 }, { "epoch": 0.8658623136976579, "grad_norm": 2.8927001953125, "learning_rate": 5.546157529327269e-06, "loss": 0.357, "step": 7320 }, { "epoch": 0.867045185710906, "grad_norm": 3.3064918518066406, "learning_rate": 5.545439310509936e-06, "loss": 0.3878, "step": 7330 }, { "epoch": 0.8682280577241542, "grad_norm": 3.236659526824951, "learning_rate": 5.544721091692603e-06, "loss": 0.3851, "step": 7340 }, { "epoch": 0.8694109297374024, "grad_norm": 2.66019344329834, "learning_rate": 5.54400287287527e-06, "loss": 0.3599, "step": 7350 }, { "epoch": 0.8705938017506506, "grad_norm": 1.7286221981048584, "learning_rate": 5.543284654057937e-06, "loss": 0.3258, "step": 7360 }, { "epoch": 0.8717766737638988, "grad_norm": 2.7896177768707275, "learning_rate": 5.5425664352406035e-06, "loss": 0.3575, "step": 7370 }, { "epoch": 0.872959545777147, "grad_norm": 3.1597604751586914, "learning_rate": 5.5418482164232704e-06, "loss": 0.3931, "step": 7380 }, { "epoch": 0.874142417790395, "grad_norm": 1.8662405014038086, "learning_rate": 5.541129997605937e-06, "loss": 0.3439, "step": 7390 }, { "epoch": 0.8753252898036432, "grad_norm": 2.0772244930267334, "learning_rate": 5.540411778788604e-06, "loss": 0.3351, "step": 7400 }, { "epoch": 0.8765081618168914, "grad_norm": 2.4114174842834473, "learning_rate": 5.539693559971271e-06, "loss": 0.3593, "step": 7410 }, { "epoch": 0.8776910338301396, "grad_norm": 3.092982053756714, "learning_rate": 5.538975341153939e-06, "loss": 0.3258, "step": 7420 }, { "epoch": 0.8788739058433878, "grad_norm": 1.9947749376296997, "learning_rate": 5.538257122336605e-06, "loss": 0.3298, "step": 7430 }, { "epoch": 0.8800567778566359, "grad_norm": 4.120667457580566, "learning_rate": 5.537538903519273e-06, "loss": 0.3927, "step": 7440 }, { "epoch": 0.8812396498698841, "grad_norm": 2.6464829444885254, "learning_rate": 5.536820684701939e-06, "loss": 0.3306, "step": 7450 }, { "epoch": 0.8824225218831322, "grad_norm": 2.8128652572631836, "learning_rate": 5.536102465884607e-06, "loss": 0.3556, "step": 7460 }, { "epoch": 0.8836053938963804, "grad_norm": 2.0512959957122803, "learning_rate": 5.535384247067273e-06, "loss": 0.3366, "step": 7470 }, { "epoch": 0.8847882659096286, "grad_norm": 2.7022056579589844, "learning_rate": 5.5346660282499405e-06, "loss": 0.3759, "step": 7480 }, { "epoch": 0.8859711379228767, "grad_norm": 1.867337942123413, "learning_rate": 5.5339478094326074e-06, "loss": 0.3218, "step": 7490 }, { "epoch": 0.8871540099361249, "grad_norm": 3.7488763332366943, "learning_rate": 5.533229590615274e-06, "loss": 0.3551, "step": 7500 }, { "epoch": 0.8883368819493731, "grad_norm": 4.215018272399902, "learning_rate": 5.532511371797941e-06, "loss": 0.3316, "step": 7510 }, { "epoch": 0.8895197539626213, "grad_norm": 2.2023723125457764, "learning_rate": 5.531793152980608e-06, "loss": 0.3306, "step": 7520 }, { "epoch": 0.8907026259758695, "grad_norm": 2.64302659034729, "learning_rate": 5.531074934163275e-06, "loss": 0.3486, "step": 7530 }, { "epoch": 0.8918854979891175, "grad_norm": 3.5505239963531494, "learning_rate": 5.530356715345942e-06, "loss": 0.3361, "step": 7540 }, { "epoch": 0.8930683700023657, "grad_norm": 2.461738348007202, "learning_rate": 5.529638496528609e-06, "loss": 0.3531, "step": 7550 }, { "epoch": 0.8942512420156139, "grad_norm": 2.906794548034668, "learning_rate": 5.528920277711276e-06, "loss": 0.3305, "step": 7560 }, { "epoch": 0.8954341140288621, "grad_norm": 3.0541648864746094, "learning_rate": 5.528202058893944e-06, "loss": 0.3634, "step": 7570 }, { "epoch": 0.8966169860421103, "grad_norm": 2.518399715423584, "learning_rate": 5.52748384007661e-06, "loss": 0.3543, "step": 7580 }, { "epoch": 0.8977998580553584, "grad_norm": 3.4840755462646484, "learning_rate": 5.5267656212592775e-06, "loss": 0.366, "step": 7590 }, { "epoch": 0.8989827300686066, "grad_norm": 2.303093194961548, "learning_rate": 5.5260474024419444e-06, "loss": 0.3602, "step": 7600 }, { "epoch": 0.9001656020818547, "grad_norm": 3.2364866733551025, "learning_rate": 5.525329183624611e-06, "loss": 0.3562, "step": 7610 }, { "epoch": 0.9013484740951029, "grad_norm": 2.9462339878082275, "learning_rate": 5.524610964807278e-06, "loss": 0.3609, "step": 7620 }, { "epoch": 0.9025313461083511, "grad_norm": 2.3655786514282227, "learning_rate": 5.523892745989945e-06, "loss": 0.3436, "step": 7630 }, { "epoch": 0.9037142181215992, "grad_norm": 2.2203845977783203, "learning_rate": 5.523174527172612e-06, "loss": 0.3728, "step": 7640 }, { "epoch": 0.9048970901348474, "grad_norm": 2.290207862854004, "learning_rate": 5.522456308355279e-06, "loss": 0.3182, "step": 7650 }, { "epoch": 0.9060799621480956, "grad_norm": 3.579007863998413, "learning_rate": 5.521738089537946e-06, "loss": 0.3684, "step": 7660 }, { "epoch": 0.9072628341613438, "grad_norm": 2.4544854164123535, "learning_rate": 5.521019870720613e-06, "loss": 0.2881, "step": 7670 }, { "epoch": 0.9084457061745919, "grad_norm": 2.399073839187622, "learning_rate": 5.52030165190328e-06, "loss": 0.3262, "step": 7680 }, { "epoch": 0.90962857818784, "grad_norm": 3.2292399406433105, "learning_rate": 5.519583433085947e-06, "loss": 0.3417, "step": 7690 }, { "epoch": 0.9108114502010882, "grad_norm": 2.4320437908172607, "learning_rate": 5.518865214268614e-06, "loss": 0.3597, "step": 7700 }, { "epoch": 0.9119943222143364, "grad_norm": 3.3153836727142334, "learning_rate": 5.5181469954512814e-06, "loss": 0.331, "step": 7710 }, { "epoch": 0.9131771942275846, "grad_norm": 3.12115478515625, "learning_rate": 5.5174287766339475e-06, "loss": 0.3033, "step": 7720 }, { "epoch": 0.9143600662408328, "grad_norm": 1.7770037651062012, "learning_rate": 5.516710557816615e-06, "loss": 0.3451, "step": 7730 }, { "epoch": 0.915542938254081, "grad_norm": 2.761713743209839, "learning_rate": 5.515992338999281e-06, "loss": 0.3295, "step": 7740 }, { "epoch": 0.916725810267329, "grad_norm": 3.4372050762176514, "learning_rate": 5.515274120181949e-06, "loss": 0.3115, "step": 7750 }, { "epoch": 0.9179086822805772, "grad_norm": 2.6007134914398193, "learning_rate": 5.514555901364616e-06, "loss": 0.3574, "step": 7760 }, { "epoch": 0.9190915542938254, "grad_norm": 1.9798741340637207, "learning_rate": 5.513837682547283e-06, "loss": 0.355, "step": 7770 }, { "epoch": 0.9202744263070736, "grad_norm": 2.710803270339966, "learning_rate": 5.51311946372995e-06, "loss": 0.3608, "step": 7780 }, { "epoch": 0.9214572983203217, "grad_norm": 2.823716163635254, "learning_rate": 5.512401244912617e-06, "loss": 0.3676, "step": 7790 }, { "epoch": 0.9226401703335699, "grad_norm": 2.5856072902679443, "learning_rate": 5.511683026095284e-06, "loss": 0.3661, "step": 7800 }, { "epoch": 0.9238230423468181, "grad_norm": 2.7194437980651855, "learning_rate": 5.510964807277951e-06, "loss": 0.3507, "step": 7810 }, { "epoch": 0.9250059143600663, "grad_norm": 2.136730909347534, "learning_rate": 5.5102465884606184e-06, "loss": 0.3346, "step": 7820 }, { "epoch": 0.9261887863733144, "grad_norm": 3.40328049659729, "learning_rate": 5.5095283696432845e-06, "loss": 0.3275, "step": 7830 }, { "epoch": 0.9273716583865625, "grad_norm": 5.959146022796631, "learning_rate": 5.508810150825952e-06, "loss": 0.3761, "step": 7840 }, { "epoch": 0.9285545303998107, "grad_norm": 2.9853756427764893, "learning_rate": 5.508091932008618e-06, "loss": 0.3588, "step": 7850 }, { "epoch": 0.9297374024130589, "grad_norm": 2.2519445419311523, "learning_rate": 5.507373713191286e-06, "loss": 0.3017, "step": 7860 }, { "epoch": 0.9309202744263071, "grad_norm": 3.8326361179351807, "learning_rate": 5.506655494373953e-06, "loss": 0.3472, "step": 7870 }, { "epoch": 0.9321031464395553, "grad_norm": 3.3324077129364014, "learning_rate": 5.50593727555662e-06, "loss": 0.3803, "step": 7880 }, { "epoch": 0.9332860184528035, "grad_norm": 2.6756067276000977, "learning_rate": 5.505219056739287e-06, "loss": 0.3185, "step": 7890 }, { "epoch": 0.9344688904660515, "grad_norm": 2.8437869548797607, "learning_rate": 5.504500837921954e-06, "loss": 0.3579, "step": 7900 }, { "epoch": 0.9356517624792997, "grad_norm": 3.054748773574829, "learning_rate": 5.503782619104621e-06, "loss": 0.3564, "step": 7910 }, { "epoch": 0.9368346344925479, "grad_norm": 2.167482852935791, "learning_rate": 5.503064400287288e-06, "loss": 0.3494, "step": 7920 }, { "epoch": 0.9380175065057961, "grad_norm": 2.3468101024627686, "learning_rate": 5.5023461814699546e-06, "loss": 0.3285, "step": 7930 }, { "epoch": 0.9392003785190443, "grad_norm": 2.8190131187438965, "learning_rate": 5.5016279626526215e-06, "loss": 0.2858, "step": 7940 }, { "epoch": 0.9403832505322924, "grad_norm": 2.783245325088501, "learning_rate": 5.500909743835288e-06, "loss": 0.3725, "step": 7950 }, { "epoch": 0.9415661225455406, "grad_norm": 2.941138505935669, "learning_rate": 5.500191525017955e-06, "loss": 0.3345, "step": 7960 }, { "epoch": 0.9427489945587887, "grad_norm": 2.27093505859375, "learning_rate": 5.499473306200622e-06, "loss": 0.3384, "step": 7970 }, { "epoch": 0.9439318665720369, "grad_norm": 2.350109338760376, "learning_rate": 5.49875508738329e-06, "loss": 0.381, "step": 7980 }, { "epoch": 0.945114738585285, "grad_norm": 2.4800150394439697, "learning_rate": 5.498036868565956e-06, "loss": 0.3243, "step": 7990 }, { "epoch": 0.9462976105985332, "grad_norm": 2.5707695484161377, "learning_rate": 5.497318649748624e-06, "loss": 0.3237, "step": 8000 }, { "epoch": 0.9474804826117814, "grad_norm": 5.119116306304932, "learning_rate": 5.49660043093129e-06, "loss": 0.3766, "step": 8010 }, { "epoch": 0.9486633546250296, "grad_norm": 2.0871899127960205, "learning_rate": 5.495882212113958e-06, "loss": 0.3399, "step": 8020 }, { "epoch": 0.9498462266382778, "grad_norm": 2.1982295513153076, "learning_rate": 5.495163993296624e-06, "loss": 0.3437, "step": 8030 }, { "epoch": 0.9510290986515259, "grad_norm": 2.7046656608581543, "learning_rate": 5.4944457744792916e-06, "loss": 0.3225, "step": 8040 }, { "epoch": 0.952211970664774, "grad_norm": 2.497591495513916, "learning_rate": 5.4937275556619585e-06, "loss": 0.369, "step": 8050 }, { "epoch": 0.9533948426780222, "grad_norm": 2.4972617626190186, "learning_rate": 5.493009336844625e-06, "loss": 0.3459, "step": 8060 }, { "epoch": 0.9545777146912704, "grad_norm": 2.939979314804077, "learning_rate": 5.492291118027293e-06, "loss": 0.3471, "step": 8070 }, { "epoch": 0.9557605867045186, "grad_norm": 2.3696694374084473, "learning_rate": 5.491572899209959e-06, "loss": 0.3112, "step": 8080 }, { "epoch": 0.9569434587177668, "grad_norm": 2.358957290649414, "learning_rate": 5.490854680392627e-06, "loss": 0.329, "step": 8090 }, { "epoch": 0.9581263307310149, "grad_norm": 2.4226739406585693, "learning_rate": 5.490136461575293e-06, "loss": 0.3186, "step": 8100 }, { "epoch": 0.9593092027442631, "grad_norm": 2.8412845134735107, "learning_rate": 5.489418242757961e-06, "loss": 0.3168, "step": 8110 }, { "epoch": 0.9604920747575112, "grad_norm": 2.624074697494507, "learning_rate": 5.488700023940627e-06, "loss": 0.3293, "step": 8120 }, { "epoch": 0.9616749467707594, "grad_norm": 3.3990612030029297, "learning_rate": 5.487981805123295e-06, "loss": 0.3583, "step": 8130 }, { "epoch": 0.9628578187840076, "grad_norm": 2.37146258354187, "learning_rate": 5.487263586305962e-06, "loss": 0.3207, "step": 8140 }, { "epoch": 0.9640406907972557, "grad_norm": 2.7736618518829346, "learning_rate": 5.4865453674886286e-06, "loss": 0.3393, "step": 8150 }, { "epoch": 0.9652235628105039, "grad_norm": 2.7632699012756348, "learning_rate": 5.4858271486712955e-06, "loss": 0.3341, "step": 8160 }, { "epoch": 0.9664064348237521, "grad_norm": 2.1342551708221436, "learning_rate": 5.485108929853962e-06, "loss": 0.377, "step": 8170 }, { "epoch": 0.9675893068370003, "grad_norm": 2.6788430213928223, "learning_rate": 5.484390711036629e-06, "loss": 0.3033, "step": 8180 }, { "epoch": 0.9687721788502484, "grad_norm": 2.574594020843506, "learning_rate": 5.483672492219296e-06, "loss": 0.3315, "step": 8190 }, { "epoch": 0.9699550508634965, "grad_norm": 2.56632399559021, "learning_rate": 5.482954273401963e-06, "loss": 0.3317, "step": 8200 }, { "epoch": 0.9711379228767447, "grad_norm": 4.020124435424805, "learning_rate": 5.48223605458463e-06, "loss": 0.3644, "step": 8210 }, { "epoch": 0.9723207948899929, "grad_norm": 2.2321345806121826, "learning_rate": 5.481517835767297e-06, "loss": 0.3244, "step": 8220 }, { "epoch": 0.9735036669032411, "grad_norm": 2.39266037940979, "learning_rate": 5.480799616949964e-06, "loss": 0.3584, "step": 8230 }, { "epoch": 0.9746865389164893, "grad_norm": 2.82624888420105, "learning_rate": 5.480081398132631e-06, "loss": 0.335, "step": 8240 }, { "epoch": 0.9758694109297374, "grad_norm": 2.847728729248047, "learning_rate": 5.479363179315299e-06, "loss": 0.3797, "step": 8250 }, { "epoch": 0.9770522829429855, "grad_norm": 2.8681814670562744, "learning_rate": 5.478644960497965e-06, "loss": 0.3158, "step": 8260 }, { "epoch": 0.9782351549562337, "grad_norm": 2.9551432132720947, "learning_rate": 5.4779267416806325e-06, "loss": 0.3126, "step": 8270 }, { "epoch": 0.9794180269694819, "grad_norm": 3.640418529510498, "learning_rate": 5.4772085228632986e-06, "loss": 0.3419, "step": 8280 }, { "epoch": 0.9806008989827301, "grad_norm": 3.0809133052825928, "learning_rate": 5.476490304045966e-06, "loss": 0.3261, "step": 8290 }, { "epoch": 0.9817837709959782, "grad_norm": 2.4587645530700684, "learning_rate": 5.475772085228632e-06, "loss": 0.3296, "step": 8300 }, { "epoch": 0.9829666430092264, "grad_norm": 2.6227495670318604, "learning_rate": 5.4750538664113e-06, "loss": 0.3633, "step": 8310 }, { "epoch": 0.9841495150224746, "grad_norm": 2.2411069869995117, "learning_rate": 5.474335647593967e-06, "loss": 0.3587, "step": 8320 }, { "epoch": 0.9853323870357228, "grad_norm": 2.9606146812438965, "learning_rate": 5.473617428776634e-06, "loss": 0.3344, "step": 8330 }, { "epoch": 0.9865152590489709, "grad_norm": 2.1389524936676025, "learning_rate": 5.472899209959302e-06, "loss": 0.3239, "step": 8340 }, { "epoch": 0.987698131062219, "grad_norm": 2.7911806106567383, "learning_rate": 5.472180991141968e-06, "loss": 0.3391, "step": 8350 }, { "epoch": 0.9888810030754672, "grad_norm": 2.3505120277404785, "learning_rate": 5.471462772324636e-06, "loss": 0.3235, "step": 8360 }, { "epoch": 0.9900638750887154, "grad_norm": 2.090029001235962, "learning_rate": 5.470744553507302e-06, "loss": 0.3564, "step": 8370 }, { "epoch": 0.9912467471019636, "grad_norm": 3.6726338863372803, "learning_rate": 5.4700263346899695e-06, "loss": 0.3693, "step": 8380 }, { "epoch": 0.9924296191152118, "grad_norm": 3.1619338989257812, "learning_rate": 5.4693081158726356e-06, "loss": 0.3692, "step": 8390 }, { "epoch": 0.99361249112846, "grad_norm": 2.480262517929077, "learning_rate": 5.468589897055303e-06, "loss": 0.3565, "step": 8400 }, { "epoch": 0.994795363141708, "grad_norm": 4.622408866882324, "learning_rate": 5.46787167823797e-06, "loss": 0.358, "step": 8410 }, { "epoch": 0.9959782351549562, "grad_norm": 2.087460994720459, "learning_rate": 5.467153459420637e-06, "loss": 0.3632, "step": 8420 }, { "epoch": 0.9971611071682044, "grad_norm": 2.0399322509765625, "learning_rate": 5.466435240603304e-06, "loss": 0.3306, "step": 8430 }, { "epoch": 0.9983439791814526, "grad_norm": 2.897136926651001, "learning_rate": 5.465717021785971e-06, "loss": 0.3553, "step": 8440 }, { "epoch": 0.9995268511947007, "grad_norm": 3.2266910076141357, "learning_rate": 5.464998802968638e-06, "loss": 0.3597, "step": 8450 }, { "epoch": 1.0002365744026496, "eval_accuracy": 0.8519812356522607, "eval_loss": 0.33436116576194763, "eval_runtime": 79.1167, "eval_safe_aucpr": 0.9059820300762905, "eval_safe_f1": 0.8338313289011727, "eval_safe_fpr": 0.1363187946548679, "eval_safe_precision": 0.8303886925795053, "eval_safe_recall": 0.8373026291115029, "eval_samples_per_second": 759.815, "eval_steps_per_second": 11.881, "eval_unsafe_aucpr": 0.9481555247484019, "eval_unsafe_f1": 0.8665566886622675, "eval_unsafe_fpr": 0.16269737088849656, "eval_unsafe_precision": 0.8694513828282524, "eval_unsafe_recall": 0.8636812053451317, "step": 8456 }, { "epoch": 1.000709723207949, "grad_norm": 3.032815456390381, "learning_rate": 5.464280584151305e-06, "loss": 0.3003, "step": 8460 }, { "epoch": 1.001892595221197, "grad_norm": 3.270686149597168, "learning_rate": 5.463562365333972e-06, "loss": 0.3169, "step": 8470 }, { "epoch": 1.0030754672344453, "grad_norm": 3.1649651527404785, "learning_rate": 5.462844146516639e-06, "loss": 0.3107, "step": 8480 }, { "epoch": 1.0042583392476934, "grad_norm": 4.533438205718994, "learning_rate": 5.462125927699306e-06, "loss": 0.3696, "step": 8490 }, { "epoch": 1.0054412112609417, "grad_norm": 2.518247604370117, "learning_rate": 5.4614077088819726e-06, "loss": 0.3226, "step": 8500 }, { "epoch": 1.0066240832741897, "grad_norm": 2.138230562210083, "learning_rate": 5.4606894900646395e-06, "loss": 0.3334, "step": 8510 }, { "epoch": 1.0078069552874378, "grad_norm": 2.659940719604492, "learning_rate": 5.459971271247307e-06, "loss": 0.3236, "step": 8520 }, { "epoch": 1.008989827300686, "grad_norm": 2.521111011505127, "learning_rate": 5.459253052429973e-06, "loss": 0.3356, "step": 8530 }, { "epoch": 1.0101726993139342, "grad_norm": 2.4494261741638184, "learning_rate": 5.458534833612641e-06, "loss": 0.3562, "step": 8540 }, { "epoch": 1.0113555713271825, "grad_norm": 2.304314374923706, "learning_rate": 5.457816614795307e-06, "loss": 0.3342, "step": 8550 }, { "epoch": 1.0125384433404305, "grad_norm": 2.936882734298706, "learning_rate": 5.457098395977975e-06, "loss": 0.3332, "step": 8560 }, { "epoch": 1.0137213153536788, "grad_norm": 2.1796059608459473, "learning_rate": 5.456380177160641e-06, "loss": 0.2911, "step": 8570 }, { "epoch": 1.014904187366927, "grad_norm": 3.1883201599121094, "learning_rate": 5.455661958343309e-06, "loss": 0.342, "step": 8580 }, { "epoch": 1.016087059380175, "grad_norm": 2.9558184146881104, "learning_rate": 5.454943739525976e-06, "loss": 0.3225, "step": 8590 }, { "epoch": 1.0172699313934233, "grad_norm": 2.6654281616210938, "learning_rate": 5.454225520708643e-06, "loss": 0.2589, "step": 8600 }, { "epoch": 1.0184528034066713, "grad_norm": 3.352184534072876, "learning_rate": 5.4535073018913096e-06, "loss": 0.3118, "step": 8610 }, { "epoch": 1.0196356754199196, "grad_norm": 2.928210496902466, "learning_rate": 5.4527890830739765e-06, "loss": 0.3515, "step": 8620 }, { "epoch": 1.0208185474331677, "grad_norm": 2.8811118602752686, "learning_rate": 5.452070864256644e-06, "loss": 0.3125, "step": 8630 }, { "epoch": 1.022001419446416, "grad_norm": 2.919084072113037, "learning_rate": 5.45135264543931e-06, "loss": 0.3126, "step": 8640 }, { "epoch": 1.023184291459664, "grad_norm": 2.5222673416137695, "learning_rate": 5.450634426621978e-06, "loss": 0.336, "step": 8650 }, { "epoch": 1.0243671634729121, "grad_norm": 3.217428684234619, "learning_rate": 5.449916207804644e-06, "loss": 0.3497, "step": 8660 }, { "epoch": 1.0255500354861604, "grad_norm": 2.234304189682007, "learning_rate": 5.449197988987312e-06, "loss": 0.3029, "step": 8670 }, { "epoch": 1.0267329074994085, "grad_norm": 2.950589418411255, "learning_rate": 5.448479770169979e-06, "loss": 0.3024, "step": 8680 }, { "epoch": 1.0279157795126568, "grad_norm": 3.1446995735168457, "learning_rate": 5.447761551352646e-06, "loss": 0.2645, "step": 8690 }, { "epoch": 1.0290986515259049, "grad_norm": 4.240466117858887, "learning_rate": 5.447043332535313e-06, "loss": 0.3296, "step": 8700 }, { "epoch": 1.0302815235391531, "grad_norm": 2.519169330596924, "learning_rate": 5.44632511371798e-06, "loss": 0.2848, "step": 8710 }, { "epoch": 1.0314643955524012, "grad_norm": 2.583394765853882, "learning_rate": 5.4456068949006466e-06, "loss": 0.3419, "step": 8720 }, { "epoch": 1.0326472675656495, "grad_norm": 3.5348169803619385, "learning_rate": 5.4448886760833135e-06, "loss": 0.3245, "step": 8730 }, { "epoch": 1.0338301395788976, "grad_norm": 2.2338781356811523, "learning_rate": 5.44417045726598e-06, "loss": 0.3585, "step": 8740 }, { "epoch": 1.0350130115921456, "grad_norm": 2.400956630706787, "learning_rate": 5.443452238448647e-06, "loss": 0.3284, "step": 8750 }, { "epoch": 1.036195883605394, "grad_norm": 2.292003870010376, "learning_rate": 5.442734019631314e-06, "loss": 0.3532, "step": 8760 }, { "epoch": 1.037378755618642, "grad_norm": 2.004927158355713, "learning_rate": 5.442015800813981e-06, "loss": 0.3454, "step": 8770 }, { "epoch": 1.0385616276318903, "grad_norm": 2.7153475284576416, "learning_rate": 5.441297581996648e-06, "loss": 0.3152, "step": 8780 }, { "epoch": 1.0397444996451384, "grad_norm": 2.201547861099243, "learning_rate": 5.440579363179316e-06, "loss": 0.3092, "step": 8790 }, { "epoch": 1.0409273716583867, "grad_norm": 2.537216901779175, "learning_rate": 5.439861144361982e-06, "loss": 0.323, "step": 8800 }, { "epoch": 1.0421102436716347, "grad_norm": 1.951098918914795, "learning_rate": 5.43914292554465e-06, "loss": 0.3439, "step": 8810 }, { "epoch": 1.0432931156848828, "grad_norm": 2.515214443206787, "learning_rate": 5.438424706727316e-06, "loss": 0.3453, "step": 8820 }, { "epoch": 1.044475987698131, "grad_norm": 2.52101731300354, "learning_rate": 5.4377064879099836e-06, "loss": 0.2679, "step": 8830 }, { "epoch": 1.0456588597113792, "grad_norm": 2.983762502670288, "learning_rate": 5.4369882690926505e-06, "loss": 0.3119, "step": 8840 }, { "epoch": 1.0468417317246275, "grad_norm": 3.678133726119995, "learning_rate": 5.436270050275317e-06, "loss": 0.3321, "step": 8850 }, { "epoch": 1.0480246037378755, "grad_norm": 2.3628365993499756, "learning_rate": 5.435551831457984e-06, "loss": 0.3077, "step": 8860 }, { "epoch": 1.0492074757511238, "grad_norm": 2.0662803649902344, "learning_rate": 5.434833612640651e-06, "loss": 0.3505, "step": 8870 }, { "epoch": 1.050390347764372, "grad_norm": 3.0162265300750732, "learning_rate": 5.434115393823318e-06, "loss": 0.3485, "step": 8880 }, { "epoch": 1.05157321977762, "grad_norm": 2.200587034225464, "learning_rate": 5.433397175005985e-06, "loss": 0.327, "step": 8890 }, { "epoch": 1.0527560917908683, "grad_norm": 2.201749324798584, "learning_rate": 5.432678956188653e-06, "loss": 0.3306, "step": 8900 }, { "epoch": 1.0539389638041163, "grad_norm": 3.2741239070892334, "learning_rate": 5.431960737371319e-06, "loss": 0.3378, "step": 8910 }, { "epoch": 1.0551218358173646, "grad_norm": 2.7297585010528564, "learning_rate": 5.431242518553987e-06, "loss": 0.3071, "step": 8920 }, { "epoch": 1.0563047078306127, "grad_norm": 2.7042174339294434, "learning_rate": 5.430524299736653e-06, "loss": 0.3246, "step": 8930 }, { "epoch": 1.057487579843861, "grad_norm": 2.48810076713562, "learning_rate": 5.4298060809193206e-06, "loss": 0.3364, "step": 8940 }, { "epoch": 1.058670451857109, "grad_norm": 3.0956532955169678, "learning_rate": 5.429087862101987e-06, "loss": 0.2946, "step": 8950 }, { "epoch": 1.0598533238703571, "grad_norm": 2.7961843013763428, "learning_rate": 5.428369643284654e-06, "loss": 0.4025, "step": 8960 }, { "epoch": 1.0610361958836054, "grad_norm": 2.458209753036499, "learning_rate": 5.427651424467321e-06, "loss": 0.3359, "step": 8970 }, { "epoch": 1.0622190678968535, "grad_norm": 2.2479140758514404, "learning_rate": 5.426933205649988e-06, "loss": 0.3544, "step": 8980 }, { "epoch": 1.0634019399101018, "grad_norm": 2.5327320098876953, "learning_rate": 5.426214986832655e-06, "loss": 0.2918, "step": 8990 }, { "epoch": 1.0645848119233499, "grad_norm": 2.4770946502685547, "learning_rate": 5.425496768015322e-06, "loss": 0.337, "step": 9000 }, { "epoch": 1.0657676839365982, "grad_norm": 2.397608995437622, "learning_rate": 5.424778549197989e-06, "loss": 0.3459, "step": 9010 }, { "epoch": 1.0669505559498462, "grad_norm": 3.4776039123535156, "learning_rate": 5.424060330380656e-06, "loss": 0.3269, "step": 9020 }, { "epoch": 1.0681334279630943, "grad_norm": 2.9039711952209473, "learning_rate": 5.423342111563323e-06, "loss": 0.3209, "step": 9030 }, { "epoch": 1.0693162999763426, "grad_norm": 2.191887140274048, "learning_rate": 5.42262389274599e-06, "loss": 0.3018, "step": 9040 }, { "epoch": 1.0704991719895907, "grad_norm": 2.682880163192749, "learning_rate": 5.421905673928657e-06, "loss": 0.3186, "step": 9050 }, { "epoch": 1.071682044002839, "grad_norm": 3.5908334255218506, "learning_rate": 5.4211874551113245e-06, "loss": 0.3186, "step": 9060 }, { "epoch": 1.072864916016087, "grad_norm": 2.4704625606536865, "learning_rate": 5.4204692362939906e-06, "loss": 0.3082, "step": 9070 }, { "epoch": 1.0740477880293353, "grad_norm": 2.317227840423584, "learning_rate": 5.419751017476658e-06, "loss": 0.3457, "step": 9080 }, { "epoch": 1.0752306600425834, "grad_norm": 2.4439492225646973, "learning_rate": 5.419032798659325e-06, "loss": 0.3069, "step": 9090 }, { "epoch": 1.0764135320558315, "grad_norm": 4.290738582611084, "learning_rate": 5.418314579841992e-06, "loss": 0.3373, "step": 9100 }, { "epoch": 1.0775964040690797, "grad_norm": 3.5892770290374756, "learning_rate": 5.417596361024659e-06, "loss": 0.3549, "step": 9110 }, { "epoch": 1.0787792760823278, "grad_norm": 2.733102798461914, "learning_rate": 5.416878142207326e-06, "loss": 0.3389, "step": 9120 }, { "epoch": 1.0799621480955761, "grad_norm": 3.0099611282348633, "learning_rate": 5.416159923389993e-06, "loss": 0.3304, "step": 9130 }, { "epoch": 1.0811450201088242, "grad_norm": 3.5245816707611084, "learning_rate": 5.41544170457266e-06, "loss": 0.3359, "step": 9140 }, { "epoch": 1.0823278921220725, "grad_norm": 3.3787643909454346, "learning_rate": 5.414723485755327e-06, "loss": 0.2982, "step": 9150 }, { "epoch": 1.0835107641353205, "grad_norm": 2.499584197998047, "learning_rate": 5.414005266937994e-06, "loss": 0.3199, "step": 9160 }, { "epoch": 1.0846936361485686, "grad_norm": 2.0115272998809814, "learning_rate": 5.4132870481206615e-06, "loss": 0.3543, "step": 9170 }, { "epoch": 1.085876508161817, "grad_norm": 2.129696846008301, "learning_rate": 5.4125688293033275e-06, "loss": 0.3323, "step": 9180 }, { "epoch": 1.087059380175065, "grad_norm": 2.24391508102417, "learning_rate": 5.411850610485995e-06, "loss": 0.3399, "step": 9190 }, { "epoch": 1.0882422521883133, "grad_norm": 3.175449848175049, "learning_rate": 5.411132391668661e-06, "loss": 0.3249, "step": 9200 }, { "epoch": 1.0894251242015613, "grad_norm": 2.4642605781555176, "learning_rate": 5.410414172851329e-06, "loss": 0.3292, "step": 9210 }, { "epoch": 1.0906079962148096, "grad_norm": 2.4592621326446533, "learning_rate": 5.409695954033995e-06, "loss": 0.3159, "step": 9220 }, { "epoch": 1.0917908682280577, "grad_norm": 2.652513027191162, "learning_rate": 5.408977735216663e-06, "loss": 0.3197, "step": 9230 }, { "epoch": 1.0929737402413058, "grad_norm": 3.861598253250122, "learning_rate": 5.40825951639933e-06, "loss": 0.3261, "step": 9240 }, { "epoch": 1.094156612254554, "grad_norm": 3.9381461143493652, "learning_rate": 5.407541297581997e-06, "loss": 0.328, "step": 9250 }, { "epoch": 1.0953394842678021, "grad_norm": 2.2220299243927, "learning_rate": 5.406823078764664e-06, "loss": 0.3508, "step": 9260 }, { "epoch": 1.0965223562810504, "grad_norm": 2.5837607383728027, "learning_rate": 5.406104859947331e-06, "loss": 0.3117, "step": 9270 }, { "epoch": 1.0977052282942985, "grad_norm": 2.157978057861328, "learning_rate": 5.405386641129998e-06, "loss": 0.3027, "step": 9280 }, { "epoch": 1.0988881003075468, "grad_norm": 2.818922281265259, "learning_rate": 5.4046684223126645e-06, "loss": 0.32, "step": 9290 }, { "epoch": 1.1000709723207949, "grad_norm": 3.150458812713623, "learning_rate": 5.4039502034953315e-06, "loss": 0.3058, "step": 9300 }, { "epoch": 1.101253844334043, "grad_norm": 2.7295992374420166, "learning_rate": 5.403231984677998e-06, "loss": 0.3101, "step": 9310 }, { "epoch": 1.1024367163472912, "grad_norm": 3.992326259613037, "learning_rate": 5.402513765860665e-06, "loss": 0.3732, "step": 9320 }, { "epoch": 1.1036195883605393, "grad_norm": 2.365307331085205, "learning_rate": 5.401795547043333e-06, "loss": 0.332, "step": 9330 }, { "epoch": 1.1048024603737876, "grad_norm": 1.9618815183639526, "learning_rate": 5.401077328226e-06, "loss": 0.3279, "step": 9340 }, { "epoch": 1.1059853323870357, "grad_norm": 2.1657872200012207, "learning_rate": 5.400359109408667e-06, "loss": 0.3234, "step": 9350 }, { "epoch": 1.107168204400284, "grad_norm": 2.3764379024505615, "learning_rate": 5.399640890591334e-06, "loss": 0.3213, "step": 9360 }, { "epoch": 1.108351076413532, "grad_norm": 2.755964994430542, "learning_rate": 5.398922671774001e-06, "loss": 0.3053, "step": 9370 }, { "epoch": 1.1095339484267803, "grad_norm": 2.9501616954803467, "learning_rate": 5.398204452956668e-06, "loss": 0.3158, "step": 9380 }, { "epoch": 1.1107168204400284, "grad_norm": 2.6937944889068604, "learning_rate": 5.397486234139335e-06, "loss": 0.347, "step": 9390 }, { "epoch": 1.1118996924532765, "grad_norm": 3.0070104598999023, "learning_rate": 5.3967680153220015e-06, "loss": 0.3224, "step": 9400 }, { "epoch": 1.1130825644665248, "grad_norm": 2.28945255279541, "learning_rate": 5.3960497965046685e-06, "loss": 0.3066, "step": 9410 }, { "epoch": 1.1142654364797728, "grad_norm": 3.1918983459472656, "learning_rate": 5.395331577687335e-06, "loss": 0.3332, "step": 9420 }, { "epoch": 1.1154483084930211, "grad_norm": 1.994618535041809, "learning_rate": 5.394613358870002e-06, "loss": 0.3331, "step": 9430 }, { "epoch": 1.1166311805062692, "grad_norm": 3.330002546310425, "learning_rate": 5.39389514005267e-06, "loss": 0.2817, "step": 9440 }, { "epoch": 1.1178140525195175, "grad_norm": 3.884190320968628, "learning_rate": 5.393176921235336e-06, "loss": 0.3136, "step": 9450 }, { "epoch": 1.1189969245327656, "grad_norm": 3.1539881229400635, "learning_rate": 5.392458702418004e-06, "loss": 0.3238, "step": 9460 }, { "epoch": 1.1201797965460136, "grad_norm": 2.9703457355499268, "learning_rate": 5.39174048360067e-06, "loss": 0.3234, "step": 9470 }, { "epoch": 1.121362668559262, "grad_norm": 1.9814268350601196, "learning_rate": 5.391022264783338e-06, "loss": 0.3297, "step": 9480 }, { "epoch": 1.12254554057251, "grad_norm": 3.241884231567383, "learning_rate": 5.390304045966004e-06, "loss": 0.3699, "step": 9490 }, { "epoch": 1.1237284125857583, "grad_norm": 2.133580446243286, "learning_rate": 5.389585827148672e-06, "loss": 0.2978, "step": 9500 }, { "epoch": 1.1249112845990064, "grad_norm": 2.891195774078369, "learning_rate": 5.3888676083313385e-06, "loss": 0.3122, "step": 9510 }, { "epoch": 1.1260941566122546, "grad_norm": 3.0090155601501465, "learning_rate": 5.3881493895140055e-06, "loss": 0.3151, "step": 9520 }, { "epoch": 1.1272770286255027, "grad_norm": 3.4183151721954346, "learning_rate": 5.387431170696672e-06, "loss": 0.3306, "step": 9530 }, { "epoch": 1.1284599006387508, "grad_norm": 2.636528968811035, "learning_rate": 5.386712951879339e-06, "loss": 0.3244, "step": 9540 }, { "epoch": 1.129642772651999, "grad_norm": 2.98867130279541, "learning_rate": 5.385994733062006e-06, "loss": 0.3545, "step": 9550 }, { "epoch": 1.1308256446652472, "grad_norm": 3.331969976425171, "learning_rate": 5.385276514244673e-06, "loss": 0.3422, "step": 9560 }, { "epoch": 1.1320085166784954, "grad_norm": 3.2384042739868164, "learning_rate": 5.38455829542734e-06, "loss": 0.3648, "step": 9570 }, { "epoch": 1.1331913886917435, "grad_norm": 2.898747205734253, "learning_rate": 5.383840076610007e-06, "loss": 0.2721, "step": 9580 }, { "epoch": 1.1343742607049918, "grad_norm": 3.3465516567230225, "learning_rate": 5.383121857792675e-06, "loss": 0.3728, "step": 9590 }, { "epoch": 1.1355571327182399, "grad_norm": 2.604771137237549, "learning_rate": 5.382403638975341e-06, "loss": 0.3116, "step": 9600 }, { "epoch": 1.1367400047314882, "grad_norm": 4.8374786376953125, "learning_rate": 5.381685420158009e-06, "loss": 0.3038, "step": 9610 }, { "epoch": 1.1379228767447362, "grad_norm": 3.862927198410034, "learning_rate": 5.3809672013406755e-06, "loss": 0.3708, "step": 9620 }, { "epoch": 1.1391057487579843, "grad_norm": 2.758249282836914, "learning_rate": 5.3802489825233425e-06, "loss": 0.3599, "step": 9630 }, { "epoch": 1.1402886207712326, "grad_norm": 2.8053455352783203, "learning_rate": 5.379530763706009e-06, "loss": 0.3608, "step": 9640 }, { "epoch": 1.1414714927844807, "grad_norm": 2.310523509979248, "learning_rate": 5.378812544888676e-06, "loss": 0.3451, "step": 9650 }, { "epoch": 1.142654364797729, "grad_norm": 1.741196632385254, "learning_rate": 5.378094326071343e-06, "loss": 0.387, "step": 9660 }, { "epoch": 1.143837236810977, "grad_norm": 3.097195863723755, "learning_rate": 5.37737610725401e-06, "loss": 0.3098, "step": 9670 }, { "epoch": 1.1450201088242253, "grad_norm": 4.80789041519165, "learning_rate": 5.376657888436677e-06, "loss": 0.3382, "step": 9680 }, { "epoch": 1.1462029808374734, "grad_norm": 2.501476526260376, "learning_rate": 5.375939669619344e-06, "loss": 0.3259, "step": 9690 }, { "epoch": 1.1473858528507215, "grad_norm": 3.66853404045105, "learning_rate": 5.375221450802011e-06, "loss": 0.334, "step": 9700 }, { "epoch": 1.1485687248639698, "grad_norm": 1.9440921545028687, "learning_rate": 5.374503231984679e-06, "loss": 0.383, "step": 9710 }, { "epoch": 1.1497515968772178, "grad_norm": 2.009800434112549, "learning_rate": 5.373785013167345e-06, "loss": 0.2965, "step": 9720 }, { "epoch": 1.1509344688904661, "grad_norm": 3.147747755050659, "learning_rate": 5.3730667943500125e-06, "loss": 0.3293, "step": 9730 }, { "epoch": 1.1521173409037142, "grad_norm": 2.8316309452056885, "learning_rate": 5.372348575532679e-06, "loss": 0.2886, "step": 9740 }, { "epoch": 1.1533002129169625, "grad_norm": 2.775974988937378, "learning_rate": 5.371630356715346e-06, "loss": 0.3287, "step": 9750 }, { "epoch": 1.1544830849302106, "grad_norm": 3.234229803085327, "learning_rate": 5.3709121378980125e-06, "loss": 0.3466, "step": 9760 }, { "epoch": 1.1556659569434586, "grad_norm": 2.839085102081299, "learning_rate": 5.37019391908068e-06, "loss": 0.2998, "step": 9770 }, { "epoch": 1.156848828956707, "grad_norm": 2.5250282287597656, "learning_rate": 5.369475700263347e-06, "loss": 0.2971, "step": 9780 }, { "epoch": 1.158031700969955, "grad_norm": 2.6362595558166504, "learning_rate": 5.368757481446014e-06, "loss": 0.3456, "step": 9790 }, { "epoch": 1.1592145729832033, "grad_norm": 2.554025650024414, "learning_rate": 5.368039262628681e-06, "loss": 0.3319, "step": 9800 }, { "epoch": 1.1603974449964514, "grad_norm": 3.0926215648651123, "learning_rate": 5.367321043811348e-06, "loss": 0.3508, "step": 9810 }, { "epoch": 1.1615803170096997, "grad_norm": 3.5291664600372314, "learning_rate": 5.366602824994015e-06, "loss": 0.3262, "step": 9820 }, { "epoch": 1.1627631890229477, "grad_norm": 2.089956283569336, "learning_rate": 5.365884606176682e-06, "loss": 0.3305, "step": 9830 }, { "epoch": 1.1639460610361958, "grad_norm": 2.995192289352417, "learning_rate": 5.3651663873593495e-06, "loss": 0.3556, "step": 9840 }, { "epoch": 1.165128933049444, "grad_norm": 2.551785707473755, "learning_rate": 5.364448168542016e-06, "loss": 0.3554, "step": 9850 }, { "epoch": 1.1663118050626922, "grad_norm": 2.8216392993927, "learning_rate": 5.363729949724683e-06, "loss": 0.3189, "step": 9860 }, { "epoch": 1.1674946770759405, "grad_norm": 2.8255176544189453, "learning_rate": 5.3630117309073495e-06, "loss": 0.3238, "step": 9870 }, { "epoch": 1.1686775490891885, "grad_norm": 2.8966610431671143, "learning_rate": 5.362293512090017e-06, "loss": 0.3263, "step": 9880 }, { "epoch": 1.1698604211024368, "grad_norm": 2.827005386352539, "learning_rate": 5.361575293272684e-06, "loss": 0.3291, "step": 9890 }, { "epoch": 1.171043293115685, "grad_norm": 3.3333427906036377, "learning_rate": 5.360857074455351e-06, "loss": 0.3327, "step": 9900 }, { "epoch": 1.172226165128933, "grad_norm": 6.229929447174072, "learning_rate": 5.360138855638018e-06, "loss": 0.2978, "step": 9910 }, { "epoch": 1.1734090371421813, "grad_norm": 3.740514039993286, "learning_rate": 5.359420636820685e-06, "loss": 0.3287, "step": 9920 }, { "epoch": 1.1745919091554293, "grad_norm": 3.0174272060394287, "learning_rate": 5.358702418003352e-06, "loss": 0.3065, "step": 9930 }, { "epoch": 1.1757747811686776, "grad_norm": 2.7568047046661377, "learning_rate": 5.357984199186019e-06, "loss": 0.3337, "step": 9940 }, { "epoch": 1.1769576531819257, "grad_norm": 2.7569351196289062, "learning_rate": 5.357265980368686e-06, "loss": 0.2926, "step": 9950 }, { "epoch": 1.178140525195174, "grad_norm": 3.5561347007751465, "learning_rate": 5.356547761551353e-06, "loss": 0.323, "step": 9960 }, { "epoch": 1.179323397208422, "grad_norm": 3.757917642593384, "learning_rate": 5.3558295427340195e-06, "loss": 0.329, "step": 9970 }, { "epoch": 1.1805062692216701, "grad_norm": 2.378509044647217, "learning_rate": 5.355111323916687e-06, "loss": 0.3016, "step": 9980 }, { "epoch": 1.1816891412349184, "grad_norm": 3.223623752593994, "learning_rate": 5.354393105099353e-06, "loss": 0.2803, "step": 9990 }, { "epoch": 1.1828720132481665, "grad_norm": 3.2154698371887207, "learning_rate": 5.353674886282021e-06, "loss": 0.3432, "step": 10000 }, { "epoch": 1.1840548852614148, "grad_norm": 3.9279282093048096, "learning_rate": 5.352956667464687e-06, "loss": 0.3255, "step": 10010 }, { "epoch": 1.1852377572746629, "grad_norm": 2.274242401123047, "learning_rate": 5.352238448647355e-06, "loss": 0.2996, "step": 10020 }, { "epoch": 1.1864206292879111, "grad_norm": 2.1344165802001953, "learning_rate": 5.351520229830021e-06, "loss": 0.3347, "step": 10030 }, { "epoch": 1.1876035013011592, "grad_norm": 2.5024712085723877, "learning_rate": 5.350802011012689e-06, "loss": 0.3257, "step": 10040 }, { "epoch": 1.1887863733144073, "grad_norm": 3.5476930141448975, "learning_rate": 5.350083792195356e-06, "loss": 0.3462, "step": 10050 }, { "epoch": 1.1899692453276556, "grad_norm": 2.743263006210327, "learning_rate": 5.349365573378023e-06, "loss": 0.3142, "step": 10060 }, { "epoch": 1.1911521173409036, "grad_norm": 2.665689468383789, "learning_rate": 5.34864735456069e-06, "loss": 0.3255, "step": 10070 }, { "epoch": 1.192334989354152, "grad_norm": 2.821195363998413, "learning_rate": 5.3479291357433565e-06, "loss": 0.3485, "step": 10080 }, { "epoch": 1.1935178613674, "grad_norm": 3.750598669052124, "learning_rate": 5.347210916926024e-06, "loss": 0.3565, "step": 10090 }, { "epoch": 1.1947007333806483, "grad_norm": 3.2968881130218506, "learning_rate": 5.34649269810869e-06, "loss": 0.3316, "step": 10100 }, { "epoch": 1.1958836053938964, "grad_norm": 2.7333712577819824, "learning_rate": 5.345774479291358e-06, "loss": 0.3552, "step": 10110 }, { "epoch": 1.1970664774071444, "grad_norm": 3.273253917694092, "learning_rate": 5.345056260474024e-06, "loss": 0.3052, "step": 10120 }, { "epoch": 1.1982493494203927, "grad_norm": 3.5871641635894775, "learning_rate": 5.344338041656692e-06, "loss": 0.3539, "step": 10130 }, { "epoch": 1.1994322214336408, "grad_norm": 2.6012492179870605, "learning_rate": 5.343619822839358e-06, "loss": 0.3048, "step": 10140 }, { "epoch": 1.200615093446889, "grad_norm": 2.8074467182159424, "learning_rate": 5.342901604022026e-06, "loss": 0.3208, "step": 10150 }, { "epoch": 1.2017979654601372, "grad_norm": 2.587714433670044, "learning_rate": 5.342183385204693e-06, "loss": 0.364, "step": 10160 }, { "epoch": 1.2029808374733855, "grad_norm": 2.8514552116394043, "learning_rate": 5.34146516638736e-06, "loss": 0.374, "step": 10170 }, { "epoch": 1.2041637094866335, "grad_norm": 2.119905471801758, "learning_rate": 5.340746947570027e-06, "loss": 0.3108, "step": 10180 }, { "epoch": 1.2053465814998816, "grad_norm": 2.2122278213500977, "learning_rate": 5.3400287287526935e-06, "loss": 0.3663, "step": 10190 }, { "epoch": 1.20652945351313, "grad_norm": 2.449744462966919, "learning_rate": 5.3393105099353604e-06, "loss": 0.316, "step": 10200 }, { "epoch": 1.207712325526378, "grad_norm": 2.322458028793335, "learning_rate": 5.338592291118027e-06, "loss": 0.3294, "step": 10210 }, { "epoch": 1.2088951975396263, "grad_norm": 3.058540105819702, "learning_rate": 5.337874072300694e-06, "loss": 0.3217, "step": 10220 }, { "epoch": 1.2100780695528743, "grad_norm": 2.831397771835327, "learning_rate": 5.337155853483361e-06, "loss": 0.3483, "step": 10230 }, { "epoch": 1.2112609415661226, "grad_norm": 3.7924118041992188, "learning_rate": 5.336437634666028e-06, "loss": 0.3268, "step": 10240 }, { "epoch": 1.2124438135793707, "grad_norm": 2.430237293243408, "learning_rate": 5.335719415848696e-06, "loss": 0.3177, "step": 10250 }, { "epoch": 1.2136266855926188, "grad_norm": 2.777331829071045, "learning_rate": 5.335001197031362e-06, "loss": 0.3226, "step": 10260 }, { "epoch": 1.214809557605867, "grad_norm": 4.125950336456299, "learning_rate": 5.33428297821403e-06, "loss": 0.3387, "step": 10270 }, { "epoch": 1.2159924296191151, "grad_norm": 4.0093865394592285, "learning_rate": 5.333564759396696e-06, "loss": 0.3114, "step": 10280 }, { "epoch": 1.2171753016323634, "grad_norm": 2.7753725051879883, "learning_rate": 5.332846540579364e-06, "loss": 0.3477, "step": 10290 }, { "epoch": 1.2183581736456115, "grad_norm": 2.7971582412719727, "learning_rate": 5.33212832176203e-06, "loss": 0.2958, "step": 10300 }, { "epoch": 1.2195410456588598, "grad_norm": 4.457556247711182, "learning_rate": 5.3314101029446974e-06, "loss": 0.3467, "step": 10310 }, { "epoch": 1.2207239176721079, "grad_norm": 2.207031726837158, "learning_rate": 5.330691884127364e-06, "loss": 0.3495, "step": 10320 }, { "epoch": 1.221906789685356, "grad_norm": 2.6760246753692627, "learning_rate": 5.329973665310031e-06, "loss": 0.2982, "step": 10330 }, { "epoch": 1.2230896616986042, "grad_norm": 2.5255050659179688, "learning_rate": 5.329255446492698e-06, "loss": 0.3041, "step": 10340 }, { "epoch": 1.2242725337118523, "grad_norm": 3.199742555618286, "learning_rate": 5.328537227675365e-06, "loss": 0.3206, "step": 10350 }, { "epoch": 1.2254554057251006, "grad_norm": 2.4137117862701416, "learning_rate": 5.327819008858033e-06, "loss": 0.3294, "step": 10360 }, { "epoch": 1.2266382777383487, "grad_norm": 2.261509895324707, "learning_rate": 5.327100790040699e-06, "loss": 0.3637, "step": 10370 }, { "epoch": 1.227821149751597, "grad_norm": 2.8423104286193848, "learning_rate": 5.326382571223367e-06, "loss": 0.3163, "step": 10380 }, { "epoch": 1.229004021764845, "grad_norm": 2.7373669147491455, "learning_rate": 5.325664352406033e-06, "loss": 0.3, "step": 10390 }, { "epoch": 1.230186893778093, "grad_norm": 2.387768268585205, "learning_rate": 5.324946133588701e-06, "loss": 0.2863, "step": 10400 }, { "epoch": 1.2313697657913414, "grad_norm": 4.27761697769165, "learning_rate": 5.324227914771367e-06, "loss": 0.3159, "step": 10410 }, { "epoch": 1.2325526378045895, "grad_norm": 3.5470752716064453, "learning_rate": 5.3235096959540344e-06, "loss": 0.3164, "step": 10420 }, { "epoch": 1.2337355098178377, "grad_norm": 3.484423875808716, "learning_rate": 5.322791477136701e-06, "loss": 0.3537, "step": 10430 }, { "epoch": 1.2349183818310858, "grad_norm": 2.320530414581299, "learning_rate": 5.322073258319368e-06, "loss": 0.3151, "step": 10440 }, { "epoch": 1.2361012538443341, "grad_norm": 2.4881606101989746, "learning_rate": 5.321355039502035e-06, "loss": 0.343, "step": 10450 }, { "epoch": 1.2372841258575822, "grad_norm": 2.5587258338928223, "learning_rate": 5.320636820684702e-06, "loss": 0.3331, "step": 10460 }, { "epoch": 1.2384669978708303, "grad_norm": 3.2363245487213135, "learning_rate": 5.319918601867369e-06, "loss": 0.3409, "step": 10470 }, { "epoch": 1.2396498698840785, "grad_norm": 3.1542718410491943, "learning_rate": 5.319200383050036e-06, "loss": 0.336, "step": 10480 }, { "epoch": 1.2408327418973266, "grad_norm": 2.3025355339050293, "learning_rate": 5.318482164232703e-06, "loss": 0.3123, "step": 10490 }, { "epoch": 1.242015613910575, "grad_norm": 2.1847636699676514, "learning_rate": 5.31776394541537e-06, "loss": 0.3192, "step": 10500 }, { "epoch": 1.243198485923823, "grad_norm": 3.5734481811523438, "learning_rate": 5.317045726598037e-06, "loss": 0.3639, "step": 10510 }, { "epoch": 1.2443813579370713, "grad_norm": 3.351245880126953, "learning_rate": 5.316327507780704e-06, "loss": 0.3508, "step": 10520 }, { "epoch": 1.2455642299503193, "grad_norm": 2.6809604167938232, "learning_rate": 5.315609288963371e-06, "loss": 0.3249, "step": 10530 }, { "epoch": 1.2467471019635676, "grad_norm": 2.790921688079834, "learning_rate": 5.314891070146038e-06, "loss": 0.3244, "step": 10540 }, { "epoch": 1.2479299739768157, "grad_norm": 3.341811418533325, "learning_rate": 5.3141728513287044e-06, "loss": 0.3594, "step": 10550 }, { "epoch": 1.2491128459900638, "grad_norm": 2.7346391677856445, "learning_rate": 5.313454632511372e-06, "loss": 0.3232, "step": 10560 }, { "epoch": 1.250295718003312, "grad_norm": 3.2251312732696533, "learning_rate": 5.312736413694038e-06, "loss": 0.3295, "step": 10570 }, { "epoch": 1.250295718003312, "eval_accuracy": 0.8541105233389893, "eval_loss": 0.3356674313545227, "eval_runtime": 82.1894, "eval_safe_aucpr": 0.9073804423875799, "eval_safe_f1": 0.8369646044021416, "eval_safe_fpr": 0.1380526740605658, "eval_safe_precision": 0.8297762541929301, "eval_safe_recall": 0.8442785883058921, "eval_samples_per_second": 731.408, "eval_steps_per_second": 11.437, "eval_unsafe_aucpr": 0.9491187573387495, "eval_unsafe_f1": 0.8679932566680715, "eval_unsafe_fpr": 0.15572141169410736, "eval_unsafe_precision": 0.8741246020918599, "eval_unsafe_recall": 0.8619473259394338, "step": 10570 }, { "epoch": 1.2514785900165601, "grad_norm": 3.807976484298706, "learning_rate": 5.312018194876706e-06, "loss": 0.3023, "step": 10580 }, { "epoch": 1.2526614620298084, "grad_norm": 2.064093589782715, "learning_rate": 5.311299976059373e-06, "loss": 0.2914, "step": 10590 }, { "epoch": 1.2538443340430565, "grad_norm": 3.07285475730896, "learning_rate": 5.31058175724204e-06, "loss": 0.2944, "step": 10600 }, { "epoch": 1.2550272060563046, "grad_norm": 2.0549986362457275, "learning_rate": 5.309863538424707e-06, "loss": 0.3066, "step": 10610 }, { "epoch": 1.2562100780695529, "grad_norm": 3.30754017829895, "learning_rate": 5.309145319607374e-06, "loss": 0.3404, "step": 10620 }, { "epoch": 1.2573929500828012, "grad_norm": 2.483982563018799, "learning_rate": 5.3084271007900415e-06, "loss": 0.3406, "step": 10630 }, { "epoch": 1.2585758220960492, "grad_norm": 2.510258436203003, "learning_rate": 5.307708881972708e-06, "loss": 0.3272, "step": 10640 }, { "epoch": 1.2597586941092973, "grad_norm": 2.251511335372925, "learning_rate": 5.306990663155375e-06, "loss": 0.3063, "step": 10650 }, { "epoch": 1.2609415661225456, "grad_norm": 2.3225739002227783, "learning_rate": 5.3062724443380414e-06, "loss": 0.3289, "step": 10660 }, { "epoch": 1.2621244381357937, "grad_norm": 2.219557046890259, "learning_rate": 5.305554225520709e-06, "loss": 0.3263, "step": 10670 }, { "epoch": 1.2633073101490417, "grad_norm": 2.8224964141845703, "learning_rate": 5.304836006703375e-06, "loss": 0.2961, "step": 10680 }, { "epoch": 1.26449018216229, "grad_norm": 2.6272690296173096, "learning_rate": 5.304117787886043e-06, "loss": 0.3388, "step": 10690 }, { "epoch": 1.2656730541755383, "grad_norm": 3.0494840145111084, "learning_rate": 5.30339956906871e-06, "loss": 0.3588, "step": 10700 }, { "epoch": 1.2668559261887864, "grad_norm": 2.026481866836548, "learning_rate": 5.302681350251377e-06, "loss": 0.3125, "step": 10710 }, { "epoch": 1.2680387982020345, "grad_norm": 2.3703815937042236, "learning_rate": 5.301963131434044e-06, "loss": 0.2892, "step": 10720 }, { "epoch": 1.2692216702152828, "grad_norm": 3.1509850025177, "learning_rate": 5.301244912616711e-06, "loss": 0.3643, "step": 10730 }, { "epoch": 1.2704045422285308, "grad_norm": 2.430760145187378, "learning_rate": 5.300526693799378e-06, "loss": 0.3583, "step": 10740 }, { "epoch": 1.271587414241779, "grad_norm": 3.22849702835083, "learning_rate": 5.299808474982045e-06, "loss": 0.3373, "step": 10750 }, { "epoch": 1.2727702862550272, "grad_norm": 2.1429686546325684, "learning_rate": 5.2990902561647115e-06, "loss": 0.3179, "step": 10760 }, { "epoch": 1.2739531582682755, "grad_norm": 3.37874174118042, "learning_rate": 5.2983720373473784e-06, "loss": 0.3002, "step": 10770 }, { "epoch": 1.2751360302815236, "grad_norm": 2.9585418701171875, "learning_rate": 5.297653818530045e-06, "loss": 0.302, "step": 10780 }, { "epoch": 1.2763189022947716, "grad_norm": 2.9790499210357666, "learning_rate": 5.296935599712712e-06, "loss": 0.3347, "step": 10790 }, { "epoch": 1.27750177430802, "grad_norm": 3.7681424617767334, "learning_rate": 5.296217380895379e-06, "loss": 0.2844, "step": 10800 }, { "epoch": 1.278684646321268, "grad_norm": 3.4820749759674072, "learning_rate": 5.295499162078047e-06, "loss": 0.3105, "step": 10810 }, { "epoch": 1.2798675183345163, "grad_norm": 3.544236421585083, "learning_rate": 5.294780943260713e-06, "loss": 0.3314, "step": 10820 }, { "epoch": 1.2810503903477644, "grad_norm": 2.3502871990203857, "learning_rate": 5.294062724443381e-06, "loss": 0.3441, "step": 10830 }, { "epoch": 1.2822332623610126, "grad_norm": 3.532820701599121, "learning_rate": 5.293344505626047e-06, "loss": 0.3264, "step": 10840 }, { "epoch": 1.2834161343742607, "grad_norm": 2.5632216930389404, "learning_rate": 5.292626286808715e-06, "loss": 0.3387, "step": 10850 }, { "epoch": 1.2845990063875088, "grad_norm": 2.079050064086914, "learning_rate": 5.291908067991382e-06, "loss": 0.3391, "step": 10860 }, { "epoch": 1.285781878400757, "grad_norm": 2.3422341346740723, "learning_rate": 5.2911898491740485e-06, "loss": 0.355, "step": 10870 }, { "epoch": 1.2869647504140052, "grad_norm": 3.7250454425811768, "learning_rate": 5.2904716303567154e-06, "loss": 0.3188, "step": 10880 }, { "epoch": 1.2881476224272534, "grad_norm": 2.586632013320923, "learning_rate": 5.289753411539382e-06, "loss": 0.2941, "step": 10890 }, { "epoch": 1.2893304944405015, "grad_norm": 3.148038387298584, "learning_rate": 5.28903519272205e-06, "loss": 0.3393, "step": 10900 }, { "epoch": 1.2905133664537498, "grad_norm": 2.8730502128601074, "learning_rate": 5.288316973904716e-06, "loss": 0.3557, "step": 10910 }, { "epoch": 1.2916962384669979, "grad_norm": 2.5956265926361084, "learning_rate": 5.287598755087384e-06, "loss": 0.3339, "step": 10920 }, { "epoch": 1.292879110480246, "grad_norm": 3.057295083999634, "learning_rate": 5.28688053627005e-06, "loss": 0.2943, "step": 10930 }, { "epoch": 1.2940619824934942, "grad_norm": 2.643946647644043, "learning_rate": 5.286162317452718e-06, "loss": 0.3291, "step": 10940 }, { "epoch": 1.2952448545067423, "grad_norm": 2.0065228939056396, "learning_rate": 5.285444098635384e-06, "loss": 0.3167, "step": 10950 }, { "epoch": 1.2964277265199906, "grad_norm": 2.5850989818573, "learning_rate": 5.284725879818052e-06, "loss": 0.3454, "step": 10960 }, { "epoch": 1.2976105985332387, "grad_norm": 2.7382800579071045, "learning_rate": 5.284007661000719e-06, "loss": 0.3218, "step": 10970 }, { "epoch": 1.298793470546487, "grad_norm": 3.3061835765838623, "learning_rate": 5.2832894421833855e-06, "loss": 0.2986, "step": 10980 }, { "epoch": 1.299976342559735, "grad_norm": 3.0424976348876953, "learning_rate": 5.2825712233660524e-06, "loss": 0.3172, "step": 10990 }, { "epoch": 1.3011592145729831, "grad_norm": 3.1832244396209717, "learning_rate": 5.281853004548719e-06, "loss": 0.3074, "step": 11000 }, { "epoch": 1.3023420865862314, "grad_norm": 2.583782434463501, "learning_rate": 5.281134785731386e-06, "loss": 0.3497, "step": 11010 }, { "epoch": 1.3035249585994795, "grad_norm": 2.6923210620880127, "learning_rate": 5.280416566914053e-06, "loss": 0.314, "step": 11020 }, { "epoch": 1.3047078306127278, "grad_norm": 2.3665506839752197, "learning_rate": 5.27969834809672e-06, "loss": 0.351, "step": 11030 }, { "epoch": 1.3058907026259758, "grad_norm": 2.606867790222168, "learning_rate": 5.278980129279387e-06, "loss": 0.3156, "step": 11040 }, { "epoch": 1.3070735746392241, "grad_norm": 2.3391175270080566, "learning_rate": 5.278261910462054e-06, "loss": 0.3196, "step": 11050 }, { "epoch": 1.3082564466524722, "grad_norm": 2.604893207550049, "learning_rate": 5.277543691644721e-06, "loss": 0.3428, "step": 11060 }, { "epoch": 1.3094393186657203, "grad_norm": 2.3523573875427246, "learning_rate": 5.276825472827388e-06, "loss": 0.3045, "step": 11070 }, { "epoch": 1.3106221906789686, "grad_norm": 2.6932084560394287, "learning_rate": 5.276107254010056e-06, "loss": 0.3248, "step": 11080 }, { "epoch": 1.3118050626922166, "grad_norm": 3.2833149433135986, "learning_rate": 5.275389035192722e-06, "loss": 0.2873, "step": 11090 }, { "epoch": 1.312987934705465, "grad_norm": 2.871731996536255, "learning_rate": 5.2746708163753894e-06, "loss": 0.3365, "step": 11100 }, { "epoch": 1.314170806718713, "grad_norm": 2.786803722381592, "learning_rate": 5.273952597558056e-06, "loss": 0.3591, "step": 11110 }, { "epoch": 1.3153536787319613, "grad_norm": 1.9333808422088623, "learning_rate": 5.273234378740723e-06, "loss": 0.3163, "step": 11120 }, { "epoch": 1.3165365507452094, "grad_norm": 2.6212830543518066, "learning_rate": 5.27251615992339e-06, "loss": 0.3183, "step": 11130 }, { "epoch": 1.3177194227584574, "grad_norm": 2.1172983646392822, "learning_rate": 5.271797941106057e-06, "loss": 0.3007, "step": 11140 }, { "epoch": 1.3189022947717057, "grad_norm": 2.256084680557251, "learning_rate": 5.271079722288724e-06, "loss": 0.3298, "step": 11150 }, { "epoch": 1.3200851667849538, "grad_norm": 2.8654685020446777, "learning_rate": 5.270361503471391e-06, "loss": 0.2992, "step": 11160 }, { "epoch": 1.321268038798202, "grad_norm": 2.6788671016693115, "learning_rate": 5.269643284654059e-06, "loss": 0.357, "step": 11170 }, { "epoch": 1.3224509108114502, "grad_norm": 3.450791358947754, "learning_rate": 5.268925065836725e-06, "loss": 0.3608, "step": 11180 }, { "epoch": 1.3236337828246985, "grad_norm": 3.0075740814208984, "learning_rate": 5.268206847019393e-06, "loss": 0.3562, "step": 11190 }, { "epoch": 1.3248166548379465, "grad_norm": 2.720026969909668, "learning_rate": 5.267488628202059e-06, "loss": 0.3092, "step": 11200 }, { "epoch": 1.3259995268511946, "grad_norm": 2.3526370525360107, "learning_rate": 5.2667704093847264e-06, "loss": 0.3435, "step": 11210 }, { "epoch": 1.327182398864443, "grad_norm": 2.9320015907287598, "learning_rate": 5.2660521905673925e-06, "loss": 0.3487, "step": 11220 }, { "epoch": 1.328365270877691, "grad_norm": 4.365499973297119, "learning_rate": 5.26533397175006e-06, "loss": 0.3665, "step": 11230 }, { "epoch": 1.3295481428909393, "grad_norm": 2.805607795715332, "learning_rate": 5.264615752932727e-06, "loss": 0.3094, "step": 11240 }, { "epoch": 1.3307310149041873, "grad_norm": 2.4460062980651855, "learning_rate": 5.263897534115394e-06, "loss": 0.3369, "step": 11250 }, { "epoch": 1.3319138869174356, "grad_norm": 2.7052359580993652, "learning_rate": 5.263179315298061e-06, "loss": 0.2975, "step": 11260 }, { "epoch": 1.3330967589306837, "grad_norm": 2.7273125648498535, "learning_rate": 5.262461096480728e-06, "loss": 0.3063, "step": 11270 }, { "epoch": 1.3342796309439318, "grad_norm": 3.5072457790374756, "learning_rate": 5.261742877663395e-06, "loss": 0.3081, "step": 11280 }, { "epoch": 1.33546250295718, "grad_norm": 2.7681143283843994, "learning_rate": 5.261024658846062e-06, "loss": 0.2928, "step": 11290 }, { "epoch": 1.3366453749704281, "grad_norm": 3.2708802223205566, "learning_rate": 5.260306440028729e-06, "loss": 0.394, "step": 11300 }, { "epoch": 1.3378282469836764, "grad_norm": 2.7023983001708984, "learning_rate": 5.259588221211396e-06, "loss": 0.2943, "step": 11310 }, { "epoch": 1.3390111189969245, "grad_norm": 4.062198638916016, "learning_rate": 5.258870002394063e-06, "loss": 0.3312, "step": 11320 }, { "epoch": 1.3401939910101728, "grad_norm": 2.1799333095550537, "learning_rate": 5.2581517835767295e-06, "loss": 0.351, "step": 11330 }, { "epoch": 1.3413768630234209, "grad_norm": 2.283543109893799, "learning_rate": 5.257433564759396e-06, "loss": 0.3142, "step": 11340 }, { "epoch": 1.342559735036669, "grad_norm": 2.688551664352417, "learning_rate": 5.256715345942064e-06, "loss": 0.3193, "step": 11350 }, { "epoch": 1.3437426070499172, "grad_norm": 2.31280517578125, "learning_rate": 5.255997127124731e-06, "loss": 0.364, "step": 11360 }, { "epoch": 1.3449254790631653, "grad_norm": 1.948833703994751, "learning_rate": 5.255278908307398e-06, "loss": 0.3257, "step": 11370 }, { "epoch": 1.3461083510764136, "grad_norm": 2.9267709255218506, "learning_rate": 5.254560689490065e-06, "loss": 0.3508, "step": 11380 }, { "epoch": 1.3472912230896616, "grad_norm": 2.355447292327881, "learning_rate": 5.253842470672732e-06, "loss": 0.2633, "step": 11390 }, { "epoch": 1.34847409510291, "grad_norm": 3.1869235038757324, "learning_rate": 5.253124251855399e-06, "loss": 0.3702, "step": 11400 }, { "epoch": 1.349656967116158, "grad_norm": 2.661611557006836, "learning_rate": 5.252406033038066e-06, "loss": 0.3112, "step": 11410 }, { "epoch": 1.350839839129406, "grad_norm": 2.0842931270599365, "learning_rate": 5.251687814220733e-06, "loss": 0.3578, "step": 11420 }, { "epoch": 1.3520227111426544, "grad_norm": 2.38382887840271, "learning_rate": 5.2509695954033996e-06, "loss": 0.2996, "step": 11430 }, { "epoch": 1.3532055831559027, "grad_norm": 4.086627960205078, "learning_rate": 5.2502513765860665e-06, "loss": 0.3499, "step": 11440 }, { "epoch": 1.3543884551691507, "grad_norm": 2.342360496520996, "learning_rate": 5.249533157768733e-06, "loss": 0.3639, "step": 11450 }, { "epoch": 1.3555713271823988, "grad_norm": 2.4554357528686523, "learning_rate": 5.248814938951401e-06, "loss": 0.3128, "step": 11460 }, { "epoch": 1.356754199195647, "grad_norm": 3.7021262645721436, "learning_rate": 5.248096720134067e-06, "loss": 0.2959, "step": 11470 }, { "epoch": 1.3579370712088952, "grad_norm": 2.7638843059539795, "learning_rate": 5.247378501316735e-06, "loss": 0.3535, "step": 11480 }, { "epoch": 1.3591199432221432, "grad_norm": 2.3986899852752686, "learning_rate": 5.246660282499401e-06, "loss": 0.3337, "step": 11490 }, { "epoch": 1.3603028152353915, "grad_norm": 2.0446488857269287, "learning_rate": 5.245942063682069e-06, "loss": 0.2912, "step": 11500 }, { "epoch": 1.3614856872486398, "grad_norm": 3.6219053268432617, "learning_rate": 5.245223844864736e-06, "loss": 0.3002, "step": 11510 }, { "epoch": 1.362668559261888, "grad_norm": 3.7377607822418213, "learning_rate": 5.244505626047403e-06, "loss": 0.3342, "step": 11520 }, { "epoch": 1.363851431275136, "grad_norm": 2.9352986812591553, "learning_rate": 5.24378740723007e-06, "loss": 0.332, "step": 11530 }, { "epoch": 1.3650343032883843, "grad_norm": 2.2465410232543945, "learning_rate": 5.2430691884127366e-06, "loss": 0.3389, "step": 11540 }, { "epoch": 1.3662171753016323, "grad_norm": 2.2292497158050537, "learning_rate": 5.2423509695954035e-06, "loss": 0.3494, "step": 11550 }, { "epoch": 1.3674000473148804, "grad_norm": 3.6063504219055176, "learning_rate": 5.24163275077807e-06, "loss": 0.3452, "step": 11560 }, { "epoch": 1.3685829193281287, "grad_norm": 4.247971057891846, "learning_rate": 5.240914531960737e-06, "loss": 0.3217, "step": 11570 }, { "epoch": 1.369765791341377, "grad_norm": 2.7763800621032715, "learning_rate": 5.240196313143404e-06, "loss": 0.3203, "step": 11580 }, { "epoch": 1.370948663354625, "grad_norm": 1.9633818864822388, "learning_rate": 5.239478094326071e-06, "loss": 0.3002, "step": 11590 }, { "epoch": 1.3721315353678731, "grad_norm": 2.645674467086792, "learning_rate": 5.238759875508738e-06, "loss": 0.3495, "step": 11600 }, { "epoch": 1.3733144073811214, "grad_norm": 2.3986480236053467, "learning_rate": 5.238041656691406e-06, "loss": 0.2729, "step": 11610 }, { "epoch": 1.3744972793943695, "grad_norm": 2.614625930786133, "learning_rate": 5.237323437874073e-06, "loss": 0.3146, "step": 11620 }, { "epoch": 1.3756801514076176, "grad_norm": 2.432405948638916, "learning_rate": 5.23660521905674e-06, "loss": 0.3236, "step": 11630 }, { "epoch": 1.3768630234208659, "grad_norm": 2.2257299423217773, "learning_rate": 5.235887000239407e-06, "loss": 0.2882, "step": 11640 }, { "epoch": 1.3780458954341142, "grad_norm": 3.6342475414276123, "learning_rate": 5.2351687814220736e-06, "loss": 0.3301, "step": 11650 }, { "epoch": 1.3792287674473622, "grad_norm": 2.5768871307373047, "learning_rate": 5.2344505626047405e-06, "loss": 0.3673, "step": 11660 }, { "epoch": 1.3804116394606103, "grad_norm": 3.101595640182495, "learning_rate": 5.233732343787407e-06, "loss": 0.3189, "step": 11670 }, { "epoch": 1.3815945114738586, "grad_norm": 2.9230449199676514, "learning_rate": 5.233014124970074e-06, "loss": 0.3122, "step": 11680 }, { "epoch": 1.3827773834871067, "grad_norm": 3.3076393604278564, "learning_rate": 5.232295906152741e-06, "loss": 0.3312, "step": 11690 }, { "epoch": 1.3839602555003547, "grad_norm": 2.8266141414642334, "learning_rate": 5.231577687335408e-06, "loss": 0.3375, "step": 11700 }, { "epoch": 1.385143127513603, "grad_norm": 3.0489702224731445, "learning_rate": 5.230859468518075e-06, "loss": 0.3299, "step": 11710 }, { "epoch": 1.3863259995268513, "grad_norm": 2.535065174102783, "learning_rate": 5.230141249700742e-06, "loss": 0.2962, "step": 11720 }, { "epoch": 1.3875088715400994, "grad_norm": 2.4045450687408447, "learning_rate": 5.22942303088341e-06, "loss": 0.3162, "step": 11730 }, { "epoch": 1.3886917435533475, "grad_norm": 3.010890483856201, "learning_rate": 5.228704812066076e-06, "loss": 0.346, "step": 11740 }, { "epoch": 1.3898746155665958, "grad_norm": 2.096226215362549, "learning_rate": 5.227986593248744e-06, "loss": 0.3457, "step": 11750 }, { "epoch": 1.3910574875798438, "grad_norm": 2.902386426925659, "learning_rate": 5.22726837443141e-06, "loss": 0.361, "step": 11760 }, { "epoch": 1.392240359593092, "grad_norm": 2.158539056777954, "learning_rate": 5.2265501556140775e-06, "loss": 0.3379, "step": 11770 }, { "epoch": 1.3934232316063402, "grad_norm": 2.7760837078094482, "learning_rate": 5.2258319367967436e-06, "loss": 0.346, "step": 11780 }, { "epoch": 1.3946061036195885, "grad_norm": 2.0591464042663574, "learning_rate": 5.225113717979411e-06, "loss": 0.3663, "step": 11790 }, { "epoch": 1.3957889756328365, "grad_norm": 2.9406681060791016, "learning_rate": 5.224395499162078e-06, "loss": 0.3514, "step": 11800 }, { "epoch": 1.3969718476460846, "grad_norm": 2.612901210784912, "learning_rate": 5.223677280344745e-06, "loss": 0.3659, "step": 11810 }, { "epoch": 1.398154719659333, "grad_norm": 3.072481870651245, "learning_rate": 5.222959061527412e-06, "loss": 0.3547, "step": 11820 }, { "epoch": 1.399337591672581, "grad_norm": 2.3625690937042236, "learning_rate": 5.222240842710079e-06, "loss": 0.3135, "step": 11830 }, { "epoch": 1.4005204636858293, "grad_norm": 2.9312098026275635, "learning_rate": 5.221522623892746e-06, "loss": 0.3182, "step": 11840 }, { "epoch": 1.4017033356990773, "grad_norm": 3.0753791332244873, "learning_rate": 5.220804405075413e-06, "loss": 0.3232, "step": 11850 }, { "epoch": 1.4028862077123256, "grad_norm": 3.2985544204711914, "learning_rate": 5.22008618625808e-06, "loss": 0.3542, "step": 11860 }, { "epoch": 1.4040690797255737, "grad_norm": 3.0760841369628906, "learning_rate": 5.219367967440747e-06, "loss": 0.3421, "step": 11870 }, { "epoch": 1.4052519517388218, "grad_norm": 2.818378210067749, "learning_rate": 5.2186497486234145e-06, "loss": 0.324, "step": 11880 }, { "epoch": 1.40643482375207, "grad_norm": 2.093960762023926, "learning_rate": 5.217931529806081e-06, "loss": 0.324, "step": 11890 }, { "epoch": 1.4076176957653181, "grad_norm": 2.4740841388702393, "learning_rate": 5.217213310988748e-06, "loss": 0.3533, "step": 11900 }, { "epoch": 1.4088005677785664, "grad_norm": 2.261019229888916, "learning_rate": 5.216495092171415e-06, "loss": 0.3465, "step": 11910 }, { "epoch": 1.4099834397918145, "grad_norm": 2.8680613040924072, "learning_rate": 5.215776873354082e-06, "loss": 0.3562, "step": 11920 }, { "epoch": 1.4111663118050628, "grad_norm": 2.969210386276245, "learning_rate": 5.215058654536749e-06, "loss": 0.3316, "step": 11930 }, { "epoch": 1.4123491838183109, "grad_norm": 2.5013744831085205, "learning_rate": 5.214340435719416e-06, "loss": 0.3386, "step": 11940 }, { "epoch": 1.413532055831559, "grad_norm": 3.086883068084717, "learning_rate": 5.213622216902083e-06, "loss": 0.3355, "step": 11950 }, { "epoch": 1.4147149278448072, "grad_norm": 2.6564736366271973, "learning_rate": 5.21290399808475e-06, "loss": 0.2641, "step": 11960 }, { "epoch": 1.4158977998580553, "grad_norm": 3.8855438232421875, "learning_rate": 5.212185779267417e-06, "loss": 0.3573, "step": 11970 }, { "epoch": 1.4170806718713036, "grad_norm": 3.215693473815918, "learning_rate": 5.211467560450084e-06, "loss": 0.3343, "step": 11980 }, { "epoch": 1.4182635438845517, "grad_norm": 2.6905269622802734, "learning_rate": 5.210749341632751e-06, "loss": 0.3521, "step": 11990 }, { "epoch": 1.4194464158978, "grad_norm": 2.1683082580566406, "learning_rate": 5.210031122815418e-06, "loss": 0.3104, "step": 12000 }, { "epoch": 1.420629287911048, "grad_norm": 1.7524878978729248, "learning_rate": 5.2093129039980845e-06, "loss": 0.3374, "step": 12010 }, { "epoch": 1.421812159924296, "grad_norm": 2.7673189640045166, "learning_rate": 5.208594685180752e-06, "loss": 0.2807, "step": 12020 }, { "epoch": 1.4229950319375444, "grad_norm": 3.531315565109253, "learning_rate": 5.207876466363418e-06, "loss": 0.2941, "step": 12030 }, { "epoch": 1.4241779039507925, "grad_norm": 3.0989954471588135, "learning_rate": 5.207158247546086e-06, "loss": 0.3545, "step": 12040 }, { "epoch": 1.4253607759640408, "grad_norm": 3.6251354217529297, "learning_rate": 5.206440028728752e-06, "loss": 0.3375, "step": 12050 }, { "epoch": 1.4265436479772888, "grad_norm": 2.1570992469787598, "learning_rate": 5.20572180991142e-06, "loss": 0.3477, "step": 12060 }, { "epoch": 1.4277265199905371, "grad_norm": 2.2915897369384766, "learning_rate": 5.205003591094087e-06, "loss": 0.3349, "step": 12070 }, { "epoch": 1.4289093920037852, "grad_norm": 2.6954102516174316, "learning_rate": 5.204285372276754e-06, "loss": 0.3117, "step": 12080 }, { "epoch": 1.4300922640170333, "grad_norm": 2.97685170173645, "learning_rate": 5.203567153459421e-06, "loss": 0.2804, "step": 12090 }, { "epoch": 1.4312751360302816, "grad_norm": 2.3667688369750977, "learning_rate": 5.202848934642088e-06, "loss": 0.3374, "step": 12100 }, { "epoch": 1.4324580080435296, "grad_norm": 2.4757254123687744, "learning_rate": 5.2021307158247546e-06, "loss": 0.299, "step": 12110 }, { "epoch": 1.433640880056778, "grad_norm": 2.718502998352051, "learning_rate": 5.2014124970074215e-06, "loss": 0.3317, "step": 12120 }, { "epoch": 1.434823752070026, "grad_norm": 2.0895771980285645, "learning_rate": 5.200694278190089e-06, "loss": 0.3412, "step": 12130 }, { "epoch": 1.4360066240832743, "grad_norm": 3.8666303157806396, "learning_rate": 5.199976059372755e-06, "loss": 0.3479, "step": 12140 }, { "epoch": 1.4371894960965224, "grad_norm": 2.6922268867492676, "learning_rate": 5.199257840555423e-06, "loss": 0.3274, "step": 12150 }, { "epoch": 1.4383723681097704, "grad_norm": 1.681287407875061, "learning_rate": 5.19853962173809e-06, "loss": 0.3032, "step": 12160 }, { "epoch": 1.4395552401230187, "grad_norm": 3.0996005535125732, "learning_rate": 5.197821402920757e-06, "loss": 0.3212, "step": 12170 }, { "epoch": 1.4407381121362668, "grad_norm": 3.6387147903442383, "learning_rate": 5.197103184103424e-06, "loss": 0.3263, "step": 12180 }, { "epoch": 1.441920984149515, "grad_norm": 2.525237798690796, "learning_rate": 5.196384965286091e-06, "loss": 0.3255, "step": 12190 }, { "epoch": 1.4431038561627632, "grad_norm": 2.6674628257751465, "learning_rate": 5.195666746468758e-06, "loss": 0.3566, "step": 12200 }, { "epoch": 1.4442867281760114, "grad_norm": 3.596350908279419, "learning_rate": 5.194948527651425e-06, "loss": 0.3259, "step": 12210 }, { "epoch": 1.4454696001892595, "grad_norm": 2.30674409866333, "learning_rate": 5.1942303088340916e-06, "loss": 0.3329, "step": 12220 }, { "epoch": 1.4466524722025076, "grad_norm": 1.9230536222457886, "learning_rate": 5.1935120900167585e-06, "loss": 0.3326, "step": 12230 }, { "epoch": 1.4478353442157559, "grad_norm": 2.4507272243499756, "learning_rate": 5.192793871199425e-06, "loss": 0.31, "step": 12240 }, { "epoch": 1.449018216229004, "grad_norm": 2.7487950325012207, "learning_rate": 5.192075652382092e-06, "loss": 0.3455, "step": 12250 }, { "epoch": 1.4502010882422522, "grad_norm": 2.4836816787719727, "learning_rate": 5.191357433564759e-06, "loss": 0.3169, "step": 12260 }, { "epoch": 1.4513839602555003, "grad_norm": 3.292184591293335, "learning_rate": 5.190639214747427e-06, "loss": 0.3226, "step": 12270 }, { "epoch": 1.4525668322687486, "grad_norm": 2.257387638092041, "learning_rate": 5.189920995930093e-06, "loss": 0.2974, "step": 12280 }, { "epoch": 1.4537497042819967, "grad_norm": 3.23683762550354, "learning_rate": 5.189202777112761e-06, "loss": 0.3179, "step": 12290 }, { "epoch": 1.4549325762952448, "grad_norm": 3.1413047313690186, "learning_rate": 5.188484558295427e-06, "loss": 0.3107, "step": 12300 }, { "epoch": 1.456115448308493, "grad_norm": 3.582163095474243, "learning_rate": 5.187766339478095e-06, "loss": 0.3294, "step": 12310 }, { "epoch": 1.4572983203217411, "grad_norm": 2.904231548309326, "learning_rate": 5.187048120660761e-06, "loss": 0.3329, "step": 12320 }, { "epoch": 1.4584811923349894, "grad_norm": 3.519880533218384, "learning_rate": 5.1863299018434286e-06, "loss": 0.3343, "step": 12330 }, { "epoch": 1.4596640643482375, "grad_norm": 3.1488707065582275, "learning_rate": 5.1856116830260955e-06, "loss": 0.3015, "step": 12340 }, { "epoch": 1.4608469363614858, "grad_norm": 2.2017030715942383, "learning_rate": 5.184893464208762e-06, "loss": 0.3118, "step": 12350 }, { "epoch": 1.4620298083747338, "grad_norm": 4.503572940826416, "learning_rate": 5.184175245391429e-06, "loss": 0.3325, "step": 12360 }, { "epoch": 1.463212680387982, "grad_norm": 2.683884859085083, "learning_rate": 5.183457026574096e-06, "loss": 0.3451, "step": 12370 }, { "epoch": 1.4643955524012302, "grad_norm": 2.986691951751709, "learning_rate": 5.182738807756764e-06, "loss": 0.3588, "step": 12380 }, { "epoch": 1.4655784244144783, "grad_norm": 2.1415863037109375, "learning_rate": 5.18202058893943e-06, "loss": 0.3279, "step": 12390 }, { "epoch": 1.4667612964277266, "grad_norm": 2.3410966396331787, "learning_rate": 5.181302370122098e-06, "loss": 0.3175, "step": 12400 }, { "epoch": 1.4679441684409746, "grad_norm": 3.037393808364868, "learning_rate": 5.180584151304764e-06, "loss": 0.3485, "step": 12410 }, { "epoch": 1.469127040454223, "grad_norm": 2.986999034881592, "learning_rate": 5.179865932487432e-06, "loss": 0.3268, "step": 12420 }, { "epoch": 1.470309912467471, "grad_norm": 2.3629612922668457, "learning_rate": 5.179147713670098e-06, "loss": 0.2918, "step": 12430 }, { "epoch": 1.471492784480719, "grad_norm": 3.546999931335449, "learning_rate": 5.1784294948527656e-06, "loss": 0.3504, "step": 12440 }, { "epoch": 1.4726756564939674, "grad_norm": 2.512006998062134, "learning_rate": 5.1777112760354325e-06, "loss": 0.3147, "step": 12450 }, { "epoch": 1.4738585285072154, "grad_norm": 2.3637118339538574, "learning_rate": 5.176993057218099e-06, "loss": 0.3577, "step": 12460 }, { "epoch": 1.4750414005204637, "grad_norm": 3.7535767555236816, "learning_rate": 5.176274838400766e-06, "loss": 0.3255, "step": 12470 }, { "epoch": 1.4762242725337118, "grad_norm": 2.303182363510132, "learning_rate": 5.175556619583433e-06, "loss": 0.3162, "step": 12480 }, { "epoch": 1.47740714454696, "grad_norm": 2.876225471496582, "learning_rate": 5.1748384007661e-06, "loss": 0.2976, "step": 12490 }, { "epoch": 1.4785900165602082, "grad_norm": 2.6297693252563477, "learning_rate": 5.174120181948767e-06, "loss": 0.3224, "step": 12500 }, { "epoch": 1.4797728885734562, "grad_norm": 3.692824125289917, "learning_rate": 5.173401963131434e-06, "loss": 0.3632, "step": 12510 }, { "epoch": 1.4809557605867045, "grad_norm": 3.0064969062805176, "learning_rate": 5.172683744314101e-06, "loss": 0.3408, "step": 12520 }, { "epoch": 1.4821386325999528, "grad_norm": 3.15277099609375, "learning_rate": 5.171965525496768e-06, "loss": 0.3292, "step": 12530 }, { "epoch": 1.483321504613201, "grad_norm": 3.2995247840881348, "learning_rate": 5.171247306679436e-06, "loss": 0.3373, "step": 12540 }, { "epoch": 1.484504376626449, "grad_norm": 2.506042957305908, "learning_rate": 5.170529087862102e-06, "loss": 0.2838, "step": 12550 }, { "epoch": 1.4856872486396973, "grad_norm": 4.044424533843994, "learning_rate": 5.1698108690447695e-06, "loss": 0.3801, "step": 12560 }, { "epoch": 1.4868701206529453, "grad_norm": 2.122138261795044, "learning_rate": 5.1690926502274355e-06, "loss": 0.3225, "step": 12570 }, { "epoch": 1.4880529926661934, "grad_norm": 2.4794294834136963, "learning_rate": 5.168374431410103e-06, "loss": 0.3351, "step": 12580 }, { "epoch": 1.4892358646794417, "grad_norm": 2.462641716003418, "learning_rate": 5.167656212592769e-06, "loss": 0.3448, "step": 12590 }, { "epoch": 1.49041873669269, "grad_norm": 2.709441900253296, "learning_rate": 5.166937993775437e-06, "loss": 0.3474, "step": 12600 }, { "epoch": 1.491601608705938, "grad_norm": 1.9150044918060303, "learning_rate": 5.166219774958104e-06, "loss": 0.302, "step": 12610 }, { "epoch": 1.4927844807191861, "grad_norm": 3.5195353031158447, "learning_rate": 5.165501556140771e-06, "loss": 0.2885, "step": 12620 }, { "epoch": 1.4939673527324344, "grad_norm": 2.907759666442871, "learning_rate": 5.164783337323438e-06, "loss": 0.3514, "step": 12630 }, { "epoch": 1.4951502247456825, "grad_norm": 2.093947410583496, "learning_rate": 5.164065118506105e-06, "loss": 0.3439, "step": 12640 }, { "epoch": 1.4963330967589306, "grad_norm": 2.4467952251434326, "learning_rate": 5.163346899688773e-06, "loss": 0.3049, "step": 12650 }, { "epoch": 1.4975159687721789, "grad_norm": 3.3428568840026855, "learning_rate": 5.162628680871439e-06, "loss": 0.3278, "step": 12660 }, { "epoch": 1.4986988407854271, "grad_norm": 2.2584049701690674, "learning_rate": 5.1619104620541065e-06, "loss": 0.2973, "step": 12670 }, { "epoch": 1.4998817127986752, "grad_norm": 2.3084425926208496, "learning_rate": 5.1611922432367725e-06, "loss": 0.2894, "step": 12680 }, { "epoch": 1.5003548616039746, "eval_accuracy": 0.8551918022424061, "eval_loss": 0.3492177426815033, "eval_runtime": 78.9912, "eval_safe_aucpr": 0.9095762579248807, "eval_safe_f1": 0.8370795979861878, "eval_safe_fpr": 0.13168515141550288, "eval_safe_precision": 0.835437836222355, "eval_safe_recall": 0.8387278250759479, "eval_samples_per_second": 761.022, "eval_steps_per_second": 11.9, "eval_unsafe_aucpr": 0.9499714959856256, "eval_unsafe_f1": 0.869679776037846, "eval_unsafe_fpr": 0.16127217492405146, "eval_unsafe_precision": 0.8710490013794758, "eval_unsafe_recall": 0.8683148485844967, "step": 12684 }, { "epoch": 1.5010645848119233, "grad_norm": 3.8595757484436035, "learning_rate": 5.16047402441944e-06, "loss": 0.3577, "step": 12690 }, { "epoch": 1.5022474568251716, "grad_norm": 2.9607419967651367, "learning_rate": 5.159755805602106e-06, "loss": 0.3321, "step": 12700 }, { "epoch": 1.5034303288384196, "grad_norm": 2.4730141162872314, "learning_rate": 5.159037586784774e-06, "loss": 0.3155, "step": 12710 }, { "epoch": 1.5046132008516677, "grad_norm": 2.097149133682251, "learning_rate": 5.158319367967441e-06, "loss": 0.3253, "step": 12720 }, { "epoch": 1.505796072864916, "grad_norm": 3.887446641921997, "learning_rate": 5.157601149150108e-06, "loss": 0.3647, "step": 12730 }, { "epoch": 1.5069789448781643, "grad_norm": 2.82692289352417, "learning_rate": 5.156882930332775e-06, "loss": 0.3596, "step": 12740 }, { "epoch": 1.5081618168914124, "grad_norm": 2.2878870964050293, "learning_rate": 5.156164711515442e-06, "loss": 0.3325, "step": 12750 }, { "epoch": 1.5093446889046604, "grad_norm": 1.9933377504348755, "learning_rate": 5.155446492698109e-06, "loss": 0.3292, "step": 12760 }, { "epoch": 1.5105275609179087, "grad_norm": 2.473442792892456, "learning_rate": 5.154728273880776e-06, "loss": 0.3369, "step": 12770 }, { "epoch": 1.5117104329311568, "grad_norm": 2.962991237640381, "learning_rate": 5.154010055063443e-06, "loss": 0.3111, "step": 12780 }, { "epoch": 1.5128933049444049, "grad_norm": 3.5352776050567627, "learning_rate": 5.1532918362461095e-06, "loss": 0.3317, "step": 12790 }, { "epoch": 1.5140761769576532, "grad_norm": 2.941756010055542, "learning_rate": 5.1525736174287765e-06, "loss": 0.3628, "step": 12800 }, { "epoch": 1.5152590489709015, "grad_norm": 2.2009263038635254, "learning_rate": 5.151855398611444e-06, "loss": 0.3233, "step": 12810 }, { "epoch": 1.5164419209841495, "grad_norm": 1.9298968315124512, "learning_rate": 5.15113717979411e-06, "loss": 0.3409, "step": 12820 }, { "epoch": 1.5176247929973976, "grad_norm": 3.332143545150757, "learning_rate": 5.150418960976778e-06, "loss": 0.3158, "step": 12830 }, { "epoch": 1.518807665010646, "grad_norm": 3.445125102996826, "learning_rate": 5.149700742159444e-06, "loss": 0.3261, "step": 12840 }, { "epoch": 1.519990537023894, "grad_norm": 4.30973482131958, "learning_rate": 5.148982523342112e-06, "loss": 0.3142, "step": 12850 }, { "epoch": 1.521173409037142, "grad_norm": 3.637707471847534, "learning_rate": 5.148264304524778e-06, "loss": 0.3362, "step": 12860 }, { "epoch": 1.5223562810503903, "grad_norm": 1.9568496942520142, "learning_rate": 5.147546085707446e-06, "loss": 0.3156, "step": 12870 }, { "epoch": 1.5235391530636386, "grad_norm": 3.361077308654785, "learning_rate": 5.146827866890113e-06, "loss": 0.3081, "step": 12880 }, { "epoch": 1.5247220250768867, "grad_norm": 2.1092593669891357, "learning_rate": 5.14610964807278e-06, "loss": 0.3118, "step": 12890 }, { "epoch": 1.5259048970901348, "grad_norm": 2.4282894134521484, "learning_rate": 5.1453914292554465e-06, "loss": 0.3418, "step": 12900 }, { "epoch": 1.527087769103383, "grad_norm": 1.910859227180481, "learning_rate": 5.1446732104381135e-06, "loss": 0.3249, "step": 12910 }, { "epoch": 1.5282706411166311, "grad_norm": 1.8701509237289429, "learning_rate": 5.143954991620781e-06, "loss": 0.321, "step": 12920 }, { "epoch": 1.5294535131298792, "grad_norm": 1.975669503211975, "learning_rate": 5.143236772803447e-06, "loss": 0.3111, "step": 12930 }, { "epoch": 1.5306363851431275, "grad_norm": 2.1121163368225098, "learning_rate": 5.142518553986115e-06, "loss": 0.366, "step": 12940 }, { "epoch": 1.5318192571563758, "grad_norm": 2.2763612270355225, "learning_rate": 5.141800335168781e-06, "loss": 0.3514, "step": 12950 }, { "epoch": 1.5330021291696239, "grad_norm": 3.066706895828247, "learning_rate": 5.141082116351449e-06, "loss": 0.2817, "step": 12960 }, { "epoch": 1.534185001182872, "grad_norm": 3.723223924636841, "learning_rate": 5.140363897534115e-06, "loss": 0.311, "step": 12970 }, { "epoch": 1.5353678731961202, "grad_norm": 3.5070247650146484, "learning_rate": 5.139645678716783e-06, "loss": 0.2961, "step": 12980 }, { "epoch": 1.5365507452093683, "grad_norm": 2.911402463912964, "learning_rate": 5.13892745989945e-06, "loss": 0.293, "step": 12990 }, { "epoch": 1.5377336172226164, "grad_norm": 3.4748942852020264, "learning_rate": 5.138209241082117e-06, "loss": 0.3386, "step": 13000 }, { "epoch": 1.5389164892358647, "grad_norm": 3.109605550765991, "learning_rate": 5.1374910222647835e-06, "loss": 0.3143, "step": 13010 }, { "epoch": 1.540099361249113, "grad_norm": 2.4909555912017822, "learning_rate": 5.1367728034474505e-06, "loss": 0.3147, "step": 13020 }, { "epoch": 1.541282233262361, "grad_norm": 4.059962272644043, "learning_rate": 5.136054584630117e-06, "loss": 0.3662, "step": 13030 }, { "epoch": 1.542465105275609, "grad_norm": 2.515399694442749, "learning_rate": 5.135336365812784e-06, "loss": 0.3076, "step": 13040 }, { "epoch": 1.5436479772888574, "grad_norm": 2.5504794120788574, "learning_rate": 5.134618146995451e-06, "loss": 0.3077, "step": 13050 }, { "epoch": 1.5448308493021055, "grad_norm": 3.5667295455932617, "learning_rate": 5.133899928178118e-06, "loss": 0.3484, "step": 13060 }, { "epoch": 1.5460137213153535, "grad_norm": 2.1200897693634033, "learning_rate": 5.133181709360785e-06, "loss": 0.3031, "step": 13070 }, { "epoch": 1.5471965933286018, "grad_norm": 3.417478084564209, "learning_rate": 5.132463490543453e-06, "loss": 0.2872, "step": 13080 }, { "epoch": 1.5483794653418501, "grad_norm": 3.6542742252349854, "learning_rate": 5.131745271726119e-06, "loss": 0.3273, "step": 13090 }, { "epoch": 1.5495623373550982, "grad_norm": 2.444547414779663, "learning_rate": 5.131027052908787e-06, "loss": 0.334, "step": 13100 }, { "epoch": 1.5507452093683463, "grad_norm": 3.357236385345459, "learning_rate": 5.130308834091453e-06, "loss": 0.319, "step": 13110 }, { "epoch": 1.5519280813815945, "grad_norm": 2.1662325859069824, "learning_rate": 5.1295906152741205e-06, "loss": 0.2919, "step": 13120 }, { "epoch": 1.5531109533948428, "grad_norm": 3.5623395442962646, "learning_rate": 5.1288723964567875e-06, "loss": 0.2937, "step": 13130 }, { "epoch": 1.5542938254080907, "grad_norm": 2.6335065364837646, "learning_rate": 5.128154177639454e-06, "loss": 0.3072, "step": 13140 }, { "epoch": 1.555476697421339, "grad_norm": 2.833822727203369, "learning_rate": 5.127435958822121e-06, "loss": 0.3179, "step": 13150 }, { "epoch": 1.5566595694345873, "grad_norm": 2.881520986557007, "learning_rate": 5.126717740004788e-06, "loss": 0.3229, "step": 13160 }, { "epoch": 1.5578424414478353, "grad_norm": 3.164757251739502, "learning_rate": 5.125999521187455e-06, "loss": 0.3208, "step": 13170 }, { "epoch": 1.5590253134610834, "grad_norm": 2.3359062671661377, "learning_rate": 5.125281302370122e-06, "loss": 0.2989, "step": 13180 }, { "epoch": 1.5602081854743317, "grad_norm": 2.1890311241149902, "learning_rate": 5.12456308355279e-06, "loss": 0.3499, "step": 13190 }, { "epoch": 1.56139105748758, "grad_norm": 5.257014751434326, "learning_rate": 5.123844864735456e-06, "loss": 0.3021, "step": 13200 }, { "epoch": 1.5625739295008279, "grad_norm": 3.5936100482940674, "learning_rate": 5.123126645918124e-06, "loss": 0.317, "step": 13210 }, { "epoch": 1.5637568015140761, "grad_norm": 2.7884414196014404, "learning_rate": 5.12240842710079e-06, "loss": 0.2951, "step": 13220 }, { "epoch": 1.5649396735273244, "grad_norm": 3.281930685043335, "learning_rate": 5.1216902082834575e-06, "loss": 0.3239, "step": 13230 }, { "epoch": 1.5661225455405725, "grad_norm": 3.519055128097534, "learning_rate": 5.120971989466124e-06, "loss": 0.3244, "step": 13240 }, { "epoch": 1.5673054175538206, "grad_norm": 3.2428009510040283, "learning_rate": 5.120253770648791e-06, "loss": 0.3556, "step": 13250 }, { "epoch": 1.5684882895670689, "grad_norm": 2.028970956802368, "learning_rate": 5.119535551831458e-06, "loss": 0.3077, "step": 13260 }, { "epoch": 1.5696711615803172, "grad_norm": 2.781470537185669, "learning_rate": 5.118817333014125e-06, "loss": 0.3255, "step": 13270 }, { "epoch": 1.570854033593565, "grad_norm": 2.2510530948638916, "learning_rate": 5.118099114196792e-06, "loss": 0.3051, "step": 13280 }, { "epoch": 1.5720369056068133, "grad_norm": 2.658205986022949, "learning_rate": 5.117380895379459e-06, "loss": 0.3221, "step": 13290 }, { "epoch": 1.5732197776200616, "grad_norm": 2.8818469047546387, "learning_rate": 5.116662676562126e-06, "loss": 0.3675, "step": 13300 }, { "epoch": 1.5744026496333097, "grad_norm": 2.4344351291656494, "learning_rate": 5.115944457744793e-06, "loss": 0.3007, "step": 13310 }, { "epoch": 1.5755855216465577, "grad_norm": 3.1090714931488037, "learning_rate": 5.11522623892746e-06, "loss": 0.3178, "step": 13320 }, { "epoch": 1.576768393659806, "grad_norm": 2.475126266479492, "learning_rate": 5.114508020110127e-06, "loss": 0.3233, "step": 13330 }, { "epoch": 1.5779512656730543, "grad_norm": 2.812148094177246, "learning_rate": 5.113789801292794e-06, "loss": 0.3157, "step": 13340 }, { "epoch": 1.5791341376863024, "grad_norm": 2.5128300189971924, "learning_rate": 5.113071582475461e-06, "loss": 0.3839, "step": 13350 }, { "epoch": 1.5803170096995505, "grad_norm": 2.6277406215667725, "learning_rate": 5.1123533636581275e-06, "loss": 0.2921, "step": 13360 }, { "epoch": 1.5814998817127988, "grad_norm": 2.604107141494751, "learning_rate": 5.111635144840795e-06, "loss": 0.3278, "step": 13370 }, { "epoch": 1.5826827537260468, "grad_norm": 2.4044811725616455, "learning_rate": 5.110916926023461e-06, "loss": 0.3081, "step": 13380 }, { "epoch": 1.583865625739295, "grad_norm": 3.4405109882354736, "learning_rate": 5.110198707206129e-06, "loss": 0.3289, "step": 13390 }, { "epoch": 1.5850484977525432, "grad_norm": 2.8589260578155518, "learning_rate": 5.109480488388796e-06, "loss": 0.3478, "step": 13400 }, { "epoch": 1.5862313697657915, "grad_norm": 2.8309414386749268, "learning_rate": 5.108762269571463e-06, "loss": 0.3041, "step": 13410 }, { "epoch": 1.5874142417790396, "grad_norm": 3.194594383239746, "learning_rate": 5.10804405075413e-06, "loss": 0.3462, "step": 13420 }, { "epoch": 1.5885971137922876, "grad_norm": 2.5025646686553955, "learning_rate": 5.107325831936797e-06, "loss": 0.3289, "step": 13430 }, { "epoch": 1.589779985805536, "grad_norm": 2.4323976039886475, "learning_rate": 5.106607613119464e-06, "loss": 0.3008, "step": 13440 }, { "epoch": 1.590962857818784, "grad_norm": 2.7668864727020264, "learning_rate": 5.105889394302131e-06, "loss": 0.3194, "step": 13450 }, { "epoch": 1.592145729832032, "grad_norm": 2.426161289215088, "learning_rate": 5.1051711754847985e-06, "loss": 0.3612, "step": 13460 }, { "epoch": 1.5933286018452804, "grad_norm": 2.4547650814056396, "learning_rate": 5.1044529566674645e-06, "loss": 0.2981, "step": 13470 }, { "epoch": 1.5945114738585286, "grad_norm": 1.7698646783828735, "learning_rate": 5.103734737850132e-06, "loss": 0.3075, "step": 13480 }, { "epoch": 1.5956943458717767, "grad_norm": 2.407890558242798, "learning_rate": 5.103016519032798e-06, "loss": 0.3229, "step": 13490 }, { "epoch": 1.5968772178850248, "grad_norm": 3.103731393814087, "learning_rate": 5.102298300215466e-06, "loss": 0.3252, "step": 13500 }, { "epoch": 1.598060089898273, "grad_norm": 2.5034029483795166, "learning_rate": 5.101580081398132e-06, "loss": 0.3042, "step": 13510 }, { "epoch": 1.5992429619115212, "grad_norm": 3.7358813285827637, "learning_rate": 5.1008618625808e-06, "loss": 0.3194, "step": 13520 }, { "epoch": 1.6004258339247692, "grad_norm": 2.2295784950256348, "learning_rate": 5.100143643763467e-06, "loss": 0.3506, "step": 13530 }, { "epoch": 1.6016087059380175, "grad_norm": 2.52673077583313, "learning_rate": 5.099425424946134e-06, "loss": 0.3234, "step": 13540 }, { "epoch": 1.6027915779512658, "grad_norm": 2.1552894115448, "learning_rate": 5.098707206128801e-06, "loss": 0.2655, "step": 13550 }, { "epoch": 1.6039744499645139, "grad_norm": 2.6268768310546875, "learning_rate": 5.097988987311468e-06, "loss": 0.2887, "step": 13560 }, { "epoch": 1.605157321977762, "grad_norm": 2.4877986907958984, "learning_rate": 5.097270768494135e-06, "loss": 0.3372, "step": 13570 }, { "epoch": 1.6063401939910102, "grad_norm": 4.144022464752197, "learning_rate": 5.0965525496768015e-06, "loss": 0.3158, "step": 13580 }, { "epoch": 1.6075230660042583, "grad_norm": 2.7924280166625977, "learning_rate": 5.0958343308594685e-06, "loss": 0.2999, "step": 13590 }, { "epoch": 1.6087059380175064, "grad_norm": 2.384228229522705, "learning_rate": 5.095116112042135e-06, "loss": 0.3614, "step": 13600 }, { "epoch": 1.6098888100307547, "grad_norm": 2.802626371383667, "learning_rate": 5.094397893224802e-06, "loss": 0.324, "step": 13610 }, { "epoch": 1.611071682044003, "grad_norm": 2.819253921508789, "learning_rate": 5.093679674407469e-06, "loss": 0.3243, "step": 13620 }, { "epoch": 1.612254554057251, "grad_norm": 3.2491488456726074, "learning_rate": 5.092961455590136e-06, "loss": 0.3314, "step": 13630 }, { "epoch": 1.6134374260704991, "grad_norm": 3.445171356201172, "learning_rate": 5.092243236772804e-06, "loss": 0.3056, "step": 13640 }, { "epoch": 1.6146202980837474, "grad_norm": 3.236532688140869, "learning_rate": 5.091525017955471e-06, "loss": 0.3026, "step": 13650 }, { "epoch": 1.6158031700969955, "grad_norm": 2.369788646697998, "learning_rate": 5.090806799138138e-06, "loss": 0.3126, "step": 13660 }, { "epoch": 1.6169860421102435, "grad_norm": 3.9594063758850098, "learning_rate": 5.090088580320805e-06, "loss": 0.3252, "step": 13670 }, { "epoch": 1.6181689141234918, "grad_norm": 2.6396729946136475, "learning_rate": 5.089370361503472e-06, "loss": 0.3687, "step": 13680 }, { "epoch": 1.6193517861367401, "grad_norm": 2.9766297340393066, "learning_rate": 5.0886521426861385e-06, "loss": 0.3366, "step": 13690 }, { "epoch": 1.6205346581499882, "grad_norm": 2.8285045623779297, "learning_rate": 5.0879339238688054e-06, "loss": 0.3369, "step": 13700 }, { "epoch": 1.6217175301632363, "grad_norm": 2.51643967628479, "learning_rate": 5.087215705051472e-06, "loss": 0.3449, "step": 13710 }, { "epoch": 1.6229004021764846, "grad_norm": 2.7198190689086914, "learning_rate": 5.086497486234139e-06, "loss": 0.3007, "step": 13720 }, { "epoch": 1.6240832741897326, "grad_norm": 2.0131969451904297, "learning_rate": 5.085779267416807e-06, "loss": 0.3598, "step": 13730 }, { "epoch": 1.6252661462029807, "grad_norm": 2.2213804721832275, "learning_rate": 5.085061048599473e-06, "loss": 0.3138, "step": 13740 }, { "epoch": 1.626449018216229, "grad_norm": 2.639836311340332, "learning_rate": 5.084342829782141e-06, "loss": 0.3667, "step": 13750 }, { "epoch": 1.6276318902294773, "grad_norm": 2.830441474914551, "learning_rate": 5.083624610964807e-06, "loss": 0.3059, "step": 13760 }, { "epoch": 1.6288147622427254, "grad_norm": 3.0879757404327393, "learning_rate": 5.082906392147475e-06, "loss": 0.338, "step": 13770 }, { "epoch": 1.6299976342559734, "grad_norm": 3.0475282669067383, "learning_rate": 5.082188173330141e-06, "loss": 0.3497, "step": 13780 }, { "epoch": 1.6311805062692217, "grad_norm": 2.055182695388794, "learning_rate": 5.081469954512809e-06, "loss": 0.3156, "step": 13790 }, { "epoch": 1.6323633782824698, "grad_norm": 2.2830474376678467, "learning_rate": 5.0807517356954755e-06, "loss": 0.3428, "step": 13800 }, { "epoch": 1.6335462502957179, "grad_norm": 3.2028403282165527, "learning_rate": 5.0800335168781424e-06, "loss": 0.2927, "step": 13810 }, { "epoch": 1.6347291223089662, "grad_norm": 3.4929287433624268, "learning_rate": 5.079315298060809e-06, "loss": 0.3376, "step": 13820 }, { "epoch": 1.6359119943222145, "grad_norm": 2.8479928970336914, "learning_rate": 5.078597079243476e-06, "loss": 0.3134, "step": 13830 }, { "epoch": 1.6370948663354625, "grad_norm": 1.9470689296722412, "learning_rate": 5.077878860426143e-06, "loss": 0.3183, "step": 13840 }, { "epoch": 1.6382777383487106, "grad_norm": 2.4081637859344482, "learning_rate": 5.07716064160881e-06, "loss": 0.3391, "step": 13850 }, { "epoch": 1.639460610361959, "grad_norm": 2.22745943069458, "learning_rate": 5.076442422791477e-06, "loss": 0.3205, "step": 13860 }, { "epoch": 1.640643482375207, "grad_norm": 2.6729397773742676, "learning_rate": 5.075724203974144e-06, "loss": 0.324, "step": 13870 }, { "epoch": 1.641826354388455, "grad_norm": 2.435185432434082, "learning_rate": 5.075005985156811e-06, "loss": 0.3658, "step": 13880 }, { "epoch": 1.6430092264017033, "grad_norm": 2.6831552982330322, "learning_rate": 5.074287766339478e-06, "loss": 0.2849, "step": 13890 }, { "epoch": 1.6441920984149516, "grad_norm": 2.8662123680114746, "learning_rate": 5.073569547522146e-06, "loss": 0.3481, "step": 13900 }, { "epoch": 1.6453749704281997, "grad_norm": 2.2142205238342285, "learning_rate": 5.0728513287048125e-06, "loss": 0.3413, "step": 13910 }, { "epoch": 1.6465578424414478, "grad_norm": 3.125593423843384, "learning_rate": 5.0721331098874794e-06, "loss": 0.3057, "step": 13920 }, { "epoch": 1.647740714454696, "grad_norm": 3.180004835128784, "learning_rate": 5.071414891070146e-06, "loss": 0.3383, "step": 13930 }, { "epoch": 1.6489235864679441, "grad_norm": 3.923089027404785, "learning_rate": 5.070696672252813e-06, "loss": 0.3122, "step": 13940 }, { "epoch": 1.6501064584811922, "grad_norm": 2.646047592163086, "learning_rate": 5.06997845343548e-06, "loss": 0.3103, "step": 13950 }, { "epoch": 1.6512893304944405, "grad_norm": 3.303234100341797, "learning_rate": 5.069260234618147e-06, "loss": 0.3356, "step": 13960 }, { "epoch": 1.6524722025076888, "grad_norm": 1.9866607189178467, "learning_rate": 5.068542015800814e-06, "loss": 0.3274, "step": 13970 }, { "epoch": 1.6536550745209369, "grad_norm": 2.8178138732910156, "learning_rate": 5.067823796983481e-06, "loss": 0.3697, "step": 13980 }, { "epoch": 1.654837946534185, "grad_norm": 1.9609490633010864, "learning_rate": 5.067105578166148e-06, "loss": 0.3062, "step": 13990 }, { "epoch": 1.6560208185474332, "grad_norm": 2.2423834800720215, "learning_rate": 5.066387359348816e-06, "loss": 0.3148, "step": 14000 }, { "epoch": 1.6572036905606813, "grad_norm": 3.6977040767669678, "learning_rate": 5.065669140531482e-06, "loss": 0.2901, "step": 14010 }, { "epoch": 1.6583865625739294, "grad_norm": 3.108161449432373, "learning_rate": 5.0649509217141495e-06, "loss": 0.3094, "step": 14020 }, { "epoch": 1.6595694345871776, "grad_norm": 3.174072027206421, "learning_rate": 5.064232702896816e-06, "loss": 0.336, "step": 14030 }, { "epoch": 1.660752306600426, "grad_norm": 3.211223602294922, "learning_rate": 5.063514484079483e-06, "loss": 0.284, "step": 14040 }, { "epoch": 1.661935178613674, "grad_norm": 2.340867757797241, "learning_rate": 5.0627962652621494e-06, "loss": 0.3352, "step": 14050 }, { "epoch": 1.663118050626922, "grad_norm": 2.3574111461639404, "learning_rate": 5.062078046444817e-06, "loss": 0.3324, "step": 14060 }, { "epoch": 1.6643009226401704, "grad_norm": 3.4371180534362793, "learning_rate": 5.061359827627484e-06, "loss": 0.3469, "step": 14070 }, { "epoch": 1.6654837946534184, "grad_norm": 2.4418962001800537, "learning_rate": 5.060641608810151e-06, "loss": 0.3366, "step": 14080 }, { "epoch": 1.6666666666666665, "grad_norm": 3.1625897884368896, "learning_rate": 5.059923389992818e-06, "loss": 0.309, "step": 14090 }, { "epoch": 1.6678495386799148, "grad_norm": 2.799833297729492, "learning_rate": 5.059205171175485e-06, "loss": 0.3117, "step": 14100 }, { "epoch": 1.669032410693163, "grad_norm": 2.8320069313049316, "learning_rate": 5.058486952358152e-06, "loss": 0.329, "step": 14110 }, { "epoch": 1.6702152827064112, "grad_norm": 2.614542007446289, "learning_rate": 5.057768733540819e-06, "loss": 0.322, "step": 14120 }, { "epoch": 1.6713981547196592, "grad_norm": 3.401003122329712, "learning_rate": 5.057050514723486e-06, "loss": 0.2921, "step": 14130 }, { "epoch": 1.6725810267329075, "grad_norm": 2.30155086517334, "learning_rate": 5.056332295906153e-06, "loss": 0.331, "step": 14140 }, { "epoch": 1.6737638987461558, "grad_norm": 2.7106785774230957, "learning_rate": 5.05561407708882e-06, "loss": 0.3345, "step": 14150 }, { "epoch": 1.6749467707594037, "grad_norm": 2.9260377883911133, "learning_rate": 5.0548958582714864e-06, "loss": 0.3113, "step": 14160 }, { "epoch": 1.676129642772652, "grad_norm": 2.626962423324585, "learning_rate": 5.054177639454154e-06, "loss": 0.3409, "step": 14170 }, { "epoch": 1.6773125147859003, "grad_norm": 2.456526517868042, "learning_rate": 5.053459420636821e-06, "loss": 0.3169, "step": 14180 }, { "epoch": 1.6784953867991483, "grad_norm": 2.4258885383605957, "learning_rate": 5.052741201819488e-06, "loss": 0.3114, "step": 14190 }, { "epoch": 1.6796782588123964, "grad_norm": 3.0785810947418213, "learning_rate": 5.052022983002155e-06, "loss": 0.2927, "step": 14200 }, { "epoch": 1.6808611308256447, "grad_norm": 2.3288445472717285, "learning_rate": 5.051304764184822e-06, "loss": 0.3244, "step": 14210 }, { "epoch": 1.682044002838893, "grad_norm": 2.9143729209899902, "learning_rate": 5.050586545367489e-06, "loss": 0.3313, "step": 14220 }, { "epoch": 1.6832268748521408, "grad_norm": 2.184540033340454, "learning_rate": 5.049868326550156e-06, "loss": 0.3088, "step": 14230 }, { "epoch": 1.6844097468653891, "grad_norm": 2.156137228012085, "learning_rate": 5.049150107732823e-06, "loss": 0.3172, "step": 14240 }, { "epoch": 1.6855926188786374, "grad_norm": 2.418489694595337, "learning_rate": 5.04843188891549e-06, "loss": 0.3047, "step": 14250 }, { "epoch": 1.6867754908918855, "grad_norm": 3.1165213584899902, "learning_rate": 5.0477136700981565e-06, "loss": 0.2754, "step": 14260 }, { "epoch": 1.6879583629051336, "grad_norm": 2.4814577102661133, "learning_rate": 5.0469954512808234e-06, "loss": 0.3371, "step": 14270 }, { "epoch": 1.6891412349183819, "grad_norm": 3.270131826400757, "learning_rate": 5.04627723246349e-06, "loss": 0.2998, "step": 14280 }, { "epoch": 1.6903241069316302, "grad_norm": 2.3310813903808594, "learning_rate": 5.045559013646158e-06, "loss": 0.2948, "step": 14290 }, { "epoch": 1.691506978944878, "grad_norm": 3.6687893867492676, "learning_rate": 5.044840794828824e-06, "loss": 0.3071, "step": 14300 }, { "epoch": 1.6926898509581263, "grad_norm": 2.5189130306243896, "learning_rate": 5.044122576011492e-06, "loss": 0.2991, "step": 14310 }, { "epoch": 1.6938727229713746, "grad_norm": 2.2292873859405518, "learning_rate": 5.043404357194158e-06, "loss": 0.3645, "step": 14320 }, { "epoch": 1.6950555949846227, "grad_norm": 2.4766671657562256, "learning_rate": 5.042686138376826e-06, "loss": 0.3062, "step": 14330 }, { "epoch": 1.6962384669978707, "grad_norm": 3.1414923667907715, "learning_rate": 5.041967919559493e-06, "loss": 0.3216, "step": 14340 }, { "epoch": 1.697421339011119, "grad_norm": 2.6457090377807617, "learning_rate": 5.04124970074216e-06, "loss": 0.3048, "step": 14350 }, { "epoch": 1.6986042110243673, "grad_norm": 1.8806536197662354, "learning_rate": 5.040531481924827e-06, "loss": 0.3906, "step": 14360 }, { "epoch": 1.6997870830376152, "grad_norm": 2.4284303188323975, "learning_rate": 5.0398132631074935e-06, "loss": 0.3135, "step": 14370 }, { "epoch": 1.7009699550508635, "grad_norm": 2.1625561714172363, "learning_rate": 5.0390950442901604e-06, "loss": 0.3359, "step": 14380 }, { "epoch": 1.7021528270641118, "grad_norm": 2.284396171569824, "learning_rate": 5.038376825472827e-06, "loss": 0.3306, "step": 14390 }, { "epoch": 1.7033356990773598, "grad_norm": 2.887921094894409, "learning_rate": 5.037658606655495e-06, "loss": 0.3091, "step": 14400 }, { "epoch": 1.704518571090608, "grad_norm": 3.903743267059326, "learning_rate": 5.036940387838161e-06, "loss": 0.3783, "step": 14410 }, { "epoch": 1.7057014431038562, "grad_norm": 3.2908666133880615, "learning_rate": 5.036222169020829e-06, "loss": 0.3208, "step": 14420 }, { "epoch": 1.7068843151171045, "grad_norm": 2.6162149906158447, "learning_rate": 5.035503950203495e-06, "loss": 0.3251, "step": 14430 }, { "epoch": 1.7080671871303525, "grad_norm": 2.951775074005127, "learning_rate": 5.034785731386163e-06, "loss": 0.3457, "step": 14440 }, { "epoch": 1.7092500591436006, "grad_norm": 2.26690411567688, "learning_rate": 5.03406751256883e-06, "loss": 0.3251, "step": 14450 }, { "epoch": 1.710432931156849, "grad_norm": 3.271415948867798, "learning_rate": 5.033349293751497e-06, "loss": 0.3411, "step": 14460 }, { "epoch": 1.711615803170097, "grad_norm": 2.0259809494018555, "learning_rate": 5.032631074934164e-06, "loss": 0.3225, "step": 14470 }, { "epoch": 1.712798675183345, "grad_norm": 2.0512285232543945, "learning_rate": 5.0319128561168305e-06, "loss": 0.2946, "step": 14480 }, { "epoch": 1.7139815471965933, "grad_norm": 2.5374696254730225, "learning_rate": 5.0311946372994974e-06, "loss": 0.3411, "step": 14490 }, { "epoch": 1.7151644192098416, "grad_norm": 2.3170814514160156, "learning_rate": 5.030476418482164e-06, "loss": 0.3025, "step": 14500 }, { "epoch": 1.7163472912230897, "grad_norm": 2.375981330871582, "learning_rate": 5.029758199664831e-06, "loss": 0.2748, "step": 14510 }, { "epoch": 1.7175301632363378, "grad_norm": 3.1739301681518555, "learning_rate": 5.029039980847498e-06, "loss": 0.312, "step": 14520 }, { "epoch": 1.718713035249586, "grad_norm": 2.581071138381958, "learning_rate": 5.028321762030165e-06, "loss": 0.3039, "step": 14530 }, { "epoch": 1.7198959072628341, "grad_norm": 2.506803274154663, "learning_rate": 5.027603543212832e-06, "loss": 0.3452, "step": 14540 }, { "epoch": 1.7210787792760822, "grad_norm": 3.3117330074310303, "learning_rate": 5.026885324395499e-06, "loss": 0.3663, "step": 14550 }, { "epoch": 1.7222616512893305, "grad_norm": 2.346388816833496, "learning_rate": 5.026167105578167e-06, "loss": 0.354, "step": 14560 }, { "epoch": 1.7234445233025788, "grad_norm": 2.311544418334961, "learning_rate": 5.025448886760833e-06, "loss": 0.3132, "step": 14570 }, { "epoch": 1.7246273953158269, "grad_norm": 2.3980321884155273, "learning_rate": 5.024730667943501e-06, "loss": 0.2823, "step": 14580 }, { "epoch": 1.725810267329075, "grad_norm": 2.5401647090911865, "learning_rate": 5.024012449126167e-06, "loss": 0.2922, "step": 14590 }, { "epoch": 1.7269931393423232, "grad_norm": 2.3093924522399902, "learning_rate": 5.0232942303088344e-06, "loss": 0.2924, "step": 14600 }, { "epoch": 1.7281760113555713, "grad_norm": 2.8203935623168945, "learning_rate": 5.0225760114915005e-06, "loss": 0.3306, "step": 14610 }, { "epoch": 1.7293588833688194, "grad_norm": 3.4530975818634033, "learning_rate": 5.021857792674168e-06, "loss": 0.3344, "step": 14620 }, { "epoch": 1.7305417553820677, "grad_norm": 3.3713724613189697, "learning_rate": 5.021139573856835e-06, "loss": 0.3436, "step": 14630 }, { "epoch": 1.731724627395316, "grad_norm": 2.5409672260284424, "learning_rate": 5.020421355039502e-06, "loss": 0.3749, "step": 14640 }, { "epoch": 1.732907499408564, "grad_norm": 4.277678966522217, "learning_rate": 5.01970313622217e-06, "loss": 0.3052, "step": 14650 }, { "epoch": 1.734090371421812, "grad_norm": 3.7035672664642334, "learning_rate": 5.018984917404836e-06, "loss": 0.3081, "step": 14660 }, { "epoch": 1.7352732434350604, "grad_norm": 2.3813202381134033, "learning_rate": 5.018266698587504e-06, "loss": 0.3127, "step": 14670 }, { "epoch": 1.7364561154483085, "grad_norm": 2.580883741378784, "learning_rate": 5.01754847977017e-06, "loss": 0.2905, "step": 14680 }, { "epoch": 1.7376389874615565, "grad_norm": 2.821316957473755, "learning_rate": 5.016830260952838e-06, "loss": 0.3214, "step": 14690 }, { "epoch": 1.7388218594748048, "grad_norm": 3.987694025039673, "learning_rate": 5.016112042135504e-06, "loss": 0.2954, "step": 14700 }, { "epoch": 1.7400047314880531, "grad_norm": 3.289832830429077, "learning_rate": 5.0153938233181714e-06, "loss": 0.329, "step": 14710 }, { "epoch": 1.7411876035013012, "grad_norm": 2.1757147312164307, "learning_rate": 5.014675604500838e-06, "loss": 0.3071, "step": 14720 }, { "epoch": 1.7423704755145493, "grad_norm": 2.166836977005005, "learning_rate": 5.013957385683505e-06, "loss": 0.3247, "step": 14730 }, { "epoch": 1.7435533475277976, "grad_norm": 3.105241298675537, "learning_rate": 5.013239166866172e-06, "loss": 0.2866, "step": 14740 }, { "epoch": 1.7447362195410456, "grad_norm": 2.3274075984954834, "learning_rate": 5.012520948048839e-06, "loss": 0.3615, "step": 14750 }, { "epoch": 1.7459190915542937, "grad_norm": 2.7024152278900146, "learning_rate": 5.011802729231506e-06, "loss": 0.3349, "step": 14760 }, { "epoch": 1.747101963567542, "grad_norm": 2.497018575668335, "learning_rate": 5.011084510414173e-06, "loss": 0.2949, "step": 14770 }, { "epoch": 1.7482848355807903, "grad_norm": 3.0676562786102295, "learning_rate": 5.01036629159684e-06, "loss": 0.3364, "step": 14780 }, { "epoch": 1.7494677075940384, "grad_norm": 3.283435583114624, "learning_rate": 5.009648072779507e-06, "loss": 0.2902, "step": 14790 }, { "epoch": 1.7504140052046369, "eval_accuracy": 0.8563562564460858, "eval_loss": 0.3321095407009125, "eval_runtime": 77.8043, "eval_safe_aucpr": 0.912319127206396, "eval_safe_f1": 0.8371891321153158, "eval_safe_fpr": 0.12474963379271135, "eval_safe_precision": 0.841775991506787, "eval_safe_recall": 0.8326519896485767, "eval_samples_per_second": 772.631, "eval_steps_per_second": 12.082, "eval_unsafe_aucpr": 0.9509520176742567, "eval_unsafe_f1": 0.8714857644625024, "eval_unsafe_fpr": 0.16734801035142272, "eval_unsafe_precision": 0.8677534084173089, "eval_unsafe_recall": 0.8752503662072882, "step": 14798 }, { "epoch": 1.7506505796072864, "grad_norm": 2.357008695602417, "learning_rate": 5.008929853962174e-06, "loss": 0.2962, "step": 14800 }, { "epoch": 1.7518334516205347, "grad_norm": 3.520799160003662, "learning_rate": 5.008211635144841e-06, "loss": 0.3438, "step": 14810 }, { "epoch": 1.7530163236337828, "grad_norm": 2.4718270301818848, "learning_rate": 5.007493416327508e-06, "loss": 0.2888, "step": 14820 }, { "epoch": 1.7541991956470309, "grad_norm": 2.4665021896362305, "learning_rate": 5.006775197510175e-06, "loss": 0.3219, "step": 14830 }, { "epoch": 1.7553820676602792, "grad_norm": 3.32997727394104, "learning_rate": 5.006056978692841e-06, "loss": 0.3331, "step": 14840 }, { "epoch": 1.7565649396735274, "grad_norm": 2.663910388946533, "learning_rate": 5.005338759875509e-06, "loss": 0.3391, "step": 14850 }, { "epoch": 1.7577478116867755, "grad_norm": 2.1464967727661133, "learning_rate": 5.004620541058175e-06, "loss": 0.2876, "step": 14860 }, { "epoch": 1.7589306837000236, "grad_norm": 2.893955945968628, "learning_rate": 5.003902322240843e-06, "loss": 0.333, "step": 14870 }, { "epoch": 1.7601135557132719, "grad_norm": 2.531080722808838, "learning_rate": 5.003184103423509e-06, "loss": 0.318, "step": 14880 }, { "epoch": 1.76129642772652, "grad_norm": 2.4850759506225586, "learning_rate": 5.002465884606177e-06, "loss": 0.3414, "step": 14890 }, { "epoch": 1.762479299739768, "grad_norm": 3.054048776626587, "learning_rate": 5.001747665788844e-06, "loss": 0.3491, "step": 14900 }, { "epoch": 1.7636621717530163, "grad_norm": 2.300985097885132, "learning_rate": 5.001029446971511e-06, "loss": 0.3134, "step": 14910 }, { "epoch": 1.7648450437662646, "grad_norm": 2.196075677871704, "learning_rate": 5.000311228154178e-06, "loss": 0.3487, "step": 14920 }, { "epoch": 1.7660279157795127, "grad_norm": 2.2579915523529053, "learning_rate": 4.9995930093368446e-06, "loss": 0.2823, "step": 14930 }, { "epoch": 1.7672107877927608, "grad_norm": 3.092942953109741, "learning_rate": 4.998874790519512e-06, "loss": 0.3153, "step": 14940 }, { "epoch": 1.768393659806009, "grad_norm": 4.024479866027832, "learning_rate": 4.998156571702178e-06, "loss": 0.3484, "step": 14950 }, { "epoch": 1.7695765318192571, "grad_norm": 2.4754703044891357, "learning_rate": 4.997438352884846e-06, "loss": 0.3691, "step": 14960 }, { "epoch": 1.7707594038325052, "grad_norm": 2.749239444732666, "learning_rate": 4.996720134067512e-06, "loss": 0.3407, "step": 14970 }, { "epoch": 1.7719422758457535, "grad_norm": 1.894676923751831, "learning_rate": 4.99600191525018e-06, "loss": 0.312, "step": 14980 }, { "epoch": 1.7731251478590018, "grad_norm": 3.5794310569763184, "learning_rate": 4.995283696432847e-06, "loss": 0.3488, "step": 14990 }, { "epoch": 1.7743080198722498, "grad_norm": 3.6035690307617188, "learning_rate": 4.994565477615514e-06, "loss": 0.3257, "step": 15000 }, { "epoch": 1.775490891885498, "grad_norm": 1.712687373161316, "learning_rate": 4.993847258798181e-06, "loss": 0.2978, "step": 15010 }, { "epoch": 1.7766737638987462, "grad_norm": 2.74722957611084, "learning_rate": 4.993129039980848e-06, "loss": 0.3155, "step": 15020 }, { "epoch": 1.7778566359119943, "grad_norm": 2.21646785736084, "learning_rate": 4.992410821163515e-06, "loss": 0.3277, "step": 15030 }, { "epoch": 1.7790395079252423, "grad_norm": 2.4206202030181885, "learning_rate": 4.9916926023461816e-06, "loss": 0.2708, "step": 15040 }, { "epoch": 1.7802223799384906, "grad_norm": 3.1641170978546143, "learning_rate": 4.9909743835288485e-06, "loss": 0.3207, "step": 15050 }, { "epoch": 1.781405251951739, "grad_norm": 2.7150776386260986, "learning_rate": 4.990256164711515e-06, "loss": 0.3354, "step": 15060 }, { "epoch": 1.782588123964987, "grad_norm": 2.523766040802002, "learning_rate": 4.989537945894182e-06, "loss": 0.3017, "step": 15070 }, { "epoch": 1.783770995978235, "grad_norm": 2.2823326587677, "learning_rate": 4.988819727076849e-06, "loss": 0.3233, "step": 15080 }, { "epoch": 1.7849538679914834, "grad_norm": 4.041593074798584, "learning_rate": 4.988101508259516e-06, "loss": 0.3148, "step": 15090 }, { "epoch": 1.7861367400047314, "grad_norm": 2.364863395690918, "learning_rate": 4.987383289442184e-06, "loss": 0.2999, "step": 15100 }, { "epoch": 1.7873196120179795, "grad_norm": 4.033505916595459, "learning_rate": 4.98666507062485e-06, "loss": 0.3229, "step": 15110 }, { "epoch": 1.7885024840312278, "grad_norm": 2.4584884643554688, "learning_rate": 4.985946851807518e-06, "loss": 0.3095, "step": 15120 }, { "epoch": 1.789685356044476, "grad_norm": 1.9042985439300537, "learning_rate": 4.985228632990184e-06, "loss": 0.2744, "step": 15130 }, { "epoch": 1.7908682280577242, "grad_norm": 3.170943021774292, "learning_rate": 4.984510414172852e-06, "loss": 0.3133, "step": 15140 }, { "epoch": 1.7920511000709722, "grad_norm": 2.462503671646118, "learning_rate": 4.983792195355518e-06, "loss": 0.3562, "step": 15150 }, { "epoch": 1.7932339720842205, "grad_norm": 2.148439407348633, "learning_rate": 4.9830739765381855e-06, "loss": 0.3529, "step": 15160 }, { "epoch": 1.7944168440974686, "grad_norm": 2.2401418685913086, "learning_rate": 4.982355757720852e-06, "loss": 0.3314, "step": 15170 }, { "epoch": 1.7955997161107167, "grad_norm": 2.526993989944458, "learning_rate": 4.981637538903519e-06, "loss": 0.3091, "step": 15180 }, { "epoch": 1.796782588123965, "grad_norm": 4.488979816436768, "learning_rate": 4.980919320086186e-06, "loss": 0.3199, "step": 15190 }, { "epoch": 1.7979654601372133, "grad_norm": 2.7945244312286377, "learning_rate": 4.980201101268853e-06, "loss": 0.3415, "step": 15200 }, { "epoch": 1.7991483321504613, "grad_norm": 2.6339950561523438, "learning_rate": 4.979482882451521e-06, "loss": 0.3692, "step": 15210 }, { "epoch": 1.8003312041637094, "grad_norm": 3.3200294971466064, "learning_rate": 4.978764663634187e-06, "loss": 0.3222, "step": 15220 }, { "epoch": 1.8015140761769577, "grad_norm": 2.7311697006225586, "learning_rate": 4.978046444816855e-06, "loss": 0.3199, "step": 15230 }, { "epoch": 1.802696948190206, "grad_norm": 2.4543228149414062, "learning_rate": 4.977328225999521e-06, "loss": 0.2966, "step": 15240 }, { "epoch": 1.8038798202034538, "grad_norm": 2.3450186252593994, "learning_rate": 4.976610007182189e-06, "loss": 0.3515, "step": 15250 }, { "epoch": 1.8050626922167021, "grad_norm": 3.66555118560791, "learning_rate": 4.975891788364855e-06, "loss": 0.324, "step": 15260 }, { "epoch": 1.8062455642299504, "grad_norm": 2.4101505279541016, "learning_rate": 4.9751735695475225e-06, "loss": 0.2979, "step": 15270 }, { "epoch": 1.8074284362431985, "grad_norm": 2.7355422973632812, "learning_rate": 4.974455350730189e-06, "loss": 0.3147, "step": 15280 }, { "epoch": 1.8086113082564466, "grad_norm": 3.208298444747925, "learning_rate": 4.973737131912856e-06, "loss": 0.3688, "step": 15290 }, { "epoch": 1.8097941802696949, "grad_norm": 2.7905843257904053, "learning_rate": 4.973018913095523e-06, "loss": 0.3597, "step": 15300 }, { "epoch": 1.8109770522829431, "grad_norm": 2.308422565460205, "learning_rate": 4.97230069427819e-06, "loss": 0.3251, "step": 15310 }, { "epoch": 1.812159924296191, "grad_norm": 3.4839212894439697, "learning_rate": 4.971582475460857e-06, "loss": 0.3373, "step": 15320 }, { "epoch": 1.8133427963094393, "grad_norm": 2.911137819290161, "learning_rate": 4.970864256643524e-06, "loss": 0.3159, "step": 15330 }, { "epoch": 1.8145256683226876, "grad_norm": 2.1877129077911377, "learning_rate": 4.970146037826191e-06, "loss": 0.2728, "step": 15340 }, { "epoch": 1.8157085403359357, "grad_norm": 2.5772745609283447, "learning_rate": 4.969427819008858e-06, "loss": 0.295, "step": 15350 }, { "epoch": 1.8168914123491837, "grad_norm": 2.590778112411499, "learning_rate": 4.968709600191525e-06, "loss": 0.3209, "step": 15360 }, { "epoch": 1.818074284362432, "grad_norm": 2.6555583477020264, "learning_rate": 4.9679913813741926e-06, "loss": 0.294, "step": 15370 }, { "epoch": 1.8192571563756803, "grad_norm": 2.4802496433258057, "learning_rate": 4.967273162556859e-06, "loss": 0.3685, "step": 15380 }, { "epoch": 1.8204400283889282, "grad_norm": 2.965207815170288, "learning_rate": 4.966554943739526e-06, "loss": 0.3094, "step": 15390 }, { "epoch": 1.8216229004021764, "grad_norm": 2.8850719928741455, "learning_rate": 4.9658367249221925e-06, "loss": 0.2854, "step": 15400 }, { "epoch": 1.8228057724154247, "grad_norm": 2.8029391765594482, "learning_rate": 4.96511850610486e-06, "loss": 0.3411, "step": 15410 }, { "epoch": 1.8239886444286728, "grad_norm": 2.1750428676605225, "learning_rate": 4.964400287287527e-06, "loss": 0.3108, "step": 15420 }, { "epoch": 1.8251715164419209, "grad_norm": 1.994527816772461, "learning_rate": 4.963682068470194e-06, "loss": 0.3013, "step": 15430 }, { "epoch": 1.8263543884551692, "grad_norm": 2.478555917739868, "learning_rate": 4.962963849652861e-06, "loss": 0.2966, "step": 15440 }, { "epoch": 1.8275372604684175, "grad_norm": 2.7759952545166016, "learning_rate": 4.962245630835528e-06, "loss": 0.3379, "step": 15450 }, { "epoch": 1.8287201324816655, "grad_norm": 2.4554290771484375, "learning_rate": 4.961527412018195e-06, "loss": 0.2662, "step": 15460 }, { "epoch": 1.8299030044949136, "grad_norm": 2.1369402408599854, "learning_rate": 4.960809193200862e-06, "loss": 0.2674, "step": 15470 }, { "epoch": 1.831085876508162, "grad_norm": 3.9057397842407227, "learning_rate": 4.9600909743835296e-06, "loss": 0.3578, "step": 15480 }, { "epoch": 1.83226874852141, "grad_norm": 2.922919988632202, "learning_rate": 4.959372755566196e-06, "loss": 0.3491, "step": 15490 }, { "epoch": 1.833451620534658, "grad_norm": 3.4353342056274414, "learning_rate": 4.958654536748863e-06, "loss": 0.3066, "step": 15500 }, { "epoch": 1.8346344925479063, "grad_norm": 3.313187837600708, "learning_rate": 4.9579363179315295e-06, "loss": 0.3208, "step": 15510 }, { "epoch": 1.8358173645611546, "grad_norm": 3.00732159614563, "learning_rate": 4.957218099114197e-06, "loss": 0.3246, "step": 15520 }, { "epoch": 1.8370002365744027, "grad_norm": 1.9314966201782227, "learning_rate": 4.956499880296863e-06, "loss": 0.2884, "step": 15530 }, { "epoch": 1.8381831085876508, "grad_norm": 1.8576322793960571, "learning_rate": 4.955781661479531e-06, "loss": 0.3116, "step": 15540 }, { "epoch": 1.839365980600899, "grad_norm": 2.75687313079834, "learning_rate": 4.955063442662198e-06, "loss": 0.3189, "step": 15550 }, { "epoch": 1.8405488526141471, "grad_norm": 2.6360726356506348, "learning_rate": 4.954345223844865e-06, "loss": 0.3422, "step": 15560 }, { "epoch": 1.8417317246273952, "grad_norm": 3.248277425765991, "learning_rate": 4.953627005027532e-06, "loss": 0.3313, "step": 15570 }, { "epoch": 1.8429145966406435, "grad_norm": 2.726261615753174, "learning_rate": 4.952908786210199e-06, "loss": 0.3268, "step": 15580 }, { "epoch": 1.8440974686538918, "grad_norm": 2.659207344055176, "learning_rate": 4.952190567392866e-06, "loss": 0.302, "step": 15590 }, { "epoch": 1.8452803406671399, "grad_norm": 2.1712844371795654, "learning_rate": 4.951472348575533e-06, "loss": 0.3303, "step": 15600 }, { "epoch": 1.846463212680388, "grad_norm": 2.3552823066711426, "learning_rate": 4.9507541297581996e-06, "loss": 0.3412, "step": 15610 }, { "epoch": 1.8476460846936362, "grad_norm": 2.0031118392944336, "learning_rate": 4.9500359109408665e-06, "loss": 0.3161, "step": 15620 }, { "epoch": 1.8488289567068843, "grad_norm": 2.548931360244751, "learning_rate": 4.949317692123533e-06, "loss": 0.3335, "step": 15630 }, { "epoch": 1.8500118287201324, "grad_norm": 3.401005983352661, "learning_rate": 4.948599473306201e-06, "loss": 0.3518, "step": 15640 }, { "epoch": 1.8511947007333807, "grad_norm": 2.6640453338623047, "learning_rate": 4.947881254488867e-06, "loss": 0.3158, "step": 15650 }, { "epoch": 1.852377572746629, "grad_norm": 1.7123150825500488, "learning_rate": 4.947163035671535e-06, "loss": 0.297, "step": 15660 }, { "epoch": 1.853560444759877, "grad_norm": 2.112438201904297, "learning_rate": 4.946444816854202e-06, "loss": 0.3326, "step": 15670 }, { "epoch": 1.854743316773125, "grad_norm": 2.7215986251831055, "learning_rate": 4.945726598036869e-06, "loss": 0.3217, "step": 15680 }, { "epoch": 1.8559261887863734, "grad_norm": 3.511922597885132, "learning_rate": 4.945008379219536e-06, "loss": 0.3307, "step": 15690 }, { "epoch": 1.8571090607996215, "grad_norm": 2.523838996887207, "learning_rate": 4.944290160402203e-06, "loss": 0.3054, "step": 15700 }, { "epoch": 1.8582919328128695, "grad_norm": 3.113474130630493, "learning_rate": 4.94357194158487e-06, "loss": 0.3372, "step": 15710 }, { "epoch": 1.8594748048261178, "grad_norm": 2.440829277038574, "learning_rate": 4.9428537227675366e-06, "loss": 0.3174, "step": 15720 }, { "epoch": 1.8606576768393661, "grad_norm": 2.771348714828491, "learning_rate": 4.9421355039502035e-06, "loss": 0.325, "step": 15730 }, { "epoch": 1.8618405488526142, "grad_norm": 2.479052782058716, "learning_rate": 4.94141728513287e-06, "loss": 0.3264, "step": 15740 }, { "epoch": 1.8630234208658623, "grad_norm": 3.5350215435028076, "learning_rate": 4.940699066315538e-06, "loss": 0.2986, "step": 15750 }, { "epoch": 1.8642062928791105, "grad_norm": 2.6968698501586914, "learning_rate": 4.939980847498204e-06, "loss": 0.2951, "step": 15760 }, { "epoch": 1.8653891648923586, "grad_norm": 4.0666890144348145, "learning_rate": 4.939262628680872e-06, "loss": 0.2839, "step": 15770 }, { "epoch": 1.8665720369056067, "grad_norm": 2.678946018218994, "learning_rate": 4.938544409863538e-06, "loss": 0.3036, "step": 15780 }, { "epoch": 1.867754908918855, "grad_norm": 3.919001340866089, "learning_rate": 4.937826191046206e-06, "loss": 0.3149, "step": 15790 }, { "epoch": 1.8689377809321033, "grad_norm": 3.258880138397217, "learning_rate": 4.937107972228872e-06, "loss": 0.295, "step": 15800 }, { "epoch": 1.8701206529453513, "grad_norm": 2.228896141052246, "learning_rate": 4.93638975341154e-06, "loss": 0.3111, "step": 15810 }, { "epoch": 1.8713035249585994, "grad_norm": 4.7691144943237305, "learning_rate": 4.935671534594207e-06, "loss": 0.3496, "step": 15820 }, { "epoch": 1.8724863969718477, "grad_norm": 2.3389461040496826, "learning_rate": 4.9349533157768736e-06, "loss": 0.3096, "step": 15830 }, { "epoch": 1.8736692689850958, "grad_norm": 2.6259357929229736, "learning_rate": 4.9342350969595405e-06, "loss": 0.3102, "step": 15840 }, { "epoch": 1.8748521409983439, "grad_norm": 2.053807258605957, "learning_rate": 4.933516878142207e-06, "loss": 0.3245, "step": 15850 }, { "epoch": 1.8760350130115921, "grad_norm": 2.1412627696990967, "learning_rate": 4.932798659324874e-06, "loss": 0.3178, "step": 15860 }, { "epoch": 1.8772178850248404, "grad_norm": 2.4492874145507812, "learning_rate": 4.932080440507541e-06, "loss": 0.3419, "step": 15870 }, { "epoch": 1.8784007570380885, "grad_norm": 2.0245025157928467, "learning_rate": 4.931362221690208e-06, "loss": 0.2854, "step": 15880 }, { "epoch": 1.8795836290513366, "grad_norm": 2.60402774810791, "learning_rate": 4.930644002872875e-06, "loss": 0.3392, "step": 15890 }, { "epoch": 1.8807665010645849, "grad_norm": 2.6295523643493652, "learning_rate": 4.929925784055542e-06, "loss": 0.3452, "step": 15900 }, { "epoch": 1.881949373077833, "grad_norm": 2.099587917327881, "learning_rate": 4.92920756523821e-06, "loss": 0.3254, "step": 15910 }, { "epoch": 1.883132245091081, "grad_norm": 2.967813491821289, "learning_rate": 4.928489346420877e-06, "loss": 0.294, "step": 15920 }, { "epoch": 1.8843151171043293, "grad_norm": 2.3281209468841553, "learning_rate": 4.927771127603544e-06, "loss": 0.2939, "step": 15930 }, { "epoch": 1.8854979891175776, "grad_norm": 2.6038031578063965, "learning_rate": 4.9270529087862106e-06, "loss": 0.2921, "step": 15940 }, { "epoch": 1.8866808611308257, "grad_norm": 2.3744089603424072, "learning_rate": 4.9263346899688775e-06, "loss": 0.3583, "step": 15950 }, { "epoch": 1.8878637331440737, "grad_norm": 2.654546022415161, "learning_rate": 4.925616471151544e-06, "loss": 0.2888, "step": 15960 }, { "epoch": 1.889046605157322, "grad_norm": 2.916886329650879, "learning_rate": 4.924898252334211e-06, "loss": 0.3246, "step": 15970 }, { "epoch": 1.89022947717057, "grad_norm": 3.3064868450164795, "learning_rate": 4.924180033516878e-06, "loss": 0.3232, "step": 15980 }, { "epoch": 1.8914123491838182, "grad_norm": 3.14512038230896, "learning_rate": 4.923461814699545e-06, "loss": 0.3583, "step": 15990 }, { "epoch": 1.8925952211970665, "grad_norm": 2.039816379547119, "learning_rate": 4.922743595882212e-06, "loss": 0.3007, "step": 16000 }, { "epoch": 1.8937780932103148, "grad_norm": 2.053156614303589, "learning_rate": 4.922025377064879e-06, "loss": 0.3105, "step": 16010 }, { "epoch": 1.8949609652235628, "grad_norm": 3.142106771469116, "learning_rate": 4.921307158247547e-06, "loss": 0.2969, "step": 16020 }, { "epoch": 1.896143837236811, "grad_norm": 3.165627956390381, "learning_rate": 4.920588939430213e-06, "loss": 0.3458, "step": 16030 }, { "epoch": 1.8973267092500592, "grad_norm": 3.0727994441986084, "learning_rate": 4.919870720612881e-06, "loss": 0.346, "step": 16040 }, { "epoch": 1.8985095812633073, "grad_norm": 2.516692876815796, "learning_rate": 4.919152501795547e-06, "loss": 0.3155, "step": 16050 }, { "epoch": 1.8996924532765553, "grad_norm": 2.974064588546753, "learning_rate": 4.9184342829782145e-06, "loss": 0.3093, "step": 16060 }, { "epoch": 1.9008753252898036, "grad_norm": 3.889378309249878, "learning_rate": 4.9177160641608805e-06, "loss": 0.3042, "step": 16070 }, { "epoch": 1.902058197303052, "grad_norm": 3.5090787410736084, "learning_rate": 4.916997845343548e-06, "loss": 0.2987, "step": 16080 }, { "epoch": 1.9032410693163, "grad_norm": 2.5992817878723145, "learning_rate": 4.916279626526215e-06, "loss": 0.3221, "step": 16090 }, { "epoch": 1.904423941329548, "grad_norm": 2.4082863330841064, "learning_rate": 4.915561407708882e-06, "loss": 0.32, "step": 16100 }, { "epoch": 1.9056068133427964, "grad_norm": 2.10164737701416, "learning_rate": 4.914843188891549e-06, "loss": 0.3309, "step": 16110 }, { "epoch": 1.9067896853560444, "grad_norm": 3.4077935218811035, "learning_rate": 4.914124970074216e-06, "loss": 0.2901, "step": 16120 }, { "epoch": 1.9079725573692925, "grad_norm": 3.679767608642578, "learning_rate": 4.913406751256883e-06, "loss": 0.2932, "step": 16130 }, { "epoch": 1.9091554293825408, "grad_norm": 2.589367389678955, "learning_rate": 4.91268853243955e-06, "loss": 0.3221, "step": 16140 }, { "epoch": 1.910338301395789, "grad_norm": 2.4946177005767822, "learning_rate": 4.911970313622217e-06, "loss": 0.3345, "step": 16150 }, { "epoch": 1.9115211734090372, "grad_norm": 2.770401954650879, "learning_rate": 4.911252094804884e-06, "loss": 0.3289, "step": 16160 }, { "epoch": 1.9127040454222852, "grad_norm": 1.7573366165161133, "learning_rate": 4.9105338759875515e-06, "loss": 0.3577, "step": 16170 }, { "epoch": 1.9138869174355335, "grad_norm": 2.5026323795318604, "learning_rate": 4.9098156571702175e-06, "loss": 0.2945, "step": 16180 }, { "epoch": 1.9150697894487816, "grad_norm": 2.5507638454437256, "learning_rate": 4.909097438352885e-06, "loss": 0.3074, "step": 16190 }, { "epoch": 1.9162526614620297, "grad_norm": 2.670314073562622, "learning_rate": 4.908379219535552e-06, "loss": 0.364, "step": 16200 }, { "epoch": 1.917435533475278, "grad_norm": 2.6045992374420166, "learning_rate": 4.907661000718219e-06, "loss": 0.2892, "step": 16210 }, { "epoch": 1.9186184054885262, "grad_norm": 3.1325511932373047, "learning_rate": 4.906942781900886e-06, "loss": 0.3123, "step": 16220 }, { "epoch": 1.9198012775017743, "grad_norm": 3.4678025245666504, "learning_rate": 4.906224563083553e-06, "loss": 0.3233, "step": 16230 }, { "epoch": 1.9209841495150224, "grad_norm": 2.2840728759765625, "learning_rate": 4.90550634426622e-06, "loss": 0.3126, "step": 16240 }, { "epoch": 1.9221670215282707, "grad_norm": 2.3752148151397705, "learning_rate": 4.904788125448887e-06, "loss": 0.3123, "step": 16250 }, { "epoch": 1.923349893541519, "grad_norm": 3.966721296310425, "learning_rate": 4.904069906631554e-06, "loss": 0.3001, "step": 16260 }, { "epoch": 1.9245327655547668, "grad_norm": 3.2313265800476074, "learning_rate": 4.903351687814221e-06, "loss": 0.3092, "step": 16270 }, { "epoch": 1.9257156375680151, "grad_norm": 3.653590679168701, "learning_rate": 4.902633468996888e-06, "loss": 0.3049, "step": 16280 }, { "epoch": 1.9268985095812634, "grad_norm": 2.0597825050354004, "learning_rate": 4.901915250179555e-06, "loss": 0.2974, "step": 16290 }, { "epoch": 1.9280813815945115, "grad_norm": 4.069886207580566, "learning_rate": 4.9011970313622215e-06, "loss": 0.3612, "step": 16300 }, { "epoch": 1.9292642536077595, "grad_norm": 2.4084854125976562, "learning_rate": 4.900478812544889e-06, "loss": 0.3309, "step": 16310 }, { "epoch": 1.9304471256210078, "grad_norm": 2.6260111331939697, "learning_rate": 4.899760593727555e-06, "loss": 0.3455, "step": 16320 }, { "epoch": 1.9316299976342561, "grad_norm": 2.047407865524292, "learning_rate": 4.899042374910223e-06, "loss": 0.3119, "step": 16330 }, { "epoch": 1.932812869647504, "grad_norm": 3.172539472579956, "learning_rate": 4.898324156092889e-06, "loss": 0.3302, "step": 16340 }, { "epoch": 1.9339957416607523, "grad_norm": 2.5367207527160645, "learning_rate": 4.897605937275557e-06, "loss": 0.3523, "step": 16350 }, { "epoch": 1.9351786136740006, "grad_norm": 2.7896671295166016, "learning_rate": 4.896887718458224e-06, "loss": 0.2973, "step": 16360 }, { "epoch": 1.9363614856872486, "grad_norm": 2.9528543949127197, "learning_rate": 4.896169499640891e-06, "loss": 0.3202, "step": 16370 }, { "epoch": 1.9375443577004967, "grad_norm": 2.150696277618408, "learning_rate": 4.895451280823558e-06, "loss": 0.3064, "step": 16380 }, { "epoch": 1.938727229713745, "grad_norm": 2.2622129917144775, "learning_rate": 4.894733062006225e-06, "loss": 0.2953, "step": 16390 }, { "epoch": 1.9399101017269933, "grad_norm": 2.1877410411834717, "learning_rate": 4.8940148431888915e-06, "loss": 0.3107, "step": 16400 }, { "epoch": 1.9410929737402411, "grad_norm": 4.725532531738281, "learning_rate": 4.8932966243715585e-06, "loss": 0.2905, "step": 16410 }, { "epoch": 1.9422758457534894, "grad_norm": 3.707885980606079, "learning_rate": 4.892578405554226e-06, "loss": 0.2985, "step": 16420 }, { "epoch": 1.9434587177667377, "grad_norm": 2.2911036014556885, "learning_rate": 4.891860186736892e-06, "loss": 0.2885, "step": 16430 }, { "epoch": 1.9446415897799858, "grad_norm": 2.9711685180664062, "learning_rate": 4.89114196791956e-06, "loss": 0.3159, "step": 16440 }, { "epoch": 1.9458244617932339, "grad_norm": 3.330817461013794, "learning_rate": 4.890423749102226e-06, "loss": 0.3232, "step": 16450 }, { "epoch": 1.9470073338064822, "grad_norm": 2.778247117996216, "learning_rate": 4.889705530284894e-06, "loss": 0.3191, "step": 16460 }, { "epoch": 1.9481902058197305, "grad_norm": 3.1285018920898438, "learning_rate": 4.888987311467561e-06, "loss": 0.3045, "step": 16470 }, { "epoch": 1.9493730778329783, "grad_norm": 2.4701411724090576, "learning_rate": 4.888269092650228e-06, "loss": 0.3315, "step": 16480 }, { "epoch": 1.9505559498462266, "grad_norm": 2.2952330112457275, "learning_rate": 4.887550873832895e-06, "loss": 0.2978, "step": 16490 }, { "epoch": 1.951738821859475, "grad_norm": 2.485168218612671, "learning_rate": 4.886832655015562e-06, "loss": 0.327, "step": 16500 }, { "epoch": 1.952921693872723, "grad_norm": 2.294721841812134, "learning_rate": 4.8861144361982285e-06, "loss": 0.3108, "step": 16510 }, { "epoch": 1.954104565885971, "grad_norm": 2.7091660499572754, "learning_rate": 4.8853962173808955e-06, "loss": 0.3095, "step": 16520 }, { "epoch": 1.9552874378992193, "grad_norm": 3.023503541946411, "learning_rate": 4.884677998563562e-06, "loss": 0.329, "step": 16530 }, { "epoch": 1.9564703099124676, "grad_norm": 2.9048104286193848, "learning_rate": 4.883959779746229e-06, "loss": 0.3134, "step": 16540 }, { "epoch": 1.9576531819257157, "grad_norm": 2.192518949508667, "learning_rate": 4.883241560928896e-06, "loss": 0.3424, "step": 16550 }, { "epoch": 1.9588360539389638, "grad_norm": 2.448760986328125, "learning_rate": 4.882523342111564e-06, "loss": 0.3148, "step": 16560 }, { "epoch": 1.960018925952212, "grad_norm": 2.6753344535827637, "learning_rate": 4.88180512329423e-06, "loss": 0.3368, "step": 16570 }, { "epoch": 1.9612017979654601, "grad_norm": 2.6930882930755615, "learning_rate": 4.881086904476898e-06, "loss": 0.3326, "step": 16580 }, { "epoch": 1.9623846699787082, "grad_norm": 3.712798833847046, "learning_rate": 4.880368685659564e-06, "loss": 0.3191, "step": 16590 }, { "epoch": 1.9635675419919565, "grad_norm": 2.318413496017456, "learning_rate": 4.879650466842232e-06, "loss": 0.313, "step": 16600 }, { "epoch": 1.9647504140052048, "grad_norm": 3.4627861976623535, "learning_rate": 4.878932248024898e-06, "loss": 0.3249, "step": 16610 }, { "epoch": 1.9659332860184529, "grad_norm": 2.2681097984313965, "learning_rate": 4.8782140292075655e-06, "loss": 0.3827, "step": 16620 }, { "epoch": 1.967116158031701, "grad_norm": 2.2495839595794678, "learning_rate": 4.8774958103902325e-06, "loss": 0.3035, "step": 16630 }, { "epoch": 1.9682990300449492, "grad_norm": 3.3230841159820557, "learning_rate": 4.876777591572899e-06, "loss": 0.2947, "step": 16640 }, { "epoch": 1.9694819020581973, "grad_norm": 3.0023653507232666, "learning_rate": 4.876059372755566e-06, "loss": 0.3282, "step": 16650 }, { "epoch": 1.9706647740714454, "grad_norm": 3.0990710258483887, "learning_rate": 4.875341153938233e-06, "loss": 0.3423, "step": 16660 }, { "epoch": 1.9718476460846937, "grad_norm": 4.0534586906433105, "learning_rate": 4.8746229351209e-06, "loss": 0.3292, "step": 16670 }, { "epoch": 1.973030518097942, "grad_norm": 2.5327095985412598, "learning_rate": 4.873904716303567e-06, "loss": 0.3534, "step": 16680 }, { "epoch": 1.97421339011119, "grad_norm": 2.820181131362915, "learning_rate": 4.873186497486235e-06, "loss": 0.3455, "step": 16690 }, { "epoch": 1.975396262124438, "grad_norm": 2.5984952449798584, "learning_rate": 4.872468278668901e-06, "loss": 0.3098, "step": 16700 }, { "epoch": 1.9765791341376864, "grad_norm": 2.8330180644989014, "learning_rate": 4.871750059851569e-06, "loss": 0.2865, "step": 16710 }, { "epoch": 1.9777620061509344, "grad_norm": 3.222081422805786, "learning_rate": 4.871031841034235e-06, "loss": 0.3366, "step": 16720 }, { "epoch": 1.9789448781641825, "grad_norm": 2.0557262897491455, "learning_rate": 4.8703136222169025e-06, "loss": 0.298, "step": 16730 }, { "epoch": 1.9801277501774308, "grad_norm": 2.3653440475463867, "learning_rate": 4.8695954033995695e-06, "loss": 0.3525, "step": 16740 }, { "epoch": 1.981310622190679, "grad_norm": 1.8951219320297241, "learning_rate": 4.868877184582236e-06, "loss": 0.2949, "step": 16750 }, { "epoch": 1.9824934942039272, "grad_norm": 3.2149484157562256, "learning_rate": 4.868158965764903e-06, "loss": 0.2989, "step": 16760 }, { "epoch": 1.9836763662171752, "grad_norm": 2.8718087673187256, "learning_rate": 4.86744074694757e-06, "loss": 0.3223, "step": 16770 }, { "epoch": 1.9848592382304235, "grad_norm": 2.507594585418701, "learning_rate": 4.866722528130237e-06, "loss": 0.2927, "step": 16780 }, { "epoch": 1.9860421102436716, "grad_norm": 2.338623523712158, "learning_rate": 4.866004309312904e-06, "loss": 0.3463, "step": 16790 }, { "epoch": 1.9872249822569197, "grad_norm": 1.9228732585906982, "learning_rate": 4.865286090495571e-06, "loss": 0.3451, "step": 16800 }, { "epoch": 1.988407854270168, "grad_norm": 1.9833261966705322, "learning_rate": 4.864567871678238e-06, "loss": 0.3127, "step": 16810 }, { "epoch": 1.9895907262834163, "grad_norm": 2.5209105014801025, "learning_rate": 4.863849652860905e-06, "loss": 0.3199, "step": 16820 }, { "epoch": 1.9907735982966643, "grad_norm": 3.8026509284973145, "learning_rate": 4.863131434043573e-06, "loss": 0.3096, "step": 16830 }, { "epoch": 1.9919564703099124, "grad_norm": 1.6769155263900757, "learning_rate": 4.862413215226239e-06, "loss": 0.3013, "step": 16840 }, { "epoch": 1.9931393423231607, "grad_norm": 2.8461453914642334, "learning_rate": 4.8616949964089065e-06, "loss": 0.2846, "step": 16850 }, { "epoch": 1.9943222143364088, "grad_norm": 2.2293806076049805, "learning_rate": 4.8609767775915725e-06, "loss": 0.3239, "step": 16860 }, { "epoch": 1.9955050863496568, "grad_norm": 3.478363275527954, "learning_rate": 4.86025855877424e-06, "loss": 0.3146, "step": 16870 }, { "epoch": 1.9966879583629051, "grad_norm": 2.097290277481079, "learning_rate": 4.859540339956906e-06, "loss": 0.3078, "step": 16880 }, { "epoch": 1.9978708303761534, "grad_norm": 3.2661945819854736, "learning_rate": 4.858822121139574e-06, "loss": 0.2909, "step": 16890 }, { "epoch": 1.9990537023894015, "grad_norm": 2.188856840133667, "learning_rate": 4.858103902322241e-06, "loss": 0.354, "step": 16900 }, { "epoch": 2.0002365744026496, "grad_norm": 2.378852128982544, "learning_rate": 4.857385683504908e-06, "loss": 0.3441, "step": 16910 }, { "epoch": 2.000473148805299, "eval_accuracy": 0.8596999035166517, "eval_loss": 0.32458868622779846, "eval_runtime": 77.9165, "eval_safe_aucpr": 0.9144648316642549, "eval_safe_f1": 0.8435192400460129, "eval_safe_fpr": 0.13461480972168205, "eval_safe_precision": 0.8346612814393244, "eval_safe_recall": 0.8525672279938492, "eval_samples_per_second": 771.518, "eval_steps_per_second": 12.064, "eval_unsafe_aucpr": 0.9524256073577522, "eval_unsafe_f1": 0.8728478818031057, "eval_unsafe_fpr": 0.1474327720061503, "eval_unsafe_precision": 0.8804404026886462, "eval_unsafe_recall": 0.8653851902783175, "step": 16912 }, { "epoch": 2.001419446415898, "grad_norm": 2.5568602085113525, "learning_rate": 4.856667464687575e-06, "loss": 0.3011, "step": 16920 }, { "epoch": 2.002602318429146, "grad_norm": 2.720656156539917, "learning_rate": 4.855949245870242e-06, "loss": 0.3187, "step": 16930 }, { "epoch": 2.003785190442394, "grad_norm": 2.677154779434204, "learning_rate": 4.85523102705291e-06, "loss": 0.2824, "step": 16940 }, { "epoch": 2.0049680624556423, "grad_norm": 2.2328920364379883, "learning_rate": 4.854512808235576e-06, "loss": 0.3051, "step": 16950 }, { "epoch": 2.0061509344688906, "grad_norm": 2.8281667232513428, "learning_rate": 4.8537945894182435e-06, "loss": 0.2961, "step": 16960 }, { "epoch": 2.0073338064821384, "grad_norm": 2.5169615745544434, "learning_rate": 4.8530763706009095e-06, "loss": 0.2839, "step": 16970 }, { "epoch": 2.0085166784953867, "grad_norm": 2.4663641452789307, "learning_rate": 4.852358151783577e-06, "loss": 0.2746, "step": 16980 }, { "epoch": 2.009699550508635, "grad_norm": 2.8655757904052734, "learning_rate": 4.851639932966243e-06, "loss": 0.2983, "step": 16990 }, { "epoch": 2.0108824225218833, "grad_norm": 2.2925379276275635, "learning_rate": 4.850921714148911e-06, "loss": 0.2793, "step": 17000 }, { "epoch": 2.012065294535131, "grad_norm": 2.860703945159912, "learning_rate": 4.850203495331578e-06, "loss": 0.2945, "step": 17010 }, { "epoch": 2.0132481665483795, "grad_norm": 1.9645798206329346, "learning_rate": 4.849485276514245e-06, "loss": 0.3051, "step": 17020 }, { "epoch": 2.0144310385616278, "grad_norm": 3.2021446228027344, "learning_rate": 4.848767057696912e-06, "loss": 0.3046, "step": 17030 }, { "epoch": 2.0156139105748756, "grad_norm": 2.800635576248169, "learning_rate": 4.848048838879579e-06, "loss": 0.2976, "step": 17040 }, { "epoch": 2.016796782588124, "grad_norm": 2.4295828342437744, "learning_rate": 4.847330620062246e-06, "loss": 0.2766, "step": 17050 }, { "epoch": 2.017979654601372, "grad_norm": 3.77390718460083, "learning_rate": 4.846612401244913e-06, "loss": 0.327, "step": 17060 }, { "epoch": 2.0191625266146205, "grad_norm": 2.4511208534240723, "learning_rate": 4.84589418242758e-06, "loss": 0.2611, "step": 17070 }, { "epoch": 2.0203453986278683, "grad_norm": 1.8200575113296509, "learning_rate": 4.8451759636102465e-06, "loss": 0.2896, "step": 17080 }, { "epoch": 2.0215282706411166, "grad_norm": 3.8342087268829346, "learning_rate": 4.8444577447929135e-06, "loss": 0.3108, "step": 17090 }, { "epoch": 2.022711142654365, "grad_norm": 2.991922616958618, "learning_rate": 4.84373952597558e-06, "loss": 0.2787, "step": 17100 }, { "epoch": 2.0238940146676128, "grad_norm": 2.300657033920288, "learning_rate": 4.843021307158247e-06, "loss": 0.2909, "step": 17110 }, { "epoch": 2.025076886680861, "grad_norm": 2.628596782684326, "learning_rate": 4.842303088340915e-06, "loss": 0.2577, "step": 17120 }, { "epoch": 2.0262597586941093, "grad_norm": 4.038423538208008, "learning_rate": 4.841584869523581e-06, "loss": 0.2979, "step": 17130 }, { "epoch": 2.0274426307073576, "grad_norm": 3.1706392765045166, "learning_rate": 4.840866650706249e-06, "loss": 0.2929, "step": 17140 }, { "epoch": 2.0286255027206055, "grad_norm": 3.052032232284546, "learning_rate": 4.840148431888915e-06, "loss": 0.3322, "step": 17150 }, { "epoch": 2.029808374733854, "grad_norm": 2.10490083694458, "learning_rate": 4.839430213071583e-06, "loss": 0.2729, "step": 17160 }, { "epoch": 2.030991246747102, "grad_norm": 3.376361608505249, "learning_rate": 4.83871199425425e-06, "loss": 0.2934, "step": 17170 }, { "epoch": 2.03217411876035, "grad_norm": 2.8784351348876953, "learning_rate": 4.837993775436917e-06, "loss": 0.3, "step": 17180 }, { "epoch": 2.033356990773598, "grad_norm": 2.5840699672698975, "learning_rate": 4.8372755566195835e-06, "loss": 0.2863, "step": 17190 }, { "epoch": 2.0345398627868465, "grad_norm": 3.836290121078491, "learning_rate": 4.8365573378022504e-06, "loss": 0.2774, "step": 17200 }, { "epoch": 2.035722734800095, "grad_norm": 2.947138547897339, "learning_rate": 4.835839118984918e-06, "loss": 0.3377, "step": 17210 }, { "epoch": 2.0369056068133427, "grad_norm": 3.6667375564575195, "learning_rate": 4.835120900167584e-06, "loss": 0.3329, "step": 17220 }, { "epoch": 2.038088478826591, "grad_norm": 2.7514986991882324, "learning_rate": 4.834402681350252e-06, "loss": 0.3048, "step": 17230 }, { "epoch": 2.0392713508398392, "grad_norm": 2.772559642791748, "learning_rate": 4.833684462532918e-06, "loss": 0.3044, "step": 17240 }, { "epoch": 2.040454222853087, "grad_norm": 3.929675340652466, "learning_rate": 4.832966243715586e-06, "loss": 0.3011, "step": 17250 }, { "epoch": 2.0416370948663354, "grad_norm": 3.0461666584014893, "learning_rate": 4.832248024898252e-06, "loss": 0.3615, "step": 17260 }, { "epoch": 2.0428199668795837, "grad_norm": 3.056796073913574, "learning_rate": 4.83152980608092e-06, "loss": 0.3246, "step": 17270 }, { "epoch": 2.044002838892832, "grad_norm": 3.44435453414917, "learning_rate": 4.830811587263587e-06, "loss": 0.3283, "step": 17280 }, { "epoch": 2.04518571090608, "grad_norm": 2.164142370223999, "learning_rate": 4.830093368446254e-06, "loss": 0.2769, "step": 17290 }, { "epoch": 2.046368582919328, "grad_norm": 2.2050135135650635, "learning_rate": 4.8293751496289205e-06, "loss": 0.2849, "step": 17300 }, { "epoch": 2.0475514549325764, "grad_norm": 1.9663429260253906, "learning_rate": 4.8286569308115874e-06, "loss": 0.3121, "step": 17310 }, { "epoch": 2.0487343269458242, "grad_norm": 1.7338041067123413, "learning_rate": 4.827938711994254e-06, "loss": 0.3041, "step": 17320 }, { "epoch": 2.0499171989590725, "grad_norm": 2.510803699493408, "learning_rate": 4.827220493176921e-06, "loss": 0.3124, "step": 17330 }, { "epoch": 2.051100070972321, "grad_norm": 2.887864112854004, "learning_rate": 4.826502274359588e-06, "loss": 0.3033, "step": 17340 }, { "epoch": 2.052282942985569, "grad_norm": 2.86704158782959, "learning_rate": 4.825784055542255e-06, "loss": 0.2912, "step": 17350 }, { "epoch": 2.053465814998817, "grad_norm": 3.1380202770233154, "learning_rate": 4.825065836724922e-06, "loss": 0.3342, "step": 17360 }, { "epoch": 2.0546486870120653, "grad_norm": 2.568093776702881, "learning_rate": 4.824347617907589e-06, "loss": 0.3281, "step": 17370 }, { "epoch": 2.0558315590253136, "grad_norm": 2.2394888401031494, "learning_rate": 4.823629399090256e-06, "loss": 0.3013, "step": 17380 }, { "epoch": 2.057014431038562, "grad_norm": 2.0911223888397217, "learning_rate": 4.822911180272924e-06, "loss": 0.3067, "step": 17390 }, { "epoch": 2.0581973030518097, "grad_norm": 2.685105800628662, "learning_rate": 4.82219296145559e-06, "loss": 0.328, "step": 17400 }, { "epoch": 2.059380175065058, "grad_norm": 2.4722275733947754, "learning_rate": 4.8214747426382575e-06, "loss": 0.2925, "step": 17410 }, { "epoch": 2.0605630470783063, "grad_norm": 3.348921537399292, "learning_rate": 4.820756523820924e-06, "loss": 0.3009, "step": 17420 }, { "epoch": 2.061745919091554, "grad_norm": 2.4126553535461426, "learning_rate": 4.820038305003591e-06, "loss": 0.3329, "step": 17430 }, { "epoch": 2.0629287911048024, "grad_norm": 3.171835422515869, "learning_rate": 4.819320086186258e-06, "loss": 0.2819, "step": 17440 }, { "epoch": 2.0641116631180507, "grad_norm": 3.453155279159546, "learning_rate": 4.818601867368925e-06, "loss": 0.3032, "step": 17450 }, { "epoch": 2.065294535131299, "grad_norm": 3.60829758644104, "learning_rate": 4.817883648551592e-06, "loss": 0.311, "step": 17460 }, { "epoch": 2.066477407144547, "grad_norm": 2.449158191680908, "learning_rate": 4.817165429734259e-06, "loss": 0.2647, "step": 17470 }, { "epoch": 2.067660279157795, "grad_norm": 4.3593220710754395, "learning_rate": 4.816447210916927e-06, "loss": 0.3355, "step": 17480 }, { "epoch": 2.0688431511710434, "grad_norm": 3.0162782669067383, "learning_rate": 4.815728992099593e-06, "loss": 0.2849, "step": 17490 }, { "epoch": 2.0700260231842913, "grad_norm": 2.3126392364501953, "learning_rate": 4.815010773282261e-06, "loss": 0.3154, "step": 17500 }, { "epoch": 2.0712088951975396, "grad_norm": 2.1091065406799316, "learning_rate": 4.814292554464927e-06, "loss": 0.2635, "step": 17510 }, { "epoch": 2.072391767210788, "grad_norm": 2.544283628463745, "learning_rate": 4.8135743356475945e-06, "loss": 0.3101, "step": 17520 }, { "epoch": 2.073574639224036, "grad_norm": 3.6290197372436523, "learning_rate": 4.812856116830261e-06, "loss": 0.2831, "step": 17530 }, { "epoch": 2.074757511237284, "grad_norm": 3.472325086593628, "learning_rate": 4.812137898012928e-06, "loss": 0.3088, "step": 17540 }, { "epoch": 2.0759403832505323, "grad_norm": 3.375314950942993, "learning_rate": 4.811419679195595e-06, "loss": 0.305, "step": 17550 }, { "epoch": 2.0771232552637806, "grad_norm": 1.766257882118225, "learning_rate": 4.810701460378262e-06, "loss": 0.2758, "step": 17560 }, { "epoch": 2.0783061272770285, "grad_norm": 3.06916880607605, "learning_rate": 4.809983241560929e-06, "loss": 0.2686, "step": 17570 }, { "epoch": 2.0794889992902768, "grad_norm": 2.8144593238830566, "learning_rate": 4.809265022743596e-06, "loss": 0.3432, "step": 17580 }, { "epoch": 2.080671871303525, "grad_norm": 2.9351181983947754, "learning_rate": 4.808546803926263e-06, "loss": 0.2899, "step": 17590 }, { "epoch": 2.0818547433167733, "grad_norm": 3.8633251190185547, "learning_rate": 4.80782858510893e-06, "loss": 0.3236, "step": 17600 }, { "epoch": 2.083037615330021, "grad_norm": 2.226646900177002, "learning_rate": 4.807110366291597e-06, "loss": 0.3458, "step": 17610 }, { "epoch": 2.0842204873432695, "grad_norm": 3.1733455657958984, "learning_rate": 4.806392147474264e-06, "loss": 0.3047, "step": 17620 }, { "epoch": 2.0854033593565178, "grad_norm": 2.6069700717926025, "learning_rate": 4.805673928656931e-06, "loss": 0.3481, "step": 17630 }, { "epoch": 2.0865862313697656, "grad_norm": 2.2039647102355957, "learning_rate": 4.804955709839598e-06, "loss": 0.3056, "step": 17640 }, { "epoch": 2.087769103383014, "grad_norm": 2.762326717376709, "learning_rate": 4.8042374910222645e-06, "loss": 0.2749, "step": 17650 }, { "epoch": 2.088951975396262, "grad_norm": 3.3768422603607178, "learning_rate": 4.803519272204932e-06, "loss": 0.3152, "step": 17660 }, { "epoch": 2.0901348474095105, "grad_norm": 3.592968463897705, "learning_rate": 4.802801053387598e-06, "loss": 0.322, "step": 17670 }, { "epoch": 2.0913177194227583, "grad_norm": 3.6042120456695557, "learning_rate": 4.802082834570266e-06, "loss": 0.3365, "step": 17680 }, { "epoch": 2.0925005914360066, "grad_norm": 3.1354405879974365, "learning_rate": 4.801364615752933e-06, "loss": 0.3007, "step": 17690 }, { "epoch": 2.093683463449255, "grad_norm": 2.3326992988586426, "learning_rate": 4.8006463969356e-06, "loss": 0.262, "step": 17700 }, { "epoch": 2.094866335462503, "grad_norm": 2.435826063156128, "learning_rate": 4.799928178118267e-06, "loss": 0.2746, "step": 17710 }, { "epoch": 2.096049207475751, "grad_norm": 2.6682724952697754, "learning_rate": 4.799209959300934e-06, "loss": 0.2686, "step": 17720 }, { "epoch": 2.0972320794889994, "grad_norm": 3.148589611053467, "learning_rate": 4.798491740483601e-06, "loss": 0.3076, "step": 17730 }, { "epoch": 2.0984149515022477, "grad_norm": 3.278606414794922, "learning_rate": 4.797773521666268e-06, "loss": 0.2969, "step": 17740 }, { "epoch": 2.0995978235154955, "grad_norm": 2.676833391189575, "learning_rate": 4.797055302848935e-06, "loss": 0.335, "step": 17750 }, { "epoch": 2.100780695528744, "grad_norm": 2.208115339279175, "learning_rate": 4.7963370840316015e-06, "loss": 0.3225, "step": 17760 }, { "epoch": 2.101963567541992, "grad_norm": 2.8714101314544678, "learning_rate": 4.795618865214269e-06, "loss": 0.3168, "step": 17770 }, { "epoch": 2.10314643955524, "grad_norm": 3.031416416168213, "learning_rate": 4.794900646396935e-06, "loss": 0.322, "step": 17780 }, { "epoch": 2.1043293115684882, "grad_norm": 3.5075860023498535, "learning_rate": 4.794182427579603e-06, "loss": 0.263, "step": 17790 }, { "epoch": 2.1055121835817365, "grad_norm": 2.9896793365478516, "learning_rate": 4.793464208762269e-06, "loss": 0.3071, "step": 17800 }, { "epoch": 2.106695055594985, "grad_norm": 2.4877758026123047, "learning_rate": 4.792745989944937e-06, "loss": 0.3474, "step": 17810 }, { "epoch": 2.1078779276082327, "grad_norm": 2.5066843032836914, "learning_rate": 4.792027771127604e-06, "loss": 0.2792, "step": 17820 }, { "epoch": 2.109060799621481, "grad_norm": 4.5107879638671875, "learning_rate": 4.791309552310271e-06, "loss": 0.3341, "step": 17830 }, { "epoch": 2.1102436716347293, "grad_norm": 2.9022371768951416, "learning_rate": 4.790591333492938e-06, "loss": 0.3126, "step": 17840 }, { "epoch": 2.111426543647977, "grad_norm": 2.792145252227783, "learning_rate": 4.789873114675605e-06, "loss": 0.2626, "step": 17850 }, { "epoch": 2.1126094156612254, "grad_norm": 2.527423143386841, "learning_rate": 4.789154895858272e-06, "loss": 0.254, "step": 17860 }, { "epoch": 2.1137922876744737, "grad_norm": 2.630368232727051, "learning_rate": 4.7884366770409385e-06, "loss": 0.2923, "step": 17870 }, { "epoch": 2.114975159687722, "grad_norm": 3.8062503337860107, "learning_rate": 4.7877184582236054e-06, "loss": 0.2795, "step": 17880 }, { "epoch": 2.11615803170097, "grad_norm": 2.3464980125427246, "learning_rate": 4.787000239406272e-06, "loss": 0.2899, "step": 17890 }, { "epoch": 2.117340903714218, "grad_norm": 3.2609386444091797, "learning_rate": 4.786282020588939e-06, "loss": 0.3082, "step": 17900 }, { "epoch": 2.1185237757274664, "grad_norm": 3.002751111984253, "learning_rate": 4.785563801771606e-06, "loss": 0.3, "step": 17910 }, { "epoch": 2.1197066477407143, "grad_norm": 4.029548645019531, "learning_rate": 4.784845582954273e-06, "loss": 0.2626, "step": 17920 }, { "epoch": 2.1208895197539626, "grad_norm": 3.2029058933258057, "learning_rate": 4.784127364136941e-06, "loss": 0.2857, "step": 17930 }, { "epoch": 2.122072391767211, "grad_norm": 3.222637176513672, "learning_rate": 4.783409145319608e-06, "loss": 0.2907, "step": 17940 }, { "epoch": 2.123255263780459, "grad_norm": 3.828500509262085, "learning_rate": 4.782690926502275e-06, "loss": 0.317, "step": 17950 }, { "epoch": 2.124438135793707, "grad_norm": 2.0794427394866943, "learning_rate": 4.781972707684942e-06, "loss": 0.2811, "step": 17960 }, { "epoch": 2.1256210078069553, "grad_norm": 3.8097901344299316, "learning_rate": 4.781254488867609e-06, "loss": 0.3, "step": 17970 }, { "epoch": 2.1268038798202036, "grad_norm": 3.452726125717163, "learning_rate": 4.7805362700502755e-06, "loss": 0.3051, "step": 17980 }, { "epoch": 2.1279867518334514, "grad_norm": 2.9992544651031494, "learning_rate": 4.7798180512329424e-06, "loss": 0.3314, "step": 17990 }, { "epoch": 2.1291696238466997, "grad_norm": 3.1494879722595215, "learning_rate": 4.779099832415609e-06, "loss": 0.2847, "step": 18000 }, { "epoch": 2.130352495859948, "grad_norm": 2.6865220069885254, "learning_rate": 4.778381613598276e-06, "loss": 0.3348, "step": 18010 }, { "epoch": 2.1315353678731963, "grad_norm": 2.88586163520813, "learning_rate": 4.777663394780943e-06, "loss": 0.315, "step": 18020 }, { "epoch": 2.132718239886444, "grad_norm": 1.7792019844055176, "learning_rate": 4.77694517596361e-06, "loss": 0.297, "step": 18030 }, { "epoch": 2.1339011118996924, "grad_norm": 2.483880043029785, "learning_rate": 4.776226957146278e-06, "loss": 0.2953, "step": 18040 }, { "epoch": 2.1350839839129407, "grad_norm": 2.2779011726379395, "learning_rate": 4.775508738328944e-06, "loss": 0.2728, "step": 18050 }, { "epoch": 2.1362668559261886, "grad_norm": 2.3991661071777344, "learning_rate": 4.774790519511612e-06, "loss": 0.2754, "step": 18060 }, { "epoch": 2.137449727939437, "grad_norm": 2.5661368370056152, "learning_rate": 4.774072300694278e-06, "loss": 0.3089, "step": 18070 }, { "epoch": 2.138632599952685, "grad_norm": 3.713167667388916, "learning_rate": 4.773354081876946e-06, "loss": 0.3264, "step": 18080 }, { "epoch": 2.1398154719659335, "grad_norm": 3.65191388130188, "learning_rate": 4.772635863059612e-06, "loss": 0.3124, "step": 18090 }, { "epoch": 2.1409983439791813, "grad_norm": 3.8617939949035645, "learning_rate": 4.7719176442422794e-06, "loss": 0.2983, "step": 18100 }, { "epoch": 2.1421812159924296, "grad_norm": 3.0634047985076904, "learning_rate": 4.771199425424946e-06, "loss": 0.3095, "step": 18110 }, { "epoch": 2.143364088005678, "grad_norm": 2.234780788421631, "learning_rate": 4.770481206607613e-06, "loss": 0.3135, "step": 18120 }, { "epoch": 2.1445469600189258, "grad_norm": 4.0136308670043945, "learning_rate": 4.76976298779028e-06, "loss": 0.3775, "step": 18130 }, { "epoch": 2.145729832032174, "grad_norm": 2.7620391845703125, "learning_rate": 4.769044768972947e-06, "loss": 0.2839, "step": 18140 }, { "epoch": 2.1469127040454223, "grad_norm": 2.1845767498016357, "learning_rate": 4.768326550155614e-06, "loss": 0.2854, "step": 18150 }, { "epoch": 2.1480955760586706, "grad_norm": 3.211034059524536, "learning_rate": 4.767608331338281e-06, "loss": 0.3039, "step": 18160 }, { "epoch": 2.1492784480719185, "grad_norm": 3.8874318599700928, "learning_rate": 4.766890112520948e-06, "loss": 0.3248, "step": 18170 }, { "epoch": 2.1504613200851668, "grad_norm": 2.706275224685669, "learning_rate": 4.766171893703615e-06, "loss": 0.3081, "step": 18180 }, { "epoch": 2.151644192098415, "grad_norm": 2.9191946983337402, "learning_rate": 4.765453674886283e-06, "loss": 0.2904, "step": 18190 }, { "epoch": 2.152827064111663, "grad_norm": 2.4864437580108643, "learning_rate": 4.7647354560689495e-06, "loss": 0.3024, "step": 18200 }, { "epoch": 2.154009936124911, "grad_norm": 2.5812764167785645, "learning_rate": 4.7640172372516164e-06, "loss": 0.3345, "step": 18210 }, { "epoch": 2.1551928081381595, "grad_norm": 3.733234167098999, "learning_rate": 4.763299018434283e-06, "loss": 0.2855, "step": 18220 }, { "epoch": 2.156375680151408, "grad_norm": 2.7658438682556152, "learning_rate": 4.76258079961695e-06, "loss": 0.2872, "step": 18230 }, { "epoch": 2.1575585521646556, "grad_norm": 3.477675437927246, "learning_rate": 4.761862580799617e-06, "loss": 0.3414, "step": 18240 }, { "epoch": 2.158741424177904, "grad_norm": 2.810882806777954, "learning_rate": 4.761144361982284e-06, "loss": 0.3015, "step": 18250 }, { "epoch": 2.1599242961911522, "grad_norm": 2.0806736946105957, "learning_rate": 4.760426143164951e-06, "loss": 0.3025, "step": 18260 }, { "epoch": 2.1611071682044, "grad_norm": 3.2939205169677734, "learning_rate": 4.759707924347618e-06, "loss": 0.2662, "step": 18270 }, { "epoch": 2.1622900402176484, "grad_norm": 3.723284959793091, "learning_rate": 4.758989705530285e-06, "loss": 0.314, "step": 18280 }, { "epoch": 2.1634729122308967, "grad_norm": 3.262580394744873, "learning_rate": 4.758271486712952e-06, "loss": 0.3444, "step": 18290 }, { "epoch": 2.164655784244145, "grad_norm": 3.2462034225463867, "learning_rate": 4.757553267895619e-06, "loss": 0.3254, "step": 18300 }, { "epoch": 2.165838656257393, "grad_norm": 1.9273457527160645, "learning_rate": 4.7568350490782865e-06, "loss": 0.3244, "step": 18310 }, { "epoch": 2.167021528270641, "grad_norm": 2.0826480388641357, "learning_rate": 4.756116830260953e-06, "loss": 0.2756, "step": 18320 }, { "epoch": 2.1682044002838894, "grad_norm": 2.599860906600952, "learning_rate": 4.75539861144362e-06, "loss": 0.3436, "step": 18330 }, { "epoch": 2.1693872722971372, "grad_norm": 3.9145467281341553, "learning_rate": 4.754680392626286e-06, "loss": 0.3371, "step": 18340 }, { "epoch": 2.1705701443103855, "grad_norm": 2.7741270065307617, "learning_rate": 4.753962173808954e-06, "loss": 0.2983, "step": 18350 }, { "epoch": 2.171753016323634, "grad_norm": 3.1485610008239746, "learning_rate": 4.75324395499162e-06, "loss": 0.3037, "step": 18360 }, { "epoch": 2.172935888336882, "grad_norm": 2.2255494594573975, "learning_rate": 4.752525736174288e-06, "loss": 0.303, "step": 18370 }, { "epoch": 2.17411876035013, "grad_norm": 2.101179599761963, "learning_rate": 4.751807517356955e-06, "loss": 0.3207, "step": 18380 }, { "epoch": 2.1753016323633783, "grad_norm": 2.9513349533081055, "learning_rate": 4.751089298539622e-06, "loss": 0.2832, "step": 18390 }, { "epoch": 2.1764845043766265, "grad_norm": 1.9937074184417725, "learning_rate": 4.750371079722289e-06, "loss": 0.2832, "step": 18400 }, { "epoch": 2.1776673763898744, "grad_norm": 2.811944007873535, "learning_rate": 4.749652860904956e-06, "loss": 0.2897, "step": 18410 }, { "epoch": 2.1788502484031227, "grad_norm": 2.1398425102233887, "learning_rate": 4.748934642087623e-06, "loss": 0.3283, "step": 18420 }, { "epoch": 2.180033120416371, "grad_norm": 2.717740058898926, "learning_rate": 4.7482164232702896e-06, "loss": 0.2815, "step": 18430 }, { "epoch": 2.1812159924296193, "grad_norm": 3.368557929992676, "learning_rate": 4.7474982044529565e-06, "loss": 0.2796, "step": 18440 }, { "epoch": 2.182398864442867, "grad_norm": 3.1988909244537354, "learning_rate": 4.746779985635623e-06, "loss": 0.3179, "step": 18450 }, { "epoch": 2.1835817364561154, "grad_norm": 2.1743054389953613, "learning_rate": 4.746061766818291e-06, "loss": 0.3072, "step": 18460 }, { "epoch": 2.1847646084693637, "grad_norm": 2.70023250579834, "learning_rate": 4.745343548000958e-06, "loss": 0.318, "step": 18470 }, { "epoch": 2.1859474804826116, "grad_norm": 2.7275190353393555, "learning_rate": 4.744625329183625e-06, "loss": 0.3574, "step": 18480 }, { "epoch": 2.18713035249586, "grad_norm": 2.8033287525177, "learning_rate": 4.743907110366292e-06, "loss": 0.3234, "step": 18490 }, { "epoch": 2.188313224509108, "grad_norm": 2.5095129013061523, "learning_rate": 4.743188891548959e-06, "loss": 0.3088, "step": 18500 }, { "epoch": 2.1894960965223564, "grad_norm": 2.745701551437378, "learning_rate": 4.742470672731626e-06, "loss": 0.3002, "step": 18510 }, { "epoch": 2.1906789685356043, "grad_norm": 4.642023086547852, "learning_rate": 4.741752453914293e-06, "loss": 0.3005, "step": 18520 }, { "epoch": 2.1918618405488526, "grad_norm": 3.1611831188201904, "learning_rate": 4.74103423509696e-06, "loss": 0.2878, "step": 18530 }, { "epoch": 2.193044712562101, "grad_norm": 2.574026107788086, "learning_rate": 4.7403160162796266e-06, "loss": 0.3123, "step": 18540 }, { "epoch": 2.1942275845753487, "grad_norm": 3.551522970199585, "learning_rate": 4.7395977974622935e-06, "loss": 0.3249, "step": 18550 }, { "epoch": 2.195410456588597, "grad_norm": 3.1419999599456787, "learning_rate": 4.73887957864496e-06, "loss": 0.3533, "step": 18560 }, { "epoch": 2.1965933286018453, "grad_norm": 2.939589500427246, "learning_rate": 4.738161359827627e-06, "loss": 0.3039, "step": 18570 }, { "epoch": 2.1977762006150936, "grad_norm": 2.616450786590576, "learning_rate": 4.737443141010295e-06, "loss": 0.3401, "step": 18580 }, { "epoch": 2.1989590726283414, "grad_norm": 2.348172187805176, "learning_rate": 4.736724922192961e-06, "loss": 0.3196, "step": 18590 }, { "epoch": 2.2001419446415897, "grad_norm": 3.9113495349884033, "learning_rate": 4.736006703375629e-06, "loss": 0.3107, "step": 18600 }, { "epoch": 2.201324816654838, "grad_norm": 3.003316640853882, "learning_rate": 4.735288484558295e-06, "loss": 0.274, "step": 18610 }, { "epoch": 2.202507688668086, "grad_norm": 2.2264556884765625, "learning_rate": 4.734570265740963e-06, "loss": 0.2746, "step": 18620 }, { "epoch": 2.203690560681334, "grad_norm": 3.4186272621154785, "learning_rate": 4.733852046923629e-06, "loss": 0.3261, "step": 18630 }, { "epoch": 2.2048734326945825, "grad_norm": 2.3731203079223633, "learning_rate": 4.733133828106297e-06, "loss": 0.2993, "step": 18640 }, { "epoch": 2.2060563047078308, "grad_norm": 2.3863048553466797, "learning_rate": 4.7324156092889636e-06, "loss": 0.3394, "step": 18650 }, { "epoch": 2.2072391767210786, "grad_norm": 2.5173656940460205, "learning_rate": 4.7316973904716305e-06, "loss": 0.3133, "step": 18660 }, { "epoch": 2.208422048734327, "grad_norm": 3.6146798133850098, "learning_rate": 4.730979171654297e-06, "loss": 0.3189, "step": 18670 }, { "epoch": 2.209604920747575, "grad_norm": 5.136168003082275, "learning_rate": 4.730260952836964e-06, "loss": 0.2865, "step": 18680 }, { "epoch": 2.210787792760823, "grad_norm": 3.4043924808502197, "learning_rate": 4.729542734019631e-06, "loss": 0.2866, "step": 18690 }, { "epoch": 2.2119706647740713, "grad_norm": 3.6937315464019775, "learning_rate": 4.728824515202298e-06, "loss": 0.3092, "step": 18700 }, { "epoch": 2.2131535367873196, "grad_norm": 2.4753952026367188, "learning_rate": 4.728106296384966e-06, "loss": 0.2671, "step": 18710 }, { "epoch": 2.214336408800568, "grad_norm": 3.532831907272339, "learning_rate": 4.727388077567632e-06, "loss": 0.3366, "step": 18720 }, { "epoch": 2.2155192808138158, "grad_norm": 2.442025899887085, "learning_rate": 4.7266698587503e-06, "loss": 0.2818, "step": 18730 }, { "epoch": 2.216702152827064, "grad_norm": 2.913315773010254, "learning_rate": 4.725951639932967e-06, "loss": 0.3276, "step": 18740 }, { "epoch": 2.2178850248403124, "grad_norm": 3.2740283012390137, "learning_rate": 4.725233421115634e-06, "loss": 0.3276, "step": 18750 }, { "epoch": 2.2190678968535607, "grad_norm": 2.6545071601867676, "learning_rate": 4.7245152022983006e-06, "loss": 0.2847, "step": 18760 }, { "epoch": 2.2202507688668085, "grad_norm": 3.57932186126709, "learning_rate": 4.7237969834809675e-06, "loss": 0.2928, "step": 18770 }, { "epoch": 2.221433640880057, "grad_norm": 2.417505979537964, "learning_rate": 4.723078764663634e-06, "loss": 0.2684, "step": 18780 }, { "epoch": 2.222616512893305, "grad_norm": 2.3136589527130127, "learning_rate": 4.722360545846301e-06, "loss": 0.3005, "step": 18790 }, { "epoch": 2.223799384906553, "grad_norm": 2.154871940612793, "learning_rate": 4.721642327028968e-06, "loss": 0.3537, "step": 18800 }, { "epoch": 2.2249822569198012, "grad_norm": 3.6316823959350586, "learning_rate": 4.720924108211635e-06, "loss": 0.2946, "step": 18810 }, { "epoch": 2.2261651289330495, "grad_norm": 3.7252657413482666, "learning_rate": 4.720205889394302e-06, "loss": 0.291, "step": 18820 }, { "epoch": 2.227348000946298, "grad_norm": 3.5270731449127197, "learning_rate": 4.719487670576969e-06, "loss": 0.2811, "step": 18830 }, { "epoch": 2.2285308729595457, "grad_norm": 3.1668782234191895, "learning_rate": 4.718769451759636e-06, "loss": 0.2607, "step": 18840 }, { "epoch": 2.229713744972794, "grad_norm": 4.512148380279541, "learning_rate": 4.718051232942304e-06, "loss": 0.3083, "step": 18850 }, { "epoch": 2.2308966169860422, "grad_norm": 3.8025619983673096, "learning_rate": 4.71733301412497e-06, "loss": 0.2977, "step": 18860 }, { "epoch": 2.23207948899929, "grad_norm": 2.41748309135437, "learning_rate": 4.7166147953076376e-06, "loss": 0.2873, "step": 18870 }, { "epoch": 2.2332623610125384, "grad_norm": 3.397749662399292, "learning_rate": 4.715896576490304e-06, "loss": 0.2936, "step": 18880 }, { "epoch": 2.2344452330257867, "grad_norm": 2.466710090637207, "learning_rate": 4.715178357672971e-06, "loss": 0.3118, "step": 18890 }, { "epoch": 2.235628105039035, "grad_norm": 3.05507493019104, "learning_rate": 4.7144601388556375e-06, "loss": 0.3089, "step": 18900 }, { "epoch": 2.236810977052283, "grad_norm": 3.3372766971588135, "learning_rate": 4.713741920038305e-06, "loss": 0.3141, "step": 18910 }, { "epoch": 2.237993849065531, "grad_norm": 3.158478260040283, "learning_rate": 4.713023701220972e-06, "loss": 0.3711, "step": 18920 }, { "epoch": 2.2391767210787794, "grad_norm": 2.5206143856048584, "learning_rate": 4.712305482403639e-06, "loss": 0.2905, "step": 18930 }, { "epoch": 2.2403595930920273, "grad_norm": 1.9004238843917847, "learning_rate": 4.711587263586306e-06, "loss": 0.3032, "step": 18940 }, { "epoch": 2.2415424651052756, "grad_norm": 2.576606512069702, "learning_rate": 4.710869044768973e-06, "loss": 0.3181, "step": 18950 }, { "epoch": 2.242725337118524, "grad_norm": 3.5385725498199463, "learning_rate": 4.710150825951641e-06, "loss": 0.3029, "step": 18960 }, { "epoch": 2.243908209131772, "grad_norm": 3.0396499633789062, "learning_rate": 4.709432607134307e-06, "loss": 0.3044, "step": 18970 }, { "epoch": 2.24509108114502, "grad_norm": 1.7584151029586792, "learning_rate": 4.7087143883169746e-06, "loss": 0.3075, "step": 18980 }, { "epoch": 2.2462739531582683, "grad_norm": 2.714115858078003, "learning_rate": 4.707996169499641e-06, "loss": 0.3015, "step": 18990 }, { "epoch": 2.2474568251715166, "grad_norm": 2.1493730545043945, "learning_rate": 4.707277950682308e-06, "loss": 0.3111, "step": 19000 }, { "epoch": 2.2486396971847644, "grad_norm": 3.415705680847168, "learning_rate": 4.7065597318649745e-06, "loss": 0.3232, "step": 19010 }, { "epoch": 2.2498225691980127, "grad_norm": 3.356984853744507, "learning_rate": 4.705841513047642e-06, "loss": 0.3268, "step": 19020 }, { "epoch": 2.250532292405962, "eval_accuracy": 0.8579532222111321, "eval_loss": 0.33104264736175537, "eval_runtime": 77.9569, "eval_safe_aucpr": 0.9135082414503558, "eval_safe_f1": 0.8382121677182213, "eval_safe_fpr": 0.1194583121580816, "eval_safe_precision": 0.8469903507428397, "eval_safe_recall": 0.829614071934891, "eval_samples_per_second": 771.118, "eval_steps_per_second": 12.058, "eval_unsafe_aucpr": 0.9516369514480087, "eval_unsafe_f1": 0.8734006434491245, "eval_unsafe_fpr": 0.17038592806510833, "eval_unsafe_precision": 0.8663744926172128, "eval_unsafe_recall": 0.880541687841918, "step": 19026 }, { "epoch": 2.251005441211261, "grad_norm": 4.034279823303223, "learning_rate": 4.705123294230309e-06, "loss": 0.2868, "step": 19030 }, { "epoch": 2.2521883132245093, "grad_norm": 2.953068733215332, "learning_rate": 4.704405075412976e-06, "loss": 0.3178, "step": 19040 }, { "epoch": 2.253371185237757, "grad_norm": 3.3599815368652344, "learning_rate": 4.703686856595643e-06, "loss": 0.321, "step": 19050 }, { "epoch": 2.2545540572510054, "grad_norm": 3.519252300262451, "learning_rate": 4.70296863777831e-06, "loss": 0.3394, "step": 19060 }, { "epoch": 2.2557369292642537, "grad_norm": 2.8658931255340576, "learning_rate": 4.702250418960977e-06, "loss": 0.3005, "step": 19070 }, { "epoch": 2.2569198012775016, "grad_norm": 3.4949440956115723, "learning_rate": 4.701532200143644e-06, "loss": 0.2953, "step": 19080 }, { "epoch": 2.25810267329075, "grad_norm": 3.6394600868225098, "learning_rate": 4.700813981326311e-06, "loss": 0.3103, "step": 19090 }, { "epoch": 2.259285545303998, "grad_norm": 2.5448102951049805, "learning_rate": 4.700095762508978e-06, "loss": 0.3538, "step": 19100 }, { "epoch": 2.2604684173172465, "grad_norm": 3.217200517654419, "learning_rate": 4.6993775436916446e-06, "loss": 0.3279, "step": 19110 }, { "epoch": 2.2616512893304943, "grad_norm": 3.0768990516662598, "learning_rate": 4.698659324874312e-06, "loss": 0.2874, "step": 19120 }, { "epoch": 2.2628341613437426, "grad_norm": 2.182265281677246, "learning_rate": 4.697941106056978e-06, "loss": 0.3065, "step": 19130 }, { "epoch": 2.264017033356991, "grad_norm": 2.897308111190796, "learning_rate": 4.697222887239646e-06, "loss": 0.322, "step": 19140 }, { "epoch": 2.265199905370239, "grad_norm": 3.513573408126831, "learning_rate": 4.696504668422312e-06, "loss": 0.3455, "step": 19150 }, { "epoch": 2.266382777383487, "grad_norm": 2.9148173332214355, "learning_rate": 4.69578644960498e-06, "loss": 0.3234, "step": 19160 }, { "epoch": 2.2675656493967353, "grad_norm": 2.3201258182525635, "learning_rate": 4.695068230787646e-06, "loss": 0.3156, "step": 19170 }, { "epoch": 2.2687485214099836, "grad_norm": 2.708378791809082, "learning_rate": 4.694350011970314e-06, "loss": 0.3013, "step": 19180 }, { "epoch": 2.2699313934232315, "grad_norm": 3.0447473526000977, "learning_rate": 4.693631793152981e-06, "loss": 0.3114, "step": 19190 }, { "epoch": 2.2711142654364798, "grad_norm": 3.66853666305542, "learning_rate": 4.692913574335648e-06, "loss": 0.2915, "step": 19200 }, { "epoch": 2.272297137449728, "grad_norm": 3.050086498260498, "learning_rate": 4.692195355518315e-06, "loss": 0.3295, "step": 19210 }, { "epoch": 2.2734800094629763, "grad_norm": 3.328956365585327, "learning_rate": 4.6914771367009816e-06, "loss": 0.3121, "step": 19220 }, { "epoch": 2.274662881476224, "grad_norm": 1.9344512224197388, "learning_rate": 4.690758917883649e-06, "loss": 0.2827, "step": 19230 }, { "epoch": 2.2758457534894725, "grad_norm": 3.0727407932281494, "learning_rate": 4.690040699066315e-06, "loss": 0.2722, "step": 19240 }, { "epoch": 2.277028625502721, "grad_norm": 2.5586421489715576, "learning_rate": 4.689322480248983e-06, "loss": 0.2848, "step": 19250 }, { "epoch": 2.2782114975159686, "grad_norm": 2.8167285919189453, "learning_rate": 4.688604261431649e-06, "loss": 0.3229, "step": 19260 }, { "epoch": 2.279394369529217, "grad_norm": 3.2754266262054443, "learning_rate": 4.687886042614317e-06, "loss": 0.3083, "step": 19270 }, { "epoch": 2.280577241542465, "grad_norm": 2.7070839405059814, "learning_rate": 4.687167823796983e-06, "loss": 0.3037, "step": 19280 }, { "epoch": 2.2817601135557135, "grad_norm": 3.1131722927093506, "learning_rate": 4.686449604979651e-06, "loss": 0.2822, "step": 19290 }, { "epoch": 2.2829429855689614, "grad_norm": 3.0373032093048096, "learning_rate": 4.685731386162318e-06, "loss": 0.3162, "step": 19300 }, { "epoch": 2.2841258575822097, "grad_norm": 2.733614444732666, "learning_rate": 4.685013167344985e-06, "loss": 0.3181, "step": 19310 }, { "epoch": 2.285308729595458, "grad_norm": 2.5604472160339355, "learning_rate": 4.684294948527652e-06, "loss": 0.319, "step": 19320 }, { "epoch": 2.286491601608706, "grad_norm": 2.2983901500701904, "learning_rate": 4.6835767297103186e-06, "loss": 0.2795, "step": 19330 }, { "epoch": 2.287674473621954, "grad_norm": 3.369814872741699, "learning_rate": 4.6828585108929855e-06, "loss": 0.2908, "step": 19340 }, { "epoch": 2.2888573456352024, "grad_norm": 2.4933390617370605, "learning_rate": 4.682140292075652e-06, "loss": 0.2893, "step": 19350 }, { "epoch": 2.2900402176484507, "grad_norm": 3.431755304336548, "learning_rate": 4.681422073258319e-06, "loss": 0.2856, "step": 19360 }, { "epoch": 2.2912230896616985, "grad_norm": 3.2312777042388916, "learning_rate": 4.680703854440986e-06, "loss": 0.2634, "step": 19370 }, { "epoch": 2.292405961674947, "grad_norm": 2.5952422618865967, "learning_rate": 4.679985635623653e-06, "loss": 0.3406, "step": 19380 }, { "epoch": 2.293588833688195, "grad_norm": 2.603264093399048, "learning_rate": 4.679267416806321e-06, "loss": 0.3124, "step": 19390 }, { "epoch": 2.294771705701443, "grad_norm": 2.6363089084625244, "learning_rate": 4.678549197988987e-06, "loss": 0.2636, "step": 19400 }, { "epoch": 2.2959545777146912, "grad_norm": 3.3012826442718506, "learning_rate": 4.677830979171655e-06, "loss": 0.2939, "step": 19410 }, { "epoch": 2.2971374497279395, "grad_norm": 3.1667308807373047, "learning_rate": 4.677112760354321e-06, "loss": 0.285, "step": 19420 }, { "epoch": 2.298320321741188, "grad_norm": 2.783691167831421, "learning_rate": 4.676394541536989e-06, "loss": 0.336, "step": 19430 }, { "epoch": 2.2995031937544357, "grad_norm": 2.593380928039551, "learning_rate": 4.675676322719655e-06, "loss": 0.3245, "step": 19440 }, { "epoch": 2.300686065767684, "grad_norm": 2.234391450881958, "learning_rate": 4.6749581039023225e-06, "loss": 0.322, "step": 19450 }, { "epoch": 2.3018689377809323, "grad_norm": 2.6867315769195557, "learning_rate": 4.674239885084989e-06, "loss": 0.2845, "step": 19460 }, { "epoch": 2.30305180979418, "grad_norm": 3.1070406436920166, "learning_rate": 4.673521666267656e-06, "loss": 0.2986, "step": 19470 }, { "epoch": 2.3042346818074284, "grad_norm": 2.7052433490753174, "learning_rate": 4.672803447450323e-06, "loss": 0.3074, "step": 19480 }, { "epoch": 2.3054175538206767, "grad_norm": 4.0416460037231445, "learning_rate": 4.67208522863299e-06, "loss": 0.3245, "step": 19490 }, { "epoch": 2.306600425833925, "grad_norm": 2.0998635292053223, "learning_rate": 4.671367009815658e-06, "loss": 0.3209, "step": 19500 }, { "epoch": 2.307783297847173, "grad_norm": 2.429802179336548, "learning_rate": 4.670648790998324e-06, "loss": 0.309, "step": 19510 }, { "epoch": 2.308966169860421, "grad_norm": 2.9419801235198975, "learning_rate": 4.669930572180992e-06, "loss": 0.2895, "step": 19520 }, { "epoch": 2.3101490418736694, "grad_norm": 2.927443027496338, "learning_rate": 4.669212353363658e-06, "loss": 0.334, "step": 19530 }, { "epoch": 2.3113319138869173, "grad_norm": 3.3526933193206787, "learning_rate": 4.668494134546326e-06, "loss": 0.3067, "step": 19540 }, { "epoch": 2.3125147859001656, "grad_norm": 2.4565911293029785, "learning_rate": 4.667775915728992e-06, "loss": 0.3019, "step": 19550 }, { "epoch": 2.313697657913414, "grad_norm": 2.520803928375244, "learning_rate": 4.6670576969116595e-06, "loss": 0.3434, "step": 19560 }, { "epoch": 2.314880529926662, "grad_norm": 3.4721646308898926, "learning_rate": 4.666339478094326e-06, "loss": 0.2899, "step": 19570 }, { "epoch": 2.31606340193991, "grad_norm": 2.2394354343414307, "learning_rate": 4.665621259276993e-06, "loss": 0.326, "step": 19580 }, { "epoch": 2.3172462739531583, "grad_norm": 2.9581689834594727, "learning_rate": 4.66490304045966e-06, "loss": 0.328, "step": 19590 }, { "epoch": 2.3184291459664066, "grad_norm": 3.223263740539551, "learning_rate": 4.664184821642327e-06, "loss": 0.2757, "step": 19600 }, { "epoch": 2.3196120179796544, "grad_norm": 3.566864490509033, "learning_rate": 4.663466602824994e-06, "loss": 0.2946, "step": 19610 }, { "epoch": 2.3207948899929027, "grad_norm": 3.2584853172302246, "learning_rate": 4.662748384007661e-06, "loss": 0.2535, "step": 19620 }, { "epoch": 2.321977762006151, "grad_norm": 2.983661413192749, "learning_rate": 4.662030165190328e-06, "loss": 0.3433, "step": 19630 }, { "epoch": 2.3231606340193993, "grad_norm": 2.6936511993408203, "learning_rate": 4.661311946372995e-06, "loss": 0.2743, "step": 19640 }, { "epoch": 2.324343506032647, "grad_norm": 2.610374927520752, "learning_rate": 4.660593727555662e-06, "loss": 0.2931, "step": 19650 }, { "epoch": 2.3255263780458955, "grad_norm": 2.68072247505188, "learning_rate": 4.6598755087383295e-06, "loss": 0.2751, "step": 19660 }, { "epoch": 2.3267092500591438, "grad_norm": 2.336444139480591, "learning_rate": 4.659157289920996e-06, "loss": 0.2942, "step": 19670 }, { "epoch": 2.3278921220723916, "grad_norm": 2.8506205081939697, "learning_rate": 4.658439071103663e-06, "loss": 0.275, "step": 19680 }, { "epoch": 2.32907499408564, "grad_norm": 2.9952659606933594, "learning_rate": 4.6577208522863295e-06, "loss": 0.3054, "step": 19690 }, { "epoch": 2.330257866098888, "grad_norm": 1.9596766233444214, "learning_rate": 4.657002633468997e-06, "loss": 0.2583, "step": 19700 }, { "epoch": 2.3314407381121365, "grad_norm": 2.6753013134002686, "learning_rate": 4.656284414651664e-06, "loss": 0.2724, "step": 19710 }, { "epoch": 2.3326236101253843, "grad_norm": 4.986375331878662, "learning_rate": 4.655566195834331e-06, "loss": 0.2966, "step": 19720 }, { "epoch": 2.3338064821386326, "grad_norm": 2.780099630355835, "learning_rate": 4.654847977016998e-06, "loss": 0.3073, "step": 19730 }, { "epoch": 2.334989354151881, "grad_norm": 3.2119815349578857, "learning_rate": 4.654129758199665e-06, "loss": 0.3336, "step": 19740 }, { "epoch": 2.3361722261651288, "grad_norm": 3.1022114753723145, "learning_rate": 4.653411539382332e-06, "loss": 0.2799, "step": 19750 }, { "epoch": 2.337355098178377, "grad_norm": 3.45047664642334, "learning_rate": 4.652693320564999e-06, "loss": 0.2875, "step": 19760 }, { "epoch": 2.3385379701916253, "grad_norm": 2.6528635025024414, "learning_rate": 4.6519751017476665e-06, "loss": 0.3256, "step": 19770 }, { "epoch": 2.3397208422048736, "grad_norm": 2.416339159011841, "learning_rate": 4.651256882930333e-06, "loss": 0.304, "step": 19780 }, { "epoch": 2.3409037142181215, "grad_norm": 2.61565899848938, "learning_rate": 4.650538664113e-06, "loss": 0.318, "step": 19790 }, { "epoch": 2.34208658623137, "grad_norm": 2.7060976028442383, "learning_rate": 4.6498204452956665e-06, "loss": 0.2995, "step": 19800 }, { "epoch": 2.343269458244618, "grad_norm": 3.9703264236450195, "learning_rate": 4.649102226478334e-06, "loss": 0.3036, "step": 19810 }, { "epoch": 2.344452330257866, "grad_norm": 2.4769108295440674, "learning_rate": 4.648384007661e-06, "loss": 0.3118, "step": 19820 }, { "epoch": 2.345635202271114, "grad_norm": 3.06601881980896, "learning_rate": 4.647665788843668e-06, "loss": 0.2861, "step": 19830 }, { "epoch": 2.3468180742843625, "grad_norm": 2.9873290061950684, "learning_rate": 4.646947570026335e-06, "loss": 0.2802, "step": 19840 }, { "epoch": 2.348000946297611, "grad_norm": 3.300034284591675, "learning_rate": 4.646229351209002e-06, "loss": 0.2915, "step": 19850 }, { "epoch": 2.3491838183108587, "grad_norm": 2.580639362335205, "learning_rate": 4.645511132391669e-06, "loss": 0.2945, "step": 19860 }, { "epoch": 2.350366690324107, "grad_norm": 3.396291494369507, "learning_rate": 4.644792913574336e-06, "loss": 0.2461, "step": 19870 }, { "epoch": 2.3515495623373552, "grad_norm": 3.3407390117645264, "learning_rate": 4.644074694757003e-06, "loss": 0.339, "step": 19880 }, { "epoch": 2.352732434350603, "grad_norm": 2.8220138549804688, "learning_rate": 4.64335647593967e-06, "loss": 0.34, "step": 19890 }, { "epoch": 2.3539153063638514, "grad_norm": 2.042041778564453, "learning_rate": 4.6426382571223365e-06, "loss": 0.2851, "step": 19900 }, { "epoch": 2.3550981783770997, "grad_norm": 3.4644346237182617, "learning_rate": 4.6419200383050035e-06, "loss": 0.3093, "step": 19910 }, { "epoch": 2.356281050390348, "grad_norm": 2.6001265048980713, "learning_rate": 4.64120181948767e-06, "loss": 0.2824, "step": 19920 }, { "epoch": 2.357463922403596, "grad_norm": 3.2763378620147705, "learning_rate": 4.640483600670337e-06, "loss": 0.3283, "step": 19930 }, { "epoch": 2.358646794416844, "grad_norm": 2.4971261024475098, "learning_rate": 4.639765381853004e-06, "loss": 0.2916, "step": 19940 }, { "epoch": 2.3598296664300924, "grad_norm": 2.6270179748535156, "learning_rate": 4.639047163035672e-06, "loss": 0.3105, "step": 19950 }, { "epoch": 2.3610125384433402, "grad_norm": 3.1729302406311035, "learning_rate": 4.638328944218338e-06, "loss": 0.3487, "step": 19960 }, { "epoch": 2.3621954104565885, "grad_norm": 3.2375006675720215, "learning_rate": 4.637610725401006e-06, "loss": 0.2896, "step": 19970 }, { "epoch": 2.363378282469837, "grad_norm": 2.603255033493042, "learning_rate": 4.636892506583673e-06, "loss": 0.2954, "step": 19980 }, { "epoch": 2.364561154483085, "grad_norm": 3.0251717567443848, "learning_rate": 4.63617428776634e-06, "loss": 0.308, "step": 19990 }, { "epoch": 2.365744026496333, "grad_norm": 3.3653202056884766, "learning_rate": 4.635456068949007e-06, "loss": 0.3014, "step": 20000 }, { "epoch": 2.3669268985095813, "grad_norm": 3.3685343265533447, "learning_rate": 4.6347378501316735e-06, "loss": 0.3402, "step": 20010 }, { "epoch": 2.3681097705228296, "grad_norm": 3.4178988933563232, "learning_rate": 4.6340196313143405e-06, "loss": 0.303, "step": 20020 }, { "epoch": 2.3692926425360774, "grad_norm": 2.6259653568267822, "learning_rate": 4.633301412497007e-06, "loss": 0.3156, "step": 20030 }, { "epoch": 2.3704755145493257, "grad_norm": 2.317650318145752, "learning_rate": 4.632583193679675e-06, "loss": 0.346, "step": 20040 }, { "epoch": 2.371658386562574, "grad_norm": 2.2929182052612305, "learning_rate": 4.631864974862341e-06, "loss": 0.2802, "step": 20050 }, { "epoch": 2.3728412585758223, "grad_norm": 2.9007229804992676, "learning_rate": 4.631146756045009e-06, "loss": 0.3227, "step": 20060 }, { "epoch": 2.37402413058907, "grad_norm": 2.8625431060791016, "learning_rate": 4.630428537227675e-06, "loss": 0.3436, "step": 20070 }, { "epoch": 2.3752070026023184, "grad_norm": 2.63838791847229, "learning_rate": 4.629710318410343e-06, "loss": 0.2974, "step": 20080 }, { "epoch": 2.3763898746155667, "grad_norm": 2.5006182193756104, "learning_rate": 4.628992099593009e-06, "loss": 0.2927, "step": 20090 }, { "epoch": 2.3775727466288146, "grad_norm": 3.098212718963623, "learning_rate": 4.628273880775677e-06, "loss": 0.3077, "step": 20100 }, { "epoch": 2.378755618642063, "grad_norm": 3.923412799835205, "learning_rate": 4.627555661958344e-06, "loss": 0.303, "step": 20110 }, { "epoch": 2.379938490655311, "grad_norm": 3.1200942993164062, "learning_rate": 4.6268374431410105e-06, "loss": 0.3419, "step": 20120 }, { "epoch": 2.3811213626685594, "grad_norm": 1.6198961734771729, "learning_rate": 4.6261192243236775e-06, "loss": 0.286, "step": 20130 }, { "epoch": 2.3823042346818073, "grad_norm": 2.5041675567626953, "learning_rate": 4.625401005506344e-06, "loss": 0.3103, "step": 20140 }, { "epoch": 2.3834871066950556, "grad_norm": 2.900592565536499, "learning_rate": 4.624682786689011e-06, "loss": 0.331, "step": 20150 }, { "epoch": 2.384669978708304, "grad_norm": 2.689657688140869, "learning_rate": 4.623964567871678e-06, "loss": 0.3322, "step": 20160 }, { "epoch": 2.3858528507215517, "grad_norm": 2.471005916595459, "learning_rate": 4.623246349054345e-06, "loss": 0.3073, "step": 20170 }, { "epoch": 2.3870357227348, "grad_norm": 2.933124542236328, "learning_rate": 4.622528130237012e-06, "loss": 0.3033, "step": 20180 }, { "epoch": 2.3882185947480483, "grad_norm": 2.789827585220337, "learning_rate": 4.621809911419679e-06, "loss": 0.2927, "step": 20190 }, { "epoch": 2.3894014667612966, "grad_norm": 2.2694129943847656, "learning_rate": 4.621091692602346e-06, "loss": 0.3027, "step": 20200 }, { "epoch": 2.3905843387745445, "grad_norm": 3.2975857257843018, "learning_rate": 4.620373473785013e-06, "loss": 0.3143, "step": 20210 }, { "epoch": 2.3917672107877928, "grad_norm": 3.478466272354126, "learning_rate": 4.619655254967681e-06, "loss": 0.3402, "step": 20220 }, { "epoch": 2.392950082801041, "grad_norm": 2.7437846660614014, "learning_rate": 4.6189370361503475e-06, "loss": 0.3057, "step": 20230 }, { "epoch": 2.394132954814289, "grad_norm": 2.510840892791748, "learning_rate": 4.6182188173330145e-06, "loss": 0.3097, "step": 20240 }, { "epoch": 2.395315826827537, "grad_norm": 3.0884249210357666, "learning_rate": 4.617500598515681e-06, "loss": 0.3152, "step": 20250 }, { "epoch": 2.3964986988407855, "grad_norm": 2.712763786315918, "learning_rate": 4.616782379698348e-06, "loss": 0.3074, "step": 20260 }, { "epoch": 2.3976815708540338, "grad_norm": 2.518413543701172, "learning_rate": 4.616064160881015e-06, "loss": 0.3171, "step": 20270 }, { "epoch": 2.3988644428672816, "grad_norm": 3.805736541748047, "learning_rate": 4.615345942063682e-06, "loss": 0.2897, "step": 20280 }, { "epoch": 2.40004731488053, "grad_norm": 3.109661817550659, "learning_rate": 4.614627723246349e-06, "loss": 0.3072, "step": 20290 }, { "epoch": 2.401230186893778, "grad_norm": 4.400880336761475, "learning_rate": 4.613909504429016e-06, "loss": 0.3297, "step": 20300 }, { "epoch": 2.402413058907026, "grad_norm": 2.9064347743988037, "learning_rate": 4.613191285611684e-06, "loss": 0.2985, "step": 20310 }, { "epoch": 2.4035959309202743, "grad_norm": 3.0515923500061035, "learning_rate": 4.61247306679435e-06, "loss": 0.2738, "step": 20320 }, { "epoch": 2.4047788029335226, "grad_norm": 3.078885793685913, "learning_rate": 4.611754847977018e-06, "loss": 0.3325, "step": 20330 }, { "epoch": 2.405961674946771, "grad_norm": 2.3880527019500732, "learning_rate": 4.611036629159684e-06, "loss": 0.3024, "step": 20340 }, { "epoch": 2.407144546960019, "grad_norm": 2.6147024631500244, "learning_rate": 4.6103184103423515e-06, "loss": 0.288, "step": 20350 }, { "epoch": 2.408327418973267, "grad_norm": 2.7234206199645996, "learning_rate": 4.6096001915250175e-06, "loss": 0.3391, "step": 20360 }, { "epoch": 2.4095102909865154, "grad_norm": 2.5554802417755127, "learning_rate": 4.608881972707685e-06, "loss": 0.2922, "step": 20370 }, { "epoch": 2.410693162999763, "grad_norm": 2.6028528213500977, "learning_rate": 4.608163753890352e-06, "loss": 0.3206, "step": 20380 }, { "epoch": 2.4118760350130115, "grad_norm": 2.4608428478240967, "learning_rate": 4.607445535073019e-06, "loss": 0.2791, "step": 20390 }, { "epoch": 2.41305890702626, "grad_norm": 2.69881272315979, "learning_rate": 4.606727316255686e-06, "loss": 0.3024, "step": 20400 }, { "epoch": 2.414241779039508, "grad_norm": 3.897646903991699, "learning_rate": 4.606009097438353e-06, "loss": 0.2985, "step": 20410 }, { "epoch": 2.415424651052756, "grad_norm": 3.009213924407959, "learning_rate": 4.60529087862102e-06, "loss": 0.2716, "step": 20420 }, { "epoch": 2.4166075230660042, "grad_norm": 3.926177740097046, "learning_rate": 4.604572659803687e-06, "loss": 0.2428, "step": 20430 }, { "epoch": 2.4177903950792525, "grad_norm": 4.079202175140381, "learning_rate": 4.603854440986354e-06, "loss": 0.306, "step": 20440 }, { "epoch": 2.4189732670925004, "grad_norm": 2.471909523010254, "learning_rate": 4.603136222169021e-06, "loss": 0.2836, "step": 20450 }, { "epoch": 2.4201561391057487, "grad_norm": 2.6632494926452637, "learning_rate": 4.602418003351688e-06, "loss": 0.3114, "step": 20460 }, { "epoch": 2.421339011118997, "grad_norm": 2.557344675064087, "learning_rate": 4.6016997845343545e-06, "loss": 0.253, "step": 20470 }, { "epoch": 2.4225218831322453, "grad_norm": 3.323770046234131, "learning_rate": 4.600981565717022e-06, "loss": 0.3351, "step": 20480 }, { "epoch": 2.423704755145493, "grad_norm": 2.374248504638672, "learning_rate": 4.600263346899689e-06, "loss": 0.3271, "step": 20490 }, { "epoch": 2.4248876271587414, "grad_norm": 2.0508761405944824, "learning_rate": 4.599545128082356e-06, "loss": 0.2808, "step": 20500 }, { "epoch": 2.4260704991719897, "grad_norm": 2.739738702774048, "learning_rate": 4.598826909265023e-06, "loss": 0.2765, "step": 20510 }, { "epoch": 2.4272533711852375, "grad_norm": 1.862618327140808, "learning_rate": 4.59810869044769e-06, "loss": 0.3155, "step": 20520 }, { "epoch": 2.428436243198486, "grad_norm": 3.4794418811798096, "learning_rate": 4.597390471630357e-06, "loss": 0.3207, "step": 20530 }, { "epoch": 2.429619115211734, "grad_norm": 2.0952260494232178, "learning_rate": 4.596672252813024e-06, "loss": 0.2776, "step": 20540 }, { "epoch": 2.4308019872249824, "grad_norm": 2.8477554321289062, "learning_rate": 4.595954033995691e-06, "loss": 0.3107, "step": 20550 }, { "epoch": 2.4319848592382303, "grad_norm": 3.1266961097717285, "learning_rate": 4.595235815178358e-06, "loss": 0.2608, "step": 20560 }, { "epoch": 2.4331677312514786, "grad_norm": 2.8391010761260986, "learning_rate": 4.594517596361025e-06, "loss": 0.2696, "step": 20570 }, { "epoch": 2.434350603264727, "grad_norm": 1.6859503984451294, "learning_rate": 4.5937993775436915e-06, "loss": 0.3014, "step": 20580 }, { "epoch": 2.4355334752779747, "grad_norm": 2.543356418609619, "learning_rate": 4.5930811587263584e-06, "loss": 0.3163, "step": 20590 }, { "epoch": 2.436716347291223, "grad_norm": 2.9169318675994873, "learning_rate": 4.592362939909026e-06, "loss": 0.2907, "step": 20600 }, { "epoch": 2.4378992193044713, "grad_norm": 3.6094706058502197, "learning_rate": 4.591644721091692e-06, "loss": 0.3256, "step": 20610 }, { "epoch": 2.4390820913177196, "grad_norm": 3.3926591873168945, "learning_rate": 4.59092650227436e-06, "loss": 0.3021, "step": 20620 }, { "epoch": 2.4402649633309674, "grad_norm": 3.370145797729492, "learning_rate": 4.590208283457026e-06, "loss": 0.3536, "step": 20630 }, { "epoch": 2.4414478353442157, "grad_norm": 2.698267936706543, "learning_rate": 4.589490064639694e-06, "loss": 0.2693, "step": 20640 }, { "epoch": 2.442630707357464, "grad_norm": 2.918368339538574, "learning_rate": 4.588771845822361e-06, "loss": 0.301, "step": 20650 }, { "epoch": 2.443813579370712, "grad_norm": 3.023146867752075, "learning_rate": 4.588053627005028e-06, "loss": 0.3137, "step": 20660 }, { "epoch": 2.44499645138396, "grad_norm": 3.6508171558380127, "learning_rate": 4.587335408187695e-06, "loss": 0.3164, "step": 20670 }, { "epoch": 2.4461793233972084, "grad_norm": 4.299176216125488, "learning_rate": 4.586617189370362e-06, "loss": 0.2992, "step": 20680 }, { "epoch": 2.4473621954104567, "grad_norm": 3.4763195514678955, "learning_rate": 4.5858989705530285e-06, "loss": 0.3068, "step": 20690 }, { "epoch": 2.4485450674237046, "grad_norm": 4.034111022949219, "learning_rate": 4.5851807517356954e-06, "loss": 0.3909, "step": 20700 }, { "epoch": 2.449727939436953, "grad_norm": 2.4070658683776855, "learning_rate": 4.584462532918362e-06, "loss": 0.2944, "step": 20710 }, { "epoch": 2.450910811450201, "grad_norm": 3.4822850227355957, "learning_rate": 4.583744314101029e-06, "loss": 0.2817, "step": 20720 }, { "epoch": 2.452093683463449, "grad_norm": 2.9878408908843994, "learning_rate": 4.583026095283697e-06, "loss": 0.2915, "step": 20730 }, { "epoch": 2.4532765554766973, "grad_norm": 3.455247163772583, "learning_rate": 4.582307876466363e-06, "loss": 0.301, "step": 20740 }, { "epoch": 2.4544594274899456, "grad_norm": 2.6818161010742188, "learning_rate": 4.581589657649031e-06, "loss": 0.286, "step": 20750 }, { "epoch": 2.455642299503194, "grad_norm": 3.109696388244629, "learning_rate": 4.580871438831698e-06, "loss": 0.3193, "step": 20760 }, { "epoch": 2.4568251715164418, "grad_norm": 2.9084906578063965, "learning_rate": 4.580153220014365e-06, "loss": 0.3013, "step": 20770 }, { "epoch": 2.45800804352969, "grad_norm": 3.4805383682250977, "learning_rate": 4.579435001197032e-06, "loss": 0.2984, "step": 20780 }, { "epoch": 2.4591909155429383, "grad_norm": 2.6101019382476807, "learning_rate": 4.578716782379699e-06, "loss": 0.3048, "step": 20790 }, { "epoch": 2.460373787556186, "grad_norm": 2.232529640197754, "learning_rate": 4.5779985635623655e-06, "loss": 0.2833, "step": 20800 }, { "epoch": 2.4615566595694345, "grad_norm": 3.5824155807495117, "learning_rate": 4.5772803447450324e-06, "loss": 0.3059, "step": 20810 }, { "epoch": 2.4627395315826828, "grad_norm": 2.1113779544830322, "learning_rate": 4.576562125927699e-06, "loss": 0.3131, "step": 20820 }, { "epoch": 2.463922403595931, "grad_norm": 3.2497670650482178, "learning_rate": 4.575843907110366e-06, "loss": 0.3125, "step": 20830 }, { "epoch": 2.465105275609179, "grad_norm": 2.5704400539398193, "learning_rate": 4.575125688293033e-06, "loss": 0.3283, "step": 20840 }, { "epoch": 2.466288147622427, "grad_norm": 2.7244441509246826, "learning_rate": 4.5744074694757e-06, "loss": 0.3156, "step": 20850 }, { "epoch": 2.4674710196356755, "grad_norm": 2.3715591430664062, "learning_rate": 4.573689250658367e-06, "loss": 0.313, "step": 20860 }, { "epoch": 2.4686538916489233, "grad_norm": 4.268498420715332, "learning_rate": 4.572971031841035e-06, "loss": 0.3363, "step": 20870 }, { "epoch": 2.4698367636621716, "grad_norm": 3.0117766857147217, "learning_rate": 4.572252813023701e-06, "loss": 0.335, "step": 20880 }, { "epoch": 2.47101963567542, "grad_norm": 3.2900314331054688, "learning_rate": 4.571534594206369e-06, "loss": 0.3027, "step": 20890 }, { "epoch": 2.4722025076886682, "grad_norm": 2.577617883682251, "learning_rate": 4.570816375389035e-06, "loss": 0.2658, "step": 20900 }, { "epoch": 2.473385379701916, "grad_norm": 3.5049071311950684, "learning_rate": 4.5700981565717025e-06, "loss": 0.2969, "step": 20910 }, { "epoch": 2.4745682517151644, "grad_norm": 3.682277202606201, "learning_rate": 4.569379937754369e-06, "loss": 0.2825, "step": 20920 }, { "epoch": 2.4757511237284127, "grad_norm": 2.5858194828033447, "learning_rate": 4.568661718937036e-06, "loss": 0.332, "step": 20930 }, { "epoch": 2.4769339957416605, "grad_norm": 2.7255759239196777, "learning_rate": 4.567943500119703e-06, "loss": 0.3069, "step": 20940 }, { "epoch": 2.478116867754909, "grad_norm": 2.588320016860962, "learning_rate": 4.56722528130237e-06, "loss": 0.3146, "step": 20950 }, { "epoch": 2.479299739768157, "grad_norm": 3.010643482208252, "learning_rate": 4.566507062485037e-06, "loss": 0.2928, "step": 20960 }, { "epoch": 2.4804826117814054, "grad_norm": 3.469820499420166, "learning_rate": 4.565788843667704e-06, "loss": 0.2786, "step": 20970 }, { "epoch": 2.4816654837946532, "grad_norm": 2.7268991470336914, "learning_rate": 4.565070624850372e-06, "loss": 0.2685, "step": 20980 }, { "epoch": 2.4828483558079015, "grad_norm": 4.038430690765381, "learning_rate": 4.564352406033038e-06, "loss": 0.2477, "step": 20990 }, { "epoch": 2.48403122782115, "grad_norm": 2.6853692531585693, "learning_rate": 4.563634187215706e-06, "loss": 0.3062, "step": 21000 }, { "epoch": 2.4852140998343977, "grad_norm": 2.2105298042297363, "learning_rate": 4.562915968398372e-06, "loss": 0.3348, "step": 21010 }, { "epoch": 2.486396971847646, "grad_norm": 2.810724973678589, "learning_rate": 4.5621977495810395e-06, "loss": 0.2911, "step": 21020 }, { "epoch": 2.4875798438608943, "grad_norm": 4.207290172576904, "learning_rate": 4.5614795307637064e-06, "loss": 0.2766, "step": 21030 }, { "epoch": 2.4887627158741426, "grad_norm": 2.644639492034912, "learning_rate": 4.560761311946373e-06, "loss": 0.2967, "step": 21040 }, { "epoch": 2.4899455878873904, "grad_norm": 3.2864556312561035, "learning_rate": 4.56004309312904e-06, "loss": 0.3415, "step": 21050 }, { "epoch": 2.4911284599006387, "grad_norm": 3.14288330078125, "learning_rate": 4.559324874311707e-06, "loss": 0.2855, "step": 21060 }, { "epoch": 2.492311331913887, "grad_norm": 2.1075081825256348, "learning_rate": 4.558606655494374e-06, "loss": 0.2802, "step": 21070 }, { "epoch": 2.4934942039271353, "grad_norm": 3.4036238193511963, "learning_rate": 4.557888436677041e-06, "loss": 0.2902, "step": 21080 }, { "epoch": 2.494677075940383, "grad_norm": 2.650482654571533, "learning_rate": 4.557170217859708e-06, "loss": 0.2826, "step": 21090 }, { "epoch": 2.4958599479536314, "grad_norm": 3.0203163623809814, "learning_rate": 4.556451999042375e-06, "loss": 0.2931, "step": 21100 }, { "epoch": 2.4970428199668797, "grad_norm": 3.077556848526001, "learning_rate": 4.555733780225042e-06, "loss": 0.3441, "step": 21110 }, { "epoch": 2.4982256919801276, "grad_norm": 1.740944504737854, "learning_rate": 4.555015561407709e-06, "loss": 0.2761, "step": 21120 }, { "epoch": 2.499408563993376, "grad_norm": 3.0141775608062744, "learning_rate": 4.554297342590376e-06, "loss": 0.3299, "step": 21130 }, { "epoch": 2.500591436006624, "grad_norm": 3.199282646179199, "learning_rate": 4.5535791237730434e-06, "loss": 0.2752, "step": 21140 }, { "epoch": 2.500591436006624, "eval_accuracy": 0.8580031273912899, "eval_loss": 0.3318200707435608, "eval_runtime": 77.8696, "eval_safe_aucpr": 0.9148747208601448, "eval_safe_f1": 0.8377741457295982, "eval_safe_fpr": 0.11700696541209495, "eval_safe_precision": 0.8492005393951069, "eval_safe_recall": 0.8266511645351236, "eval_samples_per_second": 771.983, "eval_steps_per_second": 12.071, "eval_unsafe_aucpr": 0.95208993937996, "eval_unsafe_f1": 0.8737464872060346, "eval_unsafe_fpr": 0.17334883546487578, "eval_unsafe_precision": 0.8646915893322404, "eval_unsafe_recall": 0.8829930345879047, "step": 21140 }, { "epoch": 2.501774308019872, "grad_norm": 3.0372202396392822, "learning_rate": 4.5528609049557095e-06, "loss": 0.3057, "step": 21150 }, { "epoch": 2.5029571800331203, "grad_norm": 2.6270434856414795, "learning_rate": 4.552142686138377e-06, "loss": 0.2973, "step": 21160 }, { "epoch": 2.5041400520463686, "grad_norm": 3.112229108810425, "learning_rate": 4.551424467321043e-06, "loss": 0.3133, "step": 21170 }, { "epoch": 2.505322924059617, "grad_norm": 2.4659974575042725, "learning_rate": 4.550706248503711e-06, "loss": 0.3142, "step": 21180 }, { "epoch": 2.506505796072865, "grad_norm": 3.3613669872283936, "learning_rate": 4.549988029686377e-06, "loss": 0.3077, "step": 21190 }, { "epoch": 2.507688668086113, "grad_norm": 4.1130781173706055, "learning_rate": 4.549269810869045e-06, "loss": 0.3078, "step": 21200 }, { "epoch": 2.5088715400993613, "grad_norm": 2.4725160598754883, "learning_rate": 4.548551592051712e-06, "loss": 0.2686, "step": 21210 }, { "epoch": 2.510054412112609, "grad_norm": 4.3177103996276855, "learning_rate": 4.547833373234379e-06, "loss": 0.3091, "step": 21220 }, { "epoch": 2.5112372841258574, "grad_norm": 3.7701759338378906, "learning_rate": 4.547115154417047e-06, "loss": 0.3035, "step": 21230 }, { "epoch": 2.5124201561391057, "grad_norm": 2.6487650871276855, "learning_rate": 4.546396935599713e-06, "loss": 0.2616, "step": 21240 }, { "epoch": 2.513603028152354, "grad_norm": 2.873460292816162, "learning_rate": 4.5456787167823804e-06, "loss": 0.3269, "step": 21250 }, { "epoch": 2.5147859001656023, "grad_norm": 3.469243288040161, "learning_rate": 4.5449604979650465e-06, "loss": 0.3474, "step": 21260 }, { "epoch": 2.51596877217885, "grad_norm": 2.8477437496185303, "learning_rate": 4.544242279147714e-06, "loss": 0.3172, "step": 21270 }, { "epoch": 2.5171516441920985, "grad_norm": 3.201421022415161, "learning_rate": 4.54352406033038e-06, "loss": 0.3075, "step": 21280 }, { "epoch": 2.5183345162053463, "grad_norm": 2.6932663917541504, "learning_rate": 4.542805841513048e-06, "loss": 0.2892, "step": 21290 }, { "epoch": 2.5195173882185946, "grad_norm": 2.7699522972106934, "learning_rate": 4.542087622695715e-06, "loss": 0.3032, "step": 21300 }, { "epoch": 2.520700260231843, "grad_norm": 3.174916982650757, "learning_rate": 4.541369403878382e-06, "loss": 0.2635, "step": 21310 }, { "epoch": 2.521883132245091, "grad_norm": 2.373048782348633, "learning_rate": 4.540651185061049e-06, "loss": 0.2538, "step": 21320 }, { "epoch": 2.5230660042583395, "grad_norm": 2.9634103775024414, "learning_rate": 4.539932966243716e-06, "loss": 0.3142, "step": 21330 }, { "epoch": 2.5242488762715873, "grad_norm": 3.273416042327881, "learning_rate": 4.539214747426383e-06, "loss": 0.3097, "step": 21340 }, { "epoch": 2.5254317482848356, "grad_norm": 2.718534231185913, "learning_rate": 4.53849652860905e-06, "loss": 0.2889, "step": 21350 }, { "epoch": 2.5266146202980835, "grad_norm": 3.2437613010406494, "learning_rate": 4.537778309791717e-06, "loss": 0.3551, "step": 21360 }, { "epoch": 2.5277974923113318, "grad_norm": 3.0339436531066895, "learning_rate": 4.5370600909743835e-06, "loss": 0.292, "step": 21370 }, { "epoch": 2.52898036432458, "grad_norm": 2.994718074798584, "learning_rate": 4.5363418721570504e-06, "loss": 0.3065, "step": 21380 }, { "epoch": 2.5301632363378284, "grad_norm": 3.1579229831695557, "learning_rate": 4.535623653339717e-06, "loss": 0.2907, "step": 21390 }, { "epoch": 2.5313461083510767, "grad_norm": 3.506098985671997, "learning_rate": 4.534905434522384e-06, "loss": 0.2769, "step": 21400 }, { "epoch": 2.5325289803643245, "grad_norm": 3.2988998889923096, "learning_rate": 4.534187215705052e-06, "loss": 0.3311, "step": 21410 }, { "epoch": 2.533711852377573, "grad_norm": 3.10424542427063, "learning_rate": 4.533468996887718e-06, "loss": 0.3184, "step": 21420 }, { "epoch": 2.5348947243908206, "grad_norm": 2.427213430404663, "learning_rate": 4.532750778070386e-06, "loss": 0.2945, "step": 21430 }, { "epoch": 2.536077596404069, "grad_norm": 2.4065308570861816, "learning_rate": 4.532032559253052e-06, "loss": 0.3029, "step": 21440 }, { "epoch": 2.5372604684173172, "grad_norm": 2.4313242435455322, "learning_rate": 4.53131434043572e-06, "loss": 0.3207, "step": 21450 }, { "epoch": 2.5384433404305655, "grad_norm": 3.308527946472168, "learning_rate": 4.530596121618386e-06, "loss": 0.3129, "step": 21460 }, { "epoch": 2.539626212443814, "grad_norm": 3.2686848640441895, "learning_rate": 4.529877902801054e-06, "loss": 0.3103, "step": 21470 }, { "epoch": 2.5408090844570617, "grad_norm": 3.570476531982422, "learning_rate": 4.5291596839837205e-06, "loss": 0.3136, "step": 21480 }, { "epoch": 2.54199195647031, "grad_norm": 2.1805944442749023, "learning_rate": 4.5284414651663874e-06, "loss": 0.2934, "step": 21490 }, { "epoch": 2.543174828483558, "grad_norm": 2.063673496246338, "learning_rate": 4.527723246349054e-06, "loss": 0.2818, "step": 21500 }, { "epoch": 2.544357700496806, "grad_norm": 2.700390100479126, "learning_rate": 4.527005027531721e-06, "loss": 0.2917, "step": 21510 }, { "epoch": 2.5455405725100544, "grad_norm": 2.2416744232177734, "learning_rate": 4.526286808714389e-06, "loss": 0.2994, "step": 21520 }, { "epoch": 2.5467234445233027, "grad_norm": 3.3600687980651855, "learning_rate": 4.525568589897055e-06, "loss": 0.3007, "step": 21530 }, { "epoch": 2.547906316536551, "grad_norm": 3.249323844909668, "learning_rate": 4.524850371079723e-06, "loss": 0.3158, "step": 21540 }, { "epoch": 2.549089188549799, "grad_norm": 4.444723129272461, "learning_rate": 4.524132152262389e-06, "loss": 0.3219, "step": 21550 }, { "epoch": 2.550272060563047, "grad_norm": 4.1026434898376465, "learning_rate": 4.523413933445057e-06, "loss": 0.3285, "step": 21560 }, { "epoch": 2.5514549325762954, "grad_norm": 3.7367851734161377, "learning_rate": 4.522695714627724e-06, "loss": 0.2852, "step": 21570 }, { "epoch": 2.5526378045895433, "grad_norm": 3.6800010204315186, "learning_rate": 4.521977495810391e-06, "loss": 0.3259, "step": 21580 }, { "epoch": 2.5538206766027916, "grad_norm": 2.178433418273926, "learning_rate": 4.5212592769930575e-06, "loss": 0.311, "step": 21590 }, { "epoch": 2.55500354861604, "grad_norm": 2.957578420639038, "learning_rate": 4.5205410581757244e-06, "loss": 0.3059, "step": 21600 }, { "epoch": 2.556186420629288, "grad_norm": 2.5414247512817383, "learning_rate": 4.519822839358391e-06, "loss": 0.2645, "step": 21610 }, { "epoch": 2.557369292642536, "grad_norm": 3.454031467437744, "learning_rate": 4.519104620541058e-06, "loss": 0.3247, "step": 21620 }, { "epoch": 2.5585521646557843, "grad_norm": 2.417210817337036, "learning_rate": 4.518386401723725e-06, "loss": 0.266, "step": 21630 }, { "epoch": 2.5597350366690326, "grad_norm": 2.803642988204956, "learning_rate": 4.517668182906392e-06, "loss": 0.3107, "step": 21640 }, { "epoch": 2.5609179086822804, "grad_norm": 4.093634128570557, "learning_rate": 4.516949964089059e-06, "loss": 0.3397, "step": 21650 }, { "epoch": 2.5621007806955287, "grad_norm": 2.1972320079803467, "learning_rate": 4.516231745271726e-06, "loss": 0.3163, "step": 21660 }, { "epoch": 2.563283652708777, "grad_norm": 2.724701166152954, "learning_rate": 4.515513526454393e-06, "loss": 0.3146, "step": 21670 }, { "epoch": 2.5644665247220253, "grad_norm": 2.9636809825897217, "learning_rate": 4.514795307637061e-06, "loss": 0.2948, "step": 21680 }, { "epoch": 2.565649396735273, "grad_norm": 1.956942081451416, "learning_rate": 4.514077088819727e-06, "loss": 0.3168, "step": 21690 }, { "epoch": 2.5668322687485214, "grad_norm": 3.7892258167266846, "learning_rate": 4.5133588700023945e-06, "loss": 0.293, "step": 21700 }, { "epoch": 2.5680151407617697, "grad_norm": 3.2520649433135986, "learning_rate": 4.512640651185061e-06, "loss": 0.3285, "step": 21710 }, { "epoch": 2.5691980127750176, "grad_norm": 2.8439433574676514, "learning_rate": 4.511922432367728e-06, "loss": 0.2665, "step": 21720 }, { "epoch": 2.570380884788266, "grad_norm": 2.6865758895874023, "learning_rate": 4.5112042135503944e-06, "loss": 0.2898, "step": 21730 }, { "epoch": 2.571563756801514, "grad_norm": 3.938616991043091, "learning_rate": 4.510485994733062e-06, "loss": 0.3113, "step": 21740 }, { "epoch": 2.5727466288147625, "grad_norm": 3.2345218658447266, "learning_rate": 4.509767775915729e-06, "loss": 0.3455, "step": 21750 }, { "epoch": 2.5739295008280103, "grad_norm": 2.0615406036376953, "learning_rate": 4.509049557098396e-06, "loss": 0.2942, "step": 21760 }, { "epoch": 2.5751123728412586, "grad_norm": 2.9529225826263428, "learning_rate": 4.508331338281063e-06, "loss": 0.279, "step": 21770 }, { "epoch": 2.576295244854507, "grad_norm": 3.24729061126709, "learning_rate": 4.50761311946373e-06, "loss": 0.3042, "step": 21780 }, { "epoch": 2.5774781168677547, "grad_norm": 4.246401786804199, "learning_rate": 4.506894900646398e-06, "loss": 0.2974, "step": 21790 }, { "epoch": 2.578660988881003, "grad_norm": 2.7710227966308594, "learning_rate": 4.506176681829064e-06, "loss": 0.3063, "step": 21800 }, { "epoch": 2.5798438608942513, "grad_norm": 3.360891342163086, "learning_rate": 4.5054584630117315e-06, "loss": 0.3089, "step": 21810 }, { "epoch": 2.5810267329074996, "grad_norm": 3.049873113632202, "learning_rate": 4.504740244194398e-06, "loss": 0.2715, "step": 21820 }, { "epoch": 2.5822096049207475, "grad_norm": 2.9670944213867188, "learning_rate": 4.504022025377065e-06, "loss": 0.3164, "step": 21830 }, { "epoch": 2.5833924769339958, "grad_norm": 3.935147762298584, "learning_rate": 4.503303806559731e-06, "loss": 0.3048, "step": 21840 }, { "epoch": 2.584575348947244, "grad_norm": 2.235919713973999, "learning_rate": 4.502585587742399e-06, "loss": 0.2899, "step": 21850 }, { "epoch": 2.585758220960492, "grad_norm": 2.5635204315185547, "learning_rate": 4.501867368925066e-06, "loss": 0.3051, "step": 21860 }, { "epoch": 2.58694109297374, "grad_norm": 2.865705966949463, "learning_rate": 4.501149150107733e-06, "loss": 0.3228, "step": 21870 }, { "epoch": 2.5881239649869885, "grad_norm": 2.436107873916626, "learning_rate": 4.5004309312904e-06, "loss": 0.293, "step": 21880 }, { "epoch": 2.589306837000237, "grad_norm": 3.265331506729126, "learning_rate": 4.499712712473067e-06, "loss": 0.3014, "step": 21890 }, { "epoch": 2.5904897090134846, "grad_norm": 2.8012731075286865, "learning_rate": 4.498994493655734e-06, "loss": 0.3133, "step": 21900 }, { "epoch": 2.591672581026733, "grad_norm": 2.5874252319335938, "learning_rate": 4.498276274838401e-06, "loss": 0.3176, "step": 21910 }, { "epoch": 2.592855453039981, "grad_norm": 2.8500173091888428, "learning_rate": 4.497558056021068e-06, "loss": 0.2909, "step": 21920 }, { "epoch": 2.594038325053229, "grad_norm": 2.56345272064209, "learning_rate": 4.4968398372037346e-06, "loss": 0.3019, "step": 21930 }, { "epoch": 2.5952211970664774, "grad_norm": 3.251715660095215, "learning_rate": 4.4961216183864015e-06, "loss": 0.2899, "step": 21940 }, { "epoch": 2.5964040690797257, "grad_norm": 2.881084442138672, "learning_rate": 4.495403399569069e-06, "loss": 0.29, "step": 21950 }, { "epoch": 2.597586941092974, "grad_norm": 2.4817402362823486, "learning_rate": 4.494685180751735e-06, "loss": 0.3291, "step": 21960 }, { "epoch": 2.598769813106222, "grad_norm": 3.5438082218170166, "learning_rate": 4.493966961934403e-06, "loss": 0.2523, "step": 21970 }, { "epoch": 2.59995268511947, "grad_norm": 2.795470714569092, "learning_rate": 4.493248743117069e-06, "loss": 0.3352, "step": 21980 }, { "epoch": 2.6011355571327184, "grad_norm": 2.066418170928955, "learning_rate": 4.492530524299737e-06, "loss": 0.3098, "step": 21990 }, { "epoch": 2.6023184291459662, "grad_norm": 3.746610403060913, "learning_rate": 4.491812305482404e-06, "loss": 0.323, "step": 22000 }, { "epoch": 2.6035013011592145, "grad_norm": 3.1087605953216553, "learning_rate": 4.491094086665071e-06, "loss": 0.3441, "step": 22010 }, { "epoch": 2.604684173172463, "grad_norm": 2.8416616916656494, "learning_rate": 4.490375867847738e-06, "loss": 0.2848, "step": 22020 }, { "epoch": 2.605867045185711, "grad_norm": 2.4401543140411377, "learning_rate": 4.489657649030405e-06, "loss": 0.2817, "step": 22030 }, { "epoch": 2.607049917198959, "grad_norm": 2.596233367919922, "learning_rate": 4.4889394302130716e-06, "loss": 0.2915, "step": 22040 }, { "epoch": 2.6082327892122072, "grad_norm": 2.7294564247131348, "learning_rate": 4.4882212113957385e-06, "loss": 0.3363, "step": 22050 }, { "epoch": 2.6094156612254555, "grad_norm": 4.349059104919434, "learning_rate": 4.487502992578406e-06, "loss": 0.2844, "step": 22060 }, { "epoch": 2.6105985332387034, "grad_norm": 3.3832664489746094, "learning_rate": 4.486784773761072e-06, "loss": 0.3283, "step": 22070 }, { "epoch": 2.6117814052519517, "grad_norm": 3.112755537033081, "learning_rate": 4.48606655494374e-06, "loss": 0.3205, "step": 22080 }, { "epoch": 2.6129642772652, "grad_norm": 2.8589746952056885, "learning_rate": 4.485348336126406e-06, "loss": 0.2879, "step": 22090 }, { "epoch": 2.6141471492784483, "grad_norm": 2.2620599269866943, "learning_rate": 4.484630117309074e-06, "loss": 0.2856, "step": 22100 }, { "epoch": 2.615330021291696, "grad_norm": 3.2509114742279053, "learning_rate": 4.48391189849174e-06, "loss": 0.3067, "step": 22110 }, { "epoch": 2.6165128933049444, "grad_norm": 2.9454476833343506, "learning_rate": 4.483193679674408e-06, "loss": 0.3149, "step": 22120 }, { "epoch": 2.6176957653181927, "grad_norm": 3.732296943664551, "learning_rate": 4.482475460857075e-06, "loss": 0.3113, "step": 22130 }, { "epoch": 2.6188786373314406, "grad_norm": 2.731187343597412, "learning_rate": 4.481757242039742e-06, "loss": 0.3033, "step": 22140 }, { "epoch": 2.620061509344689, "grad_norm": 2.3635094165802, "learning_rate": 4.4810390232224086e-06, "loss": 0.2897, "step": 22150 }, { "epoch": 2.621244381357937, "grad_norm": 2.8644402027130127, "learning_rate": 4.4803208044050755e-06, "loss": 0.285, "step": 22160 }, { "epoch": 2.6224272533711854, "grad_norm": 3.13881254196167, "learning_rate": 4.479602585587742e-06, "loss": 0.3308, "step": 22170 }, { "epoch": 2.6236101253844333, "grad_norm": 2.368556022644043, "learning_rate": 4.478884366770409e-06, "loss": 0.2808, "step": 22180 }, { "epoch": 2.6247929973976816, "grad_norm": 3.4502034187316895, "learning_rate": 4.478166147953076e-06, "loss": 0.3235, "step": 22190 }, { "epoch": 2.62597586941093, "grad_norm": 2.3046300411224365, "learning_rate": 4.477447929135743e-06, "loss": 0.3126, "step": 22200 }, { "epoch": 2.6271587414241777, "grad_norm": 2.2939493656158447, "learning_rate": 4.47672971031841e-06, "loss": 0.3189, "step": 22210 }, { "epoch": 2.628341613437426, "grad_norm": 2.177417039871216, "learning_rate": 4.476011491501078e-06, "loss": 0.2869, "step": 22220 }, { "epoch": 2.6295244854506743, "grad_norm": 3.3147706985473633, "learning_rate": 4.475293272683744e-06, "loss": 0.2878, "step": 22230 }, { "epoch": 2.6307073574639226, "grad_norm": 3.703477382659912, "learning_rate": 4.474575053866412e-06, "loss": 0.3166, "step": 22240 }, { "epoch": 2.6318902294771704, "grad_norm": 2.5452001094818115, "learning_rate": 4.473856835049079e-06, "loss": 0.2937, "step": 22250 }, { "epoch": 2.6330731014904187, "grad_norm": 5.185401439666748, "learning_rate": 4.4731386162317456e-06, "loss": 0.354, "step": 22260 }, { "epoch": 2.634255973503667, "grad_norm": 4.242445945739746, "learning_rate": 4.4724203974144125e-06, "loss": 0.3036, "step": 22270 }, { "epoch": 2.635438845516915, "grad_norm": 2.7605392932891846, "learning_rate": 4.471702178597079e-06, "loss": 0.2871, "step": 22280 }, { "epoch": 2.636621717530163, "grad_norm": 2.5750670433044434, "learning_rate": 4.470983959779746e-06, "loss": 0.3005, "step": 22290 }, { "epoch": 2.6378045895434115, "grad_norm": 2.137120008468628, "learning_rate": 4.470265740962413e-06, "loss": 0.2761, "step": 22300 }, { "epoch": 2.6389874615566598, "grad_norm": 2.8040382862091064, "learning_rate": 4.46954752214508e-06, "loss": 0.3107, "step": 22310 }, { "epoch": 2.6401703335699076, "grad_norm": 2.520559787750244, "learning_rate": 4.468829303327747e-06, "loss": 0.3228, "step": 22320 }, { "epoch": 2.641353205583156, "grad_norm": 2.437270402908325, "learning_rate": 4.468111084510415e-06, "loss": 0.3162, "step": 22330 }, { "epoch": 2.642536077596404, "grad_norm": 2.3186631202697754, "learning_rate": 4.467392865693081e-06, "loss": 0.2796, "step": 22340 }, { "epoch": 2.643718949609652, "grad_norm": 3.4087679386138916, "learning_rate": 4.466674646875749e-06, "loss": 0.3037, "step": 22350 }, { "epoch": 2.6449018216229003, "grad_norm": 2.9389445781707764, "learning_rate": 4.465956428058415e-06, "loss": 0.3005, "step": 22360 }, { "epoch": 2.6460846936361486, "grad_norm": 2.935872793197632, "learning_rate": 4.4652382092410826e-06, "loss": 0.3152, "step": 22370 }, { "epoch": 2.647267565649397, "grad_norm": 3.1000871658325195, "learning_rate": 4.464519990423749e-06, "loss": 0.3329, "step": 22380 }, { "epoch": 2.6484504376626448, "grad_norm": 2.442791700363159, "learning_rate": 4.463801771606416e-06, "loss": 0.3109, "step": 22390 }, { "epoch": 2.649633309675893, "grad_norm": 2.769963026046753, "learning_rate": 4.463083552789083e-06, "loss": 0.3012, "step": 22400 }, { "epoch": 2.6508161816891413, "grad_norm": 2.4669077396392822, "learning_rate": 4.46236533397175e-06, "loss": 0.3252, "step": 22410 }, { "epoch": 2.651999053702389, "grad_norm": 3.6878228187561035, "learning_rate": 4.461647115154417e-06, "loss": 0.3205, "step": 22420 }, { "epoch": 2.6531819257156375, "grad_norm": 2.7062792778015137, "learning_rate": 4.460928896337084e-06, "loss": 0.3093, "step": 22430 }, { "epoch": 2.654364797728886, "grad_norm": 3.205240249633789, "learning_rate": 4.460210677519751e-06, "loss": 0.3015, "step": 22440 }, { "epoch": 2.655547669742134, "grad_norm": 3.016710042953491, "learning_rate": 4.459492458702418e-06, "loss": 0.3214, "step": 22450 }, { "epoch": 2.656730541755382, "grad_norm": 3.4308531284332275, "learning_rate": 4.458774239885085e-06, "loss": 0.2982, "step": 22460 }, { "epoch": 2.65791341376863, "grad_norm": 5.198909759521484, "learning_rate": 4.458056021067752e-06, "loss": 0.3001, "step": 22470 }, { "epoch": 2.6590962857818785, "grad_norm": 3.1551146507263184, "learning_rate": 4.457337802250419e-06, "loss": 0.2969, "step": 22480 }, { "epoch": 2.6602791577951264, "grad_norm": 3.4289937019348145, "learning_rate": 4.4566195834330865e-06, "loss": 0.3356, "step": 22490 }, { "epoch": 2.6614620298083747, "grad_norm": 2.881150007247925, "learning_rate": 4.455901364615753e-06, "loss": 0.3053, "step": 22500 }, { "epoch": 2.662644901821623, "grad_norm": 2.57087779045105, "learning_rate": 4.45518314579842e-06, "loss": 0.2908, "step": 22510 }, { "epoch": 2.6638277738348712, "grad_norm": 3.0298633575439453, "learning_rate": 4.454464926981087e-06, "loss": 0.3385, "step": 22520 }, { "epoch": 2.665010645848119, "grad_norm": 3.2643322944641113, "learning_rate": 4.453746708163754e-06, "loss": 0.3455, "step": 22530 }, { "epoch": 2.6661935178613674, "grad_norm": 3.6884989738464355, "learning_rate": 4.453028489346421e-06, "loss": 0.2844, "step": 22540 }, { "epoch": 2.6673763898746157, "grad_norm": 2.1394948959350586, "learning_rate": 4.452310270529088e-06, "loss": 0.2886, "step": 22550 }, { "epoch": 2.6685592618878635, "grad_norm": 2.022263765335083, "learning_rate": 4.451592051711755e-06, "loss": 0.3082, "step": 22560 }, { "epoch": 2.669742133901112, "grad_norm": 2.3887062072753906, "learning_rate": 4.450873832894422e-06, "loss": 0.3268, "step": 22570 }, { "epoch": 2.67092500591436, "grad_norm": 3.5192759037017822, "learning_rate": 4.450155614077089e-06, "loss": 0.3252, "step": 22580 }, { "epoch": 2.6721078779276084, "grad_norm": 3.427703380584717, "learning_rate": 4.449437395259756e-06, "loss": 0.3138, "step": 22590 }, { "epoch": 2.6732907499408562, "grad_norm": 2.6806910037994385, "learning_rate": 4.4487191764424235e-06, "loss": 0.3115, "step": 22600 }, { "epoch": 2.6744736219541045, "grad_norm": 3.288033962249756, "learning_rate": 4.4480009576250896e-06, "loss": 0.2962, "step": 22610 }, { "epoch": 2.675656493967353, "grad_norm": 3.175037145614624, "learning_rate": 4.447282738807757e-06, "loss": 0.2638, "step": 22620 }, { "epoch": 2.6768393659806007, "grad_norm": 3.9674432277679443, "learning_rate": 4.446564519990423e-06, "loss": 0.3351, "step": 22630 }, { "epoch": 2.678022237993849, "grad_norm": 2.132941484451294, "learning_rate": 4.445846301173091e-06, "loss": 0.264, "step": 22640 }, { "epoch": 2.6792051100070973, "grad_norm": 2.6347246170043945, "learning_rate": 4.445128082355757e-06, "loss": 0.2353, "step": 22650 }, { "epoch": 2.6803879820203456, "grad_norm": 3.1588587760925293, "learning_rate": 4.444409863538425e-06, "loss": 0.2735, "step": 22660 }, { "epoch": 2.6815708540335934, "grad_norm": 2.4475293159484863, "learning_rate": 4.443691644721092e-06, "loss": 0.2938, "step": 22670 }, { "epoch": 2.6827537260468417, "grad_norm": 2.680957317352295, "learning_rate": 4.442973425903759e-06, "loss": 0.3135, "step": 22680 }, { "epoch": 2.68393659806009, "grad_norm": 2.5454654693603516, "learning_rate": 4.442255207086426e-06, "loss": 0.2888, "step": 22690 }, { "epoch": 2.685119470073338, "grad_norm": 2.876617193222046, "learning_rate": 4.441536988269093e-06, "loss": 0.3378, "step": 22700 }, { "epoch": 2.686302342086586, "grad_norm": 2.1972174644470215, "learning_rate": 4.44081876945176e-06, "loss": 0.2835, "step": 22710 }, { "epoch": 2.6874852140998344, "grad_norm": 2.8111729621887207, "learning_rate": 4.4401005506344266e-06, "loss": 0.3071, "step": 22720 }, { "epoch": 2.6886680861130827, "grad_norm": 2.6271204948425293, "learning_rate": 4.4393823318170935e-06, "loss": 0.3073, "step": 22730 }, { "epoch": 2.6898509581263306, "grad_norm": 2.106633186340332, "learning_rate": 4.43866411299976e-06, "loss": 0.2769, "step": 22740 }, { "epoch": 2.691033830139579, "grad_norm": 1.852852463722229, "learning_rate": 4.437945894182428e-06, "loss": 0.36, "step": 22750 }, { "epoch": 2.692216702152827, "grad_norm": 3.6838326454162598, "learning_rate": 4.437227675365094e-06, "loss": 0.2906, "step": 22760 }, { "epoch": 2.693399574166075, "grad_norm": 3.3854281902313232, "learning_rate": 4.436509456547762e-06, "loss": 0.3291, "step": 22770 }, { "epoch": 2.6945824461793233, "grad_norm": 1.9606446027755737, "learning_rate": 4.435791237730429e-06, "loss": 0.3019, "step": 22780 }, { "epoch": 2.6957653181925716, "grad_norm": 2.7873902320861816, "learning_rate": 4.435073018913096e-06, "loss": 0.2771, "step": 22790 }, { "epoch": 2.69694819020582, "grad_norm": 2.504549026489258, "learning_rate": 4.434354800095763e-06, "loss": 0.3005, "step": 22800 }, { "epoch": 2.6981310622190677, "grad_norm": 3.122368812561035, "learning_rate": 4.43363658127843e-06, "loss": 0.3112, "step": 22810 }, { "epoch": 2.699313934232316, "grad_norm": 2.7788901329040527, "learning_rate": 4.432918362461097e-06, "loss": 0.3093, "step": 22820 }, { "epoch": 2.7004968062455643, "grad_norm": 2.8516581058502197, "learning_rate": 4.4322001436437636e-06, "loss": 0.2746, "step": 22830 }, { "epoch": 2.701679678258812, "grad_norm": 2.4541964530944824, "learning_rate": 4.4314819248264305e-06, "loss": 0.3131, "step": 22840 }, { "epoch": 2.7028625502720605, "grad_norm": 3.6100246906280518, "learning_rate": 4.430763706009097e-06, "loss": 0.3273, "step": 22850 }, { "epoch": 2.7040454222853088, "grad_norm": 3.8532838821411133, "learning_rate": 4.430045487191764e-06, "loss": 0.3644, "step": 22860 }, { "epoch": 2.705228294298557, "grad_norm": 1.9744536876678467, "learning_rate": 4.429327268374432e-06, "loss": 0.296, "step": 22870 }, { "epoch": 2.7064111663118053, "grad_norm": 2.1615397930145264, "learning_rate": 4.428609049557098e-06, "loss": 0.289, "step": 22880 }, { "epoch": 2.707594038325053, "grad_norm": 3.225660800933838, "learning_rate": 4.427890830739766e-06, "loss": 0.322, "step": 22890 }, { "epoch": 2.7087769103383015, "grad_norm": 2.590725898742676, "learning_rate": 4.427172611922432e-06, "loss": 0.2718, "step": 22900 }, { "epoch": 2.7099597823515493, "grad_norm": 2.792945384979248, "learning_rate": 4.4264543931051e-06, "loss": 0.325, "step": 22910 }, { "epoch": 2.7111426543647976, "grad_norm": 2.9607298374176025, "learning_rate": 4.425736174287766e-06, "loss": 0.342, "step": 22920 }, { "epoch": 2.712325526378046, "grad_norm": 3.1209685802459717, "learning_rate": 4.425017955470434e-06, "loss": 0.3042, "step": 22930 }, { "epoch": 2.713508398391294, "grad_norm": 1.8263630867004395, "learning_rate": 4.4242997366531006e-06, "loss": 0.2865, "step": 22940 }, { "epoch": 2.7146912704045425, "grad_norm": 3.2093710899353027, "learning_rate": 4.4235815178357675e-06, "loss": 0.3028, "step": 22950 }, { "epoch": 2.7158741424177903, "grad_norm": 2.9916210174560547, "learning_rate": 4.422863299018434e-06, "loss": 0.2778, "step": 22960 }, { "epoch": 2.7170570144310386, "grad_norm": 3.09580135345459, "learning_rate": 4.422145080201101e-06, "loss": 0.325, "step": 22970 }, { "epoch": 2.7182398864442865, "grad_norm": 3.132143259048462, "learning_rate": 4.421426861383768e-06, "loss": 0.3074, "step": 22980 }, { "epoch": 2.719422758457535, "grad_norm": 2.373967170715332, "learning_rate": 4.420708642566435e-06, "loss": 0.2816, "step": 22990 }, { "epoch": 2.720605630470783, "grad_norm": 2.1527655124664307, "learning_rate": 4.419990423749103e-06, "loss": 0.2915, "step": 23000 }, { "epoch": 2.7217885024840314, "grad_norm": 2.5220601558685303, "learning_rate": 4.419272204931769e-06, "loss": 0.2757, "step": 23010 }, { "epoch": 2.7229713744972797, "grad_norm": 3.4006309509277344, "learning_rate": 4.418553986114437e-06, "loss": 0.3054, "step": 23020 }, { "epoch": 2.7241542465105275, "grad_norm": 2.8180737495422363, "learning_rate": 4.417835767297103e-06, "loss": 0.2854, "step": 23030 }, { "epoch": 2.725337118523776, "grad_norm": 3.7637462615966797, "learning_rate": 4.417117548479771e-06, "loss": 0.3404, "step": 23040 }, { "epoch": 2.7265199905370237, "grad_norm": 2.473134756088257, "learning_rate": 4.4163993296624376e-06, "loss": 0.3056, "step": 23050 }, { "epoch": 2.727702862550272, "grad_norm": 3.0845396518707275, "learning_rate": 4.4156811108451045e-06, "loss": 0.2885, "step": 23060 }, { "epoch": 2.7288857345635202, "grad_norm": 2.06636118888855, "learning_rate": 4.414962892027771e-06, "loss": 0.3223, "step": 23070 }, { "epoch": 2.7300686065767685, "grad_norm": 2.638631582260132, "learning_rate": 4.414244673210438e-06, "loss": 0.3525, "step": 23080 }, { "epoch": 2.731251478590017, "grad_norm": 2.579003095626831, "learning_rate": 4.413526454393105e-06, "loss": 0.316, "step": 23090 }, { "epoch": 2.7324343506032647, "grad_norm": 3.0096957683563232, "learning_rate": 4.412808235575772e-06, "loss": 0.3101, "step": 23100 }, { "epoch": 2.733617222616513, "grad_norm": 2.992541790008545, "learning_rate": 4.412090016758439e-06, "loss": 0.2963, "step": 23110 }, { "epoch": 2.734800094629761, "grad_norm": 4.029899597167969, "learning_rate": 4.411371797941106e-06, "loss": 0.3535, "step": 23120 }, { "epoch": 2.735982966643009, "grad_norm": 2.9068877696990967, "learning_rate": 4.410653579123773e-06, "loss": 0.2761, "step": 23130 }, { "epoch": 2.7371658386562574, "grad_norm": 2.3513588905334473, "learning_rate": 4.409935360306441e-06, "loss": 0.3317, "step": 23140 }, { "epoch": 2.7383487106695057, "grad_norm": 2.666271448135376, "learning_rate": 4.409217141489107e-06, "loss": 0.2716, "step": 23150 }, { "epoch": 2.739531582682754, "grad_norm": 2.6380770206451416, "learning_rate": 4.4084989226717745e-06, "loss": 0.3471, "step": 23160 }, { "epoch": 2.740714454696002, "grad_norm": 2.5514445304870605, "learning_rate": 4.407780703854441e-06, "loss": 0.2789, "step": 23170 }, { "epoch": 2.74189732670925, "grad_norm": 2.63517165184021, "learning_rate": 4.407062485037108e-06, "loss": 0.3273, "step": 23180 }, { "epoch": 2.743080198722498, "grad_norm": 2.637009620666504, "learning_rate": 4.4063442662197745e-06, "loss": 0.2867, "step": 23190 }, { "epoch": 2.7442630707357463, "grad_norm": 2.422926187515259, "learning_rate": 4.405626047402442e-06, "loss": 0.3315, "step": 23200 }, { "epoch": 2.7454459427489946, "grad_norm": 3.1612865924835205, "learning_rate": 4.404907828585109e-06, "loss": 0.3239, "step": 23210 }, { "epoch": 2.746628814762243, "grad_norm": 4.27061653137207, "learning_rate": 4.404189609767776e-06, "loss": 0.3408, "step": 23220 }, { "epoch": 2.747811686775491, "grad_norm": 2.0837695598602295, "learning_rate": 4.403471390950443e-06, "loss": 0.2802, "step": 23230 }, { "epoch": 2.748994558788739, "grad_norm": 2.7004873752593994, "learning_rate": 4.40275317213311e-06, "loss": 0.3214, "step": 23240 }, { "epoch": 2.7501774308019873, "grad_norm": 2.703378438949585, "learning_rate": 4.402034953315777e-06, "loss": 0.2938, "step": 23250 }, { "epoch": 2.7506505796072864, "eval_accuracy": 0.8600159696576505, "eval_loss": 0.3236427307128906, "eval_runtime": 78.0091, "eval_safe_aucpr": 0.915810949681183, "eval_safe_f1": 0.8426426314116349, "eval_safe_fpr": 0.12803802576903492, "eval_safe_precision": 0.8402700082046692, "eval_safe_recall": 0.8450286914450736, "eval_samples_per_second": 770.602, "eval_steps_per_second": 12.05, "eval_unsafe_aucpr": 0.9529369806974803, "eval_unsafe_f1": 0.8739344728917919, "eval_unsafe_fpr": 0.15497130855492572, "eval_unsafe_precision": 0.8759159159159159, "eval_unsafe_recall": 0.8719619742309647, "step": 23254 }, { "epoch": 2.751360302815235, "grad_norm": 3.1155622005462646, "learning_rate": 4.401316734498444e-06, "loss": 0.2745, "step": 23260 }, { "epoch": 2.7525431748284834, "grad_norm": 2.831904888153076, "learning_rate": 4.4005985156811115e-06, "loss": 0.318, "step": 23270 }, { "epoch": 2.7537260468417317, "grad_norm": 3.1426374912261963, "learning_rate": 4.399880296863778e-06, "loss": 0.3276, "step": 23280 }, { "epoch": 2.75490891885498, "grad_norm": 2.297433614730835, "learning_rate": 4.399162078046445e-06, "loss": 0.3027, "step": 23290 }, { "epoch": 2.7560917908682283, "grad_norm": 2.904245615005493, "learning_rate": 4.3984438592291115e-06, "loss": 0.3362, "step": 23300 }, { "epoch": 2.757274662881476, "grad_norm": 2.9260730743408203, "learning_rate": 4.397725640411779e-06, "loss": 0.2942, "step": 23310 }, { "epoch": 2.7584575348947244, "grad_norm": 2.8044800758361816, "learning_rate": 4.397007421594446e-06, "loss": 0.319, "step": 23320 }, { "epoch": 2.7596404069079723, "grad_norm": 3.2318618297576904, "learning_rate": 4.396289202777113e-06, "loss": 0.3323, "step": 23330 }, { "epoch": 2.7608232789212206, "grad_norm": 2.8827571868896484, "learning_rate": 4.39557098395978e-06, "loss": 0.3319, "step": 23340 }, { "epoch": 2.762006150934469, "grad_norm": 2.591367721557617, "learning_rate": 4.394852765142447e-06, "loss": 0.3233, "step": 23350 }, { "epoch": 2.763189022947717, "grad_norm": 2.1080996990203857, "learning_rate": 4.394134546325114e-06, "loss": 0.3121, "step": 23360 }, { "epoch": 2.7643718949609655, "grad_norm": 2.24174427986145, "learning_rate": 4.393416327507781e-06, "loss": 0.2933, "step": 23370 }, { "epoch": 2.7655547669742133, "grad_norm": 2.059626579284668, "learning_rate": 4.392698108690448e-06, "loss": 0.288, "step": 23380 }, { "epoch": 2.7667376389874616, "grad_norm": 3.090696334838867, "learning_rate": 4.391979889873115e-06, "loss": 0.3132, "step": 23390 }, { "epoch": 2.7679205110007095, "grad_norm": 2.660612106323242, "learning_rate": 4.3912616710557815e-06, "loss": 0.3008, "step": 23400 }, { "epoch": 2.7691033830139578, "grad_norm": 2.2376856803894043, "learning_rate": 4.3905434522384485e-06, "loss": 0.3195, "step": 23410 }, { "epoch": 2.770286255027206, "grad_norm": 2.5992982387542725, "learning_rate": 4.389825233421115e-06, "loss": 0.3193, "step": 23420 }, { "epoch": 2.7714691270404543, "grad_norm": 2.2484004497528076, "learning_rate": 4.389107014603783e-06, "loss": 0.2877, "step": 23430 }, { "epoch": 2.7726519990537026, "grad_norm": 2.537644386291504, "learning_rate": 4.388388795786449e-06, "loss": 0.3035, "step": 23440 }, { "epoch": 2.7738348710669505, "grad_norm": 2.8209824562072754, "learning_rate": 4.387670576969117e-06, "loss": 0.3389, "step": 23450 }, { "epoch": 2.7750177430801988, "grad_norm": 2.1926157474517822, "learning_rate": 4.386952358151783e-06, "loss": 0.2674, "step": 23460 }, { "epoch": 2.7762006150934466, "grad_norm": 3.265162706375122, "learning_rate": 4.386234139334451e-06, "loss": 0.3192, "step": 23470 }, { "epoch": 2.777383487106695, "grad_norm": 5.2143707275390625, "learning_rate": 4.385515920517118e-06, "loss": 0.2838, "step": 23480 }, { "epoch": 2.778566359119943, "grad_norm": 2.268308162689209, "learning_rate": 4.384797701699785e-06, "loss": 0.3102, "step": 23490 }, { "epoch": 2.7797492311331915, "grad_norm": 1.8075358867645264, "learning_rate": 4.384079482882452e-06, "loss": 0.3567, "step": 23500 }, { "epoch": 2.78093210314644, "grad_norm": 1.7917972803115845, "learning_rate": 4.3833612640651185e-06, "loss": 0.3117, "step": 23510 }, { "epoch": 2.7821149751596876, "grad_norm": 2.8360867500305176, "learning_rate": 4.382643045247786e-06, "loss": 0.2782, "step": 23520 }, { "epoch": 2.783297847172936, "grad_norm": 4.134777545928955, "learning_rate": 4.381924826430452e-06, "loss": 0.3221, "step": 23530 }, { "epoch": 2.784480719186184, "grad_norm": 3.0642571449279785, "learning_rate": 4.38120660761312e-06, "loss": 0.3283, "step": 23540 }, { "epoch": 2.785663591199432, "grad_norm": 3.0967137813568115, "learning_rate": 4.380488388795786e-06, "loss": 0.3269, "step": 23550 }, { "epoch": 2.7868464632126804, "grad_norm": 3.2665817737579346, "learning_rate": 4.379770169978454e-06, "loss": 0.3101, "step": 23560 }, { "epoch": 2.7880293352259287, "grad_norm": 3.0433547496795654, "learning_rate": 4.37905195116112e-06, "loss": 0.3173, "step": 23570 }, { "epoch": 2.789212207239177, "grad_norm": 2.697173595428467, "learning_rate": 4.378333732343788e-06, "loss": 0.3025, "step": 23580 }, { "epoch": 2.790395079252425, "grad_norm": 2.210649013519287, "learning_rate": 4.377615513526455e-06, "loss": 0.3011, "step": 23590 }, { "epoch": 2.791577951265673, "grad_norm": 3.6849350929260254, "learning_rate": 4.376897294709122e-06, "loss": 0.2838, "step": 23600 }, { "epoch": 2.792760823278921, "grad_norm": 3.105637788772583, "learning_rate": 4.376179075891789e-06, "loss": 0.3174, "step": 23610 }, { "epoch": 2.7939436952921692, "grad_norm": 2.7724554538726807, "learning_rate": 4.3754608570744555e-06, "loss": 0.355, "step": 23620 }, { "epoch": 2.7951265673054175, "grad_norm": 2.5946154594421387, "learning_rate": 4.3747426382571225e-06, "loss": 0.2961, "step": 23630 }, { "epoch": 2.796309439318666, "grad_norm": 2.994760274887085, "learning_rate": 4.374024419439789e-06, "loss": 0.281, "step": 23640 }, { "epoch": 2.797492311331914, "grad_norm": 2.9595296382904053, "learning_rate": 4.373306200622456e-06, "loss": 0.3369, "step": 23650 }, { "epoch": 2.798675183345162, "grad_norm": 3.436298131942749, "learning_rate": 4.372587981805123e-06, "loss": 0.2829, "step": 23660 }, { "epoch": 2.7998580553584103, "grad_norm": 3.4422378540039062, "learning_rate": 4.37186976298779e-06, "loss": 0.3076, "step": 23670 }, { "epoch": 2.8010409273716586, "grad_norm": 2.474370002746582, "learning_rate": 4.371151544170457e-06, "loss": 0.3513, "step": 23680 }, { "epoch": 2.8022237993849064, "grad_norm": 2.483954906463623, "learning_rate": 4.370433325353124e-06, "loss": 0.2953, "step": 23690 }, { "epoch": 2.8034066713981547, "grad_norm": 2.6980764865875244, "learning_rate": 4.369715106535792e-06, "loss": 0.3324, "step": 23700 }, { "epoch": 2.804589543411403, "grad_norm": 3.0941340923309326, "learning_rate": 4.368996887718458e-06, "loss": 0.3381, "step": 23710 }, { "epoch": 2.8057724154246513, "grad_norm": 3.7191810607910156, "learning_rate": 4.368278668901126e-06, "loss": 0.3103, "step": 23720 }, { "epoch": 2.806955287437899, "grad_norm": 2.45870041847229, "learning_rate": 4.367560450083792e-06, "loss": 0.2724, "step": 23730 }, { "epoch": 2.8081381594511474, "grad_norm": 2.5216989517211914, "learning_rate": 4.3668422312664595e-06, "loss": 0.2965, "step": 23740 }, { "epoch": 2.8093210314643957, "grad_norm": 3.067351818084717, "learning_rate": 4.3661240124491255e-06, "loss": 0.3239, "step": 23750 }, { "epoch": 2.8105039034776436, "grad_norm": 2.456937551498413, "learning_rate": 4.365405793631793e-06, "loss": 0.2802, "step": 23760 }, { "epoch": 2.811686775490892, "grad_norm": 2.1812660694122314, "learning_rate": 4.36468757481446e-06, "loss": 0.2657, "step": 23770 }, { "epoch": 2.81286964750414, "grad_norm": 2.795524835586548, "learning_rate": 4.363969355997127e-06, "loss": 0.2958, "step": 23780 }, { "epoch": 2.8140525195173884, "grad_norm": 3.1462368965148926, "learning_rate": 4.363251137179795e-06, "loss": 0.3151, "step": 23790 }, { "epoch": 2.8152353915306363, "grad_norm": 3.474961996078491, "learning_rate": 4.362532918362461e-06, "loss": 0.3123, "step": 23800 }, { "epoch": 2.8164182635438846, "grad_norm": 2.382246732711792, "learning_rate": 4.361814699545129e-06, "loss": 0.303, "step": 23810 }, { "epoch": 2.817601135557133, "grad_norm": 3.0838286876678467, "learning_rate": 4.361096480727795e-06, "loss": 0.3176, "step": 23820 }, { "epoch": 2.8187840075703807, "grad_norm": 3.0229458808898926, "learning_rate": 4.360378261910463e-06, "loss": 0.2883, "step": 23830 }, { "epoch": 2.819966879583629, "grad_norm": 2.019643783569336, "learning_rate": 4.359660043093129e-06, "loss": 0.2997, "step": 23840 }, { "epoch": 2.8211497515968773, "grad_norm": 2.7812955379486084, "learning_rate": 4.3589418242757965e-06, "loss": 0.2914, "step": 23850 }, { "epoch": 2.8223326236101256, "grad_norm": 2.832305908203125, "learning_rate": 4.358223605458463e-06, "loss": 0.2701, "step": 23860 }, { "epoch": 2.8235154956233735, "grad_norm": 2.620081663131714, "learning_rate": 4.35750538664113e-06, "loss": 0.2716, "step": 23870 }, { "epoch": 2.8246983676366217, "grad_norm": 3.746365547180176, "learning_rate": 4.356787167823797e-06, "loss": 0.3167, "step": 23880 }, { "epoch": 2.82588123964987, "grad_norm": 3.218179941177368, "learning_rate": 4.356068949006464e-06, "loss": 0.2752, "step": 23890 }, { "epoch": 2.827064111663118, "grad_norm": 2.8903679847717285, "learning_rate": 4.355350730189131e-06, "loss": 0.3341, "step": 23900 }, { "epoch": 2.828246983676366, "grad_norm": 3.1378731727600098, "learning_rate": 4.354632511371798e-06, "loss": 0.3288, "step": 23910 }, { "epoch": 2.8294298556896145, "grad_norm": 3.520890951156616, "learning_rate": 4.353914292554465e-06, "loss": 0.3031, "step": 23920 }, { "epoch": 2.8306127277028628, "grad_norm": 3.1894991397857666, "learning_rate": 4.353196073737132e-06, "loss": 0.2866, "step": 23930 }, { "epoch": 2.8317955997161106, "grad_norm": 3.105095863342285, "learning_rate": 4.352477854919799e-06, "loss": 0.3252, "step": 23940 }, { "epoch": 2.832978471729359, "grad_norm": 3.4214463233947754, "learning_rate": 4.351759636102466e-06, "loss": 0.3235, "step": 23950 }, { "epoch": 2.834161343742607, "grad_norm": 2.883387327194214, "learning_rate": 4.351041417285133e-06, "loss": 0.2861, "step": 23960 }, { "epoch": 2.835344215755855, "grad_norm": 3.905980348587036, "learning_rate": 4.3503231984678e-06, "loss": 0.3208, "step": 23970 }, { "epoch": 2.8365270877691033, "grad_norm": 2.9791083335876465, "learning_rate": 4.3496049796504665e-06, "loss": 0.3083, "step": 23980 }, { "epoch": 2.8377099597823516, "grad_norm": 2.9695980548858643, "learning_rate": 4.348886760833134e-06, "loss": 0.2938, "step": 23990 }, { "epoch": 2.8388928317956, "grad_norm": 2.961609125137329, "learning_rate": 4.3481685420158e-06, "loss": 0.2956, "step": 24000 }, { "epoch": 2.8400757038088478, "grad_norm": 4.365261554718018, "learning_rate": 4.347450323198468e-06, "loss": 0.3352, "step": 24010 }, { "epoch": 2.841258575822096, "grad_norm": 2.174947738647461, "learning_rate": 4.346732104381135e-06, "loss": 0.3087, "step": 24020 }, { "epoch": 2.8424414478353444, "grad_norm": 2.263009548187256, "learning_rate": 4.346013885563802e-06, "loss": 0.3029, "step": 24030 }, { "epoch": 2.843624319848592, "grad_norm": 2.728564739227295, "learning_rate": 4.345295666746469e-06, "loss": 0.2667, "step": 24040 }, { "epoch": 2.8448071918618405, "grad_norm": 2.0310652256011963, "learning_rate": 4.344577447929136e-06, "loss": 0.2938, "step": 24050 }, { "epoch": 2.845990063875089, "grad_norm": 3.1241891384124756, "learning_rate": 4.3438592291118035e-06, "loss": 0.302, "step": 24060 }, { "epoch": 2.847172935888337, "grad_norm": 1.7154382467269897, "learning_rate": 4.34314101029447e-06, "loss": 0.3165, "step": 24070 }, { "epoch": 2.848355807901585, "grad_norm": 2.4480531215667725, "learning_rate": 4.342422791477137e-06, "loss": 0.3132, "step": 24080 }, { "epoch": 2.8495386799148332, "grad_norm": 2.5914530754089355, "learning_rate": 4.3417045726598034e-06, "loss": 0.3099, "step": 24090 }, { "epoch": 2.8507215519280815, "grad_norm": 2.5049376487731934, "learning_rate": 4.340986353842471e-06, "loss": 0.2598, "step": 24100 }, { "epoch": 2.8519044239413294, "grad_norm": 2.4667346477508545, "learning_rate": 4.340268135025137e-06, "loss": 0.3143, "step": 24110 }, { "epoch": 2.8530872959545777, "grad_norm": 3.8974785804748535, "learning_rate": 4.339549916207805e-06, "loss": 0.3016, "step": 24120 }, { "epoch": 2.854270167967826, "grad_norm": 3.359978199005127, "learning_rate": 4.338831697390472e-06, "loss": 0.3351, "step": 24130 }, { "epoch": 2.8554530399810742, "grad_norm": 2.733987808227539, "learning_rate": 4.338113478573139e-06, "loss": 0.3091, "step": 24140 }, { "epoch": 2.856635911994322, "grad_norm": 2.608760356903076, "learning_rate": 4.337395259755806e-06, "loss": 0.3083, "step": 24150 }, { "epoch": 2.8578187840075704, "grad_norm": 2.0584022998809814, "learning_rate": 4.336677040938473e-06, "loss": 0.2611, "step": 24160 }, { "epoch": 2.8590016560208187, "grad_norm": 3.2685577869415283, "learning_rate": 4.33595882212114e-06, "loss": 0.3026, "step": 24170 }, { "epoch": 2.8601845280340665, "grad_norm": 2.4171926975250244, "learning_rate": 4.335240603303807e-06, "loss": 0.2818, "step": 24180 }, { "epoch": 2.861367400047315, "grad_norm": 4.359286308288574, "learning_rate": 4.3345223844864735e-06, "loss": 0.3372, "step": 24190 }, { "epoch": 2.862550272060563, "grad_norm": 2.5716099739074707, "learning_rate": 4.3338041656691404e-06, "loss": 0.2796, "step": 24200 }, { "epoch": 2.8637331440738114, "grad_norm": 3.2029051780700684, "learning_rate": 4.333085946851807e-06, "loss": 0.3038, "step": 24210 }, { "epoch": 2.8649160160870593, "grad_norm": 3.3147220611572266, "learning_rate": 4.332367728034474e-06, "loss": 0.3271, "step": 24220 }, { "epoch": 2.8660988881003076, "grad_norm": 2.245283365249634, "learning_rate": 4.331649509217141e-06, "loss": 0.2911, "step": 24230 }, { "epoch": 2.867281760113556, "grad_norm": 2.960596799850464, "learning_rate": 4.330931290399809e-06, "loss": 0.2754, "step": 24240 }, { "epoch": 2.8684646321268037, "grad_norm": 2.703169822692871, "learning_rate": 4.330213071582475e-06, "loss": 0.3137, "step": 24250 }, { "epoch": 2.869647504140052, "grad_norm": 2.8850743770599365, "learning_rate": 4.329494852765143e-06, "loss": 0.3061, "step": 24260 }, { "epoch": 2.8708303761533003, "grad_norm": 2.7234113216400146, "learning_rate": 4.32877663394781e-06, "loss": 0.3161, "step": 24270 }, { "epoch": 2.8720132481665486, "grad_norm": 2.654510021209717, "learning_rate": 4.328058415130477e-06, "loss": 0.3112, "step": 24280 }, { "epoch": 2.8731961201797964, "grad_norm": 2.526492118835449, "learning_rate": 4.327340196313144e-06, "loss": 0.3162, "step": 24290 }, { "epoch": 2.8743789921930447, "grad_norm": 2.7180864810943604, "learning_rate": 4.3266219774958105e-06, "loss": 0.2565, "step": 24300 }, { "epoch": 2.875561864206293, "grad_norm": 2.5691475868225098, "learning_rate": 4.3259037586784774e-06, "loss": 0.27, "step": 24310 }, { "epoch": 2.876744736219541, "grad_norm": 3.05277681350708, "learning_rate": 4.325185539861144e-06, "loss": 0.3074, "step": 24320 }, { "epoch": 2.877927608232789, "grad_norm": 3.2249338626861572, "learning_rate": 4.324467321043811e-06, "loss": 0.2773, "step": 24330 }, { "epoch": 2.8791104802460374, "grad_norm": 1.9042539596557617, "learning_rate": 4.323749102226478e-06, "loss": 0.3033, "step": 24340 }, { "epoch": 2.8802933522592857, "grad_norm": 2.6295275688171387, "learning_rate": 4.323030883409146e-06, "loss": 0.3131, "step": 24350 }, { "epoch": 2.8814762242725336, "grad_norm": 4.007050514221191, "learning_rate": 4.322312664591812e-06, "loss": 0.303, "step": 24360 }, { "epoch": 2.882659096285782, "grad_norm": 3.637464761734009, "learning_rate": 4.32159444577448e-06, "loss": 0.2933, "step": 24370 }, { "epoch": 2.88384196829903, "grad_norm": 3.6937456130981445, "learning_rate": 4.320876226957146e-06, "loss": 0.3288, "step": 24380 }, { "epoch": 2.885024840312278, "grad_norm": 2.2494723796844482, "learning_rate": 4.320158008139814e-06, "loss": 0.3043, "step": 24390 }, { "epoch": 2.8862077123255263, "grad_norm": 2.7955732345581055, "learning_rate": 4.319439789322481e-06, "loss": 0.2888, "step": 24400 }, { "epoch": 2.8873905843387746, "grad_norm": 3.020750045776367, "learning_rate": 4.3187215705051475e-06, "loss": 0.3021, "step": 24410 }, { "epoch": 2.888573456352023, "grad_norm": 3.558333158493042, "learning_rate": 4.3180033516878144e-06, "loss": 0.3243, "step": 24420 }, { "epoch": 2.8897563283652707, "grad_norm": 2.969136953353882, "learning_rate": 4.317285132870481e-06, "loss": 0.2831, "step": 24430 }, { "epoch": 2.890939200378519, "grad_norm": 2.7450790405273438, "learning_rate": 4.316566914053148e-06, "loss": 0.2479, "step": 24440 }, { "epoch": 2.8921220723917673, "grad_norm": 2.771754026412964, "learning_rate": 4.315848695235815e-06, "loss": 0.3102, "step": 24450 }, { "epoch": 2.893304944405015, "grad_norm": 3.246392250061035, "learning_rate": 4.315130476418482e-06, "loss": 0.3173, "step": 24460 }, { "epoch": 2.8944878164182635, "grad_norm": 2.8471264839172363, "learning_rate": 4.314412257601149e-06, "loss": 0.3438, "step": 24470 }, { "epoch": 2.8956706884315118, "grad_norm": 2.771209478378296, "learning_rate": 4.313694038783816e-06, "loss": 0.3138, "step": 24480 }, { "epoch": 2.89685356044476, "grad_norm": 2.642932176589966, "learning_rate": 4.312975819966483e-06, "loss": 0.3052, "step": 24490 }, { "epoch": 2.898036432458008, "grad_norm": 2.9823434352874756, "learning_rate": 4.31225760114915e-06, "loss": 0.3557, "step": 24500 }, { "epoch": 2.899219304471256, "grad_norm": 2.5010318756103516, "learning_rate": 4.311539382331818e-06, "loss": 0.2704, "step": 24510 }, { "epoch": 2.9004021764845045, "grad_norm": 2.6502225399017334, "learning_rate": 4.3108211635144845e-06, "loss": 0.3315, "step": 24520 }, { "epoch": 2.9015850484977523, "grad_norm": 3.0236613750457764, "learning_rate": 4.3101029446971514e-06, "loss": 0.2636, "step": 24530 }, { "epoch": 2.9027679205110006, "grad_norm": 2.8792481422424316, "learning_rate": 4.309384725879818e-06, "loss": 0.325, "step": 24540 }, { "epoch": 2.903950792524249, "grad_norm": 4.091868877410889, "learning_rate": 4.308666507062485e-06, "loss": 0.3304, "step": 24550 }, { "epoch": 2.905133664537497, "grad_norm": 2.891231060028076, "learning_rate": 4.307948288245152e-06, "loss": 0.3346, "step": 24560 }, { "epoch": 2.906316536550745, "grad_norm": 2.7249867916107178, "learning_rate": 4.307230069427819e-06, "loss": 0.32, "step": 24570 }, { "epoch": 2.9074994085639934, "grad_norm": 2.7951316833496094, "learning_rate": 4.306511850610486e-06, "loss": 0.3018, "step": 24580 }, { "epoch": 2.9086822805772417, "grad_norm": 3.3010826110839844, "learning_rate": 4.305793631793153e-06, "loss": 0.2936, "step": 24590 }, { "epoch": 2.9098651525904895, "grad_norm": 2.40899920463562, "learning_rate": 4.30507541297582e-06, "loss": 0.2715, "step": 24600 }, { "epoch": 2.911048024603738, "grad_norm": 2.509979724884033, "learning_rate": 4.304357194158487e-06, "loss": 0.3051, "step": 24610 }, { "epoch": 2.912230896616986, "grad_norm": 3.967857599258423, "learning_rate": 4.303638975341155e-06, "loss": 0.3077, "step": 24620 }, { "epoch": 2.9134137686302344, "grad_norm": 2.6073169708251953, "learning_rate": 4.302920756523821e-06, "loss": 0.3067, "step": 24630 }, { "epoch": 2.9145966406434822, "grad_norm": 2.43867564201355, "learning_rate": 4.3022025377064884e-06, "loss": 0.3202, "step": 24640 }, { "epoch": 2.9157795126567305, "grad_norm": 3.090456247329712, "learning_rate": 4.3014843188891545e-06, "loss": 0.2951, "step": 24650 }, { "epoch": 2.916962384669979, "grad_norm": 2.836073160171509, "learning_rate": 4.300766100071822e-06, "loss": 0.3012, "step": 24660 }, { "epoch": 2.9181452566832267, "grad_norm": 2.6881330013275146, "learning_rate": 4.300047881254488e-06, "loss": 0.3224, "step": 24670 }, { "epoch": 2.919328128696475, "grad_norm": 2.8682494163513184, "learning_rate": 4.299329662437156e-06, "loss": 0.3178, "step": 24680 }, { "epoch": 2.9205110007097232, "grad_norm": 2.399474620819092, "learning_rate": 4.298611443619823e-06, "loss": 0.2934, "step": 24690 }, { "epoch": 2.9216938727229715, "grad_norm": 2.8152213096618652, "learning_rate": 4.29789322480249e-06, "loss": 0.3258, "step": 24700 }, { "epoch": 2.9228767447362194, "grad_norm": 2.896705389022827, "learning_rate": 4.297175005985157e-06, "loss": 0.3083, "step": 24710 }, { "epoch": 2.9240596167494677, "grad_norm": 3.403679132461548, "learning_rate": 4.296456787167824e-06, "loss": 0.309, "step": 24720 }, { "epoch": 2.925242488762716, "grad_norm": 2.7205591201782227, "learning_rate": 4.295738568350491e-06, "loss": 0.3258, "step": 24730 }, { "epoch": 2.926425360775964, "grad_norm": 2.4609246253967285, "learning_rate": 4.295020349533158e-06, "loss": 0.2887, "step": 24740 }, { "epoch": 2.927608232789212, "grad_norm": 2.5012879371643066, "learning_rate": 4.294302130715825e-06, "loss": 0.3044, "step": 24750 }, { "epoch": 2.9287911048024604, "grad_norm": 3.2999508380889893, "learning_rate": 4.2935839118984915e-06, "loss": 0.2661, "step": 24760 }, { "epoch": 2.9299739768157087, "grad_norm": 2.513847827911377, "learning_rate": 4.2928656930811584e-06, "loss": 0.2638, "step": 24770 }, { "epoch": 2.9311568488289566, "grad_norm": 2.975158452987671, "learning_rate": 4.292147474263826e-06, "loss": 0.3524, "step": 24780 }, { "epoch": 2.932339720842205, "grad_norm": 2.611801862716675, "learning_rate": 4.291429255446493e-06, "loss": 0.3385, "step": 24790 }, { "epoch": 2.933522592855453, "grad_norm": 1.6549113988876343, "learning_rate": 4.29071103662916e-06, "loss": 0.2907, "step": 24800 }, { "epoch": 2.934705464868701, "grad_norm": 2.966315984725952, "learning_rate": 4.289992817811827e-06, "loss": 0.2909, "step": 24810 }, { "epoch": 2.9358883368819493, "grad_norm": 2.4530463218688965, "learning_rate": 4.289274598994494e-06, "loss": 0.3181, "step": 24820 }, { "epoch": 2.9370712088951976, "grad_norm": 2.3788347244262695, "learning_rate": 4.288556380177161e-06, "loss": 0.3199, "step": 24830 }, { "epoch": 2.938254080908446, "grad_norm": 2.2570769786834717, "learning_rate": 4.287838161359828e-06, "loss": 0.2976, "step": 24840 }, { "epoch": 2.9394369529216937, "grad_norm": 2.7231757640838623, "learning_rate": 4.287119942542495e-06, "loss": 0.3131, "step": 24850 }, { "epoch": 2.940619824934942, "grad_norm": 2.869208812713623, "learning_rate": 4.286401723725162e-06, "loss": 0.2664, "step": 24860 }, { "epoch": 2.9418026969481903, "grad_norm": 2.2201197147369385, "learning_rate": 4.2856835049078285e-06, "loss": 0.2978, "step": 24870 }, { "epoch": 2.942985568961438, "grad_norm": 3.213529109954834, "learning_rate": 4.2849652860904954e-06, "loss": 0.3149, "step": 24880 }, { "epoch": 2.9441684409746864, "grad_norm": 3.057131052017212, "learning_rate": 4.284247067273163e-06, "loss": 0.308, "step": 24890 }, { "epoch": 2.9453513129879347, "grad_norm": 2.3544867038726807, "learning_rate": 4.283528848455829e-06, "loss": 0.3129, "step": 24900 }, { "epoch": 2.946534185001183, "grad_norm": 2.861154794692993, "learning_rate": 4.282810629638497e-06, "loss": 0.2907, "step": 24910 }, { "epoch": 2.947717057014431, "grad_norm": 2.8775789737701416, "learning_rate": 4.282092410821163e-06, "loss": 0.2641, "step": 24920 }, { "epoch": 2.948899929027679, "grad_norm": 5.674015998840332, "learning_rate": 4.281374192003831e-06, "loss": 0.3215, "step": 24930 }, { "epoch": 2.9500828010409275, "grad_norm": 2.2902441024780273, "learning_rate": 4.280655973186497e-06, "loss": 0.2831, "step": 24940 }, { "epoch": 2.9512656730541753, "grad_norm": 3.575136423110962, "learning_rate": 4.279937754369165e-06, "loss": 0.3087, "step": 24950 }, { "epoch": 2.9524485450674236, "grad_norm": 2.246788263320923, "learning_rate": 4.279219535551832e-06, "loss": 0.3228, "step": 24960 }, { "epoch": 2.953631417080672, "grad_norm": 2.2626841068267822, "learning_rate": 4.278501316734499e-06, "loss": 0.3177, "step": 24970 }, { "epoch": 2.95481428909392, "grad_norm": 2.23987078666687, "learning_rate": 4.2777830979171655e-06, "loss": 0.3079, "step": 24980 }, { "epoch": 2.9559971611071685, "grad_norm": 2.4407312870025635, "learning_rate": 4.2770648790998324e-06, "loss": 0.2809, "step": 24990 }, { "epoch": 2.9571800331204163, "grad_norm": 2.185276508331299, "learning_rate": 4.276346660282499e-06, "loss": 0.2932, "step": 25000 }, { "epoch": 2.9583629051336646, "grad_norm": 2.1709935665130615, "learning_rate": 4.275628441465166e-06, "loss": 0.3016, "step": 25010 }, { "epoch": 2.9595457771469125, "grad_norm": 3.0491926670074463, "learning_rate": 4.274910222647833e-06, "loss": 0.2597, "step": 25020 }, { "epoch": 2.9607286491601608, "grad_norm": 3.196272134780884, "learning_rate": 4.2741920038305e-06, "loss": 0.3248, "step": 25030 }, { "epoch": 2.961911521173409, "grad_norm": 2.5127756595611572, "learning_rate": 4.273473785013168e-06, "loss": 0.3259, "step": 25040 }, { "epoch": 2.9630943931866573, "grad_norm": 2.741196393966675, "learning_rate": 4.272755566195835e-06, "loss": 0.3141, "step": 25050 }, { "epoch": 2.9642772651999056, "grad_norm": 2.100416421890259, "learning_rate": 4.272037347378502e-06, "loss": 0.2968, "step": 25060 }, { "epoch": 2.9654601372131535, "grad_norm": 3.8466129302978516, "learning_rate": 4.271319128561169e-06, "loss": 0.2745, "step": 25070 }, { "epoch": 2.966643009226402, "grad_norm": 2.1351864337921143, "learning_rate": 4.270600909743836e-06, "loss": 0.2583, "step": 25080 }, { "epoch": 2.9678258812396496, "grad_norm": 2.9576733112335205, "learning_rate": 4.2698826909265025e-06, "loss": 0.3536, "step": 25090 }, { "epoch": 2.969008753252898, "grad_norm": 2.7691287994384766, "learning_rate": 4.2691644721091694e-06, "loss": 0.3374, "step": 25100 }, { "epoch": 2.970191625266146, "grad_norm": 1.9637224674224854, "learning_rate": 4.268446253291836e-06, "loss": 0.3056, "step": 25110 }, { "epoch": 2.9713744972793945, "grad_norm": 3.046872138977051, "learning_rate": 4.267728034474503e-06, "loss": 0.3292, "step": 25120 }, { "epoch": 2.972557369292643, "grad_norm": 1.9840370416641235, "learning_rate": 4.26700981565717e-06, "loss": 0.3321, "step": 25130 }, { "epoch": 2.9737402413058907, "grad_norm": 2.9284114837646484, "learning_rate": 4.266291596839837e-06, "loss": 0.2942, "step": 25140 }, { "epoch": 2.974923113319139, "grad_norm": 2.532626152038574, "learning_rate": 4.265573378022504e-06, "loss": 0.345, "step": 25150 }, { "epoch": 2.976105985332387, "grad_norm": 4.168957233428955, "learning_rate": 4.264855159205172e-06, "loss": 0.3143, "step": 25160 }, { "epoch": 2.977288857345635, "grad_norm": 2.9889402389526367, "learning_rate": 4.264136940387838e-06, "loss": 0.3262, "step": 25170 }, { "epoch": 2.9784717293588834, "grad_norm": 2.7924888134002686, "learning_rate": 4.263418721570506e-06, "loss": 0.3165, "step": 25180 }, { "epoch": 2.9796546013721317, "grad_norm": 2.8963141441345215, "learning_rate": 4.262700502753172e-06, "loss": 0.2907, "step": 25190 }, { "epoch": 2.98083747338538, "grad_norm": 2.3187415599823, "learning_rate": 4.2619822839358395e-06, "loss": 0.3271, "step": 25200 }, { "epoch": 2.982020345398628, "grad_norm": 2.196073532104492, "learning_rate": 4.261264065118506e-06, "loss": 0.2723, "step": 25210 }, { "epoch": 2.983203217411876, "grad_norm": 2.0752670764923096, "learning_rate": 4.260545846301173e-06, "loss": 0.2533, "step": 25220 }, { "epoch": 2.984386089425124, "grad_norm": 2.282978057861328, "learning_rate": 4.25982762748384e-06, "loss": 0.2677, "step": 25230 }, { "epoch": 2.9855689614383722, "grad_norm": 3.7450320720672607, "learning_rate": 4.259109408666507e-06, "loss": 0.281, "step": 25240 }, { "epoch": 2.9867518334516205, "grad_norm": 3.024444103240967, "learning_rate": 4.258391189849174e-06, "loss": 0.3004, "step": 25250 }, { "epoch": 2.987934705464869, "grad_norm": 1.9286129474639893, "learning_rate": 4.257672971031841e-06, "loss": 0.2872, "step": 25260 }, { "epoch": 2.989117577478117, "grad_norm": 2.5781302452087402, "learning_rate": 4.256954752214508e-06, "loss": 0.2969, "step": 25270 }, { "epoch": 2.990300449491365, "grad_norm": 3.5776491165161133, "learning_rate": 4.256236533397175e-06, "loss": 0.2907, "step": 25280 }, { "epoch": 2.9914833215046133, "grad_norm": 2.300605297088623, "learning_rate": 4.255518314579843e-06, "loss": 0.3402, "step": 25290 }, { "epoch": 2.992666193517861, "grad_norm": 3.1672215461730957, "learning_rate": 4.254800095762509e-06, "loss": 0.2997, "step": 25300 }, { "epoch": 2.9938490655311094, "grad_norm": 1.8890864849090576, "learning_rate": 4.2540818769451765e-06, "loss": 0.2888, "step": 25310 }, { "epoch": 2.9950319375443577, "grad_norm": 2.8092312812805176, "learning_rate": 4.253363658127843e-06, "loss": 0.3448, "step": 25320 }, { "epoch": 2.996214809557606, "grad_norm": 3.5105812549591064, "learning_rate": 4.25264543931051e-06, "loss": 0.3037, "step": 25330 }, { "epoch": 2.9973976815708543, "grad_norm": 2.169095277786255, "learning_rate": 4.251927220493177e-06, "loss": 0.3121, "step": 25340 }, { "epoch": 2.998580553584102, "grad_norm": 3.7993338108062744, "learning_rate": 4.251209001675844e-06, "loss": 0.3087, "step": 25350 }, { "epoch": 2.9997634255973504, "grad_norm": 3.136486768722534, "learning_rate": 4.250490782858511e-06, "loss": 0.2993, "step": 25360 }, { "epoch": 3.0007097232079487, "eval_accuracy": 0.8604651162790697, "eval_loss": 0.3228709399700165, "eval_runtime": 80.8873, "eval_safe_aucpr": 0.9165642380986325, "eval_safe_f1": 0.8447241762310256, "eval_safe_fpr": 0.1357507996771393, "eval_safe_precision": 0.8340095770735095, "eval_safe_recall": 0.8557176611784121, "eval_samples_per_second": 743.182, "eval_steps_per_second": 11.621, "eval_unsafe_aucpr": 0.95316312759668, "eval_unsafe_f1": 0.8733083615273078, "eval_unsafe_fpr": 0.14428233882158745, "eval_unsafe_precision": 0.8825594529413561, "eval_unsafe_recall": 0.8642492003228603, "step": 25368 }, { "epoch": 3.0009462976105987, "grad_norm": 2.4251770973205566, "learning_rate": 4.249772564041178e-06, "loss": 0.324, "step": 25370 }, { "epoch": 3.0021291696238466, "grad_norm": 2.4300856590270996, "learning_rate": 4.249054345223845e-06, "loss": 0.2973, "step": 25380 }, { "epoch": 3.003312041637095, "grad_norm": 3.1689414978027344, "learning_rate": 4.248336126406512e-06, "loss": 0.2707, "step": 25390 }, { "epoch": 3.004494913650343, "grad_norm": 3.675501585006714, "learning_rate": 4.247617907589179e-06, "loss": 0.2787, "step": 25400 }, { "epoch": 3.005677785663591, "grad_norm": 2.5266361236572266, "learning_rate": 4.246899688771846e-06, "loss": 0.3087, "step": 25410 }, { "epoch": 3.0068606576768393, "grad_norm": 2.952092170715332, "learning_rate": 4.246181469954513e-06, "loss": 0.2941, "step": 25420 }, { "epoch": 3.0080435296900876, "grad_norm": 3.5902297496795654, "learning_rate": 4.24546325113718e-06, "loss": 0.3034, "step": 25430 }, { "epoch": 3.009226401703336, "grad_norm": 3.6910159587860107, "learning_rate": 4.2447450323198465e-06, "loss": 0.3116, "step": 25440 }, { "epoch": 3.0104092737165837, "grad_norm": 2.407165288925171, "learning_rate": 4.244026813502514e-06, "loss": 0.2696, "step": 25450 }, { "epoch": 3.011592145729832, "grad_norm": 3.68829345703125, "learning_rate": 4.24330859468518e-06, "loss": 0.2955, "step": 25460 }, { "epoch": 3.0127750177430803, "grad_norm": 4.646860122680664, "learning_rate": 4.242590375867848e-06, "loss": 0.3156, "step": 25470 }, { "epoch": 3.013957889756328, "grad_norm": 2.9903318881988525, "learning_rate": 4.241872157050514e-06, "loss": 0.3033, "step": 25480 }, { "epoch": 3.0151407617695765, "grad_norm": 2.9443302154541016, "learning_rate": 4.241153938233182e-06, "loss": 0.2607, "step": 25490 }, { "epoch": 3.0163236337828248, "grad_norm": 3.1221108436584473, "learning_rate": 4.240435719415849e-06, "loss": 0.2323, "step": 25500 }, { "epoch": 3.017506505796073, "grad_norm": 4.149600028991699, "learning_rate": 4.239717500598516e-06, "loss": 0.3212, "step": 25510 }, { "epoch": 3.018689377809321, "grad_norm": 4.904343605041504, "learning_rate": 4.238999281781183e-06, "loss": 0.3559, "step": 25520 }, { "epoch": 3.019872249822569, "grad_norm": 2.3706648349761963, "learning_rate": 4.23828106296385e-06, "loss": 0.2591, "step": 25530 }, { "epoch": 3.0210551218358175, "grad_norm": 3.659315586090088, "learning_rate": 4.237562844146517e-06, "loss": 0.2776, "step": 25540 }, { "epoch": 3.0222379938490653, "grad_norm": 3.6037652492523193, "learning_rate": 4.2368446253291835e-06, "loss": 0.2918, "step": 25550 }, { "epoch": 3.0234208658623136, "grad_norm": 3.219311237335205, "learning_rate": 4.236126406511851e-06, "loss": 0.2757, "step": 25560 }, { "epoch": 3.024603737875562, "grad_norm": 3.2861392498016357, "learning_rate": 4.235408187694517e-06, "loss": 0.3014, "step": 25570 }, { "epoch": 3.02578660988881, "grad_norm": 2.407735586166382, "learning_rate": 4.234689968877185e-06, "loss": 0.3028, "step": 25580 }, { "epoch": 3.026969481902058, "grad_norm": 2.3614001274108887, "learning_rate": 4.233971750059851e-06, "loss": 0.2979, "step": 25590 }, { "epoch": 3.0281523539153063, "grad_norm": 2.4324471950531006, "learning_rate": 4.233253531242519e-06, "loss": 0.2938, "step": 25600 }, { "epoch": 3.0293352259285546, "grad_norm": 3.4657468795776367, "learning_rate": 4.232535312425186e-06, "loss": 0.3027, "step": 25610 }, { "epoch": 3.0305180979418025, "grad_norm": 2.8468799591064453, "learning_rate": 4.231817093607853e-06, "loss": 0.2888, "step": 25620 }, { "epoch": 3.031700969955051, "grad_norm": 3.5469186305999756, "learning_rate": 4.23109887479052e-06, "loss": 0.2841, "step": 25630 }, { "epoch": 3.032883841968299, "grad_norm": 2.9140517711639404, "learning_rate": 4.230380655973187e-06, "loss": 0.3127, "step": 25640 }, { "epoch": 3.0340667139815474, "grad_norm": 3.262416124343872, "learning_rate": 4.2296624371558536e-06, "loss": 0.2479, "step": 25650 }, { "epoch": 3.035249585994795, "grad_norm": 3.1434836387634277, "learning_rate": 4.2289442183385205e-06, "loss": 0.2631, "step": 25660 }, { "epoch": 3.0364324580080435, "grad_norm": 2.56406569480896, "learning_rate": 4.228225999521187e-06, "loss": 0.2893, "step": 25670 }, { "epoch": 3.037615330021292, "grad_norm": 4.192211627960205, "learning_rate": 4.227507780703854e-06, "loss": 0.2978, "step": 25680 }, { "epoch": 3.0387982020345397, "grad_norm": 2.9986400604248047, "learning_rate": 4.226789561886521e-06, "loss": 0.299, "step": 25690 }, { "epoch": 3.039981074047788, "grad_norm": 2.8809149265289307, "learning_rate": 4.226071343069189e-06, "loss": 0.2407, "step": 25700 }, { "epoch": 3.0411639460610362, "grad_norm": 2.47594952583313, "learning_rate": 4.225353124251855e-06, "loss": 0.252, "step": 25710 }, { "epoch": 3.0423468180742845, "grad_norm": 2.641633987426758, "learning_rate": 4.224634905434523e-06, "loss": 0.2982, "step": 25720 }, { "epoch": 3.0435296900875324, "grad_norm": 2.83441424369812, "learning_rate": 4.223916686617189e-06, "loss": 0.2832, "step": 25730 }, { "epoch": 3.0447125621007807, "grad_norm": 4.333640098571777, "learning_rate": 4.223198467799857e-06, "loss": 0.2938, "step": 25740 }, { "epoch": 3.045895434114029, "grad_norm": 3.4308552742004395, "learning_rate": 4.222480248982523e-06, "loss": 0.3003, "step": 25750 }, { "epoch": 3.047078306127277, "grad_norm": 2.4360756874084473, "learning_rate": 4.2217620301651906e-06, "loss": 0.3109, "step": 25760 }, { "epoch": 3.048261178140525, "grad_norm": 2.7177233695983887, "learning_rate": 4.2210438113478575e-06, "loss": 0.2916, "step": 25770 }, { "epoch": 3.0494440501537734, "grad_norm": 2.774137020111084, "learning_rate": 4.220325592530524e-06, "loss": 0.2899, "step": 25780 }, { "epoch": 3.0506269221670217, "grad_norm": 3.293748378753662, "learning_rate": 4.219607373713191e-06, "loss": 0.2763, "step": 25790 }, { "epoch": 3.0518097941802695, "grad_norm": 2.5247225761413574, "learning_rate": 4.218889154895858e-06, "loss": 0.2886, "step": 25800 }, { "epoch": 3.052992666193518, "grad_norm": 2.5937037467956543, "learning_rate": 4.218170936078526e-06, "loss": 0.3018, "step": 25810 }, { "epoch": 3.054175538206766, "grad_norm": 3.2484025955200195, "learning_rate": 4.217452717261192e-06, "loss": 0.3125, "step": 25820 }, { "epoch": 3.055358410220014, "grad_norm": 3.243189573287964, "learning_rate": 4.21673449844386e-06, "loss": 0.2932, "step": 25830 }, { "epoch": 3.0565412822332623, "grad_norm": 2.8589673042297363, "learning_rate": 4.216016279626526e-06, "loss": 0.295, "step": 25840 }, { "epoch": 3.0577241542465106, "grad_norm": 2.360625743865967, "learning_rate": 4.215298060809194e-06, "loss": 0.2603, "step": 25850 }, { "epoch": 3.058907026259759, "grad_norm": 2.2223730087280273, "learning_rate": 4.21457984199186e-06, "loss": 0.2855, "step": 25860 }, { "epoch": 3.0600898982730067, "grad_norm": 3.6078221797943115, "learning_rate": 4.2138616231745276e-06, "loss": 0.2577, "step": 25870 }, { "epoch": 3.061272770286255, "grad_norm": 4.722879886627197, "learning_rate": 4.2131434043571945e-06, "loss": 0.2839, "step": 25880 }, { "epoch": 3.0624556422995033, "grad_norm": 4.164831161499023, "learning_rate": 4.212425185539861e-06, "loss": 0.3016, "step": 25890 }, { "epoch": 3.0636385143127516, "grad_norm": 3.193854808807373, "learning_rate": 4.211706966722528e-06, "loss": 0.2676, "step": 25900 }, { "epoch": 3.0648213863259994, "grad_norm": 2.405898332595825, "learning_rate": 4.210988747905195e-06, "loss": 0.3062, "step": 25910 }, { "epoch": 3.0660042583392477, "grad_norm": 3.8819572925567627, "learning_rate": 4.210270529087862e-06, "loss": 0.3073, "step": 25920 }, { "epoch": 3.067187130352496, "grad_norm": 2.2638633251190186, "learning_rate": 4.209552310270529e-06, "loss": 0.3137, "step": 25930 }, { "epoch": 3.068370002365744, "grad_norm": 3.4960858821868896, "learning_rate": 4.208834091453196e-06, "loss": 0.2656, "step": 25940 }, { "epoch": 3.069552874378992, "grad_norm": 2.579909086227417, "learning_rate": 4.208115872635863e-06, "loss": 0.2826, "step": 25950 }, { "epoch": 3.0707357463922405, "grad_norm": 2.192331552505493, "learning_rate": 4.20739765381853e-06, "loss": 0.2903, "step": 25960 }, { "epoch": 3.0719186184054887, "grad_norm": 2.671562910079956, "learning_rate": 4.206679435001198e-06, "loss": 0.3281, "step": 25970 }, { "epoch": 3.0731014904187366, "grad_norm": 2.543046474456787, "learning_rate": 4.205961216183864e-06, "loss": 0.2938, "step": 25980 }, { "epoch": 3.074284362431985, "grad_norm": 2.1825907230377197, "learning_rate": 4.2052429973665315e-06, "loss": 0.2729, "step": 25990 }, { "epoch": 3.075467234445233, "grad_norm": 3.01511812210083, "learning_rate": 4.2045247785491976e-06, "loss": 0.3047, "step": 26000 }, { "epoch": 3.076650106458481, "grad_norm": 2.9775047302246094, "learning_rate": 4.203806559731865e-06, "loss": 0.31, "step": 26010 }, { "epoch": 3.0778329784717293, "grad_norm": 2.236973762512207, "learning_rate": 4.203088340914531e-06, "loss": 0.2602, "step": 26020 }, { "epoch": 3.0790158504849776, "grad_norm": 3.7626545429229736, "learning_rate": 4.202370122097199e-06, "loss": 0.2554, "step": 26030 }, { "epoch": 3.080198722498226, "grad_norm": 3.7447025775909424, "learning_rate": 4.201651903279866e-06, "loss": 0.2911, "step": 26040 }, { "epoch": 3.0813815945114738, "grad_norm": 3.142407178878784, "learning_rate": 4.200933684462533e-06, "loss": 0.3039, "step": 26050 }, { "epoch": 3.082564466524722, "grad_norm": 2.8024702072143555, "learning_rate": 4.2002154656452e-06, "loss": 0.264, "step": 26060 }, { "epoch": 3.0837473385379703, "grad_norm": 3.014652967453003, "learning_rate": 4.199497246827867e-06, "loss": 0.2737, "step": 26070 }, { "epoch": 3.084930210551218, "grad_norm": 3.9171454906463623, "learning_rate": 4.198779028010535e-06, "loss": 0.306, "step": 26080 }, { "epoch": 3.0861130825644665, "grad_norm": 3.677460193634033, "learning_rate": 4.198060809193201e-06, "loss": 0.3058, "step": 26090 }, { "epoch": 3.0872959545777148, "grad_norm": 3.0797815322875977, "learning_rate": 4.1973425903758685e-06, "loss": 0.2664, "step": 26100 }, { "epoch": 3.088478826590963, "grad_norm": 3.137662649154663, "learning_rate": 4.1966243715585346e-06, "loss": 0.292, "step": 26110 }, { "epoch": 3.089661698604211, "grad_norm": 3.9400734901428223, "learning_rate": 4.195906152741202e-06, "loss": 0.2802, "step": 26120 }, { "epoch": 3.090844570617459, "grad_norm": 2.6781139373779297, "learning_rate": 4.195187933923868e-06, "loss": 0.2936, "step": 26130 }, { "epoch": 3.0920274426307075, "grad_norm": 4.493440628051758, "learning_rate": 4.194469715106536e-06, "loss": 0.2962, "step": 26140 }, { "epoch": 3.0932103146439553, "grad_norm": 2.9951131343841553, "learning_rate": 4.193751496289203e-06, "loss": 0.2792, "step": 26150 }, { "epoch": 3.0943931866572036, "grad_norm": 3.6866304874420166, "learning_rate": 4.19303327747187e-06, "loss": 0.2699, "step": 26160 }, { "epoch": 3.095576058670452, "grad_norm": 3.072524070739746, "learning_rate": 4.192315058654537e-06, "loss": 0.3185, "step": 26170 }, { "epoch": 3.0967589306837002, "grad_norm": 3.688554525375366, "learning_rate": 4.191596839837204e-06, "loss": 0.341, "step": 26180 }, { "epoch": 3.097941802696948, "grad_norm": 2.4094104766845703, "learning_rate": 4.190878621019871e-06, "loss": 0.2707, "step": 26190 }, { "epoch": 3.0991246747101964, "grad_norm": 2.873302459716797, "learning_rate": 4.190160402202538e-06, "loss": 0.3242, "step": 26200 }, { "epoch": 3.1003075467234447, "grad_norm": 2.9964234828948975, "learning_rate": 4.189442183385205e-06, "loss": 0.2686, "step": 26210 }, { "epoch": 3.1014904187366925, "grad_norm": 2.406423330307007, "learning_rate": 4.1887239645678716e-06, "loss": 0.2489, "step": 26220 }, { "epoch": 3.102673290749941, "grad_norm": 2.8716514110565186, "learning_rate": 4.1880057457505385e-06, "loss": 0.2898, "step": 26230 }, { "epoch": 3.103856162763189, "grad_norm": 3.505645990371704, "learning_rate": 4.187287526933205e-06, "loss": 0.2902, "step": 26240 }, { "epoch": 3.1050390347764374, "grad_norm": 3.3556034564971924, "learning_rate": 4.186569308115872e-06, "loss": 0.2803, "step": 26250 }, { "epoch": 3.1062219067896852, "grad_norm": 2.842924118041992, "learning_rate": 4.18585108929854e-06, "loss": 0.2709, "step": 26260 }, { "epoch": 3.1074047788029335, "grad_norm": 3.348853588104248, "learning_rate": 4.185132870481206e-06, "loss": 0.3035, "step": 26270 }, { "epoch": 3.108587650816182, "grad_norm": 3.3913230895996094, "learning_rate": 4.184414651663874e-06, "loss": 0.2929, "step": 26280 }, { "epoch": 3.1097705228294297, "grad_norm": 2.457864761352539, "learning_rate": 4.18369643284654e-06, "loss": 0.326, "step": 26290 }, { "epoch": 3.110953394842678, "grad_norm": 2.600060224533081, "learning_rate": 4.182978214029208e-06, "loss": 0.2937, "step": 26300 }, { "epoch": 3.1121362668559263, "grad_norm": 2.278590440750122, "learning_rate": 4.182259995211875e-06, "loss": 0.2695, "step": 26310 }, { "epoch": 3.1133191388691746, "grad_norm": 3.3055567741394043, "learning_rate": 4.181541776394542e-06, "loss": 0.3319, "step": 26320 }, { "epoch": 3.1145020108824224, "grad_norm": 2.4813735485076904, "learning_rate": 4.1808235575772086e-06, "loss": 0.2872, "step": 26330 }, { "epoch": 3.1156848828956707, "grad_norm": 2.511098861694336, "learning_rate": 4.1801053387598755e-06, "loss": 0.2558, "step": 26340 }, { "epoch": 3.116867754908919, "grad_norm": 3.455292224884033, "learning_rate": 4.179387119942543e-06, "loss": 0.2826, "step": 26350 }, { "epoch": 3.118050626922167, "grad_norm": 4.4167680740356445, "learning_rate": 4.178668901125209e-06, "loss": 0.2787, "step": 26360 }, { "epoch": 3.119233498935415, "grad_norm": 3.423621892929077, "learning_rate": 4.177950682307877e-06, "loss": 0.2755, "step": 26370 }, { "epoch": 3.1204163709486634, "grad_norm": 2.4179739952087402, "learning_rate": 4.177232463490543e-06, "loss": 0.287, "step": 26380 }, { "epoch": 3.1215992429619117, "grad_norm": 3.2276782989501953, "learning_rate": 4.176514244673211e-06, "loss": 0.2641, "step": 26390 }, { "epoch": 3.1227821149751596, "grad_norm": 2.348263740539551, "learning_rate": 4.175796025855877e-06, "loss": 0.2567, "step": 26400 }, { "epoch": 3.123964986988408, "grad_norm": 3.7675414085388184, "learning_rate": 4.175077807038545e-06, "loss": 0.2954, "step": 26410 }, { "epoch": 3.125147859001656, "grad_norm": 2.0599918365478516, "learning_rate": 4.174359588221212e-06, "loss": 0.2725, "step": 26420 }, { "epoch": 3.126330731014904, "grad_norm": 2.903104305267334, "learning_rate": 4.173641369403879e-06, "loss": 0.2488, "step": 26430 }, { "epoch": 3.1275136030281523, "grad_norm": 3.985673189163208, "learning_rate": 4.1729231505865456e-06, "loss": 0.3004, "step": 26440 }, { "epoch": 3.1286964750414006, "grad_norm": 3.22426700592041, "learning_rate": 4.1722049317692125e-06, "loss": 0.2582, "step": 26450 }, { "epoch": 3.129879347054649, "grad_norm": 3.736717700958252, "learning_rate": 4.171486712951879e-06, "loss": 0.3004, "step": 26460 }, { "epoch": 3.1310622190678967, "grad_norm": 3.241243362426758, "learning_rate": 4.170768494134546e-06, "loss": 0.296, "step": 26470 }, { "epoch": 3.132245091081145, "grad_norm": 3.2949328422546387, "learning_rate": 4.170050275317213e-06, "loss": 0.3042, "step": 26480 }, { "epoch": 3.1334279630943933, "grad_norm": 2.447862148284912, "learning_rate": 4.16933205649988e-06, "loss": 0.2656, "step": 26490 }, { "epoch": 3.134610835107641, "grad_norm": 3.1463232040405273, "learning_rate": 4.168613837682547e-06, "loss": 0.3263, "step": 26500 }, { "epoch": 3.1357937071208895, "grad_norm": 3.263399362564087, "learning_rate": 4.167895618865214e-06, "loss": 0.3124, "step": 26510 }, { "epoch": 3.1369765791341377, "grad_norm": 3.49643611907959, "learning_rate": 4.167177400047881e-06, "loss": 0.2748, "step": 26520 }, { "epoch": 3.138159451147386, "grad_norm": 2.467164993286133, "learning_rate": 4.166459181230549e-06, "loss": 0.2554, "step": 26530 }, { "epoch": 3.139342323160634, "grad_norm": 3.2034528255462646, "learning_rate": 4.165740962413215e-06, "loss": 0.2237, "step": 26540 }, { "epoch": 3.140525195173882, "grad_norm": 3.184513807296753, "learning_rate": 4.1650227435958825e-06, "loss": 0.2879, "step": 26550 }, { "epoch": 3.1417080671871305, "grad_norm": 3.386791467666626, "learning_rate": 4.1643045247785495e-06, "loss": 0.2779, "step": 26560 }, { "epoch": 3.1428909392003783, "grad_norm": 2.694204330444336, "learning_rate": 4.163586305961216e-06, "loss": 0.2807, "step": 26570 }, { "epoch": 3.1440738112136266, "grad_norm": 3.4444072246551514, "learning_rate": 4.162868087143883e-06, "loss": 0.2786, "step": 26580 }, { "epoch": 3.145256683226875, "grad_norm": 2.3471930027008057, "learning_rate": 4.16214986832655e-06, "loss": 0.2867, "step": 26590 }, { "epoch": 3.146439555240123, "grad_norm": 2.7064785957336426, "learning_rate": 4.161431649509217e-06, "loss": 0.294, "step": 26600 }, { "epoch": 3.147622427253371, "grad_norm": 2.492715358734131, "learning_rate": 4.160713430691884e-06, "loss": 0.3076, "step": 26610 }, { "epoch": 3.1488052992666193, "grad_norm": 2.777384042739868, "learning_rate": 4.159995211874552e-06, "loss": 0.3291, "step": 26620 }, { "epoch": 3.1499881712798676, "grad_norm": 2.460726261138916, "learning_rate": 4.159276993057218e-06, "loss": 0.2631, "step": 26630 }, { "epoch": 3.1511710432931155, "grad_norm": 2.6271657943725586, "learning_rate": 4.158558774239886e-06, "loss": 0.3205, "step": 26640 }, { "epoch": 3.1523539153063638, "grad_norm": 2.0604748725891113, "learning_rate": 4.157840555422552e-06, "loss": 0.234, "step": 26650 }, { "epoch": 3.153536787319612, "grad_norm": 3.011331558227539, "learning_rate": 4.1571223366052195e-06, "loss": 0.3137, "step": 26660 }, { "epoch": 3.1547196593328604, "grad_norm": 3.1126832962036133, "learning_rate": 4.156404117787886e-06, "loss": 0.2921, "step": 26670 }, { "epoch": 3.155902531346108, "grad_norm": 2.7079503536224365, "learning_rate": 4.155685898970553e-06, "loss": 0.2412, "step": 26680 }, { "epoch": 3.1570854033593565, "grad_norm": 2.268372058868408, "learning_rate": 4.15496768015322e-06, "loss": 0.2539, "step": 26690 }, { "epoch": 3.158268275372605, "grad_norm": 2.8086812496185303, "learning_rate": 4.154249461335887e-06, "loss": 0.2623, "step": 26700 }, { "epoch": 3.1594511473858526, "grad_norm": 4.629752159118652, "learning_rate": 4.153531242518554e-06, "loss": 0.302, "step": 26710 }, { "epoch": 3.160634019399101, "grad_norm": 4.051839828491211, "learning_rate": 4.152813023701221e-06, "loss": 0.3064, "step": 26720 }, { "epoch": 3.1618168914123492, "grad_norm": 4.20828104019165, "learning_rate": 4.152094804883888e-06, "loss": 0.297, "step": 26730 }, { "epoch": 3.1629997634255975, "grad_norm": 3.0412421226501465, "learning_rate": 4.151376586066555e-06, "loss": 0.2532, "step": 26740 }, { "epoch": 3.1641826354388454, "grad_norm": 3.1068429946899414, "learning_rate": 4.150658367249222e-06, "loss": 0.3012, "step": 26750 }, { "epoch": 3.1653655074520937, "grad_norm": 2.692471742630005, "learning_rate": 4.149940148431889e-06, "loss": 0.2613, "step": 26760 }, { "epoch": 3.166548379465342, "grad_norm": 4.510305881500244, "learning_rate": 4.149221929614556e-06, "loss": 0.2919, "step": 26770 }, { "epoch": 3.16773125147859, "grad_norm": 3.4577081203460693, "learning_rate": 4.148503710797223e-06, "loss": 0.2821, "step": 26780 }, { "epoch": 3.168914123491838, "grad_norm": 3.4594273567199707, "learning_rate": 4.1477854919798895e-06, "loss": 0.2767, "step": 26790 }, { "epoch": 3.1700969955050864, "grad_norm": 3.4486966133117676, "learning_rate": 4.147067273162557e-06, "loss": 0.3115, "step": 26800 }, { "epoch": 3.1712798675183347, "grad_norm": 3.1208348274230957, "learning_rate": 4.146349054345224e-06, "loss": 0.2534, "step": 26810 }, { "epoch": 3.1724627395315825, "grad_norm": 3.5404696464538574, "learning_rate": 4.145630835527891e-06, "loss": 0.3081, "step": 26820 }, { "epoch": 3.173645611544831, "grad_norm": 3.9507696628570557, "learning_rate": 4.144912616710558e-06, "loss": 0.3049, "step": 26830 }, { "epoch": 3.174828483558079, "grad_norm": 2.977548599243164, "learning_rate": 4.144194397893225e-06, "loss": 0.3269, "step": 26840 }, { "epoch": 3.176011355571327, "grad_norm": 2.3489491939544678, "learning_rate": 4.143476179075892e-06, "loss": 0.2973, "step": 26850 }, { "epoch": 3.1771942275845753, "grad_norm": 2.961366653442383, "learning_rate": 4.142757960258559e-06, "loss": 0.2739, "step": 26860 }, { "epoch": 3.1783770995978236, "grad_norm": 3.6704015731811523, "learning_rate": 4.142039741441226e-06, "loss": 0.2234, "step": 26870 }, { "epoch": 3.179559971611072, "grad_norm": 3.6285295486450195, "learning_rate": 4.141321522623893e-06, "loss": 0.2764, "step": 26880 }, { "epoch": 3.1807428436243197, "grad_norm": 3.0299291610717773, "learning_rate": 4.1406033038065605e-06, "loss": 0.3245, "step": 26890 }, { "epoch": 3.181925715637568, "grad_norm": 2.3116037845611572, "learning_rate": 4.1398850849892265e-06, "loss": 0.2951, "step": 26900 }, { "epoch": 3.1831085876508163, "grad_norm": 3.053914785385132, "learning_rate": 4.139166866171894e-06, "loss": 0.2644, "step": 26910 }, { "epoch": 3.184291459664064, "grad_norm": 2.29020094871521, "learning_rate": 4.13844864735456e-06, "loss": 0.2855, "step": 26920 }, { "epoch": 3.1854743316773124, "grad_norm": 2.8435728549957275, "learning_rate": 4.137730428537228e-06, "loss": 0.2589, "step": 26930 }, { "epoch": 3.1866572036905607, "grad_norm": 1.9491233825683594, "learning_rate": 4.137012209719894e-06, "loss": 0.2356, "step": 26940 }, { "epoch": 3.187840075703809, "grad_norm": 5.043102741241455, "learning_rate": 4.136293990902562e-06, "loss": 0.2842, "step": 26950 }, { "epoch": 3.189022947717057, "grad_norm": 3.5162951946258545, "learning_rate": 4.135575772085229e-06, "loss": 0.2923, "step": 26960 }, { "epoch": 3.190205819730305, "grad_norm": 3.5482230186462402, "learning_rate": 4.134857553267896e-06, "loss": 0.2906, "step": 26970 }, { "epoch": 3.1913886917435534, "grad_norm": 2.1520400047302246, "learning_rate": 4.134139334450563e-06, "loss": 0.2535, "step": 26980 }, { "epoch": 3.1925715637568013, "grad_norm": 3.102755308151245, "learning_rate": 4.13342111563323e-06, "loss": 0.3323, "step": 26990 }, { "epoch": 3.1937544357700496, "grad_norm": 3.3286232948303223, "learning_rate": 4.132702896815897e-06, "loss": 0.3122, "step": 27000 }, { "epoch": 3.194937307783298, "grad_norm": 2.536963701248169, "learning_rate": 4.1319846779985635e-06, "loss": 0.3061, "step": 27010 }, { "epoch": 3.196120179796546, "grad_norm": 2.525052309036255, "learning_rate": 4.1312664591812305e-06, "loss": 0.2897, "step": 27020 }, { "epoch": 3.197303051809794, "grad_norm": 3.134566307067871, "learning_rate": 4.130548240363897e-06, "loss": 0.2569, "step": 27030 }, { "epoch": 3.1984859238230423, "grad_norm": 2.9788222312927246, "learning_rate": 4.129830021546564e-06, "loss": 0.3115, "step": 27040 }, { "epoch": 3.1996687958362906, "grad_norm": 3.5071332454681396, "learning_rate": 4.129111802729231e-06, "loss": 0.2822, "step": 27050 }, { "epoch": 3.2008516678495385, "grad_norm": 3.399627685546875, "learning_rate": 4.128393583911899e-06, "loss": 0.3212, "step": 27060 }, { "epoch": 3.2020345398627867, "grad_norm": 3.182009696960449, "learning_rate": 4.127675365094566e-06, "loss": 0.3061, "step": 27070 }, { "epoch": 3.203217411876035, "grad_norm": 3.0417139530181885, "learning_rate": 4.126957146277233e-06, "loss": 0.2725, "step": 27080 }, { "epoch": 3.2044002838892833, "grad_norm": 3.618405818939209, "learning_rate": 4.1262389274599e-06, "loss": 0.284, "step": 27090 }, { "epoch": 3.205583155902531, "grad_norm": 3.0297694206237793, "learning_rate": 4.125520708642567e-06, "loss": 0.2716, "step": 27100 }, { "epoch": 3.2067660279157795, "grad_norm": 3.3863964080810547, "learning_rate": 4.124802489825234e-06, "loss": 0.3105, "step": 27110 }, { "epoch": 3.2079488999290278, "grad_norm": 3.787440776824951, "learning_rate": 4.1240842710079005e-06, "loss": 0.2648, "step": 27120 }, { "epoch": 3.2091317719422756, "grad_norm": 2.421241521835327, "learning_rate": 4.1233660521905675e-06, "loss": 0.2739, "step": 27130 }, { "epoch": 3.210314643955524, "grad_norm": 2.030874729156494, "learning_rate": 4.122647833373234e-06, "loss": 0.2796, "step": 27140 }, { "epoch": 3.211497515968772, "grad_norm": 2.9440155029296875, "learning_rate": 4.121929614555901e-06, "loss": 0.294, "step": 27150 }, { "epoch": 3.2126803879820205, "grad_norm": 2.9077959060668945, "learning_rate": 4.121211395738568e-06, "loss": 0.3183, "step": 27160 }, { "epoch": 3.2138632599952683, "grad_norm": 1.959438681602478, "learning_rate": 4.120493176921235e-06, "loss": 0.2543, "step": 27170 }, { "epoch": 3.2150461320085166, "grad_norm": 2.29925537109375, "learning_rate": 4.119774958103903e-06, "loss": 0.3062, "step": 27180 }, { "epoch": 3.216229004021765, "grad_norm": 2.8926730155944824, "learning_rate": 4.119056739286569e-06, "loss": 0.299, "step": 27190 }, { "epoch": 3.217411876035013, "grad_norm": 3.610867738723755, "learning_rate": 4.118338520469237e-06, "loss": 0.2981, "step": 27200 }, { "epoch": 3.218594748048261, "grad_norm": 2.610407829284668, "learning_rate": 4.117620301651903e-06, "loss": 0.2397, "step": 27210 }, { "epoch": 3.2197776200615094, "grad_norm": 3.3427069187164307, "learning_rate": 4.116902082834571e-06, "loss": 0.2785, "step": 27220 }, { "epoch": 3.2209604920747577, "grad_norm": 4.121489524841309, "learning_rate": 4.1161838640172375e-06, "loss": 0.255, "step": 27230 }, { "epoch": 3.2221433640880055, "grad_norm": 2.9172403812408447, "learning_rate": 4.1154656451999045e-06, "loss": 0.3029, "step": 27240 }, { "epoch": 3.223326236101254, "grad_norm": 3.5640225410461426, "learning_rate": 4.114747426382571e-06, "loss": 0.2919, "step": 27250 }, { "epoch": 3.224509108114502, "grad_norm": 2.6899254322052, "learning_rate": 4.114029207565238e-06, "loss": 0.2827, "step": 27260 }, { "epoch": 3.2256919801277504, "grad_norm": 2.448078155517578, "learning_rate": 4.113310988747905e-06, "loss": 0.268, "step": 27270 }, { "epoch": 3.2268748521409982, "grad_norm": 3.2071781158447266, "learning_rate": 4.112592769930572e-06, "loss": 0.3381, "step": 27280 }, { "epoch": 3.2280577241542465, "grad_norm": 4.201130390167236, "learning_rate": 4.111874551113239e-06, "loss": 0.2953, "step": 27290 }, { "epoch": 3.229240596167495, "grad_norm": 3.7503464221954346, "learning_rate": 4.111156332295906e-06, "loss": 0.2597, "step": 27300 }, { "epoch": 3.2304234681807427, "grad_norm": 2.761509418487549, "learning_rate": 4.110438113478574e-06, "loss": 0.2491, "step": 27310 }, { "epoch": 3.231606340193991, "grad_norm": 2.505431652069092, "learning_rate": 4.10971989466124e-06, "loss": 0.3237, "step": 27320 }, { "epoch": 3.2327892122072392, "grad_norm": 3.6810736656188965, "learning_rate": 4.109001675843908e-06, "loss": 0.2835, "step": 27330 }, { "epoch": 3.2339720842204875, "grad_norm": 3.3416645526885986, "learning_rate": 4.1082834570265745e-06, "loss": 0.2891, "step": 27340 }, { "epoch": 3.2351549562337354, "grad_norm": 3.9127213954925537, "learning_rate": 4.1075652382092415e-06, "loss": 0.3115, "step": 27350 }, { "epoch": 3.2363378282469837, "grad_norm": 2.8421289920806885, "learning_rate": 4.106847019391908e-06, "loss": 0.2562, "step": 27360 }, { "epoch": 3.237520700260232, "grad_norm": 3.3126332759857178, "learning_rate": 4.106128800574575e-06, "loss": 0.2883, "step": 27370 }, { "epoch": 3.23870357227348, "grad_norm": 2.2606143951416016, "learning_rate": 4.105410581757242e-06, "loss": 0.2951, "step": 27380 }, { "epoch": 3.239886444286728, "grad_norm": 3.2302465438842773, "learning_rate": 4.104692362939909e-06, "loss": 0.2649, "step": 27390 }, { "epoch": 3.2410693162999764, "grad_norm": 2.675093412399292, "learning_rate": 4.103974144122576e-06, "loss": 0.2912, "step": 27400 }, { "epoch": 3.2422521883132247, "grad_norm": 2.608977794647217, "learning_rate": 4.103255925305243e-06, "loss": 0.2848, "step": 27410 }, { "epoch": 3.2434350603264726, "grad_norm": 2.509690523147583, "learning_rate": 4.10253770648791e-06, "loss": 0.2799, "step": 27420 }, { "epoch": 3.244617932339721, "grad_norm": 3.0851919651031494, "learning_rate": 4.101819487670577e-06, "loss": 0.2864, "step": 27430 }, { "epoch": 3.245800804352969, "grad_norm": 5.2798051834106445, "learning_rate": 4.101101268853244e-06, "loss": 0.3175, "step": 27440 }, { "epoch": 3.246983676366217, "grad_norm": 2.9658639430999756, "learning_rate": 4.1003830500359115e-06, "loss": 0.2932, "step": 27450 }, { "epoch": 3.2481665483794653, "grad_norm": 2.701160192489624, "learning_rate": 4.099664831218578e-06, "loss": 0.2835, "step": 27460 }, { "epoch": 3.2493494203927136, "grad_norm": 2.572829246520996, "learning_rate": 4.098946612401245e-06, "loss": 0.2978, "step": 27470 }, { "epoch": 3.250532292405962, "grad_norm": 3.43685245513916, "learning_rate": 4.0982283935839115e-06, "loss": 0.2973, "step": 27480 }, { "epoch": 3.2507688668086114, "eval_accuracy": 0.8603985760388595, "eval_loss": 0.3283090889453888, "eval_runtime": 81.9879, "eval_safe_aucpr": 0.9154902059248551, "eval_safe_f1": 0.8448052668565299, "eval_safe_fpr": 0.13661773937998822, "eval_safe_precision": 0.8332786107766955, "eval_safe_recall": 0.856655290102389, "eval_samples_per_second": 733.206, "eval_steps_per_second": 11.465, "eval_unsafe_aucpr": 0.9529076569503385, "eval_unsafe_f1": 0.8731444810593464, "eval_unsafe_fpr": 0.1433447098976104, "eval_unsafe_precision": 0.8831299880744886, "eval_unsafe_recall": 0.8633822606200113, "step": 27482 }, { "epoch": 3.2517151644192097, "grad_norm": 2.9928042888641357, "learning_rate": 4.097510174766579e-06, "loss": 0.3163, "step": 27490 }, { "epoch": 3.252898036432458, "grad_norm": 3.460139274597168, "learning_rate": 4.096791955949245e-06, "loss": 0.3174, "step": 27500 }, { "epoch": 3.2540809084457063, "grad_norm": 2.9319708347320557, "learning_rate": 4.096073737131913e-06, "loss": 0.3242, "step": 27510 }, { "epoch": 3.255263780458954, "grad_norm": 2.2399494647979736, "learning_rate": 4.09535551831458e-06, "loss": 0.2882, "step": 27520 }, { "epoch": 3.2564466524722024, "grad_norm": 3.403815746307373, "learning_rate": 4.094637299497247e-06, "loss": 0.2891, "step": 27530 }, { "epoch": 3.2576295244854507, "grad_norm": 2.6851112842559814, "learning_rate": 4.093919080679914e-06, "loss": 0.2943, "step": 27540 }, { "epoch": 3.258812396498699, "grad_norm": 2.8747034072875977, "learning_rate": 4.093200861862581e-06, "loss": 0.2988, "step": 27550 }, { "epoch": 3.259995268511947, "grad_norm": 3.6352221965789795, "learning_rate": 4.0924826430452485e-06, "loss": 0.2984, "step": 27560 }, { "epoch": 3.261178140525195, "grad_norm": 1.976767897605896, "learning_rate": 4.091764424227915e-06, "loss": 0.2717, "step": 27570 }, { "epoch": 3.2623610125384435, "grad_norm": 2.54714035987854, "learning_rate": 4.091046205410582e-06, "loss": 0.2882, "step": 27580 }, { "epoch": 3.2635438845516918, "grad_norm": 2.0810391902923584, "learning_rate": 4.0903279865932484e-06, "loss": 0.2399, "step": 27590 }, { "epoch": 3.2647267565649396, "grad_norm": 2.7840323448181152, "learning_rate": 4.089609767775916e-06, "loss": 0.2715, "step": 27600 }, { "epoch": 3.265909628578188, "grad_norm": 2.911987781524658, "learning_rate": 4.088891548958583e-06, "loss": 0.3072, "step": 27610 }, { "epoch": 3.267092500591436, "grad_norm": 3.1399085521698, "learning_rate": 4.08817333014125e-06, "loss": 0.2501, "step": 27620 }, { "epoch": 3.268275372604684, "grad_norm": 3.8143203258514404, "learning_rate": 4.087455111323917e-06, "loss": 0.2905, "step": 27630 }, { "epoch": 3.2694582446179323, "grad_norm": 2.5066142082214355, "learning_rate": 4.086736892506584e-06, "loss": 0.2796, "step": 27640 }, { "epoch": 3.2706411166311806, "grad_norm": 4.362666130065918, "learning_rate": 4.086018673689251e-06, "loss": 0.2595, "step": 27650 }, { "epoch": 3.271823988644429, "grad_norm": 2.412616491317749, "learning_rate": 4.085300454871918e-06, "loss": 0.2998, "step": 27660 }, { "epoch": 3.2730068606576768, "grad_norm": 2.690500020980835, "learning_rate": 4.084582236054585e-06, "loss": 0.2642, "step": 27670 }, { "epoch": 3.274189732670925, "grad_norm": 3.0411665439605713, "learning_rate": 4.083864017237252e-06, "loss": 0.2956, "step": 27680 }, { "epoch": 3.2753726046841733, "grad_norm": 4.12064266204834, "learning_rate": 4.0831457984199185e-06, "loss": 0.3109, "step": 27690 }, { "epoch": 3.276555476697421, "grad_norm": 3.065070152282715, "learning_rate": 4.0824275796025854e-06, "loss": 0.3127, "step": 27700 }, { "epoch": 3.2777383487106695, "grad_norm": 2.9523682594299316, "learning_rate": 4.081709360785252e-06, "loss": 0.273, "step": 27710 }, { "epoch": 3.278921220723918, "grad_norm": 2.6885054111480713, "learning_rate": 4.08099114196792e-06, "loss": 0.3113, "step": 27720 }, { "epoch": 3.280104092737166, "grad_norm": 5.8971638679504395, "learning_rate": 4.080272923150586e-06, "loss": 0.3232, "step": 27730 }, { "epoch": 3.281286964750414, "grad_norm": 3.010758876800537, "learning_rate": 4.079554704333254e-06, "loss": 0.3236, "step": 27740 }, { "epoch": 3.282469836763662, "grad_norm": 2.8600690364837646, "learning_rate": 4.07883648551592e-06, "loss": 0.2913, "step": 27750 }, { "epoch": 3.2836527087769105, "grad_norm": 2.074984312057495, "learning_rate": 4.078118266698588e-06, "loss": 0.2787, "step": 27760 }, { "epoch": 3.2848355807901584, "grad_norm": 4.146457672119141, "learning_rate": 4.077400047881254e-06, "loss": 0.2468, "step": 27770 }, { "epoch": 3.2860184528034067, "grad_norm": 3.0289111137390137, "learning_rate": 4.076681829063922e-06, "loss": 0.3381, "step": 27780 }, { "epoch": 3.287201324816655, "grad_norm": 2.3751113414764404, "learning_rate": 4.075963610246589e-06, "loss": 0.3018, "step": 27790 }, { "epoch": 3.2883841968299032, "grad_norm": 2.964958667755127, "learning_rate": 4.0752453914292555e-06, "loss": 0.289, "step": 27800 }, { "epoch": 3.289567068843151, "grad_norm": 2.8259074687957764, "learning_rate": 4.074527172611923e-06, "loss": 0.2733, "step": 27810 }, { "epoch": 3.2907499408563994, "grad_norm": 3.40511417388916, "learning_rate": 4.073808953794589e-06, "loss": 0.2844, "step": 27820 }, { "epoch": 3.2919328128696477, "grad_norm": 5.153759956359863, "learning_rate": 4.073090734977257e-06, "loss": 0.3, "step": 27830 }, { "epoch": 3.2931156848828955, "grad_norm": 2.7530295848846436, "learning_rate": 4.072372516159923e-06, "loss": 0.3119, "step": 27840 }, { "epoch": 3.294298556896144, "grad_norm": 3.2891340255737305, "learning_rate": 4.071654297342591e-06, "loss": 0.3307, "step": 27850 }, { "epoch": 3.295481428909392, "grad_norm": 3.0643022060394287, "learning_rate": 4.070936078525257e-06, "loss": 0.2848, "step": 27860 }, { "epoch": 3.2966643009226404, "grad_norm": 2.238170862197876, "learning_rate": 4.070217859707925e-06, "loss": 0.293, "step": 27870 }, { "epoch": 3.2978471729358882, "grad_norm": 2.643157482147217, "learning_rate": 4.069499640890592e-06, "loss": 0.2731, "step": 27880 }, { "epoch": 3.2990300449491365, "grad_norm": 3.4272263050079346, "learning_rate": 4.068781422073259e-06, "loss": 0.2782, "step": 27890 }, { "epoch": 3.300212916962385, "grad_norm": 2.283717155456543, "learning_rate": 4.068063203255926e-06, "loss": 0.3043, "step": 27900 }, { "epoch": 3.3013957889756327, "grad_norm": 3.3664350509643555, "learning_rate": 4.0673449844385925e-06, "loss": 0.2904, "step": 27910 }, { "epoch": 3.302578660988881, "grad_norm": 2.9079642295837402, "learning_rate": 4.0666267656212594e-06, "loss": 0.2865, "step": 27920 }, { "epoch": 3.3037615330021293, "grad_norm": 3.117150068283081, "learning_rate": 4.065908546803926e-06, "loss": 0.3322, "step": 27930 }, { "epoch": 3.3049444050153776, "grad_norm": 2.965116262435913, "learning_rate": 4.065190327986593e-06, "loss": 0.2442, "step": 27940 }, { "epoch": 3.3061272770286254, "grad_norm": 2.861928939819336, "learning_rate": 4.06447210916926e-06, "loss": 0.2987, "step": 27950 }, { "epoch": 3.3073101490418737, "grad_norm": 2.784311056137085, "learning_rate": 4.063753890351927e-06, "loss": 0.3441, "step": 27960 }, { "epoch": 3.308493021055122, "grad_norm": 3.677492380142212, "learning_rate": 4.063035671534594e-06, "loss": 0.3065, "step": 27970 }, { "epoch": 3.30967589306837, "grad_norm": 2.0552244186401367, "learning_rate": 4.062317452717261e-06, "loss": 0.2875, "step": 27980 }, { "epoch": 3.310858765081618, "grad_norm": 1.9184958934783936, "learning_rate": 4.061599233899929e-06, "loss": 0.2706, "step": 27990 }, { "epoch": 3.3120416370948664, "grad_norm": 4.538043975830078, "learning_rate": 4.060881015082595e-06, "loss": 0.3078, "step": 28000 }, { "epoch": 3.3132245091081147, "grad_norm": 3.2088682651519775, "learning_rate": 4.060162796265263e-06, "loss": 0.2863, "step": 28010 }, { "epoch": 3.3144073811213626, "grad_norm": 2.603923797607422, "learning_rate": 4.059444577447929e-06, "loss": 0.2776, "step": 28020 }, { "epoch": 3.315590253134611, "grad_norm": 4.3191423416137695, "learning_rate": 4.0587263586305964e-06, "loss": 0.2976, "step": 28030 }, { "epoch": 3.316773125147859, "grad_norm": 3.0668866634368896, "learning_rate": 4.0580081398132625e-06, "loss": 0.2839, "step": 28040 }, { "epoch": 3.317955997161107, "grad_norm": 4.304630756378174, "learning_rate": 4.05728992099593e-06, "loss": 0.2949, "step": 28050 }, { "epoch": 3.3191388691743553, "grad_norm": 2.4871647357940674, "learning_rate": 4.056571702178597e-06, "loss": 0.2625, "step": 28060 }, { "epoch": 3.3203217411876036, "grad_norm": 4.306042671203613, "learning_rate": 4.055853483361264e-06, "loss": 0.3007, "step": 28070 }, { "epoch": 3.321504613200852, "grad_norm": 3.3634445667266846, "learning_rate": 4.055135264543931e-06, "loss": 0.2725, "step": 28080 }, { "epoch": 3.3226874852140997, "grad_norm": 3.9011049270629883, "learning_rate": 4.054417045726598e-06, "loss": 0.2554, "step": 28090 }, { "epoch": 3.323870357227348, "grad_norm": 3.2487316131591797, "learning_rate": 4.053698826909266e-06, "loss": 0.2659, "step": 28100 }, { "epoch": 3.3250532292405963, "grad_norm": 3.0760748386383057, "learning_rate": 4.052980608091932e-06, "loss": 0.2734, "step": 28110 }, { "epoch": 3.326236101253844, "grad_norm": 3.7253193855285645, "learning_rate": 4.0522623892746e-06, "loss": 0.3043, "step": 28120 }, { "epoch": 3.3274189732670925, "grad_norm": 2.54416823387146, "learning_rate": 4.051544170457266e-06, "loss": 0.2756, "step": 28130 }, { "epoch": 3.3286018452803408, "grad_norm": 3.150343894958496, "learning_rate": 4.0508259516399334e-06, "loss": 0.276, "step": 28140 }, { "epoch": 3.329784717293589, "grad_norm": 3.0068886280059814, "learning_rate": 4.0501077328226e-06, "loss": 0.3174, "step": 28150 }, { "epoch": 3.330967589306837, "grad_norm": 2.7225942611694336, "learning_rate": 4.049389514005267e-06, "loss": 0.2524, "step": 28160 }, { "epoch": 3.332150461320085, "grad_norm": 3.4403152465820312, "learning_rate": 4.048671295187934e-06, "loss": 0.2884, "step": 28170 }, { "epoch": 3.3333333333333335, "grad_norm": 4.012026786804199, "learning_rate": 4.047953076370601e-06, "loss": 0.2943, "step": 28180 }, { "epoch": 3.3345162053465813, "grad_norm": 2.679258108139038, "learning_rate": 4.047234857553268e-06, "loss": 0.3146, "step": 28190 }, { "epoch": 3.3356990773598296, "grad_norm": 2.6075501441955566, "learning_rate": 4.046516638735935e-06, "loss": 0.2733, "step": 28200 }, { "epoch": 3.336881949373078, "grad_norm": 2.1568117141723633, "learning_rate": 4.045798419918602e-06, "loss": 0.3105, "step": 28210 }, { "epoch": 3.338064821386326, "grad_norm": 3.158740997314453, "learning_rate": 4.045080201101269e-06, "loss": 0.2996, "step": 28220 }, { "epoch": 3.339247693399574, "grad_norm": 3.0008444786071777, "learning_rate": 4.044361982283936e-06, "loss": 0.3032, "step": 28230 }, { "epoch": 3.3404305654128224, "grad_norm": 2.9587295055389404, "learning_rate": 4.043643763466603e-06, "loss": 0.2997, "step": 28240 }, { "epoch": 3.3416134374260706, "grad_norm": 2.94698429107666, "learning_rate": 4.04292554464927e-06, "loss": 0.3154, "step": 28250 }, { "epoch": 3.3427963094393185, "grad_norm": 2.5821921825408936, "learning_rate": 4.042207325831937e-06, "loss": 0.2642, "step": 28260 }, { "epoch": 3.343979181452567, "grad_norm": 2.43442702293396, "learning_rate": 4.0414891070146034e-06, "loss": 0.2981, "step": 28270 }, { "epoch": 3.345162053465815, "grad_norm": 2.9026975631713867, "learning_rate": 4.040770888197271e-06, "loss": 0.3275, "step": 28280 }, { "epoch": 3.3463449254790634, "grad_norm": 2.9597880840301514, "learning_rate": 4.040052669379937e-06, "loss": 0.3208, "step": 28290 }, { "epoch": 3.347527797492311, "grad_norm": 2.3282508850097656, "learning_rate": 4.039334450562605e-06, "loss": 0.2796, "step": 28300 }, { "epoch": 3.3487106695055595, "grad_norm": 3.9105443954467773, "learning_rate": 4.038616231745271e-06, "loss": 0.3055, "step": 28310 }, { "epoch": 3.349893541518808, "grad_norm": 2.4692790508270264, "learning_rate": 4.037898012927939e-06, "loss": 0.2689, "step": 28320 }, { "epoch": 3.3510764135320557, "grad_norm": 3.3734171390533447, "learning_rate": 4.037179794110606e-06, "loss": 0.2962, "step": 28330 }, { "epoch": 3.352259285545304, "grad_norm": 3.8791041374206543, "learning_rate": 4.036461575293273e-06, "loss": 0.3038, "step": 28340 }, { "epoch": 3.3534421575585522, "grad_norm": 3.6339375972747803, "learning_rate": 4.03574335647594e-06, "loss": 0.248, "step": 28350 }, { "epoch": 3.3546250295718005, "grad_norm": 3.4359793663024902, "learning_rate": 4.035025137658607e-06, "loss": 0.2854, "step": 28360 }, { "epoch": 3.3558079015850484, "grad_norm": 2.6512808799743652, "learning_rate": 4.034306918841274e-06, "loss": 0.31, "step": 28370 }, { "epoch": 3.3569907735982967, "grad_norm": 3.134368896484375, "learning_rate": 4.0335887000239404e-06, "loss": 0.2799, "step": 28380 }, { "epoch": 3.358173645611545, "grad_norm": 4.1644158363342285, "learning_rate": 4.032870481206608e-06, "loss": 0.309, "step": 28390 }, { "epoch": 3.359356517624793, "grad_norm": 2.5575039386749268, "learning_rate": 4.032152262389274e-06, "loss": 0.2866, "step": 28400 }, { "epoch": 3.360539389638041, "grad_norm": 3.720283269882202, "learning_rate": 4.031434043571942e-06, "loss": 0.2776, "step": 28410 }, { "epoch": 3.3617222616512894, "grad_norm": 2.4713282585144043, "learning_rate": 4.030715824754608e-06, "loss": 0.3024, "step": 28420 }, { "epoch": 3.3629051336645377, "grad_norm": 3.4714038372039795, "learning_rate": 4.029997605937276e-06, "loss": 0.3007, "step": 28430 }, { "epoch": 3.3640880056777855, "grad_norm": 3.2137274742126465, "learning_rate": 4.029279387119943e-06, "loss": 0.2642, "step": 28440 }, { "epoch": 3.365270877691034, "grad_norm": 2.4611263275146484, "learning_rate": 4.02856116830261e-06, "loss": 0.2609, "step": 28450 }, { "epoch": 3.366453749704282, "grad_norm": 3.8252193927764893, "learning_rate": 4.027842949485277e-06, "loss": 0.2761, "step": 28460 }, { "epoch": 3.36763662171753, "grad_norm": 2.8619930744171143, "learning_rate": 4.027124730667944e-06, "loss": 0.2902, "step": 28470 }, { "epoch": 3.3688194937307783, "grad_norm": 3.314082622528076, "learning_rate": 4.0264065118506105e-06, "loss": 0.2961, "step": 28480 }, { "epoch": 3.3700023657440266, "grad_norm": 3.308055877685547, "learning_rate": 4.0256882930332774e-06, "loss": 0.3228, "step": 28490 }, { "epoch": 3.371185237757275, "grad_norm": 2.3181183338165283, "learning_rate": 4.024970074215944e-06, "loss": 0.3098, "step": 28500 }, { "epoch": 3.3723681097705227, "grad_norm": 2.3771865367889404, "learning_rate": 4.024251855398611e-06, "loss": 0.3007, "step": 28510 }, { "epoch": 3.373550981783771, "grad_norm": 2.6836299896240234, "learning_rate": 4.023533636581278e-06, "loss": 0.3389, "step": 28520 }, { "epoch": 3.3747338537970193, "grad_norm": 3.9915354251861572, "learning_rate": 4.022815417763946e-06, "loss": 0.2986, "step": 28530 }, { "epoch": 3.375916725810267, "grad_norm": 2.5483341217041016, "learning_rate": 4.022097198946612e-06, "loss": 0.3298, "step": 28540 }, { "epoch": 3.3770995978235154, "grad_norm": 2.7282094955444336, "learning_rate": 4.02137898012928e-06, "loss": 0.2541, "step": 28550 }, { "epoch": 3.3782824698367637, "grad_norm": 2.5950067043304443, "learning_rate": 4.020660761311946e-06, "loss": 0.2956, "step": 28560 }, { "epoch": 3.379465341850012, "grad_norm": 3.0112249851226807, "learning_rate": 4.019942542494614e-06, "loss": 0.2583, "step": 28570 }, { "epoch": 3.38064821386326, "grad_norm": 2.200986862182617, "learning_rate": 4.019224323677281e-06, "loss": 0.2461, "step": 28580 }, { "epoch": 3.381831085876508, "grad_norm": 2.6726768016815186, "learning_rate": 4.0185061048599475e-06, "loss": 0.3154, "step": 28590 }, { "epoch": 3.3830139578897565, "grad_norm": 3.4485435485839844, "learning_rate": 4.0177878860426144e-06, "loss": 0.3149, "step": 28600 }, { "epoch": 3.3841968299030043, "grad_norm": 3.5943193435668945, "learning_rate": 4.017069667225281e-06, "loss": 0.3536, "step": 28610 }, { "epoch": 3.3853797019162526, "grad_norm": 3.0588438510894775, "learning_rate": 4.016351448407948e-06, "loss": 0.3039, "step": 28620 }, { "epoch": 3.386562573929501, "grad_norm": 3.3482131958007812, "learning_rate": 4.015633229590615e-06, "loss": 0.3269, "step": 28630 }, { "epoch": 3.387745445942749, "grad_norm": 2.8402023315429688, "learning_rate": 4.014915010773283e-06, "loss": 0.3037, "step": 28640 }, { "epoch": 3.388928317955997, "grad_norm": 2.354872465133667, "learning_rate": 4.014196791955949e-06, "loss": 0.2845, "step": 28650 }, { "epoch": 3.3901111899692453, "grad_norm": 2.502476453781128, "learning_rate": 4.013478573138617e-06, "loss": 0.2786, "step": 28660 }, { "epoch": 3.3912940619824936, "grad_norm": 2.517012119293213, "learning_rate": 4.012760354321283e-06, "loss": 0.2767, "step": 28670 }, { "epoch": 3.3924769339957415, "grad_norm": 4.204011917114258, "learning_rate": 4.012042135503951e-06, "loss": 0.2871, "step": 28680 }, { "epoch": 3.3936598060089898, "grad_norm": 2.779322385787964, "learning_rate": 4.011323916686617e-06, "loss": 0.243, "step": 28690 }, { "epoch": 3.394842678022238, "grad_norm": 4.354095458984375, "learning_rate": 4.0106056978692845e-06, "loss": 0.3019, "step": 28700 }, { "epoch": 3.3960255500354863, "grad_norm": 2.8547163009643555, "learning_rate": 4.0098874790519514e-06, "loss": 0.2686, "step": 28710 }, { "epoch": 3.397208422048734, "grad_norm": 2.772738456726074, "learning_rate": 4.009169260234618e-06, "loss": 0.2578, "step": 28720 }, { "epoch": 3.3983912940619825, "grad_norm": 2.603830337524414, "learning_rate": 4.008451041417285e-06, "loss": 0.2592, "step": 28730 }, { "epoch": 3.3995741660752308, "grad_norm": 2.9823734760284424, "learning_rate": 4.007732822599952e-06, "loss": 0.2849, "step": 28740 }, { "epoch": 3.4007570380884786, "grad_norm": 3.963719129562378, "learning_rate": 4.007014603782619e-06, "loss": 0.3095, "step": 28750 }, { "epoch": 3.401939910101727, "grad_norm": 4.043334484100342, "learning_rate": 4.006296384965286e-06, "loss": 0.2659, "step": 28760 }, { "epoch": 3.403122782114975, "grad_norm": 2.87335467338562, "learning_rate": 4.005578166147953e-06, "loss": 0.2677, "step": 28770 }, { "epoch": 3.4043056541282235, "grad_norm": 2.177003860473633, "learning_rate": 4.00485994733062e-06, "loss": 0.2967, "step": 28780 }, { "epoch": 3.4054885261414714, "grad_norm": 5.445313453674316, "learning_rate": 4.004141728513287e-06, "loss": 0.2742, "step": 28790 }, { "epoch": 3.4066713981547196, "grad_norm": 2.3107638359069824, "learning_rate": 4.003423509695955e-06, "loss": 0.3064, "step": 28800 }, { "epoch": 3.407854270167968, "grad_norm": 2.7333712577819824, "learning_rate": 4.002705290878621e-06, "loss": 0.3044, "step": 28810 }, { "epoch": 3.409037142181216, "grad_norm": 1.6254793405532837, "learning_rate": 4.001987072061288e-06, "loss": 0.2496, "step": 28820 }, { "epoch": 3.410220014194464, "grad_norm": 4.3058671951293945, "learning_rate": 4.001268853243955e-06, "loss": 0.291, "step": 28830 }, { "epoch": 3.4114028862077124, "grad_norm": 3.107118606567383, "learning_rate": 4.000550634426622e-06, "loss": 0.3196, "step": 28840 }, { "epoch": 3.4125857582209607, "grad_norm": 3.680126190185547, "learning_rate": 3.999832415609289e-06, "loss": 0.275, "step": 28850 }, { "epoch": 3.4137686302342085, "grad_norm": 2.1049747467041016, "learning_rate": 3.999114196791956e-06, "loss": 0.3002, "step": 28860 }, { "epoch": 3.414951502247457, "grad_norm": 4.366775035858154, "learning_rate": 3.998395977974623e-06, "loss": 0.255, "step": 28870 }, { "epoch": 3.416134374260705, "grad_norm": 2.862595796585083, "learning_rate": 3.99767775915729e-06, "loss": 0.2793, "step": 28880 }, { "epoch": 3.417317246273953, "grad_norm": 2.8145334720611572, "learning_rate": 3.996959540339957e-06, "loss": 0.2671, "step": 28890 }, { "epoch": 3.4185001182872012, "grad_norm": 4.602574825286865, "learning_rate": 3.996241321522624e-06, "loss": 0.2923, "step": 28900 }, { "epoch": 3.4196829903004495, "grad_norm": 2.1978883743286133, "learning_rate": 3.9955231027052916e-06, "loss": 0.2923, "step": 28910 }, { "epoch": 3.420865862313698, "grad_norm": 4.055898666381836, "learning_rate": 3.994804883887958e-06, "loss": 0.2681, "step": 28920 }, { "epoch": 3.4220487343269457, "grad_norm": 4.268734455108643, "learning_rate": 3.994086665070625e-06, "loss": 0.29, "step": 28930 }, { "epoch": 3.423231606340194, "grad_norm": 3.2915849685668945, "learning_rate": 3.9933684462532915e-06, "loss": 0.3109, "step": 28940 }, { "epoch": 3.4244144783534423, "grad_norm": 2.6965219974517822, "learning_rate": 3.992650227435959e-06, "loss": 0.2631, "step": 28950 }, { "epoch": 3.42559735036669, "grad_norm": 2.662470579147339, "learning_rate": 3.991932008618625e-06, "loss": 0.2842, "step": 28960 }, { "epoch": 3.4267802223799384, "grad_norm": 3.452353000640869, "learning_rate": 3.991213789801293e-06, "loss": 0.2976, "step": 28970 }, { "epoch": 3.4279630943931867, "grad_norm": 2.6745400428771973, "learning_rate": 3.99049557098396e-06, "loss": 0.2956, "step": 28980 }, { "epoch": 3.429145966406435, "grad_norm": 3.3390755653381348, "learning_rate": 3.989777352166627e-06, "loss": 0.2756, "step": 28990 }, { "epoch": 3.430328838419683, "grad_norm": 3.5056352615356445, "learning_rate": 3.989059133349294e-06, "loss": 0.2445, "step": 29000 }, { "epoch": 3.431511710432931, "grad_norm": 2.393807888031006, "learning_rate": 3.988340914531961e-06, "loss": 0.2724, "step": 29010 }, { "epoch": 3.4326945824461794, "grad_norm": 3.7909843921661377, "learning_rate": 3.987622695714628e-06, "loss": 0.2675, "step": 29020 }, { "epoch": 3.4338774544594273, "grad_norm": 2.4259259700775146, "learning_rate": 3.986904476897295e-06, "loss": 0.2832, "step": 29030 }, { "epoch": 3.4350603264726756, "grad_norm": 3.82837176322937, "learning_rate": 3.9861862580799616e-06, "loss": 0.2667, "step": 29040 }, { "epoch": 3.436243198485924, "grad_norm": 4.073571681976318, "learning_rate": 3.9854680392626285e-06, "loss": 0.3378, "step": 29050 }, { "epoch": 3.437426070499172, "grad_norm": 2.9060354232788086, "learning_rate": 3.984749820445295e-06, "loss": 0.2834, "step": 29060 }, { "epoch": 3.43860894251242, "grad_norm": 1.9702935218811035, "learning_rate": 3.984031601627962e-06, "loss": 0.2717, "step": 29070 }, { "epoch": 3.4397918145256683, "grad_norm": 3.5200788974761963, "learning_rate": 3.98331338281063e-06, "loss": 0.2951, "step": 29080 }, { "epoch": 3.4409746865389166, "grad_norm": 3.1678929328918457, "learning_rate": 3.982595163993297e-06, "loss": 0.3277, "step": 29090 }, { "epoch": 3.4421575585521644, "grad_norm": 3.3186113834381104, "learning_rate": 3.981876945175964e-06, "loss": 0.2906, "step": 29100 }, { "epoch": 3.4433404305654127, "grad_norm": 3.593663215637207, "learning_rate": 3.981158726358631e-06, "loss": 0.3329, "step": 29110 }, { "epoch": 3.444523302578661, "grad_norm": 2.9948604106903076, "learning_rate": 3.980440507541298e-06, "loss": 0.2916, "step": 29120 }, { "epoch": 3.4457061745919093, "grad_norm": 2.2265982627868652, "learning_rate": 3.979722288723965e-06, "loss": 0.2569, "step": 29130 }, { "epoch": 3.446889046605157, "grad_norm": 3.2792186737060547, "learning_rate": 3.979004069906632e-06, "loss": 0.2539, "step": 29140 }, { "epoch": 3.4480719186184055, "grad_norm": 1.9254900217056274, "learning_rate": 3.9782858510892986e-06, "loss": 0.278, "step": 29150 }, { "epoch": 3.4492547906316537, "grad_norm": 3.3341944217681885, "learning_rate": 3.9775676322719655e-06, "loss": 0.2713, "step": 29160 }, { "epoch": 3.4504376626449016, "grad_norm": 2.7676820755004883, "learning_rate": 3.976849413454632e-06, "loss": 0.2993, "step": 29170 }, { "epoch": 3.45162053465815, "grad_norm": 2.2772769927978516, "learning_rate": 3.9761311946373e-06, "loss": 0.2982, "step": 29180 }, { "epoch": 3.452803406671398, "grad_norm": 2.897183895111084, "learning_rate": 3.975412975819966e-06, "loss": 0.3238, "step": 29190 }, { "epoch": 3.4539862786846465, "grad_norm": 4.300553798675537, "learning_rate": 3.974694757002634e-06, "loss": 0.3149, "step": 29200 }, { "epoch": 3.4551691506978943, "grad_norm": 2.5118746757507324, "learning_rate": 3.9739765381853e-06, "loss": 0.2477, "step": 29210 }, { "epoch": 3.4563520227111426, "grad_norm": 3.409400463104248, "learning_rate": 3.973258319367968e-06, "loss": 0.304, "step": 29220 }, { "epoch": 3.457534894724391, "grad_norm": 3.270049810409546, "learning_rate": 3.972540100550634e-06, "loss": 0.2817, "step": 29230 }, { "epoch": 3.4587177667376388, "grad_norm": 2.6571133136749268, "learning_rate": 3.971821881733302e-06, "loss": 0.299, "step": 29240 }, { "epoch": 3.459900638750887, "grad_norm": 2.746952533721924, "learning_rate": 3.971103662915969e-06, "loss": 0.2604, "step": 29250 }, { "epoch": 3.4610835107641353, "grad_norm": 3.310290575027466, "learning_rate": 3.9703854440986356e-06, "loss": 0.2783, "step": 29260 }, { "epoch": 3.4622663827773836, "grad_norm": 3.8183412551879883, "learning_rate": 3.9696672252813025e-06, "loss": 0.2463, "step": 29270 }, { "epoch": 3.4634492547906315, "grad_norm": 2.7475006580352783, "learning_rate": 3.968949006463969e-06, "loss": 0.309, "step": 29280 }, { "epoch": 3.4646321268038798, "grad_norm": 2.4192402362823486, "learning_rate": 3.968230787646636e-06, "loss": 0.2886, "step": 29290 }, { "epoch": 3.465814998817128, "grad_norm": 4.498946189880371, "learning_rate": 3.967512568829303e-06, "loss": 0.2904, "step": 29300 }, { "epoch": 3.466997870830376, "grad_norm": 3.021507978439331, "learning_rate": 3.96679435001197e-06, "loss": 0.2949, "step": 29310 }, { "epoch": 3.468180742843624, "grad_norm": 2.8634607791900635, "learning_rate": 3.966076131194637e-06, "loss": 0.2391, "step": 29320 }, { "epoch": 3.4693636148568725, "grad_norm": 3.459980010986328, "learning_rate": 3.965357912377305e-06, "loss": 0.2775, "step": 29330 }, { "epoch": 3.470546486870121, "grad_norm": 2.320286750793457, "learning_rate": 3.964639693559971e-06, "loss": 0.2625, "step": 29340 }, { "epoch": 3.4717293588833686, "grad_norm": 3.8461697101593018, "learning_rate": 3.963921474742639e-06, "loss": 0.3486, "step": 29350 }, { "epoch": 3.472912230896617, "grad_norm": 2.476952075958252, "learning_rate": 3.963203255925306e-06, "loss": 0.2703, "step": 29360 }, { "epoch": 3.4740951029098652, "grad_norm": 3.1494569778442383, "learning_rate": 3.9624850371079726e-06, "loss": 0.2762, "step": 29370 }, { "epoch": 3.475277974923113, "grad_norm": 2.9971723556518555, "learning_rate": 3.9617668182906395e-06, "loss": 0.2826, "step": 29380 }, { "epoch": 3.4764608469363614, "grad_norm": 3.3210978507995605, "learning_rate": 3.961048599473306e-06, "loss": 0.3112, "step": 29390 }, { "epoch": 3.4776437189496097, "grad_norm": 3.2452621459960938, "learning_rate": 3.960330380655973e-06, "loss": 0.294, "step": 29400 }, { "epoch": 3.478826590962858, "grad_norm": 2.9135382175445557, "learning_rate": 3.95961216183864e-06, "loss": 0.2956, "step": 29410 }, { "epoch": 3.480009462976106, "grad_norm": 3.071747064590454, "learning_rate": 3.958893943021307e-06, "loss": 0.3052, "step": 29420 }, { "epoch": 3.481192334989354, "grad_norm": 3.312474012374878, "learning_rate": 3.958175724203974e-06, "loss": 0.2897, "step": 29430 }, { "epoch": 3.4823752070026024, "grad_norm": 2.7206039428710938, "learning_rate": 3.957457505386641e-06, "loss": 0.2913, "step": 29440 }, { "epoch": 3.4835580790158502, "grad_norm": 2.353300094604492, "learning_rate": 3.956739286569309e-06, "loss": 0.2414, "step": 29450 }, { "epoch": 3.4847409510290985, "grad_norm": 2.789315938949585, "learning_rate": 3.956021067751975e-06, "loss": 0.2547, "step": 29460 }, { "epoch": 3.485923823042347, "grad_norm": 2.9860832691192627, "learning_rate": 3.955302848934643e-06, "loss": 0.293, "step": 29470 }, { "epoch": 3.487106695055595, "grad_norm": 2.748837947845459, "learning_rate": 3.954584630117309e-06, "loss": 0.3159, "step": 29480 }, { "epoch": 3.488289567068843, "grad_norm": 2.4259722232818604, "learning_rate": 3.9538664112999765e-06, "loss": 0.266, "step": 29490 }, { "epoch": 3.4894724390820913, "grad_norm": 3.2140469551086426, "learning_rate": 3.9531481924826426e-06, "loss": 0.3055, "step": 29500 }, { "epoch": 3.4906553110953396, "grad_norm": 2.755739212036133, "learning_rate": 3.95242997366531e-06, "loss": 0.29, "step": 29510 }, { "epoch": 3.4918381831085874, "grad_norm": 2.7468137741088867, "learning_rate": 3.951711754847977e-06, "loss": 0.301, "step": 29520 }, { "epoch": 3.4930210551218357, "grad_norm": 3.684769630432129, "learning_rate": 3.950993536030644e-06, "loss": 0.2755, "step": 29530 }, { "epoch": 3.494203927135084, "grad_norm": 2.875627279281616, "learning_rate": 3.950275317213311e-06, "loss": 0.3011, "step": 29540 }, { "epoch": 3.4953867991483323, "grad_norm": 3.1565229892730713, "learning_rate": 3.949557098395978e-06, "loss": 0.3004, "step": 29550 }, { "epoch": 3.49656967116158, "grad_norm": 4.146491527557373, "learning_rate": 3.948838879578645e-06, "loss": 0.291, "step": 29560 }, { "epoch": 3.4977525431748284, "grad_norm": 2.752413749694824, "learning_rate": 3.948120660761312e-06, "loss": 0.3018, "step": 29570 }, { "epoch": 3.4989354151880767, "grad_norm": 3.5726985931396484, "learning_rate": 3.947402441943979e-06, "loss": 0.3009, "step": 29580 }, { "epoch": 3.5001182872013246, "grad_norm": 2.2508862018585205, "learning_rate": 3.946684223126646e-06, "loss": 0.2741, "step": 29590 }, { "epoch": 3.5008280104092737, "eval_accuracy": 0.8600159696576505, "eval_loss": 0.32884901762008667, "eval_runtime": 77.7977, "eval_safe_aucpr": 0.915389373270268, "eval_safe_f1": 0.8444748368972591, "eval_safe_fpr": 0.13745478461032512, "eval_safe_precision": 0.8324588252441335, "eval_safe_recall": 0.8568428158871845, "eval_samples_per_second": 772.696, "eval_steps_per_second": 12.083, "eval_unsafe_aucpr": 0.9530512352714813, "eval_unsafe_f1": 0.8727333222425553, "eval_unsafe_fpr": 0.14315718411281497, "eval_unsafe_precision": 0.8831649831649832, "eval_unsafe_recall": 0.8625452153896744, "step": 29596 }, { "epoch": 3.501301159214573, "grad_norm": 3.029310941696167, "learning_rate": 3.9459660043093135e-06, "loss": 0.2791, "step": 29600 }, { "epoch": 3.502484031227821, "grad_norm": 2.4385011196136475, "learning_rate": 3.9452477854919796e-06, "loss": 0.2656, "step": 29610 }, { "epoch": 3.5036669032410694, "grad_norm": 3.1024386882781982, "learning_rate": 3.944529566674647e-06, "loss": 0.272, "step": 29620 }, { "epoch": 3.5048497752543177, "grad_norm": 2.7745182514190674, "learning_rate": 3.943811347857314e-06, "loss": 0.2835, "step": 29630 }, { "epoch": 3.5060326472675656, "grad_norm": 3.7471024990081787, "learning_rate": 3.943093129039981e-06, "loss": 0.3007, "step": 29640 }, { "epoch": 3.507215519280814, "grad_norm": 3.8284647464752197, "learning_rate": 3.942374910222648e-06, "loss": 0.3026, "step": 29650 }, { "epoch": 3.5083983912940617, "grad_norm": 2.3284730911254883, "learning_rate": 3.941656691405315e-06, "loss": 0.2985, "step": 29660 }, { "epoch": 3.50958126330731, "grad_norm": 3.232443332672119, "learning_rate": 3.940938472587982e-06, "loss": 0.3027, "step": 29670 }, { "epoch": 3.5107641353205583, "grad_norm": 2.2535243034362793, "learning_rate": 3.940220253770649e-06, "loss": 0.3141, "step": 29680 }, { "epoch": 3.5119470073338066, "grad_norm": 2.911020278930664, "learning_rate": 3.939502034953316e-06, "loss": 0.3234, "step": 29690 }, { "epoch": 3.513129879347055, "grad_norm": 2.8949711322784424, "learning_rate": 3.938783816135983e-06, "loss": 0.3127, "step": 29700 }, { "epoch": 3.5143127513603027, "grad_norm": 3.0444653034210205, "learning_rate": 3.93806559731865e-06, "loss": 0.2895, "step": 29710 }, { "epoch": 3.515495623373551, "grad_norm": 3.122600555419922, "learning_rate": 3.937347378501317e-06, "loss": 0.3108, "step": 29720 }, { "epoch": 3.516678495386799, "grad_norm": 2.93048095703125, "learning_rate": 3.9366291596839835e-06, "loss": 0.2738, "step": 29730 }, { "epoch": 3.517861367400047, "grad_norm": 3.568284034729004, "learning_rate": 3.935910940866651e-06, "loss": 0.2656, "step": 29740 }, { "epoch": 3.5190442394132955, "grad_norm": 3.0430760383605957, "learning_rate": 3.935192722049317e-06, "loss": 0.3161, "step": 29750 }, { "epoch": 3.5202271114265438, "grad_norm": 3.3558688163757324, "learning_rate": 3.934474503231985e-06, "loss": 0.2549, "step": 29760 }, { "epoch": 3.521409983439792, "grad_norm": 4.9178972244262695, "learning_rate": 3.933756284414651e-06, "loss": 0.2799, "step": 29770 }, { "epoch": 3.52259285545304, "grad_norm": 5.353640556335449, "learning_rate": 3.933038065597319e-06, "loss": 0.2798, "step": 29780 }, { "epoch": 3.523775727466288, "grad_norm": 2.5971882343292236, "learning_rate": 3.932319846779986e-06, "loss": 0.2976, "step": 29790 }, { "epoch": 3.524958599479536, "grad_norm": 4.564641952514648, "learning_rate": 3.931601627962653e-06, "loss": 0.3102, "step": 29800 }, { "epoch": 3.5261414714927843, "grad_norm": 3.1244876384735107, "learning_rate": 3.93088340914532e-06, "loss": 0.3059, "step": 29810 }, { "epoch": 3.5273243435060326, "grad_norm": 2.325773000717163, "learning_rate": 3.930165190327987e-06, "loss": 0.2833, "step": 29820 }, { "epoch": 3.528507215519281, "grad_norm": 2.4007983207702637, "learning_rate": 3.9294469715106536e-06, "loss": 0.2945, "step": 29830 }, { "epoch": 3.529690087532529, "grad_norm": 2.8207406997680664, "learning_rate": 3.9287287526933205e-06, "loss": 0.314, "step": 29840 }, { "epoch": 3.530872959545777, "grad_norm": 3.3562471866607666, "learning_rate": 3.928010533875988e-06, "loss": 0.3105, "step": 29850 }, { "epoch": 3.5320558315590254, "grad_norm": 1.9103692770004272, "learning_rate": 3.927292315058654e-06, "loss": 0.2849, "step": 29860 }, { "epoch": 3.533238703572273, "grad_norm": 2.370021104812622, "learning_rate": 3.926574096241322e-06, "loss": 0.2933, "step": 29870 }, { "epoch": 3.5344215755855215, "grad_norm": 3.22601318359375, "learning_rate": 3.925855877423988e-06, "loss": 0.2486, "step": 29880 }, { "epoch": 3.53560444759877, "grad_norm": 2.821993589401245, "learning_rate": 3.925137658606656e-06, "loss": 0.2691, "step": 29890 }, { "epoch": 3.536787319612018, "grad_norm": 3.3346800804138184, "learning_rate": 3.924419439789323e-06, "loss": 0.2883, "step": 29900 }, { "epoch": 3.5379701916252664, "grad_norm": 3.168386220932007, "learning_rate": 3.92370122097199e-06, "loss": 0.3288, "step": 29910 }, { "epoch": 3.5391530636385142, "grad_norm": 2.7423057556152344, "learning_rate": 3.922983002154657e-06, "loss": 0.2781, "step": 29920 }, { "epoch": 3.5403359356517625, "grad_norm": 2.565031051635742, "learning_rate": 3.922264783337324e-06, "loss": 0.2943, "step": 29930 }, { "epoch": 3.5415188076650104, "grad_norm": 4.034544944763184, "learning_rate": 3.9215465645199906e-06, "loss": 0.2915, "step": 29940 }, { "epoch": 3.5427016796782587, "grad_norm": 2.539607524871826, "learning_rate": 3.9208283457026575e-06, "loss": 0.2865, "step": 29950 }, { "epoch": 3.543884551691507, "grad_norm": 3.653226375579834, "learning_rate": 3.920110126885324e-06, "loss": 0.262, "step": 29960 }, { "epoch": 3.5450674237047552, "grad_norm": 2.855692148208618, "learning_rate": 3.919391908067991e-06, "loss": 0.2774, "step": 29970 }, { "epoch": 3.5462502957180035, "grad_norm": 2.8344321250915527, "learning_rate": 3.918673689250658e-06, "loss": 0.2708, "step": 29980 }, { "epoch": 3.5474331677312514, "grad_norm": 3.342066526412964, "learning_rate": 3.917955470433325e-06, "loss": 0.3056, "step": 29990 }, { "epoch": 3.5486160397444997, "grad_norm": 2.6532962322235107, "learning_rate": 3.917237251615992e-06, "loss": 0.2947, "step": 30000 }, { "epoch": 3.549798911757748, "grad_norm": 2.5257811546325684, "learning_rate": 3.91651903279866e-06, "loss": 0.2967, "step": 30010 }, { "epoch": 3.550981783770996, "grad_norm": 2.410673141479492, "learning_rate": 3.915800813981326e-06, "loss": 0.272, "step": 30020 }, { "epoch": 3.552164655784244, "grad_norm": 1.8771846294403076, "learning_rate": 3.915082595163994e-06, "loss": 0.2444, "step": 30030 }, { "epoch": 3.5533475277974924, "grad_norm": 3.0626044273376465, "learning_rate": 3.91436437634666e-06, "loss": 0.3246, "step": 30040 }, { "epoch": 3.5545303998107407, "grad_norm": 4.596074104309082, "learning_rate": 3.9136461575293275e-06, "loss": 0.2811, "step": 30050 }, { "epoch": 3.5557132718239886, "grad_norm": 3.1455276012420654, "learning_rate": 3.9129279387119945e-06, "loss": 0.3007, "step": 30060 }, { "epoch": 3.556896143837237, "grad_norm": 2.6878602504730225, "learning_rate": 3.912209719894661e-06, "loss": 0.2825, "step": 30070 }, { "epoch": 3.558079015850485, "grad_norm": 4.2512664794921875, "learning_rate": 3.911491501077328e-06, "loss": 0.2945, "step": 30080 }, { "epoch": 3.559261887863733, "grad_norm": 3.9039673805236816, "learning_rate": 3.910773282259995e-06, "loss": 0.3123, "step": 30090 }, { "epoch": 3.5604447598769813, "grad_norm": 2.679293394088745, "learning_rate": 3.910055063442663e-06, "loss": 0.2603, "step": 30100 }, { "epoch": 3.5616276318902296, "grad_norm": 2.824568510055542, "learning_rate": 3.909336844625329e-06, "loss": 0.3108, "step": 30110 }, { "epoch": 3.562810503903478, "grad_norm": 2.5078423023223877, "learning_rate": 3.908618625807997e-06, "loss": 0.28, "step": 30120 }, { "epoch": 3.5639933759167257, "grad_norm": 3.575838327407837, "learning_rate": 3.907900406990663e-06, "loss": 0.3054, "step": 30130 }, { "epoch": 3.565176247929974, "grad_norm": 2.186633825302124, "learning_rate": 3.907182188173331e-06, "loss": 0.3101, "step": 30140 }, { "epoch": 3.5663591199432223, "grad_norm": 3.1343424320220947, "learning_rate": 3.906463969355997e-06, "loss": 0.2979, "step": 30150 }, { "epoch": 3.56754199195647, "grad_norm": 2.1943230628967285, "learning_rate": 3.9057457505386645e-06, "loss": 0.2691, "step": 30160 }, { "epoch": 3.5687248639697184, "grad_norm": 2.861821413040161, "learning_rate": 3.9050275317213315e-06, "loss": 0.3343, "step": 30170 }, { "epoch": 3.5699077359829667, "grad_norm": 3.097752809524536, "learning_rate": 3.904309312903998e-06, "loss": 0.2983, "step": 30180 }, { "epoch": 3.571090607996215, "grad_norm": 2.444945812225342, "learning_rate": 3.903591094086665e-06, "loss": 0.287, "step": 30190 }, { "epoch": 3.572273480009463, "grad_norm": 2.8476860523223877, "learning_rate": 3.902872875269332e-06, "loss": 0.329, "step": 30200 }, { "epoch": 3.573456352022711, "grad_norm": 2.4883365631103516, "learning_rate": 3.902154656451999e-06, "loss": 0.2899, "step": 30210 }, { "epoch": 3.5746392240359595, "grad_norm": 1.879276990890503, "learning_rate": 3.901436437634666e-06, "loss": 0.2965, "step": 30220 }, { "epoch": 3.5758220960492073, "grad_norm": 2.9747819900512695, "learning_rate": 3.900718218817333e-06, "loss": 0.2782, "step": 30230 }, { "epoch": 3.5770049680624556, "grad_norm": 3.0752792358398438, "learning_rate": 3.9e-06, "loss": 0.2892, "step": 30240 }, { "epoch": 3.578187840075704, "grad_norm": 4.906447410583496, "learning_rate": 3.899281781182667e-06, "loss": 0.2656, "step": 30250 }, { "epoch": 3.579370712088952, "grad_norm": 2.7060699462890625, "learning_rate": 3.898563562365334e-06, "loss": 0.2929, "step": 30260 }, { "epoch": 3.5805535841022, "grad_norm": 3.572575330734253, "learning_rate": 3.897845343548001e-06, "loss": 0.2854, "step": 30270 }, { "epoch": 3.5817364561154483, "grad_norm": 2.5565311908721924, "learning_rate": 3.8971271247306685e-06, "loss": 0.3296, "step": 30280 }, { "epoch": 3.5829193281286966, "grad_norm": 4.189861297607422, "learning_rate": 3.8964089059133345e-06, "loss": 0.2903, "step": 30290 }, { "epoch": 3.5841022001419445, "grad_norm": 2.8932878971099854, "learning_rate": 3.895690687096002e-06, "loss": 0.2952, "step": 30300 }, { "epoch": 3.5852850721551928, "grad_norm": 2.233625888824463, "learning_rate": 3.894972468278668e-06, "loss": 0.2778, "step": 30310 }, { "epoch": 3.586467944168441, "grad_norm": 3.5226712226867676, "learning_rate": 3.894254249461336e-06, "loss": 0.2912, "step": 30320 }, { "epoch": 3.5876508161816894, "grad_norm": 2.51426362991333, "learning_rate": 3.893536030644002e-06, "loss": 0.2679, "step": 30330 }, { "epoch": 3.588833688194937, "grad_norm": 2.8012824058532715, "learning_rate": 3.89281781182667e-06, "loss": 0.2974, "step": 30340 }, { "epoch": 3.5900165602081855, "grad_norm": 3.285240411758423, "learning_rate": 3.892099593009337e-06, "loss": 0.2842, "step": 30350 }, { "epoch": 3.591199432221434, "grad_norm": 2.258108377456665, "learning_rate": 3.891381374192004e-06, "loss": 0.3086, "step": 30360 }, { "epoch": 3.5923823042346816, "grad_norm": 2.47900652885437, "learning_rate": 3.890663155374672e-06, "loss": 0.3126, "step": 30370 }, { "epoch": 3.59356517624793, "grad_norm": 3.498553514480591, "learning_rate": 3.889944936557338e-06, "loss": 0.2909, "step": 30380 }, { "epoch": 3.594748048261178, "grad_norm": 3.9336187839508057, "learning_rate": 3.8892267177400055e-06, "loss": 0.3375, "step": 30390 }, { "epoch": 3.5959309202744265, "grad_norm": 3.1217427253723145, "learning_rate": 3.8885084989226715e-06, "loss": 0.3114, "step": 30400 }, { "epoch": 3.5971137922876744, "grad_norm": 2.4833168983459473, "learning_rate": 3.887790280105339e-06, "loss": 0.257, "step": 30410 }, { "epoch": 3.5982966643009227, "grad_norm": 3.588465929031372, "learning_rate": 3.887072061288005e-06, "loss": 0.2929, "step": 30420 }, { "epoch": 3.599479536314171, "grad_norm": 2.513824939727783, "learning_rate": 3.886353842470673e-06, "loss": 0.2817, "step": 30430 }, { "epoch": 3.600662408327419, "grad_norm": 2.834028720855713, "learning_rate": 3.88563562365334e-06, "loss": 0.2872, "step": 30440 }, { "epoch": 3.601845280340667, "grad_norm": 2.060053586959839, "learning_rate": 3.884917404836007e-06, "loss": 0.3027, "step": 30450 }, { "epoch": 3.6030281523539154, "grad_norm": 2.384411096572876, "learning_rate": 3.884199186018674e-06, "loss": 0.2466, "step": 30460 }, { "epoch": 3.6042110243671637, "grad_norm": 2.235736131668091, "learning_rate": 3.883480967201341e-06, "loss": 0.2809, "step": 30470 }, { "epoch": 3.6053938963804115, "grad_norm": 2.988762378692627, "learning_rate": 3.882762748384008e-06, "loss": 0.2932, "step": 30480 }, { "epoch": 3.60657676839366, "grad_norm": 2.637009620666504, "learning_rate": 3.882044529566675e-06, "loss": 0.2987, "step": 30490 }, { "epoch": 3.607759640406908, "grad_norm": 2.588085651397705, "learning_rate": 3.881326310749342e-06, "loss": 0.2842, "step": 30500 }, { "epoch": 3.608942512420156, "grad_norm": 2.797858953475952, "learning_rate": 3.8806080919320085e-06, "loss": 0.3066, "step": 30510 }, { "epoch": 3.6101253844334042, "grad_norm": 2.368443250656128, "learning_rate": 3.8798898731146755e-06, "loss": 0.2947, "step": 30520 }, { "epoch": 3.6113082564466525, "grad_norm": 2.7464444637298584, "learning_rate": 3.879171654297342e-06, "loss": 0.2945, "step": 30530 }, { "epoch": 3.612491128459901, "grad_norm": 2.8854618072509766, "learning_rate": 3.878453435480009e-06, "loss": 0.3248, "step": 30540 }, { "epoch": 3.6136740004731487, "grad_norm": 2.9559497833251953, "learning_rate": 3.877735216662677e-06, "loss": 0.2718, "step": 30550 }, { "epoch": 3.614856872486397, "grad_norm": 3.2206692695617676, "learning_rate": 3.877016997845343e-06, "loss": 0.3053, "step": 30560 }, { "epoch": 3.6160397444996453, "grad_norm": 2.0784428119659424, "learning_rate": 3.876298779028011e-06, "loss": 0.3018, "step": 30570 }, { "epoch": 3.617222616512893, "grad_norm": 4.011874198913574, "learning_rate": 3.875580560210677e-06, "loss": 0.3048, "step": 30580 }, { "epoch": 3.6184054885261414, "grad_norm": 3.6103837490081787, "learning_rate": 3.874862341393345e-06, "loss": 0.3138, "step": 30590 }, { "epoch": 3.6195883605393897, "grad_norm": 2.5846097469329834, "learning_rate": 3.874144122576012e-06, "loss": 0.3142, "step": 30600 }, { "epoch": 3.620771232552638, "grad_norm": 2.300175428390503, "learning_rate": 3.873425903758679e-06, "loss": 0.305, "step": 30610 }, { "epoch": 3.621954104565886, "grad_norm": 2.6782939434051514, "learning_rate": 3.8727076849413455e-06, "loss": 0.3091, "step": 30620 }, { "epoch": 3.623136976579134, "grad_norm": 2.2132413387298584, "learning_rate": 3.8719894661240125e-06, "loss": 0.2721, "step": 30630 }, { "epoch": 3.6243198485923824, "grad_norm": 3.218013286590576, "learning_rate": 3.87127124730668e-06, "loss": 0.317, "step": 30640 }, { "epoch": 3.6255027206056303, "grad_norm": 3.503964424133301, "learning_rate": 3.870553028489346e-06, "loss": 0.3056, "step": 30650 }, { "epoch": 3.6266855926188786, "grad_norm": 2.749220132827759, "learning_rate": 3.869834809672014e-06, "loss": 0.3126, "step": 30660 }, { "epoch": 3.627868464632127, "grad_norm": 3.0884857177734375, "learning_rate": 3.86911659085468e-06, "loss": 0.3057, "step": 30670 }, { "epoch": 3.629051336645375, "grad_norm": 4.062482833862305, "learning_rate": 3.868398372037348e-06, "loss": 0.3089, "step": 30680 }, { "epoch": 3.630234208658623, "grad_norm": 2.715261220932007, "learning_rate": 3.867680153220014e-06, "loss": 0.281, "step": 30690 }, { "epoch": 3.6314170806718713, "grad_norm": 2.3458049297332764, "learning_rate": 3.866961934402682e-06, "loss": 0.2811, "step": 30700 }, { "epoch": 3.6325999526851196, "grad_norm": 4.217658996582031, "learning_rate": 3.866243715585349e-06, "loss": 0.2974, "step": 30710 }, { "epoch": 3.6337828246983674, "grad_norm": 4.3304219245910645, "learning_rate": 3.865525496768016e-06, "loss": 0.2866, "step": 30720 }, { "epoch": 3.6349656967116157, "grad_norm": 2.932298421859741, "learning_rate": 3.8648072779506825e-06, "loss": 0.2598, "step": 30730 }, { "epoch": 3.636148568724864, "grad_norm": 2.707641124725342, "learning_rate": 3.8640890591333495e-06, "loss": 0.302, "step": 30740 }, { "epoch": 3.6373314407381123, "grad_norm": 3.807304859161377, "learning_rate": 3.863370840316016e-06, "loss": 0.2939, "step": 30750 }, { "epoch": 3.63851431275136, "grad_norm": 3.115504741668701, "learning_rate": 3.862652621498683e-06, "loss": 0.2974, "step": 30760 }, { "epoch": 3.6396971847646085, "grad_norm": 3.444378614425659, "learning_rate": 3.86193440268135e-06, "loss": 0.3166, "step": 30770 }, { "epoch": 3.6408800567778568, "grad_norm": 2.917494773864746, "learning_rate": 3.861216183864017e-06, "loss": 0.2872, "step": 30780 }, { "epoch": 3.6420629287911046, "grad_norm": 2.107465982437134, "learning_rate": 3.860497965046684e-06, "loss": 0.2691, "step": 30790 }, { "epoch": 3.643245800804353, "grad_norm": 2.2286996841430664, "learning_rate": 3.859779746229351e-06, "loss": 0.2554, "step": 30800 }, { "epoch": 3.644428672817601, "grad_norm": 2.9824001789093018, "learning_rate": 3.859061527412018e-06, "loss": 0.304, "step": 30810 }, { "epoch": 3.6456115448308495, "grad_norm": 2.3912899494171143, "learning_rate": 3.858343308594686e-06, "loss": 0.2915, "step": 30820 }, { "epoch": 3.6467944168440973, "grad_norm": 5.2417192459106445, "learning_rate": 3.857625089777352e-06, "loss": 0.2984, "step": 30830 }, { "epoch": 3.6479772888573456, "grad_norm": 2.8914151191711426, "learning_rate": 3.8569068709600195e-06, "loss": 0.2892, "step": 30840 }, { "epoch": 3.649160160870594, "grad_norm": 4.74766206741333, "learning_rate": 3.8561886521426865e-06, "loss": 0.2714, "step": 30850 }, { "epoch": 3.6503430328838418, "grad_norm": 3.0595216751098633, "learning_rate": 3.855470433325353e-06, "loss": 0.2804, "step": 30860 }, { "epoch": 3.65152590489709, "grad_norm": 4.202428340911865, "learning_rate": 3.85475221450802e-06, "loss": 0.2727, "step": 30870 }, { "epoch": 3.6527087769103384, "grad_norm": 4.499485015869141, "learning_rate": 3.854033995690687e-06, "loss": 0.2583, "step": 30880 }, { "epoch": 3.6538916489235866, "grad_norm": 3.0247628688812256, "learning_rate": 3.853315776873354e-06, "loss": 0.31, "step": 30890 }, { "epoch": 3.6550745209368345, "grad_norm": 2.4006307125091553, "learning_rate": 3.852597558056021e-06, "loss": 0.2623, "step": 30900 }, { "epoch": 3.656257392950083, "grad_norm": 2.835519552230835, "learning_rate": 3.851879339238688e-06, "loss": 0.2986, "step": 30910 }, { "epoch": 3.657440264963331, "grad_norm": 2.091010570526123, "learning_rate": 3.851161120421355e-06, "loss": 0.2754, "step": 30920 }, { "epoch": 3.658623136976579, "grad_norm": 3.398745059967041, "learning_rate": 3.850442901604023e-06, "loss": 0.3174, "step": 30930 }, { "epoch": 3.659806008989827, "grad_norm": 3.152393102645874, "learning_rate": 3.849724682786689e-06, "loss": 0.3182, "step": 30940 }, { "epoch": 3.6609888810030755, "grad_norm": 3.5269439220428467, "learning_rate": 3.8490064639693565e-06, "loss": 0.2934, "step": 30950 }, { "epoch": 3.662171753016324, "grad_norm": 2.7336370944976807, "learning_rate": 3.848288245152023e-06, "loss": 0.292, "step": 30960 }, { "epoch": 3.6633546250295717, "grad_norm": 3.7625656127929688, "learning_rate": 3.84757002633469e-06, "loss": 0.2588, "step": 30970 }, { "epoch": 3.66453749704282, "grad_norm": 2.673926830291748, "learning_rate": 3.846851807517357e-06, "loss": 0.3115, "step": 30980 }, { "epoch": 3.6657203690560682, "grad_norm": 4.187891006469727, "learning_rate": 3.846133588700024e-06, "loss": 0.3174, "step": 30990 }, { "epoch": 3.666903241069316, "grad_norm": 1.9319275617599487, "learning_rate": 3.845415369882691e-06, "loss": 0.2886, "step": 31000 }, { "epoch": 3.6680861130825644, "grad_norm": 3.7331576347351074, "learning_rate": 3.844697151065358e-06, "loss": 0.3056, "step": 31010 }, { "epoch": 3.6692689850958127, "grad_norm": 2.1255035400390625, "learning_rate": 3.843978932248025e-06, "loss": 0.2886, "step": 31020 }, { "epoch": 3.670451857109061, "grad_norm": 2.0068323612213135, "learning_rate": 3.843260713430692e-06, "loss": 0.268, "step": 31030 }, { "epoch": 3.671634729122309, "grad_norm": 3.2803544998168945, "learning_rate": 3.842542494613359e-06, "loss": 0.2855, "step": 31040 }, { "epoch": 3.672817601135557, "grad_norm": 3.192193031311035, "learning_rate": 3.841824275796026e-06, "loss": 0.2921, "step": 31050 }, { "epoch": 3.6740004731488054, "grad_norm": 2.564889430999756, "learning_rate": 3.841106056978693e-06, "loss": 0.319, "step": 31060 }, { "epoch": 3.6751833451620533, "grad_norm": 2.705695152282715, "learning_rate": 3.84038783816136e-06, "loss": 0.3019, "step": 31070 }, { "epoch": 3.6763662171753015, "grad_norm": 2.6498305797576904, "learning_rate": 3.8396696193440265e-06, "loss": 0.3075, "step": 31080 }, { "epoch": 3.67754908918855, "grad_norm": 2.5106942653656006, "learning_rate": 3.838951400526694e-06, "loss": 0.2847, "step": 31090 }, { "epoch": 3.678731961201798, "grad_norm": 2.990410089492798, "learning_rate": 3.83823318170936e-06, "loss": 0.2857, "step": 31100 }, { "epoch": 3.679914833215046, "grad_norm": 3.141381025314331, "learning_rate": 3.837514962892028e-06, "loss": 0.2668, "step": 31110 }, { "epoch": 3.6810977052282943, "grad_norm": 3.528865337371826, "learning_rate": 3.836796744074695e-06, "loss": 0.277, "step": 31120 }, { "epoch": 3.6822805772415426, "grad_norm": 4.061430931091309, "learning_rate": 3.836078525257362e-06, "loss": 0.2929, "step": 31130 }, { "epoch": 3.6834634492547904, "grad_norm": 2.838484287261963, "learning_rate": 3.835360306440029e-06, "loss": 0.2854, "step": 31140 }, { "epoch": 3.6846463212680387, "grad_norm": 3.8740718364715576, "learning_rate": 3.834642087622696e-06, "loss": 0.3001, "step": 31150 }, { "epoch": 3.685829193281287, "grad_norm": 2.8395168781280518, "learning_rate": 3.833923868805363e-06, "loss": 0.2852, "step": 31160 }, { "epoch": 3.6870120652945353, "grad_norm": 2.3050224781036377, "learning_rate": 3.83320564998803e-06, "loss": 0.3339, "step": 31170 }, { "epoch": 3.688194937307783, "grad_norm": 2.0187599658966064, "learning_rate": 3.832487431170697e-06, "loss": 0.2871, "step": 31180 }, { "epoch": 3.6893778093210314, "grad_norm": 2.740751028060913, "learning_rate": 3.8317692123533635e-06, "loss": 0.2929, "step": 31190 }, { "epoch": 3.6905606813342797, "grad_norm": 3.669309377670288, "learning_rate": 3.831050993536031e-06, "loss": 0.2948, "step": 31200 }, { "epoch": 3.6917435533475276, "grad_norm": 2.3854892253875732, "learning_rate": 3.830332774718697e-06, "loss": 0.2667, "step": 31210 }, { "epoch": 3.692926425360776, "grad_norm": 3.5488622188568115, "learning_rate": 3.829614555901365e-06, "loss": 0.2893, "step": 31220 }, { "epoch": 3.694109297374024, "grad_norm": 2.9817185401916504, "learning_rate": 3.828896337084031e-06, "loss": 0.3179, "step": 31230 }, { "epoch": 3.6952921693872725, "grad_norm": 3.7668981552124023, "learning_rate": 3.828178118266699e-06, "loss": 0.2976, "step": 31240 }, { "epoch": 3.6964750414005203, "grad_norm": 3.384310483932495, "learning_rate": 3.827459899449365e-06, "loss": 0.2769, "step": 31250 }, { "epoch": 3.6976579134137686, "grad_norm": 3.0994839668273926, "learning_rate": 3.826741680632033e-06, "loss": 0.2736, "step": 31260 }, { "epoch": 3.698840785427017, "grad_norm": 3.4753875732421875, "learning_rate": 3.8260234618147e-06, "loss": 0.3087, "step": 31270 }, { "epoch": 3.7000236574402647, "grad_norm": 3.772836685180664, "learning_rate": 3.825305242997367e-06, "loss": 0.2893, "step": 31280 }, { "epoch": 3.701206529453513, "grad_norm": 2.2013442516326904, "learning_rate": 3.824587024180034e-06, "loss": 0.3055, "step": 31290 }, { "epoch": 3.7023894014667613, "grad_norm": 4.810434341430664, "learning_rate": 3.8238688053627005e-06, "loss": 0.276, "step": 31300 }, { "epoch": 3.7035722734800096, "grad_norm": 3.599900007247925, "learning_rate": 3.8231505865453674e-06, "loss": 0.2991, "step": 31310 }, { "epoch": 3.704755145493258, "grad_norm": 2.5832552909851074, "learning_rate": 3.822432367728034e-06, "loss": 0.2866, "step": 31320 }, { "epoch": 3.7059380175065058, "grad_norm": 3.2084455490112305, "learning_rate": 3.821714148910701e-06, "loss": 0.2615, "step": 31330 }, { "epoch": 3.707120889519754, "grad_norm": 2.4905202388763428, "learning_rate": 3.820995930093368e-06, "loss": 0.2596, "step": 31340 }, { "epoch": 3.708303761533002, "grad_norm": 3.578376293182373, "learning_rate": 3.820277711276035e-06, "loss": 0.3025, "step": 31350 }, { "epoch": 3.70948663354625, "grad_norm": 2.4837441444396973, "learning_rate": 3.819559492458703e-06, "loss": 0.2811, "step": 31360 }, { "epoch": 3.7106695055594985, "grad_norm": 2.794616222381592, "learning_rate": 3.81884127364137e-06, "loss": 0.3052, "step": 31370 }, { "epoch": 3.7118523775727468, "grad_norm": 3.169340133666992, "learning_rate": 3.818123054824037e-06, "loss": 0.3129, "step": 31380 }, { "epoch": 3.713035249585995, "grad_norm": 2.6523597240448, "learning_rate": 3.817404836006704e-06, "loss": 0.2436, "step": 31390 }, { "epoch": 3.714218121599243, "grad_norm": 3.1492745876312256, "learning_rate": 3.816686617189371e-06, "loss": 0.307, "step": 31400 }, { "epoch": 3.715400993612491, "grad_norm": 2.586974859237671, "learning_rate": 3.8159683983720375e-06, "loss": 0.315, "step": 31410 }, { "epoch": 3.716583865625739, "grad_norm": 2.3166165351867676, "learning_rate": 3.8152501795547044e-06, "loss": 0.2497, "step": 31420 }, { "epoch": 3.7177667376389874, "grad_norm": 3.380039930343628, "learning_rate": 3.814531960737372e-06, "loss": 0.3091, "step": 31430 }, { "epoch": 3.7189496096522356, "grad_norm": 3.345799446105957, "learning_rate": 3.8138137419200383e-06, "loss": 0.3012, "step": 31440 }, { "epoch": 3.720132481665484, "grad_norm": 2.7278225421905518, "learning_rate": 3.8130955231027056e-06, "loss": 0.2915, "step": 31450 }, { "epoch": 3.7213153536787322, "grad_norm": 2.6513779163360596, "learning_rate": 3.812377304285372e-06, "loss": 0.2837, "step": 31460 }, { "epoch": 3.72249822569198, "grad_norm": 3.645665407180786, "learning_rate": 3.8116590854680395e-06, "loss": 0.3055, "step": 31470 }, { "epoch": 3.7236810977052284, "grad_norm": 2.8154947757720947, "learning_rate": 3.810940866650706e-06, "loss": 0.2655, "step": 31480 }, { "epoch": 3.724863969718476, "grad_norm": 2.4965240955352783, "learning_rate": 3.8102226478333733e-06, "loss": 0.2711, "step": 31490 }, { "epoch": 3.7260468417317245, "grad_norm": 3.8089048862457275, "learning_rate": 3.8095044290160402e-06, "loss": 0.2908, "step": 31500 }, { "epoch": 3.727229713744973, "grad_norm": 2.390972852706909, "learning_rate": 3.8087862101987076e-06, "loss": 0.297, "step": 31510 }, { "epoch": 3.728412585758221, "grad_norm": 2.890800952911377, "learning_rate": 3.808067991381374e-06, "loss": 0.2924, "step": 31520 }, { "epoch": 3.7295954577714694, "grad_norm": 2.223830461502075, "learning_rate": 3.8073497725640414e-06, "loss": 0.2866, "step": 31530 }, { "epoch": 3.7307783297847172, "grad_norm": 4.307246685028076, "learning_rate": 3.806631553746708e-06, "loss": 0.3134, "step": 31540 }, { "epoch": 3.7319612017979655, "grad_norm": 3.1333954334259033, "learning_rate": 3.8059133349293753e-06, "loss": 0.2665, "step": 31550 }, { "epoch": 3.7331440738112134, "grad_norm": 5.665109157562256, "learning_rate": 3.8051951161120418e-06, "loss": 0.275, "step": 31560 }, { "epoch": 3.7343269458244617, "grad_norm": 3.2309370040893555, "learning_rate": 3.804476897294709e-06, "loss": 0.3323, "step": 31570 }, { "epoch": 3.73550981783771, "grad_norm": 2.3520450592041016, "learning_rate": 3.803758678477376e-06, "loss": 0.3115, "step": 31580 }, { "epoch": 3.7366926898509583, "grad_norm": 2.021888256072998, "learning_rate": 3.8030404596600434e-06, "loss": 0.2255, "step": 31590 }, { "epoch": 3.7378755618642066, "grad_norm": 3.181809902191162, "learning_rate": 3.80232224084271e-06, "loss": 0.3065, "step": 31600 }, { "epoch": 3.7390584338774544, "grad_norm": 2.6291301250457764, "learning_rate": 3.8016040220253772e-06, "loss": 0.2832, "step": 31610 }, { "epoch": 3.7402413058907027, "grad_norm": 2.8491785526275635, "learning_rate": 3.8008858032080446e-06, "loss": 0.3048, "step": 31620 }, { "epoch": 3.7414241779039505, "grad_norm": 2.6975810527801514, "learning_rate": 3.800167584390711e-06, "loss": 0.311, "step": 31630 }, { "epoch": 3.742607049917199, "grad_norm": 2.963815689086914, "learning_rate": 3.7994493655733784e-06, "loss": 0.3217, "step": 31640 }, { "epoch": 3.743789921930447, "grad_norm": 2.3831546306610107, "learning_rate": 3.798731146756045e-06, "loss": 0.3046, "step": 31650 }, { "epoch": 3.7449727939436954, "grad_norm": 3.1295571327209473, "learning_rate": 3.7980129279387123e-06, "loss": 0.2888, "step": 31660 }, { "epoch": 3.7461556659569437, "grad_norm": 2.733480215072632, "learning_rate": 3.7972947091213788e-06, "loss": 0.2986, "step": 31670 }, { "epoch": 3.7473385379701916, "grad_norm": 3.7552599906921387, "learning_rate": 3.796576490304046e-06, "loss": 0.2841, "step": 31680 }, { "epoch": 3.74852140998344, "grad_norm": 2.9118576049804688, "learning_rate": 3.795858271486713e-06, "loss": 0.2524, "step": 31690 }, { "epoch": 3.7497042819966877, "grad_norm": 4.615795135498047, "learning_rate": 3.7951400526693804e-06, "loss": 0.3208, "step": 31700 }, { "epoch": 3.750887154009936, "grad_norm": 3.5630717277526855, "learning_rate": 3.794421833852047e-06, "loss": 0.3123, "step": 31710 }, { "epoch": 3.750887154009936, "eval_accuracy": 0.8592174867751272, "eval_loss": 0.32891905307769775, "eval_runtime": 77.9037, "eval_safe_aucpr": 0.9157634659184628, "eval_safe_f1": 0.8470532955017801, "eval_safe_fpr": 0.1564975636004898, "eval_safe_precision": 0.8174049529124521, "eval_safe_recall": 0.8789333533360837, "eval_samples_per_second": 771.645, "eval_steps_per_second": 12.066, "eval_unsafe_aucpr": 0.9534351493536815, "eval_unsafe_f1": 0.8695893366206949, "eval_unsafe_fpr": 0.12106664666391584, "eval_unsafe_precision": 0.8973413051774584, "eval_unsafe_recall": 0.8435024363995097, "step": 31710 }, { "epoch": 3.750887154009936, "step": 31710, "total_flos": 3.585941570958701e+17, "train_loss": 0.3279850626021896, "train_runtime": 10021.2364, "train_samples_per_second": 539.874, "train_steps_per_second": 8.436 } ], "logging_steps": 10, "max_steps": 84540, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 2114, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 4 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.585941570958701e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }