{ "best_global_step": 25368, "best_metric": 0.7567282129720675, "best_model_checkpoint": "output/QA-DeBERTa-v3-large-diff-binary/checkpoint-25368", "epoch": 4.000946297610598, "eval_steps": 2114, "global_step": 33824, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011828720132481666, "grad_norm": 0.27666720747947693, "learning_rate": 5.4e-08, "loss": 0.6896, "step": 10 }, { "epoch": 0.0023657440264963333, "grad_norm": 0.2948802411556244, "learning_rate": 1.14e-07, "loss": 0.6895, "step": 20 }, { "epoch": 0.0035486160397444995, "grad_norm": 0.27228832244873047, "learning_rate": 1.7400000000000002e-07, "loss": 0.689, "step": 30 }, { "epoch": 0.0047314880529926665, "grad_norm": 0.2775748074054718, "learning_rate": 2.34e-07, "loss": 0.6882, "step": 40 }, { "epoch": 0.005914360066240833, "grad_norm": 0.2831368148326874, "learning_rate": 2.94e-07, "loss": 0.6876, "step": 50 }, { "epoch": 0.007097232079488999, "grad_norm": 0.3047918975353241, "learning_rate": 3.5399999999999997e-07, "loss": 0.6867, "step": 60 }, { "epoch": 0.008280104092737165, "grad_norm": 0.3290751874446869, "learning_rate": 4.1400000000000003e-07, "loss": 0.6857, "step": 70 }, { "epoch": 0.009462976105985333, "grad_norm": 0.39538559317588806, "learning_rate": 4.7400000000000004e-07, "loss": 0.6823, "step": 80 }, { "epoch": 0.0106458481192335, "grad_norm": 0.4546091854572296, "learning_rate": 5.34e-07, "loss": 0.6786, "step": 90 }, { "epoch": 0.011828720132481665, "grad_norm": 0.6779038310050964, "learning_rate": 5.94e-07, "loss": 0.6716, "step": 100 }, { "epoch": 0.013011592145729832, "grad_norm": 0.6615303158760071, "learning_rate": 6.54e-07, "loss": 0.6613, "step": 110 }, { "epoch": 0.014194464158977998, "grad_norm": 0.8023790121078491, "learning_rate": 7.14e-07, "loss": 0.6497, "step": 120 }, { "epoch": 0.015377336172226166, "grad_norm": 0.853452205657959, "learning_rate": 7.74e-07, "loss": 0.6369, "step": 130 }, { "epoch": 0.01656020818547433, "grad_norm": 0.9517714977264404, "learning_rate": 8.340000000000001e-07, "loss": 0.6196, "step": 140 }, { "epoch": 0.017743080198722498, "grad_norm": 0.9177682995796204, "learning_rate": 8.939999999999999e-07, "loss": 0.6, "step": 150 }, { "epoch": 0.018925952211970666, "grad_norm": 1.0413706302642822, "learning_rate": 9.54e-07, "loss": 0.5785, "step": 160 }, { "epoch": 0.02010882422521883, "grad_norm": 1.0531803369522095, "learning_rate": 1.0140000000000002e-06, "loss": 0.5582, "step": 170 }, { "epoch": 0.021291696238467, "grad_norm": 1.0206302404403687, "learning_rate": 1.074e-06, "loss": 0.5357, "step": 180 }, { "epoch": 0.022474568251715163, "grad_norm": 1.1189782619476318, "learning_rate": 1.134e-06, "loss": 0.5137, "step": 190 }, { "epoch": 0.02365744026496333, "grad_norm": 1.2713814973831177, "learning_rate": 1.1940000000000001e-06, "loss": 0.497, "step": 200 }, { "epoch": 0.0248403122782115, "grad_norm": 1.2837798595428467, "learning_rate": 1.254e-06, "loss": 0.4668, "step": 210 }, { "epoch": 0.026023184291459663, "grad_norm": 1.125508427619934, "learning_rate": 1.314e-06, "loss": 0.4454, "step": 220 }, { "epoch": 0.02720605630470783, "grad_norm": 1.4479429721832275, "learning_rate": 1.374e-06, "loss": 0.4178, "step": 230 }, { "epoch": 0.028388928317955996, "grad_norm": 1.2514110803604126, "learning_rate": 1.434e-06, "loss": 0.3973, "step": 240 }, { "epoch": 0.029571800331204164, "grad_norm": 1.069929838180542, "learning_rate": 1.494e-06, "loss": 0.3718, "step": 250 }, { "epoch": 0.03075467234445233, "grad_norm": 1.242750644683838, "learning_rate": 1.554e-06, "loss": 0.356, "step": 260 }, { "epoch": 0.0319375443577005, "grad_norm": 1.2301188707351685, "learning_rate": 1.6140000000000001e-06, "loss": 0.3325, "step": 270 }, { "epoch": 0.03312041637094866, "grad_norm": 1.8025130033493042, "learning_rate": 1.6740000000000002e-06, "loss": 0.3094, "step": 280 }, { "epoch": 0.03430328838419683, "grad_norm": 2.9036948680877686, "learning_rate": 1.7339999999999998e-06, "loss": 0.2971, "step": 290 }, { "epoch": 0.035486160397444996, "grad_norm": 0.7061401009559631, "learning_rate": 1.7939999999999999e-06, "loss": 0.2857, "step": 300 }, { "epoch": 0.036669032410693164, "grad_norm": 0.6862852573394775, "learning_rate": 1.854e-06, "loss": 0.2635, "step": 310 }, { "epoch": 0.03785190442394133, "grad_norm": 2.7285008430480957, "learning_rate": 1.9140000000000002e-06, "loss": 0.2703, "step": 320 }, { "epoch": 0.03903477643718949, "grad_norm": 1.8736552000045776, "learning_rate": 1.974e-06, "loss": 0.2474, "step": 330 }, { "epoch": 0.04021764845043766, "grad_norm": 0.589243471622467, "learning_rate": 2.0340000000000003e-06, "loss": 0.239, "step": 340 }, { "epoch": 0.04140052046368583, "grad_norm": 2.2330617904663086, "learning_rate": 2.0939999999999998e-06, "loss": 0.2428, "step": 350 }, { "epoch": 0.042583392476934, "grad_norm": 0.4800102114677429, "learning_rate": 2.154e-06, "loss": 0.2396, "step": 360 }, { "epoch": 0.043766264490182165, "grad_norm": 0.452298641204834, "learning_rate": 2.214e-06, "loss": 0.2366, "step": 370 }, { "epoch": 0.044949136503430326, "grad_norm": 0.41422396898269653, "learning_rate": 2.274e-06, "loss": 0.2328, "step": 380 }, { "epoch": 0.046132008516678494, "grad_norm": 3.539194107055664, "learning_rate": 2.334e-06, "loss": 0.2203, "step": 390 }, { "epoch": 0.04731488052992666, "grad_norm": 0.4063061475753784, "learning_rate": 2.3940000000000003e-06, "loss": 0.2201, "step": 400 }, { "epoch": 0.04849775254317483, "grad_norm": 0.38423001766204834, "learning_rate": 2.4539999999999997e-06, "loss": 0.2215, "step": 410 }, { "epoch": 0.049680624556423, "grad_norm": 0.40775400400161743, "learning_rate": 2.514e-06, "loss": 0.215, "step": 420 }, { "epoch": 0.05086349656967116, "grad_norm": 0.36205559968948364, "learning_rate": 2.574e-06, "loss": 0.222, "step": 430 }, { "epoch": 0.052046368582919326, "grad_norm": 0.36605480313301086, "learning_rate": 2.634e-06, "loss": 0.2158, "step": 440 }, { "epoch": 0.053229240596167494, "grad_norm": 0.3730807900428772, "learning_rate": 2.694e-06, "loss": 0.2082, "step": 450 }, { "epoch": 0.05441211260941566, "grad_norm": 1.1760071516036987, "learning_rate": 2.7540000000000002e-06, "loss": 0.2106, "step": 460 }, { "epoch": 0.05559498462266383, "grad_norm": 0.6382777094841003, "learning_rate": 2.814e-06, "loss": 0.2039, "step": 470 }, { "epoch": 0.05677785663591199, "grad_norm": 0.38585031032562256, "learning_rate": 2.874e-06, "loss": 0.2142, "step": 480 }, { "epoch": 0.05796072864916016, "grad_norm": 1.1704167127609253, "learning_rate": 2.934e-06, "loss": 0.2208, "step": 490 }, { "epoch": 0.05914360066240833, "grad_norm": 0.34876057505607605, "learning_rate": 2.994e-06, "loss": 0.2063, "step": 500 }, { "epoch": 0.060326472675656495, "grad_norm": 0.6792126297950745, "learning_rate": 3.0540000000000003e-06, "loss": 0.2121, "step": 510 }, { "epoch": 0.06150934468890466, "grad_norm": 0.5502442717552185, "learning_rate": 3.114e-06, "loss": 0.2187, "step": 520 }, { "epoch": 0.06269221670215283, "grad_norm": 0.41466549038887024, "learning_rate": 3.1740000000000004e-06, "loss": 0.213, "step": 530 }, { "epoch": 0.063875088715401, "grad_norm": 0.46700504422187805, "learning_rate": 3.2340000000000003e-06, "loss": 0.2151, "step": 540 }, { "epoch": 0.06505796072864917, "grad_norm": 0.515309751033783, "learning_rate": 3.294e-06, "loss": 0.2013, "step": 550 }, { "epoch": 0.06624083274189732, "grad_norm": 0.48580706119537354, "learning_rate": 3.3540000000000004e-06, "loss": 0.2056, "step": 560 }, { "epoch": 0.06742370475514549, "grad_norm": 0.4501067101955414, "learning_rate": 3.414e-06, "loss": 0.208, "step": 570 }, { "epoch": 0.06860657676839366, "grad_norm": 1.0023359060287476, "learning_rate": 3.4739999999999997e-06, "loss": 0.2139, "step": 580 }, { "epoch": 0.06978944878164182, "grad_norm": 0.5155152082443237, "learning_rate": 3.534e-06, "loss": 0.2113, "step": 590 }, { "epoch": 0.07097232079488999, "grad_norm": 0.4461996257305145, "learning_rate": 3.594e-06, "loss": 0.2092, "step": 600 }, { "epoch": 0.07215519280813816, "grad_norm": 0.6491278409957886, "learning_rate": 3.654e-06, "loss": 0.2008, "step": 610 }, { "epoch": 0.07333806482138633, "grad_norm": 0.6394904255867004, "learning_rate": 3.714e-06, "loss": 0.2081, "step": 620 }, { "epoch": 0.0745209368346345, "grad_norm": 0.39615026116371155, "learning_rate": 3.774e-06, "loss": 0.1979, "step": 630 }, { "epoch": 0.07570380884788266, "grad_norm": 0.8480771780014038, "learning_rate": 3.834e-06, "loss": 0.2075, "step": 640 }, { "epoch": 0.07688668086113083, "grad_norm": 0.5080680251121521, "learning_rate": 3.894e-06, "loss": 0.2017, "step": 650 }, { "epoch": 0.07806955287437899, "grad_norm": 0.40745317935943604, "learning_rate": 3.954000000000001e-06, "loss": 0.2051, "step": 660 }, { "epoch": 0.07925242488762715, "grad_norm": 0.6602805256843567, "learning_rate": 4.014e-06, "loss": 0.1981, "step": 670 }, { "epoch": 0.08043529690087532, "grad_norm": 0.6654038429260254, "learning_rate": 4.074e-06, "loss": 0.184, "step": 680 }, { "epoch": 0.08161816891412349, "grad_norm": 0.5476865172386169, "learning_rate": 4.134e-06, "loss": 0.1856, "step": 690 }, { "epoch": 0.08280104092737166, "grad_norm": 0.4695960581302643, "learning_rate": 4.194e-06, "loss": 0.1946, "step": 700 }, { "epoch": 0.08398391294061983, "grad_norm": 0.45363283157348633, "learning_rate": 4.254e-06, "loss": 0.192, "step": 710 }, { "epoch": 0.085166784953868, "grad_norm": 0.4504893124103546, "learning_rate": 4.314e-06, "loss": 0.1939, "step": 720 }, { "epoch": 0.08634965696711616, "grad_norm": 0.574321448802948, "learning_rate": 4.374e-06, "loss": 0.1834, "step": 730 }, { "epoch": 0.08753252898036433, "grad_norm": 1.2701773643493652, "learning_rate": 4.434e-06, "loss": 0.1783, "step": 740 }, { "epoch": 0.0887154009936125, "grad_norm": 0.6490401029586792, "learning_rate": 4.4940000000000005e-06, "loss": 0.1766, "step": 750 }, { "epoch": 0.08989827300686065, "grad_norm": 0.6250841617584229, "learning_rate": 4.554e-06, "loss": 0.1793, "step": 760 }, { "epoch": 0.09108114502010882, "grad_norm": 0.5290296673774719, "learning_rate": 4.614e-06, "loss": 0.1841, "step": 770 }, { "epoch": 0.09226401703335699, "grad_norm": 0.49206840991973877, "learning_rate": 4.6740000000000005e-06, "loss": 0.1867, "step": 780 }, { "epoch": 0.09344688904660516, "grad_norm": 0.7351230382919312, "learning_rate": 4.734e-06, "loss": 0.1822, "step": 790 }, { "epoch": 0.09462976105985332, "grad_norm": 0.58413165807724, "learning_rate": 4.794e-06, "loss": 0.1786, "step": 800 }, { "epoch": 0.09581263307310149, "grad_norm": 0.5458696484565735, "learning_rate": 4.8540000000000005e-06, "loss": 0.1772, "step": 810 }, { "epoch": 0.09699550508634966, "grad_norm": 0.6570992469787598, "learning_rate": 4.914e-06, "loss": 0.1663, "step": 820 }, { "epoch": 0.09817837709959783, "grad_norm": 2.373054265975952, "learning_rate": 4.974e-06, "loss": 0.1758, "step": 830 }, { "epoch": 0.099361249112846, "grad_norm": 0.5683428049087524, "learning_rate": 5.0339999999999996e-06, "loss": 0.1728, "step": 840 }, { "epoch": 0.10054412112609416, "grad_norm": 0.9402995109558105, "learning_rate": 5.094e-06, "loss": 0.1735, "step": 850 }, { "epoch": 0.10172699313934232, "grad_norm": 0.9833584427833557, "learning_rate": 5.154e-06, "loss": 0.1643, "step": 860 }, { "epoch": 0.10290986515259049, "grad_norm": 0.8081067800521851, "learning_rate": 5.214e-06, "loss": 0.1768, "step": 870 }, { "epoch": 0.10409273716583865, "grad_norm": 0.7307280898094177, "learning_rate": 5.274e-06, "loss": 0.1693, "step": 880 }, { "epoch": 0.10527560917908682, "grad_norm": 0.7629750967025757, "learning_rate": 5.334e-06, "loss": 0.1649, "step": 890 }, { "epoch": 0.10645848119233499, "grad_norm": 0.5831959247589111, "learning_rate": 5.394e-06, "loss": 0.1625, "step": 900 }, { "epoch": 0.10764135320558316, "grad_norm": 0.6492188572883606, "learning_rate": 5.454000000000001e-06, "loss": 0.1624, "step": 910 }, { "epoch": 0.10882422521883132, "grad_norm": 0.9148478507995605, "learning_rate": 5.514e-06, "loss": 0.1658, "step": 920 }, { "epoch": 0.11000709723207949, "grad_norm": 0.6186856627464294, "learning_rate": 5.574e-06, "loss": 0.1602, "step": 930 }, { "epoch": 0.11118996924532766, "grad_norm": 0.5935899019241333, "learning_rate": 5.634e-06, "loss": 0.1561, "step": 940 }, { "epoch": 0.11237284125857583, "grad_norm": 0.5446743965148926, "learning_rate": 5.694e-06, "loss": 0.1562, "step": 950 }, { "epoch": 0.11355571327182398, "grad_norm": 0.6119495034217834, "learning_rate": 5.754e-06, "loss": 0.1514, "step": 960 }, { "epoch": 0.11473858528507215, "grad_norm": 0.69134521484375, "learning_rate": 5.814e-06, "loss": 0.1536, "step": 970 }, { "epoch": 0.11592145729832032, "grad_norm": 0.5774104595184326, "learning_rate": 5.874e-06, "loss": 0.1586, "step": 980 }, { "epoch": 0.11710432931156849, "grad_norm": 1.6409279108047485, "learning_rate": 5.934e-06, "loss": 0.1652, "step": 990 }, { "epoch": 0.11828720132481665, "grad_norm": 0.5299248695373535, "learning_rate": 5.9940000000000005e-06, "loss": 0.1546, "step": 1000 }, { "epoch": 0.11947007333806482, "grad_norm": 0.652356743812561, "learning_rate": 5.999353603064401e-06, "loss": 0.1508, "step": 1010 }, { "epoch": 0.12065294535131299, "grad_norm": 0.6462839245796204, "learning_rate": 5.998635384247068e-06, "loss": 0.1546, "step": 1020 }, { "epoch": 0.12183581736456116, "grad_norm": 1.1162582635879517, "learning_rate": 5.997917165429735e-06, "loss": 0.1543, "step": 1030 }, { "epoch": 0.12301868937780933, "grad_norm": 0.6795859932899475, "learning_rate": 5.9971989466124016e-06, "loss": 0.1659, "step": 1040 }, { "epoch": 0.1242015613910575, "grad_norm": 1.811183214187622, "learning_rate": 5.9964807277950685e-06, "loss": 0.1529, "step": 1050 }, { "epoch": 0.12538443340430566, "grad_norm": 0.8033897280693054, "learning_rate": 5.995762508977735e-06, "loss": 0.1563, "step": 1060 }, { "epoch": 0.12656730541755382, "grad_norm": 0.7713770270347595, "learning_rate": 5.995044290160402e-06, "loss": 0.1472, "step": 1070 }, { "epoch": 0.127750177430802, "grad_norm": 0.7180285453796387, "learning_rate": 5.994326071343069e-06, "loss": 0.1508, "step": 1080 }, { "epoch": 0.12893304944405015, "grad_norm": 0.8387850522994995, "learning_rate": 5.993607852525736e-06, "loss": 0.1523, "step": 1090 }, { "epoch": 0.13011592145729833, "grad_norm": 0.6409116983413696, "learning_rate": 5.992889633708403e-06, "loss": 0.1438, "step": 1100 }, { "epoch": 0.1312987934705465, "grad_norm": 0.5884638428688049, "learning_rate": 5.99217141489107e-06, "loss": 0.149, "step": 1110 }, { "epoch": 0.13248166548379464, "grad_norm": 0.6320044994354248, "learning_rate": 5.991453196073737e-06, "loss": 0.1546, "step": 1120 }, { "epoch": 0.13366453749704282, "grad_norm": 0.6921868324279785, "learning_rate": 5.990734977256405e-06, "loss": 0.1554, "step": 1130 }, { "epoch": 0.13484740951029098, "grad_norm": 0.800104022026062, "learning_rate": 5.990016758439071e-06, "loss": 0.1552, "step": 1140 }, { "epoch": 0.13603028152353916, "grad_norm": 0.6269745826721191, "learning_rate": 5.9892985396217386e-06, "loss": 0.1586, "step": 1150 }, { "epoch": 0.1372131535367873, "grad_norm": 0.47620028257369995, "learning_rate": 5.988580320804405e-06, "loss": 0.1435, "step": 1160 }, { "epoch": 0.1383960255500355, "grad_norm": 0.6672356128692627, "learning_rate": 5.987862101987072e-06, "loss": 0.1405, "step": 1170 }, { "epoch": 0.13957889756328365, "grad_norm": 0.6523860096931458, "learning_rate": 5.987143883169739e-06, "loss": 0.1462, "step": 1180 }, { "epoch": 0.14076176957653183, "grad_norm": 0.7624364495277405, "learning_rate": 5.986425664352406e-06, "loss": 0.1419, "step": 1190 }, { "epoch": 0.14194464158977999, "grad_norm": 0.6649390459060669, "learning_rate": 5.985707445535073e-06, "loss": 0.1475, "step": 1200 }, { "epoch": 0.14312751360302814, "grad_norm": 0.7243581414222717, "learning_rate": 5.98498922671774e-06, "loss": 0.1453, "step": 1210 }, { "epoch": 0.14431038561627632, "grad_norm": 0.6720548868179321, "learning_rate": 5.984271007900407e-06, "loss": 0.1458, "step": 1220 }, { "epoch": 0.14549325762952448, "grad_norm": 1.091115951538086, "learning_rate": 5.983552789083074e-06, "loss": 0.1438, "step": 1230 }, { "epoch": 0.14667612964277266, "grad_norm": 0.5090327858924866, "learning_rate": 5.982834570265742e-06, "loss": 0.1348, "step": 1240 }, { "epoch": 0.1478590016560208, "grad_norm": 0.5930888056755066, "learning_rate": 5.982116351448408e-06, "loss": 0.1412, "step": 1250 }, { "epoch": 0.149041873669269, "grad_norm": 0.6313456296920776, "learning_rate": 5.9813981326310756e-06, "loss": 0.1523, "step": 1260 }, { "epoch": 0.15022474568251715, "grad_norm": 0.6217260956764221, "learning_rate": 5.980679913813742e-06, "loss": 0.156, "step": 1270 }, { "epoch": 0.15140761769576533, "grad_norm": 0.7095080018043518, "learning_rate": 5.979961694996409e-06, "loss": 0.1383, "step": 1280 }, { "epoch": 0.15259048970901348, "grad_norm": 0.5592630505561829, "learning_rate": 5.979243476179076e-06, "loss": 0.1325, "step": 1290 }, { "epoch": 0.15377336172226166, "grad_norm": 0.8711151480674744, "learning_rate": 5.978525257361743e-06, "loss": 0.1472, "step": 1300 }, { "epoch": 0.15495623373550982, "grad_norm": 0.8925802111625671, "learning_rate": 5.97780703854441e-06, "loss": 0.1539, "step": 1310 }, { "epoch": 0.15613910574875797, "grad_norm": 1.400760531425476, "learning_rate": 5.977088819727077e-06, "loss": 0.1415, "step": 1320 }, { "epoch": 0.15732197776200615, "grad_norm": 0.5288618206977844, "learning_rate": 5.976370600909744e-06, "loss": 0.1392, "step": 1330 }, { "epoch": 0.1585048497752543, "grad_norm": 0.7368156313896179, "learning_rate": 5.975652382092411e-06, "loss": 0.1337, "step": 1340 }, { "epoch": 0.1596877217885025, "grad_norm": 0.7652743458747864, "learning_rate": 5.974934163275078e-06, "loss": 0.1431, "step": 1350 }, { "epoch": 0.16087059380175064, "grad_norm": 0.9258527159690857, "learning_rate": 5.974215944457745e-06, "loss": 0.1381, "step": 1360 }, { "epoch": 0.16205346581499883, "grad_norm": 0.7157357335090637, "learning_rate": 5.973497725640412e-06, "loss": 0.1479, "step": 1370 }, { "epoch": 0.16323633782824698, "grad_norm": 0.6426119804382324, "learning_rate": 5.972779506823079e-06, "loss": 0.1447, "step": 1380 }, { "epoch": 0.16441920984149516, "grad_norm": 1.0444904565811157, "learning_rate": 5.9720612880057456e-06, "loss": 0.1417, "step": 1390 }, { "epoch": 0.16560208185474332, "grad_norm": 0.6736558079719543, "learning_rate": 5.971343069188413e-06, "loss": 0.1357, "step": 1400 }, { "epoch": 0.16678495386799147, "grad_norm": 1.2439801692962646, "learning_rate": 5.970624850371079e-06, "loss": 0.1293, "step": 1410 }, { "epoch": 0.16796782588123965, "grad_norm": 0.6193522810935974, "learning_rate": 5.969906631553747e-06, "loss": 0.1335, "step": 1420 }, { "epoch": 0.1691506978944878, "grad_norm": 0.9210103154182434, "learning_rate": 5.969188412736413e-06, "loss": 0.1367, "step": 1430 }, { "epoch": 0.170333569907736, "grad_norm": 0.5470699071884155, "learning_rate": 5.968470193919081e-06, "loss": 0.1318, "step": 1440 }, { "epoch": 0.17151644192098414, "grad_norm": 0.5305323600769043, "learning_rate": 5.967751975101747e-06, "loss": 0.1394, "step": 1450 }, { "epoch": 0.17269931393423232, "grad_norm": 0.6635201573371887, "learning_rate": 5.967033756284415e-06, "loss": 0.1366, "step": 1460 }, { "epoch": 0.17388218594748048, "grad_norm": 0.8658506870269775, "learning_rate": 5.966315537467082e-06, "loss": 0.1289, "step": 1470 }, { "epoch": 0.17506505796072866, "grad_norm": 0.6589860916137695, "learning_rate": 5.965597318649749e-06, "loss": 0.1322, "step": 1480 }, { "epoch": 0.1762479299739768, "grad_norm": 0.7368478775024414, "learning_rate": 5.9648790998324165e-06, "loss": 0.1199, "step": 1490 }, { "epoch": 0.177430801987225, "grad_norm": 1.0435322523117065, "learning_rate": 5.9641608810150826e-06, "loss": 0.1367, "step": 1500 }, { "epoch": 0.17861367400047315, "grad_norm": 0.6753053069114685, "learning_rate": 5.96344266219775e-06, "loss": 0.1255, "step": 1510 }, { "epoch": 0.1797965460137213, "grad_norm": 0.7435557842254639, "learning_rate": 5.962724443380416e-06, "loss": 0.1279, "step": 1520 }, { "epoch": 0.18097941802696949, "grad_norm": 1.0143587589263916, "learning_rate": 5.962006224563084e-06, "loss": 0.1293, "step": 1530 }, { "epoch": 0.18216229004021764, "grad_norm": 0.7713755369186401, "learning_rate": 5.96128800574575e-06, "loss": 0.1309, "step": 1540 }, { "epoch": 0.18334516205346582, "grad_norm": 0.7028072476387024, "learning_rate": 5.960569786928418e-06, "loss": 0.131, "step": 1550 }, { "epoch": 0.18452803406671398, "grad_norm": 0.760503888130188, "learning_rate": 5.959851568111085e-06, "loss": 0.1273, "step": 1560 }, { "epoch": 0.18571090607996216, "grad_norm": 0.6365183591842651, "learning_rate": 5.959133349293752e-06, "loss": 0.1291, "step": 1570 }, { "epoch": 0.1868937780932103, "grad_norm": 0.6163486242294312, "learning_rate": 5.958415130476419e-06, "loss": 0.1271, "step": 1580 }, { "epoch": 0.1880766501064585, "grad_norm": 0.6071023941040039, "learning_rate": 5.957696911659086e-06, "loss": 0.1254, "step": 1590 }, { "epoch": 0.18925952211970665, "grad_norm": 0.8195270299911499, "learning_rate": 5.956978692841753e-06, "loss": 0.125, "step": 1600 }, { "epoch": 0.1904423941329548, "grad_norm": 0.9120619893074036, "learning_rate": 5.9562604740244196e-06, "loss": 0.1285, "step": 1610 }, { "epoch": 0.19162526614620298, "grad_norm": 0.9497324228286743, "learning_rate": 5.9555422552070865e-06, "loss": 0.1261, "step": 1620 }, { "epoch": 0.19280813815945114, "grad_norm": 0.6703548431396484, "learning_rate": 5.954824036389753e-06, "loss": 0.151, "step": 1630 }, { "epoch": 0.19399101017269932, "grad_norm": 0.6480337977409363, "learning_rate": 5.95410581757242e-06, "loss": 0.1262, "step": 1640 }, { "epoch": 0.19517388218594747, "grad_norm": 0.9792718291282654, "learning_rate": 5.953387598755087e-06, "loss": 0.1247, "step": 1650 }, { "epoch": 0.19635675419919565, "grad_norm": 0.8115242123603821, "learning_rate": 5.952669379937754e-06, "loss": 0.1254, "step": 1660 }, { "epoch": 0.1975396262124438, "grad_norm": 0.7136529088020325, "learning_rate": 5.951951161120422e-06, "loss": 0.1183, "step": 1670 }, { "epoch": 0.198722498225692, "grad_norm": 0.7508926391601562, "learning_rate": 5.951232942303088e-06, "loss": 0.1196, "step": 1680 }, { "epoch": 0.19990537023894014, "grad_norm": 0.6986336708068848, "learning_rate": 5.950514723485756e-06, "loss": 0.1268, "step": 1690 }, { "epoch": 0.20108824225218833, "grad_norm": 0.6534774899482727, "learning_rate": 5.949796504668422e-06, "loss": 0.1194, "step": 1700 }, { "epoch": 0.20227111426543648, "grad_norm": 0.6797937154769897, "learning_rate": 5.94907828585109e-06, "loss": 0.1298, "step": 1710 }, { "epoch": 0.20345398627868463, "grad_norm": 0.6170414090156555, "learning_rate": 5.948360067033756e-06, "loss": 0.122, "step": 1720 }, { "epoch": 0.20463685829193282, "grad_norm": 0.9647011756896973, "learning_rate": 5.9476418482164235e-06, "loss": 0.1272, "step": 1730 }, { "epoch": 0.20581973030518097, "grad_norm": 0.9179564118385315, "learning_rate": 5.94692362939909e-06, "loss": 0.1275, "step": 1740 }, { "epoch": 0.20700260231842915, "grad_norm": 0.5881721377372742, "learning_rate": 5.946205410581757e-06, "loss": 0.1357, "step": 1750 }, { "epoch": 0.2081854743316773, "grad_norm": 0.5980613231658936, "learning_rate": 5.945487191764425e-06, "loss": 0.1255, "step": 1760 }, { "epoch": 0.2093683463449255, "grad_norm": 0.6583918929100037, "learning_rate": 5.944768972947091e-06, "loss": 0.1263, "step": 1770 }, { "epoch": 0.21055121835817364, "grad_norm": 0.7026111483573914, "learning_rate": 5.944050754129759e-06, "loss": 0.1195, "step": 1780 }, { "epoch": 0.21173409037142182, "grad_norm": 0.6585308909416199, "learning_rate": 5.943332535312425e-06, "loss": 0.1301, "step": 1790 }, { "epoch": 0.21291696238466998, "grad_norm": 0.9596797823905945, "learning_rate": 5.942614316495093e-06, "loss": 0.1221, "step": 1800 }, { "epoch": 0.21409983439791813, "grad_norm": 0.6373745203018188, "learning_rate": 5.941896097677759e-06, "loss": 0.1259, "step": 1810 }, { "epoch": 0.2152827064111663, "grad_norm": 0.9211647510528564, "learning_rate": 5.941177878860427e-06, "loss": 0.1305, "step": 1820 }, { "epoch": 0.21646557842441447, "grad_norm": 0.6365073919296265, "learning_rate": 5.9404596600430936e-06, "loss": 0.1259, "step": 1830 }, { "epoch": 0.21764845043766265, "grad_norm": 0.6830868124961853, "learning_rate": 5.9397414412257605e-06, "loss": 0.1192, "step": 1840 }, { "epoch": 0.2188313224509108, "grad_norm": 0.7025856971740723, "learning_rate": 5.939023222408427e-06, "loss": 0.1322, "step": 1850 }, { "epoch": 0.22001419446415899, "grad_norm": 0.7849975228309631, "learning_rate": 5.938305003591094e-06, "loss": 0.1148, "step": 1860 }, { "epoch": 0.22119706647740714, "grad_norm": 2.2914817333221436, "learning_rate": 5.937586784773761e-06, "loss": 0.1227, "step": 1870 }, { "epoch": 0.22237993849065532, "grad_norm": 0.8156799077987671, "learning_rate": 5.936868565956428e-06, "loss": 0.1281, "step": 1880 }, { "epoch": 0.22356281050390348, "grad_norm": 0.7320992946624756, "learning_rate": 5.936150347139095e-06, "loss": 0.126, "step": 1890 }, { "epoch": 0.22474568251715166, "grad_norm": 1.1044219732284546, "learning_rate": 5.935432128321762e-06, "loss": 0.1256, "step": 1900 }, { "epoch": 0.2259285545303998, "grad_norm": 0.778630256652832, "learning_rate": 5.934713909504429e-06, "loss": 0.1188, "step": 1910 }, { "epoch": 0.22711142654364797, "grad_norm": 2.845395088195801, "learning_rate": 5.933995690687096e-06, "loss": 0.1233, "step": 1920 }, { "epoch": 0.22829429855689615, "grad_norm": 0.9641199707984924, "learning_rate": 5.933277471869763e-06, "loss": 0.1174, "step": 1930 }, { "epoch": 0.2294771705701443, "grad_norm": 0.6778221130371094, "learning_rate": 5.9325592530524306e-06, "loss": 0.1268, "step": 1940 }, { "epoch": 0.23066004258339248, "grad_norm": 0.6706300377845764, "learning_rate": 5.931841034235097e-06, "loss": 0.1285, "step": 1950 }, { "epoch": 0.23184291459664064, "grad_norm": 0.702949583530426, "learning_rate": 5.931122815417764e-06, "loss": 0.1259, "step": 1960 }, { "epoch": 0.23302578660988882, "grad_norm": 0.6624971628189087, "learning_rate": 5.9304045966004305e-06, "loss": 0.119, "step": 1970 }, { "epoch": 0.23420865862313697, "grad_norm": 0.5697908997535706, "learning_rate": 5.929686377783098e-06, "loss": 0.1115, "step": 1980 }, { "epoch": 0.23539153063638515, "grad_norm": 0.6725486516952515, "learning_rate": 5.928968158965765e-06, "loss": 0.1147, "step": 1990 }, { "epoch": 0.2365744026496333, "grad_norm": 0.6686209440231323, "learning_rate": 5.928249940148432e-06, "loss": 0.1243, "step": 2000 }, { "epoch": 0.23775727466288146, "grad_norm": 0.8630842566490173, "learning_rate": 5.927531721331099e-06, "loss": 0.1208, "step": 2010 }, { "epoch": 0.23894014667612964, "grad_norm": 0.6212077736854553, "learning_rate": 5.926813502513766e-06, "loss": 0.1163, "step": 2020 }, { "epoch": 0.2401230186893778, "grad_norm": 0.6866915225982666, "learning_rate": 5.926095283696433e-06, "loss": 0.1168, "step": 2030 }, { "epoch": 0.24130589070262598, "grad_norm": 0.7627171874046326, "learning_rate": 5.9253770648791e-06, "loss": 0.115, "step": 2040 }, { "epoch": 0.24248876271587413, "grad_norm": 0.8140373229980469, "learning_rate": 5.9246588460617676e-06, "loss": 0.1176, "step": 2050 }, { "epoch": 0.24367163472912232, "grad_norm": 0.7697188854217529, "learning_rate": 5.923940627244434e-06, "loss": 0.111, "step": 2060 }, { "epoch": 0.24485450674237047, "grad_norm": 0.9204941987991333, "learning_rate": 5.923222408427101e-06, "loss": 0.1247, "step": 2070 }, { "epoch": 0.24603737875561865, "grad_norm": 0.7235327959060669, "learning_rate": 5.9225041896097675e-06, "loss": 0.1301, "step": 2080 }, { "epoch": 0.2472202507688668, "grad_norm": 0.7417043447494507, "learning_rate": 5.921785970792435e-06, "loss": 0.1112, "step": 2090 }, { "epoch": 0.248403122782115, "grad_norm": 0.6270762085914612, "learning_rate": 5.921067751975102e-06, "loss": 0.1213, "step": 2100 }, { "epoch": 0.24958599479536314, "grad_norm": 0.6876906156539917, "learning_rate": 5.920349533157769e-06, "loss": 0.1083, "step": 2110 }, { "epoch": 0.2500591436006624, "eval_accuracy": 0.6084938616628406, "eval_animal_abuse/accuracy": 0.9885550786838341, "eval_animal_abuse/f1": 0.0, "eval_animal_abuse/fpr": 0.0, "eval_animal_abuse/precision": 0.0, "eval_animal_abuse/recall": 0.0, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9944605250024953, "eval_child_abuse/f1": 0.0, "eval_child_abuse/fpr": 0.0, "eval_child_abuse/precision": 0.0, "eval_child_abuse/recall": 0.0, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.969624380344013, "eval_controversial_topics,politics/f1": 0.054865424430641824, "eval_controversial_topics,politics/fpr": 0.0006349533223503559, "eval_controversial_topics,politics/precision": 0.5888888888888889, "eval_controversial_topics,politics/recall": 0.028773072747014114, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9447383305053731, "eval_discrimination,stereotype,injustice/f1": 0.6333333333333333, "eval_discrimination,stereotype,injustice/fpr": 0.0254996927747858, "eval_discrimination,stereotype,injustice/precision": 0.6703271028037383, "eval_discrimination,stereotype,injustice/recall": 0.6002092050209205, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9665967994144459, "eval_drug_abuse,weapons,banned_substance/f1": 0.6709275647328745, "eval_drug_abuse,weapons,banned_substance/fpr": 0.011793118036948222, "eval_drug_abuse,weapons,banned_substance/precision": 0.7536818851251841, "eval_drug_abuse,weapons,banned_substance/recall": 0.6045481393975192, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9482815982965699, "eval_financial_crime,property_crime,theft/f1": 0.7342962139988035, "eval_financial_crime,property_crime,theft/fpr": 0.02863829865654308, "eval_financial_crime,property_crime,theft/precision": 0.7343589743589743, "eval_financial_crime,property_crime,theft/recall": 0.7342334643650658, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.7505905446318661, "eval_flagged/aucpr": 0.8500664061618557, "eval_flagged/f1": 0.7516522833810936, "eval_flagged/fpr": 0.15868431909387481, "eval_flagged/precision": 0.8428306092124814, "eval_flagged/recall": 0.678275686825506, "eval_hate_speech,offensive_language/accuracy": 0.9433077153408523, "eval_hate_speech,offensive_language/f1": 0.635741769987174, "eval_hate_speech,offensive_language/fpr": 0.018234971679152168, "eval_hate_speech,offensive_language/precision": 0.7487411883182276, "eval_hate_speech,offensive_language/recall": 0.5523774145616642, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.11300733685493469, "eval_macro_f1": 0.34110166603702263, "eval_macro_precision": 0.41102939821706785, "eval_macro_recall": 0.3148924329979099, "eval_micro_f1": 0.6511538578172225, "eval_micro_precision": 0.7544504825260002, "eval_micro_recall": 0.5727368102850436, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9878397711015737, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8670526000598863, "eval_non_violent_unethical_behavior/f1": 0.599879843796936, "eval_non_violent_unethical_behavior/fpr": 0.042289486796213166, "eval_non_violent_unethical_behavior/precision": 0.7462630792227205, "eval_non_violent_unethical_behavior/recall": 0.5015067805123054, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9691419636024886, "eval_privacy_violation/f1": 0.6680980497405619, "eval_privacy_violation/fpr": 0.013228809407153333, "eval_privacy_violation/precision": 0.7117804041174228, "eval_privacy_violation/recall": 0.6294672960215779, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 768.9608, "eval_samples_per_second": 78.176, "eval_self_harm/accuracy": 0.9931796253784476, "eval_self_harm/f1": 0.0, "eval_self_harm/fpr": 0.0, "eval_self_harm/precision": 0.0, "eval_self_harm/recall": 0.0, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9759290681039359, "eval_sexually_explicit,adult_content/f1": 0.0, "eval_sexually_explicit,adult_content/fpr": 0.0, "eval_sexually_explicit,adult_content/precision": 0.0, "eval_sexually_explicit,adult_content/recall": 0.0, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.222, "eval_terrorism,organized_crime/accuracy": 0.9919985361147153, "eval_terrorism,organized_crime/f1": 0.0, "eval_terrorism,organized_crime/fpr": 0.0, "eval_terrorism,organized_crime/precision": 0.0, "eval_terrorism,organized_crime/recall": 0.0, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.8852014505772365, "eval_violence,aiding_and_abetting,incitement/f1": 0.7782811244979919, "eval_violence,aiding_and_abetting,incitement/fpr": 0.06846924436788887, "eval_violence,aiding_and_abetting,incitement/precision": 0.800370052203793, "eval_violence,aiding_and_abetting,incitement/recall": 0.7573786893446723, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 2114 }, { "epoch": 0.2507688668086113, "grad_norm": 0.7290387153625488, "learning_rate": 5.919631314340436e-06, "loss": 0.1222, "step": 2120 }, { "epoch": 0.25195173882185945, "grad_norm": 0.8140690922737122, "learning_rate": 5.918913095523103e-06, "loss": 0.1205, "step": 2130 }, { "epoch": 0.25313461083510763, "grad_norm": 0.7384748458862305, "learning_rate": 5.91819487670577e-06, "loss": 0.111, "step": 2140 }, { "epoch": 0.2543174828483558, "grad_norm": 0.7703582644462585, "learning_rate": 5.917476657888437e-06, "loss": 0.1102, "step": 2150 }, { "epoch": 0.255500354861604, "grad_norm": 0.9074225425720215, "learning_rate": 5.916758439071104e-06, "loss": 0.1201, "step": 2160 }, { "epoch": 0.2566832268748521, "grad_norm": 0.9322734475135803, "learning_rate": 5.916040220253771e-06, "loss": 0.1067, "step": 2170 }, { "epoch": 0.2578660988881003, "grad_norm": 0.6061740517616272, "learning_rate": 5.9153220014364375e-06, "loss": 0.111, "step": 2180 }, { "epoch": 0.2590489709013485, "grad_norm": 0.8890019655227661, "learning_rate": 5.9146037826191045e-06, "loss": 0.1223, "step": 2190 }, { "epoch": 0.26023184291459667, "grad_norm": 0.7498615980148315, "learning_rate": 5.913885563801771e-06, "loss": 0.1141, "step": 2200 }, { "epoch": 0.2614147149278448, "grad_norm": 0.6340054273605347, "learning_rate": 5.913167344984439e-06, "loss": 0.1203, "step": 2210 }, { "epoch": 0.262597586941093, "grad_norm": 0.864166259765625, "learning_rate": 5.912449126167105e-06, "loss": 0.1176, "step": 2220 }, { "epoch": 0.26378045895434116, "grad_norm": 0.6206380128860474, "learning_rate": 5.911730907349773e-06, "loss": 0.1272, "step": 2230 }, { "epoch": 0.2649633309675893, "grad_norm": 0.6121813058853149, "learning_rate": 5.911012688532439e-06, "loss": 0.1177, "step": 2240 }, { "epoch": 0.26614620298083747, "grad_norm": 0.6177744269371033, "learning_rate": 5.910294469715107e-06, "loss": 0.1114, "step": 2250 }, { "epoch": 0.26732907499408565, "grad_norm": 0.641111433506012, "learning_rate": 5.909576250897774e-06, "loss": 0.1174, "step": 2260 }, { "epoch": 0.26851194700733383, "grad_norm": 0.5114830732345581, "learning_rate": 5.908858032080441e-06, "loss": 0.1082, "step": 2270 }, { "epoch": 0.26969481902058196, "grad_norm": 0.7555609345436096, "learning_rate": 5.908139813263108e-06, "loss": 0.1212, "step": 2280 }, { "epoch": 0.27087769103383014, "grad_norm": 0.6607798337936401, "learning_rate": 5.9074215944457745e-06, "loss": 0.1207, "step": 2290 }, { "epoch": 0.2720605630470783, "grad_norm": 0.6070739030838013, "learning_rate": 5.9067033756284415e-06, "loss": 0.1129, "step": 2300 }, { "epoch": 0.27324343506032645, "grad_norm": 0.7364168167114258, "learning_rate": 5.905985156811108e-06, "loss": 0.1168, "step": 2310 }, { "epoch": 0.2744263070735746, "grad_norm": 0.7492311000823975, "learning_rate": 5.905266937993776e-06, "loss": 0.1171, "step": 2320 }, { "epoch": 0.2756091790868228, "grad_norm": 0.6615567803382874, "learning_rate": 5.904548719176442e-06, "loss": 0.115, "step": 2330 }, { "epoch": 0.276792051100071, "grad_norm": 0.515124499797821, "learning_rate": 5.90383050035911e-06, "loss": 0.1137, "step": 2340 }, { "epoch": 0.2779749231133191, "grad_norm": 0.6674433946609497, "learning_rate": 5.903112281541776e-06, "loss": 0.1059, "step": 2350 }, { "epoch": 0.2791577951265673, "grad_norm": 0.6278784275054932, "learning_rate": 5.902394062724444e-06, "loss": 0.1057, "step": 2360 }, { "epoch": 0.2803406671398155, "grad_norm": 0.8889399170875549, "learning_rate": 5.90167584390711e-06, "loss": 0.1172, "step": 2370 }, { "epoch": 0.28152353915306366, "grad_norm": 0.7860614061355591, "learning_rate": 5.900957625089778e-06, "loss": 0.1157, "step": 2380 }, { "epoch": 0.2827064111663118, "grad_norm": 0.7526464462280273, "learning_rate": 5.900239406272445e-06, "loss": 0.1184, "step": 2390 }, { "epoch": 0.28388928317955997, "grad_norm": 0.7598295211791992, "learning_rate": 5.8995211874551115e-06, "loss": 0.1202, "step": 2400 }, { "epoch": 0.28507215519280815, "grad_norm": 0.6493052244186401, "learning_rate": 5.8988029686377785e-06, "loss": 0.1166, "step": 2410 }, { "epoch": 0.2862550272060563, "grad_norm": 0.6362748742103577, "learning_rate": 5.898084749820445e-06, "loss": 0.1144, "step": 2420 }, { "epoch": 0.28743789921930446, "grad_norm": 0.6504588723182678, "learning_rate": 5.897366531003112e-06, "loss": 0.1146, "step": 2430 }, { "epoch": 0.28862077123255264, "grad_norm": 0.7361891269683838, "learning_rate": 5.896648312185779e-06, "loss": 0.1128, "step": 2440 }, { "epoch": 0.2898036432458008, "grad_norm": 0.838032066822052, "learning_rate": 5.895930093368446e-06, "loss": 0.125, "step": 2450 }, { "epoch": 0.29098651525904895, "grad_norm": 0.7334731221199036, "learning_rate": 5.895211874551113e-06, "loss": 0.115, "step": 2460 }, { "epoch": 0.29216938727229713, "grad_norm": 0.8120739459991455, "learning_rate": 5.89449365573378e-06, "loss": 0.1257, "step": 2470 }, { "epoch": 0.2933522592855453, "grad_norm": 1.258314609527588, "learning_rate": 5.893775436916448e-06, "loss": 0.1203, "step": 2480 }, { "epoch": 0.2945351312987935, "grad_norm": 0.6718959212303162, "learning_rate": 5.893057218099114e-06, "loss": 0.1086, "step": 2490 }, { "epoch": 0.2957180033120416, "grad_norm": 0.6640666127204895, "learning_rate": 5.892338999281782e-06, "loss": 0.1245, "step": 2500 }, { "epoch": 0.2969008753252898, "grad_norm": 1.0004814863204956, "learning_rate": 5.8916207804644485e-06, "loss": 0.1133, "step": 2510 }, { "epoch": 0.298083747338538, "grad_norm": 0.531430184841156, "learning_rate": 5.8909025616471155e-06, "loss": 0.1043, "step": 2520 }, { "epoch": 0.2992666193517861, "grad_norm": 0.560541570186615, "learning_rate": 5.890184342829782e-06, "loss": 0.1091, "step": 2530 }, { "epoch": 0.3004494913650343, "grad_norm": 1.030714511871338, "learning_rate": 5.889466124012449e-06, "loss": 0.1085, "step": 2540 }, { "epoch": 0.3016323633782825, "grad_norm": 0.5054564476013184, "learning_rate": 5.888747905195116e-06, "loss": 0.1128, "step": 2550 }, { "epoch": 0.30281523539153066, "grad_norm": 0.8914719820022583, "learning_rate": 5.888029686377783e-06, "loss": 0.1124, "step": 2560 }, { "epoch": 0.3039981074047788, "grad_norm": 0.5464050769805908, "learning_rate": 5.88731146756045e-06, "loss": 0.1083, "step": 2570 }, { "epoch": 0.30518097941802697, "grad_norm": 0.6888423562049866, "learning_rate": 5.886593248743117e-06, "loss": 0.1106, "step": 2580 }, { "epoch": 0.30636385143127515, "grad_norm": 0.7277998924255371, "learning_rate": 5.885875029925785e-06, "loss": 0.1233, "step": 2590 }, { "epoch": 0.30754672344452333, "grad_norm": 0.5721694827079773, "learning_rate": 5.885156811108451e-06, "loss": 0.1038, "step": 2600 }, { "epoch": 0.30872959545777146, "grad_norm": 0.6838468313217163, "learning_rate": 5.884438592291119e-06, "loss": 0.1148, "step": 2610 }, { "epoch": 0.30991246747101964, "grad_norm": 0.7850162386894226, "learning_rate": 5.883720373473785e-06, "loss": 0.112, "step": 2620 }, { "epoch": 0.3110953394842678, "grad_norm": 0.9477525353431702, "learning_rate": 5.8830021546564525e-06, "loss": 0.1186, "step": 2630 }, { "epoch": 0.31227821149751595, "grad_norm": 0.6580942869186401, "learning_rate": 5.8822839358391185e-06, "loss": 0.1132, "step": 2640 }, { "epoch": 0.3134610835107641, "grad_norm": 0.7668166756629944, "learning_rate": 5.881565717021786e-06, "loss": 0.1199, "step": 2650 }, { "epoch": 0.3146439555240123, "grad_norm": 0.5366681814193726, "learning_rate": 5.880847498204453e-06, "loss": 0.1087, "step": 2660 }, { "epoch": 0.3158268275372605, "grad_norm": 0.6080490946769714, "learning_rate": 5.88012927938712e-06, "loss": 0.1192, "step": 2670 }, { "epoch": 0.3170096995505086, "grad_norm": 0.8137325048446655, "learning_rate": 5.879411060569787e-06, "loss": 0.1123, "step": 2680 }, { "epoch": 0.3181925715637568, "grad_norm": 0.7607372403144836, "learning_rate": 5.878692841752454e-06, "loss": 0.1179, "step": 2690 }, { "epoch": 0.319375443577005, "grad_norm": 0.9860873222351074, "learning_rate": 5.877974622935121e-06, "loss": 0.1149, "step": 2700 }, { "epoch": 0.32055831559025316, "grad_norm": 0.7873374819755554, "learning_rate": 5.877256404117788e-06, "loss": 0.1094, "step": 2710 }, { "epoch": 0.3217411876035013, "grad_norm": 0.6685267686843872, "learning_rate": 5.876538185300455e-06, "loss": 0.1113, "step": 2720 }, { "epoch": 0.32292405961674947, "grad_norm": 0.8916003704071045, "learning_rate": 5.875819966483122e-06, "loss": 0.1138, "step": 2730 }, { "epoch": 0.32410693162999765, "grad_norm": 0.6571289896965027, "learning_rate": 5.875101747665789e-06, "loss": 0.1141, "step": 2740 }, { "epoch": 0.3252898036432458, "grad_norm": 0.6800270676612854, "learning_rate": 5.874383528848456e-06, "loss": 0.1125, "step": 2750 }, { "epoch": 0.32647267565649396, "grad_norm": 0.6767061948776245, "learning_rate": 5.873665310031123e-06, "loss": 0.1146, "step": 2760 }, { "epoch": 0.32765554766974214, "grad_norm": 0.8163686394691467, "learning_rate": 5.87294709121379e-06, "loss": 0.1126, "step": 2770 }, { "epoch": 0.3288384196829903, "grad_norm": 0.664160966873169, "learning_rate": 5.872228872396457e-06, "loss": 0.1101, "step": 2780 }, { "epoch": 0.33002129169623845, "grad_norm": 0.7490115165710449, "learning_rate": 5.871510653579124e-06, "loss": 0.1062, "step": 2790 }, { "epoch": 0.33120416370948663, "grad_norm": 0.6979317665100098, "learning_rate": 5.870792434761791e-06, "loss": 0.1098, "step": 2800 }, { "epoch": 0.3323870357227348, "grad_norm": 0.6810917258262634, "learning_rate": 5.870074215944458e-06, "loss": 0.1219, "step": 2810 }, { "epoch": 0.33356990773598294, "grad_norm": 0.7075555324554443, "learning_rate": 5.869355997127125e-06, "loss": 0.1106, "step": 2820 }, { "epoch": 0.3347527797492311, "grad_norm": 0.8275021314620972, "learning_rate": 5.868637778309792e-06, "loss": 0.1072, "step": 2830 }, { "epoch": 0.3359356517624793, "grad_norm": 0.68669593334198, "learning_rate": 5.867919559492459e-06, "loss": 0.1102, "step": 2840 }, { "epoch": 0.3371185237757275, "grad_norm": 0.8721610307693481, "learning_rate": 5.867201340675126e-06, "loss": 0.1195, "step": 2850 }, { "epoch": 0.3383013957889756, "grad_norm": 0.7322043180465698, "learning_rate": 5.866483121857793e-06, "loss": 0.1103, "step": 2860 }, { "epoch": 0.3394842678022238, "grad_norm": 0.8502479195594788, "learning_rate": 5.8657649030404595e-06, "loss": 0.1145, "step": 2870 }, { "epoch": 0.340667139815472, "grad_norm": 0.6261866688728333, "learning_rate": 5.865046684223127e-06, "loss": 0.1073, "step": 2880 }, { "epoch": 0.34185001182872016, "grad_norm": 0.9226065278053284, "learning_rate": 5.864328465405793e-06, "loss": 0.1141, "step": 2890 }, { "epoch": 0.3430328838419683, "grad_norm": 0.6089714765548706, "learning_rate": 5.863610246588461e-06, "loss": 0.1075, "step": 2900 }, { "epoch": 0.34421575585521647, "grad_norm": 0.9027312994003296, "learning_rate": 5.862892027771127e-06, "loss": 0.1109, "step": 2910 }, { "epoch": 0.34539862786846465, "grad_norm": 0.542414665222168, "learning_rate": 5.862173808953795e-06, "loss": 0.1038, "step": 2920 }, { "epoch": 0.3465814998817128, "grad_norm": 0.7762967348098755, "learning_rate": 5.861455590136462e-06, "loss": 0.1084, "step": 2930 }, { "epoch": 0.34776437189496096, "grad_norm": 0.5834124684333801, "learning_rate": 5.860737371319129e-06, "loss": 0.1073, "step": 2940 }, { "epoch": 0.34894724390820914, "grad_norm": 0.6700456142425537, "learning_rate": 5.860019152501796e-06, "loss": 0.1049, "step": 2950 }, { "epoch": 0.3501301159214573, "grad_norm": 0.6610418558120728, "learning_rate": 5.859300933684463e-06, "loss": 0.1124, "step": 2960 }, { "epoch": 0.35131298793470545, "grad_norm": 0.7411563396453857, "learning_rate": 5.8585827148671295e-06, "loss": 0.1116, "step": 2970 }, { "epoch": 0.3524958599479536, "grad_norm": 0.6440965533256531, "learning_rate": 5.8578644960497965e-06, "loss": 0.1049, "step": 2980 }, { "epoch": 0.3536787319612018, "grad_norm": 0.7202072143554688, "learning_rate": 5.857146277232463e-06, "loss": 0.1144, "step": 2990 }, { "epoch": 0.35486160397445, "grad_norm": 0.6822220683097839, "learning_rate": 5.85642805841513e-06, "loss": 0.1058, "step": 3000 }, { "epoch": 0.3560444759876981, "grad_norm": 0.6334253549575806, "learning_rate": 5.855709839597798e-06, "loss": 0.1067, "step": 3010 }, { "epoch": 0.3572273480009463, "grad_norm": 0.7457287311553955, "learning_rate": 5.854991620780465e-06, "loss": 0.1101, "step": 3020 }, { "epoch": 0.3584102200141945, "grad_norm": 1.6010487079620361, "learning_rate": 5.854273401963132e-06, "loss": 0.1165, "step": 3030 }, { "epoch": 0.3595930920274426, "grad_norm": 0.5436598062515259, "learning_rate": 5.853555183145799e-06, "loss": 0.1057, "step": 3040 }, { "epoch": 0.3607759640406908, "grad_norm": 0.6319106221199036, "learning_rate": 5.852836964328466e-06, "loss": 0.1024, "step": 3050 }, { "epoch": 0.36195883605393897, "grad_norm": 0.810879111289978, "learning_rate": 5.852118745511133e-06, "loss": 0.1115, "step": 3060 }, { "epoch": 0.36314170806718715, "grad_norm": 0.7097305655479431, "learning_rate": 5.8514005266938e-06, "loss": 0.1061, "step": 3070 }, { "epoch": 0.3643245800804353, "grad_norm": 0.6485995650291443, "learning_rate": 5.8506823078764665e-06, "loss": 0.1059, "step": 3080 }, { "epoch": 0.36550745209368346, "grad_norm": 0.6558486819267273, "learning_rate": 5.8499640890591335e-06, "loss": 0.1014, "step": 3090 }, { "epoch": 0.36669032410693164, "grad_norm": 0.8074762225151062, "learning_rate": 5.8492458702418e-06, "loss": 0.1004, "step": 3100 }, { "epoch": 0.3678731961201798, "grad_norm": 0.9040413498878479, "learning_rate": 5.848527651424467e-06, "loss": 0.1108, "step": 3110 }, { "epoch": 0.36905606813342795, "grad_norm": 0.7260216474533081, "learning_rate": 5.847809432607134e-06, "loss": 0.107, "step": 3120 }, { "epoch": 0.37023894014667613, "grad_norm": 0.6044929623603821, "learning_rate": 5.847091213789802e-06, "loss": 0.1149, "step": 3130 }, { "epoch": 0.3714218121599243, "grad_norm": 0.7758194208145142, "learning_rate": 5.846372994972468e-06, "loss": 0.1111, "step": 3140 }, { "epoch": 0.37260468417317244, "grad_norm": 0.7502528429031372, "learning_rate": 5.845654776155136e-06, "loss": 0.1056, "step": 3150 }, { "epoch": 0.3737875561864206, "grad_norm": 0.73923659324646, "learning_rate": 5.844936557337802e-06, "loss": 0.1064, "step": 3160 }, { "epoch": 0.3749704281996688, "grad_norm": 0.6698951721191406, "learning_rate": 5.84421833852047e-06, "loss": 0.1065, "step": 3170 }, { "epoch": 0.376153300212917, "grad_norm": 0.6622264385223389, "learning_rate": 5.843500119703136e-06, "loss": 0.1058, "step": 3180 }, { "epoch": 0.3773361722261651, "grad_norm": 0.5708851218223572, "learning_rate": 5.8427819008858035e-06, "loss": 0.103, "step": 3190 }, { "epoch": 0.3785190442394133, "grad_norm": 0.686040461063385, "learning_rate": 5.8420636820684704e-06, "loss": 0.0998, "step": 3200 }, { "epoch": 0.3797019162526615, "grad_norm": 0.6299132108688354, "learning_rate": 5.841345463251137e-06, "loss": 0.101, "step": 3210 }, { "epoch": 0.3808847882659096, "grad_norm": 0.6035275459289551, "learning_rate": 5.840627244433804e-06, "loss": 0.0949, "step": 3220 }, { "epoch": 0.3820676602791578, "grad_norm": 0.6723672151565552, "learning_rate": 5.839909025616471e-06, "loss": 0.109, "step": 3230 }, { "epoch": 0.38325053229240597, "grad_norm": 0.6812660098075867, "learning_rate": 5.839190806799138e-06, "loss": 0.1119, "step": 3240 }, { "epoch": 0.38443340430565415, "grad_norm": 0.5562373995780945, "learning_rate": 5.838472587981805e-06, "loss": 0.1105, "step": 3250 }, { "epoch": 0.3856162763189023, "grad_norm": 0.6611257195472717, "learning_rate": 5.837754369164473e-06, "loss": 0.1127, "step": 3260 }, { "epoch": 0.38679914833215046, "grad_norm": 0.6648499369621277, "learning_rate": 5.837036150347139e-06, "loss": 0.1122, "step": 3270 }, { "epoch": 0.38798202034539864, "grad_norm": 0.644600510597229, "learning_rate": 5.836317931529807e-06, "loss": 0.1094, "step": 3280 }, { "epoch": 0.3891648923586468, "grad_norm": 0.7623692154884338, "learning_rate": 5.835599712712473e-06, "loss": 0.1155, "step": 3290 }, { "epoch": 0.39034776437189495, "grad_norm": 0.9358124136924744, "learning_rate": 5.8348814938951405e-06, "loss": 0.1063, "step": 3300 }, { "epoch": 0.3915306363851431, "grad_norm": 0.6110962629318237, "learning_rate": 5.8341632750778074e-06, "loss": 0.1021, "step": 3310 }, { "epoch": 0.3927135083983913, "grad_norm": 0.46405160427093506, "learning_rate": 5.833445056260474e-06, "loss": 0.1097, "step": 3320 }, { "epoch": 0.39389638041163944, "grad_norm": 0.5891819596290588, "learning_rate": 5.832726837443141e-06, "loss": 0.1049, "step": 3330 }, { "epoch": 0.3950792524248876, "grad_norm": 0.646066427230835, "learning_rate": 5.832008618625808e-06, "loss": 0.1059, "step": 3340 }, { "epoch": 0.3962621244381358, "grad_norm": 0.6550779938697815, "learning_rate": 5.831290399808475e-06, "loss": 0.1094, "step": 3350 }, { "epoch": 0.397444996451384, "grad_norm": 0.5682654976844788, "learning_rate": 5.830572180991142e-06, "loss": 0.1123, "step": 3360 }, { "epoch": 0.3986278684646321, "grad_norm": 0.6977178454399109, "learning_rate": 5.829853962173809e-06, "loss": 0.1066, "step": 3370 }, { "epoch": 0.3998107404778803, "grad_norm": 0.6778519749641418, "learning_rate": 5.829135743356476e-06, "loss": 0.1126, "step": 3380 }, { "epoch": 0.40099361249112847, "grad_norm": 0.9452935457229614, "learning_rate": 5.828417524539143e-06, "loss": 0.1035, "step": 3390 }, { "epoch": 0.40217648450437665, "grad_norm": 0.7760059833526611, "learning_rate": 5.827699305721811e-06, "loss": 0.101, "step": 3400 }, { "epoch": 0.4033593565176248, "grad_norm": 0.6987895965576172, "learning_rate": 5.826981086904477e-06, "loss": 0.1013, "step": 3410 }, { "epoch": 0.40454222853087296, "grad_norm": 0.643004298210144, "learning_rate": 5.8262628680871444e-06, "loss": 0.0977, "step": 3420 }, { "epoch": 0.40572510054412114, "grad_norm": 0.776431679725647, "learning_rate": 5.8255446492698105e-06, "loss": 0.1042, "step": 3430 }, { "epoch": 0.40690797255736927, "grad_norm": 0.5766503214836121, "learning_rate": 5.824826430452478e-06, "loss": 0.1017, "step": 3440 }, { "epoch": 0.40809084457061745, "grad_norm": 0.8150132298469543, "learning_rate": 5.824108211635144e-06, "loss": 0.1035, "step": 3450 }, { "epoch": 0.40927371658386563, "grad_norm": 0.7687274217605591, "learning_rate": 5.823389992817812e-06, "loss": 0.1141, "step": 3460 }, { "epoch": 0.4104565885971138, "grad_norm": 0.7500854134559631, "learning_rate": 5.822671774000479e-06, "loss": 0.0987, "step": 3470 }, { "epoch": 0.41163946061036194, "grad_norm": 0.5279942750930786, "learning_rate": 5.821953555183146e-06, "loss": 0.1119, "step": 3480 }, { "epoch": 0.4128223326236101, "grad_norm": 0.6052038073539734, "learning_rate": 5.821235336365813e-06, "loss": 0.1037, "step": 3490 }, { "epoch": 0.4140052046368583, "grad_norm": 0.580561101436615, "learning_rate": 5.82051711754848e-06, "loss": 0.0939, "step": 3500 }, { "epoch": 0.4151880766501065, "grad_norm": 0.5403978824615479, "learning_rate": 5.819798898731148e-06, "loss": 0.1071, "step": 3510 }, { "epoch": 0.4163709486633546, "grad_norm": 0.5758402943611145, "learning_rate": 5.819080679913814e-06, "loss": 0.0924, "step": 3520 }, { "epoch": 0.4175538206766028, "grad_norm": 0.8839231729507446, "learning_rate": 5.8183624610964814e-06, "loss": 0.0999, "step": 3530 }, { "epoch": 0.418736692689851, "grad_norm": 0.6029626727104187, "learning_rate": 5.8176442422791475e-06, "loss": 0.1115, "step": 3540 }, { "epoch": 0.4199195647030991, "grad_norm": 0.750297486782074, "learning_rate": 5.816926023461815e-06, "loss": 0.1125, "step": 3550 }, { "epoch": 0.4211024367163473, "grad_norm": 0.6742335557937622, "learning_rate": 5.816207804644481e-06, "loss": 0.1, "step": 3560 }, { "epoch": 0.42228530872959547, "grad_norm": 0.6136927604675293, "learning_rate": 5.815489585827149e-06, "loss": 0.1009, "step": 3570 }, { "epoch": 0.42346818074284365, "grad_norm": 0.5102875232696533, "learning_rate": 5.814771367009816e-06, "loss": 0.1001, "step": 3580 }, { "epoch": 0.4246510527560918, "grad_norm": 0.607638955116272, "learning_rate": 5.814053148192483e-06, "loss": 0.0943, "step": 3590 }, { "epoch": 0.42583392476933996, "grad_norm": 0.48254504799842834, "learning_rate": 5.81333492937515e-06, "loss": 0.1058, "step": 3600 }, { "epoch": 0.42701679678258814, "grad_norm": 0.5645486116409302, "learning_rate": 5.812616710557817e-06, "loss": 0.1014, "step": 3610 }, { "epoch": 0.42819966879583626, "grad_norm": 0.5330508947372437, "learning_rate": 5.811898491740484e-06, "loss": 0.1053, "step": 3620 }, { "epoch": 0.42938254080908445, "grad_norm": 0.5580485463142395, "learning_rate": 5.811180272923151e-06, "loss": 0.1025, "step": 3630 }, { "epoch": 0.4305654128223326, "grad_norm": 0.6958622932434082, "learning_rate": 5.810462054105818e-06, "loss": 0.0988, "step": 3640 }, { "epoch": 0.4317482848355808, "grad_norm": 0.6975813508033752, "learning_rate": 5.8097438352884845e-06, "loss": 0.0943, "step": 3650 }, { "epoch": 0.43293115684882894, "grad_norm": 0.7741396427154541, "learning_rate": 5.8090256164711514e-06, "loss": 0.1059, "step": 3660 }, { "epoch": 0.4341140288620771, "grad_norm": 0.7456374764442444, "learning_rate": 5.808307397653819e-06, "loss": 0.1001, "step": 3670 }, { "epoch": 0.4352969008753253, "grad_norm": 0.7208385467529297, "learning_rate": 5.807589178836485e-06, "loss": 0.101, "step": 3680 }, { "epoch": 0.4364797728885735, "grad_norm": 0.7719465494155884, "learning_rate": 5.806870960019153e-06, "loss": 0.1044, "step": 3690 }, { "epoch": 0.4376626449018216, "grad_norm": 0.609201967716217, "learning_rate": 5.806152741201819e-06, "loss": 0.0992, "step": 3700 }, { "epoch": 0.4388455169150698, "grad_norm": 0.5958719253540039, "learning_rate": 5.805434522384487e-06, "loss": 0.098, "step": 3710 }, { "epoch": 0.44002838892831797, "grad_norm": 0.561337411403656, "learning_rate": 5.804716303567153e-06, "loss": 0.0981, "step": 3720 }, { "epoch": 0.4412112609415661, "grad_norm": 0.6900650858879089, "learning_rate": 5.803998084749821e-06, "loss": 0.1008, "step": 3730 }, { "epoch": 0.4423941329548143, "grad_norm": 0.6921085715293884, "learning_rate": 5.803279865932488e-06, "loss": 0.0988, "step": 3740 }, { "epoch": 0.44357700496806246, "grad_norm": 0.4769885241985321, "learning_rate": 5.802561647115155e-06, "loss": 0.0978, "step": 3750 }, { "epoch": 0.44475987698131064, "grad_norm": 0.5868682861328125, "learning_rate": 5.8018434282978215e-06, "loss": 0.0955, "step": 3760 }, { "epoch": 0.44594274899455877, "grad_norm": 0.6369176506996155, "learning_rate": 5.8011252094804884e-06, "loss": 0.104, "step": 3770 }, { "epoch": 0.44712562100780695, "grad_norm": 0.6108672618865967, "learning_rate": 5.800406990663156e-06, "loss": 0.1022, "step": 3780 }, { "epoch": 0.44830849302105513, "grad_norm": 0.5287630558013916, "learning_rate": 5.799688771845822e-06, "loss": 0.104, "step": 3790 }, { "epoch": 0.4494913650343033, "grad_norm": 0.6586527824401855, "learning_rate": 5.79897055302849e-06, "loss": 0.1051, "step": 3800 }, { "epoch": 0.45067423704755144, "grad_norm": 0.6159712672233582, "learning_rate": 5.798252334211156e-06, "loss": 0.1025, "step": 3810 }, { "epoch": 0.4518571090607996, "grad_norm": 0.6884573101997375, "learning_rate": 5.797534115393824e-06, "loss": 0.1004, "step": 3820 }, { "epoch": 0.4530399810740478, "grad_norm": 0.712568461894989, "learning_rate": 5.79681589657649e-06, "loss": 0.1014, "step": 3830 }, { "epoch": 0.45422285308729593, "grad_norm": 0.5936951637268066, "learning_rate": 5.796097677759158e-06, "loss": 0.0994, "step": 3840 }, { "epoch": 0.4554057251005441, "grad_norm": 0.5803804993629456, "learning_rate": 5.795379458941825e-06, "loss": 0.1027, "step": 3850 }, { "epoch": 0.4565885971137923, "grad_norm": 0.6160012483596802, "learning_rate": 5.794661240124492e-06, "loss": 0.0967, "step": 3860 }, { "epoch": 0.4577714691270405, "grad_norm": 0.6074231266975403, "learning_rate": 5.7939430213071585e-06, "loss": 0.0999, "step": 3870 }, { "epoch": 0.4589543411402886, "grad_norm": 0.7833523750305176, "learning_rate": 5.7932248024898254e-06, "loss": 0.098, "step": 3880 }, { "epoch": 0.4601372131535368, "grad_norm": 0.6724730134010315, "learning_rate": 5.792506583672492e-06, "loss": 0.1018, "step": 3890 }, { "epoch": 0.46132008516678497, "grad_norm": 0.6990271210670471, "learning_rate": 5.791788364855159e-06, "loss": 0.0942, "step": 3900 }, { "epoch": 0.46250295718003315, "grad_norm": 0.588724672794342, "learning_rate": 5.791070146037826e-06, "loss": 0.1054, "step": 3910 }, { "epoch": 0.4636858291932813, "grad_norm": 0.9868181943893433, "learning_rate": 5.790351927220493e-06, "loss": 0.0997, "step": 3920 }, { "epoch": 0.46486870120652946, "grad_norm": 0.5909291505813599, "learning_rate": 5.78963370840316e-06, "loss": 0.1019, "step": 3930 }, { "epoch": 0.46605157321977764, "grad_norm": 0.6021145582199097, "learning_rate": 5.788915489585827e-06, "loss": 0.1065, "step": 3940 }, { "epoch": 0.46723444523302576, "grad_norm": 0.6307471990585327, "learning_rate": 5.788197270768494e-06, "loss": 0.1004, "step": 3950 }, { "epoch": 0.46841731724627395, "grad_norm": 0.6723840832710266, "learning_rate": 5.787479051951162e-06, "loss": 0.1003, "step": 3960 }, { "epoch": 0.4696001892595221, "grad_norm": 0.8404982089996338, "learning_rate": 5.786760833133828e-06, "loss": 0.0924, "step": 3970 }, { "epoch": 0.4707830612727703, "grad_norm": 0.5327188968658447, "learning_rate": 5.7860426143164955e-06, "loss": 0.1021, "step": 3980 }, { "epoch": 0.47196593328601844, "grad_norm": 0.9069047570228577, "learning_rate": 5.785324395499162e-06, "loss": 0.1039, "step": 3990 }, { "epoch": 0.4731488052992666, "grad_norm": 0.8306993842124939, "learning_rate": 5.784606176681829e-06, "loss": 0.1056, "step": 4000 }, { "epoch": 0.4743316773125148, "grad_norm": 0.9778540134429932, "learning_rate": 5.783887957864496e-06, "loss": 0.0985, "step": 4010 }, { "epoch": 0.4755145493257629, "grad_norm": 0.5018677115440369, "learning_rate": 5.783169739047163e-06, "loss": 0.0896, "step": 4020 }, { "epoch": 0.4766974213390111, "grad_norm": 0.7373573184013367, "learning_rate": 5.78245152022983e-06, "loss": 0.0971, "step": 4030 }, { "epoch": 0.4778802933522593, "grad_norm": 0.6151018738746643, "learning_rate": 5.781733301412497e-06, "loss": 0.1048, "step": 4040 }, { "epoch": 0.47906316536550747, "grad_norm": 0.5788290500640869, "learning_rate": 5.781015082595165e-06, "loss": 0.108, "step": 4050 }, { "epoch": 0.4802460373787556, "grad_norm": 0.5560311079025269, "learning_rate": 5.780296863777831e-06, "loss": 0.0965, "step": 4060 }, { "epoch": 0.4814289093920038, "grad_norm": 0.5658525824546814, "learning_rate": 5.779578644960499e-06, "loss": 0.0982, "step": 4070 }, { "epoch": 0.48261178140525196, "grad_norm": 0.6230155229568481, "learning_rate": 5.778860426143165e-06, "loss": 0.102, "step": 4080 }, { "epoch": 0.48379465341850014, "grad_norm": 0.5433495044708252, "learning_rate": 5.7781422073258325e-06, "loss": 0.1004, "step": 4090 }, { "epoch": 0.48497752543174827, "grad_norm": 0.7886695265769958, "learning_rate": 5.777423988508499e-06, "loss": 0.1052, "step": 4100 }, { "epoch": 0.48616039744499645, "grad_norm": 0.6581328511238098, "learning_rate": 5.776705769691166e-06, "loss": 0.1077, "step": 4110 }, { "epoch": 0.48734326945824463, "grad_norm": 0.6687142252922058, "learning_rate": 5.775987550873833e-06, "loss": 0.0938, "step": 4120 }, { "epoch": 0.48852614147149276, "grad_norm": 0.5832991600036621, "learning_rate": 5.7752693320565e-06, "loss": 0.1007, "step": 4130 }, { "epoch": 0.48970901348474094, "grad_norm": 0.7504956126213074, "learning_rate": 5.774551113239167e-06, "loss": 0.0969, "step": 4140 }, { "epoch": 0.4908918854979891, "grad_norm": 0.6121857762336731, "learning_rate": 5.773832894421834e-06, "loss": 0.1026, "step": 4150 }, { "epoch": 0.4920747575112373, "grad_norm": 0.4761281907558441, "learning_rate": 5.773114675604501e-06, "loss": 0.0973, "step": 4160 }, { "epoch": 0.49325762952448543, "grad_norm": 0.5653198957443237, "learning_rate": 5.772396456787168e-06, "loss": 0.0937, "step": 4170 }, { "epoch": 0.4944405015377336, "grad_norm": 0.8240869641304016, "learning_rate": 5.771678237969835e-06, "loss": 0.0997, "step": 4180 }, { "epoch": 0.4956233735509818, "grad_norm": 0.5740234851837158, "learning_rate": 5.770960019152502e-06, "loss": 0.1011, "step": 4190 }, { "epoch": 0.49680624556423, "grad_norm": 0.7603582739830017, "learning_rate": 5.770241800335169e-06, "loss": 0.0951, "step": 4200 }, { "epoch": 0.4979891175774781, "grad_norm": 0.6935368776321411, "learning_rate": 5.769523581517836e-06, "loss": 0.1023, "step": 4210 }, { "epoch": 0.4991719895907263, "grad_norm": 0.5736734867095947, "learning_rate": 5.7688053627005025e-06, "loss": 0.0926, "step": 4220 }, { "epoch": 0.5001182872013248, "eval_accuracy": 0.6631400339355225, "eval_animal_abuse/accuracy": 0.9934291512792361, "eval_animal_abuse/f1": 0.6626814688300597, "eval_animal_abuse/fpr": 0.0015986268636623672, "eval_animal_abuse/precision": 0.8033126293995859, "eval_animal_abuse/recall": 0.563953488372093, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9944272548823901, "eval_child_abuse/f1": 0.0, "eval_child_abuse/fpr": 3.3455445710175416e-05, "eval_child_abuse/precision": 0.0, "eval_child_abuse/recall": 0.0, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9702731476860631, "eval_controversial_topics,politics/f1": 0.4570039501671225, "eval_controversial_topics,politics/fpr": 0.01196114772103238, "eval_controversial_topics,politics/precision": 0.5189786059351277, "eval_controversial_topics,politics/recall": 0.40825190010857765, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9519413115081345, "eval_discrimination,stereotype,injustice/f1": 0.7028083530500977, "eval_discrimination,stereotype,injustice/fpr": 0.02755990891675999, "eval_discrimination,stereotype,injustice/precision": 0.691358024691358, "eval_discrimination,stereotype,injustice/recall": 0.7146443514644352, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9721030042918455, "eval_drug_abuse,weapons,banned_substance/f1": 0.7482360006005104, "eval_drug_abuse,weapons,banned_substance/fpr": 0.013802707657594109, "eval_drug_abuse,weapons,banned_substance/precision": 0.7609160305343512, "eval_drug_abuse,weapons,banned_substance/recall": 0.7359716479621973, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9583624446884254, "eval_financial_crime,property_crime,theft/f1": 0.7850948742165365, "eval_financial_crime,property_crime,theft/fpr": 0.02255680666384088, "eval_financial_crime,property_crime,theft/precision": 0.7888198757763976, "eval_financial_crime,property_crime,theft/recall": 0.7814048880533242, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8375919087067905, "eval_flagged/aucpr": 0.8970477139672008, "eval_flagged/f1": 0.8516283947052476, "eval_flagged/fpr": 0.162434834789783, "eval_flagged/precision": 0.8661205564142195, "eval_flagged/recall": 0.8376132253146393, "eval_hate_speech,offensive_language/accuracy": 0.9478823568553082, "eval_hate_speech,offensive_language/f1": 0.6738836265223275, "eval_hate_speech,offensive_language/fpr": 0.018015713502649335, "eval_hate_speech,offensive_language/precision": 0.7665166942931565, "eval_hate_speech,offensive_language/recall": 0.6012258543833581, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.09459679573774338, "eval_macro_f1": 0.5346106922180488, "eval_macro_precision": 0.5849093655958313, "eval_macro_recall": 0.5048232287062734, "eval_micro_f1": 0.7277981939391701, "eval_micro_precision": 0.7569576490924805, "eval_micro_recall": 0.7008019631203656, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9878065009814685, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 3.3679672633581944e-05, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8716937818145524, "eval_non_violent_unethical_behavior/f1": 0.6761284904471971, "eval_non_violent_unethical_behavior/fpr": 0.07926424182029547, "eval_non_violent_unethical_behavior/precision": 0.6783216783216783, "eval_non_violent_unethical_behavior/recall": 0.6739494391428094, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9794723358951326, "eval_privacy_violation/f1": 0.7941274607941274, "eval_privacy_violation/fpr": 0.011338979491845714, "eval_privacy_violation/precision": 0.7859973579920739, "eval_privacy_violation/recall": 0.8024275118004046, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.6147, "eval_samples_per_second": 100.422, "eval_self_harm/accuracy": 0.9955750740260172, "eval_self_harm/f1": 0.5460750853242321, "eval_self_harm/fpr": 0.0002679887444727317, "eval_self_harm/precision": 0.9090909090909091, "eval_self_harm/recall": 0.3902439024390244, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.981950959842965, "eval_sexually_explicit,adult_content/f1": 0.6050236621769203, "eval_sexually_explicit,adult_content/fpr": 0.007994272759813851, "eval_sexually_explicit,adult_content/precision": 0.6392307692307693, "eval_sexually_explicit,adult_content/recall": 0.5742916378714582, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.57, "eval_terrorism,organized_crime/accuracy": 0.9919652659946102, "eval_terrorism,organized_crime/f1": 0.0, "eval_terrorism,organized_crime/fpr": 3.353847701775857e-05, "eval_terrorism,organized_crime/precision": 0.0, "eval_terrorism,organized_crime/recall": 0.0, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.912715839904182, "eval_violence,aiding_and_abetting,incitement/f1": 0.8334867189235505, "eval_violence,aiding_and_abetting,incitement/fpr": 0.05409999546711378, "eval_violence,aiding_and_abetting,incitement/precision": 0.8461885430762291, "eval_violence,aiding_and_abetting,incitement/recall": 0.821160580290145, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 4228 }, { "epoch": 0.5003548616039745, "grad_norm": 0.6602314710617065, "learning_rate": 5.76808714388317e-06, "loss": 0.0947, "step": 4230 }, { "epoch": 0.5015377336172226, "grad_norm": 0.6039230823516846, "learning_rate": 5.767368925065836e-06, "loss": 0.1076, "step": 4240 }, { "epoch": 0.5027206056304708, "grad_norm": 0.6553640365600586, "learning_rate": 5.766650706248504e-06, "loss": 0.1076, "step": 4250 }, { "epoch": 0.5039034776437189, "grad_norm": 0.5243545174598694, "learning_rate": 5.76593248743117e-06, "loss": 0.0935, "step": 4260 }, { "epoch": 0.5050863496569671, "grad_norm": 0.5985537767410278, "learning_rate": 5.765214268613838e-06, "loss": 0.0966, "step": 4270 }, { "epoch": 0.5062692216702153, "grad_norm": 0.5156497359275818, "learning_rate": 5.764496049796505e-06, "loss": 0.0975, "step": 4280 }, { "epoch": 0.5074520936834634, "grad_norm": 0.5800735950469971, "learning_rate": 5.763777830979172e-06, "loss": 0.1002, "step": 4290 }, { "epoch": 0.5086349656967116, "grad_norm": 0.47967925667762756, "learning_rate": 5.763059612161839e-06, "loss": 0.0971, "step": 4300 }, { "epoch": 0.5098178377099598, "grad_norm": 0.6986796855926514, "learning_rate": 5.762341393344506e-06, "loss": 0.1083, "step": 4310 }, { "epoch": 0.511000709723208, "grad_norm": 0.5263969302177429, "learning_rate": 5.7616231745271734e-06, "loss": 0.1, "step": 4320 }, { "epoch": 0.5121835817364561, "grad_norm": 0.6327197551727295, "learning_rate": 5.7609049557098395e-06, "loss": 0.0988, "step": 4330 }, { "epoch": 0.5133664537497042, "grad_norm": 0.47799837589263916, "learning_rate": 5.760186736892507e-06, "loss": 0.0978, "step": 4340 }, { "epoch": 0.5145493257629524, "grad_norm": 0.5215609073638916, "learning_rate": 5.759468518075173e-06, "loss": 0.0907, "step": 4350 }, { "epoch": 0.5157321977762006, "grad_norm": 0.692636251449585, "learning_rate": 5.758750299257841e-06, "loss": 0.0975, "step": 4360 }, { "epoch": 0.5169150697894488, "grad_norm": 0.7149266004562378, "learning_rate": 5.758032080440507e-06, "loss": 0.1044, "step": 4370 }, { "epoch": 0.518097941802697, "grad_norm": 0.6370978355407715, "learning_rate": 5.757313861623175e-06, "loss": 0.0938, "step": 4380 }, { "epoch": 0.5192808138159452, "grad_norm": 0.6770809888839722, "learning_rate": 5.756595642805842e-06, "loss": 0.0959, "step": 4390 }, { "epoch": 0.5204636858291933, "grad_norm": 0.9177107810974121, "learning_rate": 5.755877423988509e-06, "loss": 0.0943, "step": 4400 }, { "epoch": 0.5216465578424414, "grad_norm": 0.7914104461669922, "learning_rate": 5.755159205171176e-06, "loss": 0.1059, "step": 4410 }, { "epoch": 0.5228294298556896, "grad_norm": 0.6397340297698975, "learning_rate": 5.754440986353843e-06, "loss": 0.0944, "step": 4420 }, { "epoch": 0.5240123018689378, "grad_norm": 0.7854965925216675, "learning_rate": 5.7537227675365096e-06, "loss": 0.0951, "step": 4430 }, { "epoch": 0.525195173882186, "grad_norm": 0.542870044708252, "learning_rate": 5.7530045487191765e-06, "loss": 0.0937, "step": 4440 }, { "epoch": 0.5263780458954341, "grad_norm": 0.7200729846954346, "learning_rate": 5.752286329901843e-06, "loss": 0.0993, "step": 4450 }, { "epoch": 0.5275609179086823, "grad_norm": 0.5843743085861206, "learning_rate": 5.75156811108451e-06, "loss": 0.1064, "step": 4460 }, { "epoch": 0.5287437899219305, "grad_norm": 0.6201034784317017, "learning_rate": 5.750849892267177e-06, "loss": 0.1031, "step": 4470 }, { "epoch": 0.5299266619351786, "grad_norm": 0.5595511198043823, "learning_rate": 5.750131673449844e-06, "loss": 0.1006, "step": 4480 }, { "epoch": 0.5311095339484267, "grad_norm": 0.66150963306427, "learning_rate": 5.749413454632511e-06, "loss": 0.1013, "step": 4490 }, { "epoch": 0.5322924059616749, "grad_norm": 0.516914427280426, "learning_rate": 5.748695235815179e-06, "loss": 0.0969, "step": 4500 }, { "epoch": 0.5334752779749231, "grad_norm": 0.6760537028312683, "learning_rate": 5.747977016997845e-06, "loss": 0.1046, "step": 4510 }, { "epoch": 0.5346581499881713, "grad_norm": 0.6804845929145813, "learning_rate": 5.747258798180513e-06, "loss": 0.0987, "step": 4520 }, { "epoch": 0.5358410220014195, "grad_norm": 0.4774968922138214, "learning_rate": 5.74654057936318e-06, "loss": 0.0822, "step": 4530 }, { "epoch": 0.5370238940146677, "grad_norm": 0.5536130666732788, "learning_rate": 5.7458223605458466e-06, "loss": 0.0966, "step": 4540 }, { "epoch": 0.5382067660279157, "grad_norm": 0.6262601613998413, "learning_rate": 5.7451041417285135e-06, "loss": 0.0912, "step": 4550 }, { "epoch": 0.5393896380411639, "grad_norm": 0.4774028956890106, "learning_rate": 5.74438592291118e-06, "loss": 0.1024, "step": 4560 }, { "epoch": 0.5405725100544121, "grad_norm": 0.6938728094100952, "learning_rate": 5.743667704093847e-06, "loss": 0.1005, "step": 4570 }, { "epoch": 0.5417553820676603, "grad_norm": 0.5075764060020447, "learning_rate": 5.742949485276514e-06, "loss": 0.0921, "step": 4580 }, { "epoch": 0.5429382540809085, "grad_norm": 0.6124941110610962, "learning_rate": 5.742231266459182e-06, "loss": 0.1002, "step": 4590 }, { "epoch": 0.5441211260941566, "grad_norm": 0.6924780607223511, "learning_rate": 5.741513047641848e-06, "loss": 0.0896, "step": 4600 }, { "epoch": 0.5453039981074048, "grad_norm": 0.4520123302936554, "learning_rate": 5.740794828824516e-06, "loss": 0.1037, "step": 4610 }, { "epoch": 0.5464868701206529, "grad_norm": 0.6680993437767029, "learning_rate": 5.740076610007182e-06, "loss": 0.1054, "step": 4620 }, { "epoch": 0.5476697421339011, "grad_norm": 0.5666351914405823, "learning_rate": 5.73935839118985e-06, "loss": 0.1007, "step": 4630 }, { "epoch": 0.5488526141471493, "grad_norm": 0.6350187659263611, "learning_rate": 5.738640172372516e-06, "loss": 0.0933, "step": 4640 }, { "epoch": 0.5500354861603974, "grad_norm": 0.6256552338600159, "learning_rate": 5.7379219535551836e-06, "loss": 0.0994, "step": 4650 }, { "epoch": 0.5512183581736456, "grad_norm": 0.5422120690345764, "learning_rate": 5.7372037347378505e-06, "loss": 0.1053, "step": 4660 }, { "epoch": 0.5524012301868938, "grad_norm": 0.6361149549484253, "learning_rate": 5.736485515920517e-06, "loss": 0.101, "step": 4670 }, { "epoch": 0.553584102200142, "grad_norm": 0.5249139070510864, "learning_rate": 5.735767297103184e-06, "loss": 0.1041, "step": 4680 }, { "epoch": 0.5547669742133902, "grad_norm": 0.47622352838516235, "learning_rate": 5.735049078285851e-06, "loss": 0.0916, "step": 4690 }, { "epoch": 0.5559498462266382, "grad_norm": 0.4758620262145996, "learning_rate": 5.734330859468518e-06, "loss": 0.1068, "step": 4700 }, { "epoch": 0.5571327182398864, "grad_norm": 0.6519463658332825, "learning_rate": 5.733612640651185e-06, "loss": 0.0988, "step": 4710 }, { "epoch": 0.5583155902531346, "grad_norm": 0.6158110499382019, "learning_rate": 5.732894421833852e-06, "loss": 0.0957, "step": 4720 }, { "epoch": 0.5594984622663828, "grad_norm": 0.7673488259315491, "learning_rate": 5.732176203016519e-06, "loss": 0.0879, "step": 4730 }, { "epoch": 0.560681334279631, "grad_norm": 0.5824710726737976, "learning_rate": 5.731457984199186e-06, "loss": 0.0931, "step": 4740 }, { "epoch": 0.5618642062928791, "grad_norm": 0.627084493637085, "learning_rate": 5.730739765381853e-06, "loss": 0.0885, "step": 4750 }, { "epoch": 0.5630470783061273, "grad_norm": 0.616784930229187, "learning_rate": 5.73002154656452e-06, "loss": 0.0973, "step": 4760 }, { "epoch": 0.5642299503193754, "grad_norm": 0.5228744149208069, "learning_rate": 5.7293033277471875e-06, "loss": 0.0962, "step": 4770 }, { "epoch": 0.5654128223326236, "grad_norm": 0.6673073768615723, "learning_rate": 5.728585108929854e-06, "loss": 0.0875, "step": 4780 }, { "epoch": 0.5665956943458718, "grad_norm": 0.5446866154670715, "learning_rate": 5.727866890112521e-06, "loss": 0.0927, "step": 4790 }, { "epoch": 0.5677785663591199, "grad_norm": 0.8148944973945618, "learning_rate": 5.727148671295188e-06, "loss": 0.097, "step": 4800 }, { "epoch": 0.5689614383723681, "grad_norm": 0.6075975894927979, "learning_rate": 5.726430452477855e-06, "loss": 0.1102, "step": 4810 }, { "epoch": 0.5701443103856163, "grad_norm": 0.463281512260437, "learning_rate": 5.725712233660522e-06, "loss": 0.0933, "step": 4820 }, { "epoch": 0.5713271823988645, "grad_norm": 0.5787643790245056, "learning_rate": 5.724994014843189e-06, "loss": 0.101, "step": 4830 }, { "epoch": 0.5725100544121126, "grad_norm": 0.4733627438545227, "learning_rate": 5.724275796025856e-06, "loss": 0.0898, "step": 4840 }, { "epoch": 0.5736929264253607, "grad_norm": 0.6306363940238953, "learning_rate": 5.723557577208523e-06, "loss": 0.1007, "step": 4850 }, { "epoch": 0.5748757984386089, "grad_norm": 0.5726945400238037, "learning_rate": 5.72283935839119e-06, "loss": 0.0975, "step": 4860 }, { "epoch": 0.5760586704518571, "grad_norm": 0.6072408556938171, "learning_rate": 5.722121139573857e-06, "loss": 0.1001, "step": 4870 }, { "epoch": 0.5772415424651053, "grad_norm": 0.6986492872238159, "learning_rate": 5.7214029207565245e-06, "loss": 0.0934, "step": 4880 }, { "epoch": 0.5784244144783535, "grad_norm": 0.6815310716629028, "learning_rate": 5.7206847019391906e-06, "loss": 0.0946, "step": 4890 }, { "epoch": 0.5796072864916016, "grad_norm": 0.5303977131843567, "learning_rate": 5.719966483121858e-06, "loss": 0.097, "step": 4900 }, { "epoch": 0.5807901585048498, "grad_norm": 0.5608043074607849, "learning_rate": 5.719248264304524e-06, "loss": 0.0907, "step": 4910 }, { "epoch": 0.5819730305180979, "grad_norm": 0.6419598460197449, "learning_rate": 5.718530045487192e-06, "loss": 0.0863, "step": 4920 }, { "epoch": 0.5831559025313461, "grad_norm": 0.6530010104179382, "learning_rate": 5.717811826669859e-06, "loss": 0.094, "step": 4930 }, { "epoch": 0.5843387745445943, "grad_norm": 0.8199701309204102, "learning_rate": 5.717093607852526e-06, "loss": 0.0931, "step": 4940 }, { "epoch": 0.5855216465578424, "grad_norm": 0.7080064415931702, "learning_rate": 5.716375389035193e-06, "loss": 0.0921, "step": 4950 }, { "epoch": 0.5867045185710906, "grad_norm": 0.47231751680374146, "learning_rate": 5.71565717021786e-06, "loss": 0.0938, "step": 4960 }, { "epoch": 0.5878873905843388, "grad_norm": 0.6132064461708069, "learning_rate": 5.714938951400527e-06, "loss": 0.1019, "step": 4970 }, { "epoch": 0.589070262597587, "grad_norm": 0.6419149041175842, "learning_rate": 5.714220732583194e-06, "loss": 0.0966, "step": 4980 }, { "epoch": 0.5902531346108351, "grad_norm": 0.640676736831665, "learning_rate": 5.713502513765861e-06, "loss": 0.0994, "step": 4990 }, { "epoch": 0.5914360066240832, "grad_norm": 0.6193518042564392, "learning_rate": 5.7127842949485276e-06, "loss": 0.1045, "step": 5000 }, { "epoch": 0.5926188786373314, "grad_norm": 0.5580844283103943, "learning_rate": 5.7120660761311945e-06, "loss": 0.1005, "step": 5010 }, { "epoch": 0.5938017506505796, "grad_norm": 0.5440773367881775, "learning_rate": 5.711347857313861e-06, "loss": 0.1014, "step": 5020 }, { "epoch": 0.5949846226638278, "grad_norm": 0.5914257764816284, "learning_rate": 5.710629638496529e-06, "loss": 0.1038, "step": 5030 }, { "epoch": 0.596167494677076, "grad_norm": 0.6799629330635071, "learning_rate": 5.709911419679196e-06, "loss": 0.0937, "step": 5040 }, { "epoch": 0.5973503666903242, "grad_norm": 0.4832763969898224, "learning_rate": 5.709193200861863e-06, "loss": 0.11, "step": 5050 }, { "epoch": 0.5985332387035722, "grad_norm": 0.6140722632408142, "learning_rate": 5.70847498204453e-06, "loss": 0.1104, "step": 5060 }, { "epoch": 0.5997161107168204, "grad_norm": 0.4985930323600769, "learning_rate": 5.707756763227197e-06, "loss": 0.0926, "step": 5070 }, { "epoch": 0.6008989827300686, "grad_norm": 0.568899393081665, "learning_rate": 5.707038544409864e-06, "loss": 0.1045, "step": 5080 }, { "epoch": 0.6020818547433168, "grad_norm": 0.602124810218811, "learning_rate": 5.706320325592531e-06, "loss": 0.0894, "step": 5090 }, { "epoch": 0.603264726756565, "grad_norm": 0.5471999645233154, "learning_rate": 5.705602106775198e-06, "loss": 0.0929, "step": 5100 }, { "epoch": 0.6044475987698131, "grad_norm": 0.7618167996406555, "learning_rate": 5.7048838879578646e-06, "loss": 0.0839, "step": 5110 }, { "epoch": 0.6056304707830613, "grad_norm": 0.6616073846817017, "learning_rate": 5.7041656691405315e-06, "loss": 0.1003, "step": 5120 }, { "epoch": 0.6068133427963094, "grad_norm": 0.47781872749328613, "learning_rate": 5.703447450323198e-06, "loss": 0.0926, "step": 5130 }, { "epoch": 0.6079962148095576, "grad_norm": 0.7380110621452332, "learning_rate": 5.702729231505865e-06, "loss": 0.1015, "step": 5140 }, { "epoch": 0.6091790868228057, "grad_norm": 0.5813941359519958, "learning_rate": 5.702011012688533e-06, "loss": 0.0901, "step": 5150 }, { "epoch": 0.6103619588360539, "grad_norm": 0.523969292640686, "learning_rate": 5.701292793871199e-06, "loss": 0.0996, "step": 5160 }, { "epoch": 0.6115448308493021, "grad_norm": 0.4976615607738495, "learning_rate": 5.700574575053867e-06, "loss": 0.0884, "step": 5170 }, { "epoch": 0.6127277028625503, "grad_norm": 0.6008037328720093, "learning_rate": 5.699856356236533e-06, "loss": 0.0976, "step": 5180 }, { "epoch": 0.6139105748757985, "grad_norm": 0.6786630153656006, "learning_rate": 5.699138137419201e-06, "loss": 0.0878, "step": 5190 }, { "epoch": 0.6150934468890467, "grad_norm": 0.9552755355834961, "learning_rate": 5.698419918601867e-06, "loss": 0.095, "step": 5200 }, { "epoch": 0.6162763189022947, "grad_norm": 0.624603807926178, "learning_rate": 5.697701699784535e-06, "loss": 0.0975, "step": 5210 }, { "epoch": 0.6174591909155429, "grad_norm": 0.7362788319587708, "learning_rate": 5.6969834809672016e-06, "loss": 0.1009, "step": 5220 }, { "epoch": 0.6186420629287911, "grad_norm": 0.5665349960327148, "learning_rate": 5.6962652621498685e-06, "loss": 0.0984, "step": 5230 }, { "epoch": 0.6198249349420393, "grad_norm": 0.5436658263206482, "learning_rate": 5.695547043332535e-06, "loss": 0.0934, "step": 5240 }, { "epoch": 0.6210078069552875, "grad_norm": 0.6138890981674194, "learning_rate": 5.694828824515202e-06, "loss": 0.1025, "step": 5250 }, { "epoch": 0.6221906789685356, "grad_norm": 0.5778772234916687, "learning_rate": 5.694110605697869e-06, "loss": 0.0909, "step": 5260 }, { "epoch": 0.6233735509817838, "grad_norm": 0.5502493977546692, "learning_rate": 5.693392386880536e-06, "loss": 0.0982, "step": 5270 }, { "epoch": 0.6245564229950319, "grad_norm": 0.7991401553153992, "learning_rate": 5.692674168063204e-06, "loss": 0.1071, "step": 5280 }, { "epoch": 0.6257392950082801, "grad_norm": 0.5124003291130066, "learning_rate": 5.69195594924587e-06, "loss": 0.0975, "step": 5290 }, { "epoch": 0.6269221670215283, "grad_norm": 0.4274737536907196, "learning_rate": 5.691237730428538e-06, "loss": 0.0933, "step": 5300 }, { "epoch": 0.6281050390347764, "grad_norm": 0.5033624172210693, "learning_rate": 5.690519511611205e-06, "loss": 0.0859, "step": 5310 }, { "epoch": 0.6292879110480246, "grad_norm": 0.6268101930618286, "learning_rate": 5.689801292793872e-06, "loss": 0.0888, "step": 5320 }, { "epoch": 0.6304707830612728, "grad_norm": 0.4643569886684418, "learning_rate": 5.6890830739765386e-06, "loss": 0.0923, "step": 5330 }, { "epoch": 0.631653655074521, "grad_norm": 0.5595318078994751, "learning_rate": 5.6883648551592055e-06, "loss": 0.0963, "step": 5340 }, { "epoch": 0.632836527087769, "grad_norm": 0.5314580798149109, "learning_rate": 5.687646636341872e-06, "loss": 0.089, "step": 5350 }, { "epoch": 0.6340193991010172, "grad_norm": 0.6070656180381775, "learning_rate": 5.686928417524539e-06, "loss": 0.0871, "step": 5360 }, { "epoch": 0.6352022711142654, "grad_norm": 0.5328777432441711, "learning_rate": 5.686210198707206e-06, "loss": 0.0936, "step": 5370 }, { "epoch": 0.6363851431275136, "grad_norm": 0.7058836817741394, "learning_rate": 5.685491979889873e-06, "loss": 0.0937, "step": 5380 }, { "epoch": 0.6375680151407618, "grad_norm": 0.5538118481636047, "learning_rate": 5.68477376107254e-06, "loss": 0.0882, "step": 5390 }, { "epoch": 0.63875088715401, "grad_norm": 0.6232064366340637, "learning_rate": 5.684055542255207e-06, "loss": 0.0991, "step": 5400 }, { "epoch": 0.6399337591672581, "grad_norm": 0.6724734902381897, "learning_rate": 5.683337323437874e-06, "loss": 0.0984, "step": 5410 }, { "epoch": 0.6411166311805063, "grad_norm": 0.5098162293434143, "learning_rate": 5.682619104620542e-06, "loss": 0.0876, "step": 5420 }, { "epoch": 0.6422995031937544, "grad_norm": 0.4075785279273987, "learning_rate": 5.681900885803208e-06, "loss": 0.0873, "step": 5430 }, { "epoch": 0.6434823752070026, "grad_norm": 0.7280784845352173, "learning_rate": 5.6811826669858756e-06, "loss": 0.097, "step": 5440 }, { "epoch": 0.6446652472202508, "grad_norm": 0.6973534822463989, "learning_rate": 5.680464448168542e-06, "loss": 0.0919, "step": 5450 }, { "epoch": 0.6458481192334989, "grad_norm": 0.5580155849456787, "learning_rate": 5.679746229351209e-06, "loss": 0.0915, "step": 5460 }, { "epoch": 0.6470309912467471, "grad_norm": 0.5762267112731934, "learning_rate": 5.6790280105338755e-06, "loss": 0.087, "step": 5470 }, { "epoch": 0.6482138632599953, "grad_norm": 0.6038976311683655, "learning_rate": 5.678309791716543e-06, "loss": 0.0923, "step": 5480 }, { "epoch": 0.6493967352732435, "grad_norm": 0.5124639868736267, "learning_rate": 5.67759157289921e-06, "loss": 0.0911, "step": 5490 }, { "epoch": 0.6505796072864916, "grad_norm": 0.7599403262138367, "learning_rate": 5.676873354081877e-06, "loss": 0.1013, "step": 5500 }, { "epoch": 0.6517624792997397, "grad_norm": 0.5355880856513977, "learning_rate": 5.676155135264544e-06, "loss": 0.1061, "step": 5510 }, { "epoch": 0.6529453513129879, "grad_norm": 0.5913971662521362, "learning_rate": 5.675436916447211e-06, "loss": 0.099, "step": 5520 }, { "epoch": 0.6541282233262361, "grad_norm": 0.6490454077720642, "learning_rate": 5.674718697629878e-06, "loss": 0.1023, "step": 5530 }, { "epoch": 0.6553110953394843, "grad_norm": 0.5362119078636169, "learning_rate": 5.674000478812545e-06, "loss": 0.0904, "step": 5540 }, { "epoch": 0.6564939673527325, "grad_norm": 0.5282692313194275, "learning_rate": 5.6732822599952126e-06, "loss": 0.0937, "step": 5550 }, { "epoch": 0.6576768393659806, "grad_norm": 0.795635998249054, "learning_rate": 5.672564041177879e-06, "loss": 0.0927, "step": 5560 }, { "epoch": 0.6588597113792287, "grad_norm": 0.660574197769165, "learning_rate": 5.671845822360546e-06, "loss": 0.095, "step": 5570 }, { "epoch": 0.6600425833924769, "grad_norm": 0.47181305289268494, "learning_rate": 5.671127603543213e-06, "loss": 0.0983, "step": 5580 }, { "epoch": 0.6612254554057251, "grad_norm": 0.5357106328010559, "learning_rate": 5.67040938472588e-06, "loss": 0.0908, "step": 5590 }, { "epoch": 0.6624083274189733, "grad_norm": 0.508901834487915, "learning_rate": 5.669691165908547e-06, "loss": 0.1001, "step": 5600 }, { "epoch": 0.6635911994322214, "grad_norm": 0.5255958437919617, "learning_rate": 5.668972947091214e-06, "loss": 0.0992, "step": 5610 }, { "epoch": 0.6647740714454696, "grad_norm": 0.5262123942375183, "learning_rate": 5.668254728273881e-06, "loss": 0.1035, "step": 5620 }, { "epoch": 0.6659569434587178, "grad_norm": 0.5783426761627197, "learning_rate": 5.667536509456548e-06, "loss": 0.0999, "step": 5630 }, { "epoch": 0.6671398154719659, "grad_norm": 0.5561472773551941, "learning_rate": 5.666818290639215e-06, "loss": 0.0992, "step": 5640 }, { "epoch": 0.6683226874852141, "grad_norm": 0.6310315132141113, "learning_rate": 5.666100071821882e-06, "loss": 0.097, "step": 5650 }, { "epoch": 0.6695055594984622, "grad_norm": 0.5435957312583923, "learning_rate": 5.665381853004549e-06, "loss": 0.0966, "step": 5660 }, { "epoch": 0.6706884315117104, "grad_norm": 0.9346890449523926, "learning_rate": 5.664663634187216e-06, "loss": 0.0862, "step": 5670 }, { "epoch": 0.6718713035249586, "grad_norm": 0.563909113407135, "learning_rate": 5.6639454153698825e-06, "loss": 0.0887, "step": 5680 }, { "epoch": 0.6730541755382068, "grad_norm": 0.7156330347061157, "learning_rate": 5.66322719655255e-06, "loss": 0.0951, "step": 5690 }, { "epoch": 0.674237047551455, "grad_norm": 0.6774573922157288, "learning_rate": 5.662508977735216e-06, "loss": 0.0875, "step": 5700 }, { "epoch": 0.6754199195647032, "grad_norm": 0.4993340075016022, "learning_rate": 5.661790758917884e-06, "loss": 0.0958, "step": 5710 }, { "epoch": 0.6766027915779512, "grad_norm": 0.6915708184242249, "learning_rate": 5.66107254010055e-06, "loss": 0.0996, "step": 5720 }, { "epoch": 0.6777856635911994, "grad_norm": 0.4024944305419922, "learning_rate": 5.660354321283218e-06, "loss": 0.0924, "step": 5730 }, { "epoch": 0.6789685356044476, "grad_norm": 0.5789515972137451, "learning_rate": 5.659636102465884e-06, "loss": 0.0816, "step": 5740 }, { "epoch": 0.6801514076176958, "grad_norm": 0.5888805985450745, "learning_rate": 5.658917883648552e-06, "loss": 0.0984, "step": 5750 }, { "epoch": 0.681334279630944, "grad_norm": 0.560070276260376, "learning_rate": 5.658199664831219e-06, "loss": 0.0941, "step": 5760 }, { "epoch": 0.6825171516441921, "grad_norm": 0.4538366496562958, "learning_rate": 5.657481446013886e-06, "loss": 0.1023, "step": 5770 }, { "epoch": 0.6837000236574403, "grad_norm": 0.5329626798629761, "learning_rate": 5.656763227196553e-06, "loss": 0.0949, "step": 5780 }, { "epoch": 0.6848828956706884, "grad_norm": 0.5539559721946716, "learning_rate": 5.6560450083792195e-06, "loss": 0.0931, "step": 5790 }, { "epoch": 0.6860657676839366, "grad_norm": 0.5171330571174622, "learning_rate": 5.655326789561887e-06, "loss": 0.0972, "step": 5800 }, { "epoch": 0.6872486396971847, "grad_norm": 0.5368791818618774, "learning_rate": 5.654608570744553e-06, "loss": 0.0898, "step": 5810 }, { "epoch": 0.6884315117104329, "grad_norm": 0.5450922250747681, "learning_rate": 5.653890351927221e-06, "loss": 0.095, "step": 5820 }, { "epoch": 0.6896143837236811, "grad_norm": 0.5543373823165894, "learning_rate": 5.653172133109887e-06, "loss": 0.0989, "step": 5830 }, { "epoch": 0.6907972557369293, "grad_norm": 0.5000300407409668, "learning_rate": 5.652453914292555e-06, "loss": 0.0988, "step": 5840 }, { "epoch": 0.6919801277501775, "grad_norm": 0.6355880498886108, "learning_rate": 5.651735695475222e-06, "loss": 0.096, "step": 5850 }, { "epoch": 0.6931629997634255, "grad_norm": 0.6157683730125427, "learning_rate": 5.651017476657889e-06, "loss": 0.0975, "step": 5860 }, { "epoch": 0.6943458717766737, "grad_norm": 0.6694032549858093, "learning_rate": 5.650299257840556e-06, "loss": 0.0954, "step": 5870 }, { "epoch": 0.6955287437899219, "grad_norm": 0.5142399072647095, "learning_rate": 5.649581039023223e-06, "loss": 0.0902, "step": 5880 }, { "epoch": 0.6967116158031701, "grad_norm": 0.5652394890785217, "learning_rate": 5.64886282020589e-06, "loss": 0.0963, "step": 5890 }, { "epoch": 0.6978944878164183, "grad_norm": 0.4215143918991089, "learning_rate": 5.6481446013885565e-06, "loss": 0.0965, "step": 5900 }, { "epoch": 0.6990773598296665, "grad_norm": 0.4524252712726593, "learning_rate": 5.6474263825712235e-06, "loss": 0.0901, "step": 5910 }, { "epoch": 0.7002602318429146, "grad_norm": 0.38867199420928955, "learning_rate": 5.64670816375389e-06, "loss": 0.0937, "step": 5920 }, { "epoch": 0.7014431038561627, "grad_norm": 0.5471032857894897, "learning_rate": 5.645989944936557e-06, "loss": 0.0875, "step": 5930 }, { "epoch": 0.7026259758694109, "grad_norm": 0.5288333296775818, "learning_rate": 5.645271726119224e-06, "loss": 0.0942, "step": 5940 }, { "epoch": 0.7038088478826591, "grad_norm": 0.6418154835700989, "learning_rate": 5.644553507301891e-06, "loss": 0.0893, "step": 5950 }, { "epoch": 0.7049917198959073, "grad_norm": 0.5688426494598389, "learning_rate": 5.643835288484559e-06, "loss": 0.0842, "step": 5960 }, { "epoch": 0.7061745919091554, "grad_norm": 0.6261107921600342, "learning_rate": 5.643117069667225e-06, "loss": 0.0879, "step": 5970 }, { "epoch": 0.7073574639224036, "grad_norm": 0.6241941452026367, "learning_rate": 5.642398850849893e-06, "loss": 0.0887, "step": 5980 }, { "epoch": 0.7085403359356518, "grad_norm": 0.9900299906730652, "learning_rate": 5.641680632032559e-06, "loss": 0.0873, "step": 5990 }, { "epoch": 0.7097232079489, "grad_norm": 0.6169202923774719, "learning_rate": 5.640962413215227e-06, "loss": 0.088, "step": 6000 }, { "epoch": 0.710906079962148, "grad_norm": 0.5337333679199219, "learning_rate": 5.640244194397893e-06, "loss": 0.0943, "step": 6010 }, { "epoch": 0.7120889519753962, "grad_norm": 0.42041653394699097, "learning_rate": 5.6395259755805605e-06, "loss": 0.0869, "step": 6020 }, { "epoch": 0.7132718239886444, "grad_norm": 0.570706844329834, "learning_rate": 5.638807756763227e-06, "loss": 0.087, "step": 6030 }, { "epoch": 0.7144546960018926, "grad_norm": 0.5114567875862122, "learning_rate": 5.638089537945894e-06, "loss": 0.099, "step": 6040 }, { "epoch": 0.7156375680151408, "grad_norm": 0.9057469964027405, "learning_rate": 5.637371319128561e-06, "loss": 0.0848, "step": 6050 }, { "epoch": 0.716820440028389, "grad_norm": 0.5898886919021606, "learning_rate": 5.636653100311228e-06, "loss": 0.0908, "step": 6060 }, { "epoch": 0.7180033120416371, "grad_norm": 0.6484323740005493, "learning_rate": 5.635934881493896e-06, "loss": 0.0911, "step": 6070 }, { "epoch": 0.7191861840548852, "grad_norm": 0.4362123906612396, "learning_rate": 5.635216662676562e-06, "loss": 0.0823, "step": 6080 }, { "epoch": 0.7203690560681334, "grad_norm": 0.557669460773468, "learning_rate": 5.63449844385923e-06, "loss": 0.1057, "step": 6090 }, { "epoch": 0.7215519280813816, "grad_norm": 0.49477940797805786, "learning_rate": 5.633780225041896e-06, "loss": 0.093, "step": 6100 }, { "epoch": 0.7227348000946298, "grad_norm": 0.488801509141922, "learning_rate": 5.633062006224564e-06, "loss": 0.088, "step": 6110 }, { "epoch": 0.7239176721078779, "grad_norm": 0.5233747959136963, "learning_rate": 5.63234378740723e-06, "loss": 0.0913, "step": 6120 }, { "epoch": 0.7251005441211261, "grad_norm": 0.5643594264984131, "learning_rate": 5.6316255685898975e-06, "loss": 0.1013, "step": 6130 }, { "epoch": 0.7262834161343743, "grad_norm": 0.4980293810367584, "learning_rate": 5.630907349772564e-06, "loss": 0.1023, "step": 6140 }, { "epoch": 0.7274662881476224, "grad_norm": 0.5257401466369629, "learning_rate": 5.630189130955231e-06, "loss": 0.0854, "step": 6150 }, { "epoch": 0.7286491601608706, "grad_norm": 0.4835401773452759, "learning_rate": 5.629470912137898e-06, "loss": 0.0814, "step": 6160 }, { "epoch": 0.7298320321741187, "grad_norm": 0.5545103549957275, "learning_rate": 5.628752693320565e-06, "loss": 0.0945, "step": 6170 }, { "epoch": 0.7310149041873669, "grad_norm": 0.5919556617736816, "learning_rate": 5.628034474503232e-06, "loss": 0.1022, "step": 6180 }, { "epoch": 0.7321977762006151, "grad_norm": 0.5400152206420898, "learning_rate": 5.627316255685899e-06, "loss": 0.0958, "step": 6190 }, { "epoch": 0.7333806482138633, "grad_norm": 0.5466191172599792, "learning_rate": 5.626598036868566e-06, "loss": 0.0937, "step": 6200 }, { "epoch": 0.7345635202271115, "grad_norm": 0.5635471343994141, "learning_rate": 5.625879818051233e-06, "loss": 0.0935, "step": 6210 }, { "epoch": 0.7357463922403596, "grad_norm": 0.9066815376281738, "learning_rate": 5.6251615992339e-06, "loss": 0.0921, "step": 6220 }, { "epoch": 0.7369292642536077, "grad_norm": 0.6125943660736084, "learning_rate": 5.6244433804165675e-06, "loss": 0.0916, "step": 6230 }, { "epoch": 0.7381121362668559, "grad_norm": 0.4669167697429657, "learning_rate": 5.623725161599234e-06, "loss": 0.0953, "step": 6240 }, { "epoch": 0.7392950082801041, "grad_norm": 0.4145720601081848, "learning_rate": 5.623006942781901e-06, "loss": 0.0884, "step": 6250 }, { "epoch": 0.7404778802933523, "grad_norm": 0.6034293174743652, "learning_rate": 5.6222887239645675e-06, "loss": 0.0943, "step": 6260 }, { "epoch": 0.7416607523066004, "grad_norm": 0.6175572872161865, "learning_rate": 5.621570505147235e-06, "loss": 0.0872, "step": 6270 }, { "epoch": 0.7428436243198486, "grad_norm": 0.6331788897514343, "learning_rate": 5.620852286329901e-06, "loss": 0.0926, "step": 6280 }, { "epoch": 0.7440264963330968, "grad_norm": 0.5088357329368591, "learning_rate": 5.620134067512569e-06, "loss": 0.085, "step": 6290 }, { "epoch": 0.7452093683463449, "grad_norm": 1.4171618223190308, "learning_rate": 5.619415848695236e-06, "loss": 0.0952, "step": 6300 }, { "epoch": 0.7463922403595931, "grad_norm": 0.5780158638954163, "learning_rate": 5.618697629877903e-06, "loss": 0.0869, "step": 6310 }, { "epoch": 0.7475751123728412, "grad_norm": 0.5572367310523987, "learning_rate": 5.61797941106057e-06, "loss": 0.0815, "step": 6320 }, { "epoch": 0.7487579843860894, "grad_norm": 0.6366298794746399, "learning_rate": 5.617261192243237e-06, "loss": 0.0956, "step": 6330 }, { "epoch": 0.7499408563993376, "grad_norm": 0.4955446422100067, "learning_rate": 5.6165429734259045e-06, "loss": 0.0925, "step": 6340 }, { "epoch": 0.7501774308019872, "eval_accuracy": 0.6800246198888777, "eval_animal_abuse/accuracy": 0.9946934158432312, "eval_animal_abuse/f1": 0.7540478026214341, "eval_animal_abuse/fpr": 0.0020193181435735163, "eval_animal_abuse/precision": 0.8029556650246306, "eval_animal_abuse/recall": 0.7107558139534884, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9959244102871211, "eval_child_abuse/f1": 0.49897750511247446, "eval_child_abuse/fpr": 0.0005687425770729821, "eval_child_abuse/precision": 0.782051282051282, "eval_child_abuse/recall": 0.3663663663663664, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9696742855241708, "eval_controversial_topics,politics/f1": 0.4695955775385511, "eval_controversial_topics,politics/fpr": 0.013522789676002173, "eval_controversial_topics,politics/precision": 0.5059561128526646, "eval_controversial_topics,politics/recall": 0.4381107491856677, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9551851482183851, "eval_discrimination,stereotype,injustice/f1": 0.6861602982292637, "eval_discrimination,stereotype,injustice/fpr": 0.015523909350489725, "eval_discrimination,stereotype,injustice/precision": 0.7741850683491062, "eval_discrimination,stereotype,injustice/recall": 0.6161087866108786, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9719865588714776, "eval_drug_abuse,weapons,banned_substance/f1": 0.7495538370017847, "eval_drug_abuse,weapons,banned_substance/fpr": 0.014419686927090652, "eval_drug_abuse,weapons,banned_substance/precision": 0.7549430796884362, "eval_drug_abuse,weapons,banned_substance/recall": 0.7442409923213231, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9572811657850084, "eval_financial_crime,property_crime,theft/f1": 0.7922330097087379, "eval_financial_crime,property_crime,theft/fpr": 0.029725595709783774, "eval_financial_crime,property_crime,theft/precision": 0.7521892763865418, "eval_financial_crime,property_crime,theft/recall": 0.8367800376004102, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8361446584822171, "eval_flagged/aucpr": 0.9040015941081498, "eval_flagged/f1": 0.8437152921016723, "eval_flagged/fpr": 0.11202790383677715, "eval_flagged/precision": 0.8990025359256129, "eval_flagged/recall": 0.7948342351499208, "eval_hate_speech,offensive_language/accuracy": 0.949695578401038, "eval_hate_speech,offensive_language/f1": 0.6502428868841083, "eval_hate_speech,offensive_language/fpr": 0.008240453133564756, "eval_hate_speech,offensive_language/precision": 0.8617412630288167, "eval_hate_speech,offensive_language/recall": 0.5221025260029718, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.09003351628780365, "eval_macro_f1": 0.5825161264758619, "eval_macro_precision": 0.6747148840017854, "eval_macro_recall": 0.5343796583413616, "eval_micro_f1": 0.7305501380655377, "eval_micro_precision": 0.7992268204575702, "eval_micro_recall": 0.6727421448512545, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9878397711015737, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8842865222743455, "eval_non_violent_unethical_behavior/f1": 0.6521304260852171, "eval_non_violent_unethical_behavior/fpr": 0.031763826606875864, "eval_non_violent_unethical_behavior/precision": 0.8099378881987578, "eval_non_violent_unethical_behavior/recall": 0.5457893855683911, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9807698705792328, "eval_privacy_violation/f1": 0.7920863309352518, "eval_privacy_violation/fpr": 0.006859382655560987, "eval_privacy_violation/precision": 0.848882035466461, "eval_privacy_violation/recall": 0.7424140256237357, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.3523, "eval_samples_per_second": 100.466, "eval_self_harm/accuracy": 0.9964400971487507, "eval_self_harm/f1": 0.7220779220779221, "eval_self_harm/fpr": 0.0013734423154227499, "eval_self_harm/precision": 0.7722222222222223, "eval_self_harm/recall": 0.6780487804878049, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9821173104434907, "eval_sexually_explicit,adult_content/f1": 0.5332175423360833, "eval_sexually_explicit,adult_content/fpr": 0.0041249765626331595, "eval_sexually_explicit,adult_content/precision": 0.7172897196261683, "eval_sexually_explicit,adult_content/recall": 0.424326192121631, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.571, "eval_terrorism,organized_crime/accuracy": 0.9919153608144525, "eval_terrorism,organized_crime/f1": 0.00816326530612245, "eval_terrorism,organized_crime/fpr": 0.00011738466956215499, "eval_terrorism,organized_crime/precision": 0.2222222222222222, "eval_terrorism,organized_crime/recall": 0.004158004158004158, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.917939248760688, "eval_violence,aiding_and_abetting,incitement/f1": 0.8467393668251157, "eval_violence,aiding_and_abetting,incitement/fpr": 0.05820225737727198, "eval_violence,aiding_and_abetting,incitement/precision": 0.8414325409076876, "eval_violence,aiding_and_abetting,incitement/recall": 0.8521135567783892, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 6342 }, { "epoch": 0.7511237284125858, "grad_norm": 0.6250366568565369, "learning_rate": 5.615824754608571e-06, "loss": 0.0967, "step": 6350 }, { "epoch": 0.752306600425834, "grad_norm": 0.6285472512245178, "learning_rate": 5.615106535791238e-06, "loss": 0.0975, "step": 6360 }, { "epoch": 0.753489472439082, "grad_norm": 0.5591168999671936, "learning_rate": 5.6143883169739045e-06, "loss": 0.0977, "step": 6370 }, { "epoch": 0.7546723444523302, "grad_norm": 0.49159547686576843, "learning_rate": 5.613670098156572e-06, "loss": 0.0963, "step": 6380 }, { "epoch": 0.7558552164655784, "grad_norm": 0.5657317638397217, "learning_rate": 5.612951879339238e-06, "loss": 0.0986, "step": 6390 }, { "epoch": 0.7570380884788266, "grad_norm": 0.8390008211135864, "learning_rate": 5.612233660521906e-06, "loss": 0.0959, "step": 6400 }, { "epoch": 0.7582209604920748, "grad_norm": 0.49121350049972534, "learning_rate": 5.611515441704573e-06, "loss": 0.0929, "step": 6410 }, { "epoch": 0.759403832505323, "grad_norm": 0.49891501665115356, "learning_rate": 5.61079722288724e-06, "loss": 0.0853, "step": 6420 }, { "epoch": 0.7605867045185711, "grad_norm": 0.4740714430809021, "learning_rate": 5.610079004069907e-06, "loss": 0.0917, "step": 6430 }, { "epoch": 0.7617695765318192, "grad_norm": 0.6599583029747009, "learning_rate": 5.609360785252574e-06, "loss": 0.0927, "step": 6440 }, { "epoch": 0.7629524485450674, "grad_norm": 0.6263123750686646, "learning_rate": 5.608642566435241e-06, "loss": 0.0929, "step": 6450 }, { "epoch": 0.7641353205583156, "grad_norm": 0.5150656700134277, "learning_rate": 5.607924347617908e-06, "loss": 0.0881, "step": 6460 }, { "epoch": 0.7653181925715637, "grad_norm": 0.43694251775741577, "learning_rate": 5.6072061288005745e-06, "loss": 0.0914, "step": 6470 }, { "epoch": 0.7665010645848119, "grad_norm": 0.47304198145866394, "learning_rate": 5.6064879099832415e-06, "loss": 0.0905, "step": 6480 }, { "epoch": 0.7676839365980601, "grad_norm": 0.4751938581466675, "learning_rate": 5.605769691165908e-06, "loss": 0.0927, "step": 6490 }, { "epoch": 0.7688668086113083, "grad_norm": 0.5619322061538696, "learning_rate": 5.605051472348576e-06, "loss": 0.0957, "step": 6500 }, { "epoch": 0.7700496806245565, "grad_norm": 0.7171851992607117, "learning_rate": 5.604333253531242e-06, "loss": 0.0991, "step": 6510 }, { "epoch": 0.7712325526378045, "grad_norm": 0.42374107241630554, "learning_rate": 5.60361503471391e-06, "loss": 0.0901, "step": 6520 }, { "epoch": 0.7724154246510527, "grad_norm": 0.5820472240447998, "learning_rate": 5.602896815896576e-06, "loss": 0.088, "step": 6530 }, { "epoch": 0.7735982966643009, "grad_norm": 0.5002573132514954, "learning_rate": 5.602178597079244e-06, "loss": 0.0893, "step": 6540 }, { "epoch": 0.7747811686775491, "grad_norm": 0.5378569960594177, "learning_rate": 5.601460378261911e-06, "loss": 0.085, "step": 6550 }, { "epoch": 0.7759640406907973, "grad_norm": 0.4839074909687042, "learning_rate": 5.600742159444578e-06, "loss": 0.0918, "step": 6560 }, { "epoch": 0.7771469127040455, "grad_norm": 0.5836463570594788, "learning_rate": 5.600023940627245e-06, "loss": 0.0932, "step": 6570 }, { "epoch": 0.7783297847172936, "grad_norm": 0.5022076368331909, "learning_rate": 5.5993057218099115e-06, "loss": 0.0881, "step": 6580 }, { "epoch": 0.7795126567305417, "grad_norm": 0.48048046231269836, "learning_rate": 5.5985875029925785e-06, "loss": 0.0939, "step": 6590 }, { "epoch": 0.7806955287437899, "grad_norm": 0.4665192663669586, "learning_rate": 5.597869284175245e-06, "loss": 0.0924, "step": 6600 }, { "epoch": 0.7818784007570381, "grad_norm": 0.5240653157234192, "learning_rate": 5.597151065357913e-06, "loss": 0.0988, "step": 6610 }, { "epoch": 0.7830612727702863, "grad_norm": 0.5692888498306274, "learning_rate": 5.596432846540579e-06, "loss": 0.0873, "step": 6620 }, { "epoch": 0.7842441447835344, "grad_norm": 0.7057889699935913, "learning_rate": 5.595714627723247e-06, "loss": 0.0904, "step": 6630 }, { "epoch": 0.7854270167967826, "grad_norm": 0.6426042318344116, "learning_rate": 5.594996408905913e-06, "loss": 0.0973, "step": 6640 }, { "epoch": 0.7866098888100308, "grad_norm": 0.6610566973686218, "learning_rate": 5.594278190088581e-06, "loss": 0.0963, "step": 6650 }, { "epoch": 0.7877927608232789, "grad_norm": 0.6449587941169739, "learning_rate": 5.593559971271247e-06, "loss": 0.0905, "step": 6660 }, { "epoch": 0.788975632836527, "grad_norm": 0.4754040241241455, "learning_rate": 5.592841752453915e-06, "loss": 0.0878, "step": 6670 }, { "epoch": 0.7901585048497752, "grad_norm": 0.6771761178970337, "learning_rate": 5.592123533636582e-06, "loss": 0.093, "step": 6680 }, { "epoch": 0.7913413768630234, "grad_norm": 0.485807329416275, "learning_rate": 5.5914053148192485e-06, "loss": 0.0879, "step": 6690 }, { "epoch": 0.7925242488762716, "grad_norm": 0.5014014840126038, "learning_rate": 5.5906870960019154e-06, "loss": 0.0861, "step": 6700 }, { "epoch": 0.7937071208895198, "grad_norm": 0.5224199891090393, "learning_rate": 5.589968877184582e-06, "loss": 0.0904, "step": 6710 }, { "epoch": 0.794889992902768, "grad_norm": 0.46814125776290894, "learning_rate": 5.589250658367249e-06, "loss": 0.0882, "step": 6720 }, { "epoch": 0.796072864916016, "grad_norm": 0.5427312254905701, "learning_rate": 5.588532439549916e-06, "loss": 0.0908, "step": 6730 }, { "epoch": 0.7972557369292642, "grad_norm": 0.5693920850753784, "learning_rate": 5.587814220732583e-06, "loss": 0.0847, "step": 6740 }, { "epoch": 0.7984386089425124, "grad_norm": 0.6256138682365417, "learning_rate": 5.58709600191525e-06, "loss": 0.095, "step": 6750 }, { "epoch": 0.7996214809557606, "grad_norm": 0.6319018006324768, "learning_rate": 5.586377783097917e-06, "loss": 0.0857, "step": 6760 }, { "epoch": 0.8008043529690088, "grad_norm": 0.5671972632408142, "learning_rate": 5.585659564280584e-06, "loss": 0.0946, "step": 6770 }, { "epoch": 0.8019872249822569, "grad_norm": 0.7096156477928162, "learning_rate": 5.584941345463251e-06, "loss": 0.0922, "step": 6780 }, { "epoch": 0.8031700969955051, "grad_norm": 0.4761767089366913, "learning_rate": 5.584223126645919e-06, "loss": 0.0994, "step": 6790 }, { "epoch": 0.8043529690087533, "grad_norm": 0.5490329265594482, "learning_rate": 5.5835049078285855e-06, "loss": 0.096, "step": 6800 }, { "epoch": 0.8055358410220014, "grad_norm": 0.6050373911857605, "learning_rate": 5.5827866890112524e-06, "loss": 0.0854, "step": 6810 }, { "epoch": 0.8067187130352496, "grad_norm": 0.6081627011299133, "learning_rate": 5.582068470193919e-06, "loss": 0.0838, "step": 6820 }, { "epoch": 0.8079015850484977, "grad_norm": 0.7895649075508118, "learning_rate": 5.581350251376586e-06, "loss": 0.0961, "step": 6830 }, { "epoch": 0.8090844570617459, "grad_norm": 0.4728451073169708, "learning_rate": 5.580632032559253e-06, "loss": 0.0869, "step": 6840 }, { "epoch": 0.8102673290749941, "grad_norm": 0.4970038831233978, "learning_rate": 5.57991381374192e-06, "loss": 0.0989, "step": 6850 }, { "epoch": 0.8114502010882423, "grad_norm": 0.5486234426498413, "learning_rate": 5.579195594924587e-06, "loss": 0.0896, "step": 6860 }, { "epoch": 0.8126330731014905, "grad_norm": 0.6001537442207336, "learning_rate": 5.578477376107254e-06, "loss": 0.0939, "step": 6870 }, { "epoch": 0.8138159451147385, "grad_norm": 0.5939499735832214, "learning_rate": 5.577759157289922e-06, "loss": 0.0966, "step": 6880 }, { "epoch": 0.8149988171279867, "grad_norm": 0.5227894186973572, "learning_rate": 5.577040938472588e-06, "loss": 0.0935, "step": 6890 }, { "epoch": 0.8161816891412349, "grad_norm": 0.5572964549064636, "learning_rate": 5.576322719655256e-06, "loss": 0.0899, "step": 6900 }, { "epoch": 0.8173645611544831, "grad_norm": 0.691532552242279, "learning_rate": 5.575604500837922e-06, "loss": 0.0919, "step": 6910 }, { "epoch": 0.8185474331677313, "grad_norm": 0.46826180815696716, "learning_rate": 5.5748862820205894e-06, "loss": 0.0917, "step": 6920 }, { "epoch": 0.8197303051809794, "grad_norm": 0.5747814178466797, "learning_rate": 5.5741680632032555e-06, "loss": 0.0916, "step": 6930 }, { "epoch": 0.8209131771942276, "grad_norm": 0.5997245907783508, "learning_rate": 5.573449844385923e-06, "loss": 0.0894, "step": 6940 }, { "epoch": 0.8220960492074757, "grad_norm": 0.9271543025970459, "learning_rate": 5.57273162556859e-06, "loss": 0.0897, "step": 6950 }, { "epoch": 0.8232789212207239, "grad_norm": 0.6090934872627258, "learning_rate": 5.572013406751257e-06, "loss": 0.092, "step": 6960 }, { "epoch": 0.8244617932339721, "grad_norm": 0.543346643447876, "learning_rate": 5.571295187933924e-06, "loss": 0.0962, "step": 6970 }, { "epoch": 0.8256446652472202, "grad_norm": 0.6361589431762695, "learning_rate": 5.570576969116591e-06, "loss": 0.0953, "step": 6980 }, { "epoch": 0.8268275372604684, "grad_norm": 0.510200023651123, "learning_rate": 5.569858750299258e-06, "loss": 0.0963, "step": 6990 }, { "epoch": 0.8280104092737166, "grad_norm": 0.6752845644950867, "learning_rate": 5.569140531481925e-06, "loss": 0.099, "step": 7000 }, { "epoch": 0.8291932812869648, "grad_norm": 0.5645956993103027, "learning_rate": 5.568422312664592e-06, "loss": 0.0884, "step": 7010 }, { "epoch": 0.830376153300213, "grad_norm": 0.5138949155807495, "learning_rate": 5.567704093847259e-06, "loss": 0.0863, "step": 7020 }, { "epoch": 0.831559025313461, "grad_norm": 0.8196755647659302, "learning_rate": 5.566985875029926e-06, "loss": 0.0855, "step": 7030 }, { "epoch": 0.8327418973267092, "grad_norm": 0.6646247506141663, "learning_rate": 5.5662676562125925e-06, "loss": 0.0981, "step": 7040 }, { "epoch": 0.8339247693399574, "grad_norm": 0.5703924298286438, "learning_rate": 5.5655494373952594e-06, "loss": 0.097, "step": 7050 }, { "epoch": 0.8351076413532056, "grad_norm": 0.6575745940208435, "learning_rate": 5.564831218577927e-06, "loss": 0.0981, "step": 7060 }, { "epoch": 0.8362905133664538, "grad_norm": 0.5075008869171143, "learning_rate": 5.564112999760594e-06, "loss": 0.0904, "step": 7070 }, { "epoch": 0.837473385379702, "grad_norm": 0.4884212911128998, "learning_rate": 5.563394780943261e-06, "loss": 0.0976, "step": 7080 }, { "epoch": 0.8386562573929501, "grad_norm": 0.48469433188438416, "learning_rate": 5.562676562125928e-06, "loss": 0.088, "step": 7090 }, { "epoch": 0.8398391294061982, "grad_norm": 0.48695263266563416, "learning_rate": 5.561958343308595e-06, "loss": 0.0839, "step": 7100 }, { "epoch": 0.8410220014194464, "grad_norm": 0.5640247464179993, "learning_rate": 5.561240124491262e-06, "loss": 0.095, "step": 7110 }, { "epoch": 0.8422048734326946, "grad_norm": 0.49873480200767517, "learning_rate": 5.560521905673929e-06, "loss": 0.0842, "step": 7120 }, { "epoch": 0.8433877454459427, "grad_norm": 0.429731547832489, "learning_rate": 5.559803686856596e-06, "loss": 0.0957, "step": 7130 }, { "epoch": 0.8445706174591909, "grad_norm": 0.4358295798301697, "learning_rate": 5.559085468039263e-06, "loss": 0.0962, "step": 7140 }, { "epoch": 0.8457534894724391, "grad_norm": 0.6492884159088135, "learning_rate": 5.55836724922193e-06, "loss": 0.0932, "step": 7150 }, { "epoch": 0.8469363614856873, "grad_norm": 0.4648359417915344, "learning_rate": 5.5576490304045964e-06, "loss": 0.091, "step": 7160 }, { "epoch": 0.8481192334989354, "grad_norm": 0.55613774061203, "learning_rate": 5.556930811587264e-06, "loss": 0.0927, "step": 7170 }, { "epoch": 0.8493021055121835, "grad_norm": 0.530727207660675, "learning_rate": 5.55621259276993e-06, "loss": 0.0932, "step": 7180 }, { "epoch": 0.8504849775254317, "grad_norm": 0.6301290392875671, "learning_rate": 5.555494373952598e-06, "loss": 0.0894, "step": 7190 }, { "epoch": 0.8516678495386799, "grad_norm": 0.49362561106681824, "learning_rate": 5.554776155135264e-06, "loss": 0.0899, "step": 7200 }, { "epoch": 0.8528507215519281, "grad_norm": 1.3415329456329346, "learning_rate": 5.554057936317932e-06, "loss": 0.0879, "step": 7210 }, { "epoch": 0.8540335935651763, "grad_norm": 0.5016365647315979, "learning_rate": 5.553339717500599e-06, "loss": 0.0853, "step": 7220 }, { "epoch": 0.8552164655784245, "grad_norm": 0.4500231444835663, "learning_rate": 5.552621498683266e-06, "loss": 0.0845, "step": 7230 }, { "epoch": 0.8563993375916725, "grad_norm": 0.6322329640388489, "learning_rate": 5.551903279865933e-06, "loss": 0.0961, "step": 7240 }, { "epoch": 0.8575822096049207, "grad_norm": 0.43628576397895813, "learning_rate": 5.5511850610486e-06, "loss": 0.0907, "step": 7250 }, { "epoch": 0.8587650816181689, "grad_norm": 0.6593953967094421, "learning_rate": 5.5504668422312665e-06, "loss": 0.0925, "step": 7260 }, { "epoch": 0.8599479536314171, "grad_norm": 0.5449455976486206, "learning_rate": 5.5497486234139334e-06, "loss": 0.0914, "step": 7270 }, { "epoch": 0.8611308256446653, "grad_norm": 0.5198991894721985, "learning_rate": 5.5490304045966e-06, "loss": 0.0911, "step": 7280 }, { "epoch": 0.8623136976579134, "grad_norm": 0.5385783314704895, "learning_rate": 5.548312185779267e-06, "loss": 0.0864, "step": 7290 }, { "epoch": 0.8634965696711616, "grad_norm": 0.6400865316390991, "learning_rate": 5.547593966961934e-06, "loss": 0.0978, "step": 7300 }, { "epoch": 0.8646794416844098, "grad_norm": 0.4232085943222046, "learning_rate": 5.546875748144601e-06, "loss": 0.0854, "step": 7310 }, { "epoch": 0.8658623136976579, "grad_norm": 0.5584732294082642, "learning_rate": 5.546157529327269e-06, "loss": 0.0902, "step": 7320 }, { "epoch": 0.867045185710906, "grad_norm": 0.6432327628135681, "learning_rate": 5.545439310509936e-06, "loss": 0.0911, "step": 7330 }, { "epoch": 0.8682280577241542, "grad_norm": 0.6981496214866638, "learning_rate": 5.544721091692603e-06, "loss": 0.0882, "step": 7340 }, { "epoch": 0.8694109297374024, "grad_norm": 0.6976572871208191, "learning_rate": 5.54400287287527e-06, "loss": 0.0943, "step": 7350 }, { "epoch": 0.8705938017506506, "grad_norm": 0.5253883004188538, "learning_rate": 5.543284654057937e-06, "loss": 0.0927, "step": 7360 }, { "epoch": 0.8717766737638988, "grad_norm": 0.5533382296562195, "learning_rate": 5.5425664352406035e-06, "loss": 0.0907, "step": 7370 }, { "epoch": 0.872959545777147, "grad_norm": 0.5107969641685486, "learning_rate": 5.5418482164232704e-06, "loss": 0.0964, "step": 7380 }, { "epoch": 0.874142417790395, "grad_norm": 0.5178835988044739, "learning_rate": 5.541129997605937e-06, "loss": 0.0885, "step": 7390 }, { "epoch": 0.8753252898036432, "grad_norm": 0.5704941749572754, "learning_rate": 5.540411778788604e-06, "loss": 0.097, "step": 7400 }, { "epoch": 0.8765081618168914, "grad_norm": 0.503154456615448, "learning_rate": 5.539693559971271e-06, "loss": 0.0922, "step": 7410 }, { "epoch": 0.8776910338301396, "grad_norm": 0.5647052526473999, "learning_rate": 5.538975341153939e-06, "loss": 0.0853, "step": 7420 }, { "epoch": 0.8788739058433878, "grad_norm": 0.6308786869049072, "learning_rate": 5.538257122336605e-06, "loss": 0.0926, "step": 7430 }, { "epoch": 0.8800567778566359, "grad_norm": 0.714667022228241, "learning_rate": 5.537538903519273e-06, "loss": 0.0864, "step": 7440 }, { "epoch": 0.8812396498698841, "grad_norm": 0.5458607077598572, "learning_rate": 5.536820684701939e-06, "loss": 0.0921, "step": 7450 }, { "epoch": 0.8824225218831322, "grad_norm": 0.6277599334716797, "learning_rate": 5.536102465884607e-06, "loss": 0.0986, "step": 7460 }, { "epoch": 0.8836053938963804, "grad_norm": 0.4972817003726959, "learning_rate": 5.535384247067273e-06, "loss": 0.0973, "step": 7470 }, { "epoch": 0.8847882659096286, "grad_norm": 0.5870622396469116, "learning_rate": 5.5346660282499405e-06, "loss": 0.0934, "step": 7480 }, { "epoch": 0.8859711379228767, "grad_norm": 0.43468207120895386, "learning_rate": 5.5339478094326074e-06, "loss": 0.0899, "step": 7490 }, { "epoch": 0.8871540099361249, "grad_norm": 0.788546085357666, "learning_rate": 5.533229590615274e-06, "loss": 0.0953, "step": 7500 }, { "epoch": 0.8883368819493731, "grad_norm": 0.5575612187385559, "learning_rate": 5.532511371797941e-06, "loss": 0.0963, "step": 7510 }, { "epoch": 0.8895197539626213, "grad_norm": 0.5157278180122375, "learning_rate": 5.531793152980608e-06, "loss": 0.0812, "step": 7520 }, { "epoch": 0.8907026259758695, "grad_norm": 0.5690245628356934, "learning_rate": 5.531074934163275e-06, "loss": 0.0924, "step": 7530 }, { "epoch": 0.8918854979891175, "grad_norm": 0.5988470315933228, "learning_rate": 5.530356715345942e-06, "loss": 0.0824, "step": 7540 }, { "epoch": 0.8930683700023657, "grad_norm": 0.5548174381256104, "learning_rate": 5.529638496528609e-06, "loss": 0.0942, "step": 7550 }, { "epoch": 0.8942512420156139, "grad_norm": 0.8076465725898743, "learning_rate": 5.528920277711276e-06, "loss": 0.0821, "step": 7560 }, { "epoch": 0.8954341140288621, "grad_norm": 0.6518809199333191, "learning_rate": 5.528202058893944e-06, "loss": 0.0934, "step": 7570 }, { "epoch": 0.8966169860421103, "grad_norm": 0.5106729865074158, "learning_rate": 5.52748384007661e-06, "loss": 0.0866, "step": 7580 }, { "epoch": 0.8977998580553584, "grad_norm": 0.6785132884979248, "learning_rate": 5.5267656212592775e-06, "loss": 0.0896, "step": 7590 }, { "epoch": 0.8989827300686066, "grad_norm": 0.5222122669219971, "learning_rate": 5.5260474024419444e-06, "loss": 0.1, "step": 7600 }, { "epoch": 0.9001656020818547, "grad_norm": 0.5371795296669006, "learning_rate": 5.525329183624611e-06, "loss": 0.0943, "step": 7610 }, { "epoch": 0.9013484740951029, "grad_norm": 0.5491868257522583, "learning_rate": 5.524610964807278e-06, "loss": 0.0795, "step": 7620 }, { "epoch": 0.9025313461083511, "grad_norm": 0.5228870511054993, "learning_rate": 5.523892745989945e-06, "loss": 0.0844, "step": 7630 }, { "epoch": 0.9037142181215992, "grad_norm": 0.6228838562965393, "learning_rate": 5.523174527172612e-06, "loss": 0.0979, "step": 7640 }, { "epoch": 0.9048970901348474, "grad_norm": 0.5079696178436279, "learning_rate": 5.522456308355279e-06, "loss": 0.0964, "step": 7650 }, { "epoch": 0.9060799621480956, "grad_norm": 0.4998623728752136, "learning_rate": 5.521738089537946e-06, "loss": 0.0921, "step": 7660 }, { "epoch": 0.9072628341613438, "grad_norm": 0.4994312822818756, "learning_rate": 5.521019870720613e-06, "loss": 0.0823, "step": 7670 }, { "epoch": 0.9084457061745919, "grad_norm": 0.5454375743865967, "learning_rate": 5.52030165190328e-06, "loss": 0.0947, "step": 7680 }, { "epoch": 0.90962857818784, "grad_norm": 0.6506507396697998, "learning_rate": 5.519583433085947e-06, "loss": 0.092, "step": 7690 }, { "epoch": 0.9108114502010882, "grad_norm": 0.5300822854042053, "learning_rate": 5.518865214268614e-06, "loss": 0.1019, "step": 7700 }, { "epoch": 0.9119943222143364, "grad_norm": 0.46735846996307373, "learning_rate": 5.5181469954512814e-06, "loss": 0.0922, "step": 7710 }, { "epoch": 0.9131771942275846, "grad_norm": 0.45003989338874817, "learning_rate": 5.5174287766339475e-06, "loss": 0.089, "step": 7720 }, { "epoch": 0.9143600662408328, "grad_norm": 0.39825817942619324, "learning_rate": 5.516710557816615e-06, "loss": 0.0838, "step": 7730 }, { "epoch": 0.915542938254081, "grad_norm": 0.4732776880264282, "learning_rate": 5.515992338999281e-06, "loss": 0.0875, "step": 7740 }, { "epoch": 0.916725810267329, "grad_norm": 0.5532517433166504, "learning_rate": 5.515274120181949e-06, "loss": 0.0929, "step": 7750 }, { "epoch": 0.9179086822805772, "grad_norm": 0.4382435083389282, "learning_rate": 5.514555901364616e-06, "loss": 0.0843, "step": 7760 }, { "epoch": 0.9190915542938254, "grad_norm": 0.4662255048751831, "learning_rate": 5.513837682547283e-06, "loss": 0.0809, "step": 7770 }, { "epoch": 0.9202744263070736, "grad_norm": 0.5414842963218689, "learning_rate": 5.51311946372995e-06, "loss": 0.0934, "step": 7780 }, { "epoch": 0.9214572983203217, "grad_norm": 0.7459576725959778, "learning_rate": 5.512401244912617e-06, "loss": 0.0987, "step": 7790 }, { "epoch": 0.9226401703335699, "grad_norm": 0.6166794300079346, "learning_rate": 5.511683026095284e-06, "loss": 0.0852, "step": 7800 }, { "epoch": 0.9238230423468181, "grad_norm": 0.548773467540741, "learning_rate": 5.510964807277951e-06, "loss": 0.0956, "step": 7810 }, { "epoch": 0.9250059143600663, "grad_norm": 0.5908733010292053, "learning_rate": 5.5102465884606184e-06, "loss": 0.0961, "step": 7820 }, { "epoch": 0.9261887863733144, "grad_norm": 0.5259242653846741, "learning_rate": 5.5095283696432845e-06, "loss": 0.0858, "step": 7830 }, { "epoch": 0.9273716583865625, "grad_norm": 0.602846086025238, "learning_rate": 5.508810150825952e-06, "loss": 0.0898, "step": 7840 }, { "epoch": 0.9285545303998107, "grad_norm": 0.5604369044303894, "learning_rate": 5.508091932008618e-06, "loss": 0.0931, "step": 7850 }, { "epoch": 0.9297374024130589, "grad_norm": 0.6392659544944763, "learning_rate": 5.507373713191286e-06, "loss": 0.0897, "step": 7860 }, { "epoch": 0.9309202744263071, "grad_norm": 0.6247487664222717, "learning_rate": 5.506655494373953e-06, "loss": 0.0898, "step": 7870 }, { "epoch": 0.9321031464395553, "grad_norm": 0.48103243112564087, "learning_rate": 5.50593727555662e-06, "loss": 0.0899, "step": 7880 }, { "epoch": 0.9332860184528035, "grad_norm": 0.5454939007759094, "learning_rate": 5.505219056739287e-06, "loss": 0.0939, "step": 7890 }, { "epoch": 0.9344688904660515, "grad_norm": 0.45005032420158386, "learning_rate": 5.504500837921954e-06, "loss": 0.0907, "step": 7900 }, { "epoch": 0.9356517624792997, "grad_norm": 0.4482378363609314, "learning_rate": 5.503782619104621e-06, "loss": 0.0903, "step": 7910 }, { "epoch": 0.9368346344925479, "grad_norm": 0.6674760580062866, "learning_rate": 5.503064400287288e-06, "loss": 0.0901, "step": 7920 }, { "epoch": 0.9380175065057961, "grad_norm": 0.37034618854522705, "learning_rate": 5.5023461814699546e-06, "loss": 0.0875, "step": 7930 }, { "epoch": 0.9392003785190443, "grad_norm": 0.4043606221675873, "learning_rate": 5.5016279626526215e-06, "loss": 0.0804, "step": 7940 }, { "epoch": 0.9403832505322924, "grad_norm": 0.5328341722488403, "learning_rate": 5.500909743835288e-06, "loss": 0.0918, "step": 7950 }, { "epoch": 0.9415661225455406, "grad_norm": 0.5725728869438171, "learning_rate": 5.500191525017955e-06, "loss": 0.0982, "step": 7960 }, { "epoch": 0.9427489945587887, "grad_norm": 0.4792079031467438, "learning_rate": 5.499473306200622e-06, "loss": 0.0918, "step": 7970 }, { "epoch": 0.9439318665720369, "grad_norm": 0.4700903296470642, "learning_rate": 5.49875508738329e-06, "loss": 0.0992, "step": 7980 }, { "epoch": 0.945114738585285, "grad_norm": 0.4010038375854492, "learning_rate": 5.498036868565956e-06, "loss": 0.092, "step": 7990 }, { "epoch": 0.9462976105985332, "grad_norm": 0.5075040459632874, "learning_rate": 5.497318649748624e-06, "loss": 0.0845, "step": 8000 }, { "epoch": 0.9474804826117814, "grad_norm": 0.5531440377235413, "learning_rate": 5.49660043093129e-06, "loss": 0.0866, "step": 8010 }, { "epoch": 0.9486633546250296, "grad_norm": 0.5388569235801697, "learning_rate": 5.495882212113958e-06, "loss": 0.083, "step": 8020 }, { "epoch": 0.9498462266382778, "grad_norm": 0.46871113777160645, "learning_rate": 5.495163993296624e-06, "loss": 0.0797, "step": 8030 }, { "epoch": 0.9510290986515259, "grad_norm": 0.5389636754989624, "learning_rate": 5.4944457744792916e-06, "loss": 0.0923, "step": 8040 }, { "epoch": 0.952211970664774, "grad_norm": 0.5777597427368164, "learning_rate": 5.4937275556619585e-06, "loss": 0.1011, "step": 8050 }, { "epoch": 0.9533948426780222, "grad_norm": 0.5742731690406799, "learning_rate": 5.493009336844625e-06, "loss": 0.0907, "step": 8060 }, { "epoch": 0.9545777146912704, "grad_norm": 0.459907591342926, "learning_rate": 5.492291118027293e-06, "loss": 0.0865, "step": 8070 }, { "epoch": 0.9557605867045186, "grad_norm": 0.5118345618247986, "learning_rate": 5.491572899209959e-06, "loss": 0.0908, "step": 8080 }, { "epoch": 0.9569434587177668, "grad_norm": 0.4662259817123413, "learning_rate": 5.490854680392627e-06, "loss": 0.0874, "step": 8090 }, { "epoch": 0.9581263307310149, "grad_norm": 0.6265116333961487, "learning_rate": 5.490136461575293e-06, "loss": 0.0818, "step": 8100 }, { "epoch": 0.9593092027442631, "grad_norm": 0.3877846896648407, "learning_rate": 5.489418242757961e-06, "loss": 0.0895, "step": 8110 }, { "epoch": 0.9604920747575112, "grad_norm": 0.4799104928970337, "learning_rate": 5.488700023940627e-06, "loss": 0.0861, "step": 8120 }, { "epoch": 0.9616749467707594, "grad_norm": 0.5877001285552979, "learning_rate": 5.487981805123295e-06, "loss": 0.0918, "step": 8130 }, { "epoch": 0.9628578187840076, "grad_norm": 0.4394959509372711, "learning_rate": 5.487263586305962e-06, "loss": 0.0871, "step": 8140 }, { "epoch": 0.9640406907972557, "grad_norm": 0.6177457571029663, "learning_rate": 5.4865453674886286e-06, "loss": 0.0863, "step": 8150 }, { "epoch": 0.9652235628105039, "grad_norm": 0.5394150614738464, "learning_rate": 5.4858271486712955e-06, "loss": 0.0824, "step": 8160 }, { "epoch": 0.9664064348237521, "grad_norm": 0.5127959251403809, "learning_rate": 5.485108929853962e-06, "loss": 0.0854, "step": 8170 }, { "epoch": 0.9675893068370003, "grad_norm": 0.4102541208267212, "learning_rate": 5.484390711036629e-06, "loss": 0.0858, "step": 8180 }, { "epoch": 0.9687721788502484, "grad_norm": 0.5607635378837585, "learning_rate": 5.483672492219296e-06, "loss": 0.0909, "step": 8190 }, { "epoch": 0.9699550508634965, "grad_norm": 0.49821120500564575, "learning_rate": 5.482954273401963e-06, "loss": 0.0875, "step": 8200 }, { "epoch": 0.9711379228767447, "grad_norm": 0.7809765934944153, "learning_rate": 5.48223605458463e-06, "loss": 0.0931, "step": 8210 }, { "epoch": 0.9723207948899929, "grad_norm": 0.5329886078834534, "learning_rate": 5.481517835767297e-06, "loss": 0.0889, "step": 8220 }, { "epoch": 0.9735036669032411, "grad_norm": 0.3727686405181885, "learning_rate": 5.480799616949964e-06, "loss": 0.0934, "step": 8230 }, { "epoch": 0.9746865389164893, "grad_norm": 0.7152588367462158, "learning_rate": 5.480081398132631e-06, "loss": 0.0915, "step": 8240 }, { "epoch": 0.9758694109297374, "grad_norm": 0.6428688168525696, "learning_rate": 5.479363179315299e-06, "loss": 0.0932, "step": 8250 }, { "epoch": 0.9770522829429855, "grad_norm": 0.4829529821872711, "learning_rate": 5.478644960497965e-06, "loss": 0.0961, "step": 8260 }, { "epoch": 0.9782351549562337, "grad_norm": 0.5006052255630493, "learning_rate": 5.4779267416806325e-06, "loss": 0.0943, "step": 8270 }, { "epoch": 0.9794180269694819, "grad_norm": 0.5520729422569275, "learning_rate": 5.4772085228632986e-06, "loss": 0.0867, "step": 8280 }, { "epoch": 0.9806008989827301, "grad_norm": 0.5741737484931946, "learning_rate": 5.476490304045966e-06, "loss": 0.0943, "step": 8290 }, { "epoch": 0.9817837709959782, "grad_norm": 0.44856759905815125, "learning_rate": 5.475772085228632e-06, "loss": 0.0921, "step": 8300 }, { "epoch": 0.9829666430092264, "grad_norm": 0.49074020981788635, "learning_rate": 5.4750538664113e-06, "loss": 0.0882, "step": 8310 }, { "epoch": 0.9841495150224746, "grad_norm": 0.5074799656867981, "learning_rate": 5.474335647593967e-06, "loss": 0.0856, "step": 8320 }, { "epoch": 0.9853323870357228, "grad_norm": 0.6246621012687683, "learning_rate": 5.473617428776634e-06, "loss": 0.0896, "step": 8330 }, { "epoch": 0.9865152590489709, "grad_norm": 0.48970863223075867, "learning_rate": 5.472899209959302e-06, "loss": 0.0916, "step": 8340 }, { "epoch": 0.987698131062219, "grad_norm": 0.4722272455692291, "learning_rate": 5.472180991141968e-06, "loss": 0.0874, "step": 8350 }, { "epoch": 0.9888810030754672, "grad_norm": 0.4224880635738373, "learning_rate": 5.471462772324636e-06, "loss": 0.0866, "step": 8360 }, { "epoch": 0.9900638750887154, "grad_norm": 0.5797383785247803, "learning_rate": 5.470744553507302e-06, "loss": 0.094, "step": 8370 }, { "epoch": 0.9912467471019636, "grad_norm": 0.8932232856750488, "learning_rate": 5.4700263346899695e-06, "loss": 0.0922, "step": 8380 }, { "epoch": 0.9924296191152118, "grad_norm": 0.5436427593231201, "learning_rate": 5.4693081158726356e-06, "loss": 0.092, "step": 8390 }, { "epoch": 0.99361249112846, "grad_norm": 0.5700507164001465, "learning_rate": 5.468589897055303e-06, "loss": 0.0948, "step": 8400 }, { "epoch": 0.994795363141708, "grad_norm": 0.6400808691978455, "learning_rate": 5.46787167823797e-06, "loss": 0.0948, "step": 8410 }, { "epoch": 0.9959782351549562, "grad_norm": 0.49238288402557373, "learning_rate": 5.467153459420637e-06, "loss": 0.0942, "step": 8420 }, { "epoch": 0.9971611071682044, "grad_norm": 0.40776512026786804, "learning_rate": 5.466435240603304e-06, "loss": 0.0925, "step": 8430 }, { "epoch": 0.9983439791814526, "grad_norm": 0.53659588098526, "learning_rate": 5.465717021785971e-06, "loss": 0.0874, "step": 8440 }, { "epoch": 0.9995268511947007, "grad_norm": 0.5676822662353516, "learning_rate": 5.464998802968638e-06, "loss": 0.0899, "step": 8450 }, { "epoch": 1.0002365744026496, "eval_accuracy": 0.6763482716172605, "eval_animal_abuse/accuracy": 0.9946601457231261, "eval_animal_abuse/f1": 0.7623982235381199, "eval_animal_abuse/fpr": 0.002490492377074004, "eval_animal_abuse/precision": 0.7767722473604827, "eval_animal_abuse/recall": 0.748546511627907, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9965232724490135, "eval_child_abuse/f1": 0.6415094339622641, "eval_child_abuse/fpr": 0.0010538465398705257, "eval_child_abuse/precision": 0.748, "eval_child_abuse/recall": 0.5615615615615616, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9714043317696377, "eval_controversial_topics,politics/f1": 0.4626445764301344, "eval_controversial_topics,politics/fpr": 0.010588275672707285, "eval_controversial_topics,politics/precision": 0.5453205600589536, "eval_controversial_topics,politics/recall": 0.4017372421281216, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9549688924377017, "eval_discrimination,stereotype,injustice/f1": 0.7111917209004588, "eval_discrimination,stereotype,injustice/fpr": 0.022770809990241038, "eval_discrimination,stereotype,injustice/precision": 0.7256694970607446, "eval_discrimination,stereotype,injustice/recall": 0.6972803347280335, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9698406361246964, "eval_drug_abuse,weapons,banned_substance/f1": 0.7560877169379793, "eval_drug_abuse,weapons,banned_substance/fpr": 0.021805810181920706, "eval_drug_abuse,weapons,banned_substance/precision": 0.694341487521621, "eval_drug_abuse,weapons,banned_substance/recall": 0.8298877731836976, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9595102638320524, "eval_financial_crime,property_crime,theft/f1": 0.791930244486237, "eval_financial_crime,property_crime,theft/fpr": 0.02239094779131264, "eval_financial_crime,property_crime,theft/precision": 0.7922011287839917, "eval_financial_crime,property_crime,theft/recall": 0.7916595453768587, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8477559303989087, "eval_flagged/aucpr": 0.9048859091677256, "eval_flagged/f1": 0.860009789525208, "eval_flagged/fpr": 0.14300716348497866, "eval_flagged/precision": 0.8805638214565388, "eval_flagged/recall": 0.8403934112582584, "eval_hate_speech,offensive_language/accuracy": 0.9503942509232458, "eval_hate_speech,offensive_language/f1": 0.6811377245508982, "eval_hate_speech,offensive_language/fpr": 0.014306596016809767, "eval_hate_speech,offensive_language/precision": 0.8026713709677419, "eval_hate_speech,offensive_language/recall": 0.5915676077265973, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08912717550992966, "eval_macro_f1": 0.610737446012669, "eval_macro_precision": 0.6765181985059417, "eval_macro_recall": 0.5905474799286716, "eval_micro_f1": 0.7412833176109354, "eval_micro_precision": 0.7723409585846711, "eval_micro_recall": 0.7126269182211, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9878397711015737, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8837375652926107, "eval_non_violent_unethical_behavior/f1": 0.661237942901459, "eval_non_violent_unethical_behavior/fpr": 0.038697890715827854, "eval_non_violent_unethical_behavior/precision": 0.7853770869314911, "eval_non_violent_unethical_behavior/recall": 0.5709861041352754, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9802541837176032, "eval_privacy_violation/f1": 0.7941737471822438, "eval_privacy_violation/fpr": 0.008941695247427715, "eval_privacy_violation/precision": 0.817565155301678, "eval_privacy_violation/recall": 0.7720836142953472, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.2456, "eval_samples_per_second": 100.484, "eval_self_harm/accuracy": 0.9964567322088033, "eval_self_harm/f1": 0.7286624203821656, "eval_self_harm/fpr": 0.00149068739112957, "eval_self_harm/precision": 0.7626666666666667, "eval_self_harm/recall": 0.697560975609756, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9823002961040689, "eval_sexually_explicit,adult_content/f1": 0.6486129458388376, "eval_sexually_explicit,adult_content/fpr": 0.010210169260401912, "eval_sexually_explicit,adult_content/precision": 0.6211258697027198, "eval_sexually_explicit,adult_content/recall": 0.6786454733932273, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.571, "eval_terrorism,organized_crime/accuracy": 0.9920817114149783, "eval_terrorism,organized_crime/f1": 0.06299212598425197, "eval_terrorism,organized_crime/fpr": 0.0001844616235976721, "eval_terrorism,organized_crime/precision": 0.5925925925925926, "eval_terrorism,organized_crime/recall": 0.033264033264033266, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9146787769903849, "eval_violence,aiding_and_abetting,incitement/f1": 0.8477454210823167, "eval_violence,aiding_and_abetting,incitement/fpr": 0.07742169439281973, "eval_violence,aiding_and_abetting,incitement/precision": 0.8069511161345013, "eval_violence,aiding_and_abetting,incitement/recall": 0.8928839419709855, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 8456 }, { "epoch": 1.000709723207949, "grad_norm": 0.6274157762527466, "learning_rate": 5.464280584151305e-06, "loss": 0.0809, "step": 8460 }, { "epoch": 1.001892595221197, "grad_norm": 0.5995720624923706, "learning_rate": 5.463562365333972e-06, "loss": 0.0925, "step": 8470 }, { "epoch": 1.0030754672344453, "grad_norm": 0.4436396360397339, "learning_rate": 5.462844146516639e-06, "loss": 0.0853, "step": 8480 }, { "epoch": 1.0042583392476934, "grad_norm": 0.5848539471626282, "learning_rate": 5.462125927699306e-06, "loss": 0.0976, "step": 8490 }, { "epoch": 1.0054412112609417, "grad_norm": 0.45275256037712097, "learning_rate": 5.4614077088819726e-06, "loss": 0.0845, "step": 8500 }, { "epoch": 1.0066240832741897, "grad_norm": 0.44840994477272034, "learning_rate": 5.4606894900646395e-06, "loss": 0.0842, "step": 8510 }, { "epoch": 1.0078069552874378, "grad_norm": 0.5017544627189636, "learning_rate": 5.459971271247307e-06, "loss": 0.0904, "step": 8520 }, { "epoch": 1.008989827300686, "grad_norm": 0.5083673596382141, "learning_rate": 5.459253052429973e-06, "loss": 0.0834, "step": 8530 }, { "epoch": 1.0101726993139342, "grad_norm": 0.4569782018661499, "learning_rate": 5.458534833612641e-06, "loss": 0.0841, "step": 8540 }, { "epoch": 1.0113555713271825, "grad_norm": 0.6606743335723877, "learning_rate": 5.457816614795307e-06, "loss": 0.0853, "step": 8550 }, { "epoch": 1.0125384433404305, "grad_norm": 0.7549706697463989, "learning_rate": 5.457098395977975e-06, "loss": 0.0908, "step": 8560 }, { "epoch": 1.0137213153536788, "grad_norm": 0.5130405426025391, "learning_rate": 5.456380177160641e-06, "loss": 0.0807, "step": 8570 }, { "epoch": 1.014904187366927, "grad_norm": 0.5999173521995544, "learning_rate": 5.455661958343309e-06, "loss": 0.0864, "step": 8580 }, { "epoch": 1.016087059380175, "grad_norm": 0.4243204593658447, "learning_rate": 5.454943739525976e-06, "loss": 0.077, "step": 8590 }, { "epoch": 1.0172699313934233, "grad_norm": 0.5313964486122131, "learning_rate": 5.454225520708643e-06, "loss": 0.0723, "step": 8600 }, { "epoch": 1.0184528034066713, "grad_norm": 0.5646783709526062, "learning_rate": 5.4535073018913096e-06, "loss": 0.0782, "step": 8610 }, { "epoch": 1.0196356754199196, "grad_norm": 0.5696002244949341, "learning_rate": 5.4527890830739765e-06, "loss": 0.0892, "step": 8620 }, { "epoch": 1.0208185474331677, "grad_norm": 0.4519401788711548, "learning_rate": 5.452070864256644e-06, "loss": 0.0833, "step": 8630 }, { "epoch": 1.022001419446416, "grad_norm": 0.3950730562210083, "learning_rate": 5.45135264543931e-06, "loss": 0.0934, "step": 8640 }, { "epoch": 1.023184291459664, "grad_norm": 0.48284634947776794, "learning_rate": 5.450634426621978e-06, "loss": 0.079, "step": 8650 }, { "epoch": 1.0243671634729121, "grad_norm": 0.7064188718795776, "learning_rate": 5.449916207804644e-06, "loss": 0.0855, "step": 8660 }, { "epoch": 1.0255500354861604, "grad_norm": 0.48714372515678406, "learning_rate": 5.449197988987312e-06, "loss": 0.0816, "step": 8670 }, { "epoch": 1.0267329074994085, "grad_norm": 0.5230892896652222, "learning_rate": 5.448479770169979e-06, "loss": 0.08, "step": 8680 }, { "epoch": 1.0279157795126568, "grad_norm": 0.667404294013977, "learning_rate": 5.447761551352646e-06, "loss": 0.0843, "step": 8690 }, { "epoch": 1.0290986515259049, "grad_norm": 0.5393511652946472, "learning_rate": 5.447043332535313e-06, "loss": 0.1022, "step": 8700 }, { "epoch": 1.0302815235391531, "grad_norm": 0.4754451811313629, "learning_rate": 5.44632511371798e-06, "loss": 0.0862, "step": 8710 }, { "epoch": 1.0314643955524012, "grad_norm": 0.5088645815849304, "learning_rate": 5.4456068949006466e-06, "loss": 0.0886, "step": 8720 }, { "epoch": 1.0326472675656495, "grad_norm": 0.5827282667160034, "learning_rate": 5.4448886760833135e-06, "loss": 0.0825, "step": 8730 }, { "epoch": 1.0338301395788976, "grad_norm": 0.5488784909248352, "learning_rate": 5.44417045726598e-06, "loss": 0.0972, "step": 8740 }, { "epoch": 1.0350130115921456, "grad_norm": 0.4169002175331116, "learning_rate": 5.443452238448647e-06, "loss": 0.0844, "step": 8750 }, { "epoch": 1.036195883605394, "grad_norm": 0.5005811452865601, "learning_rate": 5.442734019631314e-06, "loss": 0.0884, "step": 8760 }, { "epoch": 1.037378755618642, "grad_norm": 0.5493341088294983, "learning_rate": 5.442015800813981e-06, "loss": 0.0857, "step": 8770 }, { "epoch": 1.0385616276318903, "grad_norm": 0.5497134327888489, "learning_rate": 5.441297581996648e-06, "loss": 0.0859, "step": 8780 }, { "epoch": 1.0397444996451384, "grad_norm": 0.4470288157463074, "learning_rate": 5.440579363179316e-06, "loss": 0.0846, "step": 8790 }, { "epoch": 1.0409273716583867, "grad_norm": 0.5622507929801941, "learning_rate": 5.439861144361982e-06, "loss": 0.0928, "step": 8800 }, { "epoch": 1.0421102436716347, "grad_norm": 0.5007113814353943, "learning_rate": 5.43914292554465e-06, "loss": 0.0972, "step": 8810 }, { "epoch": 1.0432931156848828, "grad_norm": 0.6156823635101318, "learning_rate": 5.438424706727316e-06, "loss": 0.0898, "step": 8820 }, { "epoch": 1.044475987698131, "grad_norm": 0.36993855237960815, "learning_rate": 5.4377064879099836e-06, "loss": 0.0853, "step": 8830 }, { "epoch": 1.0456588597113792, "grad_norm": 0.5635237693786621, "learning_rate": 5.4369882690926505e-06, "loss": 0.0885, "step": 8840 }, { "epoch": 1.0468417317246275, "grad_norm": 0.5274733304977417, "learning_rate": 5.436270050275317e-06, "loss": 0.082, "step": 8850 }, { "epoch": 1.0480246037378755, "grad_norm": 0.48065507411956787, "learning_rate": 5.435551831457984e-06, "loss": 0.0846, "step": 8860 }, { "epoch": 1.0492074757511238, "grad_norm": 0.43446817994117737, "learning_rate": 5.434833612640651e-06, "loss": 0.0912, "step": 8870 }, { "epoch": 1.050390347764372, "grad_norm": 0.5465797185897827, "learning_rate": 5.434115393823318e-06, "loss": 0.0919, "step": 8880 }, { "epoch": 1.05157321977762, "grad_norm": 0.5790572166442871, "learning_rate": 5.433397175005985e-06, "loss": 0.0819, "step": 8890 }, { "epoch": 1.0527560917908683, "grad_norm": 0.6667059659957886, "learning_rate": 5.432678956188653e-06, "loss": 0.0869, "step": 8900 }, { "epoch": 1.0539389638041163, "grad_norm": 0.6088250279426575, "learning_rate": 5.431960737371319e-06, "loss": 0.0933, "step": 8910 }, { "epoch": 1.0551218358173646, "grad_norm": 0.638674259185791, "learning_rate": 5.431242518553987e-06, "loss": 0.086, "step": 8920 }, { "epoch": 1.0563047078306127, "grad_norm": 0.5147150754928589, "learning_rate": 5.430524299736653e-06, "loss": 0.0842, "step": 8930 }, { "epoch": 1.057487579843861, "grad_norm": 0.49922576546669006, "learning_rate": 5.4298060809193206e-06, "loss": 0.0915, "step": 8940 }, { "epoch": 1.058670451857109, "grad_norm": 0.49659278988838196, "learning_rate": 5.429087862101987e-06, "loss": 0.0846, "step": 8950 }, { "epoch": 1.0598533238703571, "grad_norm": 0.4895166754722595, "learning_rate": 5.428369643284654e-06, "loss": 0.1026, "step": 8960 }, { "epoch": 1.0610361958836054, "grad_norm": 0.5322574973106384, "learning_rate": 5.427651424467321e-06, "loss": 0.0844, "step": 8970 }, { "epoch": 1.0622190678968535, "grad_norm": 0.6189786791801453, "learning_rate": 5.426933205649988e-06, "loss": 0.0874, "step": 8980 }, { "epoch": 1.0634019399101018, "grad_norm": 0.6155826449394226, "learning_rate": 5.426214986832655e-06, "loss": 0.0904, "step": 8990 }, { "epoch": 1.0645848119233499, "grad_norm": 0.47198542952537537, "learning_rate": 5.425496768015322e-06, "loss": 0.0886, "step": 9000 }, { "epoch": 1.0657676839365982, "grad_norm": 0.4745090901851654, "learning_rate": 5.424778549197989e-06, "loss": 0.0931, "step": 9010 }, { "epoch": 1.0669505559498462, "grad_norm": 0.48856914043426514, "learning_rate": 5.424060330380656e-06, "loss": 0.0884, "step": 9020 }, { "epoch": 1.0681334279630943, "grad_norm": 0.5496382117271423, "learning_rate": 5.423342111563323e-06, "loss": 0.0911, "step": 9030 }, { "epoch": 1.0693162999763426, "grad_norm": 0.4917236864566803, "learning_rate": 5.42262389274599e-06, "loss": 0.0898, "step": 9040 }, { "epoch": 1.0704991719895907, "grad_norm": 0.4246980845928192, "learning_rate": 5.421905673928657e-06, "loss": 0.0859, "step": 9050 }, { "epoch": 1.071682044002839, "grad_norm": 0.7078351974487305, "learning_rate": 5.4211874551113245e-06, "loss": 0.091, "step": 9060 }, { "epoch": 1.072864916016087, "grad_norm": 0.5810017585754395, "learning_rate": 5.4204692362939906e-06, "loss": 0.0938, "step": 9070 }, { "epoch": 1.0740477880293353, "grad_norm": 0.5764498114585876, "learning_rate": 5.419751017476658e-06, "loss": 0.0909, "step": 9080 }, { "epoch": 1.0752306600425834, "grad_norm": 0.6192542910575867, "learning_rate": 5.419032798659325e-06, "loss": 0.0882, "step": 9090 }, { "epoch": 1.0764135320558315, "grad_norm": 0.47284090518951416, "learning_rate": 5.418314579841992e-06, "loss": 0.0905, "step": 9100 }, { "epoch": 1.0775964040690797, "grad_norm": 0.475250780582428, "learning_rate": 5.417596361024659e-06, "loss": 0.0952, "step": 9110 }, { "epoch": 1.0787792760823278, "grad_norm": 0.5209870934486389, "learning_rate": 5.416878142207326e-06, "loss": 0.086, "step": 9120 }, { "epoch": 1.0799621480955761, "grad_norm": 0.5307260155677795, "learning_rate": 5.416159923389993e-06, "loss": 0.0856, "step": 9130 }, { "epoch": 1.0811450201088242, "grad_norm": 0.46354934573173523, "learning_rate": 5.41544170457266e-06, "loss": 0.0905, "step": 9140 }, { "epoch": 1.0823278921220725, "grad_norm": 0.6902420520782471, "learning_rate": 5.414723485755327e-06, "loss": 0.0854, "step": 9150 }, { "epoch": 1.0835107641353205, "grad_norm": 0.5147972106933594, "learning_rate": 5.414005266937994e-06, "loss": 0.0851, "step": 9160 }, { "epoch": 1.0846936361485686, "grad_norm": 0.469000905752182, "learning_rate": 5.4132870481206615e-06, "loss": 0.0929, "step": 9170 }, { "epoch": 1.085876508161817, "grad_norm": 0.561771035194397, "learning_rate": 5.4125688293033275e-06, "loss": 0.0877, "step": 9180 }, { "epoch": 1.087059380175065, "grad_norm": 0.3447943925857544, "learning_rate": 5.411850610485995e-06, "loss": 0.0789, "step": 9190 }, { "epoch": 1.0882422521883133, "grad_norm": 0.4885978698730469, "learning_rate": 5.411132391668661e-06, "loss": 0.0895, "step": 9200 }, { "epoch": 1.0894251242015613, "grad_norm": 0.48291468620300293, "learning_rate": 5.410414172851329e-06, "loss": 0.0955, "step": 9210 }, { "epoch": 1.0906079962148096, "grad_norm": 0.5172104835510254, "learning_rate": 5.409695954033995e-06, "loss": 0.0899, "step": 9220 }, { "epoch": 1.0917908682280577, "grad_norm": 0.45213428139686584, "learning_rate": 5.408977735216663e-06, "loss": 0.0876, "step": 9230 }, { "epoch": 1.0929737402413058, "grad_norm": 0.6115255355834961, "learning_rate": 5.40825951639933e-06, "loss": 0.0931, "step": 9240 }, { "epoch": 1.094156612254554, "grad_norm": 0.4367902874946594, "learning_rate": 5.407541297581997e-06, "loss": 0.0849, "step": 9250 }, { "epoch": 1.0953394842678021, "grad_norm": 0.4378194808959961, "learning_rate": 5.406823078764664e-06, "loss": 0.079, "step": 9260 }, { "epoch": 1.0965223562810504, "grad_norm": 0.4875485599040985, "learning_rate": 5.406104859947331e-06, "loss": 0.0796, "step": 9270 }, { "epoch": 1.0977052282942985, "grad_norm": 0.4118654429912567, "learning_rate": 5.405386641129998e-06, "loss": 0.0861, "step": 9280 }, { "epoch": 1.0988881003075468, "grad_norm": 0.6090260148048401, "learning_rate": 5.4046684223126645e-06, "loss": 0.0925, "step": 9290 }, { "epoch": 1.1000709723207949, "grad_norm": 0.4736242890357971, "learning_rate": 5.4039502034953315e-06, "loss": 0.0961, "step": 9300 }, { "epoch": 1.101253844334043, "grad_norm": 0.4856838285923004, "learning_rate": 5.403231984677998e-06, "loss": 0.0851, "step": 9310 }, { "epoch": 1.1024367163472912, "grad_norm": 0.62180095911026, "learning_rate": 5.402513765860665e-06, "loss": 0.0878, "step": 9320 }, { "epoch": 1.1036195883605393, "grad_norm": 0.4781513810157776, "learning_rate": 5.401795547043333e-06, "loss": 0.0827, "step": 9330 }, { "epoch": 1.1048024603737876, "grad_norm": 0.4106467664241791, "learning_rate": 5.401077328226e-06, "loss": 0.0831, "step": 9340 }, { "epoch": 1.1059853323870357, "grad_norm": 0.4056965410709381, "learning_rate": 5.400359109408667e-06, "loss": 0.0913, "step": 9350 }, { "epoch": 1.107168204400284, "grad_norm": 0.5375430583953857, "learning_rate": 5.399640890591334e-06, "loss": 0.0933, "step": 9360 }, { "epoch": 1.108351076413532, "grad_norm": 0.40786123275756836, "learning_rate": 5.398922671774001e-06, "loss": 0.083, "step": 9370 }, { "epoch": 1.1095339484267803, "grad_norm": 0.4962882697582245, "learning_rate": 5.398204452956668e-06, "loss": 0.0865, "step": 9380 }, { "epoch": 1.1107168204400284, "grad_norm": 0.5573738217353821, "learning_rate": 5.397486234139335e-06, "loss": 0.0979, "step": 9390 }, { "epoch": 1.1118996924532765, "grad_norm": 0.6149940490722656, "learning_rate": 5.3967680153220015e-06, "loss": 0.0924, "step": 9400 }, { "epoch": 1.1130825644665248, "grad_norm": 0.6028623580932617, "learning_rate": 5.3960497965046685e-06, "loss": 0.0904, "step": 9410 }, { "epoch": 1.1142654364797728, "grad_norm": 0.5852101445198059, "learning_rate": 5.395331577687335e-06, "loss": 0.0914, "step": 9420 }, { "epoch": 1.1154483084930211, "grad_norm": 0.4011322557926178, "learning_rate": 5.394613358870002e-06, "loss": 0.0927, "step": 9430 }, { "epoch": 1.1166311805062692, "grad_norm": 0.5081813335418701, "learning_rate": 5.39389514005267e-06, "loss": 0.082, "step": 9440 }, { "epoch": 1.1178140525195175, "grad_norm": 0.46505123376846313, "learning_rate": 5.393176921235336e-06, "loss": 0.0891, "step": 9450 }, { "epoch": 1.1189969245327656, "grad_norm": 0.5162445902824402, "learning_rate": 5.392458702418004e-06, "loss": 0.0912, "step": 9460 }, { "epoch": 1.1201797965460136, "grad_norm": 0.560789167881012, "learning_rate": 5.39174048360067e-06, "loss": 0.0824, "step": 9470 }, { "epoch": 1.121362668559262, "grad_norm": 0.424791544675827, "learning_rate": 5.391022264783338e-06, "loss": 0.087, "step": 9480 }, { "epoch": 1.12254554057251, "grad_norm": 0.8710706830024719, "learning_rate": 5.390304045966004e-06, "loss": 0.0893, "step": 9490 }, { "epoch": 1.1237284125857583, "grad_norm": 0.5220861434936523, "learning_rate": 5.389585827148672e-06, "loss": 0.0858, "step": 9500 }, { "epoch": 1.1249112845990064, "grad_norm": 0.49459177255630493, "learning_rate": 5.3888676083313385e-06, "loss": 0.0852, "step": 9510 }, { "epoch": 1.1260941566122546, "grad_norm": 0.5049262046813965, "learning_rate": 5.3881493895140055e-06, "loss": 0.082, "step": 9520 }, { "epoch": 1.1272770286255027, "grad_norm": 0.5695374608039856, "learning_rate": 5.387431170696672e-06, "loss": 0.0962, "step": 9530 }, { "epoch": 1.1284599006387508, "grad_norm": 0.4049016535282135, "learning_rate": 5.386712951879339e-06, "loss": 0.0909, "step": 9540 }, { "epoch": 1.129642772651999, "grad_norm": 0.5591046810150146, "learning_rate": 5.385994733062006e-06, "loss": 0.0898, "step": 9550 }, { "epoch": 1.1308256446652472, "grad_norm": 0.5406733751296997, "learning_rate": 5.385276514244673e-06, "loss": 0.096, "step": 9560 }, { "epoch": 1.1320085166784954, "grad_norm": 0.5433228015899658, "learning_rate": 5.38455829542734e-06, "loss": 0.0885, "step": 9570 }, { "epoch": 1.1331913886917435, "grad_norm": 0.47881484031677246, "learning_rate": 5.383840076610007e-06, "loss": 0.08, "step": 9580 }, { "epoch": 1.1343742607049918, "grad_norm": 0.48588788509368896, "learning_rate": 5.383121857792675e-06, "loss": 0.091, "step": 9590 }, { "epoch": 1.1355571327182399, "grad_norm": 0.4655308723449707, "learning_rate": 5.382403638975341e-06, "loss": 0.0884, "step": 9600 }, { "epoch": 1.1367400047314882, "grad_norm": 0.491664856672287, "learning_rate": 5.381685420158009e-06, "loss": 0.082, "step": 9610 }, { "epoch": 1.1379228767447362, "grad_norm": 0.4754998981952667, "learning_rate": 5.3809672013406755e-06, "loss": 0.0975, "step": 9620 }, { "epoch": 1.1391057487579843, "grad_norm": 0.6044057607650757, "learning_rate": 5.3802489825233425e-06, "loss": 0.0929, "step": 9630 }, { "epoch": 1.1402886207712326, "grad_norm": 0.5464172959327698, "learning_rate": 5.379530763706009e-06, "loss": 0.0937, "step": 9640 }, { "epoch": 1.1414714927844807, "grad_norm": 0.4828408360481262, "learning_rate": 5.378812544888676e-06, "loss": 0.0847, "step": 9650 }, { "epoch": 1.142654364797729, "grad_norm": 0.5283211469650269, "learning_rate": 5.378094326071343e-06, "loss": 0.0901, "step": 9660 }, { "epoch": 1.143837236810977, "grad_norm": 0.4238324761390686, "learning_rate": 5.37737610725401e-06, "loss": 0.0809, "step": 9670 }, { "epoch": 1.1450201088242253, "grad_norm": 1.0856746435165405, "learning_rate": 5.376657888436677e-06, "loss": 0.0888, "step": 9680 }, { "epoch": 1.1462029808374734, "grad_norm": 0.4695402979850769, "learning_rate": 5.375939669619344e-06, "loss": 0.0858, "step": 9690 }, { "epoch": 1.1473858528507215, "grad_norm": 0.47493284940719604, "learning_rate": 5.375221450802011e-06, "loss": 0.0845, "step": 9700 }, { "epoch": 1.1485687248639698, "grad_norm": 0.36611706018447876, "learning_rate": 5.374503231984679e-06, "loss": 0.0822, "step": 9710 }, { "epoch": 1.1497515968772178, "grad_norm": 0.5464303493499756, "learning_rate": 5.373785013167345e-06, "loss": 0.0834, "step": 9720 }, { "epoch": 1.1509344688904661, "grad_norm": 0.7112718820571899, "learning_rate": 5.3730667943500125e-06, "loss": 0.0872, "step": 9730 }, { "epoch": 1.1521173409037142, "grad_norm": 0.5661655068397522, "learning_rate": 5.372348575532679e-06, "loss": 0.0905, "step": 9740 }, { "epoch": 1.1533002129169625, "grad_norm": 0.4760710299015045, "learning_rate": 5.371630356715346e-06, "loss": 0.0917, "step": 9750 }, { "epoch": 1.1544830849302106, "grad_norm": 0.9727491736412048, "learning_rate": 5.3709121378980125e-06, "loss": 0.1018, "step": 9760 }, { "epoch": 1.1556659569434586, "grad_norm": 0.4797080159187317, "learning_rate": 5.37019391908068e-06, "loss": 0.0798, "step": 9770 }, { "epoch": 1.156848828956707, "grad_norm": 0.46472564339637756, "learning_rate": 5.369475700263347e-06, "loss": 0.0839, "step": 9780 }, { "epoch": 1.158031700969955, "grad_norm": 0.5081021189689636, "learning_rate": 5.368757481446014e-06, "loss": 0.0924, "step": 9790 }, { "epoch": 1.1592145729832033, "grad_norm": 0.4785832166671753, "learning_rate": 5.368039262628681e-06, "loss": 0.0936, "step": 9800 }, { "epoch": 1.1603974449964514, "grad_norm": 0.67185378074646, "learning_rate": 5.367321043811348e-06, "loss": 0.0878, "step": 9810 }, { "epoch": 1.1615803170096997, "grad_norm": 0.5562613010406494, "learning_rate": 5.366602824994015e-06, "loss": 0.087, "step": 9820 }, { "epoch": 1.1627631890229477, "grad_norm": 0.49450069665908813, "learning_rate": 5.365884606176682e-06, "loss": 0.0841, "step": 9830 }, { "epoch": 1.1639460610361958, "grad_norm": 0.6051644086837769, "learning_rate": 5.3651663873593495e-06, "loss": 0.096, "step": 9840 }, { "epoch": 1.165128933049444, "grad_norm": 0.5072309374809265, "learning_rate": 5.364448168542016e-06, "loss": 0.0992, "step": 9850 }, { "epoch": 1.1663118050626922, "grad_norm": 0.4744078814983368, "learning_rate": 5.363729949724683e-06, "loss": 0.0816, "step": 9860 }, { "epoch": 1.1674946770759405, "grad_norm": 0.5826630592346191, "learning_rate": 5.3630117309073495e-06, "loss": 0.0866, "step": 9870 }, { "epoch": 1.1686775490891885, "grad_norm": 0.4936651885509491, "learning_rate": 5.362293512090017e-06, "loss": 0.0918, "step": 9880 }, { "epoch": 1.1698604211024368, "grad_norm": 0.45047131180763245, "learning_rate": 5.361575293272684e-06, "loss": 0.0875, "step": 9890 }, { "epoch": 1.171043293115685, "grad_norm": 0.5508641004562378, "learning_rate": 5.360857074455351e-06, "loss": 0.0929, "step": 9900 }, { "epoch": 1.172226165128933, "grad_norm": 0.6044427156448364, "learning_rate": 5.360138855638018e-06, "loss": 0.077, "step": 9910 }, { "epoch": 1.1734090371421813, "grad_norm": 0.5424599647521973, "learning_rate": 5.359420636820685e-06, "loss": 0.0832, "step": 9920 }, { "epoch": 1.1745919091554293, "grad_norm": 0.6143479347229004, "learning_rate": 5.358702418003352e-06, "loss": 0.0771, "step": 9930 }, { "epoch": 1.1757747811686776, "grad_norm": 0.45856624841690063, "learning_rate": 5.357984199186019e-06, "loss": 0.0814, "step": 9940 }, { "epoch": 1.1769576531819257, "grad_norm": 0.43320778012275696, "learning_rate": 5.357265980368686e-06, "loss": 0.0864, "step": 9950 }, { "epoch": 1.178140525195174, "grad_norm": 0.4647403657436371, "learning_rate": 5.356547761551353e-06, "loss": 0.0856, "step": 9960 }, { "epoch": 1.179323397208422, "grad_norm": 0.4761173129081726, "learning_rate": 5.3558295427340195e-06, "loss": 0.0823, "step": 9970 }, { "epoch": 1.1805062692216701, "grad_norm": 0.561248779296875, "learning_rate": 5.355111323916687e-06, "loss": 0.0791, "step": 9980 }, { "epoch": 1.1816891412349184, "grad_norm": 0.505769670009613, "learning_rate": 5.354393105099353e-06, "loss": 0.0858, "step": 9990 }, { "epoch": 1.1828720132481665, "grad_norm": 0.44343236088752747, "learning_rate": 5.353674886282021e-06, "loss": 0.0896, "step": 10000 }, { "epoch": 1.1840548852614148, "grad_norm": 0.5441662669181824, "learning_rate": 5.352956667464687e-06, "loss": 0.0907, "step": 10010 }, { "epoch": 1.1852377572746629, "grad_norm": 0.5024474263191223, "learning_rate": 5.352238448647355e-06, "loss": 0.0846, "step": 10020 }, { "epoch": 1.1864206292879111, "grad_norm": 0.444717675447464, "learning_rate": 5.351520229830021e-06, "loss": 0.0862, "step": 10030 }, { "epoch": 1.1876035013011592, "grad_norm": 0.4260452091693878, "learning_rate": 5.350802011012689e-06, "loss": 0.0895, "step": 10040 }, { "epoch": 1.1887863733144073, "grad_norm": 0.48192930221557617, "learning_rate": 5.350083792195356e-06, "loss": 0.0864, "step": 10050 }, { "epoch": 1.1899692453276556, "grad_norm": 0.6497620940208435, "learning_rate": 5.349365573378023e-06, "loss": 0.0868, "step": 10060 }, { "epoch": 1.1911521173409036, "grad_norm": 0.49749717116355896, "learning_rate": 5.34864735456069e-06, "loss": 0.0885, "step": 10070 }, { "epoch": 1.192334989354152, "grad_norm": 0.47007885575294495, "learning_rate": 5.3479291357433565e-06, "loss": 0.0908, "step": 10080 }, { "epoch": 1.1935178613674, "grad_norm": 0.5130528211593628, "learning_rate": 5.347210916926024e-06, "loss": 0.0911, "step": 10090 }, { "epoch": 1.1947007333806483, "grad_norm": 0.4154418706893921, "learning_rate": 5.34649269810869e-06, "loss": 0.0909, "step": 10100 }, { "epoch": 1.1958836053938964, "grad_norm": 0.40893369913101196, "learning_rate": 5.345774479291358e-06, "loss": 0.0839, "step": 10110 }, { "epoch": 1.1970664774071444, "grad_norm": 0.49418845772743225, "learning_rate": 5.345056260474024e-06, "loss": 0.0734, "step": 10120 }, { "epoch": 1.1982493494203927, "grad_norm": 0.611636221408844, "learning_rate": 5.344338041656692e-06, "loss": 0.088, "step": 10130 }, { "epoch": 1.1994322214336408, "grad_norm": 0.4633472263813019, "learning_rate": 5.343619822839358e-06, "loss": 0.0922, "step": 10140 }, { "epoch": 1.200615093446889, "grad_norm": 0.4591403305530548, "learning_rate": 5.342901604022026e-06, "loss": 0.0936, "step": 10150 }, { "epoch": 1.2017979654601372, "grad_norm": 0.496496319770813, "learning_rate": 5.342183385204693e-06, "loss": 0.0933, "step": 10160 }, { "epoch": 1.2029808374733855, "grad_norm": 0.5422917008399963, "learning_rate": 5.34146516638736e-06, "loss": 0.0966, "step": 10170 }, { "epoch": 1.2041637094866335, "grad_norm": 0.39381733536720276, "learning_rate": 5.340746947570027e-06, "loss": 0.0809, "step": 10180 }, { "epoch": 1.2053465814998816, "grad_norm": 0.40148812532424927, "learning_rate": 5.3400287287526935e-06, "loss": 0.0916, "step": 10190 }, { "epoch": 1.20652945351313, "grad_norm": 0.3905751407146454, "learning_rate": 5.3393105099353604e-06, "loss": 0.092, "step": 10200 }, { "epoch": 1.207712325526378, "grad_norm": 0.40296775102615356, "learning_rate": 5.338592291118027e-06, "loss": 0.0859, "step": 10210 }, { "epoch": 1.2088951975396263, "grad_norm": 0.46552565693855286, "learning_rate": 5.337874072300694e-06, "loss": 0.0896, "step": 10220 }, { "epoch": 1.2100780695528743, "grad_norm": 0.46913525462150574, "learning_rate": 5.337155853483361e-06, "loss": 0.0867, "step": 10230 }, { "epoch": 1.2112609415661226, "grad_norm": 0.6057754755020142, "learning_rate": 5.336437634666028e-06, "loss": 0.0859, "step": 10240 }, { "epoch": 1.2124438135793707, "grad_norm": 0.4530210793018341, "learning_rate": 5.335719415848696e-06, "loss": 0.0859, "step": 10250 }, { "epoch": 1.2136266855926188, "grad_norm": 0.5956141948699951, "learning_rate": 5.335001197031362e-06, "loss": 0.0854, "step": 10260 }, { "epoch": 1.214809557605867, "grad_norm": 0.5126981735229492, "learning_rate": 5.33428297821403e-06, "loss": 0.0857, "step": 10270 }, { "epoch": 1.2159924296191151, "grad_norm": 0.5384624004364014, "learning_rate": 5.333564759396696e-06, "loss": 0.0885, "step": 10280 }, { "epoch": 1.2171753016323634, "grad_norm": 0.4633718729019165, "learning_rate": 5.332846540579364e-06, "loss": 0.0913, "step": 10290 }, { "epoch": 1.2183581736456115, "grad_norm": 0.5757312774658203, "learning_rate": 5.33212832176203e-06, "loss": 0.0874, "step": 10300 }, { "epoch": 1.2195410456588598, "grad_norm": 0.5390454530715942, "learning_rate": 5.3314101029446974e-06, "loss": 0.092, "step": 10310 }, { "epoch": 1.2207239176721079, "grad_norm": 0.44399166107177734, "learning_rate": 5.330691884127364e-06, "loss": 0.0888, "step": 10320 }, { "epoch": 1.221906789685356, "grad_norm": 0.4444388151168823, "learning_rate": 5.329973665310031e-06, "loss": 0.0845, "step": 10330 }, { "epoch": 1.2230896616986042, "grad_norm": 0.9916264414787292, "learning_rate": 5.329255446492698e-06, "loss": 0.0842, "step": 10340 }, { "epoch": 1.2242725337118523, "grad_norm": 0.7519932985305786, "learning_rate": 5.328537227675365e-06, "loss": 0.0861, "step": 10350 }, { "epoch": 1.2254554057251006, "grad_norm": 0.4394116997718811, "learning_rate": 5.327819008858033e-06, "loss": 0.0936, "step": 10360 }, { "epoch": 1.2266382777383487, "grad_norm": 0.5064253807067871, "learning_rate": 5.327100790040699e-06, "loss": 0.0923, "step": 10370 }, { "epoch": 1.227821149751597, "grad_norm": 0.4315269887447357, "learning_rate": 5.326382571223367e-06, "loss": 0.0842, "step": 10380 }, { "epoch": 1.229004021764845, "grad_norm": 0.45311784744262695, "learning_rate": 5.325664352406033e-06, "loss": 0.0853, "step": 10390 }, { "epoch": 1.230186893778093, "grad_norm": 0.48846128582954407, "learning_rate": 5.324946133588701e-06, "loss": 0.088, "step": 10400 }, { "epoch": 1.2313697657913414, "grad_norm": 0.5848224759101868, "learning_rate": 5.324227914771367e-06, "loss": 0.0846, "step": 10410 }, { "epoch": 1.2325526378045895, "grad_norm": 0.4260803461074829, "learning_rate": 5.3235096959540344e-06, "loss": 0.0895, "step": 10420 }, { "epoch": 1.2337355098178377, "grad_norm": 0.5965057611465454, "learning_rate": 5.322791477136701e-06, "loss": 0.0892, "step": 10430 }, { "epoch": 1.2349183818310858, "grad_norm": 0.5608574151992798, "learning_rate": 5.322073258319368e-06, "loss": 0.0879, "step": 10440 }, { "epoch": 1.2361012538443341, "grad_norm": 0.41147366166114807, "learning_rate": 5.321355039502035e-06, "loss": 0.0847, "step": 10450 }, { "epoch": 1.2372841258575822, "grad_norm": 0.44151705503463745, "learning_rate": 5.320636820684702e-06, "loss": 0.0833, "step": 10460 }, { "epoch": 1.2384669978708303, "grad_norm": 0.6828200221061707, "learning_rate": 5.319918601867369e-06, "loss": 0.0872, "step": 10470 }, { "epoch": 1.2396498698840785, "grad_norm": 0.6200929880142212, "learning_rate": 5.319200383050036e-06, "loss": 0.0874, "step": 10480 }, { "epoch": 1.2408327418973266, "grad_norm": 0.4809166193008423, "learning_rate": 5.318482164232703e-06, "loss": 0.0799, "step": 10490 }, { "epoch": 1.242015613910575, "grad_norm": 0.3847498893737793, "learning_rate": 5.31776394541537e-06, "loss": 0.0816, "step": 10500 }, { "epoch": 1.243198485923823, "grad_norm": 0.5115920305252075, "learning_rate": 5.317045726598037e-06, "loss": 0.0875, "step": 10510 }, { "epoch": 1.2443813579370713, "grad_norm": 0.5326212048530579, "learning_rate": 5.316327507780704e-06, "loss": 0.087, "step": 10520 }, { "epoch": 1.2455642299503193, "grad_norm": 0.5160808563232422, "learning_rate": 5.315609288963371e-06, "loss": 0.0961, "step": 10530 }, { "epoch": 1.2467471019635676, "grad_norm": 0.5371615886688232, "learning_rate": 5.314891070146038e-06, "loss": 0.0874, "step": 10540 }, { "epoch": 1.2479299739768157, "grad_norm": 0.43422722816467285, "learning_rate": 5.3141728513287044e-06, "loss": 0.0892, "step": 10550 }, { "epoch": 1.2491128459900638, "grad_norm": 0.5045953392982483, "learning_rate": 5.313454632511372e-06, "loss": 0.0845, "step": 10560 }, { "epoch": 1.250295718003312, "grad_norm": 0.4718835949897766, "learning_rate": 5.312736413694038e-06, "loss": 0.0843, "step": 10570 }, { "epoch": 1.250295718003312, "eval_accuracy": 0.6788435306251456, "eval_animal_abuse/accuracy": 0.9945437003027581, "eval_animal_abuse/f1": 0.7703081232492998, "eval_animal_abuse/fpr": 0.0031972537273247345, "eval_animal_abuse/precision": 0.7432432432432432, "eval_animal_abuse/recall": 0.7994186046511628, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9959410453471736, "eval_child_abuse/f1": 0.6379821958456974, "eval_child_abuse/fpr": 0.0021076930797410513, "eval_child_abuse/precision": 0.6304985337243402, "eval_child_abuse/recall": 0.6456456456456456, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9710217253884287, "eval_controversial_topics,politics/f1": 0.4692260816575259, "eval_controversial_topics,politics/fpr": 0.011497803404722659, "eval_controversial_topics,politics/precision": 0.5347222222222222, "eval_controversial_topics,politics/recall": 0.4180238870792617, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9528728748710783, "eval_discrimination,stereotype,injustice/f1": 0.7173500947820014, "eval_discrimination,stereotype,injustice/fpr": 0.029782773701521614, "eval_discrimination,stereotype,injustice/precision": 0.6856761396147244, "eval_discrimination,stereotype,injustice/recall": 0.752092050209205, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.972735136573843, "eval_drug_abuse,weapons,banned_substance/f1": 0.7646805455850681, "eval_drug_abuse,weapons,banned_substance/fpr": 0.016147228881680974, "eval_drug_abuse,weapons,banned_substance/precision": 0.7440625873148924, "eval_drug_abuse,weapons,banned_substance/recall": 0.786473715298287, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.960408557074891, "eval_financial_crime,property_crime,theft/f1": 0.7930434782608695, "eval_financial_crime,property_crime,theft/fpr": 0.020068923575917254, "eval_financial_crime,property_crime,theft/precision": 0.807222517259692, "eval_financial_crime,property_crime,theft/recall": 0.7793539565886173, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8500016635060053, "eval_flagged/aucpr": 0.9044920720503336, "eval_flagged/f1": 0.8636329265157359, "eval_flagged/fpr": 0.15448374151445768, "eval_flagged/precision": 0.8739287463271302, "eval_flagged/recall": 0.8535768736360647, "eval_hate_speech,offensive_language/accuracy": 0.9504774262235086, "eval_hate_speech,offensive_language/f1": 0.6885657495553928, "eval_hate_speech,offensive_language/fpr": 0.016152019002375267, "eval_hate_speech,offensive_language/precision": 0.7882634730538922, "eval_hate_speech,offensive_language/recall": 0.611255572065379, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08604487776756287, "eval_macro_f1": 0.6182634011192977, "eval_macro_precision": 0.6730225220136384, "eval_macro_recall": 0.6076710356725188, "eval_micro_f1": 0.7471801925722146, "eval_micro_precision": 0.7714009808191165, "eval_micro_recall": 0.7244340914344648, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9878397711015737, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8840369963735569, "eval_non_violent_unethical_behavior/f1": 0.6792435466801638, "eval_non_violent_unethical_behavior/fpr": 0.04995017438963617, "eval_non_violent_unethical_behavior/precision": 0.7541636865229386, "eval_non_violent_unethical_behavior/recall": 0.6178637200736649, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9802209135974981, "eval_privacy_violation/f1": 0.8052416052416053, "eval_privacy_violation/fpr": 0.011916427521523042, "eval_privacy_violation/precision": 0.783051927365403, "eval_privacy_violation/recall": 0.8287255563047876, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.4367, "eval_samples_per_second": 100.452, "eval_self_harm/accuracy": 0.9966729879894867, "eval_self_harm/f1": 0.7340425531914894, "eval_self_harm/fpr": 0.0011054535709500181, "eval_self_harm/precision": 0.8070175438596491, "eval_self_harm/recall": 0.6731707317073171, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9828658881458562, "eval_sexually_explicit,adult_content/f1": 0.654130288784419, "eval_sexually_explicit,adult_content/fpr": 0.009494264237135, "eval_sexually_explicit,adult_content/precision": 0.6361854996734161, "eval_sexually_explicit,adult_content/recall": 0.6731167933655839, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.571, "eval_terrorism,organized_crime/accuracy": 0.9921981568353462, "eval_terrorism,organized_crime/f1": 0.08932038834951456, "eval_terrorism,organized_crime/fpr": 0.0001844616235976721, "eval_terrorism,organized_crime/precision": 0.6764705882352942, "eval_terrorism,organized_crime/recall": 0.04781704781704782, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9195362145257344, "eval_violence,aiding_and_abetting,incitement/f1": 0.8525529644871208, "eval_violence,aiding_and_abetting,incitement/fpr": 0.0641176737228592, "eval_violence,aiding_and_abetting,incitement/precision": 0.8317373461012312, "eval_violence,aiding_and_abetting,incitement/recall": 0.8744372186093047, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 10570 }, { "epoch": 1.2514785900165601, "grad_norm": 0.5344790816307068, "learning_rate": 5.312018194876706e-06, "loss": 0.0859, "step": 10580 }, { "epoch": 1.2526614620298084, "grad_norm": 0.5027660727500916, "learning_rate": 5.311299976059373e-06, "loss": 0.0805, "step": 10590 }, { "epoch": 1.2538443340430565, "grad_norm": 0.548696756362915, "learning_rate": 5.31058175724204e-06, "loss": 0.0859, "step": 10600 }, { "epoch": 1.2550272060563046, "grad_norm": 0.49048352241516113, "learning_rate": 5.309863538424707e-06, "loss": 0.088, "step": 10610 }, { "epoch": 1.2562100780695529, "grad_norm": 0.519137442111969, "learning_rate": 5.309145319607374e-06, "loss": 0.0892, "step": 10620 }, { "epoch": 1.2573929500828012, "grad_norm": 0.49376288056373596, "learning_rate": 5.3084271007900415e-06, "loss": 0.0884, "step": 10630 }, { "epoch": 1.2585758220960492, "grad_norm": 0.47846388816833496, "learning_rate": 5.307708881972708e-06, "loss": 0.091, "step": 10640 }, { "epoch": 1.2597586941092973, "grad_norm": 0.41205939650535583, "learning_rate": 5.306990663155375e-06, "loss": 0.0901, "step": 10650 }, { "epoch": 1.2609415661225456, "grad_norm": 0.4550676643848419, "learning_rate": 5.3062724443380414e-06, "loss": 0.0805, "step": 10660 }, { "epoch": 1.2621244381357937, "grad_norm": 0.44260671734809875, "learning_rate": 5.305554225520709e-06, "loss": 0.0856, "step": 10670 }, { "epoch": 1.2633073101490417, "grad_norm": 0.5240995287895203, "learning_rate": 5.304836006703375e-06, "loss": 0.0839, "step": 10680 }, { "epoch": 1.26449018216229, "grad_norm": 0.47073498368263245, "learning_rate": 5.304117787886043e-06, "loss": 0.086, "step": 10690 }, { "epoch": 1.2656730541755383, "grad_norm": 0.6053902506828308, "learning_rate": 5.30339956906871e-06, "loss": 0.0883, "step": 10700 }, { "epoch": 1.2668559261887864, "grad_norm": 0.43848299980163574, "learning_rate": 5.302681350251377e-06, "loss": 0.0851, "step": 10710 }, { "epoch": 1.2680387982020345, "grad_norm": 0.5319357514381409, "learning_rate": 5.301963131434044e-06, "loss": 0.0833, "step": 10720 }, { "epoch": 1.2692216702152828, "grad_norm": 0.6005284190177917, "learning_rate": 5.301244912616711e-06, "loss": 0.0863, "step": 10730 }, { "epoch": 1.2704045422285308, "grad_norm": 0.4534222483634949, "learning_rate": 5.300526693799378e-06, "loss": 0.0838, "step": 10740 }, { "epoch": 1.271587414241779, "grad_norm": 0.5835275650024414, "learning_rate": 5.299808474982045e-06, "loss": 0.0778, "step": 10750 }, { "epoch": 1.2727702862550272, "grad_norm": 0.4950994849205017, "learning_rate": 5.2990902561647115e-06, "loss": 0.0876, "step": 10760 }, { "epoch": 1.2739531582682755, "grad_norm": 0.5406625866889954, "learning_rate": 5.2983720373473784e-06, "loss": 0.0871, "step": 10770 }, { "epoch": 1.2751360302815236, "grad_norm": 0.4970603585243225, "learning_rate": 5.297653818530045e-06, "loss": 0.0832, "step": 10780 }, { "epoch": 1.2763189022947716, "grad_norm": 0.6738425493240356, "learning_rate": 5.296935599712712e-06, "loss": 0.0873, "step": 10790 }, { "epoch": 1.27750177430802, "grad_norm": 0.5150232911109924, "learning_rate": 5.296217380895379e-06, "loss": 0.0765, "step": 10800 }, { "epoch": 1.278684646321268, "grad_norm": 0.5198239088058472, "learning_rate": 5.295499162078047e-06, "loss": 0.0857, "step": 10810 }, { "epoch": 1.2798675183345163, "grad_norm": 0.6045175194740295, "learning_rate": 5.294780943260713e-06, "loss": 0.0877, "step": 10820 }, { "epoch": 1.2810503903477644, "grad_norm": 0.45646077394485474, "learning_rate": 5.294062724443381e-06, "loss": 0.0898, "step": 10830 }, { "epoch": 1.2822332623610126, "grad_norm": 0.6328542232513428, "learning_rate": 5.293344505626047e-06, "loss": 0.0832, "step": 10840 }, { "epoch": 1.2834161343742607, "grad_norm": 0.5109391808509827, "learning_rate": 5.292626286808715e-06, "loss": 0.0871, "step": 10850 }, { "epoch": 1.2845990063875088, "grad_norm": 0.39858904480934143, "learning_rate": 5.291908067991382e-06, "loss": 0.0961, "step": 10860 }, { "epoch": 1.285781878400757, "grad_norm": 0.5025159120559692, "learning_rate": 5.2911898491740485e-06, "loss": 0.0827, "step": 10870 }, { "epoch": 1.2869647504140052, "grad_norm": 0.6457040309906006, "learning_rate": 5.2904716303567154e-06, "loss": 0.0918, "step": 10880 }, { "epoch": 1.2881476224272534, "grad_norm": 0.4695596694946289, "learning_rate": 5.289753411539382e-06, "loss": 0.0859, "step": 10890 }, { "epoch": 1.2893304944405015, "grad_norm": 0.4673847556114197, "learning_rate": 5.28903519272205e-06, "loss": 0.082, "step": 10900 }, { "epoch": 1.2905133664537498, "grad_norm": 0.4973726272583008, "learning_rate": 5.288316973904716e-06, "loss": 0.0876, "step": 10910 }, { "epoch": 1.2916962384669979, "grad_norm": 0.42267149686813354, "learning_rate": 5.287598755087384e-06, "loss": 0.0919, "step": 10920 }, { "epoch": 1.292879110480246, "grad_norm": 0.4448626637458801, "learning_rate": 5.28688053627005e-06, "loss": 0.0794, "step": 10930 }, { "epoch": 1.2940619824934942, "grad_norm": 0.46809622645378113, "learning_rate": 5.286162317452718e-06, "loss": 0.0866, "step": 10940 }, { "epoch": 1.2952448545067423, "grad_norm": 0.46744176745414734, "learning_rate": 5.285444098635384e-06, "loss": 0.0844, "step": 10950 }, { "epoch": 1.2964277265199906, "grad_norm": 0.5362006425857544, "learning_rate": 5.284725879818052e-06, "loss": 0.0925, "step": 10960 }, { "epoch": 1.2976105985332387, "grad_norm": 0.3757772147655487, "learning_rate": 5.284007661000719e-06, "loss": 0.0856, "step": 10970 }, { "epoch": 1.298793470546487, "grad_norm": 0.4613181948661804, "learning_rate": 5.2832894421833855e-06, "loss": 0.0819, "step": 10980 }, { "epoch": 1.299976342559735, "grad_norm": 0.6300205588340759, "learning_rate": 5.2825712233660524e-06, "loss": 0.0858, "step": 10990 }, { "epoch": 1.3011592145729831, "grad_norm": 0.5926040410995483, "learning_rate": 5.281853004548719e-06, "loss": 0.0839, "step": 11000 }, { "epoch": 1.3023420865862314, "grad_norm": 0.5955724120140076, "learning_rate": 5.281134785731386e-06, "loss": 0.0961, "step": 11010 }, { "epoch": 1.3035249585994795, "grad_norm": 0.6476084589958191, "learning_rate": 5.280416566914053e-06, "loss": 0.0829, "step": 11020 }, { "epoch": 1.3047078306127278, "grad_norm": 0.4446814954280853, "learning_rate": 5.27969834809672e-06, "loss": 0.0842, "step": 11030 }, { "epoch": 1.3058907026259758, "grad_norm": 0.6516354084014893, "learning_rate": 5.278980129279387e-06, "loss": 0.0804, "step": 11040 }, { "epoch": 1.3070735746392241, "grad_norm": 0.5470334887504578, "learning_rate": 5.278261910462054e-06, "loss": 0.0839, "step": 11050 }, { "epoch": 1.3082564466524722, "grad_norm": 0.48053520917892456, "learning_rate": 5.277543691644721e-06, "loss": 0.0871, "step": 11060 }, { "epoch": 1.3094393186657203, "grad_norm": 0.5465850234031677, "learning_rate": 5.276825472827388e-06, "loss": 0.0819, "step": 11070 }, { "epoch": 1.3106221906789686, "grad_norm": 0.3892795741558075, "learning_rate": 5.276107254010056e-06, "loss": 0.0828, "step": 11080 }, { "epoch": 1.3118050626922166, "grad_norm": 0.6903217434883118, "learning_rate": 5.275389035192722e-06, "loss": 0.0856, "step": 11090 }, { "epoch": 1.312987934705465, "grad_norm": 0.4891171455383301, "learning_rate": 5.2746708163753894e-06, "loss": 0.0854, "step": 11100 }, { "epoch": 1.314170806718713, "grad_norm": 0.5934156179428101, "learning_rate": 5.273952597558056e-06, "loss": 0.0865, "step": 11110 }, { "epoch": 1.3153536787319613, "grad_norm": 0.5215594172477722, "learning_rate": 5.273234378740723e-06, "loss": 0.0849, "step": 11120 }, { "epoch": 1.3165365507452094, "grad_norm": 0.45571306347846985, "learning_rate": 5.27251615992339e-06, "loss": 0.087, "step": 11130 }, { "epoch": 1.3177194227584574, "grad_norm": 0.4514807462692261, "learning_rate": 5.271797941106057e-06, "loss": 0.0763, "step": 11140 }, { "epoch": 1.3189022947717057, "grad_norm": 0.41736313700675964, "learning_rate": 5.271079722288724e-06, "loss": 0.0857, "step": 11150 }, { "epoch": 1.3200851667849538, "grad_norm": 0.4487389922142029, "learning_rate": 5.270361503471391e-06, "loss": 0.0805, "step": 11160 }, { "epoch": 1.321268038798202, "grad_norm": 0.4362076222896576, "learning_rate": 5.269643284654059e-06, "loss": 0.0872, "step": 11170 }, { "epoch": 1.3224509108114502, "grad_norm": 0.7353972792625427, "learning_rate": 5.268925065836725e-06, "loss": 0.0852, "step": 11180 }, { "epoch": 1.3236337828246985, "grad_norm": 0.6633795499801636, "learning_rate": 5.268206847019393e-06, "loss": 0.0878, "step": 11190 }, { "epoch": 1.3248166548379465, "grad_norm": 0.497685968875885, "learning_rate": 5.267488628202059e-06, "loss": 0.088, "step": 11200 }, { "epoch": 1.3259995268511946, "grad_norm": 0.4273037910461426, "learning_rate": 5.2667704093847264e-06, "loss": 0.0884, "step": 11210 }, { "epoch": 1.327182398864443, "grad_norm": 0.6067870855331421, "learning_rate": 5.2660521905673925e-06, "loss": 0.0921, "step": 11220 }, { "epoch": 1.328365270877691, "grad_norm": 0.567546546459198, "learning_rate": 5.26533397175006e-06, "loss": 0.0907, "step": 11230 }, { "epoch": 1.3295481428909393, "grad_norm": 0.5567662715911865, "learning_rate": 5.264615752932727e-06, "loss": 0.0827, "step": 11240 }, { "epoch": 1.3307310149041873, "grad_norm": 0.49965691566467285, "learning_rate": 5.263897534115394e-06, "loss": 0.0847, "step": 11250 }, { "epoch": 1.3319138869174356, "grad_norm": 0.6275544166564941, "learning_rate": 5.263179315298061e-06, "loss": 0.0812, "step": 11260 }, { "epoch": 1.3330967589306837, "grad_norm": 0.5215426087379456, "learning_rate": 5.262461096480728e-06, "loss": 0.0859, "step": 11270 }, { "epoch": 1.3342796309439318, "grad_norm": 0.5060540437698364, "learning_rate": 5.261742877663395e-06, "loss": 0.0811, "step": 11280 }, { "epoch": 1.33546250295718, "grad_norm": 0.4246026575565338, "learning_rate": 5.261024658846062e-06, "loss": 0.0792, "step": 11290 }, { "epoch": 1.3366453749704281, "grad_norm": 0.6230758428573608, "learning_rate": 5.260306440028729e-06, "loss": 0.0907, "step": 11300 }, { "epoch": 1.3378282469836764, "grad_norm": 0.5304140448570251, "learning_rate": 5.259588221211396e-06, "loss": 0.0856, "step": 11310 }, { "epoch": 1.3390111189969245, "grad_norm": 0.4622262120246887, "learning_rate": 5.258870002394063e-06, "loss": 0.0876, "step": 11320 }, { "epoch": 1.3401939910101728, "grad_norm": 0.4321751594543457, "learning_rate": 5.2581517835767295e-06, "loss": 0.0896, "step": 11330 }, { "epoch": 1.3413768630234209, "grad_norm": 0.5394893288612366, "learning_rate": 5.257433564759396e-06, "loss": 0.085, "step": 11340 }, { "epoch": 1.342559735036669, "grad_norm": 0.424145370721817, "learning_rate": 5.256715345942064e-06, "loss": 0.0929, "step": 11350 }, { "epoch": 1.3437426070499172, "grad_norm": 0.5680561065673828, "learning_rate": 5.255997127124731e-06, "loss": 0.0877, "step": 11360 }, { "epoch": 1.3449254790631653, "grad_norm": 0.4428084194660187, "learning_rate": 5.255278908307398e-06, "loss": 0.0863, "step": 11370 }, { "epoch": 1.3461083510764136, "grad_norm": 0.4948295056819916, "learning_rate": 5.254560689490065e-06, "loss": 0.0913, "step": 11380 }, { "epoch": 1.3472912230896616, "grad_norm": 0.5776395201683044, "learning_rate": 5.253842470672732e-06, "loss": 0.077, "step": 11390 }, { "epoch": 1.34847409510291, "grad_norm": 0.5966656804084778, "learning_rate": 5.253124251855399e-06, "loss": 0.0911, "step": 11400 }, { "epoch": 1.349656967116158, "grad_norm": 0.5097173452377319, "learning_rate": 5.252406033038066e-06, "loss": 0.0896, "step": 11410 }, { "epoch": 1.350839839129406, "grad_norm": 0.5077007412910461, "learning_rate": 5.251687814220733e-06, "loss": 0.0844, "step": 11420 }, { "epoch": 1.3520227111426544, "grad_norm": 0.4040808379650116, "learning_rate": 5.2509695954033996e-06, "loss": 0.081, "step": 11430 }, { "epoch": 1.3532055831559027, "grad_norm": 0.5471307039260864, "learning_rate": 5.2502513765860665e-06, "loss": 0.0843, "step": 11440 }, { "epoch": 1.3543884551691507, "grad_norm": 0.9440762400627136, "learning_rate": 5.249533157768733e-06, "loss": 0.0935, "step": 11450 }, { "epoch": 1.3555713271823988, "grad_norm": 0.459966242313385, "learning_rate": 5.248814938951401e-06, "loss": 0.0845, "step": 11460 }, { "epoch": 1.356754199195647, "grad_norm": 0.4879663288593292, "learning_rate": 5.248096720134067e-06, "loss": 0.0811, "step": 11470 }, { "epoch": 1.3579370712088952, "grad_norm": 0.3976109027862549, "learning_rate": 5.247378501316735e-06, "loss": 0.0921, "step": 11480 }, { "epoch": 1.3591199432221432, "grad_norm": 0.5509858727455139, "learning_rate": 5.246660282499401e-06, "loss": 0.0837, "step": 11490 }, { "epoch": 1.3603028152353915, "grad_norm": 0.46328768134117126, "learning_rate": 5.245942063682069e-06, "loss": 0.0803, "step": 11500 }, { "epoch": 1.3614856872486398, "grad_norm": 0.49146798253059387, "learning_rate": 5.245223844864736e-06, "loss": 0.0765, "step": 11510 }, { "epoch": 1.362668559261888, "grad_norm": 0.42991721630096436, "learning_rate": 5.244505626047403e-06, "loss": 0.078, "step": 11520 }, { "epoch": 1.363851431275136, "grad_norm": 0.4684593677520752, "learning_rate": 5.24378740723007e-06, "loss": 0.0861, "step": 11530 }, { "epoch": 1.3650343032883843, "grad_norm": 0.4439728856086731, "learning_rate": 5.2430691884127366e-06, "loss": 0.0807, "step": 11540 }, { "epoch": 1.3662171753016323, "grad_norm": 0.4551149904727936, "learning_rate": 5.2423509695954035e-06, "loss": 0.0894, "step": 11550 }, { "epoch": 1.3674000473148804, "grad_norm": 0.5864676237106323, "learning_rate": 5.24163275077807e-06, "loss": 0.0921, "step": 11560 }, { "epoch": 1.3685829193281287, "grad_norm": 0.53575199842453, "learning_rate": 5.240914531960737e-06, "loss": 0.0893, "step": 11570 }, { "epoch": 1.369765791341377, "grad_norm": 0.4585458040237427, "learning_rate": 5.240196313143404e-06, "loss": 0.0844, "step": 11580 }, { "epoch": 1.370948663354625, "grad_norm": 0.5347262024879456, "learning_rate": 5.239478094326071e-06, "loss": 0.0886, "step": 11590 }, { "epoch": 1.3721315353678731, "grad_norm": 0.5540777444839478, "learning_rate": 5.238759875508738e-06, "loss": 0.0858, "step": 11600 }, { "epoch": 1.3733144073811214, "grad_norm": 0.4475511610507965, "learning_rate": 5.238041656691406e-06, "loss": 0.0743, "step": 11610 }, { "epoch": 1.3744972793943695, "grad_norm": 0.48247140645980835, "learning_rate": 5.237323437874073e-06, "loss": 0.0838, "step": 11620 }, { "epoch": 1.3756801514076176, "grad_norm": 0.5612298846244812, "learning_rate": 5.23660521905674e-06, "loss": 0.0894, "step": 11630 }, { "epoch": 1.3768630234208659, "grad_norm": 0.49812594056129456, "learning_rate": 5.235887000239407e-06, "loss": 0.0806, "step": 11640 }, { "epoch": 1.3780458954341142, "grad_norm": 0.4720252454280853, "learning_rate": 5.2351687814220736e-06, "loss": 0.0871, "step": 11650 }, { "epoch": 1.3792287674473622, "grad_norm": 0.5013387799263, "learning_rate": 5.2344505626047405e-06, "loss": 0.0959, "step": 11660 }, { "epoch": 1.3804116394606103, "grad_norm": 0.6254510879516602, "learning_rate": 5.233732343787407e-06, "loss": 0.0845, "step": 11670 }, { "epoch": 1.3815945114738586, "grad_norm": 0.49776336550712585, "learning_rate": 5.233014124970074e-06, "loss": 0.082, "step": 11680 }, { "epoch": 1.3827773834871067, "grad_norm": 0.41635167598724365, "learning_rate": 5.232295906152741e-06, "loss": 0.0929, "step": 11690 }, { "epoch": 1.3839602555003547, "grad_norm": 0.47905147075653076, "learning_rate": 5.231577687335408e-06, "loss": 0.087, "step": 11700 }, { "epoch": 1.385143127513603, "grad_norm": 0.43062323331832886, "learning_rate": 5.230859468518075e-06, "loss": 0.0872, "step": 11710 }, { "epoch": 1.3863259995268513, "grad_norm": 0.5399793386459351, "learning_rate": 5.230141249700742e-06, "loss": 0.0902, "step": 11720 }, { "epoch": 1.3875088715400994, "grad_norm": 0.4112526774406433, "learning_rate": 5.22942303088341e-06, "loss": 0.0847, "step": 11730 }, { "epoch": 1.3886917435533475, "grad_norm": 0.4147592782974243, "learning_rate": 5.228704812066076e-06, "loss": 0.0874, "step": 11740 }, { "epoch": 1.3898746155665958, "grad_norm": 0.5513148903846741, "learning_rate": 5.227986593248744e-06, "loss": 0.0864, "step": 11750 }, { "epoch": 1.3910574875798438, "grad_norm": 0.46406859159469604, "learning_rate": 5.22726837443141e-06, "loss": 0.0925, "step": 11760 }, { "epoch": 1.392240359593092, "grad_norm": 0.3427566587924957, "learning_rate": 5.2265501556140775e-06, "loss": 0.0822, "step": 11770 }, { "epoch": 1.3934232316063402, "grad_norm": 0.5526914000511169, "learning_rate": 5.2258319367967436e-06, "loss": 0.0919, "step": 11780 }, { "epoch": 1.3946061036195885, "grad_norm": 0.4518110454082489, "learning_rate": 5.225113717979411e-06, "loss": 0.1001, "step": 11790 }, { "epoch": 1.3957889756328365, "grad_norm": 0.42616426944732666, "learning_rate": 5.224395499162078e-06, "loss": 0.0855, "step": 11800 }, { "epoch": 1.3969718476460846, "grad_norm": 0.6300626397132874, "learning_rate": 5.223677280344745e-06, "loss": 0.0898, "step": 11810 }, { "epoch": 1.398154719659333, "grad_norm": 0.6843329668045044, "learning_rate": 5.222959061527412e-06, "loss": 0.0923, "step": 11820 }, { "epoch": 1.399337591672581, "grad_norm": 0.6879268288612366, "learning_rate": 5.222240842710079e-06, "loss": 0.0806, "step": 11830 }, { "epoch": 1.4005204636858293, "grad_norm": 0.4773183763027191, "learning_rate": 5.221522623892746e-06, "loss": 0.0874, "step": 11840 }, { "epoch": 1.4017033356990773, "grad_norm": 0.4848884046077728, "learning_rate": 5.220804405075413e-06, "loss": 0.0878, "step": 11850 }, { "epoch": 1.4028862077123256, "grad_norm": 0.5129106044769287, "learning_rate": 5.22008618625808e-06, "loss": 0.0865, "step": 11860 }, { "epoch": 1.4040690797255737, "grad_norm": 0.6020897030830383, "learning_rate": 5.219367967440747e-06, "loss": 0.0965, "step": 11870 }, { "epoch": 1.4052519517388218, "grad_norm": 0.5482921004295349, "learning_rate": 5.2186497486234145e-06, "loss": 0.0872, "step": 11880 }, { "epoch": 1.40643482375207, "grad_norm": 0.4468929171562195, "learning_rate": 5.217931529806081e-06, "loss": 0.0897, "step": 11890 }, { "epoch": 1.4076176957653181, "grad_norm": 0.40067562460899353, "learning_rate": 5.217213310988748e-06, "loss": 0.083, "step": 11900 }, { "epoch": 1.4088005677785664, "grad_norm": 0.5015385746955872, "learning_rate": 5.216495092171415e-06, "loss": 0.0849, "step": 11910 }, { "epoch": 1.4099834397918145, "grad_norm": 0.4591981768608093, "learning_rate": 5.215776873354082e-06, "loss": 0.0893, "step": 11920 }, { "epoch": 1.4111663118050628, "grad_norm": 0.42917659878730774, "learning_rate": 5.215058654536749e-06, "loss": 0.0862, "step": 11930 }, { "epoch": 1.4123491838183109, "grad_norm": 0.49667981266975403, "learning_rate": 5.214340435719416e-06, "loss": 0.0923, "step": 11940 }, { "epoch": 1.413532055831559, "grad_norm": 0.5945940613746643, "learning_rate": 5.213622216902083e-06, "loss": 0.0909, "step": 11950 }, { "epoch": 1.4147149278448072, "grad_norm": 0.46169814467430115, "learning_rate": 5.21290399808475e-06, "loss": 0.0792, "step": 11960 }, { "epoch": 1.4158977998580553, "grad_norm": 0.6175219416618347, "learning_rate": 5.212185779267417e-06, "loss": 0.0906, "step": 11970 }, { "epoch": 1.4170806718713036, "grad_norm": 0.4979042708873749, "learning_rate": 5.211467560450084e-06, "loss": 0.0913, "step": 11980 }, { "epoch": 1.4182635438845517, "grad_norm": 0.5631945133209229, "learning_rate": 5.210749341632751e-06, "loss": 0.0874, "step": 11990 }, { "epoch": 1.4194464158978, "grad_norm": 0.514599084854126, "learning_rate": 5.210031122815418e-06, "loss": 0.087, "step": 12000 }, { "epoch": 1.420629287911048, "grad_norm": 0.4502360224723816, "learning_rate": 5.2093129039980845e-06, "loss": 0.0818, "step": 12010 }, { "epoch": 1.421812159924296, "grad_norm": 0.4143269956111908, "learning_rate": 5.208594685180752e-06, "loss": 0.0853, "step": 12020 }, { "epoch": 1.4229950319375444, "grad_norm": 0.5013532638549805, "learning_rate": 5.207876466363418e-06, "loss": 0.0723, "step": 12030 }, { "epoch": 1.4241779039507925, "grad_norm": 0.4505346417427063, "learning_rate": 5.207158247546086e-06, "loss": 0.0892, "step": 12040 }, { "epoch": 1.4253607759640408, "grad_norm": 0.48967260122299194, "learning_rate": 5.206440028728752e-06, "loss": 0.09, "step": 12050 }, { "epoch": 1.4265436479772888, "grad_norm": 0.41970643401145935, "learning_rate": 5.20572180991142e-06, "loss": 0.0916, "step": 12060 }, { "epoch": 1.4277265199905371, "grad_norm": 0.4685288667678833, "learning_rate": 5.205003591094087e-06, "loss": 0.0833, "step": 12070 }, { "epoch": 1.4289093920037852, "grad_norm": 0.5216024518013, "learning_rate": 5.204285372276754e-06, "loss": 0.0798, "step": 12080 }, { "epoch": 1.4300922640170333, "grad_norm": 0.5520346760749817, "learning_rate": 5.203567153459421e-06, "loss": 0.0785, "step": 12090 }, { "epoch": 1.4312751360302816, "grad_norm": 0.5364843606948853, "learning_rate": 5.202848934642088e-06, "loss": 0.0843, "step": 12100 }, { "epoch": 1.4324580080435296, "grad_norm": 0.4216214120388031, "learning_rate": 5.2021307158247546e-06, "loss": 0.0786, "step": 12110 }, { "epoch": 1.433640880056778, "grad_norm": 0.49400538206100464, "learning_rate": 5.2014124970074215e-06, "loss": 0.0809, "step": 12120 }, { "epoch": 1.434823752070026, "grad_norm": 0.46163347363471985, "learning_rate": 5.200694278190089e-06, "loss": 0.0889, "step": 12130 }, { "epoch": 1.4360066240832743, "grad_norm": 0.4463256597518921, "learning_rate": 5.199976059372755e-06, "loss": 0.0919, "step": 12140 }, { "epoch": 1.4371894960965224, "grad_norm": 0.5229039192199707, "learning_rate": 5.199257840555423e-06, "loss": 0.0877, "step": 12150 }, { "epoch": 1.4383723681097704, "grad_norm": 0.3632770776748657, "learning_rate": 5.19853962173809e-06, "loss": 0.0824, "step": 12160 }, { "epoch": 1.4395552401230187, "grad_norm": 0.43362298607826233, "learning_rate": 5.197821402920757e-06, "loss": 0.0821, "step": 12170 }, { "epoch": 1.4407381121362668, "grad_norm": 0.6022460460662842, "learning_rate": 5.197103184103424e-06, "loss": 0.0856, "step": 12180 }, { "epoch": 1.441920984149515, "grad_norm": 0.46729424595832825, "learning_rate": 5.196384965286091e-06, "loss": 0.0883, "step": 12190 }, { "epoch": 1.4431038561627632, "grad_norm": 0.5155225992202759, "learning_rate": 5.195666746468758e-06, "loss": 0.0834, "step": 12200 }, { "epoch": 1.4442867281760114, "grad_norm": 0.4990426301956177, "learning_rate": 5.194948527651425e-06, "loss": 0.084, "step": 12210 }, { "epoch": 1.4454696001892595, "grad_norm": 0.5280868411064148, "learning_rate": 5.1942303088340916e-06, "loss": 0.0902, "step": 12220 }, { "epoch": 1.4466524722025076, "grad_norm": 0.5008606910705566, "learning_rate": 5.1935120900167585e-06, "loss": 0.0848, "step": 12230 }, { "epoch": 1.4478353442157559, "grad_norm": 0.47742927074432373, "learning_rate": 5.192793871199425e-06, "loss": 0.0809, "step": 12240 }, { "epoch": 1.449018216229004, "grad_norm": 0.4984307587146759, "learning_rate": 5.192075652382092e-06, "loss": 0.0855, "step": 12250 }, { "epoch": 1.4502010882422522, "grad_norm": 0.46932902932167053, "learning_rate": 5.191357433564759e-06, "loss": 0.0805, "step": 12260 }, { "epoch": 1.4513839602555003, "grad_norm": 0.5519961714744568, "learning_rate": 5.190639214747427e-06, "loss": 0.0814, "step": 12270 }, { "epoch": 1.4525668322687486, "grad_norm": 0.5535449385643005, "learning_rate": 5.189920995930093e-06, "loss": 0.0818, "step": 12280 }, { "epoch": 1.4537497042819967, "grad_norm": 0.5398666262626648, "learning_rate": 5.189202777112761e-06, "loss": 0.0883, "step": 12290 }, { "epoch": 1.4549325762952448, "grad_norm": 0.5301707983016968, "learning_rate": 5.188484558295427e-06, "loss": 0.0823, "step": 12300 }, { "epoch": 1.456115448308493, "grad_norm": 0.5100451111793518, "learning_rate": 5.187766339478095e-06, "loss": 0.0863, "step": 12310 }, { "epoch": 1.4572983203217411, "grad_norm": 0.4442741870880127, "learning_rate": 5.187048120660761e-06, "loss": 0.09, "step": 12320 }, { "epoch": 1.4584811923349894, "grad_norm": 0.4735712707042694, "learning_rate": 5.1863299018434286e-06, "loss": 0.0835, "step": 12330 }, { "epoch": 1.4596640643482375, "grad_norm": 0.44654735922813416, "learning_rate": 5.1856116830260955e-06, "loss": 0.0833, "step": 12340 }, { "epoch": 1.4608469363614858, "grad_norm": 0.4130767583847046, "learning_rate": 5.184893464208762e-06, "loss": 0.0811, "step": 12350 }, { "epoch": 1.4620298083747338, "grad_norm": 0.5469282269477844, "learning_rate": 5.184175245391429e-06, "loss": 0.0879, "step": 12360 }, { "epoch": 1.463212680387982, "grad_norm": 0.49571576714515686, "learning_rate": 5.183457026574096e-06, "loss": 0.0831, "step": 12370 }, { "epoch": 1.4643955524012302, "grad_norm": 0.4691697657108307, "learning_rate": 5.182738807756764e-06, "loss": 0.0824, "step": 12380 }, { "epoch": 1.4655784244144783, "grad_norm": 0.4959926903247833, "learning_rate": 5.18202058893943e-06, "loss": 0.0817, "step": 12390 }, { "epoch": 1.4667612964277266, "grad_norm": 0.46208786964416504, "learning_rate": 5.181302370122098e-06, "loss": 0.089, "step": 12400 }, { "epoch": 1.4679441684409746, "grad_norm": 0.4866841435432434, "learning_rate": 5.180584151304764e-06, "loss": 0.0904, "step": 12410 }, { "epoch": 1.469127040454223, "grad_norm": 0.5763052701950073, "learning_rate": 5.179865932487432e-06, "loss": 0.0869, "step": 12420 }, { "epoch": 1.470309912467471, "grad_norm": 0.41000130772590637, "learning_rate": 5.179147713670098e-06, "loss": 0.0746, "step": 12430 }, { "epoch": 1.471492784480719, "grad_norm": 0.5471966862678528, "learning_rate": 5.1784294948527656e-06, "loss": 0.0824, "step": 12440 }, { "epoch": 1.4726756564939674, "grad_norm": 0.4523822069168091, "learning_rate": 5.1777112760354325e-06, "loss": 0.093, "step": 12450 }, { "epoch": 1.4738585285072154, "grad_norm": 0.45505866408348083, "learning_rate": 5.176993057218099e-06, "loss": 0.0889, "step": 12460 }, { "epoch": 1.4750414005204637, "grad_norm": 0.5611518621444702, "learning_rate": 5.176274838400766e-06, "loss": 0.0854, "step": 12470 }, { "epoch": 1.4762242725337118, "grad_norm": 0.41183364391326904, "learning_rate": 5.175556619583433e-06, "loss": 0.0866, "step": 12480 }, { "epoch": 1.47740714454696, "grad_norm": 0.5093260407447815, "learning_rate": 5.1748384007661e-06, "loss": 0.0821, "step": 12490 }, { "epoch": 1.4785900165602082, "grad_norm": 0.49506834149360657, "learning_rate": 5.174120181948767e-06, "loss": 0.0838, "step": 12500 }, { "epoch": 1.4797728885734562, "grad_norm": 0.5007583498954773, "learning_rate": 5.173401963131434e-06, "loss": 0.0894, "step": 12510 }, { "epoch": 1.4809557605867045, "grad_norm": 0.4532022178173065, "learning_rate": 5.172683744314101e-06, "loss": 0.0898, "step": 12520 }, { "epoch": 1.4821386325999528, "grad_norm": 0.6050737500190735, "learning_rate": 5.171965525496768e-06, "loss": 0.0884, "step": 12530 }, { "epoch": 1.483321504613201, "grad_norm": 0.5277379155158997, "learning_rate": 5.171247306679436e-06, "loss": 0.079, "step": 12540 }, { "epoch": 1.484504376626449, "grad_norm": 0.4154554307460785, "learning_rate": 5.170529087862102e-06, "loss": 0.0756, "step": 12550 }, { "epoch": 1.4856872486396973, "grad_norm": 0.5407411456108093, "learning_rate": 5.1698108690447695e-06, "loss": 0.0974, "step": 12560 }, { "epoch": 1.4868701206529453, "grad_norm": 0.704882800579071, "learning_rate": 5.1690926502274355e-06, "loss": 0.0833, "step": 12570 }, { "epoch": 1.4880529926661934, "grad_norm": 0.42364880442619324, "learning_rate": 5.168374431410103e-06, "loss": 0.0873, "step": 12580 }, { "epoch": 1.4892358646794417, "grad_norm": 0.5154273509979248, "learning_rate": 5.167656212592769e-06, "loss": 0.0885, "step": 12590 }, { "epoch": 1.49041873669269, "grad_norm": 0.5277481079101562, "learning_rate": 5.166937993775437e-06, "loss": 0.0844, "step": 12600 }, { "epoch": 1.491601608705938, "grad_norm": 0.47936031222343445, "learning_rate": 5.166219774958104e-06, "loss": 0.0856, "step": 12610 }, { "epoch": 1.4927844807191861, "grad_norm": 0.6884175539016724, "learning_rate": 5.165501556140771e-06, "loss": 0.0829, "step": 12620 }, { "epoch": 1.4939673527324344, "grad_norm": 0.4915885925292969, "learning_rate": 5.164783337323438e-06, "loss": 0.0919, "step": 12630 }, { "epoch": 1.4951502247456825, "grad_norm": 0.5315811038017273, "learning_rate": 5.164065118506105e-06, "loss": 0.0989, "step": 12640 }, { "epoch": 1.4963330967589306, "grad_norm": 0.42621931433677673, "learning_rate": 5.163346899688773e-06, "loss": 0.0826, "step": 12650 }, { "epoch": 1.4975159687721789, "grad_norm": 0.5501675009727478, "learning_rate": 5.162628680871439e-06, "loss": 0.0813, "step": 12660 }, { "epoch": 1.4986988407854271, "grad_norm": 0.5109488368034363, "learning_rate": 5.1619104620541065e-06, "loss": 0.0913, "step": 12670 }, { "epoch": 1.4998817127986752, "grad_norm": 0.5206631422042847, "learning_rate": 5.1611922432367725e-06, "loss": 0.0864, "step": 12680 }, { "epoch": 1.5003548616039746, "eval_accuracy": 0.6876767475130585, "eval_animal_abuse/accuracy": 0.9948098612635992, "eval_animal_abuse/f1": 0.7742402315484804, "eval_animal_abuse/fpr": 0.0026755965402349095, "eval_animal_abuse/precision": 0.770893371757925, "eval_animal_abuse/recall": 0.7776162790697675, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9957913298067006, "eval_child_abuse/f1": 0.6328011611030478, "eval_child_abuse/fpr": 0.0023084257540021036, "eval_child_abuse/precision": 0.6123595505617978, "eval_child_abuse/recall": 0.6546546546546547, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9730012975346841, "eval_controversial_topics,politics/f1": 0.4662939822426833, "eval_controversial_topics,politics/fpr": 0.008408841295991199, "eval_controversial_topics,politics/precision": 0.591326105087573, "eval_controversial_topics,politics/recall": 0.38490770901194354, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9563662374821174, "eval_discrimination,stereotype,injustice/f1": 0.7066323677441002, "eval_discrimination,stereotype,injustice/fpr": 0.018108215563667873, "eval_discrimination,stereotype,injustice/precision": 0.7591925018024513, "eval_discrimination,stereotype,injustice/recall": 0.6608786610878661, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9719366536913199, "eval_drug_abuse,weapons,banned_substance/f1": 0.7661167336753085, "eval_drug_abuse,weapons,banned_substance/fpr": 0.01875616979269493, "eval_drug_abuse,weapons,banned_substance/precision": 0.7219754376796447, "eval_drug_abuse,weapons,banned_substance/recall": 0.8160070880094507, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9598263299730512, "eval_financial_crime,property_crime,theft/f1": 0.8034827894865326, "eval_financial_crime,property_crime,theft/fpr": 0.027661574184987877, "eval_financial_crime,property_crime,theft/precision": 0.7668530599565082, "eval_financial_crime,property_crime,theft/recall": 0.843787386771492, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8499184882057425, "eval_flagged/aucpr": 0.9082912101937527, "eval_flagged/f1": 0.860513296227582, "eval_flagged/fpr": 0.1275175336608779, "eval_flagged/precision": 0.8911268372346217, "eval_flagged/recall": 0.8319332755373532, "eval_hate_speech,offensive_language/accuracy": 0.9511428286256114, "eval_hate_speech,offensive_language/f1": 0.6859831070244842, "eval_hate_speech,offensive_language/fpr": 0.013904622693221243, "eval_hate_speech,offensive_language/precision": 0.8082640463592845, "eval_hate_speech,offensive_language/recall": 0.5958395245170877, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08513917028903961, "eval_macro_f1": 0.6258539064132823, "eval_macro_precision": 0.6721740541163025, "eval_macro_recall": 0.6070276251618244, "eval_micro_f1": 0.7488132248636372, "eval_micro_precision": 0.7856710354122312, "eval_micro_recall": 0.7152586375517898, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9878397711015737, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8868316864623881, "eval_non_violent_unethical_behavior/f1": 0.6819690524052171, "eval_non_violent_unethical_behavior/fpr": 0.04465620328849019, "eval_non_violent_unethical_behavior/precision": 0.7722604552673372, "eval_non_violent_unethical_behavior/recall": 0.6105809475975221, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9796220514356057, "eval_privacy_violation/f1": 0.8035284683239775, "eval_privacy_violation/fpr": 0.013368796808287231, "eval_privacy_violation/precision": 0.7662893851330682, "eval_privacy_violation/recall": 0.844571813890762, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.4499, "eval_samples_per_second": 100.45, "eval_self_harm/accuracy": 0.9964900023289084, "eval_self_harm/f1": 0.7298335467349552, "eval_self_harm/fpr": 0.0014404395015409329, "eval_self_harm/precision": 0.7681940700808625, "eval_self_harm/recall": 0.6951219512195121, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9835312905479588, "eval_sexually_explicit,adult_content/f1": 0.6491849751948973, "eval_sexually_explicit,adult_content/fpr": 0.007823819182845538, "eval_sexually_explicit,adult_content/precision": 0.6661818181818182, "eval_sexually_explicit,adult_content/recall": 0.6330338631651693, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.571, "eval_terrorism,organized_crime/accuracy": 0.9921981568353462, "eval_terrorism,organized_crime/f1": 0.209106239460371, "eval_terrorism,organized_crime/fpr": 0.0008384619254439642, "eval_terrorism,organized_crime/precision": 0.5535714285714286, "eval_terrorism,organized_crime/recall": 0.1288981288981289, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9216987723325681, "eval_violence,aiding_and_abetting,incitement/f1": 0.8527820348418979, "eval_violence,aiding_and_abetting,incitement/fpr": 0.05321608267984213, "eval_violence,aiding_and_abetting,incitement/precision": 0.8530755271885364, "eval_violence,aiding_and_abetting,incitement/recall": 0.8524887443721861, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 12684 }, { "epoch": 1.5010645848119233, "grad_norm": 0.5451022982597351, "learning_rate": 5.16047402441944e-06, "loss": 0.0934, "step": 12690 }, { "epoch": 1.5022474568251716, "grad_norm": 0.44518181681632996, "learning_rate": 5.159755805602106e-06, "loss": 0.0927, "step": 12700 }, { "epoch": 1.5034303288384196, "grad_norm": 0.34876641631126404, "learning_rate": 5.159037586784774e-06, "loss": 0.0727, "step": 12710 }, { "epoch": 1.5046132008516677, "grad_norm": 0.3890266716480255, "learning_rate": 5.158319367967441e-06, "loss": 0.0872, "step": 12720 }, { "epoch": 1.505796072864916, "grad_norm": 0.42890921235084534, "learning_rate": 5.157601149150108e-06, "loss": 0.0829, "step": 12730 }, { "epoch": 1.5069789448781643, "grad_norm": 0.5202159881591797, "learning_rate": 5.156882930332775e-06, "loss": 0.09, "step": 12740 }, { "epoch": 1.5081618168914124, "grad_norm": 0.4782649278640747, "learning_rate": 5.156164711515442e-06, "loss": 0.0901, "step": 12750 }, { "epoch": 1.5093446889046604, "grad_norm": 0.42550885677337646, "learning_rate": 5.155446492698109e-06, "loss": 0.0821, "step": 12760 }, { "epoch": 1.5105275609179087, "grad_norm": 0.430534690618515, "learning_rate": 5.154728273880776e-06, "loss": 0.0772, "step": 12770 }, { "epoch": 1.5117104329311568, "grad_norm": 0.5041429996490479, "learning_rate": 5.154010055063443e-06, "loss": 0.0973, "step": 12780 }, { "epoch": 1.5128933049444049, "grad_norm": 0.4582228362560272, "learning_rate": 5.1532918362461095e-06, "loss": 0.0862, "step": 12790 }, { "epoch": 1.5140761769576532, "grad_norm": 0.5539277791976929, "learning_rate": 5.1525736174287765e-06, "loss": 0.0868, "step": 12800 }, { "epoch": 1.5152590489709015, "grad_norm": 0.6905962824821472, "learning_rate": 5.151855398611444e-06, "loss": 0.0976, "step": 12810 }, { "epoch": 1.5164419209841495, "grad_norm": 0.4199164807796478, "learning_rate": 5.15113717979411e-06, "loss": 0.0851, "step": 12820 }, { "epoch": 1.5176247929973976, "grad_norm": 0.45384782552719116, "learning_rate": 5.150418960976778e-06, "loss": 0.0847, "step": 12830 }, { "epoch": 1.518807665010646, "grad_norm": 0.5550031065940857, "learning_rate": 5.149700742159444e-06, "loss": 0.0862, "step": 12840 }, { "epoch": 1.519990537023894, "grad_norm": 0.6459217071533203, "learning_rate": 5.148982523342112e-06, "loss": 0.0844, "step": 12850 }, { "epoch": 1.521173409037142, "grad_norm": 0.5380029082298279, "learning_rate": 5.148264304524778e-06, "loss": 0.0882, "step": 12860 }, { "epoch": 1.5223562810503903, "grad_norm": 0.5286968350410461, "learning_rate": 5.147546085707446e-06, "loss": 0.0888, "step": 12870 }, { "epoch": 1.5235391530636386, "grad_norm": 0.3601886034011841, "learning_rate": 5.146827866890113e-06, "loss": 0.081, "step": 12880 }, { "epoch": 1.5247220250768867, "grad_norm": 0.4349885582923889, "learning_rate": 5.14610964807278e-06, "loss": 0.0889, "step": 12890 }, { "epoch": 1.5259048970901348, "grad_norm": 0.4728254973888397, "learning_rate": 5.1453914292554465e-06, "loss": 0.0758, "step": 12900 }, { "epoch": 1.527087769103383, "grad_norm": 0.47574329376220703, "learning_rate": 5.1446732104381135e-06, "loss": 0.0848, "step": 12910 }, { "epoch": 1.5282706411166311, "grad_norm": 0.3857431411743164, "learning_rate": 5.143954991620781e-06, "loss": 0.0878, "step": 12920 }, { "epoch": 1.5294535131298792, "grad_norm": 0.43219229578971863, "learning_rate": 5.143236772803447e-06, "loss": 0.0816, "step": 12930 }, { "epoch": 1.5306363851431275, "grad_norm": 0.39201819896698, "learning_rate": 5.142518553986115e-06, "loss": 0.093, "step": 12940 }, { "epoch": 1.5318192571563758, "grad_norm": 0.4828786551952362, "learning_rate": 5.141800335168781e-06, "loss": 0.0896, "step": 12950 }, { "epoch": 1.5330021291696239, "grad_norm": 0.5764405131340027, "learning_rate": 5.141082116351449e-06, "loss": 0.0809, "step": 12960 }, { "epoch": 1.534185001182872, "grad_norm": 0.5455014109611511, "learning_rate": 5.140363897534115e-06, "loss": 0.0836, "step": 12970 }, { "epoch": 1.5353678731961202, "grad_norm": 0.5095604658126831, "learning_rate": 5.139645678716783e-06, "loss": 0.0837, "step": 12980 }, { "epoch": 1.5365507452093683, "grad_norm": 0.5282939672470093, "learning_rate": 5.13892745989945e-06, "loss": 0.0808, "step": 12990 }, { "epoch": 1.5377336172226164, "grad_norm": 0.5118599534034729, "learning_rate": 5.138209241082117e-06, "loss": 0.0778, "step": 13000 }, { "epoch": 1.5389164892358647, "grad_norm": 0.417383074760437, "learning_rate": 5.1374910222647835e-06, "loss": 0.0887, "step": 13010 }, { "epoch": 1.540099361249113, "grad_norm": 0.41792649030685425, "learning_rate": 5.1367728034474505e-06, "loss": 0.0896, "step": 13020 }, { "epoch": 1.541282233262361, "grad_norm": 0.445429265499115, "learning_rate": 5.136054584630117e-06, "loss": 0.0858, "step": 13030 }, { "epoch": 1.542465105275609, "grad_norm": 0.4394303858280182, "learning_rate": 5.135336365812784e-06, "loss": 0.0859, "step": 13040 }, { "epoch": 1.5436479772888574, "grad_norm": 0.41196584701538086, "learning_rate": 5.134618146995451e-06, "loss": 0.0801, "step": 13050 }, { "epoch": 1.5448308493021055, "grad_norm": 0.4072735905647278, "learning_rate": 5.133899928178118e-06, "loss": 0.0925, "step": 13060 }, { "epoch": 1.5460137213153535, "grad_norm": 0.41002094745635986, "learning_rate": 5.133181709360785e-06, "loss": 0.0879, "step": 13070 }, { "epoch": 1.5471965933286018, "grad_norm": 0.4437108635902405, "learning_rate": 5.132463490543453e-06, "loss": 0.0835, "step": 13080 }, { "epoch": 1.5483794653418501, "grad_norm": 0.45409607887268066, "learning_rate": 5.131745271726119e-06, "loss": 0.0833, "step": 13090 }, { "epoch": 1.5495623373550982, "grad_norm": 0.4421013295650482, "learning_rate": 5.131027052908787e-06, "loss": 0.0793, "step": 13100 }, { "epoch": 1.5507452093683463, "grad_norm": 0.49561014771461487, "learning_rate": 5.130308834091453e-06, "loss": 0.0832, "step": 13110 }, { "epoch": 1.5519280813815945, "grad_norm": 0.5307127833366394, "learning_rate": 5.1295906152741205e-06, "loss": 0.0863, "step": 13120 }, { "epoch": 1.5531109533948428, "grad_norm": 0.5208670496940613, "learning_rate": 5.1288723964567875e-06, "loss": 0.0808, "step": 13130 }, { "epoch": 1.5542938254080907, "grad_norm": 0.5384665131568909, "learning_rate": 5.128154177639454e-06, "loss": 0.079, "step": 13140 }, { "epoch": 1.555476697421339, "grad_norm": 0.4188074767589569, "learning_rate": 5.127435958822121e-06, "loss": 0.0864, "step": 13150 }, { "epoch": 1.5566595694345873, "grad_norm": 0.4938463866710663, "learning_rate": 5.126717740004788e-06, "loss": 0.0809, "step": 13160 }, { "epoch": 1.5578424414478353, "grad_norm": 0.4209802448749542, "learning_rate": 5.125999521187455e-06, "loss": 0.0841, "step": 13170 }, { "epoch": 1.5590253134610834, "grad_norm": 0.4319836497306824, "learning_rate": 5.125281302370122e-06, "loss": 0.0913, "step": 13180 }, { "epoch": 1.5602081854743317, "grad_norm": 0.5063772797584534, "learning_rate": 5.12456308355279e-06, "loss": 0.0898, "step": 13190 }, { "epoch": 1.56139105748758, "grad_norm": 0.6030347347259521, "learning_rate": 5.123844864735456e-06, "loss": 0.0871, "step": 13200 }, { "epoch": 1.5625739295008279, "grad_norm": 0.47259074449539185, "learning_rate": 5.123126645918124e-06, "loss": 0.0845, "step": 13210 }, { "epoch": 1.5637568015140761, "grad_norm": 0.48763424158096313, "learning_rate": 5.12240842710079e-06, "loss": 0.0814, "step": 13220 }, { "epoch": 1.5649396735273244, "grad_norm": 0.49168914556503296, "learning_rate": 5.1216902082834575e-06, "loss": 0.0867, "step": 13230 }, { "epoch": 1.5661225455405725, "grad_norm": 0.5321483016014099, "learning_rate": 5.120971989466124e-06, "loss": 0.0853, "step": 13240 }, { "epoch": 1.5673054175538206, "grad_norm": 0.4776305556297302, "learning_rate": 5.120253770648791e-06, "loss": 0.0842, "step": 13250 }, { "epoch": 1.5684882895670689, "grad_norm": 0.4125671684741974, "learning_rate": 5.119535551831458e-06, "loss": 0.0781, "step": 13260 }, { "epoch": 1.5696711615803172, "grad_norm": 0.5492151975631714, "learning_rate": 5.118817333014125e-06, "loss": 0.0877, "step": 13270 }, { "epoch": 1.570854033593565, "grad_norm": 0.5131093859672546, "learning_rate": 5.118099114196792e-06, "loss": 0.0865, "step": 13280 }, { "epoch": 1.5720369056068133, "grad_norm": 0.42910289764404297, "learning_rate": 5.117380895379459e-06, "loss": 0.0837, "step": 13290 }, { "epoch": 1.5732197776200616, "grad_norm": 0.4754706919193268, "learning_rate": 5.116662676562126e-06, "loss": 0.0923, "step": 13300 }, { "epoch": 1.5744026496333097, "grad_norm": 0.4037545323371887, "learning_rate": 5.115944457744793e-06, "loss": 0.077, "step": 13310 }, { "epoch": 1.5755855216465577, "grad_norm": 0.42603200674057007, "learning_rate": 5.11522623892746e-06, "loss": 0.0889, "step": 13320 }, { "epoch": 1.576768393659806, "grad_norm": 0.4421207010746002, "learning_rate": 5.114508020110127e-06, "loss": 0.0809, "step": 13330 }, { "epoch": 1.5779512656730543, "grad_norm": 0.3859231770038605, "learning_rate": 5.113789801292794e-06, "loss": 0.0788, "step": 13340 }, { "epoch": 1.5791341376863024, "grad_norm": 0.5129840970039368, "learning_rate": 5.113071582475461e-06, "loss": 0.0942, "step": 13350 }, { "epoch": 1.5803170096995505, "grad_norm": 0.4262208938598633, "learning_rate": 5.1123533636581275e-06, "loss": 0.0871, "step": 13360 }, { "epoch": 1.5814998817127988, "grad_norm": 0.5309639573097229, "learning_rate": 5.111635144840795e-06, "loss": 0.0868, "step": 13370 }, { "epoch": 1.5826827537260468, "grad_norm": 0.4574877619743347, "learning_rate": 5.110916926023461e-06, "loss": 0.0822, "step": 13380 }, { "epoch": 1.583865625739295, "grad_norm": 0.4201333224773407, "learning_rate": 5.110198707206129e-06, "loss": 0.0916, "step": 13390 }, { "epoch": 1.5850484977525432, "grad_norm": 0.39514097571372986, "learning_rate": 5.109480488388796e-06, "loss": 0.0876, "step": 13400 }, { "epoch": 1.5862313697657915, "grad_norm": 0.5352892875671387, "learning_rate": 5.108762269571463e-06, "loss": 0.0877, "step": 13410 }, { "epoch": 1.5874142417790396, "grad_norm": 0.6078268885612488, "learning_rate": 5.10804405075413e-06, "loss": 0.0928, "step": 13420 }, { "epoch": 1.5885971137922876, "grad_norm": 0.515243411064148, "learning_rate": 5.107325831936797e-06, "loss": 0.0819, "step": 13430 }, { "epoch": 1.589779985805536, "grad_norm": 0.469497412443161, "learning_rate": 5.106607613119464e-06, "loss": 0.0908, "step": 13440 }, { "epoch": 1.590962857818784, "grad_norm": 1.1221482753753662, "learning_rate": 5.105889394302131e-06, "loss": 0.0804, "step": 13450 }, { "epoch": 1.592145729832032, "grad_norm": 0.49614229798316956, "learning_rate": 5.1051711754847985e-06, "loss": 0.0874, "step": 13460 }, { "epoch": 1.5933286018452804, "grad_norm": 0.5423674583435059, "learning_rate": 5.1044529566674645e-06, "loss": 0.0884, "step": 13470 }, { "epoch": 1.5945114738585286, "grad_norm": 0.38937488198280334, "learning_rate": 5.103734737850132e-06, "loss": 0.0826, "step": 13480 }, { "epoch": 1.5956943458717767, "grad_norm": 0.3987462520599365, "learning_rate": 5.103016519032798e-06, "loss": 0.0931, "step": 13490 }, { "epoch": 1.5968772178850248, "grad_norm": 0.46256017684936523, "learning_rate": 5.102298300215466e-06, "loss": 0.0854, "step": 13500 }, { "epoch": 1.598060089898273, "grad_norm": 0.44698619842529297, "learning_rate": 5.101580081398132e-06, "loss": 0.0798, "step": 13510 }, { "epoch": 1.5992429619115212, "grad_norm": 0.5572316646575928, "learning_rate": 5.1008618625808e-06, "loss": 0.0879, "step": 13520 }, { "epoch": 1.6004258339247692, "grad_norm": 0.49993929266929626, "learning_rate": 5.100143643763467e-06, "loss": 0.0879, "step": 13530 }, { "epoch": 1.6016087059380175, "grad_norm": 0.5378242135047913, "learning_rate": 5.099425424946134e-06, "loss": 0.0924, "step": 13540 }, { "epoch": 1.6027915779512658, "grad_norm": 0.36781078577041626, "learning_rate": 5.098707206128801e-06, "loss": 0.0786, "step": 13550 }, { "epoch": 1.6039744499645139, "grad_norm": 0.4819577932357788, "learning_rate": 5.097988987311468e-06, "loss": 0.0814, "step": 13560 }, { "epoch": 1.605157321977762, "grad_norm": 0.47067803144454956, "learning_rate": 5.097270768494135e-06, "loss": 0.0861, "step": 13570 }, { "epoch": 1.6063401939910102, "grad_norm": 0.4379580616950989, "learning_rate": 5.0965525496768015e-06, "loss": 0.0846, "step": 13580 }, { "epoch": 1.6075230660042583, "grad_norm": 0.41308915615081787, "learning_rate": 5.0958343308594685e-06, "loss": 0.0735, "step": 13590 }, { "epoch": 1.6087059380175064, "grad_norm": 0.4910452663898468, "learning_rate": 5.095116112042135e-06, "loss": 0.0885, "step": 13600 }, { "epoch": 1.6098888100307547, "grad_norm": 0.5785920023918152, "learning_rate": 5.094397893224802e-06, "loss": 0.0837, "step": 13610 }, { "epoch": 1.611071682044003, "grad_norm": 0.49226444959640503, "learning_rate": 5.093679674407469e-06, "loss": 0.086, "step": 13620 }, { "epoch": 1.612254554057251, "grad_norm": 0.4559890329837799, "learning_rate": 5.092961455590136e-06, "loss": 0.0752, "step": 13630 }, { "epoch": 1.6134374260704991, "grad_norm": 0.4879823923110962, "learning_rate": 5.092243236772804e-06, "loss": 0.0853, "step": 13640 }, { "epoch": 1.6146202980837474, "grad_norm": 0.5032764673233032, "learning_rate": 5.091525017955471e-06, "loss": 0.0792, "step": 13650 }, { "epoch": 1.6158031700969955, "grad_norm": 0.507117509841919, "learning_rate": 5.090806799138138e-06, "loss": 0.0834, "step": 13660 }, { "epoch": 1.6169860421102435, "grad_norm": 0.4368346929550171, "learning_rate": 5.090088580320805e-06, "loss": 0.0838, "step": 13670 }, { "epoch": 1.6181689141234918, "grad_norm": 0.5360224843025208, "learning_rate": 5.089370361503472e-06, "loss": 0.0856, "step": 13680 }, { "epoch": 1.6193517861367401, "grad_norm": 0.464647501707077, "learning_rate": 5.0886521426861385e-06, "loss": 0.0826, "step": 13690 }, { "epoch": 1.6205346581499882, "grad_norm": 0.5082803964614868, "learning_rate": 5.0879339238688054e-06, "loss": 0.0882, "step": 13700 }, { "epoch": 1.6217175301632363, "grad_norm": 0.523277759552002, "learning_rate": 5.087215705051472e-06, "loss": 0.0855, "step": 13710 }, { "epoch": 1.6229004021764846, "grad_norm": 0.5262504816055298, "learning_rate": 5.086497486234139e-06, "loss": 0.0858, "step": 13720 }, { "epoch": 1.6240832741897326, "grad_norm": 0.38642701506614685, "learning_rate": 5.085779267416807e-06, "loss": 0.0914, "step": 13730 }, { "epoch": 1.6252661462029807, "grad_norm": 0.44468575716018677, "learning_rate": 5.085061048599473e-06, "loss": 0.0818, "step": 13740 }, { "epoch": 1.626449018216229, "grad_norm": 0.4140653908252716, "learning_rate": 5.084342829782141e-06, "loss": 0.0888, "step": 13750 }, { "epoch": 1.6276318902294773, "grad_norm": 0.3989298641681671, "learning_rate": 5.083624610964807e-06, "loss": 0.0774, "step": 13760 }, { "epoch": 1.6288147622427254, "grad_norm": 0.4525853395462036, "learning_rate": 5.082906392147475e-06, "loss": 0.0822, "step": 13770 }, { "epoch": 1.6299976342559734, "grad_norm": 0.5370970368385315, "learning_rate": 5.082188173330141e-06, "loss": 0.0916, "step": 13780 }, { "epoch": 1.6311805062692217, "grad_norm": 0.5342410206794739, "learning_rate": 5.081469954512809e-06, "loss": 0.0884, "step": 13790 }, { "epoch": 1.6323633782824698, "grad_norm": 0.4691530764102936, "learning_rate": 5.0807517356954755e-06, "loss": 0.0868, "step": 13800 }, { "epoch": 1.6335462502957179, "grad_norm": 0.45714765787124634, "learning_rate": 5.0800335168781424e-06, "loss": 0.0843, "step": 13810 }, { "epoch": 1.6347291223089662, "grad_norm": 0.4359150826931, "learning_rate": 5.079315298060809e-06, "loss": 0.0894, "step": 13820 }, { "epoch": 1.6359119943222145, "grad_norm": 0.5161109566688538, "learning_rate": 5.078597079243476e-06, "loss": 0.0869, "step": 13830 }, { "epoch": 1.6370948663354625, "grad_norm": 0.5185908079147339, "learning_rate": 5.077878860426143e-06, "loss": 0.0843, "step": 13840 }, { "epoch": 1.6382777383487106, "grad_norm": 0.46719321608543396, "learning_rate": 5.07716064160881e-06, "loss": 0.0868, "step": 13850 }, { "epoch": 1.639460610361959, "grad_norm": 0.3824554681777954, "learning_rate": 5.076442422791477e-06, "loss": 0.0828, "step": 13860 }, { "epoch": 1.640643482375207, "grad_norm": 0.4530118405818939, "learning_rate": 5.075724203974144e-06, "loss": 0.08, "step": 13870 }, { "epoch": 1.641826354388455, "grad_norm": 0.4759071171283722, "learning_rate": 5.075005985156811e-06, "loss": 0.0812, "step": 13880 }, { "epoch": 1.6430092264017033, "grad_norm": 0.49863654375076294, "learning_rate": 5.074287766339478e-06, "loss": 0.0888, "step": 13890 }, { "epoch": 1.6441920984149516, "grad_norm": 0.47390881180763245, "learning_rate": 5.073569547522146e-06, "loss": 0.0893, "step": 13900 }, { "epoch": 1.6453749704281997, "grad_norm": 0.4850195050239563, "learning_rate": 5.0728513287048125e-06, "loss": 0.0843, "step": 13910 }, { "epoch": 1.6465578424414478, "grad_norm": 0.4923732578754425, "learning_rate": 5.0721331098874794e-06, "loss": 0.0862, "step": 13920 }, { "epoch": 1.647740714454696, "grad_norm": 0.5443207621574402, "learning_rate": 5.071414891070146e-06, "loss": 0.086, "step": 13930 }, { "epoch": 1.6489235864679441, "grad_norm": 0.6342526078224182, "learning_rate": 5.070696672252813e-06, "loss": 0.0852, "step": 13940 }, { "epoch": 1.6501064584811922, "grad_norm": 0.439960241317749, "learning_rate": 5.06997845343548e-06, "loss": 0.0901, "step": 13950 }, { "epoch": 1.6512893304944405, "grad_norm": 0.427645206451416, "learning_rate": 5.069260234618147e-06, "loss": 0.0833, "step": 13960 }, { "epoch": 1.6524722025076888, "grad_norm": 0.4918398857116699, "learning_rate": 5.068542015800814e-06, "loss": 0.085, "step": 13970 }, { "epoch": 1.6536550745209369, "grad_norm": 0.47024616599082947, "learning_rate": 5.067823796983481e-06, "loss": 0.0903, "step": 13980 }, { "epoch": 1.654837946534185, "grad_norm": 0.5475759506225586, "learning_rate": 5.067105578166148e-06, "loss": 0.0761, "step": 13990 }, { "epoch": 1.6560208185474332, "grad_norm": 0.4551384747028351, "learning_rate": 5.066387359348816e-06, "loss": 0.0846, "step": 14000 }, { "epoch": 1.6572036905606813, "grad_norm": 0.49339112639427185, "learning_rate": 5.065669140531482e-06, "loss": 0.0811, "step": 14010 }, { "epoch": 1.6583865625739294, "grad_norm": 0.40458226203918457, "learning_rate": 5.0649509217141495e-06, "loss": 0.0773, "step": 14020 }, { "epoch": 1.6595694345871776, "grad_norm": 0.477711945772171, "learning_rate": 5.064232702896816e-06, "loss": 0.0954, "step": 14030 }, { "epoch": 1.660752306600426, "grad_norm": 0.41989997029304504, "learning_rate": 5.063514484079483e-06, "loss": 0.0793, "step": 14040 }, { "epoch": 1.661935178613674, "grad_norm": 0.4381128251552582, "learning_rate": 5.0627962652621494e-06, "loss": 0.076, "step": 14050 }, { "epoch": 1.663118050626922, "grad_norm": 0.5876767635345459, "learning_rate": 5.062078046444817e-06, "loss": 0.0783, "step": 14060 }, { "epoch": 1.6643009226401704, "grad_norm": 0.5253058075904846, "learning_rate": 5.061359827627484e-06, "loss": 0.0897, "step": 14070 }, { "epoch": 1.6654837946534184, "grad_norm": 0.4800052344799042, "learning_rate": 5.060641608810151e-06, "loss": 0.0857, "step": 14080 }, { "epoch": 1.6666666666666665, "grad_norm": 0.5640294551849365, "learning_rate": 5.059923389992818e-06, "loss": 0.0869, "step": 14090 }, { "epoch": 1.6678495386799148, "grad_norm": 0.39901667833328247, "learning_rate": 5.059205171175485e-06, "loss": 0.0854, "step": 14100 }, { "epoch": 1.669032410693163, "grad_norm": 0.48886993527412415, "learning_rate": 5.058486952358152e-06, "loss": 0.0849, "step": 14110 }, { "epoch": 1.6702152827064112, "grad_norm": 0.5667365789413452, "learning_rate": 5.057768733540819e-06, "loss": 0.0726, "step": 14120 }, { "epoch": 1.6713981547196592, "grad_norm": 0.5897577404975891, "learning_rate": 5.057050514723486e-06, "loss": 0.0802, "step": 14130 }, { "epoch": 1.6725810267329075, "grad_norm": 0.5659159421920776, "learning_rate": 5.056332295906153e-06, "loss": 0.0878, "step": 14140 }, { "epoch": 1.6737638987461558, "grad_norm": 0.4621312916278839, "learning_rate": 5.05561407708882e-06, "loss": 0.0851, "step": 14150 }, { "epoch": 1.6749467707594037, "grad_norm": 0.48763999342918396, "learning_rate": 5.0548958582714864e-06, "loss": 0.0763, "step": 14160 }, { "epoch": 1.676129642772652, "grad_norm": 0.49499693512916565, "learning_rate": 5.054177639454154e-06, "loss": 0.082, "step": 14170 }, { "epoch": 1.6773125147859003, "grad_norm": 0.4495164752006531, "learning_rate": 5.053459420636821e-06, "loss": 0.0816, "step": 14180 }, { "epoch": 1.6784953867991483, "grad_norm": 0.4357379674911499, "learning_rate": 5.052741201819488e-06, "loss": 0.086, "step": 14190 }, { "epoch": 1.6796782588123964, "grad_norm": 0.45698365569114685, "learning_rate": 5.052022983002155e-06, "loss": 0.0825, "step": 14200 }, { "epoch": 1.6808611308256447, "grad_norm": 0.4621385335922241, "learning_rate": 5.051304764184822e-06, "loss": 0.0871, "step": 14210 }, { "epoch": 1.682044002838893, "grad_norm": 0.421455442905426, "learning_rate": 5.050586545367489e-06, "loss": 0.079, "step": 14220 }, { "epoch": 1.6832268748521408, "grad_norm": 0.5462622046470642, "learning_rate": 5.049868326550156e-06, "loss": 0.0863, "step": 14230 }, { "epoch": 1.6844097468653891, "grad_norm": 0.46460989117622375, "learning_rate": 5.049150107732823e-06, "loss": 0.0785, "step": 14240 }, { "epoch": 1.6855926188786374, "grad_norm": 0.40375712513923645, "learning_rate": 5.04843188891549e-06, "loss": 0.0863, "step": 14250 }, { "epoch": 1.6867754908918855, "grad_norm": 0.37221693992614746, "learning_rate": 5.0477136700981565e-06, "loss": 0.0768, "step": 14260 }, { "epoch": 1.6879583629051336, "grad_norm": 0.604553759098053, "learning_rate": 5.0469954512808234e-06, "loss": 0.0883, "step": 14270 }, { "epoch": 1.6891412349183819, "grad_norm": 0.5659784078598022, "learning_rate": 5.04627723246349e-06, "loss": 0.0825, "step": 14280 }, { "epoch": 1.6903241069316302, "grad_norm": 0.46185752749443054, "learning_rate": 5.045559013646158e-06, "loss": 0.0819, "step": 14290 }, { "epoch": 1.691506978944878, "grad_norm": 0.4523204565048218, "learning_rate": 5.044840794828824e-06, "loss": 0.0829, "step": 14300 }, { "epoch": 1.6926898509581263, "grad_norm": 0.45360180735588074, "learning_rate": 5.044122576011492e-06, "loss": 0.0834, "step": 14310 }, { "epoch": 1.6938727229713746, "grad_norm": 0.5072771906852722, "learning_rate": 5.043404357194158e-06, "loss": 0.0826, "step": 14320 }, { "epoch": 1.6950555949846227, "grad_norm": 0.397657573223114, "learning_rate": 5.042686138376826e-06, "loss": 0.0782, "step": 14330 }, { "epoch": 1.6962384669978707, "grad_norm": 0.5385441184043884, "learning_rate": 5.041967919559493e-06, "loss": 0.0844, "step": 14340 }, { "epoch": 1.697421339011119, "grad_norm": 0.4644298255443573, "learning_rate": 5.04124970074216e-06, "loss": 0.0824, "step": 14350 }, { "epoch": 1.6986042110243673, "grad_norm": 0.5390400290489197, "learning_rate": 5.040531481924827e-06, "loss": 0.0913, "step": 14360 }, { "epoch": 1.6997870830376152, "grad_norm": 0.39994779229164124, "learning_rate": 5.0398132631074935e-06, "loss": 0.0823, "step": 14370 }, { "epoch": 1.7009699550508635, "grad_norm": 0.43383222818374634, "learning_rate": 5.0390950442901604e-06, "loss": 0.0888, "step": 14380 }, { "epoch": 1.7021528270641118, "grad_norm": 0.6087126135826111, "learning_rate": 5.038376825472827e-06, "loss": 0.0916, "step": 14390 }, { "epoch": 1.7033356990773598, "grad_norm": 0.48791828751564026, "learning_rate": 5.037658606655495e-06, "loss": 0.0905, "step": 14400 }, { "epoch": 1.704518571090608, "grad_norm": 0.5998507142066956, "learning_rate": 5.036940387838161e-06, "loss": 0.0927, "step": 14410 }, { "epoch": 1.7057014431038562, "grad_norm": 0.47150087356567383, "learning_rate": 5.036222169020829e-06, "loss": 0.0835, "step": 14420 }, { "epoch": 1.7068843151171045, "grad_norm": 0.4353649318218231, "learning_rate": 5.035503950203495e-06, "loss": 0.0848, "step": 14430 }, { "epoch": 1.7080671871303525, "grad_norm": 0.4664275050163269, "learning_rate": 5.034785731386163e-06, "loss": 0.0849, "step": 14440 }, { "epoch": 1.7092500591436006, "grad_norm": 0.46540412306785583, "learning_rate": 5.03406751256883e-06, "loss": 0.0934, "step": 14450 }, { "epoch": 1.710432931156849, "grad_norm": 0.48796892166137695, "learning_rate": 5.033349293751497e-06, "loss": 0.0851, "step": 14460 }, { "epoch": 1.711615803170097, "grad_norm": 0.36350828409194946, "learning_rate": 5.032631074934164e-06, "loss": 0.0804, "step": 14470 }, { "epoch": 1.712798675183345, "grad_norm": 0.4214877784252167, "learning_rate": 5.0319128561168305e-06, "loss": 0.0849, "step": 14480 }, { "epoch": 1.7139815471965933, "grad_norm": 0.4448481798171997, "learning_rate": 5.0311946372994974e-06, "loss": 0.0882, "step": 14490 }, { "epoch": 1.7151644192098416, "grad_norm": 0.4421767294406891, "learning_rate": 5.030476418482164e-06, "loss": 0.0817, "step": 14500 }, { "epoch": 1.7163472912230897, "grad_norm": 0.3899979293346405, "learning_rate": 5.029758199664831e-06, "loss": 0.0696, "step": 14510 }, { "epoch": 1.7175301632363378, "grad_norm": 0.49959424138069153, "learning_rate": 5.029039980847498e-06, "loss": 0.0894, "step": 14520 }, { "epoch": 1.718713035249586, "grad_norm": 0.44834429025650024, "learning_rate": 5.028321762030165e-06, "loss": 0.0828, "step": 14530 }, { "epoch": 1.7198959072628341, "grad_norm": 0.353654682636261, "learning_rate": 5.027603543212832e-06, "loss": 0.0871, "step": 14540 }, { "epoch": 1.7210787792760822, "grad_norm": 0.4393976032733917, "learning_rate": 5.026885324395499e-06, "loss": 0.0935, "step": 14550 }, { "epoch": 1.7222616512893305, "grad_norm": 0.5019580125808716, "learning_rate": 5.026167105578167e-06, "loss": 0.0936, "step": 14560 }, { "epoch": 1.7234445233025788, "grad_norm": 0.5106562376022339, "learning_rate": 5.025448886760833e-06, "loss": 0.0948, "step": 14570 }, { "epoch": 1.7246273953158269, "grad_norm": 0.5101697444915771, "learning_rate": 5.024730667943501e-06, "loss": 0.0852, "step": 14580 }, { "epoch": 1.725810267329075, "grad_norm": 0.4592476189136505, "learning_rate": 5.024012449126167e-06, "loss": 0.0812, "step": 14590 }, { "epoch": 1.7269931393423232, "grad_norm": 0.5134539604187012, "learning_rate": 5.0232942303088344e-06, "loss": 0.0804, "step": 14600 }, { "epoch": 1.7281760113555713, "grad_norm": 0.5580853223800659, "learning_rate": 5.0225760114915005e-06, "loss": 0.0809, "step": 14610 }, { "epoch": 1.7293588833688194, "grad_norm": 0.5349219441413879, "learning_rate": 5.021857792674168e-06, "loss": 0.083, "step": 14620 }, { "epoch": 1.7305417553820677, "grad_norm": 0.5862219333648682, "learning_rate": 5.021139573856835e-06, "loss": 0.0878, "step": 14630 }, { "epoch": 1.731724627395316, "grad_norm": 0.7241619825363159, "learning_rate": 5.020421355039502e-06, "loss": 0.0871, "step": 14640 }, { "epoch": 1.732907499408564, "grad_norm": 0.500517725944519, "learning_rate": 5.01970313622217e-06, "loss": 0.0847, "step": 14650 }, { "epoch": 1.734090371421812, "grad_norm": 0.4849531352519989, "learning_rate": 5.018984917404836e-06, "loss": 0.0815, "step": 14660 }, { "epoch": 1.7352732434350604, "grad_norm": 0.35597267746925354, "learning_rate": 5.018266698587504e-06, "loss": 0.0799, "step": 14670 }, { "epoch": 1.7364561154483085, "grad_norm": 0.4448806941509247, "learning_rate": 5.01754847977017e-06, "loss": 0.0843, "step": 14680 }, { "epoch": 1.7376389874615565, "grad_norm": 0.4899101257324219, "learning_rate": 5.016830260952838e-06, "loss": 0.082, "step": 14690 }, { "epoch": 1.7388218594748048, "grad_norm": 0.3826588988304138, "learning_rate": 5.016112042135504e-06, "loss": 0.0728, "step": 14700 }, { "epoch": 1.7400047314880531, "grad_norm": 0.4365646243095398, "learning_rate": 5.0153938233181714e-06, "loss": 0.0861, "step": 14710 }, { "epoch": 1.7411876035013012, "grad_norm": 0.48221346735954285, "learning_rate": 5.014675604500838e-06, "loss": 0.0778, "step": 14720 }, { "epoch": 1.7423704755145493, "grad_norm": 0.4003322720527649, "learning_rate": 5.013957385683505e-06, "loss": 0.0827, "step": 14730 }, { "epoch": 1.7435533475277976, "grad_norm": 0.3685636520385742, "learning_rate": 5.013239166866172e-06, "loss": 0.0756, "step": 14740 }, { "epoch": 1.7447362195410456, "grad_norm": 0.4674631357192993, "learning_rate": 5.012520948048839e-06, "loss": 0.0885, "step": 14750 }, { "epoch": 1.7459190915542937, "grad_norm": 0.6063559651374817, "learning_rate": 5.011802729231506e-06, "loss": 0.0848, "step": 14760 }, { "epoch": 1.747101963567542, "grad_norm": 0.4469563364982605, "learning_rate": 5.011084510414173e-06, "loss": 0.0786, "step": 14770 }, { "epoch": 1.7482848355807903, "grad_norm": 0.42557021975517273, "learning_rate": 5.01036629159684e-06, "loss": 0.0844, "step": 14780 }, { "epoch": 1.7494677075940384, "grad_norm": 0.5144723057746887, "learning_rate": 5.009648072779507e-06, "loss": 0.0851, "step": 14790 }, { "epoch": 1.7504140052046369, "eval_accuracy": 0.6871111554712712, "eval_animal_abuse/accuracy": 0.9949762118641249, "eval_animal_abuse/f1": 0.7725903614457831, "eval_animal_abuse/fpr": 0.0021371117019486382, "eval_animal_abuse/precision": 0.8015625, "eval_animal_abuse/recall": 0.7456395348837209, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9964234620886981, "eval_child_abuse/f1": 0.6717557251908397, "eval_child_abuse/fpr": 0.0017062277312189463, "eval_child_abuse/precision": 0.6832298136645962, "eval_child_abuse/recall": 0.6606606606606606, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9702232425059054, "eval_controversial_topics,politics/f1": 0.4772196261682243, "eval_controversial_topics,politics/fpr": 0.013128088962108708, "eval_controversial_topics,politics/precision": 0.5164348925410872, "eval_controversial_topics,politics/recall": 0.44353963083604775, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9569983697641149, "eval_discrimination,stereotype,injustice/f1": 0.7104290355102498, "eval_discrimination,stereotype,injustice/fpr": 0.01763834170672639, "eval_discrimination,stereotype,injustice/precision": 0.7646491439594888, "eval_discrimination,stereotype,injustice/recall": 0.6633891213389121, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9732840935555778, "eval_drug_abuse,weapons,banned_substance/f1": 0.7620035566093657, "eval_drug_abuse,weapons,banned_substance/fpr": 0.01394373149062189, "eval_drug_abuse,weapons,banned_substance/precision": 0.7647233789411065, "eval_drug_abuse,weapons,banned_substance/recall": 0.7593030124040165, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9593771833516319, "eval_financial_crime,property_crime,theft/f1": 0.800229057591623, "eval_financial_crime,property_crime,theft/fpr": 0.027311427676317144, "eval_financial_crime,property_crime,theft/precision": 0.7674564569276636, "eval_financial_crime,property_crime,theft/recall": 0.835925482823449, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8505339854276874, "eval_flagged/aucpr": 0.9070477555675066, "eval_flagged/f1": 0.8623304987359228, "eval_flagged/fpr": 0.13779394666766628, "eval_flagged/precision": 0.8845162507072358, "eval_flagged/recall": 0.8412304564885953, "eval_hate_speech,offensive_language/accuracy": 0.9513923545263998, "eval_hate_speech,offensive_language/f1": 0.6851971557853911, "eval_hate_speech,offensive_language/fpr": 0.013118947560752763, "eval_hate_speech,offensive_language/precision": 0.8158029758850692, "eval_hate_speech,offensive_language/recall": 0.5906389301634473, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.0836065262556076, "eval_macro_f1": 0.6326025183657532, "eval_macro_precision": 0.6834877664102811, "eval_macro_recall": 0.6059294750927456, "eval_micro_f1": 0.749558558391108, "eval_micro_precision": 0.7851065900905285, "eval_micro_recall": 0.7170901719508509, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9878397711015737, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8873473733240177, "eval_non_violent_unethical_behavior/f1": 0.6814375764418101, "eval_non_violent_unethical_behavior/fpr": 0.042953828267729526, "eval_non_violent_unethical_behavior/precision": 0.7778135738831615, "eval_non_violent_unethical_behavior/recall": 0.6063117361459903, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.980187643477393, "eval_privacy_violation/f1": 0.8062469497315764, "eval_privacy_violation/fpr": 0.01230139287464126, "eval_privacy_violation/precision": 0.7790003143665514, "eval_privacy_violation/recall": 0.8354686446392448, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.2874, "eval_samples_per_second": 100.477, "eval_self_harm/accuracy": 0.9968227035299597, "eval_self_harm/f1": 0.7463479415670651, "eval_self_harm/fpr": 0.0010384563848318353, "eval_self_harm/precision": 0.8192419825072886, "eval_self_harm/recall": 0.6853658536585366, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.984180057890009, "eval_sexually_explicit,adult_content/f1": 0.660720656439529, "eval_sexually_explicit,adult_content/fpr": 0.007329503809637433, "eval_sexually_explicit,adult_content/precision": 0.6828908554572272, "eval_sexually_explicit,adult_content/recall": 0.6399447131997236, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.571, "eval_terrorism,organized_crime/accuracy": 0.9922480620155039, "eval_terrorism,organized_crime/f1": 0.22847682119205298, "eval_terrorism,organized_crime/fpr": 0.0009055388794794813, "eval_terrorism,organized_crime/precision": 0.5609756097560976, "eval_terrorism,organized_crime/recall": 0.14345114345114346, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9204178727085205, "eval_violence,aiding_and_abetting,incitement/f1": 0.8537807934470322, "eval_violence,aiding_and_abetting,incitement/fpr": 0.0625311635918588, "eval_violence,aiding_and_abetting,incitement/precision": 0.8350472318545976, "eval_violence,aiding_and_abetting,incitement/recall": 0.8733741870935467, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 14798 }, { "epoch": 1.7506505796072864, "grad_norm": 0.3898236155509949, "learning_rate": 5.008929853962174e-06, "loss": 0.0772, "step": 14800 }, { "epoch": 1.7518334516205347, "grad_norm": 0.4501531422138214, "learning_rate": 5.008211635144841e-06, "loss": 0.0867, "step": 14810 }, { "epoch": 1.7530163236337828, "grad_norm": 0.43499940633773804, "learning_rate": 5.007493416327508e-06, "loss": 0.0874, "step": 14820 }, { "epoch": 1.7541991956470309, "grad_norm": 0.3990466594696045, "learning_rate": 5.006775197510175e-06, "loss": 0.0843, "step": 14830 }, { "epoch": 1.7553820676602792, "grad_norm": 0.46902087330818176, "learning_rate": 5.006056978692841e-06, "loss": 0.0829, "step": 14840 }, { "epoch": 1.7565649396735274, "grad_norm": 0.47388482093811035, "learning_rate": 5.005338759875509e-06, "loss": 0.0892, "step": 14850 }, { "epoch": 1.7577478116867755, "grad_norm": 0.3494555950164795, "learning_rate": 5.004620541058175e-06, "loss": 0.0782, "step": 14860 }, { "epoch": 1.7589306837000236, "grad_norm": 0.37217211723327637, "learning_rate": 5.003902322240843e-06, "loss": 0.0825, "step": 14870 }, { "epoch": 1.7601135557132719, "grad_norm": 0.5203355550765991, "learning_rate": 5.003184103423509e-06, "loss": 0.0862, "step": 14880 }, { "epoch": 1.76129642772652, "grad_norm": 0.4392995834350586, "learning_rate": 5.002465884606177e-06, "loss": 0.0787, "step": 14890 }, { "epoch": 1.762479299739768, "grad_norm": 0.48038697242736816, "learning_rate": 5.001747665788844e-06, "loss": 0.0813, "step": 14900 }, { "epoch": 1.7636621717530163, "grad_norm": 0.6388407349586487, "learning_rate": 5.001029446971511e-06, "loss": 0.0898, "step": 14910 }, { "epoch": 1.7648450437662646, "grad_norm": 0.5217719674110413, "learning_rate": 5.000311228154178e-06, "loss": 0.0872, "step": 14920 }, { "epoch": 1.7660279157795127, "grad_norm": 0.4611804485321045, "learning_rate": 4.9995930093368446e-06, "loss": 0.0823, "step": 14930 }, { "epoch": 1.7672107877927608, "grad_norm": 0.5180298686027527, "learning_rate": 4.998874790519512e-06, "loss": 0.0795, "step": 14940 }, { "epoch": 1.768393659806009, "grad_norm": 0.5414012670516968, "learning_rate": 4.998156571702178e-06, "loss": 0.0873, "step": 14950 }, { "epoch": 1.7695765318192571, "grad_norm": 0.46057578921318054, "learning_rate": 4.997438352884846e-06, "loss": 0.0881, "step": 14960 }, { "epoch": 1.7707594038325052, "grad_norm": 0.4909656345844269, "learning_rate": 4.996720134067512e-06, "loss": 0.0914, "step": 14970 }, { "epoch": 1.7719422758457535, "grad_norm": 0.400807648897171, "learning_rate": 4.99600191525018e-06, "loss": 0.0837, "step": 14980 }, { "epoch": 1.7731251478590018, "grad_norm": 0.6635568141937256, "learning_rate": 4.995283696432847e-06, "loss": 0.0841, "step": 14990 }, { "epoch": 1.7743080198722498, "grad_norm": 0.41544538736343384, "learning_rate": 4.994565477615514e-06, "loss": 0.0811, "step": 15000 }, { "epoch": 1.775490891885498, "grad_norm": 0.5433276891708374, "learning_rate": 4.993847258798181e-06, "loss": 0.0818, "step": 15010 }, { "epoch": 1.7766737638987462, "grad_norm": 0.5289433598518372, "learning_rate": 4.993129039980848e-06, "loss": 0.0861, "step": 15020 }, { "epoch": 1.7778566359119943, "grad_norm": 0.40395763516426086, "learning_rate": 4.992410821163515e-06, "loss": 0.0921, "step": 15030 }, { "epoch": 1.7790395079252423, "grad_norm": 0.48226863145828247, "learning_rate": 4.9916926023461816e-06, "loss": 0.0798, "step": 15040 }, { "epoch": 1.7802223799384906, "grad_norm": 0.40582960844039917, "learning_rate": 4.9909743835288485e-06, "loss": 0.0854, "step": 15050 }, { "epoch": 1.781405251951739, "grad_norm": 0.4804951548576355, "learning_rate": 4.990256164711515e-06, "loss": 0.087, "step": 15060 }, { "epoch": 1.782588123964987, "grad_norm": 0.513481855392456, "learning_rate": 4.989537945894182e-06, "loss": 0.0869, "step": 15070 }, { "epoch": 1.783770995978235, "grad_norm": 0.48782968521118164, "learning_rate": 4.988819727076849e-06, "loss": 0.0914, "step": 15080 }, { "epoch": 1.7849538679914834, "grad_norm": 0.5819026827812195, "learning_rate": 4.988101508259516e-06, "loss": 0.0864, "step": 15090 }, { "epoch": 1.7861367400047314, "grad_norm": 0.41509053111076355, "learning_rate": 4.987383289442184e-06, "loss": 0.083, "step": 15100 }, { "epoch": 1.7873196120179795, "grad_norm": 0.536231279373169, "learning_rate": 4.98666507062485e-06, "loss": 0.079, "step": 15110 }, { "epoch": 1.7885024840312278, "grad_norm": 0.42679938673973083, "learning_rate": 4.985946851807518e-06, "loss": 0.0777, "step": 15120 }, { "epoch": 1.789685356044476, "grad_norm": 0.5637160539627075, "learning_rate": 4.985228632990184e-06, "loss": 0.0822, "step": 15130 }, { "epoch": 1.7908682280577242, "grad_norm": 0.44209080934524536, "learning_rate": 4.984510414172852e-06, "loss": 0.0853, "step": 15140 }, { "epoch": 1.7920511000709722, "grad_norm": 0.43635833263397217, "learning_rate": 4.983792195355518e-06, "loss": 0.0868, "step": 15150 }, { "epoch": 1.7932339720842205, "grad_norm": 0.4218122661113739, "learning_rate": 4.9830739765381855e-06, "loss": 0.0854, "step": 15160 }, { "epoch": 1.7944168440974686, "grad_norm": 0.4036318063735962, "learning_rate": 4.982355757720852e-06, "loss": 0.0782, "step": 15170 }, { "epoch": 1.7955997161107167, "grad_norm": 0.4380760192871094, "learning_rate": 4.981637538903519e-06, "loss": 0.0844, "step": 15180 }, { "epoch": 1.796782588123965, "grad_norm": 0.5436810255050659, "learning_rate": 4.980919320086186e-06, "loss": 0.0836, "step": 15190 }, { "epoch": 1.7979654601372133, "grad_norm": 0.40635013580322266, "learning_rate": 4.980201101268853e-06, "loss": 0.0777, "step": 15200 }, { "epoch": 1.7991483321504613, "grad_norm": 0.5426424145698547, "learning_rate": 4.979482882451521e-06, "loss": 0.0901, "step": 15210 }, { "epoch": 1.8003312041637094, "grad_norm": 0.42183759808540344, "learning_rate": 4.978764663634187e-06, "loss": 0.084, "step": 15220 }, { "epoch": 1.8015140761769577, "grad_norm": 0.42566707730293274, "learning_rate": 4.978046444816855e-06, "loss": 0.0796, "step": 15230 }, { "epoch": 1.802696948190206, "grad_norm": 0.5441873669624329, "learning_rate": 4.977328225999521e-06, "loss": 0.0888, "step": 15240 }, { "epoch": 1.8038798202034538, "grad_norm": 0.4647371470928192, "learning_rate": 4.976610007182189e-06, "loss": 0.0871, "step": 15250 }, { "epoch": 1.8050626922167021, "grad_norm": 0.4735593795776367, "learning_rate": 4.975891788364855e-06, "loss": 0.0824, "step": 15260 }, { "epoch": 1.8062455642299504, "grad_norm": 0.3692554533481598, "learning_rate": 4.9751735695475225e-06, "loss": 0.0788, "step": 15270 }, { "epoch": 1.8074284362431985, "grad_norm": 0.48800015449523926, "learning_rate": 4.974455350730189e-06, "loss": 0.0865, "step": 15280 }, { "epoch": 1.8086113082564466, "grad_norm": 0.5453053116798401, "learning_rate": 4.973737131912856e-06, "loss": 0.0864, "step": 15290 }, { "epoch": 1.8097941802696949, "grad_norm": 0.4880900979042053, "learning_rate": 4.973018913095523e-06, "loss": 0.0886, "step": 15300 }, { "epoch": 1.8109770522829431, "grad_norm": 0.4320691227912903, "learning_rate": 4.97230069427819e-06, "loss": 0.0809, "step": 15310 }, { "epoch": 1.812159924296191, "grad_norm": 0.5723305940628052, "learning_rate": 4.971582475460857e-06, "loss": 0.0867, "step": 15320 }, { "epoch": 1.8133427963094393, "grad_norm": 0.5696466565132141, "learning_rate": 4.970864256643524e-06, "loss": 0.0851, "step": 15330 }, { "epoch": 1.8145256683226876, "grad_norm": 0.3877998888492584, "learning_rate": 4.970146037826191e-06, "loss": 0.0743, "step": 15340 }, { "epoch": 1.8157085403359357, "grad_norm": 0.3125903606414795, "learning_rate": 4.969427819008858e-06, "loss": 0.0817, "step": 15350 }, { "epoch": 1.8168914123491837, "grad_norm": 0.5857609510421753, "learning_rate": 4.968709600191525e-06, "loss": 0.0912, "step": 15360 }, { "epoch": 1.818074284362432, "grad_norm": 0.5082122683525085, "learning_rate": 4.9679913813741926e-06, "loss": 0.0814, "step": 15370 }, { "epoch": 1.8192571563756803, "grad_norm": 0.45207247138023376, "learning_rate": 4.967273162556859e-06, "loss": 0.0916, "step": 15380 }, { "epoch": 1.8204400283889282, "grad_norm": 0.40892741084098816, "learning_rate": 4.966554943739526e-06, "loss": 0.0971, "step": 15390 }, { "epoch": 1.8216229004021764, "grad_norm": 0.4592600166797638, "learning_rate": 4.9658367249221925e-06, "loss": 0.0791, "step": 15400 }, { "epoch": 1.8228057724154247, "grad_norm": 0.4706800878047943, "learning_rate": 4.96511850610486e-06, "loss": 0.08, "step": 15410 }, { "epoch": 1.8239886444286728, "grad_norm": 0.4002559781074524, "learning_rate": 4.964400287287527e-06, "loss": 0.0817, "step": 15420 }, { "epoch": 1.8251715164419209, "grad_norm": 0.4132227301597595, "learning_rate": 4.963682068470194e-06, "loss": 0.0802, "step": 15430 }, { "epoch": 1.8263543884551692, "grad_norm": 0.4903717637062073, "learning_rate": 4.962963849652861e-06, "loss": 0.0731, "step": 15440 }, { "epoch": 1.8275372604684175, "grad_norm": 0.3824477791786194, "learning_rate": 4.962245630835528e-06, "loss": 0.0818, "step": 15450 }, { "epoch": 1.8287201324816655, "grad_norm": 0.42937853932380676, "learning_rate": 4.961527412018195e-06, "loss": 0.0726, "step": 15460 }, { "epoch": 1.8299030044949136, "grad_norm": 0.41023358702659607, "learning_rate": 4.960809193200862e-06, "loss": 0.0794, "step": 15470 }, { "epoch": 1.831085876508162, "grad_norm": 0.4835211932659149, "learning_rate": 4.9600909743835296e-06, "loss": 0.0851, "step": 15480 }, { "epoch": 1.83226874852141, "grad_norm": 0.46570977568626404, "learning_rate": 4.959372755566196e-06, "loss": 0.0922, "step": 15490 }, { "epoch": 1.833451620534658, "grad_norm": 0.435480535030365, "learning_rate": 4.958654536748863e-06, "loss": 0.0888, "step": 15500 }, { "epoch": 1.8346344925479063, "grad_norm": 0.5286694765090942, "learning_rate": 4.9579363179315295e-06, "loss": 0.0924, "step": 15510 }, { "epoch": 1.8358173645611546, "grad_norm": 0.4733712077140808, "learning_rate": 4.957218099114197e-06, "loss": 0.0849, "step": 15520 }, { "epoch": 1.8370002365744027, "grad_norm": 0.4937007427215576, "learning_rate": 4.956499880296863e-06, "loss": 0.0782, "step": 15530 }, { "epoch": 1.8381831085876508, "grad_norm": 0.5484076738357544, "learning_rate": 4.955781661479531e-06, "loss": 0.0878, "step": 15540 }, { "epoch": 1.839365980600899, "grad_norm": 0.48638248443603516, "learning_rate": 4.955063442662198e-06, "loss": 0.0893, "step": 15550 }, { "epoch": 1.8405488526141471, "grad_norm": 0.38909563422203064, "learning_rate": 4.954345223844865e-06, "loss": 0.0859, "step": 15560 }, { "epoch": 1.8417317246273952, "grad_norm": 0.4605638384819031, "learning_rate": 4.953627005027532e-06, "loss": 0.0857, "step": 15570 }, { "epoch": 1.8429145966406435, "grad_norm": 0.4918062090873718, "learning_rate": 4.952908786210199e-06, "loss": 0.0853, "step": 15580 }, { "epoch": 1.8440974686538918, "grad_norm": 0.47632917761802673, "learning_rate": 4.952190567392866e-06, "loss": 0.083, "step": 15590 }, { "epoch": 1.8452803406671399, "grad_norm": 0.38878393173217773, "learning_rate": 4.951472348575533e-06, "loss": 0.0926, "step": 15600 }, { "epoch": 1.846463212680388, "grad_norm": 0.37463560700416565, "learning_rate": 4.9507541297581996e-06, "loss": 0.0872, "step": 15610 }, { "epoch": 1.8476460846936362, "grad_norm": 0.45256805419921875, "learning_rate": 4.9500359109408665e-06, "loss": 0.0852, "step": 15620 }, { "epoch": 1.8488289567068843, "grad_norm": 0.5473739504814148, "learning_rate": 4.949317692123533e-06, "loss": 0.0852, "step": 15630 }, { "epoch": 1.8500118287201324, "grad_norm": 0.576340913772583, "learning_rate": 4.948599473306201e-06, "loss": 0.0839, "step": 15640 }, { "epoch": 1.8511947007333807, "grad_norm": 0.5214903950691223, "learning_rate": 4.947881254488867e-06, "loss": 0.0816, "step": 15650 }, { "epoch": 1.852377572746629, "grad_norm": 0.46985742449760437, "learning_rate": 4.947163035671535e-06, "loss": 0.0867, "step": 15660 }, { "epoch": 1.853560444759877, "grad_norm": 0.3884131610393524, "learning_rate": 4.946444816854202e-06, "loss": 0.082, "step": 15670 }, { "epoch": 1.854743316773125, "grad_norm": 0.6048451066017151, "learning_rate": 4.945726598036869e-06, "loss": 0.0816, "step": 15680 }, { "epoch": 1.8559261887863734, "grad_norm": 0.43747344613075256, "learning_rate": 4.945008379219536e-06, "loss": 0.081, "step": 15690 }, { "epoch": 1.8571090607996215, "grad_norm": 0.4798510670661926, "learning_rate": 4.944290160402203e-06, "loss": 0.0852, "step": 15700 }, { "epoch": 1.8582919328128695, "grad_norm": 0.5993862152099609, "learning_rate": 4.94357194158487e-06, "loss": 0.0806, "step": 15710 }, { "epoch": 1.8594748048261178, "grad_norm": 0.4466570317745209, "learning_rate": 4.9428537227675366e-06, "loss": 0.084, "step": 15720 }, { "epoch": 1.8606576768393661, "grad_norm": 0.5510773062705994, "learning_rate": 4.9421355039502035e-06, "loss": 0.0856, "step": 15730 }, { "epoch": 1.8618405488526142, "grad_norm": 0.435863733291626, "learning_rate": 4.94141728513287e-06, "loss": 0.0914, "step": 15740 }, { "epoch": 1.8630234208658623, "grad_norm": 0.46571415662765503, "learning_rate": 4.940699066315538e-06, "loss": 0.0792, "step": 15750 }, { "epoch": 1.8642062928791105, "grad_norm": 0.6232130527496338, "learning_rate": 4.939980847498204e-06, "loss": 0.0866, "step": 15760 }, { "epoch": 1.8653891648923586, "grad_norm": 0.5265774130821228, "learning_rate": 4.939262628680872e-06, "loss": 0.0817, "step": 15770 }, { "epoch": 1.8665720369056067, "grad_norm": 0.41559019684791565, "learning_rate": 4.938544409863538e-06, "loss": 0.0857, "step": 15780 }, { "epoch": 1.867754908918855, "grad_norm": 0.48837214708328247, "learning_rate": 4.937826191046206e-06, "loss": 0.0852, "step": 15790 }, { "epoch": 1.8689377809321033, "grad_norm": 0.3839782476425171, "learning_rate": 4.937107972228872e-06, "loss": 0.0843, "step": 15800 }, { "epoch": 1.8701206529453513, "grad_norm": 0.39202678203582764, "learning_rate": 4.93638975341154e-06, "loss": 0.0826, "step": 15810 }, { "epoch": 1.8713035249585994, "grad_norm": 0.5618425607681274, "learning_rate": 4.935671534594207e-06, "loss": 0.0854, "step": 15820 }, { "epoch": 1.8724863969718477, "grad_norm": 0.4404359757900238, "learning_rate": 4.9349533157768736e-06, "loss": 0.087, "step": 15830 }, { "epoch": 1.8736692689850958, "grad_norm": 0.42046114802360535, "learning_rate": 4.9342350969595405e-06, "loss": 0.0876, "step": 15840 }, { "epoch": 1.8748521409983439, "grad_norm": 0.4758741855621338, "learning_rate": 4.933516878142207e-06, "loss": 0.0877, "step": 15850 }, { "epoch": 1.8760350130115921, "grad_norm": 0.3714681565761566, "learning_rate": 4.932798659324874e-06, "loss": 0.0811, "step": 15860 }, { "epoch": 1.8772178850248404, "grad_norm": 0.4946613311767578, "learning_rate": 4.932080440507541e-06, "loss": 0.0898, "step": 15870 }, { "epoch": 1.8784007570380885, "grad_norm": 0.4098410904407501, "learning_rate": 4.931362221690208e-06, "loss": 0.0869, "step": 15880 }, { "epoch": 1.8795836290513366, "grad_norm": 0.4137856960296631, "learning_rate": 4.930644002872875e-06, "loss": 0.0922, "step": 15890 }, { "epoch": 1.8807665010645849, "grad_norm": 0.49188536405563354, "learning_rate": 4.929925784055542e-06, "loss": 0.0883, "step": 15900 }, { "epoch": 1.881949373077833, "grad_norm": 0.5094638466835022, "learning_rate": 4.92920756523821e-06, "loss": 0.0911, "step": 15910 }, { "epoch": 1.883132245091081, "grad_norm": 0.514210045337677, "learning_rate": 4.928489346420877e-06, "loss": 0.0809, "step": 15920 }, { "epoch": 1.8843151171043293, "grad_norm": 0.39019954204559326, "learning_rate": 4.927771127603544e-06, "loss": 0.0792, "step": 15930 }, { "epoch": 1.8854979891175776, "grad_norm": 0.3985145092010498, "learning_rate": 4.9270529087862106e-06, "loss": 0.0854, "step": 15940 }, { "epoch": 1.8866808611308257, "grad_norm": 0.43371543288230896, "learning_rate": 4.9263346899688775e-06, "loss": 0.0925, "step": 15950 }, { "epoch": 1.8878637331440737, "grad_norm": 0.3985431492328644, "learning_rate": 4.925616471151544e-06, "loss": 0.0804, "step": 15960 }, { "epoch": 1.889046605157322, "grad_norm": 0.5010446906089783, "learning_rate": 4.924898252334211e-06, "loss": 0.0842, "step": 15970 }, { "epoch": 1.89022947717057, "grad_norm": 0.5090044140815735, "learning_rate": 4.924180033516878e-06, "loss": 0.0867, "step": 15980 }, { "epoch": 1.8914123491838182, "grad_norm": 0.3643990159034729, "learning_rate": 4.923461814699545e-06, "loss": 0.0843, "step": 15990 }, { "epoch": 1.8925952211970665, "grad_norm": 0.47101426124572754, "learning_rate": 4.922743595882212e-06, "loss": 0.087, "step": 16000 }, { "epoch": 1.8937780932103148, "grad_norm": 0.4659419655799866, "learning_rate": 4.922025377064879e-06, "loss": 0.0779, "step": 16010 }, { "epoch": 1.8949609652235628, "grad_norm": 0.5119287371635437, "learning_rate": 4.921307158247547e-06, "loss": 0.0809, "step": 16020 }, { "epoch": 1.896143837236811, "grad_norm": 0.3866863548755646, "learning_rate": 4.920588939430213e-06, "loss": 0.0904, "step": 16030 }, { "epoch": 1.8973267092500592, "grad_norm": 0.6278448700904846, "learning_rate": 4.919870720612881e-06, "loss": 0.0893, "step": 16040 }, { "epoch": 1.8985095812633073, "grad_norm": 0.5060496926307678, "learning_rate": 4.919152501795547e-06, "loss": 0.0848, "step": 16050 }, { "epoch": 1.8996924532765553, "grad_norm": 0.5165701508522034, "learning_rate": 4.9184342829782145e-06, "loss": 0.0897, "step": 16060 }, { "epoch": 1.9008753252898036, "grad_norm": 0.5012458562850952, "learning_rate": 4.9177160641608805e-06, "loss": 0.079, "step": 16070 }, { "epoch": 1.902058197303052, "grad_norm": 0.6436921954154968, "learning_rate": 4.916997845343548e-06, "loss": 0.0769, "step": 16080 }, { "epoch": 1.9032410693163, "grad_norm": 0.4909416735172272, "learning_rate": 4.916279626526215e-06, "loss": 0.0873, "step": 16090 }, { "epoch": 1.904423941329548, "grad_norm": 0.5381664633750916, "learning_rate": 4.915561407708882e-06, "loss": 0.0846, "step": 16100 }, { "epoch": 1.9056068133427964, "grad_norm": 0.5190691947937012, "learning_rate": 4.914843188891549e-06, "loss": 0.0838, "step": 16110 }, { "epoch": 1.9067896853560444, "grad_norm": 0.5317950248718262, "learning_rate": 4.914124970074216e-06, "loss": 0.082, "step": 16120 }, { "epoch": 1.9079725573692925, "grad_norm": 0.4426456689834595, "learning_rate": 4.913406751256883e-06, "loss": 0.0806, "step": 16130 }, { "epoch": 1.9091554293825408, "grad_norm": 0.43813571333885193, "learning_rate": 4.91268853243955e-06, "loss": 0.0783, "step": 16140 }, { "epoch": 1.910338301395789, "grad_norm": 0.46479544043540955, "learning_rate": 4.911970313622217e-06, "loss": 0.0861, "step": 16150 }, { "epoch": 1.9115211734090372, "grad_norm": 0.4219079315662384, "learning_rate": 4.911252094804884e-06, "loss": 0.0883, "step": 16160 }, { "epoch": 1.9127040454222852, "grad_norm": 0.4021356701850891, "learning_rate": 4.9105338759875515e-06, "loss": 0.0872, "step": 16170 }, { "epoch": 1.9138869174355335, "grad_norm": 0.6016069650650024, "learning_rate": 4.9098156571702175e-06, "loss": 0.0824, "step": 16180 }, { "epoch": 1.9150697894487816, "grad_norm": 0.5032521486282349, "learning_rate": 4.909097438352885e-06, "loss": 0.0843, "step": 16190 }, { "epoch": 1.9162526614620297, "grad_norm": 0.41720372438430786, "learning_rate": 4.908379219535552e-06, "loss": 0.0896, "step": 16200 }, { "epoch": 1.917435533475278, "grad_norm": 0.5209783911705017, "learning_rate": 4.907661000718219e-06, "loss": 0.0821, "step": 16210 }, { "epoch": 1.9186184054885262, "grad_norm": 0.46876260638237, "learning_rate": 4.906942781900886e-06, "loss": 0.0804, "step": 16220 }, { "epoch": 1.9198012775017743, "grad_norm": 0.4880792200565338, "learning_rate": 4.906224563083553e-06, "loss": 0.0833, "step": 16230 }, { "epoch": 1.9209841495150224, "grad_norm": 0.393856942653656, "learning_rate": 4.90550634426622e-06, "loss": 0.0842, "step": 16240 }, { "epoch": 1.9221670215282707, "grad_norm": 0.4902395009994507, "learning_rate": 4.904788125448887e-06, "loss": 0.084, "step": 16250 }, { "epoch": 1.923349893541519, "grad_norm": 0.5400048494338989, "learning_rate": 4.904069906631554e-06, "loss": 0.0796, "step": 16260 }, { "epoch": 1.9245327655547668, "grad_norm": 0.426654577255249, "learning_rate": 4.903351687814221e-06, "loss": 0.0845, "step": 16270 }, { "epoch": 1.9257156375680151, "grad_norm": 0.4529011845588684, "learning_rate": 4.902633468996888e-06, "loss": 0.0804, "step": 16280 }, { "epoch": 1.9268985095812634, "grad_norm": 0.48149970173835754, "learning_rate": 4.901915250179555e-06, "loss": 0.0853, "step": 16290 }, { "epoch": 1.9280813815945115, "grad_norm": 0.4331369400024414, "learning_rate": 4.9011970313622215e-06, "loss": 0.0845, "step": 16300 }, { "epoch": 1.9292642536077595, "grad_norm": 0.4567747712135315, "learning_rate": 4.900478812544889e-06, "loss": 0.0825, "step": 16310 }, { "epoch": 1.9304471256210078, "grad_norm": 0.5403380990028381, "learning_rate": 4.899760593727555e-06, "loss": 0.0835, "step": 16320 }, { "epoch": 1.9316299976342561, "grad_norm": 0.528096079826355, "learning_rate": 4.899042374910223e-06, "loss": 0.0739, "step": 16330 }, { "epoch": 1.932812869647504, "grad_norm": 0.48877203464508057, "learning_rate": 4.898324156092889e-06, "loss": 0.0856, "step": 16340 }, { "epoch": 1.9339957416607523, "grad_norm": 0.4258859157562256, "learning_rate": 4.897605937275557e-06, "loss": 0.0846, "step": 16350 }, { "epoch": 1.9351786136740006, "grad_norm": 0.45702052116394043, "learning_rate": 4.896887718458224e-06, "loss": 0.0783, "step": 16360 }, { "epoch": 1.9363614856872486, "grad_norm": 0.508746325969696, "learning_rate": 4.896169499640891e-06, "loss": 0.0815, "step": 16370 }, { "epoch": 1.9375443577004967, "grad_norm": 0.4060031473636627, "learning_rate": 4.895451280823558e-06, "loss": 0.0836, "step": 16380 }, { "epoch": 1.938727229713745, "grad_norm": 0.4715941548347473, "learning_rate": 4.894733062006225e-06, "loss": 0.0824, "step": 16390 }, { "epoch": 1.9399101017269933, "grad_norm": 0.3513747751712799, "learning_rate": 4.8940148431888915e-06, "loss": 0.0809, "step": 16400 }, { "epoch": 1.9410929737402411, "grad_norm": 0.7689327597618103, "learning_rate": 4.8932966243715585e-06, "loss": 0.0875, "step": 16410 }, { "epoch": 1.9422758457534894, "grad_norm": 0.549260675907135, "learning_rate": 4.892578405554226e-06, "loss": 0.0808, "step": 16420 }, { "epoch": 1.9434587177667377, "grad_norm": 0.4770592451095581, "learning_rate": 4.891860186736892e-06, "loss": 0.0791, "step": 16430 }, { "epoch": 1.9446415897799858, "grad_norm": 0.5003240704536438, "learning_rate": 4.89114196791956e-06, "loss": 0.0764, "step": 16440 }, { "epoch": 1.9458244617932339, "grad_norm": 0.43694058060646057, "learning_rate": 4.890423749102226e-06, "loss": 0.0796, "step": 16450 }, { "epoch": 1.9470073338064822, "grad_norm": 0.48944342136383057, "learning_rate": 4.889705530284894e-06, "loss": 0.0869, "step": 16460 }, { "epoch": 1.9481902058197305, "grad_norm": 0.7246806621551514, "learning_rate": 4.888987311467561e-06, "loss": 0.0844, "step": 16470 }, { "epoch": 1.9493730778329783, "grad_norm": 0.45746952295303345, "learning_rate": 4.888269092650228e-06, "loss": 0.0808, "step": 16480 }, { "epoch": 1.9505559498462266, "grad_norm": 0.5016586780548096, "learning_rate": 4.887550873832895e-06, "loss": 0.0878, "step": 16490 }, { "epoch": 1.951738821859475, "grad_norm": 0.462515264749527, "learning_rate": 4.886832655015562e-06, "loss": 0.0833, "step": 16500 }, { "epoch": 1.952921693872723, "grad_norm": 0.46272340416908264, "learning_rate": 4.8861144361982285e-06, "loss": 0.0953, "step": 16510 }, { "epoch": 1.954104565885971, "grad_norm": 0.4062754809856415, "learning_rate": 4.8853962173808955e-06, "loss": 0.0855, "step": 16520 }, { "epoch": 1.9552874378992193, "grad_norm": 0.4083622992038727, "learning_rate": 4.884677998563562e-06, "loss": 0.0852, "step": 16530 }, { "epoch": 1.9564703099124676, "grad_norm": 0.5315936803817749, "learning_rate": 4.883959779746229e-06, "loss": 0.0819, "step": 16540 }, { "epoch": 1.9576531819257157, "grad_norm": 0.39595380425453186, "learning_rate": 4.883241560928896e-06, "loss": 0.0909, "step": 16550 }, { "epoch": 1.9588360539389638, "grad_norm": 0.43326810002326965, "learning_rate": 4.882523342111564e-06, "loss": 0.0845, "step": 16560 }, { "epoch": 1.960018925952212, "grad_norm": 0.4456225335597992, "learning_rate": 4.88180512329423e-06, "loss": 0.0851, "step": 16570 }, { "epoch": 1.9612017979654601, "grad_norm": 0.5476830005645752, "learning_rate": 4.881086904476898e-06, "loss": 0.0853, "step": 16580 }, { "epoch": 1.9623846699787082, "grad_norm": 0.5306927561759949, "learning_rate": 4.880368685659564e-06, "loss": 0.0846, "step": 16590 }, { "epoch": 1.9635675419919565, "grad_norm": 0.48085543513298035, "learning_rate": 4.879650466842232e-06, "loss": 0.0788, "step": 16600 }, { "epoch": 1.9647504140052048, "grad_norm": 0.41933801770210266, "learning_rate": 4.878932248024898e-06, "loss": 0.0832, "step": 16610 }, { "epoch": 1.9659332860184529, "grad_norm": 0.4308774173259735, "learning_rate": 4.8782140292075655e-06, "loss": 0.0889, "step": 16620 }, { "epoch": 1.967116158031701, "grad_norm": 0.469061940908432, "learning_rate": 4.8774958103902325e-06, "loss": 0.0789, "step": 16630 }, { "epoch": 1.9682990300449492, "grad_norm": 0.42867952585220337, "learning_rate": 4.876777591572899e-06, "loss": 0.0737, "step": 16640 }, { "epoch": 1.9694819020581973, "grad_norm": 0.3530597388744354, "learning_rate": 4.876059372755566e-06, "loss": 0.0752, "step": 16650 }, { "epoch": 1.9706647740714454, "grad_norm": 0.5743340253829956, "learning_rate": 4.875341153938233e-06, "loss": 0.0858, "step": 16660 }, { "epoch": 1.9718476460846937, "grad_norm": 0.6513424515724182, "learning_rate": 4.8746229351209e-06, "loss": 0.0854, "step": 16670 }, { "epoch": 1.973030518097942, "grad_norm": 0.4524577856063843, "learning_rate": 4.873904716303567e-06, "loss": 0.0891, "step": 16680 }, { "epoch": 1.97421339011119, "grad_norm": 0.44377443194389343, "learning_rate": 4.873186497486235e-06, "loss": 0.0804, "step": 16690 }, { "epoch": 1.975396262124438, "grad_norm": 0.46467265486717224, "learning_rate": 4.872468278668901e-06, "loss": 0.0825, "step": 16700 }, { "epoch": 1.9765791341376864, "grad_norm": 0.4272754192352295, "learning_rate": 4.871750059851569e-06, "loss": 0.0796, "step": 16710 }, { "epoch": 1.9777620061509344, "grad_norm": 0.4420712888240814, "learning_rate": 4.871031841034235e-06, "loss": 0.0889, "step": 16720 }, { "epoch": 1.9789448781641825, "grad_norm": 0.48825451731681824, "learning_rate": 4.8703136222169025e-06, "loss": 0.0751, "step": 16730 }, { "epoch": 1.9801277501774308, "grad_norm": 0.3588007688522339, "learning_rate": 4.8695954033995695e-06, "loss": 0.0862, "step": 16740 }, { "epoch": 1.981310622190679, "grad_norm": 0.45821240544319153, "learning_rate": 4.868877184582236e-06, "loss": 0.0818, "step": 16750 }, { "epoch": 1.9824934942039272, "grad_norm": 0.4399438500404358, "learning_rate": 4.868158965764903e-06, "loss": 0.078, "step": 16760 }, { "epoch": 1.9836763662171752, "grad_norm": 0.5462121963500977, "learning_rate": 4.86744074694757e-06, "loss": 0.0785, "step": 16770 }, { "epoch": 1.9848592382304235, "grad_norm": 0.5880784392356873, "learning_rate": 4.866722528130237e-06, "loss": 0.0768, "step": 16780 }, { "epoch": 1.9860421102436716, "grad_norm": 0.4528180658817291, "learning_rate": 4.866004309312904e-06, "loss": 0.083, "step": 16790 }, { "epoch": 1.9872249822569197, "grad_norm": 0.5266770124435425, "learning_rate": 4.865286090495571e-06, "loss": 0.0811, "step": 16800 }, { "epoch": 1.988407854270168, "grad_norm": 0.5666595697402954, "learning_rate": 4.864567871678238e-06, "loss": 0.0919, "step": 16810 }, { "epoch": 1.9895907262834163, "grad_norm": 0.4762324392795563, "learning_rate": 4.863849652860905e-06, "loss": 0.0772, "step": 16820 }, { "epoch": 1.9907735982966643, "grad_norm": 0.5815285444259644, "learning_rate": 4.863131434043573e-06, "loss": 0.0778, "step": 16830 }, { "epoch": 1.9919564703099124, "grad_norm": 0.5183305740356445, "learning_rate": 4.862413215226239e-06, "loss": 0.0845, "step": 16840 }, { "epoch": 1.9931393423231607, "grad_norm": 0.5055912733078003, "learning_rate": 4.8616949964089065e-06, "loss": 0.0811, "step": 16850 }, { "epoch": 1.9943222143364088, "grad_norm": 0.5169409513473511, "learning_rate": 4.8609767775915725e-06, "loss": 0.0846, "step": 16860 }, { "epoch": 1.9955050863496568, "grad_norm": 0.5330750942230225, "learning_rate": 4.86025855877424e-06, "loss": 0.0904, "step": 16870 }, { "epoch": 1.9966879583629051, "grad_norm": 0.5304578542709351, "learning_rate": 4.859540339956906e-06, "loss": 0.0814, "step": 16880 }, { "epoch": 1.9978708303761534, "grad_norm": 0.4241310954093933, "learning_rate": 4.858822121139574e-06, "loss": 0.0817, "step": 16890 }, { "epoch": 1.9990537023894015, "grad_norm": 0.5028892159461975, "learning_rate": 4.858103902322241e-06, "loss": 0.0833, "step": 16900 }, { "epoch": 2.0002365744026496, "grad_norm": 0.43169069290161133, "learning_rate": 4.857385683504908e-06, "loss": 0.0864, "step": 16910 }, { "epoch": 2.000473148805299, "eval_accuracy": 0.6861795921083275, "eval_animal_abuse/accuracy": 0.9946601457231261, "eval_animal_abuse/f1": 0.7620459599703484, "eval_animal_abuse/fpr": 0.0024736647258775577, "eval_animal_abuse/precision": 0.7776096822995462, "eval_animal_abuse/recall": 0.747093023255814, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9957580596865955, "eval_child_abuse/f1": 0.6443514644351465, "eval_child_abuse/fpr": 0.0025593415968284193, "eval_child_abuse/precision": 0.6015625, "eval_child_abuse/recall": 0.6936936936936937, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9731343780151046, "eval_controversial_topics,politics/f1": 0.45309854385370807, "eval_controversial_topics,politics/fpr": 0.007585118066996143, "eval_controversial_topics,politics/precision": 0.6021602160216022, "eval_controversial_topics,politics/recall": 0.36319218241042345, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9545197458162824, "eval_discrimination,stereotype,injustice/f1": 0.722830494728305, "eval_discrimination,stereotype,injustice/fpr": 0.02745147648823503, "eval_discrimination,stereotype,injustice/precision": 0.7012195121951219, "eval_discrimination,stereotype,injustice/recall": 0.74581589958159, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.973966131017733, "eval_drug_abuse,weapons,banned_substance/f1": 0.7681137946362424, "eval_drug_abuse,weapons,banned_substance/fpr": 0.013591171908052436, "eval_drug_abuse,weapons,banned_substance/precision": 0.7707404103479036, "eval_drug_abuse,weapons,banned_substance/recall": 0.7655050206733609, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9597098845526832, "eval_financial_crime,property_crime,theft/f1": 0.8002309468822171, "eval_financial_crime,property_crime,theft/fpr": 0.0262057018594622, "eval_financial_crime,property_crime,theft/precision": 0.7733142037302726, "eval_financial_crime,property_crime,theft/recall": 0.8290890446077593, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8533619456366237, "eval_flagged/aucpr": 0.9084060974346816, "eval_flagged/f1": 0.865343781983716, "eval_flagged/fpr": 0.13831901886509343, "eval_flagged/precision": 0.8847932025490441, "eval_flagged/recall": 0.8467310394308093, "eval_hate_speech,offensive_language/accuracy": 0.9515587051269255, "eval_hate_speech,offensive_language/f1": 0.692891794979962, "eval_hate_speech,offensive_language/fpr": 0.014854741458066846, "eval_hate_speech,offensive_language/precision": 0.801610541727672, "eval_hate_speech,offensive_language/recall": 0.6101411589895989, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08373646438121796, "eval_macro_f1": 0.6271583549193481, "eval_macro_precision": 0.7497947779968038, "eval_macro_recall": 0.606732903826513, "eval_micro_f1": 0.752390390721063, "eval_micro_precision": 0.7773269372973588, "eval_micro_recall": 0.7290040364884329, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9878564061616263, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.00273224043715847, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 1.0, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.0013679890560875513, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8844196027547659, "eval_non_violent_unethical_behavior/f1": 0.6909802526240882, "eval_non_violent_unethical_behavior/fpr": 0.05750705862813474, "eval_non_violent_unethical_behavior/precision": 0.737141772632378, "eval_non_violent_unethical_behavior/recall": 0.6502595010882304, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9813687327411252, "eval_privacy_violation/f1": 0.8075601374570447, "eval_privacy_violation/fpr": 0.008819206271435556, "eval_privacy_violation/precision": 0.8234057463209531, "eval_privacy_violation/recall": 0.7923128792987189, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.3654, "eval_samples_per_second": 100.464, "eval_self_harm/accuracy": 0.9966064477492764, "eval_self_harm/f1": 0.7357512953367875, "eval_self_harm/fpr": 0.0013064451293045669, "eval_self_harm/precision": 0.7845303867403315, "eval_self_harm/recall": 0.6926829268292682, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9842465981302192, "eval_sexually_explicit,adult_content/f1": 0.6534943285766557, "eval_sexually_explicit,adult_content/fpr": 0.006698825574854676, "eval_sexually_explicit,adult_content/precision": 0.6944012441679627, "eval_sexually_explicit,adult_content/recall": 0.6171389080856945, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.571, "eval_terrorism,organized_crime/accuracy": 0.992281332135609, "eval_terrorism,organized_crime/f1": 0.1916376306620209, "eval_terrorism,organized_crime/fpr": 0.0006372310633374127, "eval_terrorism,organized_crime/precision": 0.5913978494623656, "eval_terrorism,organized_crime/recall": 0.11434511434511435, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9210333699304655, "eval_violence,aiding_and_abetting,incitement/f1": 0.8544980842911878, "eval_violence,aiding_and_abetting,incitement/fpr": 0.061057975613072704, "eval_violence,aiding_and_abetting,incitement/precision": 0.8380328263091444, "eval_violence,aiding_and_abetting,incitement/recall": 0.871623311655828, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 16912 }, { "epoch": 2.001419446415898, "grad_norm": 0.35401254892349243, "learning_rate": 4.856667464687575e-06, "loss": 0.079, "step": 16920 }, { "epoch": 2.002602318429146, "grad_norm": 0.4602322578430176, "learning_rate": 4.855949245870242e-06, "loss": 0.0805, "step": 16930 }, { "epoch": 2.003785190442394, "grad_norm": 0.3821829557418823, "learning_rate": 4.85523102705291e-06, "loss": 0.0788, "step": 16940 }, { "epoch": 2.0049680624556423, "grad_norm": 0.47090932726860046, "learning_rate": 4.854512808235576e-06, "loss": 0.0741, "step": 16950 }, { "epoch": 2.0061509344688906, "grad_norm": 0.4705190360546112, "learning_rate": 4.8537945894182435e-06, "loss": 0.0753, "step": 16960 }, { "epoch": 2.0073338064821384, "grad_norm": 0.4664526581764221, "learning_rate": 4.8530763706009095e-06, "loss": 0.0824, "step": 16970 }, { "epoch": 2.0085166784953867, "grad_norm": 0.49209272861480713, "learning_rate": 4.852358151783577e-06, "loss": 0.0816, "step": 16980 }, { "epoch": 2.009699550508635, "grad_norm": 0.43480074405670166, "learning_rate": 4.851639932966243e-06, "loss": 0.0803, "step": 16990 }, { "epoch": 2.0108824225218833, "grad_norm": 0.42851176857948303, "learning_rate": 4.850921714148911e-06, "loss": 0.0762, "step": 17000 }, { "epoch": 2.012065294535131, "grad_norm": 0.48568588495254517, "learning_rate": 4.850203495331578e-06, "loss": 0.0777, "step": 17010 }, { "epoch": 2.0132481665483795, "grad_norm": 0.44218912720680237, "learning_rate": 4.849485276514245e-06, "loss": 0.0789, "step": 17020 }, { "epoch": 2.0144310385616278, "grad_norm": 0.5948452353477478, "learning_rate": 4.848767057696912e-06, "loss": 0.0822, "step": 17030 }, { "epoch": 2.0156139105748756, "grad_norm": 0.38376766443252563, "learning_rate": 4.848048838879579e-06, "loss": 0.0761, "step": 17040 }, { "epoch": 2.016796782588124, "grad_norm": 0.4572240710258484, "learning_rate": 4.847330620062246e-06, "loss": 0.0777, "step": 17050 }, { "epoch": 2.017979654601372, "grad_norm": 0.6041489243507385, "learning_rate": 4.846612401244913e-06, "loss": 0.0844, "step": 17060 }, { "epoch": 2.0191625266146205, "grad_norm": 0.5754886865615845, "learning_rate": 4.84589418242758e-06, "loss": 0.0812, "step": 17070 }, { "epoch": 2.0203453986278683, "grad_norm": 0.3614943027496338, "learning_rate": 4.8451759636102465e-06, "loss": 0.0734, "step": 17080 }, { "epoch": 2.0215282706411166, "grad_norm": 0.5752710103988647, "learning_rate": 4.8444577447929135e-06, "loss": 0.0804, "step": 17090 }, { "epoch": 2.022711142654365, "grad_norm": 0.5679340958595276, "learning_rate": 4.84373952597558e-06, "loss": 0.0757, "step": 17100 }, { "epoch": 2.0238940146676128, "grad_norm": 0.45443299412727356, "learning_rate": 4.843021307158247e-06, "loss": 0.0784, "step": 17110 }, { "epoch": 2.025076886680861, "grad_norm": 0.3938104212284088, "learning_rate": 4.842303088340915e-06, "loss": 0.0769, "step": 17120 }, { "epoch": 2.0262597586941093, "grad_norm": 0.5060205459594727, "learning_rate": 4.841584869523581e-06, "loss": 0.0889, "step": 17130 }, { "epoch": 2.0274426307073576, "grad_norm": 0.5020876526832581, "learning_rate": 4.840866650706249e-06, "loss": 0.0744, "step": 17140 }, { "epoch": 2.0286255027206055, "grad_norm": 0.4779887795448303, "learning_rate": 4.840148431888915e-06, "loss": 0.0773, "step": 17150 }, { "epoch": 2.029808374733854, "grad_norm": 0.49128130078315735, "learning_rate": 4.839430213071583e-06, "loss": 0.0806, "step": 17160 }, { "epoch": 2.030991246747102, "grad_norm": 0.4133373200893402, "learning_rate": 4.83871199425425e-06, "loss": 0.0771, "step": 17170 }, { "epoch": 2.03217411876035, "grad_norm": 0.4817720353603363, "learning_rate": 4.837993775436917e-06, "loss": 0.0789, "step": 17180 }, { "epoch": 2.033356990773598, "grad_norm": 0.6214243173599243, "learning_rate": 4.8372755566195835e-06, "loss": 0.0771, "step": 17190 }, { "epoch": 2.0345398627868465, "grad_norm": 0.5657630562782288, "learning_rate": 4.8365573378022504e-06, "loss": 0.0827, "step": 17200 }, { "epoch": 2.035722734800095, "grad_norm": 0.5018747448921204, "learning_rate": 4.835839118984918e-06, "loss": 0.0883, "step": 17210 }, { "epoch": 2.0369056068133427, "grad_norm": 0.5206968784332275, "learning_rate": 4.835120900167584e-06, "loss": 0.0816, "step": 17220 }, { "epoch": 2.038088478826591, "grad_norm": 0.4710036516189575, "learning_rate": 4.834402681350252e-06, "loss": 0.0785, "step": 17230 }, { "epoch": 2.0392713508398392, "grad_norm": 0.5315642952919006, "learning_rate": 4.833684462532918e-06, "loss": 0.0808, "step": 17240 }, { "epoch": 2.040454222853087, "grad_norm": 0.47044581174850464, "learning_rate": 4.832966243715586e-06, "loss": 0.079, "step": 17250 }, { "epoch": 2.0416370948663354, "grad_norm": 0.4744825065135956, "learning_rate": 4.832248024898252e-06, "loss": 0.0829, "step": 17260 }, { "epoch": 2.0428199668795837, "grad_norm": 0.4559400975704193, "learning_rate": 4.83152980608092e-06, "loss": 0.0829, "step": 17270 }, { "epoch": 2.044002838892832, "grad_norm": 0.738996148109436, "learning_rate": 4.830811587263587e-06, "loss": 0.0755, "step": 17280 }, { "epoch": 2.04518571090608, "grad_norm": 0.43474069237709045, "learning_rate": 4.830093368446254e-06, "loss": 0.0773, "step": 17290 }, { "epoch": 2.046368582919328, "grad_norm": 0.381229430437088, "learning_rate": 4.8293751496289205e-06, "loss": 0.0742, "step": 17300 }, { "epoch": 2.0475514549325764, "grad_norm": 0.46511006355285645, "learning_rate": 4.8286569308115874e-06, "loss": 0.0811, "step": 17310 }, { "epoch": 2.0487343269458242, "grad_norm": 0.33987414836883545, "learning_rate": 4.827938711994254e-06, "loss": 0.079, "step": 17320 }, { "epoch": 2.0499171989590725, "grad_norm": 0.38452577590942383, "learning_rate": 4.827220493176921e-06, "loss": 0.0789, "step": 17330 }, { "epoch": 2.051100070972321, "grad_norm": 0.43608465790748596, "learning_rate": 4.826502274359588e-06, "loss": 0.0766, "step": 17340 }, { "epoch": 2.052282942985569, "grad_norm": 0.4135875999927521, "learning_rate": 4.825784055542255e-06, "loss": 0.0868, "step": 17350 }, { "epoch": 2.053465814998817, "grad_norm": 0.5210779905319214, "learning_rate": 4.825065836724922e-06, "loss": 0.0854, "step": 17360 }, { "epoch": 2.0546486870120653, "grad_norm": 0.4482611119747162, "learning_rate": 4.824347617907589e-06, "loss": 0.0826, "step": 17370 }, { "epoch": 2.0558315590253136, "grad_norm": 0.38253816962242126, "learning_rate": 4.823629399090256e-06, "loss": 0.0749, "step": 17380 }, { "epoch": 2.057014431038562, "grad_norm": 0.6800917387008667, "learning_rate": 4.822911180272924e-06, "loss": 0.082, "step": 17390 }, { "epoch": 2.0581973030518097, "grad_norm": 0.5153548121452332, "learning_rate": 4.82219296145559e-06, "loss": 0.087, "step": 17400 }, { "epoch": 2.059380175065058, "grad_norm": 0.4851289689540863, "learning_rate": 4.8214747426382575e-06, "loss": 0.0748, "step": 17410 }, { "epoch": 2.0605630470783063, "grad_norm": 0.4626041650772095, "learning_rate": 4.820756523820924e-06, "loss": 0.087, "step": 17420 }, { "epoch": 2.061745919091554, "grad_norm": 0.44458046555519104, "learning_rate": 4.820038305003591e-06, "loss": 0.0847, "step": 17430 }, { "epoch": 2.0629287911048024, "grad_norm": 0.45775315165519714, "learning_rate": 4.819320086186258e-06, "loss": 0.0806, "step": 17440 }, { "epoch": 2.0641116631180507, "grad_norm": 0.5234962701797485, "learning_rate": 4.818601867368925e-06, "loss": 0.0818, "step": 17450 }, { "epoch": 2.065294535131299, "grad_norm": 0.5040955543518066, "learning_rate": 4.817883648551592e-06, "loss": 0.0784, "step": 17460 }, { "epoch": 2.066477407144547, "grad_norm": 0.4097939133644104, "learning_rate": 4.817165429734259e-06, "loss": 0.0723, "step": 17470 }, { "epoch": 2.067660279157795, "grad_norm": 0.6047165989875793, "learning_rate": 4.816447210916927e-06, "loss": 0.0827, "step": 17480 }, { "epoch": 2.0688431511710434, "grad_norm": 0.392147034406662, "learning_rate": 4.815728992099593e-06, "loss": 0.0705, "step": 17490 }, { "epoch": 2.0700260231842913, "grad_norm": 0.4921884834766388, "learning_rate": 4.815010773282261e-06, "loss": 0.0786, "step": 17500 }, { "epoch": 2.0712088951975396, "grad_norm": 0.4833137094974518, "learning_rate": 4.814292554464927e-06, "loss": 0.079, "step": 17510 }, { "epoch": 2.072391767210788, "grad_norm": 0.5232948660850525, "learning_rate": 4.8135743356475945e-06, "loss": 0.0828, "step": 17520 }, { "epoch": 2.073574639224036, "grad_norm": 0.6484633088111877, "learning_rate": 4.812856116830261e-06, "loss": 0.0748, "step": 17530 }, { "epoch": 2.074757511237284, "grad_norm": 0.6261690258979797, "learning_rate": 4.812137898012928e-06, "loss": 0.0836, "step": 17540 }, { "epoch": 2.0759403832505323, "grad_norm": 0.43177565932273865, "learning_rate": 4.811419679195595e-06, "loss": 0.0795, "step": 17550 }, { "epoch": 2.0771232552637806, "grad_norm": 0.39646923542022705, "learning_rate": 4.810701460378262e-06, "loss": 0.0766, "step": 17560 }, { "epoch": 2.0783061272770285, "grad_norm": 0.4741382598876953, "learning_rate": 4.809983241560929e-06, "loss": 0.0682, "step": 17570 }, { "epoch": 2.0794889992902768, "grad_norm": 0.6712583303451538, "learning_rate": 4.809265022743596e-06, "loss": 0.0907, "step": 17580 }, { "epoch": 2.080671871303525, "grad_norm": 0.46442413330078125, "learning_rate": 4.808546803926263e-06, "loss": 0.081, "step": 17590 }, { "epoch": 2.0818547433167733, "grad_norm": 0.5724080801010132, "learning_rate": 4.80782858510893e-06, "loss": 0.083, "step": 17600 }, { "epoch": 2.083037615330021, "grad_norm": 0.5272369384765625, "learning_rate": 4.807110366291597e-06, "loss": 0.0873, "step": 17610 }, { "epoch": 2.0842204873432695, "grad_norm": 0.4198405146598816, "learning_rate": 4.806392147474264e-06, "loss": 0.0846, "step": 17620 }, { "epoch": 2.0854033593565178, "grad_norm": 0.4960279166698456, "learning_rate": 4.805673928656931e-06, "loss": 0.0867, "step": 17630 }, { "epoch": 2.0865862313697656, "grad_norm": 0.4299582242965698, "learning_rate": 4.804955709839598e-06, "loss": 0.0753, "step": 17640 }, { "epoch": 2.087769103383014, "grad_norm": 0.40097859501838684, "learning_rate": 4.8042374910222645e-06, "loss": 0.0787, "step": 17650 }, { "epoch": 2.088951975396262, "grad_norm": 0.4837346076965332, "learning_rate": 4.803519272204932e-06, "loss": 0.0805, "step": 17660 }, { "epoch": 2.0901348474095105, "grad_norm": 0.661058783531189, "learning_rate": 4.802801053387598e-06, "loss": 0.0916, "step": 17670 }, { "epoch": 2.0913177194227583, "grad_norm": 0.5860831141471863, "learning_rate": 4.802082834570266e-06, "loss": 0.0776, "step": 17680 }, { "epoch": 2.0925005914360066, "grad_norm": 0.4726531505584717, "learning_rate": 4.801364615752933e-06, "loss": 0.0803, "step": 17690 }, { "epoch": 2.093683463449255, "grad_norm": 0.4437042474746704, "learning_rate": 4.8006463969356e-06, "loss": 0.0804, "step": 17700 }, { "epoch": 2.094866335462503, "grad_norm": 0.4859260618686676, "learning_rate": 4.799928178118267e-06, "loss": 0.0834, "step": 17710 }, { "epoch": 2.096049207475751, "grad_norm": 0.40652230381965637, "learning_rate": 4.799209959300934e-06, "loss": 0.0773, "step": 17720 }, { "epoch": 2.0972320794889994, "grad_norm": 0.41018712520599365, "learning_rate": 4.798491740483601e-06, "loss": 0.0854, "step": 17730 }, { "epoch": 2.0984149515022477, "grad_norm": 0.3930619955062866, "learning_rate": 4.797773521666268e-06, "loss": 0.0741, "step": 17740 }, { "epoch": 2.0995978235154955, "grad_norm": 0.4428681433200836, "learning_rate": 4.797055302848935e-06, "loss": 0.0804, "step": 17750 }, { "epoch": 2.100780695528744, "grad_norm": 0.43586763739585876, "learning_rate": 4.7963370840316015e-06, "loss": 0.0831, "step": 17760 }, { "epoch": 2.101963567541992, "grad_norm": 0.41135454177856445, "learning_rate": 4.795618865214269e-06, "loss": 0.083, "step": 17770 }, { "epoch": 2.10314643955524, "grad_norm": 0.48118534684181213, "learning_rate": 4.794900646396935e-06, "loss": 0.085, "step": 17780 }, { "epoch": 2.1043293115684882, "grad_norm": 0.47915661334991455, "learning_rate": 4.794182427579603e-06, "loss": 0.0747, "step": 17790 }, { "epoch": 2.1055121835817365, "grad_norm": 0.5813650488853455, "learning_rate": 4.793464208762269e-06, "loss": 0.0888, "step": 17800 }, { "epoch": 2.106695055594985, "grad_norm": 0.47547850012779236, "learning_rate": 4.792745989944937e-06, "loss": 0.0748, "step": 17810 }, { "epoch": 2.1078779276082327, "grad_norm": 0.4403519630432129, "learning_rate": 4.792027771127604e-06, "loss": 0.0783, "step": 17820 }, { "epoch": 2.109060799621481, "grad_norm": 0.5593721866607666, "learning_rate": 4.791309552310271e-06, "loss": 0.0816, "step": 17830 }, { "epoch": 2.1102436716347293, "grad_norm": 0.42318272590637207, "learning_rate": 4.790591333492938e-06, "loss": 0.0798, "step": 17840 }, { "epoch": 2.111426543647977, "grad_norm": 0.41688433289527893, "learning_rate": 4.789873114675605e-06, "loss": 0.0721, "step": 17850 }, { "epoch": 2.1126094156612254, "grad_norm": 0.5438916087150574, "learning_rate": 4.789154895858272e-06, "loss": 0.0721, "step": 17860 }, { "epoch": 2.1137922876744737, "grad_norm": 0.44959700107574463, "learning_rate": 4.7884366770409385e-06, "loss": 0.0706, "step": 17870 }, { "epoch": 2.114975159687722, "grad_norm": 0.5929701328277588, "learning_rate": 4.7877184582236054e-06, "loss": 0.0756, "step": 17880 }, { "epoch": 2.11615803170097, "grad_norm": 0.47911667823791504, "learning_rate": 4.787000239406272e-06, "loss": 0.0816, "step": 17890 }, { "epoch": 2.117340903714218, "grad_norm": 0.4694361090660095, "learning_rate": 4.786282020588939e-06, "loss": 0.0871, "step": 17900 }, { "epoch": 2.1185237757274664, "grad_norm": 0.38303107023239136, "learning_rate": 4.785563801771606e-06, "loss": 0.0743, "step": 17910 }, { "epoch": 2.1197066477407143, "grad_norm": 0.5508629679679871, "learning_rate": 4.784845582954273e-06, "loss": 0.0774, "step": 17920 }, { "epoch": 2.1208895197539626, "grad_norm": 0.5796292424201965, "learning_rate": 4.784127364136941e-06, "loss": 0.0788, "step": 17930 }, { "epoch": 2.122072391767211, "grad_norm": 0.5783708691596985, "learning_rate": 4.783409145319608e-06, "loss": 0.0817, "step": 17940 }, { "epoch": 2.123255263780459, "grad_norm": 0.5653509497642517, "learning_rate": 4.782690926502275e-06, "loss": 0.0788, "step": 17950 }, { "epoch": 2.124438135793707, "grad_norm": 0.45692843198776245, "learning_rate": 4.781972707684942e-06, "loss": 0.0808, "step": 17960 }, { "epoch": 2.1256210078069553, "grad_norm": 0.5500239729881287, "learning_rate": 4.781254488867609e-06, "loss": 0.0817, "step": 17970 }, { "epoch": 2.1268038798202036, "grad_norm": 0.5230019092559814, "learning_rate": 4.7805362700502755e-06, "loss": 0.0774, "step": 17980 }, { "epoch": 2.1279867518334514, "grad_norm": 0.5791219472885132, "learning_rate": 4.7798180512329424e-06, "loss": 0.084, "step": 17990 }, { "epoch": 2.1291696238466997, "grad_norm": 0.5067125558853149, "learning_rate": 4.779099832415609e-06, "loss": 0.0775, "step": 18000 }, { "epoch": 2.130352495859948, "grad_norm": 0.4571417570114136, "learning_rate": 4.778381613598276e-06, "loss": 0.0926, "step": 18010 }, { "epoch": 2.1315353678731963, "grad_norm": 0.47809237241744995, "learning_rate": 4.777663394780943e-06, "loss": 0.0788, "step": 18020 }, { "epoch": 2.132718239886444, "grad_norm": 0.3685837686061859, "learning_rate": 4.77694517596361e-06, "loss": 0.0848, "step": 18030 }, { "epoch": 2.1339011118996924, "grad_norm": 0.4592578113079071, "learning_rate": 4.776226957146278e-06, "loss": 0.0831, "step": 18040 }, { "epoch": 2.1350839839129407, "grad_norm": 0.40751913189888, "learning_rate": 4.775508738328944e-06, "loss": 0.0748, "step": 18050 }, { "epoch": 2.1362668559261886, "grad_norm": 0.4039672315120697, "learning_rate": 4.774790519511612e-06, "loss": 0.0752, "step": 18060 }, { "epoch": 2.137449727939437, "grad_norm": 0.509703516960144, "learning_rate": 4.774072300694278e-06, "loss": 0.0792, "step": 18070 }, { "epoch": 2.138632599952685, "grad_norm": 0.5621660947799683, "learning_rate": 4.773354081876946e-06, "loss": 0.0825, "step": 18080 }, { "epoch": 2.1398154719659335, "grad_norm": 0.4130712151527405, "learning_rate": 4.772635863059612e-06, "loss": 0.0821, "step": 18090 }, { "epoch": 2.1409983439791813, "grad_norm": 0.5065471529960632, "learning_rate": 4.7719176442422794e-06, "loss": 0.0819, "step": 18100 }, { "epoch": 2.1421812159924296, "grad_norm": 0.3958010673522949, "learning_rate": 4.771199425424946e-06, "loss": 0.0793, "step": 18110 }, { "epoch": 2.143364088005678, "grad_norm": 0.3825806975364685, "learning_rate": 4.770481206607613e-06, "loss": 0.0824, "step": 18120 }, { "epoch": 2.1445469600189258, "grad_norm": 0.5503684878349304, "learning_rate": 4.76976298779028e-06, "loss": 0.0834, "step": 18130 }, { "epoch": 2.145729832032174, "grad_norm": 0.4662480056285858, "learning_rate": 4.769044768972947e-06, "loss": 0.0829, "step": 18140 }, { "epoch": 2.1469127040454223, "grad_norm": 0.35405123233795166, "learning_rate": 4.768326550155614e-06, "loss": 0.078, "step": 18150 }, { "epoch": 2.1480955760586706, "grad_norm": 0.49312737584114075, "learning_rate": 4.767608331338281e-06, "loss": 0.0802, "step": 18160 }, { "epoch": 2.1492784480719185, "grad_norm": 0.5160984992980957, "learning_rate": 4.766890112520948e-06, "loss": 0.083, "step": 18170 }, { "epoch": 2.1504613200851668, "grad_norm": 0.36266300082206726, "learning_rate": 4.766171893703615e-06, "loss": 0.0795, "step": 18180 }, { "epoch": 2.151644192098415, "grad_norm": 0.42601293325424194, "learning_rate": 4.765453674886283e-06, "loss": 0.0845, "step": 18190 }, { "epoch": 2.152827064111663, "grad_norm": 0.6074909567832947, "learning_rate": 4.7647354560689495e-06, "loss": 0.0855, "step": 18200 }, { "epoch": 2.154009936124911, "grad_norm": 0.4359244108200073, "learning_rate": 4.7640172372516164e-06, "loss": 0.085, "step": 18210 }, { "epoch": 2.1551928081381595, "grad_norm": 0.6269486546516418, "learning_rate": 4.763299018434283e-06, "loss": 0.0737, "step": 18220 }, { "epoch": 2.156375680151408, "grad_norm": 0.4602348804473877, "learning_rate": 4.76258079961695e-06, "loss": 0.0835, "step": 18230 }, { "epoch": 2.1575585521646556, "grad_norm": 0.5415191650390625, "learning_rate": 4.761862580799617e-06, "loss": 0.0806, "step": 18240 }, { "epoch": 2.158741424177904, "grad_norm": 0.4152541160583496, "learning_rate": 4.761144361982284e-06, "loss": 0.083, "step": 18250 }, { "epoch": 2.1599242961911522, "grad_norm": 0.4296933114528656, "learning_rate": 4.760426143164951e-06, "loss": 0.0833, "step": 18260 }, { "epoch": 2.1611071682044, "grad_norm": 0.428305447101593, "learning_rate": 4.759707924347618e-06, "loss": 0.0827, "step": 18270 }, { "epoch": 2.1622900402176484, "grad_norm": 0.46615174412727356, "learning_rate": 4.758989705530285e-06, "loss": 0.0755, "step": 18280 }, { "epoch": 2.1634729122308967, "grad_norm": 0.48684969544410706, "learning_rate": 4.758271486712952e-06, "loss": 0.0831, "step": 18290 }, { "epoch": 2.164655784244145, "grad_norm": 0.48977038264274597, "learning_rate": 4.757553267895619e-06, "loss": 0.077, "step": 18300 }, { "epoch": 2.165838656257393, "grad_norm": 0.4179462194442749, "learning_rate": 4.7568350490782865e-06, "loss": 0.0822, "step": 18310 }, { "epoch": 2.167021528270641, "grad_norm": 0.41017434000968933, "learning_rate": 4.756116830260953e-06, "loss": 0.0848, "step": 18320 }, { "epoch": 2.1682044002838894, "grad_norm": 0.4614346921443939, "learning_rate": 4.75539861144362e-06, "loss": 0.0857, "step": 18330 }, { "epoch": 2.1693872722971372, "grad_norm": 0.967361330986023, "learning_rate": 4.754680392626286e-06, "loss": 0.0791, "step": 18340 }, { "epoch": 2.1705701443103855, "grad_norm": 0.7197526097297668, "learning_rate": 4.753962173808954e-06, "loss": 0.089, "step": 18350 }, { "epoch": 2.171753016323634, "grad_norm": 0.4860106110572815, "learning_rate": 4.75324395499162e-06, "loss": 0.0831, "step": 18360 }, { "epoch": 2.172935888336882, "grad_norm": 0.40545061230659485, "learning_rate": 4.752525736174288e-06, "loss": 0.0775, "step": 18370 }, { "epoch": 2.17411876035013, "grad_norm": 0.4366656541824341, "learning_rate": 4.751807517356955e-06, "loss": 0.0717, "step": 18380 }, { "epoch": 2.1753016323633783, "grad_norm": 0.46056175231933594, "learning_rate": 4.751089298539622e-06, "loss": 0.0757, "step": 18390 }, { "epoch": 2.1764845043766265, "grad_norm": 0.4366072416305542, "learning_rate": 4.750371079722289e-06, "loss": 0.0831, "step": 18400 }, { "epoch": 2.1776673763898744, "grad_norm": 0.4345017671585083, "learning_rate": 4.749652860904956e-06, "loss": 0.0816, "step": 18410 }, { "epoch": 2.1788502484031227, "grad_norm": 0.4769870936870575, "learning_rate": 4.748934642087623e-06, "loss": 0.0843, "step": 18420 }, { "epoch": 2.180033120416371, "grad_norm": 0.47804349660873413, "learning_rate": 4.7482164232702896e-06, "loss": 0.0795, "step": 18430 }, { "epoch": 2.1812159924296193, "grad_norm": 0.5179824829101562, "learning_rate": 4.7474982044529565e-06, "loss": 0.0866, "step": 18440 }, { "epoch": 2.182398864442867, "grad_norm": 0.44717586040496826, "learning_rate": 4.746779985635623e-06, "loss": 0.0786, "step": 18450 }, { "epoch": 2.1835817364561154, "grad_norm": 0.3781997561454773, "learning_rate": 4.746061766818291e-06, "loss": 0.0766, "step": 18460 }, { "epoch": 2.1847646084693637, "grad_norm": 0.5003459453582764, "learning_rate": 4.745343548000958e-06, "loss": 0.087, "step": 18470 }, { "epoch": 2.1859474804826116, "grad_norm": 0.5373108983039856, "learning_rate": 4.744625329183625e-06, "loss": 0.0875, "step": 18480 }, { "epoch": 2.18713035249586, "grad_norm": 0.42033499479293823, "learning_rate": 4.743907110366292e-06, "loss": 0.0894, "step": 18490 }, { "epoch": 2.188313224509108, "grad_norm": 0.4286472797393799, "learning_rate": 4.743188891548959e-06, "loss": 0.0833, "step": 18500 }, { "epoch": 2.1894960965223564, "grad_norm": 0.49333393573760986, "learning_rate": 4.742470672731626e-06, "loss": 0.076, "step": 18510 }, { "epoch": 2.1906789685356043, "grad_norm": 0.724864661693573, "learning_rate": 4.741752453914293e-06, "loss": 0.0713, "step": 18520 }, { "epoch": 2.1918618405488526, "grad_norm": 0.5704152584075928, "learning_rate": 4.74103423509696e-06, "loss": 0.0822, "step": 18530 }, { "epoch": 2.193044712562101, "grad_norm": 0.5341521501541138, "learning_rate": 4.7403160162796266e-06, "loss": 0.0799, "step": 18540 }, { "epoch": 2.1942275845753487, "grad_norm": 0.5238526463508606, "learning_rate": 4.7395977974622935e-06, "loss": 0.0786, "step": 18550 }, { "epoch": 2.195410456588597, "grad_norm": 0.5870595574378967, "learning_rate": 4.73887957864496e-06, "loss": 0.0922, "step": 18560 }, { "epoch": 2.1965933286018453, "grad_norm": 0.5622965097427368, "learning_rate": 4.738161359827627e-06, "loss": 0.0836, "step": 18570 }, { "epoch": 2.1977762006150936, "grad_norm": 0.5081386566162109, "learning_rate": 4.737443141010295e-06, "loss": 0.084, "step": 18580 }, { "epoch": 2.1989590726283414, "grad_norm": 0.47124356031417847, "learning_rate": 4.736724922192961e-06, "loss": 0.0787, "step": 18590 }, { "epoch": 2.2001419446415897, "grad_norm": 0.5208353996276855, "learning_rate": 4.736006703375629e-06, "loss": 0.0852, "step": 18600 }, { "epoch": 2.201324816654838, "grad_norm": 0.4776752293109894, "learning_rate": 4.735288484558295e-06, "loss": 0.0724, "step": 18610 }, { "epoch": 2.202507688668086, "grad_norm": 0.4173198938369751, "learning_rate": 4.734570265740963e-06, "loss": 0.0754, "step": 18620 }, { "epoch": 2.203690560681334, "grad_norm": 0.3915116786956787, "learning_rate": 4.733852046923629e-06, "loss": 0.081, "step": 18630 }, { "epoch": 2.2048734326945825, "grad_norm": 0.4634286165237427, "learning_rate": 4.733133828106297e-06, "loss": 0.0832, "step": 18640 }, { "epoch": 2.2060563047078308, "grad_norm": 0.46147555112838745, "learning_rate": 4.7324156092889636e-06, "loss": 0.0822, "step": 18650 }, { "epoch": 2.2072391767210786, "grad_norm": 0.44262808561325073, "learning_rate": 4.7316973904716305e-06, "loss": 0.0787, "step": 18660 }, { "epoch": 2.208422048734327, "grad_norm": 0.4654567837715149, "learning_rate": 4.730979171654297e-06, "loss": 0.085, "step": 18670 }, { "epoch": 2.209604920747575, "grad_norm": 0.8238278031349182, "learning_rate": 4.730260952836964e-06, "loss": 0.0879, "step": 18680 }, { "epoch": 2.210787792760823, "grad_norm": 0.48342791199684143, "learning_rate": 4.729542734019631e-06, "loss": 0.0785, "step": 18690 }, { "epoch": 2.2119706647740713, "grad_norm": 0.4843010902404785, "learning_rate": 4.728824515202298e-06, "loss": 0.0826, "step": 18700 }, { "epoch": 2.2131535367873196, "grad_norm": 0.43217194080352783, "learning_rate": 4.728106296384966e-06, "loss": 0.0799, "step": 18710 }, { "epoch": 2.214336408800568, "grad_norm": 0.5034468770027161, "learning_rate": 4.727388077567632e-06, "loss": 0.0811, "step": 18720 }, { "epoch": 2.2155192808138158, "grad_norm": 0.386362224817276, "learning_rate": 4.7266698587503e-06, "loss": 0.0712, "step": 18730 }, { "epoch": 2.216702152827064, "grad_norm": 0.39769935607910156, "learning_rate": 4.725951639932967e-06, "loss": 0.0867, "step": 18740 }, { "epoch": 2.2178850248403124, "grad_norm": 0.45898640155792236, "learning_rate": 4.725233421115634e-06, "loss": 0.0846, "step": 18750 }, { "epoch": 2.2190678968535607, "grad_norm": 0.4162012040615082, "learning_rate": 4.7245152022983006e-06, "loss": 0.0806, "step": 18760 }, { "epoch": 2.2202507688668085, "grad_norm": 0.5654481053352356, "learning_rate": 4.7237969834809675e-06, "loss": 0.0749, "step": 18770 }, { "epoch": 2.221433640880057, "grad_norm": 0.4079549014568329, "learning_rate": 4.723078764663634e-06, "loss": 0.0781, "step": 18780 }, { "epoch": 2.222616512893305, "grad_norm": 0.45339158177375793, "learning_rate": 4.722360545846301e-06, "loss": 0.0846, "step": 18790 }, { "epoch": 2.223799384906553, "grad_norm": 0.4317767322063446, "learning_rate": 4.721642327028968e-06, "loss": 0.0866, "step": 18800 }, { "epoch": 2.2249822569198012, "grad_norm": 0.5747681260108948, "learning_rate": 4.720924108211635e-06, "loss": 0.0814, "step": 18810 }, { "epoch": 2.2261651289330495, "grad_norm": 0.6248317956924438, "learning_rate": 4.720205889394302e-06, "loss": 0.0792, "step": 18820 }, { "epoch": 2.227348000946298, "grad_norm": 0.600416898727417, "learning_rate": 4.719487670576969e-06, "loss": 0.0756, "step": 18830 }, { "epoch": 2.2285308729595457, "grad_norm": 0.5021460056304932, "learning_rate": 4.718769451759636e-06, "loss": 0.0686, "step": 18840 }, { "epoch": 2.229713744972794, "grad_norm": 0.4853549599647522, "learning_rate": 4.718051232942304e-06, "loss": 0.0849, "step": 18850 }, { "epoch": 2.2308966169860422, "grad_norm": 0.4909280240535736, "learning_rate": 4.71733301412497e-06, "loss": 0.0826, "step": 18860 }, { "epoch": 2.23207948899929, "grad_norm": 0.4186185300350189, "learning_rate": 4.7166147953076376e-06, "loss": 0.0774, "step": 18870 }, { "epoch": 2.2332623610125384, "grad_norm": 0.37559664249420166, "learning_rate": 4.715896576490304e-06, "loss": 0.0725, "step": 18880 }, { "epoch": 2.2344452330257867, "grad_norm": 0.4028061032295227, "learning_rate": 4.715178357672971e-06, "loss": 0.0812, "step": 18890 }, { "epoch": 2.235628105039035, "grad_norm": 0.5105864405632019, "learning_rate": 4.7144601388556375e-06, "loss": 0.0833, "step": 18900 }, { "epoch": 2.236810977052283, "grad_norm": 0.49329373240470886, "learning_rate": 4.713741920038305e-06, "loss": 0.0798, "step": 18910 }, { "epoch": 2.237993849065531, "grad_norm": 0.7083413004875183, "learning_rate": 4.713023701220972e-06, "loss": 0.0967, "step": 18920 }, { "epoch": 2.2391767210787794, "grad_norm": 0.5080183148384094, "learning_rate": 4.712305482403639e-06, "loss": 0.0782, "step": 18930 }, { "epoch": 2.2403595930920273, "grad_norm": 0.4800988733768463, "learning_rate": 4.711587263586306e-06, "loss": 0.0896, "step": 18940 }, { "epoch": 2.2415424651052756, "grad_norm": 0.4371141493320465, "learning_rate": 4.710869044768973e-06, "loss": 0.0797, "step": 18950 }, { "epoch": 2.242725337118524, "grad_norm": 0.5274348855018616, "learning_rate": 4.710150825951641e-06, "loss": 0.0766, "step": 18960 }, { "epoch": 2.243908209131772, "grad_norm": 0.5672169923782349, "learning_rate": 4.709432607134307e-06, "loss": 0.0794, "step": 18970 }, { "epoch": 2.24509108114502, "grad_norm": 0.48892608284950256, "learning_rate": 4.7087143883169746e-06, "loss": 0.0833, "step": 18980 }, { "epoch": 2.2462739531582683, "grad_norm": 0.43957632780075073, "learning_rate": 4.707996169499641e-06, "loss": 0.0785, "step": 18990 }, { "epoch": 2.2474568251715166, "grad_norm": 0.4782010614871979, "learning_rate": 4.707277950682308e-06, "loss": 0.086, "step": 19000 }, { "epoch": 2.2486396971847644, "grad_norm": 0.8844341039657593, "learning_rate": 4.7065597318649745e-06, "loss": 0.0894, "step": 19010 }, { "epoch": 2.2498225691980127, "grad_norm": 0.479496568441391, "learning_rate": 4.705841513047642e-06, "loss": 0.085, "step": 19020 }, { "epoch": 2.250532292405962, "eval_accuracy": 0.6862794024686429, "eval_animal_abuse/accuracy": 0.994893036563862, "eval_animal_abuse/f1": 0.7717472118959108, "eval_animal_abuse/fpr": 0.002322215865109544, "eval_animal_abuse/precision": 0.7899543378995434, "eval_animal_abuse/recall": 0.7543604651162791, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9964400971487507, "eval_child_abuse/f1": 0.6433333333333333, "eval_child_abuse/fpr": 0.0012378514912764905, "eval_child_abuse/precision": 0.7228464419475655, "eval_child_abuse/recall": 0.5795795795795796, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9713211564693749, "eval_controversial_topics,politics/f1": 0.47566909975669097, "eval_controversial_topics,politics/fpr": 0.011394838001098277, "eval_controversial_topics,politics/precision": 0.5408022130013831, "eval_controversial_topics,politics/recall": 0.4245385450597177, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9556842000199621, "eval_discrimination,stereotype,injustice/f1": 0.71875, "eval_discrimination,stereotype,injustice/fpr": 0.023276827990024174, "eval_discrimination,stereotype,injustice/precision": 0.7254901960784313, "eval_discrimination,stereotype,injustice/recall": 0.7121338912133891, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9737997804172073, "eval_drug_abuse,weapons,banned_substance/f1": 0.7752247752247752, "eval_drug_abuse,weapons,banned_substance/fpr": 0.015953321111267776, "eval_drug_abuse,weapons,banned_substance/precision": 0.7500690417011875, "eval_drug_abuse,weapons,banned_substance/recall": 0.8021264028352038, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9595102638320524, "eval_financial_crime,property_crime,theft/f1": 0.8015005708693524, "eval_financial_crime,property_crime,theft/fpr": 0.02758785913053088, "eval_financial_crime,property_crime,theft/precision": 0.7664950865699579, "eval_financial_crime,property_crime,theft/recall": 0.8398564347974705, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8526466380543634, "eval_flagged/aucpr": 0.9072967069830886, "eval_flagged/f1": 0.8651791421874524, "eval_flagged/fpr": 0.14360724599632396, "eval_flagged/precision": 0.8812749992248302, "eval_flagged/recall": 0.8496606977369884, "eval_hate_speech,offensive_language/accuracy": 0.9505938716438767, "eval_hate_speech,offensive_language/f1": 0.6968769136558481, "eval_hate_speech,offensive_language/fpr": 0.018271514708569307, "eval_hate_speech,offensive_language/precision": 0.7734481196193929, "eval_hate_speech,offensive_language/recall": 0.6341010401188707, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08401020616292953, "eval_macro_f1": 0.6373405155956436, "eval_macro_precision": 0.7125564267224097, "eval_macro_recall": 0.6124955542060372, "eval_micro_f1": 0.7515904909239036, "eval_micro_precision": 0.7744972267290495, "eval_micro_recall": 0.7299998221811264, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9878564061616263, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.03183023872679045, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.00018523819948470068, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.5217391304347826, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.016415868673050615, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8864490800811791, "eval_non_violent_unethical_behavior/f1": 0.6808490742472414, "eval_non_violent_unethical_behavior/fpr": 0.044863809998339055, "eval_non_violent_unethical_behavior/precision": 0.7711289980936242, "eval_non_violent_unethical_behavior/recall": 0.6094927172275239, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9799048474564993, "eval_privacy_violation/f1": 0.8054750402576489, "eval_privacy_violation/fpr": 0.013001329880310749, "eval_privacy_violation/precision": 0.7709617755856967, "eval_privacy_violation/recall": 0.8432231962238705, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.177, "eval_samples_per_second": 100.495, "eval_self_harm/accuracy": 0.9965565425691186, "eval_self_harm/f1": 0.7236315086782377, "eval_self_harm/fpr": 0.0011389521640091096, "eval_self_harm/precision": 0.799410029498525, "eval_self_harm/recall": 0.6609756097560976, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9838972618691153, "eval_sexually_explicit,adult_content/f1": 0.6461988304093568, "eval_sexually_explicit,adult_content/fpr": 0.006903369867216651, "eval_sexually_explicit,adult_content/precision": 0.6858029480217223, "eval_sexually_explicit,adult_content/recall": 0.6109191430545957, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.571, "eval_terrorism,organized_crime/accuracy": 0.9921815217752936, "eval_terrorism,organized_crime/f1": 0.29850746268656714, "eval_terrorism,organized_crime/fpr": 0.0014924622272902562, "eval_terrorism,organized_crime/precision": 0.5291005291005291, "eval_terrorism,organized_crime/recall": 0.2079002079002079, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9194863093455767, "eval_violence,aiding_and_abetting,incitement/f1": 0.8531731585972576, "eval_violence,aiding_and_abetting,incitement/fpr": 0.0659534925887311, "eval_violence,aiding_and_abetting,incitement/precision": 0.8285411265613952, "eval_violence,aiding_and_abetting,incitement/recall": 0.8793146573286643, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 19026 }, { "epoch": 2.251005441211261, "grad_norm": 0.48985597491264343, "learning_rate": 4.705123294230309e-06, "loss": 0.0803, "step": 19030 }, { "epoch": 2.2521883132245093, "grad_norm": 0.4446103870868683, "learning_rate": 4.704405075412976e-06, "loss": 0.0724, "step": 19040 }, { "epoch": 2.253371185237757, "grad_norm": 0.4318327009677887, "learning_rate": 4.703686856595643e-06, "loss": 0.0855, "step": 19050 }, { "epoch": 2.2545540572510054, "grad_norm": 0.4958570897579193, "learning_rate": 4.70296863777831e-06, "loss": 0.0865, "step": 19060 }, { "epoch": 2.2557369292642537, "grad_norm": 0.4559321701526642, "learning_rate": 4.702250418960977e-06, "loss": 0.0766, "step": 19070 }, { "epoch": 2.2569198012775016, "grad_norm": 0.6812647581100464, "learning_rate": 4.701532200143644e-06, "loss": 0.078, "step": 19080 }, { "epoch": 2.25810267329075, "grad_norm": 0.4464665651321411, "learning_rate": 4.700813981326311e-06, "loss": 0.0725, "step": 19090 }, { "epoch": 2.259285545303998, "grad_norm": 0.5123809576034546, "learning_rate": 4.700095762508978e-06, "loss": 0.0792, "step": 19100 }, { "epoch": 2.2604684173172465, "grad_norm": 0.47164186835289, "learning_rate": 4.6993775436916446e-06, "loss": 0.0736, "step": 19110 }, { "epoch": 2.2616512893304943, "grad_norm": 0.44315263628959656, "learning_rate": 4.698659324874312e-06, "loss": 0.0708, "step": 19120 }, { "epoch": 2.2628341613437426, "grad_norm": 0.4744740128517151, "learning_rate": 4.697941106056978e-06, "loss": 0.0748, "step": 19130 }, { "epoch": 2.264017033356991, "grad_norm": 0.4795951247215271, "learning_rate": 4.697222887239646e-06, "loss": 0.0763, "step": 19140 }, { "epoch": 2.265199905370239, "grad_norm": 0.49936389923095703, "learning_rate": 4.696504668422312e-06, "loss": 0.0837, "step": 19150 }, { "epoch": 2.266382777383487, "grad_norm": 0.5128158926963806, "learning_rate": 4.69578644960498e-06, "loss": 0.083, "step": 19160 }, { "epoch": 2.2675656493967353, "grad_norm": 0.3657524287700653, "learning_rate": 4.695068230787646e-06, "loss": 0.0855, "step": 19170 }, { "epoch": 2.2687485214099836, "grad_norm": 0.37343576550483704, "learning_rate": 4.694350011970314e-06, "loss": 0.0765, "step": 19180 }, { "epoch": 2.2699313934232315, "grad_norm": 0.4429990351200104, "learning_rate": 4.693631793152981e-06, "loss": 0.0897, "step": 19190 }, { "epoch": 2.2711142654364798, "grad_norm": 0.43846625089645386, "learning_rate": 4.692913574335648e-06, "loss": 0.0725, "step": 19200 }, { "epoch": 2.272297137449728, "grad_norm": 0.5168274641036987, "learning_rate": 4.692195355518315e-06, "loss": 0.0864, "step": 19210 }, { "epoch": 2.2734800094629763, "grad_norm": 0.5803168416023254, "learning_rate": 4.6914771367009816e-06, "loss": 0.0783, "step": 19220 }, { "epoch": 2.274662881476224, "grad_norm": 0.46903830766677856, "learning_rate": 4.690758917883649e-06, "loss": 0.0826, "step": 19230 }, { "epoch": 2.2758457534894725, "grad_norm": 0.7214665412902832, "learning_rate": 4.690040699066315e-06, "loss": 0.0791, "step": 19240 }, { "epoch": 2.277028625502721, "grad_norm": 0.43224745988845825, "learning_rate": 4.689322480248983e-06, "loss": 0.0836, "step": 19250 }, { "epoch": 2.2782114975159686, "grad_norm": 0.5344483256340027, "learning_rate": 4.688604261431649e-06, "loss": 0.0793, "step": 19260 }, { "epoch": 2.279394369529217, "grad_norm": 0.5494910478591919, "learning_rate": 4.687886042614317e-06, "loss": 0.08, "step": 19270 }, { "epoch": 2.280577241542465, "grad_norm": 0.4302140772342682, "learning_rate": 4.687167823796983e-06, "loss": 0.0759, "step": 19280 }, { "epoch": 2.2817601135557135, "grad_norm": 0.4943113327026367, "learning_rate": 4.686449604979651e-06, "loss": 0.0845, "step": 19290 }, { "epoch": 2.2829429855689614, "grad_norm": 0.44899046421051025, "learning_rate": 4.685731386162318e-06, "loss": 0.0784, "step": 19300 }, { "epoch": 2.2841258575822097, "grad_norm": 0.5087020993232727, "learning_rate": 4.685013167344985e-06, "loss": 0.0815, "step": 19310 }, { "epoch": 2.285308729595458, "grad_norm": 0.4267829656600952, "learning_rate": 4.684294948527652e-06, "loss": 0.0821, "step": 19320 }, { "epoch": 2.286491601608706, "grad_norm": 0.5285939574241638, "learning_rate": 4.6835767297103186e-06, "loss": 0.0891, "step": 19330 }, { "epoch": 2.287674473621954, "grad_norm": 0.6544280052185059, "learning_rate": 4.6828585108929855e-06, "loss": 0.0773, "step": 19340 }, { "epoch": 2.2888573456352024, "grad_norm": 0.404792845249176, "learning_rate": 4.682140292075652e-06, "loss": 0.0817, "step": 19350 }, { "epoch": 2.2900402176484507, "grad_norm": 0.43705520033836365, "learning_rate": 4.681422073258319e-06, "loss": 0.0735, "step": 19360 }, { "epoch": 2.2912230896616985, "grad_norm": 0.5473356246948242, "learning_rate": 4.680703854440986e-06, "loss": 0.0773, "step": 19370 }, { "epoch": 2.292405961674947, "grad_norm": 0.540752112865448, "learning_rate": 4.679985635623653e-06, "loss": 0.081, "step": 19380 }, { "epoch": 2.293588833688195, "grad_norm": 0.41740262508392334, "learning_rate": 4.679267416806321e-06, "loss": 0.0822, "step": 19390 }, { "epoch": 2.294771705701443, "grad_norm": 0.42364501953125, "learning_rate": 4.678549197988987e-06, "loss": 0.0793, "step": 19400 }, { "epoch": 2.2959545777146912, "grad_norm": 0.6465044021606445, "learning_rate": 4.677830979171655e-06, "loss": 0.0775, "step": 19410 }, { "epoch": 2.2971374497279395, "grad_norm": 0.4886012673377991, "learning_rate": 4.677112760354321e-06, "loss": 0.0797, "step": 19420 }, { "epoch": 2.298320321741188, "grad_norm": 0.39484602212905884, "learning_rate": 4.676394541536989e-06, "loss": 0.0832, "step": 19430 }, { "epoch": 2.2995031937544357, "grad_norm": 0.45964694023132324, "learning_rate": 4.675676322719655e-06, "loss": 0.0783, "step": 19440 }, { "epoch": 2.300686065767684, "grad_norm": 0.5101012587547302, "learning_rate": 4.6749581039023225e-06, "loss": 0.0783, "step": 19450 }, { "epoch": 2.3018689377809323, "grad_norm": 0.6236776113510132, "learning_rate": 4.674239885084989e-06, "loss": 0.0762, "step": 19460 }, { "epoch": 2.30305180979418, "grad_norm": 0.4507417678833008, "learning_rate": 4.673521666267656e-06, "loss": 0.0829, "step": 19470 }, { "epoch": 2.3042346818074284, "grad_norm": 0.5371777415275574, "learning_rate": 4.672803447450323e-06, "loss": 0.0824, "step": 19480 }, { "epoch": 2.3054175538206767, "grad_norm": 0.611461877822876, "learning_rate": 4.67208522863299e-06, "loss": 0.0857, "step": 19490 }, { "epoch": 2.306600425833925, "grad_norm": 0.5994073152542114, "learning_rate": 4.671367009815658e-06, "loss": 0.077, "step": 19500 }, { "epoch": 2.307783297847173, "grad_norm": 0.4773325026035309, "learning_rate": 4.670648790998324e-06, "loss": 0.0741, "step": 19510 }, { "epoch": 2.308966169860421, "grad_norm": 0.5011317133903503, "learning_rate": 4.669930572180992e-06, "loss": 0.0808, "step": 19520 }, { "epoch": 2.3101490418736694, "grad_norm": 0.42325666546821594, "learning_rate": 4.669212353363658e-06, "loss": 0.0768, "step": 19530 }, { "epoch": 2.3113319138869173, "grad_norm": 0.332438588142395, "learning_rate": 4.668494134546326e-06, "loss": 0.082, "step": 19540 }, { "epoch": 2.3125147859001656, "grad_norm": 0.5570465922355652, "learning_rate": 4.667775915728992e-06, "loss": 0.0821, "step": 19550 }, { "epoch": 2.313697657913414, "grad_norm": 0.34542950987815857, "learning_rate": 4.6670576969116595e-06, "loss": 0.0819, "step": 19560 }, { "epoch": 2.314880529926662, "grad_norm": 0.5183144211769104, "learning_rate": 4.666339478094326e-06, "loss": 0.0794, "step": 19570 }, { "epoch": 2.31606340193991, "grad_norm": 0.4806484580039978, "learning_rate": 4.665621259276993e-06, "loss": 0.0849, "step": 19580 }, { "epoch": 2.3172462739531583, "grad_norm": 0.5735294222831726, "learning_rate": 4.66490304045966e-06, "loss": 0.0813, "step": 19590 }, { "epoch": 2.3184291459664066, "grad_norm": 0.43929117918014526, "learning_rate": 4.664184821642327e-06, "loss": 0.0742, "step": 19600 }, { "epoch": 2.3196120179796544, "grad_norm": 0.7018395066261292, "learning_rate": 4.663466602824994e-06, "loss": 0.0782, "step": 19610 }, { "epoch": 2.3207948899929027, "grad_norm": 0.5085379481315613, "learning_rate": 4.662748384007661e-06, "loss": 0.0736, "step": 19620 }, { "epoch": 2.321977762006151, "grad_norm": 0.5797022581100464, "learning_rate": 4.662030165190328e-06, "loss": 0.0878, "step": 19630 }, { "epoch": 2.3231606340193993, "grad_norm": 0.4331245422363281, "learning_rate": 4.661311946372995e-06, "loss": 0.0806, "step": 19640 }, { "epoch": 2.324343506032647, "grad_norm": 0.5110427141189575, "learning_rate": 4.660593727555662e-06, "loss": 0.0791, "step": 19650 }, { "epoch": 2.3255263780458955, "grad_norm": 0.42796117067337036, "learning_rate": 4.6598755087383295e-06, "loss": 0.0739, "step": 19660 }, { "epoch": 2.3267092500591438, "grad_norm": 0.3377985656261444, "learning_rate": 4.659157289920996e-06, "loss": 0.08, "step": 19670 }, { "epoch": 2.3278921220723916, "grad_norm": 0.44814977049827576, "learning_rate": 4.658439071103663e-06, "loss": 0.0797, "step": 19680 }, { "epoch": 2.32907499408564, "grad_norm": 0.6034530997276306, "learning_rate": 4.6577208522863295e-06, "loss": 0.0866, "step": 19690 }, { "epoch": 2.330257866098888, "grad_norm": 0.4972134232521057, "learning_rate": 4.657002633468997e-06, "loss": 0.0813, "step": 19700 }, { "epoch": 2.3314407381121365, "grad_norm": 0.494204044342041, "learning_rate": 4.656284414651664e-06, "loss": 0.0831, "step": 19710 }, { "epoch": 2.3326236101253843, "grad_norm": 0.4937899112701416, "learning_rate": 4.655566195834331e-06, "loss": 0.084, "step": 19720 }, { "epoch": 2.3338064821386326, "grad_norm": 0.48554009199142456, "learning_rate": 4.654847977016998e-06, "loss": 0.08, "step": 19730 }, { "epoch": 2.334989354151881, "grad_norm": 0.4122422933578491, "learning_rate": 4.654129758199665e-06, "loss": 0.0796, "step": 19740 }, { "epoch": 2.3361722261651288, "grad_norm": 0.49321088194847107, "learning_rate": 4.653411539382332e-06, "loss": 0.0747, "step": 19750 }, { "epoch": 2.337355098178377, "grad_norm": 0.5123177766799927, "learning_rate": 4.652693320564999e-06, "loss": 0.0831, "step": 19760 }, { "epoch": 2.3385379701916253, "grad_norm": 0.43787217140197754, "learning_rate": 4.6519751017476665e-06, "loss": 0.0867, "step": 19770 }, { "epoch": 2.3397208422048736, "grad_norm": 0.39549511671066284, "learning_rate": 4.651256882930333e-06, "loss": 0.0813, "step": 19780 }, { "epoch": 2.3409037142181215, "grad_norm": 0.5754088163375854, "learning_rate": 4.650538664113e-06, "loss": 0.0815, "step": 19790 }, { "epoch": 2.34208658623137, "grad_norm": 0.44408130645751953, "learning_rate": 4.6498204452956665e-06, "loss": 0.077, "step": 19800 }, { "epoch": 2.343269458244618, "grad_norm": 0.517722487449646, "learning_rate": 4.649102226478334e-06, "loss": 0.0769, "step": 19810 }, { "epoch": 2.344452330257866, "grad_norm": 0.5626716017723083, "learning_rate": 4.648384007661e-06, "loss": 0.0833, "step": 19820 }, { "epoch": 2.345635202271114, "grad_norm": 0.4841638505458832, "learning_rate": 4.647665788843668e-06, "loss": 0.0803, "step": 19830 }, { "epoch": 2.3468180742843625, "grad_norm": 0.43866851925849915, "learning_rate": 4.646947570026335e-06, "loss": 0.0775, "step": 19840 }, { "epoch": 2.348000946297611, "grad_norm": 0.5035989880561829, "learning_rate": 4.646229351209002e-06, "loss": 0.0719, "step": 19850 }, { "epoch": 2.3491838183108587, "grad_norm": 0.40316250920295715, "learning_rate": 4.645511132391669e-06, "loss": 0.0771, "step": 19860 }, { "epoch": 2.350366690324107, "grad_norm": 0.5612912774085999, "learning_rate": 4.644792913574336e-06, "loss": 0.0659, "step": 19870 }, { "epoch": 2.3515495623373552, "grad_norm": 0.6774323582649231, "learning_rate": 4.644074694757003e-06, "loss": 0.0852, "step": 19880 }, { "epoch": 2.352732434350603, "grad_norm": 0.4686830937862396, "learning_rate": 4.64335647593967e-06, "loss": 0.0865, "step": 19890 }, { "epoch": 2.3539153063638514, "grad_norm": 0.45900535583496094, "learning_rate": 4.6426382571223365e-06, "loss": 0.0739, "step": 19900 }, { "epoch": 2.3550981783770997, "grad_norm": 0.5912988185882568, "learning_rate": 4.6419200383050035e-06, "loss": 0.0789, "step": 19910 }, { "epoch": 2.356281050390348, "grad_norm": 0.4085247218608856, "learning_rate": 4.64120181948767e-06, "loss": 0.0751, "step": 19920 }, { "epoch": 2.357463922403596, "grad_norm": 0.451188325881958, "learning_rate": 4.640483600670337e-06, "loss": 0.0849, "step": 19930 }, { "epoch": 2.358646794416844, "grad_norm": 0.49985089898109436, "learning_rate": 4.639765381853004e-06, "loss": 0.0838, "step": 19940 }, { "epoch": 2.3598296664300924, "grad_norm": 0.5006523132324219, "learning_rate": 4.639047163035672e-06, "loss": 0.0743, "step": 19950 }, { "epoch": 2.3610125384433402, "grad_norm": 0.5866096615791321, "learning_rate": 4.638328944218338e-06, "loss": 0.0881, "step": 19960 }, { "epoch": 2.3621954104565885, "grad_norm": 0.4487146735191345, "learning_rate": 4.637610725401006e-06, "loss": 0.0809, "step": 19970 }, { "epoch": 2.363378282469837, "grad_norm": 0.5197978615760803, "learning_rate": 4.636892506583673e-06, "loss": 0.088, "step": 19980 }, { "epoch": 2.364561154483085, "grad_norm": 0.501154363155365, "learning_rate": 4.63617428776634e-06, "loss": 0.0819, "step": 19990 }, { "epoch": 2.365744026496333, "grad_norm": 0.4437161087989807, "learning_rate": 4.635456068949007e-06, "loss": 0.0811, "step": 20000 }, { "epoch": 2.3669268985095813, "grad_norm": 0.5721325278282166, "learning_rate": 4.6347378501316735e-06, "loss": 0.0899, "step": 20010 }, { "epoch": 2.3681097705228296, "grad_norm": 0.5523505210876465, "learning_rate": 4.6340196313143405e-06, "loss": 0.0788, "step": 20020 }, { "epoch": 2.3692926425360774, "grad_norm": 0.49490684270858765, "learning_rate": 4.633301412497007e-06, "loss": 0.0843, "step": 20030 }, { "epoch": 2.3704755145493257, "grad_norm": 0.44148728251457214, "learning_rate": 4.632583193679675e-06, "loss": 0.0821, "step": 20040 }, { "epoch": 2.371658386562574, "grad_norm": 0.4276270568370819, "learning_rate": 4.631864974862341e-06, "loss": 0.0843, "step": 20050 }, { "epoch": 2.3728412585758223, "grad_norm": 0.44791069626808167, "learning_rate": 4.631146756045009e-06, "loss": 0.084, "step": 20060 }, { "epoch": 2.37402413058907, "grad_norm": 0.5309301018714905, "learning_rate": 4.630428537227675e-06, "loss": 0.0816, "step": 20070 }, { "epoch": 2.3752070026023184, "grad_norm": 0.473468542098999, "learning_rate": 4.629710318410343e-06, "loss": 0.0827, "step": 20080 }, { "epoch": 2.3763898746155667, "grad_norm": 0.37931352853775024, "learning_rate": 4.628992099593009e-06, "loss": 0.0795, "step": 20090 }, { "epoch": 2.3775727466288146, "grad_norm": 0.5902916193008423, "learning_rate": 4.628273880775677e-06, "loss": 0.0747, "step": 20100 }, { "epoch": 2.378755618642063, "grad_norm": 0.6926960349082947, "learning_rate": 4.627555661958344e-06, "loss": 0.0829, "step": 20110 }, { "epoch": 2.379938490655311, "grad_norm": 0.47248831391334534, "learning_rate": 4.6268374431410105e-06, "loss": 0.081, "step": 20120 }, { "epoch": 2.3811213626685594, "grad_norm": 0.37989234924316406, "learning_rate": 4.6261192243236775e-06, "loss": 0.0766, "step": 20130 }, { "epoch": 2.3823042346818073, "grad_norm": 0.40075793862342834, "learning_rate": 4.625401005506344e-06, "loss": 0.075, "step": 20140 }, { "epoch": 2.3834871066950556, "grad_norm": 0.49893903732299805, "learning_rate": 4.624682786689011e-06, "loss": 0.0854, "step": 20150 }, { "epoch": 2.384669978708304, "grad_norm": 0.4603833556175232, "learning_rate": 4.623964567871678e-06, "loss": 0.0867, "step": 20160 }, { "epoch": 2.3858528507215517, "grad_norm": 0.4658143222332001, "learning_rate": 4.623246349054345e-06, "loss": 0.0813, "step": 20170 }, { "epoch": 2.3870357227348, "grad_norm": 0.5196372270584106, "learning_rate": 4.622528130237012e-06, "loss": 0.0768, "step": 20180 }, { "epoch": 2.3882185947480483, "grad_norm": 0.46605679392814636, "learning_rate": 4.621809911419679e-06, "loss": 0.0808, "step": 20190 }, { "epoch": 2.3894014667612966, "grad_norm": 0.45823168754577637, "learning_rate": 4.621091692602346e-06, "loss": 0.0799, "step": 20200 }, { "epoch": 2.3905843387745445, "grad_norm": 0.4916255474090576, "learning_rate": 4.620373473785013e-06, "loss": 0.0768, "step": 20210 }, { "epoch": 2.3917672107877928, "grad_norm": 0.5116035342216492, "learning_rate": 4.619655254967681e-06, "loss": 0.0805, "step": 20220 }, { "epoch": 2.392950082801041, "grad_norm": 0.5443277955055237, "learning_rate": 4.6189370361503475e-06, "loss": 0.0739, "step": 20230 }, { "epoch": 2.394132954814289, "grad_norm": 0.4774996042251587, "learning_rate": 4.6182188173330145e-06, "loss": 0.0835, "step": 20240 }, { "epoch": 2.395315826827537, "grad_norm": 0.5231410264968872, "learning_rate": 4.617500598515681e-06, "loss": 0.0769, "step": 20250 }, { "epoch": 2.3964986988407855, "grad_norm": 0.4652608335018158, "learning_rate": 4.616782379698348e-06, "loss": 0.0732, "step": 20260 }, { "epoch": 2.3976815708540338, "grad_norm": 0.3737054169178009, "learning_rate": 4.616064160881015e-06, "loss": 0.0757, "step": 20270 }, { "epoch": 2.3988644428672816, "grad_norm": 0.49625757336616516, "learning_rate": 4.615345942063682e-06, "loss": 0.0778, "step": 20280 }, { "epoch": 2.40004731488053, "grad_norm": 0.44636717438697815, "learning_rate": 4.614627723246349e-06, "loss": 0.0841, "step": 20290 }, { "epoch": 2.401230186893778, "grad_norm": 0.5421101450920105, "learning_rate": 4.613909504429016e-06, "loss": 0.087, "step": 20300 }, { "epoch": 2.402413058907026, "grad_norm": 0.32357555627822876, "learning_rate": 4.613191285611684e-06, "loss": 0.0824, "step": 20310 }, { "epoch": 2.4035959309202743, "grad_norm": 0.45455503463745117, "learning_rate": 4.61247306679435e-06, "loss": 0.0789, "step": 20320 }, { "epoch": 2.4047788029335226, "grad_norm": 0.4851369261741638, "learning_rate": 4.611754847977018e-06, "loss": 0.0858, "step": 20330 }, { "epoch": 2.405961674946771, "grad_norm": 0.5116434097290039, "learning_rate": 4.611036629159684e-06, "loss": 0.0784, "step": 20340 }, { "epoch": 2.407144546960019, "grad_norm": 0.6080803871154785, "learning_rate": 4.6103184103423515e-06, "loss": 0.085, "step": 20350 }, { "epoch": 2.408327418973267, "grad_norm": 0.4660366177558899, "learning_rate": 4.6096001915250175e-06, "loss": 0.0802, "step": 20360 }, { "epoch": 2.4095102909865154, "grad_norm": 0.49883168935775757, "learning_rate": 4.608881972707685e-06, "loss": 0.0803, "step": 20370 }, { "epoch": 2.410693162999763, "grad_norm": 0.4145399332046509, "learning_rate": 4.608163753890352e-06, "loss": 0.078, "step": 20380 }, { "epoch": 2.4118760350130115, "grad_norm": 0.5329418778419495, "learning_rate": 4.607445535073019e-06, "loss": 0.0736, "step": 20390 }, { "epoch": 2.41305890702626, "grad_norm": 0.43556907773017883, "learning_rate": 4.606727316255686e-06, "loss": 0.0786, "step": 20400 }, { "epoch": 2.414241779039508, "grad_norm": 0.5713589191436768, "learning_rate": 4.606009097438353e-06, "loss": 0.0836, "step": 20410 }, { "epoch": 2.415424651052756, "grad_norm": 0.43281665444374084, "learning_rate": 4.60529087862102e-06, "loss": 0.0806, "step": 20420 }, { "epoch": 2.4166075230660042, "grad_norm": 0.43619173765182495, "learning_rate": 4.604572659803687e-06, "loss": 0.0723, "step": 20430 }, { "epoch": 2.4177903950792525, "grad_norm": 0.5250333547592163, "learning_rate": 4.603854440986354e-06, "loss": 0.0797, "step": 20440 }, { "epoch": 2.4189732670925004, "grad_norm": 0.45119205117225647, "learning_rate": 4.603136222169021e-06, "loss": 0.0783, "step": 20450 }, { "epoch": 2.4201561391057487, "grad_norm": 0.4899902045726776, "learning_rate": 4.602418003351688e-06, "loss": 0.0804, "step": 20460 }, { "epoch": 2.421339011118997, "grad_norm": 0.37370607256889343, "learning_rate": 4.6016997845343545e-06, "loss": 0.0806, "step": 20470 }, { "epoch": 2.4225218831322453, "grad_norm": 0.46439656615257263, "learning_rate": 4.600981565717022e-06, "loss": 0.0891, "step": 20480 }, { "epoch": 2.423704755145493, "grad_norm": 0.5241281390190125, "learning_rate": 4.600263346899689e-06, "loss": 0.084, "step": 20490 }, { "epoch": 2.4248876271587414, "grad_norm": 0.4403243362903595, "learning_rate": 4.599545128082356e-06, "loss": 0.0811, "step": 20500 }, { "epoch": 2.4260704991719897, "grad_norm": 0.4439948797225952, "learning_rate": 4.598826909265023e-06, "loss": 0.0812, "step": 20510 }, { "epoch": 2.4272533711852375, "grad_norm": 0.5029915571212769, "learning_rate": 4.59810869044769e-06, "loss": 0.084, "step": 20520 }, { "epoch": 2.428436243198486, "grad_norm": 0.5782212615013123, "learning_rate": 4.597390471630357e-06, "loss": 0.0848, "step": 20530 }, { "epoch": 2.429619115211734, "grad_norm": 0.41771236062049866, "learning_rate": 4.596672252813024e-06, "loss": 0.0763, "step": 20540 }, { "epoch": 2.4308019872249824, "grad_norm": 0.4836272895336151, "learning_rate": 4.595954033995691e-06, "loss": 0.0827, "step": 20550 }, { "epoch": 2.4319848592382303, "grad_norm": 0.46531185507774353, "learning_rate": 4.595235815178358e-06, "loss": 0.0818, "step": 20560 }, { "epoch": 2.4331677312514786, "grad_norm": 0.41835829615592957, "learning_rate": 4.594517596361025e-06, "loss": 0.0699, "step": 20570 }, { "epoch": 2.434350603264727, "grad_norm": 0.3395691215991974, "learning_rate": 4.5937993775436915e-06, "loss": 0.0743, "step": 20580 }, { "epoch": 2.4355334752779747, "grad_norm": 0.40243715047836304, "learning_rate": 4.5930811587263584e-06, "loss": 0.0788, "step": 20590 }, { "epoch": 2.436716347291223, "grad_norm": 0.5432870984077454, "learning_rate": 4.592362939909026e-06, "loss": 0.0815, "step": 20600 }, { "epoch": 2.4378992193044713, "grad_norm": 0.4963539242744446, "learning_rate": 4.591644721091692e-06, "loss": 0.0799, "step": 20610 }, { "epoch": 2.4390820913177196, "grad_norm": 0.38240012526512146, "learning_rate": 4.59092650227436e-06, "loss": 0.0821, "step": 20620 }, { "epoch": 2.4402649633309674, "grad_norm": 0.4940338134765625, "learning_rate": 4.590208283457026e-06, "loss": 0.0852, "step": 20630 }, { "epoch": 2.4414478353442157, "grad_norm": 0.505824625492096, "learning_rate": 4.589490064639694e-06, "loss": 0.0747, "step": 20640 }, { "epoch": 2.442630707357464, "grad_norm": 0.4258606433868408, "learning_rate": 4.588771845822361e-06, "loss": 0.0825, "step": 20650 }, { "epoch": 2.443813579370712, "grad_norm": 0.48021793365478516, "learning_rate": 4.588053627005028e-06, "loss": 0.0813, "step": 20660 }, { "epoch": 2.44499645138396, "grad_norm": 0.5355902910232544, "learning_rate": 4.587335408187695e-06, "loss": 0.0817, "step": 20670 }, { "epoch": 2.4461793233972084, "grad_norm": 0.5413947701454163, "learning_rate": 4.586617189370362e-06, "loss": 0.0828, "step": 20680 }, { "epoch": 2.4473621954104567, "grad_norm": 0.4687840938568115, "learning_rate": 4.5858989705530285e-06, "loss": 0.0837, "step": 20690 }, { "epoch": 2.4485450674237046, "grad_norm": 0.6473014950752258, "learning_rate": 4.5851807517356954e-06, "loss": 0.0869, "step": 20700 }, { "epoch": 2.449727939436953, "grad_norm": 0.4147103428840637, "learning_rate": 4.584462532918362e-06, "loss": 0.0782, "step": 20710 }, { "epoch": 2.450910811450201, "grad_norm": 0.4286137521266937, "learning_rate": 4.583744314101029e-06, "loss": 0.0799, "step": 20720 }, { "epoch": 2.452093683463449, "grad_norm": 0.4513815939426422, "learning_rate": 4.583026095283697e-06, "loss": 0.0756, "step": 20730 }, { "epoch": 2.4532765554766973, "grad_norm": 0.5128229856491089, "learning_rate": 4.582307876466363e-06, "loss": 0.0767, "step": 20740 }, { "epoch": 2.4544594274899456, "grad_norm": 0.34456565976142883, "learning_rate": 4.581589657649031e-06, "loss": 0.0842, "step": 20750 }, { "epoch": 2.455642299503194, "grad_norm": 0.49764618277549744, "learning_rate": 4.580871438831698e-06, "loss": 0.0863, "step": 20760 }, { "epoch": 2.4568251715164418, "grad_norm": 0.5763587355613708, "learning_rate": 4.580153220014365e-06, "loss": 0.0761, "step": 20770 }, { "epoch": 2.45800804352969, "grad_norm": 0.49884939193725586, "learning_rate": 4.579435001197032e-06, "loss": 0.0777, "step": 20780 }, { "epoch": 2.4591909155429383, "grad_norm": 0.4940331280231476, "learning_rate": 4.578716782379699e-06, "loss": 0.0766, "step": 20790 }, { "epoch": 2.460373787556186, "grad_norm": 0.4681645929813385, "learning_rate": 4.5779985635623655e-06, "loss": 0.0802, "step": 20800 }, { "epoch": 2.4615566595694345, "grad_norm": 0.6063777208328247, "learning_rate": 4.5772803447450324e-06, "loss": 0.0805, "step": 20810 }, { "epoch": 2.4627395315826828, "grad_norm": 0.44728150963783264, "learning_rate": 4.576562125927699e-06, "loss": 0.0854, "step": 20820 }, { "epoch": 2.463922403595931, "grad_norm": 0.47446149587631226, "learning_rate": 4.575843907110366e-06, "loss": 0.0808, "step": 20830 }, { "epoch": 2.465105275609179, "grad_norm": 0.4359745383262634, "learning_rate": 4.575125688293033e-06, "loss": 0.0852, "step": 20840 }, { "epoch": 2.466288147622427, "grad_norm": 0.47312358021736145, "learning_rate": 4.5744074694757e-06, "loss": 0.084, "step": 20850 }, { "epoch": 2.4674710196356755, "grad_norm": 0.4785905182361603, "learning_rate": 4.573689250658367e-06, "loss": 0.0799, "step": 20860 }, { "epoch": 2.4686538916489233, "grad_norm": 0.4746895134449005, "learning_rate": 4.572971031841035e-06, "loss": 0.0831, "step": 20870 }, { "epoch": 2.4698367636621716, "grad_norm": 0.4853614270687103, "learning_rate": 4.572252813023701e-06, "loss": 0.0833, "step": 20880 }, { "epoch": 2.47101963567542, "grad_norm": 0.4507676959037781, "learning_rate": 4.571534594206369e-06, "loss": 0.0779, "step": 20890 }, { "epoch": 2.4722025076886682, "grad_norm": 0.44415482878685, "learning_rate": 4.570816375389035e-06, "loss": 0.0831, "step": 20900 }, { "epoch": 2.473385379701916, "grad_norm": 0.5261666774749756, "learning_rate": 4.5700981565717025e-06, "loss": 0.0798, "step": 20910 }, { "epoch": 2.4745682517151644, "grad_norm": 0.4870808720588684, "learning_rate": 4.569379937754369e-06, "loss": 0.0757, "step": 20920 }, { "epoch": 2.4757511237284127, "grad_norm": 0.4420444965362549, "learning_rate": 4.568661718937036e-06, "loss": 0.081, "step": 20930 }, { "epoch": 2.4769339957416605, "grad_norm": 0.4341696798801422, "learning_rate": 4.567943500119703e-06, "loss": 0.0852, "step": 20940 }, { "epoch": 2.478116867754909, "grad_norm": 0.4431030750274658, "learning_rate": 4.56722528130237e-06, "loss": 0.0867, "step": 20950 }, { "epoch": 2.479299739768157, "grad_norm": 0.46596479415893555, "learning_rate": 4.566507062485037e-06, "loss": 0.0812, "step": 20960 }, { "epoch": 2.4804826117814054, "grad_norm": 0.5264905095100403, "learning_rate": 4.565788843667704e-06, "loss": 0.0847, "step": 20970 }, { "epoch": 2.4816654837946532, "grad_norm": 0.47532251477241516, "learning_rate": 4.565070624850372e-06, "loss": 0.0769, "step": 20980 }, { "epoch": 2.4828483558079015, "grad_norm": 0.4263322651386261, "learning_rate": 4.564352406033038e-06, "loss": 0.0777, "step": 20990 }, { "epoch": 2.48403122782115, "grad_norm": 0.491851806640625, "learning_rate": 4.563634187215706e-06, "loss": 0.0785, "step": 21000 }, { "epoch": 2.4852140998343977, "grad_norm": 0.5229894518852234, "learning_rate": 4.562915968398372e-06, "loss": 0.0744, "step": 21010 }, { "epoch": 2.486396971847646, "grad_norm": 0.5771435499191284, "learning_rate": 4.5621977495810395e-06, "loss": 0.0794, "step": 21020 }, { "epoch": 2.4875798438608943, "grad_norm": 0.4717404544353485, "learning_rate": 4.5614795307637064e-06, "loss": 0.0704, "step": 21030 }, { "epoch": 2.4887627158741426, "grad_norm": 0.3642178177833557, "learning_rate": 4.560761311946373e-06, "loss": 0.0854, "step": 21040 }, { "epoch": 2.4899455878873904, "grad_norm": 0.4847264587879181, "learning_rate": 4.56004309312904e-06, "loss": 0.0874, "step": 21050 }, { "epoch": 2.4911284599006387, "grad_norm": 0.4256085157394409, "learning_rate": 4.559324874311707e-06, "loss": 0.0786, "step": 21060 }, { "epoch": 2.492311331913887, "grad_norm": 0.4784383475780487, "learning_rate": 4.558606655494374e-06, "loss": 0.0765, "step": 21070 }, { "epoch": 2.4934942039271353, "grad_norm": 0.467079222202301, "learning_rate": 4.557888436677041e-06, "loss": 0.0827, "step": 21080 }, { "epoch": 2.494677075940383, "grad_norm": 0.49154597520828247, "learning_rate": 4.557170217859708e-06, "loss": 0.0824, "step": 21090 }, { "epoch": 2.4958599479536314, "grad_norm": 0.517427384853363, "learning_rate": 4.556451999042375e-06, "loss": 0.0748, "step": 21100 }, { "epoch": 2.4970428199668797, "grad_norm": 0.5036735534667969, "learning_rate": 4.555733780225042e-06, "loss": 0.0794, "step": 21110 }, { "epoch": 2.4982256919801276, "grad_norm": 0.456575870513916, "learning_rate": 4.555015561407709e-06, "loss": 0.0795, "step": 21120 }, { "epoch": 2.499408563993376, "grad_norm": 0.5531090497970581, "learning_rate": 4.554297342590376e-06, "loss": 0.0899, "step": 21130 }, { "epoch": 2.500591436006624, "grad_norm": 0.5632674694061279, "learning_rate": 4.5535791237730434e-06, "loss": 0.0818, "step": 21140 }, { "epoch": 2.500591436006624, "eval_accuracy": 0.6819875569750807, "eval_animal_abuse/accuracy": 0.994893036563862, "eval_animal_abuse/f1": 0.7802433786685755, "eval_animal_abuse/fpr": 0.0027597347962171393, "eval_animal_abuse/precision": 0.768688293370945, "eval_animal_abuse/recall": 0.7921511627906976, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9961406660678045, "eval_child_abuse/f1": 0.6578171091445427, "eval_child_abuse/fpr": 0.0020407821883207003, "eval_child_abuse/precision": 0.6463768115942029, "eval_child_abuse/recall": 0.6696696696696697, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9712047110490069, "eval_controversial_topics,politics/f1": 0.4818916492068243, "eval_controversial_topics,politics/fpr": 0.011909665019220188, "eval_controversial_topics,politics/precision": 0.5370246831220814, "eval_controversial_topics,politics/recall": 0.4370249728555918, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9565159530225904, "eval_discrimination,stereotype,injustice/f1": 0.7251892346509672, "eval_discrimination,stereotype,injustice/fpr": 0.02318646763292004, "eval_discrimination,stereotype,injustice/precision": 0.7288672865595942, "eval_discrimination,stereotype,injustice/recall": 0.7215481171548117, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.97258542103337, "eval_drug_abuse,weapons,banned_substance/f1": 0.7737506864360242, "eval_drug_abuse,weapons,banned_substance/fpr": 0.019038217458750496, "eval_drug_abuse,weapons,banned_substance/precision": 0.7229348383786557, "eval_drug_abuse,weapons,banned_substance/recall": 0.8322504430005907, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9599261403333667, "eval_financial_crime,property_crime,theft/f1": 0.7995006242197253, "eval_financial_crime,property_crime,theft/fpr": 0.025081547278993005, "eval_financial_crime,property_crime,theft/precision": 0.7792018170019468, "eval_financial_crime,property_crime,theft/recall": 0.8208853187489318, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8548591010413548, "eval_flagged/aucpr": 0.9060015204642168, "eval_flagged/f1": 0.869470251185614, "eval_flagged/fpr": 0.16250984510370117, "eval_flagged/precision": 0.8702383804504072, "eval_flagged/recall": 0.8687034767271532, "eval_hate_speech,offensive_language/accuracy": 0.9518248660877666, "eval_hate_speech,offensive_language/f1": 0.6921114182436743, "eval_hate_speech,offensive_language/fpr": 0.014014251781472658, "eval_hate_speech,offensive_language/precision": 0.8092988562904028, "eval_hate_speech,offensive_language/recall": 0.6045690936106983, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08329590409994125, "eval_macro_f1": 0.6461637028662383, "eval_macro_precision": 0.6948851413916318, "eval_macro_recall": 0.6408400554416191, "eval_micro_f1": 0.7544343284123473, "eval_micro_precision": 0.7697773830011658, "eval_micro_recall": 0.7396909507975177, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9878564061616263, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.04450261780104712, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.00026943738106865556, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.5151515151515151, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.023255813953488372, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.885916758159497, "eval_non_violent_unethical_behavior/f1": 0.694765889264732, "eval_non_violent_unethical_behavior/fpr": 0.05640674306593577, "eval_non_violent_unethical_behavior/precision": 0.7417791294430717, "eval_non_violent_unethical_behavior/recall": 0.6533567721413025, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9790564593938184, "eval_privacy_violation/f1": 0.8023858107047559, "eval_privacy_violation/fpr": 0.014856162945334893, "eval_privacy_violation/precision": 0.7506607929515419, "eval_privacy_violation/recall": 0.8617666891436278, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.3169, "eval_samples_per_second": 100.472, "eval_self_harm/accuracy": 0.9964900023289084, "eval_self_harm/f1": 0.7318932655654383, "eval_self_harm/fpr": 0.00149068739112957, "eval_self_harm/precision": 0.7639257294429708, "eval_self_harm/recall": 0.7024390243902439, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9834647503077486, "eval_sexually_explicit,adult_content/f1": 0.6848446417247939, "eval_sexually_explicit,adult_content/fpr": 0.010687439275913186, "eval_sexually_explicit,adult_content/precision": 0.632688927943761, "eval_sexually_explicit,adult_content/recall": 0.7463718037318591, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.571, "eval_terrorism,organized_crime/accuracy": 0.9918987257544, "eval_terrorism,organized_crime/f1": 0.3226703755215577, "eval_terrorism,organized_crime/fpr": 0.0020458470980832724, "eval_terrorism,organized_crime/precision": 0.48739495798319327, "eval_terrorism,organized_crime/recall": 0.24116424116424118, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9217486775127258, "eval_violence,aiding_and_abetting,incitement/f1": 0.8547251389746757, "eval_violence,aiding_and_abetting,incitement/fpr": 0.05779429762930044, "eval_violence,aiding_and_abetting,incitement/precision": 0.8443983402489627, "eval_violence,aiding_and_abetting,incitement/recall": 0.8653076538269134, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 21140 }, { "epoch": 2.501774308019872, "grad_norm": 0.5136081576347351, "learning_rate": 4.5528609049557095e-06, "loss": 0.0855, "step": 21150 }, { "epoch": 2.5029571800331203, "grad_norm": 0.499203622341156, "learning_rate": 4.552142686138377e-06, "loss": 0.0787, "step": 21160 }, { "epoch": 2.5041400520463686, "grad_norm": 0.5025759339332581, "learning_rate": 4.551424467321043e-06, "loss": 0.0751, "step": 21170 }, { "epoch": 2.505322924059617, "grad_norm": 0.4376293122768402, "learning_rate": 4.550706248503711e-06, "loss": 0.0789, "step": 21180 }, { "epoch": 2.506505796072865, "grad_norm": 0.570671021938324, "learning_rate": 4.549988029686377e-06, "loss": 0.0761, "step": 21190 }, { "epoch": 2.507688668086113, "grad_norm": 0.6300224661827087, "learning_rate": 4.549269810869045e-06, "loss": 0.0838, "step": 21200 }, { "epoch": 2.5088715400993613, "grad_norm": 0.4575369358062744, "learning_rate": 4.548551592051712e-06, "loss": 0.0765, "step": 21210 }, { "epoch": 2.510054412112609, "grad_norm": 0.5600529909133911, "learning_rate": 4.547833373234379e-06, "loss": 0.0842, "step": 21220 }, { "epoch": 2.5112372841258574, "grad_norm": 0.7562234997749329, "learning_rate": 4.547115154417047e-06, "loss": 0.0824, "step": 21230 }, { "epoch": 2.5124201561391057, "grad_norm": 0.33872631192207336, "learning_rate": 4.546396935599713e-06, "loss": 0.078, "step": 21240 }, { "epoch": 2.513603028152354, "grad_norm": 0.5224665403366089, "learning_rate": 4.5456787167823804e-06, "loss": 0.0828, "step": 21250 }, { "epoch": 2.5147859001656023, "grad_norm": 0.5235574841499329, "learning_rate": 4.5449604979650465e-06, "loss": 0.0867, "step": 21260 }, { "epoch": 2.51596877217885, "grad_norm": 0.5171691179275513, "learning_rate": 4.544242279147714e-06, "loss": 0.0839, "step": 21270 }, { "epoch": 2.5171516441920985, "grad_norm": 0.4633418321609497, "learning_rate": 4.54352406033038e-06, "loss": 0.0856, "step": 21280 }, { "epoch": 2.5183345162053463, "grad_norm": 0.4685080945491791, "learning_rate": 4.542805841513048e-06, "loss": 0.0764, "step": 21290 }, { "epoch": 2.5195173882185946, "grad_norm": 0.44002699851989746, "learning_rate": 4.542087622695715e-06, "loss": 0.079, "step": 21300 }, { "epoch": 2.520700260231843, "grad_norm": 0.4958799481391907, "learning_rate": 4.541369403878382e-06, "loss": 0.0775, "step": 21310 }, { "epoch": 2.521883132245091, "grad_norm": 0.5143820643424988, "learning_rate": 4.540651185061049e-06, "loss": 0.0803, "step": 21320 }, { "epoch": 2.5230660042583395, "grad_norm": 0.49474596977233887, "learning_rate": 4.539932966243716e-06, "loss": 0.0739, "step": 21330 }, { "epoch": 2.5242488762715873, "grad_norm": 0.4815296232700348, "learning_rate": 4.539214747426383e-06, "loss": 0.086, "step": 21340 }, { "epoch": 2.5254317482848356, "grad_norm": 0.46011093258857727, "learning_rate": 4.53849652860905e-06, "loss": 0.0803, "step": 21350 }, { "epoch": 2.5266146202980835, "grad_norm": 0.3654122054576874, "learning_rate": 4.537778309791717e-06, "loss": 0.0807, "step": 21360 }, { "epoch": 2.5277974923113318, "grad_norm": 0.5350679755210876, "learning_rate": 4.5370600909743835e-06, "loss": 0.0767, "step": 21370 }, { "epoch": 2.52898036432458, "grad_norm": 0.4901530146598816, "learning_rate": 4.5363418721570504e-06, "loss": 0.0857, "step": 21380 }, { "epoch": 2.5301632363378284, "grad_norm": 0.5094398260116577, "learning_rate": 4.535623653339717e-06, "loss": 0.0768, "step": 21390 }, { "epoch": 2.5313461083510767, "grad_norm": 0.5787261128425598, "learning_rate": 4.534905434522384e-06, "loss": 0.0776, "step": 21400 }, { "epoch": 2.5325289803643245, "grad_norm": 0.46654045581817627, "learning_rate": 4.534187215705052e-06, "loss": 0.0896, "step": 21410 }, { "epoch": 2.533711852377573, "grad_norm": 0.4515915811061859, "learning_rate": 4.533468996887718e-06, "loss": 0.0859, "step": 21420 }, { "epoch": 2.5348947243908206, "grad_norm": 0.431338369846344, "learning_rate": 4.532750778070386e-06, "loss": 0.0835, "step": 21430 }, { "epoch": 2.536077596404069, "grad_norm": 0.3582168519496918, "learning_rate": 4.532032559253052e-06, "loss": 0.0737, "step": 21440 }, { "epoch": 2.5372604684173172, "grad_norm": 0.38403138518333435, "learning_rate": 4.53131434043572e-06, "loss": 0.0879, "step": 21450 }, { "epoch": 2.5384433404305655, "grad_norm": 0.3991612195968628, "learning_rate": 4.530596121618386e-06, "loss": 0.0853, "step": 21460 }, { "epoch": 2.539626212443814, "grad_norm": 0.49279144406318665, "learning_rate": 4.529877902801054e-06, "loss": 0.0753, "step": 21470 }, { "epoch": 2.5408090844570617, "grad_norm": 0.5508975982666016, "learning_rate": 4.5291596839837205e-06, "loss": 0.0811, "step": 21480 }, { "epoch": 2.54199195647031, "grad_norm": 0.4104280173778534, "learning_rate": 4.5284414651663874e-06, "loss": 0.0822, "step": 21490 }, { "epoch": 2.543174828483558, "grad_norm": 0.4417148232460022, "learning_rate": 4.527723246349054e-06, "loss": 0.0747, "step": 21500 }, { "epoch": 2.544357700496806, "grad_norm": 0.3788343667984009, "learning_rate": 4.527005027531721e-06, "loss": 0.0756, "step": 21510 }, { "epoch": 2.5455405725100544, "grad_norm": 0.36975792050361633, "learning_rate": 4.526286808714389e-06, "loss": 0.0715, "step": 21520 }, { "epoch": 2.5467234445233027, "grad_norm": 0.6049025654792786, "learning_rate": 4.525568589897055e-06, "loss": 0.0839, "step": 21530 }, { "epoch": 2.547906316536551, "grad_norm": 0.6820806860923767, "learning_rate": 4.524850371079723e-06, "loss": 0.0791, "step": 21540 }, { "epoch": 2.549089188549799, "grad_norm": 0.6281963586807251, "learning_rate": 4.524132152262389e-06, "loss": 0.0815, "step": 21550 }, { "epoch": 2.550272060563047, "grad_norm": 0.45937925577163696, "learning_rate": 4.523413933445057e-06, "loss": 0.081, "step": 21560 }, { "epoch": 2.5514549325762954, "grad_norm": 0.44279342889785767, "learning_rate": 4.522695714627724e-06, "loss": 0.0794, "step": 21570 }, { "epoch": 2.5526378045895433, "grad_norm": 0.5542247891426086, "learning_rate": 4.521977495810391e-06, "loss": 0.0829, "step": 21580 }, { "epoch": 2.5538206766027916, "grad_norm": 0.3933584988117218, "learning_rate": 4.5212592769930575e-06, "loss": 0.0828, "step": 21590 }, { "epoch": 2.55500354861604, "grad_norm": 0.5883395075798035, "learning_rate": 4.5205410581757244e-06, "loss": 0.0805, "step": 21600 }, { "epoch": 2.556186420629288, "grad_norm": 0.5045763850212097, "learning_rate": 4.519822839358391e-06, "loss": 0.0778, "step": 21610 }, { "epoch": 2.557369292642536, "grad_norm": 0.5200919508934021, "learning_rate": 4.519104620541058e-06, "loss": 0.0815, "step": 21620 }, { "epoch": 2.5585521646557843, "grad_norm": 0.44000041484832764, "learning_rate": 4.518386401723725e-06, "loss": 0.0734, "step": 21630 }, { "epoch": 2.5597350366690326, "grad_norm": 0.4748843014240265, "learning_rate": 4.517668182906392e-06, "loss": 0.0787, "step": 21640 }, { "epoch": 2.5609179086822804, "grad_norm": 0.5557259321212769, "learning_rate": 4.516949964089059e-06, "loss": 0.0864, "step": 21650 }, { "epoch": 2.5621007806955287, "grad_norm": 0.48572665452957153, "learning_rate": 4.516231745271726e-06, "loss": 0.0797, "step": 21660 }, { "epoch": 2.563283652708777, "grad_norm": 0.47832968831062317, "learning_rate": 4.515513526454393e-06, "loss": 0.0799, "step": 21670 }, { "epoch": 2.5644665247220253, "grad_norm": 0.500515341758728, "learning_rate": 4.514795307637061e-06, "loss": 0.0774, "step": 21680 }, { "epoch": 2.565649396735273, "grad_norm": 0.4625786542892456, "learning_rate": 4.514077088819727e-06, "loss": 0.0785, "step": 21690 }, { "epoch": 2.5668322687485214, "grad_norm": 0.4779948592185974, "learning_rate": 4.5133588700023945e-06, "loss": 0.0813, "step": 21700 }, { "epoch": 2.5680151407617697, "grad_norm": 0.4325886070728302, "learning_rate": 4.512640651185061e-06, "loss": 0.084, "step": 21710 }, { "epoch": 2.5691980127750176, "grad_norm": 0.5817387104034424, "learning_rate": 4.511922432367728e-06, "loss": 0.0807, "step": 21720 }, { "epoch": 2.570380884788266, "grad_norm": 0.4387485980987549, "learning_rate": 4.5112042135503944e-06, "loss": 0.0747, "step": 21730 }, { "epoch": 2.571563756801514, "grad_norm": 0.44155630469322205, "learning_rate": 4.510485994733062e-06, "loss": 0.0846, "step": 21740 }, { "epoch": 2.5727466288147625, "grad_norm": 0.4378257393836975, "learning_rate": 4.509767775915729e-06, "loss": 0.0852, "step": 21750 }, { "epoch": 2.5739295008280103, "grad_norm": 0.467141717672348, "learning_rate": 4.509049557098396e-06, "loss": 0.0918, "step": 21760 }, { "epoch": 2.5751123728412586, "grad_norm": 0.4679413437843323, "learning_rate": 4.508331338281063e-06, "loss": 0.0819, "step": 21770 }, { "epoch": 2.576295244854507, "grad_norm": 0.7412169575691223, "learning_rate": 4.50761311946373e-06, "loss": 0.0844, "step": 21780 }, { "epoch": 2.5774781168677547, "grad_norm": 0.466885507106781, "learning_rate": 4.506894900646398e-06, "loss": 0.0762, "step": 21790 }, { "epoch": 2.578660988881003, "grad_norm": 0.47488313913345337, "learning_rate": 4.506176681829064e-06, "loss": 0.0807, "step": 21800 }, { "epoch": 2.5798438608942513, "grad_norm": 0.5655505657196045, "learning_rate": 4.5054584630117315e-06, "loss": 0.0718, "step": 21810 }, { "epoch": 2.5810267329074996, "grad_norm": 0.5880069732666016, "learning_rate": 4.504740244194398e-06, "loss": 0.079, "step": 21820 }, { "epoch": 2.5822096049207475, "grad_norm": 0.6439675092697144, "learning_rate": 4.504022025377065e-06, "loss": 0.0796, "step": 21830 }, { "epoch": 2.5833924769339958, "grad_norm": 0.679969847202301, "learning_rate": 4.503303806559731e-06, "loss": 0.0737, "step": 21840 }, { "epoch": 2.584575348947244, "grad_norm": 0.4376768171787262, "learning_rate": 4.502585587742399e-06, "loss": 0.076, "step": 21850 }, { "epoch": 2.585758220960492, "grad_norm": 0.4569198191165924, "learning_rate": 4.501867368925066e-06, "loss": 0.0818, "step": 21860 }, { "epoch": 2.58694109297374, "grad_norm": 0.5264814496040344, "learning_rate": 4.501149150107733e-06, "loss": 0.082, "step": 21870 }, { "epoch": 2.5881239649869885, "grad_norm": 0.4254799485206604, "learning_rate": 4.5004309312904e-06, "loss": 0.0827, "step": 21880 }, { "epoch": 2.589306837000237, "grad_norm": 0.45132508873939514, "learning_rate": 4.499712712473067e-06, "loss": 0.0802, "step": 21890 }, { "epoch": 2.5904897090134846, "grad_norm": 0.4405305087566376, "learning_rate": 4.498994493655734e-06, "loss": 0.0818, "step": 21900 }, { "epoch": 2.591672581026733, "grad_norm": 0.488386869430542, "learning_rate": 4.498276274838401e-06, "loss": 0.0851, "step": 21910 }, { "epoch": 2.592855453039981, "grad_norm": 0.5419790148735046, "learning_rate": 4.497558056021068e-06, "loss": 0.0762, "step": 21920 }, { "epoch": 2.594038325053229, "grad_norm": 0.45194292068481445, "learning_rate": 4.4968398372037346e-06, "loss": 0.0755, "step": 21930 }, { "epoch": 2.5952211970664774, "grad_norm": 0.5286113619804382, "learning_rate": 4.4961216183864015e-06, "loss": 0.0736, "step": 21940 }, { "epoch": 2.5964040690797257, "grad_norm": 0.4518856108188629, "learning_rate": 4.495403399569069e-06, "loss": 0.0763, "step": 21950 }, { "epoch": 2.597586941092974, "grad_norm": 0.4726989269256592, "learning_rate": 4.494685180751735e-06, "loss": 0.0807, "step": 21960 }, { "epoch": 2.598769813106222, "grad_norm": 0.5025595426559448, "learning_rate": 4.493966961934403e-06, "loss": 0.0752, "step": 21970 }, { "epoch": 2.59995268511947, "grad_norm": 0.4806779623031616, "learning_rate": 4.493248743117069e-06, "loss": 0.0933, "step": 21980 }, { "epoch": 2.6011355571327184, "grad_norm": 0.519506573677063, "learning_rate": 4.492530524299737e-06, "loss": 0.0797, "step": 21990 }, { "epoch": 2.6023184291459662, "grad_norm": 0.42776909470558167, "learning_rate": 4.491812305482404e-06, "loss": 0.0843, "step": 22000 }, { "epoch": 2.6035013011592145, "grad_norm": 0.3662411570549011, "learning_rate": 4.491094086665071e-06, "loss": 0.0803, "step": 22010 }, { "epoch": 2.604684173172463, "grad_norm": 0.4683622717857361, "learning_rate": 4.490375867847738e-06, "loss": 0.0756, "step": 22020 }, { "epoch": 2.605867045185711, "grad_norm": 0.5410827994346619, "learning_rate": 4.489657649030405e-06, "loss": 0.0819, "step": 22030 }, { "epoch": 2.607049917198959, "grad_norm": 0.41128960251808167, "learning_rate": 4.4889394302130716e-06, "loss": 0.0764, "step": 22040 }, { "epoch": 2.6082327892122072, "grad_norm": 0.47575390338897705, "learning_rate": 4.4882212113957385e-06, "loss": 0.0867, "step": 22050 }, { "epoch": 2.6094156612254555, "grad_norm": 0.5510316491127014, "learning_rate": 4.487502992578406e-06, "loss": 0.0793, "step": 22060 }, { "epoch": 2.6105985332387034, "grad_norm": 0.47031572461128235, "learning_rate": 4.486784773761072e-06, "loss": 0.0786, "step": 22070 }, { "epoch": 2.6117814052519517, "grad_norm": 0.42571625113487244, "learning_rate": 4.48606655494374e-06, "loss": 0.0819, "step": 22080 }, { "epoch": 2.6129642772652, "grad_norm": 0.5220041871070862, "learning_rate": 4.485348336126406e-06, "loss": 0.0759, "step": 22090 }, { "epoch": 2.6141471492784483, "grad_norm": 0.3739183247089386, "learning_rate": 4.484630117309074e-06, "loss": 0.0785, "step": 22100 }, { "epoch": 2.615330021291696, "grad_norm": 0.5697585940361023, "learning_rate": 4.48391189849174e-06, "loss": 0.0783, "step": 22110 }, { "epoch": 2.6165128933049444, "grad_norm": 0.521039605140686, "learning_rate": 4.483193679674408e-06, "loss": 0.076, "step": 22120 }, { "epoch": 2.6176957653181927, "grad_norm": 0.5400190949440002, "learning_rate": 4.482475460857075e-06, "loss": 0.0859, "step": 22130 }, { "epoch": 2.6188786373314406, "grad_norm": 0.4180915951728821, "learning_rate": 4.481757242039742e-06, "loss": 0.0778, "step": 22140 }, { "epoch": 2.620061509344689, "grad_norm": 0.4916454553604126, "learning_rate": 4.4810390232224086e-06, "loss": 0.0783, "step": 22150 }, { "epoch": 2.621244381357937, "grad_norm": 0.44929301738739014, "learning_rate": 4.4803208044050755e-06, "loss": 0.0798, "step": 22160 }, { "epoch": 2.6224272533711854, "grad_norm": 0.4823136627674103, "learning_rate": 4.479602585587742e-06, "loss": 0.0756, "step": 22170 }, { "epoch": 2.6236101253844333, "grad_norm": 0.3913840651512146, "learning_rate": 4.478884366770409e-06, "loss": 0.0805, "step": 22180 }, { "epoch": 2.6247929973976816, "grad_norm": 0.40715718269348145, "learning_rate": 4.478166147953076e-06, "loss": 0.0777, "step": 22190 }, { "epoch": 2.62597586941093, "grad_norm": 0.5455523133277893, "learning_rate": 4.477447929135743e-06, "loss": 0.0803, "step": 22200 }, { "epoch": 2.6271587414241777, "grad_norm": 0.46028372645378113, "learning_rate": 4.47672971031841e-06, "loss": 0.0784, "step": 22210 }, { "epoch": 2.628341613437426, "grad_norm": 0.5180447101593018, "learning_rate": 4.476011491501078e-06, "loss": 0.0784, "step": 22220 }, { "epoch": 2.6295244854506743, "grad_norm": 0.4879997968673706, "learning_rate": 4.475293272683744e-06, "loss": 0.0691, "step": 22230 }, { "epoch": 2.6307073574639226, "grad_norm": 0.5363999605178833, "learning_rate": 4.474575053866412e-06, "loss": 0.0802, "step": 22240 }, { "epoch": 2.6318902294771704, "grad_norm": 0.4594934582710266, "learning_rate": 4.473856835049079e-06, "loss": 0.0803, "step": 22250 }, { "epoch": 2.6330731014904187, "grad_norm": 0.5892936587333679, "learning_rate": 4.4731386162317456e-06, "loss": 0.0811, "step": 22260 }, { "epoch": 2.634255973503667, "grad_norm": 0.5844414234161377, "learning_rate": 4.4724203974144125e-06, "loss": 0.0849, "step": 22270 }, { "epoch": 2.635438845516915, "grad_norm": 0.44135230779647827, "learning_rate": 4.471702178597079e-06, "loss": 0.0815, "step": 22280 }, { "epoch": 2.636621717530163, "grad_norm": 0.41314342617988586, "learning_rate": 4.470983959779746e-06, "loss": 0.086, "step": 22290 }, { "epoch": 2.6378045895434115, "grad_norm": 0.41554883122444153, "learning_rate": 4.470265740962413e-06, "loss": 0.0775, "step": 22300 }, { "epoch": 2.6389874615566598, "grad_norm": 0.5764572024345398, "learning_rate": 4.46954752214508e-06, "loss": 0.0756, "step": 22310 }, { "epoch": 2.6401703335699076, "grad_norm": 0.45600757002830505, "learning_rate": 4.468829303327747e-06, "loss": 0.0839, "step": 22320 }, { "epoch": 2.641353205583156, "grad_norm": 0.4902828633785248, "learning_rate": 4.468111084510415e-06, "loss": 0.0839, "step": 22330 }, { "epoch": 2.642536077596404, "grad_norm": 0.4417888820171356, "learning_rate": 4.467392865693081e-06, "loss": 0.0805, "step": 22340 }, { "epoch": 2.643718949609652, "grad_norm": 0.6459925174713135, "learning_rate": 4.466674646875749e-06, "loss": 0.0866, "step": 22350 }, { "epoch": 2.6449018216229003, "grad_norm": 0.5976530313491821, "learning_rate": 4.465956428058415e-06, "loss": 0.0797, "step": 22360 }, { "epoch": 2.6460846936361486, "grad_norm": 0.34710612893104553, "learning_rate": 4.4652382092410826e-06, "loss": 0.0777, "step": 22370 }, { "epoch": 2.647267565649397, "grad_norm": 0.4011804759502411, "learning_rate": 4.464519990423749e-06, "loss": 0.0835, "step": 22380 }, { "epoch": 2.6484504376626448, "grad_norm": 0.4088188409805298, "learning_rate": 4.463801771606416e-06, "loss": 0.0843, "step": 22390 }, { "epoch": 2.649633309675893, "grad_norm": 0.49913108348846436, "learning_rate": 4.463083552789083e-06, "loss": 0.0774, "step": 22400 }, { "epoch": 2.6508161816891413, "grad_norm": 0.4357440173625946, "learning_rate": 4.46236533397175e-06, "loss": 0.0772, "step": 22410 }, { "epoch": 2.651999053702389, "grad_norm": 0.4751048982143402, "learning_rate": 4.461647115154417e-06, "loss": 0.0821, "step": 22420 }, { "epoch": 2.6531819257156375, "grad_norm": 0.618244469165802, "learning_rate": 4.460928896337084e-06, "loss": 0.0828, "step": 22430 }, { "epoch": 2.654364797728886, "grad_norm": 0.5170827507972717, "learning_rate": 4.460210677519751e-06, "loss": 0.0805, "step": 22440 }, { "epoch": 2.655547669742134, "grad_norm": 0.540025532245636, "learning_rate": 4.459492458702418e-06, "loss": 0.0858, "step": 22450 }, { "epoch": 2.656730541755382, "grad_norm": 0.5428736805915833, "learning_rate": 4.458774239885085e-06, "loss": 0.0808, "step": 22460 }, { "epoch": 2.65791341376863, "grad_norm": 0.49258267879486084, "learning_rate": 4.458056021067752e-06, "loss": 0.0793, "step": 22470 }, { "epoch": 2.6590962857818785, "grad_norm": 0.5244420766830444, "learning_rate": 4.457337802250419e-06, "loss": 0.0808, "step": 22480 }, { "epoch": 2.6602791577951264, "grad_norm": 0.5105174779891968, "learning_rate": 4.4566195834330865e-06, "loss": 0.0938, "step": 22490 }, { "epoch": 2.6614620298083747, "grad_norm": 0.4632636308670044, "learning_rate": 4.455901364615753e-06, "loss": 0.0794, "step": 22500 }, { "epoch": 2.662644901821623, "grad_norm": 0.40590807795524597, "learning_rate": 4.45518314579842e-06, "loss": 0.075, "step": 22510 }, { "epoch": 2.6638277738348712, "grad_norm": 0.46818673610687256, "learning_rate": 4.454464926981087e-06, "loss": 0.0894, "step": 22520 }, { "epoch": 2.665010645848119, "grad_norm": 0.6016082167625427, "learning_rate": 4.453746708163754e-06, "loss": 0.088, "step": 22530 }, { "epoch": 2.6661935178613674, "grad_norm": 0.533366858959198, "learning_rate": 4.453028489346421e-06, "loss": 0.0778, "step": 22540 }, { "epoch": 2.6673763898746157, "grad_norm": 0.4421257972717285, "learning_rate": 4.452310270529088e-06, "loss": 0.0823, "step": 22550 }, { "epoch": 2.6685592618878635, "grad_norm": 0.42762821912765503, "learning_rate": 4.451592051711755e-06, "loss": 0.08, "step": 22560 }, { "epoch": 2.669742133901112, "grad_norm": 0.39533373713493347, "learning_rate": 4.450873832894422e-06, "loss": 0.0809, "step": 22570 }, { "epoch": 2.67092500591436, "grad_norm": 0.4694071114063263, "learning_rate": 4.450155614077089e-06, "loss": 0.078, "step": 22580 }, { "epoch": 2.6721078779276084, "grad_norm": 0.44924262166023254, "learning_rate": 4.449437395259756e-06, "loss": 0.086, "step": 22590 }, { "epoch": 2.6732907499408562, "grad_norm": 0.5736104249954224, "learning_rate": 4.4487191764424235e-06, "loss": 0.0785, "step": 22600 }, { "epoch": 2.6744736219541045, "grad_norm": 0.5314193964004517, "learning_rate": 4.4480009576250896e-06, "loss": 0.0852, "step": 22610 }, { "epoch": 2.675656493967353, "grad_norm": 0.5016767382621765, "learning_rate": 4.447282738807757e-06, "loss": 0.0778, "step": 22620 }, { "epoch": 2.6768393659806007, "grad_norm": 0.5586536526679993, "learning_rate": 4.446564519990423e-06, "loss": 0.0833, "step": 22630 }, { "epoch": 2.678022237993849, "grad_norm": 0.33202728629112244, "learning_rate": 4.445846301173091e-06, "loss": 0.0759, "step": 22640 }, { "epoch": 2.6792051100070973, "grad_norm": 0.4838477075099945, "learning_rate": 4.445128082355757e-06, "loss": 0.0752, "step": 22650 }, { "epoch": 2.6803879820203456, "grad_norm": 0.41793060302734375, "learning_rate": 4.444409863538425e-06, "loss": 0.0816, "step": 22660 }, { "epoch": 2.6815708540335934, "grad_norm": 0.4471065104007721, "learning_rate": 4.443691644721092e-06, "loss": 0.0804, "step": 22670 }, { "epoch": 2.6827537260468417, "grad_norm": 0.520482063293457, "learning_rate": 4.442973425903759e-06, "loss": 0.0844, "step": 22680 }, { "epoch": 2.68393659806009, "grad_norm": 0.4904128611087799, "learning_rate": 4.442255207086426e-06, "loss": 0.0827, "step": 22690 }, { "epoch": 2.685119470073338, "grad_norm": 0.4732155501842499, "learning_rate": 4.441536988269093e-06, "loss": 0.083, "step": 22700 }, { "epoch": 2.686302342086586, "grad_norm": 0.48728832602500916, "learning_rate": 4.44081876945176e-06, "loss": 0.0804, "step": 22710 }, { "epoch": 2.6874852140998344, "grad_norm": 0.47748222947120667, "learning_rate": 4.4401005506344266e-06, "loss": 0.0833, "step": 22720 }, { "epoch": 2.6886680861130827, "grad_norm": 0.5735906362533569, "learning_rate": 4.4393823318170935e-06, "loss": 0.0822, "step": 22730 }, { "epoch": 2.6898509581263306, "grad_norm": 0.40193358063697815, "learning_rate": 4.43866411299976e-06, "loss": 0.0784, "step": 22740 }, { "epoch": 2.691033830139579, "grad_norm": 0.39076390862464905, "learning_rate": 4.437945894182428e-06, "loss": 0.0886, "step": 22750 }, { "epoch": 2.692216702152827, "grad_norm": 0.4834092855453491, "learning_rate": 4.437227675365094e-06, "loss": 0.0812, "step": 22760 }, { "epoch": 2.693399574166075, "grad_norm": 0.44902050495147705, "learning_rate": 4.436509456547762e-06, "loss": 0.0789, "step": 22770 }, { "epoch": 2.6945824461793233, "grad_norm": 0.5012722015380859, "learning_rate": 4.435791237730429e-06, "loss": 0.0844, "step": 22780 }, { "epoch": 2.6957653181925716, "grad_norm": 0.4558945894241333, "learning_rate": 4.435073018913096e-06, "loss": 0.0766, "step": 22790 }, { "epoch": 2.69694819020582, "grad_norm": 0.40806642174720764, "learning_rate": 4.434354800095763e-06, "loss": 0.0849, "step": 22800 }, { "epoch": 2.6981310622190677, "grad_norm": 0.5097873210906982, "learning_rate": 4.43363658127843e-06, "loss": 0.0829, "step": 22810 }, { "epoch": 2.699313934232316, "grad_norm": 0.5190248489379883, "learning_rate": 4.432918362461097e-06, "loss": 0.0808, "step": 22820 }, { "epoch": 2.7004968062455643, "grad_norm": 0.4540204405784607, "learning_rate": 4.4322001436437636e-06, "loss": 0.0863, "step": 22830 }, { "epoch": 2.701679678258812, "grad_norm": 0.4128114581108093, "learning_rate": 4.4314819248264305e-06, "loss": 0.0823, "step": 22840 }, { "epoch": 2.7028625502720605, "grad_norm": 0.3626920282840729, "learning_rate": 4.430763706009097e-06, "loss": 0.0834, "step": 22850 }, { "epoch": 2.7040454222853088, "grad_norm": 0.5941124558448792, "learning_rate": 4.430045487191764e-06, "loss": 0.0814, "step": 22860 }, { "epoch": 2.705228294298557, "grad_norm": 0.43684470653533936, "learning_rate": 4.429327268374432e-06, "loss": 0.0796, "step": 22870 }, { "epoch": 2.7064111663118053, "grad_norm": 0.4641019105911255, "learning_rate": 4.428609049557098e-06, "loss": 0.0797, "step": 22880 }, { "epoch": 2.707594038325053, "grad_norm": 0.5706600546836853, "learning_rate": 4.427890830739766e-06, "loss": 0.0843, "step": 22890 }, { "epoch": 2.7087769103383015, "grad_norm": 0.4606274366378784, "learning_rate": 4.427172611922432e-06, "loss": 0.0754, "step": 22900 }, { "epoch": 2.7099597823515493, "grad_norm": 0.43816450238227844, "learning_rate": 4.4264543931051e-06, "loss": 0.0729, "step": 22910 }, { "epoch": 2.7111426543647976, "grad_norm": 0.4515572190284729, "learning_rate": 4.425736174287766e-06, "loss": 0.0825, "step": 22920 }, { "epoch": 2.712325526378046, "grad_norm": 0.5326845645904541, "learning_rate": 4.425017955470434e-06, "loss": 0.086, "step": 22930 }, { "epoch": 2.713508398391294, "grad_norm": 0.4525586664676666, "learning_rate": 4.4242997366531006e-06, "loss": 0.0772, "step": 22940 }, { "epoch": 2.7146912704045425, "grad_norm": 0.49735817313194275, "learning_rate": 4.4235815178357675e-06, "loss": 0.075, "step": 22950 }, { "epoch": 2.7158741424177903, "grad_norm": 0.43421050906181335, "learning_rate": 4.422863299018434e-06, "loss": 0.0785, "step": 22960 }, { "epoch": 2.7170570144310386, "grad_norm": 0.6485195159912109, "learning_rate": 4.422145080201101e-06, "loss": 0.0935, "step": 22970 }, { "epoch": 2.7182398864442865, "grad_norm": 0.446837455034256, "learning_rate": 4.421426861383768e-06, "loss": 0.081, "step": 22980 }, { "epoch": 2.719422758457535, "grad_norm": 0.43061506748199463, "learning_rate": 4.420708642566435e-06, "loss": 0.0774, "step": 22990 }, { "epoch": 2.720605630470783, "grad_norm": 0.4682246148586273, "learning_rate": 4.419990423749103e-06, "loss": 0.0777, "step": 23000 }, { "epoch": 2.7217885024840314, "grad_norm": 0.4493151307106018, "learning_rate": 4.419272204931769e-06, "loss": 0.0806, "step": 23010 }, { "epoch": 2.7229713744972797, "grad_norm": 0.4554472267627716, "learning_rate": 4.418553986114437e-06, "loss": 0.0771, "step": 23020 }, { "epoch": 2.7241542465105275, "grad_norm": 0.47791987657546997, "learning_rate": 4.417835767297103e-06, "loss": 0.0863, "step": 23030 }, { "epoch": 2.725337118523776, "grad_norm": 0.5310934782028198, "learning_rate": 4.417117548479771e-06, "loss": 0.083, "step": 23040 }, { "epoch": 2.7265199905370237, "grad_norm": 0.4356756806373596, "learning_rate": 4.4163993296624376e-06, "loss": 0.0836, "step": 23050 }, { "epoch": 2.727702862550272, "grad_norm": 0.5057719945907593, "learning_rate": 4.4156811108451045e-06, "loss": 0.0822, "step": 23060 }, { "epoch": 2.7288857345635202, "grad_norm": 0.4783954620361328, "learning_rate": 4.414962892027771e-06, "loss": 0.082, "step": 23070 }, { "epoch": 2.7300686065767685, "grad_norm": 0.40734249353408813, "learning_rate": 4.414244673210438e-06, "loss": 0.0816, "step": 23080 }, { "epoch": 2.731251478590017, "grad_norm": 0.3985946476459503, "learning_rate": 4.413526454393105e-06, "loss": 0.0809, "step": 23090 }, { "epoch": 2.7324343506032647, "grad_norm": 0.46169307827949524, "learning_rate": 4.412808235575772e-06, "loss": 0.0803, "step": 23100 }, { "epoch": 2.733617222616513, "grad_norm": 0.4182621240615845, "learning_rate": 4.412090016758439e-06, "loss": 0.0785, "step": 23110 }, { "epoch": 2.734800094629761, "grad_norm": 0.5309221744537354, "learning_rate": 4.411371797941106e-06, "loss": 0.0775, "step": 23120 }, { "epoch": 2.735982966643009, "grad_norm": 0.48512542247772217, "learning_rate": 4.410653579123773e-06, "loss": 0.0802, "step": 23130 }, { "epoch": 2.7371658386562574, "grad_norm": 0.5166702270507812, "learning_rate": 4.409935360306441e-06, "loss": 0.0826, "step": 23140 }, { "epoch": 2.7383487106695057, "grad_norm": 0.3692150115966797, "learning_rate": 4.409217141489107e-06, "loss": 0.0738, "step": 23150 }, { "epoch": 2.739531582682754, "grad_norm": 0.5255678296089172, "learning_rate": 4.4084989226717745e-06, "loss": 0.0882, "step": 23160 }, { "epoch": 2.740714454696002, "grad_norm": 0.4492394030094147, "learning_rate": 4.407780703854441e-06, "loss": 0.0756, "step": 23170 }, { "epoch": 2.74189732670925, "grad_norm": 0.4493676424026489, "learning_rate": 4.407062485037108e-06, "loss": 0.0833, "step": 23180 }, { "epoch": 2.743080198722498, "grad_norm": 0.45990654826164246, "learning_rate": 4.4063442662197745e-06, "loss": 0.0804, "step": 23190 }, { "epoch": 2.7442630707357463, "grad_norm": 0.3962289094924927, "learning_rate": 4.405626047402442e-06, "loss": 0.0854, "step": 23200 }, { "epoch": 2.7454459427489946, "grad_norm": 0.5175402164459229, "learning_rate": 4.404907828585109e-06, "loss": 0.0896, "step": 23210 }, { "epoch": 2.746628814762243, "grad_norm": 0.5334787964820862, "learning_rate": 4.404189609767776e-06, "loss": 0.0818, "step": 23220 }, { "epoch": 2.747811686775491, "grad_norm": 0.4181287884712219, "learning_rate": 4.403471390950443e-06, "loss": 0.0762, "step": 23230 }, { "epoch": 2.748994558788739, "grad_norm": 0.548985481262207, "learning_rate": 4.40275317213311e-06, "loss": 0.0843, "step": 23240 }, { "epoch": 2.7501774308019873, "grad_norm": 0.3618430495262146, "learning_rate": 4.402034953315777e-06, "loss": 0.0828, "step": 23250 }, { "epoch": 2.7506505796072864, "eval_accuracy": 0.6904381674817847, "eval_animal_abuse/accuracy": 0.9951924676448082, "eval_animal_abuse/f1": 0.7733333333333333, "eval_animal_abuse/fpr": 0.0015817992124659212, "eval_animal_abuse/precision": 0.8398637137989778, "eval_animal_abuse/recall": 0.7165697674418605, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.996506637388961, "eval_child_abuse/f1": 0.6728971962616822, "eval_child_abuse/fpr": 0.0015556782255231568, "eval_child_abuse/precision": 0.6990291262135923, "eval_child_abuse/recall": 0.6486486486486487, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9726186911534751, "eval_controversial_topics,politics/f1": 0.4596191726854892, "eval_controversial_topics,politics/fpr": 0.008649093904448091, "eval_controversial_topics,politics/precision": 0.5813953488372093, "eval_controversial_topics,politics/recall": 0.3800217155266015, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9571647203646405, "eval_discrimination,stereotype,injustice/f1": 0.7157522905397947, "eval_discrimination,stereotype,injustice/fpr": 0.01874073806339679, "eval_discrimination,stereotype,injustice/precision": 0.757653657396588, "eval_discrimination,stereotype,injustice/recall": 0.6782426778242678, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9743154672788369, "eval_drug_abuse,weapons,banned_substance/f1": 0.7753201396973225, "eval_drug_abuse,weapons,banned_substance/fpr": 0.014490198843604543, "eval_drug_abuse,weapons,banned_substance/precision": 0.7641996557659209, "eval_drug_abuse,weapons,banned_substance/recall": 0.7867690490253987, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9600592208137871, "eval_financial_crime,property_crime,theft/f1": 0.8032451036630337, "eval_financial_crime,property_crime,theft/fpr": 0.02674013600427542, "eval_financial_crime,property_crime,theft/precision": 0.7715680100755667, "eval_financial_crime,property_crime,theft/recall": 0.8376345923773714, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.853777822137938, "eval_flagged/aucpr": 0.9099931114522944, "eval_flagged/f1": 0.8647567467766255, "eval_flagged/fpr": 0.12905524509620026, "eval_flagged/precision": 0.890910820150271, "eval_flagged/recall": 0.840094466533138, "eval_hate_speech,offensive_language/accuracy": 0.9516751505472935, "eval_hate_speech,offensive_language/f1": 0.6884050198433981, "eval_hate_speech,offensive_language/fpr": 0.013338205737255594, "eval_hate_speech,offensive_language/precision": 0.8146737750698146, "eval_hate_speech,offensive_language/recall": 0.5960252600297177, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08165422827005386, "eval_macro_f1": 0.6398347528685616, "eval_macro_precision": 0.7160435180205749, "eval_macro_recall": 0.6089786499806198, "eval_micro_f1": 0.7534035950909955, "eval_micro_precision": 0.7914676168846425, "eval_micro_recall": 0.7188327969130643, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9877233256812057, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.061068702290076333, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0005220349258205202, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.43636363636363634, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.03283173734610123, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8885118275276974, "eval_non_violent_unethical_behavior/f1": 0.6824298711144806, "eval_non_violent_unethical_behavior/fpr": 0.04062863311742227, "eval_non_violent_unethical_behavior/precision": 0.7863070539419087, "eval_non_violent_unethical_behavior/recall": 0.6027959149506111, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9805369797384968, "eval_privacy_violation/f1": 0.8090731070496083, "eval_privacy_violation/fpr": 0.011951424371806517, "eval_privacy_violation/precision": 0.7839974699557243, "eval_privacy_violation/recall": 0.8358057990559676, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.6163, "eval_samples_per_second": 100.422, "eval_self_harm/accuracy": 0.9963569218484879, "eval_self_harm/f1": 0.7292954264524104, "eval_self_harm/fpr": 0.001741926839072756, "eval_self_harm/precision": 0.7393483709273183, "eval_self_harm/recall": 0.7195121951219512, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9846624746315334, "eval_sexually_explicit,adult_content/f1": 0.6673881673881674, "eval_sexually_explicit,adult_content/fpr": 0.006818143078732495, "eval_sexually_explicit,adult_content/precision": 0.6981132075471698, "eval_sexually_explicit,adult_content/recall": 0.6392536281962682, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.57, "eval_terrorism,organized_crime/accuracy": 0.9920650763549257, "eval_terrorism,organized_crime/f1": 0.26275115919629055, "eval_terrorism,organized_crime/fpr": 0.001358308319219222, "eval_terrorism,organized_crime/precision": 0.5120481927710844, "eval_terrorism,organized_crime/recall": 0.17671517671517672, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9223974448547759, "eval_violence,aiding_and_abetting,incitement/f1": 0.8571078506447759, "eval_violence,aiding_and_abetting,incitement/fpr": 0.06037804269978681, "eval_violence,aiding_and_abetting,incitement/precision": 0.8400480336235365, "eval_violence,aiding_and_abetting,incitement/recall": 0.8748749374687343, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 23254 }, { "epoch": 2.751360302815235, "grad_norm": 0.47598403692245483, "learning_rate": 4.401316734498444e-06, "loss": 0.0785, "step": 23260 }, { "epoch": 2.7525431748284834, "grad_norm": 0.4835112690925598, "learning_rate": 4.4005985156811115e-06, "loss": 0.0807, "step": 23270 }, { "epoch": 2.7537260468417317, "grad_norm": 0.4828835129737854, "learning_rate": 4.399880296863778e-06, "loss": 0.0829, "step": 23280 }, { "epoch": 2.75490891885498, "grad_norm": 0.48093798756599426, "learning_rate": 4.399162078046445e-06, "loss": 0.08, "step": 23290 }, { "epoch": 2.7560917908682283, "grad_norm": 0.5895255208015442, "learning_rate": 4.3984438592291115e-06, "loss": 0.0793, "step": 23300 }, { "epoch": 2.757274662881476, "grad_norm": 0.5164315700531006, "learning_rate": 4.397725640411779e-06, "loss": 0.0813, "step": 23310 }, { "epoch": 2.7584575348947244, "grad_norm": 0.33538928627967834, "learning_rate": 4.397007421594446e-06, "loss": 0.079, "step": 23320 }, { "epoch": 2.7596404069079723, "grad_norm": 0.5004042387008667, "learning_rate": 4.396289202777113e-06, "loss": 0.0799, "step": 23330 }, { "epoch": 2.7608232789212206, "grad_norm": 0.48051318526268005, "learning_rate": 4.39557098395978e-06, "loss": 0.0865, "step": 23340 }, { "epoch": 2.762006150934469, "grad_norm": 0.5883886814117432, "learning_rate": 4.394852765142447e-06, "loss": 0.0793, "step": 23350 }, { "epoch": 2.763189022947717, "grad_norm": 0.4226415753364563, "learning_rate": 4.394134546325114e-06, "loss": 0.0847, "step": 23360 }, { "epoch": 2.7643718949609655, "grad_norm": 0.4184028208255768, "learning_rate": 4.393416327507781e-06, "loss": 0.0794, "step": 23370 }, { "epoch": 2.7655547669742133, "grad_norm": 0.45031270384788513, "learning_rate": 4.392698108690448e-06, "loss": 0.0742, "step": 23380 }, { "epoch": 2.7667376389874616, "grad_norm": 0.4973052442073822, "learning_rate": 4.391979889873115e-06, "loss": 0.0803, "step": 23390 }, { "epoch": 2.7679205110007095, "grad_norm": 0.39221951365470886, "learning_rate": 4.3912616710557815e-06, "loss": 0.0805, "step": 23400 }, { "epoch": 2.7691033830139578, "grad_norm": 0.41222083568573, "learning_rate": 4.3905434522384485e-06, "loss": 0.0779, "step": 23410 }, { "epoch": 2.770286255027206, "grad_norm": 0.33635103702545166, "learning_rate": 4.389825233421115e-06, "loss": 0.0731, "step": 23420 }, { "epoch": 2.7714691270404543, "grad_norm": 0.4841151237487793, "learning_rate": 4.389107014603783e-06, "loss": 0.0794, "step": 23430 }, { "epoch": 2.7726519990537026, "grad_norm": 0.47708407044410706, "learning_rate": 4.388388795786449e-06, "loss": 0.082, "step": 23440 }, { "epoch": 2.7738348710669505, "grad_norm": 0.5159716010093689, "learning_rate": 4.387670576969117e-06, "loss": 0.0848, "step": 23450 }, { "epoch": 2.7750177430801988, "grad_norm": 0.44535234570503235, "learning_rate": 4.386952358151783e-06, "loss": 0.078, "step": 23460 }, { "epoch": 2.7762006150934466, "grad_norm": 0.5012584924697876, "learning_rate": 4.386234139334451e-06, "loss": 0.0859, "step": 23470 }, { "epoch": 2.777383487106695, "grad_norm": 0.4438575506210327, "learning_rate": 4.385515920517118e-06, "loss": 0.0806, "step": 23480 }, { "epoch": 2.778566359119943, "grad_norm": 0.44758471846580505, "learning_rate": 4.384797701699785e-06, "loss": 0.0763, "step": 23490 }, { "epoch": 2.7797492311331915, "grad_norm": 0.45385754108428955, "learning_rate": 4.384079482882452e-06, "loss": 0.0897, "step": 23500 }, { "epoch": 2.78093210314644, "grad_norm": 0.3289681673049927, "learning_rate": 4.3833612640651185e-06, "loss": 0.075, "step": 23510 }, { "epoch": 2.7821149751596876, "grad_norm": 0.4561167359352112, "learning_rate": 4.382643045247786e-06, "loss": 0.0728, "step": 23520 }, { "epoch": 2.783297847172936, "grad_norm": 0.6802912354469299, "learning_rate": 4.381924826430452e-06, "loss": 0.0843, "step": 23530 }, { "epoch": 2.784480719186184, "grad_norm": 0.5386157035827637, "learning_rate": 4.38120660761312e-06, "loss": 0.0777, "step": 23540 }, { "epoch": 2.785663591199432, "grad_norm": 0.6276704668998718, "learning_rate": 4.380488388795786e-06, "loss": 0.0771, "step": 23550 }, { "epoch": 2.7868464632126804, "grad_norm": 0.43475690484046936, "learning_rate": 4.379770169978454e-06, "loss": 0.0805, "step": 23560 }, { "epoch": 2.7880293352259287, "grad_norm": 0.5096838474273682, "learning_rate": 4.37905195116112e-06, "loss": 0.0738, "step": 23570 }, { "epoch": 2.789212207239177, "grad_norm": 0.37858572602272034, "learning_rate": 4.378333732343788e-06, "loss": 0.0803, "step": 23580 }, { "epoch": 2.790395079252425, "grad_norm": 0.4833757281303406, "learning_rate": 4.377615513526455e-06, "loss": 0.0755, "step": 23590 }, { "epoch": 2.791577951265673, "grad_norm": 0.5018813014030457, "learning_rate": 4.376897294709122e-06, "loss": 0.0731, "step": 23600 }, { "epoch": 2.792760823278921, "grad_norm": 0.4879356026649475, "learning_rate": 4.376179075891789e-06, "loss": 0.0848, "step": 23610 }, { "epoch": 2.7939436952921692, "grad_norm": 0.4296044707298279, "learning_rate": 4.3754608570744555e-06, "loss": 0.0818, "step": 23620 }, { "epoch": 2.7951265673054175, "grad_norm": 0.4931047260761261, "learning_rate": 4.3747426382571225e-06, "loss": 0.0729, "step": 23630 }, { "epoch": 2.796309439318666, "grad_norm": 0.4535120129585266, "learning_rate": 4.374024419439789e-06, "loss": 0.076, "step": 23640 }, { "epoch": 2.797492311331914, "grad_norm": 0.3919844329357147, "learning_rate": 4.373306200622456e-06, "loss": 0.071, "step": 23650 }, { "epoch": 2.798675183345162, "grad_norm": 0.4502662122249603, "learning_rate": 4.372587981805123e-06, "loss": 0.0827, "step": 23660 }, { "epoch": 2.7998580553584103, "grad_norm": 0.5214430689811707, "learning_rate": 4.37186976298779e-06, "loss": 0.0813, "step": 23670 }, { "epoch": 2.8010409273716586, "grad_norm": 0.4398854076862335, "learning_rate": 4.371151544170457e-06, "loss": 0.0822, "step": 23680 }, { "epoch": 2.8022237993849064, "grad_norm": 0.4129050374031067, "learning_rate": 4.370433325353124e-06, "loss": 0.0782, "step": 23690 }, { "epoch": 2.8034066713981547, "grad_norm": 0.47273722290992737, "learning_rate": 4.369715106535792e-06, "loss": 0.0909, "step": 23700 }, { "epoch": 2.804589543411403, "grad_norm": 0.47969067096710205, "learning_rate": 4.368996887718458e-06, "loss": 0.0796, "step": 23710 }, { "epoch": 2.8057724154246513, "grad_norm": 0.4781343638896942, "learning_rate": 4.368278668901126e-06, "loss": 0.0774, "step": 23720 }, { "epoch": 2.806955287437899, "grad_norm": 0.3876035511493683, "learning_rate": 4.367560450083792e-06, "loss": 0.0766, "step": 23730 }, { "epoch": 2.8081381594511474, "grad_norm": 0.4562600255012512, "learning_rate": 4.3668422312664595e-06, "loss": 0.0717, "step": 23740 }, { "epoch": 2.8093210314643957, "grad_norm": 0.4738783538341522, "learning_rate": 4.3661240124491255e-06, "loss": 0.0802, "step": 23750 }, { "epoch": 2.8105039034776436, "grad_norm": 0.46035507321357727, "learning_rate": 4.365405793631793e-06, "loss": 0.0783, "step": 23760 }, { "epoch": 2.811686775490892, "grad_norm": 0.42247578501701355, "learning_rate": 4.36468757481446e-06, "loss": 0.0756, "step": 23770 }, { "epoch": 2.81286964750414, "grad_norm": 0.5339348912239075, "learning_rate": 4.363969355997127e-06, "loss": 0.0796, "step": 23780 }, { "epoch": 2.8140525195173884, "grad_norm": 0.5507614612579346, "learning_rate": 4.363251137179795e-06, "loss": 0.084, "step": 23790 }, { "epoch": 2.8152353915306363, "grad_norm": 0.5076237916946411, "learning_rate": 4.362532918362461e-06, "loss": 0.0781, "step": 23800 }, { "epoch": 2.8164182635438846, "grad_norm": 0.49668851494789124, "learning_rate": 4.361814699545129e-06, "loss": 0.0766, "step": 23810 }, { "epoch": 2.817601135557133, "grad_norm": 0.4360135495662689, "learning_rate": 4.361096480727795e-06, "loss": 0.0861, "step": 23820 }, { "epoch": 2.8187840075703807, "grad_norm": 0.4326266944408417, "learning_rate": 4.360378261910463e-06, "loss": 0.0795, "step": 23830 }, { "epoch": 2.819966879583629, "grad_norm": 0.530697226524353, "learning_rate": 4.359660043093129e-06, "loss": 0.0797, "step": 23840 }, { "epoch": 2.8211497515968773, "grad_norm": 0.5380526781082153, "learning_rate": 4.3589418242757965e-06, "loss": 0.0781, "step": 23850 }, { "epoch": 2.8223326236101256, "grad_norm": 0.4436025619506836, "learning_rate": 4.358223605458463e-06, "loss": 0.0802, "step": 23860 }, { "epoch": 2.8235154956233735, "grad_norm": 0.41049638390541077, "learning_rate": 4.35750538664113e-06, "loss": 0.0767, "step": 23870 }, { "epoch": 2.8246983676366217, "grad_norm": 0.5323441028594971, "learning_rate": 4.356787167823797e-06, "loss": 0.0773, "step": 23880 }, { "epoch": 2.82588123964987, "grad_norm": 0.4777607023715973, "learning_rate": 4.356068949006464e-06, "loss": 0.0872, "step": 23890 }, { "epoch": 2.827064111663118, "grad_norm": 1.1803984642028809, "learning_rate": 4.355350730189131e-06, "loss": 0.0814, "step": 23900 }, { "epoch": 2.828246983676366, "grad_norm": 0.43950212001800537, "learning_rate": 4.354632511371798e-06, "loss": 0.0787, "step": 23910 }, { "epoch": 2.8294298556896145, "grad_norm": 0.4280116558074951, "learning_rate": 4.353914292554465e-06, "loss": 0.0751, "step": 23920 }, { "epoch": 2.8306127277028628, "grad_norm": 0.500647246837616, "learning_rate": 4.353196073737132e-06, "loss": 0.0808, "step": 23930 }, { "epoch": 2.8317955997161106, "grad_norm": 0.5098908543586731, "learning_rate": 4.352477854919799e-06, "loss": 0.0846, "step": 23940 }, { "epoch": 2.832978471729359, "grad_norm": 0.48183590173721313, "learning_rate": 4.351759636102466e-06, "loss": 0.0829, "step": 23950 }, { "epoch": 2.834161343742607, "grad_norm": 0.5114542245864868, "learning_rate": 4.351041417285133e-06, "loss": 0.0785, "step": 23960 }, { "epoch": 2.835344215755855, "grad_norm": 0.42791086435317993, "learning_rate": 4.3503231984678e-06, "loss": 0.0834, "step": 23970 }, { "epoch": 2.8365270877691033, "grad_norm": 0.4425320327281952, "learning_rate": 4.3496049796504665e-06, "loss": 0.0764, "step": 23980 }, { "epoch": 2.8377099597823516, "grad_norm": 0.43707016110420227, "learning_rate": 4.348886760833134e-06, "loss": 0.0802, "step": 23990 }, { "epoch": 2.8388928317956, "grad_norm": 0.5231457352638245, "learning_rate": 4.3481685420158e-06, "loss": 0.0753, "step": 24000 }, { "epoch": 2.8400757038088478, "grad_norm": 0.6172152161598206, "learning_rate": 4.347450323198468e-06, "loss": 0.0857, "step": 24010 }, { "epoch": 2.841258575822096, "grad_norm": 0.42436373233795166, "learning_rate": 4.346732104381135e-06, "loss": 0.0849, "step": 24020 }, { "epoch": 2.8424414478353444, "grad_norm": 0.41213440895080566, "learning_rate": 4.346013885563802e-06, "loss": 0.0787, "step": 24030 }, { "epoch": 2.843624319848592, "grad_norm": 0.46816352009773254, "learning_rate": 4.345295666746469e-06, "loss": 0.0812, "step": 24040 }, { "epoch": 2.8448071918618405, "grad_norm": 0.454588919878006, "learning_rate": 4.344577447929136e-06, "loss": 0.087, "step": 24050 }, { "epoch": 2.845990063875089, "grad_norm": 0.41554099321365356, "learning_rate": 4.3438592291118035e-06, "loss": 0.078, "step": 24060 }, { "epoch": 2.847172935888337, "grad_norm": 0.3863159120082855, "learning_rate": 4.34314101029447e-06, "loss": 0.0837, "step": 24070 }, { "epoch": 2.848355807901585, "grad_norm": 0.48252072930336, "learning_rate": 4.342422791477137e-06, "loss": 0.084, "step": 24080 }, { "epoch": 2.8495386799148332, "grad_norm": 0.3151131570339203, "learning_rate": 4.3417045726598034e-06, "loss": 0.0781, "step": 24090 }, { "epoch": 2.8507215519280815, "grad_norm": 0.4376365840435028, "learning_rate": 4.340986353842471e-06, "loss": 0.0786, "step": 24100 }, { "epoch": 2.8519044239413294, "grad_norm": 0.46426063776016235, "learning_rate": 4.340268135025137e-06, "loss": 0.0805, "step": 24110 }, { "epoch": 2.8530872959545777, "grad_norm": 0.5775500535964966, "learning_rate": 4.339549916207805e-06, "loss": 0.0827, "step": 24120 }, { "epoch": 2.854270167967826, "grad_norm": 0.45915812253952026, "learning_rate": 4.338831697390472e-06, "loss": 0.0845, "step": 24130 }, { "epoch": 2.8554530399810742, "grad_norm": 0.710263192653656, "learning_rate": 4.338113478573139e-06, "loss": 0.0849, "step": 24140 }, { "epoch": 2.856635911994322, "grad_norm": 0.3830888569355011, "learning_rate": 4.337395259755806e-06, "loss": 0.0807, "step": 24150 }, { "epoch": 2.8578187840075704, "grad_norm": 0.48624470829963684, "learning_rate": 4.336677040938473e-06, "loss": 0.0787, "step": 24160 }, { "epoch": 2.8590016560208187, "grad_norm": 0.4780791997909546, "learning_rate": 4.33595882212114e-06, "loss": 0.0813, "step": 24170 }, { "epoch": 2.8601845280340665, "grad_norm": 0.4921967089176178, "learning_rate": 4.335240603303807e-06, "loss": 0.0823, "step": 24180 }, { "epoch": 2.861367400047315, "grad_norm": 0.47672706842422485, "learning_rate": 4.3345223844864735e-06, "loss": 0.0871, "step": 24190 }, { "epoch": 2.862550272060563, "grad_norm": 0.5176831483840942, "learning_rate": 4.3338041656691404e-06, "loss": 0.0747, "step": 24200 }, { "epoch": 2.8637331440738114, "grad_norm": 0.50445157289505, "learning_rate": 4.333085946851807e-06, "loss": 0.0825, "step": 24210 }, { "epoch": 2.8649160160870593, "grad_norm": 0.560304582118988, "learning_rate": 4.332367728034474e-06, "loss": 0.0852, "step": 24220 }, { "epoch": 2.8660988881003076, "grad_norm": 0.4839022159576416, "learning_rate": 4.331649509217141e-06, "loss": 0.0796, "step": 24230 }, { "epoch": 2.867281760113556, "grad_norm": 0.5501173138618469, "learning_rate": 4.330931290399809e-06, "loss": 0.0854, "step": 24240 }, { "epoch": 2.8684646321268037, "grad_norm": 0.40604788064956665, "learning_rate": 4.330213071582475e-06, "loss": 0.0819, "step": 24250 }, { "epoch": 2.869647504140052, "grad_norm": 0.5356053709983826, "learning_rate": 4.329494852765143e-06, "loss": 0.0914, "step": 24260 }, { "epoch": 2.8708303761533003, "grad_norm": 0.5419309139251709, "learning_rate": 4.32877663394781e-06, "loss": 0.0835, "step": 24270 }, { "epoch": 2.8720132481665486, "grad_norm": 0.4809447228908539, "learning_rate": 4.328058415130477e-06, "loss": 0.0798, "step": 24280 }, { "epoch": 2.8731961201797964, "grad_norm": 0.49625077843666077, "learning_rate": 4.327340196313144e-06, "loss": 0.0863, "step": 24290 }, { "epoch": 2.8743789921930447, "grad_norm": 0.4663814902305603, "learning_rate": 4.3266219774958105e-06, "loss": 0.0726, "step": 24300 }, { "epoch": 2.875561864206293, "grad_norm": 0.4592251181602478, "learning_rate": 4.3259037586784774e-06, "loss": 0.0806, "step": 24310 }, { "epoch": 2.876744736219541, "grad_norm": 0.5519812107086182, "learning_rate": 4.325185539861144e-06, "loss": 0.0845, "step": 24320 }, { "epoch": 2.877927608232789, "grad_norm": 0.37868112325668335, "learning_rate": 4.324467321043811e-06, "loss": 0.0801, "step": 24330 }, { "epoch": 2.8791104802460374, "grad_norm": 0.40410101413726807, "learning_rate": 4.323749102226478e-06, "loss": 0.0746, "step": 24340 }, { "epoch": 2.8802933522592857, "grad_norm": 0.4261287450790405, "learning_rate": 4.323030883409146e-06, "loss": 0.0752, "step": 24350 }, { "epoch": 2.8814762242725336, "grad_norm": 0.42709773778915405, "learning_rate": 4.322312664591812e-06, "loss": 0.0741, "step": 24360 }, { "epoch": 2.882659096285782, "grad_norm": 0.5374822616577148, "learning_rate": 4.32159444577448e-06, "loss": 0.0805, "step": 24370 }, { "epoch": 2.88384196829903, "grad_norm": 0.53302001953125, "learning_rate": 4.320876226957146e-06, "loss": 0.0846, "step": 24380 }, { "epoch": 2.885024840312278, "grad_norm": 0.4146970510482788, "learning_rate": 4.320158008139814e-06, "loss": 0.0806, "step": 24390 }, { "epoch": 2.8862077123255263, "grad_norm": 0.5116605162620544, "learning_rate": 4.319439789322481e-06, "loss": 0.0756, "step": 24400 }, { "epoch": 2.8873905843387746, "grad_norm": 0.5255545973777771, "learning_rate": 4.3187215705051475e-06, "loss": 0.0795, "step": 24410 }, { "epoch": 2.888573456352023, "grad_norm": 0.5663859844207764, "learning_rate": 4.3180033516878144e-06, "loss": 0.0889, "step": 24420 }, { "epoch": 2.8897563283652707, "grad_norm": 0.5044987797737122, "learning_rate": 4.317285132870481e-06, "loss": 0.0815, "step": 24430 }, { "epoch": 2.890939200378519, "grad_norm": 0.46279826760292053, "learning_rate": 4.316566914053148e-06, "loss": 0.0717, "step": 24440 }, { "epoch": 2.8921220723917673, "grad_norm": 0.5071410536766052, "learning_rate": 4.315848695235815e-06, "loss": 0.0812, "step": 24450 }, { "epoch": 2.893304944405015, "grad_norm": 0.327825129032135, "learning_rate": 4.315130476418482e-06, "loss": 0.0802, "step": 24460 }, { "epoch": 2.8944878164182635, "grad_norm": 0.4249825179576874, "learning_rate": 4.314412257601149e-06, "loss": 0.0765, "step": 24470 }, { "epoch": 2.8956706884315118, "grad_norm": 0.5437846183776855, "learning_rate": 4.313694038783816e-06, "loss": 0.079, "step": 24480 }, { "epoch": 2.89685356044476, "grad_norm": 0.5648318529129028, "learning_rate": 4.312975819966483e-06, "loss": 0.0871, "step": 24490 }, { "epoch": 2.898036432458008, "grad_norm": 0.46478673815727234, "learning_rate": 4.31225760114915e-06, "loss": 0.0824, "step": 24500 }, { "epoch": 2.899219304471256, "grad_norm": 0.44929036498069763, "learning_rate": 4.311539382331818e-06, "loss": 0.0796, "step": 24510 }, { "epoch": 2.9004021764845045, "grad_norm": 0.38870713114738464, "learning_rate": 4.3108211635144845e-06, "loss": 0.0819, "step": 24520 }, { "epoch": 2.9015850484977523, "grad_norm": 0.47616615891456604, "learning_rate": 4.3101029446971514e-06, "loss": 0.0795, "step": 24530 }, { "epoch": 2.9027679205110006, "grad_norm": 0.42569127678871155, "learning_rate": 4.309384725879818e-06, "loss": 0.0772, "step": 24540 }, { "epoch": 2.903950792524249, "grad_norm": 0.5259504318237305, "learning_rate": 4.308666507062485e-06, "loss": 0.0863, "step": 24550 }, { "epoch": 2.905133664537497, "grad_norm": 0.5369442701339722, "learning_rate": 4.307948288245152e-06, "loss": 0.0794, "step": 24560 }, { "epoch": 2.906316536550745, "grad_norm": 0.40406209230422974, "learning_rate": 4.307230069427819e-06, "loss": 0.0667, "step": 24570 }, { "epoch": 2.9074994085639934, "grad_norm": 0.4094081521034241, "learning_rate": 4.306511850610486e-06, "loss": 0.0855, "step": 24580 }, { "epoch": 2.9086822805772417, "grad_norm": 0.5511181354522705, "learning_rate": 4.305793631793153e-06, "loss": 0.0723, "step": 24590 }, { "epoch": 2.9098651525904895, "grad_norm": 0.4727068245410919, "learning_rate": 4.30507541297582e-06, "loss": 0.0808, "step": 24600 }, { "epoch": 2.911048024603738, "grad_norm": 0.4433978497982025, "learning_rate": 4.304357194158487e-06, "loss": 0.0863, "step": 24610 }, { "epoch": 2.912230896616986, "grad_norm": 0.5299775004386902, "learning_rate": 4.303638975341155e-06, "loss": 0.0804, "step": 24620 }, { "epoch": 2.9134137686302344, "grad_norm": 0.40658289194107056, "learning_rate": 4.302920756523821e-06, "loss": 0.0769, "step": 24630 }, { "epoch": 2.9145966406434822, "grad_norm": 0.39335477352142334, "learning_rate": 4.3022025377064884e-06, "loss": 0.0807, "step": 24640 }, { "epoch": 2.9157795126567305, "grad_norm": 0.4153980016708374, "learning_rate": 4.3014843188891545e-06, "loss": 0.0871, "step": 24650 }, { "epoch": 2.916962384669979, "grad_norm": 0.4517754912376404, "learning_rate": 4.300766100071822e-06, "loss": 0.0759, "step": 24660 }, { "epoch": 2.9181452566832267, "grad_norm": 0.5531895756721497, "learning_rate": 4.300047881254488e-06, "loss": 0.0824, "step": 24670 }, { "epoch": 2.919328128696475, "grad_norm": 0.3546598553657532, "learning_rate": 4.299329662437156e-06, "loss": 0.082, "step": 24680 }, { "epoch": 2.9205110007097232, "grad_norm": 0.47106266021728516, "learning_rate": 4.298611443619823e-06, "loss": 0.0823, "step": 24690 }, { "epoch": 2.9216938727229715, "grad_norm": 0.47069719433784485, "learning_rate": 4.29789322480249e-06, "loss": 0.0746, "step": 24700 }, { "epoch": 2.9228767447362194, "grad_norm": 0.4483301341533661, "learning_rate": 4.297175005985157e-06, "loss": 0.0791, "step": 24710 }, { "epoch": 2.9240596167494677, "grad_norm": 0.4702540636062622, "learning_rate": 4.296456787167824e-06, "loss": 0.0811, "step": 24720 }, { "epoch": 2.925242488762716, "grad_norm": 0.41385409235954285, "learning_rate": 4.295738568350491e-06, "loss": 0.0811, "step": 24730 }, { "epoch": 2.926425360775964, "grad_norm": 0.5175915360450745, "learning_rate": 4.295020349533158e-06, "loss": 0.0814, "step": 24740 }, { "epoch": 2.927608232789212, "grad_norm": 0.4296513497829437, "learning_rate": 4.294302130715825e-06, "loss": 0.0784, "step": 24750 }, { "epoch": 2.9287911048024604, "grad_norm": 0.4109087884426117, "learning_rate": 4.2935839118984915e-06, "loss": 0.0716, "step": 24760 }, { "epoch": 2.9299739768157087, "grad_norm": 0.5042969584465027, "learning_rate": 4.2928656930811584e-06, "loss": 0.0762, "step": 24770 }, { "epoch": 2.9311568488289566, "grad_norm": 0.41999319195747375, "learning_rate": 4.292147474263826e-06, "loss": 0.0806, "step": 24780 }, { "epoch": 2.932339720842205, "grad_norm": 0.4503871500492096, "learning_rate": 4.291429255446493e-06, "loss": 0.0854, "step": 24790 }, { "epoch": 2.933522592855453, "grad_norm": 0.347689688205719, "learning_rate": 4.29071103662916e-06, "loss": 0.0753, "step": 24800 }, { "epoch": 2.934705464868701, "grad_norm": 0.5350316166877747, "learning_rate": 4.289992817811827e-06, "loss": 0.0759, "step": 24810 }, { "epoch": 2.9358883368819493, "grad_norm": 0.42705634236335754, "learning_rate": 4.289274598994494e-06, "loss": 0.0823, "step": 24820 }, { "epoch": 2.9370712088951976, "grad_norm": 0.4301840662956238, "learning_rate": 4.288556380177161e-06, "loss": 0.0847, "step": 24830 }, { "epoch": 2.938254080908446, "grad_norm": 0.3903310000896454, "learning_rate": 4.287838161359828e-06, "loss": 0.073, "step": 24840 }, { "epoch": 2.9394369529216937, "grad_norm": 0.4619901776313782, "learning_rate": 4.287119942542495e-06, "loss": 0.0903, "step": 24850 }, { "epoch": 2.940619824934942, "grad_norm": 0.46312034130096436, "learning_rate": 4.286401723725162e-06, "loss": 0.0833, "step": 24860 }, { "epoch": 2.9418026969481903, "grad_norm": 0.5312151908874512, "learning_rate": 4.2856835049078285e-06, "loss": 0.0831, "step": 24870 }, { "epoch": 2.942985568961438, "grad_norm": 0.5601499080657959, "learning_rate": 4.2849652860904954e-06, "loss": 0.0906, "step": 24880 }, { "epoch": 2.9441684409746864, "grad_norm": 0.4858594536781311, "learning_rate": 4.284247067273163e-06, "loss": 0.0794, "step": 24890 }, { "epoch": 2.9453513129879347, "grad_norm": 0.3695913851261139, "learning_rate": 4.283528848455829e-06, "loss": 0.081, "step": 24900 }, { "epoch": 2.946534185001183, "grad_norm": 0.45976272225379944, "learning_rate": 4.282810629638497e-06, "loss": 0.0823, "step": 24910 }, { "epoch": 2.947717057014431, "grad_norm": 0.5093495845794678, "learning_rate": 4.282092410821163e-06, "loss": 0.0718, "step": 24920 }, { "epoch": 2.948899929027679, "grad_norm": 0.6196326613426208, "learning_rate": 4.281374192003831e-06, "loss": 0.0869, "step": 24930 }, { "epoch": 2.9500828010409275, "grad_norm": 0.48339584469795227, "learning_rate": 4.280655973186497e-06, "loss": 0.0739, "step": 24940 }, { "epoch": 2.9512656730541753, "grad_norm": 0.4757029712200165, "learning_rate": 4.279937754369165e-06, "loss": 0.0805, "step": 24950 }, { "epoch": 2.9524485450674236, "grad_norm": 0.46414715051651, "learning_rate": 4.279219535551832e-06, "loss": 0.0846, "step": 24960 }, { "epoch": 2.953631417080672, "grad_norm": 0.39380383491516113, "learning_rate": 4.278501316734499e-06, "loss": 0.0826, "step": 24970 }, { "epoch": 2.95481428909392, "grad_norm": 0.3971128463745117, "learning_rate": 4.2777830979171655e-06, "loss": 0.077, "step": 24980 }, { "epoch": 2.9559971611071685, "grad_norm": 0.4091704487800598, "learning_rate": 4.2770648790998324e-06, "loss": 0.0782, "step": 24990 }, { "epoch": 2.9571800331204163, "grad_norm": 0.3834697902202606, "learning_rate": 4.276346660282499e-06, "loss": 0.0828, "step": 25000 }, { "epoch": 2.9583629051336646, "grad_norm": 0.47628435492515564, "learning_rate": 4.275628441465166e-06, "loss": 0.0869, "step": 25010 }, { "epoch": 2.9595457771469125, "grad_norm": 0.4734494090080261, "learning_rate": 4.274910222647833e-06, "loss": 0.0732, "step": 25020 }, { "epoch": 2.9607286491601608, "grad_norm": 0.4939827024936676, "learning_rate": 4.2741920038305e-06, "loss": 0.0844, "step": 25030 }, { "epoch": 2.961911521173409, "grad_norm": 0.39582183957099915, "learning_rate": 4.273473785013168e-06, "loss": 0.0874, "step": 25040 }, { "epoch": 2.9630943931866573, "grad_norm": 0.474837988615036, "learning_rate": 4.272755566195835e-06, "loss": 0.0782, "step": 25050 }, { "epoch": 2.9642772651999056, "grad_norm": 0.3759056031703949, "learning_rate": 4.272037347378502e-06, "loss": 0.0761, "step": 25060 }, { "epoch": 2.9654601372131535, "grad_norm": 0.4801785349845886, "learning_rate": 4.271319128561169e-06, "loss": 0.0701, "step": 25070 }, { "epoch": 2.966643009226402, "grad_norm": 0.3665676712989807, "learning_rate": 4.270600909743836e-06, "loss": 0.0726, "step": 25080 }, { "epoch": 2.9678258812396496, "grad_norm": 0.5724700689315796, "learning_rate": 4.2698826909265025e-06, "loss": 0.0887, "step": 25090 }, { "epoch": 2.969008753252898, "grad_norm": 0.4849093556404114, "learning_rate": 4.2691644721091694e-06, "loss": 0.0797, "step": 25100 }, { "epoch": 2.970191625266146, "grad_norm": 0.5360398888587952, "learning_rate": 4.268446253291836e-06, "loss": 0.0792, "step": 25110 }, { "epoch": 2.9713744972793945, "grad_norm": 0.4688781499862671, "learning_rate": 4.267728034474503e-06, "loss": 0.0769, "step": 25120 }, { "epoch": 2.972557369292643, "grad_norm": 0.44842132925987244, "learning_rate": 4.26700981565717e-06, "loss": 0.0847, "step": 25130 }, { "epoch": 2.9737402413058907, "grad_norm": 0.5162602066993713, "learning_rate": 4.266291596839837e-06, "loss": 0.0819, "step": 25140 }, { "epoch": 2.974923113319139, "grad_norm": 0.5361006259918213, "learning_rate": 4.265573378022504e-06, "loss": 0.0824, "step": 25150 }, { "epoch": 2.976105985332387, "grad_norm": 0.3910074234008789, "learning_rate": 4.264855159205172e-06, "loss": 0.08, "step": 25160 }, { "epoch": 2.977288857345635, "grad_norm": 0.43428680300712585, "learning_rate": 4.264136940387838e-06, "loss": 0.09, "step": 25170 }, { "epoch": 2.9784717293588834, "grad_norm": 0.3885464072227478, "learning_rate": 4.263418721570506e-06, "loss": 0.0758, "step": 25180 }, { "epoch": 2.9796546013721317, "grad_norm": 0.45806393027305603, "learning_rate": 4.262700502753172e-06, "loss": 0.0786, "step": 25190 }, { "epoch": 2.98083747338538, "grad_norm": 0.393555223941803, "learning_rate": 4.2619822839358395e-06, "loss": 0.0787, "step": 25200 }, { "epoch": 2.982020345398628, "grad_norm": 0.4442451596260071, "learning_rate": 4.261264065118506e-06, "loss": 0.0721, "step": 25210 }, { "epoch": 2.983203217411876, "grad_norm": 0.39381980895996094, "learning_rate": 4.260545846301173e-06, "loss": 0.0756, "step": 25220 }, { "epoch": 2.984386089425124, "grad_norm": 0.43242543935775757, "learning_rate": 4.25982762748384e-06, "loss": 0.0779, "step": 25230 }, { "epoch": 2.9855689614383722, "grad_norm": 0.5106036067008972, "learning_rate": 4.259109408666507e-06, "loss": 0.082, "step": 25240 }, { "epoch": 2.9867518334516205, "grad_norm": 0.5076878666877747, "learning_rate": 4.258391189849174e-06, "loss": 0.0783, "step": 25250 }, { "epoch": 2.987934705464869, "grad_norm": 0.4833449125289917, "learning_rate": 4.257672971031841e-06, "loss": 0.0781, "step": 25260 }, { "epoch": 2.989117577478117, "grad_norm": 0.5515257120132446, "learning_rate": 4.256954752214508e-06, "loss": 0.0806, "step": 25270 }, { "epoch": 2.990300449491365, "grad_norm": 0.3785024583339691, "learning_rate": 4.256236533397175e-06, "loss": 0.0764, "step": 25280 }, { "epoch": 2.9914833215046133, "grad_norm": 0.45574840903282166, "learning_rate": 4.255518314579843e-06, "loss": 0.0798, "step": 25290 }, { "epoch": 2.992666193517861, "grad_norm": 0.5191769599914551, "learning_rate": 4.254800095762509e-06, "loss": 0.0818, "step": 25300 }, { "epoch": 2.9938490655311094, "grad_norm": 0.3385672867298126, "learning_rate": 4.2540818769451765e-06, "loss": 0.081, "step": 25310 }, { "epoch": 2.9950319375443577, "grad_norm": 0.5107899308204651, "learning_rate": 4.253363658127843e-06, "loss": 0.0829, "step": 25320 }, { "epoch": 2.996214809557606, "grad_norm": 0.5128483176231384, "learning_rate": 4.25264543931051e-06, "loss": 0.0824, "step": 25330 }, { "epoch": 2.9973976815708543, "grad_norm": 0.39739060401916504, "learning_rate": 4.251927220493177e-06, "loss": 0.0776, "step": 25340 }, { "epoch": 2.998580553584102, "grad_norm": 0.4617627263069153, "learning_rate": 4.251209001675844e-06, "loss": 0.0768, "step": 25350 }, { "epoch": 2.9997634255973504, "grad_norm": 0.4558548033237457, "learning_rate": 4.250490782858511e-06, "loss": 0.0838, "step": 25360 }, { "epoch": 3.0007097232079487, "eval_accuracy": 0.6889576471371062, "eval_animal_abuse/accuracy": 0.994510430182653, "eval_animal_abuse/f1": 0.7730398899587345, "eval_animal_abuse/fpr": 0.0034328408440749783, "eval_animal_abuse/precision": 0.7336814621409922, "eval_animal_abuse/recall": 0.8168604651162791, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9964234620886981, "eval_child_abuse/f1": 0.6570972886762361, "eval_child_abuse/fpr": 0.0014720396112477183, "eval_child_abuse/precision": 0.7006802721088435, "eval_child_abuse/recall": 0.6186186186186187, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9715041421299531, "eval_controversial_topics,politics/f1": 0.46850760161340366, "eval_controversial_topics,politics/fpr": 0.010742723778143858, "eval_controversial_topics,politics/precision": 0.5467052860246199, "eval_controversial_topics,politics/recall": 0.40988056460369166, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9564327777223276, "eval_discrimination,stereotype,injustice/f1": 0.7228864670405248, "eval_discrimination,stereotype,injustice/fpr": 0.02268044963313691, "eval_discrimination,stereotype,injustice/precision": 0.7313209162920146, "eval_discrimination,stereotype,injustice/recall": 0.7146443514644352, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.974199021858469, "eval_drug_abuse,weapons,banned_substance/f1": 0.7740713765477057, "eval_drug_abuse,weapons,banned_substance/fpr": 0.014490198843604543, "eval_drug_abuse,weapons,banned_substance/precision": 0.7637252083932164, "eval_drug_abuse,weapons,banned_substance/recall": 0.7847017129356173, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9601091259939448, "eval_financial_crime,property_crime,theft/f1": 0.8050723459600065, "eval_financial_crime,property_crime,theft/fpr": 0.02762471665775938, "eval_financial_crime,property_crime,theft/precision": 0.7676329251278872, "eval_financial_crime,property_crime,theft/recall": 0.8463510511023756, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8562065409056127, "eval_flagged/aucpr": 0.9090708301585808, "eval_flagged/f1": 0.8688912482936447, "eval_flagged/fpr": 0.14386978209503754, "eval_flagged/precision": 0.8818929154222729, "eval_flagged/recall": 0.8562673761621477, "eval_hate_speech,offensive_language/accuracy": 0.9506271417639818, "eval_hate_speech,offensive_language/f1": 0.7009873060648801, "eval_hate_speech,offensive_language/fpr": 0.019422620135209172, "eval_hate_speech,offensive_language/precision": 0.765962131219727, "eval_hate_speech,offensive_language/recall": 0.6461738484398217, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08228794485330582, "eval_macro_f1": 0.6418844118541835, "eval_macro_precision": 0.704528612500405, "eval_macro_recall": 0.6272299800362929, "eval_micro_f1": 0.7567282129720675, "eval_micro_precision": 0.7753502863859401, "eval_micro_recall": 0.7389796753027367, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9878730412216788, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.07369758576874205, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.00045467558055335624, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.5178571428571429, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.03967168262653899, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8879628705459627, "eval_non_violent_unethical_behavior/f1": 0.6949590108247656, "eval_non_violent_unethical_behavior/fpr": 0.05109201129380491, "eval_non_violent_unethical_behavior/precision": 0.7571301687555512, "eval_non_violent_unethical_behavior/recall": 0.642223338355935, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9808530458794956, "eval_privacy_violation/f1": 0.8130583076173461, "eval_privacy_violation/fpr": 0.012038916497515203, "eval_privacy_violation/precision": 0.784393607019743, "eval_privacy_violation/recall": 0.8438975050573162, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.4058, "eval_samples_per_second": 100.457, "eval_self_harm/accuracy": 0.9965399075090661, "eval_self_harm/f1": 0.7360406091370558, "eval_self_harm/fpr": 0.0014739380946000244, "eval_self_harm/precision": 0.7671957671957672, "eval_self_harm/recall": 0.7073170731707317, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.984296503310377, "eval_sexually_explicit,adult_content/f1": 0.678254942058623, "eval_sexually_explicit,adult_content/fpr": 0.00838631598684097, "eval_sexually_explicit,adult_content/precision": 0.6691324815063887, "eval_sexually_explicit,adult_content/recall": 0.6876295784381479, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.571, "eval_terrorism,organized_crime/accuracy": 0.9920817114149783, "eval_terrorism,organized_crime/f1": 0.23225806451612904, "eval_terrorism,organized_crime/fpr": 0.001123538980094912, "eval_terrorism,organized_crime/precision": 0.5179856115107914, "eval_terrorism,organized_crime/recall": 0.1496881496881497, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9220980137738297, "eval_violence,aiding_and_abetting,incitement/f1": 0.8564509701744168, "eval_violence,aiding_and_abetting,incitement/fpr": 0.06031004940845822, "eval_violence,aiding_and_abetting,incitement/precision": 0.8399975948529854, "eval_violence,aiding_and_abetting,incitement/recall": 0.8735617808904452, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 25368 }, { "epoch": 3.0009462976105987, "grad_norm": 0.3990446925163269, "learning_rate": 4.249772564041178e-06, "loss": 0.0733, "step": 25370 }, { "epoch": 3.0021291696238466, "grad_norm": 0.3921988010406494, "learning_rate": 4.249054345223845e-06, "loss": 0.0821, "step": 25380 }, { "epoch": 3.003312041637095, "grad_norm": 0.4501315653324127, "learning_rate": 4.248336126406512e-06, "loss": 0.0722, "step": 25390 }, { "epoch": 3.004494913650343, "grad_norm": 0.4528018534183502, "learning_rate": 4.247617907589179e-06, "loss": 0.0763, "step": 25400 }, { "epoch": 3.005677785663591, "grad_norm": 0.4007049798965454, "learning_rate": 4.246899688771846e-06, "loss": 0.0747, "step": 25410 }, { "epoch": 3.0068606576768393, "grad_norm": 0.3600677251815796, "learning_rate": 4.246181469954513e-06, "loss": 0.0775, "step": 25420 }, { "epoch": 3.0080435296900876, "grad_norm": 0.5421508550643921, "learning_rate": 4.24546325113718e-06, "loss": 0.0846, "step": 25430 }, { "epoch": 3.009226401703336, "grad_norm": 0.5508347153663635, "learning_rate": 4.2447450323198465e-06, "loss": 0.0777, "step": 25440 }, { "epoch": 3.0104092737165837, "grad_norm": 0.43491658568382263, "learning_rate": 4.244026813502514e-06, "loss": 0.0749, "step": 25450 }, { "epoch": 3.011592145729832, "grad_norm": 0.5365955829620361, "learning_rate": 4.24330859468518e-06, "loss": 0.0827, "step": 25460 }, { "epoch": 3.0127750177430803, "grad_norm": 0.5059813261032104, "learning_rate": 4.242590375867848e-06, "loss": 0.0857, "step": 25470 }, { "epoch": 3.013957889756328, "grad_norm": 0.6406557559967041, "learning_rate": 4.241872157050514e-06, "loss": 0.0826, "step": 25480 }, { "epoch": 3.0151407617695765, "grad_norm": 0.3868311047554016, "learning_rate": 4.241153938233182e-06, "loss": 0.077, "step": 25490 }, { "epoch": 3.0163236337828248, "grad_norm": 0.4545860290527344, "learning_rate": 4.240435719415849e-06, "loss": 0.0689, "step": 25500 }, { "epoch": 3.017506505796073, "grad_norm": 0.6880464553833008, "learning_rate": 4.239717500598516e-06, "loss": 0.0744, "step": 25510 }, { "epoch": 3.018689377809321, "grad_norm": 0.521814227104187, "learning_rate": 4.238999281781183e-06, "loss": 0.0781, "step": 25520 }, { "epoch": 3.019872249822569, "grad_norm": 0.372196227312088, "learning_rate": 4.23828106296385e-06, "loss": 0.0708, "step": 25530 }, { "epoch": 3.0210551218358175, "grad_norm": 0.4654785990715027, "learning_rate": 4.237562844146517e-06, "loss": 0.0774, "step": 25540 }, { "epoch": 3.0222379938490653, "grad_norm": 0.49794623255729675, "learning_rate": 4.2368446253291835e-06, "loss": 0.0809, "step": 25550 }, { "epoch": 3.0234208658623136, "grad_norm": 0.4278413951396942, "learning_rate": 4.236126406511851e-06, "loss": 0.0753, "step": 25560 }, { "epoch": 3.024603737875562, "grad_norm": 0.4262978136539459, "learning_rate": 4.235408187694517e-06, "loss": 0.0734, "step": 25570 }, { "epoch": 3.02578660988881, "grad_norm": 0.6792017817497253, "learning_rate": 4.234689968877185e-06, "loss": 0.0795, "step": 25580 }, { "epoch": 3.026969481902058, "grad_norm": 0.5204272866249084, "learning_rate": 4.233971750059851e-06, "loss": 0.0762, "step": 25590 }, { "epoch": 3.0281523539153063, "grad_norm": 0.49102869629859924, "learning_rate": 4.233253531242519e-06, "loss": 0.0814, "step": 25600 }, { "epoch": 3.0293352259285546, "grad_norm": 0.5146456956863403, "learning_rate": 4.232535312425186e-06, "loss": 0.0716, "step": 25610 }, { "epoch": 3.0305180979418025, "grad_norm": 0.466389924287796, "learning_rate": 4.231817093607853e-06, "loss": 0.0766, "step": 25620 }, { "epoch": 3.031700969955051, "grad_norm": 0.48365557193756104, "learning_rate": 4.23109887479052e-06, "loss": 0.0742, "step": 25630 }, { "epoch": 3.032883841968299, "grad_norm": 0.4823105037212372, "learning_rate": 4.230380655973187e-06, "loss": 0.0845, "step": 25640 }, { "epoch": 3.0340667139815474, "grad_norm": 0.6901135444641113, "learning_rate": 4.2296624371558536e-06, "loss": 0.0765, "step": 25650 }, { "epoch": 3.035249585994795, "grad_norm": 0.5461510419845581, "learning_rate": 4.2289442183385205e-06, "loss": 0.0693, "step": 25660 }, { "epoch": 3.0364324580080435, "grad_norm": 0.4707520008087158, "learning_rate": 4.228225999521187e-06, "loss": 0.0797, "step": 25670 }, { "epoch": 3.037615330021292, "grad_norm": 0.4101470708847046, "learning_rate": 4.227507780703854e-06, "loss": 0.0727, "step": 25680 }, { "epoch": 3.0387982020345397, "grad_norm": 0.5878470540046692, "learning_rate": 4.226789561886521e-06, "loss": 0.0756, "step": 25690 }, { "epoch": 3.039981074047788, "grad_norm": 0.5163437724113464, "learning_rate": 4.226071343069189e-06, "loss": 0.0695, "step": 25700 }, { "epoch": 3.0411639460610362, "grad_norm": 0.34845077991485596, "learning_rate": 4.225353124251855e-06, "loss": 0.0768, "step": 25710 }, { "epoch": 3.0423468180742845, "grad_norm": 0.3500116169452667, "learning_rate": 4.224634905434523e-06, "loss": 0.0747, "step": 25720 }, { "epoch": 3.0435296900875324, "grad_norm": 0.4491555690765381, "learning_rate": 4.223916686617189e-06, "loss": 0.0688, "step": 25730 }, { "epoch": 3.0447125621007807, "grad_norm": 0.5037168264389038, "learning_rate": 4.223198467799857e-06, "loss": 0.0761, "step": 25740 }, { "epoch": 3.045895434114029, "grad_norm": 0.5036609768867493, "learning_rate": 4.222480248982523e-06, "loss": 0.074, "step": 25750 }, { "epoch": 3.047078306127277, "grad_norm": 0.5039355754852295, "learning_rate": 4.2217620301651906e-06, "loss": 0.0762, "step": 25760 }, { "epoch": 3.048261178140525, "grad_norm": 0.4770478308200836, "learning_rate": 4.2210438113478575e-06, "loss": 0.0782, "step": 25770 }, { "epoch": 3.0494440501537734, "grad_norm": 0.3701343834400177, "learning_rate": 4.220325592530524e-06, "loss": 0.0795, "step": 25780 }, { "epoch": 3.0506269221670217, "grad_norm": 0.4925128221511841, "learning_rate": 4.219607373713191e-06, "loss": 0.0757, "step": 25790 }, { "epoch": 3.0518097941802695, "grad_norm": 0.45410844683647156, "learning_rate": 4.218889154895858e-06, "loss": 0.0735, "step": 25800 }, { "epoch": 3.052992666193518, "grad_norm": 0.41098400950431824, "learning_rate": 4.218170936078526e-06, "loss": 0.0751, "step": 25810 }, { "epoch": 3.054175538206766, "grad_norm": 0.5858665704727173, "learning_rate": 4.217452717261192e-06, "loss": 0.0825, "step": 25820 }, { "epoch": 3.055358410220014, "grad_norm": 0.6130269765853882, "learning_rate": 4.21673449844386e-06, "loss": 0.0785, "step": 25830 }, { "epoch": 3.0565412822332623, "grad_norm": 0.5176180601119995, "learning_rate": 4.216016279626526e-06, "loss": 0.08, "step": 25840 }, { "epoch": 3.0577241542465106, "grad_norm": 0.4414305090904236, "learning_rate": 4.215298060809194e-06, "loss": 0.0681, "step": 25850 }, { "epoch": 3.058907026259759, "grad_norm": 0.4602257311344147, "learning_rate": 4.21457984199186e-06, "loss": 0.0769, "step": 25860 }, { "epoch": 3.0600898982730067, "grad_norm": 0.6573535203933716, "learning_rate": 4.2138616231745276e-06, "loss": 0.0684, "step": 25870 }, { "epoch": 3.061272770286255, "grad_norm": 0.5486814975738525, "learning_rate": 4.2131434043571945e-06, "loss": 0.0746, "step": 25880 }, { "epoch": 3.0624556422995033, "grad_norm": 0.5108104944229126, "learning_rate": 4.212425185539861e-06, "loss": 0.0851, "step": 25890 }, { "epoch": 3.0636385143127516, "grad_norm": 0.45864468812942505, "learning_rate": 4.211706966722528e-06, "loss": 0.0777, "step": 25900 }, { "epoch": 3.0648213863259994, "grad_norm": 0.3681281805038452, "learning_rate": 4.210988747905195e-06, "loss": 0.0717, "step": 25910 }, { "epoch": 3.0660042583392477, "grad_norm": 0.5826372504234314, "learning_rate": 4.210270529087862e-06, "loss": 0.0782, "step": 25920 }, { "epoch": 3.067187130352496, "grad_norm": 0.5170095562934875, "learning_rate": 4.209552310270529e-06, "loss": 0.0789, "step": 25930 }, { "epoch": 3.068370002365744, "grad_norm": 0.490077406167984, "learning_rate": 4.208834091453196e-06, "loss": 0.0706, "step": 25940 }, { "epoch": 3.069552874378992, "grad_norm": 0.43391498923301697, "learning_rate": 4.208115872635863e-06, "loss": 0.0775, "step": 25950 }, { "epoch": 3.0707357463922405, "grad_norm": 0.5557274222373962, "learning_rate": 4.20739765381853e-06, "loss": 0.0793, "step": 25960 }, { "epoch": 3.0719186184054887, "grad_norm": 0.43127337098121643, "learning_rate": 4.206679435001198e-06, "loss": 0.0782, "step": 25970 }, { "epoch": 3.0731014904187366, "grad_norm": 0.5160218477249146, "learning_rate": 4.205961216183864e-06, "loss": 0.078, "step": 25980 }, { "epoch": 3.074284362431985, "grad_norm": 0.45702797174453735, "learning_rate": 4.2052429973665315e-06, "loss": 0.077, "step": 25990 }, { "epoch": 3.075467234445233, "grad_norm": 0.47716906666755676, "learning_rate": 4.2045247785491976e-06, "loss": 0.0768, "step": 26000 }, { "epoch": 3.076650106458481, "grad_norm": 0.794670581817627, "learning_rate": 4.203806559731865e-06, "loss": 0.0872, "step": 26010 }, { "epoch": 3.0778329784717293, "grad_norm": 0.5852227210998535, "learning_rate": 4.203088340914531e-06, "loss": 0.0761, "step": 26020 }, { "epoch": 3.0790158504849776, "grad_norm": 0.5020566582679749, "learning_rate": 4.202370122097199e-06, "loss": 0.0721, "step": 26030 }, { "epoch": 3.080198722498226, "grad_norm": 0.4581356346607208, "learning_rate": 4.201651903279866e-06, "loss": 0.0757, "step": 26040 }, { "epoch": 3.0813815945114738, "grad_norm": 0.5539992451667786, "learning_rate": 4.200933684462533e-06, "loss": 0.0788, "step": 26050 }, { "epoch": 3.082564466524722, "grad_norm": 0.40849658846855164, "learning_rate": 4.2002154656452e-06, "loss": 0.0772, "step": 26060 }, { "epoch": 3.0837473385379703, "grad_norm": 0.5400962233543396, "learning_rate": 4.199497246827867e-06, "loss": 0.077, "step": 26070 }, { "epoch": 3.084930210551218, "grad_norm": 0.4248446524143219, "learning_rate": 4.198779028010535e-06, "loss": 0.0795, "step": 26080 }, { "epoch": 3.0861130825644665, "grad_norm": 0.46899518370628357, "learning_rate": 4.198060809193201e-06, "loss": 0.079, "step": 26090 }, { "epoch": 3.0872959545777148, "grad_norm": 0.6625282764434814, "learning_rate": 4.1973425903758685e-06, "loss": 0.0795, "step": 26100 }, { "epoch": 3.088478826590963, "grad_norm": 0.43462100625038147, "learning_rate": 4.1966243715585346e-06, "loss": 0.073, "step": 26110 }, { "epoch": 3.089661698604211, "grad_norm": 0.5249341726303101, "learning_rate": 4.195906152741202e-06, "loss": 0.0733, "step": 26120 }, { "epoch": 3.090844570617459, "grad_norm": 0.3629348576068878, "learning_rate": 4.195187933923868e-06, "loss": 0.0718, "step": 26130 }, { "epoch": 3.0920274426307075, "grad_norm": 0.5012208223342896, "learning_rate": 4.194469715106536e-06, "loss": 0.0815, "step": 26140 }, { "epoch": 3.0932103146439553, "grad_norm": 0.509267270565033, "learning_rate": 4.193751496289203e-06, "loss": 0.0716, "step": 26150 }, { "epoch": 3.0943931866572036, "grad_norm": 0.5374495387077332, "learning_rate": 4.19303327747187e-06, "loss": 0.0716, "step": 26160 }, { "epoch": 3.095576058670452, "grad_norm": 0.46500200033187866, "learning_rate": 4.192315058654537e-06, "loss": 0.0867, "step": 26170 }, { "epoch": 3.0967589306837002, "grad_norm": 0.4275670647621155, "learning_rate": 4.191596839837204e-06, "loss": 0.0786, "step": 26180 }, { "epoch": 3.097941802696948, "grad_norm": 0.5296131372451782, "learning_rate": 4.190878621019871e-06, "loss": 0.0747, "step": 26190 }, { "epoch": 3.0991246747101964, "grad_norm": 0.49134957790374756, "learning_rate": 4.190160402202538e-06, "loss": 0.0774, "step": 26200 }, { "epoch": 3.1003075467234447, "grad_norm": 0.5478315353393555, "learning_rate": 4.189442183385205e-06, "loss": 0.0773, "step": 26210 }, { "epoch": 3.1014904187366925, "grad_norm": 0.46971628069877625, "learning_rate": 4.1887239645678716e-06, "loss": 0.0687, "step": 26220 }, { "epoch": 3.102673290749941, "grad_norm": 0.4163883626461029, "learning_rate": 4.1880057457505385e-06, "loss": 0.0764, "step": 26230 }, { "epoch": 3.103856162763189, "grad_norm": 0.5021790266036987, "learning_rate": 4.187287526933205e-06, "loss": 0.0767, "step": 26240 }, { "epoch": 3.1050390347764374, "grad_norm": 0.5332714319229126, "learning_rate": 4.186569308115872e-06, "loss": 0.0791, "step": 26250 }, { "epoch": 3.1062219067896852, "grad_norm": 0.6445925235748291, "learning_rate": 4.18585108929854e-06, "loss": 0.0788, "step": 26260 }, { "epoch": 3.1074047788029335, "grad_norm": 0.48212096095085144, "learning_rate": 4.185132870481206e-06, "loss": 0.0778, "step": 26270 }, { "epoch": 3.108587650816182, "grad_norm": 0.5868581533432007, "learning_rate": 4.184414651663874e-06, "loss": 0.0794, "step": 26280 }, { "epoch": 3.1097705228294297, "grad_norm": 0.40161845088005066, "learning_rate": 4.18369643284654e-06, "loss": 0.0803, "step": 26290 }, { "epoch": 3.110953394842678, "grad_norm": 0.42701008915901184, "learning_rate": 4.182978214029208e-06, "loss": 0.0748, "step": 26300 }, { "epoch": 3.1121362668559263, "grad_norm": 0.6539421081542969, "learning_rate": 4.182259995211875e-06, "loss": 0.0755, "step": 26310 }, { "epoch": 3.1133191388691746, "grad_norm": 0.5852883458137512, "learning_rate": 4.181541776394542e-06, "loss": 0.0833, "step": 26320 }, { "epoch": 3.1145020108824224, "grad_norm": 0.45836296677589417, "learning_rate": 4.1808235575772086e-06, "loss": 0.0749, "step": 26330 }, { "epoch": 3.1156848828956707, "grad_norm": 0.4710302948951721, "learning_rate": 4.1801053387598755e-06, "loss": 0.0744, "step": 26340 }, { "epoch": 3.116867754908919, "grad_norm": 0.6011082530021667, "learning_rate": 4.179387119942543e-06, "loss": 0.0857, "step": 26350 }, { "epoch": 3.118050626922167, "grad_norm": 0.4627363681793213, "learning_rate": 4.178668901125209e-06, "loss": 0.0723, "step": 26360 }, { "epoch": 3.119233498935415, "grad_norm": 0.4189067482948303, "learning_rate": 4.177950682307877e-06, "loss": 0.0714, "step": 26370 }, { "epoch": 3.1204163709486634, "grad_norm": 0.40054214000701904, "learning_rate": 4.177232463490543e-06, "loss": 0.0711, "step": 26380 }, { "epoch": 3.1215992429619117, "grad_norm": 0.609768807888031, "learning_rate": 4.176514244673211e-06, "loss": 0.073, "step": 26390 }, { "epoch": 3.1227821149751596, "grad_norm": 0.44757407903671265, "learning_rate": 4.175796025855877e-06, "loss": 0.0728, "step": 26400 }, { "epoch": 3.123964986988408, "grad_norm": 0.4284588098526001, "learning_rate": 4.175077807038545e-06, "loss": 0.0753, "step": 26410 }, { "epoch": 3.125147859001656, "grad_norm": 0.4440842866897583, "learning_rate": 4.174359588221212e-06, "loss": 0.0737, "step": 26420 }, { "epoch": 3.126330731014904, "grad_norm": 0.46968621015548706, "learning_rate": 4.173641369403879e-06, "loss": 0.0736, "step": 26430 }, { "epoch": 3.1275136030281523, "grad_norm": 0.5898688435554504, "learning_rate": 4.1729231505865456e-06, "loss": 0.0761, "step": 26440 }, { "epoch": 3.1286964750414006, "grad_norm": 0.6714507937431335, "learning_rate": 4.1722049317692125e-06, "loss": 0.0763, "step": 26450 }, { "epoch": 3.129879347054649, "grad_norm": 0.39806345105171204, "learning_rate": 4.171486712951879e-06, "loss": 0.082, "step": 26460 }, { "epoch": 3.1310622190678967, "grad_norm": 0.7108418345451355, "learning_rate": 4.170768494134546e-06, "loss": 0.0751, "step": 26470 }, { "epoch": 3.132245091081145, "grad_norm": 0.4301842749118805, "learning_rate": 4.170050275317213e-06, "loss": 0.0804, "step": 26480 }, { "epoch": 3.1334279630943933, "grad_norm": 0.4981456696987152, "learning_rate": 4.16933205649988e-06, "loss": 0.0755, "step": 26490 }, { "epoch": 3.134610835107641, "grad_norm": 0.40608370304107666, "learning_rate": 4.168613837682547e-06, "loss": 0.0758, "step": 26500 }, { "epoch": 3.1357937071208895, "grad_norm": 0.4242158830165863, "learning_rate": 4.167895618865214e-06, "loss": 0.0737, "step": 26510 }, { "epoch": 3.1369765791341377, "grad_norm": 0.6211506128311157, "learning_rate": 4.167177400047881e-06, "loss": 0.077, "step": 26520 }, { "epoch": 3.138159451147386, "grad_norm": 0.4193684756755829, "learning_rate": 4.166459181230549e-06, "loss": 0.0679, "step": 26530 }, { "epoch": 3.139342323160634, "grad_norm": 0.42126336693763733, "learning_rate": 4.165740962413215e-06, "loss": 0.0737, "step": 26540 }, { "epoch": 3.140525195173882, "grad_norm": 0.4125877916812897, "learning_rate": 4.1650227435958825e-06, "loss": 0.0583, "step": 26550 }, { "epoch": 3.1417080671871305, "grad_norm": 0.5140947103500366, "learning_rate": 4.1643045247785495e-06, "loss": 0.0761, "step": 26560 }, { "epoch": 3.1428909392003783, "grad_norm": 0.43297454714775085, "learning_rate": 4.163586305961216e-06, "loss": 0.0798, "step": 26570 }, { "epoch": 3.1440738112136266, "grad_norm": 0.5598752498626709, "learning_rate": 4.162868087143883e-06, "loss": 0.0808, "step": 26580 }, { "epoch": 3.145256683226875, "grad_norm": 0.4987586438655853, "learning_rate": 4.16214986832655e-06, "loss": 0.0749, "step": 26590 }, { "epoch": 3.146439555240123, "grad_norm": 0.4253380000591278, "learning_rate": 4.161431649509217e-06, "loss": 0.0794, "step": 26600 }, { "epoch": 3.147622427253371, "grad_norm": 0.46630245447158813, "learning_rate": 4.160713430691884e-06, "loss": 0.0751, "step": 26610 }, { "epoch": 3.1488052992666193, "grad_norm": 0.5095551013946533, "learning_rate": 4.159995211874552e-06, "loss": 0.0754, "step": 26620 }, { "epoch": 3.1499881712798676, "grad_norm": 0.6077864766120911, "learning_rate": 4.159276993057218e-06, "loss": 0.0776, "step": 26630 }, { "epoch": 3.1511710432931155, "grad_norm": 0.3616574704647064, "learning_rate": 4.158558774239886e-06, "loss": 0.078, "step": 26640 }, { "epoch": 3.1523539153063638, "grad_norm": 0.39439114928245544, "learning_rate": 4.157840555422552e-06, "loss": 0.0636, "step": 26650 }, { "epoch": 3.153536787319612, "grad_norm": 0.4675343930721283, "learning_rate": 4.1571223366052195e-06, "loss": 0.0741, "step": 26660 }, { "epoch": 3.1547196593328604, "grad_norm": 0.4180538058280945, "learning_rate": 4.156404117787886e-06, "loss": 0.0768, "step": 26670 }, { "epoch": 3.155902531346108, "grad_norm": 0.4684538245201111, "learning_rate": 4.155685898970553e-06, "loss": 0.0738, "step": 26680 }, { "epoch": 3.1570854033593565, "grad_norm": 0.43163493275642395, "learning_rate": 4.15496768015322e-06, "loss": 0.0781, "step": 26690 }, { "epoch": 3.158268275372605, "grad_norm": 0.5112494826316833, "learning_rate": 4.154249461335887e-06, "loss": 0.0765, "step": 26700 }, { "epoch": 3.1594511473858526, "grad_norm": 0.559335470199585, "learning_rate": 4.153531242518554e-06, "loss": 0.0755, "step": 26710 }, { "epoch": 3.160634019399101, "grad_norm": 0.528243362903595, "learning_rate": 4.152813023701221e-06, "loss": 0.0776, "step": 26720 }, { "epoch": 3.1618168914123492, "grad_norm": 0.4130913317203522, "learning_rate": 4.152094804883888e-06, "loss": 0.0785, "step": 26730 }, { "epoch": 3.1629997634255975, "grad_norm": 0.5099791884422302, "learning_rate": 4.151376586066555e-06, "loss": 0.0726, "step": 26740 }, { "epoch": 3.1641826354388454, "grad_norm": 0.5442439913749695, "learning_rate": 4.150658367249222e-06, "loss": 0.0726, "step": 26750 }, { "epoch": 3.1653655074520937, "grad_norm": 0.49684953689575195, "learning_rate": 4.149940148431889e-06, "loss": 0.0766, "step": 26760 }, { "epoch": 3.166548379465342, "grad_norm": 0.5063852071762085, "learning_rate": 4.149221929614556e-06, "loss": 0.0727, "step": 26770 }, { "epoch": 3.16773125147859, "grad_norm": 0.5242724418640137, "learning_rate": 4.148503710797223e-06, "loss": 0.0768, "step": 26780 }, { "epoch": 3.168914123491838, "grad_norm": 0.46463802456855774, "learning_rate": 4.1477854919798895e-06, "loss": 0.075, "step": 26790 }, { "epoch": 3.1700969955050864, "grad_norm": 0.47540661692619324, "learning_rate": 4.147067273162557e-06, "loss": 0.0787, "step": 26800 }, { "epoch": 3.1712798675183347, "grad_norm": 0.4728701412677765, "learning_rate": 4.146349054345224e-06, "loss": 0.0708, "step": 26810 }, { "epoch": 3.1724627395315825, "grad_norm": 0.48258593678474426, "learning_rate": 4.145630835527891e-06, "loss": 0.0765, "step": 26820 }, { "epoch": 3.173645611544831, "grad_norm": 0.5328062772750854, "learning_rate": 4.144912616710558e-06, "loss": 0.0777, "step": 26830 }, { "epoch": 3.174828483558079, "grad_norm": 0.5634903311729431, "learning_rate": 4.144194397893225e-06, "loss": 0.0797, "step": 26840 }, { "epoch": 3.176011355571327, "grad_norm": 0.3925206661224365, "learning_rate": 4.143476179075892e-06, "loss": 0.0836, "step": 26850 }, { "epoch": 3.1771942275845753, "grad_norm": 0.5485296845436096, "learning_rate": 4.142757960258559e-06, "loss": 0.0764, "step": 26860 }, { "epoch": 3.1783770995978236, "grad_norm": 0.5918638706207275, "learning_rate": 4.142039741441226e-06, "loss": 0.0748, "step": 26870 }, { "epoch": 3.179559971611072, "grad_norm": 0.6178056597709656, "learning_rate": 4.141321522623893e-06, "loss": 0.0739, "step": 26880 }, { "epoch": 3.1807428436243197, "grad_norm": 0.4763590693473816, "learning_rate": 4.1406033038065605e-06, "loss": 0.075, "step": 26890 }, { "epoch": 3.181925715637568, "grad_norm": 0.3556731045246124, "learning_rate": 4.1398850849892265e-06, "loss": 0.0763, "step": 26900 }, { "epoch": 3.1831085876508163, "grad_norm": 0.465175598859787, "learning_rate": 4.139166866171894e-06, "loss": 0.0737, "step": 26910 }, { "epoch": 3.184291459664064, "grad_norm": 0.39644134044647217, "learning_rate": 4.13844864735456e-06, "loss": 0.0766, "step": 26920 }, { "epoch": 3.1854743316773124, "grad_norm": 0.5461254715919495, "learning_rate": 4.137730428537228e-06, "loss": 0.0799, "step": 26930 }, { "epoch": 3.1866572036905607, "grad_norm": 0.37270936369895935, "learning_rate": 4.137012209719894e-06, "loss": 0.0666, "step": 26940 }, { "epoch": 3.187840075703809, "grad_norm": 0.47572383284568787, "learning_rate": 4.136293990902562e-06, "loss": 0.0737, "step": 26950 }, { "epoch": 3.189022947717057, "grad_norm": 0.5914353132247925, "learning_rate": 4.135575772085229e-06, "loss": 0.0738, "step": 26960 }, { "epoch": 3.190205819730305, "grad_norm": 0.47734150290489197, "learning_rate": 4.134857553267896e-06, "loss": 0.0783, "step": 26970 }, { "epoch": 3.1913886917435534, "grad_norm": 0.3511407971382141, "learning_rate": 4.134139334450563e-06, "loss": 0.0713, "step": 26980 }, { "epoch": 3.1925715637568013, "grad_norm": 0.5570262670516968, "learning_rate": 4.13342111563323e-06, "loss": 0.0811, "step": 26990 }, { "epoch": 3.1937544357700496, "grad_norm": 0.5203074812889099, "learning_rate": 4.132702896815897e-06, "loss": 0.0788, "step": 27000 }, { "epoch": 3.194937307783298, "grad_norm": 0.4395122528076172, "learning_rate": 4.1319846779985635e-06, "loss": 0.0744, "step": 27010 }, { "epoch": 3.196120179796546, "grad_norm": 0.46549537777900696, "learning_rate": 4.1312664591812305e-06, "loss": 0.0709, "step": 27020 }, { "epoch": 3.197303051809794, "grad_norm": 0.5933847427368164, "learning_rate": 4.130548240363897e-06, "loss": 0.0708, "step": 27030 }, { "epoch": 3.1984859238230423, "grad_norm": 0.4680193364620209, "learning_rate": 4.129830021546564e-06, "loss": 0.0748, "step": 27040 }, { "epoch": 3.1996687958362906, "grad_norm": 0.6048795580863953, "learning_rate": 4.129111802729231e-06, "loss": 0.0763, "step": 27050 }, { "epoch": 3.2008516678495385, "grad_norm": 0.4343719780445099, "learning_rate": 4.128393583911899e-06, "loss": 0.08, "step": 27060 }, { "epoch": 3.2020345398627867, "grad_norm": 0.45877668261528015, "learning_rate": 4.127675365094566e-06, "loss": 0.0794, "step": 27070 }, { "epoch": 3.203217411876035, "grad_norm": 0.5358856916427612, "learning_rate": 4.126957146277233e-06, "loss": 0.0797, "step": 27080 }, { "epoch": 3.2044002838892833, "grad_norm": 0.44452333450317383, "learning_rate": 4.1262389274599e-06, "loss": 0.0736, "step": 27090 }, { "epoch": 3.205583155902531, "grad_norm": 0.43365758657455444, "learning_rate": 4.125520708642567e-06, "loss": 0.0709, "step": 27100 }, { "epoch": 3.2067660279157795, "grad_norm": 0.4476073086261749, "learning_rate": 4.124802489825234e-06, "loss": 0.0642, "step": 27110 }, { "epoch": 3.2079488999290278, "grad_norm": 0.529181182384491, "learning_rate": 4.1240842710079005e-06, "loss": 0.0771, "step": 27120 }, { "epoch": 3.2091317719422756, "grad_norm": 0.586692750453949, "learning_rate": 4.1233660521905675e-06, "loss": 0.0853, "step": 27130 }, { "epoch": 3.210314643955524, "grad_norm": 0.3644962012767792, "learning_rate": 4.122647833373234e-06, "loss": 0.0786, "step": 27140 }, { "epoch": 3.211497515968772, "grad_norm": 0.47340214252471924, "learning_rate": 4.121929614555901e-06, "loss": 0.0701, "step": 27150 }, { "epoch": 3.2126803879820205, "grad_norm": 0.49624103307724, "learning_rate": 4.121211395738568e-06, "loss": 0.0783, "step": 27160 }, { "epoch": 3.2138632599952683, "grad_norm": 0.5268238186836243, "learning_rate": 4.120493176921235e-06, "loss": 0.0719, "step": 27170 }, { "epoch": 3.2150461320085166, "grad_norm": 0.36799025535583496, "learning_rate": 4.119774958103903e-06, "loss": 0.0786, "step": 27180 }, { "epoch": 3.216229004021765, "grad_norm": 0.5110949277877808, "learning_rate": 4.119056739286569e-06, "loss": 0.0775, "step": 27190 }, { "epoch": 3.217411876035013, "grad_norm": 0.5561712980270386, "learning_rate": 4.118338520469237e-06, "loss": 0.0812, "step": 27200 }, { "epoch": 3.218594748048261, "grad_norm": 0.4782586693763733, "learning_rate": 4.117620301651903e-06, "loss": 0.0829, "step": 27210 }, { "epoch": 3.2197776200615094, "grad_norm": 0.47894081473350525, "learning_rate": 4.116902082834571e-06, "loss": 0.0743, "step": 27220 }, { "epoch": 3.2209604920747577, "grad_norm": 0.5971357822418213, "learning_rate": 4.1161838640172375e-06, "loss": 0.0751, "step": 27230 }, { "epoch": 3.2221433640880055, "grad_norm": 0.6245249509811401, "learning_rate": 4.1154656451999045e-06, "loss": 0.0826, "step": 27240 }, { "epoch": 3.223326236101254, "grad_norm": 0.6118314266204834, "learning_rate": 4.114747426382571e-06, "loss": 0.0746, "step": 27250 }, { "epoch": 3.224509108114502, "grad_norm": 0.551966667175293, "learning_rate": 4.114029207565238e-06, "loss": 0.0719, "step": 27260 }, { "epoch": 3.2256919801277504, "grad_norm": 0.4046506881713867, "learning_rate": 4.113310988747905e-06, "loss": 0.0746, "step": 27270 }, { "epoch": 3.2268748521409982, "grad_norm": 0.8668612241744995, "learning_rate": 4.112592769930572e-06, "loss": 0.0761, "step": 27280 }, { "epoch": 3.2280577241542465, "grad_norm": 0.42175784707069397, "learning_rate": 4.111874551113239e-06, "loss": 0.0738, "step": 27290 }, { "epoch": 3.229240596167495, "grad_norm": 0.518036425113678, "learning_rate": 4.111156332295906e-06, "loss": 0.0769, "step": 27300 }, { "epoch": 3.2304234681807427, "grad_norm": 0.488725870847702, "learning_rate": 4.110438113478574e-06, "loss": 0.0703, "step": 27310 }, { "epoch": 3.231606340193991, "grad_norm": 0.5095588564872742, "learning_rate": 4.10971989466124e-06, "loss": 0.0799, "step": 27320 }, { "epoch": 3.2327892122072392, "grad_norm": 0.43449199199676514, "learning_rate": 4.109001675843908e-06, "loss": 0.0736, "step": 27330 }, { "epoch": 3.2339720842204875, "grad_norm": 0.4390982687473297, "learning_rate": 4.1082834570265745e-06, "loss": 0.0817, "step": 27340 }, { "epoch": 3.2351549562337354, "grad_norm": 0.4414650499820709, "learning_rate": 4.1075652382092415e-06, "loss": 0.0777, "step": 27350 }, { "epoch": 3.2363378282469837, "grad_norm": 0.572296679019928, "learning_rate": 4.106847019391908e-06, "loss": 0.0748, "step": 27360 }, { "epoch": 3.237520700260232, "grad_norm": 0.6366207003593445, "learning_rate": 4.106128800574575e-06, "loss": 0.0759, "step": 27370 }, { "epoch": 3.23870357227348, "grad_norm": 0.36533117294311523, "learning_rate": 4.105410581757242e-06, "loss": 0.0782, "step": 27380 }, { "epoch": 3.239886444286728, "grad_norm": 0.42771992087364197, "learning_rate": 4.104692362939909e-06, "loss": 0.0716, "step": 27390 }, { "epoch": 3.2410693162999764, "grad_norm": 0.5031138062477112, "learning_rate": 4.103974144122576e-06, "loss": 0.0763, "step": 27400 }, { "epoch": 3.2422521883132247, "grad_norm": 0.3997482359409332, "learning_rate": 4.103255925305243e-06, "loss": 0.0793, "step": 27410 }, { "epoch": 3.2434350603264726, "grad_norm": 0.5114167332649231, "learning_rate": 4.10253770648791e-06, "loss": 0.0747, "step": 27420 }, { "epoch": 3.244617932339721, "grad_norm": 0.4705285429954529, "learning_rate": 4.101819487670577e-06, "loss": 0.0688, "step": 27430 }, { "epoch": 3.245800804352969, "grad_norm": 0.5586870908737183, "learning_rate": 4.101101268853244e-06, "loss": 0.0805, "step": 27440 }, { "epoch": 3.246983676366217, "grad_norm": 0.4638514816761017, "learning_rate": 4.1003830500359115e-06, "loss": 0.0795, "step": 27450 }, { "epoch": 3.2481665483794653, "grad_norm": 0.49333566427230835, "learning_rate": 4.099664831218578e-06, "loss": 0.0732, "step": 27460 }, { "epoch": 3.2493494203927136, "grad_norm": 0.43076786398887634, "learning_rate": 4.098946612401245e-06, "loss": 0.0801, "step": 27470 }, { "epoch": 3.250532292405962, "grad_norm": 0.47808387875556946, "learning_rate": 4.0982283935839115e-06, "loss": 0.076, "step": 27480 }, { "epoch": 3.2507688668086114, "eval_accuracy": 0.6855807299464352, "eval_animal_abuse/accuracy": 0.9948098612635992, "eval_animal_abuse/f1": 0.7664670658682635, "eval_animal_abuse/fpr": 0.002288560562716652, "eval_animal_abuse/precision": 0.7901234567901234, "eval_animal_abuse/recall": 0.7441860465116279, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9962072063080147, "eval_child_abuse/f1": 0.6714697406340058, "eval_child_abuse/fpr": 0.0021411485254512266, "eval_child_abuse/precision": 0.6454293628808865, "eval_child_abuse/recall": 0.6996996996996997, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9698240010646438, "eval_controversial_topics,politics/f1": 0.47963281698221455, "eval_controversial_topics,politics/fpr": 0.013866007688083446, "eval_controversial_topics,politics/precision": 0.5085158150851582, "eval_controversial_topics,politics/recall": 0.4538545059717698, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9565159530225904, "eval_discrimination,stereotype,injustice/f1": 0.7187432752313321, "eval_discrimination,stereotype,injustice/fpr": 0.021216611848049984, "eval_discrimination,stereotype,injustice/precision": 0.7399202481169694, "eval_discrimination,stereotype,injustice/recall": 0.698744769874477, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9726852313936853, "eval_drug_abuse,weapons,banned_substance/f1": 0.7719444444444444, "eval_drug_abuse,weapons,banned_substance/fpr": 0.018244958397969226, "eval_drug_abuse,weapons,banned_substance/precision": 0.7286313581541689, "eval_drug_abuse,weapons,banned_substance/recall": 0.8207324276432368, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9598429650331037, "eval_financial_crime,property_crime,theft/f1": 0.8022608125819135, "eval_financial_crime,property_crime,theft/fpr": 0.026905994876803663, "eval_financial_crime,property_crime,theft/precision": 0.7703319175711814, "eval_financial_crime,property_crime,theft/recall": 0.8369509485558024, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8536447416575174, "eval_flagged/aucpr": 0.9083457855081679, "eval_flagged/f1": 0.8657941302092867, "eval_flagged/fpr": 0.13974421482953855, "eval_flagged/precision": 0.883943311010746, "eval_flagged/recall": 0.8483752354189711, "eval_hate_speech,offensive_language/accuracy": 0.9495791329806701, "eval_hate_speech,offensive_language/f1": 0.6978969400976777, "eval_hate_speech,offensive_language/fpr": 0.020975698885437565, "eval_hate_speech,offensive_language/precision": 0.7530651753065175, "eval_hate_speech,offensive_language/recall": 0.650260029717682, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08374538272619247, "eval_macro_f1": 0.65233572083313, "eval_macro_precision": 0.6858107740174907, "eval_macro_recall": 0.6412612068631829, "eval_micro_f1": 0.7525952660888269, "eval_micro_precision": 0.7719793575408549, "eval_micro_recall": 0.7341607838255952, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.987440529660312, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.10011918951132301, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.001111429196908204, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.3888888888888889, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.057455540355677154, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8875469940446485, "eval_non_violent_unethical_behavior/f1": 0.6880479926165205, "eval_non_violent_unethical_behavior/fpr": 0.04710596246470676, "eval_non_violent_unethical_behavior/precision": 0.7666598107774578, "eval_non_violent_unethical_behavior/recall": 0.6240582621798091, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9802708187776558, "eval_privacy_violation/f1": 0.8081527013911356, "eval_privacy_violation/fpr": 0.012563869251767319, "eval_privacy_violation/precision": 0.7767412935323383, "eval_privacy_violation/recall": 0.842211732973702, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.5356, "eval_samples_per_second": 100.435, "eval_self_harm/accuracy": 0.9962571114881724, "eval_self_harm/f1": 0.7266099635479951, "eval_self_harm/fpr": 0.0019094198043682132, "eval_self_harm/precision": 0.7239709443099274, "eval_self_harm/recall": 0.7292682926829268, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.984679109691586, "eval_sexually_explicit,adult_content/f1": 0.6625137412971784, "eval_sexually_explicit,adult_content/fpr": 0.006443145209402208, "eval_sexually_explicit,adult_content/precision": 0.7051482059282371, "eval_sexually_explicit,adult_content/recall": 0.6247408431237043, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.57, "eval_terrorism,organized_crime/accuracy": 0.9915493894932961, "eval_terrorism,organized_crime/f1": 0.38498789346246975, "eval_terrorism,organized_crime/fpr": 0.0031190783626515468, "eval_terrorism,organized_crime/precision": 0.4608695652173913, "eval_terrorism,organized_crime/recall": 0.3305613305613306, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9212329906510962, "eval_violence,aiding_and_abetting,incitement/f1": 0.8538535139973455, "eval_violence,aiding_and_abetting,incitement/fpr": 0.058360908390372015, "eval_violence,aiding_and_abetting,incitement/precision": 0.843054793685622, "eval_violence,aiding_and_abetting,incitement/recall": 0.8649324662331166, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 27482 }, { "epoch": 3.2517151644192097, "grad_norm": 0.6072354912757874, "learning_rate": 4.097510174766579e-06, "loss": 0.0843, "step": 27490 }, { "epoch": 3.252898036432458, "grad_norm": 0.5145090818405151, "learning_rate": 4.096791955949245e-06, "loss": 0.0843, "step": 27500 }, { "epoch": 3.2540809084457063, "grad_norm": 0.4932909905910492, "learning_rate": 4.096073737131913e-06, "loss": 0.0795, "step": 27510 }, { "epoch": 3.255263780458954, "grad_norm": 0.43546953797340393, "learning_rate": 4.09535551831458e-06, "loss": 0.0792, "step": 27520 }, { "epoch": 3.2564466524722024, "grad_norm": 0.4854007661342621, "learning_rate": 4.094637299497247e-06, "loss": 0.0756, "step": 27530 }, { "epoch": 3.2576295244854507, "grad_norm": 0.5321205258369446, "learning_rate": 4.093919080679914e-06, "loss": 0.0847, "step": 27540 }, { "epoch": 3.258812396498699, "grad_norm": 0.3837909996509552, "learning_rate": 4.093200861862581e-06, "loss": 0.0796, "step": 27550 }, { "epoch": 3.259995268511947, "grad_norm": 0.440055251121521, "learning_rate": 4.0924826430452485e-06, "loss": 0.0784, "step": 27560 }, { "epoch": 3.261178140525195, "grad_norm": 0.3987789452075958, "learning_rate": 4.091764424227915e-06, "loss": 0.0778, "step": 27570 }, { "epoch": 3.2623610125384435, "grad_norm": 0.46628355979919434, "learning_rate": 4.091046205410582e-06, "loss": 0.0761, "step": 27580 }, { "epoch": 3.2635438845516918, "grad_norm": 0.4253142476081848, "learning_rate": 4.0903279865932484e-06, "loss": 0.0693, "step": 27590 }, { "epoch": 3.2647267565649396, "grad_norm": 0.4778863787651062, "learning_rate": 4.089609767775916e-06, "loss": 0.0816, "step": 27600 }, { "epoch": 3.265909628578188, "grad_norm": 0.38017937541007996, "learning_rate": 4.088891548958583e-06, "loss": 0.0803, "step": 27610 }, { "epoch": 3.267092500591436, "grad_norm": 0.49800732731819153, "learning_rate": 4.08817333014125e-06, "loss": 0.0782, "step": 27620 }, { "epoch": 3.268275372604684, "grad_norm": 0.5401929020881653, "learning_rate": 4.087455111323917e-06, "loss": 0.0714, "step": 27630 }, { "epoch": 3.2694582446179323, "grad_norm": 0.4518119990825653, "learning_rate": 4.086736892506584e-06, "loss": 0.0724, "step": 27640 }, { "epoch": 3.2706411166311806, "grad_norm": 0.6006473302841187, "learning_rate": 4.086018673689251e-06, "loss": 0.0814, "step": 27650 }, { "epoch": 3.271823988644429, "grad_norm": 0.502289354801178, "learning_rate": 4.085300454871918e-06, "loss": 0.0767, "step": 27660 }, { "epoch": 3.2730068606576768, "grad_norm": 0.4356301426887512, "learning_rate": 4.084582236054585e-06, "loss": 0.077, "step": 27670 }, { "epoch": 3.274189732670925, "grad_norm": 0.544177770614624, "learning_rate": 4.083864017237252e-06, "loss": 0.0814, "step": 27680 }, { "epoch": 3.2753726046841733, "grad_norm": 0.4865387976169586, "learning_rate": 4.0831457984199185e-06, "loss": 0.0768, "step": 27690 }, { "epoch": 3.276555476697421, "grad_norm": 0.4443269968032837, "learning_rate": 4.0824275796025854e-06, "loss": 0.0708, "step": 27700 }, { "epoch": 3.2777383487106695, "grad_norm": 0.5350326299667358, "learning_rate": 4.081709360785252e-06, "loss": 0.0812, "step": 27710 }, { "epoch": 3.278921220723918, "grad_norm": 0.48073649406433105, "learning_rate": 4.08099114196792e-06, "loss": 0.0739, "step": 27720 }, { "epoch": 3.280104092737166, "grad_norm": 0.6726370453834534, "learning_rate": 4.080272923150586e-06, "loss": 0.077, "step": 27730 }, { "epoch": 3.281286964750414, "grad_norm": 0.5977928638458252, "learning_rate": 4.079554704333254e-06, "loss": 0.0848, "step": 27740 }, { "epoch": 3.282469836763662, "grad_norm": 0.41251739859580994, "learning_rate": 4.07883648551592e-06, "loss": 0.073, "step": 27750 }, { "epoch": 3.2836527087769105, "grad_norm": 0.45987462997436523, "learning_rate": 4.078118266698588e-06, "loss": 0.0728, "step": 27760 }, { "epoch": 3.2848355807901584, "grad_norm": 0.49553102254867554, "learning_rate": 4.077400047881254e-06, "loss": 0.0801, "step": 27770 }, { "epoch": 3.2860184528034067, "grad_norm": 0.4810299575328827, "learning_rate": 4.076681829063922e-06, "loss": 0.0863, "step": 27780 }, { "epoch": 3.287201324816655, "grad_norm": 0.33568859100341797, "learning_rate": 4.075963610246589e-06, "loss": 0.0785, "step": 27790 }, { "epoch": 3.2883841968299032, "grad_norm": 0.5407776832580566, "learning_rate": 4.0752453914292555e-06, "loss": 0.0771, "step": 27800 }, { "epoch": 3.289567068843151, "grad_norm": 0.42842572927474976, "learning_rate": 4.074527172611923e-06, "loss": 0.0725, "step": 27810 }, { "epoch": 3.2907499408563994, "grad_norm": 0.49273166060447693, "learning_rate": 4.073808953794589e-06, "loss": 0.0733, "step": 27820 }, { "epoch": 3.2919328128696477, "grad_norm": 0.5353176593780518, "learning_rate": 4.073090734977257e-06, "loss": 0.0755, "step": 27830 }, { "epoch": 3.2931156848828955, "grad_norm": 0.547826886177063, "learning_rate": 4.072372516159923e-06, "loss": 0.0753, "step": 27840 }, { "epoch": 3.294298556896144, "grad_norm": 0.8451334238052368, "learning_rate": 4.071654297342591e-06, "loss": 0.0783, "step": 27850 }, { "epoch": 3.295481428909392, "grad_norm": 0.6000754833221436, "learning_rate": 4.070936078525257e-06, "loss": 0.0711, "step": 27860 }, { "epoch": 3.2966643009226404, "grad_norm": 0.38211897015571594, "learning_rate": 4.070217859707925e-06, "loss": 0.078, "step": 27870 }, { "epoch": 3.2978471729358882, "grad_norm": 0.383466511964798, "learning_rate": 4.069499640890592e-06, "loss": 0.0793, "step": 27880 }, { "epoch": 3.2990300449491365, "grad_norm": 0.5180786848068237, "learning_rate": 4.068781422073259e-06, "loss": 0.0755, "step": 27890 }, { "epoch": 3.300212916962385, "grad_norm": 0.4569891095161438, "learning_rate": 4.068063203255926e-06, "loss": 0.077, "step": 27900 }, { "epoch": 3.3013957889756327, "grad_norm": 0.5370858311653137, "learning_rate": 4.0673449844385925e-06, "loss": 0.0764, "step": 27910 }, { "epoch": 3.302578660988881, "grad_norm": 0.40655517578125, "learning_rate": 4.0666267656212594e-06, "loss": 0.074, "step": 27920 }, { "epoch": 3.3037615330021293, "grad_norm": 0.5284567475318909, "learning_rate": 4.065908546803926e-06, "loss": 0.0736, "step": 27930 }, { "epoch": 3.3049444050153776, "grad_norm": 0.44787245988845825, "learning_rate": 4.065190327986593e-06, "loss": 0.0749, "step": 27940 }, { "epoch": 3.3061272770286254, "grad_norm": 0.7161146998405457, "learning_rate": 4.06447210916926e-06, "loss": 0.0675, "step": 27950 }, { "epoch": 3.3073101490418737, "grad_norm": 0.5823842883110046, "learning_rate": 4.063753890351927e-06, "loss": 0.0764, "step": 27960 }, { "epoch": 3.308493021055122, "grad_norm": 0.403730571269989, "learning_rate": 4.063035671534594e-06, "loss": 0.076, "step": 27970 }, { "epoch": 3.30967589306837, "grad_norm": 0.42429816722869873, "learning_rate": 4.062317452717261e-06, "loss": 0.073, "step": 27980 }, { "epoch": 3.310858765081618, "grad_norm": 0.3634824752807617, "learning_rate": 4.061599233899929e-06, "loss": 0.0724, "step": 27990 }, { "epoch": 3.3120416370948664, "grad_norm": 0.4847436547279358, "learning_rate": 4.060881015082595e-06, "loss": 0.0793, "step": 28000 }, { "epoch": 3.3132245091081147, "grad_norm": 0.42650744318962097, "learning_rate": 4.060162796265263e-06, "loss": 0.0724, "step": 28010 }, { "epoch": 3.3144073811213626, "grad_norm": 0.48866426944732666, "learning_rate": 4.059444577447929e-06, "loss": 0.0816, "step": 28020 }, { "epoch": 3.315590253134611, "grad_norm": 0.49990320205688477, "learning_rate": 4.0587263586305964e-06, "loss": 0.081, "step": 28030 }, { "epoch": 3.316773125147859, "grad_norm": 0.5255056023597717, "learning_rate": 4.0580081398132625e-06, "loss": 0.0783, "step": 28040 }, { "epoch": 3.317955997161107, "grad_norm": 0.4595147967338562, "learning_rate": 4.05728992099593e-06, "loss": 0.0762, "step": 28050 }, { "epoch": 3.3191388691743553, "grad_norm": 0.3983681797981262, "learning_rate": 4.056571702178597e-06, "loss": 0.0703, "step": 28060 }, { "epoch": 3.3203217411876036, "grad_norm": 0.4406883716583252, "learning_rate": 4.055853483361264e-06, "loss": 0.078, "step": 28070 }, { "epoch": 3.321504613200852, "grad_norm": 0.368556410074234, "learning_rate": 4.055135264543931e-06, "loss": 0.0687, "step": 28080 }, { "epoch": 3.3226874852140997, "grad_norm": 0.6657793521881104, "learning_rate": 4.054417045726598e-06, "loss": 0.0776, "step": 28090 }, { "epoch": 3.323870357227348, "grad_norm": 0.48898443579673767, "learning_rate": 4.053698826909266e-06, "loss": 0.0782, "step": 28100 }, { "epoch": 3.3250532292405963, "grad_norm": 0.5613859295845032, "learning_rate": 4.052980608091932e-06, "loss": 0.0715, "step": 28110 }, { "epoch": 3.326236101253844, "grad_norm": 0.4971611201763153, "learning_rate": 4.0522623892746e-06, "loss": 0.0746, "step": 28120 }, { "epoch": 3.3274189732670925, "grad_norm": 0.4439704418182373, "learning_rate": 4.051544170457266e-06, "loss": 0.0765, "step": 28130 }, { "epoch": 3.3286018452803408, "grad_norm": 0.6572776436805725, "learning_rate": 4.0508259516399334e-06, "loss": 0.0757, "step": 28140 }, { "epoch": 3.329784717293589, "grad_norm": 0.42458072304725647, "learning_rate": 4.0501077328226e-06, "loss": 0.0839, "step": 28150 }, { "epoch": 3.330967589306837, "grad_norm": 0.38020116090774536, "learning_rate": 4.049389514005267e-06, "loss": 0.0742, "step": 28160 }, { "epoch": 3.332150461320085, "grad_norm": 0.5258458256721497, "learning_rate": 4.048671295187934e-06, "loss": 0.0825, "step": 28170 }, { "epoch": 3.3333333333333335, "grad_norm": 0.5635269284248352, "learning_rate": 4.047953076370601e-06, "loss": 0.0767, "step": 28180 }, { "epoch": 3.3345162053465813, "grad_norm": 0.518713653087616, "learning_rate": 4.047234857553268e-06, "loss": 0.0758, "step": 28190 }, { "epoch": 3.3356990773598296, "grad_norm": 0.5655438899993896, "learning_rate": 4.046516638735935e-06, "loss": 0.0835, "step": 28200 }, { "epoch": 3.336881949373078, "grad_norm": 0.5497361421585083, "learning_rate": 4.045798419918602e-06, "loss": 0.0809, "step": 28210 }, { "epoch": 3.338064821386326, "grad_norm": 0.4827767014503479, "learning_rate": 4.045080201101269e-06, "loss": 0.0755, "step": 28220 }, { "epoch": 3.339247693399574, "grad_norm": 0.47095760703086853, "learning_rate": 4.044361982283936e-06, "loss": 0.0674, "step": 28230 }, { "epoch": 3.3404305654128224, "grad_norm": 0.5108373165130615, "learning_rate": 4.043643763466603e-06, "loss": 0.0708, "step": 28240 }, { "epoch": 3.3416134374260706, "grad_norm": 0.5309790968894958, "learning_rate": 4.04292554464927e-06, "loss": 0.0792, "step": 28250 }, { "epoch": 3.3427963094393185, "grad_norm": 0.38607197999954224, "learning_rate": 4.042207325831937e-06, "loss": 0.0769, "step": 28260 }, { "epoch": 3.343979181452567, "grad_norm": 0.34177497029304504, "learning_rate": 4.0414891070146034e-06, "loss": 0.0789, "step": 28270 }, { "epoch": 3.345162053465815, "grad_norm": 0.41540175676345825, "learning_rate": 4.040770888197271e-06, "loss": 0.0805, "step": 28280 }, { "epoch": 3.3463449254790634, "grad_norm": 0.3849349319934845, "learning_rate": 4.040052669379937e-06, "loss": 0.0756, "step": 28290 }, { "epoch": 3.347527797492311, "grad_norm": 0.4810726046562195, "learning_rate": 4.039334450562605e-06, "loss": 0.0829, "step": 28300 }, { "epoch": 3.3487106695055595, "grad_norm": 0.5247235298156738, "learning_rate": 4.038616231745271e-06, "loss": 0.071, "step": 28310 }, { "epoch": 3.349893541518808, "grad_norm": 0.4414138197898865, "learning_rate": 4.037898012927939e-06, "loss": 0.0743, "step": 28320 }, { "epoch": 3.3510764135320557, "grad_norm": 0.4004254639148712, "learning_rate": 4.037179794110606e-06, "loss": 0.0789, "step": 28330 }, { "epoch": 3.352259285545304, "grad_norm": 0.6104505062103271, "learning_rate": 4.036461575293273e-06, "loss": 0.0782, "step": 28340 }, { "epoch": 3.3534421575585522, "grad_norm": 0.5016964673995972, "learning_rate": 4.03574335647594e-06, "loss": 0.0786, "step": 28350 }, { "epoch": 3.3546250295718005, "grad_norm": 0.46187204122543335, "learning_rate": 4.035025137658607e-06, "loss": 0.0772, "step": 28360 }, { "epoch": 3.3558079015850484, "grad_norm": 0.5760659575462341, "learning_rate": 4.034306918841274e-06, "loss": 0.084, "step": 28370 }, { "epoch": 3.3569907735982967, "grad_norm": 0.47001373767852783, "learning_rate": 4.0335887000239404e-06, "loss": 0.0821, "step": 28380 }, { "epoch": 3.358173645611545, "grad_norm": 0.4849869906902313, "learning_rate": 4.032870481206608e-06, "loss": 0.0735, "step": 28390 }, { "epoch": 3.359356517624793, "grad_norm": 0.519777238368988, "learning_rate": 4.032152262389274e-06, "loss": 0.0764, "step": 28400 }, { "epoch": 3.360539389638041, "grad_norm": 0.4827943742275238, "learning_rate": 4.031434043571942e-06, "loss": 0.0755, "step": 28410 }, { "epoch": 3.3617222616512894, "grad_norm": 0.4516553580760956, "learning_rate": 4.030715824754608e-06, "loss": 0.0801, "step": 28420 }, { "epoch": 3.3629051336645377, "grad_norm": 0.47336065769195557, "learning_rate": 4.029997605937276e-06, "loss": 0.0746, "step": 28430 }, { "epoch": 3.3640880056777855, "grad_norm": 0.49213412404060364, "learning_rate": 4.029279387119943e-06, "loss": 0.0748, "step": 28440 }, { "epoch": 3.365270877691034, "grad_norm": 0.492160826921463, "learning_rate": 4.02856116830261e-06, "loss": 0.0758, "step": 28450 }, { "epoch": 3.366453749704282, "grad_norm": 0.45443448424339294, "learning_rate": 4.027842949485277e-06, "loss": 0.0758, "step": 28460 }, { "epoch": 3.36763662171753, "grad_norm": 0.4902106821537018, "learning_rate": 4.027124730667944e-06, "loss": 0.08, "step": 28470 }, { "epoch": 3.3688194937307783, "grad_norm": 0.5188368558883667, "learning_rate": 4.0264065118506105e-06, "loss": 0.0796, "step": 28480 }, { "epoch": 3.3700023657440266, "grad_norm": 0.5559605956077576, "learning_rate": 4.0256882930332774e-06, "loss": 0.0802, "step": 28490 }, { "epoch": 3.371185237757275, "grad_norm": 0.4222972095012665, "learning_rate": 4.024970074215944e-06, "loss": 0.0775, "step": 28500 }, { "epoch": 3.3723681097705227, "grad_norm": 0.4151909053325653, "learning_rate": 4.024251855398611e-06, "loss": 0.0743, "step": 28510 }, { "epoch": 3.373550981783771, "grad_norm": 0.4467265009880066, "learning_rate": 4.023533636581278e-06, "loss": 0.0902, "step": 28520 }, { "epoch": 3.3747338537970193, "grad_norm": 0.7386625409126282, "learning_rate": 4.022815417763946e-06, "loss": 0.0755, "step": 28530 }, { "epoch": 3.375916725810267, "grad_norm": 0.5208406448364258, "learning_rate": 4.022097198946612e-06, "loss": 0.0831, "step": 28540 }, { "epoch": 3.3770995978235154, "grad_norm": 0.37120455503463745, "learning_rate": 4.02137898012928e-06, "loss": 0.0741, "step": 28550 }, { "epoch": 3.3782824698367637, "grad_norm": 0.4606970250606537, "learning_rate": 4.020660761311946e-06, "loss": 0.0733, "step": 28560 }, { "epoch": 3.379465341850012, "grad_norm": 0.4706851840019226, "learning_rate": 4.019942542494614e-06, "loss": 0.0724, "step": 28570 }, { "epoch": 3.38064821386326, "grad_norm": 0.4241083264350891, "learning_rate": 4.019224323677281e-06, "loss": 0.0726, "step": 28580 }, { "epoch": 3.381831085876508, "grad_norm": 0.6178443431854248, "learning_rate": 4.0185061048599475e-06, "loss": 0.0825, "step": 28590 }, { "epoch": 3.3830139578897565, "grad_norm": 0.6406059265136719, "learning_rate": 4.0177878860426144e-06, "loss": 0.0741, "step": 28600 }, { "epoch": 3.3841968299030043, "grad_norm": 0.6291756629943848, "learning_rate": 4.017069667225281e-06, "loss": 0.0849, "step": 28610 }, { "epoch": 3.3853797019162526, "grad_norm": 0.5480164289474487, "learning_rate": 4.016351448407948e-06, "loss": 0.0798, "step": 28620 }, { "epoch": 3.386562573929501, "grad_norm": 0.5015382766723633, "learning_rate": 4.015633229590615e-06, "loss": 0.0841, "step": 28630 }, { "epoch": 3.387745445942749, "grad_norm": 0.46265560388565063, "learning_rate": 4.014915010773283e-06, "loss": 0.0778, "step": 28640 }, { "epoch": 3.388928317955997, "grad_norm": 0.5332260131835938, "learning_rate": 4.014196791955949e-06, "loss": 0.0781, "step": 28650 }, { "epoch": 3.3901111899692453, "grad_norm": 0.34095877408981323, "learning_rate": 4.013478573138617e-06, "loss": 0.0739, "step": 28660 }, { "epoch": 3.3912940619824936, "grad_norm": 0.5041068196296692, "learning_rate": 4.012760354321283e-06, "loss": 0.0806, "step": 28670 }, { "epoch": 3.3924769339957415, "grad_norm": 0.4475233256816864, "learning_rate": 4.012042135503951e-06, "loss": 0.0729, "step": 28680 }, { "epoch": 3.3936598060089898, "grad_norm": 0.4862820506095886, "learning_rate": 4.011323916686617e-06, "loss": 0.0705, "step": 28690 }, { "epoch": 3.394842678022238, "grad_norm": 0.6961069107055664, "learning_rate": 4.0106056978692845e-06, "loss": 0.0808, "step": 28700 }, { "epoch": 3.3960255500354863, "grad_norm": 0.438713014125824, "learning_rate": 4.0098874790519514e-06, "loss": 0.0761, "step": 28710 }, { "epoch": 3.397208422048734, "grad_norm": 0.6228511929512024, "learning_rate": 4.009169260234618e-06, "loss": 0.0807, "step": 28720 }, { "epoch": 3.3983912940619825, "grad_norm": 0.48113372921943665, "learning_rate": 4.008451041417285e-06, "loss": 0.0699, "step": 28730 }, { "epoch": 3.3995741660752308, "grad_norm": 0.4577452838420868, "learning_rate": 4.007732822599952e-06, "loss": 0.0743, "step": 28740 }, { "epoch": 3.4007570380884786, "grad_norm": 0.6414778828620911, "learning_rate": 4.007014603782619e-06, "loss": 0.0814, "step": 28750 }, { "epoch": 3.401939910101727, "grad_norm": 0.49609485268592834, "learning_rate": 4.006296384965286e-06, "loss": 0.0794, "step": 28760 }, { "epoch": 3.403122782114975, "grad_norm": 0.6179436445236206, "learning_rate": 4.005578166147953e-06, "loss": 0.0665, "step": 28770 }, { "epoch": 3.4043056541282235, "grad_norm": 0.4675549566745758, "learning_rate": 4.00485994733062e-06, "loss": 0.0761, "step": 28780 }, { "epoch": 3.4054885261414714, "grad_norm": 0.5192151069641113, "learning_rate": 4.004141728513287e-06, "loss": 0.0753, "step": 28790 }, { "epoch": 3.4066713981547196, "grad_norm": 0.5390852093696594, "learning_rate": 4.003423509695955e-06, "loss": 0.0757, "step": 28800 }, { "epoch": 3.407854270167968, "grad_norm": 0.5827447772026062, "learning_rate": 4.002705290878621e-06, "loss": 0.0808, "step": 28810 }, { "epoch": 3.409037142181216, "grad_norm": 0.5564484000205994, "learning_rate": 4.001987072061288e-06, "loss": 0.0746, "step": 28820 }, { "epoch": 3.410220014194464, "grad_norm": 0.5976808071136475, "learning_rate": 4.001268853243955e-06, "loss": 0.0804, "step": 28830 }, { "epoch": 3.4114028862077124, "grad_norm": 0.510066568851471, "learning_rate": 4.000550634426622e-06, "loss": 0.0776, "step": 28840 }, { "epoch": 3.4125857582209607, "grad_norm": 0.47857069969177246, "learning_rate": 3.999832415609289e-06, "loss": 0.0758, "step": 28850 }, { "epoch": 3.4137686302342085, "grad_norm": 0.49315011501312256, "learning_rate": 3.999114196791956e-06, "loss": 0.0808, "step": 28860 }, { "epoch": 3.414951502247457, "grad_norm": 0.5446792244911194, "learning_rate": 3.998395977974623e-06, "loss": 0.0788, "step": 28870 }, { "epoch": 3.416134374260705, "grad_norm": 0.4794192314147949, "learning_rate": 3.99767775915729e-06, "loss": 0.0757, "step": 28880 }, { "epoch": 3.417317246273953, "grad_norm": 0.5131387114524841, "learning_rate": 3.996959540339957e-06, "loss": 0.0792, "step": 28890 }, { "epoch": 3.4185001182872012, "grad_norm": 0.5517047643661499, "learning_rate": 3.996241321522624e-06, "loss": 0.0773, "step": 28900 }, { "epoch": 3.4196829903004495, "grad_norm": 0.48601052165031433, "learning_rate": 3.9955231027052916e-06, "loss": 0.0773, "step": 28910 }, { "epoch": 3.420865862313698, "grad_norm": 0.4482939541339874, "learning_rate": 3.994804883887958e-06, "loss": 0.0775, "step": 28920 }, { "epoch": 3.4220487343269457, "grad_norm": 0.5795282125473022, "learning_rate": 3.994086665070625e-06, "loss": 0.0799, "step": 28930 }, { "epoch": 3.423231606340194, "grad_norm": 0.517400324344635, "learning_rate": 3.9933684462532915e-06, "loss": 0.0787, "step": 28940 }, { "epoch": 3.4244144783534423, "grad_norm": 0.47772783041000366, "learning_rate": 3.992650227435959e-06, "loss": 0.0747, "step": 28950 }, { "epoch": 3.42559735036669, "grad_norm": 0.5013516545295715, "learning_rate": 3.991932008618625e-06, "loss": 0.0801, "step": 28960 }, { "epoch": 3.4267802223799384, "grad_norm": 0.4684916138648987, "learning_rate": 3.991213789801293e-06, "loss": 0.0726, "step": 28970 }, { "epoch": 3.4279630943931867, "grad_norm": 0.5293290615081787, "learning_rate": 3.99049557098396e-06, "loss": 0.0727, "step": 28980 }, { "epoch": 3.429145966406435, "grad_norm": 0.468184232711792, "learning_rate": 3.989777352166627e-06, "loss": 0.0793, "step": 28990 }, { "epoch": 3.430328838419683, "grad_norm": 0.5100991129875183, "learning_rate": 3.989059133349294e-06, "loss": 0.0743, "step": 29000 }, { "epoch": 3.431511710432931, "grad_norm": 0.49255746603012085, "learning_rate": 3.988340914531961e-06, "loss": 0.0764, "step": 29010 }, { "epoch": 3.4326945824461794, "grad_norm": 0.554591953754425, "learning_rate": 3.987622695714628e-06, "loss": 0.0715, "step": 29020 }, { "epoch": 3.4338774544594273, "grad_norm": 0.4593774974346161, "learning_rate": 3.986904476897295e-06, "loss": 0.0744, "step": 29030 }, { "epoch": 3.4350603264726756, "grad_norm": 0.46875250339508057, "learning_rate": 3.9861862580799616e-06, "loss": 0.0709, "step": 29040 }, { "epoch": 3.436243198485924, "grad_norm": 0.46495839953422546, "learning_rate": 3.9854680392626285e-06, "loss": 0.0751, "step": 29050 }, { "epoch": 3.437426070499172, "grad_norm": 0.47398802638053894, "learning_rate": 3.984749820445295e-06, "loss": 0.0782, "step": 29060 }, { "epoch": 3.43860894251242, "grad_norm": 0.4228682518005371, "learning_rate": 3.984031601627962e-06, "loss": 0.0791, "step": 29070 }, { "epoch": 3.4397918145256683, "grad_norm": 0.47684407234191895, "learning_rate": 3.98331338281063e-06, "loss": 0.072, "step": 29080 }, { "epoch": 3.4409746865389166, "grad_norm": 0.5944690108299255, "learning_rate": 3.982595163993297e-06, "loss": 0.076, "step": 29090 }, { "epoch": 3.4421575585521644, "grad_norm": 0.4410305619239807, "learning_rate": 3.981876945175964e-06, "loss": 0.0853, "step": 29100 }, { "epoch": 3.4433404305654127, "grad_norm": 0.5838159322738647, "learning_rate": 3.981158726358631e-06, "loss": 0.0826, "step": 29110 }, { "epoch": 3.444523302578661, "grad_norm": 0.3890014588832855, "learning_rate": 3.980440507541298e-06, "loss": 0.0732, "step": 29120 }, { "epoch": 3.4457061745919093, "grad_norm": 0.44875389337539673, "learning_rate": 3.979722288723965e-06, "loss": 0.0737, "step": 29130 }, { "epoch": 3.446889046605157, "grad_norm": 0.4087563753128052, "learning_rate": 3.979004069906632e-06, "loss": 0.0796, "step": 29140 }, { "epoch": 3.4480719186184055, "grad_norm": 0.3696449398994446, "learning_rate": 3.9782858510892986e-06, "loss": 0.0734, "step": 29150 }, { "epoch": 3.4492547906316537, "grad_norm": 0.5083557367324829, "learning_rate": 3.9775676322719655e-06, "loss": 0.0811, "step": 29160 }, { "epoch": 3.4504376626449016, "grad_norm": 0.4537920355796814, "learning_rate": 3.976849413454632e-06, "loss": 0.0765, "step": 29170 }, { "epoch": 3.45162053465815, "grad_norm": 0.3982715904712677, "learning_rate": 3.9761311946373e-06, "loss": 0.0776, "step": 29180 }, { "epoch": 3.452803406671398, "grad_norm": 0.6796822547912598, "learning_rate": 3.975412975819966e-06, "loss": 0.0778, "step": 29190 }, { "epoch": 3.4539862786846465, "grad_norm": 0.7790104746818542, "learning_rate": 3.974694757002634e-06, "loss": 0.0757, "step": 29200 }, { "epoch": 3.4551691506978943, "grad_norm": 0.5692980289459229, "learning_rate": 3.9739765381853e-06, "loss": 0.0726, "step": 29210 }, { "epoch": 3.4563520227111426, "grad_norm": 0.49411219358444214, "learning_rate": 3.973258319367968e-06, "loss": 0.0813, "step": 29220 }, { "epoch": 3.457534894724391, "grad_norm": 0.5059658885002136, "learning_rate": 3.972540100550634e-06, "loss": 0.0754, "step": 29230 }, { "epoch": 3.4587177667376388, "grad_norm": 0.43720507621765137, "learning_rate": 3.971821881733302e-06, "loss": 0.0725, "step": 29240 }, { "epoch": 3.459900638750887, "grad_norm": 0.5233420729637146, "learning_rate": 3.971103662915969e-06, "loss": 0.0746, "step": 29250 }, { "epoch": 3.4610835107641353, "grad_norm": 0.5149396061897278, "learning_rate": 3.9703854440986356e-06, "loss": 0.0729, "step": 29260 }, { "epoch": 3.4622663827773836, "grad_norm": 0.4699614942073822, "learning_rate": 3.9696672252813025e-06, "loss": 0.073, "step": 29270 }, { "epoch": 3.4634492547906315, "grad_norm": 0.6462719440460205, "learning_rate": 3.968949006463969e-06, "loss": 0.0797, "step": 29280 }, { "epoch": 3.4646321268038798, "grad_norm": 0.36168769001960754, "learning_rate": 3.968230787646636e-06, "loss": 0.0771, "step": 29290 }, { "epoch": 3.465814998817128, "grad_norm": 0.6236043572425842, "learning_rate": 3.967512568829303e-06, "loss": 0.0752, "step": 29300 }, { "epoch": 3.466997870830376, "grad_norm": 0.45412570238113403, "learning_rate": 3.96679435001197e-06, "loss": 0.0744, "step": 29310 }, { "epoch": 3.468180742843624, "grad_norm": 0.471900999546051, "learning_rate": 3.966076131194637e-06, "loss": 0.0737, "step": 29320 }, { "epoch": 3.4693636148568725, "grad_norm": 0.47338971495628357, "learning_rate": 3.965357912377305e-06, "loss": 0.0863, "step": 29330 }, { "epoch": 3.470546486870121, "grad_norm": 0.5060920119285583, "learning_rate": 3.964639693559971e-06, "loss": 0.0778, "step": 29340 }, { "epoch": 3.4717293588833686, "grad_norm": 0.3929210603237152, "learning_rate": 3.963921474742639e-06, "loss": 0.0789, "step": 29350 }, { "epoch": 3.472912230896617, "grad_norm": 0.4866841733455658, "learning_rate": 3.963203255925306e-06, "loss": 0.078, "step": 29360 }, { "epoch": 3.4740951029098652, "grad_norm": 0.5488289594650269, "learning_rate": 3.9624850371079726e-06, "loss": 0.076, "step": 29370 }, { "epoch": 3.475277974923113, "grad_norm": 0.46391069889068604, "learning_rate": 3.9617668182906395e-06, "loss": 0.0764, "step": 29380 }, { "epoch": 3.4764608469363614, "grad_norm": 0.5132879614830017, "learning_rate": 3.961048599473306e-06, "loss": 0.0756, "step": 29390 }, { "epoch": 3.4776437189496097, "grad_norm": 0.5156050324440002, "learning_rate": 3.960330380655973e-06, "loss": 0.0705, "step": 29400 }, { "epoch": 3.478826590962858, "grad_norm": 0.5627337694168091, "learning_rate": 3.95961216183864e-06, "loss": 0.0751, "step": 29410 }, { "epoch": 3.480009462976106, "grad_norm": 0.5063336491584778, "learning_rate": 3.958893943021307e-06, "loss": 0.078, "step": 29420 }, { "epoch": 3.481192334989354, "grad_norm": 0.4922984540462494, "learning_rate": 3.958175724203974e-06, "loss": 0.0692, "step": 29430 }, { "epoch": 3.4823752070026024, "grad_norm": 0.41539129614830017, "learning_rate": 3.957457505386641e-06, "loss": 0.0775, "step": 29440 }, { "epoch": 3.4835580790158502, "grad_norm": 0.42562875151634216, "learning_rate": 3.956739286569309e-06, "loss": 0.065, "step": 29450 }, { "epoch": 3.4847409510290985, "grad_norm": 0.5890735983848572, "learning_rate": 3.956021067751975e-06, "loss": 0.0695, "step": 29460 }, { "epoch": 3.485923823042347, "grad_norm": 0.5278323292732239, "learning_rate": 3.955302848934643e-06, "loss": 0.07, "step": 29470 }, { "epoch": 3.487106695055595, "grad_norm": 0.5567238926887512, "learning_rate": 3.954584630117309e-06, "loss": 0.0823, "step": 29480 }, { "epoch": 3.488289567068843, "grad_norm": 0.48242390155792236, "learning_rate": 3.9538664112999765e-06, "loss": 0.0831, "step": 29490 }, { "epoch": 3.4894724390820913, "grad_norm": 0.4160406291484833, "learning_rate": 3.9531481924826426e-06, "loss": 0.0715, "step": 29500 }, { "epoch": 3.4906553110953396, "grad_norm": 0.4855782389640808, "learning_rate": 3.95242997366531e-06, "loss": 0.0754, "step": 29510 }, { "epoch": 3.4918381831085874, "grad_norm": 0.536721408367157, "learning_rate": 3.951711754847977e-06, "loss": 0.0753, "step": 29520 }, { "epoch": 3.4930210551218357, "grad_norm": 0.5511588454246521, "learning_rate": 3.950993536030644e-06, "loss": 0.0794, "step": 29530 }, { "epoch": 3.494203927135084, "grad_norm": 0.45829954743385315, "learning_rate": 3.950275317213311e-06, "loss": 0.0748, "step": 29540 }, { "epoch": 3.4953867991483323, "grad_norm": 0.5441047549247742, "learning_rate": 3.949557098395978e-06, "loss": 0.0765, "step": 29550 }, { "epoch": 3.49656967116158, "grad_norm": 0.6126371026039124, "learning_rate": 3.948838879578645e-06, "loss": 0.0799, "step": 29560 }, { "epoch": 3.4977525431748284, "grad_norm": 0.43216779828071594, "learning_rate": 3.948120660761312e-06, "loss": 0.0778, "step": 29570 }, { "epoch": 3.4989354151880767, "grad_norm": 0.5520009994506836, "learning_rate": 3.947402441943979e-06, "loss": 0.0759, "step": 29580 }, { "epoch": 3.5001182872013246, "grad_norm": 0.4046032726764679, "learning_rate": 3.946684223126646e-06, "loss": 0.0741, "step": 29590 }, { "epoch": 3.5008280104092737, "eval_accuracy": 0.6877599228133213, "eval_animal_abuse/accuracy": 0.9947266859633364, "eval_animal_abuse/f1": 0.7743772241992882, "eval_animal_abuse/fpr": 0.002911183656985153, "eval_animal_abuse/precision": 0.7587168758716876, "eval_animal_abuse/recall": 0.7906976744186046, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9960574907675417, "eval_child_abuse/f1": 0.654014598540146, "eval_child_abuse/fpr": 0.0021411485254512266, "eval_child_abuse/precision": 0.6363636363636364, "eval_child_abuse/recall": 0.6726726726726727, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9719033835712146, "eval_controversial_topics,politics/f1": 0.4759540800496432, "eval_controversial_topics,politics/fpr": 0.010536792970895095, "eval_controversial_topics,politics/precision": 0.555394641564084, "eval_controversial_topics,politics/recall": 0.4163952225841477, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.956416142662275, "eval_discrimination,stereotype,injustice/f1": 0.7152173913043478, "eval_discrimination,stereotype,injustice/fpr": 0.02042144070553363, "eval_discrimination,stereotype,injustice/precision": 0.744343891402715, "eval_discrimination,stereotype,injustice/recall": 0.6882845188284519, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9743154672788369, "eval_drug_abuse,weapons,banned_substance/f1": 0.7759721416134648, "eval_drug_abuse,weapons,banned_substance/fpr": 0.01466647863488927, "eval_drug_abuse,weapons,banned_substance/precision": 0.7626925270964061, "eval_drug_abuse,weapons,banned_substance/recall": 0.789722386296515, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9612403100775194, "eval_financial_crime,property_crime,theft/f1": 0.8039380679905755, "eval_financial_crime,property_crime,theft/fpr": 0.023146527099496852, "eval_financial_crime,property_crime,theft/precision": 0.7918117023039947, "eval_financial_crime,property_crime,theft/recall": 0.8164416339087336, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8538277273180956, "eval_flagged/aucpr": 0.9083588584899469, "eval_flagged/f1": 0.8660457033096026, "eval_flagged/fpr": 0.14030679218392478, "eval_flagged/precision": 0.8836247122503578, "eval_flagged/recall": 0.8491524917042839, "eval_hate_speech,offensive_language/accuracy": 0.9496789433409855, "eval_hate_speech,offensive_language/f1": 0.6933603649265079, "eval_hate_speech,offensive_language/fpr": 0.019386077105792033, "eval_hate_speech,offensive_language/precision": 0.7632224949787993, "eval_hate_speech,offensive_language/recall": 0.6352154531946508, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08256716281175613, "eval_macro_f1": 0.6431184548556734, "eval_macro_precision": 0.700443269673935, "eval_macro_recall": 0.6257021832380106, "eval_micro_f1": 0.7535136824495123, "eval_micro_precision": 0.7799322575734511, "eval_micro_recall": 0.7288262176147376, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9876900555611006, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.08641975308641975, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0007409527979388027, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.4430379746835443, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.047879616963064295, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8874970888644907, "eval_non_violent_unethical_behavior/f1": 0.6927166159298469, "eval_non_violent_unethical_behavior/fpr": 0.0506560372031223, "eval_non_violent_unethical_behavior/precision": 0.7575275762695022, "eval_non_violent_unethical_behavior/recall": 0.638121546961326, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9792893502345543, "eval_privacy_violation/f1": 0.8030997944013918, "eval_privacy_violation/fpr": 0.01431371176594104, "eval_privacy_violation/precision": 0.7563300565981531, "eval_privacy_violation/recall": 0.8560350640593392, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.5797, "eval_samples_per_second": 100.428, "eval_self_harm/accuracy": 0.9964567322088033, "eval_self_harm/f1": 0.7313997477931904, "eval_self_harm/fpr": 0.001557684577247753, "eval_self_harm/precision": 0.7571801566579635, "eval_self_harm/recall": 0.7073170731707317, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9840802475296936, "eval_sexually_explicit,adult_content/f1": 0.6645636172450052, "eval_sexually_explicit,adult_content/fpr": 0.0078067738251487075, "eval_sexually_explicit,adult_content/precision": 0.6742532005689901, "eval_sexually_explicit,adult_content/recall": 0.6551485832757429, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.57, "eval_terrorism,organized_crime/accuracy": 0.9922979671956615, "eval_terrorism,organized_crime/f1": 0.27769110764430577, "eval_terrorism,organized_crime/fpr": 0.0011906159341304292, "eval_terrorism,organized_crime/precision": 0.55625, "eval_terrorism,organized_crime/recall": 0.18503118503118504, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.922280999434408, "eval_violence,aiding_and_abetting,incitement/f1": 0.8549338632552941, "eval_violence,aiding_and_abetting,incitement/fpr": 0.05545986129368556, "eval_violence,aiding_and_abetting,incitement/precision": 0.8490810410756137, "eval_violence,aiding_and_abetting,incitement/recall": 0.8608679339669835, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 29596 }, { "epoch": 3.501301159214573, "grad_norm": 0.6801959872245789, "learning_rate": 3.9459660043093135e-06, "loss": 0.0724, "step": 29600 }, { "epoch": 3.502484031227821, "grad_norm": 0.4805217981338501, "learning_rate": 3.9452477854919796e-06, "loss": 0.0748, "step": 29610 }, { "epoch": 3.5036669032410694, "grad_norm": 0.504030168056488, "learning_rate": 3.944529566674647e-06, "loss": 0.0769, "step": 29620 }, { "epoch": 3.5048497752543177, "grad_norm": 0.5501826405525208, "learning_rate": 3.943811347857314e-06, "loss": 0.0804, "step": 29630 }, { "epoch": 3.5060326472675656, "grad_norm": 0.5403172969818115, "learning_rate": 3.943093129039981e-06, "loss": 0.0817, "step": 29640 }, { "epoch": 3.507215519280814, "grad_norm": 0.44868603348731995, "learning_rate": 3.942374910222648e-06, "loss": 0.0753, "step": 29650 }, { "epoch": 3.5083983912940617, "grad_norm": 0.43007802963256836, "learning_rate": 3.941656691405315e-06, "loss": 0.0794, "step": 29660 }, { "epoch": 3.50958126330731, "grad_norm": 0.4943160116672516, "learning_rate": 3.940938472587982e-06, "loss": 0.0815, "step": 29670 }, { "epoch": 3.5107641353205583, "grad_norm": 0.4062444269657135, "learning_rate": 3.940220253770649e-06, "loss": 0.0801, "step": 29680 }, { "epoch": 3.5119470073338066, "grad_norm": 0.4968205988407135, "learning_rate": 3.939502034953316e-06, "loss": 0.0827, "step": 29690 }, { "epoch": 3.513129879347055, "grad_norm": 0.4544658064842224, "learning_rate": 3.938783816135983e-06, "loss": 0.0743, "step": 29700 }, { "epoch": 3.5143127513603027, "grad_norm": 0.5261556506156921, "learning_rate": 3.93806559731865e-06, "loss": 0.0788, "step": 29710 }, { "epoch": 3.515495623373551, "grad_norm": 0.4296594560146332, "learning_rate": 3.937347378501317e-06, "loss": 0.075, "step": 29720 }, { "epoch": 3.516678495386799, "grad_norm": 0.4397100806236267, "learning_rate": 3.9366291596839835e-06, "loss": 0.0725, "step": 29730 }, { "epoch": 3.517861367400047, "grad_norm": 0.5916833281517029, "learning_rate": 3.935910940866651e-06, "loss": 0.0687, "step": 29740 }, { "epoch": 3.5190442394132955, "grad_norm": 0.46545565128326416, "learning_rate": 3.935192722049317e-06, "loss": 0.074, "step": 29750 }, { "epoch": 3.5202271114265438, "grad_norm": 0.42188993096351624, "learning_rate": 3.934474503231985e-06, "loss": 0.0742, "step": 29760 }, { "epoch": 3.521409983439792, "grad_norm": 0.4538876414299011, "learning_rate": 3.933756284414651e-06, "loss": 0.0756, "step": 29770 }, { "epoch": 3.52259285545304, "grad_norm": 0.60561203956604, "learning_rate": 3.933038065597319e-06, "loss": 0.0797, "step": 29780 }, { "epoch": 3.523775727466288, "grad_norm": 0.6185647249221802, "learning_rate": 3.932319846779986e-06, "loss": 0.0761, "step": 29790 }, { "epoch": 3.524958599479536, "grad_norm": 0.5526715517044067, "learning_rate": 3.931601627962653e-06, "loss": 0.0867, "step": 29800 }, { "epoch": 3.5261414714927843, "grad_norm": 0.416323184967041, "learning_rate": 3.93088340914532e-06, "loss": 0.0764, "step": 29810 }, { "epoch": 3.5273243435060326, "grad_norm": 0.4476059079170227, "learning_rate": 3.930165190327987e-06, "loss": 0.0781, "step": 29820 }, { "epoch": 3.528507215519281, "grad_norm": 0.4931854009628296, "learning_rate": 3.9294469715106536e-06, "loss": 0.0778, "step": 29830 }, { "epoch": 3.529690087532529, "grad_norm": 0.5977569222450256, "learning_rate": 3.9287287526933205e-06, "loss": 0.0791, "step": 29840 }, { "epoch": 3.530872959545777, "grad_norm": 0.6133980751037598, "learning_rate": 3.928010533875988e-06, "loss": 0.0733, "step": 29850 }, { "epoch": 3.5320558315590254, "grad_norm": 0.3986093997955322, "learning_rate": 3.927292315058654e-06, "loss": 0.0757, "step": 29860 }, { "epoch": 3.533238703572273, "grad_norm": 0.49799731373786926, "learning_rate": 3.926574096241322e-06, "loss": 0.0821, "step": 29870 }, { "epoch": 3.5344215755855215, "grad_norm": 0.46968501806259155, "learning_rate": 3.925855877423988e-06, "loss": 0.0743, "step": 29880 }, { "epoch": 3.53560444759877, "grad_norm": 0.49903395771980286, "learning_rate": 3.925137658606656e-06, "loss": 0.0735, "step": 29890 }, { "epoch": 3.536787319612018, "grad_norm": 0.4188426434993744, "learning_rate": 3.924419439789323e-06, "loss": 0.0762, "step": 29900 }, { "epoch": 3.5379701916252664, "grad_norm": 0.4461091160774231, "learning_rate": 3.92370122097199e-06, "loss": 0.0791, "step": 29910 }, { "epoch": 3.5391530636385142, "grad_norm": 0.4511685371398926, "learning_rate": 3.922983002154657e-06, "loss": 0.0745, "step": 29920 }, { "epoch": 3.5403359356517625, "grad_norm": 0.5457428097724915, "learning_rate": 3.922264783337324e-06, "loss": 0.0795, "step": 29930 }, { "epoch": 3.5415188076650104, "grad_norm": 0.44295555353164673, "learning_rate": 3.9215465645199906e-06, "loss": 0.0733, "step": 29940 }, { "epoch": 3.5427016796782587, "grad_norm": 0.5473998188972473, "learning_rate": 3.9208283457026575e-06, "loss": 0.0815, "step": 29950 }, { "epoch": 3.543884551691507, "grad_norm": 0.4159155488014221, "learning_rate": 3.920110126885324e-06, "loss": 0.0677, "step": 29960 }, { "epoch": 3.5450674237047552, "grad_norm": 0.6578656435012817, "learning_rate": 3.919391908067991e-06, "loss": 0.0718, "step": 29970 }, { "epoch": 3.5462502957180035, "grad_norm": 0.41140687465667725, "learning_rate": 3.918673689250658e-06, "loss": 0.0709, "step": 29980 }, { "epoch": 3.5474331677312514, "grad_norm": 0.4087271988391876, "learning_rate": 3.917955470433325e-06, "loss": 0.0798, "step": 29990 }, { "epoch": 3.5486160397444997, "grad_norm": 0.4075837731361389, "learning_rate": 3.917237251615992e-06, "loss": 0.0791, "step": 30000 }, { "epoch": 3.549798911757748, "grad_norm": 0.3352956473827362, "learning_rate": 3.91651903279866e-06, "loss": 0.0704, "step": 30010 }, { "epoch": 3.550981783770996, "grad_norm": 0.42119139432907104, "learning_rate": 3.915800813981326e-06, "loss": 0.0762, "step": 30020 }, { "epoch": 3.552164655784244, "grad_norm": 0.34849196672439575, "learning_rate": 3.915082595163994e-06, "loss": 0.0744, "step": 30030 }, { "epoch": 3.5533475277974924, "grad_norm": 0.462999552488327, "learning_rate": 3.91436437634666e-06, "loss": 0.0788, "step": 30040 }, { "epoch": 3.5545303998107407, "grad_norm": 0.5142143964767456, "learning_rate": 3.9136461575293275e-06, "loss": 0.0786, "step": 30050 }, { "epoch": 3.5557132718239886, "grad_norm": 0.4762876629829407, "learning_rate": 3.9129279387119945e-06, "loss": 0.079, "step": 30060 }, { "epoch": 3.556896143837237, "grad_norm": 0.46461546421051025, "learning_rate": 3.912209719894661e-06, "loss": 0.0756, "step": 30070 }, { "epoch": 3.558079015850485, "grad_norm": 0.6356685161590576, "learning_rate": 3.911491501077328e-06, "loss": 0.0806, "step": 30080 }, { "epoch": 3.559261887863733, "grad_norm": 0.554053783416748, "learning_rate": 3.910773282259995e-06, "loss": 0.0809, "step": 30090 }, { "epoch": 3.5604447598769813, "grad_norm": 0.4786475598812103, "learning_rate": 3.910055063442663e-06, "loss": 0.0764, "step": 30100 }, { "epoch": 3.5616276318902296, "grad_norm": 0.4265531003475189, "learning_rate": 3.909336844625329e-06, "loss": 0.072, "step": 30110 }, { "epoch": 3.562810503903478, "grad_norm": 0.5767152905464172, "learning_rate": 3.908618625807997e-06, "loss": 0.0821, "step": 30120 }, { "epoch": 3.5639933759167257, "grad_norm": 0.5646173357963562, "learning_rate": 3.907900406990663e-06, "loss": 0.0788, "step": 30130 }, { "epoch": 3.565176247929974, "grad_norm": 0.47844359278678894, "learning_rate": 3.907182188173331e-06, "loss": 0.0776, "step": 30140 }, { "epoch": 3.5663591199432223, "grad_norm": 0.41242194175720215, "learning_rate": 3.906463969355997e-06, "loss": 0.0778, "step": 30150 }, { "epoch": 3.56754199195647, "grad_norm": 0.43130218982696533, "learning_rate": 3.9057457505386645e-06, "loss": 0.0684, "step": 30160 }, { "epoch": 3.5687248639697184, "grad_norm": 0.5441617369651794, "learning_rate": 3.9050275317213315e-06, "loss": 0.0864, "step": 30170 }, { "epoch": 3.5699077359829667, "grad_norm": 0.42717763781547546, "learning_rate": 3.904309312903998e-06, "loss": 0.0769, "step": 30180 }, { "epoch": 3.571090607996215, "grad_norm": 0.44450244307518005, "learning_rate": 3.903591094086665e-06, "loss": 0.0823, "step": 30190 }, { "epoch": 3.572273480009463, "grad_norm": 0.52937251329422, "learning_rate": 3.902872875269332e-06, "loss": 0.0779, "step": 30200 }, { "epoch": 3.573456352022711, "grad_norm": 0.5183858275413513, "learning_rate": 3.902154656451999e-06, "loss": 0.0751, "step": 30210 }, { "epoch": 3.5746392240359595, "grad_norm": 0.41199734807014465, "learning_rate": 3.901436437634666e-06, "loss": 0.0791, "step": 30220 }, { "epoch": 3.5758220960492073, "grad_norm": 0.5112448930740356, "learning_rate": 3.900718218817333e-06, "loss": 0.0856, "step": 30230 }, { "epoch": 3.5770049680624556, "grad_norm": 0.5140843391418457, "learning_rate": 3.9e-06, "loss": 0.0791, "step": 30240 }, { "epoch": 3.578187840075704, "grad_norm": 0.5423294901847839, "learning_rate": 3.899281781182667e-06, "loss": 0.0724, "step": 30250 }, { "epoch": 3.579370712088952, "grad_norm": 0.43991079926490784, "learning_rate": 3.898563562365334e-06, "loss": 0.075, "step": 30260 }, { "epoch": 3.5805535841022, "grad_norm": 0.5899460911750793, "learning_rate": 3.897845343548001e-06, "loss": 0.0837, "step": 30270 }, { "epoch": 3.5817364561154483, "grad_norm": 0.35619139671325684, "learning_rate": 3.8971271247306685e-06, "loss": 0.0822, "step": 30280 }, { "epoch": 3.5829193281286966, "grad_norm": 0.634025514125824, "learning_rate": 3.8964089059133345e-06, "loss": 0.0786, "step": 30290 }, { "epoch": 3.5841022001419445, "grad_norm": 0.5344125628471375, "learning_rate": 3.895690687096002e-06, "loss": 0.0779, "step": 30300 }, { "epoch": 3.5852850721551928, "grad_norm": 0.4119875431060791, "learning_rate": 3.894972468278668e-06, "loss": 0.0748, "step": 30310 }, { "epoch": 3.586467944168441, "grad_norm": 0.3985072672367096, "learning_rate": 3.894254249461336e-06, "loss": 0.0784, "step": 30320 }, { "epoch": 3.5876508161816894, "grad_norm": 0.4354095160961151, "learning_rate": 3.893536030644002e-06, "loss": 0.0773, "step": 30330 }, { "epoch": 3.588833688194937, "grad_norm": 0.5883209109306335, "learning_rate": 3.89281781182667e-06, "loss": 0.0743, "step": 30340 }, { "epoch": 3.5900165602081855, "grad_norm": 0.5142048001289368, "learning_rate": 3.892099593009337e-06, "loss": 0.0717, "step": 30350 }, { "epoch": 3.591199432221434, "grad_norm": 0.4442501664161682, "learning_rate": 3.891381374192004e-06, "loss": 0.0748, "step": 30360 }, { "epoch": 3.5923823042346816, "grad_norm": 0.7536374926567078, "learning_rate": 3.890663155374672e-06, "loss": 0.0808, "step": 30370 }, { "epoch": 3.59356517624793, "grad_norm": 0.51649010181427, "learning_rate": 3.889944936557338e-06, "loss": 0.0746, "step": 30380 }, { "epoch": 3.594748048261178, "grad_norm": 0.43018174171447754, "learning_rate": 3.8892267177400055e-06, "loss": 0.0832, "step": 30390 }, { "epoch": 3.5959309202744265, "grad_norm": 0.44724634289741516, "learning_rate": 3.8885084989226715e-06, "loss": 0.0788, "step": 30400 }, { "epoch": 3.5971137922876744, "grad_norm": 0.42644980549812317, "learning_rate": 3.887790280105339e-06, "loss": 0.0708, "step": 30410 }, { "epoch": 3.5982966643009227, "grad_norm": 0.5213482975959778, "learning_rate": 3.887072061288005e-06, "loss": 0.0813, "step": 30420 }, { "epoch": 3.599479536314171, "grad_norm": 0.49189493060112, "learning_rate": 3.886353842470673e-06, "loss": 0.0791, "step": 30430 }, { "epoch": 3.600662408327419, "grad_norm": 0.4207351505756378, "learning_rate": 3.88563562365334e-06, "loss": 0.0772, "step": 30440 }, { "epoch": 3.601845280340667, "grad_norm": 0.3949311375617981, "learning_rate": 3.884917404836007e-06, "loss": 0.0789, "step": 30450 }, { "epoch": 3.6030281523539154, "grad_norm": 0.6785097122192383, "learning_rate": 3.884199186018674e-06, "loss": 0.0776, "step": 30460 }, { "epoch": 3.6042110243671637, "grad_norm": 0.4071280360221863, "learning_rate": 3.883480967201341e-06, "loss": 0.0785, "step": 30470 }, { "epoch": 3.6053938963804115, "grad_norm": 0.4018535912036896, "learning_rate": 3.882762748384008e-06, "loss": 0.0772, "step": 30480 }, { "epoch": 3.60657676839366, "grad_norm": 0.4343741238117218, "learning_rate": 3.882044529566675e-06, "loss": 0.0704, "step": 30490 }, { "epoch": 3.607759640406908, "grad_norm": 0.37204262614250183, "learning_rate": 3.881326310749342e-06, "loss": 0.0801, "step": 30500 }, { "epoch": 3.608942512420156, "grad_norm": 0.4498167932033539, "learning_rate": 3.8806080919320085e-06, "loss": 0.0733, "step": 30510 }, { "epoch": 3.6101253844334042, "grad_norm": 0.6283396482467651, "learning_rate": 3.8798898731146755e-06, "loss": 0.0813, "step": 30520 }, { "epoch": 3.6113082564466525, "grad_norm": 0.48989683389663696, "learning_rate": 3.879171654297342e-06, "loss": 0.0825, "step": 30530 }, { "epoch": 3.612491128459901, "grad_norm": 0.46587517857551575, "learning_rate": 3.878453435480009e-06, "loss": 0.0806, "step": 30540 }, { "epoch": 3.6136740004731487, "grad_norm": 0.4203934967517853, "learning_rate": 3.877735216662677e-06, "loss": 0.0826, "step": 30550 }, { "epoch": 3.614856872486397, "grad_norm": 0.4773674011230469, "learning_rate": 3.877016997845343e-06, "loss": 0.0738, "step": 30560 }, { "epoch": 3.6160397444996453, "grad_norm": 0.5118473768234253, "learning_rate": 3.876298779028011e-06, "loss": 0.0787, "step": 30570 }, { "epoch": 3.617222616512893, "grad_norm": 0.4816795289516449, "learning_rate": 3.875580560210677e-06, "loss": 0.0774, "step": 30580 }, { "epoch": 3.6184054885261414, "grad_norm": 0.5176694393157959, "learning_rate": 3.874862341393345e-06, "loss": 0.0806, "step": 30590 }, { "epoch": 3.6195883605393897, "grad_norm": 0.739837646484375, "learning_rate": 3.874144122576012e-06, "loss": 0.0833, "step": 30600 }, { "epoch": 3.620771232552638, "grad_norm": 0.42033928632736206, "learning_rate": 3.873425903758679e-06, "loss": 0.0822, "step": 30610 }, { "epoch": 3.621954104565886, "grad_norm": 0.4167430102825165, "learning_rate": 3.8727076849413455e-06, "loss": 0.0837, "step": 30620 }, { "epoch": 3.623136976579134, "grad_norm": 0.4307820796966553, "learning_rate": 3.8719894661240125e-06, "loss": 0.0778, "step": 30630 }, { "epoch": 3.6243198485923824, "grad_norm": 0.5003849267959595, "learning_rate": 3.87127124730668e-06, "loss": 0.0774, "step": 30640 }, { "epoch": 3.6255027206056303, "grad_norm": 0.4322277307510376, "learning_rate": 3.870553028489346e-06, "loss": 0.0788, "step": 30650 }, { "epoch": 3.6266855926188786, "grad_norm": 0.5024195909500122, "learning_rate": 3.869834809672014e-06, "loss": 0.0849, "step": 30660 }, { "epoch": 3.627868464632127, "grad_norm": 0.5118405222892761, "learning_rate": 3.86911659085468e-06, "loss": 0.0796, "step": 30670 }, { "epoch": 3.629051336645375, "grad_norm": 0.5001336932182312, "learning_rate": 3.868398372037348e-06, "loss": 0.0767, "step": 30680 }, { "epoch": 3.630234208658623, "grad_norm": 0.41827765107154846, "learning_rate": 3.867680153220014e-06, "loss": 0.0696, "step": 30690 }, { "epoch": 3.6314170806718713, "grad_norm": 0.36211642622947693, "learning_rate": 3.866961934402682e-06, "loss": 0.0757, "step": 30700 }, { "epoch": 3.6325999526851196, "grad_norm": 0.49040526151657104, "learning_rate": 3.866243715585349e-06, "loss": 0.0759, "step": 30710 }, { "epoch": 3.6337828246983674, "grad_norm": 0.5257058143615723, "learning_rate": 3.865525496768016e-06, "loss": 0.0817, "step": 30720 }, { "epoch": 3.6349656967116157, "grad_norm": 0.4138934314250946, "learning_rate": 3.8648072779506825e-06, "loss": 0.0726, "step": 30730 }, { "epoch": 3.636148568724864, "grad_norm": 0.4790090322494507, "learning_rate": 3.8640890591333495e-06, "loss": 0.0734, "step": 30740 }, { "epoch": 3.6373314407381123, "grad_norm": 0.6922604441642761, "learning_rate": 3.863370840316016e-06, "loss": 0.078, "step": 30750 }, { "epoch": 3.63851431275136, "grad_norm": 0.4742043614387512, "learning_rate": 3.862652621498683e-06, "loss": 0.0781, "step": 30760 }, { "epoch": 3.6396971847646085, "grad_norm": 0.5308073163032532, "learning_rate": 3.86193440268135e-06, "loss": 0.0832, "step": 30770 }, { "epoch": 3.6408800567778568, "grad_norm": 0.5203481912612915, "learning_rate": 3.861216183864017e-06, "loss": 0.0776, "step": 30780 }, { "epoch": 3.6420629287911046, "grad_norm": 0.47670114040374756, "learning_rate": 3.860497965046684e-06, "loss": 0.0755, "step": 30790 }, { "epoch": 3.643245800804353, "grad_norm": 0.4815932810306549, "learning_rate": 3.859779746229351e-06, "loss": 0.0774, "step": 30800 }, { "epoch": 3.644428672817601, "grad_norm": 0.44921591877937317, "learning_rate": 3.859061527412018e-06, "loss": 0.0757, "step": 30810 }, { "epoch": 3.6456115448308495, "grad_norm": 0.7247675657272339, "learning_rate": 3.858343308594686e-06, "loss": 0.0775, "step": 30820 }, { "epoch": 3.6467944168440973, "grad_norm": 0.7362940907478333, "learning_rate": 3.857625089777352e-06, "loss": 0.0798, "step": 30830 }, { "epoch": 3.6479772888573456, "grad_norm": 0.3934761583805084, "learning_rate": 3.8569068709600195e-06, "loss": 0.0718, "step": 30840 }, { "epoch": 3.649160160870594, "grad_norm": 0.552147626876831, "learning_rate": 3.8561886521426865e-06, "loss": 0.076, "step": 30850 }, { "epoch": 3.6503430328838418, "grad_norm": 0.43012911081314087, "learning_rate": 3.855470433325353e-06, "loss": 0.0765, "step": 30860 }, { "epoch": 3.65152590489709, "grad_norm": 0.5156548023223877, "learning_rate": 3.85475221450802e-06, "loss": 0.0778, "step": 30870 }, { "epoch": 3.6527087769103384, "grad_norm": 0.41516801714897156, "learning_rate": 3.854033995690687e-06, "loss": 0.0739, "step": 30880 }, { "epoch": 3.6538916489235866, "grad_norm": 0.40510043501853943, "learning_rate": 3.853315776873354e-06, "loss": 0.078, "step": 30890 }, { "epoch": 3.6550745209368345, "grad_norm": 0.4382188618183136, "learning_rate": 3.852597558056021e-06, "loss": 0.0744, "step": 30900 }, { "epoch": 3.656257392950083, "grad_norm": 0.39627647399902344, "learning_rate": 3.851879339238688e-06, "loss": 0.0764, "step": 30910 }, { "epoch": 3.657440264963331, "grad_norm": 0.3530038297176361, "learning_rate": 3.851161120421355e-06, "loss": 0.0752, "step": 30920 }, { "epoch": 3.658623136976579, "grad_norm": 0.5604034066200256, "learning_rate": 3.850442901604023e-06, "loss": 0.0845, "step": 30930 }, { "epoch": 3.659806008989827, "grad_norm": 0.5860835313796997, "learning_rate": 3.849724682786689e-06, "loss": 0.0835, "step": 30940 }, { "epoch": 3.6609888810030755, "grad_norm": 0.47773945331573486, "learning_rate": 3.8490064639693565e-06, "loss": 0.0774, "step": 30950 }, { "epoch": 3.662171753016324, "grad_norm": 0.5268082022666931, "learning_rate": 3.848288245152023e-06, "loss": 0.0756, "step": 30960 }, { "epoch": 3.6633546250295717, "grad_norm": 0.4733009338378906, "learning_rate": 3.84757002633469e-06, "loss": 0.0747, "step": 30970 }, { "epoch": 3.66453749704282, "grad_norm": 0.4484284520149231, "learning_rate": 3.846851807517357e-06, "loss": 0.0841, "step": 30980 }, { "epoch": 3.6657203690560682, "grad_norm": 0.9101925492286682, "learning_rate": 3.846133588700024e-06, "loss": 0.0862, "step": 30990 }, { "epoch": 3.666903241069316, "grad_norm": 0.4211864769458771, "learning_rate": 3.845415369882691e-06, "loss": 0.0765, "step": 31000 }, { "epoch": 3.6680861130825644, "grad_norm": 0.48594987392425537, "learning_rate": 3.844697151065358e-06, "loss": 0.0753, "step": 31010 }, { "epoch": 3.6692689850958127, "grad_norm": 0.552697479724884, "learning_rate": 3.843978932248025e-06, "loss": 0.0755, "step": 31020 }, { "epoch": 3.670451857109061, "grad_norm": 0.4896167814731598, "learning_rate": 3.843260713430692e-06, "loss": 0.0743, "step": 31030 }, { "epoch": 3.671634729122309, "grad_norm": 0.5095158219337463, "learning_rate": 3.842542494613359e-06, "loss": 0.078, "step": 31040 }, { "epoch": 3.672817601135557, "grad_norm": 0.4982917904853821, "learning_rate": 3.841824275796026e-06, "loss": 0.0769, "step": 31050 }, { "epoch": 3.6740004731488054, "grad_norm": 0.46891486644744873, "learning_rate": 3.841106056978693e-06, "loss": 0.0886, "step": 31060 }, { "epoch": 3.6751833451620533, "grad_norm": 0.3970227539539337, "learning_rate": 3.84038783816136e-06, "loss": 0.0775, "step": 31070 }, { "epoch": 3.6763662171753015, "grad_norm": 0.3879355788230896, "learning_rate": 3.8396696193440265e-06, "loss": 0.0747, "step": 31080 }, { "epoch": 3.67754908918855, "grad_norm": 0.5032815337181091, "learning_rate": 3.838951400526694e-06, "loss": 0.0756, "step": 31090 }, { "epoch": 3.678731961201798, "grad_norm": 0.5693585276603699, "learning_rate": 3.83823318170936e-06, "loss": 0.0771, "step": 31100 }, { "epoch": 3.679914833215046, "grad_norm": 0.5337594151496887, "learning_rate": 3.837514962892028e-06, "loss": 0.0751, "step": 31110 }, { "epoch": 3.6810977052282943, "grad_norm": 0.5127356648445129, "learning_rate": 3.836796744074695e-06, "loss": 0.0717, "step": 31120 }, { "epoch": 3.6822805772415426, "grad_norm": 0.45233476161956787, "learning_rate": 3.836078525257362e-06, "loss": 0.0756, "step": 31130 }, { "epoch": 3.6834634492547904, "grad_norm": 0.5893577337265015, "learning_rate": 3.835360306440029e-06, "loss": 0.0744, "step": 31140 }, { "epoch": 3.6846463212680387, "grad_norm": 0.4174349308013916, "learning_rate": 3.834642087622696e-06, "loss": 0.0733, "step": 31150 }, { "epoch": 3.685829193281287, "grad_norm": 0.5071923732757568, "learning_rate": 3.833923868805363e-06, "loss": 0.0781, "step": 31160 }, { "epoch": 3.6870120652945353, "grad_norm": 0.6108683347702026, "learning_rate": 3.83320564998803e-06, "loss": 0.0847, "step": 31170 }, { "epoch": 3.688194937307783, "grad_norm": 0.3468452990055084, "learning_rate": 3.832487431170697e-06, "loss": 0.0745, "step": 31180 }, { "epoch": 3.6893778093210314, "grad_norm": 0.40279433131217957, "learning_rate": 3.8317692123533635e-06, "loss": 0.0761, "step": 31190 }, { "epoch": 3.6905606813342797, "grad_norm": 0.5498315095901489, "learning_rate": 3.831050993536031e-06, "loss": 0.0716, "step": 31200 }, { "epoch": 3.6917435533475276, "grad_norm": 0.41828954219818115, "learning_rate": 3.830332774718697e-06, "loss": 0.0853, "step": 31210 }, { "epoch": 3.692926425360776, "grad_norm": 0.5140865445137024, "learning_rate": 3.829614555901365e-06, "loss": 0.0715, "step": 31220 }, { "epoch": 3.694109297374024, "grad_norm": 0.5079203844070435, "learning_rate": 3.828896337084031e-06, "loss": 0.0769, "step": 31230 }, { "epoch": 3.6952921693872725, "grad_norm": 0.4573640823364258, "learning_rate": 3.828178118266699e-06, "loss": 0.0799, "step": 31240 }, { "epoch": 3.6964750414005203, "grad_norm": 0.42878609895706177, "learning_rate": 3.827459899449365e-06, "loss": 0.0755, "step": 31250 }, { "epoch": 3.6976579134137686, "grad_norm": 0.47296833992004395, "learning_rate": 3.826741680632033e-06, "loss": 0.0782, "step": 31260 }, { "epoch": 3.698840785427017, "grad_norm": 0.5116195678710938, "learning_rate": 3.8260234618147e-06, "loss": 0.0806, "step": 31270 }, { "epoch": 3.7000236574402647, "grad_norm": 0.4853268563747406, "learning_rate": 3.825305242997367e-06, "loss": 0.0718, "step": 31280 }, { "epoch": 3.701206529453513, "grad_norm": 0.44282805919647217, "learning_rate": 3.824587024180034e-06, "loss": 0.0879, "step": 31290 }, { "epoch": 3.7023894014667613, "grad_norm": 0.5223445892333984, "learning_rate": 3.8238688053627005e-06, "loss": 0.0751, "step": 31300 }, { "epoch": 3.7035722734800096, "grad_norm": 0.4828682243824005, "learning_rate": 3.8231505865453674e-06, "loss": 0.0876, "step": 31310 }, { "epoch": 3.704755145493258, "grad_norm": 0.41710907220840454, "learning_rate": 3.822432367728034e-06, "loss": 0.0705, "step": 31320 }, { "epoch": 3.7059380175065058, "grad_norm": 0.6334251165390015, "learning_rate": 3.821714148910701e-06, "loss": 0.0772, "step": 31330 }, { "epoch": 3.707120889519754, "grad_norm": 0.38305845856666565, "learning_rate": 3.820995930093368e-06, "loss": 0.0776, "step": 31340 }, { "epoch": 3.708303761533002, "grad_norm": 0.4634537994861603, "learning_rate": 3.820277711276035e-06, "loss": 0.0749, "step": 31350 }, { "epoch": 3.70948663354625, "grad_norm": 0.49320468306541443, "learning_rate": 3.819559492458703e-06, "loss": 0.0738, "step": 31360 }, { "epoch": 3.7106695055594985, "grad_norm": 0.4159335792064667, "learning_rate": 3.81884127364137e-06, "loss": 0.0719, "step": 31370 }, { "epoch": 3.7118523775727468, "grad_norm": 0.5903071761131287, "learning_rate": 3.818123054824037e-06, "loss": 0.0805, "step": 31380 }, { "epoch": 3.713035249585995, "grad_norm": 0.39124569296836853, "learning_rate": 3.817404836006704e-06, "loss": 0.0806, "step": 31390 }, { "epoch": 3.714218121599243, "grad_norm": 0.512255847454071, "learning_rate": 3.816686617189371e-06, "loss": 0.0828, "step": 31400 }, { "epoch": 3.715400993612491, "grad_norm": 0.36794641613960266, "learning_rate": 3.8159683983720375e-06, "loss": 0.0756, "step": 31410 }, { "epoch": 3.716583865625739, "grad_norm": 0.4124870300292969, "learning_rate": 3.8152501795547044e-06, "loss": 0.0733, "step": 31420 }, { "epoch": 3.7177667376389874, "grad_norm": 0.41881272196769714, "learning_rate": 3.814531960737372e-06, "loss": 0.0805, "step": 31430 }, { "epoch": 3.7189496096522356, "grad_norm": 0.6654837727546692, "learning_rate": 3.8138137419200383e-06, "loss": 0.0769, "step": 31440 }, { "epoch": 3.720132481665484, "grad_norm": 0.4225428104400635, "learning_rate": 3.8130955231027056e-06, "loss": 0.0766, "step": 31450 }, { "epoch": 3.7213153536787322, "grad_norm": 0.5160561800003052, "learning_rate": 3.812377304285372e-06, "loss": 0.0787, "step": 31460 }, { "epoch": 3.72249822569198, "grad_norm": 0.5828892588615417, "learning_rate": 3.8116590854680395e-06, "loss": 0.072, "step": 31470 }, { "epoch": 3.7236810977052284, "grad_norm": 0.48425573110580444, "learning_rate": 3.810940866650706e-06, "loss": 0.0667, "step": 31480 }, { "epoch": 3.724863969718476, "grad_norm": 0.3661248981952667, "learning_rate": 3.8102226478333733e-06, "loss": 0.0714, "step": 31490 }, { "epoch": 3.7260468417317245, "grad_norm": 0.49171483516693115, "learning_rate": 3.8095044290160402e-06, "loss": 0.0767, "step": 31500 }, { "epoch": 3.727229713744973, "grad_norm": 0.38192152976989746, "learning_rate": 3.8087862101987076e-06, "loss": 0.0741, "step": 31510 }, { "epoch": 3.728412585758221, "grad_norm": 0.4182201027870178, "learning_rate": 3.808067991381374e-06, "loss": 0.083, "step": 31520 }, { "epoch": 3.7295954577714694, "grad_norm": 0.4035213887691498, "learning_rate": 3.8073497725640414e-06, "loss": 0.0739, "step": 31530 }, { "epoch": 3.7307783297847172, "grad_norm": 0.5739060044288635, "learning_rate": 3.806631553746708e-06, "loss": 0.0712, "step": 31540 }, { "epoch": 3.7319612017979655, "grad_norm": 0.3951142430305481, "learning_rate": 3.8059133349293753e-06, "loss": 0.0704, "step": 31550 }, { "epoch": 3.7331440738112134, "grad_norm": 0.550438404083252, "learning_rate": 3.8051951161120418e-06, "loss": 0.0809, "step": 31560 }, { "epoch": 3.7343269458244617, "grad_norm": 0.4984144866466522, "learning_rate": 3.804476897294709e-06, "loss": 0.0805, "step": 31570 }, { "epoch": 3.73550981783771, "grad_norm": 0.6257160902023315, "learning_rate": 3.803758678477376e-06, "loss": 0.0823, "step": 31580 }, { "epoch": 3.7366926898509583, "grad_norm": 0.49435117840766907, "learning_rate": 3.8030404596600434e-06, "loss": 0.0754, "step": 31590 }, { "epoch": 3.7378755618642066, "grad_norm": 0.4900533854961395, "learning_rate": 3.80232224084271e-06, "loss": 0.0757, "step": 31600 }, { "epoch": 3.7390584338774544, "grad_norm": 0.3943498730659485, "learning_rate": 3.8016040220253772e-06, "loss": 0.079, "step": 31610 }, { "epoch": 3.7402413058907027, "grad_norm": 0.5789697766304016, "learning_rate": 3.8008858032080446e-06, "loss": 0.0855, "step": 31620 }, { "epoch": 3.7414241779039505, "grad_norm": 0.3551311194896698, "learning_rate": 3.800167584390711e-06, "loss": 0.0781, "step": 31630 }, { "epoch": 3.742607049917199, "grad_norm": 0.47470545768737793, "learning_rate": 3.7994493655733784e-06, "loss": 0.0788, "step": 31640 }, { "epoch": 3.743789921930447, "grad_norm": 0.4434899687767029, "learning_rate": 3.798731146756045e-06, "loss": 0.0761, "step": 31650 }, { "epoch": 3.7449727939436954, "grad_norm": 0.4540637135505676, "learning_rate": 3.7980129279387123e-06, "loss": 0.0792, "step": 31660 }, { "epoch": 3.7461556659569437, "grad_norm": 0.6423723101615906, "learning_rate": 3.7972947091213788e-06, "loss": 0.0774, "step": 31670 }, { "epoch": 3.7473385379701916, "grad_norm": 0.4968102276325226, "learning_rate": 3.796576490304046e-06, "loss": 0.0831, "step": 31680 }, { "epoch": 3.74852140998344, "grad_norm": 0.4431005120277405, "learning_rate": 3.795858271486713e-06, "loss": 0.0733, "step": 31690 }, { "epoch": 3.7497042819966877, "grad_norm": 0.6439856886863708, "learning_rate": 3.7951400526693804e-06, "loss": 0.0855, "step": 31700 }, { "epoch": 3.750887154009936, "grad_norm": 0.4775540232658386, "learning_rate": 3.794421833852047e-06, "loss": 0.0773, "step": 31710 }, { "epoch": 3.750887154009936, "eval_accuracy": 0.6846325315234388, "eval_animal_abuse/accuracy": 0.9947266859633364, "eval_animal_abuse/f1": 0.7717782577393808, "eval_animal_abuse/fpr": 0.002776562447413585, "eval_animal_abuse/precision": 0.7646219686162625, "eval_animal_abuse/recall": 0.7790697674418605, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9962903816082777, "eval_child_abuse/f1": 0.6656671664167916, "eval_child_abuse/fpr": 0.0018735049597698233, "eval_child_abuse/precision": 0.6646706586826348, "eval_child_abuse/recall": 0.6666666666666666, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9727018664537379, "eval_controversial_topics,politics/f1": 0.47555129434324067, "eval_controversial_topics,politics/fpr": 0.009318369028006573, "eval_controversial_topics,politics/precision": 0.578088578088578, "eval_controversial_topics,politics/recall": 0.40390879478827363, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.9533885617327078, "eval_discrimination,stereotype,injustice/f1": 0.7239408866995074, "eval_discrimination,stereotype,injustice/fpr": 0.03065023312972127, "eval_discrimination,stereotype,injustice/precision": 0.684171322160149, "eval_discrimination,stereotype,injustice/recall": 0.7686192468619247, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.973849685597365, "eval_drug_abuse,weapons,banned_substance/f1": 0.7708454810495626, "eval_drug_abuse,weapons,banned_substance/fpr": 0.014631222676632324, "eval_drug_abuse,weapons,banned_substance/precision": 0.7610823258491652, "eval_drug_abuse,weapons,banned_substance/recall": 0.780862374483166, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9605416375553115, "eval_financial_crime,property_crime,theft/f1": 0.8049342105263158, "eval_financial_crime,property_crime,theft/fpr": 0.026076700514162457, "eval_financial_crime,property_crime,theft/precision": 0.7757172293548898, "eval_financial_crime,property_crime,theft/recall": 0.8364382156896257, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8544598596000932, "eval_flagged/aucpr": 0.9085423384933629, "eval_flagged/f1": 0.8668077397354119, "eval_flagged/fpr": 0.14128192626486089, "eval_flagged/precision": 0.8831430698597841, "eval_flagged/recall": 0.8510657379450539, "eval_hate_speech,offensive_language/accuracy": 0.9477825464949928, "eval_hate_speech,offensive_language/f1": 0.6965683905268245, "eval_hate_speech,offensive_language/fpr": 0.024812716974237117, "eval_hate_speech,offensive_language/precision": 0.7262648659544446, "eval_hate_speech,offensive_language/recall": 0.6692050520059435, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08325859159231186, "eval_macro_f1": 0.6466683335916256, "eval_macro_precision": 0.6958934110236145, "eval_macro_recall": 0.6305090560186043, "eval_micro_f1": 0.7526256699985514, "eval_micro_precision": 0.7666894172769364, "eval_micro_recall": 0.7390685847395843, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.987523704960575, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.10501193317422435, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0010609096879578312, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.411214953271028, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.060191518467852256, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.883188608310876, "eval_non_violent_unethical_behavior/f1": 0.6955956303103867, "eval_non_violent_unethical_behavior/fpr": 0.06433731938216229, "eval_non_violent_unethical_behavior/precision": 0.7213630641970868, "eval_non_violent_unethical_behavior/recall": 0.6716055583458899, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.981069301660179, "eval_privacy_violation/f1": 0.8065283917035022, "eval_privacy_violation/fpr": 0.009519143277105043, "eval_privacy_violation/precision": 0.813443072702332, "eval_privacy_violation/recall": 0.7997302764666218, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.5049, "eval_samples_per_second": 100.44, "eval_self_harm/accuracy": 0.996390191968593, "eval_self_harm/f1": 0.7284105131414268, "eval_self_harm/fpr": 0.0016414310598954816, "eval_self_harm/precision": 0.7480719794344473, "eval_self_harm/recall": 0.7097560975609756, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9843630435505872, "eval_sexually_explicit,adult_content/f1": 0.6608946608946609, "eval_sexually_explicit,adult_content/fpr": 0.006971551298003977, "eval_sexually_explicit,adult_content/precision": 0.6913207547169812, "eval_sexually_explicit,adult_content/recall": 0.6330338631651693, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.571, "eval_terrorism,organized_crime/accuracy": 0.9922646970755564, "eval_terrorism,organized_crime/f1": 0.29652042360060515, "eval_terrorism,organized_crime/fpr": 0.0013750775577281013, "eval_terrorism,organized_crime/precision": 0.5444444444444444, "eval_terrorism,organized_crime/recall": 0.20374220374220375, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9214159763116745, "eval_violence,aiding_and_abetting,incitement/f1": 0.8511094301563288, "eval_violence,aiding_and_abetting,incitement/fpr": 0.05063233760935576, "eval_violence,aiding_and_abetting,incitement/precision": 0.8580325368581596, "eval_violence,aiding_and_abetting,incitement/recall": 0.8442971485742872, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 31710 }, { "epoch": 3.7520700260231843, "grad_norm": 0.5144846439361572, "learning_rate": 3.7937036150347142e-06, "loss": 0.0774, "step": 31720 }, { "epoch": 3.7532528980364326, "grad_norm": 0.46503472328186035, "learning_rate": 3.7929853962173807e-06, "loss": 0.0789, "step": 31730 }, { "epoch": 3.754435770049681, "grad_norm": 0.39554598927497864, "learning_rate": 3.792267177400048e-06, "loss": 0.0725, "step": 31740 }, { "epoch": 3.7556186420629287, "grad_norm": 0.5135816931724548, "learning_rate": 3.7915489585827146e-06, "loss": 0.0744, "step": 31750 }, { "epoch": 3.756801514076177, "grad_norm": 0.45230111479759216, "learning_rate": 3.790830739765382e-06, "loss": 0.0775, "step": 31760 }, { "epoch": 3.757984386089425, "grad_norm": 0.4054371416568756, "learning_rate": 3.790112520948049e-06, "loss": 0.0711, "step": 31770 }, { "epoch": 3.759167258102673, "grad_norm": 0.4274665415287018, "learning_rate": 3.789394302130716e-06, "loss": 0.0732, "step": 31780 }, { "epoch": 3.7603501301159215, "grad_norm": 0.5920975804328918, "learning_rate": 3.7886760833133827e-06, "loss": 0.0799, "step": 31790 }, { "epoch": 3.7615330021291697, "grad_norm": 0.6265707612037659, "learning_rate": 3.78795786449605e-06, "loss": 0.0774, "step": 31800 }, { "epoch": 3.762715874142418, "grad_norm": 0.38968726992607117, "learning_rate": 3.7872396456787165e-06, "loss": 0.0822, "step": 31810 }, { "epoch": 3.763898746155666, "grad_norm": 0.5241072773933411, "learning_rate": 3.786521426861384e-06, "loss": 0.0752, "step": 31820 }, { "epoch": 3.765081618168914, "grad_norm": 0.4375741183757782, "learning_rate": 3.7858032080440504e-06, "loss": 0.0801, "step": 31830 }, { "epoch": 3.766264490182162, "grad_norm": 0.5752775073051453, "learning_rate": 3.7850849892267177e-06, "loss": 0.0742, "step": 31840 }, { "epoch": 3.7674473621954103, "grad_norm": 0.7783891558647156, "learning_rate": 3.7843667704093847e-06, "loss": 0.0781, "step": 31850 }, { "epoch": 3.7686302342086586, "grad_norm": 0.4913389980792999, "learning_rate": 3.783648551592052e-06, "loss": 0.0853, "step": 31860 }, { "epoch": 3.769813106221907, "grad_norm": 0.5085858702659607, "learning_rate": 3.782930332774719e-06, "loss": 0.0803, "step": 31870 }, { "epoch": 3.770995978235155, "grad_norm": 0.5070111155509949, "learning_rate": 3.782212113957386e-06, "loss": 0.0826, "step": 31880 }, { "epoch": 3.772178850248403, "grad_norm": 0.5161232948303223, "learning_rate": 3.781493895140053e-06, "loss": 0.076, "step": 31890 }, { "epoch": 3.7733617222616513, "grad_norm": 0.3266914486885071, "learning_rate": 3.7807756763227197e-06, "loss": 0.0653, "step": 31900 }, { "epoch": 3.774544594274899, "grad_norm": 0.4774837791919708, "learning_rate": 3.780057457505387e-06, "loss": 0.0801, "step": 31910 }, { "epoch": 3.7757274662881475, "grad_norm": 0.468374103307724, "learning_rate": 3.7793392386880535e-06, "loss": 0.0796, "step": 31920 }, { "epoch": 3.7769103383013958, "grad_norm": 0.48337483406066895, "learning_rate": 3.778621019870721e-06, "loss": 0.0761, "step": 31930 }, { "epoch": 3.778093210314644, "grad_norm": 0.47050148248672485, "learning_rate": 3.7779028010533874e-06, "loss": 0.076, "step": 31940 }, { "epoch": 3.7792760823278924, "grad_norm": 0.43520548939704895, "learning_rate": 3.7771845822360547e-06, "loss": 0.0714, "step": 31950 }, { "epoch": 3.78045895434114, "grad_norm": 0.4321243166923523, "learning_rate": 3.7764663634187217e-06, "loss": 0.0869, "step": 31960 }, { "epoch": 3.7816418263543885, "grad_norm": 0.422360360622406, "learning_rate": 3.775748144601389e-06, "loss": 0.0722, "step": 31970 }, { "epoch": 3.7828246983676364, "grad_norm": 0.39680516719818115, "learning_rate": 3.7750299257840555e-06, "loss": 0.0734, "step": 31980 }, { "epoch": 3.7840075703808846, "grad_norm": 0.39478591084480286, "learning_rate": 3.774311706966723e-06, "loss": 0.0752, "step": 31990 }, { "epoch": 3.785190442394133, "grad_norm": 0.5164887309074402, "learning_rate": 3.7735934881493894e-06, "loss": 0.0755, "step": 32000 }, { "epoch": 3.7863733144073812, "grad_norm": 0.4505905508995056, "learning_rate": 3.7728752693320567e-06, "loss": 0.0807, "step": 32010 }, { "epoch": 3.7875561864206295, "grad_norm": 0.5198802351951599, "learning_rate": 3.772157050514723e-06, "loss": 0.0831, "step": 32020 }, { "epoch": 3.7887390584338774, "grad_norm": 0.4362444579601288, "learning_rate": 3.7714388316973905e-06, "loss": 0.0808, "step": 32030 }, { "epoch": 3.7899219304471257, "grad_norm": 0.41880330443382263, "learning_rate": 3.7707206128800575e-06, "loss": 0.0781, "step": 32040 }, { "epoch": 3.7911048024603735, "grad_norm": 0.4280758798122406, "learning_rate": 3.770002394062725e-06, "loss": 0.0738, "step": 32050 }, { "epoch": 3.792287674473622, "grad_norm": 0.43940988183021545, "learning_rate": 3.7692841752453913e-06, "loss": 0.0834, "step": 32060 }, { "epoch": 3.79347054648687, "grad_norm": 0.5249049067497253, "learning_rate": 3.7685659564280587e-06, "loss": 0.0746, "step": 32070 }, { "epoch": 3.7946534185001184, "grad_norm": 0.5017831325531006, "learning_rate": 3.767847737610725e-06, "loss": 0.082, "step": 32080 }, { "epoch": 3.7958362905133667, "grad_norm": 0.4433688223361969, "learning_rate": 3.7671295187933925e-06, "loss": 0.07, "step": 32090 }, { "epoch": 3.7970191625266145, "grad_norm": 0.4332752823829651, "learning_rate": 3.766411299976059e-06, "loss": 0.0817, "step": 32100 }, { "epoch": 3.798202034539863, "grad_norm": 0.45500195026397705, "learning_rate": 3.7656930811587263e-06, "loss": 0.0811, "step": 32110 }, { "epoch": 3.799384906553111, "grad_norm": 0.46811944246292114, "learning_rate": 3.7649748623413937e-06, "loss": 0.0773, "step": 32120 }, { "epoch": 3.800567778566359, "grad_norm": 0.40255865454673767, "learning_rate": 3.76425664352406e-06, "loss": 0.0725, "step": 32130 }, { "epoch": 3.8017506505796073, "grad_norm": 0.47635936737060547, "learning_rate": 3.7635384247067275e-06, "loss": 0.0757, "step": 32140 }, { "epoch": 3.8029335225928556, "grad_norm": 0.5155047178268433, "learning_rate": 3.7628202058893945e-06, "loss": 0.072, "step": 32150 }, { "epoch": 3.804116394606104, "grad_norm": 0.4080367386341095, "learning_rate": 3.762101987072062e-06, "loss": 0.089, "step": 32160 }, { "epoch": 3.8052992666193517, "grad_norm": 0.48949307203292847, "learning_rate": 3.7613837682547283e-06, "loss": 0.08, "step": 32170 }, { "epoch": 3.8064821386326, "grad_norm": 0.4111352264881134, "learning_rate": 3.7606655494373957e-06, "loss": 0.0813, "step": 32180 }, { "epoch": 3.8076650106458483, "grad_norm": 0.4597035050392151, "learning_rate": 3.759947330620062e-06, "loss": 0.0739, "step": 32190 }, { "epoch": 3.808847882659096, "grad_norm": 0.5479676723480225, "learning_rate": 3.7592291118027295e-06, "loss": 0.0743, "step": 32200 }, { "epoch": 3.8100307546723444, "grad_norm": 0.47066301107406616, "learning_rate": 3.758510892985396e-06, "loss": 0.0737, "step": 32210 }, { "epoch": 3.8112136266855927, "grad_norm": 0.3738173842430115, "learning_rate": 3.7577926741680633e-06, "loss": 0.0751, "step": 32220 }, { "epoch": 3.812396498698841, "grad_norm": 0.47639569640159607, "learning_rate": 3.7570744553507303e-06, "loss": 0.077, "step": 32230 }, { "epoch": 3.813579370712089, "grad_norm": 0.3957284986972809, "learning_rate": 3.7563562365333976e-06, "loss": 0.0728, "step": 32240 }, { "epoch": 3.814762242725337, "grad_norm": 0.5583634376525879, "learning_rate": 3.755638017716064e-06, "loss": 0.081, "step": 32250 }, { "epoch": 3.8159451147385854, "grad_norm": 0.3913753628730774, "learning_rate": 3.7549197988987315e-06, "loss": 0.0802, "step": 32260 }, { "epoch": 3.8171279867518333, "grad_norm": 0.4483395516872406, "learning_rate": 3.754201580081398e-06, "loss": 0.0763, "step": 32270 }, { "epoch": 3.8183108587650816, "grad_norm": 0.4809946119785309, "learning_rate": 3.7534833612640653e-06, "loss": 0.0779, "step": 32280 }, { "epoch": 3.81949373077833, "grad_norm": 0.3851666748523712, "learning_rate": 3.752765142446732e-06, "loss": 0.0721, "step": 32290 }, { "epoch": 3.820676602791578, "grad_norm": 0.5840352773666382, "learning_rate": 3.752046923629399e-06, "loss": 0.0816, "step": 32300 }, { "epoch": 3.821859474804826, "grad_norm": 0.4927110970020294, "learning_rate": 3.751328704812066e-06, "loss": 0.0815, "step": 32310 }, { "epoch": 3.8230423468180743, "grad_norm": 0.46766406297683716, "learning_rate": 3.7506104859947334e-06, "loss": 0.0757, "step": 32320 }, { "epoch": 3.8242252188313226, "grad_norm": 0.43894124031066895, "learning_rate": 3.7498922671774e-06, "loss": 0.0788, "step": 32330 }, { "epoch": 3.8254080908445705, "grad_norm": 0.5377935171127319, "learning_rate": 3.7491740483600673e-06, "loss": 0.0722, "step": 32340 }, { "epoch": 3.8265909628578187, "grad_norm": 0.5577464699745178, "learning_rate": 3.7484558295427338e-06, "loss": 0.0788, "step": 32350 }, { "epoch": 3.827773834871067, "grad_norm": 0.46455809473991394, "learning_rate": 3.747737610725401e-06, "loss": 0.0766, "step": 32360 }, { "epoch": 3.8289567068843153, "grad_norm": 0.6855377554893494, "learning_rate": 3.7470193919080685e-06, "loss": 0.0826, "step": 32370 }, { "epoch": 3.830139578897563, "grad_norm": 0.5709244012832642, "learning_rate": 3.746301173090735e-06, "loss": 0.0754, "step": 32380 }, { "epoch": 3.8313224509108115, "grad_norm": 0.4403274357318878, "learning_rate": 3.7455829542734023e-06, "loss": 0.0813, "step": 32390 }, { "epoch": 3.8325053229240598, "grad_norm": 0.44979363679885864, "learning_rate": 3.744864735456069e-06, "loss": 0.0721, "step": 32400 }, { "epoch": 3.8336881949373076, "grad_norm": 0.5208628177642822, "learning_rate": 3.744146516638736e-06, "loss": 0.0747, "step": 32410 }, { "epoch": 3.834871066950556, "grad_norm": 0.40563929080963135, "learning_rate": 3.743428297821403e-06, "loss": 0.0737, "step": 32420 }, { "epoch": 3.836053938963804, "grad_norm": 0.4743156135082245, "learning_rate": 3.7427100790040704e-06, "loss": 0.0814, "step": 32430 }, { "epoch": 3.8372368109770525, "grad_norm": 0.4210798442363739, "learning_rate": 3.741991860186737e-06, "loss": 0.0783, "step": 32440 }, { "epoch": 3.8384196829903003, "grad_norm": 0.49941807985305786, "learning_rate": 3.7412736413694043e-06, "loss": 0.0806, "step": 32450 }, { "epoch": 3.8396025550035486, "grad_norm": 0.46047961711883545, "learning_rate": 3.7405554225520708e-06, "loss": 0.0722, "step": 32460 }, { "epoch": 3.840785427016797, "grad_norm": 0.39157742261886597, "learning_rate": 3.739837203734738e-06, "loss": 0.0737, "step": 32470 }, { "epoch": 3.8419682990300448, "grad_norm": 0.5988749861717224, "learning_rate": 3.7391189849174046e-06, "loss": 0.0832, "step": 32480 }, { "epoch": 3.843151171043293, "grad_norm": 0.38226205110549927, "learning_rate": 3.738400766100072e-06, "loss": 0.0743, "step": 32490 }, { "epoch": 3.8443340430565414, "grad_norm": 0.44242510199546814, "learning_rate": 3.737682547282739e-06, "loss": 0.0824, "step": 32500 }, { "epoch": 3.8455169150697897, "grad_norm": 0.377104252576828, "learning_rate": 3.7369643284654062e-06, "loss": 0.0722, "step": 32510 }, { "epoch": 3.8466997870830375, "grad_norm": 0.40601328015327454, "learning_rate": 3.7362461096480727e-06, "loss": 0.0766, "step": 32520 }, { "epoch": 3.847882659096286, "grad_norm": 0.4191611111164093, "learning_rate": 3.73552789083074e-06, "loss": 0.0738, "step": 32530 }, { "epoch": 3.849065531109534, "grad_norm": 0.5299749970436096, "learning_rate": 3.7348096720134066e-06, "loss": 0.0774, "step": 32540 }, { "epoch": 3.850248403122782, "grad_norm": 0.7024903297424316, "learning_rate": 3.734091453196074e-06, "loss": 0.0803, "step": 32550 }, { "epoch": 3.8514312751360302, "grad_norm": 0.40415510535240173, "learning_rate": 3.7333732343787404e-06, "loss": 0.0726, "step": 32560 }, { "epoch": 3.8526141471492785, "grad_norm": 0.4420202970504761, "learning_rate": 3.7326550155614078e-06, "loss": 0.082, "step": 32570 }, { "epoch": 3.853797019162527, "grad_norm": 0.5897517204284668, "learning_rate": 3.7319367967440747e-06, "loss": 0.0791, "step": 32580 }, { "epoch": 3.8549798911757747, "grad_norm": 0.45850855112075806, "learning_rate": 3.7312185779267416e-06, "loss": 0.0745, "step": 32590 }, { "epoch": 3.856162763189023, "grad_norm": 0.4483683407306671, "learning_rate": 3.7305003591094085e-06, "loss": 0.0799, "step": 32600 }, { "epoch": 3.8573456352022713, "grad_norm": 0.5118029713630676, "learning_rate": 3.729782140292076e-06, "loss": 0.0827, "step": 32610 }, { "epoch": 3.858528507215519, "grad_norm": 0.484018474817276, "learning_rate": 3.7290639214747432e-06, "loss": 0.0738, "step": 32620 }, { "epoch": 3.8597113792287674, "grad_norm": 0.4349741041660309, "learning_rate": 3.7283457026574097e-06, "loss": 0.0733, "step": 32630 }, { "epoch": 3.8608942512420157, "grad_norm": 0.5488787293434143, "learning_rate": 3.727627483840077e-06, "loss": 0.0764, "step": 32640 }, { "epoch": 3.862077123255264, "grad_norm": 0.45061758160591125, "learning_rate": 3.7269092650227436e-06, "loss": 0.0736, "step": 32650 }, { "epoch": 3.863259995268512, "grad_norm": 0.5051406025886536, "learning_rate": 3.726191046205411e-06, "loss": 0.0821, "step": 32660 }, { "epoch": 3.86444286728176, "grad_norm": 0.5612226724624634, "learning_rate": 3.7254728273880774e-06, "loss": 0.0784, "step": 32670 }, { "epoch": 3.8656257392950084, "grad_norm": 0.5229612588882446, "learning_rate": 3.7247546085707448e-06, "loss": 0.0726, "step": 32680 }, { "epoch": 3.8668086113082563, "grad_norm": 0.5154691338539124, "learning_rate": 3.7240363897534117e-06, "loss": 0.0723, "step": 32690 }, { "epoch": 3.8679914833215046, "grad_norm": 0.5307480096817017, "learning_rate": 3.723318170936079e-06, "loss": 0.0803, "step": 32700 }, { "epoch": 3.869174355334753, "grad_norm": 0.44806134700775146, "learning_rate": 3.7225999521187455e-06, "loss": 0.0724, "step": 32710 }, { "epoch": 3.870357227348001, "grad_norm": 0.6161800026893616, "learning_rate": 3.721881733301413e-06, "loss": 0.0753, "step": 32720 }, { "epoch": 3.871540099361249, "grad_norm": 0.39671438932418823, "learning_rate": 3.7211635144840794e-06, "loss": 0.0823, "step": 32730 }, { "epoch": 3.8727229713744973, "grad_norm": 0.4870849549770355, "learning_rate": 3.7204452956667467e-06, "loss": 0.0783, "step": 32740 }, { "epoch": 3.8739058433877456, "grad_norm": 0.5906985998153687, "learning_rate": 3.7197270768494132e-06, "loss": 0.0776, "step": 32750 }, { "epoch": 3.8750887154009934, "grad_norm": 0.6637869477272034, "learning_rate": 3.7190088580320806e-06, "loss": 0.0788, "step": 32760 }, { "epoch": 3.8762715874142417, "grad_norm": 0.47809073328971863, "learning_rate": 3.7182906392147475e-06, "loss": 0.0746, "step": 32770 }, { "epoch": 3.87745445942749, "grad_norm": 0.501751184463501, "learning_rate": 3.717572420397415e-06, "loss": 0.0802, "step": 32780 }, { "epoch": 3.8786373314407383, "grad_norm": 0.3795642852783203, "learning_rate": 3.7168542015800813e-06, "loss": 0.0697, "step": 32790 }, { "epoch": 3.879820203453986, "grad_norm": 0.45169368386268616, "learning_rate": 3.7161359827627487e-06, "loss": 0.0782, "step": 32800 }, { "epoch": 3.8810030754672344, "grad_norm": 0.43832895159721375, "learning_rate": 3.715417763945415e-06, "loss": 0.0791, "step": 32810 }, { "epoch": 3.8821859474804827, "grad_norm": 0.4533604383468628, "learning_rate": 3.7146995451280825e-06, "loss": 0.0815, "step": 32820 }, { "epoch": 3.8833688194937306, "grad_norm": 0.46016085147857666, "learning_rate": 3.713981326310749e-06, "loss": 0.0724, "step": 32830 }, { "epoch": 3.884551691506979, "grad_norm": 0.5087478756904602, "learning_rate": 3.7132631074934164e-06, "loss": 0.0763, "step": 32840 }, { "epoch": 3.885734563520227, "grad_norm": 0.5356550812721252, "learning_rate": 3.7125448886760833e-06, "loss": 0.0785, "step": 32850 }, { "epoch": 3.8869174355334755, "grad_norm": 0.43236416578292847, "learning_rate": 3.7118266698587502e-06, "loss": 0.0759, "step": 32860 }, { "epoch": 3.8881003075467233, "grad_norm": 0.33881691098213196, "learning_rate": 3.711108451041417e-06, "loss": 0.0736, "step": 32870 }, { "epoch": 3.8892831795599716, "grad_norm": 0.4736548960208893, "learning_rate": 3.7103902322240845e-06, "loss": 0.0771, "step": 32880 }, { "epoch": 3.89046605157322, "grad_norm": 0.7015770673751831, "learning_rate": 3.709672013406752e-06, "loss": 0.0847, "step": 32890 }, { "epoch": 3.8916489235864677, "grad_norm": 0.44752681255340576, "learning_rate": 3.7089537945894183e-06, "loss": 0.0718, "step": 32900 }, { "epoch": 3.892831795599716, "grad_norm": 0.5802624225616455, "learning_rate": 3.7082355757720857e-06, "loss": 0.0871, "step": 32910 }, { "epoch": 3.8940146676129643, "grad_norm": 0.39542484283447266, "learning_rate": 3.707517356954752e-06, "loss": 0.0762, "step": 32920 }, { "epoch": 3.8951975396262126, "grad_norm": 0.47916045784950256, "learning_rate": 3.7067991381374195e-06, "loss": 0.0795, "step": 32930 }, { "epoch": 3.8963804116394605, "grad_norm": 0.5289046168327332, "learning_rate": 3.706080919320086e-06, "loss": 0.0791, "step": 32940 }, { "epoch": 3.8975632836527088, "grad_norm": 0.4124532639980316, "learning_rate": 3.7053627005027534e-06, "loss": 0.0796, "step": 32950 }, { "epoch": 3.898746155665957, "grad_norm": 0.5684840083122253, "learning_rate": 3.7046444816854203e-06, "loss": 0.0756, "step": 32960 }, { "epoch": 3.899929027679205, "grad_norm": 0.40630853176116943, "learning_rate": 3.7039262628680876e-06, "loss": 0.078, "step": 32970 }, { "epoch": 3.901111899692453, "grad_norm": 0.4497604966163635, "learning_rate": 3.703208044050754e-06, "loss": 0.0749, "step": 32980 }, { "epoch": 3.9022947717057015, "grad_norm": 0.5314636826515198, "learning_rate": 3.7024898252334215e-06, "loss": 0.0826, "step": 32990 }, { "epoch": 3.90347764371895, "grad_norm": 0.4573725461959839, "learning_rate": 3.701771606416088e-06, "loss": 0.0737, "step": 33000 }, { "epoch": 3.9046605157321976, "grad_norm": 0.5544387102127075, "learning_rate": 3.7010533875987553e-06, "loss": 0.0839, "step": 33010 }, { "epoch": 3.905843387745446, "grad_norm": 0.41996437311172485, "learning_rate": 3.700335168781422e-06, "loss": 0.0801, "step": 33020 }, { "epoch": 3.907026259758694, "grad_norm": 0.5038214325904846, "learning_rate": 3.699616949964089e-06, "loss": 0.0725, "step": 33030 }, { "epoch": 3.908209131771942, "grad_norm": 0.5281226634979248, "learning_rate": 3.698898731146756e-06, "loss": 0.0743, "step": 33040 }, { "epoch": 3.9093920037851904, "grad_norm": 0.4948503375053406, "learning_rate": 3.698180512329423e-06, "loss": 0.081, "step": 33050 }, { "epoch": 3.9105748757984387, "grad_norm": 0.3845081329345703, "learning_rate": 3.69746229351209e-06, "loss": 0.0843, "step": 33060 }, { "epoch": 3.911757747811687, "grad_norm": 0.7349977493286133, "learning_rate": 3.6967440746947573e-06, "loss": 0.0805, "step": 33070 }, { "epoch": 3.912940619824935, "grad_norm": 0.35707390308380127, "learning_rate": 3.696025855877424e-06, "loss": 0.0768, "step": 33080 }, { "epoch": 3.914123491838183, "grad_norm": 0.4085548520088196, "learning_rate": 3.695307637060091e-06, "loss": 0.0717, "step": 33090 }, { "epoch": 3.9153063638514314, "grad_norm": 0.4100322127342224, "learning_rate": 3.6945894182427576e-06, "loss": 0.081, "step": 33100 }, { "epoch": 3.9164892358646792, "grad_norm": 0.538526713848114, "learning_rate": 3.693871199425425e-06, "loss": 0.0749, "step": 33110 }, { "epoch": 3.9176721078779275, "grad_norm": 0.404019832611084, "learning_rate": 3.693152980608092e-06, "loss": 0.0797, "step": 33120 }, { "epoch": 3.918854979891176, "grad_norm": 0.6288910508155823, "learning_rate": 3.692434761790759e-06, "loss": 0.0683, "step": 33130 }, { "epoch": 3.920037851904424, "grad_norm": 0.4783177375793457, "learning_rate": 3.691716542973426e-06, "loss": 0.0678, "step": 33140 }, { "epoch": 3.921220723917672, "grad_norm": 0.3779924809932709, "learning_rate": 3.690998324156093e-06, "loss": 0.0751, "step": 33150 }, { "epoch": 3.9224035959309203, "grad_norm": 0.4001327455043793, "learning_rate": 3.6902801053387604e-06, "loss": 0.0724, "step": 33160 }, { "epoch": 3.9235864679441685, "grad_norm": 0.4488086402416229, "learning_rate": 3.689561886521427e-06, "loss": 0.0801, "step": 33170 }, { "epoch": 3.9247693399574164, "grad_norm": 0.5914836525917053, "learning_rate": 3.6888436677040943e-06, "loss": 0.0755, "step": 33180 }, { "epoch": 3.9259522119706647, "grad_norm": 0.5212802886962891, "learning_rate": 3.6881254488867608e-06, "loss": 0.0789, "step": 33190 }, { "epoch": 3.927135083983913, "grad_norm": 0.4766732454299927, "learning_rate": 3.687407230069428e-06, "loss": 0.0784, "step": 33200 }, { "epoch": 3.9283179559971613, "grad_norm": 0.5257973074913025, "learning_rate": 3.6866890112520946e-06, "loss": 0.073, "step": 33210 }, { "epoch": 3.929500828010409, "grad_norm": 0.46750327944755554, "learning_rate": 3.685970792434762e-06, "loss": 0.0754, "step": 33220 }, { "epoch": 3.9306837000236574, "grad_norm": 0.46958282589912415, "learning_rate": 3.685252573617429e-06, "loss": 0.0705, "step": 33230 }, { "epoch": 3.9318665720369057, "grad_norm": 0.47172099351882935, "learning_rate": 3.6845343548000962e-06, "loss": 0.077, "step": 33240 }, { "epoch": 3.9330494440501536, "grad_norm": 0.6132487654685974, "learning_rate": 3.6838161359827627e-06, "loss": 0.0799, "step": 33250 }, { "epoch": 3.934232316063402, "grad_norm": 0.5194699764251709, "learning_rate": 3.68309791716543e-06, "loss": 0.0765, "step": 33260 }, { "epoch": 3.93541518807665, "grad_norm": 0.4479357898235321, "learning_rate": 3.6823796983480966e-06, "loss": 0.0768, "step": 33270 }, { "epoch": 3.9365980600898984, "grad_norm": 0.4702802896499634, "learning_rate": 3.681661479530764e-06, "loss": 0.0757, "step": 33280 }, { "epoch": 3.9377809321031463, "grad_norm": 0.45353466272354126, "learning_rate": 3.6809432607134304e-06, "loss": 0.0766, "step": 33290 }, { "epoch": 3.9389638041163946, "grad_norm": 0.5861786007881165, "learning_rate": 3.6802250418960978e-06, "loss": 0.0754, "step": 33300 }, { "epoch": 3.940146676129643, "grad_norm": 0.4680328071117401, "learning_rate": 3.6795068230787647e-06, "loss": 0.0818, "step": 33310 }, { "epoch": 3.9413295481428907, "grad_norm": 0.46566465497016907, "learning_rate": 3.6787886042614316e-06, "loss": 0.0841, "step": 33320 }, { "epoch": 3.942512420156139, "grad_norm": 0.3618694841861725, "learning_rate": 3.6780703854440986e-06, "loss": 0.0734, "step": 33330 }, { "epoch": 3.9436952921693873, "grad_norm": 0.4542178511619568, "learning_rate": 3.677352166626766e-06, "loss": 0.0845, "step": 33340 }, { "epoch": 3.9448781641826356, "grad_norm": 0.504176914691925, "learning_rate": 3.6766339478094324e-06, "loss": 0.0849, "step": 33350 }, { "epoch": 3.9460610361958834, "grad_norm": 0.3794676661491394, "learning_rate": 3.6759157289920997e-06, "loss": 0.0805, "step": 33360 }, { "epoch": 3.9472439082091317, "grad_norm": 0.3639920651912689, "learning_rate": 3.6751975101747662e-06, "loss": 0.0842, "step": 33370 }, { "epoch": 3.94842678022238, "grad_norm": 0.4472178816795349, "learning_rate": 3.6744792913574336e-06, "loss": 0.0783, "step": 33380 }, { "epoch": 3.949609652235628, "grad_norm": 0.4028298258781433, "learning_rate": 3.673761072540101e-06, "loss": 0.0797, "step": 33390 }, { "epoch": 3.950792524248876, "grad_norm": 0.4191296398639679, "learning_rate": 3.6730428537227674e-06, "loss": 0.0765, "step": 33400 }, { "epoch": 3.9519753962621245, "grad_norm": 0.6009833812713623, "learning_rate": 3.6723246349054348e-06, "loss": 0.0816, "step": 33410 }, { "epoch": 3.9531582682753728, "grad_norm": 0.497717022895813, "learning_rate": 3.6716064160881017e-06, "loss": 0.0742, "step": 33420 }, { "epoch": 3.954341140288621, "grad_norm": 0.5534936189651489, "learning_rate": 3.670888197270769e-06, "loss": 0.0792, "step": 33430 }, { "epoch": 3.955524012301869, "grad_norm": 0.5036735534667969, "learning_rate": 3.6701699784534356e-06, "loss": 0.0768, "step": 33440 }, { "epoch": 3.956706884315117, "grad_norm": 0.4373331069946289, "learning_rate": 3.669451759636103e-06, "loss": 0.0774, "step": 33450 }, { "epoch": 3.957889756328365, "grad_norm": 0.5554269552230835, "learning_rate": 3.6687335408187694e-06, "loss": 0.0771, "step": 33460 }, { "epoch": 3.9590726283416133, "grad_norm": 0.3956579267978668, "learning_rate": 3.6680153220014367e-06, "loss": 0.0736, "step": 33470 }, { "epoch": 3.9602555003548616, "grad_norm": 0.4574085474014282, "learning_rate": 3.6672971031841032e-06, "loss": 0.0723, "step": 33480 }, { "epoch": 3.96143837236811, "grad_norm": 0.5089538097381592, "learning_rate": 3.6665788843667706e-06, "loss": 0.0814, "step": 33490 }, { "epoch": 3.962621244381358, "grad_norm": 0.42138671875, "learning_rate": 3.6658606655494375e-06, "loss": 0.0776, "step": 33500 }, { "epoch": 3.963804116394606, "grad_norm": 0.44745591282844543, "learning_rate": 3.6651424467321044e-06, "loss": 0.0759, "step": 33510 }, { "epoch": 3.9649869884078544, "grad_norm": 0.49112409353256226, "learning_rate": 3.6644242279147714e-06, "loss": 0.0765, "step": 33520 }, { "epoch": 3.966169860421102, "grad_norm": 0.4068516790866852, "learning_rate": 3.6637060090974387e-06, "loss": 0.0763, "step": 33530 }, { "epoch": 3.9673527324343505, "grad_norm": 0.36749348044395447, "learning_rate": 3.662987790280105e-06, "loss": 0.0717, "step": 33540 }, { "epoch": 3.968535604447599, "grad_norm": 0.35042575001716614, "learning_rate": 3.6622695714627725e-06, "loss": 0.0761, "step": 33550 }, { "epoch": 3.969718476460847, "grad_norm": 0.4755893647670746, "learning_rate": 3.661551352645439e-06, "loss": 0.0844, "step": 33560 }, { "epoch": 3.9709013484740954, "grad_norm": 0.4519107937812805, "learning_rate": 3.6608331338281064e-06, "loss": 0.0751, "step": 33570 }, { "epoch": 3.972084220487343, "grad_norm": 0.40270093083381653, "learning_rate": 3.6601149150107733e-06, "loss": 0.0777, "step": 33580 }, { "epoch": 3.9732670925005915, "grad_norm": 0.48224782943725586, "learning_rate": 3.6593966961934402e-06, "loss": 0.0743, "step": 33590 }, { "epoch": 3.9744499645138394, "grad_norm": 0.577165424823761, "learning_rate": 3.658678477376107e-06, "loss": 0.078, "step": 33600 }, { "epoch": 3.9756328365270877, "grad_norm": 0.46609270572662354, "learning_rate": 3.6579602585587745e-06, "loss": 0.0824, "step": 33610 }, { "epoch": 3.976815708540336, "grad_norm": 0.5312780141830444, "learning_rate": 3.657242039741441e-06, "loss": 0.0695, "step": 33620 }, { "epoch": 3.9779985805535842, "grad_norm": 0.38292261958122253, "learning_rate": 3.6565238209241084e-06, "loss": 0.0778, "step": 33630 }, { "epoch": 3.9791814525668325, "grad_norm": 0.686711847782135, "learning_rate": 3.6558056021067757e-06, "loss": 0.0743, "step": 33640 }, { "epoch": 3.9803643245800804, "grad_norm": 0.4912627041339874, "learning_rate": 3.655087383289442e-06, "loss": 0.0739, "step": 33650 }, { "epoch": 3.9815471965933287, "grad_norm": 0.5413533449172974, "learning_rate": 3.6543691644721095e-06, "loss": 0.0777, "step": 33660 }, { "epoch": 3.9827300686065765, "grad_norm": 0.41975072026252747, "learning_rate": 3.653650945654776e-06, "loss": 0.0765, "step": 33670 }, { "epoch": 3.983912940619825, "grad_norm": 0.5174323320388794, "learning_rate": 3.6529327268374434e-06, "loss": 0.0791, "step": 33680 }, { "epoch": 3.985095812633073, "grad_norm": 0.5094000101089478, "learning_rate": 3.6522145080201103e-06, "loss": 0.0711, "step": 33690 }, { "epoch": 3.9862786846463214, "grad_norm": 0.5135118961334229, "learning_rate": 3.6514962892027772e-06, "loss": 0.0697, "step": 33700 }, { "epoch": 3.9874615566595697, "grad_norm": 0.4072645306587219, "learning_rate": 3.650778070385444e-06, "loss": 0.0671, "step": 33710 }, { "epoch": 3.9886444286728175, "grad_norm": 0.4359702467918396, "learning_rate": 3.6500598515681115e-06, "loss": 0.0687, "step": 33720 }, { "epoch": 3.989827300686066, "grad_norm": 0.5420372486114502, "learning_rate": 3.649341632750778e-06, "loss": 0.0799, "step": 33730 }, { "epoch": 3.9910101726993137, "grad_norm": 0.4817779064178467, "learning_rate": 3.6486234139334454e-06, "loss": 0.0835, "step": 33740 }, { "epoch": 3.992193044712562, "grad_norm": 0.641960084438324, "learning_rate": 3.647905195116112e-06, "loss": 0.0788, "step": 33750 }, { "epoch": 3.9933759167258103, "grad_norm": 0.5214112997055054, "learning_rate": 3.647186976298779e-06, "loss": 0.0791, "step": 33760 }, { "epoch": 3.9945587887390586, "grad_norm": 0.4271097481250763, "learning_rate": 3.646468757481446e-06, "loss": 0.0845, "step": 33770 }, { "epoch": 3.995741660752307, "grad_norm": 0.3869156837463379, "learning_rate": 3.645750538664113e-06, "loss": 0.0774, "step": 33780 }, { "epoch": 3.9969245327655547, "grad_norm": 0.5025977492332458, "learning_rate": 3.64503231984678e-06, "loss": 0.076, "step": 33790 }, { "epoch": 3.998107404778803, "grad_norm": 0.40017369389533997, "learning_rate": 3.6443141010294473e-06, "loss": 0.0817, "step": 33800 }, { "epoch": 3.999290276792051, "grad_norm": 0.5121616721153259, "learning_rate": 3.643595882212114e-06, "loss": 0.0767, "step": 33810 }, { "epoch": 4.000473148805299, "grad_norm": 0.5204899907112122, "learning_rate": 3.642877663394781e-06, "loss": 0.077, "step": 33820 }, { "epoch": 4.000946297610598, "eval_accuracy": 0.6855308247662774, "eval_animal_abuse/accuracy": 0.9947266859633364, "eval_animal_abuse/f1": 0.7694545454545455, "eval_animal_abuse/fpr": 0.0026587688890384637, "eval_animal_abuse/precision": 0.7700145560407569, "eval_animal_abuse/recall": 0.7688953488372093, "eval_animal_abuse/threshold": 0.5, "eval_child_abuse/accuracy": 0.9962571114881724, "eval_child_abuse/f1": 0.6511627906976745, "eval_child_abuse/fpr": 0.0017062277312189463, "eval_child_abuse/precision": 0.6730769230769231, "eval_child_abuse/recall": 0.6306306306306306, "eval_child_abuse/threshold": 0.5, "eval_controversial_topics,politics/accuracy": 0.9691752337225937, "eval_controversial_topics,politics/f1": 0.49357748018584313, "eval_controversial_topics,politics/fpr": 0.015685063152114195, "eval_controversial_topics,politics/precision": 0.49697303247110625, "eval_controversial_topics,politics/recall": 0.49022801302931596, "eval_controversial_topics,politics/threshold": 0.5, "eval_discrimination,stereotype,injustice/accuracy": 0.955917090860698, "eval_discrimination,stereotype,injustice/f1": 0.7176044330775788, "eval_discrimination,stereotype,injustice/fpr": 0.022355152347562038, "eval_discrimination,stereotype,injustice/precision": 0.7313205907906168, "eval_discrimination,stereotype,injustice/recall": 0.7043933054393305, "eval_discrimination,stereotype,injustice/threshold": 0.5, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9742489270386266, "eval_drug_abuse,weapons,banned_substance/f1": 0.7713441654357459, "eval_drug_abuse,weapons,banned_substance/fpr": 0.013626427866309382, "eval_drug_abuse,weapons,banned_substance/precision": 0.7715721040189125, "eval_drug_abuse,weapons,banned_substance/recall": 0.771116361488482, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5, "eval_financial_crime,property_crime,theft/accuracy": 0.9603087467145757, "eval_financial_crime,property_crime,theft/f1": 0.8043620859298131, "eval_financial_crime,property_crime,theft/fpr": 0.026537419604518685, "eval_financial_crime,property_crime,theft/precision": 0.7730496453900709, "eval_financial_crime,property_crime,theft/recall": 0.8383182361989403, "eval_financial_crime,property_crime,theft/threshold": 0.5, "eval_flagged/accuracy": 0.8531124197358353, "eval_flagged/aucpr": 0.9072866171676852, "eval_flagged/f1": 0.8658584753744721, "eval_flagged/fpr": 0.1454074935303599, "eval_flagged/precision": 0.8802471042471043, "eval_flagged/recall": 0.8519326776479029, "eval_hate_speech,offensive_language/accuracy": 0.9510097481451908, "eval_hate_speech,offensive_language/f1": 0.693069306930693, "eval_hate_speech,offensive_language/fpr": 0.016188562031792406, "eval_hate_speech,offensive_language/precision": 0.7895986701496082, "eval_hate_speech,offensive_language/recall": 0.6175705794947994, "eval_hate_speech,offensive_language/threshold": 0.5, "eval_loss": 0.08177196234464645, "eval_macro_f1": 0.6424661592339712, "eval_macro_precision": 0.6874592979141945, "eval_macro_recall": 0.6309857780631145, "eval_micro_f1": 0.7527637266623747, "eval_micro_precision": 0.7796087097327262, "eval_micro_recall": 0.7277059587104575, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9874072595402069, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.11668611435239207, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0012798275600761138, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.3968253968253968, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.06839945280437756, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.5, "eval_non_violent_unethical_behavior/accuracy": 0.8883454769271717, "eval_non_violent_unethical_behavior/f1": 0.6865601942654338, "eval_non_violent_unethical_behavior/fpr": 0.04395034047500406, "eval_non_violent_unethical_behavior/precision": 0.7764047317279257, "eval_non_violent_unethical_behavior/recall": 0.6153524192198225, "eval_non_violent_unethical_behavior/threshold": 0.5, "eval_privacy_violation/accuracy": 0.9809362211797584, "eval_privacy_violation/f1": 0.8132942326490714, "eval_privacy_violation/fpr": 0.011828935395814356, "eval_privacy_violation/precision": 0.7868852459016393, "eval_privacy_violation/recall": 0.8415374241402562, "eval_privacy_violation/threshold": 0.5, "eval_runtime": 598.491, "eval_samples_per_second": 100.443, "eval_self_harm/accuracy": 0.9953754533053865, "eval_self_harm/f1": 0.6951754385964912, "eval_self_harm/fpr": 0.00309861985796596, "eval_self_harm/precision": 0.6314741035856574, "eval_self_harm/recall": 0.7731707317073171, "eval_self_harm/threshold": 0.5, "eval_sexually_explicit,adult_content/accuracy": 0.9839305319892205, "eval_sexually_explicit,adult_content/f1": 0.671875, "eval_sexually_explicit,adult_content/fpr": 0.008659041709990269, "eval_sexually_explicit,adult_content/precision": 0.6606546426185704, "eval_sexually_explicit,adult_content/recall": 0.6834830684174154, "eval_sexually_explicit,adult_content/threshold": 0.5, "eval_steps_per_second": 1.571, "eval_terrorism,organized_crime/accuracy": 0.9920817114149783, "eval_terrorism,organized_crime/f1": 0.25391849529780564, "eval_terrorism,organized_crime/fpr": 0.0012744621266748254, "eval_terrorism,organized_crime/precision": 0.5159235668789809, "eval_terrorism,organized_crime/recall": 0.1683991683991684, "eval_terrorism,organized_crime/threshold": 0.5, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9230961173769837, "eval_violence,aiding_and_abetting,incitement/f1": 0.856441946402509, "eval_violence,aiding_and_abetting,incitement/fpr": 0.054870586102171125, "eval_violence,aiding_and_abetting,incitement/precision": 0.8506569613225587, "eval_violence,aiding_and_abetting,incitement/recall": 0.8623061530765382, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5, "step": 33824 }, { "epoch": 4.000946297610598, "step": 33824, "total_flos": 0.0, "train_loss": 0.09237335174007019, "train_runtime": 91030.0055, "train_samples_per_second": 59.433, "train_steps_per_second": 0.929 } ], "logging_steps": 10, "max_steps": 84540, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 2114, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 4 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }