{ "best_metric": 0.8435394814430963, "best_model_checkpoint": "/media/hongss/ssd/hatespeech_modelbackup/new_weights/1216/KcElectra_123/stage2/20241218T16-09-52/checkpoint-40000", "epoch": 46.205544665359845, "eval_steps": 5000, "global_step": 495000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009334453467749464, "grad_norm": 0.6633870005607605, "learning_rate": 5.000000000000001e-07, "loss": 0.6861, "step": 100 }, { "epoch": 0.018668906935498927, "grad_norm": 0.6493275761604309, "learning_rate": 1.0000000000000002e-06, "loss": 0.6608, "step": 200 }, { "epoch": 0.02800336040324839, "grad_norm": 0.7191958427429199, "learning_rate": 1.5e-06, "loss": 0.6035, "step": 300 }, { "epoch": 0.037337813870997855, "grad_norm": 0.663436233997345, "learning_rate": 2.0000000000000003e-06, "loss": 0.5126, "step": 400 }, { "epoch": 0.04667226733874732, "grad_norm": 0.6369535326957703, "learning_rate": 2.5e-06, "loss": 0.4386, "step": 500 }, { "epoch": 0.05600672080649678, "grad_norm": 0.5260729789733887, "learning_rate": 3e-06, "loss": 0.3861, "step": 600 }, { "epoch": 0.06534117427424624, "grad_norm": 0.5856143832206726, "learning_rate": 3.5e-06, "loss": 0.3458, "step": 700 }, { "epoch": 0.07467562774199571, "grad_norm": 0.7811357975006104, "learning_rate": 4.000000000000001e-06, "loss": 0.3055, "step": 800 }, { "epoch": 0.08401008120974517, "grad_norm": 1.2109060287475586, "learning_rate": 4.5e-06, "loss": 0.271, "step": 900 }, { "epoch": 0.09334453467749464, "grad_norm": 0.49256953597068787, "learning_rate": 5e-06, "loss": 0.2464, "step": 1000 }, { "epoch": 0.1026789881452441, "grad_norm": 0.41324612498283386, "learning_rate": 4.9990648087533904e-06, "loss": 0.2249, "step": 1100 }, { "epoch": 0.11201344161299356, "grad_norm": 0.4733312427997589, "learning_rate": 4.9981296175067805e-06, "loss": 0.2124, "step": 1200 }, { "epoch": 0.12134789508074302, "grad_norm": 0.9590297937393188, "learning_rate": 4.9971944262601705e-06, "loss": 0.1993, "step": 1300 }, { "epoch": 0.13068234854849248, "grad_norm": 0.787800669670105, "learning_rate": 4.9962592350135605e-06, "loss": 0.1864, "step": 1400 }, { "epoch": 0.14001680201624195, "grad_norm": 0.4495645761489868, "learning_rate": 4.9953240437669506e-06, "loss": 0.1828, "step": 1500 }, { "epoch": 0.14935125548399142, "grad_norm": 0.4731336832046509, "learning_rate": 4.994388852520341e-06, "loss": 0.1847, "step": 1600 }, { "epoch": 0.1586857089517409, "grad_norm": 0.4778302311897278, "learning_rate": 4.9934536612737315e-06, "loss": 0.169, "step": 1700 }, { "epoch": 0.16802016241949033, "grad_norm": 0.3960910439491272, "learning_rate": 4.9925184700271215e-06, "loss": 0.1621, "step": 1800 }, { "epoch": 0.1773546158872398, "grad_norm": 0.45346570014953613, "learning_rate": 4.9915832787805115e-06, "loss": 0.1586, "step": 1900 }, { "epoch": 0.18668906935498927, "grad_norm": 1.06520676612854, "learning_rate": 4.990648087533901e-06, "loss": 0.1556, "step": 2000 }, { "epoch": 0.19602352282273872, "grad_norm": 0.258771151304245, "learning_rate": 4.989712896287291e-06, "loss": 0.1522, "step": 2100 }, { "epoch": 0.2053579762904882, "grad_norm": 1.332026481628418, "learning_rate": 4.988777705040681e-06, "loss": 0.15, "step": 2200 }, { "epoch": 0.21469242975823766, "grad_norm": 1.318795919418335, "learning_rate": 4.987842513794071e-06, "loss": 0.1485, "step": 2300 }, { "epoch": 0.22402688322598713, "grad_norm": 0.4808869957923889, "learning_rate": 4.986907322547461e-06, "loss": 0.1423, "step": 2400 }, { "epoch": 0.23336133669373657, "grad_norm": 2.1478779315948486, "learning_rate": 4.985972131300852e-06, "loss": 0.1254, "step": 2500 }, { "epoch": 0.24269579016148604, "grad_norm": 0.46549296379089355, "learning_rate": 4.985036940054242e-06, "loss": 0.1313, "step": 2600 }, { "epoch": 0.2520302436292355, "grad_norm": 0.7649227976799011, "learning_rate": 4.984101748807632e-06, "loss": 0.134, "step": 2700 }, { "epoch": 0.26136469709698495, "grad_norm": 0.34054622054100037, "learning_rate": 4.983166557561022e-06, "loss": 0.1319, "step": 2800 }, { "epoch": 0.2706991505647344, "grad_norm": 2.4163575172424316, "learning_rate": 4.982231366314412e-06, "loss": 0.1248, "step": 2900 }, { "epoch": 0.2800336040324839, "grad_norm": 0.6720597147941589, "learning_rate": 4.981296175067802e-06, "loss": 0.1299, "step": 3000 }, { "epoch": 0.28936805750023337, "grad_norm": 0.7193818688392639, "learning_rate": 4.980360983821192e-06, "loss": 0.1253, "step": 3100 }, { "epoch": 0.29870251096798284, "grad_norm": 0.7698010206222534, "learning_rate": 4.979425792574582e-06, "loss": 0.1199, "step": 3200 }, { "epoch": 0.3080369644357323, "grad_norm": 1.5464136600494385, "learning_rate": 4.978490601327972e-06, "loss": 0.1219, "step": 3300 }, { "epoch": 0.3173714179034818, "grad_norm": 0.26300716400146484, "learning_rate": 4.977555410081362e-06, "loss": 0.1206, "step": 3400 }, { "epoch": 0.3267058713712312, "grad_norm": 3.255107879638672, "learning_rate": 4.976620218834752e-06, "loss": 0.1168, "step": 3500 }, { "epoch": 0.33604032483898066, "grad_norm": 0.6628056764602661, "learning_rate": 4.975685027588142e-06, "loss": 0.1161, "step": 3600 }, { "epoch": 0.34537477830673013, "grad_norm": 0.4120551645755768, "learning_rate": 4.974749836341533e-06, "loss": 0.1094, "step": 3700 }, { "epoch": 0.3547092317744796, "grad_norm": 0.9527609944343567, "learning_rate": 4.973814645094922e-06, "loss": 0.1113, "step": 3800 }, { "epoch": 0.3640436852422291, "grad_norm": 0.36488914489746094, "learning_rate": 4.972879453848312e-06, "loss": 0.1083, "step": 3900 }, { "epoch": 0.37337813870997855, "grad_norm": 0.6202889084815979, "learning_rate": 4.971944262601702e-06, "loss": 0.1087, "step": 4000 }, { "epoch": 0.382712592177728, "grad_norm": 0.48227280378341675, "learning_rate": 4.971009071355092e-06, "loss": 0.105, "step": 4100 }, { "epoch": 0.39204704564547743, "grad_norm": 0.8419789671897888, "learning_rate": 4.970073880108482e-06, "loss": 0.1149, "step": 4200 }, { "epoch": 0.4013814991132269, "grad_norm": 2.6310179233551025, "learning_rate": 4.969138688861872e-06, "loss": 0.1045, "step": 4300 }, { "epoch": 0.4107159525809764, "grad_norm": 1.6030837297439575, "learning_rate": 4.968203497615262e-06, "loss": 0.1104, "step": 4400 }, { "epoch": 0.42005040604872584, "grad_norm": 0.9124351143836975, "learning_rate": 4.967268306368653e-06, "loss": 0.1093, "step": 4500 }, { "epoch": 0.4293848595164753, "grad_norm": 0.6123986840248108, "learning_rate": 4.966333115122043e-06, "loss": 0.104, "step": 4600 }, { "epoch": 0.4387193129842248, "grad_norm": 0.5702049732208252, "learning_rate": 4.965397923875433e-06, "loss": 0.1053, "step": 4700 }, { "epoch": 0.44805376645197426, "grad_norm": 0.47836875915527344, "learning_rate": 4.964462732628823e-06, "loss": 0.1163, "step": 4800 }, { "epoch": 0.4573882199197237, "grad_norm": 0.9605004787445068, "learning_rate": 4.963527541382213e-06, "loss": 0.103, "step": 4900 }, { "epoch": 0.46672267338747314, "grad_norm": 1.3456240892410278, "learning_rate": 4.962592350135603e-06, "loss": 0.106, "step": 5000 }, { "epoch": 0.46672267338747314, "eval_accuracy": 0.6848541423570595, "eval_f1": 0.8121174640996026, "eval_loss": 0.10020891577005386, "eval_roc_auc": 0.8894598368854443, "eval_runtime": 243.7108, "eval_samples_per_second": 175.823, "eval_steps_per_second": 175.823, "step": 5000 }, { "epoch": 0.4760571268552226, "grad_norm": 1.0622531175613403, "learning_rate": 4.961657158888993e-06, "loss": 0.1032, "step": 5100 }, { "epoch": 0.4853915803229721, "grad_norm": 0.8295788764953613, "learning_rate": 4.960721967642383e-06, "loss": 0.1085, "step": 5200 }, { "epoch": 0.49472603379072155, "grad_norm": 0.662139356136322, "learning_rate": 4.959786776395773e-06, "loss": 0.1069, "step": 5300 }, { "epoch": 0.504060487258471, "grad_norm": 1.412575602531433, "learning_rate": 4.9588515851491635e-06, "loss": 0.1039, "step": 5400 }, { "epoch": 0.5133949407262205, "grad_norm": 0.926964521408081, "learning_rate": 4.9579163939025535e-06, "loss": 0.1036, "step": 5500 }, { "epoch": 0.5227293941939699, "grad_norm": 0.9951503872871399, "learning_rate": 4.9569812026559435e-06, "loss": 0.1044, "step": 5600 }, { "epoch": 0.5320638476617194, "grad_norm": 2.082270622253418, "learning_rate": 4.9560460114093335e-06, "loss": 0.1009, "step": 5700 }, { "epoch": 0.5413983011294689, "grad_norm": 1.2664024829864502, "learning_rate": 4.9551108201627236e-06, "loss": 0.1025, "step": 5800 }, { "epoch": 0.5507327545972184, "grad_norm": 1.7706208229064941, "learning_rate": 4.954175628916114e-06, "loss": 0.096, "step": 5900 }, { "epoch": 0.5600672080649678, "grad_norm": 0.6352158188819885, "learning_rate": 4.953240437669504e-06, "loss": 0.0972, "step": 6000 }, { "epoch": 0.5694016615327172, "grad_norm": 0.5798928141593933, "learning_rate": 4.952305246422894e-06, "loss": 0.0922, "step": 6100 }, { "epoch": 0.5787361150004667, "grad_norm": 0.6962963938713074, "learning_rate": 4.951370055176284e-06, "loss": 0.0946, "step": 6200 }, { "epoch": 0.5880705684682161, "grad_norm": 1.0120832920074463, "learning_rate": 4.950434863929674e-06, "loss": 0.0975, "step": 6300 }, { "epoch": 0.5974050219359657, "grad_norm": 2.5864970684051514, "learning_rate": 4.949499672683064e-06, "loss": 0.0972, "step": 6400 }, { "epoch": 0.6067394754037151, "grad_norm": 1.0199625492095947, "learning_rate": 4.948564481436454e-06, "loss": 0.0983, "step": 6500 }, { "epoch": 0.6160739288714646, "grad_norm": 0.6539067625999451, "learning_rate": 4.947629290189845e-06, "loss": 0.0923, "step": 6600 }, { "epoch": 0.625408382339214, "grad_norm": 1.715169906616211, "learning_rate": 4.946694098943235e-06, "loss": 0.0992, "step": 6700 }, { "epoch": 0.6347428358069636, "grad_norm": 0.49740713834762573, "learning_rate": 4.945758907696625e-06, "loss": 0.1003, "step": 6800 }, { "epoch": 0.644077289274713, "grad_norm": 0.9319292306900024, "learning_rate": 4.944823716450015e-06, "loss": 0.0924, "step": 6900 }, { "epoch": 0.6534117427424624, "grad_norm": 0.523908793926239, "learning_rate": 4.943888525203405e-06, "loss": 0.0923, "step": 7000 }, { "epoch": 0.6627461962102119, "grad_norm": 0.904934823513031, "learning_rate": 4.942953333956795e-06, "loss": 0.0933, "step": 7100 }, { "epoch": 0.6720806496779613, "grad_norm": 0.7272130250930786, "learning_rate": 4.942018142710185e-06, "loss": 0.0931, "step": 7200 }, { "epoch": 0.6814151031457109, "grad_norm": 0.9489511847496033, "learning_rate": 4.941082951463575e-06, "loss": 0.0955, "step": 7300 }, { "epoch": 0.6907495566134603, "grad_norm": 1.0959975719451904, "learning_rate": 4.940147760216965e-06, "loss": 0.0974, "step": 7400 }, { "epoch": 0.7000840100812098, "grad_norm": 1.0124865770339966, "learning_rate": 4.939212568970355e-06, "loss": 0.1035, "step": 7500 }, { "epoch": 0.7094184635489592, "grad_norm": 1.2198047637939453, "learning_rate": 4.938277377723745e-06, "loss": 0.096, "step": 7600 }, { "epoch": 0.7187529170167086, "grad_norm": 0.5766215920448303, "learning_rate": 4.937342186477135e-06, "loss": 0.0944, "step": 7700 }, { "epoch": 0.7280873704844582, "grad_norm": 0.6060758233070374, "learning_rate": 4.936406995230525e-06, "loss": 0.0957, "step": 7800 }, { "epoch": 0.7374218239522076, "grad_norm": 1.0637242794036865, "learning_rate": 4.935471803983915e-06, "loss": 0.0976, "step": 7900 }, { "epoch": 0.7467562774199571, "grad_norm": 0.8493542671203613, "learning_rate": 4.934536612737305e-06, "loss": 0.0932, "step": 8000 }, { "epoch": 0.7560907308877065, "grad_norm": 0.7301217317581177, "learning_rate": 4.933601421490695e-06, "loss": 0.1016, "step": 8100 }, { "epoch": 0.765425184355456, "grad_norm": 0.8102872967720032, "learning_rate": 4.932666230244085e-06, "loss": 0.0947, "step": 8200 }, { "epoch": 0.7747596378232054, "grad_norm": 0.912520170211792, "learning_rate": 4.931731038997475e-06, "loss": 0.0875, "step": 8300 }, { "epoch": 0.7840940912909549, "grad_norm": 0.4936845898628235, "learning_rate": 4.930795847750865e-06, "loss": 0.0947, "step": 8400 }, { "epoch": 0.7934285447587044, "grad_norm": 0.8171100616455078, "learning_rate": 4.929860656504255e-06, "loss": 0.0943, "step": 8500 }, { "epoch": 0.8027629982264538, "grad_norm": 0.4529333710670471, "learning_rate": 4.928925465257646e-06, "loss": 0.092, "step": 8600 }, { "epoch": 0.8120974516942033, "grad_norm": 1.6065642833709717, "learning_rate": 4.927990274011036e-06, "loss": 0.0948, "step": 8700 }, { "epoch": 0.8214319051619527, "grad_norm": 0.9707145094871521, "learning_rate": 4.927055082764426e-06, "loss": 0.0882, "step": 8800 }, { "epoch": 0.8307663586297023, "grad_norm": 0.4777999818325043, "learning_rate": 4.926119891517816e-06, "loss": 0.0955, "step": 8900 }, { "epoch": 0.8401008120974517, "grad_norm": 0.7687111496925354, "learning_rate": 4.925184700271206e-06, "loss": 0.098, "step": 9000 }, { "epoch": 0.8494352655652011, "grad_norm": 1.0065224170684814, "learning_rate": 4.924249509024596e-06, "loss": 0.0946, "step": 9100 }, { "epoch": 0.8587697190329506, "grad_norm": 1.3893612623214722, "learning_rate": 4.9233143177779855e-06, "loss": 0.0954, "step": 9200 }, { "epoch": 0.8681041725007, "grad_norm": 0.8990705013275146, "learning_rate": 4.9223791265313755e-06, "loss": 0.0988, "step": 9300 }, { "epoch": 0.8774386259684496, "grad_norm": 1.3585472106933594, "learning_rate": 4.9214439352847655e-06, "loss": 0.0953, "step": 9400 }, { "epoch": 0.886773079436199, "grad_norm": 0.6954057812690735, "learning_rate": 4.920508744038156e-06, "loss": 0.0891, "step": 9500 }, { "epoch": 0.8961075329039485, "grad_norm": 0.8189443349838257, "learning_rate": 4.9195735527915464e-06, "loss": 0.0884, "step": 9600 }, { "epoch": 0.9054419863716979, "grad_norm": 1.9409208297729492, "learning_rate": 4.9186383615449365e-06, "loss": 0.0897, "step": 9700 }, { "epoch": 0.9147764398394475, "grad_norm": 0.7617313265800476, "learning_rate": 4.9177031702983265e-06, "loss": 0.0929, "step": 9800 }, { "epoch": 0.9241108933071969, "grad_norm": 0.6284356713294983, "learning_rate": 4.9167679790517165e-06, "loss": 0.0963, "step": 9900 }, { "epoch": 0.9334453467749463, "grad_norm": 1.102933406829834, "learning_rate": 4.9158327878051066e-06, "loss": 0.0952, "step": 10000 }, { "epoch": 0.9334453467749463, "eval_accuracy": 0.7080280046674445, "eval_f1": 0.8337786713934231, "eval_loss": 0.08920912444591522, "eval_roc_auc": 0.9120750310019967, "eval_runtime": 237.1888, "eval_samples_per_second": 180.658, "eval_steps_per_second": 180.658, "step": 10000 }, { "epoch": 0.9427798002426958, "grad_norm": 1.3865083456039429, "learning_rate": 4.914897596558497e-06, "loss": 0.0908, "step": 10100 }, { "epoch": 0.9521142537104452, "grad_norm": 0.7737976312637329, "learning_rate": 4.913962405311887e-06, "loss": 0.091, "step": 10200 }, { "epoch": 0.9614487071781948, "grad_norm": 0.6218439340591431, "learning_rate": 4.913027214065277e-06, "loss": 0.0812, "step": 10300 }, { "epoch": 0.9707831606459442, "grad_norm": 3.8853960037231445, "learning_rate": 4.912092022818667e-06, "loss": 0.0947, "step": 10400 }, { "epoch": 0.9801176141136937, "grad_norm": 0.5121055245399475, "learning_rate": 4.911156831572057e-06, "loss": 0.0899, "step": 10500 }, { "epoch": 0.9894520675814431, "grad_norm": 1.7373566627502441, "learning_rate": 4.910221640325447e-06, "loss": 0.0927, "step": 10600 }, { "epoch": 0.9987865210491925, "grad_norm": 0.773533046245575, "learning_rate": 4.909286449078838e-06, "loss": 0.0959, "step": 10700 }, { "epoch": 1.008120974516942, "grad_norm": 0.9396854043006897, "learning_rate": 4.908351257832228e-06, "loss": 0.0788, "step": 10800 }, { "epoch": 1.0174554279846915, "grad_norm": 0.5905917882919312, "learning_rate": 4.907416066585618e-06, "loss": 0.0915, "step": 10900 }, { "epoch": 1.026789881452441, "grad_norm": 2.216852903366089, "learning_rate": 4.906480875339007e-06, "loss": 0.0856, "step": 11000 }, { "epoch": 1.0361243349201905, "grad_norm": 0.6978827714920044, "learning_rate": 4.905545684092397e-06, "loss": 0.078, "step": 11100 }, { "epoch": 1.0454587883879398, "grad_norm": 0.7456490993499756, "learning_rate": 4.904610492845787e-06, "loss": 0.0906, "step": 11200 }, { "epoch": 1.0547932418556893, "grad_norm": 1.1889469623565674, "learning_rate": 4.903675301599177e-06, "loss": 0.0873, "step": 11300 }, { "epoch": 1.0641276953234389, "grad_norm": 0.8040895462036133, "learning_rate": 4.902740110352567e-06, "loss": 0.0848, "step": 11400 }, { "epoch": 1.0734621487911882, "grad_norm": 0.5739530324935913, "learning_rate": 4.901804919105958e-06, "loss": 0.0912, "step": 11500 }, { "epoch": 1.0827966022589377, "grad_norm": 1.061485767364502, "learning_rate": 4.900869727859348e-06, "loss": 0.0846, "step": 11600 }, { "epoch": 1.0921310557266872, "grad_norm": 1.4351203441619873, "learning_rate": 4.899934536612738e-06, "loss": 0.0872, "step": 11700 }, { "epoch": 1.1014655091944368, "grad_norm": 0.9134341478347778, "learning_rate": 4.898999345366128e-06, "loss": 0.0915, "step": 11800 }, { "epoch": 1.110799962662186, "grad_norm": 1.5578075647354126, "learning_rate": 4.898064154119518e-06, "loss": 0.0879, "step": 11900 }, { "epoch": 1.1201344161299356, "grad_norm": 1.0672986507415771, "learning_rate": 4.897128962872908e-06, "loss": 0.0864, "step": 12000 }, { "epoch": 1.129468869597685, "grad_norm": 0.6705206036567688, "learning_rate": 4.896193771626298e-06, "loss": 0.0852, "step": 12100 }, { "epoch": 1.1388033230654346, "grad_norm": 0.6674344539642334, "learning_rate": 4.895258580379688e-06, "loss": 0.0812, "step": 12200 }, { "epoch": 1.148137776533184, "grad_norm": 0.9838173985481262, "learning_rate": 4.894323389133078e-06, "loss": 0.0911, "step": 12300 }, { "epoch": 1.1574722300009335, "grad_norm": 0.7195733189582825, "learning_rate": 4.893388197886468e-06, "loss": 0.0827, "step": 12400 }, { "epoch": 1.166806683468683, "grad_norm": 0.726023256778717, "learning_rate": 4.892453006639858e-06, "loss": 0.0835, "step": 12500 }, { "epoch": 1.1761411369364323, "grad_norm": 0.6076546311378479, "learning_rate": 4.891517815393248e-06, "loss": 0.0807, "step": 12600 }, { "epoch": 1.1854755904041818, "grad_norm": 0.8179360628128052, "learning_rate": 4.890582624146638e-06, "loss": 0.0911, "step": 12700 }, { "epoch": 1.1948100438719313, "grad_norm": 2.201394557952881, "learning_rate": 4.889647432900028e-06, "loss": 0.0863, "step": 12800 }, { "epoch": 1.2041444973396809, "grad_norm": 0.6127134561538696, "learning_rate": 4.888712241653418e-06, "loss": 0.0795, "step": 12900 }, { "epoch": 1.2134789508074302, "grad_norm": 0.5104560852050781, "learning_rate": 4.887777050406808e-06, "loss": 0.084, "step": 13000 }, { "epoch": 1.2228134042751797, "grad_norm": 1.0378248691558838, "learning_rate": 4.886841859160198e-06, "loss": 0.0852, "step": 13100 }, { "epoch": 1.2321478577429292, "grad_norm": 0.6261497735977173, "learning_rate": 4.885906667913588e-06, "loss": 0.0803, "step": 13200 }, { "epoch": 1.2414823112106785, "grad_norm": 1.0074316263198853, "learning_rate": 4.884971476666978e-06, "loss": 0.0865, "step": 13300 }, { "epoch": 1.250816764678428, "grad_norm": 0.92315673828125, "learning_rate": 4.8840362854203684e-06, "loss": 0.0843, "step": 13400 }, { "epoch": 1.2601512181461776, "grad_norm": 0.8196158409118652, "learning_rate": 4.8831010941737585e-06, "loss": 0.0899, "step": 13500 }, { "epoch": 1.2694856716139271, "grad_norm": 0.7994422912597656, "learning_rate": 4.882165902927149e-06, "loss": 0.0842, "step": 13600 }, { "epoch": 1.2788201250816764, "grad_norm": 0.9403783679008484, "learning_rate": 4.881230711680539e-06, "loss": 0.0925, "step": 13700 }, { "epoch": 1.288154578549426, "grad_norm": 1.7253316640853882, "learning_rate": 4.880295520433929e-06, "loss": 0.0828, "step": 13800 }, { "epoch": 1.2974890320171755, "grad_norm": 2.4280264377593994, "learning_rate": 4.8793603291873194e-06, "loss": 0.0857, "step": 13900 }, { "epoch": 1.3068234854849248, "grad_norm": 0.5727500319480896, "learning_rate": 4.8784251379407095e-06, "loss": 0.0823, "step": 14000 }, { "epoch": 1.3161579389526743, "grad_norm": 1.0649619102478027, "learning_rate": 4.8774899466940995e-06, "loss": 0.0807, "step": 14100 }, { "epoch": 1.3254923924204238, "grad_norm": 0.7996290326118469, "learning_rate": 4.8765547554474895e-06, "loss": 0.0859, "step": 14200 }, { "epoch": 1.3348268458881734, "grad_norm": 0.8078342080116272, "learning_rate": 4.8756195642008796e-06, "loss": 0.0828, "step": 14300 }, { "epoch": 1.3441612993559227, "grad_norm": 1.0784138441085815, "learning_rate": 4.87468437295427e-06, "loss": 0.0917, "step": 14400 }, { "epoch": 1.3534957528236722, "grad_norm": 0.4113708734512329, "learning_rate": 4.87374918170766e-06, "loss": 0.0826, "step": 14500 }, { "epoch": 1.3628302062914217, "grad_norm": 0.8154153823852539, "learning_rate": 4.87281399046105e-06, "loss": 0.083, "step": 14600 }, { "epoch": 1.372164659759171, "grad_norm": 0.9552295804023743, "learning_rate": 4.87187879921444e-06, "loss": 0.0872, "step": 14700 }, { "epoch": 1.3814991132269205, "grad_norm": 1.236308217048645, "learning_rate": 4.87094360796783e-06, "loss": 0.0806, "step": 14800 }, { "epoch": 1.39083356669467, "grad_norm": 1.0708487033843994, "learning_rate": 4.87000841672122e-06, "loss": 0.0853, "step": 14900 }, { "epoch": 1.4001680201624196, "grad_norm": 0.51058429479599, "learning_rate": 4.86907322547461e-06, "loss": 0.0832, "step": 15000 }, { "epoch": 1.4001680201624196, "eval_accuracy": 0.7088914819136523, "eval_f1": 0.8365035038727014, "eval_loss": 0.08669888973236084, "eval_roc_auc": 0.9151105707069954, "eval_runtime": 271.1755, "eval_samples_per_second": 158.016, "eval_steps_per_second": 158.016, "step": 15000 }, { "epoch": 1.409502473630169, "grad_norm": 0.6804799437522888, "learning_rate": 4.868138034228e-06, "loss": 0.084, "step": 15100 }, { "epoch": 1.4188369270979184, "grad_norm": 1.0619351863861084, "learning_rate": 4.86720284298139e-06, "loss": 0.0817, "step": 15200 }, { "epoch": 1.428171380565668, "grad_norm": 0.5485318303108215, "learning_rate": 4.86626765173478e-06, "loss": 0.0858, "step": 15300 }, { "epoch": 1.4375058340334173, "grad_norm": 0.7728336453437805, "learning_rate": 4.86533246048817e-06, "loss": 0.0861, "step": 15400 }, { "epoch": 1.4468402875011668, "grad_norm": 1.0971782207489014, "learning_rate": 4.86439726924156e-06, "loss": 0.0821, "step": 15500 }, { "epoch": 1.4561747409689163, "grad_norm": 0.9029784798622131, "learning_rate": 4.863462077994951e-06, "loss": 0.0877, "step": 15600 }, { "epoch": 1.4655091944366658, "grad_norm": 3.546658515930176, "learning_rate": 4.862526886748341e-06, "loss": 0.0812, "step": 15700 }, { "epoch": 1.4748436479044151, "grad_norm": 0.8030270934104919, "learning_rate": 4.861591695501731e-06, "loss": 0.0864, "step": 15800 }, { "epoch": 1.4841781013721647, "grad_norm": 0.666737973690033, "learning_rate": 4.860656504255121e-06, "loss": 0.0863, "step": 15900 }, { "epoch": 1.4935125548399142, "grad_norm": 0.8928977847099304, "learning_rate": 4.859721313008511e-06, "loss": 0.0811, "step": 16000 }, { "epoch": 1.5028470083076635, "grad_norm": 0.8195933699607849, "learning_rate": 4.858786121761901e-06, "loss": 0.0811, "step": 16100 }, { "epoch": 1.512181461775413, "grad_norm": 0.8287771940231323, "learning_rate": 4.857850930515291e-06, "loss": 0.0864, "step": 16200 }, { "epoch": 1.5215159152431625, "grad_norm": 0.7763432860374451, "learning_rate": 4.856915739268681e-06, "loss": 0.0824, "step": 16300 }, { "epoch": 1.530850368710912, "grad_norm": 1.631101131439209, "learning_rate": 4.855980548022071e-06, "loss": 0.0867, "step": 16400 }, { "epoch": 1.5401848221786616, "grad_norm": 0.8905367851257324, "learning_rate": 4.855045356775461e-06, "loss": 0.0827, "step": 16500 }, { "epoch": 1.549519275646411, "grad_norm": 0.8785068392753601, "learning_rate": 4.854110165528851e-06, "loss": 0.0859, "step": 16600 }, { "epoch": 1.5588537291141602, "grad_norm": 1.6522717475891113, "learning_rate": 4.853174974282241e-06, "loss": 0.0849, "step": 16700 }, { "epoch": 1.5681881825819097, "grad_norm": 1.2310458421707153, "learning_rate": 4.852239783035631e-06, "loss": 0.0828, "step": 16800 }, { "epoch": 1.5775226360496593, "grad_norm": 1.0474956035614014, "learning_rate": 4.851304591789021e-06, "loss": 0.0843, "step": 16900 }, { "epoch": 1.5868570895174088, "grad_norm": 1.1789672374725342, "learning_rate": 4.850369400542411e-06, "loss": 0.0854, "step": 17000 }, { "epoch": 1.5961915429851583, "grad_norm": 1.3143459558486938, "learning_rate": 4.849434209295801e-06, "loss": 0.0828, "step": 17100 }, { "epoch": 1.6055259964529078, "grad_norm": 0.9399304986000061, "learning_rate": 4.848499018049191e-06, "loss": 0.0878, "step": 17200 }, { "epoch": 1.6148604499206571, "grad_norm": 0.9758147597312927, "learning_rate": 4.847563826802581e-06, "loss": 0.0812, "step": 17300 }, { "epoch": 1.6241949033884067, "grad_norm": 1.2949702739715576, "learning_rate": 4.846628635555971e-06, "loss": 0.0811, "step": 17400 }, { "epoch": 1.633529356856156, "grad_norm": 0.4574110209941864, "learning_rate": 4.845693444309361e-06, "loss": 0.0799, "step": 17500 }, { "epoch": 1.6428638103239055, "grad_norm": 0.7923356890678406, "learning_rate": 4.8447582530627514e-06, "loss": 0.0795, "step": 17600 }, { "epoch": 1.652198263791655, "grad_norm": 0.8129155039787292, "learning_rate": 4.843823061816142e-06, "loss": 0.0869, "step": 17700 }, { "epoch": 1.6615327172594045, "grad_norm": 0.5633899569511414, "learning_rate": 4.842887870569532e-06, "loss": 0.0844, "step": 17800 }, { "epoch": 1.670867170727154, "grad_norm": 0.7545639872550964, "learning_rate": 4.841952679322922e-06, "loss": 0.0825, "step": 17900 }, { "epoch": 1.6802016241949034, "grad_norm": 1.4346576929092407, "learning_rate": 4.841017488076312e-06, "loss": 0.0875, "step": 18000 }, { "epoch": 1.689536077662653, "grad_norm": 0.3790111839771271, "learning_rate": 4.8400822968297024e-06, "loss": 0.0755, "step": 18100 }, { "epoch": 1.6988705311304022, "grad_norm": 0.769446611404419, "learning_rate": 4.839147105583092e-06, "loss": 0.0822, "step": 18200 }, { "epoch": 1.7082049845981517, "grad_norm": 1.104099154472351, "learning_rate": 4.838211914336482e-06, "loss": 0.0796, "step": 18300 }, { "epoch": 1.7175394380659013, "grad_norm": 1.1908490657806396, "learning_rate": 4.837276723089872e-06, "loss": 0.0804, "step": 18400 }, { "epoch": 1.7268738915336508, "grad_norm": 2.673532724380493, "learning_rate": 4.8363415318432625e-06, "loss": 0.0782, "step": 18500 }, { "epoch": 1.7362083450014003, "grad_norm": 0.7583387494087219, "learning_rate": 4.835406340596653e-06, "loss": 0.0792, "step": 18600 }, { "epoch": 1.7455427984691496, "grad_norm": 0.6538230776786804, "learning_rate": 4.834471149350043e-06, "loss": 0.0907, "step": 18700 }, { "epoch": 1.7548772519368991, "grad_norm": 0.8183677792549133, "learning_rate": 4.833535958103433e-06, "loss": 0.0852, "step": 18800 }, { "epoch": 1.7642117054046484, "grad_norm": 0.9957406520843506, "learning_rate": 4.832600766856823e-06, "loss": 0.0877, "step": 18900 }, { "epoch": 1.773546158872398, "grad_norm": 0.6792452335357666, "learning_rate": 4.831665575610213e-06, "loss": 0.084, "step": 19000 }, { "epoch": 1.7828806123401475, "grad_norm": 0.5733925700187683, "learning_rate": 4.830730384363603e-06, "loss": 0.0838, "step": 19100 }, { "epoch": 1.792215065807897, "grad_norm": 1.970196008682251, "learning_rate": 4.829795193116993e-06, "loss": 0.0837, "step": 19200 }, { "epoch": 1.8015495192756465, "grad_norm": 1.3127257823944092, "learning_rate": 4.828860001870383e-06, "loss": 0.079, "step": 19300 }, { "epoch": 1.8108839727433959, "grad_norm": 0.6472167372703552, "learning_rate": 4.827924810623773e-06, "loss": 0.0812, "step": 19400 }, { "epoch": 1.8202184262111454, "grad_norm": 1.152933120727539, "learning_rate": 4.826989619377163e-06, "loss": 0.0823, "step": 19500 }, { "epoch": 1.8295528796788947, "grad_norm": 0.6292151212692261, "learning_rate": 4.826054428130553e-06, "loss": 0.0817, "step": 19600 }, { "epoch": 1.8388873331466442, "grad_norm": 1.0838409662246704, "learning_rate": 4.825119236883944e-06, "loss": 0.0774, "step": 19700 }, { "epoch": 1.8482217866143937, "grad_norm": 0.4433206021785736, "learning_rate": 4.824184045637334e-06, "loss": 0.0884, "step": 19800 }, { "epoch": 1.8575562400821433, "grad_norm": 0.686724066734314, "learning_rate": 4.823248854390724e-06, "loss": 0.0814, "step": 19900 }, { "epoch": 1.8668906935498928, "grad_norm": 0.8488237857818604, "learning_rate": 4.822313663144113e-06, "loss": 0.0748, "step": 20000 }, { "epoch": 1.8668906935498928, "eval_accuracy": 0.7093582263710618, "eval_f1": 0.8400819558271982, "eval_loss": 0.08558176457881927, "eval_roc_auc": 0.9212553709772696, "eval_runtime": 315.4927, "eval_samples_per_second": 135.819, "eval_steps_per_second": 135.819, "step": 20000 }, { "epoch": 1.876225147017642, "grad_norm": 0.6829097270965576, "learning_rate": 4.821378471897503e-06, "loss": 0.0806, "step": 20100 }, { "epoch": 1.8855596004853916, "grad_norm": 0.9934221506118774, "learning_rate": 4.820443280650893e-06, "loss": 0.0845, "step": 20200 }, { "epoch": 1.894894053953141, "grad_norm": 0.631829023361206, "learning_rate": 4.819508089404283e-06, "loss": 0.083, "step": 20300 }, { "epoch": 1.9042285074208904, "grad_norm": 1.1934970617294312, "learning_rate": 4.818572898157673e-06, "loss": 0.0854, "step": 20400 }, { "epoch": 1.91356296088864, "grad_norm": 0.9370301365852356, "learning_rate": 4.817637706911063e-06, "loss": 0.0804, "step": 20500 }, { "epoch": 1.9228974143563895, "grad_norm": 0.7443779110908508, "learning_rate": 4.816702515664454e-06, "loss": 0.0824, "step": 20600 }, { "epoch": 1.932231867824139, "grad_norm": 0.7444396018981934, "learning_rate": 4.815767324417844e-06, "loss": 0.0796, "step": 20700 }, { "epoch": 1.9415663212918883, "grad_norm": 0.8452122807502747, "learning_rate": 4.814832133171234e-06, "loss": 0.0791, "step": 20800 }, { "epoch": 1.9509007747596379, "grad_norm": 0.8130319118499756, "learning_rate": 4.813896941924624e-06, "loss": 0.0809, "step": 20900 }, { "epoch": 1.9602352282273872, "grad_norm": 0.9655837416648865, "learning_rate": 4.812961750678014e-06, "loss": 0.081, "step": 21000 }, { "epoch": 1.9695696816951367, "grad_norm": 0.7041159272193909, "learning_rate": 4.812026559431404e-06, "loss": 0.0828, "step": 21100 }, { "epoch": 1.9789041351628862, "grad_norm": 0.7219372987747192, "learning_rate": 4.811091368184794e-06, "loss": 0.081, "step": 21200 }, { "epoch": 1.9882385886306357, "grad_norm": 0.8959358334541321, "learning_rate": 4.810156176938184e-06, "loss": 0.083, "step": 21300 }, { "epoch": 1.9975730420983853, "grad_norm": 0.8118568062782288, "learning_rate": 4.809220985691574e-06, "loss": 0.079, "step": 21400 }, { "epoch": 2.006907495566135, "grad_norm": 0.8533639907836914, "learning_rate": 4.808285794444964e-06, "loss": 0.082, "step": 21500 }, { "epoch": 2.016241949033884, "grad_norm": 1.2092796564102173, "learning_rate": 4.807350603198354e-06, "loss": 0.0781, "step": 21600 }, { "epoch": 2.0255764025016334, "grad_norm": 1.3276946544647217, "learning_rate": 4.806415411951744e-06, "loss": 0.0755, "step": 21700 }, { "epoch": 2.034910855969383, "grad_norm": 2.473252773284912, "learning_rate": 4.805480220705135e-06, "loss": 0.0782, "step": 21800 }, { "epoch": 2.0442453094371325, "grad_norm": 0.8211420774459839, "learning_rate": 4.8045450294585244e-06, "loss": 0.0773, "step": 21900 }, { "epoch": 2.053579762904882, "grad_norm": 1.0904179811477661, "learning_rate": 4.8036098382119145e-06, "loss": 0.0748, "step": 22000 }, { "epoch": 2.0629142163726315, "grad_norm": 0.7296389937400818, "learning_rate": 4.8026746469653045e-06, "loss": 0.0736, "step": 22100 }, { "epoch": 2.072248669840381, "grad_norm": 0.8017075061798096, "learning_rate": 4.8017394557186945e-06, "loss": 0.0732, "step": 22200 }, { "epoch": 2.08158312330813, "grad_norm": 0.6875936985015869, "learning_rate": 4.8008042644720846e-06, "loss": 0.0779, "step": 22300 }, { "epoch": 2.0909175767758796, "grad_norm": 1.8142985105514526, "learning_rate": 4.799869073225475e-06, "loss": 0.0809, "step": 22400 }, { "epoch": 2.100252030243629, "grad_norm": 1.1371846199035645, "learning_rate": 4.798933881978865e-06, "loss": 0.0783, "step": 22500 }, { "epoch": 2.1095864837113787, "grad_norm": 1.2423408031463623, "learning_rate": 4.7979986907322555e-06, "loss": 0.076, "step": 22600 }, { "epoch": 2.118920937179128, "grad_norm": 0.7879576683044434, "learning_rate": 4.7970634994856455e-06, "loss": 0.074, "step": 22700 }, { "epoch": 2.1282553906468777, "grad_norm": 7.567473411560059, "learning_rate": 4.7961283082390356e-06, "loss": 0.0798, "step": 22800 }, { "epoch": 2.1375898441146273, "grad_norm": 1.124089241027832, "learning_rate": 4.795193116992426e-06, "loss": 0.0735, "step": 22900 }, { "epoch": 2.1469242975823764, "grad_norm": 0.6740363836288452, "learning_rate": 4.794257925745816e-06, "loss": 0.067, "step": 23000 }, { "epoch": 2.156258751050126, "grad_norm": 1.2070947885513306, "learning_rate": 4.793322734499206e-06, "loss": 0.0688, "step": 23100 }, { "epoch": 2.1655932045178754, "grad_norm": 0.8228589296340942, "learning_rate": 4.792387543252596e-06, "loss": 0.0798, "step": 23200 }, { "epoch": 2.174927657985625, "grad_norm": 0.6020055413246155, "learning_rate": 4.791452352005986e-06, "loss": 0.0734, "step": 23300 }, { "epoch": 2.1842621114533745, "grad_norm": 1.4077461957931519, "learning_rate": 4.790517160759376e-06, "loss": 0.0741, "step": 23400 }, { "epoch": 2.193596564921124, "grad_norm": 0.8479403257369995, "learning_rate": 4.789581969512766e-06, "loss": 0.0787, "step": 23500 }, { "epoch": 2.2029310183888735, "grad_norm": 0.6592878699302673, "learning_rate": 4.788646778266156e-06, "loss": 0.0745, "step": 23600 }, { "epoch": 2.2122654718566226, "grad_norm": 1.8098366260528564, "learning_rate": 4.787711587019546e-06, "loss": 0.0811, "step": 23700 }, { "epoch": 2.221599925324372, "grad_norm": 0.7827460765838623, "learning_rate": 4.786776395772936e-06, "loss": 0.0817, "step": 23800 }, { "epoch": 2.2309343787921216, "grad_norm": 0.7363933324813843, "learning_rate": 4.785841204526326e-06, "loss": 0.0766, "step": 23900 }, { "epoch": 2.240268832259871, "grad_norm": 0.9372043013572693, "learning_rate": 4.784906013279716e-06, "loss": 0.0788, "step": 24000 }, { "epoch": 2.2496032857276207, "grad_norm": 0.6170584559440613, "learning_rate": 4.783970822033106e-06, "loss": 0.074, "step": 24100 }, { "epoch": 2.25893773919537, "grad_norm": 0.3955153524875641, "learning_rate": 4.783035630786496e-06, "loss": 0.0754, "step": 24200 }, { "epoch": 2.2682721926631197, "grad_norm": 0.9319208264350891, "learning_rate": 4.782100439539886e-06, "loss": 0.0723, "step": 24300 }, { "epoch": 2.2776066461308693, "grad_norm": 0.7785887718200684, "learning_rate": 4.781165248293276e-06, "loss": 0.0746, "step": 24400 }, { "epoch": 2.2869410995986184, "grad_norm": 0.6844919919967651, "learning_rate": 4.780230057046666e-06, "loss": 0.0702, "step": 24500 }, { "epoch": 2.296275553066368, "grad_norm": 0.4508419334888458, "learning_rate": 4.779294865800056e-06, "loss": 0.0805, "step": 24600 }, { "epoch": 2.3056100065341174, "grad_norm": 0.8115683197975159, "learning_rate": 4.778359674553447e-06, "loss": 0.0784, "step": 24700 }, { "epoch": 2.314944460001867, "grad_norm": 1.2865110635757446, "learning_rate": 4.777424483306837e-06, "loss": 0.077, "step": 24800 }, { "epoch": 2.3242789134696165, "grad_norm": 0.6021707653999329, "learning_rate": 4.776489292060227e-06, "loss": 0.081, "step": 24900 }, { "epoch": 2.333613366937366, "grad_norm": 0.5476040840148926, "learning_rate": 4.775554100813617e-06, "loss": 0.0775, "step": 25000 }, { "epoch": 2.333613366937366, "eval_accuracy": 0.7244107351225204, "eval_f1": 0.8412339563161451, "eval_loss": 0.08265367895364761, "eval_roc_auc": 0.9088867380974313, "eval_runtime": 370.2691, "eval_samples_per_second": 115.727, "eval_steps_per_second": 115.727, "step": 25000 }, { "epoch": 2.342947820405115, "grad_norm": 0.41831299662590027, "learning_rate": 4.774618909567007e-06, "loss": 0.0732, "step": 25100 }, { "epoch": 2.3522822738728646, "grad_norm": 1.161587119102478, "learning_rate": 4.773683718320397e-06, "loss": 0.0736, "step": 25200 }, { "epoch": 2.361616727340614, "grad_norm": 1.1184415817260742, "learning_rate": 4.772748527073787e-06, "loss": 0.0763, "step": 25300 }, { "epoch": 2.3709511808083636, "grad_norm": 0.3816167414188385, "learning_rate": 4.771813335827177e-06, "loss": 0.0781, "step": 25400 }, { "epoch": 2.380285634276113, "grad_norm": 1.3536192178726196, "learning_rate": 4.770878144580567e-06, "loss": 0.0775, "step": 25500 }, { "epoch": 2.3896200877438627, "grad_norm": 0.5921593904495239, "learning_rate": 4.769942953333957e-06, "loss": 0.0772, "step": 25600 }, { "epoch": 2.3989545412116122, "grad_norm": 0.8628026247024536, "learning_rate": 4.769007762087347e-06, "loss": 0.0814, "step": 25700 }, { "epoch": 2.4082889946793617, "grad_norm": 0.8849796652793884, "learning_rate": 4.768072570840737e-06, "loss": 0.082, "step": 25800 }, { "epoch": 2.417623448147111, "grad_norm": 0.909490704536438, "learning_rate": 4.767137379594127e-06, "loss": 0.0746, "step": 25900 }, { "epoch": 2.4269579016148604, "grad_norm": 0.8026836514472961, "learning_rate": 4.766202188347517e-06, "loss": 0.0755, "step": 26000 }, { "epoch": 2.43629235508261, "grad_norm": 0.5954301953315735, "learning_rate": 4.765266997100907e-06, "loss": 0.0773, "step": 26100 }, { "epoch": 2.4456268085503594, "grad_norm": 0.856101393699646, "learning_rate": 4.7643318058542974e-06, "loss": 0.0749, "step": 26200 }, { "epoch": 2.454961262018109, "grad_norm": 0.8656226992607117, "learning_rate": 4.7633966146076875e-06, "loss": 0.0737, "step": 26300 }, { "epoch": 2.4642957154858585, "grad_norm": 0.6958594918251038, "learning_rate": 4.7624614233610775e-06, "loss": 0.0799, "step": 26400 }, { "epoch": 2.4736301689536075, "grad_norm": 0.6429681777954102, "learning_rate": 4.7615262321144675e-06, "loss": 0.0776, "step": 26500 }, { "epoch": 2.482964622421357, "grad_norm": 0.9182838201522827, "learning_rate": 4.7605910408678576e-06, "loss": 0.0807, "step": 26600 }, { "epoch": 2.4922990758891066, "grad_norm": 1.1440132856369019, "learning_rate": 4.7596558496212484e-06, "loss": 0.0736, "step": 26700 }, { "epoch": 2.501633529356856, "grad_norm": 0.5545814633369446, "learning_rate": 4.7587206583746385e-06, "loss": 0.0771, "step": 26800 }, { "epoch": 2.5109679828246056, "grad_norm": 0.6440008878707886, "learning_rate": 4.7577854671280285e-06, "loss": 0.0751, "step": 26900 }, { "epoch": 2.520302436292355, "grad_norm": 0.394196480512619, "learning_rate": 4.7568502758814185e-06, "loss": 0.0723, "step": 27000 }, { "epoch": 2.5296368897601047, "grad_norm": 1.937624454498291, "learning_rate": 4.7559150846348086e-06, "loss": 0.0748, "step": 27100 }, { "epoch": 2.5389713432278542, "grad_norm": 1.5011743307113647, "learning_rate": 4.754979893388199e-06, "loss": 0.0751, "step": 27200 }, { "epoch": 2.5483057966956033, "grad_norm": 1.1376004219055176, "learning_rate": 4.754044702141588e-06, "loss": 0.0751, "step": 27300 }, { "epoch": 2.557640250163353, "grad_norm": 0.9133825302124023, "learning_rate": 4.753109510894978e-06, "loss": 0.0792, "step": 27400 }, { "epoch": 2.5669747036311024, "grad_norm": 0.7915837168693542, "learning_rate": 4.752174319648369e-06, "loss": 0.0786, "step": 27500 }, { "epoch": 2.576309157098852, "grad_norm": 0.8544344305992126, "learning_rate": 4.751239128401759e-06, "loss": 0.0749, "step": 27600 }, { "epoch": 2.5856436105666014, "grad_norm": 0.6572892665863037, "learning_rate": 4.750303937155149e-06, "loss": 0.0751, "step": 27700 }, { "epoch": 2.594978064034351, "grad_norm": 1.10152006149292, "learning_rate": 4.749368745908539e-06, "loss": 0.0805, "step": 27800 }, { "epoch": 2.6043125175021, "grad_norm": 0.4928382635116577, "learning_rate": 4.748433554661929e-06, "loss": 0.0727, "step": 27900 }, { "epoch": 2.6136469709698495, "grad_norm": 2.0755615234375, "learning_rate": 4.747498363415319e-06, "loss": 0.0702, "step": 28000 }, { "epoch": 2.622981424437599, "grad_norm": 1.2826941013336182, "learning_rate": 4.746563172168709e-06, "loss": 0.0738, "step": 28100 }, { "epoch": 2.6323158779053486, "grad_norm": 2.7509264945983887, "learning_rate": 4.745627980922099e-06, "loss": 0.078, "step": 28200 }, { "epoch": 2.641650331373098, "grad_norm": 0.5564242005348206, "learning_rate": 4.744692789675489e-06, "loss": 0.0732, "step": 28300 }, { "epoch": 2.6509847848408477, "grad_norm": 1.0438953638076782, "learning_rate": 4.743757598428879e-06, "loss": 0.0777, "step": 28400 }, { "epoch": 2.660319238308597, "grad_norm": 1.88390052318573, "learning_rate": 4.742822407182269e-06, "loss": 0.0808, "step": 28500 }, { "epoch": 2.6696536917763467, "grad_norm": 0.5230315327644348, "learning_rate": 4.741887215935659e-06, "loss": 0.0795, "step": 28600 }, { "epoch": 2.6789881452440962, "grad_norm": 0.8375095129013062, "learning_rate": 4.740952024689049e-06, "loss": 0.0756, "step": 28700 }, { "epoch": 2.6883225987118453, "grad_norm": 1.0672707557678223, "learning_rate": 4.74001683344244e-06, "loss": 0.0769, "step": 28800 }, { "epoch": 2.697657052179595, "grad_norm": 0.574353814125061, "learning_rate": 4.73908164219583e-06, "loss": 0.0758, "step": 28900 }, { "epoch": 2.7069915056473444, "grad_norm": 1.2673457860946655, "learning_rate": 4.73814645094922e-06, "loss": 0.0758, "step": 29000 }, { "epoch": 2.716325959115094, "grad_norm": 0.8491700887680054, "learning_rate": 4.737211259702609e-06, "loss": 0.0709, "step": 29100 }, { "epoch": 2.7256604125828434, "grad_norm": 0.37486106157302856, "learning_rate": 4.736276068455999e-06, "loss": 0.0725, "step": 29200 }, { "epoch": 2.7349948660505925, "grad_norm": 1.0346736907958984, "learning_rate": 4.735340877209389e-06, "loss": 0.0769, "step": 29300 }, { "epoch": 2.744329319518342, "grad_norm": 1.254014253616333, "learning_rate": 4.734405685962779e-06, "loss": 0.0695, "step": 29400 }, { "epoch": 2.7536637729860916, "grad_norm": 1.1030189990997314, "learning_rate": 4.733470494716169e-06, "loss": 0.0775, "step": 29500 }, { "epoch": 2.762998226453841, "grad_norm": 1.2111154794692993, "learning_rate": 4.73253530346956e-06, "loss": 0.0761, "step": 29600 }, { "epoch": 2.7723326799215906, "grad_norm": 0.580629289150238, "learning_rate": 4.73160011222295e-06, "loss": 0.0762, "step": 29700 }, { "epoch": 2.78166713338934, "grad_norm": 0.5934848189353943, "learning_rate": 4.73066492097634e-06, "loss": 0.0795, "step": 29800 }, { "epoch": 2.7910015868570897, "grad_norm": 0.7956952452659607, "learning_rate": 4.72972972972973e-06, "loss": 0.0751, "step": 29900 }, { "epoch": 2.800336040324839, "grad_norm": 1.387649655342102, "learning_rate": 4.72879453848312e-06, "loss": 0.0785, "step": 30000 }, { "epoch": 2.800336040324839, "eval_accuracy": 0.7181796966161027, "eval_f1": 0.8428111918924873, "eval_loss": 0.08312787115573883, "eval_roc_auc": 0.917464383955, "eval_runtime": 442.2945, "eval_samples_per_second": 96.881, "eval_steps_per_second": 96.881, "step": 30000 }, { "epoch": 2.8096704937925887, "grad_norm": 0.6785910725593567, "learning_rate": 4.72785934723651e-06, "loss": 0.0788, "step": 30100 }, { "epoch": 2.819004947260338, "grad_norm": 1.253549337387085, "learning_rate": 4.7269241559899e-06, "loss": 0.0822, "step": 30200 }, { "epoch": 2.8283394007280873, "grad_norm": 0.6282103657722473, "learning_rate": 4.72598896474329e-06, "loss": 0.0697, "step": 30300 }, { "epoch": 2.837673854195837, "grad_norm": 0.44414961338043213, "learning_rate": 4.7250537734966804e-06, "loss": 0.0739, "step": 30400 }, { "epoch": 2.8470083076635864, "grad_norm": 0.7630642652511597, "learning_rate": 4.7241185822500705e-06, "loss": 0.0708, "step": 30500 }, { "epoch": 2.856342761131336, "grad_norm": 1.1691311597824097, "learning_rate": 4.7231833910034605e-06, "loss": 0.0752, "step": 30600 }, { "epoch": 2.865677214599085, "grad_norm": 1.4165804386138916, "learning_rate": 4.7222481997568505e-06, "loss": 0.0779, "step": 30700 }, { "epoch": 2.8750116680668345, "grad_norm": 0.654381513595581, "learning_rate": 4.721313008510241e-06, "loss": 0.076, "step": 30800 }, { "epoch": 2.884346121534584, "grad_norm": 1.0067996978759766, "learning_rate": 4.720377817263631e-06, "loss": 0.0785, "step": 30900 }, { "epoch": 2.8936805750023336, "grad_norm": 0.6855391263961792, "learning_rate": 4.719442626017021e-06, "loss": 0.0771, "step": 31000 }, { "epoch": 2.903015028470083, "grad_norm": 0.44642287492752075, "learning_rate": 4.718507434770411e-06, "loss": 0.072, "step": 31100 }, { "epoch": 2.9123494819378326, "grad_norm": 0.8298773169517517, "learning_rate": 4.717572243523801e-06, "loss": 0.0758, "step": 31200 }, { "epoch": 2.921683935405582, "grad_norm": 1.2116279602050781, "learning_rate": 4.716637052277191e-06, "loss": 0.0811, "step": 31300 }, { "epoch": 2.9310183888733317, "grad_norm": 0.7975913286209106, "learning_rate": 4.715701861030581e-06, "loss": 0.069, "step": 31400 }, { "epoch": 2.940352842341081, "grad_norm": 0.515428900718689, "learning_rate": 4.714766669783971e-06, "loss": 0.0692, "step": 31500 }, { "epoch": 2.9496872958088303, "grad_norm": 0.8248312473297119, "learning_rate": 4.713831478537362e-06, "loss": 0.0691, "step": 31600 }, { "epoch": 2.95902174927658, "grad_norm": 0.7041317820549011, "learning_rate": 4.712896287290752e-06, "loss": 0.0785, "step": 31700 }, { "epoch": 2.9683562027443293, "grad_norm": 0.7298337817192078, "learning_rate": 4.711961096044142e-06, "loss": 0.0772, "step": 31800 }, { "epoch": 2.977690656212079, "grad_norm": 0.5263153910636902, "learning_rate": 4.711025904797532e-06, "loss": 0.0776, "step": 31900 }, { "epoch": 2.9870251096798284, "grad_norm": 0.38115614652633667, "learning_rate": 4.710090713550922e-06, "loss": 0.0718, "step": 32000 }, { "epoch": 2.9963595631475775, "grad_norm": 0.6701536774635315, "learning_rate": 4.709155522304312e-06, "loss": 0.0736, "step": 32100 }, { "epoch": 3.005694016615327, "grad_norm": 0.6095858812332153, "learning_rate": 4.708220331057702e-06, "loss": 0.0755, "step": 32200 }, { "epoch": 3.0150284700830765, "grad_norm": 0.6592550277709961, "learning_rate": 4.707285139811092e-06, "loss": 0.0703, "step": 32300 }, { "epoch": 3.024362923550826, "grad_norm": 1.3717174530029297, "learning_rate": 4.706349948564482e-06, "loss": 0.0755, "step": 32400 }, { "epoch": 3.0336973770185756, "grad_norm": 1.3234035968780518, "learning_rate": 4.705414757317872e-06, "loss": 0.0743, "step": 32500 }, { "epoch": 3.043031830486325, "grad_norm": 1.0745387077331543, "learning_rate": 4.704479566071262e-06, "loss": 0.0702, "step": 32600 }, { "epoch": 3.0523662839540746, "grad_norm": 0.9502926468849182, "learning_rate": 4.703544374824652e-06, "loss": 0.0719, "step": 32700 }, { "epoch": 3.061700737421824, "grad_norm": 1.9984395503997803, "learning_rate": 4.702609183578042e-06, "loss": 0.0697, "step": 32800 }, { "epoch": 3.071035190889573, "grad_norm": 1.064260721206665, "learning_rate": 4.701673992331432e-06, "loss": 0.0703, "step": 32900 }, { "epoch": 3.0803696443573227, "grad_norm": 0.8611910343170166, "learning_rate": 4.700738801084822e-06, "loss": 0.0629, "step": 33000 }, { "epoch": 3.0897040978250723, "grad_norm": 0.8101058602333069, "learning_rate": 4.699803609838212e-06, "loss": 0.0732, "step": 33100 }, { "epoch": 3.099038551292822, "grad_norm": 0.44057029485702515, "learning_rate": 4.698868418591602e-06, "loss": 0.0674, "step": 33200 }, { "epoch": 3.1083730047605713, "grad_norm": 1.468570351600647, "learning_rate": 4.697933227344992e-06, "loss": 0.0708, "step": 33300 }, { "epoch": 3.117707458228321, "grad_norm": 1.4405447244644165, "learning_rate": 4.696998036098382e-06, "loss": 0.067, "step": 33400 }, { "epoch": 3.1270419116960704, "grad_norm": 0.836079478263855, "learning_rate": 4.696062844851772e-06, "loss": 0.0729, "step": 33500 }, { "epoch": 3.1363763651638195, "grad_norm": 0.8641823530197144, "learning_rate": 4.695127653605162e-06, "loss": 0.0709, "step": 33600 }, { "epoch": 3.145710818631569, "grad_norm": 0.6719909310340881, "learning_rate": 4.694192462358553e-06, "loss": 0.0706, "step": 33700 }, { "epoch": 3.1550452720993185, "grad_norm": 0.8492844700813293, "learning_rate": 4.693257271111943e-06, "loss": 0.072, "step": 33800 }, { "epoch": 3.164379725567068, "grad_norm": 0.6883953809738159, "learning_rate": 4.692322079865333e-06, "loss": 0.0676, "step": 33900 }, { "epoch": 3.1737141790348176, "grad_norm": 0.8814612030982971, "learning_rate": 4.691386888618723e-06, "loss": 0.075, "step": 34000 }, { "epoch": 3.183048632502567, "grad_norm": 0.7401642203330994, "learning_rate": 4.690451697372113e-06, "loss": 0.0711, "step": 34100 }, { "epoch": 3.1923830859703166, "grad_norm": 1.0256643295288086, "learning_rate": 4.689516506125503e-06, "loss": 0.0682, "step": 34200 }, { "epoch": 3.2017175394380657, "grad_norm": 0.5240198969841003, "learning_rate": 4.688581314878893e-06, "loss": 0.0687, "step": 34300 }, { "epoch": 3.2110519929058152, "grad_norm": 0.46370792388916016, "learning_rate": 4.687646123632283e-06, "loss": 0.0664, "step": 34400 }, { "epoch": 3.2203864463735647, "grad_norm": 0.9029916524887085, "learning_rate": 4.686710932385673e-06, "loss": 0.065, "step": 34500 }, { "epoch": 3.2297208998413143, "grad_norm": 0.724449872970581, "learning_rate": 4.685775741139063e-06, "loss": 0.0774, "step": 34600 }, { "epoch": 3.239055353309064, "grad_norm": 0.6227043867111206, "learning_rate": 4.6848405498924534e-06, "loss": 0.0685, "step": 34700 }, { "epoch": 3.2483898067768133, "grad_norm": 0.639691948890686, "learning_rate": 4.6839053586458435e-06, "loss": 0.0689, "step": 34800 }, { "epoch": 3.257724260244563, "grad_norm": 0.8001283407211304, "learning_rate": 4.6829701673992335e-06, "loss": 0.0685, "step": 34900 }, { "epoch": 3.267058713712312, "grad_norm": 0.896821916103363, "learning_rate": 4.6820349761526235e-06, "loss": 0.0711, "step": 35000 }, { "epoch": 3.267058713712312, "eval_accuracy": 0.7213768961493582, "eval_f1": 0.8424729800420261, "eval_loss": 0.084145687520504, "eval_roc_auc": 0.9151110310858371, "eval_runtime": 421.3949, "eval_samples_per_second": 101.686, "eval_steps_per_second": 101.686, "step": 35000 }, { "epoch": 3.2763931671800615, "grad_norm": 0.9499011039733887, "learning_rate": 4.6810997849060136e-06, "loss": 0.0684, "step": 35100 }, { "epoch": 3.285727620647811, "grad_norm": 0.6036292910575867, "learning_rate": 4.680164593659404e-06, "loss": 0.0658, "step": 35200 }, { "epoch": 3.2950620741155605, "grad_norm": 1.0212057828903198, "learning_rate": 4.679229402412794e-06, "loss": 0.0668, "step": 35300 }, { "epoch": 3.30439652758331, "grad_norm": 2.832141399383545, "learning_rate": 4.678294211166184e-06, "loss": 0.0693, "step": 35400 }, { "epoch": 3.3137309810510596, "grad_norm": 0.6937221884727478, "learning_rate": 4.677359019919574e-06, "loss": 0.073, "step": 35500 }, { "epoch": 3.323065434518809, "grad_norm": 0.7512823343276978, "learning_rate": 4.676423828672964e-06, "loss": 0.072, "step": 35600 }, { "epoch": 3.3323998879865586, "grad_norm": 0.630892276763916, "learning_rate": 4.675488637426354e-06, "loss": 0.0701, "step": 35700 }, { "epoch": 3.3417343414543077, "grad_norm": 1.6426749229431152, "learning_rate": 4.674553446179745e-06, "loss": 0.072, "step": 35800 }, { "epoch": 3.3510687949220572, "grad_norm": 0.7343888282775879, "learning_rate": 4.673618254933135e-06, "loss": 0.0697, "step": 35900 }, { "epoch": 3.3604032483898068, "grad_norm": 0.716952383518219, "learning_rate": 4.672683063686525e-06, "loss": 0.0674, "step": 36000 }, { "epoch": 3.3697377018575563, "grad_norm": 1.3071855306625366, "learning_rate": 4.671747872439915e-06, "loss": 0.0672, "step": 36100 }, { "epoch": 3.379072155325306, "grad_norm": 0.9695940613746643, "learning_rate": 4.670812681193305e-06, "loss": 0.0723, "step": 36200 }, { "epoch": 3.3884066087930553, "grad_norm": 1.4347296953201294, "learning_rate": 4.669877489946694e-06, "loss": 0.0749, "step": 36300 }, { "epoch": 3.3977410622608044, "grad_norm": 1.7911466360092163, "learning_rate": 4.668942298700084e-06, "loss": 0.0691, "step": 36400 }, { "epoch": 3.407075515728554, "grad_norm": 0.6902058720588684, "learning_rate": 4.668007107453474e-06, "loss": 0.0703, "step": 36500 }, { "epoch": 3.4164099691963035, "grad_norm": 1.0473436117172241, "learning_rate": 4.667071916206865e-06, "loss": 0.0686, "step": 36600 }, { "epoch": 3.425744422664053, "grad_norm": 0.770994246006012, "learning_rate": 4.666136724960255e-06, "loss": 0.0665, "step": 36700 }, { "epoch": 3.4350788761318025, "grad_norm": 0.7791262269020081, "learning_rate": 4.665201533713645e-06, "loss": 0.0718, "step": 36800 }, { "epoch": 3.444413329599552, "grad_norm": 0.6905605792999268, "learning_rate": 4.664266342467035e-06, "loss": 0.0685, "step": 36900 }, { "epoch": 3.4537477830673016, "grad_norm": 0.7250871658325195, "learning_rate": 4.663331151220425e-06, "loss": 0.0752, "step": 37000 }, { "epoch": 3.463082236535051, "grad_norm": 4.7371368408203125, "learning_rate": 4.662395959973815e-06, "loss": 0.0696, "step": 37100 }, { "epoch": 3.4724166900028, "grad_norm": 0.5657176375389099, "learning_rate": 4.661460768727205e-06, "loss": 0.0713, "step": 37200 }, { "epoch": 3.4817511434705497, "grad_norm": 1.0386663675308228, "learning_rate": 4.660525577480595e-06, "loss": 0.0702, "step": 37300 }, { "epoch": 3.4910855969382992, "grad_norm": 1.4617897272109985, "learning_rate": 4.659590386233985e-06, "loss": 0.073, "step": 37400 }, { "epoch": 3.5004200504060488, "grad_norm": 0.7392893433570862, "learning_rate": 4.658655194987375e-06, "loss": 0.0722, "step": 37500 }, { "epoch": 3.5097545038737983, "grad_norm": 0.540181040763855, "learning_rate": 4.657720003740765e-06, "loss": 0.0677, "step": 37600 }, { "epoch": 3.519088957341548, "grad_norm": 0.616308867931366, "learning_rate": 4.656784812494155e-06, "loss": 0.0703, "step": 37700 }, { "epoch": 3.528423410809297, "grad_norm": 0.8486816883087158, "learning_rate": 4.655849621247546e-06, "loss": 0.0709, "step": 37800 }, { "epoch": 3.5377578642770464, "grad_norm": 0.48479345440864563, "learning_rate": 4.654914430000936e-06, "loss": 0.068, "step": 37900 }, { "epoch": 3.547092317744796, "grad_norm": 0.7945524454116821, "learning_rate": 4.653979238754326e-06, "loss": 0.0673, "step": 38000 }, { "epoch": 3.5564267712125455, "grad_norm": 0.8818780183792114, "learning_rate": 4.653044047507715e-06, "loss": 0.0699, "step": 38100 }, { "epoch": 3.565761224680295, "grad_norm": 0.8894991278648376, "learning_rate": 4.652108856261105e-06, "loss": 0.0742, "step": 38200 }, { "epoch": 3.5750956781480445, "grad_norm": 1.2130554914474487, "learning_rate": 4.651173665014495e-06, "loss": 0.0718, "step": 38300 }, { "epoch": 3.584430131615794, "grad_norm": 1.1891839504241943, "learning_rate": 4.650238473767885e-06, "loss": 0.0665, "step": 38400 }, { "epoch": 3.5937645850835436, "grad_norm": 1.1907507181167603, "learning_rate": 4.6493032825212755e-06, "loss": 0.0713, "step": 38500 }, { "epoch": 3.603099038551293, "grad_norm": 0.5847403407096863, "learning_rate": 4.648368091274666e-06, "loss": 0.0691, "step": 38600 }, { "epoch": 3.612433492019042, "grad_norm": 0.5308549404144287, "learning_rate": 4.647432900028056e-06, "loss": 0.0689, "step": 38700 }, { "epoch": 3.6217679454867917, "grad_norm": 0.4569282531738281, "learning_rate": 4.646497708781446e-06, "loss": 0.0705, "step": 38800 }, { "epoch": 3.6311023989545412, "grad_norm": 0.6113088726997375, "learning_rate": 4.645562517534836e-06, "loss": 0.0654, "step": 38900 }, { "epoch": 3.6404368524222908, "grad_norm": 1.2847880125045776, "learning_rate": 4.6446273262882265e-06, "loss": 0.0648, "step": 39000 }, { "epoch": 3.6497713058900403, "grad_norm": 1.0133851766586304, "learning_rate": 4.6436921350416165e-06, "loss": 0.0734, "step": 39100 }, { "epoch": 3.6591057593577894, "grad_norm": 0.5364477634429932, "learning_rate": 4.6427569437950065e-06, "loss": 0.0703, "step": 39200 }, { "epoch": 3.668440212825539, "grad_norm": 2.5909812450408936, "learning_rate": 4.6418217525483965e-06, "loss": 0.0751, "step": 39300 }, { "epoch": 3.6777746662932884, "grad_norm": 0.618204653263092, "learning_rate": 4.6408865613017866e-06, "loss": 0.074, "step": 39400 }, { "epoch": 3.687109119761038, "grad_norm": 1.0533266067504883, "learning_rate": 4.639951370055177e-06, "loss": 0.0704, "step": 39500 }, { "epoch": 3.6964435732287875, "grad_norm": 0.771244466304779, "learning_rate": 4.639016178808567e-06, "loss": 0.0736, "step": 39600 }, { "epoch": 3.705778026696537, "grad_norm": 0.4775882661342621, "learning_rate": 4.638080987561957e-06, "loss": 0.0718, "step": 39700 }, { "epoch": 3.7151124801642865, "grad_norm": 0.7274486422538757, "learning_rate": 4.637145796315347e-06, "loss": 0.0723, "step": 39800 }, { "epoch": 3.724446933632036, "grad_norm": 0.7875614762306213, "learning_rate": 4.636210605068738e-06, "loss": 0.0736, "step": 39900 }, { "epoch": 3.7337813870997856, "grad_norm": 0.4791102111339569, "learning_rate": 4.635275413822127e-06, "loss": 0.0705, "step": 40000 }, { "epoch": 3.7337813870997856, "eval_accuracy": 0.7262777129521587, "eval_f1": 0.8435394814430963, "eval_loss": 0.08349325507879257, "eval_roc_auc": 0.9130066639755477, "eval_runtime": 351.6117, "eval_samples_per_second": 121.867, "eval_steps_per_second": 121.867, "step": 40000 }, { "epoch": 3.7431158405675347, "grad_norm": 0.8582549095153809, "learning_rate": 4.634340222575517e-06, "loss": 0.0729, "step": 40100 }, { "epoch": 3.752450294035284, "grad_norm": 0.8719303011894226, "learning_rate": 4.633405031328907e-06, "loss": 0.0723, "step": 40200 }, { "epoch": 3.7617847475030337, "grad_norm": 0.8843119144439697, "learning_rate": 4.632469840082297e-06, "loss": 0.0705, "step": 40300 }, { "epoch": 3.7711192009707832, "grad_norm": 0.8851020336151123, "learning_rate": 4.631534648835687e-06, "loss": 0.0773, "step": 40400 }, { "epoch": 3.7804536544385328, "grad_norm": 0.9045310616493225, "learning_rate": 4.630599457589077e-06, "loss": 0.0741, "step": 40500 }, { "epoch": 3.789788107906282, "grad_norm": 1.0240435600280762, "learning_rate": 4.629664266342467e-06, "loss": 0.0753, "step": 40600 }, { "epoch": 3.7991225613740314, "grad_norm": 1.1240394115447998, "learning_rate": 4.628729075095858e-06, "loss": 0.0733, "step": 40700 }, { "epoch": 3.808457014841781, "grad_norm": 0.8633688688278198, "learning_rate": 4.627793883849248e-06, "loss": 0.0755, "step": 40800 }, { "epoch": 3.8177914683095304, "grad_norm": 1.0664052963256836, "learning_rate": 4.626858692602638e-06, "loss": 0.0636, "step": 40900 }, { "epoch": 3.82712592177728, "grad_norm": 0.6548974514007568, "learning_rate": 4.625923501356028e-06, "loss": 0.0707, "step": 41000 }, { "epoch": 3.8364603752450295, "grad_norm": 0.8380701541900635, "learning_rate": 4.624988310109418e-06, "loss": 0.0667, "step": 41100 }, { "epoch": 3.845794828712779, "grad_norm": 1.0225739479064941, "learning_rate": 4.624053118862808e-06, "loss": 0.0688, "step": 41200 }, { "epoch": 3.8551292821805285, "grad_norm": 1.0608185529708862, "learning_rate": 4.623117927616198e-06, "loss": 0.0672, "step": 41300 }, { "epoch": 3.864463735648278, "grad_norm": 0.8617438077926636, "learning_rate": 4.622182736369588e-06, "loss": 0.0643, "step": 41400 }, { "epoch": 3.873798189116027, "grad_norm": 1.829993724822998, "learning_rate": 4.621247545122978e-06, "loss": 0.0712, "step": 41500 }, { "epoch": 3.8831326425837767, "grad_norm": 0.9006952047348022, "learning_rate": 4.620312353876368e-06, "loss": 0.0682, "step": 41600 }, { "epoch": 3.892467096051526, "grad_norm": 0.9497913718223572, "learning_rate": 4.619377162629758e-06, "loss": 0.0673, "step": 41700 }, { "epoch": 3.9018015495192757, "grad_norm": 0.7192357182502747, "learning_rate": 4.618441971383148e-06, "loss": 0.0671, "step": 41800 }, { "epoch": 3.9111360029870252, "grad_norm": 2.484524965286255, "learning_rate": 4.617506780136538e-06, "loss": 0.076, "step": 41900 }, { "epoch": 3.9204704564547743, "grad_norm": 0.6844746470451355, "learning_rate": 4.616571588889928e-06, "loss": 0.0741, "step": 42000 }, { "epoch": 3.929804909922524, "grad_norm": 0.35694408416748047, "learning_rate": 4.615636397643318e-06, "loss": 0.0658, "step": 42100 }, { "epoch": 3.9391393633902734, "grad_norm": 0.42462000250816345, "learning_rate": 4.614701206396708e-06, "loss": 0.0675, "step": 42200 }, { "epoch": 3.948473816858023, "grad_norm": 1.0796679258346558, "learning_rate": 4.613766015150098e-06, "loss": 0.0728, "step": 42300 }, { "epoch": 3.9578082703257724, "grad_norm": 1.5831118822097778, "learning_rate": 4.612830823903488e-06, "loss": 0.0687, "step": 42400 }, { "epoch": 3.967142723793522, "grad_norm": 1.1326252222061157, "learning_rate": 4.611895632656878e-06, "loss": 0.0688, "step": 42500 }, { "epoch": 3.9764771772612715, "grad_norm": 1.0836035013198853, "learning_rate": 4.610960441410268e-06, "loss": 0.0678, "step": 42600 }, { "epoch": 3.985811630729021, "grad_norm": 0.801672101020813, "learning_rate": 4.610025250163659e-06, "loss": 0.0703, "step": 42700 }, { "epoch": 3.9951460841967705, "grad_norm": 1.3776246309280396, "learning_rate": 4.609090058917049e-06, "loss": 0.0698, "step": 42800 }, { "epoch": 4.00448053766452, "grad_norm": 0.5161024928092957, "learning_rate": 4.608154867670439e-06, "loss": 0.0692, "step": 42900 }, { "epoch": 4.01381499113227, "grad_norm": 1.1156377792358398, "learning_rate": 4.607219676423829e-06, "loss": 0.0573, "step": 43000 }, { "epoch": 4.023149444600019, "grad_norm": 0.8498590588569641, "learning_rate": 4.606284485177219e-06, "loss": 0.0659, "step": 43100 }, { "epoch": 4.032483898067768, "grad_norm": 0.5669699311256409, "learning_rate": 4.6053492939306094e-06, "loss": 0.0671, "step": 43200 }, { "epoch": 4.041818351535517, "grad_norm": 0.6417893767356873, "learning_rate": 4.6044141026839995e-06, "loss": 0.0675, "step": 43300 }, { "epoch": 4.051152805003267, "grad_norm": 0.7705711126327515, "learning_rate": 4.6034789114373895e-06, "loss": 0.0681, "step": 43400 }, { "epoch": 4.060487258471016, "grad_norm": 1.0475765466690063, "learning_rate": 4.6025437201907795e-06, "loss": 0.066, "step": 43500 }, { "epoch": 4.069821711938766, "grad_norm": 0.6559087634086609, "learning_rate": 4.6016085289441696e-06, "loss": 0.0602, "step": 43600 }, { "epoch": 4.079156165406515, "grad_norm": 1.6699305772781372, "learning_rate": 4.60067333769756e-06, "loss": 0.0631, "step": 43700 }, { "epoch": 4.088490618874265, "grad_norm": 0.5648127794265747, "learning_rate": 4.59973814645095e-06, "loss": 0.0617, "step": 43800 }, { "epoch": 4.097825072342014, "grad_norm": 0.8592658638954163, "learning_rate": 4.59880295520434e-06, "loss": 0.0643, "step": 43900 }, { "epoch": 4.107159525809764, "grad_norm": 0.9824233651161194, "learning_rate": 4.59786776395773e-06, "loss": 0.0662, "step": 44000 }, { "epoch": 4.1164939792775135, "grad_norm": 1.1051466464996338, "learning_rate": 4.59693257271112e-06, "loss": 0.0618, "step": 44100 }, { "epoch": 4.125828432745263, "grad_norm": 1.296904444694519, "learning_rate": 4.59599738146451e-06, "loss": 0.0681, "step": 44200 }, { "epoch": 4.1351628862130125, "grad_norm": 0.8039494156837463, "learning_rate": 4.5950621902179e-06, "loss": 0.067, "step": 44300 }, { "epoch": 4.144497339680762, "grad_norm": 1.6954636573791504, "learning_rate": 4.59412699897129e-06, "loss": 0.0683, "step": 44400 }, { "epoch": 4.153831793148511, "grad_norm": 1.0369861125946045, "learning_rate": 4.59319180772468e-06, "loss": 0.0669, "step": 44500 }, { "epoch": 4.16316624661626, "grad_norm": 1.067354679107666, "learning_rate": 4.59225661647807e-06, "loss": 0.0676, "step": 44600 }, { "epoch": 4.17250070008401, "grad_norm": 0.6121203899383545, "learning_rate": 4.59132142523146e-06, "loss": 0.0671, "step": 44700 }, { "epoch": 4.181835153551759, "grad_norm": 0.8158302307128906, "learning_rate": 4.590386233984851e-06, "loss": 0.0627, "step": 44800 }, { "epoch": 4.191169607019509, "grad_norm": 0.6615408062934875, "learning_rate": 4.589451042738241e-06, "loss": 0.066, "step": 44900 }, { "epoch": 4.200504060487258, "grad_norm": 1.8158048391342163, "learning_rate": 4.588515851491631e-06, "loss": 0.0677, "step": 45000 }, { "epoch": 4.200504060487258, "eval_accuracy": 0.7249241540256709, "eval_f1": 0.8433631638175875, "eval_loss": 0.08434685319662094, "eval_roc_auc": 0.9130333439520731, "eval_runtime": 291.473, "eval_samples_per_second": 147.012, "eval_steps_per_second": 147.012, "step": 45000 }, { "epoch": 4.209838513955008, "grad_norm": 0.6939775347709656, "learning_rate": 4.587580660245021e-06, "loss": 0.0664, "step": 45100 }, { "epoch": 4.219172967422757, "grad_norm": 0.7911635041236877, "learning_rate": 4.586645468998411e-06, "loss": 0.0649, "step": 45200 }, { "epoch": 4.228507420890507, "grad_norm": 0.8824059367179871, "learning_rate": 4.585710277751801e-06, "loss": 0.0691, "step": 45300 }, { "epoch": 4.237841874358256, "grad_norm": 1.286568522453308, "learning_rate": 4.58477508650519e-06, "loss": 0.0626, "step": 45400 }, { "epoch": 4.247176327826006, "grad_norm": 0.5241003632545471, "learning_rate": 4.58383989525858e-06, "loss": 0.0644, "step": 45500 }, { "epoch": 4.2565107812937555, "grad_norm": 1.139330267906189, "learning_rate": 4.582904704011971e-06, "loss": 0.0645, "step": 45600 }, { "epoch": 4.265845234761505, "grad_norm": 1.0811359882354736, "learning_rate": 4.581969512765361e-06, "loss": 0.0669, "step": 45700 }, { "epoch": 4.2751796882292545, "grad_norm": 1.316413402557373, "learning_rate": 4.581034321518751e-06, "loss": 0.0647, "step": 45800 }, { "epoch": 4.284514141697004, "grad_norm": 0.8186858892440796, "learning_rate": 4.580099130272141e-06, "loss": 0.0675, "step": 45900 }, { "epoch": 4.293848595164753, "grad_norm": 0.424663782119751, "learning_rate": 4.579163939025531e-06, "loss": 0.064, "step": 46000 }, { "epoch": 4.303183048632502, "grad_norm": 0.8986467719078064, "learning_rate": 4.578228747778921e-06, "loss": 0.065, "step": 46100 }, { "epoch": 4.312517502100252, "grad_norm": 0.6456283926963806, "learning_rate": 4.577293556532311e-06, "loss": 0.0667, "step": 46200 }, { "epoch": 4.321851955568001, "grad_norm": 0.7704375982284546, "learning_rate": 4.576358365285701e-06, "loss": 0.0699, "step": 46300 }, { "epoch": 4.331186409035751, "grad_norm": 1.1539630889892578, "learning_rate": 4.575423174039091e-06, "loss": 0.0662, "step": 46400 }, { "epoch": 4.3405208625035, "grad_norm": 0.7274278998374939, "learning_rate": 4.574487982792481e-06, "loss": 0.062, "step": 46500 }, { "epoch": 4.34985531597125, "grad_norm": 0.6978820562362671, "learning_rate": 4.573552791545871e-06, "loss": 0.0703, "step": 46600 }, { "epoch": 4.359189769438999, "grad_norm": 2.1834566593170166, "learning_rate": 4.572617600299261e-06, "loss": 0.0671, "step": 46700 }, { "epoch": 4.368524222906749, "grad_norm": 0.94724440574646, "learning_rate": 4.571682409052652e-06, "loss": 0.0652, "step": 46800 }, { "epoch": 4.377858676374498, "grad_norm": 0.5164903402328491, "learning_rate": 4.570747217806042e-06, "loss": 0.0703, "step": 46900 }, { "epoch": 4.387193129842248, "grad_norm": 0.8588458299636841, "learning_rate": 4.569812026559432e-06, "loss": 0.0635, "step": 47000 }, { "epoch": 4.3965275833099975, "grad_norm": 0.890032172203064, "learning_rate": 4.568876835312822e-06, "loss": 0.0641, "step": 47100 }, { "epoch": 4.405862036777747, "grad_norm": 0.5931698083877563, "learning_rate": 4.5679416440662115e-06, "loss": 0.0628, "step": 47200 }, { "epoch": 4.4151964902454965, "grad_norm": 0.552584707736969, "learning_rate": 4.5670064528196015e-06, "loss": 0.0607, "step": 47300 }, { "epoch": 4.424530943713245, "grad_norm": 1.1729700565338135, "learning_rate": 4.5660712615729916e-06, "loss": 0.0624, "step": 47400 }, { "epoch": 4.433865397180995, "grad_norm": 2.2677507400512695, "learning_rate": 4.565136070326382e-06, "loss": 0.0633, "step": 47500 }, { "epoch": 4.443199850648744, "grad_norm": 1.3658419847488403, "learning_rate": 4.564200879079772e-06, "loss": 0.062, "step": 47600 }, { "epoch": 4.452534304116494, "grad_norm": 0.7790820598602295, "learning_rate": 4.5632656878331625e-06, "loss": 0.0647, "step": 47700 }, { "epoch": 4.461868757584243, "grad_norm": 0.7811168432235718, "learning_rate": 4.5623304965865525e-06, "loss": 0.0687, "step": 47800 }, { "epoch": 4.471203211051993, "grad_norm": 0.8779417872428894, "learning_rate": 4.5613953053399426e-06, "loss": 0.0681, "step": 47900 }, { "epoch": 4.480537664519742, "grad_norm": 0.9764981269836426, "learning_rate": 4.560460114093333e-06, "loss": 0.0718, "step": 48000 }, { "epoch": 4.489872117987492, "grad_norm": 1.0056012868881226, "learning_rate": 4.559524922846723e-06, "loss": 0.0606, "step": 48100 }, { "epoch": 4.499206571455241, "grad_norm": 0.7150539755821228, "learning_rate": 4.558589731600113e-06, "loss": 0.0616, "step": 48200 }, { "epoch": 4.508541024922991, "grad_norm": 1.232472538948059, "learning_rate": 4.557654540353503e-06, "loss": 0.0631, "step": 48300 }, { "epoch": 4.51787547839074, "grad_norm": 0.8411718010902405, "learning_rate": 4.556719349106893e-06, "loss": 0.0658, "step": 48400 }, { "epoch": 4.52720993185849, "grad_norm": 1.118788719177246, "learning_rate": 4.555784157860283e-06, "loss": 0.0665, "step": 48500 }, { "epoch": 4.5365443853262395, "grad_norm": 0.7626038789749146, "learning_rate": 4.554848966613673e-06, "loss": 0.07, "step": 48600 }, { "epoch": 4.545878838793989, "grad_norm": 0.5250329971313477, "learning_rate": 4.553913775367063e-06, "loss": 0.0633, "step": 48700 }, { "epoch": 4.5552132922617385, "grad_norm": 0.7812219858169556, "learning_rate": 4.552978584120453e-06, "loss": 0.0682, "step": 48800 }, { "epoch": 4.564547745729487, "grad_norm": 1.214002251625061, "learning_rate": 4.552043392873844e-06, "loss": 0.0601, "step": 48900 }, { "epoch": 4.573882199197237, "grad_norm": 1.180737853050232, "learning_rate": 4.551108201627233e-06, "loss": 0.0682, "step": 49000 }, { "epoch": 4.583216652664986, "grad_norm": 1.5645318031311035, "learning_rate": 4.550173010380623e-06, "loss": 0.0638, "step": 49100 }, { "epoch": 4.592551106132736, "grad_norm": 0.49773871898651123, "learning_rate": 4.549237819134013e-06, "loss": 0.0654, "step": 49200 }, { "epoch": 4.601885559600485, "grad_norm": 0.8017587661743164, "learning_rate": 4.548302627887403e-06, "loss": 0.0609, "step": 49300 }, { "epoch": 4.611220013068235, "grad_norm": 1.3467339277267456, "learning_rate": 4.547367436640793e-06, "loss": 0.0613, "step": 49400 }, { "epoch": 4.620554466535984, "grad_norm": 0.48602136969566345, "learning_rate": 4.546432245394183e-06, "loss": 0.0642, "step": 49500 }, { "epoch": 4.629888920003734, "grad_norm": 1.3800562620162964, "learning_rate": 4.545497054147573e-06, "loss": 0.0663, "step": 49600 }, { "epoch": 4.639223373471483, "grad_norm": 1.123764157295227, "learning_rate": 4.544561862900964e-06, "loss": 0.0637, "step": 49700 }, { "epoch": 4.648557826939233, "grad_norm": 0.8025959730148315, "learning_rate": 4.543626671654354e-06, "loss": 0.0709, "step": 49800 }, { "epoch": 4.657892280406982, "grad_norm": 0.7678070664405823, "learning_rate": 4.542691480407744e-06, "loss": 0.0652, "step": 49900 }, { "epoch": 4.667226733874732, "grad_norm": 0.47041189670562744, "learning_rate": 4.541756289161134e-06, "loss": 0.064, "step": 50000 }, { "epoch": 4.667226733874732, "eval_accuracy": 0.7250175029171528, "eval_f1": 0.8412621446401954, "eval_loss": 0.08549964427947998, "eval_roc_auc": 0.90997126107341, "eval_runtime": 243.8627, "eval_samples_per_second": 175.714, "eval_steps_per_second": 175.714, "step": 50000 }, { "epoch": 4.676561187342481, "grad_norm": 0.6899334788322449, "learning_rate": 4.540821097914524e-06, "loss": 0.0659, "step": 50100 }, { "epoch": 4.68589564081023, "grad_norm": 0.6595396399497986, "learning_rate": 4.539885906667914e-06, "loss": 0.0662, "step": 50200 }, { "epoch": 4.69523009427798, "grad_norm": 1.1884797811508179, "learning_rate": 4.538950715421304e-06, "loss": 0.0646, "step": 50300 }, { "epoch": 4.704564547745729, "grad_norm": 1.261449933052063, "learning_rate": 4.538015524174694e-06, "loss": 0.0625, "step": 50400 }, { "epoch": 4.713899001213479, "grad_norm": 0.5972244143486023, "learning_rate": 4.537080332928084e-06, "loss": 0.0641, "step": 50500 }, { "epoch": 4.723233454681228, "grad_norm": 0.5673565864562988, "learning_rate": 4.536145141681474e-06, "loss": 0.064, "step": 50600 }, { "epoch": 4.732567908148978, "grad_norm": 0.7185744047164917, "learning_rate": 4.535209950434864e-06, "loss": 0.0646, "step": 50700 }, { "epoch": 4.741902361616727, "grad_norm": 1.0770295858383179, "learning_rate": 4.534274759188254e-06, "loss": 0.0718, "step": 50800 }, { "epoch": 4.751236815084477, "grad_norm": 0.688117504119873, "learning_rate": 4.533339567941644e-06, "loss": 0.0694, "step": 50900 }, { "epoch": 4.760571268552226, "grad_norm": 0.8660634160041809, "learning_rate": 4.532404376695034e-06, "loss": 0.0645, "step": 51000 }, { "epoch": 4.769905722019976, "grad_norm": 0.7308102250099182, "learning_rate": 4.531469185448424e-06, "loss": 0.0615, "step": 51100 }, { "epoch": 4.779240175487725, "grad_norm": 0.9034064412117004, "learning_rate": 4.5305339942018144e-06, "loss": 0.0634, "step": 51200 }, { "epoch": 4.788574628955475, "grad_norm": 0.8430311679840088, "learning_rate": 4.5295988029552045e-06, "loss": 0.0677, "step": 51300 }, { "epoch": 4.7979090824232244, "grad_norm": 0.8912951946258545, "learning_rate": 4.5286636117085945e-06, "loss": 0.0629, "step": 51400 }, { "epoch": 4.807243535890974, "grad_norm": 0.7189156413078308, "learning_rate": 4.5277284204619845e-06, "loss": 0.062, "step": 51500 }, { "epoch": 4.8165779893587235, "grad_norm": 0.898830235004425, "learning_rate": 4.5267932292153746e-06, "loss": 0.0664, "step": 51600 }, { "epoch": 4.825912442826472, "grad_norm": 1.2175198793411255, "learning_rate": 4.525858037968765e-06, "loss": 0.0652, "step": 51700 }, { "epoch": 4.835246896294222, "grad_norm": 2.060880184173584, "learning_rate": 4.5249228467221555e-06, "loss": 0.0652, "step": 51800 }, { "epoch": 4.844581349761971, "grad_norm": 2.9028806686401367, "learning_rate": 4.5239876554755455e-06, "loss": 0.0634, "step": 51900 }, { "epoch": 4.853915803229721, "grad_norm": 0.5127193927764893, "learning_rate": 4.5230524642289355e-06, "loss": 0.0661, "step": 52000 }, { "epoch": 4.86325025669747, "grad_norm": 0.6585423350334167, "learning_rate": 4.5221172729823256e-06, "loss": 0.0693, "step": 52100 }, { "epoch": 4.87258471016522, "grad_norm": 1.4652658700942993, "learning_rate": 4.521182081735716e-06, "loss": 0.064, "step": 52200 }, { "epoch": 4.881919163632969, "grad_norm": 1.38148033618927, "learning_rate": 4.520246890489106e-06, "loss": 0.0722, "step": 52300 }, { "epoch": 4.891253617100719, "grad_norm": 1.0009233951568604, "learning_rate": 4.519311699242496e-06, "loss": 0.0643, "step": 52400 }, { "epoch": 4.900588070568468, "grad_norm": 3.0784127712249756, "learning_rate": 4.518376507995886e-06, "loss": 0.0638, "step": 52500 }, { "epoch": 4.909922524036218, "grad_norm": 1.0737767219543457, "learning_rate": 4.517441316749276e-06, "loss": 0.0609, "step": 52600 }, { "epoch": 4.919256977503967, "grad_norm": 0.42382147908210754, "learning_rate": 4.516506125502666e-06, "loss": 0.0666, "step": 52700 }, { "epoch": 4.928591430971717, "grad_norm": 0.7445490956306458, "learning_rate": 4.515570934256056e-06, "loss": 0.0633, "step": 52800 }, { "epoch": 4.9379258844394665, "grad_norm": 1.0341641902923584, "learning_rate": 4.514635743009446e-06, "loss": 0.0639, "step": 52900 }, { "epoch": 4.947260337907215, "grad_norm": 1.3225160837173462, "learning_rate": 4.513700551762836e-06, "loss": 0.0669, "step": 53000 }, { "epoch": 4.956594791374965, "grad_norm": 0.7978038787841797, "learning_rate": 4.512765360516226e-06, "loss": 0.0637, "step": 53100 }, { "epoch": 4.965929244842714, "grad_norm": 0.8359413146972656, "learning_rate": 4.511830169269616e-06, "loss": 0.0618, "step": 53200 }, { "epoch": 4.975263698310464, "grad_norm": 0.9357098340988159, "learning_rate": 4.510894978023006e-06, "loss": 0.0653, "step": 53300 }, { "epoch": 4.984598151778213, "grad_norm": 1.605373740196228, "learning_rate": 4.509959786776396e-06, "loss": 0.0658, "step": 53400 }, { "epoch": 4.993932605245963, "grad_norm": 1.3523693084716797, "learning_rate": 4.509024595529786e-06, "loss": 0.0647, "step": 53500 }, { "epoch": 5.003267058713712, "grad_norm": 0.5821368098258972, "learning_rate": 4.508089404283176e-06, "loss": 0.0682, "step": 53600 }, { "epoch": 5.012601512181462, "grad_norm": 0.808694064617157, "learning_rate": 4.507154213036566e-06, "loss": 0.0593, "step": 53700 }, { "epoch": 5.021935965649211, "grad_norm": 3.33782958984375, "learning_rate": 4.506219021789957e-06, "loss": 0.056, "step": 53800 }, { "epoch": 5.031270419116961, "grad_norm": 0.8387459516525269, "learning_rate": 4.505283830543347e-06, "loss": 0.057, "step": 53900 }, { "epoch": 5.04060487258471, "grad_norm": 0.782145619392395, "learning_rate": 4.504348639296737e-06, "loss": 0.0611, "step": 54000 }, { "epoch": 5.04993932605246, "grad_norm": 0.5992030501365662, "learning_rate": 4.503413448050127e-06, "loss": 0.0574, "step": 54100 }, { "epoch": 5.059273779520209, "grad_norm": 0.730396032333374, "learning_rate": 4.502478256803517e-06, "loss": 0.0553, "step": 54200 }, { "epoch": 5.068608232987959, "grad_norm": 1.6837677955627441, "learning_rate": 4.501543065556907e-06, "loss": 0.0574, "step": 54300 }, { "epoch": 5.0779426864557085, "grad_norm": 0.7964657545089722, "learning_rate": 4.500607874310296e-06, "loss": 0.0565, "step": 54400 }, { "epoch": 5.087277139923457, "grad_norm": 0.6608617305755615, "learning_rate": 4.499672683063686e-06, "loss": 0.0616, "step": 54500 }, { "epoch": 5.096611593391207, "grad_norm": 1.485681176185608, "learning_rate": 4.498737491817077e-06, "loss": 0.062, "step": 54600 }, { "epoch": 5.105946046858956, "grad_norm": 0.9453593492507935, "learning_rate": 4.497802300570467e-06, "loss": 0.0596, "step": 54700 }, { "epoch": 5.115280500326706, "grad_norm": 0.5536121129989624, "learning_rate": 4.496867109323857e-06, "loss": 0.0559, "step": 54800 }, { "epoch": 5.124614953794455, "grad_norm": 1.0316871404647827, "learning_rate": 4.495931918077247e-06, "loss": 0.064, "step": 54900 }, { "epoch": 5.133949407262205, "grad_norm": 0.6699665188789368, "learning_rate": 4.494996726830637e-06, "loss": 0.053, "step": 55000 }, { "epoch": 5.133949407262205, "eval_accuracy": 0.7257876312718786, "eval_f1": 0.8410170889594204, "eval_loss": 0.08942010998725891, "eval_roc_auc": 0.9103605546245133, "eval_runtime": 237.2213, "eval_samples_per_second": 180.633, "eval_steps_per_second": 180.633, "step": 55000 }, { "epoch": 5.143283860729954, "grad_norm": 1.2884070873260498, "learning_rate": 4.494061535584027e-06, "loss": 0.0604, "step": 55100 }, { "epoch": 5.152618314197704, "grad_norm": 1.060477375984192, "learning_rate": 4.493126344337417e-06, "loss": 0.0603, "step": 55200 }, { "epoch": 5.161952767665453, "grad_norm": 1.7921513319015503, "learning_rate": 4.492191153090807e-06, "loss": 0.0581, "step": 55300 }, { "epoch": 5.171287221133203, "grad_norm": 0.7294164299964905, "learning_rate": 4.491255961844197e-06, "loss": 0.0661, "step": 55400 }, { "epoch": 5.180621674600952, "grad_norm": 1.5550895929336548, "learning_rate": 4.4903207705975874e-06, "loss": 0.0633, "step": 55500 }, { "epoch": 5.189956128068702, "grad_norm": 0.6117040514945984, "learning_rate": 4.4893855793509775e-06, "loss": 0.06, "step": 55600 }, { "epoch": 5.199290581536451, "grad_norm": 0.9458345174789429, "learning_rate": 4.4884503881043675e-06, "loss": 0.0661, "step": 55700 }, { "epoch": 5.208625035004201, "grad_norm": 1.9546419382095337, "learning_rate": 4.4875151968577575e-06, "loss": 0.0597, "step": 55800 }, { "epoch": 5.21795948847195, "grad_norm": 1.2444877624511719, "learning_rate": 4.486580005611148e-06, "loss": 0.0624, "step": 55900 }, { "epoch": 5.227293941939699, "grad_norm": 1.6985496282577515, "learning_rate": 4.4856448143645384e-06, "loss": 0.0571, "step": 56000 }, { "epoch": 5.236628395407449, "grad_norm": 0.8657322525978088, "learning_rate": 4.4847096231179285e-06, "loss": 0.0582, "step": 56100 }, { "epoch": 5.245962848875198, "grad_norm": 1.7500957250595093, "learning_rate": 4.483774431871318e-06, "loss": 0.0573, "step": 56200 }, { "epoch": 5.255297302342948, "grad_norm": 1.1761984825134277, "learning_rate": 4.482839240624708e-06, "loss": 0.0565, "step": 56300 }, { "epoch": 5.264631755810697, "grad_norm": 1.8459393978118896, "learning_rate": 4.481904049378098e-06, "loss": 0.0618, "step": 56400 }, { "epoch": 5.273966209278447, "grad_norm": 1.5223190784454346, "learning_rate": 4.480968858131488e-06, "loss": 0.0615, "step": 56500 }, { "epoch": 5.283300662746196, "grad_norm": 1.4355651140213013, "learning_rate": 4.480033666884878e-06, "loss": 0.065, "step": 56600 }, { "epoch": 5.292635116213946, "grad_norm": 0.8949202299118042, "learning_rate": 4.479098475638269e-06, "loss": 0.0589, "step": 56700 }, { "epoch": 5.301969569681695, "grad_norm": 0.8922284841537476, "learning_rate": 4.478163284391659e-06, "loss": 0.0585, "step": 56800 }, { "epoch": 5.311304023149445, "grad_norm": 0.6118394732475281, "learning_rate": 4.477228093145049e-06, "loss": 0.0626, "step": 56900 }, { "epoch": 5.320638476617194, "grad_norm": 0.7595558762550354, "learning_rate": 4.476292901898439e-06, "loss": 0.0604, "step": 57000 }, { "epoch": 5.329972930084944, "grad_norm": 1.1290780305862427, "learning_rate": 4.475357710651829e-06, "loss": 0.0576, "step": 57100 }, { "epoch": 5.339307383552693, "grad_norm": 1.377669095993042, "learning_rate": 4.474422519405219e-06, "loss": 0.0603, "step": 57200 }, { "epoch": 5.348641837020442, "grad_norm": 0.5273600220680237, "learning_rate": 4.473487328158609e-06, "loss": 0.0599, "step": 57300 }, { "epoch": 5.357976290488192, "grad_norm": 1.4486112594604492, "learning_rate": 4.472552136911999e-06, "loss": 0.059, "step": 57400 }, { "epoch": 5.367310743955941, "grad_norm": 1.1537847518920898, "learning_rate": 4.471616945665389e-06, "loss": 0.062, "step": 57500 }, { "epoch": 5.376645197423691, "grad_norm": 0.3961065113544464, "learning_rate": 4.470681754418779e-06, "loss": 0.0589, "step": 57600 }, { "epoch": 5.38597965089144, "grad_norm": 0.7809767127037048, "learning_rate": 4.469746563172169e-06, "loss": 0.0615, "step": 57700 }, { "epoch": 5.39531410435919, "grad_norm": 0.4689590036869049, "learning_rate": 4.468811371925559e-06, "loss": 0.0622, "step": 57800 }, { "epoch": 5.404648557826939, "grad_norm": 0.9694661498069763, "learning_rate": 4.46787618067895e-06, "loss": 0.0582, "step": 57900 }, { "epoch": 5.413983011294689, "grad_norm": 1.6637543439865112, "learning_rate": 4.466940989432339e-06, "loss": 0.0562, "step": 58000 }, { "epoch": 5.423317464762438, "grad_norm": 1.1682994365692139, "learning_rate": 4.466005798185729e-06, "loss": 0.0604, "step": 58100 }, { "epoch": 5.432651918230188, "grad_norm": 3.323533535003662, "learning_rate": 4.465070606939119e-06, "loss": 0.0598, "step": 58200 }, { "epoch": 5.441986371697937, "grad_norm": 1.3482946157455444, "learning_rate": 4.464135415692509e-06, "loss": 0.056, "step": 58300 }, { "epoch": 5.451320825165687, "grad_norm": 1.8049932718276978, "learning_rate": 4.463200224445899e-06, "loss": 0.0587, "step": 58400 }, { "epoch": 5.460655278633436, "grad_norm": 1.0362271070480347, "learning_rate": 4.462265033199289e-06, "loss": 0.059, "step": 58500 }, { "epoch": 5.469989732101186, "grad_norm": 1.4298433065414429, "learning_rate": 4.461329841952679e-06, "loss": 0.0626, "step": 58600 }, { "epoch": 5.4793241855689345, "grad_norm": 1.2073612213134766, "learning_rate": 4.46039465070607e-06, "loss": 0.0612, "step": 58700 }, { "epoch": 5.488658639036684, "grad_norm": 1.0545377731323242, "learning_rate": 4.45945945945946e-06, "loss": 0.0653, "step": 58800 }, { "epoch": 5.497993092504434, "grad_norm": 1.1682157516479492, "learning_rate": 4.45852426821285e-06, "loss": 0.0621, "step": 58900 }, { "epoch": 5.507327545972183, "grad_norm": 1.5784131288528442, "learning_rate": 4.45758907696624e-06, "loss": 0.0594, "step": 59000 }, { "epoch": 5.516661999439933, "grad_norm": 0.522055447101593, "learning_rate": 4.45665388571963e-06, "loss": 0.0594, "step": 59100 }, { "epoch": 5.525996452907682, "grad_norm": 1.3595131635665894, "learning_rate": 4.45571869447302e-06, "loss": 0.0577, "step": 59200 }, { "epoch": 5.535330906375432, "grad_norm": 0.9966509938240051, "learning_rate": 4.45478350322641e-06, "loss": 0.0598, "step": 59300 }, { "epoch": 5.544665359843181, "grad_norm": 0.7471063733100891, "learning_rate": 4.4538483119798e-06, "loss": 0.0606, "step": 59400 }, { "epoch": 5.553999813310931, "grad_norm": 0.21422342956066132, "learning_rate": 4.45291312073319e-06, "loss": 0.0623, "step": 59500 }, { "epoch": 5.56333426677868, "grad_norm": 1.0573238134384155, "learning_rate": 4.45197792948658e-06, "loss": 0.0542, "step": 59600 }, { "epoch": 5.57266872024643, "grad_norm": 1.2489149570465088, "learning_rate": 4.45104273823997e-06, "loss": 0.0637, "step": 59700 }, { "epoch": 5.582003173714179, "grad_norm": 0.7781186103820801, "learning_rate": 4.4501075469933605e-06, "loss": 0.0619, "step": 59800 }, { "epoch": 5.591337627181929, "grad_norm": 1.1502577066421509, "learning_rate": 4.4491723557467505e-06, "loss": 0.0616, "step": 59900 }, { "epoch": 5.600672080649678, "grad_norm": 1.0231844186782837, "learning_rate": 4.4482371645001405e-06, "loss": 0.0607, "step": 60000 }, { "epoch": 5.600672080649678, "eval_accuracy": 0.7160793465577596, "eval_f1": 0.8396621527293033, "eval_loss": 0.09088416397571564, "eval_roc_auc": 0.919211771354077, "eval_runtime": 270.3032, "eval_samples_per_second": 158.526, "eval_steps_per_second": 158.526, "step": 60000 }, { "epoch": 5.610006534117428, "grad_norm": 1.1876798868179321, "learning_rate": 4.4473019732535305e-06, "loss": 0.066, "step": 60100 }, { "epoch": 5.6193409875851765, "grad_norm": 0.6543289422988892, "learning_rate": 4.4463667820069206e-06, "loss": 0.0598, "step": 60200 }, { "epoch": 5.628675441052926, "grad_norm": 1.0975614786148071, "learning_rate": 4.445431590760311e-06, "loss": 0.0579, "step": 60300 }, { "epoch": 5.638009894520676, "grad_norm": 0.7412407994270325, "learning_rate": 4.444496399513701e-06, "loss": 0.0647, "step": 60400 }, { "epoch": 5.647344347988425, "grad_norm": 1.4409759044647217, "learning_rate": 4.443561208267091e-06, "loss": 0.0626, "step": 60500 }, { "epoch": 5.656678801456175, "grad_norm": 0.7144232392311096, "learning_rate": 4.442626017020481e-06, "loss": 0.0615, "step": 60600 }, { "epoch": 5.666013254923924, "grad_norm": 1.0084257125854492, "learning_rate": 4.441690825773871e-06, "loss": 0.0545, "step": 60700 }, { "epoch": 5.675347708391674, "grad_norm": 0.7624267935752869, "learning_rate": 4.440755634527262e-06, "loss": 0.0598, "step": 60800 }, { "epoch": 5.684682161859423, "grad_norm": 2.382127285003662, "learning_rate": 4.439820443280652e-06, "loss": 0.0588, "step": 60900 }, { "epoch": 5.694016615327173, "grad_norm": 1.1389535665512085, "learning_rate": 4.438885252034042e-06, "loss": 0.0553, "step": 61000 }, { "epoch": 5.703351068794922, "grad_norm": 3.1383302211761475, "learning_rate": 4.437950060787432e-06, "loss": 0.0647, "step": 61100 }, { "epoch": 5.712685522262672, "grad_norm": 2.3153927326202393, "learning_rate": 4.437014869540822e-06, "loss": 0.0611, "step": 61200 }, { "epoch": 5.722019975730421, "grad_norm": 0.9494896531105042, "learning_rate": 4.436079678294212e-06, "loss": 0.0587, "step": 61300 }, { "epoch": 5.73135442919817, "grad_norm": 1.5355327129364014, "learning_rate": 4.435144487047602e-06, "loss": 0.0598, "step": 61400 }, { "epoch": 5.7406888826659195, "grad_norm": 0.6803508400917053, "learning_rate": 4.434209295800992e-06, "loss": 0.0624, "step": 61500 }, { "epoch": 5.750023336133669, "grad_norm": 0.8583879470825195, "learning_rate": 4.433274104554382e-06, "loss": 0.0626, "step": 61600 }, { "epoch": 5.7593577896014185, "grad_norm": 1.4103775024414062, "learning_rate": 4.432338913307772e-06, "loss": 0.057, "step": 61700 }, { "epoch": 5.768692243069168, "grad_norm": 1.525816798210144, "learning_rate": 4.431403722061162e-06, "loss": 0.0614, "step": 61800 }, { "epoch": 5.778026696536918, "grad_norm": 1.1512523889541626, "learning_rate": 4.430468530814552e-06, "loss": 0.0618, "step": 61900 }, { "epoch": 5.787361150004667, "grad_norm": 0.9331355690956116, "learning_rate": 4.429533339567942e-06, "loss": 0.0593, "step": 62000 }, { "epoch": 5.796695603472417, "grad_norm": 0.9300373792648315, "learning_rate": 4.428598148321332e-06, "loss": 0.0632, "step": 62100 }, { "epoch": 5.806030056940166, "grad_norm": 1.0573028326034546, "learning_rate": 4.427662957074722e-06, "loss": 0.06, "step": 62200 }, { "epoch": 5.815364510407916, "grad_norm": 1.037513017654419, "learning_rate": 4.426727765828112e-06, "loss": 0.0617, "step": 62300 }, { "epoch": 5.824698963875665, "grad_norm": 0.6044671535491943, "learning_rate": 4.425792574581502e-06, "loss": 0.0552, "step": 62400 }, { "epoch": 5.834033417343415, "grad_norm": 0.7900469899177551, "learning_rate": 4.424857383334892e-06, "loss": 0.0604, "step": 62500 }, { "epoch": 5.843367870811164, "grad_norm": 0.9395179152488708, "learning_rate": 4.423922192088282e-06, "loss": 0.0542, "step": 62600 }, { "epoch": 5.852702324278914, "grad_norm": 1.0410608053207397, "learning_rate": 4.422987000841672e-06, "loss": 0.0609, "step": 62700 }, { "epoch": 5.862036777746663, "grad_norm": 1.8297505378723145, "learning_rate": 4.422051809595062e-06, "loss": 0.0624, "step": 62800 }, { "epoch": 5.871371231214413, "grad_norm": 1.237855315208435, "learning_rate": 4.421116618348453e-06, "loss": 0.0642, "step": 62900 }, { "epoch": 5.8807056846821615, "grad_norm": 1.7373284101486206, "learning_rate": 4.420181427101843e-06, "loss": 0.0625, "step": 63000 }, { "epoch": 5.890040138149911, "grad_norm": 0.609740138053894, "learning_rate": 4.419246235855233e-06, "loss": 0.0597, "step": 63100 }, { "epoch": 5.8993745916176605, "grad_norm": 1.0115846395492554, "learning_rate": 4.418311044608623e-06, "loss": 0.0584, "step": 63200 }, { "epoch": 5.90870904508541, "grad_norm": 0.6937131285667419, "learning_rate": 4.417375853362013e-06, "loss": 0.0616, "step": 63300 }, { "epoch": 5.91804349855316, "grad_norm": 0.7485745549201965, "learning_rate": 4.416440662115403e-06, "loss": 0.0605, "step": 63400 }, { "epoch": 5.927377952020909, "grad_norm": 1.868227481842041, "learning_rate": 4.4155054708687924e-06, "loss": 0.0625, "step": 63500 }, { "epoch": 5.936712405488659, "grad_norm": 0.7718109488487244, "learning_rate": 4.4145702796221825e-06, "loss": 0.0677, "step": 63600 }, { "epoch": 5.946046858956408, "grad_norm": 1.5902236700057983, "learning_rate": 4.413635088375573e-06, "loss": 0.0612, "step": 63700 }, { "epoch": 5.955381312424158, "grad_norm": 1.4949499368667603, "learning_rate": 4.412699897128963e-06, "loss": 0.0648, "step": 63800 }, { "epoch": 5.964715765891907, "grad_norm": 0.9245728850364685, "learning_rate": 4.411764705882353e-06, "loss": 0.0584, "step": 63900 }, { "epoch": 5.974050219359657, "grad_norm": 0.9854455590248108, "learning_rate": 4.4108295146357434e-06, "loss": 0.0612, "step": 64000 }, { "epoch": 5.983384672827406, "grad_norm": 0.921771764755249, "learning_rate": 4.4098943233891335e-06, "loss": 0.0599, "step": 64100 }, { "epoch": 5.992719126295156, "grad_norm": 1.0151314735412598, "learning_rate": 4.4089591321425235e-06, "loss": 0.0565, "step": 64200 }, { "epoch": 6.002053579762905, "grad_norm": 1.001386284828186, "learning_rate": 4.4080239408959135e-06, "loss": 0.0582, "step": 64300 }, { "epoch": 6.011388033230654, "grad_norm": 0.604015588760376, "learning_rate": 4.4070887496493036e-06, "loss": 0.0597, "step": 64400 }, { "epoch": 6.0207224866984035, "grad_norm": 1.5700677633285522, "learning_rate": 4.406153558402694e-06, "loss": 0.0534, "step": 64500 }, { "epoch": 6.030056940166153, "grad_norm": 0.9267865419387817, "learning_rate": 4.405218367156084e-06, "loss": 0.0559, "step": 64600 }, { "epoch": 6.0393913936339025, "grad_norm": 1.2584235668182373, "learning_rate": 4.404283175909474e-06, "loss": 0.0564, "step": 64700 }, { "epoch": 6.048725847101652, "grad_norm": 1.0141167640686035, "learning_rate": 4.403347984662864e-06, "loss": 0.057, "step": 64800 }, { "epoch": 6.058060300569402, "grad_norm": 0.8783392906188965, "learning_rate": 4.4024127934162546e-06, "loss": 0.0522, "step": 64900 }, { "epoch": 6.067394754037151, "grad_norm": 1.7417374849319458, "learning_rate": 4.401477602169645e-06, "loss": 0.057, "step": 65000 }, { "epoch": 6.067394754037151, "eval_accuracy": 0.7219603267211202, "eval_f1": 0.8411683634908639, "eval_loss": 0.09258955717086792, "eval_roc_auc": 0.9133629120854533, "eval_runtime": 312.004, "eval_samples_per_second": 137.338, "eval_steps_per_second": 137.338, "step": 65000 }, { "epoch": 6.076729207504901, "grad_norm": 1.03317129611969, "learning_rate": 4.400542410923035e-06, "loss": 0.0579, "step": 65100 }, { "epoch": 6.08606366097265, "grad_norm": 1.1962816715240479, "learning_rate": 4.399607219676425e-06, "loss": 0.0565, "step": 65200 }, { "epoch": 6.0953981144404, "grad_norm": 1.1180720329284668, "learning_rate": 4.398672028429814e-06, "loss": 0.0513, "step": 65300 }, { "epoch": 6.104732567908149, "grad_norm": 1.1667932271957397, "learning_rate": 4.397736837183204e-06, "loss": 0.0533, "step": 65400 }, { "epoch": 6.114067021375899, "grad_norm": 0.926884114742279, "learning_rate": 4.396801645936594e-06, "loss": 0.0535, "step": 65500 }, { "epoch": 6.123401474843648, "grad_norm": 1.384803056716919, "learning_rate": 4.395866454689984e-06, "loss": 0.0533, "step": 65600 }, { "epoch": 6.132735928311398, "grad_norm": 1.2000075578689575, "learning_rate": 4.394931263443375e-06, "loss": 0.0567, "step": 65700 }, { "epoch": 6.142070381779146, "grad_norm": 1.0714457035064697, "learning_rate": 4.393996072196765e-06, "loss": 0.0564, "step": 65800 }, { "epoch": 6.151404835246896, "grad_norm": 0.717960774898529, "learning_rate": 4.393060880950155e-06, "loss": 0.0499, "step": 65900 }, { "epoch": 6.1607392887146455, "grad_norm": 1.343293309211731, "learning_rate": 4.392125689703545e-06, "loss": 0.0591, "step": 66000 }, { "epoch": 6.170073742182395, "grad_norm": 1.0709763765335083, "learning_rate": 4.391190498456935e-06, "loss": 0.0568, "step": 66100 }, { "epoch": 6.1794081956501445, "grad_norm": 0.5421103239059448, "learning_rate": 4.390255307210325e-06, "loss": 0.0528, "step": 66200 }, { "epoch": 6.188742649117894, "grad_norm": 1.5285899639129639, "learning_rate": 4.389320115963715e-06, "loss": 0.057, "step": 66300 }, { "epoch": 6.198077102585644, "grad_norm": 0.7678933143615723, "learning_rate": 4.388384924717105e-06, "loss": 0.0531, "step": 66400 }, { "epoch": 6.207411556053393, "grad_norm": 1.6598721742630005, "learning_rate": 4.387449733470495e-06, "loss": 0.0556, "step": 66500 }, { "epoch": 6.216746009521143, "grad_norm": 0.8640401363372803, "learning_rate": 4.386514542223885e-06, "loss": 0.0538, "step": 66600 }, { "epoch": 6.226080462988892, "grad_norm": 1.1740844249725342, "learning_rate": 4.385579350977275e-06, "loss": 0.0516, "step": 66700 }, { "epoch": 6.235414916456642, "grad_norm": 0.899919867515564, "learning_rate": 4.384644159730665e-06, "loss": 0.0562, "step": 66800 }, { "epoch": 6.244749369924391, "grad_norm": 1.26015043258667, "learning_rate": 4.383708968484055e-06, "loss": 0.048, "step": 66900 }, { "epoch": 6.254083823392141, "grad_norm": 2.2749204635620117, "learning_rate": 4.382773777237446e-06, "loss": 0.0562, "step": 67000 }, { "epoch": 6.263418276859889, "grad_norm": 0.6692554354667664, "learning_rate": 4.381838585990835e-06, "loss": 0.0507, "step": 67100 }, { "epoch": 6.272752730327639, "grad_norm": 1.1830015182495117, "learning_rate": 4.380903394744225e-06, "loss": 0.0516, "step": 67200 }, { "epoch": 6.282087183795388, "grad_norm": 1.5980244874954224, "learning_rate": 4.379968203497615e-06, "loss": 0.0597, "step": 67300 }, { "epoch": 6.291421637263138, "grad_norm": 1.0235507488250732, "learning_rate": 4.379033012251005e-06, "loss": 0.0523, "step": 67400 }, { "epoch": 6.3007560907308875, "grad_norm": 1.0619663000106812, "learning_rate": 4.378097821004395e-06, "loss": 0.0478, "step": 67500 }, { "epoch": 6.310090544198637, "grad_norm": 1.3680976629257202, "learning_rate": 4.377162629757785e-06, "loss": 0.0582, "step": 67600 }, { "epoch": 6.3194249976663865, "grad_norm": 1.0829286575317383, "learning_rate": 4.376227438511175e-06, "loss": 0.051, "step": 67700 }, { "epoch": 6.328759451134136, "grad_norm": 0.5550968050956726, "learning_rate": 4.375292247264566e-06, "loss": 0.0575, "step": 67800 }, { "epoch": 6.338093904601886, "grad_norm": 1.7262980937957764, "learning_rate": 4.374357056017956e-06, "loss": 0.0506, "step": 67900 }, { "epoch": 6.347428358069635, "grad_norm": 2.1530234813690186, "learning_rate": 4.373421864771346e-06, "loss": 0.051, "step": 68000 }, { "epoch": 6.356762811537385, "grad_norm": 0.7774823904037476, "learning_rate": 4.372486673524736e-06, "loss": 0.0563, "step": 68100 }, { "epoch": 6.366097265005134, "grad_norm": 1.6762458086013794, "learning_rate": 4.371551482278126e-06, "loss": 0.0553, "step": 68200 }, { "epoch": 6.375431718472884, "grad_norm": 1.3667351007461548, "learning_rate": 4.3706162910315164e-06, "loss": 0.0575, "step": 68300 }, { "epoch": 6.384766171940633, "grad_norm": 0.8613729476928711, "learning_rate": 4.3696810997849065e-06, "loss": 0.0575, "step": 68400 }, { "epoch": 6.394100625408383, "grad_norm": 1.3681460618972778, "learning_rate": 4.3687459085382965e-06, "loss": 0.0519, "step": 68500 }, { "epoch": 6.403435078876131, "grad_norm": 0.4264625608921051, "learning_rate": 4.3678107172916865e-06, "loss": 0.0528, "step": 68600 }, { "epoch": 6.412769532343881, "grad_norm": 1.157013177871704, "learning_rate": 4.3668755260450766e-06, "loss": 0.0553, "step": 68700 }, { "epoch": 6.4221039858116304, "grad_norm": 1.84316086769104, "learning_rate": 4.365940334798467e-06, "loss": 0.0567, "step": 68800 }, { "epoch": 6.43143843927938, "grad_norm": 0.5543715953826904, "learning_rate": 4.365005143551857e-06, "loss": 0.0565, "step": 68900 }, { "epoch": 6.4407728927471295, "grad_norm": 2.004446506500244, "learning_rate": 4.364069952305247e-06, "loss": 0.0571, "step": 69000 }, { "epoch": 6.450107346214879, "grad_norm": 1.723239779472351, "learning_rate": 4.363134761058637e-06, "loss": 0.0562, "step": 69100 }, { "epoch": 6.4594417996826285, "grad_norm": 1.4663580656051636, "learning_rate": 4.362199569812027e-06, "loss": 0.0565, "step": 69200 }, { "epoch": 6.468776253150378, "grad_norm": 1.5717527866363525, "learning_rate": 4.361264378565417e-06, "loss": 0.0468, "step": 69300 }, { "epoch": 6.478110706618128, "grad_norm": 1.0201493501663208, "learning_rate": 4.360329187318807e-06, "loss": 0.0539, "step": 69400 }, { "epoch": 6.487445160085877, "grad_norm": 1.2932502031326294, "learning_rate": 4.359393996072197e-06, "loss": 0.0529, "step": 69500 }, { "epoch": 6.496779613553627, "grad_norm": 0.6695718765258789, "learning_rate": 4.358458804825587e-06, "loss": 0.0557, "step": 69600 }, { "epoch": 6.506114067021376, "grad_norm": 1.3696361780166626, "learning_rate": 4.357523613578977e-06, "loss": 0.0564, "step": 69700 }, { "epoch": 6.515448520489126, "grad_norm": 1.3158940076828003, "learning_rate": 4.356588422332368e-06, "loss": 0.0554, "step": 69800 }, { "epoch": 6.524782973956874, "grad_norm": 5.171343803405762, "learning_rate": 4.355653231085758e-06, "loss": 0.0557, "step": 69900 }, { "epoch": 6.534117427424624, "grad_norm": 0.5448005795478821, "learning_rate": 4.354718039839148e-06, "loss": 0.0555, "step": 70000 }, { "epoch": 6.534117427424624, "eval_accuracy": 0.718296382730455, "eval_f1": 0.8396194897223541, "eval_loss": 0.0964573323726654, "eval_roc_auc": 0.9164800334138451, "eval_runtime": 371.3076, "eval_samples_per_second": 115.403, "eval_steps_per_second": 115.403, "step": 70000 }, { "epoch": 6.543451880892373, "grad_norm": 1.2382159233093262, "learning_rate": 4.353782848592538e-06, "loss": 0.0524, "step": 70100 }, { "epoch": 6.552786334360123, "grad_norm": 1.6130993366241455, "learning_rate": 4.352847657345928e-06, "loss": 0.0551, "step": 70200 }, { "epoch": 6.5621207878278724, "grad_norm": 1.3971660137176514, "learning_rate": 4.351912466099318e-06, "loss": 0.0515, "step": 70300 }, { "epoch": 6.571455241295622, "grad_norm": 1.6542308330535889, "learning_rate": 4.350977274852708e-06, "loss": 0.058, "step": 70400 }, { "epoch": 6.5807896947633715, "grad_norm": 1.3451213836669922, "learning_rate": 4.350042083606098e-06, "loss": 0.0547, "step": 70500 }, { "epoch": 6.590124148231121, "grad_norm": 0.9208652377128601, "learning_rate": 4.349106892359488e-06, "loss": 0.0616, "step": 70600 }, { "epoch": 6.5994586016988706, "grad_norm": 1.5493488311767578, "learning_rate": 4.348171701112878e-06, "loss": 0.055, "step": 70700 }, { "epoch": 6.60879305516662, "grad_norm": 1.3627309799194336, "learning_rate": 4.347236509866268e-06, "loss": 0.0562, "step": 70800 }, { "epoch": 6.61812750863437, "grad_norm": 0.9817646741867065, "learning_rate": 4.346301318619658e-06, "loss": 0.0562, "step": 70900 }, { "epoch": 6.627461962102119, "grad_norm": 4.412078380584717, "learning_rate": 4.345366127373048e-06, "loss": 0.0491, "step": 71000 }, { "epoch": 6.636796415569869, "grad_norm": 1.1879462003707886, "learning_rate": 4.344430936126438e-06, "loss": 0.0584, "step": 71100 }, { "epoch": 6.646130869037618, "grad_norm": 1.2077075242996216, "learning_rate": 4.343495744879828e-06, "loss": 0.0561, "step": 71200 }, { "epoch": 6.655465322505368, "grad_norm": 1.2987253665924072, "learning_rate": 4.342560553633218e-06, "loss": 0.0589, "step": 71300 }, { "epoch": 6.664799775973117, "grad_norm": 1.0931525230407715, "learning_rate": 4.341625362386608e-06, "loss": 0.059, "step": 71400 }, { "epoch": 6.674134229440866, "grad_norm": 0.7583901882171631, "learning_rate": 4.340690171139998e-06, "loss": 0.0549, "step": 71500 }, { "epoch": 6.683468682908615, "grad_norm": 1.0785542726516724, "learning_rate": 4.339754979893388e-06, "loss": 0.0536, "step": 71600 }, { "epoch": 6.692803136376365, "grad_norm": 1.1034150123596191, "learning_rate": 4.338819788646778e-06, "loss": 0.0523, "step": 71700 }, { "epoch": 6.7021375898441145, "grad_norm": 1.3473154306411743, "learning_rate": 4.337884597400168e-06, "loss": 0.0565, "step": 71800 }, { "epoch": 6.711472043311864, "grad_norm": 1.0186741352081299, "learning_rate": 4.336949406153559e-06, "loss": 0.0554, "step": 71900 }, { "epoch": 6.7208064967796135, "grad_norm": 1.1330205202102661, "learning_rate": 4.336014214906949e-06, "loss": 0.0624, "step": 72000 }, { "epoch": 6.730140950247363, "grad_norm": 1.3901538848876953, "learning_rate": 4.335079023660339e-06, "loss": 0.0539, "step": 72100 }, { "epoch": 6.7394754037151126, "grad_norm": 1.0010273456573486, "learning_rate": 4.334143832413729e-06, "loss": 0.0576, "step": 72200 }, { "epoch": 6.748809857182862, "grad_norm": 1.1742784976959229, "learning_rate": 4.333208641167119e-06, "loss": 0.0554, "step": 72300 }, { "epoch": 6.758144310650612, "grad_norm": 1.1369987726211548, "learning_rate": 4.332273449920509e-06, "loss": 0.0579, "step": 72400 }, { "epoch": 6.767478764118361, "grad_norm": 1.6060645580291748, "learning_rate": 4.331338258673899e-06, "loss": 0.062, "step": 72500 }, { "epoch": 6.776813217586111, "grad_norm": 1.2670186758041382, "learning_rate": 4.330403067427289e-06, "loss": 0.0561, "step": 72600 }, { "epoch": 6.786147671053859, "grad_norm": 1.1495561599731445, "learning_rate": 4.3294678761806795e-06, "loss": 0.0541, "step": 72700 }, { "epoch": 6.795482124521609, "grad_norm": 1.2559233903884888, "learning_rate": 4.3285326849340695e-06, "loss": 0.0591, "step": 72800 }, { "epoch": 6.804816577989358, "grad_norm": 1.156572937965393, "learning_rate": 4.3275974936874595e-06, "loss": 0.0551, "step": 72900 }, { "epoch": 6.814151031457108, "grad_norm": 0.6221569776535034, "learning_rate": 4.32666230244085e-06, "loss": 0.0546, "step": 73000 }, { "epoch": 6.823485484924857, "grad_norm": 4.337308883666992, "learning_rate": 4.32572711119424e-06, "loss": 0.0572, "step": 73100 }, { "epoch": 6.832819938392607, "grad_norm": 0.620670735836029, "learning_rate": 4.32479191994763e-06, "loss": 0.052, "step": 73200 }, { "epoch": 6.8421543918603565, "grad_norm": 1.8489940166473389, "learning_rate": 4.32385672870102e-06, "loss": 0.0506, "step": 73300 }, { "epoch": 6.851488845328106, "grad_norm": 1.4755682945251465, "learning_rate": 4.32292153745441e-06, "loss": 0.0515, "step": 73400 }, { "epoch": 6.8608232987958555, "grad_norm": 1.1321771144866943, "learning_rate": 4.3219863462078e-06, "loss": 0.059, "step": 73500 }, { "epoch": 6.870157752263605, "grad_norm": 0.7917705774307251, "learning_rate": 4.32105115496119e-06, "loss": 0.0523, "step": 73600 }, { "epoch": 6.879492205731355, "grad_norm": 0.9447356462478638, "learning_rate": 4.32011596371458e-06, "loss": 0.0579, "step": 73700 }, { "epoch": 6.888826659199104, "grad_norm": 0.5105993747711182, "learning_rate": 4.31918077246797e-06, "loss": 0.0552, "step": 73800 }, { "epoch": 6.898161112666854, "grad_norm": 1.8278632164001465, "learning_rate": 4.318245581221361e-06, "loss": 0.0546, "step": 73900 }, { "epoch": 6.907495566134603, "grad_norm": 1.5826325416564941, "learning_rate": 4.317310389974751e-06, "loss": 0.0569, "step": 74000 }, { "epoch": 6.916830019602353, "grad_norm": 1.39820396900177, "learning_rate": 4.316375198728141e-06, "loss": 0.0499, "step": 74100 }, { "epoch": 6.926164473070102, "grad_norm": 1.6977546215057373, "learning_rate": 4.315440007481531e-06, "loss": 0.0574, "step": 74200 }, { "epoch": 6.935498926537851, "grad_norm": 0.969083845615387, "learning_rate": 4.31450481623492e-06, "loss": 0.0566, "step": 74300 }, { "epoch": 6.9448333800056, "grad_norm": 0.9030424952507019, "learning_rate": 4.31356962498831e-06, "loss": 0.0598, "step": 74400 }, { "epoch": 6.95416783347335, "grad_norm": 1.4899497032165527, "learning_rate": 4.3126344337417e-06, "loss": 0.0562, "step": 74500 }, { "epoch": 6.963502286941099, "grad_norm": 1.8908016681671143, "learning_rate": 4.31169924249509e-06, "loss": 0.0569, "step": 74600 }, { "epoch": 6.972836740408849, "grad_norm": 1.393380045890808, "learning_rate": 4.31076405124848e-06, "loss": 0.0578, "step": 74700 }, { "epoch": 6.9821711938765985, "grad_norm": 0.7164834141731262, "learning_rate": 4.309828860001871e-06, "loss": 0.0561, "step": 74800 }, { "epoch": 6.991505647344348, "grad_norm": 6.7437028884887695, "learning_rate": 4.308893668755261e-06, "loss": 0.0532, "step": 74900 }, { "epoch": 7.0008401008120975, "grad_norm": 1.5835105180740356, "learning_rate": 4.307958477508651e-06, "loss": 0.053, "step": 75000 }, { "epoch": 7.0008401008120975, "eval_accuracy": 0.719906651108518, "eval_f1": 0.8384102999687066, "eval_loss": 0.09439481049776077, "eval_roc_auc": 0.9098706282597445, "eval_runtime": 442.153, "eval_samples_per_second": 96.912, "eval_steps_per_second": 96.912, "step": 75000 }, { "epoch": 7.010174554279847, "grad_norm": 2.084047317504883, "learning_rate": 4.307023286262041e-06, "loss": 0.0464, "step": 75100 }, { "epoch": 7.019509007747597, "grad_norm": 0.7721230983734131, "learning_rate": 4.306088095015431e-06, "loss": 0.0515, "step": 75200 }, { "epoch": 7.028843461215346, "grad_norm": 1.0084415674209595, "learning_rate": 4.305152903768821e-06, "loss": 0.0442, "step": 75300 }, { "epoch": 7.038177914683096, "grad_norm": 0.8776457905769348, "learning_rate": 4.304217712522211e-06, "loss": 0.05, "step": 75400 }, { "epoch": 7.047512368150845, "grad_norm": 0.9890807867050171, "learning_rate": 4.303282521275601e-06, "loss": 0.0495, "step": 75500 }, { "epoch": 7.056846821618595, "grad_norm": 1.5118169784545898, "learning_rate": 4.302347330028991e-06, "loss": 0.0524, "step": 75600 }, { "epoch": 7.066181275086343, "grad_norm": 0.823929488658905, "learning_rate": 4.301412138782381e-06, "loss": 0.0485, "step": 75700 }, { "epoch": 7.075515728554093, "grad_norm": 0.7516288757324219, "learning_rate": 4.300476947535771e-06, "loss": 0.0488, "step": 75800 }, { "epoch": 7.084850182021842, "grad_norm": 1.3900123834609985, "learning_rate": 4.299541756289161e-06, "loss": 0.0448, "step": 75900 }, { "epoch": 7.094184635489592, "grad_norm": 1.3783007860183716, "learning_rate": 4.298606565042552e-06, "loss": 0.0557, "step": 76000 }, { "epoch": 7.103519088957341, "grad_norm": 1.8114618062973022, "learning_rate": 4.297671373795941e-06, "loss": 0.0487, "step": 76100 }, { "epoch": 7.112853542425091, "grad_norm": 0.9959580302238464, "learning_rate": 4.296736182549331e-06, "loss": 0.0459, "step": 76200 }, { "epoch": 7.1221879958928405, "grad_norm": 1.4533172845840454, "learning_rate": 4.2958009913027214e-06, "loss": 0.0493, "step": 76300 }, { "epoch": 7.13152244936059, "grad_norm": 1.4935479164123535, "learning_rate": 4.2948658000561115e-06, "loss": 0.0481, "step": 76400 }, { "epoch": 7.1408569028283395, "grad_norm": 1.55165696144104, "learning_rate": 4.2939306088095015e-06, "loss": 0.0474, "step": 76500 }, { "epoch": 7.150191356296089, "grad_norm": 0.949858546257019, "learning_rate": 4.2929954175628915e-06, "loss": 0.0493, "step": 76600 }, { "epoch": 7.159525809763839, "grad_norm": 1.6703355312347412, "learning_rate": 4.2920602263162816e-06, "loss": 0.0455, "step": 76700 }, { "epoch": 7.168860263231588, "grad_norm": 2.9667580127716064, "learning_rate": 4.2911250350696724e-06, "loss": 0.0507, "step": 76800 }, { "epoch": 7.178194716699338, "grad_norm": 1.398945689201355, "learning_rate": 4.2901898438230625e-06, "loss": 0.0464, "step": 76900 }, { "epoch": 7.187529170167087, "grad_norm": 0.4286075830459595, "learning_rate": 4.2892546525764525e-06, "loss": 0.0465, "step": 77000 }, { "epoch": 7.196863623634836, "grad_norm": 1.8141902685165405, "learning_rate": 4.2883194613298425e-06, "loss": 0.0496, "step": 77100 }, { "epoch": 7.206198077102585, "grad_norm": 1.556246042251587, "learning_rate": 4.2873842700832326e-06, "loss": 0.0466, "step": 77200 }, { "epoch": 7.215532530570335, "grad_norm": 0.6448650360107422, "learning_rate": 4.286449078836623e-06, "loss": 0.0495, "step": 77300 }, { "epoch": 7.224866984038084, "grad_norm": 1.2889045476913452, "learning_rate": 4.285513887590013e-06, "loss": 0.0515, "step": 77400 }, { "epoch": 7.234201437505834, "grad_norm": 2.3179988861083984, "learning_rate": 4.284578696343403e-06, "loss": 0.0465, "step": 77500 }, { "epoch": 7.243535890973583, "grad_norm": 1.4847781658172607, "learning_rate": 4.283643505096793e-06, "loss": 0.0465, "step": 77600 }, { "epoch": 7.252870344441333, "grad_norm": 1.0862756967544556, "learning_rate": 4.282708313850183e-06, "loss": 0.0485, "step": 77700 }, { "epoch": 7.2622047979090825, "grad_norm": 2.0877931118011475, "learning_rate": 4.281773122603573e-06, "loss": 0.0536, "step": 77800 }, { "epoch": 7.271539251376832, "grad_norm": 1.7218574285507202, "learning_rate": 4.280837931356963e-06, "loss": 0.0513, "step": 77900 }, { "epoch": 7.2808737048445815, "grad_norm": 0.9789395928382874, "learning_rate": 4.279902740110353e-06, "loss": 0.0483, "step": 78000 }, { "epoch": 7.290208158312331, "grad_norm": 0.9494025707244873, "learning_rate": 4.278967548863743e-06, "loss": 0.0519, "step": 78100 }, { "epoch": 7.299542611780081, "grad_norm": 1.763017177581787, "learning_rate": 4.278032357617133e-06, "loss": 0.05, "step": 78200 }, { "epoch": 7.30887706524783, "grad_norm": 2.5303914546966553, "learning_rate": 4.277097166370523e-06, "loss": 0.0519, "step": 78300 }, { "epoch": 7.318211518715579, "grad_norm": 1.0946743488311768, "learning_rate": 4.276161975123913e-06, "loss": 0.049, "step": 78400 }, { "epoch": 7.327545972183328, "grad_norm": 0.7262258529663086, "learning_rate": 4.275226783877303e-06, "loss": 0.0565, "step": 78500 }, { "epoch": 7.336880425651078, "grad_norm": 2.0766773223876953, "learning_rate": 4.274291592630693e-06, "loss": 0.0521, "step": 78600 }, { "epoch": 7.346214879118827, "grad_norm": 0.7879973649978638, "learning_rate": 4.273356401384083e-06, "loss": 0.0473, "step": 78700 }, { "epoch": 7.355549332586577, "grad_norm": 1.6840286254882812, "learning_rate": 4.272421210137473e-06, "loss": 0.055, "step": 78800 }, { "epoch": 7.364883786054326, "grad_norm": 1.2958650588989258, "learning_rate": 4.271486018890864e-06, "loss": 0.0483, "step": 78900 }, { "epoch": 7.374218239522076, "grad_norm": 0.4911278784275055, "learning_rate": 4.270550827644254e-06, "loss": 0.0519, "step": 79000 }, { "epoch": 7.383552692989825, "grad_norm": 1.773228645324707, "learning_rate": 4.269615636397644e-06, "loss": 0.0514, "step": 79100 }, { "epoch": 7.392887146457575, "grad_norm": 1.388335108757019, "learning_rate": 4.268680445151034e-06, "loss": 0.0494, "step": 79200 }, { "epoch": 7.4022215999253245, "grad_norm": 1.1652065515518188, "learning_rate": 4.267745253904424e-06, "loss": 0.0505, "step": 79300 }, { "epoch": 7.411556053393074, "grad_norm": 0.8937588334083557, "learning_rate": 4.266810062657814e-06, "loss": 0.0495, "step": 79400 }, { "epoch": 7.4208905068608235, "grad_norm": 1.0933773517608643, "learning_rate": 4.265874871411204e-06, "loss": 0.0515, "step": 79500 }, { "epoch": 7.430224960328573, "grad_norm": 1.293104648590088, "learning_rate": 4.264939680164594e-06, "loss": 0.0505, "step": 79600 }, { "epoch": 7.439559413796323, "grad_norm": 1.9544748067855835, "learning_rate": 4.264004488917984e-06, "loss": 0.0491, "step": 79700 }, { "epoch": 7.448893867264072, "grad_norm": 3.0530896186828613, "learning_rate": 4.263069297671374e-06, "loss": 0.051, "step": 79800 }, { "epoch": 7.458228320731821, "grad_norm": 1.9887738227844238, "learning_rate": 4.262134106424764e-06, "loss": 0.0515, "step": 79900 }, { "epoch": 7.46756277419957, "grad_norm": 1.3019362688064575, "learning_rate": 4.261198915178154e-06, "loss": 0.0542, "step": 80000 }, { "epoch": 7.46756277419957, "eval_accuracy": 0.718926487747958, "eval_f1": 0.8370825178075091, "eval_loss": 0.09950225800275803, "eval_roc_auc": 0.90926798292067, "eval_runtime": 423.6556, "eval_samples_per_second": 101.143, "eval_steps_per_second": 101.143, "step": 80000 }, { "epoch": 7.47689722766732, "grad_norm": 2.786957263946533, "learning_rate": 4.260263723931544e-06, "loss": 0.0495, "step": 80100 }, { "epoch": 7.486231681135069, "grad_norm": 0.5671988725662231, "learning_rate": 4.259328532684934e-06, "loss": 0.0503, "step": 80200 }, { "epoch": 7.495566134602819, "grad_norm": 0.7418658137321472, "learning_rate": 4.258393341438324e-06, "loss": 0.0517, "step": 80300 }, { "epoch": 7.504900588070568, "grad_norm": 0.6571666598320007, "learning_rate": 4.257458150191714e-06, "loss": 0.0552, "step": 80400 }, { "epoch": 7.514235041538318, "grad_norm": 1.0323582887649536, "learning_rate": 4.256522958945104e-06, "loss": 0.0515, "step": 80500 }, { "epoch": 7.523569495006067, "grad_norm": 1.486782193183899, "learning_rate": 4.2555877676984944e-06, "loss": 0.0513, "step": 80600 }, { "epoch": 7.532903948473817, "grad_norm": 0.32526251673698425, "learning_rate": 4.2546525764518845e-06, "loss": 0.0465, "step": 80700 }, { "epoch": 7.5422384019415665, "grad_norm": 1.3061977624893188, "learning_rate": 4.2537173852052745e-06, "loss": 0.0497, "step": 80800 }, { "epoch": 7.551572855409316, "grad_norm": 0.6157976388931274, "learning_rate": 4.252782193958665e-06, "loss": 0.0474, "step": 80900 }, { "epoch": 7.5609073088770655, "grad_norm": 1.0431103706359863, "learning_rate": 4.251847002712055e-06, "loss": 0.0512, "step": 81000 }, { "epoch": 7.570241762344815, "grad_norm": 2.854995012283325, "learning_rate": 4.2509118114654454e-06, "loss": 0.0514, "step": 81100 }, { "epoch": 7.579576215812564, "grad_norm": 1.870150089263916, "learning_rate": 4.2499766202188355e-06, "loss": 0.0497, "step": 81200 }, { "epoch": 7.588910669280313, "grad_norm": 6.472436904907227, "learning_rate": 4.2490414289722255e-06, "loss": 0.0533, "step": 81300 }, { "epoch": 7.598245122748063, "grad_norm": 1.250641942024231, "learning_rate": 4.2481062377256155e-06, "loss": 0.048, "step": 81400 }, { "epoch": 7.607579576215812, "grad_norm": 1.3704780340194702, "learning_rate": 4.2471710464790056e-06, "loss": 0.045, "step": 81500 }, { "epoch": 7.616914029683562, "grad_norm": 1.322293758392334, "learning_rate": 4.246235855232395e-06, "loss": 0.0532, "step": 81600 }, { "epoch": 7.626248483151311, "grad_norm": 1.1777706146240234, "learning_rate": 4.245300663985786e-06, "loss": 0.0513, "step": 81700 }, { "epoch": 7.635582936619061, "grad_norm": 1.3004252910614014, "learning_rate": 4.244365472739176e-06, "loss": 0.048, "step": 81800 }, { "epoch": 7.64491739008681, "grad_norm": 1.3123252391815186, "learning_rate": 4.243430281492566e-06, "loss": 0.0504, "step": 81900 }, { "epoch": 7.65425184355456, "grad_norm": 2.237435817718506, "learning_rate": 4.242495090245956e-06, "loss": 0.0526, "step": 82000 }, { "epoch": 7.663586297022309, "grad_norm": 0.9692328572273254, "learning_rate": 4.241559898999346e-06, "loss": 0.0511, "step": 82100 }, { "epoch": 7.672920750490059, "grad_norm": 1.3014707565307617, "learning_rate": 4.240624707752736e-06, "loss": 0.05, "step": 82200 }, { "epoch": 7.6822552039578085, "grad_norm": 1.3753529787063599, "learning_rate": 4.239689516506126e-06, "loss": 0.0517, "step": 82300 }, { "epoch": 7.691589657425558, "grad_norm": 1.6993976831436157, "learning_rate": 4.238754325259516e-06, "loss": 0.0483, "step": 82400 }, { "epoch": 7.7009241108933075, "grad_norm": 1.6093099117279053, "learning_rate": 4.237819134012906e-06, "loss": 0.0485, "step": 82500 }, { "epoch": 7.710258564361057, "grad_norm": 1.2096747159957886, "learning_rate": 4.236883942766296e-06, "loss": 0.0535, "step": 82600 }, { "epoch": 7.719593017828807, "grad_norm": 0.7273058891296387, "learning_rate": 4.235948751519686e-06, "loss": 0.0486, "step": 82700 }, { "epoch": 7.728927471296555, "grad_norm": 1.0419132709503174, "learning_rate": 4.235013560273076e-06, "loss": 0.0494, "step": 82800 }, { "epoch": 7.738261924764305, "grad_norm": 0.7865692377090454, "learning_rate": 4.234078369026466e-06, "loss": 0.0525, "step": 82900 }, { "epoch": 7.747596378232054, "grad_norm": 0.8834711313247681, "learning_rate": 4.233143177779857e-06, "loss": 0.0495, "step": 83000 }, { "epoch": 7.756930831699804, "grad_norm": 1.9887572526931763, "learning_rate": 4.232207986533247e-06, "loss": 0.0502, "step": 83100 }, { "epoch": 7.766265285167553, "grad_norm": 1.82724928855896, "learning_rate": 4.231272795286637e-06, "loss": 0.0524, "step": 83200 }, { "epoch": 7.775599738635303, "grad_norm": 1.487930178642273, "learning_rate": 4.230337604040027e-06, "loss": 0.0477, "step": 83300 }, { "epoch": 7.784934192103052, "grad_norm": 0.4634471535682678, "learning_rate": 4.229402412793416e-06, "loss": 0.0514, "step": 83400 }, { "epoch": 7.794268645570802, "grad_norm": 0.8138898611068726, "learning_rate": 4.228467221546806e-06, "loss": 0.0463, "step": 83500 }, { "epoch": 7.803603099038551, "grad_norm": 0.5810794830322266, "learning_rate": 4.227532030300196e-06, "loss": 0.051, "step": 83600 }, { "epoch": 7.812937552506301, "grad_norm": 0.924345076084137, "learning_rate": 4.226596839053586e-06, "loss": 0.0511, "step": 83700 }, { "epoch": 7.8222720059740505, "grad_norm": 1.6980875730514526, "learning_rate": 4.225661647806977e-06, "loss": 0.0486, "step": 83800 }, { "epoch": 7.8316064594418, "grad_norm": 1.2705451250076294, "learning_rate": 4.224726456560367e-06, "loss": 0.0525, "step": 83900 }, { "epoch": 7.8409409129095495, "grad_norm": 2.187919855117798, "learning_rate": 4.223791265313757e-06, "loss": 0.0459, "step": 84000 }, { "epoch": 7.850275366377298, "grad_norm": 1.5260167121887207, "learning_rate": 4.222856074067147e-06, "loss": 0.0496, "step": 84100 }, { "epoch": 7.859609819845048, "grad_norm": 1.294994831085205, "learning_rate": 4.221920882820537e-06, "loss": 0.0492, "step": 84200 }, { "epoch": 7.868944273312797, "grad_norm": 0.9599978923797607, "learning_rate": 4.220985691573927e-06, "loss": 0.0477, "step": 84300 }, { "epoch": 7.878278726780547, "grad_norm": 0.9344620704650879, "learning_rate": 4.220050500327317e-06, "loss": 0.0509, "step": 84400 }, { "epoch": 7.887613180248296, "grad_norm": 1.433194875717163, "learning_rate": 4.219115309080707e-06, "loss": 0.0498, "step": 84500 }, { "epoch": 7.896947633716046, "grad_norm": 1.8096892833709717, "learning_rate": 4.218180117834097e-06, "loss": 0.0511, "step": 84600 }, { "epoch": 7.906282087183795, "grad_norm": 1.4516615867614746, "learning_rate": 4.217244926587487e-06, "loss": 0.0525, "step": 84700 }, { "epoch": 7.915616540651545, "grad_norm": 1.875609040260315, "learning_rate": 4.2163097353408774e-06, "loss": 0.0512, "step": 84800 }, { "epoch": 7.924950994119294, "grad_norm": 0.7043289542198181, "learning_rate": 4.2153745440942675e-06, "loss": 0.0491, "step": 84900 }, { "epoch": 7.934285447587044, "grad_norm": 2.634597063064575, "learning_rate": 4.214439352847658e-06, "loss": 0.0517, "step": 85000 }, { "epoch": 7.934285447587044, "eval_accuracy": 0.720536756126021, "eval_f1": 0.8333000090884304, "eval_loss": 0.10117647796869278, "eval_roc_auc": 0.901383791625668, "eval_runtime": 335.3759, "eval_samples_per_second": 127.767, "eval_steps_per_second": 127.767, "step": 85000 }, { "epoch": 7.943619901054793, "grad_norm": 1.2174620628356934, "learning_rate": 4.213504161601048e-06, "loss": 0.0553, "step": 85100 }, { "epoch": 7.952954354522543, "grad_norm": 1.1806315183639526, "learning_rate": 4.2125689703544376e-06, "loss": 0.0526, "step": 85200 }, { "epoch": 7.9622888079902925, "grad_norm": 1.3883838653564453, "learning_rate": 4.211633779107828e-06, "loss": 0.0532, "step": 85300 }, { "epoch": 7.971623261458042, "grad_norm": 0.5754416584968567, "learning_rate": 4.210698587861218e-06, "loss": 0.0516, "step": 85400 }, { "epoch": 7.9809577149257915, "grad_norm": 1.863527774810791, "learning_rate": 4.209763396614608e-06, "loss": 0.0569, "step": 85500 }, { "epoch": 7.99029216839354, "grad_norm": 0.9408576488494873, "learning_rate": 4.208828205367998e-06, "loss": 0.0507, "step": 85600 }, { "epoch": 7.99962662186129, "grad_norm": 1.9337635040283203, "learning_rate": 4.207893014121388e-06, "loss": 0.0557, "step": 85700 }, { "epoch": 8.00896107532904, "grad_norm": 1.3411000967025757, "learning_rate": 4.206957822874779e-06, "loss": 0.0411, "step": 85800 }, { "epoch": 8.01829552879679, "grad_norm": 0.9162245392799377, "learning_rate": 4.206022631628169e-06, "loss": 0.0465, "step": 85900 }, { "epoch": 8.02762998226454, "grad_norm": 1.0908981561660767, "learning_rate": 4.205087440381559e-06, "loss": 0.0483, "step": 86000 }, { "epoch": 8.036964435732289, "grad_norm": 1.2315964698791504, "learning_rate": 4.204152249134949e-06, "loss": 0.0458, "step": 86100 }, { "epoch": 8.046298889200038, "grad_norm": 1.3411628007888794, "learning_rate": 4.203217057888339e-06, "loss": 0.0441, "step": 86200 }, { "epoch": 8.055633342667786, "grad_norm": 16.28641700744629, "learning_rate": 4.202281866641729e-06, "loss": 0.0458, "step": 86300 }, { "epoch": 8.064967796135535, "grad_norm": 0.9326385855674744, "learning_rate": 4.201346675395119e-06, "loss": 0.043, "step": 86400 }, { "epoch": 8.074302249603285, "grad_norm": 1.6193753480911255, "learning_rate": 4.200411484148509e-06, "loss": 0.0449, "step": 86500 }, { "epoch": 8.083636703071035, "grad_norm": 1.5244615077972412, "learning_rate": 4.199476292901899e-06, "loss": 0.0447, "step": 86600 }, { "epoch": 8.092971156538784, "grad_norm": 1.315413236618042, "learning_rate": 4.198541101655289e-06, "loss": 0.0445, "step": 86700 }, { "epoch": 8.102305610006534, "grad_norm": 1.2596796751022339, "learning_rate": 4.197605910408679e-06, "loss": 0.044, "step": 86800 }, { "epoch": 8.111640063474283, "grad_norm": 1.6920976638793945, "learning_rate": 4.196670719162069e-06, "loss": 0.042, "step": 86900 }, { "epoch": 8.120974516942033, "grad_norm": 1.4467309713363647, "learning_rate": 4.195735527915459e-06, "loss": 0.0455, "step": 87000 }, { "epoch": 8.130308970409782, "grad_norm": 1.4090152978897095, "learning_rate": 4.194800336668849e-06, "loss": 0.046, "step": 87100 }, { "epoch": 8.139643423877532, "grad_norm": 0.6371055841445923, "learning_rate": 4.193865145422239e-06, "loss": 0.0443, "step": 87200 }, { "epoch": 8.148977877345281, "grad_norm": 1.7532577514648438, "learning_rate": 4.192929954175629e-06, "loss": 0.0457, "step": 87300 }, { "epoch": 8.15831233081303, "grad_norm": 1.8618261814117432, "learning_rate": 4.191994762929019e-06, "loss": 0.0428, "step": 87400 }, { "epoch": 8.16764678428078, "grad_norm": 1.0953607559204102, "learning_rate": 4.191059571682409e-06, "loss": 0.0476, "step": 87500 }, { "epoch": 8.17698123774853, "grad_norm": 1.4281262159347534, "learning_rate": 4.190124380435799e-06, "loss": 0.0437, "step": 87600 }, { "epoch": 8.18631569121628, "grad_norm": 0.26820072531700134, "learning_rate": 4.189189189189189e-06, "loss": 0.0423, "step": 87700 }, { "epoch": 8.195650144684029, "grad_norm": 1.0762094259262085, "learning_rate": 4.188253997942579e-06, "loss": 0.0412, "step": 87800 }, { "epoch": 8.204984598151778, "grad_norm": 2.0113766193389893, "learning_rate": 4.18731880669597e-06, "loss": 0.0445, "step": 87900 }, { "epoch": 8.214319051619528, "grad_norm": 1.5653704404830933, "learning_rate": 4.18638361544936e-06, "loss": 0.0443, "step": 88000 }, { "epoch": 8.223653505087277, "grad_norm": 0.8673405647277832, "learning_rate": 4.18544842420275e-06, "loss": 0.0436, "step": 88100 }, { "epoch": 8.232987958555027, "grad_norm": 3.387294054031372, "learning_rate": 4.18451323295614e-06, "loss": 0.0473, "step": 88200 }, { "epoch": 8.242322412022776, "grad_norm": 0.9315579533576965, "learning_rate": 4.18357804170953e-06, "loss": 0.0446, "step": 88300 }, { "epoch": 8.251656865490526, "grad_norm": 1.8314648866653442, "learning_rate": 4.18264285046292e-06, "loss": 0.0444, "step": 88400 }, { "epoch": 8.260991318958276, "grad_norm": 2.186568260192871, "learning_rate": 4.18170765921631e-06, "loss": 0.042, "step": 88500 }, { "epoch": 8.270325772426025, "grad_norm": 0.32874396443367004, "learning_rate": 4.1807724679697e-06, "loss": 0.0449, "step": 88600 }, { "epoch": 8.279660225893775, "grad_norm": 2.9648547172546387, "learning_rate": 4.17983727672309e-06, "loss": 0.0454, "step": 88700 }, { "epoch": 8.288994679361524, "grad_norm": 0.49550774693489075, "learning_rate": 4.17890208547648e-06, "loss": 0.0429, "step": 88800 }, { "epoch": 8.298329132829274, "grad_norm": 1.1523876190185547, "learning_rate": 4.17796689422987e-06, "loss": 0.0439, "step": 88900 }, { "epoch": 8.307663586297021, "grad_norm": 1.667288899421692, "learning_rate": 4.17703170298326e-06, "loss": 0.045, "step": 89000 }, { "epoch": 8.316998039764771, "grad_norm": 1.4356316328048706, "learning_rate": 4.1760965117366504e-06, "loss": 0.0473, "step": 89100 }, { "epoch": 8.32633249323252, "grad_norm": 1.4472638368606567, "learning_rate": 4.1751613204900405e-06, "loss": 0.0481, "step": 89200 }, { "epoch": 8.33566694670027, "grad_norm": 2.282935380935669, "learning_rate": 4.1742261292434305e-06, "loss": 0.0497, "step": 89300 }, { "epoch": 8.34500140016802, "grad_norm": 2.357179641723633, "learning_rate": 4.1732909379968205e-06, "loss": 0.0458, "step": 89400 }, { "epoch": 8.354335853635769, "grad_norm": 1.9043017625808716, "learning_rate": 4.1723557467502106e-06, "loss": 0.0405, "step": 89500 }, { "epoch": 8.363670307103519, "grad_norm": 2.102672576904297, "learning_rate": 4.171420555503601e-06, "loss": 0.0468, "step": 89600 }, { "epoch": 8.373004760571268, "grad_norm": 1.0020416975021362, "learning_rate": 4.170485364256991e-06, "loss": 0.0425, "step": 89700 }, { "epoch": 8.382339214039018, "grad_norm": 2.7653748989105225, "learning_rate": 4.169550173010381e-06, "loss": 0.0514, "step": 89800 }, { "epoch": 8.391673667506767, "grad_norm": 0.719533383846283, "learning_rate": 4.168614981763771e-06, "loss": 0.0397, "step": 89900 }, { "epoch": 8.401008120974517, "grad_norm": 2.3924760818481445, "learning_rate": 4.1676797905171616e-06, "loss": 0.0423, "step": 90000 }, { "epoch": 8.401008120974517, "eval_accuracy": 0.7142123687281213, "eval_f1": 0.8331676489384364, "eval_loss": 0.10914179682731628, "eval_roc_auc": 0.9094593761791979, "eval_runtime": 278.5006, "eval_samples_per_second": 153.86, "eval_steps_per_second": 153.86, "step": 90000 }, { "epoch": 8.410342574442266, "grad_norm": 0.9451422095298767, "learning_rate": 4.166744599270552e-06, "loss": 0.045, "step": 90100 }, { "epoch": 8.419677027910016, "grad_norm": 1.4729901552200317, "learning_rate": 4.165809408023942e-06, "loss": 0.0472, "step": 90200 }, { "epoch": 8.429011481377765, "grad_norm": 1.0862857103347778, "learning_rate": 4.164874216777332e-06, "loss": 0.0435, "step": 90300 }, { "epoch": 8.438345934845515, "grad_norm": 0.5579088926315308, "learning_rate": 4.163939025530722e-06, "loss": 0.0451, "step": 90400 }, { "epoch": 8.447680388313264, "grad_norm": 2.0146291255950928, "learning_rate": 4.163003834284112e-06, "loss": 0.0467, "step": 90500 }, { "epoch": 8.457014841781014, "grad_norm": 1.3372385501861572, "learning_rate": 4.162068643037501e-06, "loss": 0.0455, "step": 90600 }, { "epoch": 8.466349295248763, "grad_norm": 1.0408650636672974, "learning_rate": 4.161133451790891e-06, "loss": 0.0412, "step": 90700 }, { "epoch": 8.475683748716513, "grad_norm": 1.131712555885315, "learning_rate": 4.160198260544282e-06, "loss": 0.0465, "step": 90800 }, { "epoch": 8.485018202184262, "grad_norm": 1.8085250854492188, "learning_rate": 4.159263069297672e-06, "loss": 0.045, "step": 90900 }, { "epoch": 8.494352655652012, "grad_norm": 1.6612430810928345, "learning_rate": 4.158327878051062e-06, "loss": 0.044, "step": 91000 }, { "epoch": 8.503687109119761, "grad_norm": 2.180926561355591, "learning_rate": 4.157392686804452e-06, "loss": 0.0431, "step": 91100 }, { "epoch": 8.513021562587511, "grad_norm": 1.7844724655151367, "learning_rate": 4.156457495557842e-06, "loss": 0.0416, "step": 91200 }, { "epoch": 8.52235601605526, "grad_norm": 1.4234685897827148, "learning_rate": 4.155522304311232e-06, "loss": 0.0444, "step": 91300 }, { "epoch": 8.53169046952301, "grad_norm": 1.6556077003479004, "learning_rate": 4.154587113064622e-06, "loss": 0.0469, "step": 91400 }, { "epoch": 8.54102492299076, "grad_norm": 1.2140084505081177, "learning_rate": 4.153651921818012e-06, "loss": 0.045, "step": 91500 }, { "epoch": 8.550359376458509, "grad_norm": 2.773210287094116, "learning_rate": 4.152716730571402e-06, "loss": 0.0496, "step": 91600 }, { "epoch": 8.559693829926259, "grad_norm": 1.902849793434143, "learning_rate": 4.151781539324792e-06, "loss": 0.05, "step": 91700 }, { "epoch": 8.569028283394008, "grad_norm": 1.01845121383667, "learning_rate": 4.150846348078182e-06, "loss": 0.0468, "step": 91800 }, { "epoch": 8.578362736861756, "grad_norm": 1.3362393379211426, "learning_rate": 4.149911156831572e-06, "loss": 0.0405, "step": 91900 }, { "epoch": 8.587697190329505, "grad_norm": 1.682104468345642, "learning_rate": 4.148975965584963e-06, "loss": 0.0444, "step": 92000 }, { "epoch": 8.597031643797255, "grad_norm": 0.3701697587966919, "learning_rate": 4.148040774338353e-06, "loss": 0.0443, "step": 92100 }, { "epoch": 8.606366097265004, "grad_norm": 0.9337983131408691, "learning_rate": 4.147105583091743e-06, "loss": 0.0469, "step": 92200 }, { "epoch": 8.615700550732754, "grad_norm": 3.7678046226501465, "learning_rate": 4.146170391845133e-06, "loss": 0.0504, "step": 92300 }, { "epoch": 8.625035004200504, "grad_norm": 1.2244102954864502, "learning_rate": 4.145235200598522e-06, "loss": 0.0431, "step": 92400 }, { "epoch": 8.634369457668253, "grad_norm": 1.3210524320602417, "learning_rate": 4.144300009351912e-06, "loss": 0.0459, "step": 92500 }, { "epoch": 8.643703911136003, "grad_norm": 2.3021469116210938, "learning_rate": 4.143364818105302e-06, "loss": 0.0451, "step": 92600 }, { "epoch": 8.653038364603752, "grad_norm": 3.384725570678711, "learning_rate": 4.142429626858692e-06, "loss": 0.0452, "step": 92700 }, { "epoch": 8.662372818071502, "grad_norm": 1.058915376663208, "learning_rate": 4.141494435612083e-06, "loss": 0.0485, "step": 92800 }, { "epoch": 8.671707271539251, "grad_norm": 1.009801983833313, "learning_rate": 4.140559244365473e-06, "loss": 0.0488, "step": 92900 }, { "epoch": 8.681041725007, "grad_norm": 1.7832045555114746, "learning_rate": 4.139624053118863e-06, "loss": 0.0459, "step": 93000 }, { "epoch": 8.69037617847475, "grad_norm": 1.3970378637313843, "learning_rate": 4.138688861872253e-06, "loss": 0.0459, "step": 93100 }, { "epoch": 8.6997106319425, "grad_norm": 1.2007447481155396, "learning_rate": 4.137753670625643e-06, "loss": 0.0447, "step": 93200 }, { "epoch": 8.70904508541025, "grad_norm": 2.089463472366333, "learning_rate": 4.136818479379033e-06, "loss": 0.0441, "step": 93300 }, { "epoch": 8.718379538877999, "grad_norm": 1.1761178970336914, "learning_rate": 4.1358832881324235e-06, "loss": 0.0478, "step": 93400 }, { "epoch": 8.727713992345748, "grad_norm": 2.114921808242798, "learning_rate": 4.1349480968858135e-06, "loss": 0.0456, "step": 93500 }, { "epoch": 8.737048445813498, "grad_norm": 1.2774345874786377, "learning_rate": 4.1340129056392035e-06, "loss": 0.0489, "step": 93600 }, { "epoch": 8.746382899281247, "grad_norm": 2.646120548248291, "learning_rate": 4.1330777143925935e-06, "loss": 0.0451, "step": 93700 }, { "epoch": 8.755717352748997, "grad_norm": 1.3969744443893433, "learning_rate": 4.1321425231459836e-06, "loss": 0.0434, "step": 93800 }, { "epoch": 8.765051806216746, "grad_norm": 1.536044716835022, "learning_rate": 4.131207331899374e-06, "loss": 0.0445, "step": 93900 }, { "epoch": 8.774386259684496, "grad_norm": 2.585411787033081, "learning_rate": 4.130272140652764e-06, "loss": 0.0474, "step": 94000 }, { "epoch": 8.783720713152245, "grad_norm": 1.3964184522628784, "learning_rate": 4.1293369494061545e-06, "loss": 0.0461, "step": 94100 }, { "epoch": 8.793055166619995, "grad_norm": 3.184845447540283, "learning_rate": 4.128401758159544e-06, "loss": 0.0494, "step": 94200 }, { "epoch": 8.802389620087745, "grad_norm": 1.1090646982192993, "learning_rate": 4.127466566912934e-06, "loss": 0.0434, "step": 94300 }, { "epoch": 8.811724073555494, "grad_norm": 2.809664487838745, "learning_rate": 4.126531375666324e-06, "loss": 0.046, "step": 94400 }, { "epoch": 8.821058527023244, "grad_norm": 1.262279748916626, "learning_rate": 4.125596184419714e-06, "loss": 0.0457, "step": 94500 }, { "epoch": 8.830392980490993, "grad_norm": 0.872014045715332, "learning_rate": 4.124660993173104e-06, "loss": 0.049, "step": 94600 }, { "epoch": 8.839727433958743, "grad_norm": 1.0711236000061035, "learning_rate": 4.123725801926494e-06, "loss": 0.045, "step": 94700 }, { "epoch": 8.84906188742649, "grad_norm": 2.5769572257995605, "learning_rate": 4.122790610679884e-06, "loss": 0.0458, "step": 94800 }, { "epoch": 8.85839634089424, "grad_norm": 1.351606845855713, "learning_rate": 4.121855419433275e-06, "loss": 0.0444, "step": 94900 }, { "epoch": 8.86773079436199, "grad_norm": 1.443747878074646, "learning_rate": 4.120920228186665e-06, "loss": 0.0484, "step": 95000 }, { "epoch": 8.86773079436199, "eval_accuracy": 0.7132555425904318, "eval_f1": 0.8345415438540588, "eval_loss": 0.10653097927570343, "eval_roc_auc": 0.9112072679692321, "eval_runtime": 236.0022, "eval_samples_per_second": 181.566, "eval_steps_per_second": 181.566, "step": 95000 }, { "epoch": 8.877065247829739, "grad_norm": 1.4825226068496704, "learning_rate": 4.119985036940055e-06, "loss": 0.0452, "step": 95100 }, { "epoch": 8.886399701297488, "grad_norm": 1.6699721813201904, "learning_rate": 4.119049845693445e-06, "loss": 0.0454, "step": 95200 }, { "epoch": 8.895734154765238, "grad_norm": 2.1665749549865723, "learning_rate": 4.118114654446835e-06, "loss": 0.0512, "step": 95300 }, { "epoch": 8.905068608232988, "grad_norm": 1.9466471672058105, "learning_rate": 4.117179463200225e-06, "loss": 0.0465, "step": 95400 }, { "epoch": 8.914403061700737, "grad_norm": 1.9556076526641846, "learning_rate": 4.116244271953615e-06, "loss": 0.0477, "step": 95500 }, { "epoch": 8.923737515168487, "grad_norm": 1.318790078163147, "learning_rate": 4.115309080707005e-06, "loss": 0.0471, "step": 95600 }, { "epoch": 8.933071968636236, "grad_norm": 2.300886631011963, "learning_rate": 4.114373889460395e-06, "loss": 0.0478, "step": 95700 }, { "epoch": 8.942406422103986, "grad_norm": 1.734837532043457, "learning_rate": 4.113438698213785e-06, "loss": 0.0445, "step": 95800 }, { "epoch": 8.951740875571735, "grad_norm": 0.6897457242012024, "learning_rate": 4.112503506967175e-06, "loss": 0.0473, "step": 95900 }, { "epoch": 8.961075329039485, "grad_norm": 1.6150586605072021, "learning_rate": 4.111568315720565e-06, "loss": 0.0425, "step": 96000 }, { "epoch": 8.970409782507234, "grad_norm": 1.9031270742416382, "learning_rate": 4.110633124473955e-06, "loss": 0.0501, "step": 96100 }, { "epoch": 8.979744235974984, "grad_norm": 2.8793766498565674, "learning_rate": 4.109697933227345e-06, "loss": 0.0464, "step": 96200 }, { "epoch": 8.989078689442733, "grad_norm": 3.1916162967681885, "learning_rate": 4.108762741980735e-06, "loss": 0.0494, "step": 96300 }, { "epoch": 8.998413142910483, "grad_norm": 2.0394248962402344, "learning_rate": 4.107827550734125e-06, "loss": 0.0472, "step": 96400 }, { "epoch": 9.007747596378232, "grad_norm": 1.037163257598877, "learning_rate": 4.106892359487515e-06, "loss": 0.0409, "step": 96500 }, { "epoch": 9.017082049845982, "grad_norm": 1.6622933149337769, "learning_rate": 4.105957168240905e-06, "loss": 0.0413, "step": 96600 }, { "epoch": 9.026416503313731, "grad_norm": 1.7712889909744263, "learning_rate": 4.105021976994295e-06, "loss": 0.0365, "step": 96700 }, { "epoch": 9.03575095678148, "grad_norm": 1.386853814125061, "learning_rate": 4.104086785747685e-06, "loss": 0.0442, "step": 96800 }, { "epoch": 9.04508541024923, "grad_norm": 0.7723145484924316, "learning_rate": 4.103151594501076e-06, "loss": 0.0453, "step": 96900 }, { "epoch": 9.05441986371698, "grad_norm": 0.8764021396636963, "learning_rate": 4.102216403254466e-06, "loss": 0.0396, "step": 97000 }, { "epoch": 9.06375431718473, "grad_norm": 1.7529503107070923, "learning_rate": 4.101281212007856e-06, "loss": 0.0418, "step": 97100 }, { "epoch": 9.073088770652479, "grad_norm": 1.1604406833648682, "learning_rate": 4.100346020761246e-06, "loss": 0.0412, "step": 97200 }, { "epoch": 9.082423224120229, "grad_norm": 0.9803750514984131, "learning_rate": 4.099410829514636e-06, "loss": 0.0426, "step": 97300 }, { "epoch": 9.091757677587978, "grad_norm": 1.6186162233352661, "learning_rate": 4.098475638268026e-06, "loss": 0.0429, "step": 97400 }, { "epoch": 9.101092131055728, "grad_norm": 0.9343589544296265, "learning_rate": 4.097540447021416e-06, "loss": 0.0378, "step": 97500 }, { "epoch": 9.110426584523475, "grad_norm": 0.762942910194397, "learning_rate": 4.0966052557748064e-06, "loss": 0.0373, "step": 97600 }, { "epoch": 9.119761037991225, "grad_norm": 0.8738827705383301, "learning_rate": 4.0956700645281965e-06, "loss": 0.0374, "step": 97700 }, { "epoch": 9.129095491458974, "grad_norm": 2.0420236587524414, "learning_rate": 4.0947348732815865e-06, "loss": 0.0385, "step": 97800 }, { "epoch": 9.138429944926724, "grad_norm": 1.979295253753662, "learning_rate": 4.0937996820349765e-06, "loss": 0.0387, "step": 97900 }, { "epoch": 9.147764398394473, "grad_norm": 1.9109234809875488, "learning_rate": 4.0928644907883666e-06, "loss": 0.0438, "step": 98000 }, { "epoch": 9.157098851862223, "grad_norm": 2.1418919563293457, "learning_rate": 4.091929299541757e-06, "loss": 0.0392, "step": 98100 }, { "epoch": 9.166433305329972, "grad_norm": 1.5460178852081299, "learning_rate": 4.090994108295147e-06, "loss": 0.0413, "step": 98200 }, { "epoch": 9.175767758797722, "grad_norm": 1.4942265748977661, "learning_rate": 4.090058917048537e-06, "loss": 0.0424, "step": 98300 }, { "epoch": 9.185102212265472, "grad_norm": 1.0865761041641235, "learning_rate": 4.089123725801927e-06, "loss": 0.0456, "step": 98400 }, { "epoch": 9.194436665733221, "grad_norm": 3.3230814933776855, "learning_rate": 4.088188534555317e-06, "loss": 0.0375, "step": 98500 }, { "epoch": 9.20377111920097, "grad_norm": 0.7810090780258179, "learning_rate": 4.087253343308707e-06, "loss": 0.039, "step": 98600 }, { "epoch": 9.21310557266872, "grad_norm": 1.290023922920227, "learning_rate": 4.086318152062097e-06, "loss": 0.0417, "step": 98700 }, { "epoch": 9.22244002613647, "grad_norm": 1.342603325843811, "learning_rate": 4.085382960815487e-06, "loss": 0.0457, "step": 98800 }, { "epoch": 9.23177447960422, "grad_norm": 1.3632856607437134, "learning_rate": 4.084447769568877e-06, "loss": 0.0413, "step": 98900 }, { "epoch": 9.241108933071969, "grad_norm": 1.474469542503357, "learning_rate": 4.083512578322268e-06, "loss": 0.0404, "step": 99000 }, { "epoch": 9.250443386539718, "grad_norm": 1.0991060733795166, "learning_rate": 4.082577387075658e-06, "loss": 0.0392, "step": 99100 }, { "epoch": 9.259777840007468, "grad_norm": 1.1467899084091187, "learning_rate": 4.081642195829048e-06, "loss": 0.0398, "step": 99200 }, { "epoch": 9.269112293475217, "grad_norm": 2.9273295402526855, "learning_rate": 4.080707004582438e-06, "loss": 0.0375, "step": 99300 }, { "epoch": 9.278446746942967, "grad_norm": 3.2783684730529785, "learning_rate": 4.079771813335828e-06, "loss": 0.041, "step": 99400 }, { "epoch": 9.287781200410716, "grad_norm": 0.36493784189224243, "learning_rate": 4.078836622089218e-06, "loss": 0.0403, "step": 99500 }, { "epoch": 9.297115653878466, "grad_norm": 1.2151552438735962, "learning_rate": 4.077901430842608e-06, "loss": 0.0404, "step": 99600 }, { "epoch": 9.306450107346215, "grad_norm": 1.6631826162338257, "learning_rate": 4.076966239595997e-06, "loss": 0.0433, "step": 99700 }, { "epoch": 9.315784560813965, "grad_norm": 1.3799182176589966, "learning_rate": 4.076031048349388e-06, "loss": 0.0406, "step": 99800 }, { "epoch": 9.325119014281714, "grad_norm": 1.0982177257537842, "learning_rate": 4.075095857102778e-06, "loss": 0.0404, "step": 99900 }, { "epoch": 9.334453467749464, "grad_norm": 1.4746732711791992, "learning_rate": 4.074160665856168e-06, "loss": 0.0402, "step": 100000 }, { "epoch": 9.334453467749464, "eval_accuracy": 0.7110151691948658, "eval_f1": 0.8328770521578113, "eval_loss": 0.11481796205043793, "eval_roc_auc": 0.9099288464627382, "eval_runtime": 245.2524, "eval_samples_per_second": 174.718, "eval_steps_per_second": 174.718, "step": 100000 }, { "epoch": 9.343787921217213, "grad_norm": 2.4036707878112793, "learning_rate": 4.073225474609558e-06, "loss": 0.0418, "step": 100100 }, { "epoch": 9.353122374684963, "grad_norm": 2.2646493911743164, "learning_rate": 4.072290283362948e-06, "loss": 0.0426, "step": 100200 }, { "epoch": 9.362456828152713, "grad_norm": 1.3251582384109497, "learning_rate": 4.071355092116338e-06, "loss": 0.0425, "step": 100300 }, { "epoch": 9.37179128162046, "grad_norm": 1.4975179433822632, "learning_rate": 4.070419900869728e-06, "loss": 0.0428, "step": 100400 }, { "epoch": 9.38112573508821, "grad_norm": 1.7720681428909302, "learning_rate": 4.069484709623118e-06, "loss": 0.0394, "step": 100500 }, { "epoch": 9.39046018855596, "grad_norm": 0.9140447378158569, "learning_rate": 4.068549518376508e-06, "loss": 0.0391, "step": 100600 }, { "epoch": 9.399794642023709, "grad_norm": 1.7742828130722046, "learning_rate": 4.067614327129898e-06, "loss": 0.0412, "step": 100700 }, { "epoch": 9.409129095491458, "grad_norm": 1.5437482595443726, "learning_rate": 4.066679135883288e-06, "loss": 0.0385, "step": 100800 }, { "epoch": 9.418463548959208, "grad_norm": 2.0108509063720703, "learning_rate": 4.065743944636678e-06, "loss": 0.0421, "step": 100900 }, { "epoch": 9.427798002426957, "grad_norm": 1.7707815170288086, "learning_rate": 4.064808753390069e-06, "loss": 0.0374, "step": 101000 }, { "epoch": 9.437132455894707, "grad_norm": 0.7223606109619141, "learning_rate": 4.063873562143459e-06, "loss": 0.043, "step": 101100 }, { "epoch": 9.446466909362456, "grad_norm": 1.3927267789840698, "learning_rate": 4.062938370896849e-06, "loss": 0.0372, "step": 101200 }, { "epoch": 9.455801362830206, "grad_norm": 0.5272591710090637, "learning_rate": 4.062003179650239e-06, "loss": 0.0413, "step": 101300 }, { "epoch": 9.465135816297956, "grad_norm": 1.1050945520401, "learning_rate": 4.061067988403629e-06, "loss": 0.0383, "step": 101400 }, { "epoch": 9.474470269765705, "grad_norm": 1.4005180597305298, "learning_rate": 4.0601327971570185e-06, "loss": 0.0407, "step": 101500 }, { "epoch": 9.483804723233455, "grad_norm": 3.643659830093384, "learning_rate": 4.0591976059104085e-06, "loss": 0.0408, "step": 101600 }, { "epoch": 9.493139176701204, "grad_norm": 1.384295105934143, "learning_rate": 4.0582624146637985e-06, "loss": 0.0434, "step": 101700 }, { "epoch": 9.502473630168954, "grad_norm": 2.0031051635742188, "learning_rate": 4.0573272234171886e-06, "loss": 0.0398, "step": 101800 }, { "epoch": 9.511808083636703, "grad_norm": 1.5823559761047363, "learning_rate": 4.0563920321705794e-06, "loss": 0.0413, "step": 101900 }, { "epoch": 9.521142537104453, "grad_norm": 1.0959833860397339, "learning_rate": 4.0554568409239695e-06, "loss": 0.0403, "step": 102000 }, { "epoch": 9.530476990572202, "grad_norm": 0.6850040555000305, "learning_rate": 4.0545216496773595e-06, "loss": 0.0436, "step": 102100 }, { "epoch": 9.539811444039952, "grad_norm": 1.472144365310669, "learning_rate": 4.0535864584307495e-06, "loss": 0.0414, "step": 102200 }, { "epoch": 9.549145897507701, "grad_norm": 2.9191489219665527, "learning_rate": 4.0526512671841396e-06, "loss": 0.0406, "step": 102300 }, { "epoch": 9.55848035097545, "grad_norm": 0.6316206455230713, "learning_rate": 4.05171607593753e-06, "loss": 0.0427, "step": 102400 }, { "epoch": 9.5678148044432, "grad_norm": 1.7773360013961792, "learning_rate": 4.05078088469092e-06, "loss": 0.037, "step": 102500 }, { "epoch": 9.57714925791095, "grad_norm": 0.2555617392063141, "learning_rate": 4.04984569344431e-06, "loss": 0.0435, "step": 102600 }, { "epoch": 9.5864837113787, "grad_norm": 1.8562630414962769, "learning_rate": 4.0489105021977e-06, "loss": 0.0406, "step": 102700 }, { "epoch": 9.595818164846449, "grad_norm": 2.8650028705596924, "learning_rate": 4.04797531095109e-06, "loss": 0.0402, "step": 102800 }, { "epoch": 9.605152618314198, "grad_norm": 1.6060155630111694, "learning_rate": 4.04704011970448e-06, "loss": 0.0402, "step": 102900 }, { "epoch": 9.614487071781948, "grad_norm": 1.9710595607757568, "learning_rate": 4.04610492845787e-06, "loss": 0.0424, "step": 103000 }, { "epoch": 9.623821525249696, "grad_norm": 3.353839635848999, "learning_rate": 4.045169737211261e-06, "loss": 0.0487, "step": 103100 }, { "epoch": 9.633155978717447, "grad_norm": 0.9013803601264954, "learning_rate": 4.044234545964651e-06, "loss": 0.0399, "step": 103200 }, { "epoch": 9.642490432185195, "grad_norm": 1.4386621713638306, "learning_rate": 4.04329935471804e-06, "loss": 0.0415, "step": 103300 }, { "epoch": 9.651824885652944, "grad_norm": 2.0820109844207764, "learning_rate": 4.04236416347143e-06, "loss": 0.036, "step": 103400 }, { "epoch": 9.661159339120694, "grad_norm": 1.910815954208374, "learning_rate": 4.04142897222482e-06, "loss": 0.0421, "step": 103500 }, { "epoch": 9.670493792588443, "grad_norm": 0.9020383954048157, "learning_rate": 4.04049378097821e-06, "loss": 0.0422, "step": 103600 }, { "epoch": 9.679828246056193, "grad_norm": 1.5208380222320557, "learning_rate": 4.0395585897316e-06, "loss": 0.0389, "step": 103700 }, { "epoch": 9.689162699523942, "grad_norm": 1.5014643669128418, "learning_rate": 4.03862339848499e-06, "loss": 0.0413, "step": 103800 }, { "epoch": 9.698497152991692, "grad_norm": 2.7203567028045654, "learning_rate": 4.037688207238381e-06, "loss": 0.0399, "step": 103900 }, { "epoch": 9.707831606459441, "grad_norm": 1.3846876621246338, "learning_rate": 4.036753015991771e-06, "loss": 0.0403, "step": 104000 }, { "epoch": 9.717166059927191, "grad_norm": 1.8482463359832764, "learning_rate": 4.035817824745161e-06, "loss": 0.0403, "step": 104100 }, { "epoch": 9.72650051339494, "grad_norm": 1.175569772720337, "learning_rate": 4.034882633498551e-06, "loss": 0.0422, "step": 104200 }, { "epoch": 9.73583496686269, "grad_norm": 0.943160891532898, "learning_rate": 4.033947442251941e-06, "loss": 0.0391, "step": 104300 }, { "epoch": 9.74516942033044, "grad_norm": 1.3211737871170044, "learning_rate": 4.033012251005331e-06, "loss": 0.0412, "step": 104400 }, { "epoch": 9.754503873798189, "grad_norm": 1.5440043210983276, "learning_rate": 4.032077059758721e-06, "loss": 0.0386, "step": 104500 }, { "epoch": 9.763838327265939, "grad_norm": 1.2918140888214111, "learning_rate": 4.031141868512111e-06, "loss": 0.0449, "step": 104600 }, { "epoch": 9.773172780733688, "grad_norm": 1.2991468906402588, "learning_rate": 4.030206677265501e-06, "loss": 0.0424, "step": 104700 }, { "epoch": 9.782507234201438, "grad_norm": 1.2439590692520142, "learning_rate": 4.029271486018891e-06, "loss": 0.0408, "step": 104800 }, { "epoch": 9.791841687669187, "grad_norm": 2.2895917892456055, "learning_rate": 4.028336294772281e-06, "loss": 0.0425, "step": 104900 }, { "epoch": 9.801176141136937, "grad_norm": 0.9269893765449524, "learning_rate": 4.027401103525671e-06, "loss": 0.0397, "step": 105000 }, { "epoch": 9.801176141136937, "eval_accuracy": 0.706977829638273, "eval_f1": 0.8311418563306402, "eval_loss": 0.11370216310024261, "eval_roc_auc": 0.9112670790279295, "eval_runtime": 277.843, "eval_samples_per_second": 154.224, "eval_steps_per_second": 154.224, "step": 105000 }, { "epoch": 9.810510594604686, "grad_norm": 2.0240061283111572, "learning_rate": 4.026465912279061e-06, "loss": 0.0415, "step": 105100 }, { "epoch": 9.819845048072436, "grad_norm": 0.7485975623130798, "learning_rate": 4.025530721032451e-06, "loss": 0.0401, "step": 105200 }, { "epoch": 9.829179501540185, "grad_norm": 0.7071278691291809, "learning_rate": 4.024595529785841e-06, "loss": 0.0408, "step": 105300 }, { "epoch": 9.838513955007935, "grad_norm": 2.503045082092285, "learning_rate": 4.023660338539231e-06, "loss": 0.0402, "step": 105400 }, { "epoch": 9.847848408475684, "grad_norm": 1.4843112230300903, "learning_rate": 4.022725147292621e-06, "loss": 0.044, "step": 105500 }, { "epoch": 9.857182861943434, "grad_norm": 2.6434226036071777, "learning_rate": 4.0217899560460114e-06, "loss": 0.0407, "step": 105600 }, { "epoch": 9.866517315411183, "grad_norm": 1.2272447347640991, "learning_rate": 4.0208547647994015e-06, "loss": 0.0391, "step": 105700 }, { "epoch": 9.875851768878933, "grad_norm": 2.515641689300537, "learning_rate": 4.0199195735527915e-06, "loss": 0.0415, "step": 105800 }, { "epoch": 9.885186222346682, "grad_norm": 2.2287826538085938, "learning_rate": 4.0189843823061815e-06, "loss": 0.0403, "step": 105900 }, { "epoch": 9.89452067581443, "grad_norm": 2.5706584453582764, "learning_rate": 4.018049191059572e-06, "loss": 0.0417, "step": 106000 }, { "epoch": 9.90385512928218, "grad_norm": 1.263587474822998, "learning_rate": 4.0171139998129624e-06, "loss": 0.0406, "step": 106100 }, { "epoch": 9.91318958274993, "grad_norm": 1.4389511346817017, "learning_rate": 4.0161788085663525e-06, "loss": 0.0391, "step": 106200 }, { "epoch": 9.922524036217679, "grad_norm": 1.3214988708496094, "learning_rate": 4.0152436173197425e-06, "loss": 0.0447, "step": 106300 }, { "epoch": 9.931858489685428, "grad_norm": 1.4993070363998413, "learning_rate": 4.0143084260731325e-06, "loss": 0.0416, "step": 106400 }, { "epoch": 9.941192943153178, "grad_norm": 0.6991490125656128, "learning_rate": 4.0133732348265226e-06, "loss": 0.04, "step": 106500 }, { "epoch": 9.950527396620927, "grad_norm": 0.8235797882080078, "learning_rate": 4.012438043579913e-06, "loss": 0.0391, "step": 106600 }, { "epoch": 9.959861850088677, "grad_norm": 1.1014268398284912, "learning_rate": 4.011502852333303e-06, "loss": 0.0444, "step": 106700 }, { "epoch": 9.969196303556426, "grad_norm": 2.0206925868988037, "learning_rate": 4.010567661086693e-06, "loss": 0.0431, "step": 106800 }, { "epoch": 9.978530757024176, "grad_norm": 1.9797160625457764, "learning_rate": 4.009632469840083e-06, "loss": 0.0402, "step": 106900 }, { "epoch": 9.987865210491925, "grad_norm": 0.832914412021637, "learning_rate": 4.008697278593473e-06, "loss": 0.0397, "step": 107000 }, { "epoch": 9.997199663959675, "grad_norm": 0.6225849390029907, "learning_rate": 4.007762087346863e-06, "loss": 0.0448, "step": 107100 }, { "epoch": 10.006534117427424, "grad_norm": 1.126889705657959, "learning_rate": 4.006826896100253e-06, "loss": 0.0365, "step": 107200 }, { "epoch": 10.015868570895174, "grad_norm": 1.7320517301559448, "learning_rate": 4.005891704853643e-06, "loss": 0.0353, "step": 107300 }, { "epoch": 10.025203024362924, "grad_norm": 1.0367671251296997, "learning_rate": 4.004956513607033e-06, "loss": 0.0379, "step": 107400 }, { "epoch": 10.034537477830673, "grad_norm": 3.001727819442749, "learning_rate": 4.004021322360423e-06, "loss": 0.036, "step": 107500 }, { "epoch": 10.043871931298423, "grad_norm": 2.7407331466674805, "learning_rate": 4.003086131113813e-06, "loss": 0.0375, "step": 107600 }, { "epoch": 10.053206384766172, "grad_norm": 3.2468318939208984, "learning_rate": 4.002150939867203e-06, "loss": 0.0375, "step": 107700 }, { "epoch": 10.062540838233922, "grad_norm": 1.336418628692627, "learning_rate": 4.001215748620593e-06, "loss": 0.0372, "step": 107800 }, { "epoch": 10.071875291701671, "grad_norm": 0.5754865407943726, "learning_rate": 4.000280557373983e-06, "loss": 0.035, "step": 107900 }, { "epoch": 10.08120974516942, "grad_norm": 2.061115026473999, "learning_rate": 3.999345366127374e-06, "loss": 0.0389, "step": 108000 }, { "epoch": 10.09054419863717, "grad_norm": 2.4892587661743164, "learning_rate": 3.998410174880764e-06, "loss": 0.0351, "step": 108100 }, { "epoch": 10.09987865210492, "grad_norm": 1.5056114196777344, "learning_rate": 3.997474983634154e-06, "loss": 0.0361, "step": 108200 }, { "epoch": 10.10921310557267, "grad_norm": 1.0827305316925049, "learning_rate": 3.996539792387544e-06, "loss": 0.0345, "step": 108300 }, { "epoch": 10.118547559040419, "grad_norm": 1.829848289489746, "learning_rate": 3.995604601140934e-06, "loss": 0.0372, "step": 108400 }, { "epoch": 10.127882012508168, "grad_norm": 1.8029826879501343, "learning_rate": 3.994669409894324e-06, "loss": 0.0373, "step": 108500 }, { "epoch": 10.137216465975918, "grad_norm": 0.8853600025177002, "learning_rate": 3.993734218647714e-06, "loss": 0.0376, "step": 108600 }, { "epoch": 10.146550919443667, "grad_norm": 0.5897056460380554, "learning_rate": 3.992799027401103e-06, "loss": 0.0347, "step": 108700 }, { "epoch": 10.155885372911417, "grad_norm": 0.8051505088806152, "learning_rate": 3.991863836154494e-06, "loss": 0.0339, "step": 108800 }, { "epoch": 10.165219826379165, "grad_norm": 0.9145634174346924, "learning_rate": 3.990928644907884e-06, "loss": 0.0354, "step": 108900 }, { "epoch": 10.174554279846914, "grad_norm": 1.0032274723052979, "learning_rate": 3.989993453661274e-06, "loss": 0.0316, "step": 109000 }, { "epoch": 10.183888733314664, "grad_norm": 1.6057904958724976, "learning_rate": 3.989058262414664e-06, "loss": 0.037, "step": 109100 }, { "epoch": 10.193223186782413, "grad_norm": 2.258253335952759, "learning_rate": 3.988123071168054e-06, "loss": 0.0347, "step": 109200 }, { "epoch": 10.202557640250163, "grad_norm": 2.8038644790649414, "learning_rate": 3.987187879921444e-06, "loss": 0.034, "step": 109300 }, { "epoch": 10.211892093717912, "grad_norm": 0.6052369475364685, "learning_rate": 3.986252688674834e-06, "loss": 0.037, "step": 109400 }, { "epoch": 10.221226547185662, "grad_norm": 1.7901508808135986, "learning_rate": 3.985317497428224e-06, "loss": 0.0342, "step": 109500 }, { "epoch": 10.230561000653411, "grad_norm": 1.5829893350601196, "learning_rate": 3.984382306181614e-06, "loss": 0.0371, "step": 109600 }, { "epoch": 10.23989545412116, "grad_norm": 3.26950740814209, "learning_rate": 3.983447114935004e-06, "loss": 0.0359, "step": 109700 }, { "epoch": 10.24922990758891, "grad_norm": 1.9242045879364014, "learning_rate": 3.982511923688394e-06, "loss": 0.0366, "step": 109800 }, { "epoch": 10.25856436105666, "grad_norm": 1.2684547901153564, "learning_rate": 3.9815767324417844e-06, "loss": 0.0374, "step": 109900 }, { "epoch": 10.26789881452441, "grad_norm": 1.9700772762298584, "learning_rate": 3.9806415411951745e-06, "loss": 0.0331, "step": 110000 }, { "epoch": 10.26789881452441, "eval_accuracy": 0.7083080513418903, "eval_f1": 0.8311013356510634, "eval_loss": 0.12188153713941574, "eval_roc_auc": 0.9085676573108453, "eval_runtime": 308.1494, "eval_samples_per_second": 139.056, "eval_steps_per_second": 139.056, "step": 110000 }, { "epoch": 10.277233267992159, "grad_norm": 2.5090808868408203, "learning_rate": 3.979706349948565e-06, "loss": 0.0338, "step": 110100 }, { "epoch": 10.286567721459908, "grad_norm": 1.1698442697525024, "learning_rate": 3.978771158701955e-06, "loss": 0.0411, "step": 110200 }, { "epoch": 10.295902174927658, "grad_norm": 1.6109659671783447, "learning_rate": 3.977835967455345e-06, "loss": 0.0398, "step": 110300 }, { "epoch": 10.305236628395408, "grad_norm": 0.9092515707015991, "learning_rate": 3.9769007762087354e-06, "loss": 0.0368, "step": 110400 }, { "epoch": 10.314571081863157, "grad_norm": 1.9212427139282227, "learning_rate": 3.975965584962125e-06, "loss": 0.0378, "step": 110500 }, { "epoch": 10.323905535330907, "grad_norm": 1.7711687088012695, "learning_rate": 3.975030393715515e-06, "loss": 0.0332, "step": 110600 }, { "epoch": 10.333239988798656, "grad_norm": 1.3874770402908325, "learning_rate": 3.974095202468905e-06, "loss": 0.0399, "step": 110700 }, { "epoch": 10.342574442266406, "grad_norm": 3.1139469146728516, "learning_rate": 3.973160011222295e-06, "loss": 0.0393, "step": 110800 }, { "epoch": 10.351908895734155, "grad_norm": 2.294222116470337, "learning_rate": 3.972224819975686e-06, "loss": 0.0335, "step": 110900 }, { "epoch": 10.361243349201905, "grad_norm": 1.127304196357727, "learning_rate": 3.971289628729076e-06, "loss": 0.0379, "step": 111000 }, { "epoch": 10.370577802669654, "grad_norm": 1.8752909898757935, "learning_rate": 3.970354437482466e-06, "loss": 0.0383, "step": 111100 }, { "epoch": 10.379912256137404, "grad_norm": 2.180906295776367, "learning_rate": 3.969419246235856e-06, "loss": 0.0387, "step": 111200 }, { "epoch": 10.389246709605153, "grad_norm": 2.625993013381958, "learning_rate": 3.968484054989246e-06, "loss": 0.0379, "step": 111300 }, { "epoch": 10.398581163072903, "grad_norm": 1.4526822566986084, "learning_rate": 3.967548863742636e-06, "loss": 0.039, "step": 111400 }, { "epoch": 10.407915616540652, "grad_norm": 0.9188318848609924, "learning_rate": 3.966613672496026e-06, "loss": 0.0364, "step": 111500 }, { "epoch": 10.417250070008402, "grad_norm": 1.70741868019104, "learning_rate": 3.965678481249416e-06, "loss": 0.0375, "step": 111600 }, { "epoch": 10.42658452347615, "grad_norm": 1.0865141153335571, "learning_rate": 3.964743290002806e-06, "loss": 0.0377, "step": 111700 }, { "epoch": 10.4359189769439, "grad_norm": 2.925647020339966, "learning_rate": 3.963808098756196e-06, "loss": 0.0381, "step": 111800 }, { "epoch": 10.445253430411649, "grad_norm": 2.4245376586914062, "learning_rate": 3.962872907509586e-06, "loss": 0.0363, "step": 111900 }, { "epoch": 10.454587883879398, "grad_norm": 0.19193170964717865, "learning_rate": 3.961937716262976e-06, "loss": 0.0391, "step": 112000 }, { "epoch": 10.463922337347148, "grad_norm": 1.2933944463729858, "learning_rate": 3.961002525016367e-06, "loss": 0.036, "step": 112100 }, { "epoch": 10.473256790814897, "grad_norm": 0.39182743430137634, "learning_rate": 3.960067333769757e-06, "loss": 0.0377, "step": 112200 }, { "epoch": 10.482591244282647, "grad_norm": 1.1527847051620483, "learning_rate": 3.959132142523146e-06, "loss": 0.037, "step": 112300 }, { "epoch": 10.491925697750396, "grad_norm": 0.6842463612556458, "learning_rate": 3.958196951276536e-06, "loss": 0.0339, "step": 112400 }, { "epoch": 10.501260151218146, "grad_norm": 1.2041200399398804, "learning_rate": 3.957261760029926e-06, "loss": 0.0354, "step": 112500 }, { "epoch": 10.510594604685895, "grad_norm": 1.1626628637313843, "learning_rate": 3.956326568783316e-06, "loss": 0.0362, "step": 112600 }, { "epoch": 10.519929058153645, "grad_norm": 0.6397420763969421, "learning_rate": 3.955391377536706e-06, "loss": 0.0339, "step": 112700 }, { "epoch": 10.529263511621394, "grad_norm": 1.2050179243087769, "learning_rate": 3.954456186290096e-06, "loss": 0.039, "step": 112800 }, { "epoch": 10.538597965089144, "grad_norm": 1.0102075338363647, "learning_rate": 3.953520995043487e-06, "loss": 0.041, "step": 112900 }, { "epoch": 10.547932418556893, "grad_norm": 3.9999985694885254, "learning_rate": 3.952585803796877e-06, "loss": 0.0361, "step": 113000 }, { "epoch": 10.557266872024643, "grad_norm": 2.5963854789733887, "learning_rate": 3.951650612550267e-06, "loss": 0.0373, "step": 113100 }, { "epoch": 10.566601325492393, "grad_norm": 2.1850786209106445, "learning_rate": 3.950715421303657e-06, "loss": 0.0384, "step": 113200 }, { "epoch": 10.575935778960142, "grad_norm": 1.5754753351211548, "learning_rate": 3.949780230057047e-06, "loss": 0.0361, "step": 113300 }, { "epoch": 10.585270232427892, "grad_norm": 1.0677639245986938, "learning_rate": 3.948845038810437e-06, "loss": 0.0371, "step": 113400 }, { "epoch": 10.594604685895641, "grad_norm": 1.2092610597610474, "learning_rate": 3.947909847563827e-06, "loss": 0.0362, "step": 113500 }, { "epoch": 10.60393913936339, "grad_norm": 2.4268436431884766, "learning_rate": 3.946974656317217e-06, "loss": 0.0366, "step": 113600 }, { "epoch": 10.61327359283114, "grad_norm": 0.9172078967094421, "learning_rate": 3.946039465070607e-06, "loss": 0.036, "step": 113700 }, { "epoch": 10.62260804629889, "grad_norm": 1.4127870798110962, "learning_rate": 3.945104273823997e-06, "loss": 0.0386, "step": 113800 }, { "epoch": 10.63194249976664, "grad_norm": 1.6420464515686035, "learning_rate": 3.944169082577387e-06, "loss": 0.0396, "step": 113900 }, { "epoch": 10.641276953234389, "grad_norm": 1.6468687057495117, "learning_rate": 3.943233891330777e-06, "loss": 0.0325, "step": 114000 }, { "epoch": 10.650611406702138, "grad_norm": 2.0539586544036865, "learning_rate": 3.942298700084167e-06, "loss": 0.0326, "step": 114100 }, { "epoch": 10.659945860169888, "grad_norm": 3.0246317386627197, "learning_rate": 3.9413635088375575e-06, "loss": 0.036, "step": 114200 }, { "epoch": 10.669280313637637, "grad_norm": 0.3392831087112427, "learning_rate": 3.9404283175909475e-06, "loss": 0.0361, "step": 114300 }, { "epoch": 10.678614767105387, "grad_norm": 1.4281671047210693, "learning_rate": 3.9394931263443375e-06, "loss": 0.038, "step": 114400 }, { "epoch": 10.687949220573135, "grad_norm": 0.5781307816505432, "learning_rate": 3.9385579350977275e-06, "loss": 0.0406, "step": 114500 }, { "epoch": 10.697283674040884, "grad_norm": 2.6627559661865234, "learning_rate": 3.9376227438511176e-06, "loss": 0.0399, "step": 114600 }, { "epoch": 10.706618127508634, "grad_norm": 1.876204252243042, "learning_rate": 3.936687552604508e-06, "loss": 0.0397, "step": 114700 }, { "epoch": 10.715952580976383, "grad_norm": 1.6831400394439697, "learning_rate": 3.935752361357898e-06, "loss": 0.0405, "step": 114800 }, { "epoch": 10.725287034444133, "grad_norm": 1.326921820640564, "learning_rate": 3.934817170111288e-06, "loss": 0.0337, "step": 114900 }, { "epoch": 10.734621487911882, "grad_norm": 0.8526946306228638, "learning_rate": 3.9338819788646785e-06, "loss": 0.0362, "step": 115000 }, { "epoch": 10.734621487911882, "eval_accuracy": 0.7094749124854143, "eval_f1": 0.8263896364876018, "eval_loss": 0.12359249591827393, "eval_roc_auc": 0.9019647484661355, "eval_runtime": 367.9774, "eval_samples_per_second": 116.447, "eval_steps_per_second": 116.447, "step": 115000 }, { "epoch": 10.743955941379632, "grad_norm": 2.6565158367156982, "learning_rate": 3.9329467876180686e-06, "loss": 0.0381, "step": 115100 }, { "epoch": 10.753290394847381, "grad_norm": 2.300997495651245, "learning_rate": 3.932011596371459e-06, "loss": 0.0369, "step": 115200 }, { "epoch": 10.76262484831513, "grad_norm": 1.772383213043213, "learning_rate": 3.931076405124849e-06, "loss": 0.0357, "step": 115300 }, { "epoch": 10.77195930178288, "grad_norm": 0.9058948755264282, "learning_rate": 3.930141213878239e-06, "loss": 0.0404, "step": 115400 }, { "epoch": 10.78129375525063, "grad_norm": 2.12406325340271, "learning_rate": 3.929206022631629e-06, "loss": 0.036, "step": 115500 }, { "epoch": 10.79062820871838, "grad_norm": 2.414334297180176, "learning_rate": 3.928270831385019e-06, "loss": 0.0397, "step": 115600 }, { "epoch": 10.799962662186129, "grad_norm": 4.067594051361084, "learning_rate": 3.927335640138409e-06, "loss": 0.0395, "step": 115700 }, { "epoch": 10.809297115653878, "grad_norm": 4.449220657348633, "learning_rate": 3.926400448891799e-06, "loss": 0.0409, "step": 115800 }, { "epoch": 10.818631569121628, "grad_norm": 0.15116126835346222, "learning_rate": 3.925465257645189e-06, "loss": 0.035, "step": 115900 }, { "epoch": 10.827966022589377, "grad_norm": 0.7203426957130432, "learning_rate": 3.924530066398579e-06, "loss": 0.0367, "step": 116000 }, { "epoch": 10.837300476057127, "grad_norm": 2.215205192565918, "learning_rate": 3.923594875151969e-06, "loss": 0.0421, "step": 116100 }, { "epoch": 10.846634929524877, "grad_norm": 7.958796977996826, "learning_rate": 3.922659683905359e-06, "loss": 0.0358, "step": 116200 }, { "epoch": 10.855969382992626, "grad_norm": 0.8771901726722717, "learning_rate": 3.921724492658749e-06, "loss": 0.0366, "step": 116300 }, { "epoch": 10.865303836460376, "grad_norm": 0.8935365676879883, "learning_rate": 3.920789301412139e-06, "loss": 0.0421, "step": 116400 }, { "epoch": 10.874638289928125, "grad_norm": 8.47152042388916, "learning_rate": 3.919854110165529e-06, "loss": 0.0426, "step": 116500 }, { "epoch": 10.883972743395875, "grad_norm": 2.1826014518737793, "learning_rate": 3.918918918918919e-06, "loss": 0.0373, "step": 116600 }, { "epoch": 10.893307196863624, "grad_norm": 3.744623899459839, "learning_rate": 3.917983727672309e-06, "loss": 0.0366, "step": 116700 }, { "epoch": 10.902641650331374, "grad_norm": 1.0529848337173462, "learning_rate": 3.917048536425699e-06, "loss": 0.0332, "step": 116800 }, { "epoch": 10.911976103799123, "grad_norm": 3.0694305896759033, "learning_rate": 3.916113345179089e-06, "loss": 0.0361, "step": 116900 }, { "epoch": 10.921310557266873, "grad_norm": 1.146734356880188, "learning_rate": 3.915178153932479e-06, "loss": 0.0383, "step": 117000 }, { "epoch": 10.930645010734622, "grad_norm": 1.3440457582473755, "learning_rate": 3.91424296268587e-06, "loss": 0.0349, "step": 117100 }, { "epoch": 10.939979464202372, "grad_norm": 2.8159146308898926, "learning_rate": 3.91330777143926e-06, "loss": 0.036, "step": 117200 }, { "epoch": 10.949313917670121, "grad_norm": 1.25785493850708, "learning_rate": 3.91237258019265e-06, "loss": 0.0373, "step": 117300 }, { "epoch": 10.958648371137869, "grad_norm": 1.7790802717208862, "learning_rate": 3.91143738894604e-06, "loss": 0.0353, "step": 117400 }, { "epoch": 10.967982824605619, "grad_norm": 1.7132749557495117, "learning_rate": 3.91050219769943e-06, "loss": 0.0357, "step": 117500 }, { "epoch": 10.977317278073368, "grad_norm": 2.4165821075439453, "learning_rate": 3.90956700645282e-06, "loss": 0.038, "step": 117600 }, { "epoch": 10.986651731541118, "grad_norm": 2.8933680057525635, "learning_rate": 3.90863181520621e-06, "loss": 0.037, "step": 117700 }, { "epoch": 10.995986185008867, "grad_norm": 2.1416046619415283, "learning_rate": 3.907696623959599e-06, "loss": 0.0362, "step": 117800 }, { "epoch": 11.005320638476617, "grad_norm": 2.848639965057373, "learning_rate": 3.90676143271299e-06, "loss": 0.0372, "step": 117900 }, { "epoch": 11.014655091944366, "grad_norm": 1.3614567518234253, "learning_rate": 3.90582624146638e-06, "loss": 0.0299, "step": 118000 }, { "epoch": 11.023989545412116, "grad_norm": 0.9328247904777527, "learning_rate": 3.90489105021977e-06, "loss": 0.0341, "step": 118100 }, { "epoch": 11.033323998879865, "grad_norm": 0.6602435111999512, "learning_rate": 3.90395585897316e-06, "loss": 0.0319, "step": 118200 }, { "epoch": 11.042658452347615, "grad_norm": 1.6617337465286255, "learning_rate": 3.90302066772655e-06, "loss": 0.0318, "step": 118300 }, { "epoch": 11.051992905815364, "grad_norm": 1.6842557191848755, "learning_rate": 3.9020854764799404e-06, "loss": 0.0321, "step": 118400 }, { "epoch": 11.061327359283114, "grad_norm": 3.088630437850952, "learning_rate": 3.9011502852333305e-06, "loss": 0.0286, "step": 118500 }, { "epoch": 11.070661812750863, "grad_norm": 2.8239288330078125, "learning_rate": 3.9002150939867205e-06, "loss": 0.0305, "step": 118600 }, { "epoch": 11.079996266218613, "grad_norm": 1.8181819915771484, "learning_rate": 3.8992799027401105e-06, "loss": 0.0295, "step": 118700 }, { "epoch": 11.089330719686362, "grad_norm": 1.5125356912612915, "learning_rate": 3.8983447114935006e-06, "loss": 0.0325, "step": 118800 }, { "epoch": 11.098665173154112, "grad_norm": 2.9955854415893555, "learning_rate": 3.897409520246891e-06, "loss": 0.032, "step": 118900 }, { "epoch": 11.107999626621861, "grad_norm": 1.6998971700668335, "learning_rate": 3.896474329000281e-06, "loss": 0.0349, "step": 119000 }, { "epoch": 11.117334080089611, "grad_norm": 1.94708251953125, "learning_rate": 3.8955391377536715e-06, "loss": 0.0324, "step": 119100 }, { "epoch": 11.12666853355736, "grad_norm": 2.34228515625, "learning_rate": 3.8946039465070615e-06, "loss": 0.0299, "step": 119200 }, { "epoch": 11.13600298702511, "grad_norm": 2.6231141090393066, "learning_rate": 3.8936687552604516e-06, "loss": 0.0303, "step": 119300 }, { "epoch": 11.14533744049286, "grad_norm": 3.2039599418640137, "learning_rate": 3.892733564013842e-06, "loss": 0.0338, "step": 119400 }, { "epoch": 11.154671893960609, "grad_norm": 1.8523154258728027, "learning_rate": 3.891798372767232e-06, "loss": 0.0324, "step": 119500 }, { "epoch": 11.164006347428359, "grad_norm": 1.4449597597122192, "learning_rate": 3.890863181520621e-06, "loss": 0.0299, "step": 119600 }, { "epoch": 11.173340800896108, "grad_norm": 2.8235390186309814, "learning_rate": 3.889927990274011e-06, "loss": 0.0338, "step": 119700 }, { "epoch": 11.182675254363858, "grad_norm": 5.438403606414795, "learning_rate": 3.888992799027401e-06, "loss": 0.0279, "step": 119800 }, { "epoch": 11.192009707831607, "grad_norm": 1.0441724061965942, "learning_rate": 3.888057607780792e-06, "loss": 0.0313, "step": 119900 }, { "epoch": 11.201344161299357, "grad_norm": 3.032236099243164, "learning_rate": 3.887122416534182e-06, "loss": 0.0344, "step": 120000 }, { "epoch": 11.201344161299357, "eval_accuracy": 0.7048074679113185, "eval_f1": 0.8280805070924174, "eval_loss": 0.13016606867313385, "eval_roc_auc": 0.9089340858743638, "eval_runtime": 442.2704, "eval_samples_per_second": 96.886, "eval_steps_per_second": 96.886, "step": 120000 }, { "epoch": 11.210678614767106, "grad_norm": 2.3758294582366943, "learning_rate": 3.886187225287572e-06, "loss": 0.0336, "step": 120100 }, { "epoch": 11.220013068234854, "grad_norm": 1.9052493572235107, "learning_rate": 3.885252034040962e-06, "loss": 0.0336, "step": 120200 }, { "epoch": 11.229347521702604, "grad_norm": 0.9044409394264221, "learning_rate": 3.884316842794352e-06, "loss": 0.0323, "step": 120300 }, { "epoch": 11.238681975170353, "grad_norm": 2.0466811656951904, "learning_rate": 3.883381651547742e-06, "loss": 0.0369, "step": 120400 }, { "epoch": 11.248016428638103, "grad_norm": 2.36255145072937, "learning_rate": 3.882446460301132e-06, "loss": 0.0312, "step": 120500 }, { "epoch": 11.257350882105852, "grad_norm": 0.5004875063896179, "learning_rate": 3.881511269054522e-06, "loss": 0.0342, "step": 120600 }, { "epoch": 11.266685335573602, "grad_norm": 2.3065147399902344, "learning_rate": 3.880576077807912e-06, "loss": 0.0339, "step": 120700 }, { "epoch": 11.276019789041351, "grad_norm": 1.4460052251815796, "learning_rate": 3.879640886561302e-06, "loss": 0.034, "step": 120800 }, { "epoch": 11.2853542425091, "grad_norm": 0.9856796264648438, "learning_rate": 3.878705695314692e-06, "loss": 0.0307, "step": 120900 }, { "epoch": 11.29468869597685, "grad_norm": 0.3333209455013275, "learning_rate": 3.877770504068082e-06, "loss": 0.0319, "step": 121000 }, { "epoch": 11.3040231494446, "grad_norm": 1.672767996788025, "learning_rate": 3.876835312821472e-06, "loss": 0.0308, "step": 121100 }, { "epoch": 11.31335760291235, "grad_norm": 1.6488512754440308, "learning_rate": 3.875900121574863e-06, "loss": 0.031, "step": 121200 }, { "epoch": 11.322692056380099, "grad_norm": 1.3934030532836914, "learning_rate": 3.874964930328253e-06, "loss": 0.0291, "step": 121300 }, { "epoch": 11.332026509847848, "grad_norm": 2.237295627593994, "learning_rate": 3.874029739081642e-06, "loss": 0.0348, "step": 121400 }, { "epoch": 11.341360963315598, "grad_norm": 2.345660448074341, "learning_rate": 3.873094547835032e-06, "loss": 0.0338, "step": 121500 }, { "epoch": 11.350695416783347, "grad_norm": 2.6133992671966553, "learning_rate": 3.872159356588422e-06, "loss": 0.0318, "step": 121600 }, { "epoch": 11.360029870251097, "grad_norm": 2.3904216289520264, "learning_rate": 3.871224165341812e-06, "loss": 0.035, "step": 121700 }, { "epoch": 11.369364323718846, "grad_norm": 1.894097924232483, "learning_rate": 3.870288974095202e-06, "loss": 0.031, "step": 121800 }, { "epoch": 11.378698777186596, "grad_norm": 1.5171622037887573, "learning_rate": 3.869353782848592e-06, "loss": 0.032, "step": 121900 }, { "epoch": 11.388033230654345, "grad_norm": 1.098861813545227, "learning_rate": 3.868418591601983e-06, "loss": 0.0296, "step": 122000 }, { "epoch": 11.397367684122095, "grad_norm": 1.7855114936828613, "learning_rate": 3.867483400355373e-06, "loss": 0.0324, "step": 122100 }, { "epoch": 11.406702137589845, "grad_norm": 0.8846161961555481, "learning_rate": 3.866548209108763e-06, "loss": 0.034, "step": 122200 }, { "epoch": 11.416036591057594, "grad_norm": 1.018062710762024, "learning_rate": 3.865613017862153e-06, "loss": 0.0319, "step": 122300 }, { "epoch": 11.425371044525344, "grad_norm": 2.8814899921417236, "learning_rate": 3.864677826615543e-06, "loss": 0.033, "step": 122400 }, { "epoch": 11.434705497993093, "grad_norm": 0.784453272819519, "learning_rate": 3.863742635368933e-06, "loss": 0.0336, "step": 122500 }, { "epoch": 11.444039951460843, "grad_norm": 2.4229576587677, "learning_rate": 3.862807444122323e-06, "loss": 0.0337, "step": 122600 }, { "epoch": 11.453374404928592, "grad_norm": 2.1612257957458496, "learning_rate": 3.8618722528757134e-06, "loss": 0.0334, "step": 122700 }, { "epoch": 11.462708858396342, "grad_norm": 2.7249927520751953, "learning_rate": 3.8609370616291035e-06, "loss": 0.0349, "step": 122800 }, { "epoch": 11.472043311864091, "grad_norm": 2.82106351852417, "learning_rate": 3.8600018703824935e-06, "loss": 0.0348, "step": 122900 }, { "epoch": 11.481377765331839, "grad_norm": 3.161189079284668, "learning_rate": 3.8590666791358835e-06, "loss": 0.034, "step": 123000 }, { "epoch": 11.490712218799588, "grad_norm": 1.5806331634521484, "learning_rate": 3.8581314878892736e-06, "loss": 0.0364, "step": 123100 }, { "epoch": 11.500046672267338, "grad_norm": 0.7249224781990051, "learning_rate": 3.857196296642664e-06, "loss": 0.0314, "step": 123200 }, { "epoch": 11.509381125735088, "grad_norm": 1.052594780921936, "learning_rate": 3.856261105396054e-06, "loss": 0.034, "step": 123300 }, { "epoch": 11.518715579202837, "grad_norm": 1.2321397066116333, "learning_rate": 3.855325914149444e-06, "loss": 0.0321, "step": 123400 }, { "epoch": 11.528050032670587, "grad_norm": 1.2260328531265259, "learning_rate": 3.854390722902834e-06, "loss": 0.0315, "step": 123500 }, { "epoch": 11.537384486138336, "grad_norm": 2.9174234867095947, "learning_rate": 3.853455531656224e-06, "loss": 0.0318, "step": 123600 }, { "epoch": 11.546718939606086, "grad_norm": 3.2269093990325928, "learning_rate": 3.852520340409614e-06, "loss": 0.0354, "step": 123700 }, { "epoch": 11.556053393073835, "grad_norm": 1.0306735038757324, "learning_rate": 3.851585149163004e-06, "loss": 0.0336, "step": 123800 }, { "epoch": 11.565387846541585, "grad_norm": 1.3264538049697876, "learning_rate": 3.850649957916394e-06, "loss": 0.0356, "step": 123900 }, { "epoch": 11.574722300009334, "grad_norm": 2.0072412490844727, "learning_rate": 3.849714766669785e-06, "loss": 0.0346, "step": 124000 }, { "epoch": 11.584056753477084, "grad_norm": 1.0714682340621948, "learning_rate": 3.848779575423175e-06, "loss": 0.0331, "step": 124100 }, { "epoch": 11.593391206944833, "grad_norm": 1.6354395151138306, "learning_rate": 3.847844384176565e-06, "loss": 0.0341, "step": 124200 }, { "epoch": 11.602725660412583, "grad_norm": 0.8709101676940918, "learning_rate": 3.846909192929955e-06, "loss": 0.0364, "step": 124300 }, { "epoch": 11.612060113880332, "grad_norm": 1.4203240871429443, "learning_rate": 3.845974001683345e-06, "loss": 0.0344, "step": 124400 }, { "epoch": 11.621394567348082, "grad_norm": 1.8724440336227417, "learning_rate": 3.845038810436735e-06, "loss": 0.0334, "step": 124500 }, { "epoch": 11.630729020815831, "grad_norm": 16.636337280273438, "learning_rate": 3.844103619190125e-06, "loss": 0.0356, "step": 124600 }, { "epoch": 11.640063474283581, "grad_norm": 1.971029281616211, "learning_rate": 3.843168427943515e-06, "loss": 0.036, "step": 124700 }, { "epoch": 11.64939792775133, "grad_norm": 1.0349078178405762, "learning_rate": 3.842233236696905e-06, "loss": 0.0294, "step": 124800 }, { "epoch": 11.65873238121908, "grad_norm": 1.9390078783035278, "learning_rate": 3.841298045450295e-06, "loss": 0.0353, "step": 124900 }, { "epoch": 11.66806683468683, "grad_norm": 1.5264805555343628, "learning_rate": 3.840362854203685e-06, "loss": 0.0305, "step": 125000 }, { "epoch": 11.66806683468683, "eval_accuracy": 0.702730455075846, "eval_f1": 0.8288877396281654, "eval_loss": 0.13252520561218262, "eval_roc_auc": 0.9113970231683121, "eval_runtime": 428.7936, "eval_samples_per_second": 99.932, "eval_steps_per_second": 99.932, "step": 125000 }, { "epoch": 11.677401288154579, "grad_norm": 1.8842053413391113, "learning_rate": 3.839427662957075e-06, "loss": 0.0341, "step": 125100 }, { "epoch": 11.686735741622329, "grad_norm": 2.1449780464172363, "learning_rate": 3.838492471710465e-06, "loss": 0.0327, "step": 125200 }, { "epoch": 11.696070195090078, "grad_norm": 3.187577486038208, "learning_rate": 3.837557280463855e-06, "loss": 0.0317, "step": 125300 }, { "epoch": 11.705404648557828, "grad_norm": 0.9446759819984436, "learning_rate": 3.836622089217245e-06, "loss": 0.0312, "step": 125400 }, { "epoch": 11.714739102025577, "grad_norm": 2.152543783187866, "learning_rate": 3.835686897970635e-06, "loss": 0.0333, "step": 125500 }, { "epoch": 11.724073555493327, "grad_norm": 1.3889483213424683, "learning_rate": 3.834751706724025e-06, "loss": 0.0346, "step": 125600 }, { "epoch": 11.733408008961074, "grad_norm": 2.2608466148376465, "learning_rate": 3.833816515477415e-06, "loss": 0.0331, "step": 125700 }, { "epoch": 11.742742462428826, "grad_norm": 1.4978859424591064, "learning_rate": 3.832881324230805e-06, "loss": 0.033, "step": 125800 }, { "epoch": 11.752076915896573, "grad_norm": 1.975625991821289, "learning_rate": 3.831946132984195e-06, "loss": 0.0327, "step": 125900 }, { "epoch": 11.761411369364323, "grad_norm": 1.6714463233947754, "learning_rate": 3.831010941737585e-06, "loss": 0.0351, "step": 126000 }, { "epoch": 11.770745822832072, "grad_norm": 1.3927501440048218, "learning_rate": 3.830075750490976e-06, "loss": 0.0303, "step": 126100 }, { "epoch": 11.780080276299822, "grad_norm": 1.5079671144485474, "learning_rate": 3.829140559244366e-06, "loss": 0.0388, "step": 126200 }, { "epoch": 11.789414729767572, "grad_norm": 2.665618658065796, "learning_rate": 3.828205367997756e-06, "loss": 0.0309, "step": 126300 }, { "epoch": 11.798749183235321, "grad_norm": 1.8315072059631348, "learning_rate": 3.827270176751146e-06, "loss": 0.0289, "step": 126400 }, { "epoch": 11.80808363670307, "grad_norm": 1.5068633556365967, "learning_rate": 3.826334985504536e-06, "loss": 0.0319, "step": 126500 }, { "epoch": 11.81741809017082, "grad_norm": 2.0744593143463135, "learning_rate": 3.825399794257926e-06, "loss": 0.0328, "step": 126600 }, { "epoch": 11.82675254363857, "grad_norm": 1.9753278493881226, "learning_rate": 3.824464603011316e-06, "loss": 0.0325, "step": 126700 }, { "epoch": 11.83608699710632, "grad_norm": 1.2491792440414429, "learning_rate": 3.8235294117647055e-06, "loss": 0.0334, "step": 126800 }, { "epoch": 11.845421450574069, "grad_norm": 0.3937609791755676, "learning_rate": 3.8225942205180964e-06, "loss": 0.0304, "step": 126900 }, { "epoch": 11.854755904041818, "grad_norm": 1.6056652069091797, "learning_rate": 3.8216590292714865e-06, "loss": 0.0367, "step": 127000 }, { "epoch": 11.864090357509568, "grad_norm": 2.3362789154052734, "learning_rate": 3.8207238380248765e-06, "loss": 0.0378, "step": 127100 }, { "epoch": 11.873424810977317, "grad_norm": 2.6009485721588135, "learning_rate": 3.8197886467782665e-06, "loss": 0.0353, "step": 127200 }, { "epoch": 11.882759264445067, "grad_norm": 2.8628594875335693, "learning_rate": 3.8188534555316565e-06, "loss": 0.0339, "step": 127300 }, { "epoch": 11.892093717912816, "grad_norm": 2.4228551387786865, "learning_rate": 3.817918264285047e-06, "loss": 0.0355, "step": 127400 }, { "epoch": 11.901428171380566, "grad_norm": 3.7704994678497314, "learning_rate": 3.816983073038437e-06, "loss": 0.0309, "step": 127500 }, { "epoch": 11.910762624848315, "grad_norm": 1.8874956369400024, "learning_rate": 3.816047881791827e-06, "loss": 0.0342, "step": 127600 }, { "epoch": 11.920097078316065, "grad_norm": 1.1339677572250366, "learning_rate": 3.815112690545217e-06, "loss": 0.0343, "step": 127700 }, { "epoch": 11.929431531783814, "grad_norm": 2.5600712299346924, "learning_rate": 3.814177499298607e-06, "loss": 0.0364, "step": 127800 }, { "epoch": 11.938765985251564, "grad_norm": 0.9636703729629517, "learning_rate": 3.813242308051997e-06, "loss": 0.0377, "step": 127900 }, { "epoch": 11.948100438719313, "grad_norm": 1.977637767791748, "learning_rate": 3.812307116805387e-06, "loss": 0.0344, "step": 128000 }, { "epoch": 11.957434892187063, "grad_norm": 0.736196756362915, "learning_rate": 3.8113719255587772e-06, "loss": 0.0333, "step": 128100 }, { "epoch": 11.966769345654813, "grad_norm": 2.256035566329956, "learning_rate": 3.8104367343121672e-06, "loss": 0.0397, "step": 128200 }, { "epoch": 11.976103799122562, "grad_norm": 4.687311172485352, "learning_rate": 3.8095015430655573e-06, "loss": 0.0325, "step": 128300 }, { "epoch": 11.985438252590312, "grad_norm": 1.1714528799057007, "learning_rate": 3.8085663518189473e-06, "loss": 0.0339, "step": 128400 }, { "epoch": 11.994772706058061, "grad_norm": 1.295044183731079, "learning_rate": 3.8076311605723378e-06, "loss": 0.0333, "step": 128500 }, { "epoch": 12.00410715952581, "grad_norm": 2.7508256435394287, "learning_rate": 3.8066959693257274e-06, "loss": 0.0335, "step": 128600 }, { "epoch": 12.013441612993558, "grad_norm": 2.039813995361328, "learning_rate": 3.8057607780791174e-06, "loss": 0.0308, "step": 128700 }, { "epoch": 12.022776066461308, "grad_norm": 3.4854979515075684, "learning_rate": 3.8048255868325074e-06, "loss": 0.0268, "step": 128800 }, { "epoch": 12.032110519929057, "grad_norm": 2.4273290634155273, "learning_rate": 3.8038903955858975e-06, "loss": 0.0269, "step": 128900 }, { "epoch": 12.041444973396807, "grad_norm": 0.6085324883460999, "learning_rate": 3.8029552043392875e-06, "loss": 0.0269, "step": 129000 }, { "epoch": 12.050779426864556, "grad_norm": 2.853523015975952, "learning_rate": 3.8020200130926775e-06, "loss": 0.0306, "step": 129100 }, { "epoch": 12.060113880332306, "grad_norm": 3.2789723873138428, "learning_rate": 3.8010848218460676e-06, "loss": 0.0292, "step": 129200 }, { "epoch": 12.069448333800056, "grad_norm": 1.3152612447738647, "learning_rate": 3.800149630599458e-06, "loss": 0.0306, "step": 129300 }, { "epoch": 12.078782787267805, "grad_norm": 2.372352361679077, "learning_rate": 3.799214439352848e-06, "loss": 0.0289, "step": 129400 }, { "epoch": 12.088117240735555, "grad_norm": 1.4397834539413452, "learning_rate": 3.798279248106238e-06, "loss": 0.0297, "step": 129500 }, { "epoch": 12.097451694203304, "grad_norm": 1.458917498588562, "learning_rate": 3.797344056859628e-06, "loss": 0.0267, "step": 129600 }, { "epoch": 12.106786147671054, "grad_norm": 1.6294349431991577, "learning_rate": 3.796408865613018e-06, "loss": 0.0288, "step": 129700 }, { "epoch": 12.116120601138803, "grad_norm": 2.0647764205932617, "learning_rate": 3.795473674366408e-06, "loss": 0.0302, "step": 129800 }, { "epoch": 12.125455054606553, "grad_norm": 1.8845385313034058, "learning_rate": 3.7945384831197986e-06, "loss": 0.0336, "step": 129900 }, { "epoch": 12.134789508074302, "grad_norm": 3.3060567378997803, "learning_rate": 3.7936032918731886e-06, "loss": 0.0298, "step": 130000 }, { "epoch": 12.134789508074302, "eval_accuracy": 0.7007001166861143, "eval_f1": 0.8261508799422469, "eval_loss": 0.13617804646492004, "eval_roc_auc": 0.9083118956787161, "eval_runtime": 350.6529, "eval_samples_per_second": 122.201, "eval_steps_per_second": 122.201, "step": 130000 }, { "epoch": 12.144123961542052, "grad_norm": 1.8479039669036865, "learning_rate": 3.7926681006265787e-06, "loss": 0.0299, "step": 130100 }, { "epoch": 12.153458415009801, "grad_norm": 2.1098270416259766, "learning_rate": 3.7917329093799687e-06, "loss": 0.0256, "step": 130200 }, { "epoch": 12.16279286847755, "grad_norm": 2.346247673034668, "learning_rate": 3.7907977181333587e-06, "loss": 0.0286, "step": 130300 }, { "epoch": 12.1721273219453, "grad_norm": 2.276707887649536, "learning_rate": 3.7898625268867483e-06, "loss": 0.028, "step": 130400 }, { "epoch": 12.18146177541305, "grad_norm": 1.498591661453247, "learning_rate": 3.7889273356401384e-06, "loss": 0.0269, "step": 130500 }, { "epoch": 12.1907962288808, "grad_norm": 0.7825024127960205, "learning_rate": 3.7879921443935284e-06, "loss": 0.0266, "step": 130600 }, { "epoch": 12.200130682348549, "grad_norm": 2.114832878112793, "learning_rate": 3.787056953146919e-06, "loss": 0.0262, "step": 130700 }, { "epoch": 12.209465135816298, "grad_norm": 0.6117820739746094, "learning_rate": 3.786121761900309e-06, "loss": 0.0264, "step": 130800 }, { "epoch": 12.218799589284048, "grad_norm": 1.6221612691879272, "learning_rate": 3.785186570653699e-06, "loss": 0.032, "step": 130900 }, { "epoch": 12.228134042751797, "grad_norm": 1.1813958883285522, "learning_rate": 3.784251379407089e-06, "loss": 0.0289, "step": 131000 }, { "epoch": 12.237468496219547, "grad_norm": 0.008644762448966503, "learning_rate": 3.783316188160479e-06, "loss": 0.0289, "step": 131100 }, { "epoch": 12.246802949687297, "grad_norm": 1.0623973608016968, "learning_rate": 3.782380996913869e-06, "loss": 0.0262, "step": 131200 }, { "epoch": 12.256137403155046, "grad_norm": 1.6030511856079102, "learning_rate": 3.7814458056672595e-06, "loss": 0.0287, "step": 131300 }, { "epoch": 12.265471856622796, "grad_norm": 0.6374735832214355, "learning_rate": 3.7805106144206495e-06, "loss": 0.0278, "step": 131400 }, { "epoch": 12.274806310090543, "grad_norm": 0.8452407121658325, "learning_rate": 3.7795754231740395e-06, "loss": 0.0279, "step": 131500 }, { "epoch": 12.284140763558293, "grad_norm": 0.8691614866256714, "learning_rate": 3.7786402319274296e-06, "loss": 0.0297, "step": 131600 }, { "epoch": 12.293475217026042, "grad_norm": 0.6970386505126953, "learning_rate": 3.7777050406808196e-06, "loss": 0.0275, "step": 131700 }, { "epoch": 12.302809670493792, "grad_norm": 2.785088539123535, "learning_rate": 3.7767698494342096e-06, "loss": 0.0361, "step": 131800 }, { "epoch": 12.312144123961541, "grad_norm": 0.33980661630630493, "learning_rate": 3.7758346581876e-06, "loss": 0.0269, "step": 131900 }, { "epoch": 12.321478577429291, "grad_norm": 0.8593186736106873, "learning_rate": 3.77489946694099e-06, "loss": 0.0294, "step": 132000 }, { "epoch": 12.33081303089704, "grad_norm": 0.5531963109970093, "learning_rate": 3.77396427569438e-06, "loss": 0.0289, "step": 132100 }, { "epoch": 12.34014748436479, "grad_norm": 1.957345724105835, "learning_rate": 3.7730290844477697e-06, "loss": 0.0246, "step": 132200 }, { "epoch": 12.34948193783254, "grad_norm": 1.5563161373138428, "learning_rate": 3.7720938932011598e-06, "loss": 0.0311, "step": 132300 }, { "epoch": 12.358816391300289, "grad_norm": 1.1131196022033691, "learning_rate": 3.77115870195455e-06, "loss": 0.0289, "step": 132400 }, { "epoch": 12.368150844768039, "grad_norm": 1.4881969690322876, "learning_rate": 3.77022351070794e-06, "loss": 0.0302, "step": 132500 }, { "epoch": 12.377485298235788, "grad_norm": 2.615744113922119, "learning_rate": 3.76928831946133e-06, "loss": 0.0311, "step": 132600 }, { "epoch": 12.386819751703538, "grad_norm": 1.7677770853042603, "learning_rate": 3.76835312821472e-06, "loss": 0.0291, "step": 132700 }, { "epoch": 12.396154205171287, "grad_norm": 2.135699510574341, "learning_rate": 3.7674179369681104e-06, "loss": 0.0306, "step": 132800 }, { "epoch": 12.405488658639037, "grad_norm": 1.4975916147232056, "learning_rate": 3.7664827457215004e-06, "loss": 0.0304, "step": 132900 }, { "epoch": 12.414823112106786, "grad_norm": 1.8848680257797241, "learning_rate": 3.7655475544748904e-06, "loss": 0.029, "step": 133000 }, { "epoch": 12.424157565574536, "grad_norm": 1.8306351900100708, "learning_rate": 3.7646123632282804e-06, "loss": 0.0328, "step": 133100 }, { "epoch": 12.433492019042285, "grad_norm": 2.982465982437134, "learning_rate": 3.7636771719816705e-06, "loss": 0.0266, "step": 133200 }, { "epoch": 12.442826472510035, "grad_norm": 1.7595432996749878, "learning_rate": 3.7627419807350605e-06, "loss": 0.0306, "step": 133300 }, { "epoch": 12.452160925977784, "grad_norm": 1.233398199081421, "learning_rate": 3.761806789488451e-06, "loss": 0.0298, "step": 133400 }, { "epoch": 12.461495379445534, "grad_norm": 0.967143177986145, "learning_rate": 3.760871598241841e-06, "loss": 0.0291, "step": 133500 }, { "epoch": 12.470829832913283, "grad_norm": 2.325838565826416, "learning_rate": 3.759936406995231e-06, "loss": 0.0289, "step": 133600 }, { "epoch": 12.480164286381033, "grad_norm": 0.8071816563606262, "learning_rate": 3.759001215748621e-06, "loss": 0.0313, "step": 133700 }, { "epoch": 12.489498739848782, "grad_norm": 1.1629141569137573, "learning_rate": 3.758066024502011e-06, "loss": 0.0291, "step": 133800 }, { "epoch": 12.498833193316532, "grad_norm": 0.6949602961540222, "learning_rate": 3.757130833255401e-06, "loss": 0.0319, "step": 133900 }, { "epoch": 12.508167646784282, "grad_norm": 1.7146610021591187, "learning_rate": 3.7561956420087907e-06, "loss": 0.0282, "step": 134000 }, { "epoch": 12.517502100252031, "grad_norm": 0.5988059639930725, "learning_rate": 3.7552604507621807e-06, "loss": 0.0282, "step": 134100 }, { "epoch": 12.526836553719779, "grad_norm": 1.440969705581665, "learning_rate": 3.754325259515571e-06, "loss": 0.0301, "step": 134200 }, { "epoch": 12.53617100718753, "grad_norm": 2.3079819679260254, "learning_rate": 3.7533900682689612e-06, "loss": 0.0295, "step": 134300 }, { "epoch": 12.545505460655278, "grad_norm": 1.0699312686920166, "learning_rate": 3.7524548770223513e-06, "loss": 0.0269, "step": 134400 }, { "epoch": 12.554839914123027, "grad_norm": 0.9553667306900024, "learning_rate": 3.7515196857757413e-06, "loss": 0.0311, "step": 134500 }, { "epoch": 12.564174367590777, "grad_norm": 2.741276741027832, "learning_rate": 3.7505844945291313e-06, "loss": 0.0304, "step": 134600 }, { "epoch": 12.573508821058526, "grad_norm": 2.377042531967163, "learning_rate": 3.7496493032825214e-06, "loss": 0.0311, "step": 134700 }, { "epoch": 12.582843274526276, "grad_norm": 0.478151798248291, "learning_rate": 3.748714112035912e-06, "loss": 0.0315, "step": 134800 }, { "epoch": 12.592177727994025, "grad_norm": 2.5878026485443115, "learning_rate": 3.747778920789302e-06, "loss": 0.0312, "step": 134900 }, { "epoch": 12.601512181461775, "grad_norm": 1.8835389614105225, "learning_rate": 3.746843729542692e-06, "loss": 0.0297, "step": 135000 }, { "epoch": 12.601512181461775, "eval_accuracy": 0.7049008168028005, "eval_f1": 0.8299350723317006, "eval_loss": 0.13892559707164764, "eval_roc_auc": 0.9121325235618258, "eval_runtime": 292.2572, "eval_samples_per_second": 146.617, "eval_steps_per_second": 146.617, "step": 135000 }, { "epoch": 12.610846634929525, "grad_norm": 0.5830561518669128, "learning_rate": 3.745908538296082e-06, "loss": 0.0315, "step": 135100 }, { "epoch": 12.620181088397274, "grad_norm": 2.0670907497406006, "learning_rate": 3.744973347049472e-06, "loss": 0.0309, "step": 135200 }, { "epoch": 12.629515541865024, "grad_norm": 2.077833652496338, "learning_rate": 3.744038155802862e-06, "loss": 0.0298, "step": 135300 }, { "epoch": 12.638849995332773, "grad_norm": 2.722602605819702, "learning_rate": 3.7431029645562524e-06, "loss": 0.0291, "step": 135400 }, { "epoch": 12.648184448800523, "grad_norm": 1.6959154605865479, "learning_rate": 3.7421677733096424e-06, "loss": 0.0277, "step": 135500 }, { "epoch": 12.657518902268272, "grad_norm": 3.280757427215576, "learning_rate": 3.7412325820630325e-06, "loss": 0.0275, "step": 135600 }, { "epoch": 12.666853355736022, "grad_norm": 1.6305515766143799, "learning_rate": 3.7402973908164225e-06, "loss": 0.0254, "step": 135700 }, { "epoch": 12.676187809203771, "grad_norm": 0.8566631078720093, "learning_rate": 3.739362199569812e-06, "loss": 0.0291, "step": 135800 }, { "epoch": 12.68552226267152, "grad_norm": 3.17173433303833, "learning_rate": 3.738427008323202e-06, "loss": 0.0277, "step": 135900 }, { "epoch": 12.69485671613927, "grad_norm": 1.6550495624542236, "learning_rate": 3.737491817076592e-06, "loss": 0.0308, "step": 136000 }, { "epoch": 12.70419116960702, "grad_norm": 3.4326391220092773, "learning_rate": 3.736556625829982e-06, "loss": 0.03, "step": 136100 }, { "epoch": 12.71352562307477, "grad_norm": 0.8628426194190979, "learning_rate": 3.7356214345833727e-06, "loss": 0.03, "step": 136200 }, { "epoch": 12.722860076542519, "grad_norm": 3.0451571941375732, "learning_rate": 3.7346862433367627e-06, "loss": 0.0264, "step": 136300 }, { "epoch": 12.732194530010268, "grad_norm": 1.93718683719635, "learning_rate": 3.7337510520901527e-06, "loss": 0.03, "step": 136400 }, { "epoch": 12.741528983478018, "grad_norm": 2.549499273300171, "learning_rate": 3.7328158608435428e-06, "loss": 0.031, "step": 136500 }, { "epoch": 12.750863436945767, "grad_norm": 1.822261929512024, "learning_rate": 3.7318806695969328e-06, "loss": 0.0293, "step": 136600 }, { "epoch": 12.760197890413517, "grad_norm": 0.716874897480011, "learning_rate": 3.730945478350323e-06, "loss": 0.0284, "step": 136700 }, { "epoch": 12.769532343881266, "grad_norm": 0.9337921738624573, "learning_rate": 3.730010287103713e-06, "loss": 0.0294, "step": 136800 }, { "epoch": 12.778866797349016, "grad_norm": 2.5959115028381348, "learning_rate": 3.7290750958571033e-06, "loss": 0.0302, "step": 136900 }, { "epoch": 12.788201250816766, "grad_norm": 0.6116209626197815, "learning_rate": 3.7281399046104933e-06, "loss": 0.0252, "step": 137000 }, { "epoch": 12.797535704284513, "grad_norm": 1.9206653833389282, "learning_rate": 3.7272047133638834e-06, "loss": 0.031, "step": 137100 }, { "epoch": 12.806870157752263, "grad_norm": 1.7202324867248535, "learning_rate": 3.7262695221172734e-06, "loss": 0.0278, "step": 137200 }, { "epoch": 12.816204611220012, "grad_norm": 2.6364500522613525, "learning_rate": 3.7253343308706634e-06, "loss": 0.027, "step": 137300 }, { "epoch": 12.825539064687762, "grad_norm": 1.149749994277954, "learning_rate": 3.7243991396240535e-06, "loss": 0.0296, "step": 137400 }, { "epoch": 12.834873518155511, "grad_norm": 2.015876293182373, "learning_rate": 3.723463948377444e-06, "loss": 0.0276, "step": 137500 }, { "epoch": 12.844207971623261, "grad_norm": 1.230569839477539, "learning_rate": 3.722528757130834e-06, "loss": 0.0304, "step": 137600 }, { "epoch": 12.85354242509101, "grad_norm": 1.033018708229065, "learning_rate": 3.7215935658842235e-06, "loss": 0.0308, "step": 137700 }, { "epoch": 12.86287687855876, "grad_norm": 1.90570068359375, "learning_rate": 3.7206583746376136e-06, "loss": 0.0289, "step": 137800 }, { "epoch": 12.87221133202651, "grad_norm": 0.9730318188667297, "learning_rate": 3.7197231833910036e-06, "loss": 0.0297, "step": 137900 }, { "epoch": 12.881545785494259, "grad_norm": 1.4880870580673218, "learning_rate": 3.7187879921443936e-06, "loss": 0.0315, "step": 138000 }, { "epoch": 12.890880238962009, "grad_norm": 0.17818982899188995, "learning_rate": 3.7178528008977837e-06, "loss": 0.0311, "step": 138100 }, { "epoch": 12.900214692429758, "grad_norm": 1.7374799251556396, "learning_rate": 3.7169176096511737e-06, "loss": 0.0312, "step": 138200 }, { "epoch": 12.909549145897508, "grad_norm": 2.2358474731445312, "learning_rate": 3.715982418404564e-06, "loss": 0.0288, "step": 138300 }, { "epoch": 12.918883599365257, "grad_norm": 2.2579333782196045, "learning_rate": 3.715047227157954e-06, "loss": 0.0289, "step": 138400 }, { "epoch": 12.928218052833007, "grad_norm": 2.4947078227996826, "learning_rate": 3.7141120359113442e-06, "loss": 0.0323, "step": 138500 }, { "epoch": 12.937552506300756, "grad_norm": 0.7782678008079529, "learning_rate": 3.7131768446647342e-06, "loss": 0.028, "step": 138600 }, { "epoch": 12.946886959768506, "grad_norm": 0.4673269987106323, "learning_rate": 3.7122416534181243e-06, "loss": 0.0301, "step": 138700 }, { "epoch": 12.956221413236255, "grad_norm": 1.166088342666626, "learning_rate": 3.7113064621715143e-06, "loss": 0.0309, "step": 138800 }, { "epoch": 12.965555866704005, "grad_norm": 2.0393917560577393, "learning_rate": 3.7103712709249048e-06, "loss": 0.0323, "step": 138900 }, { "epoch": 12.974890320171754, "grad_norm": 2.6429049968719482, "learning_rate": 3.709436079678295e-06, "loss": 0.0318, "step": 139000 }, { "epoch": 12.984224773639504, "grad_norm": 0.5561652779579163, "learning_rate": 3.708500888431685e-06, "loss": 0.0291, "step": 139100 }, { "epoch": 12.993559227107253, "grad_norm": 1.0141006708145142, "learning_rate": 3.707565697185075e-06, "loss": 0.0319, "step": 139200 }, { "epoch": 13.002893680575003, "grad_norm": 2.246272087097168, "learning_rate": 3.706630505938465e-06, "loss": 0.0298, "step": 139300 }, { "epoch": 13.012228134042752, "grad_norm": 1.5770875215530396, "learning_rate": 3.705695314691855e-06, "loss": 0.0261, "step": 139400 }, { "epoch": 13.021562587510502, "grad_norm": 1.9068331718444824, "learning_rate": 3.7047601234452445e-06, "loss": 0.0266, "step": 139500 }, { "epoch": 13.030897040978251, "grad_norm": 2.533865451812744, "learning_rate": 3.7038249321986346e-06, "loss": 0.0262, "step": 139600 }, { "epoch": 13.040231494446001, "grad_norm": 1.3406758308410645, "learning_rate": 3.702889740952025e-06, "loss": 0.0274, "step": 139700 }, { "epoch": 13.04956594791375, "grad_norm": 1.3135144710540771, "learning_rate": 3.701954549705415e-06, "loss": 0.0263, "step": 139800 }, { "epoch": 13.0589004013815, "grad_norm": 0.2728913426399231, "learning_rate": 3.701019358458805e-06, "loss": 0.0213, "step": 139900 }, { "epoch": 13.068234854849248, "grad_norm": 5.546918869018555, "learning_rate": 3.700084167212195e-06, "loss": 0.024, "step": 140000 }, { "epoch": 13.068234854849248, "eval_accuracy": 0.7006301050175029, "eval_f1": 0.8282168403217496, "eval_loss": 0.1498192548751831, "eval_roc_auc": 0.9122892392778509, "eval_runtime": 245.4462, "eval_samples_per_second": 174.58, "eval_steps_per_second": 174.58, "step": 140000 }, { "epoch": 13.077569308316997, "grad_norm": 0.5448386669158936, "learning_rate": 3.699148975965585e-06, "loss": 0.0242, "step": 140100 }, { "epoch": 13.086903761784747, "grad_norm": 1.4900062084197998, "learning_rate": 3.698213784718975e-06, "loss": 0.0283, "step": 140200 }, { "epoch": 13.096238215252496, "grad_norm": 1.5443243980407715, "learning_rate": 3.697278593472365e-06, "loss": 0.0256, "step": 140300 }, { "epoch": 13.105572668720246, "grad_norm": 0.5456923842430115, "learning_rate": 3.6963434022257556e-06, "loss": 0.0264, "step": 140400 }, { "epoch": 13.114907122187995, "grad_norm": 1.9743298292160034, "learning_rate": 3.6954082109791457e-06, "loss": 0.0273, "step": 140500 }, { "epoch": 13.124241575655745, "grad_norm": 2.135833740234375, "learning_rate": 3.6944730197325357e-06, "loss": 0.0244, "step": 140600 }, { "epoch": 13.133576029123494, "grad_norm": 1.5825005769729614, "learning_rate": 3.6935378284859257e-06, "loss": 0.0282, "step": 140700 }, { "epoch": 13.142910482591244, "grad_norm": 1.8259848356246948, "learning_rate": 3.6926026372393158e-06, "loss": 0.0255, "step": 140800 }, { "epoch": 13.152244936058993, "grad_norm": 2.803631544113159, "learning_rate": 3.691667445992706e-06, "loss": 0.0247, "step": 140900 }, { "epoch": 13.161579389526743, "grad_norm": 0.678063154220581, "learning_rate": 3.6907322547460963e-06, "loss": 0.0283, "step": 141000 }, { "epoch": 13.170913842994493, "grad_norm": 3.078503370285034, "learning_rate": 3.6897970634994863e-06, "loss": 0.0246, "step": 141100 }, { "epoch": 13.180248296462242, "grad_norm": 1.9334124326705933, "learning_rate": 3.6888618722528763e-06, "loss": 0.0264, "step": 141200 }, { "epoch": 13.189582749929992, "grad_norm": 1.7534503936767578, "learning_rate": 3.687926681006266e-06, "loss": 0.0254, "step": 141300 }, { "epoch": 13.198917203397741, "grad_norm": 1.96674644947052, "learning_rate": 3.686991489759656e-06, "loss": 0.0238, "step": 141400 }, { "epoch": 13.20825165686549, "grad_norm": 1.379254937171936, "learning_rate": 3.686056298513046e-06, "loss": 0.0286, "step": 141500 }, { "epoch": 13.21758611033324, "grad_norm": 1.2166365385055542, "learning_rate": 3.685121107266436e-06, "loss": 0.0209, "step": 141600 }, { "epoch": 13.22692056380099, "grad_norm": 2.1895065307617188, "learning_rate": 3.684185916019826e-06, "loss": 0.0247, "step": 141700 }, { "epoch": 13.23625501726874, "grad_norm": 2.4768285751342773, "learning_rate": 3.6832507247732165e-06, "loss": 0.0237, "step": 141800 }, { "epoch": 13.245589470736489, "grad_norm": 0.843777596950531, "learning_rate": 3.6823155335266065e-06, "loss": 0.0242, "step": 141900 }, { "epoch": 13.254923924204238, "grad_norm": 1.9806196689605713, "learning_rate": 3.6813803422799966e-06, "loss": 0.028, "step": 142000 }, { "epoch": 13.264258377671988, "grad_norm": 2.1698334217071533, "learning_rate": 3.6804451510333866e-06, "loss": 0.0248, "step": 142100 }, { "epoch": 13.273592831139737, "grad_norm": 2.7050039768218994, "learning_rate": 3.6795099597867766e-06, "loss": 0.0286, "step": 142200 }, { "epoch": 13.282927284607487, "grad_norm": 0.8367059230804443, "learning_rate": 3.6785747685401667e-06, "loss": 0.026, "step": 142300 }, { "epoch": 13.292261738075236, "grad_norm": 2.2572450637817383, "learning_rate": 3.677639577293557e-06, "loss": 0.0263, "step": 142400 }, { "epoch": 13.301596191542986, "grad_norm": 0.1467469483613968, "learning_rate": 3.676704386046947e-06, "loss": 0.0259, "step": 142500 }, { "epoch": 13.310930645010735, "grad_norm": 2.127816677093506, "learning_rate": 3.675769194800337e-06, "loss": 0.0237, "step": 142600 }, { "epoch": 13.320265098478485, "grad_norm": 1.8887851238250732, "learning_rate": 3.674834003553727e-06, "loss": 0.0236, "step": 142700 }, { "epoch": 13.329599551946233, "grad_norm": 2.2585413455963135, "learning_rate": 3.6738988123071172e-06, "loss": 0.0251, "step": 142800 }, { "epoch": 13.338934005413982, "grad_norm": 2.6057920455932617, "learning_rate": 3.6729636210605073e-06, "loss": 0.0266, "step": 142900 }, { "epoch": 13.348268458881732, "grad_norm": 1.9435811042785645, "learning_rate": 3.6720284298138977e-06, "loss": 0.0283, "step": 143000 }, { "epoch": 13.357602912349481, "grad_norm": 3.2301342487335205, "learning_rate": 3.671093238567287e-06, "loss": 0.0235, "step": 143100 }, { "epoch": 13.36693736581723, "grad_norm": 1.8052853345870972, "learning_rate": 3.6701580473206773e-06, "loss": 0.0257, "step": 143200 }, { "epoch": 13.37627181928498, "grad_norm": 1.22776198387146, "learning_rate": 3.6692228560740674e-06, "loss": 0.0279, "step": 143300 }, { "epoch": 13.38560627275273, "grad_norm": 6.619986057281494, "learning_rate": 3.6682876648274574e-06, "loss": 0.0323, "step": 143400 }, { "epoch": 13.39494072622048, "grad_norm": 2.218353509902954, "learning_rate": 3.6673524735808474e-06, "loss": 0.0254, "step": 143500 }, { "epoch": 13.404275179688229, "grad_norm": 3.1545801162719727, "learning_rate": 3.6664172823342375e-06, "loss": 0.0231, "step": 143600 }, { "epoch": 13.413609633155978, "grad_norm": 1.5052598714828491, "learning_rate": 3.6654820910876275e-06, "loss": 0.025, "step": 143700 }, { "epoch": 13.422944086623728, "grad_norm": 3.9086647033691406, "learning_rate": 3.664546899841018e-06, "loss": 0.0275, "step": 143800 }, { "epoch": 13.432278540091477, "grad_norm": 2.2325780391693115, "learning_rate": 3.663611708594408e-06, "loss": 0.0223, "step": 143900 }, { "epoch": 13.441612993559227, "grad_norm": 0.29537492990493774, "learning_rate": 3.662676517347798e-06, "loss": 0.0272, "step": 144000 }, { "epoch": 13.450947447026977, "grad_norm": 1.6791108846664429, "learning_rate": 3.661741326101188e-06, "loss": 0.0262, "step": 144100 }, { "epoch": 13.460281900494726, "grad_norm": 0.6349210143089294, "learning_rate": 3.660806134854578e-06, "loss": 0.0243, "step": 144200 }, { "epoch": 13.469616353962476, "grad_norm": 2.848510503768921, "learning_rate": 3.659870943607968e-06, "loss": 0.0305, "step": 144300 }, { "epoch": 13.478950807430225, "grad_norm": 2.8994688987731934, "learning_rate": 3.658935752361358e-06, "loss": 0.0273, "step": 144400 }, { "epoch": 13.488285260897975, "grad_norm": 3.36873459815979, "learning_rate": 3.6580005611147486e-06, "loss": 0.0249, "step": 144500 }, { "epoch": 13.497619714365724, "grad_norm": 2.346935272216797, "learning_rate": 3.6570653698681386e-06, "loss": 0.0238, "step": 144600 }, { "epoch": 13.506954167833474, "grad_norm": 1.200295329093933, "learning_rate": 3.6561301786215287e-06, "loss": 0.0233, "step": 144700 }, { "epoch": 13.516288621301223, "grad_norm": 1.1735639572143555, "learning_rate": 3.6551949873749187e-06, "loss": 0.0267, "step": 144800 }, { "epoch": 13.525623074768973, "grad_norm": 1.5973484516143799, "learning_rate": 3.6542597961283083e-06, "loss": 0.0283, "step": 144900 }, { "epoch": 13.534957528236722, "grad_norm": 1.7752182483673096, "learning_rate": 3.6533246048816983e-06, "loss": 0.0224, "step": 145000 }, { "epoch": 13.534957528236722, "eval_accuracy": 0.6990431738623104, "eval_f1": 0.822179664816302, "eval_loss": 0.14965105056762695, "eval_roc_auc": 0.9052077681607342, "eval_runtime": 237.081, "eval_samples_per_second": 180.74, "eval_steps_per_second": 180.74, "step": 145000 }, { "epoch": 13.544291981704472, "grad_norm": 0.9074592590332031, "learning_rate": 3.6523894136350884e-06, "loss": 0.0254, "step": 145100 }, { "epoch": 13.553626435172221, "grad_norm": 3.418098211288452, "learning_rate": 3.6514542223884784e-06, "loss": 0.0247, "step": 145200 }, { "epoch": 13.56296088863997, "grad_norm": 2.498141288757324, "learning_rate": 3.650519031141869e-06, "loss": 0.0304, "step": 145300 }, { "epoch": 13.57229534210772, "grad_norm": 2.5925536155700684, "learning_rate": 3.649583839895259e-06, "loss": 0.0291, "step": 145400 }, { "epoch": 13.58162979557547, "grad_norm": 1.379132866859436, "learning_rate": 3.648648648648649e-06, "loss": 0.0265, "step": 145500 }, { "epoch": 13.590964249043218, "grad_norm": 1.6678296327590942, "learning_rate": 3.647713457402039e-06, "loss": 0.0278, "step": 145600 }, { "epoch": 13.600298702510967, "grad_norm": 2.2167880535125732, "learning_rate": 3.646778266155429e-06, "loss": 0.0238, "step": 145700 }, { "epoch": 13.609633155978717, "grad_norm": 0.4692205488681793, "learning_rate": 3.645843074908819e-06, "loss": 0.0256, "step": 145800 }, { "epoch": 13.618967609446466, "grad_norm": 1.3645844459533691, "learning_rate": 3.6449078836622094e-06, "loss": 0.0265, "step": 145900 }, { "epoch": 13.628302062914216, "grad_norm": 2.859494209289551, "learning_rate": 3.6439726924155995e-06, "loss": 0.0253, "step": 146000 }, { "epoch": 13.637636516381965, "grad_norm": 0.7956030368804932, "learning_rate": 3.6430375011689895e-06, "loss": 0.0246, "step": 146100 }, { "epoch": 13.646970969849715, "grad_norm": 1.6017595529556274, "learning_rate": 3.6421023099223795e-06, "loss": 0.0256, "step": 146200 }, { "epoch": 13.656305423317464, "grad_norm": 2.8659112453460693, "learning_rate": 3.6411671186757696e-06, "loss": 0.0263, "step": 146300 }, { "epoch": 13.665639876785214, "grad_norm": 2.9091742038726807, "learning_rate": 3.6402319274291596e-06, "loss": 0.0264, "step": 146400 }, { "epoch": 13.674974330252963, "grad_norm": 2.081066370010376, "learning_rate": 3.63929673618255e-06, "loss": 0.0235, "step": 146500 }, { "epoch": 13.684308783720713, "grad_norm": 2.2308974266052246, "learning_rate": 3.63836154493594e-06, "loss": 0.0302, "step": 146600 }, { "epoch": 13.693643237188462, "grad_norm": 0.667881190776825, "learning_rate": 3.6374263536893297e-06, "loss": 0.0282, "step": 146700 }, { "epoch": 13.702977690656212, "grad_norm": 3.146743059158325, "learning_rate": 3.6364911624427197e-06, "loss": 0.0257, "step": 146800 }, { "epoch": 13.712312144123961, "grad_norm": 1.952141523361206, "learning_rate": 3.6355559711961098e-06, "loss": 0.0255, "step": 146900 }, { "epoch": 13.721646597591711, "grad_norm": 0.9129677414894104, "learning_rate": 3.6346207799494998e-06, "loss": 0.0279, "step": 147000 }, { "epoch": 13.73098105105946, "grad_norm": 1.4731398820877075, "learning_rate": 3.63368558870289e-06, "loss": 0.0236, "step": 147100 }, { "epoch": 13.74031550452721, "grad_norm": 1.986914038658142, "learning_rate": 3.63275039745628e-06, "loss": 0.0279, "step": 147200 }, { "epoch": 13.74964995799496, "grad_norm": 2.514854669570923, "learning_rate": 3.6318152062096703e-06, "loss": 0.0244, "step": 147300 }, { "epoch": 13.75898441146271, "grad_norm": 2.7076375484466553, "learning_rate": 3.6308800149630603e-06, "loss": 0.0279, "step": 147400 }, { "epoch": 13.768318864930459, "grad_norm": 2.446366548538208, "learning_rate": 3.6299448237164504e-06, "loss": 0.0282, "step": 147500 }, { "epoch": 13.777653318398208, "grad_norm": 2.437586545944214, "learning_rate": 3.6290096324698404e-06, "loss": 0.0224, "step": 147600 }, { "epoch": 13.786987771865958, "grad_norm": 2.1879684925079346, "learning_rate": 3.6280744412232304e-06, "loss": 0.0279, "step": 147700 }, { "epoch": 13.796322225333707, "grad_norm": 1.8861479759216309, "learning_rate": 3.6271392499766205e-06, "loss": 0.0276, "step": 147800 }, { "epoch": 13.805656678801457, "grad_norm": 1.5380396842956543, "learning_rate": 3.6262040587300105e-06, "loss": 0.0275, "step": 147900 }, { "epoch": 13.814991132269206, "grad_norm": 1.9994155168533325, "learning_rate": 3.625268867483401e-06, "loss": 0.0262, "step": 148000 }, { "epoch": 13.824325585736956, "grad_norm": 2.433413028717041, "learning_rate": 3.624333676236791e-06, "loss": 0.0293, "step": 148100 }, { "epoch": 13.833660039204705, "grad_norm": 4.20536994934082, "learning_rate": 3.623398484990181e-06, "loss": 0.0252, "step": 148200 }, { "epoch": 13.842994492672455, "grad_norm": 2.0909829139709473, "learning_rate": 3.622463293743571e-06, "loss": 0.0256, "step": 148300 }, { "epoch": 13.852328946140204, "grad_norm": 2.2521584033966064, "learning_rate": 3.621528102496961e-06, "loss": 0.0261, "step": 148400 }, { "epoch": 13.861663399607952, "grad_norm": 1.7057069540023804, "learning_rate": 3.6205929112503507e-06, "loss": 0.0271, "step": 148500 }, { "epoch": 13.870997853075702, "grad_norm": 0.022600797936320305, "learning_rate": 3.6196577200037407e-06, "loss": 0.0283, "step": 148600 }, { "epoch": 13.880332306543451, "grad_norm": 1.5317589044570923, "learning_rate": 3.6187225287571307e-06, "loss": 0.0262, "step": 148700 }, { "epoch": 13.8896667600112, "grad_norm": 1.5070593357086182, "learning_rate": 3.617787337510521e-06, "loss": 0.0285, "step": 148800 }, { "epoch": 13.89900121347895, "grad_norm": 3.125170946121216, "learning_rate": 3.6168521462639112e-06, "loss": 0.0235, "step": 148900 }, { "epoch": 13.9083356669467, "grad_norm": 2.142052173614502, "learning_rate": 3.6159169550173012e-06, "loss": 0.0275, "step": 149000 }, { "epoch": 13.91767012041445, "grad_norm": 4.153106689453125, "learning_rate": 3.6149817637706913e-06, "loss": 0.0271, "step": 149100 }, { "epoch": 13.927004573882199, "grad_norm": 2.409613609313965, "learning_rate": 3.6140465725240813e-06, "loss": 0.0256, "step": 149200 }, { "epoch": 13.936339027349948, "grad_norm": 0.7104167938232422, "learning_rate": 3.6131113812774713e-06, "loss": 0.0226, "step": 149300 }, { "epoch": 13.945673480817698, "grad_norm": 1.4304745197296143, "learning_rate": 3.612176190030862e-06, "loss": 0.0243, "step": 149400 }, { "epoch": 13.955007934285447, "grad_norm": 4.04243278503418, "learning_rate": 3.611240998784252e-06, "loss": 0.0264, "step": 149500 }, { "epoch": 13.964342387753197, "grad_norm": 2.3366448879241943, "learning_rate": 3.610305807537642e-06, "loss": 0.028, "step": 149600 }, { "epoch": 13.973676841220946, "grad_norm": 0.37414970993995667, "learning_rate": 3.609370616291032e-06, "loss": 0.0282, "step": 149700 }, { "epoch": 13.983011294688696, "grad_norm": 1.826093316078186, "learning_rate": 3.608435425044422e-06, "loss": 0.0212, "step": 149800 }, { "epoch": 13.992345748156445, "grad_norm": 3.6165621280670166, "learning_rate": 3.607500233797812e-06, "loss": 0.0264, "step": 149900 }, { "epoch": 14.001680201624195, "grad_norm": 5.165619850158691, "learning_rate": 3.6065650425512024e-06, "loss": 0.0259, "step": 150000 }, { "epoch": 14.001680201624195, "eval_accuracy": 0.7051575262543758, "eval_f1": 0.8273634658266379, "eval_loss": 0.15090130269527435, "eval_roc_auc": 0.9072415549184103, "eval_runtime": 271.1348, "eval_samples_per_second": 158.039, "eval_steps_per_second": 158.039, "step": 150000 }, { "epoch": 14.011014655091945, "grad_norm": 2.690483808517456, "learning_rate": 3.6056298513045924e-06, "loss": 0.0218, "step": 150100 }, { "epoch": 14.020349108559694, "grad_norm": 2.3462729454040527, "learning_rate": 3.6046946600579825e-06, "loss": 0.022, "step": 150200 }, { "epoch": 14.029683562027444, "grad_norm": 0.7747630476951599, "learning_rate": 3.603759468811372e-06, "loss": 0.0204, "step": 150300 }, { "epoch": 14.039018015495193, "grad_norm": 1.3381973505020142, "learning_rate": 3.602824277564762e-06, "loss": 0.0222, "step": 150400 }, { "epoch": 14.048352468962943, "grad_norm": 3.3940794467926025, "learning_rate": 3.601889086318152e-06, "loss": 0.0192, "step": 150500 }, { "epoch": 14.057686922430692, "grad_norm": 1.67719304561615, "learning_rate": 3.600953895071542e-06, "loss": 0.0233, "step": 150600 }, { "epoch": 14.067021375898442, "grad_norm": 0.9678069353103638, "learning_rate": 3.600018703824932e-06, "loss": 0.0214, "step": 150700 }, { "epoch": 14.076355829366191, "grad_norm": 1.1872013807296753, "learning_rate": 3.5990835125783226e-06, "loss": 0.0189, "step": 150800 }, { "epoch": 14.08569028283394, "grad_norm": 1.0482934713363647, "learning_rate": 3.5981483213317127e-06, "loss": 0.025, "step": 150900 }, { "epoch": 14.09502473630169, "grad_norm": 1.6356589794158936, "learning_rate": 3.5972131300851027e-06, "loss": 0.0217, "step": 151000 }, { "epoch": 14.10435918976944, "grad_norm": 0.5577924251556396, "learning_rate": 3.5962779388384927e-06, "loss": 0.0195, "step": 151100 }, { "epoch": 14.11369364323719, "grad_norm": 1.430550217628479, "learning_rate": 3.5953427475918828e-06, "loss": 0.0208, "step": 151200 }, { "epoch": 14.123028096704937, "grad_norm": 2.2340660095214844, "learning_rate": 3.594407556345273e-06, "loss": 0.0239, "step": 151300 }, { "epoch": 14.132362550172687, "grad_norm": 1.701676607131958, "learning_rate": 3.5934723650986633e-06, "loss": 0.0222, "step": 151400 }, { "epoch": 14.141697003640436, "grad_norm": 1.0612292289733887, "learning_rate": 3.5925371738520533e-06, "loss": 0.0215, "step": 151500 }, { "epoch": 14.151031457108186, "grad_norm": 1.285701870918274, "learning_rate": 3.5916019826054433e-06, "loss": 0.0218, "step": 151600 }, { "epoch": 14.160365910575935, "grad_norm": 1.5197957754135132, "learning_rate": 3.5906667913588333e-06, "loss": 0.0201, "step": 151700 }, { "epoch": 14.169700364043685, "grad_norm": 1.7107561826705933, "learning_rate": 3.5897316001122234e-06, "loss": 0.0238, "step": 151800 }, { "epoch": 14.179034817511434, "grad_norm": 1.217633843421936, "learning_rate": 3.5887964088656134e-06, "loss": 0.023, "step": 151900 }, { "epoch": 14.188369270979184, "grad_norm": 3.985726833343506, "learning_rate": 3.5878612176190034e-06, "loss": 0.0234, "step": 152000 }, { "epoch": 14.197703724446933, "grad_norm": 0.4418995976448059, "learning_rate": 3.586926026372393e-06, "loss": 0.0215, "step": 152100 }, { "epoch": 14.207038177914683, "grad_norm": 0.22486884891986847, "learning_rate": 3.585990835125783e-06, "loss": 0.0238, "step": 152200 }, { "epoch": 14.216372631382432, "grad_norm": 2.052088737487793, "learning_rate": 3.5850556438791735e-06, "loss": 0.0236, "step": 152300 }, { "epoch": 14.225707084850182, "grad_norm": 1.7341680526733398, "learning_rate": 3.5841204526325636e-06, "loss": 0.021, "step": 152400 }, { "epoch": 14.235041538317931, "grad_norm": 1.9254136085510254, "learning_rate": 3.5831852613859536e-06, "loss": 0.0248, "step": 152500 }, { "epoch": 14.244375991785681, "grad_norm": 2.377591371536255, "learning_rate": 3.5822500701393436e-06, "loss": 0.0209, "step": 152600 }, { "epoch": 14.25371044525343, "grad_norm": 2.832890748977661, "learning_rate": 3.5813148788927336e-06, "loss": 0.0205, "step": 152700 }, { "epoch": 14.26304489872118, "grad_norm": 1.4805750846862793, "learning_rate": 3.5803796876461237e-06, "loss": 0.0222, "step": 152800 }, { "epoch": 14.27237935218893, "grad_norm": 1.1905975341796875, "learning_rate": 3.579444496399514e-06, "loss": 0.0224, "step": 152900 }, { "epoch": 14.281713805656679, "grad_norm": 2.5854663848876953, "learning_rate": 3.578509305152904e-06, "loss": 0.0228, "step": 153000 }, { "epoch": 14.291048259124429, "grad_norm": 0.4966909885406494, "learning_rate": 3.577574113906294e-06, "loss": 0.0234, "step": 153100 }, { "epoch": 14.300382712592178, "grad_norm": 1.8966412544250488, "learning_rate": 3.5766389226596842e-06, "loss": 0.0217, "step": 153200 }, { "epoch": 14.309717166059928, "grad_norm": 1.2196269035339355, "learning_rate": 3.5757037314130743e-06, "loss": 0.0195, "step": 153300 }, { "epoch": 14.319051619527677, "grad_norm": 2.038987398147583, "learning_rate": 3.5747685401664643e-06, "loss": 0.0229, "step": 153400 }, { "epoch": 14.328386072995427, "grad_norm": 1.9604883193969727, "learning_rate": 3.5738333489198547e-06, "loss": 0.0227, "step": 153500 }, { "epoch": 14.337720526463176, "grad_norm": 1.3627568483352661, "learning_rate": 3.5728981576732448e-06, "loss": 0.0259, "step": 153600 }, { "epoch": 14.347054979930926, "grad_norm": 0.6210748553276062, "learning_rate": 3.571962966426635e-06, "loss": 0.0257, "step": 153700 }, { "epoch": 14.356389433398675, "grad_norm": 1.544683575630188, "learning_rate": 3.571027775180025e-06, "loss": 0.0205, "step": 153800 }, { "epoch": 14.365723886866425, "grad_norm": 2.0926945209503174, "learning_rate": 3.5700925839334144e-06, "loss": 0.0211, "step": 153900 }, { "epoch": 14.375058340334174, "grad_norm": 0.5934177041053772, "learning_rate": 3.5691573926868045e-06, "loss": 0.0223, "step": 154000 }, { "epoch": 14.384392793801922, "grad_norm": 1.0952565670013428, "learning_rate": 3.5682222014401945e-06, "loss": 0.0218, "step": 154100 }, { "epoch": 14.393727247269672, "grad_norm": 2.68979549407959, "learning_rate": 3.5672870101935845e-06, "loss": 0.0281, "step": 154200 }, { "epoch": 14.403061700737421, "grad_norm": 2.159641742706299, "learning_rate": 3.566351818946975e-06, "loss": 0.0221, "step": 154300 }, { "epoch": 14.41239615420517, "grad_norm": 2.5568766593933105, "learning_rate": 3.565416627700365e-06, "loss": 0.0212, "step": 154400 }, { "epoch": 14.42173060767292, "grad_norm": 0.4403325617313385, "learning_rate": 3.564481436453755e-06, "loss": 0.0198, "step": 154500 }, { "epoch": 14.43106506114067, "grad_norm": 0.9334236979484558, "learning_rate": 3.563546245207145e-06, "loss": 0.0254, "step": 154600 }, { "epoch": 14.44039951460842, "grad_norm": 1.2325587272644043, "learning_rate": 3.562611053960535e-06, "loss": 0.0233, "step": 154700 }, { "epoch": 14.449733968076169, "grad_norm": 2.5817112922668457, "learning_rate": 3.561675862713925e-06, "loss": 0.0247, "step": 154800 }, { "epoch": 14.459068421543918, "grad_norm": 0.3331148326396942, "learning_rate": 3.5607406714673156e-06, "loss": 0.019, "step": 154900 }, { "epoch": 14.468402875011668, "grad_norm": 0.7188334465026855, "learning_rate": 3.5598054802207056e-06, "loss": 0.0224, "step": 155000 }, { "epoch": 14.468402875011668, "eval_accuracy": 0.6992998833138856, "eval_f1": 0.823898370967172, "eval_loss": 0.16083230078220367, "eval_roc_auc": 0.9058354578001432, "eval_runtime": 312.8196, "eval_samples_per_second": 136.98, "eval_steps_per_second": 136.98, "step": 155000 }, { "epoch": 14.477737328479417, "grad_norm": 1.2692539691925049, "learning_rate": 3.5588702889740957e-06, "loss": 0.0226, "step": 155100 }, { "epoch": 14.487071781947167, "grad_norm": 1.6504724025726318, "learning_rate": 3.5579350977274857e-06, "loss": 0.0215, "step": 155200 }, { "epoch": 14.496406235414916, "grad_norm": 1.4216066598892212, "learning_rate": 3.5569999064808757e-06, "loss": 0.0208, "step": 155300 }, { "epoch": 14.505740688882666, "grad_norm": 2.143620491027832, "learning_rate": 3.5560647152342657e-06, "loss": 0.022, "step": 155400 }, { "epoch": 14.515075142350415, "grad_norm": 1.4977740049362183, "learning_rate": 3.5551295239876558e-06, "loss": 0.0226, "step": 155500 }, { "epoch": 14.524409595818165, "grad_norm": 0.9961738586425781, "learning_rate": 3.5541943327410462e-06, "loss": 0.0228, "step": 155600 }, { "epoch": 14.533744049285914, "grad_norm": 2.361504554748535, "learning_rate": 3.5532591414944354e-06, "loss": 0.0211, "step": 155700 }, { "epoch": 14.543078502753664, "grad_norm": 1.1056783199310303, "learning_rate": 3.552323950247826e-06, "loss": 0.0278, "step": 155800 }, { "epoch": 14.552412956221414, "grad_norm": 2.7351489067077637, "learning_rate": 3.551388759001216e-06, "loss": 0.0249, "step": 155900 }, { "epoch": 14.561747409689163, "grad_norm": 2.6126105785369873, "learning_rate": 3.550453567754606e-06, "loss": 0.0242, "step": 156000 }, { "epoch": 14.571081863156913, "grad_norm": 0.8220415115356445, "learning_rate": 3.549518376507996e-06, "loss": 0.0268, "step": 156100 }, { "epoch": 14.580416316624662, "grad_norm": 2.5709288120269775, "learning_rate": 3.548583185261386e-06, "loss": 0.0264, "step": 156200 }, { "epoch": 14.589750770092412, "grad_norm": 2.750049591064453, "learning_rate": 3.547647994014776e-06, "loss": 0.0255, "step": 156300 }, { "epoch": 14.599085223560161, "grad_norm": 2.7892022132873535, "learning_rate": 3.5467128027681665e-06, "loss": 0.0217, "step": 156400 }, { "epoch": 14.60841967702791, "grad_norm": 1.0601885318756104, "learning_rate": 3.5457776115215565e-06, "loss": 0.0258, "step": 156500 }, { "epoch": 14.61775413049566, "grad_norm": 1.8517082929611206, "learning_rate": 3.5448424202749465e-06, "loss": 0.0224, "step": 156600 }, { "epoch": 14.62708858396341, "grad_norm": 2.89884877204895, "learning_rate": 3.5439072290283366e-06, "loss": 0.0247, "step": 156700 }, { "epoch": 14.636423037431157, "grad_norm": 2.001084804534912, "learning_rate": 3.5429720377817266e-06, "loss": 0.0173, "step": 156800 }, { "epoch": 14.645757490898909, "grad_norm": 0.2015133500099182, "learning_rate": 3.5420368465351166e-06, "loss": 0.0272, "step": 156900 }, { "epoch": 14.655091944366657, "grad_norm": 1.0811923742294312, "learning_rate": 3.541101655288507e-06, "loss": 0.0239, "step": 157000 }, { "epoch": 14.664426397834406, "grad_norm": 1.6980054378509521, "learning_rate": 3.540166464041897e-06, "loss": 0.0232, "step": 157100 }, { "epoch": 14.673760851302156, "grad_norm": 1.261167049407959, "learning_rate": 3.539231272795287e-06, "loss": 0.0218, "step": 157200 }, { "epoch": 14.683095304769905, "grad_norm": 1.3261240720748901, "learning_rate": 3.538296081548677e-06, "loss": 0.0235, "step": 157300 }, { "epoch": 14.692429758237655, "grad_norm": 2.169097900390625, "learning_rate": 3.537360890302067e-06, "loss": 0.0211, "step": 157400 }, { "epoch": 14.701764211705404, "grad_norm": 1.817237377166748, "learning_rate": 3.5364256990554572e-06, "loss": 0.0238, "step": 157500 }, { "epoch": 14.711098665173154, "grad_norm": 0.198072150349617, "learning_rate": 3.535490507808847e-06, "loss": 0.0223, "step": 157600 }, { "epoch": 14.720433118640903, "grad_norm": 3.545483350753784, "learning_rate": 3.534555316562237e-06, "loss": 0.0255, "step": 157700 }, { "epoch": 14.729767572108653, "grad_norm": 2.0467021465301514, "learning_rate": 3.5336201253156273e-06, "loss": 0.0203, "step": 157800 }, { "epoch": 14.739102025576402, "grad_norm": 6.054535865783691, "learning_rate": 3.5326849340690174e-06, "loss": 0.0214, "step": 157900 }, { "epoch": 14.748436479044152, "grad_norm": 3.52366042137146, "learning_rate": 3.5317497428224074e-06, "loss": 0.0269, "step": 158000 }, { "epoch": 14.757770932511901, "grad_norm": 3.3212854862213135, "learning_rate": 3.5308145515757974e-06, "loss": 0.0252, "step": 158100 }, { "epoch": 14.76710538597965, "grad_norm": 4.061441421508789, "learning_rate": 3.5298793603291875e-06, "loss": 0.0205, "step": 158200 }, { "epoch": 14.7764398394474, "grad_norm": 3.0583701133728027, "learning_rate": 3.5289441690825775e-06, "loss": 0.0225, "step": 158300 }, { "epoch": 14.78577429291515, "grad_norm": 0.3098071217536926, "learning_rate": 3.528008977835968e-06, "loss": 0.0254, "step": 158400 }, { "epoch": 14.7951087463829, "grad_norm": 1.8502284288406372, "learning_rate": 3.527073786589358e-06, "loss": 0.0258, "step": 158500 }, { "epoch": 14.804443199850649, "grad_norm": 2.8419876098632812, "learning_rate": 3.526138595342748e-06, "loss": 0.027, "step": 158600 }, { "epoch": 14.813777653318398, "grad_norm": 1.2209020853042603, "learning_rate": 3.525203404096138e-06, "loss": 0.02, "step": 158700 }, { "epoch": 14.823112106786148, "grad_norm": 0.6389832496643066, "learning_rate": 3.524268212849528e-06, "loss": 0.0231, "step": 158800 }, { "epoch": 14.832446560253898, "grad_norm": 0.7608928680419922, "learning_rate": 3.523333021602918e-06, "loss": 0.024, "step": 158900 }, { "epoch": 14.841781013721647, "grad_norm": 0.6924042701721191, "learning_rate": 3.5223978303563085e-06, "loss": 0.0239, "step": 159000 }, { "epoch": 14.851115467189397, "grad_norm": 1.9137835502624512, "learning_rate": 3.5214626391096986e-06, "loss": 0.0253, "step": 159100 }, { "epoch": 14.860449920657146, "grad_norm": 3.6974387168884277, "learning_rate": 3.5205274478630886e-06, "loss": 0.023, "step": 159200 }, { "epoch": 14.869784374124896, "grad_norm": 1.773364543914795, "learning_rate": 3.5195922566164786e-06, "loss": 0.023, "step": 159300 }, { "epoch": 14.879118827592645, "grad_norm": 0.9080832600593567, "learning_rate": 3.5186570653698682e-06, "loss": 0.0246, "step": 159400 }, { "epoch": 14.888453281060395, "grad_norm": 4.469156742095947, "learning_rate": 3.5177218741232583e-06, "loss": 0.023, "step": 159500 }, { "epoch": 14.897787734528144, "grad_norm": 1.8691493272781372, "learning_rate": 3.5167866828766483e-06, "loss": 0.0277, "step": 159600 }, { "epoch": 14.907122187995892, "grad_norm": 0.609882652759552, "learning_rate": 3.5158514916300383e-06, "loss": 0.0243, "step": 159700 }, { "epoch": 14.916456641463641, "grad_norm": 0.13328823447227478, "learning_rate": 3.5149163003834284e-06, "loss": 0.0234, "step": 159800 }, { "epoch": 14.925791094931391, "grad_norm": 5.28687047958374, "learning_rate": 3.513981109136819e-06, "loss": 0.0223, "step": 159900 }, { "epoch": 14.93512554839914, "grad_norm": 3.0480828285217285, "learning_rate": 3.513045917890209e-06, "loss": 0.026, "step": 160000 }, { "epoch": 14.93512554839914, "eval_accuracy": 0.7007934655775963, "eval_f1": 0.826254079110556, "eval_loss": 0.16089195013046265, "eval_roc_auc": 0.908682781616622, "eval_runtime": 362.6415, "eval_samples_per_second": 118.161, "eval_steps_per_second": 118.161, "step": 160000 }, { "epoch": 14.94446000186689, "grad_norm": 2.701669216156006, "learning_rate": 3.512110726643599e-06, "loss": 0.0228, "step": 160100 }, { "epoch": 14.95379445533464, "grad_norm": 3.3312952518463135, "learning_rate": 3.511175535396989e-06, "loss": 0.0253, "step": 160200 }, { "epoch": 14.963128908802389, "grad_norm": 2.578136920928955, "learning_rate": 3.510240344150379e-06, "loss": 0.0221, "step": 160300 }, { "epoch": 14.972463362270139, "grad_norm": 1.983435869216919, "learning_rate": 3.509305152903769e-06, "loss": 0.0216, "step": 160400 }, { "epoch": 14.981797815737888, "grad_norm": 1.0367134809494019, "learning_rate": 3.5083699616571594e-06, "loss": 0.0211, "step": 160500 }, { "epoch": 14.991132269205638, "grad_norm": 1.3325797319412231, "learning_rate": 3.5074347704105495e-06, "loss": 0.019, "step": 160600 }, { "epoch": 15.000466722673387, "grad_norm": 2.1416938304901123, "learning_rate": 3.5064995791639395e-06, "loss": 0.0228, "step": 160700 }, { "epoch": 15.009801176141137, "grad_norm": 3.232598304748535, "learning_rate": 3.5055643879173295e-06, "loss": 0.0196, "step": 160800 }, { "epoch": 15.019135629608886, "grad_norm": 2.668543577194214, "learning_rate": 3.5046291966707195e-06, "loss": 0.0215, "step": 160900 }, { "epoch": 15.028470083076636, "grad_norm": 1.4536118507385254, "learning_rate": 3.5036940054241096e-06, "loss": 0.0178, "step": 161000 }, { "epoch": 15.037804536544385, "grad_norm": 0.9958510994911194, "learning_rate": 3.5027588141775e-06, "loss": 0.0209, "step": 161100 }, { "epoch": 15.047138990012135, "grad_norm": 1.889060378074646, "learning_rate": 3.5018236229308892e-06, "loss": 0.0216, "step": 161200 }, { "epoch": 15.056473443479884, "grad_norm": 1.852758765220642, "learning_rate": 3.5008884316842797e-06, "loss": 0.0178, "step": 161300 }, { "epoch": 15.065807896947634, "grad_norm": 3.941452980041504, "learning_rate": 3.4999532404376697e-06, "loss": 0.0187, "step": 161400 }, { "epoch": 15.075142350415383, "grad_norm": 1.598854899406433, "learning_rate": 3.4990180491910597e-06, "loss": 0.0193, "step": 161500 }, { "epoch": 15.084476803883133, "grad_norm": 3.6824722290039062, "learning_rate": 3.4980828579444498e-06, "loss": 0.0199, "step": 161600 }, { "epoch": 15.093811257350882, "grad_norm": 1.965414047241211, "learning_rate": 3.49714766669784e-06, "loss": 0.0197, "step": 161700 }, { "epoch": 15.103145710818632, "grad_norm": 0.9170790910720825, "learning_rate": 3.49621247545123e-06, "loss": 0.0205, "step": 161800 }, { "epoch": 15.112480164286382, "grad_norm": 1.160752773284912, "learning_rate": 3.4952772842046203e-06, "loss": 0.0194, "step": 161900 }, { "epoch": 15.121814617754131, "grad_norm": 1.053518295288086, "learning_rate": 3.4943420929580103e-06, "loss": 0.0175, "step": 162000 }, { "epoch": 15.13114907122188, "grad_norm": 1.384353518486023, "learning_rate": 3.4934069017114003e-06, "loss": 0.0193, "step": 162100 }, { "epoch": 15.14048352468963, "grad_norm": 1.1754568815231323, "learning_rate": 3.4924717104647904e-06, "loss": 0.0195, "step": 162200 }, { "epoch": 15.14981797815738, "grad_norm": 1.611194372177124, "learning_rate": 3.4915365192181804e-06, "loss": 0.0215, "step": 162300 }, { "epoch": 15.15915243162513, "grad_norm": 0.5550276637077332, "learning_rate": 3.4906013279715704e-06, "loss": 0.0204, "step": 162400 }, { "epoch": 15.168486885092879, "grad_norm": 0.4574323296546936, "learning_rate": 3.489666136724961e-06, "loss": 0.0189, "step": 162500 }, { "epoch": 15.177821338560626, "grad_norm": 1.6854791641235352, "learning_rate": 3.488730945478351e-06, "loss": 0.0228, "step": 162600 }, { "epoch": 15.187155792028376, "grad_norm": 0.8447314500808716, "learning_rate": 3.487795754231741e-06, "loss": 0.0209, "step": 162700 }, { "epoch": 15.196490245496125, "grad_norm": 2.3692245483398438, "learning_rate": 3.486860562985131e-06, "loss": 0.0233, "step": 162800 }, { "epoch": 15.205824698963875, "grad_norm": 1.7693450450897217, "learning_rate": 3.485925371738521e-06, "loss": 0.0233, "step": 162900 }, { "epoch": 15.215159152431625, "grad_norm": 1.1947965621948242, "learning_rate": 3.4849901804919106e-06, "loss": 0.0207, "step": 163000 }, { "epoch": 15.224493605899374, "grad_norm": 2.543220043182373, "learning_rate": 3.4840549892453006e-06, "loss": 0.0179, "step": 163100 }, { "epoch": 15.233828059367124, "grad_norm": 0.9260329008102417, "learning_rate": 3.4831197979986907e-06, "loss": 0.0185, "step": 163200 }, { "epoch": 15.243162512834873, "grad_norm": 2.019254446029663, "learning_rate": 3.4821846067520807e-06, "loss": 0.0183, "step": 163300 }, { "epoch": 15.252496966302623, "grad_norm": 2.3406636714935303, "learning_rate": 3.481249415505471e-06, "loss": 0.0209, "step": 163400 }, { "epoch": 15.261831419770372, "grad_norm": 1.0455573797225952, "learning_rate": 3.480314224258861e-06, "loss": 0.0195, "step": 163500 }, { "epoch": 15.271165873238122, "grad_norm": 4.441176414489746, "learning_rate": 3.4793790330122512e-06, "loss": 0.0248, "step": 163600 }, { "epoch": 15.280500326705871, "grad_norm": 2.372380018234253, "learning_rate": 3.4784438417656413e-06, "loss": 0.0211, "step": 163700 }, { "epoch": 15.28983478017362, "grad_norm": 2.6047537326812744, "learning_rate": 3.4775086505190313e-06, "loss": 0.0246, "step": 163800 }, { "epoch": 15.29916923364137, "grad_norm": 3.0051984786987305, "learning_rate": 3.4765734592724213e-06, "loss": 0.0177, "step": 163900 }, { "epoch": 15.30850368710912, "grad_norm": 2.350534200668335, "learning_rate": 3.4756382680258118e-06, "loss": 0.0212, "step": 164000 }, { "epoch": 15.31783814057687, "grad_norm": 1.9883819818496704, "learning_rate": 3.474703076779202e-06, "loss": 0.0229, "step": 164100 }, { "epoch": 15.327172594044619, "grad_norm": 3.0060946941375732, "learning_rate": 3.473767885532592e-06, "loss": 0.0188, "step": 164200 }, { "epoch": 15.336507047512368, "grad_norm": 1.872897982597351, "learning_rate": 3.472832694285982e-06, "loss": 0.0209, "step": 164300 }, { "epoch": 15.345841500980118, "grad_norm": 1.389573335647583, "learning_rate": 3.471897503039372e-06, "loss": 0.019, "step": 164400 }, { "epoch": 15.355175954447867, "grad_norm": 0.9667443633079529, "learning_rate": 3.470962311792762e-06, "loss": 0.0213, "step": 164500 }, { "epoch": 15.364510407915617, "grad_norm": 2.1915249824523926, "learning_rate": 3.4700271205461524e-06, "loss": 0.0232, "step": 164600 }, { "epoch": 15.373844861383366, "grad_norm": 1.7258248329162598, "learning_rate": 3.4690919292995424e-06, "loss": 0.0205, "step": 164700 }, { "epoch": 15.383179314851116, "grad_norm": 1.5410066843032837, "learning_rate": 3.468156738052932e-06, "loss": 0.0184, "step": 164800 }, { "epoch": 15.392513768318866, "grad_norm": 0.22864247858524323, "learning_rate": 3.467221546806322e-06, "loss": 0.0188, "step": 164900 }, { "epoch": 15.401848221786615, "grad_norm": 2.102827548980713, "learning_rate": 3.466286355559712e-06, "loss": 0.0191, "step": 165000 }, { "epoch": 15.401848221786615, "eval_accuracy": 0.6998599766627771, "eval_f1": 0.8253020511379601, "eval_loss": 0.17148452997207642, "eval_roc_auc": 0.9087469584505901, "eval_runtime": 435.5222, "eval_samples_per_second": 98.388, "eval_steps_per_second": 98.388, "step": 165000 }, { "epoch": 15.411182675254365, "grad_norm": 3.9069650173187256, "learning_rate": 3.465351164313102e-06, "loss": 0.0209, "step": 165100 }, { "epoch": 15.420517128722114, "grad_norm": 1.9861966371536255, "learning_rate": 3.464415973066492e-06, "loss": 0.0224, "step": 165200 }, { "epoch": 15.429851582189864, "grad_norm": 4.59876823425293, "learning_rate": 3.463480781819882e-06, "loss": 0.0186, "step": 165300 }, { "epoch": 15.439186035657613, "grad_norm": 2.6602094173431396, "learning_rate": 3.4625455905732726e-06, "loss": 0.0207, "step": 165400 }, { "epoch": 15.448520489125361, "grad_norm": 1.689025640487671, "learning_rate": 3.4616103993266627e-06, "loss": 0.0199, "step": 165500 }, { "epoch": 15.45785494259311, "grad_norm": 1.71340811252594, "learning_rate": 3.4606752080800527e-06, "loss": 0.0182, "step": 165600 }, { "epoch": 15.46718939606086, "grad_norm": 2.8050873279571533, "learning_rate": 3.4597400168334427e-06, "loss": 0.0214, "step": 165700 }, { "epoch": 15.47652384952861, "grad_norm": 2.2533750534057617, "learning_rate": 3.4588048255868327e-06, "loss": 0.02, "step": 165800 }, { "epoch": 15.485858302996359, "grad_norm": 2.7577602863311768, "learning_rate": 3.4578696343402228e-06, "loss": 0.0216, "step": 165900 }, { "epoch": 15.495192756464109, "grad_norm": 1.9418126344680786, "learning_rate": 3.4569344430936132e-06, "loss": 0.0199, "step": 166000 }, { "epoch": 15.504527209931858, "grad_norm": 2.4071531295776367, "learning_rate": 3.4559992518470033e-06, "loss": 0.0205, "step": 166100 }, { "epoch": 15.513861663399608, "grad_norm": 4.5172834396362305, "learning_rate": 3.4550640606003933e-06, "loss": 0.0194, "step": 166200 }, { "epoch": 15.523196116867357, "grad_norm": 3.2387354373931885, "learning_rate": 3.4541288693537833e-06, "loss": 0.0243, "step": 166300 }, { "epoch": 15.532530570335107, "grad_norm": 3.1692826747894287, "learning_rate": 3.4531936781071734e-06, "loss": 0.0205, "step": 166400 }, { "epoch": 15.541865023802856, "grad_norm": 2.1711254119873047, "learning_rate": 3.4522584868605634e-06, "loss": 0.0188, "step": 166500 }, { "epoch": 15.551199477270606, "grad_norm": 1.1504786014556885, "learning_rate": 3.451323295613953e-06, "loss": 0.022, "step": 166600 }, { "epoch": 15.560533930738355, "grad_norm": 3.3450043201446533, "learning_rate": 3.450388104367343e-06, "loss": 0.0173, "step": 166700 }, { "epoch": 15.569868384206105, "grad_norm": 0.8004257082939148, "learning_rate": 3.4494529131207335e-06, "loss": 0.0193, "step": 166800 }, { "epoch": 15.579202837673854, "grad_norm": 2.5337862968444824, "learning_rate": 3.4485177218741235e-06, "loss": 0.0169, "step": 166900 }, { "epoch": 15.588537291141604, "grad_norm": 5.512589931488037, "learning_rate": 3.4475825306275135e-06, "loss": 0.02, "step": 167000 }, { "epoch": 15.597871744609353, "grad_norm": 5.234777927398682, "learning_rate": 3.4466473393809036e-06, "loss": 0.0212, "step": 167100 }, { "epoch": 15.607206198077103, "grad_norm": 1.6575056314468384, "learning_rate": 3.4457121481342936e-06, "loss": 0.0185, "step": 167200 }, { "epoch": 15.616540651544852, "grad_norm": 0.8412301540374756, "learning_rate": 3.4447769568876836e-06, "loss": 0.021, "step": 167300 }, { "epoch": 15.625875105012602, "grad_norm": 0.6516097784042358, "learning_rate": 3.4438417656410737e-06, "loss": 0.0206, "step": 167400 }, { "epoch": 15.635209558480351, "grad_norm": 1.1677939891815186, "learning_rate": 3.442906574394464e-06, "loss": 0.0191, "step": 167500 }, { "epoch": 15.644544011948101, "grad_norm": 4.379790306091309, "learning_rate": 3.441971383147854e-06, "loss": 0.0198, "step": 167600 }, { "epoch": 15.65387846541585, "grad_norm": 1.0996187925338745, "learning_rate": 3.441036191901244e-06, "loss": 0.0212, "step": 167700 }, { "epoch": 15.6632129188836, "grad_norm": 1.681767463684082, "learning_rate": 3.440101000654634e-06, "loss": 0.021, "step": 167800 }, { "epoch": 15.67254737235135, "grad_norm": 2.0261099338531494, "learning_rate": 3.4391658094080242e-06, "loss": 0.0207, "step": 167900 }, { "epoch": 15.681881825819099, "grad_norm": 3.7864181995391846, "learning_rate": 3.4382306181614143e-06, "loss": 0.0213, "step": 168000 }, { "epoch": 15.691216279286849, "grad_norm": 1.6322617530822754, "learning_rate": 3.4372954269148047e-06, "loss": 0.0171, "step": 168100 }, { "epoch": 15.700550732754596, "grad_norm": 2.6018619537353516, "learning_rate": 3.4363602356681948e-06, "loss": 0.0216, "step": 168200 }, { "epoch": 15.709885186222346, "grad_norm": 2.3751518726348877, "learning_rate": 3.4354250444215848e-06, "loss": 0.0227, "step": 168300 }, { "epoch": 15.719219639690095, "grad_norm": 3.35097336769104, "learning_rate": 3.4344898531749744e-06, "loss": 0.0199, "step": 168400 }, { "epoch": 15.728554093157845, "grad_norm": 2.2200629711151123, "learning_rate": 3.4335546619283644e-06, "loss": 0.0193, "step": 168500 }, { "epoch": 15.737888546625594, "grad_norm": 1.8708114624023438, "learning_rate": 3.4326194706817545e-06, "loss": 0.0203, "step": 168600 }, { "epoch": 15.747223000093344, "grad_norm": 1.4379018545150757, "learning_rate": 3.4316842794351445e-06, "loss": 0.0188, "step": 168700 }, { "epoch": 15.756557453561093, "grad_norm": 2.6426608562469482, "learning_rate": 3.4307490881885345e-06, "loss": 0.0196, "step": 168800 }, { "epoch": 15.765891907028843, "grad_norm": 2.0259816646575928, "learning_rate": 3.429813896941925e-06, "loss": 0.021, "step": 168900 }, { "epoch": 15.775226360496593, "grad_norm": 3.3965206146240234, "learning_rate": 3.428878705695315e-06, "loss": 0.0183, "step": 169000 }, { "epoch": 15.784560813964342, "grad_norm": 3.4316554069519043, "learning_rate": 3.427943514448705e-06, "loss": 0.0215, "step": 169100 }, { "epoch": 15.793895267432092, "grad_norm": 1.1840797662734985, "learning_rate": 3.427008323202095e-06, "loss": 0.0189, "step": 169200 }, { "epoch": 15.803229720899841, "grad_norm": 0.8700115084648132, "learning_rate": 3.426073131955485e-06, "loss": 0.0202, "step": 169300 }, { "epoch": 15.81256417436759, "grad_norm": 0.4582575559616089, "learning_rate": 3.425137940708875e-06, "loss": 0.0219, "step": 169400 }, { "epoch": 15.82189862783534, "grad_norm": 2.813271999359131, "learning_rate": 3.4242027494622656e-06, "loss": 0.0216, "step": 169500 }, { "epoch": 15.83123308130309, "grad_norm": 2.7789201736450195, "learning_rate": 3.4232675582156556e-06, "loss": 0.0201, "step": 169600 }, { "epoch": 15.84056753477084, "grad_norm": 1.8325895071029663, "learning_rate": 3.4223323669690456e-06, "loss": 0.0188, "step": 169700 }, { "epoch": 15.849901988238589, "grad_norm": 2.9798355102539062, "learning_rate": 3.4213971757224357e-06, "loss": 0.0201, "step": 169800 }, { "epoch": 15.859236441706338, "grad_norm": 1.8630480766296387, "learning_rate": 3.4204619844758257e-06, "loss": 0.0223, "step": 169900 }, { "epoch": 15.868570895174088, "grad_norm": 2.7050940990448, "learning_rate": 3.4195267932292157e-06, "loss": 0.0192, "step": 170000 }, { "epoch": 15.868570895174088, "eval_accuracy": 0.6969428238039673, "eval_f1": 0.8256821493704244, "eval_loss": 0.17533518373966217, "eval_roc_auc": 0.9111433540163296, "eval_runtime": 425.1168, "eval_samples_per_second": 100.796, "eval_steps_per_second": 100.796, "step": 170000 }, { "epoch": 15.877905348641837, "grad_norm": 3.073798656463623, "learning_rate": 3.418591601982606e-06, "loss": 0.0227, "step": 170100 }, { "epoch": 15.887239802109587, "grad_norm": 2.402926206588745, "learning_rate": 3.4176564107359954e-06, "loss": 0.0196, "step": 170200 }, { "epoch": 15.896574255577336, "grad_norm": 1.9393365383148193, "learning_rate": 3.416721219489386e-06, "loss": 0.0228, "step": 170300 }, { "epoch": 15.905908709045086, "grad_norm": 0.5633155703544617, "learning_rate": 3.415786028242776e-06, "loss": 0.0216, "step": 170400 }, { "epoch": 15.915243162512835, "grad_norm": 0.9566142559051514, "learning_rate": 3.414850836996166e-06, "loss": 0.0219, "step": 170500 }, { "epoch": 15.924577615980585, "grad_norm": 1.352370023727417, "learning_rate": 3.413915645749556e-06, "loss": 0.0199, "step": 170600 }, { "epoch": 15.933912069448334, "grad_norm": 2.169187068939209, "learning_rate": 3.412980454502946e-06, "loss": 0.0214, "step": 170700 }, { "epoch": 15.943246522916084, "grad_norm": 4.479240417480469, "learning_rate": 3.412045263256336e-06, "loss": 0.0201, "step": 170800 }, { "epoch": 15.952580976383834, "grad_norm": 1.6532189846038818, "learning_rate": 3.4111100720097264e-06, "loss": 0.023, "step": 170900 }, { "epoch": 15.961915429851583, "grad_norm": 1.9649741649627686, "learning_rate": 3.4101748807631165e-06, "loss": 0.0213, "step": 171000 }, { "epoch": 15.97124988331933, "grad_norm": 3.3526010513305664, "learning_rate": 3.4092396895165065e-06, "loss": 0.0204, "step": 171100 }, { "epoch": 15.98058433678708, "grad_norm": 1.291989803314209, "learning_rate": 3.4083044982698965e-06, "loss": 0.0215, "step": 171200 }, { "epoch": 15.98991879025483, "grad_norm": 2.334669828414917, "learning_rate": 3.4073693070232865e-06, "loss": 0.023, "step": 171300 }, { "epoch": 15.99925324372258, "grad_norm": 1.6055434942245483, "learning_rate": 3.4064341157766766e-06, "loss": 0.0198, "step": 171400 }, { "epoch": 16.00858769719033, "grad_norm": 4.212900161743164, "learning_rate": 3.4054989245300666e-06, "loss": 0.0168, "step": 171500 }, { "epoch": 16.01792215065808, "grad_norm": 0.7508007884025574, "learning_rate": 3.404563733283457e-06, "loss": 0.0157, "step": 171600 }, { "epoch": 16.027256604125828, "grad_norm": 1.248660683631897, "learning_rate": 3.403628542036847e-06, "loss": 0.015, "step": 171700 }, { "epoch": 16.03659105759358, "grad_norm": 1.241773247718811, "learning_rate": 3.402693350790237e-06, "loss": 0.0163, "step": 171800 }, { "epoch": 16.045925511061327, "grad_norm": 2.696409225463867, "learning_rate": 3.401758159543627e-06, "loss": 0.0183, "step": 171900 }, { "epoch": 16.05525996452908, "grad_norm": 1.3441718816757202, "learning_rate": 3.4008229682970168e-06, "loss": 0.0214, "step": 172000 }, { "epoch": 16.064594417996826, "grad_norm": 0.9939357042312622, "learning_rate": 3.399887777050407e-06, "loss": 0.0185, "step": 172100 }, { "epoch": 16.073928871464577, "grad_norm": 2.508540630340576, "learning_rate": 3.398952585803797e-06, "loss": 0.0175, "step": 172200 }, { "epoch": 16.083263324932325, "grad_norm": 3.2755777835845947, "learning_rate": 3.398017394557187e-06, "loss": 0.0167, "step": 172300 }, { "epoch": 16.092597778400076, "grad_norm": 1.86623215675354, "learning_rate": 3.3970822033105773e-06, "loss": 0.0176, "step": 172400 }, { "epoch": 16.101932231867824, "grad_norm": 1.3291388750076294, "learning_rate": 3.3961470120639673e-06, "loss": 0.0165, "step": 172500 }, { "epoch": 16.111266685335572, "grad_norm": 2.5755321979522705, "learning_rate": 3.3952118208173574e-06, "loss": 0.0152, "step": 172600 }, { "epoch": 16.120601138803323, "grad_norm": 1.6198773384094238, "learning_rate": 3.3942766295707474e-06, "loss": 0.0185, "step": 172700 }, { "epoch": 16.12993559227107, "grad_norm": 0.34505578875541687, "learning_rate": 3.3933414383241374e-06, "loss": 0.0186, "step": 172800 }, { "epoch": 16.139270045738822, "grad_norm": 1.086126446723938, "learning_rate": 3.3924062470775275e-06, "loss": 0.0153, "step": 172900 }, { "epoch": 16.14860449920657, "grad_norm": 1.8989906311035156, "learning_rate": 3.391471055830918e-06, "loss": 0.0158, "step": 173000 }, { "epoch": 16.15793895267432, "grad_norm": 3.368983745574951, "learning_rate": 3.390535864584308e-06, "loss": 0.0169, "step": 173100 }, { "epoch": 16.16727340614207, "grad_norm": 3.6938703060150146, "learning_rate": 3.389600673337698e-06, "loss": 0.017, "step": 173200 }, { "epoch": 16.17660785960982, "grad_norm": 2.0467369556427, "learning_rate": 3.388665482091088e-06, "loss": 0.0157, "step": 173300 }, { "epoch": 16.185942313077568, "grad_norm": 0.8781160116195679, "learning_rate": 3.387730290844478e-06, "loss": 0.0181, "step": 173400 }, { "epoch": 16.19527676654532, "grad_norm": 0.600684404373169, "learning_rate": 3.386795099597868e-06, "loss": 0.018, "step": 173500 }, { "epoch": 16.204611220013067, "grad_norm": 2.63281512260437, "learning_rate": 3.3858599083512585e-06, "loss": 0.0211, "step": 173600 }, { "epoch": 16.21394567348082, "grad_norm": 1.6392271518707275, "learning_rate": 3.3849247171046486e-06, "loss": 0.0169, "step": 173700 }, { "epoch": 16.223280126948566, "grad_norm": 1.9677090644836426, "learning_rate": 3.383989525858038e-06, "loss": 0.0145, "step": 173800 }, { "epoch": 16.232614580416318, "grad_norm": 6.78564977645874, "learning_rate": 3.383054334611428e-06, "loss": 0.0151, "step": 173900 }, { "epoch": 16.241949033884065, "grad_norm": 2.281921625137329, "learning_rate": 3.3821191433648182e-06, "loss": 0.0185, "step": 174000 }, { "epoch": 16.251283487351817, "grad_norm": 1.1907693147659302, "learning_rate": 3.3811839521182083e-06, "loss": 0.0183, "step": 174100 }, { "epoch": 16.260617940819564, "grad_norm": 1.3230631351470947, "learning_rate": 3.3802487608715983e-06, "loss": 0.0181, "step": 174200 }, { "epoch": 16.269952394287316, "grad_norm": 1.0672928094863892, "learning_rate": 3.3793135696249883e-06, "loss": 0.0178, "step": 174300 }, { "epoch": 16.279286847755063, "grad_norm": 0.6883413791656494, "learning_rate": 3.3783783783783788e-06, "loss": 0.0186, "step": 174400 }, { "epoch": 16.288621301222815, "grad_norm": 2.0362436771392822, "learning_rate": 3.377443187131769e-06, "loss": 0.0204, "step": 174500 }, { "epoch": 16.297955754690562, "grad_norm": 1.7583487033843994, "learning_rate": 3.376507995885159e-06, "loss": 0.0171, "step": 174600 }, { "epoch": 16.307290208158314, "grad_norm": 1.933211326599121, "learning_rate": 3.375572804638549e-06, "loss": 0.0167, "step": 174700 }, { "epoch": 16.31662466162606, "grad_norm": 3.052903652191162, "learning_rate": 3.374637613391939e-06, "loss": 0.0156, "step": 174800 }, { "epoch": 16.325959115093813, "grad_norm": 4.310525894165039, "learning_rate": 3.373702422145329e-06, "loss": 0.0217, "step": 174900 }, { "epoch": 16.33529356856156, "grad_norm": 2.327129364013672, "learning_rate": 3.372767230898719e-06, "loss": 0.0183, "step": 175000 }, { "epoch": 16.33529356856156, "eval_accuracy": 0.7008634772462077, "eval_f1": 0.826158038147139, "eval_loss": 0.1793038547039032, "eval_roc_auc": 0.9088756785149937, "eval_runtime": 346.8858, "eval_samples_per_second": 123.528, "eval_steps_per_second": 123.528, "step": 175000 }, { "epoch": 16.344628022029312, "grad_norm": 2.8590006828308105, "learning_rate": 3.3718320396521094e-06, "loss": 0.0199, "step": 175100 }, { "epoch": 16.35396247549706, "grad_norm": 1.8983961343765259, "learning_rate": 3.3708968484054994e-06, "loss": 0.0178, "step": 175200 }, { "epoch": 16.363296928964807, "grad_norm": 2.9639477729797363, "learning_rate": 3.3699616571588895e-06, "loss": 0.0188, "step": 175300 }, { "epoch": 16.37263138243256, "grad_norm": 0.18122395873069763, "learning_rate": 3.3690264659122795e-06, "loss": 0.0175, "step": 175400 }, { "epoch": 16.381965835900306, "grad_norm": 5.8461127281188965, "learning_rate": 3.3680912746656695e-06, "loss": 0.0191, "step": 175500 }, { "epoch": 16.391300289368058, "grad_norm": 1.0261869430541992, "learning_rate": 3.367156083419059e-06, "loss": 0.018, "step": 175600 }, { "epoch": 16.400634742835805, "grad_norm": 2.913017749786377, "learning_rate": 3.366220892172449e-06, "loss": 0.0196, "step": 175700 }, { "epoch": 16.409969196303557, "grad_norm": 2.1028690338134766, "learning_rate": 3.365285700925839e-06, "loss": 0.0143, "step": 175800 }, { "epoch": 16.419303649771305, "grad_norm": 2.2933688163757324, "learning_rate": 3.3643505096792297e-06, "loss": 0.0178, "step": 175900 }, { "epoch": 16.428638103239056, "grad_norm": 2.9289793968200684, "learning_rate": 3.3634153184326197e-06, "loss": 0.0167, "step": 176000 }, { "epoch": 16.437972556706804, "grad_norm": 1.834648847579956, "learning_rate": 3.3624801271860097e-06, "loss": 0.0215, "step": 176100 }, { "epoch": 16.447307010174555, "grad_norm": 0.6361536979675293, "learning_rate": 3.3615449359393997e-06, "loss": 0.017, "step": 176200 }, { "epoch": 16.456641463642303, "grad_norm": 1.57742440700531, "learning_rate": 3.3606097446927898e-06, "loss": 0.0166, "step": 176300 }, { "epoch": 16.465975917110054, "grad_norm": 0.8230921030044556, "learning_rate": 3.35967455344618e-06, "loss": 0.0203, "step": 176400 }, { "epoch": 16.4753103705778, "grad_norm": 3.3650989532470703, "learning_rate": 3.3587393621995703e-06, "loss": 0.0149, "step": 176500 }, { "epoch": 16.484644824045553, "grad_norm": 5.282135963439941, "learning_rate": 3.3578041709529603e-06, "loss": 0.0173, "step": 176600 }, { "epoch": 16.4939792775133, "grad_norm": 2.6329779624938965, "learning_rate": 3.3568689797063503e-06, "loss": 0.0201, "step": 176700 }, { "epoch": 16.503313730981052, "grad_norm": 2.4832940101623535, "learning_rate": 3.3559337884597404e-06, "loss": 0.0171, "step": 176800 }, { "epoch": 16.5126481844488, "grad_norm": 2.429884433746338, "learning_rate": 3.3549985972131304e-06, "loss": 0.0185, "step": 176900 }, { "epoch": 16.52198263791655, "grad_norm": 0.7687972187995911, "learning_rate": 3.3540634059665204e-06, "loss": 0.0206, "step": 177000 }, { "epoch": 16.5313170913843, "grad_norm": 2.8897817134857178, "learning_rate": 3.353128214719911e-06, "loss": 0.0172, "step": 177100 }, { "epoch": 16.54065154485205, "grad_norm": 3.4591829776763916, "learning_rate": 3.352193023473301e-06, "loss": 0.0165, "step": 177200 }, { "epoch": 16.549985998319798, "grad_norm": 2.1899421215057373, "learning_rate": 3.351257832226691e-06, "loss": 0.0223, "step": 177300 }, { "epoch": 16.55932045178755, "grad_norm": 2.63464093208313, "learning_rate": 3.350322640980081e-06, "loss": 0.0191, "step": 177400 }, { "epoch": 16.568654905255297, "grad_norm": 2.951556444168091, "learning_rate": 3.3493874497334706e-06, "loss": 0.0164, "step": 177500 }, { "epoch": 16.57798935872305, "grad_norm": 1.2993179559707642, "learning_rate": 3.3484522584868606e-06, "loss": 0.0186, "step": 177600 }, { "epoch": 16.587323812190796, "grad_norm": 0.518732488155365, "learning_rate": 3.3475170672402506e-06, "loss": 0.0195, "step": 177700 }, { "epoch": 16.596658265658547, "grad_norm": 2.5482800006866455, "learning_rate": 3.3465818759936407e-06, "loss": 0.0204, "step": 177800 }, { "epoch": 16.605992719126295, "grad_norm": 2.611234426498413, "learning_rate": 3.345646684747031e-06, "loss": 0.0204, "step": 177900 }, { "epoch": 16.615327172594043, "grad_norm": 1.382502555847168, "learning_rate": 3.344711493500421e-06, "loss": 0.0155, "step": 178000 }, { "epoch": 16.624661626061794, "grad_norm": 1.742380976676941, "learning_rate": 3.343776302253811e-06, "loss": 0.0171, "step": 178100 }, { "epoch": 16.633996079529542, "grad_norm": 0.7645307779312134, "learning_rate": 3.342841111007201e-06, "loss": 0.0182, "step": 178200 }, { "epoch": 16.643330532997293, "grad_norm": 3.854586362838745, "learning_rate": 3.3419059197605912e-06, "loss": 0.0168, "step": 178300 }, { "epoch": 16.65266498646504, "grad_norm": 3.8064870834350586, "learning_rate": 3.3409707285139813e-06, "loss": 0.0166, "step": 178400 }, { "epoch": 16.661999439932792, "grad_norm": 0.4557892084121704, "learning_rate": 3.3400355372673717e-06, "loss": 0.0216, "step": 178500 }, { "epoch": 16.67133389340054, "grad_norm": 3.5564863681793213, "learning_rate": 3.3391003460207618e-06, "loss": 0.0187, "step": 178600 }, { "epoch": 16.68066834686829, "grad_norm": 1.665201187133789, "learning_rate": 3.3381651547741518e-06, "loss": 0.016, "step": 178700 }, { "epoch": 16.69000280033604, "grad_norm": 2.677339792251587, "learning_rate": 3.337229963527542e-06, "loss": 0.0177, "step": 178800 }, { "epoch": 16.69933725380379, "grad_norm": 0.22744332253932953, "learning_rate": 3.336294772280932e-06, "loss": 0.0167, "step": 178900 }, { "epoch": 16.708671707271538, "grad_norm": 1.7743045091629028, "learning_rate": 3.335359581034322e-06, "loss": 0.0187, "step": 179000 }, { "epoch": 16.71800616073929, "grad_norm": 2.6737351417541504, "learning_rate": 3.334424389787712e-06, "loss": 0.0153, "step": 179100 }, { "epoch": 16.727340614207037, "grad_norm": 2.1863090991973877, "learning_rate": 3.3334891985411024e-06, "loss": 0.0187, "step": 179200 }, { "epoch": 16.73667506767479, "grad_norm": 1.7528609037399292, "learning_rate": 3.3325540072944915e-06, "loss": 0.0197, "step": 179300 }, { "epoch": 16.746009521142536, "grad_norm": 1.7390730381011963, "learning_rate": 3.331618816047882e-06, "loss": 0.0172, "step": 179400 }, { "epoch": 16.755343974610287, "grad_norm": 3.174689769744873, "learning_rate": 3.330683624801272e-06, "loss": 0.0154, "step": 179500 }, { "epoch": 16.764678428078035, "grad_norm": 2.7850730419158936, "learning_rate": 3.329748433554662e-06, "loss": 0.0165, "step": 179600 }, { "epoch": 16.774012881545787, "grad_norm": 4.674572467803955, "learning_rate": 3.328813242308052e-06, "loss": 0.0204, "step": 179700 }, { "epoch": 16.783347335013534, "grad_norm": 0.6823003888130188, "learning_rate": 3.327878051061442e-06, "loss": 0.0192, "step": 179800 }, { "epoch": 16.792681788481286, "grad_norm": 1.7792460918426514, "learning_rate": 3.326942859814832e-06, "loss": 0.0191, "step": 179900 }, { "epoch": 16.802016241949033, "grad_norm": 2.0887157917022705, "learning_rate": 3.3260076685682226e-06, "loss": 0.0188, "step": 180000 }, { "epoch": 16.802016241949033, "eval_accuracy": 0.6976196032672112, "eval_f1": 0.8261143046843604, "eval_loss": 0.18182584643363953, "eval_roc_auc": 0.9109792283830018, "eval_runtime": 287.0269, "eval_samples_per_second": 149.289, "eval_steps_per_second": 149.289, "step": 180000 }, { "epoch": 16.811350695416785, "grad_norm": 0.45861196517944336, "learning_rate": 3.3250724773216126e-06, "loss": 0.0157, "step": 180100 }, { "epoch": 16.820685148884532, "grad_norm": 2.4480178356170654, "learning_rate": 3.3241372860750027e-06, "loss": 0.0186, "step": 180200 }, { "epoch": 16.830019602352284, "grad_norm": 0.3643224239349365, "learning_rate": 3.3232020948283927e-06, "loss": 0.0187, "step": 180300 }, { "epoch": 16.83935405582003, "grad_norm": 2.272106170654297, "learning_rate": 3.3222669035817827e-06, "loss": 0.0183, "step": 180400 }, { "epoch": 16.848688509287783, "grad_norm": 0.9754737615585327, "learning_rate": 3.3213317123351728e-06, "loss": 0.0173, "step": 180500 }, { "epoch": 16.85802296275553, "grad_norm": 2.0219507217407227, "learning_rate": 3.320396521088563e-06, "loss": 0.0177, "step": 180600 }, { "epoch": 16.86735741622328, "grad_norm": 0.21214249730110168, "learning_rate": 3.3194613298419532e-06, "loss": 0.0169, "step": 180700 }, { "epoch": 16.87669186969103, "grad_norm": 0.928091287612915, "learning_rate": 3.3185261385953433e-06, "loss": 0.0212, "step": 180800 }, { "epoch": 16.88602632315878, "grad_norm": 1.7916498184204102, "learning_rate": 3.3175909473487333e-06, "loss": 0.0183, "step": 180900 }, { "epoch": 16.89536077662653, "grad_norm": 1.1259974241256714, "learning_rate": 3.3166557561021233e-06, "loss": 0.0192, "step": 181000 }, { "epoch": 16.904695230094276, "grad_norm": 1.6057769060134888, "learning_rate": 3.315720564855513e-06, "loss": 0.0167, "step": 181100 }, { "epoch": 16.914029683562028, "grad_norm": 0.9318802952766418, "learning_rate": 3.314785373608903e-06, "loss": 0.0164, "step": 181200 }, { "epoch": 16.923364137029775, "grad_norm": 2.6623826026916504, "learning_rate": 3.313850182362293e-06, "loss": 0.0197, "step": 181300 }, { "epoch": 16.932698590497527, "grad_norm": 1.9044229984283447, "learning_rate": 3.3129149911156835e-06, "loss": 0.0188, "step": 181400 }, { "epoch": 16.942033043965274, "grad_norm": 5.556858062744141, "learning_rate": 3.3119797998690735e-06, "loss": 0.0178, "step": 181500 }, { "epoch": 16.951367497433026, "grad_norm": 2.5023000240325928, "learning_rate": 3.3110446086224635e-06, "loss": 0.017, "step": 181600 }, { "epoch": 16.960701950900773, "grad_norm": 1.481224775314331, "learning_rate": 3.3101094173758535e-06, "loss": 0.0189, "step": 181700 }, { "epoch": 16.970036404368525, "grad_norm": 3.049241065979004, "learning_rate": 3.3091742261292436e-06, "loss": 0.0191, "step": 181800 }, { "epoch": 16.979370857836273, "grad_norm": 1.4187257289886475, "learning_rate": 3.3082390348826336e-06, "loss": 0.0178, "step": 181900 }, { "epoch": 16.988705311304024, "grad_norm": 1.1761176586151123, "learning_rate": 3.307303843636024e-06, "loss": 0.0219, "step": 182000 }, { "epoch": 16.99803976477177, "grad_norm": 0.3333366811275482, "learning_rate": 3.306368652389414e-06, "loss": 0.0177, "step": 182100 }, { "epoch": 17.007374218239523, "grad_norm": 0.18102015554904938, "learning_rate": 3.305433461142804e-06, "loss": 0.015, "step": 182200 }, { "epoch": 17.01670867170727, "grad_norm": 1.915571928024292, "learning_rate": 3.304498269896194e-06, "loss": 0.0149, "step": 182300 }, { "epoch": 17.026043125175022, "grad_norm": 2.2430026531219482, "learning_rate": 3.303563078649584e-06, "loss": 0.0159, "step": 182400 }, { "epoch": 17.03537757864277, "grad_norm": 3.814424991607666, "learning_rate": 3.3026278874029742e-06, "loss": 0.0165, "step": 182500 }, { "epoch": 17.04471203211052, "grad_norm": 3.2641093730926514, "learning_rate": 3.3016926961563642e-06, "loss": 0.0127, "step": 182600 }, { "epoch": 17.05404648557827, "grad_norm": 0.877919614315033, "learning_rate": 3.3007575049097547e-06, "loss": 0.0144, "step": 182700 }, { "epoch": 17.06338093904602, "grad_norm": 6.073800563812256, "learning_rate": 3.2998223136631447e-06, "loss": 0.0149, "step": 182800 }, { "epoch": 17.072715392513768, "grad_norm": 0.8520061373710632, "learning_rate": 3.2988871224165343e-06, "loss": 0.0166, "step": 182900 }, { "epoch": 17.08204984598152, "grad_norm": 1.3160456418991089, "learning_rate": 3.2979519311699244e-06, "loss": 0.0172, "step": 183000 }, { "epoch": 17.091384299449267, "grad_norm": 1.493266224861145, "learning_rate": 3.2970167399233144e-06, "loss": 0.0143, "step": 183100 }, { "epoch": 17.100718752917018, "grad_norm": 2.561609983444214, "learning_rate": 3.2960815486767044e-06, "loss": 0.0161, "step": 183200 }, { "epoch": 17.110053206384766, "grad_norm": 1.2349138259887695, "learning_rate": 3.2951463574300945e-06, "loss": 0.0171, "step": 183300 }, { "epoch": 17.119387659852517, "grad_norm": 2.122559070587158, "learning_rate": 3.2942111661834845e-06, "loss": 0.0154, "step": 183400 }, { "epoch": 17.128722113320265, "grad_norm": 2.534268617630005, "learning_rate": 3.293275974936875e-06, "loss": 0.0156, "step": 183500 }, { "epoch": 17.138056566788016, "grad_norm": 1.5321375131607056, "learning_rate": 3.292340783690265e-06, "loss": 0.014, "step": 183600 }, { "epoch": 17.147391020255764, "grad_norm": 2.220184564590454, "learning_rate": 3.291405592443655e-06, "loss": 0.0167, "step": 183700 }, { "epoch": 17.15672547372351, "grad_norm": 3.950584888458252, "learning_rate": 3.290470401197045e-06, "loss": 0.0186, "step": 183800 }, { "epoch": 17.166059927191263, "grad_norm": 3.121062994003296, "learning_rate": 3.289535209950435e-06, "loss": 0.0147, "step": 183900 }, { "epoch": 17.17539438065901, "grad_norm": 1.5267720222473145, "learning_rate": 3.288600018703825e-06, "loss": 0.0147, "step": 184000 }, { "epoch": 17.184728834126762, "grad_norm": 1.0809935331344604, "learning_rate": 3.2876648274572156e-06, "loss": 0.0144, "step": 184100 }, { "epoch": 17.19406328759451, "grad_norm": 1.5286049842834473, "learning_rate": 3.2867296362106056e-06, "loss": 0.0145, "step": 184200 }, { "epoch": 17.20339774106226, "grad_norm": 0.6808598637580872, "learning_rate": 3.2857944449639956e-06, "loss": 0.0157, "step": 184300 }, { "epoch": 17.21273219453001, "grad_norm": 0.5860894918441772, "learning_rate": 3.2848592537173856e-06, "loss": 0.0162, "step": 184400 }, { "epoch": 17.22206664799776, "grad_norm": 1.346124529838562, "learning_rate": 3.2839240624707757e-06, "loss": 0.0177, "step": 184500 }, { "epoch": 17.231401101465508, "grad_norm": 3.370781183242798, "learning_rate": 3.2829888712241657e-06, "loss": 0.015, "step": 184600 }, { "epoch": 17.24073555493326, "grad_norm": 0.6089754104614258, "learning_rate": 3.2820536799775553e-06, "loss": 0.0164, "step": 184700 }, { "epoch": 17.250070008401007, "grad_norm": 1.693347454071045, "learning_rate": 3.2811184887309453e-06, "loss": 0.0155, "step": 184800 }, { "epoch": 17.25940446186876, "grad_norm": 2.980039596557617, "learning_rate": 3.280183297484336e-06, "loss": 0.0154, "step": 184900 }, { "epoch": 17.268738915336506, "grad_norm": 2.218909978866577, "learning_rate": 3.279248106237726e-06, "loss": 0.0185, "step": 185000 }, { "epoch": 17.268738915336506, "eval_accuracy": 0.6979696616102684, "eval_f1": 0.8249149944504149, "eval_loss": 0.19261440634727478, "eval_roc_auc": 0.907408451088048, "eval_runtime": 241.5108, "eval_samples_per_second": 177.425, "eval_steps_per_second": 177.425, "step": 185000 }, { "epoch": 17.278073368804257, "grad_norm": 1.3936904668807983, "learning_rate": 3.278312914991116e-06, "loss": 0.0143, "step": 185100 }, { "epoch": 17.287407822272005, "grad_norm": 0.15477506816387177, "learning_rate": 3.277377723744506e-06, "loss": 0.0173, "step": 185200 }, { "epoch": 17.296742275739756, "grad_norm": 0.04629424959421158, "learning_rate": 3.276442532497896e-06, "loss": 0.0153, "step": 185300 }, { "epoch": 17.306076729207504, "grad_norm": 1.9632600545883179, "learning_rate": 3.275507341251286e-06, "loss": 0.0169, "step": 185400 }, { "epoch": 17.315411182675255, "grad_norm": 2.64042067527771, "learning_rate": 3.2745721500046764e-06, "loss": 0.0164, "step": 185500 }, { "epoch": 17.324745636143003, "grad_norm": 3.5198421478271484, "learning_rate": 3.2736369587580664e-06, "loss": 0.0152, "step": 185600 }, { "epoch": 17.334080089610755, "grad_norm": 1.2429661750793457, "learning_rate": 3.2727017675114565e-06, "loss": 0.0153, "step": 185700 }, { "epoch": 17.343414543078502, "grad_norm": 3.4199001789093018, "learning_rate": 3.2717665762648465e-06, "loss": 0.0145, "step": 185800 }, { "epoch": 17.352748996546254, "grad_norm": 3.340757131576538, "learning_rate": 3.2708313850182365e-06, "loss": 0.0179, "step": 185900 }, { "epoch": 17.362083450014, "grad_norm": 1.8042428493499756, "learning_rate": 3.2698961937716266e-06, "loss": 0.0149, "step": 186000 }, { "epoch": 17.371417903481753, "grad_norm": 2.3757448196411133, "learning_rate": 3.268961002525017e-06, "loss": 0.0154, "step": 186100 }, { "epoch": 17.3807523569495, "grad_norm": 0.8245157599449158, "learning_rate": 3.268025811278407e-06, "loss": 0.0177, "step": 186200 }, { "epoch": 17.39008681041725, "grad_norm": 3.102368116378784, "learning_rate": 3.267090620031797e-06, "loss": 0.0152, "step": 186300 }, { "epoch": 17.399421263885, "grad_norm": 1.51011061668396, "learning_rate": 3.266155428785187e-06, "loss": 0.0148, "step": 186400 }, { "epoch": 17.40875571735275, "grad_norm": 0.7103033661842346, "learning_rate": 3.2652202375385767e-06, "loss": 0.0155, "step": 186500 }, { "epoch": 17.4180901708205, "grad_norm": 1.1427134275436401, "learning_rate": 3.2642850462919667e-06, "loss": 0.0173, "step": 186600 }, { "epoch": 17.427424624288246, "grad_norm": 3.307443857192993, "learning_rate": 3.2633498550453568e-06, "loss": 0.0183, "step": 186700 }, { "epoch": 17.436759077755998, "grad_norm": 1.4269607067108154, "learning_rate": 3.262414663798747e-06, "loss": 0.0137, "step": 186800 }, { "epoch": 17.446093531223745, "grad_norm": 0.8508163690567017, "learning_rate": 3.261479472552137e-06, "loss": 0.0172, "step": 186900 }, { "epoch": 17.455427984691497, "grad_norm": 0.8271816372871399, "learning_rate": 3.2605442813055273e-06, "loss": 0.013, "step": 187000 }, { "epoch": 17.464762438159244, "grad_norm": 1.7666863203048706, "learning_rate": 3.2596090900589173e-06, "loss": 0.0144, "step": 187100 }, { "epoch": 17.474096891626996, "grad_norm": 0.03663098067045212, "learning_rate": 3.2586738988123074e-06, "loss": 0.0159, "step": 187200 }, { "epoch": 17.483431345094743, "grad_norm": 3.540508270263672, "learning_rate": 3.2577387075656974e-06, "loss": 0.0146, "step": 187300 }, { "epoch": 17.492765798562495, "grad_norm": 1.3400294780731201, "learning_rate": 3.2568035163190874e-06, "loss": 0.0149, "step": 187400 }, { "epoch": 17.502100252030242, "grad_norm": 1.362966775894165, "learning_rate": 3.2558683250724774e-06, "loss": 0.0175, "step": 187500 }, { "epoch": 17.511434705497994, "grad_norm": 7.40570592880249, "learning_rate": 3.254933133825868e-06, "loss": 0.0152, "step": 187600 }, { "epoch": 17.52076915896574, "grad_norm": 2.3918066024780273, "learning_rate": 3.253997942579258e-06, "loss": 0.0182, "step": 187700 }, { "epoch": 17.530103612433493, "grad_norm": 2.2025809288024902, "learning_rate": 3.253062751332648e-06, "loss": 0.0162, "step": 187800 }, { "epoch": 17.53943806590124, "grad_norm": 0.2713797390460968, "learning_rate": 3.252127560086038e-06, "loss": 0.016, "step": 187900 }, { "epoch": 17.548772519368992, "grad_norm": 1.3609062433242798, "learning_rate": 3.251192368839428e-06, "loss": 0.0166, "step": 188000 }, { "epoch": 17.55810697283674, "grad_norm": 2.1187899112701416, "learning_rate": 3.250257177592818e-06, "loss": 0.0161, "step": 188100 }, { "epoch": 17.56744142630449, "grad_norm": 0.6589877009391785, "learning_rate": 3.2493219863462085e-06, "loss": 0.014, "step": 188200 }, { "epoch": 17.57677587977224, "grad_norm": 4.7381086349487305, "learning_rate": 3.2483867950995977e-06, "loss": 0.0152, "step": 188300 }, { "epoch": 17.58611033323999, "grad_norm": 2.0269935131073, "learning_rate": 3.247451603852988e-06, "loss": 0.0155, "step": 188400 }, { "epoch": 17.595444786707738, "grad_norm": 2.486924409866333, "learning_rate": 3.246516412606378e-06, "loss": 0.0162, "step": 188500 }, { "epoch": 17.60477924017549, "grad_norm": 0.8726558685302734, "learning_rate": 3.245581221359768e-06, "loss": 0.0155, "step": 188600 }, { "epoch": 17.614113693643237, "grad_norm": 0.0674937516450882, "learning_rate": 3.2446460301131582e-06, "loss": 0.013, "step": 188700 }, { "epoch": 17.623448147110988, "grad_norm": 0.8111122250556946, "learning_rate": 3.2437108388665483e-06, "loss": 0.016, "step": 188800 }, { "epoch": 17.632782600578736, "grad_norm": 2.0692927837371826, "learning_rate": 3.2427756476199383e-06, "loss": 0.0173, "step": 188900 }, { "epoch": 17.642117054046487, "grad_norm": 2.630054235458374, "learning_rate": 3.2418404563733287e-06, "loss": 0.0161, "step": 189000 }, { "epoch": 17.651451507514235, "grad_norm": 9.102375984191895, "learning_rate": 3.2409052651267188e-06, "loss": 0.0177, "step": 189100 }, { "epoch": 17.660785960981986, "grad_norm": 7.8840789794921875, "learning_rate": 3.239970073880109e-06, "loss": 0.0163, "step": 189200 }, { "epoch": 17.670120414449734, "grad_norm": 0.052358876913785934, "learning_rate": 3.239034882633499e-06, "loss": 0.0154, "step": 189300 }, { "epoch": 17.679454867917485, "grad_norm": 14.023784637451172, "learning_rate": 3.238099691386889e-06, "loss": 0.0152, "step": 189400 }, { "epoch": 17.688789321385233, "grad_norm": 0.3715645372867584, "learning_rate": 3.237164500140279e-06, "loss": 0.014, "step": 189500 }, { "epoch": 17.69812377485298, "grad_norm": 1.2945834398269653, "learning_rate": 3.2362293088936694e-06, "loss": 0.0158, "step": 189600 }, { "epoch": 17.707458228320732, "grad_norm": 2.4024267196655273, "learning_rate": 3.2352941176470594e-06, "loss": 0.0182, "step": 189700 }, { "epoch": 17.71679268178848, "grad_norm": 0.40837693214416504, "learning_rate": 3.2343589264004494e-06, "loss": 0.0133, "step": 189800 }, { "epoch": 17.72612713525623, "grad_norm": 0.9932096600532532, "learning_rate": 3.2334237351538394e-06, "loss": 0.0194, "step": 189900 }, { "epoch": 17.73546158872398, "grad_norm": 2.048612117767334, "learning_rate": 3.2324885439072295e-06, "loss": 0.0161, "step": 190000 }, { "epoch": 17.73546158872398, "eval_accuracy": 0.6972928821470245, "eval_f1": 0.8243483749923287, "eval_loss": 0.19110000133514404, "eval_roc_auc": 0.9087290040664636, "eval_runtime": 234.4344, "eval_samples_per_second": 182.78, "eval_steps_per_second": 182.78, "step": 190000 }, { "epoch": 17.74479604219173, "grad_norm": 1.871202826499939, "learning_rate": 3.231553352660619e-06, "loss": 0.0158, "step": 190100 }, { "epoch": 17.754130495659478, "grad_norm": 1.2344913482666016, "learning_rate": 3.230618161414009e-06, "loss": 0.0158, "step": 190200 }, { "epoch": 17.76346494912723, "grad_norm": 0.2932121157646179, "learning_rate": 3.229682970167399e-06, "loss": 0.0155, "step": 190300 }, { "epoch": 17.772799402594977, "grad_norm": 1.8711097240447998, "learning_rate": 3.228747778920789e-06, "loss": 0.017, "step": 190400 }, { "epoch": 17.78213385606273, "grad_norm": 2.22102689743042, "learning_rate": 3.2278125876741796e-06, "loss": 0.0143, "step": 190500 }, { "epoch": 17.791468309530476, "grad_norm": 1.5045928955078125, "learning_rate": 3.2268773964275697e-06, "loss": 0.0174, "step": 190600 }, { "epoch": 17.800802762998227, "grad_norm": 1.049522876739502, "learning_rate": 3.2259422051809597e-06, "loss": 0.0177, "step": 190700 }, { "epoch": 17.810137216465975, "grad_norm": 2.0110294818878174, "learning_rate": 3.2250070139343497e-06, "loss": 0.0138, "step": 190800 }, { "epoch": 17.819471669933726, "grad_norm": 1.8813849687576294, "learning_rate": 3.2240718226877398e-06, "loss": 0.0158, "step": 190900 }, { "epoch": 17.828806123401474, "grad_norm": 1.9658849239349365, "learning_rate": 3.2231366314411298e-06, "loss": 0.019, "step": 191000 }, { "epoch": 17.838140576869225, "grad_norm": 0.2762412130832672, "learning_rate": 3.2222014401945202e-06, "loss": 0.0155, "step": 191100 }, { "epoch": 17.847475030336973, "grad_norm": 1.5228303670883179, "learning_rate": 3.2212662489479103e-06, "loss": 0.0144, "step": 191200 }, { "epoch": 17.856809483804724, "grad_norm": 0.8548043966293335, "learning_rate": 3.2203310577013003e-06, "loss": 0.0164, "step": 191300 }, { "epoch": 17.866143937272472, "grad_norm": 3.041515827178955, "learning_rate": 3.2193958664546903e-06, "loss": 0.0161, "step": 191400 }, { "epoch": 17.875478390740223, "grad_norm": 2.113831043243408, "learning_rate": 3.2184606752080804e-06, "loss": 0.0159, "step": 191500 }, { "epoch": 17.88481284420797, "grad_norm": 3.8627421855926514, "learning_rate": 3.2175254839614704e-06, "loss": 0.0186, "step": 191600 }, { "epoch": 17.894147297675723, "grad_norm": 4.1863112449646, "learning_rate": 3.216590292714861e-06, "loss": 0.0161, "step": 191700 }, { "epoch": 17.90348175114347, "grad_norm": 2.047816753387451, "learning_rate": 3.215655101468251e-06, "loss": 0.0163, "step": 191800 }, { "epoch": 17.91281620461122, "grad_norm": 1.73228919506073, "learning_rate": 3.2147199102216405e-06, "loss": 0.015, "step": 191900 }, { "epoch": 17.92215065807897, "grad_norm": 2.8653736114501953, "learning_rate": 3.2137847189750305e-06, "loss": 0.0152, "step": 192000 }, { "epoch": 17.93148511154672, "grad_norm": 3.2864904403686523, "learning_rate": 3.2128495277284205e-06, "loss": 0.0168, "step": 192100 }, { "epoch": 17.94081956501447, "grad_norm": 4.56441593170166, "learning_rate": 3.2119143364818106e-06, "loss": 0.0139, "step": 192200 }, { "epoch": 17.950154018482216, "grad_norm": 2.940105438232422, "learning_rate": 3.2109791452352006e-06, "loss": 0.0172, "step": 192300 }, { "epoch": 17.959488471949967, "grad_norm": 1.6837762594223022, "learning_rate": 3.2100439539885906e-06, "loss": 0.0189, "step": 192400 }, { "epoch": 17.968822925417715, "grad_norm": 1.6740742921829224, "learning_rate": 3.209108762741981e-06, "loss": 0.0177, "step": 192500 }, { "epoch": 17.978157378885466, "grad_norm": 1.7635782957077026, "learning_rate": 3.208173571495371e-06, "loss": 0.015, "step": 192600 }, { "epoch": 17.987491832353214, "grad_norm": 3.1810507774353027, "learning_rate": 3.207238380248761e-06, "loss": 0.0144, "step": 192700 }, { "epoch": 17.996826285820966, "grad_norm": 0.8500552177429199, "learning_rate": 3.206303189002151e-06, "loss": 0.0136, "step": 192800 }, { "epoch": 18.006160739288713, "grad_norm": 5.76578950881958, "learning_rate": 3.2053679977555412e-06, "loss": 0.0134, "step": 192900 }, { "epoch": 18.015495192756465, "grad_norm": 1.0770704746246338, "learning_rate": 3.2044328065089312e-06, "loss": 0.0128, "step": 193000 }, { "epoch": 18.024829646224212, "grad_norm": 2.0020592212677, "learning_rate": 3.2034976152623217e-06, "loss": 0.0145, "step": 193100 }, { "epoch": 18.034164099691964, "grad_norm": 1.18739652633667, "learning_rate": 3.2025624240157117e-06, "loss": 0.0157, "step": 193200 }, { "epoch": 18.04349855315971, "grad_norm": 0.2606801986694336, "learning_rate": 3.2016272327691018e-06, "loss": 0.0145, "step": 193300 }, { "epoch": 18.052833006627463, "grad_norm": 1.1845996379852295, "learning_rate": 3.200692041522492e-06, "loss": 0.0106, "step": 193400 }, { "epoch": 18.06216746009521, "grad_norm": 0.8093076348304749, "learning_rate": 3.199756850275882e-06, "loss": 0.0148, "step": 193500 }, { "epoch": 18.07150191356296, "grad_norm": 4.37527322769165, "learning_rate": 3.198821659029272e-06, "loss": 0.0119, "step": 193600 }, { "epoch": 18.08083636703071, "grad_norm": 0.13032008707523346, "learning_rate": 3.1978864677826615e-06, "loss": 0.0123, "step": 193700 }, { "epoch": 18.09017082049846, "grad_norm": 2.1523468494415283, "learning_rate": 3.1969512765360515e-06, "loss": 0.0139, "step": 193800 }, { "epoch": 18.09950527396621, "grad_norm": 1.7620820999145508, "learning_rate": 3.196016085289442e-06, "loss": 0.0137, "step": 193900 }, { "epoch": 18.10883972743396, "grad_norm": 0.6018241047859192, "learning_rate": 3.195080894042832e-06, "loss": 0.0132, "step": 194000 }, { "epoch": 18.118174180901708, "grad_norm": 2.9260263442993164, "learning_rate": 3.194145702796222e-06, "loss": 0.0122, "step": 194100 }, { "epoch": 18.12750863436946, "grad_norm": 0.7378761768341064, "learning_rate": 3.193210511549612e-06, "loss": 0.0149, "step": 194200 }, { "epoch": 18.136843087837207, "grad_norm": 2.704927444458008, "learning_rate": 3.192275320303002e-06, "loss": 0.0133, "step": 194300 }, { "epoch": 18.146177541304958, "grad_norm": 1.3752076625823975, "learning_rate": 3.191340129056392e-06, "loss": 0.0166, "step": 194400 }, { "epoch": 18.155511994772706, "grad_norm": 2.4076263904571533, "learning_rate": 3.190404937809782e-06, "loss": 0.0133, "step": 194500 }, { "epoch": 18.164846448240457, "grad_norm": 4.021129608154297, "learning_rate": 3.1894697465631726e-06, "loss": 0.0132, "step": 194600 }, { "epoch": 18.174180901708205, "grad_norm": 4.0093512535095215, "learning_rate": 3.1885345553165626e-06, "loss": 0.0135, "step": 194700 }, { "epoch": 18.183515355175956, "grad_norm": 3.0196726322174072, "learning_rate": 3.1875993640699526e-06, "loss": 0.0141, "step": 194800 }, { "epoch": 18.192849808643704, "grad_norm": 0.559459924697876, "learning_rate": 3.1866641728233427e-06, "loss": 0.0153, "step": 194900 }, { "epoch": 18.202184262111455, "grad_norm": 0.2976052463054657, "learning_rate": 3.1857289815767327e-06, "loss": 0.0147, "step": 195000 }, { "epoch": 18.202184262111455, "eval_accuracy": 0.6971061843640607, "eval_f1": 0.824973123213677, "eval_loss": 0.202330082654953, "eval_roc_auc": 0.9101552788937225, "eval_runtime": 263.6115, "eval_samples_per_second": 162.55, "eval_steps_per_second": 162.55, "step": 195000 }, { "epoch": 18.211518715579203, "grad_norm": 1.1147135496139526, "learning_rate": 3.1847937903301227e-06, "loss": 0.0156, "step": 195100 }, { "epoch": 18.22085316904695, "grad_norm": 3.363304853439331, "learning_rate": 3.183858599083513e-06, "loss": 0.0139, "step": 195200 }, { "epoch": 18.230187622514702, "grad_norm": 2.590541362762451, "learning_rate": 3.1829234078369032e-06, "loss": 0.0136, "step": 195300 }, { "epoch": 18.23952207598245, "grad_norm": 1.030617117881775, "learning_rate": 3.1819882165902933e-06, "loss": 0.0139, "step": 195400 }, { "epoch": 18.2488565294502, "grad_norm": 0.6613557934761047, "learning_rate": 3.1810530253436833e-06, "loss": 0.0137, "step": 195500 }, { "epoch": 18.25819098291795, "grad_norm": 2.38324236869812, "learning_rate": 3.180117834097073e-06, "loss": 0.0155, "step": 195600 }, { "epoch": 18.2675254363857, "grad_norm": 2.766449213027954, "learning_rate": 3.179182642850463e-06, "loss": 0.0133, "step": 195700 }, { "epoch": 18.276859889853448, "grad_norm": 0.2860342562198639, "learning_rate": 3.178247451603853e-06, "loss": 0.011, "step": 195800 }, { "epoch": 18.2861943433212, "grad_norm": 2.215444564819336, "learning_rate": 3.177312260357243e-06, "loss": 0.0144, "step": 195900 }, { "epoch": 18.295528796788947, "grad_norm": 1.807881474494934, "learning_rate": 3.1763770691106334e-06, "loss": 0.0132, "step": 196000 }, { "epoch": 18.304863250256698, "grad_norm": 0.41217857599258423, "learning_rate": 3.1754418778640235e-06, "loss": 0.0136, "step": 196100 }, { "epoch": 18.314197703724446, "grad_norm": 1.1333640813827515, "learning_rate": 3.1745066866174135e-06, "loss": 0.0138, "step": 196200 }, { "epoch": 18.323532157192197, "grad_norm": 3.2474825382232666, "learning_rate": 3.1735714953708035e-06, "loss": 0.0125, "step": 196300 }, { "epoch": 18.332866610659945, "grad_norm": 2.8285465240478516, "learning_rate": 3.1726363041241936e-06, "loss": 0.0134, "step": 196400 }, { "epoch": 18.342201064127696, "grad_norm": 1.5453355312347412, "learning_rate": 3.1717011128775836e-06, "loss": 0.0117, "step": 196500 }, { "epoch": 18.351535517595444, "grad_norm": 3.8479137420654297, "learning_rate": 3.170765921630974e-06, "loss": 0.0141, "step": 196600 }, { "epoch": 18.360869971063195, "grad_norm": 0.4151049256324768, "learning_rate": 3.169830730384364e-06, "loss": 0.0136, "step": 196700 }, { "epoch": 18.370204424530943, "grad_norm": 0.42008137702941895, "learning_rate": 3.168895539137754e-06, "loss": 0.0166, "step": 196800 }, { "epoch": 18.379538877998694, "grad_norm": 2.587353467941284, "learning_rate": 3.167960347891144e-06, "loss": 0.0157, "step": 196900 }, { "epoch": 18.388873331466442, "grad_norm": 1.909388542175293, "learning_rate": 3.167025156644534e-06, "loss": 0.0155, "step": 197000 }, { "epoch": 18.398207784934193, "grad_norm": 0.33268991112709045, "learning_rate": 3.166089965397924e-06, "loss": 0.0129, "step": 197100 }, { "epoch": 18.40754223840194, "grad_norm": 0.6705350875854492, "learning_rate": 3.1651547741513147e-06, "loss": 0.014, "step": 197200 }, { "epoch": 18.416876691869692, "grad_norm": 0.048088543117046356, "learning_rate": 3.1642195829047047e-06, "loss": 0.0127, "step": 197300 }, { "epoch": 18.42621114533744, "grad_norm": 0.33118194341659546, "learning_rate": 3.1632843916580943e-06, "loss": 0.0151, "step": 197400 }, { "epoch": 18.43554559880519, "grad_norm": 1.4365428686141968, "learning_rate": 3.1623492004114843e-06, "loss": 0.0143, "step": 197500 }, { "epoch": 18.44488005227294, "grad_norm": 3.3532814979553223, "learning_rate": 3.1614140091648743e-06, "loss": 0.0122, "step": 197600 }, { "epoch": 18.45421450574069, "grad_norm": 3.4825496673583984, "learning_rate": 3.1604788179182644e-06, "loss": 0.0126, "step": 197700 }, { "epoch": 18.46354895920844, "grad_norm": 0.6478165984153748, "learning_rate": 3.1595436266716544e-06, "loss": 0.0133, "step": 197800 }, { "epoch": 18.47288341267619, "grad_norm": 0.03805939108133316, "learning_rate": 3.1586084354250444e-06, "loss": 0.0124, "step": 197900 }, { "epoch": 18.482217866143937, "grad_norm": 0.4500657021999359, "learning_rate": 3.1576732441784345e-06, "loss": 0.0137, "step": 198000 }, { "epoch": 18.491552319611685, "grad_norm": 0.05304978787899017, "learning_rate": 3.156738052931825e-06, "loss": 0.0149, "step": 198100 }, { "epoch": 18.500886773079436, "grad_norm": 2.1249818801879883, "learning_rate": 3.155802861685215e-06, "loss": 0.0131, "step": 198200 }, { "epoch": 18.510221226547184, "grad_norm": 1.834684133529663, "learning_rate": 3.154867670438605e-06, "loss": 0.0132, "step": 198300 }, { "epoch": 18.519555680014935, "grad_norm": 2.1202757358551025, "learning_rate": 3.153932479191995e-06, "loss": 0.0117, "step": 198400 }, { "epoch": 18.528890133482683, "grad_norm": 0.5491761565208435, "learning_rate": 3.152997287945385e-06, "loss": 0.0144, "step": 198500 }, { "epoch": 18.538224586950435, "grad_norm": 0.14039498567581177, "learning_rate": 3.152062096698775e-06, "loss": 0.0157, "step": 198600 }, { "epoch": 18.547559040418182, "grad_norm": 0.2018619179725647, "learning_rate": 3.1511269054521655e-06, "loss": 0.0148, "step": 198700 }, { "epoch": 18.556893493885934, "grad_norm": 1.9849281311035156, "learning_rate": 3.1501917142055556e-06, "loss": 0.0136, "step": 198800 }, { "epoch": 18.56622794735368, "grad_norm": 2.995671272277832, "learning_rate": 3.1492565229589456e-06, "loss": 0.0137, "step": 198900 }, { "epoch": 18.575562400821433, "grad_norm": 1.6283726692199707, "learning_rate": 3.1483213317123356e-06, "loss": 0.0131, "step": 199000 }, { "epoch": 18.58489685428918, "grad_norm": 2.0294060707092285, "learning_rate": 3.1473861404657257e-06, "loss": 0.0137, "step": 199100 }, { "epoch": 18.59423130775693, "grad_norm": 2.2314114570617676, "learning_rate": 3.1464509492191153e-06, "loss": 0.0147, "step": 199200 }, { "epoch": 18.60356576122468, "grad_norm": 3.9055280685424805, "learning_rate": 3.1455157579725053e-06, "loss": 0.0135, "step": 199300 }, { "epoch": 18.61290021469243, "grad_norm": 4.530161380767822, "learning_rate": 3.1445805667258953e-06, "loss": 0.014, "step": 199400 }, { "epoch": 18.62223466816018, "grad_norm": 2.286137104034424, "learning_rate": 3.1436453754792858e-06, "loss": 0.0156, "step": 199500 }, { "epoch": 18.63156912162793, "grad_norm": 0.9268496632575989, "learning_rate": 3.142710184232676e-06, "loss": 0.0167, "step": 199600 }, { "epoch": 18.640903575095678, "grad_norm": 2.1450774669647217, "learning_rate": 3.141774992986066e-06, "loss": 0.0128, "step": 199700 }, { "epoch": 18.65023802856343, "grad_norm": 1.4313608407974243, "learning_rate": 3.140839801739456e-06, "loss": 0.0143, "step": 199800 }, { "epoch": 18.659572482031177, "grad_norm": 0.04734492674469948, "learning_rate": 3.139904610492846e-06, "loss": 0.0173, "step": 199900 }, { "epoch": 18.668906935498928, "grad_norm": 1.721450924873352, "learning_rate": 3.138969419246236e-06, "loss": 0.0131, "step": 200000 }, { "epoch": 18.668906935498928, "eval_accuracy": 0.698343057176196, "eval_f1": 0.824125794533261, "eval_loss": 0.2038935124874115, "eval_roc_auc": 0.9090800727337026, "eval_runtime": 305.6026, "eval_samples_per_second": 140.215, "eval_steps_per_second": 140.215, "step": 200000 }, { "epoch": 18.678241388966676, "grad_norm": 3.7955429553985596, "learning_rate": 3.1380342279996264e-06, "loss": 0.013, "step": 200100 }, { "epoch": 18.687575842434427, "grad_norm": 0.5376506447792053, "learning_rate": 3.1370990367530164e-06, "loss": 0.0143, "step": 200200 }, { "epoch": 18.696910295902175, "grad_norm": 1.4311047792434692, "learning_rate": 3.1361638455064064e-06, "loss": 0.0136, "step": 200300 }, { "epoch": 18.706244749369926, "grad_norm": 0.5777319669723511, "learning_rate": 3.1352286542597965e-06, "loss": 0.0165, "step": 200400 }, { "epoch": 18.715579202837674, "grad_norm": 0.948178768157959, "learning_rate": 3.1342934630131865e-06, "loss": 0.0143, "step": 200500 }, { "epoch": 18.724913656305425, "grad_norm": 4.82988166809082, "learning_rate": 3.1333582717665765e-06, "loss": 0.016, "step": 200600 }, { "epoch": 18.734248109773173, "grad_norm": 5.463191986083984, "learning_rate": 3.132423080519967e-06, "loss": 0.0142, "step": 200700 }, { "epoch": 18.74358256324092, "grad_norm": 0.07724849879741669, "learning_rate": 3.131487889273357e-06, "loss": 0.0166, "step": 200800 }, { "epoch": 18.752917016708672, "grad_norm": 0.7019322514533997, "learning_rate": 3.130552698026747e-06, "loss": 0.0143, "step": 200900 }, { "epoch": 18.76225147017642, "grad_norm": 1.9100068807601929, "learning_rate": 3.1296175067801367e-06, "loss": 0.0139, "step": 201000 }, { "epoch": 18.77158592364417, "grad_norm": 0.06793493032455444, "learning_rate": 3.1286823155335267e-06, "loss": 0.0148, "step": 201100 }, { "epoch": 18.78092037711192, "grad_norm": 2.6353721618652344, "learning_rate": 3.1277471242869167e-06, "loss": 0.016, "step": 201200 }, { "epoch": 18.79025483057967, "grad_norm": 0.875765323638916, "learning_rate": 3.1268119330403068e-06, "loss": 0.0126, "step": 201300 }, { "epoch": 18.799589284047418, "grad_norm": 3.1786370277404785, "learning_rate": 3.1258767417936968e-06, "loss": 0.0138, "step": 201400 }, { "epoch": 18.80892373751517, "grad_norm": 0.9562956690788269, "learning_rate": 3.1249415505470872e-06, "loss": 0.0137, "step": 201500 }, { "epoch": 18.818258190982917, "grad_norm": 0.5332959890365601, "learning_rate": 3.1240063593004773e-06, "loss": 0.012, "step": 201600 }, { "epoch": 18.827592644450668, "grad_norm": 4.556670665740967, "learning_rate": 3.1230711680538673e-06, "loss": 0.0136, "step": 201700 }, { "epoch": 18.836927097918416, "grad_norm": 0.7207038402557373, "learning_rate": 3.1221359768072573e-06, "loss": 0.0145, "step": 201800 }, { "epoch": 18.846261551386167, "grad_norm": 2.964127540588379, "learning_rate": 3.1212007855606474e-06, "loss": 0.0149, "step": 201900 }, { "epoch": 18.855596004853915, "grad_norm": 0.485819935798645, "learning_rate": 3.1202655943140374e-06, "loss": 0.0192, "step": 202000 }, { "epoch": 18.864930458321666, "grad_norm": 3.1362600326538086, "learning_rate": 3.1193304030674274e-06, "loss": 0.0146, "step": 202100 }, { "epoch": 18.874264911789414, "grad_norm": 0.6100403070449829, "learning_rate": 3.118395211820818e-06, "loss": 0.0161, "step": 202200 }, { "epoch": 18.883599365257165, "grad_norm": 0.012918166816234589, "learning_rate": 3.117460020574208e-06, "loss": 0.0152, "step": 202300 }, { "epoch": 18.892933818724913, "grad_norm": 0.6768700480461121, "learning_rate": 3.116524829327598e-06, "loss": 0.0144, "step": 202400 }, { "epoch": 18.902268272192664, "grad_norm": 2.3280582427978516, "learning_rate": 3.115589638080988e-06, "loss": 0.0142, "step": 202500 }, { "epoch": 18.911602725660412, "grad_norm": 0.2881123125553131, "learning_rate": 3.114654446834378e-06, "loss": 0.0123, "step": 202600 }, { "epoch": 18.920937179128163, "grad_norm": 2.567854404449463, "learning_rate": 3.113719255587768e-06, "loss": 0.0156, "step": 202700 }, { "epoch": 18.93027163259591, "grad_norm": 3.5037574768066406, "learning_rate": 3.1127840643411576e-06, "loss": 0.0169, "step": 202800 }, { "epoch": 18.939606086063662, "grad_norm": 1.5127900838851929, "learning_rate": 3.1118488730945477e-06, "loss": 0.0152, "step": 202900 }, { "epoch": 18.94894053953141, "grad_norm": 1.8825160264968872, "learning_rate": 3.110913681847938e-06, "loss": 0.0158, "step": 203000 }, { "epoch": 18.95827499299916, "grad_norm": 2.250319242477417, "learning_rate": 3.109978490601328e-06, "loss": 0.014, "step": 203100 }, { "epoch": 18.96760944646691, "grad_norm": 0.2977730333805084, "learning_rate": 3.109043299354718e-06, "loss": 0.0131, "step": 203200 }, { "epoch": 18.97694389993466, "grad_norm": 2.423746347427368, "learning_rate": 3.1081081081081082e-06, "loss": 0.0145, "step": 203300 }, { "epoch": 18.986278353402408, "grad_norm": 2.175414562225342, "learning_rate": 3.1071729168614982e-06, "loss": 0.0144, "step": 203400 }, { "epoch": 18.995612806870156, "grad_norm": 0.4847501516342163, "learning_rate": 3.1062377256148883e-06, "loss": 0.0151, "step": 203500 }, { "epoch": 19.004947260337907, "grad_norm": 1.7288622856140137, "learning_rate": 3.1053025343682787e-06, "loss": 0.0115, "step": 203600 }, { "epoch": 19.014281713805655, "grad_norm": 0.09647651016712189, "learning_rate": 3.1043673431216688e-06, "loss": 0.0135, "step": 203700 }, { "epoch": 19.023616167273406, "grad_norm": 1.2967782020568848, "learning_rate": 3.103432151875059e-06, "loss": 0.0134, "step": 203800 }, { "epoch": 19.032950620741154, "grad_norm": 1.5363342761993408, "learning_rate": 3.102496960628449e-06, "loss": 0.0118, "step": 203900 }, { "epoch": 19.042285074208905, "grad_norm": 0.6508591771125793, "learning_rate": 3.101561769381839e-06, "loss": 0.0116, "step": 204000 }, { "epoch": 19.051619527676653, "grad_norm": 1.627902626991272, "learning_rate": 3.100626578135229e-06, "loss": 0.0128, "step": 204100 }, { "epoch": 19.060953981144404, "grad_norm": 2.2799723148345947, "learning_rate": 3.0996913868886193e-06, "loss": 0.0112, "step": 204200 }, { "epoch": 19.070288434612152, "grad_norm": 1.6323964595794678, "learning_rate": 3.0987561956420094e-06, "loss": 0.0158, "step": 204300 }, { "epoch": 19.079622888079903, "grad_norm": 1.6610661745071411, "learning_rate": 3.0978210043953994e-06, "loss": 0.0118, "step": 204400 }, { "epoch": 19.08895734154765, "grad_norm": 3.584804058074951, "learning_rate": 3.0968858131487894e-06, "loss": 0.011, "step": 204500 }, { "epoch": 19.098291795015403, "grad_norm": 2.292832136154175, "learning_rate": 3.095950621902179e-06, "loss": 0.0113, "step": 204600 }, { "epoch": 19.10762624848315, "grad_norm": 3.0764684677124023, "learning_rate": 3.095015430655569e-06, "loss": 0.0123, "step": 204700 }, { "epoch": 19.1169607019509, "grad_norm": 0.34966886043548584, "learning_rate": 3.094080239408959e-06, "loss": 0.0127, "step": 204800 }, { "epoch": 19.12629515541865, "grad_norm": 1.903686285018921, "learning_rate": 3.093145048162349e-06, "loss": 0.011, "step": 204900 }, { "epoch": 19.1356296088864, "grad_norm": 2.633467435836792, "learning_rate": 3.0922098569157396e-06, "loss": 0.0118, "step": 205000 }, { "epoch": 19.1356296088864, "eval_accuracy": 0.6957992998833139, "eval_f1": 0.8230635113942018, "eval_loss": 0.20997093617916107, "eval_roc_auc": 0.9082242162382304, "eval_runtime": 360.8906, "eval_samples_per_second": 118.734, "eval_steps_per_second": 118.734, "step": 205000 }, { "epoch": 19.14496406235415, "grad_norm": 1.89105224609375, "learning_rate": 3.0912746656691296e-06, "loss": 0.0117, "step": 205100 }, { "epoch": 19.1542985158219, "grad_norm": 5.085088729858398, "learning_rate": 3.0903394744225196e-06, "loss": 0.01, "step": 205200 }, { "epoch": 19.163632969289647, "grad_norm": 0.7133585810661316, "learning_rate": 3.0894042831759097e-06, "loss": 0.013, "step": 205300 }, { "epoch": 19.1729674227574, "grad_norm": 1.5643093585968018, "learning_rate": 3.0884690919292997e-06, "loss": 0.0121, "step": 205400 }, { "epoch": 19.182301876225146, "grad_norm": 3.494886636734009, "learning_rate": 3.0875339006826897e-06, "loss": 0.0122, "step": 205500 }, { "epoch": 19.191636329692898, "grad_norm": 6.001270771026611, "learning_rate": 3.0865987094360798e-06, "loss": 0.0121, "step": 205600 }, { "epoch": 19.200970783160646, "grad_norm": 1.3227609395980835, "learning_rate": 3.0856635181894702e-06, "loss": 0.0112, "step": 205700 }, { "epoch": 19.210305236628397, "grad_norm": 2.4293582439422607, "learning_rate": 3.0847283269428603e-06, "loss": 0.0113, "step": 205800 }, { "epoch": 19.219639690096145, "grad_norm": 0.4127531349658966, "learning_rate": 3.0837931356962503e-06, "loss": 0.0112, "step": 205900 }, { "epoch": 19.228974143563896, "grad_norm": 2.7576277256011963, "learning_rate": 3.0828579444496403e-06, "loss": 0.0119, "step": 206000 }, { "epoch": 19.238308597031644, "grad_norm": 1.103031873703003, "learning_rate": 3.0819227532030303e-06, "loss": 0.0113, "step": 206100 }, { "epoch": 19.247643050499395, "grad_norm": 3.040665864944458, "learning_rate": 3.0809875619564204e-06, "loss": 0.0161, "step": 206200 }, { "epoch": 19.256977503967143, "grad_norm": 0.04530937969684601, "learning_rate": 3.080052370709811e-06, "loss": 0.0128, "step": 206300 }, { "epoch": 19.26631195743489, "grad_norm": 3.924407482147217, "learning_rate": 3.0791171794632e-06, "loss": 0.0103, "step": 206400 }, { "epoch": 19.27564641090264, "grad_norm": 1.5059771537780762, "learning_rate": 3.0781819882165905e-06, "loss": 0.0122, "step": 206500 }, { "epoch": 19.28498086437039, "grad_norm": 0.7037464380264282, "learning_rate": 3.0772467969699805e-06, "loss": 0.0151, "step": 206600 }, { "epoch": 19.29431531783814, "grad_norm": 3.287116050720215, "learning_rate": 3.0763116057233705e-06, "loss": 0.0109, "step": 206700 }, { "epoch": 19.30364977130589, "grad_norm": 0.2877686619758606, "learning_rate": 3.0753764144767606e-06, "loss": 0.0122, "step": 206800 }, { "epoch": 19.31298422477364, "grad_norm": 2.985529661178589, "learning_rate": 3.0744412232301506e-06, "loss": 0.0119, "step": 206900 }, { "epoch": 19.322318678241388, "grad_norm": 3.090141773223877, "learning_rate": 3.0735060319835406e-06, "loss": 0.0139, "step": 207000 }, { "epoch": 19.33165313170914, "grad_norm": 1.7579286098480225, "learning_rate": 3.072570840736931e-06, "loss": 0.0125, "step": 207100 }, { "epoch": 19.340987585176887, "grad_norm": 2.868077516555786, "learning_rate": 3.071635649490321e-06, "loss": 0.0114, "step": 207200 }, { "epoch": 19.350322038644638, "grad_norm": 2.7000441551208496, "learning_rate": 3.070700458243711e-06, "loss": 0.0132, "step": 207300 }, { "epoch": 19.359656492112386, "grad_norm": 1.6412945985794067, "learning_rate": 3.069765266997101e-06, "loss": 0.0149, "step": 207400 }, { "epoch": 19.368990945580137, "grad_norm": 1.6329227685928345, "learning_rate": 3.068830075750491e-06, "loss": 0.0127, "step": 207500 }, { "epoch": 19.378325399047885, "grad_norm": 4.358251571655273, "learning_rate": 3.0678948845038812e-06, "loss": 0.0123, "step": 207600 }, { "epoch": 19.387659852515636, "grad_norm": 1.008575677871704, "learning_rate": 3.0669596932572717e-06, "loss": 0.0125, "step": 207700 }, { "epoch": 19.396994305983384, "grad_norm": 3.040309190750122, "learning_rate": 3.0660245020106617e-06, "loss": 0.012, "step": 207800 }, { "epoch": 19.406328759451135, "grad_norm": 1.5304089784622192, "learning_rate": 3.0650893107640517e-06, "loss": 0.0124, "step": 207900 }, { "epoch": 19.415663212918883, "grad_norm": 5.081597805023193, "learning_rate": 3.0641541195174418e-06, "loss": 0.0121, "step": 208000 }, { "epoch": 19.424997666386634, "grad_norm": 3.4068636894226074, "learning_rate": 3.063218928270832e-06, "loss": 0.0132, "step": 208100 }, { "epoch": 19.434332119854382, "grad_norm": 0.10760590434074402, "learning_rate": 3.0622837370242214e-06, "loss": 0.0126, "step": 208200 }, { "epoch": 19.443666573322133, "grad_norm": 2.008765935897827, "learning_rate": 3.0613485457776114e-06, "loss": 0.0126, "step": 208300 }, { "epoch": 19.45300102678988, "grad_norm": 1.1101243495941162, "learning_rate": 3.0604133545310015e-06, "loss": 0.0142, "step": 208400 }, { "epoch": 19.462335480257632, "grad_norm": 0.007872304879128933, "learning_rate": 3.059478163284392e-06, "loss": 0.0134, "step": 208500 }, { "epoch": 19.47166993372538, "grad_norm": 0.5731226205825806, "learning_rate": 3.058542972037782e-06, "loss": 0.0115, "step": 208600 }, { "epoch": 19.48100438719313, "grad_norm": 3.9016287326812744, "learning_rate": 3.057607780791172e-06, "loss": 0.0132, "step": 208700 }, { "epoch": 19.49033884066088, "grad_norm": 2.1364502906799316, "learning_rate": 3.056672589544562e-06, "loss": 0.0115, "step": 208800 }, { "epoch": 19.49967329412863, "grad_norm": 4.1160101890563965, "learning_rate": 3.055737398297952e-06, "loss": 0.0139, "step": 208900 }, { "epoch": 19.509007747596378, "grad_norm": 2.0198755264282227, "learning_rate": 3.054802207051342e-06, "loss": 0.0147, "step": 209000 }, { "epoch": 19.51834220106413, "grad_norm": 0.5164903998374939, "learning_rate": 3.0538670158047325e-06, "loss": 0.0145, "step": 209100 }, { "epoch": 19.527676654531877, "grad_norm": 0.3615107834339142, "learning_rate": 3.0529318245581226e-06, "loss": 0.0128, "step": 209200 }, { "epoch": 19.537011107999625, "grad_norm": 0.7270076870918274, "learning_rate": 3.0519966333115126e-06, "loss": 0.0138, "step": 209300 }, { "epoch": 19.546345561467376, "grad_norm": 0.272657185792923, "learning_rate": 3.0510614420649026e-06, "loss": 0.0118, "step": 209400 }, { "epoch": 19.555680014935124, "grad_norm": 0.7047315835952759, "learning_rate": 3.0501262508182927e-06, "loss": 0.0112, "step": 209500 }, { "epoch": 19.565014468402875, "grad_norm": 2.4384803771972656, "learning_rate": 3.0491910595716827e-06, "loss": 0.0136, "step": 209600 }, { "epoch": 19.574348921870623, "grad_norm": 3.6925418376922607, "learning_rate": 3.0482558683250727e-06, "loss": 0.0124, "step": 209700 }, { "epoch": 19.583683375338374, "grad_norm": 1.5840814113616943, "learning_rate": 3.047320677078463e-06, "loss": 0.0117, "step": 209800 }, { "epoch": 19.593017828806122, "grad_norm": 1.18039071559906, "learning_rate": 3.046385485831853e-06, "loss": 0.0121, "step": 209900 }, { "epoch": 19.602352282273873, "grad_norm": 0.8340321779251099, "learning_rate": 3.045450294585243e-06, "loss": 0.0146, "step": 210000 }, { "epoch": 19.602352282273873, "eval_accuracy": 0.6948424737456242, "eval_f1": 0.8211562980874918, "eval_loss": 0.21287989616394043, "eval_roc_auc": 0.905881179433913, "eval_runtime": 429.7697, "eval_samples_per_second": 99.705, "eval_steps_per_second": 99.705, "step": 210000 }, { "epoch": 19.61168673574162, "grad_norm": 2.34354567527771, "learning_rate": 3.044515103338633e-06, "loss": 0.0141, "step": 210100 }, { "epoch": 19.621021189209372, "grad_norm": 0.19111299514770508, "learning_rate": 3.043579912092023e-06, "loss": 0.0137, "step": 210200 }, { "epoch": 19.63035564267712, "grad_norm": 2.253302812576294, "learning_rate": 3.042644720845413e-06, "loss": 0.0104, "step": 210300 }, { "epoch": 19.63969009614487, "grad_norm": 2.0998358726501465, "learning_rate": 3.041709529598803e-06, "loss": 0.0138, "step": 210400 }, { "epoch": 19.64902454961262, "grad_norm": 0.9411817193031311, "learning_rate": 3.040774338352193e-06, "loss": 0.0135, "step": 210500 }, { "epoch": 19.65835900308037, "grad_norm": 1.4696274995803833, "learning_rate": 3.0398391471055834e-06, "loss": 0.0117, "step": 210600 }, { "epoch": 19.66769345654812, "grad_norm": 0.7129770517349243, "learning_rate": 3.0389039558589734e-06, "loss": 0.0124, "step": 210700 }, { "epoch": 19.67702791001587, "grad_norm": 0.18528826534748077, "learning_rate": 3.0379687646123635e-06, "loss": 0.0114, "step": 210800 }, { "epoch": 19.686362363483617, "grad_norm": 2.5201213359832764, "learning_rate": 3.0370335733657535e-06, "loss": 0.0138, "step": 210900 }, { "epoch": 19.69569681695137, "grad_norm": 3.2855522632598877, "learning_rate": 3.0360983821191435e-06, "loss": 0.0126, "step": 211000 }, { "epoch": 19.705031270419116, "grad_norm": 1.4678208827972412, "learning_rate": 3.0351631908725336e-06, "loss": 0.0123, "step": 211100 }, { "epoch": 19.714365723886868, "grad_norm": 0.839155912399292, "learning_rate": 3.034227999625924e-06, "loss": 0.0128, "step": 211200 }, { "epoch": 19.723700177354615, "grad_norm": 0.05170531943440437, "learning_rate": 3.033292808379314e-06, "loss": 0.0126, "step": 211300 }, { "epoch": 19.733034630822367, "grad_norm": 1.3683853149414062, "learning_rate": 3.032357617132704e-06, "loss": 0.0122, "step": 211400 }, { "epoch": 19.742369084290115, "grad_norm": 1.8418455123901367, "learning_rate": 3.031422425886094e-06, "loss": 0.0149, "step": 211500 }, { "epoch": 19.751703537757866, "grad_norm": 2.7218143939971924, "learning_rate": 3.030487234639484e-06, "loss": 0.0129, "step": 211600 }, { "epoch": 19.761037991225614, "grad_norm": 0.6090812087059021, "learning_rate": 3.029552043392874e-06, "loss": 0.0134, "step": 211700 }, { "epoch": 19.770372444693365, "grad_norm": 5.024089813232422, "learning_rate": 3.0286168521462638e-06, "loss": 0.0131, "step": 211800 }, { "epoch": 19.779706898161113, "grad_norm": 0.5129848718643188, "learning_rate": 3.027681660899654e-06, "loss": 0.0122, "step": 211900 }, { "epoch": 19.78904135162886, "grad_norm": 2.8886666297912598, "learning_rate": 3.0267464696530443e-06, "loss": 0.0115, "step": 212000 }, { "epoch": 19.79837580509661, "grad_norm": 6.274740695953369, "learning_rate": 3.0258112784064343e-06, "loss": 0.0116, "step": 212100 }, { "epoch": 19.80771025856436, "grad_norm": 1.109872817993164, "learning_rate": 3.0248760871598243e-06, "loss": 0.014, "step": 212200 }, { "epoch": 19.81704471203211, "grad_norm": 0.16813012957572937, "learning_rate": 3.0239408959132144e-06, "loss": 0.0105, "step": 212300 }, { "epoch": 19.82637916549986, "grad_norm": 0.9176939129829407, "learning_rate": 3.0230057046666044e-06, "loss": 0.0136, "step": 212400 }, { "epoch": 19.83571361896761, "grad_norm": 3.6270813941955566, "learning_rate": 3.0220705134199944e-06, "loss": 0.0133, "step": 212500 }, { "epoch": 19.845048072435358, "grad_norm": 4.019731521606445, "learning_rate": 3.021135322173385e-06, "loss": 0.0113, "step": 212600 }, { "epoch": 19.85438252590311, "grad_norm": 0.6033906936645508, "learning_rate": 3.020200130926775e-06, "loss": 0.0146, "step": 212700 }, { "epoch": 19.863716979370857, "grad_norm": 0.6597771644592285, "learning_rate": 3.019264939680165e-06, "loss": 0.0107, "step": 212800 }, { "epoch": 19.873051432838608, "grad_norm": 1.5018970966339111, "learning_rate": 3.018329748433555e-06, "loss": 0.012, "step": 212900 }, { "epoch": 19.882385886306356, "grad_norm": 4.433917999267578, "learning_rate": 3.017394557186945e-06, "loss": 0.0155, "step": 213000 }, { "epoch": 19.891720339774107, "grad_norm": 2.0943822860717773, "learning_rate": 3.016459365940335e-06, "loss": 0.0161, "step": 213100 }, { "epoch": 19.901054793241855, "grad_norm": 4.0510029792785645, "learning_rate": 3.015524174693725e-06, "loss": 0.012, "step": 213200 }, { "epoch": 19.910389246709606, "grad_norm": 4.247588634490967, "learning_rate": 3.0145889834471155e-06, "loss": 0.013, "step": 213300 }, { "epoch": 19.919723700177354, "grad_norm": 3.033874750137329, "learning_rate": 3.0136537922005055e-06, "loss": 0.0137, "step": 213400 }, { "epoch": 19.929058153645105, "grad_norm": 2.8961284160614014, "learning_rate": 3.0127186009538956e-06, "loss": 0.0132, "step": 213500 }, { "epoch": 19.938392607112853, "grad_norm": 3.15143084526062, "learning_rate": 3.011783409707285e-06, "loss": 0.0117, "step": 213600 }, { "epoch": 19.947727060580604, "grad_norm": 0.7451964020729065, "learning_rate": 3.0108482184606752e-06, "loss": 0.0126, "step": 213700 }, { "epoch": 19.957061514048352, "grad_norm": 1.8201011419296265, "learning_rate": 3.0099130272140652e-06, "loss": 0.0114, "step": 213800 }, { "epoch": 19.966395967516103, "grad_norm": 1.4566015005111694, "learning_rate": 3.0089778359674553e-06, "loss": 0.0121, "step": 213900 }, { "epoch": 19.97573042098385, "grad_norm": 1.2588807344436646, "learning_rate": 3.0080426447208453e-06, "loss": 0.0132, "step": 214000 }, { "epoch": 19.985064874451602, "grad_norm": 3.470776319503784, "learning_rate": 3.0071074534742358e-06, "loss": 0.0144, "step": 214100 }, { "epoch": 19.99439932791935, "grad_norm": 0.15924517810344696, "learning_rate": 3.006172262227626e-06, "loss": 0.0144, "step": 214200 }, { "epoch": 20.0037337813871, "grad_norm": 0.05132641643285751, "learning_rate": 3.005237070981016e-06, "loss": 0.0132, "step": 214300 }, { "epoch": 20.01306823485485, "grad_norm": 3.4181599617004395, "learning_rate": 3.004301879734406e-06, "loss": 0.0084, "step": 214400 }, { "epoch": 20.0224026883226, "grad_norm": 2.5250236988067627, "learning_rate": 3.003366688487796e-06, "loss": 0.0079, "step": 214500 }, { "epoch": 20.031737141790348, "grad_norm": 12.565043449401855, "learning_rate": 3.002431497241186e-06, "loss": 0.0097, "step": 214600 }, { "epoch": 20.0410715952581, "grad_norm": 0.5790240168571472, "learning_rate": 3.0014963059945764e-06, "loss": 0.0101, "step": 214700 }, { "epoch": 20.050406048725847, "grad_norm": 1.989608645439148, "learning_rate": 3.0005611147479664e-06, "loss": 0.0094, "step": 214800 }, { "epoch": 20.059740502193595, "grad_norm": 7.267418384552002, "learning_rate": 2.9996259235013564e-06, "loss": 0.0128, "step": 214900 }, { "epoch": 20.069074955661346, "grad_norm": 1.983026146888733, "learning_rate": 2.9986907322547465e-06, "loss": 0.01, "step": 215000 }, { "epoch": 20.069074955661346, "eval_accuracy": 0.7003967327887981, "eval_f1": 0.825632568107455, "eval_loss": 0.22017644345760345, "eval_roc_auc": 0.9074050429328052, "eval_runtime": 428.9251, "eval_samples_per_second": 99.901, "eval_steps_per_second": 99.901, "step": 215000 }, { "epoch": 20.078409409129094, "grad_norm": 1.0570791959762573, "learning_rate": 2.9977555410081365e-06, "loss": 0.0088, "step": 215100 }, { "epoch": 20.087743862596845, "grad_norm": 5.124983310699463, "learning_rate": 2.9968203497615265e-06, "loss": 0.0114, "step": 215200 }, { "epoch": 20.097078316064593, "grad_norm": 1.5262564420700073, "learning_rate": 2.995885158514917e-06, "loss": 0.0097, "step": 215300 }, { "epoch": 20.106412769532344, "grad_norm": 1.4549790620803833, "learning_rate": 2.994949967268307e-06, "loss": 0.0101, "step": 215400 }, { "epoch": 20.115747223000092, "grad_norm": 1.3257499933242798, "learning_rate": 2.9940147760216966e-06, "loss": 0.0132, "step": 215500 }, { "epoch": 20.125081676467843, "grad_norm": 1.9218007326126099, "learning_rate": 2.9930795847750866e-06, "loss": 0.0105, "step": 215600 }, { "epoch": 20.13441612993559, "grad_norm": 1.834761142730713, "learning_rate": 2.9921443935284767e-06, "loss": 0.0093, "step": 215700 }, { "epoch": 20.143750583403342, "grad_norm": 1.6914253234863281, "learning_rate": 2.9912092022818667e-06, "loss": 0.0106, "step": 215800 }, { "epoch": 20.15308503687109, "grad_norm": 2.0424721240997314, "learning_rate": 2.9902740110352567e-06, "loss": 0.0127, "step": 215900 }, { "epoch": 20.16241949033884, "grad_norm": 2.5122389793395996, "learning_rate": 2.9893388197886468e-06, "loss": 0.0115, "step": 216000 }, { "epoch": 20.17175394380659, "grad_norm": 3.561110019683838, "learning_rate": 2.9884036285420372e-06, "loss": 0.0097, "step": 216100 }, { "epoch": 20.18108839727434, "grad_norm": 1.3669387102127075, "learning_rate": 2.9874684372954272e-06, "loss": 0.0116, "step": 216200 }, { "epoch": 20.190422850742088, "grad_norm": 0.588504433631897, "learning_rate": 2.9865332460488173e-06, "loss": 0.0104, "step": 216300 }, { "epoch": 20.19975730420984, "grad_norm": 1.290254831314087, "learning_rate": 2.9855980548022073e-06, "loss": 0.0108, "step": 216400 }, { "epoch": 20.209091757677587, "grad_norm": 11.966572761535645, "learning_rate": 2.9846628635555973e-06, "loss": 0.0109, "step": 216500 }, { "epoch": 20.21842621114534, "grad_norm": 1.3327422142028809, "learning_rate": 2.9837276723089874e-06, "loss": 0.0111, "step": 216600 }, { "epoch": 20.227760664613086, "grad_norm": 3.58494234085083, "learning_rate": 2.982792481062378e-06, "loss": 0.0098, "step": 216700 }, { "epoch": 20.237095118080838, "grad_norm": 2.4236037731170654, "learning_rate": 2.981857289815768e-06, "loss": 0.0097, "step": 216800 }, { "epoch": 20.246429571548585, "grad_norm": 0.52142333984375, "learning_rate": 2.980922098569158e-06, "loss": 0.0111, "step": 216900 }, { "epoch": 20.255764025016337, "grad_norm": 4.126894950866699, "learning_rate": 2.979986907322548e-06, "loss": 0.0118, "step": 217000 }, { "epoch": 20.265098478484084, "grad_norm": 0.3380080461502075, "learning_rate": 2.979051716075938e-06, "loss": 0.0111, "step": 217100 }, { "epoch": 20.274432931951836, "grad_norm": 0.21118128299713135, "learning_rate": 2.978116524829328e-06, "loss": 0.0116, "step": 217200 }, { "epoch": 20.283767385419583, "grad_norm": 0.4820391833782196, "learning_rate": 2.9771813335827176e-06, "loss": 0.0096, "step": 217300 }, { "epoch": 20.293101838887335, "grad_norm": 2.4179301261901855, "learning_rate": 2.9762461423361076e-06, "loss": 0.0115, "step": 217400 }, { "epoch": 20.302436292355083, "grad_norm": 0.20750416815280914, "learning_rate": 2.9753109510894976e-06, "loss": 0.0124, "step": 217500 }, { "epoch": 20.311770745822834, "grad_norm": 6.444693088531494, "learning_rate": 2.974375759842888e-06, "loss": 0.0126, "step": 217600 }, { "epoch": 20.32110519929058, "grad_norm": 3.478623867034912, "learning_rate": 2.973440568596278e-06, "loss": 0.0141, "step": 217700 }, { "epoch": 20.33043965275833, "grad_norm": 2.3852181434631348, "learning_rate": 2.972505377349668e-06, "loss": 0.014, "step": 217800 }, { "epoch": 20.33977410622608, "grad_norm": 1.7948217391967773, "learning_rate": 2.971570186103058e-06, "loss": 0.0094, "step": 217900 }, { "epoch": 20.34910855969383, "grad_norm": 2.0845587253570557, "learning_rate": 2.9706349948564482e-06, "loss": 0.0116, "step": 218000 }, { "epoch": 20.35844301316158, "grad_norm": 0.05018536373972893, "learning_rate": 2.9696998036098383e-06, "loss": 0.0095, "step": 218100 }, { "epoch": 20.367777466629327, "grad_norm": 0.8144143223762512, "learning_rate": 2.9687646123632287e-06, "loss": 0.0107, "step": 218200 }, { "epoch": 20.37711192009708, "grad_norm": 4.386103630065918, "learning_rate": 2.9678294211166187e-06, "loss": 0.0119, "step": 218300 }, { "epoch": 20.386446373564826, "grad_norm": 6.932032585144043, "learning_rate": 2.9668942298700088e-06, "loss": 0.0122, "step": 218400 }, { "epoch": 20.395780827032578, "grad_norm": 1.5744847059249878, "learning_rate": 2.965959038623399e-06, "loss": 0.0136, "step": 218500 }, { "epoch": 20.405115280500326, "grad_norm": 1.870851993560791, "learning_rate": 2.965023847376789e-06, "loss": 0.0121, "step": 218600 }, { "epoch": 20.414449733968077, "grad_norm": 2.208407402038574, "learning_rate": 2.964088656130179e-06, "loss": 0.0103, "step": 218700 }, { "epoch": 20.423784187435825, "grad_norm": 0.126612588763237, "learning_rate": 2.9631534648835693e-06, "loss": 0.0111, "step": 218800 }, { "epoch": 20.433118640903576, "grad_norm": 2.185563802719116, "learning_rate": 2.9622182736369593e-06, "loss": 0.0096, "step": 218900 }, { "epoch": 20.442453094371324, "grad_norm": 0.11956387758255005, "learning_rate": 2.9612830823903494e-06, "loss": 0.0106, "step": 219000 }, { "epoch": 20.451787547839075, "grad_norm": 1.345323085784912, "learning_rate": 2.960347891143739e-06, "loss": 0.0108, "step": 219100 }, { "epoch": 20.461122001306823, "grad_norm": 0.9425498843193054, "learning_rate": 2.959412699897129e-06, "loss": 0.0113, "step": 219200 }, { "epoch": 20.470456454774574, "grad_norm": 1.9977941513061523, "learning_rate": 2.958477508650519e-06, "loss": 0.0103, "step": 219300 }, { "epoch": 20.47979090824232, "grad_norm": 2.5572800636291504, "learning_rate": 2.957542317403909e-06, "loss": 0.0118, "step": 219400 }, { "epoch": 20.489125361710073, "grad_norm": 1.449840784072876, "learning_rate": 2.956607126157299e-06, "loss": 0.0119, "step": 219500 }, { "epoch": 20.49845981517782, "grad_norm": 0.6846389174461365, "learning_rate": 2.9556719349106896e-06, "loss": 0.0094, "step": 219600 }, { "epoch": 20.507794268645572, "grad_norm": 0.4102117121219635, "learning_rate": 2.9547367436640796e-06, "loss": 0.01, "step": 219700 }, { "epoch": 20.51712872211332, "grad_norm": 1.1171274185180664, "learning_rate": 2.9538015524174696e-06, "loss": 0.0102, "step": 219800 }, { "epoch": 20.52646317558107, "grad_norm": 0.22288024425506592, "learning_rate": 2.9528663611708597e-06, "loss": 0.0102, "step": 219900 }, { "epoch": 20.53579762904882, "grad_norm": 2.1533396244049072, "learning_rate": 2.9519311699242497e-06, "loss": 0.0114, "step": 220000 }, { "epoch": 20.53579762904882, "eval_accuracy": 0.6975495915985997, "eval_f1": 0.8232685329640472, "eval_loss": 0.2265012115240097, "eval_roc_auc": 0.9082108536480956, "eval_runtime": 350.3619, "eval_samples_per_second": 122.302, "eval_steps_per_second": 122.302, "step": 220000 }, { "epoch": 20.54513208251657, "grad_norm": 0.8534783124923706, "learning_rate": 2.9509959786776397e-06, "loss": 0.0118, "step": 220100 }, { "epoch": 20.554466535984318, "grad_norm": 2.86320161819458, "learning_rate": 2.95006078743103e-06, "loss": 0.0102, "step": 220200 }, { "epoch": 20.56380098945207, "grad_norm": 3.885847806930542, "learning_rate": 2.94912559618442e-06, "loss": 0.012, "step": 220300 }, { "epoch": 20.573135442919817, "grad_norm": 0.8208932876586914, "learning_rate": 2.9481904049378102e-06, "loss": 0.0109, "step": 220400 }, { "epoch": 20.582469896387565, "grad_norm": 1.2597991228103638, "learning_rate": 2.9472552136912003e-06, "loss": 0.0121, "step": 220500 }, { "epoch": 20.591804349855316, "grad_norm": 5.095491886138916, "learning_rate": 2.9463200224445903e-06, "loss": 0.0126, "step": 220600 }, { "epoch": 20.601138803323064, "grad_norm": 1.3763973712921143, "learning_rate": 2.9453848311979803e-06, "loss": 0.0123, "step": 220700 }, { "epoch": 20.610473256790815, "grad_norm": 0.9463834166526794, "learning_rate": 2.9444496399513704e-06, "loss": 0.0074, "step": 220800 }, { "epoch": 20.619807710258563, "grad_norm": 1.8583170175552368, "learning_rate": 2.94351444870476e-06, "loss": 0.0095, "step": 220900 }, { "epoch": 20.629142163726314, "grad_norm": 1.591292381286621, "learning_rate": 2.9425792574581504e-06, "loss": 0.0121, "step": 221000 }, { "epoch": 20.638476617194062, "grad_norm": 3.4083774089813232, "learning_rate": 2.9416440662115404e-06, "loss": 0.0116, "step": 221100 }, { "epoch": 20.647811070661813, "grad_norm": 3.167963743209839, "learning_rate": 2.9407088749649305e-06, "loss": 0.0092, "step": 221200 }, { "epoch": 20.65714552412956, "grad_norm": 0.17202746868133545, "learning_rate": 2.9397736837183205e-06, "loss": 0.0124, "step": 221300 }, { "epoch": 20.666479977597312, "grad_norm": 2.129267454147339, "learning_rate": 2.9388384924717105e-06, "loss": 0.0128, "step": 221400 }, { "epoch": 20.67581443106506, "grad_norm": 3.5611448287963867, "learning_rate": 2.9379033012251006e-06, "loss": 0.0113, "step": 221500 }, { "epoch": 20.68514888453281, "grad_norm": 2.298802137374878, "learning_rate": 2.9369681099784906e-06, "loss": 0.0104, "step": 221600 }, { "epoch": 20.69448333800056, "grad_norm": 0.7426482439041138, "learning_rate": 2.936032918731881e-06, "loss": 0.0102, "step": 221700 }, { "epoch": 20.70381779146831, "grad_norm": 0.45012372732162476, "learning_rate": 2.935097727485271e-06, "loss": 0.0103, "step": 221800 }, { "epoch": 20.713152244936058, "grad_norm": 4.311934471130371, "learning_rate": 2.934162536238661e-06, "loss": 0.0113, "step": 221900 }, { "epoch": 20.72248669840381, "grad_norm": 2.325131416320801, "learning_rate": 2.933227344992051e-06, "loss": 0.013, "step": 222000 }, { "epoch": 20.731821151871557, "grad_norm": 2.9728598594665527, "learning_rate": 2.932292153745441e-06, "loss": 0.0107, "step": 222100 }, { "epoch": 20.74115560533931, "grad_norm": 4.278093338012695, "learning_rate": 2.931356962498831e-06, "loss": 0.0131, "step": 222200 }, { "epoch": 20.750490058807056, "grad_norm": 1.6002038717269897, "learning_rate": 2.9304217712522217e-06, "loss": 0.0127, "step": 222300 }, { "epoch": 20.759824512274808, "grad_norm": 0.6092908978462219, "learning_rate": 2.9294865800056117e-06, "loss": 0.0102, "step": 222400 }, { "epoch": 20.769158965742555, "grad_norm": 0.5218339562416077, "learning_rate": 2.9285513887590017e-06, "loss": 0.0125, "step": 222500 }, { "epoch": 20.778493419210307, "grad_norm": 3.001058340072632, "learning_rate": 2.9276161975123918e-06, "loss": 0.0121, "step": 222600 }, { "epoch": 20.787827872678054, "grad_norm": 1.871188759803772, "learning_rate": 2.9266810062657814e-06, "loss": 0.0147, "step": 222700 }, { "epoch": 20.797162326145806, "grad_norm": 3.3450939655303955, "learning_rate": 2.9257458150191714e-06, "loss": 0.0119, "step": 222800 }, { "epoch": 20.806496779613553, "grad_norm": 2.4914071559906006, "learning_rate": 2.9248106237725614e-06, "loss": 0.0114, "step": 222900 }, { "epoch": 20.815831233081305, "grad_norm": 2.6074838638305664, "learning_rate": 2.9238754325259515e-06, "loss": 0.0102, "step": 223000 }, { "epoch": 20.825165686549052, "grad_norm": 0.4562038779258728, "learning_rate": 2.922940241279342e-06, "loss": 0.0092, "step": 223100 }, { "epoch": 20.834500140016804, "grad_norm": 4.298128128051758, "learning_rate": 2.922005050032732e-06, "loss": 0.0106, "step": 223200 }, { "epoch": 20.84383459348455, "grad_norm": 0.19449126720428467, "learning_rate": 2.921069858786122e-06, "loss": 0.0118, "step": 223300 }, { "epoch": 20.8531690469523, "grad_norm": 1.6479328870773315, "learning_rate": 2.920134667539512e-06, "loss": 0.013, "step": 223400 }, { "epoch": 20.86250350042005, "grad_norm": 1.0816664695739746, "learning_rate": 2.919199476292902e-06, "loss": 0.012, "step": 223500 }, { "epoch": 20.8718379538878, "grad_norm": 1.4515525102615356, "learning_rate": 2.918264285046292e-06, "loss": 0.0127, "step": 223600 }, { "epoch": 20.88117240735555, "grad_norm": 2.180238723754883, "learning_rate": 2.9173290937996825e-06, "loss": 0.0122, "step": 223700 }, { "epoch": 20.890506860823297, "grad_norm": 2.4868006706237793, "learning_rate": 2.9163939025530725e-06, "loss": 0.01, "step": 223800 }, { "epoch": 20.89984131429105, "grad_norm": 0.8150231242179871, "learning_rate": 2.9154587113064626e-06, "loss": 0.0126, "step": 223900 }, { "epoch": 20.909175767758796, "grad_norm": 0.150617316365242, "learning_rate": 2.9145235200598526e-06, "loss": 0.01, "step": 224000 }, { "epoch": 20.918510221226548, "grad_norm": 0.5578415989875793, "learning_rate": 2.9135883288132426e-06, "loss": 0.0116, "step": 224100 }, { "epoch": 20.927844674694295, "grad_norm": 1.4454619884490967, "learning_rate": 2.9126531375666327e-06, "loss": 0.0113, "step": 224200 }, { "epoch": 20.937179128162047, "grad_norm": 2.058828592300415, "learning_rate": 2.911717946320023e-06, "loss": 0.0111, "step": 224300 }, { "epoch": 20.946513581629794, "grad_norm": 2.1899592876434326, "learning_rate": 2.910782755073413e-06, "loss": 0.0114, "step": 224400 }, { "epoch": 20.955848035097546, "grad_norm": 3.861067295074463, "learning_rate": 2.9098475638268028e-06, "loss": 0.0113, "step": 224500 }, { "epoch": 20.965182488565294, "grad_norm": 3.826390027999878, "learning_rate": 2.9089123725801928e-06, "loss": 0.0114, "step": 224600 }, { "epoch": 20.974516942033045, "grad_norm": 2.0785841941833496, "learning_rate": 2.907977181333583e-06, "loss": 0.0103, "step": 224700 }, { "epoch": 20.983851395500793, "grad_norm": 3.4896278381347656, "learning_rate": 2.907041990086973e-06, "loss": 0.0141, "step": 224800 }, { "epoch": 20.993185848968544, "grad_norm": 2.7368295192718506, "learning_rate": 2.906106798840363e-06, "loss": 0.01, "step": 224900 }, { "epoch": 21.00252030243629, "grad_norm": 5.2884521484375, "learning_rate": 2.905171607593753e-06, "loss": 0.0101, "step": 225000 }, { "epoch": 21.00252030243629, "eval_accuracy": 0.6971995332555426, "eval_f1": 0.8263043288912115, "eval_loss": 0.22895999252796173, "eval_roc_auc": 0.9121408375540129, "eval_runtime": 288.9993, "eval_samples_per_second": 148.27, "eval_steps_per_second": 148.27, "step": 225000 }, { "epoch": 21.011854755904043, "grad_norm": 0.3317449390888214, "learning_rate": 2.904236416347143e-06, "loss": 0.0101, "step": 225100 }, { "epoch": 21.02118920937179, "grad_norm": 0.02826506644487381, "learning_rate": 2.9033012251005334e-06, "loss": 0.0094, "step": 225200 }, { "epoch": 21.030523662839542, "grad_norm": 0.37212929129600525, "learning_rate": 2.9023660338539234e-06, "loss": 0.0093, "step": 225300 }, { "epoch": 21.03985811630729, "grad_norm": 3.042619228363037, "learning_rate": 2.9014308426073135e-06, "loss": 0.0127, "step": 225400 }, { "epoch": 21.04919256977504, "grad_norm": 1.2912588119506836, "learning_rate": 2.9004956513607035e-06, "loss": 0.0111, "step": 225500 }, { "epoch": 21.05852702324279, "grad_norm": 4.3590312004089355, "learning_rate": 2.8995604601140935e-06, "loss": 0.0098, "step": 225600 }, { "epoch": 21.06786147671054, "grad_norm": 5.805530548095703, "learning_rate": 2.8986252688674835e-06, "loss": 0.0098, "step": 225700 }, { "epoch": 21.077195930178288, "grad_norm": 0.5308802127838135, "learning_rate": 2.897690077620874e-06, "loss": 0.0086, "step": 225800 }, { "epoch": 21.08653038364604, "grad_norm": 0.37631717324256897, "learning_rate": 2.896754886374264e-06, "loss": 0.0107, "step": 225900 }, { "epoch": 21.095864837113787, "grad_norm": 2.0109713077545166, "learning_rate": 2.895819695127654e-06, "loss": 0.009, "step": 226000 }, { "epoch": 21.105199290581538, "grad_norm": 2.086390972137451, "learning_rate": 2.894884503881044e-06, "loss": 0.0088, "step": 226100 }, { "epoch": 21.114533744049286, "grad_norm": 0.19925299286842346, "learning_rate": 2.893949312634434e-06, "loss": 0.0087, "step": 226200 }, { "epoch": 21.123868197517034, "grad_norm": 1.72901451587677, "learning_rate": 2.8930141213878237e-06, "loss": 0.0102, "step": 226300 }, { "epoch": 21.133202650984785, "grad_norm": 5.333111763000488, "learning_rate": 2.8920789301412138e-06, "loss": 0.0088, "step": 226400 }, { "epoch": 21.142537104452533, "grad_norm": 3.6176154613494873, "learning_rate": 2.891143738894604e-06, "loss": 0.0098, "step": 226500 }, { "epoch": 21.151871557920284, "grad_norm": 1.9582805633544922, "learning_rate": 2.8902085476479942e-06, "loss": 0.0092, "step": 226600 }, { "epoch": 21.161206011388032, "grad_norm": 0.806291401386261, "learning_rate": 2.8892733564013843e-06, "loss": 0.0123, "step": 226700 }, { "epoch": 21.170540464855783, "grad_norm": 0.20155656337738037, "learning_rate": 2.8883381651547743e-06, "loss": 0.0088, "step": 226800 }, { "epoch": 21.17987491832353, "grad_norm": 3.612457036972046, "learning_rate": 2.8874029739081643e-06, "loss": 0.0108, "step": 226900 }, { "epoch": 21.189209371791282, "grad_norm": 3.3941941261291504, "learning_rate": 2.8864677826615544e-06, "loss": 0.0095, "step": 227000 }, { "epoch": 21.19854382525903, "grad_norm": 5.958293437957764, "learning_rate": 2.8855325914149444e-06, "loss": 0.0118, "step": 227100 }, { "epoch": 21.20787827872678, "grad_norm": 2.155874013900757, "learning_rate": 2.884597400168335e-06, "loss": 0.0095, "step": 227200 }, { "epoch": 21.21721273219453, "grad_norm": 2.209716320037842, "learning_rate": 2.883662208921725e-06, "loss": 0.011, "step": 227300 }, { "epoch": 21.22654718566228, "grad_norm": 4.038940906524658, "learning_rate": 2.882727017675115e-06, "loss": 0.0096, "step": 227400 }, { "epoch": 21.235881639130028, "grad_norm": 0.33016863465309143, "learning_rate": 2.881791826428505e-06, "loss": 0.0112, "step": 227500 }, { "epoch": 21.24521609259778, "grad_norm": 2.495630979537964, "learning_rate": 2.880856635181895e-06, "loss": 0.0085, "step": 227600 }, { "epoch": 21.254550546065527, "grad_norm": 2.41076397895813, "learning_rate": 2.879921443935285e-06, "loss": 0.0134, "step": 227700 }, { "epoch": 21.26388499953328, "grad_norm": 3.781783103942871, "learning_rate": 2.8789862526886755e-06, "loss": 0.0079, "step": 227800 }, { "epoch": 21.273219453001026, "grad_norm": 4.229352951049805, "learning_rate": 2.8780510614420655e-06, "loss": 0.0103, "step": 227900 }, { "epoch": 21.282553906468777, "grad_norm": 1.36443293094635, "learning_rate": 2.8771158701954555e-06, "loss": 0.0109, "step": 228000 }, { "epoch": 21.291888359936525, "grad_norm": 3.022331953048706, "learning_rate": 2.876180678948845e-06, "loss": 0.0104, "step": 228100 }, { "epoch": 21.301222813404276, "grad_norm": 3.0839462280273438, "learning_rate": 2.875245487702235e-06, "loss": 0.0108, "step": 228200 }, { "epoch": 21.310557266872024, "grad_norm": 0.3029504418373108, "learning_rate": 2.874310296455625e-06, "loss": 0.0097, "step": 228300 }, { "epoch": 21.319891720339776, "grad_norm": 4.278537273406982, "learning_rate": 2.8733751052090152e-06, "loss": 0.011, "step": 228400 }, { "epoch": 21.329226173807523, "grad_norm": 5.0052595138549805, "learning_rate": 2.8724399139624053e-06, "loss": 0.01, "step": 228500 }, { "epoch": 21.338560627275275, "grad_norm": 1.7139711380004883, "learning_rate": 2.8715047227157957e-06, "loss": 0.0109, "step": 228600 }, { "epoch": 21.347895080743022, "grad_norm": 2.9608120918273926, "learning_rate": 2.8705695314691857e-06, "loss": 0.0101, "step": 228700 }, { "epoch": 21.357229534210774, "grad_norm": 1.973572015762329, "learning_rate": 2.8696343402225758e-06, "loss": 0.0107, "step": 228800 }, { "epoch": 21.36656398767852, "grad_norm": 1.536915898323059, "learning_rate": 2.868699148975966e-06, "loss": 0.0083, "step": 228900 }, { "epoch": 21.37589844114627, "grad_norm": 0.24019207060337067, "learning_rate": 2.867763957729356e-06, "loss": 0.0082, "step": 229000 }, { "epoch": 21.38523289461402, "grad_norm": 0.2334904819726944, "learning_rate": 2.866828766482746e-06, "loss": 0.0085, "step": 229100 }, { "epoch": 21.394567348081768, "grad_norm": 0.20949506759643555, "learning_rate": 2.865893575236136e-06, "loss": 0.0101, "step": 229200 }, { "epoch": 21.40390180154952, "grad_norm": 0.39402562379837036, "learning_rate": 2.8649583839895263e-06, "loss": 0.0097, "step": 229300 }, { "epoch": 21.413236255017267, "grad_norm": 0.20034991204738617, "learning_rate": 2.8640231927429164e-06, "loss": 0.0108, "step": 229400 }, { "epoch": 21.42257070848502, "grad_norm": 0.3403743803501129, "learning_rate": 2.8630880014963064e-06, "loss": 0.0114, "step": 229500 }, { "epoch": 21.431905161952766, "grad_norm": 4.338662147521973, "learning_rate": 2.8621528102496964e-06, "loss": 0.0092, "step": 229600 }, { "epoch": 21.441239615420518, "grad_norm": 1.3294055461883545, "learning_rate": 2.8612176190030865e-06, "loss": 0.0111, "step": 229700 }, { "epoch": 21.450574068888265, "grad_norm": 0.8676400184631348, "learning_rate": 2.8602824277564765e-06, "loss": 0.0105, "step": 229800 }, { "epoch": 21.459908522356017, "grad_norm": 3.3376636505126953, "learning_rate": 2.859347236509866e-06, "loss": 0.0094, "step": 229900 }, { "epoch": 21.469242975823764, "grad_norm": 1.5090012550354004, "learning_rate": 2.858412045263256e-06, "loss": 0.0108, "step": 230000 }, { "epoch": 21.469242975823764, "eval_accuracy": 0.7008168028004668, "eval_f1": 0.8266301437213018, "eval_loss": 0.2380226105451584, "eval_roc_auc": 0.9095464096593509, "eval_runtime": 241.4839, "eval_samples_per_second": 177.445, "eval_steps_per_second": 177.445, "step": 230000 }, { "epoch": 21.478577429291516, "grad_norm": 1.071527361869812, "learning_rate": 2.8574768540166466e-06, "loss": 0.0098, "step": 230100 }, { "epoch": 21.487911882759263, "grad_norm": 0.2803511321544647, "learning_rate": 2.8565416627700366e-06, "loss": 0.009, "step": 230200 }, { "epoch": 21.497246336227015, "grad_norm": 0.7000722885131836, "learning_rate": 2.8556064715234267e-06, "loss": 0.0085, "step": 230300 }, { "epoch": 21.506580789694763, "grad_norm": 1.5136494636535645, "learning_rate": 2.8546712802768167e-06, "loss": 0.0104, "step": 230400 }, { "epoch": 21.515915243162514, "grad_norm": 1.1086589097976685, "learning_rate": 2.8537360890302067e-06, "loss": 0.0097, "step": 230500 }, { "epoch": 21.52524969663026, "grad_norm": 0.40020087361335754, "learning_rate": 2.8528008977835967e-06, "loss": 0.0109, "step": 230600 }, { "epoch": 21.534584150098013, "grad_norm": 2.374065637588501, "learning_rate": 2.851865706536987e-06, "loss": 0.0101, "step": 230700 }, { "epoch": 21.54391860356576, "grad_norm": 0.2772108316421509, "learning_rate": 2.8509305152903772e-06, "loss": 0.0105, "step": 230800 }, { "epoch": 21.553253057033512, "grad_norm": 0.08314776420593262, "learning_rate": 2.8499953240437673e-06, "loss": 0.0105, "step": 230900 }, { "epoch": 21.56258751050126, "grad_norm": 0.9438610672950745, "learning_rate": 2.8490601327971573e-06, "loss": 0.0094, "step": 231000 }, { "epoch": 21.57192196396901, "grad_norm": 3.593350410461426, "learning_rate": 2.8481249415505473e-06, "loss": 0.0121, "step": 231100 }, { "epoch": 21.58125641743676, "grad_norm": 1.8129709959030151, "learning_rate": 2.8471897503039374e-06, "loss": 0.0085, "step": 231200 }, { "epoch": 21.59059087090451, "grad_norm": 3.2788400650024414, "learning_rate": 2.846254559057328e-06, "loss": 0.0114, "step": 231300 }, { "epoch": 21.599925324372258, "grad_norm": 3.839353322982788, "learning_rate": 2.845319367810718e-06, "loss": 0.0117, "step": 231400 }, { "epoch": 21.60925977784001, "grad_norm": 1.5504121780395508, "learning_rate": 2.844384176564108e-06, "loss": 0.0097, "step": 231500 }, { "epoch": 21.618594231307757, "grad_norm": 3.8403568267822266, "learning_rate": 2.843448985317498e-06, "loss": 0.0117, "step": 231600 }, { "epoch": 21.627928684775505, "grad_norm": 2.471665143966675, "learning_rate": 2.8425137940708875e-06, "loss": 0.0138, "step": 231700 }, { "epoch": 21.637263138243256, "grad_norm": 0.3715001940727234, "learning_rate": 2.8415786028242775e-06, "loss": 0.0068, "step": 231800 }, { "epoch": 21.646597591711004, "grad_norm": 5.263282299041748, "learning_rate": 2.8406434115776676e-06, "loss": 0.011, "step": 231900 }, { "epoch": 21.655932045178755, "grad_norm": 0.15624602138996124, "learning_rate": 2.8397082203310576e-06, "loss": 0.0101, "step": 232000 }, { "epoch": 21.665266498646503, "grad_norm": 4.833462238311768, "learning_rate": 2.838773029084448e-06, "loss": 0.0094, "step": 232100 }, { "epoch": 21.674600952114254, "grad_norm": 2.2435412406921387, "learning_rate": 2.837837837837838e-06, "loss": 0.0096, "step": 232200 }, { "epoch": 21.683935405582, "grad_norm": 3.2109546661376953, "learning_rate": 2.836902646591228e-06, "loss": 0.0108, "step": 232300 }, { "epoch": 21.693269859049753, "grad_norm": 4.200677394866943, "learning_rate": 2.835967455344618e-06, "loss": 0.0104, "step": 232400 }, { "epoch": 21.7026043125175, "grad_norm": 1.5365921258926392, "learning_rate": 2.835032264098008e-06, "loss": 0.0101, "step": 232500 }, { "epoch": 21.711938765985252, "grad_norm": 0.4495246112346649, "learning_rate": 2.834097072851398e-06, "loss": 0.0107, "step": 232600 }, { "epoch": 21.721273219453, "grad_norm": 0.6687055230140686, "learning_rate": 2.8331618816047882e-06, "loss": 0.0105, "step": 232700 }, { "epoch": 21.73060767292075, "grad_norm": 5.026508331298828, "learning_rate": 2.8322266903581787e-06, "loss": 0.009, "step": 232800 }, { "epoch": 21.7399421263885, "grad_norm": 3.964979410171509, "learning_rate": 2.8312914991115687e-06, "loss": 0.0093, "step": 232900 }, { "epoch": 21.74927657985625, "grad_norm": 2.952591896057129, "learning_rate": 2.8303563078649588e-06, "loss": 0.0098, "step": 233000 }, { "epoch": 21.758611033323998, "grad_norm": 1.844728946685791, "learning_rate": 2.8294211166183488e-06, "loss": 0.013, "step": 233100 }, { "epoch": 21.76794548679175, "grad_norm": 0.05961408466100693, "learning_rate": 2.828485925371739e-06, "loss": 0.0098, "step": 233200 }, { "epoch": 21.777279940259497, "grad_norm": 1.1078156232833862, "learning_rate": 2.827550734125129e-06, "loss": 0.0104, "step": 233300 }, { "epoch": 21.78661439372725, "grad_norm": 1.328568935394287, "learning_rate": 2.8266155428785193e-06, "loss": 0.0101, "step": 233400 }, { "epoch": 21.795948847194996, "grad_norm": 2.251323938369751, "learning_rate": 2.8256803516319085e-06, "loss": 0.0102, "step": 233500 }, { "epoch": 21.805283300662747, "grad_norm": 2.291846752166748, "learning_rate": 2.824745160385299e-06, "loss": 0.011, "step": 233600 }, { "epoch": 21.814617754130495, "grad_norm": 4.906413555145264, "learning_rate": 2.823809969138689e-06, "loss": 0.0093, "step": 233700 }, { "epoch": 21.823952207598246, "grad_norm": 0.19831286370754242, "learning_rate": 2.822874777892079e-06, "loss": 0.0117, "step": 233800 }, { "epoch": 21.833286661065994, "grad_norm": 2.039537191390991, "learning_rate": 2.821939586645469e-06, "loss": 0.0096, "step": 233900 }, { "epoch": 21.842621114533745, "grad_norm": 1.1654855012893677, "learning_rate": 2.821004395398859e-06, "loss": 0.0103, "step": 234000 }, { "epoch": 21.851955568001493, "grad_norm": 3.0695858001708984, "learning_rate": 2.820069204152249e-06, "loss": 0.0093, "step": 234100 }, { "epoch": 21.861290021469244, "grad_norm": 4.405457019805908, "learning_rate": 2.8191340129056395e-06, "loss": 0.0096, "step": 234200 }, { "epoch": 21.870624474936992, "grad_norm": 0.3806150257587433, "learning_rate": 2.8181988216590296e-06, "loss": 0.0111, "step": 234300 }, { "epoch": 21.879958928404744, "grad_norm": 0.7411838173866272, "learning_rate": 2.8172636304124196e-06, "loss": 0.0104, "step": 234400 }, { "epoch": 21.88929338187249, "grad_norm": 1.5457432270050049, "learning_rate": 2.8163284391658096e-06, "loss": 0.0082, "step": 234500 }, { "epoch": 21.898627835340243, "grad_norm": 0.46212518215179443, "learning_rate": 2.8153932479191997e-06, "loss": 0.0093, "step": 234600 }, { "epoch": 21.90796228880799, "grad_norm": 2.4837515354156494, "learning_rate": 2.8144580566725897e-06, "loss": 0.0089, "step": 234700 }, { "epoch": 21.917296742275738, "grad_norm": 0.6134015321731567, "learning_rate": 2.81352286542598e-06, "loss": 0.0111, "step": 234800 }, { "epoch": 21.92663119574349, "grad_norm": 0.6854500770568848, "learning_rate": 2.81258767417937e-06, "loss": 0.0091, "step": 234900 }, { "epoch": 21.935965649211237, "grad_norm": 1.1587570905685425, "learning_rate": 2.81165248293276e-06, "loss": 0.0117, "step": 235000 }, { "epoch": 21.935965649211237, "eval_accuracy": 0.6979929988331388, "eval_f1": 0.8232554857828265, "eval_loss": 0.23708966374397278, "eval_roc_auc": 0.906042216913093, "eval_runtime": 232.4698, "eval_samples_per_second": 184.325, "eval_steps_per_second": 184.325, "step": 235000 }, { "epoch": 21.94530010267899, "grad_norm": 2.946023464202881, "learning_rate": 2.8107172916861502e-06, "loss": 0.0076, "step": 235100 }, { "epoch": 21.954634556146736, "grad_norm": 2.0403950214385986, "learning_rate": 2.8097821004395403e-06, "loss": 0.0113, "step": 235200 }, { "epoch": 21.963969009614488, "grad_norm": 2.307710886001587, "learning_rate": 2.8088469091929303e-06, "loss": 0.01, "step": 235300 }, { "epoch": 21.973303463082235, "grad_norm": 4.641949653625488, "learning_rate": 2.80791171794632e-06, "loss": 0.0102, "step": 235400 }, { "epoch": 21.982637916549987, "grad_norm": 0.09677653759717941, "learning_rate": 2.80697652669971e-06, "loss": 0.0102, "step": 235500 }, { "epoch": 21.991972370017734, "grad_norm": 0.8791242837905884, "learning_rate": 2.8060413354531004e-06, "loss": 0.0102, "step": 235600 }, { "epoch": 22.001306823485486, "grad_norm": 1.5768687725067139, "learning_rate": 2.8051061442064904e-06, "loss": 0.0102, "step": 235700 }, { "epoch": 22.010641276953233, "grad_norm": 2.8897087574005127, "learning_rate": 2.8041709529598805e-06, "loss": 0.009, "step": 235800 }, { "epoch": 22.019975730420985, "grad_norm": 1.656325340270996, "learning_rate": 2.8032357617132705e-06, "loss": 0.0089, "step": 235900 }, { "epoch": 22.029310183888732, "grad_norm": 1.5619853734970093, "learning_rate": 2.8023005704666605e-06, "loss": 0.0077, "step": 236000 }, { "epoch": 22.038644637356484, "grad_norm": 0.18872223794460297, "learning_rate": 2.8013653792200505e-06, "loss": 0.0089, "step": 236100 }, { "epoch": 22.04797909082423, "grad_norm": 0.6643452048301697, "learning_rate": 2.800430187973441e-06, "loss": 0.0079, "step": 236200 }, { "epoch": 22.057313544291983, "grad_norm": 0.2459431141614914, "learning_rate": 2.799494996726831e-06, "loss": 0.0092, "step": 236300 }, { "epoch": 22.06664799775973, "grad_norm": 3.167778730392456, "learning_rate": 2.798559805480221e-06, "loss": 0.0107, "step": 236400 }, { "epoch": 22.075982451227482, "grad_norm": 3.7656450271606445, "learning_rate": 2.797624614233611e-06, "loss": 0.0083, "step": 236500 }, { "epoch": 22.08531690469523, "grad_norm": 1.1774852275848389, "learning_rate": 2.796689422987001e-06, "loss": 0.0097, "step": 236600 }, { "epoch": 22.09465135816298, "grad_norm": 0.335507869720459, "learning_rate": 2.795754231740391e-06, "loss": 0.0082, "step": 236700 }, { "epoch": 22.10398581163073, "grad_norm": 0.445943683385849, "learning_rate": 2.794819040493781e-06, "loss": 0.0105, "step": 236800 }, { "epoch": 22.11332026509848, "grad_norm": 0.3181352913379669, "learning_rate": 2.7938838492471716e-06, "loss": 0.0091, "step": 236900 }, { "epoch": 22.122654718566228, "grad_norm": 0.8648593425750732, "learning_rate": 2.7929486580005617e-06, "loss": 0.0068, "step": 237000 }, { "epoch": 22.13198917203398, "grad_norm": 3.1293702125549316, "learning_rate": 2.7920134667539517e-06, "loss": 0.0072, "step": 237100 }, { "epoch": 22.141323625501727, "grad_norm": 1.7336819171905518, "learning_rate": 2.7910782755073413e-06, "loss": 0.0078, "step": 237200 }, { "epoch": 22.150658078969478, "grad_norm": 4.073586940765381, "learning_rate": 2.7901430842607313e-06, "loss": 0.0089, "step": 237300 }, { "epoch": 22.159992532437226, "grad_norm": 0.15928728878498077, "learning_rate": 2.7892078930141214e-06, "loss": 0.0085, "step": 237400 }, { "epoch": 22.169326985904974, "grad_norm": 0.5549577474594116, "learning_rate": 2.7882727017675114e-06, "loss": 0.0085, "step": 237500 }, { "epoch": 22.178661439372725, "grad_norm": 1.005282998085022, "learning_rate": 2.7873375105209014e-06, "loss": 0.0103, "step": 237600 }, { "epoch": 22.187995892840473, "grad_norm": 1.6500416994094849, "learning_rate": 2.786402319274292e-06, "loss": 0.0106, "step": 237700 }, { "epoch": 22.197330346308224, "grad_norm": 0.04180588945746422, "learning_rate": 2.785467128027682e-06, "loss": 0.0077, "step": 237800 }, { "epoch": 22.20666479977597, "grad_norm": 0.24256671965122223, "learning_rate": 2.784531936781072e-06, "loss": 0.0094, "step": 237900 }, { "epoch": 22.215999253243723, "grad_norm": 0.1912059783935547, "learning_rate": 2.783596745534462e-06, "loss": 0.0091, "step": 238000 }, { "epoch": 22.22533370671147, "grad_norm": 3.321178436279297, "learning_rate": 2.782661554287852e-06, "loss": 0.0096, "step": 238100 }, { "epoch": 22.234668160179222, "grad_norm": 2.9821722507476807, "learning_rate": 2.781726363041242e-06, "loss": 0.0086, "step": 238200 }, { "epoch": 22.24400261364697, "grad_norm": 4.461535453796387, "learning_rate": 2.7807911717946325e-06, "loss": 0.0088, "step": 238300 }, { "epoch": 22.25333706711472, "grad_norm": 0.976064145565033, "learning_rate": 2.7798559805480225e-06, "loss": 0.0098, "step": 238400 }, { "epoch": 22.26267152058247, "grad_norm": 2.7385504245758057, "learning_rate": 2.7789207893014126e-06, "loss": 0.0122, "step": 238500 }, { "epoch": 22.27200597405022, "grad_norm": 0.2045956552028656, "learning_rate": 2.7779855980548026e-06, "loss": 0.0078, "step": 238600 }, { "epoch": 22.281340427517968, "grad_norm": 1.0908880233764648, "learning_rate": 2.7770504068081926e-06, "loss": 0.0081, "step": 238700 }, { "epoch": 22.29067488098572, "grad_norm": 0.9778925776481628, "learning_rate": 2.7761152155615826e-06, "loss": 0.0079, "step": 238800 }, { "epoch": 22.300009334453467, "grad_norm": 0.24620242416858673, "learning_rate": 2.775180024314973e-06, "loss": 0.0072, "step": 238900 }, { "epoch": 22.309343787921218, "grad_norm": 2.872539520263672, "learning_rate": 2.7742448330683623e-06, "loss": 0.0097, "step": 239000 }, { "epoch": 22.318678241388966, "grad_norm": 5.224985122680664, "learning_rate": 2.7733096418217527e-06, "loss": 0.0114, "step": 239100 }, { "epoch": 22.328012694856717, "grad_norm": 4.335788249969482, "learning_rate": 2.7723744505751428e-06, "loss": 0.0082, "step": 239200 }, { "epoch": 22.337347148324465, "grad_norm": 0.03236832097172737, "learning_rate": 2.771439259328533e-06, "loss": 0.0062, "step": 239300 }, { "epoch": 22.346681601792216, "grad_norm": 4.676285266876221, "learning_rate": 2.770504068081923e-06, "loss": 0.008, "step": 239400 }, { "epoch": 22.356016055259964, "grad_norm": 2.5839505195617676, "learning_rate": 2.769568876835313e-06, "loss": 0.0104, "step": 239500 }, { "epoch": 22.365350508727715, "grad_norm": 1.0903849601745605, "learning_rate": 2.768633685588703e-06, "loss": 0.0084, "step": 239600 }, { "epoch": 22.374684962195463, "grad_norm": 0.5908926725387573, "learning_rate": 2.7676984943420933e-06, "loss": 0.0082, "step": 239700 }, { "epoch": 22.384019415663214, "grad_norm": 2.508406162261963, "learning_rate": 2.7667633030954834e-06, "loss": 0.0097, "step": 239800 }, { "epoch": 22.393353869130962, "grad_norm": 2.885796308517456, "learning_rate": 2.7658281118488734e-06, "loss": 0.0089, "step": 239900 }, { "epoch": 22.402688322598713, "grad_norm": 1.1489471197128296, "learning_rate": 2.7648929206022634e-06, "loss": 0.008, "step": 240000 }, { "epoch": 22.402688322598713, "eval_accuracy": 0.696686114352392, "eval_f1": 0.8246448813938843, "eval_loss": 0.2465812861919403, "eval_roc_auc": 0.9097134426318498, "eval_runtime": 262.3933, "eval_samples_per_second": 163.304, "eval_steps_per_second": 163.304, "step": 240000 }, { "epoch": 22.41202277606646, "grad_norm": 0.4649658501148224, "learning_rate": 2.7639577293556535e-06, "loss": 0.0101, "step": 240100 }, { "epoch": 22.421357229534213, "grad_norm": 3.2833521366119385, "learning_rate": 2.7630225381090435e-06, "loss": 0.0119, "step": 240200 }, { "epoch": 22.43069168300196, "grad_norm": 4.503511905670166, "learning_rate": 2.7620873468624335e-06, "loss": 0.0098, "step": 240300 }, { "epoch": 22.440026136469708, "grad_norm": 2.5887227058410645, "learning_rate": 2.761152155615824e-06, "loss": 0.0094, "step": 240400 }, { "epoch": 22.44936058993746, "grad_norm": 0.6591362357139587, "learning_rate": 2.760216964369214e-06, "loss": 0.0077, "step": 240500 }, { "epoch": 22.458695043405207, "grad_norm": 0.45134037733078003, "learning_rate": 2.759281773122604e-06, "loss": 0.0092, "step": 240600 }, { "epoch": 22.46802949687296, "grad_norm": 0.6094759702682495, "learning_rate": 2.758346581875994e-06, "loss": 0.0081, "step": 240700 }, { "epoch": 22.477363950340706, "grad_norm": 3.199056386947632, "learning_rate": 2.7574113906293837e-06, "loss": 0.0082, "step": 240800 }, { "epoch": 22.486698403808457, "grad_norm": 1.2377862930297852, "learning_rate": 2.7564761993827737e-06, "loss": 0.0091, "step": 240900 }, { "epoch": 22.496032857276205, "grad_norm": 3.6688778400421143, "learning_rate": 2.7555410081361637e-06, "loss": 0.0077, "step": 241000 }, { "epoch": 22.505367310743956, "grad_norm": 4.539977550506592, "learning_rate": 2.7546058168895538e-06, "loss": 0.0083, "step": 241100 }, { "epoch": 22.514701764211704, "grad_norm": 1.3210628032684326, "learning_rate": 2.7536706256429442e-06, "loss": 0.0075, "step": 241200 }, { "epoch": 22.524036217679456, "grad_norm": 1.051027774810791, "learning_rate": 2.7527354343963343e-06, "loss": 0.0084, "step": 241300 }, { "epoch": 22.533370671147203, "grad_norm": 2.7243571281433105, "learning_rate": 2.7518002431497243e-06, "loss": 0.0087, "step": 241400 }, { "epoch": 22.542705124614955, "grad_norm": 3.317124843597412, "learning_rate": 2.7508650519031143e-06, "loss": 0.0097, "step": 241500 }, { "epoch": 22.552039578082702, "grad_norm": 2.9210522174835205, "learning_rate": 2.7499298606565044e-06, "loss": 0.0093, "step": 241600 }, { "epoch": 22.561374031550454, "grad_norm": 2.5740225315093994, "learning_rate": 2.7489946694098944e-06, "loss": 0.0059, "step": 241700 }, { "epoch": 22.5707084850182, "grad_norm": 2.3780853748321533, "learning_rate": 2.748059478163285e-06, "loss": 0.0097, "step": 241800 }, { "epoch": 22.580042938485953, "grad_norm": 0.25585928559303284, "learning_rate": 2.747124286916675e-06, "loss": 0.0088, "step": 241900 }, { "epoch": 22.5893773919537, "grad_norm": 0.3444155752658844, "learning_rate": 2.746189095670065e-06, "loss": 0.0107, "step": 242000 }, { "epoch": 22.59871184542145, "grad_norm": 1.2657642364501953, "learning_rate": 2.745253904423455e-06, "loss": 0.0083, "step": 242100 }, { "epoch": 22.6080462988892, "grad_norm": 1.03476083278656, "learning_rate": 2.744318713176845e-06, "loss": 0.0097, "step": 242200 }, { "epoch": 22.61738075235695, "grad_norm": 0.019354505464434624, "learning_rate": 2.743383521930235e-06, "loss": 0.0091, "step": 242300 }, { "epoch": 22.6267152058247, "grad_norm": 0.25771036744117737, "learning_rate": 2.7424483306836254e-06, "loss": 0.0083, "step": 242400 }, { "epoch": 22.63604965929245, "grad_norm": 0.03111942857503891, "learning_rate": 2.7415131394370155e-06, "loss": 0.0081, "step": 242500 }, { "epoch": 22.645384112760198, "grad_norm": 0.9412291646003723, "learning_rate": 2.740577948190405e-06, "loss": 0.0086, "step": 242600 }, { "epoch": 22.65471856622795, "grad_norm": 1.5487630367279053, "learning_rate": 2.739642756943795e-06, "loss": 0.0088, "step": 242700 }, { "epoch": 22.664053019695697, "grad_norm": 7.323740005493164, "learning_rate": 2.738707565697185e-06, "loss": 0.0115, "step": 242800 }, { "epoch": 22.673387473163448, "grad_norm": 2.9414620399475098, "learning_rate": 2.737772374450575e-06, "loss": 0.0127, "step": 242900 }, { "epoch": 22.682721926631196, "grad_norm": 0.7261058688163757, "learning_rate": 2.736837183203965e-06, "loss": 0.0086, "step": 243000 }, { "epoch": 22.692056380098947, "grad_norm": 1.4305150508880615, "learning_rate": 2.7359019919573552e-06, "loss": 0.0082, "step": 243100 }, { "epoch": 22.701390833566695, "grad_norm": 3.1735951900482178, "learning_rate": 2.7349668007107457e-06, "loss": 0.0085, "step": 243200 }, { "epoch": 22.710725287034442, "grad_norm": 2.274240016937256, "learning_rate": 2.7340316094641357e-06, "loss": 0.0113, "step": 243300 }, { "epoch": 22.720059740502194, "grad_norm": 0.24289946258068085, "learning_rate": 2.7330964182175257e-06, "loss": 0.0106, "step": 243400 }, { "epoch": 22.72939419396994, "grad_norm": 1.2864363193511963, "learning_rate": 2.7321612269709158e-06, "loss": 0.0092, "step": 243500 }, { "epoch": 22.738728647437693, "grad_norm": 0.10559940338134766, "learning_rate": 2.731226035724306e-06, "loss": 0.01, "step": 243600 }, { "epoch": 22.74806310090544, "grad_norm": 4.175566673278809, "learning_rate": 2.730290844477696e-06, "loss": 0.0093, "step": 243700 }, { "epoch": 22.757397554373192, "grad_norm": 0.8294838070869446, "learning_rate": 2.7293556532310863e-06, "loss": 0.0082, "step": 243800 }, { "epoch": 22.76673200784094, "grad_norm": 3.1862339973449707, "learning_rate": 2.7284204619844763e-06, "loss": 0.0088, "step": 243900 }, { "epoch": 22.77606646130869, "grad_norm": 0.8197754621505737, "learning_rate": 2.7274852707378664e-06, "loss": 0.0081, "step": 244000 }, { "epoch": 22.78540091477644, "grad_norm": 1.9567484855651855, "learning_rate": 2.7265500794912564e-06, "loss": 0.0114, "step": 244100 }, { "epoch": 22.79473536824419, "grad_norm": 1.832493782043457, "learning_rate": 2.7256148882446464e-06, "loss": 0.008, "step": 244200 }, { "epoch": 22.804069821711938, "grad_norm": 0.5847102999687195, "learning_rate": 2.7246796969980364e-06, "loss": 0.01, "step": 244300 }, { "epoch": 22.81340427517969, "grad_norm": 5.5233154296875, "learning_rate": 2.723744505751426e-06, "loss": 0.0072, "step": 244400 }, { "epoch": 22.822738728647437, "grad_norm": 5.907074928283691, "learning_rate": 2.722809314504816e-06, "loss": 0.0077, "step": 244500 }, { "epoch": 22.832073182115188, "grad_norm": 0.04999824985861778, "learning_rate": 2.721874123258206e-06, "loss": 0.008, "step": 244600 }, { "epoch": 22.841407635582936, "grad_norm": 2.035435676574707, "learning_rate": 2.7209389320115966e-06, "loss": 0.0108, "step": 244700 }, { "epoch": 22.850742089050687, "grad_norm": 4.114819049835205, "learning_rate": 2.7200037407649866e-06, "loss": 0.0069, "step": 244800 }, { "epoch": 22.860076542518435, "grad_norm": 0.43811845779418945, "learning_rate": 2.7190685495183766e-06, "loss": 0.0093, "step": 244900 }, { "epoch": 22.869410995986186, "grad_norm": 1.2074834108352661, "learning_rate": 2.7181333582717667e-06, "loss": 0.0095, "step": 245000 }, { "epoch": 22.869410995986186, "eval_accuracy": 0.695075845974329, "eval_f1": 0.8221121388803647, "eval_loss": 0.24782489240169525, "eval_roc_auc": 0.9074297196450435, "eval_runtime": 301.446, "eval_samples_per_second": 142.148, "eval_steps_per_second": 142.148, "step": 245000 }, { "epoch": 22.878745449453934, "grad_norm": 5.067676067352295, "learning_rate": 2.7171981670251567e-06, "loss": 0.0092, "step": 245100 }, { "epoch": 22.888079902921685, "grad_norm": 2.439814805984497, "learning_rate": 2.7162629757785467e-06, "loss": 0.0073, "step": 245200 }, { "epoch": 22.897414356389433, "grad_norm": 0.03324517980217934, "learning_rate": 2.715327784531937e-06, "loss": 0.0097, "step": 245300 }, { "epoch": 22.906748809857184, "grad_norm": 0.7647516131401062, "learning_rate": 2.714392593285327e-06, "loss": 0.0088, "step": 245400 }, { "epoch": 22.916083263324932, "grad_norm": 6.663356781005859, "learning_rate": 2.7134574020387172e-06, "loss": 0.01, "step": 245500 }, { "epoch": 22.925417716792683, "grad_norm": 0.252411425113678, "learning_rate": 2.7125222107921073e-06, "loss": 0.0083, "step": 245600 }, { "epoch": 22.93475217026043, "grad_norm": 1.4065412282943726, "learning_rate": 2.7115870195454973e-06, "loss": 0.0082, "step": 245700 }, { "epoch": 22.944086623728182, "grad_norm": 1.731519103050232, "learning_rate": 2.7106518282988873e-06, "loss": 0.0093, "step": 245800 }, { "epoch": 22.95342107719593, "grad_norm": 0.11390013247728348, "learning_rate": 2.7097166370522778e-06, "loss": 0.0124, "step": 245900 }, { "epoch": 22.962755530663678, "grad_norm": 0.2485383152961731, "learning_rate": 2.708781445805668e-06, "loss": 0.0064, "step": 246000 }, { "epoch": 22.97208998413143, "grad_norm": 0.36099565029144287, "learning_rate": 2.707846254559058e-06, "loss": 0.0061, "step": 246100 }, { "epoch": 22.981424437599177, "grad_norm": 4.138914108276367, "learning_rate": 2.7069110633124475e-06, "loss": 0.0113, "step": 246200 }, { "epoch": 22.99075889106693, "grad_norm": 0.18705497682094574, "learning_rate": 2.7059758720658375e-06, "loss": 0.0094, "step": 246300 }, { "epoch": 23.000093344534676, "grad_norm": 0.6703884601593018, "learning_rate": 2.7050406808192275e-06, "loss": 0.0083, "step": 246400 }, { "epoch": 23.009427798002427, "grad_norm": 3.345000982284546, "learning_rate": 2.7041054895726175e-06, "loss": 0.0079, "step": 246500 }, { "epoch": 23.018762251470175, "grad_norm": 4.730309009552002, "learning_rate": 2.7031702983260076e-06, "loss": 0.0081, "step": 246600 }, { "epoch": 23.028096704937926, "grad_norm": 0.05815302953124046, "learning_rate": 2.702235107079398e-06, "loss": 0.0074, "step": 246700 }, { "epoch": 23.037431158405674, "grad_norm": 0.0405343621969223, "learning_rate": 2.701299915832788e-06, "loss": 0.0062, "step": 246800 }, { "epoch": 23.046765611873425, "grad_norm": 3.718137741088867, "learning_rate": 2.700364724586178e-06, "loss": 0.0064, "step": 246900 }, { "epoch": 23.056100065341173, "grad_norm": 2.3593664169311523, "learning_rate": 2.699429533339568e-06, "loss": 0.0072, "step": 247000 }, { "epoch": 23.065434518808924, "grad_norm": 3.9024674892425537, "learning_rate": 2.698494342092958e-06, "loss": 0.0087, "step": 247100 }, { "epoch": 23.074768972276672, "grad_norm": 6.284780979156494, "learning_rate": 2.697559150846348e-06, "loss": 0.007, "step": 247200 }, { "epoch": 23.084103425744424, "grad_norm": 0.26539677381515503, "learning_rate": 2.6966239595997386e-06, "loss": 0.0079, "step": 247300 }, { "epoch": 23.09343787921217, "grad_norm": 2.286700487136841, "learning_rate": 2.6956887683531287e-06, "loss": 0.0075, "step": 247400 }, { "epoch": 23.102772332679923, "grad_norm": 2.6349594593048096, "learning_rate": 2.6947535771065187e-06, "loss": 0.0078, "step": 247500 }, { "epoch": 23.11210678614767, "grad_norm": 0.004243507049977779, "learning_rate": 2.6938183858599087e-06, "loss": 0.0068, "step": 247600 }, { "epoch": 23.12144123961542, "grad_norm": 1.6838949918746948, "learning_rate": 2.6928831946132988e-06, "loss": 0.0077, "step": 247700 }, { "epoch": 23.13077569308317, "grad_norm": 2.5331249237060547, "learning_rate": 2.691948003366689e-06, "loss": 0.0094, "step": 247800 }, { "epoch": 23.14011014655092, "grad_norm": 0.7572336792945862, "learning_rate": 2.691012812120079e-06, "loss": 0.0071, "step": 247900 }, { "epoch": 23.14944460001867, "grad_norm": 0.435373455286026, "learning_rate": 2.6900776208734684e-06, "loss": 0.0064, "step": 248000 }, { "epoch": 23.15877905348642, "grad_norm": 1.4566682577133179, "learning_rate": 2.689142429626859e-06, "loss": 0.0065, "step": 248100 }, { "epoch": 23.168113506954167, "grad_norm": 0.01950191892683506, "learning_rate": 2.688207238380249e-06, "loss": 0.0093, "step": 248200 }, { "epoch": 23.17744796042192, "grad_norm": 0.2946583330631256, "learning_rate": 2.687272047133639e-06, "loss": 0.0073, "step": 248300 }, { "epoch": 23.186782413889667, "grad_norm": 0.279318630695343, "learning_rate": 2.686336855887029e-06, "loss": 0.0078, "step": 248400 }, { "epoch": 23.196116867357418, "grad_norm": 0.4605204463005066, "learning_rate": 2.685401664640419e-06, "loss": 0.0075, "step": 248500 }, { "epoch": 23.205451320825166, "grad_norm": 3.8126778602600098, "learning_rate": 2.684466473393809e-06, "loss": 0.0108, "step": 248600 }, { "epoch": 23.214785774292917, "grad_norm": 1.2195473909378052, "learning_rate": 2.683531282147199e-06, "loss": 0.0076, "step": 248700 }, { "epoch": 23.224120227760665, "grad_norm": 1.5057649612426758, "learning_rate": 2.6825960909005895e-06, "loss": 0.0069, "step": 248800 }, { "epoch": 23.233454681228412, "grad_norm": 2.272029161453247, "learning_rate": 2.6816608996539796e-06, "loss": 0.0067, "step": 248900 }, { "epoch": 23.242789134696164, "grad_norm": 1.7880960702896118, "learning_rate": 2.6807257084073696e-06, "loss": 0.0078, "step": 249000 }, { "epoch": 23.25212358816391, "grad_norm": 1.062099814414978, "learning_rate": 2.6797905171607596e-06, "loss": 0.0065, "step": 249100 }, { "epoch": 23.261458041631663, "grad_norm": 1.0031805038452148, "learning_rate": 2.6788553259141496e-06, "loss": 0.0075, "step": 249200 }, { "epoch": 23.27079249509941, "grad_norm": 0.8828320503234863, "learning_rate": 2.6779201346675397e-06, "loss": 0.0068, "step": 249300 }, { "epoch": 23.280126948567162, "grad_norm": 2.2610113620758057, "learning_rate": 2.67698494342093e-06, "loss": 0.008, "step": 249400 }, { "epoch": 23.28946140203491, "grad_norm": 0.8092742562294006, "learning_rate": 2.67604975217432e-06, "loss": 0.0098, "step": 249500 }, { "epoch": 23.29879585550266, "grad_norm": 1.0663777589797974, "learning_rate": 2.67511456092771e-06, "loss": 0.0079, "step": 249600 }, { "epoch": 23.30813030897041, "grad_norm": 3.8998498916625977, "learning_rate": 2.6741793696811002e-06, "loss": 0.0069, "step": 249700 }, { "epoch": 23.31746476243816, "grad_norm": 17.152511596679688, "learning_rate": 2.67324417843449e-06, "loss": 0.0083, "step": 249800 }, { "epoch": 23.326799215905908, "grad_norm": 0.055109020322561264, "learning_rate": 2.67230898718788e-06, "loss": 0.0081, "step": 249900 }, { "epoch": 23.33613366937366, "grad_norm": 0.25019097328186035, "learning_rate": 2.67137379594127e-06, "loss": 0.0073, "step": 250000 }, { "epoch": 23.33613366937366, "eval_accuracy": 0.6946791131855309, "eval_f1": 0.826290673202671, "eval_loss": 0.26000723242759705, "eval_roc_auc": 0.9141305764080829, "eval_runtime": 356.4852, "eval_samples_per_second": 120.201, "eval_steps_per_second": 120.201, "step": 250000 }, { "epoch": 23.345468122841407, "grad_norm": 2.367158889770508, "learning_rate": 2.67043860469466e-06, "loss": 0.008, "step": 250100 }, { "epoch": 23.354802576309158, "grad_norm": 0.08930651843547821, "learning_rate": 2.6695034134480504e-06, "loss": 0.0054, "step": 250200 }, { "epoch": 23.364137029776906, "grad_norm": 1.6954704523086548, "learning_rate": 2.6685682222014404e-06, "loss": 0.0083, "step": 250300 }, { "epoch": 23.373471483244657, "grad_norm": 1.3407785892486572, "learning_rate": 2.6676330309548304e-06, "loss": 0.0073, "step": 250400 }, { "epoch": 23.382805936712405, "grad_norm": 2.3496150970458984, "learning_rate": 2.6666978397082205e-06, "loss": 0.0081, "step": 250500 }, { "epoch": 23.392140390180156, "grad_norm": 1.5433318614959717, "learning_rate": 2.6657626484616105e-06, "loss": 0.0086, "step": 250600 }, { "epoch": 23.401474843647904, "grad_norm": 2.796708345413208, "learning_rate": 2.6648274572150005e-06, "loss": 0.0058, "step": 250700 }, { "epoch": 23.410809297115655, "grad_norm": 1.1639217138290405, "learning_rate": 2.663892265968391e-06, "loss": 0.0078, "step": 250800 }, { "epoch": 23.420143750583403, "grad_norm": 0.07552025467157364, "learning_rate": 2.662957074721781e-06, "loss": 0.0086, "step": 250900 }, { "epoch": 23.429478204051154, "grad_norm": 3.7005457878112793, "learning_rate": 2.662021883475171e-06, "loss": 0.0081, "step": 251000 }, { "epoch": 23.438812657518902, "grad_norm": 4.045814037322998, "learning_rate": 2.661086692228561e-06, "loss": 0.0082, "step": 251100 }, { "epoch": 23.448147110986653, "grad_norm": 0.009038818068802357, "learning_rate": 2.660151500981951e-06, "loss": 0.0081, "step": 251200 }, { "epoch": 23.4574815644544, "grad_norm": 2.2433531284332275, "learning_rate": 2.659216309735341e-06, "loss": 0.0076, "step": 251300 }, { "epoch": 23.466816017922152, "grad_norm": 0.7887830138206482, "learning_rate": 2.6582811184887316e-06, "loss": 0.007, "step": 251400 }, { "epoch": 23.4761504713899, "grad_norm": 3.518347978591919, "learning_rate": 2.6573459272421216e-06, "loss": 0.0093, "step": 251500 }, { "epoch": 23.48548492485765, "grad_norm": 8.212498664855957, "learning_rate": 2.6564107359955112e-06, "loss": 0.0118, "step": 251600 }, { "epoch": 23.4948193783254, "grad_norm": 0.7889426350593567, "learning_rate": 2.6554755447489013e-06, "loss": 0.0079, "step": 251700 }, { "epoch": 23.504153831793147, "grad_norm": 2.2418439388275146, "learning_rate": 2.6545403535022913e-06, "loss": 0.007, "step": 251800 }, { "epoch": 23.513488285260898, "grad_norm": 1.915453553199768, "learning_rate": 2.6536051622556813e-06, "loss": 0.0061, "step": 251900 }, { "epoch": 23.522822738728646, "grad_norm": 1.7590463161468506, "learning_rate": 2.6526699710090713e-06, "loss": 0.0085, "step": 252000 }, { "epoch": 23.532157192196397, "grad_norm": 0.04964792728424072, "learning_rate": 2.6517347797624614e-06, "loss": 0.0093, "step": 252100 }, { "epoch": 23.541491645664145, "grad_norm": 0.24132844805717468, "learning_rate": 2.6507995885158514e-06, "loss": 0.0081, "step": 252200 }, { "epoch": 23.550826099131896, "grad_norm": 2.1206393241882324, "learning_rate": 2.649864397269242e-06, "loss": 0.008, "step": 252300 }, { "epoch": 23.560160552599644, "grad_norm": 0.833889901638031, "learning_rate": 2.648929206022632e-06, "loss": 0.0073, "step": 252400 }, { "epoch": 23.569495006067395, "grad_norm": 2.8276405334472656, "learning_rate": 2.647994014776022e-06, "loss": 0.008, "step": 252500 }, { "epoch": 23.578829459535143, "grad_norm": 2.8553996086120605, "learning_rate": 2.647058823529412e-06, "loss": 0.0093, "step": 252600 }, { "epoch": 23.588163913002894, "grad_norm": 3.2782905101776123, "learning_rate": 2.646123632282802e-06, "loss": 0.0069, "step": 252700 }, { "epoch": 23.597498366470642, "grad_norm": 2.37542462348938, "learning_rate": 2.645188441036192e-06, "loss": 0.0083, "step": 252800 }, { "epoch": 23.606832819938393, "grad_norm": 0.04438645392656326, "learning_rate": 2.6442532497895825e-06, "loss": 0.0088, "step": 252900 }, { "epoch": 23.61616727340614, "grad_norm": 0.166884183883667, "learning_rate": 2.6433180585429725e-06, "loss": 0.008, "step": 253000 }, { "epoch": 23.625501726873892, "grad_norm": 4.574504852294922, "learning_rate": 2.6423828672963625e-06, "loss": 0.0097, "step": 253100 }, { "epoch": 23.63483618034164, "grad_norm": 1.6976369619369507, "learning_rate": 2.6414476760497526e-06, "loss": 0.0097, "step": 253200 }, { "epoch": 23.64417063380939, "grad_norm": 3.7160584926605225, "learning_rate": 2.6405124848031426e-06, "loss": 0.0078, "step": 253300 }, { "epoch": 23.65350508727714, "grad_norm": 0.04494257643818855, "learning_rate": 2.639577293556532e-06, "loss": 0.0091, "step": 253400 }, { "epoch": 23.66283954074489, "grad_norm": 0.28456199169158936, "learning_rate": 2.6386421023099222e-06, "loss": 0.0078, "step": 253500 }, { "epoch": 23.67217399421264, "grad_norm": 4.652538299560547, "learning_rate": 2.6377069110633123e-06, "loss": 0.0109, "step": 253600 }, { "epoch": 23.68150844768039, "grad_norm": 2.0537819862365723, "learning_rate": 2.6367717198167027e-06, "loss": 0.0076, "step": 253700 }, { "epoch": 23.690842901148137, "grad_norm": 0.2058487832546234, "learning_rate": 2.6358365285700927e-06, "loss": 0.0068, "step": 253800 }, { "epoch": 23.70017735461589, "grad_norm": 5.873623371124268, "learning_rate": 2.6349013373234828e-06, "loss": 0.0071, "step": 253900 }, { "epoch": 23.709511808083636, "grad_norm": 0.5168663859367371, "learning_rate": 2.633966146076873e-06, "loss": 0.0095, "step": 254000 }, { "epoch": 23.718846261551388, "grad_norm": 1.8284802436828613, "learning_rate": 2.633030954830263e-06, "loss": 0.0107, "step": 254100 }, { "epoch": 23.728180715019136, "grad_norm": 1.8792469501495361, "learning_rate": 2.632095763583653e-06, "loss": 0.0079, "step": 254200 }, { "epoch": 23.737515168486887, "grad_norm": 0.4849226772785187, "learning_rate": 2.6311605723370433e-06, "loss": 0.0059, "step": 254300 }, { "epoch": 23.746849621954635, "grad_norm": 0.23808689415454865, "learning_rate": 2.6302253810904334e-06, "loss": 0.009, "step": 254400 }, { "epoch": 23.756184075422382, "grad_norm": 0.7349711656570435, "learning_rate": 2.6292901898438234e-06, "loss": 0.0072, "step": 254500 }, { "epoch": 23.765518528890134, "grad_norm": 3.414299249649048, "learning_rate": 2.6283549985972134e-06, "loss": 0.0099, "step": 254600 }, { "epoch": 23.77485298235788, "grad_norm": 4.046874523162842, "learning_rate": 2.6274198073506034e-06, "loss": 0.0071, "step": 254700 }, { "epoch": 23.784187435825633, "grad_norm": 1.7586009502410889, "learning_rate": 2.6264846161039935e-06, "loss": 0.01, "step": 254800 }, { "epoch": 23.79352188929338, "grad_norm": 0.04644396901130676, "learning_rate": 2.625549424857384e-06, "loss": 0.0081, "step": 254900 }, { "epoch": 23.80285634276113, "grad_norm": 0.23363465070724487, "learning_rate": 2.624614233610774e-06, "loss": 0.0081, "step": 255000 }, { "epoch": 23.80285634276113, "eval_accuracy": 0.6951925320886815, "eval_f1": 0.8220177999788933, "eval_loss": 0.25851863622665405, "eval_roc_auc": 0.9063216072299441, "eval_runtime": 424.7284, "eval_samples_per_second": 100.888, "eval_steps_per_second": 100.888, "step": 255000 }, { "epoch": 23.81219079622888, "grad_norm": 0.42949363589286804, "learning_rate": 2.623679042364164e-06, "loss": 0.0091, "step": 255100 }, { "epoch": 23.82152524969663, "grad_norm": 0.17986011505126953, "learning_rate": 2.622743851117554e-06, "loss": 0.0075, "step": 255200 }, { "epoch": 23.83085970316438, "grad_norm": 0.2982785105705261, "learning_rate": 2.6218086598709436e-06, "loss": 0.0094, "step": 255300 }, { "epoch": 23.84019415663213, "grad_norm": 0.18835392594337463, "learning_rate": 2.6208734686243337e-06, "loss": 0.0107, "step": 255400 }, { "epoch": 23.849528610099878, "grad_norm": 0.4307057857513428, "learning_rate": 2.6199382773777237e-06, "loss": 0.0081, "step": 255500 }, { "epoch": 23.85886306356763, "grad_norm": 0.9793701171875, "learning_rate": 2.6190030861311137e-06, "loss": 0.0093, "step": 255600 }, { "epoch": 23.868197517035377, "grad_norm": 1.1051058769226074, "learning_rate": 2.618067894884504e-06, "loss": 0.0078, "step": 255700 }, { "epoch": 23.877531970503128, "grad_norm": 0.7857577204704285, "learning_rate": 2.617132703637894e-06, "loss": 0.0074, "step": 255800 }, { "epoch": 23.886866423970876, "grad_norm": 0.05473601445555687, "learning_rate": 2.6161975123912842e-06, "loss": 0.0084, "step": 255900 }, { "epoch": 23.896200877438627, "grad_norm": 1.1726365089416504, "learning_rate": 2.6152623211446743e-06, "loss": 0.0089, "step": 256000 }, { "epoch": 23.905535330906375, "grad_norm": 3.5698060989379883, "learning_rate": 2.6143271298980643e-06, "loss": 0.0075, "step": 256100 }, { "epoch": 23.914869784374126, "grad_norm": 3.2296080589294434, "learning_rate": 2.6133919386514543e-06, "loss": 0.008, "step": 256200 }, { "epoch": 23.924204237841874, "grad_norm": 1.5735024213790894, "learning_rate": 2.6124567474048444e-06, "loss": 0.0105, "step": 256300 }, { "epoch": 23.933538691309625, "grad_norm": 0.8865410089492798, "learning_rate": 2.611521556158235e-06, "loss": 0.0093, "step": 256400 }, { "epoch": 23.942873144777373, "grad_norm": 2.307253837585449, "learning_rate": 2.610586364911625e-06, "loss": 0.0088, "step": 256500 }, { "epoch": 23.952207598245124, "grad_norm": 2.439653158187866, "learning_rate": 2.609651173665015e-06, "loss": 0.0063, "step": 256600 }, { "epoch": 23.961542051712872, "grad_norm": 0.2269274741411209, "learning_rate": 2.608715982418405e-06, "loss": 0.0056, "step": 256700 }, { "epoch": 23.970876505180623, "grad_norm": 4.333026885986328, "learning_rate": 2.607780791171795e-06, "loss": 0.0103, "step": 256800 }, { "epoch": 23.98021095864837, "grad_norm": 0.028682956472039223, "learning_rate": 2.606845599925185e-06, "loss": 0.0082, "step": 256900 }, { "epoch": 23.989545412116122, "grad_norm": 0.3388897180557251, "learning_rate": 2.6059104086785754e-06, "loss": 0.0063, "step": 257000 }, { "epoch": 23.99887986558387, "grad_norm": 2.8399789333343506, "learning_rate": 2.6049752174319646e-06, "loss": 0.0085, "step": 257100 }, { "epoch": 24.00821431905162, "grad_norm": 0.08815859258174896, "learning_rate": 2.604040026185355e-06, "loss": 0.007, "step": 257200 }, { "epoch": 24.01754877251937, "grad_norm": 6.499048709869385, "learning_rate": 2.603104834938745e-06, "loss": 0.0086, "step": 257300 }, { "epoch": 24.026883225987117, "grad_norm": 0.49926304817199707, "learning_rate": 2.602169643692135e-06, "loss": 0.0068, "step": 257400 }, { "epoch": 24.036217679454868, "grad_norm": 0.0011052577756345272, "learning_rate": 2.601234452445525e-06, "loss": 0.0066, "step": 257500 }, { "epoch": 24.045552132922616, "grad_norm": 0.026992423459887505, "learning_rate": 2.600299261198915e-06, "loss": 0.0069, "step": 257600 }, { "epoch": 24.054886586390367, "grad_norm": 9.992321968078613, "learning_rate": 2.5993640699523052e-06, "loss": 0.0077, "step": 257700 }, { "epoch": 24.064221039858115, "grad_norm": 3.7268459796905518, "learning_rate": 2.5984288787056957e-06, "loss": 0.0062, "step": 257800 }, { "epoch": 24.073555493325866, "grad_norm": 0.14281931519508362, "learning_rate": 2.5974936874590857e-06, "loss": 0.008, "step": 257900 }, { "epoch": 24.082889946793614, "grad_norm": 1.3403759002685547, "learning_rate": 2.5965584962124757e-06, "loss": 0.0066, "step": 258000 }, { "epoch": 24.092224400261365, "grad_norm": 0.044313568621873856, "learning_rate": 2.5956233049658658e-06, "loss": 0.0057, "step": 258100 }, { "epoch": 24.101558853729113, "grad_norm": 0.24705880880355835, "learning_rate": 2.594688113719256e-06, "loss": 0.0056, "step": 258200 }, { "epoch": 24.110893307196864, "grad_norm": 0.1125391349196434, "learning_rate": 2.593752922472646e-06, "loss": 0.0064, "step": 258300 }, { "epoch": 24.120227760664612, "grad_norm": 1.9573873281478882, "learning_rate": 2.5928177312260363e-06, "loss": 0.0068, "step": 258400 }, { "epoch": 24.129562214132363, "grad_norm": 2.889387607574463, "learning_rate": 2.5918825399794263e-06, "loss": 0.0069, "step": 258500 }, { "epoch": 24.13889666760011, "grad_norm": 0.13158154487609863, "learning_rate": 2.5909473487328163e-06, "loss": 0.0063, "step": 258600 }, { "epoch": 24.148231121067862, "grad_norm": 0.9265145659446716, "learning_rate": 2.5900121574862064e-06, "loss": 0.0062, "step": 258700 }, { "epoch": 24.15756557453561, "grad_norm": 1.2983254194259644, "learning_rate": 2.5890769662395964e-06, "loss": 0.0082, "step": 258800 }, { "epoch": 24.16690002800336, "grad_norm": 0.05933251604437828, "learning_rate": 2.588141774992986e-06, "loss": 0.0077, "step": 258900 }, { "epoch": 24.17623448147111, "grad_norm": 11.360016822814941, "learning_rate": 2.587206583746376e-06, "loss": 0.007, "step": 259000 }, { "epoch": 24.18556893493886, "grad_norm": 0.2623929977416992, "learning_rate": 2.586271392499766e-06, "loss": 0.007, "step": 259100 }, { "epoch": 24.19490338840661, "grad_norm": 5.160435676574707, "learning_rate": 2.5853362012531565e-06, "loss": 0.0086, "step": 259200 }, { "epoch": 24.20423784187436, "grad_norm": 1.661001205444336, "learning_rate": 2.5844010100065466e-06, "loss": 0.0086, "step": 259300 }, { "epoch": 24.213572295342107, "grad_norm": 0.7222966551780701, "learning_rate": 2.5834658187599366e-06, "loss": 0.0093, "step": 259400 }, { "epoch": 24.22290674880986, "grad_norm": 2.3750224113464355, "learning_rate": 2.5825306275133266e-06, "loss": 0.0093, "step": 259500 }, { "epoch": 24.232241202277606, "grad_norm": 5.280463218688965, "learning_rate": 2.5815954362667166e-06, "loss": 0.0063, "step": 259600 }, { "epoch": 24.241575655745358, "grad_norm": 0.09667915850877762, "learning_rate": 2.5806602450201067e-06, "loss": 0.0088, "step": 259700 }, { "epoch": 24.250910109213105, "grad_norm": 0.8957066535949707, "learning_rate": 2.5797250537734967e-06, "loss": 0.0072, "step": 259800 }, { "epoch": 24.260244562680857, "grad_norm": 4.384435653686523, "learning_rate": 2.578789862526887e-06, "loss": 0.0074, "step": 259900 }, { "epoch": 24.269579016148604, "grad_norm": 3.2903311252593994, "learning_rate": 2.577854671280277e-06, "loss": 0.0076, "step": 260000 }, { "epoch": 24.269579016148604, "eval_accuracy": 0.6985997666277713, "eval_f1": 0.8255226603058791, "eval_loss": 0.26250991225242615, "eval_roc_auc": 0.9093131049894799, "eval_runtime": 434.3265, "eval_samples_per_second": 98.659, "eval_steps_per_second": 98.659, "step": 260000 }, { "epoch": 24.278913469616352, "grad_norm": 3.1639018058776855, "learning_rate": 2.5769194800336672e-06, "loss": 0.0089, "step": 260100 }, { "epoch": 24.288247923084104, "grad_norm": 0.08272681385278702, "learning_rate": 2.5759842887870572e-06, "loss": 0.0072, "step": 260200 }, { "epoch": 24.29758237655185, "grad_norm": 0.6161157488822937, "learning_rate": 2.5750490975404473e-06, "loss": 0.0075, "step": 260300 }, { "epoch": 24.306916830019603, "grad_norm": 7.479611396789551, "learning_rate": 2.5741139062938373e-06, "loss": 0.0059, "step": 260400 }, { "epoch": 24.31625128348735, "grad_norm": 4.853935241699219, "learning_rate": 2.5731787150472278e-06, "loss": 0.0075, "step": 260500 }, { "epoch": 24.3255857369551, "grad_norm": 4.097402572631836, "learning_rate": 2.572243523800618e-06, "loss": 0.0072, "step": 260600 }, { "epoch": 24.33492019042285, "grad_norm": 1.4064775705337524, "learning_rate": 2.5713083325540074e-06, "loss": 0.0086, "step": 260700 }, { "epoch": 24.3442546438906, "grad_norm": 0.6552013754844666, "learning_rate": 2.5703731413073974e-06, "loss": 0.008, "step": 260800 }, { "epoch": 24.35358909735835, "grad_norm": 1.3357467651367188, "learning_rate": 2.5694379500607875e-06, "loss": 0.0052, "step": 260900 }, { "epoch": 24.3629235508261, "grad_norm": 0.00477164750918746, "learning_rate": 2.5685027588141775e-06, "loss": 0.0077, "step": 261000 }, { "epoch": 24.372258004293847, "grad_norm": 3.7783403396606445, "learning_rate": 2.5675675675675675e-06, "loss": 0.0065, "step": 261100 }, { "epoch": 24.3815924577616, "grad_norm": 3.6953794956207275, "learning_rate": 2.5666323763209576e-06, "loss": 0.0069, "step": 261200 }, { "epoch": 24.390926911229347, "grad_norm": 1.4649510383605957, "learning_rate": 2.565697185074348e-06, "loss": 0.0088, "step": 261300 }, { "epoch": 24.400261364697098, "grad_norm": 3.186350107192993, "learning_rate": 2.564761993827738e-06, "loss": 0.0075, "step": 261400 }, { "epoch": 24.409595818164846, "grad_norm": 0.09395711869001389, "learning_rate": 2.563826802581128e-06, "loss": 0.0051, "step": 261500 }, { "epoch": 24.418930271632597, "grad_norm": 1.1817668676376343, "learning_rate": 2.562891611334518e-06, "loss": 0.009, "step": 261600 }, { "epoch": 24.428264725100345, "grad_norm": 0.04600508511066437, "learning_rate": 2.561956420087908e-06, "loss": 0.0068, "step": 261700 }, { "epoch": 24.437599178568096, "grad_norm": 0.8322649002075195, "learning_rate": 2.561021228841298e-06, "loss": 0.0093, "step": 261800 }, { "epoch": 24.446933632035844, "grad_norm": 2.917024850845337, "learning_rate": 2.5600860375946886e-06, "loss": 0.0068, "step": 261900 }, { "epoch": 24.456268085503595, "grad_norm": 0.07194796949625015, "learning_rate": 2.5591508463480786e-06, "loss": 0.0076, "step": 262000 }, { "epoch": 24.465602538971343, "grad_norm": 0.3372882306575775, "learning_rate": 2.5582156551014687e-06, "loss": 0.0083, "step": 262100 }, { "epoch": 24.474936992439094, "grad_norm": 0.533175528049469, "learning_rate": 2.5572804638548587e-06, "loss": 0.0072, "step": 262200 }, { "epoch": 24.484271445906842, "grad_norm": 0.7735044360160828, "learning_rate": 2.5563452726082487e-06, "loss": 0.0074, "step": 262300 }, { "epoch": 24.493605899374593, "grad_norm": 3.0382087230682373, "learning_rate": 2.5554100813616388e-06, "loss": 0.0079, "step": 262400 }, { "epoch": 24.50294035284234, "grad_norm": 2.700773000717163, "learning_rate": 2.5544748901150284e-06, "loss": 0.0061, "step": 262500 }, { "epoch": 24.512274806310092, "grad_norm": 5.228200912475586, "learning_rate": 2.5535396988684184e-06, "loss": 0.007, "step": 262600 }, { "epoch": 24.52160925977784, "grad_norm": 3.3887534141540527, "learning_rate": 2.552604507621809e-06, "loss": 0.008, "step": 262700 }, { "epoch": 24.53094371324559, "grad_norm": 0.046238142997026443, "learning_rate": 2.551669316375199e-06, "loss": 0.007, "step": 262800 }, { "epoch": 24.54027816671334, "grad_norm": 4.592182636260986, "learning_rate": 2.550734125128589e-06, "loss": 0.0069, "step": 262900 }, { "epoch": 24.549612620181087, "grad_norm": 3.3011605739593506, "learning_rate": 2.549798933881979e-06, "loss": 0.008, "step": 263000 }, { "epoch": 24.558947073648838, "grad_norm": 2.1268436908721924, "learning_rate": 2.548863742635369e-06, "loss": 0.0067, "step": 263100 }, { "epoch": 24.568281527116586, "grad_norm": 1.7717981338500977, "learning_rate": 2.547928551388759e-06, "loss": 0.0082, "step": 263200 }, { "epoch": 24.577615980584337, "grad_norm": 2.4972548484802246, "learning_rate": 2.5469933601421495e-06, "loss": 0.0082, "step": 263300 }, { "epoch": 24.586950434052085, "grad_norm": 0.14948517084121704, "learning_rate": 2.5460581688955395e-06, "loss": 0.0092, "step": 263400 }, { "epoch": 24.596284887519836, "grad_norm": 0.9373655319213867, "learning_rate": 2.5451229776489295e-06, "loss": 0.0079, "step": 263500 }, { "epoch": 24.605619340987584, "grad_norm": 0.07930448651313782, "learning_rate": 2.5441877864023196e-06, "loss": 0.0068, "step": 263600 }, { "epoch": 24.614953794455335, "grad_norm": 0.9861688613891602, "learning_rate": 2.5432525951557096e-06, "loss": 0.008, "step": 263700 }, { "epoch": 24.624288247923083, "grad_norm": 0.02831580489873886, "learning_rate": 2.5423174039090996e-06, "loss": 0.0067, "step": 263800 }, { "epoch": 24.633622701390834, "grad_norm": 3.345632791519165, "learning_rate": 2.5413822126624897e-06, "loss": 0.008, "step": 263900 }, { "epoch": 24.642957154858582, "grad_norm": 1.1990745067596436, "learning_rate": 2.54044702141588e-06, "loss": 0.0073, "step": 264000 }, { "epoch": 24.652291608326333, "grad_norm": 1.995010256767273, "learning_rate": 2.53951183016927e-06, "loss": 0.008, "step": 264100 }, { "epoch": 24.66162606179408, "grad_norm": 0.013904445804655552, "learning_rate": 2.53857663892266e-06, "loss": 0.0061, "step": 264200 }, { "epoch": 24.670960515261832, "grad_norm": 2.5749552249908447, "learning_rate": 2.5376414476760498e-06, "loss": 0.0076, "step": 264300 }, { "epoch": 24.68029496872958, "grad_norm": 0.4673696756362915, "learning_rate": 2.53670625642944e-06, "loss": 0.006, "step": 264400 }, { "epoch": 24.68962942219733, "grad_norm": 0.11575033515691757, "learning_rate": 2.53577106518283e-06, "loss": 0.0076, "step": 264500 }, { "epoch": 24.69896387566508, "grad_norm": 0.842179536819458, "learning_rate": 2.53483587393622e-06, "loss": 0.0062, "step": 264600 }, { "epoch": 24.70829832913283, "grad_norm": 3.0513203144073486, "learning_rate": 2.53390068268961e-06, "loss": 0.0076, "step": 264700 }, { "epoch": 24.717632782600578, "grad_norm": 0.7935064435005188, "learning_rate": 2.5329654914430004e-06, "loss": 0.0075, "step": 264800 }, { "epoch": 24.72696723606833, "grad_norm": 0.040496502071619034, "learning_rate": 2.5320303001963904e-06, "loss": 0.0076, "step": 264900 }, { "epoch": 24.736301689536077, "grad_norm": 7.629501819610596, "learning_rate": 2.5310951089497804e-06, "loss": 0.0075, "step": 265000 }, { "epoch": 24.736301689536077, "eval_accuracy": 0.6996966161026837, "eval_f1": 0.8272085921870064, "eval_loss": 0.2686510384082794, "eval_roc_auc": 0.9100703319355739, "eval_runtime": 352.9744, "eval_samples_per_second": 121.397, "eval_steps_per_second": 121.397, "step": 265000 }, { "epoch": 24.74563614300383, "grad_norm": 3.796674966812134, "learning_rate": 2.5301599177031704e-06, "loss": 0.0063, "step": 265100 }, { "epoch": 24.754970596471576, "grad_norm": 0.3938326835632324, "learning_rate": 2.5292247264565605e-06, "loss": 0.0065, "step": 265200 }, { "epoch": 24.764305049939328, "grad_norm": 2.688673496246338, "learning_rate": 2.5282895352099505e-06, "loss": 0.0071, "step": 265300 }, { "epoch": 24.773639503407075, "grad_norm": 0.06714150309562683, "learning_rate": 2.527354343963341e-06, "loss": 0.0096, "step": 265400 }, { "epoch": 24.782973956874827, "grad_norm": 0.2085028439760208, "learning_rate": 2.526419152716731e-06, "loss": 0.0079, "step": 265500 }, { "epoch": 24.792308410342574, "grad_norm": 3.2006423473358154, "learning_rate": 2.525483961470121e-06, "loss": 0.0075, "step": 265600 }, { "epoch": 24.801642863810322, "grad_norm": 2.834618330001831, "learning_rate": 2.524548770223511e-06, "loss": 0.008, "step": 265700 }, { "epoch": 24.810977317278073, "grad_norm": 0.9187889099121094, "learning_rate": 2.523613578976901e-06, "loss": 0.0071, "step": 265800 }, { "epoch": 24.82031177074582, "grad_norm": 0.8097953796386719, "learning_rate": 2.522678387730291e-06, "loss": 0.0067, "step": 265900 }, { "epoch": 24.829646224213572, "grad_norm": 5.315380096435547, "learning_rate": 2.5217431964836816e-06, "loss": 0.0092, "step": 266000 }, { "epoch": 24.83898067768132, "grad_norm": 1.7054082155227661, "learning_rate": 2.5208080052370708e-06, "loss": 0.0083, "step": 266100 }, { "epoch": 24.84831513114907, "grad_norm": 0.1781255453824997, "learning_rate": 2.519872813990461e-06, "loss": 0.0077, "step": 266200 }, { "epoch": 24.85764958461682, "grad_norm": 0.25849446654319763, "learning_rate": 2.5189376227438512e-06, "loss": 0.0071, "step": 266300 }, { "epoch": 24.86698403808457, "grad_norm": 0.7287214994430542, "learning_rate": 2.5180024314972413e-06, "loss": 0.0087, "step": 266400 }, { "epoch": 24.87631849155232, "grad_norm": 5.263782501220703, "learning_rate": 2.5170672402506313e-06, "loss": 0.0078, "step": 266500 }, { "epoch": 24.88565294502007, "grad_norm": 0.16123399138450623, "learning_rate": 2.5161320490040213e-06, "loss": 0.0073, "step": 266600 }, { "epoch": 24.894987398487817, "grad_norm": 0.035890333354473114, "learning_rate": 2.5151968577574114e-06, "loss": 0.0087, "step": 266700 }, { "epoch": 24.90432185195557, "grad_norm": 0.19127123057842255, "learning_rate": 2.514261666510802e-06, "loss": 0.0069, "step": 266800 }, { "epoch": 24.913656305423316, "grad_norm": 2.8393301963806152, "learning_rate": 2.513326475264192e-06, "loss": 0.0059, "step": 266900 }, { "epoch": 24.922990758891068, "grad_norm": 0.023987378925085068, "learning_rate": 2.512391284017582e-06, "loss": 0.0081, "step": 267000 }, { "epoch": 24.932325212358815, "grad_norm": 0.8513519167900085, "learning_rate": 2.511456092770972e-06, "loss": 0.0085, "step": 267100 }, { "epoch": 24.941659665826567, "grad_norm": 1.8529176712036133, "learning_rate": 2.510520901524362e-06, "loss": 0.0086, "step": 267200 }, { "epoch": 24.950994119294315, "grad_norm": 7.076110363006592, "learning_rate": 2.509585710277752e-06, "loss": 0.0078, "step": 267300 }, { "epoch": 24.960328572762066, "grad_norm": 0.38246995210647583, "learning_rate": 2.508650519031142e-06, "loss": 0.0086, "step": 267400 }, { "epoch": 24.969663026229814, "grad_norm": 1.1288647651672363, "learning_rate": 2.5077153277845325e-06, "loss": 0.0069, "step": 267500 }, { "epoch": 24.978997479697565, "grad_norm": 0.132628932595253, "learning_rate": 2.5067801365379225e-06, "loss": 0.0062, "step": 267600 }, { "epoch": 24.988331933165313, "grad_norm": 0.29156526923179626, "learning_rate": 2.5058449452913125e-06, "loss": 0.0063, "step": 267700 }, { "epoch": 24.997666386633064, "grad_norm": 2.7283012866973877, "learning_rate": 2.5049097540447025e-06, "loss": 0.0089, "step": 267800 }, { "epoch": 25.00700084010081, "grad_norm": 5.143849849700928, "learning_rate": 2.503974562798092e-06, "loss": 0.0072, "step": 267900 }, { "epoch": 25.016335293568563, "grad_norm": 1.5608537197113037, "learning_rate": 2.503039371551482e-06, "loss": 0.0062, "step": 268000 }, { "epoch": 25.02566974703631, "grad_norm": 0.227829709649086, "learning_rate": 2.5021041803048722e-06, "loss": 0.006, "step": 268100 }, { "epoch": 25.035004200504062, "grad_norm": 3.4495224952697754, "learning_rate": 2.5011689890582622e-06, "loss": 0.0088, "step": 268200 }, { "epoch": 25.04433865397181, "grad_norm": 2.0994105339050293, "learning_rate": 2.5002337978116527e-06, "loss": 0.0057, "step": 268300 }, { "epoch": 25.05367310743956, "grad_norm": 2.2531559467315674, "learning_rate": 2.4992986065650427e-06, "loss": 0.0046, "step": 268400 }, { "epoch": 25.06300756090731, "grad_norm": 0.7813023924827576, "learning_rate": 2.4983634153184328e-06, "loss": 0.0078, "step": 268500 }, { "epoch": 25.072342014375057, "grad_norm": 4.587038516998291, "learning_rate": 2.497428224071823e-06, "loss": 0.0065, "step": 268600 }, { "epoch": 25.081676467842808, "grad_norm": 2.664161205291748, "learning_rate": 2.496493032825213e-06, "loss": 0.0066, "step": 268700 }, { "epoch": 25.091010921310556, "grad_norm": 3.1991429328918457, "learning_rate": 2.495557841578603e-06, "loss": 0.0059, "step": 268800 }, { "epoch": 25.100345374778307, "grad_norm": 0.363660991191864, "learning_rate": 2.4946226503319933e-06, "loss": 0.0074, "step": 268900 }, { "epoch": 25.109679828246055, "grad_norm": 2.3233232498168945, "learning_rate": 2.4936874590853833e-06, "loss": 0.0075, "step": 269000 }, { "epoch": 25.119014281713806, "grad_norm": 0.08799878507852554, "learning_rate": 2.4927522678387734e-06, "loss": 0.0068, "step": 269100 }, { "epoch": 25.128348735181554, "grad_norm": 0.7793639898300171, "learning_rate": 2.4918170765921634e-06, "loss": 0.0057, "step": 269200 }, { "epoch": 25.137683188649305, "grad_norm": 0.12948133051395416, "learning_rate": 2.4908818853455534e-06, "loss": 0.0053, "step": 269300 }, { "epoch": 25.147017642117053, "grad_norm": 0.26030173897743225, "learning_rate": 2.4899466940989435e-06, "loss": 0.0077, "step": 269400 }, { "epoch": 25.156352095584804, "grad_norm": 1.7125133275985718, "learning_rate": 2.4890115028523335e-06, "loss": 0.0082, "step": 269500 }, { "epoch": 25.165686549052552, "grad_norm": 5.269349575042725, "learning_rate": 2.4880763116057235e-06, "loss": 0.006, "step": 269600 }, { "epoch": 25.175021002520303, "grad_norm": 3.800816535949707, "learning_rate": 2.4871411203591135e-06, "loss": 0.0063, "step": 269700 }, { "epoch": 25.18435545598805, "grad_norm": 0.429829865694046, "learning_rate": 2.4862059291125036e-06, "loss": 0.0057, "step": 269800 }, { "epoch": 25.193689909455802, "grad_norm": 0.764010488986969, "learning_rate": 2.485270737865894e-06, "loss": 0.0063, "step": 269900 }, { "epoch": 25.20302436292355, "grad_norm": 0.6159539818763733, "learning_rate": 2.484335546619284e-06, "loss": 0.0057, "step": 270000 }, { "epoch": 25.20302436292355, "eval_accuracy": 0.6989498249708285, "eval_f1": 0.8267932360127153, "eval_loss": 0.27152371406555176, "eval_roc_auc": 0.9105337115517926, "eval_runtime": 146.5963, "eval_samples_per_second": 292.299, "eval_steps_per_second": 292.299, "step": 270000 }, { "epoch": 25.2123588163913, "grad_norm": 1.8821773529052734, "learning_rate": 2.483400355372674e-06, "loss": 0.0062, "step": 270100 }, { "epoch": 25.22169326985905, "grad_norm": 2.456024169921875, "learning_rate": 2.4824651641260637e-06, "loss": 0.0061, "step": 270200 }, { "epoch": 25.2310277233268, "grad_norm": 3.5525529384613037, "learning_rate": 2.481529972879454e-06, "loss": 0.0065, "step": 270300 }, { "epoch": 25.240362176794548, "grad_norm": 0.49045971035957336, "learning_rate": 2.480594781632844e-06, "loss": 0.0067, "step": 270400 }, { "epoch": 25.2496966302623, "grad_norm": 0.8647233247756958, "learning_rate": 2.4796595903862342e-06, "loss": 0.0065, "step": 270500 }, { "epoch": 25.259031083730047, "grad_norm": 0.003812377108260989, "learning_rate": 2.4787243991396242e-06, "loss": 0.0074, "step": 270600 }, { "epoch": 25.2683655371978, "grad_norm": 3.1096408367156982, "learning_rate": 2.4777892078930143e-06, "loss": 0.0069, "step": 270700 }, { "epoch": 25.277699990665546, "grad_norm": 3.1160261631011963, "learning_rate": 2.4768540166464043e-06, "loss": 0.0078, "step": 270800 }, { "epoch": 25.287034444133297, "grad_norm": 4.788257598876953, "learning_rate": 2.4759188253997948e-06, "loss": 0.0073, "step": 270900 }, { "epoch": 25.296368897601045, "grad_norm": 1.7373126745224, "learning_rate": 2.474983634153185e-06, "loss": 0.0069, "step": 271000 }, { "epoch": 25.305703351068797, "grad_norm": 0.6778011918067932, "learning_rate": 2.4740484429065744e-06, "loss": 0.0061, "step": 271100 }, { "epoch": 25.315037804536544, "grad_norm": 0.5934848785400391, "learning_rate": 2.4731132516599644e-06, "loss": 0.0082, "step": 271200 }, { "epoch": 25.324372258004296, "grad_norm": 1.1355698108673096, "learning_rate": 2.4721780604133545e-06, "loss": 0.0058, "step": 271300 }, { "epoch": 25.333706711472043, "grad_norm": 1.0206133127212524, "learning_rate": 2.471242869166745e-06, "loss": 0.0066, "step": 271400 }, { "epoch": 25.34304116493979, "grad_norm": 0.07312865555286407, "learning_rate": 2.470307677920135e-06, "loss": 0.0074, "step": 271500 }, { "epoch": 25.352375618407542, "grad_norm": 0.9319019317626953, "learning_rate": 2.469372486673525e-06, "loss": 0.0063, "step": 271600 }, { "epoch": 25.36171007187529, "grad_norm": 0.6304168701171875, "learning_rate": 2.468437295426915e-06, "loss": 0.0061, "step": 271700 }, { "epoch": 25.37104452534304, "grad_norm": 0.4795822203159332, "learning_rate": 2.467502104180305e-06, "loss": 0.0068, "step": 271800 }, { "epoch": 25.38037897881079, "grad_norm": 0.13483908772468567, "learning_rate": 2.466566912933695e-06, "loss": 0.0059, "step": 271900 }, { "epoch": 25.38971343227854, "grad_norm": 2.0830907821655273, "learning_rate": 2.465631721687085e-06, "loss": 0.0065, "step": 272000 }, { "epoch": 25.39904788574629, "grad_norm": 3.75514554977417, "learning_rate": 2.464696530440475e-06, "loss": 0.007, "step": 272100 }, { "epoch": 25.40838233921404, "grad_norm": 0.2621631622314453, "learning_rate": 2.463761339193865e-06, "loss": 0.0062, "step": 272200 }, { "epoch": 25.417716792681787, "grad_norm": 3.487557888031006, "learning_rate": 2.462826147947255e-06, "loss": 0.0053, "step": 272300 }, { "epoch": 25.42705124614954, "grad_norm": 1.8302744626998901, "learning_rate": 2.4618909567006456e-06, "loss": 0.005, "step": 272400 }, { "epoch": 25.436385699617286, "grad_norm": 1.0405570268630981, "learning_rate": 2.4609557654540357e-06, "loss": 0.0089, "step": 272500 }, { "epoch": 25.445720153085038, "grad_norm": 5.910071849822998, "learning_rate": 2.4600205742074257e-06, "loss": 0.0067, "step": 272600 }, { "epoch": 25.455054606552785, "grad_norm": 1.0083223581314087, "learning_rate": 2.4590853829608157e-06, "loss": 0.0081, "step": 272700 }, { "epoch": 25.464389060020537, "grad_norm": 0.7014073729515076, "learning_rate": 2.4581501917142058e-06, "loss": 0.0055, "step": 272800 }, { "epoch": 25.473723513488284, "grad_norm": 5.133476734161377, "learning_rate": 2.457215000467596e-06, "loss": 0.0062, "step": 272900 }, { "epoch": 25.483057966956036, "grad_norm": 4.992822647094727, "learning_rate": 2.456279809220986e-06, "loss": 0.0075, "step": 273000 }, { "epoch": 25.492392420423784, "grad_norm": 2.4334030151367188, "learning_rate": 2.455344617974376e-06, "loss": 0.0067, "step": 273100 }, { "epoch": 25.501726873891535, "grad_norm": 4.901073455810547, "learning_rate": 2.454409426727766e-06, "loss": 0.007, "step": 273200 }, { "epoch": 25.511061327359283, "grad_norm": 0.1515137404203415, "learning_rate": 2.453474235481156e-06, "loss": 0.0081, "step": 273300 }, { "epoch": 25.520395780827034, "grad_norm": 3.331700086593628, "learning_rate": 2.4525390442345464e-06, "loss": 0.0086, "step": 273400 }, { "epoch": 25.52973023429478, "grad_norm": 0.15868403017520905, "learning_rate": 2.4516038529879364e-06, "loss": 0.0064, "step": 273500 }, { "epoch": 25.539064687762533, "grad_norm": 3.9027109146118164, "learning_rate": 2.4506686617413264e-06, "loss": 0.0066, "step": 273600 }, { "epoch": 25.54839914123028, "grad_norm": 0.11045870929956436, "learning_rate": 2.4497334704947165e-06, "loss": 0.006, "step": 273700 }, { "epoch": 25.557733594698032, "grad_norm": 0.6206362247467041, "learning_rate": 2.4487982792481065e-06, "loss": 0.0056, "step": 273800 }, { "epoch": 25.56706804816578, "grad_norm": 1.9290508031845093, "learning_rate": 2.4478630880014965e-06, "loss": 0.0068, "step": 273900 }, { "epoch": 25.57640250163353, "grad_norm": 0.07401689887046814, "learning_rate": 2.4469278967548866e-06, "loss": 0.0061, "step": 274000 }, { "epoch": 25.58573695510128, "grad_norm": 1.466079831123352, "learning_rate": 2.4459927055082766e-06, "loss": 0.0064, "step": 274100 }, { "epoch": 25.595071408569027, "grad_norm": 0.45443716645240784, "learning_rate": 2.4450575142616666e-06, "loss": 0.0081, "step": 274200 }, { "epoch": 25.604405862036778, "grad_norm": 3.70302677154541, "learning_rate": 2.4441223230150567e-06, "loss": 0.007, "step": 274300 }, { "epoch": 25.613740315504526, "grad_norm": 1.2767648696899414, "learning_rate": 2.443187131768447e-06, "loss": 0.0052, "step": 274400 }, { "epoch": 25.623074768972277, "grad_norm": 2.5856173038482666, "learning_rate": 2.442251940521837e-06, "loss": 0.0071, "step": 274500 }, { "epoch": 25.632409222440025, "grad_norm": 0.1945524662733078, "learning_rate": 2.441316749275227e-06, "loss": 0.0061, "step": 274600 }, { "epoch": 25.641743675907776, "grad_norm": 15.485455513000488, "learning_rate": 2.4403815580286168e-06, "loss": 0.0053, "step": 274700 }, { "epoch": 25.651078129375524, "grad_norm": 0.7061570286750793, "learning_rate": 2.4394463667820072e-06, "loss": 0.0072, "step": 274800 }, { "epoch": 25.660412582843275, "grad_norm": 3.838859796524048, "learning_rate": 2.4385111755353973e-06, "loss": 0.0066, "step": 274900 }, { "epoch": 25.669747036311023, "grad_norm": 0.9931791424751282, "learning_rate": 2.4375759842887873e-06, "loss": 0.0061, "step": 275000 }, { "epoch": 25.669747036311023, "eval_accuracy": 0.6989498249708285, "eval_f1": 0.8252653423730006, "eval_loss": 0.27360498905181885, "eval_roc_auc": 0.9082159462581593, "eval_runtime": 146.6851, "eval_samples_per_second": 292.122, "eval_steps_per_second": 292.122, "step": 275000 }, { "epoch": 25.679081489778774, "grad_norm": 0.2843722403049469, "learning_rate": 2.4366407930421773e-06, "loss": 0.0079, "step": 275100 }, { "epoch": 25.688415943246522, "grad_norm": 0.11964209377765656, "learning_rate": 2.4357056017955674e-06, "loss": 0.0068, "step": 275200 }, { "epoch": 25.697750396714273, "grad_norm": 6.650740146636963, "learning_rate": 2.4347704105489574e-06, "loss": 0.0098, "step": 275300 }, { "epoch": 25.70708485018202, "grad_norm": 0.32141828536987305, "learning_rate": 2.4338352193023474e-06, "loss": 0.0067, "step": 275400 }, { "epoch": 25.716419303649772, "grad_norm": 6.042462348937988, "learning_rate": 2.432900028055738e-06, "loss": 0.0085, "step": 275500 }, { "epoch": 25.72575375711752, "grad_norm": 4.452946186065674, "learning_rate": 2.4319648368091275e-06, "loss": 0.0063, "step": 275600 }, { "epoch": 25.73508821058527, "grad_norm": 0.25587648153305054, "learning_rate": 2.4310296455625175e-06, "loss": 0.0078, "step": 275700 }, { "epoch": 25.74442266405302, "grad_norm": 1.4646114110946655, "learning_rate": 2.4300944543159075e-06, "loss": 0.0063, "step": 275800 }, { "epoch": 25.75375711752077, "grad_norm": 1.7607089281082153, "learning_rate": 2.429159263069298e-06, "loss": 0.0084, "step": 275900 }, { "epoch": 25.763091570988518, "grad_norm": 2.972454071044922, "learning_rate": 2.428224071822688e-06, "loss": 0.007, "step": 276000 }, { "epoch": 25.77242602445627, "grad_norm": 2.518359661102295, "learning_rate": 2.427288880576078e-06, "loss": 0.0086, "step": 276100 }, { "epoch": 25.781760477924017, "grad_norm": 3.9220755100250244, "learning_rate": 2.426353689329468e-06, "loss": 0.0063, "step": 276200 }, { "epoch": 25.79109493139177, "grad_norm": 4.516615390777588, "learning_rate": 2.425418498082858e-06, "loss": 0.0083, "step": 276300 }, { "epoch": 25.800429384859516, "grad_norm": 0.19463223218917847, "learning_rate": 2.424483306836248e-06, "loss": 0.0078, "step": 276400 }, { "epoch": 25.809763838327267, "grad_norm": 1.2960563898086548, "learning_rate": 2.423548115589638e-06, "loss": 0.0074, "step": 276500 }, { "epoch": 25.819098291795015, "grad_norm": 0.05422159656882286, "learning_rate": 2.422612924343028e-06, "loss": 0.0059, "step": 276600 }, { "epoch": 25.828432745262766, "grad_norm": 0.05528430640697479, "learning_rate": 2.4216777330964182e-06, "loss": 0.0065, "step": 276700 }, { "epoch": 25.837767198730514, "grad_norm": 0.7251713871955872, "learning_rate": 2.4207425418498083e-06, "loss": 0.0059, "step": 276800 }, { "epoch": 25.847101652198265, "grad_norm": 1.1032336950302124, "learning_rate": 2.4198073506031987e-06, "loss": 0.0064, "step": 276900 }, { "epoch": 25.856436105666013, "grad_norm": 3.229106903076172, "learning_rate": 2.4188721593565888e-06, "loss": 0.007, "step": 277000 }, { "epoch": 25.86577055913376, "grad_norm": 0.14287951588630676, "learning_rate": 2.4179369681099788e-06, "loss": 0.0061, "step": 277100 }, { "epoch": 25.875105012601512, "grad_norm": 0.5197017788887024, "learning_rate": 2.417001776863369e-06, "loss": 0.0067, "step": 277200 }, { "epoch": 25.88443946606926, "grad_norm": 0.8294327259063721, "learning_rate": 2.416066585616759e-06, "loss": 0.0072, "step": 277300 }, { "epoch": 25.89377391953701, "grad_norm": 0.6909797787666321, "learning_rate": 2.415131394370149e-06, "loss": 0.0071, "step": 277400 }, { "epoch": 25.90310837300476, "grad_norm": 0.054012298583984375, "learning_rate": 2.414196203123539e-06, "loss": 0.0065, "step": 277500 }, { "epoch": 25.91244282647251, "grad_norm": 1.2329227924346924, "learning_rate": 2.413261011876929e-06, "loss": 0.0072, "step": 277600 }, { "epoch": 25.921777279940258, "grad_norm": 0.49659132957458496, "learning_rate": 2.412325820630319e-06, "loss": 0.0074, "step": 277700 }, { "epoch": 25.93111173340801, "grad_norm": 1.0029362440109253, "learning_rate": 2.411390629383709e-06, "loss": 0.0063, "step": 277800 }, { "epoch": 25.940446186875757, "grad_norm": 0.03920453414320946, "learning_rate": 2.4104554381370995e-06, "loss": 0.0084, "step": 277900 }, { "epoch": 25.94978064034351, "grad_norm": 4.872620582580566, "learning_rate": 2.4095202468904895e-06, "loss": 0.007, "step": 278000 }, { "epoch": 25.959115093811256, "grad_norm": 0.020474189892411232, "learning_rate": 2.4085850556438795e-06, "loss": 0.0072, "step": 278100 }, { "epoch": 25.968449547279008, "grad_norm": 0.012553735636174679, "learning_rate": 2.4076498643972695e-06, "loss": 0.0067, "step": 278200 }, { "epoch": 25.977784000746755, "grad_norm": 7.775627136230469, "learning_rate": 2.4067146731506596e-06, "loss": 0.0062, "step": 278300 }, { "epoch": 25.987118454214507, "grad_norm": 0.2918202877044678, "learning_rate": 2.4057794819040496e-06, "loss": 0.0072, "step": 278400 }, { "epoch": 25.996452907682254, "grad_norm": 4.650539875030518, "learning_rate": 2.4048442906574396e-06, "loss": 0.0041, "step": 278500 }, { "epoch": 26.005787361150006, "grad_norm": 0.32224294543266296, "learning_rate": 2.4039090994108297e-06, "loss": 0.0068, "step": 278600 }, { "epoch": 26.015121814617753, "grad_norm": 2.3547797203063965, "learning_rate": 2.4029739081642197e-06, "loss": 0.0053, "step": 278700 }, { "epoch": 26.024456268085505, "grad_norm": 0.8180122375488281, "learning_rate": 2.4020387169176097e-06, "loss": 0.0055, "step": 278800 }, { "epoch": 26.033790721553252, "grad_norm": 1.2576074600219727, "learning_rate": 2.4011035256709998e-06, "loss": 0.006, "step": 278900 }, { "epoch": 26.043125175021004, "grad_norm": 2.7765610218048096, "learning_rate": 2.40016833442439e-06, "loss": 0.0051, "step": 279000 }, { "epoch": 26.05245962848875, "grad_norm": 0.163173645734787, "learning_rate": 2.3992331431777802e-06, "loss": 0.0055, "step": 279100 }, { "epoch": 26.061794081956503, "grad_norm": 1.05585515499115, "learning_rate": 2.39829795193117e-06, "loss": 0.0066, "step": 279200 }, { "epoch": 26.07112853542425, "grad_norm": 0.5329868793487549, "learning_rate": 2.39736276068456e-06, "loss": 0.0061, "step": 279300 }, { "epoch": 26.080462988892002, "grad_norm": 0.017992151901125908, "learning_rate": 2.3964275694379503e-06, "loss": 0.007, "step": 279400 }, { "epoch": 26.08979744235975, "grad_norm": 0.014525151811540127, "learning_rate": 2.3954923781913404e-06, "loss": 0.0078, "step": 279500 }, { "epoch": 26.0991318958275, "grad_norm": 2.787858486175537, "learning_rate": 2.3945571869447304e-06, "loss": 0.0045, "step": 279600 }, { "epoch": 26.10846634929525, "grad_norm": 5.997899532318115, "learning_rate": 2.3936219956981204e-06, "loss": 0.0062, "step": 279700 }, { "epoch": 26.117800802763, "grad_norm": 0.08745671808719635, "learning_rate": 2.3926868044515105e-06, "loss": 0.0051, "step": 279800 }, { "epoch": 26.127135256230748, "grad_norm": 4.131542205810547, "learning_rate": 2.3917516132049005e-06, "loss": 0.0034, "step": 279900 }, { "epoch": 26.136469709698495, "grad_norm": 3.0956382751464844, "learning_rate": 2.390816421958291e-06, "loss": 0.0056, "step": 280000 }, { "epoch": 26.136469709698495, "eval_accuracy": 0.6942590431738623, "eval_f1": 0.8252150486404075, "eval_loss": 0.28327426314353943, "eval_roc_auc": 0.9134335281087825, "eval_runtime": 146.1897, "eval_samples_per_second": 293.112, "eval_steps_per_second": 293.112, "step": 280000 }, { "epoch": 26.145804163166247, "grad_norm": 0.2323392629623413, "learning_rate": 2.3898812307116805e-06, "loss": 0.0057, "step": 280100 }, { "epoch": 26.155138616633995, "grad_norm": 0.3731217682361603, "learning_rate": 2.3889460394650706e-06, "loss": 0.0078, "step": 280200 }, { "epoch": 26.164473070101746, "grad_norm": 0.45992034673690796, "learning_rate": 2.3880108482184606e-06, "loss": 0.0061, "step": 280300 }, { "epoch": 26.173807523569494, "grad_norm": 0.8073437213897705, "learning_rate": 2.387075656971851e-06, "loss": 0.0062, "step": 280400 }, { "epoch": 26.183141977037245, "grad_norm": 3.031398296356201, "learning_rate": 2.386140465725241e-06, "loss": 0.0058, "step": 280500 }, { "epoch": 26.192476430504993, "grad_norm": 5.693144798278809, "learning_rate": 2.385205274478631e-06, "loss": 0.0066, "step": 280600 }, { "epoch": 26.201810883972744, "grad_norm": 0.3587820827960968, "learning_rate": 2.384270083232021e-06, "loss": 0.0064, "step": 280700 }, { "epoch": 26.21114533744049, "grad_norm": 3.2670795917510986, "learning_rate": 2.383334891985411e-06, "loss": 0.0059, "step": 280800 }, { "epoch": 26.220479790908243, "grad_norm": 0.4632854163646698, "learning_rate": 2.3823997007388012e-06, "loss": 0.0055, "step": 280900 }, { "epoch": 26.22981424437599, "grad_norm": 1.847341775894165, "learning_rate": 2.3814645094921912e-06, "loss": 0.0063, "step": 281000 }, { "epoch": 26.239148697843742, "grad_norm": 1.325996994972229, "learning_rate": 2.3805293182455813e-06, "loss": 0.0061, "step": 281100 }, { "epoch": 26.24848315131149, "grad_norm": 0.41811203956604004, "learning_rate": 2.3795941269989713e-06, "loss": 0.0069, "step": 281200 }, { "epoch": 26.25781760477924, "grad_norm": 5.625559329986572, "learning_rate": 2.3786589357523613e-06, "loss": 0.0065, "step": 281300 }, { "epoch": 26.26715205824699, "grad_norm": 0.9784310460090637, "learning_rate": 2.377723744505752e-06, "loss": 0.0063, "step": 281400 }, { "epoch": 26.27648651171474, "grad_norm": 0.9806200861930847, "learning_rate": 2.376788553259142e-06, "loss": 0.006, "step": 281500 }, { "epoch": 26.285820965182488, "grad_norm": 4.968928337097168, "learning_rate": 2.375853362012532e-06, "loss": 0.004, "step": 281600 }, { "epoch": 26.29515541865024, "grad_norm": 4.519594192504883, "learning_rate": 2.374918170765922e-06, "loss": 0.0065, "step": 281700 }, { "epoch": 26.304489872117987, "grad_norm": 0.01706347055733204, "learning_rate": 2.373982979519312e-06, "loss": 0.0066, "step": 281800 }, { "epoch": 26.31382432558574, "grad_norm": 3.960470199584961, "learning_rate": 2.373047788272702e-06, "loss": 0.0048, "step": 281900 }, { "epoch": 26.323158779053486, "grad_norm": 0.04310346022248268, "learning_rate": 2.372112597026092e-06, "loss": 0.0066, "step": 282000 }, { "epoch": 26.332493232521237, "grad_norm": 0.4616706967353821, "learning_rate": 2.371177405779482e-06, "loss": 0.0056, "step": 282100 }, { "epoch": 26.341827685988985, "grad_norm": 0.06578268855810165, "learning_rate": 2.370242214532872e-06, "loss": 0.0069, "step": 282200 }, { "epoch": 26.351162139456736, "grad_norm": 0.4207251965999603, "learning_rate": 2.369307023286262e-06, "loss": 0.0068, "step": 282300 }, { "epoch": 26.360496592924484, "grad_norm": 4.1620378494262695, "learning_rate": 2.3683718320396525e-06, "loss": 0.0053, "step": 282400 }, { "epoch": 26.369831046392235, "grad_norm": 0.0623793825507164, "learning_rate": 2.3674366407930426e-06, "loss": 0.0062, "step": 282500 }, { "epoch": 26.379165499859983, "grad_norm": 0.0031388578936457634, "learning_rate": 2.3665014495464326e-06, "loss": 0.0052, "step": 282600 }, { "epoch": 26.38849995332773, "grad_norm": 0.6868655681610107, "learning_rate": 2.3655662582998226e-06, "loss": 0.0052, "step": 282700 }, { "epoch": 26.397834406795482, "grad_norm": 0.32324713468551636, "learning_rate": 2.3646310670532126e-06, "loss": 0.0065, "step": 282800 }, { "epoch": 26.40716886026323, "grad_norm": 0.06762594729661942, "learning_rate": 2.3636958758066027e-06, "loss": 0.0058, "step": 282900 }, { "epoch": 26.41650331373098, "grad_norm": 6.754627704620361, "learning_rate": 2.3627606845599927e-06, "loss": 0.0067, "step": 283000 }, { "epoch": 26.42583776719873, "grad_norm": 0.1595713496208191, "learning_rate": 2.3618254933133827e-06, "loss": 0.0085, "step": 283100 }, { "epoch": 26.43517222066648, "grad_norm": 2.0992071628570557, "learning_rate": 2.3608903020667728e-06, "loss": 0.0066, "step": 283200 }, { "epoch": 26.444506674134228, "grad_norm": 0.5353805422782898, "learning_rate": 2.359955110820163e-06, "loss": 0.0046, "step": 283300 }, { "epoch": 26.45384112760198, "grad_norm": 1.5029250383377075, "learning_rate": 2.359019919573553e-06, "loss": 0.0049, "step": 283400 }, { "epoch": 26.463175581069727, "grad_norm": 0.32753893733024597, "learning_rate": 2.3580847283269433e-06, "loss": 0.0069, "step": 283500 }, { "epoch": 26.47251003453748, "grad_norm": 1.4033992290496826, "learning_rate": 2.3571495370803333e-06, "loss": 0.0051, "step": 283600 }, { "epoch": 26.481844488005226, "grad_norm": 0.04956977814435959, "learning_rate": 2.3562143458337233e-06, "loss": 0.0046, "step": 283700 }, { "epoch": 26.491178941472977, "grad_norm": 0.034047313034534454, "learning_rate": 2.355279154587113e-06, "loss": 0.0054, "step": 283800 }, { "epoch": 26.500513394940725, "grad_norm": 3.9738516807556152, "learning_rate": 2.3543439633405034e-06, "loss": 0.0056, "step": 283900 }, { "epoch": 26.509847848408477, "grad_norm": 4.227583408355713, "learning_rate": 2.3534087720938934e-06, "loss": 0.0064, "step": 284000 }, { "epoch": 26.519182301876224, "grad_norm": 0.21689096093177795, "learning_rate": 2.3524735808472835e-06, "loss": 0.0052, "step": 284100 }, { "epoch": 26.528516755343976, "grad_norm": 0.3542209267616272, "learning_rate": 2.3515383896006735e-06, "loss": 0.0047, "step": 284200 }, { "epoch": 26.537851208811723, "grad_norm": 1.8655396699905396, "learning_rate": 2.3506031983540635e-06, "loss": 0.0084, "step": 284300 }, { "epoch": 26.547185662279475, "grad_norm": 2.5650765895843506, "learning_rate": 2.3496680071074536e-06, "loss": 0.0053, "step": 284400 }, { "epoch": 26.556520115747222, "grad_norm": 0.0031017689034342766, "learning_rate": 2.348732815860844e-06, "loss": 0.0054, "step": 284500 }, { "epoch": 26.565854569214974, "grad_norm": 0.3058086931705475, "learning_rate": 2.347797624614234e-06, "loss": 0.0062, "step": 284600 }, { "epoch": 26.57518902268272, "grad_norm": 2.81972074508667, "learning_rate": 2.3468624333676237e-06, "loss": 0.0069, "step": 284700 }, { "epoch": 26.584523476150473, "grad_norm": 0.5581037998199463, "learning_rate": 2.3459272421210137e-06, "loss": 0.0052, "step": 284800 }, { "epoch": 26.59385792961822, "grad_norm": 4.304443359375, "learning_rate": 2.344992050874404e-06, "loss": 0.0057, "step": 284900 }, { "epoch": 26.603192383085972, "grad_norm": 0.310161292552948, "learning_rate": 2.344056859627794e-06, "loss": 0.0076, "step": 285000 }, { "epoch": 26.603192383085972, "eval_accuracy": 0.6979229871645274, "eval_f1": 0.8273799663461119, "eval_loss": 0.28704240918159485, "eval_roc_auc": 0.9124147760141041, "eval_runtime": 146.537, "eval_samples_per_second": 292.418, "eval_steps_per_second": 292.418, "step": 285000 }, { "epoch": 26.61252683655372, "grad_norm": 4.312740802764893, "learning_rate": 2.343121668381184e-06, "loss": 0.0054, "step": 285100 }, { "epoch": 26.62186129002147, "grad_norm": 2.649298667907715, "learning_rate": 2.3421864771345742e-06, "loss": 0.0079, "step": 285200 }, { "epoch": 26.63119574348922, "grad_norm": 0.006459313910454512, "learning_rate": 2.3412512858879643e-06, "loss": 0.005, "step": 285300 }, { "epoch": 26.64053019695697, "grad_norm": 0.37164655327796936, "learning_rate": 2.3403160946413543e-06, "loss": 0.0053, "step": 285400 }, { "epoch": 26.649864650424718, "grad_norm": 3.719619035720825, "learning_rate": 2.3393809033947447e-06, "loss": 0.0062, "step": 285500 }, { "epoch": 26.659199103892465, "grad_norm": 2.517784357070923, "learning_rate": 2.3384457121481344e-06, "loss": 0.0075, "step": 285600 }, { "epoch": 26.668533557360217, "grad_norm": 0.32631802558898926, "learning_rate": 2.3375105209015244e-06, "loss": 0.0069, "step": 285700 }, { "epoch": 26.677868010827964, "grad_norm": 0.01779346540570259, "learning_rate": 2.3365753296549144e-06, "loss": 0.0078, "step": 285800 }, { "epoch": 26.687202464295716, "grad_norm": 4.200437545776367, "learning_rate": 2.335640138408305e-06, "loss": 0.005, "step": 285900 }, { "epoch": 26.696536917763463, "grad_norm": 3.3637006282806396, "learning_rate": 2.334704947161695e-06, "loss": 0.0055, "step": 286000 }, { "epoch": 26.705871371231215, "grad_norm": 0.32597875595092773, "learning_rate": 2.333769755915085e-06, "loss": 0.0079, "step": 286100 }, { "epoch": 26.715205824698963, "grad_norm": 8.204741477966309, "learning_rate": 2.332834564668475e-06, "loss": 0.0058, "step": 286200 }, { "epoch": 26.724540278166714, "grad_norm": 1.8660684823989868, "learning_rate": 2.331899373421865e-06, "loss": 0.0076, "step": 286300 }, { "epoch": 26.73387473163446, "grad_norm": 3.647599935531616, "learning_rate": 2.330964182175255e-06, "loss": 0.0052, "step": 286400 }, { "epoch": 26.743209185102213, "grad_norm": 0.19581376016139984, "learning_rate": 2.330028990928645e-06, "loss": 0.0058, "step": 286500 }, { "epoch": 26.75254363856996, "grad_norm": 0.264304518699646, "learning_rate": 2.329093799682035e-06, "loss": 0.0045, "step": 286600 }, { "epoch": 26.761878092037712, "grad_norm": 1.1080851554870605, "learning_rate": 2.328158608435425e-06, "loss": 0.0065, "step": 286700 }, { "epoch": 26.77121254550546, "grad_norm": 0.5604456067085266, "learning_rate": 2.327223417188815e-06, "loss": 0.0062, "step": 286800 }, { "epoch": 26.78054699897321, "grad_norm": 0.6696583032608032, "learning_rate": 2.326288225942205e-06, "loss": 0.0071, "step": 286900 }, { "epoch": 26.78988145244096, "grad_norm": 4.343842029571533, "learning_rate": 2.3253530346955956e-06, "loss": 0.0063, "step": 287000 }, { "epoch": 26.79921590590871, "grad_norm": 0.4647919237613678, "learning_rate": 2.3244178434489857e-06, "loss": 0.0055, "step": 287100 }, { "epoch": 26.808550359376458, "grad_norm": 3.6680452823638916, "learning_rate": 2.3234826522023757e-06, "loss": 0.007, "step": 287200 }, { "epoch": 26.81788481284421, "grad_norm": 0.3027377724647522, "learning_rate": 2.3225474609557657e-06, "loss": 0.0064, "step": 287300 }, { "epoch": 26.827219266311957, "grad_norm": 1.0676220655441284, "learning_rate": 2.3216122697091557e-06, "loss": 0.0066, "step": 287400 }, { "epoch": 26.836553719779708, "grad_norm": 2.5164740085601807, "learning_rate": 2.3206770784625458e-06, "loss": 0.0065, "step": 287500 }, { "epoch": 26.845888173247456, "grad_norm": 1.4820890426635742, "learning_rate": 2.319741887215936e-06, "loss": 0.0048, "step": 287600 }, { "epoch": 26.855222626715207, "grad_norm": 2.4032232761383057, "learning_rate": 2.318806695969326e-06, "loss": 0.0064, "step": 287700 }, { "epoch": 26.864557080182955, "grad_norm": 0.18480730056762695, "learning_rate": 2.317871504722716e-06, "loss": 0.005, "step": 287800 }, { "epoch": 26.873891533650706, "grad_norm": 0.03487369790673256, "learning_rate": 2.316936313476106e-06, "loss": 0.0055, "step": 287900 }, { "epoch": 26.883225987118454, "grad_norm": 0.053034111857414246, "learning_rate": 2.3160011222294964e-06, "loss": 0.0074, "step": 288000 }, { "epoch": 26.892560440586205, "grad_norm": 3.5190088748931885, "learning_rate": 2.3150659309828864e-06, "loss": 0.0073, "step": 288100 }, { "epoch": 26.901894894053953, "grad_norm": 3.5592870712280273, "learning_rate": 2.3141307397362764e-06, "loss": 0.0062, "step": 288200 }, { "epoch": 26.911229347521704, "grad_norm": 0.017989296466112137, "learning_rate": 2.313195548489666e-06, "loss": 0.0064, "step": 288300 }, { "epoch": 26.920563800989452, "grad_norm": 0.04541776329278946, "learning_rate": 2.3122603572430565e-06, "loss": 0.0058, "step": 288400 }, { "epoch": 26.9298982544572, "grad_norm": 3.858668804168701, "learning_rate": 2.3113251659964465e-06, "loss": 0.0061, "step": 288500 }, { "epoch": 26.93923270792495, "grad_norm": 4.006608009338379, "learning_rate": 2.3103899747498365e-06, "loss": 0.0058, "step": 288600 }, { "epoch": 26.9485671613927, "grad_norm": 0.10452180355787277, "learning_rate": 2.3094547835032266e-06, "loss": 0.0059, "step": 288700 }, { "epoch": 26.95790161486045, "grad_norm": 3.3569459915161133, "learning_rate": 2.3085195922566166e-06, "loss": 0.0061, "step": 288800 }, { "epoch": 26.967236068328198, "grad_norm": 0.08666028082370758, "learning_rate": 2.3075844010100066e-06, "loss": 0.006, "step": 288900 }, { "epoch": 26.97657052179595, "grad_norm": 2.306678533554077, "learning_rate": 2.306649209763397e-06, "loss": 0.0055, "step": 289000 }, { "epoch": 26.985904975263697, "grad_norm": 0.07391053438186646, "learning_rate": 2.305714018516787e-06, "loss": 0.0068, "step": 289100 }, { "epoch": 26.99523942873145, "grad_norm": 3.061683416366577, "learning_rate": 2.3047788272701767e-06, "loss": 0.0076, "step": 289200 }, { "epoch": 27.004573882199196, "grad_norm": 0.07552212476730347, "learning_rate": 2.3038436360235668e-06, "loss": 0.0047, "step": 289300 }, { "epoch": 27.013908335666947, "grad_norm": 4.516263961791992, "learning_rate": 2.302908444776957e-06, "loss": 0.0047, "step": 289400 }, { "epoch": 27.023242789134695, "grad_norm": 0.4238384962081909, "learning_rate": 2.3019732535303472e-06, "loss": 0.0063, "step": 289500 }, { "epoch": 27.032577242602446, "grad_norm": 3.4181628227233887, "learning_rate": 2.3010380622837373e-06, "loss": 0.0045, "step": 289600 }, { "epoch": 27.041911696070194, "grad_norm": 0.1758597493171692, "learning_rate": 2.3001028710371273e-06, "loss": 0.0045, "step": 289700 }, { "epoch": 27.051246149537945, "grad_norm": 19.270906448364258, "learning_rate": 2.2991676797905173e-06, "loss": 0.0062, "step": 289800 }, { "epoch": 27.060580603005693, "grad_norm": 0.02831912599503994, "learning_rate": 2.2982324885439074e-06, "loss": 0.0054, "step": 289900 }, { "epoch": 27.069915056473445, "grad_norm": 1.7558162212371826, "learning_rate": 2.297297297297298e-06, "loss": 0.005, "step": 290000 }, { "epoch": 27.069915056473445, "eval_accuracy": 0.6992765460910152, "eval_f1": 0.8252640421417383, "eval_loss": 0.2868571877479553, "eval_roc_auc": 0.9099254299660956, "eval_runtime": 146.3627, "eval_samples_per_second": 292.766, "eval_steps_per_second": 292.766, "step": 290000 }, { "epoch": 27.079249509941192, "grad_norm": 1.2128626108169556, "learning_rate": 2.2963621060506874e-06, "loss": 0.0043, "step": 290100 }, { "epoch": 27.088583963408944, "grad_norm": 0.928057849407196, "learning_rate": 2.2954269148040775e-06, "loss": 0.0052, "step": 290200 }, { "epoch": 27.09791841687669, "grad_norm": 0.2013024091720581, "learning_rate": 2.2944917235574675e-06, "loss": 0.0064, "step": 290300 }, { "epoch": 27.107252870344443, "grad_norm": 3.8339221477508545, "learning_rate": 2.293556532310858e-06, "loss": 0.0066, "step": 290400 }, { "epoch": 27.11658732381219, "grad_norm": 1.2163115739822388, "learning_rate": 2.292621341064248e-06, "loss": 0.0058, "step": 290500 }, { "epoch": 27.12592177727994, "grad_norm": 0.1556462049484253, "learning_rate": 2.291686149817638e-06, "loss": 0.0072, "step": 290600 }, { "epoch": 27.13525623074769, "grad_norm": 1.7917091846466064, "learning_rate": 2.290750958571028e-06, "loss": 0.0069, "step": 290700 }, { "epoch": 27.14459068421544, "grad_norm": 1.6945325136184692, "learning_rate": 2.289815767324418e-06, "loss": 0.007, "step": 290800 }, { "epoch": 27.15392513768319, "grad_norm": 0.38881707191467285, "learning_rate": 2.288880576077808e-06, "loss": 0.0049, "step": 290900 }, { "epoch": 27.16325959115094, "grad_norm": 2.4787039756774902, "learning_rate": 2.287945384831198e-06, "loss": 0.0045, "step": 291000 }, { "epoch": 27.172594044618688, "grad_norm": 4.167172431945801, "learning_rate": 2.287010193584588e-06, "loss": 0.0075, "step": 291100 }, { "epoch": 27.181928498086435, "grad_norm": 0.5116770267486572, "learning_rate": 2.286075002337978e-06, "loss": 0.0043, "step": 291200 }, { "epoch": 27.191262951554187, "grad_norm": 2.327972173690796, "learning_rate": 2.2851398110913682e-06, "loss": 0.0055, "step": 291300 }, { "epoch": 27.200597405021934, "grad_norm": 3.246744155883789, "learning_rate": 2.2842046198447582e-06, "loss": 0.0064, "step": 291400 }, { "epoch": 27.209931858489686, "grad_norm": 5.92725944519043, "learning_rate": 2.2832694285981487e-06, "loss": 0.0061, "step": 291500 }, { "epoch": 27.219266311957433, "grad_norm": 0.2838914692401886, "learning_rate": 2.2823342373515387e-06, "loss": 0.0047, "step": 291600 }, { "epoch": 27.228600765425185, "grad_norm": 0.1022624671459198, "learning_rate": 2.2813990461049288e-06, "loss": 0.0075, "step": 291700 }, { "epoch": 27.237935218892932, "grad_norm": 1.810828447341919, "learning_rate": 2.280463854858319e-06, "loss": 0.0048, "step": 291800 }, { "epoch": 27.247269672360684, "grad_norm": 0.43569016456604004, "learning_rate": 2.279528663611709e-06, "loss": 0.004, "step": 291900 }, { "epoch": 27.25660412582843, "grad_norm": 2.5225372314453125, "learning_rate": 2.278593472365099e-06, "loss": 0.0059, "step": 292000 }, { "epoch": 27.265938579296183, "grad_norm": 6.041843891143799, "learning_rate": 2.277658281118489e-06, "loss": 0.0046, "step": 292100 }, { "epoch": 27.27527303276393, "grad_norm": 1.0361080169677734, "learning_rate": 2.276723089871879e-06, "loss": 0.0052, "step": 292200 }, { "epoch": 27.284607486231682, "grad_norm": 0.3810477554798126, "learning_rate": 2.275787898625269e-06, "loss": 0.0062, "step": 292300 }, { "epoch": 27.29394193969943, "grad_norm": 0.9801885485649109, "learning_rate": 2.274852707378659e-06, "loss": 0.0066, "step": 292400 }, { "epoch": 27.30327639316718, "grad_norm": 2.9478793144226074, "learning_rate": 2.2739175161320494e-06, "loss": 0.005, "step": 292500 }, { "epoch": 27.31261084663493, "grad_norm": 0.08112777024507523, "learning_rate": 2.2729823248854395e-06, "loss": 0.0055, "step": 292600 }, { "epoch": 27.32194530010268, "grad_norm": 4.406986236572266, "learning_rate": 2.2720471336388295e-06, "loss": 0.0063, "step": 292700 }, { "epoch": 27.331279753570428, "grad_norm": 2.737438678741455, "learning_rate": 2.271111942392219e-06, "loss": 0.0057, "step": 292800 }, { "epoch": 27.34061420703818, "grad_norm": 0.1518474966287613, "learning_rate": 2.2701767511456096e-06, "loss": 0.0052, "step": 292900 }, { "epoch": 27.349948660505927, "grad_norm": 1.8373647928237915, "learning_rate": 2.2692415598989996e-06, "loss": 0.0052, "step": 293000 }, { "epoch": 27.359283113973678, "grad_norm": 1.71800696849823, "learning_rate": 2.2683063686523896e-06, "loss": 0.0062, "step": 293100 }, { "epoch": 27.368617567441426, "grad_norm": 0.16103799641132355, "learning_rate": 2.2673711774057796e-06, "loss": 0.0061, "step": 293200 }, { "epoch": 27.377952020909177, "grad_norm": 0.2533770799636841, "learning_rate": 2.2664359861591697e-06, "loss": 0.0045, "step": 293300 }, { "epoch": 27.387286474376925, "grad_norm": 0.6856314539909363, "learning_rate": 2.2655007949125597e-06, "loss": 0.0056, "step": 293400 }, { "epoch": 27.396620927844676, "grad_norm": 0.28544023633003235, "learning_rate": 2.26456560366595e-06, "loss": 0.0061, "step": 293500 }, { "epoch": 27.405955381312424, "grad_norm": 1.4690643548965454, "learning_rate": 2.26363041241934e-06, "loss": 0.0056, "step": 293600 }, { "epoch": 27.415289834780175, "grad_norm": 0.023105274885892868, "learning_rate": 2.26269522117273e-06, "loss": 0.0054, "step": 293700 }, { "epoch": 27.424624288247923, "grad_norm": 0.09072453528642654, "learning_rate": 2.26176002992612e-06, "loss": 0.0071, "step": 293800 }, { "epoch": 27.433958741715674, "grad_norm": 1.535322666168213, "learning_rate": 2.2608248386795103e-06, "loss": 0.0035, "step": 293900 }, { "epoch": 27.443293195183422, "grad_norm": 1.1512267589569092, "learning_rate": 2.2598896474329003e-06, "loss": 0.0041, "step": 294000 }, { "epoch": 27.45262764865117, "grad_norm": 0.11793335527181625, "learning_rate": 2.2589544561862903e-06, "loss": 0.005, "step": 294100 }, { "epoch": 27.46196210211892, "grad_norm": 2.9981741905212402, "learning_rate": 2.2580192649396804e-06, "loss": 0.0057, "step": 294200 }, { "epoch": 27.47129655558667, "grad_norm": 2.4635350704193115, "learning_rate": 2.2570840736930704e-06, "loss": 0.0065, "step": 294300 }, { "epoch": 27.48063100905442, "grad_norm": 0.47296804189682007, "learning_rate": 2.2561488824464604e-06, "loss": 0.0059, "step": 294400 }, { "epoch": 27.489965462522168, "grad_norm": 0.3944322466850281, "learning_rate": 2.2552136911998505e-06, "loss": 0.0056, "step": 294500 }, { "epoch": 27.49929991598992, "grad_norm": 2.434701681137085, "learning_rate": 2.2542784999532405e-06, "loss": 0.0078, "step": 294600 }, { "epoch": 27.508634369457667, "grad_norm": 0.24055732786655426, "learning_rate": 2.2533433087066305e-06, "loss": 0.0059, "step": 294700 }, { "epoch": 27.51796882292542, "grad_norm": 0.5740146040916443, "learning_rate": 2.2524081174600206e-06, "loss": 0.0054, "step": 294800 }, { "epoch": 27.527303276393166, "grad_norm": 0.18230846524238586, "learning_rate": 2.2514729262134106e-06, "loss": 0.0049, "step": 294900 }, { "epoch": 27.536637729860917, "grad_norm": 0.04517395421862602, "learning_rate": 2.250537734966801e-06, "loss": 0.0046, "step": 295000 }, { "epoch": 27.536637729860917, "eval_accuracy": 0.700583430571762, "eval_f1": 0.8274995827807008, "eval_loss": 0.2903461754322052, "eval_roc_auc": 0.9098843893421775, "eval_runtime": 146.5763, "eval_samples_per_second": 292.339, "eval_steps_per_second": 292.339, "step": 295000 }, { "epoch": 27.545972183328665, "grad_norm": 4.480972766876221, "learning_rate": 2.249602543720191e-06, "loss": 0.0051, "step": 295100 }, { "epoch": 27.555306636796416, "grad_norm": 0.8009829521179199, "learning_rate": 2.248667352473581e-06, "loss": 0.0055, "step": 295200 }, { "epoch": 27.564641090264164, "grad_norm": 0.4380700886249542, "learning_rate": 2.247732161226971e-06, "loss": 0.0062, "step": 295300 }, { "epoch": 27.573975543731915, "grad_norm": 0.6460286974906921, "learning_rate": 2.246796969980361e-06, "loss": 0.0051, "step": 295400 }, { "epoch": 27.583309997199663, "grad_norm": 3.5143320560455322, "learning_rate": 2.245861778733751e-06, "loss": 0.0055, "step": 295500 }, { "epoch": 27.592644450667414, "grad_norm": 3.687116861343384, "learning_rate": 2.2449265874871412e-06, "loss": 0.0056, "step": 295600 }, { "epoch": 27.601978904135162, "grad_norm": 6.78965425491333, "learning_rate": 2.2439913962405313e-06, "loss": 0.0059, "step": 295700 }, { "epoch": 27.611313357602913, "grad_norm": 1.2053314447402954, "learning_rate": 2.2430562049939213e-06, "loss": 0.0052, "step": 295800 }, { "epoch": 27.62064781107066, "grad_norm": 2.066948175430298, "learning_rate": 2.2421210137473113e-06, "loss": 0.0051, "step": 295900 }, { "epoch": 27.629982264538413, "grad_norm": 8.154534339904785, "learning_rate": 2.2411858225007018e-06, "loss": 0.006, "step": 296000 }, { "epoch": 27.63931671800616, "grad_norm": 0.630691647529602, "learning_rate": 2.240250631254092e-06, "loss": 0.0052, "step": 296100 }, { "epoch": 27.64865117147391, "grad_norm": 0.11996511369943619, "learning_rate": 2.239315440007482e-06, "loss": 0.0056, "step": 296200 }, { "epoch": 27.65798562494166, "grad_norm": 0.142459437251091, "learning_rate": 2.238380248760872e-06, "loss": 0.005, "step": 296300 }, { "epoch": 27.66732007840941, "grad_norm": 0.17643210291862488, "learning_rate": 2.237445057514262e-06, "loss": 0.0044, "step": 296400 }, { "epoch": 27.67665453187716, "grad_norm": 0.5281691551208496, "learning_rate": 2.236509866267652e-06, "loss": 0.0071, "step": 296500 }, { "epoch": 27.68598898534491, "grad_norm": 0.022334914654493332, "learning_rate": 2.235574675021042e-06, "loss": 0.0049, "step": 296600 }, { "epoch": 27.695323438812657, "grad_norm": 0.06212342903017998, "learning_rate": 2.234639483774432e-06, "loss": 0.0064, "step": 296700 }, { "epoch": 27.70465789228041, "grad_norm": 2.070523738861084, "learning_rate": 2.233704292527822e-06, "loss": 0.0072, "step": 296800 }, { "epoch": 27.713992345748157, "grad_norm": 2.4690210819244385, "learning_rate": 2.232769101281212e-06, "loss": 0.0066, "step": 296900 }, { "epoch": 27.723326799215904, "grad_norm": 0.5960809588432312, "learning_rate": 2.2318339100346025e-06, "loss": 0.0045, "step": 297000 }, { "epoch": 27.732661252683656, "grad_norm": 1.9790903329849243, "learning_rate": 2.2308987187879925e-06, "loss": 0.0041, "step": 297100 }, { "epoch": 27.741995706151403, "grad_norm": 0.10604947805404663, "learning_rate": 2.2299635275413826e-06, "loss": 0.0055, "step": 297200 }, { "epoch": 27.751330159619155, "grad_norm": 3.332761287689209, "learning_rate": 2.229028336294772e-06, "loss": 0.0056, "step": 297300 }, { "epoch": 27.760664613086902, "grad_norm": 0.014367963187396526, "learning_rate": 2.2280931450481626e-06, "loss": 0.0061, "step": 297400 }, { "epoch": 27.769999066554654, "grad_norm": 1.364732027053833, "learning_rate": 2.2271579538015527e-06, "loss": 0.006, "step": 297500 }, { "epoch": 27.7793335200224, "grad_norm": 2.2568047046661377, "learning_rate": 2.2262227625549427e-06, "loss": 0.006, "step": 297600 }, { "epoch": 27.788667973490153, "grad_norm": 0.6260440945625305, "learning_rate": 2.2252875713083327e-06, "loss": 0.005, "step": 297700 }, { "epoch": 27.7980024269579, "grad_norm": 0.45374786853790283, "learning_rate": 2.2243523800617227e-06, "loss": 0.0049, "step": 297800 }, { "epoch": 27.80733688042565, "grad_norm": 0.22019241750240326, "learning_rate": 2.2234171888151128e-06, "loss": 0.0045, "step": 297900 }, { "epoch": 27.8166713338934, "grad_norm": 0.40951937437057495, "learning_rate": 2.2224819975685032e-06, "loss": 0.0056, "step": 298000 }, { "epoch": 27.82600578736115, "grad_norm": 0.49019506573677063, "learning_rate": 2.2215468063218933e-06, "loss": 0.0069, "step": 298100 }, { "epoch": 27.8353402408289, "grad_norm": 0.404096394777298, "learning_rate": 2.220611615075283e-06, "loss": 0.0037, "step": 298200 }, { "epoch": 27.84467469429665, "grad_norm": 4.6195197105407715, "learning_rate": 2.219676423828673e-06, "loss": 0.0073, "step": 298300 }, { "epoch": 27.854009147764398, "grad_norm": 2.9208643436431885, "learning_rate": 2.218741232582063e-06, "loss": 0.0054, "step": 298400 }, { "epoch": 27.86334360123215, "grad_norm": 1.1848360300064087, "learning_rate": 2.2178060413354534e-06, "loss": 0.0064, "step": 298500 }, { "epoch": 27.872678054699897, "grad_norm": 0.029532186686992645, "learning_rate": 2.2168708500888434e-06, "loss": 0.005, "step": 298600 }, { "epoch": 27.882012508167648, "grad_norm": 5.055510520935059, "learning_rate": 2.2159356588422334e-06, "loss": 0.0057, "step": 298700 }, { "epoch": 27.891346961635396, "grad_norm": 0.014592733234167099, "learning_rate": 2.2150004675956235e-06, "loss": 0.0056, "step": 298800 }, { "epoch": 27.900681415103147, "grad_norm": 0.13884221017360687, "learning_rate": 2.2140652763490135e-06, "loss": 0.0056, "step": 298900 }, { "epoch": 27.910015868570895, "grad_norm": 5.19483757019043, "learning_rate": 2.2131300851024035e-06, "loss": 0.0073, "step": 299000 }, { "epoch": 27.919350322038646, "grad_norm": 0.05970945581793785, "learning_rate": 2.2121948938557936e-06, "loss": 0.0054, "step": 299100 }, { "epoch": 27.928684775506394, "grad_norm": 3.0909814834594727, "learning_rate": 2.2112597026091836e-06, "loss": 0.0052, "step": 299200 }, { "epoch": 27.938019228974145, "grad_norm": 0.2951227128505707, "learning_rate": 2.2103245113625736e-06, "loss": 0.0074, "step": 299300 }, { "epoch": 27.947353682441893, "grad_norm": 1.866163730621338, "learning_rate": 2.2093893201159637e-06, "loss": 0.0058, "step": 299400 }, { "epoch": 27.956688135909644, "grad_norm": 1.9154143333435059, "learning_rate": 2.208454128869354e-06, "loss": 0.0065, "step": 299500 }, { "epoch": 27.966022589377392, "grad_norm": 1.9213536977767944, "learning_rate": 2.207518937622744e-06, "loss": 0.0062, "step": 299600 }, { "epoch": 27.97535704284514, "grad_norm": 0.021765727549791336, "learning_rate": 2.206583746376134e-06, "loss": 0.0065, "step": 299700 }, { "epoch": 27.98469149631289, "grad_norm": 2.304338216781616, "learning_rate": 2.205648555129524e-06, "loss": 0.0071, "step": 299800 }, { "epoch": 27.99402594978064, "grad_norm": 0.9917670488357544, "learning_rate": 2.2047133638829142e-06, "loss": 0.0065, "step": 299900 }, { "epoch": 28.00336040324839, "grad_norm": 0.30984431505203247, "learning_rate": 2.2037781726363043e-06, "loss": 0.0057, "step": 300000 }, { "epoch": 28.00336040324839, "eval_accuracy": 0.6936989498249708, "eval_f1": 0.8244399468394673, "eval_loss": 0.2931285500526428, "eval_roc_auc": 0.912020388661309, "eval_runtime": 145.7466, "eval_samples_per_second": 294.004, "eval_steps_per_second": 294.004, "step": 300000 }, { "epoch": 28.012694856716138, "grad_norm": 1.253564476966858, "learning_rate": 2.2028429813896943e-06, "loss": 0.0042, "step": 300100 }, { "epoch": 28.02202931018389, "grad_norm": 3.6514813899993896, "learning_rate": 2.2019077901430843e-06, "loss": 0.005, "step": 300200 }, { "epoch": 28.031363763651637, "grad_norm": 0.29585573077201843, "learning_rate": 2.2009725988964744e-06, "loss": 0.0047, "step": 300300 }, { "epoch": 28.040698217119388, "grad_norm": 0.018885372206568718, "learning_rate": 2.2000374076498644e-06, "loss": 0.0065, "step": 300400 }, { "epoch": 28.050032670587136, "grad_norm": 0.8307755589485168, "learning_rate": 2.199102216403255e-06, "loss": 0.0044, "step": 300500 }, { "epoch": 28.059367124054887, "grad_norm": 0.9234731197357178, "learning_rate": 2.198167025156645e-06, "loss": 0.0034, "step": 300600 }, { "epoch": 28.068701577522635, "grad_norm": 0.5319408178329468, "learning_rate": 2.197231833910035e-06, "loss": 0.006, "step": 300700 }, { "epoch": 28.078036030990386, "grad_norm": 1.0215274095535278, "learning_rate": 2.196296642663425e-06, "loss": 0.0065, "step": 300800 }, { "epoch": 28.087370484458134, "grad_norm": 7.324454307556152, "learning_rate": 2.195361451416815e-06, "loss": 0.0047, "step": 300900 }, { "epoch": 28.096704937925885, "grad_norm": 3.2355759143829346, "learning_rate": 2.194426260170205e-06, "loss": 0.0042, "step": 301000 }, { "epoch": 28.106039391393633, "grad_norm": 3.8379194736480713, "learning_rate": 2.193491068923595e-06, "loss": 0.0063, "step": 301100 }, { "epoch": 28.115373844861384, "grad_norm": 0.6515693068504333, "learning_rate": 2.192555877676985e-06, "loss": 0.0051, "step": 301200 }, { "epoch": 28.124708298329132, "grad_norm": 0.08785936236381531, "learning_rate": 2.191620686430375e-06, "loss": 0.005, "step": 301300 }, { "epoch": 28.134042751796883, "grad_norm": 2.0579562187194824, "learning_rate": 2.190685495183765e-06, "loss": 0.0054, "step": 301400 }, { "epoch": 28.14337720526463, "grad_norm": 5.25317907333374, "learning_rate": 2.1897503039371556e-06, "loss": 0.0074, "step": 301500 }, { "epoch": 28.152711658732382, "grad_norm": 0.45478904247283936, "learning_rate": 2.1888151126905456e-06, "loss": 0.0047, "step": 301600 }, { "epoch": 28.16204611220013, "grad_norm": 0.6479339599609375, "learning_rate": 2.1878799214439356e-06, "loss": 0.0053, "step": 301700 }, { "epoch": 28.17138056566788, "grad_norm": 0.2370050698518753, "learning_rate": 2.1869447301973252e-06, "loss": 0.0055, "step": 301800 }, { "epoch": 28.18071501913563, "grad_norm": 3.272047519683838, "learning_rate": 2.1860095389507157e-06, "loss": 0.0054, "step": 301900 }, { "epoch": 28.19004947260338, "grad_norm": 1.2073830366134644, "learning_rate": 2.1850743477041057e-06, "loss": 0.0058, "step": 302000 }, { "epoch": 28.19938392607113, "grad_norm": 0.25774380564689636, "learning_rate": 2.1841391564574958e-06, "loss": 0.0056, "step": 302100 }, { "epoch": 28.20871837953888, "grad_norm": 0.07685324549674988, "learning_rate": 2.183203965210886e-06, "loss": 0.0064, "step": 302200 }, { "epoch": 28.218052833006627, "grad_norm": 0.15003778040409088, "learning_rate": 2.182268773964276e-06, "loss": 0.0065, "step": 302300 }, { "epoch": 28.22738728647438, "grad_norm": 5.075178146362305, "learning_rate": 2.181333582717666e-06, "loss": 0.0047, "step": 302400 }, { "epoch": 28.236721739942126, "grad_norm": 0.005306928418576717, "learning_rate": 2.180398391471056e-06, "loss": 0.0047, "step": 302500 }, { "epoch": 28.246056193409874, "grad_norm": 0.6666282415390015, "learning_rate": 2.1794632002244463e-06, "loss": 0.0039, "step": 302600 }, { "epoch": 28.255390646877625, "grad_norm": 0.20811481773853302, "learning_rate": 2.1785280089778364e-06, "loss": 0.007, "step": 302700 }, { "epoch": 28.264725100345373, "grad_norm": 0.4345727264881134, "learning_rate": 2.177592817731226e-06, "loss": 0.0061, "step": 302800 }, { "epoch": 28.274059553813125, "grad_norm": 0.7233768701553345, "learning_rate": 2.176657626484616e-06, "loss": 0.0034, "step": 302900 }, { "epoch": 28.283394007280872, "grad_norm": 2.587709903717041, "learning_rate": 2.1757224352380065e-06, "loss": 0.0063, "step": 303000 }, { "epoch": 28.292728460748624, "grad_norm": 0.014825794845819473, "learning_rate": 2.1747872439913965e-06, "loss": 0.005, "step": 303100 }, { "epoch": 28.30206291421637, "grad_norm": 4.417530536651611, "learning_rate": 2.1738520527447865e-06, "loss": 0.0037, "step": 303200 }, { "epoch": 28.311397367684123, "grad_norm": 0.026693249121308327, "learning_rate": 2.1729168614981766e-06, "loss": 0.0067, "step": 303300 }, { "epoch": 28.32073182115187, "grad_norm": 0.4539344608783722, "learning_rate": 2.1719816702515666e-06, "loss": 0.0054, "step": 303400 }, { "epoch": 28.33006627461962, "grad_norm": 0.13416841626167297, "learning_rate": 2.1710464790049566e-06, "loss": 0.0049, "step": 303500 }, { "epoch": 28.33940072808737, "grad_norm": 0.12241113185882568, "learning_rate": 2.170111287758347e-06, "loss": 0.0044, "step": 303600 }, { "epoch": 28.34873518155512, "grad_norm": 4.3105621337890625, "learning_rate": 2.1691760965117367e-06, "loss": 0.005, "step": 303700 }, { "epoch": 28.35806963502287, "grad_norm": 2.7270703315734863, "learning_rate": 2.1682409052651267e-06, "loss": 0.0051, "step": 303800 }, { "epoch": 28.36740408849062, "grad_norm": 0.3870588541030884, "learning_rate": 2.1673057140185167e-06, "loss": 0.0045, "step": 303900 }, { "epoch": 28.376738541958368, "grad_norm": 4.669999122619629, "learning_rate": 2.166370522771907e-06, "loss": 0.0048, "step": 304000 }, { "epoch": 28.38607299542612, "grad_norm": 0.08878142386674881, "learning_rate": 2.1654353315252972e-06, "loss": 0.0045, "step": 304100 }, { "epoch": 28.395407448893867, "grad_norm": 0.3122158646583557, "learning_rate": 2.1645001402786873e-06, "loss": 0.0053, "step": 304200 }, { "epoch": 28.404741902361618, "grad_norm": 3.0238406658172607, "learning_rate": 2.1635649490320773e-06, "loss": 0.0046, "step": 304300 }, { "epoch": 28.414076355829366, "grad_norm": 2.0169365406036377, "learning_rate": 2.1626297577854673e-06, "loss": 0.0058, "step": 304400 }, { "epoch": 28.423410809297117, "grad_norm": 3.4404001235961914, "learning_rate": 2.1616945665388573e-06, "loss": 0.0054, "step": 304500 }, { "epoch": 28.432745262764865, "grad_norm": 0.08552476018667221, "learning_rate": 2.1607593752922474e-06, "loss": 0.0039, "step": 304600 }, { "epoch": 28.442079716232616, "grad_norm": 0.5904267430305481, "learning_rate": 2.1598241840456374e-06, "loss": 0.0051, "step": 304700 }, { "epoch": 28.451414169700364, "grad_norm": 1.4861524105072021, "learning_rate": 2.1588889927990274e-06, "loss": 0.0047, "step": 304800 }, { "epoch": 28.460748623168115, "grad_norm": 0.005851989146322012, "learning_rate": 2.1579538015524175e-06, "loss": 0.0036, "step": 304900 }, { "epoch": 28.470083076635863, "grad_norm": 2.4828293323516846, "learning_rate": 2.157018610305808e-06, "loss": 0.0053, "step": 305000 }, { "epoch": 28.470083076635863, "eval_accuracy": 0.6952858809801633, "eval_f1": 0.825281712744086, "eval_loss": 0.2989096939563751, "eval_roc_auc": 0.9129774204153771, "eval_runtime": 145.998, "eval_samples_per_second": 293.497, "eval_steps_per_second": 293.497, "step": 305000 }, { "epoch": 28.479417530103614, "grad_norm": 0.13261355459690094, "learning_rate": 2.156083419059198e-06, "loss": 0.0061, "step": 305100 }, { "epoch": 28.488751983571362, "grad_norm": 2.600893497467041, "learning_rate": 2.155148227812588e-06, "loss": 0.0081, "step": 305200 }, { "epoch": 28.498086437039113, "grad_norm": 2.1908137798309326, "learning_rate": 2.154213036565978e-06, "loss": 0.0055, "step": 305300 }, { "epoch": 28.50742089050686, "grad_norm": 0.0071766572073102, "learning_rate": 2.153277845319368e-06, "loss": 0.0046, "step": 305400 }, { "epoch": 28.51675534397461, "grad_norm": 0.04506257548928261, "learning_rate": 2.152342654072758e-06, "loss": 0.0044, "step": 305500 }, { "epoch": 28.52608979744236, "grad_norm": 0.027884408831596375, "learning_rate": 2.151407462826148e-06, "loss": 0.005, "step": 305600 }, { "epoch": 28.535424250910108, "grad_norm": 0.016784032806754112, "learning_rate": 2.150472271579538e-06, "loss": 0.0051, "step": 305700 }, { "epoch": 28.54475870437786, "grad_norm": 0.43948057293891907, "learning_rate": 2.149537080332928e-06, "loss": 0.0046, "step": 305800 }, { "epoch": 28.554093157845607, "grad_norm": 0.024691829457879066, "learning_rate": 2.148601889086318e-06, "loss": 0.005, "step": 305900 }, { "epoch": 28.563427611313358, "grad_norm": 0.8598390817642212, "learning_rate": 2.1476666978397082e-06, "loss": 0.007, "step": 306000 }, { "epoch": 28.572762064781106, "grad_norm": 0.012477968819439411, "learning_rate": 2.1467315065930987e-06, "loss": 0.005, "step": 306100 }, { "epoch": 28.582096518248857, "grad_norm": 0.04363585263490677, "learning_rate": 2.1457963153464887e-06, "loss": 0.0044, "step": 306200 }, { "epoch": 28.591430971716605, "grad_norm": 1.0197595357894897, "learning_rate": 2.1448611240998787e-06, "loss": 0.0054, "step": 306300 }, { "epoch": 28.600765425184356, "grad_norm": 1.4337447881698608, "learning_rate": 2.1439259328532683e-06, "loss": 0.0053, "step": 306400 }, { "epoch": 28.610099878652104, "grad_norm": 0.18148300051689148, "learning_rate": 2.142990741606659e-06, "loss": 0.0051, "step": 306500 }, { "epoch": 28.619434332119855, "grad_norm": 0.6711371541023254, "learning_rate": 2.142055550360049e-06, "loss": 0.0041, "step": 306600 }, { "epoch": 28.628768785587603, "grad_norm": 0.7588862776756287, "learning_rate": 2.141120359113439e-06, "loss": 0.0043, "step": 306700 }, { "epoch": 28.638103239055354, "grad_norm": 0.12314416468143463, "learning_rate": 2.140185167866829e-06, "loss": 0.005, "step": 306800 }, { "epoch": 28.647437692523102, "grad_norm": 4.106958389282227, "learning_rate": 2.139249976620219e-06, "loss": 0.0072, "step": 306900 }, { "epoch": 28.656772145990853, "grad_norm": 0.13079263269901276, "learning_rate": 2.138314785373609e-06, "loss": 0.0057, "step": 307000 }, { "epoch": 28.6661065994586, "grad_norm": 2.8312454223632812, "learning_rate": 2.1373795941269994e-06, "loss": 0.0062, "step": 307100 }, { "epoch": 28.675441052926352, "grad_norm": 0.0457991361618042, "learning_rate": 2.1364444028803894e-06, "loss": 0.004, "step": 307200 }, { "epoch": 28.6847755063941, "grad_norm": 3.608208656311035, "learning_rate": 2.135509211633779e-06, "loss": 0.0077, "step": 307300 }, { "epoch": 28.69410995986185, "grad_norm": 1.6672093868255615, "learning_rate": 2.134574020387169e-06, "loss": 0.0053, "step": 307400 }, { "epoch": 28.7034444133296, "grad_norm": 1.265648365020752, "learning_rate": 2.1336388291405595e-06, "loss": 0.0064, "step": 307500 }, { "epoch": 28.71277886679735, "grad_norm": 0.855194628238678, "learning_rate": 2.1327036378939496e-06, "loss": 0.0044, "step": 307600 }, { "epoch": 28.7221133202651, "grad_norm": 0.08778933435678482, "learning_rate": 2.1317684466473396e-06, "loss": 0.0048, "step": 307700 }, { "epoch": 28.73144777373285, "grad_norm": 0.13483446836471558, "learning_rate": 2.1308332554007296e-06, "loss": 0.0046, "step": 307800 }, { "epoch": 28.740782227200597, "grad_norm": 2.4034292697906494, "learning_rate": 2.1298980641541197e-06, "loss": 0.0062, "step": 307900 }, { "epoch": 28.75011668066835, "grad_norm": 0.07624388486146927, "learning_rate": 2.1289628729075097e-06, "loss": 0.0042, "step": 308000 }, { "epoch": 28.759451134136096, "grad_norm": 0.47356662154197693, "learning_rate": 2.1280276816609e-06, "loss": 0.0048, "step": 308100 }, { "epoch": 28.768785587603844, "grad_norm": 0.16223151981830597, "learning_rate": 2.1270924904142897e-06, "loss": 0.0075, "step": 308200 }, { "epoch": 28.778120041071595, "grad_norm": 3.419325351715088, "learning_rate": 2.1261572991676798e-06, "loss": 0.0053, "step": 308300 }, { "epoch": 28.787454494539343, "grad_norm": 1.6256648302078247, "learning_rate": 2.12522210792107e-06, "loss": 0.0059, "step": 308400 }, { "epoch": 28.796788948007094, "grad_norm": 1.9455511569976807, "learning_rate": 2.1242869166744603e-06, "loss": 0.0071, "step": 308500 }, { "epoch": 28.806123401474842, "grad_norm": 0.7645415663719177, "learning_rate": 2.1233517254278503e-06, "loss": 0.0043, "step": 308600 }, { "epoch": 28.815457854942593, "grad_norm": 2.370492935180664, "learning_rate": 2.1224165341812403e-06, "loss": 0.0047, "step": 308700 }, { "epoch": 28.82479230841034, "grad_norm": 0.4567888379096985, "learning_rate": 2.1214813429346304e-06, "loss": 0.0065, "step": 308800 }, { "epoch": 28.834126761878093, "grad_norm": 0.8236410021781921, "learning_rate": 2.1205461516880204e-06, "loss": 0.0027, "step": 308900 }, { "epoch": 28.84346121534584, "grad_norm": 2.8008828163146973, "learning_rate": 2.1196109604414104e-06, "loss": 0.0047, "step": 309000 }, { "epoch": 28.85279566881359, "grad_norm": 1.6993392705917358, "learning_rate": 2.1186757691948004e-06, "loss": 0.0046, "step": 309100 }, { "epoch": 28.86213012228134, "grad_norm": 0.7695661783218384, "learning_rate": 2.1177405779481905e-06, "loss": 0.0049, "step": 309200 }, { "epoch": 28.87146457574909, "grad_norm": 1.4876418113708496, "learning_rate": 2.1168053867015805e-06, "loss": 0.0045, "step": 309300 }, { "epoch": 28.88079902921684, "grad_norm": 1.0714869499206543, "learning_rate": 2.1158701954549705e-06, "loss": 0.0052, "step": 309400 }, { "epoch": 28.89013348268459, "grad_norm": 1.1587296724319458, "learning_rate": 2.114935004208361e-06, "loss": 0.0039, "step": 309500 }, { "epoch": 28.899467936152337, "grad_norm": 0.09002380073070526, "learning_rate": 2.113999812961751e-06, "loss": 0.0061, "step": 309600 }, { "epoch": 28.90880238962009, "grad_norm": 0.005982883740216494, "learning_rate": 2.113064621715141e-06, "loss": 0.0045, "step": 309700 }, { "epoch": 28.918136843087836, "grad_norm": 1.406709909439087, "learning_rate": 2.112129430468531e-06, "loss": 0.0044, "step": 309800 }, { "epoch": 28.927471296555588, "grad_norm": 0.14579956233501434, "learning_rate": 2.111194239221921e-06, "loss": 0.0056, "step": 309900 }, { "epoch": 28.936805750023336, "grad_norm": 2.592895746231079, "learning_rate": 2.110259047975311e-06, "loss": 0.0056, "step": 310000 }, { "epoch": 28.936805750023336, "eval_accuracy": 0.6975029171528588, "eval_f1": 0.8278454419973436, "eval_loss": 0.3014014959335327, "eval_roc_auc": 0.9142041188366399, "eval_runtime": 145.8111, "eval_samples_per_second": 293.873, "eval_steps_per_second": 293.873, "step": 310000 }, { "epoch": 28.946140203491087, "grad_norm": 0.15769241750240326, "learning_rate": 2.109323856728701e-06, "loss": 0.0054, "step": 310100 }, { "epoch": 28.955474656958835, "grad_norm": 3.5920050144195557, "learning_rate": 2.108388665482091e-06, "loss": 0.0045, "step": 310200 }, { "epoch": 28.964809110426586, "grad_norm": 0.6785930395126343, "learning_rate": 2.1074534742354812e-06, "loss": 0.0052, "step": 310300 }, { "epoch": 28.974143563894334, "grad_norm": 2.853637218475342, "learning_rate": 2.1065182829888713e-06, "loss": 0.0047, "step": 310400 }, { "epoch": 28.983478017362085, "grad_norm": 0.09897184371948242, "learning_rate": 2.1055830917422613e-06, "loss": 0.0046, "step": 310500 }, { "epoch": 28.992812470829833, "grad_norm": 0.30188092589378357, "learning_rate": 2.1046479004956518e-06, "loss": 0.0044, "step": 310600 }, { "epoch": 29.002146924297584, "grad_norm": 1.5311001539230347, "learning_rate": 2.1037127092490418e-06, "loss": 0.006, "step": 310700 }, { "epoch": 29.01148137776533, "grad_norm": 0.5426590442657471, "learning_rate": 2.102777518002432e-06, "loss": 0.0059, "step": 310800 }, { "epoch": 29.020815831233083, "grad_norm": 0.04322017356753349, "learning_rate": 2.1018423267558214e-06, "loss": 0.0037, "step": 310900 }, { "epoch": 29.03015028470083, "grad_norm": 1.7747896909713745, "learning_rate": 2.100907135509212e-06, "loss": 0.0053, "step": 311000 }, { "epoch": 29.03948473816858, "grad_norm": 2.8850183486938477, "learning_rate": 2.099971944262602e-06, "loss": 0.0039, "step": 311100 }, { "epoch": 29.04881919163633, "grad_norm": 7.431553840637207, "learning_rate": 2.099036753015992e-06, "loss": 0.0046, "step": 311200 }, { "epoch": 29.058153645104078, "grad_norm": 0.0803159773349762, "learning_rate": 2.098101561769382e-06, "loss": 0.0027, "step": 311300 }, { "epoch": 29.06748809857183, "grad_norm": 5.1098313331604, "learning_rate": 2.097166370522772e-06, "loss": 0.0038, "step": 311400 }, { "epoch": 29.076822552039577, "grad_norm": 0.11799436807632446, "learning_rate": 2.096231179276162e-06, "loss": 0.0047, "step": 311500 }, { "epoch": 29.086157005507328, "grad_norm": 0.012420600280165672, "learning_rate": 2.0952959880295525e-06, "loss": 0.0045, "step": 311600 }, { "epoch": 29.095491458975076, "grad_norm": 4.553661823272705, "learning_rate": 2.0943607967829425e-06, "loss": 0.0049, "step": 311700 }, { "epoch": 29.104825912442827, "grad_norm": 0.13330808281898499, "learning_rate": 2.093425605536332e-06, "loss": 0.0047, "step": 311800 }, { "epoch": 29.114160365910575, "grad_norm": 2.93241810798645, "learning_rate": 2.092490414289722e-06, "loss": 0.0047, "step": 311900 }, { "epoch": 29.123494819378326, "grad_norm": 0.014739309437572956, "learning_rate": 2.0915552230431126e-06, "loss": 0.0035, "step": 312000 }, { "epoch": 29.132829272846074, "grad_norm": 2.2088820934295654, "learning_rate": 2.0906200317965026e-06, "loss": 0.0053, "step": 312100 }, { "epoch": 29.142163726313825, "grad_norm": 0.1470232605934143, "learning_rate": 2.0896848405498927e-06, "loss": 0.0049, "step": 312200 }, { "epoch": 29.151498179781573, "grad_norm": 0.018117059022188187, "learning_rate": 2.0887496493032827e-06, "loss": 0.0054, "step": 312300 }, { "epoch": 29.160832633249324, "grad_norm": 3.6029648780822754, "learning_rate": 2.0878144580566727e-06, "loss": 0.0067, "step": 312400 }, { "epoch": 29.170167086717072, "grad_norm": 0.43167728185653687, "learning_rate": 2.0868792668100628e-06, "loss": 0.0048, "step": 312500 }, { "epoch": 29.179501540184823, "grad_norm": 0.006720306351780891, "learning_rate": 2.0859440755634532e-06, "loss": 0.0054, "step": 312600 }, { "epoch": 29.18883599365257, "grad_norm": 0.5632047057151794, "learning_rate": 2.085008884316843e-06, "loss": 0.0036, "step": 312700 }, { "epoch": 29.198170447120322, "grad_norm": 2.5987493991851807, "learning_rate": 2.084073693070233e-06, "loss": 0.0042, "step": 312800 }, { "epoch": 29.20750490058807, "grad_norm": 2.3793983459472656, "learning_rate": 2.083138501823623e-06, "loss": 0.0036, "step": 312900 }, { "epoch": 29.21683935405582, "grad_norm": 2.285222053527832, "learning_rate": 2.0822033105770133e-06, "loss": 0.003, "step": 313000 }, { "epoch": 29.22617380752357, "grad_norm": 0.06946951895952225, "learning_rate": 2.0812681193304034e-06, "loss": 0.0047, "step": 313100 }, { "epoch": 29.23550826099132, "grad_norm": 2.825199842453003, "learning_rate": 2.0803329280837934e-06, "loss": 0.0045, "step": 313200 }, { "epoch": 29.244842714459068, "grad_norm": 0.1166975125670433, "learning_rate": 2.0793977368371834e-06, "loss": 0.0051, "step": 313300 }, { "epoch": 29.25417716792682, "grad_norm": 1.9976269006729126, "learning_rate": 2.0784625455905735e-06, "loss": 0.0052, "step": 313400 }, { "epoch": 29.263511621394567, "grad_norm": 0.756905198097229, "learning_rate": 2.0775273543439635e-06, "loss": 0.0051, "step": 313500 }, { "epoch": 29.27284607486232, "grad_norm": 1.9608988761901855, "learning_rate": 2.0765921630973535e-06, "loss": 0.0055, "step": 313600 }, { "epoch": 29.282180528330066, "grad_norm": 0.0597650483250618, "learning_rate": 2.0756569718507436e-06, "loss": 0.006, "step": 313700 }, { "epoch": 29.291514981797818, "grad_norm": 4.686561584472656, "learning_rate": 2.0747217806041336e-06, "loss": 0.005, "step": 313800 }, { "epoch": 29.300849435265565, "grad_norm": 2.935765504837036, "learning_rate": 2.0737865893575236e-06, "loss": 0.0051, "step": 313900 }, { "epoch": 29.310183888733313, "grad_norm": 0.470058798789978, "learning_rate": 2.0728513981109136e-06, "loss": 0.0049, "step": 314000 }, { "epoch": 29.319518342201064, "grad_norm": 1.1365033388137817, "learning_rate": 2.071916206864304e-06, "loss": 0.0057, "step": 314100 }, { "epoch": 29.328852795668812, "grad_norm": 0.31655773520469666, "learning_rate": 2.070981015617694e-06, "loss": 0.006, "step": 314200 }, { "epoch": 29.338187249136563, "grad_norm": 4.413196086883545, "learning_rate": 2.070045824371084e-06, "loss": 0.0049, "step": 314300 }, { "epoch": 29.34752170260431, "grad_norm": 0.6573553085327148, "learning_rate": 2.069110633124474e-06, "loss": 0.0041, "step": 314400 }, { "epoch": 29.356856156072062, "grad_norm": 4.431276798248291, "learning_rate": 2.0681754418778642e-06, "loss": 0.0048, "step": 314500 }, { "epoch": 29.36619060953981, "grad_norm": 0.02392866089940071, "learning_rate": 2.0672402506312542e-06, "loss": 0.0055, "step": 314600 }, { "epoch": 29.37552506300756, "grad_norm": 2.316862106323242, "learning_rate": 2.0663050593846443e-06, "loss": 0.0049, "step": 314700 }, { "epoch": 29.38485951647531, "grad_norm": 1.8820799589157104, "learning_rate": 2.0653698681380343e-06, "loss": 0.0048, "step": 314800 }, { "epoch": 29.39419396994306, "grad_norm": 0.05044008418917656, "learning_rate": 2.0644346768914243e-06, "loss": 0.0056, "step": 314900 }, { "epoch": 29.40352842341081, "grad_norm": 0.03598593920469284, "learning_rate": 2.0634994856448144e-06, "loss": 0.0054, "step": 315000 }, { "epoch": 29.40352842341081, "eval_accuracy": 0.7012135355892649, "eval_f1": 0.8289356500480224, "eval_loss": 0.3021254241466522, "eval_roc_auc": 0.9127978444782817, "eval_runtime": 147.6949, "eval_samples_per_second": 290.125, "eval_steps_per_second": 290.125, "step": 315000 }, { "epoch": 29.41286287687856, "grad_norm": 1.3465895652770996, "learning_rate": 2.062564294398205e-06, "loss": 0.0041, "step": 315100 }, { "epoch": 29.422197330346307, "grad_norm": 2.0546717643737793, "learning_rate": 2.061629103151595e-06, "loss": 0.0047, "step": 315200 }, { "epoch": 29.43153178381406, "grad_norm": 0.37703245878219604, "learning_rate": 2.060693911904985e-06, "loss": 0.0029, "step": 315300 }, { "epoch": 29.440866237281806, "grad_norm": 0.3023110330104828, "learning_rate": 2.0597587206583745e-06, "loss": 0.0052, "step": 315400 }, { "epoch": 29.450200690749558, "grad_norm": 0.05026570335030556, "learning_rate": 2.058823529411765e-06, "loss": 0.0047, "step": 315500 }, { "epoch": 29.459535144217305, "grad_norm": 2.474522352218628, "learning_rate": 2.057888338165155e-06, "loss": 0.0053, "step": 315600 }, { "epoch": 29.468869597685057, "grad_norm": 0.40894049406051636, "learning_rate": 2.056953146918545e-06, "loss": 0.006, "step": 315700 }, { "epoch": 29.478204051152805, "grad_norm": 0.02560594119131565, "learning_rate": 2.056017955671935e-06, "loss": 0.0046, "step": 315800 }, { "epoch": 29.487538504620556, "grad_norm": 0.5592018365859985, "learning_rate": 2.055082764425325e-06, "loss": 0.0053, "step": 315900 }, { "epoch": 29.496872958088304, "grad_norm": 4.967058181762695, "learning_rate": 2.054147573178715e-06, "loss": 0.0085, "step": 316000 }, { "epoch": 29.506207411556055, "grad_norm": 0.411451131105423, "learning_rate": 2.0532123819321056e-06, "loss": 0.0046, "step": 316100 }, { "epoch": 29.515541865023803, "grad_norm": 0.09084062278270721, "learning_rate": 2.0522771906854956e-06, "loss": 0.0041, "step": 316200 }, { "epoch": 29.524876318491554, "grad_norm": 0.4075307548046112, "learning_rate": 2.051341999438885e-06, "loss": 0.0047, "step": 316300 }, { "epoch": 29.5342107719593, "grad_norm": 2.1589035987854004, "learning_rate": 2.0504068081922752e-06, "loss": 0.0059, "step": 316400 }, { "epoch": 29.543545225427053, "grad_norm": 0.0601174458861351, "learning_rate": 2.0494716169456657e-06, "loss": 0.0048, "step": 316500 }, { "epoch": 29.5528796788948, "grad_norm": 0.21194978058338165, "learning_rate": 2.0485364256990557e-06, "loss": 0.0055, "step": 316600 }, { "epoch": 29.56221413236255, "grad_norm": 1.301263689994812, "learning_rate": 2.0476012344524457e-06, "loss": 0.0042, "step": 316700 }, { "epoch": 29.5715485858303, "grad_norm": 0.04909225180745125, "learning_rate": 2.0466660432058358e-06, "loss": 0.0041, "step": 316800 }, { "epoch": 29.580883039298048, "grad_norm": 6.703638553619385, "learning_rate": 2.045730851959226e-06, "loss": 0.0043, "step": 316900 }, { "epoch": 29.5902174927658, "grad_norm": 7.924221515655518, "learning_rate": 2.044795660712616e-06, "loss": 0.005, "step": 317000 }, { "epoch": 29.599551946233547, "grad_norm": 1.5305393934249878, "learning_rate": 2.0438604694660063e-06, "loss": 0.0046, "step": 317100 }, { "epoch": 29.608886399701298, "grad_norm": 0.8677778840065002, "learning_rate": 2.042925278219396e-06, "loss": 0.0037, "step": 317200 }, { "epoch": 29.618220853169046, "grad_norm": 0.061472732573747635, "learning_rate": 2.041990086972786e-06, "loss": 0.0048, "step": 317300 }, { "epoch": 29.627555306636797, "grad_norm": 2.0432024002075195, "learning_rate": 2.041054895726176e-06, "loss": 0.0043, "step": 317400 }, { "epoch": 29.636889760104545, "grad_norm": 0.6032141447067261, "learning_rate": 2.0401197044795664e-06, "loss": 0.0049, "step": 317500 }, { "epoch": 29.646224213572296, "grad_norm": 0.028979210183024406, "learning_rate": 2.0391845132329564e-06, "loss": 0.0058, "step": 317600 }, { "epoch": 29.655558667040044, "grad_norm": 1.6939393281936646, "learning_rate": 2.0382493219863465e-06, "loss": 0.0048, "step": 317700 }, { "epoch": 29.664893120507795, "grad_norm": 0.4738968014717102, "learning_rate": 2.0373141307397365e-06, "loss": 0.0056, "step": 317800 }, { "epoch": 29.674227573975543, "grad_norm": 0.03765077516436577, "learning_rate": 2.0363789394931265e-06, "loss": 0.0046, "step": 317900 }, { "epoch": 29.683562027443294, "grad_norm": 0.08940548449754715, "learning_rate": 2.0354437482465166e-06, "loss": 0.0052, "step": 318000 }, { "epoch": 29.692896480911042, "grad_norm": 3.810988187789917, "learning_rate": 2.0345085569999066e-06, "loss": 0.0061, "step": 318100 }, { "epoch": 29.702230934378793, "grad_norm": 0.06816519051790237, "learning_rate": 2.0335733657532966e-06, "loss": 0.0063, "step": 318200 }, { "epoch": 29.71156538784654, "grad_norm": 0.019635995849967003, "learning_rate": 2.0326381745066867e-06, "loss": 0.0066, "step": 318300 }, { "epoch": 29.720899841314292, "grad_norm": 0.027576586231589317, "learning_rate": 2.0317029832600767e-06, "loss": 0.0056, "step": 318400 }, { "epoch": 29.73023429478204, "grad_norm": 0.2678394913673401, "learning_rate": 2.0307677920134667e-06, "loss": 0.0041, "step": 318500 }, { "epoch": 29.73956874824979, "grad_norm": 4.197035312652588, "learning_rate": 2.029832600766857e-06, "loss": 0.0057, "step": 318600 }, { "epoch": 29.74890320171754, "grad_norm": 0.6101654767990112, "learning_rate": 2.028897409520247e-06, "loss": 0.0039, "step": 318700 }, { "epoch": 29.75823765518529, "grad_norm": 1.6861176490783691, "learning_rate": 2.0279622182736372e-06, "loss": 0.0061, "step": 318800 }, { "epoch": 29.767572108653038, "grad_norm": 0.05967346578836441, "learning_rate": 2.0270270270270273e-06, "loss": 0.0052, "step": 318900 }, { "epoch": 29.77690656212079, "grad_norm": 0.07135438919067383, "learning_rate": 2.0260918357804173e-06, "loss": 0.0059, "step": 319000 }, { "epoch": 29.786241015588537, "grad_norm": 0.24635730683803558, "learning_rate": 2.0251566445338073e-06, "loss": 0.0052, "step": 319100 }, { "epoch": 29.79557546905629, "grad_norm": 0.9302059412002563, "learning_rate": 2.0242214532871974e-06, "loss": 0.0059, "step": 319200 }, { "epoch": 29.804909922524036, "grad_norm": 0.460521936416626, "learning_rate": 2.0232862620405874e-06, "loss": 0.0046, "step": 319300 }, { "epoch": 29.814244375991784, "grad_norm": 5.0589680671691895, "learning_rate": 2.0223510707939774e-06, "loss": 0.0056, "step": 319400 }, { "epoch": 29.823578829459535, "grad_norm": 0.01950776018202305, "learning_rate": 2.0214158795473674e-06, "loss": 0.005, "step": 319500 }, { "epoch": 29.832913282927283, "grad_norm": 3.3697400093078613, "learning_rate": 2.020480688300758e-06, "loss": 0.0039, "step": 319600 }, { "epoch": 29.842247736395034, "grad_norm": 3.148401975631714, "learning_rate": 2.019545497054148e-06, "loss": 0.0053, "step": 319700 }, { "epoch": 29.851582189862782, "grad_norm": 8.061349868774414, "learning_rate": 2.018610305807538e-06, "loss": 0.0043, "step": 319800 }, { "epoch": 29.860916643330533, "grad_norm": 0.2386171519756317, "learning_rate": 2.0176751145609276e-06, "loss": 0.0046, "step": 319900 }, { "epoch": 29.87025109679828, "grad_norm": 0.028674503788352013, "learning_rate": 2.016739923314318e-06, "loss": 0.0041, "step": 320000 }, { "epoch": 29.87025109679828, "eval_accuracy": 0.6974095682613769, "eval_f1": 0.8247457091158231, "eval_loss": 0.30510812997817993, "eval_roc_auc": 0.9083005767900985, "eval_runtime": 146.1939, "eval_samples_per_second": 293.104, "eval_steps_per_second": 293.104, "step": 320000 }, { "epoch": 29.879585550266032, "grad_norm": 0.8016414642333984, "learning_rate": 2.015804732067708e-06, "loss": 0.0057, "step": 320100 }, { "epoch": 29.88892000373378, "grad_norm": 0.02715836837887764, "learning_rate": 2.014869540821098e-06, "loss": 0.0046, "step": 320200 }, { "epoch": 29.89825445720153, "grad_norm": 0.10976065695285797, "learning_rate": 2.013934349574488e-06, "loss": 0.0071, "step": 320300 }, { "epoch": 29.90758891066928, "grad_norm": 7.798217296600342, "learning_rate": 2.012999158327878e-06, "loss": 0.0068, "step": 320400 }, { "epoch": 29.91692336413703, "grad_norm": 0.01359857153147459, "learning_rate": 2.012063967081268e-06, "loss": 0.0043, "step": 320500 }, { "epoch": 29.926257817604778, "grad_norm": 0.2572348117828369, "learning_rate": 2.0111287758346586e-06, "loss": 0.0043, "step": 320600 }, { "epoch": 29.93559227107253, "grad_norm": 2.1385838985443115, "learning_rate": 2.0101935845880487e-06, "loss": 0.0046, "step": 320700 }, { "epoch": 29.944926724540277, "grad_norm": 6.904308319091797, "learning_rate": 2.0092583933414383e-06, "loss": 0.0046, "step": 320800 }, { "epoch": 29.95426117800803, "grad_norm": 8.123433113098145, "learning_rate": 2.0083232020948283e-06, "loss": 0.0043, "step": 320900 }, { "epoch": 29.963595631475776, "grad_norm": 4.041991710662842, "learning_rate": 2.0073880108482188e-06, "loss": 0.0062, "step": 321000 }, { "epoch": 29.972930084943528, "grad_norm": 4.256819248199463, "learning_rate": 2.0064528196016088e-06, "loss": 0.0054, "step": 321100 }, { "epoch": 29.982264538411275, "grad_norm": 0.17810572683811188, "learning_rate": 2.005517628354999e-06, "loss": 0.0047, "step": 321200 }, { "epoch": 29.991598991879027, "grad_norm": 4.662420272827148, "learning_rate": 2.004582437108389e-06, "loss": 0.0034, "step": 321300 }, { "epoch": 30.000933445346774, "grad_norm": 1.1667596101760864, "learning_rate": 2.003647245861779e-06, "loss": 0.0037, "step": 321400 }, { "epoch": 30.010267898814526, "grad_norm": 0.055810198187828064, "learning_rate": 2.002712054615169e-06, "loss": 0.0035, "step": 321500 }, { "epoch": 30.019602352282273, "grad_norm": 4.634125232696533, "learning_rate": 2.001776863368559e-06, "loss": 0.004, "step": 321600 }, { "epoch": 30.028936805750025, "grad_norm": 0.010111263953149319, "learning_rate": 2.000841672121949e-06, "loss": 0.0032, "step": 321700 }, { "epoch": 30.038271259217773, "grad_norm": 0.05897799879312515, "learning_rate": 1.999906480875339e-06, "loss": 0.0043, "step": 321800 }, { "epoch": 30.047605712685524, "grad_norm": 0.6294082403182983, "learning_rate": 1.998971289628729e-06, "loss": 0.0044, "step": 321900 }, { "epoch": 30.05694016615327, "grad_norm": 0.11815737932920456, "learning_rate": 1.998036098382119e-06, "loss": 0.0036, "step": 322000 }, { "epoch": 30.066274619621023, "grad_norm": 2.8619680404663086, "learning_rate": 1.9971009071355095e-06, "loss": 0.0037, "step": 322100 }, { "epoch": 30.07560907308877, "grad_norm": 0.0730455219745636, "learning_rate": 1.9961657158888995e-06, "loss": 0.0044, "step": 322200 }, { "epoch": 30.08494352655652, "grad_norm": 6.626955032348633, "learning_rate": 1.9952305246422896e-06, "loss": 0.0047, "step": 322300 }, { "epoch": 30.09427798002427, "grad_norm": 0.28223007917404175, "learning_rate": 1.9942953333956796e-06, "loss": 0.0041, "step": 322400 }, { "epoch": 30.103612433492017, "grad_norm": 0.09782370179891586, "learning_rate": 1.9933601421490696e-06, "loss": 0.0029, "step": 322500 }, { "epoch": 30.11294688695977, "grad_norm": 0.039438243955373764, "learning_rate": 1.9924249509024597e-06, "loss": 0.0044, "step": 322600 }, { "epoch": 30.122281340427516, "grad_norm": 1.2759183645248413, "learning_rate": 1.9914897596558497e-06, "loss": 0.0041, "step": 322700 }, { "epoch": 30.131615793895268, "grad_norm": 5.173035144805908, "learning_rate": 1.9905545684092397e-06, "loss": 0.0054, "step": 322800 }, { "epoch": 30.140950247363016, "grad_norm": 0.031241528689861298, "learning_rate": 1.9896193771626298e-06, "loss": 0.0028, "step": 322900 }, { "epoch": 30.150284700830767, "grad_norm": 0.06576158851385117, "learning_rate": 1.98868418591602e-06, "loss": 0.0059, "step": 323000 }, { "epoch": 30.159619154298515, "grad_norm": 0.03660472109913826, "learning_rate": 1.9877489946694102e-06, "loss": 0.0044, "step": 323100 }, { "epoch": 30.168953607766266, "grad_norm": 0.5759453177452087, "learning_rate": 1.9868138034228003e-06, "loss": 0.0047, "step": 323200 }, { "epoch": 30.178288061234014, "grad_norm": 0.17687533795833588, "learning_rate": 1.9858786121761903e-06, "loss": 0.0048, "step": 323300 }, { "epoch": 30.187622514701765, "grad_norm": 0.3378660976886749, "learning_rate": 1.9849434209295803e-06, "loss": 0.0033, "step": 323400 }, { "epoch": 30.196956968169513, "grad_norm": 0.12946023046970367, "learning_rate": 1.9840082296829704e-06, "loss": 0.0045, "step": 323500 }, { "epoch": 30.206291421637264, "grad_norm": 0.3229657709598541, "learning_rate": 1.9830730384363604e-06, "loss": 0.0037, "step": 323600 }, { "epoch": 30.21562587510501, "grad_norm": 0.23720239102840424, "learning_rate": 1.9821378471897504e-06, "loss": 0.0043, "step": 323700 }, { "epoch": 30.224960328572763, "grad_norm": 0.22465486824512482, "learning_rate": 1.9812026559431405e-06, "loss": 0.0054, "step": 323800 }, { "epoch": 30.23429478204051, "grad_norm": 0.05617561936378479, "learning_rate": 1.9802674646965305e-06, "loss": 0.003, "step": 323900 }, { "epoch": 30.243629235508262, "grad_norm": 0.1144610270857811, "learning_rate": 1.9793322734499205e-06, "loss": 0.0045, "step": 324000 }, { "epoch": 30.25296368897601, "grad_norm": 2.371769905090332, "learning_rate": 1.978397082203311e-06, "loss": 0.0036, "step": 324100 }, { "epoch": 30.26229814244376, "grad_norm": 8.317680358886719, "learning_rate": 1.977461890956701e-06, "loss": 0.0034, "step": 324200 }, { "epoch": 30.27163259591151, "grad_norm": 0.07376698404550552, "learning_rate": 1.976526699710091e-06, "loss": 0.0044, "step": 324300 }, { "epoch": 30.28096704937926, "grad_norm": 1.3732800483703613, "learning_rate": 1.975591508463481e-06, "loss": 0.0041, "step": 324400 }, { "epoch": 30.290301502847008, "grad_norm": 0.10830061137676239, "learning_rate": 1.974656317216871e-06, "loss": 0.0056, "step": 324500 }, { "epoch": 30.29963595631476, "grad_norm": 2.8507912158966064, "learning_rate": 1.973721125970261e-06, "loss": 0.0047, "step": 324600 }, { "epoch": 30.308970409782507, "grad_norm": 0.20157945156097412, "learning_rate": 1.972785934723651e-06, "loss": 0.0043, "step": 324700 }, { "epoch": 30.31830486325026, "grad_norm": 1.2538617849349976, "learning_rate": 1.971850743477041e-06, "loss": 0.0045, "step": 324800 }, { "epoch": 30.327639316718006, "grad_norm": 0.07605201005935669, "learning_rate": 1.9709155522304312e-06, "loss": 0.0044, "step": 324900 }, { "epoch": 30.336973770185757, "grad_norm": 3.0305795669555664, "learning_rate": 1.9699803609838212e-06, "loss": 0.0043, "step": 325000 }, { "epoch": 30.336973770185757, "eval_accuracy": 0.6960793465577596, "eval_f1": 0.8273154327514778, "eval_loss": 0.31253781914711, "eval_roc_auc": 0.9144643747055012, "eval_runtime": 145.9639, "eval_samples_per_second": 293.566, "eval_steps_per_second": 293.566, "step": 325000 }, { "epoch": 30.346308223653505, "grad_norm": 4.464924335479736, "learning_rate": 1.9690451697372117e-06, "loss": 0.0043, "step": 325100 }, { "epoch": 30.355642677121253, "grad_norm": 3.914762020111084, "learning_rate": 1.9681099784906017e-06, "loss": 0.0055, "step": 325200 }, { "epoch": 30.364977130589004, "grad_norm": 0.15454542636871338, "learning_rate": 1.9671747872439918e-06, "loss": 0.0054, "step": 325300 }, { "epoch": 30.374311584056752, "grad_norm": 0.014207666739821434, "learning_rate": 1.9662395959973814e-06, "loss": 0.0054, "step": 325400 }, { "epoch": 30.383646037524503, "grad_norm": 0.3922434151172638, "learning_rate": 1.9653044047507714e-06, "loss": 0.0039, "step": 325500 }, { "epoch": 30.39298049099225, "grad_norm": 0.0012627443065866828, "learning_rate": 1.964369213504162e-06, "loss": 0.0061, "step": 325600 }, { "epoch": 30.402314944460002, "grad_norm": 0.8807443380355835, "learning_rate": 1.963434022257552e-06, "loss": 0.0069, "step": 325700 }, { "epoch": 30.41164939792775, "grad_norm": 0.07717331498861313, "learning_rate": 1.962498831010942e-06, "loss": 0.0049, "step": 325800 }, { "epoch": 30.4209838513955, "grad_norm": 0.21639160811901093, "learning_rate": 1.961563639764332e-06, "loss": 0.0067, "step": 325900 }, { "epoch": 30.43031830486325, "grad_norm": 0.0010936990147456527, "learning_rate": 1.960628448517722e-06, "loss": 0.0027, "step": 326000 }, { "epoch": 30.439652758331, "grad_norm": 3.430699586868286, "learning_rate": 1.959693257271112e-06, "loss": 0.0052, "step": 326100 }, { "epoch": 30.448987211798748, "grad_norm": 0.24424713850021362, "learning_rate": 1.9587580660245025e-06, "loss": 0.0047, "step": 326200 }, { "epoch": 30.4583216652665, "grad_norm": 1.3258169889450073, "learning_rate": 1.957822874777892e-06, "loss": 0.0029, "step": 326300 }, { "epoch": 30.467656118734247, "grad_norm": 0.020342634990811348, "learning_rate": 1.956887683531282e-06, "loss": 0.0059, "step": 326400 }, { "epoch": 30.476990572202, "grad_norm": 6.923943996429443, "learning_rate": 1.955952492284672e-06, "loss": 0.0045, "step": 326500 }, { "epoch": 30.486325025669746, "grad_norm": 0.4523909389972687, "learning_rate": 1.9550173010380626e-06, "loss": 0.0047, "step": 326600 }, { "epoch": 30.495659479137498, "grad_norm": 0.023104025050997734, "learning_rate": 1.9540821097914526e-06, "loss": 0.0046, "step": 326700 }, { "epoch": 30.504993932605245, "grad_norm": 6.053701400756836, "learning_rate": 1.9531469185448426e-06, "loss": 0.0057, "step": 326800 }, { "epoch": 30.514328386072997, "grad_norm": 1.9188600778579712, "learning_rate": 1.9522117272982327e-06, "loss": 0.0039, "step": 326900 }, { "epoch": 30.523662839540744, "grad_norm": 0.1304318904876709, "learning_rate": 1.9512765360516227e-06, "loss": 0.0061, "step": 327000 }, { "epoch": 30.532997293008496, "grad_norm": 0.07360488176345825, "learning_rate": 1.9503413448050127e-06, "loss": 0.0046, "step": 327100 }, { "epoch": 30.542331746476243, "grad_norm": 1.5855200290679932, "learning_rate": 1.9494061535584028e-06, "loss": 0.0031, "step": 327200 }, { "epoch": 30.551666199943995, "grad_norm": 3.299734592437744, "learning_rate": 1.948470962311793e-06, "loss": 0.0045, "step": 327300 }, { "epoch": 30.561000653411742, "grad_norm": 0.020937427878379822, "learning_rate": 1.947535771065183e-06, "loss": 0.0056, "step": 327400 }, { "epoch": 30.570335106879494, "grad_norm": 0.05343509465456009, "learning_rate": 1.946600579818573e-06, "loss": 0.0029, "step": 327500 }, { "epoch": 30.57966956034724, "grad_norm": 0.010110697709023952, "learning_rate": 1.9456653885719633e-06, "loss": 0.0037, "step": 327600 }, { "epoch": 30.589004013814993, "grad_norm": 5.407670974731445, "learning_rate": 1.9447301973253533e-06, "loss": 0.0044, "step": 327700 }, { "epoch": 30.59833846728274, "grad_norm": 0.05987139791250229, "learning_rate": 1.9437950060787434e-06, "loss": 0.0058, "step": 327800 }, { "epoch": 30.60767292075049, "grad_norm": 0.030794218182563782, "learning_rate": 1.9428598148321334e-06, "loss": 0.004, "step": 327900 }, { "epoch": 30.61700737421824, "grad_norm": 0.2874319851398468, "learning_rate": 1.9419246235855234e-06, "loss": 0.0041, "step": 328000 }, { "epoch": 30.626341827685987, "grad_norm": 0.6497468948364258, "learning_rate": 1.9409894323389135e-06, "loss": 0.0036, "step": 328100 }, { "epoch": 30.63567628115374, "grad_norm": 0.017703143879771233, "learning_rate": 1.9400542410923035e-06, "loss": 0.0033, "step": 328200 }, { "epoch": 30.645010734621486, "grad_norm": 1.2750481367111206, "learning_rate": 1.9391190498456935e-06, "loss": 0.0049, "step": 328300 }, { "epoch": 30.654345188089238, "grad_norm": 2.7820327281951904, "learning_rate": 1.9381838585990836e-06, "loss": 0.0044, "step": 328400 }, { "epoch": 30.663679641556985, "grad_norm": 0.6860239505767822, "learning_rate": 1.9372486673524736e-06, "loss": 0.0032, "step": 328500 }, { "epoch": 30.673014095024737, "grad_norm": 0.15677355229854584, "learning_rate": 1.936313476105864e-06, "loss": 0.0045, "step": 328600 }, { "epoch": 30.682348548492484, "grad_norm": 0.5964334011077881, "learning_rate": 1.935378284859254e-06, "loss": 0.0033, "step": 328700 }, { "epoch": 30.691683001960236, "grad_norm": 0.07900845259428024, "learning_rate": 1.934443093612644e-06, "loss": 0.004, "step": 328800 }, { "epoch": 30.701017455427984, "grad_norm": 3.466132402420044, "learning_rate": 1.933507902366034e-06, "loss": 0.0041, "step": 328900 }, { "epoch": 30.710351908895735, "grad_norm": 0.5290659070014954, "learning_rate": 1.932572711119424e-06, "loss": 0.0037, "step": 329000 }, { "epoch": 30.719686362363483, "grad_norm": 0.04495813697576523, "learning_rate": 1.931637519872814e-06, "loss": 0.0027, "step": 329100 }, { "epoch": 30.729020815831234, "grad_norm": 1.9895166158676147, "learning_rate": 1.9307023286262042e-06, "loss": 0.0048, "step": 329200 }, { "epoch": 30.73835526929898, "grad_norm": 2.0636987686157227, "learning_rate": 1.9297671373795943e-06, "loss": 0.0053, "step": 329300 }, { "epoch": 30.747689722766733, "grad_norm": 3.7392945289611816, "learning_rate": 1.9288319461329843e-06, "loss": 0.0045, "step": 329400 }, { "epoch": 30.75702417623448, "grad_norm": 3.6942334175109863, "learning_rate": 1.9278967548863743e-06, "loss": 0.0044, "step": 329500 }, { "epoch": 30.766358629702232, "grad_norm": 0.10097524523735046, "learning_rate": 1.9269615636397644e-06, "loss": 0.0059, "step": 329600 }, { "epoch": 30.77569308316998, "grad_norm": 0.11303795874118805, "learning_rate": 1.926026372393155e-06, "loss": 0.0037, "step": 329700 }, { "epoch": 30.78502753663773, "grad_norm": 0.7151175141334534, "learning_rate": 1.925091181146545e-06, "loss": 0.0058, "step": 329800 }, { "epoch": 30.79436199010548, "grad_norm": 0.09736522287130356, "learning_rate": 1.9241559898999344e-06, "loss": 0.0043, "step": 329900 }, { "epoch": 30.80369644357323, "grad_norm": 0.15018293261528015, "learning_rate": 1.9232207986533245e-06, "loss": 0.0051, "step": 330000 }, { "epoch": 30.80369644357323, "eval_accuracy": 0.6965460910151692, "eval_f1": 0.8251704411879947, "eval_loss": 0.31328779458999634, "eval_roc_auc": 0.9116060972052441, "eval_runtime": 145.7795, "eval_samples_per_second": 293.937, "eval_steps_per_second": 293.937, "step": 330000 }, { "epoch": 30.813030897040978, "grad_norm": 1.8686176538467407, "learning_rate": 1.922285607406715e-06, "loss": 0.0049, "step": 330100 }, { "epoch": 30.82236535050873, "grad_norm": 0.001644740579649806, "learning_rate": 1.921350416160105e-06, "loss": 0.0048, "step": 330200 }, { "epoch": 30.831699803976477, "grad_norm": 0.07855426520109177, "learning_rate": 1.920415224913495e-06, "loss": 0.0045, "step": 330300 }, { "epoch": 30.841034257444228, "grad_norm": 0.3060835003852844, "learning_rate": 1.919480033666885e-06, "loss": 0.0033, "step": 330400 }, { "epoch": 30.850368710911976, "grad_norm": 1.6703124046325684, "learning_rate": 1.918544842420275e-06, "loss": 0.0057, "step": 330500 }, { "epoch": 30.859703164379727, "grad_norm": 0.0077467989176511765, "learning_rate": 1.917609651173665e-06, "loss": 0.0071, "step": 330600 }, { "epoch": 30.869037617847475, "grad_norm": 3.311934232711792, "learning_rate": 1.9166744599270555e-06, "loss": 0.0035, "step": 330700 }, { "epoch": 30.878372071315226, "grad_norm": 0.004413304850459099, "learning_rate": 1.915739268680445e-06, "loss": 0.0072, "step": 330800 }, { "epoch": 30.887706524782974, "grad_norm": 0.4987945854663849, "learning_rate": 1.914804077433835e-06, "loss": 0.0049, "step": 330900 }, { "epoch": 30.897040978250722, "grad_norm": 0.09157902002334595, "learning_rate": 1.913868886187225e-06, "loss": 0.0048, "step": 331000 }, { "epoch": 30.906375431718473, "grad_norm": 0.011886524967849255, "learning_rate": 1.9129336949406157e-06, "loss": 0.0034, "step": 331100 }, { "epoch": 30.91570988518622, "grad_norm": 0.7323275208473206, "learning_rate": 1.9119985036940057e-06, "loss": 0.0047, "step": 331200 }, { "epoch": 30.925044338653972, "grad_norm": 0.005618801806122065, "learning_rate": 1.9110633124473957e-06, "loss": 0.0047, "step": 331300 }, { "epoch": 30.93437879212172, "grad_norm": 0.9849934577941895, "learning_rate": 1.9101281212007858e-06, "loss": 0.0046, "step": 331400 }, { "epoch": 30.94371324558947, "grad_norm": 0.04053429886698723, "learning_rate": 1.9091929299541758e-06, "loss": 0.0044, "step": 331500 }, { "epoch": 30.95304769905722, "grad_norm": 4.017446994781494, "learning_rate": 1.908257738707566e-06, "loss": 0.006, "step": 331600 }, { "epoch": 30.96238215252497, "grad_norm": 0.018305394798517227, "learning_rate": 1.9073225474609558e-06, "loss": 0.003, "step": 331700 }, { "epoch": 30.971716605992718, "grad_norm": 2.199390411376953, "learning_rate": 1.9063873562143459e-06, "loss": 0.0059, "step": 331800 }, { "epoch": 30.98105105946047, "grad_norm": 0.028205962851643562, "learning_rate": 1.905452164967736e-06, "loss": 0.0039, "step": 331900 }, { "epoch": 30.990385512928217, "grad_norm": 0.36713963747024536, "learning_rate": 1.9045169737211261e-06, "loss": 0.0055, "step": 332000 }, { "epoch": 30.99971996639597, "grad_norm": 0.42816323041915894, "learning_rate": 1.9035817824745162e-06, "loss": 0.0037, "step": 332100 }, { "epoch": 31.009054419863716, "grad_norm": 1.068349838256836, "learning_rate": 1.9026465912279062e-06, "loss": 0.0027, "step": 332200 }, { "epoch": 31.018388873331467, "grad_norm": 4.312209606170654, "learning_rate": 1.9017113999812965e-06, "loss": 0.0049, "step": 332300 }, { "epoch": 31.027723326799215, "grad_norm": 0.2818322777748108, "learning_rate": 1.9007762087346865e-06, "loss": 0.0037, "step": 332400 }, { "epoch": 31.037057780266966, "grad_norm": 0.00316547485999763, "learning_rate": 1.8998410174880765e-06, "loss": 0.0033, "step": 332500 }, { "epoch": 31.046392233734714, "grad_norm": 0.2758195400238037, "learning_rate": 1.8989058262414663e-06, "loss": 0.0038, "step": 332600 }, { "epoch": 31.055726687202466, "grad_norm": 2.130836248397827, "learning_rate": 1.8979706349948566e-06, "loss": 0.0024, "step": 332700 }, { "epoch": 31.065061140670213, "grad_norm": 0.08947693556547165, "learning_rate": 1.8970354437482466e-06, "loss": 0.0038, "step": 332800 }, { "epoch": 31.074395594137965, "grad_norm": 1.6380356550216675, "learning_rate": 1.8961002525016366e-06, "loss": 0.0045, "step": 332900 }, { "epoch": 31.083730047605712, "grad_norm": 0.8196345567703247, "learning_rate": 1.8951650612550269e-06, "loss": 0.004, "step": 333000 }, { "epoch": 31.093064501073464, "grad_norm": 0.17092812061309814, "learning_rate": 1.894229870008417e-06, "loss": 0.0047, "step": 333100 }, { "epoch": 31.10239895454121, "grad_norm": 0.0021082714665681124, "learning_rate": 1.893294678761807e-06, "loss": 0.0034, "step": 333200 }, { "epoch": 31.111733408008963, "grad_norm": 0.6645957827568054, "learning_rate": 1.8923594875151972e-06, "loss": 0.0034, "step": 333300 }, { "epoch": 31.12106786147671, "grad_norm": 0.016514675691723824, "learning_rate": 1.8914242962685872e-06, "loss": 0.0046, "step": 333400 }, { "epoch": 31.13040231494446, "grad_norm": 3.926057815551758, "learning_rate": 1.890489105021977e-06, "loss": 0.006, "step": 333500 }, { "epoch": 31.13973676841221, "grad_norm": 0.22652190923690796, "learning_rate": 1.889553913775367e-06, "loss": 0.0046, "step": 333600 }, { "epoch": 31.149071221879957, "grad_norm": 0.19263888895511627, "learning_rate": 1.8886187225287573e-06, "loss": 0.0049, "step": 333700 }, { "epoch": 31.15840567534771, "grad_norm": 1.8662669658660889, "learning_rate": 1.8876835312821473e-06, "loss": 0.0039, "step": 333800 }, { "epoch": 31.167740128815456, "grad_norm": 0.054613955318927765, "learning_rate": 1.8867483400355374e-06, "loss": 0.0028, "step": 333900 }, { "epoch": 31.177074582283208, "grad_norm": 0.36273062229156494, "learning_rate": 1.8858131487889276e-06, "loss": 0.0051, "step": 334000 }, { "epoch": 31.186409035750955, "grad_norm": 0.2153017520904541, "learning_rate": 1.8848779575423176e-06, "loss": 0.0032, "step": 334100 }, { "epoch": 31.195743489218707, "grad_norm": 0.27329394221305847, "learning_rate": 1.8839427662957077e-06, "loss": 0.0043, "step": 334200 }, { "epoch": 31.205077942686454, "grad_norm": 3.79518723487854, "learning_rate": 1.883007575049098e-06, "loss": 0.0032, "step": 334300 }, { "epoch": 31.214412396154206, "grad_norm": 0.5909180045127869, "learning_rate": 1.8820723838024877e-06, "loss": 0.0047, "step": 334400 }, { "epoch": 31.223746849621953, "grad_norm": 0.7096433043479919, "learning_rate": 1.8811371925558778e-06, "loss": 0.0033, "step": 334500 }, { "epoch": 31.233081303089705, "grad_norm": 0.49575918912887573, "learning_rate": 1.8802020013092678e-06, "loss": 0.0029, "step": 334600 }, { "epoch": 31.242415756557453, "grad_norm": 0.48446381092071533, "learning_rate": 1.879266810062658e-06, "loss": 0.0028, "step": 334700 }, { "epoch": 31.251750210025204, "grad_norm": 0.10462455451488495, "learning_rate": 1.878331618816048e-06, "loss": 0.0036, "step": 334800 }, { "epoch": 31.26108466349295, "grad_norm": 0.02086225338280201, "learning_rate": 1.877396427569438e-06, "loss": 0.0055, "step": 334900 }, { "epoch": 31.270419116960703, "grad_norm": 0.3209587037563324, "learning_rate": 1.8764612363228281e-06, "loss": 0.0049, "step": 335000 }, { "epoch": 31.270419116960703, "eval_accuracy": 0.697316219369895, "eval_f1": 0.825323047083184, "eval_loss": 0.31417304277420044, "eval_roc_auc": 0.9104968487383972, "eval_runtime": 146.2182, "eval_samples_per_second": 293.055, "eval_steps_per_second": 293.055, "step": 335000 }, { "epoch": 31.27975357042845, "grad_norm": 0.24615320563316345, "learning_rate": 1.8755260450762184e-06, "loss": 0.0036, "step": 335100 }, { "epoch": 31.289088023896202, "grad_norm": 3.3895957469940186, "learning_rate": 1.8745908538296084e-06, "loss": 0.0033, "step": 335200 }, { "epoch": 31.29842247736395, "grad_norm": 0.19832409918308258, "learning_rate": 1.8736556625829982e-06, "loss": 0.0048, "step": 335300 }, { "epoch": 31.3077569308317, "grad_norm": 4.210199356079102, "learning_rate": 1.8727204713363882e-06, "loss": 0.0043, "step": 335400 }, { "epoch": 31.31709138429945, "grad_norm": 3.6893272399902344, "learning_rate": 1.8717852800897785e-06, "loss": 0.0056, "step": 335500 }, { "epoch": 31.3264258377672, "grad_norm": 5.771296977996826, "learning_rate": 1.8708500888431685e-06, "loss": 0.0028, "step": 335600 }, { "epoch": 31.335760291234948, "grad_norm": 0.5880802869796753, "learning_rate": 1.8699148975965586e-06, "loss": 0.0047, "step": 335700 }, { "epoch": 31.3450947447027, "grad_norm": 1.8070284128189087, "learning_rate": 1.8689797063499488e-06, "loss": 0.0042, "step": 335800 }, { "epoch": 31.354429198170447, "grad_norm": 1.1830154657363892, "learning_rate": 1.8680445151033388e-06, "loss": 0.0037, "step": 335900 }, { "epoch": 31.363763651638198, "grad_norm": 3.039950132369995, "learning_rate": 1.8671093238567289e-06, "loss": 0.004, "step": 336000 }, { "epoch": 31.373098105105946, "grad_norm": 0.300476998090744, "learning_rate": 1.866174132610119e-06, "loss": 0.0034, "step": 336100 }, { "epoch": 31.382432558573697, "grad_norm": 3.094350814819336, "learning_rate": 1.865238941363509e-06, "loss": 0.0047, "step": 336200 }, { "epoch": 31.391767012041445, "grad_norm": 0.013184569776058197, "learning_rate": 1.864303750116899e-06, "loss": 0.0029, "step": 336300 }, { "epoch": 31.401101465509193, "grad_norm": 3.838836908340454, "learning_rate": 1.863368558870289e-06, "loss": 0.0056, "step": 336400 }, { "epoch": 31.410435918976944, "grad_norm": 0.37020471692085266, "learning_rate": 1.8624333676236792e-06, "loss": 0.0044, "step": 336500 }, { "epoch": 31.41977037244469, "grad_norm": 4.151675701141357, "learning_rate": 1.8614981763770693e-06, "loss": 0.0056, "step": 336600 }, { "epoch": 31.429104825912443, "grad_norm": 2.467414379119873, "learning_rate": 1.8605629851304593e-06, "loss": 0.0062, "step": 336700 }, { "epoch": 31.43843927938019, "grad_norm": 0.17749421298503876, "learning_rate": 1.8596277938838495e-06, "loss": 0.0069, "step": 336800 }, { "epoch": 31.447773732847942, "grad_norm": 2.4016919136047363, "learning_rate": 1.8586926026372396e-06, "loss": 0.0042, "step": 336900 }, { "epoch": 31.45710818631569, "grad_norm": 0.7574518322944641, "learning_rate": 1.8577574113906296e-06, "loss": 0.0028, "step": 337000 }, { "epoch": 31.46644263978344, "grad_norm": 3.0236876010894775, "learning_rate": 1.8568222201440194e-06, "loss": 0.0041, "step": 337100 }, { "epoch": 31.47577709325119, "grad_norm": 0.2764246165752411, "learning_rate": 1.8558870288974096e-06, "loss": 0.0041, "step": 337200 }, { "epoch": 31.48511154671894, "grad_norm": 0.0027712825685739517, "learning_rate": 1.8549518376507997e-06, "loss": 0.0034, "step": 337300 }, { "epoch": 31.494446000186688, "grad_norm": 0.014547038823366165, "learning_rate": 1.8540166464041897e-06, "loss": 0.0038, "step": 337400 }, { "epoch": 31.50378045365444, "grad_norm": 3.527279853820801, "learning_rate": 1.85308145515758e-06, "loss": 0.0023, "step": 337500 }, { "epoch": 31.513114907122187, "grad_norm": 3.210984706878662, "learning_rate": 1.85214626391097e-06, "loss": 0.0041, "step": 337600 }, { "epoch": 31.52244936058994, "grad_norm": 0.1559065282344818, "learning_rate": 1.85121107266436e-06, "loss": 0.0031, "step": 337700 }, { "epoch": 31.531783814057686, "grad_norm": 0.05127463489770889, "learning_rate": 1.8502758814177503e-06, "loss": 0.0039, "step": 337800 }, { "epoch": 31.541118267525437, "grad_norm": 0.038934193551540375, "learning_rate": 1.8493406901711403e-06, "loss": 0.0035, "step": 337900 }, { "epoch": 31.550452720993185, "grad_norm": 0.005585892591625452, "learning_rate": 1.84840549892453e-06, "loss": 0.0057, "step": 338000 }, { "epoch": 31.559787174460936, "grad_norm": 3.614647626876831, "learning_rate": 1.8474703076779201e-06, "loss": 0.0036, "step": 338100 }, { "epoch": 31.569121627928684, "grad_norm": 0.018496859818696976, "learning_rate": 1.8465351164313104e-06, "loss": 0.0046, "step": 338200 }, { "epoch": 31.578456081396435, "grad_norm": 0.019740255549550056, "learning_rate": 1.8455999251847004e-06, "loss": 0.004, "step": 338300 }, { "epoch": 31.587790534864183, "grad_norm": 0.8315768837928772, "learning_rate": 1.8446647339380904e-06, "loss": 0.0047, "step": 338400 }, { "epoch": 31.597124988331935, "grad_norm": 1.4443250894546509, "learning_rate": 1.8437295426914807e-06, "loss": 0.0034, "step": 338500 }, { "epoch": 31.606459441799682, "grad_norm": 1.5513696670532227, "learning_rate": 1.8427943514448707e-06, "loss": 0.0043, "step": 338600 }, { "epoch": 31.615793895267434, "grad_norm": 4.900601863861084, "learning_rate": 1.8418591601982607e-06, "loss": 0.0043, "step": 338700 }, { "epoch": 31.62512834873518, "grad_norm": 2.9184958934783936, "learning_rate": 1.8409239689516508e-06, "loss": 0.0049, "step": 338800 }, { "epoch": 31.634462802202933, "grad_norm": 0.0016648268792778254, "learning_rate": 1.8399887777050408e-06, "loss": 0.0031, "step": 338900 }, { "epoch": 31.64379725567068, "grad_norm": 1.7647099494934082, "learning_rate": 1.8390535864584308e-06, "loss": 0.0029, "step": 339000 }, { "epoch": 31.65313170913843, "grad_norm": 0.11185863614082336, "learning_rate": 1.8381183952118209e-06, "loss": 0.0051, "step": 339100 }, { "epoch": 31.66246616260618, "grad_norm": 1.8889985084533691, "learning_rate": 1.8371832039652109e-06, "loss": 0.0049, "step": 339200 }, { "epoch": 31.671800616073927, "grad_norm": 0.11619017273187637, "learning_rate": 1.8362480127186011e-06, "loss": 0.0038, "step": 339300 }, { "epoch": 31.68113506954168, "grad_norm": 2.0771706104278564, "learning_rate": 1.8353128214719912e-06, "loss": 0.0034, "step": 339400 }, { "epoch": 31.690469523009426, "grad_norm": 0.40564650297164917, "learning_rate": 1.8343776302253812e-06, "loss": 0.0037, "step": 339500 }, { "epoch": 31.699803976477178, "grad_norm": 0.14088594913482666, "learning_rate": 1.8334424389787714e-06, "loss": 0.0027, "step": 339600 }, { "epoch": 31.709138429944925, "grad_norm": 7.626332759857178, "learning_rate": 1.8325072477321615e-06, "loss": 0.004, "step": 339700 }, { "epoch": 31.718472883412677, "grad_norm": 0.1155967265367508, "learning_rate": 1.8315720564855513e-06, "loss": 0.0039, "step": 339800 }, { "epoch": 31.727807336880424, "grad_norm": 0.2470710426568985, "learning_rate": 1.8306368652389413e-06, "loss": 0.0038, "step": 339900 }, { "epoch": 31.737141790348176, "grad_norm": 1.5942336320877075, "learning_rate": 1.8297016739923316e-06, "loss": 0.0027, "step": 340000 }, { "epoch": 31.737141790348176, "eval_accuracy": 0.701026837806301, "eval_f1": 0.8268933810626798, "eval_loss": 0.3209822177886963, "eval_roc_auc": 0.9099266588085887, "eval_runtime": 145.3481, "eval_samples_per_second": 294.809, "eval_steps_per_second": 294.809, "step": 340000 }, { "epoch": 31.746476243815923, "grad_norm": 0.3305841088294983, "learning_rate": 1.8287664827457216e-06, "loss": 0.004, "step": 340100 }, { "epoch": 31.755810697283675, "grad_norm": 0.02020084299147129, "learning_rate": 1.8278312914991116e-06, "loss": 0.0039, "step": 340200 }, { "epoch": 31.765145150751422, "grad_norm": 0.7214512228965759, "learning_rate": 1.8268961002525019e-06, "loss": 0.0038, "step": 340300 }, { "epoch": 31.774479604219174, "grad_norm": 0.2579055428504944, "learning_rate": 1.825960909005892e-06, "loss": 0.004, "step": 340400 }, { "epoch": 31.78381405768692, "grad_norm": 0.17052970826625824, "learning_rate": 1.825025717759282e-06, "loss": 0.0033, "step": 340500 }, { "epoch": 31.793148511154673, "grad_norm": 0.06051759049296379, "learning_rate": 1.8240905265126722e-06, "loss": 0.0046, "step": 340600 }, { "epoch": 31.80248296462242, "grad_norm": 2.142622947692871, "learning_rate": 1.823155335266062e-06, "loss": 0.0033, "step": 340700 }, { "epoch": 31.811817418090172, "grad_norm": 0.04973688721656799, "learning_rate": 1.822220144019452e-06, "loss": 0.0033, "step": 340800 }, { "epoch": 31.82115187155792, "grad_norm": 0.46899083256721497, "learning_rate": 1.821284952772842e-06, "loss": 0.0028, "step": 340900 }, { "epoch": 31.83048632502567, "grad_norm": 2.505326509475708, "learning_rate": 1.8203497615262323e-06, "loss": 0.0043, "step": 341000 }, { "epoch": 31.83982077849342, "grad_norm": 2.6519618034362793, "learning_rate": 1.8194145702796223e-06, "loss": 0.0043, "step": 341100 }, { "epoch": 31.84915523196117, "grad_norm": 0.04137295112013817, "learning_rate": 1.8184793790330124e-06, "loss": 0.0037, "step": 341200 }, { "epoch": 31.858489685428918, "grad_norm": 0.05203774943947792, "learning_rate": 1.8175441877864026e-06, "loss": 0.0042, "step": 341300 }, { "epoch": 31.86782413889667, "grad_norm": 0.3553411364555359, "learning_rate": 1.8166089965397926e-06, "loss": 0.0035, "step": 341400 }, { "epoch": 31.877158592364417, "grad_norm": 1.1072617769241333, "learning_rate": 1.8156738052931827e-06, "loss": 0.0063, "step": 341500 }, { "epoch": 31.886493045832168, "grad_norm": 5.882597923278809, "learning_rate": 1.814738614046573e-06, "loss": 0.0046, "step": 341600 }, { "epoch": 31.895827499299916, "grad_norm": 0.2826109826564789, "learning_rate": 1.8138034227999627e-06, "loss": 0.0047, "step": 341700 }, { "epoch": 31.905161952767667, "grad_norm": 0.035021472722291946, "learning_rate": 1.8128682315533527e-06, "loss": 0.0041, "step": 341800 }, { "epoch": 31.914496406235415, "grad_norm": 0.07684609293937683, "learning_rate": 1.8119330403067428e-06, "loss": 0.004, "step": 341900 }, { "epoch": 31.923830859703166, "grad_norm": 0.11821316927671432, "learning_rate": 1.810997849060133e-06, "loss": 0.0037, "step": 342000 }, { "epoch": 31.933165313170914, "grad_norm": 0.06765300035476685, "learning_rate": 1.810062657813523e-06, "loss": 0.0025, "step": 342100 }, { "epoch": 31.94249976663866, "grad_norm": 0.0035392185673117638, "learning_rate": 1.809127466566913e-06, "loss": 0.0042, "step": 342200 }, { "epoch": 31.951834220106413, "grad_norm": 1.9289417266845703, "learning_rate": 1.8081922753203033e-06, "loss": 0.0047, "step": 342300 }, { "epoch": 31.96116867357416, "grad_norm": 0.04285493493080139, "learning_rate": 1.8072570840736934e-06, "loss": 0.0035, "step": 342400 }, { "epoch": 31.970503127041912, "grad_norm": 0.009150183759629726, "learning_rate": 1.8063218928270834e-06, "loss": 0.0026, "step": 342500 }, { "epoch": 31.97983758050966, "grad_norm": 0.018499059602618217, "learning_rate": 1.8053867015804732e-06, "loss": 0.0036, "step": 342600 }, { "epoch": 31.98917203397741, "grad_norm": 0.06568681448698044, "learning_rate": 1.8044515103338634e-06, "loss": 0.0041, "step": 342700 }, { "epoch": 31.99850648744516, "grad_norm": 1.7290881872177124, "learning_rate": 1.8035163190872535e-06, "loss": 0.0046, "step": 342800 }, { "epoch": 32.00784094091291, "grad_norm": 3.1394171714782715, "learning_rate": 1.8025811278406435e-06, "loss": 0.0025, "step": 342900 }, { "epoch": 32.01717539438066, "grad_norm": 0.186242014169693, "learning_rate": 1.8016459365940335e-06, "loss": 0.004, "step": 343000 }, { "epoch": 32.02650984784841, "grad_norm": 0.09804253280162811, "learning_rate": 1.8007107453474238e-06, "loss": 0.0048, "step": 343100 }, { "epoch": 32.03584430131616, "grad_norm": 1.5334669351577759, "learning_rate": 1.7997755541008138e-06, "loss": 0.0048, "step": 343200 }, { "epoch": 32.045178754783905, "grad_norm": 0.32666897773742676, "learning_rate": 1.7988403628542038e-06, "loss": 0.0035, "step": 343300 }, { "epoch": 32.054513208251656, "grad_norm": 0.016940448433160782, "learning_rate": 1.797905171607594e-06, "loss": 0.003, "step": 343400 }, { "epoch": 32.06384766171941, "grad_norm": 5.3478169441223145, "learning_rate": 1.796969980360984e-06, "loss": 0.0035, "step": 343500 }, { "epoch": 32.07318211518716, "grad_norm": 0.05238497629761696, "learning_rate": 1.796034789114374e-06, "loss": 0.0027, "step": 343600 }, { "epoch": 32.0825165686549, "grad_norm": 0.7197490334510803, "learning_rate": 1.795099597867764e-06, "loss": 0.0049, "step": 343700 }, { "epoch": 32.091851022122654, "grad_norm": 0.2311205416917801, "learning_rate": 1.7941644066211542e-06, "loss": 0.0033, "step": 343800 }, { "epoch": 32.101185475590405, "grad_norm": 0.4390159547328949, "learning_rate": 1.7932292153745442e-06, "loss": 0.0042, "step": 343900 }, { "epoch": 32.11051992905816, "grad_norm": 5.225611686706543, "learning_rate": 1.7922940241279343e-06, "loss": 0.0053, "step": 344000 }, { "epoch": 32.1198543825259, "grad_norm": 0.18093782663345337, "learning_rate": 1.7913588328813245e-06, "loss": 0.0058, "step": 344100 }, { "epoch": 32.12918883599365, "grad_norm": 0.3424740433692932, "learning_rate": 1.7904236416347145e-06, "loss": 0.0031, "step": 344200 }, { "epoch": 32.1385232894614, "grad_norm": 1.9656490087509155, "learning_rate": 1.7894884503881046e-06, "loss": 0.0038, "step": 344300 }, { "epoch": 32.147857742929155, "grad_norm": 3.597606897354126, "learning_rate": 1.7885532591414944e-06, "loss": 0.0051, "step": 344400 }, { "epoch": 32.1571921963969, "grad_norm": 0.2601391673088074, "learning_rate": 1.7876180678948846e-06, "loss": 0.0041, "step": 344500 }, { "epoch": 32.16652664986465, "grad_norm": 0.0034588014241307974, "learning_rate": 1.7866828766482747e-06, "loss": 0.005, "step": 344600 }, { "epoch": 32.1758611033324, "grad_norm": 2.743818998336792, "learning_rate": 1.7857476854016647e-06, "loss": 0.0036, "step": 344700 }, { "epoch": 32.18519555680015, "grad_norm": 0.023326532915234566, "learning_rate": 1.784812494155055e-06, "loss": 0.003, "step": 344800 }, { "epoch": 32.1945300102679, "grad_norm": 0.01151767373085022, "learning_rate": 1.783877302908445e-06, "loss": 0.0044, "step": 344900 }, { "epoch": 32.20386446373565, "grad_norm": 0.20107083022594452, "learning_rate": 1.782942111661835e-06, "loss": 0.0037, "step": 345000 }, { "epoch": 32.20386446373565, "eval_accuracy": 0.6985297549591598, "eval_f1": 0.8274859654467123, "eval_loss": 0.3212384283542633, "eval_roc_auc": 0.9130827342388943, "eval_runtime": 145.6332, "eval_samples_per_second": 294.232, "eval_steps_per_second": 294.232, "step": 345000 }, { "epoch": 32.2131989172034, "grad_norm": 0.07210194319486618, "learning_rate": 1.7820069204152252e-06, "loss": 0.004, "step": 345100 }, { "epoch": 32.222533370671144, "grad_norm": 1.3721027374267578, "learning_rate": 1.7810717291686153e-06, "loss": 0.0034, "step": 345200 }, { "epoch": 32.231867824138895, "grad_norm": 0.13639846444129944, "learning_rate": 1.780136537922005e-06, "loss": 0.0046, "step": 345300 }, { "epoch": 32.24120227760665, "grad_norm": 5.191885471343994, "learning_rate": 1.7792013466753951e-06, "loss": 0.0024, "step": 345400 }, { "epoch": 32.2505367310744, "grad_norm": 0.04711063206195831, "learning_rate": 1.7782661554287854e-06, "loss": 0.0027, "step": 345500 }, { "epoch": 32.25987118454214, "grad_norm": 0.04315119609236717, "learning_rate": 1.7773309641821754e-06, "loss": 0.005, "step": 345600 }, { "epoch": 32.26920563800989, "grad_norm": 4.322885036468506, "learning_rate": 1.7763957729355654e-06, "loss": 0.004, "step": 345700 }, { "epoch": 32.278540091477645, "grad_norm": 0.03157166391611099, "learning_rate": 1.7754605816889557e-06, "loss": 0.0038, "step": 345800 }, { "epoch": 32.287874544945396, "grad_norm": 0.05637924373149872, "learning_rate": 1.7745253904423457e-06, "loss": 0.0039, "step": 345900 }, { "epoch": 32.29720899841314, "grad_norm": 3.882502794265747, "learning_rate": 1.7735901991957357e-06, "loss": 0.0058, "step": 346000 }, { "epoch": 32.30654345188089, "grad_norm": 0.05365987494587898, "learning_rate": 1.772655007949126e-06, "loss": 0.0055, "step": 346100 }, { "epoch": 32.31587790534864, "grad_norm": 3.6942784786224365, "learning_rate": 1.7717198167025158e-06, "loss": 0.0041, "step": 346200 }, { "epoch": 32.325212358816394, "grad_norm": 0.22445116937160492, "learning_rate": 1.7707846254559058e-06, "loss": 0.0051, "step": 346300 }, { "epoch": 32.33454681228414, "grad_norm": 9.250184059143066, "learning_rate": 1.7698494342092959e-06, "loss": 0.0023, "step": 346400 }, { "epoch": 32.34388126575189, "grad_norm": 0.557424008846283, "learning_rate": 1.768914242962686e-06, "loss": 0.0029, "step": 346500 }, { "epoch": 32.35321571921964, "grad_norm": 0.0061666397377848625, "learning_rate": 1.7679790517160761e-06, "loss": 0.0036, "step": 346600 }, { "epoch": 32.36255017268739, "grad_norm": 0.005484063643962145, "learning_rate": 1.7670438604694662e-06, "loss": 0.0034, "step": 346700 }, { "epoch": 32.371884626155136, "grad_norm": 0.3039393424987793, "learning_rate": 1.7661086692228562e-06, "loss": 0.0031, "step": 346800 }, { "epoch": 32.38121907962289, "grad_norm": 0.04563762620091438, "learning_rate": 1.7651734779762464e-06, "loss": 0.0024, "step": 346900 }, { "epoch": 32.39055353309064, "grad_norm": 0.07914496213197708, "learning_rate": 1.7642382867296365e-06, "loss": 0.0039, "step": 347000 }, { "epoch": 32.39988798655839, "grad_norm": 1.5233699083328247, "learning_rate": 1.7633030954830263e-06, "loss": 0.0041, "step": 347100 }, { "epoch": 32.409222440026134, "grad_norm": 0.15725432336330414, "learning_rate": 1.7623679042364163e-06, "loss": 0.0034, "step": 347200 }, { "epoch": 32.418556893493886, "grad_norm": 0.5217418074607849, "learning_rate": 1.7614327129898066e-06, "loss": 0.0038, "step": 347300 }, { "epoch": 32.42789134696164, "grad_norm": 3.404552459716797, "learning_rate": 1.7604975217431966e-06, "loss": 0.0038, "step": 347400 }, { "epoch": 32.43722580042939, "grad_norm": 0.713662326335907, "learning_rate": 1.7595623304965866e-06, "loss": 0.0038, "step": 347500 }, { "epoch": 32.44656025389713, "grad_norm": 0.20974105596542358, "learning_rate": 1.7586271392499769e-06, "loss": 0.0042, "step": 347600 }, { "epoch": 32.455894707364884, "grad_norm": 5.105913162231445, "learning_rate": 1.7576919480033669e-06, "loss": 0.0053, "step": 347700 }, { "epoch": 32.465229160832635, "grad_norm": 0.68758624792099, "learning_rate": 1.756756756756757e-06, "loss": 0.0047, "step": 347800 }, { "epoch": 32.47456361430038, "grad_norm": 0.4017389118671417, "learning_rate": 1.7558215655101472e-06, "loss": 0.0031, "step": 347900 }, { "epoch": 32.48389806776813, "grad_norm": 0.020316680893301964, "learning_rate": 1.754886374263537e-06, "loss": 0.0026, "step": 348000 }, { "epoch": 32.49323252123588, "grad_norm": 0.024702126160264015, "learning_rate": 1.753951183016927e-06, "loss": 0.0041, "step": 348100 }, { "epoch": 32.50256697470363, "grad_norm": 0.450163871049881, "learning_rate": 1.753015991770317e-06, "loss": 0.0031, "step": 348200 }, { "epoch": 32.51190142817138, "grad_norm": 0.0833345353603363, "learning_rate": 1.7520808005237073e-06, "loss": 0.0045, "step": 348300 }, { "epoch": 32.52123588163913, "grad_norm": 2.137829065322876, "learning_rate": 1.7511456092770973e-06, "loss": 0.0055, "step": 348400 }, { "epoch": 32.53057033510688, "grad_norm": 0.059810835868120193, "learning_rate": 1.7502104180304873e-06, "loss": 0.0034, "step": 348500 }, { "epoch": 32.53990478857463, "grad_norm": 0.028466369956731796, "learning_rate": 1.7492752267838776e-06, "loss": 0.0035, "step": 348600 }, { "epoch": 32.549239242042376, "grad_norm": 1.3755356073379517, "learning_rate": 1.7483400355372676e-06, "loss": 0.0036, "step": 348700 }, { "epoch": 32.55857369551013, "grad_norm": 0.33889907598495483, "learning_rate": 1.7474048442906576e-06, "loss": 0.0034, "step": 348800 }, { "epoch": 32.56790814897788, "grad_norm": 0.6944507956504822, "learning_rate": 1.7464696530440475e-06, "loss": 0.004, "step": 348900 }, { "epoch": 32.57724260244563, "grad_norm": 2.6014821529388428, "learning_rate": 1.7455344617974377e-06, "loss": 0.0042, "step": 349000 }, { "epoch": 32.586577055913374, "grad_norm": 0.07239403575658798, "learning_rate": 1.7445992705508277e-06, "loss": 0.0045, "step": 349100 }, { "epoch": 32.595911509381125, "grad_norm": 1.5216026306152344, "learning_rate": 1.7436640793042178e-06, "loss": 0.005, "step": 349200 }, { "epoch": 32.605245962848876, "grad_norm": 0.04123558849096298, "learning_rate": 1.742728888057608e-06, "loss": 0.0049, "step": 349300 }, { "epoch": 32.61458041631663, "grad_norm": 0.25729554891586304, "learning_rate": 1.741793696810998e-06, "loss": 0.0031, "step": 349400 }, { "epoch": 32.62391486978437, "grad_norm": 0.1865082085132599, "learning_rate": 1.740858505564388e-06, "loss": 0.0028, "step": 349500 }, { "epoch": 32.63324932325212, "grad_norm": 0.003952524159103632, "learning_rate": 1.7399233143177783e-06, "loss": 0.0038, "step": 349600 }, { "epoch": 32.642583776719874, "grad_norm": 0.19912201166152954, "learning_rate": 1.7389881230711683e-06, "loss": 0.0035, "step": 349700 }, { "epoch": 32.651918230187626, "grad_norm": 5.538145065307617, "learning_rate": 1.7380529318245582e-06, "loss": 0.0059, "step": 349800 }, { "epoch": 32.66125268365537, "grad_norm": 2.6367604732513428, "learning_rate": 1.7371177405779482e-06, "loss": 0.0042, "step": 349900 }, { "epoch": 32.67058713712312, "grad_norm": 1.1233738660812378, "learning_rate": 1.7361825493313384e-06, "loss": 0.0027, "step": 350000 }, { "epoch": 32.67058713712312, "eval_accuracy": 0.699416569428238, "eval_f1": 0.8270961297501905, "eval_loss": 0.32311293482780457, "eval_roc_auc": 0.9105052590962154, "eval_runtime": 145.8082, "eval_samples_per_second": 293.879, "eval_steps_per_second": 293.879, "step": 350000 }, { "epoch": 32.67992159059087, "grad_norm": 0.25093314051628113, "learning_rate": 1.7352473580847285e-06, "loss": 0.0025, "step": 350100 }, { "epoch": 32.689256044058624, "grad_norm": 0.9534094333648682, "learning_rate": 1.7343121668381185e-06, "loss": 0.0047, "step": 350200 }, { "epoch": 32.69859049752637, "grad_norm": 3.5281145572662354, "learning_rate": 1.7333769755915087e-06, "loss": 0.0047, "step": 350300 }, { "epoch": 32.70792495099412, "grad_norm": 2.768843173980713, "learning_rate": 1.7324417843448988e-06, "loss": 0.0055, "step": 350400 }, { "epoch": 32.71725940446187, "grad_norm": 0.009961501695215702, "learning_rate": 1.7315065930982888e-06, "loss": 0.0031, "step": 350500 }, { "epoch": 32.726593857929615, "grad_norm": 1.1560730934143066, "learning_rate": 1.7305714018516788e-06, "loss": 0.0033, "step": 350600 }, { "epoch": 32.735928311397366, "grad_norm": 0.048988547176122665, "learning_rate": 1.7296362106050687e-06, "loss": 0.0044, "step": 350700 }, { "epoch": 32.74526276486512, "grad_norm": 0.32270127534866333, "learning_rate": 1.728701019358459e-06, "loss": 0.004, "step": 350800 }, { "epoch": 32.75459721833287, "grad_norm": 2.504103899002075, "learning_rate": 1.727765828111849e-06, "loss": 0.0038, "step": 350900 }, { "epoch": 32.76393167180061, "grad_norm": 5.2609076499938965, "learning_rate": 1.726830636865239e-06, "loss": 0.0044, "step": 351000 }, { "epoch": 32.773266125268364, "grad_norm": 0.005890487227588892, "learning_rate": 1.7258954456186292e-06, "loss": 0.0042, "step": 351100 }, { "epoch": 32.782600578736115, "grad_norm": 0.35102900862693787, "learning_rate": 1.7249602543720192e-06, "loss": 0.0038, "step": 351200 }, { "epoch": 32.79193503220387, "grad_norm": 0.16333197057247162, "learning_rate": 1.7240250631254093e-06, "loss": 0.0032, "step": 351300 }, { "epoch": 32.80126948567161, "grad_norm": 0.10082226246595383, "learning_rate": 1.7230898718787995e-06, "loss": 0.0037, "step": 351400 }, { "epoch": 32.81060393913936, "grad_norm": 10.857118606567383, "learning_rate": 1.7221546806321895e-06, "loss": 0.0045, "step": 351500 }, { "epoch": 32.81993839260711, "grad_norm": 0.011608061380684376, "learning_rate": 1.7212194893855794e-06, "loss": 0.004, "step": 351600 }, { "epoch": 32.829272846074865, "grad_norm": 0.006057558115571737, "learning_rate": 1.7202842981389694e-06, "loss": 0.0031, "step": 351700 }, { "epoch": 32.83860729954261, "grad_norm": 0.168789342045784, "learning_rate": 1.7193491068923596e-06, "loss": 0.0042, "step": 351800 }, { "epoch": 32.84794175301036, "grad_norm": 0.7468329071998596, "learning_rate": 1.7184139156457497e-06, "loss": 0.0044, "step": 351900 }, { "epoch": 32.85727620647811, "grad_norm": 0.25371718406677246, "learning_rate": 1.7174787243991397e-06, "loss": 0.0032, "step": 352000 }, { "epoch": 32.86661065994586, "grad_norm": 0.5306461453437805, "learning_rate": 1.71654353315253e-06, "loss": 0.0029, "step": 352100 }, { "epoch": 32.87594511341361, "grad_norm": 0.06928067654371262, "learning_rate": 1.71560834190592e-06, "loss": 0.0047, "step": 352200 }, { "epoch": 32.88527956688136, "grad_norm": 0.11369329690933228, "learning_rate": 1.71467315065931e-06, "loss": 0.0042, "step": 352300 }, { "epoch": 32.89461402034911, "grad_norm": 0.04897873103618622, "learning_rate": 1.7137379594127002e-06, "loss": 0.0045, "step": 352400 }, { "epoch": 32.90394847381686, "grad_norm": 1.3871079683303833, "learning_rate": 1.71280276816609e-06, "loss": 0.0046, "step": 352500 }, { "epoch": 32.913282927284605, "grad_norm": 0.4510086178779602, "learning_rate": 1.71186757691948e-06, "loss": 0.0053, "step": 352600 }, { "epoch": 32.92261738075236, "grad_norm": 0.657210648059845, "learning_rate": 1.7109323856728701e-06, "loss": 0.0035, "step": 352700 }, { "epoch": 32.93195183422011, "grad_norm": 0.3591620624065399, "learning_rate": 1.7099971944262604e-06, "loss": 0.0033, "step": 352800 }, { "epoch": 32.94128628768786, "grad_norm": 0.8995446562767029, "learning_rate": 1.7090620031796504e-06, "loss": 0.0049, "step": 352900 }, { "epoch": 32.9506207411556, "grad_norm": 1.33119797706604, "learning_rate": 1.7081268119330404e-06, "loss": 0.0029, "step": 353000 }, { "epoch": 32.959955194623355, "grad_norm": 0.21145664155483246, "learning_rate": 1.7071916206864307e-06, "loss": 0.0049, "step": 353100 }, { "epoch": 32.969289648091106, "grad_norm": 1.7632379531860352, "learning_rate": 1.7062564294398207e-06, "loss": 0.0028, "step": 353200 }, { "epoch": 32.97862410155885, "grad_norm": 6.6991963386535645, "learning_rate": 1.7053212381932107e-06, "loss": 0.0035, "step": 353300 }, { "epoch": 32.9879585550266, "grad_norm": 0.2982717752456665, "learning_rate": 1.7043860469466005e-06, "loss": 0.0021, "step": 353400 }, { "epoch": 32.99729300849435, "grad_norm": 0.43078896403312683, "learning_rate": 1.7034508556999908e-06, "loss": 0.0049, "step": 353500 }, { "epoch": 33.006627461962104, "grad_norm": 6.239160537719727, "learning_rate": 1.7025156644533808e-06, "loss": 0.0048, "step": 353600 }, { "epoch": 33.01596191542985, "grad_norm": 0.015027749352157116, "learning_rate": 1.7015804732067708e-06, "loss": 0.003, "step": 353700 }, { "epoch": 33.0252963688976, "grad_norm": 0.23018407821655273, "learning_rate": 1.700645281960161e-06, "loss": 0.0028, "step": 353800 }, { "epoch": 33.03463082236535, "grad_norm": 0.1484295278787613, "learning_rate": 1.6997100907135511e-06, "loss": 0.0036, "step": 353900 }, { "epoch": 33.0439652758331, "grad_norm": 1.0197677612304688, "learning_rate": 1.6987748994669411e-06, "loss": 0.0032, "step": 354000 }, { "epoch": 33.053299729300846, "grad_norm": 0.3479684889316559, "learning_rate": 1.6978397082203314e-06, "loss": 0.003, "step": 354100 }, { "epoch": 33.0626341827686, "grad_norm": 0.05462948977947235, "learning_rate": 1.6969045169737214e-06, "loss": 0.0035, "step": 354200 }, { "epoch": 33.07196863623635, "grad_norm": 0.15339802205562592, "learning_rate": 1.6959693257271112e-06, "loss": 0.0041, "step": 354300 }, { "epoch": 33.0813030897041, "grad_norm": 0.12501871585845947, "learning_rate": 1.6950341344805013e-06, "loss": 0.0041, "step": 354400 }, { "epoch": 33.090637543171844, "grad_norm": 0.015072465874254704, "learning_rate": 1.6940989432338913e-06, "loss": 0.0017, "step": 354500 }, { "epoch": 33.099971996639596, "grad_norm": 0.325387179851532, "learning_rate": 1.6931637519872815e-06, "loss": 0.0039, "step": 354600 }, { "epoch": 33.10930645010735, "grad_norm": 0.0011479702079668641, "learning_rate": 1.6922285607406716e-06, "loss": 0.0029, "step": 354700 }, { "epoch": 33.1186409035751, "grad_norm": 1.917184591293335, "learning_rate": 1.6912933694940616e-06, "loss": 0.0032, "step": 354800 }, { "epoch": 33.12797535704284, "grad_norm": 0.006843049544841051, "learning_rate": 1.6903581782474518e-06, "loss": 0.0033, "step": 354900 }, { "epoch": 33.137309810510594, "grad_norm": 0.08334121108055115, "learning_rate": 1.6894229870008419e-06, "loss": 0.0035, "step": 355000 }, { "epoch": 33.137309810510594, "eval_accuracy": 0.6989964994165694, "eval_f1": 0.8259268952281252, "eval_loss": 0.3267640173435211, "eval_roc_auc": 0.9113728853261186, "eval_runtime": 145.6724, "eval_samples_per_second": 294.153, "eval_steps_per_second": 294.153, "step": 355000 }, { "epoch": 33.146644263978345, "grad_norm": 0.645178496837616, "learning_rate": 1.688487795754232e-06, "loss": 0.0022, "step": 355100 }, { "epoch": 33.1559787174461, "grad_norm": 2.4388413429260254, "learning_rate": 1.6875526045076217e-06, "loss": 0.0037, "step": 355200 }, { "epoch": 33.16531317091384, "grad_norm": 0.003500531194731593, "learning_rate": 1.686617413261012e-06, "loss": 0.0025, "step": 355300 }, { "epoch": 33.17464762438159, "grad_norm": 4.010622978210449, "learning_rate": 1.685682222014402e-06, "loss": 0.0033, "step": 355400 }, { "epoch": 33.18398207784934, "grad_norm": 0.040682367980480194, "learning_rate": 1.684747030767792e-06, "loss": 0.0043, "step": 355500 }, { "epoch": 33.193316531317095, "grad_norm": 0.2328370213508606, "learning_rate": 1.6838118395211823e-06, "loss": 0.0027, "step": 355600 }, { "epoch": 33.20265098478484, "grad_norm": 0.0021440850105136633, "learning_rate": 1.6828766482745723e-06, "loss": 0.0036, "step": 355700 }, { "epoch": 33.21198543825259, "grad_norm": 0.1232384443283081, "learning_rate": 1.6819414570279623e-06, "loss": 0.0037, "step": 355800 }, { "epoch": 33.22131989172034, "grad_norm": 1.1601970195770264, "learning_rate": 1.6810062657813526e-06, "loss": 0.002, "step": 355900 }, { "epoch": 33.23065434518809, "grad_norm": 0.013570146635174751, "learning_rate": 1.6800710745347426e-06, "loss": 0.0039, "step": 356000 }, { "epoch": 33.23998879865584, "grad_norm": 7.129637718200684, "learning_rate": 1.6791358832881324e-06, "loss": 0.003, "step": 356100 }, { "epoch": 33.24932325212359, "grad_norm": 0.06075910106301308, "learning_rate": 1.6782006920415225e-06, "loss": 0.0039, "step": 356200 }, { "epoch": 33.25865770559134, "grad_norm": 0.6254529356956482, "learning_rate": 1.6772655007949127e-06, "loss": 0.0045, "step": 356300 }, { "epoch": 33.267992159059084, "grad_norm": 1.2654563188552856, "learning_rate": 1.6763303095483027e-06, "loss": 0.0036, "step": 356400 }, { "epoch": 33.277326612526835, "grad_norm": 0.5137890577316284, "learning_rate": 1.6753951183016928e-06, "loss": 0.0041, "step": 356500 }, { "epoch": 33.286661065994586, "grad_norm": 4.623119831085205, "learning_rate": 1.674459927055083e-06, "loss": 0.0042, "step": 356600 }, { "epoch": 33.29599551946234, "grad_norm": 0.08590810000896454, "learning_rate": 1.673524735808473e-06, "loss": 0.0049, "step": 356700 }, { "epoch": 33.30532997293008, "grad_norm": 0.5245876908302307, "learning_rate": 1.672589544561863e-06, "loss": 0.0035, "step": 356800 }, { "epoch": 33.31466442639783, "grad_norm": 7.089583873748779, "learning_rate": 1.6716543533152533e-06, "loss": 0.0044, "step": 356900 }, { "epoch": 33.323998879865584, "grad_norm": 1.4101203680038452, "learning_rate": 1.6707191620686431e-06, "loss": 0.0041, "step": 357000 }, { "epoch": 33.333333333333336, "grad_norm": 0.4033152163028717, "learning_rate": 1.6697839708220332e-06, "loss": 0.0038, "step": 357100 }, { "epoch": 33.34266778680108, "grad_norm": 0.17618118226528168, "learning_rate": 1.6688487795754232e-06, "loss": 0.0046, "step": 357200 }, { "epoch": 33.35200224026883, "grad_norm": 0.7347105145454407, "learning_rate": 1.6679135883288134e-06, "loss": 0.0046, "step": 357300 }, { "epoch": 33.36133669373658, "grad_norm": 2.2456748485565186, "learning_rate": 1.6669783970822035e-06, "loss": 0.0034, "step": 357400 }, { "epoch": 33.370671147204334, "grad_norm": 0.30590108036994934, "learning_rate": 1.6660432058355935e-06, "loss": 0.0018, "step": 357500 }, { "epoch": 33.38000560067208, "grad_norm": 0.5412154197692871, "learning_rate": 1.6651080145889837e-06, "loss": 0.004, "step": 357600 }, { "epoch": 33.38934005413983, "grad_norm": 4.2852911949157715, "learning_rate": 1.6641728233423738e-06, "loss": 0.0033, "step": 357700 }, { "epoch": 33.39867450760758, "grad_norm": 0.16167102754116058, "learning_rate": 1.6632376320957638e-06, "loss": 0.0049, "step": 357800 }, { "epoch": 33.40800896107533, "grad_norm": 1.6271814107894897, "learning_rate": 1.6623024408491536e-06, "loss": 0.0044, "step": 357900 }, { "epoch": 33.417343414543076, "grad_norm": 0.20083343982696533, "learning_rate": 1.6613672496025439e-06, "loss": 0.004, "step": 358000 }, { "epoch": 33.42667786801083, "grad_norm": 5.4298014640808105, "learning_rate": 1.6604320583559339e-06, "loss": 0.0045, "step": 358100 }, { "epoch": 33.43601232147858, "grad_norm": 0.009539446793496609, "learning_rate": 1.659496867109324e-06, "loss": 0.0036, "step": 358200 }, { "epoch": 33.44534677494633, "grad_norm": 10.019521713256836, "learning_rate": 1.658561675862714e-06, "loss": 0.0026, "step": 358300 }, { "epoch": 33.454681228414074, "grad_norm": 5.276742458343506, "learning_rate": 1.6576264846161042e-06, "loss": 0.0025, "step": 358400 }, { "epoch": 33.464015681881826, "grad_norm": 0.19039815664291382, "learning_rate": 1.6566912933694942e-06, "loss": 0.004, "step": 358500 }, { "epoch": 33.47335013534958, "grad_norm": 0.3303285837173462, "learning_rate": 1.6557561021228843e-06, "loss": 0.003, "step": 358600 }, { "epoch": 33.48268458881733, "grad_norm": 0.04299455136060715, "learning_rate": 1.6548209108762745e-06, "loss": 0.0049, "step": 358700 }, { "epoch": 33.49201904228507, "grad_norm": 0.0028473688289523125, "learning_rate": 1.6538857196296643e-06, "loss": 0.0037, "step": 358800 }, { "epoch": 33.501353495752824, "grad_norm": 0.6361075639724731, "learning_rate": 1.6529505283830543e-06, "loss": 0.0033, "step": 358900 }, { "epoch": 33.510687949220575, "grad_norm": 0.3981289863586426, "learning_rate": 1.6520153371364444e-06, "loss": 0.0044, "step": 359000 }, { "epoch": 33.52002240268832, "grad_norm": 0.0057185799814760685, "learning_rate": 1.6510801458898346e-06, "loss": 0.0031, "step": 359100 }, { "epoch": 33.52935685615607, "grad_norm": 0.07324742525815964, "learning_rate": 1.6501449546432246e-06, "loss": 0.0045, "step": 359200 }, { "epoch": 33.53869130962382, "grad_norm": 0.6110164523124695, "learning_rate": 1.6492097633966147e-06, "loss": 0.0026, "step": 359300 }, { "epoch": 33.54802576309157, "grad_norm": 21.478185653686523, "learning_rate": 1.648274572150005e-06, "loss": 0.0035, "step": 359400 }, { "epoch": 33.55736021655932, "grad_norm": 0.07108082622289658, "learning_rate": 1.647339380903395e-06, "loss": 0.002, "step": 359500 }, { "epoch": 33.56669467002707, "grad_norm": 0.007011127192527056, "learning_rate": 1.646404189656785e-06, "loss": 0.0038, "step": 359600 }, { "epoch": 33.57602912349482, "grad_norm": 3.9393389225006104, "learning_rate": 1.6454689984101748e-06, "loss": 0.0021, "step": 359700 }, { "epoch": 33.58536357696257, "grad_norm": 0.04926180839538574, "learning_rate": 1.644533807163565e-06, "loss": 0.0039, "step": 359800 }, { "epoch": 33.594698030430315, "grad_norm": 6.151070594787598, "learning_rate": 1.643598615916955e-06, "loss": 0.0024, "step": 359900 }, { "epoch": 33.60403248389807, "grad_norm": 3.0609426498413086, "learning_rate": 1.642663424670345e-06, "loss": 0.0033, "step": 360000 }, { "epoch": 33.60403248389807, "eval_accuracy": 0.7015169194865811, "eval_f1": 0.8255477871960544, "eval_loss": 0.32603153586387634, "eval_roc_auc": 0.9065929119565865, "eval_runtime": 145.6934, "eval_samples_per_second": 294.111, "eval_steps_per_second": 294.111, "step": 360000 }, { "epoch": 33.61336693736582, "grad_norm": 0.047910209745168686, "learning_rate": 1.6417282334237353e-06, "loss": 0.0037, "step": 360100 }, { "epoch": 33.62270139083357, "grad_norm": 0.6499600410461426, "learning_rate": 1.6407930421771254e-06, "loss": 0.0033, "step": 360200 }, { "epoch": 33.63203584430131, "grad_norm": 0.17719656229019165, "learning_rate": 1.6398578509305154e-06, "loss": 0.0037, "step": 360300 }, { "epoch": 33.641370297769065, "grad_norm": 0.04045332223176956, "learning_rate": 1.6389226596839056e-06, "loss": 0.0049, "step": 360400 }, { "epoch": 33.650704751236816, "grad_norm": 0.9812530279159546, "learning_rate": 1.6379874684372957e-06, "loss": 0.0028, "step": 360500 }, { "epoch": 33.66003920470457, "grad_norm": 0.1863204389810562, "learning_rate": 1.6370522771906855e-06, "loss": 0.0044, "step": 360600 }, { "epoch": 33.66937365817231, "grad_norm": 0.16757188737392426, "learning_rate": 1.6361170859440755e-06, "loss": 0.0033, "step": 360700 }, { "epoch": 33.67870811164006, "grad_norm": 2.602302074432373, "learning_rate": 1.6351818946974658e-06, "loss": 0.0033, "step": 360800 }, { "epoch": 33.688042565107814, "grad_norm": 0.009734321385622025, "learning_rate": 1.6342467034508558e-06, "loss": 0.0027, "step": 360900 }, { "epoch": 33.697377018575565, "grad_norm": 0.006599494256079197, "learning_rate": 1.6333115122042458e-06, "loss": 0.0041, "step": 361000 }, { "epoch": 33.70671147204331, "grad_norm": 0.8055398464202881, "learning_rate": 1.632376320957636e-06, "loss": 0.0042, "step": 361100 }, { "epoch": 33.71604592551106, "grad_norm": 0.0021683392114937305, "learning_rate": 1.6314411297110261e-06, "loss": 0.0029, "step": 361200 }, { "epoch": 33.72538037897881, "grad_norm": 0.855817973613739, "learning_rate": 1.6305059384644161e-06, "loss": 0.0035, "step": 361300 }, { "epoch": 33.73471483244656, "grad_norm": 0.9583255052566528, "learning_rate": 1.6295707472178064e-06, "loss": 0.0037, "step": 361400 }, { "epoch": 33.74404928591431, "grad_norm": 0.28152820467948914, "learning_rate": 1.6286355559711964e-06, "loss": 0.0037, "step": 361500 }, { "epoch": 33.75338373938206, "grad_norm": 0.07445676624774933, "learning_rate": 1.6277003647245862e-06, "loss": 0.0034, "step": 361600 }, { "epoch": 33.76271819284981, "grad_norm": 0.10154695808887482, "learning_rate": 1.6267651734779763e-06, "loss": 0.0027, "step": 361700 }, { "epoch": 33.77205264631756, "grad_norm": 0.0043042744509875774, "learning_rate": 1.6258299822313665e-06, "loss": 0.0033, "step": 361800 }, { "epoch": 33.781387099785306, "grad_norm": 0.013559600338339806, "learning_rate": 1.6248947909847565e-06, "loss": 0.0037, "step": 361900 }, { "epoch": 33.79072155325306, "grad_norm": 0.07444263994693756, "learning_rate": 1.6239595997381466e-06, "loss": 0.0043, "step": 362000 }, { "epoch": 33.80005600672081, "grad_norm": 2.025010585784912, "learning_rate": 1.6230244084915366e-06, "loss": 0.0037, "step": 362100 }, { "epoch": 33.80939046018855, "grad_norm": 2.8248448371887207, "learning_rate": 1.6220892172449268e-06, "loss": 0.0031, "step": 362200 }, { "epoch": 33.818724913656304, "grad_norm": 1.1756435632705688, "learning_rate": 1.6211540259983169e-06, "loss": 0.0033, "step": 362300 }, { "epoch": 33.828059367124055, "grad_norm": 0.1900942474603653, "learning_rate": 1.620218834751707e-06, "loss": 0.0028, "step": 362400 }, { "epoch": 33.83739382059181, "grad_norm": 0.09841928631067276, "learning_rate": 1.6192836435050967e-06, "loss": 0.0042, "step": 362500 }, { "epoch": 33.84672827405955, "grad_norm": 0.07728878408670425, "learning_rate": 1.618348452258487e-06, "loss": 0.003, "step": 362600 }, { "epoch": 33.8560627275273, "grad_norm": 0.10778121650218964, "learning_rate": 1.617413261011877e-06, "loss": 0.0031, "step": 362700 }, { "epoch": 33.86539718099505, "grad_norm": 3.7027571201324463, "learning_rate": 1.616478069765267e-06, "loss": 0.0039, "step": 362800 }, { "epoch": 33.874731634462805, "grad_norm": 0.03243861719965935, "learning_rate": 1.6155428785186573e-06, "loss": 0.0032, "step": 362900 }, { "epoch": 33.88406608793055, "grad_norm": 2.125716209411621, "learning_rate": 1.6146076872720473e-06, "loss": 0.0029, "step": 363000 }, { "epoch": 33.8934005413983, "grad_norm": 0.020046329125761986, "learning_rate": 1.6136724960254373e-06, "loss": 0.0038, "step": 363100 }, { "epoch": 33.90273499486605, "grad_norm": 0.2869279980659485, "learning_rate": 1.6127373047788276e-06, "loss": 0.003, "step": 363200 }, { "epoch": 33.9120694483338, "grad_norm": 0.17942196130752563, "learning_rate": 1.6118021135322176e-06, "loss": 0.0029, "step": 363300 }, { "epoch": 33.92140390180155, "grad_norm": 0.06970027834177017, "learning_rate": 1.6108669222856074e-06, "loss": 0.0039, "step": 363400 }, { "epoch": 33.9307383552693, "grad_norm": 0.3040977418422699, "learning_rate": 1.6099317310389974e-06, "loss": 0.0039, "step": 363500 }, { "epoch": 33.94007280873705, "grad_norm": 2.2115073204040527, "learning_rate": 1.6089965397923877e-06, "loss": 0.0049, "step": 363600 }, { "epoch": 33.9494072622048, "grad_norm": 1.6938018798828125, "learning_rate": 1.6080613485457777e-06, "loss": 0.0036, "step": 363700 }, { "epoch": 33.958741715672545, "grad_norm": 6.967803478240967, "learning_rate": 1.6071261572991678e-06, "loss": 0.0029, "step": 363800 }, { "epoch": 33.968076169140296, "grad_norm": 0.27258697152137756, "learning_rate": 1.606190966052558e-06, "loss": 0.0041, "step": 363900 }, { "epoch": 33.97741062260805, "grad_norm": 3.162381649017334, "learning_rate": 1.605255774805948e-06, "loss": 0.0029, "step": 364000 }, { "epoch": 33.9867450760758, "grad_norm": 0.014831384643912315, "learning_rate": 1.604320583559338e-06, "loss": 0.0031, "step": 364100 }, { "epoch": 33.99607952954354, "grad_norm": 1.3933284282684326, "learning_rate": 1.6033853923127283e-06, "loss": 0.0043, "step": 364200 }, { "epoch": 34.005413983011294, "grad_norm": 0.3093816936016083, "learning_rate": 1.6024502010661181e-06, "loss": 0.0032, "step": 364300 }, { "epoch": 34.014748436479046, "grad_norm": 0.02031560055911541, "learning_rate": 1.6015150098195081e-06, "loss": 0.0026, "step": 364400 }, { "epoch": 34.0240828899468, "grad_norm": 0.03441466763615608, "learning_rate": 1.6005798185728982e-06, "loss": 0.0027, "step": 364500 }, { "epoch": 34.03341734341454, "grad_norm": 0.1052248477935791, "learning_rate": 1.5996446273262884e-06, "loss": 0.0043, "step": 364600 }, { "epoch": 34.04275179688229, "grad_norm": 0.620538592338562, "learning_rate": 1.5987094360796784e-06, "loss": 0.004, "step": 364700 }, { "epoch": 34.052086250350044, "grad_norm": 0.14544546604156494, "learning_rate": 1.5977742448330685e-06, "loss": 0.003, "step": 364800 }, { "epoch": 34.06142070381779, "grad_norm": 0.10811523348093033, "learning_rate": 1.5968390535864587e-06, "loss": 0.0029, "step": 364900 }, { "epoch": 34.07075515728554, "grad_norm": 0.08779037743806839, "learning_rate": 1.5959038623398488e-06, "loss": 0.0027, "step": 365000 }, { "epoch": 34.07075515728554, "eval_accuracy": 0.6992065344224038, "eval_f1": 0.8269546677851176, "eval_loss": 0.33075591921806335, "eval_roc_auc": 0.9113949758919092, "eval_runtime": 146.0404, "eval_samples_per_second": 293.412, "eval_steps_per_second": 293.412, "step": 365000 }, { "epoch": 34.08008961075329, "grad_norm": 3.324721574783325, "learning_rate": 1.5949686710932388e-06, "loss": 0.0032, "step": 365100 }, { "epoch": 34.08942406422104, "grad_norm": 2.4547975063323975, "learning_rate": 1.5940334798466286e-06, "loss": 0.0043, "step": 365200 }, { "epoch": 34.098758517688786, "grad_norm": 0.07064037024974823, "learning_rate": 1.5930982886000188e-06, "loss": 0.004, "step": 365300 }, { "epoch": 34.10809297115654, "grad_norm": 0.011591854505240917, "learning_rate": 1.5921630973534089e-06, "loss": 0.0034, "step": 365400 }, { "epoch": 34.11742742462429, "grad_norm": 0.015866825357079506, "learning_rate": 1.591227906106799e-06, "loss": 0.0039, "step": 365500 }, { "epoch": 34.12676187809204, "grad_norm": 0.19098542630672455, "learning_rate": 1.5902927148601891e-06, "loss": 0.0019, "step": 365600 }, { "epoch": 34.136096331559784, "grad_norm": 3.5595476627349854, "learning_rate": 1.5893575236135792e-06, "loss": 0.0031, "step": 365700 }, { "epoch": 34.145430785027536, "grad_norm": 0.04057936742901802, "learning_rate": 1.5884223323669692e-06, "loss": 0.0024, "step": 365800 }, { "epoch": 34.15476523849529, "grad_norm": 0.27397438883781433, "learning_rate": 1.5874871411203592e-06, "loss": 0.0029, "step": 365900 }, { "epoch": 34.16409969196304, "grad_norm": 0.0029533447232097387, "learning_rate": 1.5865519498737495e-06, "loss": 0.0018, "step": 366000 }, { "epoch": 34.17343414543078, "grad_norm": 0.12367954850196838, "learning_rate": 1.5856167586271393e-06, "loss": 0.0045, "step": 366100 }, { "epoch": 34.182768598898534, "grad_norm": 5.500366687774658, "learning_rate": 1.5846815673805293e-06, "loss": 0.0034, "step": 366200 }, { "epoch": 34.192103052366285, "grad_norm": 2.8360235691070557, "learning_rate": 1.5837463761339194e-06, "loss": 0.0036, "step": 366300 }, { "epoch": 34.201437505834036, "grad_norm": 0.486004114151001, "learning_rate": 1.5828111848873096e-06, "loss": 0.003, "step": 366400 }, { "epoch": 34.21077195930178, "grad_norm": 1.2462576627731323, "learning_rate": 1.5818759936406996e-06, "loss": 0.0035, "step": 366500 }, { "epoch": 34.22010641276953, "grad_norm": 0.037141501903533936, "learning_rate": 1.5809408023940897e-06, "loss": 0.0032, "step": 366600 }, { "epoch": 34.22944086623728, "grad_norm": 3.138927936553955, "learning_rate": 1.58000561114748e-06, "loss": 0.003, "step": 366700 }, { "epoch": 34.238775319705034, "grad_norm": 0.13207213580608368, "learning_rate": 1.57907041990087e-06, "loss": 0.0034, "step": 366800 }, { "epoch": 34.24810977317278, "grad_norm": 3.7464263439178467, "learning_rate": 1.57813522865426e-06, "loss": 0.004, "step": 366900 }, { "epoch": 34.25744422664053, "grad_norm": 0.4419115483760834, "learning_rate": 1.5772000374076498e-06, "loss": 0.0019, "step": 367000 }, { "epoch": 34.26677868010828, "grad_norm": 0.017685173079371452, "learning_rate": 1.57626484616104e-06, "loss": 0.0036, "step": 367100 }, { "epoch": 34.27611313357603, "grad_norm": 0.17279161512851715, "learning_rate": 1.57532965491443e-06, "loss": 0.0046, "step": 367200 }, { "epoch": 34.28544758704378, "grad_norm": 0.01864880882203579, "learning_rate": 1.57439446366782e-06, "loss": 0.0055, "step": 367300 }, { "epoch": 34.29478204051153, "grad_norm": 0.12955757975578308, "learning_rate": 1.5734592724212103e-06, "loss": 0.0023, "step": 367400 }, { "epoch": 34.30411649397928, "grad_norm": 0.023473860695958138, "learning_rate": 1.5725240811746004e-06, "loss": 0.003, "step": 367500 }, { "epoch": 34.31345094744702, "grad_norm": 1.3031988143920898, "learning_rate": 1.5715888899279904e-06, "loss": 0.0031, "step": 367600 }, { "epoch": 34.322785400914775, "grad_norm": 0.26329222321510315, "learning_rate": 1.5706536986813806e-06, "loss": 0.0032, "step": 367700 }, { "epoch": 34.332119854382526, "grad_norm": 0.2574961185455322, "learning_rate": 1.5697185074347707e-06, "loss": 0.0022, "step": 367800 }, { "epoch": 34.34145430785028, "grad_norm": 0.2628682255744934, "learning_rate": 1.5687833161881605e-06, "loss": 0.003, "step": 367900 }, { "epoch": 34.35078876131802, "grad_norm": 4.123023509979248, "learning_rate": 1.5678481249415505e-06, "loss": 0.0034, "step": 368000 }, { "epoch": 34.36012321478577, "grad_norm": 0.00027996517019346356, "learning_rate": 1.5669129336949408e-06, "loss": 0.0046, "step": 368100 }, { "epoch": 34.369457668253524, "grad_norm": 0.060121260583400726, "learning_rate": 1.5659777424483308e-06, "loss": 0.0029, "step": 368200 }, { "epoch": 34.378792121721276, "grad_norm": 0.02774617075920105, "learning_rate": 1.5650425512017208e-06, "loss": 0.0032, "step": 368300 }, { "epoch": 34.38812657518902, "grad_norm": 0.3318583071231842, "learning_rate": 1.564107359955111e-06, "loss": 0.0019, "step": 368400 }, { "epoch": 34.39746102865677, "grad_norm": 0.2871420383453369, "learning_rate": 1.563172168708501e-06, "loss": 0.0024, "step": 368500 }, { "epoch": 34.40679548212452, "grad_norm": 0.006246705073863268, "learning_rate": 1.5622369774618911e-06, "loss": 0.0042, "step": 368600 }, { "epoch": 34.416129935592274, "grad_norm": 0.09044964611530304, "learning_rate": 1.5613017862152814e-06, "loss": 0.0039, "step": 368700 }, { "epoch": 34.42546438906002, "grad_norm": 0.12811128795146942, "learning_rate": 1.5603665949686712e-06, "loss": 0.0032, "step": 368800 }, { "epoch": 34.43479884252777, "grad_norm": 0.029966745525598526, "learning_rate": 1.5594314037220612e-06, "loss": 0.0043, "step": 368900 }, { "epoch": 34.44413329599552, "grad_norm": 2.236987590789795, "learning_rate": 1.5584962124754512e-06, "loss": 0.0029, "step": 369000 }, { "epoch": 34.45346774946327, "grad_norm": 0.3092055022716522, "learning_rate": 1.5575610212288415e-06, "loss": 0.003, "step": 369100 }, { "epoch": 34.462802202931016, "grad_norm": 0.2757609784603119, "learning_rate": 1.5566258299822315e-06, "loss": 0.003, "step": 369200 }, { "epoch": 34.47213665639877, "grad_norm": 0.0054929545149207115, "learning_rate": 1.5556906387356216e-06, "loss": 0.0033, "step": 369300 }, { "epoch": 34.48147110986652, "grad_norm": 0.46120843291282654, "learning_rate": 1.5547554474890118e-06, "loss": 0.0039, "step": 369400 }, { "epoch": 34.49080556333427, "grad_norm": 0.027883043512701988, "learning_rate": 1.5538202562424018e-06, "loss": 0.004, "step": 369500 }, { "epoch": 34.500140016802014, "grad_norm": 0.11818135529756546, "learning_rate": 1.5528850649957919e-06, "loss": 0.0027, "step": 369600 }, { "epoch": 34.509474470269765, "grad_norm": 0.09758727252483368, "learning_rate": 1.5519498737491817e-06, "loss": 0.0034, "step": 369700 }, { "epoch": 34.51880892373752, "grad_norm": 0.06822389364242554, "learning_rate": 1.5510146825025717e-06, "loss": 0.0028, "step": 369800 }, { "epoch": 34.52814337720527, "grad_norm": 0.9172471761703491, "learning_rate": 1.550079491255962e-06, "loss": 0.0032, "step": 369900 }, { "epoch": 34.53747783067301, "grad_norm": 1.2347302436828613, "learning_rate": 1.549144300009352e-06, "loss": 0.0045, "step": 370000 }, { "epoch": 34.53747783067301, "eval_accuracy": 0.696196032672112, "eval_f1": 0.8245477788151936, "eval_loss": 0.33322209119796753, "eval_roc_auc": 0.9104904392616094, "eval_runtime": 145.9622, "eval_samples_per_second": 293.569, "eval_steps_per_second": 293.569, "step": 370000 }, { "epoch": 34.54681228414076, "grad_norm": 0.004333097487688065, "learning_rate": 1.548209108762742e-06, "loss": 0.0045, "step": 370100 }, { "epoch": 34.556146737608515, "grad_norm": 3.428715705871582, "learning_rate": 1.5472739175161323e-06, "loss": 0.0035, "step": 370200 }, { "epoch": 34.56548119107626, "grad_norm": 0.7087982296943665, "learning_rate": 1.5463387262695223e-06, "loss": 0.0027, "step": 370300 }, { "epoch": 34.57481564454401, "grad_norm": 0.06277802586555481, "learning_rate": 1.5454035350229123e-06, "loss": 0.0033, "step": 370400 }, { "epoch": 34.58415009801176, "grad_norm": 0.2830371856689453, "learning_rate": 1.5444683437763026e-06, "loss": 0.0032, "step": 370500 }, { "epoch": 34.59348455147951, "grad_norm": 2.1385669708251953, "learning_rate": 1.5435331525296924e-06, "loss": 0.0025, "step": 370600 }, { "epoch": 34.60281900494726, "grad_norm": 0.9400115013122559, "learning_rate": 1.5425979612830824e-06, "loss": 0.0036, "step": 370700 }, { "epoch": 34.61215345841501, "grad_norm": 5.109329700469971, "learning_rate": 1.5416627700364724e-06, "loss": 0.004, "step": 370800 }, { "epoch": 34.62148791188276, "grad_norm": 0.09013081341981888, "learning_rate": 1.5407275787898627e-06, "loss": 0.0031, "step": 370900 }, { "epoch": 34.63082236535051, "grad_norm": 0.6543238759040833, "learning_rate": 1.5397923875432527e-06, "loss": 0.0032, "step": 371000 }, { "epoch": 34.640156818818255, "grad_norm": 0.03952790051698685, "learning_rate": 1.5388571962966427e-06, "loss": 0.0026, "step": 371100 }, { "epoch": 34.64949127228601, "grad_norm": 2.7383861541748047, "learning_rate": 1.537922005050033e-06, "loss": 0.0039, "step": 371200 }, { "epoch": 34.65882572575376, "grad_norm": 0.3514026999473572, "learning_rate": 1.536986813803423e-06, "loss": 0.0036, "step": 371300 }, { "epoch": 34.66816017922151, "grad_norm": 0.1210467740893364, "learning_rate": 1.536051622556813e-06, "loss": 0.0038, "step": 371400 }, { "epoch": 34.67749463268925, "grad_norm": 0.00574796786531806, "learning_rate": 1.5351164313102029e-06, "loss": 0.0035, "step": 371500 }, { "epoch": 34.686829086157005, "grad_norm": 0.07784304022789001, "learning_rate": 1.534181240063593e-06, "loss": 0.0026, "step": 371600 }, { "epoch": 34.696163539624756, "grad_norm": 0.006241434253752232, "learning_rate": 1.5332460488169831e-06, "loss": 0.003, "step": 371700 }, { "epoch": 34.70549799309251, "grad_norm": 0.013279943726956844, "learning_rate": 1.5323108575703732e-06, "loss": 0.0031, "step": 371800 }, { "epoch": 34.71483244656025, "grad_norm": 0.06319969892501831, "learning_rate": 1.5313756663237634e-06, "loss": 0.0031, "step": 371900 }, { "epoch": 34.724166900028, "grad_norm": 0.022321203723549843, "learning_rate": 1.5304404750771534e-06, "loss": 0.0028, "step": 372000 }, { "epoch": 34.733501353495754, "grad_norm": 0.14276063442230225, "learning_rate": 1.5295052838305435e-06, "loss": 0.0032, "step": 372100 }, { "epoch": 34.742835806963505, "grad_norm": 0.1863078474998474, "learning_rate": 1.5285700925839337e-06, "loss": 0.0038, "step": 372200 }, { "epoch": 34.75217026043125, "grad_norm": 0.20560067892074585, "learning_rate": 1.5276349013373237e-06, "loss": 0.0029, "step": 372300 }, { "epoch": 34.761504713899, "grad_norm": 0.03899979963898659, "learning_rate": 1.5266997100907136e-06, "loss": 0.0032, "step": 372400 }, { "epoch": 34.77083916736675, "grad_norm": 0.0001336917484877631, "learning_rate": 1.5257645188441036e-06, "loss": 0.0054, "step": 372500 }, { "epoch": 34.7801736208345, "grad_norm": 0.0075004128739237785, "learning_rate": 1.5248293275974938e-06, "loss": 0.004, "step": 372600 }, { "epoch": 34.78950807430225, "grad_norm": 0.012305377051234245, "learning_rate": 1.5238941363508839e-06, "loss": 0.0049, "step": 372700 }, { "epoch": 34.79884252777, "grad_norm": 0.05313115939497948, "learning_rate": 1.522958945104274e-06, "loss": 0.004, "step": 372800 }, { "epoch": 34.80817698123775, "grad_norm": 0.010721714235842228, "learning_rate": 1.5220237538576641e-06, "loss": 0.0037, "step": 372900 }, { "epoch": 34.8175114347055, "grad_norm": 0.062326036393642426, "learning_rate": 1.5210885626110542e-06, "loss": 0.0025, "step": 373000 }, { "epoch": 34.826845888173246, "grad_norm": 1.4213942289352417, "learning_rate": 1.5201533713644442e-06, "loss": 0.0028, "step": 373100 }, { "epoch": 34.836180341641, "grad_norm": 0.7027170062065125, "learning_rate": 1.5192181801178344e-06, "loss": 0.0048, "step": 373200 }, { "epoch": 34.84551479510875, "grad_norm": 0.22720466554164886, "learning_rate": 1.5182829888712243e-06, "loss": 0.0033, "step": 373300 }, { "epoch": 34.85484924857649, "grad_norm": 3.331033706665039, "learning_rate": 1.5173477976246143e-06, "loss": 0.0046, "step": 373400 }, { "epoch": 34.864183702044244, "grad_norm": 0.027437731623649597, "learning_rate": 1.5164126063780043e-06, "loss": 0.0047, "step": 373500 }, { "epoch": 34.873518155511995, "grad_norm": 0.2721717357635498, "learning_rate": 1.5154774151313944e-06, "loss": 0.0051, "step": 373600 }, { "epoch": 34.882852608979746, "grad_norm": 0.13452568650245667, "learning_rate": 1.5145422238847846e-06, "loss": 0.0028, "step": 373700 }, { "epoch": 34.89218706244749, "grad_norm": 0.006615354213863611, "learning_rate": 1.5136070326381746e-06, "loss": 0.003, "step": 373800 }, { "epoch": 34.90152151591524, "grad_norm": 3.925438404083252, "learning_rate": 1.5126718413915647e-06, "loss": 0.0027, "step": 373900 }, { "epoch": 34.91085596938299, "grad_norm": 0.2128744274377823, "learning_rate": 1.511736650144955e-06, "loss": 0.0035, "step": 374000 }, { "epoch": 34.920190422850744, "grad_norm": 0.15844525396823883, "learning_rate": 1.510801458898345e-06, "loss": 0.0033, "step": 374100 }, { "epoch": 34.92952487631849, "grad_norm": 0.10838088393211365, "learning_rate": 1.5098662676517347e-06, "loss": 0.0028, "step": 374200 }, { "epoch": 34.93885932978624, "grad_norm": 0.042767297476530075, "learning_rate": 1.5089310764051248e-06, "loss": 0.0039, "step": 374300 }, { "epoch": 34.94819378325399, "grad_norm": 0.16810892522335052, "learning_rate": 1.507995885158515e-06, "loss": 0.0036, "step": 374400 }, { "epoch": 34.95752823672174, "grad_norm": 0.02594233863055706, "learning_rate": 1.507060693911905e-06, "loss": 0.004, "step": 374500 }, { "epoch": 34.96686269018949, "grad_norm": 1.8349229097366333, "learning_rate": 1.506125502665295e-06, "loss": 0.0027, "step": 374600 }, { "epoch": 34.97619714365724, "grad_norm": 1.0238267183303833, "learning_rate": 1.5051903114186853e-06, "loss": 0.0035, "step": 374700 }, { "epoch": 34.98553159712499, "grad_norm": 0.008774430491030216, "learning_rate": 1.5042551201720754e-06, "loss": 0.0029, "step": 374800 }, { "epoch": 34.99486605059274, "grad_norm": 0.01102161593735218, "learning_rate": 1.5033199289254654e-06, "loss": 0.0042, "step": 374900 }, { "epoch": 35.004200504060485, "grad_norm": 3.2338509559631348, "learning_rate": 1.5023847376788556e-06, "loss": 0.0033, "step": 375000 }, { "epoch": 35.004200504060485, "eval_accuracy": 0.7007234539089848, "eval_f1": 0.8271664573471162, "eval_loss": 0.3321366310119629, "eval_roc_auc": 0.909730671372812, "eval_runtime": 146.1879, "eval_samples_per_second": 293.116, "eval_steps_per_second": 293.116, "step": 375000 }, { "epoch": 35.013534957528236, "grad_norm": 0.3233812749385834, "learning_rate": 1.5014495464322454e-06, "loss": 0.0035, "step": 375100 }, { "epoch": 35.02286941099599, "grad_norm": 6.603456974029541, "learning_rate": 1.5005143551856355e-06, "loss": 0.0031, "step": 375200 }, { "epoch": 35.03220386446374, "grad_norm": 0.01687554642558098, "learning_rate": 1.4995791639390255e-06, "loss": 0.0038, "step": 375300 }, { "epoch": 35.04153831793148, "grad_norm": 0.060115016996860504, "learning_rate": 1.4986439726924158e-06, "loss": 0.004, "step": 375400 }, { "epoch": 35.050872771399234, "grad_norm": 0.0015773677732795477, "learning_rate": 1.4977087814458058e-06, "loss": 0.0031, "step": 375500 }, { "epoch": 35.060207224866986, "grad_norm": 0.031262945383787155, "learning_rate": 1.4967735901991958e-06, "loss": 0.0027, "step": 375600 }, { "epoch": 35.06954167833474, "grad_norm": 0.010956131853163242, "learning_rate": 1.495838398952586e-06, "loss": 0.0023, "step": 375700 }, { "epoch": 35.07887613180248, "grad_norm": 0.634635865688324, "learning_rate": 1.494903207705976e-06, "loss": 0.0031, "step": 375800 }, { "epoch": 35.08821058527023, "grad_norm": 0.007636335212737322, "learning_rate": 1.4939680164593661e-06, "loss": 0.0027, "step": 375900 }, { "epoch": 35.097545038737984, "grad_norm": 0.020874641835689545, "learning_rate": 1.493032825212756e-06, "loss": 0.0036, "step": 376000 }, { "epoch": 35.10687949220573, "grad_norm": 0.013979118317365646, "learning_rate": 1.4920976339661462e-06, "loss": 0.0022, "step": 376100 }, { "epoch": 35.11621394567348, "grad_norm": 0.8869831562042236, "learning_rate": 1.4911624427195362e-06, "loss": 0.0035, "step": 376200 }, { "epoch": 35.12554839914123, "grad_norm": 0.11631103605031967, "learning_rate": 1.4902272514729262e-06, "loss": 0.004, "step": 376300 }, { "epoch": 35.13488285260898, "grad_norm": 5.174437522888184, "learning_rate": 1.4892920602263165e-06, "loss": 0.0025, "step": 376400 }, { "epoch": 35.144217306076726, "grad_norm": 0.004304449073970318, "learning_rate": 1.4883568689797065e-06, "loss": 0.0029, "step": 376500 }, { "epoch": 35.15355175954448, "grad_norm": 0.05061576887965202, "learning_rate": 1.4874216777330965e-06, "loss": 0.0021, "step": 376600 }, { "epoch": 35.16288621301223, "grad_norm": 0.04024570807814598, "learning_rate": 1.4864864864864868e-06, "loss": 0.0038, "step": 376700 }, { "epoch": 35.17222066647998, "grad_norm": 0.6228541135787964, "learning_rate": 1.4855512952398768e-06, "loss": 0.0024, "step": 376800 }, { "epoch": 35.181555119947724, "grad_norm": 0.035495415329933167, "learning_rate": 1.4846161039932666e-06, "loss": 0.0037, "step": 376900 }, { "epoch": 35.190889573415475, "grad_norm": 0.012286788783967495, "learning_rate": 1.4836809127466567e-06, "loss": 0.0032, "step": 377000 }, { "epoch": 35.20022402688323, "grad_norm": 0.10977917164564133, "learning_rate": 1.482745721500047e-06, "loss": 0.0031, "step": 377100 }, { "epoch": 35.20955848035098, "grad_norm": 0.8645766377449036, "learning_rate": 1.481810530253437e-06, "loss": 0.0034, "step": 377200 }, { "epoch": 35.21889293381872, "grad_norm": 0.01935398019850254, "learning_rate": 1.480875339006827e-06, "loss": 0.0034, "step": 377300 }, { "epoch": 35.22822738728647, "grad_norm": 0.014089219272136688, "learning_rate": 1.479940147760217e-06, "loss": 0.002, "step": 377400 }, { "epoch": 35.237561840754225, "grad_norm": 0.042344897985458374, "learning_rate": 1.4790049565136072e-06, "loss": 0.0033, "step": 377500 }, { "epoch": 35.246896294221976, "grad_norm": 0.4599360227584839, "learning_rate": 1.4780697652669973e-06, "loss": 0.0027, "step": 377600 }, { "epoch": 35.25623074768972, "grad_norm": 0.08385548740625381, "learning_rate": 1.4771345740203873e-06, "loss": 0.002, "step": 377700 }, { "epoch": 35.26556520115747, "grad_norm": 0.09390033781528473, "learning_rate": 1.4761993827737771e-06, "loss": 0.0025, "step": 377800 }, { "epoch": 35.27489965462522, "grad_norm": 1.2549374103546143, "learning_rate": 1.4752641915271674e-06, "loss": 0.003, "step": 377900 }, { "epoch": 35.284234108092974, "grad_norm": 0.08387825638055801, "learning_rate": 1.4743290002805574e-06, "loss": 0.0039, "step": 378000 }, { "epoch": 35.29356856156072, "grad_norm": 0.011486373841762543, "learning_rate": 1.4733938090339474e-06, "loss": 0.0047, "step": 378100 }, { "epoch": 35.30290301502847, "grad_norm": 0.003323396435007453, "learning_rate": 1.4724586177873377e-06, "loss": 0.003, "step": 378200 }, { "epoch": 35.31223746849622, "grad_norm": 0.004176212940365076, "learning_rate": 1.4715234265407277e-06, "loss": 0.0036, "step": 378300 }, { "epoch": 35.32157192196397, "grad_norm": 0.0005481425323523581, "learning_rate": 1.4705882352941177e-06, "loss": 0.0046, "step": 378400 }, { "epoch": 35.33090637543172, "grad_norm": 8.010738372802734, "learning_rate": 1.469653044047508e-06, "loss": 0.0036, "step": 378500 }, { "epoch": 35.34024082889947, "grad_norm": 0.01654621958732605, "learning_rate": 1.468717852800898e-06, "loss": 0.0034, "step": 378600 }, { "epoch": 35.34957528236722, "grad_norm": 0.2966282367706299, "learning_rate": 1.4677826615542878e-06, "loss": 0.0024, "step": 378700 }, { "epoch": 35.35890973583497, "grad_norm": 0.0688047856092453, "learning_rate": 1.4668474703076779e-06, "loss": 0.0027, "step": 378800 }, { "epoch": 35.368244189302715, "grad_norm": 0.8365985155105591, "learning_rate": 1.465912279061068e-06, "loss": 0.0037, "step": 378900 }, { "epoch": 35.377578642770466, "grad_norm": 0.5093162655830383, "learning_rate": 1.4649770878144581e-06, "loss": 0.0032, "step": 379000 }, { "epoch": 35.38691309623822, "grad_norm": 0.2590511739253998, "learning_rate": 1.4640418965678482e-06, "loss": 0.0036, "step": 379100 }, { "epoch": 35.39624754970596, "grad_norm": 0.14211876690387726, "learning_rate": 1.4631067053212384e-06, "loss": 0.0024, "step": 379200 }, { "epoch": 35.40558200317371, "grad_norm": 0.005327424965798855, "learning_rate": 1.4621715140746284e-06, "loss": 0.0033, "step": 379300 }, { "epoch": 35.414916456641464, "grad_norm": 0.024574510753154755, "learning_rate": 1.4612363228280185e-06, "loss": 0.0027, "step": 379400 }, { "epoch": 35.424250910109215, "grad_norm": 0.03599219396710396, "learning_rate": 1.4603011315814087e-06, "loss": 0.0031, "step": 379500 }, { "epoch": 35.43358536357696, "grad_norm": 0.05103365704417229, "learning_rate": 1.4593659403347985e-06, "loss": 0.0032, "step": 379600 }, { "epoch": 35.44291981704471, "grad_norm": 0.02121477946639061, "learning_rate": 1.4584307490881886e-06, "loss": 0.0034, "step": 379700 }, { "epoch": 35.45225427051246, "grad_norm": 4.647952079772949, "learning_rate": 1.4574955578415786e-06, "loss": 0.004, "step": 379800 }, { "epoch": 35.46158872398021, "grad_norm": 0.044230345636606216, "learning_rate": 1.4565603665949688e-06, "loss": 0.0032, "step": 379900 }, { "epoch": 35.47092317744796, "grad_norm": 4.415987968444824, "learning_rate": 1.4556251753483589e-06, "loss": 0.0024, "step": 380000 }, { "epoch": 35.47092317744796, "eval_accuracy": 0.6980396732788798, "eval_f1": 0.8256045917474539, "eval_loss": 0.3354504108428955, "eval_roc_auc": 0.9101545068551855, "eval_runtime": 146.253, "eval_samples_per_second": 292.985, "eval_steps_per_second": 292.985, "step": 380000 }, { "epoch": 35.48025763091571, "grad_norm": 0.11895837634801865, "learning_rate": 1.4546899841017489e-06, "loss": 0.0025, "step": 380100 }, { "epoch": 35.48959208438346, "grad_norm": 5.879486083984375, "learning_rate": 1.4537547928551391e-06, "loss": 0.0038, "step": 380200 }, { "epoch": 35.49892653785121, "grad_norm": 0.013698006048798561, "learning_rate": 1.4528196016085292e-06, "loss": 0.004, "step": 380300 }, { "epoch": 35.508260991318956, "grad_norm": 1.338544249534607, "learning_rate": 1.4518844103619192e-06, "loss": 0.003, "step": 380400 }, { "epoch": 35.51759544478671, "grad_norm": 3.0427303314208984, "learning_rate": 1.4509492191153094e-06, "loss": 0.0038, "step": 380500 }, { "epoch": 35.52692989825446, "grad_norm": 0.047156255692243576, "learning_rate": 1.4500140278686993e-06, "loss": 0.0035, "step": 380600 }, { "epoch": 35.53626435172221, "grad_norm": 1.6373093128204346, "learning_rate": 1.4490788366220893e-06, "loss": 0.0025, "step": 380700 }, { "epoch": 35.545598805189954, "grad_norm": 0.0009199678315781057, "learning_rate": 1.4481436453754793e-06, "loss": 0.0042, "step": 380800 }, { "epoch": 35.554933258657705, "grad_norm": 0.10606865584850311, "learning_rate": 1.4472084541288696e-06, "loss": 0.0029, "step": 380900 }, { "epoch": 35.56426771212546, "grad_norm": 0.051869526505470276, "learning_rate": 1.4462732628822596e-06, "loss": 0.0035, "step": 381000 }, { "epoch": 35.57360216559321, "grad_norm": 0.026006346568465233, "learning_rate": 1.4453380716356496e-06, "loss": 0.0031, "step": 381100 }, { "epoch": 35.58293661906095, "grad_norm": 2.360718011856079, "learning_rate": 1.4444028803890396e-06, "loss": 0.0044, "step": 381200 }, { "epoch": 35.5922710725287, "grad_norm": 0.0022282141726464033, "learning_rate": 1.4434676891424299e-06, "loss": 0.0035, "step": 381300 }, { "epoch": 35.601605525996455, "grad_norm": 0.1990964114665985, "learning_rate": 1.44253249789582e-06, "loss": 0.0022, "step": 381400 }, { "epoch": 35.610939979464206, "grad_norm": 0.03909927234053612, "learning_rate": 1.4415973066492097e-06, "loss": 0.0028, "step": 381500 }, { "epoch": 35.62027443293195, "grad_norm": 0.0395224466919899, "learning_rate": 1.4406621154025998e-06, "loss": 0.0025, "step": 381600 }, { "epoch": 35.6296088863997, "grad_norm": 0.009840991348028183, "learning_rate": 1.43972692415599e-06, "loss": 0.0029, "step": 381700 }, { "epoch": 35.63894333986745, "grad_norm": 0.3375307619571686, "learning_rate": 1.43879173290938e-06, "loss": 0.0014, "step": 381800 }, { "epoch": 35.6482777933352, "grad_norm": 0.07326210290193558, "learning_rate": 1.43785654166277e-06, "loss": 0.0033, "step": 381900 }, { "epoch": 35.65761224680295, "grad_norm": 0.45054319500923157, "learning_rate": 1.4369213504161603e-06, "loss": 0.0025, "step": 382000 }, { "epoch": 35.6669467002707, "grad_norm": 0.008760358206927776, "learning_rate": 1.4359861591695503e-06, "loss": 0.0026, "step": 382100 }, { "epoch": 35.67628115373845, "grad_norm": 1.5137666463851929, "learning_rate": 1.4350509679229404e-06, "loss": 0.0039, "step": 382200 }, { "epoch": 35.685615607206195, "grad_norm": 0.09099166095256805, "learning_rate": 1.4341157766763306e-06, "loss": 0.0022, "step": 382300 }, { "epoch": 35.694950060673946, "grad_norm": 0.05466132611036301, "learning_rate": 1.4331805854297204e-06, "loss": 0.0051, "step": 382400 }, { "epoch": 35.7042845141417, "grad_norm": 0.05405924469232559, "learning_rate": 1.4322453941831105e-06, "loss": 0.0027, "step": 382500 }, { "epoch": 35.71361896760945, "grad_norm": 0.5828676819801331, "learning_rate": 1.4313102029365005e-06, "loss": 0.0036, "step": 382600 }, { "epoch": 35.72295342107719, "grad_norm": 4.004746913909912, "learning_rate": 1.4303750116898907e-06, "loss": 0.0041, "step": 382700 }, { "epoch": 35.732287874544944, "grad_norm": 0.0550793819129467, "learning_rate": 1.4294398204432808e-06, "loss": 0.0021, "step": 382800 }, { "epoch": 35.741622328012696, "grad_norm": 0.006172659806907177, "learning_rate": 1.4285046291966708e-06, "loss": 0.0034, "step": 382900 }, { "epoch": 35.75095678148045, "grad_norm": 0.05328547582030296, "learning_rate": 1.427569437950061e-06, "loss": 0.004, "step": 383000 }, { "epoch": 35.76029123494819, "grad_norm": 0.6274980902671814, "learning_rate": 1.426634246703451e-06, "loss": 0.0024, "step": 383100 }, { "epoch": 35.76962568841594, "grad_norm": 0.022896749898791313, "learning_rate": 1.4256990554568411e-06, "loss": 0.0025, "step": 383200 }, { "epoch": 35.778960141883694, "grad_norm": 6.383310794830322, "learning_rate": 1.424763864210231e-06, "loss": 0.0045, "step": 383300 }, { "epoch": 35.788294595351445, "grad_norm": 4.322093963623047, "learning_rate": 1.4238286729636212e-06, "loss": 0.0046, "step": 383400 }, { "epoch": 35.79762904881919, "grad_norm": 0.08921828120946884, "learning_rate": 1.4228934817170112e-06, "loss": 0.0013, "step": 383500 }, { "epoch": 35.80696350228694, "grad_norm": 2.3393352031707764, "learning_rate": 1.4219582904704012e-06, "loss": 0.0038, "step": 383600 }, { "epoch": 35.81629795575469, "grad_norm": 2.933311939239502, "learning_rate": 1.4210230992237915e-06, "loss": 0.0034, "step": 383700 }, { "epoch": 35.82563240922244, "grad_norm": 4.688342571258545, "learning_rate": 1.4200879079771815e-06, "loss": 0.0026, "step": 383800 }, { "epoch": 35.83496686269019, "grad_norm": 3.5208890438079834, "learning_rate": 1.4191527167305715e-06, "loss": 0.0026, "step": 383900 }, { "epoch": 35.84430131615794, "grad_norm": 0.812804102897644, "learning_rate": 1.4182175254839618e-06, "loss": 0.0032, "step": 384000 }, { "epoch": 35.85363576962569, "grad_norm": 0.0196062121540308, "learning_rate": 1.4172823342373518e-06, "loss": 0.0024, "step": 384100 }, { "epoch": 35.86297022309344, "grad_norm": 1.52512788772583, "learning_rate": 1.4163471429907416e-06, "loss": 0.0029, "step": 384200 }, { "epoch": 35.872304676561185, "grad_norm": 0.1066083237528801, "learning_rate": 1.4154119517441317e-06, "loss": 0.0027, "step": 384300 }, { "epoch": 35.88163913002894, "grad_norm": 0.07334235310554504, "learning_rate": 1.414476760497522e-06, "loss": 0.0026, "step": 384400 }, { "epoch": 35.89097358349669, "grad_norm": 0.12837621569633484, "learning_rate": 1.413541569250912e-06, "loss": 0.0034, "step": 384500 }, { "epoch": 35.90030803696443, "grad_norm": 13.021636962890625, "learning_rate": 1.412606378004302e-06, "loss": 0.0042, "step": 384600 }, { "epoch": 35.909642490432184, "grad_norm": 0.030207466334104538, "learning_rate": 1.4116711867576922e-06, "loss": 0.0029, "step": 384700 }, { "epoch": 35.918976943899935, "grad_norm": 1.3028502464294434, "learning_rate": 1.4107359955110822e-06, "loss": 0.0025, "step": 384800 }, { "epoch": 35.928311397367686, "grad_norm": 0.008515879511833191, "learning_rate": 1.4098008042644723e-06, "loss": 0.0048, "step": 384900 }, { "epoch": 35.93764585083543, "grad_norm": 0.030269749462604523, "learning_rate": 1.4088656130178623e-06, "loss": 0.003, "step": 385000 }, { "epoch": 35.93764585083543, "eval_accuracy": 0.7023803967327888, "eval_f1": 0.8280396804494776, "eval_loss": 0.3342446982860565, "eval_roc_auc": 0.9103803587872664, "eval_runtime": 145.0374, "eval_samples_per_second": 295.441, "eval_steps_per_second": 295.441, "step": 385000 }, { "epoch": 35.94698030430318, "grad_norm": 0.006739655043929815, "learning_rate": 1.4079304217712523e-06, "loss": 0.0033, "step": 385100 }, { "epoch": 35.95631475777093, "grad_norm": 6.471617221832275, "learning_rate": 1.4069952305246424e-06, "loss": 0.0031, "step": 385200 }, { "epoch": 35.965649211238684, "grad_norm": 0.05922126770019531, "learning_rate": 1.4060600392780324e-06, "loss": 0.0023, "step": 385300 }, { "epoch": 35.97498366470643, "grad_norm": 0.17964528501033783, "learning_rate": 1.4051248480314224e-06, "loss": 0.0031, "step": 385400 }, { "epoch": 35.98431811817418, "grad_norm": 0.18671903014183044, "learning_rate": 1.4041896567848127e-06, "loss": 0.0041, "step": 385500 }, { "epoch": 35.99365257164193, "grad_norm": 4.908517837524414, "learning_rate": 1.4032544655382027e-06, "loss": 0.0043, "step": 385600 }, { "epoch": 36.00298702510968, "grad_norm": 0.12367291748523712, "learning_rate": 1.4023192742915927e-06, "loss": 0.002, "step": 385700 }, { "epoch": 36.01232147857743, "grad_norm": 0.0019167516147717834, "learning_rate": 1.401384083044983e-06, "loss": 0.0021, "step": 385800 }, { "epoch": 36.02165593204518, "grad_norm": 0.0015133946435526013, "learning_rate": 1.400448891798373e-06, "loss": 0.0019, "step": 385900 }, { "epoch": 36.03099038551293, "grad_norm": 0.0036347112618386745, "learning_rate": 1.3995137005517628e-06, "loss": 0.0024, "step": 386000 }, { "epoch": 36.04032483898068, "grad_norm": 0.41407135128974915, "learning_rate": 1.3985785093051528e-06, "loss": 0.0035, "step": 386100 }, { "epoch": 36.049659292448425, "grad_norm": 0.18818391859531403, "learning_rate": 1.397643318058543e-06, "loss": 0.0025, "step": 386200 }, { "epoch": 36.058993745916176, "grad_norm": 2.004817485809326, "learning_rate": 1.3967081268119331e-06, "loss": 0.0023, "step": 386300 }, { "epoch": 36.06832819938393, "grad_norm": 0.04672456160187721, "learning_rate": 1.3957729355653231e-06, "loss": 0.0027, "step": 386400 }, { "epoch": 36.07766265285168, "grad_norm": 0.031040072441101074, "learning_rate": 1.3948377443187134e-06, "loss": 0.003, "step": 386500 }, { "epoch": 36.08699710631942, "grad_norm": 0.15606261789798737, "learning_rate": 1.3939025530721034e-06, "loss": 0.0033, "step": 386600 }, { "epoch": 36.096331559787174, "grad_norm": 1.8545202016830444, "learning_rate": 1.3929673618254934e-06, "loss": 0.0029, "step": 386700 }, { "epoch": 36.105666013254925, "grad_norm": 3.6372158527374268, "learning_rate": 1.3920321705788837e-06, "loss": 0.0033, "step": 386800 }, { "epoch": 36.11500046672268, "grad_norm": 0.6748552322387695, "learning_rate": 1.3910969793322735e-06, "loss": 0.0036, "step": 386900 }, { "epoch": 36.12433492019042, "grad_norm": 3.2367563247680664, "learning_rate": 1.3901617880856635e-06, "loss": 0.0024, "step": 387000 }, { "epoch": 36.13366937365817, "grad_norm": 2.3163106441497803, "learning_rate": 1.3892265968390536e-06, "loss": 0.0033, "step": 387100 }, { "epoch": 36.14300382712592, "grad_norm": 0.0035779662430286407, "learning_rate": 1.3882914055924438e-06, "loss": 0.0014, "step": 387200 }, { "epoch": 36.15233828059367, "grad_norm": 0.08480923622846603, "learning_rate": 1.3873562143458338e-06, "loss": 0.0023, "step": 387300 }, { "epoch": 36.16167273406142, "grad_norm": 0.00674587395042181, "learning_rate": 1.3864210230992239e-06, "loss": 0.0036, "step": 387400 }, { "epoch": 36.17100718752917, "grad_norm": 0.04585186019539833, "learning_rate": 1.3854858318526141e-06, "loss": 0.0028, "step": 387500 }, { "epoch": 36.18034164099692, "grad_norm": 0.8765541315078735, "learning_rate": 1.3845506406060041e-06, "loss": 0.005, "step": 387600 }, { "epoch": 36.189676094464666, "grad_norm": 0.042418982833623886, "learning_rate": 1.3836154493593942e-06, "loss": 0.0028, "step": 387700 }, { "epoch": 36.19901054793242, "grad_norm": 0.1400289535522461, "learning_rate": 1.382680258112784e-06, "loss": 0.003, "step": 387800 }, { "epoch": 36.20834500140017, "grad_norm": 0.32330355048179626, "learning_rate": 1.3817450668661742e-06, "loss": 0.0033, "step": 387900 }, { "epoch": 36.21767945486792, "grad_norm": 5.591104984283447, "learning_rate": 1.3808098756195643e-06, "loss": 0.0025, "step": 388000 }, { "epoch": 36.227013908335664, "grad_norm": 0.04258342459797859, "learning_rate": 1.3798746843729543e-06, "loss": 0.0031, "step": 388100 }, { "epoch": 36.236348361803415, "grad_norm": 0.0009412491344846785, "learning_rate": 1.3789394931263445e-06, "loss": 0.0035, "step": 388200 }, { "epoch": 36.24568281527117, "grad_norm": 0.04502703994512558, "learning_rate": 1.3780043018797346e-06, "loss": 0.0028, "step": 388300 }, { "epoch": 36.25501726873892, "grad_norm": 2.143099784851074, "learning_rate": 1.3770691106331246e-06, "loss": 0.0026, "step": 388400 }, { "epoch": 36.26435172220666, "grad_norm": 6.325109004974365, "learning_rate": 1.3761339193865148e-06, "loss": 0.003, "step": 388500 }, { "epoch": 36.27368617567441, "grad_norm": 0.3783659040927887, "learning_rate": 1.3751987281399049e-06, "loss": 0.0018, "step": 388600 }, { "epoch": 36.283020629142165, "grad_norm": 1.154820442199707, "learning_rate": 1.3742635368932947e-06, "loss": 0.0021, "step": 388700 }, { "epoch": 36.292355082609916, "grad_norm": 0.0006935182609595358, "learning_rate": 1.3733283456466847e-06, "loss": 0.0027, "step": 388800 }, { "epoch": 36.30168953607766, "grad_norm": 2.857196807861328, "learning_rate": 1.372393154400075e-06, "loss": 0.002, "step": 388900 }, { "epoch": 36.31102398954541, "grad_norm": 0.012682762928307056, "learning_rate": 1.371457963153465e-06, "loss": 0.0041, "step": 389000 }, { "epoch": 36.32035844301316, "grad_norm": 0.15112626552581787, "learning_rate": 1.370522771906855e-06, "loss": 0.0026, "step": 389100 }, { "epoch": 36.329692896480914, "grad_norm": 0.14497500658035278, "learning_rate": 1.369587580660245e-06, "loss": 0.0033, "step": 389200 }, { "epoch": 36.33902734994866, "grad_norm": 4.551285266876221, "learning_rate": 1.3686523894136353e-06, "loss": 0.0032, "step": 389300 }, { "epoch": 36.34836180341641, "grad_norm": 0.05212021991610527, "learning_rate": 1.3677171981670253e-06, "loss": 0.0022, "step": 389400 }, { "epoch": 36.35769625688416, "grad_norm": 0.23040683567523956, "learning_rate": 1.3667820069204154e-06, "loss": 0.0019, "step": 389500 }, { "epoch": 36.36703071035191, "grad_norm": 0.10259600728750229, "learning_rate": 1.3658468156738052e-06, "loss": 0.0025, "step": 389600 }, { "epoch": 36.376365163819656, "grad_norm": 1.44595468044281, "learning_rate": 1.3649116244271954e-06, "loss": 0.0037, "step": 389700 }, { "epoch": 36.38569961728741, "grad_norm": 0.02890961244702339, "learning_rate": 1.3639764331805855e-06, "loss": 0.0028, "step": 389800 }, { "epoch": 36.39503407075516, "grad_norm": 0.07578392326831818, "learning_rate": 1.3630412419339755e-06, "loss": 0.004, "step": 389900 }, { "epoch": 36.40436852422291, "grad_norm": 0.370111882686615, "learning_rate": 1.3621060506873657e-06, "loss": 0.0031, "step": 390000 }, { "epoch": 36.40436852422291, "eval_accuracy": 0.700536756126021, "eval_f1": 0.8283513582038661, "eval_loss": 0.3395070433616638, "eval_roc_auc": 0.9119982576582881, "eval_runtime": 146.0222, "eval_samples_per_second": 293.448, "eval_steps_per_second": 293.448, "step": 390000 }, { "epoch": 36.413702977690654, "grad_norm": 4.980401515960693, "learning_rate": 1.3611708594407558e-06, "loss": 0.0024, "step": 390100 }, { "epoch": 36.423037431158406, "grad_norm": 0.5566803216934204, "learning_rate": 1.3602356681941458e-06, "loss": 0.0024, "step": 390200 }, { "epoch": 36.43237188462616, "grad_norm": 0.30325576663017273, "learning_rate": 1.359300476947536e-06, "loss": 0.0034, "step": 390300 }, { "epoch": 36.4417063380939, "grad_norm": 2.9954562187194824, "learning_rate": 1.358365285700926e-06, "loss": 0.0021, "step": 390400 }, { "epoch": 36.45104079156165, "grad_norm": 2.843416929244995, "learning_rate": 1.3574300944543159e-06, "loss": 0.0036, "step": 390500 }, { "epoch": 36.460375245029404, "grad_norm": 0.5116952061653137, "learning_rate": 1.356494903207706e-06, "loss": 0.0023, "step": 390600 }, { "epoch": 36.469709698497155, "grad_norm": 0.006106688175350428, "learning_rate": 1.3555597119610962e-06, "loss": 0.0033, "step": 390700 }, { "epoch": 36.4790441519649, "grad_norm": 0.9717795252799988, "learning_rate": 1.3546245207144862e-06, "loss": 0.0034, "step": 390800 }, { "epoch": 36.48837860543265, "grad_norm": 0.12970545887947083, "learning_rate": 1.3536893294678762e-06, "loss": 0.0034, "step": 390900 }, { "epoch": 36.4977130589004, "grad_norm": 0.5004264712333679, "learning_rate": 1.3527541382212665e-06, "loss": 0.0037, "step": 391000 }, { "epoch": 36.50704751236815, "grad_norm": 2.0509731769561768, "learning_rate": 1.3518189469746565e-06, "loss": 0.0038, "step": 391100 }, { "epoch": 36.5163819658359, "grad_norm": 0.007547788787633181, "learning_rate": 1.3508837557280465e-06, "loss": 0.0021, "step": 391200 }, { "epoch": 36.52571641930365, "grad_norm": 0.11077240109443665, "learning_rate": 1.3499485644814368e-06, "loss": 0.007, "step": 391300 }, { "epoch": 36.5350508727714, "grad_norm": 0.4523663818836212, "learning_rate": 1.3490133732348266e-06, "loss": 0.0022, "step": 391400 }, { "epoch": 36.54438532623915, "grad_norm": 0.05871708691120148, "learning_rate": 1.3480781819882166e-06, "loss": 0.0026, "step": 391500 }, { "epoch": 36.553719779706896, "grad_norm": 0.0025561086367815733, "learning_rate": 1.3471429907416066e-06, "loss": 0.0018, "step": 391600 }, { "epoch": 36.56305423317465, "grad_norm": 0.000544212874956429, "learning_rate": 1.3462077994949969e-06, "loss": 0.0029, "step": 391700 }, { "epoch": 36.5723886866424, "grad_norm": 1.1879451274871826, "learning_rate": 1.345272608248387e-06, "loss": 0.0027, "step": 391800 }, { "epoch": 36.58172314011015, "grad_norm": 0.0028567807748913765, "learning_rate": 1.344337417001777e-06, "loss": 0.004, "step": 391900 }, { "epoch": 36.591057593577894, "grad_norm": 0.043377749621868134, "learning_rate": 1.3434022257551672e-06, "loss": 0.0028, "step": 392000 }, { "epoch": 36.600392047045645, "grad_norm": 4.2701921463012695, "learning_rate": 1.3424670345085572e-06, "loss": 0.0037, "step": 392100 }, { "epoch": 36.609726500513396, "grad_norm": 0.008841064758598804, "learning_rate": 1.3415318432619473e-06, "loss": 0.0022, "step": 392200 }, { "epoch": 36.61906095398115, "grad_norm": 0.06021866574883461, "learning_rate": 1.340596652015337e-06, "loss": 0.002, "step": 392300 }, { "epoch": 36.62839540744889, "grad_norm": 0.12472377717494965, "learning_rate": 1.3396614607687273e-06, "loss": 0.0037, "step": 392400 }, { "epoch": 36.63772986091664, "grad_norm": 0.11049499362707138, "learning_rate": 1.3387262695221173e-06, "loss": 0.0024, "step": 392500 }, { "epoch": 36.647064314384394, "grad_norm": 0.43478870391845703, "learning_rate": 1.3377910782755074e-06, "loss": 0.0034, "step": 392600 }, { "epoch": 36.656398767852146, "grad_norm": 0.049256861209869385, "learning_rate": 1.3368558870288976e-06, "loss": 0.0021, "step": 392700 }, { "epoch": 36.66573322131989, "grad_norm": 1.5348069667816162, "learning_rate": 1.3359206957822876e-06, "loss": 0.0048, "step": 392800 }, { "epoch": 36.67506767478764, "grad_norm": 0.29588955640792847, "learning_rate": 1.3349855045356777e-06, "loss": 0.0025, "step": 392900 }, { "epoch": 36.68440212825539, "grad_norm": 0.04219916835427284, "learning_rate": 1.3340503132890677e-06, "loss": 0.003, "step": 393000 }, { "epoch": 36.69373658172314, "grad_norm": 0.10411985963582993, "learning_rate": 1.333115122042458e-06, "loss": 0.0039, "step": 393100 }, { "epoch": 36.70307103519089, "grad_norm": 6.2748637199401855, "learning_rate": 1.3321799307958478e-06, "loss": 0.0024, "step": 393200 }, { "epoch": 36.71240548865864, "grad_norm": 0.3649994134902954, "learning_rate": 1.3312447395492378e-06, "loss": 0.0029, "step": 393300 }, { "epoch": 36.72173994212639, "grad_norm": 0.2523908317089081, "learning_rate": 1.3303095483026278e-06, "loss": 0.0037, "step": 393400 }, { "epoch": 36.731074395594135, "grad_norm": 0.04537256062030792, "learning_rate": 1.329374357056018e-06, "loss": 0.0037, "step": 393500 }, { "epoch": 36.740408849061886, "grad_norm": 1.4119977951049805, "learning_rate": 1.328439165809408e-06, "loss": 0.0026, "step": 393600 }, { "epoch": 36.74974330252964, "grad_norm": 0.35118287801742554, "learning_rate": 1.3275039745627981e-06, "loss": 0.0033, "step": 393700 }, { "epoch": 36.75907775599739, "grad_norm": 0.114301897585392, "learning_rate": 1.3265687833161884e-06, "loss": 0.0023, "step": 393800 }, { "epoch": 36.76841220946513, "grad_norm": 0.059295885264873505, "learning_rate": 1.3256335920695784e-06, "loss": 0.0021, "step": 393900 }, { "epoch": 36.777746662932884, "grad_norm": 4.907718181610107, "learning_rate": 1.3246984008229684e-06, "loss": 0.0024, "step": 394000 }, { "epoch": 36.787081116400635, "grad_norm": 0.04575495794415474, "learning_rate": 1.3237632095763583e-06, "loss": 0.0021, "step": 394100 }, { "epoch": 36.79641556986839, "grad_norm": 0.3550935387611389, "learning_rate": 1.3228280183297485e-06, "loss": 0.0023, "step": 394200 }, { "epoch": 36.80575002333613, "grad_norm": 0.10062967985868454, "learning_rate": 1.3218928270831385e-06, "loss": 0.0032, "step": 394300 }, { "epoch": 36.81508447680388, "grad_norm": 0.06100776791572571, "learning_rate": 1.3209576358365286e-06, "loss": 0.0031, "step": 394400 }, { "epoch": 36.824418930271634, "grad_norm": 0.14368194341659546, "learning_rate": 1.3200224445899188e-06, "loss": 0.0024, "step": 394500 }, { "epoch": 36.833753383739385, "grad_norm": 0.2178688496351242, "learning_rate": 1.3190872533433088e-06, "loss": 0.0035, "step": 394600 }, { "epoch": 36.84308783720713, "grad_norm": 0.1664189249277115, "learning_rate": 1.3181520620966989e-06, "loss": 0.0029, "step": 394700 }, { "epoch": 36.85242229067488, "grad_norm": 0.1370544284582138, "learning_rate": 1.3172168708500891e-06, "loss": 0.0039, "step": 394800 }, { "epoch": 36.86175674414263, "grad_norm": 0.013989516533911228, "learning_rate": 1.3162816796034791e-06, "loss": 0.0035, "step": 394900 }, { "epoch": 36.87109119761038, "grad_norm": 0.03354306146502495, "learning_rate": 1.315346488356869e-06, "loss": 0.0021, "step": 395000 }, { "epoch": 36.87109119761038, "eval_accuracy": 0.6970595099183198, "eval_f1": 0.8256834319010485, "eval_loss": 0.3432331085205078, "eval_roc_auc": 0.9112693915686547, "eval_runtime": 146.4492, "eval_samples_per_second": 292.593, "eval_steps_per_second": 292.593, "step": 395000 }, { "epoch": 36.88042565107813, "grad_norm": 0.26694273948669434, "learning_rate": 1.314411297110259e-06, "loss": 0.0024, "step": 395100 }, { "epoch": 36.88976010454588, "grad_norm": 0.0032359319739043713, "learning_rate": 1.3134761058636492e-06, "loss": 0.0023, "step": 395200 }, { "epoch": 36.89909455801363, "grad_norm": 0.0012230562279000878, "learning_rate": 1.3125409146170393e-06, "loss": 0.0014, "step": 395300 }, { "epoch": 36.90842901148138, "grad_norm": 2.522773027420044, "learning_rate": 1.3116057233704293e-06, "loss": 0.0039, "step": 395400 }, { "epoch": 36.917763464949125, "grad_norm": 0.0651460513472557, "learning_rate": 1.3106705321238195e-06, "loss": 0.0041, "step": 395500 }, { "epoch": 36.92709791841688, "grad_norm": 0.7865708470344543, "learning_rate": 1.3097353408772096e-06, "loss": 0.0024, "step": 395600 }, { "epoch": 36.93643237188463, "grad_norm": 8.100804328918457, "learning_rate": 1.3088001496305996e-06, "loss": 0.0038, "step": 395700 }, { "epoch": 36.94576682535238, "grad_norm": 0.059443943202495575, "learning_rate": 1.3078649583839898e-06, "loss": 0.0025, "step": 395800 }, { "epoch": 36.95510127882012, "grad_norm": 0.01029989868402481, "learning_rate": 1.3069297671373797e-06, "loss": 0.0022, "step": 395900 }, { "epoch": 36.964435732287875, "grad_norm": 0.11477924138307571, "learning_rate": 1.3059945758907697e-06, "loss": 0.0033, "step": 396000 }, { "epoch": 36.973770185755626, "grad_norm": 0.10749412328004837, "learning_rate": 1.3050593846441597e-06, "loss": 0.003, "step": 396100 }, { "epoch": 36.98310463922337, "grad_norm": 0.25121328234672546, "learning_rate": 1.30412419339755e-06, "loss": 0.0027, "step": 396200 }, { "epoch": 36.99243909269112, "grad_norm": 0.12134679406881332, "learning_rate": 1.30318900215094e-06, "loss": 0.0031, "step": 396300 }, { "epoch": 37.00177354615887, "grad_norm": 0.38787534832954407, "learning_rate": 1.30225381090433e-06, "loss": 0.0029, "step": 396400 }, { "epoch": 37.011107999626624, "grad_norm": 0.18803119659423828, "learning_rate": 1.3013186196577203e-06, "loss": 0.0026, "step": 396500 }, { "epoch": 37.02044245309437, "grad_norm": 0.016263354569673538, "learning_rate": 1.3003834284111103e-06, "loss": 0.0036, "step": 396600 }, { "epoch": 37.02977690656212, "grad_norm": 4.264042854309082, "learning_rate": 1.2994482371645003e-06, "loss": 0.0031, "step": 396700 }, { "epoch": 37.03911136002987, "grad_norm": 1.4437967538833618, "learning_rate": 1.2985130459178901e-06, "loss": 0.0019, "step": 396800 }, { "epoch": 37.04844581349762, "grad_norm": 0.1934203952550888, "learning_rate": 1.2975778546712802e-06, "loss": 0.0015, "step": 396900 }, { "epoch": 37.057780266965366, "grad_norm": 0.11826132982969284, "learning_rate": 1.2966426634246704e-06, "loss": 0.0018, "step": 397000 }, { "epoch": 37.06711472043312, "grad_norm": 1.321751356124878, "learning_rate": 1.2957074721780604e-06, "loss": 0.0022, "step": 397100 }, { "epoch": 37.07644917390087, "grad_norm": 0.11550211161375046, "learning_rate": 1.2947722809314505e-06, "loss": 0.0047, "step": 397200 }, { "epoch": 37.08578362736862, "grad_norm": 0.389514684677124, "learning_rate": 1.2938370896848407e-06, "loss": 0.0019, "step": 397300 }, { "epoch": 37.095118080836365, "grad_norm": 0.1073416993021965, "learning_rate": 1.2929018984382308e-06, "loss": 0.0027, "step": 397400 }, { "epoch": 37.104452534304116, "grad_norm": 0.0037626640405505896, "learning_rate": 1.2919667071916208e-06, "loss": 0.0017, "step": 397500 }, { "epoch": 37.11378698777187, "grad_norm": 0.01001680362969637, "learning_rate": 1.291031515945011e-06, "loss": 0.0036, "step": 397600 }, { "epoch": 37.12312144123962, "grad_norm": 7.481360912322998, "learning_rate": 1.2900963246984008e-06, "loss": 0.0032, "step": 397700 }, { "epoch": 37.13245589470736, "grad_norm": 2.0926270484924316, "learning_rate": 1.2891611334517909e-06, "loss": 0.003, "step": 397800 }, { "epoch": 37.141790348175114, "grad_norm": 0.0014568333281204104, "learning_rate": 1.288225942205181e-06, "loss": 0.0019, "step": 397900 }, { "epoch": 37.151124801642865, "grad_norm": 0.27776166796684265, "learning_rate": 1.2872907509585711e-06, "loss": 0.0018, "step": 398000 }, { "epoch": 37.16045925511062, "grad_norm": 4.180841445922852, "learning_rate": 1.2863555597119612e-06, "loss": 0.0031, "step": 398100 }, { "epoch": 37.16979370857836, "grad_norm": 0.06373821198940277, "learning_rate": 1.2854203684653512e-06, "loss": 0.0042, "step": 398200 }, { "epoch": 37.17912816204611, "grad_norm": 0.05243508145213127, "learning_rate": 1.2844851772187415e-06, "loss": 0.0025, "step": 398300 }, { "epoch": 37.18846261551386, "grad_norm": 1.3010802268981934, "learning_rate": 1.2835499859721315e-06, "loss": 0.0033, "step": 398400 }, { "epoch": 37.197797068981615, "grad_norm": 0.36730197072029114, "learning_rate": 1.2826147947255215e-06, "loss": 0.002, "step": 398500 }, { "epoch": 37.20713152244936, "grad_norm": 7.5453314781188965, "learning_rate": 1.2816796034789113e-06, "loss": 0.0026, "step": 398600 }, { "epoch": 37.21646597591711, "grad_norm": 1.7457879781723022, "learning_rate": 1.2807444122323016e-06, "loss": 0.0018, "step": 398700 }, { "epoch": 37.22580042938486, "grad_norm": 0.054592862725257874, "learning_rate": 1.2798092209856916e-06, "loss": 0.003, "step": 398800 }, { "epoch": 37.235134882852606, "grad_norm": 0.0410696379840374, "learning_rate": 1.2788740297390816e-06, "loss": 0.0024, "step": 398900 }, { "epoch": 37.24446933632036, "grad_norm": 0.0009878079872578382, "learning_rate": 1.2779388384924719e-06, "loss": 0.0012, "step": 399000 }, { "epoch": 37.25380378978811, "grad_norm": 0.007967378944158554, "learning_rate": 1.277003647245862e-06, "loss": 0.0038, "step": 399100 }, { "epoch": 37.26313824325586, "grad_norm": 0.013001919724047184, "learning_rate": 1.276068455999252e-06, "loss": 0.0021, "step": 399200 }, { "epoch": 37.272472696723604, "grad_norm": 0.24465271830558777, "learning_rate": 1.2751332647526422e-06, "loss": 0.0022, "step": 399300 }, { "epoch": 37.281807150191355, "grad_norm": 1.0939611196517944, "learning_rate": 1.2741980735060322e-06, "loss": 0.0024, "step": 399400 }, { "epoch": 37.291141603659106, "grad_norm": 2.6039273738861084, "learning_rate": 1.273262882259422e-06, "loss": 0.0026, "step": 399500 }, { "epoch": 37.30047605712686, "grad_norm": 0.015270901843905449, "learning_rate": 1.272327691012812e-06, "loss": 0.003, "step": 399600 }, { "epoch": 37.3098105105946, "grad_norm": 3.2714245319366455, "learning_rate": 1.2713924997662023e-06, "loss": 0.002, "step": 399700 }, { "epoch": 37.31914496406235, "grad_norm": 0.008654295466840267, "learning_rate": 1.2704573085195923e-06, "loss": 0.0036, "step": 399800 }, { "epoch": 37.328479417530104, "grad_norm": 5.458124160766602, "learning_rate": 1.2695221172729824e-06, "loss": 0.0039, "step": 399900 }, { "epoch": 37.337813870997856, "grad_norm": 1.0217320919036865, "learning_rate": 1.2685869260263726e-06, "loss": 0.0024, "step": 400000 }, { "epoch": 37.337813870997856, "eval_accuracy": 0.7017269544924154, "eval_f1": 0.827615810498283, "eval_loss": 0.34201958775520325, "eval_roc_auc": 0.9108797815913422, "eval_runtime": 145.9778, "eval_samples_per_second": 293.538, "eval_steps_per_second": 293.538, "step": 400000 }, { "epoch": 37.3471483244656, "grad_norm": 0.06382410228252411, "learning_rate": 1.2676517347797626e-06, "loss": 0.0032, "step": 400100 }, { "epoch": 37.35648277793335, "grad_norm": 0.07355760037899017, "learning_rate": 1.2667165435331527e-06, "loss": 0.0034, "step": 400200 }, { "epoch": 37.3658172314011, "grad_norm": 0.33536916971206665, "learning_rate": 1.265781352286543e-06, "loss": 0.0019, "step": 400300 }, { "epoch": 37.375151684868854, "grad_norm": 0.29095345735549927, "learning_rate": 1.264846161039933e-06, "loss": 0.0022, "step": 400400 }, { "epoch": 37.3844861383366, "grad_norm": 0.5944725275039673, "learning_rate": 1.2639109697933228e-06, "loss": 0.003, "step": 400500 }, { "epoch": 37.39382059180435, "grad_norm": 0.09711537510156631, "learning_rate": 1.2629757785467128e-06, "loss": 0.0041, "step": 400600 }, { "epoch": 37.4031550452721, "grad_norm": 0.057198408991098404, "learning_rate": 1.2620405873001028e-06, "loss": 0.0017, "step": 400700 }, { "epoch": 37.41248949873985, "grad_norm": 0.005650304723531008, "learning_rate": 1.261105396053493e-06, "loss": 0.0033, "step": 400800 }, { "epoch": 37.421823952207596, "grad_norm": 0.18480254709720612, "learning_rate": 1.260170204806883e-06, "loss": 0.003, "step": 400900 }, { "epoch": 37.43115840567535, "grad_norm": 0.0004619262181222439, "learning_rate": 1.2592350135602731e-06, "loss": 0.0027, "step": 401000 }, { "epoch": 37.4404928591431, "grad_norm": 3.8844845294952393, "learning_rate": 1.2582998223136634e-06, "loss": 0.0026, "step": 401100 }, { "epoch": 37.44982731261085, "grad_norm": 0.022511711344122887, "learning_rate": 1.2573646310670534e-06, "loss": 0.0017, "step": 401200 }, { "epoch": 37.459161766078594, "grad_norm": 0.02594054490327835, "learning_rate": 1.2564294398204434e-06, "loss": 0.0031, "step": 401300 }, { "epoch": 37.468496219546346, "grad_norm": 0.03686030954122543, "learning_rate": 1.2554942485738332e-06, "loss": 0.0026, "step": 401400 }, { "epoch": 37.4778306730141, "grad_norm": 0.0107142748311162, "learning_rate": 1.2545590573272235e-06, "loss": 0.0036, "step": 401500 }, { "epoch": 37.48716512648184, "grad_norm": 0.10539250820875168, "learning_rate": 1.2536238660806135e-06, "loss": 0.0026, "step": 401600 }, { "epoch": 37.49649957994959, "grad_norm": 0.001194833079352975, "learning_rate": 1.2526886748340036e-06, "loss": 0.0016, "step": 401700 }, { "epoch": 37.505834033417344, "grad_norm": 0.0029820192139595747, "learning_rate": 1.2517534835873938e-06, "loss": 0.0032, "step": 401800 }, { "epoch": 37.515168486885095, "grad_norm": 0.41167914867401123, "learning_rate": 1.2508182923407838e-06, "loss": 0.002, "step": 401900 }, { "epoch": 37.52450294035284, "grad_norm": 0.9985921382904053, "learning_rate": 1.2498831010941739e-06, "loss": 0.0023, "step": 402000 }, { "epoch": 37.53383739382059, "grad_norm": 0.10556675493717194, "learning_rate": 1.2489479098475639e-06, "loss": 0.0033, "step": 402100 }, { "epoch": 37.54317184728834, "grad_norm": 0.10472029447555542, "learning_rate": 1.248012718600954e-06, "loss": 0.0025, "step": 402200 }, { "epoch": 37.55250630075609, "grad_norm": 0.06168674677610397, "learning_rate": 1.2470775273543442e-06, "loss": 0.0026, "step": 402300 }, { "epoch": 37.56184075422384, "grad_norm": 0.3578072786331177, "learning_rate": 1.2461423361077342e-06, "loss": 0.0018, "step": 402400 }, { "epoch": 37.57117520769159, "grad_norm": 0.2870294451713562, "learning_rate": 1.2452071448611242e-06, "loss": 0.0025, "step": 402500 }, { "epoch": 37.58050966115934, "grad_norm": 1.855111002922058, "learning_rate": 1.2442719536145143e-06, "loss": 0.0019, "step": 402600 }, { "epoch": 37.58984411462709, "grad_norm": 0.011662672273814678, "learning_rate": 1.2433367623679043e-06, "loss": 0.0025, "step": 402700 }, { "epoch": 37.599178568094835, "grad_norm": 2.4253878593444824, "learning_rate": 1.2424015711212945e-06, "loss": 0.003, "step": 402800 }, { "epoch": 37.60851302156259, "grad_norm": 0.04312540218234062, "learning_rate": 1.2414663798746843e-06, "loss": 0.0028, "step": 402900 }, { "epoch": 37.61784747503034, "grad_norm": 0.0873066708445549, "learning_rate": 1.2405311886280746e-06, "loss": 0.0045, "step": 403000 }, { "epoch": 37.62718192849809, "grad_norm": 5.0774760246276855, "learning_rate": 1.2395959973814646e-06, "loss": 0.0024, "step": 403100 }, { "epoch": 37.63651638196583, "grad_norm": 0.011238767765462399, "learning_rate": 1.2386608061348546e-06, "loss": 0.0022, "step": 403200 }, { "epoch": 37.645850835433585, "grad_norm": 0.0076649924740195274, "learning_rate": 1.2377256148882449e-06, "loss": 0.0026, "step": 403300 }, { "epoch": 37.655185288901336, "grad_norm": 0.02123410440981388, "learning_rate": 1.2367904236416347e-06, "loss": 0.0037, "step": 403400 }, { "epoch": 37.66451974236909, "grad_norm": 0.8971595168113708, "learning_rate": 1.235855232395025e-06, "loss": 0.002, "step": 403500 }, { "epoch": 37.67385419583683, "grad_norm": 0.4493635594844818, "learning_rate": 1.234920041148415e-06, "loss": 0.0021, "step": 403600 }, { "epoch": 37.68318864930458, "grad_norm": 0.14857614040374756, "learning_rate": 1.233984849901805e-06, "loss": 0.0029, "step": 403700 }, { "epoch": 37.692523102772334, "grad_norm": 0.011085246689617634, "learning_rate": 1.233049658655195e-06, "loss": 0.0024, "step": 403800 }, { "epoch": 37.701857556240086, "grad_norm": 5.207937717437744, "learning_rate": 1.232114467408585e-06, "loss": 0.0029, "step": 403900 }, { "epoch": 37.71119200970783, "grad_norm": 0.07659745961427689, "learning_rate": 1.2311792761619753e-06, "loss": 0.0038, "step": 404000 }, { "epoch": 37.72052646317558, "grad_norm": 0.07585028558969498, "learning_rate": 1.2302440849153653e-06, "loss": 0.0028, "step": 404100 }, { "epoch": 37.72986091664333, "grad_norm": 0.3753564655780792, "learning_rate": 1.2293088936687554e-06, "loss": 0.004, "step": 404200 }, { "epoch": 37.73919537011108, "grad_norm": 0.009945419616997242, "learning_rate": 1.2283737024221454e-06, "loss": 0.0039, "step": 404300 }, { "epoch": 37.74852982357883, "grad_norm": 0.5873762369155884, "learning_rate": 1.2274385111755354e-06, "loss": 0.0035, "step": 404400 }, { "epoch": 37.75786427704658, "grad_norm": 0.3939467966556549, "learning_rate": 1.2265033199289255e-06, "loss": 0.0021, "step": 404500 }, { "epoch": 37.76719873051433, "grad_norm": 0.0066550434567034245, "learning_rate": 1.2255681286823157e-06, "loss": 0.0027, "step": 404600 }, { "epoch": 37.776533183982075, "grad_norm": 5.63255500793457, "learning_rate": 1.2246329374357057e-06, "loss": 0.0025, "step": 404700 }, { "epoch": 37.785867637449826, "grad_norm": 0.4593789875507355, "learning_rate": 1.2236977461890958e-06, "loss": 0.0026, "step": 404800 }, { "epoch": 37.79520209091758, "grad_norm": 0.022335093468427658, "learning_rate": 1.2227625549424858e-06, "loss": 0.0024, "step": 404900 }, { "epoch": 37.80453654438533, "grad_norm": 0.27143770456314087, "learning_rate": 1.2218273636958758e-06, "loss": 0.0027, "step": 405000 }, { "epoch": 37.80453654438533, "eval_accuracy": 0.6985064177362894, "eval_f1": 0.8267005632993496, "eval_loss": 0.34803053736686707, "eval_roc_auc": 0.9125193142241989, "eval_runtime": 145.3321, "eval_samples_per_second": 294.842, "eval_steps_per_second": 294.842, "step": 405000 }, { "epoch": 37.81387099785307, "grad_norm": 0.05374858155846596, "learning_rate": 1.220892172449266e-06, "loss": 0.003, "step": 405100 }, { "epoch": 37.823205451320824, "grad_norm": 0.9291340708732605, "learning_rate": 1.219956981202656e-06, "loss": 0.0023, "step": 405200 }, { "epoch": 37.832539904788575, "grad_norm": 1.9136075973510742, "learning_rate": 1.2190217899560461e-06, "loss": 0.0021, "step": 405300 }, { "epoch": 37.84187435825633, "grad_norm": 0.02880449965596199, "learning_rate": 1.2180865987094362e-06, "loss": 0.0017, "step": 405400 }, { "epoch": 37.85120881172407, "grad_norm": 0.0018058293499052525, "learning_rate": 1.2171514074628262e-06, "loss": 0.0019, "step": 405500 }, { "epoch": 37.86054326519182, "grad_norm": 0.016214709728956223, "learning_rate": 1.2162162162162164e-06, "loss": 0.0022, "step": 405600 }, { "epoch": 37.86987771865957, "grad_norm": 0.0020329332910478115, "learning_rate": 1.2152810249696063e-06, "loss": 0.0018, "step": 405700 }, { "epoch": 37.879212172127325, "grad_norm": 0.14503751695156097, "learning_rate": 1.2143458337229965e-06, "loss": 0.0035, "step": 405800 }, { "epoch": 37.88854662559507, "grad_norm": 0.26547160744667053, "learning_rate": 1.2134106424763865e-06, "loss": 0.0026, "step": 405900 }, { "epoch": 37.89788107906282, "grad_norm": 0.01896641030907631, "learning_rate": 1.2124754512297766e-06, "loss": 0.0024, "step": 406000 }, { "epoch": 37.90721553253057, "grad_norm": 0.05965280532836914, "learning_rate": 1.2115402599831666e-06, "loss": 0.0021, "step": 406100 }, { "epoch": 37.91654998599832, "grad_norm": 0.04015364125370979, "learning_rate": 1.2106050687365566e-06, "loss": 0.002, "step": 406200 }, { "epoch": 37.92588443946607, "grad_norm": 0.6366254091262817, "learning_rate": 1.2096698774899469e-06, "loss": 0.0036, "step": 406300 }, { "epoch": 37.93521889293382, "grad_norm": 0.01782837137579918, "learning_rate": 1.208734686243337e-06, "loss": 0.0014, "step": 406400 }, { "epoch": 37.94455334640157, "grad_norm": 8.117307662963867, "learning_rate": 1.207799494996727e-06, "loss": 0.0035, "step": 406500 }, { "epoch": 37.95388779986932, "grad_norm": 0.007678859401494265, "learning_rate": 1.206864303750117e-06, "loss": 0.0025, "step": 406600 }, { "epoch": 37.963222253337065, "grad_norm": 0.0034046522341668606, "learning_rate": 1.205929112503507e-06, "loss": 0.0029, "step": 406700 }, { "epoch": 37.972556706804816, "grad_norm": 6.338762283325195, "learning_rate": 1.2049939212568972e-06, "loss": 0.0029, "step": 406800 }, { "epoch": 37.98189116027257, "grad_norm": 0.032650239765644073, "learning_rate": 1.2040587300102873e-06, "loss": 0.0033, "step": 406900 }, { "epoch": 37.99122561374031, "grad_norm": 0.10023938864469528, "learning_rate": 1.2031235387636773e-06, "loss": 0.0043, "step": 407000 }, { "epoch": 38.00056006720806, "grad_norm": 1.3646725416183472, "learning_rate": 1.2021883475170673e-06, "loss": 0.0037, "step": 407100 }, { "epoch": 38.009894520675815, "grad_norm": 3.720980644226074, "learning_rate": 1.2012531562704574e-06, "loss": 0.004, "step": 407200 }, { "epoch": 38.019228974143566, "grad_norm": 7.002684116363525, "learning_rate": 1.2003179650238476e-06, "loss": 0.0019, "step": 407300 }, { "epoch": 38.02856342761131, "grad_norm": 3.462742328643799, "learning_rate": 1.1993827737772376e-06, "loss": 0.0024, "step": 407400 }, { "epoch": 38.03789788107906, "grad_norm": 0.031536784023046494, "learning_rate": 1.1984475825306277e-06, "loss": 0.003, "step": 407500 }, { "epoch": 38.04723233454681, "grad_norm": 0.002347042551264167, "learning_rate": 1.1975123912840177e-06, "loss": 0.0027, "step": 407600 }, { "epoch": 38.056566788014564, "grad_norm": 0.0995447039604187, "learning_rate": 1.1965772000374077e-06, "loss": 0.002, "step": 407700 }, { "epoch": 38.06590124148231, "grad_norm": 0.0369919054210186, "learning_rate": 1.195642008790798e-06, "loss": 0.0023, "step": 407800 }, { "epoch": 38.07523569495006, "grad_norm": 0.03724168986082077, "learning_rate": 1.1947068175441878e-06, "loss": 0.0021, "step": 407900 }, { "epoch": 38.08457014841781, "grad_norm": 0.002654020907357335, "learning_rate": 1.193771626297578e-06, "loss": 0.0023, "step": 408000 }, { "epoch": 38.09390460188556, "grad_norm": 0.44839516282081604, "learning_rate": 1.192836435050968e-06, "loss": 0.0022, "step": 408100 }, { "epoch": 38.103239055353306, "grad_norm": 1.7285419702529907, "learning_rate": 1.191901243804358e-06, "loss": 0.002, "step": 408200 }, { "epoch": 38.11257350882106, "grad_norm": 5.041800498962402, "learning_rate": 1.1909660525577481e-06, "loss": 0.0024, "step": 408300 }, { "epoch": 38.12190796228881, "grad_norm": 1.6561737060546875, "learning_rate": 1.1900308613111381e-06, "loss": 0.0031, "step": 408400 }, { "epoch": 38.13124241575656, "grad_norm": 0.19983725249767303, "learning_rate": 1.1890956700645282e-06, "loss": 0.0019, "step": 408500 }, { "epoch": 38.140576869224304, "grad_norm": 0.0011417688801884651, "learning_rate": 1.1881604788179184e-06, "loss": 0.004, "step": 408600 }, { "epoch": 38.149911322692056, "grad_norm": 0.6275866627693176, "learning_rate": 1.1872252875713085e-06, "loss": 0.0017, "step": 408700 }, { "epoch": 38.15924577615981, "grad_norm": 0.010230201296508312, "learning_rate": 1.1862900963246985e-06, "loss": 0.0015, "step": 408800 }, { "epoch": 38.16858022962756, "grad_norm": 1.0574774742126465, "learning_rate": 1.1853549050780885e-06, "loss": 0.0025, "step": 408900 }, { "epoch": 38.1779146830953, "grad_norm": 0.046486616134643555, "learning_rate": 1.1844197138314785e-06, "loss": 0.0027, "step": 409000 }, { "epoch": 38.187249136563054, "grad_norm": 0.010484246537089348, "learning_rate": 1.1834845225848688e-06, "loss": 0.0036, "step": 409100 }, { "epoch": 38.196583590030805, "grad_norm": 0.0006712300237268209, "learning_rate": 1.1825493313382588e-06, "loss": 0.0028, "step": 409200 }, { "epoch": 38.205918043498556, "grad_norm": 0.05887536704540253, "learning_rate": 1.1816141400916488e-06, "loss": 0.002, "step": 409300 }, { "epoch": 38.2152524969663, "grad_norm": 2.891683578491211, "learning_rate": 1.1806789488450389e-06, "loss": 0.0014, "step": 409400 }, { "epoch": 38.22458695043405, "grad_norm": 0.014088544994592667, "learning_rate": 1.179743757598429e-06, "loss": 0.0025, "step": 409500 }, { "epoch": 38.2339214039018, "grad_norm": 0.2805967330932617, "learning_rate": 1.1788085663518191e-06, "loss": 0.0027, "step": 409600 }, { "epoch": 38.243255857369554, "grad_norm": 0.3840297758579254, "learning_rate": 1.177873375105209e-06, "loss": 0.0031, "step": 409700 }, { "epoch": 38.2525903108373, "grad_norm": 0.010899716056883335, "learning_rate": 1.1769381838585992e-06, "loss": 0.0027, "step": 409800 }, { "epoch": 38.26192476430505, "grad_norm": 0.03330165147781372, "learning_rate": 1.1760029926119892e-06, "loss": 0.0029, "step": 409900 }, { "epoch": 38.2712592177728, "grad_norm": 1.006510853767395, "learning_rate": 1.1750678013653793e-06, "loss": 0.0012, "step": 410000 }, { "epoch": 38.2712592177728, "eval_accuracy": 0.7008868144690782, "eval_f1": 0.826432785503582, "eval_loss": 0.346250981092453, "eval_roc_auc": 0.9094426886915375, "eval_runtime": 145.9651, "eval_samples_per_second": 293.563, "eval_steps_per_second": 293.563, "step": 410000 }, { "epoch": 38.280593671240545, "grad_norm": 0.2214205116033554, "learning_rate": 1.1741326101187695e-06, "loss": 0.0018, "step": 410100 }, { "epoch": 38.2899281247083, "grad_norm": 0.4349832832813263, "learning_rate": 1.1731974188721593e-06, "loss": 0.003, "step": 410200 }, { "epoch": 38.29926257817605, "grad_norm": 0.02776012010872364, "learning_rate": 1.1722622276255496e-06, "loss": 0.0037, "step": 410300 }, { "epoch": 38.3085970316438, "grad_norm": 0.0032148556783795357, "learning_rate": 1.1713270363789396e-06, "loss": 0.0029, "step": 410400 }, { "epoch": 38.317931485111544, "grad_norm": 0.0810672789812088, "learning_rate": 1.1703918451323296e-06, "loss": 0.002, "step": 410500 }, { "epoch": 38.327265938579295, "grad_norm": 0.34009966254234314, "learning_rate": 1.1694566538857197e-06, "loss": 0.002, "step": 410600 }, { "epoch": 38.336600392047046, "grad_norm": 0.0016459883190691471, "learning_rate": 1.1685214626391097e-06, "loss": 0.0024, "step": 410700 }, { "epoch": 38.3459348455148, "grad_norm": 1.6556181907653809, "learning_rate": 1.1675862713925e-06, "loss": 0.002, "step": 410800 }, { "epoch": 38.35526929898254, "grad_norm": 0.0432024821639061, "learning_rate": 1.16665108014589e-06, "loss": 0.0028, "step": 410900 }, { "epoch": 38.36460375245029, "grad_norm": 0.08405964076519012, "learning_rate": 1.16571588889928e-06, "loss": 0.0029, "step": 411000 }, { "epoch": 38.373938205918044, "grad_norm": 0.047205302864313126, "learning_rate": 1.16478069765267e-06, "loss": 0.002, "step": 411100 }, { "epoch": 38.383272659385796, "grad_norm": 2.8466310501098633, "learning_rate": 1.16384550640606e-06, "loss": 0.0032, "step": 411200 }, { "epoch": 38.39260711285354, "grad_norm": 1.4765931367874146, "learning_rate": 1.1629103151594503e-06, "loss": 0.0027, "step": 411300 }, { "epoch": 38.40194156632129, "grad_norm": 0.04120161384344101, "learning_rate": 1.1619751239128403e-06, "loss": 0.0021, "step": 411400 }, { "epoch": 38.41127601978904, "grad_norm": 3.1191728115081787, "learning_rate": 1.1610399326662304e-06, "loss": 0.0025, "step": 411500 }, { "epoch": 38.420610473256794, "grad_norm": 0.00884980708360672, "learning_rate": 1.1601047414196204e-06, "loss": 0.0023, "step": 411600 }, { "epoch": 38.42994492672454, "grad_norm": 0.07466147094964981, "learning_rate": 1.1591695501730104e-06, "loss": 0.0021, "step": 411700 }, { "epoch": 38.43927938019229, "grad_norm": 0.00031288486206904054, "learning_rate": 1.1582343589264007e-06, "loss": 0.0031, "step": 411800 }, { "epoch": 38.44861383366004, "grad_norm": 0.22128795087337494, "learning_rate": 1.1572991676797907e-06, "loss": 0.0017, "step": 411900 }, { "epoch": 38.45794828712779, "grad_norm": 0.13762858510017395, "learning_rate": 1.1563639764331807e-06, "loss": 0.0029, "step": 412000 }, { "epoch": 38.467282740595536, "grad_norm": 3.613652229309082, "learning_rate": 1.1554287851865708e-06, "loss": 0.0029, "step": 412100 }, { "epoch": 38.47661719406329, "grad_norm": 0.007080894894897938, "learning_rate": 1.1544935939399608e-06, "loss": 0.0028, "step": 412200 }, { "epoch": 38.48595164753104, "grad_norm": 0.0029896784108132124, "learning_rate": 1.1535584026933508e-06, "loss": 0.0022, "step": 412300 }, { "epoch": 38.49528610099879, "grad_norm": 5.359054088592529, "learning_rate": 1.1526232114467409e-06, "loss": 0.003, "step": 412400 }, { "epoch": 38.504620554466534, "grad_norm": 5.774864196777344, "learning_rate": 1.1516880202001309e-06, "loss": 0.0021, "step": 412500 }, { "epoch": 38.513955007934285, "grad_norm": 0.02201276458799839, "learning_rate": 1.1507528289535211e-06, "loss": 0.0033, "step": 412600 }, { "epoch": 38.52328946140204, "grad_norm": 0.27247971296310425, "learning_rate": 1.1498176377069112e-06, "loss": 0.0018, "step": 412700 }, { "epoch": 38.53262391486978, "grad_norm": 0.0015010180650278926, "learning_rate": 1.1488824464603012e-06, "loss": 0.0023, "step": 412800 }, { "epoch": 38.54195836833753, "grad_norm": 0.05704183503985405, "learning_rate": 1.1479472552136912e-06, "loss": 0.0024, "step": 412900 }, { "epoch": 38.55129282180528, "grad_norm": 0.37836724519729614, "learning_rate": 1.1470120639670813e-06, "loss": 0.0019, "step": 413000 }, { "epoch": 38.560627275273035, "grad_norm": 0.2436804473400116, "learning_rate": 1.1460768727204715e-06, "loss": 0.0031, "step": 413100 }, { "epoch": 38.56996172874078, "grad_norm": 1.1694777011871338, "learning_rate": 1.1451416814738615e-06, "loss": 0.002, "step": 413200 }, { "epoch": 38.57929618220853, "grad_norm": 0.08072256296873093, "learning_rate": 1.1442064902272516e-06, "loss": 0.002, "step": 413300 }, { "epoch": 38.58863063567628, "grad_norm": 0.3967907726764679, "learning_rate": 1.1432712989806416e-06, "loss": 0.0027, "step": 413400 }, { "epoch": 38.59796508914403, "grad_norm": 0.0221639983355999, "learning_rate": 1.1423361077340316e-06, "loss": 0.0031, "step": 413500 }, { "epoch": 38.60729954261178, "grad_norm": 0.02426373027265072, "learning_rate": 1.1414009164874219e-06, "loss": 0.0018, "step": 413600 }, { "epoch": 38.61663399607953, "grad_norm": 0.016076913103461266, "learning_rate": 1.1404657252408119e-06, "loss": 0.0018, "step": 413700 }, { "epoch": 38.62596844954728, "grad_norm": 0.2609350085258484, "learning_rate": 1.139530533994202e-06, "loss": 0.0029, "step": 413800 }, { "epoch": 38.63530290301503, "grad_norm": 9.425407409667969, "learning_rate": 1.138595342747592e-06, "loss": 0.0024, "step": 413900 }, { "epoch": 38.644637356482775, "grad_norm": 5.274183750152588, "learning_rate": 1.137660151500982e-06, "loss": 0.0036, "step": 414000 }, { "epoch": 38.65397180995053, "grad_norm": 0.16067643463611603, "learning_rate": 1.1367249602543722e-06, "loss": 0.0029, "step": 414100 }, { "epoch": 38.66330626341828, "grad_norm": 0.029708359390497208, "learning_rate": 1.1357897690077623e-06, "loss": 0.0018, "step": 414200 }, { "epoch": 38.67264071688603, "grad_norm": 6.087866306304932, "learning_rate": 1.1348545777611523e-06, "loss": 0.0025, "step": 414300 }, { "epoch": 38.68197517035377, "grad_norm": 0.46657830476760864, "learning_rate": 1.1339193865145423e-06, "loss": 0.0028, "step": 414400 }, { "epoch": 38.691309623821525, "grad_norm": 0.0026466415729373693, "learning_rate": 1.1329841952679323e-06, "loss": 0.0025, "step": 414500 }, { "epoch": 38.700644077289276, "grad_norm": 0.123350590467453, "learning_rate": 1.1320490040213226e-06, "loss": 0.0032, "step": 414600 }, { "epoch": 38.70997853075703, "grad_norm": 4.515524864196777, "learning_rate": 1.1311138127747124e-06, "loss": 0.004, "step": 414700 }, { "epoch": 38.71931298422477, "grad_norm": 0.006280920002609491, "learning_rate": 1.1301786215281026e-06, "loss": 0.0027, "step": 414800 }, { "epoch": 38.72864743769252, "grad_norm": 0.06085117533802986, "learning_rate": 1.1292434302814927e-06, "loss": 0.0041, "step": 414900 }, { "epoch": 38.737981891160274, "grad_norm": 0.2343282848596573, "learning_rate": 1.1283082390348827e-06, "loss": 0.0023, "step": 415000 }, { "epoch": 38.737981891160274, "eval_accuracy": 0.6986231038506417, "eval_f1": 0.827365650897113, "eval_loss": 0.3499149978160858, "eval_roc_auc": 0.9125904013510263, "eval_runtime": 146.6119, "eval_samples_per_second": 292.268, "eval_steps_per_second": 292.268, "step": 415000 }, { "epoch": 38.747316344628025, "grad_norm": 0.30839332938194275, "learning_rate": 1.127373047788273e-06, "loss": 0.002, "step": 415100 }, { "epoch": 38.75665079809577, "grad_norm": 0.3632122278213501, "learning_rate": 1.1264378565416628e-06, "loss": 0.0028, "step": 415200 }, { "epoch": 38.76598525156352, "grad_norm": 0.04339781403541565, "learning_rate": 1.125502665295053e-06, "loss": 0.0021, "step": 415300 }, { "epoch": 38.77531970503127, "grad_norm": 0.08212874829769135, "learning_rate": 1.124567474048443e-06, "loss": 0.0026, "step": 415400 }, { "epoch": 38.78465415849902, "grad_norm": 0.021973837167024612, "learning_rate": 1.123632282801833e-06, "loss": 0.0017, "step": 415500 }, { "epoch": 38.79398861196677, "grad_norm": 1.0812125205993652, "learning_rate": 1.1226970915552231e-06, "loss": 0.0033, "step": 415600 }, { "epoch": 38.80332306543452, "grad_norm": 5.801876544952393, "learning_rate": 1.1217619003086131e-06, "loss": 0.0032, "step": 415700 }, { "epoch": 38.81265751890227, "grad_norm": 0.00997558981180191, "learning_rate": 1.1208267090620034e-06, "loss": 0.0035, "step": 415800 }, { "epoch": 38.821991972370014, "grad_norm": 1.2368406057357788, "learning_rate": 1.1198915178153934e-06, "loss": 0.0033, "step": 415900 }, { "epoch": 38.831326425837766, "grad_norm": 0.0017674629343673587, "learning_rate": 1.1189563265687834e-06, "loss": 0.0021, "step": 416000 }, { "epoch": 38.84066087930552, "grad_norm": 0.008164430037140846, "learning_rate": 1.1180211353221735e-06, "loss": 0.0028, "step": 416100 }, { "epoch": 38.84999533277327, "grad_norm": 0.06175462156534195, "learning_rate": 1.1170859440755635e-06, "loss": 0.0022, "step": 416200 }, { "epoch": 38.85932978624101, "grad_norm": 0.025002023205161095, "learning_rate": 1.1161507528289535e-06, "loss": 0.0032, "step": 416300 }, { "epoch": 38.868664239708764, "grad_norm": 0.37196484208106995, "learning_rate": 1.1152155615823438e-06, "loss": 0.0021, "step": 416400 }, { "epoch": 38.877998693176515, "grad_norm": 0.5887548923492432, "learning_rate": 1.1142803703357336e-06, "loss": 0.0023, "step": 416500 }, { "epoch": 38.887333146644266, "grad_norm": 1.6008961200714111, "learning_rate": 1.1133451790891238e-06, "loss": 0.0022, "step": 416600 }, { "epoch": 38.89666760011201, "grad_norm": 0.29365110397338867, "learning_rate": 1.1124099878425139e-06, "loss": 0.0028, "step": 416700 }, { "epoch": 38.90600205357976, "grad_norm": 0.39143070578575134, "learning_rate": 1.111474796595904e-06, "loss": 0.0022, "step": 416800 }, { "epoch": 38.91533650704751, "grad_norm": 0.0037178336642682552, "learning_rate": 1.1105396053492941e-06, "loss": 0.0022, "step": 416900 }, { "epoch": 38.924670960515265, "grad_norm": 1.4775102138519287, "learning_rate": 1.109604414102684e-06, "loss": 0.0024, "step": 417000 }, { "epoch": 38.93400541398301, "grad_norm": 0.8145736455917358, "learning_rate": 1.1086692228560742e-06, "loss": 0.0023, "step": 417100 }, { "epoch": 38.94333986745076, "grad_norm": 0.007466132286936045, "learning_rate": 1.1077340316094642e-06, "loss": 0.0026, "step": 417200 }, { "epoch": 38.95267432091851, "grad_norm": 0.6312122344970703, "learning_rate": 1.1067988403628543e-06, "loss": 0.004, "step": 417300 }, { "epoch": 38.96200877438626, "grad_norm": 0.012378214858472347, "learning_rate": 1.1058636491162443e-06, "loss": 0.0023, "step": 417400 }, { "epoch": 38.97134322785401, "grad_norm": 0.025709180161356926, "learning_rate": 1.1049284578696343e-06, "loss": 0.0023, "step": 417500 }, { "epoch": 38.98067768132176, "grad_norm": 0.21776047348976135, "learning_rate": 1.1039932666230246e-06, "loss": 0.0028, "step": 417600 }, { "epoch": 38.99001213478951, "grad_norm": 0.045823466032743454, "learning_rate": 1.1030580753764146e-06, "loss": 0.0026, "step": 417700 }, { "epoch": 38.99934658825726, "grad_norm": 0.060410771518945694, "learning_rate": 1.1021228841298046e-06, "loss": 0.0017, "step": 417800 }, { "epoch": 39.008681041725005, "grad_norm": 4.956025123596191, "learning_rate": 1.1011876928831947e-06, "loss": 0.0026, "step": 417900 }, { "epoch": 39.018015495192756, "grad_norm": 0.0036168380174785852, "learning_rate": 1.1002525016365847e-06, "loss": 0.0011, "step": 418000 }, { "epoch": 39.02734994866051, "grad_norm": 0.0009174463339149952, "learning_rate": 1.099317310389975e-06, "loss": 0.0022, "step": 418100 }, { "epoch": 39.03668440212826, "grad_norm": 0.06328033655881882, "learning_rate": 1.098382119143365e-06, "loss": 0.0019, "step": 418200 }, { "epoch": 39.046018855596, "grad_norm": 1.5870351791381836, "learning_rate": 1.097446927896755e-06, "loss": 0.0042, "step": 418300 }, { "epoch": 39.055353309063754, "grad_norm": 0.019261913374066353, "learning_rate": 1.096511736650145e-06, "loss": 0.0026, "step": 418400 }, { "epoch": 39.064687762531506, "grad_norm": 0.05717376992106438, "learning_rate": 1.095576545403535e-06, "loss": 0.003, "step": 418500 }, { "epoch": 39.07402221599925, "grad_norm": 1.43630850315094, "learning_rate": 1.0946413541569253e-06, "loss": 0.0023, "step": 418600 }, { "epoch": 39.083356669467, "grad_norm": 0.008866120129823685, "learning_rate": 1.0937061629103153e-06, "loss": 0.0016, "step": 418700 }, { "epoch": 39.09269112293475, "grad_norm": 0.05344675853848457, "learning_rate": 1.0927709716637054e-06, "loss": 0.0034, "step": 418800 }, { "epoch": 39.102025576402504, "grad_norm": 0.7812521457672119, "learning_rate": 1.0918357804170954e-06, "loss": 0.0014, "step": 418900 }, { "epoch": 39.11136002987025, "grad_norm": 0.06615054607391357, "learning_rate": 1.0909005891704854e-06, "loss": 0.0027, "step": 419000 }, { "epoch": 39.120694483338, "grad_norm": 0.1816847175359726, "learning_rate": 1.0899653979238757e-06, "loss": 0.0022, "step": 419100 }, { "epoch": 39.13002893680575, "grad_norm": 10.385703086853027, "learning_rate": 1.0890302066772655e-06, "loss": 0.0027, "step": 419200 }, { "epoch": 39.1393633902735, "grad_norm": 3.361903429031372, "learning_rate": 1.0880950154306557e-06, "loss": 0.002, "step": 419300 }, { "epoch": 39.148697843741246, "grad_norm": 0.1207926794886589, "learning_rate": 1.0871598241840458e-06, "loss": 0.0027, "step": 419400 }, { "epoch": 39.158032297209, "grad_norm": 0.00658536097034812, "learning_rate": 1.0862246329374358e-06, "loss": 0.0016, "step": 419500 }, { "epoch": 39.16736675067675, "grad_norm": 1.8132647275924683, "learning_rate": 1.085289441690826e-06, "loss": 0.0035, "step": 419600 }, { "epoch": 39.1767012041445, "grad_norm": 0.05292878299951553, "learning_rate": 1.0843542504442158e-06, "loss": 0.002, "step": 419700 }, { "epoch": 39.186035657612244, "grad_norm": 0.20349431037902832, "learning_rate": 1.083419059197606e-06, "loss": 0.0031, "step": 419800 }, { "epoch": 39.195370111079995, "grad_norm": 1.3853843212127686, "learning_rate": 1.0824838679509961e-06, "loss": 0.0026, "step": 419900 }, { "epoch": 39.20470456454775, "grad_norm": 0.0060560498386621475, "learning_rate": 1.0815486767043861e-06, "loss": 0.0023, "step": 420000 }, { "epoch": 39.20470456454775, "eval_accuracy": 0.6996032672112019, "eval_f1": 0.8246298289889346, "eval_loss": 0.34782397747039795, "eval_roc_auc": 0.9066479075858284, "eval_runtime": 145.786, "eval_samples_per_second": 293.924, "eval_steps_per_second": 293.924, "step": 420000 }, { "epoch": 39.2140390180155, "grad_norm": 0.42098456621170044, "learning_rate": 1.0806134854577762e-06, "loss": 0.0021, "step": 420100 }, { "epoch": 39.22337347148324, "grad_norm": 4.659387111663818, "learning_rate": 1.0796782942111662e-06, "loss": 0.0023, "step": 420200 }, { "epoch": 39.232707924950994, "grad_norm": 0.017900487408041954, "learning_rate": 1.0787431029645562e-06, "loss": 0.0021, "step": 420300 }, { "epoch": 39.242042378418745, "grad_norm": 0.16684691607952118, "learning_rate": 1.0778079117179465e-06, "loss": 0.0022, "step": 420400 }, { "epoch": 39.251376831886496, "grad_norm": 0.19231052696704865, "learning_rate": 1.0768727204713365e-06, "loss": 0.0019, "step": 420500 }, { "epoch": 39.26071128535424, "grad_norm": 3.8017992973327637, "learning_rate": 1.0759375292247265e-06, "loss": 0.0019, "step": 420600 }, { "epoch": 39.27004573882199, "grad_norm": 2.5283703804016113, "learning_rate": 1.0750023379781166e-06, "loss": 0.0033, "step": 420700 }, { "epoch": 39.27938019228974, "grad_norm": 0.0016744306776672602, "learning_rate": 1.0740671467315066e-06, "loss": 0.0035, "step": 420800 }, { "epoch": 39.288714645757494, "grad_norm": 0.822329580783844, "learning_rate": 1.0731319554848968e-06, "loss": 0.0022, "step": 420900 }, { "epoch": 39.29804909922524, "grad_norm": 0.2620086967945099, "learning_rate": 1.0721967642382867e-06, "loss": 0.0021, "step": 421000 }, { "epoch": 39.30738355269299, "grad_norm": 8.462345123291016, "learning_rate": 1.071261572991677e-06, "loss": 0.0021, "step": 421100 }, { "epoch": 39.31671800616074, "grad_norm": 0.022592732682824135, "learning_rate": 1.070326381745067e-06, "loss": 0.0027, "step": 421200 }, { "epoch": 39.326052459628485, "grad_norm": 0.15858104825019836, "learning_rate": 1.069391190498457e-06, "loss": 0.0033, "step": 421300 }, { "epoch": 39.33538691309624, "grad_norm": 0.048828527331352234, "learning_rate": 1.0684559992518472e-06, "loss": 0.0018, "step": 421400 }, { "epoch": 39.34472136656399, "grad_norm": 0.0016736822435632348, "learning_rate": 1.067520808005237e-06, "loss": 0.0022, "step": 421500 }, { "epoch": 39.35405582003174, "grad_norm": 0.4730512797832489, "learning_rate": 1.0665856167586273e-06, "loss": 0.0033, "step": 421600 }, { "epoch": 39.36339027349948, "grad_norm": 3.055579662322998, "learning_rate": 1.0656504255120173e-06, "loss": 0.0018, "step": 421700 }, { "epoch": 39.372724726967235, "grad_norm": 0.011468447744846344, "learning_rate": 1.0647152342654073e-06, "loss": 0.002, "step": 421800 }, { "epoch": 39.382059180434986, "grad_norm": 0.24548569321632385, "learning_rate": 1.0637800430187974e-06, "loss": 0.0015, "step": 421900 }, { "epoch": 39.39139363390274, "grad_norm": 3.1999595165252686, "learning_rate": 1.0628448517721874e-06, "loss": 0.0022, "step": 422000 }, { "epoch": 39.40072808737048, "grad_norm": 1.6769965887069702, "learning_rate": 1.0619096605255776e-06, "loss": 0.0019, "step": 422100 }, { "epoch": 39.41006254083823, "grad_norm": 0.005094636231660843, "learning_rate": 1.0609744692789677e-06, "loss": 0.0023, "step": 422200 }, { "epoch": 39.419396994305984, "grad_norm": 0.07127164304256439, "learning_rate": 1.0600392780323577e-06, "loss": 0.0022, "step": 422300 }, { "epoch": 39.428731447773735, "grad_norm": 0.009460099041461945, "learning_rate": 1.0591040867857477e-06, "loss": 0.0021, "step": 422400 }, { "epoch": 39.43806590124148, "grad_norm": 0.11802827566862106, "learning_rate": 1.0581688955391378e-06, "loss": 0.0023, "step": 422500 }, { "epoch": 39.44740035470923, "grad_norm": 0.03174301236867905, "learning_rate": 1.057233704292528e-06, "loss": 0.0028, "step": 422600 }, { "epoch": 39.45673480817698, "grad_norm": 0.252638041973114, "learning_rate": 1.056298513045918e-06, "loss": 0.001, "step": 422700 }, { "epoch": 39.46606926164473, "grad_norm": 0.004990797955542803, "learning_rate": 1.055363321799308e-06, "loss": 0.0027, "step": 422800 }, { "epoch": 39.47540371511248, "grad_norm": 0.007735382299870253, "learning_rate": 1.054428130552698e-06, "loss": 0.0027, "step": 422900 }, { "epoch": 39.48473816858023, "grad_norm": 2.2330121994018555, "learning_rate": 1.0534929393060881e-06, "loss": 0.0018, "step": 423000 }, { "epoch": 39.49407262204798, "grad_norm": 0.32521677017211914, "learning_rate": 1.0525577480594784e-06, "loss": 0.002, "step": 423100 }, { "epoch": 39.50340707551573, "grad_norm": 0.04011121019721031, "learning_rate": 1.0516225568128684e-06, "loss": 0.0031, "step": 423200 }, { "epoch": 39.512741528983476, "grad_norm": 0.05218912661075592, "learning_rate": 1.0506873655662584e-06, "loss": 0.0026, "step": 423300 }, { "epoch": 39.52207598245123, "grad_norm": 2.015380859375, "learning_rate": 1.0497521743196485e-06, "loss": 0.0021, "step": 423400 }, { "epoch": 39.53141043591898, "grad_norm": 0.007128533907234669, "learning_rate": 1.0488169830730385e-06, "loss": 0.0019, "step": 423500 }, { "epoch": 39.54074488938673, "grad_norm": 0.263603150844574, "learning_rate": 1.0478817918264287e-06, "loss": 0.0029, "step": 423600 }, { "epoch": 39.550079342854474, "grad_norm": 0.2947543263435364, "learning_rate": 1.0469466005798186e-06, "loss": 0.0027, "step": 423700 }, { "epoch": 39.559413796322225, "grad_norm": 0.0297849103808403, "learning_rate": 1.0460114093332086e-06, "loss": 0.0022, "step": 423800 }, { "epoch": 39.56874824978998, "grad_norm": 0.006186520680785179, "learning_rate": 1.0450762180865988e-06, "loss": 0.0025, "step": 423900 }, { "epoch": 39.57808270325772, "grad_norm": 4.330118179321289, "learning_rate": 1.0441410268399889e-06, "loss": 0.0022, "step": 424000 }, { "epoch": 39.58741715672547, "grad_norm": 6.20100212097168, "learning_rate": 1.0432058355933789e-06, "loss": 0.0038, "step": 424100 }, { "epoch": 39.59675161019322, "grad_norm": 0.0066221184097230434, "learning_rate": 1.042270644346769e-06, "loss": 0.0013, "step": 424200 }, { "epoch": 39.606086063660975, "grad_norm": 0.1445472687482834, "learning_rate": 1.041335453100159e-06, "loss": 0.0026, "step": 424300 }, { "epoch": 39.61542051712872, "grad_norm": 6.83854866027832, "learning_rate": 1.0404002618535492e-06, "loss": 0.0028, "step": 424400 }, { "epoch": 39.62475497059647, "grad_norm": 0.0005251576658338308, "learning_rate": 1.0394650706069392e-06, "loss": 0.0013, "step": 424500 }, { "epoch": 39.63408942406422, "grad_norm": 0.0010998307261615992, "learning_rate": 1.0385298793603293e-06, "loss": 0.0016, "step": 424600 }, { "epoch": 39.64342387753197, "grad_norm": 0.07464086264371872, "learning_rate": 1.0375946881137193e-06, "loss": 0.0013, "step": 424700 }, { "epoch": 39.65275833099972, "grad_norm": 0.13023212552070618, "learning_rate": 1.0366594968671093e-06, "loss": 0.0021, "step": 424800 }, { "epoch": 39.66209278446747, "grad_norm": 1.354491949081421, "learning_rate": 1.0357243056204996e-06, "loss": 0.004, "step": 424900 }, { "epoch": 39.67142723793522, "grad_norm": 0.132780522108078, "learning_rate": 1.0347891143738896e-06, "loss": 0.0022, "step": 425000 }, { "epoch": 39.67142723793522, "eval_accuracy": 0.6994399066511086, "eval_f1": 0.8266445066480056, "eval_loss": 0.35177046060562134, "eval_roc_auc": 0.9108371469200919, "eval_runtime": 146.0479, "eval_samples_per_second": 293.397, "eval_steps_per_second": 293.397, "step": 425000 }, { "epoch": 39.68076169140297, "grad_norm": 0.42738577723503113, "learning_rate": 1.0338539231272796e-06, "loss": 0.0033, "step": 425100 }, { "epoch": 39.690096144870715, "grad_norm": 4.000425815582275, "learning_rate": 1.0329187318806696e-06, "loss": 0.0023, "step": 425200 }, { "epoch": 39.699430598338466, "grad_norm": 0.012401211075484753, "learning_rate": 1.0319835406340597e-06, "loss": 0.0024, "step": 425300 }, { "epoch": 39.70876505180622, "grad_norm": 0.005343446042388678, "learning_rate": 1.03104834938745e-06, "loss": 0.003, "step": 425400 }, { "epoch": 39.71809950527397, "grad_norm": 5.0322794914245605, "learning_rate": 1.03011315814084e-06, "loss": 0.0019, "step": 425500 }, { "epoch": 39.72743395874171, "grad_norm": 0.004124558065086603, "learning_rate": 1.02917796689423e-06, "loss": 0.0028, "step": 425600 }, { "epoch": 39.736768412209464, "grad_norm": 0.0008580610738135874, "learning_rate": 1.02824277564762e-06, "loss": 0.0025, "step": 425700 }, { "epoch": 39.746102865677216, "grad_norm": 0.10831420868635178, "learning_rate": 1.02730758440101e-06, "loss": 0.0023, "step": 425800 }, { "epoch": 39.75543731914497, "grad_norm": 0.013649099506437778, "learning_rate": 1.0263723931544003e-06, "loss": 0.0021, "step": 425900 }, { "epoch": 39.76477177261271, "grad_norm": 0.02829056978225708, "learning_rate": 1.02543720190779e-06, "loss": 0.0022, "step": 426000 }, { "epoch": 39.77410622608046, "grad_norm": 0.1393798142671585, "learning_rate": 1.0245020106611803e-06, "loss": 0.0014, "step": 426100 }, { "epoch": 39.783440679548214, "grad_norm": 0.1530047357082367, "learning_rate": 1.0235668194145704e-06, "loss": 0.0019, "step": 426200 }, { "epoch": 39.792775133015965, "grad_norm": 0.044372644275426865, "learning_rate": 1.0226316281679604e-06, "loss": 0.0022, "step": 426300 }, { "epoch": 39.80210958648371, "grad_norm": 0.005953626707196236, "learning_rate": 1.0216964369213507e-06, "loss": 0.0029, "step": 426400 }, { "epoch": 39.81144403995146, "grad_norm": 6.526873588562012, "learning_rate": 1.0207612456747405e-06, "loss": 0.0026, "step": 426500 }, { "epoch": 39.82077849341921, "grad_norm": 3.2149927616119385, "learning_rate": 1.0198260544281307e-06, "loss": 0.0026, "step": 426600 }, { "epoch": 39.83011294688696, "grad_norm": 0.00783882848918438, "learning_rate": 1.0188908631815207e-06, "loss": 0.0028, "step": 426700 }, { "epoch": 39.83944740035471, "grad_norm": 0.7718892097473145, "learning_rate": 1.0179556719349108e-06, "loss": 0.0033, "step": 426800 }, { "epoch": 39.84878185382246, "grad_norm": 0.07181908190250397, "learning_rate": 1.0170204806883008e-06, "loss": 0.0013, "step": 426900 }, { "epoch": 39.85811630729021, "grad_norm": 0.011467082425951958, "learning_rate": 1.0160852894416908e-06, "loss": 0.0028, "step": 427000 }, { "epoch": 39.867450760757954, "grad_norm": 0.0011545326560735703, "learning_rate": 1.015150098195081e-06, "loss": 0.0018, "step": 427100 }, { "epoch": 39.876785214225706, "grad_norm": 0.09376892447471619, "learning_rate": 1.0142149069484711e-06, "loss": 0.0026, "step": 427200 }, { "epoch": 39.88611966769346, "grad_norm": 0.1573861837387085, "learning_rate": 1.0132797157018611e-06, "loss": 0.0034, "step": 427300 }, { "epoch": 39.89545412116121, "grad_norm": 0.03330027684569359, "learning_rate": 1.0123445244552512e-06, "loss": 0.0014, "step": 427400 }, { "epoch": 39.90478857462895, "grad_norm": 0.0010401929030194879, "learning_rate": 1.0114093332086412e-06, "loss": 0.0019, "step": 427500 }, { "epoch": 39.914123028096704, "grad_norm": 0.1399272382259369, "learning_rate": 1.0104741419620312e-06, "loss": 0.003, "step": 427600 }, { "epoch": 39.923457481564455, "grad_norm": 0.17818449437618256, "learning_rate": 1.0095389507154215e-06, "loss": 0.0023, "step": 427700 }, { "epoch": 39.932791935032206, "grad_norm": 0.06650343537330627, "learning_rate": 1.0086037594688113e-06, "loss": 0.0021, "step": 427800 }, { "epoch": 39.94212638849995, "grad_norm": 0.01587821915745735, "learning_rate": 1.0076685682222015e-06, "loss": 0.0028, "step": 427900 }, { "epoch": 39.9514608419677, "grad_norm": 8.472638130187988, "learning_rate": 1.0067333769755916e-06, "loss": 0.0021, "step": 428000 }, { "epoch": 39.96079529543545, "grad_norm": 0.0022544830571860075, "learning_rate": 1.0057981857289816e-06, "loss": 0.0019, "step": 428100 }, { "epoch": 39.970129748903204, "grad_norm": 0.01845708303153515, "learning_rate": 1.0048629944823718e-06, "loss": 0.0022, "step": 428200 }, { "epoch": 39.97946420237095, "grad_norm": 0.193101704120636, "learning_rate": 1.0039278032357617e-06, "loss": 0.0018, "step": 428300 }, { "epoch": 39.9887986558387, "grad_norm": 0.019607266411185265, "learning_rate": 1.002992611989152e-06, "loss": 0.0027, "step": 428400 }, { "epoch": 39.99813310930645, "grad_norm": 0.009232921525835991, "learning_rate": 1.002057420742542e-06, "loss": 0.0015, "step": 428500 }, { "epoch": 40.0074675627742, "grad_norm": 1.9918662309646606, "learning_rate": 1.001122229495932e-06, "loss": 0.0018, "step": 428600 }, { "epoch": 40.01680201624195, "grad_norm": 0.08975950628519058, "learning_rate": 1.000187038249322e-06, "loss": 0.0023, "step": 428700 }, { "epoch": 40.0261364697097, "grad_norm": 0.03989408165216446, "learning_rate": 9.99251847002712e-07, "loss": 0.0023, "step": 428800 }, { "epoch": 40.03547092317745, "grad_norm": 0.022470232099294662, "learning_rate": 9.983166557561023e-07, "loss": 0.002, "step": 428900 }, { "epoch": 40.0448053766452, "grad_norm": 4.544216156005859, "learning_rate": 9.973814645094923e-07, "loss": 0.0026, "step": 429000 }, { "epoch": 40.054139830112945, "grad_norm": 0.000727594131603837, "learning_rate": 9.964462732628823e-07, "loss": 0.0021, "step": 429100 }, { "epoch": 40.063474283580696, "grad_norm": 0.38852477073669434, "learning_rate": 9.955110820162724e-07, "loss": 0.003, "step": 429200 }, { "epoch": 40.07280873704845, "grad_norm": 0.03760674223303795, "learning_rate": 9.945758907696624e-07, "loss": 0.0029, "step": 429300 }, { "epoch": 40.0821431905162, "grad_norm": 0.1876915842294693, "learning_rate": 9.936406995230526e-07, "loss": 0.0012, "step": 429400 }, { "epoch": 40.09147764398394, "grad_norm": 0.0012100113090127707, "learning_rate": 9.927055082764427e-07, "loss": 0.0025, "step": 429500 }, { "epoch": 40.100812097451694, "grad_norm": 0.01154739037156105, "learning_rate": 9.917703170298327e-07, "loss": 0.0018, "step": 429600 }, { "epoch": 40.110146550919445, "grad_norm": 0.0901985839009285, "learning_rate": 9.908351257832227e-07, "loss": 0.0015, "step": 429700 }, { "epoch": 40.11948100438719, "grad_norm": 0.049605969339609146, "learning_rate": 9.898999345366128e-07, "loss": 0.0026, "step": 429800 }, { "epoch": 40.12881545785494, "grad_norm": 0.0008353708544746041, "learning_rate": 9.88964743290003e-07, "loss": 0.0018, "step": 429900 }, { "epoch": 40.13814991132269, "grad_norm": 7.062812328338623, "learning_rate": 9.88029552043393e-07, "loss": 0.0032, "step": 430000 }, { "epoch": 40.13814991132269, "eval_accuracy": 0.7003500583430572, "eval_f1": 0.825976260710774, "eval_loss": 0.3522302210330963, "eval_roc_auc": 0.9091815227896494, "eval_runtime": 146.2647, "eval_samples_per_second": 292.962, "eval_steps_per_second": 292.962, "step": 430000 }, { "epoch": 40.147484364790444, "grad_norm": 0.07243302464485168, "learning_rate": 9.87094360796783e-07, "loss": 0.0021, "step": 430100 }, { "epoch": 40.15681881825819, "grad_norm": 0.00016787915956228971, "learning_rate": 9.86159169550173e-07, "loss": 0.0036, "step": 430200 }, { "epoch": 40.16615327172594, "grad_norm": 0.17066824436187744, "learning_rate": 9.852239783035631e-07, "loss": 0.0012, "step": 430300 }, { "epoch": 40.17548772519369, "grad_norm": 2.3646352291107178, "learning_rate": 9.842887870569534e-07, "loss": 0.0019, "step": 430400 }, { "epoch": 40.18482217866144, "grad_norm": 0.00224959384649992, "learning_rate": 9.833535958103432e-07, "loss": 0.0023, "step": 430500 }, { "epoch": 40.194156632129186, "grad_norm": 0.052364643663167953, "learning_rate": 9.824184045637334e-07, "loss": 0.0019, "step": 430600 }, { "epoch": 40.20349108559694, "grad_norm": 0.0037195126060396433, "learning_rate": 9.814832133171235e-07, "loss": 0.0013, "step": 430700 }, { "epoch": 40.21282553906469, "grad_norm": 5.541834354400635, "learning_rate": 9.805480220705135e-07, "loss": 0.003, "step": 430800 }, { "epoch": 40.22215999253244, "grad_norm": 0.0005010822205804288, "learning_rate": 9.796128308239037e-07, "loss": 0.0017, "step": 430900 }, { "epoch": 40.231494446000184, "grad_norm": 0.6372461318969727, "learning_rate": 9.786776395772935e-07, "loss": 0.0019, "step": 431000 }, { "epoch": 40.240828899467935, "grad_norm": 2.289839744567871, "learning_rate": 9.777424483306838e-07, "loss": 0.0021, "step": 431100 }, { "epoch": 40.25016335293569, "grad_norm": 0.256838321685791, "learning_rate": 9.768072570840738e-07, "loss": 0.0027, "step": 431200 }, { "epoch": 40.25949780640344, "grad_norm": 1.1588376760482788, "learning_rate": 9.758720658374638e-07, "loss": 0.0018, "step": 431300 }, { "epoch": 40.26883225987118, "grad_norm": 0.26351824402809143, "learning_rate": 9.749368745908539e-07, "loss": 0.0018, "step": 431400 }, { "epoch": 40.27816671333893, "grad_norm": 0.40045174956321716, "learning_rate": 9.74001683344244e-07, "loss": 0.0037, "step": 431500 }, { "epoch": 40.287501166806685, "grad_norm": 0.011187924072146416, "learning_rate": 9.73066492097634e-07, "loss": 0.0018, "step": 431600 }, { "epoch": 40.296835620274436, "grad_norm": 0.03811948001384735, "learning_rate": 9.721313008510242e-07, "loss": 0.0025, "step": 431700 }, { "epoch": 40.30617007374218, "grad_norm": 0.08221200108528137, "learning_rate": 9.711961096044142e-07, "loss": 0.0023, "step": 431800 }, { "epoch": 40.31550452720993, "grad_norm": 0.006777768488973379, "learning_rate": 9.702609183578042e-07, "loss": 0.0017, "step": 431900 }, { "epoch": 40.32483898067768, "grad_norm": 6.146360397338867, "learning_rate": 9.693257271111943e-07, "loss": 0.0023, "step": 432000 }, { "epoch": 40.334173434145434, "grad_norm": 0.020526554435491562, "learning_rate": 9.683905358645843e-07, "loss": 0.002, "step": 432100 }, { "epoch": 40.34350788761318, "grad_norm": 0.004093356896191835, "learning_rate": 9.674553446179745e-07, "loss": 0.0017, "step": 432200 }, { "epoch": 40.35284234108093, "grad_norm": 0.004225854761898518, "learning_rate": 9.665201533713644e-07, "loss": 0.0024, "step": 432300 }, { "epoch": 40.36217679454868, "grad_norm": 0.34687745571136475, "learning_rate": 9.655849621247546e-07, "loss": 0.0033, "step": 432400 }, { "epoch": 40.37151124801643, "grad_norm": 3.072289228439331, "learning_rate": 9.646497708781446e-07, "loss": 0.0019, "step": 432500 }, { "epoch": 40.380845701484176, "grad_norm": 0.023983245715498924, "learning_rate": 9.637145796315347e-07, "loss": 0.0033, "step": 432600 }, { "epoch": 40.39018015495193, "grad_norm": 0.0036181467585265636, "learning_rate": 9.62779388384925e-07, "loss": 0.0032, "step": 432700 }, { "epoch": 40.39951460841968, "grad_norm": 0.6563630104064941, "learning_rate": 9.618441971383147e-07, "loss": 0.0022, "step": 432800 }, { "epoch": 40.40884906188742, "grad_norm": 3.2093472480773926, "learning_rate": 9.60909005891705e-07, "loss": 0.0026, "step": 432900 }, { "epoch": 40.418183515355175, "grad_norm": 0.08899662643671036, "learning_rate": 9.59973814645095e-07, "loss": 0.0018, "step": 433000 }, { "epoch": 40.427517968822926, "grad_norm": 2.3261046409606934, "learning_rate": 9.59038623398485e-07, "loss": 0.0017, "step": 433100 }, { "epoch": 40.43685242229068, "grad_norm": 0.9068961143493652, "learning_rate": 9.58103432151875e-07, "loss": 0.0024, "step": 433200 }, { "epoch": 40.44618687575842, "grad_norm": 8.941900253295898, "learning_rate": 9.57168240905265e-07, "loss": 0.0019, "step": 433300 }, { "epoch": 40.45552132922617, "grad_norm": 0.1672469973564148, "learning_rate": 9.562330496586553e-07, "loss": 0.0019, "step": 433400 }, { "epoch": 40.464855782693924, "grad_norm": 0.09479925781488419, "learning_rate": 9.552978584120454e-07, "loss": 0.0019, "step": 433500 }, { "epoch": 40.474190236161675, "grad_norm": 0.016866305842995644, "learning_rate": 9.543626671654354e-07, "loss": 0.0017, "step": 433600 }, { "epoch": 40.48352468962942, "grad_norm": 0.6531490087509155, "learning_rate": 9.534274759188254e-07, "loss": 0.0011, "step": 433700 }, { "epoch": 40.49285914309717, "grad_norm": 0.004476440604776144, "learning_rate": 9.524922846722156e-07, "loss": 0.0028, "step": 433800 }, { "epoch": 40.50219359656492, "grad_norm": 1.2467453479766846, "learning_rate": 9.515570934256056e-07, "loss": 0.0023, "step": 433900 }, { "epoch": 40.51152805003267, "grad_norm": 0.009227518923580647, "learning_rate": 9.506219021789957e-07, "loss": 0.0014, "step": 434000 }, { "epoch": 40.52086250350042, "grad_norm": 0.06240660324692726, "learning_rate": 9.496867109323859e-07, "loss": 0.0028, "step": 434100 }, { "epoch": 40.53019695696817, "grad_norm": 0.6717493534088135, "learning_rate": 9.487515196857758e-07, "loss": 0.0025, "step": 434200 }, { "epoch": 40.53953141043592, "grad_norm": 2.7564644813537598, "learning_rate": 9.478163284391659e-07, "loss": 0.0024, "step": 434300 }, { "epoch": 40.54886586390367, "grad_norm": 0.09299127012491226, "learning_rate": 9.46881137192556e-07, "loss": 0.0038, "step": 434400 }, { "epoch": 40.558200317371416, "grad_norm": 0.0953914150595665, "learning_rate": 9.459459459459461e-07, "loss": 0.0015, "step": 434500 }, { "epoch": 40.56753477083917, "grad_norm": 0.03989231958985329, "learning_rate": 9.45010754699336e-07, "loss": 0.002, "step": 434600 }, { "epoch": 40.57686922430692, "grad_norm": 5.28693151473999, "learning_rate": 9.440755634527262e-07, "loss": 0.0021, "step": 434700 }, { "epoch": 40.58620367777467, "grad_norm": 0.00300313881598413, "learning_rate": 9.431403722061162e-07, "loss": 0.0021, "step": 434800 }, { "epoch": 40.595538131242414, "grad_norm": 0.938600480556488, "learning_rate": 9.422051809595063e-07, "loss": 0.0015, "step": 434900 }, { "epoch": 40.604872584710165, "grad_norm": 0.011824716813862324, "learning_rate": 9.412699897128965e-07, "loss": 0.0018, "step": 435000 }, { "epoch": 40.604872584710165, "eval_accuracy": 0.6983663943990666, "eval_f1": 0.8257847651394458, "eval_loss": 0.35394585132598877, "eval_roc_auc": 0.9100928745388065, "eval_runtime": 145.51, "eval_samples_per_second": 294.482, "eval_steps_per_second": 294.482, "step": 435000 }, { "epoch": 40.614207038177916, "grad_norm": 0.04386294633150101, "learning_rate": 9.403347984662864e-07, "loss": 0.0021, "step": 435100 }, { "epoch": 40.62354149164567, "grad_norm": 0.12483498454093933, "learning_rate": 9.393996072196765e-07, "loss": 0.0022, "step": 435200 }, { "epoch": 40.63287594511341, "grad_norm": 0.3777672052383423, "learning_rate": 9.384644159730666e-07, "loss": 0.0019, "step": 435300 }, { "epoch": 40.64221039858116, "grad_norm": 0.06478376686573029, "learning_rate": 9.375292247264567e-07, "loss": 0.0013, "step": 435400 }, { "epoch": 40.651544852048914, "grad_norm": 0.2501518130302429, "learning_rate": 9.365940334798466e-07, "loss": 0.0018, "step": 435500 }, { "epoch": 40.66087930551666, "grad_norm": 0.013525975868105888, "learning_rate": 9.356588422332368e-07, "loss": 0.0022, "step": 435600 }, { "epoch": 40.67021375898441, "grad_norm": 0.005432969890534878, "learning_rate": 9.347236509866269e-07, "loss": 0.0033, "step": 435700 }, { "epoch": 40.67954821245216, "grad_norm": 0.2750708758831024, "learning_rate": 9.337884597400169e-07, "loss": 0.0017, "step": 435800 }, { "epoch": 40.68888266591991, "grad_norm": 0.004599440842866898, "learning_rate": 9.328532684934071e-07, "loss": 0.0024, "step": 435900 }, { "epoch": 40.69821711938766, "grad_norm": 0.08437951654195786, "learning_rate": 9.31918077246797e-07, "loss": 0.001, "step": 436000 }, { "epoch": 40.70755157285541, "grad_norm": 0.24193017184734344, "learning_rate": 9.309828860001871e-07, "loss": 0.0033, "step": 436100 }, { "epoch": 40.71688602632316, "grad_norm": 0.03126104921102524, "learning_rate": 9.300476947535773e-07, "loss": 0.0016, "step": 436200 }, { "epoch": 40.72622047979091, "grad_norm": 0.25144103169441223, "learning_rate": 9.291125035069673e-07, "loss": 0.0027, "step": 436300 }, { "epoch": 40.735554933258655, "grad_norm": 0.6085944175720215, "learning_rate": 9.281773122603572e-07, "loss": 0.0015, "step": 436400 }, { "epoch": 40.744889386726406, "grad_norm": 0.050211213529109955, "learning_rate": 9.272421210137473e-07, "loss": 0.0023, "step": 436500 }, { "epoch": 40.75422384019416, "grad_norm": 0.044606417417526245, "learning_rate": 9.263069297671375e-07, "loss": 0.0021, "step": 436600 }, { "epoch": 40.76355829366191, "grad_norm": 0.002293987898156047, "learning_rate": 9.253717385205275e-07, "loss": 0.0012, "step": 436700 }, { "epoch": 40.77289274712965, "grad_norm": 0.010066051036119461, "learning_rate": 9.244365472739176e-07, "loss": 0.0017, "step": 436800 }, { "epoch": 40.782227200597404, "grad_norm": 1.8361190557479858, "learning_rate": 9.235013560273076e-07, "loss": 0.0027, "step": 436900 }, { "epoch": 40.791561654065156, "grad_norm": 0.054572511464357376, "learning_rate": 9.225661647806977e-07, "loss": 0.0025, "step": 437000 }, { "epoch": 40.80089610753291, "grad_norm": 0.01627521589398384, "learning_rate": 9.216309735340878e-07, "loss": 0.0032, "step": 437100 }, { "epoch": 40.81023056100065, "grad_norm": 0.008258986286818981, "learning_rate": 9.206957822874779e-07, "loss": 0.0021, "step": 437200 }, { "epoch": 40.8195650144684, "grad_norm": 0.05383961275219917, "learning_rate": 9.197605910408679e-07, "loss": 0.0016, "step": 437300 }, { "epoch": 40.828899467936154, "grad_norm": 0.011171643622219563, "learning_rate": 9.188253997942579e-07, "loss": 0.002, "step": 437400 }, { "epoch": 40.838233921403905, "grad_norm": 0.13009469211101532, "learning_rate": 9.178902085476481e-07, "loss": 0.003, "step": 437500 }, { "epoch": 40.84756837487165, "grad_norm": 0.002423591446131468, "learning_rate": 9.169550173010382e-07, "loss": 0.0027, "step": 437600 }, { "epoch": 40.8569028283394, "grad_norm": 0.16380487382411957, "learning_rate": 9.160198260544282e-07, "loss": 0.0021, "step": 437700 }, { "epoch": 40.86623728180715, "grad_norm": 0.8104886412620544, "learning_rate": 9.150846348078183e-07, "loss": 0.002, "step": 437800 }, { "epoch": 40.8755717352749, "grad_norm": 0.014287515543401241, "learning_rate": 9.141494435612083e-07, "loss": 0.0023, "step": 437900 }, { "epoch": 40.88490618874265, "grad_norm": 0.0025088025722652674, "learning_rate": 9.132142523145984e-07, "loss": 0.0026, "step": 438000 }, { "epoch": 40.8942406422104, "grad_norm": 2.719298839569092, "learning_rate": 9.122790610679886e-07, "loss": 0.0016, "step": 438100 }, { "epoch": 40.90357509567815, "grad_norm": 0.07883651554584503, "learning_rate": 9.113438698213785e-07, "loss": 0.0016, "step": 438200 }, { "epoch": 40.912909549145894, "grad_norm": 0.017301376909017563, "learning_rate": 9.104086785747685e-07, "loss": 0.0024, "step": 438300 }, { "epoch": 40.922244002613645, "grad_norm": 0.18470849096775055, "learning_rate": 9.094734873281587e-07, "loss": 0.0026, "step": 438400 }, { "epoch": 40.9315784560814, "grad_norm": 0.13380886614322662, "learning_rate": 9.085382960815488e-07, "loss": 0.0029, "step": 438500 }, { "epoch": 40.94091290954915, "grad_norm": 0.037701159715652466, "learning_rate": 9.076031048349388e-07, "loss": 0.0032, "step": 438600 }, { "epoch": 40.95024736301689, "grad_norm": 0.002427746308967471, "learning_rate": 9.066679135883289e-07, "loss": 0.001, "step": 438700 }, { "epoch": 40.95958181648464, "grad_norm": 0.3812382221221924, "learning_rate": 9.057327223417189e-07, "loss": 0.0029, "step": 438800 }, { "epoch": 40.968916269952395, "grad_norm": 0.7097658514976501, "learning_rate": 9.04797531095109e-07, "loss": 0.0016, "step": 438900 }, { "epoch": 40.978250723420146, "grad_norm": 0.449208527803421, "learning_rate": 9.038623398484992e-07, "loss": 0.0026, "step": 439000 }, { "epoch": 40.98758517688789, "grad_norm": 0.03886300325393677, "learning_rate": 9.029271486018891e-07, "loss": 0.0017, "step": 439100 }, { "epoch": 40.99691963035564, "grad_norm": 0.012566556222736835, "learning_rate": 9.019919573552792e-07, "loss": 0.0017, "step": 439200 }, { "epoch": 41.00625408382339, "grad_norm": 0.029449662193655968, "learning_rate": 9.010567661086693e-07, "loss": 0.0018, "step": 439300 }, { "epoch": 41.015588537291144, "grad_norm": 0.21592110395431519, "learning_rate": 9.001215748620594e-07, "loss": 0.0011, "step": 439400 }, { "epoch": 41.02492299075889, "grad_norm": 0.05591439828276634, "learning_rate": 8.991863836154495e-07, "loss": 0.002, "step": 439500 }, { "epoch": 41.03425744422664, "grad_norm": 0.005514182616025209, "learning_rate": 8.982511923688395e-07, "loss": 0.001, "step": 439600 }, { "epoch": 41.04359189769439, "grad_norm": 0.007018192671239376, "learning_rate": 8.973160011222296e-07, "loss": 0.0017, "step": 439700 }, { "epoch": 41.05292635116214, "grad_norm": 3.9386565685272217, "learning_rate": 8.963808098756196e-07, "loss": 0.0024, "step": 439800 }, { "epoch": 41.06226080462989, "grad_norm": 5.318870544433594, "learning_rate": 8.954456186290098e-07, "loss": 0.0021, "step": 439900 }, { "epoch": 41.07159525809764, "grad_norm": 0.02886221557855606, "learning_rate": 8.945104273823997e-07, "loss": 0.0018, "step": 440000 }, { "epoch": 41.07159525809764, "eval_accuracy": 0.69950991831972, "eval_f1": 0.8259824561403509, "eval_loss": 0.3551497161388397, "eval_roc_auc": 0.9094785950765335, "eval_runtime": 146.0062, "eval_samples_per_second": 293.481, "eval_steps_per_second": 293.481, "step": 440000 }, { "epoch": 41.08092971156539, "grad_norm": 0.023767393082380295, "learning_rate": 8.935752361357898e-07, "loss": 0.0017, "step": 440100 }, { "epoch": 41.09026416503314, "grad_norm": 0.08192571997642517, "learning_rate": 8.926400448891799e-07, "loss": 0.0023, "step": 440200 }, { "epoch": 41.099598618500885, "grad_norm": 0.12786880135536194, "learning_rate": 8.9170485364257e-07, "loss": 0.0021, "step": 440300 }, { "epoch": 41.108933071968636, "grad_norm": 0.005572366062551737, "learning_rate": 8.907696623959601e-07, "loss": 0.0021, "step": 440400 }, { "epoch": 41.11826752543639, "grad_norm": 0.5689314603805542, "learning_rate": 8.898344711493501e-07, "loss": 0.0025, "step": 440500 }, { "epoch": 41.12760197890414, "grad_norm": 0.6330790519714355, "learning_rate": 8.888992799027402e-07, "loss": 0.0018, "step": 440600 }, { "epoch": 41.13693643237188, "grad_norm": 2.3292596340179443, "learning_rate": 8.879640886561302e-07, "loss": 0.0015, "step": 440700 }, { "epoch": 41.146270885839634, "grad_norm": 0.05583438277244568, "learning_rate": 8.870288974095204e-07, "loss": 0.0024, "step": 440800 }, { "epoch": 41.155605339307385, "grad_norm": 0.0029800806660205126, "learning_rate": 8.860937061629103e-07, "loss": 0.0024, "step": 440900 }, { "epoch": 41.16493979277513, "grad_norm": 0.9759595990180969, "learning_rate": 8.851585149163004e-07, "loss": 0.0031, "step": 441000 }, { "epoch": 41.17427424624288, "grad_norm": 0.006009499076753855, "learning_rate": 8.842233236696906e-07, "loss": 0.0019, "step": 441100 }, { "epoch": 41.18360869971063, "grad_norm": 0.013547606766223907, "learning_rate": 8.832881324230806e-07, "loss": 0.0017, "step": 441200 }, { "epoch": 41.19294315317838, "grad_norm": 0.0025903319474309683, "learning_rate": 8.823529411764707e-07, "loss": 0.0019, "step": 441300 }, { "epoch": 41.20227760664613, "grad_norm": 0.2550543546676636, "learning_rate": 8.814177499298606e-07, "loss": 0.0012, "step": 441400 }, { "epoch": 41.21161206011388, "grad_norm": 0.009244353510439396, "learning_rate": 8.804825586832508e-07, "loss": 0.0015, "step": 441500 }, { "epoch": 41.22094651358163, "grad_norm": 0.0474996417760849, "learning_rate": 8.795473674366409e-07, "loss": 0.0017, "step": 441600 }, { "epoch": 41.23028096704938, "grad_norm": 1.4599010944366455, "learning_rate": 8.78612176190031e-07, "loss": 0.0009, "step": 441700 }, { "epoch": 41.239615420517126, "grad_norm": 0.11358469724655151, "learning_rate": 8.77676984943421e-07, "loss": 0.0015, "step": 441800 }, { "epoch": 41.24894987398488, "grad_norm": 0.6325699090957642, "learning_rate": 8.76741793696811e-07, "loss": 0.002, "step": 441900 }, { "epoch": 41.25828432745263, "grad_norm": 0.014946239069104195, "learning_rate": 8.758066024502011e-07, "loss": 0.0025, "step": 442000 }, { "epoch": 41.26761878092038, "grad_norm": 2.481696128845215, "learning_rate": 8.748714112035912e-07, "loss": 0.0022, "step": 442100 }, { "epoch": 41.276953234388124, "grad_norm": 0.856517493724823, "learning_rate": 8.739362199569813e-07, "loss": 0.0019, "step": 442200 }, { "epoch": 41.286287687855875, "grad_norm": 0.0005321509670466185, "learning_rate": 8.730010287103712e-07, "loss": 0.0022, "step": 442300 }, { "epoch": 41.295622141323626, "grad_norm": 4.359401226043701, "learning_rate": 8.720658374637614e-07, "loss": 0.002, "step": 442400 }, { "epoch": 41.30495659479138, "grad_norm": 6.984706401824951, "learning_rate": 8.711306462171515e-07, "loss": 0.0032, "step": 442500 }, { "epoch": 41.31429104825912, "grad_norm": 0.816115140914917, "learning_rate": 8.701954549705415e-07, "loss": 0.0024, "step": 442600 }, { "epoch": 41.32362550172687, "grad_norm": 0.009408270008862019, "learning_rate": 8.692602637239316e-07, "loss": 0.0016, "step": 442700 }, { "epoch": 41.332959955194625, "grad_norm": 0.05586450919508934, "learning_rate": 8.683250724773216e-07, "loss": 0.0018, "step": 442800 }, { "epoch": 41.342294408662376, "grad_norm": 8.391992568969727, "learning_rate": 8.673898812307117e-07, "loss": 0.0027, "step": 442900 }, { "epoch": 41.35162886213012, "grad_norm": 0.22896653413772583, "learning_rate": 8.664546899841019e-07, "loss": 0.0012, "step": 443000 }, { "epoch": 41.36096331559787, "grad_norm": 0.015824101865291595, "learning_rate": 8.655194987374919e-07, "loss": 0.0018, "step": 443100 }, { "epoch": 41.37029776906562, "grad_norm": 0.01028572116047144, "learning_rate": 8.645843074908819e-07, "loss": 0.0015, "step": 443200 }, { "epoch": 41.379632222533374, "grad_norm": 3.714212417602539, "learning_rate": 8.63649116244272e-07, "loss": 0.0018, "step": 443300 }, { "epoch": 41.38896667600112, "grad_norm": 0.10550784319639206, "learning_rate": 8.627139249976621e-07, "loss": 0.0015, "step": 443400 }, { "epoch": 41.39830112946887, "grad_norm": 1.2171730995178223, "learning_rate": 8.617787337510522e-07, "loss": 0.0044, "step": 443500 }, { "epoch": 41.40763558293662, "grad_norm": 0.0415855310857296, "learning_rate": 8.608435425044423e-07, "loss": 0.0025, "step": 443600 }, { "epoch": 41.41697003640437, "grad_norm": 0.6032770872116089, "learning_rate": 8.599083512578323e-07, "loss": 0.0011, "step": 443700 }, { "epoch": 41.426304489872116, "grad_norm": 0.0037238553632050753, "learning_rate": 8.589731600112223e-07, "loss": 0.0018, "step": 443800 }, { "epoch": 41.43563894333987, "grad_norm": 0.08617293834686279, "learning_rate": 8.580379687646125e-07, "loss": 0.003, "step": 443900 }, { "epoch": 41.44497339680762, "grad_norm": 0.0014708448434248567, "learning_rate": 8.571027775180025e-07, "loss": 0.003, "step": 444000 }, { "epoch": 41.45430785027536, "grad_norm": 0.2616041600704193, "learning_rate": 8.561675862713925e-07, "loss": 0.0023, "step": 444100 }, { "epoch": 41.463642303743114, "grad_norm": 2.9745934009552, "learning_rate": 8.552323950247826e-07, "loss": 0.0017, "step": 444200 }, { "epoch": 41.472976757210866, "grad_norm": 0.04386202618479729, "learning_rate": 8.542972037781727e-07, "loss": 0.0028, "step": 444300 }, { "epoch": 41.48231121067862, "grad_norm": 1.7227287292480469, "learning_rate": 8.533620125315628e-07, "loss": 0.0012, "step": 444400 }, { "epoch": 41.49164566414636, "grad_norm": 0.01551277656108141, "learning_rate": 8.524268212849529e-07, "loss": 0.0025, "step": 444500 }, { "epoch": 41.50098011761411, "grad_norm": 0.7021285891532898, "learning_rate": 8.514916300383429e-07, "loss": 0.0019, "step": 444600 }, { "epoch": 41.510314571081864, "grad_norm": 0.03139800950884819, "learning_rate": 8.505564387917329e-07, "loss": 0.0026, "step": 444700 }, { "epoch": 41.519649024549615, "grad_norm": 0.028629682958126068, "learning_rate": 8.496212475451231e-07, "loss": 0.0026, "step": 444800 }, { "epoch": 41.52898347801736, "grad_norm": 0.011636226437985897, "learning_rate": 8.486860562985132e-07, "loss": 0.0016, "step": 444900 }, { "epoch": 41.53831793148511, "grad_norm": 0.7448590993881226, "learning_rate": 8.477508650519031e-07, "loss": 0.0029, "step": 445000 }, { "epoch": 41.53831793148511, "eval_accuracy": 0.7007701283547257, "eval_f1": 0.8280392773526226, "eval_loss": 0.35732513666152954, "eval_roc_auc": 0.9120920241912097, "eval_runtime": 146.3444, "eval_samples_per_second": 292.803, "eval_steps_per_second": 292.803, "step": 445000 }, { "epoch": 41.54765238495286, "grad_norm": 0.09682817757129669, "learning_rate": 8.468156738052933e-07, "loss": 0.0013, "step": 445100 }, { "epoch": 41.55698683842061, "grad_norm": 0.0563163086771965, "learning_rate": 8.458804825586833e-07, "loss": 0.0009, "step": 445200 }, { "epoch": 41.56632129188836, "grad_norm": 0.09589052945375443, "learning_rate": 8.449452913120734e-07, "loss": 0.0026, "step": 445300 }, { "epoch": 41.57565574535611, "grad_norm": 0.002262812340632081, "learning_rate": 8.440101000654636e-07, "loss": 0.0021, "step": 445400 }, { "epoch": 41.58499019882386, "grad_norm": 0.10988710820674896, "learning_rate": 8.430749088188535e-07, "loss": 0.0015, "step": 445500 }, { "epoch": 41.59432465229161, "grad_norm": 0.51181960105896, "learning_rate": 8.421397175722436e-07, "loss": 0.0022, "step": 445600 }, { "epoch": 41.603659105759355, "grad_norm": 0.004181498195976019, "learning_rate": 8.412045263256337e-07, "loss": 0.0026, "step": 445700 }, { "epoch": 41.61299355922711, "grad_norm": 2.478198528289795, "learning_rate": 8.402693350790238e-07, "loss": 0.0021, "step": 445800 }, { "epoch": 41.62232801269486, "grad_norm": 0.016745340079069138, "learning_rate": 8.393341438324137e-07, "loss": 0.0036, "step": 445900 }, { "epoch": 41.63166246616261, "grad_norm": 0.09719493985176086, "learning_rate": 8.383989525858039e-07, "loss": 0.0027, "step": 446000 }, { "epoch": 41.64099691963035, "grad_norm": 1.7649742364883423, "learning_rate": 8.374637613391939e-07, "loss": 0.0023, "step": 446100 }, { "epoch": 41.650331373098105, "grad_norm": 1.0571472644805908, "learning_rate": 8.36528570092584e-07, "loss": 0.002, "step": 446200 }, { "epoch": 41.659665826565856, "grad_norm": 0.04168383777141571, "learning_rate": 8.355933788459742e-07, "loss": 0.001, "step": 446300 }, { "epoch": 41.66900028003361, "grad_norm": 0.028578443452715874, "learning_rate": 8.346581875993641e-07, "loss": 0.0017, "step": 446400 }, { "epoch": 41.67833473350135, "grad_norm": 1.1159420013427734, "learning_rate": 8.337229963527542e-07, "loss": 0.0021, "step": 446500 }, { "epoch": 41.6876691869691, "grad_norm": 0.4229795038700104, "learning_rate": 8.327878051061443e-07, "loss": 0.0016, "step": 446600 }, { "epoch": 41.697003640436854, "grad_norm": 0.05996720865368843, "learning_rate": 8.318526138595344e-07, "loss": 0.0029, "step": 446700 }, { "epoch": 41.7063380939046, "grad_norm": 0.0472172312438488, "learning_rate": 8.309174226129243e-07, "loss": 0.0022, "step": 446800 }, { "epoch": 41.71567254737235, "grad_norm": 0.0002619512379169464, "learning_rate": 8.299822313663145e-07, "loss": 0.002, "step": 446900 }, { "epoch": 41.7250070008401, "grad_norm": 0.007207442075014114, "learning_rate": 8.290470401197046e-07, "loss": 0.0028, "step": 447000 }, { "epoch": 41.73434145430785, "grad_norm": 1.0214166641235352, "learning_rate": 8.281118488730946e-07, "loss": 0.0023, "step": 447100 }, { "epoch": 41.7436759077756, "grad_norm": 0.42616212368011475, "learning_rate": 8.271766576264848e-07, "loss": 0.0024, "step": 447200 }, { "epoch": 41.75301036124335, "grad_norm": 2.2110164165496826, "learning_rate": 8.262414663798747e-07, "loss": 0.0024, "step": 447300 }, { "epoch": 41.7623448147111, "grad_norm": 6.360258102416992, "learning_rate": 8.253062751332648e-07, "loss": 0.003, "step": 447400 }, { "epoch": 41.77167926817885, "grad_norm": 1.7253323793411255, "learning_rate": 8.24371083886655e-07, "loss": 0.001, "step": 447500 }, { "epoch": 41.781013721646595, "grad_norm": 0.010576218366622925, "learning_rate": 8.23435892640045e-07, "loss": 0.0027, "step": 447600 }, { "epoch": 41.790348175114346, "grad_norm": 0.5882430076599121, "learning_rate": 8.22500701393435e-07, "loss": 0.0025, "step": 447700 }, { "epoch": 41.7996826285821, "grad_norm": 0.002108825370669365, "learning_rate": 8.21565510146825e-07, "loss": 0.0026, "step": 447800 }, { "epoch": 41.80901708204985, "grad_norm": 0.08703585714101791, "learning_rate": 8.206303189002152e-07, "loss": 0.001, "step": 447900 }, { "epoch": 41.81835153551759, "grad_norm": 0.04145458713173866, "learning_rate": 8.196951276536052e-07, "loss": 0.0025, "step": 448000 }, { "epoch": 41.827685988985344, "grad_norm": 3.0987770557403564, "learning_rate": 8.187599364069953e-07, "loss": 0.0018, "step": 448100 }, { "epoch": 41.837020442453095, "grad_norm": 3.088928699493408, "learning_rate": 8.178247451603853e-07, "loss": 0.0017, "step": 448200 }, { "epoch": 41.84635489592085, "grad_norm": 0.02216804400086403, "learning_rate": 8.168895539137754e-07, "loss": 0.0032, "step": 448300 }, { "epoch": 41.85568934938859, "grad_norm": 0.001745093846693635, "learning_rate": 8.159543626671655e-07, "loss": 0.0011, "step": 448400 }, { "epoch": 41.86502380285634, "grad_norm": 0.11870453506708145, "learning_rate": 8.150191714205556e-07, "loss": 0.0014, "step": 448500 }, { "epoch": 41.87435825632409, "grad_norm": 0.045225679874420166, "learning_rate": 8.140839801739456e-07, "loss": 0.0016, "step": 448600 }, { "epoch": 41.883692709791845, "grad_norm": 0.01370394229888916, "learning_rate": 8.131487889273356e-07, "loss": 0.0009, "step": 448700 }, { "epoch": 41.89302716325959, "grad_norm": 0.016136005520820618, "learning_rate": 8.122135976807258e-07, "loss": 0.001, "step": 448800 }, { "epoch": 41.90236161672734, "grad_norm": 0.07200615108013153, "learning_rate": 8.112784064341159e-07, "loss": 0.0037, "step": 448900 }, { "epoch": 41.91169607019509, "grad_norm": 0.34584349393844604, "learning_rate": 8.103432151875059e-07, "loss": 0.0031, "step": 449000 }, { "epoch": 41.92103052366284, "grad_norm": 0.20133769512176514, "learning_rate": 8.09408023940896e-07, "loss": 0.002, "step": 449100 }, { "epoch": 41.93036497713059, "grad_norm": 0.003326545702293515, "learning_rate": 8.08472832694286e-07, "loss": 0.0014, "step": 449200 }, { "epoch": 41.93969943059834, "grad_norm": 0.016297636553645134, "learning_rate": 8.075376414476761e-07, "loss": 0.0021, "step": 449300 }, { "epoch": 41.94903388406609, "grad_norm": 0.05731848254799843, "learning_rate": 8.066024502010663e-07, "loss": 0.0015, "step": 449400 }, { "epoch": 41.95836833753384, "grad_norm": 0.6229529976844788, "learning_rate": 8.056672589544562e-07, "loss": 0.002, "step": 449500 }, { "epoch": 41.967702791001585, "grad_norm": 0.1861894279718399, "learning_rate": 8.047320677078463e-07, "loss": 0.002, "step": 449600 }, { "epoch": 41.97703724446934, "grad_norm": 0.01892111822962761, "learning_rate": 8.037968764612364e-07, "loss": 0.0024, "step": 449700 }, { "epoch": 41.98637169793709, "grad_norm": 2.163231134414673, "learning_rate": 8.028616852146265e-07, "loss": 0.0028, "step": 449800 }, { "epoch": 41.99570615140483, "grad_norm": 0.16150186955928802, "learning_rate": 8.019264939680165e-07, "loss": 0.0019, "step": 449900 }, { "epoch": 42.00504060487258, "grad_norm": 0.4463611841201782, "learning_rate": 8.009913027214066e-07, "loss": 0.0028, "step": 450000 }, { "epoch": 42.00504060487258, "eval_accuracy": 0.6998599766627771, "eval_f1": 0.826991072991423, "eval_loss": 0.35855966806411743, "eval_roc_auc": 0.9108523735883958, "eval_runtime": 147.361, "eval_samples_per_second": 290.782, "eval_steps_per_second": 290.782, "step": 450000 }, { "epoch": 42.014375058340335, "grad_norm": 2.940261125564575, "learning_rate": 8.000561114747966e-07, "loss": 0.0019, "step": 450100 }, { "epoch": 42.023709511808086, "grad_norm": 1.5367909669876099, "learning_rate": 7.991209202281867e-07, "loss": 0.002, "step": 450200 }, { "epoch": 42.03304396527583, "grad_norm": 0.44675442576408386, "learning_rate": 7.981857289815769e-07, "loss": 0.0022, "step": 450300 }, { "epoch": 42.04237841874358, "grad_norm": 6.305240631103516, "learning_rate": 7.972505377349668e-07, "loss": 0.0017, "step": 450400 }, { "epoch": 42.05171287221133, "grad_norm": 0.31245702505111694, "learning_rate": 7.963153464883569e-07, "loss": 0.0021, "step": 450500 }, { "epoch": 42.061047325679084, "grad_norm": 0.050328612327575684, "learning_rate": 7.95380155241747e-07, "loss": 0.0026, "step": 450600 }, { "epoch": 42.07038177914683, "grad_norm": 0.14488813281059265, "learning_rate": 7.944449639951371e-07, "loss": 0.0027, "step": 450700 }, { "epoch": 42.07971623261458, "grad_norm": 0.023119723424315453, "learning_rate": 7.935097727485272e-07, "loss": 0.0015, "step": 450800 }, { "epoch": 42.08905068608233, "grad_norm": 0.036561496555805206, "learning_rate": 7.925745815019172e-07, "loss": 0.0025, "step": 450900 }, { "epoch": 42.09838513955008, "grad_norm": 6.69545316696167, "learning_rate": 7.916393902553073e-07, "loss": 0.0016, "step": 451000 }, { "epoch": 42.107719593017826, "grad_norm": 3.562073230743408, "learning_rate": 7.907041990086973e-07, "loss": 0.0012, "step": 451100 }, { "epoch": 42.11705404648558, "grad_norm": 0.8507174849510193, "learning_rate": 7.897690077620875e-07, "loss": 0.0019, "step": 451200 }, { "epoch": 42.12638849995333, "grad_norm": 0.3470190465450287, "learning_rate": 7.888338165154774e-07, "loss": 0.0029, "step": 451300 }, { "epoch": 42.13572295342108, "grad_norm": 0.19646205008029938, "learning_rate": 7.878986252688675e-07, "loss": 0.0017, "step": 451400 }, { "epoch": 42.145057406888824, "grad_norm": 0.0011142537696287036, "learning_rate": 7.869634340222577e-07, "loss": 0.0015, "step": 451500 }, { "epoch": 42.154391860356576, "grad_norm": 0.0013485626550391316, "learning_rate": 7.860282427756477e-07, "loss": 0.0018, "step": 451600 }, { "epoch": 42.16372631382433, "grad_norm": 0.039784811437129974, "learning_rate": 7.850930515290378e-07, "loss": 0.002, "step": 451700 }, { "epoch": 42.17306076729208, "grad_norm": 0.08211230486631393, "learning_rate": 7.841578602824278e-07, "loss": 0.0026, "step": 451800 }, { "epoch": 42.18239522075982, "grad_norm": 0.044876642525196075, "learning_rate": 7.832226690358179e-07, "loss": 0.0019, "step": 451900 }, { "epoch": 42.191729674227574, "grad_norm": 1.8001298904418945, "learning_rate": 7.822874777892079e-07, "loss": 0.0016, "step": 452000 }, { "epoch": 42.201064127695325, "grad_norm": 2.0607426166534424, "learning_rate": 7.813522865425981e-07, "loss": 0.0023, "step": 452100 }, { "epoch": 42.210398581163076, "grad_norm": 0.12220479547977448, "learning_rate": 7.80417095295988e-07, "loss": 0.0012, "step": 452200 }, { "epoch": 42.21973303463082, "grad_norm": 0.3712502419948578, "learning_rate": 7.794819040493781e-07, "loss": 0.0017, "step": 452300 }, { "epoch": 42.22906748809857, "grad_norm": 1.375274896621704, "learning_rate": 7.785467128027683e-07, "loss": 0.0025, "step": 452400 }, { "epoch": 42.23840194156632, "grad_norm": 0.01486134435981512, "learning_rate": 7.776115215561583e-07, "loss": 0.0025, "step": 452500 }, { "epoch": 42.24773639503407, "grad_norm": 0.0002522075374145061, "learning_rate": 7.766763303095484e-07, "loss": 0.0023, "step": 452600 }, { "epoch": 42.25707084850182, "grad_norm": 0.059956423938274384, "learning_rate": 7.757411390629383e-07, "loss": 0.0017, "step": 452700 }, { "epoch": 42.26640530196957, "grad_norm": 0.001193983480334282, "learning_rate": 7.748059478163285e-07, "loss": 0.0013, "step": 452800 }, { "epoch": 42.27573975543732, "grad_norm": 3.4008545875549316, "learning_rate": 7.738707565697186e-07, "loss": 0.0019, "step": 452900 }, { "epoch": 42.285074208905066, "grad_norm": 0.005995835177600384, "learning_rate": 7.729355653231086e-07, "loss": 0.0026, "step": 453000 }, { "epoch": 42.29440866237282, "grad_norm": 0.010610627010464668, "learning_rate": 7.720003740764988e-07, "loss": 0.002, "step": 453100 }, { "epoch": 42.30374311584057, "grad_norm": 2.055992603302002, "learning_rate": 7.710651828298887e-07, "loss": 0.003, "step": 453200 }, { "epoch": 42.31307756930832, "grad_norm": 0.007376169785857201, "learning_rate": 7.701299915832788e-07, "loss": 0.0014, "step": 453300 }, { "epoch": 42.322412022776064, "grad_norm": 0.08529713749885559, "learning_rate": 7.69194800336669e-07, "loss": 0.0028, "step": 453400 }, { "epoch": 42.331746476243815, "grad_norm": 0.29785171151161194, "learning_rate": 7.68259609090059e-07, "loss": 0.0014, "step": 453500 }, { "epoch": 42.341080929711566, "grad_norm": 2.285768508911133, "learning_rate": 7.67324417843449e-07, "loss": 0.0018, "step": 453600 }, { "epoch": 42.35041538317932, "grad_norm": 0.20404143631458282, "learning_rate": 7.663892265968391e-07, "loss": 0.0015, "step": 453700 }, { "epoch": 42.35974983664706, "grad_norm": 9.353243827819824, "learning_rate": 7.654540353502292e-07, "loss": 0.0031, "step": 453800 }, { "epoch": 42.36908429011481, "grad_norm": 0.001421136548742652, "learning_rate": 7.645188441036192e-07, "loss": 0.0025, "step": 453900 }, { "epoch": 42.378418743582564, "grad_norm": 0.07214381545782089, "learning_rate": 7.635836528570094e-07, "loss": 0.0032, "step": 454000 }, { "epoch": 42.387753197050316, "grad_norm": 0.4870496392250061, "learning_rate": 7.626484616103993e-07, "loss": 0.002, "step": 454100 }, { "epoch": 42.39708765051806, "grad_norm": 0.0002655674470588565, "learning_rate": 7.617132703637894e-07, "loss": 0.0018, "step": 454200 }, { "epoch": 42.40642210398581, "grad_norm": 1.0384896993637085, "learning_rate": 7.607780791171796e-07, "loss": 0.0017, "step": 454300 }, { "epoch": 42.41575655745356, "grad_norm": 0.008962414227426052, "learning_rate": 7.598428878705696e-07, "loss": 0.0029, "step": 454400 }, { "epoch": 42.425091010921314, "grad_norm": 0.006358286365866661, "learning_rate": 7.589076966239596e-07, "loss": 0.0024, "step": 454500 }, { "epoch": 42.43442546438906, "grad_norm": 0.036980412900447845, "learning_rate": 7.579725053773497e-07, "loss": 0.0014, "step": 454600 }, { "epoch": 42.44375991785681, "grad_norm": 1.0781670808792114, "learning_rate": 7.570373141307398e-07, "loss": 0.0026, "step": 454700 }, { "epoch": 42.45309437132456, "grad_norm": 0.017094673588871956, "learning_rate": 7.561021228841299e-07, "loss": 0.0012, "step": 454800 }, { "epoch": 42.46242882479231, "grad_norm": 3.582676649093628, "learning_rate": 7.5516693163752e-07, "loss": 0.0017, "step": 454900 }, { "epoch": 42.471763278260056, "grad_norm": 0.008111861534416676, "learning_rate": 7.5423174039091e-07, "loss": 0.0015, "step": 455000 }, { "epoch": 42.471763278260056, "eval_accuracy": 0.6991831971995333, "eval_f1": 0.8278667107944747, "eval_loss": 0.3627680838108063, "eval_roc_auc": 0.9130662811113545, "eval_runtime": 146.1458, "eval_samples_per_second": 293.2, "eval_steps_per_second": 293.2, "step": 455000 }, { "epoch": 42.48109773172781, "grad_norm": 1.601552963256836, "learning_rate": 7.532965491443e-07, "loss": 0.0017, "step": 455100 }, { "epoch": 42.49043218519556, "grad_norm": 0.29177340865135193, "learning_rate": 7.523613578976902e-07, "loss": 0.0016, "step": 455200 }, { "epoch": 42.4997666386633, "grad_norm": 0.22639448940753937, "learning_rate": 7.514261666510803e-07, "loss": 0.0026, "step": 455300 }, { "epoch": 42.509101092131054, "grad_norm": 1.1183278560638428, "learning_rate": 7.504909754044702e-07, "loss": 0.0023, "step": 455400 }, { "epoch": 42.518435545598805, "grad_norm": 0.31455087661743164, "learning_rate": 7.495557841578604e-07, "loss": 0.0008, "step": 455500 }, { "epoch": 42.52776999906656, "grad_norm": 0.006853132043033838, "learning_rate": 7.486205929112504e-07, "loss": 0.0026, "step": 455600 }, { "epoch": 42.5371044525343, "grad_norm": 0.7553168535232544, "learning_rate": 7.476854016646405e-07, "loss": 0.0026, "step": 455700 }, { "epoch": 42.54643890600205, "grad_norm": 0.006991676986217499, "learning_rate": 7.467502104180306e-07, "loss": 0.002, "step": 455800 }, { "epoch": 42.5557733594698, "grad_norm": 2.360618829727173, "learning_rate": 7.458150191714206e-07, "loss": 0.0021, "step": 455900 }, { "epoch": 42.565107812937555, "grad_norm": 1.4383525848388672, "learning_rate": 7.448798279248106e-07, "loss": 0.003, "step": 456000 }, { "epoch": 42.5744422664053, "grad_norm": 0.11023110896348953, "learning_rate": 7.439446366782008e-07, "loss": 0.0017, "step": 456100 }, { "epoch": 42.58377671987305, "grad_norm": 0.008255749009549618, "learning_rate": 7.430094454315909e-07, "loss": 0.0023, "step": 456200 }, { "epoch": 42.5931111733408, "grad_norm": 0.053717199712991714, "learning_rate": 7.420742541849808e-07, "loss": 0.0024, "step": 456300 }, { "epoch": 42.60244562680855, "grad_norm": 0.23153552412986755, "learning_rate": 7.41139062938371e-07, "loss": 0.0017, "step": 456400 }, { "epoch": 42.6117800802763, "grad_norm": 0.009412912651896477, "learning_rate": 7.40203871691761e-07, "loss": 0.0023, "step": 456500 }, { "epoch": 42.62111453374405, "grad_norm": 0.15344732999801636, "learning_rate": 7.392686804451511e-07, "loss": 0.0019, "step": 456600 }, { "epoch": 42.6304489872118, "grad_norm": 1.7181400060653687, "learning_rate": 7.383334891985413e-07, "loss": 0.0016, "step": 456700 }, { "epoch": 42.63978344067955, "grad_norm": 0.0031853835098445415, "learning_rate": 7.373982979519312e-07, "loss": 0.0018, "step": 456800 }, { "epoch": 42.649117894147295, "grad_norm": 0.4855248034000397, "learning_rate": 7.364631067053213e-07, "loss": 0.0016, "step": 456900 }, { "epoch": 42.65845234761505, "grad_norm": 0.12458748370409012, "learning_rate": 7.355279154587114e-07, "loss": 0.002, "step": 457000 }, { "epoch": 42.6677868010828, "grad_norm": 0.06543563306331635, "learning_rate": 7.345927242121015e-07, "loss": 0.0012, "step": 457100 }, { "epoch": 42.67712125455055, "grad_norm": 0.2519598603248596, "learning_rate": 7.336575329654914e-07, "loss": 0.0018, "step": 457200 }, { "epoch": 42.68645570801829, "grad_norm": 0.18424563109874725, "learning_rate": 7.327223417188816e-07, "loss": 0.0022, "step": 457300 }, { "epoch": 42.695790161486045, "grad_norm": 0.019018007442355156, "learning_rate": 7.317871504722717e-07, "loss": 0.002, "step": 457400 }, { "epoch": 42.705124614953796, "grad_norm": 0.001326136989519, "learning_rate": 7.308519592256617e-07, "loss": 0.0016, "step": 457500 }, { "epoch": 42.71445906842155, "grad_norm": 0.09780623018741608, "learning_rate": 7.299167679790519e-07, "loss": 0.0014, "step": 457600 }, { "epoch": 42.72379352188929, "grad_norm": 0.3423632085323334, "learning_rate": 7.289815767324418e-07, "loss": 0.0012, "step": 457700 }, { "epoch": 42.73312797535704, "grad_norm": 0.2660248577594757, "learning_rate": 7.280463854858319e-07, "loss": 0.0018, "step": 457800 }, { "epoch": 42.742462428824794, "grad_norm": 0.004546635318547487, "learning_rate": 7.27111194239222e-07, "loss": 0.0015, "step": 457900 }, { "epoch": 42.75179688229254, "grad_norm": 0.007807364221662283, "learning_rate": 7.261760029926121e-07, "loss": 0.0021, "step": 458000 }, { "epoch": 42.76113133576029, "grad_norm": 0.01104939915239811, "learning_rate": 7.25240811746002e-07, "loss": 0.0021, "step": 458100 }, { "epoch": 42.77046578922804, "grad_norm": 0.11722541600465775, "learning_rate": 7.243056204993921e-07, "loss": 0.0008, "step": 458200 }, { "epoch": 42.77980024269579, "grad_norm": 0.13599523901939392, "learning_rate": 7.233704292527823e-07, "loss": 0.0007, "step": 458300 }, { "epoch": 42.789134696163536, "grad_norm": 0.10104122757911682, "learning_rate": 7.224352380061723e-07, "loss": 0.001, "step": 458400 }, { "epoch": 42.79846914963129, "grad_norm": 0.008371967822313309, "learning_rate": 7.215000467595625e-07, "loss": 0.0024, "step": 458500 }, { "epoch": 42.80780360309904, "grad_norm": 1.7523064613342285, "learning_rate": 7.205648555129524e-07, "loss": 0.0015, "step": 458600 }, { "epoch": 42.81713805656679, "grad_norm": 0.99396812915802, "learning_rate": 7.196296642663425e-07, "loss": 0.0012, "step": 458700 }, { "epoch": 42.826472510034534, "grad_norm": 0.40689024329185486, "learning_rate": 7.186944730197327e-07, "loss": 0.0025, "step": 458800 }, { "epoch": 42.835806963502286, "grad_norm": 0.006834174040704966, "learning_rate": 7.177592817731227e-07, "loss": 0.003, "step": 458900 }, { "epoch": 42.84514141697004, "grad_norm": 0.012186394073069096, "learning_rate": 7.168240905265127e-07, "loss": 0.0035, "step": 459000 }, { "epoch": 42.85447587043779, "grad_norm": 0.01815919391810894, "learning_rate": 7.158888992799027e-07, "loss": 0.0011, "step": 459100 }, { "epoch": 42.86381032390553, "grad_norm": 0.007414640858769417, "learning_rate": 7.149537080332929e-07, "loss": 0.0018, "step": 459200 }, { "epoch": 42.873144777373284, "grad_norm": 0.22705361247062683, "learning_rate": 7.14018516786683e-07, "loss": 0.0015, "step": 459300 }, { "epoch": 42.882479230841035, "grad_norm": 3.1010570526123047, "learning_rate": 7.13083325540073e-07, "loss": 0.0019, "step": 459400 }, { "epoch": 42.89181368430879, "grad_norm": 1.5057497024536133, "learning_rate": 7.12148134293463e-07, "loss": 0.0023, "step": 459500 }, { "epoch": 42.90114813777653, "grad_norm": 3.979221820831299, "learning_rate": 7.112129430468531e-07, "loss": 0.0014, "step": 459600 }, { "epoch": 42.91048259124428, "grad_norm": 0.25802233815193176, "learning_rate": 7.102777518002432e-07, "loss": 0.0033, "step": 459700 }, { "epoch": 42.91981704471203, "grad_norm": 0.0024658802431076765, "learning_rate": 7.093425605536333e-07, "loss": 0.0017, "step": 459800 }, { "epoch": 42.929151498179785, "grad_norm": 0.16411684453487396, "learning_rate": 7.084073693070233e-07, "loss": 0.0017, "step": 459900 }, { "epoch": 42.93848595164753, "grad_norm": 0.0167844295501709, "learning_rate": 7.074721780604133e-07, "loss": 0.0016, "step": 460000 }, { "epoch": 42.93848595164753, "eval_accuracy": 0.7011435239206535, "eval_f1": 0.8275384911807465, "eval_loss": 0.36022767424583435, "eval_roc_auc": 0.9109274148902822, "eval_runtime": 147.0931, "eval_samples_per_second": 291.312, "eval_steps_per_second": 291.312, "step": 460000 }, { "epoch": 42.94782040511528, "grad_norm": 0.6503751873970032, "learning_rate": 7.065369868138035e-07, "loss": 0.0033, "step": 460100 }, { "epoch": 42.95715485858303, "grad_norm": 3.593961477279663, "learning_rate": 7.056017955671936e-07, "loss": 0.0019, "step": 460200 }, { "epoch": 42.96648931205078, "grad_norm": 0.20587489008903503, "learning_rate": 7.046666043205836e-07, "loss": 0.0014, "step": 460300 }, { "epoch": 42.97582376551853, "grad_norm": 0.005914905574172735, "learning_rate": 7.037314130739737e-07, "loss": 0.0022, "step": 460400 }, { "epoch": 42.98515821898628, "grad_norm": 0.2652168869972229, "learning_rate": 7.027962218273637e-07, "loss": 0.0015, "step": 460500 }, { "epoch": 42.99449267245403, "grad_norm": 0.024843653663992882, "learning_rate": 7.018610305807538e-07, "loss": 0.0016, "step": 460600 }, { "epoch": 43.00382712592178, "grad_norm": 0.041743360459804535, "learning_rate": 7.00925839334144e-07, "loss": 0.0019, "step": 460700 }, { "epoch": 43.013161579389525, "grad_norm": 0.007376746274530888, "learning_rate": 6.999906480875339e-07, "loss": 0.0024, "step": 460800 }, { "epoch": 43.022496032857276, "grad_norm": 0.0012749542947858572, "learning_rate": 6.99055456840924e-07, "loss": 0.0019, "step": 460900 }, { "epoch": 43.03183048632503, "grad_norm": 0.4672754406929016, "learning_rate": 6.981202655943141e-07, "loss": 0.0018, "step": 461000 }, { "epoch": 43.04116493979277, "grad_norm": 0.18202871084213257, "learning_rate": 6.971850743477042e-07, "loss": 0.0018, "step": 461100 }, { "epoch": 43.05049939326052, "grad_norm": 0.003005247563123703, "learning_rate": 6.962498831010943e-07, "loss": 0.0025, "step": 461200 }, { "epoch": 43.059833846728274, "grad_norm": 6.13429069519043, "learning_rate": 6.953146918544843e-07, "loss": 0.0033, "step": 461300 }, { "epoch": 43.069168300196026, "grad_norm": 0.11863049119710922, "learning_rate": 6.943795006078743e-07, "loss": 0.002, "step": 461400 }, { "epoch": 43.07850275366377, "grad_norm": 0.4673613905906677, "learning_rate": 6.934443093612644e-07, "loss": 0.0042, "step": 461500 }, { "epoch": 43.08783720713152, "grad_norm": 0.18208153545856476, "learning_rate": 6.925091181146546e-07, "loss": 0.0024, "step": 461600 }, { "epoch": 43.09717166059927, "grad_norm": 0.25869959592819214, "learning_rate": 6.915739268680445e-07, "loss": 0.0015, "step": 461700 }, { "epoch": 43.106506114067024, "grad_norm": 0.003860039636492729, "learning_rate": 6.906387356214346e-07, "loss": 0.002, "step": 461800 }, { "epoch": 43.11584056753477, "grad_norm": 0.01280305813997984, "learning_rate": 6.897035443748247e-07, "loss": 0.0026, "step": 461900 }, { "epoch": 43.12517502100252, "grad_norm": 0.20505301654338837, "learning_rate": 6.887683531282148e-07, "loss": 0.0024, "step": 462000 }, { "epoch": 43.13450947447027, "grad_norm": 0.008197847753763199, "learning_rate": 6.878331618816049e-07, "loss": 0.0026, "step": 462100 }, { "epoch": 43.14384392793802, "grad_norm": 0.045050349086523056, "learning_rate": 6.868979706349949e-07, "loss": 0.0023, "step": 462200 }, { "epoch": 43.153178381405766, "grad_norm": 0.034402452409267426, "learning_rate": 6.85962779388385e-07, "loss": 0.0019, "step": 462300 }, { "epoch": 43.16251283487352, "grad_norm": 3.587714195251465, "learning_rate": 6.85027588141775e-07, "loss": 0.0023, "step": 462400 }, { "epoch": 43.17184728834127, "grad_norm": 0.02526264451444149, "learning_rate": 6.840923968951652e-07, "loss": 0.0022, "step": 462500 }, { "epoch": 43.18118174180902, "grad_norm": 4.1742424964904785, "learning_rate": 6.831572056485551e-07, "loss": 0.0014, "step": 462600 }, { "epoch": 43.190516195276764, "grad_norm": 0.5699930787086487, "learning_rate": 6.822220144019452e-07, "loss": 0.0013, "step": 462700 }, { "epoch": 43.199850648744516, "grad_norm": 0.27295392751693726, "learning_rate": 6.812868231553354e-07, "loss": 0.001, "step": 462800 }, { "epoch": 43.20918510221227, "grad_norm": 0.11070293933153152, "learning_rate": 6.803516319087254e-07, "loss": 0.0018, "step": 462900 }, { "epoch": 43.21851955568002, "grad_norm": 0.019505904987454414, "learning_rate": 6.794164406621155e-07, "loss": 0.0026, "step": 463000 }, { "epoch": 43.22785400914776, "grad_norm": 0.08085977286100388, "learning_rate": 6.784812494155055e-07, "loss": 0.0027, "step": 463100 }, { "epoch": 43.237188462615514, "grad_norm": 2.86643123626709, "learning_rate": 6.775460581688956e-07, "loss": 0.0021, "step": 463200 }, { "epoch": 43.246522916083265, "grad_norm": 3.37627911567688, "learning_rate": 6.766108669222856e-07, "loss": 0.002, "step": 463300 }, { "epoch": 43.255857369551016, "grad_norm": 0.0038270012009888887, "learning_rate": 6.756756756756758e-07, "loss": 0.0022, "step": 463400 }, { "epoch": 43.26519182301876, "grad_norm": 2.9192278385162354, "learning_rate": 6.747404844290659e-07, "loss": 0.0023, "step": 463500 }, { "epoch": 43.27452627648651, "grad_norm": 0.02221398986876011, "learning_rate": 6.738052931824558e-07, "loss": 0.002, "step": 463600 }, { "epoch": 43.28386072995426, "grad_norm": 0.006962216924875975, "learning_rate": 6.72870101935846e-07, "loss": 0.0011, "step": 463700 }, { "epoch": 43.29319518342201, "grad_norm": 0.48698824644088745, "learning_rate": 6.71934910689236e-07, "loss": 0.0015, "step": 463800 }, { "epoch": 43.30252963688976, "grad_norm": 0.033777620643377304, "learning_rate": 6.709997194426261e-07, "loss": 0.0026, "step": 463900 }, { "epoch": 43.31186409035751, "grad_norm": 0.34106215834617615, "learning_rate": 6.70064528196016e-07, "loss": 0.0034, "step": 464000 }, { "epoch": 43.32119854382526, "grad_norm": 0.017551664263010025, "learning_rate": 6.691293369494062e-07, "loss": 0.0014, "step": 464100 }, { "epoch": 43.330532997293005, "grad_norm": 0.8631325960159302, "learning_rate": 6.681941457027963e-07, "loss": 0.0024, "step": 464200 }, { "epoch": 43.33986745076076, "grad_norm": 0.0008166341576725245, "learning_rate": 6.672589544561863e-07, "loss": 0.0022, "step": 464300 }, { "epoch": 43.34920190422851, "grad_norm": 0.07572289556264877, "learning_rate": 6.663237632095765e-07, "loss": 0.0019, "step": 464400 }, { "epoch": 43.35853635769626, "grad_norm": 0.8200817704200745, "learning_rate": 6.653885719629664e-07, "loss": 0.0019, "step": 464500 }, { "epoch": 43.367870811164, "grad_norm": 0.012061499990522861, "learning_rate": 6.644533807163565e-07, "loss": 0.0011, "step": 464600 }, { "epoch": 43.377205264631755, "grad_norm": 1.3732788562774658, "learning_rate": 6.635181894697467e-07, "loss": 0.0017, "step": 464700 }, { "epoch": 43.386539718099506, "grad_norm": 0.0005364975659176707, "learning_rate": 6.625829982231367e-07, "loss": 0.002, "step": 464800 }, { "epoch": 43.39587417156726, "grad_norm": 0.092276930809021, "learning_rate": 6.616478069765267e-07, "loss": 0.0014, "step": 464900 }, { "epoch": 43.405208625035, "grad_norm": 1.0158138275146484, "learning_rate": 6.607126157299168e-07, "loss": 0.0024, "step": 465000 }, { "epoch": 43.405208625035, "eval_accuracy": 0.6979463243873979, "eval_f1": 0.8260816468946267, "eval_loss": 0.36307254433631897, "eval_roc_auc": 0.9114925597970285, "eval_runtime": 146.6687, "eval_samples_per_second": 292.155, "eval_steps_per_second": 292.155, "step": 465000 }, { "epoch": 43.41454307850275, "grad_norm": 0.015089000575244427, "learning_rate": 6.597774244833069e-07, "loss": 0.0015, "step": 465100 }, { "epoch": 43.423877531970504, "grad_norm": 2.659353733062744, "learning_rate": 6.588422332366969e-07, "loss": 0.0008, "step": 465200 }, { "epoch": 43.433211985438255, "grad_norm": 0.0047720023430883884, "learning_rate": 6.579070419900871e-07, "loss": 0.0026, "step": 465300 }, { "epoch": 43.442546438906, "grad_norm": 0.015175740234553814, "learning_rate": 6.56971850743477e-07, "loss": 0.0025, "step": 465400 }, { "epoch": 43.45188089237375, "grad_norm": 0.03337784484028816, "learning_rate": 6.560366594968671e-07, "loss": 0.0012, "step": 465500 }, { "epoch": 43.4612153458415, "grad_norm": 0.010081068612635136, "learning_rate": 6.551014682502573e-07, "loss": 0.002, "step": 465600 }, { "epoch": 43.470549799309254, "grad_norm": 0.02896086685359478, "learning_rate": 6.541662770036473e-07, "loss": 0.003, "step": 465700 }, { "epoch": 43.479884252777, "grad_norm": 0.0011394465109333396, "learning_rate": 6.532310857570373e-07, "loss": 0.002, "step": 465800 }, { "epoch": 43.48921870624475, "grad_norm": 0.726508378982544, "learning_rate": 6.522958945104274e-07, "loss": 0.0021, "step": 465900 }, { "epoch": 43.4985531597125, "grad_norm": 1.0584111213684082, "learning_rate": 6.513607032638175e-07, "loss": 0.0016, "step": 466000 }, { "epoch": 43.50788761318025, "grad_norm": 0.2275928407907486, "learning_rate": 6.504255120172076e-07, "loss": 0.0027, "step": 466100 }, { "epoch": 43.517222066647996, "grad_norm": 2.178596258163452, "learning_rate": 6.494903207705977e-07, "loss": 0.001, "step": 466200 }, { "epoch": 43.52655652011575, "grad_norm": 0.2428123652935028, "learning_rate": 6.485551295239877e-07, "loss": 0.0026, "step": 466300 }, { "epoch": 43.5358909735835, "grad_norm": 0.025415636599063873, "learning_rate": 6.476199382773777e-07, "loss": 0.0014, "step": 466400 }, { "epoch": 43.54522542705124, "grad_norm": 0.03879796341061592, "learning_rate": 6.466847470307679e-07, "loss": 0.0021, "step": 466500 }, { "epoch": 43.554559880518994, "grad_norm": 0.3401234745979309, "learning_rate": 6.45749555784158e-07, "loss": 0.0012, "step": 466600 }, { "epoch": 43.563894333986745, "grad_norm": 10.391097068786621, "learning_rate": 6.448143645375479e-07, "loss": 0.0025, "step": 466700 }, { "epoch": 43.5732287874545, "grad_norm": 0.0031508111860603094, "learning_rate": 6.438791732909381e-07, "loss": 0.0024, "step": 466800 }, { "epoch": 43.58256324092224, "grad_norm": 0.3296085596084595, "learning_rate": 6.429439820443281e-07, "loss": 0.0011, "step": 466900 }, { "epoch": 43.59189769438999, "grad_norm": 0.008533659391105175, "learning_rate": 6.420087907977182e-07, "loss": 0.0019, "step": 467000 }, { "epoch": 43.60123214785774, "grad_norm": 0.5096091032028198, "learning_rate": 6.410735995511083e-07, "loss": 0.0019, "step": 467100 }, { "epoch": 43.610566601325495, "grad_norm": 0.01287063118070364, "learning_rate": 6.401384083044983e-07, "loss": 0.0026, "step": 467200 }, { "epoch": 43.61990105479324, "grad_norm": 0.007648816332221031, "learning_rate": 6.392032170578883e-07, "loss": 0.0012, "step": 467300 }, { "epoch": 43.62923550826099, "grad_norm": 0.08811986446380615, "learning_rate": 6.382680258112785e-07, "loss": 0.001, "step": 467400 }, { "epoch": 43.63856996172874, "grad_norm": 0.005755885504186153, "learning_rate": 6.373328345646686e-07, "loss": 0.0026, "step": 467500 }, { "epoch": 43.64790441519649, "grad_norm": 4.020175457000732, "learning_rate": 6.363976433180585e-07, "loss": 0.0023, "step": 467600 }, { "epoch": 43.65723886866424, "grad_norm": 0.07542301714420319, "learning_rate": 6.354624520714487e-07, "loss": 0.0032, "step": 467700 }, { "epoch": 43.66657332213199, "grad_norm": 0.005432680249214172, "learning_rate": 6.345272608248387e-07, "loss": 0.0019, "step": 467800 }, { "epoch": 43.67590777559974, "grad_norm": 0.16420459747314453, "learning_rate": 6.335920695782288e-07, "loss": 0.0022, "step": 467900 }, { "epoch": 43.68524222906749, "grad_norm": 3.4493417739868164, "learning_rate": 6.32656878331619e-07, "loss": 0.002, "step": 468000 }, { "epoch": 43.694576682535235, "grad_norm": 0.32160064578056335, "learning_rate": 6.317216870850089e-07, "loss": 0.0023, "step": 468100 }, { "epoch": 43.703911136002986, "grad_norm": 0.2281501591205597, "learning_rate": 6.30786495838399e-07, "loss": 0.0019, "step": 468200 }, { "epoch": 43.71324558947074, "grad_norm": 0.3061775863170624, "learning_rate": 6.298513045917891e-07, "loss": 0.0016, "step": 468300 }, { "epoch": 43.72258004293849, "grad_norm": 4.448955535888672, "learning_rate": 6.289161133451792e-07, "loss": 0.0022, "step": 468400 }, { "epoch": 43.73191449640623, "grad_norm": 0.27373915910720825, "learning_rate": 6.279809220985691e-07, "loss": 0.0012, "step": 468500 }, { "epoch": 43.741248949873984, "grad_norm": 0.017164135351777077, "learning_rate": 6.270457308519593e-07, "loss": 0.0021, "step": 468600 }, { "epoch": 43.750583403341736, "grad_norm": 0.008824924007058144, "learning_rate": 6.261105396053494e-07, "loss": 0.0014, "step": 468700 }, { "epoch": 43.75991785680949, "grad_norm": 0.39909547567367554, "learning_rate": 6.251753483587394e-07, "loss": 0.0015, "step": 468800 }, { "epoch": 43.76925231027723, "grad_norm": 0.10505799949169159, "learning_rate": 6.242401571121295e-07, "loss": 0.0019, "step": 468900 }, { "epoch": 43.77858676374498, "grad_norm": 0.32562246918678284, "learning_rate": 6.233049658655196e-07, "loss": 0.0022, "step": 469000 }, { "epoch": 43.787921217212734, "grad_norm": 0.07520440965890884, "learning_rate": 6.223697746189096e-07, "loss": 0.0019, "step": 469100 }, { "epoch": 43.797255670680485, "grad_norm": 1.51053786277771, "learning_rate": 6.214345833722996e-07, "loss": 0.0012, "step": 469200 }, { "epoch": 43.80659012414823, "grad_norm": 0.01024672668427229, "learning_rate": 6.204993921256897e-07, "loss": 0.0024, "step": 469300 }, { "epoch": 43.81592457761598, "grad_norm": 0.00048157587298192084, "learning_rate": 6.195642008790798e-07, "loss": 0.0019, "step": 469400 }, { "epoch": 43.82525903108373, "grad_norm": 0.004205954261124134, "learning_rate": 6.186290096324698e-07, "loss": 0.0012, "step": 469500 }, { "epoch": 43.834593484551476, "grad_norm": 0.006765980273485184, "learning_rate": 6.1769381838586e-07, "loss": 0.0023, "step": 469600 }, { "epoch": 43.84392793801923, "grad_norm": 0.018164923414587975, "learning_rate": 6.1675862713925e-07, "loss": 0.0018, "step": 469700 }, { "epoch": 43.85326239148698, "grad_norm": 6.049109935760498, "learning_rate": 6.1582343589264e-07, "loss": 0.0012, "step": 469800 }, { "epoch": 43.86259684495473, "grad_norm": 0.010395179502665997, "learning_rate": 6.148882446460302e-07, "loss": 0.0029, "step": 469900 }, { "epoch": 43.871931298422474, "grad_norm": 0.033838048577308655, "learning_rate": 6.139530533994202e-07, "loss": 0.002, "step": 470000 }, { "epoch": 43.871931298422474, "eval_accuracy": 0.6967561260210035, "eval_f1": 0.8266388816532083, "eval_loss": 0.365881085395813, "eval_roc_auc": 0.9134356657926739, "eval_runtime": 146.9801, "eval_samples_per_second": 291.536, "eval_steps_per_second": 291.536, "step": 470000 }, { "epoch": 43.881265751890226, "grad_norm": 0.14941610395908356, "learning_rate": 6.130178621528103e-07, "loss": 0.0022, "step": 470100 }, { "epoch": 43.89060020535798, "grad_norm": 0.0011656712740659714, "learning_rate": 6.120826709062004e-07, "loss": 0.0021, "step": 470200 }, { "epoch": 43.89993465882573, "grad_norm": 0.029591074213385582, "learning_rate": 6.111474796595904e-07, "loss": 0.0019, "step": 470300 }, { "epoch": 43.90926911229347, "grad_norm": 0.03941216319799423, "learning_rate": 6.102122884129805e-07, "loss": 0.0017, "step": 470400 }, { "epoch": 43.918603565761224, "grad_norm": 0.10787713527679443, "learning_rate": 6.092770971663706e-07, "loss": 0.0011, "step": 470500 }, { "epoch": 43.927938019228975, "grad_norm": 0.44331228733062744, "learning_rate": 6.083419059197607e-07, "loss": 0.0022, "step": 470600 }, { "epoch": 43.937272472696726, "grad_norm": 0.04901808872818947, "learning_rate": 6.074067146731507e-07, "loss": 0.0018, "step": 470700 }, { "epoch": 43.94660692616447, "grad_norm": 0.295360803604126, "learning_rate": 6.064715234265408e-07, "loss": 0.0021, "step": 470800 }, { "epoch": 43.95594137963222, "grad_norm": 0.1651511788368225, "learning_rate": 6.055363321799308e-07, "loss": 0.0015, "step": 470900 }, { "epoch": 43.96527583309997, "grad_norm": 0.057638540863990784, "learning_rate": 6.046011409333209e-07, "loss": 0.002, "step": 471000 }, { "epoch": 43.974610286567724, "grad_norm": 0.019031411036849022, "learning_rate": 6.03665949686711e-07, "loss": 0.0019, "step": 471100 }, { "epoch": 43.98394474003547, "grad_norm": 0.23689042031764984, "learning_rate": 6.02730758440101e-07, "loss": 0.0014, "step": 471200 }, { "epoch": 43.99327919350322, "grad_norm": 0.003803363535553217, "learning_rate": 6.017955671934911e-07, "loss": 0.0022, "step": 471300 }, { "epoch": 44.00261364697097, "grad_norm": 0.018022878095507622, "learning_rate": 6.008603759468812e-07, "loss": 0.0016, "step": 471400 }, { "epoch": 44.01194810043872, "grad_norm": 0.015486695803701878, "learning_rate": 5.999251847002713e-07, "loss": 0.002, "step": 471500 }, { "epoch": 44.02128255390647, "grad_norm": 1.7895442247390747, "learning_rate": 5.989899934536613e-07, "loss": 0.002, "step": 471600 }, { "epoch": 44.03061700737422, "grad_norm": 0.2472539097070694, "learning_rate": 5.980548022070514e-07, "loss": 0.0013, "step": 471700 }, { "epoch": 44.03995146084197, "grad_norm": 0.016685403883457184, "learning_rate": 5.971196109604414e-07, "loss": 0.0016, "step": 471800 }, { "epoch": 44.04928591430972, "grad_norm": 0.0036439034156501293, "learning_rate": 5.961844197138315e-07, "loss": 0.0016, "step": 471900 }, { "epoch": 44.058620367777465, "grad_norm": 0.24933522939682007, "learning_rate": 5.952492284672216e-07, "loss": 0.0008, "step": 472000 }, { "epoch": 44.067954821245216, "grad_norm": 0.25924474000930786, "learning_rate": 5.943140372206117e-07, "loss": 0.0023, "step": 472100 }, { "epoch": 44.07728927471297, "grad_norm": 0.006885192822664976, "learning_rate": 5.933788459740017e-07, "loss": 0.0014, "step": 472200 }, { "epoch": 44.08662372818071, "grad_norm": 0.0013710195198655128, "learning_rate": 5.924436547273918e-07, "loss": 0.002, "step": 472300 }, { "epoch": 44.09595818164846, "grad_norm": 0.05167869105935097, "learning_rate": 5.915084634807819e-07, "loss": 0.0008, "step": 472400 }, { "epoch": 44.105292635116214, "grad_norm": 3.6033542156219482, "learning_rate": 5.905732722341719e-07, "loss": 0.0011, "step": 472500 }, { "epoch": 44.114627088583966, "grad_norm": 0.0009780293330550194, "learning_rate": 5.896380809875621e-07, "loss": 0.0017, "step": 472600 }, { "epoch": 44.12396154205171, "grad_norm": 0.07750631868839264, "learning_rate": 5.887028897409521e-07, "loss": 0.0018, "step": 472700 }, { "epoch": 44.13329599551946, "grad_norm": 0.23749582469463348, "learning_rate": 5.877676984943421e-07, "loss": 0.0018, "step": 472800 }, { "epoch": 44.14263044898721, "grad_norm": 3.700747013092041, "learning_rate": 5.868325072477322e-07, "loss": 0.0019, "step": 472900 }, { "epoch": 44.151964902454964, "grad_norm": 0.009358738549053669, "learning_rate": 5.858973160011223e-07, "loss": 0.0013, "step": 473000 }, { "epoch": 44.16129935592271, "grad_norm": 0.7869122624397278, "learning_rate": 5.849621247545123e-07, "loss": 0.0015, "step": 473100 }, { "epoch": 44.17063380939046, "grad_norm": 7.106084823608398, "learning_rate": 5.840269335079024e-07, "loss": 0.0027, "step": 473200 }, { "epoch": 44.17996826285821, "grad_norm": 0.009602286852896214, "learning_rate": 5.830917422612925e-07, "loss": 0.0012, "step": 473300 }, { "epoch": 44.18930271632596, "grad_norm": 0.1099320575594902, "learning_rate": 5.821565510146825e-07, "loss": 0.0014, "step": 473400 }, { "epoch": 44.198637169793706, "grad_norm": 0.016748493537306786, "learning_rate": 5.812213597680727e-07, "loss": 0.002, "step": 473500 }, { "epoch": 44.20797162326146, "grad_norm": 0.4460200369358063, "learning_rate": 5.802861685214627e-07, "loss": 0.0022, "step": 473600 }, { "epoch": 44.21730607672921, "grad_norm": 0.2658277750015259, "learning_rate": 5.793509772748527e-07, "loss": 0.002, "step": 473700 }, { "epoch": 44.22664053019696, "grad_norm": 0.43189531564712524, "learning_rate": 5.784157860282428e-07, "loss": 0.0014, "step": 473800 }, { "epoch": 44.235974983664704, "grad_norm": 0.008196348324418068, "learning_rate": 5.774805947816329e-07, "loss": 0.0023, "step": 473900 }, { "epoch": 44.245309437132455, "grad_norm": 0.013586879707872868, "learning_rate": 5.76545403535023e-07, "loss": 0.0019, "step": 474000 }, { "epoch": 44.25464389060021, "grad_norm": 0.12035326659679413, "learning_rate": 5.756102122884131e-07, "loss": 0.0016, "step": 474100 }, { "epoch": 44.26397834406796, "grad_norm": 6.340120315551758, "learning_rate": 5.746750210418031e-07, "loss": 0.0024, "step": 474200 }, { "epoch": 44.2733127975357, "grad_norm": 0.47091659903526306, "learning_rate": 5.737398297951931e-07, "loss": 0.001, "step": 474300 }, { "epoch": 44.28264725100345, "grad_norm": 0.09193124622106552, "learning_rate": 5.728046385485833e-07, "loss": 0.0012, "step": 474400 }, { "epoch": 44.291981704471205, "grad_norm": 6.6809844970703125, "learning_rate": 5.718694473019733e-07, "loss": 0.0015, "step": 474500 }, { "epoch": 44.301316157938956, "grad_norm": 2.4450175762176514, "learning_rate": 5.709342560553634e-07, "loss": 0.0013, "step": 474600 }, { "epoch": 44.3106506114067, "grad_norm": 0.03823103383183479, "learning_rate": 5.699990648087535e-07, "loss": 0.0015, "step": 474700 }, { "epoch": 44.31998506487445, "grad_norm": 0.09149434417486191, "learning_rate": 5.690638735621435e-07, "loss": 0.0006, "step": 474800 }, { "epoch": 44.3293195183422, "grad_norm": 0.04942906275391579, "learning_rate": 5.681286823155336e-07, "loss": 0.0016, "step": 474900 }, { "epoch": 44.33865397180995, "grad_norm": 0.014869040809571743, "learning_rate": 5.671934910689236e-07, "loss": 0.0021, "step": 475000 }, { "epoch": 44.33865397180995, "eval_accuracy": 0.7006767794632439, "eval_f1": 0.8276272804505286, "eval_loss": 0.36441120505332947, "eval_roc_auc": 0.9109630036574402, "eval_runtime": 146.4221, "eval_samples_per_second": 292.647, "eval_steps_per_second": 292.647, "step": 475000 }, { "epoch": 44.3479884252777, "grad_norm": 0.9703243970870972, "learning_rate": 5.662582998223137e-07, "loss": 0.0019, "step": 475100 }, { "epoch": 44.35732287874545, "grad_norm": 0.05925079807639122, "learning_rate": 5.653231085757037e-07, "loss": 0.0014, "step": 475200 }, { "epoch": 44.3666573322132, "grad_norm": 0.06320837140083313, "learning_rate": 5.643879173290938e-07, "loss": 0.0012, "step": 475300 }, { "epoch": 44.375991785680945, "grad_norm": 0.05149206891655922, "learning_rate": 5.634527260824839e-07, "loss": 0.0033, "step": 475400 }, { "epoch": 44.3853262391487, "grad_norm": 0.004136098548769951, "learning_rate": 5.62517534835874e-07, "loss": 0.0016, "step": 475500 }, { "epoch": 44.39466069261645, "grad_norm": 2.670487642288208, "learning_rate": 5.61582343589264e-07, "loss": 0.002, "step": 475600 }, { "epoch": 44.4039951460842, "grad_norm": 0.10389243066310883, "learning_rate": 5.606471523426541e-07, "loss": 0.002, "step": 475700 }, { "epoch": 44.41332959955194, "grad_norm": 0.17267347872257233, "learning_rate": 5.597119610960442e-07, "loss": 0.0021, "step": 475800 }, { "epoch": 44.422664053019695, "grad_norm": 1.4256857633590698, "learning_rate": 5.587767698494342e-07, "loss": 0.0016, "step": 475900 }, { "epoch": 44.431998506487446, "grad_norm": 0.03725295886397362, "learning_rate": 5.578415786028244e-07, "loss": 0.0018, "step": 476000 }, { "epoch": 44.4413329599552, "grad_norm": 1.0815562009811401, "learning_rate": 5.569063873562144e-07, "loss": 0.0015, "step": 476100 }, { "epoch": 44.45066741342294, "grad_norm": 0.01096101850271225, "learning_rate": 5.559711961096044e-07, "loss": 0.0013, "step": 476200 }, { "epoch": 44.46000186689069, "grad_norm": 0.001543583464808762, "learning_rate": 5.550360048629945e-07, "loss": 0.0022, "step": 476300 }, { "epoch": 44.469336320358444, "grad_norm": 0.1823229044675827, "learning_rate": 5.541008136163846e-07, "loss": 0.0021, "step": 476400 }, { "epoch": 44.478670773826195, "grad_norm": 0.8410739898681641, "learning_rate": 5.531656223697747e-07, "loss": 0.0018, "step": 476500 }, { "epoch": 44.48800522729394, "grad_norm": 0.845122218132019, "learning_rate": 5.522304311231648e-07, "loss": 0.0024, "step": 476600 }, { "epoch": 44.49733968076169, "grad_norm": 0.09412667155265808, "learning_rate": 5.512952398765548e-07, "loss": 0.0021, "step": 476700 }, { "epoch": 44.50667413422944, "grad_norm": 0.009533349424600601, "learning_rate": 5.503600486299448e-07, "loss": 0.0014, "step": 476800 }, { "epoch": 44.51600858769719, "grad_norm": 0.14293985068798065, "learning_rate": 5.49424857383335e-07, "loss": 0.0013, "step": 476900 }, { "epoch": 44.52534304116494, "grad_norm": 0.005516583099961281, "learning_rate": 5.48489666136725e-07, "loss": 0.0019, "step": 477000 }, { "epoch": 44.53467749463269, "grad_norm": 1.7165206670761108, "learning_rate": 5.47554474890115e-07, "loss": 0.0025, "step": 477100 }, { "epoch": 44.54401194810044, "grad_norm": 0.014697851613163948, "learning_rate": 5.466192836435051e-07, "loss": 0.0014, "step": 477200 }, { "epoch": 44.55334640156819, "grad_norm": 0.1970815509557724, "learning_rate": 5.456840923968952e-07, "loss": 0.0013, "step": 477300 }, { "epoch": 44.562680855035936, "grad_norm": 0.8345784544944763, "learning_rate": 5.447489011502853e-07, "loss": 0.0022, "step": 477400 }, { "epoch": 44.57201530850369, "grad_norm": 0.009876743890345097, "learning_rate": 5.438137099036754e-07, "loss": 0.0019, "step": 477500 }, { "epoch": 44.58134976197144, "grad_norm": 0.25346142053604126, "learning_rate": 5.428785186570654e-07, "loss": 0.0026, "step": 477600 }, { "epoch": 44.59068421543918, "grad_norm": 0.009776011109352112, "learning_rate": 5.419433274104554e-07, "loss": 0.0016, "step": 477700 }, { "epoch": 44.600018668906934, "grad_norm": 0.00572167569771409, "learning_rate": 5.410081361638456e-07, "loss": 0.0015, "step": 477800 }, { "epoch": 44.609353122374685, "grad_norm": 0.8362804055213928, "learning_rate": 5.400729449172356e-07, "loss": 0.0018, "step": 477900 }, { "epoch": 44.618687575842436, "grad_norm": 0.004805571399629116, "learning_rate": 5.391377536706257e-07, "loss": 0.0011, "step": 478000 }, { "epoch": 44.62802202931018, "grad_norm": 6.822221755981445, "learning_rate": 5.382025624240158e-07, "loss": 0.0026, "step": 478100 }, { "epoch": 44.63735648277793, "grad_norm": 0.0026235217228531837, "learning_rate": 5.372673711774058e-07, "loss": 0.002, "step": 478200 }, { "epoch": 44.64669093624568, "grad_norm": 0.16554608941078186, "learning_rate": 5.363321799307959e-07, "loss": 0.0017, "step": 478300 }, { "epoch": 44.656025389713434, "grad_norm": 5.788506507873535, "learning_rate": 5.35396988684186e-07, "loss": 0.003, "step": 478400 }, { "epoch": 44.66535984318118, "grad_norm": 0.010620285756886005, "learning_rate": 5.344617974375761e-07, "loss": 0.0018, "step": 478500 }, { "epoch": 44.67469429664893, "grad_norm": 0.06404261291027069, "learning_rate": 5.335266061909661e-07, "loss": 0.0013, "step": 478600 }, { "epoch": 44.68402875011668, "grad_norm": 2.98460054397583, "learning_rate": 5.325914149443562e-07, "loss": 0.001, "step": 478700 }, { "epoch": 44.69336320358443, "grad_norm": 0.4428948760032654, "learning_rate": 5.316562236977462e-07, "loss": 0.0013, "step": 478800 }, { "epoch": 44.70269765705218, "grad_norm": 0.2347501516342163, "learning_rate": 5.307210324511363e-07, "loss": 0.0029, "step": 478900 }, { "epoch": 44.71203211051993, "grad_norm": 0.12698954343795776, "learning_rate": 5.297858412045264e-07, "loss": 0.0023, "step": 479000 }, { "epoch": 44.72136656398768, "grad_norm": 0.10485626757144928, "learning_rate": 5.288506499579164e-07, "loss": 0.0016, "step": 479100 }, { "epoch": 44.73070101745543, "grad_norm": 0.07311763614416122, "learning_rate": 5.279154587113065e-07, "loss": 0.0021, "step": 479200 }, { "epoch": 44.740035470923175, "grad_norm": 0.13527576625347137, "learning_rate": 5.269802674646966e-07, "loss": 0.0017, "step": 479300 }, { "epoch": 44.749369924390926, "grad_norm": 0.008331133052706718, "learning_rate": 5.260450762180867e-07, "loss": 0.0017, "step": 479400 }, { "epoch": 44.75870437785868, "grad_norm": 0.014004736207425594, "learning_rate": 5.251098849714767e-07, "loss": 0.0014, "step": 479500 }, { "epoch": 44.76803883132643, "grad_norm": 0.021578429266810417, "learning_rate": 5.241746937248668e-07, "loss": 0.0012, "step": 479600 }, { "epoch": 44.77737328479417, "grad_norm": 2.161295175552368, "learning_rate": 5.232395024782568e-07, "loss": 0.0019, "step": 479700 }, { "epoch": 44.786707738261924, "grad_norm": 0.08008108288049698, "learning_rate": 5.223043112316469e-07, "loss": 0.0025, "step": 479800 }, { "epoch": 44.796042191729676, "grad_norm": 0.4822738766670227, "learning_rate": 5.213691199850371e-07, "loss": 0.001, "step": 479900 }, { "epoch": 44.80537664519743, "grad_norm": 0.5409873723983765, "learning_rate": 5.204339287384271e-07, "loss": 0.0015, "step": 480000 }, { "epoch": 44.80537664519743, "eval_accuracy": 0.7002333722287047, "eval_f1": 0.8273612207313865, "eval_loss": 0.36339622735977173, "eval_roc_auc": 0.9103599502930392, "eval_runtime": 146.5822, "eval_samples_per_second": 292.327, "eval_steps_per_second": 292.327, "step": 480000 }, { "epoch": 44.81471109866517, "grad_norm": 0.5730020403862, "learning_rate": 5.194987374918171e-07, "loss": 0.0019, "step": 480100 }, { "epoch": 44.82404555213292, "grad_norm": 0.01687265932559967, "learning_rate": 5.185635462452071e-07, "loss": 0.0024, "step": 480200 }, { "epoch": 44.833380005600674, "grad_norm": 0.10965230315923691, "learning_rate": 5.176283549985973e-07, "loss": 0.0012, "step": 480300 }, { "epoch": 44.842714459068425, "grad_norm": 4.734686374664307, "learning_rate": 5.166931637519873e-07, "loss": 0.0018, "step": 480400 }, { "epoch": 44.85204891253617, "grad_norm": 0.08249721676111221, "learning_rate": 5.157579725053775e-07, "loss": 0.0025, "step": 480500 }, { "epoch": 44.86138336600392, "grad_norm": 0.03714266046881676, "learning_rate": 5.148227812587675e-07, "loss": 0.0027, "step": 480600 }, { "epoch": 44.87071781947167, "grad_norm": 0.008435795083642006, "learning_rate": 5.138875900121575e-07, "loss": 0.0016, "step": 480700 }, { "epoch": 44.880052272939416, "grad_norm": 0.05796653404831886, "learning_rate": 5.129523987655477e-07, "loss": 0.0019, "step": 480800 }, { "epoch": 44.88938672640717, "grad_norm": 0.004902693908661604, "learning_rate": 5.120172075189377e-07, "loss": 0.0013, "step": 480900 }, { "epoch": 44.89872117987492, "grad_norm": 1.2159204483032227, "learning_rate": 5.110820162723277e-07, "loss": 0.0012, "step": 481000 }, { "epoch": 44.90805563334267, "grad_norm": 0.01377931423485279, "learning_rate": 5.101468250257177e-07, "loss": 0.0023, "step": 481100 }, { "epoch": 44.917390086810414, "grad_norm": 0.006139456294476986, "learning_rate": 5.092116337791079e-07, "loss": 0.0025, "step": 481200 }, { "epoch": 44.926724540278165, "grad_norm": 0.5359872579574585, "learning_rate": 5.082764425324979e-07, "loss": 0.0017, "step": 481300 }, { "epoch": 44.93605899374592, "grad_norm": 4.9172234535217285, "learning_rate": 5.07341251285888e-07, "loss": 0.0016, "step": 481400 }, { "epoch": 44.94539344721367, "grad_norm": 1.026479721069336, "learning_rate": 5.064060600392781e-07, "loss": 0.0015, "step": 481500 }, { "epoch": 44.95472790068141, "grad_norm": 0.12388958781957626, "learning_rate": 5.054708687926681e-07, "loss": 0.0019, "step": 481600 }, { "epoch": 44.96406235414916, "grad_norm": 0.0678764283657074, "learning_rate": 5.045356775460582e-07, "loss": 0.001, "step": 481700 }, { "epoch": 44.973396807616915, "grad_norm": 0.0074876598082482815, "learning_rate": 5.036004862994483e-07, "loss": 0.0011, "step": 481800 }, { "epoch": 44.982731261084666, "grad_norm": 0.7846354842185974, "learning_rate": 5.026652950528384e-07, "loss": 0.0034, "step": 481900 }, { "epoch": 44.99206571455241, "grad_norm": 0.03648332133889198, "learning_rate": 5.017301038062284e-07, "loss": 0.001, "step": 482000 }, { "epoch": 45.00140016802016, "grad_norm": 2.6781740188598633, "learning_rate": 5.007949125596185e-07, "loss": 0.0015, "step": 482100 }, { "epoch": 45.01073462148791, "grad_norm": 0.006997373420745134, "learning_rate": 4.998597213130085e-07, "loss": 0.0028, "step": 482200 }, { "epoch": 45.020069074955664, "grad_norm": 0.0025211418978869915, "learning_rate": 4.989245300663986e-07, "loss": 0.0011, "step": 482300 }, { "epoch": 45.02940352842341, "grad_norm": 0.003946974407881498, "learning_rate": 4.979893388197887e-07, "loss": 0.0017, "step": 482400 }, { "epoch": 45.03873798189116, "grad_norm": 0.09412775933742523, "learning_rate": 4.970541475731788e-07, "loss": 0.0017, "step": 482500 }, { "epoch": 45.04807243535891, "grad_norm": 0.3333100378513336, "learning_rate": 4.961189563265688e-07, "loss": 0.0014, "step": 482600 }, { "epoch": 45.05740688882666, "grad_norm": 0.1764400154352188, "learning_rate": 4.951837650799589e-07, "loss": 0.0013, "step": 482700 }, { "epoch": 45.06674134229441, "grad_norm": 0.8563956618309021, "learning_rate": 4.94248573833349e-07, "loss": 0.0011, "step": 482800 }, { "epoch": 45.07607579576216, "grad_norm": 0.1981395035982132, "learning_rate": 4.93313382586739e-07, "loss": 0.0016, "step": 482900 }, { "epoch": 45.08541024922991, "grad_norm": 0.014332089573144913, "learning_rate": 4.923781913401291e-07, "loss": 0.0014, "step": 483000 }, { "epoch": 45.09474470269766, "grad_norm": 0.002373770345002413, "learning_rate": 4.914430000935191e-07, "loss": 0.0009, "step": 483100 }, { "epoch": 45.104079156165405, "grad_norm": 3.976956844329834, "learning_rate": 4.905078088469092e-07, "loss": 0.0026, "step": 483200 }, { "epoch": 45.113413609633156, "grad_norm": 7.827044486999512, "learning_rate": 4.895726176002993e-07, "loss": 0.0016, "step": 483300 }, { "epoch": 45.12274806310091, "grad_norm": 0.002142038196325302, "learning_rate": 4.886374263536894e-07, "loss": 0.0019, "step": 483400 }, { "epoch": 45.13208251656865, "grad_norm": 0.0020537839736789465, "learning_rate": 4.877022351070794e-07, "loss": 0.0013, "step": 483500 }, { "epoch": 45.1414169700364, "grad_norm": 0.004384816624224186, "learning_rate": 4.867670438604695e-07, "loss": 0.0018, "step": 483600 }, { "epoch": 45.150751423504154, "grad_norm": 0.5403677821159363, "learning_rate": 4.858318526138596e-07, "loss": 0.0012, "step": 483700 }, { "epoch": 45.160085876971905, "grad_norm": 0.21106955409049988, "learning_rate": 4.848966613672496e-07, "loss": 0.0014, "step": 483800 }, { "epoch": 45.16942033043965, "grad_norm": 0.07497818022966385, "learning_rate": 4.839614701206398e-07, "loss": 0.0011, "step": 483900 }, { "epoch": 45.1787547839074, "grad_norm": 0.017731746658682823, "learning_rate": 4.830262788740298e-07, "loss": 0.0014, "step": 484000 }, { "epoch": 45.18808923737515, "grad_norm": 0.05305920168757439, "learning_rate": 4.820910876274198e-07, "loss": 0.0018, "step": 484100 }, { "epoch": 45.1974236908429, "grad_norm": 0.43518564105033875, "learning_rate": 4.811558963808099e-07, "loss": 0.0009, "step": 484200 }, { "epoch": 45.20675814431065, "grad_norm": 0.007532649673521519, "learning_rate": 4.802207051342e-07, "loss": 0.0016, "step": 484300 }, { "epoch": 45.2160925977784, "grad_norm": 0.010734695009887218, "learning_rate": 4.792855138875901e-07, "loss": 0.0013, "step": 484400 }, { "epoch": 45.22542705124615, "grad_norm": 1.1563800573349, "learning_rate": 4.783503226409802e-07, "loss": 0.0016, "step": 484500 }, { "epoch": 45.2347615047139, "grad_norm": 0.015855317935347557, "learning_rate": 4.774151313943702e-07, "loss": 0.0016, "step": 484600 }, { "epoch": 45.244095958181646, "grad_norm": 0.0237994696944952, "learning_rate": 4.764799401477602e-07, "loss": 0.0019, "step": 484700 }, { "epoch": 45.2534304116494, "grad_norm": 0.05861466005444527, "learning_rate": 4.7554474890115036e-07, "loss": 0.0027, "step": 484800 }, { "epoch": 45.26276486511715, "grad_norm": 6.896127700805664, "learning_rate": 4.746095576545404e-07, "loss": 0.0029, "step": 484900 }, { "epoch": 45.2720993185849, "grad_norm": 0.2263270765542984, "learning_rate": 4.7367436640793047e-07, "loss": 0.0012, "step": 485000 }, { "epoch": 45.2720993185849, "eval_accuracy": 0.7022403733955659, "eval_f1": 0.8283262933267763, "eval_loss": 0.36341074109077454, "eval_roc_auc": 0.9104307650057732, "eval_runtime": 146.7796, "eval_samples_per_second": 291.934, "eval_steps_per_second": 291.934, "step": 485000 }, { "epoch": 45.281433772052644, "grad_norm": 0.17373479902744293, "learning_rate": 4.7273917516132056e-07, "loss": 0.0027, "step": 485100 }, { "epoch": 45.290768225520395, "grad_norm": 0.00239551835693419, "learning_rate": 4.718039839147106e-07, "loss": 0.0016, "step": 485200 }, { "epoch": 45.30010267898815, "grad_norm": 0.014856339432299137, "learning_rate": 4.7086879266810067e-07, "loss": 0.0022, "step": 485300 }, { "epoch": 45.3094371324559, "grad_norm": 0.03160097822546959, "learning_rate": 4.699336014214907e-07, "loss": 0.001, "step": 485400 }, { "epoch": 45.31877158592364, "grad_norm": 0.03228652477264404, "learning_rate": 4.6899841017488084e-07, "loss": 0.0024, "step": 485500 }, { "epoch": 45.32810603939139, "grad_norm": 0.05191531032323837, "learning_rate": 4.6806321892827087e-07, "loss": 0.0006, "step": 485600 }, { "epoch": 45.337440492859145, "grad_norm": 1.4276657104492188, "learning_rate": 4.6712802768166095e-07, "loss": 0.0018, "step": 485700 }, { "epoch": 45.346774946326896, "grad_norm": 0.0035186554305255413, "learning_rate": 4.66192836435051e-07, "loss": 0.0018, "step": 485800 }, { "epoch": 45.35610939979464, "grad_norm": 0.06835384666919708, "learning_rate": 4.6525764518844107e-07, "loss": 0.0018, "step": 485900 }, { "epoch": 45.36544385326239, "grad_norm": 0.01049178559333086, "learning_rate": 4.6432245394183115e-07, "loss": 0.0016, "step": 486000 }, { "epoch": 45.37477830673014, "grad_norm": 0.11493603885173798, "learning_rate": 4.633872626952212e-07, "loss": 0.0012, "step": 486100 }, { "epoch": 45.384112760197894, "grad_norm": 0.025305625051259995, "learning_rate": 4.624520714486113e-07, "loss": 0.0022, "step": 486200 }, { "epoch": 45.39344721366564, "grad_norm": 0.00683426670730114, "learning_rate": 4.6151688020200135e-07, "loss": 0.0019, "step": 486300 }, { "epoch": 45.40278166713339, "grad_norm": 0.010299654677510262, "learning_rate": 4.6058168895539143e-07, "loss": 0.0012, "step": 486400 }, { "epoch": 45.41211612060114, "grad_norm": 0.0815727636218071, "learning_rate": 4.5964649770878146e-07, "loss": 0.0016, "step": 486500 }, { "epoch": 45.421450574068885, "grad_norm": 0.0016171397874131799, "learning_rate": 4.5871130646217155e-07, "loss": 0.0009, "step": 486600 }, { "epoch": 45.430785027536636, "grad_norm": 0.1550065577030182, "learning_rate": 4.577761152155616e-07, "loss": 0.0008, "step": 486700 }, { "epoch": 45.44011948100439, "grad_norm": 0.8426225781440735, "learning_rate": 4.568409239689517e-07, "loss": 0.0014, "step": 486800 }, { "epoch": 45.44945393447214, "grad_norm": 0.03659811243414879, "learning_rate": 4.559057327223418e-07, "loss": 0.0011, "step": 486900 }, { "epoch": 45.45878838793988, "grad_norm": 0.17011575400829315, "learning_rate": 4.5497054147573183e-07, "loss": 0.0017, "step": 487000 }, { "epoch": 45.468122841407634, "grad_norm": 1.272952914237976, "learning_rate": 4.540353502291219e-07, "loss": 0.0007, "step": 487100 }, { "epoch": 45.477457294875386, "grad_norm": 0.04665472358465195, "learning_rate": 4.5310015898251194e-07, "loss": 0.0012, "step": 487200 }, { "epoch": 45.48679174834314, "grad_norm": 0.4953325390815735, "learning_rate": 4.52164967735902e-07, "loss": 0.0008, "step": 487300 }, { "epoch": 45.49612620181088, "grad_norm": 0.021935105323791504, "learning_rate": 4.5122977648929206e-07, "loss": 0.0013, "step": 487400 }, { "epoch": 45.50546065527863, "grad_norm": 7.437084197998047, "learning_rate": 4.502945852426822e-07, "loss": 0.0014, "step": 487500 }, { "epoch": 45.514795108746384, "grad_norm": 0.04416705295443535, "learning_rate": 4.493593939960722e-07, "loss": 0.0014, "step": 487600 }, { "epoch": 45.524129562214135, "grad_norm": 0.0013479633489623666, "learning_rate": 4.484242027494623e-07, "loss": 0.0017, "step": 487700 }, { "epoch": 45.53346401568188, "grad_norm": 0.2008526474237442, "learning_rate": 4.474890115028524e-07, "loss": 0.0016, "step": 487800 }, { "epoch": 45.54279846914963, "grad_norm": 0.22337540984153748, "learning_rate": 4.465538202562424e-07, "loss": 0.0015, "step": 487900 }, { "epoch": 45.55213292261738, "grad_norm": 0.0037093879655003548, "learning_rate": 4.456186290096325e-07, "loss": 0.0006, "step": 488000 }, { "epoch": 45.56146737608513, "grad_norm": 0.10056743770837784, "learning_rate": 4.4468343776302253e-07, "loss": 0.0016, "step": 488100 }, { "epoch": 45.57080182955288, "grad_norm": 0.005056849680840969, "learning_rate": 4.4374824651641267e-07, "loss": 0.0019, "step": 488200 }, { "epoch": 45.58013628302063, "grad_norm": 0.009139874018728733, "learning_rate": 4.428130552698027e-07, "loss": 0.0022, "step": 488300 }, { "epoch": 45.58947073648838, "grad_norm": 3.577589988708496, "learning_rate": 4.418778640231928e-07, "loss": 0.0018, "step": 488400 }, { "epoch": 45.59880518995613, "grad_norm": 0.17073649168014526, "learning_rate": 4.409426727765828e-07, "loss": 0.0012, "step": 488500 }, { "epoch": 45.608139643423875, "grad_norm": 9.230406761169434, "learning_rate": 4.400074815299729e-07, "loss": 0.0012, "step": 488600 }, { "epoch": 45.61747409689163, "grad_norm": 0.0005583607708103955, "learning_rate": 4.3907229028336304e-07, "loss": 0.0016, "step": 488700 }, { "epoch": 45.62680855035938, "grad_norm": 1.3664498329162598, "learning_rate": 4.3813709903675307e-07, "loss": 0.0016, "step": 488800 }, { "epoch": 45.63614300382713, "grad_norm": 0.000862940913066268, "learning_rate": 4.3720190779014315e-07, "loss": 0.001, "step": 488900 }, { "epoch": 45.645477457294874, "grad_norm": 1.4655529260635376, "learning_rate": 4.362667165435332e-07, "loss": 0.0016, "step": 489000 }, { "epoch": 45.654811910762625, "grad_norm": 0.0376693420112133, "learning_rate": 4.3533152529692326e-07, "loss": 0.0016, "step": 489100 }, { "epoch": 45.664146364230376, "grad_norm": 7.228760719299316, "learning_rate": 4.343963340503133e-07, "loss": 0.0017, "step": 489200 }, { "epoch": 45.67348081769812, "grad_norm": 0.021521301940083504, "learning_rate": 4.334611428037034e-07, "loss": 0.0009, "step": 489300 }, { "epoch": 45.68281527116587, "grad_norm": 5.395725727081299, "learning_rate": 4.325259515570934e-07, "loss": 0.0016, "step": 489400 }, { "epoch": 45.69214972463362, "grad_norm": 0.20011703670024872, "learning_rate": 4.3159076031048355e-07, "loss": 0.0017, "step": 489500 }, { "epoch": 45.701484178101374, "grad_norm": 0.0007330627413466573, "learning_rate": 4.3065556906387363e-07, "loss": 0.002, "step": 489600 }, { "epoch": 45.71081863156912, "grad_norm": 0.020439427345991135, "learning_rate": 4.2972037781726366e-07, "loss": 0.0021, "step": 489700 }, { "epoch": 45.72015308503687, "grad_norm": 0.00787602923810482, "learning_rate": 4.2878518657065374e-07, "loss": 0.001, "step": 489800 }, { "epoch": 45.72948753850462, "grad_norm": 0.03835166245698929, "learning_rate": 4.278499953240438e-07, "loss": 0.0014, "step": 489900 }, { "epoch": 45.73882199197237, "grad_norm": 0.004867241717875004, "learning_rate": 4.2691480407743386e-07, "loss": 0.0019, "step": 490000 }, { "epoch": 45.73882199197237, "eval_accuracy": 0.7013535589264878, "eval_f1": 0.8272987848840276, "eval_loss": 0.36482343077659607, "eval_roc_auc": 0.9094171911621325, "eval_runtime": 145.9114, "eval_samples_per_second": 293.671, "eval_steps_per_second": 293.671, "step": 490000 }, { "epoch": 45.74815644544012, "grad_norm": 0.00934677105396986, "learning_rate": 4.259796128308239e-07, "loss": 0.0013, "step": 490100 }, { "epoch": 45.75749089890787, "grad_norm": 8.632099151611328, "learning_rate": 4.25044421584214e-07, "loss": 0.0021, "step": 490200 }, { "epoch": 45.76682535237562, "grad_norm": 0.11533229798078537, "learning_rate": 4.241092303376041e-07, "loss": 0.0013, "step": 490300 }, { "epoch": 45.77615980584337, "grad_norm": 0.043071478605270386, "learning_rate": 4.2317403909099414e-07, "loss": 0.0017, "step": 490400 }, { "epoch": 45.785494259311115, "grad_norm": 0.1363682746887207, "learning_rate": 4.222388478443842e-07, "loss": 0.0014, "step": 490500 }, { "epoch": 45.794828712778866, "grad_norm": 0.0016280885320156813, "learning_rate": 4.2130365659777425e-07, "loss": 0.0012, "step": 490600 }, { "epoch": 45.80416316624662, "grad_norm": 0.00781291350722313, "learning_rate": 4.203684653511644e-07, "loss": 0.0016, "step": 490700 }, { "epoch": 45.81349761971437, "grad_norm": 0.2861731946468353, "learning_rate": 4.1943327410455437e-07, "loss": 0.0014, "step": 490800 }, { "epoch": 45.82283207318211, "grad_norm": 8.171974182128906, "learning_rate": 4.184980828579445e-07, "loss": 0.0009, "step": 490900 }, { "epoch": 45.832166526649864, "grad_norm": 0.005383563227951527, "learning_rate": 4.1756289161133454e-07, "loss": 0.0022, "step": 491000 }, { "epoch": 45.841500980117615, "grad_norm": 0.0049102636985480785, "learning_rate": 4.166277003647246e-07, "loss": 0.0014, "step": 491100 }, { "epoch": 45.85083543358537, "grad_norm": 0.07118210196495056, "learning_rate": 4.156925091181147e-07, "loss": 0.0018, "step": 491200 }, { "epoch": 45.86016988705311, "grad_norm": 0.12364990264177322, "learning_rate": 4.1475731787150473e-07, "loss": 0.0014, "step": 491300 }, { "epoch": 45.86950434052086, "grad_norm": 0.41106095910072327, "learning_rate": 4.1382212662489487e-07, "loss": 0.0008, "step": 491400 }, { "epoch": 45.87883879398861, "grad_norm": 0.1195339635014534, "learning_rate": 4.128869353782849e-07, "loss": 0.0019, "step": 491500 }, { "epoch": 45.888173247456365, "grad_norm": 0.0011537334648892283, "learning_rate": 4.11951744131675e-07, "loss": 0.0026, "step": 491600 }, { "epoch": 45.89750770092411, "grad_norm": 0.06014120578765869, "learning_rate": 4.11016552885065e-07, "loss": 0.0009, "step": 491700 }, { "epoch": 45.90684215439186, "grad_norm": 0.11066964268684387, "learning_rate": 4.100813616384551e-07, "loss": 0.0014, "step": 491800 }, { "epoch": 45.91617660785961, "grad_norm": 0.2071886658668518, "learning_rate": 4.0914617039184513e-07, "loss": 0.0013, "step": 491900 }, { "epoch": 45.925511061327356, "grad_norm": 0.08645957708358765, "learning_rate": 4.082109791452352e-07, "loss": 0.0018, "step": 492000 }, { "epoch": 45.93484551479511, "grad_norm": 0.023377401754260063, "learning_rate": 4.0727578789862535e-07, "loss": 0.0037, "step": 492100 }, { "epoch": 45.94417996826286, "grad_norm": 0.013642500154674053, "learning_rate": 4.063405966520154e-07, "loss": 0.0012, "step": 492200 }, { "epoch": 45.95351442173061, "grad_norm": 1.8027006387710571, "learning_rate": 4.0540540540540546e-07, "loss": 0.0017, "step": 492300 }, { "epoch": 45.962848875198354, "grad_norm": 0.007350405212491751, "learning_rate": 4.044702141587955e-07, "loss": 0.0013, "step": 492400 }, { "epoch": 45.972183328666105, "grad_norm": 0.0017297682352364063, "learning_rate": 4.035350229121856e-07, "loss": 0.0019, "step": 492500 }, { "epoch": 45.98151778213386, "grad_norm": 0.0012594804866239429, "learning_rate": 4.025998316655756e-07, "loss": 0.0014, "step": 492600 }, { "epoch": 45.99085223560161, "grad_norm": 2.564502716064453, "learning_rate": 4.016646404189657e-07, "loss": 0.001, "step": 492700 }, { "epoch": 46.00018668906935, "grad_norm": 0.00859558954834938, "learning_rate": 4.007294491723557e-07, "loss": 0.0014, "step": 492800 }, { "epoch": 46.0095211425371, "grad_norm": 0.22521553933620453, "learning_rate": 3.9979425792574586e-07, "loss": 0.0015, "step": 492900 }, { "epoch": 46.018855596004855, "grad_norm": 0.004695019219070673, "learning_rate": 3.9885906667913594e-07, "loss": 0.0025, "step": 493000 }, { "epoch": 46.028190049472606, "grad_norm": 1.3877098560333252, "learning_rate": 3.9792387543252597e-07, "loss": 0.0019, "step": 493100 }, { "epoch": 46.03752450294035, "grad_norm": 0.0047006462700665, "learning_rate": 3.9698868418591606e-07, "loss": 0.0012, "step": 493200 }, { "epoch": 46.0468589564081, "grad_norm": 0.6321419477462769, "learning_rate": 3.960534929393061e-07, "loss": 0.0013, "step": 493300 }, { "epoch": 46.05619340987585, "grad_norm": 0.030690468847751617, "learning_rate": 3.951183016926962e-07, "loss": 0.0011, "step": 493400 }, { "epoch": 46.065527863343604, "grad_norm": 3.7169342041015625, "learning_rate": 3.9418311044608625e-07, "loss": 0.0015, "step": 493500 }, { "epoch": 46.07486231681135, "grad_norm": 0.03802104294300079, "learning_rate": 3.9324791919947634e-07, "loss": 0.0012, "step": 493600 }, { "epoch": 46.0841967702791, "grad_norm": 0.0006702758837491274, "learning_rate": 3.9231272795286637e-07, "loss": 0.0008, "step": 493700 }, { "epoch": 46.09353122374685, "grad_norm": 0.062380947172641754, "learning_rate": 3.9137753670625645e-07, "loss": 0.0008, "step": 493800 }, { "epoch": 46.1028656772146, "grad_norm": 6.596574306488037, "learning_rate": 3.9044234545964654e-07, "loss": 0.0016, "step": 493900 }, { "epoch": 46.112200130682346, "grad_norm": 1.7670201063156128, "learning_rate": 3.8950715421303657e-07, "loss": 0.0011, "step": 494000 }, { "epoch": 46.1215345841501, "grad_norm": 0.0022957581095397472, "learning_rate": 3.885719629664267e-07, "loss": 0.0009, "step": 494100 }, { "epoch": 46.13086903761785, "grad_norm": 0.008736377581954002, "learning_rate": 3.8763677171981673e-07, "loss": 0.0013, "step": 494200 }, { "epoch": 46.1402034910856, "grad_norm": 0.24368752539157867, "learning_rate": 3.867015804732068e-07, "loss": 0.0019, "step": 494300 }, { "epoch": 46.149537944553344, "grad_norm": 0.010547627694904804, "learning_rate": 3.8576638922659685e-07, "loss": 0.002, "step": 494400 }, { "epoch": 46.158872398021096, "grad_norm": 0.011794854886829853, "learning_rate": 3.8483119797998693e-07, "loss": 0.0026, "step": 494500 }, { "epoch": 46.16820685148885, "grad_norm": 0.099042609333992, "learning_rate": 3.8389600673337707e-07, "loss": 0.0016, "step": 494600 }, { "epoch": 46.17754130495659, "grad_norm": 0.009849962778389454, "learning_rate": 3.8296081548676705e-07, "loss": 0.0016, "step": 494700 }, { "epoch": 46.18687575842434, "grad_norm": 0.034108102321624756, "learning_rate": 3.820256242401572e-07, "loss": 0.0018, "step": 494800 }, { "epoch": 46.196210211892094, "grad_norm": 0.1590411365032196, "learning_rate": 3.810904329935472e-07, "loss": 0.0017, "step": 494900 }, { "epoch": 46.205544665359845, "grad_norm": 0.288488507270813, "learning_rate": 3.801552417469373e-07, "loss": 0.0011, "step": 495000 }, { "epoch": 46.205544665359845, "eval_accuracy": 0.7014002333722287, "eval_f1": 0.8281553908426641, "eval_loss": 0.36585840582847595, "eval_roc_auc": 0.9111461745228979, "eval_runtime": 145.4143, "eval_samples_per_second": 294.675, "eval_steps_per_second": 294.675, "step": 495000 } ], "logging_steps": 100, "max_steps": 535650, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.5960932344066803e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }