{ "best_global_step": 55000, "best_metric": 0.8768783517240833, "best_model_checkpoint": "./lang-ner-xlmr/checkpoint-55000", "epoch": 2.0, "eval_steps": 2500, "global_step": 55278, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0036180759072325336, "grad_norm": 5.75448751449585, "learning_rate": 4.9910452621295995e-05, "loss": 4.179392395019531, "step": 100 }, { "epoch": 0.007236151814465067, "grad_norm": 2.6520659923553467, "learning_rate": 4.9820000723615186e-05, "loss": 0.6058632278442383, "step": 200 }, { "epoch": 0.010854227721697602, "grad_norm": 3.474226951599121, "learning_rate": 4.972954882593437e-05, "loss": 0.3028737449645996, "step": 300 }, { "epoch": 0.014472303628930134, "grad_norm": 1.4948221445083618, "learning_rate": 4.963909692825356e-05, "loss": 0.18973339080810547, "step": 400 }, { "epoch": 0.01809037953616267, "grad_norm": 1.389740228652954, "learning_rate": 4.9548645030572745e-05, "loss": 0.15398676872253417, "step": 500 }, { "epoch": 0.021708455443395204, "grad_norm": 1.4510504007339478, "learning_rate": 4.945819313289193e-05, "loss": 0.13108017921447754, "step": 600 }, { "epoch": 0.025326531350627735, "grad_norm": 1.4420865774154663, "learning_rate": 4.936774123521112e-05, "loss": 0.12688090324401854, "step": 700 }, { "epoch": 0.02894460725786027, "grad_norm": 0.9447225332260132, "learning_rate": 4.92772893375303e-05, "loss": 0.11376466751098632, "step": 800 }, { "epoch": 0.0325626831650928, "grad_norm": 1.9140123128890991, "learning_rate": 4.9186837439849494e-05, "loss": 0.10734249114990234, "step": 900 }, { "epoch": 0.03618075907232534, "grad_norm": 1.2182528972625732, "learning_rate": 4.909638554216868e-05, "loss": 0.09950636863708497, "step": 1000 }, { "epoch": 0.03979883497955787, "grad_norm": 1.5587440729141235, "learning_rate": 4.900593364448786e-05, "loss": 0.08896804809570312, "step": 1100 }, { "epoch": 0.04341691088679041, "grad_norm": 2.021667242050171, "learning_rate": 4.891548174680705e-05, "loss": 0.09553884506225586, "step": 1200 }, { "epoch": 0.04703498679402294, "grad_norm": 3.561288595199585, "learning_rate": 4.882502984912624e-05, "loss": 0.0916118335723877, "step": 1300 }, { "epoch": 0.05065306270125547, "grad_norm": 2.239180088043213, "learning_rate": 4.873457795144543e-05, "loss": 0.08524966239929199, "step": 1400 }, { "epoch": 0.054271138608488007, "grad_norm": 1.880850076675415, "learning_rate": 4.864412605376461e-05, "loss": 0.08407029151916504, "step": 1500 }, { "epoch": 0.05788921451572054, "grad_norm": 2.365021228790283, "learning_rate": 4.8553674156083796e-05, "loss": 0.09083961486816407, "step": 1600 }, { "epoch": 0.061507290422953075, "grad_norm": 1.8810335397720337, "learning_rate": 4.8463222258402987e-05, "loss": 0.0841958236694336, "step": 1700 }, { "epoch": 0.0651253663301856, "grad_norm": 1.7592241764068604, "learning_rate": 4.837277036072217e-05, "loss": 0.08484026908874512, "step": 1800 }, { "epoch": 0.06874344223741814, "grad_norm": 1.4012072086334229, "learning_rate": 4.828231846304136e-05, "loss": 0.07917069911956787, "step": 1900 }, { "epoch": 0.07236151814465068, "grad_norm": 1.6757310628890991, "learning_rate": 4.8191866565360545e-05, "loss": 0.0806041145324707, "step": 2000 }, { "epoch": 0.0759795940518832, "grad_norm": 0.6598155498504639, "learning_rate": 4.810141466767973e-05, "loss": 0.07851210594177246, "step": 2100 }, { "epoch": 0.07959766995911574, "grad_norm": 1.5423673391342163, "learning_rate": 4.801096276999892e-05, "loss": 0.08287395477294922, "step": 2200 }, { "epoch": 0.08321574586634828, "grad_norm": 0.4928501546382904, "learning_rate": 4.7920510872318104e-05, "loss": 0.07287377834320069, "step": 2300 }, { "epoch": 0.08683382177358082, "grad_norm": 1.8151744604110718, "learning_rate": 4.7830058974637295e-05, "loss": 0.06640945911407471, "step": 2400 }, { "epoch": 0.09045189768081334, "grad_norm": 1.1932594776153564, "learning_rate": 4.773960707695648e-05, "loss": 0.07295094966888428, "step": 2500 }, { "epoch": 0.09045189768081334, "eval_accuracy": 0.975962734636331, "eval_f1": 0.7717093579748968, "eval_loss": 0.10806787014007568, "eval_precision": 0.7241184528264584, "eval_recall": 0.8259959084392468, "eval_runtime": 117.8075, "eval_samples_per_second": 169.768, "eval_steps_per_second": 4.72, "step": 2500 }, { "epoch": 0.09406997358804588, "grad_norm": 1.0983343124389648, "learning_rate": 4.764915517927566e-05, "loss": 0.06925168514251709, "step": 2600 }, { "epoch": 0.09768804949527841, "grad_norm": 0.8816857933998108, "learning_rate": 4.7558703281594854e-05, "loss": 0.06958985328674316, "step": 2700 }, { "epoch": 0.10130612540251094, "grad_norm": 0.8671173453330994, "learning_rate": 4.746825138391404e-05, "loss": 0.07468698024749756, "step": 2800 }, { "epoch": 0.10492420130974348, "grad_norm": 0.27838993072509766, "learning_rate": 4.737779948623322e-05, "loss": 0.07403119087219238, "step": 2900 }, { "epoch": 0.10854227721697601, "grad_norm": 0.4557673931121826, "learning_rate": 4.728734758855241e-05, "loss": 0.07262114524841308, "step": 3000 }, { "epoch": 0.11216035312420855, "grad_norm": 0.8267778158187866, "learning_rate": 4.71968956908716e-05, "loss": 0.07057662963867188, "step": 3100 }, { "epoch": 0.11577842903144107, "grad_norm": 1.401780128479004, "learning_rate": 4.710644379319079e-05, "loss": 0.06252509117126465, "step": 3200 }, { "epoch": 0.11939650493867361, "grad_norm": 1.7423473596572876, "learning_rate": 4.701599189550997e-05, "loss": 0.06425057411193848, "step": 3300 }, { "epoch": 0.12301458084590615, "grad_norm": 0.7547276616096497, "learning_rate": 4.6925539997829156e-05, "loss": 0.06438188076019287, "step": 3400 }, { "epoch": 0.12663265675313867, "grad_norm": 0.4259902238845825, "learning_rate": 4.6835088100148346e-05, "loss": 0.0666530466079712, "step": 3500 }, { "epoch": 0.1302507326603712, "grad_norm": 0.42786452174186707, "learning_rate": 4.674463620246753e-05, "loss": 0.05976760864257812, "step": 3600 }, { "epoch": 0.13386880856760375, "grad_norm": 1.1275266408920288, "learning_rate": 4.665418430478672e-05, "loss": 0.06228343009948731, "step": 3700 }, { "epoch": 0.13748688447483629, "grad_norm": 1.345894455909729, "learning_rate": 4.6563732407105905e-05, "loss": 0.0695729398727417, "step": 3800 }, { "epoch": 0.14110496038206882, "grad_norm": 0.5640186071395874, "learning_rate": 4.647328050942509e-05, "loss": 0.06416056156158448, "step": 3900 }, { "epoch": 0.14472303628930136, "grad_norm": 1.5667623281478882, "learning_rate": 4.638282861174428e-05, "loss": 0.06927279949188232, "step": 4000 }, { "epoch": 0.14834111219653387, "grad_norm": 0.4014199674129486, "learning_rate": 4.6292376714063464e-05, "loss": 0.060500779151916505, "step": 4100 }, { "epoch": 0.1519591881037664, "grad_norm": 0.8349173069000244, "learning_rate": 4.6201924816382655e-05, "loss": 0.05734441757202149, "step": 4200 }, { "epoch": 0.15557726401099894, "grad_norm": 0.48946359753608704, "learning_rate": 4.611147291870184e-05, "loss": 0.0637766456604004, "step": 4300 }, { "epoch": 0.15919533991823148, "grad_norm": 0.44791749119758606, "learning_rate": 4.602102102102102e-05, "loss": 0.0613397216796875, "step": 4400 }, { "epoch": 0.16281341582546402, "grad_norm": 1.0726768970489502, "learning_rate": 4.5930569123340214e-05, "loss": 0.07220725536346435, "step": 4500 }, { "epoch": 0.16643149173269656, "grad_norm": 0.48238834738731384, "learning_rate": 4.58401172256594e-05, "loss": 0.05229937076568603, "step": 4600 }, { "epoch": 0.1700495676399291, "grad_norm": 0.4427547752857208, "learning_rate": 4.574966532797859e-05, "loss": 0.06027111530303955, "step": 4700 }, { "epoch": 0.17366764354716163, "grad_norm": 0.44010627269744873, "learning_rate": 4.565921343029777e-05, "loss": 0.06117689609527588, "step": 4800 }, { "epoch": 0.17728571945439414, "grad_norm": 0.26065585017204285, "learning_rate": 4.5568761532616956e-05, "loss": 0.060817084312438964, "step": 4900 }, { "epoch": 0.18090379536162668, "grad_norm": 0.41624584794044495, "learning_rate": 4.547830963493615e-05, "loss": 0.06215104579925537, "step": 5000 }, { "epoch": 0.18090379536162668, "eval_accuracy": 0.9724426137358435, "eval_f1": 0.741559979115958, "eval_loss": 0.12759321928024292, "eval_precision": 0.6822080909213909, "eval_recall": 0.8122231350376133, "eval_runtime": 63.257, "eval_samples_per_second": 316.17, "eval_steps_per_second": 8.79, "step": 5000 }, { "epoch": 0.18452187126885922, "grad_norm": 1.1262469291687012, "learning_rate": 4.538785773725533e-05, "loss": 0.056777148246765136, "step": 5100 }, { "epoch": 0.18813994717609175, "grad_norm": 0.44265300035476685, "learning_rate": 4.5297405839574515e-05, "loss": 0.05986386775970459, "step": 5200 }, { "epoch": 0.1917580230833243, "grad_norm": 0.5468171238899231, "learning_rate": 4.5206953941893706e-05, "loss": 0.05671721935272217, "step": 5300 }, { "epoch": 0.19537609899055683, "grad_norm": 0.3858329653739929, "learning_rate": 4.511650204421289e-05, "loss": 0.05604006290435791, "step": 5400 }, { "epoch": 0.19899417489778937, "grad_norm": 1.0813618898391724, "learning_rate": 4.502605014653208e-05, "loss": 0.05299887180328369, "step": 5500 }, { "epoch": 0.20261225080502188, "grad_norm": 0.7834122776985168, "learning_rate": 4.4935598248851265e-05, "loss": 0.0669465970993042, "step": 5600 }, { "epoch": 0.2062303267122544, "grad_norm": 0.8666114211082458, "learning_rate": 4.484514635117045e-05, "loss": 0.06568387985229492, "step": 5700 }, { "epoch": 0.20984840261948695, "grad_norm": 0.7354055643081665, "learning_rate": 4.475469445348964e-05, "loss": 0.06354703903198242, "step": 5800 }, { "epoch": 0.2134664785267195, "grad_norm": 0.3984626829624176, "learning_rate": 4.4664242555808824e-05, "loss": 0.05610593318939209, "step": 5900 }, { "epoch": 0.21708455443395203, "grad_norm": 0.5307297110557556, "learning_rate": 4.4573790658128014e-05, "loss": 0.058310718536376954, "step": 6000 }, { "epoch": 0.22070263034118456, "grad_norm": 0.23685064911842346, "learning_rate": 4.44833387604472e-05, "loss": 0.0474505615234375, "step": 6100 }, { "epoch": 0.2243207062484171, "grad_norm": 0.6271052360534668, "learning_rate": 4.439288686276638e-05, "loss": 0.05871774673461914, "step": 6200 }, { "epoch": 0.22793878215564964, "grad_norm": 0.6762889623641968, "learning_rate": 4.430243496508557e-05, "loss": 0.05517944812774658, "step": 6300 }, { "epoch": 0.23155685806288215, "grad_norm": 0.9603418111801147, "learning_rate": 4.421198306740476e-05, "loss": 0.05483291625976563, "step": 6400 }, { "epoch": 0.23517493397011469, "grad_norm": 0.6032853126525879, "learning_rate": 4.412153116972395e-05, "loss": 0.05903904914855957, "step": 6500 }, { "epoch": 0.23879300987734722, "grad_norm": 0.40814077854156494, "learning_rate": 4.403107927204313e-05, "loss": 0.05642669677734375, "step": 6600 }, { "epoch": 0.24241108578457976, "grad_norm": 0.5799020528793335, "learning_rate": 4.3940627374362316e-05, "loss": 0.055092153549194334, "step": 6700 }, { "epoch": 0.2460291616918123, "grad_norm": 1.0993859767913818, "learning_rate": 4.385017547668151e-05, "loss": 0.054167227745056154, "step": 6800 }, { "epoch": 0.24964723759904484, "grad_norm": 1.9801974296569824, "learning_rate": 4.375972357900069e-05, "loss": 0.057117671966552735, "step": 6900 }, { "epoch": 0.25326531350627735, "grad_norm": 0.4046414792537689, "learning_rate": 4.366927168131988e-05, "loss": 0.054672832489013674, "step": 7000 }, { "epoch": 0.2568833894135099, "grad_norm": 0.41931968927383423, "learning_rate": 4.3578819783639066e-05, "loss": 0.05668231964111328, "step": 7100 }, { "epoch": 0.2605014653207424, "grad_norm": 0.5075521469116211, "learning_rate": 4.348836788595825e-05, "loss": 0.05900467395782471, "step": 7200 }, { "epoch": 0.264119541227975, "grad_norm": 1.0615949630737305, "learning_rate": 4.339791598827744e-05, "loss": 0.060022168159484864, "step": 7300 }, { "epoch": 0.2677376171352075, "grad_norm": 0.6786783337593079, "learning_rate": 4.3307464090596625e-05, "loss": 0.053788251876831054, "step": 7400 }, { "epoch": 0.27135569304244, "grad_norm": 0.7518507838249207, "learning_rate": 4.321701219291581e-05, "loss": 0.05555037975311279, "step": 7500 }, { "epoch": 0.27135569304244, "eval_accuracy": 0.9812751684036897, "eval_f1": 0.8064070486745359, "eval_loss": 0.08261791616678238, "eval_precision": 0.7701385325808107, "eval_recall": 0.8462604101225857, "eval_runtime": 62.4561, "eval_samples_per_second": 320.225, "eval_steps_per_second": 8.902, "step": 7500 }, { "epoch": 0.27497376894967257, "grad_norm": 0.8300764560699463, "learning_rate": 4.3126560295235e-05, "loss": 0.051460466384887694, "step": 7600 }, { "epoch": 0.2785918448569051, "grad_norm": 1.0100982189178467, "learning_rate": 4.303610839755418e-05, "loss": 0.05660095691680908, "step": 7700 }, { "epoch": 0.28220992076413765, "grad_norm": 0.5547285676002502, "learning_rate": 4.2945656499873374e-05, "loss": 0.05661679267883301, "step": 7800 }, { "epoch": 0.28582799667137015, "grad_norm": 0.49258002638816833, "learning_rate": 4.285520460219256e-05, "loss": 0.04981692790985107, "step": 7900 }, { "epoch": 0.2894460725786027, "grad_norm": 2.1518049240112305, "learning_rate": 4.276475270451174e-05, "loss": 0.04876615524291992, "step": 8000 }, { "epoch": 0.29306414848583523, "grad_norm": 0.973175048828125, "learning_rate": 4.267430080683093e-05, "loss": 0.0555543327331543, "step": 8100 }, { "epoch": 0.29668222439306774, "grad_norm": 2.2509944438934326, "learning_rate": 4.258384890915012e-05, "loss": 0.05133993148803711, "step": 8200 }, { "epoch": 0.3003003003003003, "grad_norm": 1.938225507736206, "learning_rate": 4.249339701146931e-05, "loss": 0.05030904769897461, "step": 8300 }, { "epoch": 0.3039183762075328, "grad_norm": 0.5656659007072449, "learning_rate": 4.240294511378849e-05, "loss": 0.05507714748382568, "step": 8400 }, { "epoch": 0.3075364521147654, "grad_norm": 0.7741718888282776, "learning_rate": 4.2312493216107676e-05, "loss": 0.05459506511688232, "step": 8500 }, { "epoch": 0.3111545280219979, "grad_norm": 0.547379195690155, "learning_rate": 4.2222041318426867e-05, "loss": 0.050563540458679196, "step": 8600 }, { "epoch": 0.31477260392923045, "grad_norm": 0.5133877396583557, "learning_rate": 4.213158942074605e-05, "loss": 0.05503926753997803, "step": 8700 }, { "epoch": 0.31839067983646296, "grad_norm": 0.4732136130332947, "learning_rate": 4.204113752306524e-05, "loss": 0.04883493423461914, "step": 8800 }, { "epoch": 0.32200875574369553, "grad_norm": 0.7309387922286987, "learning_rate": 4.1950685625384425e-05, "loss": 0.0464065933227539, "step": 8900 }, { "epoch": 0.32562683165092804, "grad_norm": 0.9696952104568481, "learning_rate": 4.186023372770361e-05, "loss": 0.05353004455566406, "step": 9000 }, { "epoch": 0.32924490755816055, "grad_norm": 0.6350353956222534, "learning_rate": 4.17697818300228e-05, "loss": 0.05357151508331299, "step": 9100 }, { "epoch": 0.3328629834653931, "grad_norm": 0.5927383899688721, "learning_rate": 4.1679329932341984e-05, "loss": 0.0496389102935791, "step": 9200 }, { "epoch": 0.3364810593726256, "grad_norm": 0.555016040802002, "learning_rate": 4.1588878034661175e-05, "loss": 0.048683485984802245, "step": 9300 }, { "epoch": 0.3400991352798582, "grad_norm": 0.33153098821640015, "learning_rate": 4.149842613698036e-05, "loss": 0.049552416801452635, "step": 9400 }, { "epoch": 0.3437172111870907, "grad_norm": 0.7421421408653259, "learning_rate": 4.140797423929954e-05, "loss": 0.050444388389587404, "step": 9500 }, { "epoch": 0.34733528709432326, "grad_norm": 0.7501067519187927, "learning_rate": 4.1317522341618734e-05, "loss": 0.05306045532226562, "step": 9600 }, { "epoch": 0.3509533630015558, "grad_norm": 0.9074022173881531, "learning_rate": 4.122707044393792e-05, "loss": 0.04894153594970703, "step": 9700 }, { "epoch": 0.3545714389087883, "grad_norm": 0.6082141399383545, "learning_rate": 4.11366185462571e-05, "loss": 0.05211612224578857, "step": 9800 }, { "epoch": 0.35818951481602085, "grad_norm": 0.6638932824134827, "learning_rate": 4.104616664857629e-05, "loss": 0.05089833736419678, "step": 9900 }, { "epoch": 0.36180759072325336, "grad_norm": 0.8939893841743469, "learning_rate": 4.095571475089548e-05, "loss": 0.05038036823272705, "step": 10000 }, { "epoch": 0.36180759072325336, "eval_accuracy": 0.9821651815196725, "eval_f1": 0.8226399325197526, "eval_loss": 0.07629744708538055, "eval_precision": 0.7916120576671035, "eval_recall": 0.8561993588814253, "eval_runtime": 62.5369, "eval_samples_per_second": 319.811, "eval_steps_per_second": 8.891, "step": 10000 }, { "epoch": 0.3654256666304859, "grad_norm": 0.3776226043701172, "learning_rate": 4.086526285321467e-05, "loss": 0.05038893222808838, "step": 10100 }, { "epoch": 0.36904374253771843, "grad_norm": 0.29007160663604736, "learning_rate": 4.077481095553385e-05, "loss": 0.05022284507751465, "step": 10200 }, { "epoch": 0.372661818444951, "grad_norm": 0.2021007239818573, "learning_rate": 4.0684359057853036e-05, "loss": 0.049036202430725095, "step": 10300 }, { "epoch": 0.3762798943521835, "grad_norm": 0.2728661894798279, "learning_rate": 4.0593907160172226e-05, "loss": 0.05147543907165528, "step": 10400 }, { "epoch": 0.379897970259416, "grad_norm": 0.6017497181892395, "learning_rate": 4.050345526249141e-05, "loss": 0.052560653686523434, "step": 10500 }, { "epoch": 0.3835160461666486, "grad_norm": 0.5500878095626831, "learning_rate": 4.0413003364810594e-05, "loss": 0.0445310115814209, "step": 10600 }, { "epoch": 0.3871341220738811, "grad_norm": 1.6260461807250977, "learning_rate": 4.0322551467129785e-05, "loss": 0.04827467441558838, "step": 10700 }, { "epoch": 0.39075219798111366, "grad_norm": 1.0797089338302612, "learning_rate": 4.023209956944897e-05, "loss": 0.0508196496963501, "step": 10800 }, { "epoch": 0.39437027388834617, "grad_norm": 0.33457517623901367, "learning_rate": 4.014164767176816e-05, "loss": 0.04953153133392334, "step": 10900 }, { "epoch": 0.39798834979557873, "grad_norm": 0.5582904815673828, "learning_rate": 4.0051195774087344e-05, "loss": 0.04928678035736084, "step": 11000 }, { "epoch": 0.40160642570281124, "grad_norm": 0.21949921548366547, "learning_rate": 3.996074387640653e-05, "loss": 0.05192047119140625, "step": 11100 }, { "epoch": 0.40522450161004375, "grad_norm": 0.7574787139892578, "learning_rate": 3.987029197872572e-05, "loss": 0.049414234161376955, "step": 11200 }, { "epoch": 0.4088425775172763, "grad_norm": 1.8344570398330688, "learning_rate": 3.97798400810449e-05, "loss": 0.05043137550354004, "step": 11300 }, { "epoch": 0.4124606534245088, "grad_norm": 0.618725061416626, "learning_rate": 3.968938818336409e-05, "loss": 0.04852957248687744, "step": 11400 }, { "epoch": 0.4160787293317414, "grad_norm": 0.6515002250671387, "learning_rate": 3.959893628568328e-05, "loss": 0.051465816497802734, "step": 11500 }, { "epoch": 0.4196968052389739, "grad_norm": 0.6772841215133667, "learning_rate": 3.950848438800246e-05, "loss": 0.05751809120178222, "step": 11600 }, { "epoch": 0.42331488114620647, "grad_norm": 0.3189091384410858, "learning_rate": 3.941803249032165e-05, "loss": 0.047155842781066895, "step": 11700 }, { "epoch": 0.426932957053439, "grad_norm": 0.2367490977048874, "learning_rate": 3.9327580592640836e-05, "loss": 0.043431487083435055, "step": 11800 }, { "epoch": 0.43055103296067154, "grad_norm": 0.38205036520957947, "learning_rate": 3.923712869496002e-05, "loss": 0.04606367588043213, "step": 11900 }, { "epoch": 0.43416910886790405, "grad_norm": 0.539438009262085, "learning_rate": 3.914667679727921e-05, "loss": 0.04509395122528076, "step": 12000 }, { "epoch": 0.43778718477513656, "grad_norm": 1.1849830150604248, "learning_rate": 3.9056224899598395e-05, "loss": 0.045330324172973634, "step": 12100 }, { "epoch": 0.4414052606823691, "grad_norm": 0.6970862746238708, "learning_rate": 3.896577300191758e-05, "loss": 0.04937627792358398, "step": 12200 }, { "epoch": 0.44502333658960164, "grad_norm": 0.3145708739757538, "learning_rate": 3.887532110423677e-05, "loss": 0.04958348274230957, "step": 12300 }, { "epoch": 0.4486414124968342, "grad_norm": 1.822594404220581, "learning_rate": 3.8784869206555954e-05, "loss": 0.05177441120147705, "step": 12400 }, { "epoch": 0.4522594884040667, "grad_norm": 0.3980540335178375, "learning_rate": 3.8694417308875145e-05, "loss": 0.04803945064544678, "step": 12500 }, { "epoch": 0.4522594884040667, "eval_accuracy": 0.9839402163062075, "eval_f1": 0.8303541577576488, "eval_loss": 0.07028726488351822, "eval_precision": 0.8025429842491283, "eval_recall": 0.8601620515794391, "eval_runtime": 61.9616, "eval_samples_per_second": 322.781, "eval_steps_per_second": 8.973, "step": 12500 }, { "epoch": 0.4558775643112993, "grad_norm": 2.3516685962677, "learning_rate": 3.860396541119433e-05, "loss": 0.04993240833282471, "step": 12600 }, { "epoch": 0.4594956402185318, "grad_norm": 0.9219645857810974, "learning_rate": 3.851351351351351e-05, "loss": 0.04464954853057861, "step": 12700 }, { "epoch": 0.4631137161257643, "grad_norm": 0.7087405920028687, "learning_rate": 3.8423061615832704e-05, "loss": 0.041380634307861326, "step": 12800 }, { "epoch": 0.46673179203299686, "grad_norm": 0.3233760893344879, "learning_rate": 3.833260971815189e-05, "loss": 0.05234696865081787, "step": 12900 }, { "epoch": 0.47034986794022937, "grad_norm": 0.31167057156562805, "learning_rate": 3.824215782047107e-05, "loss": 0.04531662464141846, "step": 13000 }, { "epoch": 0.47396794384746194, "grad_norm": 0.9034203886985779, "learning_rate": 3.815170592279026e-05, "loss": 0.04655809879302979, "step": 13100 }, { "epoch": 0.47758601975469445, "grad_norm": 0.3943072259426117, "learning_rate": 3.8061254025109447e-05, "loss": 0.0500339937210083, "step": 13200 }, { "epoch": 0.481204095661927, "grad_norm": 0.9143586158752441, "learning_rate": 3.797080212742864e-05, "loss": 0.04793615818023682, "step": 13300 }, { "epoch": 0.4848221715691595, "grad_norm": 1.2170947790145874, "learning_rate": 3.788035022974782e-05, "loss": 0.04486670970916748, "step": 13400 }, { "epoch": 0.48844024747639203, "grad_norm": 0.4851992130279541, "learning_rate": 3.7789898332067005e-05, "loss": 0.0455370569229126, "step": 13500 }, { "epoch": 0.4920583233836246, "grad_norm": 0.3209129273891449, "learning_rate": 3.7699446434386196e-05, "loss": 0.04612759113311768, "step": 13600 }, { "epoch": 0.4956763992908571, "grad_norm": 0.6042996644973755, "learning_rate": 3.760899453670538e-05, "loss": 0.04637802600860596, "step": 13700 }, { "epoch": 0.49929447519808967, "grad_norm": 0.422635018825531, "learning_rate": 3.751854263902457e-05, "loss": 0.050551199913024904, "step": 13800 }, { "epoch": 0.5029125511053222, "grad_norm": 0.9524370431900024, "learning_rate": 3.7428090741343755e-05, "loss": 0.04804905891418457, "step": 13900 }, { "epoch": 0.5065306270125547, "grad_norm": 0.8618633151054382, "learning_rate": 3.733763884366294e-05, "loss": 0.0453568172454834, "step": 14000 }, { "epoch": 0.5101487029197873, "grad_norm": 0.8186506032943726, "learning_rate": 3.724718694598213e-05, "loss": 0.04810242176055908, "step": 14100 }, { "epoch": 0.5137667788270198, "grad_norm": 0.4649534225463867, "learning_rate": 3.7156735048301314e-05, "loss": 0.041149930953979494, "step": 14200 }, { "epoch": 0.5173848547342523, "grad_norm": 1.2224235534667969, "learning_rate": 3.70662831506205e-05, "loss": 0.0440573263168335, "step": 14300 }, { "epoch": 0.5210029306414848, "grad_norm": 1.2368969917297363, "learning_rate": 3.697583125293969e-05, "loss": 0.045858840942382816, "step": 14400 }, { "epoch": 0.5246210065487174, "grad_norm": 1.4308712482452393, "learning_rate": 3.688537935525887e-05, "loss": 0.0431610631942749, "step": 14500 }, { "epoch": 0.52823908245595, "grad_norm": 1.7747290134429932, "learning_rate": 3.6794927457578063e-05, "loss": 0.04555936813354492, "step": 14600 }, { "epoch": 0.5318571583631825, "grad_norm": 0.6626078486442566, "learning_rate": 3.670447555989725e-05, "loss": 0.04809264183044434, "step": 14700 }, { "epoch": 0.535475234270415, "grad_norm": 0.49305254220962524, "learning_rate": 3.661402366221643e-05, "loss": 0.044796910285949704, "step": 14800 }, { "epoch": 0.5390933101776475, "grad_norm": 0.5383502840995789, "learning_rate": 3.652357176453562e-05, "loss": 0.04197264194488525, "step": 14900 }, { "epoch": 0.54271138608488, "grad_norm": 0.9339898824691772, "learning_rate": 3.6433119866854806e-05, "loss": 0.04077723026275635, "step": 15000 }, { "epoch": 0.54271138608488, "eval_accuracy": 0.9837071542003397, "eval_f1": 0.8344733667950663, "eval_loss": 0.0750078409910202, "eval_precision": 0.8071688796555565, "eval_recall": 0.8636898145910855, "eval_runtime": 62.6857, "eval_samples_per_second": 319.052, "eval_steps_per_second": 8.87, "step": 15000 }, { "epoch": 0.5463294619921126, "grad_norm": 0.7692775130271912, "learning_rate": 3.634266796917399e-05, "loss": 0.04739581108093262, "step": 15100 }, { "epoch": 0.5499475378993451, "grad_norm": 1.047753095626831, "learning_rate": 3.625221607149318e-05, "loss": 0.04375821590423584, "step": 15200 }, { "epoch": 0.5535656138065776, "grad_norm": 0.9720122218132019, "learning_rate": 3.6161764173812365e-05, "loss": 0.0421258020401001, "step": 15300 }, { "epoch": 0.5571836897138102, "grad_norm": 0.3475571274757385, "learning_rate": 3.6071312276131556e-05, "loss": 0.04756541728973389, "step": 15400 }, { "epoch": 0.5608017656210428, "grad_norm": 0.8692478537559509, "learning_rate": 3.598086037845074e-05, "loss": 0.04661733150482178, "step": 15500 }, { "epoch": 0.5644198415282753, "grad_norm": 1.0307046175003052, "learning_rate": 3.5890408480769924e-05, "loss": 0.044859604835510256, "step": 15600 }, { "epoch": 0.5680379174355078, "grad_norm": 0.654683530330658, "learning_rate": 3.5799956583089115e-05, "loss": 0.04575653076171875, "step": 15700 }, { "epoch": 0.5716559933427403, "grad_norm": 2.222489356994629, "learning_rate": 3.57095046854083e-05, "loss": 0.04321366310119629, "step": 15800 }, { "epoch": 0.5752740692499728, "grad_norm": 1.1416321992874146, "learning_rate": 3.561905278772748e-05, "loss": 0.043632102012634275, "step": 15900 }, { "epoch": 0.5788921451572054, "grad_norm": 1.0366028547286987, "learning_rate": 3.5528600890046673e-05, "loss": 0.04524300575256348, "step": 16000 }, { "epoch": 0.582510221064438, "grad_norm": 0.7538347840309143, "learning_rate": 3.543814899236586e-05, "loss": 0.04251582622528076, "step": 16100 }, { "epoch": 0.5861282969716705, "grad_norm": 0.2561816871166229, "learning_rate": 3.534769709468505e-05, "loss": 0.04683804512023926, "step": 16200 }, { "epoch": 0.589746372878903, "grad_norm": 0.9383835196495056, "learning_rate": 3.525724519700423e-05, "loss": 0.0412297248840332, "step": 16300 }, { "epoch": 0.5933644487861355, "grad_norm": 0.5518015623092651, "learning_rate": 3.5166793299323416e-05, "loss": 0.0455796480178833, "step": 16400 }, { "epoch": 0.5969825246933681, "grad_norm": 0.5094241499900818, "learning_rate": 3.507634140164261e-05, "loss": 0.04736936569213867, "step": 16500 }, { "epoch": 0.6006006006006006, "grad_norm": 0.2816466987133026, "learning_rate": 3.498588950396179e-05, "loss": 0.042105512619018556, "step": 16600 }, { "epoch": 0.6042186765078331, "grad_norm": 0.4187323749065399, "learning_rate": 3.489543760628098e-05, "loss": 0.044366950988769534, "step": 16700 }, { "epoch": 0.6078367524150656, "grad_norm": 0.28667891025543213, "learning_rate": 3.4804985708600166e-05, "loss": 0.03723037719726562, "step": 16800 }, { "epoch": 0.6114548283222982, "grad_norm": 0.3902330994606018, "learning_rate": 3.471453381091935e-05, "loss": 0.042644596099853514, "step": 16900 }, { "epoch": 0.6150729042295308, "grad_norm": 0.465101033449173, "learning_rate": 3.462408191323854e-05, "loss": 0.04263707160949707, "step": 17000 }, { "epoch": 0.6186909801367633, "grad_norm": 1.1710171699523926, "learning_rate": 3.4533630015557725e-05, "loss": 0.044122686386108396, "step": 17100 }, { "epoch": 0.6223090560439958, "grad_norm": 0.4717200696468353, "learning_rate": 3.444317811787691e-05, "loss": 0.042054853439331054, "step": 17200 }, { "epoch": 0.6259271319512283, "grad_norm": 0.18602319061756134, "learning_rate": 3.43527262201961e-05, "loss": 0.03980276823043823, "step": 17300 }, { "epoch": 0.6295452078584609, "grad_norm": 2.258084535598755, "learning_rate": 3.4262274322515284e-05, "loss": 0.043924779891967775, "step": 17400 }, { "epoch": 0.6331632837656934, "grad_norm": 0.5568512082099915, "learning_rate": 3.4171822424834474e-05, "loss": 0.04432165145874024, "step": 17500 }, { "epoch": 0.6331632837656934, "eval_accuracy": 0.9848981898715126, "eval_f1": 0.8395063656955402, "eval_loss": 0.06519697606563568, "eval_precision": 0.8148625494685449, "eval_recall": 0.8656872694469949, "eval_runtime": 61.9341, "eval_samples_per_second": 322.924, "eval_steps_per_second": 8.977, "step": 17500 }, { "epoch": 0.6367813596729259, "grad_norm": 0.302276611328125, "learning_rate": 3.408137052715366e-05, "loss": 0.04175849914550781, "step": 17600 }, { "epoch": 0.6403994355801584, "grad_norm": 0.20687709748744965, "learning_rate": 3.399091862947284e-05, "loss": 0.042713408470153806, "step": 17700 }, { "epoch": 0.6440175114873911, "grad_norm": 0.5285593271255493, "learning_rate": 3.390046673179203e-05, "loss": 0.041079201698303223, "step": 17800 }, { "epoch": 0.6476355873946236, "grad_norm": 0.359951913356781, "learning_rate": 3.381001483411122e-05, "loss": 0.047190561294555664, "step": 17900 }, { "epoch": 0.6512536633018561, "grad_norm": 0.5516379475593567, "learning_rate": 3.371956293643041e-05, "loss": 0.049062256813049314, "step": 18000 }, { "epoch": 0.6548717392090886, "grad_norm": 0.2408919632434845, "learning_rate": 3.362911103874959e-05, "loss": 0.041800622940063474, "step": 18100 }, { "epoch": 0.6584898151163211, "grad_norm": 0.5572479963302612, "learning_rate": 3.3538659141068776e-05, "loss": 0.04303212165832519, "step": 18200 }, { "epoch": 0.6621078910235537, "grad_norm": 1.1610311269760132, "learning_rate": 3.344820724338797e-05, "loss": 0.04213200092315674, "step": 18300 }, { "epoch": 0.6657259669307862, "grad_norm": 0.945891797542572, "learning_rate": 3.335775534570715e-05, "loss": 0.0419348955154419, "step": 18400 }, { "epoch": 0.6693440428380187, "grad_norm": 0.40828007459640503, "learning_rate": 3.326730344802634e-05, "loss": 0.039156782627105716, "step": 18500 }, { "epoch": 0.6729621187452512, "grad_norm": 2.0386905670166016, "learning_rate": 3.3176851550345526e-05, "loss": 0.042091598510742186, "step": 18600 }, { "epoch": 0.6765801946524838, "grad_norm": 2.043750762939453, "learning_rate": 3.308639965266471e-05, "loss": 0.04341127872467041, "step": 18700 }, { "epoch": 0.6801982705597164, "grad_norm": 1.103946328163147, "learning_rate": 3.29959477549839e-05, "loss": 0.04109795570373535, "step": 18800 }, { "epoch": 0.6838163464669489, "grad_norm": 1.6356172561645508, "learning_rate": 3.2905495857303084e-05, "loss": 0.04152417182922363, "step": 18900 }, { "epoch": 0.6874344223741814, "grad_norm": 0.5166067481040955, "learning_rate": 3.2815043959622275e-05, "loss": 0.03941408634185791, "step": 19000 }, { "epoch": 0.6910524982814139, "grad_norm": 0.341791570186615, "learning_rate": 3.272459206194146e-05, "loss": 0.04008223056793213, "step": 19100 }, { "epoch": 0.6946705741886465, "grad_norm": 0.2977801263332367, "learning_rate": 3.263414016426064e-05, "loss": 0.046716113090515134, "step": 19200 }, { "epoch": 0.698288650095879, "grad_norm": 1.640602707862854, "learning_rate": 3.2543688266579834e-05, "loss": 0.043398504257202146, "step": 19300 }, { "epoch": 0.7019067260031115, "grad_norm": 0.3690544366836548, "learning_rate": 3.245323636889902e-05, "loss": 0.03948961734771728, "step": 19400 }, { "epoch": 0.7055248019103441, "grad_norm": 2.460749387741089, "learning_rate": 3.236278447121821e-05, "loss": 0.04185768127441406, "step": 19500 }, { "epoch": 0.7091428778175766, "grad_norm": 0.5380750894546509, "learning_rate": 3.227233257353739e-05, "loss": 0.040400395393371584, "step": 19600 }, { "epoch": 0.7127609537248092, "grad_norm": 0.44135797023773193, "learning_rate": 3.218188067585658e-05, "loss": 0.04154191017150879, "step": 19700 }, { "epoch": 0.7163790296320417, "grad_norm": 0.5789956450462341, "learning_rate": 3.209142877817577e-05, "loss": 0.0443493127822876, "step": 19800 }, { "epoch": 0.7199971055392742, "grad_norm": 0.32769912481307983, "learning_rate": 3.200097688049495e-05, "loss": 0.03976017475128174, "step": 19900 }, { "epoch": 0.7236151814465067, "grad_norm": 0.6033921837806702, "learning_rate": 3.1910524982814136e-05, "loss": 0.04033390522003174, "step": 20000 }, { "epoch": 0.7236151814465067, "eval_accuracy": 0.9859394821797719, "eval_f1": 0.8507431047883741, "eval_loss": 0.064690500497818, "eval_precision": 0.8298106965631318, "eval_recall": 0.8727589039771904, "eval_runtime": 62.6781, "eval_samples_per_second": 319.091, "eval_steps_per_second": 8.871, "step": 20000 }, { "epoch": 0.7272332573537392, "grad_norm": 0.21106982231140137, "learning_rate": 3.1820073085133327e-05, "loss": 0.0368848705291748, "step": 20100 }, { "epoch": 0.7308513332609718, "grad_norm": 0.8279436826705933, "learning_rate": 3.172962118745251e-05, "loss": 0.040103306770324705, "step": 20200 }, { "epoch": 0.7344694091682044, "grad_norm": 0.21994882822036743, "learning_rate": 3.16391692897717e-05, "loss": 0.037559795379638675, "step": 20300 }, { "epoch": 0.7380874850754369, "grad_norm": 1.8766059875488281, "learning_rate": 3.1548717392090885e-05, "loss": 0.04059103012084961, "step": 20400 }, { "epoch": 0.7417055609826694, "grad_norm": 0.6307962536811829, "learning_rate": 3.145826549441007e-05, "loss": 0.03980612993240357, "step": 20500 }, { "epoch": 0.745323636889902, "grad_norm": 0.33936986327171326, "learning_rate": 3.136781359672926e-05, "loss": 0.043472270965576175, "step": 20600 }, { "epoch": 0.7489417127971345, "grad_norm": 0.7730916738510132, "learning_rate": 3.1277361699048444e-05, "loss": 0.040565075874328616, "step": 20700 }, { "epoch": 0.752559788704367, "grad_norm": 0.3246110677719116, "learning_rate": 3.1186909801367635e-05, "loss": 0.04017134189605713, "step": 20800 }, { "epoch": 0.7561778646115995, "grad_norm": 0.8956949710845947, "learning_rate": 3.109645790368682e-05, "loss": 0.04045989513397217, "step": 20900 }, { "epoch": 0.759795940518832, "grad_norm": 2.5085365772247314, "learning_rate": 3.1006006006006e-05, "loss": 0.0404241943359375, "step": 21000 }, { "epoch": 0.7634140164260647, "grad_norm": 0.1668255627155304, "learning_rate": 3.0915554108325194e-05, "loss": 0.039553046226501465, "step": 21100 }, { "epoch": 0.7670320923332972, "grad_norm": 0.39517688751220703, "learning_rate": 3.082510221064438e-05, "loss": 0.04120331764221191, "step": 21200 }, { "epoch": 0.7706501682405297, "grad_norm": 0.6607240438461304, "learning_rate": 3.073465031296357e-05, "loss": 0.03997873306274414, "step": 21300 }, { "epoch": 0.7742682441477622, "grad_norm": 0.44018736481666565, "learning_rate": 3.064419841528275e-05, "loss": 0.041695055961608884, "step": 21400 }, { "epoch": 0.7778863200549948, "grad_norm": 0.15856041014194489, "learning_rate": 3.055374651760194e-05, "loss": 0.04077398300170899, "step": 21500 }, { "epoch": 0.7815043959622273, "grad_norm": 0.39261528849601746, "learning_rate": 3.0463294619921127e-05, "loss": 0.041572155952453616, "step": 21600 }, { "epoch": 0.7851224718694598, "grad_norm": 0.28265002369880676, "learning_rate": 3.0372842722240315e-05, "loss": 0.045727620124816896, "step": 21700 }, { "epoch": 0.7887405477766923, "grad_norm": 0.6709412336349487, "learning_rate": 3.0282390824559502e-05, "loss": 0.04259458065032959, "step": 21800 }, { "epoch": 0.7923586236839248, "grad_norm": 0.24202914535999298, "learning_rate": 3.0191938926878686e-05, "loss": 0.03839920997619629, "step": 21900 }, { "epoch": 0.7959766995911575, "grad_norm": 0.4965508282184601, "learning_rate": 3.0101487029197874e-05, "loss": 0.03700316905975342, "step": 22000 }, { "epoch": 0.79959477549839, "grad_norm": 0.596442461013794, "learning_rate": 3.001103513151706e-05, "loss": 0.04116812229156494, "step": 22100 }, { "epoch": 0.8032128514056225, "grad_norm": 0.5273512601852417, "learning_rate": 2.992058323383625e-05, "loss": 0.04079509735107422, "step": 22200 }, { "epoch": 0.806830927312855, "grad_norm": 0.24124516546726227, "learning_rate": 2.9830131336155432e-05, "loss": 0.03795903921127319, "step": 22300 }, { "epoch": 0.8104490032200875, "grad_norm": 0.46343305706977844, "learning_rate": 2.973967943847462e-05, "loss": 0.038403522968292234, "step": 22400 }, { "epoch": 0.8140670791273201, "grad_norm": 0.2311462014913559, "learning_rate": 2.9649227540793807e-05, "loss": 0.04132327079772949, "step": 22500 }, { "epoch": 0.8140670791273201, "eval_accuracy": 0.9865150342336365, "eval_f1": 0.8464219002621376, "eval_loss": 0.05898759886622429, "eval_precision": 0.8253309864544272, "eval_recall": 0.8686190177032491, "eval_runtime": 62.4843, "eval_samples_per_second": 320.08, "eval_steps_per_second": 8.898, "step": 22500 }, { "epoch": 0.8176851550345526, "grad_norm": 0.6530361175537109, "learning_rate": 2.9558775643112995e-05, "loss": 0.04163932323455811, "step": 22600 }, { "epoch": 0.8213032309417851, "grad_norm": 1.38533353805542, "learning_rate": 2.946832374543218e-05, "loss": 0.03626733779907226, "step": 22700 }, { "epoch": 0.8249213068490177, "grad_norm": 1.6181460618972778, "learning_rate": 2.9377871847751366e-05, "loss": 0.03692409038543701, "step": 22800 }, { "epoch": 0.8285393827562503, "grad_norm": 6.322599411010742, "learning_rate": 2.9287419950070554e-05, "loss": 0.03785946369171143, "step": 22900 }, { "epoch": 0.8321574586634828, "grad_norm": 0.24266965687274933, "learning_rate": 2.919696805238974e-05, "loss": 0.03527719974517822, "step": 23000 }, { "epoch": 0.8357755345707153, "grad_norm": 0.41426071524620056, "learning_rate": 2.910651615470893e-05, "loss": 0.0348510479927063, "step": 23100 }, { "epoch": 0.8393936104779478, "grad_norm": 0.3566010892391205, "learning_rate": 2.9016064257028112e-05, "loss": 0.03639560461044312, "step": 23200 }, { "epoch": 0.8430116863851803, "grad_norm": 0.14937593042850494, "learning_rate": 2.89256123593473e-05, "loss": 0.033641955852508544, "step": 23300 }, { "epoch": 0.8466297622924129, "grad_norm": 0.5473237037658691, "learning_rate": 2.8835160461666487e-05, "loss": 0.03712946176528931, "step": 23400 }, { "epoch": 0.8502478381996454, "grad_norm": 0.3679254949092865, "learning_rate": 2.874470856398567e-05, "loss": 0.03785475969314575, "step": 23500 }, { "epoch": 0.853865914106878, "grad_norm": 0.20851418375968933, "learning_rate": 2.8654256666304862e-05, "loss": 0.04206960201263428, "step": 23600 }, { "epoch": 0.8574839900141105, "grad_norm": 0.22139862179756165, "learning_rate": 2.8563804768624046e-05, "loss": 0.03989522218704224, "step": 23700 }, { "epoch": 0.8611020659213431, "grad_norm": 0.14680643379688263, "learning_rate": 2.8473352870943233e-05, "loss": 0.03717276811599732, "step": 23800 }, { "epoch": 0.8647201418285756, "grad_norm": 0.2279856950044632, "learning_rate": 2.838290097326242e-05, "loss": 0.039047441482543944, "step": 23900 }, { "epoch": 0.8683382177358081, "grad_norm": 1.1088160276412964, "learning_rate": 2.8292449075581605e-05, "loss": 0.03408738613128662, "step": 24000 }, { "epoch": 0.8719562936430406, "grad_norm": 0.8532550930976868, "learning_rate": 2.8201997177900796e-05, "loss": 0.036566758155822755, "step": 24100 }, { "epoch": 0.8755743695502731, "grad_norm": 0.1683458536863327, "learning_rate": 2.811154528021998e-05, "loss": 0.0397763442993164, "step": 24200 }, { "epoch": 0.8791924454575057, "grad_norm": 0.3468044102191925, "learning_rate": 2.8021093382539164e-05, "loss": 0.036167433261871336, "step": 24300 }, { "epoch": 0.8828105213647383, "grad_norm": 1.5043731927871704, "learning_rate": 2.7930641484858354e-05, "loss": 0.04083109855651856, "step": 24400 }, { "epoch": 0.8864285972719708, "grad_norm": 2.7504560947418213, "learning_rate": 2.784018958717754e-05, "loss": 0.039477238655090334, "step": 24500 }, { "epoch": 0.8900466731792033, "grad_norm": 0.27413201332092285, "learning_rate": 2.7749737689496726e-05, "loss": 0.03859598875045776, "step": 24600 }, { "epoch": 0.8936647490864358, "grad_norm": 0.4622710645198822, "learning_rate": 2.7659285791815913e-05, "loss": 0.03455983877182007, "step": 24700 }, { "epoch": 0.8972828249936684, "grad_norm": 1.0147453546524048, "learning_rate": 2.7568833894135097e-05, "loss": 0.03525468587875366, "step": 24800 }, { "epoch": 0.9009009009009009, "grad_norm": 0.34606319665908813, "learning_rate": 2.7478381996454288e-05, "loss": 0.03580186367034912, "step": 24900 }, { "epoch": 0.9045189768081334, "grad_norm": 0.3202800750732422, "learning_rate": 2.7387930098773472e-05, "loss": 0.03665663719177246, "step": 25000 }, { "epoch": 0.9045189768081334, "eval_accuracy": 0.986656714492393, "eval_f1": 0.8509657594381035, "eval_loss": 0.05820872634649277, "eval_precision": 0.8288109453496006, "eval_recall": 0.8743375376536349, "eval_runtime": 62.5862, "eval_samples_per_second": 319.559, "eval_steps_per_second": 8.884, "step": 25000 }, { "epoch": 0.9081370527153659, "grad_norm": 0.557600736618042, "learning_rate": 2.7297478201092656e-05, "loss": 0.03967963457107544, "step": 25100 }, { "epoch": 0.9117551286225986, "grad_norm": 0.4092039465904236, "learning_rate": 2.7207026303411847e-05, "loss": 0.03797311782836914, "step": 25200 }, { "epoch": 0.9153732045298311, "grad_norm": 0.40534520149230957, "learning_rate": 2.711657440573103e-05, "loss": 0.036147847175598144, "step": 25300 }, { "epoch": 0.9189912804370636, "grad_norm": 0.4325968623161316, "learning_rate": 2.702612250805022e-05, "loss": 0.03767855882644653, "step": 25400 }, { "epoch": 0.9226093563442961, "grad_norm": 0.25961676239967346, "learning_rate": 2.6935670610369406e-05, "loss": 0.03738126039505005, "step": 25500 }, { "epoch": 0.9262274322515286, "grad_norm": 0.2495643049478531, "learning_rate": 2.684521871268859e-05, "loss": 0.03809333562850952, "step": 25600 }, { "epoch": 0.9298455081587612, "grad_norm": 0.20810630917549133, "learning_rate": 2.675476681500778e-05, "loss": 0.03803467035293579, "step": 25700 }, { "epoch": 0.9334635840659937, "grad_norm": 0.3630845844745636, "learning_rate": 2.6664314917326964e-05, "loss": 0.04232705593109131, "step": 25800 }, { "epoch": 0.9370816599732262, "grad_norm": 0.6230679154396057, "learning_rate": 2.6573863019646155e-05, "loss": 0.03966914892196655, "step": 25900 }, { "epoch": 0.9406997358804587, "grad_norm": 0.6846088767051697, "learning_rate": 2.648341112196534e-05, "loss": 0.03988933086395264, "step": 26000 }, { "epoch": 0.9443178117876913, "grad_norm": 0.29151585698127747, "learning_rate": 2.6392959224284523e-05, "loss": 0.036113507747650146, "step": 26100 }, { "epoch": 0.9479358876949239, "grad_norm": 0.3652597963809967, "learning_rate": 2.6302507326603714e-05, "loss": 0.03595402717590332, "step": 26200 }, { "epoch": 0.9515539636021564, "grad_norm": 0.3763394355773926, "learning_rate": 2.6212055428922898e-05, "loss": 0.03632761478424072, "step": 26300 }, { "epoch": 0.9551720395093889, "grad_norm": 0.16137683391571045, "learning_rate": 2.612160353124209e-05, "loss": 0.03010902166366577, "step": 26400 }, { "epoch": 0.9587901154166214, "grad_norm": 0.5310078859329224, "learning_rate": 2.6031151633561273e-05, "loss": 0.034855997562408446, "step": 26500 }, { "epoch": 0.962408191323854, "grad_norm": 0.4904273748397827, "learning_rate": 2.5940699735880457e-05, "loss": 0.03756725311279297, "step": 26600 }, { "epoch": 0.9660262672310865, "grad_norm": 0.7692480087280273, "learning_rate": 2.5850247838199648e-05, "loss": 0.03645958185195923, "step": 26700 }, { "epoch": 0.969644343138319, "grad_norm": 0.45624640583992004, "learning_rate": 2.5759795940518832e-05, "loss": 0.037951292991638186, "step": 26800 }, { "epoch": 0.9732624190455516, "grad_norm": 0.41989752650260925, "learning_rate": 2.5669344042838023e-05, "loss": 0.03396618366241455, "step": 26900 }, { "epoch": 0.9768804949527841, "grad_norm": 0.5218580961227417, "learning_rate": 2.5578892145157207e-05, "loss": 0.034535303115844726, "step": 27000 }, { "epoch": 0.9804985708600167, "grad_norm": 0.24635274708271027, "learning_rate": 2.548844024747639e-05, "loss": 0.034599866867065426, "step": 27100 }, { "epoch": 0.9841166467672492, "grad_norm": 0.8805984258651733, "learning_rate": 2.539798834979558e-05, "loss": 0.0382379937171936, "step": 27200 }, { "epoch": 0.9877347226744817, "grad_norm": 0.4743868410587311, "learning_rate": 2.5307536452114765e-05, "loss": 0.03450409173965454, "step": 27300 }, { "epoch": 0.9913527985817142, "grad_norm": 0.4024532735347748, "learning_rate": 2.521708455443395e-05, "loss": 0.032371597290039064, "step": 27400 }, { "epoch": 0.9949708744889468, "grad_norm": 1.2098551988601685, "learning_rate": 2.512663265675314e-05, "loss": 0.03947657585144043, "step": 27500 }, { "epoch": 0.9949708744889468, "eval_accuracy": 0.9862055646169487, "eval_f1": 0.8529879572824359, "eval_loss": 0.05825402960181236, "eval_precision": 0.8304042715484363, "eval_recall": 0.8768343562235217, "eval_runtime": 62.2283, "eval_samples_per_second": 321.397, "eval_steps_per_second": 8.935, "step": 27500 }, { "epoch": 0.9985889503961793, "grad_norm": 0.3243059515953064, "learning_rate": 2.5036180759072324e-05, "loss": 0.03721761703491211, "step": 27600 }, { "epoch": 1.0022070263034117, "grad_norm": 0.5898327231407166, "learning_rate": 2.494572886139151e-05, "loss": 0.03310096025466919, "step": 27700 }, { "epoch": 1.0058251022106444, "grad_norm": 0.30443838238716125, "learning_rate": 2.48552769637107e-05, "loss": 0.033098301887512206, "step": 27800 }, { "epoch": 1.009443178117877, "grad_norm": 0.7985163331031799, "learning_rate": 2.4764825066029886e-05, "loss": 0.031821844577789304, "step": 27900 }, { "epoch": 1.0130612540251094, "grad_norm": 0.6274137496948242, "learning_rate": 2.4674373168349074e-05, "loss": 0.03217078447341919, "step": 28000 }, { "epoch": 1.016679329932342, "grad_norm": 0.744652271270752, "learning_rate": 2.4583921270668258e-05, "loss": 0.030337939262390135, "step": 28100 }, { "epoch": 1.0202974058395746, "grad_norm": 0.20680102705955505, "learning_rate": 2.4493469372987445e-05, "loss": 0.03135863780975342, "step": 28200 }, { "epoch": 1.023915481746807, "grad_norm": 0.5819505453109741, "learning_rate": 2.4403017475306633e-05, "loss": 0.030997350215911865, "step": 28300 }, { "epoch": 1.0275335576540396, "grad_norm": 0.8105890154838562, "learning_rate": 2.431256557762582e-05, "loss": 0.029717042446136474, "step": 28400 }, { "epoch": 1.031151633561272, "grad_norm": 0.4248642325401306, "learning_rate": 2.4222113679945007e-05, "loss": 0.02956360101699829, "step": 28500 }, { "epoch": 1.0347697094685047, "grad_norm": 0.17442703247070312, "learning_rate": 2.413166178226419e-05, "loss": 0.03415003776550293, "step": 28600 }, { "epoch": 1.0383877853757373, "grad_norm": 0.3765491843223572, "learning_rate": 2.404120988458338e-05, "loss": 0.03359386682510376, "step": 28700 }, { "epoch": 1.0420058612829697, "grad_norm": 0.2846165895462036, "learning_rate": 2.3950757986902566e-05, "loss": 0.03219552993774414, "step": 28800 }, { "epoch": 1.0456239371902023, "grad_norm": 0.6828330755233765, "learning_rate": 2.3860306089221754e-05, "loss": 0.028468940258026123, "step": 28900 }, { "epoch": 1.0492420130974347, "grad_norm": 0.24457824230194092, "learning_rate": 2.3769854191540938e-05, "loss": 0.03526209592819214, "step": 29000 }, { "epoch": 1.0528600890046673, "grad_norm": 0.4728795886039734, "learning_rate": 2.3679402293860125e-05, "loss": 0.027564334869384765, "step": 29100 }, { "epoch": 1.0564781649119, "grad_norm": 0.34912073612213135, "learning_rate": 2.3588950396179312e-05, "loss": 0.03199338912963867, "step": 29200 }, { "epoch": 1.0600962408191323, "grad_norm": 0.7076539993286133, "learning_rate": 2.34984984984985e-05, "loss": 0.02838871717453003, "step": 29300 }, { "epoch": 1.063714316726365, "grad_norm": 0.22086426615715027, "learning_rate": 2.3408046600817687e-05, "loss": 0.03132739543914795, "step": 29400 }, { "epoch": 1.0673323926335974, "grad_norm": 0.4026763439178467, "learning_rate": 2.331759470313687e-05, "loss": 0.030288333892822265, "step": 29500 }, { "epoch": 1.07095046854083, "grad_norm": 0.6986600160598755, "learning_rate": 2.322714280545606e-05, "loss": 0.027701468467712403, "step": 29600 }, { "epoch": 1.0745685444480626, "grad_norm": 0.3440704047679901, "learning_rate": 2.3136690907775246e-05, "loss": 0.03199631690979004, "step": 29700 }, { "epoch": 1.078186620355295, "grad_norm": 0.5154510736465454, "learning_rate": 2.3046239010094434e-05, "loss": 0.03085195779800415, "step": 29800 }, { "epoch": 1.0818046962625276, "grad_norm": 1.2285401821136475, "learning_rate": 2.295578711241362e-05, "loss": 0.031190474033355713, "step": 29900 }, { "epoch": 1.08542277216976, "grad_norm": 0.3479061722755432, "learning_rate": 2.2865335214732805e-05, "loss": 0.03375990152359009, "step": 30000 }, { "epoch": 1.08542277216976, "eval_accuracy": 0.9868820974514447, "eval_f1": 0.8562118190241375, "eval_loss": 0.05674006789922714, "eval_precision": 0.8352508617387974, "eval_recall": 0.8782519048309412, "eval_runtime": 63.2356, "eval_samples_per_second": 316.278, "eval_steps_per_second": 8.793, "step": 30000 }, { "epoch": 1.0890408480769926, "grad_norm": 0.18956594169139862, "learning_rate": 2.2774883317051992e-05, "loss": 0.027218008041381837, "step": 30100 }, { "epoch": 1.0926589239842253, "grad_norm": 0.24030227959156036, "learning_rate": 2.268443141937118e-05, "loss": 0.03073176145553589, "step": 30200 }, { "epoch": 1.0962769998914577, "grad_norm": 0.1687329262495041, "learning_rate": 2.2593979521690367e-05, "loss": 0.033424663543701175, "step": 30300 }, { "epoch": 1.0998950757986903, "grad_norm": 1.2173426151275635, "learning_rate": 2.250352762400955e-05, "loss": 0.03079766035079956, "step": 30400 }, { "epoch": 1.103513151705923, "grad_norm": 0.35310184955596924, "learning_rate": 2.241307572632874e-05, "loss": 0.03289975881576538, "step": 30500 }, { "epoch": 1.1071312276131553, "grad_norm": 0.14718961715698242, "learning_rate": 2.2322623828647926e-05, "loss": 0.03266577005386353, "step": 30600 }, { "epoch": 1.110749303520388, "grad_norm": 0.29442161321640015, "learning_rate": 2.2232171930967113e-05, "loss": 0.02883612871170044, "step": 30700 }, { "epoch": 1.1143673794276203, "grad_norm": 0.36244460940361023, "learning_rate": 2.21417200332863e-05, "loss": 0.030666334629058836, "step": 30800 }, { "epoch": 1.117985455334853, "grad_norm": 0.2421630471944809, "learning_rate": 2.2051268135605485e-05, "loss": 0.02931546211242676, "step": 30900 }, { "epoch": 1.1216035312420856, "grad_norm": 0.5055842995643616, "learning_rate": 2.1960816237924672e-05, "loss": 0.030934171676635744, "step": 31000 }, { "epoch": 1.125221607149318, "grad_norm": 0.27207571268081665, "learning_rate": 2.187036434024386e-05, "loss": 0.03155987024307251, "step": 31100 }, { "epoch": 1.1288396830565506, "grad_norm": 0.5190430879592896, "learning_rate": 2.1779912442563047e-05, "loss": 0.030766298770904543, "step": 31200 }, { "epoch": 1.132457758963783, "grad_norm": 0.5578451156616211, "learning_rate": 2.168946054488223e-05, "loss": 0.030352199077606203, "step": 31300 }, { "epoch": 1.1360758348710156, "grad_norm": 0.775244951248169, "learning_rate": 2.159900864720142e-05, "loss": 0.027431459426879884, "step": 31400 }, { "epoch": 1.1396939107782482, "grad_norm": 0.17452310025691986, "learning_rate": 2.1508556749520606e-05, "loss": 0.02899331569671631, "step": 31500 }, { "epoch": 1.1433119866854806, "grad_norm": 1.0152820348739624, "learning_rate": 2.1418104851839793e-05, "loss": 0.02969914197921753, "step": 31600 }, { "epoch": 1.1469300625927132, "grad_norm": 0.21474546194076538, "learning_rate": 2.132765295415898e-05, "loss": 0.03098618268966675, "step": 31700 }, { "epoch": 1.1505481384999456, "grad_norm": 0.27076786756515503, "learning_rate": 2.1237201056478165e-05, "loss": 0.026145567893981935, "step": 31800 }, { "epoch": 1.1541662144071783, "grad_norm": 0.20778276026248932, "learning_rate": 2.1146749158797352e-05, "loss": 0.030465993881225586, "step": 31900 }, { "epoch": 1.1577842903144109, "grad_norm": 0.2573922276496887, "learning_rate": 2.105629726111654e-05, "loss": 0.031988742351531985, "step": 32000 }, { "epoch": 1.1614023662216433, "grad_norm": 0.33712247014045715, "learning_rate": 2.0965845363435727e-05, "loss": 0.031969892978668216, "step": 32100 }, { "epoch": 1.165020442128876, "grad_norm": 0.5677493214607239, "learning_rate": 2.0875393465754914e-05, "loss": 0.02892348051071167, "step": 32200 }, { "epoch": 1.1686385180361083, "grad_norm": 0.19627009332180023, "learning_rate": 2.0784941568074098e-05, "loss": 0.02890573740005493, "step": 32300 }, { "epoch": 1.172256593943341, "grad_norm": 0.2041957825422287, "learning_rate": 2.0694489670393286e-05, "loss": 0.02606424331665039, "step": 32400 }, { "epoch": 1.1758746698505735, "grad_norm": 0.36798298358917236, "learning_rate": 2.0604037772712473e-05, "loss": 0.029083385467529296, "step": 32500 }, { "epoch": 1.1758746698505735, "eval_accuracy": 0.9877625116339074, "eval_f1": 0.8611236096967975, "eval_loss": 0.05370509624481201, "eval_precision": 0.8443082257515248, "eval_recall": 0.8786224004896986, "eval_runtime": 62.1854, "eval_samples_per_second": 321.619, "eval_steps_per_second": 8.941, "step": 32500 }, { "epoch": 1.179492745757806, "grad_norm": 0.2152443379163742, "learning_rate": 2.051358587503166e-05, "loss": 0.028284170627593995, "step": 32600 }, { "epoch": 1.1831108216650386, "grad_norm": 0.2933087646961212, "learning_rate": 2.0423133977350845e-05, "loss": 0.034238841533660885, "step": 32700 }, { "epoch": 1.1867288975722712, "grad_norm": 0.36995938420295715, "learning_rate": 2.0332682079670032e-05, "loss": 0.03170938491821289, "step": 32800 }, { "epoch": 1.1903469734795036, "grad_norm": 0.7478405833244324, "learning_rate": 2.024223018198922e-05, "loss": 0.029751029014587402, "step": 32900 }, { "epoch": 1.1939650493867362, "grad_norm": 0.44457152485847473, "learning_rate": 2.0151778284308407e-05, "loss": 0.02949444770812988, "step": 33000 }, { "epoch": 1.1975831252939686, "grad_norm": 0.4324032664299011, "learning_rate": 2.0061326386627594e-05, "loss": 0.030652081966400145, "step": 33100 }, { "epoch": 1.2012012012012012, "grad_norm": 1.3409758806228638, "learning_rate": 1.9970874488946778e-05, "loss": 0.02934673547744751, "step": 33200 }, { "epoch": 1.2048192771084336, "grad_norm": 0.3867700397968292, "learning_rate": 1.9880422591265966e-05, "loss": 0.02774231195449829, "step": 33300 }, { "epoch": 1.2084373530156662, "grad_norm": 0.1256304383277893, "learning_rate": 1.9789970693585153e-05, "loss": 0.030440127849578856, "step": 33400 }, { "epoch": 1.2120554289228989, "grad_norm": 0.574845552444458, "learning_rate": 1.969951879590434e-05, "loss": 0.030182530879974367, "step": 33500 }, { "epoch": 1.2156735048301313, "grad_norm": 0.501304566860199, "learning_rate": 1.9609066898223528e-05, "loss": 0.03053757667541504, "step": 33600 }, { "epoch": 1.2192915807373639, "grad_norm": 0.1869884878396988, "learning_rate": 1.9518615000542712e-05, "loss": 0.02801114559173584, "step": 33700 }, { "epoch": 1.2229096566445965, "grad_norm": 0.44489210844039917, "learning_rate": 1.94281631028619e-05, "loss": 0.02709296464920044, "step": 33800 }, { "epoch": 1.226527732551829, "grad_norm": 0.2928631007671356, "learning_rate": 1.9337711205181087e-05, "loss": 0.033639376163482664, "step": 33900 }, { "epoch": 1.2301458084590615, "grad_norm": 0.2070285826921463, "learning_rate": 1.9247259307500274e-05, "loss": 0.03141526222229004, "step": 34000 }, { "epoch": 1.233763884366294, "grad_norm": 0.4693046510219574, "learning_rate": 1.9156807409819458e-05, "loss": 0.029341881275177003, "step": 34100 }, { "epoch": 1.2373819602735265, "grad_norm": 0.187980055809021, "learning_rate": 1.9066355512138645e-05, "loss": 0.033849341869354246, "step": 34200 }, { "epoch": 1.2410000361807592, "grad_norm": 0.7411011457443237, "learning_rate": 1.8975903614457833e-05, "loss": 0.027842617034912108, "step": 34300 }, { "epoch": 1.2446181120879916, "grad_norm": 0.4449065327644348, "learning_rate": 1.888545171677702e-05, "loss": 0.031680150032043455, "step": 34400 }, { "epoch": 1.2482361879952242, "grad_norm": 0.7327262759208679, "learning_rate": 1.8794999819096208e-05, "loss": 0.02651881694793701, "step": 34500 }, { "epoch": 1.2518542639024566, "grad_norm": 0.41838428378105164, "learning_rate": 1.870454792141539e-05, "loss": 0.032553679943084717, "step": 34600 }, { "epoch": 1.2554723398096892, "grad_norm": 0.3279021382331848, "learning_rate": 1.861409602373458e-05, "loss": 0.02605849742889404, "step": 34700 }, { "epoch": 1.2590904157169218, "grad_norm": 0.23042799532413483, "learning_rate": 1.8523644126053766e-05, "loss": 0.02857684135437012, "step": 34800 }, { "epoch": 1.2627084916241542, "grad_norm": 0.14856815338134766, "learning_rate": 1.8433192228372954e-05, "loss": 0.030806925296783447, "step": 34900 }, { "epoch": 1.2663265675313868, "grad_norm": 0.48354101181030273, "learning_rate": 1.8342740330692138e-05, "loss": 0.030027375221252442, "step": 35000 }, { "epoch": 1.2663265675313868, "eval_accuracy": 0.9877813255436068, "eval_f1": 0.8615969042346098, "eval_loss": 0.05214959755539894, "eval_precision": 0.8434818838343312, "eval_recall": 0.8805070957972906, "eval_runtime": 62.9193, "eval_samples_per_second": 317.867, "eval_steps_per_second": 8.837, "step": 35000 }, { "epoch": 1.2699446434386195, "grad_norm": 0.13334180414676666, "learning_rate": 1.8252288433011325e-05, "loss": 0.027159340381622314, "step": 35100 }, { "epoch": 1.2735627193458519, "grad_norm": 0.7394197583198547, "learning_rate": 1.8161836535330513e-05, "loss": 0.03075253963470459, "step": 35200 }, { "epoch": 1.2771807952530845, "grad_norm": 0.2870982587337494, "learning_rate": 1.80713846376497e-05, "loss": 0.030658049583435057, "step": 35300 }, { "epoch": 1.2807988711603169, "grad_norm": 0.9762187004089355, "learning_rate": 1.7980932739968887e-05, "loss": 0.031029996871948243, "step": 35400 }, { "epoch": 1.2844169470675495, "grad_norm": 0.44388410449028015, "learning_rate": 1.789048084228807e-05, "loss": 0.03051720142364502, "step": 35500 }, { "epoch": 1.288035022974782, "grad_norm": 0.7785915732383728, "learning_rate": 1.780002894460726e-05, "loss": 0.02536651849746704, "step": 35600 }, { "epoch": 1.2916530988820145, "grad_norm": 0.1702079176902771, "learning_rate": 1.7709577046926446e-05, "loss": 0.030427489280700683, "step": 35700 }, { "epoch": 1.2952711747892471, "grad_norm": 0.4802360236644745, "learning_rate": 1.7619125149245634e-05, "loss": 0.03049640417098999, "step": 35800 }, { "epoch": 1.2988892506964795, "grad_norm": 0.40013861656188965, "learning_rate": 1.752867325156482e-05, "loss": 0.030040171146392822, "step": 35900 }, { "epoch": 1.3025073266037122, "grad_norm": 0.34162065386772156, "learning_rate": 1.7438221353884005e-05, "loss": 0.031596968173980715, "step": 36000 }, { "epoch": 1.3061254025109448, "grad_norm": 0.34575241804122925, "learning_rate": 1.7347769456203193e-05, "loss": 0.03362387895584106, "step": 36100 }, { "epoch": 1.3097434784181772, "grad_norm": 0.4098789691925049, "learning_rate": 1.725731755852238e-05, "loss": 0.027526361942291258, "step": 36200 }, { "epoch": 1.3133615543254098, "grad_norm": 0.35067400336265564, "learning_rate": 1.7166865660841567e-05, "loss": 0.02835451364517212, "step": 36300 }, { "epoch": 1.3169796302326424, "grad_norm": 0.1685800403356552, "learning_rate": 1.707641376316075e-05, "loss": 0.028891866207122804, "step": 36400 }, { "epoch": 1.3205977061398748, "grad_norm": 0.32651832699775696, "learning_rate": 1.698596186547994e-05, "loss": 0.026589181423187256, "step": 36500 }, { "epoch": 1.3242157820471072, "grad_norm": 0.3153350353240967, "learning_rate": 1.6895509967799126e-05, "loss": 0.031108696460723877, "step": 36600 }, { "epoch": 1.3278338579543398, "grad_norm": 0.4476368725299835, "learning_rate": 1.6805058070118314e-05, "loss": 0.030014872550964355, "step": 36700 }, { "epoch": 1.3314519338615725, "grad_norm": 0.1972656548023224, "learning_rate": 1.67146061724375e-05, "loss": 0.029410278797149657, "step": 36800 }, { "epoch": 1.3350700097688049, "grad_norm": 0.7246927618980408, "learning_rate": 1.6624154274756685e-05, "loss": 0.03080254316329956, "step": 36900 }, { "epoch": 1.3386880856760375, "grad_norm": 0.3670811355113983, "learning_rate": 1.6533702377075872e-05, "loss": 0.02861506223678589, "step": 37000 }, { "epoch": 1.34230616158327, "grad_norm": 0.22275477647781372, "learning_rate": 1.644325047939506e-05, "loss": 0.0255238938331604, "step": 37100 }, { "epoch": 1.3459242374905025, "grad_norm": 0.3272339999675751, "learning_rate": 1.6352798581714247e-05, "loss": 0.028979463577270506, "step": 37200 }, { "epoch": 1.3495423133977351, "grad_norm": 0.5552839040756226, "learning_rate": 1.626234668403343e-05, "loss": 0.028283817768096922, "step": 37300 }, { "epoch": 1.3531603893049677, "grad_norm": 0.33792686462402344, "learning_rate": 1.617189478635262e-05, "loss": 0.03224069595336914, "step": 37400 }, { "epoch": 1.3567784652122001, "grad_norm": 1.0481899976730347, "learning_rate": 1.6081442888671806e-05, "loss": 0.02690179109573364, "step": 37500 }, { "epoch": 1.3567784652122001, "eval_accuracy": 0.9878715555186957, "eval_f1": 0.8683487542236398, "eval_loss": 0.05309534817934036, "eval_precision": 0.851476257567078, "eval_recall": 0.8859034456096264, "eval_runtime": 62.1337, "eval_samples_per_second": 321.887, "eval_steps_per_second": 8.948, "step": 37500 }, { "epoch": 1.3603965411194328, "grad_norm": 0.20256465673446655, "learning_rate": 1.5990990990990993e-05, "loss": 0.027432169914245606, "step": 37600 }, { "epoch": 1.3640146170266652, "grad_norm": 0.3237811028957367, "learning_rate": 1.590053909331018e-05, "loss": 0.030464730262756347, "step": 37700 }, { "epoch": 1.3676326929338978, "grad_norm": 0.31953930854797363, "learning_rate": 1.5810087195629365e-05, "loss": 0.027273902893066405, "step": 37800 }, { "epoch": 1.3712507688411302, "grad_norm": 0.38057664036750793, "learning_rate": 1.5719635297948552e-05, "loss": 0.0259963059425354, "step": 37900 }, { "epoch": 1.3748688447483628, "grad_norm": 0.6410769820213318, "learning_rate": 1.562918340026774e-05, "loss": 0.031271641254425046, "step": 38000 }, { "epoch": 1.3784869206555954, "grad_norm": 0.8330540060997009, "learning_rate": 1.5538731502586927e-05, "loss": 0.02934875011444092, "step": 38100 }, { "epoch": 1.3821049965628278, "grad_norm": 1.1677355766296387, "learning_rate": 1.5448279604906114e-05, "loss": 0.02971445083618164, "step": 38200 }, { "epoch": 1.3857230724700604, "grad_norm": 0.4667145609855652, "learning_rate": 1.53578277072253e-05, "loss": 0.02775926113128662, "step": 38300 }, { "epoch": 1.389341148377293, "grad_norm": 0.4434032440185547, "learning_rate": 1.5267375809544486e-05, "loss": 0.026833882331848146, "step": 38400 }, { "epoch": 1.3929592242845255, "grad_norm": 0.2564474642276764, "learning_rate": 1.5176923911863672e-05, "loss": 0.02980698347091675, "step": 38500 }, { "epoch": 1.396577300191758, "grad_norm": 0.43813377618789673, "learning_rate": 1.5086472014182859e-05, "loss": 0.028636832237243653, "step": 38600 }, { "epoch": 1.4001953760989905, "grad_norm": 0.928669810295105, "learning_rate": 1.4996020116502043e-05, "loss": 0.02784595012664795, "step": 38700 }, { "epoch": 1.403813452006223, "grad_norm": 1.0816453695297241, "learning_rate": 1.490556821882123e-05, "loss": 0.031624915599823, "step": 38800 }, { "epoch": 1.4074315279134555, "grad_norm": 1.6790099143981934, "learning_rate": 1.4815116321140418e-05, "loss": 0.02443223476409912, "step": 38900 }, { "epoch": 1.4110496038206881, "grad_norm": 0.39879387617111206, "learning_rate": 1.4724664423459605e-05, "loss": 0.02753525972366333, "step": 39000 }, { "epoch": 1.4146676797279207, "grad_norm": 0.6372315883636475, "learning_rate": 1.4634212525778793e-05, "loss": 0.02859419822692871, "step": 39100 }, { "epoch": 1.4182857556351531, "grad_norm": 0.4357219934463501, "learning_rate": 1.4543760628097977e-05, "loss": 0.02929396152496338, "step": 39200 }, { "epoch": 1.4219038315423858, "grad_norm": 0.8673311471939087, "learning_rate": 1.4453308730417164e-05, "loss": 0.027733774185180665, "step": 39300 }, { "epoch": 1.4255219074496184, "grad_norm": 0.31178081035614014, "learning_rate": 1.4362856832736351e-05, "loss": 0.029380517005920412, "step": 39400 }, { "epoch": 1.4291399833568508, "grad_norm": 0.9862114191055298, "learning_rate": 1.4272404935055539e-05, "loss": 0.02801510810852051, "step": 39500 }, { "epoch": 1.4327580592640834, "grad_norm": 0.3226287364959717, "learning_rate": 1.4181953037374726e-05, "loss": 0.02600921630859375, "step": 39600 }, { "epoch": 1.436376135171316, "grad_norm": 1.0932515859603882, "learning_rate": 1.409150113969391e-05, "loss": 0.027818257808685302, "step": 39700 }, { "epoch": 1.4399942110785484, "grad_norm": 0.4064158797264099, "learning_rate": 1.4001049242013098e-05, "loss": 0.030927972793579103, "step": 39800 }, { "epoch": 1.443612286985781, "grad_norm": 0.6574753522872925, "learning_rate": 1.3910597344332285e-05, "loss": 0.028972697257995606, "step": 39900 }, { "epoch": 1.4472303628930134, "grad_norm": 0.24314340949058533, "learning_rate": 1.3820145446651472e-05, "loss": 0.029455924034118654, "step": 40000 }, { "epoch": 1.4472303628930134, "eval_accuracy": 0.9882140454666924, "eval_f1": 0.8711891990109102, "eval_loss": 0.05167451128363609, "eval_precision": 0.8548262069393198, "eval_recall": 0.8881908535897808, "eval_runtime": 62.5842, "eval_samples_per_second": 319.57, "eval_steps_per_second": 8.884, "step": 40000 }, { "epoch": 1.450848438800246, "grad_norm": 0.28122034668922424, "learning_rate": 1.3729693548970656e-05, "loss": 0.029821088314056398, "step": 40100 }, { "epoch": 1.4544665147074785, "grad_norm": 0.45019853115081787, "learning_rate": 1.3639241651289844e-05, "loss": 0.027684724330902098, "step": 40200 }, { "epoch": 1.458084590614711, "grad_norm": 0.6584652066230774, "learning_rate": 1.3548789753609031e-05, "loss": 0.026381478309631348, "step": 40300 }, { "epoch": 1.4617026665219437, "grad_norm": 2.1259236335754395, "learning_rate": 1.3458337855928219e-05, "loss": 0.02868267774581909, "step": 40400 }, { "epoch": 1.465320742429176, "grad_norm": 0.9566027522087097, "learning_rate": 1.3367885958247406e-05, "loss": 0.027485811710357667, "step": 40500 }, { "epoch": 1.4689388183364087, "grad_norm": 0.9289085268974304, "learning_rate": 1.327743406056659e-05, "loss": 0.030939743518829346, "step": 40600 }, { "epoch": 1.4725568942436413, "grad_norm": 0.6716954112052917, "learning_rate": 1.3186982162885778e-05, "loss": 0.026526257991790772, "step": 40700 }, { "epoch": 1.4761749701508737, "grad_norm": 0.26186442375183105, "learning_rate": 1.3096530265204965e-05, "loss": 0.027606160640716554, "step": 40800 }, { "epoch": 1.4797930460581064, "grad_norm": 0.5962882041931152, "learning_rate": 1.3006078367524152e-05, "loss": 0.03013371229171753, "step": 40900 }, { "epoch": 1.4834111219653388, "grad_norm": 0.28622719645500183, "learning_rate": 1.2915626469843336e-05, "loss": 0.026788763999938965, "step": 41000 }, { "epoch": 1.4870291978725714, "grad_norm": 0.2146042138338089, "learning_rate": 1.2825174572162524e-05, "loss": 0.026920742988586426, "step": 41100 }, { "epoch": 1.4906472737798038, "grad_norm": 0.30449753999710083, "learning_rate": 1.2734722674481711e-05, "loss": 0.028757052421569826, "step": 41200 }, { "epoch": 1.4942653496870364, "grad_norm": 0.11651007831096649, "learning_rate": 1.2644270776800899e-05, "loss": 0.029123516082763673, "step": 41300 }, { "epoch": 1.497883425594269, "grad_norm": 3.1146299839019775, "learning_rate": 1.2553818879120086e-05, "loss": 0.028435797691345216, "step": 41400 }, { "epoch": 1.5015015015015014, "grad_norm": 0.2705380916595459, "learning_rate": 1.2463366981439272e-05, "loss": 0.03229628562927246, "step": 41500 }, { "epoch": 1.505119577408734, "grad_norm": 0.5641364455223083, "learning_rate": 1.2372915083758457e-05, "loss": 0.02912388801574707, "step": 41600 }, { "epoch": 1.5087376533159667, "grad_norm": 0.4726872444152832, "learning_rate": 1.2282463186077645e-05, "loss": 0.028761823177337647, "step": 41700 }, { "epoch": 1.512355729223199, "grad_norm": 2.5604758262634277, "learning_rate": 1.2192011288396832e-05, "loss": 0.02635906219482422, "step": 41800 }, { "epoch": 1.5159738051304317, "grad_norm": 0.3598019778728485, "learning_rate": 1.2101559390716018e-05, "loss": 0.026577677726745606, "step": 41900 }, { "epoch": 1.5195918810376643, "grad_norm": 0.31742435693740845, "learning_rate": 1.2011107493035205e-05, "loss": 0.02479785919189453, "step": 42000 }, { "epoch": 1.5232099569448967, "grad_norm": 1.0102005004882812, "learning_rate": 1.1920655595354391e-05, "loss": 0.028279991149902345, "step": 42100 }, { "epoch": 1.526828032852129, "grad_norm": 0.4230172038078308, "learning_rate": 1.1830203697673578e-05, "loss": 0.027808871269226074, "step": 42200 }, { "epoch": 1.530446108759362, "grad_norm": 0.35221824049949646, "learning_rate": 1.1739751799992764e-05, "loss": 0.02666907787322998, "step": 42300 }, { "epoch": 1.5340641846665943, "grad_norm": 0.37867021560668945, "learning_rate": 1.1649299902311952e-05, "loss": 0.028237838745117188, "step": 42400 }, { "epoch": 1.5376822605738267, "grad_norm": 1.1692699193954468, "learning_rate": 1.1558848004631137e-05, "loss": 0.027906298637390137, "step": 42500 }, { "epoch": 1.5376822605738267, "eval_accuracy": 0.9883852904406909, "eval_f1": 0.8713540843735187, "eval_loss": 0.048916082829236984, "eval_precision": 0.8549944962093611, "eval_recall": 0.8883519386588057, "eval_runtime": 62.2278, "eval_samples_per_second": 321.4, "eval_steps_per_second": 8.935, "step": 42500 }, { "epoch": 1.5413003364810594, "grad_norm": 0.273318886756897, "learning_rate": 1.1468396106950325e-05, "loss": 0.031116650104522706, "step": 42600 }, { "epoch": 1.544918412388292, "grad_norm": 0.48087653517723083, "learning_rate": 1.1377944209269512e-05, "loss": 0.026544408798217775, "step": 42700 }, { "epoch": 1.5485364882955244, "grad_norm": 0.7746985554695129, "learning_rate": 1.1287492311588698e-05, "loss": 0.026500403881072998, "step": 42800 }, { "epoch": 1.552154564202757, "grad_norm": 0.1549975574016571, "learning_rate": 1.1197040413907885e-05, "loss": 0.026587300300598145, "step": 42900 }, { "epoch": 1.5557726401099896, "grad_norm": 1.972495198249817, "learning_rate": 1.110658851622707e-05, "loss": 0.029258613586425782, "step": 43000 }, { "epoch": 1.559390716017222, "grad_norm": 0.6956634521484375, "learning_rate": 1.1016136618546258e-05, "loss": 0.026978886127471922, "step": 43100 }, { "epoch": 1.5630087919244544, "grad_norm": 0.16629020869731903, "learning_rate": 1.0925684720865444e-05, "loss": 0.03226327657699585, "step": 43200 }, { "epoch": 1.5666268678316873, "grad_norm": 0.37136366963386536, "learning_rate": 1.0835232823184631e-05, "loss": 0.028375396728515623, "step": 43300 }, { "epoch": 1.5702449437389197, "grad_norm": 0.2561453580856323, "learning_rate": 1.0744780925503819e-05, "loss": 0.027073240280151366, "step": 43400 }, { "epoch": 1.573863019646152, "grad_norm": 0.42630210518836975, "learning_rate": 1.0654329027823004e-05, "loss": 0.026704757213592528, "step": 43500 }, { "epoch": 1.5774810955533847, "grad_norm": 0.4090301990509033, "learning_rate": 1.0563877130142192e-05, "loss": 0.02855618476867676, "step": 43600 }, { "epoch": 1.5810991714606173, "grad_norm": 0.24324025213718414, "learning_rate": 1.0473425232461378e-05, "loss": 0.025224699974060058, "step": 43700 }, { "epoch": 1.5847172473678497, "grad_norm": 0.4220653772354126, "learning_rate": 1.0382973334780565e-05, "loss": 0.029145328998565673, "step": 43800 }, { "epoch": 1.5883353232750823, "grad_norm": 0.4333362281322479, "learning_rate": 1.029252143709975e-05, "loss": 0.025774214267730713, "step": 43900 }, { "epoch": 1.591953399182315, "grad_norm": 0.15959997475147247, "learning_rate": 1.0202069539418938e-05, "loss": 0.026988446712493896, "step": 44000 }, { "epoch": 1.5955714750895473, "grad_norm": 0.2643369138240814, "learning_rate": 1.0111617641738126e-05, "loss": 0.0258998441696167, "step": 44100 }, { "epoch": 1.59918955099678, "grad_norm": 0.8528566360473633, "learning_rate": 1.0021165744057311e-05, "loss": 0.02746238708496094, "step": 44200 }, { "epoch": 1.6028076269040126, "grad_norm": 0.999005138874054, "learning_rate": 9.930713846376499e-06, "loss": 0.028600902557373048, "step": 44300 }, { "epoch": 1.606425702811245, "grad_norm": 0.6834824681282043, "learning_rate": 9.840261948695684e-06, "loss": 0.028850455284118653, "step": 44400 }, { "epoch": 1.6100437787184774, "grad_norm": 0.3043724298477173, "learning_rate": 9.749810051014872e-06, "loss": 0.0262698769569397, "step": 44500 }, { "epoch": 1.6136618546257102, "grad_norm": 0.8399735689163208, "learning_rate": 9.659358153334057e-06, "loss": 0.02827603816986084, "step": 44600 }, { "epoch": 1.6172799305329426, "grad_norm": 0.9611870646476746, "learning_rate": 9.568906255653245e-06, "loss": 0.02755260467529297, "step": 44700 }, { "epoch": 1.620898006440175, "grad_norm": 0.23461508750915527, "learning_rate": 9.47845435797243e-06, "loss": 0.0311501145362854, "step": 44800 }, { "epoch": 1.6245160823474076, "grad_norm": 2.882127046585083, "learning_rate": 9.388002460291618e-06, "loss": 0.029984614849090575, "step": 44900 }, { "epoch": 1.6281341582546403, "grad_norm": 0.32786279916763306, "learning_rate": 9.297550562610804e-06, "loss": 0.028132951259613036, "step": 45000 }, { "epoch": 1.6281341582546403, "eval_accuracy": 0.9886770980197016, "eval_f1": 0.8710388819944511, "eval_loss": 0.047967541962862015, "eval_precision": 0.855134094859697, "eval_recall": 0.887546513313681, "eval_runtime": 62.7107, "eval_samples_per_second": 318.925, "eval_steps_per_second": 8.866, "step": 45000 }, { "epoch": 1.6317522341618727, "grad_norm": 1.6328613758087158, "learning_rate": 9.207098664929991e-06, "loss": 0.028099877834320067, "step": 45100 }, { "epoch": 1.6353703100691053, "grad_norm": 1.1488419771194458, "learning_rate": 9.116646767249177e-06, "loss": 0.025699715614318847, "step": 45200 }, { "epoch": 1.638988385976338, "grad_norm": 1.2527875900268555, "learning_rate": 9.026194869568364e-06, "loss": 0.02980081081390381, "step": 45300 }, { "epoch": 1.6426064618835703, "grad_norm": 0.25659850239753723, "learning_rate": 8.93574297188755e-06, "loss": 0.02849080801010132, "step": 45400 }, { "epoch": 1.6462245377908027, "grad_norm": 0.24858339130878448, "learning_rate": 8.845291074206737e-06, "loss": 0.02909574508666992, "step": 45500 }, { "epoch": 1.6498426136980355, "grad_norm": 0.35774946212768555, "learning_rate": 8.754839176525923e-06, "loss": 0.028034112453460693, "step": 45600 }, { "epoch": 1.653460689605268, "grad_norm": 0.28512680530548096, "learning_rate": 8.66438727884511e-06, "loss": 0.029735114574432373, "step": 45700 }, { "epoch": 1.6570787655125003, "grad_norm": 0.12049074470996857, "learning_rate": 8.573935381164296e-06, "loss": 0.03128848075866699, "step": 45800 }, { "epoch": 1.660696841419733, "grad_norm": 0.5767261385917664, "learning_rate": 8.483483483483484e-06, "loss": 0.02762418031692505, "step": 45900 }, { "epoch": 1.6643149173269656, "grad_norm": 0.12318204343318939, "learning_rate": 8.39303158580267e-06, "loss": 0.026004743576049805, "step": 46000 }, { "epoch": 1.667932993234198, "grad_norm": 0.311279833316803, "learning_rate": 8.302579688121857e-06, "loss": 0.024458692073822022, "step": 46100 }, { "epoch": 1.6715510691414306, "grad_norm": 0.2753770351409912, "learning_rate": 8.212127790441042e-06, "loss": 0.026231870651245118, "step": 46200 }, { "epoch": 1.6751691450486632, "grad_norm": 0.8421895503997803, "learning_rate": 8.12167589276023e-06, "loss": 0.02496417760848999, "step": 46300 }, { "epoch": 1.6787872209558956, "grad_norm": 0.6493498086929321, "learning_rate": 8.031223995079417e-06, "loss": 0.026742682456970215, "step": 46400 }, { "epoch": 1.6824052968631282, "grad_norm": 0.3029896318912506, "learning_rate": 7.940772097398603e-06, "loss": 0.024227650165557862, "step": 46500 }, { "epoch": 1.6860233727703609, "grad_norm": 0.34622183442115784, "learning_rate": 7.85032019971779e-06, "loss": 0.025336668491363526, "step": 46600 }, { "epoch": 1.6896414486775932, "grad_norm": 1.1520912647247314, "learning_rate": 7.759868302036976e-06, "loss": 0.028549084663391112, "step": 46700 }, { "epoch": 1.6932595245848256, "grad_norm": 0.11390261352062225, "learning_rate": 7.669416404356163e-06, "loss": 0.025614957809448242, "step": 46800 }, { "epoch": 1.6968776004920583, "grad_norm": 0.20818683505058289, "learning_rate": 7.57896450667535e-06, "loss": 0.02624866247177124, "step": 46900 }, { "epoch": 1.700495676399291, "grad_norm": 0.11861401051282883, "learning_rate": 7.488512608994537e-06, "loss": 0.029836065769195556, "step": 47000 }, { "epoch": 1.7041137523065233, "grad_norm": 0.21509072184562683, "learning_rate": 7.398060711313724e-06, "loss": 0.02764824151992798, "step": 47100 }, { "epoch": 1.707731828213756, "grad_norm": 0.09410534054040909, "learning_rate": 7.3076088136329105e-06, "loss": 0.026358423233032228, "step": 47200 }, { "epoch": 1.7113499041209885, "grad_norm": 0.4441370666027069, "learning_rate": 7.217156915952097e-06, "loss": 0.028589205741882326, "step": 47300 }, { "epoch": 1.714967980028221, "grad_norm": 0.301600843667984, "learning_rate": 7.1267050182712836e-06, "loss": 0.02586300849914551, "step": 47400 }, { "epoch": 1.7185860559354535, "grad_norm": 0.2969602942466736, "learning_rate": 7.03625312059047e-06, "loss": 0.027719602584838868, "step": 47500 }, { "epoch": 1.7185860559354535, "eval_accuracy": 0.9887869098191715, "eval_f1": 0.8751810891473175, "eval_loss": 0.04670108109712601, "eval_precision": 0.8604607721046077, "eval_recall": 0.8904138275423251, "eval_runtime": 62.4542, "eval_samples_per_second": 320.234, "eval_steps_per_second": 8.903, "step": 47500 }, { "epoch": 1.7222041318426862, "grad_norm": 2.922269582748413, "learning_rate": 6.945801222909657e-06, "loss": 0.026613037586212158, "step": 47600 }, { "epoch": 1.7258222077499186, "grad_norm": 0.3603607714176178, "learning_rate": 6.855349325228843e-06, "loss": 0.02875258445739746, "step": 47700 }, { "epoch": 1.729440283657151, "grad_norm": 0.17424313724040985, "learning_rate": 6.764897427548031e-06, "loss": 0.028092458248138427, "step": 47800 }, { "epoch": 1.7330583595643838, "grad_norm": 0.39376911520957947, "learning_rate": 6.674445529867217e-06, "loss": 0.029860684871673582, "step": 47900 }, { "epoch": 1.7366764354716162, "grad_norm": 0.30766257643699646, "learning_rate": 6.583993632186404e-06, "loss": 0.027765181064605713, "step": 48000 }, { "epoch": 1.7402945113788486, "grad_norm": 0.4809003472328186, "learning_rate": 6.49354173450559e-06, "loss": 0.025850486755371094, "step": 48100 }, { "epoch": 1.7439125872860812, "grad_norm": 0.31469446420669556, "learning_rate": 6.403089836824777e-06, "loss": 0.024390408992767332, "step": 48200 }, { "epoch": 1.7475306631933138, "grad_norm": 0.1946684867143631, "learning_rate": 6.312637939143963e-06, "loss": 0.02534383535385132, "step": 48300 }, { "epoch": 1.7511487391005462, "grad_norm": 0.31097686290740967, "learning_rate": 6.22218604146315e-06, "loss": 0.02695645809173584, "step": 48400 }, { "epoch": 1.7547668150077789, "grad_norm": 0.7921291589736938, "learning_rate": 6.1317341437823365e-06, "loss": 0.023772099018096925, "step": 48500 }, { "epoch": 1.7583848909150115, "grad_norm": 0.3385520577430725, "learning_rate": 6.041282246101523e-06, "loss": 0.024593567848205565, "step": 48600 }, { "epoch": 1.7620029668222439, "grad_norm": 0.23133955895900726, "learning_rate": 5.95083034842071e-06, "loss": 0.025404906272888182, "step": 48700 }, { "epoch": 1.7656210427294765, "grad_norm": 0.17175310850143433, "learning_rate": 5.860378450739896e-06, "loss": 0.024191346168518067, "step": 48800 }, { "epoch": 1.7692391186367091, "grad_norm": 1.453963041305542, "learning_rate": 5.769926553059084e-06, "loss": 0.023371386528015136, "step": 48900 }, { "epoch": 1.7728571945439415, "grad_norm": 0.4487530291080475, "learning_rate": 5.67947465537827e-06, "loss": 0.024376935958862304, "step": 49000 }, { "epoch": 1.776475270451174, "grad_norm": 0.17453834414482117, "learning_rate": 5.589022757697457e-06, "loss": 0.027640838623046875, "step": 49100 }, { "epoch": 1.7800933463584065, "grad_norm": 0.24941837787628174, "learning_rate": 5.498570860016643e-06, "loss": 0.02413508415222168, "step": 49200 }, { "epoch": 1.7837114222656392, "grad_norm": 0.3545306622982025, "learning_rate": 5.40811896233583e-06, "loss": 0.025269722938537596, "step": 49300 }, { "epoch": 1.7873294981728716, "grad_norm": 0.21222856640815735, "learning_rate": 5.317667064655016e-06, "loss": 0.02443007230758667, "step": 49400 }, { "epoch": 1.7909475740801042, "grad_norm": 0.5955353379249573, "learning_rate": 5.227215166974203e-06, "loss": 0.027793030738830566, "step": 49500 }, { "epoch": 1.7945656499873368, "grad_norm": 1.0362492799758911, "learning_rate": 5.13676326929339e-06, "loss": 0.02576704978942871, "step": 49600 }, { "epoch": 1.7981837258945692, "grad_norm": 0.2961190938949585, "learning_rate": 5.046311371612577e-06, "loss": 0.027634003162384034, "step": 49700 }, { "epoch": 1.8018018018018018, "grad_norm": 0.2701990604400635, "learning_rate": 4.9558594739317635e-06, "loss": 0.026762216091156005, "step": 49800 }, { "epoch": 1.8054198777090344, "grad_norm": 0.3419773280620575, "learning_rate": 4.86540757625095e-06, "loss": 0.028021221160888673, "step": 49900 }, { "epoch": 1.8090379536162668, "grad_norm": 0.3847455680370331, "learning_rate": 4.7749556785701366e-06, "loss": 0.028925769329071045, "step": 50000 }, { "epoch": 1.8090379536162668, "eval_accuracy": 0.9891697152879526, "eval_f1": 0.8756019071264223, "eval_loss": 0.04578976333141327, "eval_precision": 0.8598627201292046, "eval_recall": 0.8919280271911596, "eval_runtime": 62.7397, "eval_samples_per_second": 318.777, "eval_steps_per_second": 8.862, "step": 50000 }, { "epoch": 1.8126560295234992, "grad_norm": 0.12807752192020416, "learning_rate": 4.684503780889323e-06, "loss": 0.024477434158325196, "step": 50100 }, { "epoch": 1.816274105430732, "grad_norm": 0.5839409828186035, "learning_rate": 4.59405188320851e-06, "loss": 0.029098427295684813, "step": 50200 }, { "epoch": 1.8198921813379645, "grad_norm": 0.1988334357738495, "learning_rate": 4.503599985527696e-06, "loss": 0.027852838039398194, "step": 50300 }, { "epoch": 1.8235102572451969, "grad_norm": 1.1250760555267334, "learning_rate": 4.413148087846884e-06, "loss": 0.025283007621765136, "step": 50400 }, { "epoch": 1.8271283331524295, "grad_norm": 0.3275587558746338, "learning_rate": 4.32269619016607e-06, "loss": 0.0253476619720459, "step": 50500 }, { "epoch": 1.8307464090596621, "grad_norm": 0.2422463297843933, "learning_rate": 4.232244292485257e-06, "loss": 0.025618109703063965, "step": 50600 }, { "epoch": 1.8343644849668945, "grad_norm": 0.6434578895568848, "learning_rate": 4.141792394804443e-06, "loss": 0.026464188098907472, "step": 50700 }, { "epoch": 1.8379825608741271, "grad_norm": 0.16934601962566376, "learning_rate": 4.05134049712363e-06, "loss": 0.025098586082458497, "step": 50800 }, { "epoch": 1.8416006367813598, "grad_norm": 0.21844395995140076, "learning_rate": 3.9608885994428164e-06, "loss": 0.023906781673431396, "step": 50900 }, { "epoch": 1.8452187126885922, "grad_norm": 0.2674906253814697, "learning_rate": 3.870436701762003e-06, "loss": 0.026905314922332765, "step": 51000 }, { "epoch": 1.8488367885958248, "grad_norm": 0.4344836473464966, "learning_rate": 3.77998480408119e-06, "loss": 0.026017348766326904, "step": 51100 }, { "epoch": 1.8524548645030574, "grad_norm": 0.5953734517097473, "learning_rate": 3.6895329064003765e-06, "loss": 0.02634397745132446, "step": 51200 }, { "epoch": 1.8560729404102898, "grad_norm": 0.14901016652584076, "learning_rate": 3.599081008719563e-06, "loss": 0.02832331895828247, "step": 51300 }, { "epoch": 1.8596910163175222, "grad_norm": 0.7816808223724365, "learning_rate": 3.5086291110387496e-06, "loss": 0.026141095161437988, "step": 51400 }, { "epoch": 1.8633090922247548, "grad_norm": 0.5734632015228271, "learning_rate": 3.418177213357936e-06, "loss": 0.02372182607650757, "step": 51500 }, { "epoch": 1.8669271681319874, "grad_norm": 0.9664448499679565, "learning_rate": 3.3277253156771227e-06, "loss": 0.024712865352630616, "step": 51600 }, { "epoch": 1.8705452440392198, "grad_norm": 0.390066921710968, "learning_rate": 3.2372734179963093e-06, "loss": 0.026522459983825682, "step": 51700 }, { "epoch": 1.8741633199464525, "grad_norm": 0.6472379565238953, "learning_rate": 3.146821520315496e-06, "loss": 0.024525246620178222, "step": 51800 }, { "epoch": 1.877781395853685, "grad_norm": 0.4985784888267517, "learning_rate": 3.056369622634683e-06, "loss": 0.02446552038192749, "step": 51900 }, { "epoch": 1.8813994717609175, "grad_norm": 0.22120802104473114, "learning_rate": 2.9659177249538694e-06, "loss": 0.025269200801849367, "step": 52000 }, { "epoch": 1.88501754766815, "grad_norm": 0.3579547703266144, "learning_rate": 2.8754658272730564e-06, "loss": 0.025214505195617676, "step": 52100 }, { "epoch": 1.8886356235753827, "grad_norm": 0.7338326573371887, "learning_rate": 2.785013929592243e-06, "loss": 0.02668466329574585, "step": 52200 }, { "epoch": 1.8922536994826151, "grad_norm": 0.3315567970275879, "learning_rate": 2.6945620319114295e-06, "loss": 0.030078487396240236, "step": 52300 }, { "epoch": 1.8958717753898475, "grad_norm": 0.35072797536849976, "learning_rate": 2.6041101342306165e-06, "loss": 0.02516920804977417, "step": 52400 }, { "epoch": 1.8994898512970804, "grad_norm": 0.43289047479629517, "learning_rate": 2.513658236549803e-06, "loss": 0.026839351654052733, "step": 52500 }, { "epoch": 1.8994898512970804, "eval_accuracy": 0.9891036746253344, "eval_f1": 0.876242095754291, "eval_loss": 0.045680414885282516, "eval_precision": 0.8623029055350209, "eval_recall": 0.89063934663896, "eval_runtime": 62.307, "eval_samples_per_second": 320.991, "eval_steps_per_second": 8.924, "step": 52500 }, { "epoch": 1.9031079272043128, "grad_norm": 0.4170491099357605, "learning_rate": 2.4232063388689896e-06, "loss": 0.027149310111999513, "step": 52600 }, { "epoch": 1.9067260031115452, "grad_norm": 0.33568137884140015, "learning_rate": 2.332754441188176e-06, "loss": 0.024306225776672363, "step": 52700 }, { "epoch": 1.9103440790187778, "grad_norm": 0.831928551197052, "learning_rate": 2.242302543507363e-06, "loss": 0.025090248584747316, "step": 52800 }, { "epoch": 1.9139621549260104, "grad_norm": 0.2261083424091339, "learning_rate": 2.1518506458265497e-06, "loss": 0.02992173671722412, "step": 52900 }, { "epoch": 1.9175802308332428, "grad_norm": 0.36420953273773193, "learning_rate": 2.0613987481457362e-06, "loss": 0.026374735832214356, "step": 53000 }, { "epoch": 1.9211983067404754, "grad_norm": 0.3849758207798004, "learning_rate": 1.970946850464923e-06, "loss": 0.024311881065368652, "step": 53100 }, { "epoch": 1.924816382647708, "grad_norm": 0.1625661551952362, "learning_rate": 1.8804949527841096e-06, "loss": 0.028159475326538085, "step": 53200 }, { "epoch": 1.9284344585549404, "grad_norm": 0.10745652765035629, "learning_rate": 1.7900430551032961e-06, "loss": 0.028279855251312255, "step": 53300 }, { "epoch": 1.932052534462173, "grad_norm": 0.3585937023162842, "learning_rate": 1.6995911574224827e-06, "loss": 0.025097475051879883, "step": 53400 }, { "epoch": 1.9356706103694057, "grad_norm": 0.3355402648448944, "learning_rate": 1.6091392597416697e-06, "loss": 0.0232719612121582, "step": 53500 }, { "epoch": 1.939288686276638, "grad_norm": 0.6301077604293823, "learning_rate": 1.5186873620608562e-06, "loss": 0.023976569175720216, "step": 53600 }, { "epoch": 1.9429067621838705, "grad_norm": 1.720951795578003, "learning_rate": 1.4282354643800428e-06, "loss": 0.027393877506256104, "step": 53700 }, { "epoch": 1.946524838091103, "grad_norm": 1.0819095373153687, "learning_rate": 1.3377835666992295e-06, "loss": 0.028527204990386964, "step": 53800 }, { "epoch": 1.9501429139983357, "grad_norm": 0.4960351884365082, "learning_rate": 1.247331669018416e-06, "loss": 0.023636491298675538, "step": 53900 }, { "epoch": 1.9537609899055681, "grad_norm": 0.6555366516113281, "learning_rate": 1.1568797713376029e-06, "loss": 0.02606668949127197, "step": 54000 }, { "epoch": 1.9573790658128007, "grad_norm": 0.17520390450954437, "learning_rate": 1.0664278736567894e-06, "loss": 0.024348812103271486, "step": 54100 }, { "epoch": 1.9609971417200334, "grad_norm": 0.2867375612258911, "learning_rate": 9.75975975975976e-07, "loss": 0.024609763622283936, "step": 54200 }, { "epoch": 1.9646152176272658, "grad_norm": 0.11981488019227982, "learning_rate": 8.855240782951626e-07, "loss": 0.02563744068145752, "step": 54300 }, { "epoch": 1.9682332935344984, "grad_norm": 0.25503483414649963, "learning_rate": 7.950721806143494e-07, "loss": 0.026204137802124022, "step": 54400 }, { "epoch": 1.971851369441731, "grad_norm": 0.23244522511959076, "learning_rate": 7.04620282933536e-07, "loss": 0.0256950044631958, "step": 54500 }, { "epoch": 1.9754694453489634, "grad_norm": 0.20025278627872467, "learning_rate": 6.141683852527226e-07, "loss": 0.025686397552490234, "step": 54600 }, { "epoch": 1.9790875212561958, "grad_norm": 0.4756115972995758, "learning_rate": 5.237164875719093e-07, "loss": 0.02578796148300171, "step": 54700 }, { "epoch": 1.9827055971634286, "grad_norm": 0.27420374751091003, "learning_rate": 4.3326458989109595e-07, "loss": 0.023311092853546142, "step": 54800 }, { "epoch": 1.986323673070661, "grad_norm": 0.19387075304985046, "learning_rate": 3.4281269221028255e-07, "loss": 0.02670889377593994, "step": 54900 }, { "epoch": 1.9899417489778934, "grad_norm": 0.726769745349884, "learning_rate": 2.523607945294692e-07, "loss": 0.03058022975921631, "step": 55000 }, { "epoch": 1.9899417489778934, "eval_accuracy": 0.9892449709267501, "eval_f1": 0.8768783517240833, "eval_loss": 0.0451948419213295, "eval_precision": 0.8626445559677067, "eval_recall": 0.8915897485462072, "eval_runtime": 62.8103, "eval_samples_per_second": 318.419, "eval_steps_per_second": 8.852, "step": 55000 }, { "epoch": 1.993559824885126, "grad_norm": 0.22022511065006256, "learning_rate": 1.6190889684865588e-07, "loss": 0.026084864139556886, "step": 55100 }, { "epoch": 1.9971779007923587, "grad_norm": 0.5684672594070435, "learning_rate": 7.145699916784254e-08, "loss": 0.027587156295776367, "step": 55200 }, { "epoch": 2.0, "step": 55278, "total_flos": 1.9407141577440333e+18, "train_loss": 0.04855243214227653, "train_runtime": 26239.1933, "train_samples_per_second": 303.363, "train_steps_per_second": 2.107 } ], "logging_steps": 100, "max_steps": 55278, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9407141577440333e+18, "train_batch_size": 72, "trial_name": null, "trial_params": null }