polyglot-tagger-100L-4M / trainer_state.json
DerivedFunction's picture
End of training
1913959
{
"best_global_step": 55000,
"best_metric": 0.8768783517240833,
"best_model_checkpoint": "./lang-ner-xlmr/checkpoint-55000",
"epoch": 2.0,
"eval_steps": 2500,
"global_step": 55278,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0036180759072325336,
"grad_norm": 5.75448751449585,
"learning_rate": 4.9910452621295995e-05,
"loss": 4.179392395019531,
"step": 100
},
{
"epoch": 0.007236151814465067,
"grad_norm": 2.6520659923553467,
"learning_rate": 4.9820000723615186e-05,
"loss": 0.6058632278442383,
"step": 200
},
{
"epoch": 0.010854227721697602,
"grad_norm": 3.474226951599121,
"learning_rate": 4.972954882593437e-05,
"loss": 0.3028737449645996,
"step": 300
},
{
"epoch": 0.014472303628930134,
"grad_norm": 1.4948221445083618,
"learning_rate": 4.963909692825356e-05,
"loss": 0.18973339080810547,
"step": 400
},
{
"epoch": 0.01809037953616267,
"grad_norm": 1.389740228652954,
"learning_rate": 4.9548645030572745e-05,
"loss": 0.15398676872253417,
"step": 500
},
{
"epoch": 0.021708455443395204,
"grad_norm": 1.4510504007339478,
"learning_rate": 4.945819313289193e-05,
"loss": 0.13108017921447754,
"step": 600
},
{
"epoch": 0.025326531350627735,
"grad_norm": 1.4420865774154663,
"learning_rate": 4.936774123521112e-05,
"loss": 0.12688090324401854,
"step": 700
},
{
"epoch": 0.02894460725786027,
"grad_norm": 0.9447225332260132,
"learning_rate": 4.92772893375303e-05,
"loss": 0.11376466751098632,
"step": 800
},
{
"epoch": 0.0325626831650928,
"grad_norm": 1.9140123128890991,
"learning_rate": 4.9186837439849494e-05,
"loss": 0.10734249114990234,
"step": 900
},
{
"epoch": 0.03618075907232534,
"grad_norm": 1.2182528972625732,
"learning_rate": 4.909638554216868e-05,
"loss": 0.09950636863708497,
"step": 1000
},
{
"epoch": 0.03979883497955787,
"grad_norm": 1.5587440729141235,
"learning_rate": 4.900593364448786e-05,
"loss": 0.08896804809570312,
"step": 1100
},
{
"epoch": 0.04341691088679041,
"grad_norm": 2.021667242050171,
"learning_rate": 4.891548174680705e-05,
"loss": 0.09553884506225586,
"step": 1200
},
{
"epoch": 0.04703498679402294,
"grad_norm": 3.561288595199585,
"learning_rate": 4.882502984912624e-05,
"loss": 0.0916118335723877,
"step": 1300
},
{
"epoch": 0.05065306270125547,
"grad_norm": 2.239180088043213,
"learning_rate": 4.873457795144543e-05,
"loss": 0.08524966239929199,
"step": 1400
},
{
"epoch": 0.054271138608488007,
"grad_norm": 1.880850076675415,
"learning_rate": 4.864412605376461e-05,
"loss": 0.08407029151916504,
"step": 1500
},
{
"epoch": 0.05788921451572054,
"grad_norm": 2.365021228790283,
"learning_rate": 4.8553674156083796e-05,
"loss": 0.09083961486816407,
"step": 1600
},
{
"epoch": 0.061507290422953075,
"grad_norm": 1.8810335397720337,
"learning_rate": 4.8463222258402987e-05,
"loss": 0.0841958236694336,
"step": 1700
},
{
"epoch": 0.0651253663301856,
"grad_norm": 1.7592241764068604,
"learning_rate": 4.837277036072217e-05,
"loss": 0.08484026908874512,
"step": 1800
},
{
"epoch": 0.06874344223741814,
"grad_norm": 1.4012072086334229,
"learning_rate": 4.828231846304136e-05,
"loss": 0.07917069911956787,
"step": 1900
},
{
"epoch": 0.07236151814465068,
"grad_norm": 1.6757310628890991,
"learning_rate": 4.8191866565360545e-05,
"loss": 0.0806041145324707,
"step": 2000
},
{
"epoch": 0.0759795940518832,
"grad_norm": 0.6598155498504639,
"learning_rate": 4.810141466767973e-05,
"loss": 0.07851210594177246,
"step": 2100
},
{
"epoch": 0.07959766995911574,
"grad_norm": 1.5423673391342163,
"learning_rate": 4.801096276999892e-05,
"loss": 0.08287395477294922,
"step": 2200
},
{
"epoch": 0.08321574586634828,
"grad_norm": 0.4928501546382904,
"learning_rate": 4.7920510872318104e-05,
"loss": 0.07287377834320069,
"step": 2300
},
{
"epoch": 0.08683382177358082,
"grad_norm": 1.8151744604110718,
"learning_rate": 4.7830058974637295e-05,
"loss": 0.06640945911407471,
"step": 2400
},
{
"epoch": 0.09045189768081334,
"grad_norm": 1.1932594776153564,
"learning_rate": 4.773960707695648e-05,
"loss": 0.07295094966888428,
"step": 2500
},
{
"epoch": 0.09045189768081334,
"eval_accuracy": 0.975962734636331,
"eval_f1": 0.7717093579748968,
"eval_loss": 0.10806787014007568,
"eval_precision": 0.7241184528264584,
"eval_recall": 0.8259959084392468,
"eval_runtime": 117.8075,
"eval_samples_per_second": 169.768,
"eval_steps_per_second": 4.72,
"step": 2500
},
{
"epoch": 0.09406997358804588,
"grad_norm": 1.0983343124389648,
"learning_rate": 4.764915517927566e-05,
"loss": 0.06925168514251709,
"step": 2600
},
{
"epoch": 0.09768804949527841,
"grad_norm": 0.8816857933998108,
"learning_rate": 4.7558703281594854e-05,
"loss": 0.06958985328674316,
"step": 2700
},
{
"epoch": 0.10130612540251094,
"grad_norm": 0.8671173453330994,
"learning_rate": 4.746825138391404e-05,
"loss": 0.07468698024749756,
"step": 2800
},
{
"epoch": 0.10492420130974348,
"grad_norm": 0.27838993072509766,
"learning_rate": 4.737779948623322e-05,
"loss": 0.07403119087219238,
"step": 2900
},
{
"epoch": 0.10854227721697601,
"grad_norm": 0.4557673931121826,
"learning_rate": 4.728734758855241e-05,
"loss": 0.07262114524841308,
"step": 3000
},
{
"epoch": 0.11216035312420855,
"grad_norm": 0.8267778158187866,
"learning_rate": 4.71968956908716e-05,
"loss": 0.07057662963867188,
"step": 3100
},
{
"epoch": 0.11577842903144107,
"grad_norm": 1.401780128479004,
"learning_rate": 4.710644379319079e-05,
"loss": 0.06252509117126465,
"step": 3200
},
{
"epoch": 0.11939650493867361,
"grad_norm": 1.7423473596572876,
"learning_rate": 4.701599189550997e-05,
"loss": 0.06425057411193848,
"step": 3300
},
{
"epoch": 0.12301458084590615,
"grad_norm": 0.7547276616096497,
"learning_rate": 4.6925539997829156e-05,
"loss": 0.06438188076019287,
"step": 3400
},
{
"epoch": 0.12663265675313867,
"grad_norm": 0.4259902238845825,
"learning_rate": 4.6835088100148346e-05,
"loss": 0.0666530466079712,
"step": 3500
},
{
"epoch": 0.1302507326603712,
"grad_norm": 0.42786452174186707,
"learning_rate": 4.674463620246753e-05,
"loss": 0.05976760864257812,
"step": 3600
},
{
"epoch": 0.13386880856760375,
"grad_norm": 1.1275266408920288,
"learning_rate": 4.665418430478672e-05,
"loss": 0.06228343009948731,
"step": 3700
},
{
"epoch": 0.13748688447483629,
"grad_norm": 1.345894455909729,
"learning_rate": 4.6563732407105905e-05,
"loss": 0.0695729398727417,
"step": 3800
},
{
"epoch": 0.14110496038206882,
"grad_norm": 0.5640186071395874,
"learning_rate": 4.647328050942509e-05,
"loss": 0.06416056156158448,
"step": 3900
},
{
"epoch": 0.14472303628930136,
"grad_norm": 1.5667623281478882,
"learning_rate": 4.638282861174428e-05,
"loss": 0.06927279949188232,
"step": 4000
},
{
"epoch": 0.14834111219653387,
"grad_norm": 0.4014199674129486,
"learning_rate": 4.6292376714063464e-05,
"loss": 0.060500779151916505,
"step": 4100
},
{
"epoch": 0.1519591881037664,
"grad_norm": 0.8349173069000244,
"learning_rate": 4.6201924816382655e-05,
"loss": 0.05734441757202149,
"step": 4200
},
{
"epoch": 0.15557726401099894,
"grad_norm": 0.48946359753608704,
"learning_rate": 4.611147291870184e-05,
"loss": 0.0637766456604004,
"step": 4300
},
{
"epoch": 0.15919533991823148,
"grad_norm": 0.44791749119758606,
"learning_rate": 4.602102102102102e-05,
"loss": 0.0613397216796875,
"step": 4400
},
{
"epoch": 0.16281341582546402,
"grad_norm": 1.0726768970489502,
"learning_rate": 4.5930569123340214e-05,
"loss": 0.07220725536346435,
"step": 4500
},
{
"epoch": 0.16643149173269656,
"grad_norm": 0.48238834738731384,
"learning_rate": 4.58401172256594e-05,
"loss": 0.05229937076568603,
"step": 4600
},
{
"epoch": 0.1700495676399291,
"grad_norm": 0.4427547752857208,
"learning_rate": 4.574966532797859e-05,
"loss": 0.06027111530303955,
"step": 4700
},
{
"epoch": 0.17366764354716163,
"grad_norm": 0.44010627269744873,
"learning_rate": 4.565921343029777e-05,
"loss": 0.06117689609527588,
"step": 4800
},
{
"epoch": 0.17728571945439414,
"grad_norm": 0.26065585017204285,
"learning_rate": 4.5568761532616956e-05,
"loss": 0.060817084312438964,
"step": 4900
},
{
"epoch": 0.18090379536162668,
"grad_norm": 0.41624584794044495,
"learning_rate": 4.547830963493615e-05,
"loss": 0.06215104579925537,
"step": 5000
},
{
"epoch": 0.18090379536162668,
"eval_accuracy": 0.9724426137358435,
"eval_f1": 0.741559979115958,
"eval_loss": 0.12759321928024292,
"eval_precision": 0.6822080909213909,
"eval_recall": 0.8122231350376133,
"eval_runtime": 63.257,
"eval_samples_per_second": 316.17,
"eval_steps_per_second": 8.79,
"step": 5000
},
{
"epoch": 0.18452187126885922,
"grad_norm": 1.1262469291687012,
"learning_rate": 4.538785773725533e-05,
"loss": 0.056777148246765136,
"step": 5100
},
{
"epoch": 0.18813994717609175,
"grad_norm": 0.44265300035476685,
"learning_rate": 4.5297405839574515e-05,
"loss": 0.05986386775970459,
"step": 5200
},
{
"epoch": 0.1917580230833243,
"grad_norm": 0.5468171238899231,
"learning_rate": 4.5206953941893706e-05,
"loss": 0.05671721935272217,
"step": 5300
},
{
"epoch": 0.19537609899055683,
"grad_norm": 0.3858329653739929,
"learning_rate": 4.511650204421289e-05,
"loss": 0.05604006290435791,
"step": 5400
},
{
"epoch": 0.19899417489778937,
"grad_norm": 1.0813618898391724,
"learning_rate": 4.502605014653208e-05,
"loss": 0.05299887180328369,
"step": 5500
},
{
"epoch": 0.20261225080502188,
"grad_norm": 0.7834122776985168,
"learning_rate": 4.4935598248851265e-05,
"loss": 0.0669465970993042,
"step": 5600
},
{
"epoch": 0.2062303267122544,
"grad_norm": 0.8666114211082458,
"learning_rate": 4.484514635117045e-05,
"loss": 0.06568387985229492,
"step": 5700
},
{
"epoch": 0.20984840261948695,
"grad_norm": 0.7354055643081665,
"learning_rate": 4.475469445348964e-05,
"loss": 0.06354703903198242,
"step": 5800
},
{
"epoch": 0.2134664785267195,
"grad_norm": 0.3984626829624176,
"learning_rate": 4.4664242555808824e-05,
"loss": 0.05610593318939209,
"step": 5900
},
{
"epoch": 0.21708455443395203,
"grad_norm": 0.5307297110557556,
"learning_rate": 4.4573790658128014e-05,
"loss": 0.058310718536376954,
"step": 6000
},
{
"epoch": 0.22070263034118456,
"grad_norm": 0.23685064911842346,
"learning_rate": 4.44833387604472e-05,
"loss": 0.0474505615234375,
"step": 6100
},
{
"epoch": 0.2243207062484171,
"grad_norm": 0.6271052360534668,
"learning_rate": 4.439288686276638e-05,
"loss": 0.05871774673461914,
"step": 6200
},
{
"epoch": 0.22793878215564964,
"grad_norm": 0.6762889623641968,
"learning_rate": 4.430243496508557e-05,
"loss": 0.05517944812774658,
"step": 6300
},
{
"epoch": 0.23155685806288215,
"grad_norm": 0.9603418111801147,
"learning_rate": 4.421198306740476e-05,
"loss": 0.05483291625976563,
"step": 6400
},
{
"epoch": 0.23517493397011469,
"grad_norm": 0.6032853126525879,
"learning_rate": 4.412153116972395e-05,
"loss": 0.05903904914855957,
"step": 6500
},
{
"epoch": 0.23879300987734722,
"grad_norm": 0.40814077854156494,
"learning_rate": 4.403107927204313e-05,
"loss": 0.05642669677734375,
"step": 6600
},
{
"epoch": 0.24241108578457976,
"grad_norm": 0.5799020528793335,
"learning_rate": 4.3940627374362316e-05,
"loss": 0.055092153549194334,
"step": 6700
},
{
"epoch": 0.2460291616918123,
"grad_norm": 1.0993859767913818,
"learning_rate": 4.385017547668151e-05,
"loss": 0.054167227745056154,
"step": 6800
},
{
"epoch": 0.24964723759904484,
"grad_norm": 1.9801974296569824,
"learning_rate": 4.375972357900069e-05,
"loss": 0.057117671966552735,
"step": 6900
},
{
"epoch": 0.25326531350627735,
"grad_norm": 0.4046414792537689,
"learning_rate": 4.366927168131988e-05,
"loss": 0.054672832489013674,
"step": 7000
},
{
"epoch": 0.2568833894135099,
"grad_norm": 0.41931968927383423,
"learning_rate": 4.3578819783639066e-05,
"loss": 0.05668231964111328,
"step": 7100
},
{
"epoch": 0.2605014653207424,
"grad_norm": 0.5075521469116211,
"learning_rate": 4.348836788595825e-05,
"loss": 0.05900467395782471,
"step": 7200
},
{
"epoch": 0.264119541227975,
"grad_norm": 1.0615949630737305,
"learning_rate": 4.339791598827744e-05,
"loss": 0.060022168159484864,
"step": 7300
},
{
"epoch": 0.2677376171352075,
"grad_norm": 0.6786783337593079,
"learning_rate": 4.3307464090596625e-05,
"loss": 0.053788251876831054,
"step": 7400
},
{
"epoch": 0.27135569304244,
"grad_norm": 0.7518507838249207,
"learning_rate": 4.321701219291581e-05,
"loss": 0.05555037975311279,
"step": 7500
},
{
"epoch": 0.27135569304244,
"eval_accuracy": 0.9812751684036897,
"eval_f1": 0.8064070486745359,
"eval_loss": 0.08261791616678238,
"eval_precision": 0.7701385325808107,
"eval_recall": 0.8462604101225857,
"eval_runtime": 62.4561,
"eval_samples_per_second": 320.225,
"eval_steps_per_second": 8.902,
"step": 7500
},
{
"epoch": 0.27497376894967257,
"grad_norm": 0.8300764560699463,
"learning_rate": 4.3126560295235e-05,
"loss": 0.051460466384887694,
"step": 7600
},
{
"epoch": 0.2785918448569051,
"grad_norm": 1.0100982189178467,
"learning_rate": 4.303610839755418e-05,
"loss": 0.05660095691680908,
"step": 7700
},
{
"epoch": 0.28220992076413765,
"grad_norm": 0.5547285676002502,
"learning_rate": 4.2945656499873374e-05,
"loss": 0.05661679267883301,
"step": 7800
},
{
"epoch": 0.28582799667137015,
"grad_norm": 0.49258002638816833,
"learning_rate": 4.285520460219256e-05,
"loss": 0.04981692790985107,
"step": 7900
},
{
"epoch": 0.2894460725786027,
"grad_norm": 2.1518049240112305,
"learning_rate": 4.276475270451174e-05,
"loss": 0.04876615524291992,
"step": 8000
},
{
"epoch": 0.29306414848583523,
"grad_norm": 0.973175048828125,
"learning_rate": 4.267430080683093e-05,
"loss": 0.0555543327331543,
"step": 8100
},
{
"epoch": 0.29668222439306774,
"grad_norm": 2.2509944438934326,
"learning_rate": 4.258384890915012e-05,
"loss": 0.05133993148803711,
"step": 8200
},
{
"epoch": 0.3003003003003003,
"grad_norm": 1.938225507736206,
"learning_rate": 4.249339701146931e-05,
"loss": 0.05030904769897461,
"step": 8300
},
{
"epoch": 0.3039183762075328,
"grad_norm": 0.5656659007072449,
"learning_rate": 4.240294511378849e-05,
"loss": 0.05507714748382568,
"step": 8400
},
{
"epoch": 0.3075364521147654,
"grad_norm": 0.7741718888282776,
"learning_rate": 4.2312493216107676e-05,
"loss": 0.05459506511688232,
"step": 8500
},
{
"epoch": 0.3111545280219979,
"grad_norm": 0.547379195690155,
"learning_rate": 4.2222041318426867e-05,
"loss": 0.050563540458679196,
"step": 8600
},
{
"epoch": 0.31477260392923045,
"grad_norm": 0.5133877396583557,
"learning_rate": 4.213158942074605e-05,
"loss": 0.05503926753997803,
"step": 8700
},
{
"epoch": 0.31839067983646296,
"grad_norm": 0.4732136130332947,
"learning_rate": 4.204113752306524e-05,
"loss": 0.04883493423461914,
"step": 8800
},
{
"epoch": 0.32200875574369553,
"grad_norm": 0.7309387922286987,
"learning_rate": 4.1950685625384425e-05,
"loss": 0.0464065933227539,
"step": 8900
},
{
"epoch": 0.32562683165092804,
"grad_norm": 0.9696952104568481,
"learning_rate": 4.186023372770361e-05,
"loss": 0.05353004455566406,
"step": 9000
},
{
"epoch": 0.32924490755816055,
"grad_norm": 0.6350353956222534,
"learning_rate": 4.17697818300228e-05,
"loss": 0.05357151508331299,
"step": 9100
},
{
"epoch": 0.3328629834653931,
"grad_norm": 0.5927383899688721,
"learning_rate": 4.1679329932341984e-05,
"loss": 0.0496389102935791,
"step": 9200
},
{
"epoch": 0.3364810593726256,
"grad_norm": 0.555016040802002,
"learning_rate": 4.1588878034661175e-05,
"loss": 0.048683485984802245,
"step": 9300
},
{
"epoch": 0.3400991352798582,
"grad_norm": 0.33153098821640015,
"learning_rate": 4.149842613698036e-05,
"loss": 0.049552416801452635,
"step": 9400
},
{
"epoch": 0.3437172111870907,
"grad_norm": 0.7421421408653259,
"learning_rate": 4.140797423929954e-05,
"loss": 0.050444388389587404,
"step": 9500
},
{
"epoch": 0.34733528709432326,
"grad_norm": 0.7501067519187927,
"learning_rate": 4.1317522341618734e-05,
"loss": 0.05306045532226562,
"step": 9600
},
{
"epoch": 0.3509533630015558,
"grad_norm": 0.9074022173881531,
"learning_rate": 4.122707044393792e-05,
"loss": 0.04894153594970703,
"step": 9700
},
{
"epoch": 0.3545714389087883,
"grad_norm": 0.6082141399383545,
"learning_rate": 4.11366185462571e-05,
"loss": 0.05211612224578857,
"step": 9800
},
{
"epoch": 0.35818951481602085,
"grad_norm": 0.6638932824134827,
"learning_rate": 4.104616664857629e-05,
"loss": 0.05089833736419678,
"step": 9900
},
{
"epoch": 0.36180759072325336,
"grad_norm": 0.8939893841743469,
"learning_rate": 4.095571475089548e-05,
"loss": 0.05038036823272705,
"step": 10000
},
{
"epoch": 0.36180759072325336,
"eval_accuracy": 0.9821651815196725,
"eval_f1": 0.8226399325197526,
"eval_loss": 0.07629744708538055,
"eval_precision": 0.7916120576671035,
"eval_recall": 0.8561993588814253,
"eval_runtime": 62.5369,
"eval_samples_per_second": 319.811,
"eval_steps_per_second": 8.891,
"step": 10000
},
{
"epoch": 0.3654256666304859,
"grad_norm": 0.3776226043701172,
"learning_rate": 4.086526285321467e-05,
"loss": 0.05038893222808838,
"step": 10100
},
{
"epoch": 0.36904374253771843,
"grad_norm": 0.29007160663604736,
"learning_rate": 4.077481095553385e-05,
"loss": 0.05022284507751465,
"step": 10200
},
{
"epoch": 0.372661818444951,
"grad_norm": 0.2021007239818573,
"learning_rate": 4.0684359057853036e-05,
"loss": 0.049036202430725095,
"step": 10300
},
{
"epoch": 0.3762798943521835,
"grad_norm": 0.2728661894798279,
"learning_rate": 4.0593907160172226e-05,
"loss": 0.05147543907165528,
"step": 10400
},
{
"epoch": 0.379897970259416,
"grad_norm": 0.6017497181892395,
"learning_rate": 4.050345526249141e-05,
"loss": 0.052560653686523434,
"step": 10500
},
{
"epoch": 0.3835160461666486,
"grad_norm": 0.5500878095626831,
"learning_rate": 4.0413003364810594e-05,
"loss": 0.0445310115814209,
"step": 10600
},
{
"epoch": 0.3871341220738811,
"grad_norm": 1.6260461807250977,
"learning_rate": 4.0322551467129785e-05,
"loss": 0.04827467441558838,
"step": 10700
},
{
"epoch": 0.39075219798111366,
"grad_norm": 1.0797089338302612,
"learning_rate": 4.023209956944897e-05,
"loss": 0.0508196496963501,
"step": 10800
},
{
"epoch": 0.39437027388834617,
"grad_norm": 0.33457517623901367,
"learning_rate": 4.014164767176816e-05,
"loss": 0.04953153133392334,
"step": 10900
},
{
"epoch": 0.39798834979557873,
"grad_norm": 0.5582904815673828,
"learning_rate": 4.0051195774087344e-05,
"loss": 0.04928678035736084,
"step": 11000
},
{
"epoch": 0.40160642570281124,
"grad_norm": 0.21949921548366547,
"learning_rate": 3.996074387640653e-05,
"loss": 0.05192047119140625,
"step": 11100
},
{
"epoch": 0.40522450161004375,
"grad_norm": 0.7574787139892578,
"learning_rate": 3.987029197872572e-05,
"loss": 0.049414234161376955,
"step": 11200
},
{
"epoch": 0.4088425775172763,
"grad_norm": 1.8344570398330688,
"learning_rate": 3.97798400810449e-05,
"loss": 0.05043137550354004,
"step": 11300
},
{
"epoch": 0.4124606534245088,
"grad_norm": 0.618725061416626,
"learning_rate": 3.968938818336409e-05,
"loss": 0.04852957248687744,
"step": 11400
},
{
"epoch": 0.4160787293317414,
"grad_norm": 0.6515002250671387,
"learning_rate": 3.959893628568328e-05,
"loss": 0.051465816497802734,
"step": 11500
},
{
"epoch": 0.4196968052389739,
"grad_norm": 0.6772841215133667,
"learning_rate": 3.950848438800246e-05,
"loss": 0.05751809120178222,
"step": 11600
},
{
"epoch": 0.42331488114620647,
"grad_norm": 0.3189091384410858,
"learning_rate": 3.941803249032165e-05,
"loss": 0.047155842781066895,
"step": 11700
},
{
"epoch": 0.426932957053439,
"grad_norm": 0.2367490977048874,
"learning_rate": 3.9327580592640836e-05,
"loss": 0.043431487083435055,
"step": 11800
},
{
"epoch": 0.43055103296067154,
"grad_norm": 0.38205036520957947,
"learning_rate": 3.923712869496002e-05,
"loss": 0.04606367588043213,
"step": 11900
},
{
"epoch": 0.43416910886790405,
"grad_norm": 0.539438009262085,
"learning_rate": 3.914667679727921e-05,
"loss": 0.04509395122528076,
"step": 12000
},
{
"epoch": 0.43778718477513656,
"grad_norm": 1.1849830150604248,
"learning_rate": 3.9056224899598395e-05,
"loss": 0.045330324172973634,
"step": 12100
},
{
"epoch": 0.4414052606823691,
"grad_norm": 0.6970862746238708,
"learning_rate": 3.896577300191758e-05,
"loss": 0.04937627792358398,
"step": 12200
},
{
"epoch": 0.44502333658960164,
"grad_norm": 0.3145708739757538,
"learning_rate": 3.887532110423677e-05,
"loss": 0.04958348274230957,
"step": 12300
},
{
"epoch": 0.4486414124968342,
"grad_norm": 1.822594404220581,
"learning_rate": 3.8784869206555954e-05,
"loss": 0.05177441120147705,
"step": 12400
},
{
"epoch": 0.4522594884040667,
"grad_norm": 0.3980540335178375,
"learning_rate": 3.8694417308875145e-05,
"loss": 0.04803945064544678,
"step": 12500
},
{
"epoch": 0.4522594884040667,
"eval_accuracy": 0.9839402163062075,
"eval_f1": 0.8303541577576488,
"eval_loss": 0.07028726488351822,
"eval_precision": 0.8025429842491283,
"eval_recall": 0.8601620515794391,
"eval_runtime": 61.9616,
"eval_samples_per_second": 322.781,
"eval_steps_per_second": 8.973,
"step": 12500
},
{
"epoch": 0.4558775643112993,
"grad_norm": 2.3516685962677,
"learning_rate": 3.860396541119433e-05,
"loss": 0.04993240833282471,
"step": 12600
},
{
"epoch": 0.4594956402185318,
"grad_norm": 0.9219645857810974,
"learning_rate": 3.851351351351351e-05,
"loss": 0.04464954853057861,
"step": 12700
},
{
"epoch": 0.4631137161257643,
"grad_norm": 0.7087405920028687,
"learning_rate": 3.8423061615832704e-05,
"loss": 0.041380634307861326,
"step": 12800
},
{
"epoch": 0.46673179203299686,
"grad_norm": 0.3233760893344879,
"learning_rate": 3.833260971815189e-05,
"loss": 0.05234696865081787,
"step": 12900
},
{
"epoch": 0.47034986794022937,
"grad_norm": 0.31167057156562805,
"learning_rate": 3.824215782047107e-05,
"loss": 0.04531662464141846,
"step": 13000
},
{
"epoch": 0.47396794384746194,
"grad_norm": 0.9034203886985779,
"learning_rate": 3.815170592279026e-05,
"loss": 0.04655809879302979,
"step": 13100
},
{
"epoch": 0.47758601975469445,
"grad_norm": 0.3943072259426117,
"learning_rate": 3.8061254025109447e-05,
"loss": 0.0500339937210083,
"step": 13200
},
{
"epoch": 0.481204095661927,
"grad_norm": 0.9143586158752441,
"learning_rate": 3.797080212742864e-05,
"loss": 0.04793615818023682,
"step": 13300
},
{
"epoch": 0.4848221715691595,
"grad_norm": 1.2170947790145874,
"learning_rate": 3.788035022974782e-05,
"loss": 0.04486670970916748,
"step": 13400
},
{
"epoch": 0.48844024747639203,
"grad_norm": 0.4851992130279541,
"learning_rate": 3.7789898332067005e-05,
"loss": 0.0455370569229126,
"step": 13500
},
{
"epoch": 0.4920583233836246,
"grad_norm": 0.3209129273891449,
"learning_rate": 3.7699446434386196e-05,
"loss": 0.04612759113311768,
"step": 13600
},
{
"epoch": 0.4956763992908571,
"grad_norm": 0.6042996644973755,
"learning_rate": 3.760899453670538e-05,
"loss": 0.04637802600860596,
"step": 13700
},
{
"epoch": 0.49929447519808967,
"grad_norm": 0.422635018825531,
"learning_rate": 3.751854263902457e-05,
"loss": 0.050551199913024904,
"step": 13800
},
{
"epoch": 0.5029125511053222,
"grad_norm": 0.9524370431900024,
"learning_rate": 3.7428090741343755e-05,
"loss": 0.04804905891418457,
"step": 13900
},
{
"epoch": 0.5065306270125547,
"grad_norm": 0.8618633151054382,
"learning_rate": 3.733763884366294e-05,
"loss": 0.0453568172454834,
"step": 14000
},
{
"epoch": 0.5101487029197873,
"grad_norm": 0.8186506032943726,
"learning_rate": 3.724718694598213e-05,
"loss": 0.04810242176055908,
"step": 14100
},
{
"epoch": 0.5137667788270198,
"grad_norm": 0.4649534225463867,
"learning_rate": 3.7156735048301314e-05,
"loss": 0.041149930953979494,
"step": 14200
},
{
"epoch": 0.5173848547342523,
"grad_norm": 1.2224235534667969,
"learning_rate": 3.70662831506205e-05,
"loss": 0.0440573263168335,
"step": 14300
},
{
"epoch": 0.5210029306414848,
"grad_norm": 1.2368969917297363,
"learning_rate": 3.697583125293969e-05,
"loss": 0.045858840942382816,
"step": 14400
},
{
"epoch": 0.5246210065487174,
"grad_norm": 1.4308712482452393,
"learning_rate": 3.688537935525887e-05,
"loss": 0.0431610631942749,
"step": 14500
},
{
"epoch": 0.52823908245595,
"grad_norm": 1.7747290134429932,
"learning_rate": 3.6794927457578063e-05,
"loss": 0.04555936813354492,
"step": 14600
},
{
"epoch": 0.5318571583631825,
"grad_norm": 0.6626078486442566,
"learning_rate": 3.670447555989725e-05,
"loss": 0.04809264183044434,
"step": 14700
},
{
"epoch": 0.535475234270415,
"grad_norm": 0.49305254220962524,
"learning_rate": 3.661402366221643e-05,
"loss": 0.044796910285949704,
"step": 14800
},
{
"epoch": 0.5390933101776475,
"grad_norm": 0.5383502840995789,
"learning_rate": 3.652357176453562e-05,
"loss": 0.04197264194488525,
"step": 14900
},
{
"epoch": 0.54271138608488,
"grad_norm": 0.9339898824691772,
"learning_rate": 3.6433119866854806e-05,
"loss": 0.04077723026275635,
"step": 15000
},
{
"epoch": 0.54271138608488,
"eval_accuracy": 0.9837071542003397,
"eval_f1": 0.8344733667950663,
"eval_loss": 0.0750078409910202,
"eval_precision": 0.8071688796555565,
"eval_recall": 0.8636898145910855,
"eval_runtime": 62.6857,
"eval_samples_per_second": 319.052,
"eval_steps_per_second": 8.87,
"step": 15000
},
{
"epoch": 0.5463294619921126,
"grad_norm": 0.7692775130271912,
"learning_rate": 3.634266796917399e-05,
"loss": 0.04739581108093262,
"step": 15100
},
{
"epoch": 0.5499475378993451,
"grad_norm": 1.047753095626831,
"learning_rate": 3.625221607149318e-05,
"loss": 0.04375821590423584,
"step": 15200
},
{
"epoch": 0.5535656138065776,
"grad_norm": 0.9720122218132019,
"learning_rate": 3.6161764173812365e-05,
"loss": 0.0421258020401001,
"step": 15300
},
{
"epoch": 0.5571836897138102,
"grad_norm": 0.3475571274757385,
"learning_rate": 3.6071312276131556e-05,
"loss": 0.04756541728973389,
"step": 15400
},
{
"epoch": 0.5608017656210428,
"grad_norm": 0.8692478537559509,
"learning_rate": 3.598086037845074e-05,
"loss": 0.04661733150482178,
"step": 15500
},
{
"epoch": 0.5644198415282753,
"grad_norm": 1.0307046175003052,
"learning_rate": 3.5890408480769924e-05,
"loss": 0.044859604835510256,
"step": 15600
},
{
"epoch": 0.5680379174355078,
"grad_norm": 0.654683530330658,
"learning_rate": 3.5799956583089115e-05,
"loss": 0.04575653076171875,
"step": 15700
},
{
"epoch": 0.5716559933427403,
"grad_norm": 2.222489356994629,
"learning_rate": 3.57095046854083e-05,
"loss": 0.04321366310119629,
"step": 15800
},
{
"epoch": 0.5752740692499728,
"grad_norm": 1.1416321992874146,
"learning_rate": 3.561905278772748e-05,
"loss": 0.043632102012634275,
"step": 15900
},
{
"epoch": 0.5788921451572054,
"grad_norm": 1.0366028547286987,
"learning_rate": 3.5528600890046673e-05,
"loss": 0.04524300575256348,
"step": 16000
},
{
"epoch": 0.582510221064438,
"grad_norm": 0.7538347840309143,
"learning_rate": 3.543814899236586e-05,
"loss": 0.04251582622528076,
"step": 16100
},
{
"epoch": 0.5861282969716705,
"grad_norm": 0.2561816871166229,
"learning_rate": 3.534769709468505e-05,
"loss": 0.04683804512023926,
"step": 16200
},
{
"epoch": 0.589746372878903,
"grad_norm": 0.9383835196495056,
"learning_rate": 3.525724519700423e-05,
"loss": 0.0412297248840332,
"step": 16300
},
{
"epoch": 0.5933644487861355,
"grad_norm": 0.5518015623092651,
"learning_rate": 3.5166793299323416e-05,
"loss": 0.0455796480178833,
"step": 16400
},
{
"epoch": 0.5969825246933681,
"grad_norm": 0.5094241499900818,
"learning_rate": 3.507634140164261e-05,
"loss": 0.04736936569213867,
"step": 16500
},
{
"epoch": 0.6006006006006006,
"grad_norm": 0.2816466987133026,
"learning_rate": 3.498588950396179e-05,
"loss": 0.042105512619018556,
"step": 16600
},
{
"epoch": 0.6042186765078331,
"grad_norm": 0.4187323749065399,
"learning_rate": 3.489543760628098e-05,
"loss": 0.044366950988769534,
"step": 16700
},
{
"epoch": 0.6078367524150656,
"grad_norm": 0.28667891025543213,
"learning_rate": 3.4804985708600166e-05,
"loss": 0.03723037719726562,
"step": 16800
},
{
"epoch": 0.6114548283222982,
"grad_norm": 0.3902330994606018,
"learning_rate": 3.471453381091935e-05,
"loss": 0.042644596099853514,
"step": 16900
},
{
"epoch": 0.6150729042295308,
"grad_norm": 0.465101033449173,
"learning_rate": 3.462408191323854e-05,
"loss": 0.04263707160949707,
"step": 17000
},
{
"epoch": 0.6186909801367633,
"grad_norm": 1.1710171699523926,
"learning_rate": 3.4533630015557725e-05,
"loss": 0.044122686386108396,
"step": 17100
},
{
"epoch": 0.6223090560439958,
"grad_norm": 0.4717200696468353,
"learning_rate": 3.444317811787691e-05,
"loss": 0.042054853439331054,
"step": 17200
},
{
"epoch": 0.6259271319512283,
"grad_norm": 0.18602319061756134,
"learning_rate": 3.43527262201961e-05,
"loss": 0.03980276823043823,
"step": 17300
},
{
"epoch": 0.6295452078584609,
"grad_norm": 2.258084535598755,
"learning_rate": 3.4262274322515284e-05,
"loss": 0.043924779891967775,
"step": 17400
},
{
"epoch": 0.6331632837656934,
"grad_norm": 0.5568512082099915,
"learning_rate": 3.4171822424834474e-05,
"loss": 0.04432165145874024,
"step": 17500
},
{
"epoch": 0.6331632837656934,
"eval_accuracy": 0.9848981898715126,
"eval_f1": 0.8395063656955402,
"eval_loss": 0.06519697606563568,
"eval_precision": 0.8148625494685449,
"eval_recall": 0.8656872694469949,
"eval_runtime": 61.9341,
"eval_samples_per_second": 322.924,
"eval_steps_per_second": 8.977,
"step": 17500
},
{
"epoch": 0.6367813596729259,
"grad_norm": 0.302276611328125,
"learning_rate": 3.408137052715366e-05,
"loss": 0.04175849914550781,
"step": 17600
},
{
"epoch": 0.6403994355801584,
"grad_norm": 0.20687709748744965,
"learning_rate": 3.399091862947284e-05,
"loss": 0.042713408470153806,
"step": 17700
},
{
"epoch": 0.6440175114873911,
"grad_norm": 0.5285593271255493,
"learning_rate": 3.390046673179203e-05,
"loss": 0.041079201698303223,
"step": 17800
},
{
"epoch": 0.6476355873946236,
"grad_norm": 0.359951913356781,
"learning_rate": 3.381001483411122e-05,
"loss": 0.047190561294555664,
"step": 17900
},
{
"epoch": 0.6512536633018561,
"grad_norm": 0.5516379475593567,
"learning_rate": 3.371956293643041e-05,
"loss": 0.049062256813049314,
"step": 18000
},
{
"epoch": 0.6548717392090886,
"grad_norm": 0.2408919632434845,
"learning_rate": 3.362911103874959e-05,
"loss": 0.041800622940063474,
"step": 18100
},
{
"epoch": 0.6584898151163211,
"grad_norm": 0.5572479963302612,
"learning_rate": 3.3538659141068776e-05,
"loss": 0.04303212165832519,
"step": 18200
},
{
"epoch": 0.6621078910235537,
"grad_norm": 1.1610311269760132,
"learning_rate": 3.344820724338797e-05,
"loss": 0.04213200092315674,
"step": 18300
},
{
"epoch": 0.6657259669307862,
"grad_norm": 0.945891797542572,
"learning_rate": 3.335775534570715e-05,
"loss": 0.0419348955154419,
"step": 18400
},
{
"epoch": 0.6693440428380187,
"grad_norm": 0.40828007459640503,
"learning_rate": 3.326730344802634e-05,
"loss": 0.039156782627105716,
"step": 18500
},
{
"epoch": 0.6729621187452512,
"grad_norm": 2.0386905670166016,
"learning_rate": 3.3176851550345526e-05,
"loss": 0.042091598510742186,
"step": 18600
},
{
"epoch": 0.6765801946524838,
"grad_norm": 2.043750762939453,
"learning_rate": 3.308639965266471e-05,
"loss": 0.04341127872467041,
"step": 18700
},
{
"epoch": 0.6801982705597164,
"grad_norm": 1.103946328163147,
"learning_rate": 3.29959477549839e-05,
"loss": 0.04109795570373535,
"step": 18800
},
{
"epoch": 0.6838163464669489,
"grad_norm": 1.6356172561645508,
"learning_rate": 3.2905495857303084e-05,
"loss": 0.04152417182922363,
"step": 18900
},
{
"epoch": 0.6874344223741814,
"grad_norm": 0.5166067481040955,
"learning_rate": 3.2815043959622275e-05,
"loss": 0.03941408634185791,
"step": 19000
},
{
"epoch": 0.6910524982814139,
"grad_norm": 0.341791570186615,
"learning_rate": 3.272459206194146e-05,
"loss": 0.04008223056793213,
"step": 19100
},
{
"epoch": 0.6946705741886465,
"grad_norm": 0.2977801263332367,
"learning_rate": 3.263414016426064e-05,
"loss": 0.046716113090515134,
"step": 19200
},
{
"epoch": 0.698288650095879,
"grad_norm": 1.640602707862854,
"learning_rate": 3.2543688266579834e-05,
"loss": 0.043398504257202146,
"step": 19300
},
{
"epoch": 0.7019067260031115,
"grad_norm": 0.3690544366836548,
"learning_rate": 3.245323636889902e-05,
"loss": 0.03948961734771728,
"step": 19400
},
{
"epoch": 0.7055248019103441,
"grad_norm": 2.460749387741089,
"learning_rate": 3.236278447121821e-05,
"loss": 0.04185768127441406,
"step": 19500
},
{
"epoch": 0.7091428778175766,
"grad_norm": 0.5380750894546509,
"learning_rate": 3.227233257353739e-05,
"loss": 0.040400395393371584,
"step": 19600
},
{
"epoch": 0.7127609537248092,
"grad_norm": 0.44135797023773193,
"learning_rate": 3.218188067585658e-05,
"loss": 0.04154191017150879,
"step": 19700
},
{
"epoch": 0.7163790296320417,
"grad_norm": 0.5789956450462341,
"learning_rate": 3.209142877817577e-05,
"loss": 0.0443493127822876,
"step": 19800
},
{
"epoch": 0.7199971055392742,
"grad_norm": 0.32769912481307983,
"learning_rate": 3.200097688049495e-05,
"loss": 0.03976017475128174,
"step": 19900
},
{
"epoch": 0.7236151814465067,
"grad_norm": 0.6033921837806702,
"learning_rate": 3.1910524982814136e-05,
"loss": 0.04033390522003174,
"step": 20000
},
{
"epoch": 0.7236151814465067,
"eval_accuracy": 0.9859394821797719,
"eval_f1": 0.8507431047883741,
"eval_loss": 0.064690500497818,
"eval_precision": 0.8298106965631318,
"eval_recall": 0.8727589039771904,
"eval_runtime": 62.6781,
"eval_samples_per_second": 319.091,
"eval_steps_per_second": 8.871,
"step": 20000
},
{
"epoch": 0.7272332573537392,
"grad_norm": 0.21106982231140137,
"learning_rate": 3.1820073085133327e-05,
"loss": 0.0368848705291748,
"step": 20100
},
{
"epoch": 0.7308513332609718,
"grad_norm": 0.8279436826705933,
"learning_rate": 3.172962118745251e-05,
"loss": 0.040103306770324705,
"step": 20200
},
{
"epoch": 0.7344694091682044,
"grad_norm": 0.21994882822036743,
"learning_rate": 3.16391692897717e-05,
"loss": 0.037559795379638675,
"step": 20300
},
{
"epoch": 0.7380874850754369,
"grad_norm": 1.8766059875488281,
"learning_rate": 3.1548717392090885e-05,
"loss": 0.04059103012084961,
"step": 20400
},
{
"epoch": 0.7417055609826694,
"grad_norm": 0.6307962536811829,
"learning_rate": 3.145826549441007e-05,
"loss": 0.03980612993240357,
"step": 20500
},
{
"epoch": 0.745323636889902,
"grad_norm": 0.33936986327171326,
"learning_rate": 3.136781359672926e-05,
"loss": 0.043472270965576175,
"step": 20600
},
{
"epoch": 0.7489417127971345,
"grad_norm": 0.7730916738510132,
"learning_rate": 3.1277361699048444e-05,
"loss": 0.040565075874328616,
"step": 20700
},
{
"epoch": 0.752559788704367,
"grad_norm": 0.3246110677719116,
"learning_rate": 3.1186909801367635e-05,
"loss": 0.04017134189605713,
"step": 20800
},
{
"epoch": 0.7561778646115995,
"grad_norm": 0.8956949710845947,
"learning_rate": 3.109645790368682e-05,
"loss": 0.04045989513397217,
"step": 20900
},
{
"epoch": 0.759795940518832,
"grad_norm": 2.5085365772247314,
"learning_rate": 3.1006006006006e-05,
"loss": 0.0404241943359375,
"step": 21000
},
{
"epoch": 0.7634140164260647,
"grad_norm": 0.1668255627155304,
"learning_rate": 3.0915554108325194e-05,
"loss": 0.039553046226501465,
"step": 21100
},
{
"epoch": 0.7670320923332972,
"grad_norm": 0.39517688751220703,
"learning_rate": 3.082510221064438e-05,
"loss": 0.04120331764221191,
"step": 21200
},
{
"epoch": 0.7706501682405297,
"grad_norm": 0.6607240438461304,
"learning_rate": 3.073465031296357e-05,
"loss": 0.03997873306274414,
"step": 21300
},
{
"epoch": 0.7742682441477622,
"grad_norm": 0.44018736481666565,
"learning_rate": 3.064419841528275e-05,
"loss": 0.041695055961608884,
"step": 21400
},
{
"epoch": 0.7778863200549948,
"grad_norm": 0.15856041014194489,
"learning_rate": 3.055374651760194e-05,
"loss": 0.04077398300170899,
"step": 21500
},
{
"epoch": 0.7815043959622273,
"grad_norm": 0.39261528849601746,
"learning_rate": 3.0463294619921127e-05,
"loss": 0.041572155952453616,
"step": 21600
},
{
"epoch": 0.7851224718694598,
"grad_norm": 0.28265002369880676,
"learning_rate": 3.0372842722240315e-05,
"loss": 0.045727620124816896,
"step": 21700
},
{
"epoch": 0.7887405477766923,
"grad_norm": 0.6709412336349487,
"learning_rate": 3.0282390824559502e-05,
"loss": 0.04259458065032959,
"step": 21800
},
{
"epoch": 0.7923586236839248,
"grad_norm": 0.24202914535999298,
"learning_rate": 3.0191938926878686e-05,
"loss": 0.03839920997619629,
"step": 21900
},
{
"epoch": 0.7959766995911575,
"grad_norm": 0.4965508282184601,
"learning_rate": 3.0101487029197874e-05,
"loss": 0.03700316905975342,
"step": 22000
},
{
"epoch": 0.79959477549839,
"grad_norm": 0.596442461013794,
"learning_rate": 3.001103513151706e-05,
"loss": 0.04116812229156494,
"step": 22100
},
{
"epoch": 0.8032128514056225,
"grad_norm": 0.5273512601852417,
"learning_rate": 2.992058323383625e-05,
"loss": 0.04079509735107422,
"step": 22200
},
{
"epoch": 0.806830927312855,
"grad_norm": 0.24124516546726227,
"learning_rate": 2.9830131336155432e-05,
"loss": 0.03795903921127319,
"step": 22300
},
{
"epoch": 0.8104490032200875,
"grad_norm": 0.46343305706977844,
"learning_rate": 2.973967943847462e-05,
"loss": 0.038403522968292234,
"step": 22400
},
{
"epoch": 0.8140670791273201,
"grad_norm": 0.2311462014913559,
"learning_rate": 2.9649227540793807e-05,
"loss": 0.04132327079772949,
"step": 22500
},
{
"epoch": 0.8140670791273201,
"eval_accuracy": 0.9865150342336365,
"eval_f1": 0.8464219002621376,
"eval_loss": 0.05898759886622429,
"eval_precision": 0.8253309864544272,
"eval_recall": 0.8686190177032491,
"eval_runtime": 62.4843,
"eval_samples_per_second": 320.08,
"eval_steps_per_second": 8.898,
"step": 22500
},
{
"epoch": 0.8176851550345526,
"grad_norm": 0.6530361175537109,
"learning_rate": 2.9558775643112995e-05,
"loss": 0.04163932323455811,
"step": 22600
},
{
"epoch": 0.8213032309417851,
"grad_norm": 1.38533353805542,
"learning_rate": 2.946832374543218e-05,
"loss": 0.03626733779907226,
"step": 22700
},
{
"epoch": 0.8249213068490177,
"grad_norm": 1.6181460618972778,
"learning_rate": 2.9377871847751366e-05,
"loss": 0.03692409038543701,
"step": 22800
},
{
"epoch": 0.8285393827562503,
"grad_norm": 6.322599411010742,
"learning_rate": 2.9287419950070554e-05,
"loss": 0.03785946369171143,
"step": 22900
},
{
"epoch": 0.8321574586634828,
"grad_norm": 0.24266965687274933,
"learning_rate": 2.919696805238974e-05,
"loss": 0.03527719974517822,
"step": 23000
},
{
"epoch": 0.8357755345707153,
"grad_norm": 0.41426071524620056,
"learning_rate": 2.910651615470893e-05,
"loss": 0.0348510479927063,
"step": 23100
},
{
"epoch": 0.8393936104779478,
"grad_norm": 0.3566010892391205,
"learning_rate": 2.9016064257028112e-05,
"loss": 0.03639560461044312,
"step": 23200
},
{
"epoch": 0.8430116863851803,
"grad_norm": 0.14937593042850494,
"learning_rate": 2.89256123593473e-05,
"loss": 0.033641955852508544,
"step": 23300
},
{
"epoch": 0.8466297622924129,
"grad_norm": 0.5473237037658691,
"learning_rate": 2.8835160461666487e-05,
"loss": 0.03712946176528931,
"step": 23400
},
{
"epoch": 0.8502478381996454,
"grad_norm": 0.3679254949092865,
"learning_rate": 2.874470856398567e-05,
"loss": 0.03785475969314575,
"step": 23500
},
{
"epoch": 0.853865914106878,
"grad_norm": 0.20851418375968933,
"learning_rate": 2.8654256666304862e-05,
"loss": 0.04206960201263428,
"step": 23600
},
{
"epoch": 0.8574839900141105,
"grad_norm": 0.22139862179756165,
"learning_rate": 2.8563804768624046e-05,
"loss": 0.03989522218704224,
"step": 23700
},
{
"epoch": 0.8611020659213431,
"grad_norm": 0.14680643379688263,
"learning_rate": 2.8473352870943233e-05,
"loss": 0.03717276811599732,
"step": 23800
},
{
"epoch": 0.8647201418285756,
"grad_norm": 0.2279856950044632,
"learning_rate": 2.838290097326242e-05,
"loss": 0.039047441482543944,
"step": 23900
},
{
"epoch": 0.8683382177358081,
"grad_norm": 1.1088160276412964,
"learning_rate": 2.8292449075581605e-05,
"loss": 0.03408738613128662,
"step": 24000
},
{
"epoch": 0.8719562936430406,
"grad_norm": 0.8532550930976868,
"learning_rate": 2.8201997177900796e-05,
"loss": 0.036566758155822755,
"step": 24100
},
{
"epoch": 0.8755743695502731,
"grad_norm": 0.1683458536863327,
"learning_rate": 2.811154528021998e-05,
"loss": 0.0397763442993164,
"step": 24200
},
{
"epoch": 0.8791924454575057,
"grad_norm": 0.3468044102191925,
"learning_rate": 2.8021093382539164e-05,
"loss": 0.036167433261871336,
"step": 24300
},
{
"epoch": 0.8828105213647383,
"grad_norm": 1.5043731927871704,
"learning_rate": 2.7930641484858354e-05,
"loss": 0.04083109855651856,
"step": 24400
},
{
"epoch": 0.8864285972719708,
"grad_norm": 2.7504560947418213,
"learning_rate": 2.784018958717754e-05,
"loss": 0.039477238655090334,
"step": 24500
},
{
"epoch": 0.8900466731792033,
"grad_norm": 0.27413201332092285,
"learning_rate": 2.7749737689496726e-05,
"loss": 0.03859598875045776,
"step": 24600
},
{
"epoch": 0.8936647490864358,
"grad_norm": 0.4622710645198822,
"learning_rate": 2.7659285791815913e-05,
"loss": 0.03455983877182007,
"step": 24700
},
{
"epoch": 0.8972828249936684,
"grad_norm": 1.0147453546524048,
"learning_rate": 2.7568833894135097e-05,
"loss": 0.03525468587875366,
"step": 24800
},
{
"epoch": 0.9009009009009009,
"grad_norm": 0.34606319665908813,
"learning_rate": 2.7478381996454288e-05,
"loss": 0.03580186367034912,
"step": 24900
},
{
"epoch": 0.9045189768081334,
"grad_norm": 0.3202800750732422,
"learning_rate": 2.7387930098773472e-05,
"loss": 0.03665663719177246,
"step": 25000
},
{
"epoch": 0.9045189768081334,
"eval_accuracy": 0.986656714492393,
"eval_f1": 0.8509657594381035,
"eval_loss": 0.05820872634649277,
"eval_precision": 0.8288109453496006,
"eval_recall": 0.8743375376536349,
"eval_runtime": 62.5862,
"eval_samples_per_second": 319.559,
"eval_steps_per_second": 8.884,
"step": 25000
},
{
"epoch": 0.9081370527153659,
"grad_norm": 0.557600736618042,
"learning_rate": 2.7297478201092656e-05,
"loss": 0.03967963457107544,
"step": 25100
},
{
"epoch": 0.9117551286225986,
"grad_norm": 0.4092039465904236,
"learning_rate": 2.7207026303411847e-05,
"loss": 0.03797311782836914,
"step": 25200
},
{
"epoch": 0.9153732045298311,
"grad_norm": 0.40534520149230957,
"learning_rate": 2.711657440573103e-05,
"loss": 0.036147847175598144,
"step": 25300
},
{
"epoch": 0.9189912804370636,
"grad_norm": 0.4325968623161316,
"learning_rate": 2.702612250805022e-05,
"loss": 0.03767855882644653,
"step": 25400
},
{
"epoch": 0.9226093563442961,
"grad_norm": 0.25961676239967346,
"learning_rate": 2.6935670610369406e-05,
"loss": 0.03738126039505005,
"step": 25500
},
{
"epoch": 0.9262274322515286,
"grad_norm": 0.2495643049478531,
"learning_rate": 2.684521871268859e-05,
"loss": 0.03809333562850952,
"step": 25600
},
{
"epoch": 0.9298455081587612,
"grad_norm": 0.20810630917549133,
"learning_rate": 2.675476681500778e-05,
"loss": 0.03803467035293579,
"step": 25700
},
{
"epoch": 0.9334635840659937,
"grad_norm": 0.3630845844745636,
"learning_rate": 2.6664314917326964e-05,
"loss": 0.04232705593109131,
"step": 25800
},
{
"epoch": 0.9370816599732262,
"grad_norm": 0.6230679154396057,
"learning_rate": 2.6573863019646155e-05,
"loss": 0.03966914892196655,
"step": 25900
},
{
"epoch": 0.9406997358804587,
"grad_norm": 0.6846088767051697,
"learning_rate": 2.648341112196534e-05,
"loss": 0.03988933086395264,
"step": 26000
},
{
"epoch": 0.9443178117876913,
"grad_norm": 0.29151585698127747,
"learning_rate": 2.6392959224284523e-05,
"loss": 0.036113507747650146,
"step": 26100
},
{
"epoch": 0.9479358876949239,
"grad_norm": 0.3652597963809967,
"learning_rate": 2.6302507326603714e-05,
"loss": 0.03595402717590332,
"step": 26200
},
{
"epoch": 0.9515539636021564,
"grad_norm": 0.3763394355773926,
"learning_rate": 2.6212055428922898e-05,
"loss": 0.03632761478424072,
"step": 26300
},
{
"epoch": 0.9551720395093889,
"grad_norm": 0.16137683391571045,
"learning_rate": 2.612160353124209e-05,
"loss": 0.03010902166366577,
"step": 26400
},
{
"epoch": 0.9587901154166214,
"grad_norm": 0.5310078859329224,
"learning_rate": 2.6031151633561273e-05,
"loss": 0.034855997562408446,
"step": 26500
},
{
"epoch": 0.962408191323854,
"grad_norm": 0.4904273748397827,
"learning_rate": 2.5940699735880457e-05,
"loss": 0.03756725311279297,
"step": 26600
},
{
"epoch": 0.9660262672310865,
"grad_norm": 0.7692480087280273,
"learning_rate": 2.5850247838199648e-05,
"loss": 0.03645958185195923,
"step": 26700
},
{
"epoch": 0.969644343138319,
"grad_norm": 0.45624640583992004,
"learning_rate": 2.5759795940518832e-05,
"loss": 0.037951292991638186,
"step": 26800
},
{
"epoch": 0.9732624190455516,
"grad_norm": 0.41989752650260925,
"learning_rate": 2.5669344042838023e-05,
"loss": 0.03396618366241455,
"step": 26900
},
{
"epoch": 0.9768804949527841,
"grad_norm": 0.5218580961227417,
"learning_rate": 2.5578892145157207e-05,
"loss": 0.034535303115844726,
"step": 27000
},
{
"epoch": 0.9804985708600167,
"grad_norm": 0.24635274708271027,
"learning_rate": 2.548844024747639e-05,
"loss": 0.034599866867065426,
"step": 27100
},
{
"epoch": 0.9841166467672492,
"grad_norm": 0.8805984258651733,
"learning_rate": 2.539798834979558e-05,
"loss": 0.0382379937171936,
"step": 27200
},
{
"epoch": 0.9877347226744817,
"grad_norm": 0.4743868410587311,
"learning_rate": 2.5307536452114765e-05,
"loss": 0.03450409173965454,
"step": 27300
},
{
"epoch": 0.9913527985817142,
"grad_norm": 0.4024532735347748,
"learning_rate": 2.521708455443395e-05,
"loss": 0.032371597290039064,
"step": 27400
},
{
"epoch": 0.9949708744889468,
"grad_norm": 1.2098551988601685,
"learning_rate": 2.512663265675314e-05,
"loss": 0.03947657585144043,
"step": 27500
},
{
"epoch": 0.9949708744889468,
"eval_accuracy": 0.9862055646169487,
"eval_f1": 0.8529879572824359,
"eval_loss": 0.05825402960181236,
"eval_precision": 0.8304042715484363,
"eval_recall": 0.8768343562235217,
"eval_runtime": 62.2283,
"eval_samples_per_second": 321.397,
"eval_steps_per_second": 8.935,
"step": 27500
},
{
"epoch": 0.9985889503961793,
"grad_norm": 0.3243059515953064,
"learning_rate": 2.5036180759072324e-05,
"loss": 0.03721761703491211,
"step": 27600
},
{
"epoch": 1.0022070263034117,
"grad_norm": 0.5898327231407166,
"learning_rate": 2.494572886139151e-05,
"loss": 0.03310096025466919,
"step": 27700
},
{
"epoch": 1.0058251022106444,
"grad_norm": 0.30443838238716125,
"learning_rate": 2.48552769637107e-05,
"loss": 0.033098301887512206,
"step": 27800
},
{
"epoch": 1.009443178117877,
"grad_norm": 0.7985163331031799,
"learning_rate": 2.4764825066029886e-05,
"loss": 0.031821844577789304,
"step": 27900
},
{
"epoch": 1.0130612540251094,
"grad_norm": 0.6274137496948242,
"learning_rate": 2.4674373168349074e-05,
"loss": 0.03217078447341919,
"step": 28000
},
{
"epoch": 1.016679329932342,
"grad_norm": 0.744652271270752,
"learning_rate": 2.4583921270668258e-05,
"loss": 0.030337939262390135,
"step": 28100
},
{
"epoch": 1.0202974058395746,
"grad_norm": 0.20680102705955505,
"learning_rate": 2.4493469372987445e-05,
"loss": 0.03135863780975342,
"step": 28200
},
{
"epoch": 1.023915481746807,
"grad_norm": 0.5819505453109741,
"learning_rate": 2.4403017475306633e-05,
"loss": 0.030997350215911865,
"step": 28300
},
{
"epoch": 1.0275335576540396,
"grad_norm": 0.8105890154838562,
"learning_rate": 2.431256557762582e-05,
"loss": 0.029717042446136474,
"step": 28400
},
{
"epoch": 1.031151633561272,
"grad_norm": 0.4248642325401306,
"learning_rate": 2.4222113679945007e-05,
"loss": 0.02956360101699829,
"step": 28500
},
{
"epoch": 1.0347697094685047,
"grad_norm": 0.17442703247070312,
"learning_rate": 2.413166178226419e-05,
"loss": 0.03415003776550293,
"step": 28600
},
{
"epoch": 1.0383877853757373,
"grad_norm": 0.3765491843223572,
"learning_rate": 2.404120988458338e-05,
"loss": 0.03359386682510376,
"step": 28700
},
{
"epoch": 1.0420058612829697,
"grad_norm": 0.2846165895462036,
"learning_rate": 2.3950757986902566e-05,
"loss": 0.03219552993774414,
"step": 28800
},
{
"epoch": 1.0456239371902023,
"grad_norm": 0.6828330755233765,
"learning_rate": 2.3860306089221754e-05,
"loss": 0.028468940258026123,
"step": 28900
},
{
"epoch": 1.0492420130974347,
"grad_norm": 0.24457824230194092,
"learning_rate": 2.3769854191540938e-05,
"loss": 0.03526209592819214,
"step": 29000
},
{
"epoch": 1.0528600890046673,
"grad_norm": 0.4728795886039734,
"learning_rate": 2.3679402293860125e-05,
"loss": 0.027564334869384765,
"step": 29100
},
{
"epoch": 1.0564781649119,
"grad_norm": 0.34912073612213135,
"learning_rate": 2.3588950396179312e-05,
"loss": 0.03199338912963867,
"step": 29200
},
{
"epoch": 1.0600962408191323,
"grad_norm": 0.7076539993286133,
"learning_rate": 2.34984984984985e-05,
"loss": 0.02838871717453003,
"step": 29300
},
{
"epoch": 1.063714316726365,
"grad_norm": 0.22086426615715027,
"learning_rate": 2.3408046600817687e-05,
"loss": 0.03132739543914795,
"step": 29400
},
{
"epoch": 1.0673323926335974,
"grad_norm": 0.4026763439178467,
"learning_rate": 2.331759470313687e-05,
"loss": 0.030288333892822265,
"step": 29500
},
{
"epoch": 1.07095046854083,
"grad_norm": 0.6986600160598755,
"learning_rate": 2.322714280545606e-05,
"loss": 0.027701468467712403,
"step": 29600
},
{
"epoch": 1.0745685444480626,
"grad_norm": 0.3440704047679901,
"learning_rate": 2.3136690907775246e-05,
"loss": 0.03199631690979004,
"step": 29700
},
{
"epoch": 1.078186620355295,
"grad_norm": 0.5154510736465454,
"learning_rate": 2.3046239010094434e-05,
"loss": 0.03085195779800415,
"step": 29800
},
{
"epoch": 1.0818046962625276,
"grad_norm": 1.2285401821136475,
"learning_rate": 2.295578711241362e-05,
"loss": 0.031190474033355713,
"step": 29900
},
{
"epoch": 1.08542277216976,
"grad_norm": 0.3479061722755432,
"learning_rate": 2.2865335214732805e-05,
"loss": 0.03375990152359009,
"step": 30000
},
{
"epoch": 1.08542277216976,
"eval_accuracy": 0.9868820974514447,
"eval_f1": 0.8562118190241375,
"eval_loss": 0.05674006789922714,
"eval_precision": 0.8352508617387974,
"eval_recall": 0.8782519048309412,
"eval_runtime": 63.2356,
"eval_samples_per_second": 316.278,
"eval_steps_per_second": 8.793,
"step": 30000
},
{
"epoch": 1.0890408480769926,
"grad_norm": 0.18956594169139862,
"learning_rate": 2.2774883317051992e-05,
"loss": 0.027218008041381837,
"step": 30100
},
{
"epoch": 1.0926589239842253,
"grad_norm": 0.24030227959156036,
"learning_rate": 2.268443141937118e-05,
"loss": 0.03073176145553589,
"step": 30200
},
{
"epoch": 1.0962769998914577,
"grad_norm": 0.1687329262495041,
"learning_rate": 2.2593979521690367e-05,
"loss": 0.033424663543701175,
"step": 30300
},
{
"epoch": 1.0998950757986903,
"grad_norm": 1.2173426151275635,
"learning_rate": 2.250352762400955e-05,
"loss": 0.03079766035079956,
"step": 30400
},
{
"epoch": 1.103513151705923,
"grad_norm": 0.35310184955596924,
"learning_rate": 2.241307572632874e-05,
"loss": 0.03289975881576538,
"step": 30500
},
{
"epoch": 1.1071312276131553,
"grad_norm": 0.14718961715698242,
"learning_rate": 2.2322623828647926e-05,
"loss": 0.03266577005386353,
"step": 30600
},
{
"epoch": 1.110749303520388,
"grad_norm": 0.29442161321640015,
"learning_rate": 2.2232171930967113e-05,
"loss": 0.02883612871170044,
"step": 30700
},
{
"epoch": 1.1143673794276203,
"grad_norm": 0.36244460940361023,
"learning_rate": 2.21417200332863e-05,
"loss": 0.030666334629058836,
"step": 30800
},
{
"epoch": 1.117985455334853,
"grad_norm": 0.2421630471944809,
"learning_rate": 2.2051268135605485e-05,
"loss": 0.02931546211242676,
"step": 30900
},
{
"epoch": 1.1216035312420856,
"grad_norm": 0.5055842995643616,
"learning_rate": 2.1960816237924672e-05,
"loss": 0.030934171676635744,
"step": 31000
},
{
"epoch": 1.125221607149318,
"grad_norm": 0.27207571268081665,
"learning_rate": 2.187036434024386e-05,
"loss": 0.03155987024307251,
"step": 31100
},
{
"epoch": 1.1288396830565506,
"grad_norm": 0.5190430879592896,
"learning_rate": 2.1779912442563047e-05,
"loss": 0.030766298770904543,
"step": 31200
},
{
"epoch": 1.132457758963783,
"grad_norm": 0.5578451156616211,
"learning_rate": 2.168946054488223e-05,
"loss": 0.030352199077606203,
"step": 31300
},
{
"epoch": 1.1360758348710156,
"grad_norm": 0.775244951248169,
"learning_rate": 2.159900864720142e-05,
"loss": 0.027431459426879884,
"step": 31400
},
{
"epoch": 1.1396939107782482,
"grad_norm": 0.17452310025691986,
"learning_rate": 2.1508556749520606e-05,
"loss": 0.02899331569671631,
"step": 31500
},
{
"epoch": 1.1433119866854806,
"grad_norm": 1.0152820348739624,
"learning_rate": 2.1418104851839793e-05,
"loss": 0.02969914197921753,
"step": 31600
},
{
"epoch": 1.1469300625927132,
"grad_norm": 0.21474546194076538,
"learning_rate": 2.132765295415898e-05,
"loss": 0.03098618268966675,
"step": 31700
},
{
"epoch": 1.1505481384999456,
"grad_norm": 0.27076786756515503,
"learning_rate": 2.1237201056478165e-05,
"loss": 0.026145567893981935,
"step": 31800
},
{
"epoch": 1.1541662144071783,
"grad_norm": 0.20778276026248932,
"learning_rate": 2.1146749158797352e-05,
"loss": 0.030465993881225586,
"step": 31900
},
{
"epoch": 1.1577842903144109,
"grad_norm": 0.2573922276496887,
"learning_rate": 2.105629726111654e-05,
"loss": 0.031988742351531985,
"step": 32000
},
{
"epoch": 1.1614023662216433,
"grad_norm": 0.33712247014045715,
"learning_rate": 2.0965845363435727e-05,
"loss": 0.031969892978668216,
"step": 32100
},
{
"epoch": 1.165020442128876,
"grad_norm": 0.5677493214607239,
"learning_rate": 2.0875393465754914e-05,
"loss": 0.02892348051071167,
"step": 32200
},
{
"epoch": 1.1686385180361083,
"grad_norm": 0.19627009332180023,
"learning_rate": 2.0784941568074098e-05,
"loss": 0.02890573740005493,
"step": 32300
},
{
"epoch": 1.172256593943341,
"grad_norm": 0.2041957825422287,
"learning_rate": 2.0694489670393286e-05,
"loss": 0.02606424331665039,
"step": 32400
},
{
"epoch": 1.1758746698505735,
"grad_norm": 0.36798298358917236,
"learning_rate": 2.0604037772712473e-05,
"loss": 0.029083385467529296,
"step": 32500
},
{
"epoch": 1.1758746698505735,
"eval_accuracy": 0.9877625116339074,
"eval_f1": 0.8611236096967975,
"eval_loss": 0.05370509624481201,
"eval_precision": 0.8443082257515248,
"eval_recall": 0.8786224004896986,
"eval_runtime": 62.1854,
"eval_samples_per_second": 321.619,
"eval_steps_per_second": 8.941,
"step": 32500
},
{
"epoch": 1.179492745757806,
"grad_norm": 0.2152443379163742,
"learning_rate": 2.051358587503166e-05,
"loss": 0.028284170627593995,
"step": 32600
},
{
"epoch": 1.1831108216650386,
"grad_norm": 0.2933087646961212,
"learning_rate": 2.0423133977350845e-05,
"loss": 0.034238841533660885,
"step": 32700
},
{
"epoch": 1.1867288975722712,
"grad_norm": 0.36995938420295715,
"learning_rate": 2.0332682079670032e-05,
"loss": 0.03170938491821289,
"step": 32800
},
{
"epoch": 1.1903469734795036,
"grad_norm": 0.7478405833244324,
"learning_rate": 2.024223018198922e-05,
"loss": 0.029751029014587402,
"step": 32900
},
{
"epoch": 1.1939650493867362,
"grad_norm": 0.44457152485847473,
"learning_rate": 2.0151778284308407e-05,
"loss": 0.02949444770812988,
"step": 33000
},
{
"epoch": 1.1975831252939686,
"grad_norm": 0.4324032664299011,
"learning_rate": 2.0061326386627594e-05,
"loss": 0.030652081966400145,
"step": 33100
},
{
"epoch": 1.2012012012012012,
"grad_norm": 1.3409758806228638,
"learning_rate": 1.9970874488946778e-05,
"loss": 0.02934673547744751,
"step": 33200
},
{
"epoch": 1.2048192771084336,
"grad_norm": 0.3867700397968292,
"learning_rate": 1.9880422591265966e-05,
"loss": 0.02774231195449829,
"step": 33300
},
{
"epoch": 1.2084373530156662,
"grad_norm": 0.1256304383277893,
"learning_rate": 1.9789970693585153e-05,
"loss": 0.030440127849578856,
"step": 33400
},
{
"epoch": 1.2120554289228989,
"grad_norm": 0.574845552444458,
"learning_rate": 1.969951879590434e-05,
"loss": 0.030182530879974367,
"step": 33500
},
{
"epoch": 1.2156735048301313,
"grad_norm": 0.501304566860199,
"learning_rate": 1.9609066898223528e-05,
"loss": 0.03053757667541504,
"step": 33600
},
{
"epoch": 1.2192915807373639,
"grad_norm": 0.1869884878396988,
"learning_rate": 1.9518615000542712e-05,
"loss": 0.02801114559173584,
"step": 33700
},
{
"epoch": 1.2229096566445965,
"grad_norm": 0.44489210844039917,
"learning_rate": 1.94281631028619e-05,
"loss": 0.02709296464920044,
"step": 33800
},
{
"epoch": 1.226527732551829,
"grad_norm": 0.2928631007671356,
"learning_rate": 1.9337711205181087e-05,
"loss": 0.033639376163482664,
"step": 33900
},
{
"epoch": 1.2301458084590615,
"grad_norm": 0.2070285826921463,
"learning_rate": 1.9247259307500274e-05,
"loss": 0.03141526222229004,
"step": 34000
},
{
"epoch": 1.233763884366294,
"grad_norm": 0.4693046510219574,
"learning_rate": 1.9156807409819458e-05,
"loss": 0.029341881275177003,
"step": 34100
},
{
"epoch": 1.2373819602735265,
"grad_norm": 0.187980055809021,
"learning_rate": 1.9066355512138645e-05,
"loss": 0.033849341869354246,
"step": 34200
},
{
"epoch": 1.2410000361807592,
"grad_norm": 0.7411011457443237,
"learning_rate": 1.8975903614457833e-05,
"loss": 0.027842617034912108,
"step": 34300
},
{
"epoch": 1.2446181120879916,
"grad_norm": 0.4449065327644348,
"learning_rate": 1.888545171677702e-05,
"loss": 0.031680150032043455,
"step": 34400
},
{
"epoch": 1.2482361879952242,
"grad_norm": 0.7327262759208679,
"learning_rate": 1.8794999819096208e-05,
"loss": 0.02651881694793701,
"step": 34500
},
{
"epoch": 1.2518542639024566,
"grad_norm": 0.41838428378105164,
"learning_rate": 1.870454792141539e-05,
"loss": 0.032553679943084717,
"step": 34600
},
{
"epoch": 1.2554723398096892,
"grad_norm": 0.3279021382331848,
"learning_rate": 1.861409602373458e-05,
"loss": 0.02605849742889404,
"step": 34700
},
{
"epoch": 1.2590904157169218,
"grad_norm": 0.23042799532413483,
"learning_rate": 1.8523644126053766e-05,
"loss": 0.02857684135437012,
"step": 34800
},
{
"epoch": 1.2627084916241542,
"grad_norm": 0.14856815338134766,
"learning_rate": 1.8433192228372954e-05,
"loss": 0.030806925296783447,
"step": 34900
},
{
"epoch": 1.2663265675313868,
"grad_norm": 0.48354101181030273,
"learning_rate": 1.8342740330692138e-05,
"loss": 0.030027375221252442,
"step": 35000
},
{
"epoch": 1.2663265675313868,
"eval_accuracy": 0.9877813255436068,
"eval_f1": 0.8615969042346098,
"eval_loss": 0.05214959755539894,
"eval_precision": 0.8434818838343312,
"eval_recall": 0.8805070957972906,
"eval_runtime": 62.9193,
"eval_samples_per_second": 317.867,
"eval_steps_per_second": 8.837,
"step": 35000
},
{
"epoch": 1.2699446434386195,
"grad_norm": 0.13334180414676666,
"learning_rate": 1.8252288433011325e-05,
"loss": 0.027159340381622314,
"step": 35100
},
{
"epoch": 1.2735627193458519,
"grad_norm": 0.7394197583198547,
"learning_rate": 1.8161836535330513e-05,
"loss": 0.03075253963470459,
"step": 35200
},
{
"epoch": 1.2771807952530845,
"grad_norm": 0.2870982587337494,
"learning_rate": 1.80713846376497e-05,
"loss": 0.030658049583435057,
"step": 35300
},
{
"epoch": 1.2807988711603169,
"grad_norm": 0.9762187004089355,
"learning_rate": 1.7980932739968887e-05,
"loss": 0.031029996871948243,
"step": 35400
},
{
"epoch": 1.2844169470675495,
"grad_norm": 0.44388410449028015,
"learning_rate": 1.789048084228807e-05,
"loss": 0.03051720142364502,
"step": 35500
},
{
"epoch": 1.288035022974782,
"grad_norm": 0.7785915732383728,
"learning_rate": 1.780002894460726e-05,
"loss": 0.02536651849746704,
"step": 35600
},
{
"epoch": 1.2916530988820145,
"grad_norm": 0.1702079176902771,
"learning_rate": 1.7709577046926446e-05,
"loss": 0.030427489280700683,
"step": 35700
},
{
"epoch": 1.2952711747892471,
"grad_norm": 0.4802360236644745,
"learning_rate": 1.7619125149245634e-05,
"loss": 0.03049640417098999,
"step": 35800
},
{
"epoch": 1.2988892506964795,
"grad_norm": 0.40013861656188965,
"learning_rate": 1.752867325156482e-05,
"loss": 0.030040171146392822,
"step": 35900
},
{
"epoch": 1.3025073266037122,
"grad_norm": 0.34162065386772156,
"learning_rate": 1.7438221353884005e-05,
"loss": 0.031596968173980715,
"step": 36000
},
{
"epoch": 1.3061254025109448,
"grad_norm": 0.34575241804122925,
"learning_rate": 1.7347769456203193e-05,
"loss": 0.03362387895584106,
"step": 36100
},
{
"epoch": 1.3097434784181772,
"grad_norm": 0.4098789691925049,
"learning_rate": 1.725731755852238e-05,
"loss": 0.027526361942291258,
"step": 36200
},
{
"epoch": 1.3133615543254098,
"grad_norm": 0.35067400336265564,
"learning_rate": 1.7166865660841567e-05,
"loss": 0.02835451364517212,
"step": 36300
},
{
"epoch": 1.3169796302326424,
"grad_norm": 0.1685800403356552,
"learning_rate": 1.707641376316075e-05,
"loss": 0.028891866207122804,
"step": 36400
},
{
"epoch": 1.3205977061398748,
"grad_norm": 0.32651832699775696,
"learning_rate": 1.698596186547994e-05,
"loss": 0.026589181423187256,
"step": 36500
},
{
"epoch": 1.3242157820471072,
"grad_norm": 0.3153350353240967,
"learning_rate": 1.6895509967799126e-05,
"loss": 0.031108696460723877,
"step": 36600
},
{
"epoch": 1.3278338579543398,
"grad_norm": 0.4476368725299835,
"learning_rate": 1.6805058070118314e-05,
"loss": 0.030014872550964355,
"step": 36700
},
{
"epoch": 1.3314519338615725,
"grad_norm": 0.1972656548023224,
"learning_rate": 1.67146061724375e-05,
"loss": 0.029410278797149657,
"step": 36800
},
{
"epoch": 1.3350700097688049,
"grad_norm": 0.7246927618980408,
"learning_rate": 1.6624154274756685e-05,
"loss": 0.03080254316329956,
"step": 36900
},
{
"epoch": 1.3386880856760375,
"grad_norm": 0.3670811355113983,
"learning_rate": 1.6533702377075872e-05,
"loss": 0.02861506223678589,
"step": 37000
},
{
"epoch": 1.34230616158327,
"grad_norm": 0.22275477647781372,
"learning_rate": 1.644325047939506e-05,
"loss": 0.0255238938331604,
"step": 37100
},
{
"epoch": 1.3459242374905025,
"grad_norm": 0.3272339999675751,
"learning_rate": 1.6352798581714247e-05,
"loss": 0.028979463577270506,
"step": 37200
},
{
"epoch": 1.3495423133977351,
"grad_norm": 0.5552839040756226,
"learning_rate": 1.626234668403343e-05,
"loss": 0.028283817768096922,
"step": 37300
},
{
"epoch": 1.3531603893049677,
"grad_norm": 0.33792686462402344,
"learning_rate": 1.617189478635262e-05,
"loss": 0.03224069595336914,
"step": 37400
},
{
"epoch": 1.3567784652122001,
"grad_norm": 1.0481899976730347,
"learning_rate": 1.6081442888671806e-05,
"loss": 0.02690179109573364,
"step": 37500
},
{
"epoch": 1.3567784652122001,
"eval_accuracy": 0.9878715555186957,
"eval_f1": 0.8683487542236398,
"eval_loss": 0.05309534817934036,
"eval_precision": 0.851476257567078,
"eval_recall": 0.8859034456096264,
"eval_runtime": 62.1337,
"eval_samples_per_second": 321.887,
"eval_steps_per_second": 8.948,
"step": 37500
},
{
"epoch": 1.3603965411194328,
"grad_norm": 0.20256465673446655,
"learning_rate": 1.5990990990990993e-05,
"loss": 0.027432169914245606,
"step": 37600
},
{
"epoch": 1.3640146170266652,
"grad_norm": 0.3237811028957367,
"learning_rate": 1.590053909331018e-05,
"loss": 0.030464730262756347,
"step": 37700
},
{
"epoch": 1.3676326929338978,
"grad_norm": 0.31953930854797363,
"learning_rate": 1.5810087195629365e-05,
"loss": 0.027273902893066405,
"step": 37800
},
{
"epoch": 1.3712507688411302,
"grad_norm": 0.38057664036750793,
"learning_rate": 1.5719635297948552e-05,
"loss": 0.0259963059425354,
"step": 37900
},
{
"epoch": 1.3748688447483628,
"grad_norm": 0.6410769820213318,
"learning_rate": 1.562918340026774e-05,
"loss": 0.031271641254425046,
"step": 38000
},
{
"epoch": 1.3784869206555954,
"grad_norm": 0.8330540060997009,
"learning_rate": 1.5538731502586927e-05,
"loss": 0.02934875011444092,
"step": 38100
},
{
"epoch": 1.3821049965628278,
"grad_norm": 1.1677355766296387,
"learning_rate": 1.5448279604906114e-05,
"loss": 0.02971445083618164,
"step": 38200
},
{
"epoch": 1.3857230724700604,
"grad_norm": 0.4667145609855652,
"learning_rate": 1.53578277072253e-05,
"loss": 0.02775926113128662,
"step": 38300
},
{
"epoch": 1.389341148377293,
"grad_norm": 0.4434032440185547,
"learning_rate": 1.5267375809544486e-05,
"loss": 0.026833882331848146,
"step": 38400
},
{
"epoch": 1.3929592242845255,
"grad_norm": 0.2564474642276764,
"learning_rate": 1.5176923911863672e-05,
"loss": 0.02980698347091675,
"step": 38500
},
{
"epoch": 1.396577300191758,
"grad_norm": 0.43813377618789673,
"learning_rate": 1.5086472014182859e-05,
"loss": 0.028636832237243653,
"step": 38600
},
{
"epoch": 1.4001953760989905,
"grad_norm": 0.928669810295105,
"learning_rate": 1.4996020116502043e-05,
"loss": 0.02784595012664795,
"step": 38700
},
{
"epoch": 1.403813452006223,
"grad_norm": 1.0816453695297241,
"learning_rate": 1.490556821882123e-05,
"loss": 0.031624915599823,
"step": 38800
},
{
"epoch": 1.4074315279134555,
"grad_norm": 1.6790099143981934,
"learning_rate": 1.4815116321140418e-05,
"loss": 0.02443223476409912,
"step": 38900
},
{
"epoch": 1.4110496038206881,
"grad_norm": 0.39879387617111206,
"learning_rate": 1.4724664423459605e-05,
"loss": 0.02753525972366333,
"step": 39000
},
{
"epoch": 1.4146676797279207,
"grad_norm": 0.6372315883636475,
"learning_rate": 1.4634212525778793e-05,
"loss": 0.02859419822692871,
"step": 39100
},
{
"epoch": 1.4182857556351531,
"grad_norm": 0.4357219934463501,
"learning_rate": 1.4543760628097977e-05,
"loss": 0.02929396152496338,
"step": 39200
},
{
"epoch": 1.4219038315423858,
"grad_norm": 0.8673311471939087,
"learning_rate": 1.4453308730417164e-05,
"loss": 0.027733774185180665,
"step": 39300
},
{
"epoch": 1.4255219074496184,
"grad_norm": 0.31178081035614014,
"learning_rate": 1.4362856832736351e-05,
"loss": 0.029380517005920412,
"step": 39400
},
{
"epoch": 1.4291399833568508,
"grad_norm": 0.9862114191055298,
"learning_rate": 1.4272404935055539e-05,
"loss": 0.02801510810852051,
"step": 39500
},
{
"epoch": 1.4327580592640834,
"grad_norm": 0.3226287364959717,
"learning_rate": 1.4181953037374726e-05,
"loss": 0.02600921630859375,
"step": 39600
},
{
"epoch": 1.436376135171316,
"grad_norm": 1.0932515859603882,
"learning_rate": 1.409150113969391e-05,
"loss": 0.027818257808685302,
"step": 39700
},
{
"epoch": 1.4399942110785484,
"grad_norm": 0.4064158797264099,
"learning_rate": 1.4001049242013098e-05,
"loss": 0.030927972793579103,
"step": 39800
},
{
"epoch": 1.443612286985781,
"grad_norm": 0.6574753522872925,
"learning_rate": 1.3910597344332285e-05,
"loss": 0.028972697257995606,
"step": 39900
},
{
"epoch": 1.4472303628930134,
"grad_norm": 0.24314340949058533,
"learning_rate": 1.3820145446651472e-05,
"loss": 0.029455924034118654,
"step": 40000
},
{
"epoch": 1.4472303628930134,
"eval_accuracy": 0.9882140454666924,
"eval_f1": 0.8711891990109102,
"eval_loss": 0.05167451128363609,
"eval_precision": 0.8548262069393198,
"eval_recall": 0.8881908535897808,
"eval_runtime": 62.5842,
"eval_samples_per_second": 319.57,
"eval_steps_per_second": 8.884,
"step": 40000
},
{
"epoch": 1.450848438800246,
"grad_norm": 0.28122034668922424,
"learning_rate": 1.3729693548970656e-05,
"loss": 0.029821088314056398,
"step": 40100
},
{
"epoch": 1.4544665147074785,
"grad_norm": 0.45019853115081787,
"learning_rate": 1.3639241651289844e-05,
"loss": 0.027684724330902098,
"step": 40200
},
{
"epoch": 1.458084590614711,
"grad_norm": 0.6584652066230774,
"learning_rate": 1.3548789753609031e-05,
"loss": 0.026381478309631348,
"step": 40300
},
{
"epoch": 1.4617026665219437,
"grad_norm": 2.1259236335754395,
"learning_rate": 1.3458337855928219e-05,
"loss": 0.02868267774581909,
"step": 40400
},
{
"epoch": 1.465320742429176,
"grad_norm": 0.9566027522087097,
"learning_rate": 1.3367885958247406e-05,
"loss": 0.027485811710357667,
"step": 40500
},
{
"epoch": 1.4689388183364087,
"grad_norm": 0.9289085268974304,
"learning_rate": 1.327743406056659e-05,
"loss": 0.030939743518829346,
"step": 40600
},
{
"epoch": 1.4725568942436413,
"grad_norm": 0.6716954112052917,
"learning_rate": 1.3186982162885778e-05,
"loss": 0.026526257991790772,
"step": 40700
},
{
"epoch": 1.4761749701508737,
"grad_norm": 0.26186442375183105,
"learning_rate": 1.3096530265204965e-05,
"loss": 0.027606160640716554,
"step": 40800
},
{
"epoch": 1.4797930460581064,
"grad_norm": 0.5962882041931152,
"learning_rate": 1.3006078367524152e-05,
"loss": 0.03013371229171753,
"step": 40900
},
{
"epoch": 1.4834111219653388,
"grad_norm": 0.28622719645500183,
"learning_rate": 1.2915626469843336e-05,
"loss": 0.026788763999938965,
"step": 41000
},
{
"epoch": 1.4870291978725714,
"grad_norm": 0.2146042138338089,
"learning_rate": 1.2825174572162524e-05,
"loss": 0.026920742988586426,
"step": 41100
},
{
"epoch": 1.4906472737798038,
"grad_norm": 0.30449753999710083,
"learning_rate": 1.2734722674481711e-05,
"loss": 0.028757052421569826,
"step": 41200
},
{
"epoch": 1.4942653496870364,
"grad_norm": 0.11651007831096649,
"learning_rate": 1.2644270776800899e-05,
"loss": 0.029123516082763673,
"step": 41300
},
{
"epoch": 1.497883425594269,
"grad_norm": 3.1146299839019775,
"learning_rate": 1.2553818879120086e-05,
"loss": 0.028435797691345216,
"step": 41400
},
{
"epoch": 1.5015015015015014,
"grad_norm": 0.2705380916595459,
"learning_rate": 1.2463366981439272e-05,
"loss": 0.03229628562927246,
"step": 41500
},
{
"epoch": 1.505119577408734,
"grad_norm": 0.5641364455223083,
"learning_rate": 1.2372915083758457e-05,
"loss": 0.02912388801574707,
"step": 41600
},
{
"epoch": 1.5087376533159667,
"grad_norm": 0.4726872444152832,
"learning_rate": 1.2282463186077645e-05,
"loss": 0.028761823177337647,
"step": 41700
},
{
"epoch": 1.512355729223199,
"grad_norm": 2.5604758262634277,
"learning_rate": 1.2192011288396832e-05,
"loss": 0.02635906219482422,
"step": 41800
},
{
"epoch": 1.5159738051304317,
"grad_norm": 0.3598019778728485,
"learning_rate": 1.2101559390716018e-05,
"loss": 0.026577677726745606,
"step": 41900
},
{
"epoch": 1.5195918810376643,
"grad_norm": 0.31742435693740845,
"learning_rate": 1.2011107493035205e-05,
"loss": 0.02479785919189453,
"step": 42000
},
{
"epoch": 1.5232099569448967,
"grad_norm": 1.0102005004882812,
"learning_rate": 1.1920655595354391e-05,
"loss": 0.028279991149902345,
"step": 42100
},
{
"epoch": 1.526828032852129,
"grad_norm": 0.4230172038078308,
"learning_rate": 1.1830203697673578e-05,
"loss": 0.027808871269226074,
"step": 42200
},
{
"epoch": 1.530446108759362,
"grad_norm": 0.35221824049949646,
"learning_rate": 1.1739751799992764e-05,
"loss": 0.02666907787322998,
"step": 42300
},
{
"epoch": 1.5340641846665943,
"grad_norm": 0.37867021560668945,
"learning_rate": 1.1649299902311952e-05,
"loss": 0.028237838745117188,
"step": 42400
},
{
"epoch": 1.5376822605738267,
"grad_norm": 1.1692699193954468,
"learning_rate": 1.1558848004631137e-05,
"loss": 0.027906298637390137,
"step": 42500
},
{
"epoch": 1.5376822605738267,
"eval_accuracy": 0.9883852904406909,
"eval_f1": 0.8713540843735187,
"eval_loss": 0.048916082829236984,
"eval_precision": 0.8549944962093611,
"eval_recall": 0.8883519386588057,
"eval_runtime": 62.2278,
"eval_samples_per_second": 321.4,
"eval_steps_per_second": 8.935,
"step": 42500
},
{
"epoch": 1.5413003364810594,
"grad_norm": 0.273318886756897,
"learning_rate": 1.1468396106950325e-05,
"loss": 0.031116650104522706,
"step": 42600
},
{
"epoch": 1.544918412388292,
"grad_norm": 0.48087653517723083,
"learning_rate": 1.1377944209269512e-05,
"loss": 0.026544408798217775,
"step": 42700
},
{
"epoch": 1.5485364882955244,
"grad_norm": 0.7746985554695129,
"learning_rate": 1.1287492311588698e-05,
"loss": 0.026500403881072998,
"step": 42800
},
{
"epoch": 1.552154564202757,
"grad_norm": 0.1549975574016571,
"learning_rate": 1.1197040413907885e-05,
"loss": 0.026587300300598145,
"step": 42900
},
{
"epoch": 1.5557726401099896,
"grad_norm": 1.972495198249817,
"learning_rate": 1.110658851622707e-05,
"loss": 0.029258613586425782,
"step": 43000
},
{
"epoch": 1.559390716017222,
"grad_norm": 0.6956634521484375,
"learning_rate": 1.1016136618546258e-05,
"loss": 0.026978886127471922,
"step": 43100
},
{
"epoch": 1.5630087919244544,
"grad_norm": 0.16629020869731903,
"learning_rate": 1.0925684720865444e-05,
"loss": 0.03226327657699585,
"step": 43200
},
{
"epoch": 1.5666268678316873,
"grad_norm": 0.37136366963386536,
"learning_rate": 1.0835232823184631e-05,
"loss": 0.028375396728515623,
"step": 43300
},
{
"epoch": 1.5702449437389197,
"grad_norm": 0.2561453580856323,
"learning_rate": 1.0744780925503819e-05,
"loss": 0.027073240280151366,
"step": 43400
},
{
"epoch": 1.573863019646152,
"grad_norm": 0.42630210518836975,
"learning_rate": 1.0654329027823004e-05,
"loss": 0.026704757213592528,
"step": 43500
},
{
"epoch": 1.5774810955533847,
"grad_norm": 0.4090301990509033,
"learning_rate": 1.0563877130142192e-05,
"loss": 0.02855618476867676,
"step": 43600
},
{
"epoch": 1.5810991714606173,
"grad_norm": 0.24324025213718414,
"learning_rate": 1.0473425232461378e-05,
"loss": 0.025224699974060058,
"step": 43700
},
{
"epoch": 1.5847172473678497,
"grad_norm": 0.4220653772354126,
"learning_rate": 1.0382973334780565e-05,
"loss": 0.029145328998565673,
"step": 43800
},
{
"epoch": 1.5883353232750823,
"grad_norm": 0.4333362281322479,
"learning_rate": 1.029252143709975e-05,
"loss": 0.025774214267730713,
"step": 43900
},
{
"epoch": 1.591953399182315,
"grad_norm": 0.15959997475147247,
"learning_rate": 1.0202069539418938e-05,
"loss": 0.026988446712493896,
"step": 44000
},
{
"epoch": 1.5955714750895473,
"grad_norm": 0.2643369138240814,
"learning_rate": 1.0111617641738126e-05,
"loss": 0.0258998441696167,
"step": 44100
},
{
"epoch": 1.59918955099678,
"grad_norm": 0.8528566360473633,
"learning_rate": 1.0021165744057311e-05,
"loss": 0.02746238708496094,
"step": 44200
},
{
"epoch": 1.6028076269040126,
"grad_norm": 0.999005138874054,
"learning_rate": 9.930713846376499e-06,
"loss": 0.028600902557373048,
"step": 44300
},
{
"epoch": 1.606425702811245,
"grad_norm": 0.6834824681282043,
"learning_rate": 9.840261948695684e-06,
"loss": 0.028850455284118653,
"step": 44400
},
{
"epoch": 1.6100437787184774,
"grad_norm": 0.3043724298477173,
"learning_rate": 9.749810051014872e-06,
"loss": 0.0262698769569397,
"step": 44500
},
{
"epoch": 1.6136618546257102,
"grad_norm": 0.8399735689163208,
"learning_rate": 9.659358153334057e-06,
"loss": 0.02827603816986084,
"step": 44600
},
{
"epoch": 1.6172799305329426,
"grad_norm": 0.9611870646476746,
"learning_rate": 9.568906255653245e-06,
"loss": 0.02755260467529297,
"step": 44700
},
{
"epoch": 1.620898006440175,
"grad_norm": 0.23461508750915527,
"learning_rate": 9.47845435797243e-06,
"loss": 0.0311501145362854,
"step": 44800
},
{
"epoch": 1.6245160823474076,
"grad_norm": 2.882127046585083,
"learning_rate": 9.388002460291618e-06,
"loss": 0.029984614849090575,
"step": 44900
},
{
"epoch": 1.6281341582546403,
"grad_norm": 0.32786279916763306,
"learning_rate": 9.297550562610804e-06,
"loss": 0.028132951259613036,
"step": 45000
},
{
"epoch": 1.6281341582546403,
"eval_accuracy": 0.9886770980197016,
"eval_f1": 0.8710388819944511,
"eval_loss": 0.047967541962862015,
"eval_precision": 0.855134094859697,
"eval_recall": 0.887546513313681,
"eval_runtime": 62.7107,
"eval_samples_per_second": 318.925,
"eval_steps_per_second": 8.866,
"step": 45000
},
{
"epoch": 1.6317522341618727,
"grad_norm": 1.6328613758087158,
"learning_rate": 9.207098664929991e-06,
"loss": 0.028099877834320067,
"step": 45100
},
{
"epoch": 1.6353703100691053,
"grad_norm": 1.1488419771194458,
"learning_rate": 9.116646767249177e-06,
"loss": 0.025699715614318847,
"step": 45200
},
{
"epoch": 1.638988385976338,
"grad_norm": 1.2527875900268555,
"learning_rate": 9.026194869568364e-06,
"loss": 0.02980081081390381,
"step": 45300
},
{
"epoch": 1.6426064618835703,
"grad_norm": 0.25659850239753723,
"learning_rate": 8.93574297188755e-06,
"loss": 0.02849080801010132,
"step": 45400
},
{
"epoch": 1.6462245377908027,
"grad_norm": 0.24858339130878448,
"learning_rate": 8.845291074206737e-06,
"loss": 0.02909574508666992,
"step": 45500
},
{
"epoch": 1.6498426136980355,
"grad_norm": 0.35774946212768555,
"learning_rate": 8.754839176525923e-06,
"loss": 0.028034112453460693,
"step": 45600
},
{
"epoch": 1.653460689605268,
"grad_norm": 0.28512680530548096,
"learning_rate": 8.66438727884511e-06,
"loss": 0.029735114574432373,
"step": 45700
},
{
"epoch": 1.6570787655125003,
"grad_norm": 0.12049074470996857,
"learning_rate": 8.573935381164296e-06,
"loss": 0.03128848075866699,
"step": 45800
},
{
"epoch": 1.660696841419733,
"grad_norm": 0.5767261385917664,
"learning_rate": 8.483483483483484e-06,
"loss": 0.02762418031692505,
"step": 45900
},
{
"epoch": 1.6643149173269656,
"grad_norm": 0.12318204343318939,
"learning_rate": 8.39303158580267e-06,
"loss": 0.026004743576049805,
"step": 46000
},
{
"epoch": 1.667932993234198,
"grad_norm": 0.311279833316803,
"learning_rate": 8.302579688121857e-06,
"loss": 0.024458692073822022,
"step": 46100
},
{
"epoch": 1.6715510691414306,
"grad_norm": 0.2753770351409912,
"learning_rate": 8.212127790441042e-06,
"loss": 0.026231870651245118,
"step": 46200
},
{
"epoch": 1.6751691450486632,
"grad_norm": 0.8421895503997803,
"learning_rate": 8.12167589276023e-06,
"loss": 0.02496417760848999,
"step": 46300
},
{
"epoch": 1.6787872209558956,
"grad_norm": 0.6493498086929321,
"learning_rate": 8.031223995079417e-06,
"loss": 0.026742682456970215,
"step": 46400
},
{
"epoch": 1.6824052968631282,
"grad_norm": 0.3029896318912506,
"learning_rate": 7.940772097398603e-06,
"loss": 0.024227650165557862,
"step": 46500
},
{
"epoch": 1.6860233727703609,
"grad_norm": 0.34622183442115784,
"learning_rate": 7.85032019971779e-06,
"loss": 0.025336668491363526,
"step": 46600
},
{
"epoch": 1.6896414486775932,
"grad_norm": 1.1520912647247314,
"learning_rate": 7.759868302036976e-06,
"loss": 0.028549084663391112,
"step": 46700
},
{
"epoch": 1.6932595245848256,
"grad_norm": 0.11390261352062225,
"learning_rate": 7.669416404356163e-06,
"loss": 0.025614957809448242,
"step": 46800
},
{
"epoch": 1.6968776004920583,
"grad_norm": 0.20818683505058289,
"learning_rate": 7.57896450667535e-06,
"loss": 0.02624866247177124,
"step": 46900
},
{
"epoch": 1.700495676399291,
"grad_norm": 0.11861401051282883,
"learning_rate": 7.488512608994537e-06,
"loss": 0.029836065769195556,
"step": 47000
},
{
"epoch": 1.7041137523065233,
"grad_norm": 0.21509072184562683,
"learning_rate": 7.398060711313724e-06,
"loss": 0.02764824151992798,
"step": 47100
},
{
"epoch": 1.707731828213756,
"grad_norm": 0.09410534054040909,
"learning_rate": 7.3076088136329105e-06,
"loss": 0.026358423233032228,
"step": 47200
},
{
"epoch": 1.7113499041209885,
"grad_norm": 0.4441370666027069,
"learning_rate": 7.217156915952097e-06,
"loss": 0.028589205741882326,
"step": 47300
},
{
"epoch": 1.714967980028221,
"grad_norm": 0.301600843667984,
"learning_rate": 7.1267050182712836e-06,
"loss": 0.02586300849914551,
"step": 47400
},
{
"epoch": 1.7185860559354535,
"grad_norm": 0.2969602942466736,
"learning_rate": 7.03625312059047e-06,
"loss": 0.027719602584838868,
"step": 47500
},
{
"epoch": 1.7185860559354535,
"eval_accuracy": 0.9887869098191715,
"eval_f1": 0.8751810891473175,
"eval_loss": 0.04670108109712601,
"eval_precision": 0.8604607721046077,
"eval_recall": 0.8904138275423251,
"eval_runtime": 62.4542,
"eval_samples_per_second": 320.234,
"eval_steps_per_second": 8.903,
"step": 47500
},
{
"epoch": 1.7222041318426862,
"grad_norm": 2.922269582748413,
"learning_rate": 6.945801222909657e-06,
"loss": 0.026613037586212158,
"step": 47600
},
{
"epoch": 1.7258222077499186,
"grad_norm": 0.3603607714176178,
"learning_rate": 6.855349325228843e-06,
"loss": 0.02875258445739746,
"step": 47700
},
{
"epoch": 1.729440283657151,
"grad_norm": 0.17424313724040985,
"learning_rate": 6.764897427548031e-06,
"loss": 0.028092458248138427,
"step": 47800
},
{
"epoch": 1.7330583595643838,
"grad_norm": 0.39376911520957947,
"learning_rate": 6.674445529867217e-06,
"loss": 0.029860684871673582,
"step": 47900
},
{
"epoch": 1.7366764354716162,
"grad_norm": 0.30766257643699646,
"learning_rate": 6.583993632186404e-06,
"loss": 0.027765181064605713,
"step": 48000
},
{
"epoch": 1.7402945113788486,
"grad_norm": 0.4809003472328186,
"learning_rate": 6.49354173450559e-06,
"loss": 0.025850486755371094,
"step": 48100
},
{
"epoch": 1.7439125872860812,
"grad_norm": 0.31469446420669556,
"learning_rate": 6.403089836824777e-06,
"loss": 0.024390408992767332,
"step": 48200
},
{
"epoch": 1.7475306631933138,
"grad_norm": 0.1946684867143631,
"learning_rate": 6.312637939143963e-06,
"loss": 0.02534383535385132,
"step": 48300
},
{
"epoch": 1.7511487391005462,
"grad_norm": 0.31097686290740967,
"learning_rate": 6.22218604146315e-06,
"loss": 0.02695645809173584,
"step": 48400
},
{
"epoch": 1.7547668150077789,
"grad_norm": 0.7921291589736938,
"learning_rate": 6.1317341437823365e-06,
"loss": 0.023772099018096925,
"step": 48500
},
{
"epoch": 1.7583848909150115,
"grad_norm": 0.3385520577430725,
"learning_rate": 6.041282246101523e-06,
"loss": 0.024593567848205565,
"step": 48600
},
{
"epoch": 1.7620029668222439,
"grad_norm": 0.23133955895900726,
"learning_rate": 5.95083034842071e-06,
"loss": 0.025404906272888182,
"step": 48700
},
{
"epoch": 1.7656210427294765,
"grad_norm": 0.17175310850143433,
"learning_rate": 5.860378450739896e-06,
"loss": 0.024191346168518067,
"step": 48800
},
{
"epoch": 1.7692391186367091,
"grad_norm": 1.453963041305542,
"learning_rate": 5.769926553059084e-06,
"loss": 0.023371386528015136,
"step": 48900
},
{
"epoch": 1.7728571945439415,
"grad_norm": 0.4487530291080475,
"learning_rate": 5.67947465537827e-06,
"loss": 0.024376935958862304,
"step": 49000
},
{
"epoch": 1.776475270451174,
"grad_norm": 0.17453834414482117,
"learning_rate": 5.589022757697457e-06,
"loss": 0.027640838623046875,
"step": 49100
},
{
"epoch": 1.7800933463584065,
"grad_norm": 0.24941837787628174,
"learning_rate": 5.498570860016643e-06,
"loss": 0.02413508415222168,
"step": 49200
},
{
"epoch": 1.7837114222656392,
"grad_norm": 0.3545306622982025,
"learning_rate": 5.40811896233583e-06,
"loss": 0.025269722938537596,
"step": 49300
},
{
"epoch": 1.7873294981728716,
"grad_norm": 0.21222856640815735,
"learning_rate": 5.317667064655016e-06,
"loss": 0.02443007230758667,
"step": 49400
},
{
"epoch": 1.7909475740801042,
"grad_norm": 0.5955353379249573,
"learning_rate": 5.227215166974203e-06,
"loss": 0.027793030738830566,
"step": 49500
},
{
"epoch": 1.7945656499873368,
"grad_norm": 1.0362492799758911,
"learning_rate": 5.13676326929339e-06,
"loss": 0.02576704978942871,
"step": 49600
},
{
"epoch": 1.7981837258945692,
"grad_norm": 0.2961190938949585,
"learning_rate": 5.046311371612577e-06,
"loss": 0.027634003162384034,
"step": 49700
},
{
"epoch": 1.8018018018018018,
"grad_norm": 0.2701990604400635,
"learning_rate": 4.9558594739317635e-06,
"loss": 0.026762216091156005,
"step": 49800
},
{
"epoch": 1.8054198777090344,
"grad_norm": 0.3419773280620575,
"learning_rate": 4.86540757625095e-06,
"loss": 0.028021221160888673,
"step": 49900
},
{
"epoch": 1.8090379536162668,
"grad_norm": 0.3847455680370331,
"learning_rate": 4.7749556785701366e-06,
"loss": 0.028925769329071045,
"step": 50000
},
{
"epoch": 1.8090379536162668,
"eval_accuracy": 0.9891697152879526,
"eval_f1": 0.8756019071264223,
"eval_loss": 0.04578976333141327,
"eval_precision": 0.8598627201292046,
"eval_recall": 0.8919280271911596,
"eval_runtime": 62.7397,
"eval_samples_per_second": 318.777,
"eval_steps_per_second": 8.862,
"step": 50000
},
{
"epoch": 1.8126560295234992,
"grad_norm": 0.12807752192020416,
"learning_rate": 4.684503780889323e-06,
"loss": 0.024477434158325196,
"step": 50100
},
{
"epoch": 1.816274105430732,
"grad_norm": 0.5839409828186035,
"learning_rate": 4.59405188320851e-06,
"loss": 0.029098427295684813,
"step": 50200
},
{
"epoch": 1.8198921813379645,
"grad_norm": 0.1988334357738495,
"learning_rate": 4.503599985527696e-06,
"loss": 0.027852838039398194,
"step": 50300
},
{
"epoch": 1.8235102572451969,
"grad_norm": 1.1250760555267334,
"learning_rate": 4.413148087846884e-06,
"loss": 0.025283007621765136,
"step": 50400
},
{
"epoch": 1.8271283331524295,
"grad_norm": 0.3275587558746338,
"learning_rate": 4.32269619016607e-06,
"loss": 0.0253476619720459,
"step": 50500
},
{
"epoch": 1.8307464090596621,
"grad_norm": 0.2422463297843933,
"learning_rate": 4.232244292485257e-06,
"loss": 0.025618109703063965,
"step": 50600
},
{
"epoch": 1.8343644849668945,
"grad_norm": 0.6434578895568848,
"learning_rate": 4.141792394804443e-06,
"loss": 0.026464188098907472,
"step": 50700
},
{
"epoch": 1.8379825608741271,
"grad_norm": 0.16934601962566376,
"learning_rate": 4.05134049712363e-06,
"loss": 0.025098586082458497,
"step": 50800
},
{
"epoch": 1.8416006367813598,
"grad_norm": 0.21844395995140076,
"learning_rate": 3.9608885994428164e-06,
"loss": 0.023906781673431396,
"step": 50900
},
{
"epoch": 1.8452187126885922,
"grad_norm": 0.2674906253814697,
"learning_rate": 3.870436701762003e-06,
"loss": 0.026905314922332765,
"step": 51000
},
{
"epoch": 1.8488367885958248,
"grad_norm": 0.4344836473464966,
"learning_rate": 3.77998480408119e-06,
"loss": 0.026017348766326904,
"step": 51100
},
{
"epoch": 1.8524548645030574,
"grad_norm": 0.5953734517097473,
"learning_rate": 3.6895329064003765e-06,
"loss": 0.02634397745132446,
"step": 51200
},
{
"epoch": 1.8560729404102898,
"grad_norm": 0.14901016652584076,
"learning_rate": 3.599081008719563e-06,
"loss": 0.02832331895828247,
"step": 51300
},
{
"epoch": 1.8596910163175222,
"grad_norm": 0.7816808223724365,
"learning_rate": 3.5086291110387496e-06,
"loss": 0.026141095161437988,
"step": 51400
},
{
"epoch": 1.8633090922247548,
"grad_norm": 0.5734632015228271,
"learning_rate": 3.418177213357936e-06,
"loss": 0.02372182607650757,
"step": 51500
},
{
"epoch": 1.8669271681319874,
"grad_norm": 0.9664448499679565,
"learning_rate": 3.3277253156771227e-06,
"loss": 0.024712865352630616,
"step": 51600
},
{
"epoch": 1.8705452440392198,
"grad_norm": 0.390066921710968,
"learning_rate": 3.2372734179963093e-06,
"loss": 0.026522459983825682,
"step": 51700
},
{
"epoch": 1.8741633199464525,
"grad_norm": 0.6472379565238953,
"learning_rate": 3.146821520315496e-06,
"loss": 0.024525246620178222,
"step": 51800
},
{
"epoch": 1.877781395853685,
"grad_norm": 0.4985784888267517,
"learning_rate": 3.056369622634683e-06,
"loss": 0.02446552038192749,
"step": 51900
},
{
"epoch": 1.8813994717609175,
"grad_norm": 0.22120802104473114,
"learning_rate": 2.9659177249538694e-06,
"loss": 0.025269200801849367,
"step": 52000
},
{
"epoch": 1.88501754766815,
"grad_norm": 0.3579547703266144,
"learning_rate": 2.8754658272730564e-06,
"loss": 0.025214505195617676,
"step": 52100
},
{
"epoch": 1.8886356235753827,
"grad_norm": 0.7338326573371887,
"learning_rate": 2.785013929592243e-06,
"loss": 0.02668466329574585,
"step": 52200
},
{
"epoch": 1.8922536994826151,
"grad_norm": 0.3315567970275879,
"learning_rate": 2.6945620319114295e-06,
"loss": 0.030078487396240236,
"step": 52300
},
{
"epoch": 1.8958717753898475,
"grad_norm": 0.35072797536849976,
"learning_rate": 2.6041101342306165e-06,
"loss": 0.02516920804977417,
"step": 52400
},
{
"epoch": 1.8994898512970804,
"grad_norm": 0.43289047479629517,
"learning_rate": 2.513658236549803e-06,
"loss": 0.026839351654052733,
"step": 52500
},
{
"epoch": 1.8994898512970804,
"eval_accuracy": 0.9891036746253344,
"eval_f1": 0.876242095754291,
"eval_loss": 0.045680414885282516,
"eval_precision": 0.8623029055350209,
"eval_recall": 0.89063934663896,
"eval_runtime": 62.307,
"eval_samples_per_second": 320.991,
"eval_steps_per_second": 8.924,
"step": 52500
},
{
"epoch": 1.9031079272043128,
"grad_norm": 0.4170491099357605,
"learning_rate": 2.4232063388689896e-06,
"loss": 0.027149310111999513,
"step": 52600
},
{
"epoch": 1.9067260031115452,
"grad_norm": 0.33568137884140015,
"learning_rate": 2.332754441188176e-06,
"loss": 0.024306225776672363,
"step": 52700
},
{
"epoch": 1.9103440790187778,
"grad_norm": 0.831928551197052,
"learning_rate": 2.242302543507363e-06,
"loss": 0.025090248584747316,
"step": 52800
},
{
"epoch": 1.9139621549260104,
"grad_norm": 0.2261083424091339,
"learning_rate": 2.1518506458265497e-06,
"loss": 0.02992173671722412,
"step": 52900
},
{
"epoch": 1.9175802308332428,
"grad_norm": 0.36420953273773193,
"learning_rate": 2.0613987481457362e-06,
"loss": 0.026374735832214356,
"step": 53000
},
{
"epoch": 1.9211983067404754,
"grad_norm": 0.3849758207798004,
"learning_rate": 1.970946850464923e-06,
"loss": 0.024311881065368652,
"step": 53100
},
{
"epoch": 1.924816382647708,
"grad_norm": 0.1625661551952362,
"learning_rate": 1.8804949527841096e-06,
"loss": 0.028159475326538085,
"step": 53200
},
{
"epoch": 1.9284344585549404,
"grad_norm": 0.10745652765035629,
"learning_rate": 1.7900430551032961e-06,
"loss": 0.028279855251312255,
"step": 53300
},
{
"epoch": 1.932052534462173,
"grad_norm": 0.3585937023162842,
"learning_rate": 1.6995911574224827e-06,
"loss": 0.025097475051879883,
"step": 53400
},
{
"epoch": 1.9356706103694057,
"grad_norm": 0.3355402648448944,
"learning_rate": 1.6091392597416697e-06,
"loss": 0.0232719612121582,
"step": 53500
},
{
"epoch": 1.939288686276638,
"grad_norm": 0.6301077604293823,
"learning_rate": 1.5186873620608562e-06,
"loss": 0.023976569175720216,
"step": 53600
},
{
"epoch": 1.9429067621838705,
"grad_norm": 1.720951795578003,
"learning_rate": 1.4282354643800428e-06,
"loss": 0.027393877506256104,
"step": 53700
},
{
"epoch": 1.946524838091103,
"grad_norm": 1.0819095373153687,
"learning_rate": 1.3377835666992295e-06,
"loss": 0.028527204990386964,
"step": 53800
},
{
"epoch": 1.9501429139983357,
"grad_norm": 0.4960351884365082,
"learning_rate": 1.247331669018416e-06,
"loss": 0.023636491298675538,
"step": 53900
},
{
"epoch": 1.9537609899055681,
"grad_norm": 0.6555366516113281,
"learning_rate": 1.1568797713376029e-06,
"loss": 0.02606668949127197,
"step": 54000
},
{
"epoch": 1.9573790658128007,
"grad_norm": 0.17520390450954437,
"learning_rate": 1.0664278736567894e-06,
"loss": 0.024348812103271486,
"step": 54100
},
{
"epoch": 1.9609971417200334,
"grad_norm": 0.2867375612258911,
"learning_rate": 9.75975975975976e-07,
"loss": 0.024609763622283936,
"step": 54200
},
{
"epoch": 1.9646152176272658,
"grad_norm": 0.11981488019227982,
"learning_rate": 8.855240782951626e-07,
"loss": 0.02563744068145752,
"step": 54300
},
{
"epoch": 1.9682332935344984,
"grad_norm": 0.25503483414649963,
"learning_rate": 7.950721806143494e-07,
"loss": 0.026204137802124022,
"step": 54400
},
{
"epoch": 1.971851369441731,
"grad_norm": 0.23244522511959076,
"learning_rate": 7.04620282933536e-07,
"loss": 0.0256950044631958,
"step": 54500
},
{
"epoch": 1.9754694453489634,
"grad_norm": 0.20025278627872467,
"learning_rate": 6.141683852527226e-07,
"loss": 0.025686397552490234,
"step": 54600
},
{
"epoch": 1.9790875212561958,
"grad_norm": 0.4756115972995758,
"learning_rate": 5.237164875719093e-07,
"loss": 0.02578796148300171,
"step": 54700
},
{
"epoch": 1.9827055971634286,
"grad_norm": 0.27420374751091003,
"learning_rate": 4.3326458989109595e-07,
"loss": 0.023311092853546142,
"step": 54800
},
{
"epoch": 1.986323673070661,
"grad_norm": 0.19387075304985046,
"learning_rate": 3.4281269221028255e-07,
"loss": 0.02670889377593994,
"step": 54900
},
{
"epoch": 1.9899417489778934,
"grad_norm": 0.726769745349884,
"learning_rate": 2.523607945294692e-07,
"loss": 0.03058022975921631,
"step": 55000
},
{
"epoch": 1.9899417489778934,
"eval_accuracy": 0.9892449709267501,
"eval_f1": 0.8768783517240833,
"eval_loss": 0.0451948419213295,
"eval_precision": 0.8626445559677067,
"eval_recall": 0.8915897485462072,
"eval_runtime": 62.8103,
"eval_samples_per_second": 318.419,
"eval_steps_per_second": 8.852,
"step": 55000
},
{
"epoch": 1.993559824885126,
"grad_norm": 0.22022511065006256,
"learning_rate": 1.6190889684865588e-07,
"loss": 0.026084864139556886,
"step": 55100
},
{
"epoch": 1.9971779007923587,
"grad_norm": 0.5684672594070435,
"learning_rate": 7.145699916784254e-08,
"loss": 0.027587156295776367,
"step": 55200
},
{
"epoch": 2.0,
"step": 55278,
"total_flos": 1.9407141577440333e+18,
"train_loss": 0.04855243214227653,
"train_runtime": 26239.1933,
"train_samples_per_second": 303.363,
"train_steps_per_second": 2.107
}
],
"logging_steps": 100,
"max_steps": 55278,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 2500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.9407141577440333e+18,
"train_batch_size": 72,
"trial_name": null,
"trial_params": null
}