{
"best_global_step": 35000,
"best_metric": 0.89652071512686,
"best_model_checkpoint": "./lang-ner-xlmr/checkpoint-35000",
"epoch": 2.0,
"eval_steps": 2500,
"global_step": 41460,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004824043030463832,
"grad_norm": 2.7279021739959717,
"learning_rate": 4.988060781476121e-05,
"loss": 3.1182174682617188,
"step": 100
},
{
"epoch": 0.009648086060927664,
"grad_norm": 0.8386039137840271,
"learning_rate": 4.9760009647853356e-05,
"loss": 0.2861482620239258,
"step": 200
},
{
"epoch": 0.014472129091391495,
"grad_norm": 1.9196710586547852,
"learning_rate": 4.963941148094549e-05,
"loss": 0.12070045471191407,
"step": 300
},
{
"epoch": 0.019296172121855328,
"grad_norm": 6.516495227813721,
"learning_rate": 4.951881331403763e-05,
"loss": 0.09790064811706543,
"step": 400
},
{
"epoch": 0.024120215152319158,
"grad_norm": 1.2043635845184326,
"learning_rate": 4.939821514712977e-05,
"loss": 0.09528629302978515,
"step": 500
},
{
"epoch": 0.02894425818278299,
"grad_norm": 0.529084324836731,
"learning_rate": 4.92776169802219e-05,
"loss": 0.06706910610198974,
"step": 600
},
{
"epoch": 0.033768301213246824,
"grad_norm": 1.108811616897583,
"learning_rate": 4.9157018813314036e-05,
"loss": 0.07001821517944336,
"step": 700
},
{
"epoch": 0.038592344243710656,
"grad_norm": 1.234101414680481,
"learning_rate": 4.903642064640618e-05,
"loss": 0.07005959987640381,
"step": 800
},
{
"epoch": 0.04341638727417448,
"grad_norm": 0.6574804782867432,
"learning_rate": 4.8915822479498315e-05,
"loss": 0.06500310897827148,
"step": 900
},
{
"epoch": 0.048240430304638315,
"grad_norm": 0.6550615429878235,
"learning_rate": 4.879522431259045e-05,
"loss": 0.05578082084655762,
"step": 1000
},
{
"epoch": 0.05306447333510215,
"grad_norm": 0.9701142907142639,
"learning_rate": 4.867462614568259e-05,
"loss": 0.05476199150085449,
"step": 1100
},
{
"epoch": 0.05788851636556598,
"grad_norm": 0.3067728579044342,
"learning_rate": 4.855402797877472e-05,
"loss": 0.04951910972595215,
"step": 1200
},
{
"epoch": 0.06271255939602981,
"grad_norm": 0.4835965037345886,
"learning_rate": 4.843342981186686e-05,
"loss": 0.05270035743713379,
"step": 1300
},
{
"epoch": 0.06753660242649365,
"grad_norm": 0.9019619822502136,
"learning_rate": 4.8312831644959e-05,
"loss": 0.05690920352935791,
"step": 1400
},
{
"epoch": 0.07236064545695747,
"grad_norm": 1.7734606266021729,
"learning_rate": 4.819223347805114e-05,
"loss": 0.05044642925262451,
"step": 1500
},
{
"epoch": 0.07718468848742131,
"grad_norm": 1.1853278875350952,
"learning_rate": 4.8071635311143274e-05,
"loss": 0.056004085540771485,
"step": 1600
},
{
"epoch": 0.08200873151788514,
"grad_norm": 0.5660464763641357,
"learning_rate": 4.795103714423541e-05,
"loss": 0.0493979024887085,
"step": 1700
},
{
"epoch": 0.08683277454834896,
"grad_norm": 1.2197043895721436,
"learning_rate": 4.7830438977327546e-05,
"loss": 0.04784996509552002,
"step": 1800
},
{
"epoch": 0.0916568175788128,
"grad_norm": 1.1459959745407104,
"learning_rate": 4.770984081041968e-05,
"loss": 0.049839210510253903,
"step": 1900
},
{
"epoch": 0.09648086060927663,
"grad_norm": 0.3328426778316498,
"learning_rate": 4.7589242643511825e-05,
"loss": 0.04329806327819824,
"step": 2000
},
{
"epoch": 0.10130490363974047,
"grad_norm": 1.718967080116272,
"learning_rate": 4.746864447660396e-05,
"loss": 0.047143783569335934,
"step": 2100
},
{
"epoch": 0.1061289466702043,
"grad_norm": 0.7338983416557312,
"learning_rate": 4.734804630969609e-05,
"loss": 0.04436909198760986,
"step": 2200
},
{
"epoch": 0.11095298970066814,
"grad_norm": 0.5654782652854919,
"learning_rate": 4.722744814278823e-05,
"loss": 0.04844902515411377,
"step": 2300
},
{
"epoch": 0.11577703273113196,
"grad_norm": 0.4302056133747101,
"learning_rate": 4.710684997588037e-05,
"loss": 0.04093062400817871,
"step": 2400
},
{
"epoch": 0.12060107576159579,
"grad_norm": 0.554361879825592,
"learning_rate": 4.6986251808972505e-05,
"loss": 0.040434646606445315,
"step": 2500
},
{
"epoch": 0.12060107576159579,
"eval_accuracy": 0.9867528880759852,
"eval_f1": 0.8265977004331162,
"eval_loss": 0.06485302746295929,
"eval_precision": 0.7943624857764557,
"eval_recall": 0.8615597800199195,
"eval_runtime": 67.6547,
"eval_samples_per_second": 221.714,
"eval_steps_per_second": 6.164,
"step": 2500
},
{
"epoch": 0.12542511879205961,
"grad_norm": 0.8634827136993408,
"learning_rate": 4.686565364206465e-05,
"loss": 0.04507491588592529,
"step": 2600
},
{
"epoch": 0.13024916182252347,
"grad_norm": 0.6373780965805054,
"learning_rate": 4.674505547515678e-05,
"loss": 0.04472970962524414,
"step": 2700
},
{
"epoch": 0.1350732048529873,
"grad_norm": 0.35323283076286316,
"learning_rate": 4.662445730824891e-05,
"loss": 0.03842374086380005,
"step": 2800
},
{
"epoch": 0.13989724788345112,
"grad_norm": 0.8187289834022522,
"learning_rate": 4.6503859141341056e-05,
"loss": 0.04050546646118164,
"step": 2900
},
{
"epoch": 0.14472129091391495,
"grad_norm": 0.2180730253458023,
"learning_rate": 4.638326097443319e-05,
"loss": 0.04304762363433838,
"step": 3000
},
{
"epoch": 0.14954533394437877,
"grad_norm": 0.6171498894691467,
"learning_rate": 4.626266280752533e-05,
"loss": 0.03792398929595947,
"step": 3100
},
{
"epoch": 0.15436937697484263,
"grad_norm": 1.4763296842575073,
"learning_rate": 4.6142064640617464e-05,
"loss": 0.04089127063751221,
"step": 3200
},
{
"epoch": 0.15919342000530645,
"grad_norm": 0.36483830213546753,
"learning_rate": 4.60214664737096e-05,
"loss": 0.04075708866119385,
"step": 3300
},
{
"epoch": 0.16401746303577028,
"grad_norm": 0.32734522223472595,
"learning_rate": 4.5900868306801736e-05,
"loss": 0.03913374423980713,
"step": 3400
},
{
"epoch": 0.1688415060662341,
"grad_norm": 0.27289167046546936,
"learning_rate": 4.578027013989388e-05,
"loss": 0.039629595279693605,
"step": 3500
},
{
"epoch": 0.17366554909669793,
"grad_norm": 1.4992765188217163,
"learning_rate": 4.5659671972986015e-05,
"loss": 0.03849426031112671,
"step": 3600
},
{
"epoch": 0.17848959212716178,
"grad_norm": 0.7519832849502563,
"learning_rate": 4.553907380607815e-05,
"loss": 0.03754171133041382,
"step": 3700
},
{
"epoch": 0.1833136351576256,
"grad_norm": 1.4542765617370605,
"learning_rate": 4.541847563917029e-05,
"loss": 0.038514294624328614,
"step": 3800
},
{
"epoch": 0.18813767818808944,
"grad_norm": 1.8106330633163452,
"learning_rate": 4.529787747226242e-05,
"loss": 0.03961650609970093,
"step": 3900
},
{
"epoch": 0.19296172121855326,
"grad_norm": 0.3401031196117401,
"learning_rate": 4.517727930535456e-05,
"loss": 0.0379714560508728,
"step": 4000
},
{
"epoch": 0.1977857642490171,
"grad_norm": 3.1147701740264893,
"learning_rate": 4.50566811384467e-05,
"loss": 0.03555563688278198,
"step": 4100
},
{
"epoch": 0.20260980727948094,
"grad_norm": 0.3068256676197052,
"learning_rate": 4.493608297153884e-05,
"loss": 0.040891532897949216,
"step": 4200
},
{
"epoch": 0.20743385030994477,
"grad_norm": 0.22388258576393127,
"learning_rate": 4.481548480463097e-05,
"loss": 0.042806510925292966,
"step": 4300
},
{
"epoch": 0.2122578933404086,
"grad_norm": 1.0851870775222778,
"learning_rate": 4.469488663772311e-05,
"loss": 0.03217351198196411,
"step": 4400
},
{
"epoch": 0.21708193637087242,
"grad_norm": 0.14333230257034302,
"learning_rate": 4.4574288470815246e-05,
"loss": 0.036145191192626956,
"step": 4500
},
{
"epoch": 0.22190597940133627,
"grad_norm": 0.5196163654327393,
"learning_rate": 4.445369030390738e-05,
"loss": 0.04708011627197266,
"step": 4600
},
{
"epoch": 0.2267300224318001,
"grad_norm": 0.18328827619552612,
"learning_rate": 4.4333092136999525e-05,
"loss": 0.040124249458312986,
"step": 4700
},
{
"epoch": 0.23155406546226392,
"grad_norm": 0.31492918729782104,
"learning_rate": 4.4212493970091654e-05,
"loss": 0.041497902870178224,
"step": 4800
},
{
"epoch": 0.23637810849272775,
"grad_norm": 0.4818204939365387,
"learning_rate": 4.409189580318379e-05,
"loss": 0.04126156330108643,
"step": 4900
},
{
"epoch": 0.24120215152319158,
"grad_norm": 0.20825903117656708,
"learning_rate": 4.397129763627593e-05,
"loss": 0.03939923524856567,
"step": 5000
},
{
"epoch": 0.24120215152319158,
"eval_accuracy": 0.9893252582883119,
"eval_f1": 0.84304320903433,
"eval_loss": 0.05377783998847008,
"eval_precision": 0.8180670129341073,
"eval_recall": 0.8695925172130082,
"eval_runtime": 51.5792,
"eval_samples_per_second": 290.815,
"eval_steps_per_second": 8.085,
"step": 5000
},
{
"epoch": 0.24602619455365543,
"grad_norm": 0.20790189504623413,
"learning_rate": 4.385069946936807e-05,
"loss": 0.03445266008377075,
"step": 5100
},
{
"epoch": 0.25085023758411923,
"grad_norm": 0.2234453707933426,
"learning_rate": 4.3730101302460205e-05,
"loss": 0.03056433916091919,
"step": 5200
},
{
"epoch": 0.2556742806145831,
"grad_norm": 0.5091524124145508,
"learning_rate": 4.360950313555234e-05,
"loss": 0.03808696031570435,
"step": 5300
},
{
"epoch": 0.26049832364504694,
"grad_norm": 0.7598561644554138,
"learning_rate": 4.348890496864448e-05,
"loss": 0.03501533508300781,
"step": 5400
},
{
"epoch": 0.26532236667551073,
"grad_norm": 0.2233390510082245,
"learning_rate": 4.336830680173661e-05,
"loss": 0.03836148738861084,
"step": 5500
},
{
"epoch": 0.2701464097059746,
"grad_norm": 0.4892669916152954,
"learning_rate": 4.3247708634828756e-05,
"loss": 0.03670140504837036,
"step": 5600
},
{
"epoch": 0.2749704527364384,
"grad_norm": 0.3863944411277771,
"learning_rate": 4.312711046792089e-05,
"loss": 0.03313957452774048,
"step": 5700
},
{
"epoch": 0.27979449576690224,
"grad_norm": 0.457960844039917,
"learning_rate": 4.300651230101303e-05,
"loss": 0.03517037630081177,
"step": 5800
},
{
"epoch": 0.2846185387973661,
"grad_norm": 0.3622528314590454,
"learning_rate": 4.2885914134105164e-05,
"loss": 0.0420029878616333,
"step": 5900
},
{
"epoch": 0.2894425818278299,
"grad_norm": 0.9826716780662537,
"learning_rate": 4.27653159671973e-05,
"loss": 0.03928417205810547,
"step": 6000
},
{
"epoch": 0.29426662485829375,
"grad_norm": 0.3126944303512573,
"learning_rate": 4.2644717800289436e-05,
"loss": 0.03383539915084839,
"step": 6100
},
{
"epoch": 0.29909066788875754,
"grad_norm": 1.1283291578292847,
"learning_rate": 4.252411963338158e-05,
"loss": 0.034748728275299075,
"step": 6200
},
{
"epoch": 0.3039147109192214,
"grad_norm": 0.2550179958343506,
"learning_rate": 4.2403521466473715e-05,
"loss": 0.03332434177398682,
"step": 6300
},
{
"epoch": 0.30873875394968525,
"grad_norm": 0.6041121482849121,
"learning_rate": 4.2282923299565844e-05,
"loss": 0.03864547491073608,
"step": 6400
},
{
"epoch": 0.31356279698014905,
"grad_norm": 0.3217807412147522,
"learning_rate": 4.216232513265799e-05,
"loss": 0.03896953821182251,
"step": 6500
},
{
"epoch": 0.3183868400106129,
"grad_norm": 0.22055508196353912,
"learning_rate": 4.204172696575012e-05,
"loss": 0.03473591566085815,
"step": 6600
},
{
"epoch": 0.3232108830410767,
"grad_norm": 0.9059926271438599,
"learning_rate": 4.192112879884226e-05,
"loss": 0.0373721718788147,
"step": 6700
},
{
"epoch": 0.32803492607154056,
"grad_norm": 0.2131674885749817,
"learning_rate": 4.1800530631934395e-05,
"loss": 0.031450369358062745,
"step": 6800
},
{
"epoch": 0.3328589691020044,
"grad_norm": 0.1497948318719864,
"learning_rate": 4.167993246502653e-05,
"loss": 0.03357296228408813,
"step": 6900
},
{
"epoch": 0.3376830121324682,
"grad_norm": 0.3575536012649536,
"learning_rate": 4.155933429811867e-05,
"loss": 0.03312770128250122,
"step": 7000
},
{
"epoch": 0.34250705516293206,
"grad_norm": 0.27300477027893066,
"learning_rate": 4.143873613121081e-05,
"loss": 0.036876497268676756,
"step": 7100
},
{
"epoch": 0.34733109819339586,
"grad_norm": 0.36355310678482056,
"learning_rate": 4.1318137964302946e-05,
"loss": 0.034904708862304686,
"step": 7200
},
{
"epoch": 0.3521551412238597,
"grad_norm": 0.444167822599411,
"learning_rate": 4.119753979739508e-05,
"loss": 0.030987234115600587,
"step": 7300
},
{
"epoch": 0.35697918425432357,
"grad_norm": 0.22657343745231628,
"learning_rate": 4.107694163048722e-05,
"loss": 0.03171445846557617,
"step": 7400
},
{
"epoch": 0.36180322728478737,
"grad_norm": 0.3570277988910675,
"learning_rate": 4.0956343463579354e-05,
"loss": 0.03453096866607666,
"step": 7500
},
{
"epoch": 0.36180322728478737,
"eval_accuracy": 0.9905638698370478,
"eval_f1": 0.8562651098466053,
"eval_loss": 0.045637115836143494,
"eval_precision": 0.8355275356944187,
"eval_recall": 0.878058286060711,
"eval_runtime": 52.1242,
"eval_samples_per_second": 287.774,
"eval_steps_per_second": 8.0,
"step": 7500
},
{
"epoch": 0.3666272703152512,
"grad_norm": 0.4932907819747925,
"learning_rate": 4.083574529667149e-05,
"loss": 0.030991692543029786,
"step": 7600
},
{
"epoch": 0.371451313345715,
"grad_norm": 0.6475630402565002,
"learning_rate": 4.071514712976363e-05,
"loss": 0.03562487840652466,
"step": 7700
},
{
"epoch": 0.37627535637617887,
"grad_norm": 0.2954416871070862,
"learning_rate": 4.059454896285577e-05,
"loss": 0.03147151708602905,
"step": 7800
},
{
"epoch": 0.3810993994066427,
"grad_norm": 0.6999800205230713,
"learning_rate": 4.0473950795947905e-05,
"loss": 0.03395595073699951,
"step": 7900
},
{
"epoch": 0.3859234424371065,
"grad_norm": 1.9642822742462158,
"learning_rate": 4.035335262904004e-05,
"loss": 0.034128406047821046,
"step": 8000
},
{
"epoch": 0.3907474854675704,
"grad_norm": 0.8058770298957825,
"learning_rate": 4.023275446213218e-05,
"loss": 0.02912992238998413,
"step": 8100
},
{
"epoch": 0.3955715284980342,
"grad_norm": 0.5513653755187988,
"learning_rate": 4.011215629522431e-05,
"loss": 0.033489227294921875,
"step": 8200
},
{
"epoch": 0.40039557152849803,
"grad_norm": 0.5218818783760071,
"learning_rate": 3.9991558128316456e-05,
"loss": 0.03587050437927246,
"step": 8300
},
{
"epoch": 0.4052196145589619,
"grad_norm": 0.231138676404953,
"learning_rate": 3.987095996140859e-05,
"loss": 0.02900606632232666,
"step": 8400
},
{
"epoch": 0.4100436575894257,
"grad_norm": 0.941376268863678,
"learning_rate": 3.975036179450072e-05,
"loss": 0.032960660457611084,
"step": 8500
},
{
"epoch": 0.41486770061988953,
"grad_norm": 0.3743444085121155,
"learning_rate": 3.9629763627592864e-05,
"loss": 0.034282689094543455,
"step": 8600
},
{
"epoch": 0.4196917436503534,
"grad_norm": 0.20511318743228912,
"learning_rate": 3.9509165460685e-05,
"loss": 0.03301868677139282,
"step": 8700
},
{
"epoch": 0.4245157866808172,
"grad_norm": 0.5028975605964661,
"learning_rate": 3.9388567293777136e-05,
"loss": 0.030978357791900633,
"step": 8800
},
{
"epoch": 0.42933982971128104,
"grad_norm": 0.5793830752372742,
"learning_rate": 3.926796912686927e-05,
"loss": 0.03107161045074463,
"step": 8900
},
{
"epoch": 0.43416387274174484,
"grad_norm": 0.5201826095581055,
"learning_rate": 3.914737095996141e-05,
"loss": 0.03163294792175293,
"step": 9000
},
{
"epoch": 0.4389879157722087,
"grad_norm": 0.40996044874191284,
"learning_rate": 3.9026772793053544e-05,
"loss": 0.03236435651779175,
"step": 9100
},
{
"epoch": 0.44381195880267255,
"grad_norm": 0.32939156889915466,
"learning_rate": 3.890617462614569e-05,
"loss": 0.03099562406539917,
"step": 9200
},
{
"epoch": 0.44863600183313634,
"grad_norm": 0.5146192312240601,
"learning_rate": 3.878557645923782e-05,
"loss": 0.032382268905639645,
"step": 9300
},
{
"epoch": 0.4534600448636002,
"grad_norm": 0.6972792148590088,
"learning_rate": 3.866497829232996e-05,
"loss": 0.0357794189453125,
"step": 9400
},
{
"epoch": 0.458284087894064,
"grad_norm": 0.4266366958618164,
"learning_rate": 3.8544380125422095e-05,
"loss": 0.02773923635482788,
"step": 9500
},
{
"epoch": 0.46310813092452785,
"grad_norm": 0.18275046348571777,
"learning_rate": 3.842378195851423e-05,
"loss": 0.029792981147766115,
"step": 9600
},
{
"epoch": 0.4679321739549917,
"grad_norm": 0.19641897082328796,
"learning_rate": 3.830318379160637e-05,
"loss": 0.0328049373626709,
"step": 9700
},
{
"epoch": 0.4727562169854555,
"grad_norm": 0.5207920670509338,
"learning_rate": 3.818258562469851e-05,
"loss": 0.028371200561523438,
"step": 9800
},
{
"epoch": 0.47758026001591936,
"grad_norm": 1.656972050666809,
"learning_rate": 3.8061987457790646e-05,
"loss": 0.029215424060821532,
"step": 9900
},
{
"epoch": 0.48240430304638315,
"grad_norm": 0.6870591044425964,
"learning_rate": 3.794138929088278e-05,
"loss": 0.02802006721496582,
"step": 10000
},
{
"epoch": 0.48240430304638315,
"eval_accuracy": 0.989747898374386,
"eval_f1": 0.8614309687905686,
"eval_loss": 0.04934614896774292,
"eval_precision": 0.8403657255822574,
"eval_recall": 0.8835794396570389,
"eval_runtime": 51.7386,
"eval_samples_per_second": 289.919,
"eval_steps_per_second": 8.06,
"step": 10000
},
{
"epoch": 0.487228346076847,
"grad_norm": 0.1405647099018097,
"learning_rate": 3.782079112397492e-05,
"loss": 0.035042920112609864,
"step": 10100
},
{
"epoch": 0.49205238910731086,
"grad_norm": 0.4331558346748352,
"learning_rate": 3.7700192957067054e-05,
"loss": 0.032286217212677,
"step": 10200
},
{
"epoch": 0.49687643213777466,
"grad_norm": 0.3009164035320282,
"learning_rate": 3.757959479015919e-05,
"loss": 0.028717076778411864,
"step": 10300
},
{
"epoch": 0.5017004751682385,
"grad_norm": 0.3064032196998596,
"learning_rate": 3.745899662325133e-05,
"loss": 0.029738368988037108,
"step": 10400
},
{
"epoch": 0.5065245181987024,
"grad_norm": 0.34859976172447205,
"learning_rate": 3.733839845634346e-05,
"loss": 0.030203399658203126,
"step": 10500
},
{
"epoch": 0.5113485612291662,
"grad_norm": 0.4682078957557678,
"learning_rate": 3.72178002894356e-05,
"loss": 0.033402538299560545,
"step": 10600
},
{
"epoch": 0.51617260425963,
"grad_norm": 0.43761882185935974,
"learning_rate": 3.709720212252774e-05,
"loss": 0.029749608039855956,
"step": 10700
},
{
"epoch": 0.5209966472900939,
"grad_norm": 1.6429039239883423,
"learning_rate": 3.697660395561988e-05,
"loss": 0.03141383647918701,
"step": 10800
},
{
"epoch": 0.5258206903205577,
"grad_norm": 0.8676751852035522,
"learning_rate": 3.685600578871201e-05,
"loss": 0.028559036254882812,
"step": 10900
},
{
"epoch": 0.5306447333510215,
"grad_norm": 0.31465840339660645,
"learning_rate": 3.673540762180415e-05,
"loss": 0.033083460330963134,
"step": 11000
},
{
"epoch": 0.5354687763814853,
"grad_norm": 0.36446070671081543,
"learning_rate": 3.6614809454896285e-05,
"loss": 0.031009881496429442,
"step": 11100
},
{
"epoch": 0.5402928194119492,
"grad_norm": 1.3212252855300903,
"learning_rate": 3.649421128798842e-05,
"loss": 0.029797291755676268,
"step": 11200
},
{
"epoch": 0.545116862442413,
"grad_norm": 0.5250455141067505,
"learning_rate": 3.6373613121080564e-05,
"loss": 0.02991886615753174,
"step": 11300
},
{
"epoch": 0.5499409054728768,
"grad_norm": 1.0622237920761108,
"learning_rate": 3.62530149541727e-05,
"loss": 0.03123067855834961,
"step": 11400
},
{
"epoch": 0.5547649485033407,
"grad_norm": 0.3193683624267578,
"learning_rate": 3.6132416787264836e-05,
"loss": 0.026964287757873535,
"step": 11500
},
{
"epoch": 0.5595889915338045,
"grad_norm": 0.20829251408576965,
"learning_rate": 3.601181862035697e-05,
"loss": 0.023649635314941405,
"step": 11600
},
{
"epoch": 0.5644130345642683,
"grad_norm": 0.6939885020256042,
"learning_rate": 3.589122045344911e-05,
"loss": 0.03190106630325317,
"step": 11700
},
{
"epoch": 0.5692370775947322,
"grad_norm": 0.28773602843284607,
"learning_rate": 3.5770622286541244e-05,
"loss": 0.030272600650787355,
"step": 11800
},
{
"epoch": 0.574061120625196,
"grad_norm": 0.19230112433433533,
"learning_rate": 3.565002411963339e-05,
"loss": 0.02647350788116455,
"step": 11900
},
{
"epoch": 0.5788851636556598,
"grad_norm": 0.27152901887893677,
"learning_rate": 3.552942595272552e-05,
"loss": 0.025423860549926756,
"step": 12000
},
{
"epoch": 0.5837092066861237,
"grad_norm": 1.2988700866699219,
"learning_rate": 3.540882778581766e-05,
"loss": 0.03884052515029907,
"step": 12100
},
{
"epoch": 0.5885332497165875,
"grad_norm": 0.35957372188568115,
"learning_rate": 3.5288229618909795e-05,
"loss": 0.030858025550842286,
"step": 12200
},
{
"epoch": 0.5933572927470513,
"grad_norm": 1.2661397457122803,
"learning_rate": 3.516763145200193e-05,
"loss": 0.0320624303817749,
"step": 12300
},
{
"epoch": 0.5981813357775151,
"grad_norm": 0.9611783623695374,
"learning_rate": 3.504703328509407e-05,
"loss": 0.02826552391052246,
"step": 12400
},
{
"epoch": 0.603005378807979,
"grad_norm": 0.500732421875,
"learning_rate": 3.492643511818621e-05,
"loss": 0.02862701892852783,
"step": 12500
},
{
"epoch": 0.603005378807979,
"eval_accuracy": 0.9889094144247274,
"eval_f1": 0.8610719994918431,
"eval_loss": 0.051548413932323456,
"eval_precision": 0.8424585171835187,
"eval_recall": 0.8805265664920106,
"eval_runtime": 52.2114,
"eval_samples_per_second": 287.293,
"eval_steps_per_second": 7.987,
"step": 12500
},
{
"epoch": 0.6078294218384428,
"grad_norm": 0.2507345378398895,
"learning_rate": 3.480583695127834e-05,
"loss": 0.030443539619445802,
"step": 12600
},
{
"epoch": 0.6126534648689066,
"grad_norm": 1.4359475374221802,
"learning_rate": 3.4685238784370475e-05,
"loss": 0.027371883392333984,
"step": 12700
},
{
"epoch": 0.6174775078993705,
"grad_norm": 0.9925899505615234,
"learning_rate": 3.456464061746262e-05,
"loss": 0.03037006616592407,
"step": 12800
},
{
"epoch": 0.6223015509298343,
"grad_norm": 0.3496329188346863,
"learning_rate": 3.4444042450554754e-05,
"loss": 0.027849619388580323,
"step": 12900
},
{
"epoch": 0.6271255939602981,
"grad_norm": 0.5233566761016846,
"learning_rate": 3.432344428364689e-05,
"loss": 0.02637479543685913,
"step": 13000
},
{
"epoch": 0.631949636990762,
"grad_norm": 0.2668863832950592,
"learning_rate": 3.4202846116739026e-05,
"loss": 0.02920179605484009,
"step": 13100
},
{
"epoch": 0.6367736800212258,
"grad_norm": 0.20490218698978424,
"learning_rate": 3.408224794983116e-05,
"loss": 0.02677285432815552,
"step": 13200
},
{
"epoch": 0.6415977230516896,
"grad_norm": 0.3680262565612793,
"learning_rate": 3.39616497829233e-05,
"loss": 0.029742326736450195,
"step": 13300
},
{
"epoch": 0.6464217660821534,
"grad_norm": 0.4216366112232208,
"learning_rate": 3.384105161601544e-05,
"loss": 0.027399771213531494,
"step": 13400
},
{
"epoch": 0.6512458091126173,
"grad_norm": 0.13440310955047607,
"learning_rate": 3.372045344910758e-05,
"loss": 0.030674426555633544,
"step": 13500
},
{
"epoch": 0.6560698521430811,
"grad_norm": 0.14363612234592438,
"learning_rate": 3.359985528219971e-05,
"loss": 0.030937159061431886,
"step": 13600
},
{
"epoch": 0.6608938951735449,
"grad_norm": 0.7481242418289185,
"learning_rate": 3.347925711529185e-05,
"loss": 0.03099170923233032,
"step": 13700
},
{
"epoch": 0.6657179382040088,
"grad_norm": 0.2472449243068695,
"learning_rate": 3.3358658948383985e-05,
"loss": 0.028668901920318603,
"step": 13800
},
{
"epoch": 0.6705419812344726,
"grad_norm": 0.23963682353496552,
"learning_rate": 3.323806078147612e-05,
"loss": 0.026947088241577148,
"step": 13900
},
{
"epoch": 0.6753660242649364,
"grad_norm": 0.5909916758537292,
"learning_rate": 3.3117462614568264e-05,
"loss": 0.032423651218414305,
"step": 14000
},
{
"epoch": 0.6801900672954003,
"grad_norm": 0.9357315301895142,
"learning_rate": 3.29968644476604e-05,
"loss": 0.029326210021972655,
"step": 14100
},
{
"epoch": 0.6850141103258641,
"grad_norm": 0.6866487264633179,
"learning_rate": 3.2876266280752536e-05,
"loss": 0.02546304702758789,
"step": 14200
},
{
"epoch": 0.6898381533563279,
"grad_norm": 0.15798236429691315,
"learning_rate": 3.275566811384467e-05,
"loss": 0.022423455715179442,
"step": 14300
},
{
"epoch": 0.6946621963867917,
"grad_norm": 0.4801422357559204,
"learning_rate": 3.263506994693681e-05,
"loss": 0.02844859838485718,
"step": 14400
},
{
"epoch": 0.6994862394172556,
"grad_norm": 2.1221346855163574,
"learning_rate": 3.2514471780028944e-05,
"loss": 0.028370687961578368,
"step": 14500
},
{
"epoch": 0.7043102824477194,
"grad_norm": 0.604657769203186,
"learning_rate": 3.239387361312109e-05,
"loss": 0.031170213222503663,
"step": 14600
},
{
"epoch": 0.7091343254781832,
"grad_norm": 0.7991030812263489,
"learning_rate": 3.2273275446213216e-05,
"loss": 0.02627355098724365,
"step": 14700
},
{
"epoch": 0.7139583685086471,
"grad_norm": 3.5704472064971924,
"learning_rate": 3.215267727930535e-05,
"loss": 0.025982840061187742,
"step": 14800
},
{
"epoch": 0.7187824115391109,
"grad_norm": 0.49702438712120056,
"learning_rate": 3.2032079112397495e-05,
"loss": 0.029045536518096923,
"step": 14900
},
{
"epoch": 0.7236064545695747,
"grad_norm": 0.6950443387031555,
"learning_rate": 3.191148094548963e-05,
"loss": 0.027498562335968018,
"step": 15000
},
{
"epoch": 0.7236064545695747,
"eval_accuracy": 0.9904670236665706,
"eval_f1": 0.8604773530897457,
"eval_loss": 0.04225612059235573,
"eval_precision": 0.837114541955035,
"eval_recall": 0.8851816567791105,
"eval_runtime": 51.3305,
"eval_samples_per_second": 292.224,
"eval_steps_per_second": 8.124,
"step": 15000
},
{
"epoch": 0.7284304976000386,
"grad_norm": 0.28991585969924927,
"learning_rate": 3.179088277858177e-05,
"loss": 0.026163406372070312,
"step": 15100
},
{
"epoch": 0.7332545406305024,
"grad_norm": 0.08772952854633331,
"learning_rate": 3.16702846116739e-05,
"loss": 0.02845370292663574,
"step": 15200
},
{
"epoch": 0.7380785836609662,
"grad_norm": 1.0799998044967651,
"learning_rate": 3.154968644476604e-05,
"loss": 0.02897960424423218,
"step": 15300
},
{
"epoch": 0.74290262669143,
"grad_norm": 0.2629171311855316,
"learning_rate": 3.1429088277858175e-05,
"loss": 0.025154874324798585,
"step": 15400
},
{
"epoch": 0.7477266697218939,
"grad_norm": 0.9425322413444519,
"learning_rate": 3.130849011095032e-05,
"loss": 0.02195771932601929,
"step": 15500
},
{
"epoch": 0.7525507127523577,
"grad_norm": 0.2703983187675476,
"learning_rate": 3.1187891944042454e-05,
"loss": 0.02711749792098999,
"step": 15600
},
{
"epoch": 0.7573747557828215,
"grad_norm": 0.2081318199634552,
"learning_rate": 3.106729377713459e-05,
"loss": 0.0264898681640625,
"step": 15700
},
{
"epoch": 0.7621987988132854,
"grad_norm": 0.41779956221580505,
"learning_rate": 3.0946695610226726e-05,
"loss": 0.027609102725982666,
"step": 15800
},
{
"epoch": 0.7670228418437492,
"grad_norm": 0.19405648112297058,
"learning_rate": 3.082609744331886e-05,
"loss": 0.029054667949676514,
"step": 15900
},
{
"epoch": 0.771846884874213,
"grad_norm": 0.3789653182029724,
"learning_rate": 3.0705499276411e-05,
"loss": 0.02560849666595459,
"step": 16000
},
{
"epoch": 0.776670927904677,
"grad_norm": 0.18012675642967224,
"learning_rate": 3.058490110950314e-05,
"loss": 0.025810339450836182,
"step": 16100
},
{
"epoch": 0.7814949709351408,
"grad_norm": 0.21590501070022583,
"learning_rate": 3.0464302942595273e-05,
"loss": 0.026955347061157226,
"step": 16200
},
{
"epoch": 0.7863190139656046,
"grad_norm": 1.0594650506973267,
"learning_rate": 3.034370477568741e-05,
"loss": 0.02850575923919678,
"step": 16300
},
{
"epoch": 0.7911430569960684,
"grad_norm": 0.512518048286438,
"learning_rate": 3.022310660877955e-05,
"loss": 0.02473912000656128,
"step": 16400
},
{
"epoch": 0.7959671000265323,
"grad_norm": 0.4950084388256073,
"learning_rate": 3.0102508441871685e-05,
"loss": 0.029167954921722413,
"step": 16500
},
{
"epoch": 0.8007911430569961,
"grad_norm": 0.2222454696893692,
"learning_rate": 2.998191027496382e-05,
"loss": 0.02336118459701538,
"step": 16600
},
{
"epoch": 0.8056151860874599,
"grad_norm": 0.30645573139190674,
"learning_rate": 2.986131210805596e-05,
"loss": 0.026411423683166502,
"step": 16700
},
{
"epoch": 0.8104392291179238,
"grad_norm": 0.13581427931785583,
"learning_rate": 2.9740713941148096e-05,
"loss": 0.027823078632354736,
"step": 16800
},
{
"epoch": 0.8152632721483876,
"grad_norm": 0.37023600935935974,
"learning_rate": 2.9620115774240232e-05,
"loss": 0.025036261081695557,
"step": 16900
},
{
"epoch": 0.8200873151788514,
"grad_norm": 0.18537591397762299,
"learning_rate": 2.9499517607332372e-05,
"loss": 0.025412650108337403,
"step": 17000
},
{
"epoch": 0.8249113582093153,
"grad_norm": 0.3948329985141754,
"learning_rate": 2.9378919440424508e-05,
"loss": 0.03059415817260742,
"step": 17100
},
{
"epoch": 0.8297354012397791,
"grad_norm": 1.1231082677841187,
"learning_rate": 2.9258321273516644e-05,
"loss": 0.027097015380859374,
"step": 17200
},
{
"epoch": 0.8345594442702429,
"grad_norm": 0.1559356451034546,
"learning_rate": 2.9137723106608783e-05,
"loss": 0.025851171016693115,
"step": 17300
},
{
"epoch": 0.8393834873007068,
"grad_norm": 0.4749973714351654,
"learning_rate": 2.901712493970092e-05,
"loss": 0.02509115219116211,
"step": 17400
},
{
"epoch": 0.8442075303311706,
"grad_norm": 0.24519965052604675,
"learning_rate": 2.8896526772793052e-05,
"loss": 0.020944011211395264,
"step": 17500
},
{
"epoch": 0.8442075303311706,
"eval_accuracy": 0.9910574454953576,
"eval_f1": 0.8787765520040154,
"eval_loss": 0.042865537106990814,
"eval_precision": 0.8670628648500558,
"eval_recall": 0.890811068289092,
"eval_runtime": 51.479,
"eval_samples_per_second": 291.381,
"eval_steps_per_second": 8.1,
"step": 17500
},
{
"epoch": 0.8490315733616344,
"grad_norm": 0.36921805143356323,
"learning_rate": 2.8775928605885195e-05,
"loss": 0.029057729244232177,
"step": 17600
},
{
"epoch": 0.8538556163920982,
"grad_norm": 0.19858281314373016,
"learning_rate": 2.865533043897733e-05,
"loss": 0.029160046577453615,
"step": 17700
},
{
"epoch": 0.8586796594225621,
"grad_norm": 0.2647104561328888,
"learning_rate": 2.8534732272069463e-05,
"loss": 0.024375016689300536,
"step": 17800
},
{
"epoch": 0.8635037024530259,
"grad_norm": 0.24766811728477478,
"learning_rate": 2.8414134105161606e-05,
"loss": 0.028258707523345947,
"step": 17900
},
{
"epoch": 0.8683277454834897,
"grad_norm": 0.14881408214569092,
"learning_rate": 2.829353593825374e-05,
"loss": 0.02677877902984619,
"step": 18000
},
{
"epoch": 0.8731517885139536,
"grad_norm": 0.36174142360687256,
"learning_rate": 2.8172937771345875e-05,
"loss": 0.02804037570953369,
"step": 18100
},
{
"epoch": 0.8779758315444174,
"grad_norm": 0.8773052096366882,
"learning_rate": 2.8052339604438018e-05,
"loss": 0.022985424995422363,
"step": 18200
},
{
"epoch": 0.8827998745748812,
"grad_norm": 0.14455021917819977,
"learning_rate": 2.793174143753015e-05,
"loss": 0.02391258955001831,
"step": 18300
},
{
"epoch": 0.8876239176053451,
"grad_norm": 0.19167733192443848,
"learning_rate": 2.7811143270622286e-05,
"loss": 0.02640686750411987,
"step": 18400
},
{
"epoch": 0.8924479606358089,
"grad_norm": 0.3121378719806671,
"learning_rate": 2.7690545103714426e-05,
"loss": 0.02407193899154663,
"step": 18500
},
{
"epoch": 0.8972720036662727,
"grad_norm": 0.4771701991558075,
"learning_rate": 2.7569946936806562e-05,
"loss": 0.02530348062515259,
"step": 18600
},
{
"epoch": 0.9020960466967365,
"grad_norm": 0.31514617800712585,
"learning_rate": 2.7449348769898698e-05,
"loss": 0.025359327793121337,
"step": 18700
},
{
"epoch": 0.9069200897272004,
"grad_norm": 0.2182740867137909,
"learning_rate": 2.7328750602990837e-05,
"loss": 0.025950465202331543,
"step": 18800
},
{
"epoch": 0.9117441327576642,
"grad_norm": 0.3909512758255005,
"learning_rate": 2.7208152436082973e-05,
"loss": 0.024720582962036133,
"step": 18900
},
{
"epoch": 0.916568175788128,
"grad_norm": 0.1596415936946869,
"learning_rate": 2.708755426917511e-05,
"loss": 0.025378565788269043,
"step": 19000
},
{
"epoch": 0.9213922188185919,
"grad_norm": 0.1132221445441246,
"learning_rate": 2.696695610226725e-05,
"loss": 0.02627143621444702,
"step": 19100
},
{
"epoch": 0.9262162618490557,
"grad_norm": 0.38963910937309265,
"learning_rate": 2.6846357935359385e-05,
"loss": 0.026437394618988037,
"step": 19200
},
{
"epoch": 0.9310403048795195,
"grad_norm": 1.0219396352767944,
"learning_rate": 2.672575976845152e-05,
"loss": 0.02680544376373291,
"step": 19300
},
{
"epoch": 0.9358643479099834,
"grad_norm": 0.1513880342245102,
"learning_rate": 2.660516160154366e-05,
"loss": 0.02793146848678589,
"step": 19400
},
{
"epoch": 0.9406883909404472,
"grad_norm": 8.48257064819336,
"learning_rate": 2.6484563434635796e-05,
"loss": 0.026851544380187987,
"step": 19500
},
{
"epoch": 0.945512433970911,
"grad_norm": 0.12812338769435883,
"learning_rate": 2.636396526772793e-05,
"loss": 0.026399703025817872,
"step": 19600
},
{
"epoch": 0.9503364770013748,
"grad_norm": 1.4106616973876953,
"learning_rate": 2.6243367100820072e-05,
"loss": 0.026023907661437987,
"step": 19700
},
{
"epoch": 0.9551605200318387,
"grad_norm": 0.12191484868526459,
"learning_rate": 2.6122768933912208e-05,
"loss": 0.025158686637878416,
"step": 19800
},
{
"epoch": 0.9599845630623025,
"grad_norm": 0.3431759774684906,
"learning_rate": 2.600217076700434e-05,
"loss": 0.023687126636505126,
"step": 19900
},
{
"epoch": 0.9648086060927663,
"grad_norm": 0.2409236878156662,
"learning_rate": 2.5881572600096483e-05,
"loss": 0.02651404857635498,
"step": 20000
},
{
"epoch": 0.9648086060927663,
"eval_accuracy": 0.991921840043258,
"eval_f1": 0.8712219366623479,
"eval_loss": 0.037873830646276474,
"eval_precision": 0.8549691512422878,
"eval_recall": 0.8881046204477547,
"eval_runtime": 51.9311,
"eval_samples_per_second": 288.844,
"eval_steps_per_second": 8.03,
"step": 20000
},
{
"epoch": 0.9696326491232302,
"grad_norm": 0.2915472686290741,
"learning_rate": 2.5760974433188616e-05,
"loss": 0.02347031593322754,
"step": 20100
},
{
"epoch": 0.974456692153694,
"grad_norm": 0.4358366131782532,
"learning_rate": 2.5640376266280752e-05,
"loss": 0.025518434047698976,
"step": 20200
},
{
"epoch": 0.9792807351841578,
"grad_norm": 0.33405473828315735,
"learning_rate": 2.551977809937289e-05,
"loss": 0.027425525188446046,
"step": 20300
},
{
"epoch": 0.9841047782146217,
"grad_norm": 0.33355358242988586,
"learning_rate": 2.5399179932465027e-05,
"loss": 0.025573320388793945,
"step": 20400
},
{
"epoch": 0.9889288212450855,
"grad_norm": 0.7017316818237305,
"learning_rate": 2.5278581765557163e-05,
"loss": 0.027230489253997802,
"step": 20500
},
{
"epoch": 0.9937528642755493,
"grad_norm": 0.26649072766304016,
"learning_rate": 2.5157983598649303e-05,
"loss": 0.024173839092254637,
"step": 20600
},
{
"epoch": 0.9985769073060131,
"grad_norm": 1.540326714515686,
"learning_rate": 2.503738543174144e-05,
"loss": 0.02483781099319458,
"step": 20700
},
{
"epoch": 1.0033768301213246,
"grad_norm": 0.19430163502693176,
"learning_rate": 2.4916787264833578e-05,
"loss": 0.02256415843963623,
"step": 20800
},
{
"epoch": 1.0082008731517884,
"grad_norm": 0.12203595787286758,
"learning_rate": 2.479618909792571e-05,
"loss": 0.019598615169525147,
"step": 20900
},
{
"epoch": 1.0130249161822524,
"grad_norm": 0.26934438943862915,
"learning_rate": 2.467559093101785e-05,
"loss": 0.021361682415008545,
"step": 21000
},
{
"epoch": 1.0178489592127162,
"grad_norm": 0.13813284039497375,
"learning_rate": 2.455499276410999e-05,
"loss": 0.021079394817352295,
"step": 21100
},
{
"epoch": 1.02267300224318,
"grad_norm": 0.1430957019329071,
"learning_rate": 2.4434394597202122e-05,
"loss": 0.020538933277130127,
"step": 21200
},
{
"epoch": 1.0274970452736438,
"grad_norm": 0.7105738520622253,
"learning_rate": 2.4313796430294262e-05,
"loss": 0.021446900367736818,
"step": 21300
},
{
"epoch": 1.0323210883041076,
"grad_norm": 0.3810221552848816,
"learning_rate": 2.4193198263386398e-05,
"loss": 0.020385611057281493,
"step": 21400
},
{
"epoch": 1.0371451313345714,
"grad_norm": 0.14890126883983612,
"learning_rate": 2.4072600096478534e-05,
"loss": 0.021193060874938965,
"step": 21500
},
{
"epoch": 1.0419691743650352,
"grad_norm": 0.465364009141922,
"learning_rate": 2.3952001929570673e-05,
"loss": 0.02409552574157715,
"step": 21600
},
{
"epoch": 1.0467932173954992,
"grad_norm": 0.18177862465381622,
"learning_rate": 2.383140376266281e-05,
"loss": 0.024446609020233153,
"step": 21700
},
{
"epoch": 1.051617260425963,
"grad_norm": 0.38837435841560364,
"learning_rate": 2.3710805595754945e-05,
"loss": 0.02129380464553833,
"step": 21800
},
{
"epoch": 1.0564413034564268,
"grad_norm": 0.13987022638320923,
"learning_rate": 2.3590207428847085e-05,
"loss": 0.022437899112701415,
"step": 21900
},
{
"epoch": 1.0612653464868906,
"grad_norm": 0.21862603724002838,
"learning_rate": 2.3469609261939217e-05,
"loss": 0.02214601993560791,
"step": 22000
},
{
"epoch": 1.0660893895173544,
"grad_norm": 0.18493451178073883,
"learning_rate": 2.3349011095031357e-05,
"loss": 0.022123863697052003,
"step": 22100
},
{
"epoch": 1.0709134325478182,
"grad_norm": 0.44324392080307007,
"learning_rate": 2.3228412928123493e-05,
"loss": 0.01999701380729675,
"step": 22200
},
{
"epoch": 1.0757374755782823,
"grad_norm": 0.13552401959896088,
"learning_rate": 2.310781476121563e-05,
"loss": 0.018710813522338866,
"step": 22300
},
{
"epoch": 1.080561518608746,
"grad_norm": 0.5217646360397339,
"learning_rate": 2.298721659430777e-05,
"loss": 0.022998554706573485,
"step": 22400
},
{
"epoch": 1.0853855616392099,
"grad_norm": 0.13017535209655762,
"learning_rate": 2.2866618427399904e-05,
"loss": 0.022265849113464357,
"step": 22500
},
{
"epoch": 1.0853855616392099,
"eval_accuracy": 0.9918296662757423,
"eval_f1": 0.881381541146177,
"eval_loss": 0.03710692375898361,
"eval_precision": 0.8665369486986358,
"eval_recall": 0.8967436019573031,
"eval_runtime": 51.6329,
"eval_samples_per_second": 290.512,
"eval_steps_per_second": 8.076,
"step": 22500
},
{
"epoch": 1.0902096046696736,
"grad_norm": 0.26805901527404785,
"learning_rate": 2.274602026049204e-05,
"loss": 0.02201695680618286,
"step": 22600
},
{
"epoch": 1.0950336477001374,
"grad_norm": 0.20556294918060303,
"learning_rate": 2.262542209358418e-05,
"loss": 0.018640589714050294,
"step": 22700
},
{
"epoch": 1.0998576907306012,
"grad_norm": 0.16025076806545258,
"learning_rate": 2.2504823926676316e-05,
"loss": 0.02018498182296753,
"step": 22800
},
{
"epoch": 1.104681733761065,
"grad_norm": 0.22326083481311798,
"learning_rate": 2.2384225759768452e-05,
"loss": 0.020831646919250487,
"step": 22900
},
{
"epoch": 1.109505776791529,
"grad_norm": 0.18669798970222473,
"learning_rate": 2.2263627592860588e-05,
"loss": 0.020945420265197755,
"step": 23000
},
{
"epoch": 1.1143298198219929,
"grad_norm": 0.22091829776763916,
"learning_rate": 2.2143029425952727e-05,
"loss": 0.019859465360641478,
"step": 23100
},
{
"epoch": 1.1191538628524567,
"grad_norm": 0.28057217597961426,
"learning_rate": 2.2022431259044863e-05,
"loss": 0.022815022468566894,
"step": 23200
},
{
"epoch": 1.1239779058829205,
"grad_norm": 0.2595389187335968,
"learning_rate": 2.1901833092137e-05,
"loss": 0.021522111892700195,
"step": 23300
},
{
"epoch": 1.1288019489133843,
"grad_norm": 0.5332016348838806,
"learning_rate": 2.178123492522914e-05,
"loss": 0.019616042375564576,
"step": 23400
},
{
"epoch": 1.133625991943848,
"grad_norm": 0.10604680329561234,
"learning_rate": 2.1660636758321275e-05,
"loss": 0.022921762466430663,
"step": 23500
},
{
"epoch": 1.138450034974312,
"grad_norm": 0.3797323703765869,
"learning_rate": 2.154003859141341e-05,
"loss": 0.021188838481903075,
"step": 23600
},
{
"epoch": 1.1432740780047759,
"grad_norm": 0.3557288348674774,
"learning_rate": 2.141944042450555e-05,
"loss": 0.020320808887481688,
"step": 23700
},
{
"epoch": 1.1480981210352397,
"grad_norm": 0.5364207029342651,
"learning_rate": 2.1298842257597683e-05,
"loss": 0.02103010892868042,
"step": 23800
},
{
"epoch": 1.1529221640657035,
"grad_norm": 0.20504723489284515,
"learning_rate": 2.1178244090689822e-05,
"loss": 0.024899210929870606,
"step": 23900
},
{
"epoch": 1.1577462070961673,
"grad_norm": 0.3030504882335663,
"learning_rate": 2.1057645923781962e-05,
"loss": 0.018901402950286864,
"step": 24000
},
{
"epoch": 1.162570250126631,
"grad_norm": 0.2158869206905365,
"learning_rate": 2.0937047756874094e-05,
"loss": 0.018166555166244505,
"step": 24100
},
{
"epoch": 1.1673942931570949,
"grad_norm": 0.2794812321662903,
"learning_rate": 2.0816449589966234e-05,
"loss": 0.0199416983127594,
"step": 24200
},
{
"epoch": 1.1722183361875589,
"grad_norm": 0.10596510767936707,
"learning_rate": 2.069585142305837e-05,
"loss": 0.019620640277862547,
"step": 24300
},
{
"epoch": 1.1770423792180227,
"grad_norm": 1.3163063526153564,
"learning_rate": 2.0575253256150506e-05,
"loss": 0.021227221488952636,
"step": 24400
},
{
"epoch": 1.1818664222484865,
"grad_norm": 0.29747480154037476,
"learning_rate": 2.0454655089242645e-05,
"loss": 0.02037898302078247,
"step": 24500
},
{
"epoch": 1.1866904652789503,
"grad_norm": 0.722373902797699,
"learning_rate": 2.033405692233478e-05,
"loss": 0.020667204856872557,
"step": 24600
},
{
"epoch": 1.191514508309414,
"grad_norm": 0.12926365435123444,
"learning_rate": 2.0213458755426917e-05,
"loss": 0.018228678703308104,
"step": 24700
},
{
"epoch": 1.1963385513398779,
"grad_norm": 0.33814650774002075,
"learning_rate": 2.0092860588519057e-05,
"loss": 0.022069990634918213,
"step": 24800
},
{
"epoch": 1.201162594370342,
"grad_norm": 0.1276799589395523,
"learning_rate": 1.9972262421611193e-05,
"loss": 0.022927966117858887,
"step": 24900
},
{
"epoch": 1.2059866374008057,
"grad_norm": 0.18511514365673065,
"learning_rate": 1.985166425470333e-05,
"loss": 0.02195762872695923,
"step": 25000
},
{
"epoch": 1.2059866374008057,
"eval_accuracy": 0.9925814983886582,
"eval_f1": 0.8818330116962181,
"eval_loss": 0.034407418221235275,
"eval_precision": 0.8686902636277702,
"eval_recall": 0.8953795522452691,
"eval_runtime": 51.6069,
"eval_samples_per_second": 290.659,
"eval_steps_per_second": 8.08,
"step": 25000
},
{
"epoch": 1.2108106804312695,
"grad_norm": 0.28672105073928833,
"learning_rate": 1.9731066087795465e-05,
"loss": 0.022950747013092042,
"step": 25100
},
{
"epoch": 1.2156347234617333,
"grad_norm": 0.15472128987312317,
"learning_rate": 1.9610467920887604e-05,
"loss": 0.01865388870239258,
"step": 25200
},
{
"epoch": 1.220458766492197,
"grad_norm": 0.26068541407585144,
"learning_rate": 1.948986975397974e-05,
"loss": 0.021750383377075196,
"step": 25300
},
{
"epoch": 1.2252828095226609,
"grad_norm": 1.417925238609314,
"learning_rate": 1.9369271587071876e-05,
"loss": 0.021318423748016357,
"step": 25400
},
{
"epoch": 1.2301068525531247,
"grad_norm": 0.7924548387527466,
"learning_rate": 1.9248673420164016e-05,
"loss": 0.01893375873565674,
"step": 25500
},
{
"epoch": 1.2349308955835885,
"grad_norm": 0.17900590598583221,
"learning_rate": 1.9128075253256152e-05,
"loss": 0.01870368480682373,
"step": 25600
},
{
"epoch": 1.2397549386140525,
"grad_norm": 0.1943436861038208,
"learning_rate": 1.9007477086348288e-05,
"loss": 0.021407904624938964,
"step": 25700
},
{
"epoch": 1.2445789816445163,
"grad_norm": 0.1924910992383957,
"learning_rate": 1.8886878919440427e-05,
"loss": 0.02078892707824707,
"step": 25800
},
{
"epoch": 1.24940302467498,
"grad_norm": 0.1958584040403366,
"learning_rate": 1.876628075253256e-05,
"loss": 0.018969409465789795,
"step": 25900
},
{
"epoch": 1.254227067705444,
"grad_norm": 0.0961497351527214,
"learning_rate": 1.86456825856247e-05,
"loss": 0.024467270374298095,
"step": 26000
},
{
"epoch": 1.2590511107359077,
"grad_norm": 0.43624669313430786,
"learning_rate": 1.8525084418716835e-05,
"loss": 0.022932977676391603,
"step": 26100
},
{
"epoch": 1.2638751537663717,
"grad_norm": 0.17412593960762024,
"learning_rate": 1.840448625180897e-05,
"loss": 0.017692303657531737,
"step": 26200
},
{
"epoch": 1.2686991967968355,
"grad_norm": 0.4037439227104187,
"learning_rate": 1.828388808490111e-05,
"loss": 0.02168938159942627,
"step": 26300
},
{
"epoch": 1.2735232398272993,
"grad_norm": 0.20430967211723328,
"learning_rate": 1.8163289917993247e-05,
"loss": 0.018443295955657957,
"step": 26400
},
{
"epoch": 1.2783472828577631,
"grad_norm": 0.2996050715446472,
"learning_rate": 1.8042691751085383e-05,
"loss": 0.019166781902313232,
"step": 26500
},
{
"epoch": 1.283171325888227,
"grad_norm": 0.3298969864845276,
"learning_rate": 1.7922093584177522e-05,
"loss": 0.01918817639350891,
"step": 26600
},
{
"epoch": 1.2879953689186907,
"grad_norm": 0.28155457973480225,
"learning_rate": 1.780149541726966e-05,
"loss": 0.021806249618530272,
"step": 26700
},
{
"epoch": 1.2928194119491545,
"grad_norm": 0.2508911192417145,
"learning_rate": 1.7680897250361794e-05,
"loss": 0.020128331184387206,
"step": 26800
},
{
"epoch": 1.2976434549796183,
"grad_norm": 0.2319284975528717,
"learning_rate": 1.7560299083453934e-05,
"loss": 0.018995124101638793,
"step": 26900
},
{
"epoch": 1.3024674980100823,
"grad_norm": 0.12885890901088715,
"learning_rate": 1.743970091654607e-05,
"loss": 0.019624507427215575,
"step": 27000
},
{
"epoch": 1.3072915410405461,
"grad_norm": 0.1364358514547348,
"learning_rate": 1.7319102749638206e-05,
"loss": 0.01931032657623291,
"step": 27100
},
{
"epoch": 1.31211558407101,
"grad_norm": 1.741729974746704,
"learning_rate": 1.7198504582730342e-05,
"loss": 0.020110676288604735,
"step": 27200
},
{
"epoch": 1.3169396271014737,
"grad_norm": 0.5716229677200317,
"learning_rate": 1.707790641582248e-05,
"loss": 0.01891273021697998,
"step": 27300
},
{
"epoch": 1.3217636701319375,
"grad_norm": 0.9453685879707336,
"learning_rate": 1.6957308248914617e-05,
"loss": 0.020238091945648195,
"step": 27400
},
{
"epoch": 1.3265877131624015,
"grad_norm": 0.14117585122585297,
"learning_rate": 1.6836710082006753e-05,
"loss": 0.022481341361999512,
"step": 27500
},
{
"epoch": 1.3265877131624015,
"eval_accuracy": 0.992788783174592,
"eval_f1": 0.889221237047324,
"eval_loss": 0.03323497995734215,
"eval_precision": 0.877614709851552,
"eval_recall": 0.9011388732516347,
"eval_runtime": 52.1434,
"eval_samples_per_second": 287.668,
"eval_steps_per_second": 7.997,
"step": 27500
},
{
"epoch": 1.3314117561928653,
"grad_norm": 0.25555455684661865,
"learning_rate": 1.6716111915098893e-05,
"loss": 0.018107813596725465,
"step": 27600
},
{
"epoch": 1.3362357992233291,
"grad_norm": 0.20916156470775604,
"learning_rate": 1.659551374819103e-05,
"loss": 0.019892256259918212,
"step": 27700
},
{
"epoch": 1.341059842253793,
"grad_norm": 0.15623128414154053,
"learning_rate": 1.6474915581283165e-05,
"loss": 0.017413014173507692,
"step": 27800
},
{
"epoch": 1.3458838852842567,
"grad_norm": 0.15014760196208954,
"learning_rate": 1.6354317414375304e-05,
"loss": 0.020558416843414307,
"step": 27900
},
{
"epoch": 1.3507079283147205,
"grad_norm": 0.4308200180530548,
"learning_rate": 1.6233719247467437e-05,
"loss": 0.017611211538314818,
"step": 28000
},
{
"epoch": 1.3555319713451843,
"grad_norm": 0.15497736632823944,
"learning_rate": 1.6113121080559576e-05,
"loss": 0.017815752029418944,
"step": 28100
},
{
"epoch": 1.3603560143756481,
"grad_norm": 0.4078068733215332,
"learning_rate": 1.5992522913651712e-05,
"loss": 0.01794821858406067,
"step": 28200
},
{
"epoch": 1.365180057406112,
"grad_norm": 0.44584575295448303,
"learning_rate": 1.587192474674385e-05,
"loss": 0.019282504320144653,
"step": 28300
},
{
"epoch": 1.370004100436576,
"grad_norm": 0.550137460231781,
"learning_rate": 1.5751326579835988e-05,
"loss": 0.020532405376434325,
"step": 28400
},
{
"epoch": 1.3748281434670397,
"grad_norm": 0.15548627078533173,
"learning_rate": 1.5630728412928124e-05,
"loss": 0.02003218173980713,
"step": 28500
},
{
"epoch": 1.3796521864975035,
"grad_norm": 0.15787184238433838,
"learning_rate": 1.551013024602026e-05,
"loss": 0.017421540021896362,
"step": 28600
},
{
"epoch": 1.3844762295279673,
"grad_norm": 0.1659448891878128,
"learning_rate": 1.53895320791124e-05,
"loss": 0.019184736013412477,
"step": 28700
},
{
"epoch": 1.3893002725584314,
"grad_norm": 0.45317932963371277,
"learning_rate": 1.5268933912204535e-05,
"loss": 0.018715277910232545,
"step": 28800
},
{
"epoch": 1.3941243155888952,
"grad_norm": 0.16978032886981964,
"learning_rate": 1.5148335745296671e-05,
"loss": 0.019075859785079956,
"step": 28900
},
{
"epoch": 1.398948358619359,
"grad_norm": 0.31665724515914917,
"learning_rate": 1.502773757838881e-05,
"loss": 0.018271996974945068,
"step": 29000
},
{
"epoch": 1.4037724016498228,
"grad_norm": 0.3004429042339325,
"learning_rate": 1.4907139411480947e-05,
"loss": 0.019862807989120483,
"step": 29100
},
{
"epoch": 1.4085964446802866,
"grad_norm": 0.20420145988464355,
"learning_rate": 1.4786541244573083e-05,
"loss": 0.018257253170013428,
"step": 29200
},
{
"epoch": 1.4134204877107504,
"grad_norm": 0.122472383081913,
"learning_rate": 1.466594307766522e-05,
"loss": 0.025323121547698973,
"step": 29300
},
{
"epoch": 1.4182445307412141,
"grad_norm": 0.9836609363555908,
"learning_rate": 1.4545344910757358e-05,
"loss": 0.019051806926727297,
"step": 29400
},
{
"epoch": 1.423068573771678,
"grad_norm": 0.16322240233421326,
"learning_rate": 1.4424746743849493e-05,
"loss": 0.019138084650039675,
"step": 29500
},
{
"epoch": 1.4278926168021417,
"grad_norm": 0.2133868932723999,
"learning_rate": 1.430414857694163e-05,
"loss": 0.017339247465133666,
"step": 29600
},
{
"epoch": 1.4327166598326058,
"grad_norm": 0.2609802186489105,
"learning_rate": 1.418355041003377e-05,
"loss": 0.01856675386428833,
"step": 29700
},
{
"epoch": 1.4375407028630696,
"grad_norm": 0.2504105269908905,
"learning_rate": 1.4062952243125904e-05,
"loss": 0.022142369747161866,
"step": 29800
},
{
"epoch": 1.4423647458935334,
"grad_norm": 0.24993453919887543,
"learning_rate": 1.3942354076218042e-05,
"loss": 0.01751198887825012,
"step": 29900
},
{
"epoch": 1.4471887889239972,
"grad_norm": 0.126504585146904,
"learning_rate": 1.382175590931018e-05,
"loss": 0.018583767414093018,
"step": 30000
},
{
"epoch": 1.4471887889239972,
"eval_accuracy": 0.9919800326983255,
"eval_f1": 0.8861820618929587,
"eval_loss": 0.03896835818886757,
"eval_precision": 0.8710864791383457,
"eval_recall": 0.9018100723162863,
"eval_runtime": 51.5929,
"eval_samples_per_second": 290.738,
"eval_steps_per_second": 8.083,
"step": 30000
},
{
"epoch": 1.452012831954461,
"grad_norm": 0.7999847531318665,
"learning_rate": 1.3701157742402316e-05,
"loss": 0.02096844673156738,
"step": 30100
},
{
"epoch": 1.456836874984925,
"grad_norm": 0.11665287613868713,
"learning_rate": 1.3580559575494453e-05,
"loss": 0.01827834129333496,
"step": 30200
},
{
"epoch": 1.4616609180153888,
"grad_norm": 0.22630015015602112,
"learning_rate": 1.3459961408586591e-05,
"loss": 0.017943538427352905,
"step": 30300
},
{
"epoch": 1.4664849610458526,
"grad_norm": 0.21670867502689362,
"learning_rate": 1.3339363241678725e-05,
"loss": 0.020002198219299317,
"step": 30400
},
{
"epoch": 1.4713090040763164,
"grad_norm": 0.25701120495796204,
"learning_rate": 1.3218765074770865e-05,
"loss": 0.01862887978553772,
"step": 30500
},
{
"epoch": 1.4761330471067802,
"grad_norm": 0.14079546928405762,
"learning_rate": 1.3098166907863003e-05,
"loss": 0.02005054712295532,
"step": 30600
},
{
"epoch": 1.480957090137244,
"grad_norm": 0.31404340267181396,
"learning_rate": 1.2977568740955137e-05,
"loss": 0.018181434869766235,
"step": 30700
},
{
"epoch": 1.4857811331677078,
"grad_norm": 0.1643984615802765,
"learning_rate": 1.2856970574047275e-05,
"loss": 0.01885037899017334,
"step": 30800
},
{
"epoch": 1.4906051761981716,
"grad_norm": 0.1323440670967102,
"learning_rate": 1.2736372407139412e-05,
"loss": 0.018592065572738646,
"step": 30900
},
{
"epoch": 1.4954292192286356,
"grad_norm": 0.2534601092338562,
"learning_rate": 1.2615774240231548e-05,
"loss": 0.018988220691680907,
"step": 31000
},
{
"epoch": 1.5002532622590994,
"grad_norm": 0.2373075932264328,
"learning_rate": 1.2495176073323686e-05,
"loss": 0.019056109189987184,
"step": 31100
},
{
"epoch": 1.5050773052895632,
"grad_norm": 0.151611328125,
"learning_rate": 1.2374577906415822e-05,
"loss": 0.018509570360183716,
"step": 31200
},
{
"epoch": 1.509901348320027,
"grad_norm": 0.8050407767295837,
"learning_rate": 1.225397973950796e-05,
"loss": 0.01847294807434082,
"step": 31300
},
{
"epoch": 1.514725391350491,
"grad_norm": 0.4117303788661957,
"learning_rate": 1.2133381572600098e-05,
"loss": 0.016792016029357912,
"step": 31400
},
{
"epoch": 1.5195494343809548,
"grad_norm": 0.3043079674243927,
"learning_rate": 1.2012783405692234e-05,
"loss": 0.02204496622085571,
"step": 31500
},
{
"epoch": 1.5243734774114186,
"grad_norm": 0.14158490300178528,
"learning_rate": 1.1892185238784371e-05,
"loss": 0.020702006816864012,
"step": 31600
},
{
"epoch": 1.5291975204418824,
"grad_norm": 0.4225039482116699,
"learning_rate": 1.1771587071876507e-05,
"loss": 0.019063092470169067,
"step": 31700
},
{
"epoch": 1.5340215634723462,
"grad_norm": 0.3363790810108185,
"learning_rate": 1.1650988904968645e-05,
"loss": 0.017193055152893065,
"step": 31800
},
{
"epoch": 1.53884560650281,
"grad_norm": 0.12055296450853348,
"learning_rate": 1.1530390738060783e-05,
"loss": 0.019255086183547973,
"step": 31900
},
{
"epoch": 1.5436696495332738,
"grad_norm": 0.20997734367847443,
"learning_rate": 1.1409792571152919e-05,
"loss": 0.020008976459503173,
"step": 32000
},
{
"epoch": 1.5484936925637376,
"grad_norm": 0.25966885685920715,
"learning_rate": 1.1289194404245055e-05,
"loss": 0.018391019105911253,
"step": 32100
},
{
"epoch": 1.5533177355942014,
"grad_norm": 1.1394667625427246,
"learning_rate": 1.1168596237337194e-05,
"loss": 0.02040395259857178,
"step": 32200
},
{
"epoch": 1.5581417786246652,
"grad_norm": 0.11998942494392395,
"learning_rate": 1.104799807042933e-05,
"loss": 0.017555311918258668,
"step": 32300
},
{
"epoch": 1.5629658216551292,
"grad_norm": 0.11283577978610992,
"learning_rate": 1.0927399903521466e-05,
"loss": 0.018316521644592285,
"step": 32400
},
{
"epoch": 1.567789864685593,
"grad_norm": 0.8829536437988281,
"learning_rate": 1.0806801736613604e-05,
"loss": 0.019955469369888304,
"step": 32500
},
{
"epoch": 1.567789864685593,
"eval_accuracy": 0.9931243466600177,
"eval_f1": 0.8941826120457173,
"eval_loss": 0.031499363481998444,
"eval_precision": 0.8840407973253206,
"eval_recall": 0.9045598233230849,
"eval_runtime": 52.2852,
"eval_samples_per_second": 286.888,
"eval_steps_per_second": 7.975,
"step": 32500
},
{
"epoch": 1.5726139077160568,
"grad_norm": 0.6669954061508179,
"learning_rate": 1.0686203569705742e-05,
"loss": 0.018743941783905028,
"step": 32600
},
{
"epoch": 1.5774379507465208,
"grad_norm": 0.2682594358921051,
"learning_rate": 1.0565605402797878e-05,
"loss": 0.018420085906982422,
"step": 32700
},
{
"epoch": 1.5822619937769846,
"grad_norm": 0.16349567472934723,
"learning_rate": 1.0445007235890016e-05,
"loss": 0.02064610242843628,
"step": 32800
},
{
"epoch": 1.5870860368074484,
"grad_norm": 0.6123493313789368,
"learning_rate": 1.0324409068982152e-05,
"loss": 0.0181715726852417,
"step": 32900
},
{
"epoch": 1.5919100798379122,
"grad_norm": 0.2621537744998932,
"learning_rate": 1.020381090207429e-05,
"loss": 0.01923044562339783,
"step": 33000
},
{
"epoch": 1.596734122868376,
"grad_norm": 0.09542077034711838,
"learning_rate": 1.0083212735166427e-05,
"loss": 0.017349140644073488,
"step": 33100
},
{
"epoch": 1.6015581658988398,
"grad_norm": 0.18252168595790863,
"learning_rate": 9.962614568258563e-06,
"loss": 0.019681899547576903,
"step": 33200
},
{
"epoch": 1.6063822089293036,
"grad_norm": 0.19957713782787323,
"learning_rate": 9.8420164013507e-06,
"loss": 0.019357409477233887,
"step": 33300
},
{
"epoch": 1.6112062519597674,
"grad_norm": 0.5182835459709167,
"learning_rate": 9.721418234442837e-06,
"loss": 0.01951758861541748,
"step": 33400
},
{
"epoch": 1.6160302949902312,
"grad_norm": 0.4481932520866394,
"learning_rate": 9.600820067534975e-06,
"loss": 0.017961139678955077,
"step": 33500
},
{
"epoch": 1.620854338020695,
"grad_norm": 0.15489070117473602,
"learning_rate": 9.48022190062711e-06,
"loss": 0.0193113911151886,
"step": 33600
},
{
"epoch": 1.625678381051159,
"grad_norm": 0.2616223394870758,
"learning_rate": 9.359623733719248e-06,
"loss": 0.022246689796447755,
"step": 33700
},
{
"epoch": 1.6305024240816228,
"grad_norm": 0.12462881952524185,
"learning_rate": 9.239025566811384e-06,
"loss": 0.01692581295967102,
"step": 33800
},
{
"epoch": 1.6353264671120866,
"grad_norm": 0.48885273933410645,
"learning_rate": 9.118427399903522e-06,
"loss": 0.017899035215377807,
"step": 33900
},
{
"epoch": 1.6401505101425504,
"grad_norm": 1.0648194551467896,
"learning_rate": 8.99782923299566e-06,
"loss": 0.01802402377128601,
"step": 34000
},
{
"epoch": 1.6449745531730144,
"grad_norm": 0.2746858298778534,
"learning_rate": 8.877231066087796e-06,
"loss": 0.020917999744415283,
"step": 34100
},
{
"epoch": 1.6497985962034782,
"grad_norm": 0.12474814057350159,
"learning_rate": 8.756632899179932e-06,
"loss": 0.015847266912460328,
"step": 34200
},
{
"epoch": 1.654622639233942,
"grad_norm": 0.11499933153390884,
"learning_rate": 8.63603473227207e-06,
"loss": 0.017190442085266114,
"step": 34300
},
{
"epoch": 1.6594466822644058,
"grad_norm": 0.1851770579814911,
"learning_rate": 8.515436565364207e-06,
"loss": 0.018469662666320802,
"step": 34400
},
{
"epoch": 1.6642707252948696,
"grad_norm": 0.2300252914428711,
"learning_rate": 8.394838398456343e-06,
"loss": 0.01794400453567505,
"step": 34500
},
{
"epoch": 1.6690947683253334,
"grad_norm": 0.11766080558300018,
"learning_rate": 8.274240231548481e-06,
"loss": 0.018363571166992186,
"step": 34600
},
{
"epoch": 1.6739188113557972,
"grad_norm": 0.20575584471225739,
"learning_rate": 8.153642064640619e-06,
"loss": 0.016927268505096436,
"step": 34700
},
{
"epoch": 1.678742854386261,
"grad_norm": 0.35185614228248596,
"learning_rate": 8.033043897732755e-06,
"loss": 0.01612231135368347,
"step": 34800
},
{
"epoch": 1.6835668974167248,
"grad_norm": 0.1776873916387558,
"learning_rate": 7.912445730824891e-06,
"loss": 0.01680509090423584,
"step": 34900
},
{
"epoch": 1.6883909404471886,
"grad_norm": 0.25137367844581604,
"learning_rate": 7.791847563917029e-06,
"loss": 0.016988718509674074,
"step": 35000
},
{
"epoch": 1.6883909404471886,
"eval_accuracy": 0.993225440469551,
"eval_f1": 0.89652071512686,
"eval_loss": 0.031313586980104446,
"eval_precision": 0.886667796035914,
"eval_recall": 0.9065950720997705,
"eval_runtime": 51.6493,
"eval_samples_per_second": 290.42,
"eval_steps_per_second": 8.074,
"step": 35000
},
{
"epoch": 1.6932149834776526,
"grad_norm": 0.14376111328601837,
"learning_rate": 7.671249397009166e-06,
"loss": 0.019529181718826293,
"step": 35100
},
{
"epoch": 1.6980390265081164,
"grad_norm": 0.8683088421821594,
"learning_rate": 7.5506512301013025e-06,
"loss": 0.019479182958602907,
"step": 35200
},
{
"epoch": 1.7028630695385802,
"grad_norm": 0.11346932500600815,
"learning_rate": 7.43005306319344e-06,
"loss": 0.01975212812423706,
"step": 35300
},
{
"epoch": 1.7076871125690443,
"grad_norm": 0.8985689282417297,
"learning_rate": 7.309454896285576e-06,
"loss": 0.016446800231933595,
"step": 35400
},
{
"epoch": 1.712511155599508,
"grad_norm": 0.6181161403656006,
"learning_rate": 7.188856729377713e-06,
"loss": 0.016956570148468016,
"step": 35500
},
{
"epoch": 1.7173351986299719,
"grad_norm": 0.27897560596466064,
"learning_rate": 7.068258562469851e-06,
"loss": 0.020130460262298585,
"step": 35600
},
{
"epoch": 1.7221592416604357,
"grad_norm": 0.15588901937007904,
"learning_rate": 6.947660395561988e-06,
"loss": 0.016974217891693115,
"step": 35700
},
{
"epoch": 1.7269832846908995,
"grad_norm": 0.5564957857131958,
"learning_rate": 6.827062228654124e-06,
"loss": 0.017226357460021973,
"step": 35800
},
{
"epoch": 1.7318073277213633,
"grad_norm": 0.12989383935928345,
"learning_rate": 6.706464061746262e-06,
"loss": 0.015787020921707154,
"step": 35900
},
{
"epoch": 1.736631370751827,
"grad_norm": 0.24173200130462646,
"learning_rate": 6.585865894838398e-06,
"loss": 0.01873793125152588,
"step": 36000
},
{
"epoch": 1.7414554137822909,
"grad_norm": 0.21657347679138184,
"learning_rate": 6.465267727930535e-06,
"loss": 0.01680638313293457,
"step": 36100
},
{
"epoch": 1.7462794568127546,
"grad_norm": 0.12039454281330109,
"learning_rate": 6.344669561022673e-06,
"loss": 0.017534868717193605,
"step": 36200
},
{
"epoch": 1.7511034998432184,
"grad_norm": 0.08835107833147049,
"learning_rate": 6.22407139411481e-06,
"loss": 0.015722684860229492,
"step": 36300
},
{
"epoch": 1.7559275428736825,
"grad_norm": 0.15494988858699799,
"learning_rate": 6.103473227206947e-06,
"loss": 0.01669602155685425,
"step": 36400
},
{
"epoch": 1.7607515859041463,
"grad_norm": 0.2613168954849243,
"learning_rate": 5.9828750602990845e-06,
"loss": 0.018969074487686158,
"step": 36500
},
{
"epoch": 1.76557562893461,
"grad_norm": 0.26860108971595764,
"learning_rate": 5.8622768933912205e-06,
"loss": 0.018860089778900146,
"step": 36600
},
{
"epoch": 1.770399671965074,
"grad_norm": 0.3160684406757355,
"learning_rate": 5.741678726483358e-06,
"loss": 0.017936546802520752,
"step": 36700
},
{
"epoch": 1.7752237149955379,
"grad_norm": 0.16905085742473602,
"learning_rate": 5.621080559575495e-06,
"loss": 0.018141812086105345,
"step": 36800
},
{
"epoch": 1.7800477580260017,
"grad_norm": 0.23327182233333588,
"learning_rate": 5.500482392667632e-06,
"loss": 0.01744183659553528,
"step": 36900
},
{
"epoch": 1.7848718010564655,
"grad_norm": 0.15902255475521088,
"learning_rate": 5.379884225759769e-06,
"loss": 0.017776939868927002,
"step": 37000
},
{
"epoch": 1.7896958440869293,
"grad_norm": 0.240287646651268,
"learning_rate": 5.259286058851906e-06,
"loss": 0.01649364709854126,
"step": 37100
},
{
"epoch": 1.794519887117393,
"grad_norm": 0.13150164484977722,
"learning_rate": 5.138687891944043e-06,
"loss": 0.01930005669593811,
"step": 37200
},
{
"epoch": 1.7993439301478569,
"grad_norm": 0.25998786091804504,
"learning_rate": 5.0180897250361795e-06,
"loss": 0.016344897747039795,
"step": 37300
},
{
"epoch": 1.8041679731783207,
"grad_norm": 0.12845446169376373,
"learning_rate": 4.897491558128316e-06,
"loss": 0.019464727640151978,
"step": 37400
},
{
"epoch": 1.8089920162087845,
"grad_norm": 0.21504537761211395,
"learning_rate": 4.776893391220453e-06,
"loss": 0.017011468410491944,
"step": 37500
},
{
"epoch": 1.8089920162087845,
"eval_accuracy": 0.9933099684867659,
"eval_f1": 0.8917599033971296,
"eval_loss": 0.030537979677319527,
"eval_precision": 0.8804042791129492,
"eval_recall": 0.9034122894383579,
"eval_runtime": 51.6005,
"eval_samples_per_second": 290.695,
"eval_steps_per_second": 8.081,
"step": 37500
},
{
"epoch": 1.8138160592392483,
"grad_norm": 0.11677803844213486,
"learning_rate": 4.656295224312591e-06,
"loss": 0.016575688123703004,
"step": 37600
},
{
"epoch": 1.8186401022697123,
"grad_norm": 0.280719131231308,
"learning_rate": 4.535697057404727e-06,
"loss": 0.01687091827392578,
"step": 37700
},
{
"epoch": 1.823464145300176,
"grad_norm": 0.2764016389846802,
"learning_rate": 4.415098890496865e-06,
"loss": 0.016562118530273437,
"step": 37800
},
{
"epoch": 1.8282881883306399,
"grad_norm": 0.632255494594574,
"learning_rate": 4.294500723589002e-06,
"loss": 0.01787501573562622,
"step": 37900
},
{
"epoch": 1.833112231361104,
"grad_norm": 0.09340863674879074,
"learning_rate": 4.1739025566811385e-06,
"loss": 0.01698790192604065,
"step": 38000
},
{
"epoch": 1.8379362743915677,
"grad_norm": 0.1588761806488037,
"learning_rate": 4.053304389773275e-06,
"loss": 0.01741109848022461,
"step": 38100
},
{
"epoch": 1.8427603174220315,
"grad_norm": 0.10772903263568878,
"learning_rate": 3.932706222865413e-06,
"loss": 0.016634883880615233,
"step": 38200
},
{
"epoch": 1.8475843604524953,
"grad_norm": 0.22126013040542603,
"learning_rate": 3.8121080559575496e-06,
"loss": 0.01804221987724304,
"step": 38300
},
{
"epoch": 1.852408403482959,
"grad_norm": 0.3740140497684479,
"learning_rate": 3.6915098890496864e-06,
"loss": 0.01612048625946045,
"step": 38400
},
{
"epoch": 1.857232446513423,
"grad_norm": 0.13352862000465393,
"learning_rate": 3.5709117221418237e-06,
"loss": 0.01714093804359436,
"step": 38500
},
{
"epoch": 1.8620564895438867,
"grad_norm": 0.27527081966400146,
"learning_rate": 3.45031355523396e-06,
"loss": 0.01763258218765259,
"step": 38600
},
{
"epoch": 1.8668805325743505,
"grad_norm": 0.28769898414611816,
"learning_rate": 3.3297153883260975e-06,
"loss": 0.02112499475479126,
"step": 38700
},
{
"epoch": 1.8717045756048143,
"grad_norm": 0.15282955765724182,
"learning_rate": 3.209117221418235e-06,
"loss": 0.01631925821304321,
"step": 38800
},
{
"epoch": 1.876528618635278,
"grad_norm": 0.12284864485263824,
"learning_rate": 3.0885190545103717e-06,
"loss": 0.01694957971572876,
"step": 38900
},
{
"epoch": 1.881352661665742,
"grad_norm": 0.1236443966627121,
"learning_rate": 2.9679208876025086e-06,
"loss": 0.017817366123199462,
"step": 39000
},
{
"epoch": 1.886176704696206,
"grad_norm": 0.11388445645570755,
"learning_rate": 2.8473227206946454e-06,
"loss": 0.01580065131187439,
"step": 39100
},
{
"epoch": 1.8910007477266697,
"grad_norm": 0.2738426625728607,
"learning_rate": 2.7267245537867828e-06,
"loss": 0.01646868109703064,
"step": 39200
},
{
"epoch": 1.8958247907571335,
"grad_norm": 0.6833071112632751,
"learning_rate": 2.6061263868789196e-06,
"loss": 0.017300838232040407,
"step": 39300
},
{
"epoch": 1.9006488337875975,
"grad_norm": 0.12390507757663727,
"learning_rate": 2.4855282199710565e-06,
"loss": 0.017689213752746583,
"step": 39400
},
{
"epoch": 1.9054728768180613,
"grad_norm": 0.5947756171226501,
"learning_rate": 2.364930053063194e-06,
"loss": 0.016457540988922117,
"step": 39500
},
{
"epoch": 1.9102969198485251,
"grad_norm": 0.11591579020023346,
"learning_rate": 2.2443318861553307e-06,
"loss": 0.01735694646835327,
"step": 39600
},
{
"epoch": 1.915120962878989,
"grad_norm": 0.14687402546405792,
"learning_rate": 2.1237337192474676e-06,
"loss": 0.01619683623313904,
"step": 39700
},
{
"epoch": 1.9199450059094527,
"grad_norm": 0.14690209925174713,
"learning_rate": 2.003135552339605e-06,
"loss": 0.016990303993225098,
"step": 39800
},
{
"epoch": 1.9247690489399165,
"grad_norm": 0.5243352055549622,
"learning_rate": 1.8825373854317415e-06,
"loss": 0.015626425743103026,
"step": 39900
},
{
"epoch": 1.9295930919703803,
"grad_norm": 0.9058519601821899,
"learning_rate": 1.7619392185238784e-06,
"loss": 0.017552192211151122,
"step": 40000
},
{
"epoch": 1.9295930919703803,
"eval_accuracy": 0.9934548129785762,
"eval_f1": 0.8960502077710661,
"eval_loss": 0.030458878725767136,
"eval_precision": 0.8865553342092993,
"eval_recall": 0.9057506603732732,
"eval_runtime": 52.2526,
"eval_samples_per_second": 287.067,
"eval_steps_per_second": 7.98,
"step": 40000
},
{
"epoch": 1.9344171350008441,
"grad_norm": 0.2622898817062378,
"learning_rate": 1.6413410516160157e-06,
"loss": 0.016480473279953004,
"step": 40100
},
{
"epoch": 1.939241178031308,
"grad_norm": 0.1672438383102417,
"learning_rate": 1.5207428847081526e-06,
"loss": 0.018291155099868773,
"step": 40200
},
{
"epoch": 1.9440652210617717,
"grad_norm": 0.39425408840179443,
"learning_rate": 1.4001447178002895e-06,
"loss": 0.01858603596687317,
"step": 40300
},
{
"epoch": 1.9488892640922357,
"grad_norm": 0.2491266131401062,
"learning_rate": 1.2795465508924266e-06,
"loss": 0.016655097007751463,
"step": 40400
},
{
"epoch": 1.9537133071226995,
"grad_norm": 0.2883985638618469,
"learning_rate": 1.1589483839845637e-06,
"loss": 0.018044712543487548,
"step": 40500
},
{
"epoch": 1.9585373501531633,
"grad_norm": 0.1521671712398529,
"learning_rate": 1.0383502170767006e-06,
"loss": 0.019449379444122315,
"step": 40600
},
{
"epoch": 1.9633613931836273,
"grad_norm": 0.1477108597755432,
"learning_rate": 9.177520501688375e-07,
"loss": 0.016478629112243653,
"step": 40700
},
{
"epoch": 1.9681854362140911,
"grad_norm": 0.18148507177829742,
"learning_rate": 7.971538832609744e-07,
"loss": 0.016618763208389283,
"step": 40800
},
{
"epoch": 1.973009479244555,
"grad_norm": 0.17074325680732727,
"learning_rate": 6.765557163531114e-07,
"loss": 0.01620419979095459,
"step": 40900
},
{
"epoch": 1.9778335222750187,
"grad_norm": 0.23663687705993652,
"learning_rate": 5.559575494452484e-07,
"loss": 0.01673411011695862,
"step": 41000
},
{
"epoch": 1.9826575653054825,
"grad_norm": 0.1981934756040573,
"learning_rate": 4.353593825373855e-07,
"loss": 0.015512742996215821,
"step": 41100
},
{
"epoch": 1.9874816083359463,
"grad_norm": 0.19588832557201385,
"learning_rate": 3.1476121562952246e-07,
"loss": 0.017852275371551513,
"step": 41200
},
{
"epoch": 1.9923056513664101,
"grad_norm": 0.11776227504014969,
"learning_rate": 1.9416304872165945e-07,
"loss": 0.017226353883743287,
"step": 41300
},
{
"epoch": 1.997129694396874,
"grad_norm": 0.20195287466049194,
"learning_rate": 7.356488181379644e-08,
"loss": 0.016260911226272583,
"step": 41400
},
{
"epoch": 2.0,
"step": 41460,
"total_flos": 1.525095272976519e+18,
"train_loss": 0.03464991324659827,
"train_runtime": 20225.5982,
"train_samples_per_second": 295.171,
"train_steps_per_second": 2.05
}
],
"logging_steps": 100,
"max_steps": 41460,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 2500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.525095272976519e+18,
"train_batch_size": 72,
"trial_name": null,
"trial_params": null
}