{ "best_global_step": 35000, "best_metric": 0.89652071512686, "best_model_checkpoint": "./lang-ner-xlmr/checkpoint-35000", "epoch": 2.0, "eval_steps": 2500, "global_step": 41460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004824043030463832, "grad_norm": 2.7279021739959717, "learning_rate": 4.988060781476121e-05, "loss": 3.1182174682617188, "step": 100 }, { "epoch": 0.009648086060927664, "grad_norm": 0.8386039137840271, "learning_rate": 4.9760009647853356e-05, "loss": 0.2861482620239258, "step": 200 }, { "epoch": 0.014472129091391495, "grad_norm": 1.9196710586547852, "learning_rate": 4.963941148094549e-05, "loss": 0.12070045471191407, "step": 300 }, { "epoch": 0.019296172121855328, "grad_norm": 6.516495227813721, "learning_rate": 4.951881331403763e-05, "loss": 0.09790064811706543, "step": 400 }, { "epoch": 0.024120215152319158, "grad_norm": 1.2043635845184326, "learning_rate": 4.939821514712977e-05, "loss": 0.09528629302978515, "step": 500 }, { "epoch": 0.02894425818278299, "grad_norm": 0.529084324836731, "learning_rate": 4.92776169802219e-05, "loss": 0.06706910610198974, "step": 600 }, { "epoch": 0.033768301213246824, "grad_norm": 1.108811616897583, "learning_rate": 4.9157018813314036e-05, "loss": 0.07001821517944336, "step": 700 }, { "epoch": 0.038592344243710656, "grad_norm": 1.234101414680481, "learning_rate": 4.903642064640618e-05, "loss": 0.07005959987640381, "step": 800 }, { "epoch": 0.04341638727417448, "grad_norm": 0.6574804782867432, "learning_rate": 4.8915822479498315e-05, "loss": 0.06500310897827148, "step": 900 }, { "epoch": 0.048240430304638315, "grad_norm": 0.6550615429878235, "learning_rate": 4.879522431259045e-05, "loss": 0.05578082084655762, "step": 1000 }, { "epoch": 0.05306447333510215, "grad_norm": 0.9701142907142639, "learning_rate": 4.867462614568259e-05, "loss": 0.05476199150085449, "step": 1100 }, { "epoch": 0.05788851636556598, "grad_norm": 0.3067728579044342, "learning_rate": 4.855402797877472e-05, "loss": 0.04951910972595215, "step": 1200 }, { "epoch": 0.06271255939602981, "grad_norm": 0.4835965037345886, "learning_rate": 4.843342981186686e-05, "loss": 0.05270035743713379, "step": 1300 }, { "epoch": 0.06753660242649365, "grad_norm": 0.9019619822502136, "learning_rate": 4.8312831644959e-05, "loss": 0.05690920352935791, "step": 1400 }, { "epoch": 0.07236064545695747, "grad_norm": 1.7734606266021729, "learning_rate": 4.819223347805114e-05, "loss": 0.05044642925262451, "step": 1500 }, { "epoch": 0.07718468848742131, "grad_norm": 1.1853278875350952, "learning_rate": 4.8071635311143274e-05, "loss": 0.056004085540771485, "step": 1600 }, { "epoch": 0.08200873151788514, "grad_norm": 0.5660464763641357, "learning_rate": 4.795103714423541e-05, "loss": 0.0493979024887085, "step": 1700 }, { "epoch": 0.08683277454834896, "grad_norm": 1.2197043895721436, "learning_rate": 4.7830438977327546e-05, "loss": 0.04784996509552002, "step": 1800 }, { "epoch": 0.0916568175788128, "grad_norm": 1.1459959745407104, "learning_rate": 4.770984081041968e-05, "loss": 0.049839210510253903, "step": 1900 }, { "epoch": 0.09648086060927663, "grad_norm": 0.3328426778316498, "learning_rate": 4.7589242643511825e-05, "loss": 0.04329806327819824, "step": 2000 }, { "epoch": 0.10130490363974047, "grad_norm": 1.718967080116272, "learning_rate": 4.746864447660396e-05, "loss": 0.047143783569335934, "step": 2100 }, { "epoch": 0.1061289466702043, "grad_norm": 0.7338983416557312, "learning_rate": 4.734804630969609e-05, "loss": 0.04436909198760986, "step": 2200 }, { "epoch": 0.11095298970066814, "grad_norm": 0.5654782652854919, "learning_rate": 4.722744814278823e-05, "loss": 0.04844902515411377, "step": 2300 }, { "epoch": 0.11577703273113196, "grad_norm": 0.4302056133747101, "learning_rate": 4.710684997588037e-05, "loss": 0.04093062400817871, "step": 2400 }, { "epoch": 0.12060107576159579, "grad_norm": 0.554361879825592, "learning_rate": 4.6986251808972505e-05, "loss": 0.040434646606445315, "step": 2500 }, { "epoch": 0.12060107576159579, "eval_accuracy": 0.9867528880759852, "eval_f1": 0.8265977004331162, "eval_loss": 0.06485302746295929, "eval_precision": 0.7943624857764557, "eval_recall": 0.8615597800199195, "eval_runtime": 67.6547, "eval_samples_per_second": 221.714, "eval_steps_per_second": 6.164, "step": 2500 }, { "epoch": 0.12542511879205961, "grad_norm": 0.8634827136993408, "learning_rate": 4.686565364206465e-05, "loss": 0.04507491588592529, "step": 2600 }, { "epoch": 0.13024916182252347, "grad_norm": 0.6373780965805054, "learning_rate": 4.674505547515678e-05, "loss": 0.04472970962524414, "step": 2700 }, { "epoch": 0.1350732048529873, "grad_norm": 0.35323283076286316, "learning_rate": 4.662445730824891e-05, "loss": 0.03842374086380005, "step": 2800 }, { "epoch": 0.13989724788345112, "grad_norm": 0.8187289834022522, "learning_rate": 4.6503859141341056e-05, "loss": 0.04050546646118164, "step": 2900 }, { "epoch": 0.14472129091391495, "grad_norm": 0.2180730253458023, "learning_rate": 4.638326097443319e-05, "loss": 0.04304762363433838, "step": 3000 }, { "epoch": 0.14954533394437877, "grad_norm": 0.6171498894691467, "learning_rate": 4.626266280752533e-05, "loss": 0.03792398929595947, "step": 3100 }, { "epoch": 0.15436937697484263, "grad_norm": 1.4763296842575073, "learning_rate": 4.6142064640617464e-05, "loss": 0.04089127063751221, "step": 3200 }, { "epoch": 0.15919342000530645, "grad_norm": 0.36483830213546753, "learning_rate": 4.60214664737096e-05, "loss": 0.04075708866119385, "step": 3300 }, { "epoch": 0.16401746303577028, "grad_norm": 0.32734522223472595, "learning_rate": 4.5900868306801736e-05, "loss": 0.03913374423980713, "step": 3400 }, { "epoch": 0.1688415060662341, "grad_norm": 0.27289167046546936, "learning_rate": 4.578027013989388e-05, "loss": 0.039629595279693605, "step": 3500 }, { "epoch": 0.17366554909669793, "grad_norm": 1.4992765188217163, "learning_rate": 4.5659671972986015e-05, "loss": 0.03849426031112671, "step": 3600 }, { "epoch": 0.17848959212716178, "grad_norm": 0.7519832849502563, "learning_rate": 4.553907380607815e-05, "loss": 0.03754171133041382, "step": 3700 }, { "epoch": 0.1833136351576256, "grad_norm": 1.4542765617370605, "learning_rate": 4.541847563917029e-05, "loss": 0.038514294624328614, "step": 3800 }, { "epoch": 0.18813767818808944, "grad_norm": 1.8106330633163452, "learning_rate": 4.529787747226242e-05, "loss": 0.03961650609970093, "step": 3900 }, { "epoch": 0.19296172121855326, "grad_norm": 0.3401031196117401, "learning_rate": 4.517727930535456e-05, "loss": 0.0379714560508728, "step": 4000 }, { "epoch": 0.1977857642490171, "grad_norm": 3.1147701740264893, "learning_rate": 4.50566811384467e-05, "loss": 0.03555563688278198, "step": 4100 }, { "epoch": 0.20260980727948094, "grad_norm": 0.3068256676197052, "learning_rate": 4.493608297153884e-05, "loss": 0.040891532897949216, "step": 4200 }, { "epoch": 0.20743385030994477, "grad_norm": 0.22388258576393127, "learning_rate": 4.481548480463097e-05, "loss": 0.042806510925292966, "step": 4300 }, { "epoch": 0.2122578933404086, "grad_norm": 1.0851870775222778, "learning_rate": 4.469488663772311e-05, "loss": 0.03217351198196411, "step": 4400 }, { "epoch": 0.21708193637087242, "grad_norm": 0.14333230257034302, "learning_rate": 4.4574288470815246e-05, "loss": 0.036145191192626956, "step": 4500 }, { "epoch": 0.22190597940133627, "grad_norm": 0.5196163654327393, "learning_rate": 4.445369030390738e-05, "loss": 0.04708011627197266, "step": 4600 }, { "epoch": 0.2267300224318001, "grad_norm": 0.18328827619552612, "learning_rate": 4.4333092136999525e-05, "loss": 0.040124249458312986, "step": 4700 }, { "epoch": 0.23155406546226392, "grad_norm": 0.31492918729782104, "learning_rate": 4.4212493970091654e-05, "loss": 0.041497902870178224, "step": 4800 }, { "epoch": 0.23637810849272775, "grad_norm": 0.4818204939365387, "learning_rate": 4.409189580318379e-05, "loss": 0.04126156330108643, "step": 4900 }, { "epoch": 0.24120215152319158, "grad_norm": 0.20825903117656708, "learning_rate": 4.397129763627593e-05, "loss": 0.03939923524856567, "step": 5000 }, { "epoch": 0.24120215152319158, "eval_accuracy": 0.9893252582883119, "eval_f1": 0.84304320903433, "eval_loss": 0.05377783998847008, "eval_precision": 0.8180670129341073, "eval_recall": 0.8695925172130082, "eval_runtime": 51.5792, "eval_samples_per_second": 290.815, "eval_steps_per_second": 8.085, "step": 5000 }, { "epoch": 0.24602619455365543, "grad_norm": 0.20790189504623413, "learning_rate": 4.385069946936807e-05, "loss": 0.03445266008377075, "step": 5100 }, { "epoch": 0.25085023758411923, "grad_norm": 0.2234453707933426, "learning_rate": 4.3730101302460205e-05, "loss": 0.03056433916091919, "step": 5200 }, { "epoch": 0.2556742806145831, "grad_norm": 0.5091524124145508, "learning_rate": 4.360950313555234e-05, "loss": 0.03808696031570435, "step": 5300 }, { "epoch": 0.26049832364504694, "grad_norm": 0.7598561644554138, "learning_rate": 4.348890496864448e-05, "loss": 0.03501533508300781, "step": 5400 }, { "epoch": 0.26532236667551073, "grad_norm": 0.2233390510082245, "learning_rate": 4.336830680173661e-05, "loss": 0.03836148738861084, "step": 5500 }, { "epoch": 0.2701464097059746, "grad_norm": 0.4892669916152954, "learning_rate": 4.3247708634828756e-05, "loss": 0.03670140504837036, "step": 5600 }, { "epoch": 0.2749704527364384, "grad_norm": 0.3863944411277771, "learning_rate": 4.312711046792089e-05, "loss": 0.03313957452774048, "step": 5700 }, { "epoch": 0.27979449576690224, "grad_norm": 0.457960844039917, "learning_rate": 4.300651230101303e-05, "loss": 0.03517037630081177, "step": 5800 }, { "epoch": 0.2846185387973661, "grad_norm": 0.3622528314590454, "learning_rate": 4.2885914134105164e-05, "loss": 0.0420029878616333, "step": 5900 }, { "epoch": 0.2894425818278299, "grad_norm": 0.9826716780662537, "learning_rate": 4.27653159671973e-05, "loss": 0.03928417205810547, "step": 6000 }, { "epoch": 0.29426662485829375, "grad_norm": 0.3126944303512573, "learning_rate": 4.2644717800289436e-05, "loss": 0.03383539915084839, "step": 6100 }, { "epoch": 0.29909066788875754, "grad_norm": 1.1283291578292847, "learning_rate": 4.252411963338158e-05, "loss": 0.034748728275299075, "step": 6200 }, { "epoch": 0.3039147109192214, "grad_norm": 0.2550179958343506, "learning_rate": 4.2403521466473715e-05, "loss": 0.03332434177398682, "step": 6300 }, { "epoch": 0.30873875394968525, "grad_norm": 0.6041121482849121, "learning_rate": 4.2282923299565844e-05, "loss": 0.03864547491073608, "step": 6400 }, { "epoch": 0.31356279698014905, "grad_norm": 0.3217807412147522, "learning_rate": 4.216232513265799e-05, "loss": 0.03896953821182251, "step": 6500 }, { "epoch": 0.3183868400106129, "grad_norm": 0.22055508196353912, "learning_rate": 4.204172696575012e-05, "loss": 0.03473591566085815, "step": 6600 }, { "epoch": 0.3232108830410767, "grad_norm": 0.9059926271438599, "learning_rate": 4.192112879884226e-05, "loss": 0.0373721718788147, "step": 6700 }, { "epoch": 0.32803492607154056, "grad_norm": 0.2131674885749817, "learning_rate": 4.1800530631934395e-05, "loss": 0.031450369358062745, "step": 6800 }, { "epoch": 0.3328589691020044, "grad_norm": 0.1497948318719864, "learning_rate": 4.167993246502653e-05, "loss": 0.03357296228408813, "step": 6900 }, { "epoch": 0.3376830121324682, "grad_norm": 0.3575536012649536, "learning_rate": 4.155933429811867e-05, "loss": 0.03312770128250122, "step": 7000 }, { "epoch": 0.34250705516293206, "grad_norm": 0.27300477027893066, "learning_rate": 4.143873613121081e-05, "loss": 0.036876497268676756, "step": 7100 }, { "epoch": 0.34733109819339586, "grad_norm": 0.36355310678482056, "learning_rate": 4.1318137964302946e-05, "loss": 0.034904708862304686, "step": 7200 }, { "epoch": 0.3521551412238597, "grad_norm": 0.444167822599411, "learning_rate": 4.119753979739508e-05, "loss": 0.030987234115600587, "step": 7300 }, { "epoch": 0.35697918425432357, "grad_norm": 0.22657343745231628, "learning_rate": 4.107694163048722e-05, "loss": 0.03171445846557617, "step": 7400 }, { "epoch": 0.36180322728478737, "grad_norm": 0.3570277988910675, "learning_rate": 4.0956343463579354e-05, "loss": 0.03453096866607666, "step": 7500 }, { "epoch": 0.36180322728478737, "eval_accuracy": 0.9905638698370478, "eval_f1": 0.8562651098466053, "eval_loss": 0.045637115836143494, "eval_precision": 0.8355275356944187, "eval_recall": 0.878058286060711, "eval_runtime": 52.1242, "eval_samples_per_second": 287.774, "eval_steps_per_second": 8.0, "step": 7500 }, { "epoch": 0.3666272703152512, "grad_norm": 0.4932907819747925, "learning_rate": 4.083574529667149e-05, "loss": 0.030991692543029786, "step": 7600 }, { "epoch": 0.371451313345715, "grad_norm": 0.6475630402565002, "learning_rate": 4.071514712976363e-05, "loss": 0.03562487840652466, "step": 7700 }, { "epoch": 0.37627535637617887, "grad_norm": 0.2954416871070862, "learning_rate": 4.059454896285577e-05, "loss": 0.03147151708602905, "step": 7800 }, { "epoch": 0.3810993994066427, "grad_norm": 0.6999800205230713, "learning_rate": 4.0473950795947905e-05, "loss": 0.03395595073699951, "step": 7900 }, { "epoch": 0.3859234424371065, "grad_norm": 1.9642822742462158, "learning_rate": 4.035335262904004e-05, "loss": 0.034128406047821046, "step": 8000 }, { "epoch": 0.3907474854675704, "grad_norm": 0.8058770298957825, "learning_rate": 4.023275446213218e-05, "loss": 0.02912992238998413, "step": 8100 }, { "epoch": 0.3955715284980342, "grad_norm": 0.5513653755187988, "learning_rate": 4.011215629522431e-05, "loss": 0.033489227294921875, "step": 8200 }, { "epoch": 0.40039557152849803, "grad_norm": 0.5218818783760071, "learning_rate": 3.9991558128316456e-05, "loss": 0.03587050437927246, "step": 8300 }, { "epoch": 0.4052196145589619, "grad_norm": 0.231138676404953, "learning_rate": 3.987095996140859e-05, "loss": 0.02900606632232666, "step": 8400 }, { "epoch": 0.4100436575894257, "grad_norm": 0.941376268863678, "learning_rate": 3.975036179450072e-05, "loss": 0.032960660457611084, "step": 8500 }, { "epoch": 0.41486770061988953, "grad_norm": 0.3743444085121155, "learning_rate": 3.9629763627592864e-05, "loss": 0.034282689094543455, "step": 8600 }, { "epoch": 0.4196917436503534, "grad_norm": 0.20511318743228912, "learning_rate": 3.9509165460685e-05, "loss": 0.03301868677139282, "step": 8700 }, { "epoch": 0.4245157866808172, "grad_norm": 0.5028975605964661, "learning_rate": 3.9388567293777136e-05, "loss": 0.030978357791900633, "step": 8800 }, { "epoch": 0.42933982971128104, "grad_norm": 0.5793830752372742, "learning_rate": 3.926796912686927e-05, "loss": 0.03107161045074463, "step": 8900 }, { "epoch": 0.43416387274174484, "grad_norm": 0.5201826095581055, "learning_rate": 3.914737095996141e-05, "loss": 0.03163294792175293, "step": 9000 }, { "epoch": 0.4389879157722087, "grad_norm": 0.40996044874191284, "learning_rate": 3.9026772793053544e-05, "loss": 0.03236435651779175, "step": 9100 }, { "epoch": 0.44381195880267255, "grad_norm": 0.32939156889915466, "learning_rate": 3.890617462614569e-05, "loss": 0.03099562406539917, "step": 9200 }, { "epoch": 0.44863600183313634, "grad_norm": 0.5146192312240601, "learning_rate": 3.878557645923782e-05, "loss": 0.032382268905639645, "step": 9300 }, { "epoch": 0.4534600448636002, "grad_norm": 0.6972792148590088, "learning_rate": 3.866497829232996e-05, "loss": 0.0357794189453125, "step": 9400 }, { "epoch": 0.458284087894064, "grad_norm": 0.4266366958618164, "learning_rate": 3.8544380125422095e-05, "loss": 0.02773923635482788, "step": 9500 }, { "epoch": 0.46310813092452785, "grad_norm": 0.18275046348571777, "learning_rate": 3.842378195851423e-05, "loss": 0.029792981147766115, "step": 9600 }, { "epoch": 0.4679321739549917, "grad_norm": 0.19641897082328796, "learning_rate": 3.830318379160637e-05, "loss": 0.0328049373626709, "step": 9700 }, { "epoch": 0.4727562169854555, "grad_norm": 0.5207920670509338, "learning_rate": 3.818258562469851e-05, "loss": 0.028371200561523438, "step": 9800 }, { "epoch": 0.47758026001591936, "grad_norm": 1.656972050666809, "learning_rate": 3.8061987457790646e-05, "loss": 0.029215424060821532, "step": 9900 }, { "epoch": 0.48240430304638315, "grad_norm": 0.6870591044425964, "learning_rate": 3.794138929088278e-05, "loss": 0.02802006721496582, "step": 10000 }, { "epoch": 0.48240430304638315, "eval_accuracy": 0.989747898374386, "eval_f1": 0.8614309687905686, "eval_loss": 0.04934614896774292, "eval_precision": 0.8403657255822574, "eval_recall": 0.8835794396570389, "eval_runtime": 51.7386, "eval_samples_per_second": 289.919, "eval_steps_per_second": 8.06, "step": 10000 }, { "epoch": 0.487228346076847, "grad_norm": 0.1405647099018097, "learning_rate": 3.782079112397492e-05, "loss": 0.035042920112609864, "step": 10100 }, { "epoch": 0.49205238910731086, "grad_norm": 0.4331558346748352, "learning_rate": 3.7700192957067054e-05, "loss": 0.032286217212677, "step": 10200 }, { "epoch": 0.49687643213777466, "grad_norm": 0.3009164035320282, "learning_rate": 3.757959479015919e-05, "loss": 0.028717076778411864, "step": 10300 }, { "epoch": 0.5017004751682385, "grad_norm": 0.3064032196998596, "learning_rate": 3.745899662325133e-05, "loss": 0.029738368988037108, "step": 10400 }, { "epoch": 0.5065245181987024, "grad_norm": 0.34859976172447205, "learning_rate": 3.733839845634346e-05, "loss": 0.030203399658203126, "step": 10500 }, { "epoch": 0.5113485612291662, "grad_norm": 0.4682078957557678, "learning_rate": 3.72178002894356e-05, "loss": 0.033402538299560545, "step": 10600 }, { "epoch": 0.51617260425963, "grad_norm": 0.43761882185935974, "learning_rate": 3.709720212252774e-05, "loss": 0.029749608039855956, "step": 10700 }, { "epoch": 0.5209966472900939, "grad_norm": 1.6429039239883423, "learning_rate": 3.697660395561988e-05, "loss": 0.03141383647918701, "step": 10800 }, { "epoch": 0.5258206903205577, "grad_norm": 0.8676751852035522, "learning_rate": 3.685600578871201e-05, "loss": 0.028559036254882812, "step": 10900 }, { "epoch": 0.5306447333510215, "grad_norm": 0.31465840339660645, "learning_rate": 3.673540762180415e-05, "loss": 0.033083460330963134, "step": 11000 }, { "epoch": 0.5354687763814853, "grad_norm": 0.36446070671081543, "learning_rate": 3.6614809454896285e-05, "loss": 0.031009881496429442, "step": 11100 }, { "epoch": 0.5402928194119492, "grad_norm": 1.3212252855300903, "learning_rate": 3.649421128798842e-05, "loss": 0.029797291755676268, "step": 11200 }, { "epoch": 0.545116862442413, "grad_norm": 0.5250455141067505, "learning_rate": 3.6373613121080564e-05, "loss": 0.02991886615753174, "step": 11300 }, { "epoch": 0.5499409054728768, "grad_norm": 1.0622237920761108, "learning_rate": 3.62530149541727e-05, "loss": 0.03123067855834961, "step": 11400 }, { "epoch": 0.5547649485033407, "grad_norm": 0.3193683624267578, "learning_rate": 3.6132416787264836e-05, "loss": 0.026964287757873535, "step": 11500 }, { "epoch": 0.5595889915338045, "grad_norm": 0.20829251408576965, "learning_rate": 3.601181862035697e-05, "loss": 0.023649635314941405, "step": 11600 }, { "epoch": 0.5644130345642683, "grad_norm": 0.6939885020256042, "learning_rate": 3.589122045344911e-05, "loss": 0.03190106630325317, "step": 11700 }, { "epoch": 0.5692370775947322, "grad_norm": 0.28773602843284607, "learning_rate": 3.5770622286541244e-05, "loss": 0.030272600650787355, "step": 11800 }, { "epoch": 0.574061120625196, "grad_norm": 0.19230112433433533, "learning_rate": 3.565002411963339e-05, "loss": 0.02647350788116455, "step": 11900 }, { "epoch": 0.5788851636556598, "grad_norm": 0.27152901887893677, "learning_rate": 3.552942595272552e-05, "loss": 0.025423860549926756, "step": 12000 }, { "epoch": 0.5837092066861237, "grad_norm": 1.2988700866699219, "learning_rate": 3.540882778581766e-05, "loss": 0.03884052515029907, "step": 12100 }, { "epoch": 0.5885332497165875, "grad_norm": 0.35957372188568115, "learning_rate": 3.5288229618909795e-05, "loss": 0.030858025550842286, "step": 12200 }, { "epoch": 0.5933572927470513, "grad_norm": 1.2661397457122803, "learning_rate": 3.516763145200193e-05, "loss": 0.0320624303817749, "step": 12300 }, { "epoch": 0.5981813357775151, "grad_norm": 0.9611783623695374, "learning_rate": 3.504703328509407e-05, "loss": 0.02826552391052246, "step": 12400 }, { "epoch": 0.603005378807979, "grad_norm": 0.500732421875, "learning_rate": 3.492643511818621e-05, "loss": 0.02862701892852783, "step": 12500 }, { "epoch": 0.603005378807979, "eval_accuracy": 0.9889094144247274, "eval_f1": 0.8610719994918431, "eval_loss": 0.051548413932323456, "eval_precision": 0.8424585171835187, "eval_recall": 0.8805265664920106, "eval_runtime": 52.2114, "eval_samples_per_second": 287.293, "eval_steps_per_second": 7.987, "step": 12500 }, { "epoch": 0.6078294218384428, "grad_norm": 0.2507345378398895, "learning_rate": 3.480583695127834e-05, "loss": 0.030443539619445802, "step": 12600 }, { "epoch": 0.6126534648689066, "grad_norm": 1.4359475374221802, "learning_rate": 3.4685238784370475e-05, "loss": 0.027371883392333984, "step": 12700 }, { "epoch": 0.6174775078993705, "grad_norm": 0.9925899505615234, "learning_rate": 3.456464061746262e-05, "loss": 0.03037006616592407, "step": 12800 }, { "epoch": 0.6223015509298343, "grad_norm": 0.3496329188346863, "learning_rate": 3.4444042450554754e-05, "loss": 0.027849619388580323, "step": 12900 }, { "epoch": 0.6271255939602981, "grad_norm": 0.5233566761016846, "learning_rate": 3.432344428364689e-05, "loss": 0.02637479543685913, "step": 13000 }, { "epoch": 0.631949636990762, "grad_norm": 0.2668863832950592, "learning_rate": 3.4202846116739026e-05, "loss": 0.02920179605484009, "step": 13100 }, { "epoch": 0.6367736800212258, "grad_norm": 0.20490218698978424, "learning_rate": 3.408224794983116e-05, "loss": 0.02677285432815552, "step": 13200 }, { "epoch": 0.6415977230516896, "grad_norm": 0.3680262565612793, "learning_rate": 3.39616497829233e-05, "loss": 0.029742326736450195, "step": 13300 }, { "epoch": 0.6464217660821534, "grad_norm": 0.4216366112232208, "learning_rate": 3.384105161601544e-05, "loss": 0.027399771213531494, "step": 13400 }, { "epoch": 0.6512458091126173, "grad_norm": 0.13440310955047607, "learning_rate": 3.372045344910758e-05, "loss": 0.030674426555633544, "step": 13500 }, { "epoch": 0.6560698521430811, "grad_norm": 0.14363612234592438, "learning_rate": 3.359985528219971e-05, "loss": 0.030937159061431886, "step": 13600 }, { "epoch": 0.6608938951735449, "grad_norm": 0.7481242418289185, "learning_rate": 3.347925711529185e-05, "loss": 0.03099170923233032, "step": 13700 }, { "epoch": 0.6657179382040088, "grad_norm": 0.2472449243068695, "learning_rate": 3.3358658948383985e-05, "loss": 0.028668901920318603, "step": 13800 }, { "epoch": 0.6705419812344726, "grad_norm": 0.23963682353496552, "learning_rate": 3.323806078147612e-05, "loss": 0.026947088241577148, "step": 13900 }, { "epoch": 0.6753660242649364, "grad_norm": 0.5909916758537292, "learning_rate": 3.3117462614568264e-05, "loss": 0.032423651218414305, "step": 14000 }, { "epoch": 0.6801900672954003, "grad_norm": 0.9357315301895142, "learning_rate": 3.29968644476604e-05, "loss": 0.029326210021972655, "step": 14100 }, { "epoch": 0.6850141103258641, "grad_norm": 0.6866487264633179, "learning_rate": 3.2876266280752536e-05, "loss": 0.02546304702758789, "step": 14200 }, { "epoch": 0.6898381533563279, "grad_norm": 0.15798236429691315, "learning_rate": 3.275566811384467e-05, "loss": 0.022423455715179442, "step": 14300 }, { "epoch": 0.6946621963867917, "grad_norm": 0.4801422357559204, "learning_rate": 3.263506994693681e-05, "loss": 0.02844859838485718, "step": 14400 }, { "epoch": 0.6994862394172556, "grad_norm": 2.1221346855163574, "learning_rate": 3.2514471780028944e-05, "loss": 0.028370687961578368, "step": 14500 }, { "epoch": 0.7043102824477194, "grad_norm": 0.604657769203186, "learning_rate": 3.239387361312109e-05, "loss": 0.031170213222503663, "step": 14600 }, { "epoch": 0.7091343254781832, "grad_norm": 0.7991030812263489, "learning_rate": 3.2273275446213216e-05, "loss": 0.02627355098724365, "step": 14700 }, { "epoch": 0.7139583685086471, "grad_norm": 3.5704472064971924, "learning_rate": 3.215267727930535e-05, "loss": 0.025982840061187742, "step": 14800 }, { "epoch": 0.7187824115391109, "grad_norm": 0.49702438712120056, "learning_rate": 3.2032079112397495e-05, "loss": 0.029045536518096923, "step": 14900 }, { "epoch": 0.7236064545695747, "grad_norm": 0.6950443387031555, "learning_rate": 3.191148094548963e-05, "loss": 0.027498562335968018, "step": 15000 }, { "epoch": 0.7236064545695747, "eval_accuracy": 0.9904670236665706, "eval_f1": 0.8604773530897457, "eval_loss": 0.04225612059235573, "eval_precision": 0.837114541955035, "eval_recall": 0.8851816567791105, "eval_runtime": 51.3305, "eval_samples_per_second": 292.224, "eval_steps_per_second": 8.124, "step": 15000 }, { "epoch": 0.7284304976000386, "grad_norm": 0.28991585969924927, "learning_rate": 3.179088277858177e-05, "loss": 0.026163406372070312, "step": 15100 }, { "epoch": 0.7332545406305024, "grad_norm": 0.08772952854633331, "learning_rate": 3.16702846116739e-05, "loss": 0.02845370292663574, "step": 15200 }, { "epoch": 0.7380785836609662, "grad_norm": 1.0799998044967651, "learning_rate": 3.154968644476604e-05, "loss": 0.02897960424423218, "step": 15300 }, { "epoch": 0.74290262669143, "grad_norm": 0.2629171311855316, "learning_rate": 3.1429088277858175e-05, "loss": 0.025154874324798585, "step": 15400 }, { "epoch": 0.7477266697218939, "grad_norm": 0.9425322413444519, "learning_rate": 3.130849011095032e-05, "loss": 0.02195771932601929, "step": 15500 }, { "epoch": 0.7525507127523577, "grad_norm": 0.2703983187675476, "learning_rate": 3.1187891944042454e-05, "loss": 0.02711749792098999, "step": 15600 }, { "epoch": 0.7573747557828215, "grad_norm": 0.2081318199634552, "learning_rate": 3.106729377713459e-05, "loss": 0.0264898681640625, "step": 15700 }, { "epoch": 0.7621987988132854, "grad_norm": 0.41779956221580505, "learning_rate": 3.0946695610226726e-05, "loss": 0.027609102725982666, "step": 15800 }, { "epoch": 0.7670228418437492, "grad_norm": 0.19405648112297058, "learning_rate": 3.082609744331886e-05, "loss": 0.029054667949676514, "step": 15900 }, { "epoch": 0.771846884874213, "grad_norm": 0.3789653182029724, "learning_rate": 3.0705499276411e-05, "loss": 0.02560849666595459, "step": 16000 }, { "epoch": 0.776670927904677, "grad_norm": 0.18012675642967224, "learning_rate": 3.058490110950314e-05, "loss": 0.025810339450836182, "step": 16100 }, { "epoch": 0.7814949709351408, "grad_norm": 0.21590501070022583, "learning_rate": 3.0464302942595273e-05, "loss": 0.026955347061157226, "step": 16200 }, { "epoch": 0.7863190139656046, "grad_norm": 1.0594650506973267, "learning_rate": 3.034370477568741e-05, "loss": 0.02850575923919678, "step": 16300 }, { "epoch": 0.7911430569960684, "grad_norm": 0.512518048286438, "learning_rate": 3.022310660877955e-05, "loss": 0.02473912000656128, "step": 16400 }, { "epoch": 0.7959671000265323, "grad_norm": 0.4950084388256073, "learning_rate": 3.0102508441871685e-05, "loss": 0.029167954921722413, "step": 16500 }, { "epoch": 0.8007911430569961, "grad_norm": 0.2222454696893692, "learning_rate": 2.998191027496382e-05, "loss": 0.02336118459701538, "step": 16600 }, { "epoch": 0.8056151860874599, "grad_norm": 0.30645573139190674, "learning_rate": 2.986131210805596e-05, "loss": 0.026411423683166502, "step": 16700 }, { "epoch": 0.8104392291179238, "grad_norm": 0.13581427931785583, "learning_rate": 2.9740713941148096e-05, "loss": 0.027823078632354736, "step": 16800 }, { "epoch": 0.8152632721483876, "grad_norm": 0.37023600935935974, "learning_rate": 2.9620115774240232e-05, "loss": 0.025036261081695557, "step": 16900 }, { "epoch": 0.8200873151788514, "grad_norm": 0.18537591397762299, "learning_rate": 2.9499517607332372e-05, "loss": 0.025412650108337403, "step": 17000 }, { "epoch": 0.8249113582093153, "grad_norm": 0.3948329985141754, "learning_rate": 2.9378919440424508e-05, "loss": 0.03059415817260742, "step": 17100 }, { "epoch": 0.8297354012397791, "grad_norm": 1.1231082677841187, "learning_rate": 2.9258321273516644e-05, "loss": 0.027097015380859374, "step": 17200 }, { "epoch": 0.8345594442702429, "grad_norm": 0.1559356451034546, "learning_rate": 2.9137723106608783e-05, "loss": 0.025851171016693115, "step": 17300 }, { "epoch": 0.8393834873007068, "grad_norm": 0.4749973714351654, "learning_rate": 2.901712493970092e-05, "loss": 0.02509115219116211, "step": 17400 }, { "epoch": 0.8442075303311706, "grad_norm": 0.24519965052604675, "learning_rate": 2.8896526772793052e-05, "loss": 0.020944011211395264, "step": 17500 }, { "epoch": 0.8442075303311706, "eval_accuracy": 0.9910574454953576, "eval_f1": 0.8787765520040154, "eval_loss": 0.042865537106990814, "eval_precision": 0.8670628648500558, "eval_recall": 0.890811068289092, "eval_runtime": 51.479, "eval_samples_per_second": 291.381, "eval_steps_per_second": 8.1, "step": 17500 }, { "epoch": 0.8490315733616344, "grad_norm": 0.36921805143356323, "learning_rate": 2.8775928605885195e-05, "loss": 0.029057729244232177, "step": 17600 }, { "epoch": 0.8538556163920982, "grad_norm": 0.19858281314373016, "learning_rate": 2.865533043897733e-05, "loss": 0.029160046577453615, "step": 17700 }, { "epoch": 0.8586796594225621, "grad_norm": 0.2647104561328888, "learning_rate": 2.8534732272069463e-05, "loss": 0.024375016689300536, "step": 17800 }, { "epoch": 0.8635037024530259, "grad_norm": 0.24766811728477478, "learning_rate": 2.8414134105161606e-05, "loss": 0.028258707523345947, "step": 17900 }, { "epoch": 0.8683277454834897, "grad_norm": 0.14881408214569092, "learning_rate": 2.829353593825374e-05, "loss": 0.02677877902984619, "step": 18000 }, { "epoch": 0.8731517885139536, "grad_norm": 0.36174142360687256, "learning_rate": 2.8172937771345875e-05, "loss": 0.02804037570953369, "step": 18100 }, { "epoch": 0.8779758315444174, "grad_norm": 0.8773052096366882, "learning_rate": 2.8052339604438018e-05, "loss": 0.022985424995422363, "step": 18200 }, { "epoch": 0.8827998745748812, "grad_norm": 0.14455021917819977, "learning_rate": 2.793174143753015e-05, "loss": 0.02391258955001831, "step": 18300 }, { "epoch": 0.8876239176053451, "grad_norm": 0.19167733192443848, "learning_rate": 2.7811143270622286e-05, "loss": 0.02640686750411987, "step": 18400 }, { "epoch": 0.8924479606358089, "grad_norm": 0.3121378719806671, "learning_rate": 2.7690545103714426e-05, "loss": 0.02407193899154663, "step": 18500 }, { "epoch": 0.8972720036662727, "grad_norm": 0.4771701991558075, "learning_rate": 2.7569946936806562e-05, "loss": 0.02530348062515259, "step": 18600 }, { "epoch": 0.9020960466967365, "grad_norm": 0.31514617800712585, "learning_rate": 2.7449348769898698e-05, "loss": 0.025359327793121337, "step": 18700 }, { "epoch": 0.9069200897272004, "grad_norm": 0.2182740867137909, "learning_rate": 2.7328750602990837e-05, "loss": 0.025950465202331543, "step": 18800 }, { "epoch": 0.9117441327576642, "grad_norm": 0.3909512758255005, "learning_rate": 2.7208152436082973e-05, "loss": 0.024720582962036133, "step": 18900 }, { "epoch": 0.916568175788128, "grad_norm": 0.1596415936946869, "learning_rate": 2.708755426917511e-05, "loss": 0.025378565788269043, "step": 19000 }, { "epoch": 0.9213922188185919, "grad_norm": 0.1132221445441246, "learning_rate": 2.696695610226725e-05, "loss": 0.02627143621444702, "step": 19100 }, { "epoch": 0.9262162618490557, "grad_norm": 0.38963910937309265, "learning_rate": 2.6846357935359385e-05, "loss": 0.026437394618988037, "step": 19200 }, { "epoch": 0.9310403048795195, "grad_norm": 1.0219396352767944, "learning_rate": 2.672575976845152e-05, "loss": 0.02680544376373291, "step": 19300 }, { "epoch": 0.9358643479099834, "grad_norm": 0.1513880342245102, "learning_rate": 2.660516160154366e-05, "loss": 0.02793146848678589, "step": 19400 }, { "epoch": 0.9406883909404472, "grad_norm": 8.48257064819336, "learning_rate": 2.6484563434635796e-05, "loss": 0.026851544380187987, "step": 19500 }, { "epoch": 0.945512433970911, "grad_norm": 0.12812338769435883, "learning_rate": 2.636396526772793e-05, "loss": 0.026399703025817872, "step": 19600 }, { "epoch": 0.9503364770013748, "grad_norm": 1.4106616973876953, "learning_rate": 2.6243367100820072e-05, "loss": 0.026023907661437987, "step": 19700 }, { "epoch": 0.9551605200318387, "grad_norm": 0.12191484868526459, "learning_rate": 2.6122768933912208e-05, "loss": 0.025158686637878416, "step": 19800 }, { "epoch": 0.9599845630623025, "grad_norm": 0.3431759774684906, "learning_rate": 2.600217076700434e-05, "loss": 0.023687126636505126, "step": 19900 }, { "epoch": 0.9648086060927663, "grad_norm": 0.2409236878156662, "learning_rate": 2.5881572600096483e-05, "loss": 0.02651404857635498, "step": 20000 }, { "epoch": 0.9648086060927663, "eval_accuracy": 0.991921840043258, "eval_f1": 0.8712219366623479, "eval_loss": 0.037873830646276474, "eval_precision": 0.8549691512422878, "eval_recall": 0.8881046204477547, "eval_runtime": 51.9311, "eval_samples_per_second": 288.844, "eval_steps_per_second": 8.03, "step": 20000 }, { "epoch": 0.9696326491232302, "grad_norm": 0.2915472686290741, "learning_rate": 2.5760974433188616e-05, "loss": 0.02347031593322754, "step": 20100 }, { "epoch": 0.974456692153694, "grad_norm": 0.4358366131782532, "learning_rate": 2.5640376266280752e-05, "loss": 0.025518434047698976, "step": 20200 }, { "epoch": 0.9792807351841578, "grad_norm": 0.33405473828315735, "learning_rate": 2.551977809937289e-05, "loss": 0.027425525188446046, "step": 20300 }, { "epoch": 0.9841047782146217, "grad_norm": 0.33355358242988586, "learning_rate": 2.5399179932465027e-05, "loss": 0.025573320388793945, "step": 20400 }, { "epoch": 0.9889288212450855, "grad_norm": 0.7017316818237305, "learning_rate": 2.5278581765557163e-05, "loss": 0.027230489253997802, "step": 20500 }, { "epoch": 0.9937528642755493, "grad_norm": 0.26649072766304016, "learning_rate": 2.5157983598649303e-05, "loss": 0.024173839092254637, "step": 20600 }, { "epoch": 0.9985769073060131, "grad_norm": 1.540326714515686, "learning_rate": 2.503738543174144e-05, "loss": 0.02483781099319458, "step": 20700 }, { "epoch": 1.0033768301213246, "grad_norm": 0.19430163502693176, "learning_rate": 2.4916787264833578e-05, "loss": 0.02256415843963623, "step": 20800 }, { "epoch": 1.0082008731517884, "grad_norm": 0.12203595787286758, "learning_rate": 2.479618909792571e-05, "loss": 0.019598615169525147, "step": 20900 }, { "epoch": 1.0130249161822524, "grad_norm": 0.26934438943862915, "learning_rate": 2.467559093101785e-05, "loss": 0.021361682415008545, "step": 21000 }, { "epoch": 1.0178489592127162, "grad_norm": 0.13813284039497375, "learning_rate": 2.455499276410999e-05, "loss": 0.021079394817352295, "step": 21100 }, { "epoch": 1.02267300224318, "grad_norm": 0.1430957019329071, "learning_rate": 2.4434394597202122e-05, "loss": 0.020538933277130127, "step": 21200 }, { "epoch": 1.0274970452736438, "grad_norm": 0.7105738520622253, "learning_rate": 2.4313796430294262e-05, "loss": 0.021446900367736818, "step": 21300 }, { "epoch": 1.0323210883041076, "grad_norm": 0.3810221552848816, "learning_rate": 2.4193198263386398e-05, "loss": 0.020385611057281493, "step": 21400 }, { "epoch": 1.0371451313345714, "grad_norm": 0.14890126883983612, "learning_rate": 2.4072600096478534e-05, "loss": 0.021193060874938965, "step": 21500 }, { "epoch": 1.0419691743650352, "grad_norm": 0.465364009141922, "learning_rate": 2.3952001929570673e-05, "loss": 0.02409552574157715, "step": 21600 }, { "epoch": 1.0467932173954992, "grad_norm": 0.18177862465381622, "learning_rate": 2.383140376266281e-05, "loss": 0.024446609020233153, "step": 21700 }, { "epoch": 1.051617260425963, "grad_norm": 0.38837435841560364, "learning_rate": 2.3710805595754945e-05, "loss": 0.02129380464553833, "step": 21800 }, { "epoch": 1.0564413034564268, "grad_norm": 0.13987022638320923, "learning_rate": 2.3590207428847085e-05, "loss": 0.022437899112701415, "step": 21900 }, { "epoch": 1.0612653464868906, "grad_norm": 0.21862603724002838, "learning_rate": 2.3469609261939217e-05, "loss": 0.02214601993560791, "step": 22000 }, { "epoch": 1.0660893895173544, "grad_norm": 0.18493451178073883, "learning_rate": 2.3349011095031357e-05, "loss": 0.022123863697052003, "step": 22100 }, { "epoch": 1.0709134325478182, "grad_norm": 0.44324392080307007, "learning_rate": 2.3228412928123493e-05, "loss": 0.01999701380729675, "step": 22200 }, { "epoch": 1.0757374755782823, "grad_norm": 0.13552401959896088, "learning_rate": 2.310781476121563e-05, "loss": 0.018710813522338866, "step": 22300 }, { "epoch": 1.080561518608746, "grad_norm": 0.5217646360397339, "learning_rate": 2.298721659430777e-05, "loss": 0.022998554706573485, "step": 22400 }, { "epoch": 1.0853855616392099, "grad_norm": 0.13017535209655762, "learning_rate": 2.2866618427399904e-05, "loss": 0.022265849113464357, "step": 22500 }, { "epoch": 1.0853855616392099, "eval_accuracy": 0.9918296662757423, "eval_f1": 0.881381541146177, "eval_loss": 0.03710692375898361, "eval_precision": 0.8665369486986358, "eval_recall": 0.8967436019573031, "eval_runtime": 51.6329, "eval_samples_per_second": 290.512, "eval_steps_per_second": 8.076, "step": 22500 }, { "epoch": 1.0902096046696736, "grad_norm": 0.26805901527404785, "learning_rate": 2.274602026049204e-05, "loss": 0.02201695680618286, "step": 22600 }, { "epoch": 1.0950336477001374, "grad_norm": 0.20556294918060303, "learning_rate": 2.262542209358418e-05, "loss": 0.018640589714050294, "step": 22700 }, { "epoch": 1.0998576907306012, "grad_norm": 0.16025076806545258, "learning_rate": 2.2504823926676316e-05, "loss": 0.02018498182296753, "step": 22800 }, { "epoch": 1.104681733761065, "grad_norm": 0.22326083481311798, "learning_rate": 2.2384225759768452e-05, "loss": 0.020831646919250487, "step": 22900 }, { "epoch": 1.109505776791529, "grad_norm": 0.18669798970222473, "learning_rate": 2.2263627592860588e-05, "loss": 0.020945420265197755, "step": 23000 }, { "epoch": 1.1143298198219929, "grad_norm": 0.22091829776763916, "learning_rate": 2.2143029425952727e-05, "loss": 0.019859465360641478, "step": 23100 }, { "epoch": 1.1191538628524567, "grad_norm": 0.28057217597961426, "learning_rate": 2.2022431259044863e-05, "loss": 0.022815022468566894, "step": 23200 }, { "epoch": 1.1239779058829205, "grad_norm": 0.2595389187335968, "learning_rate": 2.1901833092137e-05, "loss": 0.021522111892700195, "step": 23300 }, { "epoch": 1.1288019489133843, "grad_norm": 0.5332016348838806, "learning_rate": 2.178123492522914e-05, "loss": 0.019616042375564576, "step": 23400 }, { "epoch": 1.133625991943848, "grad_norm": 0.10604680329561234, "learning_rate": 2.1660636758321275e-05, "loss": 0.022921762466430663, "step": 23500 }, { "epoch": 1.138450034974312, "grad_norm": 0.3797323703765869, "learning_rate": 2.154003859141341e-05, "loss": 0.021188838481903075, "step": 23600 }, { "epoch": 1.1432740780047759, "grad_norm": 0.3557288348674774, "learning_rate": 2.141944042450555e-05, "loss": 0.020320808887481688, "step": 23700 }, { "epoch": 1.1480981210352397, "grad_norm": 0.5364207029342651, "learning_rate": 2.1298842257597683e-05, "loss": 0.02103010892868042, "step": 23800 }, { "epoch": 1.1529221640657035, "grad_norm": 0.20504723489284515, "learning_rate": 2.1178244090689822e-05, "loss": 0.024899210929870606, "step": 23900 }, { "epoch": 1.1577462070961673, "grad_norm": 0.3030504882335663, "learning_rate": 2.1057645923781962e-05, "loss": 0.018901402950286864, "step": 24000 }, { "epoch": 1.162570250126631, "grad_norm": 0.2158869206905365, "learning_rate": 2.0937047756874094e-05, "loss": 0.018166555166244505, "step": 24100 }, { "epoch": 1.1673942931570949, "grad_norm": 0.2794812321662903, "learning_rate": 2.0816449589966234e-05, "loss": 0.0199416983127594, "step": 24200 }, { "epoch": 1.1722183361875589, "grad_norm": 0.10596510767936707, "learning_rate": 2.069585142305837e-05, "loss": 0.019620640277862547, "step": 24300 }, { "epoch": 1.1770423792180227, "grad_norm": 1.3163063526153564, "learning_rate": 2.0575253256150506e-05, "loss": 0.021227221488952636, "step": 24400 }, { "epoch": 1.1818664222484865, "grad_norm": 0.29747480154037476, "learning_rate": 2.0454655089242645e-05, "loss": 0.02037898302078247, "step": 24500 }, { "epoch": 1.1866904652789503, "grad_norm": 0.722373902797699, "learning_rate": 2.033405692233478e-05, "loss": 0.020667204856872557, "step": 24600 }, { "epoch": 1.191514508309414, "grad_norm": 0.12926365435123444, "learning_rate": 2.0213458755426917e-05, "loss": 0.018228678703308104, "step": 24700 }, { "epoch": 1.1963385513398779, "grad_norm": 0.33814650774002075, "learning_rate": 2.0092860588519057e-05, "loss": 0.022069990634918213, "step": 24800 }, { "epoch": 1.201162594370342, "grad_norm": 0.1276799589395523, "learning_rate": 1.9972262421611193e-05, "loss": 0.022927966117858887, "step": 24900 }, { "epoch": 1.2059866374008057, "grad_norm": 0.18511514365673065, "learning_rate": 1.985166425470333e-05, "loss": 0.02195762872695923, "step": 25000 }, { "epoch": 1.2059866374008057, "eval_accuracy": 0.9925814983886582, "eval_f1": 0.8818330116962181, "eval_loss": 0.034407418221235275, "eval_precision": 0.8686902636277702, "eval_recall": 0.8953795522452691, "eval_runtime": 51.6069, "eval_samples_per_second": 290.659, "eval_steps_per_second": 8.08, "step": 25000 }, { "epoch": 1.2108106804312695, "grad_norm": 0.28672105073928833, "learning_rate": 1.9731066087795465e-05, "loss": 0.022950747013092042, "step": 25100 }, { "epoch": 1.2156347234617333, "grad_norm": 0.15472128987312317, "learning_rate": 1.9610467920887604e-05, "loss": 0.01865388870239258, "step": 25200 }, { "epoch": 1.220458766492197, "grad_norm": 0.26068541407585144, "learning_rate": 1.948986975397974e-05, "loss": 0.021750383377075196, "step": 25300 }, { "epoch": 1.2252828095226609, "grad_norm": 1.417925238609314, "learning_rate": 1.9369271587071876e-05, "loss": 0.021318423748016357, "step": 25400 }, { "epoch": 1.2301068525531247, "grad_norm": 0.7924548387527466, "learning_rate": 1.9248673420164016e-05, "loss": 0.01893375873565674, "step": 25500 }, { "epoch": 1.2349308955835885, "grad_norm": 0.17900590598583221, "learning_rate": 1.9128075253256152e-05, "loss": 0.01870368480682373, "step": 25600 }, { "epoch": 1.2397549386140525, "grad_norm": 0.1943436861038208, "learning_rate": 1.9007477086348288e-05, "loss": 0.021407904624938964, "step": 25700 }, { "epoch": 1.2445789816445163, "grad_norm": 0.1924910992383957, "learning_rate": 1.8886878919440427e-05, "loss": 0.02078892707824707, "step": 25800 }, { "epoch": 1.24940302467498, "grad_norm": 0.1958584040403366, "learning_rate": 1.876628075253256e-05, "loss": 0.018969409465789795, "step": 25900 }, { "epoch": 1.254227067705444, "grad_norm": 0.0961497351527214, "learning_rate": 1.86456825856247e-05, "loss": 0.024467270374298095, "step": 26000 }, { "epoch": 1.2590511107359077, "grad_norm": 0.43624669313430786, "learning_rate": 1.8525084418716835e-05, "loss": 0.022932977676391603, "step": 26100 }, { "epoch": 1.2638751537663717, "grad_norm": 0.17412593960762024, "learning_rate": 1.840448625180897e-05, "loss": 0.017692303657531737, "step": 26200 }, { "epoch": 1.2686991967968355, "grad_norm": 0.4037439227104187, "learning_rate": 1.828388808490111e-05, "loss": 0.02168938159942627, "step": 26300 }, { "epoch": 1.2735232398272993, "grad_norm": 0.20430967211723328, "learning_rate": 1.8163289917993247e-05, "loss": 0.018443295955657957, "step": 26400 }, { "epoch": 1.2783472828577631, "grad_norm": 0.2996050715446472, "learning_rate": 1.8042691751085383e-05, "loss": 0.019166781902313232, "step": 26500 }, { "epoch": 1.283171325888227, "grad_norm": 0.3298969864845276, "learning_rate": 1.7922093584177522e-05, "loss": 0.01918817639350891, "step": 26600 }, { "epoch": 1.2879953689186907, "grad_norm": 0.28155457973480225, "learning_rate": 1.780149541726966e-05, "loss": 0.021806249618530272, "step": 26700 }, { "epoch": 1.2928194119491545, "grad_norm": 0.2508911192417145, "learning_rate": 1.7680897250361794e-05, "loss": 0.020128331184387206, "step": 26800 }, { "epoch": 1.2976434549796183, "grad_norm": 0.2319284975528717, "learning_rate": 1.7560299083453934e-05, "loss": 0.018995124101638793, "step": 26900 }, { "epoch": 1.3024674980100823, "grad_norm": 0.12885890901088715, "learning_rate": 1.743970091654607e-05, "loss": 0.019624507427215575, "step": 27000 }, { "epoch": 1.3072915410405461, "grad_norm": 0.1364358514547348, "learning_rate": 1.7319102749638206e-05, "loss": 0.01931032657623291, "step": 27100 }, { "epoch": 1.31211558407101, "grad_norm": 1.741729974746704, "learning_rate": 1.7198504582730342e-05, "loss": 0.020110676288604735, "step": 27200 }, { "epoch": 1.3169396271014737, "grad_norm": 0.5716229677200317, "learning_rate": 1.707790641582248e-05, "loss": 0.01891273021697998, "step": 27300 }, { "epoch": 1.3217636701319375, "grad_norm": 0.9453685879707336, "learning_rate": 1.6957308248914617e-05, "loss": 0.020238091945648195, "step": 27400 }, { "epoch": 1.3265877131624015, "grad_norm": 0.14117585122585297, "learning_rate": 1.6836710082006753e-05, "loss": 0.022481341361999512, "step": 27500 }, { "epoch": 1.3265877131624015, "eval_accuracy": 0.992788783174592, "eval_f1": 0.889221237047324, "eval_loss": 0.03323497995734215, "eval_precision": 0.877614709851552, "eval_recall": 0.9011388732516347, "eval_runtime": 52.1434, "eval_samples_per_second": 287.668, "eval_steps_per_second": 7.997, "step": 27500 }, { "epoch": 1.3314117561928653, "grad_norm": 0.25555455684661865, "learning_rate": 1.6716111915098893e-05, "loss": 0.018107813596725465, "step": 27600 }, { "epoch": 1.3362357992233291, "grad_norm": 0.20916156470775604, "learning_rate": 1.659551374819103e-05, "loss": 0.019892256259918212, "step": 27700 }, { "epoch": 1.341059842253793, "grad_norm": 0.15623128414154053, "learning_rate": 1.6474915581283165e-05, "loss": 0.017413014173507692, "step": 27800 }, { "epoch": 1.3458838852842567, "grad_norm": 0.15014760196208954, "learning_rate": 1.6354317414375304e-05, "loss": 0.020558416843414307, "step": 27900 }, { "epoch": 1.3507079283147205, "grad_norm": 0.4308200180530548, "learning_rate": 1.6233719247467437e-05, "loss": 0.017611211538314818, "step": 28000 }, { "epoch": 1.3555319713451843, "grad_norm": 0.15497736632823944, "learning_rate": 1.6113121080559576e-05, "loss": 0.017815752029418944, "step": 28100 }, { "epoch": 1.3603560143756481, "grad_norm": 0.4078068733215332, "learning_rate": 1.5992522913651712e-05, "loss": 0.01794821858406067, "step": 28200 }, { "epoch": 1.365180057406112, "grad_norm": 0.44584575295448303, "learning_rate": 1.587192474674385e-05, "loss": 0.019282504320144653, "step": 28300 }, { "epoch": 1.370004100436576, "grad_norm": 0.550137460231781, "learning_rate": 1.5751326579835988e-05, "loss": 0.020532405376434325, "step": 28400 }, { "epoch": 1.3748281434670397, "grad_norm": 0.15548627078533173, "learning_rate": 1.5630728412928124e-05, "loss": 0.02003218173980713, "step": 28500 }, { "epoch": 1.3796521864975035, "grad_norm": 0.15787184238433838, "learning_rate": 1.551013024602026e-05, "loss": 0.017421540021896362, "step": 28600 }, { "epoch": 1.3844762295279673, "grad_norm": 0.1659448891878128, "learning_rate": 1.53895320791124e-05, "loss": 0.019184736013412477, "step": 28700 }, { "epoch": 1.3893002725584314, "grad_norm": 0.45317932963371277, "learning_rate": 1.5268933912204535e-05, "loss": 0.018715277910232545, "step": 28800 }, { "epoch": 1.3941243155888952, "grad_norm": 0.16978032886981964, "learning_rate": 1.5148335745296671e-05, "loss": 0.019075859785079956, "step": 28900 }, { "epoch": 1.398948358619359, "grad_norm": 0.31665724515914917, "learning_rate": 1.502773757838881e-05, "loss": 0.018271996974945068, "step": 29000 }, { "epoch": 1.4037724016498228, "grad_norm": 0.3004429042339325, "learning_rate": 1.4907139411480947e-05, "loss": 0.019862807989120483, "step": 29100 }, { "epoch": 1.4085964446802866, "grad_norm": 0.20420145988464355, "learning_rate": 1.4786541244573083e-05, "loss": 0.018257253170013428, "step": 29200 }, { "epoch": 1.4134204877107504, "grad_norm": 0.122472383081913, "learning_rate": 1.466594307766522e-05, "loss": 0.025323121547698973, "step": 29300 }, { "epoch": 1.4182445307412141, "grad_norm": 0.9836609363555908, "learning_rate": 1.4545344910757358e-05, "loss": 0.019051806926727297, "step": 29400 }, { "epoch": 1.423068573771678, "grad_norm": 0.16322240233421326, "learning_rate": 1.4424746743849493e-05, "loss": 0.019138084650039675, "step": 29500 }, { "epoch": 1.4278926168021417, "grad_norm": 0.2133868932723999, "learning_rate": 1.430414857694163e-05, "loss": 0.017339247465133666, "step": 29600 }, { "epoch": 1.4327166598326058, "grad_norm": 0.2609802186489105, "learning_rate": 1.418355041003377e-05, "loss": 0.01856675386428833, "step": 29700 }, { "epoch": 1.4375407028630696, "grad_norm": 0.2504105269908905, "learning_rate": 1.4062952243125904e-05, "loss": 0.022142369747161866, "step": 29800 }, { "epoch": 1.4423647458935334, "grad_norm": 0.24993453919887543, "learning_rate": 1.3942354076218042e-05, "loss": 0.01751198887825012, "step": 29900 }, { "epoch": 1.4471887889239972, "grad_norm": 0.126504585146904, "learning_rate": 1.382175590931018e-05, "loss": 0.018583767414093018, "step": 30000 }, { "epoch": 1.4471887889239972, "eval_accuracy": 0.9919800326983255, "eval_f1": 0.8861820618929587, "eval_loss": 0.03896835818886757, "eval_precision": 0.8710864791383457, "eval_recall": 0.9018100723162863, "eval_runtime": 51.5929, "eval_samples_per_second": 290.738, "eval_steps_per_second": 8.083, "step": 30000 }, { "epoch": 1.452012831954461, "grad_norm": 0.7999847531318665, "learning_rate": 1.3701157742402316e-05, "loss": 0.02096844673156738, "step": 30100 }, { "epoch": 1.456836874984925, "grad_norm": 0.11665287613868713, "learning_rate": 1.3580559575494453e-05, "loss": 0.01827834129333496, "step": 30200 }, { "epoch": 1.4616609180153888, "grad_norm": 0.22630015015602112, "learning_rate": 1.3459961408586591e-05, "loss": 0.017943538427352905, "step": 30300 }, { "epoch": 1.4664849610458526, "grad_norm": 0.21670867502689362, "learning_rate": 1.3339363241678725e-05, "loss": 0.020002198219299317, "step": 30400 }, { "epoch": 1.4713090040763164, "grad_norm": 0.25701120495796204, "learning_rate": 1.3218765074770865e-05, "loss": 0.01862887978553772, "step": 30500 }, { "epoch": 1.4761330471067802, "grad_norm": 0.14079546928405762, "learning_rate": 1.3098166907863003e-05, "loss": 0.02005054712295532, "step": 30600 }, { "epoch": 1.480957090137244, "grad_norm": 0.31404340267181396, "learning_rate": 1.2977568740955137e-05, "loss": 0.018181434869766235, "step": 30700 }, { "epoch": 1.4857811331677078, "grad_norm": 0.1643984615802765, "learning_rate": 1.2856970574047275e-05, "loss": 0.01885037899017334, "step": 30800 }, { "epoch": 1.4906051761981716, "grad_norm": 0.1323440670967102, "learning_rate": 1.2736372407139412e-05, "loss": 0.018592065572738646, "step": 30900 }, { "epoch": 1.4954292192286356, "grad_norm": 0.2534601092338562, "learning_rate": 1.2615774240231548e-05, "loss": 0.018988220691680907, "step": 31000 }, { "epoch": 1.5002532622590994, "grad_norm": 0.2373075932264328, "learning_rate": 1.2495176073323686e-05, "loss": 0.019056109189987184, "step": 31100 }, { "epoch": 1.5050773052895632, "grad_norm": 0.151611328125, "learning_rate": 1.2374577906415822e-05, "loss": 0.018509570360183716, "step": 31200 }, { "epoch": 1.509901348320027, "grad_norm": 0.8050407767295837, "learning_rate": 1.225397973950796e-05, "loss": 0.01847294807434082, "step": 31300 }, { "epoch": 1.514725391350491, "grad_norm": 0.4117303788661957, "learning_rate": 1.2133381572600098e-05, "loss": 0.016792016029357912, "step": 31400 }, { "epoch": 1.5195494343809548, "grad_norm": 0.3043079674243927, "learning_rate": 1.2012783405692234e-05, "loss": 0.02204496622085571, "step": 31500 }, { "epoch": 1.5243734774114186, "grad_norm": 0.14158490300178528, "learning_rate": 1.1892185238784371e-05, "loss": 0.020702006816864012, "step": 31600 }, { "epoch": 1.5291975204418824, "grad_norm": 0.4225039482116699, "learning_rate": 1.1771587071876507e-05, "loss": 0.019063092470169067, "step": 31700 }, { "epoch": 1.5340215634723462, "grad_norm": 0.3363790810108185, "learning_rate": 1.1650988904968645e-05, "loss": 0.017193055152893065, "step": 31800 }, { "epoch": 1.53884560650281, "grad_norm": 0.12055296450853348, "learning_rate": 1.1530390738060783e-05, "loss": 0.019255086183547973, "step": 31900 }, { "epoch": 1.5436696495332738, "grad_norm": 0.20997734367847443, "learning_rate": 1.1409792571152919e-05, "loss": 0.020008976459503173, "step": 32000 }, { "epoch": 1.5484936925637376, "grad_norm": 0.25966885685920715, "learning_rate": 1.1289194404245055e-05, "loss": 0.018391019105911253, "step": 32100 }, { "epoch": 1.5533177355942014, "grad_norm": 1.1394667625427246, "learning_rate": 1.1168596237337194e-05, "loss": 0.02040395259857178, "step": 32200 }, { "epoch": 1.5581417786246652, "grad_norm": 0.11998942494392395, "learning_rate": 1.104799807042933e-05, "loss": 0.017555311918258668, "step": 32300 }, { "epoch": 1.5629658216551292, "grad_norm": 0.11283577978610992, "learning_rate": 1.0927399903521466e-05, "loss": 0.018316521644592285, "step": 32400 }, { "epoch": 1.567789864685593, "grad_norm": 0.8829536437988281, "learning_rate": 1.0806801736613604e-05, "loss": 0.019955469369888304, "step": 32500 }, { "epoch": 1.567789864685593, "eval_accuracy": 0.9931243466600177, "eval_f1": 0.8941826120457173, "eval_loss": 0.031499363481998444, "eval_precision": 0.8840407973253206, "eval_recall": 0.9045598233230849, "eval_runtime": 52.2852, "eval_samples_per_second": 286.888, "eval_steps_per_second": 7.975, "step": 32500 }, { "epoch": 1.5726139077160568, "grad_norm": 0.6669954061508179, "learning_rate": 1.0686203569705742e-05, "loss": 0.018743941783905028, "step": 32600 }, { "epoch": 1.5774379507465208, "grad_norm": 0.2682594358921051, "learning_rate": 1.0565605402797878e-05, "loss": 0.018420085906982422, "step": 32700 }, { "epoch": 1.5822619937769846, "grad_norm": 0.16349567472934723, "learning_rate": 1.0445007235890016e-05, "loss": 0.02064610242843628, "step": 32800 }, { "epoch": 1.5870860368074484, "grad_norm": 0.6123493313789368, "learning_rate": 1.0324409068982152e-05, "loss": 0.0181715726852417, "step": 32900 }, { "epoch": 1.5919100798379122, "grad_norm": 0.2621537744998932, "learning_rate": 1.020381090207429e-05, "loss": 0.01923044562339783, "step": 33000 }, { "epoch": 1.596734122868376, "grad_norm": 0.09542077034711838, "learning_rate": 1.0083212735166427e-05, "loss": 0.017349140644073488, "step": 33100 }, { "epoch": 1.6015581658988398, "grad_norm": 0.18252168595790863, "learning_rate": 9.962614568258563e-06, "loss": 0.019681899547576903, "step": 33200 }, { "epoch": 1.6063822089293036, "grad_norm": 0.19957713782787323, "learning_rate": 9.8420164013507e-06, "loss": 0.019357409477233887, "step": 33300 }, { "epoch": 1.6112062519597674, "grad_norm": 0.5182835459709167, "learning_rate": 9.721418234442837e-06, "loss": 0.01951758861541748, "step": 33400 }, { "epoch": 1.6160302949902312, "grad_norm": 0.4481932520866394, "learning_rate": 9.600820067534975e-06, "loss": 0.017961139678955077, "step": 33500 }, { "epoch": 1.620854338020695, "grad_norm": 0.15489070117473602, "learning_rate": 9.48022190062711e-06, "loss": 0.0193113911151886, "step": 33600 }, { "epoch": 1.625678381051159, "grad_norm": 0.2616223394870758, "learning_rate": 9.359623733719248e-06, "loss": 0.022246689796447755, "step": 33700 }, { "epoch": 1.6305024240816228, "grad_norm": 0.12462881952524185, "learning_rate": 9.239025566811384e-06, "loss": 0.01692581295967102, "step": 33800 }, { "epoch": 1.6353264671120866, "grad_norm": 0.48885273933410645, "learning_rate": 9.118427399903522e-06, "loss": 0.017899035215377807, "step": 33900 }, { "epoch": 1.6401505101425504, "grad_norm": 1.0648194551467896, "learning_rate": 8.99782923299566e-06, "loss": 0.01802402377128601, "step": 34000 }, { "epoch": 1.6449745531730144, "grad_norm": 0.2746858298778534, "learning_rate": 8.877231066087796e-06, "loss": 0.020917999744415283, "step": 34100 }, { "epoch": 1.6497985962034782, "grad_norm": 0.12474814057350159, "learning_rate": 8.756632899179932e-06, "loss": 0.015847266912460328, "step": 34200 }, { "epoch": 1.654622639233942, "grad_norm": 0.11499933153390884, "learning_rate": 8.63603473227207e-06, "loss": 0.017190442085266114, "step": 34300 }, { "epoch": 1.6594466822644058, "grad_norm": 0.1851770579814911, "learning_rate": 8.515436565364207e-06, "loss": 0.018469662666320802, "step": 34400 }, { "epoch": 1.6642707252948696, "grad_norm": 0.2300252914428711, "learning_rate": 8.394838398456343e-06, "loss": 0.01794400453567505, "step": 34500 }, { "epoch": 1.6690947683253334, "grad_norm": 0.11766080558300018, "learning_rate": 8.274240231548481e-06, "loss": 0.018363571166992186, "step": 34600 }, { "epoch": 1.6739188113557972, "grad_norm": 0.20575584471225739, "learning_rate": 8.153642064640619e-06, "loss": 0.016927268505096436, "step": 34700 }, { "epoch": 1.678742854386261, "grad_norm": 0.35185614228248596, "learning_rate": 8.033043897732755e-06, "loss": 0.01612231135368347, "step": 34800 }, { "epoch": 1.6835668974167248, "grad_norm": 0.1776873916387558, "learning_rate": 7.912445730824891e-06, "loss": 0.01680509090423584, "step": 34900 }, { "epoch": 1.6883909404471886, "grad_norm": 0.25137367844581604, "learning_rate": 7.791847563917029e-06, "loss": 0.016988718509674074, "step": 35000 }, { "epoch": 1.6883909404471886, "eval_accuracy": 0.993225440469551, "eval_f1": 0.89652071512686, "eval_loss": 0.031313586980104446, "eval_precision": 0.886667796035914, "eval_recall": 0.9065950720997705, "eval_runtime": 51.6493, "eval_samples_per_second": 290.42, "eval_steps_per_second": 8.074, "step": 35000 }, { "epoch": 1.6932149834776526, "grad_norm": 0.14376111328601837, "learning_rate": 7.671249397009166e-06, "loss": 0.019529181718826293, "step": 35100 }, { "epoch": 1.6980390265081164, "grad_norm": 0.8683088421821594, "learning_rate": 7.5506512301013025e-06, "loss": 0.019479182958602907, "step": 35200 }, { "epoch": 1.7028630695385802, "grad_norm": 0.11346932500600815, "learning_rate": 7.43005306319344e-06, "loss": 0.01975212812423706, "step": 35300 }, { "epoch": 1.7076871125690443, "grad_norm": 0.8985689282417297, "learning_rate": 7.309454896285576e-06, "loss": 0.016446800231933595, "step": 35400 }, { "epoch": 1.712511155599508, "grad_norm": 0.6181161403656006, "learning_rate": 7.188856729377713e-06, "loss": 0.016956570148468016, "step": 35500 }, { "epoch": 1.7173351986299719, "grad_norm": 0.27897560596466064, "learning_rate": 7.068258562469851e-06, "loss": 0.020130460262298585, "step": 35600 }, { "epoch": 1.7221592416604357, "grad_norm": 0.15588901937007904, "learning_rate": 6.947660395561988e-06, "loss": 0.016974217891693115, "step": 35700 }, { "epoch": 1.7269832846908995, "grad_norm": 0.5564957857131958, "learning_rate": 6.827062228654124e-06, "loss": 0.017226357460021973, "step": 35800 }, { "epoch": 1.7318073277213633, "grad_norm": 0.12989383935928345, "learning_rate": 6.706464061746262e-06, "loss": 0.015787020921707154, "step": 35900 }, { "epoch": 1.736631370751827, "grad_norm": 0.24173200130462646, "learning_rate": 6.585865894838398e-06, "loss": 0.01873793125152588, "step": 36000 }, { "epoch": 1.7414554137822909, "grad_norm": 0.21657347679138184, "learning_rate": 6.465267727930535e-06, "loss": 0.01680638313293457, "step": 36100 }, { "epoch": 1.7462794568127546, "grad_norm": 0.12039454281330109, "learning_rate": 6.344669561022673e-06, "loss": 0.017534868717193605, "step": 36200 }, { "epoch": 1.7511034998432184, "grad_norm": 0.08835107833147049, "learning_rate": 6.22407139411481e-06, "loss": 0.015722684860229492, "step": 36300 }, { "epoch": 1.7559275428736825, "grad_norm": 0.15494988858699799, "learning_rate": 6.103473227206947e-06, "loss": 0.01669602155685425, "step": 36400 }, { "epoch": 1.7607515859041463, "grad_norm": 0.2613168954849243, "learning_rate": 5.9828750602990845e-06, "loss": 0.018969074487686158, "step": 36500 }, { "epoch": 1.76557562893461, "grad_norm": 0.26860108971595764, "learning_rate": 5.8622768933912205e-06, "loss": 0.018860089778900146, "step": 36600 }, { "epoch": 1.770399671965074, "grad_norm": 0.3160684406757355, "learning_rate": 5.741678726483358e-06, "loss": 0.017936546802520752, "step": 36700 }, { "epoch": 1.7752237149955379, "grad_norm": 0.16905085742473602, "learning_rate": 5.621080559575495e-06, "loss": 0.018141812086105345, "step": 36800 }, { "epoch": 1.7800477580260017, "grad_norm": 0.23327182233333588, "learning_rate": 5.500482392667632e-06, "loss": 0.01744183659553528, "step": 36900 }, { "epoch": 1.7848718010564655, "grad_norm": 0.15902255475521088, "learning_rate": 5.379884225759769e-06, "loss": 0.017776939868927002, "step": 37000 }, { "epoch": 1.7896958440869293, "grad_norm": 0.240287646651268, "learning_rate": 5.259286058851906e-06, "loss": 0.01649364709854126, "step": 37100 }, { "epoch": 1.794519887117393, "grad_norm": 0.13150164484977722, "learning_rate": 5.138687891944043e-06, "loss": 0.01930005669593811, "step": 37200 }, { "epoch": 1.7993439301478569, "grad_norm": 0.25998786091804504, "learning_rate": 5.0180897250361795e-06, "loss": 0.016344897747039795, "step": 37300 }, { "epoch": 1.8041679731783207, "grad_norm": 0.12845446169376373, "learning_rate": 4.897491558128316e-06, "loss": 0.019464727640151978, "step": 37400 }, { "epoch": 1.8089920162087845, "grad_norm": 0.21504537761211395, "learning_rate": 4.776893391220453e-06, "loss": 0.017011468410491944, "step": 37500 }, { "epoch": 1.8089920162087845, "eval_accuracy": 0.9933099684867659, "eval_f1": 0.8917599033971296, "eval_loss": 0.030537979677319527, "eval_precision": 0.8804042791129492, "eval_recall": 0.9034122894383579, "eval_runtime": 51.6005, "eval_samples_per_second": 290.695, "eval_steps_per_second": 8.081, "step": 37500 }, { "epoch": 1.8138160592392483, "grad_norm": 0.11677803844213486, "learning_rate": 4.656295224312591e-06, "loss": 0.016575688123703004, "step": 37600 }, { "epoch": 1.8186401022697123, "grad_norm": 0.280719131231308, "learning_rate": 4.535697057404727e-06, "loss": 0.01687091827392578, "step": 37700 }, { "epoch": 1.823464145300176, "grad_norm": 0.2764016389846802, "learning_rate": 4.415098890496865e-06, "loss": 0.016562118530273437, "step": 37800 }, { "epoch": 1.8282881883306399, "grad_norm": 0.632255494594574, "learning_rate": 4.294500723589002e-06, "loss": 0.01787501573562622, "step": 37900 }, { "epoch": 1.833112231361104, "grad_norm": 0.09340863674879074, "learning_rate": 4.1739025566811385e-06, "loss": 0.01698790192604065, "step": 38000 }, { "epoch": 1.8379362743915677, "grad_norm": 0.1588761806488037, "learning_rate": 4.053304389773275e-06, "loss": 0.01741109848022461, "step": 38100 }, { "epoch": 1.8427603174220315, "grad_norm": 0.10772903263568878, "learning_rate": 3.932706222865413e-06, "loss": 0.016634883880615233, "step": 38200 }, { "epoch": 1.8475843604524953, "grad_norm": 0.22126013040542603, "learning_rate": 3.8121080559575496e-06, "loss": 0.01804221987724304, "step": 38300 }, { "epoch": 1.852408403482959, "grad_norm": 0.3740140497684479, "learning_rate": 3.6915098890496864e-06, "loss": 0.01612048625946045, "step": 38400 }, { "epoch": 1.857232446513423, "grad_norm": 0.13352862000465393, "learning_rate": 3.5709117221418237e-06, "loss": 0.01714093804359436, "step": 38500 }, { "epoch": 1.8620564895438867, "grad_norm": 0.27527081966400146, "learning_rate": 3.45031355523396e-06, "loss": 0.01763258218765259, "step": 38600 }, { "epoch": 1.8668805325743505, "grad_norm": 0.28769898414611816, "learning_rate": 3.3297153883260975e-06, "loss": 0.02112499475479126, "step": 38700 }, { "epoch": 1.8717045756048143, "grad_norm": 0.15282955765724182, "learning_rate": 3.209117221418235e-06, "loss": 0.01631925821304321, "step": 38800 }, { "epoch": 1.876528618635278, "grad_norm": 0.12284864485263824, "learning_rate": 3.0885190545103717e-06, "loss": 0.01694957971572876, "step": 38900 }, { "epoch": 1.881352661665742, "grad_norm": 0.1236443966627121, "learning_rate": 2.9679208876025086e-06, "loss": 0.017817366123199462, "step": 39000 }, { "epoch": 1.886176704696206, "grad_norm": 0.11388445645570755, "learning_rate": 2.8473227206946454e-06, "loss": 0.01580065131187439, "step": 39100 }, { "epoch": 1.8910007477266697, "grad_norm": 0.2738426625728607, "learning_rate": 2.7267245537867828e-06, "loss": 0.01646868109703064, "step": 39200 }, { "epoch": 1.8958247907571335, "grad_norm": 0.6833071112632751, "learning_rate": 2.6061263868789196e-06, "loss": 0.017300838232040407, "step": 39300 }, { "epoch": 1.9006488337875975, "grad_norm": 0.12390507757663727, "learning_rate": 2.4855282199710565e-06, "loss": 0.017689213752746583, "step": 39400 }, { "epoch": 1.9054728768180613, "grad_norm": 0.5947756171226501, "learning_rate": 2.364930053063194e-06, "loss": 0.016457540988922117, "step": 39500 }, { "epoch": 1.9102969198485251, "grad_norm": 0.11591579020023346, "learning_rate": 2.2443318861553307e-06, "loss": 0.01735694646835327, "step": 39600 }, { "epoch": 1.915120962878989, "grad_norm": 0.14687402546405792, "learning_rate": 2.1237337192474676e-06, "loss": 0.01619683623313904, "step": 39700 }, { "epoch": 1.9199450059094527, "grad_norm": 0.14690209925174713, "learning_rate": 2.003135552339605e-06, "loss": 0.016990303993225098, "step": 39800 }, { "epoch": 1.9247690489399165, "grad_norm": 0.5243352055549622, "learning_rate": 1.8825373854317415e-06, "loss": 0.015626425743103026, "step": 39900 }, { "epoch": 1.9295930919703803, "grad_norm": 0.9058519601821899, "learning_rate": 1.7619392185238784e-06, "loss": 0.017552192211151122, "step": 40000 }, { "epoch": 1.9295930919703803, "eval_accuracy": 0.9934548129785762, "eval_f1": 0.8960502077710661, "eval_loss": 0.030458878725767136, "eval_precision": 0.8865553342092993, "eval_recall": 0.9057506603732732, "eval_runtime": 52.2526, "eval_samples_per_second": 287.067, "eval_steps_per_second": 7.98, "step": 40000 }, { "epoch": 1.9344171350008441, "grad_norm": 0.2622898817062378, "learning_rate": 1.6413410516160157e-06, "loss": 0.016480473279953004, "step": 40100 }, { "epoch": 1.939241178031308, "grad_norm": 0.1672438383102417, "learning_rate": 1.5207428847081526e-06, "loss": 0.018291155099868773, "step": 40200 }, { "epoch": 1.9440652210617717, "grad_norm": 0.39425408840179443, "learning_rate": 1.4001447178002895e-06, "loss": 0.01858603596687317, "step": 40300 }, { "epoch": 1.9488892640922357, "grad_norm": 0.2491266131401062, "learning_rate": 1.2795465508924266e-06, "loss": 0.016655097007751463, "step": 40400 }, { "epoch": 1.9537133071226995, "grad_norm": 0.2883985638618469, "learning_rate": 1.1589483839845637e-06, "loss": 0.018044712543487548, "step": 40500 }, { "epoch": 1.9585373501531633, "grad_norm": 0.1521671712398529, "learning_rate": 1.0383502170767006e-06, "loss": 0.019449379444122315, "step": 40600 }, { "epoch": 1.9633613931836273, "grad_norm": 0.1477108597755432, "learning_rate": 9.177520501688375e-07, "loss": 0.016478629112243653, "step": 40700 }, { "epoch": 1.9681854362140911, "grad_norm": 0.18148507177829742, "learning_rate": 7.971538832609744e-07, "loss": 0.016618763208389283, "step": 40800 }, { "epoch": 1.973009479244555, "grad_norm": 0.17074325680732727, "learning_rate": 6.765557163531114e-07, "loss": 0.01620419979095459, "step": 40900 }, { "epoch": 1.9778335222750187, "grad_norm": 0.23663687705993652, "learning_rate": 5.559575494452484e-07, "loss": 0.01673411011695862, "step": 41000 }, { "epoch": 1.9826575653054825, "grad_norm": 0.1981934756040573, "learning_rate": 4.353593825373855e-07, "loss": 0.015512742996215821, "step": 41100 }, { "epoch": 1.9874816083359463, "grad_norm": 0.19588832557201385, "learning_rate": 3.1476121562952246e-07, "loss": 0.017852275371551513, "step": 41200 }, { "epoch": 1.9923056513664101, "grad_norm": 0.11776227504014969, "learning_rate": 1.9416304872165945e-07, "loss": 0.017226353883743287, "step": 41300 }, { "epoch": 1.997129694396874, "grad_norm": 0.20195287466049194, "learning_rate": 7.356488181379644e-08, "loss": 0.016260911226272583, "step": 41400 }, { "epoch": 2.0, "step": 41460, "total_flos": 1.525095272976519e+18, "train_loss": 0.03464991324659827, "train_runtime": 20225.5982, "train_samples_per_second": 295.171, "train_steps_per_second": 2.05 } ], "logging_steps": 100, "max_steps": 41460, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.525095272976519e+18, "train_batch_size": 72, "trial_name": null, "trial_params": null }