| { |
| "best_global_step": 35000, |
| "best_metric": 0.89652071512686, |
| "best_model_checkpoint": "./lang-ner-xlmr/checkpoint-35000", |
| "epoch": 2.0, |
| "eval_steps": 2500, |
| "global_step": 41460, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004824043030463832, |
| "grad_norm": 2.7279021739959717, |
| "learning_rate": 4.988060781476121e-05, |
| "loss": 3.1182174682617188, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.009648086060927664, |
| "grad_norm": 0.8386039137840271, |
| "learning_rate": 4.9760009647853356e-05, |
| "loss": 0.2861482620239258, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.014472129091391495, |
| "grad_norm": 1.9196710586547852, |
| "learning_rate": 4.963941148094549e-05, |
| "loss": 0.12070045471191407, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.019296172121855328, |
| "grad_norm": 6.516495227813721, |
| "learning_rate": 4.951881331403763e-05, |
| "loss": 0.09790064811706543, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.024120215152319158, |
| "grad_norm": 1.2043635845184326, |
| "learning_rate": 4.939821514712977e-05, |
| "loss": 0.09528629302978515, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.02894425818278299, |
| "grad_norm": 0.529084324836731, |
| "learning_rate": 4.92776169802219e-05, |
| "loss": 0.06706910610198974, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.033768301213246824, |
| "grad_norm": 1.108811616897583, |
| "learning_rate": 4.9157018813314036e-05, |
| "loss": 0.07001821517944336, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.038592344243710656, |
| "grad_norm": 1.234101414680481, |
| "learning_rate": 4.903642064640618e-05, |
| "loss": 0.07005959987640381, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.04341638727417448, |
| "grad_norm": 0.6574804782867432, |
| "learning_rate": 4.8915822479498315e-05, |
| "loss": 0.06500310897827148, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.048240430304638315, |
| "grad_norm": 0.6550615429878235, |
| "learning_rate": 4.879522431259045e-05, |
| "loss": 0.05578082084655762, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.05306447333510215, |
| "grad_norm": 0.9701142907142639, |
| "learning_rate": 4.867462614568259e-05, |
| "loss": 0.05476199150085449, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.05788851636556598, |
| "grad_norm": 0.3067728579044342, |
| "learning_rate": 4.855402797877472e-05, |
| "loss": 0.04951910972595215, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.06271255939602981, |
| "grad_norm": 0.4835965037345886, |
| "learning_rate": 4.843342981186686e-05, |
| "loss": 0.05270035743713379, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.06753660242649365, |
| "grad_norm": 0.9019619822502136, |
| "learning_rate": 4.8312831644959e-05, |
| "loss": 0.05690920352935791, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.07236064545695747, |
| "grad_norm": 1.7734606266021729, |
| "learning_rate": 4.819223347805114e-05, |
| "loss": 0.05044642925262451, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.07718468848742131, |
| "grad_norm": 1.1853278875350952, |
| "learning_rate": 4.8071635311143274e-05, |
| "loss": 0.056004085540771485, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.08200873151788514, |
| "grad_norm": 0.5660464763641357, |
| "learning_rate": 4.795103714423541e-05, |
| "loss": 0.0493979024887085, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.08683277454834896, |
| "grad_norm": 1.2197043895721436, |
| "learning_rate": 4.7830438977327546e-05, |
| "loss": 0.04784996509552002, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.0916568175788128, |
| "grad_norm": 1.1459959745407104, |
| "learning_rate": 4.770984081041968e-05, |
| "loss": 0.049839210510253903, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.09648086060927663, |
| "grad_norm": 0.3328426778316498, |
| "learning_rate": 4.7589242643511825e-05, |
| "loss": 0.04329806327819824, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.10130490363974047, |
| "grad_norm": 1.718967080116272, |
| "learning_rate": 4.746864447660396e-05, |
| "loss": 0.047143783569335934, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.1061289466702043, |
| "grad_norm": 0.7338983416557312, |
| "learning_rate": 4.734804630969609e-05, |
| "loss": 0.04436909198760986, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.11095298970066814, |
| "grad_norm": 0.5654782652854919, |
| "learning_rate": 4.722744814278823e-05, |
| "loss": 0.04844902515411377, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.11577703273113196, |
| "grad_norm": 0.4302056133747101, |
| "learning_rate": 4.710684997588037e-05, |
| "loss": 0.04093062400817871, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.12060107576159579, |
| "grad_norm": 0.554361879825592, |
| "learning_rate": 4.6986251808972505e-05, |
| "loss": 0.040434646606445315, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.12060107576159579, |
| "eval_accuracy": 0.9867528880759852, |
| "eval_f1": 0.8265977004331162, |
| "eval_loss": 0.06485302746295929, |
| "eval_precision": 0.7943624857764557, |
| "eval_recall": 0.8615597800199195, |
| "eval_runtime": 67.6547, |
| "eval_samples_per_second": 221.714, |
| "eval_steps_per_second": 6.164, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.12542511879205961, |
| "grad_norm": 0.8634827136993408, |
| "learning_rate": 4.686565364206465e-05, |
| "loss": 0.04507491588592529, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.13024916182252347, |
| "grad_norm": 0.6373780965805054, |
| "learning_rate": 4.674505547515678e-05, |
| "loss": 0.04472970962524414, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.1350732048529873, |
| "grad_norm": 0.35323283076286316, |
| "learning_rate": 4.662445730824891e-05, |
| "loss": 0.03842374086380005, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.13989724788345112, |
| "grad_norm": 0.8187289834022522, |
| "learning_rate": 4.6503859141341056e-05, |
| "loss": 0.04050546646118164, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.14472129091391495, |
| "grad_norm": 0.2180730253458023, |
| "learning_rate": 4.638326097443319e-05, |
| "loss": 0.04304762363433838, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.14954533394437877, |
| "grad_norm": 0.6171498894691467, |
| "learning_rate": 4.626266280752533e-05, |
| "loss": 0.03792398929595947, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.15436937697484263, |
| "grad_norm": 1.4763296842575073, |
| "learning_rate": 4.6142064640617464e-05, |
| "loss": 0.04089127063751221, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.15919342000530645, |
| "grad_norm": 0.36483830213546753, |
| "learning_rate": 4.60214664737096e-05, |
| "loss": 0.04075708866119385, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.16401746303577028, |
| "grad_norm": 0.32734522223472595, |
| "learning_rate": 4.5900868306801736e-05, |
| "loss": 0.03913374423980713, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.1688415060662341, |
| "grad_norm": 0.27289167046546936, |
| "learning_rate": 4.578027013989388e-05, |
| "loss": 0.039629595279693605, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.17366554909669793, |
| "grad_norm": 1.4992765188217163, |
| "learning_rate": 4.5659671972986015e-05, |
| "loss": 0.03849426031112671, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.17848959212716178, |
| "grad_norm": 0.7519832849502563, |
| "learning_rate": 4.553907380607815e-05, |
| "loss": 0.03754171133041382, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.1833136351576256, |
| "grad_norm": 1.4542765617370605, |
| "learning_rate": 4.541847563917029e-05, |
| "loss": 0.038514294624328614, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.18813767818808944, |
| "grad_norm": 1.8106330633163452, |
| "learning_rate": 4.529787747226242e-05, |
| "loss": 0.03961650609970093, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.19296172121855326, |
| "grad_norm": 0.3401031196117401, |
| "learning_rate": 4.517727930535456e-05, |
| "loss": 0.0379714560508728, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.1977857642490171, |
| "grad_norm": 3.1147701740264893, |
| "learning_rate": 4.50566811384467e-05, |
| "loss": 0.03555563688278198, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.20260980727948094, |
| "grad_norm": 0.3068256676197052, |
| "learning_rate": 4.493608297153884e-05, |
| "loss": 0.040891532897949216, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.20743385030994477, |
| "grad_norm": 0.22388258576393127, |
| "learning_rate": 4.481548480463097e-05, |
| "loss": 0.042806510925292966, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.2122578933404086, |
| "grad_norm": 1.0851870775222778, |
| "learning_rate": 4.469488663772311e-05, |
| "loss": 0.03217351198196411, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.21708193637087242, |
| "grad_norm": 0.14333230257034302, |
| "learning_rate": 4.4574288470815246e-05, |
| "loss": 0.036145191192626956, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.22190597940133627, |
| "grad_norm": 0.5196163654327393, |
| "learning_rate": 4.445369030390738e-05, |
| "loss": 0.04708011627197266, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.2267300224318001, |
| "grad_norm": 0.18328827619552612, |
| "learning_rate": 4.4333092136999525e-05, |
| "loss": 0.040124249458312986, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.23155406546226392, |
| "grad_norm": 0.31492918729782104, |
| "learning_rate": 4.4212493970091654e-05, |
| "loss": 0.041497902870178224, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.23637810849272775, |
| "grad_norm": 0.4818204939365387, |
| "learning_rate": 4.409189580318379e-05, |
| "loss": 0.04126156330108643, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.24120215152319158, |
| "grad_norm": 0.20825903117656708, |
| "learning_rate": 4.397129763627593e-05, |
| "loss": 0.03939923524856567, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.24120215152319158, |
| "eval_accuracy": 0.9893252582883119, |
| "eval_f1": 0.84304320903433, |
| "eval_loss": 0.05377783998847008, |
| "eval_precision": 0.8180670129341073, |
| "eval_recall": 0.8695925172130082, |
| "eval_runtime": 51.5792, |
| "eval_samples_per_second": 290.815, |
| "eval_steps_per_second": 8.085, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.24602619455365543, |
| "grad_norm": 0.20790189504623413, |
| "learning_rate": 4.385069946936807e-05, |
| "loss": 0.03445266008377075, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.25085023758411923, |
| "grad_norm": 0.2234453707933426, |
| "learning_rate": 4.3730101302460205e-05, |
| "loss": 0.03056433916091919, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.2556742806145831, |
| "grad_norm": 0.5091524124145508, |
| "learning_rate": 4.360950313555234e-05, |
| "loss": 0.03808696031570435, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.26049832364504694, |
| "grad_norm": 0.7598561644554138, |
| "learning_rate": 4.348890496864448e-05, |
| "loss": 0.03501533508300781, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.26532236667551073, |
| "grad_norm": 0.2233390510082245, |
| "learning_rate": 4.336830680173661e-05, |
| "loss": 0.03836148738861084, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.2701464097059746, |
| "grad_norm": 0.4892669916152954, |
| "learning_rate": 4.3247708634828756e-05, |
| "loss": 0.03670140504837036, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.2749704527364384, |
| "grad_norm": 0.3863944411277771, |
| "learning_rate": 4.312711046792089e-05, |
| "loss": 0.03313957452774048, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.27979449576690224, |
| "grad_norm": 0.457960844039917, |
| "learning_rate": 4.300651230101303e-05, |
| "loss": 0.03517037630081177, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.2846185387973661, |
| "grad_norm": 0.3622528314590454, |
| "learning_rate": 4.2885914134105164e-05, |
| "loss": 0.0420029878616333, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.2894425818278299, |
| "grad_norm": 0.9826716780662537, |
| "learning_rate": 4.27653159671973e-05, |
| "loss": 0.03928417205810547, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.29426662485829375, |
| "grad_norm": 0.3126944303512573, |
| "learning_rate": 4.2644717800289436e-05, |
| "loss": 0.03383539915084839, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.29909066788875754, |
| "grad_norm": 1.1283291578292847, |
| "learning_rate": 4.252411963338158e-05, |
| "loss": 0.034748728275299075, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.3039147109192214, |
| "grad_norm": 0.2550179958343506, |
| "learning_rate": 4.2403521466473715e-05, |
| "loss": 0.03332434177398682, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.30873875394968525, |
| "grad_norm": 0.6041121482849121, |
| "learning_rate": 4.2282923299565844e-05, |
| "loss": 0.03864547491073608, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.31356279698014905, |
| "grad_norm": 0.3217807412147522, |
| "learning_rate": 4.216232513265799e-05, |
| "loss": 0.03896953821182251, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.3183868400106129, |
| "grad_norm": 0.22055508196353912, |
| "learning_rate": 4.204172696575012e-05, |
| "loss": 0.03473591566085815, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.3232108830410767, |
| "grad_norm": 0.9059926271438599, |
| "learning_rate": 4.192112879884226e-05, |
| "loss": 0.0373721718788147, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.32803492607154056, |
| "grad_norm": 0.2131674885749817, |
| "learning_rate": 4.1800530631934395e-05, |
| "loss": 0.031450369358062745, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.3328589691020044, |
| "grad_norm": 0.1497948318719864, |
| "learning_rate": 4.167993246502653e-05, |
| "loss": 0.03357296228408813, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.3376830121324682, |
| "grad_norm": 0.3575536012649536, |
| "learning_rate": 4.155933429811867e-05, |
| "loss": 0.03312770128250122, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.34250705516293206, |
| "grad_norm": 0.27300477027893066, |
| "learning_rate": 4.143873613121081e-05, |
| "loss": 0.036876497268676756, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.34733109819339586, |
| "grad_norm": 0.36355310678482056, |
| "learning_rate": 4.1318137964302946e-05, |
| "loss": 0.034904708862304686, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.3521551412238597, |
| "grad_norm": 0.444167822599411, |
| "learning_rate": 4.119753979739508e-05, |
| "loss": 0.030987234115600587, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.35697918425432357, |
| "grad_norm": 0.22657343745231628, |
| "learning_rate": 4.107694163048722e-05, |
| "loss": 0.03171445846557617, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.36180322728478737, |
| "grad_norm": 0.3570277988910675, |
| "learning_rate": 4.0956343463579354e-05, |
| "loss": 0.03453096866607666, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.36180322728478737, |
| "eval_accuracy": 0.9905638698370478, |
| "eval_f1": 0.8562651098466053, |
| "eval_loss": 0.045637115836143494, |
| "eval_precision": 0.8355275356944187, |
| "eval_recall": 0.878058286060711, |
| "eval_runtime": 52.1242, |
| "eval_samples_per_second": 287.774, |
| "eval_steps_per_second": 8.0, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.3666272703152512, |
| "grad_norm": 0.4932907819747925, |
| "learning_rate": 4.083574529667149e-05, |
| "loss": 0.030991692543029786, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.371451313345715, |
| "grad_norm": 0.6475630402565002, |
| "learning_rate": 4.071514712976363e-05, |
| "loss": 0.03562487840652466, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.37627535637617887, |
| "grad_norm": 0.2954416871070862, |
| "learning_rate": 4.059454896285577e-05, |
| "loss": 0.03147151708602905, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.3810993994066427, |
| "grad_norm": 0.6999800205230713, |
| "learning_rate": 4.0473950795947905e-05, |
| "loss": 0.03395595073699951, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.3859234424371065, |
| "grad_norm": 1.9642822742462158, |
| "learning_rate": 4.035335262904004e-05, |
| "loss": 0.034128406047821046, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.3907474854675704, |
| "grad_norm": 0.8058770298957825, |
| "learning_rate": 4.023275446213218e-05, |
| "loss": 0.02912992238998413, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.3955715284980342, |
| "grad_norm": 0.5513653755187988, |
| "learning_rate": 4.011215629522431e-05, |
| "loss": 0.033489227294921875, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.40039557152849803, |
| "grad_norm": 0.5218818783760071, |
| "learning_rate": 3.9991558128316456e-05, |
| "loss": 0.03587050437927246, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.4052196145589619, |
| "grad_norm": 0.231138676404953, |
| "learning_rate": 3.987095996140859e-05, |
| "loss": 0.02900606632232666, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.4100436575894257, |
| "grad_norm": 0.941376268863678, |
| "learning_rate": 3.975036179450072e-05, |
| "loss": 0.032960660457611084, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.41486770061988953, |
| "grad_norm": 0.3743444085121155, |
| "learning_rate": 3.9629763627592864e-05, |
| "loss": 0.034282689094543455, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.4196917436503534, |
| "grad_norm": 0.20511318743228912, |
| "learning_rate": 3.9509165460685e-05, |
| "loss": 0.03301868677139282, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.4245157866808172, |
| "grad_norm": 0.5028975605964661, |
| "learning_rate": 3.9388567293777136e-05, |
| "loss": 0.030978357791900633, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.42933982971128104, |
| "grad_norm": 0.5793830752372742, |
| "learning_rate": 3.926796912686927e-05, |
| "loss": 0.03107161045074463, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.43416387274174484, |
| "grad_norm": 0.5201826095581055, |
| "learning_rate": 3.914737095996141e-05, |
| "loss": 0.03163294792175293, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.4389879157722087, |
| "grad_norm": 0.40996044874191284, |
| "learning_rate": 3.9026772793053544e-05, |
| "loss": 0.03236435651779175, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.44381195880267255, |
| "grad_norm": 0.32939156889915466, |
| "learning_rate": 3.890617462614569e-05, |
| "loss": 0.03099562406539917, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.44863600183313634, |
| "grad_norm": 0.5146192312240601, |
| "learning_rate": 3.878557645923782e-05, |
| "loss": 0.032382268905639645, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.4534600448636002, |
| "grad_norm": 0.6972792148590088, |
| "learning_rate": 3.866497829232996e-05, |
| "loss": 0.0357794189453125, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.458284087894064, |
| "grad_norm": 0.4266366958618164, |
| "learning_rate": 3.8544380125422095e-05, |
| "loss": 0.02773923635482788, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.46310813092452785, |
| "grad_norm": 0.18275046348571777, |
| "learning_rate": 3.842378195851423e-05, |
| "loss": 0.029792981147766115, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.4679321739549917, |
| "grad_norm": 0.19641897082328796, |
| "learning_rate": 3.830318379160637e-05, |
| "loss": 0.0328049373626709, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.4727562169854555, |
| "grad_norm": 0.5207920670509338, |
| "learning_rate": 3.818258562469851e-05, |
| "loss": 0.028371200561523438, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.47758026001591936, |
| "grad_norm": 1.656972050666809, |
| "learning_rate": 3.8061987457790646e-05, |
| "loss": 0.029215424060821532, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.48240430304638315, |
| "grad_norm": 0.6870591044425964, |
| "learning_rate": 3.794138929088278e-05, |
| "loss": 0.02802006721496582, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.48240430304638315, |
| "eval_accuracy": 0.989747898374386, |
| "eval_f1": 0.8614309687905686, |
| "eval_loss": 0.04934614896774292, |
| "eval_precision": 0.8403657255822574, |
| "eval_recall": 0.8835794396570389, |
| "eval_runtime": 51.7386, |
| "eval_samples_per_second": 289.919, |
| "eval_steps_per_second": 8.06, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.487228346076847, |
| "grad_norm": 0.1405647099018097, |
| "learning_rate": 3.782079112397492e-05, |
| "loss": 0.035042920112609864, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.49205238910731086, |
| "grad_norm": 0.4331558346748352, |
| "learning_rate": 3.7700192957067054e-05, |
| "loss": 0.032286217212677, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.49687643213777466, |
| "grad_norm": 0.3009164035320282, |
| "learning_rate": 3.757959479015919e-05, |
| "loss": 0.028717076778411864, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.5017004751682385, |
| "grad_norm": 0.3064032196998596, |
| "learning_rate": 3.745899662325133e-05, |
| "loss": 0.029738368988037108, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.5065245181987024, |
| "grad_norm": 0.34859976172447205, |
| "learning_rate": 3.733839845634346e-05, |
| "loss": 0.030203399658203126, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.5113485612291662, |
| "grad_norm": 0.4682078957557678, |
| "learning_rate": 3.72178002894356e-05, |
| "loss": 0.033402538299560545, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.51617260425963, |
| "grad_norm": 0.43761882185935974, |
| "learning_rate": 3.709720212252774e-05, |
| "loss": 0.029749608039855956, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.5209966472900939, |
| "grad_norm": 1.6429039239883423, |
| "learning_rate": 3.697660395561988e-05, |
| "loss": 0.03141383647918701, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.5258206903205577, |
| "grad_norm": 0.8676751852035522, |
| "learning_rate": 3.685600578871201e-05, |
| "loss": 0.028559036254882812, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.5306447333510215, |
| "grad_norm": 0.31465840339660645, |
| "learning_rate": 3.673540762180415e-05, |
| "loss": 0.033083460330963134, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.5354687763814853, |
| "grad_norm": 0.36446070671081543, |
| "learning_rate": 3.6614809454896285e-05, |
| "loss": 0.031009881496429442, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.5402928194119492, |
| "grad_norm": 1.3212252855300903, |
| "learning_rate": 3.649421128798842e-05, |
| "loss": 0.029797291755676268, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.545116862442413, |
| "grad_norm": 0.5250455141067505, |
| "learning_rate": 3.6373613121080564e-05, |
| "loss": 0.02991886615753174, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.5499409054728768, |
| "grad_norm": 1.0622237920761108, |
| "learning_rate": 3.62530149541727e-05, |
| "loss": 0.03123067855834961, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.5547649485033407, |
| "grad_norm": 0.3193683624267578, |
| "learning_rate": 3.6132416787264836e-05, |
| "loss": 0.026964287757873535, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.5595889915338045, |
| "grad_norm": 0.20829251408576965, |
| "learning_rate": 3.601181862035697e-05, |
| "loss": 0.023649635314941405, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.5644130345642683, |
| "grad_norm": 0.6939885020256042, |
| "learning_rate": 3.589122045344911e-05, |
| "loss": 0.03190106630325317, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.5692370775947322, |
| "grad_norm": 0.28773602843284607, |
| "learning_rate": 3.5770622286541244e-05, |
| "loss": 0.030272600650787355, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.574061120625196, |
| "grad_norm": 0.19230112433433533, |
| "learning_rate": 3.565002411963339e-05, |
| "loss": 0.02647350788116455, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.5788851636556598, |
| "grad_norm": 0.27152901887893677, |
| "learning_rate": 3.552942595272552e-05, |
| "loss": 0.025423860549926756, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.5837092066861237, |
| "grad_norm": 1.2988700866699219, |
| "learning_rate": 3.540882778581766e-05, |
| "loss": 0.03884052515029907, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.5885332497165875, |
| "grad_norm": 0.35957372188568115, |
| "learning_rate": 3.5288229618909795e-05, |
| "loss": 0.030858025550842286, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.5933572927470513, |
| "grad_norm": 1.2661397457122803, |
| "learning_rate": 3.516763145200193e-05, |
| "loss": 0.0320624303817749, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.5981813357775151, |
| "grad_norm": 0.9611783623695374, |
| "learning_rate": 3.504703328509407e-05, |
| "loss": 0.02826552391052246, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.603005378807979, |
| "grad_norm": 0.500732421875, |
| "learning_rate": 3.492643511818621e-05, |
| "loss": 0.02862701892852783, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.603005378807979, |
| "eval_accuracy": 0.9889094144247274, |
| "eval_f1": 0.8610719994918431, |
| "eval_loss": 0.051548413932323456, |
| "eval_precision": 0.8424585171835187, |
| "eval_recall": 0.8805265664920106, |
| "eval_runtime": 52.2114, |
| "eval_samples_per_second": 287.293, |
| "eval_steps_per_second": 7.987, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.6078294218384428, |
| "grad_norm": 0.2507345378398895, |
| "learning_rate": 3.480583695127834e-05, |
| "loss": 0.030443539619445802, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.6126534648689066, |
| "grad_norm": 1.4359475374221802, |
| "learning_rate": 3.4685238784370475e-05, |
| "loss": 0.027371883392333984, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.6174775078993705, |
| "grad_norm": 0.9925899505615234, |
| "learning_rate": 3.456464061746262e-05, |
| "loss": 0.03037006616592407, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.6223015509298343, |
| "grad_norm": 0.3496329188346863, |
| "learning_rate": 3.4444042450554754e-05, |
| "loss": 0.027849619388580323, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.6271255939602981, |
| "grad_norm": 0.5233566761016846, |
| "learning_rate": 3.432344428364689e-05, |
| "loss": 0.02637479543685913, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.631949636990762, |
| "grad_norm": 0.2668863832950592, |
| "learning_rate": 3.4202846116739026e-05, |
| "loss": 0.02920179605484009, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.6367736800212258, |
| "grad_norm": 0.20490218698978424, |
| "learning_rate": 3.408224794983116e-05, |
| "loss": 0.02677285432815552, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.6415977230516896, |
| "grad_norm": 0.3680262565612793, |
| "learning_rate": 3.39616497829233e-05, |
| "loss": 0.029742326736450195, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.6464217660821534, |
| "grad_norm": 0.4216366112232208, |
| "learning_rate": 3.384105161601544e-05, |
| "loss": 0.027399771213531494, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.6512458091126173, |
| "grad_norm": 0.13440310955047607, |
| "learning_rate": 3.372045344910758e-05, |
| "loss": 0.030674426555633544, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.6560698521430811, |
| "grad_norm": 0.14363612234592438, |
| "learning_rate": 3.359985528219971e-05, |
| "loss": 0.030937159061431886, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.6608938951735449, |
| "grad_norm": 0.7481242418289185, |
| "learning_rate": 3.347925711529185e-05, |
| "loss": 0.03099170923233032, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.6657179382040088, |
| "grad_norm": 0.2472449243068695, |
| "learning_rate": 3.3358658948383985e-05, |
| "loss": 0.028668901920318603, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.6705419812344726, |
| "grad_norm": 0.23963682353496552, |
| "learning_rate": 3.323806078147612e-05, |
| "loss": 0.026947088241577148, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.6753660242649364, |
| "grad_norm": 0.5909916758537292, |
| "learning_rate": 3.3117462614568264e-05, |
| "loss": 0.032423651218414305, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.6801900672954003, |
| "grad_norm": 0.9357315301895142, |
| "learning_rate": 3.29968644476604e-05, |
| "loss": 0.029326210021972655, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.6850141103258641, |
| "grad_norm": 0.6866487264633179, |
| "learning_rate": 3.2876266280752536e-05, |
| "loss": 0.02546304702758789, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.6898381533563279, |
| "grad_norm": 0.15798236429691315, |
| "learning_rate": 3.275566811384467e-05, |
| "loss": 0.022423455715179442, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.6946621963867917, |
| "grad_norm": 0.4801422357559204, |
| "learning_rate": 3.263506994693681e-05, |
| "loss": 0.02844859838485718, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.6994862394172556, |
| "grad_norm": 2.1221346855163574, |
| "learning_rate": 3.2514471780028944e-05, |
| "loss": 0.028370687961578368, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.7043102824477194, |
| "grad_norm": 0.604657769203186, |
| "learning_rate": 3.239387361312109e-05, |
| "loss": 0.031170213222503663, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.7091343254781832, |
| "grad_norm": 0.7991030812263489, |
| "learning_rate": 3.2273275446213216e-05, |
| "loss": 0.02627355098724365, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.7139583685086471, |
| "grad_norm": 3.5704472064971924, |
| "learning_rate": 3.215267727930535e-05, |
| "loss": 0.025982840061187742, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.7187824115391109, |
| "grad_norm": 0.49702438712120056, |
| "learning_rate": 3.2032079112397495e-05, |
| "loss": 0.029045536518096923, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.7236064545695747, |
| "grad_norm": 0.6950443387031555, |
| "learning_rate": 3.191148094548963e-05, |
| "loss": 0.027498562335968018, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.7236064545695747, |
| "eval_accuracy": 0.9904670236665706, |
| "eval_f1": 0.8604773530897457, |
| "eval_loss": 0.04225612059235573, |
| "eval_precision": 0.837114541955035, |
| "eval_recall": 0.8851816567791105, |
| "eval_runtime": 51.3305, |
| "eval_samples_per_second": 292.224, |
| "eval_steps_per_second": 8.124, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.7284304976000386, |
| "grad_norm": 0.28991585969924927, |
| "learning_rate": 3.179088277858177e-05, |
| "loss": 0.026163406372070312, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.7332545406305024, |
| "grad_norm": 0.08772952854633331, |
| "learning_rate": 3.16702846116739e-05, |
| "loss": 0.02845370292663574, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.7380785836609662, |
| "grad_norm": 1.0799998044967651, |
| "learning_rate": 3.154968644476604e-05, |
| "loss": 0.02897960424423218, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.74290262669143, |
| "grad_norm": 0.2629171311855316, |
| "learning_rate": 3.1429088277858175e-05, |
| "loss": 0.025154874324798585, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.7477266697218939, |
| "grad_norm": 0.9425322413444519, |
| "learning_rate": 3.130849011095032e-05, |
| "loss": 0.02195771932601929, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.7525507127523577, |
| "grad_norm": 0.2703983187675476, |
| "learning_rate": 3.1187891944042454e-05, |
| "loss": 0.02711749792098999, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.7573747557828215, |
| "grad_norm": 0.2081318199634552, |
| "learning_rate": 3.106729377713459e-05, |
| "loss": 0.0264898681640625, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.7621987988132854, |
| "grad_norm": 0.41779956221580505, |
| "learning_rate": 3.0946695610226726e-05, |
| "loss": 0.027609102725982666, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.7670228418437492, |
| "grad_norm": 0.19405648112297058, |
| "learning_rate": 3.082609744331886e-05, |
| "loss": 0.029054667949676514, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.771846884874213, |
| "grad_norm": 0.3789653182029724, |
| "learning_rate": 3.0705499276411e-05, |
| "loss": 0.02560849666595459, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.776670927904677, |
| "grad_norm": 0.18012675642967224, |
| "learning_rate": 3.058490110950314e-05, |
| "loss": 0.025810339450836182, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.7814949709351408, |
| "grad_norm": 0.21590501070022583, |
| "learning_rate": 3.0464302942595273e-05, |
| "loss": 0.026955347061157226, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.7863190139656046, |
| "grad_norm": 1.0594650506973267, |
| "learning_rate": 3.034370477568741e-05, |
| "loss": 0.02850575923919678, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.7911430569960684, |
| "grad_norm": 0.512518048286438, |
| "learning_rate": 3.022310660877955e-05, |
| "loss": 0.02473912000656128, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.7959671000265323, |
| "grad_norm": 0.4950084388256073, |
| "learning_rate": 3.0102508441871685e-05, |
| "loss": 0.029167954921722413, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.8007911430569961, |
| "grad_norm": 0.2222454696893692, |
| "learning_rate": 2.998191027496382e-05, |
| "loss": 0.02336118459701538, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.8056151860874599, |
| "grad_norm": 0.30645573139190674, |
| "learning_rate": 2.986131210805596e-05, |
| "loss": 0.026411423683166502, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.8104392291179238, |
| "grad_norm": 0.13581427931785583, |
| "learning_rate": 2.9740713941148096e-05, |
| "loss": 0.027823078632354736, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.8152632721483876, |
| "grad_norm": 0.37023600935935974, |
| "learning_rate": 2.9620115774240232e-05, |
| "loss": 0.025036261081695557, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.8200873151788514, |
| "grad_norm": 0.18537591397762299, |
| "learning_rate": 2.9499517607332372e-05, |
| "loss": 0.025412650108337403, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.8249113582093153, |
| "grad_norm": 0.3948329985141754, |
| "learning_rate": 2.9378919440424508e-05, |
| "loss": 0.03059415817260742, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.8297354012397791, |
| "grad_norm": 1.1231082677841187, |
| "learning_rate": 2.9258321273516644e-05, |
| "loss": 0.027097015380859374, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.8345594442702429, |
| "grad_norm": 0.1559356451034546, |
| "learning_rate": 2.9137723106608783e-05, |
| "loss": 0.025851171016693115, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.8393834873007068, |
| "grad_norm": 0.4749973714351654, |
| "learning_rate": 2.901712493970092e-05, |
| "loss": 0.02509115219116211, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.8442075303311706, |
| "grad_norm": 0.24519965052604675, |
| "learning_rate": 2.8896526772793052e-05, |
| "loss": 0.020944011211395264, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.8442075303311706, |
| "eval_accuracy": 0.9910574454953576, |
| "eval_f1": 0.8787765520040154, |
| "eval_loss": 0.042865537106990814, |
| "eval_precision": 0.8670628648500558, |
| "eval_recall": 0.890811068289092, |
| "eval_runtime": 51.479, |
| "eval_samples_per_second": 291.381, |
| "eval_steps_per_second": 8.1, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.8490315733616344, |
| "grad_norm": 0.36921805143356323, |
| "learning_rate": 2.8775928605885195e-05, |
| "loss": 0.029057729244232177, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.8538556163920982, |
| "grad_norm": 0.19858281314373016, |
| "learning_rate": 2.865533043897733e-05, |
| "loss": 0.029160046577453615, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.8586796594225621, |
| "grad_norm": 0.2647104561328888, |
| "learning_rate": 2.8534732272069463e-05, |
| "loss": 0.024375016689300536, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.8635037024530259, |
| "grad_norm": 0.24766811728477478, |
| "learning_rate": 2.8414134105161606e-05, |
| "loss": 0.028258707523345947, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.8683277454834897, |
| "grad_norm": 0.14881408214569092, |
| "learning_rate": 2.829353593825374e-05, |
| "loss": 0.02677877902984619, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.8731517885139536, |
| "grad_norm": 0.36174142360687256, |
| "learning_rate": 2.8172937771345875e-05, |
| "loss": 0.02804037570953369, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.8779758315444174, |
| "grad_norm": 0.8773052096366882, |
| "learning_rate": 2.8052339604438018e-05, |
| "loss": 0.022985424995422363, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.8827998745748812, |
| "grad_norm": 0.14455021917819977, |
| "learning_rate": 2.793174143753015e-05, |
| "loss": 0.02391258955001831, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.8876239176053451, |
| "grad_norm": 0.19167733192443848, |
| "learning_rate": 2.7811143270622286e-05, |
| "loss": 0.02640686750411987, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.8924479606358089, |
| "grad_norm": 0.3121378719806671, |
| "learning_rate": 2.7690545103714426e-05, |
| "loss": 0.02407193899154663, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.8972720036662727, |
| "grad_norm": 0.4771701991558075, |
| "learning_rate": 2.7569946936806562e-05, |
| "loss": 0.02530348062515259, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.9020960466967365, |
| "grad_norm": 0.31514617800712585, |
| "learning_rate": 2.7449348769898698e-05, |
| "loss": 0.025359327793121337, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.9069200897272004, |
| "grad_norm": 0.2182740867137909, |
| "learning_rate": 2.7328750602990837e-05, |
| "loss": 0.025950465202331543, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.9117441327576642, |
| "grad_norm": 0.3909512758255005, |
| "learning_rate": 2.7208152436082973e-05, |
| "loss": 0.024720582962036133, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.916568175788128, |
| "grad_norm": 0.1596415936946869, |
| "learning_rate": 2.708755426917511e-05, |
| "loss": 0.025378565788269043, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.9213922188185919, |
| "grad_norm": 0.1132221445441246, |
| "learning_rate": 2.696695610226725e-05, |
| "loss": 0.02627143621444702, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.9262162618490557, |
| "grad_norm": 0.38963910937309265, |
| "learning_rate": 2.6846357935359385e-05, |
| "loss": 0.026437394618988037, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.9310403048795195, |
| "grad_norm": 1.0219396352767944, |
| "learning_rate": 2.672575976845152e-05, |
| "loss": 0.02680544376373291, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.9358643479099834, |
| "grad_norm": 0.1513880342245102, |
| "learning_rate": 2.660516160154366e-05, |
| "loss": 0.02793146848678589, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.9406883909404472, |
| "grad_norm": 8.48257064819336, |
| "learning_rate": 2.6484563434635796e-05, |
| "loss": 0.026851544380187987, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.945512433970911, |
| "grad_norm": 0.12812338769435883, |
| "learning_rate": 2.636396526772793e-05, |
| "loss": 0.026399703025817872, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.9503364770013748, |
| "grad_norm": 1.4106616973876953, |
| "learning_rate": 2.6243367100820072e-05, |
| "loss": 0.026023907661437987, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.9551605200318387, |
| "grad_norm": 0.12191484868526459, |
| "learning_rate": 2.6122768933912208e-05, |
| "loss": 0.025158686637878416, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.9599845630623025, |
| "grad_norm": 0.3431759774684906, |
| "learning_rate": 2.600217076700434e-05, |
| "loss": 0.023687126636505126, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.9648086060927663, |
| "grad_norm": 0.2409236878156662, |
| "learning_rate": 2.5881572600096483e-05, |
| "loss": 0.02651404857635498, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.9648086060927663, |
| "eval_accuracy": 0.991921840043258, |
| "eval_f1": 0.8712219366623479, |
| "eval_loss": 0.037873830646276474, |
| "eval_precision": 0.8549691512422878, |
| "eval_recall": 0.8881046204477547, |
| "eval_runtime": 51.9311, |
| "eval_samples_per_second": 288.844, |
| "eval_steps_per_second": 8.03, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.9696326491232302, |
| "grad_norm": 0.2915472686290741, |
| "learning_rate": 2.5760974433188616e-05, |
| "loss": 0.02347031593322754, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.974456692153694, |
| "grad_norm": 0.4358366131782532, |
| "learning_rate": 2.5640376266280752e-05, |
| "loss": 0.025518434047698976, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.9792807351841578, |
| "grad_norm": 0.33405473828315735, |
| "learning_rate": 2.551977809937289e-05, |
| "loss": 0.027425525188446046, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.9841047782146217, |
| "grad_norm": 0.33355358242988586, |
| "learning_rate": 2.5399179932465027e-05, |
| "loss": 0.025573320388793945, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.9889288212450855, |
| "grad_norm": 0.7017316818237305, |
| "learning_rate": 2.5278581765557163e-05, |
| "loss": 0.027230489253997802, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.9937528642755493, |
| "grad_norm": 0.26649072766304016, |
| "learning_rate": 2.5157983598649303e-05, |
| "loss": 0.024173839092254637, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.9985769073060131, |
| "grad_norm": 1.540326714515686, |
| "learning_rate": 2.503738543174144e-05, |
| "loss": 0.02483781099319458, |
| "step": 20700 |
| }, |
| { |
| "epoch": 1.0033768301213246, |
| "grad_norm": 0.19430163502693176, |
| "learning_rate": 2.4916787264833578e-05, |
| "loss": 0.02256415843963623, |
| "step": 20800 |
| }, |
| { |
| "epoch": 1.0082008731517884, |
| "grad_norm": 0.12203595787286758, |
| "learning_rate": 2.479618909792571e-05, |
| "loss": 0.019598615169525147, |
| "step": 20900 |
| }, |
| { |
| "epoch": 1.0130249161822524, |
| "grad_norm": 0.26934438943862915, |
| "learning_rate": 2.467559093101785e-05, |
| "loss": 0.021361682415008545, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.0178489592127162, |
| "grad_norm": 0.13813284039497375, |
| "learning_rate": 2.455499276410999e-05, |
| "loss": 0.021079394817352295, |
| "step": 21100 |
| }, |
| { |
| "epoch": 1.02267300224318, |
| "grad_norm": 0.1430957019329071, |
| "learning_rate": 2.4434394597202122e-05, |
| "loss": 0.020538933277130127, |
| "step": 21200 |
| }, |
| { |
| "epoch": 1.0274970452736438, |
| "grad_norm": 0.7105738520622253, |
| "learning_rate": 2.4313796430294262e-05, |
| "loss": 0.021446900367736818, |
| "step": 21300 |
| }, |
| { |
| "epoch": 1.0323210883041076, |
| "grad_norm": 0.3810221552848816, |
| "learning_rate": 2.4193198263386398e-05, |
| "loss": 0.020385611057281493, |
| "step": 21400 |
| }, |
| { |
| "epoch": 1.0371451313345714, |
| "grad_norm": 0.14890126883983612, |
| "learning_rate": 2.4072600096478534e-05, |
| "loss": 0.021193060874938965, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.0419691743650352, |
| "grad_norm": 0.465364009141922, |
| "learning_rate": 2.3952001929570673e-05, |
| "loss": 0.02409552574157715, |
| "step": 21600 |
| }, |
| { |
| "epoch": 1.0467932173954992, |
| "grad_norm": 0.18177862465381622, |
| "learning_rate": 2.383140376266281e-05, |
| "loss": 0.024446609020233153, |
| "step": 21700 |
| }, |
| { |
| "epoch": 1.051617260425963, |
| "grad_norm": 0.38837435841560364, |
| "learning_rate": 2.3710805595754945e-05, |
| "loss": 0.02129380464553833, |
| "step": 21800 |
| }, |
| { |
| "epoch": 1.0564413034564268, |
| "grad_norm": 0.13987022638320923, |
| "learning_rate": 2.3590207428847085e-05, |
| "loss": 0.022437899112701415, |
| "step": 21900 |
| }, |
| { |
| "epoch": 1.0612653464868906, |
| "grad_norm": 0.21862603724002838, |
| "learning_rate": 2.3469609261939217e-05, |
| "loss": 0.02214601993560791, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.0660893895173544, |
| "grad_norm": 0.18493451178073883, |
| "learning_rate": 2.3349011095031357e-05, |
| "loss": 0.022123863697052003, |
| "step": 22100 |
| }, |
| { |
| "epoch": 1.0709134325478182, |
| "grad_norm": 0.44324392080307007, |
| "learning_rate": 2.3228412928123493e-05, |
| "loss": 0.01999701380729675, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1.0757374755782823, |
| "grad_norm": 0.13552401959896088, |
| "learning_rate": 2.310781476121563e-05, |
| "loss": 0.018710813522338866, |
| "step": 22300 |
| }, |
| { |
| "epoch": 1.080561518608746, |
| "grad_norm": 0.5217646360397339, |
| "learning_rate": 2.298721659430777e-05, |
| "loss": 0.022998554706573485, |
| "step": 22400 |
| }, |
| { |
| "epoch": 1.0853855616392099, |
| "grad_norm": 0.13017535209655762, |
| "learning_rate": 2.2866618427399904e-05, |
| "loss": 0.022265849113464357, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.0853855616392099, |
| "eval_accuracy": 0.9918296662757423, |
| "eval_f1": 0.881381541146177, |
| "eval_loss": 0.03710692375898361, |
| "eval_precision": 0.8665369486986358, |
| "eval_recall": 0.8967436019573031, |
| "eval_runtime": 51.6329, |
| "eval_samples_per_second": 290.512, |
| "eval_steps_per_second": 8.076, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.0902096046696736, |
| "grad_norm": 0.26805901527404785, |
| "learning_rate": 2.274602026049204e-05, |
| "loss": 0.02201695680618286, |
| "step": 22600 |
| }, |
| { |
| "epoch": 1.0950336477001374, |
| "grad_norm": 0.20556294918060303, |
| "learning_rate": 2.262542209358418e-05, |
| "loss": 0.018640589714050294, |
| "step": 22700 |
| }, |
| { |
| "epoch": 1.0998576907306012, |
| "grad_norm": 0.16025076806545258, |
| "learning_rate": 2.2504823926676316e-05, |
| "loss": 0.02018498182296753, |
| "step": 22800 |
| }, |
| { |
| "epoch": 1.104681733761065, |
| "grad_norm": 0.22326083481311798, |
| "learning_rate": 2.2384225759768452e-05, |
| "loss": 0.020831646919250487, |
| "step": 22900 |
| }, |
| { |
| "epoch": 1.109505776791529, |
| "grad_norm": 0.18669798970222473, |
| "learning_rate": 2.2263627592860588e-05, |
| "loss": 0.020945420265197755, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.1143298198219929, |
| "grad_norm": 0.22091829776763916, |
| "learning_rate": 2.2143029425952727e-05, |
| "loss": 0.019859465360641478, |
| "step": 23100 |
| }, |
| { |
| "epoch": 1.1191538628524567, |
| "grad_norm": 0.28057217597961426, |
| "learning_rate": 2.2022431259044863e-05, |
| "loss": 0.022815022468566894, |
| "step": 23200 |
| }, |
| { |
| "epoch": 1.1239779058829205, |
| "grad_norm": 0.2595389187335968, |
| "learning_rate": 2.1901833092137e-05, |
| "loss": 0.021522111892700195, |
| "step": 23300 |
| }, |
| { |
| "epoch": 1.1288019489133843, |
| "grad_norm": 0.5332016348838806, |
| "learning_rate": 2.178123492522914e-05, |
| "loss": 0.019616042375564576, |
| "step": 23400 |
| }, |
| { |
| "epoch": 1.133625991943848, |
| "grad_norm": 0.10604680329561234, |
| "learning_rate": 2.1660636758321275e-05, |
| "loss": 0.022921762466430663, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.138450034974312, |
| "grad_norm": 0.3797323703765869, |
| "learning_rate": 2.154003859141341e-05, |
| "loss": 0.021188838481903075, |
| "step": 23600 |
| }, |
| { |
| "epoch": 1.1432740780047759, |
| "grad_norm": 0.3557288348674774, |
| "learning_rate": 2.141944042450555e-05, |
| "loss": 0.020320808887481688, |
| "step": 23700 |
| }, |
| { |
| "epoch": 1.1480981210352397, |
| "grad_norm": 0.5364207029342651, |
| "learning_rate": 2.1298842257597683e-05, |
| "loss": 0.02103010892868042, |
| "step": 23800 |
| }, |
| { |
| "epoch": 1.1529221640657035, |
| "grad_norm": 0.20504723489284515, |
| "learning_rate": 2.1178244090689822e-05, |
| "loss": 0.024899210929870606, |
| "step": 23900 |
| }, |
| { |
| "epoch": 1.1577462070961673, |
| "grad_norm": 0.3030504882335663, |
| "learning_rate": 2.1057645923781962e-05, |
| "loss": 0.018901402950286864, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.162570250126631, |
| "grad_norm": 0.2158869206905365, |
| "learning_rate": 2.0937047756874094e-05, |
| "loss": 0.018166555166244505, |
| "step": 24100 |
| }, |
| { |
| "epoch": 1.1673942931570949, |
| "grad_norm": 0.2794812321662903, |
| "learning_rate": 2.0816449589966234e-05, |
| "loss": 0.0199416983127594, |
| "step": 24200 |
| }, |
| { |
| "epoch": 1.1722183361875589, |
| "grad_norm": 0.10596510767936707, |
| "learning_rate": 2.069585142305837e-05, |
| "loss": 0.019620640277862547, |
| "step": 24300 |
| }, |
| { |
| "epoch": 1.1770423792180227, |
| "grad_norm": 1.3163063526153564, |
| "learning_rate": 2.0575253256150506e-05, |
| "loss": 0.021227221488952636, |
| "step": 24400 |
| }, |
| { |
| "epoch": 1.1818664222484865, |
| "grad_norm": 0.29747480154037476, |
| "learning_rate": 2.0454655089242645e-05, |
| "loss": 0.02037898302078247, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.1866904652789503, |
| "grad_norm": 0.722373902797699, |
| "learning_rate": 2.033405692233478e-05, |
| "loss": 0.020667204856872557, |
| "step": 24600 |
| }, |
| { |
| "epoch": 1.191514508309414, |
| "grad_norm": 0.12926365435123444, |
| "learning_rate": 2.0213458755426917e-05, |
| "loss": 0.018228678703308104, |
| "step": 24700 |
| }, |
| { |
| "epoch": 1.1963385513398779, |
| "grad_norm": 0.33814650774002075, |
| "learning_rate": 2.0092860588519057e-05, |
| "loss": 0.022069990634918213, |
| "step": 24800 |
| }, |
| { |
| "epoch": 1.201162594370342, |
| "grad_norm": 0.1276799589395523, |
| "learning_rate": 1.9972262421611193e-05, |
| "loss": 0.022927966117858887, |
| "step": 24900 |
| }, |
| { |
| "epoch": 1.2059866374008057, |
| "grad_norm": 0.18511514365673065, |
| "learning_rate": 1.985166425470333e-05, |
| "loss": 0.02195762872695923, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.2059866374008057, |
| "eval_accuracy": 0.9925814983886582, |
| "eval_f1": 0.8818330116962181, |
| "eval_loss": 0.034407418221235275, |
| "eval_precision": 0.8686902636277702, |
| "eval_recall": 0.8953795522452691, |
| "eval_runtime": 51.6069, |
| "eval_samples_per_second": 290.659, |
| "eval_steps_per_second": 8.08, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.2108106804312695, |
| "grad_norm": 0.28672105073928833, |
| "learning_rate": 1.9731066087795465e-05, |
| "loss": 0.022950747013092042, |
| "step": 25100 |
| }, |
| { |
| "epoch": 1.2156347234617333, |
| "grad_norm": 0.15472128987312317, |
| "learning_rate": 1.9610467920887604e-05, |
| "loss": 0.01865388870239258, |
| "step": 25200 |
| }, |
| { |
| "epoch": 1.220458766492197, |
| "grad_norm": 0.26068541407585144, |
| "learning_rate": 1.948986975397974e-05, |
| "loss": 0.021750383377075196, |
| "step": 25300 |
| }, |
| { |
| "epoch": 1.2252828095226609, |
| "grad_norm": 1.417925238609314, |
| "learning_rate": 1.9369271587071876e-05, |
| "loss": 0.021318423748016357, |
| "step": 25400 |
| }, |
| { |
| "epoch": 1.2301068525531247, |
| "grad_norm": 0.7924548387527466, |
| "learning_rate": 1.9248673420164016e-05, |
| "loss": 0.01893375873565674, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.2349308955835885, |
| "grad_norm": 0.17900590598583221, |
| "learning_rate": 1.9128075253256152e-05, |
| "loss": 0.01870368480682373, |
| "step": 25600 |
| }, |
| { |
| "epoch": 1.2397549386140525, |
| "grad_norm": 0.1943436861038208, |
| "learning_rate": 1.9007477086348288e-05, |
| "loss": 0.021407904624938964, |
| "step": 25700 |
| }, |
| { |
| "epoch": 1.2445789816445163, |
| "grad_norm": 0.1924910992383957, |
| "learning_rate": 1.8886878919440427e-05, |
| "loss": 0.02078892707824707, |
| "step": 25800 |
| }, |
| { |
| "epoch": 1.24940302467498, |
| "grad_norm": 0.1958584040403366, |
| "learning_rate": 1.876628075253256e-05, |
| "loss": 0.018969409465789795, |
| "step": 25900 |
| }, |
| { |
| "epoch": 1.254227067705444, |
| "grad_norm": 0.0961497351527214, |
| "learning_rate": 1.86456825856247e-05, |
| "loss": 0.024467270374298095, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.2590511107359077, |
| "grad_norm": 0.43624669313430786, |
| "learning_rate": 1.8525084418716835e-05, |
| "loss": 0.022932977676391603, |
| "step": 26100 |
| }, |
| { |
| "epoch": 1.2638751537663717, |
| "grad_norm": 0.17412593960762024, |
| "learning_rate": 1.840448625180897e-05, |
| "loss": 0.017692303657531737, |
| "step": 26200 |
| }, |
| { |
| "epoch": 1.2686991967968355, |
| "grad_norm": 0.4037439227104187, |
| "learning_rate": 1.828388808490111e-05, |
| "loss": 0.02168938159942627, |
| "step": 26300 |
| }, |
| { |
| "epoch": 1.2735232398272993, |
| "grad_norm": 0.20430967211723328, |
| "learning_rate": 1.8163289917993247e-05, |
| "loss": 0.018443295955657957, |
| "step": 26400 |
| }, |
| { |
| "epoch": 1.2783472828577631, |
| "grad_norm": 0.2996050715446472, |
| "learning_rate": 1.8042691751085383e-05, |
| "loss": 0.019166781902313232, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.283171325888227, |
| "grad_norm": 0.3298969864845276, |
| "learning_rate": 1.7922093584177522e-05, |
| "loss": 0.01918817639350891, |
| "step": 26600 |
| }, |
| { |
| "epoch": 1.2879953689186907, |
| "grad_norm": 0.28155457973480225, |
| "learning_rate": 1.780149541726966e-05, |
| "loss": 0.021806249618530272, |
| "step": 26700 |
| }, |
| { |
| "epoch": 1.2928194119491545, |
| "grad_norm": 0.2508911192417145, |
| "learning_rate": 1.7680897250361794e-05, |
| "loss": 0.020128331184387206, |
| "step": 26800 |
| }, |
| { |
| "epoch": 1.2976434549796183, |
| "grad_norm": 0.2319284975528717, |
| "learning_rate": 1.7560299083453934e-05, |
| "loss": 0.018995124101638793, |
| "step": 26900 |
| }, |
| { |
| "epoch": 1.3024674980100823, |
| "grad_norm": 0.12885890901088715, |
| "learning_rate": 1.743970091654607e-05, |
| "loss": 0.019624507427215575, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.3072915410405461, |
| "grad_norm": 0.1364358514547348, |
| "learning_rate": 1.7319102749638206e-05, |
| "loss": 0.01931032657623291, |
| "step": 27100 |
| }, |
| { |
| "epoch": 1.31211558407101, |
| "grad_norm": 1.741729974746704, |
| "learning_rate": 1.7198504582730342e-05, |
| "loss": 0.020110676288604735, |
| "step": 27200 |
| }, |
| { |
| "epoch": 1.3169396271014737, |
| "grad_norm": 0.5716229677200317, |
| "learning_rate": 1.707790641582248e-05, |
| "loss": 0.01891273021697998, |
| "step": 27300 |
| }, |
| { |
| "epoch": 1.3217636701319375, |
| "grad_norm": 0.9453685879707336, |
| "learning_rate": 1.6957308248914617e-05, |
| "loss": 0.020238091945648195, |
| "step": 27400 |
| }, |
| { |
| "epoch": 1.3265877131624015, |
| "grad_norm": 0.14117585122585297, |
| "learning_rate": 1.6836710082006753e-05, |
| "loss": 0.022481341361999512, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.3265877131624015, |
| "eval_accuracy": 0.992788783174592, |
| "eval_f1": 0.889221237047324, |
| "eval_loss": 0.03323497995734215, |
| "eval_precision": 0.877614709851552, |
| "eval_recall": 0.9011388732516347, |
| "eval_runtime": 52.1434, |
| "eval_samples_per_second": 287.668, |
| "eval_steps_per_second": 7.997, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.3314117561928653, |
| "grad_norm": 0.25555455684661865, |
| "learning_rate": 1.6716111915098893e-05, |
| "loss": 0.018107813596725465, |
| "step": 27600 |
| }, |
| { |
| "epoch": 1.3362357992233291, |
| "grad_norm": 0.20916156470775604, |
| "learning_rate": 1.659551374819103e-05, |
| "loss": 0.019892256259918212, |
| "step": 27700 |
| }, |
| { |
| "epoch": 1.341059842253793, |
| "grad_norm": 0.15623128414154053, |
| "learning_rate": 1.6474915581283165e-05, |
| "loss": 0.017413014173507692, |
| "step": 27800 |
| }, |
| { |
| "epoch": 1.3458838852842567, |
| "grad_norm": 0.15014760196208954, |
| "learning_rate": 1.6354317414375304e-05, |
| "loss": 0.020558416843414307, |
| "step": 27900 |
| }, |
| { |
| "epoch": 1.3507079283147205, |
| "grad_norm": 0.4308200180530548, |
| "learning_rate": 1.6233719247467437e-05, |
| "loss": 0.017611211538314818, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.3555319713451843, |
| "grad_norm": 0.15497736632823944, |
| "learning_rate": 1.6113121080559576e-05, |
| "loss": 0.017815752029418944, |
| "step": 28100 |
| }, |
| { |
| "epoch": 1.3603560143756481, |
| "grad_norm": 0.4078068733215332, |
| "learning_rate": 1.5992522913651712e-05, |
| "loss": 0.01794821858406067, |
| "step": 28200 |
| }, |
| { |
| "epoch": 1.365180057406112, |
| "grad_norm": 0.44584575295448303, |
| "learning_rate": 1.587192474674385e-05, |
| "loss": 0.019282504320144653, |
| "step": 28300 |
| }, |
| { |
| "epoch": 1.370004100436576, |
| "grad_norm": 0.550137460231781, |
| "learning_rate": 1.5751326579835988e-05, |
| "loss": 0.020532405376434325, |
| "step": 28400 |
| }, |
| { |
| "epoch": 1.3748281434670397, |
| "grad_norm": 0.15548627078533173, |
| "learning_rate": 1.5630728412928124e-05, |
| "loss": 0.02003218173980713, |
| "step": 28500 |
| }, |
| { |
| "epoch": 1.3796521864975035, |
| "grad_norm": 0.15787184238433838, |
| "learning_rate": 1.551013024602026e-05, |
| "loss": 0.017421540021896362, |
| "step": 28600 |
| }, |
| { |
| "epoch": 1.3844762295279673, |
| "grad_norm": 0.1659448891878128, |
| "learning_rate": 1.53895320791124e-05, |
| "loss": 0.019184736013412477, |
| "step": 28700 |
| }, |
| { |
| "epoch": 1.3893002725584314, |
| "grad_norm": 0.45317932963371277, |
| "learning_rate": 1.5268933912204535e-05, |
| "loss": 0.018715277910232545, |
| "step": 28800 |
| }, |
| { |
| "epoch": 1.3941243155888952, |
| "grad_norm": 0.16978032886981964, |
| "learning_rate": 1.5148335745296671e-05, |
| "loss": 0.019075859785079956, |
| "step": 28900 |
| }, |
| { |
| "epoch": 1.398948358619359, |
| "grad_norm": 0.31665724515914917, |
| "learning_rate": 1.502773757838881e-05, |
| "loss": 0.018271996974945068, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.4037724016498228, |
| "grad_norm": 0.3004429042339325, |
| "learning_rate": 1.4907139411480947e-05, |
| "loss": 0.019862807989120483, |
| "step": 29100 |
| }, |
| { |
| "epoch": 1.4085964446802866, |
| "grad_norm": 0.20420145988464355, |
| "learning_rate": 1.4786541244573083e-05, |
| "loss": 0.018257253170013428, |
| "step": 29200 |
| }, |
| { |
| "epoch": 1.4134204877107504, |
| "grad_norm": 0.122472383081913, |
| "learning_rate": 1.466594307766522e-05, |
| "loss": 0.025323121547698973, |
| "step": 29300 |
| }, |
| { |
| "epoch": 1.4182445307412141, |
| "grad_norm": 0.9836609363555908, |
| "learning_rate": 1.4545344910757358e-05, |
| "loss": 0.019051806926727297, |
| "step": 29400 |
| }, |
| { |
| "epoch": 1.423068573771678, |
| "grad_norm": 0.16322240233421326, |
| "learning_rate": 1.4424746743849493e-05, |
| "loss": 0.019138084650039675, |
| "step": 29500 |
| }, |
| { |
| "epoch": 1.4278926168021417, |
| "grad_norm": 0.2133868932723999, |
| "learning_rate": 1.430414857694163e-05, |
| "loss": 0.017339247465133666, |
| "step": 29600 |
| }, |
| { |
| "epoch": 1.4327166598326058, |
| "grad_norm": 0.2609802186489105, |
| "learning_rate": 1.418355041003377e-05, |
| "loss": 0.01856675386428833, |
| "step": 29700 |
| }, |
| { |
| "epoch": 1.4375407028630696, |
| "grad_norm": 0.2504105269908905, |
| "learning_rate": 1.4062952243125904e-05, |
| "loss": 0.022142369747161866, |
| "step": 29800 |
| }, |
| { |
| "epoch": 1.4423647458935334, |
| "grad_norm": 0.24993453919887543, |
| "learning_rate": 1.3942354076218042e-05, |
| "loss": 0.01751198887825012, |
| "step": 29900 |
| }, |
| { |
| "epoch": 1.4471887889239972, |
| "grad_norm": 0.126504585146904, |
| "learning_rate": 1.382175590931018e-05, |
| "loss": 0.018583767414093018, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.4471887889239972, |
| "eval_accuracy": 0.9919800326983255, |
| "eval_f1": 0.8861820618929587, |
| "eval_loss": 0.03896835818886757, |
| "eval_precision": 0.8710864791383457, |
| "eval_recall": 0.9018100723162863, |
| "eval_runtime": 51.5929, |
| "eval_samples_per_second": 290.738, |
| "eval_steps_per_second": 8.083, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.452012831954461, |
| "grad_norm": 0.7999847531318665, |
| "learning_rate": 1.3701157742402316e-05, |
| "loss": 0.02096844673156738, |
| "step": 30100 |
| }, |
| { |
| "epoch": 1.456836874984925, |
| "grad_norm": 0.11665287613868713, |
| "learning_rate": 1.3580559575494453e-05, |
| "loss": 0.01827834129333496, |
| "step": 30200 |
| }, |
| { |
| "epoch": 1.4616609180153888, |
| "grad_norm": 0.22630015015602112, |
| "learning_rate": 1.3459961408586591e-05, |
| "loss": 0.017943538427352905, |
| "step": 30300 |
| }, |
| { |
| "epoch": 1.4664849610458526, |
| "grad_norm": 0.21670867502689362, |
| "learning_rate": 1.3339363241678725e-05, |
| "loss": 0.020002198219299317, |
| "step": 30400 |
| }, |
| { |
| "epoch": 1.4713090040763164, |
| "grad_norm": 0.25701120495796204, |
| "learning_rate": 1.3218765074770865e-05, |
| "loss": 0.01862887978553772, |
| "step": 30500 |
| }, |
| { |
| "epoch": 1.4761330471067802, |
| "grad_norm": 0.14079546928405762, |
| "learning_rate": 1.3098166907863003e-05, |
| "loss": 0.02005054712295532, |
| "step": 30600 |
| }, |
| { |
| "epoch": 1.480957090137244, |
| "grad_norm": 0.31404340267181396, |
| "learning_rate": 1.2977568740955137e-05, |
| "loss": 0.018181434869766235, |
| "step": 30700 |
| }, |
| { |
| "epoch": 1.4857811331677078, |
| "grad_norm": 0.1643984615802765, |
| "learning_rate": 1.2856970574047275e-05, |
| "loss": 0.01885037899017334, |
| "step": 30800 |
| }, |
| { |
| "epoch": 1.4906051761981716, |
| "grad_norm": 0.1323440670967102, |
| "learning_rate": 1.2736372407139412e-05, |
| "loss": 0.018592065572738646, |
| "step": 30900 |
| }, |
| { |
| "epoch": 1.4954292192286356, |
| "grad_norm": 0.2534601092338562, |
| "learning_rate": 1.2615774240231548e-05, |
| "loss": 0.018988220691680907, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.5002532622590994, |
| "grad_norm": 0.2373075932264328, |
| "learning_rate": 1.2495176073323686e-05, |
| "loss": 0.019056109189987184, |
| "step": 31100 |
| }, |
| { |
| "epoch": 1.5050773052895632, |
| "grad_norm": 0.151611328125, |
| "learning_rate": 1.2374577906415822e-05, |
| "loss": 0.018509570360183716, |
| "step": 31200 |
| }, |
| { |
| "epoch": 1.509901348320027, |
| "grad_norm": 0.8050407767295837, |
| "learning_rate": 1.225397973950796e-05, |
| "loss": 0.01847294807434082, |
| "step": 31300 |
| }, |
| { |
| "epoch": 1.514725391350491, |
| "grad_norm": 0.4117303788661957, |
| "learning_rate": 1.2133381572600098e-05, |
| "loss": 0.016792016029357912, |
| "step": 31400 |
| }, |
| { |
| "epoch": 1.5195494343809548, |
| "grad_norm": 0.3043079674243927, |
| "learning_rate": 1.2012783405692234e-05, |
| "loss": 0.02204496622085571, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1.5243734774114186, |
| "grad_norm": 0.14158490300178528, |
| "learning_rate": 1.1892185238784371e-05, |
| "loss": 0.020702006816864012, |
| "step": 31600 |
| }, |
| { |
| "epoch": 1.5291975204418824, |
| "grad_norm": 0.4225039482116699, |
| "learning_rate": 1.1771587071876507e-05, |
| "loss": 0.019063092470169067, |
| "step": 31700 |
| }, |
| { |
| "epoch": 1.5340215634723462, |
| "grad_norm": 0.3363790810108185, |
| "learning_rate": 1.1650988904968645e-05, |
| "loss": 0.017193055152893065, |
| "step": 31800 |
| }, |
| { |
| "epoch": 1.53884560650281, |
| "grad_norm": 0.12055296450853348, |
| "learning_rate": 1.1530390738060783e-05, |
| "loss": 0.019255086183547973, |
| "step": 31900 |
| }, |
| { |
| "epoch": 1.5436696495332738, |
| "grad_norm": 0.20997734367847443, |
| "learning_rate": 1.1409792571152919e-05, |
| "loss": 0.020008976459503173, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.5484936925637376, |
| "grad_norm": 0.25966885685920715, |
| "learning_rate": 1.1289194404245055e-05, |
| "loss": 0.018391019105911253, |
| "step": 32100 |
| }, |
| { |
| "epoch": 1.5533177355942014, |
| "grad_norm": 1.1394667625427246, |
| "learning_rate": 1.1168596237337194e-05, |
| "loss": 0.02040395259857178, |
| "step": 32200 |
| }, |
| { |
| "epoch": 1.5581417786246652, |
| "grad_norm": 0.11998942494392395, |
| "learning_rate": 1.104799807042933e-05, |
| "loss": 0.017555311918258668, |
| "step": 32300 |
| }, |
| { |
| "epoch": 1.5629658216551292, |
| "grad_norm": 0.11283577978610992, |
| "learning_rate": 1.0927399903521466e-05, |
| "loss": 0.018316521644592285, |
| "step": 32400 |
| }, |
| { |
| "epoch": 1.567789864685593, |
| "grad_norm": 0.8829536437988281, |
| "learning_rate": 1.0806801736613604e-05, |
| "loss": 0.019955469369888304, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.567789864685593, |
| "eval_accuracy": 0.9931243466600177, |
| "eval_f1": 0.8941826120457173, |
| "eval_loss": 0.031499363481998444, |
| "eval_precision": 0.8840407973253206, |
| "eval_recall": 0.9045598233230849, |
| "eval_runtime": 52.2852, |
| "eval_samples_per_second": 286.888, |
| "eval_steps_per_second": 7.975, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.5726139077160568, |
| "grad_norm": 0.6669954061508179, |
| "learning_rate": 1.0686203569705742e-05, |
| "loss": 0.018743941783905028, |
| "step": 32600 |
| }, |
| { |
| "epoch": 1.5774379507465208, |
| "grad_norm": 0.2682594358921051, |
| "learning_rate": 1.0565605402797878e-05, |
| "loss": 0.018420085906982422, |
| "step": 32700 |
| }, |
| { |
| "epoch": 1.5822619937769846, |
| "grad_norm": 0.16349567472934723, |
| "learning_rate": 1.0445007235890016e-05, |
| "loss": 0.02064610242843628, |
| "step": 32800 |
| }, |
| { |
| "epoch": 1.5870860368074484, |
| "grad_norm": 0.6123493313789368, |
| "learning_rate": 1.0324409068982152e-05, |
| "loss": 0.0181715726852417, |
| "step": 32900 |
| }, |
| { |
| "epoch": 1.5919100798379122, |
| "grad_norm": 0.2621537744998932, |
| "learning_rate": 1.020381090207429e-05, |
| "loss": 0.01923044562339783, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.596734122868376, |
| "grad_norm": 0.09542077034711838, |
| "learning_rate": 1.0083212735166427e-05, |
| "loss": 0.017349140644073488, |
| "step": 33100 |
| }, |
| { |
| "epoch": 1.6015581658988398, |
| "grad_norm": 0.18252168595790863, |
| "learning_rate": 9.962614568258563e-06, |
| "loss": 0.019681899547576903, |
| "step": 33200 |
| }, |
| { |
| "epoch": 1.6063822089293036, |
| "grad_norm": 0.19957713782787323, |
| "learning_rate": 9.8420164013507e-06, |
| "loss": 0.019357409477233887, |
| "step": 33300 |
| }, |
| { |
| "epoch": 1.6112062519597674, |
| "grad_norm": 0.5182835459709167, |
| "learning_rate": 9.721418234442837e-06, |
| "loss": 0.01951758861541748, |
| "step": 33400 |
| }, |
| { |
| "epoch": 1.6160302949902312, |
| "grad_norm": 0.4481932520866394, |
| "learning_rate": 9.600820067534975e-06, |
| "loss": 0.017961139678955077, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1.620854338020695, |
| "grad_norm": 0.15489070117473602, |
| "learning_rate": 9.48022190062711e-06, |
| "loss": 0.0193113911151886, |
| "step": 33600 |
| }, |
| { |
| "epoch": 1.625678381051159, |
| "grad_norm": 0.2616223394870758, |
| "learning_rate": 9.359623733719248e-06, |
| "loss": 0.022246689796447755, |
| "step": 33700 |
| }, |
| { |
| "epoch": 1.6305024240816228, |
| "grad_norm": 0.12462881952524185, |
| "learning_rate": 9.239025566811384e-06, |
| "loss": 0.01692581295967102, |
| "step": 33800 |
| }, |
| { |
| "epoch": 1.6353264671120866, |
| "grad_norm": 0.48885273933410645, |
| "learning_rate": 9.118427399903522e-06, |
| "loss": 0.017899035215377807, |
| "step": 33900 |
| }, |
| { |
| "epoch": 1.6401505101425504, |
| "grad_norm": 1.0648194551467896, |
| "learning_rate": 8.99782923299566e-06, |
| "loss": 0.01802402377128601, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.6449745531730144, |
| "grad_norm": 0.2746858298778534, |
| "learning_rate": 8.877231066087796e-06, |
| "loss": 0.020917999744415283, |
| "step": 34100 |
| }, |
| { |
| "epoch": 1.6497985962034782, |
| "grad_norm": 0.12474814057350159, |
| "learning_rate": 8.756632899179932e-06, |
| "loss": 0.015847266912460328, |
| "step": 34200 |
| }, |
| { |
| "epoch": 1.654622639233942, |
| "grad_norm": 0.11499933153390884, |
| "learning_rate": 8.63603473227207e-06, |
| "loss": 0.017190442085266114, |
| "step": 34300 |
| }, |
| { |
| "epoch": 1.6594466822644058, |
| "grad_norm": 0.1851770579814911, |
| "learning_rate": 8.515436565364207e-06, |
| "loss": 0.018469662666320802, |
| "step": 34400 |
| }, |
| { |
| "epoch": 1.6642707252948696, |
| "grad_norm": 0.2300252914428711, |
| "learning_rate": 8.394838398456343e-06, |
| "loss": 0.01794400453567505, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1.6690947683253334, |
| "grad_norm": 0.11766080558300018, |
| "learning_rate": 8.274240231548481e-06, |
| "loss": 0.018363571166992186, |
| "step": 34600 |
| }, |
| { |
| "epoch": 1.6739188113557972, |
| "grad_norm": 0.20575584471225739, |
| "learning_rate": 8.153642064640619e-06, |
| "loss": 0.016927268505096436, |
| "step": 34700 |
| }, |
| { |
| "epoch": 1.678742854386261, |
| "grad_norm": 0.35185614228248596, |
| "learning_rate": 8.033043897732755e-06, |
| "loss": 0.01612231135368347, |
| "step": 34800 |
| }, |
| { |
| "epoch": 1.6835668974167248, |
| "grad_norm": 0.1776873916387558, |
| "learning_rate": 7.912445730824891e-06, |
| "loss": 0.01680509090423584, |
| "step": 34900 |
| }, |
| { |
| "epoch": 1.6883909404471886, |
| "grad_norm": 0.25137367844581604, |
| "learning_rate": 7.791847563917029e-06, |
| "loss": 0.016988718509674074, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.6883909404471886, |
| "eval_accuracy": 0.993225440469551, |
| "eval_f1": 0.89652071512686, |
| "eval_loss": 0.031313586980104446, |
| "eval_precision": 0.886667796035914, |
| "eval_recall": 0.9065950720997705, |
| "eval_runtime": 51.6493, |
| "eval_samples_per_second": 290.42, |
| "eval_steps_per_second": 8.074, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.6932149834776526, |
| "grad_norm": 0.14376111328601837, |
| "learning_rate": 7.671249397009166e-06, |
| "loss": 0.019529181718826293, |
| "step": 35100 |
| }, |
| { |
| "epoch": 1.6980390265081164, |
| "grad_norm": 0.8683088421821594, |
| "learning_rate": 7.5506512301013025e-06, |
| "loss": 0.019479182958602907, |
| "step": 35200 |
| }, |
| { |
| "epoch": 1.7028630695385802, |
| "grad_norm": 0.11346932500600815, |
| "learning_rate": 7.43005306319344e-06, |
| "loss": 0.01975212812423706, |
| "step": 35300 |
| }, |
| { |
| "epoch": 1.7076871125690443, |
| "grad_norm": 0.8985689282417297, |
| "learning_rate": 7.309454896285576e-06, |
| "loss": 0.016446800231933595, |
| "step": 35400 |
| }, |
| { |
| "epoch": 1.712511155599508, |
| "grad_norm": 0.6181161403656006, |
| "learning_rate": 7.188856729377713e-06, |
| "loss": 0.016956570148468016, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.7173351986299719, |
| "grad_norm": 0.27897560596466064, |
| "learning_rate": 7.068258562469851e-06, |
| "loss": 0.020130460262298585, |
| "step": 35600 |
| }, |
| { |
| "epoch": 1.7221592416604357, |
| "grad_norm": 0.15588901937007904, |
| "learning_rate": 6.947660395561988e-06, |
| "loss": 0.016974217891693115, |
| "step": 35700 |
| }, |
| { |
| "epoch": 1.7269832846908995, |
| "grad_norm": 0.5564957857131958, |
| "learning_rate": 6.827062228654124e-06, |
| "loss": 0.017226357460021973, |
| "step": 35800 |
| }, |
| { |
| "epoch": 1.7318073277213633, |
| "grad_norm": 0.12989383935928345, |
| "learning_rate": 6.706464061746262e-06, |
| "loss": 0.015787020921707154, |
| "step": 35900 |
| }, |
| { |
| "epoch": 1.736631370751827, |
| "grad_norm": 0.24173200130462646, |
| "learning_rate": 6.585865894838398e-06, |
| "loss": 0.01873793125152588, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.7414554137822909, |
| "grad_norm": 0.21657347679138184, |
| "learning_rate": 6.465267727930535e-06, |
| "loss": 0.01680638313293457, |
| "step": 36100 |
| }, |
| { |
| "epoch": 1.7462794568127546, |
| "grad_norm": 0.12039454281330109, |
| "learning_rate": 6.344669561022673e-06, |
| "loss": 0.017534868717193605, |
| "step": 36200 |
| }, |
| { |
| "epoch": 1.7511034998432184, |
| "grad_norm": 0.08835107833147049, |
| "learning_rate": 6.22407139411481e-06, |
| "loss": 0.015722684860229492, |
| "step": 36300 |
| }, |
| { |
| "epoch": 1.7559275428736825, |
| "grad_norm": 0.15494988858699799, |
| "learning_rate": 6.103473227206947e-06, |
| "loss": 0.01669602155685425, |
| "step": 36400 |
| }, |
| { |
| "epoch": 1.7607515859041463, |
| "grad_norm": 0.2613168954849243, |
| "learning_rate": 5.9828750602990845e-06, |
| "loss": 0.018969074487686158, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.76557562893461, |
| "grad_norm": 0.26860108971595764, |
| "learning_rate": 5.8622768933912205e-06, |
| "loss": 0.018860089778900146, |
| "step": 36600 |
| }, |
| { |
| "epoch": 1.770399671965074, |
| "grad_norm": 0.3160684406757355, |
| "learning_rate": 5.741678726483358e-06, |
| "loss": 0.017936546802520752, |
| "step": 36700 |
| }, |
| { |
| "epoch": 1.7752237149955379, |
| "grad_norm": 0.16905085742473602, |
| "learning_rate": 5.621080559575495e-06, |
| "loss": 0.018141812086105345, |
| "step": 36800 |
| }, |
| { |
| "epoch": 1.7800477580260017, |
| "grad_norm": 0.23327182233333588, |
| "learning_rate": 5.500482392667632e-06, |
| "loss": 0.01744183659553528, |
| "step": 36900 |
| }, |
| { |
| "epoch": 1.7848718010564655, |
| "grad_norm": 0.15902255475521088, |
| "learning_rate": 5.379884225759769e-06, |
| "loss": 0.017776939868927002, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.7896958440869293, |
| "grad_norm": 0.240287646651268, |
| "learning_rate": 5.259286058851906e-06, |
| "loss": 0.01649364709854126, |
| "step": 37100 |
| }, |
| { |
| "epoch": 1.794519887117393, |
| "grad_norm": 0.13150164484977722, |
| "learning_rate": 5.138687891944043e-06, |
| "loss": 0.01930005669593811, |
| "step": 37200 |
| }, |
| { |
| "epoch": 1.7993439301478569, |
| "grad_norm": 0.25998786091804504, |
| "learning_rate": 5.0180897250361795e-06, |
| "loss": 0.016344897747039795, |
| "step": 37300 |
| }, |
| { |
| "epoch": 1.8041679731783207, |
| "grad_norm": 0.12845446169376373, |
| "learning_rate": 4.897491558128316e-06, |
| "loss": 0.019464727640151978, |
| "step": 37400 |
| }, |
| { |
| "epoch": 1.8089920162087845, |
| "grad_norm": 0.21504537761211395, |
| "learning_rate": 4.776893391220453e-06, |
| "loss": 0.017011468410491944, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.8089920162087845, |
| "eval_accuracy": 0.9933099684867659, |
| "eval_f1": 0.8917599033971296, |
| "eval_loss": 0.030537979677319527, |
| "eval_precision": 0.8804042791129492, |
| "eval_recall": 0.9034122894383579, |
| "eval_runtime": 51.6005, |
| "eval_samples_per_second": 290.695, |
| "eval_steps_per_second": 8.081, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.8138160592392483, |
| "grad_norm": 0.11677803844213486, |
| "learning_rate": 4.656295224312591e-06, |
| "loss": 0.016575688123703004, |
| "step": 37600 |
| }, |
| { |
| "epoch": 1.8186401022697123, |
| "grad_norm": 0.280719131231308, |
| "learning_rate": 4.535697057404727e-06, |
| "loss": 0.01687091827392578, |
| "step": 37700 |
| }, |
| { |
| "epoch": 1.823464145300176, |
| "grad_norm": 0.2764016389846802, |
| "learning_rate": 4.415098890496865e-06, |
| "loss": 0.016562118530273437, |
| "step": 37800 |
| }, |
| { |
| "epoch": 1.8282881883306399, |
| "grad_norm": 0.632255494594574, |
| "learning_rate": 4.294500723589002e-06, |
| "loss": 0.01787501573562622, |
| "step": 37900 |
| }, |
| { |
| "epoch": 1.833112231361104, |
| "grad_norm": 0.09340863674879074, |
| "learning_rate": 4.1739025566811385e-06, |
| "loss": 0.01698790192604065, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.8379362743915677, |
| "grad_norm": 0.1588761806488037, |
| "learning_rate": 4.053304389773275e-06, |
| "loss": 0.01741109848022461, |
| "step": 38100 |
| }, |
| { |
| "epoch": 1.8427603174220315, |
| "grad_norm": 0.10772903263568878, |
| "learning_rate": 3.932706222865413e-06, |
| "loss": 0.016634883880615233, |
| "step": 38200 |
| }, |
| { |
| "epoch": 1.8475843604524953, |
| "grad_norm": 0.22126013040542603, |
| "learning_rate": 3.8121080559575496e-06, |
| "loss": 0.01804221987724304, |
| "step": 38300 |
| }, |
| { |
| "epoch": 1.852408403482959, |
| "grad_norm": 0.3740140497684479, |
| "learning_rate": 3.6915098890496864e-06, |
| "loss": 0.01612048625946045, |
| "step": 38400 |
| }, |
| { |
| "epoch": 1.857232446513423, |
| "grad_norm": 0.13352862000465393, |
| "learning_rate": 3.5709117221418237e-06, |
| "loss": 0.01714093804359436, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.8620564895438867, |
| "grad_norm": 0.27527081966400146, |
| "learning_rate": 3.45031355523396e-06, |
| "loss": 0.01763258218765259, |
| "step": 38600 |
| }, |
| { |
| "epoch": 1.8668805325743505, |
| "grad_norm": 0.28769898414611816, |
| "learning_rate": 3.3297153883260975e-06, |
| "loss": 0.02112499475479126, |
| "step": 38700 |
| }, |
| { |
| "epoch": 1.8717045756048143, |
| "grad_norm": 0.15282955765724182, |
| "learning_rate": 3.209117221418235e-06, |
| "loss": 0.01631925821304321, |
| "step": 38800 |
| }, |
| { |
| "epoch": 1.876528618635278, |
| "grad_norm": 0.12284864485263824, |
| "learning_rate": 3.0885190545103717e-06, |
| "loss": 0.01694957971572876, |
| "step": 38900 |
| }, |
| { |
| "epoch": 1.881352661665742, |
| "grad_norm": 0.1236443966627121, |
| "learning_rate": 2.9679208876025086e-06, |
| "loss": 0.017817366123199462, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.886176704696206, |
| "grad_norm": 0.11388445645570755, |
| "learning_rate": 2.8473227206946454e-06, |
| "loss": 0.01580065131187439, |
| "step": 39100 |
| }, |
| { |
| "epoch": 1.8910007477266697, |
| "grad_norm": 0.2738426625728607, |
| "learning_rate": 2.7267245537867828e-06, |
| "loss": 0.01646868109703064, |
| "step": 39200 |
| }, |
| { |
| "epoch": 1.8958247907571335, |
| "grad_norm": 0.6833071112632751, |
| "learning_rate": 2.6061263868789196e-06, |
| "loss": 0.017300838232040407, |
| "step": 39300 |
| }, |
| { |
| "epoch": 1.9006488337875975, |
| "grad_norm": 0.12390507757663727, |
| "learning_rate": 2.4855282199710565e-06, |
| "loss": 0.017689213752746583, |
| "step": 39400 |
| }, |
| { |
| "epoch": 1.9054728768180613, |
| "grad_norm": 0.5947756171226501, |
| "learning_rate": 2.364930053063194e-06, |
| "loss": 0.016457540988922117, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1.9102969198485251, |
| "grad_norm": 0.11591579020023346, |
| "learning_rate": 2.2443318861553307e-06, |
| "loss": 0.01735694646835327, |
| "step": 39600 |
| }, |
| { |
| "epoch": 1.915120962878989, |
| "grad_norm": 0.14687402546405792, |
| "learning_rate": 2.1237337192474676e-06, |
| "loss": 0.01619683623313904, |
| "step": 39700 |
| }, |
| { |
| "epoch": 1.9199450059094527, |
| "grad_norm": 0.14690209925174713, |
| "learning_rate": 2.003135552339605e-06, |
| "loss": 0.016990303993225098, |
| "step": 39800 |
| }, |
| { |
| "epoch": 1.9247690489399165, |
| "grad_norm": 0.5243352055549622, |
| "learning_rate": 1.8825373854317415e-06, |
| "loss": 0.015626425743103026, |
| "step": 39900 |
| }, |
| { |
| "epoch": 1.9295930919703803, |
| "grad_norm": 0.9058519601821899, |
| "learning_rate": 1.7619392185238784e-06, |
| "loss": 0.017552192211151122, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.9295930919703803, |
| "eval_accuracy": 0.9934548129785762, |
| "eval_f1": 0.8960502077710661, |
| "eval_loss": 0.030458878725767136, |
| "eval_precision": 0.8865553342092993, |
| "eval_recall": 0.9057506603732732, |
| "eval_runtime": 52.2526, |
| "eval_samples_per_second": 287.067, |
| "eval_steps_per_second": 7.98, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.9344171350008441, |
| "grad_norm": 0.2622898817062378, |
| "learning_rate": 1.6413410516160157e-06, |
| "loss": 0.016480473279953004, |
| "step": 40100 |
| }, |
| { |
| "epoch": 1.939241178031308, |
| "grad_norm": 0.1672438383102417, |
| "learning_rate": 1.5207428847081526e-06, |
| "loss": 0.018291155099868773, |
| "step": 40200 |
| }, |
| { |
| "epoch": 1.9440652210617717, |
| "grad_norm": 0.39425408840179443, |
| "learning_rate": 1.4001447178002895e-06, |
| "loss": 0.01858603596687317, |
| "step": 40300 |
| }, |
| { |
| "epoch": 1.9488892640922357, |
| "grad_norm": 0.2491266131401062, |
| "learning_rate": 1.2795465508924266e-06, |
| "loss": 0.016655097007751463, |
| "step": 40400 |
| }, |
| { |
| "epoch": 1.9537133071226995, |
| "grad_norm": 0.2883985638618469, |
| "learning_rate": 1.1589483839845637e-06, |
| "loss": 0.018044712543487548, |
| "step": 40500 |
| }, |
| { |
| "epoch": 1.9585373501531633, |
| "grad_norm": 0.1521671712398529, |
| "learning_rate": 1.0383502170767006e-06, |
| "loss": 0.019449379444122315, |
| "step": 40600 |
| }, |
| { |
| "epoch": 1.9633613931836273, |
| "grad_norm": 0.1477108597755432, |
| "learning_rate": 9.177520501688375e-07, |
| "loss": 0.016478629112243653, |
| "step": 40700 |
| }, |
| { |
| "epoch": 1.9681854362140911, |
| "grad_norm": 0.18148507177829742, |
| "learning_rate": 7.971538832609744e-07, |
| "loss": 0.016618763208389283, |
| "step": 40800 |
| }, |
| { |
| "epoch": 1.973009479244555, |
| "grad_norm": 0.17074325680732727, |
| "learning_rate": 6.765557163531114e-07, |
| "loss": 0.01620419979095459, |
| "step": 40900 |
| }, |
| { |
| "epoch": 1.9778335222750187, |
| "grad_norm": 0.23663687705993652, |
| "learning_rate": 5.559575494452484e-07, |
| "loss": 0.01673411011695862, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1.9826575653054825, |
| "grad_norm": 0.1981934756040573, |
| "learning_rate": 4.353593825373855e-07, |
| "loss": 0.015512742996215821, |
| "step": 41100 |
| }, |
| { |
| "epoch": 1.9874816083359463, |
| "grad_norm": 0.19588832557201385, |
| "learning_rate": 3.1476121562952246e-07, |
| "loss": 0.017852275371551513, |
| "step": 41200 |
| }, |
| { |
| "epoch": 1.9923056513664101, |
| "grad_norm": 0.11776227504014969, |
| "learning_rate": 1.9416304872165945e-07, |
| "loss": 0.017226353883743287, |
| "step": 41300 |
| }, |
| { |
| "epoch": 1.997129694396874, |
| "grad_norm": 0.20195287466049194, |
| "learning_rate": 7.356488181379644e-08, |
| "loss": 0.016260911226272583, |
| "step": 41400 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 41460, |
| "total_flos": 1.525095272976519e+18, |
| "train_loss": 0.03464991324659827, |
| "train_runtime": 20225.5982, |
| "train_samples_per_second": 295.171, |
| "train_steps_per_second": 2.05 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 41460, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 2500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.525095272976519e+18, |
| "train_batch_size": 72, |
| "trial_name": null, |
| "trial_params": null |
| } |
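
For reference, a minimal sketch (not part of the original log) of how the state above could be read back to list the evaluation scores recorded at each eval step. The file path is an assumption — the Hugging Face Trainer writes this content as `trainer_state.json` inside each checkpoint directory — and only keys that appear in the log itself are used.

```python
import json

# Assumed location of this state file (Trainer writes it per checkpoint).
STATE_PATH = "./lang-ner-xlmr/checkpoint-35000/trainer_state.json"

with open(STATE_PATH) as f:
    state = json.load(f)

# Evaluation entries are the log_history records that carry eval_* keys.
eval_records = [r for r in state["log_history"] if "eval_f1" in r]

for r in eval_records:
    print(f"step {r['step']:>6}  "
          f"f1={r['eval_f1']:.4f}  "
          f"precision={r['eval_precision']:.4f}  "
          f"recall={r['eval_recall']:.4f}  "
          f"loss={r['eval_loss']:.4f}")

# Best evaluation step by F1 among the recorded eval runs.
best = max(eval_records, key=lambda r: r["eval_f1"])
print("best F1", round(best["eval_f1"], 4), "at step", best["step"])
```

Run against this log, the loop prints one line per 2,500-step evaluation, and the final line reports the step whose F1 is highest among those recorded evaluations.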