| { |
| "best_global_step": 246750, |
| "best_metric": 0.0014928707387298346, |
| "best_model_checkpoint": "./results/checkpoint-246750", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 246750, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.010131712259371834, |
| "grad_norm": 0.7652040123939514, |
| "learning_rate": 4.9898885511651475e-05, |
| "loss": 0.1118, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.020263424518743668, |
| "grad_norm": 2.674142599105835, |
| "learning_rate": 4.9797568389057755e-05, |
| "loss": 0.0342, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.030395136778115502, |
| "grad_norm": 0.5787509679794312, |
| "learning_rate": 4.9696251266464036e-05, |
| "loss": 0.0297, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.040526849037487336, |
| "grad_norm": 0.32276612520217896, |
| "learning_rate": 4.9594934143870316e-05, |
| "loss": 0.0259, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.05065856129685917, |
| "grad_norm": 0.3678061068058014, |
| "learning_rate": 4.9493617021276603e-05, |
| "loss": 0.0214, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.060790273556231005, |
| "grad_norm": 0.1107838898897171, |
| "learning_rate": 4.9392299898682884e-05, |
| "loss": 0.0195, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.07092198581560284, |
| "grad_norm": 0.7422420978546143, |
| "learning_rate": 4.9290982776089164e-05, |
| "loss": 0.0176, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.08105369807497467, |
| "grad_norm": 0.23030279576778412, |
| "learning_rate": 4.9189665653495445e-05, |
| "loss": 0.0157, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.0911854103343465, |
| "grad_norm": 0.6237834692001343, |
| "learning_rate": 4.9088348530901725e-05, |
| "loss": 0.0156, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.10131712259371833, |
| "grad_norm": 0.027093010023236275, |
| "learning_rate": 4.8987031408308006e-05, |
| "loss": 0.0148, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.11144883485309018, |
| "grad_norm": 1.697365164756775, |
| "learning_rate": 4.888571428571429e-05, |
| "loss": 0.0137, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.12158054711246201, |
| "grad_norm": 1.082137942314148, |
| "learning_rate": 4.8784397163120573e-05, |
| "loss": 0.0125, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.13171225937183384, |
| "grad_norm": 8.637746810913086, |
| "learning_rate": 4.8683080040526854e-05, |
| "loss": 0.012, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "grad_norm": 0.4965957999229431, |
| "learning_rate": 4.8581762917933134e-05, |
| "loss": 0.014, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.1519756838905775, |
| "grad_norm": 0.3297726511955261, |
| "learning_rate": 4.8480445795339415e-05, |
| "loss": 0.0114, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.16210739614994935, |
| "grad_norm": 0.7570741176605225, |
| "learning_rate": 4.8379128672745695e-05, |
| "loss": 0.0137, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.17223910840932116, |
| "grad_norm": 1.3462743759155273, |
| "learning_rate": 4.827781155015198e-05, |
| "loss": 0.0113, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.182370820668693, |
| "grad_norm": 1.4103180170059204, |
| "learning_rate": 4.817649442755826e-05, |
| "loss": 0.0106, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.19250253292806485, |
| "grad_norm": 0.20714280009269714, |
| "learning_rate": 4.8075177304964543e-05, |
| "loss": 0.0102, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.20263424518743667, |
| "grad_norm": 0.728708803653717, |
| "learning_rate": 4.7973860182370824e-05, |
| "loss": 0.011, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 0.21020011603832245, |
| "learning_rate": 4.7872543059777104e-05, |
| "loss": 0.01, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.22289766970618036, |
| "grad_norm": 1.3205281496047974, |
| "learning_rate": 4.7771225937183385e-05, |
| "loss": 0.0118, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.23302938196555217, |
| "grad_norm": 0.061988379806280136, |
| "learning_rate": 4.766990881458967e-05, |
| "loss": 0.0088, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.24316109422492402, |
| "grad_norm": 0.7140718102455139, |
| "learning_rate": 4.756859169199595e-05, |
| "loss": 0.0087, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.25329280648429586, |
| "grad_norm": 0.3282340168952942, |
| "learning_rate": 4.746727456940223e-05, |
| "loss": 0.0105, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.2634245187436677, |
| "grad_norm": 0.18000195920467377, |
| "learning_rate": 4.7365957446808513e-05, |
| "loss": 0.0082, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.2735562310030395, |
| "grad_norm": 0.07064808160066605, |
| "learning_rate": 4.7264640324214794e-05, |
| "loss": 0.0076, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "grad_norm": 0.012272284366190434, |
| "learning_rate": 4.7163323201621074e-05, |
| "loss": 0.0087, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.2938196555217832, |
| "grad_norm": 0.7345269918441772, |
| "learning_rate": 4.706200607902736e-05, |
| "loss": 0.0093, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.303951367781155, |
| "grad_norm": 0.02252453938126564, |
| "learning_rate": 4.696068895643364e-05, |
| "loss": 0.0094, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.3140830800405269, |
| "grad_norm": 1.4309351444244385, |
| "learning_rate": 4.685937183383992e-05, |
| "loss": 0.0096, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.3242147922998987, |
| "grad_norm": 2.0277745723724365, |
| "learning_rate": 4.67580547112462e-05, |
| "loss": 0.0091, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.3343465045592705, |
| "grad_norm": 1.0250506401062012, |
| "learning_rate": 4.6656737588652483e-05, |
| "loss": 0.0079, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.3444782168186423, |
| "grad_norm": 0.019971124827861786, |
| "learning_rate": 4.6555420466058764e-05, |
| "loss": 0.0077, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.3546099290780142, |
| "grad_norm": 0.28336408734321594, |
| "learning_rate": 4.645410334346505e-05, |
| "loss": 0.0079, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.364741641337386, |
| "grad_norm": 0.04426710680127144, |
| "learning_rate": 4.635278622087133e-05, |
| "loss": 0.0076, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.37487335359675783, |
| "grad_norm": 0.017167454585433006, |
| "learning_rate": 4.625146909827761e-05, |
| "loss": 0.0087, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.3850050658561297, |
| "grad_norm": 0.04213930293917656, |
| "learning_rate": 4.615015197568389e-05, |
| "loss": 0.0083, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.3951367781155015, |
| "grad_norm": 0.01774449646472931, |
| "learning_rate": 4.604883485309017e-05, |
| "loss": 0.0085, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.40526849037487334, |
| "grad_norm": 1.1058021783828735, |
| "learning_rate": 4.594751773049646e-05, |
| "loss": 0.0085, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.4154002026342452, |
| "grad_norm": 0.2536062002182007, |
| "learning_rate": 4.584620060790274e-05, |
| "loss": 0.0074, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 0.16014184057712555, |
| "learning_rate": 4.574488348530902e-05, |
| "loss": 0.007, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.43566362715298884, |
| "grad_norm": 0.17190662026405334, |
| "learning_rate": 4.56435663627153e-05, |
| "loss": 0.0088, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.4457953394123607, |
| "grad_norm": 0.04603414237499237, |
| "learning_rate": 4.554224924012158e-05, |
| "loss": 0.0076, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.45592705167173253, |
| "grad_norm": 0.10868274420499802, |
| "learning_rate": 4.544093211752786e-05, |
| "loss": 0.0072, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.46605876393110435, |
| "grad_norm": 0.061556026339530945, |
| "learning_rate": 4.533961499493415e-05, |
| "loss": 0.008, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.16614557802677155, |
| "learning_rate": 4.523829787234043e-05, |
| "loss": 0.0066, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.48632218844984804, |
| "grad_norm": 0.024508880451321602, |
| "learning_rate": 4.513698074974671e-05, |
| "loss": 0.0079, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.49645390070921985, |
| "grad_norm": 0.017630083486437798, |
| "learning_rate": 4.503566362715299e-05, |
| "loss": 0.0061, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.5065856129685917, |
| "grad_norm": 0.7771974802017212, |
| "learning_rate": 4.493434650455927e-05, |
| "loss": 0.0059, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.5167173252279635, |
| "grad_norm": 0.3426854908466339, |
| "learning_rate": 4.483302938196555e-05, |
| "loss": 0.0068, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.5268490374873354, |
| "grad_norm": 0.004861881025135517, |
| "learning_rate": 4.473171225937184e-05, |
| "loss": 0.0066, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.5369807497467072, |
| "grad_norm": 0.2573753595352173, |
| "learning_rate": 4.463039513677812e-05, |
| "loss": 0.0076, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.547112462006079, |
| "grad_norm": 0.0056276340037584305, |
| "learning_rate": 4.45290780141844e-05, |
| "loss": 0.0061, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.5572441742654508, |
| "grad_norm": 1.3307100534439087, |
| "learning_rate": 4.442776089159068e-05, |
| "loss": 0.0078, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "grad_norm": 0.08794938027858734, |
| "learning_rate": 4.432644376899696e-05, |
| "loss": 0.0059, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.5775075987841946, |
| "grad_norm": 0.02371417172253132, |
| "learning_rate": 4.422512664640324e-05, |
| "loss": 0.0069, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.5876393110435664, |
| "grad_norm": 0.005987431854009628, |
| "learning_rate": 4.412380952380953e-05, |
| "loss": 0.0055, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.5977710233029382, |
| "grad_norm": 0.01846960373222828, |
| "learning_rate": 4.402249240121581e-05, |
| "loss": 0.0064, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.60790273556231, |
| "grad_norm": 0.13479246199131012, |
| "learning_rate": 4.392117527862209e-05, |
| "loss": 0.006, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.6180344478216818, |
| "grad_norm": 0.9626930952072144, |
| "learning_rate": 4.381985815602837e-05, |
| "loss": 0.0072, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.6281661600810537, |
| "grad_norm": 3.426116704940796, |
| "learning_rate": 4.371854103343465e-05, |
| "loss": 0.0067, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 0.00803771149367094, |
| "learning_rate": 4.361722391084093e-05, |
| "loss": 0.0054, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.6484295845997974, |
| "grad_norm": 2.7232067584991455, |
| "learning_rate": 4.351590678824722e-05, |
| "loss": 0.0062, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.6585612968591692, |
| "grad_norm": 0.07605724781751633, |
| "learning_rate": 4.34145896656535e-05, |
| "loss": 0.0056, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.668693009118541, |
| "grad_norm": 0.0936700776219368, |
| "learning_rate": 4.331327254305978e-05, |
| "loss": 0.0057, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.6788247213779128, |
| "grad_norm": 0.3686704635620117, |
| "learning_rate": 4.321195542046606e-05, |
| "loss": 0.0073, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.6889564336372846, |
| "grad_norm": 2.1731717586517334, |
| "learning_rate": 4.311063829787234e-05, |
| "loss": 0.0051, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.6990881458966566, |
| "grad_norm": 0.5393068194389343, |
| "learning_rate": 4.300932117527862e-05, |
| "loss": 0.0051, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "grad_norm": 0.010527299717068672, |
| "learning_rate": 4.290800405268491e-05, |
| "loss": 0.0045, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.7193515704154002, |
| "grad_norm": 0.009113574400544167, |
| "learning_rate": 4.280668693009119e-05, |
| "loss": 0.0053, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.729483282674772, |
| "grad_norm": 0.011620788834989071, |
| "learning_rate": 4.270536980749747e-05, |
| "loss": 0.0046, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.7396149949341438, |
| "grad_norm": 0.005289976019412279, |
| "learning_rate": 4.260405268490375e-05, |
| "loss": 0.0055, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.7497467071935157, |
| "grad_norm": 0.027446379885077477, |
| "learning_rate": 4.250273556231003e-05, |
| "loss": 0.0052, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.7598784194528876, |
| "grad_norm": 0.02491973526775837, |
| "learning_rate": 4.240141843971631e-05, |
| "loss": 0.0052, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.7700101317122594, |
| "grad_norm": 0.06567023694515228, |
| "learning_rate": 4.23001013171226e-05, |
| "loss": 0.0045, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.7801418439716312, |
| "grad_norm": 0.07404550909996033, |
| "learning_rate": 4.219878419452888e-05, |
| "loss": 0.0052, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.790273556231003, |
| "grad_norm": 0.04177823290228844, |
| "learning_rate": 4.209746707193516e-05, |
| "loss": 0.0059, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.8004052684903749, |
| "grad_norm": 0.23891448974609375, |
| "learning_rate": 4.199614994934144e-05, |
| "loss": 0.0049, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.8105369807497467, |
| "grad_norm": 0.029912158846855164, |
| "learning_rate": 4.189483282674772e-05, |
| "loss": 0.0053, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.8206686930091185, |
| "grad_norm": 1.465671420097351, |
| "learning_rate": 4.1793515704154e-05, |
| "loss": 0.0054, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.8308004052684904, |
| "grad_norm": 0.002502155490219593, |
| "learning_rate": 4.169219858156029e-05, |
| "loss": 0.0042, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.8409321175278622, |
| "grad_norm": 0.05517476052045822, |
| "learning_rate": 4.159088145896657e-05, |
| "loss": 0.0053, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 0.0024023025762289762, |
| "learning_rate": 4.148956433637285e-05, |
| "loss": 0.0049, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.8611955420466059, |
| "grad_norm": 0.003541674930602312, |
| "learning_rate": 4.138824721377913e-05, |
| "loss": 0.0047, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.8713272543059777, |
| "grad_norm": 0.04199780896306038, |
| "learning_rate": 4.128693009118541e-05, |
| "loss": 0.005, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.8814589665653495, |
| "grad_norm": 1.3863078355789185, |
| "learning_rate": 4.118561296859169e-05, |
| "loss": 0.0046, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.8915906788247214, |
| "grad_norm": 0.03199724853038788, |
| "learning_rate": 4.108429584599798e-05, |
| "loss": 0.0048, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.9017223910840932, |
| "grad_norm": 0.02803684026002884, |
| "learning_rate": 4.098297872340426e-05, |
| "loss": 0.0045, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.9118541033434651, |
| "grad_norm": 0.04623283073306084, |
| "learning_rate": 4.088166160081054e-05, |
| "loss": 0.005, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.9219858156028369, |
| "grad_norm": 0.0006883647874929011, |
| "learning_rate": 4.078034447821682e-05, |
| "loss": 0.0053, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.9321175278622087, |
| "grad_norm": 0.15720270574092865, |
| "learning_rate": 4.06790273556231e-05, |
| "loss": 0.0051, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.9422492401215805, |
| "grad_norm": 0.0048390720039606094, |
| "learning_rate": 4.057771023302938e-05, |
| "loss": 0.0049, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 0.04854992404580116, |
| "learning_rate": 4.0476393110435666e-05, |
| "loss": 0.0038, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.9625126646403243, |
| "grad_norm": 0.011257442645728588, |
| "learning_rate": 4.037507598784195e-05, |
| "loss": 0.0033, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.9726443768996961, |
| "grad_norm": 1.3036562204360962, |
| "learning_rate": 4.027375886524823e-05, |
| "loss": 0.0039, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.9827760891590679, |
| "grad_norm": 0.019078070297837257, |
| "learning_rate": 4.017244174265451e-05, |
| "loss": 0.0046, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.9929078014184397, |
| "grad_norm": 0.0359899140894413, |
| "learning_rate": 4.007112462006079e-05, |
| "loss": 0.0042, |
| "step": 49000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9990064868474964, |
| "eval_f1": 0.9990066101460954, |
| "eval_loss": 0.0038088823202997446, |
| "eval_precision": 0.9990069135746219, |
| "eval_recall": 0.9990064868474964, |
| "eval_runtime": 377.94, |
| "eval_samples_per_second": 185.342, |
| "eval_steps_per_second": 11.584, |
| "step": 49350 |
| }, |
| { |
| "epoch": 1.0030395136778116, |
| "grad_norm": 0.014290682971477509, |
| "learning_rate": 3.996980749746707e-05, |
| "loss": 0.0035, |
| "step": 49500 |
| }, |
| { |
| "epoch": 1.0131712259371835, |
| "grad_norm": 0.06438656151294708, |
| "learning_rate": 3.9868490374873356e-05, |
| "loss": 0.0048, |
| "step": 50000 |
| }, |
| { |
| "epoch": 1.0233029381965553, |
| "grad_norm": 0.006202331744134426, |
| "learning_rate": 3.9767173252279636e-05, |
| "loss": 0.0039, |
| "step": 50500 |
| }, |
| { |
| "epoch": 1.033434650455927, |
| "grad_norm": 0.0030405428260564804, |
| "learning_rate": 3.966585612968592e-05, |
| "loss": 0.003, |
| "step": 51000 |
| }, |
| { |
| "epoch": 1.043566362715299, |
| "grad_norm": 0.12991833686828613, |
| "learning_rate": 3.95645390070922e-05, |
| "loss": 0.0025, |
| "step": 51500 |
| }, |
| { |
| "epoch": 1.0536980749746707, |
| "grad_norm": 0.0097044100984931, |
| "learning_rate": 3.946322188449848e-05, |
| "loss": 0.0037, |
| "step": 52000 |
| }, |
| { |
| "epoch": 1.0638297872340425, |
| "grad_norm": 0.012757817283272743, |
| "learning_rate": 3.9361904761904765e-05, |
| "loss": 0.0033, |
| "step": 52500 |
| }, |
| { |
| "epoch": 1.0739614994934144, |
| "grad_norm": 0.0028119811322540045, |
| "learning_rate": 3.9260587639311045e-05, |
| "loss": 0.0036, |
| "step": 53000 |
| }, |
| { |
| "epoch": 1.0840932117527862, |
| "grad_norm": 0.004565235693007708, |
| "learning_rate": 3.9159270516717326e-05, |
| "loss": 0.0037, |
| "step": 53500 |
| }, |
| { |
| "epoch": 1.094224924012158, |
| "grad_norm": 0.00749659538269043, |
| "learning_rate": 3.9057953394123606e-05, |
| "loss": 0.0028, |
| "step": 54000 |
| }, |
| { |
| "epoch": 1.1043566362715298, |
| "grad_norm": 0.005823603365570307, |
| "learning_rate": 3.895663627152989e-05, |
| "loss": 0.0032, |
| "step": 54500 |
| }, |
| { |
| "epoch": 1.1144883485309016, |
| "grad_norm": 1.319741129875183, |
| "learning_rate": 3.885531914893617e-05, |
| "loss": 0.0022, |
| "step": 55000 |
| }, |
| { |
| "epoch": 1.1246200607902737, |
| "grad_norm": 0.0010931927245110273, |
| "learning_rate": 3.8754002026342454e-05, |
| "loss": 0.0044, |
| "step": 55500 |
| }, |
| { |
| "epoch": 1.1347517730496455, |
| "grad_norm": 2.641359567642212, |
| "learning_rate": 3.8652684903748735e-05, |
| "loss": 0.0031, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.1448834853090173, |
| "grad_norm": 0.05208117142319679, |
| "learning_rate": 3.8551367781155015e-05, |
| "loss": 0.005, |
| "step": 56500 |
| }, |
| { |
| "epoch": 1.155015197568389, |
| "grad_norm": 0.055079296231269836, |
| "learning_rate": 3.8450050658561296e-05, |
| "loss": 0.0037, |
| "step": 57000 |
| }, |
| { |
| "epoch": 1.165146909827761, |
| "grad_norm": 0.016849618405103683, |
| "learning_rate": 3.8348733535967576e-05, |
| "loss": 0.0028, |
| "step": 57500 |
| }, |
| { |
| "epoch": 1.1752786220871327, |
| "grad_norm": 0.009639640338718891, |
| "learning_rate": 3.824741641337386e-05, |
| "loss": 0.0033, |
| "step": 58000 |
| }, |
| { |
| "epoch": 1.1854103343465046, |
| "grad_norm": 0.003612485248595476, |
| "learning_rate": 3.8146099290780144e-05, |
| "loss": 0.0034, |
| "step": 58500 |
| }, |
| { |
| "epoch": 1.1955420466058764, |
| "grad_norm": 0.0050128428265452385, |
| "learning_rate": 3.8044782168186424e-05, |
| "loss": 0.004, |
| "step": 59000 |
| }, |
| { |
| "epoch": 1.2056737588652482, |
| "grad_norm": 0.04756532609462738, |
| "learning_rate": 3.7943465045592705e-05, |
| "loss": 0.0033, |
| "step": 59500 |
| }, |
| { |
| "epoch": 1.21580547112462, |
| "grad_norm": 0.06244517117738724, |
| "learning_rate": 3.7842147922998985e-05, |
| "loss": 0.0038, |
| "step": 60000 |
| }, |
| { |
| "epoch": 1.2259371833839918, |
| "grad_norm": 0.017557090148329735, |
| "learning_rate": 3.7740830800405266e-05, |
| "loss": 0.0026, |
| "step": 60500 |
| }, |
| { |
| "epoch": 1.2360688956433636, |
| "grad_norm": 0.013284939341247082, |
| "learning_rate": 3.763951367781155e-05, |
| "loss": 0.0043, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.2462006079027357, |
| "grad_norm": 0.0017136982642114162, |
| "learning_rate": 3.7538196555217833e-05, |
| "loss": 0.0039, |
| "step": 61500 |
| }, |
| { |
| "epoch": 1.2563323201621075, |
| "grad_norm": 0.009458661079406738, |
| "learning_rate": 3.7436879432624114e-05, |
| "loss": 0.0027, |
| "step": 62000 |
| }, |
| { |
| "epoch": 1.2664640324214793, |
| "grad_norm": 0.0020438162609934807, |
| "learning_rate": 3.7335562310030394e-05, |
| "loss": 0.0025, |
| "step": 62500 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "grad_norm": 0.0019806961063295603, |
| "learning_rate": 3.7234245187436675e-05, |
| "loss": 0.0033, |
| "step": 63000 |
| }, |
| { |
| "epoch": 1.286727456940223, |
| "grad_norm": 0.0010404183994978666, |
| "learning_rate": 3.713292806484296e-05, |
| "loss": 0.0032, |
| "step": 63500 |
| }, |
| { |
| "epoch": 1.2968591691995948, |
| "grad_norm": 0.0007222663261927664, |
| "learning_rate": 3.703161094224924e-05, |
| "loss": 0.0033, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.3069908814589666, |
| "grad_norm": 0.008753558620810509, |
| "learning_rate": 3.693029381965552e-05, |
| "loss": 0.0031, |
| "step": 64500 |
| }, |
| { |
| "epoch": 1.3171225937183384, |
| "grad_norm": 0.2641207277774811, |
| "learning_rate": 3.6828976697061803e-05, |
| "loss": 0.0036, |
| "step": 65000 |
| }, |
| { |
| "epoch": 1.3272543059777102, |
| "grad_norm": 0.01977156661450863, |
| "learning_rate": 3.672765957446809e-05, |
| "loss": 0.0028, |
| "step": 65500 |
| }, |
| { |
| "epoch": 1.337386018237082, |
| "grad_norm": 0.004643771797418594, |
| "learning_rate": 3.662634245187437e-05, |
| "loss": 0.0029, |
| "step": 66000 |
| }, |
| { |
| "epoch": 1.3475177304964538, |
| "grad_norm": 0.00434250058606267, |
| "learning_rate": 3.652502532928065e-05, |
| "loss": 0.0031, |
| "step": 66500 |
| }, |
| { |
| "epoch": 1.3576494427558257, |
| "grad_norm": 0.10578258335590363, |
| "learning_rate": 3.642370820668693e-05, |
| "loss": 0.0031, |
| "step": 67000 |
| }, |
| { |
| "epoch": 1.3677811550151975, |
| "grad_norm": 0.0011634805705398321, |
| "learning_rate": 3.632239108409321e-05, |
| "loss": 0.0022, |
| "step": 67500 |
| }, |
| { |
| "epoch": 1.3779128672745693, |
| "grad_norm": 0.0011249127564951777, |
| "learning_rate": 3.62210739614995e-05, |
| "loss": 0.0032, |
| "step": 68000 |
| }, |
| { |
| "epoch": 1.3880445795339411, |
| "grad_norm": 0.009079035371541977, |
| "learning_rate": 3.611975683890578e-05, |
| "loss": 0.0032, |
| "step": 68500 |
| }, |
| { |
| "epoch": 1.3981762917933132, |
| "grad_norm": 0.01014864444732666, |
| "learning_rate": 3.601843971631206e-05, |
| "loss": 0.0039, |
| "step": 69000 |
| }, |
| { |
| "epoch": 1.408308004052685, |
| "grad_norm": 0.005819142330437899, |
| "learning_rate": 3.591712259371834e-05, |
| "loss": 0.0028, |
| "step": 69500 |
| }, |
| { |
| "epoch": 1.4184397163120568, |
| "grad_norm": 0.011299582198262215, |
| "learning_rate": 3.581580547112462e-05, |
| "loss": 0.0034, |
| "step": 70000 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.0007169672753661871, |
| "learning_rate": 3.571448834853091e-05, |
| "loss": 0.0027, |
| "step": 70500 |
| }, |
| { |
| "epoch": 1.4387031408308004, |
| "grad_norm": 0.01162696722894907, |
| "learning_rate": 3.561317122593719e-05, |
| "loss": 0.0035, |
| "step": 71000 |
| }, |
| { |
| "epoch": 1.4488348530901722, |
| "grad_norm": 0.03597528859972954, |
| "learning_rate": 3.551185410334347e-05, |
| "loss": 0.0031, |
| "step": 71500 |
| }, |
| { |
| "epoch": 1.458966565349544, |
| "grad_norm": 0.010613600723445415, |
| "learning_rate": 3.541053698074975e-05, |
| "loss": 0.0025, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.4690982776089159, |
| "grad_norm": 0.013661106117069721, |
| "learning_rate": 3.530921985815603e-05, |
| "loss": 0.0022, |
| "step": 72500 |
| }, |
| { |
| "epoch": 1.4792299898682877, |
| "grad_norm": 0.4801454544067383, |
| "learning_rate": 3.520790273556231e-05, |
| "loss": 0.0032, |
| "step": 73000 |
| }, |
| { |
| "epoch": 1.4893617021276595, |
| "grad_norm": 0.005630165338516235, |
| "learning_rate": 3.51065856129686e-05, |
| "loss": 0.0025, |
| "step": 73500 |
| }, |
| { |
| "epoch": 1.4994934143870315, |
| "grad_norm": 0.018407883122563362, |
| "learning_rate": 3.500526849037488e-05, |
| "loss": 0.0029, |
| "step": 74000 |
| }, |
| { |
| "epoch": 1.5096251266464034, |
| "grad_norm": 0.010126540437340736, |
| "learning_rate": 3.490395136778116e-05, |
| "loss": 0.0027, |
| "step": 74500 |
| }, |
| { |
| "epoch": 1.5197568389057752, |
| "grad_norm": 0.025962965562939644, |
| "learning_rate": 3.480263424518744e-05, |
| "loss": 0.0028, |
| "step": 75000 |
| }, |
| { |
| "epoch": 1.529888551165147, |
| "grad_norm": 0.10553637146949768, |
| "learning_rate": 3.470131712259372e-05, |
| "loss": 0.0038, |
| "step": 75500 |
| }, |
| { |
| "epoch": 1.5400202634245188, |
| "grad_norm": 0.0019956612959504128, |
| "learning_rate": 3.46e-05, |
| "loss": 0.0033, |
| "step": 76000 |
| }, |
| { |
| "epoch": 1.5501519756838906, |
| "grad_norm": 0.0006352249765768647, |
| "learning_rate": 3.449868287740629e-05, |
| "loss": 0.0025, |
| "step": 76500 |
| }, |
| { |
| "epoch": 1.5602836879432624, |
| "grad_norm": 0.0011086476733908057, |
| "learning_rate": 3.439736575481257e-05, |
| "loss": 0.0029, |
| "step": 77000 |
| }, |
| { |
| "epoch": 1.5704154002026343, |
| "grad_norm": 0.00939366314560175, |
| "learning_rate": 3.429604863221885e-05, |
| "loss": 0.0031, |
| "step": 77500 |
| }, |
| { |
| "epoch": 1.580547112462006, |
| "grad_norm": 0.25609418749809265, |
| "learning_rate": 3.419473150962513e-05, |
| "loss": 0.0033, |
| "step": 78000 |
| }, |
| { |
| "epoch": 1.590678824721378, |
| "grad_norm": 0.09414645284414291, |
| "learning_rate": 3.409341438703141e-05, |
| "loss": 0.0029, |
| "step": 78500 |
| }, |
| { |
| "epoch": 1.6008105369807497, |
| "grad_norm": 0.04403573274612427, |
| "learning_rate": 3.399209726443769e-05, |
| "loss": 0.0024, |
| "step": 79000 |
| }, |
| { |
| "epoch": 1.6109422492401215, |
| "grad_norm": 0.0013993962202221155, |
| "learning_rate": 3.389078014184398e-05, |
| "loss": 0.0025, |
| "step": 79500 |
| }, |
| { |
| "epoch": 1.6210739614994933, |
| "grad_norm": 0.0016776573611423373, |
| "learning_rate": 3.378946301925026e-05, |
| "loss": 0.0031, |
| "step": 80000 |
| }, |
| { |
| "epoch": 1.6312056737588652, |
| "grad_norm": 0.016962487250566483, |
| "learning_rate": 3.368814589665654e-05, |
| "loss": 0.0025, |
| "step": 80500 |
| }, |
| { |
| "epoch": 1.641337386018237, |
| "grad_norm": 0.04853259399533272, |
| "learning_rate": 3.358682877406282e-05, |
| "loss": 0.0027, |
| "step": 81000 |
| }, |
| { |
| "epoch": 1.6514690982776088, |
| "grad_norm": 0.00218728045001626, |
| "learning_rate": 3.34855116514691e-05, |
| "loss": 0.0027, |
| "step": 81500 |
| }, |
| { |
| "epoch": 1.6616008105369806, |
| "grad_norm": 0.008054674603044987, |
| "learning_rate": 3.338419452887538e-05, |
| "loss": 0.0027, |
| "step": 82000 |
| }, |
| { |
| "epoch": 1.6717325227963524, |
| "grad_norm": 0.0009625882375985384, |
| "learning_rate": 3.328287740628167e-05, |
| "loss": 0.0021, |
| "step": 82500 |
| }, |
| { |
| "epoch": 1.6818642350557245, |
| "grad_norm": 0.017836738377809525, |
| "learning_rate": 3.318156028368795e-05, |
| "loss": 0.0035, |
| "step": 83000 |
| }, |
| { |
| "epoch": 1.6919959473150963, |
| "grad_norm": 0.5696132183074951, |
| "learning_rate": 3.308024316109423e-05, |
| "loss": 0.004, |
| "step": 83500 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "grad_norm": 0.00031232935725711286, |
| "learning_rate": 3.297892603850051e-05, |
| "loss": 0.0019, |
| "step": 84000 |
| }, |
| { |
| "epoch": 1.71225937183384, |
| "grad_norm": 0.006237703841179609, |
| "learning_rate": 3.287760891590679e-05, |
| "loss": 0.0026, |
| "step": 84500 |
| }, |
| { |
| "epoch": 1.7223910840932117, |
| "grad_norm": 0.02137162908911705, |
| "learning_rate": 3.2776291793313076e-05, |
| "loss": 0.0024, |
| "step": 85000 |
| }, |
| { |
| "epoch": 1.7325227963525835, |
| "grad_norm": 0.01595192588865757, |
| "learning_rate": 3.267497467071936e-05, |
| "loss": 0.0021, |
| "step": 85500 |
| }, |
| { |
| "epoch": 1.7426545086119554, |
| "grad_norm": 0.0017637086566537619, |
| "learning_rate": 3.257365754812564e-05, |
| "loss": 0.0027, |
| "step": 86000 |
| }, |
| { |
| "epoch": 1.7527862208713274, |
| "grad_norm": 0.0031725901644676924, |
| "learning_rate": 3.247234042553192e-05, |
| "loss": 0.0021, |
| "step": 86500 |
| }, |
| { |
| "epoch": 1.7629179331306992, |
| "grad_norm": 0.015843555331230164, |
| "learning_rate": 3.23710233029382e-05, |
| "loss": 0.0037, |
| "step": 87000 |
| }, |
| { |
| "epoch": 1.773049645390071, |
| "grad_norm": 0.02725142426788807, |
| "learning_rate": 3.226970618034448e-05, |
| "loss": 0.0023, |
| "step": 87500 |
| }, |
| { |
| "epoch": 1.7831813576494429, |
| "grad_norm": 0.015493770129978657, |
| "learning_rate": 3.2168389057750766e-05, |
| "loss": 0.0028, |
| "step": 88000 |
| }, |
| { |
| "epoch": 1.7933130699088147, |
| "grad_norm": 0.0021028113551437855, |
| "learning_rate": 3.2067071935157046e-05, |
| "loss": 0.0022, |
| "step": 88500 |
| }, |
| { |
| "epoch": 1.8034447821681865, |
| "grad_norm": 0.0224838238209486, |
| "learning_rate": 3.196575481256333e-05, |
| "loss": 0.0027, |
| "step": 89000 |
| }, |
| { |
| "epoch": 1.8135764944275583, |
| "grad_norm": 0.0014610164798796177, |
| "learning_rate": 3.186443768996961e-05, |
| "loss": 0.0027, |
| "step": 89500 |
| }, |
| { |
| "epoch": 1.8237082066869301, |
| "grad_norm": 0.0022999641951173544, |
| "learning_rate": 3.176312056737589e-05, |
| "loss": 0.0029, |
| "step": 90000 |
| }, |
| { |
| "epoch": 1.833839918946302, |
| "grad_norm": 0.003701185341924429, |
| "learning_rate": 3.166180344478217e-05, |
| "loss": 0.0031, |
| "step": 90500 |
| }, |
| { |
| "epoch": 1.8439716312056738, |
| "grad_norm": 0.0023422616068273783, |
| "learning_rate": 3.1560486322188455e-05, |
| "loss": 0.003, |
| "step": 91000 |
| }, |
| { |
| "epoch": 1.8541033434650456, |
| "grad_norm": 0.0010673481738194823, |
| "learning_rate": 3.1459169199594736e-05, |
| "loss": 0.0026, |
| "step": 91500 |
| }, |
| { |
| "epoch": 1.8642350557244174, |
| "grad_norm": 2.150301456451416, |
| "learning_rate": 3.1357852077001016e-05, |
| "loss": 0.0019, |
| "step": 92000 |
| }, |
| { |
| "epoch": 1.8743667679837892, |
| "grad_norm": 0.0014611236983910203, |
| "learning_rate": 3.12565349544073e-05, |
| "loss": 0.0023, |
| "step": 92500 |
| }, |
| { |
| "epoch": 1.884498480243161, |
| "grad_norm": 0.004396792501211166, |
| "learning_rate": 3.115521783181358e-05, |
| "loss": 0.0022, |
| "step": 93000 |
| }, |
| { |
| "epoch": 1.8946301925025328, |
| "grad_norm": 0.006988595239818096, |
| "learning_rate": 3.105390070921986e-05, |
| "loss": 0.0026, |
| "step": 93500 |
| }, |
| { |
| "epoch": 1.9047619047619047, |
| "grad_norm": 0.3570442795753479, |
| "learning_rate": 3.0952583586626145e-05, |
| "loss": 0.0023, |
| "step": 94000 |
| }, |
| { |
| "epoch": 1.9148936170212765, |
| "grad_norm": 0.0019152691820636392, |
| "learning_rate": 3.0851266464032425e-05, |
| "loss": 0.0026, |
| "step": 94500 |
| }, |
| { |
| "epoch": 1.9250253292806483, |
| "grad_norm": 0.03153735399246216, |
| "learning_rate": 3.0749949341438706e-05, |
| "loss": 0.0021, |
| "step": 95000 |
| }, |
| { |
| "epoch": 1.93515704154002, |
| "grad_norm": 0.003688658820465207, |
| "learning_rate": 3.0648632218844986e-05, |
| "loss": 0.0016, |
| "step": 95500 |
| }, |
| { |
| "epoch": 1.9452887537993921, |
| "grad_norm": 0.32523173093795776, |
| "learning_rate": 3.054731509625127e-05, |
| "loss": 0.002, |
| "step": 96000 |
| }, |
| { |
| "epoch": 1.955420466058764, |
| "grad_norm": 0.021606747061014175, |
| "learning_rate": 3.0445997973657547e-05, |
| "loss": 0.0049, |
| "step": 96500 |
| }, |
| { |
| "epoch": 1.9655521783181358, |
| "grad_norm": 0.0006940297316759825, |
| "learning_rate": 3.0344680851063834e-05, |
| "loss": 0.0027, |
| "step": 97000 |
| }, |
| { |
| "epoch": 1.9756838905775076, |
| "grad_norm": 1.9993683099746704, |
| "learning_rate": 3.0243363728470115e-05, |
| "loss": 0.0026, |
| "step": 97500 |
| }, |
| { |
| "epoch": 1.9858156028368794, |
| "grad_norm": 0.10824126750230789, |
| "learning_rate": 3.0142046605876395e-05, |
| "loss": 0.0021, |
| "step": 98000 |
| }, |
| { |
| "epoch": 1.9959473150962512, |
| "grad_norm": 0.01038323249667883, |
| "learning_rate": 3.0040729483282676e-05, |
| "loss": 0.0025, |
| "step": 98500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9992859494267464, |
| "eval_f1": 0.9992860626946858, |
| "eval_loss": 0.0031463655177503824, |
| "eval_precision": 0.9992865610887268, |
| "eval_recall": 0.9992859494267464, |
| "eval_runtime": 377.8329, |
| "eval_samples_per_second": 185.394, |
| "eval_steps_per_second": 11.587, |
| "step": 98700 |
| }, |
| { |
| "epoch": 2.0060790273556233, |
| "grad_norm": 0.06401953846216202, |
| "learning_rate": 2.9939412360688956e-05, |
| "loss": 0.0028, |
| "step": 99000 |
| }, |
| { |
| "epoch": 2.016210739614995, |
| "grad_norm": 0.808768630027771, |
| "learning_rate": 2.9838095238095237e-05, |
| "loss": 0.0021, |
| "step": 99500 |
| }, |
| { |
| "epoch": 2.026342451874367, |
| "grad_norm": 0.001931357546709478, |
| "learning_rate": 2.9736778115501524e-05, |
| "loss": 0.0023, |
| "step": 100000 |
| }, |
| { |
| "epoch": 2.0364741641337387, |
| "grad_norm": 0.13670825958251953, |
| "learning_rate": 2.9635460992907804e-05, |
| "loss": 0.0019, |
| "step": 100500 |
| }, |
| { |
| "epoch": 2.0466058763931105, |
| "grad_norm": 0.06261293590068817, |
| "learning_rate": 2.9534143870314085e-05, |
| "loss": 0.0017, |
| "step": 101000 |
| }, |
| { |
| "epoch": 2.0567375886524824, |
| "grad_norm": 0.006652528885751963, |
| "learning_rate": 2.9432826747720365e-05, |
| "loss": 0.0018, |
| "step": 101500 |
| }, |
| { |
| "epoch": 2.066869300911854, |
| "grad_norm": 0.012849073857069016, |
| "learning_rate": 2.9331509625126646e-05, |
| "loss": 0.0021, |
| "step": 102000 |
| }, |
| { |
| "epoch": 2.077001013171226, |
| "grad_norm": 0.0033175491262227297, |
| "learning_rate": 2.9230192502532926e-05, |
| "loss": 0.0015, |
| "step": 102500 |
| }, |
| { |
| "epoch": 2.087132725430598, |
| "grad_norm": 0.010253848508000374, |
| "learning_rate": 2.9128875379939213e-05, |
| "loss": 0.0016, |
| "step": 103000 |
| }, |
| { |
| "epoch": 2.0972644376899696, |
| "grad_norm": 0.001028141938149929, |
| "learning_rate": 2.9027558257345494e-05, |
| "loss": 0.0022, |
| "step": 103500 |
| }, |
| { |
| "epoch": 2.1073961499493414, |
| "grad_norm": 0.0003175963065586984, |
| "learning_rate": 2.8926241134751774e-05, |
| "loss": 0.0014, |
| "step": 104000 |
| }, |
| { |
| "epoch": 2.1175278622087133, |
| "grad_norm": 0.004258031025528908, |
| "learning_rate": 2.8824924012158055e-05, |
| "loss": 0.0026, |
| "step": 104500 |
| }, |
| { |
| "epoch": 2.127659574468085, |
| "grad_norm": 0.015609141439199448, |
| "learning_rate": 2.8723606889564335e-05, |
| "loss": 0.0019, |
| "step": 105000 |
| }, |
| { |
| "epoch": 2.137791286727457, |
| "grad_norm": 0.025085508823394775, |
| "learning_rate": 2.8622289766970616e-05, |
| "loss": 0.0017, |
| "step": 105500 |
| }, |
| { |
| "epoch": 2.1479229989868287, |
| "grad_norm": 0.005206093192100525, |
| "learning_rate": 2.8520972644376903e-05, |
| "loss": 0.0016, |
| "step": 106000 |
| }, |
| { |
| "epoch": 2.1580547112462005, |
| "grad_norm": 0.002129113767296076, |
| "learning_rate": 2.8419655521783183e-05, |
| "loss": 0.0021, |
| "step": 106500 |
| }, |
| { |
| "epoch": 2.1681864235055723, |
| "grad_norm": 0.0303476732224226, |
| "learning_rate": 2.8318338399189464e-05, |
| "loss": 0.0023, |
| "step": 107000 |
| }, |
| { |
| "epoch": 2.178318135764944, |
| "grad_norm": 0.00023147836327552795, |
| "learning_rate": 2.8217021276595744e-05, |
| "loss": 0.0014, |
| "step": 107500 |
| }, |
| { |
| "epoch": 2.188449848024316, |
| "grad_norm": 0.01121602300554514, |
| "learning_rate": 2.8115704154002025e-05, |
| "loss": 0.0025, |
| "step": 108000 |
| }, |
| { |
| "epoch": 2.198581560283688, |
| "grad_norm": 0.01344907283782959, |
| "learning_rate": 2.8014387031408305e-05, |
| "loss": 0.002, |
| "step": 108500 |
| }, |
| { |
| "epoch": 2.2087132725430596, |
| "grad_norm": 0.0011041724355891347, |
| "learning_rate": 2.7913069908814593e-05, |
| "loss": 0.0027, |
| "step": 109000 |
| }, |
| { |
| "epoch": 2.2188449848024314, |
| "grad_norm": 0.02955365553498268, |
| "learning_rate": 2.7811752786220873e-05, |
| "loss": 0.0022, |
| "step": 109500 |
| }, |
| { |
| "epoch": 2.2289766970618032, |
| "grad_norm": 0.07166969031095505, |
| "learning_rate": 2.7710435663627154e-05, |
| "loss": 0.0017, |
| "step": 110000 |
| }, |
| { |
| "epoch": 2.239108409321175, |
| "grad_norm": 0.011252596974372864, |
| "learning_rate": 2.7609118541033434e-05, |
| "loss": 0.0014, |
| "step": 110500 |
| }, |
| { |
| "epoch": 2.2492401215805473, |
| "grad_norm": 0.045478031039237976, |
| "learning_rate": 2.7507801418439714e-05, |
| "loss": 0.0015, |
| "step": 111000 |
| }, |
| { |
| "epoch": 2.259371833839919, |
| "grad_norm": 0.025144068524241447, |
| "learning_rate": 2.7406484295845998e-05, |
| "loss": 0.0017, |
| "step": 111500 |
| }, |
| { |
| "epoch": 2.269503546099291, |
| "grad_norm": 0.009457019157707691, |
| "learning_rate": 2.7305167173252282e-05, |
| "loss": 0.0015, |
| "step": 112000 |
| }, |
| { |
| "epoch": 2.2796352583586628, |
| "grad_norm": 0.003307552542537451, |
| "learning_rate": 2.7203850050658563e-05, |
| "loss": 0.0018, |
| "step": 112500 |
| }, |
| { |
| "epoch": 2.2897669706180346, |
| "grad_norm": 0.0021774822380393744, |
| "learning_rate": 2.7102532928064843e-05, |
| "loss": 0.0009, |
| "step": 113000 |
| }, |
| { |
| "epoch": 2.2998986828774064, |
| "grad_norm": 0.03337857872247696, |
| "learning_rate": 2.7001215805471124e-05, |
| "loss": 0.0012, |
| "step": 113500 |
| }, |
| { |
| "epoch": 2.310030395136778, |
| "grad_norm": 0.002895305398851633, |
| "learning_rate": 2.6899898682877407e-05, |
| "loss": 0.0015, |
| "step": 114000 |
| }, |
| { |
| "epoch": 2.32016210739615, |
| "grad_norm": 0.00023546746524516493, |
| "learning_rate": 2.6798581560283688e-05, |
| "loss": 0.0013, |
| "step": 114500 |
| }, |
| { |
| "epoch": 2.330293819655522, |
| "grad_norm": 0.001964944414794445, |
| "learning_rate": 2.669726443768997e-05, |
| "loss": 0.002, |
| "step": 115000 |
| }, |
| { |
| "epoch": 2.3404255319148937, |
| "grad_norm": 0.00665094843134284, |
| "learning_rate": 2.6595947315096252e-05, |
| "loss": 0.0022, |
| "step": 115500 |
| }, |
| { |
| "epoch": 2.3505572441742655, |
| "grad_norm": 0.015557374805212021, |
| "learning_rate": 2.6494630192502533e-05, |
| "loss": 0.002, |
| "step": 116000 |
| }, |
| { |
| "epoch": 2.3606889564336373, |
| "grad_norm": 0.00035607043537311256, |
| "learning_rate": 2.6393313069908816e-05, |
| "loss": 0.0014, |
| "step": 116500 |
| }, |
| { |
| "epoch": 2.370820668693009, |
| "grad_norm": 2.188824415206909, |
| "learning_rate": 2.6291995947315097e-05, |
| "loss": 0.0012, |
| "step": 117000 |
| }, |
| { |
| "epoch": 2.380952380952381, |
| "grad_norm": 0.005363579839468002, |
| "learning_rate": 2.619067882472138e-05, |
| "loss": 0.0018, |
| "step": 117500 |
| }, |
| { |
| "epoch": 2.3910840932117527, |
| "grad_norm": 0.014328660443425179, |
| "learning_rate": 2.608936170212766e-05, |
| "loss": 0.0017, |
| "step": 118000 |
| }, |
| { |
| "epoch": 2.4012158054711246, |
| "grad_norm": 0.9693813920021057, |
| "learning_rate": 2.598804457953394e-05, |
| "loss": 0.0019, |
| "step": 118500 |
| }, |
| { |
| "epoch": 2.4113475177304964, |
| "grad_norm": 0.002820476656779647, |
| "learning_rate": 2.5886727456940226e-05, |
| "loss": 0.0019, |
| "step": 119000 |
| }, |
| { |
| "epoch": 2.421479229989868, |
| "grad_norm": 0.000413126457715407, |
| "learning_rate": 2.5785410334346506e-05, |
| "loss": 0.0011, |
| "step": 119500 |
| }, |
| { |
| "epoch": 2.43161094224924, |
| "grad_norm": 0.0005565093597397208, |
| "learning_rate": 2.5684093211752786e-05, |
| "loss": 0.0014, |
| "step": 120000 |
| }, |
| { |
| "epoch": 2.441742654508612, |
| "grad_norm": 0.1878264844417572, |
| "learning_rate": 2.558277608915907e-05, |
| "loss": 0.0013, |
| "step": 120500 |
| }, |
| { |
| "epoch": 2.4518743667679836, |
| "grad_norm": 0.0014664519112557173, |
| "learning_rate": 2.5481458966565354e-05, |
| "loss": 0.0014, |
| "step": 121000 |
| }, |
| { |
| "epoch": 2.4620060790273555, |
| "grad_norm": 0.002391642890870571, |
| "learning_rate": 2.5380141843971635e-05, |
| "loss": 0.0022, |
| "step": 121500 |
| }, |
| { |
| "epoch": 2.4721377912867273, |
| "grad_norm": 0.0009669638238847256, |
| "learning_rate": 2.5278824721377915e-05, |
| "loss": 0.0028, |
| "step": 122000 |
| }, |
| { |
| "epoch": 2.482269503546099, |
| "grad_norm": 0.001344940159469843, |
| "learning_rate": 2.5177507598784196e-05, |
| "loss": 0.0021, |
| "step": 122500 |
| }, |
| { |
| "epoch": 2.4924012158054714, |
| "grad_norm": 0.048559609800577164, |
| "learning_rate": 2.5076190476190476e-05, |
| "loss": 0.0012, |
| "step": 123000 |
| }, |
| { |
| "epoch": 2.502532928064843, |
| "grad_norm": 0.003990110941231251, |
| "learning_rate": 2.497487335359676e-05, |
| "loss": 0.0017, |
| "step": 123500 |
| }, |
| { |
| "epoch": 2.512664640324215, |
| "grad_norm": 0.030355116352438927, |
| "learning_rate": 2.487355623100304e-05, |
| "loss": 0.0016, |
| "step": 124000 |
| }, |
| { |
| "epoch": 2.522796352583587, |
| "grad_norm": 1.8318172693252563, |
| "learning_rate": 2.4772239108409324e-05, |
| "loss": 0.0018, |
| "step": 124500 |
| }, |
| { |
| "epoch": 2.5329280648429586, |
| "grad_norm": 0.019709262996912003, |
| "learning_rate": 2.4670921985815605e-05, |
| "loss": 0.0017, |
| "step": 125000 |
| }, |
| { |
| "epoch": 2.5430597771023304, |
| "grad_norm": 0.0011386788683012128, |
| "learning_rate": 2.4569604863221885e-05, |
| "loss": 0.0011, |
| "step": 125500 |
| }, |
| { |
| "epoch": 2.5531914893617023, |
| "grad_norm": 0.0009728266159072518, |
| "learning_rate": 2.446828774062817e-05, |
| "loss": 0.0014, |
| "step": 126000 |
| }, |
| { |
| "epoch": 2.563323201621074, |
| "grad_norm": 0.001098209759220481, |
| "learning_rate": 2.436697061803445e-05, |
| "loss": 0.0012, |
| "step": 126500 |
| }, |
| { |
| "epoch": 2.573454913880446, |
| "grad_norm": 0.0009475924889557064, |
| "learning_rate": 2.426565349544073e-05, |
| "loss": 0.001, |
| "step": 127000 |
| }, |
| { |
| "epoch": 2.5835866261398177, |
| "grad_norm": 0.0004112945171073079, |
| "learning_rate": 2.4164336372847014e-05, |
| "loss": 0.0007, |
| "step": 127500 |
| }, |
| { |
| "epoch": 2.5937183383991895, |
| "grad_norm": 0.0023858449421823025, |
| "learning_rate": 2.4063019250253294e-05, |
| "loss": 0.0021, |
| "step": 128000 |
| }, |
| { |
| "epoch": 2.6038500506585613, |
| "grad_norm": 0.018019314855337143, |
| "learning_rate": 2.3961702127659575e-05, |
| "loss": 0.0013, |
| "step": 128500 |
| }, |
| { |
| "epoch": 2.613981762917933, |
| "grad_norm": 0.0006694953772239387, |
| "learning_rate": 2.386038500506586e-05, |
| "loss": 0.002, |
| "step": 129000 |
| }, |
| { |
| "epoch": 2.624113475177305, |
| "grad_norm": 0.0005967771867290139, |
| "learning_rate": 2.375906788247214e-05, |
| "loss": 0.0007, |
| "step": 129500 |
| }, |
| { |
| "epoch": 2.634245187436677, |
| "grad_norm": 0.001148981973528862, |
| "learning_rate": 2.365775075987842e-05, |
| "loss": 0.0018, |
| "step": 130000 |
| }, |
| { |
| "epoch": 2.6443768996960486, |
| "grad_norm": 0.005810345523059368, |
| "learning_rate": 2.3556433637284703e-05, |
| "loss": 0.0014, |
| "step": 130500 |
| }, |
| { |
| "epoch": 2.6545086119554204, |
| "grad_norm": 0.012930807657539845, |
| "learning_rate": 2.3455116514690984e-05, |
| "loss": 0.0012, |
| "step": 131000 |
| }, |
| { |
| "epoch": 2.6646403242147922, |
| "grad_norm": 0.0818137601017952, |
| "learning_rate": 2.3353799392097264e-05, |
| "loss": 0.0019, |
| "step": 131500 |
| }, |
| { |
| "epoch": 2.674772036474164, |
| "grad_norm": 0.010759086348116398, |
| "learning_rate": 2.3252482269503548e-05, |
| "loss": 0.0009, |
| "step": 132000 |
| }, |
| { |
| "epoch": 2.684903748733536, |
| "grad_norm": 0.05643817409873009, |
| "learning_rate": 2.315116514690983e-05, |
| "loss": 0.0012, |
| "step": 132500 |
| }, |
| { |
| "epoch": 2.6950354609929077, |
| "grad_norm": 0.004631399642676115, |
| "learning_rate": 2.3049848024316112e-05, |
| "loss": 0.0008, |
| "step": 133000 |
| }, |
| { |
| "epoch": 2.7051671732522795, |
| "grad_norm": 0.03901492431759834, |
| "learning_rate": 2.2948530901722393e-05, |
| "loss": 0.0019, |
| "step": 133500 |
| }, |
| { |
| "epoch": 2.7152988855116513, |
| "grad_norm": 0.00697283074259758, |
| "learning_rate": 2.2847213779128673e-05, |
| "loss": 0.0013, |
| "step": 134000 |
| }, |
| { |
| "epoch": 2.725430597771023, |
| "grad_norm": 0.008329696953296661, |
| "learning_rate": 2.2745896656534957e-05, |
| "loss": 0.0014, |
| "step": 134500 |
| }, |
| { |
| "epoch": 2.735562310030395, |
| "grad_norm": 0.00027154709096066654, |
| "learning_rate": 2.2644579533941238e-05, |
| "loss": 0.0016, |
| "step": 135000 |
| }, |
| { |
| "epoch": 2.745694022289767, |
| "grad_norm": 0.0021239016205072403, |
| "learning_rate": 2.2543262411347518e-05, |
| "loss": 0.0016, |
| "step": 135500 |
| }, |
| { |
| "epoch": 2.7558257345491386, |
| "grad_norm": 0.05723918229341507, |
| "learning_rate": 2.2441945288753802e-05, |
| "loss": 0.0024, |
| "step": 136000 |
| }, |
| { |
| "epoch": 2.7659574468085104, |
| "grad_norm": 0.0015372316120192409, |
| "learning_rate": 2.2340628166160082e-05, |
| "loss": 0.0017, |
| "step": 136500 |
| }, |
| { |
| "epoch": 2.7760891590678822, |
| "grad_norm": 0.009359728544950485, |
| "learning_rate": 2.2239311043566363e-05, |
| "loss": 0.0016, |
| "step": 137000 |
| }, |
| { |
| "epoch": 2.786220871327254, |
| "grad_norm": 0.000444738136138767, |
| "learning_rate": 2.2137993920972647e-05, |
| "loss": 0.0012, |
| "step": 137500 |
| }, |
| { |
| "epoch": 2.7963525835866263, |
| "grad_norm": 0.0034484388306736946, |
| "learning_rate": 2.2036676798378927e-05, |
| "loss": 0.001, |
| "step": 138000 |
| }, |
| { |
| "epoch": 2.806484295845998, |
| "grad_norm": 0.011139455251395702, |
| "learning_rate": 2.1935359675785208e-05, |
| "loss": 0.0012, |
| "step": 138500 |
| }, |
| { |
| "epoch": 2.81661600810537, |
| "grad_norm": 0.004759063478559256, |
| "learning_rate": 2.183404255319149e-05, |
| "loss": 0.0016, |
| "step": 139000 |
| }, |
| { |
| "epoch": 2.8267477203647418, |
| "grad_norm": 0.0018992675468325615, |
| "learning_rate": 2.1732725430597772e-05, |
| "loss": 0.0016, |
| "step": 139500 |
| }, |
| { |
| "epoch": 2.8368794326241136, |
| "grad_norm": 0.04305073618888855, |
| "learning_rate": 2.1631408308004052e-05, |
| "loss": 0.0007, |
| "step": 140000 |
| }, |
| { |
| "epoch": 2.8470111448834854, |
| "grad_norm": 0.0012538008159026504, |
| "learning_rate": 2.1530091185410336e-05, |
| "loss": 0.0015, |
| "step": 140500 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.013261191546916962, |
| "learning_rate": 2.1428774062816617e-05, |
| "loss": 0.0012, |
| "step": 141000 |
| }, |
| { |
| "epoch": 2.867274569402229, |
| "grad_norm": 0.0025184724945575, |
| "learning_rate": 2.1327456940222897e-05, |
| "loss": 0.0014, |
| "step": 141500 |
| }, |
| { |
| "epoch": 2.877406281661601, |
| "grad_norm": 0.0010861046612262726, |
| "learning_rate": 2.122613981762918e-05, |
| "loss": 0.001, |
| "step": 142000 |
| }, |
| { |
| "epoch": 2.8875379939209727, |
| "grad_norm": 0.0007479240885004401, |
| "learning_rate": 2.112482269503546e-05, |
| "loss": 0.0011, |
| "step": 142500 |
| }, |
| { |
| "epoch": 2.8976697061803445, |
| "grad_norm": 0.00030417501693591475, |
| "learning_rate": 2.1023505572441742e-05, |
| "loss": 0.0013, |
| "step": 143000 |
| }, |
| { |
| "epoch": 2.9078014184397163, |
| "grad_norm": 0.0005445684073492885, |
| "learning_rate": 2.0922188449848026e-05, |
| "loss": 0.0011, |
| "step": 143500 |
| }, |
| { |
| "epoch": 2.917933130699088, |
| "grad_norm": 14.009148597717285, |
| "learning_rate": 2.0820871327254306e-05, |
| "loss": 0.0009, |
| "step": 144000 |
| }, |
| { |
| "epoch": 2.92806484295846, |
| "grad_norm": 0.0036419560201466084, |
| "learning_rate": 2.0719554204660587e-05, |
| "loss": 0.0014, |
| "step": 144500 |
| }, |
| { |
| "epoch": 2.9381965552178317, |
| "grad_norm": 0.00013747498451266438, |
| "learning_rate": 2.061823708206687e-05, |
| "loss": 0.0011, |
| "step": 145000 |
| }, |
| { |
| "epoch": 2.9483282674772036, |
| "grad_norm": 2.2749545574188232, |
| "learning_rate": 2.051691995947315e-05, |
| "loss": 0.0013, |
| "step": 145500 |
| }, |
| { |
| "epoch": 2.9584599797365754, |
| "grad_norm": 0.003953267820179462, |
| "learning_rate": 2.041560283687943e-05, |
| "loss": 0.0013, |
| "step": 146000 |
| }, |
| { |
| "epoch": 2.968591691995947, |
| "grad_norm": 0.00029023364186286926, |
| "learning_rate": 2.0314285714285715e-05, |
| "loss": 0.0011, |
| "step": 146500 |
| }, |
| { |
| "epoch": 2.978723404255319, |
| "grad_norm": 0.00021805072901770473, |
| "learning_rate": 2.0212968591691996e-05, |
| "loss": 0.0009, |
| "step": 147000 |
| }, |
| { |
| "epoch": 2.988855116514691, |
| "grad_norm": 0.0005817350465804338, |
| "learning_rate": 2.0111651469098276e-05, |
| "loss": 0.001, |
| "step": 147500 |
| }, |
| { |
| "epoch": 2.998986828774063, |
| "grad_norm": 0.00011477700900286436, |
| "learning_rate": 2.001033434650456e-05, |
| "loss": 0.001, |
| "step": 148000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9995950160927816, |
| "eval_f1": 0.9995950488462076, |
| "eval_loss": 0.0019772385712713003, |
| "eval_precision": 0.9995952100990623, |
| "eval_recall": 0.9995950160927816, |
| "eval_runtime": 378.5667, |
| "eval_samples_per_second": 185.035, |
| "eval_steps_per_second": 11.565, |
| "step": 148050 |
| }, |
| { |
| "epoch": 3.0091185410334345, |
| "grad_norm": 0.00040647145942784846, |
| "learning_rate": 1.990901722391084e-05, |
| "loss": 0.0005, |
| "step": 148500 |
| }, |
| { |
| "epoch": 3.0192502532928063, |
| "grad_norm": 0.0007454357692040503, |
| "learning_rate": 1.980770010131712e-05, |
| "loss": 0.0008, |
| "step": 149000 |
| }, |
| { |
| "epoch": 3.0293819655521785, |
| "grad_norm": 0.0005696163279935718, |
| "learning_rate": 1.9706382978723405e-05, |
| "loss": 0.0013, |
| "step": 149500 |
| }, |
| { |
| "epoch": 3.0395136778115504, |
| "grad_norm": 0.012222293764352798, |
| "learning_rate": 1.9605065856129685e-05, |
| "loss": 0.0008, |
| "step": 150000 |
| }, |
| { |
| "epoch": 3.049645390070922, |
| "grad_norm": 0.00012108933151466772, |
| "learning_rate": 1.950374873353597e-05, |
| "loss": 0.0003, |
| "step": 150500 |
| }, |
| { |
| "epoch": 3.059777102330294, |
| "grad_norm": 0.00036620517494156957, |
| "learning_rate": 1.940243161094225e-05, |
| "loss": 0.0006, |
| "step": 151000 |
| }, |
| { |
| "epoch": 3.069908814589666, |
| "grad_norm": 0.08871813118457794, |
| "learning_rate": 1.930111448834853e-05, |
| "loss": 0.0006, |
| "step": 151500 |
| }, |
| { |
| "epoch": 3.0800405268490376, |
| "grad_norm": 0.0008102179854176939, |
| "learning_rate": 1.9199797365754814e-05, |
| "loss": 0.0011, |
| "step": 152000 |
| }, |
| { |
| "epoch": 3.0901722391084094, |
| "grad_norm": 0.000266701215878129, |
| "learning_rate": 1.9098480243161094e-05, |
| "loss": 0.0005, |
| "step": 152500 |
| }, |
| { |
| "epoch": 3.1003039513677813, |
| "grad_norm": 0.00016724316810723394, |
| "learning_rate": 1.8997163120567378e-05, |
| "loss": 0.0012, |
| "step": 153000 |
| }, |
| { |
| "epoch": 3.110435663627153, |
| "grad_norm": 0.15629072487354279, |
| "learning_rate": 1.889584599797366e-05, |
| "loss": 0.0013, |
| "step": 153500 |
| }, |
| { |
| "epoch": 3.120567375886525, |
| "grad_norm": 0.00016377937572542578, |
| "learning_rate": 1.879452887537994e-05, |
| "loss": 0.0005, |
| "step": 154000 |
| }, |
| { |
| "epoch": 3.1306990881458967, |
| "grad_norm": 0.0007321849116124213, |
| "learning_rate": 1.8693211752786223e-05, |
| "loss": 0.0007, |
| "step": 154500 |
| }, |
| { |
| "epoch": 3.1408308004052685, |
| "grad_norm": 0.0024997428990900517, |
| "learning_rate": 1.8591894630192504e-05, |
| "loss": 0.0014, |
| "step": 155000 |
| }, |
| { |
| "epoch": 3.1509625126646403, |
| "grad_norm": 0.0005427179858088493, |
| "learning_rate": 1.8490577507598787e-05, |
| "loss": 0.0006, |
| "step": 155500 |
| }, |
| { |
| "epoch": 3.161094224924012, |
| "grad_norm": 0.0002773651503957808, |
| "learning_rate": 1.8389260385005068e-05, |
| "loss": 0.0008, |
| "step": 156000 |
| }, |
| { |
| "epoch": 3.171225937183384, |
| "grad_norm": 9.259460784960538e-05, |
| "learning_rate": 1.8287943262411348e-05, |
| "loss": 0.0007, |
| "step": 156500 |
| }, |
| { |
| "epoch": 3.181357649442756, |
| "grad_norm": 0.0008258196176029742, |
| "learning_rate": 1.8186626139817632e-05, |
| "loss": 0.0012, |
| "step": 157000 |
| }, |
| { |
| "epoch": 3.1914893617021276, |
| "grad_norm": 0.02231917716562748, |
| "learning_rate": 1.8085309017223913e-05, |
| "loss": 0.0005, |
| "step": 157500 |
| }, |
| { |
| "epoch": 3.2016210739614994, |
| "grad_norm": 0.00040244663250632584, |
| "learning_rate": 1.7983991894630193e-05, |
| "loss": 0.0004, |
| "step": 158000 |
| }, |
| { |
| "epoch": 3.2117527862208712, |
| "grad_norm": 0.0008584433817304671, |
| "learning_rate": 1.7882674772036477e-05, |
| "loss": 0.0007, |
| "step": 158500 |
| }, |
| { |
| "epoch": 3.221884498480243, |
| "grad_norm": 1.4710029363632202, |
| "learning_rate": 1.7781357649442757e-05, |
| "loss": 0.0007, |
| "step": 159000 |
| }, |
| { |
| "epoch": 3.232016210739615, |
| "grad_norm": 0.0021673429291695356, |
| "learning_rate": 1.7680040526849038e-05, |
| "loss": 0.0013, |
| "step": 159500 |
| }, |
| { |
| "epoch": 3.2421479229989867, |
| "grad_norm": 0.0007125946576707065, |
| "learning_rate": 1.757872340425532e-05, |
| "loss": 0.0003, |
| "step": 160000 |
| }, |
| { |
| "epoch": 3.2522796352583585, |
| "grad_norm": 2.0340616703033447, |
| "learning_rate": 1.7477406281661602e-05, |
| "loss": 0.0007, |
| "step": 160500 |
| }, |
| { |
| "epoch": 3.2624113475177303, |
| "grad_norm": 0.02512693777680397, |
| "learning_rate": 1.7376089159067883e-05, |
| "loss": 0.0013, |
| "step": 161000 |
| }, |
| { |
| "epoch": 3.272543059777102, |
| "grad_norm": 0.0026464995462447405, |
| "learning_rate": 1.7274772036474166e-05, |
| "loss": 0.0005, |
| "step": 161500 |
| }, |
| { |
| "epoch": 3.282674772036474, |
| "grad_norm": 0.002218346344307065, |
| "learning_rate": 1.7173454913880447e-05, |
| "loss": 0.0003, |
| "step": 162000 |
| }, |
| { |
| "epoch": 3.2928064842958458, |
| "grad_norm": 0.004217283334583044, |
| "learning_rate": 1.7072137791286727e-05, |
| "loss": 0.0006, |
| "step": 162500 |
| }, |
| { |
| "epoch": 3.3029381965552176, |
| "grad_norm": 0.09187914431095123, |
| "learning_rate": 1.697082066869301e-05, |
| "loss": 0.0008, |
| "step": 163000 |
| }, |
| { |
| "epoch": 3.31306990881459, |
| "grad_norm": 0.004053326323628426, |
| "learning_rate": 1.6869503546099292e-05, |
| "loss": 0.0007, |
| "step": 163500 |
| }, |
| { |
| "epoch": 3.3232016210739617, |
| "grad_norm": 0.00016366604540962726, |
| "learning_rate": 1.6768186423505572e-05, |
| "loss": 0.0004, |
| "step": 164000 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 0.00019909192633349448, |
| "learning_rate": 1.6666869300911856e-05, |
| "loss": 0.0004, |
| "step": 164500 |
| }, |
| { |
| "epoch": 3.3434650455927053, |
| "grad_norm": 0.0006264941766858101, |
| "learning_rate": 1.6565552178318136e-05, |
| "loss": 0.0006, |
| "step": 165000 |
| }, |
| { |
| "epoch": 3.353596757852077, |
| "grad_norm": 0.0018695942126214504, |
| "learning_rate": 1.646423505572442e-05, |
| "loss": 0.0008, |
| "step": 165500 |
| }, |
| { |
| "epoch": 3.363728470111449, |
| "grad_norm": 0.0007150270394049585, |
| "learning_rate": 1.63629179331307e-05, |
| "loss": 0.0008, |
| "step": 166000 |
| }, |
| { |
| "epoch": 3.3738601823708207, |
| "grad_norm": 7.787420327076688e-05, |
| "learning_rate": 1.626160081053698e-05, |
| "loss": 0.0004, |
| "step": 166500 |
| }, |
| { |
| "epoch": 3.3839918946301926, |
| "grad_norm": 0.00045614209375344217, |
| "learning_rate": 1.6160283687943265e-05, |
| "loss": 0.0006, |
| "step": 167000 |
| }, |
| { |
| "epoch": 3.3941236068895644, |
| "grad_norm": 0.0001107916614273563, |
| "learning_rate": 1.6058966565349546e-05, |
| "loss": 0.0003, |
| "step": 167500 |
| }, |
| { |
| "epoch": 3.404255319148936, |
| "grad_norm": 0.008644777350127697, |
| "learning_rate": 1.5957649442755826e-05, |
| "loss": 0.0007, |
| "step": 168000 |
| }, |
| { |
| "epoch": 3.414387031408308, |
| "grad_norm": 0.00043247168650850654, |
| "learning_rate": 1.585633232016211e-05, |
| "loss": 0.0006, |
| "step": 168500 |
| }, |
| { |
| "epoch": 3.42451874366768, |
| "grad_norm": 0.0008023619302548468, |
| "learning_rate": 1.575501519756839e-05, |
| "loss": 0.0004, |
| "step": 169000 |
| }, |
| { |
| "epoch": 3.4346504559270516, |
| "grad_norm": 0.0007962311501614749, |
| "learning_rate": 1.565369807497467e-05, |
| "loss": 0.0006, |
| "step": 169500 |
| }, |
| { |
| "epoch": 3.4447821681864235, |
| "grad_norm": 0.00028413927066139877, |
| "learning_rate": 1.5552380952380955e-05, |
| "loss": 0.0008, |
| "step": 170000 |
| }, |
| { |
| "epoch": 3.4549138804457953, |
| "grad_norm": 0.00016883590433280915, |
| "learning_rate": 1.5451063829787235e-05, |
| "loss": 0.0006, |
| "step": 170500 |
| }, |
| { |
| "epoch": 3.465045592705167, |
| "grad_norm": 0.014377252198755741, |
| "learning_rate": 1.5349746707193516e-05, |
| "loss": 0.0006, |
| "step": 171000 |
| }, |
| { |
| "epoch": 3.475177304964539, |
| "grad_norm": 0.010873903520405293, |
| "learning_rate": 1.52484295845998e-05, |
| "loss": 0.0007, |
| "step": 171500 |
| }, |
| { |
| "epoch": 3.4853090172239107, |
| "grad_norm": 0.001349785947240889, |
| "learning_rate": 1.514711246200608e-05, |
| "loss": 0.0008, |
| "step": 172000 |
| }, |
| { |
| "epoch": 3.4954407294832825, |
| "grad_norm": 6.550106627400964e-05, |
| "learning_rate": 1.504579533941236e-05, |
| "loss": 0.0004, |
| "step": 172500 |
| }, |
| { |
| "epoch": 3.5055724417426544, |
| "grad_norm": 0.004185553174465895, |
| "learning_rate": 1.4944478216818644e-05, |
| "loss": 0.0005, |
| "step": 173000 |
| }, |
| { |
| "epoch": 3.515704154002026, |
| "grad_norm": 0.0002081769343931228, |
| "learning_rate": 1.4843161094224925e-05, |
| "loss": 0.0005, |
| "step": 173500 |
| }, |
| { |
| "epoch": 3.5258358662613984, |
| "grad_norm": 0.0002064239961327985, |
| "learning_rate": 1.4741843971631205e-05, |
| "loss": 0.001, |
| "step": 174000 |
| }, |
| { |
| "epoch": 3.5359675785207703, |
| "grad_norm": 0.006560925859957933, |
| "learning_rate": 1.4640526849037489e-05, |
| "loss": 0.0006, |
| "step": 174500 |
| }, |
| { |
| "epoch": 3.546099290780142, |
| "grad_norm": 0.001367397839203477, |
| "learning_rate": 1.453920972644377e-05, |
| "loss": 0.0006, |
| "step": 175000 |
| }, |
| { |
| "epoch": 3.556231003039514, |
| "grad_norm": 0.0019000001484528184, |
| "learning_rate": 1.443789260385005e-05, |
| "loss": 0.0006, |
| "step": 175500 |
| }, |
| { |
| "epoch": 3.5663627152988857, |
| "grad_norm": 0.000535793777089566, |
| "learning_rate": 1.4336575481256334e-05, |
| "loss": 0.0009, |
| "step": 176000 |
| }, |
| { |
| "epoch": 3.5764944275582575, |
| "grad_norm": 0.00025122836814261973, |
| "learning_rate": 1.4235258358662614e-05, |
| "loss": 0.0004, |
| "step": 176500 |
| }, |
| { |
| "epoch": 3.5866261398176293, |
| "grad_norm": 0.00024198205210268497, |
| "learning_rate": 1.4133941236068896e-05, |
| "loss": 0.0004, |
| "step": 177000 |
| }, |
| { |
| "epoch": 3.596757852077001, |
| "grad_norm": 0.00032863879459910095, |
| "learning_rate": 1.4032624113475179e-05, |
| "loss": 0.0008, |
| "step": 177500 |
| }, |
| { |
| "epoch": 3.606889564336373, |
| "grad_norm": 0.0001286083715967834, |
| "learning_rate": 1.3931306990881459e-05, |
| "loss": 0.0003, |
| "step": 178000 |
| }, |
| { |
| "epoch": 3.617021276595745, |
| "grad_norm": 7.639949035365134e-05, |
| "learning_rate": 1.3829989868287741e-05, |
| "loss": 0.0004, |
| "step": 178500 |
| }, |
| { |
| "epoch": 3.6271529888551166, |
| "grad_norm": 0.002776580862700939, |
| "learning_rate": 1.3728672745694023e-05, |
| "loss": 0.0009, |
| "step": 179000 |
| }, |
| { |
| "epoch": 3.6372847011144884, |
| "grad_norm": 0.00013075117021799088, |
| "learning_rate": 1.3627355623100305e-05, |
| "loss": 0.0004, |
| "step": 179500 |
| }, |
| { |
| "epoch": 3.6474164133738602, |
| "grad_norm": 0.0912652462720871, |
| "learning_rate": 1.3526038500506586e-05, |
| "loss": 0.0003, |
| "step": 180000 |
| }, |
| { |
| "epoch": 3.657548125633232, |
| "grad_norm": 0.0013257339596748352, |
| "learning_rate": 1.3424721377912868e-05, |
| "loss": 0.0009, |
| "step": 180500 |
| }, |
| { |
| "epoch": 3.667679837892604, |
| "grad_norm": 4.812105544260703e-05, |
| "learning_rate": 1.332340425531915e-05, |
| "loss": 0.0005, |
| "step": 181000 |
| }, |
| { |
| "epoch": 3.6778115501519757, |
| "grad_norm": 0.0011839779326692224, |
| "learning_rate": 1.322208713272543e-05, |
| "loss": 0.0009, |
| "step": 181500 |
| }, |
| { |
| "epoch": 3.6879432624113475, |
| "grad_norm": 0.0005408598226495087, |
| "learning_rate": 1.3120770010131715e-05, |
| "loss": 0.0006, |
| "step": 182000 |
| }, |
| { |
| "epoch": 3.6980749746707193, |
| "grad_norm": 9.703055548015982e-05, |
| "learning_rate": 1.3019452887537995e-05, |
| "loss": 0.0005, |
| "step": 182500 |
| }, |
| { |
| "epoch": 3.708206686930091, |
| "grad_norm": 0.0002428248117212206, |
| "learning_rate": 1.2918135764944275e-05, |
| "loss": 0.0004, |
| "step": 183000 |
| }, |
| { |
| "epoch": 3.718338399189463, |
| "grad_norm": 0.00048470127512700856, |
| "learning_rate": 1.281681864235056e-05, |
| "loss": 0.0007, |
| "step": 183500 |
| }, |
| { |
| "epoch": 3.728470111448835, |
| "grad_norm": 0.00018880152492783964, |
| "learning_rate": 1.271550151975684e-05, |
| "loss": 0.0004, |
| "step": 184000 |
| }, |
| { |
| "epoch": 3.7386018237082066, |
| "grad_norm": 0.0866980105638504, |
| "learning_rate": 1.261418439716312e-05, |
| "loss": 0.001, |
| "step": 184500 |
| }, |
| { |
| "epoch": 3.7487335359675784, |
| "grad_norm": 0.0004920652718283236, |
| "learning_rate": 1.2512867274569404e-05, |
| "loss": 0.0004, |
| "step": 185000 |
| }, |
| { |
| "epoch": 3.7588652482269502, |
| "grad_norm": 0.0006933720433153212, |
| "learning_rate": 1.2411550151975685e-05, |
| "loss": 0.0007, |
| "step": 185500 |
| }, |
| { |
| "epoch": 3.768996960486322, |
| "grad_norm": 9.502648754278198e-05, |
| "learning_rate": 1.2310233029381967e-05, |
| "loss": 0.0002, |
| "step": 186000 |
| }, |
| { |
| "epoch": 3.779128672745694, |
| "grad_norm": 0.14055226743221283, |
| "learning_rate": 1.2208915906788247e-05, |
| "loss": 0.0009, |
| "step": 186500 |
| }, |
| { |
| "epoch": 3.7892603850050657, |
| "grad_norm": 0.0009207709226757288, |
| "learning_rate": 1.210759878419453e-05, |
| "loss": 0.0007, |
| "step": 187000 |
| }, |
| { |
| "epoch": 3.7993920972644375, |
| "grad_norm": 0.0017948386957868934, |
| "learning_rate": 1.2006281661600811e-05, |
| "loss": 0.0006, |
| "step": 187500 |
| }, |
| { |
| "epoch": 3.8095238095238093, |
| "grad_norm": 0.0010630637407302856, |
| "learning_rate": 1.1904964539007092e-05, |
| "loss": 0.0007, |
| "step": 188000 |
| }, |
| { |
| "epoch": 3.819655521783181, |
| "grad_norm": 0.001550987595692277, |
| "learning_rate": 1.1803647416413374e-05, |
| "loss": 0.0004, |
| "step": 188500 |
| }, |
| { |
| "epoch": 3.829787234042553, |
| "grad_norm": 0.00022620504023507237, |
| "learning_rate": 1.1702330293819656e-05, |
| "loss": 0.001, |
| "step": 189000 |
| }, |
| { |
| "epoch": 3.8399189463019248, |
| "grad_norm": 0.005701087880879641, |
| "learning_rate": 1.1601013171225937e-05, |
| "loss": 0.0005, |
| "step": 189500 |
| }, |
| { |
| "epoch": 3.850050658561297, |
| "grad_norm": 0.002242797054350376, |
| "learning_rate": 1.1499696048632219e-05, |
| "loss": 0.0005, |
| "step": 190000 |
| }, |
| { |
| "epoch": 3.860182370820669, |
| "grad_norm": 0.000945412612054497, |
| "learning_rate": 1.1398378926038501e-05, |
| "loss": 0.0004, |
| "step": 190500 |
| }, |
| { |
| "epoch": 3.8703140830800407, |
| "grad_norm": 0.00039639745955355465, |
| "learning_rate": 1.1297061803444783e-05, |
| "loss": 0.0007, |
| "step": 191000 |
| }, |
| { |
| "epoch": 3.8804457953394125, |
| "grad_norm": 0.00015946484927553684, |
| "learning_rate": 1.1195744680851064e-05, |
| "loss": 0.0004, |
| "step": 191500 |
| }, |
| { |
| "epoch": 3.8905775075987843, |
| "grad_norm": 0.0012002813164144754, |
| "learning_rate": 1.1094427558257346e-05, |
| "loss": 0.0006, |
| "step": 192000 |
| }, |
| { |
| "epoch": 3.900709219858156, |
| "grad_norm": 0.0018312711035832763, |
| "learning_rate": 1.0993110435663628e-05, |
| "loss": 0.0006, |
| "step": 192500 |
| }, |
| { |
| "epoch": 3.910840932117528, |
| "grad_norm": 0.0029842143412679434, |
| "learning_rate": 1.0891793313069908e-05, |
| "loss": 0.0004, |
| "step": 193000 |
| }, |
| { |
| "epoch": 3.9209726443768997, |
| "grad_norm": 0.00016236377996392548, |
| "learning_rate": 1.079047619047619e-05, |
| "loss": 0.0004, |
| "step": 193500 |
| }, |
| { |
| "epoch": 3.9311043566362716, |
| "grad_norm": 4.384133815765381, |
| "learning_rate": 1.0689159067882473e-05, |
| "loss": 0.0003, |
| "step": 194000 |
| }, |
| { |
| "epoch": 3.9412360688956434, |
| "grad_norm": 0.0002483314019627869, |
| "learning_rate": 1.0587841945288753e-05, |
| "loss": 0.0003, |
| "step": 194500 |
| }, |
| { |
| "epoch": 3.951367781155015, |
| "grad_norm": 4.876391540165059e-05, |
| "learning_rate": 1.0486524822695035e-05, |
| "loss": 0.0002, |
| "step": 195000 |
| }, |
| { |
| "epoch": 3.961499493414387, |
| "grad_norm": 6.163517537061125e-05, |
| "learning_rate": 1.0385207700101318e-05, |
| "loss": 0.0002, |
| "step": 195500 |
| }, |
| { |
| "epoch": 3.971631205673759, |
| "grad_norm": 0.2390281707048416, |
| "learning_rate": 1.02838905775076e-05, |
| "loss": 0.0003, |
| "step": 196000 |
| }, |
| { |
| "epoch": 3.9817629179331306, |
| "grad_norm": 0.0019110542489215732, |
| "learning_rate": 1.0182573454913882e-05, |
| "loss": 0.0015, |
| "step": 196500 |
| }, |
| { |
| "epoch": 3.9918946301925025, |
| "grad_norm": 0.001141904853284359, |
| "learning_rate": 1.0081256332320162e-05, |
| "loss": 0.0003, |
| "step": 197000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9997418523632351, |
| "eval_f1": 0.9997418729241718, |
| "eval_loss": 0.0016791160451248288, |
| "eval_precision": 0.9997419693903039, |
| "eval_recall": 0.9997418523632351, |
| "eval_runtime": 379.6081, |
| "eval_samples_per_second": 184.527, |
| "eval_steps_per_second": 11.533, |
| "step": 197400 |
| }, |
| { |
| "epoch": 4.002026342451875, |
| "grad_norm": 0.0005896133952774107, |
| "learning_rate": 9.979939209726444e-06, |
| "loss": 0.0003, |
| "step": 197500 |
| }, |
| { |
| "epoch": 4.0121580547112465, |
| "grad_norm": 0.0067860777489840984, |
| "learning_rate": 9.878622087132727e-06, |
| "loss": 0.0006, |
| "step": 198000 |
| }, |
| { |
| "epoch": 4.022289766970618, |
| "grad_norm": 0.001142342109233141, |
| "learning_rate": 9.777304964539009e-06, |
| "loss": 0.0001, |
| "step": 198500 |
| }, |
| { |
| "epoch": 4.03242147922999, |
| "grad_norm": 0.0005706630763597786, |
| "learning_rate": 9.67598784194529e-06, |
| "loss": 0.0003, |
| "step": 199000 |
| }, |
| { |
| "epoch": 4.042553191489362, |
| "grad_norm": 0.0017065483843907714, |
| "learning_rate": 9.574670719351571e-06, |
| "loss": 0.0005, |
| "step": 199500 |
| }, |
| { |
| "epoch": 4.052684903748734, |
| "grad_norm": 0.0004346190544310957, |
| "learning_rate": 9.473353596757854e-06, |
| "loss": 0.0003, |
| "step": 200000 |
| }, |
| { |
| "epoch": 4.062816616008106, |
| "grad_norm": 0.00011587599874474108, |
| "learning_rate": 9.372036474164134e-06, |
| "loss": 0.0005, |
| "step": 200500 |
| }, |
| { |
| "epoch": 4.072948328267477, |
| "grad_norm": 0.0020452928729355335, |
| "learning_rate": 9.270719351570416e-06, |
| "loss": 0.0006, |
| "step": 201000 |
| }, |
| { |
| "epoch": 4.083080040526849, |
| "grad_norm": 0.0030393574852496386, |
| "learning_rate": 9.169402228976698e-06, |
| "loss": 0.0001, |
| "step": 201500 |
| }, |
| { |
| "epoch": 4.093211752786221, |
| "grad_norm": 0.00011766282113967463, |
| "learning_rate": 9.068085106382979e-06, |
| "loss": 0.0003, |
| "step": 202000 |
| }, |
| { |
| "epoch": 4.103343465045593, |
| "grad_norm": 0.00012966316717211157, |
| "learning_rate": 8.966767983789261e-06, |
| "loss": 0.0002, |
| "step": 202500 |
| }, |
| { |
| "epoch": 4.113475177304965, |
| "grad_norm": 0.0005606951890513301, |
| "learning_rate": 8.865450861195543e-06, |
| "loss": 0.0002, |
| "step": 203000 |
| }, |
| { |
| "epoch": 4.1236068895643365, |
| "grad_norm": 6.66538835503161e-05, |
| "learning_rate": 8.764133738601824e-06, |
| "loss": 0.0004, |
| "step": 203500 |
| }, |
| { |
| "epoch": 4.133738601823708, |
| "grad_norm": 0.00035967957228422165, |
| "learning_rate": 8.662816616008106e-06, |
| "loss": 0.0003, |
| "step": 204000 |
| }, |
| { |
| "epoch": 4.14387031408308, |
| "grad_norm": 3.555602233973332e-05, |
| "learning_rate": 8.561499493414388e-06, |
| "loss": 0.0002, |
| "step": 204500 |
| }, |
| { |
| "epoch": 4.154002026342452, |
| "grad_norm": 7.583157275803387e-05, |
| "learning_rate": 8.460182370820668e-06, |
| "loss": 0.0003, |
| "step": 205000 |
| }, |
| { |
| "epoch": 4.164133738601824, |
| "grad_norm": 0.00014735900913365185, |
| "learning_rate": 8.35886524822695e-06, |
| "loss": 0.0003, |
| "step": 205500 |
| }, |
| { |
| "epoch": 4.174265450861196, |
| "grad_norm": 0.00036512804217636585, |
| "learning_rate": 8.257548125633233e-06, |
| "loss": 0.0002, |
| "step": 206000 |
| }, |
| { |
| "epoch": 4.184397163120567, |
| "grad_norm": 0.0002252118574688211, |
| "learning_rate": 8.156231003039515e-06, |
| "loss": 0.0009, |
| "step": 206500 |
| }, |
| { |
| "epoch": 4.194528875379939, |
| "grad_norm": 0.00020840394427068532, |
| "learning_rate": 8.054913880445795e-06, |
| "loss": 0.0002, |
| "step": 207000 |
| }, |
| { |
| "epoch": 4.204660587639311, |
| "grad_norm": 0.004410985857248306, |
| "learning_rate": 7.953596757852077e-06, |
| "loss": 0.0005, |
| "step": 207500 |
| }, |
| { |
| "epoch": 4.214792299898683, |
| "grad_norm": 0.00022406030620913953, |
| "learning_rate": 7.85227963525836e-06, |
| "loss": 0.0002, |
| "step": 208000 |
| }, |
| { |
| "epoch": 4.224924012158055, |
| "grad_norm": 9.579696779837832e-05, |
| "learning_rate": 7.75096251266464e-06, |
| "loss": 0.0001, |
| "step": 208500 |
| }, |
| { |
| "epoch": 4.2350557244174265, |
| "grad_norm": 0.0025997899938374758, |
| "learning_rate": 7.649645390070922e-06, |
| "loss": 0.0002, |
| "step": 209000 |
| }, |
| { |
| "epoch": 4.245187436676798, |
| "grad_norm": 0.00013335005496628582, |
| "learning_rate": 7.548328267477204e-06, |
| "loss": 0.0001, |
| "step": 209500 |
| }, |
| { |
| "epoch": 4.25531914893617, |
| "grad_norm": 0.0007246573222801089, |
| "learning_rate": 7.447011144883486e-06, |
| "loss": 0.0006, |
| "step": 210000 |
| }, |
| { |
| "epoch": 4.265450861195542, |
| "grad_norm": 0.00013572497118730098, |
| "learning_rate": 7.345694022289768e-06, |
| "loss": 0.0003, |
| "step": 210500 |
| }, |
| { |
| "epoch": 4.275582573454914, |
| "grad_norm": 0.0002135665126843378, |
| "learning_rate": 7.24437689969605e-06, |
| "loss": 0.0001, |
| "step": 211000 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 6.381842831615359e-05, |
| "learning_rate": 7.14305977710233e-06, |
| "loss": 0.0001, |
| "step": 211500 |
| }, |
| { |
| "epoch": 4.295845997973657, |
| "grad_norm": 3.025340811291244e-05, |
| "learning_rate": 7.0417426545086126e-06, |
| "loss": 0.0001, |
| "step": 212000 |
| }, |
| { |
| "epoch": 4.305977710233029, |
| "grad_norm": 3.8568006857531145e-05, |
| "learning_rate": 6.940425531914895e-06, |
| "loss": 0.0001, |
| "step": 212500 |
| }, |
| { |
| "epoch": 4.316109422492401, |
| "grad_norm": 3.65682462870609e-05, |
| "learning_rate": 6.839108409321175e-06, |
| "loss": 0.0003, |
| "step": 213000 |
| }, |
| { |
| "epoch": 4.326241134751773, |
| "grad_norm": 2.7566075004870072e-05, |
| "learning_rate": 6.737791286727457e-06, |
| "loss": 0.0001, |
| "step": 213500 |
| }, |
| { |
| "epoch": 4.336372847011145, |
| "grad_norm": 8.40180873638019e-05, |
| "learning_rate": 6.6364741641337395e-06, |
| "loss": 0.0001, |
| "step": 214000 |
| }, |
| { |
| "epoch": 4.3465045592705165, |
| "grad_norm": 0.0006221074727363884, |
| "learning_rate": 6.53515704154002e-06, |
| "loss": 0.0003, |
| "step": 214500 |
| }, |
| { |
| "epoch": 4.356636271529888, |
| "grad_norm": 2.8866035790997557e-05, |
| "learning_rate": 6.433839918946302e-06, |
| "loss": 0.0001, |
| "step": 215000 |
| }, |
| { |
| "epoch": 4.36676798378926, |
| "grad_norm": 5.898380756378174, |
| "learning_rate": 6.332522796352584e-06, |
| "loss": 0.0001, |
| "step": 215500 |
| }, |
| { |
| "epoch": 4.376899696048632, |
| "grad_norm": 2.5054974685190246e-05, |
| "learning_rate": 6.231205673758866e-06, |
| "loss": 0.0002, |
| "step": 216000 |
| }, |
| { |
| "epoch": 4.387031408308004, |
| "grad_norm": 7.587042637169361e-05, |
| "learning_rate": 6.129888551165147e-06, |
| "loss": 0.0004, |
| "step": 216500 |
| }, |
| { |
| "epoch": 4.397163120567376, |
| "grad_norm": 4.567088762996718e-05, |
| "learning_rate": 6.028571428571428e-06, |
| "loss": 0.0002, |
| "step": 217000 |
| }, |
| { |
| "epoch": 4.407294832826747, |
| "grad_norm": 5.717075691791251e-05, |
| "learning_rate": 5.92725430597771e-06, |
| "loss": 0.0005, |
| "step": 217500 |
| }, |
| { |
| "epoch": 4.417426545086119, |
| "grad_norm": 0.0001339384471066296, |
| "learning_rate": 5.825937183383992e-06, |
| "loss": 0.0002, |
| "step": 218000 |
| }, |
| { |
| "epoch": 4.427558257345491, |
| "grad_norm": 0.0016710502095520496, |
| "learning_rate": 5.724620060790274e-06, |
| "loss": 0.0002, |
| "step": 218500 |
| }, |
| { |
| "epoch": 4.437689969604863, |
| "grad_norm": 4.579993037623353e-05, |
| "learning_rate": 5.623302938196556e-06, |
| "loss": 0.0001, |
| "step": 219000 |
| }, |
| { |
| "epoch": 4.447821681864235, |
| "grad_norm": 0.00017921006656251848, |
| "learning_rate": 5.521985815602837e-06, |
| "loss": 0.0002, |
| "step": 219500 |
| }, |
| { |
| "epoch": 4.4579533941236065, |
| "grad_norm": 7.257221295731142e-05, |
| "learning_rate": 5.4206686930091195e-06, |
| "loss": 0.0, |
| "step": 220000 |
| }, |
| { |
| "epoch": 4.468085106382979, |
| "grad_norm": 0.00014980675769038498, |
| "learning_rate": 5.319351570415401e-06, |
| "loss": 0.0003, |
| "step": 220500 |
| }, |
| { |
| "epoch": 4.47821681864235, |
| "grad_norm": 0.00026508086011745036, |
| "learning_rate": 5.218034447821682e-06, |
| "loss": 0.0, |
| "step": 221000 |
| }, |
| { |
| "epoch": 4.488348530901723, |
| "grad_norm": 3.1956707971403375e-05, |
| "learning_rate": 5.116717325227964e-06, |
| "loss": 0.0002, |
| "step": 221500 |
| }, |
| { |
| "epoch": 4.498480243161095, |
| "grad_norm": 0.00025195363559760153, |
| "learning_rate": 5.0154002026342455e-06, |
| "loss": 0.0002, |
| "step": 222000 |
| }, |
| { |
| "epoch": 4.508611955420466, |
| "grad_norm": 2.8796304832212627e-05, |
| "learning_rate": 4.914083080040527e-06, |
| "loss": 0.0003, |
| "step": 222500 |
| }, |
| { |
| "epoch": 4.518743667679838, |
| "grad_norm": 7.115295738913119e-05, |
| "learning_rate": 4.812765957446809e-06, |
| "loss": 0.0002, |
| "step": 223000 |
| }, |
| { |
| "epoch": 4.52887537993921, |
| "grad_norm": 0.00043551792623475194, |
| "learning_rate": 4.71144883485309e-06, |
| "loss": 0.0001, |
| "step": 223500 |
| }, |
| { |
| "epoch": 4.539007092198582, |
| "grad_norm": 0.00012999169121030718, |
| "learning_rate": 4.610131712259372e-06, |
| "loss": 0.0003, |
| "step": 224000 |
| }, |
| { |
| "epoch": 4.549138804457954, |
| "grad_norm": 9.113108535530046e-05, |
| "learning_rate": 4.508814589665654e-06, |
| "loss": 0.0001, |
| "step": 224500 |
| }, |
| { |
| "epoch": 4.5592705167173255, |
| "grad_norm": 3.268069849582389e-05, |
| "learning_rate": 4.407497467071935e-06, |
| "loss": 0.0001, |
| "step": 225000 |
| }, |
| { |
| "epoch": 4.569402228976697, |
| "grad_norm": 3.147554525639862e-05, |
| "learning_rate": 4.306180344478216e-06, |
| "loss": 0.0002, |
| "step": 225500 |
| }, |
| { |
| "epoch": 4.579533941236069, |
| "grad_norm": 3.103091876255348e-05, |
| "learning_rate": 4.2048632218844985e-06, |
| "loss": 0.0003, |
| "step": 226000 |
| }, |
| { |
| "epoch": 4.589665653495441, |
| "grad_norm": 4.341394014772959e-05, |
| "learning_rate": 4.10354609929078e-06, |
| "loss": 0.0002, |
| "step": 226500 |
| }, |
| { |
| "epoch": 4.599797365754813, |
| "grad_norm": 0.006706151645630598, |
| "learning_rate": 4.002228976697062e-06, |
| "loss": 0.0007, |
| "step": 227000 |
| }, |
| { |
| "epoch": 4.609929078014185, |
| "grad_norm": 4.3482647015480325e-05, |
| "learning_rate": 3.900911854103344e-06, |
| "loss": 0.0, |
| "step": 227500 |
| }, |
| { |
| "epoch": 4.620060790273556, |
| "grad_norm": 7.099405775079504e-05, |
| "learning_rate": 3.799594731509625e-06, |
| "loss": 0.0001, |
| "step": 228000 |
| }, |
| { |
| "epoch": 4.630192502532928, |
| "grad_norm": 2.243010931124445e-05, |
| "learning_rate": 3.6982776089159072e-06, |
| "loss": 0.0002, |
| "step": 228500 |
| }, |
| { |
| "epoch": 4.6403242147923, |
| "grad_norm": 9.873649833025411e-05, |
| "learning_rate": 3.5969604863221885e-06, |
| "loss": 0.0001, |
| "step": 229000 |
| }, |
| { |
| "epoch": 4.650455927051672, |
| "grad_norm": 3.25652799801901e-05, |
| "learning_rate": 3.4956433637284703e-06, |
| "loss": 0.0001, |
| "step": 229500 |
| }, |
| { |
| "epoch": 4.660587639311044, |
| "grad_norm": 4.1377668821951374e-05, |
| "learning_rate": 3.3943262411347524e-06, |
| "loss": 0.0002, |
| "step": 230000 |
| }, |
| { |
| "epoch": 4.6707193515704155, |
| "grad_norm": 5.47610288776923e-05, |
| "learning_rate": 3.2930091185410337e-06, |
| "loss": 0.0003, |
| "step": 230500 |
| }, |
| { |
| "epoch": 4.680851063829787, |
| "grad_norm": 6.64242179482244e-05, |
| "learning_rate": 3.191691995947315e-06, |
| "loss": 0.0001, |
| "step": 231000 |
| }, |
| { |
| "epoch": 4.690982776089159, |
| "grad_norm": 7.267168984981254e-05, |
| "learning_rate": 3.0903748733535968e-06, |
| "loss": 0.0, |
| "step": 231500 |
| }, |
| { |
| "epoch": 4.701114488348531, |
| "grad_norm": 2.0616351321223192e-05, |
| "learning_rate": 2.9890577507598785e-06, |
| "loss": 0.0001, |
| "step": 232000 |
| }, |
| { |
| "epoch": 4.711246200607903, |
| "grad_norm": 0.0016979483189061284, |
| "learning_rate": 2.8877406281661602e-06, |
| "loss": 0.0003, |
| "step": 232500 |
| }, |
| { |
| "epoch": 4.721377912867275, |
| "grad_norm": 5.4885382269276306e-05, |
| "learning_rate": 2.7864235055724415e-06, |
| "loss": 0.0001, |
| "step": 233000 |
| }, |
| { |
| "epoch": 4.731509625126646, |
| "grad_norm": 2.725724334595725e-05, |
| "learning_rate": 2.6851063829787233e-06, |
| "loss": 0.0002, |
| "step": 233500 |
| }, |
| { |
| "epoch": 4.741641337386018, |
| "grad_norm": 0.0002646965440362692, |
| "learning_rate": 2.5837892603850054e-06, |
| "loss": 0.0001, |
| "step": 234000 |
| }, |
| { |
| "epoch": 4.75177304964539, |
| "grad_norm": 0.0002549967903178185, |
| "learning_rate": 2.482472137791287e-06, |
| "loss": 0.0001, |
| "step": 234500 |
| }, |
| { |
| "epoch": 4.761904761904762, |
| "grad_norm": 0.001608754275366664, |
| "learning_rate": 2.3811550151975685e-06, |
| "loss": 0.0002, |
| "step": 235000 |
| }, |
| { |
| "epoch": 4.772036474164134, |
| "grad_norm": 0.00016235760995186865, |
| "learning_rate": 2.27983789260385e-06, |
| "loss": 0.0002, |
| "step": 235500 |
| }, |
| { |
| "epoch": 4.7821681864235055, |
| "grad_norm": 2.1720326913055032e-05, |
| "learning_rate": 2.178520770010132e-06, |
| "loss": 0.0002, |
| "step": 236000 |
| }, |
| { |
| "epoch": 4.792299898682877, |
| "grad_norm": 2.442936965962872e-05, |
| "learning_rate": 2.0772036474164132e-06, |
| "loss": 0.0001, |
| "step": 236500 |
| }, |
| { |
| "epoch": 4.802431610942249, |
| "grad_norm": 4.4601965782931075e-05, |
| "learning_rate": 1.975886524822695e-06, |
| "loss": 0.0, |
| "step": 237000 |
| }, |
| { |
| "epoch": 4.812563323201621, |
| "grad_norm": 4.2209729144815356e-05, |
| "learning_rate": 1.874569402228977e-06, |
| "loss": 0.0, |
| "step": 237500 |
| }, |
| { |
| "epoch": 4.822695035460993, |
| "grad_norm": 0.00013748419587500393, |
| "learning_rate": 1.7732522796352587e-06, |
| "loss": 0.0002, |
| "step": 238000 |
| }, |
| { |
| "epoch": 4.832826747720365, |
| "grad_norm": 3.161181302857585e-05, |
| "learning_rate": 1.67193515704154e-06, |
| "loss": 0.0, |
| "step": 238500 |
| }, |
| { |
| "epoch": 4.842958459979736, |
| "grad_norm": 0.0026995555963367224, |
| "learning_rate": 1.5706180344478217e-06, |
| "loss": 0.0, |
| "step": 239000 |
| }, |
| { |
| "epoch": 4.853090172239108, |
| "grad_norm": 0.00017810733697842807, |
| "learning_rate": 1.4693009118541034e-06, |
| "loss": 0.0001, |
| "step": 239500 |
| }, |
| { |
| "epoch": 4.86322188449848, |
| "grad_norm": 0.0014064594870433211, |
| "learning_rate": 1.3679837892603852e-06, |
| "loss": 0.0001, |
| "step": 240000 |
| }, |
| { |
| "epoch": 4.873353596757852, |
| "grad_norm": 1.7768637917470187e-05, |
| "learning_rate": 1.2666666666666667e-06, |
| "loss": 0.0, |
| "step": 240500 |
| }, |
| { |
| "epoch": 4.883485309017224, |
| "grad_norm": 0.00014705142530146986, |
| "learning_rate": 1.1653495440729484e-06, |
| "loss": 0.0001, |
| "step": 241000 |
| }, |
| { |
| "epoch": 4.8936170212765955, |
| "grad_norm": 1.4760345038666856e-05, |
| "learning_rate": 1.06403242147923e-06, |
| "loss": 0.0001, |
| "step": 241500 |
| }, |
| { |
| "epoch": 4.903748733535967, |
| "grad_norm": 3.4190128644695505e-05, |
| "learning_rate": 9.627152988855117e-07, |
| "loss": 0.0001, |
| "step": 242000 |
| }, |
| { |
| "epoch": 4.913880445795339, |
| "grad_norm": 1.7926526197697967e-05, |
| "learning_rate": 8.613981762917934e-07, |
| "loss": 0.0001, |
| "step": 242500 |
| }, |
| { |
| "epoch": 4.924012158054711, |
| "grad_norm": 0.00015696664922870696, |
| "learning_rate": 7.60081053698075e-07, |
| "loss": 0.0001, |
| "step": 243000 |
| }, |
| { |
| "epoch": 4.934143870314083, |
| "grad_norm": 2.8205437047290616e-05, |
| "learning_rate": 6.587639311043567e-07, |
| "loss": 0.0002, |
| "step": 243500 |
| }, |
| { |
| "epoch": 4.944275582573455, |
| "grad_norm": 8.939866529544815e-05, |
| "learning_rate": 5.574468085106383e-07, |
| "loss": 0.0001, |
| "step": 244000 |
| }, |
| { |
| "epoch": 4.954407294832826, |
| "grad_norm": 4.093222742085345e-05, |
| "learning_rate": 4.5612968591691996e-07, |
| "loss": 0.0002, |
| "step": 244500 |
| }, |
| { |
| "epoch": 4.964539007092198, |
| "grad_norm": 4.5221910113468766e-05, |
| "learning_rate": 3.5481256332320164e-07, |
| "loss": 0.0001, |
| "step": 245000 |
| }, |
| { |
| "epoch": 4.97467071935157, |
| "grad_norm": 1.8222008293378167e-05, |
| "learning_rate": 2.5349544072948327e-07, |
| "loss": 0.0003, |
| "step": 245500 |
| }, |
| { |
| "epoch": 4.984802431610943, |
| "grad_norm": 2.017403494392056e-05, |
| "learning_rate": 1.5217831813576495e-07, |
| "loss": 0.0001, |
| "step": 246000 |
| }, |
| { |
| "epoch": 4.994934143870314, |
| "grad_norm": 5.0148733862442896e-05, |
| "learning_rate": 5.0861195542046605e-08, |
| "loss": 0.0, |
| "step": 246500 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9997915872290338, |
| "eval_f1": 0.9997916000286831, |
| "eval_loss": 0.0014928707387298346, |
| "eval_precision": 0.9997916658440986, |
| "eval_recall": 0.9997915872290338, |
| "eval_runtime": 378.3235, |
| "eval_samples_per_second": 185.154, |
| "eval_steps_per_second": 11.572, |
| "step": 246750 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 246750, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.579005974361536e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|