{
"best_global_step": 246750,
"best_metric": 0.0014928707387298346,
"best_model_checkpoint": "./results/checkpoint-246750",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 246750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010131712259371834,
"grad_norm": 0.7652040123939514,
"learning_rate": 4.9898885511651475e-05,
"loss": 0.1118,
"step": 500
},
{
"epoch": 0.020263424518743668,
"grad_norm": 2.674142599105835,
"learning_rate": 4.9797568389057755e-05,
"loss": 0.0342,
"step": 1000
},
{
"epoch": 0.030395136778115502,
"grad_norm": 0.5787509679794312,
"learning_rate": 4.9696251266464036e-05,
"loss": 0.0297,
"step": 1500
},
{
"epoch": 0.040526849037487336,
"grad_norm": 0.32276612520217896,
"learning_rate": 4.9594934143870316e-05,
"loss": 0.0259,
"step": 2000
},
{
"epoch": 0.05065856129685917,
"grad_norm": 0.3678061068058014,
"learning_rate": 4.9493617021276603e-05,
"loss": 0.0214,
"step": 2500
},
{
"epoch": 0.060790273556231005,
"grad_norm": 0.1107838898897171,
"learning_rate": 4.9392299898682884e-05,
"loss": 0.0195,
"step": 3000
},
{
"epoch": 0.07092198581560284,
"grad_norm": 0.7422420978546143,
"learning_rate": 4.9290982776089164e-05,
"loss": 0.0176,
"step": 3500
},
{
"epoch": 0.08105369807497467,
"grad_norm": 0.23030279576778412,
"learning_rate": 4.9189665653495445e-05,
"loss": 0.0157,
"step": 4000
},
{
"epoch": 0.0911854103343465,
"grad_norm": 0.6237834692001343,
"learning_rate": 4.9088348530901725e-05,
"loss": 0.0156,
"step": 4500
},
{
"epoch": 0.10131712259371833,
"grad_norm": 0.027093010023236275,
"learning_rate": 4.8987031408308006e-05,
"loss": 0.0148,
"step": 5000
},
{
"epoch": 0.11144883485309018,
"grad_norm": 1.697365164756775,
"learning_rate": 4.888571428571429e-05,
"loss": 0.0137,
"step": 5500
},
{
"epoch": 0.12158054711246201,
"grad_norm": 1.082137942314148,
"learning_rate": 4.8784397163120573e-05,
"loss": 0.0125,
"step": 6000
},
{
"epoch": 0.13171225937183384,
"grad_norm": 8.637746810913086,
"learning_rate": 4.8683080040526854e-05,
"loss": 0.012,
"step": 6500
},
{
"epoch": 0.14184397163120568,
"grad_norm": 0.4965957999229431,
"learning_rate": 4.8581762917933134e-05,
"loss": 0.014,
"step": 7000
},
{
"epoch": 0.1519756838905775,
"grad_norm": 0.3297726511955261,
"learning_rate": 4.8480445795339415e-05,
"loss": 0.0114,
"step": 7500
},
{
"epoch": 0.16210739614994935,
"grad_norm": 0.7570741176605225,
"learning_rate": 4.8379128672745695e-05,
"loss": 0.0137,
"step": 8000
},
{
"epoch": 0.17223910840932116,
"grad_norm": 1.3462743759155273,
"learning_rate": 4.827781155015198e-05,
"loss": 0.0113,
"step": 8500
},
{
"epoch": 0.182370820668693,
"grad_norm": 1.4103180170059204,
"learning_rate": 4.817649442755826e-05,
"loss": 0.0106,
"step": 9000
},
{
"epoch": 0.19250253292806485,
"grad_norm": 0.20714280009269714,
"learning_rate": 4.8075177304964543e-05,
"loss": 0.0102,
"step": 9500
},
{
"epoch": 0.20263424518743667,
"grad_norm": 0.728708803653717,
"learning_rate": 4.7973860182370824e-05,
"loss": 0.011,
"step": 10000
},
{
"epoch": 0.2127659574468085,
"grad_norm": 0.21020011603832245,
"learning_rate": 4.7872543059777104e-05,
"loss": 0.01,
"step": 10500
},
{
"epoch": 0.22289766970618036,
"grad_norm": 1.3205281496047974,
"learning_rate": 4.7771225937183385e-05,
"loss": 0.0118,
"step": 11000
},
{
"epoch": 0.23302938196555217,
"grad_norm": 0.061988379806280136,
"learning_rate": 4.766990881458967e-05,
"loss": 0.0088,
"step": 11500
},
{
"epoch": 0.24316109422492402,
"grad_norm": 0.7140718102455139,
"learning_rate": 4.756859169199595e-05,
"loss": 0.0087,
"step": 12000
},
{
"epoch": 0.25329280648429586,
"grad_norm": 0.3282340168952942,
"learning_rate": 4.746727456940223e-05,
"loss": 0.0105,
"step": 12500
},
{
"epoch": 0.2634245187436677,
"grad_norm": 0.18000195920467377,
"learning_rate": 4.7365957446808513e-05,
"loss": 0.0082,
"step": 13000
},
{
"epoch": 0.2735562310030395,
"grad_norm": 0.07064808160066605,
"learning_rate": 4.7264640324214794e-05,
"loss": 0.0076,
"step": 13500
},
{
"epoch": 0.28368794326241137,
"grad_norm": 0.012272284366190434,
"learning_rate": 4.7163323201621074e-05,
"loss": 0.0087,
"step": 14000
},
{
"epoch": 0.2938196555217832,
"grad_norm": 0.7345269918441772,
"learning_rate": 4.706200607902736e-05,
"loss": 0.0093,
"step": 14500
},
{
"epoch": 0.303951367781155,
"grad_norm": 0.02252453938126564,
"learning_rate": 4.696068895643364e-05,
"loss": 0.0094,
"step": 15000
},
{
"epoch": 0.3140830800405269,
"grad_norm": 1.4309351444244385,
"learning_rate": 4.685937183383992e-05,
"loss": 0.0096,
"step": 15500
},
{
"epoch": 0.3242147922998987,
"grad_norm": 2.0277745723724365,
"learning_rate": 4.67580547112462e-05,
"loss": 0.0091,
"step": 16000
},
{
"epoch": 0.3343465045592705,
"grad_norm": 1.0250506401062012,
"learning_rate": 4.6656737588652483e-05,
"loss": 0.0079,
"step": 16500
},
{
"epoch": 0.3444782168186423,
"grad_norm": 0.019971124827861786,
"learning_rate": 4.6555420466058764e-05,
"loss": 0.0077,
"step": 17000
},
{
"epoch": 0.3546099290780142,
"grad_norm": 0.28336408734321594,
"learning_rate": 4.645410334346505e-05,
"loss": 0.0079,
"step": 17500
},
{
"epoch": 0.364741641337386,
"grad_norm": 0.04426710680127144,
"learning_rate": 4.635278622087133e-05,
"loss": 0.0076,
"step": 18000
},
{
"epoch": 0.37487335359675783,
"grad_norm": 0.017167454585433006,
"learning_rate": 4.625146909827761e-05,
"loss": 0.0087,
"step": 18500
},
{
"epoch": 0.3850050658561297,
"grad_norm": 0.04213930293917656,
"learning_rate": 4.615015197568389e-05,
"loss": 0.0083,
"step": 19000
},
{
"epoch": 0.3951367781155015,
"grad_norm": 0.01774449646472931,
"learning_rate": 4.604883485309017e-05,
"loss": 0.0085,
"step": 19500
},
{
"epoch": 0.40526849037487334,
"grad_norm": 1.1058021783828735,
"learning_rate": 4.594751773049646e-05,
"loss": 0.0085,
"step": 20000
},
{
"epoch": 0.4154002026342452,
"grad_norm": 0.2536062002182007,
"learning_rate": 4.584620060790274e-05,
"loss": 0.0074,
"step": 20500
},
{
"epoch": 0.425531914893617,
"grad_norm": 0.16014184057712555,
"learning_rate": 4.574488348530902e-05,
"loss": 0.007,
"step": 21000
},
{
"epoch": 0.43566362715298884,
"grad_norm": 0.17190662026405334,
"learning_rate": 4.56435663627153e-05,
"loss": 0.0088,
"step": 21500
},
{
"epoch": 0.4457953394123607,
"grad_norm": 0.04603414237499237,
"learning_rate": 4.554224924012158e-05,
"loss": 0.0076,
"step": 22000
},
{
"epoch": 0.45592705167173253,
"grad_norm": 0.10868274420499802,
"learning_rate": 4.544093211752786e-05,
"loss": 0.0072,
"step": 22500
},
{
"epoch": 0.46605876393110435,
"grad_norm": 0.061556026339530945,
"learning_rate": 4.533961499493415e-05,
"loss": 0.008,
"step": 23000
},
{
"epoch": 0.47619047619047616,
"grad_norm": 0.16614557802677155,
"learning_rate": 4.523829787234043e-05,
"loss": 0.0066,
"step": 23500
},
{
"epoch": 0.48632218844984804,
"grad_norm": 0.024508880451321602,
"learning_rate": 4.513698074974671e-05,
"loss": 0.0079,
"step": 24000
},
{
"epoch": 0.49645390070921985,
"grad_norm": 0.017630083486437798,
"learning_rate": 4.503566362715299e-05,
"loss": 0.0061,
"step": 24500
},
{
"epoch": 0.5065856129685917,
"grad_norm": 0.7771974802017212,
"learning_rate": 4.493434650455927e-05,
"loss": 0.0059,
"step": 25000
},
{
"epoch": 0.5167173252279635,
"grad_norm": 0.3426854908466339,
"learning_rate": 4.483302938196555e-05,
"loss": 0.0068,
"step": 25500
},
{
"epoch": 0.5268490374873354,
"grad_norm": 0.004861881025135517,
"learning_rate": 4.473171225937184e-05,
"loss": 0.0066,
"step": 26000
},
{
"epoch": 0.5369807497467072,
"grad_norm": 0.2573753595352173,
"learning_rate": 4.463039513677812e-05,
"loss": 0.0076,
"step": 26500
},
{
"epoch": 0.547112462006079,
"grad_norm": 0.0056276340037584305,
"learning_rate": 4.45290780141844e-05,
"loss": 0.0061,
"step": 27000
},
{
"epoch": 0.5572441742654508,
"grad_norm": 1.3307100534439087,
"learning_rate": 4.442776089159068e-05,
"loss": 0.0078,
"step": 27500
},
{
"epoch": 0.5673758865248227,
"grad_norm": 0.08794938027858734,
"learning_rate": 4.432644376899696e-05,
"loss": 0.0059,
"step": 28000
},
{
"epoch": 0.5775075987841946,
"grad_norm": 0.02371417172253132,
"learning_rate": 4.422512664640324e-05,
"loss": 0.0069,
"step": 28500
},
{
"epoch": 0.5876393110435664,
"grad_norm": 0.005987431854009628,
"learning_rate": 4.412380952380953e-05,
"loss": 0.0055,
"step": 29000
},
{
"epoch": 0.5977710233029382,
"grad_norm": 0.01846960373222828,
"learning_rate": 4.402249240121581e-05,
"loss": 0.0064,
"step": 29500
},
{
"epoch": 0.60790273556231,
"grad_norm": 0.13479246199131012,
"learning_rate": 4.392117527862209e-05,
"loss": 0.006,
"step": 30000
},
{
"epoch": 0.6180344478216818,
"grad_norm": 0.9626930952072144,
"learning_rate": 4.381985815602837e-05,
"loss": 0.0072,
"step": 30500
},
{
"epoch": 0.6281661600810537,
"grad_norm": 3.426116704940796,
"learning_rate": 4.371854103343465e-05,
"loss": 0.0067,
"step": 31000
},
{
"epoch": 0.6382978723404256,
"grad_norm": 0.00803771149367094,
"learning_rate": 4.361722391084093e-05,
"loss": 0.0054,
"step": 31500
},
{
"epoch": 0.6484295845997974,
"grad_norm": 2.7232067584991455,
"learning_rate": 4.351590678824722e-05,
"loss": 0.0062,
"step": 32000
},
{
"epoch": 0.6585612968591692,
"grad_norm": 0.07605724781751633,
"learning_rate": 4.34145896656535e-05,
"loss": 0.0056,
"step": 32500
},
{
"epoch": 0.668693009118541,
"grad_norm": 0.0936700776219368,
"learning_rate": 4.331327254305978e-05,
"loss": 0.0057,
"step": 33000
},
{
"epoch": 0.6788247213779128,
"grad_norm": 0.3686704635620117,
"learning_rate": 4.321195542046606e-05,
"loss": 0.0073,
"step": 33500
},
{
"epoch": 0.6889564336372846,
"grad_norm": 2.1731717586517334,
"learning_rate": 4.311063829787234e-05,
"loss": 0.0051,
"step": 34000
},
{
"epoch": 0.6990881458966566,
"grad_norm": 0.5393068194389343,
"learning_rate": 4.300932117527862e-05,
"loss": 0.0051,
"step": 34500
},
{
"epoch": 0.7092198581560284,
"grad_norm": 0.010527299717068672,
"learning_rate": 4.290800405268491e-05,
"loss": 0.0045,
"step": 35000
},
{
"epoch": 0.7193515704154002,
"grad_norm": 0.009113574400544167,
"learning_rate": 4.280668693009119e-05,
"loss": 0.0053,
"step": 35500
},
{
"epoch": 0.729483282674772,
"grad_norm": 0.011620788834989071,
"learning_rate": 4.270536980749747e-05,
"loss": 0.0046,
"step": 36000
},
{
"epoch": 0.7396149949341438,
"grad_norm": 0.005289976019412279,
"learning_rate": 4.260405268490375e-05,
"loss": 0.0055,
"step": 36500
},
{
"epoch": 0.7497467071935157,
"grad_norm": 0.027446379885077477,
"learning_rate": 4.250273556231003e-05,
"loss": 0.0052,
"step": 37000
},
{
"epoch": 0.7598784194528876,
"grad_norm": 0.02491973526775837,
"learning_rate": 4.240141843971631e-05,
"loss": 0.0052,
"step": 37500
},
{
"epoch": 0.7700101317122594,
"grad_norm": 0.06567023694515228,
"learning_rate": 4.23001013171226e-05,
"loss": 0.0045,
"step": 38000
},
{
"epoch": 0.7801418439716312,
"grad_norm": 0.07404550909996033,
"learning_rate": 4.219878419452888e-05,
"loss": 0.0052,
"step": 38500
},
{
"epoch": 0.790273556231003,
"grad_norm": 0.04177823290228844,
"learning_rate": 4.209746707193516e-05,
"loss": 0.0059,
"step": 39000
},
{
"epoch": 0.8004052684903749,
"grad_norm": 0.23891448974609375,
"learning_rate": 4.199614994934144e-05,
"loss": 0.0049,
"step": 39500
},
{
"epoch": 0.8105369807497467,
"grad_norm": 0.029912158846855164,
"learning_rate": 4.189483282674772e-05,
"loss": 0.0053,
"step": 40000
},
{
"epoch": 0.8206686930091185,
"grad_norm": 1.465671420097351,
"learning_rate": 4.1793515704154e-05,
"loss": 0.0054,
"step": 40500
},
{
"epoch": 0.8308004052684904,
"grad_norm": 0.002502155490219593,
"learning_rate": 4.169219858156029e-05,
"loss": 0.0042,
"step": 41000
},
{
"epoch": 0.8409321175278622,
"grad_norm": 0.05517476052045822,
"learning_rate": 4.159088145896657e-05,
"loss": 0.0053,
"step": 41500
},
{
"epoch": 0.851063829787234,
"grad_norm": 0.0024023025762289762,
"learning_rate": 4.148956433637285e-05,
"loss": 0.0049,
"step": 42000
},
{
"epoch": 0.8611955420466059,
"grad_norm": 0.003541674930602312,
"learning_rate": 4.138824721377913e-05,
"loss": 0.0047,
"step": 42500
},
{
"epoch": 0.8713272543059777,
"grad_norm": 0.04199780896306038,
"learning_rate": 4.128693009118541e-05,
"loss": 0.005,
"step": 43000
},
{
"epoch": 0.8814589665653495,
"grad_norm": 1.3863078355789185,
"learning_rate": 4.118561296859169e-05,
"loss": 0.0046,
"step": 43500
},
{
"epoch": 0.8915906788247214,
"grad_norm": 0.03199724853038788,
"learning_rate": 4.108429584599798e-05,
"loss": 0.0048,
"step": 44000
},
{
"epoch": 0.9017223910840932,
"grad_norm": 0.02803684026002884,
"learning_rate": 4.098297872340426e-05,
"loss": 0.0045,
"step": 44500
},
{
"epoch": 0.9118541033434651,
"grad_norm": 0.04623283073306084,
"learning_rate": 4.088166160081054e-05,
"loss": 0.005,
"step": 45000
},
{
"epoch": 0.9219858156028369,
"grad_norm": 0.0006883647874929011,
"learning_rate": 4.078034447821682e-05,
"loss": 0.0053,
"step": 45500
},
{
"epoch": 0.9321175278622087,
"grad_norm": 0.15720270574092865,
"learning_rate": 4.06790273556231e-05,
"loss": 0.0051,
"step": 46000
},
{
"epoch": 0.9422492401215805,
"grad_norm": 0.0048390720039606094,
"learning_rate": 4.057771023302938e-05,
"loss": 0.0049,
"step": 46500
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.04854992404580116,
"learning_rate": 4.0476393110435666e-05,
"loss": 0.0038,
"step": 47000
},
{
"epoch": 0.9625126646403243,
"grad_norm": 0.011257442645728588,
"learning_rate": 4.037507598784195e-05,
"loss": 0.0033,
"step": 47500
},
{
"epoch": 0.9726443768996961,
"grad_norm": 1.3036562204360962,
"learning_rate": 4.027375886524823e-05,
"loss": 0.0039,
"step": 48000
},
{
"epoch": 0.9827760891590679,
"grad_norm": 0.019078070297837257,
"learning_rate": 4.017244174265451e-05,
"loss": 0.0046,
"step": 48500
},
{
"epoch": 0.9929078014184397,
"grad_norm": 0.0359899140894413,
"learning_rate": 4.007112462006079e-05,
"loss": 0.0042,
"step": 49000
},
{
"epoch": 1.0,
"eval_accuracy": 0.9990064868474964,
"eval_f1": 0.9990066101460954,
"eval_loss": 0.0038088823202997446,
"eval_precision": 0.9990069135746219,
"eval_recall": 0.9990064868474964,
"eval_runtime": 377.94,
"eval_samples_per_second": 185.342,
"eval_steps_per_second": 11.584,
"step": 49350
},
{
"epoch": 1.0030395136778116,
"grad_norm": 0.014290682971477509,
"learning_rate": 3.996980749746707e-05,
"loss": 0.0035,
"step": 49500
},
{
"epoch": 1.0131712259371835,
"grad_norm": 0.06438656151294708,
"learning_rate": 3.9868490374873356e-05,
"loss": 0.0048,
"step": 50000
},
{
"epoch": 1.0233029381965553,
"grad_norm": 0.006202331744134426,
"learning_rate": 3.9767173252279636e-05,
"loss": 0.0039,
"step": 50500
},
{
"epoch": 1.033434650455927,
"grad_norm": 0.0030405428260564804,
"learning_rate": 3.966585612968592e-05,
"loss": 0.003,
"step": 51000
},
{
"epoch": 1.043566362715299,
"grad_norm": 0.12991833686828613,
"learning_rate": 3.95645390070922e-05,
"loss": 0.0025,
"step": 51500
},
{
"epoch": 1.0536980749746707,
"grad_norm": 0.0097044100984931,
"learning_rate": 3.946322188449848e-05,
"loss": 0.0037,
"step": 52000
},
{
"epoch": 1.0638297872340425,
"grad_norm": 0.012757817283272743,
"learning_rate": 3.9361904761904765e-05,
"loss": 0.0033,
"step": 52500
},
{
"epoch": 1.0739614994934144,
"grad_norm": 0.0028119811322540045,
"learning_rate": 3.9260587639311045e-05,
"loss": 0.0036,
"step": 53000
},
{
"epoch": 1.0840932117527862,
"grad_norm": 0.004565235693007708,
"learning_rate": 3.9159270516717326e-05,
"loss": 0.0037,
"step": 53500
},
{
"epoch": 1.094224924012158,
"grad_norm": 0.00749659538269043,
"learning_rate": 3.9057953394123606e-05,
"loss": 0.0028,
"step": 54000
},
{
"epoch": 1.1043566362715298,
"grad_norm": 0.005823603365570307,
"learning_rate": 3.895663627152989e-05,
"loss": 0.0032,
"step": 54500
},
{
"epoch": 1.1144883485309016,
"grad_norm": 1.319741129875183,
"learning_rate": 3.885531914893617e-05,
"loss": 0.0022,
"step": 55000
},
{
"epoch": 1.1246200607902737,
"grad_norm": 0.0010931927245110273,
"learning_rate": 3.8754002026342454e-05,
"loss": 0.0044,
"step": 55500
},
{
"epoch": 1.1347517730496455,
"grad_norm": 2.641359567642212,
"learning_rate": 3.8652684903748735e-05,
"loss": 0.0031,
"step": 56000
},
{
"epoch": 1.1448834853090173,
"grad_norm": 0.05208117142319679,
"learning_rate": 3.8551367781155015e-05,
"loss": 0.005,
"step": 56500
},
{
"epoch": 1.155015197568389,
"grad_norm": 0.055079296231269836,
"learning_rate": 3.8450050658561296e-05,
"loss": 0.0037,
"step": 57000
},
{
"epoch": 1.165146909827761,
"grad_norm": 0.016849618405103683,
"learning_rate": 3.8348733535967576e-05,
"loss": 0.0028,
"step": 57500
},
{
"epoch": 1.1752786220871327,
"grad_norm": 0.009639640338718891,
"learning_rate": 3.824741641337386e-05,
"loss": 0.0033,
"step": 58000
},
{
"epoch": 1.1854103343465046,
"grad_norm": 0.003612485248595476,
"learning_rate": 3.8146099290780144e-05,
"loss": 0.0034,
"step": 58500
},
{
"epoch": 1.1955420466058764,
"grad_norm": 0.0050128428265452385,
"learning_rate": 3.8044782168186424e-05,
"loss": 0.004,
"step": 59000
},
{
"epoch": 1.2056737588652482,
"grad_norm": 0.04756532609462738,
"learning_rate": 3.7943465045592705e-05,
"loss": 0.0033,
"step": 59500
},
{
"epoch": 1.21580547112462,
"grad_norm": 0.06244517117738724,
"learning_rate": 3.7842147922998985e-05,
"loss": 0.0038,
"step": 60000
},
{
"epoch": 1.2259371833839918,
"grad_norm": 0.017557090148329735,
"learning_rate": 3.7740830800405266e-05,
"loss": 0.0026,
"step": 60500
},
{
"epoch": 1.2360688956433636,
"grad_norm": 0.013284939341247082,
"learning_rate": 3.763951367781155e-05,
"loss": 0.0043,
"step": 61000
},
{
"epoch": 1.2462006079027357,
"grad_norm": 0.0017136982642114162,
"learning_rate": 3.7538196555217833e-05,
"loss": 0.0039,
"step": 61500
},
{
"epoch": 1.2563323201621075,
"grad_norm": 0.009458661079406738,
"learning_rate": 3.7436879432624114e-05,
"loss": 0.0027,
"step": 62000
},
{
"epoch": 1.2664640324214793,
"grad_norm": 0.0020438162609934807,
"learning_rate": 3.7335562310030394e-05,
"loss": 0.0025,
"step": 62500
},
{
"epoch": 1.2765957446808511,
"grad_norm": 0.0019806961063295603,
"learning_rate": 3.7234245187436675e-05,
"loss": 0.0033,
"step": 63000
},
{
"epoch": 1.286727456940223,
"grad_norm": 0.0010404183994978666,
"learning_rate": 3.713292806484296e-05,
"loss": 0.0032,
"step": 63500
},
{
"epoch": 1.2968591691995948,
"grad_norm": 0.0007222663261927664,
"learning_rate": 3.703161094224924e-05,
"loss": 0.0033,
"step": 64000
},
{
"epoch": 1.3069908814589666,
"grad_norm": 0.008753558620810509,
"learning_rate": 3.693029381965552e-05,
"loss": 0.0031,
"step": 64500
},
{
"epoch": 1.3171225937183384,
"grad_norm": 0.2641207277774811,
"learning_rate": 3.6828976697061803e-05,
"loss": 0.0036,
"step": 65000
},
{
"epoch": 1.3272543059777102,
"grad_norm": 0.01977156661450863,
"learning_rate": 3.672765957446809e-05,
"loss": 0.0028,
"step": 65500
},
{
"epoch": 1.337386018237082,
"grad_norm": 0.004643771797418594,
"learning_rate": 3.662634245187437e-05,
"loss": 0.0029,
"step": 66000
},
{
"epoch": 1.3475177304964538,
"grad_norm": 0.00434250058606267,
"learning_rate": 3.652502532928065e-05,
"loss": 0.0031,
"step": 66500
},
{
"epoch": 1.3576494427558257,
"grad_norm": 0.10578258335590363,
"learning_rate": 3.642370820668693e-05,
"loss": 0.0031,
"step": 67000
},
{
"epoch": 1.3677811550151975,
"grad_norm": 0.0011634805705398321,
"learning_rate": 3.632239108409321e-05,
"loss": 0.0022,
"step": 67500
},
{
"epoch": 1.3779128672745693,
"grad_norm": 0.0011249127564951777,
"learning_rate": 3.62210739614995e-05,
"loss": 0.0032,
"step": 68000
},
{
"epoch": 1.3880445795339411,
"grad_norm": 0.009079035371541977,
"learning_rate": 3.611975683890578e-05,
"loss": 0.0032,
"step": 68500
},
{
"epoch": 1.3981762917933132,
"grad_norm": 0.01014864444732666,
"learning_rate": 3.601843971631206e-05,
"loss": 0.0039,
"step": 69000
},
{
"epoch": 1.408308004052685,
"grad_norm": 0.005819142330437899,
"learning_rate": 3.591712259371834e-05,
"loss": 0.0028,
"step": 69500
},
{
"epoch": 1.4184397163120568,
"grad_norm": 0.011299582198262215,
"learning_rate": 3.581580547112462e-05,
"loss": 0.0034,
"step": 70000
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.0007169672753661871,
"learning_rate": 3.571448834853091e-05,
"loss": 0.0027,
"step": 70500
},
{
"epoch": 1.4387031408308004,
"grad_norm": 0.01162696722894907,
"learning_rate": 3.561317122593719e-05,
"loss": 0.0035,
"step": 71000
},
{
"epoch": 1.4488348530901722,
"grad_norm": 0.03597528859972954,
"learning_rate": 3.551185410334347e-05,
"loss": 0.0031,
"step": 71500
},
{
"epoch": 1.458966565349544,
"grad_norm": 0.010613600723445415,
"learning_rate": 3.541053698074975e-05,
"loss": 0.0025,
"step": 72000
},
{
"epoch": 1.4690982776089159,
"grad_norm": 0.013661106117069721,
"learning_rate": 3.530921985815603e-05,
"loss": 0.0022,
"step": 72500
},
{
"epoch": 1.4792299898682877,
"grad_norm": 0.4801454544067383,
"learning_rate": 3.520790273556231e-05,
"loss": 0.0032,
"step": 73000
},
{
"epoch": 1.4893617021276595,
"grad_norm": 0.005630165338516235,
"learning_rate": 3.51065856129686e-05,
"loss": 0.0025,
"step": 73500
},
{
"epoch": 1.4994934143870315,
"grad_norm": 0.018407883122563362,
"learning_rate": 3.500526849037488e-05,
"loss": 0.0029,
"step": 74000
},
{
"epoch": 1.5096251266464034,
"grad_norm": 0.010126540437340736,
"learning_rate": 3.490395136778116e-05,
"loss": 0.0027,
"step": 74500
},
{
"epoch": 1.5197568389057752,
"grad_norm": 0.025962965562939644,
"learning_rate": 3.480263424518744e-05,
"loss": 0.0028,
"step": 75000
},
{
"epoch": 1.529888551165147,
"grad_norm": 0.10553637146949768,
"learning_rate": 3.470131712259372e-05,
"loss": 0.0038,
"step": 75500
},
{
"epoch": 1.5400202634245188,
"grad_norm": 0.0019956612959504128,
"learning_rate": 3.46e-05,
"loss": 0.0033,
"step": 76000
},
{
"epoch": 1.5501519756838906,
"grad_norm": 0.0006352249765768647,
"learning_rate": 3.449868287740629e-05,
"loss": 0.0025,
"step": 76500
},
{
"epoch": 1.5602836879432624,
"grad_norm": 0.0011086476733908057,
"learning_rate": 3.439736575481257e-05,
"loss": 0.0029,
"step": 77000
},
{
"epoch": 1.5704154002026343,
"grad_norm": 0.00939366314560175,
"learning_rate": 3.429604863221885e-05,
"loss": 0.0031,
"step": 77500
},
{
"epoch": 1.580547112462006,
"grad_norm": 0.25609418749809265,
"learning_rate": 3.419473150962513e-05,
"loss": 0.0033,
"step": 78000
},
{
"epoch": 1.590678824721378,
"grad_norm": 0.09414645284414291,
"learning_rate": 3.409341438703141e-05,
"loss": 0.0029,
"step": 78500
},
{
"epoch": 1.6008105369807497,
"grad_norm": 0.04403573274612427,
"learning_rate": 3.399209726443769e-05,
"loss": 0.0024,
"step": 79000
},
{
"epoch": 1.6109422492401215,
"grad_norm": 0.0013993962202221155,
"learning_rate": 3.389078014184398e-05,
"loss": 0.0025,
"step": 79500
},
{
"epoch": 1.6210739614994933,
"grad_norm": 0.0016776573611423373,
"learning_rate": 3.378946301925026e-05,
"loss": 0.0031,
"step": 80000
},
{
"epoch": 1.6312056737588652,
"grad_norm": 0.016962487250566483,
"learning_rate": 3.368814589665654e-05,
"loss": 0.0025,
"step": 80500
},
{
"epoch": 1.641337386018237,
"grad_norm": 0.04853259399533272,
"learning_rate": 3.358682877406282e-05,
"loss": 0.0027,
"step": 81000
},
{
"epoch": 1.6514690982776088,
"grad_norm": 0.00218728045001626,
"learning_rate": 3.34855116514691e-05,
"loss": 0.0027,
"step": 81500
},
{
"epoch": 1.6616008105369806,
"grad_norm": 0.008054674603044987,
"learning_rate": 3.338419452887538e-05,
"loss": 0.0027,
"step": 82000
},
{
"epoch": 1.6717325227963524,
"grad_norm": 0.0009625882375985384,
"learning_rate": 3.328287740628167e-05,
"loss": 0.0021,
"step": 82500
},
{
"epoch": 1.6818642350557245,
"grad_norm": 0.017836738377809525,
"learning_rate": 3.318156028368795e-05,
"loss": 0.0035,
"step": 83000
},
{
"epoch": 1.6919959473150963,
"grad_norm": 0.5696132183074951,
"learning_rate": 3.308024316109423e-05,
"loss": 0.004,
"step": 83500
},
{
"epoch": 1.702127659574468,
"grad_norm": 0.00031232935725711286,
"learning_rate": 3.297892603850051e-05,
"loss": 0.0019,
"step": 84000
},
{
"epoch": 1.71225937183384,
"grad_norm": 0.006237703841179609,
"learning_rate": 3.287760891590679e-05,
"loss": 0.0026,
"step": 84500
},
{
"epoch": 1.7223910840932117,
"grad_norm": 0.02137162908911705,
"learning_rate": 3.2776291793313076e-05,
"loss": 0.0024,
"step": 85000
},
{
"epoch": 1.7325227963525835,
"grad_norm": 0.01595192588865757,
"learning_rate": 3.267497467071936e-05,
"loss": 0.0021,
"step": 85500
},
{
"epoch": 1.7426545086119554,
"grad_norm": 0.0017637086566537619,
"learning_rate": 3.257365754812564e-05,
"loss": 0.0027,
"step": 86000
},
{
"epoch": 1.7527862208713274,
"grad_norm": 0.0031725901644676924,
"learning_rate": 3.247234042553192e-05,
"loss": 0.0021,
"step": 86500
},
{
"epoch": 1.7629179331306992,
"grad_norm": 0.015843555331230164,
"learning_rate": 3.23710233029382e-05,
"loss": 0.0037,
"step": 87000
},
{
"epoch": 1.773049645390071,
"grad_norm": 0.02725142426788807,
"learning_rate": 3.226970618034448e-05,
"loss": 0.0023,
"step": 87500
},
{
"epoch": 1.7831813576494429,
"grad_norm": 0.015493770129978657,
"learning_rate": 3.2168389057750766e-05,
"loss": 0.0028,
"step": 88000
},
{
"epoch": 1.7933130699088147,
"grad_norm": 0.0021028113551437855,
"learning_rate": 3.2067071935157046e-05,
"loss": 0.0022,
"step": 88500
},
{
"epoch": 1.8034447821681865,
"grad_norm": 0.0224838238209486,
"learning_rate": 3.196575481256333e-05,
"loss": 0.0027,
"step": 89000
},
{
"epoch": 1.8135764944275583,
"grad_norm": 0.0014610164798796177,
"learning_rate": 3.186443768996961e-05,
"loss": 0.0027,
"step": 89500
},
{
"epoch": 1.8237082066869301,
"grad_norm": 0.0022999641951173544,
"learning_rate": 3.176312056737589e-05,
"loss": 0.0029,
"step": 90000
},
{
"epoch": 1.833839918946302,
"grad_norm": 0.003701185341924429,
"learning_rate": 3.166180344478217e-05,
"loss": 0.0031,
"step": 90500
},
{
"epoch": 1.8439716312056738,
"grad_norm": 0.0023422616068273783,
"learning_rate": 3.1560486322188455e-05,
"loss": 0.003,
"step": 91000
},
{
"epoch": 1.8541033434650456,
"grad_norm": 0.0010673481738194823,
"learning_rate": 3.1459169199594736e-05,
"loss": 0.0026,
"step": 91500
},
{
"epoch": 1.8642350557244174,
"grad_norm": 2.150301456451416,
"learning_rate": 3.1357852077001016e-05,
"loss": 0.0019,
"step": 92000
},
{
"epoch": 1.8743667679837892,
"grad_norm": 0.0014611236983910203,
"learning_rate": 3.12565349544073e-05,
"loss": 0.0023,
"step": 92500
},
{
"epoch": 1.884498480243161,
"grad_norm": 0.004396792501211166,
"learning_rate": 3.115521783181358e-05,
"loss": 0.0022,
"step": 93000
},
{
"epoch": 1.8946301925025328,
"grad_norm": 0.006988595239818096,
"learning_rate": 3.105390070921986e-05,
"loss": 0.0026,
"step": 93500
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.3570442795753479,
"learning_rate": 3.0952583586626145e-05,
"loss": 0.0023,
"step": 94000
},
{
"epoch": 1.9148936170212765,
"grad_norm": 0.0019152691820636392,
"learning_rate": 3.0851266464032425e-05,
"loss": 0.0026,
"step": 94500
},
{
"epoch": 1.9250253292806483,
"grad_norm": 0.03153735399246216,
"learning_rate": 3.0749949341438706e-05,
"loss": 0.0021,
"step": 95000
},
{
"epoch": 1.93515704154002,
"grad_norm": 0.003688658820465207,
"learning_rate": 3.0648632218844986e-05,
"loss": 0.0016,
"step": 95500
},
{
"epoch": 1.9452887537993921,
"grad_norm": 0.32523173093795776,
"learning_rate": 3.054731509625127e-05,
"loss": 0.002,
"step": 96000
},
{
"epoch": 1.955420466058764,
"grad_norm": 0.021606747061014175,
"learning_rate": 3.0445997973657547e-05,
"loss": 0.0049,
"step": 96500
},
{
"epoch": 1.9655521783181358,
"grad_norm": 0.0006940297316759825,
"learning_rate": 3.0344680851063834e-05,
"loss": 0.0027,
"step": 97000
},
{
"epoch": 1.9756838905775076,
"grad_norm": 1.9993683099746704,
"learning_rate": 3.0243363728470115e-05,
"loss": 0.0026,
"step": 97500
},
{
"epoch": 1.9858156028368794,
"grad_norm": 0.10824126750230789,
"learning_rate": 3.0142046605876395e-05,
"loss": 0.0021,
"step": 98000
},
{
"epoch": 1.9959473150962512,
"grad_norm": 0.01038323249667883,
"learning_rate": 3.0040729483282676e-05,
"loss": 0.0025,
"step": 98500
},
{
"epoch": 2.0,
"eval_accuracy": 0.9992859494267464,
"eval_f1": 0.9992860626946858,
"eval_loss": 0.0031463655177503824,
"eval_precision": 0.9992865610887268,
"eval_recall": 0.9992859494267464,
"eval_runtime": 377.8329,
"eval_samples_per_second": 185.394,
"eval_steps_per_second": 11.587,
"step": 98700
},
{
"epoch": 2.0060790273556233,
"grad_norm": 0.06401953846216202,
"learning_rate": 2.9939412360688956e-05,
"loss": 0.0028,
"step": 99000
},
{
"epoch": 2.016210739614995,
"grad_norm": 0.808768630027771,
"learning_rate": 2.9838095238095237e-05,
"loss": 0.0021,
"step": 99500
},
{
"epoch": 2.026342451874367,
"grad_norm": 0.001931357546709478,
"learning_rate": 2.9736778115501524e-05,
"loss": 0.0023,
"step": 100000
},
{
"epoch": 2.0364741641337387,
"grad_norm": 0.13670825958251953,
"learning_rate": 2.9635460992907804e-05,
"loss": 0.0019,
"step": 100500
},
{
"epoch": 2.0466058763931105,
"grad_norm": 0.06261293590068817,
"learning_rate": 2.9534143870314085e-05,
"loss": 0.0017,
"step": 101000
},
{
"epoch": 2.0567375886524824,
"grad_norm": 0.006652528885751963,
"learning_rate": 2.9432826747720365e-05,
"loss": 0.0018,
"step": 101500
},
{
"epoch": 2.066869300911854,
"grad_norm": 0.012849073857069016,
"learning_rate": 2.9331509625126646e-05,
"loss": 0.0021,
"step": 102000
},
{
"epoch": 2.077001013171226,
"grad_norm": 0.0033175491262227297,
"learning_rate": 2.9230192502532926e-05,
"loss": 0.0015,
"step": 102500
},
{
"epoch": 2.087132725430598,
"grad_norm": 0.010253848508000374,
"learning_rate": 2.9128875379939213e-05,
"loss": 0.0016,
"step": 103000
},
{
"epoch": 2.0972644376899696,
"grad_norm": 0.001028141938149929,
"learning_rate": 2.9027558257345494e-05,
"loss": 0.0022,
"step": 103500
},
{
"epoch": 2.1073961499493414,
"grad_norm": 0.0003175963065586984,
"learning_rate": 2.8926241134751774e-05,
"loss": 0.0014,
"step": 104000
},
{
"epoch": 2.1175278622087133,
"grad_norm": 0.004258031025528908,
"learning_rate": 2.8824924012158055e-05,
"loss": 0.0026,
"step": 104500
},
{
"epoch": 2.127659574468085,
"grad_norm": 0.015609141439199448,
"learning_rate": 2.8723606889564335e-05,
"loss": 0.0019,
"step": 105000
},
{
"epoch": 2.137791286727457,
"grad_norm": 0.025085508823394775,
"learning_rate": 2.8622289766970616e-05,
"loss": 0.0017,
"step": 105500
},
{
"epoch": 2.1479229989868287,
"grad_norm": 0.005206093192100525,
"learning_rate": 2.8520972644376903e-05,
"loss": 0.0016,
"step": 106000
},
{
"epoch": 2.1580547112462005,
"grad_norm": 0.002129113767296076,
"learning_rate": 2.8419655521783183e-05,
"loss": 0.0021,
"step": 106500
},
{
"epoch": 2.1681864235055723,
"grad_norm": 0.0303476732224226,
"learning_rate": 2.8318338399189464e-05,
"loss": 0.0023,
"step": 107000
},
{
"epoch": 2.178318135764944,
"grad_norm": 0.00023147836327552795,
"learning_rate": 2.8217021276595744e-05,
"loss": 0.0014,
"step": 107500
},
{
"epoch": 2.188449848024316,
"grad_norm": 0.01121602300554514,
"learning_rate": 2.8115704154002025e-05,
"loss": 0.0025,
"step": 108000
},
{
"epoch": 2.198581560283688,
"grad_norm": 0.01344907283782959,
"learning_rate": 2.8014387031408305e-05,
"loss": 0.002,
"step": 108500
},
{
"epoch": 2.2087132725430596,
"grad_norm": 0.0011041724355891347,
"learning_rate": 2.7913069908814593e-05,
"loss": 0.0027,
"step": 109000
},
{
"epoch": 2.2188449848024314,
"grad_norm": 0.02955365553498268,
"learning_rate": 2.7811752786220873e-05,
"loss": 0.0022,
"step": 109500
},
{
"epoch": 2.2289766970618032,
"grad_norm": 0.07166969031095505,
"learning_rate": 2.7710435663627154e-05,
"loss": 0.0017,
"step": 110000
},
{
"epoch": 2.239108409321175,
"grad_norm": 0.011252596974372864,
"learning_rate": 2.7609118541033434e-05,
"loss": 0.0014,
"step": 110500
},
{
"epoch": 2.2492401215805473,
"grad_norm": 0.045478031039237976,
"learning_rate": 2.7507801418439714e-05,
"loss": 0.0015,
"step": 111000
},
{
"epoch": 2.259371833839919,
"grad_norm": 0.025144068524241447,
"learning_rate": 2.7406484295845998e-05,
"loss": 0.0017,
"step": 111500
},
{
"epoch": 2.269503546099291,
"grad_norm": 0.009457019157707691,
"learning_rate": 2.7305167173252282e-05,
"loss": 0.0015,
"step": 112000
},
{
"epoch": 2.2796352583586628,
"grad_norm": 0.003307552542537451,
"learning_rate": 2.7203850050658563e-05,
"loss": 0.0018,
"step": 112500
},
{
"epoch": 2.2897669706180346,
"grad_norm": 0.0021774822380393744,
"learning_rate": 2.7102532928064843e-05,
"loss": 0.0009,
"step": 113000
},
{
"epoch": 2.2998986828774064,
"grad_norm": 0.03337857872247696,
"learning_rate": 2.7001215805471124e-05,
"loss": 0.0012,
"step": 113500
},
{
"epoch": 2.310030395136778,
"grad_norm": 0.002895305398851633,
"learning_rate": 2.6899898682877407e-05,
"loss": 0.0015,
"step": 114000
},
{
"epoch": 2.32016210739615,
"grad_norm": 0.00023546746524516493,
"learning_rate": 2.6798581560283688e-05,
"loss": 0.0013,
"step": 114500
},
{
"epoch": 2.330293819655522,
"grad_norm": 0.001964944414794445,
"learning_rate": 2.669726443768997e-05,
"loss": 0.002,
"step": 115000
},
{
"epoch": 2.3404255319148937,
"grad_norm": 0.00665094843134284,
"learning_rate": 2.6595947315096252e-05,
"loss": 0.0022,
"step": 115500
},
{
"epoch": 2.3505572441742655,
"grad_norm": 0.015557374805212021,
"learning_rate": 2.6494630192502533e-05,
"loss": 0.002,
"step": 116000
},
{
"epoch": 2.3606889564336373,
"grad_norm": 0.00035607043537311256,
"learning_rate": 2.6393313069908816e-05,
"loss": 0.0014,
"step": 116500
},
{
"epoch": 2.370820668693009,
"grad_norm": 2.188824415206909,
"learning_rate": 2.6291995947315097e-05,
"loss": 0.0012,
"step": 117000
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.005363579839468002,
"learning_rate": 2.619067882472138e-05,
"loss": 0.0018,
"step": 117500
},
{
"epoch": 2.3910840932117527,
"grad_norm": 0.014328660443425179,
"learning_rate": 2.608936170212766e-05,
"loss": 0.0017,
"step": 118000
},
{
"epoch": 2.4012158054711246,
"grad_norm": 0.9693813920021057,
"learning_rate": 2.598804457953394e-05,
"loss": 0.0019,
"step": 118500
},
{
"epoch": 2.4113475177304964,
"grad_norm": 0.002820476656779647,
"learning_rate": 2.5886727456940226e-05,
"loss": 0.0019,
"step": 119000
},
{
"epoch": 2.421479229989868,
"grad_norm": 0.000413126457715407,
"learning_rate": 2.5785410334346506e-05,
"loss": 0.0011,
"step": 119500
},
{
"epoch": 2.43161094224924,
"grad_norm": 0.0005565093597397208,
"learning_rate": 2.5684093211752786e-05,
"loss": 0.0014,
"step": 120000
},
{
"epoch": 2.441742654508612,
"grad_norm": 0.1878264844417572,
"learning_rate": 2.558277608915907e-05,
"loss": 0.0013,
"step": 120500
},
{
"epoch": 2.4518743667679836,
"grad_norm": 0.0014664519112557173,
"learning_rate": 2.5481458966565354e-05,
"loss": 0.0014,
"step": 121000
},
{
"epoch": 2.4620060790273555,
"grad_norm": 0.002391642890870571,
"learning_rate": 2.5380141843971635e-05,
"loss": 0.0022,
"step": 121500
},
{
"epoch": 2.4721377912867273,
"grad_norm": 0.0009669638238847256,
"learning_rate": 2.5278824721377915e-05,
"loss": 0.0028,
"step": 122000
},
{
"epoch": 2.482269503546099,
"grad_norm": 0.001344940159469843,
"learning_rate": 2.5177507598784196e-05,
"loss": 0.0021,
"step": 122500
},
{
"epoch": 2.4924012158054714,
"grad_norm": 0.048559609800577164,
"learning_rate": 2.5076190476190476e-05,
"loss": 0.0012,
"step": 123000
},
{
"epoch": 2.502532928064843,
"grad_norm": 0.003990110941231251,
"learning_rate": 2.497487335359676e-05,
"loss": 0.0017,
"step": 123500
},
{
"epoch": 2.512664640324215,
"grad_norm": 0.030355116352438927,
"learning_rate": 2.487355623100304e-05,
"loss": 0.0016,
"step": 124000
},
{
"epoch": 2.522796352583587,
"grad_norm": 1.8318172693252563,
"learning_rate": 2.4772239108409324e-05,
"loss": 0.0018,
"step": 124500
},
{
"epoch": 2.5329280648429586,
"grad_norm": 0.019709262996912003,
"learning_rate": 2.4670921985815605e-05,
"loss": 0.0017,
"step": 125000
},
{
"epoch": 2.5430597771023304,
"grad_norm": 0.0011386788683012128,
"learning_rate": 2.4569604863221885e-05,
"loss": 0.0011,
"step": 125500
},
{
"epoch": 2.5531914893617023,
"grad_norm": 0.0009728266159072518,
"learning_rate": 2.446828774062817e-05,
"loss": 0.0014,
"step": 126000
},
{
"epoch": 2.563323201621074,
"grad_norm": 0.001098209759220481,
"learning_rate": 2.436697061803445e-05,
"loss": 0.0012,
"step": 126500
},
{
"epoch": 2.573454913880446,
"grad_norm": 0.0009475924889557064,
"learning_rate": 2.426565349544073e-05,
"loss": 0.001,
"step": 127000
},
{
"epoch": 2.5835866261398177,
"grad_norm": 0.0004112945171073079,
"learning_rate": 2.4164336372847014e-05,
"loss": 0.0007,
"step": 127500
},
{
"epoch": 2.5937183383991895,
"grad_norm": 0.0023858449421823025,
"learning_rate": 2.4063019250253294e-05,
"loss": 0.0021,
"step": 128000
},
{
"epoch": 2.6038500506585613,
"grad_norm": 0.018019314855337143,
"learning_rate": 2.3961702127659575e-05,
"loss": 0.0013,
"step": 128500
},
{
"epoch": 2.613981762917933,
"grad_norm": 0.0006694953772239387,
"learning_rate": 2.386038500506586e-05,
"loss": 0.002,
"step": 129000
},
{
"epoch": 2.624113475177305,
"grad_norm": 0.0005967771867290139,
"learning_rate": 2.375906788247214e-05,
"loss": 0.0007,
"step": 129500
},
{
"epoch": 2.634245187436677,
"grad_norm": 0.001148981973528862,
"learning_rate": 2.365775075987842e-05,
"loss": 0.0018,
"step": 130000
},
{
"epoch": 2.6443768996960486,
"grad_norm": 0.005810345523059368,
"learning_rate": 2.3556433637284703e-05,
"loss": 0.0014,
"step": 130500
},
{
"epoch": 2.6545086119554204,
"grad_norm": 0.012930807657539845,
"learning_rate": 2.3455116514690984e-05,
"loss": 0.0012,
"step": 131000
},
{
"epoch": 2.6646403242147922,
"grad_norm": 0.0818137601017952,
"learning_rate": 2.3353799392097264e-05,
"loss": 0.0019,
"step": 131500
},
{
"epoch": 2.674772036474164,
"grad_norm": 0.010759086348116398,
"learning_rate": 2.3252482269503548e-05,
"loss": 0.0009,
"step": 132000
},
{
"epoch": 2.684903748733536,
"grad_norm": 0.05643817409873009,
"learning_rate": 2.315116514690983e-05,
"loss": 0.0012,
"step": 132500
},
{
"epoch": 2.6950354609929077,
"grad_norm": 0.004631399642676115,
"learning_rate": 2.3049848024316112e-05,
"loss": 0.0008,
"step": 133000
},
{
"epoch": 2.7051671732522795,
"grad_norm": 0.03901492431759834,
"learning_rate": 2.2948530901722393e-05,
"loss": 0.0019,
"step": 133500
},
{
"epoch": 2.7152988855116513,
"grad_norm": 0.00697283074259758,
"learning_rate": 2.2847213779128673e-05,
"loss": 0.0013,
"step": 134000
},
{
"epoch": 2.725430597771023,
"grad_norm": 0.008329696953296661,
"learning_rate": 2.2745896656534957e-05,
"loss": 0.0014,
"step": 134500
},
{
"epoch": 2.735562310030395,
"grad_norm": 0.00027154709096066654,
"learning_rate": 2.2644579533941238e-05,
"loss": 0.0016,
"step": 135000
},
{
"epoch": 2.745694022289767,
"grad_norm": 0.0021239016205072403,
"learning_rate": 2.2543262411347518e-05,
"loss": 0.0016,
"step": 135500
},
{
"epoch": 2.7558257345491386,
"grad_norm": 0.05723918229341507,
"learning_rate": 2.2441945288753802e-05,
"loss": 0.0024,
"step": 136000
},
{
"epoch": 2.7659574468085104,
"grad_norm": 0.0015372316120192409,
"learning_rate": 2.2340628166160082e-05,
"loss": 0.0017,
"step": 136500
},
{
"epoch": 2.7760891590678822,
"grad_norm": 0.009359728544950485,
"learning_rate": 2.2239311043566363e-05,
"loss": 0.0016,
"step": 137000
},
{
"epoch": 2.786220871327254,
"grad_norm": 0.000444738136138767,
"learning_rate": 2.2137993920972647e-05,
"loss": 0.0012,
"step": 137500
},
{
"epoch": 2.7963525835866263,
"grad_norm": 0.0034484388306736946,
"learning_rate": 2.2036676798378927e-05,
"loss": 0.001,
"step": 138000
},
{
"epoch": 2.806484295845998,
"grad_norm": 0.011139455251395702,
"learning_rate": 2.1935359675785208e-05,
"loss": 0.0012,
"step": 138500
},
{
"epoch": 2.81661600810537,
"grad_norm": 0.004759063478559256,
"learning_rate": 2.183404255319149e-05,
"loss": 0.0016,
"step": 139000
},
{
"epoch": 2.8267477203647418,
"grad_norm": 0.0018992675468325615,
"learning_rate": 2.1732725430597772e-05,
"loss": 0.0016,
"step": 139500
},
{
"epoch": 2.8368794326241136,
"grad_norm": 0.04305073618888855,
"learning_rate": 2.1631408308004052e-05,
"loss": 0.0007,
"step": 140000
},
{
"epoch": 2.8470111448834854,
"grad_norm": 0.0012538008159026504,
"learning_rate": 2.1530091185410336e-05,
"loss": 0.0015,
"step": 140500
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.013261191546916962,
"learning_rate": 2.1428774062816617e-05,
"loss": 0.0012,
"step": 141000
},
{
"epoch": 2.867274569402229,
"grad_norm": 0.0025184724945575,
"learning_rate": 2.1327456940222897e-05,
"loss": 0.0014,
"step": 141500
},
{
"epoch": 2.877406281661601,
"grad_norm": 0.0010861046612262726,
"learning_rate": 2.122613981762918e-05,
"loss": 0.001,
"step": 142000
},
{
"epoch": 2.8875379939209727,
"grad_norm": 0.0007479240885004401,
"learning_rate": 2.112482269503546e-05,
"loss": 0.0011,
"step": 142500
},
{
"epoch": 2.8976697061803445,
"grad_norm": 0.00030417501693591475,
"learning_rate": 2.1023505572441742e-05,
"loss": 0.0013,
"step": 143000
},
{
"epoch": 2.9078014184397163,
"grad_norm": 0.0005445684073492885,
"learning_rate": 2.0922188449848026e-05,
"loss": 0.0011,
"step": 143500
},
{
"epoch": 2.917933130699088,
"grad_norm": 14.009148597717285,
"learning_rate": 2.0820871327254306e-05,
"loss": 0.0009,
"step": 144000
},
{
"epoch": 2.92806484295846,
"grad_norm": 0.0036419560201466084,
"learning_rate": 2.0719554204660587e-05,
"loss": 0.0014,
"step": 144500
},
{
"epoch": 2.9381965552178317,
"grad_norm": 0.00013747498451266438,
"learning_rate": 2.061823708206687e-05,
"loss": 0.0011,
"step": 145000
},
{
"epoch": 2.9483282674772036,
"grad_norm": 2.2749545574188232,
"learning_rate": 2.051691995947315e-05,
"loss": 0.0013,
"step": 145500
},
{
"epoch": 2.9584599797365754,
"grad_norm": 0.003953267820179462,
"learning_rate": 2.041560283687943e-05,
"loss": 0.0013,
"step": 146000
},
{
"epoch": 2.968591691995947,
"grad_norm": 0.00029023364186286926,
"learning_rate": 2.0314285714285715e-05,
"loss": 0.0011,
"step": 146500
},
{
"epoch": 2.978723404255319,
"grad_norm": 0.00021805072901770473,
"learning_rate": 2.0212968591691996e-05,
"loss": 0.0009,
"step": 147000
},
{
"epoch": 2.988855116514691,
"grad_norm": 0.0005817350465804338,
"learning_rate": 2.0111651469098276e-05,
"loss": 0.001,
"step": 147500
},
{
"epoch": 2.998986828774063,
"grad_norm": 0.00011477700900286436,
"learning_rate": 2.001033434650456e-05,
"loss": 0.001,
"step": 148000
},
{
"epoch": 3.0,
"eval_accuracy": 0.9995950160927816,
"eval_f1": 0.9995950488462076,
"eval_loss": 0.0019772385712713003,
"eval_precision": 0.9995952100990623,
"eval_recall": 0.9995950160927816,
"eval_runtime": 378.5667,
"eval_samples_per_second": 185.035,
"eval_steps_per_second": 11.565,
"step": 148050
},
{
"epoch": 3.0091185410334345,
"grad_norm": 0.00040647145942784846,
"learning_rate": 1.990901722391084e-05,
"loss": 0.0005,
"step": 148500
},
{
"epoch": 3.0192502532928063,
"grad_norm": 0.0007454357692040503,
"learning_rate": 1.980770010131712e-05,
"loss": 0.0008,
"step": 149000
},
{
"epoch": 3.0293819655521785,
"grad_norm": 0.0005696163279935718,
"learning_rate": 1.9706382978723405e-05,
"loss": 0.0013,
"step": 149500
},
{
"epoch": 3.0395136778115504,
"grad_norm": 0.012222293764352798,
"learning_rate": 1.9605065856129685e-05,
"loss": 0.0008,
"step": 150000
},
{
"epoch": 3.049645390070922,
"grad_norm": 0.00012108933151466772,
"learning_rate": 1.950374873353597e-05,
"loss": 0.0003,
"step": 150500
},
{
"epoch": 3.059777102330294,
"grad_norm": 0.00036620517494156957,
"learning_rate": 1.940243161094225e-05,
"loss": 0.0006,
"step": 151000
},
{
"epoch": 3.069908814589666,
"grad_norm": 0.08871813118457794,
"learning_rate": 1.930111448834853e-05,
"loss": 0.0006,
"step": 151500
},
{
"epoch": 3.0800405268490376,
"grad_norm": 0.0008102179854176939,
"learning_rate": 1.9199797365754814e-05,
"loss": 0.0011,
"step": 152000
},
{
"epoch": 3.0901722391084094,
"grad_norm": 0.000266701215878129,
"learning_rate": 1.9098480243161094e-05,
"loss": 0.0005,
"step": 152500
},
{
"epoch": 3.1003039513677813,
"grad_norm": 0.00016724316810723394,
"learning_rate": 1.8997163120567378e-05,
"loss": 0.0012,
"step": 153000
},
{
"epoch": 3.110435663627153,
"grad_norm": 0.15629072487354279,
"learning_rate": 1.889584599797366e-05,
"loss": 0.0013,
"step": 153500
},
{
"epoch": 3.120567375886525,
"grad_norm": 0.00016377937572542578,
"learning_rate": 1.879452887537994e-05,
"loss": 0.0005,
"step": 154000
},
{
"epoch": 3.1306990881458967,
"grad_norm": 0.0007321849116124213,
"learning_rate": 1.8693211752786223e-05,
"loss": 0.0007,
"step": 154500
},
{
"epoch": 3.1408308004052685,
"grad_norm": 0.0024997428990900517,
"learning_rate": 1.8591894630192504e-05,
"loss": 0.0014,
"step": 155000
},
{
"epoch": 3.1509625126646403,
"grad_norm": 0.0005427179858088493,
"learning_rate": 1.8490577507598787e-05,
"loss": 0.0006,
"step": 155500
},
{
"epoch": 3.161094224924012,
"grad_norm": 0.0002773651503957808,
"learning_rate": 1.8389260385005068e-05,
"loss": 0.0008,
"step": 156000
},
{
"epoch": 3.171225937183384,
"grad_norm": 9.259460784960538e-05,
"learning_rate": 1.8287943262411348e-05,
"loss": 0.0007,
"step": 156500
},
{
"epoch": 3.181357649442756,
"grad_norm": 0.0008258196176029742,
"learning_rate": 1.8186626139817632e-05,
"loss": 0.0012,
"step": 157000
},
{
"epoch": 3.1914893617021276,
"grad_norm": 0.02231917716562748,
"learning_rate": 1.8085309017223913e-05,
"loss": 0.0005,
"step": 157500
},
{
"epoch": 3.2016210739614994,
"grad_norm": 0.00040244663250632584,
"learning_rate": 1.7983991894630193e-05,
"loss": 0.0004,
"step": 158000
},
{
"epoch": 3.2117527862208712,
"grad_norm": 0.0008584433817304671,
"learning_rate": 1.7882674772036477e-05,
"loss": 0.0007,
"step": 158500
},
{
"epoch": 3.221884498480243,
"grad_norm": 1.4710029363632202,
"learning_rate": 1.7781357649442757e-05,
"loss": 0.0007,
"step": 159000
},
{
"epoch": 3.232016210739615,
"grad_norm": 0.0021673429291695356,
"learning_rate": 1.7680040526849038e-05,
"loss": 0.0013,
"step": 159500
},
{
"epoch": 3.2421479229989867,
"grad_norm": 0.0007125946576707065,
"learning_rate": 1.757872340425532e-05,
"loss": 0.0003,
"step": 160000
},
{
"epoch": 3.2522796352583585,
"grad_norm": 2.0340616703033447,
"learning_rate": 1.7477406281661602e-05,
"loss": 0.0007,
"step": 160500
},
{
"epoch": 3.2624113475177303,
"grad_norm": 0.02512693777680397,
"learning_rate": 1.7376089159067883e-05,
"loss": 0.0013,
"step": 161000
},
{
"epoch": 3.272543059777102,
"grad_norm": 0.0026464995462447405,
"learning_rate": 1.7274772036474166e-05,
"loss": 0.0005,
"step": 161500
},
{
"epoch": 3.282674772036474,
"grad_norm": 0.002218346344307065,
"learning_rate": 1.7173454913880447e-05,
"loss": 0.0003,
"step": 162000
},
{
"epoch": 3.2928064842958458,
"grad_norm": 0.004217283334583044,
"learning_rate": 1.7072137791286727e-05,
"loss": 0.0006,
"step": 162500
},
{
"epoch": 3.3029381965552176,
"grad_norm": 0.09187914431095123,
"learning_rate": 1.697082066869301e-05,
"loss": 0.0008,
"step": 163000
},
{
"epoch": 3.31306990881459,
"grad_norm": 0.004053326323628426,
"learning_rate": 1.6869503546099292e-05,
"loss": 0.0007,
"step": 163500
},
{
"epoch": 3.3232016210739617,
"grad_norm": 0.00016366604540962726,
"learning_rate": 1.6768186423505572e-05,
"loss": 0.0004,
"step": 164000
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.00019909192633349448,
"learning_rate": 1.6666869300911856e-05,
"loss": 0.0004,
"step": 164500
},
{
"epoch": 3.3434650455927053,
"grad_norm": 0.0006264941766858101,
"learning_rate": 1.6565552178318136e-05,
"loss": 0.0006,
"step": 165000
},
{
"epoch": 3.353596757852077,
"grad_norm": 0.0018695942126214504,
"learning_rate": 1.646423505572442e-05,
"loss": 0.0008,
"step": 165500
},
{
"epoch": 3.363728470111449,
"grad_norm": 0.0007150270394049585,
"learning_rate": 1.63629179331307e-05,
"loss": 0.0008,
"step": 166000
},
{
"epoch": 3.3738601823708207,
"grad_norm": 7.787420327076688e-05,
"learning_rate": 1.626160081053698e-05,
"loss": 0.0004,
"step": 166500
},
{
"epoch": 3.3839918946301926,
"grad_norm": 0.00045614209375344217,
"learning_rate": 1.6160283687943265e-05,
"loss": 0.0006,
"step": 167000
},
{
"epoch": 3.3941236068895644,
"grad_norm": 0.0001107916614273563,
"learning_rate": 1.6058966565349546e-05,
"loss": 0.0003,
"step": 167500
},
{
"epoch": 3.404255319148936,
"grad_norm": 0.008644777350127697,
"learning_rate": 1.5957649442755826e-05,
"loss": 0.0007,
"step": 168000
},
{
"epoch": 3.414387031408308,
"grad_norm": 0.00043247168650850654,
"learning_rate": 1.585633232016211e-05,
"loss": 0.0006,
"step": 168500
},
{
"epoch": 3.42451874366768,
"grad_norm": 0.0008023619302548468,
"learning_rate": 1.575501519756839e-05,
"loss": 0.0004,
"step": 169000
},
{
"epoch": 3.4346504559270516,
"grad_norm": 0.0007962311501614749,
"learning_rate": 1.565369807497467e-05,
"loss": 0.0006,
"step": 169500
},
{
"epoch": 3.4447821681864235,
"grad_norm": 0.00028413927066139877,
"learning_rate": 1.5552380952380955e-05,
"loss": 0.0008,
"step": 170000
},
{
"epoch": 3.4549138804457953,
"grad_norm": 0.00016883590433280915,
"learning_rate": 1.5451063829787235e-05,
"loss": 0.0006,
"step": 170500
},
{
"epoch": 3.465045592705167,
"grad_norm": 0.014377252198755741,
"learning_rate": 1.5349746707193516e-05,
"loss": 0.0006,
"step": 171000
},
{
"epoch": 3.475177304964539,
"grad_norm": 0.010873903520405293,
"learning_rate": 1.52484295845998e-05,
"loss": 0.0007,
"step": 171500
},
{
"epoch": 3.4853090172239107,
"grad_norm": 0.001349785947240889,
"learning_rate": 1.514711246200608e-05,
"loss": 0.0008,
"step": 172000
},
{
"epoch": 3.4954407294832825,
"grad_norm": 6.550106627400964e-05,
"learning_rate": 1.504579533941236e-05,
"loss": 0.0004,
"step": 172500
},
{
"epoch": 3.5055724417426544,
"grad_norm": 0.004185553174465895,
"learning_rate": 1.4944478216818644e-05,
"loss": 0.0005,
"step": 173000
},
{
"epoch": 3.515704154002026,
"grad_norm": 0.0002081769343931228,
"learning_rate": 1.4843161094224925e-05,
"loss": 0.0005,
"step": 173500
},
{
"epoch": 3.5258358662613984,
"grad_norm": 0.0002064239961327985,
"learning_rate": 1.4741843971631205e-05,
"loss": 0.001,
"step": 174000
},
{
"epoch": 3.5359675785207703,
"grad_norm": 0.006560925859957933,
"learning_rate": 1.4640526849037489e-05,
"loss": 0.0006,
"step": 174500
},
{
"epoch": 3.546099290780142,
"grad_norm": 0.001367397839203477,
"learning_rate": 1.453920972644377e-05,
"loss": 0.0006,
"step": 175000
},
{
"epoch": 3.556231003039514,
"grad_norm": 0.0019000001484528184,
"learning_rate": 1.443789260385005e-05,
"loss": 0.0006,
"step": 175500
},
{
"epoch": 3.5663627152988857,
"grad_norm": 0.000535793777089566,
"learning_rate": 1.4336575481256334e-05,
"loss": 0.0009,
"step": 176000
},
{
"epoch": 3.5764944275582575,
"grad_norm": 0.00025122836814261973,
"learning_rate": 1.4235258358662614e-05,
"loss": 0.0004,
"step": 176500
},
{
"epoch": 3.5866261398176293,
"grad_norm": 0.00024198205210268497,
"learning_rate": 1.4133941236068896e-05,
"loss": 0.0004,
"step": 177000
},
{
"epoch": 3.596757852077001,
"grad_norm": 0.00032863879459910095,
"learning_rate": 1.4032624113475179e-05,
"loss": 0.0008,
"step": 177500
},
{
"epoch": 3.606889564336373,
"grad_norm": 0.0001286083715967834,
"learning_rate": 1.3931306990881459e-05,
"loss": 0.0003,
"step": 178000
},
{
"epoch": 3.617021276595745,
"grad_norm": 7.639949035365134e-05,
"learning_rate": 1.3829989868287741e-05,
"loss": 0.0004,
"step": 178500
},
{
"epoch": 3.6271529888551166,
"grad_norm": 0.002776580862700939,
"learning_rate": 1.3728672745694023e-05,
"loss": 0.0009,
"step": 179000
},
{
"epoch": 3.6372847011144884,
"grad_norm": 0.00013075117021799088,
"learning_rate": 1.3627355623100305e-05,
"loss": 0.0004,
"step": 179500
},
{
"epoch": 3.6474164133738602,
"grad_norm": 0.0912652462720871,
"learning_rate": 1.3526038500506586e-05,
"loss": 0.0003,
"step": 180000
},
{
"epoch": 3.657548125633232,
"grad_norm": 0.0013257339596748352,
"learning_rate": 1.3424721377912868e-05,
"loss": 0.0009,
"step": 180500
},
{
"epoch": 3.667679837892604,
"grad_norm": 4.812105544260703e-05,
"learning_rate": 1.332340425531915e-05,
"loss": 0.0005,
"step": 181000
},
{
"epoch": 3.6778115501519757,
"grad_norm": 0.0011839779326692224,
"learning_rate": 1.322208713272543e-05,
"loss": 0.0009,
"step": 181500
},
{
"epoch": 3.6879432624113475,
"grad_norm": 0.0005408598226495087,
"learning_rate": 1.3120770010131715e-05,
"loss": 0.0006,
"step": 182000
},
{
"epoch": 3.6980749746707193,
"grad_norm": 9.703055548015982e-05,
"learning_rate": 1.3019452887537995e-05,
"loss": 0.0005,
"step": 182500
},
{
"epoch": 3.708206686930091,
"grad_norm": 0.0002428248117212206,
"learning_rate": 1.2918135764944275e-05,
"loss": 0.0004,
"step": 183000
},
{
"epoch": 3.718338399189463,
"grad_norm": 0.00048470127512700856,
"learning_rate": 1.281681864235056e-05,
"loss": 0.0007,
"step": 183500
},
{
"epoch": 3.728470111448835,
"grad_norm": 0.00018880152492783964,
"learning_rate": 1.271550151975684e-05,
"loss": 0.0004,
"step": 184000
},
{
"epoch": 3.7386018237082066,
"grad_norm": 0.0866980105638504,
"learning_rate": 1.261418439716312e-05,
"loss": 0.001,
"step": 184500
},
{
"epoch": 3.7487335359675784,
"grad_norm": 0.0004920652718283236,
"learning_rate": 1.2512867274569404e-05,
"loss": 0.0004,
"step": 185000
},
{
"epoch": 3.7588652482269502,
"grad_norm": 0.0006933720433153212,
"learning_rate": 1.2411550151975685e-05,
"loss": 0.0007,
"step": 185500
},
{
"epoch": 3.768996960486322,
"grad_norm": 9.502648754278198e-05,
"learning_rate": 1.2310233029381967e-05,
"loss": 0.0002,
"step": 186000
},
{
"epoch": 3.779128672745694,
"grad_norm": 0.14055226743221283,
"learning_rate": 1.2208915906788247e-05,
"loss": 0.0009,
"step": 186500
},
{
"epoch": 3.7892603850050657,
"grad_norm": 0.0009207709226757288,
"learning_rate": 1.210759878419453e-05,
"loss": 0.0007,
"step": 187000
},
{
"epoch": 3.7993920972644375,
"grad_norm": 0.0017948386957868934,
"learning_rate": 1.2006281661600811e-05,
"loss": 0.0006,
"step": 187500
},
{
"epoch": 3.8095238095238093,
"grad_norm": 0.0010630637407302856,
"learning_rate": 1.1904964539007092e-05,
"loss": 0.0007,
"step": 188000
},
{
"epoch": 3.819655521783181,
"grad_norm": 0.001550987595692277,
"learning_rate": 1.1803647416413374e-05,
"loss": 0.0004,
"step": 188500
},
{
"epoch": 3.829787234042553,
"grad_norm": 0.00022620504023507237,
"learning_rate": 1.1702330293819656e-05,
"loss": 0.001,
"step": 189000
},
{
"epoch": 3.8399189463019248,
"grad_norm": 0.005701087880879641,
"learning_rate": 1.1601013171225937e-05,
"loss": 0.0005,
"step": 189500
},
{
"epoch": 3.850050658561297,
"grad_norm": 0.002242797054350376,
"learning_rate": 1.1499696048632219e-05,
"loss": 0.0005,
"step": 190000
},
{
"epoch": 3.860182370820669,
"grad_norm": 0.000945412612054497,
"learning_rate": 1.1398378926038501e-05,
"loss": 0.0004,
"step": 190500
},
{
"epoch": 3.8703140830800407,
"grad_norm": 0.00039639745955355465,
"learning_rate": 1.1297061803444783e-05,
"loss": 0.0007,
"step": 191000
},
{
"epoch": 3.8804457953394125,
"grad_norm": 0.00015946484927553684,
"learning_rate": 1.1195744680851064e-05,
"loss": 0.0004,
"step": 191500
},
{
"epoch": 3.8905775075987843,
"grad_norm": 0.0012002813164144754,
"learning_rate": 1.1094427558257346e-05,
"loss": 0.0006,
"step": 192000
},
{
"epoch": 3.900709219858156,
"grad_norm": 0.0018312711035832763,
"learning_rate": 1.0993110435663628e-05,
"loss": 0.0006,
"step": 192500
},
{
"epoch": 3.910840932117528,
"grad_norm": 0.0029842143412679434,
"learning_rate": 1.0891793313069908e-05,
"loss": 0.0004,
"step": 193000
},
{
"epoch": 3.9209726443768997,
"grad_norm": 0.00016236377996392548,
"learning_rate": 1.079047619047619e-05,
"loss": 0.0004,
"step": 193500
},
{
"epoch": 3.9311043566362716,
"grad_norm": 4.384133815765381,
"learning_rate": 1.0689159067882473e-05,
"loss": 0.0003,
"step": 194000
},
{
"epoch": 3.9412360688956434,
"grad_norm": 0.0002483314019627869,
"learning_rate": 1.0587841945288753e-05,
"loss": 0.0003,
"step": 194500
},
{
"epoch": 3.951367781155015,
"grad_norm": 4.876391540165059e-05,
"learning_rate": 1.0486524822695035e-05,
"loss": 0.0002,
"step": 195000
},
{
"epoch": 3.961499493414387,
"grad_norm": 6.163517537061125e-05,
"learning_rate": 1.0385207700101318e-05,
"loss": 0.0002,
"step": 195500
},
{
"epoch": 3.971631205673759,
"grad_norm": 0.2390281707048416,
"learning_rate": 1.02838905775076e-05,
"loss": 0.0003,
"step": 196000
},
{
"epoch": 3.9817629179331306,
"grad_norm": 0.0019110542489215732,
"learning_rate": 1.0182573454913882e-05,
"loss": 0.0015,
"step": 196500
},
{
"epoch": 3.9918946301925025,
"grad_norm": 0.001141904853284359,
"learning_rate": 1.0081256332320162e-05,
"loss": 0.0003,
"step": 197000
},
{
"epoch": 4.0,
"eval_accuracy": 0.9997418523632351,
"eval_f1": 0.9997418729241718,
"eval_loss": 0.0016791160451248288,
"eval_precision": 0.9997419693903039,
"eval_recall": 0.9997418523632351,
"eval_runtime": 379.6081,
"eval_samples_per_second": 184.527,
"eval_steps_per_second": 11.533,
"step": 197400
},
{
"epoch": 4.002026342451875,
"grad_norm": 0.0005896133952774107,
"learning_rate": 9.979939209726444e-06,
"loss": 0.0003,
"step": 197500
},
{
"epoch": 4.0121580547112465,
"grad_norm": 0.0067860777489840984,
"learning_rate": 9.878622087132727e-06,
"loss": 0.0006,
"step": 198000
},
{
"epoch": 4.022289766970618,
"grad_norm": 0.001142342109233141,
"learning_rate": 9.777304964539009e-06,
"loss": 0.0001,
"step": 198500
},
{
"epoch": 4.03242147922999,
"grad_norm": 0.0005706630763597786,
"learning_rate": 9.67598784194529e-06,
"loss": 0.0003,
"step": 199000
},
{
"epoch": 4.042553191489362,
"grad_norm": 0.0017065483843907714,
"learning_rate": 9.574670719351571e-06,
"loss": 0.0005,
"step": 199500
},
{
"epoch": 4.052684903748734,
"grad_norm": 0.0004346190544310957,
"learning_rate": 9.473353596757854e-06,
"loss": 0.0003,
"step": 200000
},
{
"epoch": 4.062816616008106,
"grad_norm": 0.00011587599874474108,
"learning_rate": 9.372036474164134e-06,
"loss": 0.0005,
"step": 200500
},
{
"epoch": 4.072948328267477,
"grad_norm": 0.0020452928729355335,
"learning_rate": 9.270719351570416e-06,
"loss": 0.0006,
"step": 201000
},
{
"epoch": 4.083080040526849,
"grad_norm": 0.0030393574852496386,
"learning_rate": 9.169402228976698e-06,
"loss": 0.0001,
"step": 201500
},
{
"epoch": 4.093211752786221,
"grad_norm": 0.00011766282113967463,
"learning_rate": 9.068085106382979e-06,
"loss": 0.0003,
"step": 202000
},
{
"epoch": 4.103343465045593,
"grad_norm": 0.00012966316717211157,
"learning_rate": 8.966767983789261e-06,
"loss": 0.0002,
"step": 202500
},
{
"epoch": 4.113475177304965,
"grad_norm": 0.0005606951890513301,
"learning_rate": 8.865450861195543e-06,
"loss": 0.0002,
"step": 203000
},
{
"epoch": 4.1236068895643365,
"grad_norm": 6.66538835503161e-05,
"learning_rate": 8.764133738601824e-06,
"loss": 0.0004,
"step": 203500
},
{
"epoch": 4.133738601823708,
"grad_norm": 0.00035967957228422165,
"learning_rate": 8.662816616008106e-06,
"loss": 0.0003,
"step": 204000
},
{
"epoch": 4.14387031408308,
"grad_norm": 3.555602233973332e-05,
"learning_rate": 8.561499493414388e-06,
"loss": 0.0002,
"step": 204500
},
{
"epoch": 4.154002026342452,
"grad_norm": 7.583157275803387e-05,
"learning_rate": 8.460182370820668e-06,
"loss": 0.0003,
"step": 205000
},
{
"epoch": 4.164133738601824,
"grad_norm": 0.00014735900913365185,
"learning_rate": 8.35886524822695e-06,
"loss": 0.0003,
"step": 205500
},
{
"epoch": 4.174265450861196,
"grad_norm": 0.00036512804217636585,
"learning_rate": 8.257548125633233e-06,
"loss": 0.0002,
"step": 206000
},
{
"epoch": 4.184397163120567,
"grad_norm": 0.0002252118574688211,
"learning_rate": 8.156231003039515e-06,
"loss": 0.0009,
"step": 206500
},
{
"epoch": 4.194528875379939,
"grad_norm": 0.00020840394427068532,
"learning_rate": 8.054913880445795e-06,
"loss": 0.0002,
"step": 207000
},
{
"epoch": 4.204660587639311,
"grad_norm": 0.004410985857248306,
"learning_rate": 7.953596757852077e-06,
"loss": 0.0005,
"step": 207500
},
{
"epoch": 4.214792299898683,
"grad_norm": 0.00022406030620913953,
"learning_rate": 7.85227963525836e-06,
"loss": 0.0002,
"step": 208000
},
{
"epoch": 4.224924012158055,
"grad_norm": 9.579696779837832e-05,
"learning_rate": 7.75096251266464e-06,
"loss": 0.0001,
"step": 208500
},
{
"epoch": 4.2350557244174265,
"grad_norm": 0.0025997899938374758,
"learning_rate": 7.649645390070922e-06,
"loss": 0.0002,
"step": 209000
},
{
"epoch": 4.245187436676798,
"grad_norm": 0.00013335005496628582,
"learning_rate": 7.548328267477204e-06,
"loss": 0.0001,
"step": 209500
},
{
"epoch": 4.25531914893617,
"grad_norm": 0.0007246573222801089,
"learning_rate": 7.447011144883486e-06,
"loss": 0.0006,
"step": 210000
},
{
"epoch": 4.265450861195542,
"grad_norm": 0.00013572497118730098,
"learning_rate": 7.345694022289768e-06,
"loss": 0.0003,
"step": 210500
},
{
"epoch": 4.275582573454914,
"grad_norm": 0.0002135665126843378,
"learning_rate": 7.24437689969605e-06,
"loss": 0.0001,
"step": 211000
},
{
"epoch": 4.285714285714286,
"grad_norm": 6.381842831615359e-05,
"learning_rate": 7.14305977710233e-06,
"loss": 0.0001,
"step": 211500
},
{
"epoch": 4.295845997973657,
"grad_norm": 3.025340811291244e-05,
"learning_rate": 7.0417426545086126e-06,
"loss": 0.0001,
"step": 212000
},
{
"epoch": 4.305977710233029,
"grad_norm": 3.8568006857531145e-05,
"learning_rate": 6.940425531914895e-06,
"loss": 0.0001,
"step": 212500
},
{
"epoch": 4.316109422492401,
"grad_norm": 3.65682462870609e-05,
"learning_rate": 6.839108409321175e-06,
"loss": 0.0003,
"step": 213000
},
{
"epoch": 4.326241134751773,
"grad_norm": 2.7566075004870072e-05,
"learning_rate": 6.737791286727457e-06,
"loss": 0.0001,
"step": 213500
},
{
"epoch": 4.336372847011145,
"grad_norm": 8.40180873638019e-05,
"learning_rate": 6.6364741641337395e-06,
"loss": 0.0001,
"step": 214000
},
{
"epoch": 4.3465045592705165,
"grad_norm": 0.0006221074727363884,
"learning_rate": 6.53515704154002e-06,
"loss": 0.0003,
"step": 214500
},
{
"epoch": 4.356636271529888,
"grad_norm": 2.8866035790997557e-05,
"learning_rate": 6.433839918946302e-06,
"loss": 0.0001,
"step": 215000
},
{
"epoch": 4.36676798378926,
"grad_norm": 5.898380756378174,
"learning_rate": 6.332522796352584e-06,
"loss": 0.0001,
"step": 215500
},
{
"epoch": 4.376899696048632,
"grad_norm": 2.5054974685190246e-05,
"learning_rate": 6.231205673758866e-06,
"loss": 0.0002,
"step": 216000
},
{
"epoch": 4.387031408308004,
"grad_norm": 7.587042637169361e-05,
"learning_rate": 6.129888551165147e-06,
"loss": 0.0004,
"step": 216500
},
{
"epoch": 4.397163120567376,
"grad_norm": 4.567088762996718e-05,
"learning_rate": 6.028571428571428e-06,
"loss": 0.0002,
"step": 217000
},
{
"epoch": 4.407294832826747,
"grad_norm": 5.717075691791251e-05,
"learning_rate": 5.92725430597771e-06,
"loss": 0.0005,
"step": 217500
},
{
"epoch": 4.417426545086119,
"grad_norm": 0.0001339384471066296,
"learning_rate": 5.825937183383992e-06,
"loss": 0.0002,
"step": 218000
},
{
"epoch": 4.427558257345491,
"grad_norm": 0.0016710502095520496,
"learning_rate": 5.724620060790274e-06,
"loss": 0.0002,
"step": 218500
},
{
"epoch": 4.437689969604863,
"grad_norm": 4.579993037623353e-05,
"learning_rate": 5.623302938196556e-06,
"loss": 0.0001,
"step": 219000
},
{
"epoch": 4.447821681864235,
"grad_norm": 0.00017921006656251848,
"learning_rate": 5.521985815602837e-06,
"loss": 0.0002,
"step": 219500
},
{
"epoch": 4.4579533941236065,
"grad_norm": 7.257221295731142e-05,
"learning_rate": 5.4206686930091195e-06,
"loss": 0.0,
"step": 220000
},
{
"epoch": 4.468085106382979,
"grad_norm": 0.00014980675769038498,
"learning_rate": 5.319351570415401e-06,
"loss": 0.0003,
"step": 220500
},
{
"epoch": 4.47821681864235,
"grad_norm": 0.00026508086011745036,
"learning_rate": 5.218034447821682e-06,
"loss": 0.0,
"step": 221000
},
{
"epoch": 4.488348530901723,
"grad_norm": 3.1956707971403375e-05,
"learning_rate": 5.116717325227964e-06,
"loss": 0.0002,
"step": 221500
},
{
"epoch": 4.498480243161095,
"grad_norm": 0.00025195363559760153,
"learning_rate": 5.0154002026342455e-06,
"loss": 0.0002,
"step": 222000
},
{
"epoch": 4.508611955420466,
"grad_norm": 2.8796304832212627e-05,
"learning_rate": 4.914083080040527e-06,
"loss": 0.0003,
"step": 222500
},
{
"epoch": 4.518743667679838,
"grad_norm": 7.115295738913119e-05,
"learning_rate": 4.812765957446809e-06,
"loss": 0.0002,
"step": 223000
},
{
"epoch": 4.52887537993921,
"grad_norm": 0.00043551792623475194,
"learning_rate": 4.71144883485309e-06,
"loss": 0.0001,
"step": 223500
},
{
"epoch": 4.539007092198582,
"grad_norm": 0.00012999169121030718,
"learning_rate": 4.610131712259372e-06,
"loss": 0.0003,
"step": 224000
},
{
"epoch": 4.549138804457954,
"grad_norm": 9.113108535530046e-05,
"learning_rate": 4.508814589665654e-06,
"loss": 0.0001,
"step": 224500
},
{
"epoch": 4.5592705167173255,
"grad_norm": 3.268069849582389e-05,
"learning_rate": 4.407497467071935e-06,
"loss": 0.0001,
"step": 225000
},
{
"epoch": 4.569402228976697,
"grad_norm": 3.147554525639862e-05,
"learning_rate": 4.306180344478216e-06,
"loss": 0.0002,
"step": 225500
},
{
"epoch": 4.579533941236069,
"grad_norm": 3.103091876255348e-05,
"learning_rate": 4.2048632218844985e-06,
"loss": 0.0003,
"step": 226000
},
{
"epoch": 4.589665653495441,
"grad_norm": 4.341394014772959e-05,
"learning_rate": 4.10354609929078e-06,
"loss": 0.0002,
"step": 226500
},
{
"epoch": 4.599797365754813,
"grad_norm": 0.006706151645630598,
"learning_rate": 4.002228976697062e-06,
"loss": 0.0007,
"step": 227000
},
{
"epoch": 4.609929078014185,
"grad_norm": 4.3482647015480325e-05,
"learning_rate": 3.900911854103344e-06,
"loss": 0.0,
"step": 227500
},
{
"epoch": 4.620060790273556,
"grad_norm": 7.099405775079504e-05,
"learning_rate": 3.799594731509625e-06,
"loss": 0.0001,
"step": 228000
},
{
"epoch": 4.630192502532928,
"grad_norm": 2.243010931124445e-05,
"learning_rate": 3.6982776089159072e-06,
"loss": 0.0002,
"step": 228500
},
{
"epoch": 4.6403242147923,
"grad_norm": 9.873649833025411e-05,
"learning_rate": 3.5969604863221885e-06,
"loss": 0.0001,
"step": 229000
},
{
"epoch": 4.650455927051672,
"grad_norm": 3.25652799801901e-05,
"learning_rate": 3.4956433637284703e-06,
"loss": 0.0001,
"step": 229500
},
{
"epoch": 4.660587639311044,
"grad_norm": 4.1377668821951374e-05,
"learning_rate": 3.3943262411347524e-06,
"loss": 0.0002,
"step": 230000
},
{
"epoch": 4.6707193515704155,
"grad_norm": 5.47610288776923e-05,
"learning_rate": 3.2930091185410337e-06,
"loss": 0.0003,
"step": 230500
},
{
"epoch": 4.680851063829787,
"grad_norm": 6.64242179482244e-05,
"learning_rate": 3.191691995947315e-06,
"loss": 0.0001,
"step": 231000
},
{
"epoch": 4.690982776089159,
"grad_norm": 7.267168984981254e-05,
"learning_rate": 3.0903748733535968e-06,
"loss": 0.0,
"step": 231500
},
{
"epoch": 4.701114488348531,
"grad_norm": 2.0616351321223192e-05,
"learning_rate": 2.9890577507598785e-06,
"loss": 0.0001,
"step": 232000
},
{
"epoch": 4.711246200607903,
"grad_norm": 0.0016979483189061284,
"learning_rate": 2.8877406281661602e-06,
"loss": 0.0003,
"step": 232500
},
{
"epoch": 4.721377912867275,
"grad_norm": 5.4885382269276306e-05,
"learning_rate": 2.7864235055724415e-06,
"loss": 0.0001,
"step": 233000
},
{
"epoch": 4.731509625126646,
"grad_norm": 2.725724334595725e-05,
"learning_rate": 2.6851063829787233e-06,
"loss": 0.0002,
"step": 233500
},
{
"epoch": 4.741641337386018,
"grad_norm": 0.0002646965440362692,
"learning_rate": 2.5837892603850054e-06,
"loss": 0.0001,
"step": 234000
},
{
"epoch": 4.75177304964539,
"grad_norm": 0.0002549967903178185,
"learning_rate": 2.482472137791287e-06,
"loss": 0.0001,
"step": 234500
},
{
"epoch": 4.761904761904762,
"grad_norm": 0.001608754275366664,
"learning_rate": 2.3811550151975685e-06,
"loss": 0.0002,
"step": 235000
},
{
"epoch": 4.772036474164134,
"grad_norm": 0.00016235760995186865,
"learning_rate": 2.27983789260385e-06,
"loss": 0.0002,
"step": 235500
},
{
"epoch": 4.7821681864235055,
"grad_norm": 2.1720326913055032e-05,
"learning_rate": 2.178520770010132e-06,
"loss": 0.0002,
"step": 236000
},
{
"epoch": 4.792299898682877,
"grad_norm": 2.442936965962872e-05,
"learning_rate": 2.0772036474164132e-06,
"loss": 0.0001,
"step": 236500
},
{
"epoch": 4.802431610942249,
"grad_norm": 4.4601965782931075e-05,
"learning_rate": 1.975886524822695e-06,
"loss": 0.0,
"step": 237000
},
{
"epoch": 4.812563323201621,
"grad_norm": 4.2209729144815356e-05,
"learning_rate": 1.874569402228977e-06,
"loss": 0.0,
"step": 237500
},
{
"epoch": 4.822695035460993,
"grad_norm": 0.00013748419587500393,
"learning_rate": 1.7732522796352587e-06,
"loss": 0.0002,
"step": 238000
},
{
"epoch": 4.832826747720365,
"grad_norm": 3.161181302857585e-05,
"learning_rate": 1.67193515704154e-06,
"loss": 0.0,
"step": 238500
},
{
"epoch": 4.842958459979736,
"grad_norm": 0.0026995555963367224,
"learning_rate": 1.5706180344478217e-06,
"loss": 0.0,
"step": 239000
},
{
"epoch": 4.853090172239108,
"grad_norm": 0.00017810733697842807,
"learning_rate": 1.4693009118541034e-06,
"loss": 0.0001,
"step": 239500
},
{
"epoch": 4.86322188449848,
"grad_norm": 0.0014064594870433211,
"learning_rate": 1.3679837892603852e-06,
"loss": 0.0001,
"step": 240000
},
{
"epoch": 4.873353596757852,
"grad_norm": 1.7768637917470187e-05,
"learning_rate": 1.2666666666666667e-06,
"loss": 0.0,
"step": 240500
},
{
"epoch": 4.883485309017224,
"grad_norm": 0.00014705142530146986,
"learning_rate": 1.1653495440729484e-06,
"loss": 0.0001,
"step": 241000
},
{
"epoch": 4.8936170212765955,
"grad_norm": 1.4760345038666856e-05,
"learning_rate": 1.06403242147923e-06,
"loss": 0.0001,
"step": 241500
},
{
"epoch": 4.903748733535967,
"grad_norm": 3.4190128644695505e-05,
"learning_rate": 9.627152988855117e-07,
"loss": 0.0001,
"step": 242000
},
{
"epoch": 4.913880445795339,
"grad_norm": 1.7926526197697967e-05,
"learning_rate": 8.613981762917934e-07,
"loss": 0.0001,
"step": 242500
},
{
"epoch": 4.924012158054711,
"grad_norm": 0.00015696664922870696,
"learning_rate": 7.60081053698075e-07,
"loss": 0.0001,
"step": 243000
},
{
"epoch": 4.934143870314083,
"grad_norm": 2.8205437047290616e-05,
"learning_rate": 6.587639311043567e-07,
"loss": 0.0002,
"step": 243500
},
{
"epoch": 4.944275582573455,
"grad_norm": 8.939866529544815e-05,
"learning_rate": 5.574468085106383e-07,
"loss": 0.0001,
"step": 244000
},
{
"epoch": 4.954407294832826,
"grad_norm": 4.093222742085345e-05,
"learning_rate": 4.5612968591691996e-07,
"loss": 0.0002,
"step": 244500
},
{
"epoch": 4.964539007092198,
"grad_norm": 4.5221910113468766e-05,
"learning_rate": 3.5481256332320164e-07,
"loss": 0.0001,
"step": 245000
},
{
"epoch": 4.97467071935157,
"grad_norm": 1.8222008293378167e-05,
"learning_rate": 2.5349544072948327e-07,
"loss": 0.0003,
"step": 245500
},
{
"epoch": 4.984802431610943,
"grad_norm": 2.017403494392056e-05,
"learning_rate": 1.5217831813576495e-07,
"loss": 0.0001,
"step": 246000
},
{
"epoch": 4.994934143870314,
"grad_norm": 5.0148733862442896e-05,
"learning_rate": 5.0861195542046605e-08,
"loss": 0.0,
"step": 246500
},
{
"epoch": 5.0,
"eval_accuracy": 0.9997915872290338,
"eval_f1": 0.9997916000286831,
"eval_loss": 0.0014928707387298346,
"eval_precision": 0.9997916658440986,
"eval_recall": 0.9997915872290338,
"eval_runtime": 378.3235,
"eval_samples_per_second": 185.154,
"eval_steps_per_second": 11.572,
"step": 246750
}
],
"logging_steps": 500,
"max_steps": 246750,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.579005974361536e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}