{ "best_global_step": 21448, "best_metric": 0.9201277955271566, "best_model_checkpoint": "outputs\\kpf-kdpii-ner\\checkpoint-21448", "epoch": 4.0, "eval_steps": 500, "global_step": 21448, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009324878776575904, "grad_norm": 4.206214904785156, "learning_rate": 4.568764568764569e-07, "loss": 4.162920837402344, "step": 50 }, { "epoch": 0.01864975755315181, "grad_norm": 4.167344570159912, "learning_rate": 9.230769230769232e-07, "loss": 4.108631591796875, "step": 100 }, { "epoch": 0.027974636329727715, "grad_norm": 6.085664749145508, "learning_rate": 1.3892773892773895e-06, "loss": 3.964860534667969, "step": 150 }, { "epoch": 0.03729951510630362, "grad_norm": 9.482864379882812, "learning_rate": 1.8554778554778559e-06, "loss": 3.5463021850585936, "step": 200 }, { "epoch": 0.04662439388287952, "grad_norm": 10.59736156463623, "learning_rate": 2.321678321678322e-06, "loss": 2.1147555541992187, "step": 250 }, { "epoch": 0.05594927265945543, "grad_norm": 6.983637809753418, "learning_rate": 2.7878787878787885e-06, "loss": 0.868929214477539, "step": 300 }, { "epoch": 0.06527415143603134, "grad_norm": 5.8941168785095215, "learning_rate": 3.254079254079254e-06, "loss": 0.6754582214355469, "step": 350 }, { "epoch": 0.07459903021260723, "grad_norm": 6.817300319671631, "learning_rate": 3.7202797202797207e-06, "loss": 0.6864476013183594, "step": 400 }, { "epoch": 0.08392390898918314, "grad_norm": 3.412003517150879, "learning_rate": 4.186480186480187e-06, "loss": 0.6888908386230469, "step": 450 }, { "epoch": 0.09324878776575904, "grad_norm": 4.4841694831848145, "learning_rate": 4.652680652680653e-06, "loss": 0.6424143981933593, "step": 500 }, { "epoch": 0.10257366654233495, "grad_norm": 0.5611337423324585, "learning_rate": 5.118881118881119e-06, "loss": 0.6713462066650391, "step": 550 }, { "epoch": 0.11189854531891086, "grad_norm": 2.851097583770752, "learning_rate": 5.585081585081585e-06, "loss": 0.557415771484375, "step": 600 }, { "epoch": 0.12122342409548675, "grad_norm": 1.917966604232788, "learning_rate": 6.051282051282051e-06, "loss": 0.5822576141357422, "step": 650 }, { "epoch": 0.13054830287206268, "grad_norm": 0.473528116941452, "learning_rate": 6.517482517482518e-06, "loss": 0.5558326721191407, "step": 700 }, { "epoch": 0.13987318164863857, "grad_norm": 5.384398460388184, "learning_rate": 6.983682983682984e-06, "loss": 0.5160323715209961, "step": 750 }, { "epoch": 0.14919806042521447, "grad_norm": 3.535067558288574, "learning_rate": 7.44988344988345e-06, "loss": 0.5048127365112305, "step": 800 }, { "epoch": 0.15852293920179036, "grad_norm": 2.199418306350708, "learning_rate": 7.916083916083917e-06, "loss": 0.5268861770629882, "step": 850 }, { "epoch": 0.1678478179783663, "grad_norm": 5.3116774559021, "learning_rate": 8.382284382284382e-06, "loss": 0.504549446105957, "step": 900 }, { "epoch": 0.17717269675494218, "grad_norm": 5.913183689117432, "learning_rate": 8.84848484848485e-06, "loss": 0.4870978546142578, "step": 950 }, { "epoch": 0.18649757553151808, "grad_norm": 1.0376594066619873, "learning_rate": 9.314685314685316e-06, "loss": 0.4489225769042969, "step": 1000 }, { "epoch": 0.195822454308094, "grad_norm": 6.973942279815674, "learning_rate": 9.780885780885782e-06, "loss": 0.3977284240722656, "step": 1050 }, { "epoch": 0.2051473330846699, "grad_norm": 3.3436081409454346, "learning_rate": 1.0247086247086249e-05, "loss": 0.38577865600585937, "step": 1100 }, { "epoch": 0.2144722118612458, "grad_norm": 11.863536834716797, "learning_rate": 1.0713286713286714e-05, "loss": 0.3806105422973633, "step": 1150 }, { "epoch": 0.22379709063782172, "grad_norm": 5.786470413208008, "learning_rate": 1.117948717948718e-05, "loss": 0.37290851593017577, "step": 1200 }, { "epoch": 0.2331219694143976, "grad_norm": 3.0955147743225098, "learning_rate": 1.1645687645687646e-05, "loss": 0.33695747375488283, "step": 1250 }, { "epoch": 0.2424468481909735, "grad_norm": 10.411843299865723, "learning_rate": 1.2111888111888113e-05, "loss": 0.3169963836669922, "step": 1300 }, { "epoch": 0.2517717269675494, "grad_norm": 1.3671921491622925, "learning_rate": 1.2578088578088578e-05, "loss": 0.274707088470459, "step": 1350 }, { "epoch": 0.26109660574412535, "grad_norm": 0.8266241550445557, "learning_rate": 1.3044289044289045e-05, "loss": 0.24858610153198243, "step": 1400 }, { "epoch": 0.27042148452070125, "grad_norm": 3.4048688411712646, "learning_rate": 1.351048951048951e-05, "loss": 0.2767606163024902, "step": 1450 }, { "epoch": 0.27974636329727715, "grad_norm": 5.141544342041016, "learning_rate": 1.3976689976689979e-05, "loss": 0.23059135437011719, "step": 1500 }, { "epoch": 0.28907124207385304, "grad_norm": 2.9960217475891113, "learning_rate": 1.4442890442890444e-05, "loss": 0.2576522636413574, "step": 1550 }, { "epoch": 0.29839612085042894, "grad_norm": 7.788145542144775, "learning_rate": 1.4909090909090911e-05, "loss": 0.21580177307128906, "step": 1600 }, { "epoch": 0.30772099962700483, "grad_norm": 1.1988259553909302, "learning_rate": 1.5375291375291378e-05, "loss": 0.20308712005615234, "step": 1650 }, { "epoch": 0.3170458784035807, "grad_norm": 1.3631523847579956, "learning_rate": 1.5841491841491843e-05, "loss": 0.17964347839355468, "step": 1700 }, { "epoch": 0.3263707571801567, "grad_norm": 0.7174279689788818, "learning_rate": 1.630769230769231e-05, "loss": 0.18456392288208007, "step": 1750 }, { "epoch": 0.3356956359567326, "grad_norm": 2.560981273651123, "learning_rate": 1.6773892773892774e-05, "loss": 0.16574619293212892, "step": 1800 }, { "epoch": 0.34502051473330847, "grad_norm": 0.93900465965271, "learning_rate": 1.724009324009324e-05, "loss": 0.16731924057006836, "step": 1850 }, { "epoch": 0.35434539350988437, "grad_norm": 3.983893394470215, "learning_rate": 1.7706293706293708e-05, "loss": 0.14913288116455078, "step": 1900 }, { "epoch": 0.36367027228646026, "grad_norm": 3.581357479095459, "learning_rate": 1.8172494172494176e-05, "loss": 0.1477263832092285, "step": 1950 }, { "epoch": 0.37299515106303616, "grad_norm": 1.5302927494049072, "learning_rate": 1.8638694638694642e-05, "loss": 0.139080171585083, "step": 2000 }, { "epoch": 0.3823200298396121, "grad_norm": 4.7187910079956055, "learning_rate": 1.9104895104895107e-05, "loss": 0.1504351806640625, "step": 2050 }, { "epoch": 0.391644908616188, "grad_norm": 0.5396754145622253, "learning_rate": 1.9571095571095572e-05, "loss": 0.12960749626159668, "step": 2100 }, { "epoch": 0.4009697873927639, "grad_norm": 25.248533248901367, "learning_rate": 1.9995855566492257e-05, "loss": 0.14916876792907716, "step": 2150 }, { "epoch": 0.4102946661693398, "grad_norm": 6.426814079284668, "learning_rate": 1.9944050147645447e-05, "loss": 0.1148387622833252, "step": 2200 }, { "epoch": 0.4196195449459157, "grad_norm": 2.097109317779541, "learning_rate": 1.9892244728798633e-05, "loss": 0.11595455169677735, "step": 2250 }, { "epoch": 0.4289444237224916, "grad_norm": 1.893579125404358, "learning_rate": 1.9840439309951823e-05, "loss": 0.10183592796325684, "step": 2300 }, { "epoch": 0.43826930249906754, "grad_norm": 5.724792003631592, "learning_rate": 1.9788633891105013e-05, "loss": 0.09552323341369628, "step": 2350 }, { "epoch": 0.44759418127564343, "grad_norm": 1.2438207864761353, "learning_rate": 1.97368284722582e-05, "loss": 0.0971086597442627, "step": 2400 }, { "epoch": 0.45691906005221933, "grad_norm": 0.3428623676300049, "learning_rate": 1.968502305341139e-05, "loss": 0.09295537948608398, "step": 2450 }, { "epoch": 0.4662439388287952, "grad_norm": 1.6153521537780762, "learning_rate": 1.9633217634564575e-05, "loss": 0.12522640228271484, "step": 2500 }, { "epoch": 0.4755688176053711, "grad_norm": 2.685026168823242, "learning_rate": 1.9581412215717765e-05, "loss": 0.08630707740783691, "step": 2550 }, { "epoch": 0.484893696381947, "grad_norm": 0.31434252858161926, "learning_rate": 1.9529606796870955e-05, "loss": 0.10333613395690917, "step": 2600 }, { "epoch": 0.49421857515852297, "grad_norm": 2.9683053493499756, "learning_rate": 1.9477801378024144e-05, "loss": 0.07084932804107666, "step": 2650 }, { "epoch": 0.5035434539350988, "grad_norm": 3.0713419914245605, "learning_rate": 1.942599595917733e-05, "loss": 0.08680294990539551, "step": 2700 }, { "epoch": 0.5128683327116748, "grad_norm": 3.3729348182678223, "learning_rate": 1.937419054033052e-05, "loss": 0.07697622299194336, "step": 2750 }, { "epoch": 0.5221932114882507, "grad_norm": 24.873640060424805, "learning_rate": 1.932238512148371e-05, "loss": 0.10056709289550782, "step": 2800 }, { "epoch": 0.5315180902648265, "grad_norm": 0.8206455707550049, "learning_rate": 1.9270579702636897e-05, "loss": 0.09658415794372559, "step": 2850 }, { "epoch": 0.5408429690414025, "grad_norm": 11.821130752563477, "learning_rate": 1.9218774283790087e-05, "loss": 0.06848039150238037, "step": 2900 }, { "epoch": 0.5501678478179783, "grad_norm": 0.6246572136878967, "learning_rate": 1.9166968864943273e-05, "loss": 0.08011377334594727, "step": 2950 }, { "epoch": 0.5594927265945543, "grad_norm": 3.210092306137085, "learning_rate": 1.9115163446096463e-05, "loss": 0.10572279930114746, "step": 3000 }, { "epoch": 0.5688176053711301, "grad_norm": 3.658480644226074, "learning_rate": 1.9063358027249653e-05, "loss": 0.0694641637802124, "step": 3050 }, { "epoch": 0.5781424841477061, "grad_norm": 0.8585368394851685, "learning_rate": 1.9011552608402842e-05, "loss": 0.07078958988189697, "step": 3100 }, { "epoch": 0.587467362924282, "grad_norm": 0.5600335001945496, "learning_rate": 1.895974718955603e-05, "loss": 0.07252558708190918, "step": 3150 }, { "epoch": 0.5967922417008579, "grad_norm": 4.424919605255127, "learning_rate": 1.890794177070922e-05, "loss": 0.08730278968811035, "step": 3200 }, { "epoch": 0.6061171204774338, "grad_norm": 4.6426849365234375, "learning_rate": 1.8856136351862405e-05, "loss": 0.050492286682128906, "step": 3250 }, { "epoch": 0.6154419992540097, "grad_norm": 4.4583210945129395, "learning_rate": 1.8804330933015595e-05, "loss": 0.054503369331359866, "step": 3300 }, { "epoch": 0.6247668780305856, "grad_norm": 0.5928723812103271, "learning_rate": 1.8752525514168784e-05, "loss": 0.0773204231262207, "step": 3350 }, { "epoch": 0.6340917568071615, "grad_norm": 0.8700105547904968, "learning_rate": 1.870072009532197e-05, "loss": 0.08818217277526856, "step": 3400 }, { "epoch": 0.6434166355837374, "grad_norm": 6.234158515930176, "learning_rate": 1.864891467647516e-05, "loss": 0.05500625610351562, "step": 3450 }, { "epoch": 0.6527415143603134, "grad_norm": 0.5930687785148621, "learning_rate": 1.859710925762835e-05, "loss": 0.051360769271850584, "step": 3500 }, { "epoch": 0.6620663931368892, "grad_norm": 0.048168476670980453, "learning_rate": 1.854530383878154e-05, "loss": 0.07871677875518798, "step": 3550 }, { "epoch": 0.6713912719134651, "grad_norm": 0.26890629529953003, "learning_rate": 1.8493498419934727e-05, "loss": 0.03978243350982666, "step": 3600 }, { "epoch": 0.680716150690041, "grad_norm": 1.0152816772460938, "learning_rate": 1.8441693001087916e-05, "loss": 0.06742914199829102, "step": 3650 }, { "epoch": 0.6900410294666169, "grad_norm": 5.40765905380249, "learning_rate": 1.8389887582241103e-05, "loss": 0.04563611030578613, "step": 3700 }, { "epoch": 0.6993659082431929, "grad_norm": 9.407204627990723, "learning_rate": 1.8338082163394293e-05, "loss": 0.06598632335662842, "step": 3750 }, { "epoch": 0.7086907870197687, "grad_norm": 1.0526869297027588, "learning_rate": 1.8286276744547482e-05, "loss": 0.06473824024200439, "step": 3800 }, { "epoch": 0.7180156657963447, "grad_norm": 5.696482181549072, "learning_rate": 1.823447132570067e-05, "loss": 0.0610739803314209, "step": 3850 }, { "epoch": 0.7273405445729205, "grad_norm": 0.10160894691944122, "learning_rate": 1.818266590685386e-05, "loss": 0.05340108394622803, "step": 3900 }, { "epoch": 0.7366654233494965, "grad_norm": 3.2599477767944336, "learning_rate": 1.813086048800705e-05, "loss": 0.06807507038116455, "step": 3950 }, { "epoch": 0.7459903021260723, "grad_norm": 1.383055329322815, "learning_rate": 1.8079055069160235e-05, "loss": 0.058188986778259275, "step": 4000 }, { "epoch": 0.7553151809026483, "grad_norm": 6.310545444488525, "learning_rate": 1.8027249650313424e-05, "loss": 0.06925914287567139, "step": 4050 }, { "epoch": 0.7646400596792242, "grad_norm": 2.753561496734619, "learning_rate": 1.797544423146661e-05, "loss": 0.06061097145080566, "step": 4100 }, { "epoch": 0.7739649384558001, "grad_norm": 0.06244755908846855, "learning_rate": 1.79236388126198e-05, "loss": 0.05539895057678223, "step": 4150 }, { "epoch": 0.783289817232376, "grad_norm": 1.5955125093460083, "learning_rate": 1.787183339377299e-05, "loss": 0.04949520111083985, "step": 4200 }, { "epoch": 0.7926146960089518, "grad_norm": 0.15867096185684204, "learning_rate": 1.782002797492618e-05, "loss": 0.04355106830596924, "step": 4250 }, { "epoch": 0.8019395747855278, "grad_norm": 0.03898247703909874, "learning_rate": 1.7768222556079367e-05, "loss": 0.06238871097564697, "step": 4300 }, { "epoch": 0.8112644535621036, "grad_norm": 0.10622036457061768, "learning_rate": 1.7716417137232556e-05, "loss": 0.06471785068511964, "step": 4350 }, { "epoch": 0.8205893323386796, "grad_norm": 2.5175602436065674, "learning_rate": 1.7664611718385746e-05, "loss": 0.04465628147125244, "step": 4400 }, { "epoch": 0.8299142111152555, "grad_norm": 0.6827256679534912, "learning_rate": 1.7612806299538933e-05, "loss": 0.05508995056152344, "step": 4450 }, { "epoch": 0.8392390898918314, "grad_norm": 4.929401397705078, "learning_rate": 1.7561000880692122e-05, "loss": 0.028713507652282713, "step": 4500 }, { "epoch": 0.8485639686684073, "grad_norm": 3.8355817794799805, "learning_rate": 1.750919546184531e-05, "loss": 0.054467902183532715, "step": 4550 }, { "epoch": 0.8578888474449832, "grad_norm": 0.07267450541257858, "learning_rate": 1.74573900429985e-05, "loss": 0.05773499965667725, "step": 4600 }, { "epoch": 0.8672137262215591, "grad_norm": 2.4586944580078125, "learning_rate": 1.740558462415169e-05, "loss": 0.06556248664855957, "step": 4650 }, { "epoch": 0.8765386049981351, "grad_norm": 4.859276294708252, "learning_rate": 1.7353779205304878e-05, "loss": 0.06336853981018066, "step": 4700 }, { "epoch": 0.8858634837747109, "grad_norm": 0.44299831986427307, "learning_rate": 1.7301973786458065e-05, "loss": 0.05126949310302734, "step": 4750 }, { "epoch": 0.8951883625512869, "grad_norm": 5.093299865722656, "learning_rate": 1.7250168367611254e-05, "loss": 0.05324135303497315, "step": 4800 }, { "epoch": 0.9045132413278627, "grad_norm": 1.6905597448349, "learning_rate": 1.719836294876444e-05, "loss": 0.048749656677246095, "step": 4850 }, { "epoch": 0.9138381201044387, "grad_norm": 0.30517128109931946, "learning_rate": 1.714655752991763e-05, "loss": 0.05433460712432861, "step": 4900 }, { "epoch": 0.9231629988810145, "grad_norm": 0.4588942527770996, "learning_rate": 1.709475211107082e-05, "loss": 0.04207270622253418, "step": 4950 }, { "epoch": 0.9324878776575904, "grad_norm": 0.036567509174346924, "learning_rate": 1.7042946692224007e-05, "loss": 0.05857636451721191, "step": 5000 }, { "epoch": 0.9418127564341664, "grad_norm": 3.270030975341797, "learning_rate": 1.6991141273377196e-05, "loss": 0.05915598869323731, "step": 5050 }, { "epoch": 0.9511376352107422, "grad_norm": 6.163786888122559, "learning_rate": 1.6939335854530386e-05, "loss": 0.051976222991943356, "step": 5100 }, { "epoch": 0.9604625139873182, "grad_norm": 0.16877496242523193, "learning_rate": 1.6887530435683576e-05, "loss": 0.051587677001953124, "step": 5150 }, { "epoch": 0.969787392763894, "grad_norm": 0.4458121657371521, "learning_rate": 1.6835725016836762e-05, "loss": 0.037312333583831785, "step": 5200 }, { "epoch": 0.97911227154047, "grad_norm": 0.01349574513733387, "learning_rate": 1.6783919597989952e-05, "loss": 0.04749881267547607, "step": 5250 }, { "epoch": 0.9884371503170459, "grad_norm": 0.6714735627174377, "learning_rate": 1.673211417914314e-05, "loss": 0.050176200866699217, "step": 5300 }, { "epoch": 0.9977620290936218, "grad_norm": 11.990230560302734, "learning_rate": 1.668030876029633e-05, "loss": 0.07070876598358154, "step": 5350 }, { "epoch": 1.0, "eval_accuracy": 0.9874433707806322, "eval_f1": 0.8682108626198082, "eval_loss": 0.050337210297584534, "eval_precision": 0.8606492478226445, "eval_recall": 0.8759065269943593, "eval_runtime": 27.5818, "eval_samples_per_second": 198.065, "eval_steps_per_second": 24.763, "step": 5362 }, { "epoch": 1.0070869078701976, "grad_norm": 0.7046685814857483, "learning_rate": 1.6628503341449518e-05, "loss": 0.037335155010223386, "step": 5400 }, { "epoch": 1.0164117866467737, "grad_norm": 2.07792592048645, "learning_rate": 1.6576697922602705e-05, "loss": 0.03664821147918701, "step": 5450 }, { "epoch": 1.0257366654233495, "grad_norm": 1.8149992227554321, "learning_rate": 1.6524892503755894e-05, "loss": 0.03763864040374756, "step": 5500 }, { "epoch": 1.0350615441999254, "grad_norm": 0.8814394474029541, "learning_rate": 1.6473087084909084e-05, "loss": 0.0462725305557251, "step": 5550 }, { "epoch": 1.0443864229765012, "grad_norm": 8.331986427307129, "learning_rate": 1.642128166606227e-05, "loss": 0.025801122188568115, "step": 5600 }, { "epoch": 1.0537113017530773, "grad_norm": 0.5653894543647766, "learning_rate": 1.636947624721546e-05, "loss": 0.0302036452293396, "step": 5650 }, { "epoch": 1.063036180529653, "grad_norm": 0.1264486312866211, "learning_rate": 1.6317670828368647e-05, "loss": 0.03399224281311035, "step": 5700 }, { "epoch": 1.072361059306229, "grad_norm": 1.2637239694595337, "learning_rate": 1.6265865409521836e-05, "loss": 0.04418774604797363, "step": 5750 }, { "epoch": 1.081685938082805, "grad_norm": 5.040623188018799, "learning_rate": 1.6214059990675026e-05, "loss": 0.02509807586669922, "step": 5800 }, { "epoch": 1.0910108168593808, "grad_norm": 0.03714745491743088, "learning_rate": 1.6162254571828216e-05, "loss": 0.026492388248443605, "step": 5850 }, { "epoch": 1.1003356956359567, "grad_norm": 0.7756729125976562, "learning_rate": 1.6110449152981402e-05, "loss": 0.02965877056121826, "step": 5900 }, { "epoch": 1.1096605744125327, "grad_norm": 0.217277392745018, "learning_rate": 1.6058643734134592e-05, "loss": 0.022216553688049315, "step": 5950 }, { "epoch": 1.1189854531891086, "grad_norm": 4.127126216888428, "learning_rate": 1.6006838315287782e-05, "loss": 0.03008180618286133, "step": 6000 }, { "epoch": 1.1283103319656844, "grad_norm": 0.01144993957132101, "learning_rate": 1.595503289644097e-05, "loss": 0.04714715957641601, "step": 6050 }, { "epoch": 1.1376352107422603, "grad_norm": 0.037526026368141174, "learning_rate": 1.5903227477594158e-05, "loss": 0.04247360706329346, "step": 6100 }, { "epoch": 1.1469600895188363, "grad_norm": 0.9934174418449402, "learning_rate": 1.5851422058747345e-05, "loss": 0.02983764886856079, "step": 6150 }, { "epoch": 1.1562849682954122, "grad_norm": 0.4428967237472534, "learning_rate": 1.5799616639900534e-05, "loss": 0.027351632118225097, "step": 6200 }, { "epoch": 1.165609847071988, "grad_norm": 0.05002899840474129, "learning_rate": 1.5747811221053724e-05, "loss": 0.02518010139465332, "step": 6250 }, { "epoch": 1.174934725848564, "grad_norm": 0.18001802265644073, "learning_rate": 1.5696005802206914e-05, "loss": 0.040030746459960936, "step": 6300 }, { "epoch": 1.18425960462514, "grad_norm": 0.21795284748077393, "learning_rate": 1.56442003833601e-05, "loss": 0.03228116512298584, "step": 6350 }, { "epoch": 1.1935844834017157, "grad_norm": 0.33233147859573364, "learning_rate": 1.559239496451329e-05, "loss": 0.04077389717102051, "step": 6400 }, { "epoch": 1.2029093621782916, "grad_norm": 0.07459854334592819, "learning_rate": 1.5540589545666476e-05, "loss": 0.035812277793884274, "step": 6450 }, { "epoch": 1.2122342409548676, "grad_norm": 0.03117297776043415, "learning_rate": 1.5488784126819666e-05, "loss": 0.043056426048278806, "step": 6500 }, { "epoch": 1.2215591197314435, "grad_norm": 2.1351895332336426, "learning_rate": 1.5436978707972856e-05, "loss": 0.026498048305511473, "step": 6550 }, { "epoch": 1.2308839985080193, "grad_norm": 0.1372031569480896, "learning_rate": 1.5385173289126042e-05, "loss": 0.03637035131454468, "step": 6600 }, { "epoch": 1.2402088772845954, "grad_norm": 0.06523732095956802, "learning_rate": 1.5333367870279232e-05, "loss": 0.06686021327972412, "step": 6650 }, { "epoch": 1.2495337560611712, "grad_norm": 0.02072199061512947, "learning_rate": 1.5281562451432422e-05, "loss": 0.03420682668685913, "step": 6700 }, { "epoch": 1.258858634837747, "grad_norm": 9.351777076721191, "learning_rate": 1.522975703258561e-05, "loss": 0.026438066959381102, "step": 6750 }, { "epoch": 1.2681835136143231, "grad_norm": 0.40086886286735535, "learning_rate": 1.5177951613738796e-05, "loss": 0.046449775695800784, "step": 6800 }, { "epoch": 1.277508392390899, "grad_norm": 0.5892062783241272, "learning_rate": 1.5126146194891986e-05, "loss": 0.0309269380569458, "step": 6850 }, { "epoch": 1.2868332711674748, "grad_norm": 0.002104206709191203, "learning_rate": 1.5074340776045176e-05, "loss": 0.023648200035095216, "step": 6900 }, { "epoch": 1.2961581499440507, "grad_norm": 0.0258804801851511, "learning_rate": 1.5022535357198364e-05, "loss": 0.04035449504852295, "step": 6950 }, { "epoch": 1.3054830287206267, "grad_norm": 4.397562026977539, "learning_rate": 1.4970729938351554e-05, "loss": 0.05229721546173096, "step": 7000 }, { "epoch": 1.3148079074972026, "grad_norm": 0.007150724530220032, "learning_rate": 1.491892451950474e-05, "loss": 0.03332861661911011, "step": 7050 }, { "epoch": 1.3241327862737784, "grad_norm": 3.109645128250122, "learning_rate": 1.486711910065793e-05, "loss": 0.047160525321960446, "step": 7100 }, { "epoch": 1.3334576650503545, "grad_norm": 0.41086408495903015, "learning_rate": 1.4815313681811118e-05, "loss": 0.04890709400177002, "step": 7150 }, { "epoch": 1.3427825438269303, "grad_norm": 0.05025002732872963, "learning_rate": 1.4763508262964308e-05, "loss": 0.04013650417327881, "step": 7200 }, { "epoch": 1.3521074226035061, "grad_norm": 0.03028084896504879, "learning_rate": 1.4711702844117498e-05, "loss": 0.0283713960647583, "step": 7250 }, { "epoch": 1.361432301380082, "grad_norm": 0.031166842207312584, "learning_rate": 1.4659897425270684e-05, "loss": 0.03349567174911499, "step": 7300 }, { "epoch": 1.370757180156658, "grad_norm": 4.032196521759033, "learning_rate": 1.4608092006423872e-05, "loss": 0.029915103912353514, "step": 7350 }, { "epoch": 1.3800820589332339, "grad_norm": 3.165501594543457, "learning_rate": 1.4556286587577062e-05, "loss": 0.040238561630249026, "step": 7400 }, { "epoch": 1.3894069377098097, "grad_norm": 0.05803289636969566, "learning_rate": 1.4504481168730252e-05, "loss": 0.03673480272293091, "step": 7450 }, { "epoch": 1.3987318164863858, "grad_norm": 0.0027874386869370937, "learning_rate": 1.4452675749883438e-05, "loss": 0.03083367109298706, "step": 7500 }, { "epoch": 1.4080566952629616, "grad_norm": 0.39723604917526245, "learning_rate": 1.4400870331036628e-05, "loss": 0.03706796646118164, "step": 7550 }, { "epoch": 1.4173815740395375, "grad_norm": 0.006277570500969887, "learning_rate": 1.4349064912189816e-05, "loss": 0.030687217712402345, "step": 7600 }, { "epoch": 1.4267064528161133, "grad_norm": 2.196660041809082, "learning_rate": 1.4297259493343006e-05, "loss": 0.03273656129837036, "step": 7650 }, { "epoch": 1.4360313315926894, "grad_norm": 2.9575355052948, "learning_rate": 1.4245454074496194e-05, "loss": 0.025758986473083497, "step": 7700 }, { "epoch": 1.4453562103692652, "grad_norm": 0.08796069771051407, "learning_rate": 1.4193648655649382e-05, "loss": 0.03270584583282471, "step": 7750 }, { "epoch": 1.454681089145841, "grad_norm": 0.9201443791389465, "learning_rate": 1.414184323680257e-05, "loss": 0.023307127952575682, "step": 7800 }, { "epoch": 1.464005967922417, "grad_norm": 1.7311280965805054, "learning_rate": 1.409003781795576e-05, "loss": 0.03505758047103882, "step": 7850 }, { "epoch": 1.473330846698993, "grad_norm": 7.217854022979736, "learning_rate": 1.4038232399108948e-05, "loss": 0.032117910385131836, "step": 7900 }, { "epoch": 1.4826557254755688, "grad_norm": 0.16375161707401276, "learning_rate": 1.3986426980262136e-05, "loss": 0.024907276630401612, "step": 7950 }, { "epoch": 1.4919806042521446, "grad_norm": 2.5342984199523926, "learning_rate": 1.3934621561415324e-05, "loss": 0.02424616813659668, "step": 8000 }, { "epoch": 1.5013054830287205, "grad_norm": 0.5667886137962341, "learning_rate": 1.3882816142568514e-05, "loss": 0.035223734378814694, "step": 8050 }, { "epoch": 1.5106303618052965, "grad_norm": 4.529999732971191, "learning_rate": 1.3831010723721704e-05, "loss": 0.043730239868164066, "step": 8100 }, { "epoch": 1.5199552405818726, "grad_norm": 0.31222647428512573, "learning_rate": 1.3779205304874892e-05, "loss": 0.02817868709564209, "step": 8150 }, { "epoch": 1.5292801193584484, "grad_norm": 0.054891835898160934, "learning_rate": 1.3727399886028078e-05, "loss": 0.022635526657104492, "step": 8200 }, { "epoch": 1.5386049981350243, "grad_norm": 3.897071361541748, "learning_rate": 1.3675594467181268e-05, "loss": 0.0365865421295166, "step": 8250 }, { "epoch": 1.5479298769116001, "grad_norm": 7.58866024017334, "learning_rate": 1.3623789048334458e-05, "loss": 0.03705208778381348, "step": 8300 }, { "epoch": 1.557254755688176, "grad_norm": 0.2579911947250366, "learning_rate": 1.3571983629487646e-05, "loss": 0.036470816135406495, "step": 8350 }, { "epoch": 1.566579634464752, "grad_norm": 0.04304761812090874, "learning_rate": 1.3520178210640836e-05, "loss": 0.024144577980041503, "step": 8400 }, { "epoch": 1.5759045132413279, "grad_norm": 0.011871698312461376, "learning_rate": 1.3468372791794022e-05, "loss": 0.02437096118927002, "step": 8450 }, { "epoch": 1.585229392017904, "grad_norm": 1.8747565746307373, "learning_rate": 1.3416567372947212e-05, "loss": 0.021970641613006592, "step": 8500 }, { "epoch": 1.5945542707944798, "grad_norm": 0.0068074301816523075, "learning_rate": 1.33647619541004e-05, "loss": 0.03055704355239868, "step": 8550 }, { "epoch": 1.6038791495710556, "grad_norm": 0.7720779776573181, "learning_rate": 1.331295653525359e-05, "loss": 0.05535665988922119, "step": 8600 }, { "epoch": 1.6132040283476314, "grad_norm": 0.052967652678489685, "learning_rate": 1.3261151116406776e-05, "loss": 0.037333052158355716, "step": 8650 }, { "epoch": 1.6225289071242073, "grad_norm": 0.4131523370742798, "learning_rate": 1.3209345697559966e-05, "loss": 0.022513895034790038, "step": 8700 }, { "epoch": 1.6318537859007833, "grad_norm": 0.0903526097536087, "learning_rate": 1.3157540278713154e-05, "loss": 0.018794809579849244, "step": 8750 }, { "epoch": 1.6411786646773592, "grad_norm": 0.03226502984762192, "learning_rate": 1.3105734859866344e-05, "loss": 0.021845638751983643, "step": 8800 }, { "epoch": 1.6505035434539352, "grad_norm": 1.2775359153747559, "learning_rate": 1.3053929441019533e-05, "loss": 0.03150895118713379, "step": 8850 }, { "epoch": 1.659828422230511, "grad_norm": 0.0202046986669302, "learning_rate": 1.300212402217272e-05, "loss": 0.02440279006958008, "step": 8900 }, { "epoch": 1.669153301007087, "grad_norm": 5.867640495300293, "learning_rate": 1.2950318603325908e-05, "loss": 0.033769989013671876, "step": 8950 }, { "epoch": 1.6784781797836628, "grad_norm": 0.006102518644183874, "learning_rate": 1.2898513184479098e-05, "loss": 0.020930655002593994, "step": 9000 }, { "epoch": 1.6878030585602386, "grad_norm": 0.06524361670017242, "learning_rate": 1.2846707765632288e-05, "loss": 0.0303147554397583, "step": 9050 }, { "epoch": 1.6971279373368147, "grad_norm": 1.1646746397018433, "learning_rate": 1.2794902346785474e-05, "loss": 0.025315618515014647, "step": 9100 }, { "epoch": 1.7064528161133905, "grad_norm": 2.134981393814087, "learning_rate": 1.2743096927938664e-05, "loss": 0.04284055233001709, "step": 9150 }, { "epoch": 1.7157776948899666, "grad_norm": 2.9764657020568848, "learning_rate": 1.2691291509091852e-05, "loss": 0.02323296308517456, "step": 9200 }, { "epoch": 1.7251025736665424, "grad_norm": 0.15592370927333832, "learning_rate": 1.2639486090245042e-05, "loss": 0.03630294561386108, "step": 9250 }, { "epoch": 1.7344274524431182, "grad_norm": 1.1410564184188843, "learning_rate": 1.258768067139823e-05, "loss": 0.02985832929611206, "step": 9300 }, { "epoch": 1.743752331219694, "grad_norm": 1.3886200189590454, "learning_rate": 1.2535875252551418e-05, "loss": 0.019391053915023805, "step": 9350 }, { "epoch": 1.75307720999627, "grad_norm": 12.997761726379395, "learning_rate": 1.2484069833704606e-05, "loss": 0.02593435287475586, "step": 9400 }, { "epoch": 1.762402088772846, "grad_norm": 2.852426052093506, "learning_rate": 1.2432264414857796e-05, "loss": 0.036953463554382324, "step": 9450 }, { "epoch": 1.7717269675494218, "grad_norm": 1.0583350658416748, "learning_rate": 1.2380458996010984e-05, "loss": 0.025919597148895263, "step": 9500 }, { "epoch": 1.781051846325998, "grad_norm": 0.06280253827571869, "learning_rate": 1.2328653577164172e-05, "loss": 0.024321415424346925, "step": 9550 }, { "epoch": 1.7903767251025737, "grad_norm": 1.4471710920333862, "learning_rate": 1.227684815831736e-05, "loss": 0.02954728364944458, "step": 9600 }, { "epoch": 1.7997016038791496, "grad_norm": 0.3254970610141754, "learning_rate": 1.222504273947055e-05, "loss": 0.0403021764755249, "step": 9650 }, { "epoch": 1.8090264826557254, "grad_norm": 0.026926545426249504, "learning_rate": 1.217323732062374e-05, "loss": 0.01865153431892395, "step": 9700 }, { "epoch": 1.8183513614323012, "grad_norm": 0.037455275654792786, "learning_rate": 1.2121431901776928e-05, "loss": 0.02834453582763672, "step": 9750 }, { "epoch": 1.8276762402088773, "grad_norm": 1.9724242687225342, "learning_rate": 1.2069626482930114e-05, "loss": 0.02292172908782959, "step": 9800 }, { "epoch": 1.8370011189854532, "grad_norm": 2.2518837451934814, "learning_rate": 1.2017821064083304e-05, "loss": 0.029299042224884032, "step": 9850 }, { "epoch": 1.8463259977620292, "grad_norm": 0.8918629884719849, "learning_rate": 1.1966015645236493e-05, "loss": 0.02729450464248657, "step": 9900 }, { "epoch": 1.855650876538605, "grad_norm": 0.015352281741797924, "learning_rate": 1.1914210226389682e-05, "loss": 0.025739452838897704, "step": 9950 }, { "epoch": 1.864975755315181, "grad_norm": 12.12820816040039, "learning_rate": 1.1862404807542871e-05, "loss": 0.05083851337432861, "step": 10000 }, { "epoch": 1.8743006340917567, "grad_norm": 0.03783294931054115, "learning_rate": 1.1810599388696058e-05, "loss": 0.03862152814865112, "step": 10050 }, { "epoch": 1.8836255128683326, "grad_norm": 1.312626838684082, "learning_rate": 1.1758793969849248e-05, "loss": 0.040201754570007325, "step": 10100 }, { "epoch": 1.8929503916449086, "grad_norm": 0.27149704098701477, "learning_rate": 1.1706988551002436e-05, "loss": 0.020465714931488035, "step": 10150 }, { "epoch": 1.9022752704214845, "grad_norm": 0.051149722188711166, "learning_rate": 1.1655183132155625e-05, "loss": 0.019956029653549194, "step": 10200 }, { "epoch": 1.9116001491980605, "grad_norm": 0.012450406327843666, "learning_rate": 1.1603377713308812e-05, "loss": 0.030698204040527345, "step": 10250 }, { "epoch": 1.9209250279746364, "grad_norm": 0.04320710152387619, "learning_rate": 1.1551572294462002e-05, "loss": 0.01924089193344116, "step": 10300 }, { "epoch": 1.9302499067512122, "grad_norm": 0.028835974633693695, "learning_rate": 1.149976687561519e-05, "loss": 0.03460402250289917, "step": 10350 }, { "epoch": 1.939574785527788, "grad_norm": 0.032503023743629456, "learning_rate": 1.144796145676838e-05, "loss": 0.021525814533233642, "step": 10400 }, { "epoch": 1.948899664304364, "grad_norm": 0.054762404412031174, "learning_rate": 1.139615603792157e-05, "loss": 0.016051357984542845, "step": 10450 }, { "epoch": 1.95822454308094, "grad_norm": 0.41409963369369507, "learning_rate": 1.1344350619074756e-05, "loss": 0.02211181879043579, "step": 10500 }, { "epoch": 1.9675494218575158, "grad_norm": 0.6710904836654663, "learning_rate": 1.1292545200227944e-05, "loss": 0.03442399978637695, "step": 10550 }, { "epoch": 1.9768743006340919, "grad_norm": 0.20875470340251923, "learning_rate": 1.1240739781381133e-05, "loss": 0.029538695812225343, "step": 10600 }, { "epoch": 1.9861991794106677, "grad_norm": 7.152144432067871, "learning_rate": 1.1188934362534323e-05, "loss": 0.02945619821548462, "step": 10650 }, { "epoch": 1.9955240581872435, "grad_norm": 0.0340808629989624, "learning_rate": 1.113712894368751e-05, "loss": 0.021653232574462892, "step": 10700 }, { "epoch": 2.0, "eval_accuracy": 0.9903632126116217, "eval_f1": 0.9054189162167566, "eval_loss": 0.04485788941383362, "eval_precision": 0.8987693529178246, "eval_recall": 0.9121676067687349, "eval_runtime": 7.0416, "eval_samples_per_second": 775.82, "eval_steps_per_second": 96.995, "step": 10724 }, { "epoch": 2.0048489369638194, "grad_norm": 0.006561782211065292, "learning_rate": 1.10853235248407e-05, "loss": 0.023352961540222168, "step": 10750 }, { "epoch": 2.0141738157403952, "grad_norm": 0.03905324265360832, "learning_rate": 1.1033518105993888e-05, "loss": 0.007830613255500794, "step": 10800 }, { "epoch": 2.023498694516971, "grad_norm": 0.002779081929475069, "learning_rate": 1.0981712687147077e-05, "loss": 0.012746865749359132, "step": 10850 }, { "epoch": 2.0328235732935473, "grad_norm": 3.28019642829895, "learning_rate": 1.0929907268300265e-05, "loss": 0.021561498641967772, "step": 10900 }, { "epoch": 2.042148452070123, "grad_norm": 0.11420201510190964, "learning_rate": 1.0878101849453453e-05, "loss": 0.01144665241241455, "step": 10950 }, { "epoch": 2.051473330846699, "grad_norm": 0.01909773238003254, "learning_rate": 1.0826296430606642e-05, "loss": 0.006808329224586487, "step": 11000 }, { "epoch": 2.060798209623275, "grad_norm": 0.03136987239122391, "learning_rate": 1.0774491011759831e-05, "loss": 0.015448588132858276, "step": 11050 }, { "epoch": 2.0701230883998507, "grad_norm": 0.0069969939067959785, "learning_rate": 1.072268559291302e-05, "loss": 0.020947656631469726, "step": 11100 }, { "epoch": 2.0794479671764265, "grad_norm": 0.008591468445956707, "learning_rate": 1.0670880174066208e-05, "loss": 0.013551335334777832, "step": 11150 }, { "epoch": 2.0887728459530024, "grad_norm": 0.007207474671304226, "learning_rate": 1.0619074755219396e-05, "loss": 0.010735607147216797, "step": 11200 }, { "epoch": 2.0980977247295787, "grad_norm": 0.007553383708000183, "learning_rate": 1.0567269336372585e-05, "loss": 0.0180646276473999, "step": 11250 }, { "epoch": 2.1074226035061545, "grad_norm": 0.0854165256023407, "learning_rate": 1.0515463917525775e-05, "loss": 0.012175880670547486, "step": 11300 }, { "epoch": 2.1167474822827304, "grad_norm": 1.2997490167617798, "learning_rate": 1.0463658498678963e-05, "loss": 0.028563385009765626, "step": 11350 }, { "epoch": 2.126072361059306, "grad_norm": 0.028747934848070145, "learning_rate": 1.041185307983215e-05, "loss": 0.008224156498908997, "step": 11400 }, { "epoch": 2.135397239835882, "grad_norm": 0.02653522975742817, "learning_rate": 1.036004766098534e-05, "loss": 0.014218298196792602, "step": 11450 }, { "epoch": 2.144722118612458, "grad_norm": 0.0075917416252195835, "learning_rate": 1.030824224213853e-05, "loss": 0.010074301958084106, "step": 11500 }, { "epoch": 2.1540469973890337, "grad_norm": 0.01568465493619442, "learning_rate": 1.0256436823291717e-05, "loss": 0.009785271286964416, "step": 11550 }, { "epoch": 2.16337187616561, "grad_norm": 2.6329779624938965, "learning_rate": 1.0204631404444907e-05, "loss": 0.039693479537963865, "step": 11600 }, { "epoch": 2.172696754942186, "grad_norm": 0.25744888186454773, "learning_rate": 1.0152825985598094e-05, "loss": 0.009682031273841858, "step": 11650 }, { "epoch": 2.1820216337187617, "grad_norm": 0.048078108578920364, "learning_rate": 1.0101020566751283e-05, "loss": 0.012061976194381714, "step": 11700 }, { "epoch": 2.1913465124953375, "grad_norm": 0.009185828268527985, "learning_rate": 1.0049215147904471e-05, "loss": 0.01473943829536438, "step": 11750 }, { "epoch": 2.2006713912719134, "grad_norm": 0.3359212279319763, "learning_rate": 9.99740972905766e-06, "loss": 0.022451975345611573, "step": 11800 }, { "epoch": 2.209996270048489, "grad_norm": 0.03128429129719734, "learning_rate": 9.94560431021085e-06, "loss": 0.015020393133163452, "step": 11850 }, { "epoch": 2.2193211488250655, "grad_norm": 0.01077917031943798, "learning_rate": 9.893798891364037e-06, "loss": 0.007385715842247009, "step": 11900 }, { "epoch": 2.2286460276016413, "grad_norm": 0.0009410646744072437, "learning_rate": 9.841993472517225e-06, "loss": 0.010898010730743408, "step": 11950 }, { "epoch": 2.237970906378217, "grad_norm": 0.23428411781787872, "learning_rate": 9.790188053670415e-06, "loss": 0.017517651319503783, "step": 12000 }, { "epoch": 2.247295785154793, "grad_norm": 5.2552947998046875, "learning_rate": 9.738382634823603e-06, "loss": 0.011954027414321899, "step": 12050 }, { "epoch": 2.256620663931369, "grad_norm": 0.1022522896528244, "learning_rate": 9.686577215976793e-06, "loss": 0.0103814697265625, "step": 12100 }, { "epoch": 2.2659455427079447, "grad_norm": 0.01425126288086176, "learning_rate": 9.634771797129981e-06, "loss": 0.016681231260299682, "step": 12150 }, { "epoch": 2.2752704214845205, "grad_norm": 0.010022806003689766, "learning_rate": 9.58296637828317e-06, "loss": 0.007602689266204834, "step": 12200 }, { "epoch": 2.2845953002610964, "grad_norm": 0.09281191229820251, "learning_rate": 9.531160959436357e-06, "loss": 0.015772578716278077, "step": 12250 }, { "epoch": 2.2939201790376726, "grad_norm": 1.6627157926559448, "learning_rate": 9.479355540589547e-06, "loss": 0.0149391770362854, "step": 12300 }, { "epoch": 2.3032450578142485, "grad_norm": 0.039720647037029266, "learning_rate": 9.427550121742735e-06, "loss": 0.004919275641441345, "step": 12350 }, { "epoch": 2.3125699365908243, "grad_norm": 0.13361865282058716, "learning_rate": 9.375744702895923e-06, "loss": 0.0066268140077590946, "step": 12400 }, { "epoch": 2.3218948153674, "grad_norm": 0.004165187943726778, "learning_rate": 9.323939284049113e-06, "loss": 0.008859132528305053, "step": 12450 }, { "epoch": 2.331219694143976, "grad_norm": 0.01734941452741623, "learning_rate": 9.272133865202301e-06, "loss": 0.01958281397819519, "step": 12500 }, { "epoch": 2.340544572920552, "grad_norm": 1.4992754459381104, "learning_rate": 9.22032844635549e-06, "loss": 0.024173910617828368, "step": 12550 }, { "epoch": 2.349869451697128, "grad_norm": 3.266171455383301, "learning_rate": 9.168523027508677e-06, "loss": 0.026157324314117433, "step": 12600 }, { "epoch": 2.359194330473704, "grad_norm": 0.034271348267793655, "learning_rate": 9.116717608661867e-06, "loss": 0.004791333377361298, "step": 12650 }, { "epoch": 2.36851920925028, "grad_norm": 0.020556321367621422, "learning_rate": 9.064912189815055e-06, "loss": 0.023116433620452882, "step": 12700 }, { "epoch": 2.3778440880268557, "grad_norm": 2.9007959365844727, "learning_rate": 9.013106770968243e-06, "loss": 0.003782390058040619, "step": 12750 }, { "epoch": 2.3871689668034315, "grad_norm": 0.04751985892653465, "learning_rate": 8.961301352121433e-06, "loss": 0.004796516001224518, "step": 12800 }, { "epoch": 2.3964938455800073, "grad_norm": 0.09174877405166626, "learning_rate": 8.909495933274621e-06, "loss": 0.01988631725311279, "step": 12850 }, { "epoch": 2.405818724356583, "grad_norm": 0.897373378276825, "learning_rate": 8.857690514427811e-06, "loss": 0.011833161115646362, "step": 12900 }, { "epoch": 2.4151436031331595, "grad_norm": 0.026099465787410736, "learning_rate": 8.805885095580999e-06, "loss": 0.021891412734985353, "step": 12950 }, { "epoch": 2.4244684819097353, "grad_norm": 0.005264167208224535, "learning_rate": 8.754079676734187e-06, "loss": 0.014649747610092164, "step": 13000 }, { "epoch": 2.433793360686311, "grad_norm": 0.0665712058544159, "learning_rate": 8.702274257887375e-06, "loss": 0.018104093074798586, "step": 13050 }, { "epoch": 2.443118239462887, "grad_norm": 0.01004517637193203, "learning_rate": 8.650468839040565e-06, "loss": 0.004754712581634521, "step": 13100 }, { "epoch": 2.452443118239463, "grad_norm": 0.011136854998767376, "learning_rate": 8.598663420193753e-06, "loss": 0.008313758969306946, "step": 13150 }, { "epoch": 2.4617679970160387, "grad_norm": 0.0015451794024556875, "learning_rate": 8.546858001346941e-06, "loss": 0.008117977380752563, "step": 13200 }, { "epoch": 2.471092875792615, "grad_norm": 1.5158227682113647, "learning_rate": 8.495052582500131e-06, "loss": 0.02230316638946533, "step": 13250 }, { "epoch": 2.480417754569191, "grad_norm": 0.015987800434231758, "learning_rate": 8.443247163653319e-06, "loss": 0.0033962687849998473, "step": 13300 }, { "epoch": 2.4897426333457666, "grad_norm": 0.2436022162437439, "learning_rate": 8.391441744806507e-06, "loss": 0.009453248977661134, "step": 13350 }, { "epoch": 2.4990675121223425, "grad_norm": 0.007971422746777534, "learning_rate": 8.339636325959695e-06, "loss": 0.005669102668762207, "step": 13400 }, { "epoch": 2.5083923908989183, "grad_norm": 0.030247289687395096, "learning_rate": 8.287830907112885e-06, "loss": 0.007165596485137939, "step": 13450 }, { "epoch": 2.517717269675494, "grad_norm": 0.03285367041826248, "learning_rate": 8.236025488266073e-06, "loss": 0.01023703694343567, "step": 13500 }, { "epoch": 2.52704214845207, "grad_norm": 1.4136919975280762, "learning_rate": 8.184220069419261e-06, "loss": 0.019952696561813355, "step": 13550 }, { "epoch": 2.5363670272286463, "grad_norm": 0.13177263736724854, "learning_rate": 8.132414650572451e-06, "loss": 0.008267701864242553, "step": 13600 }, { "epoch": 2.5456919060052217, "grad_norm": 13.017802238464355, "learning_rate": 8.080609231725639e-06, "loss": 0.013502672910690308, "step": 13650 }, { "epoch": 2.555016784781798, "grad_norm": 20.80805015563965, "learning_rate": 8.028803812878829e-06, "loss": 0.014624173641204835, "step": 13700 }, { "epoch": 2.564341663558374, "grad_norm": 0.05195024982094765, "learning_rate": 7.976998394032017e-06, "loss": 0.025228326320648194, "step": 13750 }, { "epoch": 2.5736665423349496, "grad_norm": 0.004629973322153091, "learning_rate": 7.925192975185205e-06, "loss": 0.02166285514831543, "step": 13800 }, { "epoch": 2.5829914211115255, "grad_norm": 0.0022503056097775698, "learning_rate": 7.873387556338393e-06, "loss": 0.02188849925994873, "step": 13850 }, { "epoch": 2.5923162998881013, "grad_norm": 0.8524413108825684, "learning_rate": 7.821582137491583e-06, "loss": 0.007161260843276978, "step": 13900 }, { "epoch": 2.6016411786646776, "grad_norm": 2.9589359760284424, "learning_rate": 7.769776718644771e-06, "loss": 0.009217590093612671, "step": 13950 }, { "epoch": 2.6109660574412534, "grad_norm": 0.0014888152945786715, "learning_rate": 7.717971299797959e-06, "loss": 0.007640480399131775, "step": 14000 }, { "epoch": 2.6202909362178293, "grad_norm": 0.0024451257195323706, "learning_rate": 7.666165880951149e-06, "loss": 0.009097555875778198, "step": 14050 }, { "epoch": 2.629615814994405, "grad_norm": 1.4727226495742798, "learning_rate": 7.614360462104337e-06, "loss": 0.022815148830413818, "step": 14100 }, { "epoch": 2.638940693770981, "grad_norm": 0.14492234587669373, "learning_rate": 7.562555043257526e-06, "loss": 0.00907568097114563, "step": 14150 }, { "epoch": 2.648265572547557, "grad_norm": 0.006422064267098904, "learning_rate": 7.510749624410714e-06, "loss": 0.012345269918441773, "step": 14200 }, { "epoch": 2.6575904513241326, "grad_norm": 0.003297192510217428, "learning_rate": 7.458944205563903e-06, "loss": 0.013943998813629151, "step": 14250 }, { "epoch": 2.666915330100709, "grad_norm": 0.009486474096775055, "learning_rate": 7.407138786717091e-06, "loss": 0.007204347848892212, "step": 14300 }, { "epoch": 2.6762402088772848, "grad_norm": 0.001906346995383501, "learning_rate": 7.35533336787028e-06, "loss": 0.0064238041639328005, "step": 14350 }, { "epoch": 2.6855650876538606, "grad_norm": 0.009013752453029156, "learning_rate": 7.303527949023469e-06, "loss": 0.019118592739105225, "step": 14400 }, { "epoch": 2.6948899664304364, "grad_norm": 9.203516006469727, "learning_rate": 7.251722530176657e-06, "loss": 0.014843382835388184, "step": 14450 }, { "epoch": 2.7042148452070123, "grad_norm": 0.013872411102056503, "learning_rate": 7.199917111329846e-06, "loss": 0.017863935232162474, "step": 14500 }, { "epoch": 2.713539723983588, "grad_norm": 0.008059196174144745, "learning_rate": 7.148111692483034e-06, "loss": 0.007313421964645386, "step": 14550 }, { "epoch": 2.722864602760164, "grad_norm": 0.007967078126966953, "learning_rate": 7.096306273636223e-06, "loss": 0.005426759123802185, "step": 14600 }, { "epoch": 2.7321894815367402, "grad_norm": 0.052042555063962936, "learning_rate": 7.044500854789411e-06, "loss": 0.010500948429107666, "step": 14650 }, { "epoch": 2.741514360313316, "grad_norm": 0.008007310330867767, "learning_rate": 6.9926954359426e-06, "loss": 0.011817890405654907, "step": 14700 }, { "epoch": 2.750839239089892, "grad_norm": 0.553403377532959, "learning_rate": 6.940890017095788e-06, "loss": 0.011694425344467163, "step": 14750 }, { "epoch": 2.7601641178664678, "grad_norm": 0.011203479021787643, "learning_rate": 6.889084598248977e-06, "loss": 0.009664978981018067, "step": 14800 }, { "epoch": 2.7694889966430436, "grad_norm": 0.031599052250385284, "learning_rate": 6.837279179402167e-06, "loss": 0.0062562096118927, "step": 14850 }, { "epoch": 2.7788138754196194, "grad_norm": 0.07515502721071243, "learning_rate": 6.785473760555355e-06, "loss": 0.008186891674995422, "step": 14900 }, { "epoch": 2.7881387541961953, "grad_norm": 0.004041098989546299, "learning_rate": 6.733668341708544e-06, "loss": 0.008758670091629029, "step": 14950 }, { "epoch": 2.7974636329727716, "grad_norm": 0.010477816686034203, "learning_rate": 6.681862922861732e-06, "loss": 0.008421186804771424, "step": 15000 }, { "epoch": 2.8067885117493474, "grad_norm": 0.037119459360837936, "learning_rate": 6.630057504014921e-06, "loss": 0.0044859576225280764, "step": 15050 }, { "epoch": 2.8161133905259232, "grad_norm": 2.2909059524536133, "learning_rate": 6.578252085168109e-06, "loss": 0.007467656135559082, "step": 15100 }, { "epoch": 2.825438269302499, "grad_norm": 0.028654785826802254, "learning_rate": 6.526446666321298e-06, "loss": 0.011730804443359374, "step": 15150 }, { "epoch": 2.834763148079075, "grad_norm": 0.00396377919241786, "learning_rate": 6.474641247474487e-06, "loss": 0.007885778546333313, "step": 15200 }, { "epoch": 2.8440880268556508, "grad_norm": 4.526209354400635, "learning_rate": 6.422835828627675e-06, "loss": 0.013013125658035278, "step": 15250 }, { "epoch": 2.8534129056322266, "grad_norm": 0.006890705320984125, "learning_rate": 6.371030409780864e-06, "loss": 0.020241425037384034, "step": 15300 }, { "epoch": 2.862737784408803, "grad_norm": 0.04351874813437462, "learning_rate": 6.319224990934052e-06, "loss": 0.02235487461090088, "step": 15350 }, { "epoch": 2.8720626631853787, "grad_norm": 0.004027783405035734, "learning_rate": 6.267419572087241e-06, "loss": 0.012025052309036255, "step": 15400 }, { "epoch": 2.8813875419619546, "grad_norm": 0.017081253230571747, "learning_rate": 6.215614153240429e-06, "loss": 0.012213168144226074, "step": 15450 }, { "epoch": 2.8907124207385304, "grad_norm": 5.580208778381348, "learning_rate": 6.163808734393618e-06, "loss": 0.011020108461380004, "step": 15500 }, { "epoch": 2.9000372995151062, "grad_norm": 0.026449229568243027, "learning_rate": 6.112003315546806e-06, "loss": 0.020866034030914308, "step": 15550 }, { "epoch": 2.909362178291682, "grad_norm": 0.018465599045157433, "learning_rate": 6.060197896699996e-06, "loss": 0.006286224126815796, "step": 15600 }, { "epoch": 2.918687057068258, "grad_norm": 0.004978302400559187, "learning_rate": 6.008392477853185e-06, "loss": 0.01425373911857605, "step": 15650 }, { "epoch": 2.928011935844834, "grad_norm": 0.008023403584957123, "learning_rate": 5.956587059006373e-06, "loss": 0.008588857650756836, "step": 15700 }, { "epoch": 2.93733681462141, "grad_norm": 0.014545072801411152, "learning_rate": 5.904781640159562e-06, "loss": 0.009176114797592163, "step": 15750 }, { "epoch": 2.946661693397986, "grad_norm": 0.0036765779368579388, "learning_rate": 5.85297622131275e-06, "loss": 0.019455695152282716, "step": 15800 }, { "epoch": 2.9559865721745617, "grad_norm": 8.891608238220215, "learning_rate": 5.801170802465939e-06, "loss": 0.012102892398834228, "step": 15850 }, { "epoch": 2.9653114509511376, "grad_norm": 0.05219835415482521, "learning_rate": 5.749365383619127e-06, "loss": 0.009157007336616516, "step": 15900 }, { "epoch": 2.9746363297277134, "grad_norm": 0.003453275188803673, "learning_rate": 5.697559964772316e-06, "loss": 0.009056896567344666, "step": 15950 }, { "epoch": 2.9839612085042893, "grad_norm": 0.004969852045178413, "learning_rate": 5.645754545925505e-06, "loss": 0.012932000160217285, "step": 16000 }, { "epoch": 2.9932860872808655, "grad_norm": 2.0237090587615967, "learning_rate": 5.593949127078693e-06, "loss": 0.018866615295410158, "step": 16050 }, { "epoch": 3.0, "eval_accuracy": 0.9921177364024406, "eval_f1": 0.9189297124600639, "eval_loss": 0.04323037713766098, "eval_precision": 0.9109263657957245, "eval_recall": 0.9270749395648671, "eval_runtime": 7.1026, "eval_samples_per_second": 769.16, "eval_steps_per_second": 96.163, "step": 16086 }, { "epoch": 3.0026109660574414, "grad_norm": 0.006980204954743385, "learning_rate": 5.542143708231882e-06, "loss": 0.012530730962753296, "step": 16100 }, { "epoch": 3.011935844834017, "grad_norm": 0.008406821638345718, "learning_rate": 5.49033828938507e-06, "loss": 0.0016689696907997132, "step": 16150 }, { "epoch": 3.021260723610593, "grad_norm": 0.26228681206703186, "learning_rate": 5.438532870538259e-06, "loss": 0.007807348966598511, "step": 16200 }, { "epoch": 3.030585602387169, "grad_norm": 0.47371771931648254, "learning_rate": 5.386727451691447e-06, "loss": 0.006382474303245544, "step": 16250 }, { "epoch": 3.0399104811637447, "grad_norm": 0.0065447427332401276, "learning_rate": 5.334922032844636e-06, "loss": 0.004743200242519379, "step": 16300 }, { "epoch": 3.0492353599403206, "grad_norm": 0.008346166461706161, "learning_rate": 5.283116613997824e-06, "loss": 0.0055571597814559935, "step": 16350 }, { "epoch": 3.058560238716897, "grad_norm": 5.690232276916504, "learning_rate": 5.2313111951510135e-06, "loss": 0.00496139645576477, "step": 16400 }, { "epoch": 3.0678851174934727, "grad_norm": 0.3915584981441498, "learning_rate": 5.1795057763042025e-06, "loss": 0.0035722294449806215, "step": 16450 }, { "epoch": 3.0772099962700485, "grad_norm": 0.002272524405270815, "learning_rate": 5.1277003574573906e-06, "loss": 0.011130574941635132, "step": 16500 }, { "epoch": 3.0865348750466244, "grad_norm": 2.395972967147827, "learning_rate": 5.0758949386105795e-06, "loss": 0.003059500753879547, "step": 16550 }, { "epoch": 3.0958597538232002, "grad_norm": 0.004218028858304024, "learning_rate": 5.024089519763768e-06, "loss": 0.0016028760373592377, "step": 16600 }, { "epoch": 3.105184632599776, "grad_norm": 15.134767532348633, "learning_rate": 4.9722841009169565e-06, "loss": 0.008084517717361451, "step": 16650 }, { "epoch": 3.114509511376352, "grad_norm": 0.0018907383782789111, "learning_rate": 4.920478682070145e-06, "loss": 0.003314727246761322, "step": 16700 }, { "epoch": 3.123834390152928, "grad_norm": 0.0029481553938239813, "learning_rate": 4.8686732632233335e-06, "loss": 0.0049530166387557984, "step": 16750 }, { "epoch": 3.133159268929504, "grad_norm": 0.16056513786315918, "learning_rate": 4.8168678443765225e-06, "loss": 0.004786551296710968, "step": 16800 }, { "epoch": 3.14248414770608, "grad_norm": 0.2876565158367157, "learning_rate": 4.7650624255297106e-06, "loss": 0.0019748318195343018, "step": 16850 }, { "epoch": 3.1518090264826557, "grad_norm": 0.0028331561479717493, "learning_rate": 4.7132570066828995e-06, "loss": 0.002513662874698639, "step": 16900 }, { "epoch": 3.1611339052592315, "grad_norm": 0.006282527931034565, "learning_rate": 4.6614515878360884e-06, "loss": 0.01659904956817627, "step": 16950 }, { "epoch": 3.1704587840358074, "grad_norm": 0.007699803449213505, "learning_rate": 4.6096461689892765e-06, "loss": 0.00718508780002594, "step": 17000 }, { "epoch": 3.1797836628123832, "grad_norm": 0.00149145582690835, "learning_rate": 4.5578407501424655e-06, "loss": 0.001901312619447708, "step": 17050 }, { "epoch": 3.1891085415889595, "grad_norm": 0.019138796254992485, "learning_rate": 4.5060353312956535e-06, "loss": 0.01903280019760132, "step": 17100 }, { "epoch": 3.1984334203655354, "grad_norm": 0.544906497001648, "learning_rate": 4.4542299124488425e-06, "loss": 0.004833935499191284, "step": 17150 }, { "epoch": 3.207758299142111, "grad_norm": 0.009669867344200611, "learning_rate": 4.402424493602031e-06, "loss": 0.00512764036655426, "step": 17200 }, { "epoch": 3.217083177918687, "grad_norm": 0.01658560521900654, "learning_rate": 4.3506190747552195e-06, "loss": 0.0008115243166685104, "step": 17250 }, { "epoch": 3.226408056695263, "grad_norm": 0.024577626958489418, "learning_rate": 4.2988136559084084e-06, "loss": 0.0021865896880626677, "step": 17300 }, { "epoch": 3.2357329354718387, "grad_norm": 32.374088287353516, "learning_rate": 4.247008237061597e-06, "loss": 0.004616082906723023, "step": 17350 }, { "epoch": 3.2450578142484146, "grad_norm": 0.06743080914020538, "learning_rate": 4.1952028182147855e-06, "loss": 0.0010297740995883942, "step": 17400 }, { "epoch": 3.254382693024991, "grad_norm": 0.0024560948368161917, "learning_rate": 4.143397399367974e-06, "loss": 0.01637653708457947, "step": 17450 }, { "epoch": 3.2637075718015667, "grad_norm": 0.00366505840793252, "learning_rate": 4.0915919805211625e-06, "loss": 0.0007262816280126571, "step": 17500 }, { "epoch": 3.2730324505781425, "grad_norm": 0.02281450480222702, "learning_rate": 4.039786561674351e-06, "loss": 0.0034805700182914735, "step": 17550 }, { "epoch": 3.2823573293547184, "grad_norm": 0.0021532338578253984, "learning_rate": 3.98798114282754e-06, "loss": 0.005815493464469909, "step": 17600 }, { "epoch": 3.291682208131294, "grad_norm": 0.025134483352303505, "learning_rate": 3.9361757239807284e-06, "loss": 0.0013174866139888763, "step": 17650 }, { "epoch": 3.30100708690787, "grad_norm": 0.011261076666414738, "learning_rate": 3.884370305133917e-06, "loss": 0.002750571370124817, "step": 17700 }, { "epoch": 3.310331965684446, "grad_norm": 0.005523109342902899, "learning_rate": 3.832564886287106e-06, "loss": 0.006260217428207398, "step": 17750 }, { "epoch": 3.319656844461022, "grad_norm": 0.0017233422258868814, "learning_rate": 3.780759467440295e-06, "loss": 0.002834466993808746, "step": 17800 }, { "epoch": 3.328981723237598, "grad_norm": 0.3033665120601654, "learning_rate": 3.7289540485934833e-06, "loss": 0.0014140091836452485, "step": 17850 }, { "epoch": 3.338306602014174, "grad_norm": 0.007635418325662613, "learning_rate": 3.677148629746672e-06, "loss": 0.009239104390144349, "step": 17900 }, { "epoch": 3.3476314807907497, "grad_norm": 0.008077415637671947, "learning_rate": 3.6253432108998604e-06, "loss": 0.0028353652358055117, "step": 17950 }, { "epoch": 3.3569563595673255, "grad_norm": 0.0055144126527011395, "learning_rate": 3.573537792053049e-06, "loss": 0.01257444977760315, "step": 18000 }, { "epoch": 3.3662812383439014, "grad_norm": 0.10481590777635574, "learning_rate": 3.5217323732062374e-06, "loss": 0.008266312479972839, "step": 18050 }, { "epoch": 3.375606117120477, "grad_norm": 0.004028915427625179, "learning_rate": 3.469926954359426e-06, "loss": 0.00322272926568985, "step": 18100 }, { "epoch": 3.3849309958970535, "grad_norm": 0.007838011719286442, "learning_rate": 3.4181215355126153e-06, "loss": 0.0017492137849330902, "step": 18150 }, { "epoch": 3.3942558746736293, "grad_norm": 0.008134761825203896, "learning_rate": 3.3663161166658038e-06, "loss": 0.004277588129043579, "step": 18200 }, { "epoch": 3.403580753450205, "grad_norm": 1.130017638206482, "learning_rate": 3.3145106978189923e-06, "loss": 0.003342975378036499, "step": 18250 }, { "epoch": 3.412905632226781, "grad_norm": 0.0033679301850497723, "learning_rate": 3.262705278972181e-06, "loss": 0.005864649415016175, "step": 18300 }, { "epoch": 3.422230511003357, "grad_norm": 1.1952660083770752, "learning_rate": 3.2108998601253693e-06, "loss": 0.009021402597427368, "step": 18350 }, { "epoch": 3.4315553897799327, "grad_norm": 0.4340899884700775, "learning_rate": 3.159094441278558e-06, "loss": 0.0038458964228630065, "step": 18400 }, { "epoch": 3.4408802685565085, "grad_norm": 0.007966181263327599, "learning_rate": 3.1072890224317463e-06, "loss": 0.004797542989253997, "step": 18450 }, { "epoch": 3.450205147333085, "grad_norm": 0.0008151158690452576, "learning_rate": 3.055483603584935e-06, "loss": 0.008470645546913147, "step": 18500 }, { "epoch": 3.4595300261096606, "grad_norm": 0.0033519044518470764, "learning_rate": 3.003678184738124e-06, "loss": 0.004539164900779724, "step": 18550 }, { "epoch": 3.4688549048862365, "grad_norm": 0.043223973363637924, "learning_rate": 2.9518727658913127e-06, "loss": 0.0028054285049438476, "step": 18600 }, { "epoch": 3.4781797836628123, "grad_norm": 0.011569101363420486, "learning_rate": 2.9000673470445012e-06, "loss": 0.007232290506362915, "step": 18650 }, { "epoch": 3.487504662439388, "grad_norm": 0.007914524525403976, "learning_rate": 2.8482619281976897e-06, "loss": 0.0002942212298512459, "step": 18700 }, { "epoch": 3.496829541215964, "grad_norm": 0.0006849826313555241, "learning_rate": 2.7964565093508782e-06, "loss": 0.00410827487707138, "step": 18750 }, { "epoch": 3.50615441999254, "grad_norm": 0.0026959700044244528, "learning_rate": 2.7446510905040668e-06, "loss": 0.0011774758994579316, "step": 18800 }, { "epoch": 3.515479298769116, "grad_norm": 0.0012961579486727715, "learning_rate": 2.6928456716572553e-06, "loss": 0.0034612080454826354, "step": 18850 }, { "epoch": 3.524804177545692, "grad_norm": 0.06650816649198532, "learning_rate": 2.6410402528104438e-06, "loss": 0.013415820598602295, "step": 18900 }, { "epoch": 3.534129056322268, "grad_norm": 0.0007635413203388453, "learning_rate": 2.589234833963633e-06, "loss": 0.0015386410057544708, "step": 18950 }, { "epoch": 3.5434539350988437, "grad_norm": 0.001662875059992075, "learning_rate": 2.5374294151168216e-06, "loss": 0.002086118161678314, "step": 19000 }, { "epoch": 3.5527788138754195, "grad_norm": 0.0031695894431322813, "learning_rate": 2.48562399627001e-06, "loss": 0.0013567799329757691, "step": 19050 }, { "epoch": 3.562103692651996, "grad_norm": 0.019759224727749825, "learning_rate": 2.4338185774231987e-06, "loss": 0.008338750004768372, "step": 19100 }, { "epoch": 3.571428571428571, "grad_norm": 0.01757100783288479, "learning_rate": 2.382013158576387e-06, "loss": 0.0016921743750572204, "step": 19150 }, { "epoch": 3.5807534502051475, "grad_norm": 2.4479777812957764, "learning_rate": 2.3302077397295757e-06, "loss": 0.011807719469070435, "step": 19200 }, { "epoch": 3.5900783289817233, "grad_norm": 12.382244110107422, "learning_rate": 2.2784023208827646e-06, "loss": 0.005563015937805176, "step": 19250 }, { "epoch": 3.599403207758299, "grad_norm": 0.012547838501632214, "learning_rate": 2.226596902035953e-06, "loss": 0.00420228123664856, "step": 19300 }, { "epoch": 3.608728086534875, "grad_norm": 0.009670069441199303, "learning_rate": 2.1747914831891417e-06, "loss": 0.005122922658920288, "step": 19350 }, { "epoch": 3.618052965311451, "grad_norm": 0.3705468773841858, "learning_rate": 2.12298606434233e-06, "loss": 0.0016570650041103363, "step": 19400 }, { "epoch": 3.627377844088027, "grad_norm": 0.03667959198355675, "learning_rate": 2.071180645495519e-06, "loss": 0.008318853378295899, "step": 19450 }, { "epoch": 3.6367027228646025, "grad_norm": 0.026855269446969032, "learning_rate": 2.0193752266487076e-06, "loss": 0.011855947971343993, "step": 19500 }, { "epoch": 3.646027601641179, "grad_norm": 0.004127690568566322, "learning_rate": 1.967569807801896e-06, "loss": 0.0037176933884620665, "step": 19550 }, { "epoch": 3.6553524804177546, "grad_norm": 0.033966220915317535, "learning_rate": 1.9157643889550846e-06, "loss": 0.0034821495413780213, "step": 19600 }, { "epoch": 3.6646773591943305, "grad_norm": 0.008595237508416176, "learning_rate": 1.8639589701082736e-06, "loss": 0.003079477548599243, "step": 19650 }, { "epoch": 3.6740022379709063, "grad_norm": 0.18409447371959686, "learning_rate": 1.812153551261462e-06, "loss": 0.0028409546613693236, "step": 19700 }, { "epoch": 3.683327116747482, "grad_norm": 0.009265055879950523, "learning_rate": 1.7603481324146506e-06, "loss": 0.001990189254283905, "step": 19750 }, { "epoch": 3.6926519955240584, "grad_norm": 0.004075230099260807, "learning_rate": 1.7085427135678393e-06, "loss": 0.009914104342460633, "step": 19800 }, { "epoch": 3.701976874300634, "grad_norm": 0.012730306945741177, "learning_rate": 1.656737294721028e-06, "loss": 0.00510865867137909, "step": 19850 }, { "epoch": 3.71130175307721, "grad_norm": 0.9975103735923767, "learning_rate": 1.6049318758742165e-06, "loss": 0.00289763867855072, "step": 19900 }, { "epoch": 3.720626631853786, "grad_norm": 0.14549227058887482, "learning_rate": 1.553126457027405e-06, "loss": 0.014574718475341798, "step": 19950 }, { "epoch": 3.729951510630362, "grad_norm": 0.002959158504381776, "learning_rate": 1.5013210381805938e-06, "loss": 0.0035466670989990233, "step": 20000 }, { "epoch": 3.7392763894069376, "grad_norm": 0.044310204684734344, "learning_rate": 1.4495156193337825e-06, "loss": 0.005526635646820068, "step": 20050 }, { "epoch": 3.7486012681835135, "grad_norm": 0.06063301861286163, "learning_rate": 1.397710200486971e-06, "loss": 0.01945833921432495, "step": 20100 }, { "epoch": 3.7579261469600898, "grad_norm": 1.9790464639663696, "learning_rate": 1.3459047816401597e-06, "loss": 0.009157074689865112, "step": 20150 }, { "epoch": 3.767251025736665, "grad_norm": 0.002332707168534398, "learning_rate": 1.2940993627933483e-06, "loss": 0.003599865138530731, "step": 20200 }, { "epoch": 3.7765759045132414, "grad_norm": 0.007876844145357609, "learning_rate": 1.242293943946537e-06, "loss": 0.002118881195783615, "step": 20250 }, { "epoch": 3.7859007832898173, "grad_norm": 0.06033371388912201, "learning_rate": 1.1904885250997255e-06, "loss": 0.013430379629135132, "step": 20300 }, { "epoch": 3.795225662066393, "grad_norm": 0.007944832555949688, "learning_rate": 1.1386831062529142e-06, "loss": 0.011043739318847657, "step": 20350 }, { "epoch": 3.804550540842969, "grad_norm": 0.009239411912858486, "learning_rate": 1.0868776874061027e-06, "loss": 0.007748922109603882, "step": 20400 }, { "epoch": 3.813875419619545, "grad_norm": 0.0014006602577865124, "learning_rate": 1.0350722685592914e-06, "loss": 0.006385021805763245, "step": 20450 }, { "epoch": 3.823200298396121, "grad_norm": 1.487459421157837, "learning_rate": 9.8326684971248e-07, "loss": 0.009500337243080139, "step": 20500 }, { "epoch": 3.832525177172697, "grad_norm": 0.029605276882648468, "learning_rate": 9.314614308656686e-07, "loss": 0.003571970164775848, "step": 20550 }, { "epoch": 3.8418500559492728, "grad_norm": 0.0038495927583426237, "learning_rate": 8.796560120188573e-07, "loss": 0.006721885204315186, "step": 20600 }, { "epoch": 3.8511749347258486, "grad_norm": 0.015677325427532196, "learning_rate": 8.278505931720458e-07, "loss": 0.0021590781211853027, "step": 20650 }, { "epoch": 3.8604998135024244, "grad_norm": 0.00773986428976059, "learning_rate": 7.760451743252345e-07, "loss": 0.0072018647193908695, "step": 20700 }, { "epoch": 3.8698246922790003, "grad_norm": 0.006910277064889669, "learning_rate": 7.24239755478423e-07, "loss": 0.0017287896573543549, "step": 20750 }, { "epoch": 3.879149571055576, "grad_norm": 0.16674445569515228, "learning_rate": 6.724343366316118e-07, "loss": 0.003435662090778351, "step": 20800 }, { "epoch": 3.8884744498321524, "grad_norm": 0.11410090327262878, "learning_rate": 6.206289177848004e-07, "loss": 0.0015013472735881806, "step": 20850 }, { "epoch": 3.8977993286087282, "grad_norm": 0.1628509759902954, "learning_rate": 5.68823498937989e-07, "loss": 0.0035611391067504883, "step": 20900 }, { "epoch": 3.907124207385304, "grad_norm": 0.22206370532512665, "learning_rate": 5.170180800911776e-07, "loss": 0.005973511338233948, "step": 20950 }, { "epoch": 3.91644908616188, "grad_norm": 0.285854309797287, "learning_rate": 4.6521266124436624e-07, "loss": 0.002481496632099152, "step": 21000 }, { "epoch": 3.9257739649384558, "grad_norm": 0.0018572107655927539, "learning_rate": 4.1340724239755486e-07, "loss": 0.0034265148639678954, "step": 21050 }, { "epoch": 3.9350988437150316, "grad_norm": 0.00111959979403764, "learning_rate": 3.6160182355074347e-07, "loss": 0.003972585201263428, "step": 21100 }, { "epoch": 3.9444237224916074, "grad_norm": 0.006579425185918808, "learning_rate": 3.0979640470393204e-07, "loss": 0.007484051585197449, "step": 21150 }, { "epoch": 3.9537486012681837, "grad_norm": 0.0016149668954312801, "learning_rate": 2.5799098585712066e-07, "loss": 0.005609593391418457, "step": 21200 }, { "epoch": 3.9630734800447596, "grad_norm": 8.504377365112305, "learning_rate": 2.061855670103093e-07, "loss": 0.00788326621055603, "step": 21250 }, { "epoch": 3.9723983588213354, "grad_norm": 0.29231831431388855, "learning_rate": 1.5438014816349792e-07, "loss": 0.0017250549793243408, "step": 21300 }, { "epoch": 3.9817232375979112, "grad_norm": 0.006406121421605349, "learning_rate": 1.0257472931668653e-07, "loss": 0.0028238424658775328, "step": 21350 }, { "epoch": 3.991048116374487, "grad_norm": 0.005788094364106655, "learning_rate": 5.076931046987516e-08, "loss": 0.0027461829781532288, "step": 21400 }, { "epoch": 4.0, "eval_accuracy": 0.9926021944640846, "eval_f1": 0.9201277955271566, "eval_loss": 0.045499056577682495, "eval_precision": 0.9121140142517815, "eval_recall": 0.9282836422240129, "eval_runtime": 7.0731, "eval_samples_per_second": 772.358, "eval_steps_per_second": 96.562, "step": 21448 }, { "epoch": 4.0, "step": 21448, "total_flos": 2300195917669620.0, "train_loss": 0.0979897177003356, "train_runtime": 1420.7755, "train_samples_per_second": 120.762, "train_steps_per_second": 15.096 }, { "epoch": 4.0, "step": 21448, "validation_accuracy": 0.9926283813863357, "validation_f1": 0.9199760526840951, "validation_loss": 0.04548870399594307, "validation_precision": 0.911427441676552, "validation_recall": 0.9286865431103949, "validation_runtime": 6.1777, "validation_samples_per_second": 884.309, "validation_steps_per_second": 110.559 }, { "epoch": 4.0, "step": 21448, "test_accuracy": 0.9926283813863357, "test_f1": 0.9199760526840951, "test_loss": 0.04548870399594307, "test_precision": 0.911427441676552, "test_recall": 0.9286865431103949, "test_runtime": 6.2499, "test_samples_per_second": 874.087, "test_steps_per_second": 109.281 } ], "logging_steps": 50, "max_steps": 21448, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2300195917669620.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }