kpfbert-kdpii / trainer_state.json
townboy's picture
Upload KDPII fine-tuned model
c91ef0b verified
{
"best_global_step": 21448,
"best_metric": 0.9201277955271566,
"best_model_checkpoint": "outputs\\kpf-kdpii-ner\\checkpoint-21448",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 21448,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009324878776575904,
"grad_norm": 4.206214904785156,
"learning_rate": 4.568764568764569e-07,
"loss": 4.162920837402344,
"step": 50
},
{
"epoch": 0.01864975755315181,
"grad_norm": 4.167344570159912,
"learning_rate": 9.230769230769232e-07,
"loss": 4.108631591796875,
"step": 100
},
{
"epoch": 0.027974636329727715,
"grad_norm": 6.085664749145508,
"learning_rate": 1.3892773892773895e-06,
"loss": 3.964860534667969,
"step": 150
},
{
"epoch": 0.03729951510630362,
"grad_norm": 9.482864379882812,
"learning_rate": 1.8554778554778559e-06,
"loss": 3.5463021850585936,
"step": 200
},
{
"epoch": 0.04662439388287952,
"grad_norm": 10.59736156463623,
"learning_rate": 2.321678321678322e-06,
"loss": 2.1147555541992187,
"step": 250
},
{
"epoch": 0.05594927265945543,
"grad_norm": 6.983637809753418,
"learning_rate": 2.7878787878787885e-06,
"loss": 0.868929214477539,
"step": 300
},
{
"epoch": 0.06527415143603134,
"grad_norm": 5.8941168785095215,
"learning_rate": 3.254079254079254e-06,
"loss": 0.6754582214355469,
"step": 350
},
{
"epoch": 0.07459903021260723,
"grad_norm": 6.817300319671631,
"learning_rate": 3.7202797202797207e-06,
"loss": 0.6864476013183594,
"step": 400
},
{
"epoch": 0.08392390898918314,
"grad_norm": 3.412003517150879,
"learning_rate": 4.186480186480187e-06,
"loss": 0.6888908386230469,
"step": 450
},
{
"epoch": 0.09324878776575904,
"grad_norm": 4.4841694831848145,
"learning_rate": 4.652680652680653e-06,
"loss": 0.6424143981933593,
"step": 500
},
{
"epoch": 0.10257366654233495,
"grad_norm": 0.5611337423324585,
"learning_rate": 5.118881118881119e-06,
"loss": 0.6713462066650391,
"step": 550
},
{
"epoch": 0.11189854531891086,
"grad_norm": 2.851097583770752,
"learning_rate": 5.585081585081585e-06,
"loss": 0.557415771484375,
"step": 600
},
{
"epoch": 0.12122342409548675,
"grad_norm": 1.917966604232788,
"learning_rate": 6.051282051282051e-06,
"loss": 0.5822576141357422,
"step": 650
},
{
"epoch": 0.13054830287206268,
"grad_norm": 0.473528116941452,
"learning_rate": 6.517482517482518e-06,
"loss": 0.5558326721191407,
"step": 700
},
{
"epoch": 0.13987318164863857,
"grad_norm": 5.384398460388184,
"learning_rate": 6.983682983682984e-06,
"loss": 0.5160323715209961,
"step": 750
},
{
"epoch": 0.14919806042521447,
"grad_norm": 3.535067558288574,
"learning_rate": 7.44988344988345e-06,
"loss": 0.5048127365112305,
"step": 800
},
{
"epoch": 0.15852293920179036,
"grad_norm": 2.199418306350708,
"learning_rate": 7.916083916083917e-06,
"loss": 0.5268861770629882,
"step": 850
},
{
"epoch": 0.1678478179783663,
"grad_norm": 5.3116774559021,
"learning_rate": 8.382284382284382e-06,
"loss": 0.504549446105957,
"step": 900
},
{
"epoch": 0.17717269675494218,
"grad_norm": 5.913183689117432,
"learning_rate": 8.84848484848485e-06,
"loss": 0.4870978546142578,
"step": 950
},
{
"epoch": 0.18649757553151808,
"grad_norm": 1.0376594066619873,
"learning_rate": 9.314685314685316e-06,
"loss": 0.4489225769042969,
"step": 1000
},
{
"epoch": 0.195822454308094,
"grad_norm": 6.973942279815674,
"learning_rate": 9.780885780885782e-06,
"loss": 0.3977284240722656,
"step": 1050
},
{
"epoch": 0.2051473330846699,
"grad_norm": 3.3436081409454346,
"learning_rate": 1.0247086247086249e-05,
"loss": 0.38577865600585937,
"step": 1100
},
{
"epoch": 0.2144722118612458,
"grad_norm": 11.863536834716797,
"learning_rate": 1.0713286713286714e-05,
"loss": 0.3806105422973633,
"step": 1150
},
{
"epoch": 0.22379709063782172,
"grad_norm": 5.786470413208008,
"learning_rate": 1.117948717948718e-05,
"loss": 0.37290851593017577,
"step": 1200
},
{
"epoch": 0.2331219694143976,
"grad_norm": 3.0955147743225098,
"learning_rate": 1.1645687645687646e-05,
"loss": 0.33695747375488283,
"step": 1250
},
{
"epoch": 0.2424468481909735,
"grad_norm": 10.411843299865723,
"learning_rate": 1.2111888111888113e-05,
"loss": 0.3169963836669922,
"step": 1300
},
{
"epoch": 0.2517717269675494,
"grad_norm": 1.3671921491622925,
"learning_rate": 1.2578088578088578e-05,
"loss": 0.274707088470459,
"step": 1350
},
{
"epoch": 0.26109660574412535,
"grad_norm": 0.8266241550445557,
"learning_rate": 1.3044289044289045e-05,
"loss": 0.24858610153198243,
"step": 1400
},
{
"epoch": 0.27042148452070125,
"grad_norm": 3.4048688411712646,
"learning_rate": 1.351048951048951e-05,
"loss": 0.2767606163024902,
"step": 1450
},
{
"epoch": 0.27974636329727715,
"grad_norm": 5.141544342041016,
"learning_rate": 1.3976689976689979e-05,
"loss": 0.23059135437011719,
"step": 1500
},
{
"epoch": 0.28907124207385304,
"grad_norm": 2.9960217475891113,
"learning_rate": 1.4442890442890444e-05,
"loss": 0.2576522636413574,
"step": 1550
},
{
"epoch": 0.29839612085042894,
"grad_norm": 7.788145542144775,
"learning_rate": 1.4909090909090911e-05,
"loss": 0.21580177307128906,
"step": 1600
},
{
"epoch": 0.30772099962700483,
"grad_norm": 1.1988259553909302,
"learning_rate": 1.5375291375291378e-05,
"loss": 0.20308712005615234,
"step": 1650
},
{
"epoch": 0.3170458784035807,
"grad_norm": 1.3631523847579956,
"learning_rate": 1.5841491841491843e-05,
"loss": 0.17964347839355468,
"step": 1700
},
{
"epoch": 0.3263707571801567,
"grad_norm": 0.7174279689788818,
"learning_rate": 1.630769230769231e-05,
"loss": 0.18456392288208007,
"step": 1750
},
{
"epoch": 0.3356956359567326,
"grad_norm": 2.560981273651123,
"learning_rate": 1.6773892773892774e-05,
"loss": 0.16574619293212892,
"step": 1800
},
{
"epoch": 0.34502051473330847,
"grad_norm": 0.93900465965271,
"learning_rate": 1.724009324009324e-05,
"loss": 0.16731924057006836,
"step": 1850
},
{
"epoch": 0.35434539350988437,
"grad_norm": 3.983893394470215,
"learning_rate": 1.7706293706293708e-05,
"loss": 0.14913288116455078,
"step": 1900
},
{
"epoch": 0.36367027228646026,
"grad_norm": 3.581357479095459,
"learning_rate": 1.8172494172494176e-05,
"loss": 0.1477263832092285,
"step": 1950
},
{
"epoch": 0.37299515106303616,
"grad_norm": 1.5302927494049072,
"learning_rate": 1.8638694638694642e-05,
"loss": 0.139080171585083,
"step": 2000
},
{
"epoch": 0.3823200298396121,
"grad_norm": 4.7187910079956055,
"learning_rate": 1.9104895104895107e-05,
"loss": 0.1504351806640625,
"step": 2050
},
{
"epoch": 0.391644908616188,
"grad_norm": 0.5396754145622253,
"learning_rate": 1.9571095571095572e-05,
"loss": 0.12960749626159668,
"step": 2100
},
{
"epoch": 0.4009697873927639,
"grad_norm": 25.248533248901367,
"learning_rate": 1.9995855566492257e-05,
"loss": 0.14916876792907716,
"step": 2150
},
{
"epoch": 0.4102946661693398,
"grad_norm": 6.426814079284668,
"learning_rate": 1.9944050147645447e-05,
"loss": 0.1148387622833252,
"step": 2200
},
{
"epoch": 0.4196195449459157,
"grad_norm": 2.097109317779541,
"learning_rate": 1.9892244728798633e-05,
"loss": 0.11595455169677735,
"step": 2250
},
{
"epoch": 0.4289444237224916,
"grad_norm": 1.893579125404358,
"learning_rate": 1.9840439309951823e-05,
"loss": 0.10183592796325684,
"step": 2300
},
{
"epoch": 0.43826930249906754,
"grad_norm": 5.724792003631592,
"learning_rate": 1.9788633891105013e-05,
"loss": 0.09552323341369628,
"step": 2350
},
{
"epoch": 0.44759418127564343,
"grad_norm": 1.2438207864761353,
"learning_rate": 1.97368284722582e-05,
"loss": 0.0971086597442627,
"step": 2400
},
{
"epoch": 0.45691906005221933,
"grad_norm": 0.3428623676300049,
"learning_rate": 1.968502305341139e-05,
"loss": 0.09295537948608398,
"step": 2450
},
{
"epoch": 0.4662439388287952,
"grad_norm": 1.6153521537780762,
"learning_rate": 1.9633217634564575e-05,
"loss": 0.12522640228271484,
"step": 2500
},
{
"epoch": 0.4755688176053711,
"grad_norm": 2.685026168823242,
"learning_rate": 1.9581412215717765e-05,
"loss": 0.08630707740783691,
"step": 2550
},
{
"epoch": 0.484893696381947,
"grad_norm": 0.31434252858161926,
"learning_rate": 1.9529606796870955e-05,
"loss": 0.10333613395690917,
"step": 2600
},
{
"epoch": 0.49421857515852297,
"grad_norm": 2.9683053493499756,
"learning_rate": 1.9477801378024144e-05,
"loss": 0.07084932804107666,
"step": 2650
},
{
"epoch": 0.5035434539350988,
"grad_norm": 3.0713419914245605,
"learning_rate": 1.942599595917733e-05,
"loss": 0.08680294990539551,
"step": 2700
},
{
"epoch": 0.5128683327116748,
"grad_norm": 3.3729348182678223,
"learning_rate": 1.937419054033052e-05,
"loss": 0.07697622299194336,
"step": 2750
},
{
"epoch": 0.5221932114882507,
"grad_norm": 24.873640060424805,
"learning_rate": 1.932238512148371e-05,
"loss": 0.10056709289550782,
"step": 2800
},
{
"epoch": 0.5315180902648265,
"grad_norm": 0.8206455707550049,
"learning_rate": 1.9270579702636897e-05,
"loss": 0.09658415794372559,
"step": 2850
},
{
"epoch": 0.5408429690414025,
"grad_norm": 11.821130752563477,
"learning_rate": 1.9218774283790087e-05,
"loss": 0.06848039150238037,
"step": 2900
},
{
"epoch": 0.5501678478179783,
"grad_norm": 0.6246572136878967,
"learning_rate": 1.9166968864943273e-05,
"loss": 0.08011377334594727,
"step": 2950
},
{
"epoch": 0.5594927265945543,
"grad_norm": 3.210092306137085,
"learning_rate": 1.9115163446096463e-05,
"loss": 0.10572279930114746,
"step": 3000
},
{
"epoch": 0.5688176053711301,
"grad_norm": 3.658480644226074,
"learning_rate": 1.9063358027249653e-05,
"loss": 0.0694641637802124,
"step": 3050
},
{
"epoch": 0.5781424841477061,
"grad_norm": 0.8585368394851685,
"learning_rate": 1.9011552608402842e-05,
"loss": 0.07078958988189697,
"step": 3100
},
{
"epoch": 0.587467362924282,
"grad_norm": 0.5600335001945496,
"learning_rate": 1.895974718955603e-05,
"loss": 0.07252558708190918,
"step": 3150
},
{
"epoch": 0.5967922417008579,
"grad_norm": 4.424919605255127,
"learning_rate": 1.890794177070922e-05,
"loss": 0.08730278968811035,
"step": 3200
},
{
"epoch": 0.6061171204774338,
"grad_norm": 4.6426849365234375,
"learning_rate": 1.8856136351862405e-05,
"loss": 0.050492286682128906,
"step": 3250
},
{
"epoch": 0.6154419992540097,
"grad_norm": 4.4583210945129395,
"learning_rate": 1.8804330933015595e-05,
"loss": 0.054503369331359866,
"step": 3300
},
{
"epoch": 0.6247668780305856,
"grad_norm": 0.5928723812103271,
"learning_rate": 1.8752525514168784e-05,
"loss": 0.0773204231262207,
"step": 3350
},
{
"epoch": 0.6340917568071615,
"grad_norm": 0.8700105547904968,
"learning_rate": 1.870072009532197e-05,
"loss": 0.08818217277526856,
"step": 3400
},
{
"epoch": 0.6434166355837374,
"grad_norm": 6.234158515930176,
"learning_rate": 1.864891467647516e-05,
"loss": 0.05500625610351562,
"step": 3450
},
{
"epoch": 0.6527415143603134,
"grad_norm": 0.5930687785148621,
"learning_rate": 1.859710925762835e-05,
"loss": 0.051360769271850584,
"step": 3500
},
{
"epoch": 0.6620663931368892,
"grad_norm": 0.048168476670980453,
"learning_rate": 1.854530383878154e-05,
"loss": 0.07871677875518798,
"step": 3550
},
{
"epoch": 0.6713912719134651,
"grad_norm": 0.26890629529953003,
"learning_rate": 1.8493498419934727e-05,
"loss": 0.03978243350982666,
"step": 3600
},
{
"epoch": 0.680716150690041,
"grad_norm": 1.0152816772460938,
"learning_rate": 1.8441693001087916e-05,
"loss": 0.06742914199829102,
"step": 3650
},
{
"epoch": 0.6900410294666169,
"grad_norm": 5.40765905380249,
"learning_rate": 1.8389887582241103e-05,
"loss": 0.04563611030578613,
"step": 3700
},
{
"epoch": 0.6993659082431929,
"grad_norm": 9.407204627990723,
"learning_rate": 1.8338082163394293e-05,
"loss": 0.06598632335662842,
"step": 3750
},
{
"epoch": 0.7086907870197687,
"grad_norm": 1.0526869297027588,
"learning_rate": 1.8286276744547482e-05,
"loss": 0.06473824024200439,
"step": 3800
},
{
"epoch": 0.7180156657963447,
"grad_norm": 5.696482181549072,
"learning_rate": 1.823447132570067e-05,
"loss": 0.0610739803314209,
"step": 3850
},
{
"epoch": 0.7273405445729205,
"grad_norm": 0.10160894691944122,
"learning_rate": 1.818266590685386e-05,
"loss": 0.05340108394622803,
"step": 3900
},
{
"epoch": 0.7366654233494965,
"grad_norm": 3.2599477767944336,
"learning_rate": 1.813086048800705e-05,
"loss": 0.06807507038116455,
"step": 3950
},
{
"epoch": 0.7459903021260723,
"grad_norm": 1.383055329322815,
"learning_rate": 1.8079055069160235e-05,
"loss": 0.058188986778259275,
"step": 4000
},
{
"epoch": 0.7553151809026483,
"grad_norm": 6.310545444488525,
"learning_rate": 1.8027249650313424e-05,
"loss": 0.06925914287567139,
"step": 4050
},
{
"epoch": 0.7646400596792242,
"grad_norm": 2.753561496734619,
"learning_rate": 1.797544423146661e-05,
"loss": 0.06061097145080566,
"step": 4100
},
{
"epoch": 0.7739649384558001,
"grad_norm": 0.06244755908846855,
"learning_rate": 1.79236388126198e-05,
"loss": 0.05539895057678223,
"step": 4150
},
{
"epoch": 0.783289817232376,
"grad_norm": 1.5955125093460083,
"learning_rate": 1.787183339377299e-05,
"loss": 0.04949520111083985,
"step": 4200
},
{
"epoch": 0.7926146960089518,
"grad_norm": 0.15867096185684204,
"learning_rate": 1.782002797492618e-05,
"loss": 0.04355106830596924,
"step": 4250
},
{
"epoch": 0.8019395747855278,
"grad_norm": 0.03898247703909874,
"learning_rate": 1.7768222556079367e-05,
"loss": 0.06238871097564697,
"step": 4300
},
{
"epoch": 0.8112644535621036,
"grad_norm": 0.10622036457061768,
"learning_rate": 1.7716417137232556e-05,
"loss": 0.06471785068511964,
"step": 4350
},
{
"epoch": 0.8205893323386796,
"grad_norm": 2.5175602436065674,
"learning_rate": 1.7664611718385746e-05,
"loss": 0.04465628147125244,
"step": 4400
},
{
"epoch": 0.8299142111152555,
"grad_norm": 0.6827256679534912,
"learning_rate": 1.7612806299538933e-05,
"loss": 0.05508995056152344,
"step": 4450
},
{
"epoch": 0.8392390898918314,
"grad_norm": 4.929401397705078,
"learning_rate": 1.7561000880692122e-05,
"loss": 0.028713507652282713,
"step": 4500
},
{
"epoch": 0.8485639686684073,
"grad_norm": 3.8355817794799805,
"learning_rate": 1.750919546184531e-05,
"loss": 0.054467902183532715,
"step": 4550
},
{
"epoch": 0.8578888474449832,
"grad_norm": 0.07267450541257858,
"learning_rate": 1.74573900429985e-05,
"loss": 0.05773499965667725,
"step": 4600
},
{
"epoch": 0.8672137262215591,
"grad_norm": 2.4586944580078125,
"learning_rate": 1.740558462415169e-05,
"loss": 0.06556248664855957,
"step": 4650
},
{
"epoch": 0.8765386049981351,
"grad_norm": 4.859276294708252,
"learning_rate": 1.7353779205304878e-05,
"loss": 0.06336853981018066,
"step": 4700
},
{
"epoch": 0.8858634837747109,
"grad_norm": 0.44299831986427307,
"learning_rate": 1.7301973786458065e-05,
"loss": 0.05126949310302734,
"step": 4750
},
{
"epoch": 0.8951883625512869,
"grad_norm": 5.093299865722656,
"learning_rate": 1.7250168367611254e-05,
"loss": 0.05324135303497315,
"step": 4800
},
{
"epoch": 0.9045132413278627,
"grad_norm": 1.6905597448349,
"learning_rate": 1.719836294876444e-05,
"loss": 0.048749656677246095,
"step": 4850
},
{
"epoch": 0.9138381201044387,
"grad_norm": 0.30517128109931946,
"learning_rate": 1.714655752991763e-05,
"loss": 0.05433460712432861,
"step": 4900
},
{
"epoch": 0.9231629988810145,
"grad_norm": 0.4588942527770996,
"learning_rate": 1.709475211107082e-05,
"loss": 0.04207270622253418,
"step": 4950
},
{
"epoch": 0.9324878776575904,
"grad_norm": 0.036567509174346924,
"learning_rate": 1.7042946692224007e-05,
"loss": 0.05857636451721191,
"step": 5000
},
{
"epoch": 0.9418127564341664,
"grad_norm": 3.270030975341797,
"learning_rate": 1.6991141273377196e-05,
"loss": 0.05915598869323731,
"step": 5050
},
{
"epoch": 0.9511376352107422,
"grad_norm": 6.163786888122559,
"learning_rate": 1.6939335854530386e-05,
"loss": 0.051976222991943356,
"step": 5100
},
{
"epoch": 0.9604625139873182,
"grad_norm": 0.16877496242523193,
"learning_rate": 1.6887530435683576e-05,
"loss": 0.051587677001953124,
"step": 5150
},
{
"epoch": 0.969787392763894,
"grad_norm": 0.4458121657371521,
"learning_rate": 1.6835725016836762e-05,
"loss": 0.037312333583831785,
"step": 5200
},
{
"epoch": 0.97911227154047,
"grad_norm": 0.01349574513733387,
"learning_rate": 1.6783919597989952e-05,
"loss": 0.04749881267547607,
"step": 5250
},
{
"epoch": 0.9884371503170459,
"grad_norm": 0.6714735627174377,
"learning_rate": 1.673211417914314e-05,
"loss": 0.050176200866699217,
"step": 5300
},
{
"epoch": 0.9977620290936218,
"grad_norm": 11.990230560302734,
"learning_rate": 1.668030876029633e-05,
"loss": 0.07070876598358154,
"step": 5350
},
{
"epoch": 1.0,
"eval_accuracy": 0.9874433707806322,
"eval_f1": 0.8682108626198082,
"eval_loss": 0.050337210297584534,
"eval_precision": 0.8606492478226445,
"eval_recall": 0.8759065269943593,
"eval_runtime": 27.5818,
"eval_samples_per_second": 198.065,
"eval_steps_per_second": 24.763,
"step": 5362
},
{
"epoch": 1.0070869078701976,
"grad_norm": 0.7046685814857483,
"learning_rate": 1.6628503341449518e-05,
"loss": 0.037335155010223386,
"step": 5400
},
{
"epoch": 1.0164117866467737,
"grad_norm": 2.07792592048645,
"learning_rate": 1.6576697922602705e-05,
"loss": 0.03664821147918701,
"step": 5450
},
{
"epoch": 1.0257366654233495,
"grad_norm": 1.8149992227554321,
"learning_rate": 1.6524892503755894e-05,
"loss": 0.03763864040374756,
"step": 5500
},
{
"epoch": 1.0350615441999254,
"grad_norm": 0.8814394474029541,
"learning_rate": 1.6473087084909084e-05,
"loss": 0.0462725305557251,
"step": 5550
},
{
"epoch": 1.0443864229765012,
"grad_norm": 8.331986427307129,
"learning_rate": 1.642128166606227e-05,
"loss": 0.025801122188568115,
"step": 5600
},
{
"epoch": 1.0537113017530773,
"grad_norm": 0.5653894543647766,
"learning_rate": 1.636947624721546e-05,
"loss": 0.0302036452293396,
"step": 5650
},
{
"epoch": 1.063036180529653,
"grad_norm": 0.1264486312866211,
"learning_rate": 1.6317670828368647e-05,
"loss": 0.03399224281311035,
"step": 5700
},
{
"epoch": 1.072361059306229,
"grad_norm": 1.2637239694595337,
"learning_rate": 1.6265865409521836e-05,
"loss": 0.04418774604797363,
"step": 5750
},
{
"epoch": 1.081685938082805,
"grad_norm": 5.040623188018799,
"learning_rate": 1.6214059990675026e-05,
"loss": 0.02509807586669922,
"step": 5800
},
{
"epoch": 1.0910108168593808,
"grad_norm": 0.03714745491743088,
"learning_rate": 1.6162254571828216e-05,
"loss": 0.026492388248443605,
"step": 5850
},
{
"epoch": 1.1003356956359567,
"grad_norm": 0.7756729125976562,
"learning_rate": 1.6110449152981402e-05,
"loss": 0.02965877056121826,
"step": 5900
},
{
"epoch": 1.1096605744125327,
"grad_norm": 0.217277392745018,
"learning_rate": 1.6058643734134592e-05,
"loss": 0.022216553688049315,
"step": 5950
},
{
"epoch": 1.1189854531891086,
"grad_norm": 4.127126216888428,
"learning_rate": 1.6006838315287782e-05,
"loss": 0.03008180618286133,
"step": 6000
},
{
"epoch": 1.1283103319656844,
"grad_norm": 0.01144993957132101,
"learning_rate": 1.595503289644097e-05,
"loss": 0.04714715957641601,
"step": 6050
},
{
"epoch": 1.1376352107422603,
"grad_norm": 0.037526026368141174,
"learning_rate": 1.5903227477594158e-05,
"loss": 0.04247360706329346,
"step": 6100
},
{
"epoch": 1.1469600895188363,
"grad_norm": 0.9934174418449402,
"learning_rate": 1.5851422058747345e-05,
"loss": 0.02983764886856079,
"step": 6150
},
{
"epoch": 1.1562849682954122,
"grad_norm": 0.4428967237472534,
"learning_rate": 1.5799616639900534e-05,
"loss": 0.027351632118225097,
"step": 6200
},
{
"epoch": 1.165609847071988,
"grad_norm": 0.05002899840474129,
"learning_rate": 1.5747811221053724e-05,
"loss": 0.02518010139465332,
"step": 6250
},
{
"epoch": 1.174934725848564,
"grad_norm": 0.18001802265644073,
"learning_rate": 1.5696005802206914e-05,
"loss": 0.040030746459960936,
"step": 6300
},
{
"epoch": 1.18425960462514,
"grad_norm": 0.21795284748077393,
"learning_rate": 1.56442003833601e-05,
"loss": 0.03228116512298584,
"step": 6350
},
{
"epoch": 1.1935844834017157,
"grad_norm": 0.33233147859573364,
"learning_rate": 1.559239496451329e-05,
"loss": 0.04077389717102051,
"step": 6400
},
{
"epoch": 1.2029093621782916,
"grad_norm": 0.07459854334592819,
"learning_rate": 1.5540589545666476e-05,
"loss": 0.035812277793884274,
"step": 6450
},
{
"epoch": 1.2122342409548676,
"grad_norm": 0.03117297776043415,
"learning_rate": 1.5488784126819666e-05,
"loss": 0.043056426048278806,
"step": 6500
},
{
"epoch": 1.2215591197314435,
"grad_norm": 2.1351895332336426,
"learning_rate": 1.5436978707972856e-05,
"loss": 0.026498048305511473,
"step": 6550
},
{
"epoch": 1.2308839985080193,
"grad_norm": 0.1372031569480896,
"learning_rate": 1.5385173289126042e-05,
"loss": 0.03637035131454468,
"step": 6600
},
{
"epoch": 1.2402088772845954,
"grad_norm": 0.06523732095956802,
"learning_rate": 1.5333367870279232e-05,
"loss": 0.06686021327972412,
"step": 6650
},
{
"epoch": 1.2495337560611712,
"grad_norm": 0.02072199061512947,
"learning_rate": 1.5281562451432422e-05,
"loss": 0.03420682668685913,
"step": 6700
},
{
"epoch": 1.258858634837747,
"grad_norm": 9.351777076721191,
"learning_rate": 1.522975703258561e-05,
"loss": 0.026438066959381102,
"step": 6750
},
{
"epoch": 1.2681835136143231,
"grad_norm": 0.40086886286735535,
"learning_rate": 1.5177951613738796e-05,
"loss": 0.046449775695800784,
"step": 6800
},
{
"epoch": 1.277508392390899,
"grad_norm": 0.5892062783241272,
"learning_rate": 1.5126146194891986e-05,
"loss": 0.0309269380569458,
"step": 6850
},
{
"epoch": 1.2868332711674748,
"grad_norm": 0.002104206709191203,
"learning_rate": 1.5074340776045176e-05,
"loss": 0.023648200035095216,
"step": 6900
},
{
"epoch": 1.2961581499440507,
"grad_norm": 0.0258804801851511,
"learning_rate": 1.5022535357198364e-05,
"loss": 0.04035449504852295,
"step": 6950
},
{
"epoch": 1.3054830287206267,
"grad_norm": 4.397562026977539,
"learning_rate": 1.4970729938351554e-05,
"loss": 0.05229721546173096,
"step": 7000
},
{
"epoch": 1.3148079074972026,
"grad_norm": 0.007150724530220032,
"learning_rate": 1.491892451950474e-05,
"loss": 0.03332861661911011,
"step": 7050
},
{
"epoch": 1.3241327862737784,
"grad_norm": 3.109645128250122,
"learning_rate": 1.486711910065793e-05,
"loss": 0.047160525321960446,
"step": 7100
},
{
"epoch": 1.3334576650503545,
"grad_norm": 0.41086408495903015,
"learning_rate": 1.4815313681811118e-05,
"loss": 0.04890709400177002,
"step": 7150
},
{
"epoch": 1.3427825438269303,
"grad_norm": 0.05025002732872963,
"learning_rate": 1.4763508262964308e-05,
"loss": 0.04013650417327881,
"step": 7200
},
{
"epoch": 1.3521074226035061,
"grad_norm": 0.03028084896504879,
"learning_rate": 1.4711702844117498e-05,
"loss": 0.0283713960647583,
"step": 7250
},
{
"epoch": 1.361432301380082,
"grad_norm": 0.031166842207312584,
"learning_rate": 1.4659897425270684e-05,
"loss": 0.03349567174911499,
"step": 7300
},
{
"epoch": 1.370757180156658,
"grad_norm": 4.032196521759033,
"learning_rate": 1.4608092006423872e-05,
"loss": 0.029915103912353514,
"step": 7350
},
{
"epoch": 1.3800820589332339,
"grad_norm": 3.165501594543457,
"learning_rate": 1.4556286587577062e-05,
"loss": 0.040238561630249026,
"step": 7400
},
{
"epoch": 1.3894069377098097,
"grad_norm": 0.05803289636969566,
"learning_rate": 1.4504481168730252e-05,
"loss": 0.03673480272293091,
"step": 7450
},
{
"epoch": 1.3987318164863858,
"grad_norm": 0.0027874386869370937,
"learning_rate": 1.4452675749883438e-05,
"loss": 0.03083367109298706,
"step": 7500
},
{
"epoch": 1.4080566952629616,
"grad_norm": 0.39723604917526245,
"learning_rate": 1.4400870331036628e-05,
"loss": 0.03706796646118164,
"step": 7550
},
{
"epoch": 1.4173815740395375,
"grad_norm": 0.006277570500969887,
"learning_rate": 1.4349064912189816e-05,
"loss": 0.030687217712402345,
"step": 7600
},
{
"epoch": 1.4267064528161133,
"grad_norm": 2.196660041809082,
"learning_rate": 1.4297259493343006e-05,
"loss": 0.03273656129837036,
"step": 7650
},
{
"epoch": 1.4360313315926894,
"grad_norm": 2.9575355052948,
"learning_rate": 1.4245454074496194e-05,
"loss": 0.025758986473083497,
"step": 7700
},
{
"epoch": 1.4453562103692652,
"grad_norm": 0.08796069771051407,
"learning_rate": 1.4193648655649382e-05,
"loss": 0.03270584583282471,
"step": 7750
},
{
"epoch": 1.454681089145841,
"grad_norm": 0.9201443791389465,
"learning_rate": 1.414184323680257e-05,
"loss": 0.023307127952575682,
"step": 7800
},
{
"epoch": 1.464005967922417,
"grad_norm": 1.7311280965805054,
"learning_rate": 1.409003781795576e-05,
"loss": 0.03505758047103882,
"step": 7850
},
{
"epoch": 1.473330846698993,
"grad_norm": 7.217854022979736,
"learning_rate": 1.4038232399108948e-05,
"loss": 0.032117910385131836,
"step": 7900
},
{
"epoch": 1.4826557254755688,
"grad_norm": 0.16375161707401276,
"learning_rate": 1.3986426980262136e-05,
"loss": 0.024907276630401612,
"step": 7950
},
{
"epoch": 1.4919806042521446,
"grad_norm": 2.5342984199523926,
"learning_rate": 1.3934621561415324e-05,
"loss": 0.02424616813659668,
"step": 8000
},
{
"epoch": 1.5013054830287205,
"grad_norm": 0.5667886137962341,
"learning_rate": 1.3882816142568514e-05,
"loss": 0.035223734378814694,
"step": 8050
},
{
"epoch": 1.5106303618052965,
"grad_norm": 4.529999732971191,
"learning_rate": 1.3831010723721704e-05,
"loss": 0.043730239868164066,
"step": 8100
},
{
"epoch": 1.5199552405818726,
"grad_norm": 0.31222647428512573,
"learning_rate": 1.3779205304874892e-05,
"loss": 0.02817868709564209,
"step": 8150
},
{
"epoch": 1.5292801193584484,
"grad_norm": 0.054891835898160934,
"learning_rate": 1.3727399886028078e-05,
"loss": 0.022635526657104492,
"step": 8200
},
{
"epoch": 1.5386049981350243,
"grad_norm": 3.897071361541748,
"learning_rate": 1.3675594467181268e-05,
"loss": 0.0365865421295166,
"step": 8250
},
{
"epoch": 1.5479298769116001,
"grad_norm": 7.58866024017334,
"learning_rate": 1.3623789048334458e-05,
"loss": 0.03705208778381348,
"step": 8300
},
{
"epoch": 1.557254755688176,
"grad_norm": 0.2579911947250366,
"learning_rate": 1.3571983629487646e-05,
"loss": 0.036470816135406495,
"step": 8350
},
{
"epoch": 1.566579634464752,
"grad_norm": 0.04304761812090874,
"learning_rate": 1.3520178210640836e-05,
"loss": 0.024144577980041503,
"step": 8400
},
{
"epoch": 1.5759045132413279,
"grad_norm": 0.011871698312461376,
"learning_rate": 1.3468372791794022e-05,
"loss": 0.02437096118927002,
"step": 8450
},
{
"epoch": 1.585229392017904,
"grad_norm": 1.8747565746307373,
"learning_rate": 1.3416567372947212e-05,
"loss": 0.021970641613006592,
"step": 8500
},
{
"epoch": 1.5945542707944798,
"grad_norm": 0.0068074301816523075,
"learning_rate": 1.33647619541004e-05,
"loss": 0.03055704355239868,
"step": 8550
},
{
"epoch": 1.6038791495710556,
"grad_norm": 0.7720779776573181,
"learning_rate": 1.331295653525359e-05,
"loss": 0.05535665988922119,
"step": 8600
},
{
"epoch": 1.6132040283476314,
"grad_norm": 0.052967652678489685,
"learning_rate": 1.3261151116406776e-05,
"loss": 0.037333052158355716,
"step": 8650
},
{
"epoch": 1.6225289071242073,
"grad_norm": 0.4131523370742798,
"learning_rate": 1.3209345697559966e-05,
"loss": 0.022513895034790038,
"step": 8700
},
{
"epoch": 1.6318537859007833,
"grad_norm": 0.0903526097536087,
"learning_rate": 1.3157540278713154e-05,
"loss": 0.018794809579849244,
"step": 8750
},
{
"epoch": 1.6411786646773592,
"grad_norm": 0.03226502984762192,
"learning_rate": 1.3105734859866344e-05,
"loss": 0.021845638751983643,
"step": 8800
},
{
"epoch": 1.6505035434539352,
"grad_norm": 1.2775359153747559,
"learning_rate": 1.3053929441019533e-05,
"loss": 0.03150895118713379,
"step": 8850
},
{
"epoch": 1.659828422230511,
"grad_norm": 0.0202046986669302,
"learning_rate": 1.300212402217272e-05,
"loss": 0.02440279006958008,
"step": 8900
},
{
"epoch": 1.669153301007087,
"grad_norm": 5.867640495300293,
"learning_rate": 1.2950318603325908e-05,
"loss": 0.033769989013671876,
"step": 8950
},
{
"epoch": 1.6784781797836628,
"grad_norm": 0.006102518644183874,
"learning_rate": 1.2898513184479098e-05,
"loss": 0.020930655002593994,
"step": 9000
},
{
"epoch": 1.6878030585602386,
"grad_norm": 0.06524361670017242,
"learning_rate": 1.2846707765632288e-05,
"loss": 0.0303147554397583,
"step": 9050
},
{
"epoch": 1.6971279373368147,
"grad_norm": 1.1646746397018433,
"learning_rate": 1.2794902346785474e-05,
"loss": 0.025315618515014647,
"step": 9100
},
{
"epoch": 1.7064528161133905,
"grad_norm": 2.134981393814087,
"learning_rate": 1.2743096927938664e-05,
"loss": 0.04284055233001709,
"step": 9150
},
{
"epoch": 1.7157776948899666,
"grad_norm": 2.9764657020568848,
"learning_rate": 1.2691291509091852e-05,
"loss": 0.02323296308517456,
"step": 9200
},
{
"epoch": 1.7251025736665424,
"grad_norm": 0.15592370927333832,
"learning_rate": 1.2639486090245042e-05,
"loss": 0.03630294561386108,
"step": 9250
},
{
"epoch": 1.7344274524431182,
"grad_norm": 1.1410564184188843,
"learning_rate": 1.258768067139823e-05,
"loss": 0.02985832929611206,
"step": 9300
},
{
"epoch": 1.743752331219694,
"grad_norm": 1.3886200189590454,
"learning_rate": 1.2535875252551418e-05,
"loss": 0.019391053915023805,
"step": 9350
},
{
"epoch": 1.75307720999627,
"grad_norm": 12.997761726379395,
"learning_rate": 1.2484069833704606e-05,
"loss": 0.02593435287475586,
"step": 9400
},
{
"epoch": 1.762402088772846,
"grad_norm": 2.852426052093506,
"learning_rate": 1.2432264414857796e-05,
"loss": 0.036953463554382324,
"step": 9450
},
{
"epoch": 1.7717269675494218,
"grad_norm": 1.0583350658416748,
"learning_rate": 1.2380458996010984e-05,
"loss": 0.025919597148895263,
"step": 9500
},
{
"epoch": 1.781051846325998,
"grad_norm": 0.06280253827571869,
"learning_rate": 1.2328653577164172e-05,
"loss": 0.024321415424346925,
"step": 9550
},
{
"epoch": 1.7903767251025737,
"grad_norm": 1.4471710920333862,
"learning_rate": 1.227684815831736e-05,
"loss": 0.02954728364944458,
"step": 9600
},
{
"epoch": 1.7997016038791496,
"grad_norm": 0.3254970610141754,
"learning_rate": 1.222504273947055e-05,
"loss": 0.0403021764755249,
"step": 9650
},
{
"epoch": 1.8090264826557254,
"grad_norm": 0.026926545426249504,
"learning_rate": 1.217323732062374e-05,
"loss": 0.01865153431892395,
"step": 9700
},
{
"epoch": 1.8183513614323012,
"grad_norm": 0.037455275654792786,
"learning_rate": 1.2121431901776928e-05,
"loss": 0.02834453582763672,
"step": 9750
},
{
"epoch": 1.8276762402088773,
"grad_norm": 1.9724242687225342,
"learning_rate": 1.2069626482930114e-05,
"loss": 0.02292172908782959,
"step": 9800
},
{
"epoch": 1.8370011189854532,
"grad_norm": 2.2518837451934814,
"learning_rate": 1.2017821064083304e-05,
"loss": 0.029299042224884032,
"step": 9850
},
{
"epoch": 1.8463259977620292,
"grad_norm": 0.8918629884719849,
"learning_rate": 1.1966015645236493e-05,
"loss": 0.02729450464248657,
"step": 9900
},
{
"epoch": 1.855650876538605,
"grad_norm": 0.015352281741797924,
"learning_rate": 1.1914210226389682e-05,
"loss": 0.025739452838897704,
"step": 9950
},
{
"epoch": 1.864975755315181,
"grad_norm": 12.12820816040039,
"learning_rate": 1.1862404807542871e-05,
"loss": 0.05083851337432861,
"step": 10000
},
{
"epoch": 1.8743006340917567,
"grad_norm": 0.03783294931054115,
"learning_rate": 1.1810599388696058e-05,
"loss": 0.03862152814865112,
"step": 10050
},
{
"epoch": 1.8836255128683326,
"grad_norm": 1.312626838684082,
"learning_rate": 1.1758793969849248e-05,
"loss": 0.040201754570007325,
"step": 10100
},
{
"epoch": 1.8929503916449086,
"grad_norm": 0.27149704098701477,
"learning_rate": 1.1706988551002436e-05,
"loss": 0.020465714931488035,
"step": 10150
},
{
"epoch": 1.9022752704214845,
"grad_norm": 0.051149722188711166,
"learning_rate": 1.1655183132155625e-05,
"loss": 0.019956029653549194,
"step": 10200
},
{
"epoch": 1.9116001491980605,
"grad_norm": 0.012450406327843666,
"learning_rate": 1.1603377713308812e-05,
"loss": 0.030698204040527345,
"step": 10250
},
{
"epoch": 1.9209250279746364,
"grad_norm": 0.04320710152387619,
"learning_rate": 1.1551572294462002e-05,
"loss": 0.01924089193344116,
"step": 10300
},
{
"epoch": 1.9302499067512122,
"grad_norm": 0.028835974633693695,
"learning_rate": 1.149976687561519e-05,
"loss": 0.03460402250289917,
"step": 10350
},
{
"epoch": 1.939574785527788,
"grad_norm": 0.032503023743629456,
"learning_rate": 1.144796145676838e-05,
"loss": 0.021525814533233642,
"step": 10400
},
{
"epoch": 1.948899664304364,
"grad_norm": 0.054762404412031174,
"learning_rate": 1.139615603792157e-05,
"loss": 0.016051357984542845,
"step": 10450
},
{
"epoch": 1.95822454308094,
"grad_norm": 0.41409963369369507,
"learning_rate": 1.1344350619074756e-05,
"loss": 0.02211181879043579,
"step": 10500
},
{
"epoch": 1.9675494218575158,
"grad_norm": 0.6710904836654663,
"learning_rate": 1.1292545200227944e-05,
"loss": 0.03442399978637695,
"step": 10550
},
{
"epoch": 1.9768743006340919,
"grad_norm": 0.20875470340251923,
"learning_rate": 1.1240739781381133e-05,
"loss": 0.029538695812225343,
"step": 10600
},
{
"epoch": 1.9861991794106677,
"grad_norm": 7.152144432067871,
"learning_rate": 1.1188934362534323e-05,
"loss": 0.02945619821548462,
"step": 10650
},
{
"epoch": 1.9955240581872435,
"grad_norm": 0.0340808629989624,
"learning_rate": 1.113712894368751e-05,
"loss": 0.021653232574462892,
"step": 10700
},
{
"epoch": 2.0,
"eval_accuracy": 0.9903632126116217,
"eval_f1": 0.9054189162167566,
"eval_loss": 0.04485788941383362,
"eval_precision": 0.8987693529178246,
"eval_recall": 0.9121676067687349,
"eval_runtime": 7.0416,
"eval_samples_per_second": 775.82,
"eval_steps_per_second": 96.995,
"step": 10724
},
{
"epoch": 2.0048489369638194,
"grad_norm": 0.006561782211065292,
"learning_rate": 1.10853235248407e-05,
"loss": 0.023352961540222168,
"step": 10750
},
{
"epoch": 2.0141738157403952,
"grad_norm": 0.03905324265360832,
"learning_rate": 1.1033518105993888e-05,
"loss": 0.007830613255500794,
"step": 10800
},
{
"epoch": 2.023498694516971,
"grad_norm": 0.002779081929475069,
"learning_rate": 1.0981712687147077e-05,
"loss": 0.012746865749359132,
"step": 10850
},
{
"epoch": 2.0328235732935473,
"grad_norm": 3.28019642829895,
"learning_rate": 1.0929907268300265e-05,
"loss": 0.021561498641967772,
"step": 10900
},
{
"epoch": 2.042148452070123,
"grad_norm": 0.11420201510190964,
"learning_rate": 1.0878101849453453e-05,
"loss": 0.01144665241241455,
"step": 10950
},
{
"epoch": 2.051473330846699,
"grad_norm": 0.01909773238003254,
"learning_rate": 1.0826296430606642e-05,
"loss": 0.006808329224586487,
"step": 11000
},
{
"epoch": 2.060798209623275,
"grad_norm": 0.03136987239122391,
"learning_rate": 1.0774491011759831e-05,
"loss": 0.015448588132858276,
"step": 11050
},
{
"epoch": 2.0701230883998507,
"grad_norm": 0.0069969939067959785,
"learning_rate": 1.072268559291302e-05,
"loss": 0.020947656631469726,
"step": 11100
},
{
"epoch": 2.0794479671764265,
"grad_norm": 0.008591468445956707,
"learning_rate": 1.0670880174066208e-05,
"loss": 0.013551335334777832,
"step": 11150
},
{
"epoch": 2.0887728459530024,
"grad_norm": 0.007207474671304226,
"learning_rate": 1.0619074755219396e-05,
"loss": 0.010735607147216797,
"step": 11200
},
{
"epoch": 2.0980977247295787,
"grad_norm": 0.007553383708000183,
"learning_rate": 1.0567269336372585e-05,
"loss": 0.0180646276473999,
"step": 11250
},
{
"epoch": 2.1074226035061545,
"grad_norm": 0.0854165256023407,
"learning_rate": 1.0515463917525775e-05,
"loss": 0.012175880670547486,
"step": 11300
},
{
"epoch": 2.1167474822827304,
"grad_norm": 1.2997490167617798,
"learning_rate": 1.0463658498678963e-05,
"loss": 0.028563385009765626,
"step": 11350
},
{
"epoch": 2.126072361059306,
"grad_norm": 0.028747934848070145,
"learning_rate": 1.041185307983215e-05,
"loss": 0.008224156498908997,
"step": 11400
},
{
"epoch": 2.135397239835882,
"grad_norm": 0.02653522975742817,
"learning_rate": 1.036004766098534e-05,
"loss": 0.014218298196792602,
"step": 11450
},
{
"epoch": 2.144722118612458,
"grad_norm": 0.0075917416252195835,
"learning_rate": 1.030824224213853e-05,
"loss": 0.010074301958084106,
"step": 11500
},
{
"epoch": 2.1540469973890337,
"grad_norm": 0.01568465493619442,
"learning_rate": 1.0256436823291717e-05,
"loss": 0.009785271286964416,
"step": 11550
},
{
"epoch": 2.16337187616561,
"grad_norm": 2.6329779624938965,
"learning_rate": 1.0204631404444907e-05,
"loss": 0.039693479537963865,
"step": 11600
},
{
"epoch": 2.172696754942186,
"grad_norm": 0.25744888186454773,
"learning_rate": 1.0152825985598094e-05,
"loss": 0.009682031273841858,
"step": 11650
},
{
"epoch": 2.1820216337187617,
"grad_norm": 0.048078108578920364,
"learning_rate": 1.0101020566751283e-05,
"loss": 0.012061976194381714,
"step": 11700
},
{
"epoch": 2.1913465124953375,
"grad_norm": 0.009185828268527985,
"learning_rate": 1.0049215147904471e-05,
"loss": 0.01473943829536438,
"step": 11750
},
{
"epoch": 2.2006713912719134,
"grad_norm": 0.3359212279319763,
"learning_rate": 9.99740972905766e-06,
"loss": 0.022451975345611573,
"step": 11800
},
{
"epoch": 2.209996270048489,
"grad_norm": 0.03128429129719734,
"learning_rate": 9.94560431021085e-06,
"loss": 0.015020393133163452,
"step": 11850
},
{
"epoch": 2.2193211488250655,
"grad_norm": 0.01077917031943798,
"learning_rate": 9.893798891364037e-06,
"loss": 0.007385715842247009,
"step": 11900
},
{
"epoch": 2.2286460276016413,
"grad_norm": 0.0009410646744072437,
"learning_rate": 9.841993472517225e-06,
"loss": 0.010898010730743408,
"step": 11950
},
{
"epoch": 2.237970906378217,
"grad_norm": 0.23428411781787872,
"learning_rate": 9.790188053670415e-06,
"loss": 0.017517651319503783,
"step": 12000
},
{
"epoch": 2.247295785154793,
"grad_norm": 5.2552947998046875,
"learning_rate": 9.738382634823603e-06,
"loss": 0.011954027414321899,
"step": 12050
},
{
"epoch": 2.256620663931369,
"grad_norm": 0.1022522896528244,
"learning_rate": 9.686577215976793e-06,
"loss": 0.0103814697265625,
"step": 12100
},
{
"epoch": 2.2659455427079447,
"grad_norm": 0.01425126288086176,
"learning_rate": 9.634771797129981e-06,
"loss": 0.016681231260299682,
"step": 12150
},
{
"epoch": 2.2752704214845205,
"grad_norm": 0.010022806003689766,
"learning_rate": 9.58296637828317e-06,
"loss": 0.007602689266204834,
"step": 12200
},
{
"epoch": 2.2845953002610964,
"grad_norm": 0.09281191229820251,
"learning_rate": 9.531160959436357e-06,
"loss": 0.015772578716278077,
"step": 12250
},
{
"epoch": 2.2939201790376726,
"grad_norm": 1.6627157926559448,
"learning_rate": 9.479355540589547e-06,
"loss": 0.0149391770362854,
"step": 12300
},
{
"epoch": 2.3032450578142485,
"grad_norm": 0.039720647037029266,
"learning_rate": 9.427550121742735e-06,
"loss": 0.004919275641441345,
"step": 12350
},
{
"epoch": 2.3125699365908243,
"grad_norm": 0.13361865282058716,
"learning_rate": 9.375744702895923e-06,
"loss": 0.0066268140077590946,
"step": 12400
},
{
"epoch": 2.3218948153674,
"grad_norm": 0.004165187943726778,
"learning_rate": 9.323939284049113e-06,
"loss": 0.008859132528305053,
"step": 12450
},
{
"epoch": 2.331219694143976,
"grad_norm": 0.01734941452741623,
"learning_rate": 9.272133865202301e-06,
"loss": 0.01958281397819519,
"step": 12500
},
{
"epoch": 2.340544572920552,
"grad_norm": 1.4992754459381104,
"learning_rate": 9.22032844635549e-06,
"loss": 0.024173910617828368,
"step": 12550
},
{
"epoch": 2.349869451697128,
"grad_norm": 3.266171455383301,
"learning_rate": 9.168523027508677e-06,
"loss": 0.026157324314117433,
"step": 12600
},
{
"epoch": 2.359194330473704,
"grad_norm": 0.034271348267793655,
"learning_rate": 9.116717608661867e-06,
"loss": 0.004791333377361298,
"step": 12650
},
{
"epoch": 2.36851920925028,
"grad_norm": 0.020556321367621422,
"learning_rate": 9.064912189815055e-06,
"loss": 0.023116433620452882,
"step": 12700
},
{
"epoch": 2.3778440880268557,
"grad_norm": 2.9007959365844727,
"learning_rate": 9.013106770968243e-06,
"loss": 0.003782390058040619,
"step": 12750
},
{
"epoch": 2.3871689668034315,
"grad_norm": 0.04751985892653465,
"learning_rate": 8.961301352121433e-06,
"loss": 0.004796516001224518,
"step": 12800
},
{
"epoch": 2.3964938455800073,
"grad_norm": 0.09174877405166626,
"learning_rate": 8.909495933274621e-06,
"loss": 0.01988631725311279,
"step": 12850
},
{
"epoch": 2.405818724356583,
"grad_norm": 0.897373378276825,
"learning_rate": 8.857690514427811e-06,
"loss": 0.011833161115646362,
"step": 12900
},
{
"epoch": 2.4151436031331595,
"grad_norm": 0.026099465787410736,
"learning_rate": 8.805885095580999e-06,
"loss": 0.021891412734985353,
"step": 12950
},
{
"epoch": 2.4244684819097353,
"grad_norm": 0.005264167208224535,
"learning_rate": 8.754079676734187e-06,
"loss": 0.014649747610092164,
"step": 13000
},
{
"epoch": 2.433793360686311,
"grad_norm": 0.0665712058544159,
"learning_rate": 8.702274257887375e-06,
"loss": 0.018104093074798586,
"step": 13050
},
{
"epoch": 2.443118239462887,
"grad_norm": 0.01004517637193203,
"learning_rate": 8.650468839040565e-06,
"loss": 0.004754712581634521,
"step": 13100
},
{
"epoch": 2.452443118239463,
"grad_norm": 0.011136854998767376,
"learning_rate": 8.598663420193753e-06,
"loss": 0.008313758969306946,
"step": 13150
},
{
"epoch": 2.4617679970160387,
"grad_norm": 0.0015451794024556875,
"learning_rate": 8.546858001346941e-06,
"loss": 0.008117977380752563,
"step": 13200
},
{
"epoch": 2.471092875792615,
"grad_norm": 1.5158227682113647,
"learning_rate": 8.495052582500131e-06,
"loss": 0.02230316638946533,
"step": 13250
},
{
"epoch": 2.480417754569191,
"grad_norm": 0.015987800434231758,
"learning_rate": 8.443247163653319e-06,
"loss": 0.0033962687849998473,
"step": 13300
},
{
"epoch": 2.4897426333457666,
"grad_norm": 0.2436022162437439,
"learning_rate": 8.391441744806507e-06,
"loss": 0.009453248977661134,
"step": 13350
},
{
"epoch": 2.4990675121223425,
"grad_norm": 0.007971422746777534,
"learning_rate": 8.339636325959695e-06,
"loss": 0.005669102668762207,
"step": 13400
},
{
"epoch": 2.5083923908989183,
"grad_norm": 0.030247289687395096,
"learning_rate": 8.287830907112885e-06,
"loss": 0.007165596485137939,
"step": 13450
},
{
"epoch": 2.517717269675494,
"grad_norm": 0.03285367041826248,
"learning_rate": 8.236025488266073e-06,
"loss": 0.01023703694343567,
"step": 13500
},
{
"epoch": 2.52704214845207,
"grad_norm": 1.4136919975280762,
"learning_rate": 8.184220069419261e-06,
"loss": 0.019952696561813355,
"step": 13550
},
{
"epoch": 2.5363670272286463,
"grad_norm": 0.13177263736724854,
"learning_rate": 8.132414650572451e-06,
"loss": 0.008267701864242553,
"step": 13600
},
{
"epoch": 2.5456919060052217,
"grad_norm": 13.017802238464355,
"learning_rate": 8.080609231725639e-06,
"loss": 0.013502672910690308,
"step": 13650
},
{
"epoch": 2.555016784781798,
"grad_norm": 20.80805015563965,
"learning_rate": 8.028803812878829e-06,
"loss": 0.014624173641204835,
"step": 13700
},
{
"epoch": 2.564341663558374,
"grad_norm": 0.05195024982094765,
"learning_rate": 7.976998394032017e-06,
"loss": 0.025228326320648194,
"step": 13750
},
{
"epoch": 2.5736665423349496,
"grad_norm": 0.004629973322153091,
"learning_rate": 7.925192975185205e-06,
"loss": 0.02166285514831543,
"step": 13800
},
{
"epoch": 2.5829914211115255,
"grad_norm": 0.0022503056097775698,
"learning_rate": 7.873387556338393e-06,
"loss": 0.02188849925994873,
"step": 13850
},
{
"epoch": 2.5923162998881013,
"grad_norm": 0.8524413108825684,
"learning_rate": 7.821582137491583e-06,
"loss": 0.007161260843276978,
"step": 13900
},
{
"epoch": 2.6016411786646776,
"grad_norm": 2.9589359760284424,
"learning_rate": 7.769776718644771e-06,
"loss": 0.009217590093612671,
"step": 13950
},
{
"epoch": 2.6109660574412534,
"grad_norm": 0.0014888152945786715,
"learning_rate": 7.717971299797959e-06,
"loss": 0.007640480399131775,
"step": 14000
},
{
"epoch": 2.6202909362178293,
"grad_norm": 0.0024451257195323706,
"learning_rate": 7.666165880951149e-06,
"loss": 0.009097555875778198,
"step": 14050
},
{
"epoch": 2.629615814994405,
"grad_norm": 1.4727226495742798,
"learning_rate": 7.614360462104337e-06,
"loss": 0.022815148830413818,
"step": 14100
},
{
"epoch": 2.638940693770981,
"grad_norm": 0.14492234587669373,
"learning_rate": 7.562555043257526e-06,
"loss": 0.00907568097114563,
"step": 14150
},
{
"epoch": 2.648265572547557,
"grad_norm": 0.006422064267098904,
"learning_rate": 7.510749624410714e-06,
"loss": 0.012345269918441773,
"step": 14200
},
{
"epoch": 2.6575904513241326,
"grad_norm": 0.003297192510217428,
"learning_rate": 7.458944205563903e-06,
"loss": 0.013943998813629151,
"step": 14250
},
{
"epoch": 2.666915330100709,
"grad_norm": 0.009486474096775055,
"learning_rate": 7.407138786717091e-06,
"loss": 0.007204347848892212,
"step": 14300
},
{
"epoch": 2.6762402088772848,
"grad_norm": 0.001906346995383501,
"learning_rate": 7.35533336787028e-06,
"loss": 0.0064238041639328005,
"step": 14350
},
{
"epoch": 2.6855650876538606,
"grad_norm": 0.009013752453029156,
"learning_rate": 7.303527949023469e-06,
"loss": 0.019118592739105225,
"step": 14400
},
{
"epoch": 2.6948899664304364,
"grad_norm": 9.203516006469727,
"learning_rate": 7.251722530176657e-06,
"loss": 0.014843382835388184,
"step": 14450
},
{
"epoch": 2.7042148452070123,
"grad_norm": 0.013872411102056503,
"learning_rate": 7.199917111329846e-06,
"loss": 0.017863935232162474,
"step": 14500
},
{
"epoch": 2.713539723983588,
"grad_norm": 0.008059196174144745,
"learning_rate": 7.148111692483034e-06,
"loss": 0.007313421964645386,
"step": 14550
},
{
"epoch": 2.722864602760164,
"grad_norm": 0.007967078126966953,
"learning_rate": 7.096306273636223e-06,
"loss": 0.005426759123802185,
"step": 14600
},
{
"epoch": 2.7321894815367402,
"grad_norm": 0.052042555063962936,
"learning_rate": 7.044500854789411e-06,
"loss": 0.010500948429107666,
"step": 14650
},
{
"epoch": 2.741514360313316,
"grad_norm": 0.008007310330867767,
"learning_rate": 6.9926954359426e-06,
"loss": 0.011817890405654907,
"step": 14700
},
{
"epoch": 2.750839239089892,
"grad_norm": 0.553403377532959,
"learning_rate": 6.940890017095788e-06,
"loss": 0.011694425344467163,
"step": 14750
},
{
"epoch": 2.7601641178664678,
"grad_norm": 0.011203479021787643,
"learning_rate": 6.889084598248977e-06,
"loss": 0.009664978981018067,
"step": 14800
},
{
"epoch": 2.7694889966430436,
"grad_norm": 0.031599052250385284,
"learning_rate": 6.837279179402167e-06,
"loss": 0.0062562096118927,
"step": 14850
},
{
"epoch": 2.7788138754196194,
"grad_norm": 0.07515502721071243,
"learning_rate": 6.785473760555355e-06,
"loss": 0.008186891674995422,
"step": 14900
},
{
"epoch": 2.7881387541961953,
"grad_norm": 0.004041098989546299,
"learning_rate": 6.733668341708544e-06,
"loss": 0.008758670091629029,
"step": 14950
},
{
"epoch": 2.7974636329727716,
"grad_norm": 0.010477816686034203,
"learning_rate": 6.681862922861732e-06,
"loss": 0.008421186804771424,
"step": 15000
},
{
"epoch": 2.8067885117493474,
"grad_norm": 0.037119459360837936,
"learning_rate": 6.630057504014921e-06,
"loss": 0.0044859576225280764,
"step": 15050
},
{
"epoch": 2.8161133905259232,
"grad_norm": 2.2909059524536133,
"learning_rate": 6.578252085168109e-06,
"loss": 0.007467656135559082,
"step": 15100
},
{
"epoch": 2.825438269302499,
"grad_norm": 0.028654785826802254,
"learning_rate": 6.526446666321298e-06,
"loss": 0.011730804443359374,
"step": 15150
},
{
"epoch": 2.834763148079075,
"grad_norm": 0.00396377919241786,
"learning_rate": 6.474641247474487e-06,
"loss": 0.007885778546333313,
"step": 15200
},
{
"epoch": 2.8440880268556508,
"grad_norm": 4.526209354400635,
"learning_rate": 6.422835828627675e-06,
"loss": 0.013013125658035278,
"step": 15250
},
{
"epoch": 2.8534129056322266,
"grad_norm": 0.006890705320984125,
"learning_rate": 6.371030409780864e-06,
"loss": 0.020241425037384034,
"step": 15300
},
{
"epoch": 2.862737784408803,
"grad_norm": 0.04351874813437462,
"learning_rate": 6.319224990934052e-06,
"loss": 0.02235487461090088,
"step": 15350
},
{
"epoch": 2.8720626631853787,
"grad_norm": 0.004027783405035734,
"learning_rate": 6.267419572087241e-06,
"loss": 0.012025052309036255,
"step": 15400
},
{
"epoch": 2.8813875419619546,
"grad_norm": 0.017081253230571747,
"learning_rate": 6.215614153240429e-06,
"loss": 0.012213168144226074,
"step": 15450
},
{
"epoch": 2.8907124207385304,
"grad_norm": 5.580208778381348,
"learning_rate": 6.163808734393618e-06,
"loss": 0.011020108461380004,
"step": 15500
},
{
"epoch": 2.9000372995151062,
"grad_norm": 0.026449229568243027,
"learning_rate": 6.112003315546806e-06,
"loss": 0.020866034030914308,
"step": 15550
},
{
"epoch": 2.909362178291682,
"grad_norm": 0.018465599045157433,
"learning_rate": 6.060197896699996e-06,
"loss": 0.006286224126815796,
"step": 15600
},
{
"epoch": 2.918687057068258,
"grad_norm": 0.004978302400559187,
"learning_rate": 6.008392477853185e-06,
"loss": 0.01425373911857605,
"step": 15650
},
{
"epoch": 2.928011935844834,
"grad_norm": 0.008023403584957123,
"learning_rate": 5.956587059006373e-06,
"loss": 0.008588857650756836,
"step": 15700
},
{
"epoch": 2.93733681462141,
"grad_norm": 0.014545072801411152,
"learning_rate": 5.904781640159562e-06,
"loss": 0.009176114797592163,
"step": 15750
},
{
"epoch": 2.946661693397986,
"grad_norm": 0.0036765779368579388,
"learning_rate": 5.85297622131275e-06,
"loss": 0.019455695152282716,
"step": 15800
},
{
"epoch": 2.9559865721745617,
"grad_norm": 8.891608238220215,
"learning_rate": 5.801170802465939e-06,
"loss": 0.012102892398834228,
"step": 15850
},
{
"epoch": 2.9653114509511376,
"grad_norm": 0.05219835415482521,
"learning_rate": 5.749365383619127e-06,
"loss": 0.009157007336616516,
"step": 15900
},
{
"epoch": 2.9746363297277134,
"grad_norm": 0.003453275188803673,
"learning_rate": 5.697559964772316e-06,
"loss": 0.009056896567344666,
"step": 15950
},
{
"epoch": 2.9839612085042893,
"grad_norm": 0.004969852045178413,
"learning_rate": 5.645754545925505e-06,
"loss": 0.012932000160217285,
"step": 16000
},
{
"epoch": 2.9932860872808655,
"grad_norm": 2.0237090587615967,
"learning_rate": 5.593949127078693e-06,
"loss": 0.018866615295410158,
"step": 16050
},
{
"epoch": 3.0,
"eval_accuracy": 0.9921177364024406,
"eval_f1": 0.9189297124600639,
"eval_loss": 0.04323037713766098,
"eval_precision": 0.9109263657957245,
"eval_recall": 0.9270749395648671,
"eval_runtime": 7.1026,
"eval_samples_per_second": 769.16,
"eval_steps_per_second": 96.163,
"step": 16086
},
{
"epoch": 3.0026109660574414,
"grad_norm": 0.006980204954743385,
"learning_rate": 5.542143708231882e-06,
"loss": 0.012530730962753296,
"step": 16100
},
{
"epoch": 3.011935844834017,
"grad_norm": 0.008406821638345718,
"learning_rate": 5.49033828938507e-06,
"loss": 0.0016689696907997132,
"step": 16150
},
{
"epoch": 3.021260723610593,
"grad_norm": 0.26228681206703186,
"learning_rate": 5.438532870538259e-06,
"loss": 0.007807348966598511,
"step": 16200
},
{
"epoch": 3.030585602387169,
"grad_norm": 0.47371771931648254,
"learning_rate": 5.386727451691447e-06,
"loss": 0.006382474303245544,
"step": 16250
},
{
"epoch": 3.0399104811637447,
"grad_norm": 0.0065447427332401276,
"learning_rate": 5.334922032844636e-06,
"loss": 0.004743200242519379,
"step": 16300
},
{
"epoch": 3.0492353599403206,
"grad_norm": 0.008346166461706161,
"learning_rate": 5.283116613997824e-06,
"loss": 0.0055571597814559935,
"step": 16350
},
{
"epoch": 3.058560238716897,
"grad_norm": 5.690232276916504,
"learning_rate": 5.2313111951510135e-06,
"loss": 0.00496139645576477,
"step": 16400
},
{
"epoch": 3.0678851174934727,
"grad_norm": 0.3915584981441498,
"learning_rate": 5.1795057763042025e-06,
"loss": 0.0035722294449806215,
"step": 16450
},
{
"epoch": 3.0772099962700485,
"grad_norm": 0.002272524405270815,
"learning_rate": 5.1277003574573906e-06,
"loss": 0.011130574941635132,
"step": 16500
},
{
"epoch": 3.0865348750466244,
"grad_norm": 2.395972967147827,
"learning_rate": 5.0758949386105795e-06,
"loss": 0.003059500753879547,
"step": 16550
},
{
"epoch": 3.0958597538232002,
"grad_norm": 0.004218028858304024,
"learning_rate": 5.024089519763768e-06,
"loss": 0.0016028760373592377,
"step": 16600
},
{
"epoch": 3.105184632599776,
"grad_norm": 15.134767532348633,
"learning_rate": 4.9722841009169565e-06,
"loss": 0.008084517717361451,
"step": 16650
},
{
"epoch": 3.114509511376352,
"grad_norm": 0.0018907383782789111,
"learning_rate": 4.920478682070145e-06,
"loss": 0.003314727246761322,
"step": 16700
},
{
"epoch": 3.123834390152928,
"grad_norm": 0.0029481553938239813,
"learning_rate": 4.8686732632233335e-06,
"loss": 0.0049530166387557984,
"step": 16750
},
{
"epoch": 3.133159268929504,
"grad_norm": 0.16056513786315918,
"learning_rate": 4.8168678443765225e-06,
"loss": 0.004786551296710968,
"step": 16800
},
{
"epoch": 3.14248414770608,
"grad_norm": 0.2876565158367157,
"learning_rate": 4.7650624255297106e-06,
"loss": 0.0019748318195343018,
"step": 16850
},
{
"epoch": 3.1518090264826557,
"grad_norm": 0.0028331561479717493,
"learning_rate": 4.7132570066828995e-06,
"loss": 0.002513662874698639,
"step": 16900
},
{
"epoch": 3.1611339052592315,
"grad_norm": 0.006282527931034565,
"learning_rate": 4.6614515878360884e-06,
"loss": 0.01659904956817627,
"step": 16950
},
{
"epoch": 3.1704587840358074,
"grad_norm": 0.007699803449213505,
"learning_rate": 4.6096461689892765e-06,
"loss": 0.00718508780002594,
"step": 17000
},
{
"epoch": 3.1797836628123832,
"grad_norm": 0.00149145582690835,
"learning_rate": 4.5578407501424655e-06,
"loss": 0.001901312619447708,
"step": 17050
},
{
"epoch": 3.1891085415889595,
"grad_norm": 0.019138796254992485,
"learning_rate": 4.5060353312956535e-06,
"loss": 0.01903280019760132,
"step": 17100
},
{
"epoch": 3.1984334203655354,
"grad_norm": 0.544906497001648,
"learning_rate": 4.4542299124488425e-06,
"loss": 0.004833935499191284,
"step": 17150
},
{
"epoch": 3.207758299142111,
"grad_norm": 0.009669867344200611,
"learning_rate": 4.402424493602031e-06,
"loss": 0.00512764036655426,
"step": 17200
},
{
"epoch": 3.217083177918687,
"grad_norm": 0.01658560521900654,
"learning_rate": 4.3506190747552195e-06,
"loss": 0.0008115243166685104,
"step": 17250
},
{
"epoch": 3.226408056695263,
"grad_norm": 0.024577626958489418,
"learning_rate": 4.2988136559084084e-06,
"loss": 0.0021865896880626677,
"step": 17300
},
{
"epoch": 3.2357329354718387,
"grad_norm": 32.374088287353516,
"learning_rate": 4.247008237061597e-06,
"loss": 0.004616082906723023,
"step": 17350
},
{
"epoch": 3.2450578142484146,
"grad_norm": 0.06743080914020538,
"learning_rate": 4.1952028182147855e-06,
"loss": 0.0010297740995883942,
"step": 17400
},
{
"epoch": 3.254382693024991,
"grad_norm": 0.0024560948368161917,
"learning_rate": 4.143397399367974e-06,
"loss": 0.01637653708457947,
"step": 17450
},
{
"epoch": 3.2637075718015667,
"grad_norm": 0.00366505840793252,
"learning_rate": 4.0915919805211625e-06,
"loss": 0.0007262816280126571,
"step": 17500
},
{
"epoch": 3.2730324505781425,
"grad_norm": 0.02281450480222702,
"learning_rate": 4.039786561674351e-06,
"loss": 0.0034805700182914735,
"step": 17550
},
{
"epoch": 3.2823573293547184,
"grad_norm": 0.0021532338578253984,
"learning_rate": 3.98798114282754e-06,
"loss": 0.005815493464469909,
"step": 17600
},
{
"epoch": 3.291682208131294,
"grad_norm": 0.025134483352303505,
"learning_rate": 3.9361757239807284e-06,
"loss": 0.0013174866139888763,
"step": 17650
},
{
"epoch": 3.30100708690787,
"grad_norm": 0.011261076666414738,
"learning_rate": 3.884370305133917e-06,
"loss": 0.002750571370124817,
"step": 17700
},
{
"epoch": 3.310331965684446,
"grad_norm": 0.005523109342902899,
"learning_rate": 3.832564886287106e-06,
"loss": 0.006260217428207398,
"step": 17750
},
{
"epoch": 3.319656844461022,
"grad_norm": 0.0017233422258868814,
"learning_rate": 3.780759467440295e-06,
"loss": 0.002834466993808746,
"step": 17800
},
{
"epoch": 3.328981723237598,
"grad_norm": 0.3033665120601654,
"learning_rate": 3.7289540485934833e-06,
"loss": 0.0014140091836452485,
"step": 17850
},
{
"epoch": 3.338306602014174,
"grad_norm": 0.007635418325662613,
"learning_rate": 3.677148629746672e-06,
"loss": 0.009239104390144349,
"step": 17900
},
{
"epoch": 3.3476314807907497,
"grad_norm": 0.008077415637671947,
"learning_rate": 3.6253432108998604e-06,
"loss": 0.0028353652358055117,
"step": 17950
},
{
"epoch": 3.3569563595673255,
"grad_norm": 0.0055144126527011395,
"learning_rate": 3.573537792053049e-06,
"loss": 0.01257444977760315,
"step": 18000
},
{
"epoch": 3.3662812383439014,
"grad_norm": 0.10481590777635574,
"learning_rate": 3.5217323732062374e-06,
"loss": 0.008266312479972839,
"step": 18050
},
{
"epoch": 3.375606117120477,
"grad_norm": 0.004028915427625179,
"learning_rate": 3.469926954359426e-06,
"loss": 0.00322272926568985,
"step": 18100
},
{
"epoch": 3.3849309958970535,
"grad_norm": 0.007838011719286442,
"learning_rate": 3.4181215355126153e-06,
"loss": 0.0017492137849330902,
"step": 18150
},
{
"epoch": 3.3942558746736293,
"grad_norm": 0.008134761825203896,
"learning_rate": 3.3663161166658038e-06,
"loss": 0.004277588129043579,
"step": 18200
},
{
"epoch": 3.403580753450205,
"grad_norm": 1.130017638206482,
"learning_rate": 3.3145106978189923e-06,
"loss": 0.003342975378036499,
"step": 18250
},
{
"epoch": 3.412905632226781,
"grad_norm": 0.0033679301850497723,
"learning_rate": 3.262705278972181e-06,
"loss": 0.005864649415016175,
"step": 18300
},
{
"epoch": 3.422230511003357,
"grad_norm": 1.1952660083770752,
"learning_rate": 3.2108998601253693e-06,
"loss": 0.009021402597427368,
"step": 18350
},
{
"epoch": 3.4315553897799327,
"grad_norm": 0.4340899884700775,
"learning_rate": 3.159094441278558e-06,
"loss": 0.0038458964228630065,
"step": 18400
},
{
"epoch": 3.4408802685565085,
"grad_norm": 0.007966181263327599,
"learning_rate": 3.1072890224317463e-06,
"loss": 0.004797542989253997,
"step": 18450
},
{
"epoch": 3.450205147333085,
"grad_norm": 0.0008151158690452576,
"learning_rate": 3.055483603584935e-06,
"loss": 0.008470645546913147,
"step": 18500
},
{
"epoch": 3.4595300261096606,
"grad_norm": 0.0033519044518470764,
"learning_rate": 3.003678184738124e-06,
"loss": 0.004539164900779724,
"step": 18550
},
{
"epoch": 3.4688549048862365,
"grad_norm": 0.043223973363637924,
"learning_rate": 2.9518727658913127e-06,
"loss": 0.0028054285049438476,
"step": 18600
},
{
"epoch": 3.4781797836628123,
"grad_norm": 0.011569101363420486,
"learning_rate": 2.9000673470445012e-06,
"loss": 0.007232290506362915,
"step": 18650
},
{
"epoch": 3.487504662439388,
"grad_norm": 0.007914524525403976,
"learning_rate": 2.8482619281976897e-06,
"loss": 0.0002942212298512459,
"step": 18700
},
{
"epoch": 3.496829541215964,
"grad_norm": 0.0006849826313555241,
"learning_rate": 2.7964565093508782e-06,
"loss": 0.00410827487707138,
"step": 18750
},
{
"epoch": 3.50615441999254,
"grad_norm": 0.0026959700044244528,
"learning_rate": 2.7446510905040668e-06,
"loss": 0.0011774758994579316,
"step": 18800
},
{
"epoch": 3.515479298769116,
"grad_norm": 0.0012961579486727715,
"learning_rate": 2.6928456716572553e-06,
"loss": 0.0034612080454826354,
"step": 18850
},
{
"epoch": 3.524804177545692,
"grad_norm": 0.06650816649198532,
"learning_rate": 2.6410402528104438e-06,
"loss": 0.013415820598602295,
"step": 18900
},
{
"epoch": 3.534129056322268,
"grad_norm": 0.0007635413203388453,
"learning_rate": 2.589234833963633e-06,
"loss": 0.0015386410057544708,
"step": 18950
},
{
"epoch": 3.5434539350988437,
"grad_norm": 0.001662875059992075,
"learning_rate": 2.5374294151168216e-06,
"loss": 0.002086118161678314,
"step": 19000
},
{
"epoch": 3.5527788138754195,
"grad_norm": 0.0031695894431322813,
"learning_rate": 2.48562399627001e-06,
"loss": 0.0013567799329757691,
"step": 19050
},
{
"epoch": 3.562103692651996,
"grad_norm": 0.019759224727749825,
"learning_rate": 2.4338185774231987e-06,
"loss": 0.008338750004768372,
"step": 19100
},
{
"epoch": 3.571428571428571,
"grad_norm": 0.01757100783288479,
"learning_rate": 2.382013158576387e-06,
"loss": 0.0016921743750572204,
"step": 19150
},
{
"epoch": 3.5807534502051475,
"grad_norm": 2.4479777812957764,
"learning_rate": 2.3302077397295757e-06,
"loss": 0.011807719469070435,
"step": 19200
},
{
"epoch": 3.5900783289817233,
"grad_norm": 12.382244110107422,
"learning_rate": 2.2784023208827646e-06,
"loss": 0.005563015937805176,
"step": 19250
},
{
"epoch": 3.599403207758299,
"grad_norm": 0.012547838501632214,
"learning_rate": 2.226596902035953e-06,
"loss": 0.00420228123664856,
"step": 19300
},
{
"epoch": 3.608728086534875,
"grad_norm": 0.009670069441199303,
"learning_rate": 2.1747914831891417e-06,
"loss": 0.005122922658920288,
"step": 19350
},
{
"epoch": 3.618052965311451,
"grad_norm": 0.3705468773841858,
"learning_rate": 2.12298606434233e-06,
"loss": 0.0016570650041103363,
"step": 19400
},
{
"epoch": 3.627377844088027,
"grad_norm": 0.03667959198355675,
"learning_rate": 2.071180645495519e-06,
"loss": 0.008318853378295899,
"step": 19450
},
{
"epoch": 3.6367027228646025,
"grad_norm": 0.026855269446969032,
"learning_rate": 2.0193752266487076e-06,
"loss": 0.011855947971343993,
"step": 19500
},
{
"epoch": 3.646027601641179,
"grad_norm": 0.004127690568566322,
"learning_rate": 1.967569807801896e-06,
"loss": 0.0037176933884620665,
"step": 19550
},
{
"epoch": 3.6553524804177546,
"grad_norm": 0.033966220915317535,
"learning_rate": 1.9157643889550846e-06,
"loss": 0.0034821495413780213,
"step": 19600
},
{
"epoch": 3.6646773591943305,
"grad_norm": 0.008595237508416176,
"learning_rate": 1.8639589701082736e-06,
"loss": 0.003079477548599243,
"step": 19650
},
{
"epoch": 3.6740022379709063,
"grad_norm": 0.18409447371959686,
"learning_rate": 1.812153551261462e-06,
"loss": 0.0028409546613693236,
"step": 19700
},
{
"epoch": 3.683327116747482,
"grad_norm": 0.009265055879950523,
"learning_rate": 1.7603481324146506e-06,
"loss": 0.001990189254283905,
"step": 19750
},
{
"epoch": 3.6926519955240584,
"grad_norm": 0.004075230099260807,
"learning_rate": 1.7085427135678393e-06,
"loss": 0.009914104342460633,
"step": 19800
},
{
"epoch": 3.701976874300634,
"grad_norm": 0.012730306945741177,
"learning_rate": 1.656737294721028e-06,
"loss": 0.00510865867137909,
"step": 19850
},
{
"epoch": 3.71130175307721,
"grad_norm": 0.9975103735923767,
"learning_rate": 1.6049318758742165e-06,
"loss": 0.00289763867855072,
"step": 19900
},
{
"epoch": 3.720626631853786,
"grad_norm": 0.14549227058887482,
"learning_rate": 1.553126457027405e-06,
"loss": 0.014574718475341798,
"step": 19950
},
{
"epoch": 3.729951510630362,
"grad_norm": 0.002959158504381776,
"learning_rate": 1.5013210381805938e-06,
"loss": 0.0035466670989990233,
"step": 20000
},
{
"epoch": 3.7392763894069376,
"grad_norm": 0.044310204684734344,
"learning_rate": 1.4495156193337825e-06,
"loss": 0.005526635646820068,
"step": 20050
},
{
"epoch": 3.7486012681835135,
"grad_norm": 0.06063301861286163,
"learning_rate": 1.397710200486971e-06,
"loss": 0.01945833921432495,
"step": 20100
},
{
"epoch": 3.7579261469600898,
"grad_norm": 1.9790464639663696,
"learning_rate": 1.3459047816401597e-06,
"loss": 0.009157074689865112,
"step": 20150
},
{
"epoch": 3.767251025736665,
"grad_norm": 0.002332707168534398,
"learning_rate": 1.2940993627933483e-06,
"loss": 0.003599865138530731,
"step": 20200
},
{
"epoch": 3.7765759045132414,
"grad_norm": 0.007876844145357609,
"learning_rate": 1.242293943946537e-06,
"loss": 0.002118881195783615,
"step": 20250
},
{
"epoch": 3.7859007832898173,
"grad_norm": 0.06033371388912201,
"learning_rate": 1.1904885250997255e-06,
"loss": 0.013430379629135132,
"step": 20300
},
{
"epoch": 3.795225662066393,
"grad_norm": 0.007944832555949688,
"learning_rate": 1.1386831062529142e-06,
"loss": 0.011043739318847657,
"step": 20350
},
{
"epoch": 3.804550540842969,
"grad_norm": 0.009239411912858486,
"learning_rate": 1.0868776874061027e-06,
"loss": 0.007748922109603882,
"step": 20400
},
{
"epoch": 3.813875419619545,
"grad_norm": 0.0014006602577865124,
"learning_rate": 1.0350722685592914e-06,
"loss": 0.006385021805763245,
"step": 20450
},
{
"epoch": 3.823200298396121,
"grad_norm": 1.487459421157837,
"learning_rate": 9.8326684971248e-07,
"loss": 0.009500337243080139,
"step": 20500
},
{
"epoch": 3.832525177172697,
"grad_norm": 0.029605276882648468,
"learning_rate": 9.314614308656686e-07,
"loss": 0.003571970164775848,
"step": 20550
},
{
"epoch": 3.8418500559492728,
"grad_norm": 0.0038495927583426237,
"learning_rate": 8.796560120188573e-07,
"loss": 0.006721885204315186,
"step": 20600
},
{
"epoch": 3.8511749347258486,
"grad_norm": 0.015677325427532196,
"learning_rate": 8.278505931720458e-07,
"loss": 0.0021590781211853027,
"step": 20650
},
{
"epoch": 3.8604998135024244,
"grad_norm": 0.00773986428976059,
"learning_rate": 7.760451743252345e-07,
"loss": 0.0072018647193908695,
"step": 20700
},
{
"epoch": 3.8698246922790003,
"grad_norm": 0.006910277064889669,
"learning_rate": 7.24239755478423e-07,
"loss": 0.0017287896573543549,
"step": 20750
},
{
"epoch": 3.879149571055576,
"grad_norm": 0.16674445569515228,
"learning_rate": 6.724343366316118e-07,
"loss": 0.003435662090778351,
"step": 20800
},
{
"epoch": 3.8884744498321524,
"grad_norm": 0.11410090327262878,
"learning_rate": 6.206289177848004e-07,
"loss": 0.0015013472735881806,
"step": 20850
},
{
"epoch": 3.8977993286087282,
"grad_norm": 0.1628509759902954,
"learning_rate": 5.68823498937989e-07,
"loss": 0.0035611391067504883,
"step": 20900
},
{
"epoch": 3.907124207385304,
"grad_norm": 0.22206370532512665,
"learning_rate": 5.170180800911776e-07,
"loss": 0.005973511338233948,
"step": 20950
},
{
"epoch": 3.91644908616188,
"grad_norm": 0.285854309797287,
"learning_rate": 4.6521266124436624e-07,
"loss": 0.002481496632099152,
"step": 21000
},
{
"epoch": 3.9257739649384558,
"grad_norm": 0.0018572107655927539,
"learning_rate": 4.1340724239755486e-07,
"loss": 0.0034265148639678954,
"step": 21050
},
{
"epoch": 3.9350988437150316,
"grad_norm": 0.00111959979403764,
"learning_rate": 3.6160182355074347e-07,
"loss": 0.003972585201263428,
"step": 21100
},
{
"epoch": 3.9444237224916074,
"grad_norm": 0.006579425185918808,
"learning_rate": 3.0979640470393204e-07,
"loss": 0.007484051585197449,
"step": 21150
},
{
"epoch": 3.9537486012681837,
"grad_norm": 0.0016149668954312801,
"learning_rate": 2.5799098585712066e-07,
"loss": 0.005609593391418457,
"step": 21200
},
{
"epoch": 3.9630734800447596,
"grad_norm": 8.504377365112305,
"learning_rate": 2.061855670103093e-07,
"loss": 0.00788326621055603,
"step": 21250
},
{
"epoch": 3.9723983588213354,
"grad_norm": 0.29231831431388855,
"learning_rate": 1.5438014816349792e-07,
"loss": 0.0017250549793243408,
"step": 21300
},
{
"epoch": 3.9817232375979112,
"grad_norm": 0.006406121421605349,
"learning_rate": 1.0257472931668653e-07,
"loss": 0.0028238424658775328,
"step": 21350
},
{
"epoch": 3.991048116374487,
"grad_norm": 0.005788094364106655,
"learning_rate": 5.076931046987516e-08,
"loss": 0.0027461829781532288,
"step": 21400
},
{
"epoch": 4.0,
"eval_accuracy": 0.9926021944640846,
"eval_f1": 0.9201277955271566,
"eval_loss": 0.045499056577682495,
"eval_precision": 0.9121140142517815,
"eval_recall": 0.9282836422240129,
"eval_runtime": 7.0731,
"eval_samples_per_second": 772.358,
"eval_steps_per_second": 96.562,
"step": 21448
},
{
"epoch": 4.0,
"step": 21448,
"total_flos": 2300195917669620.0,
"train_loss": 0.0979897177003356,
"train_runtime": 1420.7755,
"train_samples_per_second": 120.762,
"train_steps_per_second": 15.096
},
{
"epoch": 4.0,
"step": 21448,
"validation_accuracy": 0.9926283813863357,
"validation_f1": 0.9199760526840951,
"validation_loss": 0.04548870399594307,
"validation_precision": 0.911427441676552,
"validation_recall": 0.9286865431103949,
"validation_runtime": 6.1777,
"validation_samples_per_second": 884.309,
"validation_steps_per_second": 110.559
},
{
"epoch": 4.0,
"step": 21448,
"test_accuracy": 0.9926283813863357,
"test_f1": 0.9199760526840951,
"test_loss": 0.04548870399594307,
"test_precision": 0.911427441676552,
"test_recall": 0.9286865431103949,
"test_runtime": 6.2499,
"test_samples_per_second": 874.087,
"test_steps_per_second": 109.281
}
],
"logging_steps": 50,
"max_steps": 21448,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2300195917669620.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}