emotion-koelectra / checkpoint-16135 /trainer_state.json
Seonghaa's picture
🚀 Upload KoELECTRA emotion classification model
3330fd3 verified
{
"best_global_step": 16135,
"best_metric": 0.7960859271865419,
"best_model_checkpoint": "/content/drive/MyDrive/\uac10\uc815\ubd84\ub958/data/emotion_model/checkpoint-16135",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 16135,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.015494267121165169,
"grad_norm": 1.9321871995925903,
"learning_rate": 4.5553145336225596e-07,
"loss": 1.7919,
"step": 50
},
{
"epoch": 0.030988534242330338,
"grad_norm": 1.7712626457214355,
"learning_rate": 9.203594669972111e-07,
"loss": 1.7865,
"step": 100
},
{
"epoch": 0.04648280136349551,
"grad_norm": 1.9185744524002075,
"learning_rate": 1.385187480632166e-06,
"loss": 1.7864,
"step": 150
},
{
"epoch": 0.061977068484660676,
"grad_norm": 1.9046003818511963,
"learning_rate": 1.8500154942671213e-06,
"loss": 1.7864,
"step": 200
},
{
"epoch": 0.07747133560582585,
"grad_norm": 2.0914034843444824,
"learning_rate": 2.3148435079020763e-06,
"loss": 1.7813,
"step": 250
},
{
"epoch": 0.09296560272699102,
"grad_norm": 2.088219165802002,
"learning_rate": 2.7796715215370313e-06,
"loss": 1.7773,
"step": 300
},
{
"epoch": 0.10845986984815618,
"grad_norm": 2.1377577781677246,
"learning_rate": 3.2444995351719864e-06,
"loss": 1.7587,
"step": 350
},
{
"epoch": 0.12395413696932135,
"grad_norm": 2.2140750885009766,
"learning_rate": 3.7093275488069414e-06,
"loss": 1.7388,
"step": 400
},
{
"epoch": 0.13944840409048653,
"grad_norm": 2.1278295516967773,
"learning_rate": 4.174155562441896e-06,
"loss": 1.6861,
"step": 450
},
{
"epoch": 0.1549426712116517,
"grad_norm": 4.734658241271973,
"learning_rate": 4.638983576076852e-06,
"loss": 1.6267,
"step": 500
},
{
"epoch": 0.17043693833281687,
"grad_norm": 4.140384197235107,
"learning_rate": 5.103811589711806e-06,
"loss": 1.5732,
"step": 550
},
{
"epoch": 0.18593120545398203,
"grad_norm": 2.705493688583374,
"learning_rate": 5.568639603346762e-06,
"loss": 1.5438,
"step": 600
},
{
"epoch": 0.2014254725751472,
"grad_norm": 5.3791422843933105,
"learning_rate": 6.0334676169817164e-06,
"loss": 1.4644,
"step": 650
},
{
"epoch": 0.21691973969631237,
"grad_norm": 2.8989250659942627,
"learning_rate": 6.498295630616672e-06,
"loss": 1.4743,
"step": 700
},
{
"epoch": 0.23241400681747754,
"grad_norm": 3.40291166305542,
"learning_rate": 6.963123644251627e-06,
"loss": 1.4187,
"step": 750
},
{
"epoch": 0.2479082739386427,
"grad_norm": 5.748068809509277,
"learning_rate": 7.427951657886583e-06,
"loss": 1.3533,
"step": 800
},
{
"epoch": 0.26340254105980787,
"grad_norm": 5.777422904968262,
"learning_rate": 7.892779671521537e-06,
"loss": 1.3099,
"step": 850
},
{
"epoch": 0.27889680818097307,
"grad_norm": 4.8046112060546875,
"learning_rate": 8.357607685156493e-06,
"loss": 1.2723,
"step": 900
},
{
"epoch": 0.2943910753021382,
"grad_norm": 5.549858570098877,
"learning_rate": 8.822435698791447e-06,
"loss": 1.2325,
"step": 950
},
{
"epoch": 0.3098853424233034,
"grad_norm": 6.851742744445801,
"learning_rate": 9.287263712426402e-06,
"loss": 1.1884,
"step": 1000
},
{
"epoch": 0.32537960954446854,
"grad_norm": 5.2497735023498535,
"learning_rate": 9.752091726061357e-06,
"loss": 1.2224,
"step": 1050
},
{
"epoch": 0.34087387666563373,
"grad_norm": 7.023674488067627,
"learning_rate": 1.0216919739696313e-05,
"loss": 1.177,
"step": 1100
},
{
"epoch": 0.3563681437867989,
"grad_norm": 4.888996124267578,
"learning_rate": 1.0681747753331269e-05,
"loss": 1.1577,
"step": 1150
},
{
"epoch": 0.37186241090796407,
"grad_norm": 6.2133660316467285,
"learning_rate": 1.1146575766966222e-05,
"loss": 1.1726,
"step": 1200
},
{
"epoch": 0.3873566780291292,
"grad_norm": 6.936697483062744,
"learning_rate": 1.1611403780601178e-05,
"loss": 1.1001,
"step": 1250
},
{
"epoch": 0.4028509451502944,
"grad_norm": 8.526293754577637,
"learning_rate": 1.2076231794236133e-05,
"loss": 1.0752,
"step": 1300
},
{
"epoch": 0.41834521227145954,
"grad_norm": 5.5933756828308105,
"learning_rate": 1.254105980787109e-05,
"loss": 1.0809,
"step": 1350
},
{
"epoch": 0.43383947939262474,
"grad_norm": 6.998812675476074,
"learning_rate": 1.3005887821506042e-05,
"loss": 1.0566,
"step": 1400
},
{
"epoch": 0.4493337465137899,
"grad_norm": 7.077617645263672,
"learning_rate": 1.3470715835140998e-05,
"loss": 1.0993,
"step": 1450
},
{
"epoch": 0.4648280136349551,
"grad_norm": 8.715201377868652,
"learning_rate": 1.3935543848775953e-05,
"loss": 1.0375,
"step": 1500
},
{
"epoch": 0.4803222807561202,
"grad_norm": 6.017217636108398,
"learning_rate": 1.440037186241091e-05,
"loss": 1.0388,
"step": 1550
},
{
"epoch": 0.4958165478772854,
"grad_norm": 5.349973201751709,
"learning_rate": 1.4865199876045862e-05,
"loss": 1.0354,
"step": 1600
},
{
"epoch": 0.5113108149984505,
"grad_norm": 12.728338241577148,
"learning_rate": 1.533002788968082e-05,
"loss": 1.0314,
"step": 1650
},
{
"epoch": 0.5268050821196157,
"grad_norm": 5.962468147277832,
"learning_rate": 1.5794855903315773e-05,
"loss": 1.03,
"step": 1700
},
{
"epoch": 0.5422993492407809,
"grad_norm": 5.971400260925293,
"learning_rate": 1.6259683916950726e-05,
"loss": 1.0541,
"step": 1750
},
{
"epoch": 0.5577936163619461,
"grad_norm": 6.260463714599609,
"learning_rate": 1.6724511930585682e-05,
"loss": 0.9831,
"step": 1800
},
{
"epoch": 0.5732878834831112,
"grad_norm": 7.8115010261535645,
"learning_rate": 1.718933994422064e-05,
"loss": 0.966,
"step": 1850
},
{
"epoch": 0.5887821506042764,
"grad_norm": 5.005403995513916,
"learning_rate": 1.7654167957855595e-05,
"loss": 0.9592,
"step": 1900
},
{
"epoch": 0.6042764177254416,
"grad_norm": 7.7732157707214355,
"learning_rate": 1.8118995971490548e-05,
"loss": 0.9766,
"step": 1950
},
{
"epoch": 0.6197706848466068,
"grad_norm": 7.265392303466797,
"learning_rate": 1.8583823985125504e-05,
"loss": 1.0171,
"step": 2000
},
{
"epoch": 0.6352649519677719,
"grad_norm": 15.946109771728516,
"learning_rate": 1.904865199876046e-05,
"loss": 0.9824,
"step": 2050
},
{
"epoch": 0.6507592190889371,
"grad_norm": 7.261445999145508,
"learning_rate": 1.9513480012395417e-05,
"loss": 0.9699,
"step": 2100
},
{
"epoch": 0.6662534862101023,
"grad_norm": 8.201744079589844,
"learning_rate": 1.997830802603037e-05,
"loss": 0.9957,
"step": 2150
},
{
"epoch": 0.6817477533312675,
"grad_norm": 6.183067798614502,
"learning_rate": 2.0443136039665322e-05,
"loss": 0.8554,
"step": 2200
},
{
"epoch": 0.6972420204524326,
"grad_norm": 7.481590270996094,
"learning_rate": 2.090796405330028e-05,
"loss": 0.9929,
"step": 2250
},
{
"epoch": 0.7127362875735977,
"grad_norm": 7.3274030685424805,
"learning_rate": 2.1372792066935235e-05,
"loss": 0.9438,
"step": 2300
},
{
"epoch": 0.7282305546947629,
"grad_norm": 11.69247055053711,
"learning_rate": 2.183762008057019e-05,
"loss": 0.9767,
"step": 2350
},
{
"epoch": 0.7437248218159281,
"grad_norm": 7.929721832275391,
"learning_rate": 2.2302448094205144e-05,
"loss": 1.0036,
"step": 2400
},
{
"epoch": 0.7592190889370932,
"grad_norm": 9.753717422485352,
"learning_rate": 2.27672761078401e-05,
"loss": 0.9726,
"step": 2450
},
{
"epoch": 0.7747133560582584,
"grad_norm": 7.797086715698242,
"learning_rate": 2.3232104121475057e-05,
"loss": 0.9322,
"step": 2500
},
{
"epoch": 0.7902076231794236,
"grad_norm": 6.927332878112793,
"learning_rate": 2.369693213511001e-05,
"loss": 0.9378,
"step": 2550
},
{
"epoch": 0.8057018903005888,
"grad_norm": 3.726092576980591,
"learning_rate": 2.4161760148744962e-05,
"loss": 0.958,
"step": 2600
},
{
"epoch": 0.821196157421754,
"grad_norm": 5.661774635314941,
"learning_rate": 2.462658816237992e-05,
"loss": 0.9651,
"step": 2650
},
{
"epoch": 0.8366904245429191,
"grad_norm": 6.513345718383789,
"learning_rate": 2.5091416176014875e-05,
"loss": 0.971,
"step": 2700
},
{
"epoch": 0.8521846916640843,
"grad_norm": 6.713255405426025,
"learning_rate": 2.555624418964983e-05,
"loss": 0.8616,
"step": 2750
},
{
"epoch": 0.8676789587852495,
"grad_norm": 8.527266502380371,
"learning_rate": 2.6021072203284784e-05,
"loss": 0.9413,
"step": 2800
},
{
"epoch": 0.8831732259064147,
"grad_norm": 6.599502086639404,
"learning_rate": 2.648590021691974e-05,
"loss": 0.9985,
"step": 2850
},
{
"epoch": 0.8986674930275798,
"grad_norm": 4.0680155754089355,
"learning_rate": 2.6950728230554697e-05,
"loss": 0.9415,
"step": 2900
},
{
"epoch": 0.914161760148745,
"grad_norm": 5.083493232727051,
"learning_rate": 2.741555624418965e-05,
"loss": 0.9805,
"step": 2950
},
{
"epoch": 0.9296560272699101,
"grad_norm": 4.0469069480896,
"learning_rate": 2.7880384257824606e-05,
"loss": 0.9547,
"step": 3000
},
{
"epoch": 0.9451502943910753,
"grad_norm": 6.075752258300781,
"learning_rate": 2.834521227145956e-05,
"loss": 0.9623,
"step": 3050
},
{
"epoch": 0.9606445615122404,
"grad_norm": 6.5252299308776855,
"learning_rate": 2.8810040285094515e-05,
"loss": 0.9838,
"step": 3100
},
{
"epoch": 0.9761388286334056,
"grad_norm": 6.530562877655029,
"learning_rate": 2.927486829872947e-05,
"loss": 0.911,
"step": 3150
},
{
"epoch": 0.9916330957545708,
"grad_norm": 8.217161178588867,
"learning_rate": 2.9739696312364428e-05,
"loss": 0.9457,
"step": 3200
},
{
"epoch": 1.0,
"eval_accuracy": 0.7087787983737389,
"eval_f1": 0.7069046924545164,
"eval_loss": 0.8573769330978394,
"eval_runtime": 25.5149,
"eval_samples_per_second": 260.279,
"eval_steps_per_second": 16.304,
"step": 3227
},
{
"epoch": 1.007127362875736,
"grad_norm": 4.367455005645752,
"learning_rate": 2.997727507488896e-05,
"loss": 0.9143,
"step": 3250
},
{
"epoch": 1.022621629996901,
"grad_norm": 4.636725902557373,
"learning_rate": 2.9925627517818407e-05,
"loss": 0.9304,
"step": 3300
},
{
"epoch": 1.0381158971180664,
"grad_norm": 4.251437664031982,
"learning_rate": 2.9873979960747857e-05,
"loss": 0.8549,
"step": 3350
},
{
"epoch": 1.0536101642392315,
"grad_norm": 6.648655414581299,
"learning_rate": 2.9822332403677307e-05,
"loss": 0.8698,
"step": 3400
},
{
"epoch": 1.0691044313603966,
"grad_norm": 7.102205276489258,
"learning_rate": 2.9770684846606757e-05,
"loss": 0.8597,
"step": 3450
},
{
"epoch": 1.0845986984815619,
"grad_norm": 10.821270942687988,
"learning_rate": 2.9719037289536206e-05,
"loss": 0.8457,
"step": 3500
},
{
"epoch": 1.100092965602727,
"grad_norm": 6.111588001251221,
"learning_rate": 2.9667389732465652e-05,
"loss": 0.8973,
"step": 3550
},
{
"epoch": 1.1155872327238923,
"grad_norm": 9.016953468322754,
"learning_rate": 2.9615742175395106e-05,
"loss": 0.8228,
"step": 3600
},
{
"epoch": 1.1310814998450573,
"grad_norm": 7.717069625854492,
"learning_rate": 2.9564094618324555e-05,
"loss": 0.8103,
"step": 3650
},
{
"epoch": 1.1465757669662224,
"grad_norm": 7.848579406738281,
"learning_rate": 2.9512447061254005e-05,
"loss": 0.8097,
"step": 3700
},
{
"epoch": 1.1620700340873877,
"grad_norm": 4.738124847412109,
"learning_rate": 2.9460799504183455e-05,
"loss": 0.9105,
"step": 3750
},
{
"epoch": 1.1775643012085528,
"grad_norm": 5.289875507354736,
"learning_rate": 2.94091519471129e-05,
"loss": 0.8876,
"step": 3800
},
{
"epoch": 1.1930585683297181,
"grad_norm": 6.445308685302734,
"learning_rate": 2.935750439004235e-05,
"loss": 0.8377,
"step": 3850
},
{
"epoch": 1.2085528354508832,
"grad_norm": 4.725327968597412,
"learning_rate": 2.93058568329718e-05,
"loss": 0.9332,
"step": 3900
},
{
"epoch": 1.2240471025720483,
"grad_norm": 53.85081481933594,
"learning_rate": 2.925420927590125e-05,
"loss": 0.8882,
"step": 3950
},
{
"epoch": 1.2395413696932136,
"grad_norm": 5.677978515625,
"learning_rate": 2.9202561718830703e-05,
"loss": 0.8481,
"step": 4000
},
{
"epoch": 1.2550356368143787,
"grad_norm": 3.941765785217285,
"learning_rate": 2.915091416176015e-05,
"loss": 0.835,
"step": 4050
},
{
"epoch": 1.2705299039355438,
"grad_norm": 8.099725723266602,
"learning_rate": 2.90992666046896e-05,
"loss": 0.8322,
"step": 4100
},
{
"epoch": 1.286024171056709,
"grad_norm": 6.59591007232666,
"learning_rate": 2.904761904761905e-05,
"loss": 0.8809,
"step": 4150
},
{
"epoch": 1.3015184381778742,
"grad_norm": 7.200226306915283,
"learning_rate": 2.8995971490548498e-05,
"loss": 0.8609,
"step": 4200
},
{
"epoch": 1.3170127052990392,
"grad_norm": 4.902937412261963,
"learning_rate": 2.8944323933477948e-05,
"loss": 0.8633,
"step": 4250
},
{
"epoch": 1.3325069724202045,
"grad_norm": 5.792146682739258,
"learning_rate": 2.8892676376407394e-05,
"loss": 0.8684,
"step": 4300
},
{
"epoch": 1.3480012395413696,
"grad_norm": 4.636809349060059,
"learning_rate": 2.8841028819336844e-05,
"loss": 0.8245,
"step": 4350
},
{
"epoch": 1.363495506662535,
"grad_norm": 5.28842306137085,
"learning_rate": 2.8789381262266297e-05,
"loss": 0.7859,
"step": 4400
},
{
"epoch": 1.3789897737837,
"grad_norm": 4.259128570556641,
"learning_rate": 2.8737733705195747e-05,
"loss": 0.8228,
"step": 4450
},
{
"epoch": 1.394484040904865,
"grad_norm": 7.914375305175781,
"learning_rate": 2.8686086148125196e-05,
"loss": 0.9448,
"step": 4500
},
{
"epoch": 1.4099783080260304,
"grad_norm": 7.636547088623047,
"learning_rate": 2.8634438591054643e-05,
"loss": 0.8781,
"step": 4550
},
{
"epoch": 1.4254725751471955,
"grad_norm": 8.681707382202148,
"learning_rate": 2.8582791033984092e-05,
"loss": 0.8367,
"step": 4600
},
{
"epoch": 1.4409668422683608,
"grad_norm": 7.864759922027588,
"learning_rate": 2.8531143476913542e-05,
"loss": 0.9088,
"step": 4650
},
{
"epoch": 1.4564611093895259,
"grad_norm": 4.892348289489746,
"learning_rate": 2.8479495919842992e-05,
"loss": 0.8993,
"step": 4700
},
{
"epoch": 1.471955376510691,
"grad_norm": 23.208873748779297,
"learning_rate": 2.842784836277244e-05,
"loss": 0.8542,
"step": 4750
},
{
"epoch": 1.4874496436318563,
"grad_norm": 8.983469009399414,
"learning_rate": 2.837620080570189e-05,
"loss": 0.949,
"step": 4800
},
{
"epoch": 1.5029439107530214,
"grad_norm": 10.706644058227539,
"learning_rate": 2.832455324863134e-05,
"loss": 0.9008,
"step": 4850
},
{
"epoch": 1.5184381778741867,
"grad_norm": 4.685935020446777,
"learning_rate": 2.827290569156079e-05,
"loss": 0.8613,
"step": 4900
},
{
"epoch": 1.5339324449953518,
"grad_norm": 5.286406993865967,
"learning_rate": 2.822125813449024e-05,
"loss": 0.8929,
"step": 4950
},
{
"epoch": 1.5494267121165168,
"grad_norm": 4.907707691192627,
"learning_rate": 2.816961057741969e-05,
"loss": 0.8321,
"step": 5000
},
{
"epoch": 1.564920979237682,
"grad_norm": 6.398087501525879,
"learning_rate": 2.8117963020349136e-05,
"loss": 0.8626,
"step": 5050
},
{
"epoch": 1.5804152463588472,
"grad_norm": 5.323617458343506,
"learning_rate": 2.8066315463278586e-05,
"loss": 0.8324,
"step": 5100
},
{
"epoch": 1.5959095134800125,
"grad_norm": 4.136271953582764,
"learning_rate": 2.8014667906208035e-05,
"loss": 0.879,
"step": 5150
},
{
"epoch": 1.6114037806011776,
"grad_norm": 6.873619556427002,
"learning_rate": 2.796302034913749e-05,
"loss": 0.9043,
"step": 5200
},
{
"epoch": 1.6268980477223427,
"grad_norm": 7.138693809509277,
"learning_rate": 2.7911372792066938e-05,
"loss": 0.8183,
"step": 5250
},
{
"epoch": 1.6423923148435078,
"grad_norm": 6.483767032623291,
"learning_rate": 2.7859725234996384e-05,
"loss": 0.8867,
"step": 5300
},
{
"epoch": 1.657886581964673,
"grad_norm": 3.2249104976654053,
"learning_rate": 2.7808077677925834e-05,
"loss": 0.8097,
"step": 5350
},
{
"epoch": 1.6733808490858384,
"grad_norm": 6.961575984954834,
"learning_rate": 2.7756430120855284e-05,
"loss": 0.8364,
"step": 5400
},
{
"epoch": 1.6888751162070035,
"grad_norm": 7.0920000076293945,
"learning_rate": 2.7704782563784733e-05,
"loss": 0.8283,
"step": 5450
},
{
"epoch": 1.7043693833281686,
"grad_norm": 5.436604976654053,
"learning_rate": 2.7653135006714183e-05,
"loss": 0.8786,
"step": 5500
},
{
"epoch": 1.7198636504493336,
"grad_norm": 4.0141282081604,
"learning_rate": 2.760148744964363e-05,
"loss": 0.8452,
"step": 5550
},
{
"epoch": 1.735357917570499,
"grad_norm": 5.783074378967285,
"learning_rate": 2.7549839892573083e-05,
"loss": 0.8168,
"step": 5600
},
{
"epoch": 1.750852184691664,
"grad_norm": 7.773756504058838,
"learning_rate": 2.7498192335502532e-05,
"loss": 0.8598,
"step": 5650
},
{
"epoch": 1.7663464518128293,
"grad_norm": 5.375339984893799,
"learning_rate": 2.7446544778431982e-05,
"loss": 0.8366,
"step": 5700
},
{
"epoch": 1.7818407189339944,
"grad_norm": 4.240859031677246,
"learning_rate": 2.739489722136143e-05,
"loss": 0.8136,
"step": 5750
},
{
"epoch": 1.7973349860551595,
"grad_norm": 6.107599258422852,
"learning_rate": 2.734324966429088e-05,
"loss": 0.8074,
"step": 5800
},
{
"epoch": 1.8128292531763246,
"grad_norm": 6.027589797973633,
"learning_rate": 2.7291602107220328e-05,
"loss": 0.7808,
"step": 5850
},
{
"epoch": 1.82832352029749,
"grad_norm": 4.829204559326172,
"learning_rate": 2.7239954550149777e-05,
"loss": 0.8473,
"step": 5900
},
{
"epoch": 1.8438177874186552,
"grad_norm": 5.385358810424805,
"learning_rate": 2.7188306993079227e-05,
"loss": 0.844,
"step": 5950
},
{
"epoch": 1.8593120545398203,
"grad_norm": 5.991063594818115,
"learning_rate": 2.713665943600868e-05,
"loss": 0.8667,
"step": 6000
},
{
"epoch": 1.8748063216609854,
"grad_norm": 4.269604682922363,
"learning_rate": 2.708501187893813e-05,
"loss": 0.8987,
"step": 6050
},
{
"epoch": 1.8903005887821505,
"grad_norm": 6.90878438949585,
"learning_rate": 2.7033364321867576e-05,
"loss": 0.8517,
"step": 6100
},
{
"epoch": 1.9057948559033158,
"grad_norm": 8.742233276367188,
"learning_rate": 2.6981716764797026e-05,
"loss": 0.8729,
"step": 6150
},
{
"epoch": 1.921289123024481,
"grad_norm": 9.10084342956543,
"learning_rate": 2.6930069207726475e-05,
"loss": 0.8803,
"step": 6200
},
{
"epoch": 1.9367833901456462,
"grad_norm": 4.210537433624268,
"learning_rate": 2.6878421650655925e-05,
"loss": 0.7938,
"step": 6250
},
{
"epoch": 1.9522776572668112,
"grad_norm": 6.604791641235352,
"learning_rate": 2.6826774093585375e-05,
"loss": 0.7958,
"step": 6300
},
{
"epoch": 1.9677719243879763,
"grad_norm": 6.213857173919678,
"learning_rate": 2.677512653651482e-05,
"loss": 0.8463,
"step": 6350
},
{
"epoch": 1.9832661915091416,
"grad_norm": 4.303800582885742,
"learning_rate": 2.6723478979444274e-05,
"loss": 0.7909,
"step": 6400
},
{
"epoch": 1.998760458630307,
"grad_norm": 4.933095932006836,
"learning_rate": 2.6671831422373724e-05,
"loss": 0.7888,
"step": 6450
},
{
"epoch": 2.0,
"eval_accuracy": 0.7590724288510766,
"eval_f1": 0.7587245577707713,
"eval_loss": 0.7010347247123718,
"eval_runtime": 25.4199,
"eval_samples_per_second": 261.252,
"eval_steps_per_second": 16.365,
"step": 6454
},
{
"epoch": 2.014254725751472,
"grad_norm": 4.0570244789123535,
"learning_rate": 2.6620183865303173e-05,
"loss": 0.7236,
"step": 6500
},
{
"epoch": 2.029748992872637,
"grad_norm": 5.307652473449707,
"learning_rate": 2.6568536308232623e-05,
"loss": 0.7213,
"step": 6550
},
{
"epoch": 2.045243259993802,
"grad_norm": 5.398072719573975,
"learning_rate": 2.651688875116207e-05,
"loss": 0.6839,
"step": 6600
},
{
"epoch": 2.0607375271149673,
"grad_norm": 5.296418190002441,
"learning_rate": 2.646524119409152e-05,
"loss": 0.6856,
"step": 6650
},
{
"epoch": 2.076231794236133,
"grad_norm": 4.173377990722656,
"learning_rate": 2.641359363702097e-05,
"loss": 0.7109,
"step": 6700
},
{
"epoch": 2.091726061357298,
"grad_norm": 5.590676784515381,
"learning_rate": 2.636194607995042e-05,
"loss": 0.6814,
"step": 6750
},
{
"epoch": 2.107220328478463,
"grad_norm": 8.112780570983887,
"learning_rate": 2.631029852287987e-05,
"loss": 0.7302,
"step": 6800
},
{
"epoch": 2.122714595599628,
"grad_norm": 6.514364242553711,
"learning_rate": 2.6258650965809318e-05,
"loss": 0.6917,
"step": 6850
},
{
"epoch": 2.138208862720793,
"grad_norm": 8.156841278076172,
"learning_rate": 2.6207003408738767e-05,
"loss": 0.6568,
"step": 6900
},
{
"epoch": 2.1537031298419587,
"grad_norm": 7.641481876373291,
"learning_rate": 2.6155355851668217e-05,
"loss": 0.6132,
"step": 6950
},
{
"epoch": 2.1691973969631237,
"grad_norm": 6.33613395690918,
"learning_rate": 2.6103708294597667e-05,
"loss": 0.6393,
"step": 7000
},
{
"epoch": 2.184691664084289,
"grad_norm": 4.2916436195373535,
"learning_rate": 2.6052060737527116e-05,
"loss": 0.6709,
"step": 7050
},
{
"epoch": 2.200185931205454,
"grad_norm": 4.763488292694092,
"learning_rate": 2.6000413180456563e-05,
"loss": 0.6919,
"step": 7100
},
{
"epoch": 2.215680198326619,
"grad_norm": 8.614394187927246,
"learning_rate": 2.5948765623386012e-05,
"loss": 0.6501,
"step": 7150
},
{
"epoch": 2.2311744654477845,
"grad_norm": 9.684426307678223,
"learning_rate": 2.5897118066315465e-05,
"loss": 0.6947,
"step": 7200
},
{
"epoch": 2.2466687325689496,
"grad_norm": 6.210818767547607,
"learning_rate": 2.5845470509244915e-05,
"loss": 0.6873,
"step": 7250
},
{
"epoch": 2.2621629996901147,
"grad_norm": 6.774372577667236,
"learning_rate": 2.5793822952174365e-05,
"loss": 0.7195,
"step": 7300
},
{
"epoch": 2.27765726681128,
"grad_norm": 6.014688491821289,
"learning_rate": 2.574217539510381e-05,
"loss": 0.6298,
"step": 7350
},
{
"epoch": 2.293151533932445,
"grad_norm": 14.994784355163574,
"learning_rate": 2.569052783803326e-05,
"loss": 0.7403,
"step": 7400
},
{
"epoch": 2.3086458010536104,
"grad_norm": 6.315488815307617,
"learning_rate": 2.563888028096271e-05,
"loss": 0.6679,
"step": 7450
},
{
"epoch": 2.3241400681747755,
"grad_norm": 8.482314109802246,
"learning_rate": 2.558723272389216e-05,
"loss": 0.7173,
"step": 7500
},
{
"epoch": 2.3396343352959406,
"grad_norm": 10.161298751831055,
"learning_rate": 2.553558516682161e-05,
"loss": 0.732,
"step": 7550
},
{
"epoch": 2.3551286024171056,
"grad_norm": 6.758267402648926,
"learning_rate": 2.548393760975106e-05,
"loss": 0.6192,
"step": 7600
},
{
"epoch": 2.3706228695382707,
"grad_norm": 4.528532981872559,
"learning_rate": 2.543229005268051e-05,
"loss": 0.7614,
"step": 7650
},
{
"epoch": 2.3861171366594363,
"grad_norm": 6.397975921630859,
"learning_rate": 2.538064249560996e-05,
"loss": 0.6951,
"step": 7700
},
{
"epoch": 2.4016114037806013,
"grad_norm": 5.440258979797363,
"learning_rate": 2.532899493853941e-05,
"loss": 0.7188,
"step": 7750
},
{
"epoch": 2.4171056709017664,
"grad_norm": 2.4531173706054688,
"learning_rate": 2.5277347381468858e-05,
"loss": 0.6347,
"step": 7800
},
{
"epoch": 2.4325999380229315,
"grad_norm": 15.269991874694824,
"learning_rate": 2.5225699824398304e-05,
"loss": 0.6601,
"step": 7850
},
{
"epoch": 2.4480942051440966,
"grad_norm": 6.438554286956787,
"learning_rate": 2.5174052267327754e-05,
"loss": 0.698,
"step": 7900
},
{
"epoch": 2.4635884722652617,
"grad_norm": 8.922213554382324,
"learning_rate": 2.5122404710257204e-05,
"loss": 0.6958,
"step": 7950
},
{
"epoch": 2.479082739386427,
"grad_norm": 6.724533557891846,
"learning_rate": 2.5070757153186657e-05,
"loss": 0.7131,
"step": 8000
},
{
"epoch": 2.4945770065075923,
"grad_norm": 5.617169380187988,
"learning_rate": 2.5019109596116107e-05,
"loss": 0.7711,
"step": 8050
},
{
"epoch": 2.5100712736287574,
"grad_norm": 6.441185474395752,
"learning_rate": 2.4967462039045553e-05,
"loss": 0.6612,
"step": 8100
},
{
"epoch": 2.5255655407499225,
"grad_norm": 6.033916473388672,
"learning_rate": 2.4915814481975003e-05,
"loss": 0.698,
"step": 8150
},
{
"epoch": 2.5410598078710875,
"grad_norm": 6.174665451049805,
"learning_rate": 2.4864166924904452e-05,
"loss": 0.6968,
"step": 8200
},
{
"epoch": 2.5565540749922526,
"grad_norm": 20.01167869567871,
"learning_rate": 2.4812519367833902e-05,
"loss": 0.6456,
"step": 8250
},
{
"epoch": 2.572048342113418,
"grad_norm": 10.404682159423828,
"learning_rate": 2.476087181076335e-05,
"loss": 0.6808,
"step": 8300
},
{
"epoch": 2.5875426092345832,
"grad_norm": 5.160488128662109,
"learning_rate": 2.47092242536928e-05,
"loss": 0.6913,
"step": 8350
},
{
"epoch": 2.6030368763557483,
"grad_norm": 6.452591896057129,
"learning_rate": 2.465757669662225e-05,
"loss": 0.6594,
"step": 8400
},
{
"epoch": 2.6185311434769134,
"grad_norm": 12.436300277709961,
"learning_rate": 2.46059291395517e-05,
"loss": 0.7255,
"step": 8450
},
{
"epoch": 2.6340254105980785,
"grad_norm": 6.132791042327881,
"learning_rate": 2.455428158248115e-05,
"loss": 0.6753,
"step": 8500
},
{
"epoch": 2.649519677719244,
"grad_norm": 10.712909698486328,
"learning_rate": 2.45026340254106e-05,
"loss": 0.6445,
"step": 8550
},
{
"epoch": 2.665013944840409,
"grad_norm": 12.122429847717285,
"learning_rate": 2.445098646834005e-05,
"loss": 0.6424,
"step": 8600
},
{
"epoch": 2.680508211961574,
"grad_norm": 8.575897216796875,
"learning_rate": 2.4399338911269496e-05,
"loss": 0.7242,
"step": 8650
},
{
"epoch": 2.6960024790827393,
"grad_norm": 8.740906715393066,
"learning_rate": 2.4347691354198946e-05,
"loss": 0.6949,
"step": 8700
},
{
"epoch": 2.7114967462039044,
"grad_norm": 4.871994972229004,
"learning_rate": 2.4296043797128395e-05,
"loss": 0.787,
"step": 8750
},
{
"epoch": 2.72699101332507,
"grad_norm": 6.642944812774658,
"learning_rate": 2.424439624005785e-05,
"loss": 0.6925,
"step": 8800
},
{
"epoch": 2.742485280446235,
"grad_norm": 12.149236679077148,
"learning_rate": 2.4192748682987298e-05,
"loss": 0.6972,
"step": 8850
},
{
"epoch": 2.7579795475674,
"grad_norm": 8.100613594055176,
"learning_rate": 2.4141101125916744e-05,
"loss": 0.7358,
"step": 8900
},
{
"epoch": 2.773473814688565,
"grad_norm": 12.28987979888916,
"learning_rate": 2.4089453568846194e-05,
"loss": 0.7176,
"step": 8950
},
{
"epoch": 2.78896808180973,
"grad_norm": 9.355488777160645,
"learning_rate": 2.4037806011775644e-05,
"loss": 0.6856,
"step": 9000
},
{
"epoch": 2.8044623489308957,
"grad_norm": 11.875406265258789,
"learning_rate": 2.3986158454705093e-05,
"loss": 0.6501,
"step": 9050
},
{
"epoch": 2.819956616052061,
"grad_norm": 8.061235427856445,
"learning_rate": 2.3934510897634543e-05,
"loss": 0.6823,
"step": 9100
},
{
"epoch": 2.835450883173226,
"grad_norm": 7.949320316314697,
"learning_rate": 2.388286334056399e-05,
"loss": 0.6764,
"step": 9150
},
{
"epoch": 2.850945150294391,
"grad_norm": 5.9249587059021,
"learning_rate": 2.3831215783493442e-05,
"loss": 0.6511,
"step": 9200
},
{
"epoch": 2.866439417415556,
"grad_norm": 8.400185585021973,
"learning_rate": 2.3779568226422892e-05,
"loss": 0.6515,
"step": 9250
},
{
"epoch": 2.8819336845367216,
"grad_norm": 11.487894058227539,
"learning_rate": 2.3727920669352342e-05,
"loss": 0.6719,
"step": 9300
},
{
"epoch": 2.8974279516578867,
"grad_norm": 8.317901611328125,
"learning_rate": 2.367627311228179e-05,
"loss": 0.6697,
"step": 9350
},
{
"epoch": 2.9129222187790518,
"grad_norm": 9.878332138061523,
"learning_rate": 2.3624625555211238e-05,
"loss": 0.6801,
"step": 9400
},
{
"epoch": 2.928416485900217,
"grad_norm": 8.855628967285156,
"learning_rate": 2.3572977998140687e-05,
"loss": 0.6445,
"step": 9450
},
{
"epoch": 2.943910753021382,
"grad_norm": 5.350094318389893,
"learning_rate": 2.3521330441070137e-05,
"loss": 0.6891,
"step": 9500
},
{
"epoch": 2.9594050201425475,
"grad_norm": 8.540812492370605,
"learning_rate": 2.3469682883999587e-05,
"loss": 0.6556,
"step": 9550
},
{
"epoch": 2.9748992872637126,
"grad_norm": 4.337664604187012,
"learning_rate": 2.341803532692904e-05,
"loss": 0.7013,
"step": 9600
},
{
"epoch": 2.9903935543848776,
"grad_norm": 7.002617359161377,
"learning_rate": 2.3366387769858486e-05,
"loss": 0.6518,
"step": 9650
},
{
"epoch": 3.0,
"eval_accuracy": 0.777292576419214,
"eval_f1": 0.775972842719567,
"eval_loss": 0.69657963514328,
"eval_runtime": 25.4856,
"eval_samples_per_second": 260.579,
"eval_steps_per_second": 16.323,
"step": 9681
},
{
"epoch": 3.0058878215060427,
"grad_norm": 9.228548049926758,
"learning_rate": 2.3314740212787936e-05,
"loss": 0.5385,
"step": 9700
},
{
"epoch": 3.021382088627208,
"grad_norm": 4.332932472229004,
"learning_rate": 2.3263092655717385e-05,
"loss": 0.5513,
"step": 9750
},
{
"epoch": 3.036876355748373,
"grad_norm": 6.478864669799805,
"learning_rate": 2.3211445098646835e-05,
"loss": 0.4542,
"step": 9800
},
{
"epoch": 3.0523706228695384,
"grad_norm": 14.028499603271484,
"learning_rate": 2.3159797541576285e-05,
"loss": 0.4549,
"step": 9850
},
{
"epoch": 3.0678648899907035,
"grad_norm": 5.590787887573242,
"learning_rate": 2.310814998450573e-05,
"loss": 0.4624,
"step": 9900
},
{
"epoch": 3.0833591571118686,
"grad_norm": 5.623167514801025,
"learning_rate": 2.305650242743518e-05,
"loss": 0.4479,
"step": 9950
},
{
"epoch": 3.0988534242330337,
"grad_norm": 10.343826293945312,
"learning_rate": 2.3004854870364634e-05,
"loss": 0.5079,
"step": 10000
},
{
"epoch": 3.1143476913541988,
"grad_norm": 2.780686616897583,
"learning_rate": 2.2953207313294084e-05,
"loss": 0.43,
"step": 10050
},
{
"epoch": 3.1298419584753643,
"grad_norm": 10.917914390563965,
"learning_rate": 2.2901559756223533e-05,
"loss": 0.4932,
"step": 10100
},
{
"epoch": 3.1453362255965294,
"grad_norm": 14.870561599731445,
"learning_rate": 2.284991219915298e-05,
"loss": 0.483,
"step": 10150
},
{
"epoch": 3.1608304927176945,
"grad_norm": 15.64564323425293,
"learning_rate": 2.279826464208243e-05,
"loss": 0.5047,
"step": 10200
},
{
"epoch": 3.1763247598388595,
"grad_norm": 8.148391723632812,
"learning_rate": 2.274661708501188e-05,
"loss": 0.498,
"step": 10250
},
{
"epoch": 3.1918190269600246,
"grad_norm": 9.916448593139648,
"learning_rate": 2.269496952794133e-05,
"loss": 0.4896,
"step": 10300
},
{
"epoch": 3.20731329408119,
"grad_norm": 10.014134407043457,
"learning_rate": 2.2643321970870778e-05,
"loss": 0.4572,
"step": 10350
},
{
"epoch": 3.2228075612023552,
"grad_norm": 9.647527694702148,
"learning_rate": 2.2591674413800228e-05,
"loss": 0.4965,
"step": 10400
},
{
"epoch": 3.2383018283235203,
"grad_norm": 11.77087116241455,
"learning_rate": 2.2540026856729678e-05,
"loss": 0.512,
"step": 10450
},
{
"epoch": 3.2537960954446854,
"grad_norm": 3.3613386154174805,
"learning_rate": 2.2488379299659127e-05,
"loss": 0.5522,
"step": 10500
},
{
"epoch": 3.2692903625658505,
"grad_norm": 17.92693519592285,
"learning_rate": 2.2436731742588577e-05,
"loss": 0.4915,
"step": 10550
},
{
"epoch": 3.2847846296870156,
"grad_norm": 8.389365196228027,
"learning_rate": 2.2385084185518027e-05,
"loss": 0.5343,
"step": 10600
},
{
"epoch": 3.300278896808181,
"grad_norm": 9.849445343017578,
"learning_rate": 2.2333436628447473e-05,
"loss": 0.4925,
"step": 10650
},
{
"epoch": 3.315773163929346,
"grad_norm": 7.494227886199951,
"learning_rate": 2.2281789071376923e-05,
"loss": 0.5242,
"step": 10700
},
{
"epoch": 3.3312674310505113,
"grad_norm": 12.774617195129395,
"learning_rate": 2.2230141514306372e-05,
"loss": 0.522,
"step": 10750
},
{
"epoch": 3.3467616981716763,
"grad_norm": 4.167229175567627,
"learning_rate": 2.2178493957235822e-05,
"loss": 0.4852,
"step": 10800
},
{
"epoch": 3.3622559652928414,
"grad_norm": 7.823596000671387,
"learning_rate": 2.2126846400165275e-05,
"loss": 0.521,
"step": 10850
},
{
"epoch": 3.377750232414007,
"grad_norm": 9.712186813354492,
"learning_rate": 2.2075198843094725e-05,
"loss": 0.4931,
"step": 10900
},
{
"epoch": 3.393244499535172,
"grad_norm": 9.726935386657715,
"learning_rate": 2.202355128602417e-05,
"loss": 0.531,
"step": 10950
},
{
"epoch": 3.408738766656337,
"grad_norm": 8.613348007202148,
"learning_rate": 2.197190372895362e-05,
"loss": 0.4902,
"step": 11000
},
{
"epoch": 3.424233033777502,
"grad_norm": 17.698650360107422,
"learning_rate": 2.192025617188307e-05,
"loss": 0.4967,
"step": 11050
},
{
"epoch": 3.4397273008986673,
"grad_norm": 13.304680824279785,
"learning_rate": 2.186860861481252e-05,
"loss": 0.4998,
"step": 11100
},
{
"epoch": 3.455221568019833,
"grad_norm": 9.090615272521973,
"learning_rate": 2.181696105774197e-05,
"loss": 0.4797,
"step": 11150
},
{
"epoch": 3.470715835140998,
"grad_norm": 6.544071197509766,
"learning_rate": 2.176531350067142e-05,
"loss": 0.5405,
"step": 11200
},
{
"epoch": 3.486210102262163,
"grad_norm": 10.908158302307129,
"learning_rate": 2.171366594360087e-05,
"loss": 0.4663,
"step": 11250
},
{
"epoch": 3.501704369383328,
"grad_norm": 9.044700622558594,
"learning_rate": 2.166201838653032e-05,
"loss": 0.4755,
"step": 11300
},
{
"epoch": 3.517198636504493,
"grad_norm": 7.633232116699219,
"learning_rate": 2.161037082945977e-05,
"loss": 0.4182,
"step": 11350
},
{
"epoch": 3.5326929036256587,
"grad_norm": 5.32473087310791,
"learning_rate": 2.1558723272389218e-05,
"loss": 0.4987,
"step": 11400
},
{
"epoch": 3.5481871707468238,
"grad_norm": 9.8456392288208,
"learning_rate": 2.1507075715318664e-05,
"loss": 0.587,
"step": 11450
},
{
"epoch": 3.563681437867989,
"grad_norm": 12.52115535736084,
"learning_rate": 2.1455428158248114e-05,
"loss": 0.5331,
"step": 11500
},
{
"epoch": 3.579175704989154,
"grad_norm": 18.225566864013672,
"learning_rate": 2.1403780601177564e-05,
"loss": 0.4794,
"step": 11550
},
{
"epoch": 3.594669972110319,
"grad_norm": 8.749368667602539,
"learning_rate": 2.1352133044107013e-05,
"loss": 0.4967,
"step": 11600
},
{
"epoch": 3.6101642392314846,
"grad_norm": 8.760223388671875,
"learning_rate": 2.1300485487036466e-05,
"loss": 0.4963,
"step": 11650
},
{
"epoch": 3.6256585063526496,
"grad_norm": 15.518270492553711,
"learning_rate": 2.1248837929965913e-05,
"loss": 0.4456,
"step": 11700
},
{
"epoch": 3.6411527734738147,
"grad_norm": 9.451664924621582,
"learning_rate": 2.1197190372895362e-05,
"loss": 0.5235,
"step": 11750
},
{
"epoch": 3.65664704059498,
"grad_norm": 17.736055374145508,
"learning_rate": 2.1145542815824812e-05,
"loss": 0.4876,
"step": 11800
},
{
"epoch": 3.672141307716145,
"grad_norm": 24.323490142822266,
"learning_rate": 2.1093895258754262e-05,
"loss": 0.483,
"step": 11850
},
{
"epoch": 3.6876355748373104,
"grad_norm": 15.389254570007324,
"learning_rate": 2.104224770168371e-05,
"loss": 0.5127,
"step": 11900
},
{
"epoch": 3.7031298419584755,
"grad_norm": 11.283272743225098,
"learning_rate": 2.0990600144613158e-05,
"loss": 0.5282,
"step": 11950
},
{
"epoch": 3.7186241090796406,
"grad_norm": 11.002310752868652,
"learning_rate": 2.0938952587542607e-05,
"loss": 0.5459,
"step": 12000
},
{
"epoch": 3.7341183762008057,
"grad_norm": 6.972140312194824,
"learning_rate": 2.088730503047206e-05,
"loss": 0.536,
"step": 12050
},
{
"epoch": 3.7496126433219708,
"grad_norm": 4.202858924865723,
"learning_rate": 2.083565747340151e-05,
"loss": 0.5736,
"step": 12100
},
{
"epoch": 3.7651069104431363,
"grad_norm": 15.748515129089355,
"learning_rate": 2.078400991633096e-05,
"loss": 0.4715,
"step": 12150
},
{
"epoch": 3.7806011775643014,
"grad_norm": 6.696774482727051,
"learning_rate": 2.0732362359260406e-05,
"loss": 0.5545,
"step": 12200
},
{
"epoch": 3.7960954446854664,
"grad_norm": 7.366288661956787,
"learning_rate": 2.0680714802189856e-05,
"loss": 0.5736,
"step": 12250
},
{
"epoch": 3.8115897118066315,
"grad_norm": 13.58438777923584,
"learning_rate": 2.0629067245119306e-05,
"loss": 0.4255,
"step": 12300
},
{
"epoch": 3.8270839789277966,
"grad_norm": 9.109688758850098,
"learning_rate": 2.0577419688048755e-05,
"loss": 0.4565,
"step": 12350
},
{
"epoch": 3.842578246048962,
"grad_norm": 11.448044776916504,
"learning_rate": 2.0525772130978205e-05,
"loss": 0.5117,
"step": 12400
},
{
"epoch": 3.858072513170127,
"grad_norm": 6.876945495605469,
"learning_rate": 2.0474124573907655e-05,
"loss": 0.5543,
"step": 12450
},
{
"epoch": 3.8735667802912923,
"grad_norm": 11.25009536743164,
"learning_rate": 2.0422477016837104e-05,
"loss": 0.456,
"step": 12500
},
{
"epoch": 3.8890610474124574,
"grad_norm": 13.992502212524414,
"learning_rate": 2.0370829459766554e-05,
"loss": 0.4907,
"step": 12550
},
{
"epoch": 3.9045553145336225,
"grad_norm": 11.92656421661377,
"learning_rate": 2.0319181902696004e-05,
"loss": 0.4841,
"step": 12600
},
{
"epoch": 3.9200495816547876,
"grad_norm": 7.212582111358643,
"learning_rate": 2.0267534345625453e-05,
"loss": 0.5529,
"step": 12650
},
{
"epoch": 3.9355438487759526,
"grad_norm": 14.616645812988281,
"learning_rate": 2.02158867885549e-05,
"loss": 0.5366,
"step": 12700
},
{
"epoch": 3.951038115897118,
"grad_norm": 9.052292823791504,
"learning_rate": 2.016423923148435e-05,
"loss": 0.5459,
"step": 12750
},
{
"epoch": 3.9665323830182833,
"grad_norm": 18.27539825439453,
"learning_rate": 2.01125916744138e-05,
"loss": 0.5631,
"step": 12800
},
{
"epoch": 3.9820266501394483,
"grad_norm": 12.429372787475586,
"learning_rate": 2.0060944117343252e-05,
"loss": 0.4885,
"step": 12850
},
{
"epoch": 3.9975209172606134,
"grad_norm": 4.481673240661621,
"learning_rate": 2.00092965602727e-05,
"loss": 0.4565,
"step": 12900
},
{
"epoch": 4.0,
"eval_accuracy": 0.7863273603372986,
"eval_f1": 0.7874334496964743,
"eval_loss": 0.7491569519042969,
"eval_runtime": 25.5609,
"eval_samples_per_second": 259.811,
"eval_steps_per_second": 16.275,
"step": 12908
},
{
"epoch": 4.0130151843817785,
"grad_norm": 21.53974151611328,
"learning_rate": 1.9957649003202148e-05,
"loss": 0.2695,
"step": 12950
},
{
"epoch": 4.028509451502944,
"grad_norm": 9.07942008972168,
"learning_rate": 1.9906001446131598e-05,
"loss": 0.3282,
"step": 13000
},
{
"epoch": 4.044003718624109,
"grad_norm": 16.323549270629883,
"learning_rate": 1.9854353889061047e-05,
"loss": 0.3079,
"step": 13050
},
{
"epoch": 4.059497985745274,
"grad_norm": 6.679697036743164,
"learning_rate": 1.9802706331990497e-05,
"loss": 0.3889,
"step": 13100
},
{
"epoch": 4.07499225286644,
"grad_norm": 17.357574462890625,
"learning_rate": 1.9751058774919947e-05,
"loss": 0.3336,
"step": 13150
},
{
"epoch": 4.090486519987604,
"grad_norm": 5.116195201873779,
"learning_rate": 1.9699411217849393e-05,
"loss": 0.3102,
"step": 13200
},
{
"epoch": 4.10598078710877,
"grad_norm": 29.05538558959961,
"learning_rate": 1.9647763660778846e-05,
"loss": 0.2902,
"step": 13250
},
{
"epoch": 4.1214750542299345,
"grad_norm": 6.254473686218262,
"learning_rate": 1.9596116103708296e-05,
"loss": 0.3816,
"step": 13300
},
{
"epoch": 4.1369693213511,
"grad_norm": 11.854185104370117,
"learning_rate": 1.9544468546637745e-05,
"loss": 0.3455,
"step": 13350
},
{
"epoch": 4.152463588472266,
"grad_norm": 16.399444580078125,
"learning_rate": 1.9492820989567195e-05,
"loss": 0.3713,
"step": 13400
},
{
"epoch": 4.16795785559343,
"grad_norm": 18.26226234436035,
"learning_rate": 1.9441173432496645e-05,
"loss": 0.2957,
"step": 13450
},
{
"epoch": 4.183452122714596,
"grad_norm": 6.590181350708008,
"learning_rate": 1.938952587542609e-05,
"loss": 0.2905,
"step": 13500
},
{
"epoch": 4.19894638983576,
"grad_norm": 5.3814849853515625,
"learning_rate": 1.933787831835554e-05,
"loss": 0.3782,
"step": 13550
},
{
"epoch": 4.214440656956926,
"grad_norm": 8.641956329345703,
"learning_rate": 1.928623076128499e-05,
"loss": 0.3211,
"step": 13600
},
{
"epoch": 4.2299349240780915,
"grad_norm": 14.346405982971191,
"learning_rate": 1.9234583204214443e-05,
"loss": 0.3274,
"step": 13650
},
{
"epoch": 4.245429191199256,
"grad_norm": 15.577725410461426,
"learning_rate": 1.9182935647143893e-05,
"loss": 0.3568,
"step": 13700
},
{
"epoch": 4.260923458320422,
"grad_norm": 9.855398178100586,
"learning_rate": 1.913128809007334e-05,
"loss": 0.3008,
"step": 13750
},
{
"epoch": 4.276417725441586,
"grad_norm": 15.720294952392578,
"learning_rate": 1.907964053300279e-05,
"loss": 0.2979,
"step": 13800
},
{
"epoch": 4.291911992562752,
"grad_norm": 13.976778030395508,
"learning_rate": 1.902799297593224e-05,
"loss": 0.3389,
"step": 13850
},
{
"epoch": 4.307406259683917,
"grad_norm": 19.255727767944336,
"learning_rate": 1.897634541886169e-05,
"loss": 0.3636,
"step": 13900
},
{
"epoch": 4.322900526805082,
"grad_norm": 10.70836353302002,
"learning_rate": 1.8924697861791138e-05,
"loss": 0.3455,
"step": 13950
},
{
"epoch": 4.3383947939262475,
"grad_norm": 0.9212763905525208,
"learning_rate": 1.8873050304720584e-05,
"loss": 0.3703,
"step": 14000
},
{
"epoch": 4.353889061047412,
"grad_norm": 10.232623100280762,
"learning_rate": 1.8821402747650037e-05,
"loss": 0.3247,
"step": 14050
},
{
"epoch": 4.369383328168578,
"grad_norm": 11.130922317504883,
"learning_rate": 1.8769755190579487e-05,
"loss": 0.3107,
"step": 14100
},
{
"epoch": 4.384877595289743,
"grad_norm": 10.536752700805664,
"learning_rate": 1.8718107633508937e-05,
"loss": 0.3614,
"step": 14150
},
{
"epoch": 4.400371862410908,
"grad_norm": 15.330968856811523,
"learning_rate": 1.8666460076438386e-05,
"loss": 0.3984,
"step": 14200
},
{
"epoch": 4.415866129532073,
"grad_norm": 7.436588764190674,
"learning_rate": 1.8614812519367833e-05,
"loss": 0.3257,
"step": 14250
},
{
"epoch": 4.431360396653238,
"grad_norm": 7.192384243011475,
"learning_rate": 1.8563164962297282e-05,
"loss": 0.3254,
"step": 14300
},
{
"epoch": 4.4468546637744035,
"grad_norm": 7.792993545532227,
"learning_rate": 1.8511517405226732e-05,
"loss": 0.3392,
"step": 14350
},
{
"epoch": 4.462348930895569,
"grad_norm": 12.411416053771973,
"learning_rate": 1.8459869848156182e-05,
"loss": 0.3383,
"step": 14400
},
{
"epoch": 4.477843198016734,
"grad_norm": 17.897613525390625,
"learning_rate": 1.8408222291085635e-05,
"loss": 0.3392,
"step": 14450
},
{
"epoch": 4.493337465137899,
"grad_norm": 23.59228515625,
"learning_rate": 1.835657473401508e-05,
"loss": 0.3055,
"step": 14500
},
{
"epoch": 4.508831732259064,
"grad_norm": 13.722383499145508,
"learning_rate": 1.830492717694453e-05,
"loss": 0.3997,
"step": 14550
},
{
"epoch": 4.524325999380229,
"grad_norm": 17.811538696289062,
"learning_rate": 1.825327961987398e-05,
"loss": 0.266,
"step": 14600
},
{
"epoch": 4.539820266501394,
"grad_norm": 10.993431091308594,
"learning_rate": 1.820163206280343e-05,
"loss": 0.2634,
"step": 14650
},
{
"epoch": 4.55531453362256,
"grad_norm": 5.25628137588501,
"learning_rate": 1.814998450573288e-05,
"loss": 0.3563,
"step": 14700
},
{
"epoch": 4.570808800743725,
"grad_norm": 16.91241455078125,
"learning_rate": 1.8098336948662326e-05,
"loss": 0.3298,
"step": 14750
},
{
"epoch": 4.58630306786489,
"grad_norm": 27.083995819091797,
"learning_rate": 1.8046689391591776e-05,
"loss": 0.36,
"step": 14800
},
{
"epoch": 4.601797334986055,
"grad_norm": 19.726198196411133,
"learning_rate": 1.799504183452123e-05,
"loss": 0.3224,
"step": 14850
},
{
"epoch": 4.617291602107221,
"grad_norm": 6.92859411239624,
"learning_rate": 1.794339427745068e-05,
"loss": 0.335,
"step": 14900
},
{
"epoch": 4.632785869228385,
"grad_norm": 15.97644329071045,
"learning_rate": 1.7891746720380128e-05,
"loss": 0.3303,
"step": 14950
},
{
"epoch": 4.648280136349551,
"grad_norm": 24.399837493896484,
"learning_rate": 1.7840099163309575e-05,
"loss": 0.3492,
"step": 15000
},
{
"epoch": 4.663774403470716,
"grad_norm": 10.855368614196777,
"learning_rate": 1.7788451606239024e-05,
"loss": 0.3278,
"step": 15050
},
{
"epoch": 4.679268670591881,
"grad_norm": 20.869380950927734,
"learning_rate": 1.7736804049168474e-05,
"loss": 0.2913,
"step": 15100
},
{
"epoch": 4.694762937713046,
"grad_norm": 6.862913131713867,
"learning_rate": 1.7685156492097924e-05,
"loss": 0.3133,
"step": 15150
},
{
"epoch": 4.710257204834211,
"grad_norm": 19.621482849121094,
"learning_rate": 1.7633508935027373e-05,
"loss": 0.3456,
"step": 15200
},
{
"epoch": 4.725751471955377,
"grad_norm": 19.79738998413086,
"learning_rate": 1.7581861377956823e-05,
"loss": 0.3562,
"step": 15250
},
{
"epoch": 4.7412457390765415,
"grad_norm": 3.2352957725524902,
"learning_rate": 1.7530213820886273e-05,
"loss": 0.3565,
"step": 15300
},
{
"epoch": 4.756740006197707,
"grad_norm": 10.959282875061035,
"learning_rate": 1.7478566263815722e-05,
"loss": 0.3472,
"step": 15350
},
{
"epoch": 4.7722342733188725,
"grad_norm": 3.22469162940979,
"learning_rate": 1.7426918706745172e-05,
"loss": 0.3367,
"step": 15400
},
{
"epoch": 4.787728540440037,
"grad_norm": 7.619373798370361,
"learning_rate": 1.737527114967462e-05,
"loss": 0.319,
"step": 15450
},
{
"epoch": 4.803222807561203,
"grad_norm": 24.706689834594727,
"learning_rate": 1.7323623592604068e-05,
"loss": 0.3939,
"step": 15500
},
{
"epoch": 4.818717074682367,
"grad_norm": 15.918986320495605,
"learning_rate": 1.7271976035533518e-05,
"loss": 0.3618,
"step": 15550
},
{
"epoch": 4.834211341803533,
"grad_norm": 14.518546104431152,
"learning_rate": 1.7220328478462967e-05,
"loss": 0.4082,
"step": 15600
},
{
"epoch": 4.8497056089246975,
"grad_norm": 6.084866046905518,
"learning_rate": 1.716868092139242e-05,
"loss": 0.3594,
"step": 15650
},
{
"epoch": 4.865199876045863,
"grad_norm": 18.435983657836914,
"learning_rate": 1.711703336432187e-05,
"loss": 0.3182,
"step": 15700
},
{
"epoch": 4.8806941431670285,
"grad_norm": 14.745248794555664,
"learning_rate": 1.7065385807251316e-05,
"loss": 0.3375,
"step": 15750
},
{
"epoch": 4.896188410288193,
"grad_norm": 11.518832206726074,
"learning_rate": 1.7013738250180766e-05,
"loss": 0.3371,
"step": 15800
},
{
"epoch": 4.911682677409359,
"grad_norm": 17.58115005493164,
"learning_rate": 1.6962090693110216e-05,
"loss": 0.3851,
"step": 15850
},
{
"epoch": 4.927176944530523,
"grad_norm": 16.769134521484375,
"learning_rate": 1.6910443136039665e-05,
"loss": 0.3009,
"step": 15900
},
{
"epoch": 4.942671211651689,
"grad_norm": 21.518749237060547,
"learning_rate": 1.6858795578969115e-05,
"loss": 0.3155,
"step": 15950
},
{
"epoch": 4.958165478772854,
"grad_norm": 11.044340133666992,
"learning_rate": 1.6807148021898565e-05,
"loss": 0.3648,
"step": 16000
},
{
"epoch": 4.973659745894019,
"grad_norm": 3.9900588989257812,
"learning_rate": 1.6755500464828014e-05,
"loss": 0.3383,
"step": 16050
},
{
"epoch": 4.989154013015185,
"grad_norm": 16.869041442871094,
"learning_rate": 1.6703852907757464e-05,
"loss": 0.3471,
"step": 16100
},
{
"epoch": 5.0,
"eval_accuracy": 0.7964162023791598,
"eval_f1": 0.7960859271865419,
"eval_loss": 0.8064730167388916,
"eval_runtime": 25.5199,
"eval_samples_per_second": 260.228,
"eval_steps_per_second": 16.301,
"step": 16135
}
],
"logging_steps": 50,
"max_steps": 32270,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.39622791611904e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}