{
"best_global_step": 68838,
"best_metric": 0.8178100047087637,
"best_model_checkpoint": "./results/checkpoint-68838",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 68838,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.021790290246666087,
"grad_norm": 54.483943939208984,
"learning_rate": 1.985502193555885e-05,
"loss": 1.0336,
"step": 500
},
{
"epoch": 0.043580580493332174,
"grad_norm": 25.309688568115234,
"learning_rate": 1.9709753333914408e-05,
"loss": 0.8222,
"step": 1000
},
{
"epoch": 0.06537087073999825,
"grad_norm": 20.759492874145508,
"learning_rate": 1.956448473226997e-05,
"loss": 0.8082,
"step": 1500
},
{
"epoch": 0.08716116098666435,
"grad_norm": 48.492088317871094,
"learning_rate": 1.9419216130625527e-05,
"loss": 0.7739,
"step": 2000
},
{
"epoch": 0.10895145123333043,
"grad_norm": 14.695459365844727,
"learning_rate": 1.927394752898109e-05,
"loss": 0.7577,
"step": 2500
},
{
"epoch": 0.1307417414799965,
"grad_norm": 45.78233337402344,
"learning_rate": 1.912867892733665e-05,
"loss": 0.7493,
"step": 3000
},
{
"epoch": 0.1525320317266626,
"grad_norm": 65.063720703125,
"learning_rate": 1.8983410325692207e-05,
"loss": 0.7372,
"step": 3500
},
{
"epoch": 0.1743223219733287,
"grad_norm": 43.56230163574219,
"learning_rate": 1.8838141724047765e-05,
"loss": 0.6989,
"step": 4000
},
{
"epoch": 0.19611261221999476,
"grad_norm": 12.578514099121094,
"learning_rate": 1.8692873122403327e-05,
"loss": 0.6864,
"step": 4500
},
{
"epoch": 0.21790290246666086,
"grad_norm": 32.62320327758789,
"learning_rate": 1.8547604520758884e-05,
"loss": 0.6947,
"step": 5000
},
{
"epoch": 0.23969319271332695,
"grad_norm": 30.916015625,
"learning_rate": 1.8402335919114446e-05,
"loss": 0.6831,
"step": 5500
},
{
"epoch": 0.261483482959993,
"grad_norm": 28.278722763061523,
"learning_rate": 1.8257067317470003e-05,
"loss": 0.6766,
"step": 6000
},
{
"epoch": 0.2832737732066591,
"grad_norm": 31.46723747253418,
"learning_rate": 1.8111798715825565e-05,
"loss": 0.6939,
"step": 6500
},
{
"epoch": 0.3050640634533252,
"grad_norm": 36.962215423583984,
"learning_rate": 1.7966530114181122e-05,
"loss": 0.6611,
"step": 7000
},
{
"epoch": 0.3268543536999913,
"grad_norm": 33.17079162597656,
"learning_rate": 1.7821261512536684e-05,
"loss": 0.6683,
"step": 7500
},
{
"epoch": 0.3486446439466574,
"grad_norm": 39.9603271484375,
"learning_rate": 1.767599291089224e-05,
"loss": 0.6626,
"step": 8000
},
{
"epoch": 0.37043493419332346,
"grad_norm": 35.70758819580078,
"learning_rate": 1.75307243092478e-05,
"loss": 0.635,
"step": 8500
},
{
"epoch": 0.39222522443998953,
"grad_norm": 36.70634841918945,
"learning_rate": 1.738545570760336e-05,
"loss": 0.6663,
"step": 9000
},
{
"epoch": 0.41401551468665565,
"grad_norm": 14.346757888793945,
"learning_rate": 1.7240187105958918e-05,
"loss": 0.6436,
"step": 9500
},
{
"epoch": 0.4358058049333217,
"grad_norm": 20.595735549926758,
"learning_rate": 1.709491850431448e-05,
"loss": 0.6469,
"step": 10000
},
{
"epoch": 0.4575960951799878,
"grad_norm": 20.74449920654297,
"learning_rate": 1.6949649902670037e-05,
"loss": 0.6503,
"step": 10500
},
{
"epoch": 0.4793863854266539,
"grad_norm": 13.161200523376465,
"learning_rate": 1.68043813010256e-05,
"loss": 0.6214,
"step": 11000
},
{
"epoch": 0.5011766756733199,
"grad_norm": 37.41954040527344,
"learning_rate": 1.6659112699381156e-05,
"loss": 0.6495,
"step": 11500
},
{
"epoch": 0.522966965919986,
"grad_norm": 22.0927791595459,
"learning_rate": 1.6513844097736717e-05,
"loss": 0.62,
"step": 12000
},
{
"epoch": 0.5447572561666522,
"grad_norm": 37.23984909057617,
"learning_rate": 1.6368575496092275e-05,
"loss": 0.6087,
"step": 12500
},
{
"epoch": 0.5665475464133182,
"grad_norm": 12.211308479309082,
"learning_rate": 1.6223306894447833e-05,
"loss": 0.6237,
"step": 13000
},
{
"epoch": 0.5883378366599843,
"grad_norm": 28.417964935302734,
"learning_rate": 1.6078038292803394e-05,
"loss": 0.6411,
"step": 13500
},
{
"epoch": 0.6101281269066504,
"grad_norm": 25.028160095214844,
"learning_rate": 1.5932769691158952e-05,
"loss": 0.6029,
"step": 14000
},
{
"epoch": 0.6319184171533165,
"grad_norm": 10.462093353271484,
"learning_rate": 1.5787501089514513e-05,
"loss": 0.5853,
"step": 14500
},
{
"epoch": 0.6537087073999825,
"grad_norm": 15.21939468383789,
"learning_rate": 1.564223248787007e-05,
"loss": 0.5997,
"step": 15000
},
{
"epoch": 0.6754989976466487,
"grad_norm": 35.362239837646484,
"learning_rate": 1.5496963886225632e-05,
"loss": 0.6184,
"step": 15500
},
{
"epoch": 0.6972892878933148,
"grad_norm": 18.156673431396484,
"learning_rate": 1.535169528458119e-05,
"loss": 0.6039,
"step": 16000
},
{
"epoch": 0.7190795781399808,
"grad_norm": 24.839574813842773,
"learning_rate": 1.5206426682936753e-05,
"loss": 0.6252,
"step": 16500
},
{
"epoch": 0.7408698683866469,
"grad_norm": 17.548309326171875,
"learning_rate": 1.506115808129231e-05,
"loss": 0.5994,
"step": 17000
},
{
"epoch": 0.762660158633313,
"grad_norm": 37.97673416137695,
"learning_rate": 1.4915889479647869e-05,
"loss": 0.604,
"step": 17500
},
{
"epoch": 0.7844504488799791,
"grad_norm": 26.855255126953125,
"learning_rate": 1.477062087800343e-05,
"loss": 0.6024,
"step": 18000
},
{
"epoch": 0.8062407391266452,
"grad_norm": 29.445913314819336,
"learning_rate": 1.4625352276358988e-05,
"loss": 0.5841,
"step": 18500
},
{
"epoch": 0.8280310293733113,
"grad_norm": 27.386213302612305,
"learning_rate": 1.4480083674714549e-05,
"loss": 0.5867,
"step": 19000
},
{
"epoch": 0.8498213196199773,
"grad_norm": 39.60457229614258,
"learning_rate": 1.4334815073070107e-05,
"loss": 0.5929,
"step": 19500
},
{
"epoch": 0.8716116098666434,
"grad_norm": 23.005340576171875,
"learning_rate": 1.4189546471425668e-05,
"loss": 0.5857,
"step": 20000
},
{
"epoch": 0.8934019001133096,
"grad_norm": 32.861236572265625,
"learning_rate": 1.4044277869781226e-05,
"loss": 0.5842,
"step": 20500
},
{
"epoch": 0.9151921903599756,
"grad_norm": 25.834049224853516,
"learning_rate": 1.3899009268136785e-05,
"loss": 0.5772,
"step": 21000
},
{
"epoch": 0.9369824806066417,
"grad_norm": 18.09916114807129,
"learning_rate": 1.3753740666492346e-05,
"loss": 0.5745,
"step": 21500
},
{
"epoch": 0.9587727708533078,
"grad_norm": 38.919830322265625,
"learning_rate": 1.3608472064847904e-05,
"loss": 0.5673,
"step": 22000
},
{
"epoch": 0.9805630610999738,
"grad_norm": 44.96213150024414,
"learning_rate": 1.3463203463203465e-05,
"loss": 0.5712,
"step": 22500
},
{
"epoch": 1.0,
"eval_accuracy": 0.8107525007082615,
"eval_f1": 0.8068179660475118,
"eval_loss": 0.5534330010414124,
"eval_runtime": 103.9311,
"eval_samples_per_second": 441.514,
"eval_steps_per_second": 27.595,
"step": 22946
},
{
"epoch": 1.0023533513466398,
"grad_norm": 10.949334144592285,
"learning_rate": 1.3317934861559023e-05,
"loss": 0.5614,
"step": 23000
},
{
"epoch": 1.024143641593306,
"grad_norm": 9.04629135131836,
"learning_rate": 1.3172666259914584e-05,
"loss": 0.4356,
"step": 23500
},
{
"epoch": 1.045933931839972,
"grad_norm": 20.525428771972656,
"learning_rate": 1.3027397658270142e-05,
"loss": 0.4322,
"step": 24000
},
{
"epoch": 1.067724222086638,
"grad_norm": 13.471968650817871,
"learning_rate": 1.2882129056625703e-05,
"loss": 0.4009,
"step": 24500
},
{
"epoch": 1.0895145123333043,
"grad_norm": 22.500341415405273,
"learning_rate": 1.2736860454981261e-05,
"loss": 0.4234,
"step": 25000
},
{
"epoch": 1.1113048025799703,
"grad_norm": 27.607267379760742,
"learning_rate": 1.2591591853336819e-05,
"loss": 0.4179,
"step": 25500
},
{
"epoch": 1.1330950928266366,
"grad_norm": 43.847557067871094,
"learning_rate": 1.244632325169238e-05,
"loss": 0.4245,
"step": 26000
},
{
"epoch": 1.1548853830733026,
"grad_norm": 15.113216400146484,
"learning_rate": 1.2301054650047938e-05,
"loss": 0.4072,
"step": 26500
},
{
"epoch": 1.1766756733199686,
"grad_norm": 23.319063186645508,
"learning_rate": 1.21557860484035e-05,
"loss": 0.4099,
"step": 27000
},
{
"epoch": 1.1984659635666346,
"grad_norm": 19.947755813598633,
"learning_rate": 1.2010517446759057e-05,
"loss": 0.3983,
"step": 27500
},
{
"epoch": 1.2202562538133008,
"grad_norm": 39.462379455566406,
"learning_rate": 1.1865248845114618e-05,
"loss": 0.4347,
"step": 28000
},
{
"epoch": 1.2420465440599668,
"grad_norm": 9.28600788116455,
"learning_rate": 1.1719980243470178e-05,
"loss": 0.4239,
"step": 28500
},
{
"epoch": 1.263836834306633,
"grad_norm": 26.335203170776367,
"learning_rate": 1.1574711641825737e-05,
"loss": 0.406,
"step": 29000
},
{
"epoch": 1.285627124553299,
"grad_norm": 16.803382873535156,
"learning_rate": 1.1429443040181297e-05,
"loss": 0.4323,
"step": 29500
},
{
"epoch": 1.307417414799965,
"grad_norm": 19.966100692749023,
"learning_rate": 1.1284174438536855e-05,
"loss": 0.4071,
"step": 30000
},
{
"epoch": 1.329207705046631,
"grad_norm": 11.231242179870605,
"learning_rate": 1.1138905836892416e-05,
"loss": 0.4185,
"step": 30500
},
{
"epoch": 1.3509979952932973,
"grad_norm": 5.798637866973877,
"learning_rate": 1.0993637235247974e-05,
"loss": 0.4094,
"step": 31000
},
{
"epoch": 1.3727882855399633,
"grad_norm": 45.0400390625,
"learning_rate": 1.0848368633603535e-05,
"loss": 0.4182,
"step": 31500
},
{
"epoch": 1.3945785757866296,
"grad_norm": 13.42186450958252,
"learning_rate": 1.0703100031959093e-05,
"loss": 0.4138,
"step": 32000
},
{
"epoch": 1.4163688660332956,
"grad_norm": 4.720645427703857,
"learning_rate": 1.0557831430314654e-05,
"loss": 0.4189,
"step": 32500
},
{
"epoch": 1.4381591562799616,
"grad_norm": 15.046038627624512,
"learning_rate": 1.0412562828670212e-05,
"loss": 0.4248,
"step": 33000
},
{
"epoch": 1.4599494465266276,
"grad_norm": 26.371877670288086,
"learning_rate": 1.0267294227025773e-05,
"loss": 0.4203,
"step": 33500
},
{
"epoch": 1.4817397367732938,
"grad_norm": 31.495922088623047,
"learning_rate": 1.012202562538133e-05,
"loss": 0.4346,
"step": 34000
},
{
"epoch": 1.5035300270199599,
"grad_norm": 91.96415710449219,
"learning_rate": 9.97675702373689e-06,
"loss": 0.4121,
"step": 34500
},
{
"epoch": 1.525320317266626,
"grad_norm": 15.824524879455566,
"learning_rate": 9.83148842209245e-06,
"loss": 0.4175,
"step": 35000
},
{
"epoch": 1.547110607513292,
"grad_norm": 32.88704299926758,
"learning_rate": 9.68621982044801e-06,
"loss": 0.4134,
"step": 35500
},
{
"epoch": 1.5689008977599581,
"grad_norm": 13.783001899719238,
"learning_rate": 9.540951218803569e-06,
"loss": 0.4131,
"step": 36000
},
{
"epoch": 1.5906911880066241,
"grad_norm": 26.346506118774414,
"learning_rate": 9.395682617159128e-06,
"loss": 0.4208,
"step": 36500
},
{
"epoch": 1.6124814782532904,
"grad_norm": 6.159854888916016,
"learning_rate": 9.250414015514688e-06,
"loss": 0.4132,
"step": 37000
},
{
"epoch": 1.6342717684999564,
"grad_norm": 53.30848693847656,
"learning_rate": 9.105145413870247e-06,
"loss": 0.4167,
"step": 37500
},
{
"epoch": 1.6560620587466226,
"grad_norm": 17.382976531982422,
"learning_rate": 8.959876812225807e-06,
"loss": 0.4213,
"step": 38000
},
{
"epoch": 1.6778523489932886,
"grad_norm": 30.866016387939453,
"learning_rate": 8.814608210581366e-06,
"loss": 0.4295,
"step": 38500
},
{
"epoch": 1.6996426392399546,
"grad_norm": 12.70687198638916,
"learning_rate": 8.669339608936926e-06,
"loss": 0.4101,
"step": 39000
},
{
"epoch": 1.7214329294866206,
"grad_norm": 17.50055694580078,
"learning_rate": 8.524071007292485e-06,
"loss": 0.4142,
"step": 39500
},
{
"epoch": 1.7432232197332869,
"grad_norm": 37.7745361328125,
"learning_rate": 8.378802405648045e-06,
"loss": 0.4008,
"step": 40000
},
{
"epoch": 1.765013509979953,
"grad_norm": 31.711212158203125,
"learning_rate": 8.233533804003603e-06,
"loss": 0.4162,
"step": 40500
},
{
"epoch": 1.786803800226619,
"grad_norm": 36.37419509887695,
"learning_rate": 8.088265202359162e-06,
"loss": 0.4004,
"step": 41000
},
{
"epoch": 1.8085940904732851,
"grad_norm": 1.8152663707733154,
"learning_rate": 7.942996600714722e-06,
"loss": 0.3669,
"step": 41500
},
{
"epoch": 1.8303843807199511,
"grad_norm": 32.72704315185547,
"learning_rate": 7.797727999070281e-06,
"loss": 0.4091,
"step": 42000
},
{
"epoch": 1.8521746709666171,
"grad_norm": 25.534046173095703,
"learning_rate": 7.65245939742584e-06,
"loss": 0.4194,
"step": 42500
},
{
"epoch": 1.8739649612132834,
"grad_norm": 64.45800018310547,
"learning_rate": 7.5071907957814e-06,
"loss": 0.406,
"step": 43000
},
{
"epoch": 1.8957552514599496,
"grad_norm": 72.69434356689453,
"learning_rate": 7.36192219413696e-06,
"loss": 0.4078,
"step": 43500
},
{
"epoch": 1.9175455417066156,
"grad_norm": 34.615509033203125,
"learning_rate": 7.216653592492519e-06,
"loss": 0.4035,
"step": 44000
},
{
"epoch": 1.9393358319532816,
"grad_norm": 31.702251434326172,
"learning_rate": 7.071384990848079e-06,
"loss": 0.4107,
"step": 44500
},
{
"epoch": 1.9611261221999476,
"grad_norm": 39.912506103515625,
"learning_rate": 6.926116389203637e-06,
"loss": 0.3938,
"step": 45000
},
{
"epoch": 1.9829164124466137,
"grad_norm": 9.400127410888672,
"learning_rate": 6.780847787559197e-06,
"loss": 0.3948,
"step": 45500
},
{
"epoch": 2.0,
"eval_accuracy": 0.818096628674788,
"eval_f1": 0.8153729521200931,
"eval_loss": 0.5731034278869629,
"eval_runtime": 103.9443,
"eval_samples_per_second": 441.458,
"eval_steps_per_second": 27.592,
"step": 45892
},
{
"epoch": 2.0047067026932797,
"grad_norm": 5.7501606941223145,
"learning_rate": 6.6355791859147564e-06,
"loss": 0.3754,
"step": 46000
},
{
"epoch": 2.026496992939946,
"grad_norm": 15.377955436706543,
"learning_rate": 6.490310584270316e-06,
"loss": 0.2614,
"step": 46500
},
{
"epoch": 2.048287283186612,
"grad_norm": 26.223241806030273,
"learning_rate": 6.3450419826258755e-06,
"loss": 0.266,
"step": 47000
},
{
"epoch": 2.070077573433278,
"grad_norm": 14.321675300598145,
"learning_rate": 6.199773380981435e-06,
"loss": 0.2725,
"step": 47500
},
{
"epoch": 2.091867863679944,
"grad_norm": 63.887062072753906,
"learning_rate": 6.0545047793369945e-06,
"loss": 0.2565,
"step": 48000
},
{
"epoch": 2.11365815392661,
"grad_norm": 8.824002265930176,
"learning_rate": 5.909236177692555e-06,
"loss": 0.2645,
"step": 48500
},
{
"epoch": 2.135448444173276,
"grad_norm": 58.92592239379883,
"learning_rate": 5.763967576048114e-06,
"loss": 0.27,
"step": 49000
},
{
"epoch": 2.1572387344199426,
"grad_norm": 15.57720947265625,
"learning_rate": 5.618698974403672e-06,
"loss": 0.2563,
"step": 49500
},
{
"epoch": 2.1790290246666086,
"grad_norm": 0.5324369072914124,
"learning_rate": 5.473430372759232e-06,
"loss": 0.2523,
"step": 50000
},
{
"epoch": 2.2008193149132746,
"grad_norm": 38.53084945678711,
"learning_rate": 5.328161771114791e-06,
"loss": 0.2404,
"step": 50500
},
{
"epoch": 2.2226096051599407,
"grad_norm": 5.76258659362793,
"learning_rate": 5.182893169470351e-06,
"loss": 0.2515,
"step": 51000
},
{
"epoch": 2.2443998954066067,
"grad_norm": 33.10593032836914,
"learning_rate": 5.03762456782591e-06,
"loss": 0.2526,
"step": 51500
},
{
"epoch": 2.266190185653273,
"grad_norm": 23.81480598449707,
"learning_rate": 4.8923559661814705e-06,
"loss": 0.2774,
"step": 52000
},
{
"epoch": 2.287980475899939,
"grad_norm": 15.034322738647461,
"learning_rate": 4.747087364537029e-06,
"loss": 0.2488,
"step": 52500
},
{
"epoch": 2.309770766146605,
"grad_norm": 32.9444694519043,
"learning_rate": 4.601818762892589e-06,
"loss": 0.241,
"step": 53000
},
{
"epoch": 2.331561056393271,
"grad_norm": 17.02962303161621,
"learning_rate": 4.456550161248148e-06,
"loss": 0.263,
"step": 53500
},
{
"epoch": 2.353351346639937,
"grad_norm": 31.476587295532227,
"learning_rate": 4.311281559603708e-06,
"loss": 0.2567,
"step": 54000
},
{
"epoch": 2.375141636886603,
"grad_norm": 34.839778900146484,
"learning_rate": 4.166012957959267e-06,
"loss": 0.2468,
"step": 54500
},
{
"epoch": 2.396931927133269,
"grad_norm": 0.312466561794281,
"learning_rate": 4.020744356314826e-06,
"loss": 0.2591,
"step": 55000
},
{
"epoch": 2.4187222173799356,
"grad_norm": 10.837553024291992,
"learning_rate": 3.875475754670385e-06,
"loss": 0.2552,
"step": 55500
},
{
"epoch": 2.4405125076266017,
"grad_norm": 2.8537871837615967,
"learning_rate": 3.7302071530259454e-06,
"loss": 0.2675,
"step": 56000
},
{
"epoch": 2.4623027978732677,
"grad_norm": 17.37003517150879,
"learning_rate": 3.584938551381505e-06,
"loss": 0.255,
"step": 56500
},
{
"epoch": 2.4840930881199337,
"grad_norm": 14.81467056274414,
"learning_rate": 3.439669949737064e-06,
"loss": 0.2538,
"step": 57000
},
{
"epoch": 2.5058833783665997,
"grad_norm": 22.952491760253906,
"learning_rate": 3.2944013480926235e-06,
"loss": 0.259,
"step": 57500
},
{
"epoch": 2.527673668613266,
"grad_norm": 51.379600524902344,
"learning_rate": 3.149132746448183e-06,
"loss": 0.2433,
"step": 58000
},
{
"epoch": 2.549463958859932,
"grad_norm": 50.78517150878906,
"learning_rate": 3.0038641448037425e-06,
"loss": 0.245,
"step": 58500
},
{
"epoch": 2.571254249106598,
"grad_norm": 70.40054321289062,
"learning_rate": 2.8585955431593016e-06,
"loss": 0.2578,
"step": 59000
},
{
"epoch": 2.593044539353264,
"grad_norm": 13.180349349975586,
"learning_rate": 2.713326941514861e-06,
"loss": 0.2569,
"step": 59500
},
{
"epoch": 2.61483482959993,
"grad_norm": 29.106149673461914,
"learning_rate": 2.5680583398704206e-06,
"loss": 0.2664,
"step": 60000
},
{
"epoch": 2.636625119846596,
"grad_norm": 48.964847564697266,
"learning_rate": 2.42278973822598e-06,
"loss": 0.2542,
"step": 60500
},
{
"epoch": 2.658415410093262,
"grad_norm": 18.914636611938477,
"learning_rate": 2.2775211365815396e-06,
"loss": 0.2515,
"step": 61000
},
{
"epoch": 2.6802057003399287,
"grad_norm": 44.629173278808594,
"learning_rate": 2.132252534937099e-06,
"loss": 0.2576,
"step": 61500
},
{
"epoch": 2.7019959905865947,
"grad_norm": 13.741023063659668,
"learning_rate": 1.986983933292658e-06,
"loss": 0.2532,
"step": 62000
},
{
"epoch": 2.7237862808332607,
"grad_norm": 30.310876846313477,
"learning_rate": 1.841715331648218e-06,
"loss": 0.2597,
"step": 62500
},
{
"epoch": 2.7455765710799267,
"grad_norm": 21.90140724182129,
"learning_rate": 1.696446730003777e-06,
"loss": 0.2435,
"step": 63000
},
{
"epoch": 2.7673668613265927,
"grad_norm": 27.499929428100586,
"learning_rate": 1.5511781283593367e-06,
"loss": 0.2577,
"step": 63500
},
{
"epoch": 2.789157151573259,
"grad_norm": 33.468074798583984,
"learning_rate": 1.4059095267148958e-06,
"loss": 0.2532,
"step": 64000
},
{
"epoch": 2.810947441819925,
"grad_norm": 14.590469360351562,
"learning_rate": 1.2606409250704555e-06,
"loss": 0.2548,
"step": 64500
},
{
"epoch": 2.832737732066591,
"grad_norm": 36.13336181640625,
"learning_rate": 1.1153723234260148e-06,
"loss": 0.2408,
"step": 65000
},
{
"epoch": 2.854528022313257,
"grad_norm": 20.17421531677246,
"learning_rate": 9.701037217815743e-07,
"loss": 0.2467,
"step": 65500
},
{
"epoch": 2.876318312559923,
"grad_norm": 129.63551330566406,
"learning_rate": 8.248351201371336e-07,
"loss": 0.2555,
"step": 66000
},
{
"epoch": 2.898108602806589,
"grad_norm": 2.310012102127075,
"learning_rate": 6.79566518492693e-07,
"loss": 0.2478,
"step": 66500
},
{
"epoch": 2.9198988930532552,
"grad_norm": 60.997982025146484,
"learning_rate": 5.342979168482524e-07,
"loss": 0.2468,
"step": 67000
},
{
"epoch": 2.9416891832999217,
"grad_norm": 2.189000129699707,
"learning_rate": 3.890293152038119e-07,
"loss": 0.2387,
"step": 67500
},
{
"epoch": 2.9634794735465877,
"grad_norm": 79.08841705322266,
"learning_rate": 2.437607135593713e-07,
"loss": 0.236,
"step": 68000
},
{
"epoch": 2.9852697637932537,
"grad_norm": 4.530211925506592,
"learning_rate": 9.849211191493072e-08,
"loss": 0.2454,
"step": 68500
},
{
"epoch": 3.0,
"eval_accuracy": 0.819404188550134,
"eval_f1": 0.8178100047087637,
"eval_loss": 0.7280585169792175,
"eval_runtime": 106.044,
"eval_samples_per_second": 432.717,
"eval_steps_per_second": 27.045,
"step": 68838
}
],
"logging_steps": 500,
"max_steps": 68838,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.244994585504614e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}