| { |
| "best_global_step": 68838, |
| "best_metric": 0.8178100047087637, |
| "best_model_checkpoint": "./results/checkpoint-68838", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 68838, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.021790290246666087, |
| "grad_norm": 54.483943939208984, |
| "learning_rate": 1.985502193555885e-05, |
| "loss": 1.0336, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.043580580493332174, |
| "grad_norm": 25.309688568115234, |
| "learning_rate": 1.9709753333914408e-05, |
| "loss": 0.8222, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.06537087073999825, |
| "grad_norm": 20.759492874145508, |
| "learning_rate": 1.956448473226997e-05, |
| "loss": 0.8082, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.08716116098666435, |
| "grad_norm": 48.492088317871094, |
| "learning_rate": 1.9419216130625527e-05, |
| "loss": 0.7739, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.10895145123333043, |
| "grad_norm": 14.695459365844727, |
| "learning_rate": 1.927394752898109e-05, |
| "loss": 0.7577, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.1307417414799965, |
| "grad_norm": 45.78233337402344, |
| "learning_rate": 1.912867892733665e-05, |
| "loss": 0.7493, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.1525320317266626, |
| "grad_norm": 65.063720703125, |
| "learning_rate": 1.8983410325692207e-05, |
| "loss": 0.7372, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.1743223219733287, |
| "grad_norm": 43.56230163574219, |
| "learning_rate": 1.8838141724047765e-05, |
| "loss": 0.6989, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.19611261221999476, |
| "grad_norm": 12.578514099121094, |
| "learning_rate": 1.8692873122403327e-05, |
| "loss": 0.6864, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.21790290246666086, |
| "grad_norm": 32.62320327758789, |
| "learning_rate": 1.8547604520758884e-05, |
| "loss": 0.6947, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.23969319271332695, |
| "grad_norm": 30.916015625, |
| "learning_rate": 1.8402335919114446e-05, |
| "loss": 0.6831, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.261483482959993, |
| "grad_norm": 28.278722763061523, |
| "learning_rate": 1.8257067317470003e-05, |
| "loss": 0.6766, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.2832737732066591, |
| "grad_norm": 31.46723747253418, |
| "learning_rate": 1.8111798715825565e-05, |
| "loss": 0.6939, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.3050640634533252, |
| "grad_norm": 36.962215423583984, |
| "learning_rate": 1.7966530114181122e-05, |
| "loss": 0.6611, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.3268543536999913, |
| "grad_norm": 33.17079162597656, |
| "learning_rate": 1.7821261512536684e-05, |
| "loss": 0.6683, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.3486446439466574, |
| "grad_norm": 39.9603271484375, |
| "learning_rate": 1.767599291089224e-05, |
| "loss": 0.6626, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.37043493419332346, |
| "grad_norm": 35.70758819580078, |
| "learning_rate": 1.75307243092478e-05, |
| "loss": 0.635, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.39222522443998953, |
| "grad_norm": 36.70634841918945, |
| "learning_rate": 1.738545570760336e-05, |
| "loss": 0.6663, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.41401551468665565, |
| "grad_norm": 14.346757888793945, |
| "learning_rate": 1.7240187105958918e-05, |
| "loss": 0.6436, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.4358058049333217, |
| "grad_norm": 20.595735549926758, |
| "learning_rate": 1.709491850431448e-05, |
| "loss": 0.6469, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.4575960951799878, |
| "grad_norm": 20.74449920654297, |
| "learning_rate": 1.6949649902670037e-05, |
| "loss": 0.6503, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.4793863854266539, |
| "grad_norm": 13.161200523376465, |
| "learning_rate": 1.68043813010256e-05, |
| "loss": 0.6214, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.5011766756733199, |
| "grad_norm": 37.41954040527344, |
| "learning_rate": 1.6659112699381156e-05, |
| "loss": 0.6495, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.522966965919986, |
| "grad_norm": 22.0927791595459, |
| "learning_rate": 1.6513844097736717e-05, |
| "loss": 0.62, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.5447572561666522, |
| "grad_norm": 37.23984909057617, |
| "learning_rate": 1.6368575496092275e-05, |
| "loss": 0.6087, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.5665475464133182, |
| "grad_norm": 12.211308479309082, |
| "learning_rate": 1.6223306894447833e-05, |
| "loss": 0.6237, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.5883378366599843, |
| "grad_norm": 28.417964935302734, |
| "learning_rate": 1.6078038292803394e-05, |
| "loss": 0.6411, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.6101281269066504, |
| "grad_norm": 25.028160095214844, |
| "learning_rate": 1.5932769691158952e-05, |
| "loss": 0.6029, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.6319184171533165, |
| "grad_norm": 10.462093353271484, |
| "learning_rate": 1.5787501089514513e-05, |
| "loss": 0.5853, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.6537087073999825, |
| "grad_norm": 15.21939468383789, |
| "learning_rate": 1.564223248787007e-05, |
| "loss": 0.5997, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.6754989976466487, |
| "grad_norm": 35.362239837646484, |
| "learning_rate": 1.5496963886225632e-05, |
| "loss": 0.6184, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.6972892878933148, |
| "grad_norm": 18.156673431396484, |
| "learning_rate": 1.535169528458119e-05, |
| "loss": 0.6039, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.7190795781399808, |
| "grad_norm": 24.839574813842773, |
| "learning_rate": 1.5206426682936753e-05, |
| "loss": 0.6252, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.7408698683866469, |
| "grad_norm": 17.548309326171875, |
| "learning_rate": 1.506115808129231e-05, |
| "loss": 0.5994, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.762660158633313, |
| "grad_norm": 37.97673416137695, |
| "learning_rate": 1.4915889479647869e-05, |
| "loss": 0.604, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.7844504488799791, |
| "grad_norm": 26.855255126953125, |
| "learning_rate": 1.477062087800343e-05, |
| "loss": 0.6024, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.8062407391266452, |
| "grad_norm": 29.445913314819336, |
| "learning_rate": 1.4625352276358988e-05, |
| "loss": 0.5841, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.8280310293733113, |
| "grad_norm": 27.386213302612305, |
| "learning_rate": 1.4480083674714549e-05, |
| "loss": 0.5867, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.8498213196199773, |
| "grad_norm": 39.60457229614258, |
| "learning_rate": 1.4334815073070107e-05, |
| "loss": 0.5929, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.8716116098666434, |
| "grad_norm": 23.005340576171875, |
| "learning_rate": 1.4189546471425668e-05, |
| "loss": 0.5857, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.8934019001133096, |
| "grad_norm": 32.861236572265625, |
| "learning_rate": 1.4044277869781226e-05, |
| "loss": 0.5842, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.9151921903599756, |
| "grad_norm": 25.834049224853516, |
| "learning_rate": 1.3899009268136785e-05, |
| "loss": 0.5772, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.9369824806066417, |
| "grad_norm": 18.09916114807129, |
| "learning_rate": 1.3753740666492346e-05, |
| "loss": 0.5745, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.9587727708533078, |
| "grad_norm": 38.919830322265625, |
| "learning_rate": 1.3608472064847904e-05, |
| "loss": 0.5673, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.9805630610999738, |
| "grad_norm": 44.96213150024414, |
| "learning_rate": 1.3463203463203465e-05, |
| "loss": 0.5712, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8107525007082615, |
| "eval_f1": 0.8068179660475118, |
| "eval_loss": 0.5534330010414124, |
| "eval_runtime": 103.9311, |
| "eval_samples_per_second": 441.514, |
| "eval_steps_per_second": 27.595, |
| "step": 22946 |
| }, |
| { |
| "epoch": 1.0023533513466398, |
| "grad_norm": 10.949334144592285, |
| "learning_rate": 1.3317934861559023e-05, |
| "loss": 0.5614, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.024143641593306, |
| "grad_norm": 9.04629135131836, |
| "learning_rate": 1.3172666259914584e-05, |
| "loss": 0.4356, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.045933931839972, |
| "grad_norm": 20.525428771972656, |
| "learning_rate": 1.3027397658270142e-05, |
| "loss": 0.4322, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.067724222086638, |
| "grad_norm": 13.471968650817871, |
| "learning_rate": 1.2882129056625703e-05, |
| "loss": 0.4009, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.0895145123333043, |
| "grad_norm": 22.500341415405273, |
| "learning_rate": 1.2736860454981261e-05, |
| "loss": 0.4234, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.1113048025799703, |
| "grad_norm": 27.607267379760742, |
| "learning_rate": 1.2591591853336819e-05, |
| "loss": 0.4179, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.1330950928266366, |
| "grad_norm": 43.847557067871094, |
| "learning_rate": 1.244632325169238e-05, |
| "loss": 0.4245, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.1548853830733026, |
| "grad_norm": 15.113216400146484, |
| "learning_rate": 1.2301054650047938e-05, |
| "loss": 0.4072, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.1766756733199686, |
| "grad_norm": 23.319063186645508, |
| "learning_rate": 1.21557860484035e-05, |
| "loss": 0.4099, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.1984659635666346, |
| "grad_norm": 19.947755813598633, |
| "learning_rate": 1.2010517446759057e-05, |
| "loss": 0.3983, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.2202562538133008, |
| "grad_norm": 39.462379455566406, |
| "learning_rate": 1.1865248845114618e-05, |
| "loss": 0.4347, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.2420465440599668, |
| "grad_norm": 9.28600788116455, |
| "learning_rate": 1.1719980243470178e-05, |
| "loss": 0.4239, |
| "step": 28500 |
| }, |
| { |
| "epoch": 1.263836834306633, |
| "grad_norm": 26.335203170776367, |
| "learning_rate": 1.1574711641825737e-05, |
| "loss": 0.406, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.285627124553299, |
| "grad_norm": 16.803382873535156, |
| "learning_rate": 1.1429443040181297e-05, |
| "loss": 0.4323, |
| "step": 29500 |
| }, |
| { |
| "epoch": 1.307417414799965, |
| "grad_norm": 19.966100692749023, |
| "learning_rate": 1.1284174438536855e-05, |
| "loss": 0.4071, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.329207705046631, |
| "grad_norm": 11.231242179870605, |
| "learning_rate": 1.1138905836892416e-05, |
| "loss": 0.4185, |
| "step": 30500 |
| }, |
| { |
| "epoch": 1.3509979952932973, |
| "grad_norm": 5.798637866973877, |
| "learning_rate": 1.0993637235247974e-05, |
| "loss": 0.4094, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.3727882855399633, |
| "grad_norm": 45.0400390625, |
| "learning_rate": 1.0848368633603535e-05, |
| "loss": 0.4182, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1.3945785757866296, |
| "grad_norm": 13.42186450958252, |
| "learning_rate": 1.0703100031959093e-05, |
| "loss": 0.4138, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.4163688660332956, |
| "grad_norm": 4.720645427703857, |
| "learning_rate": 1.0557831430314654e-05, |
| "loss": 0.4189, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.4381591562799616, |
| "grad_norm": 15.046038627624512, |
| "learning_rate": 1.0412562828670212e-05, |
| "loss": 0.4248, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.4599494465266276, |
| "grad_norm": 26.371877670288086, |
| "learning_rate": 1.0267294227025773e-05, |
| "loss": 0.4203, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1.4817397367732938, |
| "grad_norm": 31.495922088623047, |
| "learning_rate": 1.012202562538133e-05, |
| "loss": 0.4346, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.5035300270199599, |
| "grad_norm": 91.96415710449219, |
| "learning_rate": 9.97675702373689e-06, |
| "loss": 0.4121, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1.525320317266626, |
| "grad_norm": 15.824524879455566, |
| "learning_rate": 9.83148842209245e-06, |
| "loss": 0.4175, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.547110607513292, |
| "grad_norm": 32.88704299926758, |
| "learning_rate": 9.68621982044801e-06, |
| "loss": 0.4134, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.5689008977599581, |
| "grad_norm": 13.783001899719238, |
| "learning_rate": 9.540951218803569e-06, |
| "loss": 0.4131, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.5906911880066241, |
| "grad_norm": 26.346506118774414, |
| "learning_rate": 9.395682617159128e-06, |
| "loss": 0.4208, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.6124814782532904, |
| "grad_norm": 6.159854888916016, |
| "learning_rate": 9.250414015514688e-06, |
| "loss": 0.4132, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.6342717684999564, |
| "grad_norm": 53.30848693847656, |
| "learning_rate": 9.105145413870247e-06, |
| "loss": 0.4167, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.6560620587466226, |
| "grad_norm": 17.382976531982422, |
| "learning_rate": 8.959876812225807e-06, |
| "loss": 0.4213, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.6778523489932886, |
| "grad_norm": 30.866016387939453, |
| "learning_rate": 8.814608210581366e-06, |
| "loss": 0.4295, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.6996426392399546, |
| "grad_norm": 12.70687198638916, |
| "learning_rate": 8.669339608936926e-06, |
| "loss": 0.4101, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.7214329294866206, |
| "grad_norm": 17.50055694580078, |
| "learning_rate": 8.524071007292485e-06, |
| "loss": 0.4142, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1.7432232197332869, |
| "grad_norm": 37.7745361328125, |
| "learning_rate": 8.378802405648045e-06, |
| "loss": 0.4008, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.765013509979953, |
| "grad_norm": 31.711212158203125, |
| "learning_rate": 8.233533804003603e-06, |
| "loss": 0.4162, |
| "step": 40500 |
| }, |
| { |
| "epoch": 1.786803800226619, |
| "grad_norm": 36.37419509887695, |
| "learning_rate": 8.088265202359162e-06, |
| "loss": 0.4004, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1.8085940904732851, |
| "grad_norm": 1.8152663707733154, |
| "learning_rate": 7.942996600714722e-06, |
| "loss": 0.3669, |
| "step": 41500 |
| }, |
| { |
| "epoch": 1.8303843807199511, |
| "grad_norm": 32.72704315185547, |
| "learning_rate": 7.797727999070281e-06, |
| "loss": 0.4091, |
| "step": 42000 |
| }, |
| { |
| "epoch": 1.8521746709666171, |
| "grad_norm": 25.534046173095703, |
| "learning_rate": 7.65245939742584e-06, |
| "loss": 0.4194, |
| "step": 42500 |
| }, |
| { |
| "epoch": 1.8739649612132834, |
| "grad_norm": 64.45800018310547, |
| "learning_rate": 7.5071907957814e-06, |
| "loss": 0.406, |
| "step": 43000 |
| }, |
| { |
| "epoch": 1.8957552514599496, |
| "grad_norm": 72.69434356689453, |
| "learning_rate": 7.36192219413696e-06, |
| "loss": 0.4078, |
| "step": 43500 |
| }, |
| { |
| "epoch": 1.9175455417066156, |
| "grad_norm": 34.615509033203125, |
| "learning_rate": 7.216653592492519e-06, |
| "loss": 0.4035, |
| "step": 44000 |
| }, |
| { |
| "epoch": 1.9393358319532816, |
| "grad_norm": 31.702251434326172, |
| "learning_rate": 7.071384990848079e-06, |
| "loss": 0.4107, |
| "step": 44500 |
| }, |
| { |
| "epoch": 1.9611261221999476, |
| "grad_norm": 39.912506103515625, |
| "learning_rate": 6.926116389203637e-06, |
| "loss": 0.3938, |
| "step": 45000 |
| }, |
| { |
| "epoch": 1.9829164124466137, |
| "grad_norm": 9.400127410888672, |
| "learning_rate": 6.780847787559197e-06, |
| "loss": 0.3948, |
| "step": 45500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.818096628674788, |
| "eval_f1": 0.8153729521200931, |
| "eval_loss": 0.5731034278869629, |
| "eval_runtime": 103.9443, |
| "eval_samples_per_second": 441.458, |
| "eval_steps_per_second": 27.592, |
| "step": 45892 |
| }, |
| { |
| "epoch": 2.0047067026932797, |
| "grad_norm": 5.7501606941223145, |
| "learning_rate": 6.6355791859147564e-06, |
| "loss": 0.3754, |
| "step": 46000 |
| }, |
| { |
| "epoch": 2.026496992939946, |
| "grad_norm": 15.377955436706543, |
| "learning_rate": 6.490310584270316e-06, |
| "loss": 0.2614, |
| "step": 46500 |
| }, |
| { |
| "epoch": 2.048287283186612, |
| "grad_norm": 26.223241806030273, |
| "learning_rate": 6.3450419826258755e-06, |
| "loss": 0.266, |
| "step": 47000 |
| }, |
| { |
| "epoch": 2.070077573433278, |
| "grad_norm": 14.321675300598145, |
| "learning_rate": 6.199773380981435e-06, |
| "loss": 0.2725, |
| "step": 47500 |
| }, |
| { |
| "epoch": 2.091867863679944, |
| "grad_norm": 63.887062072753906, |
| "learning_rate": 6.0545047793369945e-06, |
| "loss": 0.2565, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.11365815392661, |
| "grad_norm": 8.824002265930176, |
| "learning_rate": 5.909236177692555e-06, |
| "loss": 0.2645, |
| "step": 48500 |
| }, |
| { |
| "epoch": 2.135448444173276, |
| "grad_norm": 58.92592239379883, |
| "learning_rate": 5.763967576048114e-06, |
| "loss": 0.27, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2.1572387344199426, |
| "grad_norm": 15.57720947265625, |
| "learning_rate": 5.618698974403672e-06, |
| "loss": 0.2563, |
| "step": 49500 |
| }, |
| { |
| "epoch": 2.1790290246666086, |
| "grad_norm": 0.5324369072914124, |
| "learning_rate": 5.473430372759232e-06, |
| "loss": 0.2523, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.2008193149132746, |
| "grad_norm": 38.53084945678711, |
| "learning_rate": 5.328161771114791e-06, |
| "loss": 0.2404, |
| "step": 50500 |
| }, |
| { |
| "epoch": 2.2226096051599407, |
| "grad_norm": 5.76258659362793, |
| "learning_rate": 5.182893169470351e-06, |
| "loss": 0.2515, |
| "step": 51000 |
| }, |
| { |
| "epoch": 2.2443998954066067, |
| "grad_norm": 33.10593032836914, |
| "learning_rate": 5.03762456782591e-06, |
| "loss": 0.2526, |
| "step": 51500 |
| }, |
| { |
| "epoch": 2.266190185653273, |
| "grad_norm": 23.81480598449707, |
| "learning_rate": 4.8923559661814705e-06, |
| "loss": 0.2774, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.287980475899939, |
| "grad_norm": 15.034322738647461, |
| "learning_rate": 4.747087364537029e-06, |
| "loss": 0.2488, |
| "step": 52500 |
| }, |
| { |
| "epoch": 2.309770766146605, |
| "grad_norm": 32.9444694519043, |
| "learning_rate": 4.601818762892589e-06, |
| "loss": 0.241, |
| "step": 53000 |
| }, |
| { |
| "epoch": 2.331561056393271, |
| "grad_norm": 17.02962303161621, |
| "learning_rate": 4.456550161248148e-06, |
| "loss": 0.263, |
| "step": 53500 |
| }, |
| { |
| "epoch": 2.353351346639937, |
| "grad_norm": 31.476587295532227, |
| "learning_rate": 4.311281559603708e-06, |
| "loss": 0.2567, |
| "step": 54000 |
| }, |
| { |
| "epoch": 2.375141636886603, |
| "grad_norm": 34.839778900146484, |
| "learning_rate": 4.166012957959267e-06, |
| "loss": 0.2468, |
| "step": 54500 |
| }, |
| { |
| "epoch": 2.396931927133269, |
| "grad_norm": 0.312466561794281, |
| "learning_rate": 4.020744356314826e-06, |
| "loss": 0.2591, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.4187222173799356, |
| "grad_norm": 10.837553024291992, |
| "learning_rate": 3.875475754670385e-06, |
| "loss": 0.2552, |
| "step": 55500 |
| }, |
| { |
| "epoch": 2.4405125076266017, |
| "grad_norm": 2.8537871837615967, |
| "learning_rate": 3.7302071530259454e-06, |
| "loss": 0.2675, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.4623027978732677, |
| "grad_norm": 17.37003517150879, |
| "learning_rate": 3.584938551381505e-06, |
| "loss": 0.255, |
| "step": 56500 |
| }, |
| { |
| "epoch": 2.4840930881199337, |
| "grad_norm": 14.81467056274414, |
| "learning_rate": 3.439669949737064e-06, |
| "loss": 0.2538, |
| "step": 57000 |
| }, |
| { |
| "epoch": 2.5058833783665997, |
| "grad_norm": 22.952491760253906, |
| "learning_rate": 3.2944013480926235e-06, |
| "loss": 0.259, |
| "step": 57500 |
| }, |
| { |
| "epoch": 2.527673668613266, |
| "grad_norm": 51.379600524902344, |
| "learning_rate": 3.149132746448183e-06, |
| "loss": 0.2433, |
| "step": 58000 |
| }, |
| { |
| "epoch": 2.549463958859932, |
| "grad_norm": 50.78517150878906, |
| "learning_rate": 3.0038641448037425e-06, |
| "loss": 0.245, |
| "step": 58500 |
| }, |
| { |
| "epoch": 2.571254249106598, |
| "grad_norm": 70.40054321289062, |
| "learning_rate": 2.8585955431593016e-06, |
| "loss": 0.2578, |
| "step": 59000 |
| }, |
| { |
| "epoch": 2.593044539353264, |
| "grad_norm": 13.180349349975586, |
| "learning_rate": 2.713326941514861e-06, |
| "loss": 0.2569, |
| "step": 59500 |
| }, |
| { |
| "epoch": 2.61483482959993, |
| "grad_norm": 29.106149673461914, |
| "learning_rate": 2.5680583398704206e-06, |
| "loss": 0.2664, |
| "step": 60000 |
| }, |
| { |
| "epoch": 2.636625119846596, |
| "grad_norm": 48.964847564697266, |
| "learning_rate": 2.42278973822598e-06, |
| "loss": 0.2542, |
| "step": 60500 |
| }, |
| { |
| "epoch": 2.658415410093262, |
| "grad_norm": 18.914636611938477, |
| "learning_rate": 2.2775211365815396e-06, |
| "loss": 0.2515, |
| "step": 61000 |
| }, |
| { |
| "epoch": 2.6802057003399287, |
| "grad_norm": 44.629173278808594, |
| "learning_rate": 2.132252534937099e-06, |
| "loss": 0.2576, |
| "step": 61500 |
| }, |
| { |
| "epoch": 2.7019959905865947, |
| "grad_norm": 13.741023063659668, |
| "learning_rate": 1.986983933292658e-06, |
| "loss": 0.2532, |
| "step": 62000 |
| }, |
| { |
| "epoch": 2.7237862808332607, |
| "grad_norm": 30.310876846313477, |
| "learning_rate": 1.841715331648218e-06, |
| "loss": 0.2597, |
| "step": 62500 |
| }, |
| { |
| "epoch": 2.7455765710799267, |
| "grad_norm": 21.90140724182129, |
| "learning_rate": 1.696446730003777e-06, |
| "loss": 0.2435, |
| "step": 63000 |
| }, |
| { |
| "epoch": 2.7673668613265927, |
| "grad_norm": 27.499929428100586, |
| "learning_rate": 1.5511781283593367e-06, |
| "loss": 0.2577, |
| "step": 63500 |
| }, |
| { |
| "epoch": 2.789157151573259, |
| "grad_norm": 33.468074798583984, |
| "learning_rate": 1.4059095267148958e-06, |
| "loss": 0.2532, |
| "step": 64000 |
| }, |
| { |
| "epoch": 2.810947441819925, |
| "grad_norm": 14.590469360351562, |
| "learning_rate": 1.2606409250704555e-06, |
| "loss": 0.2548, |
| "step": 64500 |
| }, |
| { |
| "epoch": 2.832737732066591, |
| "grad_norm": 36.13336181640625, |
| "learning_rate": 1.1153723234260148e-06, |
| "loss": 0.2408, |
| "step": 65000 |
| }, |
| { |
| "epoch": 2.854528022313257, |
| "grad_norm": 20.17421531677246, |
| "learning_rate": 9.701037217815743e-07, |
| "loss": 0.2467, |
| "step": 65500 |
| }, |
| { |
| "epoch": 2.876318312559923, |
| "grad_norm": 129.63551330566406, |
| "learning_rate": 8.248351201371336e-07, |
| "loss": 0.2555, |
| "step": 66000 |
| }, |
| { |
| "epoch": 2.898108602806589, |
| "grad_norm": 2.310012102127075, |
| "learning_rate": 6.79566518492693e-07, |
| "loss": 0.2478, |
| "step": 66500 |
| }, |
| { |
| "epoch": 2.9198988930532552, |
| "grad_norm": 60.997982025146484, |
| "learning_rate": 5.342979168482524e-07, |
| "loss": 0.2468, |
| "step": 67000 |
| }, |
| { |
| "epoch": 2.9416891832999217, |
| "grad_norm": 2.189000129699707, |
| "learning_rate": 3.890293152038119e-07, |
| "loss": 0.2387, |
| "step": 67500 |
| }, |
| { |
| "epoch": 2.9634794735465877, |
| "grad_norm": 79.08841705322266, |
| "learning_rate": 2.437607135593713e-07, |
| "loss": 0.236, |
| "step": 68000 |
| }, |
| { |
| "epoch": 2.9852697637932537, |
| "grad_norm": 4.530211925506592, |
| "learning_rate": 9.849211191493072e-08, |
| "loss": 0.2454, |
| "step": 68500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.819404188550134, |
| "eval_f1": 0.8178100047087637, |
| "eval_loss": 0.7280585169792175, |
| "eval_runtime": 106.044, |
| "eval_samples_per_second": 432.717, |
| "eval_steps_per_second": 27.045, |
| "step": 68838 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 68838, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.244994585504614e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|