| { |
| "best_global_step": 1000, |
| "best_metric": 0.9877215336499479, |
| "best_model_checkpoint": "./results/wallet_deberta_v10_v3/checkpoint-1000", |
| "epoch": 0.11669292257424588, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005834646128712294, |
| "grad_norm": 0.726335883140564, |
| "learning_rate": 4.6674445740956825e-07, |
| "loss": 0.6858, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0011669292257424587, |
| "grad_norm": 2.723215103149414, |
| "learning_rate": 1.0501750291715287e-06, |
| "loss": 0.6653, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.001750393838613688, |
| "grad_norm": 1.717950463294983, |
| "learning_rate": 1.6336056009334889e-06, |
| "loss": 0.6771, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0023338584514849174, |
| "grad_norm": 1.431176781654358, |
| "learning_rate": 2.2170361726954492e-06, |
| "loss": 0.6689, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0029173230643561466, |
| "grad_norm": 0.4767102003097534, |
| "learning_rate": 2.80046674445741e-06, |
| "loss": 0.6794, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.003500787677227376, |
| "grad_norm": 1.1932811737060547, |
| "learning_rate": 3.38389731621937e-06, |
| "loss": 0.6816, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.004084252290098606, |
| "grad_norm": 1.114223599433899, |
| "learning_rate": 3.967327887981331e-06, |
| "loss": 0.6608, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.004667716902969835, |
| "grad_norm": 1.4470163583755493, |
| "learning_rate": 4.550758459743291e-06, |
| "loss": 0.6733, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.005251181515841064, |
| "grad_norm": 1.2226989269256592, |
| "learning_rate": 5.1341890315052505e-06, |
| "loss": 0.674, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.005834646128712293, |
| "grad_norm": 1.3562431335449219, |
| "learning_rate": 5.717619603267212e-06, |
| "loss": 0.6816, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.006418110741583523, |
| "grad_norm": 1.2481769323349, |
| "learning_rate": 6.301050175029172e-06, |
| "loss": 0.684, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.007001575354454752, |
| "grad_norm": 0.7279570698738098, |
| "learning_rate": 6.8844807467911315e-06, |
| "loss": 0.6787, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0075850399673259815, |
| "grad_norm": 1.5943596363067627, |
| "learning_rate": 7.467911318553092e-06, |
| "loss": 0.6717, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.008168504580197211, |
| "grad_norm": 1.481000542640686, |
| "learning_rate": 8.051341890315053e-06, |
| "loss": 0.6841, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.00875196919306844, |
| "grad_norm": 2.2860443592071533, |
| "learning_rate": 8.634772462077014e-06, |
| "loss": 0.6511, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.00933543380593967, |
| "grad_norm": 1.3881680965423584, |
| "learning_rate": 9.218203033838974e-06, |
| "loss": 0.6767, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0099188984188109, |
| "grad_norm": 0.5634646415710449, |
| "learning_rate": 9.801633605600934e-06, |
| "loss": 0.691, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.010502363031682128, |
| "grad_norm": 0.5292670130729675, |
| "learning_rate": 1.0385064177362893e-05, |
| "loss": 0.6639, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.011085827644553358, |
| "grad_norm": 1.0421162843704224, |
| "learning_rate": 1.0968494749124855e-05, |
| "loss": 0.665, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.011669292257424586, |
| "grad_norm": 1.158360242843628, |
| "learning_rate": 1.1551925320886815e-05, |
| "loss": 0.6846, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.012252756870295816, |
| "grad_norm": 2.138388156890869, |
| "learning_rate": 1.2135355892648776e-05, |
| "loss": 0.6644, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.012836221483167046, |
| "grad_norm": 0.621021568775177, |
| "learning_rate": 1.2718786464410737e-05, |
| "loss": 0.6752, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.013419686096038275, |
| "grad_norm": 0.47644492983818054, |
| "learning_rate": 1.3302217036172695e-05, |
| "loss": 0.6563, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.014003150708909505, |
| "grad_norm": 1.9346930980682373, |
| "learning_rate": 1.3885647607934657e-05, |
| "loss": 0.6601, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.014586615321780735, |
| "grad_norm": 0.47468164563179016, |
| "learning_rate": 1.4469078179696615e-05, |
| "loss": 0.686, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.015170079934651963, |
| "grad_norm": 1.0222961902618408, |
| "learning_rate": 1.5052508751458576e-05, |
| "loss": 0.6577, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.01575354454752319, |
| "grad_norm": 0.9757065773010254, |
| "learning_rate": 1.563593932322054e-05, |
| "loss": 0.6452, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.016337009160394423, |
| "grad_norm": 1.822059988975525, |
| "learning_rate": 1.6219369894982498e-05, |
| "loss": 0.6474, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.01692047377326565, |
| "grad_norm": 0.9806272387504578, |
| "learning_rate": 1.680280046674446e-05, |
| "loss": 0.6721, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.01750393838613688, |
| "grad_norm": 0.6027816534042358, |
| "learning_rate": 1.7386231038506417e-05, |
| "loss": 0.6624, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.01808740299900811, |
| "grad_norm": 0.7162922620773315, |
| "learning_rate": 1.796966161026838e-05, |
| "loss": 0.6555, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.01867086761187934, |
| "grad_norm": 0.7251244187355042, |
| "learning_rate": 1.855309218203034e-05, |
| "loss": 0.6427, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.019254332224750568, |
| "grad_norm": 1.059059500694275, |
| "learning_rate": 1.9136522753792298e-05, |
| "loss": 0.6396, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.0198377968376218, |
| "grad_norm": 0.8789103031158447, |
| "learning_rate": 1.971995332555426e-05, |
| "loss": 0.6084, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.020421261450493028, |
| "grad_norm": 0.9837176203727722, |
| "learning_rate": 2.030338389731622e-05, |
| "loss": 0.5825, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.021004726063364256, |
| "grad_norm": 1.4559673070907593, |
| "learning_rate": 2.0886814469078182e-05, |
| "loss": 0.5509, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.021588190676235488, |
| "grad_norm": 1.5943819284439087, |
| "learning_rate": 2.1470245040840144e-05, |
| "loss": 0.5045, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.022171655289106716, |
| "grad_norm": 2.3301029205322266, |
| "learning_rate": 2.2053675612602102e-05, |
| "loss": 0.4692, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.022755119901977944, |
| "grad_norm": 2.0437774658203125, |
| "learning_rate": 2.2637106184364063e-05, |
| "loss": 0.4009, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.023338584514849173, |
| "grad_norm": 2.043666362762451, |
| "learning_rate": 2.322053675612602e-05, |
| "loss": 0.4063, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.023922049127720404, |
| "grad_norm": 2.15029239654541, |
| "learning_rate": 2.3803967327887983e-05, |
| "loss": 0.3365, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.024505513740591633, |
| "grad_norm": 1.8519002199172974, |
| "learning_rate": 2.4387397899649944e-05, |
| "loss": 0.331, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.02508897835346286, |
| "grad_norm": 2.2622108459472656, |
| "learning_rate": 2.4970828471411902e-05, |
| "loss": 0.3014, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.025672442966334093, |
| "grad_norm": 5.565720558166504, |
| "learning_rate": 2.555425904317386e-05, |
| "loss": 0.2924, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.02625590757920532, |
| "grad_norm": 1.4099078178405762, |
| "learning_rate": 2.6137689614935822e-05, |
| "loss": 0.2953, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.02683937219207655, |
| "grad_norm": 3.1705520153045654, |
| "learning_rate": 2.6721120186697783e-05, |
| "loss": 0.3318, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.02742283680494778, |
| "grad_norm": 15.037497520446777, |
| "learning_rate": 2.7304550758459745e-05, |
| "loss": 0.286, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.02800630141781901, |
| "grad_norm": 3.520662546157837, |
| "learning_rate": 2.7887981330221706e-05, |
| "loss": 0.2891, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.028589766030690238, |
| "grad_norm": 4.476963996887207, |
| "learning_rate": 2.8471411901983664e-05, |
| "loss": 0.313, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.02917323064356147, |
| "grad_norm": 1.4843379259109497, |
| "learning_rate": 2.9054842473745626e-05, |
| "loss": 0.2193, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.029756695256432698, |
| "grad_norm": 5.573827266693115, |
| "learning_rate": 2.9638273045507587e-05, |
| "loss": 0.2222, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.030340159869303926, |
| "grad_norm": 3.0720365047454834, |
| "learning_rate": 3.022170361726955e-05, |
| "loss": 0.2242, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.030923624482175158, |
| "grad_norm": 1.8503894805908203, |
| "learning_rate": 3.080513418903151e-05, |
| "loss": 0.2179, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.03150708909504638, |
| "grad_norm": 5.656248092651367, |
| "learning_rate": 3.138856476079347e-05, |
| "loss": 0.2042, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.032090553707917614, |
| "grad_norm": 1.437711477279663, |
| "learning_rate": 3.1971995332555426e-05, |
| "loss": 0.1741, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.032674018320788846, |
| "grad_norm": 3.664780378341675, |
| "learning_rate": 3.255542590431739e-05, |
| "loss": 0.2236, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.03325748293366007, |
| "grad_norm": 3.872270107269287, |
| "learning_rate": 3.313885647607935e-05, |
| "loss": 0.1979, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.0338409475465313, |
| "grad_norm": 1.5311604738235474, |
| "learning_rate": 3.3722287047841314e-05, |
| "loss": 0.2233, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.034424412159402534, |
| "grad_norm": 1.2189322710037231, |
| "learning_rate": 3.4305717619603265e-05, |
| "loss": 0.2044, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.03500787677227376, |
| "grad_norm": 4.509505271911621, |
| "learning_rate": 3.488914819136523e-05, |
| "loss": 0.1768, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.03559134138514499, |
| "grad_norm": 4.28087854385376, |
| "learning_rate": 3.547257876312719e-05, |
| "loss": 0.2506, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.03617480599801622, |
| "grad_norm": 3.6596012115478516, |
| "learning_rate": 3.605600933488915e-05, |
| "loss": 0.1746, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.03675827061088745, |
| "grad_norm": 1.5460246801376343, |
| "learning_rate": 3.663943990665111e-05, |
| "loss": 0.1649, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.03734173522375868, |
| "grad_norm": 2.917727470397949, |
| "learning_rate": 3.722287047841307e-05, |
| "loss": 0.1534, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.03792519983662991, |
| "grad_norm": 2.845886707305908, |
| "learning_rate": 3.7806301050175034e-05, |
| "loss": 0.1418, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.038508664449501136, |
| "grad_norm": 4.808736801147461, |
| "learning_rate": 3.838973162193699e-05, |
| "loss": 0.201, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.03909212906237237, |
| "grad_norm": 6.715372085571289, |
| "learning_rate": 3.8973162193698957e-05, |
| "loss": 0.1414, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.0396755936752436, |
| "grad_norm": 5.7459588050842285, |
| "learning_rate": 3.9556592765460915e-05, |
| "loss": 0.1221, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.040259058288114824, |
| "grad_norm": 5.275432586669922, |
| "learning_rate": 4.014002333722287e-05, |
| "loss": 0.1293, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.040842522900986056, |
| "grad_norm": 3.305316209793091, |
| "learning_rate": 4.072345390898483e-05, |
| "loss": 0.1219, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.04142598751385729, |
| "grad_norm": 3.423701047897339, |
| "learning_rate": 4.1306884480746796e-05, |
| "loss": 0.1136, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.04200945212672851, |
| "grad_norm": 2.383979082107544, |
| "learning_rate": 4.1890315052508754e-05, |
| "loss": 0.1453, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.042592916739599744, |
| "grad_norm": 5.5527238845825195, |
| "learning_rate": 4.247374562427072e-05, |
| "loss": 0.1435, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.043176381352470976, |
| "grad_norm": 6.520336151123047, |
| "learning_rate": 4.305717619603267e-05, |
| "loss": 0.1492, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0437598459653422, |
| "grad_norm": 5.854917049407959, |
| "learning_rate": 4.3640606767794635e-05, |
| "loss": 0.0702, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.04434331057821343, |
| "grad_norm": 7.508828163146973, |
| "learning_rate": 4.422403733955659e-05, |
| "loss": 0.094, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.044926775191084664, |
| "grad_norm": 3.256366014480591, |
| "learning_rate": 4.480746791131856e-05, |
| "loss": 0.113, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.04551023980395589, |
| "grad_norm": 1.85856294631958, |
| "learning_rate": 4.5390898483080515e-05, |
| "loss": 0.0679, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.04609370441682712, |
| "grad_norm": 2.1898112297058105, |
| "learning_rate": 4.5974329054842474e-05, |
| "loss": 0.089, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.046677169029698345, |
| "grad_norm": 17.4210262298584, |
| "learning_rate": 4.655775962660444e-05, |
| "loss": 0.242, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.04726063364256958, |
| "grad_norm": 8.506953239440918, |
| "learning_rate": 4.7141190198366396e-05, |
| "loss": 0.0623, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.04784409825544081, |
| "grad_norm": 4.988569259643555, |
| "learning_rate": 4.772462077012836e-05, |
| "loss": 0.1198, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.048427562868312034, |
| "grad_norm": 9.25379467010498, |
| "learning_rate": 4.830805134189031e-05, |
| "loss": 0.0531, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.049011027481183266, |
| "grad_norm": 7.709923267364502, |
| "learning_rate": 4.889148191365228e-05, |
| "loss": 0.1395, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.0495944920940545, |
| "grad_norm": 12.6926908493042, |
| "learning_rate": 4.9474912485414235e-05, |
| "loss": 0.092, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.05017795670692572, |
| "grad_norm": 0.7283574938774109, |
| "learning_rate": 5.0058343057176193e-05, |
| "loss": 0.2233, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.050761421319796954, |
| "grad_norm": 11.080142974853516, |
| "learning_rate": 5.064177362893816e-05, |
| "loss": 0.1739, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.051344885932668186, |
| "grad_norm": 8.297661781311035, |
| "learning_rate": 5.1225204200700116e-05, |
| "loss": 0.1595, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.05192835054553941, |
| "grad_norm": 21.875085830688477, |
| "learning_rate": 5.180863477246208e-05, |
| "loss": 0.1347, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.05251181515841064, |
| "grad_norm": 7.422657489776611, |
| "learning_rate": 5.239206534422404e-05, |
| "loss": 0.134, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.053095279771281874, |
| "grad_norm": 4.497678756713867, |
| "learning_rate": 5.2975495915986e-05, |
| "loss": 0.102, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.0536787443841531, |
| "grad_norm": 0.941102147102356, |
| "learning_rate": 5.355892648774796e-05, |
| "loss": 0.0748, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.05426220899702433, |
| "grad_norm": 8.182140350341797, |
| "learning_rate": 5.414235705950992e-05, |
| "loss": 0.1656, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.05484567360989556, |
| "grad_norm": 2.252357006072998, |
| "learning_rate": 5.4725787631271885e-05, |
| "loss": 0.0557, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.05542913822276679, |
| "grad_norm": 0.7837367057800293, |
| "learning_rate": 5.530921820303384e-05, |
| "loss": 0.0522, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.05601260283563802, |
| "grad_norm": 4.414269924163818, |
| "learning_rate": 5.5892648774795794e-05, |
| "loss": 0.0299, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.05659606744850925, |
| "grad_norm": 0.7398958802223206, |
| "learning_rate": 5.6476079346557766e-05, |
| "loss": 0.033, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.057179532061380475, |
| "grad_norm": 3.029676914215088, |
| "learning_rate": 5.705950991831972e-05, |
| "loss": 0.0619, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.05776299667425171, |
| "grad_norm": 7.157805442810059, |
| "learning_rate": 5.764294049008169e-05, |
| "loss": 0.1247, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.05834646128712294, |
| "grad_norm": 11.755330085754395, |
| "learning_rate": 5.822637106184364e-05, |
| "loss": 0.136, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05834646128712294, |
| "eval_accuracy": 0.9619764005703539, |
| "eval_f1": 0.9026145866412618, |
| "eval_loss": 0.11134395748376846, |
| "eval_precision": 0.8327961715011232, |
| "eval_recall": 0.985210860774119, |
| "eval_runtime": 191.471, |
| "eval_samples_per_second": 252.733, |
| "eval_steps_per_second": 1.979, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.058929925899994164, |
| "grad_norm": 6.988606929779053, |
| "learning_rate": 5.88098016336056e-05, |
| "loss": 0.2199, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.059513390512865395, |
| "grad_norm": 0.24927297234535217, |
| "learning_rate": 5.939323220536756e-05, |
| "loss": 0.0742, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.06009685512573663, |
| "grad_norm": 6.2971601486206055, |
| "learning_rate": 5.997666277712952e-05, |
| "loss": 0.0406, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.06068031973860785, |
| "grad_norm": 2.3402936458587646, |
| "learning_rate": 6.0560093348891486e-05, |
| "loss": 0.0667, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.061263784351479084, |
| "grad_norm": 6.062570095062256, |
| "learning_rate": 6.114352392065345e-05, |
| "loss": 0.05, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.061847248964350315, |
| "grad_norm": 0.15475144982337952, |
| "learning_rate": 6.17269544924154e-05, |
| "loss": 0.0465, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.06243071357722154, |
| "grad_norm": 7.392509460449219, |
| "learning_rate": 6.231038506417737e-05, |
| "loss": 0.0428, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.06301417819009276, |
| "grad_norm": 13.624265670776367, |
| "learning_rate": 6.289381563593932e-05, |
| "loss": 0.0839, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.063597642802964, |
| "grad_norm": 2.227064847946167, |
| "learning_rate": 6.34772462077013e-05, |
| "loss": 0.074, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.06418110741583523, |
| "grad_norm": 1.5594475269317627, |
| "learning_rate": 6.406067677946325e-05, |
| "loss": 0.059, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.06476457202870646, |
| "grad_norm": 0.16706238687038422, |
| "learning_rate": 6.46441073512252e-05, |
| "loss": 0.0877, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.06534803664157769, |
| "grad_norm": 2.8842923641204834, |
| "learning_rate": 6.522753792298716e-05, |
| "loss": 0.0459, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.06593150125444892, |
| "grad_norm": 2.9185547828674316, |
| "learning_rate": 6.581096849474913e-05, |
| "loss": 0.0494, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.06651496586732014, |
| "grad_norm": 0.24510328471660614, |
| "learning_rate": 6.63943990665111e-05, |
| "loss": 0.0566, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.06709843048019137, |
| "grad_norm": 2.375892400741577, |
| "learning_rate": 6.697782963827304e-05, |
| "loss": 0.0299, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.0676818950930626, |
| "grad_norm": 0.8011118769645691, |
| "learning_rate": 6.756126021003501e-05, |
| "loss": 0.0557, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.06826535970593384, |
| "grad_norm": 4.479089736938477, |
| "learning_rate": 6.814469078179697e-05, |
| "loss": 0.1077, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.06884882431880507, |
| "grad_norm": 1.4084769487380981, |
| "learning_rate": 6.872812135355893e-05, |
| "loss": 0.0222, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.0694322889316763, |
| "grad_norm": 0.3282521665096283, |
| "learning_rate": 6.931155192532089e-05, |
| "loss": 0.0345, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.07001575354454752, |
| "grad_norm": 13.037544250488281, |
| "learning_rate": 6.989498249708286e-05, |
| "loss": 0.1076, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07059921815741875, |
| "grad_norm": 0.489627480506897, |
| "learning_rate": 7.04784130688448e-05, |
| "loss": 0.0668, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.07118268277028998, |
| "grad_norm": 0.24774909019470215, |
| "learning_rate": 7.106184364060677e-05, |
| "loss": 0.0233, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.07176614738316121, |
| "grad_norm": 0.2003220021724701, |
| "learning_rate": 7.164527421236872e-05, |
| "loss": 0.035, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.07234961199603245, |
| "grad_norm": 5.0525617599487305, |
| "learning_rate": 7.22287047841307e-05, |
| "loss": 0.0167, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.07293307660890366, |
| "grad_norm": 0.15552781522274017, |
| "learning_rate": 7.281213535589265e-05, |
| "loss": 0.0506, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.0735165412217749, |
| "grad_norm": 2.9369659423828125, |
| "learning_rate": 7.33955659276546e-05, |
| "loss": 0.0235, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.07410000583464613, |
| "grad_norm": 9.07511043548584, |
| "learning_rate": 7.397899649941657e-05, |
| "loss": 0.1261, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.07468347044751736, |
| "grad_norm": 2.014671802520752, |
| "learning_rate": 7.456242707117853e-05, |
| "loss": 0.0975, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.07526693506038859, |
| "grad_norm": 3.288213014602661, |
| "learning_rate": 7.51458576429405e-05, |
| "loss": 0.0737, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.07585039967325982, |
| "grad_norm": 0.8137044310569763, |
| "learning_rate": 7.572928821470245e-05, |
| "loss": 0.0126, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.07643386428613104, |
| "grad_norm": 5.870395183563232, |
| "learning_rate": 7.631271878646441e-05, |
| "loss": 0.0803, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.07701732889900227, |
| "grad_norm": 1.4032009840011597, |
| "learning_rate": 7.689614935822638e-05, |
| "loss": 0.0516, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.0776007935118735, |
| "grad_norm": 0.17412960529327393, |
| "learning_rate": 7.747957992998833e-05, |
| "loss": 0.1047, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.07818425812474473, |
| "grad_norm": 6.425705432891846, |
| "learning_rate": 7.80630105017503e-05, |
| "loss": 0.0486, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.07876772273761597, |
| "grad_norm": 1.704518437385559, |
| "learning_rate": 7.864644107351226e-05, |
| "loss": 0.047, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.0793511873504872, |
| "grad_norm": 2.6653246879577637, |
| "learning_rate": 7.922987164527421e-05, |
| "loss": 0.0585, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.07993465196335842, |
| "grad_norm": 0.47766944766044617, |
| "learning_rate": 7.981330221703618e-05, |
| "loss": 0.0453, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.08051811657622965, |
| "grad_norm": 6.443741321563721, |
| "learning_rate": 8.039673278879814e-05, |
| "loss": 0.0878, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.08110158118910088, |
| "grad_norm": 4.816298961639404, |
| "learning_rate": 8.09801633605601e-05, |
| "loss": 0.0236, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.08168504580197211, |
| "grad_norm": 1.0016226768493652, |
| "learning_rate": 8.156359393232206e-05, |
| "loss": 0.0204, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.08226851041484334, |
| "grad_norm": 0.8851390480995178, |
| "learning_rate": 8.214702450408401e-05, |
| "loss": 0.0094, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.08285197502771458, |
| "grad_norm": 0.15396802127361298, |
| "learning_rate": 8.273045507584599e-05, |
| "loss": 0.0313, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.08343543964058579, |
| "grad_norm": 5.522292613983154, |
| "learning_rate": 8.331388564760794e-05, |
| "loss": 0.0652, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.08401890425345702, |
| "grad_norm": 4.942827224731445, |
| "learning_rate": 8.38973162193699e-05, |
| "loss": 0.0363, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.08460236886632826, |
| "grad_norm": 0.6745284199714661, |
| "learning_rate": 8.448074679113185e-05, |
| "loss": 0.0286, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.08518583347919949, |
| "grad_norm": 4.792232990264893, |
| "learning_rate": 8.506417736289382e-05, |
| "loss": 0.0802, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.08576929809207072, |
| "grad_norm": 1.8626902103424072, |
| "learning_rate": 8.564760793465578e-05, |
| "loss": 0.1293, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.08635276270494195, |
| "grad_norm": 4.577139854431152, |
| "learning_rate": 8.623103850641774e-05, |
| "loss": 0.053, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.08693622731781317, |
| "grad_norm": 0.4114917814731598, |
| "learning_rate": 8.68144690781797e-05, |
| "loss": 0.0128, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.0875196919306844, |
| "grad_norm": 2.752854108810425, |
| "learning_rate": 8.739789964994166e-05, |
| "loss": 0.0398, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.08810315654355563, |
| "grad_norm": 3.019674777984619, |
| "learning_rate": 8.798133022170362e-05, |
| "loss": 0.014, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.08868662115642686, |
| "grad_norm": 8.21313190460205, |
| "learning_rate": 8.856476079346558e-05, |
| "loss": 0.0953, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.0892700857692981, |
| "grad_norm": 3.093966484069824, |
| "learning_rate": 8.914819136522755e-05, |
| "loss": 0.0498, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.08985355038216933, |
| "grad_norm": 0.18179702758789062, |
| "learning_rate": 8.973162193698951e-05, |
| "loss": 0.032, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.09043701499504055, |
| "grad_norm": 0.4583202004432678, |
| "learning_rate": 9.031505250875146e-05, |
| "loss": 0.1394, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.09102047960791178, |
| "grad_norm": 1.7563403844833374, |
| "learning_rate": 9.089848308051341e-05, |
| "loss": 0.022, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.09160394422078301, |
| "grad_norm": 0.7116678953170776, |
| "learning_rate": 9.148191365227539e-05, |
| "loss": 0.053, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.09218740883365424, |
| "grad_norm": 0.4474166929721832, |
| "learning_rate": 9.206534422403734e-05, |
| "loss": 0.0222, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.09277087344652547, |
| "grad_norm": 2.1701552867889404, |
| "learning_rate": 9.264877479579931e-05, |
| "loss": 0.0286, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.09335433805939669, |
| "grad_norm": 0.10418248921632767, |
| "learning_rate": 9.323220536756126e-05, |
| "loss": 0.004, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.09393780267226792, |
| "grad_norm": 8.407377243041992, |
| "learning_rate": 9.381563593932322e-05, |
| "loss": 0.0109, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.09452126728513915, |
| "grad_norm": 6.232142925262451, |
| "learning_rate": 9.439906651108519e-05, |
| "loss": 0.0693, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.09510473189801039, |
| "grad_norm": 3.055527448654175, |
| "learning_rate": 9.498249708284714e-05, |
| "loss": 0.0143, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.09568819651088162, |
| "grad_norm": 0.11404174566268921, |
| "learning_rate": 9.55659276546091e-05, |
| "loss": 0.0228, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.09627166112375285, |
| "grad_norm": 3.9539661407470703, |
| "learning_rate": 9.614935822637107e-05, |
| "loss": 0.0576, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.09685512573662407, |
| "grad_norm": 0.44163885712623596, |
| "learning_rate": 9.673278879813302e-05, |
| "loss": 0.0399, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.0974385903494953, |
| "grad_norm": 6.48396635055542, |
| "learning_rate": 9.731621936989499e-05, |
| "loss": 0.0921, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.09802205496236653, |
| "grad_norm": 1.521036982536316, |
| "learning_rate": 9.789964994165695e-05, |
| "loss": 0.046, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.09860551957523776, |
| "grad_norm": 1.3476589918136597, |
| "learning_rate": 9.848308051341892e-05, |
| "loss": 0.0248, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.099188984188109, |
| "grad_norm": 0.9856148362159729, |
| "learning_rate": 9.906651108518087e-05, |
| "loss": 0.0186, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.09977244880098023, |
| "grad_norm": 0.7555924654006958, |
| "learning_rate": 9.964994165694283e-05, |
| "loss": 0.0267, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.10035591341385144, |
| "grad_norm": 5.517045021057129, |
| "learning_rate": 0.0001002333722287048, |
| "loss": 0.0685, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.10093937802672268, |
| "grad_norm": 0.3978430926799774, |
| "learning_rate": 0.00010081680280046675, |
| "loss": 0.0139, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.10152284263959391, |
| "grad_norm": 2.891540050506592, |
| "learning_rate": 0.0001014002333722287, |
| "loss": 0.0328, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.10210630725246514, |
| "grad_norm": 4.364936828613281, |
| "learning_rate": 0.00010198366394399066, |
| "loss": 0.1126, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.10268977186533637, |
| "grad_norm": 4.818276405334473, |
| "learning_rate": 0.00010256709451575264, |
| "loss": 0.0511, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.1032732364782076, |
| "grad_norm": 2.0347578525543213, |
| "learning_rate": 0.00010315052508751459, |
| "loss": 0.1189, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.10385670109107882, |
| "grad_norm": 25.81111717224121, |
| "learning_rate": 0.00010373395565927654, |
| "loss": 0.1336, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.10444016570395005, |
| "grad_norm": 0.3832674026489258, |
| "learning_rate": 0.00010431738623103851, |
| "loss": 0.078, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.10502363031682128, |
| "grad_norm": 1.3876867294311523, |
| "learning_rate": 0.00010490081680280046, |
| "loss": 0.0367, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.10560709492969252, |
| "grad_norm": 0.6100406050682068, |
| "learning_rate": 0.00010548424737456244, |
| "loss": 0.0524, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.10619055954256375, |
| "grad_norm": 2.4452576637268066, |
| "learning_rate": 0.00010606767794632439, |
| "loss": 0.022, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.10677402415543498, |
| "grad_norm": 0.22773054242134094, |
| "learning_rate": 0.00010665110851808636, |
| "loss": 0.0234, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.1073574887683062, |
| "grad_norm": 4.204925537109375, |
| "learning_rate": 0.0001072345390898483, |
| "loss": 0.0198, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.10794095338117743, |
| "grad_norm": 0.04586046561598778, |
| "learning_rate": 0.00010781796966161026, |
| "loss": 0.1117, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.10852441799404866, |
| "grad_norm": 2.5276756286621094, |
| "learning_rate": 0.00010840140023337224, |
| "loss": 0.0142, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.10910788260691989, |
| "grad_norm": 8.639892578125, |
| "learning_rate": 0.0001089848308051342, |
| "loss": 0.0722, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.10969134721979112, |
| "grad_norm": 0.016679294407367706, |
| "learning_rate": 0.00010956826137689615, |
| "loss": 0.0354, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.11027481183266234, |
| "grad_norm": 0.15765951573848724, |
| "learning_rate": 0.0001101516919486581, |
| "loss": 0.0264, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.11085827644553357, |
| "grad_norm": 0.09224901348352432, |
| "learning_rate": 0.00011073512252042007, |
| "loss": 0.0375, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.1114417410584048, |
| "grad_norm": 0.11374855786561966, |
| "learning_rate": 0.00011131855309218205, |
| "loss": 0.0159, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.11202520567127604, |
| "grad_norm": 1.5367220640182495, |
| "learning_rate": 0.000111901983663944, |
| "loss": 0.021, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.11260867028414727, |
| "grad_norm": 1.5304830074310303, |
| "learning_rate": 0.00011248541423570595, |
| "loss": 0.0058, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.1131921348970185, |
| "grad_norm": 3.098538398742676, |
| "learning_rate": 0.00011306884480746791, |
| "loss": 0.0158, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.11377559950988972, |
| "grad_norm": 0.043856799602508545, |
| "learning_rate": 0.00011365227537922987, |
| "loss": 0.0165, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.11435906412276095, |
| "grad_norm": 0.050305627286434174, |
| "learning_rate": 0.00011423570595099184, |
| "loss": 0.0247, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.11494252873563218, |
| "grad_norm": 0.0370689295232296, |
| "learning_rate": 0.0001148191365227538, |
| "loss": 0.0296, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.11552599334850341, |
| "grad_norm": 3.8449175357818604, |
| "learning_rate": 0.00011540256709451576, |
| "loss": 0.0691, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.11610945796137465, |
| "grad_norm": 0.6539208292961121, |
| "learning_rate": 0.00011598599766627771, |
| "loss": 0.0047, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.11669292257424588, |
| "grad_norm": 0.08624271303415298, |
| "learning_rate": 0.00011656942823803966, |
| "loss": 0.0044, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.11669292257424588, |
| "eval_accuracy": 0.9956190200657147, |
| "eval_f1": 0.9877215336499479, |
| "eval_loss": 0.0239239651709795, |
| "eval_precision": 0.9902450354198119, |
| "eval_recall": 0.985210860774119, |
| "eval_runtime": 191.5492, |
| "eval_samples_per_second": 252.63, |
| "eval_steps_per_second": 1.979, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 17140, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2237376654704640.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |