| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 1911, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005232862375719519, | |
| "grad_norm": 0.41251502735307816, | |
| "learning_rate": 5.208333333333333e-08, | |
| "loss": 1.3294, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0026164311878597592, | |
| "grad_norm": 0.40979864724426074, | |
| "learning_rate": 2.604166666666667e-07, | |
| "loss": 1.3882, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0052328623757195184, | |
| "grad_norm": 0.4459342483347168, | |
| "learning_rate": 5.208333333333334e-07, | |
| "loss": 1.4122, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.007849293563579277, | |
| "grad_norm": 0.45405295074425656, | |
| "learning_rate": 7.8125e-07, | |
| "loss": 1.4033, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.010465724751439037, | |
| "grad_norm": 0.4273117179560934, | |
| "learning_rate": 1.0416666666666667e-06, | |
| "loss": 1.4211, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.013082155939298797, | |
| "grad_norm": 0.403952059588716, | |
| "learning_rate": 1.3020833333333335e-06, | |
| "loss": 1.3848, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.015698587127158554, | |
| "grad_norm": 0.37092991190019975, | |
| "learning_rate": 1.5625e-06, | |
| "loss": 1.3566, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.018315018315018316, | |
| "grad_norm": 0.3874048824303396, | |
| "learning_rate": 1.8229166666666666e-06, | |
| "loss": 1.4002, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.020931449502878074, | |
| "grad_norm": 0.4340106660669964, | |
| "learning_rate": 2.0833333333333334e-06, | |
| "loss": 1.3953, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.023547880690737835, | |
| "grad_norm": 0.38396439222660794, | |
| "learning_rate": 2.3437500000000002e-06, | |
| "loss": 1.3482, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.026164311878597593, | |
| "grad_norm": 0.39907513943654244, | |
| "learning_rate": 2.604166666666667e-06, | |
| "loss": 1.3687, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02878074306645735, | |
| "grad_norm": 0.3796895756070553, | |
| "learning_rate": 2.8645833333333334e-06, | |
| "loss": 1.3401, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.03139717425431711, | |
| "grad_norm": 0.3527290292483327, | |
| "learning_rate": 3.125e-06, | |
| "loss": 1.3073, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.034013605442176874, | |
| "grad_norm": 0.3662321246340989, | |
| "learning_rate": 3.385416666666667e-06, | |
| "loss": 1.3531, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 0.34563167668734196, | |
| "learning_rate": 3.6458333333333333e-06, | |
| "loss": 1.3286, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03924646781789639, | |
| "grad_norm": 0.3717533990051037, | |
| "learning_rate": 3.90625e-06, | |
| "loss": 1.3045, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.04186289900575615, | |
| "grad_norm": 0.38164915069460925, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 1.3164, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.044479330193615906, | |
| "grad_norm": 0.39989916881220006, | |
| "learning_rate": 4.427083333333334e-06, | |
| "loss": 1.2543, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.04709576138147567, | |
| "grad_norm": 0.3913923463317125, | |
| "learning_rate": 4.6875000000000004e-06, | |
| "loss": 1.2043, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04971219256933543, | |
| "grad_norm": 0.34376738438126636, | |
| "learning_rate": 4.947916666666667e-06, | |
| "loss": 1.2341, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.052328623757195186, | |
| "grad_norm": 0.30134298158483813, | |
| "learning_rate": 5.208333333333334e-06, | |
| "loss": 1.1862, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.054945054945054944, | |
| "grad_norm": 0.3052784311309599, | |
| "learning_rate": 5.468750000000001e-06, | |
| "loss": 1.2606, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.0575614861329147, | |
| "grad_norm": 0.25824143374638053, | |
| "learning_rate": 5.729166666666667e-06, | |
| "loss": 1.1568, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06017791732077447, | |
| "grad_norm": 0.23929713795671043, | |
| "learning_rate": 5.989583333333334e-06, | |
| "loss": 1.1385, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.06279434850863422, | |
| "grad_norm": 0.26831071944855933, | |
| "learning_rate": 6.25e-06, | |
| "loss": 1.1467, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06541077969649398, | |
| "grad_norm": 0.2216800876461612, | |
| "learning_rate": 6.510416666666667e-06, | |
| "loss": 1.1384, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.06802721088435375, | |
| "grad_norm": 0.1588265640264393, | |
| "learning_rate": 6.770833333333334e-06, | |
| "loss": 1.1093, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0706436420722135, | |
| "grad_norm": 0.13582836155536646, | |
| "learning_rate": 7.031250000000001e-06, | |
| "loss": 1.1229, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 0.13024757101994675, | |
| "learning_rate": 7.291666666666667e-06, | |
| "loss": 1.0914, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07587650444793302, | |
| "grad_norm": 0.1504965272682143, | |
| "learning_rate": 7.552083333333334e-06, | |
| "loss": 1.0998, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.07849293563579278, | |
| "grad_norm": 0.13072789026197298, | |
| "learning_rate": 7.8125e-06, | |
| "loss": 1.0952, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08110936682365254, | |
| "grad_norm": 0.11686548134375568, | |
| "learning_rate": 8.072916666666667e-06, | |
| "loss": 1.1244, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.0837257980115123, | |
| "grad_norm": 0.12930751237395446, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 1.0971, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08634222919937205, | |
| "grad_norm": 0.1061664187333161, | |
| "learning_rate": 8.59375e-06, | |
| "loss": 1.1035, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.08895866038723181, | |
| "grad_norm": 0.11344291158179244, | |
| "learning_rate": 8.854166666666667e-06, | |
| "loss": 1.103, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.09157509157509157, | |
| "grad_norm": 0.10243199669671689, | |
| "learning_rate": 9.114583333333334e-06, | |
| "loss": 1.1019, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.09419152276295134, | |
| "grad_norm": 0.10697804468237891, | |
| "learning_rate": 9.375000000000001e-06, | |
| "loss": 1.115, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0968079539508111, | |
| "grad_norm": 0.11081612483346633, | |
| "learning_rate": 9.635416666666668e-06, | |
| "loss": 1.1086, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.09942438513867086, | |
| "grad_norm": 0.09768440427757896, | |
| "learning_rate": 9.895833333333334e-06, | |
| "loss": 1.1114, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.10204081632653061, | |
| "grad_norm": 0.10117332468515122, | |
| "learning_rate": 9.999924849924331e-06, | |
| "loss": 1.1144, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.10465724751439037, | |
| "grad_norm": 0.10099056071649591, | |
| "learning_rate": 9.999465607642677e-06, | |
| "loss": 1.1013, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.10727367870225013, | |
| "grad_norm": 0.09721250361582028, | |
| "learning_rate": 9.998588911421522e-06, | |
| "loss": 1.0639, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 0.09082971568988933, | |
| "learning_rate": 9.99729483446475e-06, | |
| "loss": 1.1006, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.11250654107796965, | |
| "grad_norm": 0.09628763078940285, | |
| "learning_rate": 9.995583484827415e-06, | |
| "loss": 1.1218, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.1151229722658294, | |
| "grad_norm": 0.09538277823430605, | |
| "learning_rate": 9.993455005406717e-06, | |
| "loss": 1.0624, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.11773940345368916, | |
| "grad_norm": 0.09244037114645652, | |
| "learning_rate": 9.990909573930075e-06, | |
| "loss": 1.0869, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.12035583464154893, | |
| "grad_norm": 0.09421878782184312, | |
| "learning_rate": 9.987947402940285e-06, | |
| "loss": 1.1554, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.12297226582940869, | |
| "grad_norm": 0.0988988378382322, | |
| "learning_rate": 9.984568739777776e-06, | |
| "loss": 1.0685, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.12558869701726844, | |
| "grad_norm": 0.09841274533116046, | |
| "learning_rate": 9.980773866559946e-06, | |
| "loss": 1.0956, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 0.09262674109612956, | |
| "learning_rate": 9.976563100157615e-06, | |
| "loss": 1.0921, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.13082155939298795, | |
| "grad_norm": 0.10163823923602844, | |
| "learning_rate": 9.971936792168569e-06, | |
| "loss": 1.084, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.13343799058084774, | |
| "grad_norm": 0.0928908378416407, | |
| "learning_rate": 9.966895328888195e-06, | |
| "loss": 1.0872, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.1360544217687075, | |
| "grad_norm": 0.09797583946011011, | |
| "learning_rate": 9.961439131277223e-06, | |
| "loss": 1.0954, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.13867085295656725, | |
| "grad_norm": 0.09844694340152337, | |
| "learning_rate": 9.955568654926582e-06, | |
| "loss": 1.064, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.141287284144427, | |
| "grad_norm": 0.09504782674434727, | |
| "learning_rate": 9.949284390019362e-06, | |
| "loss": 1.063, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.14390371533228677, | |
| "grad_norm": 0.09690578824204586, | |
| "learning_rate": 9.942586861289874e-06, | |
| "loss": 1.1025, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 0.09392460425308424, | |
| "learning_rate": 9.935476627979837e-06, | |
| "loss": 1.0764, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.14913657770800628, | |
| "grad_norm": 0.09275089601979723, | |
| "learning_rate": 9.927954283791687e-06, | |
| "loss": 1.0744, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.15175300889586604, | |
| "grad_norm": 0.10547186198215683, | |
| "learning_rate": 9.920020456838998e-06, | |
| "loss": 1.1103, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1543694400837258, | |
| "grad_norm": 0.08956055611126217, | |
| "learning_rate": 9.911675809594042e-06, | |
| "loss": 1.1022, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.15698587127158556, | |
| "grad_norm": 0.0915409382279811, | |
| "learning_rate": 9.902921038832456e-06, | |
| "loss": 1.0757, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15960230245944532, | |
| "grad_norm": 0.08782792485869619, | |
| "learning_rate": 9.893756875575082e-06, | |
| "loss": 1.0637, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.16221873364730507, | |
| "grad_norm": 0.09483514308455616, | |
| "learning_rate": 9.884184085026918e-06, | |
| "loss": 1.0579, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.16483516483516483, | |
| "grad_norm": 0.0937486987355325, | |
| "learning_rate": 9.874203466513215e-06, | |
| "loss": 1.0794, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.1674515960230246, | |
| "grad_norm": 0.11040618987740987, | |
| "learning_rate": 9.863815853412748e-06, | |
| "loss": 1.0725, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.17006802721088435, | |
| "grad_norm": 0.09280233328929079, | |
| "learning_rate": 9.853022113088223e-06, | |
| "loss": 1.1286, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.1726844583987441, | |
| "grad_norm": 0.0966074215386229, | |
| "learning_rate": 9.84182314681385e-06, | |
| "loss": 1.0971, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.17530088958660386, | |
| "grad_norm": 0.08727047002748183, | |
| "learning_rate": 9.83021988970009e-06, | |
| "loss": 1.0925, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.17791732077446362, | |
| "grad_norm": 0.0975611999540034, | |
| "learning_rate": 9.818213310615575e-06, | |
| "loss": 1.0636, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.18053375196232338, | |
| "grad_norm": 0.08877657381309818, | |
| "learning_rate": 9.805804412106197e-06, | |
| "loss": 1.0761, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 0.0883430654150276, | |
| "learning_rate": 9.792994230311419e-06, | |
| "loss": 1.0307, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1857666143380429, | |
| "grad_norm": 0.09377538490108409, | |
| "learning_rate": 9.779783834877727e-06, | |
| "loss": 1.0707, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.18838304552590268, | |
| "grad_norm": 0.09194753389941379, | |
| "learning_rate": 9.766174328869344e-06, | |
| "loss": 1.0414, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.19099947671376244, | |
| "grad_norm": 0.0935350901917584, | |
| "learning_rate": 9.752166848676101e-06, | |
| "loss": 1.0779, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.1936159079016222, | |
| "grad_norm": 0.09345100438692536, | |
| "learning_rate": 9.737762563918564e-06, | |
| "loss": 1.0708, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.19623233908948196, | |
| "grad_norm": 0.09320056834864819, | |
| "learning_rate": 9.722962677350367e-06, | |
| "loss": 1.0878, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.1988487702773417, | |
| "grad_norm": 0.09174250505149908, | |
| "learning_rate": 9.707768424757778e-06, | |
| "loss": 1.06, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.20146520146520147, | |
| "grad_norm": 0.09645992745579468, | |
| "learning_rate": 9.692181074856515e-06, | |
| "loss": 1.121, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.20408163265306123, | |
| "grad_norm": 0.09439400336644131, | |
| "learning_rate": 9.676201929185809e-06, | |
| "loss": 1.0752, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.206698063840921, | |
| "grad_norm": 0.09402118031041617, | |
| "learning_rate": 9.659832321999727e-06, | |
| "loss": 1.0975, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.20931449502878074, | |
| "grad_norm": 0.09267411234316543, | |
| "learning_rate": 9.643073620155755e-06, | |
| "loss": 1.0694, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2119309262166405, | |
| "grad_norm": 0.09447481222401165, | |
| "learning_rate": 9.625927223000679e-06, | |
| "loss": 1.1162, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.21454735740450026, | |
| "grad_norm": 0.0897442545907557, | |
| "learning_rate": 9.608394562253724e-06, | |
| "loss": 1.0606, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.21716378859236002, | |
| "grad_norm": 0.10131176500816004, | |
| "learning_rate": 9.590477101887016e-06, | |
| "loss": 1.1174, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 0.09563089966390936, | |
| "learning_rate": 9.572176338003341e-06, | |
| "loss": 1.071, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.22239665096807953, | |
| "grad_norm": 0.09288447437566862, | |
| "learning_rate": 9.553493798711217e-06, | |
| "loss": 1.0665, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.2250130821559393, | |
| "grad_norm": 0.09375408215693064, | |
| "learning_rate": 9.534431043997298e-06, | |
| "loss": 1.0589, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.22762951334379905, | |
| "grad_norm": 0.09071511164365616, | |
| "learning_rate": 9.514989665596114e-06, | |
| "loss": 1.091, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.2302459445316588, | |
| "grad_norm": 0.09087938695208862, | |
| "learning_rate": 9.495171286857171e-06, | |
| "loss": 1.1039, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.23286237571951857, | |
| "grad_norm": 0.09478252628006645, | |
| "learning_rate": 9.47497756260939e-06, | |
| "loss": 1.0358, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.23547880690737832, | |
| "grad_norm": 0.09404925603530015, | |
| "learning_rate": 9.454410179022932e-06, | |
| "loss": 1.0814, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.23809523809523808, | |
| "grad_norm": 0.09569946020536148, | |
| "learning_rate": 9.433470853468409e-06, | |
| "loss": 1.0801, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.24071166928309787, | |
| "grad_norm": 0.09057080839056063, | |
| "learning_rate": 9.412161334373477e-06, | |
| "loss": 1.088, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.24332810047095763, | |
| "grad_norm": 0.09363629436103157, | |
| "learning_rate": 9.39048340107685e-06, | |
| "loss": 1.0878, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.24594453165881738, | |
| "grad_norm": 0.09512091417096423, | |
| "learning_rate": 9.36843886367972e-06, | |
| "loss": 1.0747, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.24856096284667714, | |
| "grad_norm": 0.09185138387241662, | |
| "learning_rate": 9.346029562894616e-06, | |
| "loss": 1.0812, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.25117739403453687, | |
| "grad_norm": 0.09502875842140078, | |
| "learning_rate": 9.323257369891702e-06, | |
| "loss": 1.0475, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.25379382522239663, | |
| "grad_norm": 0.10046813934117133, | |
| "learning_rate": 9.300124186142542e-06, | |
| "loss": 1.0923, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 0.09865406096117707, | |
| "learning_rate": 9.276631943261325e-06, | |
| "loss": 1.0735, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.25902668759811615, | |
| "grad_norm": 0.09315382567566974, | |
| "learning_rate": 9.252782602843565e-06, | |
| "loss": 1.116, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.2616431187859759, | |
| "grad_norm": 0.09161515179394478, | |
| "learning_rate": 9.228578156302327e-06, | |
| "loss": 1.0629, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.26425954997383566, | |
| "grad_norm": 0.09149080159187764, | |
| "learning_rate": 9.204020624701932e-06, | |
| "loss": 1.0941, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.2668759811616955, | |
| "grad_norm": 0.0936206412453983, | |
| "learning_rate": 9.1791120585892e-06, | |
| "loss": 1.0404, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.26949241234955523, | |
| "grad_norm": 0.08801875893120217, | |
| "learning_rate": 9.153854537822235e-06, | |
| "loss": 1.0809, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.272108843537415, | |
| "grad_norm": 0.10316067285625621, | |
| "learning_rate": 9.12825017139675e-06, | |
| "loss": 1.0924, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.27472527472527475, | |
| "grad_norm": 0.09546467171869157, | |
| "learning_rate": 9.102301097269974e-06, | |
| "loss": 1.1002, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.2773417059131345, | |
| "grad_norm": 0.09418760117929899, | |
| "learning_rate": 9.076009482182132e-06, | |
| "loss": 1.0746, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.27995813710099426, | |
| "grad_norm": 0.10045604004858837, | |
| "learning_rate": 9.049377521475514e-06, | |
| "loss": 1.0673, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.282574568288854, | |
| "grad_norm": 0.09277215287425432, | |
| "learning_rate": 9.022407438911177e-06, | |
| "loss": 1.0999, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.2851909994767138, | |
| "grad_norm": 0.09793572677691421, | |
| "learning_rate": 8.99510148648325e-06, | |
| "loss": 1.0647, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.28780743066457354, | |
| "grad_norm": 0.0914330914420494, | |
| "learning_rate": 8.967461944230908e-06, | |
| "loss": 1.0707, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2904238618524333, | |
| "grad_norm": 0.10116198703780697, | |
| "learning_rate": 8.939491120047974e-06, | |
| "loss": 1.1074, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 0.09833855526771558, | |
| "learning_rate": 8.911191349490215e-06, | |
| "loss": 1.078, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.2956567242281528, | |
| "grad_norm": 0.0922948074646419, | |
| "learning_rate": 8.882564995580329e-06, | |
| "loss": 1.0805, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.29827315541601257, | |
| "grad_norm": 0.09315441419416741, | |
| "learning_rate": 8.85361444861063e-06, | |
| "loss": 1.0742, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.3008895866038723, | |
| "grad_norm": 0.0909274708268345, | |
| "learning_rate": 8.824342125943461e-06, | |
| "loss": 1.0631, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.3035060177917321, | |
| "grad_norm": 0.09296453722572297, | |
| "learning_rate": 8.79475047180934e-06, | |
| "loss": 1.0974, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.30612244897959184, | |
| "grad_norm": 0.0995080373967825, | |
| "learning_rate": 8.764841957102866e-06, | |
| "loss": 1.1179, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.3087388801674516, | |
| "grad_norm": 0.09489715107187809, | |
| "learning_rate": 8.734619079176416e-06, | |
| "loss": 1.1005, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.31135531135531136, | |
| "grad_norm": 0.09161784868162753, | |
| "learning_rate": 8.704084361631597e-06, | |
| "loss": 1.0846, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.3139717425431711, | |
| "grad_norm": 0.09436043183499114, | |
| "learning_rate": 8.673240354108539e-06, | |
| "loss": 1.0449, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3165881737310309, | |
| "grad_norm": 0.09854128956565812, | |
| "learning_rate": 8.642089632072992e-06, | |
| "loss": 1.0379, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.31920460491889063, | |
| "grad_norm": 0.09269580428590124, | |
| "learning_rate": 8.61063479660128e-06, | |
| "loss": 1.1128, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.3218210361067504, | |
| "grad_norm": 0.09715619099295432, | |
| "learning_rate": 8.578878474163115e-06, | |
| "loss": 1.1173, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.32443746729461015, | |
| "grad_norm": 0.09436716379561796, | |
| "learning_rate": 8.546823316402282e-06, | |
| "loss": 1.0931, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.3270538984824699, | |
| "grad_norm": 0.09350634807491594, | |
| "learning_rate": 8.514471999915229e-06, | |
| "loss": 1.0982, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 0.09818057505285775, | |
| "learning_rate": 8.48182722602757e-06, | |
| "loss": 1.0905, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.3322867608581894, | |
| "grad_norm": 0.09365290817053802, | |
| "learning_rate": 8.448891720568535e-06, | |
| "loss": 1.0608, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.3349031920460492, | |
| "grad_norm": 0.09673654758538655, | |
| "learning_rate": 8.415668233643346e-06, | |
| "loss": 1.0557, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.33751962323390894, | |
| "grad_norm": 0.09191403641587385, | |
| "learning_rate": 8.382159539403605e-06, | |
| "loss": 1.1119, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.3401360544217687, | |
| "grad_norm": 0.09807617570314092, | |
| "learning_rate": 8.348368435815636e-06, | |
| "loss": 1.0681, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.34275248560962845, | |
| "grad_norm": 0.09778187174635387, | |
| "learning_rate": 8.314297744426865e-06, | |
| "loss": 1.0692, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.3453689167974882, | |
| "grad_norm": 0.09095407317509857, | |
| "learning_rate": 8.279950310130218e-06, | |
| "loss": 1.0614, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.34798534798534797, | |
| "grad_norm": 0.09242319578220776, | |
| "learning_rate": 8.245329000926574e-06, | |
| "loss": 1.0741, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.35060177917320773, | |
| "grad_norm": 0.09304773246714779, | |
| "learning_rate": 8.210436707685286e-06, | |
| "loss": 1.0906, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3532182103610675, | |
| "grad_norm": 0.09331708902557105, | |
| "learning_rate": 8.175276343902802e-06, | |
| "loss": 1.0776, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.35583464154892724, | |
| "grad_norm": 0.09446277869965898, | |
| "learning_rate": 8.139850845459378e-06, | |
| "loss": 1.0676, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.358451072736787, | |
| "grad_norm": 0.0938388275229434, | |
| "learning_rate": 8.104163170373942e-06, | |
| "loss": 1.0672, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.36106750392464676, | |
| "grad_norm": 0.12962056716126813, | |
| "learning_rate": 8.068216298557088e-06, | |
| "loss": 1.0963, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3636839351125065, | |
| "grad_norm": 0.09405642339543371, | |
| "learning_rate": 8.032013231562271e-06, | |
| "loss": 1.0945, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.3663003663003663, | |
| "grad_norm": 0.09925780964813577, | |
| "learning_rate": 7.995556992335168e-06, | |
| "loss": 1.0877, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.36891679748822603, | |
| "grad_norm": 0.0980038239194205, | |
| "learning_rate": 7.95885062496126e-06, | |
| "loss": 1.079, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.3715332286760858, | |
| "grad_norm": 0.09125569142161302, | |
| "learning_rate": 7.92189719441166e-06, | |
| "loss": 1.0443, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.3741496598639456, | |
| "grad_norm": 0.09723893590902837, | |
| "learning_rate": 7.884699786287188e-06, | |
| "loss": 1.1071, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.37676609105180536, | |
| "grad_norm": 0.09426282615134741, | |
| "learning_rate": 7.847261506560716e-06, | |
| "loss": 1.0847, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.3793825222396651, | |
| "grad_norm": 0.09803497564078997, | |
| "learning_rate": 7.809585481317824e-06, | |
| "loss": 1.0713, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.3819989534275249, | |
| "grad_norm": 0.09256420121815938, | |
| "learning_rate": 7.77167485649578e-06, | |
| "loss": 1.0891, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.08982827618462035, | |
| "learning_rate": 7.733532797620849e-06, | |
| "loss": 1.092, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.3872318158032444, | |
| "grad_norm": 0.09801625597212717, | |
| "learning_rate": 7.695162489543966e-06, | |
| "loss": 1.1073, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.38984824699110415, | |
| "grad_norm": 0.09663426139648307, | |
| "learning_rate": 7.656567136174817e-06, | |
| "loss": 1.0449, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.3924646781789639, | |
| "grad_norm": 0.08898097158844268, | |
| "learning_rate": 7.6177499602143e-06, | |
| "loss": 1.0489, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.39508110936682367, | |
| "grad_norm": 0.09647893879433624, | |
| "learning_rate": 7.578714202885436e-06, | |
| "loss": 1.1077, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.3976975405546834, | |
| "grad_norm": 0.09473829426348951, | |
| "learning_rate": 7.53946312366273e-06, | |
| "loss": 1.0726, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4003139717425432, | |
| "grad_norm": 0.09551273813668852, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 1.0269, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.40293040293040294, | |
| "grad_norm": 0.0939472699528477, | |
| "learning_rate": 7.460328127056718e-06, | |
| "loss": 1.1167, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.4055468341182627, | |
| "grad_norm": 0.09571931423617905, | |
| "learning_rate": 7.420450817422855e-06, | |
| "loss": 1.0889, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.40816326530612246, | |
| "grad_norm": 0.09870761455794679, | |
| "learning_rate": 7.38037140084229e-06, | |
| "loss": 1.0777, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4107796964939822, | |
| "grad_norm": 0.09101557193296496, | |
| "learning_rate": 7.340093223934775e-06, | |
| "loss": 1.0747, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.413396127681842, | |
| "grad_norm": 0.09668189656194194, | |
| "learning_rate": 7.29961964991649e-06, | |
| "loss": 1.0535, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.41601255886970173, | |
| "grad_norm": 0.09867723507934317, | |
| "learning_rate": 7.2589540583192165e-06, | |
| "loss": 1.1367, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.4186289900575615, | |
| "grad_norm": 0.09512923445822234, | |
| "learning_rate": 7.218099844708152e-06, | |
| "loss": 1.1037, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.42124542124542125, | |
| "grad_norm": 0.09195485364743144, | |
| "learning_rate": 7.177060420398376e-06, | |
| "loss": 1.074, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.423861852433281, | |
| "grad_norm": 0.09688356988311692, | |
| "learning_rate": 7.135839212170008e-06, | |
| "loss": 1.0683, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.42647828362114076, | |
| "grad_norm": 0.09429469053403353, | |
| "learning_rate": 7.094439661982072e-06, | |
| "loss": 1.092, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.4290947148090005, | |
| "grad_norm": 0.09314877066180605, | |
| "learning_rate": 7.0528652266850935e-06, | |
| "loss": 1.0766, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.4317111459968603, | |
| "grad_norm": 0.09523611617484085, | |
| "learning_rate": 7.011119377732459e-06, | |
| "loss": 1.0791, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.43432757718472004, | |
| "grad_norm": 0.09312672889281959, | |
| "learning_rate": 6.969205600890539e-06, | |
| "loss": 1.092, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4369440083725798, | |
| "grad_norm": 0.09545411150057426, | |
| "learning_rate": 6.9271273959476415e-06, | |
| "loss": 1.0849, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 0.09884148719701276, | |
| "learning_rate": 6.884888276421766e-06, | |
| "loss": 1.0637, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.4421768707482993, | |
| "grad_norm": 0.0998219817486141, | |
| "learning_rate": 6.842491769267241e-06, | |
| "loss": 1.0853, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.44479330193615907, | |
| "grad_norm": 0.09522170275309631, | |
| "learning_rate": 6.79994141458021e-06, | |
| "loss": 1.0937, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.4474097331240188, | |
| "grad_norm": 0.09485382353614892, | |
| "learning_rate": 6.757240765303047e-06, | |
| "loss": 1.0558, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.4500261643118786, | |
| "grad_norm": 0.09267386409488955, | |
| "learning_rate": 6.7143933869276755e-06, | |
| "loss": 1.0488, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.45264259549973834, | |
| "grad_norm": 0.09449688321986219, | |
| "learning_rate": 6.671402857197864e-06, | |
| "loss": 1.0584, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.4552590266875981, | |
| "grad_norm": 0.09311294383757475, | |
| "learning_rate": 6.628272765810468e-06, | |
| "loss": 1.1122, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.45787545787545786, | |
| "grad_norm": 0.09022986066920076, | |
| "learning_rate": 6.585006714115709e-06, | |
| "loss": 1.087, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.4604918890633176, | |
| "grad_norm": 0.10090818397481849, | |
| "learning_rate": 6.541608314816451e-06, | |
| "loss": 1.0964, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4631083202511774, | |
| "grad_norm": 0.09223178631995127, | |
| "learning_rate": 6.498081191666549e-06, | |
| "loss": 1.0216, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.46572475143903713, | |
| "grad_norm": 0.09188733569284858, | |
| "learning_rate": 6.454428979168257e-06, | |
| "loss": 1.0836, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4683411826268969, | |
| "grad_norm": 0.09513933717488653, | |
| "learning_rate": 6.410655322268758e-06, | |
| "loss": 1.0931, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.47095761381475665, | |
| "grad_norm": 0.09699426832466113, | |
| "learning_rate": 6.3667638760558055e-06, | |
| "loss": 1.1089, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4735740450026164, | |
| "grad_norm": 0.0968122803203442, | |
| "learning_rate": 6.3227583054525296e-06, | |
| "loss": 1.0557, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 0.1396287419627416, | |
| "learning_rate": 6.2786422849114074e-06, | |
| "loss": 1.0716, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.478806907378336, | |
| "grad_norm": 0.11593861025598927, | |
| "learning_rate": 6.2344194981074616e-06, | |
| "loss": 1.0898, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.48142333856619574, | |
| "grad_norm": 0.10152935163698419, | |
| "learning_rate": 6.190093637630662e-06, | |
| "loss": 1.0978, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.4840397697540555, | |
| "grad_norm": 0.09218318493310448, | |
| "learning_rate": 6.145668404677604e-06, | |
| "loss": 1.051, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.48665620094191525, | |
| "grad_norm": 0.10001083419198578, | |
| "learning_rate": 6.101147508742456e-06, | |
| "loss": 1.0521, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.489272632129775, | |
| "grad_norm": 0.09257516098496528, | |
| "learning_rate": 6.056534667307212e-06, | |
| "loss": 1.0863, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.49188906331763477, | |
| "grad_norm": 0.09696703586123687, | |
| "learning_rate": 6.011833605531295e-06, | |
| "loss": 1.0487, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.4945054945054945, | |
| "grad_norm": 0.09428737542943791, | |
| "learning_rate": 5.967048055940503e-06, | |
| "loss": 1.0624, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.4971219256933543, | |
| "grad_norm": 0.09431439265711451, | |
| "learning_rate": 5.922181758115333e-06, | |
| "loss": 1.1092, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.49973835688121404, | |
| "grad_norm": 0.11242720357502867, | |
| "learning_rate": 5.8772384583787455e-06, | |
| "loss": 1.0841, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.5023547880690737, | |
| "grad_norm": 0.0954704281289005, | |
| "learning_rate": 5.832221909483334e-06, | |
| "loss": 1.1396, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.5049712192569336, | |
| "grad_norm": 0.09808551016470506, | |
| "learning_rate": 5.787135870297976e-06, | |
| "loss": 1.0486, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.5075876504447933, | |
| "grad_norm": 0.10067013564209391, | |
| "learning_rate": 5.741984105493967e-06, | |
| "loss": 1.0986, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5102040816326531, | |
| "grad_norm": 0.09222170650944995, | |
| "learning_rate": 5.696770385230679e-06, | |
| "loss": 1.0619, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.09783124847704958, | |
| "learning_rate": 5.651498484840737e-06, | |
| "loss": 1.0985, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5154369440083726, | |
| "grad_norm": 0.09486749930774387, | |
| "learning_rate": 5.6061721845148e-06, | |
| "loss": 1.06, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.5180533751962323, | |
| "grad_norm": 0.090867649707542, | |
| "learning_rate": 5.560795268985899e-06, | |
| "loss": 1.0616, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5206698063840921, | |
| "grad_norm": 0.09527483968765536, | |
| "learning_rate": 5.515371527213422e-06, | |
| "loss": 1.0545, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.5232862375719518, | |
| "grad_norm": 0.09712192876033669, | |
| "learning_rate": 5.469904752066736e-06, | |
| "loss": 1.0704, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5259026687598116, | |
| "grad_norm": 0.09682112292363323, | |
| "learning_rate": 5.424398740008481e-06, | |
| "loss": 1.0898, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.5285190999476713, | |
| "grad_norm": 0.0962887584876426, | |
| "learning_rate": 5.378857290777566e-06, | |
| "loss": 1.0687, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5311355311355311, | |
| "grad_norm": 0.09201422555865771, | |
| "learning_rate": 5.333284207071901e-06, | |
| "loss": 1.0725, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.533751962323391, | |
| "grad_norm": 0.09900825725242525, | |
| "learning_rate": 5.287683294230855e-06, | |
| "loss": 1.111, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5363683935112507, | |
| "grad_norm": 0.09680456662693733, | |
| "learning_rate": 5.242058359917531e-06, | |
| "loss": 1.0633, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.5389848246991105, | |
| "grad_norm": 0.09800208940192005, | |
| "learning_rate": 5.196413213800812e-06, | |
| "loss": 1.063, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5416012558869702, | |
| "grad_norm": 0.09737511198024548, | |
| "learning_rate": 5.150751667237266e-06, | |
| "loss": 1.0615, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.54421768707483, | |
| "grad_norm": 0.09664431106857257, | |
| "learning_rate": 5.1050775329528865e-06, | |
| "loss": 1.0944, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5468341182626897, | |
| "grad_norm": 0.09763501189085519, | |
| "learning_rate": 5.059394624724749e-06, | |
| "loss": 1.0972, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 0.09632886842372163, | |
| "learning_rate": 5.0137067570625345e-06, | |
| "loss": 1.0666, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5520669806384092, | |
| "grad_norm": 0.09848798309692536, | |
| "learning_rate": 4.968017744890052e-06, | |
| "loss": 1.0833, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.554683411826269, | |
| "grad_norm": 0.09786398375895347, | |
| "learning_rate": 4.922331403226667e-06, | |
| "loss": 1.0592, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.5572998430141287, | |
| "grad_norm": 0.09284160129771099, | |
| "learning_rate": 4.876651546868759e-06, | |
| "loss": 1.1064, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.5599162742019885, | |
| "grad_norm": 0.09725195790127035, | |
| "learning_rate": 4.830981990071193e-06, | |
| "loss": 1.065, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.5625327053898482, | |
| "grad_norm": 0.09310816838440825, | |
| "learning_rate": 4.785326546228818e-06, | |
| "loss": 1.1083, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.565149136577708, | |
| "grad_norm": 0.09015768387151926, | |
| "learning_rate": 4.739689027558052e-06, | |
| "loss": 1.0563, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.5677655677655677, | |
| "grad_norm": 0.0920102183405243, | |
| "learning_rate": 4.694073244778571e-06, | |
| "loss": 1.096, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.5703819989534276, | |
| "grad_norm": 0.09262548047252805, | |
| "learning_rate": 4.648483006795115e-06, | |
| "loss": 1.0787, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5729984301412873, | |
| "grad_norm": 0.10108832973136253, | |
| "learning_rate": 4.602922120379432e-06, | |
| "loss": 1.0416, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.5756148613291471, | |
| "grad_norm": 0.10323564152612687, | |
| "learning_rate": 4.557394389852427e-06, | |
| "loss": 1.1073, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5782312925170068, | |
| "grad_norm": 0.09695438843197027, | |
| "learning_rate": 4.5119036167664966e-06, | |
| "loss": 1.0849, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.5808477237048666, | |
| "grad_norm": 0.09576429446485862, | |
| "learning_rate": 4.466453599588103e-06, | |
| "loss": 1.1107, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5834641548927263, | |
| "grad_norm": 0.0944725229689017, | |
| "learning_rate": 4.421048133380601e-06, | |
| "loss": 1.0389, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 0.09327957568881835, | |
| "learning_rate": 4.375691009487351e-06, | |
| "loss": 1.0991, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5886970172684458, | |
| "grad_norm": 0.09617124954370675, | |
| "learning_rate": 4.330386015215145e-06, | |
| "loss": 1.1254, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.5913134484563056, | |
| "grad_norm": 0.09607531748333169, | |
| "learning_rate": 4.285136933517971e-06, | |
| "loss": 1.0538, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.5939298796441653, | |
| "grad_norm": 0.09181487613174973, | |
| "learning_rate": 4.239947542681125e-06, | |
| "loss": 1.0298, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.5965463108320251, | |
| "grad_norm": 0.09929275099194719, | |
| "learning_rate": 4.194821616005738e-06, | |
| "loss": 1.0328, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.5991627420198848, | |
| "grad_norm": 0.09240194833786142, | |
| "learning_rate": 4.1497629214937e-06, | |
| "loss": 1.0608, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.6017791732077447, | |
| "grad_norm": 0.09879706415248848, | |
| "learning_rate": 4.104775221533039e-06, | |
| "loss": 1.101, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6043956043956044, | |
| "grad_norm": 0.09766476042682787, | |
| "learning_rate": 4.059862272583755e-06, | |
| "loss": 1.0974, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.6070120355834642, | |
| "grad_norm": 0.09336780343588688, | |
| "learning_rate": 4.015027824864158e-06, | |
| "loss": 1.1005, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.6096284667713239, | |
| "grad_norm": 0.09624359639351643, | |
| "learning_rate": 3.97027562203773e-06, | |
| "loss": 1.0715, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.6122448979591837, | |
| "grad_norm": 0.09737756540928204, | |
| "learning_rate": 3.92560940090053e-06, | |
| "loss": 1.0739, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.6148613291470434, | |
| "grad_norm": 0.09954071312596614, | |
| "learning_rate": 3.881032891069169e-06, | |
| "loss": 1.0821, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.6174777603349032, | |
| "grad_norm": 0.09841578747390028, | |
| "learning_rate": 3.836549814669389e-06, | |
| "loss": 1.0876, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6200941915227629, | |
| "grad_norm": 0.0957583395446334, | |
| "learning_rate": 3.7921638860252674e-06, | |
| "loss": 1.0702, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.6227106227106227, | |
| "grad_norm": 0.09851189698378275, | |
| "learning_rate": 3.747878811349075e-06, | |
| "loss": 1.0992, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6253270538984824, | |
| "grad_norm": 0.1015149061503031, | |
| "learning_rate": 3.703698288431801e-06, | |
| "loss": 1.0655, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.6279434850863422, | |
| "grad_norm": 0.09369966485062493, | |
| "learning_rate": 3.659626006334395e-06, | |
| "loss": 1.0823, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6305599162742019, | |
| "grad_norm": 0.09785800936328935, | |
| "learning_rate": 3.615665645079728e-06, | |
| "loss": 1.0693, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.6331763474620618, | |
| "grad_norm": 0.09500352690563862, | |
| "learning_rate": 3.5718208753453166e-06, | |
| "loss": 1.1132, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.6357927786499215, | |
| "grad_norm": 0.108876739833781, | |
| "learning_rate": 3.5280953581568155e-06, | |
| "loss": 1.0797, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.6384092098377813, | |
| "grad_norm": 0.0941872407043151, | |
| "learning_rate": 3.484492744582325e-06, | |
| "loss": 1.0786, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 0.0935925316762949, | |
| "learning_rate": 3.441016675427532e-06, | |
| "loss": 1.0726, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.6436420722135008, | |
| "grad_norm": 0.08980497698236649, | |
| "learning_rate": 3.397670780931699e-06, | |
| "loss": 1.0592, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.6462585034013606, | |
| "grad_norm": 0.09421923213295101, | |
| "learning_rate": 3.354458680464543e-06, | |
| "loss": 1.0573, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.6488749345892203, | |
| "grad_norm": 0.0948795372692727, | |
| "learning_rate": 3.311383982224017e-06, | |
| "loss": 1.0636, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.6514913657770801, | |
| "grad_norm": 0.09462125103971682, | |
| "learning_rate": 3.268450282935026e-06, | |
| "loss": 1.0819, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.6541077969649398, | |
| "grad_norm": 0.09535306467898946, | |
| "learning_rate": 3.2256611675491096e-06, | |
| "loss": 1.0751, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6567242281527996, | |
| "grad_norm": 0.09697269370087673, | |
| "learning_rate": 3.183020208945086e-06, | |
| "loss": 1.0692, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 0.09984001691532364, | |
| "learning_rate": 3.1405309676307283e-06, | |
| "loss": 1.1082, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.6619570905285191, | |
| "grad_norm": 0.09984099095960929, | |
| "learning_rate": 3.0981969914454555e-06, | |
| "loss": 1.101, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.6645735217163788, | |
| "grad_norm": 0.10101665465999785, | |
| "learning_rate": 3.056021815264102e-06, | |
| "loss": 1.0816, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.6671899529042387, | |
| "grad_norm": 0.09369041796257908, | |
| "learning_rate": 3.0140089607017386e-06, | |
| "loss": 1.1139, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.6698063840920984, | |
| "grad_norm": 0.10155516666787288, | |
| "learning_rate": 2.972161935819632e-06, | |
| "loss": 1.056, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6724228152799582, | |
| "grad_norm": 0.0935206115676816, | |
| "learning_rate": 2.930484234832315e-06, | |
| "loss": 1.0795, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.6750392464678179, | |
| "grad_norm": 0.09081518195104453, | |
| "learning_rate": 2.8889793378158284e-06, | |
| "loss": 1.0583, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.6776556776556777, | |
| "grad_norm": 0.0986874598400787, | |
| "learning_rate": 2.8476507104171273e-06, | |
| "loss": 1.0779, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.6802721088435374, | |
| "grad_norm": 0.093161000008791, | |
| "learning_rate": 2.806501803564708e-06, | |
| "loss": 1.0886, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6828885400313972, | |
| "grad_norm": 0.0967519147424789, | |
| "learning_rate": 2.765536053180447e-06, | |
| "loss": 1.0707, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.6855049712192569, | |
| "grad_norm": 0.09712918913631942, | |
| "learning_rate": 2.724756879892717e-06, | |
| "loss": 1.0778, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.6881214024071167, | |
| "grad_norm": 0.10289096314665017, | |
| "learning_rate": 2.6841676887507505e-06, | |
| "loss": 1.063, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.6907378335949764, | |
| "grad_norm": 0.09642137985208184, | |
| "learning_rate": 2.643771868940327e-06, | |
| "loss": 1.0742, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.6933542647828362, | |
| "grad_norm": 0.09473659314131959, | |
| "learning_rate": 2.603572793500775e-06, | |
| "loss": 1.0616, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.6959706959706959, | |
| "grad_norm": 0.09477724423521382, | |
| "learning_rate": 2.5635738190433252e-06, | |
| "loss": 1.0604, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6985871271585558, | |
| "grad_norm": 0.09860220421589404, | |
| "learning_rate": 2.523778285470835e-06, | |
| "loss": 1.0964, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.7012035583464155, | |
| "grad_norm": 0.09687005352585684, | |
| "learning_rate": 2.4841895156989047e-06, | |
| "loss": 1.0859, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.7038199895342753, | |
| "grad_norm": 0.09812689795649823, | |
| "learning_rate": 2.444810815378416e-06, | |
| "loss": 1.039, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.706436420722135, | |
| "grad_norm": 0.09649230087172285, | |
| "learning_rate": 2.4056454726195166e-06, | |
| "loss": 1.0851, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.7090528519099948, | |
| "grad_norm": 0.09678262936163672, | |
| "learning_rate": 2.366696757717054e-06, | |
| "loss": 1.054, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.7116692830978545, | |
| "grad_norm": 0.09476116997366278, | |
| "learning_rate": 2.327967922877515e-06, | |
| "loss": 1.1372, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.09299375374162118, | |
| "learning_rate": 2.28946220194746e-06, | |
| "loss": 1.0791, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.716902145473574, | |
| "grad_norm": 0.09228476281928448, | |
| "learning_rate": 2.2511828101435105e-06, | |
| "loss": 1.092, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.7195185766614338, | |
| "grad_norm": 0.08848476689061922, | |
| "learning_rate": 2.213132943783864e-06, | |
| "loss": 1.0507, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.7221350078492935, | |
| "grad_norm": 0.09681365452713796, | |
| "learning_rate": 2.1753157800214107e-06, | |
| "loss": 1.0635, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7247514390371533, | |
| "grad_norm": 0.09519917830920975, | |
| "learning_rate": 2.137734476578443e-06, | |
| "loss": 1.1137, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.727367870225013, | |
| "grad_norm": 0.0921260944978358, | |
| "learning_rate": 2.1003921714829823e-06, | |
| "loss": 1.0856, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7299843014128728, | |
| "grad_norm": 0.09320308136649647, | |
| "learning_rate": 2.063291982806759e-06, | |
| "loss": 1.0397, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 0.09958309277334744, | |
| "learning_rate": 2.0264370084048498e-06, | |
| "loss": 1.1013, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7352171637885924, | |
| "grad_norm": 0.09565285974158727, | |
| "learning_rate": 1.9898303256570093e-06, | |
| "loss": 1.0515, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.7378335949764521, | |
| "grad_norm": 0.09469438105504369, | |
| "learning_rate": 1.953474991210717e-06, | |
| "loss": 1.0732, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.7404500261643119, | |
| "grad_norm": 0.09542383178021345, | |
| "learning_rate": 1.917374040725935e-06, | |
| "loss": 1.0871, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.7430664573521716, | |
| "grad_norm": 0.09337335796748239, | |
| "learning_rate": 1.8815304886216385e-06, | |
| "loss": 1.0675, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.7456828885400314, | |
| "grad_norm": 0.10072152703813067, | |
| "learning_rate": 1.8459473278241125e-06, | |
| "loss": 1.0377, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.7482993197278912, | |
| "grad_norm": 0.09577252647834208, | |
| "learning_rate": 1.8106275295170462e-06, | |
| "loss": 1.0672, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.7509157509157509, | |
| "grad_norm": 0.09272785376120715, | |
| "learning_rate": 1.7755740428934333e-06, | |
| "loss": 1.0544, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.7535321821036107, | |
| "grad_norm": 0.0941666839069239, | |
| "learning_rate": 1.7407897949093184e-06, | |
| "loss": 1.0413, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.7561486132914704, | |
| "grad_norm": 0.09262337135711587, | |
| "learning_rate": 1.7062776900393979e-06, | |
| "loss": 1.1074, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.7587650444793302, | |
| "grad_norm": 0.09899062894642105, | |
| "learning_rate": 1.6720406100344977e-06, | |
| "loss": 1.0788, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.7613814756671899, | |
| "grad_norm": 0.09269004355802055, | |
| "learning_rate": 1.6380814136809442e-06, | |
| "loss": 1.0673, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.7639979068550498, | |
| "grad_norm": 0.09381570988420367, | |
| "learning_rate": 1.6044029365618612e-06, | |
| "loss": 1.0319, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.7666143380429095, | |
| "grad_norm": 0.09410544581818935, | |
| "learning_rate": 1.571007990820394e-06, | |
| "loss": 1.0962, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.09285318709463383, | |
| "learning_rate": 1.5378993649249053e-06, | |
| "loss": 1.0594, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.771847200418629, | |
| "grad_norm": 0.09800001168669942, | |
| "learning_rate": 1.5050798234361269e-06, | |
| "loss": 1.1042, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.7744636316064888, | |
| "grad_norm": 0.09469245083260618, | |
| "learning_rate": 1.4725521067763298e-06, | |
| "loss": 1.0584, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.7770800627943485, | |
| "grad_norm": 0.09574538735115234, | |
| "learning_rate": 1.4403189310004917e-06, | |
| "loss": 1.0798, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.7796964939822083, | |
| "grad_norm": 0.10149571930990543, | |
| "learning_rate": 1.4083829875695172e-06, | |
| "loss": 1.0606, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.782312925170068, | |
| "grad_norm": 0.09078842532414053, | |
| "learning_rate": 1.376746943125491e-06, | |
| "loss": 1.0719, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.7849293563579278, | |
| "grad_norm": 0.09812598662162697, | |
| "learning_rate": 1.34541343926902e-06, | |
| "loss": 1.0721, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7875457875457875, | |
| "grad_norm": 0.09747573254806928, | |
| "learning_rate": 1.3143850923386586e-06, | |
| "loss": 1.0719, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.7901622187336473, | |
| "grad_norm": 0.09959826128263043, | |
| "learning_rate": 1.2836644931924469e-06, | |
| "loss": 1.0973, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.792778649921507, | |
| "grad_norm": 0.0957616232199979, | |
| "learning_rate": 1.2532542069915722e-06, | |
| "loss": 1.0634, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.7953950811093669, | |
| "grad_norm": 0.0926840275240788, | |
| "learning_rate": 1.2231567729861809e-06, | |
| "loss": 1.0515, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.7980115122972266, | |
| "grad_norm": 0.09549741967656641, | |
| "learning_rate": 1.1933747043033505e-06, | |
| "loss": 1.077, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.8006279434850864, | |
| "grad_norm": 0.09818556699167169, | |
| "learning_rate": 1.1639104877372475e-06, | |
| "loss": 1.085, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.8032443746729461, | |
| "grad_norm": 0.10185328564977968, | |
| "learning_rate": 1.134766583541475e-06, | |
| "loss": 1.072, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.8058608058608059, | |
| "grad_norm": 0.10325354090839255, | |
| "learning_rate": 1.1059454252236457e-06, | |
| "loss": 1.0807, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.8084772370486656, | |
| "grad_norm": 0.09026566064040906, | |
| "learning_rate": 1.0774494193421842e-06, | |
| "loss": 1.0837, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.8110936682365254, | |
| "grad_norm": 0.09216555834272212, | |
| "learning_rate": 1.0492809453053836e-06, | |
| "loss": 1.0616, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.8137100994243851, | |
| "grad_norm": 0.10356251448995982, | |
| "learning_rate": 1.0214423551727188e-06, | |
| "loss": 1.0768, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 0.09707729936777112, | |
| "learning_rate": 9.939359734584552e-07, | |
| "loss": 1.049, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.8189429618001046, | |
| "grad_norm": 0.09534275994935477, | |
| "learning_rate": 9.667640969375465e-07, | |
| "loss": 1.0492, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.8215593929879644, | |
| "grad_norm": 0.09929844462568642, | |
| "learning_rate": 9.399289944538664e-07, | |
| "loss": 1.0791, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.8241758241758241, | |
| "grad_norm": 0.10015979787589827, | |
| "learning_rate": 9.134329067307485e-07, | |
| "loss": 1.0963, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.826792255363684, | |
| "grad_norm": 0.09048377966095525, | |
| "learning_rate": 8.872780461838931e-07, | |
| "loss": 1.0929, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.8294086865515437, | |
| "grad_norm": 0.09228876737280277, | |
| "learning_rate": 8.614665967366276e-07, | |
| "loss": 1.108, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.8320251177394035, | |
| "grad_norm": 0.09079351608675855, | |
| "learning_rate": 8.360007136375553e-07, | |
| "loss": 1.0874, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.8346415489272632, | |
| "grad_norm": 0.09486940153765996, | |
| "learning_rate": 8.108825232805856e-07, | |
| "loss": 1.082, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.837257980115123, | |
| "grad_norm": 0.09465621125079711, | |
| "learning_rate": 7.861141230273839e-07, | |
| "loss": 1.0544, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.8398744113029827, | |
| "grad_norm": 0.09407212657727718, | |
| "learning_rate": 7.61697581032243e-07, | |
| "loss": 1.0981, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.8424908424908425, | |
| "grad_norm": 0.09059190465302502, | |
| "learning_rate": 7.376349360693952e-07, | |
| "loss": 1.0736, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.8451072736787022, | |
| "grad_norm": 0.10244509362525825, | |
| "learning_rate": 7.139281973627693e-07, | |
| "loss": 1.1207, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.847723704866562, | |
| "grad_norm": 0.09294450820782799, | |
| "learning_rate": 6.905793444182257e-07, | |
| "loss": 1.0324, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.8503401360544217, | |
| "grad_norm": 0.09248053013567557, | |
| "learning_rate": 6.675903268582623e-07, | |
| "loss": 1.0698, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.8529565672422815, | |
| "grad_norm": 0.09408682320579052, | |
| "learning_rate": 6.449630642592336e-07, | |
| "loss": 1.0563, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.8555729984301413, | |
| "grad_norm": 0.09902606656264573, | |
| "learning_rate": 6.22699445991054e-07, | |
| "loss": 1.116, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.858189429618001, | |
| "grad_norm": 0.09712401155003872, | |
| "learning_rate": 6.008013310594418e-07, | |
| "loss": 1.0568, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.8608058608058609, | |
| "grad_norm": 0.09712433978230986, | |
| "learning_rate": 5.7927054795069e-07, | |
| "loss": 1.0815, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.8634222919937206, | |
| "grad_norm": 0.09779456921612323, | |
| "learning_rate": 5.581088944789953e-07, | |
| "loss": 1.0765, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.8660387231815804, | |
| "grad_norm": 0.09170714487859494, | |
| "learning_rate": 5.373181376363312e-07, | |
| "loss": 1.1131, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.8686551543694401, | |
| "grad_norm": 0.09618717160359205, | |
| "learning_rate": 5.169000134449115e-07, | |
| "loss": 1.1032, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.8712715855572999, | |
| "grad_norm": 0.09790108184832103, | |
| "learning_rate": 4.968562268122285e-07, | |
| "loss": 1.0955, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.8738880167451596, | |
| "grad_norm": 0.09561591922856301, | |
| "learning_rate": 4.771884513886998e-07, | |
| "loss": 1.1074, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.8765044479330194, | |
| "grad_norm": 0.09621992226965802, | |
| "learning_rate": 4.578983294279138e-07, | |
| "loss": 1.0647, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 0.09930969193000325, | |
| "learning_rate": 4.389874716495013e-07, | |
| "loss": 1.0914, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.8817373103087389, | |
| "grad_norm": 0.09405257946333198, | |
| "learning_rate": 4.204574571046438e-07, | |
| "loss": 1.097, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.8843537414965986, | |
| "grad_norm": 0.09397788285874713, | |
| "learning_rate": 4.0230983304422543e-07, | |
| "loss": 1.1008, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.8869701726844584, | |
| "grad_norm": 0.09687652981774121, | |
| "learning_rate": 3.8454611478963235e-07, | |
| "loss": 1.1045, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.8895866038723181, | |
| "grad_norm": 0.09528618292766945, | |
| "learning_rate": 3.671677856062261e-07, | |
| "loss": 1.0559, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.892203035060178, | |
| "grad_norm": 0.09928035328010386, | |
| "learning_rate": 3.501762965794919e-07, | |
| "loss": 1.108, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.8948194662480377, | |
| "grad_norm": 0.09163131587036641, | |
| "learning_rate": 3.335730664938758e-07, | |
| "loss": 1.0753, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.8974358974358975, | |
| "grad_norm": 0.09388990845956159, | |
| "learning_rate": 3.1735948171431e-07, | |
| "loss": 1.0705, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.9000523286237572, | |
| "grad_norm": 0.09573342861955501, | |
| "learning_rate": 3.015368960704584e-07, | |
| "loss": 1.112, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.902668759811617, | |
| "grad_norm": 0.10001705841286741, | |
| "learning_rate": 2.8610663074366773e-07, | |
| "loss": 1.0886, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.9052851909994767, | |
| "grad_norm": 0.09094229807732417, | |
| "learning_rate": 2.7106997415665527e-07, | |
| "loss": 1.052, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.9079016221873365, | |
| "grad_norm": 0.09650640058485531, | |
| "learning_rate": 2.564281818659159e-07, | |
| "loss": 1.0476, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.9105180533751962, | |
| "grad_norm": 0.10041718587089403, | |
| "learning_rate": 2.4218247645689306e-07, | |
| "loss": 1.1137, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.913134484563056, | |
| "grad_norm": 0.09306796891683689, | |
| "learning_rate": 2.2833404744188824e-07, | |
| "loss": 1.0617, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.9157509157509157, | |
| "grad_norm": 0.100239290761894, | |
| "learning_rate": 2.1488405116074028e-07, | |
| "loss": 1.0408, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.9183673469387755, | |
| "grad_norm": 0.09397127167902947, | |
| "learning_rate": 2.0183361068426778e-07, | |
| "loss": 1.0878, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.9209837781266352, | |
| "grad_norm": 0.09458207930981834, | |
| "learning_rate": 1.8918381572049393e-07, | |
| "loss": 1.0678, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.923600209314495, | |
| "grad_norm": 0.09508047513436788, | |
| "learning_rate": 1.7693572252365841e-07, | |
| "loss": 1.0722, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.9262166405023547, | |
| "grad_norm": 0.09733028951573541, | |
| "learning_rate": 1.650903538060189e-07, | |
| "loss": 1.0811, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.9288330716902146, | |
| "grad_norm": 0.09631275276270197, | |
| "learning_rate": 1.536486986524538e-07, | |
| "loss": 1.0944, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.9314495028780743, | |
| "grad_norm": 0.09636213160724866, | |
| "learning_rate": 1.426117124378762e-07, | |
| "loss": 1.0777, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.9340659340659341, | |
| "grad_norm": 0.09657812048629409, | |
| "learning_rate": 1.3198031674745814e-07, | |
| "loss": 1.0325, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.9366823652537938, | |
| "grad_norm": 0.09804660298802706, | |
| "learning_rate": 1.2175539929968117e-07, | |
| "loss": 1.0653, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.9392987964416536, | |
| "grad_norm": 0.09264029111784397, | |
| "learning_rate": 1.1193781387220936e-07, | |
| "loss": 1.0701, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.9419152276295133, | |
| "grad_norm": 0.10902005715807923, | |
| "learning_rate": 1.0252838023059985e-07, | |
| "loss": 1.1091, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.9445316588173731, | |
| "grad_norm": 0.09846420181714893, | |
| "learning_rate": 9.352788405985469e-08, | |
| "loss": 1.0669, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.9471480900052328, | |
| "grad_norm": 0.09532316632506668, | |
| "learning_rate": 8.493707689881448e-08, | |
| "loss": 1.0679, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.9497645211930926, | |
| "grad_norm": 0.09093683738005763, | |
| "learning_rate": 7.675667607740356e-08, | |
| "loss": 1.0818, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 0.10443186130031959, | |
| "learning_rate": 6.898736465673739e-08, | |
| "loss": 1.0814, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.9549973835688121, | |
| "grad_norm": 0.09761832875769212, | |
| "learning_rate": 6.162979137208314e-08, | |
| "loss": 1.0915, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.957613814756672, | |
| "grad_norm": 0.09373123105729916, | |
| "learning_rate": 5.468457057869358e-08, | |
| "loss": 1.065, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.9602302459445317, | |
| "grad_norm": 0.09628764600258595, | |
| "learning_rate": 4.815228220050538e-08, | |
| "loss": 1.0706, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.9628466771323915, | |
| "grad_norm": 0.10115588220407606, | |
| "learning_rate": 4.2033471681718895e-08, | |
| "loss": 1.0661, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.9654631083202512, | |
| "grad_norm": 0.098607617926353, | |
| "learning_rate": 3.632864994125129e-08, | |
| "loss": 1.0416, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.968079539508111, | |
| "grad_norm": 0.09383738080780944, | |
| "learning_rate": 3.103829333007624e-08, | |
| "loss": 1.0784, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.9706959706959707, | |
| "grad_norm": 0.09783787837370606, | |
| "learning_rate": 2.616284359144794e-08, | |
| "loss": 1.0471, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.9733124018838305, | |
| "grad_norm": 0.09582809551354422, | |
| "learning_rate": 2.1702707824017287e-08, | |
| "loss": 1.0934, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.9759288330716902, | |
| "grad_norm": 0.09524690201100454, | |
| "learning_rate": 1.7658258447836306e-08, | |
| "loss": 1.0975, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.97854526425955, | |
| "grad_norm": 0.09873313263669789, | |
| "learning_rate": 1.4029833173264673e-08, | |
| "loss": 1.0873, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.9811616954474097, | |
| "grad_norm": 0.09687852845634715, | |
| "learning_rate": 1.0817734972768946e-08, | |
| "loss": 1.0732, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.9837781266352695, | |
| "grad_norm": 0.10309251536634813, | |
| "learning_rate": 8.022232055623913e-09, | |
| "loss": 1.0568, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.9863945578231292, | |
| "grad_norm": 0.09726473663813696, | |
| "learning_rate": 5.643557845518843e-09, | |
| "loss": 1.0979, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 0.09723030101081766, | |
| "learning_rate": 3.6819109610658486e-09, | |
| "loss": 1.0804, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.9916274201988488, | |
| "grad_norm": 0.09425603701053707, | |
| "learning_rate": 2.137455199215377e-09, | |
| "loss": 1.0291, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.9942438513867086, | |
| "grad_norm": 0.10028648603892673, | |
| "learning_rate": 1.0103195215788175e-09, | |
| "loss": 1.0752, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.9968602825745683, | |
| "grad_norm": 0.0921531358049198, | |
| "learning_rate": 3.005980436604494e-10, | |
| "loss": 1.1026, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.9994767137624281, | |
| "grad_norm": 0.09999533286583566, | |
| "learning_rate": 8.350027000392224e-12, | |
| "loss": 1.0887, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_runtime": 3.2394, | |
| "eval_samples_per_second": 3.087, | |
| "eval_steps_per_second": 0.926, | |
| "step": 1911 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1911, | |
| "total_flos": 495538831097856.0, | |
| "train_loss": 1.094423781010195, | |
| "train_runtime": 15943.1465, | |
| "train_samples_per_second": 1.917, | |
| "train_steps_per_second": 0.12 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1911, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 495538831097856.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |