{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 536,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0018656716417910447,
      "grad_norm": 2.57972321339612,
      "learning_rate": 1.8518518518518518e-07,
      "loss": 2.092,
      "step": 1
    },
    {
      "epoch": 0.009328358208955223,
      "grad_norm": 2.3735119503905793,
      "learning_rate": 9.259259259259259e-07,
      "loss": 2.0863,
      "step": 5
    },
    {
      "epoch": 0.018656716417910446,
      "grad_norm": 2.569828354184601,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 2.0955,
      "step": 10
    },
    {
      "epoch": 0.027985074626865673,
      "grad_norm": 2.3365878728458847,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 2.078,
      "step": 15
    },
    {
      "epoch": 0.03731343283582089,
      "grad_norm": 2.258292749629048,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 2.0535,
      "step": 20
    },
    {
      "epoch": 0.04664179104477612,
      "grad_norm": 2.046409624248939,
      "learning_rate": 4.62962962962963e-06,
      "loss": 1.9936,
      "step": 25
    },
    {
      "epoch": 0.055970149253731345,
      "grad_norm": 1.7763941617915107,
      "learning_rate": 5.555555555555557e-06,
      "loss": 1.9172,
      "step": 30
    },
    {
      "epoch": 0.06529850746268656,
      "grad_norm": 1.5289482279435747,
      "learning_rate": 6.481481481481482e-06,
      "loss": 1.8216,
      "step": 35
    },
    {
      "epoch": 0.07462686567164178,
      "grad_norm": 1.3374159627357958,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 1.7237,
      "step": 40
    },
    {
      "epoch": 0.08395522388059702,
      "grad_norm": 1.038988897758925,
      "learning_rate": 8.333333333333334e-06,
      "loss": 1.6205,
      "step": 45
    },
    {
      "epoch": 0.09328358208955224,
      "grad_norm": 0.5713243982808286,
      "learning_rate": 9.25925925925926e-06,
      "loss": 1.5364,
      "step": 50
    },
    {
      "epoch": 0.10261194029850747,
      "grad_norm": 0.3049992132098635,
      "learning_rate": 9.999893795201304e-06,
      "loss": 1.491,
      "step": 55
    },
    {
      "epoch": 0.11194029850746269,
      "grad_norm": 0.36148501744582134,
      "learning_rate": 9.996177100962714e-06,
      "loss": 1.473,
      "step": 60
    },
    {
      "epoch": 0.12126865671641791,
      "grad_norm": 0.37327333184682066,
      "learning_rate": 9.987154677711482e-06,
      "loss": 1.4594,
      "step": 65
    },
    {
      "epoch": 0.13059701492537312,
      "grad_norm": 0.31133363324217617,
      "learning_rate": 9.972836106879936e-06,
      "loss": 1.4442,
      "step": 70
    },
    {
      "epoch": 0.13992537313432835,
      "grad_norm": 0.2375327029162037,
      "learning_rate": 9.953236594185396e-06,
      "loss": 1.4258,
      "step": 75
    },
    {
      "epoch": 0.14925373134328357,
      "grad_norm": 0.19998776915472055,
      "learning_rate": 9.928376953482343e-06,
      "loss": 1.4093,
      "step": 80
    },
    {
      "epoch": 0.15858208955223882,
      "grad_norm": 0.19069793139067803,
      "learning_rate": 9.898283584658988e-06,
      "loss": 1.4029,
      "step": 85
    },
    {
      "epoch": 0.16791044776119404,
      "grad_norm": 0.17979433709822593,
      "learning_rate": 9.86298844560169e-06,
      "loss": 1.3876,
      "step": 90
    },
    {
      "epoch": 0.17723880597014927,
      "grad_norm": 0.1750060584120198,
      "learning_rate": 9.822529018257049e-06,
      "loss": 1.3842,
      "step": 95
    },
    {
      "epoch": 0.1865671641791045,
      "grad_norm": 0.17054790759610952,
      "learning_rate": 9.776948268827658e-06,
      "loss": 1.3756,
      "step": 100
    },
    {
      "epoch": 0.1958955223880597,
      "grad_norm": 0.17397761641849066,
      "learning_rate": 9.726294602143807e-06,
      "loss": 1.3544,
      "step": 105
    },
    {
      "epoch": 0.20522388059701493,
      "grad_norm": 0.17494592787959154,
      "learning_rate": 9.670621810259596e-06,
      "loss": 1.338,
      "step": 110
    },
    {
      "epoch": 0.21455223880597016,
      "grad_norm": 0.17874287198232588,
      "learning_rate": 9.609989015328052e-06,
      "loss": 1.3321,
      "step": 115
    },
    {
      "epoch": 0.22388059701492538,
      "grad_norm": 0.18177787677872814,
      "learning_rate": 9.544460606815901e-06,
      "loss": 1.3231,
      "step": 120
    },
    {
      "epoch": 0.2332089552238806,
      "grad_norm": 0.17485139906929326,
      "learning_rate": 9.474106173124667e-06,
      "loss": 1.3029,
      "step": 125
    },
    {
      "epoch": 0.24253731343283583,
      "grad_norm": 0.16541538036070813,
      "learning_rate": 9.399000427690736e-06,
      "loss": 1.2914,
      "step": 130
    },
    {
      "epoch": 0.251865671641791,
      "grad_norm": 0.15475993597407106,
      "learning_rate": 9.31922312964284e-06,
      "loss": 1.2731,
      "step": 135
    },
    {
      "epoch": 0.26119402985074625,
      "grad_norm": 0.14876033685341322,
      "learning_rate": 9.234858999101232e-06,
      "loss": 1.2612,
      "step": 140
    },
    {
      "epoch": 0.27052238805970147,
      "grad_norm": 0.13102260537341828,
      "learning_rate": 9.1459976272085e-06,
      "loss": 1.2406,
      "step": 145
    },
    {
      "epoch": 0.2798507462686567,
      "grad_norm": 0.1231370916125911,
      "learning_rate": 9.052733380987555e-06,
      "loss": 1.2402,
      "step": 150
    },
    {
      "epoch": 0.2891791044776119,
      "grad_norm": 0.11617076992843688,
      "learning_rate": 8.955165303127841e-06,
      "loss": 1.2287,
      "step": 155
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 0.10711292754756604,
      "learning_rate": 8.853397006806183e-06,
      "loss": 1.2293,
      "step": 160
    },
    {
      "epoch": 0.30783582089552236,
      "grad_norm": 0.10583142709305077,
      "learning_rate": 8.747536565653966e-06,
      "loss": 1.2196,
      "step": 165
    },
    {
      "epoch": 0.31716417910447764,
      "grad_norm": 0.10071371091495551,
      "learning_rate": 8.637696398987517e-06,
      "loss": 1.2218,
      "step": 170
    },
    {
      "epoch": 0.32649253731343286,
      "grad_norm": 0.09786904166381197,
      "learning_rate": 8.523993152423522e-06,
      "loss": 1.2076,
      "step": 175
    },
    {
      "epoch": 0.3358208955223881,
      "grad_norm": 0.09308216883502402,
      "learning_rate": 8.406547574006326e-06,
      "loss": 1.2069,
      "step": 180
    },
    {
      "epoch": 0.3451492537313433,
      "grad_norm": 0.09403316911201794,
      "learning_rate": 8.285484385978598e-06,
      "loss": 1.2151,
      "step": 185
    },
    {
      "epoch": 0.35447761194029853,
      "grad_norm": 0.09276385606464647,
      "learning_rate": 8.160932152331587e-06,
      "loss": 1.2033,
      "step": 190
    },
    {
      "epoch": 0.36380597014925375,
      "grad_norm": 0.09044032546865181,
      "learning_rate": 8.03302314227559e-06,
      "loss": 1.2028,
      "step": 195
    },
    {
      "epoch": 0.373134328358209,
      "grad_norm": 0.08960581233300505,
      "learning_rate": 7.90189318977564e-06,
      "loss": 1.2036,
      "step": 200
    },
    {
      "epoch": 0.3824626865671642,
      "grad_norm": 0.08546838672930177,
      "learning_rate": 7.767681549301576e-06,
      "loss": 1.1932,
      "step": 205
    },
    {
      "epoch": 0.3917910447761194,
      "grad_norm": 0.08706831337193889,
      "learning_rate": 7.630530747945672e-06,
      "loss": 1.2016,
      "step": 210
    },
    {
      "epoch": 0.40111940298507465,
      "grad_norm": 0.08501828118277771,
      "learning_rate": 7.490586434064893e-06,
      "loss": 1.1984,
      "step": 215
    },
    {
      "epoch": 0.41044776119402987,
      "grad_norm": 0.08231749758944934,
      "learning_rate": 7.3479972226084925e-06,
      "loss": 1.1934,
      "step": 220
    },
    {
      "epoch": 0.4197761194029851,
      "grad_norm": 0.0846325939740857,
      "learning_rate": 7.202914537295211e-06,
      "loss": 1.1871,
      "step": 225
    },
    {
      "epoch": 0.4291044776119403,
      "grad_norm": 0.0839943062987978,
      "learning_rate": 7.055492449807684e-06,
      "loss": 1.1847,
      "step": 230
    },
    {
      "epoch": 0.43843283582089554,
      "grad_norm": 0.0842974194320567,
      "learning_rate": 6.905887516174827e-06,
      "loss": 1.1823,
      "step": 235
    },
    {
      "epoch": 0.44776119402985076,
      "grad_norm": 0.08246202494295994,
      "learning_rate": 6.754258610515949e-06,
      "loss": 1.1874,
      "step": 240
    },
    {
      "epoch": 0.457089552238806,
      "grad_norm": 0.08059952388104133,
      "learning_rate": 6.60076675632314e-06,
      "loss": 1.1768,
      "step": 245
    },
    {
      "epoch": 0.4664179104477612,
      "grad_norm": 0.08278287209635887,
      "learning_rate": 6.445574955461134e-06,
      "loss": 1.1743,
      "step": 250
    },
    {
      "epoch": 0.47574626865671643,
      "grad_norm": 0.08064613485931339,
      "learning_rate": 6.288848015066211e-06,
      "loss": 1.1787,
      "step": 255
    },
    {
      "epoch": 0.48507462686567165,
      "grad_norm": 0.08402784054638568,
      "learning_rate": 6.130752372527981e-06,
      "loss": 1.1797,
      "step": 260
    },
    {
      "epoch": 0.4944029850746269,
      "grad_norm": 0.07813087594533498,
      "learning_rate": 5.9714559187399094e-06,
      "loss": 1.1814,
      "step": 265
    },
    {
      "epoch": 0.503731343283582,
      "grad_norm": 0.08253282031035683,
      "learning_rate": 5.811127819806277e-06,
      "loss": 1.1767,
      "step": 270
    },
    {
      "epoch": 0.5130597014925373,
      "grad_norm": 0.0795025665014561,
      "learning_rate": 5.649938337394932e-06,
      "loss": 1.1684,
      "step": 275
    },
    {
      "epoch": 0.5223880597014925,
      "grad_norm": 0.08201269451158993,
      "learning_rate": 5.4880586479265774e-06,
      "loss": 1.1704,
      "step": 280
    },
    {
      "epoch": 0.5317164179104478,
      "grad_norm": 0.08039664736174637,
      "learning_rate": 5.325660660792657e-06,
      "loss": 1.1704,
      "step": 285
    },
    {
      "epoch": 0.5410447761194029,
      "grad_norm": 0.08098088631592301,
      "learning_rate": 5.162916835794843e-06,
      "loss": 1.1722,
      "step": 290
    },
    {
      "epoch": 0.5503731343283582,
      "grad_norm": 0.08014102460089276,
      "learning_rate": 5e-06,
      "loss": 1.1748,
      "step": 295
    },
    {
      "epoch": 0.5597014925373134,
      "grad_norm": 0.08067154136929545,
      "learning_rate": 4.837083164205159e-06,
      "loss": 1.1741,
      "step": 300
    },
    {
      "epoch": 0.5690298507462687,
      "grad_norm": 0.07702944221391651,
      "learning_rate": 4.6743393392073435e-06,
      "loss": 1.1734,
      "step": 305
    },
    {
      "epoch": 0.5783582089552238,
      "grad_norm": 0.08366251841352572,
      "learning_rate": 4.511941352073424e-06,
      "loss": 1.1674,
      "step": 310
    },
    {
      "epoch": 0.5876865671641791,
      "grad_norm": 0.08401515307564614,
      "learning_rate": 4.3500616626050705e-06,
      "loss": 1.171,
      "step": 315
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 0.07880234388575032,
      "learning_rate": 4.188872180193723e-06,
      "loss": 1.1617,
      "step": 320
    },
    {
      "epoch": 0.6063432835820896,
      "grad_norm": 0.08075724129202204,
      "learning_rate": 4.028544081260093e-06,
      "loss": 1.1664,
      "step": 325
    },
    {
      "epoch": 0.6156716417910447,
      "grad_norm": 0.08145891878774718,
      "learning_rate": 3.869247627472021e-06,
      "loss": 1.1647,
      "step": 330
    },
    {
      "epoch": 0.625,
      "grad_norm": 0.07981030112455705,
      "learning_rate": 3.7111519849337908e-06,
      "loss": 1.1661,
      "step": 335
    },
    {
      "epoch": 0.6343283582089553,
      "grad_norm": 0.07719220748480114,
      "learning_rate": 3.554425044538868e-06,
      "loss": 1.164,
      "step": 340
    },
    {
      "epoch": 0.6436567164179104,
      "grad_norm": 0.07965079201392798,
      "learning_rate": 3.3992332436768615e-06,
      "loss": 1.1685,
      "step": 345
    },
    {
      "epoch": 0.6529850746268657,
      "grad_norm": 0.07929508580781615,
      "learning_rate": 3.2457413894840516e-06,
      "loss": 1.166,
      "step": 350
    },
    {
      "epoch": 0.6623134328358209,
      "grad_norm": 0.08260668169292941,
      "learning_rate": 3.0941124838251734e-06,
      "loss": 1.1641,
      "step": 355
    },
    {
      "epoch": 0.6716417910447762,
      "grad_norm": 0.07880705452928782,
      "learning_rate": 2.944507550192318e-06,
      "loss": 1.1697,
      "step": 360
    },
    {
      "epoch": 0.6809701492537313,
      "grad_norm": 0.08617336475255008,
      "learning_rate": 2.7970854627047893e-06,
      "loss": 1.1617,
      "step": 365
    },
    {
      "epoch": 0.6902985074626866,
      "grad_norm": 0.08504447838669933,
      "learning_rate": 2.6520027773915075e-06,
      "loss": 1.1694,
      "step": 370
    },
    {
      "epoch": 0.6996268656716418,
      "grad_norm": 0.08177432381677913,
      "learning_rate": 2.509413565935107e-06,
      "loss": 1.1663,
      "step": 375
    },
    {
      "epoch": 0.7089552238805971,
      "grad_norm": 0.08035338660262055,
      "learning_rate": 2.3694692520543293e-06,
      "loss": 1.1617,
      "step": 380
    },
    {
      "epoch": 0.7182835820895522,
      "grad_norm": 0.08099302263307083,
      "learning_rate": 2.2323184506984257e-06,
      "loss": 1.1601,
      "step": 385
    },
    {
      "epoch": 0.7276119402985075,
      "grad_norm": 0.0788199344055389,
      "learning_rate": 2.098106810224362e-06,
      "loss": 1.1678,
      "step": 390
    },
    {
      "epoch": 0.7369402985074627,
      "grad_norm": 0.0810220420611739,
      "learning_rate": 1.9669768577244107e-06,
      "loss": 1.1659,
      "step": 395
    },
    {
      "epoch": 0.746268656716418,
      "grad_norm": 0.07813643594984888,
      "learning_rate": 1.8390678476684143e-06,
      "loss": 1.1619,
      "step": 400
    },
    {
      "epoch": 0.7555970149253731,
      "grad_norm": 0.07787996158898178,
      "learning_rate": 1.7145156140214032e-06,
      "loss": 1.1647,
      "step": 405
    },
    {
      "epoch": 0.7649253731343284,
      "grad_norm": 0.08149237459503167,
      "learning_rate": 1.5934524259936757e-06,
      "loss": 1.1663,
      "step": 410
    },
    {
      "epoch": 0.7742537313432836,
      "grad_norm": 0.08133121141847001,
      "learning_rate": 1.4760068475764789e-06,
      "loss": 1.1481,
      "step": 415
    },
    {
      "epoch": 0.7835820895522388,
      "grad_norm": 0.07796965232583761,
      "learning_rate": 1.3623036010124845e-06,
      "loss": 1.1592,
      "step": 420
    },
    {
      "epoch": 0.792910447761194,
      "grad_norm": 0.07862074938919475,
      "learning_rate": 1.2524634343460335e-06,
      "loss": 1.1537,
      "step": 425
    },
    {
      "epoch": 0.8022388059701493,
      "grad_norm": 0.08047006906879658,
      "learning_rate": 1.1466029931938182e-06,
      "loss": 1.1575,
      "step": 430
    },
    {
      "epoch": 0.8115671641791045,
      "grad_norm": 0.07919282621278656,
      "learning_rate": 1.0448346968721596e-06,
      "loss": 1.1591,
      "step": 435
    },
    {
      "epoch": 0.8208955223880597,
      "grad_norm": 0.08069925871151505,
      "learning_rate": 9.472666190124457e-07,
      "loss": 1.1588,
      "step": 440
    },
    {
      "epoch": 0.8302238805970149,
      "grad_norm": 0.07925111301368296,
      "learning_rate": 8.540023727915015e-07,
      "loss": 1.1552,
      "step": 445
    },
    {
      "epoch": 0.8395522388059702,
      "grad_norm": 0.08107260891150957,
      "learning_rate": 7.651410008987698e-07,
      "loss": 1.1568,
      "step": 450
    },
    {
      "epoch": 0.8488805970149254,
      "grad_norm": 0.07855238487611285,
      "learning_rate": 6.807768703571616e-07,
      "loss": 1.1498,
      "step": 455
    },
    {
      "epoch": 0.8582089552238806,
      "grad_norm": 0.08067373717155321,
      "learning_rate": 6.009995723092655e-07,
      "loss": 1.163,
      "step": 460
    },
    {
      "epoch": 0.8675373134328358,
      "grad_norm": 0.083123312332487,
      "learning_rate": 5.258938268753344e-07,
      "loss": 1.155,
      "step": 465
    },
    {
      "epoch": 0.8768656716417911,
      "grad_norm": 0.07901297859441447,
      "learning_rate": 4.555393931841001e-07,
      "loss": 1.1615,
      "step": 470
    },
    {
      "epoch": 0.8861940298507462,
      "grad_norm": 0.07759270476319681,
      "learning_rate": 3.9001098467194907e-07,
      "loss": 1.1575,
      "step": 475
    },
    {
      "epoch": 0.8955223880597015,
      "grad_norm": 0.07822412070732043,
      "learning_rate": 3.2937818974040637e-07,
      "loss": 1.1597,
      "step": 480
    },
    {
      "epoch": 0.9048507462686567,
      "grad_norm": 0.0799398408165139,
      "learning_rate": 2.737053978561943e-07,
      "loss": 1.1602,
      "step": 485
    },
    {
      "epoch": 0.914179104477612,
      "grad_norm": 0.07777635865631954,
      "learning_rate": 2.2305173117234236e-07,
      "loss": 1.1568,
      "step": 490
    },
    {
      "epoch": 0.9235074626865671,
      "grad_norm": 0.08036060154131985,
      "learning_rate": 1.7747098174295208e-07,
      "loss": 1.1652,
      "step": 495
    },
    {
      "epoch": 0.9328358208955224,
      "grad_norm": 0.08306752546345667,
      "learning_rate": 1.3701155439831249e-07,
      "loss": 1.1558,
      "step": 500
    },
    {
      "epoch": 0.9421641791044776,
      "grad_norm": 0.07992649084819468,
      "learning_rate": 1.017164153410144e-07,
      "loss": 1.1566,
      "step": 505
    },
    {
      "epoch": 0.9514925373134329,
      "grad_norm": 0.07911970141612473,
      "learning_rate": 7.16230465176565e-08,
      "loss": 1.1559,
      "step": 510
    },
    {
      "epoch": 0.960820895522388,
      "grad_norm": 0.0846383351709255,
      "learning_rate": 4.6763405814604926e-08,
      "loss": 1.1514,
      "step": 515
    },
    {
      "epoch": 0.9701492537313433,
      "grad_norm": 0.07934050493191654,
      "learning_rate": 2.7163893120066288e-08,
      "loss": 1.1542,
      "step": 520
    },
    {
      "epoch": 0.9794776119402985,
      "grad_norm": 0.08167448133397126,
      "learning_rate": 1.284532228851998e-08,
      "loss": 1.161,
      "step": 525
    },
    {
      "epoch": 0.9888059701492538,
      "grad_norm": 0.07857080161491098,
      "learning_rate": 3.822899037286276e-09,
      "loss": 1.1579,
      "step": 530
    },
    {
      "epoch": 0.9981343283582089,
      "grad_norm": 0.07847093836951612,
      "learning_rate": 1.0620479869771772e-10,
      "loss": 1.1582,
      "step": 535
    },
    {
      "epoch": 1.0,
      "eval_runtime": 3.2225,
      "eval_samples_per_second": 3.103,
      "eval_steps_per_second": 0.931,
      "step": 536
    },
    {
      "epoch": 1.0,
      "step": 536,
      "total_flos": 555957746663424.0,
      "train_loss": 1.2752919873194908,
      "train_runtime": 16895.3411,
      "train_samples_per_second": 2.029,
      "train_steps_per_second": 0.032
    }
  ],
  "logging_steps": 5,
  "max_steps": 536,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 555957746663424.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}