| { | |
| "best_metric": 3.2069807052612305, | |
| "best_model_checkpoint": "output/kanye-west/checkpoint-1098", | |
| "epoch": 2.0, | |
| "global_step": 1098, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00013717009945820184, | |
| "loss": 4.0869, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0001370804238981834, | |
| "loss": 3.916, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00013693105149335086, | |
| "loss": 3.8652, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00013672211245699381, | |
| "loss": 3.7385, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0001364537889287738, | |
| "loss": 3.9589, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001361263148159465, | |
| "loss": 3.7062, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0001357399755894565, | |
| "loss": 3.7812, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00013529510803508195, | |
| "loss": 3.7942, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00013479209995984626, | |
| "loss": 3.5225, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0001342313898539529, | |
| "loss": 3.5656, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00013361346650853767, | |
| "loss": 3.6263, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001329388685895719, | |
| "loss": 3.4553, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001322081841682881, | |
| "loss": 3.5207, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00013142205020853694, | |
| "loss": 3.6126, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001305811520115232, | |
| "loss": 3.7414, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001296862226184039, | |
| "loss": 3.7092, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001287380421712702, | |
| "loss": 3.6186, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001277374372330694, | |
| "loss": 3.5288, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00012668528006706028, | |
| "loss": 3.6937, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0001255824878764301, | |
| "loss": 3.4367, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00012443002200473538, | |
| "loss": 3.6998, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0001232288870978644, | |
| "loss": 3.6457, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00012198013022825123, | |
| "loss": 3.7173, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00012068483998210507, | |
| "loss": 3.6357, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00011934414551045056, | |
| "loss": 3.5173, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00011795921554480633, | |
| "loss": 3.5689, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00011653125737835967, | |
| "loss": 3.7171, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00011506151581352585, | |
| "loss": 3.5809, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00011355127207680896, | |
| "loss": 3.6263, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0001120018427019109, | |
| "loss": 3.2874, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00011041457838206164, | |
| "loss": 3.5598, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00010879086279257143, | |
| "loss": 3.4501, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00010713211138463122, | |
| "loss": 3.5461, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00010543977015141323, | |
| "loss": 3.7201, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00010371531436754662, | |
| "loss": 3.5397, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001019602473030677, | |
| "loss": 3.4675, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00010017609891296537, | |
| "loss": 3.5239, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 9.836442450346448e-05, | |
| "loss": 3.7958, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 9.652680337620956e-05, | |
| "loss": 3.4768, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 9.466483745153069e-05, | |
| "loss": 3.743, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 9.278014987199233e-05, | |
| "loss": 3.4125, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 9.08743835874413e-05, | |
| "loss": 3.4941, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 8.894919992278866e-05, | |
| "loss": 3.501, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 8.700627712977306e-05, | |
| "loss": 3.5539, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 8.50473089239683e-05, | |
| "loss": 3.6408, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 8.307400300831083e-05, | |
| "loss": 3.4931, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 8.108807958443359e-05, | |
| "loss": 3.5534, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 7.909126985310467e-05, | |
| "loss": 3.4332, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 7.708531450507727e-05, | |
| "loss": 3.3946, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 7.507196220366703e-05, | |
| "loss": 3.5149, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 7.305296806037965e-05, | |
| "loss": 3.5828, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 7.10300921049169e-05, | |
| "loss": 3.5274, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 6.900509775089564e-05, | |
| "loss": 3.5936, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 6.697975025861685e-05, | |
| "loss": 3.75, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 6.495581519622486e-05, | |
| "loss": 3.5977, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 6.293505690059801e-05, | |
| "loss": 3.5072, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 6.0919236939313083e-05, | |
| "loss": 3.402, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 5.8910112575023444e-05, | |
| "loss": 3.4915, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 5.690943523359007e-05, | |
| "loss": 3.428, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 5.4918948977300765e-05, | |
| "loss": 3.4624, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 5.294038898450848e-05, | |
| "loss": 3.5907, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 5.097548003701393e-05, | |
| "loss": 3.3823, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.902593501651139e-05, | |
| "loss": 3.392, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.709345341140823e-05, | |
| "loss": 3.5123, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.517971983531982e-05, | |
| "loss": 3.3342, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.3286402558531396e-05, | |
| "loss": 3.3545, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.141515205370683e-05, | |
| "loss": 3.4315, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.956759955711245e-05, | |
| "loss": 3.3875, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.7745355646609715e-05, | |
| "loss": 3.5645, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.5950008837656755e-05, | |
| "loss": 3.6196, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.418312419854227e-05, | |
| "loss": 3.3208, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.244624198605935e-05, | |
| "loss": 3.4232, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.0740876302808455e-05, | |
| "loss": 3.3791, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.9068513777299503e-05, | |
| "loss": 3.4745, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.7430612268004742e-05, | |
| "loss": 3.3075, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.5828599592490882e-05, | |
| "loss": 3.4197, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.4263872282739445e-05, | |
| "loss": 3.4358, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.2737794367739613e-05, | |
| "loss": 3.6132, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 2.1251696184415103e-05, | |
| "loss": 3.4919, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.980687321792179e-05, | |
| "loss": 3.6213, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.8404584972326772e-05, | |
| "loss": 3.3808, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.704605387265335e-05, | |
| "loss": 3.4937, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.573246419924937e-05, | |
| "loss": 3.5387, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.4464961055407408e-05, | |
| "loss": 3.5004, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.3244649369137107e-05, | |
| "loss": 3.2293, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.2072592929959826e-05, | |
| "loss": 3.4326, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.0949813461565069e-05, | |
| "loss": 3.3648, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.87728973113722e-06, | |
| "loss": 3.5226, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.855956696129068e-06, | |
| "loss": 3.464, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.88670468922587e-06, | |
| "loss": 3.3838, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 6.97037864221025e-06, | |
| "loss": 3.6019, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 6.107777349404974e-06, | |
| "loss": 3.3959, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 5.299652771335096e-06, | |
| "loss": 3.2737, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.546709379217107e-06, | |
| "loss": 3.5271, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.849603540845984e-06, | |
| "loss": 3.1793, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.2089429484158363e-06, | |
| "loss": 3.5368, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.625286088772771e-06, | |
| "loss": 3.4441, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.0991417565619363e-06, | |
| "loss": 3.3483, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.6309686106928596e-06, | |
| "loss": 3.5513, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.221174774510045e-06, | |
| "loss": 3.3951, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 8.701174800172293e-07, | |
| "loss": 3.5312, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 5.78102756465344e-07, | |
| "loss": 3.4318, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.45385163575888e-07, | |
| "loss": 3.4111, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.7216756963205766e-07, | |
| "loss": 3.2725, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 5.860097463115006e-08, | |
| "loss": 3.6447, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.784378652498966e-09, | |
| "loss": 3.4154, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 3.426886558532715, | |
| "eval_runtime": 40.4727, | |
| "eval_samples_per_second": 22.04, | |
| "eval_steps_per_second": 2.767, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.200254683906266e-07, | |
| "loss": 3.4087, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 9.09574339006257e-08, | |
| "loss": 3.3394, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.797008773112274e-08, | |
| "loss": 3.413, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 3.219294786453247, | |
| "eval_runtime": 34.625, | |
| "eval_samples_per_second": 21.863, | |
| "eval_steps_per_second": 2.744, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.123176456552244e-09, | |
| "loss": 3.4155, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.0430490758340044e-08, | |
| "loss": 3.3359, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 1.3585985413598608e-07, | |
| "loss": 3.4372, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 2.8733314924638363e-07, | |
| "loss": 3.5177, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.947263818497525e-07, | |
| "loss": 3.3014, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 7.578697823098032e-07, | |
| "loss": 3.4521, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 1.0765479445652215e-06, | |
| "loss": 3.263, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 1.4505000024584544e-06, | |
| "loss": 3.3575, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 1.8794198432776367e-06, | |
| "loss": 3.5385, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 2.362956358336902e-06, | |
| "loss": 3.2621, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 2.900713730389673e-06, | |
| "loss": 3.3471, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.4922517576400357e-06, | |
| "loss": 3.3317, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.137086214086682e-06, | |
| "loss": 3.3101, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.834689245904529e-06, | |
| "loss": 3.5044, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 5.584489803539656e-06, | |
| "loss": 3.2145, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 6.385874109163592e-06, | |
| "loss": 3.088, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 7.238186159104615e-06, | |
| "loss": 3.2756, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.140728260844604e-06, | |
| "loss": 3.3539, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 9.09276160414173e-06, | |
| "loss": 3.3302, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 1.0093506865811846e-05, | |
| "loss": 3.3183, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 1.1142144847673131e-05, | |
| "loss": 3.3733, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 1.2237817147132003e-05, | |
| "loss": 3.3992, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.3379626859861349e-05, | |
| "loss": 3.4621, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 1.4566639313995615e-05, | |
| "loss": 3.4193, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 1.5797882835242138e-05, | |
| "loss": 3.4483, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 1.7072349542282338e-05, | |
| "loss": 3.1321, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 1.8388996171811105e-05, | |
| "loss": 3.5093, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.9746744932540055e-05, | |
| "loss": 3.3516, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.1144484387464373e-05, | |
| "loss": 3.3896, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.258107036367215e-05, | |
| "loss": 3.4337, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.4055326888950756e-05, | |
| "loss": 3.3167, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.5566047154423428e-05, | |
| "loss": 3.3134, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.711199450242914e-05, | |
| "loss": 3.2474, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.8691903438835377e-05, | |
| "loss": 3.234, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.030448066895703e-05, | |
| "loss": 3.3668, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.194840615623184e-05, | |
| "loss": 3.4867, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.36223342027866e-05, | |
| "loss": 3.3089, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.53248945510095e-05, | |
| "loss": 3.273, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.7054693505226934e-05, | |
| "loss": 3.3763, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.8810315072565846e-05, | |
| "loss": 3.4708, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.0590322122068896e-05, | |
| "loss": 3.3732, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.239325756111289e-05, | |
| "loss": 3.3236, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.421764552816718e-05, | |
| "loss": 3.3189, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.6061992600916735e-05, | |
| "loss": 3.3587, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.792478901875958e-05, | |
| "loss": 3.3553, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.980450991867903e-05, | |
| "loss": 3.3437, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 5.1699616583478625e-05, | |
| "loss": 3.113, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 5.360855770135732e-05, | |
| "loss": 3.5307, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 5.5529770635795374e-05, | |
| "loss": 3.277, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 5.746168270470958e-05, | |
| "loss": 3.4986, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 5.940271246783254e-05, | |
| "loss": 3.0947, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 6.13512710212607e-05, | |
| "loss": 3.453, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 6.330576329811206e-05, | |
| "loss": 3.2989, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 6.526458937422915e-05, | |
| "loss": 3.2466, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 6.722614577785811e-05, | |
| "loss": 3.3078, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 6.918882680223113e-05, | |
| "loss": 3.3115, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 7.115102581997948e-05, | |
| "loss": 3.401, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 7.311113659829913e-05, | |
| "loss": 3.388, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 7.506755461379409e-05, | |
| "loss": 3.1893, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 7.701867836592025e-05, | |
| "loss": 3.1964, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 7.896291068795451e-05, | |
| "loss": 3.1817, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 8.089866005441645e-05, | |
| "loss": 3.4222, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 8.28243418838726e-05, | |
| "loss": 3.3935, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 8.473837983605534e-05, | |
| "loss": 3.317, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 8.663920710223691e-05, | |
| "loss": 3.3184, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 8.852526768780001e-05, | |
| "loss": 3.3909, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 9.039501768595671e-05, | |
| "loss": 3.3281, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 9.224692654157222e-05, | |
| "loss": 3.4081, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 9.407947830405896e-05, | |
| "loss": 3.3443, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 9.589117286831579e-05, | |
| "loss": 3.2831, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 9.768052720269644e-05, | |
| "loss": 3.3635, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 9.944607656300112e-05, | |
| "loss": 3.3023, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.0001011863756914993, | |
| "loss": 3.3958, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00010290000000000001, | |
| "loss": 3.439, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00010458554673600303, | |
| "loss": 3.3205, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00010624163613097559, | |
| "loss": 3.3548, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00010786691252981412, | |
| "loss": 3.402, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.00010946004550056765, | |
| "loss": 3.1367, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00011101973092351368, | |
| "loss": 3.3214, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.0001125446920586948, | |
| "loss": 3.4555, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.0001140336805910432, | |
| "loss": 3.1367, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00011548547765223617, | |
| "loss": 3.391, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00011689889481844766, | |
| "loss": 3.0626, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00011827277508317747, | |
| "loss": 3.4176, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00011960599380436336, | |
| "loss": 3.2121, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00012089745962499954, | |
| "loss": 3.231, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00012214611536650856, | |
| "loss": 3.6329, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.000123350938894135, | |
| "loss": 3.1573, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00012451094395365327, | |
| "loss": 3.3721, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00012562518097870298, | |
| "loss": 3.3447, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.0001266927378680932, | |
| "loss": 3.3603, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00012771274073243748, | |
| "loss": 3.1145, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00012868435460950916, | |
| "loss": 3.2607, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.00012960678414773185, | |
| "loss": 3.3824, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.00013047927425724446, | |
| "loss": 3.4136, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.000131301110728009, | |
| "loss": 3.386, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.00013207162081445436, | |
| "loss": 3.2408, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.00013279017378617784, | |
| "loss": 3.2928, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.00013345618144425364, | |
| "loss": 3.16, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.00013406909860272533, | |
| "loss": 3.444, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.00013462842353488877, | |
| "loss": 3.5022, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.0001351336983839996, | |
| "loss": 3.4006, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 0.00013558450953806924, | |
| "loss": 3.436, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 0.00013598048796844284, | |
| "loss": 3.3172, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.0001363213095318818, | |
| "loss": 3.4138, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.00013660669523590338, | |
| "loss": 3.3525, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 0.00013683641146716082, | |
| "loss": 3.5252, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.00013701027018267633, | |
| "loss": 3.264, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 0.00013712812906377096, | |
| "loss": 3.262, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 0.00013718989163256484, | |
| "loss": 3.1863, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 3.2069807052612305, | |
| "eval_runtime": 34.3846, | |
| "eval_samples_per_second": 22.016, | |
| "eval_steps_per_second": 2.763, | |
| "step": 1098 | |
| } | |
| ], | |
| "max_steps": 1098, | |
| "num_train_epochs": 2, | |
| "total_flos": 1146157498368000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |