| { | |
| "best_metric": 1.0933133363723755, | |
| "best_model_checkpoint": "output/coldplay/checkpoint-550", | |
| "epoch": 11.0, | |
| "global_step": 550, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00013355940688336427, | |
| "loss": 3.2771, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00012302403914397873, | |
| "loss": 2.8643, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00010671211798514472, | |
| "loss": 2.6223, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 8.635498649403293e-05, | |
| "loss": 2.9051, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 6.411334533481218e-05, | |
| "loss": 2.8302, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.2347916539754844e-05, | |
| "loss": 2.8412, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.3368877084135277e-05, | |
| "loss": 2.7828, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.190657300387505e-06, | |
| "loss": 2.6878, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.3181297643383925e-06, | |
| "loss": 2.57, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 2.627840757369995, | |
| "eval_runtime": 3.4104, | |
| "eval_samples_per_second": 21.405, | |
| "eval_steps_per_second": 2.932, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 1.3746270344901413e-06, | |
| "loss": 2.7251, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 9.576451662754438e-06, | |
| "loss": 2.4207, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.4309929383066146e-05, | |
| "loss": 2.7058, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.3944626783346644e-05, | |
| "loss": 2.6908, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 6.630773257727353e-05, | |
| "loss": 2.7345, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 8.892450484875447e-05, | |
| "loss": 2.4105, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.00010929213048843373, | |
| "loss": 2.5926, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00012515669103944476, | |
| "loss": 2.4909, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.00013476258540873022, | |
| "loss": 2.5389, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 2.446333646774292, | |
| "eval_runtime": 1.0636, | |
| "eval_samples_per_second": 76.156, | |
| "eval_steps_per_second": 10.342, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 0.00013704680787354832, | |
| "loss": 2.3598, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 0.00013175658222600302, | |
| "loss": 2.2124, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 0.00011947733444744994, | |
| "loss": 2.1658, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 0.0001015679084058065, | |
| "loss": 2.1519, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 8.001019372440279e-05, | |
| "loss": 2.243, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 5.718980627559731e-05, | |
| "loss": 2.2952, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 3.563209159419354e-05, | |
| "loss": 2.2394, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 1.772266555255008e-05, | |
| "loss": 2.2106, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 5.443417773996978e-06, | |
| "loss": 2.2222, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 1.5319212645169297e-07, | |
| "loss": 2.1897, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 2.3795111179351807, | |
| "eval_runtime": 1.0634, | |
| "eval_samples_per_second": 76.169, | |
| "eval_steps_per_second": 10.344, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 2.4374145912697595e-06, | |
| "loss": 2.0277, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 1.204330896055522e-05, | |
| "loss": 2.0337, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 2.790786951156628e-05, | |
| "loss": 2.0818, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 4.8275495151245426e-05, | |
| "loss": 2.1041, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 7.089226742272638e-05, | |
| "loss": 2.1708, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 9.325537321665337e-05, | |
| "loss": 2.0552, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 0.00011289007061693382, | |
| "loss": 2.1205, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 0.00012762354833724553, | |
| "loss": 1.9704, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 0.00013582537296550986, | |
| "loss": 1.9882, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 2.3495991230010986, | |
| "eval_runtime": 1.0619, | |
| "eval_samples_per_second": 76.281, | |
| "eval_steps_per_second": 10.359, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 0.0001365879156874179, | |
| "loss": 2.1933, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 0.00012982679213998792, | |
| "loss": 1.8172, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 0.00011629020053848047, | |
| "loss": 1.6238, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 9.747612470258382e-05, | |
| "loss": 1.8408, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 7.546656444541333e-05, | |
| "loss": 1.8867, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 5.2697137691647635e-05, | |
| "loss": 2.0636, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 3.1687550572992616e-05, | |
| "loss": 1.8887, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 1.4762762169883855e-05, | |
| "loss": 1.9152, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 3.795700315696817e-06, | |
| "loss": 1.8236, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.0, | |
| "loss": 1.6923, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 2.332808017730713, | |
| "eval_runtime": 1.0629, | |
| "eval_samples_per_second": 76.208, | |
| "eval_steps_per_second": 10.349, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 3.7957003156967485e-06, | |
| "loss": 1.6147, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 1.4762762169883802e-05, | |
| "loss": 1.7048, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 3.168755057299255e-05, | |
| "loss": 1.5544, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 5.269713769164743e-05, | |
| "loss": 1.5129, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 7.546656444541325e-05, | |
| "loss": 1.7164, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 9.747612470258363e-05, | |
| "loss": 1.6792, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 0.0001162902005384805, | |
| "loss": 1.7173, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 0.00012982679213998787, | |
| "loss": 1.7795, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 0.00013658791568741792, | |
| "loss": 1.9717, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 2.336082935333252, | |
| "eval_runtime": 1.0719, | |
| "eval_samples_per_second": 75.567, | |
| "eval_steps_per_second": 10.262, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 0.00013582537296550986, | |
| "loss": 1.6967, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 0.00012762354833724559, | |
| "loss": 1.4993, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 0.0001128900706169339, | |
| "loss": 1.3665, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 9.325537321665346e-05, | |
| "loss": 1.5098, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 7.089226742272658e-05, | |
| "loss": 1.6949, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 4.827549515124539e-05, | |
| "loss": 1.5465, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 2.7907869511566348e-05, | |
| "loss": 1.506, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 1.2043308960555334e-05, | |
| "loss": 1.6257, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 2.437414591269752e-06, | |
| "loss": 1.3859, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 2.334944009780884, | |
| "eval_runtime": 1.0709, | |
| "eval_samples_per_second": 75.638, | |
| "eval_steps_per_second": 10.272, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 1.5319212645167772e-07, | |
| "loss": 1.5912, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 5.443417773996994e-06, | |
| "loss": 1.4726, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 1.772266555255011e-05, | |
| "loss": 1.2431, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 3.563209159419346e-05, | |
| "loss": 1.3668, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 5.718980627559723e-05, | |
| "loss": 1.3476, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 8.001019372440265e-05, | |
| "loss": 1.2561, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 0.00010156790840580641, | |
| "loss": 1.6616, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 0.0001194773344474498, | |
| "loss": 1.4607, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 0.00013175658222600294, | |
| "loss": 1.3461, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 0.00013704680787354832, | |
| "loss": 1.3842, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 2.3194692134857178, | |
| "eval_runtime": 1.0661, | |
| "eval_samples_per_second": 75.98, | |
| "eval_steps_per_second": 10.318, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 8.979856581412133e-05, | |
| "loss": 1.3418, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 0.0001089220683072637, | |
| "loss": 1.3258, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 0.00012409856581412136, | |
| "loss": 1.67, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 0.00013384247701784757, | |
| "loss": 1.6416, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.0001372, | |
| "loss": 1.5041, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 1.550144910812378, | |
| "eval_runtime": 2.6997, | |
| "eval_samples_per_second": 22.225, | |
| "eval_steps_per_second": 2.963, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "learning_rate": 0.0001338424770178476, | |
| "loss": 1.3775, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 0.00012409856581412158, | |
| "loss": 1.3052, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 0.00010892206830726361, | |
| "loss": 1.4469, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "learning_rate": 8.979856581412145e-05, | |
| "loss": 1.5571, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 6.859999999999995e-05, | |
| "loss": 1.3572, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "learning_rate": 4.740143418587868e-05, | |
| "loss": 1.5739, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 2.8277931692736505e-05, | |
| "loss": 1.4984, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 1.3101434185878506e-05, | |
| "loss": 1.4842, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "learning_rate": 3.3575229821524526e-06, | |
| "loss": 1.3847, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 0.0, | |
| "loss": 1.3742, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 1.5336766242980957, | |
| "eval_runtime": 2.6931, | |
| "eval_samples_per_second": 22.279, | |
| "eval_steps_per_second": 2.971, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "learning_rate": 6.411334533481208e-05, | |
| "loss": 1.183, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 8.635498649403306e-05, | |
| "loss": 1.514, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "learning_rate": 0.00010671211798514463, | |
| "loss": 1.3301, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "learning_rate": 0.0001230240391439787, | |
| "loss": 1.5153, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "learning_rate": 0.0001335594068833643, | |
| "loss": 1.3648, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.0001372, | |
| "loss": 1.3593, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 1.2220196723937988, | |
| "eval_runtime": 3.1642, | |
| "eval_samples_per_second": 23.386, | |
| "eval_steps_per_second": 3.16, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 10.1, | |
| "learning_rate": 0.00013355940688336435, | |
| "loss": 1.2865, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 10.21, | |
| "learning_rate": 0.00012302403914397878, | |
| "loss": 1.315, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 10.31, | |
| "learning_rate": 0.00010671211798514499, | |
| "loss": 1.237, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 10.42, | |
| "learning_rate": 8.635498649403298e-05, | |
| "loss": 1.1549, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 10.52, | |
| "learning_rate": 6.4113345334812e-05, | |
| "loss": 1.3219, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 10.62, | |
| "learning_rate": 4.23479165397549e-05, | |
| "loss": 1.2553, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 10.73, | |
| "learning_rate": 2.3368877084135498e-05, | |
| "loss": 1.1603, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 10.83, | |
| "learning_rate": 9.190657300387535e-06, | |
| "loss": 1.2085, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 10.94, | |
| "learning_rate": 1.3181297643384459e-06, | |
| "loss": 1.2453, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 1.2095028162002563, | |
| "eval_runtime": 3.297, | |
| "eval_samples_per_second": 22.445, | |
| "eval_steps_per_second": 3.033, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 10.6, | |
| "learning_rate": 4.7401434185878923e-05, | |
| "loss": 1.0609, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 10.7, | |
| "learning_rate": 2.8277931692736518e-05, | |
| "loss": 1.2072, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "learning_rate": 1.3101434185878659e-05, | |
| "loss": 1.2221, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 10.9, | |
| "learning_rate": 3.3575229821524526e-06, | |
| "loss": 1.0503, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 0.0, | |
| "loss": 1.0194, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 1.0933133363723755, | |
| "eval_runtime": 1.2839, | |
| "eval_samples_per_second": 43.616, | |
| "eval_steps_per_second": 5.452, | |
| "step": 550 | |
| } | |
| ], | |
| "max_steps": 600, | |
| "num_train_epochs": 12, | |
| "total_flos": 572229550080000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |