{
  "best_metric": 2.7318851947784424,
  "best_model_checkpoint": "output/doja-cat/checkpoint-335",
  "epoch": 5.0,
  "global_step": 335,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 0.00013532330330528217,
      "loss": 3.5814,
      "step": 5
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00012979589515943672,
      "loss": 3.3945,
      "step": 10
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0001209202032183722,
      "loss": 3.258,
      "step": 15
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00010918185377156764,
      "loss": 3.0648,
      "step": 20
    },
    {
      "epoch": 0.37,
      "learning_rate": 9.522310109331633e-05,
      "loss": 3.0572,
      "step": 25
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.980768702075116e-05,
      "loss": 2.9453,
      "step": 30
    },
    {
      "epoch": 0.52,
      "learning_rate": 6.377905343835293e-05,
      "loss": 2.8372,
      "step": 35
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.8014194038976637e-05,
      "loss": 2.8698,
      "step": 40
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.337567032488183e-05,
      "loss": 2.9529,
      "step": 45
    },
    {
      "epoch": 0.75,
      "learning_rate": 2.0664417254371463e-05,
      "loss": 2.9508,
      "step": 50
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.0575920734825916e-05,
      "loss": 2.9673,
      "step": 55
    },
    {
      "epoch": 0.9,
      "learning_rate": 3.66216467507622e-06,
      "loss": 3.0803,
      "step": 60
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.014296332977907e-07,
      "loss": 2.8364,
      "step": 65
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.8956077098846436,
      "eval_runtime": 3.3695,
      "eval_samples_per_second": 21.368,
      "eval_steps_per_second": 2.671,
      "step": 67
    },
    {
      "epoch": 1.04,
      "learning_rate": 6.775955029229377e-07,
      "loss": 2.9377,
      "step": 70
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.7700806733338495e-06,
      "loss": 2.722,
      "step": 75
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.235496813600306e-05,
      "loss": 2.6699,
      "step": 80
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.3017256922096845e-05,
      "loss": 2.8672,
      "step": 85
    },
    {
      "epoch": 1.34,
      "learning_rate": 3.6173568544064456e-05,
      "loss": 2.812,
      "step": 90
    },
    {
      "epoch": 1.42,
      "learning_rate": 5.110406607666898e-05,
      "loss": 2.6143,
      "step": 95
    },
    {
      "epoch": 1.49,
      "learning_rate": 6.699183945081632e-05,
      "loss": 2.6499,
      "step": 100
    },
    {
      "epoch": 1.57,
      "learning_rate": 8.296760202575751e-05,
      "loss": 2.832,
      "step": 105
    },
    {
      "epoch": 1.64,
      "learning_rate": 9.815725290295059e-05,
      "loss": 2.6562,
      "step": 110
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.00011172970264813468,
      "loss": 2.8987,
      "step": 115
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.00012294234567600346,
      "loss": 2.705,
      "step": 120
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.00013118169131281192,
      "loss": 2.5747,
      "step": 125
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.00013599693044126453,
      "loss": 2.6942,
      "step": 130
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.8365488052368164,
      "eval_runtime": 3.3573,
      "eval_samples_per_second": 21.446,
      "eval_steps_per_second": 2.681,
      "step": 134
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.0001371246011559198,
      "loss": 2.804,
      "step": 135
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.00013450300386777127,
      "loss": 2.4535,
      "step": 140
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.00012827557714724304,
      "loss": 2.3801,
      "step": 145
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.00011878304959908774,
      "loss": 2.5012,
      "step": 150
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.00010654479717298873,
      "loss": 2.2923,
      "step": 155
    },
    {
      "epoch": 2.39,
      "learning_rate": 9.223042592950526e-05,
      "loss": 2.3414,
      "step": 160
    },
    {
      "epoch": 2.46,
      "learning_rate": 7.662313508728495e-05,
      "loss": 2.5064,
      "step": 165
    },
    {
      "epoch": 2.54,
      "learning_rate": 6.0576864912715095e-05,
      "loss": 2.5332,
      "step": 170
    },
    {
      "epoch": 2.61,
      "learning_rate": 4.496957407049471e-05,
      "loss": 2.4362,
      "step": 175
    },
    {
      "epoch": 2.69,
      "learning_rate": 3.065520282701126e-05,
      "loss": 2.4042,
      "step": 180
    },
    {
      "epoch": 2.76,
      "learning_rate": 1.8416950400912332e-05,
      "loss": 2.3006,
      "step": 185
    },
    {
      "epoch": 2.84,
      "learning_rate": 8.924422852757e-06,
      "loss": 2.3545,
      "step": 190
    },
    {
      "epoch": 2.91,
      "learning_rate": 2.6969961322287634e-06,
      "loss": 2.4371,
      "step": 195
    },
    {
      "epoch": 2.99,
      "learning_rate": 7.53988440801922e-08,
      "loss": 2.2676,
      "step": 200
    },
    {
      "epoch": 3.0,
      "eval_loss": 2.7877070903778076,
      "eval_runtime": 3.3652,
      "eval_samples_per_second": 21.396,
      "eval_steps_per_second": 2.674,
      "step": 201
    },
    {
      "epoch": 3.06,
      "learning_rate": 1.2030695587354792e-06,
      "loss": 2.4161,
      "step": 205
    },
    {
      "epoch": 3.13,
      "learning_rate": 6.018308687188092e-06,
      "loss": 2.3933,
      "step": 210
    },
    {
      "epoch": 3.21,
      "learning_rate": 1.4257654323996543e-05,
      "loss": 2.2934,
      "step": 215
    },
    {
      "epoch": 3.28,
      "learning_rate": 2.5470297351865334e-05,
      "loss": 2.2132,
      "step": 220
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.9042747097049484e-05,
      "loss": 1.9654,
      "step": 225
    },
    {
      "epoch": 3.43,
      "learning_rate": 5.423239797424245e-05,
      "loss": 2.2451,
      "step": 230
    },
    {
      "epoch": 3.51,
      "learning_rate": 7.020816054918364e-05,
      "loss": 2.36,
      "step": 235
    },
    {
      "epoch": 3.58,
      "learning_rate": 8.609593392333099e-05,
      "loss": 2.3897,
      "step": 240
    },
    {
      "epoch": 3.66,
      "learning_rate": 0.00010102643145593556,
      "loss": 2.1583,
      "step": 245
    },
    {
      "epoch": 3.73,
      "learning_rate": 0.00011418274307790318,
      "loss": 2.3118,
      "step": 250
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.00012484503186399693,
      "loss": 2.2451,
      "step": 255
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.00013242991932666616,
      "loss": 2.131,
      "step": 260
    },
    {
      "epoch": 3.96,
      "learning_rate": 0.00013652240449707706,
      "loss": 2.0297,
      "step": 265
    },
    {
      "epoch": 4.0,
      "eval_loss": 2.8032355308532715,
      "eval_runtime": 3.3675,
      "eval_samples_per_second": 21.381,
      "eval_steps_per_second": 2.673,
      "step": 268
    },
    {
      "epoch": 4.03,
      "learning_rate": 0.00013689857036670224,
      "loss": 1.9968,
      "step": 270
    },
    {
      "epoch": 4.1,
      "learning_rate": 0.0001335378353249238,
      "loss": 2.1578,
      "step": 275
    },
    {
      "epoch": 4.18,
      "learning_rate": 0.00012662407926517416,
      "loss": 2.1444,
      "step": 280
    },
    {
      "epoch": 4.25,
      "learning_rate": 0.00011653558274562858,
      "loss": 1.9796,
      "step": 285
    },
    {
      "epoch": 4.33,
      "learning_rate": 0.00010382432967511827,
      "loss": 1.8756,
      "step": 290
    },
    {
      "epoch": 4.4,
      "learning_rate": 8.918580596102339e-05,
      "loss": 1.9379,
      "step": 295
    },
    {
      "epoch": 4.48,
      "learning_rate": 7.342094656164722e-05,
      "loss": 2.1349,
      "step": 300
    },
    {
      "epoch": 4.55,
      "learning_rate": 5.739231297924882e-05,
      "loss": 2.048,
      "step": 305
    },
    {
      "epoch": 4.63,
      "learning_rate": 4.197689890668377e-05,
      "loss": 1.8569,
      "step": 310
    },
    {
      "epoch": 4.7,
      "learning_rate": 2.8018146228432437e-05,
      "loss": 1.8989,
      "step": 315
    },
    {
      "epoch": 4.78,
      "learning_rate": 1.6279796781627843e-05,
      "loss": 2.258,
      "step": 320
    },
    {
      "epoch": 4.85,
      "learning_rate": 7.404104840563317e-06,
      "loss": 2.0248,
      "step": 325
    },
    {
      "epoch": 4.93,
      "learning_rate": 1.8766966947178655e-06,
      "loss": 1.8796,
      "step": 330
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.0,
      "loss": 1.8645,
      "step": 335
    },
    {
      "epoch": 5.0,
      "eval_loss": 2.7318851947784424,
      "eval_runtime": 3.3458,
      "eval_samples_per_second": 21.519,
      "eval_steps_per_second": 2.69,
      "step": 335
    }
  ],
  "max_steps": 670,
  "num_train_epochs": 10,
  "total_flos": 348171632640000.0,
  "trial_name": null,
  "trial_params": null
}