{
  "best_metric": 1.948645830154419,
  "best_model_checkpoint": "output/hyuna/checkpoint-290",
  "epoch": 5.0,
  "global_step": 290,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 0.00013469952948681868,
      "loss": 3.106,
      "step": 5
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001273804022850966,
      "loss": 2.6073,
      "step": 10
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00011577618287734484,
      "loss": 2.406,
      "step": 15
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00010073281903200561,
      "loss": 2.6274,
      "step": 20
    },
    {
      "epoch": 0.43,
      "learning_rate": 8.334697219847626e-05,
      "loss": 2.4792,
      "step": 25
    },
    {
      "epoch": 0.52,
      "learning_rate": 6.488607087104036e-05,
      "loss": 2.3865,
      "step": 30
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.6695915032671784e-05,
      "loss": 2.3743,
      "step": 35
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.0102567316140575e-05,
      "loss": 2.5462,
      "step": 40
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.6315683018244145e-05,
      "loss": 2.3818,
      "step": 45
    },
    {
      "epoch": 0.86,
      "learning_rate": 6.340326210572357e-06,
      "loss": 2.3368,
      "step": 50
    },
    {
      "epoch": 0.95,
      "learning_rate": 9.037005536513067e-07,
      "loss": 2.371,
      "step": 55
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.4857912063598633,
      "eval_runtime": 0.9931,
      "eval_samples_per_second": 75.519,
      "eval_steps_per_second": 10.069,
      "step": 58
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.0213613921093164e-07,
      "loss": 2.3666,
      "step": 60
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.8721970205680935e-06,
      "loss": 2.4303,
      "step": 65
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.3988015692592823e-05,
      "loss": 2.3856,
      "step": 70
    },
    {
      "epoch": 1.29,
      "learning_rate": 2.708504883770769e-05,
      "loss": 2.1811,
      "step": 75
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.320852254368187e-05,
      "loss": 2.4049,
      "step": 80
    },
    {
      "epoch": 1.47,
      "learning_rate": 6.118303533611755e-05,
      "loss": 2.2865,
      "step": 85
    },
    {
      "epoch": 1.55,
      "learning_rate": 7.969824496351964e-05,
      "loss": 2.1265,
      "step": 90
    },
    {
      "epoch": 1.64,
      "learning_rate": 9.740439236703416e-05,
      "loss": 2.1689,
      "step": 95
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.00011301069913603334,
      "loss": 2.4769,
      "step": 100
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.00012537946527356269,
      "loss": 2.282,
      "step": 105
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.00013360900754314024,
      "loss": 2.2972,
      "step": 110
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.0001370993921901871,
      "loss": 2.3702,
      "step": 115
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.0102086067199707,
      "eval_runtime": 1.0576,
      "eval_samples_per_second": 75.643,
      "eval_steps_per_second": 9.455,
      "step": 116
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.00013559617012171197,
      "loss": 2.0597,
      "step": 120
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.00012920892624899717,
      "loss": 2.1937,
      "step": 125
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.00011840329074592684,
      "loss": 2.1953,
      "step": 130
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.00010396699460234374,
      "loss": 2.0115,
      "step": 135
    },
    {
      "epoch": 2.41,
      "learning_rate": 8.69524440231046e-05,
      "loss": 2.0135,
      "step": 140
    },
    {
      "epoch": 2.5,
      "learning_rate": 6.860000000000001e-05,
      "loss": 1.9796,
      "step": 145
    },
    {
      "epoch": 2.59,
      "learning_rate": 5.024755597689551e-05,
      "loss": 2.0685,
      "step": 150
    },
    {
      "epoch": 2.67,
      "learning_rate": 3.3233005397656285e-05,
      "loss": 2.075,
      "step": 155
    },
    {
      "epoch": 2.76,
      "learning_rate": 1.8796709254073232e-05,
      "loss": 2.0578,
      "step": 160
    },
    {
      "epoch": 2.84,
      "learning_rate": 7.99107375100285e-06,
      "loss": 1.9644,
      "step": 165
    },
    {
      "epoch": 2.93,
      "learning_rate": 1.6038298782880706e-06,
      "loss": 2.0836,
      "step": 170
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.9711263179779053,
      "eval_runtime": 1.056,
      "eval_samples_per_second": 75.756,
      "eval_steps_per_second": 9.469,
      "step": 174
    },
    {
      "epoch": 3.02,
      "learning_rate": 1.0060780981290602e-07,
      "loss": 2.0553,
      "step": 175
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.5909924568597365e-06,
      "loss": 1.9319,
      "step": 180
    },
    {
      "epoch": 3.19,
      "learning_rate": 1.182053472643733e-05,
      "loss": 2.0587,
      "step": 185
    },
    {
      "epoch": 3.28,
      "learning_rate": 2.418930086396662e-05,
      "loss": 2.1358,
      "step": 190
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.9795607632965815e-05,
      "loss": 1.9452,
      "step": 195
    },
    {
      "epoch": 3.45,
      "learning_rate": 5.750175503648027e-05,
      "loss": 1.9591,
      "step": 200
    },
    {
      "epoch": 3.53,
      "learning_rate": 7.601696466388229e-05,
      "loss": 1.8235,
      "step": 205
    },
    {
      "epoch": 3.62,
      "learning_rate": 9.39914774563181e-05,
      "loss": 1.8749,
      "step": 210
    },
    {
      "epoch": 3.71,
      "learning_rate": 0.00011011495116229225,
      "loss": 2.0178,
      "step": 215
    },
    {
      "epoch": 3.79,
      "learning_rate": 0.00012321198430740717,
      "loss": 1.9038,
      "step": 220
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.0001323278029794319,
      "loss": 2.1059,
      "step": 225
    },
    {
      "epoch": 3.97,
      "learning_rate": 0.00013679786386078908,
      "loss": 1.946,
      "step": 230
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.9748159646987915,
      "eval_runtime": 1.0572,
      "eval_samples_per_second": 75.673,
      "eval_steps_per_second": 9.459,
      "step": 232
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.0001362962994463487,
      "loss": 1.8662,
      "step": 235
    },
    {
      "epoch": 4.14,
      "learning_rate": 0.00013085967378942767,
      "loss": 1.9053,
      "step": 240
    },
    {
      "epoch": 4.22,
      "learning_rate": 0.00012088431698175582,
      "loss": 1.8573,
      "step": 245
    },
    {
      "epoch": 4.31,
      "learning_rate": 0.00010709743268385941,
      "loss": 1.9869,
      "step": 250
    },
    {
      "epoch": 4.4,
      "learning_rate": 9.050408496732835e-05,
      "loss": 1.7389,
      "step": 255
    },
    {
      "epoch": 4.48,
      "learning_rate": 7.231392912895982e-05,
      "loss": 2.0663,
      "step": 260
    },
    {
      "epoch": 4.57,
      "learning_rate": 5.385302780152384e-05,
      "loss": 1.8779,
      "step": 265
    },
    {
      "epoch": 4.66,
      "learning_rate": 3.646718096799452e-05,
      "loss": 1.7718,
      "step": 270
    },
    {
      "epoch": 4.74,
      "learning_rate": 2.14238171226552e-05,
      "loss": 1.7325,
      "step": 275
    },
    {
      "epoch": 4.83,
      "learning_rate": 9.819597714903422e-06,
      "loss": 1.7267,
      "step": 280
    },
    {
      "epoch": 4.91,
      "learning_rate": 2.5004705131813084e-06,
      "loss": 1.8411,
      "step": 285
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.0,
      "loss": 1.6194,
      "step": 290
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.948645830154419,
      "eval_runtime": 1.0612,
      "eval_samples_per_second": 75.387,
      "eval_steps_per_second": 9.423,
      "step": 290
    }
  ],
  "max_steps": 1218,
  "num_train_epochs": 21,
  "total_flos": 299832606720000.0,
  "trial_name": null,
  "trial_params": null
}