{
  "best_metric": 0.20580817759037018,
  "best_model_checkpoint": "autotrain-qtwrd-y9iny/checkpoint-360",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 360,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "learning_rate": 5.555555555555556e-06,
      "loss": 1.6662,
      "step": 6
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.388888888888889e-05,
      "loss": 1.2219,
      "step": 12
    },
    {
      "epoch": 0.15,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.5655,
      "step": 18
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.055555555555556e-05,
      "loss": 0.5646,
      "step": 24
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.888888888888889e-05,
      "loss": 0.5171,
      "step": 30
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.722222222222222e-05,
      "loss": 0.477,
      "step": 36
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.938271604938271e-05,
      "loss": 0.2809,
      "step": 42
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.845679012345679e-05,
      "loss": 0.4842,
      "step": 48
    },
    {
      "epoch": 0.45,
      "learning_rate": 4.7530864197530866e-05,
      "loss": 0.4148,
      "step": 54
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.6604938271604945e-05,
      "loss": 0.5962,
      "step": 60
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.567901234567901e-05,
      "loss": 0.2418,
      "step": 66
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.4753086419753084e-05,
      "loss": 0.3418,
      "step": 72
    },
    {
      "epoch": 0.65,
      "learning_rate": 4.3827160493827164e-05,
      "loss": 0.263,
      "step": 78
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.290123456790124e-05,
      "loss": 0.3013,
      "step": 84
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.197530864197531e-05,
      "loss": 0.1821,
      "step": 90
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.104938271604938e-05,
      "loss": 0.2559,
      "step": 96
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.012345679012346e-05,
      "loss": 0.696,
      "step": 102
    },
    {
      "epoch": 0.9,
      "learning_rate": 3.9197530864197535e-05,
      "loss": 0.4992,
      "step": 108
    },
    {
      "epoch": 0.95,
      "learning_rate": 3.82716049382716e-05,
      "loss": 0.3305,
      "step": 114
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.734567901234568e-05,
      "loss": 0.1607,
      "step": 120
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 118.7,
      "eval_loss": 0.2715524733066559,
      "eval_rouge1": 48.1918,
      "eval_rouge2": 37.1255,
      "eval_rougeL": 43.0239,
      "eval_rougeLsum": 47.9099,
      "eval_runtime": 451.4323,
      "eval_samples_per_second": 0.266,
      "eval_steps_per_second": 0.033,
      "step": 120
    },
    {
      "epoch": 1.05,
      "learning_rate": 3.6419753086419754e-05,
      "loss": 0.1584,
      "step": 126
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.5493827160493834e-05,
      "loss": 0.2017,
      "step": 132
    },
    {
      "epoch": 1.15,
      "learning_rate": 3.45679012345679e-05,
      "loss": 0.3175,
      "step": 138
    },
    {
      "epoch": 1.2,
      "learning_rate": 3.364197530864198e-05,
      "loss": 0.348,
      "step": 144
    },
    {
      "epoch": 1.25,
      "learning_rate": 3.271604938271605e-05,
      "loss": 0.2901,
      "step": 150
    },
    {
      "epoch": 1.3,
      "learning_rate": 3.1790123456790125e-05,
      "loss": 0.1567,
      "step": 156
    },
    {
      "epoch": 1.35,
      "learning_rate": 3.08641975308642e-05,
      "loss": 0.236,
      "step": 162
    },
    {
      "epoch": 1.4,
      "learning_rate": 2.993827160493827e-05,
      "loss": 0.2284,
      "step": 168
    },
    {
      "epoch": 1.45,
      "learning_rate": 2.9012345679012347e-05,
      "loss": 0.2008,
      "step": 174
    },
    {
      "epoch": 1.5,
      "learning_rate": 2.8086419753086424e-05,
      "loss": 0.1082,
      "step": 180
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.7160493827160493e-05,
      "loss": 0.3563,
      "step": 186
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.623456790123457e-05,
      "loss": 0.1463,
      "step": 192
    },
    {
      "epoch": 1.65,
      "learning_rate": 2.5308641975308646e-05,
      "loss": 0.1531,
      "step": 198
    },
    {
      "epoch": 1.7,
      "learning_rate": 2.438271604938272e-05,
      "loss": 0.1886,
      "step": 204
    },
    {
      "epoch": 1.75,
      "learning_rate": 2.345679012345679e-05,
      "loss": 0.1699,
      "step": 210
    },
    {
      "epoch": 1.8,
      "learning_rate": 2.2530864197530865e-05,
      "loss": 0.2024,
      "step": 216
    },
    {
      "epoch": 1.85,
      "learning_rate": 2.1604938271604937e-05,
      "loss": 0.12,
      "step": 222
    },
    {
      "epoch": 1.9,
      "learning_rate": 2.0679012345679014e-05,
      "loss": 0.1441,
      "step": 228
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.9753086419753087e-05,
      "loss": 0.1217,
      "step": 234
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.8827160493827163e-05,
      "loss": 0.2564,
      "step": 240
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 115.3167,
      "eval_loss": 0.2248244434595108,
      "eval_rouge1": 52.3593,
      "eval_rouge2": 39.8513,
      "eval_rougeL": 47.1605,
      "eval_rougeLsum": 52.1913,
      "eval_runtime": 451.0124,
      "eval_samples_per_second": 0.266,
      "eval_steps_per_second": 0.033,
      "step": 240
    },
    {
      "epoch": 2.05,
      "learning_rate": 1.7901234567901236e-05,
      "loss": 0.1657,
      "step": 246
    },
    {
      "epoch": 2.1,
      "learning_rate": 1.697530864197531e-05,
      "loss": 0.0853,
      "step": 252
    },
    {
      "epoch": 2.15,
      "learning_rate": 1.604938271604938e-05,
      "loss": 0.2708,
      "step": 258
    },
    {
      "epoch": 2.2,
      "learning_rate": 1.5123456790123458e-05,
      "loss": 0.0697,
      "step": 264
    },
    {
      "epoch": 2.25,
      "learning_rate": 1.419753086419753e-05,
      "loss": 0.2495,
      "step": 270
    },
    {
      "epoch": 2.3,
      "learning_rate": 1.3271604938271605e-05,
      "loss": 0.1598,
      "step": 276
    },
    {
      "epoch": 2.35,
      "learning_rate": 1.2345679012345678e-05,
      "loss": 0.0863,
      "step": 282
    },
    {
      "epoch": 2.4,
      "learning_rate": 1.1419753086419753e-05,
      "loss": 0.067,
      "step": 288
    },
    {
      "epoch": 2.45,
      "learning_rate": 1.0493827160493827e-05,
      "loss": 0.1219,
      "step": 294
    },
    {
      "epoch": 2.5,
      "learning_rate": 9.5679012345679e-06,
      "loss": 0.1327,
      "step": 300
    },
    {
      "epoch": 2.55,
      "learning_rate": 8.641975308641975e-06,
      "loss": 0.0823,
      "step": 306
    },
    {
      "epoch": 2.6,
      "learning_rate": 7.71604938271605e-06,
      "loss": 0.0862,
      "step": 312
    },
    {
      "epoch": 2.65,
      "learning_rate": 6.790123456790123e-06,
      "loss": 0.0591,
      "step": 318
    },
    {
      "epoch": 2.7,
      "learning_rate": 5.864197530864198e-06,
      "loss": 0.0881,
      "step": 324
    },
    {
      "epoch": 2.75,
      "learning_rate": 4.938271604938272e-06,
      "loss": 0.0518,
      "step": 330
    },
    {
      "epoch": 2.8,
      "learning_rate": 4.012345679012345e-06,
      "loss": 0.0701,
      "step": 336
    },
    {
      "epoch": 2.85,
      "learning_rate": 3.0864197530864196e-06,
      "loss": 0.0682,
      "step": 342
    },
    {
      "epoch": 2.9,
      "learning_rate": 2.1604938271604937e-06,
      "loss": 0.06,
      "step": 348
    },
    {
      "epoch": 2.95,
      "learning_rate": 1.234567901234568e-06,
      "loss": 0.1058,
      "step": 354
    },
    {
      "epoch": 3.0,
      "learning_rate": 3.08641975308642e-07,
      "loss": 0.088,
      "step": 360
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 116.6083,
      "eval_loss": 0.20580817759037018,
      "eval_rouge1": 54.3657,
      "eval_rouge2": 43.8004,
      "eval_rougeL": 50.056,
      "eval_rougeLsum": 53.9699,
      "eval_runtime": 450.9703,
      "eval_samples_per_second": 0.266,
      "eval_steps_per_second": 0.033,
      "step": 360
    }
  ],
  "logging_steps": 6,
  "max_steps": 360,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 3000639907577856.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}