| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 15.988480921526278, | |
| "eval_steps": 500, | |
| "global_step": 16656, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.9399615754082615e-05, | |
| "loss": 1.8112, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.879923150816523e-05, | |
| "loss": 1.6328, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.9029, | |
| "eval_gen_len": 19.87818181818182, | |
| "eval_loss": 1.4800708293914795, | |
| "eval_precision": 0.9134, | |
| "eval_recall": 0.893, | |
| "eval_rouge1": 0.448, | |
| "eval_rouge2": 0.2243, | |
| "eval_rougeL": 0.385, | |
| "eval_rougeLsum": 0.385, | |
| "eval_runtime": 603.3554, | |
| "eval_samples_per_second": 9.116, | |
| "eval_steps_per_second": 0.57, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.819884726224784e-05, | |
| "loss": 1.4991, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.7598463016330453e-05, | |
| "loss": 1.4598, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.9022, | |
| "eval_gen_len": 19.934363636363635, | |
| "eval_loss": 1.405110478401184, | |
| "eval_precision": 0.9147, | |
| "eval_recall": 0.8903, | |
| "eval_rouge1": 0.4428, | |
| "eval_rouge2": 0.2273, | |
| "eval_rougeL": 0.3851, | |
| "eval_rougeLsum": 0.385, | |
| "eval_runtime": 669.8531, | |
| "eval_samples_per_second": 8.211, | |
| "eval_steps_per_second": 0.514, | |
| "step": 2083 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.6998078770413066e-05, | |
| "loss": 1.3652, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.6397694524495677e-05, | |
| "loss": 1.3402, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.9034, | |
| "eval_gen_len": 19.95, | |
| "eval_loss": 1.3839877843856812, | |
| "eval_precision": 0.9158, | |
| "eval_recall": 0.8918, | |
| "eval_rouge1": 0.4498, | |
| "eval_rouge2": 0.2318, | |
| "eval_rougeL": 0.3921, | |
| "eval_rougeLsum": 0.392, | |
| "eval_runtime": 670.3562, | |
| "eval_samples_per_second": 8.205, | |
| "eval_steps_per_second": 0.513, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 1.579731027857829e-05, | |
| "loss": 1.2679, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 1.5196926032660904e-05, | |
| "loss": 1.2446, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.9054, | |
| "eval_gen_len": 19.884, | |
| "eval_loss": 1.3682185411453247, | |
| "eval_precision": 0.9169, | |
| "eval_recall": 0.8944, | |
| "eval_rouge1": 0.4604, | |
| "eval_rouge2": 0.2405, | |
| "eval_rougeL": 0.4014, | |
| "eval_rougeLsum": 0.4014, | |
| "eval_runtime": 577.6339, | |
| "eval_samples_per_second": 9.522, | |
| "eval_steps_per_second": 0.596, | |
| "step": 4167 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 1.4596541786743516e-05, | |
| "loss": 1.1877, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 1.399615754082613e-05, | |
| "loss": 1.1651, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.9055, | |
| "eval_gen_len": 19.894, | |
| "eval_loss": 1.3695330619812012, | |
| "eval_precision": 0.9173, | |
| "eval_recall": 0.8942, | |
| "eval_rouge1": 0.4594, | |
| "eval_rouge2": 0.2401, | |
| "eval_rougeL": 0.3995, | |
| "eval_rougeLsum": 0.3995, | |
| "eval_runtime": 669.362, | |
| "eval_samples_per_second": 8.217, | |
| "eval_steps_per_second": 0.514, | |
| "step": 5208 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 1.3395773294908743e-05, | |
| "loss": 1.1201, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 1.2795389048991355e-05, | |
| "loss": 1.1002, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_f1": 0.9053, | |
| "eval_gen_len": 19.91181818181818, | |
| "eval_loss": 1.3782570362091064, | |
| "eval_precision": 0.9166, | |
| "eval_recall": 0.8945, | |
| "eval_rouge1": 0.4607, | |
| "eval_rouge2": 0.2423, | |
| "eval_rougeL": 0.4014, | |
| "eval_rougeLsum": 0.4014, | |
| "eval_runtime": 671.1543, | |
| "eval_samples_per_second": 8.195, | |
| "eval_steps_per_second": 0.513, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 1.2195004803073969e-05, | |
| "loss": 1.0653, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 1.1594620557156582e-05, | |
| "loss": 1.0427, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_f1": 0.9056, | |
| "eval_gen_len": 19.907454545454545, | |
| "eval_loss": 1.3850913047790527, | |
| "eval_precision": 0.9172, | |
| "eval_recall": 0.8946, | |
| "eval_rouge1": 0.462, | |
| "eval_rouge2": 0.2432, | |
| "eval_rougeL": 0.4028, | |
| "eval_rougeLsum": 0.4028, | |
| "eval_runtime": 669.8936, | |
| "eval_samples_per_second": 8.21, | |
| "eval_steps_per_second": 0.514, | |
| "step": 7292 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 1.0994236311239194e-05, | |
| "loss": 1.0163, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 1.0393852065321808e-05, | |
| "loss": 0.9881, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_f1": 0.9059, | |
| "eval_gen_len": 19.907090909090908, | |
| "eval_loss": 1.3910883665084839, | |
| "eval_precision": 0.9177, | |
| "eval_recall": 0.8947, | |
| "eval_rouge1": 0.4635, | |
| "eval_rouge2": 0.2442, | |
| "eval_rougeL": 0.4038, | |
| "eval_rougeLsum": 0.4037, | |
| "eval_runtime": 573.3321, | |
| "eval_samples_per_second": 9.593, | |
| "eval_steps_per_second": 0.6, | |
| "step": 8334 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 9.79346781940442e-06, | |
| "loss": 0.9742, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 9.193083573487034e-06, | |
| "loss": 0.9435, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_f1": 0.9067, | |
| "eval_gen_len": 19.880545454545455, | |
| "eval_loss": 1.4075220823287964, | |
| "eval_precision": 0.918, | |
| "eval_recall": 0.8959, | |
| "eval_rouge1": 0.468, | |
| "eval_rouge2": 0.2471, | |
| "eval_rougeL": 0.4085, | |
| "eval_rougeLsum": 0.4084, | |
| "eval_runtime": 599.9366, | |
| "eval_samples_per_second": 9.168, | |
| "eval_steps_per_second": 0.573, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 8.592699327569645e-06, | |
| "loss": 0.9362, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 7.992315081652257e-06, | |
| "loss": 0.9035, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_f1": 0.9064, | |
| "eval_gen_len": 19.881090909090908, | |
| "eval_loss": 1.412468671798706, | |
| "eval_precision": 0.9178, | |
| "eval_recall": 0.8957, | |
| "eval_rouge1": 0.4675, | |
| "eval_rouge2": 0.248, | |
| "eval_rougeL": 0.4085, | |
| "eval_rougeLsum": 0.4086, | |
| "eval_runtime": 566.1377, | |
| "eval_samples_per_second": 9.715, | |
| "eval_steps_per_second": 0.608, | |
| "step": 10417 | |
| }, | |
| { | |
| "epoch": 10.08, | |
| "learning_rate": 7.391930835734871e-06, | |
| "loss": 0.9014, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "learning_rate": 6.791546589817484e-06, | |
| "loss": 0.8702, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_f1": 0.9063, | |
| "eval_gen_len": 19.894727272727273, | |
| "eval_loss": 1.4218909740447998, | |
| "eval_precision": 0.9181, | |
| "eval_recall": 0.895, | |
| "eval_rouge1": 0.4646, | |
| "eval_rouge2": 0.2455, | |
| "eval_rougeL": 0.405, | |
| "eval_rougeLsum": 0.4051, | |
| "eval_runtime": 670.3799, | |
| "eval_samples_per_second": 8.204, | |
| "eval_steps_per_second": 0.513, | |
| "step": 11459 | |
| }, | |
| { | |
| "epoch": 11.04, | |
| "learning_rate": 6.191162343900097e-06, | |
| "loss": 0.8741, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "learning_rate": 5.590778097982709e-06, | |
| "loss": 0.8395, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 4.990393852065322e-06, | |
| "loss": 0.8458, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_f1": 0.9061, | |
| "eval_gen_len": 19.898545454545456, | |
| "eval_loss": 1.4338867664337158, | |
| "eval_precision": 0.9177, | |
| "eval_recall": 0.8952, | |
| "eval_rouge1": 0.4643, | |
| "eval_rouge2": 0.2447, | |
| "eval_rougeL": 0.4055, | |
| "eval_rougeLsum": 0.4055, | |
| "eval_runtime": 670.6829, | |
| "eval_samples_per_second": 8.201, | |
| "eval_steps_per_second": 0.513, | |
| "step": 12501 | |
| }, | |
| { | |
| "epoch": 12.48, | |
| "learning_rate": 4.390009606147935e-06, | |
| "loss": 0.8172, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 12.96, | |
| "learning_rate": 3.7896253602305477e-06, | |
| "loss": 0.8207, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_f1": 0.9064, | |
| "eval_gen_len": 19.905272727272727, | |
| "eval_loss": 1.44303560256958, | |
| "eval_precision": 0.9182, | |
| "eval_recall": 0.8952, | |
| "eval_rouge1": 0.4671, | |
| "eval_rouge2": 0.2463, | |
| "eval_rougeL": 0.4068, | |
| "eval_rougeLsum": 0.4069, | |
| "eval_runtime": 650.7057, | |
| "eval_samples_per_second": 8.452, | |
| "eval_steps_per_second": 0.529, | |
| "step": 13542 | |
| }, | |
| { | |
| "epoch": 13.44, | |
| "learning_rate": 3.189241114313161e-06, | |
| "loss": 0.8006, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 13.92, | |
| "learning_rate": 2.5888568683957737e-06, | |
| "loss": 0.7987, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_f1": 0.9059, | |
| "eval_gen_len": 19.918, | |
| "eval_loss": 1.449475646018982, | |
| "eval_precision": 0.9179, | |
| "eval_recall": 0.8944, | |
| "eval_rouge1": 0.4633, | |
| "eval_rouge2": 0.2455, | |
| "eval_rougeL": 0.4046, | |
| "eval_rougeLsum": 0.4047, | |
| "eval_runtime": 661.0314, | |
| "eval_samples_per_second": 8.32, | |
| "eval_steps_per_second": 0.52, | |
| "step": 14584 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "learning_rate": 1.988472622478386e-06, | |
| "loss": 0.7843, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 14.88, | |
| "learning_rate": 1.3880883765609993e-06, | |
| "loss": 0.787, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_f1": 0.9064, | |
| "eval_gen_len": 19.895636363636363, | |
| "eval_loss": 1.4560260772705078, | |
| "eval_precision": 0.9182, | |
| "eval_recall": 0.8953, | |
| "eval_rouge1": 0.4666, | |
| "eval_rouge2": 0.2471, | |
| "eval_rougeL": 0.407, | |
| "eval_rougeLsum": 0.4072, | |
| "eval_runtime": 670.9962, | |
| "eval_samples_per_second": 8.197, | |
| "eval_steps_per_second": 0.513, | |
| "step": 15626 | |
| }, | |
| { | |
| "epoch": 15.36, | |
| "learning_rate": 7.87704130643612e-07, | |
| "loss": 0.7775, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 15.84, | |
| "learning_rate": 1.8731988472622478e-07, | |
| "loss": 0.772, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "eval_f1": 0.9068, | |
| "eval_gen_len": 19.881636363636364, | |
| "eval_loss": 1.4622657299041748, | |
| "eval_precision": 0.9185, | |
| "eval_recall": 0.8957, | |
| "eval_rouge1": 0.4678, | |
| "eval_rouge2": 0.2472, | |
| "eval_rougeL": 0.4081, | |
| "eval_rougeLsum": 0.4082, | |
| "eval_runtime": 669.6134, | |
| "eval_samples_per_second": 8.214, | |
| "eval_steps_per_second": 0.514, | |
| "step": 16656 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "step": 16656, | |
| "total_flos": 3.421567656204632e+18, | |
| "train_loss": 1.050457198154915, | |
| "train_runtime": 71670.9422, | |
| "train_samples_per_second": 22.324, | |
| "train_steps_per_second": 0.232 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 16656, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 16, | |
| "save_steps": 500, | |
| "total_flos": 3.421567656204632e+18, | |
| "train_batch_size": 24, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |