{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.28496929455851133,
  "eval_steps": 1250,
  "global_step": 20000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 4.097230911254883,
      "learning_rate": 9.94067861010316e-05,
      "loss": 1.3238,
      "step": 1250
    },
    {
      "epoch": 0.02,
      "eval_bertscore": 0.6956291198730469,
      "eval_genLen": 82.92592592592592,
      "eval_loss": 1.1427265405654907,
      "eval_rouge1": 0.5159529451611213,
      "eval_rouge2": 0.2838663532543666,
      "eval_rougeL": 0.38821121834167605,
      "eval_rougeLsum": 0.46501560637668704,
      "eval_runtime": 69.1213,
      "eval_samples_per_second": 0.781,
      "eval_steps_per_second": 0.203,
      "step": 1250
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.617356777191162,
      "learning_rate": 9.881309725098315e-05,
      "loss": 1.2895,
      "step": 2500
    },
    {
      "epoch": 0.04,
      "eval_bertscore": 0.7066090703010559,
      "eval_genLen": 96.03703703703704,
      "eval_loss": 1.1249319314956665,
      "eval_rouge1": 0.5330587830251434,
      "eval_rouge2": 0.3099036809054595,
      "eval_rougeL": 0.412661355969875,
      "eval_rougeLsum": 0.48944550025103617,
      "eval_runtime": 75.8923,
      "eval_samples_per_second": 0.712,
      "eval_steps_per_second": 0.184,
      "step": 2500
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.907381772994995,
      "learning_rate": 9.821940840093471e-05,
      "loss": 1.2834,
      "step": 3750
    },
    {
      "epoch": 0.05,
      "eval_bertscore": 0.7053420543670654,
      "eval_genLen": 83.77777777777777,
      "eval_loss": 1.105911374092102,
      "eval_rouge1": 0.5321256596115037,
      "eval_rouge2": 0.31714209563590884,
      "eval_rougeL": 0.40925237690133615,
      "eval_rougeLsum": 0.4886856652277769,
      "eval_runtime": 68.7624,
      "eval_samples_per_second": 0.785,
      "eval_steps_per_second": 0.204,
      "step": 3750
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.9756619930267334,
      "learning_rate": 9.762571955088627e-05,
      "loss": 1.2906,
      "step": 5000
    },
    {
      "epoch": 0.07,
      "eval_bertscore": 0.6933568120002747,
      "eval_genLen": 97.16666666666667,
      "eval_loss": 1.1062666177749634,
      "eval_rouge1": 0.5053956542564317,
      "eval_rouge2": 0.28624901599606994,
      "eval_rougeL": 0.37970285907081175,
      "eval_rougeLsum": 0.4607084512601043,
      "eval_runtime": 78.42,
      "eval_samples_per_second": 0.689,
      "eval_steps_per_second": 0.179,
      "step": 5000
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.9864869117736816,
      "learning_rate": 9.703203070083782e-05,
      "loss": 1.2374,
      "step": 6250
    },
    {
      "epoch": 0.09,
      "eval_bertscore": 0.6971917748451233,
      "eval_genLen": 86.77777777777777,
      "eval_loss": 1.1013126373291016,
      "eval_rouge1": 0.5188516454195439,
      "eval_rouge2": 0.2934087447149625,
      "eval_rougeL": 0.3991622356187133,
      "eval_rougeLsum": 0.4742825888884543,
      "eval_runtime": 72.3945,
      "eval_samples_per_second": 0.746,
      "eval_steps_per_second": 0.193,
      "step": 6250
    },
    {
      "epoch": 0.11,
      "grad_norm": 4.1992363929748535,
      "learning_rate": 9.643834185078938e-05,
      "loss": 1.2413,
      "step": 7500
    },
    {
      "epoch": 0.11,
      "eval_bertscore": 0.7139999866485596,
      "eval_genLen": 92.64814814814815,
      "eval_loss": 1.0914992094039917,
      "eval_rouge1": 0.5466003367313871,
      "eval_rouge2": 0.31922844421283547,
      "eval_rougeL": 0.4169582184321336,
      "eval_rougeLsum": 0.498443793296382,
      "eval_runtime": 72.3925,
      "eval_samples_per_second": 0.746,
      "eval_steps_per_second": 0.193,
      "step": 7500
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.1927878856658936,
      "learning_rate": 9.584465300074092e-05,
      "loss": 1.2493,
      "step": 8750
    },
    {
      "epoch": 0.12,
      "eval_bertscore": 0.7134243845939636,
      "eval_genLen": 107.05555555555556,
      "eval_loss": 1.0917757749557495,
      "eval_rouge1": 0.5403323301826519,
      "eval_rouge2": 0.3239136090541552,
      "eval_rougeL": 0.41631360091859104,
      "eval_rougeLsum": 0.49708733766871216,
      "eval_runtime": 112.5216,
      "eval_samples_per_second": 0.48,
      "eval_steps_per_second": 0.124,
      "step": 8750
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4977918863296509,
      "learning_rate": 9.525096415069248e-05,
      "loss": 1.2496,
      "step": 10000
    },
    {
      "epoch": 0.14,
      "eval_bertscore": 0.7065363526344299,
      "eval_genLen": 90.01851851851852,
      "eval_loss": 1.086106538772583,
      "eval_rouge1": 0.5282150269891435,
      "eval_rouge2": 0.3110563425680395,
      "eval_rougeL": 0.41248087650028814,
      "eval_rougeLsum": 0.4829780978935056,
      "eval_runtime": 72.7148,
      "eval_samples_per_second": 0.743,
      "eval_steps_per_second": 0.193,
      "step": 10000
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4515347480773926,
      "learning_rate": 9.465727530064403e-05,
      "loss": 1.2659,
      "step": 11250
    },
    {
      "epoch": 0.16,
      "eval_bertscore": 0.7218929529190063,
      "eval_genLen": 94.70370370370371,
      "eval_loss": 1.1032713651657104,
      "eval_rouge1": 0.5623524814225724,
      "eval_rouge2": 0.3452227568645328,
      "eval_rougeL": 0.44544692856518386,
      "eval_rougeLsum": 0.5178650508126328,
      "eval_runtime": 74.6907,
      "eval_samples_per_second": 0.723,
      "eval_steps_per_second": 0.187,
      "step": 11250
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.8818466663360596,
      "learning_rate": 9.40635864505956e-05,
      "loss": 1.2304,
      "step": 12500
    },
    {
      "epoch": 0.18,
      "eval_bertscore": 0.7083500623703003,
      "eval_genLen": 90.20370370370371,
      "eval_loss": 1.0610899925231934,
      "eval_rouge1": 0.5240863402691536,
      "eval_rouge2": 0.3121053723996501,
      "eval_rougeL": 0.41266003903413684,
      "eval_rougeLsum": 0.48580101489974525,
      "eval_runtime": 71.9067,
      "eval_samples_per_second": 0.751,
      "eval_steps_per_second": 0.195,
      "step": 12500
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.978266477584839,
      "learning_rate": 9.346989760054715e-05,
      "loss": 1.2222,
      "step": 13750
    },
    {
      "epoch": 0.2,
      "eval_bertscore": 0.7070857882499695,
      "eval_genLen": 93.9074074074074,
      "eval_loss": 1.0647282600402832,
      "eval_rouge1": 0.5252929761765078,
      "eval_rouge2": 0.3070788446707112,
      "eval_rougeL": 0.40577497004774793,
      "eval_rougeLsum": 0.4817171660087853,
      "eval_runtime": 74.0259,
      "eval_samples_per_second": 0.729,
      "eval_steps_per_second": 0.189,
      "step": 13750
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.7086610794067383,
      "learning_rate": 9.287620875049871e-05,
      "loss": 1.1829,
      "step": 15000
    },
    {
      "epoch": 0.21,
      "eval_bertscore": 0.7091460824012756,
      "eval_genLen": 89.61111111111111,
      "eval_loss": 1.0691784620285034,
      "eval_rouge1": 0.5315680119437703,
      "eval_rouge2": 0.3113181358860274,
      "eval_rougeL": 0.4111183678664766,
      "eval_rougeLsum": 0.4913397034516531,
      "eval_runtime": 70.6818,
      "eval_samples_per_second": 0.764,
      "eval_steps_per_second": 0.198,
      "step": 15000
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5603418350219727,
      "learning_rate": 9.228251990045025e-05,
      "loss": 1.2126,
      "step": 16250
    },
    {
      "epoch": 0.23,
      "eval_bertscore": 0.7132083773612976,
      "eval_genLen": 87.33333333333333,
      "eval_loss": 1.0543395280838013,
      "eval_rouge1": 0.5440156640908351,
      "eval_rouge2": 0.32363148687662013,
      "eval_rougeL": 0.4293179442078574,
      "eval_rougeLsum": 0.4991435323865428,
      "eval_runtime": 67.7885,
      "eval_samples_per_second": 0.797,
      "eval_steps_per_second": 0.207,
      "step": 16250
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4381914138793945,
      "learning_rate": 9.168883105040181e-05,
      "loss": 1.2411,
      "step": 17500
    },
    {
      "epoch": 0.25,
      "eval_bertscore": 0.7285012602806091,
      "eval_genLen": 90.31481481481481,
      "eval_loss": 1.0491299629211426,
      "eval_rouge1": 0.5765411555528399,
      "eval_rouge2": 0.3629828097288318,
      "eval_rougeL": 0.45430697726862734,
      "eval_rougeLsum": 0.531885901521512,
      "eval_runtime": 71.4771,
      "eval_samples_per_second": 0.755,
      "eval_steps_per_second": 0.196,
      "step": 17500
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.7035632133483887,
      "learning_rate": 9.109514220035336e-05,
      "loss": 1.2083,
      "step": 18750
    },
    {
      "epoch": 0.27,
      "eval_bertscore": 0.7063838839530945,
      "eval_genLen": 83.29629629629629,
      "eval_loss": 1.063607096672058,
      "eval_rouge1": 0.5297099978440998,
      "eval_rouge2": 0.30795176196250096,
      "eval_rougeL": 0.41380056155261313,
      "eval_rougeLsum": 0.48693110888669633,
      "eval_runtime": 65.6916,
      "eval_samples_per_second": 0.822,
      "eval_steps_per_second": 0.213,
      "step": 18750
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8611701726913452,
      "learning_rate": 9.050145335030492e-05,
      "loss": 1.1949,
      "step": 20000
    },
    {
      "epoch": 0.28,
      "eval_bertscore": 0.7123867869377136,
      "eval_genLen": 81.72222222222223,
      "eval_loss": 1.0569322109222412,
      "eval_rouge1": 0.5364389527230491,
      "eval_rouge2": 0.32713509260895823,
      "eval_rougeL": 0.4193597171317168,
      "eval_rougeLsum": 0.4937233394672767,
      "eval_runtime": 65.0848,
      "eval_samples_per_second": 0.83,
      "eval_steps_per_second": 0.215,
      "step": 20000
    }
  ],
  "logging_steps": 1250,
  "max_steps": 210549,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 5000,
  "total_flos": 5.102853475575398e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}