{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 2639, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_gen_len": 117.37, "eval_loss": 1.6928337812423706, "eval_rouge1": 0.42757910674263067, "eval_rouge2": 0.15720196581442164, "eval_rougeL": 0.256418241790792, "eval_rougeLsum": 0.35382355041095964, "eval_runtime": 1351.6766, "eval_samples_per_second": 0.74, "eval_steps_per_second": 0.047, "step": 377 }, { "epoch": 1.3262599469496021, "grad_norm": 3.7333056926727295, "learning_rate": 2.8741879956059633e-06, "loss": 1.7717, "step": 500 }, { "epoch": 2.0, "eval_gen_len": 116.608, "eval_loss": 1.6410094499588013, "eval_rouge1": 0.4286897823447751, "eval_rouge2": 0.15765392626100322, "eval_rougeL": 0.25895063496135456, "eval_rougeLsum": 0.3538857213392972, "eval_runtime": 1170.2376, "eval_samples_per_second": 0.855, "eval_steps_per_second": 0.054, "step": 754 }, { "epoch": 2.6525198938992043, "grad_norm": 4.453255653381348, "learning_rate": 2.514178628889199e-06, "loss": 1.5895, "step": 1000 }, { "epoch": 3.0, "eval_gen_len": 117.178, "eval_loss": 1.6089894771575928, "eval_rouge1": 0.42709156909402823, "eval_rouge2": 0.1553778143480612, "eval_rougeL": 0.25488136514137727, "eval_rougeLsum": 0.3525718986787997, "eval_runtime": 1384.4242, "eval_samples_per_second": 0.722, "eval_steps_per_second": 0.046, "step": 1131 }, { "epoch": 3.9787798408488064, "grad_norm": 3.2527501583099365, "learning_rate": 1.981320706041849e-06, "loss": 1.5182, "step": 1500 }, { "epoch": 4.0, "eval_gen_len": 118.94, "eval_loss": 1.597896933555603, "eval_rouge1": 0.4321139550542189, "eval_rouge2": 0.16019449900934246, "eval_rougeL": 0.2577516929717559, "eval_rougeLsum": 0.3566245729299692, "eval_runtime": 1616.3945, "eval_samples_per_second": 0.619, "eval_steps_per_second": 0.039, "step": 1508 }, { "epoch": 5.0, "eval_gen_len": 116.603, "eval_loss": 1.583080530166626, "eval_rouge1": 0.43043600066735005, "eval_rouge2": 0.16036196231543565, "eval_rougeL": 0.2578722844279166, "eval_rougeLsum": 0.35735455171087505, "eval_runtime": 1754.8754, "eval_samples_per_second": 0.57, "eval_steps_per_second": 0.036, "step": 1885 }, { "epoch": 5.305039787798409, "grad_norm": 3.344116687774658, "learning_rate": 1.3651853437341204e-06, "loss": 1.4547, "step": 2000 }, { "epoch": 6.0, "eval_gen_len": 116.67, "eval_loss": 1.5650923252105713, "eval_rouge1": 0.4348875537307479, "eval_rouge2": 0.16312989568941516, "eval_rougeL": 0.26082852541259915, "eval_rougeLsum": 0.358369227586099, "eval_runtime": 1733.5816, "eval_samples_per_second": 0.577, "eval_steps_per_second": 0.036, "step": 2262 }, { "epoch": 6.63129973474801, "grad_norm": 3.3940606117248535, "learning_rate": 7.721175674180988e-07, "loss": 1.4188, "step": 2500 }, { "epoch": 7.0, "eval_gen_len": 117.259, "eval_loss": 1.5626862049102783, "eval_rouge1": 0.43666460369707594, "eval_rouge2": 0.16469845252758708, "eval_rougeL": 0.26181956536662193, "eval_rougeLsum": 0.3620078400661802, "eval_runtime": 1745.3989, "eval_samples_per_second": 0.573, "eval_steps_per_second": 0.036, "step": 2639 } ], "logging_steps": 500, "max_steps": 3770, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0123914662589235e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }