| { | |
| "best_metric": 0.9563, | |
| "best_model_checkpoint": ".//t5-base-medium-title-generation/checkpoint-26/checkpoint-4200", | |
| "epoch": 0.4445385266723116, | |
| "eval_steps": 100, | |
| "global_step": 4200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.9788314987298904e-05, | |
| "loss": 3.5864, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_gen_len": 0.0, | |
| "eval_loss": 0.3862649202346802, | |
| "eval_rouge1": 0.0, | |
| "eval_rouge2": 0.0, | |
| "eval_rougeL": 0.0, | |
| "eval_rougeLsum": 0.0, | |
| "eval_runtime": 4322.4201, | |
| "eval_samples_per_second": 0.694, | |
| "eval_steps_per_second": 0.087, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.95766299745978e-05, | |
| "loss": 0.3908, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_gen_len": 27.545, | |
| "eval_loss": 0.15635716915130615, | |
| "eval_rouge1": 0.8943, | |
| "eval_rouge2": 0.8113, | |
| "eval_rougeL": 0.8832, | |
| "eval_rougeLsum": 0.8831, | |
| "eval_runtime": 874.5949, | |
| "eval_samples_per_second": 3.43, | |
| "eval_steps_per_second": 0.429, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.93649449618967e-05, | |
| "loss": 0.2331, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_gen_len": 27.9503, | |
| "eval_loss": 0.12861451506614685, | |
| "eval_rouge1": 0.9143, | |
| "eval_rouge2": 0.8356, | |
| "eval_rougeL": 0.9013, | |
| "eval_rougeLsum": 0.9013, | |
| "eval_runtime": 935.387, | |
| "eval_samples_per_second": 3.207, | |
| "eval_steps_per_second": 0.401, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.91532599491956e-05, | |
| "loss": 0.2134, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_gen_len": 27.837, | |
| "eval_loss": 0.113029845058918, | |
| "eval_rouge1": 0.9219, | |
| "eval_rouge2": 0.8468, | |
| "eval_rougeL": 0.909, | |
| "eval_rougeLsum": 0.909, | |
| "eval_runtime": 940.1053, | |
| "eval_samples_per_second": 3.191, | |
| "eval_steps_per_second": 0.399, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.89415749364945e-05, | |
| "loss": 0.1904, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_gen_len": 27.55, | |
| "eval_loss": 0.10243146866559982, | |
| "eval_rouge1": 0.9225, | |
| "eval_rouge2": 0.8485, | |
| "eval_rougeL": 0.91, | |
| "eval_rougeLsum": 0.9099, | |
| "eval_runtime": 932.1314, | |
| "eval_samples_per_second": 3.218, | |
| "eval_steps_per_second": 0.402, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.87298899237934e-05, | |
| "loss": 0.1284, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_gen_len": 27.7693, | |
| "eval_loss": 0.09492403268814087, | |
| "eval_rouge1": 0.9265, | |
| "eval_rouge2": 0.8568, | |
| "eval_rougeL": 0.9153, | |
| "eval_rougeLsum": 0.9153, | |
| "eval_runtime": 943.4884, | |
| "eval_samples_per_second": 3.18, | |
| "eval_steps_per_second": 0.397, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.85182049110923e-05, | |
| "loss": 0.1306, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_gen_len": 27.888, | |
| "eval_loss": 0.08669973909854889, | |
| "eval_rouge1": 0.9287, | |
| "eval_rouge2": 0.861, | |
| "eval_rougeL": 0.918, | |
| "eval_rougeLsum": 0.9179, | |
| "eval_runtime": 966.9202, | |
| "eval_samples_per_second": 3.103, | |
| "eval_steps_per_second": 0.388, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 3.83065198983912e-05, | |
| "loss": 0.186, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_gen_len": 27.8787, | |
| "eval_loss": 0.0818461924791336, | |
| "eval_rouge1": 0.9291, | |
| "eval_rouge2": 0.8629, | |
| "eval_rougeL": 0.9185, | |
| "eval_rougeLsum": 0.9184, | |
| "eval_runtime": 1023.9611, | |
| "eval_samples_per_second": 2.93, | |
| "eval_steps_per_second": 0.366, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 3.80948348856901e-05, | |
| "loss": 0.1131, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_gen_len": 28.089, | |
| "eval_loss": 0.0774107277393341, | |
| "eval_rouge1": 0.9314, | |
| "eval_rouge2": 0.868, | |
| "eval_rougeL": 0.9212, | |
| "eval_rougeLsum": 0.9212, | |
| "eval_runtime": 1130.8505, | |
| "eval_samples_per_second": 2.653, | |
| "eval_steps_per_second": 0.332, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 3.7883149872988995e-05, | |
| "loss": 0.1014, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_gen_len": 28.0063, | |
| "eval_loss": 0.0748332068324089, | |
| "eval_rouge1": 0.9329, | |
| "eval_rouge2": 0.8706, | |
| "eval_rougeL": 0.9233, | |
| "eval_rougeLsum": 0.9234, | |
| "eval_runtime": 1138.4406, | |
| "eval_samples_per_second": 2.635, | |
| "eval_steps_per_second": 0.329, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 3.7671464860287896e-05, | |
| "loss": 0.1921, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_gen_len": 28.159, | |
| "eval_loss": 0.07171747088432312, | |
| "eval_rouge1": 0.9329, | |
| "eval_rouge2": 0.871, | |
| "eval_rougeL": 0.9231, | |
| "eval_rougeLsum": 0.9232, | |
| "eval_runtime": 1151.7707, | |
| "eval_samples_per_second": 2.605, | |
| "eval_steps_per_second": 0.326, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 3.745977984758679e-05, | |
| "loss": 0.1029, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_gen_len": 28.3397, | |
| "eval_loss": 0.06978185474872589, | |
| "eval_rouge1": 0.9341, | |
| "eval_rouge2": 0.8739, | |
| "eval_rougeL": 0.9249, | |
| "eval_rougeLsum": 0.9249, | |
| "eval_runtime": 1159.7932, | |
| "eval_samples_per_second": 2.587, | |
| "eval_steps_per_second": 0.323, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 3.724809483488569e-05, | |
| "loss": 0.0969, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_gen_len": 28.2103, | |
| "eval_loss": 0.06691702455282211, | |
| "eval_rouge1": 0.9342, | |
| "eval_rouge2": 0.8759, | |
| "eval_rougeL": 0.9254, | |
| "eval_rougeLsum": 0.9253, | |
| "eval_runtime": 1162.6586, | |
| "eval_samples_per_second": 2.58, | |
| "eval_steps_per_second": 0.323, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 3.703640982218459e-05, | |
| "loss": 0.085, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_gen_len": 28.33, | |
| "eval_loss": 0.06451628357172012, | |
| "eval_rouge1": 0.9361, | |
| "eval_rouge2": 0.8786, | |
| "eval_rougeL": 0.9266, | |
| "eval_rougeLsum": 0.9267, | |
| "eval_runtime": 1171.8855, | |
| "eval_samples_per_second": 2.56, | |
| "eval_steps_per_second": 0.32, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 3.6824724809483495e-05, | |
| "loss": 0.1119, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_gen_len": 28.3293, | |
| "eval_loss": 0.06343190371990204, | |
| "eval_rouge1": 0.9369, | |
| "eval_rouge2": 0.8805, | |
| "eval_rougeL": 0.928, | |
| "eval_rougeLsum": 0.9281, | |
| "eval_runtime": 1156.9521, | |
| "eval_samples_per_second": 2.593, | |
| "eval_steps_per_second": 0.324, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 3.661303979678239e-05, | |
| "loss": 0.0942, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_gen_len": 28.678, | |
| "eval_loss": 0.06122381612658501, | |
| "eval_rouge1": 0.9364, | |
| "eval_rouge2": 0.8806, | |
| "eval_rougeL": 0.9276, | |
| "eval_rougeLsum": 0.9276, | |
| "eval_runtime": 1176.3988, | |
| "eval_samples_per_second": 2.55, | |
| "eval_steps_per_second": 0.319, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.640135478408129e-05, | |
| "loss": 0.1276, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_gen_len": 28.7967, | |
| "eval_loss": 0.05943905562162399, | |
| "eval_rouge1": 0.9384, | |
| "eval_rouge2": 0.8842, | |
| "eval_rougeL": 0.9292, | |
| "eval_rougeLsum": 0.9292, | |
| "eval_runtime": 1185.1481, | |
| "eval_samples_per_second": 2.531, | |
| "eval_steps_per_second": 0.316, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.618966977138019e-05, | |
| "loss": 0.0933, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_gen_len": 28.5933, | |
| "eval_loss": 0.058085959404706955, | |
| "eval_rouge1": 0.9371, | |
| "eval_rouge2": 0.8832, | |
| "eval_rougeL": 0.9286, | |
| "eval_rougeLsum": 0.9286, | |
| "eval_runtime": 1170.1927, | |
| "eval_samples_per_second": 2.564, | |
| "eval_steps_per_second": 0.32, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.597798475867909e-05, | |
| "loss": 0.0871, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_gen_len": 28.6173, | |
| "eval_loss": 0.05704612657427788, | |
| "eval_rouge1": 0.9399, | |
| "eval_rouge2": 0.8878, | |
| "eval_rougeL": 0.9316, | |
| "eval_rougeLsum": 0.9316, | |
| "eval_runtime": 1168.2768, | |
| "eval_samples_per_second": 2.568, | |
| "eval_steps_per_second": 0.321, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.576629974597799e-05, | |
| "loss": 0.0877, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_gen_len": 28.888, | |
| "eval_loss": 0.05608168616890907, | |
| "eval_rouge1": 0.9402, | |
| "eval_rouge2": 0.8879, | |
| "eval_rougeL": 0.9318, | |
| "eval_rougeLsum": 0.9317, | |
| "eval_runtime": 1184.9776, | |
| "eval_samples_per_second": 2.532, | |
| "eval_steps_per_second": 0.316, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.555461473327689e-05, | |
| "loss": 0.1032, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_gen_len": 28.7567, | |
| "eval_loss": 0.054695576429367065, | |
| "eval_rouge1": 0.9427, | |
| "eval_rouge2": 0.8934, | |
| "eval_rougeL": 0.935, | |
| "eval_rougeLsum": 0.9351, | |
| "eval_runtime": 1149.8597, | |
| "eval_samples_per_second": 2.609, | |
| "eval_steps_per_second": 0.326, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.534292972057578e-05, | |
| "loss": 0.0769, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_gen_len": 29.4927, | |
| "eval_loss": 0.05339239910244942, | |
| "eval_rouge1": 0.9454, | |
| "eval_rouge2": 0.8973, | |
| "eval_rougeL": 0.9377, | |
| "eval_rougeLsum": 0.9376, | |
| "eval_runtime": 1232.2333, | |
| "eval_samples_per_second": 2.435, | |
| "eval_steps_per_second": 0.304, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.5131244707874684e-05, | |
| "loss": 0.0772, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_gen_len": 29.5703, | |
| "eval_loss": 0.05187981203198433, | |
| "eval_rouge1": 0.9442, | |
| "eval_rouge2": 0.8973, | |
| "eval_rougeL": 0.937, | |
| "eval_rougeLsum": 0.937, | |
| "eval_runtime": 1204.0588, | |
| "eval_samples_per_second": 2.492, | |
| "eval_steps_per_second": 0.311, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.4919559695173585e-05, | |
| "loss": 0.0687, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_gen_len": 29.593, | |
| "eval_loss": 0.05095092952251434, | |
| "eval_rouge1": 0.9454, | |
| "eval_rouge2": 0.8989, | |
| "eval_rougeL": 0.9381, | |
| "eval_rougeLsum": 0.938, | |
| "eval_runtime": 1322.9946, | |
| "eval_samples_per_second": 2.268, | |
| "eval_steps_per_second": 0.283, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.4707874682472486e-05, | |
| "loss": 0.0903, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_gen_len": 29.057, | |
| "eval_loss": 0.05085957422852516, | |
| "eval_rouge1": 0.9462, | |
| "eval_rouge2": 0.8999, | |
| "eval_rougeL": 0.9389, | |
| "eval_rougeLsum": 0.9387, | |
| "eval_runtime": 1349.0205, | |
| "eval_samples_per_second": 2.224, | |
| "eval_steps_per_second": 0.278, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.449618966977138e-05, | |
| "loss": 0.0938, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_gen_len": 29.21, | |
| "eval_loss": 0.04919895529747009, | |
| "eval_rouge1": 0.9469, | |
| "eval_rouge2": 0.9017, | |
| "eval_rougeL": 0.9398, | |
| "eval_rougeLsum": 0.9397, | |
| "eval_runtime": 1312.3378, | |
| "eval_samples_per_second": 2.286, | |
| "eval_steps_per_second": 0.286, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.428450465707028e-05, | |
| "loss": 0.0681, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_gen_len": 29.3417, | |
| "eval_loss": 0.0489104762673378, | |
| "eval_rouge1": 0.9469, | |
| "eval_rouge2": 0.9023, | |
| "eval_rougeL": 0.9393, | |
| "eval_rougeLsum": 0.9392, | |
| "eval_runtime": 1314.1986, | |
| "eval_samples_per_second": 2.283, | |
| "eval_steps_per_second": 0.285, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.4072819644369177e-05, | |
| "loss": 0.0802, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_gen_len": 29.0317, | |
| "eval_loss": 0.04862497001886368, | |
| "eval_rouge1": 0.9499, | |
| "eval_rouge2": 0.9058, | |
| "eval_rougeL": 0.9427, | |
| "eval_rougeLsum": 0.9427, | |
| "eval_runtime": 1272.9233, | |
| "eval_samples_per_second": 2.357, | |
| "eval_steps_per_second": 0.295, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.3861134631668085e-05, | |
| "loss": 0.0682, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_gen_len": 29.1793, | |
| "eval_loss": 0.04769635945558548, | |
| "eval_rouge1": 0.9487, | |
| "eval_rouge2": 0.9048, | |
| "eval_rougeL": 0.9412, | |
| "eval_rougeLsum": 0.9411, | |
| "eval_runtime": 1278.3871, | |
| "eval_samples_per_second": 2.347, | |
| "eval_steps_per_second": 0.293, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.364944961896698e-05, | |
| "loss": 0.0708, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_gen_len": 29.5263, | |
| "eval_loss": 0.047115448862314224, | |
| "eval_rouge1": 0.9507, | |
| "eval_rouge2": 0.9085, | |
| "eval_rougeL": 0.9431, | |
| "eval_rougeLsum": 0.9431, | |
| "eval_runtime": 1291.228, | |
| "eval_samples_per_second": 2.323, | |
| "eval_steps_per_second": 0.29, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.343776460626588e-05, | |
| "loss": 0.0644, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_gen_len": 28.9983, | |
| "eval_loss": 0.0471009723842144, | |
| "eval_rouge1": 0.949, | |
| "eval_rouge2": 0.9066, | |
| "eval_rougeL": 0.9419, | |
| "eval_rougeLsum": 0.9419, | |
| "eval_runtime": 1231.9334, | |
| "eval_samples_per_second": 2.435, | |
| "eval_steps_per_second": 0.304, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.3226079593564775e-05, | |
| "loss": 0.0728, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_gen_len": 29.0363, | |
| "eval_loss": 0.045959603041410446, | |
| "eval_rouge1": 0.9501, | |
| "eval_rouge2": 0.9082, | |
| "eval_rougeL": 0.9434, | |
| "eval_rougeLsum": 0.9433, | |
| "eval_runtime": 1207.0552, | |
| "eval_samples_per_second": 2.485, | |
| "eval_steps_per_second": 0.311, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.3014394580863676e-05, | |
| "loss": 0.0853, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_gen_len": 29.644, | |
| "eval_loss": 0.04529280588030815, | |
| "eval_rouge1": 0.9525, | |
| "eval_rouge2": 0.9102, | |
| "eval_rougeL": 0.9441, | |
| "eval_rougeLsum": 0.9441, | |
| "eval_runtime": 1254.7579, | |
| "eval_samples_per_second": 2.391, | |
| "eval_steps_per_second": 0.299, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.280270956816258e-05, | |
| "loss": 0.0873, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_gen_len": 29.32, | |
| "eval_loss": 0.0445793978869915, | |
| "eval_rouge1": 0.9533, | |
| "eval_rouge2": 0.9119, | |
| "eval_rougeL": 0.9456, | |
| "eval_rougeLsum": 0.9455, | |
| "eval_runtime": 1224.3613, | |
| "eval_samples_per_second": 2.45, | |
| "eval_steps_per_second": 0.306, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.259102455546148e-05, | |
| "loss": 0.086, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_gen_len": 29.097, | |
| "eval_loss": 0.04455171898007393, | |
| "eval_rouge1": 0.9522, | |
| "eval_rouge2": 0.9112, | |
| "eval_rougeL": 0.9449, | |
| "eval_rougeLsum": 0.9448, | |
| "eval_runtime": 1171.6567, | |
| "eval_samples_per_second": 2.56, | |
| "eval_steps_per_second": 0.32, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.237933954276037e-05, | |
| "loss": 0.0753, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_gen_len": 29.339, | |
| "eval_loss": 0.04441582411527634, | |
| "eval_rouge1": 0.9548, | |
| "eval_rouge2": 0.9144, | |
| "eval_rougeL": 0.9469, | |
| "eval_rougeLsum": 0.9469, | |
| "eval_runtime": 1151.3782, | |
| "eval_samples_per_second": 2.606, | |
| "eval_steps_per_second": 0.326, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.2167654530059274e-05, | |
| "loss": 0.063, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_gen_len": 29.336, | |
| "eval_loss": 0.043806854635477066, | |
| "eval_rouge1": 0.9545, | |
| "eval_rouge2": 0.9137, | |
| "eval_rougeL": 0.9467, | |
| "eval_rougeLsum": 0.9466, | |
| "eval_runtime": 1180.9941, | |
| "eval_samples_per_second": 2.54, | |
| "eval_steps_per_second": 0.318, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.1955969517358175e-05, | |
| "loss": 0.071, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_gen_len": 29.4487, | |
| "eval_loss": 0.042950402945280075, | |
| "eval_rouge1": 0.9554, | |
| "eval_rouge2": 0.9147, | |
| "eval_rougeL": 0.9476, | |
| "eval_rougeLsum": 0.9475, | |
| "eval_runtime": 1170.8217, | |
| "eval_samples_per_second": 2.562, | |
| "eval_steps_per_second": 0.32, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.1744284504657076e-05, | |
| "loss": 0.0872, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_gen_len": 29.1883, | |
| "eval_loss": 0.0422741137444973, | |
| "eval_rouge1": 0.955, | |
| "eval_rouge2": 0.9156, | |
| "eval_rougeL": 0.9475, | |
| "eval_rougeLsum": 0.9474, | |
| "eval_runtime": 1137.7076, | |
| "eval_samples_per_second": 2.637, | |
| "eval_steps_per_second": 0.33, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.153259949195597e-05, | |
| "loss": 0.061, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_gen_len": 29.233, | |
| "eval_loss": 0.04252360388636589, | |
| "eval_rouge1": 0.9542, | |
| "eval_rouge2": 0.9138, | |
| "eval_rougeL": 0.9467, | |
| "eval_rougeLsum": 0.9466, | |
| "eval_runtime": 1123.062, | |
| "eval_samples_per_second": 2.671, | |
| "eval_steps_per_second": 0.334, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.132091447925487e-05, | |
| "loss": 0.0586, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_gen_len": 29.2253, | |
| "eval_loss": 0.042439624667167664, | |
| "eval_rouge1": 0.9545, | |
| "eval_rouge2": 0.9142, | |
| "eval_rougeL": 0.9467, | |
| "eval_rougeLsum": 0.9466, | |
| "eval_runtime": 1102.1056, | |
| "eval_samples_per_second": 2.722, | |
| "eval_steps_per_second": 0.34, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.1109229466553767e-05, | |
| "loss": 0.0753, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_gen_len": 29.6723, | |
| "eval_loss": 0.04101056233048439, | |
| "eval_rouge1": 0.9563, | |
| "eval_rouge2": 0.9169, | |
| "eval_rougeL": 0.9485, | |
| "eval_rougeLsum": 0.9483, | |
| "eval_runtime": 1138.1006, | |
| "eval_samples_per_second": 2.636, | |
| "eval_steps_per_second": 0.329, | |
| "step": 4200 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 18896, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 200, | |
| "total_flos": 1600799565938688.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |