{ "best_metric": 0.9563, "best_model_checkpoint": ".//t5-base-medium-title-generation/checkpoint-26/checkpoint-4200", "epoch": 0.4445385266723116, "eval_steps": 100, "global_step": 4200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.9788314987298904e-05, "loss": 3.5864, "step": 100 }, { "epoch": 0.01, "eval_gen_len": 0.0, "eval_loss": 0.3862649202346802, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 4322.4201, "eval_samples_per_second": 0.694, "eval_steps_per_second": 0.087, "step": 100 }, { "epoch": 0.02, "learning_rate": 3.95766299745978e-05, "loss": 0.3908, "step": 200 }, { "epoch": 0.02, "eval_gen_len": 27.545, "eval_loss": 0.15635716915130615, "eval_rouge1": 0.8943, "eval_rouge2": 0.8113, "eval_rougeL": 0.8832, "eval_rougeLsum": 0.8831, "eval_runtime": 874.5949, "eval_samples_per_second": 3.43, "eval_steps_per_second": 0.429, "step": 200 }, { "epoch": 0.03, "learning_rate": 3.93649449618967e-05, "loss": 0.2331, "step": 300 }, { "epoch": 0.03, "eval_gen_len": 27.9503, "eval_loss": 0.12861451506614685, "eval_rouge1": 0.9143, "eval_rouge2": 0.8356, "eval_rougeL": 0.9013, "eval_rougeLsum": 0.9013, "eval_runtime": 935.387, "eval_samples_per_second": 3.207, "eval_steps_per_second": 0.401, "step": 300 }, { "epoch": 0.04, "learning_rate": 3.91532599491956e-05, "loss": 0.2134, "step": 400 }, { "epoch": 0.04, "eval_gen_len": 27.837, "eval_loss": 0.113029845058918, "eval_rouge1": 0.9219, "eval_rouge2": 0.8468, "eval_rougeL": 0.909, "eval_rougeLsum": 0.909, "eval_runtime": 940.1053, "eval_samples_per_second": 3.191, "eval_steps_per_second": 0.399, "step": 400 }, { "epoch": 0.05, "learning_rate": 3.89415749364945e-05, "loss": 0.1904, "step": 500 }, { "epoch": 0.05, "eval_gen_len": 27.55, "eval_loss": 0.10243146866559982, "eval_rouge1": 0.9225, "eval_rouge2": 0.8485, "eval_rougeL": 0.91, "eval_rougeLsum": 0.9099, "eval_runtime": 932.1314, "eval_samples_per_second": 3.218, "eval_steps_per_second": 0.402, "step": 500 }, { "epoch": 0.06, "learning_rate": 3.87298899237934e-05, "loss": 0.1284, "step": 600 }, { "epoch": 0.06, "eval_gen_len": 27.7693, "eval_loss": 0.09492403268814087, "eval_rouge1": 0.9265, "eval_rouge2": 0.8568, "eval_rougeL": 0.9153, "eval_rougeLsum": 0.9153, "eval_runtime": 943.4884, "eval_samples_per_second": 3.18, "eval_steps_per_second": 0.397, "step": 600 }, { "epoch": 0.07, "learning_rate": 3.85182049110923e-05, "loss": 0.1306, "step": 700 }, { "epoch": 0.07, "eval_gen_len": 27.888, "eval_loss": 0.08669973909854889, "eval_rouge1": 0.9287, "eval_rouge2": 0.861, "eval_rougeL": 0.918, "eval_rougeLsum": 0.9179, "eval_runtime": 966.9202, "eval_samples_per_second": 3.103, "eval_steps_per_second": 0.388, "step": 700 }, { "epoch": 0.08, "learning_rate": 3.83065198983912e-05, "loss": 0.186, "step": 800 }, { "epoch": 0.08, "eval_gen_len": 27.8787, "eval_loss": 0.0818461924791336, "eval_rouge1": 0.9291, "eval_rouge2": 0.8629, "eval_rougeL": 0.9185, "eval_rougeLsum": 0.9184, "eval_runtime": 1023.9611, "eval_samples_per_second": 2.93, "eval_steps_per_second": 0.366, "step": 800 }, { "epoch": 0.1, "learning_rate": 3.80948348856901e-05, "loss": 0.1131, "step": 900 }, { "epoch": 0.1, "eval_gen_len": 28.089, "eval_loss": 0.0774107277393341, "eval_rouge1": 0.9314, "eval_rouge2": 0.868, "eval_rougeL": 0.9212, "eval_rougeLsum": 0.9212, "eval_runtime": 1130.8505, "eval_samples_per_second": 2.653, "eval_steps_per_second": 0.332, "step": 900 }, { "epoch": 0.11, "learning_rate": 3.7883149872988995e-05, "loss": 0.1014, "step": 1000 }, { "epoch": 0.11, "eval_gen_len": 28.0063, "eval_loss": 0.0748332068324089, "eval_rouge1": 0.9329, "eval_rouge2": 0.8706, "eval_rougeL": 0.9233, "eval_rougeLsum": 0.9234, "eval_runtime": 1138.4406, "eval_samples_per_second": 2.635, "eval_steps_per_second": 0.329, "step": 1000 }, { "epoch": 0.12, "learning_rate": 3.7671464860287896e-05, "loss": 0.1921, "step": 1100 }, { "epoch": 0.12, "eval_gen_len": 28.159, "eval_loss": 0.07171747088432312, "eval_rouge1": 0.9329, "eval_rouge2": 0.871, "eval_rougeL": 0.9231, "eval_rougeLsum": 0.9232, "eval_runtime": 1151.7707, "eval_samples_per_second": 2.605, "eval_steps_per_second": 0.326, "step": 1100 }, { "epoch": 0.13, "learning_rate": 3.745977984758679e-05, "loss": 0.1029, "step": 1200 }, { "epoch": 0.13, "eval_gen_len": 28.3397, "eval_loss": 0.06978185474872589, "eval_rouge1": 0.9341, "eval_rouge2": 0.8739, "eval_rougeL": 0.9249, "eval_rougeLsum": 0.9249, "eval_runtime": 1159.7932, "eval_samples_per_second": 2.587, "eval_steps_per_second": 0.323, "step": 1200 }, { "epoch": 0.14, "learning_rate": 3.724809483488569e-05, "loss": 0.0969, "step": 1300 }, { "epoch": 0.14, "eval_gen_len": 28.2103, "eval_loss": 0.06691702455282211, "eval_rouge1": 0.9342, "eval_rouge2": 0.8759, "eval_rougeL": 0.9254, "eval_rougeLsum": 0.9253, "eval_runtime": 1162.6586, "eval_samples_per_second": 2.58, "eval_steps_per_second": 0.323, "step": 1300 }, { "epoch": 0.15, "learning_rate": 3.703640982218459e-05, "loss": 0.085, "step": 1400 }, { "epoch": 0.15, "eval_gen_len": 28.33, "eval_loss": 0.06451628357172012, "eval_rouge1": 0.9361, "eval_rouge2": 0.8786, "eval_rougeL": 0.9266, "eval_rougeLsum": 0.9267, "eval_runtime": 1171.8855, "eval_samples_per_second": 2.56, "eval_steps_per_second": 0.32, "step": 1400 }, { "epoch": 0.16, "learning_rate": 3.6824724809483495e-05, "loss": 0.1119, "step": 1500 }, { "epoch": 0.16, "eval_gen_len": 28.3293, "eval_loss": 0.06343190371990204, "eval_rouge1": 0.9369, "eval_rouge2": 0.8805, "eval_rougeL": 0.928, "eval_rougeLsum": 0.9281, "eval_runtime": 1156.9521, "eval_samples_per_second": 2.593, "eval_steps_per_second": 0.324, "step": 1500 }, { "epoch": 0.17, "learning_rate": 3.661303979678239e-05, "loss": 0.0942, "step": 1600 }, { "epoch": 0.17, "eval_gen_len": 28.678, "eval_loss": 0.06122381612658501, "eval_rouge1": 0.9364, "eval_rouge2": 0.8806, "eval_rougeL": 0.9276, "eval_rougeLsum": 0.9276, "eval_runtime": 1176.3988, "eval_samples_per_second": 2.55, "eval_steps_per_second": 0.319, "step": 1600 }, { "epoch": 0.18, "learning_rate": 3.640135478408129e-05, "loss": 0.1276, "step": 1700 }, { "epoch": 0.18, "eval_gen_len": 28.7967, "eval_loss": 0.05943905562162399, "eval_rouge1": 0.9384, "eval_rouge2": 0.8842, "eval_rougeL": 0.9292, "eval_rougeLsum": 0.9292, "eval_runtime": 1185.1481, "eval_samples_per_second": 2.531, "eval_steps_per_second": 0.316, "step": 1700 }, { "epoch": 0.19, "learning_rate": 3.618966977138019e-05, "loss": 0.0933, "step": 1800 }, { "epoch": 0.19, "eval_gen_len": 28.5933, "eval_loss": 0.058085959404706955, "eval_rouge1": 0.9371, "eval_rouge2": 0.8832, "eval_rougeL": 0.9286, "eval_rougeLsum": 0.9286, "eval_runtime": 1170.1927, "eval_samples_per_second": 2.564, "eval_steps_per_second": 0.32, "step": 1800 }, { "epoch": 0.2, "learning_rate": 3.597798475867909e-05, "loss": 0.0871, "step": 1900 }, { "epoch": 0.2, "eval_gen_len": 28.6173, "eval_loss": 0.05704612657427788, "eval_rouge1": 0.9399, "eval_rouge2": 0.8878, "eval_rougeL": 0.9316, "eval_rougeLsum": 0.9316, "eval_runtime": 1168.2768, "eval_samples_per_second": 2.568, "eval_steps_per_second": 0.321, "step": 1900 }, { "epoch": 0.21, "learning_rate": 3.576629974597799e-05, "loss": 0.0877, "step": 2000 }, { "epoch": 0.21, "eval_gen_len": 28.888, "eval_loss": 0.05608168616890907, "eval_rouge1": 0.9402, "eval_rouge2": 0.8879, "eval_rougeL": 0.9318, "eval_rougeLsum": 0.9317, "eval_runtime": 1184.9776, "eval_samples_per_second": 2.532, "eval_steps_per_second": 0.316, "step": 2000 }, { "epoch": 0.22, "learning_rate": 3.555461473327689e-05, "loss": 0.1032, "step": 2100 }, { "epoch": 0.22, "eval_gen_len": 28.7567, "eval_loss": 0.054695576429367065, "eval_rouge1": 0.9427, "eval_rouge2": 0.8934, "eval_rougeL": 0.935, "eval_rougeLsum": 0.9351, "eval_runtime": 1149.8597, "eval_samples_per_second": 2.609, "eval_steps_per_second": 0.326, "step": 2100 }, { "epoch": 0.23, "learning_rate": 3.534292972057578e-05, "loss": 0.0769, "step": 2200 }, { "epoch": 0.23, "eval_gen_len": 29.4927, "eval_loss": 0.05339239910244942, "eval_rouge1": 0.9454, "eval_rouge2": 0.8973, "eval_rougeL": 0.9377, "eval_rougeLsum": 0.9376, "eval_runtime": 1232.2333, "eval_samples_per_second": 2.435, "eval_steps_per_second": 0.304, "step": 2200 }, { "epoch": 0.24, "learning_rate": 3.5131244707874684e-05, "loss": 0.0772, "step": 2300 }, { "epoch": 0.24, "eval_gen_len": 29.5703, "eval_loss": 0.05187981203198433, "eval_rouge1": 0.9442, "eval_rouge2": 0.8973, "eval_rougeL": 0.937, "eval_rougeLsum": 0.937, "eval_runtime": 1204.0588, "eval_samples_per_second": 2.492, "eval_steps_per_second": 0.311, "step": 2300 }, { "epoch": 0.25, "learning_rate": 3.4919559695173585e-05, "loss": 0.0687, "step": 2400 }, { "epoch": 0.25, "eval_gen_len": 29.593, "eval_loss": 0.05095092952251434, "eval_rouge1": 0.9454, "eval_rouge2": 0.8989, "eval_rougeL": 0.9381, "eval_rougeLsum": 0.938, "eval_runtime": 1322.9946, "eval_samples_per_second": 2.268, "eval_steps_per_second": 0.283, "step": 2400 }, { "epoch": 0.26, "learning_rate": 3.4707874682472486e-05, "loss": 0.0903, "step": 2500 }, { "epoch": 0.26, "eval_gen_len": 29.057, "eval_loss": 0.05085957422852516, "eval_rouge1": 0.9462, "eval_rouge2": 0.8999, "eval_rougeL": 0.9389, "eval_rougeLsum": 0.9387, "eval_runtime": 1349.0205, "eval_samples_per_second": 2.224, "eval_steps_per_second": 0.278, "step": 2500 }, { "epoch": 0.28, "learning_rate": 3.449618966977138e-05, "loss": 0.0938, "step": 2600 }, { "epoch": 0.28, "eval_gen_len": 29.21, "eval_loss": 0.04919895529747009, "eval_rouge1": 0.9469, "eval_rouge2": 0.9017, "eval_rougeL": 0.9398, "eval_rougeLsum": 0.9397, "eval_runtime": 1312.3378, "eval_samples_per_second": 2.286, "eval_steps_per_second": 0.286, "step": 2600 }, { "epoch": 0.29, "learning_rate": 3.428450465707028e-05, "loss": 0.0681, "step": 2700 }, { "epoch": 0.29, "eval_gen_len": 29.3417, "eval_loss": 0.0489104762673378, "eval_rouge1": 0.9469, "eval_rouge2": 0.9023, "eval_rougeL": 0.9393, "eval_rougeLsum": 0.9392, "eval_runtime": 1314.1986, "eval_samples_per_second": 2.283, "eval_steps_per_second": 0.285, "step": 2700 }, { "epoch": 0.3, "learning_rate": 3.4072819644369177e-05, "loss": 0.0802, "step": 2800 }, { "epoch": 0.3, "eval_gen_len": 29.0317, "eval_loss": 0.04862497001886368, "eval_rouge1": 0.9499, "eval_rouge2": 0.9058, "eval_rougeL": 0.9427, "eval_rougeLsum": 0.9427, "eval_runtime": 1272.9233, "eval_samples_per_second": 2.357, "eval_steps_per_second": 0.295, "step": 2800 }, { "epoch": 0.31, "learning_rate": 3.3861134631668085e-05, "loss": 0.0682, "step": 2900 }, { "epoch": 0.31, "eval_gen_len": 29.1793, "eval_loss": 0.04769635945558548, "eval_rouge1": 0.9487, "eval_rouge2": 0.9048, "eval_rougeL": 0.9412, "eval_rougeLsum": 0.9411, "eval_runtime": 1278.3871, "eval_samples_per_second": 2.347, "eval_steps_per_second": 0.293, "step": 2900 }, { "epoch": 0.32, "learning_rate": 3.364944961896698e-05, "loss": 0.0708, "step": 3000 }, { "epoch": 0.32, "eval_gen_len": 29.5263, "eval_loss": 0.047115448862314224, "eval_rouge1": 0.9507, "eval_rouge2": 0.9085, "eval_rougeL": 0.9431, "eval_rougeLsum": 0.9431, "eval_runtime": 1291.228, "eval_samples_per_second": 2.323, "eval_steps_per_second": 0.29, "step": 3000 }, { "epoch": 0.33, "learning_rate": 3.343776460626588e-05, "loss": 0.0644, "step": 3100 }, { "epoch": 0.33, "eval_gen_len": 28.9983, "eval_loss": 0.0471009723842144, "eval_rouge1": 0.949, "eval_rouge2": 0.9066, "eval_rougeL": 0.9419, "eval_rougeLsum": 0.9419, "eval_runtime": 1231.9334, "eval_samples_per_second": 2.435, "eval_steps_per_second": 0.304, "step": 3100 }, { "epoch": 0.34, "learning_rate": 3.3226079593564775e-05, "loss": 0.0728, "step": 3200 }, { "epoch": 0.34, "eval_gen_len": 29.0363, "eval_loss": 0.045959603041410446, "eval_rouge1": 0.9501, "eval_rouge2": 0.9082, "eval_rougeL": 0.9434, "eval_rougeLsum": 0.9433, "eval_runtime": 1207.0552, "eval_samples_per_second": 2.485, "eval_steps_per_second": 0.311, "step": 3200 }, { "epoch": 0.35, "learning_rate": 3.3014394580863676e-05, "loss": 0.0853, "step": 3300 }, { "epoch": 0.35, "eval_gen_len": 29.644, "eval_loss": 0.04529280588030815, "eval_rouge1": 0.9525, "eval_rouge2": 0.9102, "eval_rougeL": 0.9441, "eval_rougeLsum": 0.9441, "eval_runtime": 1254.7579, "eval_samples_per_second": 2.391, "eval_steps_per_second": 0.299, "step": 3300 }, { "epoch": 0.36, "learning_rate": 3.280270956816258e-05, "loss": 0.0873, "step": 3400 }, { "epoch": 0.36, "eval_gen_len": 29.32, "eval_loss": 0.0445793978869915, "eval_rouge1": 0.9533, "eval_rouge2": 0.9119, "eval_rougeL": 0.9456, "eval_rougeLsum": 0.9455, "eval_runtime": 1224.3613, "eval_samples_per_second": 2.45, "eval_steps_per_second": 0.306, "step": 3400 }, { "epoch": 0.37, "learning_rate": 3.259102455546148e-05, "loss": 0.086, "step": 3500 }, { "epoch": 0.37, "eval_gen_len": 29.097, "eval_loss": 0.04455171898007393, "eval_rouge1": 0.9522, "eval_rouge2": 0.9112, "eval_rougeL": 0.9449, "eval_rougeLsum": 0.9448, "eval_runtime": 1171.6567, "eval_samples_per_second": 2.56, "eval_steps_per_second": 0.32, "step": 3500 }, { "epoch": 0.38, "learning_rate": 3.237933954276037e-05, "loss": 0.0753, "step": 3600 }, { "epoch": 0.38, "eval_gen_len": 29.339, "eval_loss": 0.04441582411527634, "eval_rouge1": 0.9548, "eval_rouge2": 0.9144, "eval_rougeL": 0.9469, "eval_rougeLsum": 0.9469, "eval_runtime": 1151.3782, "eval_samples_per_second": 2.606, "eval_steps_per_second": 0.326, "step": 3600 }, { "epoch": 0.39, "learning_rate": 3.2167654530059274e-05, "loss": 0.063, "step": 3700 }, { "epoch": 0.39, "eval_gen_len": 29.336, "eval_loss": 0.043806854635477066, "eval_rouge1": 0.9545, "eval_rouge2": 0.9137, "eval_rougeL": 0.9467, "eval_rougeLsum": 0.9466, "eval_runtime": 1180.9941, "eval_samples_per_second": 2.54, "eval_steps_per_second": 0.318, "step": 3700 }, { "epoch": 0.4, "learning_rate": 3.1955969517358175e-05, "loss": 0.071, "step": 3800 }, { "epoch": 0.4, "eval_gen_len": 29.4487, "eval_loss": 0.042950402945280075, "eval_rouge1": 0.9554, "eval_rouge2": 0.9147, "eval_rougeL": 0.9476, "eval_rougeLsum": 0.9475, "eval_runtime": 1170.8217, "eval_samples_per_second": 2.562, "eval_steps_per_second": 0.32, "step": 3800 }, { "epoch": 0.41, "learning_rate": 3.1744284504657076e-05, "loss": 0.0872, "step": 3900 }, { "epoch": 0.41, "eval_gen_len": 29.1883, "eval_loss": 0.0422741137444973, "eval_rouge1": 0.955, "eval_rouge2": 0.9156, "eval_rougeL": 0.9475, "eval_rougeLsum": 0.9474, "eval_runtime": 1137.7076, "eval_samples_per_second": 2.637, "eval_steps_per_second": 0.33, "step": 3900 }, { "epoch": 0.42, "learning_rate": 3.153259949195597e-05, "loss": 0.061, "step": 4000 }, { "epoch": 0.42, "eval_gen_len": 29.233, "eval_loss": 0.04252360388636589, "eval_rouge1": 0.9542, "eval_rouge2": 0.9138, "eval_rougeL": 0.9467, "eval_rougeLsum": 0.9466, "eval_runtime": 1123.062, "eval_samples_per_second": 2.671, "eval_steps_per_second": 0.334, "step": 4000 }, { "epoch": 0.43, "learning_rate": 3.132091447925487e-05, "loss": 0.0586, "step": 4100 }, { "epoch": 0.43, "eval_gen_len": 29.2253, "eval_loss": 0.042439624667167664, "eval_rouge1": 0.9545, "eval_rouge2": 0.9142, "eval_rougeL": 0.9467, "eval_rougeLsum": 0.9466, "eval_runtime": 1102.1056, "eval_samples_per_second": 2.722, "eval_steps_per_second": 0.34, "step": 4100 }, { "epoch": 0.44, "learning_rate": 3.1109229466553767e-05, "loss": 0.0753, "step": 4200 }, { "epoch": 0.44, "eval_gen_len": 29.6723, "eval_loss": 0.04101056233048439, "eval_rouge1": 0.9563, "eval_rouge2": 0.9169, "eval_rougeL": 0.9485, "eval_rougeLsum": 0.9483, "eval_runtime": 1138.1006, "eval_samples_per_second": 2.636, "eval_steps_per_second": 0.329, "step": 4200 } ], "logging_steps": 100, "max_steps": 18896, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "total_flos": 1600799565938688.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }