{ "best_metric": null, "best_model_checkpoint": null, "epoch": 13.44, "global_step": 42000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 0.011603174731135368, "loss": 1.5357, "step": 500 }, { "epoch": 0.32, "learning_rate": 0.023230696097016335, "loss": 0.0593, "step": 1000 }, { "epoch": 0.48, "learning_rate": 0.034898791462183, "loss": 0.0531, "step": 1500 }, { "epoch": 0.64, "learning_rate": 0.04661125689744949, "loss": 0.044, "step": 2000 }, { "epoch": 0.8, "learning_rate": 0.05841406062245369, "loss": 0.0461, "step": 2500 }, { "epoch": 0.96, "learning_rate": 0.07035055011510849, "loss": 0.0433, "step": 3000 }, { "epoch": 1.0, "eval_avg_length": 17.2926, "eval_bleu": 0.0241, "eval_loss": 0.06959044933319092, "eval_rouge1": 0.5901, "eval_rouge2": 0.5715, "eval_runtime": 56.7394, "eval_samples_per_second": 88.122, "eval_steps_per_second": 2.767, "step": 3125 }, { "epoch": 1.12, "learning_rate": 0.08241234719753265, "loss": 0.0408, "step": 3500 }, { "epoch": 1.28, "learning_rate": 0.09467849135398865, "loss": 0.0406, "step": 4000 }, { "epoch": 1.44, "learning_rate": 0.10735532641410828, "loss": 0.0445, "step": 4500 }, { "epoch": 1.6, "learning_rate": 0.12028851360082626, "loss": 0.0406, "step": 5000 }, { "epoch": 1.76, "learning_rate": 0.13352635502815247, "loss": 0.0405, "step": 5500 }, { "epoch": 1.92, "learning_rate": 0.1470595896244049, "loss": 0.0412, "step": 6000 }, { "epoch": 2.0, "eval_avg_length": 17.3172, "eval_bleu": 0.0237, "eval_loss": 0.07180308550596237, "eval_rouge1": 0.5879, "eval_rouge2": 0.5692, "eval_runtime": 55.7642, "eval_samples_per_second": 89.663, "eval_steps_per_second": 2.815, "step": 6250 }, { "epoch": 2.08, "learning_rate": 0.16096152365207672, "loss": 0.0395, "step": 6500 }, { "epoch": 2.24, "learning_rate": 0.17507727444171906, "loss": 0.0407, "step": 7000 }, { "epoch": 2.4, "learning_rate": 0.18983061611652374, "loss": 0.0393, "step": 7500 }, { "epoch": 2.56, "learning_rate": 0.2049846649169922, "loss": 0.0401, "step": 8000 }, { "epoch": 2.72, "learning_rate": 0.22153085470199585, "loss": 0.0414, "step": 8500 }, { "epoch": 2.88, "learning_rate": 0.23806197941303253, "loss": 0.0425, "step": 9000 }, { "epoch": 3.0, "eval_avg_length": 17.3112, "eval_bleu": 0.0237, "eval_loss": 0.07267959415912628, "eval_rouge1": 0.5871, "eval_rouge2": 0.5681, "eval_runtime": 54.2458, "eval_samples_per_second": 92.173, "eval_steps_per_second": 2.894, "step": 9375 }, { "epoch": 3.04, "learning_rate": 0.2547408640384674, "loss": 0.0393, "step": 9500 }, { "epoch": 3.2, "learning_rate": 0.27236127853393555, "loss": 0.0377, "step": 10000 }, { "epoch": 3.36, "learning_rate": 0.26978379487991333, "loss": 0.0372, "step": 10500 }, { "epoch": 3.52, "learning_rate": 0.26798078417778015, "loss": 0.0383, "step": 11000 }, { "epoch": 3.68, "learning_rate": 0.2658974826335907, "loss": 0.0391, "step": 11500 }, { "epoch": 3.84, "learning_rate": 0.2626633942127228, "loss": 0.0392, "step": 12000 }, { "epoch": 4.0, "learning_rate": 0.26069551706314087, "loss": 0.0381, "step": 12500 }, { "epoch": 4.0, "eval_avg_length": 17.321, "eval_bleu": 0.0238, "eval_loss": 0.07149858772754669, "eval_rouge1": 0.5866, "eval_rouge2": 0.5677, "eval_runtime": 52.6009, "eval_samples_per_second": 95.055, "eval_steps_per_second": 2.985, "step": 12500 }, { "epoch": 4.16, "learning_rate": 0.25828537344932556, "loss": 0.0341, "step": 13000 }, { "epoch": 4.32, "learning_rate": 0.2559824585914612, "loss": 0.0323, "step": 13500 }, { "epoch": 4.48, "learning_rate": 0.2536338269710541, "loss": 0.0316, "step": 14000 }, { "epoch": 4.64, "learning_rate": 0.2517462372779846, "loss": 0.0314, "step": 14500 }, { "epoch": 4.8, "learning_rate": 0.25038695335388184, "loss": 0.0317, "step": 15000 }, { "epoch": 4.96, "learning_rate": 0.24828433990478516, "loss": 0.0334, "step": 15500 }, { "epoch": 5.0, "eval_avg_length": 17.3102, "eval_bleu": 0.0237, "eval_loss": 0.07320648431777954, "eval_rouge1": 0.5873, "eval_rouge2": 0.5682, "eval_runtime": 52.0328, "eval_samples_per_second": 96.093, "eval_steps_per_second": 3.017, "step": 15625 }, { "epoch": 5.12, "learning_rate": 0.24564550817012787, "loss": 0.028, "step": 16000 }, { "epoch": 5.28, "learning_rate": 0.24398992955684662, "loss": 0.0279, "step": 16500 }, { "epoch": 5.44, "learning_rate": 0.24214749038219452, "loss": 0.0277, "step": 17000 }, { "epoch": 5.6, "learning_rate": 0.24071773886680603, "loss": 0.0302, "step": 17500 }, { "epoch": 5.76, "learning_rate": 0.23937062919139862, "loss": 0.0268, "step": 18000 }, { "epoch": 5.92, "learning_rate": 0.23846393823623657, "loss": 0.0293, "step": 18500 }, { "epoch": 6.0, "eval_avg_length": 17.306, "eval_bleu": 0.0237, "eval_loss": 0.07230091094970703, "eval_rouge1": 0.5874, "eval_rouge2": 0.5673, "eval_runtime": 51.9475, "eval_samples_per_second": 96.251, "eval_steps_per_second": 3.022, "step": 18750 }, { "epoch": 6.08, "learning_rate": 0.23700089752674103, "loss": 0.0268, "step": 19000 }, { "epoch": 6.24, "learning_rate": 0.23510468006134033, "loss": 0.025, "step": 19500 }, { "epoch": 6.4, "learning_rate": 0.23372133076190948, "loss": 0.0234, "step": 20000 }, { "epoch": 6.56, "learning_rate": 0.23236095905303955, "loss": 0.0242, "step": 20500 }, { "epoch": 6.72, "learning_rate": 0.23128560185432434, "loss": 0.0243, "step": 21000 }, { "epoch": 6.88, "learning_rate": 0.23010540008544922, "loss": 0.0251, "step": 21500 }, { "epoch": 7.0, "eval_avg_length": 17.3118, "eval_bleu": 0.0237, "eval_loss": 0.07389520108699799, "eval_rouge1": 0.5872, "eval_rouge2": 0.5672, "eval_runtime": 52.1854, "eval_samples_per_second": 95.812, "eval_steps_per_second": 3.009, "step": 21875 }, { "epoch": 7.04, "learning_rate": 0.22910726070404053, "loss": 0.0244, "step": 22000 }, { "epoch": 7.2, "learning_rate": 0.22765885293483734, "loss": 0.0194, "step": 22500 }, { "epoch": 7.36, "learning_rate": 0.22604411840438843, "loss": 0.0211, "step": 23000 }, { "epoch": 7.52, "learning_rate": 0.22470736503601074, "loss": 0.0205, "step": 23500 }, { "epoch": 7.68, "learning_rate": 0.22348329424858093, "loss": 0.0223, "step": 24000 }, { "epoch": 7.84, "learning_rate": 0.22219397127628326, "loss": 0.0226, "step": 24500 }, { "epoch": 8.0, "learning_rate": 0.2213691771030426, "loss": 0.0222, "step": 25000 }, { "epoch": 8.0, "eval_avg_length": 17.307, "eval_bleu": 0.0237, "eval_loss": 0.07857740670442581, "eval_rouge1": 0.5869, "eval_rouge2": 0.5671, "eval_runtime": 52.6845, "eval_samples_per_second": 94.905, "eval_steps_per_second": 2.98, "step": 25000 }, { "epoch": 8.16, "learning_rate": 0.22002732753753662, "loss": 0.0166, "step": 25500 }, { "epoch": 8.32, "learning_rate": 0.21867167949676514, "loss": 0.018, "step": 26000 }, { "epoch": 8.48, "learning_rate": 0.21748971939086914, "loss": 0.0193, "step": 26500 }, { "epoch": 8.64, "learning_rate": 0.2165619432926178, "loss": 0.0194, "step": 27000 }, { "epoch": 8.8, "learning_rate": 0.2155638188123703, "loss": 0.019, "step": 27500 }, { "epoch": 8.96, "learning_rate": 0.2147785723209381, "loss": 0.0183, "step": 28000 }, { "epoch": 9.0, "eval_avg_length": 17.3154, "eval_bleu": 0.0237, "eval_loss": 0.07949095219373703, "eval_rouge1": 0.5873, "eval_rouge2": 0.5677, "eval_runtime": 52.9652, "eval_samples_per_second": 94.402, "eval_steps_per_second": 2.964, "step": 28125 }, { "epoch": 9.12, "learning_rate": 0.21370729804039001, "loss": 0.0163, "step": 28500 }, { "epoch": 9.28, "learning_rate": 0.2124933898448944, "loss": 0.0155, "step": 29000 }, { "epoch": 9.44, "learning_rate": 0.21130582690238953, "loss": 0.0152, "step": 29500 }, { "epoch": 9.6, "learning_rate": 0.21038685739040375, "loss": 0.0158, "step": 30000 }, { "epoch": 9.76, "learning_rate": 0.20950140058994293, "loss": 0.0165, "step": 30500 }, { "epoch": 9.92, "learning_rate": 0.20879273116588593, "loss": 0.0183, "step": 31000 }, { "epoch": 10.0, "eval_avg_length": 17.308, "eval_bleu": 0.0236, "eval_loss": 0.08381666988134384, "eval_rouge1": 0.5869, "eval_rouge2": 0.5668, "eval_runtime": 53.7004, "eval_samples_per_second": 93.109, "eval_steps_per_second": 2.924, "step": 31250 }, { "epoch": 10.08, "learning_rate": 0.20795859396457672, "loss": 0.0153, "step": 31500 }, { "epoch": 10.24, "learning_rate": 0.20694369077682495, "loss": 0.0132, "step": 32000 }, { "epoch": 10.4, "learning_rate": 0.20593030750751495, "loss": 0.0145, "step": 32500 }, { "epoch": 10.56, "learning_rate": 0.20512063801288605, "loss": 0.0142, "step": 33000 }, { "epoch": 10.72, "learning_rate": 0.204327791929245, "loss": 0.0136, "step": 33500 }, { "epoch": 10.88, "learning_rate": 0.2036626935005188, "loss": 0.0147, "step": 34000 }, { "epoch": 11.0, "eval_avg_length": 17.3118, "eval_bleu": 0.0236, "eval_loss": 0.08516956865787506, "eval_rouge1": 0.5867, "eval_rouge2": 0.567, "eval_runtime": 53.2317, "eval_samples_per_second": 93.929, "eval_steps_per_second": 2.949, "step": 34375 }, { "epoch": 11.04, "learning_rate": 0.20300111174583435, "loss": 0.0145, "step": 34500 }, { "epoch": 11.2, "learning_rate": 0.20208755135536194, "loss": 0.0112, "step": 35000 }, { "epoch": 11.36, "learning_rate": 0.20132538676261902, "loss": 0.0123, "step": 35500 }, { "epoch": 11.52, "learning_rate": 0.2005048245191574, "loss": 0.012, "step": 36000 }, { "epoch": 11.68, "learning_rate": 0.1997680366039276, "loss": 0.013, "step": 36500 }, { "epoch": 11.84, "learning_rate": 0.19915533065795898, "loss": 0.0131, "step": 37000 }, { "epoch": 12.0, "learning_rate": 0.19845974445343018, "loss": 0.0132, "step": 37500 }, { "epoch": 12.0, "eval_avg_length": 17.3078, "eval_bleu": 0.0236, "eval_loss": 0.08685878664255142, "eval_rouge1": 0.5872, "eval_rouge2": 0.5671, "eval_runtime": 53.7846, "eval_samples_per_second": 92.963, "eval_steps_per_second": 2.919, "step": 37500 }, { "epoch": 12.16, "learning_rate": 0.19766050577163696, "loss": 0.0098, "step": 38000 }, { "epoch": 12.32, "learning_rate": 0.19691884517669678, "loss": 0.0105, "step": 38500 }, { "epoch": 12.48, "learning_rate": 0.19617730379104614, "loss": 0.0101, "step": 39000 }, { "epoch": 12.64, "learning_rate": 0.19556531310081482, "loss": 0.0118, "step": 39500 }, { "epoch": 12.8, "learning_rate": 0.1950163096189499, "loss": 0.0112, "step": 40000 }, { "epoch": 12.96, "learning_rate": 0.19457842409610748, "loss": 0.0128, "step": 40500 }, { "epoch": 13.0, "eval_avg_length": 17.313, "eval_bleu": 0.0236, "eval_loss": 0.09262268990278244, "eval_rouge1": 0.5859, "eval_rouge2": 0.5654, "eval_runtime": 54.0715, "eval_samples_per_second": 92.47, "eval_steps_per_second": 2.904, "step": 40625 }, { "epoch": 13.12, "learning_rate": 0.19387850165367126, "loss": 0.0086, "step": 41000 }, { "epoch": 13.28, "learning_rate": 0.19318543374538422, "loss": 0.0087, "step": 41500 }, { "epoch": 13.44, "learning_rate": 0.19256973266601562, "loss": 0.0095, "step": 42000 } ], "max_steps": 312500, "num_train_epochs": 100, "total_flos": 9.0949690589184e+16, "trial_name": null, "trial_params": null }