{ "best_metric": 42.8213, "best_model_checkpoint": "facebook/bart-base-finetuned/checkpoint-60000", "epoch": 7.8237058286608425, "eval_steps": 5000, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 3.967401225713913e-05, "loss": 1.5862, "step": 500 }, { "epoch": 0.13, "learning_rate": 3.9348024514278265e-05, "loss": 1.0258, "step": 1000 }, { "epoch": 0.2, "learning_rate": 3.90220367714174e-05, "loss": 0.8928, "step": 1500 }, { "epoch": 0.26, "learning_rate": 3.869604902855653e-05, "loss": 0.8012, "step": 2000 }, { "epoch": 0.33, "learning_rate": 3.837006128569566e-05, "loss": 0.7591, "step": 2500 }, { "epoch": 0.39, "learning_rate": 3.804407354283479e-05, "loss": 0.7188, "step": 3000 }, { "epoch": 0.46, "learning_rate": 3.7718085799973924e-05, "loss": 0.6802, "step": 3500 }, { "epoch": 0.52, "learning_rate": 3.739209805711306e-05, "loss": 0.6529, "step": 4000 }, { "epoch": 0.59, "learning_rate": 3.7066110314252186e-05, "loss": 0.6373, "step": 4500 }, { "epoch": 0.65, "learning_rate": 3.674012257139132e-05, "loss": 0.6121, "step": 5000 }, { "epoch": 0.65, "eval_bleu": 37.8898, "eval_gen_len": 18.0188, "eval_loss": 0.5747564435005188, "eval_runtime": 121.8837, "eval_samples_per_second": 41.105, "eval_steps_per_second": 0.648, "step": 5000 }, { "epoch": 0.72, "learning_rate": 3.641413482853045e-05, "loss": 0.5929, "step": 5500 }, { "epoch": 0.78, "learning_rate": 3.608814708566958e-05, "loss": 0.5763, "step": 6000 }, { "epoch": 0.85, "learning_rate": 3.576215934280872e-05, "loss": 0.5636, "step": 6500 }, { "epoch": 0.91, "learning_rate": 3.5436171599947845e-05, "loss": 0.5576, "step": 7000 }, { "epoch": 0.98, "learning_rate": 3.511018385708698e-05, "loss": 0.5442, "step": 7500 }, { "epoch": 1.04, "learning_rate": 3.478419611422611e-05, "loss": 0.5211, "step": 8000 }, { "epoch": 1.11, "learning_rate": 3.445820837136524e-05, "loss": 0.508, "step": 8500 }, { "epoch": 1.17, "learning_rate": 3.413222062850437e-05, "loss": 0.5007, "step": 9000 }, { "epoch": 1.24, "learning_rate": 3.38062328856435e-05, "loss": 0.5062, "step": 9500 }, { "epoch": 1.3, "learning_rate": 3.348024514278264e-05, "loss": 0.4967, "step": 10000 }, { "epoch": 1.3, "eval_bleu": 39.3429, "eval_gen_len": 18.0347, "eval_loss": 0.4752778112888336, "eval_runtime": 118.4418, "eval_samples_per_second": 42.299, "eval_steps_per_second": 0.667, "step": 10000 }, { "epoch": 1.37, "learning_rate": 3.3154257399921765e-05, "loss": 0.4876, "step": 10500 }, { "epoch": 1.43, "learning_rate": 3.282826965706089e-05, "loss": 0.4762, "step": 11000 }, { "epoch": 1.5, "learning_rate": 3.250228191420003e-05, "loss": 0.4716, "step": 11500 }, { "epoch": 1.56, "learning_rate": 3.217629417133916e-05, "loss": 0.4632, "step": 12000 }, { "epoch": 1.63, "learning_rate": 3.185030642847829e-05, "loss": 0.4588, "step": 12500 }, { "epoch": 1.7, "learning_rate": 3.1524318685617424e-05, "loss": 0.4605, "step": 13000 }, { "epoch": 1.76, "learning_rate": 3.119833094275655e-05, "loss": 0.457, "step": 13500 }, { "epoch": 1.83, "learning_rate": 3.0872343199895686e-05, "loss": 0.4573, "step": 14000 }, { "epoch": 1.89, "learning_rate": 3.054635545703482e-05, "loss": 0.4527, "step": 14500 }, { "epoch": 1.96, "learning_rate": 3.0220367714173952e-05, "loss": 0.4437, "step": 15000 }, { "epoch": 1.96, "eval_bleu": 40.7435, "eval_gen_len": 18.2399, "eval_loss": 0.4312632977962494, "eval_runtime": 118.0141, "eval_samples_per_second": 42.453, "eval_steps_per_second": 0.669, "step": 15000 }, { "epoch": 2.02, "learning_rate": 2.9894379971313083e-05, "loss": 0.442, "step": 15500 }, { "epoch": 2.09, "learning_rate": 2.956839222845221e-05, "loss": 0.4233, "step": 16000 }, { "epoch": 2.15, "learning_rate": 2.924240448559134e-05, "loss": 0.4186, "step": 16500 }, { "epoch": 2.22, "learning_rate": 2.891641674273048e-05, "loss": 0.4156, "step": 17000 }, { "epoch": 2.28, "learning_rate": 2.8590428999869607e-05, "loss": 0.4211, "step": 17500 }, { "epoch": 2.35, "learning_rate": 2.8264441257008738e-05, "loss": 0.4177, "step": 18000 }, { "epoch": 2.41, "learning_rate": 2.793845351414787e-05, "loss": 0.4088, "step": 18500 }, { "epoch": 2.48, "learning_rate": 2.7612465771287e-05, "loss": 0.4082, "step": 19000 }, { "epoch": 2.54, "learning_rate": 2.7286478028426135e-05, "loss": 0.4094, "step": 19500 }, { "epoch": 2.61, "learning_rate": 2.6960490285565266e-05, "loss": 0.4064, "step": 20000 }, { "epoch": 2.61, "eval_bleu": 41.105, "eval_gen_len": 18.2493, "eval_loss": 0.4060722589492798, "eval_runtime": 118.819, "eval_samples_per_second": 42.165, "eval_steps_per_second": 0.665, "step": 20000 }, { "epoch": 2.67, "learning_rate": 2.6634502542704397e-05, "loss": 0.4069, "step": 20500 }, { "epoch": 2.74, "learning_rate": 2.6308514799843528e-05, "loss": 0.4023, "step": 21000 }, { "epoch": 2.8, "learning_rate": 2.598252705698266e-05, "loss": 0.3994, "step": 21500 }, { "epoch": 2.87, "learning_rate": 2.5656539314121793e-05, "loss": 0.4005, "step": 22000 }, { "epoch": 2.93, "learning_rate": 2.5330551571260924e-05, "loss": 0.4006, "step": 22500 }, { "epoch": 3.0, "learning_rate": 2.5004563828400055e-05, "loss": 0.4013, "step": 23000 }, { "epoch": 3.06, "learning_rate": 2.4678576085539186e-05, "loss": 0.3765, "step": 23500 }, { "epoch": 3.13, "learning_rate": 2.4352588342678317e-05, "loss": 0.3796, "step": 24000 }, { "epoch": 3.19, "learning_rate": 2.402660059981745e-05, "loss": 0.3726, "step": 24500 }, { "epoch": 3.26, "learning_rate": 2.3700612856956583e-05, "loss": 0.3795, "step": 25000 }, { "epoch": 3.26, "eval_bleu": 42.0027, "eval_gen_len": 18.2441, "eval_loss": 0.3876380920410156, "eval_runtime": 120.3412, "eval_samples_per_second": 41.632, "eval_steps_per_second": 0.656, "step": 25000 }, { "epoch": 3.33, "learning_rate": 2.3374625114095714e-05, "loss": 0.3766, "step": 25500 }, { "epoch": 3.39, "learning_rate": 2.304863737123484e-05, "loss": 0.38, "step": 26000 }, { "epoch": 3.46, "learning_rate": 2.2722649628373973e-05, "loss": 0.3732, "step": 26500 }, { "epoch": 3.52, "learning_rate": 2.2396661885513104e-05, "loss": 0.3719, "step": 27000 }, { "epoch": 3.59, "learning_rate": 2.2070674142652238e-05, "loss": 0.3693, "step": 27500 }, { "epoch": 3.65, "learning_rate": 2.174468639979137e-05, "loss": 0.3758, "step": 28000 }, { "epoch": 3.72, "learning_rate": 2.14186986569305e-05, "loss": 0.3599, "step": 28500 }, { "epoch": 3.78, "learning_rate": 2.109271091406963e-05, "loss": 0.3673, "step": 29000 }, { "epoch": 3.85, "learning_rate": 2.0766723171208762e-05, "loss": 0.369, "step": 29500 }, { "epoch": 3.91, "learning_rate": 2.0440735428347897e-05, "loss": 0.3728, "step": 30000 }, { "epoch": 3.91, "eval_bleu": 42.191, "eval_gen_len": 18.2934, "eval_loss": 0.3774366080760956, "eval_runtime": 118.342, "eval_samples_per_second": 42.335, "eval_steps_per_second": 0.668, "step": 30000 }, { "epoch": 3.98, "learning_rate": 2.0114747685487028e-05, "loss": 0.3673, "step": 30500 }, { "epoch": 4.04, "learning_rate": 1.978875994262616e-05, "loss": 0.3562, "step": 31000 }, { "epoch": 4.11, "learning_rate": 1.946277219976529e-05, "loss": 0.3518, "step": 31500 }, { "epoch": 4.17, "learning_rate": 1.913678445690442e-05, "loss": 0.3484, "step": 32000 }, { "epoch": 4.24, "learning_rate": 1.8810796714043552e-05, "loss": 0.3489, "step": 32500 }, { "epoch": 4.3, "learning_rate": 1.8484808971182686e-05, "loss": 0.3529, "step": 33000 }, { "epoch": 4.37, "learning_rate": 1.8158821228321817e-05, "loss": 0.347, "step": 33500 }, { "epoch": 4.43, "learning_rate": 1.783283348546095e-05, "loss": 0.3442, "step": 34000 }, { "epoch": 4.5, "learning_rate": 1.750684574260008e-05, "loss": 0.3498, "step": 34500 }, { "epoch": 4.56, "learning_rate": 1.718085799973921e-05, "loss": 0.3464, "step": 35000 }, { "epoch": 4.56, "eval_bleu": 42.1307, "eval_gen_len": 18.2251, "eval_loss": 0.3665723502635956, "eval_runtime": 117.0033, "eval_samples_per_second": 42.819, "eval_steps_per_second": 0.675, "step": 35000 }, { "epoch": 4.63, "learning_rate": 1.685487025687834e-05, "loss": 0.3484, "step": 35500 }, { "epoch": 4.69, "learning_rate": 1.6528882514017476e-05, "loss": 0.3432, "step": 36000 }, { "epoch": 4.76, "learning_rate": 1.6202894771156604e-05, "loss": 0.3477, "step": 36500 }, { "epoch": 4.82, "learning_rate": 1.5876907028295738e-05, "loss": 0.3476, "step": 37000 }, { "epoch": 4.89, "learning_rate": 1.555091928543487e-05, "loss": 0.3471, "step": 37500 }, { "epoch": 4.96, "learning_rate": 1.5224931542574e-05, "loss": 0.3462, "step": 38000 }, { "epoch": 5.02, "learning_rate": 1.4898943799713133e-05, "loss": 0.3407, "step": 38500 }, { "epoch": 5.09, "learning_rate": 1.4572956056852262e-05, "loss": 0.3312, "step": 39000 }, { "epoch": 5.15, "learning_rate": 1.4246968313991397e-05, "loss": 0.3297, "step": 39500 }, { "epoch": 5.22, "learning_rate": 1.3920980571130526e-05, "loss": 0.3321, "step": 40000 }, { "epoch": 5.22, "eval_bleu": 42.2362, "eval_gen_len": 18.1764, "eval_loss": 0.36170148849487305, "eval_runtime": 117.3524, "eval_samples_per_second": 42.692, "eval_steps_per_second": 0.673, "step": 40000 }, { "epoch": 5.28, "learning_rate": 1.3594992828269657e-05, "loss": 0.33, "step": 40500 }, { "epoch": 5.35, "learning_rate": 1.326900508540879e-05, "loss": 0.3308, "step": 41000 }, { "epoch": 5.41, "learning_rate": 1.2943017342547921e-05, "loss": 0.3305, "step": 41500 }, { "epoch": 5.48, "learning_rate": 1.2617029599687052e-05, "loss": 0.3259, "step": 42000 }, { "epoch": 5.54, "learning_rate": 1.2291041856826185e-05, "loss": 0.3337, "step": 42500 }, { "epoch": 5.61, "learning_rate": 1.1965054113965316e-05, "loss": 0.3334, "step": 43000 }, { "epoch": 5.67, "learning_rate": 1.1639066371104449e-05, "loss": 0.329, "step": 43500 }, { "epoch": 5.74, "learning_rate": 1.131307862824358e-05, "loss": 0.3306, "step": 44000 }, { "epoch": 5.8, "learning_rate": 1.0987090885382709e-05, "loss": 0.33, "step": 44500 }, { "epoch": 5.87, "learning_rate": 1.0661103142521842e-05, "loss": 0.3264, "step": 45000 }, { "epoch": 5.87, "eval_bleu": 42.4323, "eval_gen_len": 18.2745, "eval_loss": 0.3556722104549408, "eval_runtime": 116.9876, "eval_samples_per_second": 42.825, "eval_steps_per_second": 0.675, "step": 45000 }, { "epoch": 5.93, "learning_rate": 1.0335115399660973e-05, "loss": 0.3275, "step": 45500 }, { "epoch": 6.0, "learning_rate": 1.0009127656800106e-05, "loss": 0.3225, "step": 46000 }, { "epoch": 6.06, "learning_rate": 9.683139913939237e-06, "loss": 0.3143, "step": 46500 }, { "epoch": 6.13, "learning_rate": 9.35715217107837e-06, "loss": 0.3169, "step": 47000 }, { "epoch": 6.19, "learning_rate": 9.031164428217499e-06, "loss": 0.3134, "step": 47500 }, { "epoch": 6.26, "learning_rate": 8.705176685356631e-06, "loss": 0.3218, "step": 48000 }, { "epoch": 6.32, "learning_rate": 8.379188942495763e-06, "loss": 0.3169, "step": 48500 }, { "epoch": 6.39, "learning_rate": 8.053201199634895e-06, "loss": 0.3163, "step": 49000 }, { "epoch": 6.45, "learning_rate": 7.727213456774026e-06, "loss": 0.3199, "step": 49500 }, { "epoch": 6.52, "learning_rate": 7.401225713913157e-06, "loss": 0.321, "step": 50000 }, { "epoch": 6.52, "eval_bleu": 42.797, "eval_gen_len": 18.177, "eval_loss": 0.3521122634410858, "eval_runtime": 118.7785, "eval_samples_per_second": 42.179, "eval_steps_per_second": 0.665, "step": 50000 }, { "epoch": 6.58, "learning_rate": 7.075237971052289e-06, "loss": 0.3166, "step": 50500 }, { "epoch": 6.65, "learning_rate": 6.74925022819142e-06, "loss": 0.3137, "step": 51000 }, { "epoch": 6.72, "learning_rate": 6.423262485330552e-06, "loss": 0.314, "step": 51500 }, { "epoch": 6.78, "learning_rate": 6.097274742469683e-06, "loss": 0.3163, "step": 52000 }, { "epoch": 6.85, "learning_rate": 5.771286999608815e-06, "loss": 0.3177, "step": 52500 }, { "epoch": 6.91, "learning_rate": 5.445299256747947e-06, "loss": 0.3153, "step": 53000 }, { "epoch": 6.98, "learning_rate": 5.119311513887079e-06, "loss": 0.3153, "step": 53500 }, { "epoch": 7.04, "learning_rate": 4.79332377102621e-06, "loss": 0.3095, "step": 54000 }, { "epoch": 7.11, "learning_rate": 4.467336028165342e-06, "loss": 0.3073, "step": 54500 }, { "epoch": 7.17, "learning_rate": 4.141348285304473e-06, "loss": 0.3063, "step": 55000 }, { "epoch": 7.17, "eval_bleu": 42.6295, "eval_gen_len": 18.2265, "eval_loss": 0.3495286703109741, "eval_runtime": 120.504, "eval_samples_per_second": 41.575, "eval_steps_per_second": 0.656, "step": 55000 }, { "epoch": 7.24, "learning_rate": 3.815360542443605e-06, "loss": 0.3107, "step": 55500 }, { "epoch": 7.3, "learning_rate": 3.489372799582736e-06, "loss": 0.3138, "step": 56000 }, { "epoch": 7.37, "learning_rate": 3.163385056721868e-06, "loss": 0.3092, "step": 56500 }, { "epoch": 7.43, "learning_rate": 2.837397313860999e-06, "loss": 0.3097, "step": 57000 }, { "epoch": 7.5, "learning_rate": 2.5114095710001308e-06, "loss": 0.3057, "step": 57500 }, { "epoch": 7.56, "learning_rate": 2.1854218281392622e-06, "loss": 0.3045, "step": 58000 }, { "epoch": 7.63, "learning_rate": 1.8594340852783937e-06, "loss": 0.3088, "step": 58500 }, { "epoch": 7.69, "learning_rate": 1.5334463424175252e-06, "loss": 0.306, "step": 59000 }, { "epoch": 7.76, "learning_rate": 1.2074585995566569e-06, "loss": 0.3118, "step": 59500 }, { "epoch": 7.82, "learning_rate": 8.814708566957883e-07, "loss": 0.3043, "step": 60000 }, { "epoch": 7.82, "eval_bleu": 42.8213, "eval_gen_len": 18.2248, "eval_loss": 0.34873583912849426, "eval_runtime": 121.0894, "eval_samples_per_second": 41.374, "eval_steps_per_second": 0.652, "step": 60000 } ], "logging_steps": 500, "max_steps": 61352, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 5000, "total_flos": 3.672168663840768e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }