| { | |
| "best_metric": 2.025848865509033, | |
| "best_model_checkpoint": "./checkpoints/barthez-deft-sciences_de_l_information/checkpoint-424", | |
| "epoch": 20.0, | |
| "global_step": 2120, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.869811320754717e-05, | |
| "loss": 3.3405, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 14.9714, | |
| "eval_loss": 2.368244171142578, | |
| "eval_rouge1": 31.3511, | |
| "eval_rouge2": 12.1973, | |
| "eval_rougeL": 25.6977, | |
| "eval_rougeLsum": 25.6851, | |
| "eval_runtime": 3.4096, | |
| "eval_samples_per_second": 20.53, | |
| "eval_steps_per_second": 5.279, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.728301886792453e-05, | |
| "loss": 2.4219, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gen_len": 14.0429, | |
| "eval_loss": 2.1890599727630615, | |
| "eval_rouge1": 30.1154, | |
| "eval_rouge2": 13.3459, | |
| "eval_rougeL": 25.4854, | |
| "eval_rougeLsum": 25.5403, | |
| "eval_runtime": 3.216, | |
| "eval_samples_per_second": 21.766, | |
| "eval_steps_per_second": 5.597, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.586792452830189e-05, | |
| "loss": 2.0789, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_gen_len": 15.2, | |
| "eval_loss": 2.0993804931640625, | |
| "eval_rouge1": 32.153, | |
| "eval_rouge2": 15.3865, | |
| "eval_rougeL": 26.1859, | |
| "eval_rougeLsum": 26.1672, | |
| "eval_runtime": 3.3607, | |
| "eval_samples_per_second": 20.829, | |
| "eval_steps_per_second": 5.356, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 2.4452830188679248e-05, | |
| "loss": 1.869, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_gen_len": 16.9857, | |
| "eval_loss": 2.025848865509033, | |
| "eval_rouge1": 34.5797, | |
| "eval_rouge2": 16.4194, | |
| "eval_rougeL": 27.6909, | |
| "eval_rougeLsum": 27.7201, | |
| "eval_runtime": 3.3358, | |
| "eval_samples_per_second": 20.985, | |
| "eval_steps_per_second": 5.396, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 2.3037735849056604e-05, | |
| "loss": 1.6569, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_gen_len": 15.2429, | |
| "eval_loss": 2.0417497158050537, | |
| "eval_rouge1": 34.3854, | |
| "eval_rouge2": 16.5237, | |
| "eval_rougeL": 28.7036, | |
| "eval_rougeLsum": 28.8258, | |
| "eval_runtime": 3.3091, | |
| "eval_samples_per_second": 21.154, | |
| "eval_steps_per_second": 5.44, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 2.162264150943396e-05, | |
| "loss": 1.5414, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_gen_len": 16.0143, | |
| "eval_loss": 2.050337791442871, | |
| "eval_rouge1": 33.1768, | |
| "eval_rouge2": 15.4851, | |
| "eval_rougeL": 27.2818, | |
| "eval_rougeLsum": 27.2884, | |
| "eval_runtime": 3.4107, | |
| "eval_samples_per_second": 20.524, | |
| "eval_steps_per_second": 5.278, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 2.020754716981132e-05, | |
| "loss": 1.4461, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_gen_len": 16.6857, | |
| "eval_loss": 2.029313802719116, | |
| "eval_rouge1": 35.4273, | |
| "eval_rouge2": 16.118, | |
| "eval_rougeL": 27.3622, | |
| "eval_rougeLsum": 27.393, | |
| "eval_runtime": 3.3635, | |
| "eval_samples_per_second": 20.812, | |
| "eval_steps_per_second": 5.352, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 1.879245283018868e-05, | |
| "loss": 1.3435, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_gen_len": 17.2, | |
| "eval_loss": 2.033561944961548, | |
| "eval_rouge1": 35.3471, | |
| "eval_rouge2": 15.9695, | |
| "eval_rougeL": 27.668, | |
| "eval_rougeLsum": 27.6749, | |
| "eval_runtime": 3.4311, | |
| "eval_samples_per_second": 20.402, | |
| "eval_steps_per_second": 5.246, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "learning_rate": 1.737735849056604e-05, | |
| "loss": 1.2624, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_gen_len": 17.1857, | |
| "eval_loss": 2.077875852584839, | |
| "eval_rouge1": 35.9201, | |
| "eval_rouge2": 17.2547, | |
| "eval_rougeL": 27.409, | |
| "eval_rougeLsum": 27.3293, | |
| "eval_runtime": 3.4, | |
| "eval_samples_per_second": 20.588, | |
| "eval_steps_per_second": 5.294, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 1.5962264150943395e-05, | |
| "loss": 1.1807, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_gen_len": 17.1286, | |
| "eval_loss": 2.130077600479126, | |
| "eval_rouge1": 35.7061, | |
| "eval_rouge2": 15.9138, | |
| "eval_rougeL": 27.3968, | |
| "eval_rougeLsum": 27.4716, | |
| "eval_runtime": 3.3605, | |
| "eval_samples_per_second": 20.83, | |
| "eval_steps_per_second": 5.356, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "learning_rate": 1.4547169811320755e-05, | |
| "loss": 1.0972, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_gen_len": 17.1429, | |
| "eval_loss": 2.1725852489471436, | |
| "eval_rouge1": 34.3194, | |
| "eval_rouge2": 16.1313, | |
| "eval_rougeL": 27.0367, | |
| "eval_rougeLsum": 27.0737, | |
| "eval_runtime": 3.3905, | |
| "eval_samples_per_second": 20.646, | |
| "eval_steps_per_second": 5.309, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 11.32, | |
| "learning_rate": 1.3132075471698114e-05, | |
| "loss": 1.0224, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_gen_len": 16.6571, | |
| "eval_loss": 2.1703763008117676, | |
| "eval_rouge1": 34.9278, | |
| "eval_rouge2": 16.7958, | |
| "eval_rougeL": 27.8754, | |
| "eval_rougeLsum": 27.932, | |
| "eval_runtime": 3.3798, | |
| "eval_samples_per_second": 20.711, | |
| "eval_steps_per_second": 5.326, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 12.26, | |
| "learning_rate": 1.1716981132075474e-05, | |
| "loss": 1.0181, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_gen_len": 16.7571, | |
| "eval_loss": 2.245802879333496, | |
| "eval_rouge1": 34.472, | |
| "eval_rouge2": 15.9111, | |
| "eval_rougeL": 28.2938, | |
| "eval_rougeLsum": 28.2946, | |
| "eval_runtime": 3.367, | |
| "eval_samples_per_second": 20.79, | |
| "eval_steps_per_second": 5.346, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 13.21, | |
| "learning_rate": 1.030188679245283e-05, | |
| "loss": 0.9769, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_gen_len": 16.5429, | |
| "eval_loss": 2.3404934406280518, | |
| "eval_rouge1": 35.1592, | |
| "eval_rouge2": 16.3135, | |
| "eval_rougeL": 29.0956, | |
| "eval_rougeLsum": 29.0858, | |
| "eval_runtime": 3.3904, | |
| "eval_samples_per_second": 20.647, | |
| "eval_steps_per_second": 5.309, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 14.15, | |
| "learning_rate": 8.900943396226416e-06, | |
| "loss": 0.8866, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_gen_len": 16.2429, | |
| "eval_loss": 2.3303470611572266, | |
| "eval_rouge1": 34.8732, | |
| "eval_rouge2": 15.6709, | |
| "eval_rougeL": 27.5858, | |
| "eval_rougeLsum": 27.6169, | |
| "eval_runtime": 3.4313, | |
| "eval_samples_per_second": 20.401, | |
| "eval_steps_per_second": 5.246, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 15.09, | |
| "learning_rate": 7.485849056603774e-06, | |
| "loss": 0.8888, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_gen_len": 17.5143, | |
| "eval_loss": 2.297647476196289, | |
| "eval_rouge1": 35.3034, | |
| "eval_rouge2": 16.8011, | |
| "eval_rougeL": 27.7988, | |
| "eval_rougeLsum": 27.7569, | |
| "eval_runtime": 3.3934, | |
| "eval_samples_per_second": 20.628, | |
| "eval_steps_per_second": 5.304, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 16.04, | |
| "learning_rate": 6.070754716981133e-06, | |
| "loss": 0.8194, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 16.98, | |
| "learning_rate": 4.6556603773584905e-06, | |
| "loss": 0.8358, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_gen_len": 16.8143, | |
| "eval_loss": 2.334933042526245, | |
| "eval_rouge1": 35.505, | |
| "eval_rouge2": 16.8851, | |
| "eval_rougeL": 28.3651, | |
| "eval_rougeLsum": 28.413, | |
| "eval_runtime": 3.4202, | |
| "eval_samples_per_second": 20.467, | |
| "eval_steps_per_second": 5.263, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 17.92, | |
| "learning_rate": 3.240566037735849e-06, | |
| "loss": 0.8026, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_gen_len": 16.6143, | |
| "eval_loss": 2.373809814453125, | |
| "eval_rouge1": 35.2328, | |
| "eval_rouge2": 17.0358, | |
| "eval_rougeL": 28.544, | |
| "eval_rougeLsum": 28.6211, | |
| "eval_runtime": 3.4477, | |
| "eval_samples_per_second": 20.303, | |
| "eval_steps_per_second": 5.221, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 18.87, | |
| "learning_rate": 1.8254716981132076e-06, | |
| "loss": 0.7487, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_gen_len": 16.7286, | |
| "eval_loss": 2.4102871417999268, | |
| "eval_rouge1": 34.0793, | |
| "eval_rouge2": 15.4468, | |
| "eval_rougeL": 27.8057, | |
| "eval_rougeLsum": 27.8586, | |
| "eval_runtime": 3.4317, | |
| "eval_samples_per_second": 20.398, | |
| "eval_steps_per_second": 5.245, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 19.81, | |
| "learning_rate": 4.1037735849056606e-07, | |
| "loss": 0.7722, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_gen_len": 16.9286, | |
| "eval_loss": 2.3990561962127686, | |
| "eval_rouge1": 34.8116, | |
| "eval_rouge2": 15.8706, | |
| "eval_rougeL": 27.9173, | |
| "eval_rougeLsum": 27.983, | |
| "eval_runtime": 3.3605, | |
| "eval_samples_per_second": 20.83, | |
| "eval_steps_per_second": 5.356, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 2120, | |
| "total_flos": 1197078338174976.0, | |
| "train_loss": 1.328409050995449, | |
| "train_runtime": 326.8588, | |
| "train_samples_per_second": 25.944, | |
| "train_steps_per_second": 6.486 | |
| } | |
| ], | |
| "max_steps": 2120, | |
| "num_train_epochs": 20, | |
| "total_flos": 1197078338174976.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |