| { |
| "best_metric": 2.132361888885498, |
| "best_model_checkpoint": "./26-125356_megasuperkanin/checkpoint-100000", |
| "epoch": 0.9769822970807769, |
| "global_step": 100000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 5e-05, |
| "loss": 2.6761, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 5e-05, |
| "loss": 2.551, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_gen_len": 28.4674, |
| "eval_loss": 2.423037052154541, |
| "eval_rouge1": 0.214, |
| "eval_rouge2": 0.0668, |
| "eval_rougeL": 0.1717, |
| "eval_rougeLsum": 0.1777, |
| "eval_runtime": 1015.6418, |
| "eval_samples_per_second": 2.265, |
| "eval_steps_per_second": 0.284, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 5e-05, |
| "loss": 2.5186, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 5e-05, |
| "loss": 2.4717, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_gen_len": 25.6604, |
| "eval_loss": 2.3709843158721924, |
| "eval_rouge1": 0.2071, |
| "eval_rouge2": 0.0634, |
| "eval_rougeL": 0.1686, |
| "eval_rougeLsum": 0.1745, |
| "eval_runtime": 951.1096, |
| "eval_samples_per_second": 2.418, |
| "eval_steps_per_second": 0.303, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 5e-05, |
| "loss": 2.4593, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 5e-05, |
| "loss": 2.4281, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.15, |
| "eval_gen_len": 28.8296, |
| "eval_loss": 2.3228819370269775, |
| "eval_rouge1": 0.2137, |
| "eval_rouge2": 0.0662, |
| "eval_rougeL": 0.1711, |
| "eval_rougeLsum": 0.1768, |
| "eval_runtime": 1022.9494, |
| "eval_samples_per_second": 2.248, |
| "eval_steps_per_second": 0.282, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 5e-05, |
| "loss": 2.4049, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 5e-05, |
| "loss": 2.3735, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_gen_len": 29.9183, |
| "eval_loss": 2.2881429195404053, |
| "eval_rouge1": 0.2164, |
| "eval_rouge2": 0.0668, |
| "eval_rougeL": 0.1735, |
| "eval_rougeLsum": 0.1808, |
| "eval_runtime": 1036.2984, |
| "eval_samples_per_second": 2.219, |
| "eval_steps_per_second": 0.278, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 5e-05, |
| "loss": 2.3732, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 5e-05, |
| "loss": 2.377, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_gen_len": 29.5183, |
| "eval_loss": 2.2759358882904053, |
| "eval_rouge1": 0.2209, |
| "eval_rouge2": 0.0694, |
| "eval_rougeL": 0.1782, |
| "eval_rougeLsum": 0.1851, |
| "eval_runtime": 1036.1071, |
| "eval_samples_per_second": 2.22, |
| "eval_steps_per_second": 0.278, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 5e-05, |
| "loss": 2.3513, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 5e-05, |
| "loss": 2.3444, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.29, |
| "eval_gen_len": 29.3183, |
| "eval_loss": 2.2552034854888916, |
| "eval_rouge1": 0.2194, |
| "eval_rouge2": 0.0679, |
| "eval_rougeL": 0.1757, |
| "eval_rougeLsum": 0.1829, |
| "eval_runtime": 1037.4604, |
| "eval_samples_per_second": 2.217, |
| "eval_steps_per_second": 0.278, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 5e-05, |
| "loss": 2.3504, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 5e-05, |
| "loss": 2.3203, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.34, |
| "eval_gen_len": 32.2061, |
| "eval_loss": 2.235518455505371, |
| "eval_rouge1": 0.2284, |
| "eval_rouge2": 0.0722, |
| "eval_rougeL": 0.1819, |
| "eval_rougeLsum": 0.1892, |
| "eval_runtime": 1121.1561, |
| "eval_samples_per_second": 2.051, |
| "eval_steps_per_second": 0.257, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 5e-05, |
| "loss": 2.3087, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 5e-05, |
| "loss": 2.3132, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_gen_len": 29.5452, |
| "eval_loss": 2.2289836406707764, |
| "eval_rouge1": 0.2183, |
| "eval_rouge2": 0.0673, |
| "eval_rougeL": 0.1759, |
| "eval_rougeLsum": 0.1827, |
| "eval_runtime": 1055.2895, |
| "eval_samples_per_second": 2.179, |
| "eval_steps_per_second": 0.273, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 5e-05, |
| "loss": 2.3063, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 5e-05, |
| "loss": 2.3116, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.44, |
| "eval_gen_len": 30.2935, |
| "eval_loss": 2.218207359313965, |
| "eval_rouge1": 0.2239, |
| "eval_rouge2": 0.07, |
| "eval_rougeL": 0.1798, |
| "eval_rougeLsum": 0.1879, |
| "eval_runtime": 1063.5185, |
| "eval_samples_per_second": 2.163, |
| "eval_steps_per_second": 0.271, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 5e-05, |
| "loss": 2.3014, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 5e-05, |
| "loss": 2.2852, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_gen_len": 28.6443, |
| "eval_loss": 2.2090706825256348, |
| "eval_rouge1": 0.2251, |
| "eval_rouge2": 0.0703, |
| "eval_rougeL": 0.1812, |
| "eval_rougeLsum": 0.1887, |
| "eval_runtime": 1045.7282, |
| "eval_samples_per_second": 2.199, |
| "eval_steps_per_second": 0.275, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 5e-05, |
| "loss": 2.2963, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 5e-05, |
| "loss": 2.2683, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.54, |
| "eval_gen_len": 29.9661, |
| "eval_loss": 2.1879115104675293, |
| "eval_rouge1": 0.2257, |
| "eval_rouge2": 0.0716, |
| "eval_rougeL": 0.1806, |
| "eval_rougeLsum": 0.1876, |
| "eval_runtime": 1061.3075, |
| "eval_samples_per_second": 2.167, |
| "eval_steps_per_second": 0.271, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 5e-05, |
| "loss": 2.2735, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 5e-05, |
| "loss": 2.2614, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_gen_len": 30.4435, |
| "eval_loss": 2.1871089935302734, |
| "eval_rouge1": 0.2316, |
| "eval_rouge2": 0.075, |
| "eval_rougeL": 0.1863, |
| "eval_rougeLsum": 0.1936, |
| "eval_runtime": 1083.7377, |
| "eval_samples_per_second": 2.122, |
| "eval_steps_per_second": 0.266, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 5e-05, |
| "loss": 2.2735, |
| "step": 62500 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 5e-05, |
| "loss": 2.252, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_gen_len": 30.6239, |
| "eval_loss": 2.175469160079956, |
| "eval_rouge1": 0.226, |
| "eval_rouge2": 0.0729, |
| "eval_rougeL": 0.1834, |
| "eval_rougeLsum": 0.1914, |
| "eval_runtime": 1080.4009, |
| "eval_samples_per_second": 2.129, |
| "eval_steps_per_second": 0.267, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5e-05, |
| "loss": 2.2509, |
| "step": 67500 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5e-05, |
| "loss": 2.262, |
| "step": 70000 |
| }, |
| { |
| "epoch": 0.68, |
| "eval_gen_len": 30.9983, |
| "eval_loss": 2.16789174079895, |
| "eval_rouge1": 0.2256, |
| "eval_rouge2": 0.0716, |
| "eval_rougeL": 0.1815, |
| "eval_rougeLsum": 0.1889, |
| "eval_runtime": 1104.0224, |
| "eval_samples_per_second": 2.083, |
| "eval_steps_per_second": 0.261, |
| "step": 70000 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 5e-05, |
| "loss": 2.2398, |
| "step": 72500 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 5e-05, |
| "loss": 2.228, |
| "step": 75000 |
| }, |
| { |
| "epoch": 0.73, |
| "eval_gen_len": 29.9704, |
| "eval_loss": 2.1669178009033203, |
| "eval_rouge1": 0.2253, |
| "eval_rouge2": 0.0725, |
| "eval_rougeL": 0.1822, |
| "eval_rougeLsum": 0.1894, |
| "eval_runtime": 1052.7669, |
| "eval_samples_per_second": 2.185, |
| "eval_steps_per_second": 0.274, |
| "step": 75000 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 5e-05, |
| "loss": 2.25, |
| "step": 77500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 5e-05, |
| "loss": 2.234, |
| "step": 80000 |
| }, |
| { |
| "epoch": 0.78, |
| "eval_gen_len": 29.4826, |
| "eval_loss": 2.1604671478271484, |
| "eval_rouge1": 0.2283, |
| "eval_rouge2": 0.0747, |
| "eval_rougeL": 0.1855, |
| "eval_rougeLsum": 0.1937, |
| "eval_runtime": 1075.8159, |
| "eval_samples_per_second": 2.138, |
| "eval_steps_per_second": 0.268, |
| "step": 80000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 5e-05, |
| "loss": 2.236, |
| "step": 82500 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 5e-05, |
| "loss": 2.2289, |
| "step": 85000 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_gen_len": 30.0213, |
| "eval_loss": 2.1517326831817627, |
| "eval_rouge1": 0.2226, |
| "eval_rouge2": 0.0705, |
| "eval_rougeL": 0.1801, |
| "eval_rougeLsum": 0.1873, |
| "eval_runtime": 1072.8178, |
| "eval_samples_per_second": 2.144, |
| "eval_steps_per_second": 0.268, |
| "step": 85000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 5e-05, |
| "loss": 2.2214, |
| "step": 87500 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 5e-05, |
| "loss": 2.2043, |
| "step": 90000 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_gen_len": 29.5361, |
| "eval_loss": 2.1455490589141846, |
| "eval_rouge1": 0.2265, |
| "eval_rouge2": 0.075, |
| "eval_rougeL": 0.1838, |
| "eval_rougeLsum": 0.1908, |
| "eval_runtime": 1058.731, |
| "eval_samples_per_second": 2.172, |
| "eval_steps_per_second": 0.272, |
| "step": 90000 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5e-05, |
| "loss": 2.2419, |
| "step": 92500 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 5e-05, |
| "loss": 2.2259, |
| "step": 95000 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_gen_len": 29.6874, |
| "eval_loss": 2.1389129161834717, |
| "eval_rouge1": 0.2287, |
| "eval_rouge2": 0.0713, |
| "eval_rougeL": 0.1844, |
| "eval_rougeLsum": 0.1911, |
| "eval_runtime": 1069.2344, |
| "eval_samples_per_second": 2.151, |
| "eval_steps_per_second": 0.269, |
| "step": 95000 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 5e-05, |
| "loss": 2.2202, |
| "step": 97500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 5e-05, |
| "loss": 2.2307, |
| "step": 100000 |
| }, |
| { |
| "epoch": 0.98, |
| "eval_gen_len": 30.7513, |
| "eval_loss": 2.132361888885498, |
| "eval_rouge1": 0.2293, |
| "eval_rouge2": 0.0741, |
| "eval_rougeL": 0.1845, |
| "eval_rougeLsum": 0.1924, |
| "eval_runtime": 1089.9927, |
| "eval_samples_per_second": 2.11, |
| "eval_steps_per_second": 0.264, |
| "step": 100000 |
| } |
| ], |
| "max_steps": 102356, |
| "num_train_epochs": 1, |
| "total_flos": 1.8696291573252096e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|