| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 19.99640028797696, | |
| "eval_steps": 500, | |
| "global_step": 13880, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.9399615754082615e-05, | |
| "loss": 2.0304, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.879923150816523e-05, | |
| "loss": 1.781, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.9088, | |
| "eval_gen_len": 26.88909090909091, | |
| "eval_loss": 1.579687476158142, | |
| "eval_precision": 0.908, | |
| "eval_recall": 0.91, | |
| "eval_rouge1": 0.4708, | |
| "eval_rouge2": 0.2219, | |
| "eval_rougeL": 0.3892, | |
| "eval_rougeLsum": 0.389, | |
| "eval_runtime": 1186.1406, | |
| "eval_samples_per_second": 4.637, | |
| "eval_steps_per_second": 0.29, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.729827089337176e-05, | |
| "loss": 1.7026, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.6397694524495677e-05, | |
| "loss": 1.6618, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.91, | |
| "eval_gen_len": 26.728181818181817, | |
| "eval_loss": 1.5411016941070557, | |
| "eval_precision": 0.9094, | |
| "eval_recall": 0.9111, | |
| "eval_rouge1": 0.4776, | |
| "eval_rouge2": 0.2303, | |
| "eval_rougeL": 0.3977, | |
| "eval_rougeLsum": 0.3973, | |
| "eval_runtime": 1083.838, | |
| "eval_samples_per_second": 5.075, | |
| "eval_steps_per_second": 0.317, | |
| "step": 2083 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 1.5497118155619597e-05, | |
| "loss": 1.626, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.911, | |
| "eval_gen_len": 26.759636363636364, | |
| "eval_loss": 1.5170917510986328, | |
| "eval_precision": 0.9102, | |
| "eval_recall": 0.9121, | |
| "eval_rouge1": 0.4834, | |
| "eval_rouge2": 0.2345, | |
| "eval_rougeL": 0.402, | |
| "eval_rougeLsum": 0.402, | |
| "eval_runtime": 1053.82, | |
| "eval_samples_per_second": 5.219, | |
| "eval_steps_per_second": 0.326, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 1.4596541786743516e-05, | |
| "loss": 1.5918, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.9112, | |
| "eval_gen_len": 26.647636363636362, | |
| "eval_loss": 1.500138521194458, | |
| "eval_precision": 0.9106, | |
| "eval_recall": 0.9122, | |
| "eval_rouge1": 0.4853, | |
| "eval_rouge2": 0.2365, | |
| "eval_rougeL": 0.4045, | |
| "eval_rougeLsum": 0.4045, | |
| "eval_runtime": 1079.0919, | |
| "eval_samples_per_second": 5.097, | |
| "eval_steps_per_second": 0.319, | |
| "step": 3471 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 1.3695965417867436e-05, | |
| "loss": 1.5798, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 1.2795389048991355e-05, | |
| "loss": 1.5586, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_f1": 0.9116, | |
| "eval_gen_len": 26.777818181818184, | |
| "eval_loss": 1.4880452156066895, | |
| "eval_precision": 0.9108, | |
| "eval_recall": 0.9127, | |
| "eval_rouge1": 0.4875, | |
| "eval_rouge2": 0.2373, | |
| "eval_rougeL": 0.4063, | |
| "eval_rougeLsum": 0.4063, | |
| "eval_runtime": 1027.5441, | |
| "eval_samples_per_second": 5.353, | |
| "eval_steps_per_second": 0.335, | |
| "step": 4164 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 1.1894812680115276e-05, | |
| "loss": 1.5375, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_f1": 0.912, | |
| "eval_gen_len": 26.39909090909091, | |
| "eval_loss": 1.4768402576446533, | |
| "eval_precision": 0.9116, | |
| "eval_recall": 0.9128, | |
| "eval_rouge1": 0.4898, | |
| "eval_rouge2": 0.24, | |
| "eval_rougeL": 0.4083, | |
| "eval_rougeLsum": 0.4083, | |
| "eval_runtime": 922.1893, | |
| "eval_samples_per_second": 5.964, | |
| "eval_steps_per_second": 0.373, | |
| "step": 4858 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 1.0994236311239194e-05, | |
| "loss": 1.5228, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 1.0093659942363115e-05, | |
| "loss": 1.5146, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_f1": 0.9126, | |
| "eval_gen_len": 26.156, | |
| "eval_loss": 1.4685654640197754, | |
| "eval_precision": 0.9123, | |
| "eval_recall": 0.9133, | |
| "eval_rouge1": 0.4907, | |
| "eval_rouge2": 0.241, | |
| "eval_rougeL": 0.4088, | |
| "eval_rougeLsum": 0.4089, | |
| "eval_runtime": 865.3485, | |
| "eval_samples_per_second": 6.356, | |
| "eval_steps_per_second": 0.398, | |
| "step": 5553 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 9.193083573487034e-06, | |
| "loss": 1.5006, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_f1": 0.9127, | |
| "eval_gen_len": 26.26290909090909, | |
| "eval_loss": 1.4636152982711792, | |
| "eval_precision": 0.9122, | |
| "eval_recall": 0.9135, | |
| "eval_rouge1": 0.4914, | |
| "eval_rouge2": 0.2419, | |
| "eval_rougeL": 0.4097, | |
| "eval_rougeLsum": 0.4099, | |
| "eval_runtime": 874.612, | |
| "eval_samples_per_second": 6.289, | |
| "eval_steps_per_second": 0.393, | |
| "step": 6247 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 8.29250720461095e-06, | |
| "loss": 1.49, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_f1": 0.9127, | |
| "eval_gen_len": 26.027272727272727, | |
| "eval_loss": 1.4580360651016235, | |
| "eval_precision": 0.9125, | |
| "eval_recall": 0.9133, | |
| "eval_rouge1": 0.4911, | |
| "eval_rouge2": 0.2429, | |
| "eval_rougeL": 0.4109, | |
| "eval_rougeLsum": 0.411, | |
| "eval_runtime": 855.8845, | |
| "eval_samples_per_second": 6.426, | |
| "eval_steps_per_second": 0.402, | |
| "step": 6942 | |
| }, | |
| { | |
| "epoch": 10.08, | |
| "learning_rate": 7.391930835734871e-06, | |
| "loss": 1.485, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "learning_rate": 6.491354466858791e-06, | |
| "loss": 1.4749, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_f1": 0.9131, | |
| "eval_gen_len": 26.230363636363638, | |
| "eval_loss": 1.4546109437942505, | |
| "eval_precision": 0.9127, | |
| "eval_recall": 0.9138, | |
| "eval_rouge1": 0.4932, | |
| "eval_rouge2": 0.244, | |
| "eval_rougeL": 0.4121, | |
| "eval_rougeLsum": 0.4123, | |
| "eval_runtime": 871.4205, | |
| "eval_samples_per_second": 6.312, | |
| "eval_steps_per_second": 0.395, | |
| "step": 7636 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "learning_rate": 5.590778097982709e-06, | |
| "loss": 1.4661, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_f1": 0.9132, | |
| "eval_gen_len": 25.87781818181818, | |
| "eval_loss": 1.4514495134353638, | |
| "eval_precision": 0.9133, | |
| "eval_recall": 0.9136, | |
| "eval_rouge1": 0.4937, | |
| "eval_rouge2": 0.2448, | |
| "eval_rougeL": 0.4126, | |
| "eval_rougeLsum": 0.4127, | |
| "eval_runtime": 867.3574, | |
| "eval_samples_per_second": 6.341, | |
| "eval_steps_per_second": 0.397, | |
| "step": 8331 | |
| }, | |
| { | |
| "epoch": 12.24, | |
| "learning_rate": 4.690201729106629e-06, | |
| "loss": 1.4626, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 12.96, | |
| "learning_rate": 3.7896253602305477e-06, | |
| "loss": 1.4575, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_f1": 0.9133, | |
| "eval_gen_len": 26.11509090909091, | |
| "eval_loss": 1.4499082565307617, | |
| "eval_precision": 0.913, | |
| "eval_recall": 0.914, | |
| "eval_rouge1": 0.4947, | |
| "eval_rouge2": 0.2453, | |
| "eval_rougeL": 0.4139, | |
| "eval_rougeLsum": 0.414, | |
| "eval_runtime": 860.9844, | |
| "eval_samples_per_second": 6.388, | |
| "eval_steps_per_second": 0.4, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 13.68, | |
| "learning_rate": 2.8890489913544673e-06, | |
| "loss": 1.4511, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_f1": 0.9133, | |
| "eval_gen_len": 26.028727272727274, | |
| "eval_loss": 1.44780433177948, | |
| "eval_precision": 0.9131, | |
| "eval_recall": 0.9138, | |
| "eval_rouge1": 0.4939, | |
| "eval_rouge2": 0.2451, | |
| "eval_rougeL": 0.4133, | |
| "eval_rougeLsum": 0.4134, | |
| "eval_runtime": 862.0827, | |
| "eval_samples_per_second": 6.38, | |
| "eval_steps_per_second": 0.399, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "learning_rate": 1.988472622478386e-06, | |
| "loss": 1.4519, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_f1": 0.9133, | |
| "eval_gen_len": 25.907818181818183, | |
| "eval_loss": 1.4471020698547363, | |
| "eval_precision": 0.9132, | |
| "eval_recall": 0.9137, | |
| "eval_rouge1": 0.4938, | |
| "eval_rouge2": 0.2451, | |
| "eval_rougeL": 0.4134, | |
| "eval_rougeLsum": 0.4134, | |
| "eval_runtime": 855.2673, | |
| "eval_samples_per_second": 6.431, | |
| "eval_steps_per_second": 0.402, | |
| "step": 10414 | |
| }, | |
| { | |
| "epoch": 15.12, | |
| "learning_rate": 1.0878962536023055e-06, | |
| "loss": 1.4475, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 15.84, | |
| "learning_rate": 1.8731988472622478e-07, | |
| "loss": 1.4439, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_f1": 0.9133, | |
| "eval_gen_len": 26.034545454545455, | |
| "eval_loss": 1.4474281072616577, | |
| "eval_precision": 0.9131, | |
| "eval_recall": 0.9139, | |
| "eval_rouge1": 0.4942, | |
| "eval_rouge2": 0.2456, | |
| "eval_rougeL": 0.4133, | |
| "eval_rougeLsum": 0.4134, | |
| "eval_runtime": 875.1275, | |
| "eval_samples_per_second": 6.285, | |
| "eval_steps_per_second": 0.393, | |
| "step": 11104 | |
| }, | |
| { | |
| "epoch": 16.57, | |
| "learning_rate": 3.4293948126801158e-06, | |
| "loss": 1.4441, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_f1": 0.9134, | |
| "eval_gen_len": 25.939090909090908, | |
| "eval_loss": 1.4446682929992676, | |
| "eval_precision": 0.9133, | |
| "eval_recall": 0.9138, | |
| "eval_rouge1": 0.4945, | |
| "eval_rouge2": 0.2457, | |
| "eval_rougeL": 0.4139, | |
| "eval_rougeLsum": 0.414, | |
| "eval_runtime": 853.4658, | |
| "eval_samples_per_second": 6.444, | |
| "eval_steps_per_second": 0.403, | |
| "step": 11799 | |
| }, | |
| { | |
| "epoch": 17.29, | |
| "learning_rate": 2.708933717579251e-06, | |
| "loss": 1.444, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_f1": 0.9135, | |
| "eval_gen_len": 26.010727272727273, | |
| "eval_loss": 1.4445807933807373, | |
| "eval_precision": 0.9133, | |
| "eval_recall": 0.9141, | |
| "eval_rouge1": 0.4957, | |
| "eval_rouge2": 0.2473, | |
| "eval_rougeL": 0.415, | |
| "eval_rougeLsum": 0.4151, | |
| "eval_runtime": 869.7396, | |
| "eval_samples_per_second": 6.324, | |
| "eval_steps_per_second": 0.396, | |
| "step": 12493 | |
| }, | |
| { | |
| "epoch": 18.01, | |
| "learning_rate": 1.988472622478386e-06, | |
| "loss": 1.4378, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 18.73, | |
| "learning_rate": 1.2680115273775217e-06, | |
| "loss": 1.4375, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_f1": 0.9136, | |
| "eval_gen_len": 25.88690909090909, | |
| "eval_loss": 1.4433233737945557, | |
| "eval_precision": 0.9136, | |
| "eval_recall": 0.914, | |
| "eval_rouge1": 0.4961, | |
| "eval_rouge2": 0.2473, | |
| "eval_rougeL": 0.4153, | |
| "eval_rougeLsum": 0.4153, | |
| "eval_runtime": 854.4011, | |
| "eval_samples_per_second": 6.437, | |
| "eval_steps_per_second": 0.403, | |
| "step": 13188 | |
| }, | |
| { | |
| "epoch": 19.45, | |
| "learning_rate": 5.475504322766571e-07, | |
| "loss": 1.4361, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_f1": 0.9137, | |
| "eval_gen_len": 25.862909090909092, | |
| "eval_loss": 1.4432713985443115, | |
| "eval_precision": 0.9136, | |
| "eval_recall": 0.914, | |
| "eval_rouge1": 0.4961, | |
| "eval_rouge2": 0.2476, | |
| "eval_rougeL": 0.4155, | |
| "eval_rougeLsum": 0.4154, | |
| "eval_runtime": 863.7254, | |
| "eval_samples_per_second": 6.368, | |
| "eval_steps_per_second": 0.398, | |
| "step": 13880 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 13880, | |
| "total_flos": 2.818047373345161e+18, | |
| "train_loss": 0.2986434628709249, | |
| "train_runtime": 16684.611, | |
| "train_samples_per_second": 119.871, | |
| "train_steps_per_second": 0.832 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 13880, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 2.818047373345161e+18, | |
| "train_batch_size": 24, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |