| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 16.0, | |
| "eval_steps": 500, | |
| "global_step": 8336, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.9360204734484968e-05, | |
| "loss": 2.0443, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.9049, | |
| "eval_gen_len": 28.363272727272726, | |
| "eval_loss": 1.7046316862106323, | |
| "eval_precision": 0.9041, | |
| "eval_recall": 0.9061, | |
| "eval_rouge1": 0.4488, | |
| "eval_rouge2": 0.203, | |
| "eval_rougeL": 0.3633, | |
| "eval_rougeLsum": 0.3633, | |
| "eval_runtime": 577.3748, | |
| "eval_samples_per_second": 4.763, | |
| "eval_steps_per_second": 0.298, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.872040946896993e-05, | |
| "loss": 1.7826, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.9072, | |
| "eval_gen_len": 28.19490909090909, | |
| "eval_loss": 1.6347475051879883, | |
| "eval_precision": 0.9062, | |
| "eval_recall": 0.9085, | |
| "eval_rouge1": 0.4616, | |
| "eval_rouge2": 0.2133, | |
| "eval_rougeL": 0.3761, | |
| "eval_rougeLsum": 0.3758, | |
| "eval_runtime": 524.6485, | |
| "eval_samples_per_second": 5.242, | |
| "eval_steps_per_second": 0.328, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.8080614203454897e-05, | |
| "loss": 1.7134, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.9084, | |
| "eval_gen_len": 28.521818181818183, | |
| "eval_loss": 1.5991039276123047, | |
| "eval_precision": 0.9072, | |
| "eval_recall": 0.91, | |
| "eval_rouge1": 0.4683, | |
| "eval_rouge2": 0.2186, | |
| "eval_rougeL": 0.3824, | |
| "eval_rougeLsum": 0.3822, | |
| "eval_runtime": 539.0316, | |
| "eval_samples_per_second": 5.102, | |
| "eval_steps_per_second": 0.319, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 1.744081893793986e-05, | |
| "loss": 1.6664, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.9096, | |
| "eval_gen_len": 28.24981818181818, | |
| "eval_loss": 1.5767467021942139, | |
| "eval_precision": 0.9087, | |
| "eval_recall": 0.9109, | |
| "eval_rouge1": 0.4738, | |
| "eval_rouge2": 0.2233, | |
| "eval_rougeL": 0.3878, | |
| "eval_rougeLsum": 0.3876, | |
| "eval_runtime": 529.9968, | |
| "eval_samples_per_second": 5.189, | |
| "eval_steps_per_second": 0.325, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 1.6801023672424827e-05, | |
| "loss": 1.6296, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.9103, | |
| "eval_gen_len": 28.239636363636365, | |
| "eval_loss": 1.5595422983169556, | |
| "eval_precision": 0.9093, | |
| "eval_recall": 0.9117, | |
| "eval_rouge1": 0.4775, | |
| "eval_rouge2": 0.2265, | |
| "eval_rougeL": 0.3911, | |
| "eval_rougeLsum": 0.391, | |
| "eval_runtime": 526.5193, | |
| "eval_samples_per_second": 5.223, | |
| "eval_steps_per_second": 0.327, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 1.616122840690979e-05, | |
| "loss": 1.5984, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_f1": 0.9109, | |
| "eval_gen_len": 28.28, | |
| "eval_loss": 1.5468252897262573, | |
| "eval_precision": 0.9098, | |
| "eval_recall": 0.9124, | |
| "eval_rouge1": 0.4805, | |
| "eval_rouge2": 0.2284, | |
| "eval_rougeL": 0.3941, | |
| "eval_rougeLsum": 0.3938, | |
| "eval_runtime": 512.3397, | |
| "eval_samples_per_second": 5.368, | |
| "eval_steps_per_second": 0.336, | |
| "step": 3126 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 1.5521433141394756e-05, | |
| "loss": 1.5738, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_f1": 0.9113, | |
| "eval_gen_len": 27.837818181818182, | |
| "eval_loss": 1.5370196104049683, | |
| "eval_precision": 0.9105, | |
| "eval_recall": 0.9124, | |
| "eval_rouge1": 0.4807, | |
| "eval_rouge2": 0.2296, | |
| "eval_rougeL": 0.3945, | |
| "eval_rougeLsum": 0.3946, | |
| "eval_runtime": 509.6023, | |
| "eval_samples_per_second": 5.396, | |
| "eval_steps_per_second": 0.338, | |
| "step": 3647 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 1.0403071017274472e-05, | |
| "loss": 1.5476, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_f1": 0.9114, | |
| "eval_gen_len": 27.736363636363638, | |
| "eval_loss": 1.530755639076233, | |
| "eval_precision": 0.9108, | |
| "eval_recall": 0.9125, | |
| "eval_rouge1": 0.4823, | |
| "eval_rouge2": 0.2315, | |
| "eval_rougeL": 0.3963, | |
| "eval_rougeLsum": 0.3965, | |
| "eval_runtime": 510.2185, | |
| "eval_samples_per_second": 5.39, | |
| "eval_steps_per_second": 0.337, | |
| "step": 4168 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 9.203454894433782e-06, | |
| "loss": 1.535, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_f1": 0.9116, | |
| "eval_gen_len": 27.653454545454544, | |
| "eval_loss": 1.5260871648788452, | |
| "eval_precision": 0.911, | |
| "eval_recall": 0.9125, | |
| "eval_rouge1": 0.4829, | |
| "eval_rouge2": 0.2309, | |
| "eval_rougeL": 0.3974, | |
| "eval_rougeLsum": 0.3974, | |
| "eval_runtime": 503.2649, | |
| "eval_samples_per_second": 5.464, | |
| "eval_steps_per_second": 0.342, | |
| "step": 4689 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 8.003838771593091e-06, | |
| "loss": 1.52, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_f1": 0.9117, | |
| "eval_gen_len": 27.816, | |
| "eval_loss": 1.52312433719635, | |
| "eval_precision": 0.911, | |
| "eval_recall": 0.9128, | |
| "eval_rouge1": 0.4847, | |
| "eval_rouge2": 0.2332, | |
| "eval_rougeL": 0.3992, | |
| "eval_rougeLsum": 0.3993, | |
| "eval_runtime": 522.9989, | |
| "eval_samples_per_second": 5.258, | |
| "eval_steps_per_second": 0.329, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "learning_rate": 6.8042226487524e-06, | |
| "loss": 1.5145, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_f1": 0.9121, | |
| "eval_gen_len": 27.360363636363637, | |
| "eval_loss": 1.519996166229248, | |
| "eval_precision": 0.9119, | |
| "eval_recall": 0.9127, | |
| "eval_rouge1": 0.4851, | |
| "eval_rouge2": 0.2339, | |
| "eval_rougeL": 0.4004, | |
| "eval_rougeLsum": 0.4006, | |
| "eval_runtime": 501.564, | |
| "eval_samples_per_second": 5.483, | |
| "eval_steps_per_second": 0.343, | |
| "step": 5731 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "learning_rate": 5.6046065259117085e-06, | |
| "loss": 1.5028, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_f1": 0.9122, | |
| "eval_gen_len": 27.462545454545456, | |
| "eval_loss": 1.5178437232971191, | |
| "eval_precision": 0.9118, | |
| "eval_recall": 0.9129, | |
| "eval_rouge1": 0.4858, | |
| "eval_rouge2": 0.2345, | |
| "eval_rougeL": 0.4001, | |
| "eval_rougeLsum": 0.4002, | |
| "eval_runtime": 501.8356, | |
| "eval_samples_per_second": 5.48, | |
| "eval_steps_per_second": 0.343, | |
| "step": 6252 | |
| }, | |
| { | |
| "epoch": 12.48, | |
| "learning_rate": 4.404990403071018e-06, | |
| "loss": 1.4946, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_f1": 0.9121, | |
| "eval_gen_len": 27.67890909090909, | |
| "eval_loss": 1.5164216756820679, | |
| "eval_precision": 0.9115, | |
| "eval_recall": 0.9131, | |
| "eval_rouge1": 0.4859, | |
| "eval_rouge2": 0.2341, | |
| "eval_rougeL": 0.4004, | |
| "eval_rougeLsum": 0.4005, | |
| "eval_runtime": 506.9944, | |
| "eval_samples_per_second": 5.424, | |
| "eval_steps_per_second": 0.339, | |
| "step": 6773 | |
| }, | |
| { | |
| "epoch": 13.44, | |
| "learning_rate": 3.2053742802303266e-06, | |
| "loss": 1.4877, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_f1": 0.9123, | |
| "eval_gen_len": 27.580363636363636, | |
| "eval_loss": 1.515085220336914, | |
| "eval_precision": 0.9119, | |
| "eval_recall": 0.9131, | |
| "eval_rouge1": 0.4868, | |
| "eval_rouge2": 0.235, | |
| "eval_rougeL": 0.4013, | |
| "eval_rougeLsum": 0.4013, | |
| "eval_runtime": 510.129, | |
| "eval_samples_per_second": 5.391, | |
| "eval_steps_per_second": 0.337, | |
| "step": 7294 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "learning_rate": 2.0057581573896352e-06, | |
| "loss": 1.4855, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_f1": 0.9122, | |
| "eval_gen_len": 27.584363636363637, | |
| "eval_loss": 1.5146222114562988, | |
| "eval_precision": 0.9117, | |
| "eval_recall": 0.9131, | |
| "eval_rouge1": 0.4863, | |
| "eval_rouge2": 0.2349, | |
| "eval_rougeL": 0.4014, | |
| "eval_rougeLsum": 0.4016, | |
| "eval_runtime": 507.3504, | |
| "eval_samples_per_second": 5.42, | |
| "eval_steps_per_second": 0.339, | |
| "step": 7815 | |
| }, | |
| { | |
| "epoch": 15.36, | |
| "learning_rate": 8.061420345489445e-07, | |
| "loss": 1.4782, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_f1": 0.9122, | |
| "eval_gen_len": 27.571636363636365, | |
| "eval_loss": 1.514625906944275, | |
| "eval_precision": 0.9118, | |
| "eval_recall": 0.9131, | |
| "eval_rouge1": 0.4863, | |
| "eval_rouge2": 0.2348, | |
| "eval_rougeL": 0.4011, | |
| "eval_rougeLsum": 0.4012, | |
| "eval_runtime": 505.7467, | |
| "eval_samples_per_second": 5.438, | |
| "eval_steps_per_second": 0.34, | |
| "step": 8336 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "step": 8336, | |
| "total_flos": 1.1557816346520453e+18, | |
| "train_loss": 0.8733468595713434, | |
| "train_runtime": 22758.1802, | |
| "train_samples_per_second": 35.152, | |
| "train_steps_per_second": 0.366 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 8336, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 16, | |
| "save_steps": 500, | |
| "total_flos": 1.1557816346520453e+18, | |
| "train_batch_size": 24, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |