{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.972439765930176, "learning_rate": 4.75e-05, "loss": 0.5634, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7192982456140351, "eval_f1": 0.638095238095238, "eval_loss": 0.5084609985351562, "eval_precision": 0.6523297491039426, "eval_recall": 0.6313875250045463, "eval_runtime": 1.7151, "eval_samples_per_second": 232.636, "eval_steps_per_second": 29.152, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.525497913360596, "learning_rate": 4.5e-05, "loss": 0.4815, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7468671679197995, "eval_f1": 0.7146872455662148, "eval_loss": 0.4543021321296692, "eval_precision": 0.7073059114891784, "eval_recall": 0.7334060738316057, "eval_runtime": 1.713, "eval_samples_per_second": 232.927, "eval_steps_per_second": 29.189, "step": 244 }, { "epoch": 3.0, "grad_norm": 5.549623489379883, "learning_rate": 4.25e-05, "loss": 0.4003, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8270676691729323, "eval_f1": 0.7898027898027897, "eval_loss": 0.3836011290550232, "eval_precision": 0.7921052631578948, "eval_recall": 0.7876432078559739, "eval_runtime": 1.7117, "eval_samples_per_second": 233.096, "eval_steps_per_second": 29.21, "step": 366 }, { "epoch": 4.0, "grad_norm": 5.769104957580566, "learning_rate": 4e-05, "loss": 0.3683, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8029928975654221, "eval_loss": 0.35736599564552307, "eval_precision": 0.8037650785914463, "eval_recall": 0.8022367703218767, "eval_runtime": 1.7125, "eval_samples_per_second": 232.998, "eval_steps_per_second": 29.198, "step": 488 }, { "epoch": 5.0, "grad_norm": 5.723803997039795, "learning_rate": 3.7500000000000003e-05, "loss": 0.3396, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8298403801632752, "eval_loss": 0.34212788939476013, "eval_precision": 0.8315523576240049, "eval_recall": 0.8281960356428442, "eval_runtime": 1.7123, "eval_samples_per_second": 233.026, "eval_steps_per_second": 29.201, "step": 610 }, { "epoch": 6.0, "grad_norm": 5.710361480712891, "learning_rate": 3.5e-05, "loss": 0.3082, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8285395893573786, "eval_loss": 0.3541903495788574, "eval_precision": 0.8185863137144467, "eval_recall": 0.8428805237315875, "eval_runtime": 1.7126, "eval_samples_per_second": 232.974, "eval_steps_per_second": 29.195, "step": 732 }, { "epoch": 7.0, "grad_norm": 2.925419330596924, "learning_rate": 3.2500000000000004e-05, "loss": 0.2908, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8254579780661698, "eval_loss": 0.32880380749702454, "eval_precision": 0.8299216027874565, "eval_recall": 0.8214220767412257, "eval_runtime": 1.7119, "eval_samples_per_second": 233.076, "eval_steps_per_second": 29.208, "step": 854 }, { "epoch": 8.0, "grad_norm": 6.063595294952393, "learning_rate": 3e-05, "loss": 0.2705, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8347043853938569, "eval_loss": 0.332290917634964, "eval_precision": 0.8277993283927745, "eval_recall": 0.8431987634115294, "eval_runtime": 1.7111, "eval_samples_per_second": 233.18, "eval_steps_per_second": 29.22, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.576409339904785, "learning_rate": 2.7500000000000004e-05, "loss": 0.2608, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8359744037230948, "eval_loss": 0.3119361996650696, "eval_precision": 0.8448835433371515, "eval_recall": 0.828514275322786, "eval_runtime": 1.7115, "eval_samples_per_second": 233.133, "eval_steps_per_second": 29.215, "step": 1098 }, { "epoch": 10.0, "grad_norm": 6.930353164672852, "learning_rate": 2.5e-05, "loss": 0.2489, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8409485773595975, "eval_loss": 0.306035578250885, "eval_precision": 0.8385304659498208, "eval_recall": 0.8435170030914712, "eval_runtime": 1.7123, "eval_samples_per_second": 233.017, "eval_steps_per_second": 29.2, "step": 1220 }, { "epoch": 11.0, "grad_norm": 11.753581047058105, "learning_rate": 2.25e-05, "loss": 0.2452, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8545433391506689, "eval_loss": 0.3053016662597656, "eval_precision": 0.8639270714012982, "eval_recall": 0.846653937079469, "eval_runtime": 1.7113, "eval_samples_per_second": 233.152, "eval_steps_per_second": 29.217, "step": 1342 }, { "epoch": 12.0, "grad_norm": 21.677701950073242, "learning_rate": 2e-05, "loss": 0.2357, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.851257564821498, "eval_loss": 0.30789536237716675, "eval_precision": 0.8714882943143812, "eval_recall": 0.8366521185670122, "eval_runtime": 1.7109, "eval_samples_per_second": 233.216, "eval_steps_per_second": 29.225, "step": 1464 }, { "epoch": 13.0, "grad_norm": 2.892643690109253, "learning_rate": 1.75e-05, "loss": 0.2206, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8539996181748759, "eval_loss": 0.3078624904155731, "eval_precision": 0.8759595959595959, "eval_recall": 0.8384251682124022, "eval_runtime": 1.716, "eval_samples_per_second": 232.512, "eval_steps_per_second": 29.137, "step": 1586 }, { "epoch": 14.0, "grad_norm": 8.2120943069458, "learning_rate": 1.5e-05, "loss": 0.2272, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8484099018899409, "eval_loss": 0.2965851128101349, "eval_precision": 0.8430645161290322, "eval_recall": 0.8545644662665939, "eval_runtime": 1.7126, "eval_samples_per_second": 232.98, "eval_steps_per_second": 29.196, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.116047978401184, "learning_rate": 1.25e-05, "loss": 0.2125, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8568221901555235, "eval_loss": 0.2929779589176178, "eval_precision": 0.8596491228070176, "eval_recall": 0.8541553009638116, "eval_runtime": 1.7099, "eval_samples_per_second": 233.345, "eval_steps_per_second": 29.241, "step": 1830 }, { "epoch": 16.0, "grad_norm": 3.386876344680786, "learning_rate": 1e-05, "loss": 0.2094, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8548827059465357, "eval_loss": 0.2945634126663208, "eval_precision": 0.8548827059465357, "eval_recall": 0.8548827059465357, "eval_runtime": 1.7109, "eval_samples_per_second": 233.211, "eval_steps_per_second": 29.224, "step": 1952 }, { "epoch": 17.0, "grad_norm": 2.8860654830932617, "learning_rate": 7.5e-06, "loss": 0.2067, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8602260265626904, "eval_loss": 0.29596519470214844, "eval_precision": 0.8620943049601959, "eval_recall": 0.8584288052373159, "eval_runtime": 1.7108, "eval_samples_per_second": 233.226, "eval_steps_per_second": 29.226, "step": 2074 }, { "epoch": 18.0, "grad_norm": 8.721236228942871, "learning_rate": 5e-06, "loss": 0.2058, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8514869535493182, "eval_loss": 0.29655298590660095, "eval_precision": 0.8523821128305106, "eval_recall": 0.8506092016730314, "eval_runtime": 1.7113, "eval_samples_per_second": 233.15, "eval_steps_per_second": 29.217, "step": 2196 }, { "epoch": 19.0, "grad_norm": 4.918961524963379, "learning_rate": 2.5e-06, "loss": 0.2003, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8533986527862829, "eval_loss": 0.29609641432762146, "eval_precision": 0.8572003218020917, "eval_recall": 0.8498817966903074, "eval_runtime": 1.7119, "eval_samples_per_second": 233.075, "eval_steps_per_second": 29.207, "step": 2318 }, { "epoch": 20.0, "grad_norm": 4.333648681640625, "learning_rate": 0.0, "loss": 0.2019, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8533986527862829, "eval_loss": 0.2955795228481293, "eval_precision": 0.8572003218020917, "eval_recall": 0.8498817966903074, "eval_runtime": 1.7141, "eval_samples_per_second": 232.777, "eval_steps_per_second": 29.17, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7662265464912000.0, "train_loss": 0.28487560397288836, "train_runtime": 616.8461, "train_samples_per_second": 117.955, "train_steps_per_second": 3.956 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7662265464912000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }