| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 2440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 4.972439765930176, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.5634, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7192982456140351, | |
| "eval_f1": 0.638095238095238, | |
| "eval_loss": 0.5084609985351562, | |
| "eval_precision": 0.6523297491039426, | |
| "eval_recall": 0.6313875250045463, | |
| "eval_runtime": 1.7151, | |
| "eval_samples_per_second": 232.636, | |
| "eval_steps_per_second": 29.152, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 4.525497913360596, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.4815, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7468671679197995, | |
| "eval_f1": 0.7146872455662148, | |
| "eval_loss": 0.4543021321296692, | |
| "eval_precision": 0.7073059114891784, | |
| "eval_recall": 0.7334060738316057, | |
| "eval_runtime": 1.713, | |
| "eval_samples_per_second": 232.927, | |
| "eval_steps_per_second": 29.189, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 5.549623489379883, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.4003, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8270676691729323, | |
| "eval_f1": 0.7898027898027897, | |
| "eval_loss": 0.3836011290550232, | |
| "eval_precision": 0.7921052631578948, | |
| "eval_recall": 0.7876432078559739, | |
| "eval_runtime": 1.7117, | |
| "eval_samples_per_second": 233.096, | |
| "eval_steps_per_second": 29.21, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 5.769104957580566, | |
| "learning_rate": 4e-05, | |
| "loss": 0.3683, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8370927318295739, | |
| "eval_f1": 0.8029928975654221, | |
| "eval_loss": 0.35736599564552307, | |
| "eval_precision": 0.8037650785914463, | |
| "eval_recall": 0.8022367703218767, | |
| "eval_runtime": 1.7125, | |
| "eval_samples_per_second": 232.998, | |
| "eval_steps_per_second": 29.198, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 5.723803997039795, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.3396, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8596491228070176, | |
| "eval_f1": 0.8298403801632752, | |
| "eval_loss": 0.34212788939476013, | |
| "eval_precision": 0.8315523576240049, | |
| "eval_recall": 0.8281960356428442, | |
| "eval_runtime": 1.7123, | |
| "eval_samples_per_second": 233.026, | |
| "eval_steps_per_second": 29.201, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 5.710361480712891, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.3082, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8521303258145363, | |
| "eval_f1": 0.8285395893573786, | |
| "eval_loss": 0.3541903495788574, | |
| "eval_precision": 0.8185863137144467, | |
| "eval_recall": 0.8428805237315875, | |
| "eval_runtime": 1.7126, | |
| "eval_samples_per_second": 232.974, | |
| "eval_steps_per_second": 29.195, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 2.925419330596924, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.2908, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8571428571428571, | |
| "eval_f1": 0.8254579780661698, | |
| "eval_loss": 0.32880380749702454, | |
| "eval_precision": 0.8299216027874565, | |
| "eval_recall": 0.8214220767412257, | |
| "eval_runtime": 1.7119, | |
| "eval_samples_per_second": 233.076, | |
| "eval_steps_per_second": 29.208, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 6.063595294952393, | |
| "learning_rate": 3e-05, | |
| "loss": 0.2705, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8596491228070176, | |
| "eval_f1": 0.8347043853938569, | |
| "eval_loss": 0.332290917634964, | |
| "eval_precision": 0.8277993283927745, | |
| "eval_recall": 0.8431987634115294, | |
| "eval_runtime": 1.7111, | |
| "eval_samples_per_second": 233.18, | |
| "eval_steps_per_second": 29.22, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 4.576409339904785, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.2608, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.8671679197994987, | |
| "eval_f1": 0.8359744037230948, | |
| "eval_loss": 0.3119361996650696, | |
| "eval_precision": 0.8448835433371515, | |
| "eval_recall": 0.828514275322786, | |
| "eval_runtime": 1.7115, | |
| "eval_samples_per_second": 233.133, | |
| "eval_steps_per_second": 29.215, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 6.930353164672852, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.2489, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8671679197994987, | |
| "eval_f1": 0.8409485773595975, | |
| "eval_loss": 0.306035578250885, | |
| "eval_precision": 0.8385304659498208, | |
| "eval_recall": 0.8435170030914712, | |
| "eval_runtime": 1.7123, | |
| "eval_samples_per_second": 233.017, | |
| "eval_steps_per_second": 29.2, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 11.753581047058105, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.2452, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.8822055137844611, | |
| "eval_f1": 0.8545433391506689, | |
| "eval_loss": 0.3053016662597656, | |
| "eval_precision": 0.8639270714012982, | |
| "eval_recall": 0.846653937079469, | |
| "eval_runtime": 1.7113, | |
| "eval_samples_per_second": 233.152, | |
| "eval_steps_per_second": 29.217, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 21.677701950073242, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2357, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.8822055137844611, | |
| "eval_f1": 0.851257564821498, | |
| "eval_loss": 0.30789536237716675, | |
| "eval_precision": 0.8714882943143812, | |
| "eval_recall": 0.8366521185670122, | |
| "eval_runtime": 1.7109, | |
| "eval_samples_per_second": 233.216, | |
| "eval_steps_per_second": 29.225, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 2.892643690109253, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.2206, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.8847117794486216, | |
| "eval_f1": 0.8539996181748759, | |
| "eval_loss": 0.3078624904155731, | |
| "eval_precision": 0.8759595959595959, | |
| "eval_recall": 0.8384251682124022, | |
| "eval_runtime": 1.716, | |
| "eval_samples_per_second": 232.512, | |
| "eval_steps_per_second": 29.137, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 8.2120943069458, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.2272, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.8721804511278195, | |
| "eval_f1": 0.8484099018899409, | |
| "eval_loss": 0.2965851128101349, | |
| "eval_precision": 0.8430645161290322, | |
| "eval_recall": 0.8545644662665939, | |
| "eval_runtime": 1.7126, | |
| "eval_samples_per_second": 232.98, | |
| "eval_steps_per_second": 29.196, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 1.116047978401184, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.2125, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.8822055137844611, | |
| "eval_f1": 0.8568221901555235, | |
| "eval_loss": 0.2929779589176178, | |
| "eval_precision": 0.8596491228070176, | |
| "eval_recall": 0.8541553009638116, | |
| "eval_runtime": 1.7099, | |
| "eval_samples_per_second": 233.345, | |
| "eval_steps_per_second": 29.241, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 3.386876344680786, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2094, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8796992481203008, | |
| "eval_f1": 0.8548827059465357, | |
| "eval_loss": 0.2945634126663208, | |
| "eval_precision": 0.8548827059465357, | |
| "eval_recall": 0.8548827059465357, | |
| "eval_runtime": 1.7109, | |
| "eval_samples_per_second": 233.211, | |
| "eval_steps_per_second": 29.224, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 2.8860654830932617, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.2067, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.8847117794486216, | |
| "eval_f1": 0.8602260265626904, | |
| "eval_loss": 0.29596519470214844, | |
| "eval_precision": 0.8620943049601959, | |
| "eval_recall": 0.8584288052373159, | |
| "eval_runtime": 1.7108, | |
| "eval_samples_per_second": 233.226, | |
| "eval_steps_per_second": 29.226, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 8.721236228942871, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2058, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.8771929824561403, | |
| "eval_f1": 0.8514869535493182, | |
| "eval_loss": 0.29655298590660095, | |
| "eval_precision": 0.8523821128305106, | |
| "eval_recall": 0.8506092016730314, | |
| "eval_runtime": 1.7113, | |
| "eval_samples_per_second": 233.15, | |
| "eval_steps_per_second": 29.217, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 4.918961524963379, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.2003, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.8796992481203008, | |
| "eval_f1": 0.8533986527862829, | |
| "eval_loss": 0.29609641432762146, | |
| "eval_precision": 0.8572003218020917, | |
| "eval_recall": 0.8498817966903074, | |
| "eval_runtime": 1.7119, | |
| "eval_samples_per_second": 233.075, | |
| "eval_steps_per_second": 29.207, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 4.333648681640625, | |
| "learning_rate": 0.0, | |
| "loss": 0.2019, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8796992481203008, | |
| "eval_f1": 0.8533986527862829, | |
| "eval_loss": 0.2955795228481293, | |
| "eval_precision": 0.8572003218020917, | |
| "eval_recall": 0.8498817966903074, | |
| "eval_runtime": 1.7141, | |
| "eval_samples_per_second": 232.777, | |
| "eval_steps_per_second": 29.17, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 2440, | |
| "total_flos": 7662265464912000.0, | |
| "train_loss": 0.28487560397288836, | |
| "train_runtime": 616.8461, | |
| "train_samples_per_second": 117.955, | |
| "train_steps_per_second": 3.956 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 7662265464912000.0, | |
| "train_batch_size": 30, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |