{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 6.880457878112793, "learning_rate": 4.75e-05, "loss": 0.371, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8556621579112929, "eval_loss": 0.2788735032081604, "eval_precision": 0.871654421411703, "eval_recall": 0.8434260774686306, "eval_runtime": 1.6426, "eval_samples_per_second": 242.907, "eval_steps_per_second": 30.439, "step": 122 }, { "epoch": 2.0, "grad_norm": 6.694629192352295, "learning_rate": 4.5e-05, "loss": 0.214, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8760282890453928, "eval_loss": 0.2702818810939789, "eval_precision": 0.8682260305697083, "eval_recall": 0.8855246408437898, "eval_runtime": 1.6739, "eval_samples_per_second": 238.362, "eval_steps_per_second": 29.87, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.0087978839874268, "learning_rate": 4.25e-05, "loss": 0.141, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.843111041207927, "eval_loss": 0.4446346163749695, "eval_precision": 0.8585673051692468, "eval_recall": 0.831332969630842, "eval_runtime": 1.6711, "eval_samples_per_second": 238.76, "eval_steps_per_second": 29.92, "step": 366 }, { "epoch": 4.0, "grad_norm": 15.80390739440918, "learning_rate": 4e-05, "loss": 0.093, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8368354828562441, "eval_loss": 0.5896394848823547, "eval_precision": 0.8520237470480189, "eval_recall": 0.8252864157119476, "eval_runtime": 1.6746, "eval_samples_per_second": 238.269, "eval_steps_per_second": 29.858, "step": 488 }, { "epoch": 5.0, "grad_norm": 0.24515673518180847, "learning_rate": 3.7500000000000003e-05, "loss": 0.0469, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8563025210084034, "eval_loss": 0.6098523736000061, "eval_precision": 0.8529936381473334, "eval_recall": 0.8598836152027641, "eval_runtime": 1.6766, "eval_samples_per_second": 237.984, "eval_steps_per_second": 29.823, "step": 610 }, { "epoch": 6.0, "grad_norm": 0.06188611686229706, "learning_rate": 3.5e-05, "loss": 0.0498, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.871530684525726, "eval_loss": 0.6609992980957031, "eval_precision": 0.9016447368421052, "eval_recall": 0.8515639207128569, "eval_runtime": 1.6777, "eval_samples_per_second": 237.819, "eval_steps_per_second": 29.802, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.2003283500671387, "learning_rate": 3.2500000000000004e-05, "loss": 0.0257, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8532332818362393, "eval_loss": 0.6781011819839478, "eval_precision": 0.891747572815534, "eval_recall": 0.8301963993453355, "eval_runtime": 1.6733, "eval_samples_per_second": 238.446, "eval_steps_per_second": 29.88, "step": 854 }, { "epoch": 8.0, "grad_norm": 1.8772869110107422, "learning_rate": 3e-05, "loss": 0.0267, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8523012380208119, "eval_loss": 0.8199848532676697, "eval_precision": 0.8950599239988307, "eval_recall": 0.8276959447172213, "eval_runtime": 1.6789, "eval_samples_per_second": 237.65, "eval_steps_per_second": 29.781, "step": 976 }, { "epoch": 9.0, "grad_norm": 2.237112522125244, "learning_rate": 2.7500000000000004e-05, "loss": 0.016, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.881931703852755, "eval_loss": 0.5965662002563477, "eval_precision": 0.8740012737378415, "eval_recall": 0.8915711947626841, "eval_runtime": 1.673, "eval_samples_per_second": 238.498, "eval_steps_per_second": 29.887, "step": 1098 }, { "epoch": 10.0, "grad_norm": 0.010850044898688793, "learning_rate": 2.5e-05, "loss": 0.0132, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8835263209107715, "eval_loss": 0.6436753273010254, "eval_precision": 0.8791501449961532, "eval_recall": 0.8883433351518457, "eval_runtime": 1.6736, "eval_samples_per_second": 238.404, "eval_steps_per_second": 29.875, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.008115105330944061, "learning_rate": 2.25e-05, "loss": 0.0161, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8867007927797945, "eval_loss": 0.679655909538269, "eval_precision": 0.89198606271777, "eval_recall": 0.8818876159301692, "eval_runtime": 1.6719, "eval_samples_per_second": 238.648, "eval_steps_per_second": 29.906, "step": 1342 }, { "epoch": 12.0, "grad_norm": 0.003001323202624917, "learning_rate": 2e-05, "loss": 0.0091, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.9097744360902256, "eval_f1": 0.8882839721254355, "eval_loss": 0.6953954696655273, "eval_precision": 0.8998687748047625, "eval_recall": 0.8786597563193308, "eval_runtime": 1.6653, "eval_samples_per_second": 239.59, "eval_steps_per_second": 30.024, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.0026766008231788874, "learning_rate": 1.75e-05, "loss": 0.0101, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.9122807017543859, "eval_f1": 0.8954723392788977, "eval_loss": 0.6750524044036865, "eval_precision": 0.8909569746108776, "eval_recall": 0.9004364429896345, "eval_runtime": 1.6681, "eval_samples_per_second": 239.196, "eval_steps_per_second": 29.974, "step": 1586 }, { "epoch": 14.0, "grad_norm": 0.0015771281905472279, "learning_rate": 1.5e-05, "loss": 0.0025, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8779692119482108, "eval_loss": 0.7316891551017761, "eval_precision": 0.8934322033898305, "eval_recall": 0.865839243498818, "eval_runtime": 1.6607, "eval_samples_per_second": 240.257, "eval_steps_per_second": 30.107, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.003234785981476307, "learning_rate": 1.25e-05, "loss": 0.0088, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8669758137843244, "eval_loss": 0.6788834929466248, "eval_precision": 0.8669758137843244, "eval_recall": 0.8669758137843244, "eval_runtime": 1.6627, "eval_samples_per_second": 239.966, "eval_steps_per_second": 30.071, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.0018662125803530216, "learning_rate": 1e-05, "loss": 0.0017, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8676337535436396, "eval_loss": 0.7504969239234924, "eval_precision": 0.8658613445378152, "eval_recall": 0.8694762684124386, "eval_runtime": 1.6676, "eval_samples_per_second": 239.272, "eval_steps_per_second": 29.984, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.0024691985454410315, "learning_rate": 7.5e-06, "loss": 0.0017, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8676337535436396, "eval_loss": 0.7755724787712097, "eval_precision": 0.8658613445378152, "eval_recall": 0.8694762684124386, "eval_runtime": 1.6752, "eval_samples_per_second": 238.182, "eval_steps_per_second": 29.847, "step": 2074 }, { "epoch": 18.0, "grad_norm": 0.0015471646329388022, "learning_rate": 5e-06, "loss": 0.0011, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8715803025426456, "eval_loss": 0.8041408061981201, "eval_precision": 0.8673433153814287, "eval_recall": 0.8762502273140571, "eval_runtime": 1.6768, "eval_samples_per_second": 237.95, "eval_steps_per_second": 29.818, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.0015901036094874144, "learning_rate": 2.5e-06, "loss": 0.0017, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8703223612108386, "eval_loss": 0.8064257502555847, "eval_precision": 0.8694131129742446, "eval_recall": 0.8712493180578287, "eval_runtime": 1.6795, "eval_samples_per_second": 237.568, "eval_steps_per_second": 29.77, "step": 2318 }, { "epoch": 20.0, "grad_norm": 0.0011909452732652426, "learning_rate": 0.0, "loss": 0.0008, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8703223612108386, "eval_loss": 0.8053188323974609, "eval_precision": 0.8694131129742446, "eval_recall": 0.8712493180578287, "eval_runtime": 1.6806, "eval_samples_per_second": 237.419, "eval_steps_per_second": 29.752, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7598755382040000.0, "train_loss": 0.05254678238855034, "train_runtime": 869.5662, "train_samples_per_second": 83.835, "train_steps_per_second": 2.806 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7598755382040000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }