| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 2440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 6.880457878112793, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.371, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8847117794486216, | |
| "eval_f1": 0.8556621579112929, | |
| "eval_loss": 0.2788735032081604, | |
| "eval_precision": 0.871654421411703, | |
| "eval_recall": 0.8434260774686306, | |
| "eval_runtime": 1.6426, | |
| "eval_samples_per_second": 242.907, | |
| "eval_steps_per_second": 30.439, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 6.694629192352295, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.214, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8947368421052632, | |
| "eval_f1": 0.8760282890453928, | |
| "eval_loss": 0.2702818810939789, | |
| "eval_precision": 0.8682260305697083, | |
| "eval_recall": 0.8855246408437898, | |
| "eval_runtime": 1.6739, | |
| "eval_samples_per_second": 238.362, | |
| "eval_steps_per_second": 29.87, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 3.0087978839874268, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.141, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.87468671679198, | |
| "eval_f1": 0.843111041207927, | |
| "eval_loss": 0.4446346163749695, | |
| "eval_precision": 0.8585673051692468, | |
| "eval_recall": 0.831332969630842, | |
| "eval_runtime": 1.6711, | |
| "eval_samples_per_second": 238.76, | |
| "eval_steps_per_second": 29.92, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 15.80390739440918, | |
| "learning_rate": 4e-05, | |
| "loss": 0.093, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8696741854636592, | |
| "eval_f1": 0.8368354828562441, | |
| "eval_loss": 0.5896394848823547, | |
| "eval_precision": 0.8520237470480189, | |
| "eval_recall": 0.8252864157119476, | |
| "eval_runtime": 1.6746, | |
| "eval_samples_per_second": 238.269, | |
| "eval_steps_per_second": 29.858, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.24515673518180847, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.0469, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8796992481203008, | |
| "eval_f1": 0.8563025210084034, | |
| "eval_loss": 0.6098523736000061, | |
| "eval_precision": 0.8529936381473334, | |
| "eval_recall": 0.8598836152027641, | |
| "eval_runtime": 1.6766, | |
| "eval_samples_per_second": 237.984, | |
| "eval_steps_per_second": 29.823, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.06188611686229706, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.0498, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.899749373433584, | |
| "eval_f1": 0.871530684525726, | |
| "eval_loss": 0.6609992980957031, | |
| "eval_precision": 0.9016447368421052, | |
| "eval_recall": 0.8515639207128569, | |
| "eval_runtime": 1.6777, | |
| "eval_samples_per_second": 237.819, | |
| "eval_steps_per_second": 29.802, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 1.2003283500671387, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.0257, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8872180451127819, | |
| "eval_f1": 0.8532332818362393, | |
| "eval_loss": 0.6781011819839478, | |
| "eval_precision": 0.891747572815534, | |
| "eval_recall": 0.8301963993453355, | |
| "eval_runtime": 1.6733, | |
| "eval_samples_per_second": 238.446, | |
| "eval_steps_per_second": 29.88, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 1.8772869110107422, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0267, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8872180451127819, | |
| "eval_f1": 0.8523012380208119, | |
| "eval_loss": 0.8199848532676697, | |
| "eval_precision": 0.8950599239988307, | |
| "eval_recall": 0.8276959447172213, | |
| "eval_runtime": 1.6789, | |
| "eval_samples_per_second": 237.65, | |
| "eval_steps_per_second": 29.781, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 2.237112522125244, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.016, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.899749373433584, | |
| "eval_f1": 0.881931703852755, | |
| "eval_loss": 0.5965662002563477, | |
| "eval_precision": 0.8740012737378415, | |
| "eval_recall": 0.8915711947626841, | |
| "eval_runtime": 1.673, | |
| "eval_samples_per_second": 238.498, | |
| "eval_steps_per_second": 29.887, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.010850044898688793, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0132, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8835263209107715, | |
| "eval_loss": 0.6436753273010254, | |
| "eval_precision": 0.8791501449961532, | |
| "eval_recall": 0.8883433351518457, | |
| "eval_runtime": 1.6736, | |
| "eval_samples_per_second": 238.404, | |
| "eval_steps_per_second": 29.875, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.008115105330944061, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.0161, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.9072681704260651, | |
| "eval_f1": 0.8867007927797945, | |
| "eval_loss": 0.679655909538269, | |
| "eval_precision": 0.89198606271777, | |
| "eval_recall": 0.8818876159301692, | |
| "eval_runtime": 1.6719, | |
| "eval_samples_per_second": 238.648, | |
| "eval_steps_per_second": 29.906, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.003001323202624917, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0091, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.9097744360902256, | |
| "eval_f1": 0.8882839721254355, | |
| "eval_loss": 0.6953954696655273, | |
| "eval_precision": 0.8998687748047625, | |
| "eval_recall": 0.8786597563193308, | |
| "eval_runtime": 1.6653, | |
| "eval_samples_per_second": 239.59, | |
| "eval_steps_per_second": 30.024, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 0.0026766008231788874, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.0101, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.9122807017543859, | |
| "eval_f1": 0.8954723392788977, | |
| "eval_loss": 0.6750524044036865, | |
| "eval_precision": 0.8909569746108776, | |
| "eval_recall": 0.9004364429896345, | |
| "eval_runtime": 1.6681, | |
| "eval_samples_per_second": 239.196, | |
| "eval_steps_per_second": 29.974, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.0015771281905472279, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.0025, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.9022556390977443, | |
| "eval_f1": 0.8779692119482108, | |
| "eval_loss": 0.7316891551017761, | |
| "eval_precision": 0.8934322033898305, | |
| "eval_recall": 0.865839243498818, | |
| "eval_runtime": 1.6607, | |
| "eval_samples_per_second": 240.257, | |
| "eval_steps_per_second": 30.107, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.003234785981476307, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.0088, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.8897243107769424, | |
| "eval_f1": 0.8669758137843244, | |
| "eval_loss": 0.6788834929466248, | |
| "eval_precision": 0.8669758137843244, | |
| "eval_recall": 0.8669758137843244, | |
| "eval_runtime": 1.6627, | |
| "eval_samples_per_second": 239.966, | |
| "eval_steps_per_second": 30.071, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.0018662125803530216, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0017, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8897243107769424, | |
| "eval_f1": 0.8676337535436396, | |
| "eval_loss": 0.7504969239234924, | |
| "eval_precision": 0.8658613445378152, | |
| "eval_recall": 0.8694762684124386, | |
| "eval_runtime": 1.6676, | |
| "eval_samples_per_second": 239.272, | |
| "eval_steps_per_second": 29.984, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.0024691985454410315, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.0017, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.8897243107769424, | |
| "eval_f1": 0.8676337535436396, | |
| "eval_loss": 0.7755724787712097, | |
| "eval_precision": 0.8658613445378152, | |
| "eval_recall": 0.8694762684124386, | |
| "eval_runtime": 1.6752, | |
| "eval_samples_per_second": 238.182, | |
| "eval_steps_per_second": 29.847, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.0015471646329388022, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0011, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.8922305764411027, | |
| "eval_f1": 0.8715803025426456, | |
| "eval_loss": 0.8041408061981201, | |
| "eval_precision": 0.8673433153814287, | |
| "eval_recall": 0.8762502273140571, | |
| "eval_runtime": 1.6768, | |
| "eval_samples_per_second": 237.95, | |
| "eval_steps_per_second": 29.818, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.0015901036094874144, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.0017, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.8922305764411027, | |
| "eval_f1": 0.8703223612108386, | |
| "eval_loss": 0.8064257502555847, | |
| "eval_precision": 0.8694131129742446, | |
| "eval_recall": 0.8712493180578287, | |
| "eval_runtime": 1.6795, | |
| "eval_samples_per_second": 237.568, | |
| "eval_steps_per_second": 29.77, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.0011909452732652426, | |
| "learning_rate": 0.0, | |
| "loss": 0.0008, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8922305764411027, | |
| "eval_f1": 0.8703223612108386, | |
| "eval_loss": 0.8053188323974609, | |
| "eval_precision": 0.8694131129742446, | |
| "eval_recall": 0.8712493180578287, | |
| "eval_runtime": 1.6806, | |
| "eval_samples_per_second": 237.419, | |
| "eval_steps_per_second": 29.752, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 2440, | |
| "total_flos": 7598755382040000.0, | |
| "train_loss": 0.05254678238855034, | |
| "train_runtime": 869.5662, | |
| "train_samples_per_second": 83.835, | |
| "train_steps_per_second": 2.806 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 7598755382040000.0, | |
| "train_batch_size": 30, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |